use OpenSRF::Utils::Logger qw/$logger/;
use Data::Dumper;
# File-level XML::LibXML parser and XML::LibXSLT processor instances.
# NOTE(review): the -/+ pair below reads identically in this excerpt; presumably
# a whitespace-only change -- confirm against the original patch.
-my $parser = XML::LibXML->new();
+my $parser = XML::LibXML->new();
my $xslt = XML::LibXSLT->new();
# Declared without a value; where it gets populated is not visible in this excerpt.
my $mods_sheet;
# ----------------------------------------------------------------------------------------
# XPATH for extracting info from a MODS doc
#
# Every expression below uses the "mods:" prefix, so whatever evaluates them
# must have that prefix bound to a namespace (the binding is not visible in
# this excerpt).
#   $isbn_xpath     - identifier elements with type='isbn' and no "invalid" attribute
#   $pub_xpath      - union of dateIssued[@encoding='marc'] and dateIssued[1] under originInfo
#   $related_xpath  - empty string; no expression is defined for it here
#   $physical_desc  - union of the form, extent, reformattingQuality,
#                     internetMediaType and digitalOrigin children of physicalDescription
# NOTE(review): each -/+ pair below reads identically in this excerpt; presumably
# whitespace-only changes -- confirm against the original patch.
-my $isbn_xpath = "//mods:mods/mods:identifier[\@type='isbn' and not(\@invalid)]";
-my $resource_xpath = "//mods:mods/mods:typeOfResource";
-my $pub_xpath = "//mods:mods/mods:originInfo//mods:dateIssued[\@encoding='marc']|" .
+my $isbn_xpath = "//mods:mods/mods:identifier[\@type='isbn' and not(\@invalid)]";
+my $resource_xpath = "//mods:mods/mods:typeOfResource";
+my $pub_xpath = "//mods:mods/mods:originInfo//mods:dateIssued[\@encoding='marc']|" .
"//mods:mods/mods:originInfo//mods:dateIssued[1]";
-my $tcn_xpath = "//mods:mods/mods:recordInfo/mods:recordIdentifier";
-my $publisher_xpath = "//mods:mods/mods:originInfo//mods:publisher[1]";
-my $edition_xpath = "//mods:mods/mods:originInfo//mods:edition[1]";
-my $abstract_xpath = "//mods:mods/mods:abstract";
-my $related_xpath = "";
+my $tcn_xpath = "//mods:mods/mods:recordInfo/mods:recordIdentifier";
+my $publisher_xpath = "//mods:mods/mods:originInfo//mods:publisher[1]";
+my $edition_xpath = "//mods:mods/mods:originInfo//mods:edition[1]";
+my $abstract_xpath = "//mods:mods/mods:abstract";
+my $related_xpath = "";
my $online_loc_xpath = "//mods:location/mods:url";
-my $physical_desc = "(//mods:mods/mods:physicalDescription/mods:form|//mods:mods/mods:physicalDescription/mods:extent|".
+my $physical_desc = "(//mods:mods/mods:physicalDescription/mods:form|//mods:mods/mods:physicalDescription/mods:extent|".
"//mods:mods/mods:physicalDescription/mods:reformattingQuality|//mods:mods/mods:physicalDescription/mods:internetMediaType|".
"//mods:mods/mods:physicalDescription/mods:digitalOrigin)";
-my $toc_xpath = "//mods:tableOfContents";
+my $toc_xpath = "//mods:tableOfContents";
my $xpathset = {
title => {
- abbreviated =>
+ abbreviated =>
"//mods:mods/mods:titleInfo[mods:title and (\@type='abbreviated')]",
translated =>
"//mods:mods/mods:titleInfo[mods:title and (\@type='translated')]",
"//mods:mods/mods:titleInfo[mods:title and (\@type='uniform')]",
proper =>
"//mods:mods/mods:titleInfo[mods:title and not (\@type)]",
+ sortkey =>
+ "//mods:mods/mods:titleInfo[mods:title and not (\@type)]",
any =>
"//mods:mods/mods:titleInfo",
},
author => {
- corporate =>
+ corporate =>
"//mods:mods/mods:name[\@type='corporate']/*[local-name()='namePart']".
"[../mods:role/mods:text[text()='creator']".
" or ../mods:role/mods:roleTerm[".
" and \@authority='marcrelator'".
" and text()='creator']".
"][1]",
- personal =>
+ personal =>
"//mods:mods/mods:name[\@type='personal']/*[local-name()='namePart']".
"[../mods:role/mods:text[text()='creator']".
" or ../mods:role/mods:roleTerm[".
" and \@authority='marcrelator'".
" and text()='creator']".
"][1]",
- conference =>
+ conference =>
"//mods:mods/mods:name[\@type='conference']/*[local-name()='namePart']".
"[../mods:role/mods:text[text()='creator']".
" or ../mods:role/mods:roleTerm[".
" and \@authority='marcrelator'".
" and text()='creator']".
"][1]",
- other =>
+ other =>
"//mods:mods/mods:name[\@type='personal']/*[local-name()='namePart']",
- any =>
+ any =>
"//mods:mods/mods:name/*[local-name()='namePart'][1]",
},
subject => {
- topic =>
+ topic =>
"//mods:mods/mods:subject/*[".
" local-name()='geographic'".
" or local-name()='name'".
" or local-name()='topic'".
"]/parent::mods:subject",
-# geographic =>
-# "//mods:mods/*[local-name()='subject']/*[local-name()='geographic']",
-# name =>
-# "//mods:mods/*[local-name()='subject']/*[local-name()='name']",
-# temporal =>
-# "//mods:mods/*[local-name()='subject']/*[local-name()='temporal']",
-# topic =>
-# "//mods:mods/*[local-name()='subject']/*[local-name()='topic']",
+# geographic =>
+# "//mods:mods/*[local-name()='subject']/*[local-name()='geographic']",
+# name =>
+# "//mods:mods/*[local-name()='subject']/*[local-name()='name']",
+# temporal =>
+# "//mods:mods/*[local-name()='subject']/*[local-name()='temporal']",
+# topic =>
+# "//mods:mods/*[local-name()='subject']/*[local-name()='topic']",
},
#keyword => { keyword => "//mods:mods/*[not(local-name()='originInfo')]", },
sub get_field_value {
- my( $self, $mods, $xpath, $type) = @_;
+ my( $self, $mods, $xpath, $class, $type) = @_;
my @string;
my @children = $value->childNodes();
my @child_text;
for my $child (@children) {
+ # Magic for title-sorting special value, skip the nonSort node
+ next if ($class && $class eq 'title' && $type && $type eq 'sortkey' && $child->nodeName =~ m/nonSort/);
+
# MODS strips the punctuation from 245abc, which often
# results in "title subtitle" rather than "title : subtitle";
# this hack gets it back for us
- if ($type && $type eq 'title' && $child->nodeName =~ m/subTitle/) {
- push(@child_text, " : ");
+ if ($class && $class eq 'title' && $child->nodeName =~ m/subTitle/) {
+ push(@child_text, " : ");
}
next unless( $child->nodeType != 3 );
push(@child_text, join(' ', @a));
} else {
- push(@child_text, $child->textContent);
+ push(@child_text, $child->textContent);
}
}
my $class = "title";
$data->{$class} = {};
for my $type(keys %{$xpathset->{$class}}) {
- my @value = $self->get_field_value( $mods, $xpathset->{$class}->{$type}, "title" );
+ my @value = $self->get_field_value( $mods, $xpathset->{$class}->{$type}, "title", $type );
for my $arr (@value) {
if( ref($arr) ) {
$data->{$class}->{$type} = shift @$arr;
my( $self, $modsperl ) = @_;
my $title = "";
+ my $titlesort = "";
my $author = "";
my $subject = [];
- my $series = [];
+ my $series = [];
my $tmp = $modsperl->{title};
($title = $tmp->{any});
}
+ # Just another title value check, so we can re-use the previous tmp lookup
+ if(!$tmp) { $titlesort = ""; }
+ else {
+ ($titlesort = $tmp->{sortkey}) ||
+ ($titlesort = $title);
+ }
+
$tmp = $modsperl->{author};
if(!$tmp) { $author = ""; }
else {
($author = $tmp->{corporate}) ||
($author = $tmp->{conference}) ||
($author = $tmp->{other}) ||
- ($author = $tmp->{any});
+ ($author = $tmp->{any});
}
$tmp = $modsperl->{subject};
- if(!$tmp) { $subject = {}; }
+ if(!$tmp) { $subject = {}; }
else {
for my $key( keys %{$tmp}) {
push(@$subject, @{$tmp->{$key}}) if ($tmp->{$key});
else { $series = $tmp->{'series'}; }
- return { series => $series, title => $title,
+ return { series => $series, title => $title, titlesort => $titlesort,
author => $author, subject => $subject };
}
$self->{master_doc} = $self->modsdoc_to_values( $mods );
$self->{master_doc} = $self->mods_values_to_mods_slim( $self->{master_doc} );
- ($self->{master_doc}->{isbn}) =
+ ($self->{master_doc}->{isbn}) =
$self->get_field_value( $mods, $isbn_xpath );
- $self->{master_doc}->{type_of_resource} =
+ $self->{master_doc}->{type_of_resource} =
[ $self->get_field_value( $mods, $resource_xpath ) ];
- ($self->{master_doc}->{tcn}) =
+ ($self->{master_doc}->{tcn}) =
$self->get_field_value( $mods, $tcn_xpath );
- ($self->{master_doc}->{pubdate}) =
+ ($self->{master_doc}->{pubdate}) =
$self->get_field_value( $mods, $pub_xpath );
- ($self->{master_doc}->{publisher}) =
+ ($self->{master_doc}->{publisher}) =
$self->get_field_value( $mods, $publisher_xpath );
($self->{master_doc}->{edition}) =
push(@{$self->{master_doc}->{online_loc}}, $url->getAttribute('note') || '');
}
- ($self->{master_doc}->{synopsis}) =
+ ($self->{master_doc}->{synopsis}) =
$self->get_field_value( $mods, $abstract_xpath );
$self->{master_doc}->{physical_description} = [];
push(@{$self->{master_doc}->{physical_description}},
$self->get_field_value( $mods, $physical_desc ) );
- $self->{master_doc}->{physical_description} =
+ $self->{master_doc}->{physical_description} =
join( ' ', @{$self->{master_doc}->{physical_description}});
($self->{master_doc}->{toc}) = $self->get_field_value($mods, $toc_xpath);
}
}
- push( @{$self->{master_doc}->{type_of_resource}},
+ push( @{$self->{master_doc}->{type_of_resource}},
$self->get_field_value( $mods, $resource_xpath ));
if(!($self->{master_doc}->{isbn}) ) {
- ($self->{master_doc}->{isbn}) =
+ ($self->{master_doc}->{isbn}) =
$self->get_field_value( $mods, $isbn_xpath );
}
}
#(my $title = $perl->{title}) =~ s/\[.*?\]//og;
#(my $author = $perl->{author}) =~ s/\(.*?\)//og;
my $title = $perl->{title};
+ my $titlesort = $perl->{titlesort};
my $author = $perl->{author};
my @series;
$rtypes = [ keys %hash ];
$record->title($title);
+ $record->titlesort($titlesort);
$record->author($author);
$record->doc_id($perl->{doc_id});