We don't use performer notes any more thanks to BibTemplate, farewell
authordbs <dbs@6d9bc8c9-1ec2-4278-b937-99fde70a366f>
Wed, 11 Aug 2010 17:36:09 +0000 (17:36 +0000)
committerdbs <dbs@6d9bc8c9-1ec2-4278-b937-99fde70a366f>
Wed, 11 Aug 2010 17:36:09 +0000 (17:36 +0000)
git-svn-id: svn://svn.open-ils.org/ILS-Contrib/conifer/branches/rel_1_6_0@952 6d9bc8c9-1ec2-4278-b937-99fde70a366f

src/perlmods/OpenILS/Utils/ModsParser.pm [deleted file]

diff --git a/src/perlmods/OpenILS/Utils/ModsParser.pm b/src/perlmods/OpenILS/Utils/ModsParser.pm
deleted file mode 100644 (file)
index 14e3085..0000000
+++ /dev/null
@@ -1,492 +0,0 @@
-package OpenILS::Utils::ModsParser;
-use strict; use warnings;
-
-use OpenSRF::EX qw/:try/;
-use XML::LibXML;
-use XML::LibXSLT;
-use Time::HiRes qw(time);
-use OpenILS::Utils::Fieldmapper;
-use OpenSRF::Utils::SettingsClient;
-use OpenSRF::Utils::Logger qw/$logger/;
-use Data::Dumper;
-
-my $parser             = XML::LibXML->new();
-my $xslt                       = XML::LibXSLT->new();
-my $mods_sheet;
-
-# ----------------------------------------------------------------------------------------
-# XPATH for extracting info from a MODS doc
-my $isbn_xpath                 = "//mods:mods/mods:identifier[\@type='isbn']";
-my $resource_xpath     = "//mods:mods/mods:typeOfResource";
-my $pub_xpath                  = "//mods:mods/mods:originInfo//mods:dateIssued[1]|" .
-       "//mods:mods/mods:originInfo//mods:dateIssued[\@encoding='marc']";
-my $tcn_xpath                  = "//mods:mods/mods:recordInfo/mods:recordIdentifier";
-my $pub_place_xpath            = "//mods:mods/mods:originInfo//mods:place//mods:placeTerm[\@type='text']";
-my $publisher_xpath            = "//mods:mods/mods:originInfo//mods:publisher";
-my $edition_xpath              = "//mods:mods/mods:originInfo//mods:edition[1]";
-my $abstract_xpath     = "//mods:mods/mods:abstract";
-my $related_xpath              = "";
-my $online_loc_xpath = "//mods:location/mods:url";
-my $physical_desc              = "(//mods:physicalDescription/mods:form|//mods:physicalDescription/mods:extent|".
-       "//mods:physicalDescription/mods:reformattingQuality|//mods:physicalDescription/mods:internetMediaType|".
-       "//mods:physicalDescription/mods:digitalOrigin)";
-my $extent_xpath               = "//mods:physicalDescription/mods:extent";
-my $toc_xpath                  = "//mods:tableOfContents";
-my $performers_xpath           = "//mods:note[\@type='performers']";
-
-my $xpathset = {
-
-       title => {
-               abbreviated => 
-                       "//mods:mods/mods:titleInfo[mods:title and (\@type='abbreviated')]",
-               translated =>
-                       "//mods:mods/mods:titleInfo[mods:title and (\@type='translated')]",
-               uniform =>
-                       "//mods:mods/mods:titleInfo[mods:title and (\@type='uniform')]",
-               proper =>
-                       "//mods:mods/mods:titleInfo[mods:title and not (\@type)]",
-               any =>
-                       "//mods:mods/mods:titleInfo",
-       },
-
-       author => {
-               corporate => 
-                       "//mods:mods/mods:name[\@type='corporate']/*[local-name()='namePart']".
-                               "[../mods:role/mods:text[text()='creator']".
-                               " or ../mods:role/mods:roleTerm[".
-                               "        \@type='text'".
-                               "        and \@authority='marcrelator'".
-                               "        and text()='creator']".
-                               "][1]",
-               personal => 
-                       "//mods:mods/mods:name[\@type='personal']/*[local-name()='namePart']".
-                               "[../mods:role/mods:text[text()='creator']".
-                               " or ../mods:role/mods:roleTerm[".
-                               "        \@type='text'".
-                               "        and \@authority='marcrelator'".
-                               "        and text()='creator']".
-                               "][1]",
-               conference => 
-                       "//mods:mods/mods:name[\@type='conference']/*[local-name()='namePart']".
-                               "[../mods:role/mods:text[text()='creator']".
-                               " or ../mods:role/mods:roleTerm[".
-                               "        \@type='text'".
-                               "        and \@authority='marcrelator'".
-                               "        and text()='creator']".
-                               "][1]",
-               other => 
-                       "//mods:mods/mods:name[\@type='personal']/*[local-name()='namePart']",
-               any => 
-                       "//mods:mods/mods:name/*[local-name()='namePart'][1]",
-       },
-
-       subject => {
-
-               topic => 
-                       "//mods:mods/mods:subject/*[".
-                       "   local-name()='geographic'".
-                       "   or local-name()='name'".
-                       "   or local-name()='temporal'".
-                       "   or local-name()='topic'".
-                       "]/parent::mods:subject",
-
-#              geographic => 
-#                      "//mods:mods/*[local-name()='subject']/*[local-name()='geographic']",
-#              name => 
-#                      "//mods:mods/*[local-name()='subject']/*[local-name()='name']",
-#              temporal => 
-#                      "//mods:mods/*[local-name()='subject']/*[local-name()='temporal']",
-#              topic => 
-#                      "//mods:mods/*[local-name()='subject']/*[local-name()='topic']",
-       },
-       #keyword => { keyword => "//mods:mods/*[not(local-name()='originInfo')]", },
-
-       series => {
-               series => "//mods:mods/mods:relatedItem[\@type='series']/mods:titleInfo"
-       }
-};
-# ----------------------------------------------------------------------------------------
-
-
-
-sub new { return bless( {}, shift() ); }
-
-sub get_field_value {
-
-       my( $self, $mods, $xpath ) = @_;
-
-       my @string;
-
-       my $root = $mods->documentElement;
-       $root->setNamespace( "http://www.loc.gov/mods/v3", "mods", 1 );
-
-       try {
-               # grab the set of matching nodes
-               my @nodes = $root->findnodes( $xpath );
-               for my $value (@nodes) {
-
-                       # grab all children of the node
-                       my @children = $value->childNodes();
-                       my @child_text;
-                       for my $child (@children) {
-                               next unless( $child->nodeType != 3 );
-
-                               if($child->childNodes) {
-                                       my @a;
-                                       for my $c (@{$child->childNodes}){
-                                               push @a, $c->textContent;
-                                       }
-                                       push(@child_text, join(' ', @a));
-
-                               } else {
-                                       push(@child_text, $child->textContent); 
-                               }
-
-                       }
-                       if(@child_text) {
-                               push(@string, \@child_text);
-                       }
-
-                       if( !@child_text  ) {
-                               push(@string, $value->textContent );
-                       }
-               }
-       } otherwise {
-               $logger->info("MODS-izing failure: ".shift());
-               $logger->info("Failed MODS xml: ".$root->toString);
-               $logger->info("Failed MODS xpath: $xpath");
-       };
-       return @string;
-}
-
-=head
-sub _modsdoc_to_values {
-       my( $self, $mods ) = @_;
-       my $data = {};
-       for my $class (keys %$xpathset) {
-               $data->{$class} = {};
-               for my $type(keys %{$xpathset->{$class}}) {
-                       my @value = $self->get_field_value( $mods, $xpathset->{$class}->{$type} );
-                       if( $class eq "subject" ) {
-                               push( @{$data->{$class}->{$type}},  @value );
-                       } else {
-                               $data->{$class}->{$type} = $value[0];
-                       }
-               }
-       }
-       return $data;
-}
-=cut
-
-sub modsdoc_to_values {
-       my( $self, $mods ) = @_;
-       my $data = {};
-
-       {
-               my $class = "subject";
-               $data->{$class} = {};
-               for my $type(keys %{$xpathset->{$class}}) {
-                       my @value = $self->get_field_value( $mods, $xpathset->{$class}->{$type} );
-                       for my $arr (@value) {
-                               push( @{$data->{$class}->{$type}},  $arr);
-                       }
-               }
-       }
-
-       {
-               my $class = "title";
-               $data->{$class} = {};
-               for my $type(keys %{$xpathset->{$class}}) {
-                       my @value = $self->get_field_value( $mods, $xpathset->{$class}->{$type} );
-                       for my $arr (@value) {
-                               if( ref($arr) ) {
-                                       $data->{$class}->{$type} = shift @$arr;
-
-                                       my $t = lc($data->{$class}->{$type});
-                                       if($t and $t =~ /^l[eoa]s|l[ae]|el|the|un[ae]?|an?\s?$/o ) {
-                                               my $val = shift @$arr || "";
-                                               $data->{$class}->{$type} .= " $val" if $data->{$class}->{$type};
-                                               $data->{$class}->{$type} = " $val" unless $data->{$class}->{$type};
-                                       }
-
-                                       for my $t (@$arr) {
-                                               $data->{$class}->{$type} .= ' : ' if ($data->{$class}->{$type} =~ /\w\s*$/o);
-                                               $data->{$class}->{$type} .= " $t";
-                                       }
-                               } else {
-                                       $data->{$class}->{$type} = $arr;
-                               }
-                       }
-                       $data->{$class}->{$type} =~ s/\s+/ /go if ($data->{$class}->{$type});
-               }
-       }
-
-       {
-               my $class = "author";
-               $data->{$class} = {};
-               for my $type(keys %{$xpathset->{$class}}) {
-                       my @value = $self->get_field_value( $mods, $xpathset->{$class}->{$type} );
-                       $data->{$class}->{$type} = $value[0];
-               }
-       }
-
-       {
-               my $class = "series";
-               $data->{$class} = {};
-               for my $type(keys %{$xpathset->{$class}}) {
-                       my @value = $self->get_field_value( $mods, $xpathset->{$class}->{$type} );
-                       for my $arr (@value) {
-                               if( ref($arr) ) {
-                                       push(@{$data->{$class}->{$type}}, join(" ", @$arr));
-                               } else {
-                                       push( @{$data->{$class}->{$type}}, $arr );
-                               }
-                       }
-               }
-
-       }
-
-       return $data;
-}
-
-
-
-
-# ---------------------------------------------------------------------------
-# Grabs the data 'we want' from the MODS doc and returns it in hash form
-# ---------------------------------------------------------------------------
-sub mods_values_to_mods_slim {
-       my( $self, $modsperl ) = @_;
-
-       my $title = "";
-       my $author = "";
-       my $subject = [];
-       my $series      = [];
-
-       my $tmp = $modsperl->{title};
-
-
-       if(!$tmp) { $title = ""; }
-       else {
-               ($title = $tmp->{proper}) ||
-               ($title = $tmp->{translated}) ||
-               ($title = $tmp->{abbreviated}) ||
-               ($title = $tmp->{uniform}) ||
-               ($title = $tmp->{any});
-       }
-
-       $tmp = $modsperl->{author};
-       if(!$tmp) { $author = ""; }
-       else {
-               ($author = $tmp->{personal}) ||
-               ($author = $tmp->{corporate}) ||
-               ($author = $tmp->{conference}) ||
-               ($author = $tmp->{other}) ||
-               ($author = $tmp->{any}); 
-       }
-
-       $tmp = $modsperl->{subject};
-       if(!$tmp) { $subject = {}; } 
-       else {
-               for my $key( keys %{$tmp}) {
-                       push(@$subject, @{$tmp->{$key}}) if ($tmp->{$key});
-               }
-               my $subh = {};
-               for my $s (@$subject) {
-                       if(defined($subh->{$s})) { $subh->{$s->[0]}++ } else { $subh->{$s->[0]} = 1;}
-               }
-               $subject = $subh
-       }
-
-       $tmp = $modsperl->{'series'};
-       if(!$tmp) { $series = []; }
-       else { $series = $tmp->{'series'}; }
-
-
-       return { series => $series, title => $title, 
-                       author => $author, subject => $subject };
-}
-
-
-
-# ---------------------------------------------------------------------------
-# Initializes a MARC -> Unified MODS batch process
-# ---------------------------------------------------------------------------
-
-sub start_mods_batch {
-
-       my( $self, $master_doc ) = @_;
-
-       if(!$master_doc) {
-               $self->{master_doc} = undef;
-               return;
-       }
-
-       if(!$mods_sheet) {
-                my $xslt_doc = $parser->parse_file(
-                       OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl') .  "/MARC21slim2MODS33.xsl");
-               $mods_sheet = $xslt->parse_stylesheet( $xslt_doc );
-       }
-
-
-       my $xmldoc = $parser->parse_string($master_doc);
-       my $mods = $mods_sheet->transform($xmldoc);
-
-       $self->{master_doc} = $self->modsdoc_to_values( $mods );
-       $self->{master_doc} = $self->mods_values_to_mods_slim( $self->{master_doc} );
-
-       ($self->{master_doc}->{isbn}) = 
-               $self->get_field_value( $mods, $isbn_xpath );
-
-       $self->{master_doc}->{type_of_resource} = 
-               [ $self->get_field_value( $mods, $resource_xpath ) ];
-
-       ($self->{master_doc}->{tcn}) = 
-               $self->get_field_value( $mods, $tcn_xpath );
-
-       ($self->{master_doc}->{pubdate}) = 
-               $self->get_field_value( $mods, $pub_xpath );
-
-       my @pub_place = $self->get_field_value( $mods, $pub_place_xpath );
-       my @publisher = $self->get_field_value( $mods, $publisher_xpath );
-
-       if (@pub_place && @publisher) {
-               ($self->{master_doc}->{publisher}) = $pub_place[0] . " : " . $publisher[0];
-       } elsif (@pub_place) {
-               ($self->{master_doc}->{publisher}) = $pub_place[0];
-       } elsif (@publisher) {
-               ($self->{master_doc}->{publisher}) = $publisher[0];
-       } else {
-               ($self->{master_doc}->{publisher}) = undef;
-       }
-
-       ($self->{master_doc}->{edition}) =
-               $self->get_field_value( $mods, $edition_xpath );
-
-       ($self->{master_doc}->{performer_notes}) = 
-               $self->get_field_value( $mods, $performers_xpath );
-
-# ------------------------------
-       # holds an array of [ link, title, link, title, ... ]
-       $self->{master_doc}->{online_loc} = [];
-       for my $url ($mods->findnodes($online_loc_xpath)) {
-               push(@{$self->{master_doc}->{online_loc}}, $url->textContent);
-               push(@{$self->{master_doc}->{online_loc}}, $url->getAttribute('displayLabel') || '');
-               push(@{$self->{master_doc}->{online_loc}}, $url->getAttribute('note') || '');
-       }
-
-       ($self->{master_doc}->{synopsis}) = 
-               $self->get_field_value( $mods, $abstract_xpath );
-
-       $self->{master_doc}->{physical_description} = [];
-       push(@{$self->{master_doc}->{physical_description}},
-               $self->get_field_value( $mods, $physical_desc ) );
-       $self->{master_doc}->{physical_description} = 
-               join( ' ', @{$self->{master_doc}->{physical_description}});
-
-       ($self->{master_doc}->{toc}) = $self->get_field_value($mods, $toc_xpath);
-
-       ($self->{master_doc}->{extent}) = 
-               $self->get_field_value($mods, $extent_xpath);
-
-}
-
-
-
-# ---------------------------------------------------------------------------
-# Takes a MARCXML string and adds it to the growing MODS doc
-# ---------------------------------------------------------------------------
-sub push_mods_batch {
-       my( $self, $marcxml ) = @_;
-
-       my $xmldoc = $parser->parse_string($marcxml);
-       my $mods = $mods_sheet->transform($xmldoc);
-
-       my $xmlperl = $self->modsdoc_to_values( $mods );
-       $xmlperl = $self->mods_values_to_mods_slim( $xmlperl );
-
-       # for backwards compatibility, remove the array part when all is decided
-       if(ref($xmlperl->{subject}) eq 'ARRAY' ) {
-               for my $subject( @{$xmlperl->{subject}} ) {
-                       push @{$self->{master_doc}->{subject}}, $subject;
-               }
-       } else {
-               for my $subject ( keys %{$xmlperl->{subject}} ) {
-                       my $s = $self->{master_doc}->{subject};
-                       if(defined($s->{$subject})) { $s->{$subject}++; } else { $s->{$subject} = 1; }
-               }
-       }
-
-       push( @{$self->{master_doc}->{type_of_resource}}, 
-               $self->get_field_value( $mods, $resource_xpath ));
-
-       if(!($self->{master_doc}->{isbn}) ) {
-               ($self->{master_doc}->{isbn}) = 
-                       $self->get_field_value( $mods, $isbn_xpath );
-       }
-}
-
-
-# ---------------------------------------------------------------------------
-# Completes a MARC -> Unified MODS batch process and returns the perl hash
-# ---------------------------------------------------------------------------
-sub init_virtual_record {
-       my $record = Fieldmapper::metabib::virtual_record->new;
-       $record->subject([]);
-       $record->types_of_resource([]);
-       $record->call_numbers([]);
-       return $record;
-}
-
-sub finish_mods_batch {
-       my $self = shift;
-
-       return undef unless $self->{master_doc};
-
-       my $perl = $self->{master_doc};
-       my $record = init_virtual_record();
-
-       # turn the hash into a fieldmapper object
-       #(my $title = $perl->{title}) =~ s/\[.*?\]//og;
-       #(my $author = $perl->{author}) =~ s/\(.*?\)//og;
-       my $title = $perl->{title};
-       my $author = $perl->{author};
-
-       my @series;
-       for my $s (@{$perl->{series}}) {
-               push @series, (split( /\s*;/, $s ))[0];
-       }
-
-       # uniquify the types of resource
-       my $rtypes = $perl->{type_of_resource};
-       my %hash = map { ($_ => 1) } @$rtypes;
-       $rtypes = [ keys %hash ];
-
-       $record->title($title);
-       $record->author($author);
-
-       $record->doc_id($perl->{doc_id});
-       $record->isbn($perl->{isbn});
-       $record->pubdate($perl->{pubdate});
-       $record->publisher($perl->{publisher});
-       $record->tcn($perl->{tcn});
-
-       $record->edition($perl->{edition});
-
-       $record->subject($perl->{subject});
-       $record->types_of_resource($rtypes);
-       $record->series(\@series);
-
-       $record->online_loc($perl->{online_loc});
-       $record->synopsis($perl->{synopsis});
-       $record->physical_description($perl->{physical_description});
-       $record->toc($perl->{toc});
-       $record->performer_notes($perl->{performer_notes});
-       $record->extent($perl->{extent});
-
-       $self->{master_doc} = undef;
-       return $record;
-}
-
-
-