From 8d3d18afc661dfa534dffd68efa208484cff93c3 Mon Sep 17 00:00:00 2001
From: Dan Scott
Date: Thu, 18 Dec 2014 01:08:22 -0500
Subject: [PATCH] Linked data enrichment

Replace the definition of conifer.osul_to_lusys() with
conifer.fix_https_uris() and extend what it does to a record:

* delete 856 fields owned by OWA as well as WINDSYS
* rewrite https://librweb URIs to http and label known platforms in 856 $y
* drop CaOWA from, and make sure CaOSUL is present in, 040 $d
* look up the LCCN or the ISBNs through the xID web service and add the
  resulting LCCN (010), work URI (024) and OCLC number (035)

A failed or malformed web service response skips the enrichment rather
than aborting the function.

Also add conifer.marc_version(), which returns $MARC::Record::VERSION.

Signed-off-by: Dan Scott
---
 .../src/sql/Pg/update_marc_records_in_database.sql | 159 ++++++++++++++++++++-
 1 file changed, 157 insertions(+), 2 deletions(-)

diff --git a/Open-ILS/src/sql/Pg/update_marc_records_in_database.sql b/Open-ILS/src/sql/Pg/update_marc_records_in_database.sql
index 2029470bfb..c838335928 100644
--- a/Open-ILS/src/sql/Pg/update_marc_records_in_database.sql
+++ b/Open-ILS/src/sql/Pg/update_marc_records_in_database.sql
@@ -1,10 +1,12 @@
-CREATE OR REPLACE FUNCTION conifer.osul_to_lusys(record BIGINT) RETURNS TEXT AS $func$
+CREATE OR REPLACE FUNCTION conifer.fix_https_uris(record BIGINT) RETURNS TEXT AS $func$
 use strict;
 use MARC::Record;
 use MARC::File::XML (BinaryEncoding => 'UTF-8');
 use MARC::Charset;
 use Encode;
 use Unicode::Normalize;
+use LWP::UserAgent;
+use JSON::XS;
 
 MARC::Charset->assume_unicode(1);
 
@@ -17,15 +19,32 @@ my @eights = $record->field('856');
 foreach my $ocho (@eights) {
     my @ous = $ocho->subfield('9');
     foreach my $ou (@ous) {
-        if ($ou eq 'WINDSYS') {
+        if ($ou eq 'WINDSYS' or $ou eq 'OWA') {
             $record->delete_field($ocho);
         }
         if ($ou eq 'OSUL') {
             $ocho->update('9' => 'LUSYS');
         }
     }
+
+    # Fix URIs that raise SSL cert issues
+    my $uri = $ocho->subfield('u');
+    if ($uri =~ m#^https://librweb#) {
+        $uri =~ s{^https://librweb}{http://librweb};
+        $ocho->update('u' => $uri);
+    }
+
+    # Provide an indication of the platform
+    if ($uri =~ m#scholarsportal#) {
+        $ocho->update('y' => 'Available online / disponible en ligne (ScholarsPortal)');
+    } elsif ($uri =~ m#myilibrary#) {
+        $ocho->update('y' => 'Available online / disponible en ligne (MyiLibrary)');
+    } elsif ($uri =~ m#sagepub#) {
+        $ocho->update('y' => 'Available online / disponible en ligne (Sage)');
+    }
 }
 
+# Do not need access notes anymore
 my @access = $record->field('506');
 foreach my $note (@access) {
     my @ous = $note->subfield('9');
@@ -36,6 +55,136 @@ foreach my $note (@access) {
     }
 }
 
+# Update provenance of the records
+my @provs = $record->field('040');
+foreach my $prov (@provs) {
+    $prov->delete_subfield(code => 'd', match => qr/CaOWA/);
+    my @subfields = $prov->subfield('d');
+    my $found = 0;
+    foreach my $subfield (@subfields) {
+        if ($subfield eq 'CaOSUL') {
+            $found = 1;
+        }
+    }
+    if (!$found) {
+        $prov->add_subfields('d' => 'CaOSUL');
+    }
+}
+
+# Enrich the record with identifiers looked up from the xID web service
+my $ua = LWP::UserAgent->new;
+$ua->timeout(10);
+my $lccn_number;
+my $oclc_number;
+my $owi_number;
+
+# Look up one identifier ($type is 'lccn' or 'isbn') and return the first
+# entry of the response's 'list' as a hash reference. Any failure (HTTP
+# error, timeout, body that is not the expected JSON) yields an empty hash
+# reference, so an unavailable service skips the enrichment instead of
+# aborting the whole function with an exception from decode_json().
+my $xid_lookup = sub {
+    my ($type, $id) = @_;
+    my $response = $ua->get("http://xisbn.worldcat.org/webservices/xid/$type/$id?method=getMetadata&format=json&fl=*");
+    return {} unless $response->is_success;
+    # decode_json() expects UTF-8 octets: undo any Content-Encoding but
+    # leave the charset undecoded
+    my $metadata = eval { decode_json($response->decoded_content(charset => 'none')) };
+    return {} unless ref $metadata eq 'HASH' and ref $metadata->{'list'} eq 'ARRAY';
+    my $entry = $metadata->{'list'}->[0];
+    return ref $entry eq 'HASH' ? $entry : {};
+};
+
+# Return the first value stored under $key in a looked-up entry, or undef
+# when the key is missing or is not a list
+my $first_value = sub {
+    my ($entry, $key) = @_;
+    return ref $entry->{$key} eq 'ARRAY' ? $entry->{$key}->[0] : undef;
+};
+
+# 010 = LCCN (NR)
+my $lccn = $record->field('010');
+if ($lccn) {
+    $lccn_number = $lccn->subfield('a');
+    if (defined $lccn_number) {
+        # trim the whitespace, even when the value has internal spaces
+        $lccn_number =~ s{^\s+|\s+$}{}g;
+    }
+}
+if ($lccn_number) {
+    $lccn->update('a', $lccn_number);
+    my $entry = $xid_lookup->('lccn', $lccn_number);
+    $oclc_number = $first_value->($entry, 'oclcnum');
+    $owi_number = $first_value->($entry, 'owi');
+} else {
+    my @isbntags = $record->field('020');
+    foreach my $isbntag (@isbntags) {
+        my $rawisbn = $isbntag->subfield('a');
+        next unless $rawisbn;
+        # Trim before splitting: leading whitespace would otherwise make
+        # split return an empty ISBN and blank out subfield a
+        (my $trimmed = $rawisbn) =~ s{^\s+|\s+$}{}g;
+        my ($isbn, $rest) = split(/\s+/, $trimmed, 2);
+        next unless defined $isbn and length $isbn;
+        if ($rawisbn ne $isbn) {
+            $isbntag->update('a', $isbn);
+        }
+        if ($rest and $isbntag->subfield('q') ne $rest) {
+            $isbntag->update('q', $rest);
+        }
+        my $entry = $xid_lookup->('isbn', $isbn);
+        my $found_lccn = $first_value->($entry, 'lccn');
+        if ($found_lccn and !$lccn_number) {
+            $lccn_number = $found_lccn;
+            if ($lccn) {
+                # 010 is not repeatable: fill in the existing field
+                $lccn->update('a', $lccn_number);
+            } else {
+                $lccn = MARC::Field->new('010', '', '', 'a' => "$lccn_number");
+                $record->insert_grouped_field($lccn);
+            }
+            $owi_number = $first_value->($xid_lookup->('lccn', $lccn_number), 'owi');
+        }
+        my $isbn_oclcnum = $first_value->($entry, 'oclcnum');
+        if ($isbn_oclcnum and !$oclc_number) {
+            $oclc_number = $isbn_oclcnum;
+        }
+    }
+}
+
+my $found_oclcnum = 0;
+my $found_owinumber = 0;
+
+if ($owi_number) {
+    $owi_number =~ s{^owi}{};
+    my @idtags = $record->field('024');
+    foreach my $idtag (@idtags) {
+        if ($idtag->indicator(1) eq '7' and $idtag->subfield('a') eq "http://worldcat.org/entity/work/id/$owi_number" and $idtag->subfield('2') eq 'uri') {
+            $found_owinumber = 1;
+        }
+    }
+    if (!$found_owinumber) {
+        my $nf = MARC::Field->new('024', '7', '', 'a' => "http://worldcat.org/entity/work/id/$owi_number", '2' => 'uri');
+        $record->insert_grouped_field($nf);
+    }
+}
+
+if ($oclc_number) {
+    my @idtags = $record->field('035');
+    foreach my $idtag (@idtags) {
+        # The parentheses are literal text in the identifier, so they must
+        # be escaped; unescaped they only form a capture group and an
+        # existing (OCoLC) field would never be recognized
+        if ($idtag->subfield('a') =~ m#^\(OCoLC\)\Q$oclc_number\E(?!\d)#) {
+            $found_oclcnum = 1;
+        }
+    }
+    if (!$found_oclcnum) {
+        my $nf = MARC::Field->new('035', '', '', 'a' => "(OCoLC)$oclc_number");
+        $record->insert_grouped_field($nf);
+    }
+}
+
 my $xml = $record->as_xml_record();
 $xml =~ s/\n//sgo;
 $xml =~ s/^<\?xml.+\?\s*>//go;
@@ -57,3 +206,9 @@ $xml =~ s/[\x00-\x1f]//go;
 
 return $xml;
 $func$ LANGUAGE PLPERLU;
+
+CREATE OR REPLACE FUNCTION conifer.marc_version() RETURNS TEXT AS $func$
+use MARC::Record;
+return $MARC::Record::VERSION;
+$func$ LANGUAGE PLPERLU;
+
-- 
2.11.0