-- From: Dan Scott
-- Date: Sat, 27 Dec 2014 21:51:32 +0000 (-0500)
-- Subject: More example in-database MARC fixing
-- X-Git-Url: https://old-git.evergreen-ils.org/?a=commitdiff_plain;h=a1638d30c3acaa13d5f5c2f2b26786d2ad79ac0f;p=contrib%2FConifer.git
--
-- More example in-database MARC fixing
--
-- Generated the fix file with
--
--   \o ecco_uris_to_fix.sql
--   SELECT 'UPDATE biblio.record_entry SET marc = conifer.fix_ecco_uris(' || id || ') WHERE id = ' || id || ';'
--     FROM biblio.record_entry
--     WHERE marc ~ 'galegroup.com/ecco' AND marc ~ 'wind05901'
--     ORDER BY id ASC;
--   \o
--
-- And then running ecco_uris_to_fix.sql
--
-- Signed-off-by: Dan Scott
--
-- Added to the repository as Open-ILS/src/sql/Pg/fix_ecco_uris.sql

-- conifer.fix_ecco_uris(record BIGINT) RETURNS TEXT
--
-- Loads the MARCXML of biblio.record_entry row "record", rewrites its 856,
-- 506 and 040 fields, and returns the result as a single-line, entity-encoded
-- MARCXML string. The function does not write to the table itself; the
-- caller is expected to assign the return value to the marc column.
--
-- Raises an error if no row (or a NULL marc value) is found for the id.
CREATE OR REPLACE FUNCTION conifer.fix_ecco_uris(record BIGINT) RETURNS TEXT AS $func$
use strict;
use MARC::Record;
use MARC::File::XML (BinaryEncoding => 'UTF-8');
use MARC::Charset;
use Encode;
use Unicode::Normalize;
use LWP::UserAgent;
use JSON::XS;

MARC::Charset->assume_unicode(1);

my ($record_id) = @_;

# Fetch the stored MARCXML. The plan is released right after use: this
# function is called once per record from a long fix script, and a plan that
# is never freed stays allocated for the rest of the session.
my $plan = spi_prepare('SELECT marc FROM biblio.record_entry WHERE id = $1', 'BIGINT');
my $result = spi_exec_prepared($plan, $record_id);
spi_freeplan($plan);

my $marc = $result->{rows}->[0]->{marc};
if (!defined $marc) {
    my $shown_id = defined $record_id ? $record_id : 'NULL';
    die "conifer.fix_ecco_uris: no MARC found in biblio.record_entry for id $shown_id\n";
}

my $record = MARC::Record->new_from_xml($marc);

# Platform labels written to 856 $y; the first matching pattern wins
my @platforms = (
    [ qr#scholarsportal#      => 'Available online / disponible en ligne (ScholarsPortal)' ],
    [ qr#myilibrary#          => 'Available online / disponible en ligne (MyiLibrary)' ],
    [ qr#sagepub#             => 'Available online / disponible en ligne (Sage)' ],
    [ qr#galegroup\.com/ecco# => 'Available online / disponible en ligne (Gale ECCO)' ],
);

FIELD:
foreach my $ocho ($record->field('856')) {
    my @owners = $ocho->subfield('9');

    # Drop URIs owned by WINDSYS or OWA; nothing else applies to a deleted field
    if (grep { $_ eq 'WINDSYS' or $_ eq 'OWA' } @owners) {
        $record->delete_field($ocho);
        next FIELD;
    }

    # Relabel $9 OSUL as LUSYS. MARC::Field->update() only rewrites the first
    # occurrence of a subfield code, which may not be the OSUL one, so the
    # subfield list is rebuilt in its original order instead.
    if (grep { $_ eq 'OSUL' } @owners) {
        my @relabelled = map {
            my ($code, $value) = @{$_};
            ($code, ($code eq '9' && $value eq 'OSUL') ? 'LUSYS' : $value);
        } $ocho->subfields();
        $ocho->replace_with(
            MARC::Field->new(
                $ocho->tag(), $ocho->indicator(1), $ocho->indicator(2),
                @relabelled
            )
        );
    }

    # An 856 without a $u has no URI to repair or label
    my $uri = $ocho->subfield('u');
    next FIELD unless defined $uri;
    my $old_uri = $uri;

    # Fix URIs that raise SSL cert issues
    $uri =~ s{^https://librweb}{http://librweb};

    # Fix URIs with a space after the URL proxy parameter
    $uri =~ s{url= http}{url=http};

    # Point at LU ECCO URIs
    $uri =~ s{wind05901}{subd78095};

    if ($old_uri ne $uri) {
        $ocho->update('u' => $uri);
    }

    # Provide an indication of the platform
    foreach my $platform (@platforms) {
        my ($pattern, $label) = @{$platform};
        if ($uri =~ $pattern) {
            $ocho->update('y' => $label);
            last;
        }
    }
}

# Do not need access notes anymore
foreach my $note ($record->field('506')) {
    if (grep { $_ eq 'OWA' } $note->subfield('9')) {
        $record->delete_field($note);
    }
}

# Update provenance of the records: drop CaOWA and make sure CaOSUL is listed
foreach my $prov ($record->field('040')) {
    $prov->delete_subfield(code => 'd', match => qr/CaOWA/);
    my $has_osul = grep { $_ eq 'CaOSUL' } $prov->subfield('d');
    if (!$has_osul) {
        $prov->add_subfields('d' => 'CaOSUL');
    }
}

# Serialize to a single line without the XML declaration or whitespace
# between elements
my $xml = $record->as_xml_record();
$xml =~ s/\n//sgo;
$xml =~ s/^<\?xml.+\?\s*>//go;
$xml =~ s/>\s+</></go;
# NOTE(review): the next statement was lost when markup was stripped from the
# archived copy of this commit; it is restored from the usual Evergreen
# serialization idiom -- confirm against the original commit.
$xml =~ s/\p{Cc}//go;

# Embed a version of OpenILS::Application::AppUtils->entityize()
# to avoid having to set PERL5LIB for PostgreSQL as well

$xml = NFC($xml);

# Convert raw ampersands to entities
$xml =~ s/&(?!\S+;)/&amp;/gso;

# Convert Unicode characters to entities
$xml =~ s/([\x{0080}-\x{fffd}])/sprintf('&#x%X;',ord($1))/sgoe;

# Strip any remaining ASCII control characters
$xml =~ s/[\x00-\x1f]//go;

return $xml;
$func$ LANGUAGE PLPERLU;

-- conifer.marc_version() RETURNS TEXT
--
-- Returns the version of the MARC::Record Perl module that PL/Perl loads
-- inside the database server.
CREATE OR REPLACE FUNCTION conifer.marc_version() RETURNS TEXT AS $func$
use MARC::Record;
return $MARC::Record::VERSION;
$func$ LANGUAGE PLPERLU;