From faed9bcdeb4d89b38df644dcf549a1f9e981634c Mon Sep 17 00:00:00 2001
From: Dan Scott
Date: Mon, 18 Apr 2016 14:37:36 -0400
Subject: [PATCH] Fold separate NOSME and NOSMW URIs to just OSM

Signed-off-by: Dan Scott
---
 .../src/sql/Pg/update_marc_records_in_database.sql | 89 ++++++++++++++++++++++
 1 file changed, 89 insertions(+)

diff --git a/Open-ILS/src/sql/Pg/update_marc_records_in_database.sql b/Open-ILS/src/sql/Pg/update_marc_records_in_database.sql
index 3e116cfff4..f02372abfa 100644
--- a/Open-ILS/src/sql/Pg/update_marc_records_in_database.sql
+++ b/Open-ILS/src/sql/Pg/update_marc_records_in_database.sql
@@ -278,3 +278,92 @@ EXCEPTION
 END;
 $func$ LANGUAGE PLPGSQL;
 
+CREATE OR REPLACE FUNCTION conifer.squash_to_osm(record BIGINT) RETURNS TEXT AS $func$
+# Fold the separate NOSME and NOSMW 856 (URI) fields of one bibliographic
+# record into a single OSM URI and return the resulting MARCXML.
+#
+# Argument: the biblio.record_entry id of the record to process.
+# Returns:  the modified record as entity-encoded MARCXML text. Nothing is
+#           written back to the database here.
+# Raises:   an error when no row exists for the given id.
+use strict;
+use warnings;
+use MARC::Record;
+use MARC::File::XML (BinaryEncoding => 'UTF-8');
+use MARC::Charset;
+use Encode;
+use Unicode::Normalize;
+
+MARC::Charset->assume_unicode(1);
+
+my ($record_id) = @_;
+
+my $plan = spi_prepare('SELECT marc FROM biblio.record_entry WHERE id = $1', 'BIGINT');
+my $rows = spi_exec_prepared($plan, $record_id)->{rows};
+# Release the plan explicitly: a fresh one is prepared on every call.
+spi_freeplan($plan);
+
+if (!$rows || !@$rows) {
+    my $id_label = defined $record_id ? $record_id : 'NULL';
+    die "conifer.squash_to_osm: no biblio.record_entry row with id $id_label\n";
+}
+my $record = MARC::Record->new_from_xml($rows->[0]->{marc});
+
+my @uri_fields = $record->field('856');
+
+# Pass 1: relabel NOSME values in subfield 9 as OSM. MARC::Field->update()
+# only rewrites the first occurrence of a subfield code, which may not be the
+# NOSME one when a field carries several subfield 9 values, so the field is
+# rebuilt from its subfield list instead.
+my $saw_nosme = 0;
+foreach my $field (@uri_fields) {
+    next unless grep { $_ eq 'NOSME' } $field->subfield('9');
+    $saw_nosme = 1;
+
+    my @subfields;
+    foreach my $pair ($field->subfields()) {
+        my ($code, $value) = @$pair;
+        if ($code eq '9') {
+            # A field tagged with both NOSME and NOSMW keeps a single OSM
+            # value, so pass 2 does not delete the freshly relabelled URI.
+            next if $value eq 'NOSMW';
+            $value = 'OSM' if $value eq 'NOSME';
+        }
+        push @subfields, $code, $value;
+    }
+    $field->replace_with(
+        MARC::Field->new($field->tag(), $field->indicator(1), $field->indicator(2), @subfields)
+    );
+}
+
+# Pass 2: once a NOSME URI has become the OSM URI, fields still tagged NOSMW
+# are removed. Records without any NOSME URI are left untouched.
+# NOTE(review): the whole 856 is dropped even if it carries other subfield 9
+# values as well - confirm that is intended.
+if ($saw_nosme) {
+    foreach my $field (@uri_fields) {
+        next unless grep { $_ eq 'NOSMW' } $field->subfield('9');
+        $record->delete_field($field);
+    }
+}
+
+my $xml = $record->as_xml_record();
+$xml =~ s/\n//sgo;
+$xml =~ s/^<\?xml.+\?\s*>//go;
+$xml =~ s/>\s+</></go;
+
+# Embed a version of OpenILS::Application::AppUtils->entityize()
+# to avoid having to set PERL5LIB for PostgreSQL as well
+
+$xml = NFC($xml);
+
+# Convert raw ampersands to entities
+$xml =~ s/&(?!\S+;)/&amp;/gso;
+
+# Convert Unicode characters to entities
+$xml =~ s/([\x{0080}-\x{fffd}])/sprintf('&#x%X;',ord($1))/sgoe;
+
+# Strip control characters
+$xml =~ s/[\x00-\x1f]//go;
+
+return $xml;
+$func$ LANGUAGE PLPERLU;
-- 
2.11.0