Add located URI update script (PGSQL function)
author Dan Scott <dscott@laurentian.ca>
Tue, 10 Jan 2012 18:25:38 +0000 (13:25 -0500)
committer Dan Scott <dscott@laurentian.ca>
Tue, 7 May 2013 18:57:23 +0000 (14:57 -0400)
Given MARCXML, modifies the 856 $9 subfields to map to owning systems
rather than individual branches to address the located URI visibility
change in 2.1.

Signed-off-by: Dan Scott <dscott@laurentian.ca>
tools/migration-scripts/located_uris_2.0_2.1.sql [new file with mode: 0644]

diff --git a/tools/migration-scripts/located_uris_2.0_2.1.sql b/tools/migration-scripts/located_uris_2.0_2.1.sql
new file mode 100644 (file)
index 0000000..d62b159
--- /dev/null
@@ -0,0 +1,101 @@
+CREATE OR REPLACE FUNCTION scratchpad.uri_lib2sys( TEXT ) RETURNS TEXT AS $func$
+# uri_lib2sys(marcxml TEXT) RETURNS TEXT
+#
+# Takes one MARCXML record and returns it re-serialized on a single line
+# (XML declaration, newlines, inter-tag whitespace and control characters
+# removed), passed through entityize().
+#
+# For every 856 field that carries at least one $9 subfield:
+#   * each $9 value found in the %system_for table is replaced by its
+#     mapped value; other $9 values pass through unchanged;
+#   * $u values mentioning site.ebrary.com have the login prefix of the
+#     hosts in @proxy_hosts stripped;
+#   * the field is rebuilt with indicators 4 and 0.
+# 856 fields without a $9 subfield are left untouched.
+#
+# Raises an error if the input cannot be parsed as MARCXML.
+use strict;
+use MARC::Record;
+use MARC::File::XML (BinaryEncoding => 'UTF-8');
+use MARC::Charset;
+use Encode;
+use Unicode::Normalize;
+
+MARC::Charset->assume_unicode(1);
+
+my $marc = shift;
+
+# $9 code => replacement code.  The commented-out codes have no mapping
+# here, so a $9 carrying one of them is copied through as-is.
+my %system_for = (
+    '0SUL' => 'LUSYS',
+    '0WA' => 'WINDSYS',
+    CAOWA => 'WINDSYS',
+    HRSRH => 'HRSRH',
+    IWA => 'WINDSYS',
+#    LINK => '???',
+#    LUSYS
+    NOSME => 'OSM',
+    NOSMW => 'OSM',
+    OOSM => 'OSM',
+    OSBO => 'BOREALSYS',
+#    OSM
+    OSTMA => 'ALGOMASYS',
+    OSU => 'LUSYS',
+    OSUL => 'LUSYS',
+    OUSL => 'LUSYS',
+    OW => 'WINDSYS',
+    OWA => 'WINDSYS',
+#    OWAL
+    OWS => 'WINDSYS',
+    OWW => 'WINDSYS',
+#    SUDBURY
+    UWINDSYS => 'WINDSYS',
+#    WINDSYS
+    WINDSYSS => 'WINDSYS',
+    WINSYS => 'WINDSYS',
+);
+
+# Hosts whose "/login?url=" prefix is stripped from ebrary URLs.
+my @proxy_hosts = qw(
+    ezproxy.uwindsor.ca
+    librweb.laurentian.ca
+    libproxy.auc.ca
+);
+
+# Parse inside eval so a parser failure is reported with its own message
+# instead of surfacing later as a method call on an undefined value.
+my $record = eval { MARC::Record->new_from_xml($marc) };
+if (not defined $record) {
+    my $reason = $@ || 'parser returned no record';
+    die "uri_lib2sys: unable to parse MARCXML: $reason";
+}
+
+foreach my $luri ($record->field('856')) {
+    # Only 856 fields with at least one $9 are rewritten.
+    my @owning_libs = $luri->subfield('9');
+    next unless @owning_libs;
+
+    my @subs;
+    foreach my $sf ($luri->subfields()) {
+        my ($code, $data) = @{$sf};
+
+        if ($code eq 'u' and $data =~ /site\.ebrary\.com/) {
+            # One non-global substitution per host; \Q..\E keeps the dots
+            # in the host name literal.
+            foreach my $host (@proxy_hosts) {
+                $data =~ s{https?://\Q$host\E/login\?url=http://site\.ebrary\.com}{http://site.ebrary.com};
+            }
+        }
+
+        if ($code eq '9' and exists $system_for{$data}) {
+            $data = $system_for{$data};
+        }
+        push @subs, $code, $data;
+    }
+
+    # NOTE(review): indicators are forced to 4/0 rather than copied from
+    # the field being replaced -- confirm this is intended.
+    my $new_field = MARC::Field->new('856', '4', '0', @subs);
+    $luri->replace_with($new_field);
+}
+
+my $xml = $record->as_xml_record();
+$xml =~ s/\n//g;
+# Non-greedy so that only the leading XML declaration is removed.
+$xml =~ s/^<\?xml.+?\?\s*>//;
+$xml =~ s/>\s+</></g;
+$xml =~ s/\p{Cc}//g;
+
+return entityize($xml);
+
+# entityize($string, $form)
+#
+# Unicode-normalizes $string and rewrites bare ampersands and every
+# character in U+0080..U+FFFD as XML entities.
+#
+#   $string - UTF-8 encoded bytes, or an already-decoded character string
+#   $form   - 'D' selects NFD normalization; anything else (or nothing)
+#             selects NFC
+#
+# Returns the converted string.
+sub entityize {
+    my ($string, $form) = @_;
+    $form ||= "";
+
+    # The entity conversion below works on characters, so raw UTF-8 bytes
+    # must be decoded first.  A string that is already decoded is left
+    # alone: decoding it a second time croaks on wide characters or
+    # corrupts characters in the Latin-1 range.
+    if (!utf8::is_utf8($string)) {
+        $string = decode_utf8($string);
+    }
+
+    $string = ($form eq 'D') ? NFD($string) : NFC($string);
+
+    # Convert raw ampersands to entities; an ampersand that already starts
+    # something shaped like an entity (non-space run ending in ';') is kept.
+    $string =~ s/&(?!\S+;)/&amp;/g;
+
+    # Convert non-ASCII characters to hexadecimal numeric entities.
+    $string =~ s/([\x{0080}-\x{fffd}])/sprintf('&#x%X;',ord($1))/ge;
+
+    return $string;
+}
+$func$ LANGUAGE 'plperlu' IMMUTABLE;