From: Dan Scott <dscott@laurentian.ca>
Date: Thu, 13 Nov 2014 20:44:48 +0000 (-0500)
Subject: Updating MARC records using a database function
X-Git-Url: https://old-git.evergreen-ils.org/?a=commitdiff_plain;h=3aa6f05bff0958b80d72cb9331ae7d2948c9c00b;p=contrib%2FConifer.git

Updating MARC records using a database function

There are often debates around the best way to run a batch update of
records in an Evergreen database. My preferred way is to do it using
a PostgreSQL function so that you can simply invoke it as part of a
typical UPDATE statement (including getting as complex as you like
with the surrounding SELECT statements, keeping track of updates in
a separate table, etc).

This example does the following:

* updates 856 $9 subfields that currently have 'OSUL' as a value
  to 'LUSYS'
* deletes any 856 fields that have a $9 value of 'WINDSYS'
* deletes any 506 fields that have a $9 value of 'OWA'

** NOTE: $9 is a repeatable subfield, so strictly speaking you should
   iterate through all of the matching subfields before deleting or
   blindly updating the value... but in our system, we have followed
   the convention of a single 856 per URL and $9, so we're safe. Your
   mileage may vary.

With this function, you can test your work by doing a simple

SELECT conifer.osul_to_lusys(<record-id>);

statement, then actually issue the update using something like:

UPDATE biblio.record_entry
  SET marc = conifer.osul_to_lusys(2683915)
  WHERE id = 2683915;

Signed-off-by: Dan Scott <dscott@laurentian.ca>
---

diff --git a/Open-ILS/src/sql/Pg/update_marc_records_in_database.sql b/Open-ILS/src/sql/Pg/update_marc_records_in_database.sql
new file mode 100644
index 0000000000..2029470bfb
--- /dev/null
+++ b/Open-ILS/src/sql/Pg/update_marc_records_in_database.sql
@@ -0,0 +1,59 @@
+CREATE OR REPLACE FUNCTION conifer.osul_to_lusys(record BIGINT) RETURNS TEXT AS $func$
+use strict;
+use MARC::Record;
+use MARC::File::XML (BinaryEncoding => 'UTF-8');
+use MARC::Charset;
+use Encode;
+use Unicode::Normalize;
+
+MARC::Charset->assume_unicode(1);
+
+my $q = spi_prepare('SELECT marc FROM biblio.record_entry WHERE id = $1', 'BIGINT');
+my $marc = spi_exec_prepared($q, $_[0])->{rows}->[0]->{marc};
+
+my $record = MARC::Record->new_from_xml($marc);
+
+my @eights = $record->field('856');
+foreach my $ocho (@eights) {
+    my @ous = $ocho->subfield('9');
+    foreach my $ou (@ous) {
+        if ($ou eq 'WINDSYS') {
+            $record->delete_field($ocho);
+        }
+        if ($ou eq 'OSUL') {
+            $ocho->update('9' => 'LUSYS');
+        }
+    }
+}
+
+my @access = $record->field('506');
+foreach my $note (@access) {
+    my @ous = $note->subfield('9');
+    foreach my $ou (@ous) {
+        if ($ou eq 'OWA') {
+            $record->delete_field($note);
+        }
+    }
+}
+
+my $xml = $record->as_xml_record();
+$xml =~ s/\n//sgo;
+$xml =~ s/^<\?xml.+\?\s*>//go;
+$xml =~ s/>\s+</></go;
+$xml =~ s/\p{Cc}//go;
+
+# Embed a version of OpenILS::Application::AppUtils->entityize()
+# to avoid having to set PERL5LIB for PostgreSQL as well
+
+$xml = NFC($xml);
+
+# Convert raw ampersands to entities
+$xml =~ s/&(?!\S+;)/&amp;/gso;
+
+# Convert Unicode characters to entities
+$xml =~ s/([\x{0080}-\x{fffd}])/sprintf('&#x%X;',ord($1))/sgoe;
+
+$xml =~ s/[\x00-\x1f]//go;
+
+return $xml;
+$func$ LANGUAGE PLPERLU;