From: Dan Scott Date: Thu, 13 Nov 2014 20:44:48 +0000 (-0500) Subject: Updating MARC records using a database function X-Git-Url: https://old-git.evergreen-ils.org/?a=commitdiff_plain;h=3aa6f05bff0958b80d72cb9331ae7d2948c9c00b;p=contrib%2FConifer.git Updating MARC records using a database function There are often debates around the best way to run a batch update of records in an Evergreen database. My preferred way is to do it using a PostgreSQL function so that you can simply invoke it as part of a typical UPDATE statement (including getting as complex as you like with the surrounding SELECT statements, keeping track of updates in a separate table, etc). This example does the following: * updates 856 $9 subfields that currently have 'OSUL' as a value to 'LUSYS' * deletes any 856 fields that have a $9 value of 'WINDSYS' * deletes any 506 fields that have a $9 value of 'OWA' ** NOTE: $9 is a repeatable subfield, so strictly speaking you should iterate through all of the matching subfields before deleting or blindly updating the value... but in our system, we have followed the convention of a single 856 per URL and $9, so we're safe. Your mileage may vary. With this function, you can test your work by doing a simple SELECT conifer.osul_to_lusys(); statement, then actually issue the update using something like: UPDATE biblio.record_entry SET marc = conifer.osul_to_lusys(2683915) WHERE id = 2683915; Signed-off-by: Dan Scott --- diff --git a/Open-ILS/src/sql/Pg/update_marc_records_in_database.sql b/Open-ILS/src/sql/Pg/update_marc_records_in_database.sql new file mode 100644 index 0000000000..2029470bfb --- /dev/null +++ b/Open-ILS/src/sql/Pg/update_marc_records_in_database.sql @@ -0,0 +1,59 @@ +CREATE OR REPLACE FUNCTION conifer.osul_to_lusys(record BIGINT) RETURNS TEXT AS $func$ +use strict; +use MARC::Record; +use MARC::File::XML (BinaryEncoding => 'UTF-8'); +use MARC::Charset; +use Encode; +use Unicode::Normalize; + +MARC::Charset->assume_unicode(1); + +my $q = spi_prepare('SELECT marc FROM biblio.record_entry WHERE id = $1', 'BIGINT'); +my $marc = spi_exec_prepared($q, $_[0])->{rows}->[0]->{marc}; + +my $record = MARC::Record->new_from_xml($marc); + +my @eights = $record->field('856'); +foreach my $ocho (@eights) { + my @ous = $ocho->subfield('9'); + foreach my $ou (@ous) { + if ($ou eq 'WINDSYS') { + $record->delete_field($ocho); + } + if ($ou eq 'OSUL') { + $ocho->update('9' => 'LUSYS'); + } + } +} + +my @access = $record->field('506'); +foreach my $note (@access) { + my @ous = $note->subfield('9'); + foreach my $ou (@ous) { + if ($ou eq 'OWA') { + $record->delete_field($note); + } + } +} + +my $xml = $record->as_xml_record(); +$xml =~ s/\n//sgo; +$xml =~ s/^<\?xml.+\?\s*>//go; +$xml =~ s/>\s+entityize() +# to avoid having to set PERL5LIB for PostgreSQL as well + +$xml = NFC($xml); + +# Convert raw ampersands to entities +$xml =~ s/&(?!\S+;)/&/gso; + +# Convert Unicode characters to entities +$xml =~ s/([\x{0080}-\x{fffd}])/sprintf('&#x%X;',ord($1))/sgoe; + +$xml =~ s/[\x00-\x1f]//go; + +return $xml; +$func$ LANGUAGE PLPERLU;