From fb527c4e1cfae4c96d4aa2bd880343f66b329dfa Mon Sep 17 00:00:00 2001 From: dbs Date: Sat, 20 Nov 2010 19:47:54 +0000 Subject: [PATCH] Address maintain_control_numbers() database function bug #677160 Jason Stephenson reported a bug handling records with multiple 001 or 003 fields, and supplied a set of test records to reproduce the condition. The bug caused the ingest process to throw a database error, rolling back the transaction and preventing the actual ingest of those records. The solution was to simplify the logic in maintain_control_numbers(). Now, in the case that there are either multiple 001s or 003s in the incoming record, we simply delete all of the 003s and 001s and create the desired 001 and 003. Also, if there are not exactly one 001 and one 003 in the incoming record, we do not try to preserve one of those values in the 035 as it would be close to meaningless. Many thanks to Jason for the clear bug report and test cases! git-svn-id: svn://svn.open-ils.org/ILS/trunk@18809 dcc99617-32d9-48b4-a31d-7c20da2025e4 --- Open-ILS/src/sql/Pg/002.functions.config.sql | 25 ++--- Open-ILS/src/sql/Pg/002.schema.config.sql | 2 +- .../0465.function.maintain_control_numbers.sql | 121 +++++++++++++++++++++ 3 files changed, 131 insertions(+), 17 deletions(-) create mode 100644 Open-ILS/src/sql/Pg/upgrade/0465.function.maintain_control_numbers.sql diff --git a/Open-ILS/src/sql/Pg/002.functions.config.sql b/Open-ILS/src/sql/Pg/002.functions.config.sql index 818348c5e6..f54607838c 100644 --- a/Open-ILS/src/sql/Pg/002.functions.config.sql +++ b/Open-ILS/src/sql/Pg/002.functions.config.sql @@ -513,8 +513,6 @@ if ($ous_rv->{processed}) { my ($create, $munge) = (0, 0); -# Incoming MARC records may have multiple 001s or 003s, despite the spec -my @control_ids = $record->field('003'); my @scns = $record->field('035'); foreach my $id_field ('001', '003') { @@ -528,32 +526,27 @@ foreach my $id_field ('001', '003') { } # Create the 001/003 if none exist - if (scalar(@controls) == 0) { - $record->insert_fields_ordered(MARC::Field->new($id_field, $spec_value)); - $create = 1; - } elsif (scalar(@controls) > 1) { - # Do we already have the right 001/003 value in the existing set? + if (scalar(@controls) == 1) { + # Only one field; check to see if we need to munge it unless (grep $_->data() eq $spec_value, @controls) { $munge = 1; } - + } else { # Delete the other fields, as with more than 1 001/003 we do not know which 003/001 to match foreach my $control (@controls) { unless ($control->data() eq $spec_value) { $record->delete_field($control); } } - } else { - # Only one field; check to see if we need to munge it - unless (grep $_->data() eq $spec_value, @controls) { - $munge = 1; - } + $record->insert_fields_ordered(MARC::Field->new($id_field, $spec_value)); + $create = 1; } } -# Now, if we need to munge the 001, we will first push the existing 001/003 into the 035; -# but if the record did not have a 003 to begin with, skip this process -if ($munge && scalar(@control_ids) > 0) { +# Now, if we need to munge the 001, we will first push the existing 001/003 +# into the 035; but if the record did not have one (and one only) 001 and 003 +# to begin with, skip this process +if ($munge and not $create) { my $scn = "(" . $record->field('003')->data() . ")" . $record->field('001')->data(); # Do not create duplicate 035 fields diff --git a/Open-ILS/src/sql/Pg/002.schema.config.sql b/Open-ILS/src/sql/Pg/002.schema.config.sql index 61f20be135..d64150cfca 100644 --- a/Open-ILS/src/sql/Pg/002.schema.config.sql +++ b/Open-ILS/src/sql/Pg/002.schema.config.sql @@ -70,7 +70,7 @@ CREATE TABLE config.upgrade_log ( install_date TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW() ); -INSERT INTO config.upgrade_log (version) VALUES ('0464'); -- dbs +INSERT INTO config.upgrade_log (version) VALUES ('0465'); -- dbs CREATE TABLE config.bib_source ( id SERIAL PRIMARY KEY, diff --git a/Open-ILS/src/sql/Pg/upgrade/0465.function.maintain_control_numbers.sql b/Open-ILS/src/sql/Pg/upgrade/0465.function.maintain_control_numbers.sql new file mode 100644 index 0000000000..ed71a054e7 --- /dev/null +++ b/Open-ILS/src/sql/Pg/upgrade/0465.function.maintain_control_numbers.sql @@ -0,0 +1,121 @@ +BEGIN; + +INSERT INTO config.upgrade_log (version) VALUES ('0465'); -- dbs + +CREATE OR REPLACE FUNCTION maintain_control_numbers() RETURNS TRIGGER AS $func$ +use strict; +use MARC::Record; +use MARC::File::XML (BinaryEncoding => 'UTF-8'); +use Encode; +use Unicode::Normalize; + +my $record = MARC::Record->new_from_xml($_TD->{new}{marc}); +my $schema = $_TD->{table_schema}; +my $rec_id = $_TD->{new}{id}; + +# Short-circuit if maintaining control numbers per MARC21 spec is not enabled +my $enable = spi_exec_query("SELECT enabled FROM config.global_flag WHERE name = 'cat.maintain_control_numbers'"); +if (!($enable->{processed}) or $enable->{rows}[0]->{enabled} eq 'f') { + return; +} + +# Get the control number identifier from an OU setting based on $_TD->{new}{owner} +my $ou_cni = 'EVRGRN'; + +my $owner; +if ($schema eq 'serial') { + $owner = $_TD->{new}{owning_lib}; +} else { + # are.owner and bre.owner can be null, so fall back to the consortial setting + $owner = $_TD->{new}{owner} || 1; +} + +my $ous_rv = spi_exec_query("SELECT value FROM actor.org_unit_ancestor_setting('cat.marc_control_number_identifier', $owner)"); +if ($ous_rv->{processed}) { + $ou_cni = $ous_rv->{rows}[0]->{value}; + $ou_cni =~ s/"//g; # Stupid VIM syntax highlighting" +} else { + # Fall back to the shortname of the OU if there was no OU setting + $ous_rv = spi_exec_query("SELECT shortname FROM actor.org_unit WHERE id = $owner"); + if ($ous_rv->{processed}) { + $ou_cni = $ous_rv->{rows}[0]->{shortname}; + } +} + +my ($create, $munge) = (0, 0); + +my @scns = $record->field('035'); + +foreach my $id_field ('001', '003') { + my $spec_value; + my @controls = $record->field($id_field); + + if ($id_field eq '001') { + $spec_value = $rec_id; + } else { + $spec_value = $ou_cni; + } + + # Create the 001/003 if none exist + if (scalar(@controls) == 1) { + # Only one field; check to see if we need to munge it + unless (grep $_->data() eq $spec_value, @controls) { + $munge = 1; + } + } else { + # Delete the other fields, as with more than 1 001/003 we do not know which 003/001 to match + foreach my $control (@controls) { + unless ($control->data() eq $spec_value) { + $record->delete_field($control); + } + } + $record->insert_fields_ordered(MARC::Field->new($id_field, $spec_value)); + $create = 1; + } +} + +# Now, if we need to munge the 001, we will first push the existing 001/003 +# into the 035; but if the record did not have one (and one only) 001 and 003 +# to begin with, skip this process +if ($munge and not $create) { + my $scn = "(" . $record->field('003')->data() . ")" . $record->field('001')->data(); + + # Do not create duplicate 035 fields + unless (grep $_->subfield('a') eq $scn, @scns) { + $record->insert_fields_ordered(MARC::Field->new('035', '', '', 'a' => $scn)); + } +} + +# Set the 001/003 and update the MARC +if ($create or $munge) { + $record->field('001')->data($rec_id); + $record->field('003')->data($ou_cni); + + my $xml = $record->as_xml_record(); + $xml =~ s/\n//sgo; + $xml =~ s/^<\?xml.+\?\s*>//go; + $xml =~ s/>\s+entityize() + # to avoid having to set PERL5LIB for PostgreSQL as well + + # If we are going to convert non-ASCII characters to XML entities, + # we had better be dealing with a UTF8 string to begin with + $xml = decode_utf8($xml); + + $xml = NFC($xml); + + # Convert raw ampersands to entities + $xml =~ s/&(?!\S+;)/&/gso; + + # Convert Unicode characters to entities + $xml =~ s/([\x{0080}-\x{fffd}])/sprintf('&#x%X;',ord($1))/sgoe; + + $xml =~ s/[\x00-\x1f]//go; + $_TD->{new}{marc} = $xml; + + return "MODIFY"; +} + +COMMIT; -- 2.11.0