From: Dan Scott
Date: Tue, 24 Jul 2012 18:14:01 +0000 (-0400)
Subject: Switch to a PLPERLU maintain_901() trigger function
X-Git-Url: https://old-git.evergreen-ils.org/?a=commitdiff_plain;h=119034544e2934f223ec71a63f884f1eed487d6a;p=evergreen%2Fequinox.git

Switch to a PLPERLU maintain_901() trigger function

We've been burned by regexes that fail to grok XML properly numerous
times now. Standardizing on something that actually understands MARC
seems like a good idea.

Signed-off-by: Dan Scott
Signed-off-by: Mike Rylander
---

diff --git a/Open-ILS/src/sql/Pg/002.functions.config.sql b/Open-ILS/src/sql/Pg/002.functions.config.sql
index b441ad8c2b..274adc3d50 100644
--- a/Open-ILS/src/sql/Pg/002.functions.config.sql
+++ b/Open-ILS/src/sql/Pg/002.functions.config.sql
@@ -420,67 +420,117 @@ CREATE OR REPLACE FUNCTION oils_json_to_text( TEXT ) RETURNS TEXT AS $f$
 $f$ LANGUAGE PLPERLU;
 
 CREATE OR REPLACE FUNCTION evergreen.maintain_901 () RETURNS TRIGGER AS $func$
-DECLARE
-    use_id_for_tcn BOOLEAN;
-BEGIN
-    -- Remove any existing 901 fields before we insert the authoritative one
-    NEW.marc := REGEXP_REPLACE(NEW.marc, E'<datafield[^>]*?tag="901".+?</datafield>', '', 'g');
+# Trigger function: rebuild the 901 field of NEW.marc from the row's columns.
+#
+# Every existing 901 field is removed and one fresh 901 is appended. Its
+# subfields depend on the schema of the table that fired the trigger:
+#   biblio    - a (TCN value), b (TCN source), c (id), t (schema), plus
+#               o (owner) and d (share_depth) when those are not NULL
+#   serial    - c (id), t (schema), o (owning_lib), plus r (record) when
+#               it is not NULL
+#   others    - c (id), t (schema); this includes authority
+#
+# The record is then flattened to one line of entity-encoded XML, stored
+# back in NEW.marc, and "MODIFY" is returned to signal that the row changed.
+use strict;
+use MARC::Record;
+use MARC::File::XML (BinaryEncoding => 'UTF-8');
+use MARC::Charset;
+use Encode;
+use Unicode::Normalize;
 
-    IF TG_TABLE_SCHEMA = 'biblio' THEN
-        -- Set TCN value to record ID?
-        SELECT enabled FROM config.global_flag INTO use_id_for_tcn
-            WHERE name = 'cat.bib.use_id_for_tcn';
+MARC::Charset->assume_unicode(1);
 
-        IF use_id_for_tcn = 't' THEN
-            NEW.tcn_value := NEW.id;
-        END IF;
+my $schema = $_TD->{table_schema};
+my $marc = MARC::Record->new_from_xml($_TD->{new}{marc});
+
+my @old901s = $marc->field('901');
+$marc->delete_fields(@old901s);
+
+if ($schema eq 'biblio') {
+    my $tcn_value = $_TD->{new}{tcn_value};
+
+    # Set TCN value to record ID?
+    my $id_as_tcn = spi_exec_query("
+        SELECT enabled
+        FROM config.global_flag
+        WHERE name = 'cat.bib.use_id_for_tcn'
+    ");
+    if (($id_as_tcn->{processed}) && $id_as_tcn->{rows}[0]->{enabled} eq 't') {
+        $tcn_value = $_TD->{new}{id};
+        # Mirror the old NEW.tcn_value := NEW.id so the column matches 901$a
+        $_TD->{new}{tcn_value} = $tcn_value;
+    }
 
-        NEW.marc := REGEXP_REPLACE(
-            NEW.marc,
-            E'(</(?:[^>]*?:)?record>)',
-            E'<datafield tag="901" ind1=" " ind2=" ">' ||
-                '<subfield code="a">' || REPLACE(evergreen.xml_escape(NEW.tcn_value), E'\\', E'\\\\') || E'</subfield>' ||
-                '<subfield code="b">' || REPLACE(evergreen.xml_escape(NEW.tcn_source), E'\\', E'\\\\') || E'</subfield>' ||
-                '<subfield code="c">' || NEW.id || E'</subfield>' ||
-                '<subfield code="t">' || TG_TABLE_SCHEMA || E'</subfield>' ||
-                CASE WHEN NEW.owner IS NOT NULL THEN '<subfield code="o">' || NEW.owner || E'</subfield>' ELSE '' END ||
-                CASE WHEN NEW.share_depth IS NOT NULL THEN '<subfield code="d">' || NEW.share_depth || E'</subfield>' ELSE '' END ||
-             E'</datafield>\\1'
-        );
-    ELSIF TG_TABLE_SCHEMA = 'authority' THEN
-        NEW.marc := REGEXP_REPLACE(
-            NEW.marc,
-            E'(</(?:[^>]*?:)?record>)',
-            E'<datafield tag="901" ind1=" " ind2=" ">' ||
-                '<subfield code="c">' || NEW.id || E'</subfield>' ||
-                '<subfield code="t">' || TG_TABLE_SCHEMA || E'</subfield>' ||
-             E'</datafield>\\1'
-        );
-    ELSIF TG_TABLE_SCHEMA = 'serial' THEN
-        NEW.marc := REGEXP_REPLACE(
-            NEW.marc,
-            E'(</(?:[^>]*?:)?record>)',
-            E'<datafield tag="901" ind1=" " ind2=" ">' ||
-                '<subfield code="c">' || NEW.id || E'</subfield>' ||
-                '<subfield code="t">' || TG_TABLE_SCHEMA || E'</subfield>' ||
-                '<subfield code="o">' || NEW.owning_lib || E'</subfield>' ||
-                CASE WHEN NEW.record IS NOT NULL THEN '<subfield code="r">' || NEW.record || E'</subfield>' ELSE '' END ||
-             E'</datafield>\\1'
-        );
-    ELSE
-        NEW.marc := REGEXP_REPLACE(
-            NEW.marc,
-            E'(</(?:[^>]*?:)?record>)',
-            E'<datafield tag="901" ind1=" " ind2=" ">' ||
-                '<subfield code="c">' || NEW.id || E'</subfield>' ||
-                '<subfield code="t">' || TG_TABLE_SCHEMA || E'</subfield>' ||
-             E'</datafield>\\1'
-        );
-    END IF;
+    my $new_901 = MARC::Field->new("901", " ", " ",
+        "a" => $tcn_value,
+        "b" => $_TD->{new}{tcn_source},
+        "c" => $_TD->{new}{id},
+        "t" => $schema
+    );
 
-    RETURN NEW;
-END;
-$func$ LANGUAGE PLPGSQL;
+    # NULL means "omit"; 0 is a legitimate value, so test definedness
+    if (defined $_TD->{new}{owner}) {
+        $new_901->add_subfields("o" => $_TD->{new}{owner});
+    }
+
+    if (defined $_TD->{new}{share_depth}) {
+        $new_901->add_subfields("d" => $_TD->{new}{share_depth});
+    }
+
+    $marc->append_fields($new_901);
+} elsif ($schema eq 'authority') {
+    # append_fields() takes MARC::Field objects, so build one explicitly
+    my $new_901 = MARC::Field->new("901", " ", " ",
+        "c" => $_TD->{new}{id},
+        "t" => $schema,
+    );
+    $marc->append_fields($new_901);
+} elsif ($schema eq 'serial') {
+    my $new_901 = MARC::Field->new("901", " ", " ",
+        "c" => $_TD->{new}{id},
+        "t" => $schema,
+        "o" => $_TD->{new}{owning_lib},
+    );
+
+    if (defined $_TD->{new}{record}) {
+        $new_901->add_subfields("r" => $_TD->{new}{record});
+    }
+
+    $marc->append_fields($new_901);
+} else {
+    my $new_901 = MARC::Field->new("901", " ", " ",
+        "c" => $_TD->{new}{id},
+        "t" => $schema,
+    );
+    $marc->append_fields($new_901);
+}
+
+my $xml = $marc->as_xml_record();
+$xml =~ s/\n//sgo;
+$xml =~ s/^<\?xml.+\?\s*>//go;
+$xml =~ s/>\s+</></go;
+
+# Embed a version of OpenILS::Application::AppUtils->entityize()
+# to avoid having to set PERL5LIB for PostgreSQL as well
+
+# If we are going to convert non-ASCII characters to XML entities,
+# we had better be dealing with a UTF8 string to begin with
+$xml = decode_utf8($xml);
+
+$xml = NFC($xml);
+
+# Convert raw ampersands to entities
+$xml =~ s/&(?!\S+;)/&amp;/gso;
+
+# Convert Unicode characters to entities
+$xml =~ s/([\x{0080}-\x{fffd}])/sprintf('&#x%X;',ord($1))/sgoe;
+
+$xml =~ s/[\x00-\x1f]//go;
+$_TD->{new}{marc} = $xml;
+
+return "MODIFY";
+$func$ LANGUAGE PLPERLU;
 
 CREATE OR REPLACE FUNCTION evergreen.force_unicode_normal_form(string TEXT, form TEXT) RETURNS TEXT AS $func$
 use Unicode::Normalize 'normalize';
diff --git a/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.fix_maintain_901_regex.sql b/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.fix_maintain_901_regex.sql
index 607e3a7e25..334caebf62 100644
--- a/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.fix_maintain_901_regex.sql
+++ b/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.fix_maintain_901_regex.sql
@@ -1,68 +1,112 @@
-BEGIN;
+CREATE OR REPLACE FUNCTION evergreen.maintain_901 () RETURNS TRIGGER AS $func$
+# Trigger function: rebuild the 901 field of NEW.marc from the row's columns.
+#
+# Every existing 901 field is removed and one fresh 901 is appended. Its
+# subfields depend on the schema of the table that fired the trigger:
+#   biblio    - a (TCN value), b (TCN source), c (id), t (schema), plus
+#               o (owner) and d (share_depth) when those are not NULL
+#   serial    - c (id), t (schema), o (owning_lib), plus r (record) when
+#               it is not NULL
+#   others    - c (id), t (schema); this includes authority
+#
+# The record is then flattened to one line of entity-encoded XML, stored
+# back in NEW.marc, and "MODIFY" is returned to signal that the row changed.
+use strict;
+use MARC::Record;
+use MARC::File::XML (BinaryEncoding => 'UTF-8');
+use MARC::Charset;
+use Encode;
+use Unicode::Normalize;
 
-SELECT evergreen.upgrade_deps_block_check('XXXX', :eg_version);
+MARC::Charset->assume_unicode(1);
 
-CREATE OR REPLACE FUNCTION evergreen.maintain_901 () RETURNS TRIGGER AS $func$
-DECLARE
-    use_id_for_tcn BOOLEAN;
-BEGIN
-    -- Remove any existing 901 fields before we insert the authoritative one
-    NEW.marc := REGEXP_REPLACE(NEW.marc, E'<datafield[^>]*?tag="901".+?</datafield>', '', 'g');
-
-    IF TG_TABLE_SCHEMA = 'biblio' THEN
-        -- Set TCN value to record ID?
-        SELECT enabled FROM config.global_flag INTO use_id_for_tcn
-            WHERE name = 'cat.bib.use_id_for_tcn';
-
-        IF use_id_for_tcn = 't' THEN
-            NEW.tcn_value := NEW.id;
-        END IF;
-
-        NEW.marc := REGEXP_REPLACE(
-            NEW.marc,
-            E'(</(?:[^>]*?:)?record>)',
-            E'<datafield tag="901" ind1=" " ind2=" ">' ||
-                '<subfield code="a">' || REPLACE(evergreen.xml_escape(NEW.tcn_value), E'\\', E'\\\\') || E'</subfield>' ||
-                '<subfield code="b">' || REPLACE(evergreen.xml_escape(NEW.tcn_source), E'\\', E'\\\\') || E'</subfield>' ||
-                '<subfield code="c">' || NEW.id || E'</subfield>' ||
-                '<subfield code="t">' || TG_TABLE_SCHEMA || E'</subfield>' ||
-                CASE WHEN NEW.owner IS NOT NULL THEN '<subfield code="o">' || NEW.owner || E'</subfield>' ELSE '' END ||
-                CASE WHEN NEW.share_depth IS NOT NULL THEN '<subfield code="d">' || NEW.share_depth || E'</subfield>' ELSE '' END ||
-             E'</datafield>\\1'
-        );
-    ELSIF TG_TABLE_SCHEMA = 'authority' THEN
-        NEW.marc := REGEXP_REPLACE(
-            NEW.marc,
-            E'(</(?:[^>]*?:)?record>)',
-            E'<datafield tag="901" ind1=" " ind2=" ">' ||
-                '<subfield code="c">' || NEW.id || E'</subfield>' ||
-                '<subfield code="t">' || TG_TABLE_SCHEMA || E'</subfield>' ||
-             E'</datafield>\\1'
-        );
-    ELSIF TG_TABLE_SCHEMA = 'serial' THEN
-        NEW.marc := REGEXP_REPLACE(
-            NEW.marc,
-            E'(</(?:[^>]*?:)?record>)',
-            E'<datafield tag="901" ind1=" " ind2=" ">' ||
-                '<subfield code="c">' || NEW.id || E'</subfield>' ||
-                '<subfield code="t">' || TG_TABLE_SCHEMA || E'</subfield>' ||
-                '<subfield code="o">' || NEW.owning_lib || E'</subfield>' ||
-                CASE WHEN NEW.record IS NOT NULL THEN '<subfield code="r">' || NEW.record || E'</subfield>' ELSE '' END ||
-             E'</datafield>\\1'
-        );
-    ELSE
-        NEW.marc := REGEXP_REPLACE(
-            NEW.marc,
-            E'(</(?:[^>]*?:)?record>)',
-            E'<datafield tag="901" ind1=" " ind2=" ">' ||
-                '<subfield code="c">' || NEW.id || E'</subfield>' ||
-                '<subfield code="t">' || TG_TABLE_SCHEMA || E'</subfield>' ||
-             E'</datafield>\\1'
-        );
-    END IF;
-
-    RETURN NEW;
-END;
-$func$ LANGUAGE PLPGSQL;
-
-COMMIT;
+my $schema = $_TD->{table_schema};
+my $marc = MARC::Record->new_from_xml($_TD->{new}{marc});
+
+my @old901s = $marc->field('901');
+$marc->delete_fields(@old901s);
+
+if ($schema eq 'biblio') {
+    my $tcn_value = $_TD->{new}{tcn_value};
+
+    # Set TCN value to record ID?
+    my $id_as_tcn = spi_exec_query("
+        SELECT enabled
+        FROM config.global_flag
+        WHERE name = 'cat.bib.use_id_for_tcn'
+    ");
+    if (($id_as_tcn->{processed}) && $id_as_tcn->{rows}[0]->{enabled} eq 't') {
+        $tcn_value = $_TD->{new}{id};
+        # Mirror the old NEW.tcn_value := NEW.id so the column matches 901$a
+        $_TD->{new}{tcn_value} = $tcn_value;
+    }
+
+    my $new_901 = MARC::Field->new("901", " ", " ",
+        "a" => $tcn_value,
+        "b" => $_TD->{new}{tcn_source},
+        "c" => $_TD->{new}{id},
+        "t" => $schema
+    );
+
+    # NULL means "omit"; 0 is a legitimate value, so test definedness
+    if (defined $_TD->{new}{owner}) {
+        $new_901->add_subfields("o" => $_TD->{new}{owner});
+    }
+
+    if (defined $_TD->{new}{share_depth}) {
+        $new_901->add_subfields("d" => $_TD->{new}{share_depth});
+    }
+
+    $marc->append_fields($new_901);
+} elsif ($schema eq 'authority') {
+    # append_fields() takes MARC::Field objects, so build one explicitly
+    my $new_901 = MARC::Field->new("901", " ", " ",
+        "c" => $_TD->{new}{id},
+        "t" => $schema,
+    );
+    $marc->append_fields($new_901);
+} elsif ($schema eq 'serial') {
+    my $new_901 = MARC::Field->new("901", " ", " ",
+        "c" => $_TD->{new}{id},
+        "t" => $schema,
+        "o" => $_TD->{new}{owning_lib},
+    );
+
+    if (defined $_TD->{new}{record}) {
+        $new_901->add_subfields("r" => $_TD->{new}{record});
+    }
+
+    $marc->append_fields($new_901);
+} else {
+    my $new_901 = MARC::Field->new("901", " ", " ",
+        "c" => $_TD->{new}{id},
+        "t" => $schema,
+    );
+    $marc->append_fields($new_901);
+}
+
+my $xml = $marc->as_xml_record();
+$xml =~ s/\n//sgo;
+$xml =~ s/^<\?xml.+\?\s*>//go;
+$xml =~ s/>\s+</></go;
+
+# Embed a version of OpenILS::Application::AppUtils->entityize()
+# to avoid having to set PERL5LIB for PostgreSQL as well
+
+# If we are going to convert non-ASCII characters to XML entities,
+# we had better be dealing with a UTF8 string to begin with
+$xml = decode_utf8($xml);
+
+$xml = NFC($xml);
+
+# Convert raw ampersands to entities
+$xml =~ s/&(?!\S+;)/&amp;/gso;
+
+# Convert Unicode characters to entities
+$xml =~ s/([\x{0080}-\x{fffd}])/sprintf('&#x%X;',ord($1))/sgoe;
+
+$xml =~ s/[\x00-\x1f]//go;
+$_TD->{new}{marc} = $xml;
+
+return "MODIFY";
+$func$ LANGUAGE PLPERLU;