From: dbs Date: Wed, 19 Jan 2011 14:53:48 +0000 (+0000) Subject: Avoid escaping issues in authority.normalize_heading() by parameterizing the query X-Git-Url: https://old-git.evergreen-ils.org/?a=commitdiff_plain;h=cea1a7eddc2523d2a89e45d76e83f6f41ffd64dc;p=contrib%2FConifer.git Avoid escaping issues in authority.normalize_heading() by parameterizing the query Long story short: MARC subfield values containing backslashes caused noise and in some cases painful errors. Using spi_prepare/spi_exec_prepared is the safest way of handling escaping, rather than adding more regexes and munging the data before it even gets to naco_normalize(). Most painful case was Foo, Bar\ - the trailing backslash ended up escaping the enclosing single quote (because PostgreSQL isn't configured by default with strict conformance to SQL escaping rules yet) and threw an error. git-svn-id: svn://svn.open-ils.org/ILS/trunk@19201 dcc99617-32d9-48b4-a31d-7c20da2025e4 --- diff --git a/Open-ILS/src/sql/Pg/002.schema.config.sql b/Open-ILS/src/sql/Pg/002.schema.config.sql index 382bb06afe..a8370a0719 100644 --- a/Open-ILS/src/sql/Pg/002.schema.config.sql +++ b/Open-ILS/src/sql/Pg/002.schema.config.sql @@ -70,7 +70,7 @@ CREATE TABLE config.upgrade_log ( install_date TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW() ); -INSERT INTO config.upgrade_log (version) VALUES ('0475'); -- dbwells +INSERT INTO config.upgrade_log (version) VALUES ('0476'); -- dbs CREATE TABLE config.bib_source ( id SERIAL PRIMARY KEY, diff --git a/Open-ILS/src/sql/Pg/020.schema.functions.sql b/Open-ILS/src/sql/Pg/020.schema.functions.sql index a1179ead5a..7e1de17ba9 100644 --- a/Open-ILS/src/sql/Pg/020.schema.functions.sql +++ b/Open-ILS/src/sql/Pg/020.schema.functions.sql @@ -375,12 +375,12 @@ CREATE OR REPLACE FUNCTION authority.normalize_heading( TEXT ) RETURNS TEXT AS $ } } - # Perhaps better to parameterize the spi and pass as a parameter - $auth_txt =~ s/'//go; - if ($auth_txt) { - my $result = spi_exec_query("SELECT 
public.naco_normalize('$auth_txt') AS norm_text"); + my $stmt = spi_prepare('SELECT public.naco_normalize($1) AS norm_text', 'TEXT'); + my $result = spi_exec_prepared($stmt, $auth_txt); my $norm_txt = $result->{rows}[0]->{norm_text}; + spi_freeplan($stmt); + undef($stmt); return $head->tag() . "_" . $thes_code . " " . $norm_txt; } diff --git a/Open-ILS/src/sql/Pg/1.6.1-2.0-upgrade-db.sql b/Open-ILS/src/sql/Pg/1.6.1-2.0-upgrade-db.sql index 193f19d1cb..1e27959d50 100644 --- a/Open-ILS/src/sql/Pg/1.6.1-2.0-upgrade-db.sql +++ b/Open-ILS/src/sql/Pg/1.6.1-2.0-upgrade-db.sql @@ -18724,7 +18724,7 @@ CREATE OR REPLACE FUNCTION authority.normalize_heading( TEXT ) RETURNS TEXT AS $ # Default to "No attempt to code" if the leader is horribly broken my $fixed_field = $r->field('008'); my $thes_char = '|'; - if ($fixed_field) { + if ($fixed_field) { $thes_char = substr($fixed_field->data(), 11, 1) || '|'; } @@ -18746,13 +18746,13 @@ CREATE OR REPLACE FUNCTION authority.normalize_heading( TEXT ) RETURNS TEXT AS $ $auth_txt .= '‡' . $sf->[0] . ' ' . $sf->[1]; } } - - # Perhaps better to parameterize the spi and pass as a parameter - $auth_txt =~ s/'//go; - + if ($auth_txt) { - my $result = spi_exec_query("SELECT public.naco_normalize('$auth_txt') AS norm_text"); + my $stmt = spi_prepare('SELECT public.naco_normalize($1) AS norm_text', 'TEXT'); + my $result = spi_exec_prepared($stmt, $auth_txt); my $norm_txt = $result->{rows}[0]->{norm_text}; + spi_freeplan($stmt); + undef($stmt); return $head->tag() . "_" . $thes_code . " " . 
$norm_txt; } diff --git a/Open-ILS/src/sql/Pg/upgrade/0476.schema.authority_normalize_heading.sql b/Open-ILS/src/sql/Pg/upgrade/0476.schema.authority_normalize_heading.sql new file mode 100644 index 0000000000..90305ee1c5 --- /dev/null +++ b/Open-ILS/src/sql/Pg/upgrade/0476.schema.authority_normalize_heading.sql @@ -0,0 +1,83 @@ +-- Use spi_prepare/spi_exec_prepared to delegate escaping issues to the database +-- (where they belong) and avoid ugly MARC corner cases +BEGIN; + +INSERT INTO config.upgrade_log (version) VALUES ('0476'); -- dbs + +CREATE OR REPLACE FUNCTION authority.normalize_heading( TEXT ) RETURNS TEXT AS $func$ + use strict; + use warnings; + + use utf8; + use MARC::Record; + use MARC::File::XML (BinaryEncoding => 'UTF8'); + use UUID::Tiny ':std'; + + my $xml = shift() or return undef; + + my $r; + + # Prevent errors in XML parsing from blowing out ungracefully + eval { + $r = MARC::Record->new_from_xml( $xml ); + 1; + } or do { + return 'BAD_MARCXML_' . create_uuid_as_string(UUID_MD5, $xml); + }; + + if (!$r) { + return 'BAD_MARCXML_' . 
create_uuid_as_string(UUID_MD5, $xml); + } + + # From http://www.loc.gov/standards/sourcelist/subject.html + my $thes_code_map = { + a => 'lcsh', + b => 'lcshac', + c => 'mesh', + d => 'nal', + k => 'cash', + n => 'notapplicable', + r => 'aat', + s => 'sears', + v => 'rvm', + }; + + # Default to "No attempt to code" if the leader is horribly broken + my $fixed_field = $r->field('008'); + my $thes_char = '|'; + if ($fixed_field) { + $thes_char = substr($fixed_field->data(), 11, 1) || '|'; + } + + my $thes_code = 'UNDEFINED'; + + if ($thes_char eq 'z') { + # Grab the 040 $f per http://www.loc.gov/marc/authority/ad040.html + $thes_code = $r->subfield('040', 'f') || 'UNDEFINED'; + } elsif ($thes_code_map->{$thes_char}) { + $thes_code = $thes_code_map->{$thes_char}; + } + + my $auth_txt = ''; + my $head = $r->field('1..'); + if ($head) { + # Concatenate all of these subfields together, prefixed by their code + # to prevent collisions along the lines of "Fiction, North Carolina" + foreach my $sf ($head->subfields()) { + $auth_txt .= '‡' . $sf->[0] . ' ' . $sf->[1]; + } + } + + if ($auth_txt) { + my $stmt = spi_prepare('SELECT public.naco_normalize($1) AS norm_text', 'TEXT'); + my $result = spi_exec_prepared($stmt, $auth_txt); + my $norm_txt = $result->{rows}[0]->{norm_text}; + spi_freeplan($stmt); + undef($stmt); + return $head->tag() . "_" . $thes_code . " " . $norm_txt; + } + + return 'NOHEADING_' . $thes_code . ' ' . create_uuid_as_string(UUID_MD5, $xml); +$func$ LANGUAGE 'plperlu' IMMUTABLE; + +COMMIT;