From cea1a7eddc2523d2a89e45d76e83f6f41ffd64dc Mon Sep 17 00:00:00 2001
From: dbs <dbs@dcc99617-32d9-48b4-a31d-7c20da2025e4>
Date: Wed, 19 Jan 2011 14:53:48 +0000
Subject: [PATCH] Avoid escaping issues in authority.normalize_heading() by
 parameterizing the query

Long story short: MARC subfield values containing backslashes caused noise
and in some cases painful errors. Using spi_prepare/spi_exec_prepared is the
safest way of handling escaping, rather than adding more regexes and munging
the data before it even gets to naco_normalize().

Most painful case was <subfield code="a">Foo, Bar\</subfield> - the trailing
backslash ended up escaping the enclosing single quote (because PostgreSQL isn't
configured by default with strict conformance to SQL escaping rules yet) and
threw an error.

git-svn-id: svn:// dcc99617-32d9-48b4-a31d-7c20da2025e4
 Open-ILS/src/sql/Pg/002.schema.config.sql          |  2 +-
 Open-ILS/src/sql/Pg/020.schema.functions.sql       |  8 +--
 Open-ILS/src/sql/Pg/1.6.1-2.0-upgrade-db.sql       | 12 ++--
 .../0476.schema.authority_normalize_heading.sql    | 83 ++++++++++++++++++++++
 4 files changed, 94 insertions(+), 11 deletions(-)
 create mode 100644 Open-ILS/src/sql/Pg/upgrade/0476.schema.authority_normalize_heading.sql

diff --git a/Open-ILS/src/sql/Pg/002.schema.config.sql b/Open-ILS/src/sql/Pg/002.schema.config.sql
index 382bb06afe..a8370a0719 100644
--- a/Open-ILS/src/sql/Pg/002.schema.config.sql
+++ b/Open-ILS/src/sql/Pg/002.schema.config.sql
@@ -70,7 +70,7 @@ CREATE TABLE config.upgrade_log (
-INSERT INTO config.upgrade_log (version) VALUES ('0475'); -- dbwells
+INSERT INTO config.upgrade_log (version) VALUES ('0476'); -- dbs
 CREATE TABLE config.bib_source (
diff --git a/Open-ILS/src/sql/Pg/020.schema.functions.sql b/Open-ILS/src/sql/Pg/020.schema.functions.sql
index a1179ead5a..7e1de17ba9 100644
--- a/Open-ILS/src/sql/Pg/020.schema.functions.sql
+++ b/Open-ILS/src/sql/Pg/020.schema.functions.sql
@@ -375,12 +375,12 @@ CREATE OR REPLACE FUNCTION authority.normalize_heading( TEXT ) RETURNS TEXT AS $
-    # Perhaps better to parameterize the spi and pass as a parameter
-    $auth_txt =~ s/'//go;
     if ($auth_txt) {
-        my $result = spi_exec_query("SELECT public.naco_normalize('$auth_txt') AS norm_text");
+        my $stmt = spi_prepare('SELECT public.naco_normalize($1) AS norm_text', 'TEXT');
+        my $result = spi_exec_prepared($stmt, $auth_txt);
         my $norm_txt = $result->{rows}[0]->{norm_text};
+        spi_freeplan($stmt);
+        undef($stmt);
         return $head->tag() . "_" . $thes_code . " " . $norm_txt;
diff --git a/Open-ILS/src/sql/Pg/1.6.1-2.0-upgrade-db.sql b/Open-ILS/src/sql/Pg/1.6.1-2.0-upgrade-db.sql
index 193f19d1cb..1e27959d50 100644
--- a/Open-ILS/src/sql/Pg/1.6.1-2.0-upgrade-db.sql
+++ b/Open-ILS/src/sql/Pg/1.6.1-2.0-upgrade-db.sql
@@ -18724,7 +18724,7 @@ CREATE OR REPLACE FUNCTION authority.normalize_heading( TEXT ) RETURNS TEXT AS $
     # Default to "No attempt to code" if the leader is horribly broken
     my $fixed_field = $r->field('008');
     my $thes_char = '|';
-    if ($fixed_field) {
+    if ($fixed_field) { 
         $thes_char = substr($fixed_field->data(), 11, 1) || '|';
@@ -18746,13 +18746,13 @@ CREATE OR REPLACE FUNCTION authority.normalize_heading( TEXT ) RETURNS TEXT AS $
             $auth_txt .= '‡' . $sf->[0] . ' ' . $sf->[1];
-    # Perhaps better to parameterize the spi and pass as a parameter
-    $auth_txt =~ s/'//go;
     if ($auth_txt) {
-        my $result = spi_exec_query("SELECT public.naco_normalize('$auth_txt') AS norm_text");
+        my $stmt = spi_prepare('SELECT public.naco_normalize($1) AS norm_text', 'TEXT');
+        my $result = spi_exec_prepared($stmt, $auth_txt);
         my $norm_txt = $result->{rows}[0]->{norm_text};
+        spi_freeplan($stmt);
+        undef($stmt);
         return $head->tag() . "_" . $thes_code . " " . $norm_txt;
diff --git a/Open-ILS/src/sql/Pg/upgrade/0476.schema.authority_normalize_heading.sql b/Open-ILS/src/sql/Pg/upgrade/0476.schema.authority_normalize_heading.sql
new file mode 100644
index 0000000000..90305ee1c5
--- /dev/null
+++ b/Open-ILS/src/sql/Pg/upgrade/0476.schema.authority_normalize_heading.sql
@@ -0,0 +1,83 @@
+-- authority.normalize_heading(): call naco_normalize() through spi_prepare/spi_exec_prepared
+-- to delegate escaping issues to the database (where they belong) and avoid ugly MARC corner cases
+INSERT INTO config.upgrade_log (version) VALUES ('0476'); -- dbs
+CREATE OR REPLACE FUNCTION authority.normalize_heading( TEXT ) RETURNS TEXT AS $func$
+    use strict;
+    use warnings;
+    use utf8;
+    use MARC::Record;
+    use MARC::File::XML (BinaryEncoding => 'UTF8');
+    use UUID::Tiny ':std';
+    my $xml = shift() or return undef; # MARCXML text; a false argument (NULL, '', '0') returns undef
+    my $r;
+    # Parse inside eval so that a die from the XML parser yields a BAD_MARCXML_ result instead of an error
+    eval {
+        $r = MARC::Record->new_from_xml( $xml );
+        1; # keep the eval true on success so the "or do" branch only runs after a die
+    } or do {
+       return 'BAD_MARCXML_' . create_uuid_as_string(UUID_MD5, $xml); # MD5-based UUID computed from the input XML
+    };
+    if (!$r) { # the parser did not die but produced no record either
+       return 'BAD_MARCXML_' . create_uuid_as_string(UUID_MD5, $xml);
+    }
+    # Thesaurus codes keyed by the 008 character read below; presumably the MARC 21 authority 008/11 code list
+    my $thes_code_map = {
+        a => 'lcsh',
+        b => 'lcshac',
+        c => 'mesh',
+        d => 'nal',
+        k => 'cash',
+        n => 'notapplicable',
+        r => 'aat',
+        s => 'sears',
+        v => 'rvm',
+    };
+    # Default to '|' ("No attempt to code") if the 008 field is missing or its character at offset 11 is false
+    my $fixed_field = $r->field('008');
+    my $thes_char = '|';
+    if ($fixed_field) { 
+        $thes_char = substr($fixed_field->data(), 11, 1) || '|';
+    }
+    my $thes_code = 'UNDEFINED'; # stays 'UNDEFINED' when the character is neither 'z' nor in the map
+    if ($thes_char eq 'z') {
+        # 'z': take the code from subfield f of the 040 field, falling back to 'UNDEFINED'
+        $thes_code = $r->subfield('040', 'f') || 'UNDEFINED';
+    } elsif ($thes_code_map->{$thes_char}) {
+        $thes_code = $thes_code_map->{$thes_char};
+    }
+    my $auth_txt = '';
+    my $head = $r->field('1..'); # heading field: tag pattern '1..', taken in scalar context
+    if ($head) {
+        # Concatenate all of these subfields together, each prefixed by '‡' and its code,
+        # to prevent collisions along the lines of "Fiction, North Carolina"
+        foreach my $sf ($head->subfields()) {
+            $auth_txt .= '‡' . $sf->[0] . ' ' . $sf->[1];
+        }
+    }
+    if ($auth_txt) { # non-empty only when a heading field with at least one subfield was found
+        my $stmt = spi_prepare('SELECT public.naco_normalize($1) AS norm_text', 'TEXT'); # single-quoted: $1 is a bind placeholder, not a Perl variable
+        my $result = spi_exec_prepared($stmt, $auth_txt); # heading is passed as a parameter, so quotes and backslashes need no escaping
+        my $norm_txt = $result->{rows}[0]->{norm_text};
+        spi_freeplan($stmt); # release the prepared plan before returning
+        undef($stmt);
+        return $head->tag() . "_" . $thes_code . " " . $norm_txt; # result shape: <tag>_<thes_code> <normalized heading>
+    }
+    return 'NOHEADING_' . $thes_code . ' ' . create_uuid_as_string(UUID_MD5, $xml); # no heading text: thesaurus code plus UUID of the input
+$func$ LANGUAGE 'plperlu' IMMUTABLE;