From 8c407fdb8fb896132d6b57aa0ceffa9e77e0841e Mon Sep 17 00:00:00 2001 From: miker Date: Thu, 18 Mar 2010 00:43:03 +0000 Subject: [PATCH] escape &s in xml nodes (not sure why XPATH does not); skip null values in flatten_marc git-svn-id: svn://svn.open-ils.org/ILS/trunk@15900 dcc99617-32d9-48b4-a31d-7c20da2025e4 --- Open-ILS/src/sql/Pg/002.schema.config.sql | 2 +- Open-ILS/src/sql/Pg/030.schema.metabib.sql | 4 +- .../upgrade/0200.schema.in-db-ingest-SP-bugs.sql | 91 ++++++++++++++++++++++ 3 files changed, 95 insertions(+), 2 deletions(-) create mode 100644 Open-ILS/src/sql/Pg/upgrade/0200.schema.in-db-ingest-SP-bugs.sql diff --git a/Open-ILS/src/sql/Pg/002.schema.config.sql b/Open-ILS/src/sql/Pg/002.schema.config.sql index 0e78d5b16..9c9d41e73 100644 --- a/Open-ILS/src/sql/Pg/002.schema.config.sql +++ b/Open-ILS/src/sql/Pg/002.schema.config.sql @@ -58,7 +58,7 @@ CREATE TABLE config.upgrade_log ( install_date TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW() ); -INSERT INTO config.upgrade_log (version) VALUES ('0199'); -- miker +INSERT INTO config.upgrade_log (version) VALUES ('0200'); -- miker CREATE TABLE config.bib_source ( id SERIAL PRIMARY KEY, diff --git a/Open-ILS/src/sql/Pg/030.schema.metabib.sql b/Open-ILS/src/sql/Pg/030.schema.metabib.sql index 313d8058c..f51a0abd1 100644 --- a/Open-ILS/src/sql/Pg/030.schema.metabib.sql +++ b/Open-ILS/src/sql/Pg/030.schema.metabib.sql @@ -263,7 +263,7 @@ BEGIN IF raw_text IS NOT NULL THEN raw_text := raw_text || joiner; END IF; - raw_text := COALESCE(raw_text,'') || ARRAY_TO_STRING(oils_xpath( '//text()', xml_node ), ' '); + raw_text := COALESCE(raw_text,'') || ARRAY_TO_STRING(oils_xpath( '//text()', REGEXP_REPLACE(xml_node,'&(?!amp;)','&amp;','g')), ' '); END LOOP; CONTINUE WHEN raw_text IS NULL; @@ -305,6 +305,8 @@ BEGIN output.value := field.value; END IF; + CONTINUE WHEN output.value IS NULL; + RETURN NEXT output; END LOOP; END; diff --git a/Open-ILS/src/sql/Pg/upgrade/0200.schema.in-db-ingest-SP-bugs.sql 
b/Open-ILS/src/sql/Pg/upgrade/0200.schema.in-db-ingest-SP-bugs.sql
new file mode 100644
index 000000000..f787f26d7
--- /dev/null
+++ b/Open-ILS/src/sql/Pg/upgrade/0200.schema.in-db-ingest-SP-bugs.sql
@@ -0,0 +1,105 @@
+
+BEGIN;
+
+INSERT INTO config.upgrade_log (version) VALUES ('0200'); -- miker
+
+-- Build the metabib field entries for one bib record.
+--   rid            - id of the biblio.record_entry row to index
+--   default_joiner - text inserted between the values of successive matched nodes
+-- Returns one metabib.field_entry_template row for each config.metabib_field
+-- definition whose XPath yields at least one node; other definitions are skipped.
+CREATE OR REPLACE FUNCTION biblio.extract_metabib_field_entry ( rid BIGINT, default_joiner TEXT ) RETURNS SETOF metabib.field_entry_template AS $func$
+DECLARE
+    bib             biblio.record_entry%ROWTYPE;
+    idx             config.metabib_field%ROWTYPE;
+    xfrm            config.xml_transform%ROWTYPE;
+    prev_xfrm       TEXT;
+    transformed_xml TEXT;
+    xml_node        TEXT;
+    xml_node_list   TEXT[];
+    raw_text        TEXT;
+    joiner          TEXT := default_joiner; -- XXX will index defs supply a joiner?
+    output_row      metabib.field_entry_template%ROWTYPE;
+BEGIN
+
+    -- Get the record
+    SELECT INTO bib * FROM biblio.record_entry WHERE id = rid;
+
+    -- Loop over the indexing entries
+    FOR idx IN SELECT * FROM config.metabib_field ORDER BY format LOOP
+
+        SELECT INTO xfrm * from config.xml_transform WHERE name = idx.format;
+
+        -- See if we can skip the XSLT ... it's expensive
+        IF prev_xfrm IS NULL OR prev_xfrm <> xfrm.name THEN
+            -- Can't skip the transform
+            IF xfrm.xslt <> '---' THEN
+                transformed_xml := oils_xslt_process(bib.marc,xfrm.xslt);
+            ELSE
+                transformed_xml := bib.marc;
+            END IF;
+
+            prev_xfrm := xfrm.name;
+        END IF;
+
+        xml_node_list := oils_xpath( idx.xpath, transformed_xml, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] );
+
+        raw_text := NULL;
+        FOR xml_node IN SELECT x FROM explode_array(xml_node_list) AS x LOOP
+            IF raw_text IS NOT NULL THEN
+                raw_text := raw_text || joiner;
+            END IF;
+            -- Escape every & not already followed by "amp;" before handing the node back to oils_xpath
+            -- NOTE(review): other entities (e.g. &lt;) also match this pattern -- confirm that is intended
+            raw_text := COALESCE(raw_text,'') || ARRAY_TO_STRING(oils_xpath( '//text()', REGEXP_REPLACE(xml_node,'&(?!amp;)','&amp;','g')), ' ');
+        END LOOP;
+
+        CONTINUE WHEN raw_text IS NULL;
+
+        output_row.field_class := idx.field_class;
+        output_row.field := idx.id;
+        output_row.source := rid;
+        -- Collapse each run of whitespace to a single space and trim both ends
+        output_row.value := BTRIM(REGEXP_REPLACE(raw_text, E'\\s+', ' ', 'g'));
+
+        RETURN NEXT output_row;
+
+    END LOOP;
+
+END;
+$func$ LANGUAGE PLPGSQL;
+
+-- Flatten the MARC of one bib record into metabib.full_rec rows.
+--   rid - id of the biblio.record_entry row to flatten
+-- Rows come from biblio.flatten_marc( bib.marc ), presumably the overload that takes the MARC text.
+-- Values go through naco_normalize() unless the subfield is NULL or the tag is 020/022/024.
+CREATE OR REPLACE FUNCTION biblio.flatten_marc ( rid BIGINT ) RETURNS SETOF metabib.full_rec AS $func$
+DECLARE
+    bib     biblio.record_entry%ROWTYPE;
+    output  metabib.full_rec%ROWTYPE;
+    field   RECORD;
+BEGIN
+    SELECT INTO bib * FROM biblio.record_entry WHERE id = rid;
+
+    FOR field IN SELECT * FROM biblio.flatten_marc( bib.marc ) LOOP
+        output.record := rid;
+        output.ind1 := field.ind1;
+        output.ind2 := field.ind2;
+        output.tag := field.tag;
+        output.subfield := field.subfield;
+        IF field.subfield IS NOT NULL AND field.tag NOT IN ('020','022','024') THEN -- exclude standard numbers and control fields
+            output.value := naco_normalize(field.value, field.subfield);
+        ELSE
+            output.value := field.value;
+        END IF;
+
+        -- Skip rows whose value is NULL
+        CONTINUE WHEN output.value IS NULL;
+
+        RETURN NEXT output;
+    END LOOP;
+END;
+$func$ LANGUAGE PLPGSQL;
+
+COMMIT;
+
-- 
2.11.0