From cb9bc278ee2b0731413b29a2ae0818063bcc4044 Mon Sep 17 00:00:00 2001 From: Dan Wells Date: Mon, 6 Oct 2014 11:52:27 -0400 Subject: [PATCH] LP#1379815 Fetch tag data as a table using tag/xpath combo New function: oils_xpath_tag_to_table() This function is adapted from oils_xpath_table() with the goal of being more targeted and simpler to use. The main issue with oils_xpath_table() is that it relies on peer UNNEST() functions, and that leads to unexpected behavior whenever the xpath arguments result in uneven or "gapped" selections. In the first type of case, the resulting table includes rows representing the least common multiple of the underlying xpath selections. In the second type, though the xpaths may sometimes return the same number of values, those values are not correlated except by order in the marc, which does not account for the real possibility of null values in the set. Crude Example: 999 $d ABC $e 123 999 $d DEF 999 $d GHI 999 $e 456 We need a table representing subfields 'd' and 'e' of the '999' fields, so we might try an xpath like: //*[@tag="999"]/*[@code="d"]|//*[@tag="999"]/*[@code="e"] We want: d | e --------- ABC | 123 DEF | GHI | | 456 but we get: d | e --------- ABC | 123 DEF | 456 GHI | 123 ABC | 456 DEF | 123 GHI | 456 This example illustrates both negative behaviors (non-correlated fields and least-common-multiple row multiplication). The new method, while internally quite similar, has a different signature, with the most significant change being a 'tag' argument which serves as a common base element for the xpaths (now an array rather than a pipe-delimited string). Signed-off-by: Dan Wells --- Open-ILS/src/sql/Pg/002.functions.config.sql | 42 +++++++++++++++++++ ...X.function.vandelay-oils_xpath_tag_to_table.sql | 48 ++++++++++++++++++++++ 2 files changed, 90 insertions(+) create mode 100644 Open-ILS/src/sql/Pg/upgrade/XXXX.function.vandelay-oils_xpath_tag_to_table.sql diff --git a/Open-ILS/src/sql/Pg/002.functions.config.sql b/Open-ILS/src/sql/Pg/002.functions.config.sql index 3dab7d9dea..036d7c9d24 100644 --- a/Open-ILS/src/sql/Pg/002.functions.config.sql +++ b/Open-ILS/src/sql/Pg/002.functions.config.sql @@ -185,6 +185,48 @@ SELECT * FROM ( END; $func$ LANGUAGE PLPGSQL IMMUTABLE; +CREATE OR REPLACE FUNCTION oils_xpath_tag_to_table(marc text, tag text, xpaths text[]) RETURNS SETOF record AS $function$ + +-- This function currently populates columns with the FIRST matching value +-- of each XPATH. It would be reasonable to add a 'return_arrays' option +-- where each column is an array of all matching values for each path, but +-- that remains as a TODO + +DECLARE + field RECORD; + output RECORD; + select_list TEXT[]; + from_list TEXT[]; + q TEXT; +BEGIN + -- setup query select + FOR i IN 1 .. ARRAY_UPPER(xpaths,1) LOOP + IF xpaths[i] = 'null()' THEN + select_list := ARRAY_APPEND(select_list, 'NULL::TEXT AS c_' || i ); + ELSE + select_list := ARRAY_APPEND(select_list, '(oils_xpath(' || + quote_literal( + CASE + WHEN xpaths[i] ~ $re$/[^/[]*@[^/]+$$re$ -- attribute + OR xpaths[i] ~ $re$text\(\)$$re$ + THEN xpaths[i] + ELSE xpaths[i] || '//text()' + END + ) || ', field_marc))[1] AS cl_' || i); + -- hardcoded to first value for each path + END IF; + END LOOP; + + -- run query over tag set + q := 'SELECT ' || ARRAY_TO_STRING(select_list, ',') + || ' FROM UNNEST(oils_xpath(' || quote_literal('//*[@tag="' || tag + || '"]') || ', ' || quote_literal(marc) || ')) AS field_marc;'; + --RAISE NOTICE '%', q; + + RETURN QUERY EXECUTE q; +END; + +$function$ LANGUAGE PLPGSQL; CREATE OR REPLACE FUNCTION extract_marc_field ( TEXT, BIGINT, TEXT, TEXT ) RETURNS TEXT AS $$ DECLARE diff --git a/Open-ILS/src/sql/Pg/upgrade/XXXX.function.vandelay-oils_xpath_tag_to_table.sql b/Open-ILS/src/sql/Pg/upgrade/XXXX.function.vandelay-oils_xpath_tag_to_table.sql new file mode 100644 index 0000000000..48e44f8d31 --- /dev/null +++ b/Open-ILS/src/sql/Pg/upgrade/XXXX.function.vandelay-oils_xpath_tag_to_table.sql @@ -0,0 +1,48 @@ +BEGIN; + +--SELECT evergreen.upgrade_deps_block_check('XXXX', :eg_version); + +CREATE OR REPLACE FUNCTION oils_xpath_tag_to_table(marc text, tag text, xpaths text[]) RETURNS SETOF record AS $function$ + +-- This function currently populates columns with the FIRST matching value +-- of each XPATH. It would be reasonable to add a 'return_arrays' option +-- where each column is an array of all matching values for each path, but +-- that remains as a TODO + +DECLARE + field RECORD; + output RECORD; + select_list TEXT[]; + from_list TEXT[]; + q TEXT; +BEGIN + -- setup query select + FOR i IN 1 .. ARRAY_UPPER(xpaths,1) LOOP + IF xpaths[i] = 'null()' THEN + select_list := ARRAY_APPEND(select_list, 'NULL::TEXT AS c_' || i ); + ELSE + select_list := ARRAY_APPEND(select_list, '(oils_xpath(' || + quote_literal( + CASE + WHEN xpaths[i] ~ $re$/[^/[]*@[^/]+$$re$ -- attribute + OR xpaths[i] ~ $re$text\(\)$$re$ + THEN xpaths[i] + ELSE xpaths[i] || '//text()' + END + ) || ', field_marc))[1] AS cl_' || i); + -- hardcoded to first value for each path + END IF; + END LOOP; + + -- run query over tag set + q := 'SELECT ' || ARRAY_TO_STRING(select_list, ',') + || ' FROM UNNEST(oils_xpath(' || quote_literal('//*[@tag="' || tag + || '"]') || ', ' || quote_literal(marc) || ')) AS field_marc;'; + --RAISE NOTICE '%', q; + + RETURN QUERY EXECUTE q; +END; + +$function$ LANGUAGE PLPGSQL; + +COMMIT; -- 2.11.0