From 626f2372bd3ee6eaf780921036e832569fff1fc2 Mon Sep 17 00:00:00 2001 From: miker <miker@dcc99617-32d9-48b4-a31d-7c20da2025e4> Date: Thu, 25 Mar 2010 17:24:02 +0000 Subject: [PATCH] add a table to allow labelling (and eventual expansion) of search classes; index each node separately for facets, instead of (well, in addition to) globbing; remove extra (redundant) oils_xslt_process function git-svn-id: svn://svn.open-ils.org/ILS/trunk@15979 dcc99617-32d9-48b4-a31d-7c20da2025e4 --- Open-ILS/src/sql/Pg/002.functions.config.sql | 38 ------ Open-ILS/src/sql/Pg/002.schema.config.sql | 10 +- Open-ILS/src/sql/Pg/030.schema.metabib.sql | 140 ++++++++++++--------- Open-ILS/src/sql/Pg/950.data.seed-values.sql | 75 ++++++----- ...213.schema.config.metabib_field_class_stuff.sql | 121 ++++++++++++++++++ 5 files changed, 251 insertions(+), 133 deletions(-) create mode 100644 Open-ILS/src/sql/Pg/upgrade/0213.schema.config.metabib_field_class_stuff.sql diff --git a/Open-ILS/src/sql/Pg/002.functions.config.sql b/Open-ILS/src/sql/Pg/002.functions.config.sql index 0ac7600d75..f2d80abf4b 100644 --- a/Open-ILS/src/sql/Pg/002.functions.config.sql +++ b/Open-ILS/src/sql/Pg/002.functions.config.sql @@ -331,43 +331,5 @@ CREATE OR REPLACE FUNCTION is_json (TEXT) RETURNS BOOL AS $func$ return $@ ? 
0 : 1; $func$ LANGUAGE PLPERLU; -CREATE OR REPLACE FUNCTION oils_xslt_process(TEXT, TEXT) RETURNS TEXT AS $func$ - use strict; - - use XML::LibXSLT; - use XML::LibXML; - - my $doc = shift; - my $xslt = shift; - - # The following approach uses the older XML::LibXML 1.69 / XML::LibXSLT 1.68 - # methods of parsing XML documents and stylesheets, in the hopes of broader - # compatibility with distributions - my $parser = $_SHARED{'_xslt_process'}{parsers}{xml} || XML::LibXML->new(); - - # Cache the XML parser, if we do not already have one - $_SHARED{'_xslt_process'}{parsers}{xml} = $parser - unless ($_SHARED{'_xslt_process'}{parsers}{xml}); - - my $xslt_parser = $_SHARED{'_xslt_process'}{parsers}{xslt} || XML::LibXSLT->new(); - - # Cache the XSLT processor, if we do not already have one - $_SHARED{'_xslt_process'}{parsers}{xslt} = $xslt_parser - unless ($_SHARED{'_xslt_process'}{parsers}{xslt}); - - my $stylesheet = $_SHARED{'_xslt_process'}{stylesheets}{$xslt} || - $xslt_parser->parse_stylesheet( $parser->parse_string($xslt) ); - - $_SHARED{'_xslt_process'}{stylesheets}{$xslt} = $stylesheet - unless ($_SHARED{'_xslt_process'}{stylesheets}{$xslt}); - - return $stylesheet->output_string( - $stylesheet->transform( - $parser->parse_string($doc) - ) - ); - -$func$ LANGUAGE 'plperlu' STRICT IMMUTABLE; - COMMIT; diff --git a/Open-ILS/src/sql/Pg/002.schema.config.sql b/Open-ILS/src/sql/Pg/002.schema.config.sql index 665987db53..064320a6fa 100644 --- a/Open-ILS/src/sql/Pg/002.schema.config.sql +++ b/Open-ILS/src/sql/Pg/002.schema.config.sql @@ -59,7 +59,7 @@ CREATE TABLE config.upgrade_log ( install_date TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW() ); -INSERT INTO config.upgrade_log (version) VALUES ('0212'); -- miker +INSERT INTO config.upgrade_log (version) VALUES ('0213'); -- miker CREATE TABLE config.bib_source ( id SERIAL PRIMARY KEY, @@ -189,10 +189,16 @@ INSERT INTO config.biblio_fingerprint (name, xpath, format, first_word) TRUE ); +CREATE TABLE config.metabib_class 
( + name TEXT PRIMARY KEY, + label TEXT NOT NULL UNIQUE +); + CREATE TABLE config.metabib_field ( id SERIAL PRIMARY KEY, - field_class TEXT NOT NULL CHECK (lower(field_class) IN ('title','author','subject','keyword','series')), + field_class TEXT NOT NULL REFERENCES config.metabib_class (name), name TEXT NOT NULL, + label TEXT NOT NULL, xpath TEXT NOT NULL, weight INT NOT NULL DEFAULT 1, format TEXT NOT NULL DEFAULT 'mods33', diff --git a/Open-ILS/src/sql/Pg/030.schema.metabib.sql b/Open-ILS/src/sql/Pg/030.schema.metabib.sql index 4e2c1bbb88..2bc89da54f 100644 --- a/Open-ILS/src/sql/Pg/030.schema.metabib.sql +++ b/Open-ILS/src/sql/Pg/030.schema.metabib.sql @@ -233,74 +233,96 @@ CREATE TYPE metabib.field_entry_template AS ( CREATE OR REPLACE FUNCTION biblio.extract_metabib_field_entry ( rid BIGINT, default_joiner TEXT ) RETURNS SETOF metabib.field_entry_template AS $func$ DECLARE - bib biblio.record_entry%ROWTYPE; - idx config.metabib_field%ROWTYPE; - xfrm config.xml_transform%ROWTYPE; - prev_xfrm TEXT; - transformed_xml TEXT; - xml_node TEXT; - xml_node_list TEXT[]; - raw_text TEXT; - joiner TEXT := default_joiner; -- XXX will index defs supply a joiner? - output_row metabib.field_entry_template%ROWTYPE; + bib biblio.record_entry%ROWTYPE; + idx config.metabib_field%ROWTYPE; + xfrm config.xml_transform%ROWTYPE; + prev_xfrm TEXT; + transformed_xml TEXT; + xml_node TEXT; + xml_node_list TEXT[]; + raw_text TEXT; + curr_text TEXT; + joiner TEXT := default_joiner; -- XXX will index defs supply a joiner? 
+ output_row metabib.field_entry_template%ROWTYPE; BEGIN - -- Get the record - SELECT INTO bib * FROM biblio.record_entry WHERE id = rid; + -- Get the record + SELECT INTO bib * FROM biblio.record_entry WHERE id = rid; - -- Loop over the indexing entries - FOR idx IN SELECT * FROM config.metabib_field ORDER BY format LOOP + -- Loop over the indexing entries + FOR idx IN SELECT * FROM config.metabib_field ORDER BY format LOOP - SELECT INTO xfrm * from config.xml_transform WHERE name = idx.format; + SELECT INTO xfrm * from config.xml_transform WHERE name = idx.format; - -- See if we can skip the XSLT ... it's expensive - IF prev_xfrm IS NULL OR prev_xfrm <> xfrm.name THEN - -- Can't skip the transform - IF xfrm.xslt <> '---' THEN - transformed_xml := oils_xslt_process(bib.marc,xfrm.xslt); - ELSE - transformed_xml := bib.marc; - END IF; + -- See if we can skip the XSLT ... it's expensive + IF prev_xfrm IS NULL OR prev_xfrm <> xfrm.name THEN + -- Can't skip the transform + IF xfrm.xslt <> '---' THEN + transformed_xml := oils_xslt_process(bib.marc,xfrm.xslt); + ELSE + transformed_xml := bib.marc; + END IF; - prev_xfrm := xfrm.name; - END IF; + prev_xfrm := xfrm.name; + END IF; - xml_node_list := oils_xpath( idx.xpath, transformed_xml, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] ); + xml_node_list := oils_xpath( idx.xpath, transformed_xml, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] ); + + raw_text := NULL; + FOR xml_node IN SELECT x FROM explode_array(xml_node_list) AS x LOOP + CONTINUE WHEN xml_node !~ E'^\\s*<'; + + curr_text := ARRAY_TO_STRING( + oils_xpath( '//text()', + REGEXP_REPLACE( -- This escapes all &s not followed by "amp;". 
Data is returned from oils_xpath (above) in UTF-8, not entity encoded + REGEXP_REPLACE( -- This escapes embedded <s + xml_node, + $re$(>[^<]+)(<)([^>]+<)$re$, + E'\\1&lt;\\3', + 'g' + ), + '&(?!amp;)', + '&amp;', + 'g' + ) + ), + ' ' + ); + + CONTINUE WHEN curr_text IS NULL OR curr_text = ''; + + IF raw_text IS NOT NULL THEN + raw_text := raw_text || joiner; + END IF; - raw_text := NULL; - FOR xml_node IN SELECT x FROM explode_array(xml_node_list) AS x LOOP - CONTINUE WHEN xml_node !~ E'^\\s*<'; - IF raw_text IS NOT NULL THEN - raw_text := raw_text || joiner; - END IF; - raw_text := COALESCE(raw_text,'') || ARRAY_TO_STRING( - oils_xpath( '//text()', - REGEXP_REPLACE( -- This escapes all &s not followed by "amp;". Data ise returned from oils_xpath (above) in UTF-8, not entity encoded - REGEXP_REPLACE( -- This escapes embeded <s - xml_node, - $re$(>[^<]+)(<)([^>]+<)$re$, - E'\\1&lt;\\3', - 'g' - ), - '&(?!amp;)', - '&amp;', - 'g' - ) - ), - ' ' - ); - END LOOP; - - CONTINUE WHEN raw_text IS NULL; - - output_row.field_class = idx.field_class; - output_row.field = idx.id; - output_row.source = rid; - output_row.value = BTRIM(REGEXP_REPLACE(raw_text, E'\\s+', ' ', 'g')); - - RETURN NEXT output_row; + raw_text := COALESCE(raw_text,'') || curr_text; - END LOOP; + -- insert raw node text for faceting + IF idx.facet_field THEN + + output_row.field_class = idx.field_class; + output_row.field = idx.id; + output_row.source = rid; + output_row.value = BTRIM(REGEXP_REPLACE(curr_text, E'\\s+', ' ', 'g')); + + RETURN NEXT output_row; + END IF; + + END LOOP; + + CONTINUE WHEN raw_text IS NULL OR raw_text = ''; + + -- insert combined node text for searching + IF idx.search_field THEN + output_row.field_class = idx.field_class; + output_row.field = idx.id; + output_row.source = rid; + output_row.value = BTRIM(REGEXP_REPLACE(raw_text, E'\\s+', ' ', 'g')); + + RETURN NEXT output_row; + END IF; + + END LOOP; END; $func$ LANGUAGE PLPGSQL; diff --git a/Open-ILS/src/sql/Pg/950.data.seed-values.sql 
b/Open-ILS/src/sql/Pg/950.data.seed-values.sql index d60c09dd56..3eabf7487d 100644 --- a/Open-ILS/src/sql/Pg/950.data.seed-values.sql +++ b/Open-ILS/src/sql/Pg/950.data.seed-values.sql @@ -11,40 +11,47 @@ INSERT INTO config.standing (id, value) VALUES (1, oils_i18n_gettext(1, 'Good', INSERT INTO config.standing (id, value) VALUES (2, oils_i18n_gettext(2, 'Barred', 'cst', 'value')); SELECT SETVAL('config.standing_id_seq'::TEXT, 100); -INSERT INTO config.metabib_field ( field_class, name, format, xpath ) VALUES - ( 'series', 'seriestitle', 'mods32', $$//mods32:mods/mods32:relatedItem[@type="series"]/mods32:titleInfo$$ ); -INSERT INTO config.metabib_field ( field_class, name, format, xpath ) VALUES - ( 'title', 'abbreviated', 'mods32', $$//mods32:mods/mods32:titleInfo[mods32:title and (@type='abbreviated')]$$ ); -INSERT INTO config.metabib_field ( field_class, name, format, xpath ) VALUES - ( 'title', 'translated', 'mods32', $$//mods32:mods/mods32:titleInfo[mods32:title and (@type='translated')]$$ ); -INSERT INTO config.metabib_field ( field_class, name, format, xpath ) VALUES - ( 'title', 'alternative', 'mods32', $$//mods32:mods/mods32:titleInfo[mods32:title and (@type='alternative')]$$ ); -INSERT INTO config.metabib_field ( field_class, name, format, xpath ) VALUES - ( 'title', 'uniform', 'mods32', $$//mods32:mods/mods32:titleInfo[mods32:title and (@type='uniform')]$$ ); -INSERT INTO config.metabib_field ( field_class, name, format, xpath ) VALUES - ( 'title', 'proper', 'mods32', $$//mods32:mods/mods32:titleInfo[mods32:title and not (@type)]$$ ); -INSERT INTO config.metabib_field ( field_class, name, format, xpath ) VALUES - ( 'author', 'corporate', 'mods32', $$//mods32:mods/mods32:name[@type='corporate']/mods32:namePart[../mods32:role/mods32:roleTerm[text()='creator']]$$ ); -INSERT INTO config.metabib_field ( field_class, name, format, xpath ) VALUES - ( 'author', 'personal', 'mods32', 
$$//mods32:mods/mods32:name[@type='personal']/mods32:namePart[../mods32:role/mods32:roleTerm[text()='creator']]$$ ); -INSERT INTO config.metabib_field ( field_class, name, format, xpath ) VALUES - ( 'author', 'conference', 'mods32', $$//mods32:mods/mods32:name[@type='conference']/mods32:namePart[../mods32:role/mods32:roleTerm[text()='creator']]$$ ); -INSERT INTO config.metabib_field ( field_class, name, format, xpath ) VALUES - ( 'author', 'other', 'mods32', $$//mods32:mods/mods32:name[@type='personal']/mods32:namePart[not(../mods32:role)]$$ ); -INSERT INTO config.metabib_field ( field_class, name, format, xpath ) VALUES - ( 'subject', 'geographic', 'mods32', $$//mods32:mods/mods32:subject/mods32:geographic$$ ); -INSERT INTO config.metabib_field ( field_class, name, format, xpath ) VALUES - ( 'subject', 'name', 'mods32', $$//mods32:mods/mods32:subject/mods32:name$$ ); -INSERT INTO config.metabib_field ( field_class, name, format, xpath ) VALUES - ( 'subject', 'temporal', 'mods32', $$//mods32:mods/mods32:subject/mods32:temporal$$ ); -INSERT INTO config.metabib_field ( field_class, name, format, xpath ) VALUES - ( 'subject', 'topic', 'mods32', $$//mods32:mods/mods32:subject/mods32:topic$$ ); ---INSERT INTO config.metabib_field ( field_class, name, format, xpath ) VALUES --- ( field_class, name, xpath ) VALUES ( 'subject', 'genre', 'mods32', $$//mods32:mods/mods32:genre$$ ); -INSERT INTO config.metabib_field ( field_class, name, format, xpath ) VALUES - ( 'keyword', 'keyword', 'mods32', $$//mods32:mods/*[not(local-name()='originInfo')]$$ ); -- /* to fool vim */; -INSERT INTO config.metabib_field (field_class, name, format, xpath ) VALUES - ( 'subject', 'complete', 'mods32', $$//mods32:mods/mods32:subject//text()$$ ); +INSERT INTO config.metabib_class ( name, label ) VALUES ( 'keyword', oils_i18n_gettext('keyword', 'Keyword', 'cmc', 'label') ); +INSERT INTO config.metabib_class ( name, label ) VALUES ( 'title', oils_i18n_gettext('title', 'Title', 'cmc', 'label') ); 
+INSERT INTO config.metabib_class ( name, label ) VALUES ( 'author', oils_i18n_gettext('author', 'Author', 'cmc', 'label') ); +INSERT INTO config.metabib_class ( name, label ) VALUES ( 'subject', oils_i18n_gettext('subject', 'Subject', 'cmc', 'label') ); +INSERT INTO config.metabib_class ( name, label ) VALUES ( 'series', oils_i18n_gettext('series', 'Series', 'cmc', 'label') ); + +INSERT INTO config.metabib_field ( id, field_class, name, label, format, xpath ) VALUES + (1, 'series', 'seriestitle', oils_i18n_gettext(1, 'Series Title', 'cmf', 'label'), 'mods32', $$//mods32:mods/mods32:relatedItem[@type="series"]/mods32:titleInfo$$ ); +INSERT INTO config.metabib_field ( id, field_class, name, label, format, xpath ) VALUES + (2, 'title', 'abbreviated', oils_i18n_gettext(2, 'Abbreviated Title', 'cmf', 'label'), 'mods32', $$//mods32:mods/mods32:titleInfo[mods32:title and (@type='abbreviated')]$$ ); +INSERT INTO config.metabib_field ( id, field_class, name, label, format, xpath ) VALUES + (3, 'title', 'translated', oils_i18n_gettext(3, 'Translated Title', 'cmf', 'label'), 'mods32', $$//mods32:mods/mods32:titleInfo[mods32:title and (@type='translated')]$$ ); +INSERT INTO config.metabib_field ( id, field_class, name, label, format, xpath ) VALUES + (4, 'title', 'alternative', oils_i18n_gettext(4, 'Alternate Title', 'cmf', 'label'), 'mods32', $$//mods32:mods/mods32:titleInfo[mods32:title and (@type='alternative')]$$ ); +INSERT INTO config.metabib_field ( id, field_class, name, label, format, xpath ) VALUES + (5, 'title', 'uniform', oils_i18n_gettext(5, 'Uniform Title', 'cmf', 'label'), 'mods32', $$//mods32:mods/mods32:titleInfo[mods32:title and (@type='uniform')]$$ ); +INSERT INTO config.metabib_field ( id, field_class, name, label, format, xpath ) VALUES + (6, 'title', 'proper', oils_i18n_gettext(6, 'Title Proper', 'cmf', 'label'), 'mods32', $$//mods32:mods/mods32:titleInfo[mods32:title and not (@type)]$$ ); +INSERT INTO config.metabib_field ( id, field_class, name, label, 
format, xpath ) VALUES + (7, 'author', 'corporate', oils_i18n_gettext(7, 'Corporate Author', 'cmf', 'label'), 'mods32', $$//mods32:mods/mods32:name[@type='corporate']/mods32:namePart[../mods32:role/mods32:roleTerm[text()='creator']]$$ ); +INSERT INTO config.metabib_field ( id, field_class, name, label, format, xpath ) VALUES + (8, 'author', 'personal', oils_i18n_gettext(8, 'Personal Author', 'cmf', 'label'), 'mods32', $$//mods32:mods/mods32:name[@type='personal']/mods32:namePart[../mods32:role/mods32:roleTerm[text()='creator']]$$ ); +INSERT INTO config.metabib_field ( id, field_class, name, label, format, xpath ) VALUES + (9, 'author', 'conference', oils_i18n_gettext(9, 'Conference Author', 'cmf', 'label'), 'mods32', $$//mods32:mods/mods32:name[@type='conference']/mods32:namePart[../mods32:role/mods32:roleTerm[text()='creator']]$$ ); +INSERT INTO config.metabib_field ( id, field_class, name, label, format, xpath ) VALUES + (10, 'author', 'other', oils_i18n_gettext(10, 'Other Author', 'cmf', 'label'), 'mods32', $$//mods32:mods/mods32:name[@type='personal']/mods32:namePart[not(../mods32:role)]$$ ); +INSERT INTO config.metabib_field ( id, field_class, name, label, format, xpath ) VALUES + (11, 'subject', 'geographic', oils_i18n_gettext(11, 'Geographic Subject', 'cmf', 'label'), 'mods32', $$//mods32:mods/mods32:subject/mods32:geographic$$ ); +INSERT INTO config.metabib_field ( id, field_class, name, label, format, xpath ) VALUES + (12, 'subject', 'name', oils_i18n_gettext(12, 'Name Subject', 'cmf', 'label'), 'mods32', $$//mods32:mods/mods32:subject/mods32:name$$ ); +INSERT INTO config.metabib_field ( id, field_class, name, label, format, xpath ) VALUES + (13, 'subject', 'temporal', oils_i18n_gettext(13, 'Temporal Subject', 'cmf', 'label'), 'mods32', $$//mods32:mods/mods32:subject/mods32:temporal$$ ); +INSERT INTO config.metabib_field ( id, field_class, name, label, format, xpath ) VALUES + (14, 'subject', 'topic', oils_i18n_gettext(14, 'Topic Subject', 'cmf', 'label'), 
'mods32', $$//mods32:mods/mods32:subject/mods32:topic$$ ); +--INSERT INTO config.metabib_field ( id, field_class, name, format, xpath ) VALUES +-- ( id, field_class, name, xpath ) VALUES ( 'subject', 'genre', 'mods32', $$//mods32:mods/mods32:genre$$ ); +INSERT INTO config.metabib_field ( id, field_class, name, label, format, xpath ) VALUES + (15, 'keyword', 'keyword', oils_i18n_gettext(15, 'General Keywords', 'cmf', 'label'), 'mods32', $$//mods32:mods/*[not(local-name()='originInfo')]$$ ); -- /* to fool vim */; +INSERT INTO config.metabib_field ( id, field_class, name, label, format, xpath ) VALUES + (16, 'subject', 'complete', oils_i18n_gettext(16, 'All Subjects', 'cmf', 'label'), 'mods32', $$//mods32:mods/mods32:subject//text()$$ ); +SELECT SETVAL('config.metabib_field_id_seq'::TEXT, (SELECT MAX(id) FROM config.metabib_field), TRUE); INSERT INTO config.non_cataloged_type ( id, owning_lib, name ) VALUES ( 1, 1, oils_i18n_gettext(1, 'Paperback Book', 'cnct', 'name') ); SELECT SETVAL('config.non_cataloged_type_id_seq'::TEXT, 100); diff --git a/Open-ILS/src/sql/Pg/upgrade/0213.schema.config.metabib_field_class_stuff.sql b/Open-ILS/src/sql/Pg/upgrade/0213.schema.config.metabib_field_class_stuff.sql new file mode 100644 index 0000000000..637c8ce932 --- /dev/null +++ b/Open-ILS/src/sql/Pg/upgrade/0213.schema.config.metabib_field_class_stuff.sql @@ -0,0 +1,121 @@ + +BEGIN; + +INSERT INTO config.upgrade_log (version) VALUES ('0213'); + +CREATE TABLE config.metabib_class ( + name TEXT PRIMARY KEY, + label TEXT NOT NULL UNIQUE +); + +INSERT INTO config.metabib_class ( name, label ) VALUES ( 'keyword', oils_i18n_gettext('keyword', 'Keyword', 'cmc', 'label') ); +INSERT INTO config.metabib_class ( name, label ) VALUES ( 'title', oils_i18n_gettext('title', 'Title', 'cmc', 'label') ); +INSERT INTO config.metabib_class ( name, label ) VALUES ( 'author', oils_i18n_gettext('author', 'Author', 'cmc', 'label') ); +INSERT INTO config.metabib_class ( name, label ) VALUES ( 'subject', 
oils_i18n_gettext('subject', 'Subject', 'cmc', 'label') ); +INSERT INTO config.metabib_class ( name, label ) VALUES ( 'series', oils_i18n_gettext('series', 'Series', 'cmc', 'label') ); + +ALTER TABLE config.metabib_field ADD COLUMN label TEXT; +UPDATE config.metabib_field SET label = name; +ALTER TABLE config.metabib_field ALTER COLUMN label SET NOT NULL; + +ALTER TABLE config.metabib_field ADD CONSTRAINT field_class_fkey FOREIGN KEY (field_class) REFERENCES config.metabib_class (name); + +CREATE OR REPLACE FUNCTION biblio.extract_metabib_field_entry ( rid BIGINT, default_joiner TEXT ) RETURNS SETOF metabib.field_entry_template AS $func$ +DECLARE + bib biblio.record_entry%ROWTYPE; + idx config.metabib_field%ROWTYPE; + xfrm config.xml_transform%ROWTYPE; + prev_xfrm TEXT; + transformed_xml TEXT; + xml_node TEXT; + xml_node_list TEXT[]; + raw_text TEXT; + curr_text TEXT; + joiner TEXT := default_joiner; -- XXX will index defs supply a joiner? + output_row metabib.field_entry_template%ROWTYPE; +BEGIN + + -- Get the record + SELECT INTO bib * FROM biblio.record_entry WHERE id = rid; + + -- Loop over the indexing entries + FOR idx IN SELECT * FROM config.metabib_field ORDER BY format LOOP + + SELECT INTO xfrm * from config.xml_transform WHERE name = idx.format; + + -- See if we can skip the XSLT ... it's expensive + IF prev_xfrm IS NULL OR prev_xfrm <> xfrm.name THEN + -- Can't skip the transform + IF xfrm.xslt <> '---' THEN + transformed_xml := oils_xslt_process(bib.marc,xfrm.xslt); + ELSE + transformed_xml := bib.marc; + END IF; + + prev_xfrm := xfrm.name; + END IF; + + xml_node_list := oils_xpath( idx.xpath, transformed_xml, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] ); + + raw_text := NULL; + FOR xml_node IN SELECT x FROM explode_array(xml_node_list) AS x LOOP + CONTINUE WHEN xml_node !~ E'^\\s*<'; + + curr_text := ARRAY_TO_STRING( + oils_xpath( '//text()', + REGEXP_REPLACE( -- This escapes all &s not followed by "amp;". 
Data is returned from oils_xpath (above) in UTF-8, not entity encoded + REGEXP_REPLACE( -- This escapes embedded <s + xml_node, + $re$(>[^<]+)(<)([^>]+<)$re$, + E'\\1&lt;\\3', + 'g' + ), + '&(?!amp;)', + '&amp;', + 'g' + ) + ), + ' ' + ); + + CONTINUE WHEN curr_text IS NULL OR curr_text = ''; + + IF raw_text IS NOT NULL THEN + raw_text := raw_text || joiner; + END IF; + + raw_text := COALESCE(raw_text,'') || curr_text; + + -- insert raw node text for faceting + IF idx.facet_field THEN + + output_row.field_class = idx.field_class; + output_row.field = idx.id; + output_row.source = rid; + output_row.value = BTRIM(REGEXP_REPLACE(curr_text, E'\\s+', ' ', 'g')); + + RETURN NEXT output_row; + END IF; + + END LOOP; + + CONTINUE WHEN raw_text IS NULL OR raw_text = ''; + + -- insert combined node text for searching + IF idx.search_field THEN + output_row.field_class = idx.field_class; + output_row.field = idx.id; + output_row.source = rid; + output_row.value = BTRIM(REGEXP_REPLACE(raw_text, E'\\s+', ' ', 'g')); + + RETURN NEXT output_row; + END IF; + + END LOOP; + +END; +$func$ LANGUAGE PLPGSQL; + + +COMMIT; + -- 2.11.0