From: Mike Rylander
Date: Thu, 16 Feb 2017 15:05:55 +0000 (-0500)
Subject: Stamping upgrade script for separating fingerprint components
X-Git-Url: https://old-git.evergreen-ils.org/?a=commitdiff_plain;h=c4d1595fba44d24825f51a5097b7ee7b07523780;p=working%2FEvergreen.git

Stamping upgrade script for separating fingerprint components

Signed-off-by: Mike Rylander
---

diff --git a/Open-ILS/src/sql/Pg/002.schema.config.sql b/Open-ILS/src/sql/Pg/002.schema.config.sql
index bc6502e3eb..b2cb8cb0a5 100644
--- a/Open-ILS/src/sql/Pg/002.schema.config.sql
+++ b/Open-ILS/src/sql/Pg/002.schema.config.sql
@@ -91,7 +91,7 @@ CREATE TRIGGER no_overlapping_deps
     BEFORE INSERT OR UPDATE ON config.db_patch_dependencies
     FOR EACH ROW EXECUTE PROCEDURE evergreen.array_overlap_check ('deprecates');
 
-INSERT INTO config.upgrade_log (version, applied_to) VALUES ('1016', :eg_version); -- kmlussier/miker
+INSERT INTO config.upgrade_log (version, applied_to) VALUES ('1017', :eg_version); -- gmcharlt/miker
 
 CREATE TABLE config.bib_source (
     id SERIAL PRIMARY KEY,
diff --git a/Open-ILS/src/sql/Pg/upgrade/1017.schema.update_fingerprinting.sql b/Open-ILS/src/sql/Pg/upgrade/1017.schema.update_fingerprinting.sql
new file mode 100644
index 0000000000..ad4fc4738c
--- /dev/null
+++ b/Open-ILS/src/sql/Pg/upgrade/1017.schema.update_fingerprinting.sql
@@ -0,0 +1,90 @@
+BEGIN;
+
+SELECT evergreen.upgrade_deps_block_check('1017', :eg_version);
+
+-- Build the fingerprint string for a bib record from its MARC XML.
+-- Every row of config.biblio_fingerprint, taken in (format, id) order,
+-- contributes one "name:value" component; components are space-separated.
+-- Returns NULL when marc is NULL or the empty string.
+CREATE OR REPLACE FUNCTION biblio.extract_fingerprint ( marc text ) RETURNS TEXT AS $func$
+DECLARE
+    idx             config.biblio_fingerprint%ROWTYPE;
+    xfrm            config.xml_transform%ROWTYPE;
+    prev_xfrm       TEXT;
+    transformed_xml TEXT;
+    raw_text        TEXT;
+    output_text     TEXT := '';
+BEGIN
+
+    IF marc IS NULL OR marc = '' THEN
+        RETURN NULL;
+    END IF;
+
+    -- Loop over the indexing entries; ordering by format keeps entries that
+    -- share a transform adjacent so the transformed XML can be reused
+    FOR idx IN SELECT * FROM config.biblio_fingerprint ORDER BY format, id LOOP
+
+        SELECT INTO xfrm * from config.xml_transform WHERE name = idx.format;
+
+        -- See if we can skip the XSLT ... it's expensive
+        IF prev_xfrm IS NULL OR prev_xfrm <> xfrm.name THEN
+            -- Can't skip the transform; an xslt of '---' means use marc as-is
+            IF xfrm.xslt <> '---' THEN
+                transformed_xml := oils_xslt_process(marc,xfrm.xslt);
+            ELSE
+                transformed_xml := marc;
+            END IF;
+
+            prev_xfrm := xfrm.name;
+        END IF;
+
+        -- Join the text nodes under the first idx.xpath match, normalize, default to ''
+        raw_text := COALESCE(
+            naco_normalize(
+                ARRAY_TO_STRING(
+                    oils_xpath(
+                        '//text()',
+                        (oils_xpath(
+                            idx.xpath,
+                            transformed_xml,
+                            ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]]
+                        ))[1]
+                    ),
+                    ''
+                )
+            ),
+            ''
+        );
+
+        -- Remove the first [bracketed] run only (this call has no 'g' flag)
+        raw_text := REGEXP_REPLACE(raw_text, E'\\[.+?\\]', E'');
+        raw_text := REGEXP_REPLACE(raw_text, E'\\mthe\\M|\\man?d?d\\M', E'', 'g'); -- arg! the pain!
+
+        -- Keep just the leading run of word characters when the entry asks for it
+        IF idx.first_word IS TRUE THEN
+            raw_text := REGEXP_REPLACE(raw_text, E'^(\\w+).*?$', E'\\1');
+        END IF;
+
+        -- Append "name:value " with all whitespace removed from the value
+        output_text := output_text || idx.name || ':' ||
+            REGEXP_REPLACE(raw_text, E'\\s+', '', 'g') || ' ';
+
+    END LOOP;
+
+    RETURN BTRIM(output_text);
+
+END;
+$func$ LANGUAGE PLPGSQL;
+
+COMMIT;
+
+-- The statements below run outside the transaction above.
+\qecho Recalculating bib fingerprints
+ALTER TABLE biblio.record_entry DISABLE TRIGGER USER;
+UPDATE biblio.record_entry SET fingerprint = biblio.extract_fingerprint(marc) WHERE NOT deleted;
+ALTER TABLE biblio.record_entry ENABLE TRIGGER USER;
+
+\qecho Remapping metarecords
+SELECT metabib.remap_metarecord_for_bib(id, fingerprint)
+FROM biblio.record_entry
+WHERE NOT deleted;
diff --git a/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.update_fingerprinting.sql b/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.update_fingerprinting.sql
deleted file mode 100644
index 2eb5ac889e..0000000000
--- a/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.update_fingerprinting.sql
+++ /dev/null
@@ -1,86 +0,0 @@
-BEGIN;
-
---- SELECT evergreen.upgrade_deps_block_check('XXXX', :eg_version);
-
-CREATE OR REPLACE FUNCTION biblio.extract_fingerprint ( marc text ) RETURNS TEXT AS $func$
-DECLARE
-    idx config.biblio_fingerprint%ROWTYPE;
-    xfrm config.xml_transform%ROWTYPE;
-    prev_xfrm TEXT;
-    transformed_xml TEXT;
-    xml_node TEXT;
-    xml_node_list TEXT[];
-    raw_text TEXT;
-    output_text TEXT := '';
-BEGIN
-
-    IF marc IS NULL OR marc = '' THEN
-        RETURN NULL;
-    END IF;
-
-    -- Loop over the indexing entries
-    FOR idx IN SELECT * FROM config.biblio_fingerprint ORDER BY format, id LOOP
-
-        SELECT INTO xfrm * from config.xml_transform WHERE name = idx.format;
-
-        -- See if we can skip the XSLT ... it's expensive
-        IF prev_xfrm IS NULL OR prev_xfrm <> xfrm.name THEN
-            -- Can't skip the transform
-            IF xfrm.xslt <> '---' THEN
-                transformed_xml := oils_xslt_process(marc,xfrm.xslt);
-            ELSE
-                transformed_xml := marc;
-            END IF;
-
-            prev_xfrm := xfrm.name;
-        END IF;
-
-        raw_text := COALESCE(
-            naco_normalize(
-                ARRAY_TO_STRING(
-                    oils_xpath(
-                        '//text()',
-                        (oils_xpath(
-                            idx.xpath,
-                            transformed_xml,
-                            ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]]
-                        ))[1]
-                    ),
-                    ''
-                )
-            ),
-            ''
-        );
-
-        raw_text := REGEXP_REPLACE(raw_text, E'\\[.+?\\]', E'');
-        raw_text := REGEXP_REPLACE(raw_text, E'\\mthe\\M|\\man?d?d\\M', E'', 'g'); -- arg! the pain!
-
-        IF idx.first_word IS TRUE THEN
-            raw_text := REGEXP_REPLACE(raw_text, E'^(\\w+).*?$', E'\\1');
-        END IF;
-
-        output_text := output_text || idx.name || ':' ||
-            REGEXP_REPLACE(raw_text, E'\\s+', '', 'g') || ' ';
-
-    END LOOP;
-
-    RETURN BTRIM(output_text);
-
-END;
-$func$ LANGUAGE PLPGSQL;
-
-COMMIT;
-
-\qecho Recalculating bib fingerprints
-ALTER TABLE biblio.record_entry DISABLE TRIGGER USER;
-UPDATE biblio.record_entry SET fingerprint = biblio.extract_fingerprint(marc) WHERE NOT deleted;
-ALTER TABLE biblio.record_entry ENABLE TRIGGER USER;
-
-SELECT metabib.remap_metarecord_for_bib(id, fingerprint)
-FROM biblio.record_entry
-WHERE NOT deleted;
-
-\qecho Remapping metarecords
-SELECT metabib.remap_metarecord_for_bib(id, fingerprint)
-FROM biblio.record_entry
-WHERE NOT deleted;