From: Jane Sandberg Date: Thu, 27 Oct 2022 22:03:23 +0000 (-0700) Subject: LP#1864507 follow-up, stamp upgrade script, and release notes X-Git-Url: https://old-git.evergreen-ils.org/?a=commitdiff_plain;h=b18c38358b5164ad8d5e6797e811c86a0fc6376c;p=working%2FEvergreen.git LP#1864507 follow-up, stamp upgrade script, and release notes Signed-off-by: Jane Sandberg --- diff --git a/Open-ILS/src/sql/Pg/002.schema.config.sql b/Open-ILS/src/sql/Pg/002.schema.config.sql index 68a29567a3..b3ebef822d 100644 --- a/Open-ILS/src/sql/Pg/002.schema.config.sql +++ b/Open-ILS/src/sql/Pg/002.schema.config.sql @@ -92,7 +92,7 @@ CREATE TRIGGER no_overlapping_deps BEFORE INSERT OR UPDATE ON config.db_patch_dependencies FOR EACH ROW EXECUTE PROCEDURE evergreen.array_overlap_check ('deprecates'); -INSERT INTO config.upgrade_log (version, applied_to) VALUES ('1343', :eg_version); -- khuckins/christine-burns/mmorgan +INSERT INTO config.upgrade_log (version, applied_to) VALUES ('1344', :eg_version); -- mrylander/jweston/sandbergja CREATE TABLE config.bib_source ( id SERIAL PRIMARY KEY, diff --git a/Open-ILS/src/sql/Pg/t/lp1308090-facet_punct.pg b/Open-ILS/src/sql/Pg/t/lp1308090-facet_punct.pg index d68eaf2d3d..f9030032a0 100644 --- a/Open-ILS/src/sql/Pg/t/lp1308090-facet_punct.pg +++ b/Open-ILS/src/sql/Pg/t/lp1308090-facet_punct.pg @@ -1,6 +1,6 @@ BEGIN; -SELECT plan(12); +SELECT plan(18); SELECT can('metabib', ARRAY['trim_trailing_punctuation'], 'metabib.trim_trailing_punctuation function exists'); diff --git a/Open-ILS/src/sql/Pg/upgrade/1344.function.trim_trailing_punct-improvement.sql b/Open-ILS/src/sql/Pg/upgrade/1344.function.trim_trailing_punct-improvement.sql new file mode 100644 index 0000000000..5f74889ad2 --- /dev/null +++ b/Open-ILS/src/sql/Pg/upgrade/1344.function.trim_trailing_punct-improvement.sql @@ -0,0 +1,76 @@ +BEGIN; + +SELECT evergreen.upgrade_deps_block_check('1344', :eg_version); + +-- This function is used to help clean up facet labels. 
Due to quirks in +-- MARC parsing, some facet labels may be generated with periods or commas +-- at the end. This will strip a trailing comma off all the time, and +-- a trailing period when it doesn't look like it is part of initials or a dotted +-- abbreviation. A dangling separator (/ : ; =) is stripped along with the space before it. +-- Smith, John => no change +-- Smith, John, => Smith, John +-- Smith, John. => Smith, John +-- Public, John Q. => no change +-- Public, John, Ph.D. => no change +-- Atlanta -- Georgia -- U.S. => no change +-- Atlanta -- Georgia. => Atlanta -- Georgia +-- The fellowship of the rings / => The fellowship of the rings +-- Some title ; => Some title +CREATE OR REPLACE FUNCTION metabib.trim_trailing_punctuation ( TEXT ) RETURNS TEXT AS $$ +DECLARE + result TEXT; + last_char TEXT; +BEGIN + result := $1; + last_char := substring(result from '.$'); + + IF last_char = ',' THEN + result := substring(result from '^(.*),$'); + + ELSIF last_char = '.' THEN + -- keep the period when it ends an initial or dotted abbreviation: a single word-character preceded by a non-word character + IF substring(result from '\W\w\.$') IS NULL THEN + result := substring(result from '^(.*)\.$'); + END IF; + + ELSIF last_char IN ('/',':',';','=') THEN -- Dangling subtitle/SoR separator + IF substring(result from ' .$') IS NOT NULL THEN -- must have a space before last_char + result := substring(result from '^(.*) .$'); + END IF; + END IF; + + RETURN result; + +END; +$$ language 'plpgsql'; + + +INSERT INTO config.index_normalizer (name, description, func, param_count) VALUES ( + 'Trim Trailing Punctuation', + 'Eliminate extraneous trailing ISBD punctuation in text: slashes, colons, commas, and periods', + 'metabib.trim_trailing_punctuation', + 0 +); + +INSERT INTO config.metabib_field_index_norm_map (field,norm,pos) + SELECT m.id, + i.id, + -1 + FROM config.metabib_field m, + config.index_normalizer i + WHERE i.func = 'metabib.trim_trailing_punctuation' + AND m.field_class='title' AND (m.browse_field OR m.facet_field OR m.display_field) + AND NOT EXISTS (SELECT 1 FROM 
config.metabib_field_index_norm_map WHERE field = m.id AND norm = i.id); + +COMMIT; + +\qecho A partial reingest is necessary to get the full benefit of this change. +\qecho It will take a while. You can cancel now without losing the effect of +\qecho the rest of the upgrade script, and arrange the reingest later. +\qecho + +SELECT metabib.reingest_metabib_field_entries( + id, TRUE, FALSE, FALSE, TRUE, + (SELECT ARRAY_AGG(id) FROM config.metabib_field WHERE field_class='title' AND (browse_field OR facet_field OR display_field)) +) FROM biblio.record_entry; + diff --git a/Open-ILS/src/sql/Pg/upgrade/XXXX.function.trim_trailing_punct-improvement.sql b/Open-ILS/src/sql/Pg/upgrade/XXXX.function.trim_trailing_punct-improvement.sql deleted file mode 100644 index e469f2b2af..0000000000 --- a/Open-ILS/src/sql/Pg/upgrade/XXXX.function.trim_trailing_punct-improvement.sql +++ /dev/null @@ -1,76 +0,0 @@ -BEGIN; - -SELECT evergreen.upgrade_deps_block_check('XXXX', :eg_version); - --- This function is used to help clean up facet labels. Due to quirks in --- MARC parsing, some facet labels may be generated with periods or commas --- at the end. This will strip a trailing commas off all the time, and --- periods when they don't look like they are part of initials or dotted --- abbreviations. --- Smith, John => no change --- Smith, John, => Smith, John --- Smith, John. => Smith, John --- Public, John Q. => no change --- Public, John, Ph.D. => no change --- Atlanta -- Georgia -- U.S. => no change --- Atlanta -- Georgia. => Atlanta, Georgia --- The fellowship of the rings / => The fellowship of the rings --- Some title ; => Some title -CREATE OR REPLACE FUNCTION metabib.trim_trailing_punctuation ( TEXT ) RETURNS TEXT AS $$ -DECLARE - result TEXT; - last_char TEXT; -BEGIN - result := $1; - last_char = substring(result from '.$'); - - IF last_char = ',' THEN - result := substring(result from '^(.*),$'); - - ELSIF last_char = '.' 
THEN - -- must have a single word-character following at least one non-word character - IF substring(result from '\W\w\.$') IS NULL THEN - result := substring(result from '^(.*)\.$'); - END IF; - - ELSIF last_char IN ('/',':',';','=') THEN -- Dangling subtitle/SoR separator - IF substring(result from ' .$') IS NOT NULL THEN -- must have a space before last_char - result := substring(result from '^(.*) .$'); - END IF; - END IF; - - RETURN result; - -END; -$$ language 'plpgsql'; - - -INSERT INTO config.index_normalizer (name, description, func, param_count) VALUES ( - 'Trim Trailing Punctuation', - 'Eliminate extraneous trailing ISBD punctuation in text: slashes, colons, commas, and periods', - 'metabib.trim_trailing_punctuation', - 0 -); - -INSERT INTO config.metabib_field_index_norm_map (field,norm,pos) - SELECT m.id, - i.id, - -1 - FROM config.metabib_field m, - config.index_normalizer i - WHERE i.func = 'metabib.trim_trailing_punctuation' - m.field_class='title' AND (m.browse_field OR m.facet_field OR m.display_field) - AND NOT EXISTS (SELECT 1 FROM config.metabib_field_index_norm_map WHERE field = m.id AND norm = i.id); - -COMMIT; - -\qecho A partial reingest is necessary to get the full benefit of this change. -\qecho It will take a while. You can cancel now withoug losing the effect of -\qecho the rest of the upgrade script, and arrange the reingest later. 
-\qecho - -SELECT metabib.reingest_metabib_field_entries( - id, TRUE, FALSE, FALSE, TRUE, - (SELECT ARRAY_AGG(id) INTO field_list FROM config.metabib_field WHERE field_class='title' AND (browse_field OR facet_field OR display_field)) -) FROM biblio.record_entry; - diff --git a/docs/RELEASE_NOTES_NEXT/OPAC/marcxml_fields_punctuation.adoc b/docs/RELEASE_NOTES_NEXT/OPAC/marcxml_fields_punctuation.adoc new file mode 100644 index 0000000000..56005d8790 --- /dev/null +++ b/docs/RELEASE_NOTES_NEXT/OPAC/marcxml_fields_punctuation.adoc @@ -0,0 +1,10 @@ +== Additional trailing punctuation removed from certain fields == + +MarcXML facet, display, and browse fields will undergo some extra +cleanup before being displayed to a user. Of particular note: for any +title fields that match these criteria, a trailing `/`, `:`, `;`, or +`=` will be removed. + +This change does not affect MODS fields. You can check if a +particular field uses MarcXML or MODS in Server Administration +-> MARC Search/Facet Fields by consulting the Format column. \ No newline at end of file