From 9287045f2d7b7d8729b15a179443af1783834258 Mon Sep 17 00:00:00 2001 From: Galen Charlton Date: Fri, 5 Nov 2021 16:05:30 -0400 Subject: [PATCH] update upgrade script for 3.7.1-3.8.0 Signed-off-by: Galen Charlton --- ...a-upgrade-db.sql => 3.7.1-3.8.0-upgrade-db.sql} | 358 ++++++++++++++++++++- 1 file changed, 355 insertions(+), 3 deletions(-) rename Open-ILS/src/sql/Pg/version-upgrade/{3.7.1-3.8-beta-upgrade-db.sql => 3.7.1-3.8.0-upgrade-db.sql} (90%) diff --git a/Open-ILS/src/sql/Pg/version-upgrade/3.7.1-3.8-beta-upgrade-db.sql b/Open-ILS/src/sql/Pg/version-upgrade/3.7.1-3.8.0-upgrade-db.sql similarity index 90% rename from Open-ILS/src/sql/Pg/version-upgrade/3.7.1-3.8-beta-upgrade-db.sql rename to Open-ILS/src/sql/Pg/version-upgrade/3.7.1-3.8.0-upgrade-db.sql index 28d4446cb0..2f3645fc2c 100644 --- a/Open-ILS/src/sql/Pg/version-upgrade/3.7.1-3.8-beta-upgrade-db.sql +++ b/Open-ILS/src/sql/Pg/version-upgrade/3.7.1-3.8.0-upgrade-db.sql @@ -1,7 +1,7 @@ ---Upgrade Script for 3.7.1 to 3.8-beta -\set eg_version '''3.8-beta''' +--Upgrade Script for 3.7.1 to 3.8.0 +\set eg_version '''3.8.0''' BEGIN; -INSERT INTO config.upgrade_log (version, applied_to) VALUES ('3.8-beta', :eg_version); +INSERT INTO config.upgrade_log (version, applied_to) VALUES ('3.8.0', :eg_version); SELECT evergreen.upgrade_deps_block_check('1260', :eg_version); @@ -4280,8 +4280,360 @@ BEGIN END; $$ LANGUAGE plpgsql; +SELECT evergreen.upgrade_deps_block_check('1306', :eg_version); + +-- We don't pass this function arrays with nulls, so we save 5% not testing for that +CREATE OR REPLACE FUNCTION evergreen.text_array_merge_unique ( + TEXT[], TEXT[] +) RETURNS TEXT[] AS $F$ + SELECT NULLIF(ARRAY( + SELECT * FROM UNNEST($1) x + UNION + SELECT * FROM UNNEST($2) y + ),'{}'); +$F$ LANGUAGE SQL; + +CREATE OR REPLACE FUNCTION search.symspell_build_raw_entry ( + raw_input TEXT, + source_class TEXT, + no_limit BOOL DEFAULT FALSE, + prefix_length INT DEFAULT 6, + maxED INT DEFAULT 3 +) RETURNS SETOF search.symspell_dictionary AS $F$ +DECLARE + key TEXT; + del_key TEXT; + key_list TEXT[]; + entry search.symspell_dictionary%ROWTYPE; +BEGIN + key := raw_input; + + IF NOT no_limit AND CHARACTER_LENGTH(raw_input) > prefix_length THEN + key := SUBSTRING(key FROM 1 FOR prefix_length); + key_list := ARRAY[raw_input, key]; + ELSE + key_list := ARRAY[key]; + END IF; + + FOREACH del_key IN ARRAY key_list LOOP + -- skip empty keys + CONTINUE WHEN del_key IS NULL OR CHARACTER_LENGTH(del_key) = 0; + + entry.prefix_key := del_key; + + entry.keyword_count := 0; + entry.title_count := 0; + entry.author_count := 0; + entry.subject_count := 0; + entry.series_count := 0; + entry.identifier_count := 0; + + entry.keyword_suggestions := '{}'; + entry.title_suggestions := '{}'; + entry.author_suggestions := '{}'; + entry.subject_suggestions := '{}'; + entry.series_suggestions := '{}'; + entry.identifier_suggestions := '{}'; + + IF source_class = 'keyword' THEN entry.keyword_suggestions := ARRAY[raw_input]; END IF; + IF source_class = 'title' THEN entry.title_suggestions := ARRAY[raw_input]; END IF; + IF source_class = 'author' THEN entry.author_suggestions := ARRAY[raw_input]; END IF; + IF source_class = 'subject' THEN entry.subject_suggestions := ARRAY[raw_input]; END IF; + IF source_class = 'series' THEN entry.series_suggestions := ARRAY[raw_input]; END IF; + IF source_class = 'identifier' THEN entry.identifier_suggestions := ARRAY[raw_input]; END IF; + IF source_class = 'keyword' THEN entry.keyword_suggestions := ARRAY[raw_input]; END IF; + + IF del_key = raw_input THEN + IF source_class = 'keyword' THEN entry.keyword_count := 1; END IF; + IF source_class = 'title' THEN entry.title_count := 1; END IF; + IF source_class = 'author' THEN entry.author_count := 1; END IF; + IF source_class = 'subject' THEN entry.subject_count := 1; END IF; + IF source_class = 'series' THEN entry.series_count := 1; END IF; + IF source_class = 'identifier' THEN entry.identifier_count := 1; END IF; + END IF; + + RETURN NEXT entry; + END LOOP; + + FOR del_key IN SELECT x FROM UNNEST(search.symspell_generate_edits(key, 1, maxED)) x LOOP + + -- skip empty keys + CONTINUE WHEN del_key IS NULL OR CHARACTER_LENGTH(del_key) = 0; + -- skip suggestions that are already too long for the prefix key + CONTINUE WHEN CHARACTER_LENGTH(del_key) <= (prefix_length - maxED) AND CHARACTER_LENGTH(raw_input) > prefix_length; + + entry.keyword_suggestions := '{}'; + entry.title_suggestions := '{}'; + entry.author_suggestions := '{}'; + entry.subject_suggestions := '{}'; + entry.series_suggestions := '{}'; + entry.identifier_suggestions := '{}'; + + IF source_class = 'keyword' THEN entry.keyword_count := 0; END IF; + IF source_class = 'title' THEN entry.title_count := 0; END IF; + IF source_class = 'author' THEN entry.author_count := 0; END IF; + IF source_class = 'subject' THEN entry.subject_count := 0; END IF; + IF source_class = 'series' THEN entry.series_count := 0; END IF; + IF source_class = 'identifier' THEN entry.identifier_count := 0; END IF; + + entry.prefix_key := del_key; + + IF source_class = 'keyword' THEN entry.keyword_suggestions := ARRAY[raw_input]; END IF; + IF source_class = 'title' THEN entry.title_suggestions := ARRAY[raw_input]; END IF; + IF source_class = 'author' THEN entry.author_suggestions := ARRAY[raw_input]; END IF; + IF source_class = 'subject' THEN entry.subject_suggestions := ARRAY[raw_input]; END IF; + IF source_class = 'series' THEN entry.series_suggestions := ARRAY[raw_input]; END IF; + IF source_class = 'identifier' THEN entry.identifier_suggestions := ARRAY[raw_input]; END IF; + IF source_class = 'keyword' THEN entry.keyword_suggestions := ARRAY[raw_input]; END IF; + + RETURN NEXT entry; + END LOOP; + +END; +$F$ LANGUAGE PLPGSQL STRICT IMMUTABLE; + +CREATE OR REPLACE FUNCTION search.symspell_build_entries ( + full_input TEXT, + source_class TEXT, + old_input TEXT DEFAULT NULL, + include_phrases BOOL DEFAULT FALSE +) RETURNS SETOF search.symspell_dictionary AS $F$ +DECLARE + prefix_length INT; + maxED INT; + word_list TEXT[]; + input TEXT; + word TEXT; + entry search.symspell_dictionary; +BEGIN + IF full_input IS NOT NULL THEN + SELECT value::INT INTO prefix_length FROM config.internal_flag WHERE name = 'symspell.prefix_length' AND enabled; + prefix_length := COALESCE(prefix_length, 6); + + SELECT value::INT INTO maxED FROM config.internal_flag WHERE name = 'symspell.max_edit_distance' AND enabled; + maxED := COALESCE(maxED, 3); + + input := evergreen.lowercase(full_input); + word_list := ARRAY_AGG(x) FROM search.symspell_parse_words_distinct(input) x; + IF word_list IS NULL THEN + RETURN; + END IF; + + IF CARDINALITY(word_list) > 1 AND include_phrases THEN + RETURN QUERY SELECT * FROM search.symspell_build_raw_entry(input, source_class, TRUE, prefix_length, maxED); + END IF; + + FOREACH word IN ARRAY word_list LOOP + -- Skip words that have runs of 5 or more digits (I'm looking at you, ISxNs) + CONTINUE WHEN CHARACTER_LENGTH(word) > 4 AND word ~ '\d{5,}'; + RETURN QUERY SELECT * FROM search.symspell_build_raw_entry(word, source_class, FALSE, prefix_length, maxED); + END LOOP; + END IF; + + IF old_input IS NOT NULL THEN + input := evergreen.lowercase(old_input); + + FOR word IN SELECT x FROM search.symspell_parse_words_distinct(input) x LOOP + -- similarly skip words that have 5 or more digits here to + -- avoid adding erroneous prefix deletion entries to the dictionary + CONTINUE WHEN CHARACTER_LENGTH(word) > 4 AND word ~ '\d{5,}'; + entry.prefix_key := word; + + entry.keyword_count := 0; + entry.title_count := 0; + entry.author_count := 0; + entry.subject_count := 0; + entry.series_count := 0; + entry.identifier_count := 0; + + entry.keyword_suggestions := '{}'; + entry.title_suggestions := '{}'; + entry.author_suggestions := '{}'; + entry.subject_suggestions := '{}'; + entry.series_suggestions := '{}'; + entry.identifier_suggestions := '{}'; + + IF source_class = 'keyword' THEN entry.keyword_count := -1; END IF; + IF source_class = 'title' THEN entry.title_count := -1; END IF; + IF source_class = 'author' THEN entry.author_count := -1; END IF; + IF source_class = 'subject' THEN entry.subject_count := -1; END IF; + IF source_class = 'series' THEN entry.series_count := -1; END IF; + IF source_class = 'identifier' THEN entry.identifier_count := -1; END IF; + + RETURN NEXT entry; + END LOOP; + END IF; +END; +$F$ LANGUAGE PLPGSQL; + +CREATE OR REPLACE FUNCTION search.symspell_build_and_merge_entries ( + full_input TEXT, + source_class TEXT, + old_input TEXT DEFAULT NULL, + include_phrases BOOL DEFAULT FALSE +) RETURNS SETOF search.symspell_dictionary AS $F$ +DECLARE + new_entry RECORD; + conflict_entry RECORD; +BEGIN + + IF full_input = old_input THEN -- neither NULL, and are the same + RETURN; + END IF; + + FOR new_entry IN EXECUTE $q$ + SELECT count, + prefix_key, + s AS suggestions + FROM (SELECT prefix_key, + ARRAY_AGG(DISTINCT $q$ || source_class || $q$_suggestions[1]) s, + SUM($q$ || source_class || $q$_count) count + FROM search.symspell_build_entries($1, $2, $3, $4) + GROUP BY 1) x + $q$ USING full_input, source_class, old_input, include_phrases + LOOP + EXECUTE $q$ + SELECT prefix_key, + $q$ || source_class || $q$_suggestions suggestions, + $q$ || source_class || $q$_count count + FROM search.symspell_dictionary + WHERE prefix_key = $1 $q$ + INTO conflict_entry + USING new_entry.prefix_key; + + IF new_entry.count <> 0 THEN -- Real word, and count changed + IF conflict_entry.prefix_key IS NOT NULL THEN -- we'll be updating + IF conflict_entry.count > 0 THEN -- it's a real word + RETURN QUERY EXECUTE $q$ + UPDATE search.symspell_dictionary + SET $q$ || source_class || $q$_count = $2 + WHERE prefix_key = $1 + RETURNING * $q$ + USING new_entry.prefix_key, GREATEST(0, new_entry.count + conflict_entry.count); + ELSE -- it was a prefix key or delete-emptied word before + IF conflict_entry.suggestions @> new_entry.suggestions THEN -- already have all suggestions here... + RETURN QUERY EXECUTE $q$ + UPDATE search.symspell_dictionary + SET $q$ || source_class || $q$_count = $2 + WHERE prefix_key = $1 + RETURNING * $q$ + USING new_entry.prefix_key, GREATEST(0, new_entry.count); + ELSE -- new suggestion! + RETURN QUERY EXECUTE $q$ + UPDATE search.symspell_dictionary + SET $q$ || source_class || $q$_count = $2, + $q$ || source_class || $q$_suggestions = $3 + WHERE prefix_key = $1 + RETURNING * $q$ + USING new_entry.prefix_key, GREATEST(0, new_entry.count), evergreen.text_array_merge_unique(conflict_entry.suggestions,new_entry.suggestions); + END IF; + END IF; + ELSE + -- We keep the on-conflict clause just in case... + RETURN QUERY EXECUTE $q$ + INSERT INTO search.symspell_dictionary AS d ( + $q$ || source_class || $q$_count, + prefix_key, + $q$ || source_class || $q$_suggestions + ) VALUES ( $1, $2, $3 ) ON CONFLICT (prefix_key) DO + UPDATE SET $q$ || source_class || $q$_count = d.$q$ || source_class || $q$_count + EXCLUDED.$q$ || source_class || $q$_count, + $q$ || source_class || $q$_suggestions = evergreen.text_array_merge_unique(d.$q$ || source_class || $q$_suggestions, EXCLUDED.$q$ || source_class || $q$_suggestions) + RETURNING * $q$ + USING new_entry.count, new_entry.prefix_key, new_entry.suggestions; + END IF; + ELSE -- key only, or no change + IF conflict_entry.prefix_key IS NOT NULL THEN -- we'll be updating + IF NOT conflict_entry.suggestions @> new_entry.suggestions THEN -- There are new suggestions + RETURN QUERY EXECUTE $q$ + UPDATE search.symspell_dictionary + SET $q$ || source_class || $q$_suggestions = $2 + WHERE prefix_key = $1 + RETURNING * $q$ + USING new_entry.prefix_key, evergreen.text_array_merge_unique(conflict_entry.suggestions,new_entry.suggestions); + END IF; + ELSE + RETURN QUERY EXECUTE $q$ + INSERT INTO search.symspell_dictionary AS d ( + $q$ || source_class || $q$_count, + prefix_key, + $q$ || source_class || $q$_suggestions + ) VALUES ( $1, $2, $3 ) ON CONFLICT (prefix_key) DO -- key exists, suggestions may be added due to this entry + UPDATE SET $q$ || source_class || $q$_suggestions = evergreen.text_array_merge_unique(d.$q$ || source_class || $q$_suggestions, EXCLUDED.$q$ || source_class || $q$_suggestions) + RETURNING * $q$ + USING new_entry.count, new_entry.prefix_key, new_entry.suggestions; + END IF; + END IF; + END LOOP; +END; +$F$ LANGUAGE PLPGSQL; + COMMIT; +\qecho '' +\qecho 'The following should be run at the end of the upgrade before any' +\qecho 'reingest occurs. Because new triggers are installed already,' +\qecho 'updates to indexed strings will cause zero-count dictionary entries' +\qecho 'to be recorded which will require updating every row again (or' +\qecho 'starting from scratch) so best to do this before other batch' +\qecho 'changes. A later reingest that does not significantly change' +\qecho 'indexed strings will /not/ cause table bloat here, and will be' +\qecho 'as fast as normal. A copy of the SQL in a ready-to-use, non-escaped' +\qecho 'form is available inside a comment at the end of this upgrade sub-' +\qecho 'script so you do not need to copy this comment from the psql ouptut.' +\qecho '' +\qecho '\\a' +\qecho '\\t' +\qecho '' +\qecho '\\o title' +\qecho 'select value from metabib.title_field_entry where source in (select id from biblio.record_entry where not deleted);' +\qecho '\\o author' +\qecho 'select value from metabib.author_field_entry where source in (select id from biblio.record_entry where not deleted);' +\qecho '\\o subject' +\qecho 'select value from metabib.subject_field_entry where source in (select id from biblio.record_entry where not deleted);' +\qecho '\\o series' +\qecho 'select value from metabib.series_field_entry where source in (select id from biblio.record_entry where not deleted);' +\qecho '\\o identifier' +\qecho 'select value from metabib.identifier_field_entry where source in (select id from biblio.record_entry where not deleted);' +\qecho '\\o keyword' +\qecho 'select value from metabib.keyword_field_entry where source in (select id from biblio.record_entry where not deleted);' +\qecho '' +\qecho '\\o' +\qecho '\\a' +\qecho '\\t' +\qecho '' +\qecho '// Then, at the command line:' +\qecho '' +\qecho '$ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl title > title.sql' +\qecho '$ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl author > author.sql' +\qecho '$ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl subject > subject.sql' +\qecho '$ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl series > series.sql' +\qecho '$ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl identifier > identifier.sql' +\qecho '$ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl keyword > keyword.sql' +\qecho '' +\qecho '// And, back in psql' +\qecho '' +\qecho 'ALTER TABLE search.symspell_dictionary SET UNLOGGED;' +\qecho 'TRUNCATE search.symspell_dictionary;' +\qecho '' +\qecho '\\i identifier.sql' +\qecho '\\i author.sql' +\qecho '\\i title.sql' +\qecho '\\i subject.sql' +\qecho '\\i series.sql' +\qecho '\\i keyword.sql' +\qecho '' +\qecho 'CLUSTER search.symspell_dictionary USING symspell_dictionary_pkey;' +\qecho 'REINDEX TABLE search.symspell_dictionary;' +\qecho 'ALTER TABLE search.symspell_dictionary SET LOGGED;' +\qecho 'VACUUM ANALYZE search.symspell_dictionary;' +\qecho '' +\qecho 'DROP TABLE search.symspell_dictionary_partial_title;' +\qecho 'DROP TABLE search.symspell_dictionary_partial_author;' +\qecho 'DROP TABLE search.symspell_dictionary_partial_subject;' +\qecho 'DROP TABLE search.symspell_dictionary_partial_series;' +\qecho 'DROP TABLE search.symspell_dictionary_partial_identifier;' +\qecho 'DROP TABLE search.symspell_dictionary_partial_keyword;' + -- Update auditor tables to catch changes to source tables. -- Can be removed/skipped if there were no schema changes. SELECT auditor.update_auditors(); -- 2.11.0