BEFORE INSERT OR UPDATE ON config.db_patch_dependencies
FOR EACH ROW EXECUTE PROCEDURE evergreen.array_overlap_check ('deprecates');
-INSERT INTO config.upgrade_log (version, applied_to) VALUES ('1305', :eg_version); -- phasefx/csharp/gmcharlt
+INSERT INTO config.upgrade_log (version, applied_to) VALUES ('1306', :eg_version); -- miker/gmcharlt
CREATE TABLE config.bib_source (
id SERIAL PRIMARY KEY,
--- /dev/null
+BEGIN;
+
+SELECT evergreen.upgrade_deps_block_check('1306', :eg_version);
+
+-- We don't pass this function arrays with nulls, so we save 5% not testing for that
+CREATE OR REPLACE FUNCTION evergreen.text_array_merge_unique (
+ TEXT[], TEXT[]
+) RETURNS TEXT[] AS $F$
+ SELECT NULLIF(ARRAY(
+ SELECT * FROM UNNEST($1) x
+ UNION
+ SELECT * FROM UNNEST($2) y
+ ),'{}');
+$F$ LANGUAGE SQL;
+
+CREATE OR REPLACE FUNCTION search.symspell_build_raw_entry (
+ raw_input TEXT,
+ source_class TEXT,
+ no_limit BOOL DEFAULT FALSE,
+ prefix_length INT DEFAULT 6,
+ maxED INT DEFAULT 3
+) RETURNS SETOF search.symspell_dictionary AS $F$
+DECLARE
+ key TEXT;
+ del_key TEXT;
+ key_list TEXT[];
+ entry search.symspell_dictionary%ROWTYPE;
+BEGIN
+ key := raw_input;
+
+ IF NOT no_limit AND CHARACTER_LENGTH(raw_input) > prefix_length THEN
+ key := SUBSTRING(key FROM 1 FOR prefix_length);
+ key_list := ARRAY[raw_input, key];
+ ELSE
+ key_list := ARRAY[key];
+ END IF;
+
+ FOREACH del_key IN ARRAY key_list LOOP
+ -- skip empty keys
+ CONTINUE WHEN del_key IS NULL OR CHARACTER_LENGTH(del_key) = 0;
+
+ entry.prefix_key := del_key;
+
+ entry.keyword_count := 0;
+ entry.title_count := 0;
+ entry.author_count := 0;
+ entry.subject_count := 0;
+ entry.series_count := 0;
+ entry.identifier_count := 0;
+
+ entry.keyword_suggestions := '{}';
+ entry.title_suggestions := '{}';
+ entry.author_suggestions := '{}';
+ entry.subject_suggestions := '{}';
+ entry.series_suggestions := '{}';
+ entry.identifier_suggestions := '{}';
+
+ IF source_class = 'keyword' THEN entry.keyword_suggestions := ARRAY[raw_input]; END IF;
+ IF source_class = 'title' THEN entry.title_suggestions := ARRAY[raw_input]; END IF;
+ IF source_class = 'author' THEN entry.author_suggestions := ARRAY[raw_input]; END IF;
+ IF source_class = 'subject' THEN entry.subject_suggestions := ARRAY[raw_input]; END IF;
+ IF source_class = 'series' THEN entry.series_suggestions := ARRAY[raw_input]; END IF;
+ IF source_class = 'identifier' THEN entry.identifier_suggestions := ARRAY[raw_input]; END IF;
+ IF source_class = 'keyword' THEN entry.keyword_suggestions := ARRAY[raw_input]; END IF;
+
+ IF del_key = raw_input THEN
+ IF source_class = 'keyword' THEN entry.keyword_count := 1; END IF;
+ IF source_class = 'title' THEN entry.title_count := 1; END IF;
+ IF source_class = 'author' THEN entry.author_count := 1; END IF;
+ IF source_class = 'subject' THEN entry.subject_count := 1; END IF;
+ IF source_class = 'series' THEN entry.series_count := 1; END IF;
+ IF source_class = 'identifier' THEN entry.identifier_count := 1; END IF;
+ END IF;
+
+ RETURN NEXT entry;
+ END LOOP;
+
+ FOR del_key IN SELECT x FROM UNNEST(search.symspell_generate_edits(key, 1, maxED)) x LOOP
+
+ -- skip empty keys
+ CONTINUE WHEN del_key IS NULL OR CHARACTER_LENGTH(del_key) = 0;
+ -- skip suggestions that are already too long for the prefix key
+ CONTINUE WHEN CHARACTER_LENGTH(del_key) <= (prefix_length - maxED) AND CHARACTER_LENGTH(raw_input) > prefix_length;
+
+ entry.keyword_suggestions := '{}';
+ entry.title_suggestions := '{}';
+ entry.author_suggestions := '{}';
+ entry.subject_suggestions := '{}';
+ entry.series_suggestions := '{}';
+ entry.identifier_suggestions := '{}';
+
+ IF source_class = 'keyword' THEN entry.keyword_count := 0; END IF;
+ IF source_class = 'title' THEN entry.title_count := 0; END IF;
+ IF source_class = 'author' THEN entry.author_count := 0; END IF;
+ IF source_class = 'subject' THEN entry.subject_count := 0; END IF;
+ IF source_class = 'series' THEN entry.series_count := 0; END IF;
+ IF source_class = 'identifier' THEN entry.identifier_count := 0; END IF;
+
+ entry.prefix_key := del_key;
+
+ IF source_class = 'keyword' THEN entry.keyword_suggestions := ARRAY[raw_input]; END IF;
+ IF source_class = 'title' THEN entry.title_suggestions := ARRAY[raw_input]; END IF;
+ IF source_class = 'author' THEN entry.author_suggestions := ARRAY[raw_input]; END IF;
+ IF source_class = 'subject' THEN entry.subject_suggestions := ARRAY[raw_input]; END IF;
+ IF source_class = 'series' THEN entry.series_suggestions := ARRAY[raw_input]; END IF;
+ IF source_class = 'identifier' THEN entry.identifier_suggestions := ARRAY[raw_input]; END IF;
+ IF source_class = 'keyword' THEN entry.keyword_suggestions := ARRAY[raw_input]; END IF;
+
+ RETURN NEXT entry;
+ END LOOP;
+
+END;
+$F$ LANGUAGE PLPGSQL STRICT IMMUTABLE;
+
+CREATE OR REPLACE FUNCTION search.symspell_build_entries (
+ full_input TEXT,
+ source_class TEXT,
+ old_input TEXT DEFAULT NULL,
+ include_phrases BOOL DEFAULT FALSE
+) RETURNS SETOF search.symspell_dictionary AS $F$
+DECLARE
+ prefix_length INT;
+ maxED INT;
+ word_list TEXT[];
+ input TEXT;
+ word TEXT;
+ entry search.symspell_dictionary;
+BEGIN
+ IF full_input IS NOT NULL THEN
+ SELECT value::INT INTO prefix_length FROM config.internal_flag WHERE name = 'symspell.prefix_length' AND enabled;
+ prefix_length := COALESCE(prefix_length, 6);
+
+ SELECT value::INT INTO maxED FROM config.internal_flag WHERE name = 'symspell.max_edit_distance' AND enabled;
+ maxED := COALESCE(maxED, 3);
+
+ input := evergreen.lowercase(full_input);
+ word_list := ARRAY_AGG(x) FROM search.symspell_parse_words_distinct(input) x;
+ IF word_list IS NULL THEN
+ RETURN;
+ END IF;
+
+ IF CARDINALITY(word_list) > 1 AND include_phrases THEN
+ RETURN QUERY SELECT * FROM search.symspell_build_raw_entry(input, source_class, TRUE, prefix_length, maxED);
+ END IF;
+
+ FOREACH word IN ARRAY word_list LOOP
+ -- Skip words that have runs of 5 or more digits (I'm looking at you, ISxNs)
+ CONTINUE WHEN CHARACTER_LENGTH(word) > 4 AND word ~ '\d{5,}';
+ RETURN QUERY SELECT * FROM search.symspell_build_raw_entry(word, source_class, FALSE, prefix_length, maxED);
+ END LOOP;
+ END IF;
+
+ IF old_input IS NOT NULL THEN
+ input := evergreen.lowercase(old_input);
+
+ FOR word IN SELECT x FROM search.symspell_parse_words_distinct(input) x LOOP
+ -- similarly skip words that have 5 or more digits here to
+ -- avoid adding erroneous prefix deletion entries to the dictionary
+ CONTINUE WHEN CHARACTER_LENGTH(word) > 4 AND word ~ '\d{5,}';
+ entry.prefix_key := word;
+
+ entry.keyword_count := 0;
+ entry.title_count := 0;
+ entry.author_count := 0;
+ entry.subject_count := 0;
+ entry.series_count := 0;
+ entry.identifier_count := 0;
+
+ entry.keyword_suggestions := '{}';
+ entry.title_suggestions := '{}';
+ entry.author_suggestions := '{}';
+ entry.subject_suggestions := '{}';
+ entry.series_suggestions := '{}';
+ entry.identifier_suggestions := '{}';
+
+ IF source_class = 'keyword' THEN entry.keyword_count := -1; END IF;
+ IF source_class = 'title' THEN entry.title_count := -1; END IF;
+ IF source_class = 'author' THEN entry.author_count := -1; END IF;
+ IF source_class = 'subject' THEN entry.subject_count := -1; END IF;
+ IF source_class = 'series' THEN entry.series_count := -1; END IF;
+ IF source_class = 'identifier' THEN entry.identifier_count := -1; END IF;
+
+ RETURN NEXT entry;
+ END LOOP;
+ END IF;
+END;
+$F$ LANGUAGE PLPGSQL;
+
+CREATE OR REPLACE FUNCTION search.symspell_build_and_merge_entries (
+ full_input TEXT,
+ source_class TEXT,
+ old_input TEXT DEFAULT NULL,
+ include_phrases BOOL DEFAULT FALSE
+) RETURNS SETOF search.symspell_dictionary AS $F$
+DECLARE
+ new_entry RECORD;
+ conflict_entry RECORD;
+BEGIN
+
+ IF full_input = old_input THEN -- neither NULL, and are the same
+ RETURN;
+ END IF;
+
+ FOR new_entry IN EXECUTE $q$
+ SELECT count,
+ prefix_key,
+ s AS suggestions
+ FROM (SELECT prefix_key,
+ ARRAY_AGG(DISTINCT $q$ || source_class || $q$_suggestions[1]) s,
+ SUM($q$ || source_class || $q$_count) count
+ FROM search.symspell_build_entries($1, $2, $3, $4)
+ GROUP BY 1) x
+ $q$ USING full_input, source_class, old_input, include_phrases
+ LOOP
+ EXECUTE $q$
+ SELECT prefix_key,
+ $q$ || source_class || $q$_suggestions suggestions,
+ $q$ || source_class || $q$_count count
+ FROM search.symspell_dictionary
+ WHERE prefix_key = $1 $q$
+ INTO conflict_entry
+ USING new_entry.prefix_key;
+
+ IF new_entry.count <> 0 THEN -- Real word, and count changed
+ IF conflict_entry.prefix_key IS NOT NULL THEN -- we'll be updating
+ IF conflict_entry.count > 0 THEN -- it's a real word
+ RETURN QUERY EXECUTE $q$
+ UPDATE search.symspell_dictionary
+ SET $q$ || source_class || $q$_count = $2
+ WHERE prefix_key = $1
+ RETURNING * $q$
+ USING new_entry.prefix_key, GREATEST(0, new_entry.count + conflict_entry.count);
+ ELSE -- it was a prefix key or delete-emptied word before
+ IF conflict_entry.suggestions @> new_entry.suggestions THEN -- already have all suggestions here...
+ RETURN QUERY EXECUTE $q$
+ UPDATE search.symspell_dictionary
+ SET $q$ || source_class || $q$_count = $2
+ WHERE prefix_key = $1
+ RETURNING * $q$
+ USING new_entry.prefix_key, GREATEST(0, new_entry.count);
+ ELSE -- new suggestion!
+ RETURN QUERY EXECUTE $q$
+ UPDATE search.symspell_dictionary
+ SET $q$ || source_class || $q$_count = $2,
+ $q$ || source_class || $q$_suggestions = $3
+ WHERE prefix_key = $1
+ RETURNING * $q$
+ USING new_entry.prefix_key, GREATEST(0, new_entry.count), evergreen.text_array_merge_unique(conflict_entry.suggestions,new_entry.suggestions);
+ END IF;
+ END IF;
+ ELSE
+ -- We keep the on-conflict clause just in case...
+ RETURN QUERY EXECUTE $q$
+ INSERT INTO search.symspell_dictionary AS d (
+ $q$ || source_class || $q$_count,
+ prefix_key,
+ $q$ || source_class || $q$_suggestions
+ ) VALUES ( $1, $2, $3 ) ON CONFLICT (prefix_key) DO
+ UPDATE SET $q$ || source_class || $q$_count = d.$q$ || source_class || $q$_count + EXCLUDED.$q$ || source_class || $q$_count,
+ $q$ || source_class || $q$_suggestions = evergreen.text_array_merge_unique(d.$q$ || source_class || $q$_suggestions, EXCLUDED.$q$ || source_class || $q$_suggestions)
+ RETURNING * $q$
+ USING new_entry.count, new_entry.prefix_key, new_entry.suggestions;
+ END IF;
+ ELSE -- key only, or no change
+ IF conflict_entry.prefix_key IS NOT NULL THEN -- we'll be updating
+ IF NOT conflict_entry.suggestions @> new_entry.suggestions THEN -- There are new suggestions
+ RETURN QUERY EXECUTE $q$
+ UPDATE search.symspell_dictionary
+ SET $q$ || source_class || $q$_suggestions = $2
+ WHERE prefix_key = $1
+ RETURNING * $q$
+ USING new_entry.prefix_key, evergreen.text_array_merge_unique(conflict_entry.suggestions,new_entry.suggestions);
+ END IF;
+ ELSE
+ RETURN QUERY EXECUTE $q$
+ INSERT INTO search.symspell_dictionary AS d (
+ $q$ || source_class || $q$_count,
+ prefix_key,
+ $q$ || source_class || $q$_suggestions
+ ) VALUES ( $1, $2, $3 ) ON CONFLICT (prefix_key) DO -- key exists, suggestions may be added due to this entry
+ UPDATE SET $q$ || source_class || $q$_suggestions = evergreen.text_array_merge_unique(d.$q$ || source_class || $q$_suggestions, EXCLUDED.$q$ || source_class || $q$_suggestions)
+ RETURNING * $q$
+ USING new_entry.count, new_entry.prefix_key, new_entry.suggestions;
+ END IF;
+ END IF;
+ END LOOP;
+END;
+$F$ LANGUAGE PLPGSQL;
+
+COMMIT;
+
+\qecho ''
+\qecho 'The following should be run at the end of the upgrade before any'
+\qecho 'reingest occurs. Because new triggers are installed already,'
+\qecho 'updates to indexed strings will cause zero-count dictionary entries'
+\qecho 'to be recorded which will require updating every row again (or'
+\qecho 'starting from scratch) so best to do this before other batch'
+\qecho 'changes. A later reingest that does not significantly change'
+\qecho 'indexed strings will /not/ cause table bloat here, and will be'
+\qecho 'as fast as normal. A copy of the SQL in a ready-to-use, non-escaped'
+\qecho 'form is available inside a comment at the end of this upgrade sub-'
+\qecho 'script so you do not need to copy this comment from the psql ouptut.'
+\qecho ''
+\qecho '\\a'
+\qecho '\\t'
+\qecho ''
+\qecho '\\o title'
+\qecho 'select value from metabib.title_field_entry where source in (select id from biblio.record_entry where not deleted);'
+\qecho '\\o author'
+\qecho 'select value from metabib.author_field_entry where source in (select id from biblio.record_entry where not deleted);'
+\qecho '\\o subject'
+\qecho 'select value from metabib.subject_field_entry where source in (select id from biblio.record_entry where not deleted);'
+\qecho '\\o series'
+\qecho 'select value from metabib.series_field_entry where source in (select id from biblio.record_entry where not deleted);'
+\qecho '\\o identifier'
+\qecho 'select value from metabib.identifier_field_entry where source in (select id from biblio.record_entry where not deleted);'
+\qecho '\\o keyword'
+\qecho 'select value from metabib.keyword_field_entry where source in (select id from biblio.record_entry where not deleted);'
+\qecho ''
+\qecho '\\o'
+\qecho '\\a'
+\qecho '\\t'
+\qecho ''
+\qecho '// Then, at the command line:'
+\qecho ''
+\qecho '$ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl title > title.sql'
+\qecho '$ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl author > author.sql'
+\qecho '$ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl subject > subject.sql'
+\qecho '$ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl series > series.sql'
+\qecho '$ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl identifier > identifier.sql'
+\qecho '$ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl keyword > keyword.sql'
+\qecho ''
+\qecho '// And, back in psql'
+\qecho ''
+\qecho 'ALTER TABLE search.symspell_dictionary SET UNLOGGED;'
+\qecho 'TRUNCATE search.symspell_dictionary;'
+\qecho ''
+\qecho '\\i identifier.sql'
+\qecho '\\i author.sql'
+\qecho '\\i title.sql'
+\qecho '\\i subject.sql'
+\qecho '\\i series.sql'
+\qecho '\\i keyword.sql'
+\qecho ''
+\qecho 'CLUSTER search.symspell_dictionary USING symspell_dictionary_pkey;'
+\qecho 'REINDEX TABLE search.symspell_dictionary;'
+\qecho 'ALTER TABLE search.symspell_dictionary SET LOGGED;'
+\qecho 'VACUUM ANALYZE search.symspell_dictionary;'
+\qecho ''
+\qecho 'DROP TABLE search.symspell_dictionary_partial_title;'
+\qecho 'DROP TABLE search.symspell_dictionary_partial_author;'
+\qecho 'DROP TABLE search.symspell_dictionary_partial_subject;'
+\qecho 'DROP TABLE search.symspell_dictionary_partial_series;'
+\qecho 'DROP TABLE search.symspell_dictionary_partial_identifier;'
+\qecho 'DROP TABLE search.symspell_dictionary_partial_keyword;'
+
+/* To run by hand:
+
+\a
+\t
+
+\o title
+select value from metabib.title_field_entry where source in (select id from biblio.record_entry where not deleted);
+
+\o author
+select value from metabib.author_field_entry where source in (select id from biblio.record_entry where not deleted);
+
+\o subject
+select value from metabib.subject_field_entry where source in (select id from biblio.record_entry where not deleted);
+
+\o series
+select value from metabib.series_field_entry where source in (select id from biblio.record_entry where not deleted);
+
+\o identifier
+select value from metabib.identifier_field_entry where source in (select id from biblio.record_entry where not deleted);
+
+\o keyword
+select value from metabib.keyword_field_entry where source in (select id from biblio.record_entry where not deleted);
+
+\o
+\a
+\t
+
+// Then, at the command line:
+
+$ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl title > title.sql
+$ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl author > author.sql
+$ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl subject > subject.sql
+$ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl series > series.sql
+$ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl identifier > identifier.sql
+$ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl keyword > keyword.sql
+
+// To the extent your hardware allows, the above commands can be run in
+// in parallel, in different shells. Each will use a full CPU, and RAM
+// may be a limiting resource, so keep an eye on that with `top`.
+
+
+// And, back in psql
+
+ALTER TABLE search.symspell_dictionary SET UNLOGGED;
+TRUNCATE search.symspell_dictionary;
+
+\i identifier.sql
+\i author.sql
+\i title.sql
+\i subject.sql
+\i series.sql
+\i keyword.sql
+
+CLUSTER search.symspell_dictionary USING symspell_dictionary_pkey;
+REINDEX TABLE search.symspell_dictionary;
+ALTER TABLE search.symspell_dictionary SET LOGGED;
+VACUUM ANALYZE search.symspell_dictionary;
+
+DROP TABLE search.symspell_dictionary_partial_title;
+DROP TABLE search.symspell_dictionary_partial_author;
+DROP TABLE search.symspell_dictionary_partial_subject;
+DROP TABLE search.symspell_dictionary_partial_series;
+DROP TABLE search.symspell_dictionary_partial_identifier;
+DROP TABLE search.symspell_dictionary_partial_keyword;
+
+*/
+
+++ /dev/null
-BEGIN;
-
-SELECT evergreen.upgrade_deps_block_check('XXXX', :eg_version);
-
--- We don't pass this function arrays with nulls, so we save 5% not testing for that
-CREATE OR REPLACE FUNCTION evergreen.text_array_merge_unique (
- TEXT[], TEXT[]
-) RETURNS TEXT[] AS $F$
- SELECT NULLIF(ARRAY(
- SELECT * FROM UNNEST($1) x
- UNION
- SELECT * FROM UNNEST($2) y
- ),'{}');
-$F$ LANGUAGE SQL;
-
-CREATE OR REPLACE FUNCTION search.symspell_build_raw_entry (
- raw_input TEXT,
- source_class TEXT,
- no_limit BOOL DEFAULT FALSE,
- prefix_length INT DEFAULT 6,
- maxED INT DEFAULT 3
-) RETURNS SETOF search.symspell_dictionary AS $F$
-DECLARE
- key TEXT;
- del_key TEXT;
- key_list TEXT[];
- entry search.symspell_dictionary%ROWTYPE;
-BEGIN
- key := raw_input;
-
- IF NOT no_limit AND CHARACTER_LENGTH(raw_input) > prefix_length THEN
- key := SUBSTRING(key FROM 1 FOR prefix_length);
- key_list := ARRAY[raw_input, key];
- ELSE
- key_list := ARRAY[key];
- END IF;
-
- FOREACH del_key IN ARRAY key_list LOOP
- -- skip empty keys
- CONTINUE WHEN del_key IS NULL OR CHARACTER_LENGTH(del_key) = 0;
-
- entry.prefix_key := del_key;
-
- entry.keyword_count := 0;
- entry.title_count := 0;
- entry.author_count := 0;
- entry.subject_count := 0;
- entry.series_count := 0;
- entry.identifier_count := 0;
-
- entry.keyword_suggestions := '{}';
- entry.title_suggestions := '{}';
- entry.author_suggestions := '{}';
- entry.subject_suggestions := '{}';
- entry.series_suggestions := '{}';
- entry.identifier_suggestions := '{}';
-
- IF source_class = 'keyword' THEN entry.keyword_suggestions := ARRAY[raw_input]; END IF;
- IF source_class = 'title' THEN entry.title_suggestions := ARRAY[raw_input]; END IF;
- IF source_class = 'author' THEN entry.author_suggestions := ARRAY[raw_input]; END IF;
- IF source_class = 'subject' THEN entry.subject_suggestions := ARRAY[raw_input]; END IF;
- IF source_class = 'series' THEN entry.series_suggestions := ARRAY[raw_input]; END IF;
- IF source_class = 'identifier' THEN entry.identifier_suggestions := ARRAY[raw_input]; END IF;
- IF source_class = 'keyword' THEN entry.keyword_suggestions := ARRAY[raw_input]; END IF;
-
- IF del_key = raw_input THEN
- IF source_class = 'keyword' THEN entry.keyword_count := 1; END IF;
- IF source_class = 'title' THEN entry.title_count := 1; END IF;
- IF source_class = 'author' THEN entry.author_count := 1; END IF;
- IF source_class = 'subject' THEN entry.subject_count := 1; END IF;
- IF source_class = 'series' THEN entry.series_count := 1; END IF;
- IF source_class = 'identifier' THEN entry.identifier_count := 1; END IF;
- END IF;
-
- RETURN NEXT entry;
- END LOOP;
-
- FOR del_key IN SELECT x FROM UNNEST(search.symspell_generate_edits(key, 1, maxED)) x LOOP
-
- -- skip empty keys
- CONTINUE WHEN del_key IS NULL OR CHARACTER_LENGTH(del_key) = 0;
- -- skip suggestions that are already too long for the prefix key
- CONTINUE WHEN CHARACTER_LENGTH(del_key) <= (prefix_length - maxED) AND CHARACTER_LENGTH(raw_input) > prefix_length;
-
- entry.keyword_suggestions := '{}';
- entry.title_suggestions := '{}';
- entry.author_suggestions := '{}';
- entry.subject_suggestions := '{}';
- entry.series_suggestions := '{}';
- entry.identifier_suggestions := '{}';
-
- IF source_class = 'keyword' THEN entry.keyword_count := 0; END IF;
- IF source_class = 'title' THEN entry.title_count := 0; END IF;
- IF source_class = 'author' THEN entry.author_count := 0; END IF;
- IF source_class = 'subject' THEN entry.subject_count := 0; END IF;
- IF source_class = 'series' THEN entry.series_count := 0; END IF;
- IF source_class = 'identifier' THEN entry.identifier_count := 0; END IF;
-
- entry.prefix_key := del_key;
-
- IF source_class = 'keyword' THEN entry.keyword_suggestions := ARRAY[raw_input]; END IF;
- IF source_class = 'title' THEN entry.title_suggestions := ARRAY[raw_input]; END IF;
- IF source_class = 'author' THEN entry.author_suggestions := ARRAY[raw_input]; END IF;
- IF source_class = 'subject' THEN entry.subject_suggestions := ARRAY[raw_input]; END IF;
- IF source_class = 'series' THEN entry.series_suggestions := ARRAY[raw_input]; END IF;
- IF source_class = 'identifier' THEN entry.identifier_suggestions := ARRAY[raw_input]; END IF;
- IF source_class = 'keyword' THEN entry.keyword_suggestions := ARRAY[raw_input]; END IF;
-
- RETURN NEXT entry;
- END LOOP;
-
-END;
-$F$ LANGUAGE PLPGSQL STRICT IMMUTABLE;
-
-CREATE OR REPLACE FUNCTION search.symspell_build_entries (
- full_input TEXT,
- source_class TEXT,
- old_input TEXT DEFAULT NULL,
- include_phrases BOOL DEFAULT FALSE
-) RETURNS SETOF search.symspell_dictionary AS $F$
-DECLARE
- prefix_length INT;
- maxED INT;
- word_list TEXT[];
- input TEXT;
- word TEXT;
- entry search.symspell_dictionary;
-BEGIN
- IF full_input IS NOT NULL THEN
- SELECT value::INT INTO prefix_length FROM config.internal_flag WHERE name = 'symspell.prefix_length' AND enabled;
- prefix_length := COALESCE(prefix_length, 6);
-
- SELECT value::INT INTO maxED FROM config.internal_flag WHERE name = 'symspell.max_edit_distance' AND enabled;
- maxED := COALESCE(maxED, 3);
-
- input := evergreen.lowercase(full_input);
- word_list := ARRAY_AGG(x) FROM search.symspell_parse_words_distinct(input) x;
- IF word_list IS NULL THEN
- RETURN;
- END IF;
-
- IF CARDINALITY(word_list) > 1 AND include_phrases THEN
- RETURN QUERY SELECT * FROM search.symspell_build_raw_entry(input, source_class, TRUE, prefix_length, maxED);
- END IF;
-
- FOREACH word IN ARRAY word_list LOOP
- -- Skip words that have runs of 5 or more digits (I'm looking at you, ISxNs)
- CONTINUE WHEN CHARACTER_LENGTH(word) > 4 AND word ~ '\d{5,}';
- RETURN QUERY SELECT * FROM search.symspell_build_raw_entry(word, source_class, FALSE, prefix_length, maxED);
- END LOOP;
- END IF;
-
- IF old_input IS NOT NULL THEN
- input := evergreen.lowercase(old_input);
-
- FOR word IN SELECT x FROM search.symspell_parse_words_distinct(input) x LOOP
- -- similarly skip words that have 5 or more digits here to
- -- avoid adding erroneous prefix deletion entries to the dictionary
- CONTINUE WHEN CHARACTER_LENGTH(word) > 4 AND word ~ '\d{5,}';
- entry.prefix_key := word;
-
- entry.keyword_count := 0;
- entry.title_count := 0;
- entry.author_count := 0;
- entry.subject_count := 0;
- entry.series_count := 0;
- entry.identifier_count := 0;
-
- entry.keyword_suggestions := '{}';
- entry.title_suggestions := '{}';
- entry.author_suggestions := '{}';
- entry.subject_suggestions := '{}';
- entry.series_suggestions := '{}';
- entry.identifier_suggestions := '{}';
-
- IF source_class = 'keyword' THEN entry.keyword_count := -1; END IF;
- IF source_class = 'title' THEN entry.title_count := -1; END IF;
- IF source_class = 'author' THEN entry.author_count := -1; END IF;
- IF source_class = 'subject' THEN entry.subject_count := -1; END IF;
- IF source_class = 'series' THEN entry.series_count := -1; END IF;
- IF source_class = 'identifier' THEN entry.identifier_count := -1; END IF;
-
- RETURN NEXT entry;
- END LOOP;
- END IF;
-END;
-$F$ LANGUAGE PLPGSQL;
-
-CREATE OR REPLACE FUNCTION search.symspell_build_and_merge_entries (
- full_input TEXT,
- source_class TEXT,
- old_input TEXT DEFAULT NULL,
- include_phrases BOOL DEFAULT FALSE
-) RETURNS SETOF search.symspell_dictionary AS $F$
-DECLARE
- new_entry RECORD;
- conflict_entry RECORD;
-BEGIN
-
- IF full_input = old_input THEN -- neither NULL, and are the same
- RETURN;
- END IF;
-
- FOR new_entry IN EXECUTE $q$
- SELECT count,
- prefix_key,
- s AS suggestions
- FROM (SELECT prefix_key,
- ARRAY_AGG(DISTINCT $q$ || source_class || $q$_suggestions[1]) s,
- SUM($q$ || source_class || $q$_count) count
- FROM search.symspell_build_entries($1, $2, $3, $4)
- GROUP BY 1) x
- $q$ USING full_input, source_class, old_input, include_phrases
- LOOP
- EXECUTE $q$
- SELECT prefix_key,
- $q$ || source_class || $q$_suggestions suggestions,
- $q$ || source_class || $q$_count count
- FROM search.symspell_dictionary
- WHERE prefix_key = $1 $q$
- INTO conflict_entry
- USING new_entry.prefix_key;
-
- IF new_entry.count <> 0 THEN -- Real word, and count changed
- IF conflict_entry.prefix_key IS NOT NULL THEN -- we'll be updating
- IF conflict_entry.count > 0 THEN -- it's a real word
- RETURN QUERY EXECUTE $q$
- UPDATE search.symspell_dictionary
- SET $q$ || source_class || $q$_count = $2
- WHERE prefix_key = $1
- RETURNING * $q$
- USING new_entry.prefix_key, GREATEST(0, new_entry.count + conflict_entry.count);
- ELSE -- it was a prefix key or delete-emptied word before
- IF conflict_entry.suggestions @> new_entry.suggestions THEN -- already have all suggestions here...
- RETURN QUERY EXECUTE $q$
- UPDATE search.symspell_dictionary
- SET $q$ || source_class || $q$_count = $2
- WHERE prefix_key = $1
- RETURNING * $q$
- USING new_entry.prefix_key, GREATEST(0, new_entry.count);
- ELSE -- new suggestion!
- RETURN QUERY EXECUTE $q$
- UPDATE search.symspell_dictionary
- SET $q$ || source_class || $q$_count = $2,
- $q$ || source_class || $q$_suggestions = $3
- WHERE prefix_key = $1
- RETURNING * $q$
- USING new_entry.prefix_key, GREATEST(0, new_entry.count), evergreen.text_array_merge_unique(conflict_entry.suggestions,new_entry.suggestions);
- END IF;
- END IF;
- ELSE
- -- We keep the on-conflict clause just in case...
- RETURN QUERY EXECUTE $q$
- INSERT INTO search.symspell_dictionary AS d (
- $q$ || source_class || $q$_count,
- prefix_key,
- $q$ || source_class || $q$_suggestions
- ) VALUES ( $1, $2, $3 ) ON CONFLICT (prefix_key) DO
- UPDATE SET $q$ || source_class || $q$_count = d.$q$ || source_class || $q$_count + EXCLUDED.$q$ || source_class || $q$_count,
- $q$ || source_class || $q$_suggestions = evergreen.text_array_merge_unique(d.$q$ || source_class || $q$_suggestions, EXCLUDED.$q$ || source_class || $q$_suggestions)
- RETURNING * $q$
- USING new_entry.count, new_entry.prefix_key, new_entry.suggestions;
- END IF;
- ELSE -- key only, or no change
- IF conflict_entry.prefix_key IS NOT NULL THEN -- we'll be updating
- IF NOT conflict_entry.suggestions @> new_entry.suggestions THEN -- There are new suggestions
- RETURN QUERY EXECUTE $q$
- UPDATE search.symspell_dictionary
- SET $q$ || source_class || $q$_suggestions = $2
- WHERE prefix_key = $1
- RETURNING * $q$
- USING new_entry.prefix_key, evergreen.text_array_merge_unique(conflict_entry.suggestions,new_entry.suggestions);
- END IF;
- ELSE
- RETURN QUERY EXECUTE $q$
- INSERT INTO search.symspell_dictionary AS d (
- $q$ || source_class || $q$_count,
- prefix_key,
- $q$ || source_class || $q$_suggestions
- ) VALUES ( $1, $2, $3 ) ON CONFLICT (prefix_key) DO -- key exists, suggestions may be added due to this entry
- UPDATE SET $q$ || source_class || $q$_suggestions = evergreen.text_array_merge_unique(d.$q$ || source_class || $q$_suggestions, EXCLUDED.$q$ || source_class || $q$_suggestions)
- RETURNING * $q$
- USING new_entry.count, new_entry.prefix_key, new_entry.suggestions;
- END IF;
- END IF;
- END LOOP;
-END;
-$F$ LANGUAGE PLPGSQL;
-
-COMMIT;
-
-\qecho ''
-\qecho 'The following should be run at the end of the upgrade before any'
-\qecho 'reingest occurs. Because new triggers are installed already,'
-\qecho 'updates to indexed strings will cause zero-count dictionary entries'
-\qecho 'to be recorded which will require updating every row again (or'
-\qecho 'starting from scratch) so best to do this before other batch'
-\qecho 'changes. A later reingest that does not significantly change'
-\qecho 'indexed strings will /not/ cause table bloat here, and will be'
-\qecho 'as fast as normal. A copy of the SQL in a ready-to-use, non-escaped'
-\qecho 'form is available inside a comment at the end of this upgrade sub-'
-\qecho 'script so you do not need to copy this comment from the psql ouptut.'
-\qecho ''
-\qecho '\\a'
-\qecho '\\t'
-\qecho ''
-\qecho '\\o title'
-\qecho 'select value from metabib.title_field_entry where source in (select id from biblio.record_entry where not deleted);'
-\qecho '\\o author'
-\qecho 'select value from metabib.author_field_entry where source in (select id from biblio.record_entry where not deleted);'
-\qecho '\\o subject'
-\qecho 'select value from metabib.subject_field_entry where source in (select id from biblio.record_entry where not deleted);'
-\qecho '\\o series'
-\qecho 'select value from metabib.series_field_entry where source in (select id from biblio.record_entry where not deleted);'
-\qecho '\\o identifier'
-\qecho 'select value from metabib.identifier_field_entry where source in (select id from biblio.record_entry where not deleted);'
-\qecho '\\o keyword'
-\qecho 'select value from metabib.keyword_field_entry where source in (select id from biblio.record_entry where not deleted);'
-\qecho ''
-\qecho '\\o'
-\qecho '\\a'
-\qecho '\\t'
-\qecho ''
-\qecho '// Then, at the command line:'
-\qecho ''
-\qecho '$ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl title > title.sql'
-\qecho '$ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl author > author.sql'
-\qecho '$ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl subject > subject.sql'
-\qecho '$ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl series > series.sql'
-\qecho '$ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl identifier > identifier.sql'
-\qecho '$ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl keyword > keyword.sql'
-\qecho ''
-\qecho '// And, back in psql'
-\qecho ''
-\qecho 'ALTER TABLE search.symspell_dictionary SET UNLOGGED;'
-\qecho 'TRUNCATE search.symspell_dictionary;'
-\qecho ''
-\qecho '\\i identifier.sql'
-\qecho '\\i author.sql'
-\qecho '\\i title.sql'
-\qecho '\\i subject.sql'
-\qecho '\\i series.sql'
-\qecho '\\i keyword.sql'
-\qecho ''
-\qecho 'CLUSTER search.symspell_dictionary USING symspell_dictionary_pkey;'
-\qecho 'REINDEX TABLE search.symspell_dictionary;'
-\qecho 'ALTER TABLE search.symspell_dictionary SET LOGGED;'
-\qecho 'VACUUM ANALYZE search.symspell_dictionary;'
-\qecho ''
-\qecho 'DROP TABLE search.symspell_dictionary_partial_title;'
-\qecho 'DROP TABLE search.symspell_dictionary_partial_author;'
-\qecho 'DROP TABLE search.symspell_dictionary_partial_subject;'
-\qecho 'DROP TABLE search.symspell_dictionary_partial_series;'
-\qecho 'DROP TABLE search.symspell_dictionary_partial_identifier;'
-\qecho 'DROP TABLE search.symspell_dictionary_partial_keyword;'
-
-/* To run by hand:
-
-\a
-\t
-
-\o title
-select value from metabib.title_field_entry where source in (select id from biblio.record_entry where not deleted);
-
-\o author
-select value from metabib.author_field_entry where source in (select id from biblio.record_entry where not deleted);
-
-\o subject
-select value from metabib.subject_field_entry where source in (select id from biblio.record_entry where not deleted);
-
-\o series
-select value from metabib.series_field_entry where source in (select id from biblio.record_entry where not deleted);
-
-\o identifier
-select value from metabib.identifier_field_entry where source in (select id from biblio.record_entry where not deleted);
-
-\o keyword
-select value from metabib.keyword_field_entry where source in (select id from biblio.record_entry where not deleted);
-
-\o
-\a
-\t
-
-// Then, at the command line:
-
-$ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl title > title.sql
-$ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl author > author.sql
-$ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl subject > subject.sql
-$ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl series > series.sql
-$ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl identifier > identifier.sql
-$ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl keyword > keyword.sql
-
-// To the extent your hardware allows, the above commands can be run in
-// in parallel, in different shells. Each will use a full CPU, and RAM
-// may be a limiting resource, so keep an eye on that with `top`.
-
-
-// And, back in psql
-
-ALTER TABLE search.symspell_dictionary SET UNLOGGED;
-TRUNCATE search.symspell_dictionary;
-
-\i identifier.sql
-\i author.sql
-\i title.sql
-\i subject.sql
-\i series.sql
-\i keyword.sql
-
-CLUSTER search.symspell_dictionary USING symspell_dictionary_pkey;
-REINDEX TABLE search.symspell_dictionary;
-ALTER TABLE search.symspell_dictionary SET LOGGED;
-VACUUM ANALYZE search.symspell_dictionary;
-
-DROP TABLE search.symspell_dictionary_partial_title;
-DROP TABLE search.symspell_dictionary_partial_author;
-DROP TABLE search.symspell_dictionary_partial_subject;
-DROP TABLE search.symspell_dictionary_partial_series;
-DROP TABLE search.symspell_dictionary_partial_identifier;
-DROP TABLE search.symspell_dictionary_partial_keyword;
-
-*/
-