From 8f5b2988d2bd61899ab5069ac25ba8658dd5c6c7 Mon Sep 17 00:00:00 2001 From: Mike Rylander Date: Fri, 13 May 2022 12:35:24 -0400 Subject: [PATCH] LP#1931737: Allow the delay of symspell updates This commit adds a new internal flag, auto-created at the time of need, to control whether record ingest will cause immediate updates to the symspell dictionary, or if those updates will simply be recorded for later incorporation. Inline symspell dictionary updates can cause record updates to be logically serialized, impacting the performance of other tools used for batch reingest. pingest.pl is changed to allow an administrator to make use of this feature via the --delay-symspell command line flag. Signed-off-by: Mike Rylander --- Open-ILS/src/sql/Pg/030.schema.metabib.sql | 5 +- Open-ILS/src/sql/Pg/300.schema.staged_search.sql | 53 ++++++++++++++++++++++ .../Pg/upgrade/XXXX.schema.dym_delayed_reify.sql | 21 --------- 3 files changed, 57 insertions(+), 22 deletions(-) diff --git a/Open-ILS/src/sql/Pg/030.schema.metabib.sql b/Open-ILS/src/sql/Pg/030.schema.metabib.sql index ac35fc73d3..86416cac97 100644 --- a/Open-ILS/src/sql/Pg/030.schema.metabib.sql +++ b/Open-ILS/src/sql/Pg/030.schema.metabib.sql @@ -1168,7 +1168,10 @@ BEGIN IF NOT b_skip_search THEN PERFORM metabib.update_combined_index_vectors(bib_id); - PERFORM search.symspell_dictionary_reify(); -- NOTE: we only use search data for symspell today + PERFORM * FROM config.internal_flag WHERE name = 'ingest.disable_symspell_reification' AND enabled; + IF NOT FOUND THEN + PERFORM search.symspell_dictionary_reify(); + END IF; END IF; RETURN; diff --git a/Open-ILS/src/sql/Pg/300.schema.staged_search.sql b/Open-ILS/src/sql/Pg/300.schema.staged_search.sql index 868ce5fb3a..5abbbef2f0 100644 --- a/Open-ILS/src/sql/Pg/300.schema.staged_search.sql +++ b/Open-ILS/src/sql/Pg/300.schema.staged_search.sql @@ -1205,6 +1205,59 @@ CREATE OR REPLACE FUNCTION search.symspell_dictionary_reify () RETURNS SETOF sea RETURNING *; $f$ LANGUAGE 
SQL; +CREATE OR REPLACE FUNCTION search.disable_symspell_reification () RETURNS VOID AS $f$ + INSERT INTO config.internal_flag (name,enabled) + VALUES ('ingest.disable_symspell_reification',TRUE) + ON CONFLICT (name) DO UPDATE SET enabled = TRUE; +$f$ LANGUAGE SQL; + +CREATE OR REPLACE FUNCTION search.enable_symspell_reification () RETURNS VOID AS $f$ + UPDATE config.internal_flag SET enabled = FALSE WHERE name = 'ingest.disable_symspell_reification'; +$f$ LANGUAGE SQL; + +CREATE OR REPLACE FUNCTION search.symspell_dictionary_full_reify () RETURNS SETOF search.symspell_dictionary AS $f$ + WITH new_rows AS ( + DELETE FROM search.symspell_dictionary_updates RETURNING * + ), computed_rows AS ( -- this collapses the rows deleted into the format we need for UPSERT + SELECT SUM(keyword_count) AS keyword_count, + SUM(title_count) AS title_count, + SUM(author_count) AS author_count, + SUM(subject_count) AS subject_count, + SUM(series_count) AS series_count, + SUM(identifier_count) AS identifier_count, + + prefix_key, + + ARRAY_REMOVE(ARRAY_AGG(DISTINCT keyword_suggestions[1]), NULL) AS keyword_suggestions, + ARRAY_REMOVE(ARRAY_AGG(DISTINCT title_suggestions[1]), NULL) AS title_suggestions, + ARRAY_REMOVE(ARRAY_AGG(DISTINCT author_suggestions[1]), NULL) AS author_suggestions, + ARRAY_REMOVE(ARRAY_AGG(DISTINCT subject_suggestions[1]), NULL) AS subject_suggestions, + ARRAY_REMOVE(ARRAY_AGG(DISTINCT series_suggestions[1]), NULL) AS series_suggestions, + ARRAY_REMOVE(ARRAY_AGG(DISTINCT identifier_suggestions[1]), NULL) AS identifier_suggestions + FROM new_rows + GROUP BY prefix_key + ) + INSERT INTO search.symspell_dictionary AS d SELECT * FROM computed_rows + ON CONFLICT (prefix_key) DO UPDATE SET + keyword_count = GREATEST(0, d.keyword_count + EXCLUDED.keyword_count), + keyword_suggestions = evergreen.text_array_merge_unique(EXCLUDED.keyword_suggestions,d.keyword_suggestions), + + title_count = GREATEST(0, d.title_count + EXCLUDED.title_count), + title_suggestions = 
evergreen.text_array_merge_unique(EXCLUDED.title_suggestions,d.title_suggestions), + + author_count = GREATEST(0, d.author_count + EXCLUDED.author_count), + author_suggestions = evergreen.text_array_merge_unique(EXCLUDED.author_suggestions,d.author_suggestions), + + subject_count = GREATEST(0, d.subject_count + EXCLUDED.subject_count), + subject_suggestions = evergreen.text_array_merge_unique(EXCLUDED.subject_suggestions,d.subject_suggestions), + + series_count = GREATEST(0, d.series_count + EXCLUDED.series_count), + series_suggestions = evergreen.text_array_merge_unique(EXCLUDED.series_suggestions,d.series_suggestions), + + identifier_count = GREATEST(0, d.identifier_count + EXCLUDED.identifier_count), + identifier_suggestions = evergreen.text_array_merge_unique(EXCLUDED.identifier_suggestions,d.identifier_suggestions) + RETURNING *; +$f$ LANGUAGE SQL; CREATE OR REPLACE FUNCTION search.symspell_parse_words ( phrase TEXT ) RETURNS SETOF TEXT AS $F$ diff --git a/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.dym_delayed_reify.sql b/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.dym_delayed_reify.sql index 9dc4b0c2e6..5401162322 100644 --- a/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.dym_delayed_reify.sql +++ b/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.dym_delayed_reify.sql @@ -1,26 +1,5 @@ BEGIN; --- INSERT-only table that catches updates to be reconciled -CREATE UNLOGGED TABLE search.symspell_dictionary_updates ( - transaction_id BIGINT, - keyword_count INT NOT NULL DEFAULT 0, - title_count INT NOT NULL DEFAULT 0, - author_count INT NOT NULL DEFAULT 0, - subject_count INT NOT NULL DEFAULT 0, - series_count INT NOT NULL DEFAULT 0, - identifier_count INT NOT NULL DEFAULT 0, - - prefix_key TEXT NOT NULL, - - keyword_suggestions TEXT[], - title_suggestions TEXT[], - author_suggestions TEXT[], - subject_suggestions TEXT[], - series_suggestions TEXT[], - identifier_suggestions TEXT[] -); -CREATE INDEX symspell_dictionary_updates_tid_idx ON search.symspell_dictionary_updates 
(transaction_id); - CREATE OR REPLACE FUNCTION search.disable_symspell_reification () RETURNS VOID AS $f$ INSERT INTO config.internal_flag (name,enabled) VALUES ('ingest.disable_symspell_reification',TRUE) -- 2.11.0