BEFORE INSERT OR UPDATE ON config.db_patch_dependencies
FOR EACH ROW EXECUTE PROCEDURE evergreen.array_overlap_check ('deprecates');
-INSERT INTO config.upgrade_log (version, applied_to) VALUES ('1339', :eg_version); -- khuckins/sandbergja/mmorgan
-INSERT INTO config.upgrade_log (version, applied_to) VALUES ('3.9.0', :eg_version);
+INSERT INTO config.upgrade_log (version, applied_to) VALUES ('1341', :eg_version); -- miker/sandbergja/dyrcona/blake
CREATE TABLE config.bib_source (
id SERIAL PRIMARY KEY,
--- /dev/null
+BEGIN;
+
+SELECT evergreen.upgrade_deps_block_check('1340', :eg_version);
+
+-- INSERT-only table that catches dictionary updates to be reconciled
+CREATE UNLOGGED TABLE search.symspell_dictionary_updates (
+ transaction_id BIGINT,
+ keyword_count INT NOT NULL DEFAULT 0,
+ title_count INT NOT NULL DEFAULT 0,
+ author_count INT NOT NULL DEFAULT 0,
+ subject_count INT NOT NULL DEFAULT 0,
+ series_count INT NOT NULL DEFAULT 0,
+ identifier_count INT NOT NULL DEFAULT 0,
+
+ prefix_key TEXT NOT NULL,
+
+ keyword_suggestions TEXT[],
+ title_suggestions TEXT[],
+ author_suggestions TEXT[],
+ subject_suggestions TEXT[],
+ series_suggestions TEXT[],
+ identifier_suggestions TEXT[]
+);
+CREATE INDEX symspell_dictionary_updates_tid_idx ON search.symspell_dictionary_updates (transaction_id);
+
+-- Function that collects this transactions additions to the unlogged update table
+CREATE OR REPLACE FUNCTION search.symspell_dictionary_reify () RETURNS SETOF search.symspell_dictionary AS $f$
+ WITH new_rows AS (
+ DELETE FROM search.symspell_dictionary_updates WHERE transaction_id = txid_current() RETURNING *
+ ), computed_rows AS ( -- this collapses the rows deleted into the format we need for UPSERT
+ SELECT SUM(keyword_count) AS keyword_count,
+ SUM(title_count) AS title_count,
+ SUM(author_count) AS author_count,
+ SUM(subject_count) AS subject_count,
+ SUM(series_count) AS series_count,
+ SUM(identifier_count) AS identifier_count,
+
+ prefix_key,
+
+ ARRAY_REMOVE(ARRAY_AGG(DISTINCT keyword_suggestions[1]), NULL) AS keyword_suggestions,
+ ARRAY_REMOVE(ARRAY_AGG(DISTINCT title_suggestions[1]), NULL) AS title_suggestions,
+ ARRAY_REMOVE(ARRAY_AGG(DISTINCT author_suggestions[1]), NULL) AS author_suggestions,
+ ARRAY_REMOVE(ARRAY_AGG(DISTINCT subject_suggestions[1]), NULL) AS subject_suggestions,
+ ARRAY_REMOVE(ARRAY_AGG(DISTINCT series_suggestions[1]), NULL) AS series_suggestions,
+ ARRAY_REMOVE(ARRAY_AGG(DISTINCT identifier_suggestions[1]), NULL) AS identifier_suggestions
+ FROM new_rows
+ GROUP BY prefix_key
+ )
+ INSERT INTO search.symspell_dictionary AS d SELECT * FROM computed_rows
+ ON CONFLICT (prefix_key) DO UPDATE SET
+ keyword_count = GREATEST(0, d.keyword_count + EXCLUDED.keyword_count),
+ keyword_suggestions = evergreen.text_array_merge_unique(EXCLUDED.keyword_suggestions,d.keyword_suggestions),
+
+ title_count = GREATEST(0, d.title_count + EXCLUDED.title_count),
+ title_suggestions = evergreen.text_array_merge_unique(EXCLUDED.title_suggestions,d.title_suggestions),
+
+ author_count = GREATEST(0, d.author_count + EXCLUDED.author_count),
+ author_suggestions = evergreen.text_array_merge_unique(EXCLUDED.author_suggestions,d.author_suggestions),
+
+ subject_count = GREATEST(0, d.subject_count + EXCLUDED.subject_count),
+ subject_suggestions = evergreen.text_array_merge_unique(EXCLUDED.subject_suggestions,d.subject_suggestions),
+
+ series_count = GREATEST(0, d.series_count + EXCLUDED.series_count),
+ series_suggestions = evergreen.text_array_merge_unique(EXCLUDED.series_suggestions,d.series_suggestions),
+
+ identifier_count = GREATEST(0, d.identifier_count + EXCLUDED.identifier_count),
+ identifier_suggestions = evergreen.text_array_merge_unique(EXCLUDED.identifier_suggestions,d.identifier_suggestions)
+ RETURNING *;
+$f$ LANGUAGE SQL;
+
+-- simplified metabib.*_field_entry trigger that stages updates for reification in one go
+CREATE OR REPLACE FUNCTION search.symspell_maintain_entries () RETURNS TRIGGER AS $f$
+DECLARE
+ search_class TEXT;
+ new_value TEXT := NULL;
+ old_value TEXT := NULL;
+BEGIN
+ search_class := COALESCE(TG_ARGV[0], SPLIT_PART(TG_TABLE_NAME,'_',1));
+
+ IF TG_OP IN ('INSERT', 'UPDATE') THEN
+ new_value := NEW.value;
+ END IF;
+
+ IF TG_OP IN ('DELETE', 'UPDATE') THEN
+ old_value := OLD.value;
+ END IF;
+
+ IF new_value = old_value THEN
+ -- same, move along
+ ELSE
+ INSERT INTO search.symspell_dictionary_updates
+ SELECT txid_current(), *
+ FROM search.symspell_build_entries(
+ new_value,
+ search_class,
+ old_value
+ );
+ END IF;
+
+ RETURN NULL; -- always fired AFTER
+END;
+$f$ LANGUAGE PLPGSQL;
+
+CREATE OR REPLACE FUNCTION metabib.reingest_metabib_field_entries(
+ bib_id BIGINT,
+ skip_facet BOOL DEFAULT FALSE,
+ skip_display BOOL DEFAULT FALSE,
+ skip_browse BOOL DEFAULT FALSE,
+ skip_search BOOL DEFAULT FALSE,
+ only_fields INT[] DEFAULT '{}'::INT[]
+) RETURNS VOID AS $func$
+DECLARE
+ fclass RECORD;
+ ind_data metabib.field_entry_template%ROWTYPE;
+ mbe_row metabib.browse_entry%ROWTYPE;
+ mbe_id BIGINT;
+ b_skip_facet BOOL;
+ b_skip_display BOOL;
+ b_skip_browse BOOL;
+ b_skip_search BOOL;
+ value_prepped TEXT;
+ field_list INT[] := only_fields;
+ field_types TEXT[] := '{}'::TEXT[];
+BEGIN
+
+ IF field_list = '{}'::INT[] THEN
+ SELECT ARRAY_AGG(id) INTO field_list FROM config.metabib_field;
+ END IF;
+
+ SELECT COALESCE(NULLIF(skip_facet, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name = 'ingest.skip_facet_indexing' AND enabled)) INTO b_skip_facet;
+ SELECT COALESCE(NULLIF(skip_display, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name = 'ingest.skip_display_indexing' AND enabled)) INTO b_skip_display;
+ SELECT COALESCE(NULLIF(skip_browse, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name = 'ingest.skip_browse_indexing' AND enabled)) INTO b_skip_browse;
+ SELECT COALESCE(NULLIF(skip_search, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name = 'ingest.skip_search_indexing' AND enabled)) INTO b_skip_search;
+
+ IF NOT b_skip_facet THEN field_types := field_types || '{facet}'; END IF;
+ IF NOT b_skip_display THEN field_types := field_types || '{display}'; END IF;
+ IF NOT b_skip_browse THEN field_types := field_types || '{browse}'; END IF;
+ IF NOT b_skip_search THEN field_types := field_types || '{search}'; END IF;
+
+ PERFORM * FROM config.internal_flag WHERE name = 'ingest.assume_inserts_only' AND enabled;
+ IF NOT FOUND THEN
+ IF NOT b_skip_search THEN
+ FOR fclass IN SELECT * FROM config.metabib_class LOOP
+ -- RAISE NOTICE 'Emptying out %', fclass.name;
+ EXECUTE $$DELETE FROM metabib.$$ || fclass.name || $$_field_entry WHERE source = $$ || bib_id;
+ END LOOP;
+ END IF;
+ IF NOT b_skip_facet THEN
+ DELETE FROM metabib.facet_entry WHERE source = bib_id;
+ END IF;
+ IF NOT b_skip_display THEN
+ DELETE FROM metabib.display_entry WHERE source = bib_id;
+ END IF;
+ IF NOT b_skip_browse THEN
+ DELETE FROM metabib.browse_entry_def_map WHERE source = bib_id;
+ END IF;
+ END IF;
+
+ FOR ind_data IN SELECT * FROM biblio.extract_metabib_field_entry( bib_id, ' ', field_types, field_list ) LOOP
+
+ -- don't store what has been normalized away
+ CONTINUE WHEN ind_data.value IS NULL;
+
+ IF ind_data.field < 0 THEN
+ ind_data.field = -1 * ind_data.field;
+ END IF;
+
+ IF ind_data.facet_field AND NOT b_skip_facet THEN
+ INSERT INTO metabib.facet_entry (field, source, value)
+ VALUES (ind_data.field, ind_data.source, ind_data.value);
+ END IF;
+
+ IF ind_data.display_field AND NOT b_skip_display THEN
+ INSERT INTO metabib.display_entry (field, source, value)
+ VALUES (ind_data.field, ind_data.source, ind_data.value);
+ END IF;
+
+
+ IF ind_data.browse_field AND NOT b_skip_browse THEN
+ -- A caveat about this SELECT: this should take care of replacing
+ -- old mbe rows when data changes, but not if normalization (by
+ -- which I mean specifically the output of
+ -- evergreen.oils_tsearch2()) changes. It may or may not be
+ -- expensive to add a comparison of index_vector to index_vector
+ -- to the WHERE clause below.
+
+ CONTINUE WHEN ind_data.sort_value IS NULL;
+
+ value_prepped := metabib.browse_normalize(ind_data.value, ind_data.field);
+ IF ind_data.browse_nocase THEN
+ SELECT INTO mbe_row * FROM metabib.browse_entry
+ WHERE evergreen.lowercase(value) = evergreen.lowercase(value_prepped) AND sort_value = ind_data.sort_value
+ ORDER BY sort_value, value LIMIT 1; -- gotta pick something, I guess
+ ELSE
+ SELECT INTO mbe_row * FROM metabib.browse_entry
+ WHERE value = value_prepped AND sort_value = ind_data.sort_value;
+ END IF;
+
+ IF FOUND THEN
+ mbe_id := mbe_row.id;
+ ELSE
+ INSERT INTO metabib.browse_entry
+ ( value, sort_value ) VALUES
+ ( value_prepped, ind_data.sort_value );
+
+ mbe_id := CURRVAL('metabib.browse_entry_id_seq'::REGCLASS);
+ END IF;
+
+ INSERT INTO metabib.browse_entry_def_map (entry, def, source, authority)
+ VALUES (mbe_id, ind_data.field, ind_data.source, ind_data.authority);
+ END IF;
+
+ IF ind_data.search_field AND NOT b_skip_search THEN
+ -- Avoid inserting duplicate rows
+ EXECUTE 'SELECT 1 FROM metabib.' || ind_data.field_class ||
+ '_field_entry WHERE field = $1 AND source = $2 AND value = $3'
+ INTO mbe_id USING ind_data.field, ind_data.source, ind_data.value;
+ -- RAISE NOTICE 'Search for an already matching row returned %', mbe_id;
+ IF mbe_id IS NULL THEN
+ EXECUTE $$
+ INSERT INTO metabib.$$ || ind_data.field_class || $$_field_entry (field, source, value)
+ VALUES ($$ ||
+ quote_literal(ind_data.field) || $$, $$ ||
+ quote_literal(ind_data.source) || $$, $$ ||
+ quote_literal(ind_data.value) ||
+ $$);$$;
+ END IF;
+ END IF;
+
+ END LOOP;
+
+ IF NOT b_skip_search THEN
+ PERFORM metabib.update_combined_index_vectors(bib_id);
+ PERFORM search.symspell_dictionary_reify();
+ END IF;
+
+ RETURN;
+END;
+$func$ LANGUAGE PLPGSQL;
+
+COMMIT;
+
--- /dev/null
+BEGIN;
+
+SELECT evergreen.upgrade_deps_block_check('1340', :eg_version);
+
+CREATE OR REPLACE FUNCTION search.disable_symspell_reification () RETURNS VOID AS $f$
+ INSERT INTO config.internal_flag (name,enabled)
+ VALUES ('ingest.disable_symspell_reification',TRUE)
+ ON CONFLICT (name) DO UPDATE SET enabled = TRUE;
+$f$ LANGUAGE SQL;
+
+CREATE OR REPLACE FUNCTION search.enable_symspell_reification () RETURNS VOID AS $f$
+ UPDATE config.internal_flag SET enabled = FALSE WHERE name = 'ingest.disable_symspell_reification';
+$f$ LANGUAGE SQL;
+
+CREATE OR REPLACE FUNCTION search.symspell_dictionary_full_reify () RETURNS SETOF search.symspell_dictionary AS $f$
+ WITH new_rows AS (
+ DELETE FROM search.symspell_dictionary_updates RETURNING *
+ ), computed_rows AS ( -- this collapses the rows deleted into the format we need for UPSERT
+ SELECT SUM(keyword_count) AS keyword_count,
+ SUM(title_count) AS title_count,
+ SUM(author_count) AS author_count,
+ SUM(subject_count) AS subject_count,
+ SUM(series_count) AS series_count,
+ SUM(identifier_count) AS identifier_count,
+
+ prefix_key,
+
+ ARRAY_REMOVE(ARRAY_AGG(DISTINCT keyword_suggestions[1]), NULL) AS keyword_suggestions,
+ ARRAY_REMOVE(ARRAY_AGG(DISTINCT title_suggestions[1]), NULL) AS title_suggestions,
+ ARRAY_REMOVE(ARRAY_AGG(DISTINCT author_suggestions[1]), NULL) AS author_suggestions,
+ ARRAY_REMOVE(ARRAY_AGG(DISTINCT subject_suggestions[1]), NULL) AS subject_suggestions,
+ ARRAY_REMOVE(ARRAY_AGG(DISTINCT series_suggestions[1]), NULL) AS series_suggestions,
+ ARRAY_REMOVE(ARRAY_AGG(DISTINCT identifier_suggestions[1]), NULL) AS identifier_suggestions
+ FROM new_rows
+ GROUP BY prefix_key
+ )
+ INSERT INTO search.symspell_dictionary AS d SELECT * FROM computed_rows
+ ON CONFLICT (prefix_key) DO UPDATE SET
+ keyword_count = GREATEST(0, d.keyword_count + EXCLUDED.keyword_count),
+ keyword_suggestions = evergreen.text_array_merge_unique(EXCLUDED.keyword_suggestions,d.keyword_suggestions),
+
+ title_count = GREATEST(0, d.title_count + EXCLUDED.title_count),
+ title_suggestions = evergreen.text_array_merge_unique(EXCLUDED.title_suggestions,d.title_suggestions),
+
+ author_count = GREATEST(0, d.author_count + EXCLUDED.author_count),
+ author_suggestions = evergreen.text_array_merge_unique(EXCLUDED.author_suggestions,d.author_suggestions),
+
+ subject_count = GREATEST(0, d.subject_count + EXCLUDED.subject_count),
+ subject_suggestions = evergreen.text_array_merge_unique(EXCLUDED.subject_suggestions,d.subject_suggestions),
+
+ series_count = GREATEST(0, d.series_count + EXCLUDED.series_count),
+ series_suggestions = evergreen.text_array_merge_unique(EXCLUDED.series_suggestions,d.series_suggestions),
+
+ identifier_count = GREATEST(0, d.identifier_count + EXCLUDED.identifier_count),
+ identifier_suggestions = evergreen.text_array_merge_unique(EXCLUDED.identifier_suggestions,d.identifier_suggestions)
+ RETURNING *;
+$f$ LANGUAGE SQL;
+
+-- Updated again to check for delayed symspell reification
+CREATE OR REPLACE FUNCTION metabib.reingest_metabib_field_entries(
+ bib_id BIGINT,
+ skip_facet BOOL DEFAULT FALSE,
+ skip_display BOOL DEFAULT FALSE,
+ skip_browse BOOL DEFAULT FALSE,
+ skip_search BOOL DEFAULT FALSE,
+ only_fields INT[] DEFAULT '{}'::INT[]
+) RETURNS VOID AS $func$
+DECLARE
+ fclass RECORD;
+ ind_data metabib.field_entry_template%ROWTYPE;
+ mbe_row metabib.browse_entry%ROWTYPE;
+ mbe_id BIGINT;
+ b_skip_facet BOOL;
+ b_skip_display BOOL;
+ b_skip_browse BOOL;
+ b_skip_search BOOL;
+ value_prepped TEXT;
+ field_list INT[] := only_fields;
+ field_types TEXT[] := '{}'::TEXT[];
+BEGIN
+
+ IF field_list = '{}'::INT[] THEN
+ SELECT ARRAY_AGG(id) INTO field_list FROM config.metabib_field;
+ END IF;
+
+ SELECT COALESCE(NULLIF(skip_facet, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name = 'ingest.skip_facet_indexing' AND enabled)) INTO b_skip_facet;
+ SELECT COALESCE(NULLIF(skip_display, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name = 'ingest.skip_display_indexing' AND enabled)) INTO b_skip_display;
+ SELECT COALESCE(NULLIF(skip_browse, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name = 'ingest.skip_browse_indexing' AND enabled)) INTO b_skip_browse;
+ SELECT COALESCE(NULLIF(skip_search, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name = 'ingest.skip_search_indexing' AND enabled)) INTO b_skip_search;
+
+ IF NOT b_skip_facet THEN field_types := field_types || '{facet}'; END IF;
+ IF NOT b_skip_display THEN field_types := field_types || '{display}'; END IF;
+ IF NOT b_skip_browse THEN field_types := field_types || '{browse}'; END IF;
+ IF NOT b_skip_search THEN field_types := field_types || '{search}'; END IF;
+
+ PERFORM * FROM config.internal_flag WHERE name = 'ingest.assume_inserts_only' AND enabled;
+ IF NOT FOUND THEN
+ IF NOT b_skip_search THEN
+ FOR fclass IN SELECT * FROM config.metabib_class LOOP
+ -- RAISE NOTICE 'Emptying out %', fclass.name;
+ EXECUTE $$DELETE FROM metabib.$$ || fclass.name || $$_field_entry WHERE source = $$ || bib_id;
+ END LOOP;
+ END IF;
+ IF NOT b_skip_facet THEN
+ DELETE FROM metabib.facet_entry WHERE source = bib_id;
+ END IF;
+ IF NOT b_skip_display THEN
+ DELETE FROM metabib.display_entry WHERE source = bib_id;
+ END IF;
+ IF NOT b_skip_browse THEN
+ DELETE FROM metabib.browse_entry_def_map WHERE source = bib_id;
+ END IF;
+ END IF;
+
+ FOR ind_data IN SELECT * FROM biblio.extract_metabib_field_entry( bib_id, ' ', field_types, field_list ) LOOP
+
+ -- don't store what has been normalized away
+ CONTINUE WHEN ind_data.value IS NULL;
+
+ IF ind_data.field < 0 THEN
+ ind_data.field = -1 * ind_data.field;
+ END IF;
+
+ IF ind_data.facet_field AND NOT b_skip_facet THEN
+ INSERT INTO metabib.facet_entry (field, source, value)
+ VALUES (ind_data.field, ind_data.source, ind_data.value);
+ END IF;
+
+ IF ind_data.display_field AND NOT b_skip_display THEN
+ INSERT INTO metabib.display_entry (field, source, value)
+ VALUES (ind_data.field, ind_data.source, ind_data.value);
+ END IF;
+
+
+ IF ind_data.browse_field AND NOT b_skip_browse THEN
+ -- A caveat about this SELECT: this should take care of replacing
+ -- old mbe rows when data changes, but not if normalization (by
+ -- which I mean specifically the output of
+ -- evergreen.oils_tsearch2()) changes. It may or may not be
+ -- expensive to add a comparison of index_vector to index_vector
+ -- to the WHERE clause below.
+
+ CONTINUE WHEN ind_data.sort_value IS NULL;
+
+ value_prepped := metabib.browse_normalize(ind_data.value, ind_data.field);
+ IF ind_data.browse_nocase THEN
+ SELECT INTO mbe_row * FROM metabib.browse_entry
+ WHERE evergreen.lowercase(value) = evergreen.lowercase(value_prepped) AND sort_value = ind_data.sort_value
+ ORDER BY sort_value, value LIMIT 1; -- gotta pick something, I guess
+ ELSE
+ SELECT INTO mbe_row * FROM metabib.browse_entry
+ WHERE value = value_prepped AND sort_value = ind_data.sort_value;
+ END IF;
+
+ IF FOUND THEN
+ mbe_id := mbe_row.id;
+ ELSE
+ INSERT INTO metabib.browse_entry
+ ( value, sort_value ) VALUES
+ ( value_prepped, ind_data.sort_value );
+
+ mbe_id := CURRVAL('metabib.browse_entry_id_seq'::REGCLASS);
+ END IF;
+
+ INSERT INTO metabib.browse_entry_def_map (entry, def, source, authority)
+ VALUES (mbe_id, ind_data.field, ind_data.source, ind_data.authority);
+ END IF;
+
+ IF ind_data.search_field AND NOT b_skip_search THEN
+ -- Avoid inserting duplicate rows
+ EXECUTE 'SELECT 1 FROM metabib.' || ind_data.field_class ||
+ '_field_entry WHERE field = $1 AND source = $2 AND value = $3'
+ INTO mbe_id USING ind_data.field, ind_data.source, ind_data.value;
+ -- RAISE NOTICE 'Search for an already matching row returned %', mbe_id;
+ IF mbe_id IS NULL THEN
+ EXECUTE $$
+ INSERT INTO metabib.$$ || ind_data.field_class || $$_field_entry (field, source, value)
+ VALUES ($$ ||
+ quote_literal(ind_data.field) || $$, $$ ||
+ quote_literal(ind_data.source) || $$, $$ ||
+ quote_literal(ind_data.value) ||
+ $$);$$;
+ END IF;
+ END IF;
+
+ END LOOP;
+
+ IF NOT b_skip_search THEN
+ PERFORM metabib.update_combined_index_vectors(bib_id);
+ PERFORM * FROM config.internal_flag WHERE name = 'ingest.disable_symspell_reification' AND enabled;
+ IF NOT FOUND THEN
+ PERFORM search.symspell_dictionary_reify();
+ END IF;
+ END IF;
+
+ RETURN;
+END;
+$func$ LANGUAGE PLPGSQL;
+
+COMMIT;
+
+++ /dev/null
-BEGIN;
-
--- INSERT-only table that catches dictionary updates to be reconciled
-CREATE UNLOGGED TABLE search.symspell_dictionary_updates (
- transaction_id BIGINT,
- keyword_count INT NOT NULL DEFAULT 0,
- title_count INT NOT NULL DEFAULT 0,
- author_count INT NOT NULL DEFAULT 0,
- subject_count INT NOT NULL DEFAULT 0,
- series_count INT NOT NULL DEFAULT 0,
- identifier_count INT NOT NULL DEFAULT 0,
-
- prefix_key TEXT NOT NULL,
-
- keyword_suggestions TEXT[],
- title_suggestions TEXT[],
- author_suggestions TEXT[],
- subject_suggestions TEXT[],
- series_suggestions TEXT[],
- identifier_suggestions TEXT[]
-);
-CREATE INDEX symspell_dictionary_updates_tid_idx ON search.symspell_dictionary_updates (transaction_id);
-
--- Function that collects this transactions additions to the unlogged update table
-CREATE OR REPLACE FUNCTION search.symspell_dictionary_reify () RETURNS SETOF search.symspell_dictionary AS $f$
- WITH new_rows AS (
- DELETE FROM search.symspell_dictionary_updates WHERE transaction_id = txid_current() RETURNING *
- ), computed_rows AS ( -- this collapses the rows deleted into the format we need for UPSERT
- SELECT SUM(keyword_count) AS keyword_count,
- SUM(title_count) AS title_count,
- SUM(author_count) AS author_count,
- SUM(subject_count) AS subject_count,
- SUM(series_count) AS series_count,
- SUM(identifier_count) AS identifier_count,
-
- prefix_key,
-
- ARRAY_REMOVE(ARRAY_AGG(DISTINCT keyword_suggestions[1]), NULL) AS keyword_suggestions,
- ARRAY_REMOVE(ARRAY_AGG(DISTINCT title_suggestions[1]), NULL) AS title_suggestions,
- ARRAY_REMOVE(ARRAY_AGG(DISTINCT author_suggestions[1]), NULL) AS author_suggestions,
- ARRAY_REMOVE(ARRAY_AGG(DISTINCT subject_suggestions[1]), NULL) AS subject_suggestions,
- ARRAY_REMOVE(ARRAY_AGG(DISTINCT series_suggestions[1]), NULL) AS series_suggestions,
- ARRAY_REMOVE(ARRAY_AGG(DISTINCT identifier_suggestions[1]), NULL) AS identifier_suggestions
- FROM new_rows
- GROUP BY prefix_key
- )
- INSERT INTO search.symspell_dictionary AS d SELECT * FROM computed_rows
- ON CONFLICT (prefix_key) DO UPDATE SET
- keyword_count = GREATEST(0, d.keyword_count + EXCLUDED.keyword_count),
- keyword_suggestions = evergreen.text_array_merge_unique(EXCLUDED.keyword_suggestions,d.keyword_suggestions),
-
- title_count = GREATEST(0, d.title_count + EXCLUDED.title_count),
- title_suggestions = evergreen.text_array_merge_unique(EXCLUDED.title_suggestions,d.title_suggestions),
-
- author_count = GREATEST(0, d.author_count + EXCLUDED.author_count),
- author_suggestions = evergreen.text_array_merge_unique(EXCLUDED.author_suggestions,d.author_suggestions),
-
- subject_count = GREATEST(0, d.subject_count + EXCLUDED.subject_count),
- subject_suggestions = evergreen.text_array_merge_unique(EXCLUDED.subject_suggestions,d.subject_suggestions),
-
- series_count = GREATEST(0, d.series_count + EXCLUDED.series_count),
- series_suggestions = evergreen.text_array_merge_unique(EXCLUDED.series_suggestions,d.series_suggestions),
-
- identifier_count = GREATEST(0, d.identifier_count + EXCLUDED.identifier_count),
- identifier_suggestions = evergreen.text_array_merge_unique(EXCLUDED.identifier_suggestions,d.identifier_suggestions)
- RETURNING *;
-$f$ LANGUAGE SQL;
-
--- simplified metabib.*_field_entry trigger that stages updates for reification in one go
-CREATE OR REPLACE FUNCTION search.symspell_maintain_entries () RETURNS TRIGGER AS $f$
-DECLARE
- search_class TEXT;
- new_value TEXT := NULL;
- old_value TEXT := NULL;
-BEGIN
- search_class := COALESCE(TG_ARGV[0], SPLIT_PART(TG_TABLE_NAME,'_',1));
-
- IF TG_OP IN ('INSERT', 'UPDATE') THEN
- new_value := NEW.value;
- END IF;
-
- IF TG_OP IN ('DELETE', 'UPDATE') THEN
- old_value := OLD.value;
- END IF;
-
- IF new_value = old_value THEN
- -- same, move along
- ELSE
- INSERT INTO search.symspell_dictionary_updates
- SELECT txid_current(), *
- FROM search.symspell_build_entries(
- new_value,
- search_class,
- old_value
- );
- END IF;
-
- RETURN NULL; -- always fired AFTER
-END;
-$f$ LANGUAGE PLPGSQL;
-
-CREATE OR REPLACE FUNCTION metabib.reingest_metabib_field_entries(
- bib_id BIGINT,
- skip_facet BOOL DEFAULT FALSE,
- skip_display BOOL DEFAULT FALSE,
- skip_browse BOOL DEFAULT FALSE,
- skip_search BOOL DEFAULT FALSE,
- only_fields INT[] DEFAULT '{}'::INT[]
-) RETURNS VOID AS $func$
-DECLARE
- fclass RECORD;
- ind_data metabib.field_entry_template%ROWTYPE;
- mbe_row metabib.browse_entry%ROWTYPE;
- mbe_id BIGINT;
- b_skip_facet BOOL;
- b_skip_display BOOL;
- b_skip_browse BOOL;
- b_skip_search BOOL;
- value_prepped TEXT;
- field_list INT[] := only_fields;
- field_types TEXT[] := '{}'::TEXT[];
-BEGIN
-
- IF field_list = '{}'::INT[] THEN
- SELECT ARRAY_AGG(id) INTO field_list FROM config.metabib_field;
- END IF;
-
- SELECT COALESCE(NULLIF(skip_facet, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name = 'ingest.skip_facet_indexing' AND enabled)) INTO b_skip_facet;
- SELECT COALESCE(NULLIF(skip_display, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name = 'ingest.skip_display_indexing' AND enabled)) INTO b_skip_display;
- SELECT COALESCE(NULLIF(skip_browse, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name = 'ingest.skip_browse_indexing' AND enabled)) INTO b_skip_browse;
- SELECT COALESCE(NULLIF(skip_search, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name = 'ingest.skip_search_indexing' AND enabled)) INTO b_skip_search;
-
- IF NOT b_skip_facet THEN field_types := field_types || '{facet}'; END IF;
- IF NOT b_skip_display THEN field_types := field_types || '{display}'; END IF;
- IF NOT b_skip_browse THEN field_types := field_types || '{browse}'; END IF;
- IF NOT b_skip_search THEN field_types := field_types || '{search}'; END IF;
-
- PERFORM * FROM config.internal_flag WHERE name = 'ingest.assume_inserts_only' AND enabled;
- IF NOT FOUND THEN
- IF NOT b_skip_search THEN
- FOR fclass IN SELECT * FROM config.metabib_class LOOP
- -- RAISE NOTICE 'Emptying out %', fclass.name;
- EXECUTE $$DELETE FROM metabib.$$ || fclass.name || $$_field_entry WHERE source = $$ || bib_id;
- END LOOP;
- END IF;
- IF NOT b_skip_facet THEN
- DELETE FROM metabib.facet_entry WHERE source = bib_id;
- END IF;
- IF NOT b_skip_display THEN
- DELETE FROM metabib.display_entry WHERE source = bib_id;
- END IF;
- IF NOT b_skip_browse THEN
- DELETE FROM metabib.browse_entry_def_map WHERE source = bib_id;
- END IF;
- END IF;
-
- FOR ind_data IN SELECT * FROM biblio.extract_metabib_field_entry( bib_id, ' ', field_types, field_list ) LOOP
-
- -- don't store what has been normalized away
- CONTINUE WHEN ind_data.value IS NULL;
-
- IF ind_data.field < 0 THEN
- ind_data.field = -1 * ind_data.field;
- END IF;
-
- IF ind_data.facet_field AND NOT b_skip_facet THEN
- INSERT INTO metabib.facet_entry (field, source, value)
- VALUES (ind_data.field, ind_data.source, ind_data.value);
- END IF;
-
- IF ind_data.display_field AND NOT b_skip_display THEN
- INSERT INTO metabib.display_entry (field, source, value)
- VALUES (ind_data.field, ind_data.source, ind_data.value);
- END IF;
-
-
- IF ind_data.browse_field AND NOT b_skip_browse THEN
- -- A caveat about this SELECT: this should take care of replacing
- -- old mbe rows when data changes, but not if normalization (by
- -- which I mean specifically the output of
- -- evergreen.oils_tsearch2()) changes. It may or may not be
- -- expensive to add a comparison of index_vector to index_vector
- -- to the WHERE clause below.
-
- CONTINUE WHEN ind_data.sort_value IS NULL;
-
- value_prepped := metabib.browse_normalize(ind_data.value, ind_data.field);
- IF ind_data.browse_nocase THEN
- SELECT INTO mbe_row * FROM metabib.browse_entry
- WHERE evergreen.lowercase(value) = evergreen.lowercase(value_prepped) AND sort_value = ind_data.sort_value
- ORDER BY sort_value, value LIMIT 1; -- gotta pick something, I guess
- ELSE
- SELECT INTO mbe_row * FROM metabib.browse_entry
- WHERE value = value_prepped AND sort_value = ind_data.sort_value;
- END IF;
-
- IF FOUND THEN
- mbe_id := mbe_row.id;
- ELSE
- INSERT INTO metabib.browse_entry
- ( value, sort_value ) VALUES
- ( value_prepped, ind_data.sort_value );
-
- mbe_id := CURRVAL('metabib.browse_entry_id_seq'::REGCLASS);
- END IF;
-
- INSERT INTO metabib.browse_entry_def_map (entry, def, source, authority)
- VALUES (mbe_id, ind_data.field, ind_data.source, ind_data.authority);
- END IF;
-
- IF ind_data.search_field AND NOT b_skip_search THEN
- -- Avoid inserting duplicate rows
- EXECUTE 'SELECT 1 FROM metabib.' || ind_data.field_class ||
- '_field_entry WHERE field = $1 AND source = $2 AND value = $3'
- INTO mbe_id USING ind_data.field, ind_data.source, ind_data.value;
- -- RAISE NOTICE 'Search for an already matching row returned %', mbe_id;
- IF mbe_id IS NULL THEN
- EXECUTE $$
- INSERT INTO metabib.$$ || ind_data.field_class || $$_field_entry (field, source, value)
- VALUES ($$ ||
- quote_literal(ind_data.field) || $$, $$ ||
- quote_literal(ind_data.source) || $$, $$ ||
- quote_literal(ind_data.value) ||
- $$);$$;
- END IF;
- END IF;
-
- END LOOP;
-
- IF NOT b_skip_search THEN
- PERFORM metabib.update_combined_index_vectors(bib_id);
- PERFORM search.symspell_dictionary_reify();
- END IF;
-
- RETURN;
-END;
-$func$ LANGUAGE PLPGSQL;
-
-COMMIT;
-
+++ /dev/null
-BEGIN;
-
-CREATE OR REPLACE FUNCTION search.disable_symspell_reification () RETURNS VOID AS $f$
- INSERT INTO config.internal_flag (name,enabled)
- VALUES ('ingest.disable_symspell_reification',TRUE)
- ON CONFLICT (name) DO UPDATE SET enabled = TRUE;
-$f$ LANGUAGE SQL;
-
-CREATE OR REPLACE FUNCTION search.enable_symspell_reification () RETURNS VOID AS $f$
- UPDATE config.internal_flag SET enabled = FALSE WHERE name = 'ingest.disable_symspell_reification';
-$f$ LANGUAGE SQL;
-
-CREATE OR REPLACE FUNCTION search.symspell_dictionary_full_reify () RETURNS SETOF search.symspell_dictionary AS $f$
- WITH new_rows AS (
- DELETE FROM search.symspell_dictionary_updates RETURNING *
- ), computed_rows AS ( -- this collapses the rows deleted into the format we need for UPSERT
- SELECT SUM(keyword_count) AS keyword_count,
- SUM(title_count) AS title_count,
- SUM(author_count) AS author_count,
- SUM(subject_count) AS subject_count,
- SUM(series_count) AS series_count,
- SUM(identifier_count) AS identifier_count,
-
- prefix_key,
-
- ARRAY_REMOVE(ARRAY_AGG(DISTINCT keyword_suggestions[1]), NULL) AS keyword_suggestions,
- ARRAY_REMOVE(ARRAY_AGG(DISTINCT title_suggestions[1]), NULL) AS title_suggestions,
- ARRAY_REMOVE(ARRAY_AGG(DISTINCT author_suggestions[1]), NULL) AS author_suggestions,
- ARRAY_REMOVE(ARRAY_AGG(DISTINCT subject_suggestions[1]), NULL) AS subject_suggestions,
- ARRAY_REMOVE(ARRAY_AGG(DISTINCT series_suggestions[1]), NULL) AS series_suggestions,
- ARRAY_REMOVE(ARRAY_AGG(DISTINCT identifier_suggestions[1]), NULL) AS identifier_suggestions
- FROM new_rows
- GROUP BY prefix_key
- )
- INSERT INTO search.symspell_dictionary AS d SELECT * FROM computed_rows
- ON CONFLICT (prefix_key) DO UPDATE SET
- keyword_count = GREATEST(0, d.keyword_count + EXCLUDED.keyword_count),
- keyword_suggestions = evergreen.text_array_merge_unique(EXCLUDED.keyword_suggestions,d.keyword_suggestions),
-
- title_count = GREATEST(0, d.title_count + EXCLUDED.title_count),
- title_suggestions = evergreen.text_array_merge_unique(EXCLUDED.title_suggestions,d.title_suggestions),
-
- author_count = GREATEST(0, d.author_count + EXCLUDED.author_count),
- author_suggestions = evergreen.text_array_merge_unique(EXCLUDED.author_suggestions,d.author_suggestions),
-
- subject_count = GREATEST(0, d.subject_count + EXCLUDED.subject_count),
- subject_suggestions = evergreen.text_array_merge_unique(EXCLUDED.subject_suggestions,d.subject_suggestions),
-
- series_count = GREATEST(0, d.series_count + EXCLUDED.series_count),
- series_suggestions = evergreen.text_array_merge_unique(EXCLUDED.series_suggestions,d.series_suggestions),
-
- identifier_count = GREATEST(0, d.identifier_count + EXCLUDED.identifier_count),
- identifier_suggestions = evergreen.text_array_merge_unique(EXCLUDED.identifier_suggestions,d.identifier_suggestions)
- RETURNING *;
-$f$ LANGUAGE SQL;
-
--- Updated again to check for delayed symspell reification
-CREATE OR REPLACE FUNCTION metabib.reingest_metabib_field_entries(
- bib_id BIGINT,
- skip_facet BOOL DEFAULT FALSE,
- skip_display BOOL DEFAULT FALSE,
- skip_browse BOOL DEFAULT FALSE,
- skip_search BOOL DEFAULT FALSE,
- only_fields INT[] DEFAULT '{}'::INT[]
-) RETURNS VOID AS $func$
-DECLARE
- fclass RECORD;
- ind_data metabib.field_entry_template%ROWTYPE;
- mbe_row metabib.browse_entry%ROWTYPE;
- mbe_id BIGINT;
- b_skip_facet BOOL;
- b_skip_display BOOL;
- b_skip_browse BOOL;
- b_skip_search BOOL;
- value_prepped TEXT;
- field_list INT[] := only_fields;
- field_types TEXT[] := '{}'::TEXT[];
-BEGIN
-
- IF field_list = '{}'::INT[] THEN
- SELECT ARRAY_AGG(id) INTO field_list FROM config.metabib_field;
- END IF;
-
- SELECT COALESCE(NULLIF(skip_facet, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name = 'ingest.skip_facet_indexing' AND enabled)) INTO b_skip_facet;
- SELECT COALESCE(NULLIF(skip_display, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name = 'ingest.skip_display_indexing' AND enabled)) INTO b_skip_display;
- SELECT COALESCE(NULLIF(skip_browse, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name = 'ingest.skip_browse_indexing' AND enabled)) INTO b_skip_browse;
- SELECT COALESCE(NULLIF(skip_search, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name = 'ingest.skip_search_indexing' AND enabled)) INTO b_skip_search;
-
- IF NOT b_skip_facet THEN field_types := field_types || '{facet}'; END IF;
- IF NOT b_skip_display THEN field_types := field_types || '{display}'; END IF;
- IF NOT b_skip_browse THEN field_types := field_types || '{browse}'; END IF;
- IF NOT b_skip_search THEN field_types := field_types || '{search}'; END IF;
-
- PERFORM * FROM config.internal_flag WHERE name = 'ingest.assume_inserts_only' AND enabled;
- IF NOT FOUND THEN
- IF NOT b_skip_search THEN
- FOR fclass IN SELECT * FROM config.metabib_class LOOP
- -- RAISE NOTICE 'Emptying out %', fclass.name;
- EXECUTE $$DELETE FROM metabib.$$ || fclass.name || $$_field_entry WHERE source = $$ || bib_id;
- END LOOP;
- END IF;
- IF NOT b_skip_facet THEN
- DELETE FROM metabib.facet_entry WHERE source = bib_id;
- END IF;
- IF NOT b_skip_display THEN
- DELETE FROM metabib.display_entry WHERE source = bib_id;
- END IF;
- IF NOT b_skip_browse THEN
- DELETE FROM metabib.browse_entry_def_map WHERE source = bib_id;
- END IF;
- END IF;
-
- FOR ind_data IN SELECT * FROM biblio.extract_metabib_field_entry( bib_id, ' ', field_types, field_list ) LOOP
-
- -- don't store what has been normalized away
- CONTINUE WHEN ind_data.value IS NULL;
-
- IF ind_data.field < 0 THEN
- ind_data.field = -1 * ind_data.field;
- END IF;
-
- IF ind_data.facet_field AND NOT b_skip_facet THEN
- INSERT INTO metabib.facet_entry (field, source, value)
- VALUES (ind_data.field, ind_data.source, ind_data.value);
- END IF;
-
- IF ind_data.display_field AND NOT b_skip_display THEN
- INSERT INTO metabib.display_entry (field, source, value)
- VALUES (ind_data.field, ind_data.source, ind_data.value);
- END IF;
-
-
- IF ind_data.browse_field AND NOT b_skip_browse THEN
- -- A caveat about this SELECT: this should take care of replacing
- -- old mbe rows when data changes, but not if normalization (by
- -- which I mean specifically the output of
- -- evergreen.oils_tsearch2()) changes. It may or may not be
- -- expensive to add a comparison of index_vector to index_vector
- -- to the WHERE clause below.
-
- CONTINUE WHEN ind_data.sort_value IS NULL;
-
- value_prepped := metabib.browse_normalize(ind_data.value, ind_data.field);
- IF ind_data.browse_nocase THEN
- SELECT INTO mbe_row * FROM metabib.browse_entry
- WHERE evergreen.lowercase(value) = evergreen.lowercase(value_prepped) AND sort_value = ind_data.sort_value
- ORDER BY sort_value, value LIMIT 1; -- gotta pick something, I guess
- ELSE
- SELECT INTO mbe_row * FROM metabib.browse_entry
- WHERE value = value_prepped AND sort_value = ind_data.sort_value;
- END IF;
-
- IF FOUND THEN
- mbe_id := mbe_row.id;
- ELSE
- INSERT INTO metabib.browse_entry
- ( value, sort_value ) VALUES
- ( value_prepped, ind_data.sort_value );
-
- mbe_id := CURRVAL('metabib.browse_entry_id_seq'::REGCLASS);
- END IF;
-
- INSERT INTO metabib.browse_entry_def_map (entry, def, source, authority)
- VALUES (mbe_id, ind_data.field, ind_data.source, ind_data.authority);
- END IF;
-
- IF ind_data.search_field AND NOT b_skip_search THEN
- -- Avoid inserting duplicate rows
- EXECUTE 'SELECT 1 FROM metabib.' || ind_data.field_class ||
- '_field_entry WHERE field = $1 AND source = $2 AND value = $3'
- INTO mbe_id USING ind_data.field, ind_data.source, ind_data.value;
- -- RAISE NOTICE 'Search for an already matching row returned %', mbe_id;
- IF mbe_id IS NULL THEN
- EXECUTE $$
- INSERT INTO metabib.$$ || ind_data.field_class || $$_field_entry (field, source, value)
- VALUES ($$ ||
- quote_literal(ind_data.field) || $$, $$ ||
- quote_literal(ind_data.source) || $$, $$ ||
- quote_literal(ind_data.value) ||
- $$);$$;
- END IF;
- END IF;
-
- END LOOP;
-
- IF NOT b_skip_search THEN
- PERFORM metabib.update_combined_index_vectors(bib_id);
- PERFORM * FROM config.internal_flag WHERE name = 'ingest.disable_symspell_reification' AND enabled;
- IF NOT FOUND THEN
- PERFORM search.symspell_dictionary_reify();
- END IF;
- END IF;
-
- RETURN;
-END;
-$func$ LANGUAGE PLPGSQL;
-
-COMMIT;
-