From: Galen Charlton Date: Fri, 28 Apr 2023 10:45:15 +0000 (+0000) Subject: LP#1997485: stamp database update X-Git-Url: https://old-git.evergreen-ils.org/?a=commitdiff_plain;h=249454c0f427cecebb37e5291c2b7d9ac5eba1ff;p=evergreen%2Fequinox.git LP#1997485: stamp database update Signed-off-by: Galen Charlton --- diff --git a/Open-ILS/src/sql/Pg/002.schema.config.sql b/Open-ILS/src/sql/Pg/002.schema.config.sql index ed9da1693f..7ceebed438 100644 --- a/Open-ILS/src/sql/Pg/002.schema.config.sql +++ b/Open-ILS/src/sql/Pg/002.schema.config.sql @@ -92,7 +92,7 @@ CREATE TRIGGER no_overlapping_deps BEFORE INSERT OR UPDATE ON config.db_patch_dependencies FOR EACH ROW EXECUTE PROCEDURE evergreen.array_overlap_check ('deprecates'); -INSERT INTO config.upgrade_log (version, applied_to) VALUES ('1365', :eg_version); -- sleary / sandbergja / gmcharlt +INSERT INTO config.upgrade_log (version, applied_to) VALUES ('1367', :eg_version); -- miker / rfrasur / gmcharlt CREATE TABLE config.bib_source ( id SERIAL PRIMARY KEY, diff --git a/Open-ILS/src/sql/Pg/upgrade/1366.schema.DYM-multi-word.sql b/Open-ILS/src/sql/Pg/upgrade/1366.schema.DYM-multi-word.sql new file mode 100644 index 0000000000..823cc4cbe0 --- /dev/null +++ b/Open-ILS/src/sql/Pg/upgrade/1366.schema.DYM-multi-word.sql @@ -0,0 +1,778 @@ +BEGIN; + +SELECT evergreen.upgrade_deps_block_check('1366', :eg_version); + +ALTER TABLE config.metabib_class + ADD COLUMN IF NOT EXISTS variant_authority_suggestion BOOL NOT NULL DEFAULT TRUE, + ADD COLUMN IF NOT EXISTS symspell_transfer_case BOOL NOT NULL DEFAULT TRUE, + ADD COLUMN IF NOT EXISTS symspell_skip_correct BOOL NOT NULL DEFAULT FALSE, + ADD COLUMN IF NOT EXISTS symspell_suggestion_verbosity INT NOT NULL DEFAULT 2, + ADD COLUMN IF NOT EXISTS max_phrase_edit_distance INT NOT NULL DEFAULT 2, + ADD COLUMN IF NOT EXISTS suggestion_word_option_count INT NOT NULL DEFAULT 5, + ADD COLUMN IF NOT EXISTS max_suggestions INT NOT NULL DEFAULT -1, + ADD COLUMN IF NOT EXISTS low_result_threshold INT NOT NULL DEFAULT 0, + ADD COLUMN IF NOT EXISTS min_suggestion_use_threshold INT NOT NULL DEFAULT 1, + ADD COLUMN IF NOT EXISTS soundex_weight INT NOT NULL DEFAULT 0, + ADD COLUMN IF NOT EXISTS pg_trgm_weight INT NOT NULL DEFAULT 0, + ADD COLUMN IF NOT EXISTS keyboard_distance_weight INT NOT NULL DEFAULT 0; + + +/* -- may not need these 2 functions +CREATE OR REPLACE FUNCTION search.symspell_parse_positive_words ( phrase TEXT ) +RETURNS SETOF TEXT AS $F$ + SELECT UNNEST + FROM (SELECT UNNEST(x), ROW_NUMBER() OVER () + FROM regexp_matches($1, '(? 0 THEN + SELECT STRING_AGG(id::TEXT,',') INTO query_part FROM config.metabib_field WHERE name = ANY (search_fields); + IF CHARACTER_LENGTH(query_part) > 0 THEN query_part := 'AND field IN ('||query_part||')'; END IF; + END IF; + + SELECT STRING_AGG(word,' ') INTO norm_input FROM search.query_parse_positions(evergreen.lowercase(raw_input)) WHERE NOT negated; + EXECUTE 'SELECT COUNT(DISTINCT source) AS recs + FROM metabib.' || search_class || '_field_entry + WHERE index_vector @@ plainto_tsquery($$simple$$,$1)' || query_part + INTO norm_count USING norm_input; + + SELECT STRING_AGG(word,' ') INTO norm_test FROM UNNEST(parsed_query_set); + FOR current_sugg IN + SELECT * + FROM search.symspell_generate_combined_suggestions( + sugg_set, + parsed_query_set, + c_skip_correct, + c_suggestion_word_option_count + ) x + LOOP + EXECUTE 'SELECT COUNT(DISTINCT source) AS recs + FROM metabib.' || search_class || '_field_entry + WHERE index_vector @@ to_tsquery($$simple$$,$1)' || query_part + INTO entry USING current_sugg.test; + SELECT STRING_AGG(word,' ') INTO norm_sugg FROM search.query_parse_positions(current_sugg.suggestion); + IF entry.recs >= c_min_suggestion_use_threshold AND (norm_count.recs = 0 OR norm_sugg <> norm_input) THEN + + output.input := raw_input; + output.norm_input := norm_input; + output.suggestion := current_sugg.suggestion; + output.suggestion_count := entry.recs; + output.prefix_key := NULL; + output.prefix_key_count := norm_count.recs; + + output.lev_distance := NULLIF(evergreen.levenshtein_damerau_edistance(norm_test, norm_sugg, c_max_phrase_edit_distance * CARDINALITY(parsed_query_set)), -1); + output.qwerty_kb_match := evergreen.qwerty_keyboard_distance_match(norm_test, norm_sugg); + output.pg_trgm_sim := similarity(norm_input, norm_sugg); + output.soundex_sim := difference(norm_input, norm_sugg) / 4.0; + + RETURN NEXT output; + END IF; + + IF c_variant_authority_suggestion THEN + FOR auth_sugg IN + SELECT DISTINCT m.value AS prefix_key, + m.sort_value AS suggestion, + v.value as raw_input, + v.sort_value as norm_input + FROM authority.simple_heading v + JOIN authority.control_set_authority_field csaf ON (csaf.id = v.atag) + JOIN authority.heading_field f ON (f.id = csaf.heading_field) + JOIN authority.simple_heading m ON (m.record = v.record AND csaf.main_entry = m.atag) + JOIN authority.control_set_bib_field csbf ON (csbf.authority_field = csaf.main_entry) + JOIN authority.control_set_bib_field_metabib_field_map csbfmfm ON (csbf.id = csbfmfm.bib_field) + JOIN config.metabib_field cmf ON ( + csbfmfm.metabib_field = cmf.id + AND (c_authority_class_restrict IS FALSE OR cmf.field_class = search_class) + AND (search_fields = '{}'::TEXT[] OR cmf.name = ANY (search_fields)) + ) + WHERE v.sort_value = norm_sugg + LOOP + EXECUTE 'SELECT COUNT(DISTINCT source) AS recs + FROM metabib.' || search_class || '_field_entry + WHERE index_vector @@ plainto_tsquery($$simple$$,$1)' || query_part + INTO auth_entry USING auth_sugg.suggestion; + IF auth_entry.recs >= c_min_suggestion_use_threshold AND (norm_count.recs = 0 OR auth_sugg.suggestion <> norm_input) THEN + output.input := auth_sugg.raw_input; + output.norm_input := auth_sugg.norm_input; + output.suggestion := auth_sugg.suggestion; + output.prefix_key := auth_sugg.prefix_key; + output.suggestion_count := auth_entry.recs * -1; -- negative value here + + output.lev_distance := 0; + output.qwerty_kb_match := 0; + output.pg_trgm_sim := 0; + output.soundex_sim := 0; + + RETURN NEXT output; + END IF; + END LOOP; + END IF; + END LOOP; + + RETURN; +END; +$F$ LANGUAGE PLPGSQL; + +CREATE OR REPLACE FUNCTION search.symspell_generate_combined_suggestions( + word_data search.symspell_lookup_output[], + pos_data search.query_parse_position[], + skip_correct BOOL DEFAULT TRUE, + max_words INT DEFAULT 0 +) RETURNS TABLE (suggestion TEXT, test TEXT) AS $f$ + my $word_data = shift; + my $pos_data = shift; + my $skip_correct = shift; + my $max_per_word = shift; + return undef unless (@$word_data and @$pos_data); + + my $last_word_pos = $$word_data[-1]{word_pos}; + my $pos_to_word_map = [ map { [] } 0 .. $last_word_pos ]; + my $parsed_query_data = { map { ($$_{word_pos} => $_) } @$pos_data }; + + for my $row (@$word_data) { + my $wp = +$$row{word_pos}; + next if ( + $skip_correct eq 't' and $$row{lev_distance} > 0 + and @{$$pos_to_word_map[$wp]} + and $$pos_to_word_map[$wp][0]{lev_distance} == 0 + ); + push @{$$pos_to_word_map[$$row{word_pos}]}, $row; + } + + gen_step($max_per_word, $pos_to_word_map, $parsed_query_data, $last_word_pos); + return undef; + + # ----------------------------- + sub gen_step { + my $max_words = shift; + my $data = shift; + my $pos_data = shift; + my $last_pos = shift; + my $prefix = shift || ''; + my $test_prefix = shift || ''; + my $current_pos = shift || 0; + + my $word_count = 0; + for my $sugg ( @{$$data[$current_pos]} ) { + my $was_inside_phrase = 0; + my $now_inside_phrase = 0; + + my $word = $$sugg{suggestion}; + $word_count++; + + my $prev_phrase = $$pos_data{$current_pos - 1}{phrase_in_input_pos}; + my $curr_phrase = $$pos_data{$current_pos}{phrase_in_input_pos}; + my $next_phrase = $$pos_data{$current_pos + 1}{phrase_in_input_pos}; + + $now_inside_phrase++ if (defined($next_phrase) and $curr_phrase == $next_phrase); + $was_inside_phrase++ if (defined($prev_phrase) and $curr_phrase == $prev_phrase); + + my $string = $prefix; + $string .= ' ' if $string; + + if (!$was_inside_phrase) { # might be starting a phrase? + $string .= '-' if ($$pos_data{$current_pos}{negated} eq 't'); + if ($now_inside_phrase) { # we are! add the double-quote + $string .= '"'; + } + $string .= $word; + } else { # definitely were in a phrase + $string .= $word; + if (!$now_inside_phrase) { # we are not any longer, add the double-quote + $string .= '"'; + } + } + + my $test_string = $test_prefix; + if ($current_pos > 0) { # have something already, need joiner + $test_string .= $curr_phrase == $prev_phrase ? ' <-> ' : ' & '; + } + $test_string .= '!' if ($$pos_data{$current_pos}{negated} eq 't'); + $test_string .= $word; + + if ($current_pos == $last_pos) { + return_next {suggestion => $string, test => $test_string}; + } else { + gen_step($max_words, $data, $pos_data, $last_pos, $string, $test_string, $current_pos + 1); + } + + last if ($max_words and $word_count >= $max_words); + } + } +$f$ LANGUAGE PLPERLU IMMUTABLE; + +-- Changing parameters, so we have to drop the old one first +DROP FUNCTION search.symspell_lookup; +CREATE FUNCTION search.symspell_lookup ( + raw_input TEXT, + search_class TEXT, + verbosity INT DEFAULT NULL, + xfer_case BOOL DEFAULT NULL, + count_threshold INT DEFAULT NULL, + soundex_weight INT DEFAULT NULL, + pg_trgm_weight INT DEFAULT NULL, + kbdist_weight INT DEFAULT NULL +) RETURNS SETOF search.symspell_lookup_output AS $F$ +DECLARE + prefix_length INT; + maxED INT; + word_list TEXT[]; + edit_list TEXT[] := '{}'; + seen_list TEXT[] := '{}'; + output search.symspell_lookup_output; + output_list search.symspell_lookup_output[]; + entry RECORD; + entry_key TEXT; + prefix_key TEXT; + sugg TEXT; + input TEXT; + word TEXT; + w_pos INT := -1; + smallest_ed INT := -1; + global_ed INT; + c_symspell_suggestion_verbosity INT; + c_min_suggestion_use_threshold INT; + c_soundex_weight INT; + c_pg_trgm_weight INT; + c_keyboard_distance_weight INT; + c_symspell_transfer_case BOOL; +BEGIN + + SELECT cmc.min_suggestion_use_threshold, + cmc.soundex_weight, + cmc.pg_trgm_weight, + cmc.keyboard_distance_weight, + cmc.symspell_transfer_case, + cmc.symspell_suggestion_verbosity + INTO c_min_suggestion_use_threshold, + c_soundex_weight, + c_pg_trgm_weight, + c_keyboard_distance_weight, + c_symspell_transfer_case, + c_symspell_suggestion_verbosity + FROM config.metabib_class cmc + WHERE cmc.name = search_class; + + c_min_suggestion_use_threshold := COALESCE(count_threshold,c_min_suggestion_use_threshold); + c_symspell_transfer_case := COALESCE(xfer_case,c_symspell_transfer_case); + c_symspell_suggestion_verbosity := COALESCE(verbosity,c_symspell_suggestion_verbosity); + c_soundex_weight := COALESCE(soundex_weight,c_soundex_weight); + c_pg_trgm_weight := COALESCE(pg_trgm_weight,c_pg_trgm_weight); + c_keyboard_distance_weight := COALESCE(kbdist_weight,c_keyboard_distance_weight); + + SELECT value::INT INTO prefix_length FROM config.internal_flag WHERE name = 'symspell.prefix_length' AND enabled; + prefix_length := COALESCE(prefix_length, 6); + + SELECT value::INT INTO maxED FROM config.internal_flag WHERE name = 'symspell.max_edit_distance' AND enabled; + maxED := COALESCE(maxED, 3); + + -- XXX This should get some more thought ... maybe search_normalize? + word_list := ARRAY_AGG(x.word) FROM search.query_parse_positions(raw_input) x; + + -- Common case exact match test for preformance + IF c_symspell_suggestion_verbosity = 0 AND CARDINALITY(word_list) = 1 AND CHARACTER_LENGTH(word_list[1]) <= prefix_length THEN + EXECUTE + 'SELECT '||search_class||'_suggestions AS suggestions, + '||search_class||'_count AS count, + prefix_key + FROM search.symspell_dictionary + WHERE prefix_key = $1 + AND '||search_class||'_count >= $2 + AND '||search_class||'_suggestions @> ARRAY[$1]' + INTO entry USING evergreen.lowercase(word_list[1]), c_min_suggestion_use_threshold; + IF entry.prefix_key IS NOT NULL THEN + output.lev_distance := 0; -- definitionally + output.prefix_key := entry.prefix_key; + output.prefix_key_count := entry.count; + output.suggestion_count := entry.count; + output.input := word_list[1]; + IF c_symspell_transfer_case THEN + output.suggestion := search.symspell_transfer_casing(output.input, entry.prefix_key); + ELSE + output.suggestion := entry.prefix_key; + END IF; + output.norm_input := entry.prefix_key; + output.qwerty_kb_match := 1; + output.pg_trgm_sim := 1; + output.soundex_sim := 1; + RETURN NEXT output; + RETURN; + END IF; + END IF; + + <> + FOREACH word IN ARRAY word_list LOOP + w_pos := w_pos + 1; + input := evergreen.lowercase(word); + + IF CHARACTER_LENGTH(input) > prefix_length THEN + prefix_key := SUBSTRING(input FROM 1 FOR prefix_length); + edit_list := ARRAY[input,prefix_key] || search.symspell_generate_edits(prefix_key, 1, maxED); + ELSE + edit_list := input || search.symspell_generate_edits(input, 1, maxED); + END IF; + + SELECT ARRAY_AGG(x ORDER BY CHARACTER_LENGTH(x) DESC) INTO edit_list FROM UNNEST(edit_list) x; + + output_list := '{}'; + seen_list := '{}'; + global_ed := NULL; + + <> + FOREACH entry_key IN ARRAY edit_list LOOP + smallest_ed := -1; + IF global_ed IS NOT NULL THEN + smallest_ed := global_ed; + END IF; + FOR entry IN EXECUTE + 'SELECT '||search_class||'_suggestions AS suggestions, + '||search_class||'_count AS count, + prefix_key + FROM search.symspell_dictionary + WHERE prefix_key = $1 + AND '||search_class||'_suggestions IS NOT NULL' + USING entry_key + LOOP + FOREACH sugg IN ARRAY entry.suggestions LOOP + IF NOT seen_list @> ARRAY[sugg] THEN + seen_list := seen_list || sugg; + IF input = sugg THEN -- exact match, no need to spend time on a call + output.lev_distance := 0; + output.suggestion_count = entry.count; + ELSIF ABS(CHARACTER_LENGTH(input) - CHARACTER_LENGTH(sugg)) > maxED THEN + -- They are definitionally too different to consider, just move on. + CONTINUE; + ELSE + --output.lev_distance := levenshtein_less_equal( + output.lev_distance := evergreen.levenshtein_damerau_edistance( + input, + sugg, + maxED + ); + IF output.lev_distance < 0 THEN + -- The Perl module returns -1 for "more distant than max". + output.lev_distance := maxED + 1; + -- This short-circuit's the count test below for speed, bypassing + -- a couple useless tests. + output.suggestion_count := -1; + ELSE + EXECUTE 'SELECT '||search_class||'_count FROM search.symspell_dictionary WHERE prefix_key = $1' + INTO output.suggestion_count USING sugg; + END IF; + END IF; + + -- The caller passes a minimum suggestion count threshold (or uses + -- the default of 0) and if the suggestion has that many or less uses + -- then we move on to the next suggestion, since this one is too rare. + CONTINUE WHEN output.suggestion_count < c_min_suggestion_use_threshold; + + -- Track the smallest edit distance among suggestions from this prefix key. + IF smallest_ed = -1 OR output.lev_distance < smallest_ed THEN + smallest_ed := output.lev_distance; + END IF; + + -- Track the smallest edit distance for all prefix keys for this word. + IF global_ed IS NULL OR smallest_ed < global_ed THEN + global_ed = smallest_ed; + END IF; + + -- Only proceed if the edit distance is <= the max for the dictionary. + IF output.lev_distance <= maxED THEN + IF output.lev_distance > global_ed AND c_symspell_suggestion_verbosity <= 1 THEN + -- Lev distance is our main similarity measure. While + -- trgm or soundex similarity could be the main filter, + -- Lev is both language agnostic and faster. + -- + -- Here we will skip suggestions that have a longer edit distance + -- than the shortest we've already found. This is simply an + -- optimization that allows us to avoid further processing + -- of this entry. It would be filtered out later. + + CONTINUE; + END IF; + + -- If we have an exact match on the suggestion key we can also avoid + -- some function calls. + IF output.lev_distance = 0 THEN + output.qwerty_kb_match := 1; + output.pg_trgm_sim := 1; + output.soundex_sim := 1; + ELSE + output.qwerty_kb_match := evergreen.qwerty_keyboard_distance_match(input, sugg); + output.pg_trgm_sim := similarity(input, sugg); + output.soundex_sim := difference(input, sugg) / 4.0; + END IF; + + -- Fill in some fields + IF c_symspell_transfer_case THEN + output.suggestion := search.symspell_transfer_casing(word, sugg); + ELSE + output.suggestion := sugg; + END IF; + output.prefix_key := entry.prefix_key; + output.prefix_key_count := entry.count; + output.input := word; + output.norm_input := input; + output.word_pos := w_pos; + + -- We can't "cache" a set of generated records directly, so + -- here we build up an array of search.symspell_lookup_output + -- records that we can revivicate later as a table using UNNEST(). + output_list := output_list || output; + + EXIT entry_key_loop WHEN smallest_ed = 0 AND c_symspell_suggestion_verbosity = 0; -- exact match early exit + CONTINUE entry_key_loop WHEN smallest_ed = 0 AND c_symspell_suggestion_verbosity = 1; -- exact match early jump to the next key + END IF; -- maxED test + END IF; -- suggestion not seen test + END LOOP; -- loop over suggestions + END LOOP; -- loop over entries + END LOOP; -- loop over entry_keys + + -- Now we're done examining this word + IF c_symspell_suggestion_verbosity = 0 THEN + -- Return the "best" suggestion from the smallest edit + -- distance group. We define best based on the weighting + -- of the non-lev similarity measures and use the suggestion + -- use count to break ties. + RETURN QUERY + SELECT * FROM UNNEST(output_list) + ORDER BY lev_distance, + (soundex_sim * c_soundex_weight) + + (pg_trgm_sim * c_pg_trgm_weight) + + (qwerty_kb_match * c_keyboard_distance_weight) DESC, + suggestion_count DESC + LIMIT 1; + ELSIF c_symspell_suggestion_verbosity = 1 THEN + -- Return all suggestions from the smallest + -- edit distance group. + RETURN QUERY + SELECT * FROM UNNEST(output_list) WHERE lev_distance = smallest_ed + ORDER BY (soundex_sim * c_soundex_weight) + + (pg_trgm_sim * c_pg_trgm_weight) + + (qwerty_kb_match * c_keyboard_distance_weight) DESC, + suggestion_count DESC; + ELSIF c_symspell_suggestion_verbosity = 2 THEN + -- Return everything we find, along with relevant stats + RETURN QUERY + SELECT * FROM UNNEST(output_list) + ORDER BY lev_distance, + (soundex_sim * c_soundex_weight) + + (pg_trgm_sim * c_pg_trgm_weight) + + (qwerty_kb_match * c_keyboard_distance_weight) DESC, + suggestion_count DESC; + ELSIF c_symspell_suggestion_verbosity = 3 THEN + -- Return everything we find from the two smallest edit distance groups + RETURN QUERY + SELECT * FROM UNNEST(output_list) + WHERE lev_distance IN (SELECT DISTINCT lev_distance FROM UNNEST(output_list) ORDER BY 1 LIMIT 2) + ORDER BY lev_distance, + (soundex_sim * c_soundex_weight) + + (pg_trgm_sim * c_pg_trgm_weight) + + (qwerty_kb_match * c_keyboard_distance_weight) DESC, + suggestion_count DESC; + ELSIF c_symspell_suggestion_verbosity = 4 THEN + -- Return everything we find from the two smallest edit distance groups that are NOT 0 distance + RETURN QUERY + SELECT * FROM UNNEST(output_list) + WHERE lev_distance IN (SELECT DISTINCT lev_distance FROM UNNEST(output_list) WHERE lev_distance > 0 ORDER BY 1 LIMIT 2) + ORDER BY lev_distance, + (soundex_sim * c_soundex_weight) + + (pg_trgm_sim * c_pg_trgm_weight) + + (qwerty_kb_match * c_keyboard_distance_weight) DESC, + suggestion_count DESC; + END IF; + END LOOP; -- loop over words +END; +$F$ LANGUAGE PLPGSQL; + +COMMIT; + +-- Find the "broadest" value in use, and update the defaults for all classes +DO $do$ +DECLARE + val TEXT; +BEGIN + SELECT FIRST(s.value ORDER BY t.depth) INTO val + FROM actor.org_unit_setting s + JOIN actor.org_unit u ON (u.id = s.org_unit) + JOIN actor.org_unit_type t ON (u.ou_type = t.id) + WHERE s.name = 'opac.did_you_mean.low_result_threshold'; + + IF FOUND AND val IS NOT NULL THEN + UPDATE config.metabib_class SET low_result_threshold = val::INT; + END IF; + + SELECT FIRST(s.value ORDER BY t.depth) INTO val + FROM actor.org_unit_setting s + JOIN actor.org_unit u ON (u.id = s.org_unit) + JOIN actor.org_unit_type t ON (u.ou_type = t.id) + WHERE s.name = 'opac.did_you_mean.max_suggestions'; + + IF FOUND AND val IS NOT NULL THEN + UPDATE config.metabib_class SET max_suggestions = val::INT; + END IF; + + SELECT FIRST(s.value ORDER BY t.depth) INTO val + FROM actor.org_unit_setting s + JOIN actor.org_unit u ON (u.id = s.org_unit) + JOIN actor.org_unit_type t ON (u.ou_type = t.id) + WHERE s.name = 'search.symspell.min_suggestion_use_threshold'; + + IF FOUND AND val IS NOT NULL THEN + UPDATE config.metabib_class SET min_suggestion_use_threshold = val::INT; + END IF; + + SELECT FIRST(s.value ORDER BY t.depth) INTO val + FROM actor.org_unit_setting s + JOIN actor.org_unit u ON (u.id = s.org_unit) + JOIN actor.org_unit_type t ON (u.ou_type = t.id) + WHERE s.name = 'search.symspell.soundex.weight'; + + IF FOUND AND val IS NOT NULL THEN + UPDATE config.metabib_class SET soundex_weight = val::INT; + END IF; + + SELECT FIRST(s.value ORDER BY t.depth) INTO val + FROM actor.org_unit_setting s + JOIN actor.org_unit u ON (u.id = s.org_unit) + JOIN actor.org_unit_type t ON (u.ou_type = t.id) + WHERE s.name = 'search.symspell.pg_trgm.weight'; + + IF FOUND AND val IS NOT NULL THEN + UPDATE config.metabib_class SET pg_trgm_weight = val::INT; + END IF; + + SELECT FIRST(s.value ORDER BY t.depth) INTO val + FROM actor.org_unit_setting s + JOIN actor.org_unit u ON (u.id = s.org_unit) + JOIN actor.org_unit_type t ON (u.ou_type = t.id) + WHERE s.name = 'search.symspell.keyboard_distance.weight'; + + IF FOUND AND val IS NOT NULL THEN + UPDATE config.metabib_class SET keyboard_distance_weight = val::INT; + END IF; +END; +$do$; + diff --git a/Open-ILS/src/sql/Pg/upgrade/1367.schema.DYM-authority-data-dictionary.sql b/Open-ILS/src/sql/Pg/upgrade/1367.schema.DYM-authority-data-dictionary.sql new file mode 100644 index 0000000000..a0691b2c8a --- /dev/null +++ b/Open-ILS/src/sql/Pg/upgrade/1367.schema.DYM-authority-data-dictionary.sql @@ -0,0 +1,303 @@ +BEGIN; + +SELECT evergreen.upgrade_deps_block_check('1367', :eg_version); + +-- Bring authorized headings into the symspell dictionary. Side +-- loader should be used for Real Sites. See below the COMMIT. +/* +SELECT search.symspell_build_and_merge_entries(h.value, m.field_class, NULL) + FROM authority.simple_heading h + JOIN authority.control_set_auth_field_metabib_field_map_refs a ON (a.authority_field = h.atag) + JOIN config.metabib_field m ON (a.metabib_field=m.id); +*/ + +-- ensure that this function is in place; it hitherto had not been +-- present in baseline + +CREATE OR REPLACE FUNCTION search.symspell_build_and_merge_entries ( + full_input TEXT, + source_class TEXT, + old_input TEXT DEFAULT NULL, + include_phrases BOOL DEFAULT FALSE +) RETURNS SETOF search.symspell_dictionary AS $F$ +DECLARE + new_entry RECORD; + conflict_entry RECORD; +BEGIN + + IF full_input = old_input THEN -- neither NULL, and are the same + RETURN; + END IF; + + FOR new_entry IN EXECUTE $q$ + SELECT count, + prefix_key, + s AS suggestions + FROM (SELECT prefix_key, + ARRAY_AGG(DISTINCT $q$ || source_class || $q$_suggestions[1]) s, + SUM($q$ || source_class || $q$_count) count + FROM search.symspell_build_entries($1, $2, $3, $4) + GROUP BY 1) x + $q$ USING full_input, source_class, old_input, include_phrases + LOOP + EXECUTE $q$ + SELECT prefix_key, + $q$ || source_class || $q$_suggestions suggestions, + $q$ || source_class || $q$_count count + FROM search.symspell_dictionary + WHERE prefix_key = $1 $q$ + INTO conflict_entry + USING new_entry.prefix_key; + + IF new_entry.count <> 0 THEN -- Real word, and count changed + IF conflict_entry.prefix_key IS NOT NULL THEN -- we'll be updating + IF conflict_entry.count > 0 THEN -- it's a real word + RETURN QUERY EXECUTE $q$ + UPDATE search.symspell_dictionary + SET $q$ || source_class || $q$_count = $2 + WHERE prefix_key = $1 + RETURNING * $q$ + USING new_entry.prefix_key, GREATEST(0, new_entry.count + conflict_entry.count); + ELSE -- it was a prefix key or delete-emptied word before + IF conflict_entry.suggestions @> new_entry.suggestions THEN -- already have all suggestions here... + RETURN QUERY EXECUTE $q$ + UPDATE search.symspell_dictionary + SET $q$ || source_class || $q$_count = $2 + WHERE prefix_key = $1 + RETURNING * $q$ + USING new_entry.prefix_key, GREATEST(0, new_entry.count); + ELSE -- new suggestion! + RETURN QUERY EXECUTE $q$ + UPDATE search.symspell_dictionary + SET $q$ || source_class || $q$_count = $2, + $q$ || source_class || $q$_suggestions = $3 + WHERE prefix_key = $1 + RETURNING * $q$ + USING new_entry.prefix_key, GREATEST(0, new_entry.count), evergreen.text_array_merge_unique(conflict_entry.suggestions,new_entry.suggestions); + END IF; + END IF; + ELSE + -- We keep the on-conflict clause just in case... + RETURN QUERY EXECUTE $q$ + INSERT INTO search.symspell_dictionary AS d ( + $q$ || source_class || $q$_count, + prefix_key, + $q$ || source_class || $q$_suggestions + ) VALUES ( $1, $2, $3 ) ON CONFLICT (prefix_key) DO + UPDATE SET $q$ || source_class || $q$_count = d.$q$ || source_class || $q$_count + EXCLUDED.$q$ || source_class || $q$_count, + $q$ || source_class || $q$_suggestions = evergreen.text_array_merge_unique(d.$q$ || source_class || $q$_suggestions, EXCLUDED.$q$ || source_class || $q$_suggestions) + RETURNING * $q$ + USING new_entry.count, new_entry.prefix_key, new_entry.suggestions; + END IF; + ELSE -- key only, or no change + IF conflict_entry.prefix_key IS NOT NULL THEN -- we'll be updating + IF NOT conflict_entry.suggestions @> new_entry.suggestions THEN -- There are new suggestions + RETURN QUERY EXECUTE $q$ + UPDATE search.symspell_dictionary + SET $q$ || source_class || $q$_suggestions = $2 + WHERE prefix_key = $1 + RETURNING * $q$ + USING new_entry.prefix_key, evergreen.text_array_merge_unique(conflict_entry.suggestions,new_entry.suggestions); + END IF; + ELSE + RETURN QUERY EXECUTE $q$ + INSERT INTO search.symspell_dictionary AS d ( + $q$ || source_class || $q$_count, + prefix_key, + $q$ || source_class || $q$_suggestions + ) VALUES ( $1, $2, $3 ) ON CONFLICT (prefix_key) DO -- key exists, suggestions may be added due to this entry + UPDATE SET $q$ || source_class || $q$_suggestions = evergreen.text_array_merge_unique(d.$q$ || source_class || $q$_suggestions, EXCLUDED.$q$ || source_class || $q$_suggestions) + RETURNING * $q$ + USING new_entry.count, new_entry.prefix_key, new_entry.suggestions; + END IF; + END IF; + END LOOP; +END; +$F$ LANGUAGE PLPGSQL; + +CREATE OR REPLACE FUNCTION search.symspell_maintain_entries () RETURNS TRIGGER AS $f$ +DECLARE + search_class TEXT; + new_value TEXT := NULL; + old_value TEXT := NULL; +BEGIN + + IF TG_TABLE_SCHEMA = 'authority' THEN + SELECT m.field_class INTO search_class + FROM authority.control_set_auth_field_metabib_field_map_refs a + JOIN config.metabib_field m ON (a.metabib_field=m.id) + WHERE a.authority_field = NEW.atag; + + IF NOT FOUND THEN + RETURN NULL; + END IF; + ELSE + search_class := COALESCE(TG_ARGV[0], SPLIT_PART(TG_TABLE_NAME,'_',1)); + END IF; + + IF TG_OP IN ('INSERT', 'UPDATE') THEN + new_value := NEW.value; + END IF; + + IF TG_OP IN ('DELETE', 'UPDATE') THEN + old_value := OLD.value; + END IF; + + PERFORM * FROM search.symspell_build_and_merge_entries(new_value, search_class, old_value); + + RETURN NULL; -- always fired AFTER +END; +$f$ LANGUAGE PLPGSQL; + +CREATE TRIGGER maintain_symspell_entries_tgr + AFTER INSERT OR UPDATE OR DELETE ON authority.simple_heading + FOR EACH ROW EXECUTE PROCEDURE search.symspell_maintain_entries(); + +COMMIT; + +\qecho '' +\qecho 'If the Evergreen database has authority records, a reingest of' +\qecho 'the search suggestion dictionary is recommended.' +\qecho '' +\qecho 'The following should be run at the end of the upgrade before any' +\qecho 'reingest occurs. Because new triggers are installed already,' +\qecho 'updates to indexed strings will cause zero-count dictionary entries' +\qecho 'to be recorded which will require updating every row again (or' +\qecho 'starting from scratch) so best to do this before other batch' +\qecho 'changes. A later reingest that does not significantly change' +\qecho 'indexed strings will /not/ cause table bloat here, and will be' +\qecho 'as fast as normal. A copy of the SQL in a ready-to-use, non-escaped' +\qecho 'form is available inside a comment at the end of this upgrade sub-' +\qecho 'script so you do not need to copy this comment from the psql ouptut.' +\qecho '' +\qecho '\\a' +\qecho '\\t' +\qecho '' +\qecho '\\o title' +\qecho 'select value from metabib.title_field_entry;' +\qecho 'select h.value' +\qecho ' from authority.simple_heading h' +\qecho ' join authority.control_set_auth_field_metabib_field_map_refs a on (a.authority_field = h.atag)' +\qecho ' join config.metabib_field m on (a.metabib_field=m.id and m.field_class=\'title\');' +\qecho '\\o author' +\qecho 'select value from metabib.author_field_entry;' +\qecho 'select h.value' +\qecho ' from authority.simple_heading h' +\qecho ' join authority.control_set_auth_field_metabib_field_map_refs a on (a.authority_field = h.atag)' +\qecho ' join config.metabib_field m on (a.metabib_field=m.id and m.field_class=\'author\');' +\qecho '\\o subject' +\qecho 'select value from metabib.subject_field_entry;' +\qecho 'select h.value' +\qecho ' from authority.simple_heading h' +\qecho ' join authority.control_set_auth_field_metabib_field_map_refs a on (a.authority_field = h.atag)' +\qecho ' join config.metabib_field m on (a.metabib_field=m.id and m.field_class=\'subject\');' +\qecho '\\o series' +\qecho 'select value from metabib.series_field_entry;' +\qecho '\\o identifier' +\qecho 'select value from metabib.identifier_field_entry;' +\qecho '\\o keyword' +\qecho 'select value from metabib.keyword_field_entry;' +\qecho '' +\qecho '\\o' +\qecho '\\a' +\qecho '\\t' +\qecho '' +\qecho '// Then, at the command line:' +\qecho '' +\qecho '$ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl title > title.sql' +\qecho '$ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl author > author.sql' +\qecho '$ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl subject > subject.sql' +\qecho '$ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl series > series.sql' +\qecho '$ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl identifier > identifier.sql' +\qecho '$ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl keyword > keyword.sql' +\qecho '' +\qecho '// And, back in psql' +\qecho '' +\qecho 'ALTER TABLE search.symspell_dictionary SET UNLOGGED;' +\qecho 'TRUNCATE search.symspell_dictionary;' +\qecho '' +\qecho '\\i identifier.sql' +\qecho '\\i author.sql' +\qecho '\\i title.sql' +\qecho '\\i subject.sql' +\qecho '\\i series.sql' +\qecho '\\i keyword.sql' +\qecho '' +\qecho 'CLUSTER search.symspell_dictionary USING symspell_dictionary_pkey;' +\qecho 'REINDEX TABLE search.symspell_dictionary;' +\qecho 'ALTER TABLE search.symspell_dictionary SET LOGGED;' +\qecho 'VACUUM ANALYZE search.symspell_dictionary;' +\qecho '' +\qecho 'DROP TABLE search.symspell_dictionary_partial_title;' +\qecho 'DROP TABLE search.symspell_dictionary_partial_author;' +\qecho 'DROP TABLE search.symspell_dictionary_partial_subject;' +\qecho 'DROP TABLE search.symspell_dictionary_partial_series;' +\qecho 'DROP TABLE search.symspell_dictionary_partial_identifier;' +\qecho 'DROP TABLE search.symspell_dictionary_partial_keyword;' + +/* +\a +\t + +\o title +select value from metabib.title_field_entry; +select h.value + from authority.simple_heading h + join authority.control_set_auth_field_metabib_field_map_refs a on (a.authority_field = h.atag) + join config.metabib_field m on (a.metabib_field=m.id and m.field_class='title'); +\o author +select value from metabib.author_field_entry; +select h.value + from authority.simple_heading h + join authority.control_set_auth_field_metabib_field_map_refs a on (a.authority_field = h.atag) + join config.metabib_field m on (a.metabib_field=m.id and m.field_class='author'); +\o subject +select value from metabib.subject_field_entry; +select h.value + from authority.simple_heading h + join authority.control_set_auth_field_metabib_field_map_refs a on (a.authority_field = h.atag) + join config.metabib_field m on (a.metabib_field=m.id and m.field_class='subject'); +\o series +select value from metabib.series_field_entry; +\o identifier +select value from metabib.identifier_field_entry; +\o keyword +select value from metabib.keyword_field_entry; + +\o +\a +\t + +// Then, at the command line: + +$ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl title > title.sql +$ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl author > author.sql +$ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl subject > subject.sql +$ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl series > series.sql +$ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl identifier > identifier.sql +$ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl keyword > keyword.sql + +// And, back in psql + +ALTER TABLE search.symspell_dictionary SET UNLOGGED; +TRUNCATE search.symspell_dictionary; + +\i identifier.sql +\i author.sql +\i title.sql +\i subject.sql +\i series.sql +\i keyword.sql + +CLUSTER search.symspell_dictionary USING symspell_dictionary_pkey; +REINDEX TABLE search.symspell_dictionary; +ALTER TABLE search.symspell_dictionary SET LOGGED; +VACUUM ANALYZE search.symspell_dictionary; + +DROP TABLE search.symspell_dictionary_partial_title; +DROP TABLE search.symspell_dictionary_partial_author; +DROP TABLE search.symspell_dictionary_partial_subject; +DROP TABLE search.symspell_dictionary_partial_series; +DROP TABLE search.symspell_dictionary_partial_identifier; +DROP TABLE search.symspell_dictionary_partial_keyword; +*/ diff --git a/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.DYM-multi-word.sql b/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.DYM-multi-word.sql deleted file mode 100644 index aeb4304b88..0000000000 --- a/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.DYM-multi-word.sql +++ /dev/null @@ -1,778 +0,0 @@ -BEGIN; - -SELECT evergreen.upgrade_deps_block_check('XXXX', :eg_version); - -ALTER TABLE config.metabib_class - ADD COLUMN IF NOT EXISTS variant_authority_suggestion BOOL NOT NULL DEFAULT TRUE, - ADD COLUMN IF NOT EXISTS symspell_transfer_case BOOL NOT NULL DEFAULT TRUE, - ADD COLUMN IF NOT EXISTS symspell_skip_correct BOOL NOT NULL DEFAULT FALSE, - ADD COLUMN IF NOT EXISTS symspell_suggestion_verbosity INT NOT NULL DEFAULT 2, - ADD COLUMN IF NOT EXISTS max_phrase_edit_distance INT NOT NULL DEFAULT 2, - ADD COLUMN IF NOT EXISTS suggestion_word_option_count INT NOT NULL DEFAULT 5, - ADD COLUMN IF NOT EXISTS max_suggestions INT NOT NULL DEFAULT -1, - ADD COLUMN IF NOT EXISTS low_result_threshold INT NOT NULL DEFAULT 0, - ADD COLUMN IF NOT EXISTS min_suggestion_use_threshold INT NOT NULL DEFAULT 1, - ADD COLUMN IF NOT EXISTS soundex_weight INT NOT NULL DEFAULT 0, - ADD COLUMN IF NOT EXISTS pg_trgm_weight INT NOT NULL DEFAULT 0, - ADD COLUMN IF NOT EXISTS keyboard_distance_weight INT NOT NULL DEFAULT 0; - - -/* -- may not need these 2 functions -CREATE OR REPLACE FUNCTION search.symspell_parse_positive_words ( phrase TEXT ) -RETURNS SETOF TEXT AS $F$ - SELECT UNNEST - FROM (SELECT UNNEST(x), ROW_NUMBER() OVER () - FROM regexp_matches($1, '(? 0 THEN - SELECT STRING_AGG(id::TEXT,',') INTO query_part FROM config.metabib_field WHERE name = ANY (search_fields); - IF CHARACTER_LENGTH(query_part) > 0 THEN query_part := 'AND field IN ('||query_part||')'; END IF; - END IF; - - SELECT STRING_AGG(word,' ') INTO norm_input FROM search.query_parse_positions(evergreen.lowercase(raw_input)) WHERE NOT negated; - EXECUTE 'SELECT COUNT(DISTINCT source) AS recs - FROM metabib.' || search_class || '_field_entry - WHERE index_vector @@ plainto_tsquery($$simple$$,$1)' || query_part - INTO norm_count USING norm_input; - - SELECT STRING_AGG(word,' ') INTO norm_test FROM UNNEST(parsed_query_set); - FOR current_sugg IN - SELECT * - FROM search.symspell_generate_combined_suggestions( - sugg_set, - parsed_query_set, - c_skip_correct, - c_suggestion_word_option_count - ) x - LOOP - EXECUTE 'SELECT COUNT(DISTINCT source) AS recs - FROM metabib.' || search_class || '_field_entry - WHERE index_vector @@ to_tsquery($$simple$$,$1)' || query_part - INTO entry USING current_sugg.test; - SELECT STRING_AGG(word,' ') INTO norm_sugg FROM search.query_parse_positions(current_sugg.suggestion); - IF entry.recs >= c_min_suggestion_use_threshold AND (norm_count.recs = 0 OR norm_sugg <> norm_input) THEN - - output.input := raw_input; - output.norm_input := norm_input; - output.suggestion := current_sugg.suggestion; - output.suggestion_count := entry.recs; - output.prefix_key := NULL; - output.prefix_key_count := norm_count.recs; - - output.lev_distance := NULLIF(evergreen.levenshtein_damerau_edistance(norm_test, norm_sugg, c_max_phrase_edit_distance * CARDINALITY(parsed_query_set)), -1); - output.qwerty_kb_match := evergreen.qwerty_keyboard_distance_match(norm_test, norm_sugg); - output.pg_trgm_sim := similarity(norm_input, norm_sugg); - output.soundex_sim := difference(norm_input, norm_sugg) / 4.0; - - RETURN NEXT output; - END IF; - - IF c_variant_authority_suggestion THEN - FOR auth_sugg IN - SELECT DISTINCT m.value AS prefix_key, - m.sort_value AS suggestion, - v.value as raw_input, - v.sort_value as norm_input - FROM authority.simple_heading v - JOIN authority.control_set_authority_field csaf ON (csaf.id = v.atag) - JOIN authority.heading_field f ON (f.id = csaf.heading_field) - JOIN authority.simple_heading m ON (m.record = v.record AND csaf.main_entry = m.atag) - JOIN authority.control_set_bib_field csbf ON (csbf.authority_field = csaf.main_entry) - JOIN authority.control_set_bib_field_metabib_field_map csbfmfm ON (csbf.id = csbfmfm.bib_field) - JOIN config.metabib_field cmf ON ( - csbfmfm.metabib_field = cmf.id - AND (c_authority_class_restrict IS FALSE OR cmf.field_class = search_class) - AND (search_fields = '{}'::TEXT[] OR cmf.name = ANY (search_fields)) - ) - WHERE v.sort_value = norm_sugg - LOOP - EXECUTE 'SELECT COUNT(DISTINCT source) AS recs - FROM metabib.' || search_class || '_field_entry - WHERE index_vector @@ plainto_tsquery($$simple$$,$1)' || query_part - INTO auth_entry USING auth_sugg.suggestion; - IF auth_entry.recs >= c_min_suggestion_use_threshold AND (norm_count.recs = 0 OR auth_sugg.suggestion <> norm_input) THEN - output.input := auth_sugg.raw_input; - output.norm_input := auth_sugg.norm_input; - output.suggestion := auth_sugg.suggestion; - output.prefix_key := auth_sugg.prefix_key; - output.suggestion_count := auth_entry.recs * -1; -- negative value here - - output.lev_distance := 0; - output.qwerty_kb_match := 0; - output.pg_trgm_sim := 0; - output.soundex_sim := 0; - - RETURN NEXT output; - END IF; - END LOOP; - END IF; - END LOOP; - - RETURN; -END; -$F$ LANGUAGE PLPGSQL; - -CREATE OR REPLACE FUNCTION search.symspell_generate_combined_suggestions( - word_data search.symspell_lookup_output[], - pos_data search.query_parse_position[], - skip_correct BOOL DEFAULT TRUE, - max_words INT DEFAULT 0 -) RETURNS TABLE (suggestion TEXT, test TEXT) AS $f$ - my $word_data = shift; - my $pos_data = shift; - my $skip_correct = shift; - my $max_per_word = shift; - return undef unless (@$word_data and @$pos_data); - - my $last_word_pos = $$word_data[-1]{word_pos}; - my $pos_to_word_map = [ map { [] } 0 .. $last_word_pos ]; - my $parsed_query_data = { map { ($$_{word_pos} => $_) } @$pos_data }; - - for my $row (@$word_data) { - my $wp = +$$row{word_pos}; - next if ( - $skip_correct eq 't' and $$row{lev_distance} > 0 - and @{$$pos_to_word_map[$wp]} - and $$pos_to_word_map[$wp][0]{lev_distance} == 0 - ); - push @{$$pos_to_word_map[$$row{word_pos}]}, $row; - } - - gen_step($max_per_word, $pos_to_word_map, $parsed_query_data, $last_word_pos); - return undef; - - # ----------------------------- - sub gen_step { - my $max_words = shift; - my $data = shift; - my $pos_data = shift; - my $last_pos = shift; - my $prefix = shift || ''; - my $test_prefix = shift || ''; - my $current_pos = shift || 0; - - my $word_count = 0; - for my $sugg ( @{$$data[$current_pos]} ) { - my $was_inside_phrase = 0; - my $now_inside_phrase = 0; - - my $word = $$sugg{suggestion}; - $word_count++; - - my $prev_phrase = $$pos_data{$current_pos - 1}{phrase_in_input_pos}; - my $curr_phrase = $$pos_data{$current_pos}{phrase_in_input_pos}; - my $next_phrase = $$pos_data{$current_pos + 1}{phrase_in_input_pos}; - - $now_inside_phrase++ if (defined($next_phrase) and $curr_phrase == $next_phrase); - $was_inside_phrase++ if (defined($prev_phrase) and $curr_phrase == $prev_phrase); - - my $string = $prefix; - $string .= ' ' if $string; - - if (!$was_inside_phrase) { # might be starting a phrase? - $string .= '-' if ($$pos_data{$current_pos}{negated} eq 't'); - if ($now_inside_phrase) { # we are! add the double-quote - $string .= '"'; - } - $string .= $word; - } else { # definitely were in a phrase - $string .= $word; - if (!$now_inside_phrase) { # we are not any longer, add the double-quote - $string .= '"'; - } - } - - my $test_string = $test_prefix; - if ($current_pos > 0) { # have something already, need joiner - $test_string .= $curr_phrase == $prev_phrase ? ' <-> ' : ' & '; - } - $test_string .= '!' if ($$pos_data{$current_pos}{negated} eq 't'); - $test_string .= $word; - - if ($current_pos == $last_pos) { - return_next {suggestion => $string, test => $test_string}; - } else { - gen_step($max_words, $data, $pos_data, $last_pos, $string, $test_string, $current_pos + 1); - } - - last if ($max_words and $word_count >= $max_words); - } - } -$f$ LANGUAGE PLPERLU IMMUTABLE; - --- Changing parameters, so we have to drop the old one first -DROP FUNCTION search.symspell_lookup; -CREATE FUNCTION search.symspell_lookup ( - raw_input TEXT, - search_class TEXT, - verbosity INT DEFAULT NULL, - xfer_case BOOL DEFAULT NULL, - count_threshold INT DEFAULT NULL, - soundex_weight INT DEFAULT NULL, - pg_trgm_weight INT DEFAULT NULL, - kbdist_weight INT DEFAULT NULL -) RETURNS SETOF search.symspell_lookup_output AS $F$ -DECLARE - prefix_length INT; - maxED INT; - word_list TEXT[]; - edit_list TEXT[] := '{}'; - seen_list TEXT[] := '{}'; - output search.symspell_lookup_output; - output_list search.symspell_lookup_output[]; - entry RECORD; - entry_key TEXT; - prefix_key TEXT; - sugg TEXT; - input TEXT; - word TEXT; - w_pos INT := -1; - smallest_ed INT := -1; - global_ed INT; - c_symspell_suggestion_verbosity INT; - c_min_suggestion_use_threshold INT; - c_soundex_weight INT; - c_pg_trgm_weight INT; - c_keyboard_distance_weight INT; - c_symspell_transfer_case BOOL; -BEGIN - - SELECT cmc.min_suggestion_use_threshold, - cmc.soundex_weight, - cmc.pg_trgm_weight, - cmc.keyboard_distance_weight, - cmc.symspell_transfer_case, - cmc.symspell_suggestion_verbosity - INTO c_min_suggestion_use_threshold, - c_soundex_weight, - c_pg_trgm_weight, - c_keyboard_distance_weight, - c_symspell_transfer_case, - c_symspell_suggestion_verbosity - FROM config.metabib_class cmc - WHERE cmc.name = search_class; - - c_min_suggestion_use_threshold := COALESCE(count_threshold,c_min_suggestion_use_threshold); - c_symspell_transfer_case := COALESCE(xfer_case,c_symspell_transfer_case); - c_symspell_suggestion_verbosity := COALESCE(verbosity,c_symspell_suggestion_verbosity); - c_soundex_weight := COALESCE(soundex_weight,c_soundex_weight); - c_pg_trgm_weight := COALESCE(pg_trgm_weight,c_pg_trgm_weight); - c_keyboard_distance_weight := COALESCE(kbdist_weight,c_keyboard_distance_weight); - - SELECT value::INT INTO prefix_length FROM config.internal_flag WHERE name = 'symspell.prefix_length' AND enabled; - prefix_length := COALESCE(prefix_length, 6); - - SELECT value::INT INTO maxED FROM config.internal_flag WHERE name = 'symspell.max_edit_distance' AND enabled; - maxED := COALESCE(maxED, 3); - - -- XXX This should get some more thought ... maybe search_normalize? - word_list := ARRAY_AGG(x.word) FROM search.query_parse_positions(raw_input) x; - - -- Common case exact match test for preformance - IF c_symspell_suggestion_verbosity = 0 AND CARDINALITY(word_list) = 1 AND CHARACTER_LENGTH(word_list[1]) <= prefix_length THEN - EXECUTE - 'SELECT '||search_class||'_suggestions AS suggestions, - '||search_class||'_count AS count, - prefix_key - FROM search.symspell_dictionary - WHERE prefix_key = $1 - AND '||search_class||'_count >= $2 - AND '||search_class||'_suggestions @> ARRAY[$1]' - INTO entry USING evergreen.lowercase(word_list[1]), c_min_suggestion_use_threshold; - IF entry.prefix_key IS NOT NULL THEN - output.lev_distance := 0; -- definitionally - output.prefix_key := entry.prefix_key; - output.prefix_key_count := entry.count; - output.suggestion_count := entry.count; - output.input := word_list[1]; - IF c_symspell_transfer_case THEN - output.suggestion := search.symspell_transfer_casing(output.input, entry.prefix_key); - ELSE - output.suggestion := entry.prefix_key; - END IF; - output.norm_input := entry.prefix_key; - output.qwerty_kb_match := 1; - output.pg_trgm_sim := 1; - output.soundex_sim := 1; - RETURN NEXT output; - RETURN; - END IF; - END IF; - - <> - FOREACH word IN ARRAY word_list LOOP - w_pos := w_pos + 1; - input := evergreen.lowercase(word); - - IF CHARACTER_LENGTH(input) > prefix_length THEN - prefix_key := SUBSTRING(input FROM 1 FOR prefix_length); - edit_list := ARRAY[input,prefix_key] || search.symspell_generate_edits(prefix_key, 1, maxED); - ELSE - edit_list := input || search.symspell_generate_edits(input, 1, maxED); - END IF; - - SELECT ARRAY_AGG(x ORDER BY CHARACTER_LENGTH(x) DESC) INTO edit_list FROM UNNEST(edit_list) x; - - output_list := '{}'; - seen_list := '{}'; - global_ed := NULL; - - <> - FOREACH entry_key IN ARRAY edit_list LOOP - smallest_ed := -1; - IF global_ed IS NOT NULL THEN - smallest_ed := global_ed; - END IF; - FOR entry IN EXECUTE - 'SELECT '||search_class||'_suggestions AS suggestions, - '||search_class||'_count AS count, - prefix_key - FROM search.symspell_dictionary - WHERE prefix_key = $1 - AND '||search_class||'_suggestions IS NOT NULL' - USING entry_key - LOOP - FOREACH sugg IN ARRAY entry.suggestions LOOP - IF NOT seen_list @> ARRAY[sugg] THEN - seen_list := seen_list || sugg; - IF input = sugg THEN -- exact match, no need to spend time on a call - output.lev_distance := 0; - output.suggestion_count = entry.count; - ELSIF ABS(CHARACTER_LENGTH(input) - CHARACTER_LENGTH(sugg)) > maxED THEN - -- They are definitionally too different to consider, just move on. - CONTINUE; - ELSE - --output.lev_distance := levenshtein_less_equal( - output.lev_distance := evergreen.levenshtein_damerau_edistance( - input, - sugg, - maxED - ); - IF output.lev_distance < 0 THEN - -- The Perl module returns -1 for "more distant than max". - output.lev_distance := maxED + 1; - -- This short-circuit's the count test below for speed, bypassing - -- a couple useless tests. - output.suggestion_count := -1; - ELSE - EXECUTE 'SELECT '||search_class||'_count FROM search.symspell_dictionary WHERE prefix_key = $1' - INTO output.suggestion_count USING sugg; - END IF; - END IF; - - -- The caller passes a minimum suggestion count threshold (or uses - -- the default of 0) and if the suggestion has that many or less uses - -- then we move on to the next suggestion, since this one is too rare. - CONTINUE WHEN output.suggestion_count < c_min_suggestion_use_threshold; - - -- Track the smallest edit distance among suggestions from this prefix key. - IF smallest_ed = -1 OR output.lev_distance < smallest_ed THEN - smallest_ed := output.lev_distance; - END IF; - - -- Track the smallest edit distance for all prefix keys for this word. - IF global_ed IS NULL OR smallest_ed < global_ed THEN - global_ed = smallest_ed; - END IF; - - -- Only proceed if the edit distance is <= the max for the dictionary. - IF output.lev_distance <= maxED THEN - IF output.lev_distance > global_ed AND c_symspell_suggestion_verbosity <= 1 THEN - -- Lev distance is our main similarity measure. While - -- trgm or soundex similarity could be the main filter, - -- Lev is both language agnostic and faster. - -- - -- Here we will skip suggestions that have a longer edit distance - -- than the shortest we've already found. This is simply an - -- optimization that allows us to avoid further processing - -- of this entry. It would be filtered out later. - - CONTINUE; - END IF; - - -- If we have an exact match on the suggestion key we can also avoid - -- some function calls. - IF output.lev_distance = 0 THEN - output.qwerty_kb_match := 1; - output.pg_trgm_sim := 1; - output.soundex_sim := 1; - ELSE - output.qwerty_kb_match := evergreen.qwerty_keyboard_distance_match(input, sugg); - output.pg_trgm_sim := similarity(input, sugg); - output.soundex_sim := difference(input, sugg) / 4.0; - END IF; - - -- Fill in some fields - IF c_symspell_transfer_case THEN - output.suggestion := search.symspell_transfer_casing(word, sugg); - ELSE - output.suggestion := sugg; - END IF; - output.prefix_key := entry.prefix_key; - output.prefix_key_count := entry.count; - output.input := word; - output.norm_input := input; - output.word_pos := w_pos; - - -- We can't "cache" a set of generated records directly, so - -- here we build up an array of search.symspell_lookup_output - -- records that we can revivicate later as a table using UNNEST(). - output_list := output_list || output; - - EXIT entry_key_loop WHEN smallest_ed = 0 AND c_symspell_suggestion_verbosity = 0; -- exact match early exit - CONTINUE entry_key_loop WHEN smallest_ed = 0 AND c_symspell_suggestion_verbosity = 1; -- exact match early jump to the next key - END IF; -- maxED test - END IF; -- suggestion not seen test - END LOOP; -- loop over suggestions - END LOOP; -- loop over entries - END LOOP; -- loop over entry_keys - - -- Now we're done examining this word - IF c_symspell_suggestion_verbosity = 0 THEN - -- Return the "best" suggestion from the smallest edit - -- distance group. We define best based on the weighting - -- of the non-lev similarity measures and use the suggestion - -- use count to break ties. - RETURN QUERY - SELECT * FROM UNNEST(output_list) - ORDER BY lev_distance, - (soundex_sim * c_soundex_weight) - + (pg_trgm_sim * c_pg_trgm_weight) - + (qwerty_kb_match * c_keyboard_distance_weight) DESC, - suggestion_count DESC - LIMIT 1; - ELSIF c_symspell_suggestion_verbosity = 1 THEN - -- Return all suggestions from the smallest - -- edit distance group. - RETURN QUERY - SELECT * FROM UNNEST(output_list) WHERE lev_distance = smallest_ed - ORDER BY (soundex_sim * c_soundex_weight) - + (pg_trgm_sim * c_pg_trgm_weight) - + (qwerty_kb_match * c_keyboard_distance_weight) DESC, - suggestion_count DESC; - ELSIF c_symspell_suggestion_verbosity = 2 THEN - -- Return everything we find, along with relevant stats - RETURN QUERY - SELECT * FROM UNNEST(output_list) - ORDER BY lev_distance, - (soundex_sim * c_soundex_weight) - + (pg_trgm_sim * c_pg_trgm_weight) - + (qwerty_kb_match * c_keyboard_distance_weight) DESC, - suggestion_count DESC; - ELSIF c_symspell_suggestion_verbosity = 3 THEN - -- Return everything we find from the two smallest edit distance groups - RETURN QUERY - SELECT * FROM UNNEST(output_list) - WHERE lev_distance IN (SELECT DISTINCT lev_distance FROM UNNEST(output_list) ORDER BY 1 LIMIT 2) - ORDER BY lev_distance, - (soundex_sim * c_soundex_weight) - + (pg_trgm_sim * c_pg_trgm_weight) - + (qwerty_kb_match * c_keyboard_distance_weight) DESC, - suggestion_count DESC; - ELSIF c_symspell_suggestion_verbosity = 4 THEN - -- Return everything we find from the two smallest edit distance groups that are NOT 0 distance - RETURN QUERY - SELECT * FROM UNNEST(output_list) - WHERE lev_distance IN (SELECT DISTINCT lev_distance FROM UNNEST(output_list) WHERE lev_distance > 0 ORDER BY 1 LIMIT 2) - ORDER BY lev_distance, - (soundex_sim * c_soundex_weight) - + (pg_trgm_sim * c_pg_trgm_weight) - + (qwerty_kb_match * c_keyboard_distance_weight) DESC, - suggestion_count DESC; - END IF; - END LOOP; -- loop over words -END; -$F$ LANGUAGE PLPGSQL; - -COMMIT; - --- Find the "broadest" value in use, and update the defaults for all classes -DO $do$ -DECLARE - val TEXT; -BEGIN - SELECT FIRST(s.value ORDER BY t.depth) INTO val - FROM actor.org_unit_setting s - JOIN actor.org_unit u ON (u.id = s.org_unit) - JOIN actor.org_unit_type t ON (u.ou_type = t.id) - WHERE s.name = 'opac.did_you_mean.low_result_threshold'; - - IF FOUND AND val IS NOT NULL THEN - UPDATE config.metabib_class SET low_result_threshold = val::INT; - END IF; - - SELECT FIRST(s.value ORDER BY t.depth) INTO val - FROM actor.org_unit_setting s - JOIN actor.org_unit u ON (u.id = s.org_unit) - JOIN actor.org_unit_type t ON (u.ou_type = t.id) - WHERE s.name = 'opac.did_you_mean.max_suggestions'; - - IF FOUND AND val IS NOT NULL THEN - UPDATE config.metabib_class SET max_suggestions = val::INT; - END IF; - - SELECT FIRST(s.value ORDER BY t.depth) INTO val - FROM actor.org_unit_setting s - JOIN actor.org_unit u ON (u.id = s.org_unit) - JOIN actor.org_unit_type t ON (u.ou_type = t.id) - WHERE s.name = 'search.symspell.min_suggestion_use_threshold'; - - IF FOUND AND val IS NOT NULL THEN - UPDATE config.metabib_class SET min_suggestion_use_threshold = val::INT; - END IF; - - SELECT FIRST(s.value ORDER BY t.depth) INTO val - FROM actor.org_unit_setting s - JOIN actor.org_unit u ON (u.id = s.org_unit) - JOIN actor.org_unit_type t ON (u.ou_type = t.id) - WHERE s.name = 'search.symspell.soundex.weight'; - - IF FOUND AND val IS NOT NULL THEN - UPDATE config.metabib_class SET soundex_weight = val::INT; - END IF; - - SELECT FIRST(s.value ORDER BY t.depth) INTO val - FROM actor.org_unit_setting s - JOIN actor.org_unit u ON (u.id = s.org_unit) - JOIN actor.org_unit_type t ON (u.ou_type = t.id) - WHERE s.name = 'search.symspell.pg_trgm.weight'; - - IF FOUND AND val IS NOT NULL THEN - UPDATE config.metabib_class SET pg_trgm_weight = val::INT; - END IF; - - SELECT FIRST(s.value ORDER BY t.depth) INTO val - FROM actor.org_unit_setting s - JOIN actor.org_unit u ON (u.id = s.org_unit) - JOIN actor.org_unit_type t ON (u.ou_type = t.id) - WHERE s.name = 'search.symspell.keyboard_distance.weight'; - - IF FOUND AND val IS NOT NULL THEN - UPDATE config.metabib_class SET keyboard_distance_weight = val::INT; - END IF; -END; -$do$; - diff --git a/Open-ILS/src/sql/Pg/upgrade/YYYY.schema.DYM-authority-data-dictionary.sql b/Open-ILS/src/sql/Pg/upgrade/YYYY.schema.DYM-authority-data-dictionary.sql deleted file mode 100644 index 6fca3b1535..0000000000 --- a/Open-ILS/src/sql/Pg/upgrade/YYYY.schema.DYM-authority-data-dictionary.sql +++ /dev/null @@ -1,301 +0,0 @@ -BEGIN; - --- Bring authorized headings into the symspell dictionary. Side --- loader should be used for Real Sites. See below the COMMIT. -/* -SELECT search.symspell_build_and_merge_entries(h.value, m.field_class, NULL) - FROM authority.simple_heading h - JOIN authority.control_set_auth_field_metabib_field_map_refs a ON (a.authority_field = h.atag) - JOIN config.metabib_field m ON (a.metabib_field=m.id); -*/ - --- ensure that this function is in place; it hitherto had not been --- present in baseline - -CREATE OR REPLACE FUNCTION search.symspell_build_and_merge_entries ( - full_input TEXT, - source_class TEXT, - old_input TEXT DEFAULT NULL, - include_phrases BOOL DEFAULT FALSE -) RETURNS SETOF search.symspell_dictionary AS $F$ -DECLARE - new_entry RECORD; - conflict_entry RECORD; -BEGIN - - IF full_input = old_input THEN -- neither NULL, and are the same - RETURN; - END IF; - - FOR new_entry IN EXECUTE $q$ - SELECT count, - prefix_key, - s AS suggestions - FROM (SELECT prefix_key, - ARRAY_AGG(DISTINCT $q$ || source_class || $q$_suggestions[1]) s, - SUM($q$ || source_class || $q$_count) count - FROM search.symspell_build_entries($1, $2, $3, $4) - GROUP BY 1) x - $q$ USING full_input, source_class, old_input, include_phrases - LOOP - EXECUTE $q$ - SELECT prefix_key, - $q$ || source_class || $q$_suggestions suggestions, - $q$ || source_class || $q$_count count - FROM search.symspell_dictionary - WHERE prefix_key = $1 $q$ - INTO conflict_entry - USING new_entry.prefix_key; - - IF new_entry.count <> 0 THEN -- Real word, and count changed - IF conflict_entry.prefix_key IS NOT NULL THEN -- we'll be updating - IF conflict_entry.count > 0 THEN -- it's a real word - RETURN QUERY EXECUTE $q$ - UPDATE search.symspell_dictionary - SET $q$ || source_class || $q$_count = $2 - WHERE prefix_key = $1 - RETURNING * $q$ - USING new_entry.prefix_key, GREATEST(0, new_entry.count + conflict_entry.count); - ELSE -- it was a prefix key or delete-emptied word before - IF conflict_entry.suggestions @> new_entry.suggestions THEN -- already have all suggestions here... - RETURN QUERY EXECUTE $q$ - UPDATE search.symspell_dictionary - SET $q$ || source_class || $q$_count = $2 - WHERE prefix_key = $1 - RETURNING * $q$ - USING new_entry.prefix_key, GREATEST(0, new_entry.count); - ELSE -- new suggestion! - RETURN QUERY EXECUTE $q$ - UPDATE search.symspell_dictionary - SET $q$ || source_class || $q$_count = $2, - $q$ || source_class || $q$_suggestions = $3 - WHERE prefix_key = $1 - RETURNING * $q$ - USING new_entry.prefix_key, GREATEST(0, new_entry.count), evergreen.text_array_merge_unique(conflict_entry.suggestions,new_entry.suggestions); - END IF; - END IF; - ELSE - -- We keep the on-conflict clause just in case... - RETURN QUERY EXECUTE $q$ - INSERT INTO search.symspell_dictionary AS d ( - $q$ || source_class || $q$_count, - prefix_key, - $q$ || source_class || $q$_suggestions - ) VALUES ( $1, $2, $3 ) ON CONFLICT (prefix_key) DO - UPDATE SET $q$ || source_class || $q$_count = d.$q$ || source_class || $q$_count + EXCLUDED.$q$ || source_class || $q$_count, - $q$ || source_class || $q$_suggestions = evergreen.text_array_merge_unique(d.$q$ || source_class || $q$_suggestions, EXCLUDED.$q$ || source_class || $q$_suggestions) - RETURNING * $q$ - USING new_entry.count, new_entry.prefix_key, new_entry.suggestions; - END IF; - ELSE -- key only, or no change - IF conflict_entry.prefix_key IS NOT NULL THEN -- we'll be updating - IF NOT conflict_entry.suggestions @> new_entry.suggestions THEN -- There are new suggestions - RETURN QUERY EXECUTE $q$ - UPDATE search.symspell_dictionary - SET $q$ || source_class || $q$_suggestions = $2 - WHERE prefix_key = $1 - RETURNING * $q$ - USING new_entry.prefix_key, evergreen.text_array_merge_unique(conflict_entry.suggestions,new_entry.suggestions); - END IF; - ELSE - RETURN QUERY EXECUTE $q$ - INSERT INTO search.symspell_dictionary AS d ( - $q$ || source_class || $q$_count, - prefix_key, - $q$ || source_class || $q$_suggestions - ) VALUES ( $1, $2, $3 ) ON CONFLICT (prefix_key) DO -- key exists, suggestions may be added due to this entry - UPDATE SET $q$ || source_class || $q$_suggestions = evergreen.text_array_merge_unique(d.$q$ || source_class || $q$_suggestions, EXCLUDED.$q$ || source_class || $q$_suggestions) - RETURNING * $q$ - USING new_entry.count, new_entry.prefix_key, new_entry.suggestions; - END IF; - END IF; - END LOOP; -END; -$F$ LANGUAGE PLPGSQL; - -CREATE OR REPLACE FUNCTION search.symspell_maintain_entries () RETURNS TRIGGER AS $f$ -DECLARE - search_class TEXT; - new_value TEXT := NULL; - old_value TEXT := NULL; -BEGIN - - IF TG_TABLE_SCHEMA = 'authority' THEN - SELECT m.field_class INTO search_class - FROM authority.control_set_auth_field_metabib_field_map_refs a - JOIN config.metabib_field m ON (a.metabib_field=m.id) - WHERE a.authority_field = NEW.atag; - - IF NOT FOUND THEN - RETURN NULL; - END IF; - ELSE - search_class := COALESCE(TG_ARGV[0], SPLIT_PART(TG_TABLE_NAME,'_',1)); - END IF; - - IF TG_OP IN ('INSERT', 'UPDATE') THEN - new_value := NEW.value; - END IF; - - IF TG_OP IN ('DELETE', 'UPDATE') THEN - old_value := OLD.value; - END IF; - - PERFORM * FROM search.symspell_build_and_merge_entries(new_value, search_class, old_value); - - RETURN NULL; -- always fired AFTER -END; -$f$ LANGUAGE PLPGSQL; - -CREATE TRIGGER maintain_symspell_entries_tgr - AFTER INSERT OR UPDATE OR DELETE ON authority.simple_heading - FOR EACH ROW EXECUTE PROCEDURE search.symspell_maintain_entries(); - -COMMIT; - -\qecho '' -\qecho 'If the Evergreen database has authority records, a reingest of' -\qecho 'the search suggestion dictionary is recommended.' -\qecho '' -\qecho 'The following should be run at the end of the upgrade before any' -\qecho 'reingest occurs. Because new triggers are installed already,' -\qecho 'updates to indexed strings will cause zero-count dictionary entries' -\qecho 'to be recorded which will require updating every row again (or' -\qecho 'starting from scratch) so best to do this before other batch' -\qecho 'changes. A later reingest that does not significantly change' -\qecho 'indexed strings will /not/ cause table bloat here, and will be' -\qecho 'as fast as normal. A copy of the SQL in a ready-to-use, non-escaped' -\qecho 'form is available inside a comment at the end of this upgrade sub-' -\qecho 'script so you do not need to copy this comment from the psql ouptut.' -\qecho '' -\qecho '\\a' -\qecho '\\t' -\qecho '' -\qecho '\\o title' -\qecho 'select value from metabib.title_field_entry;' -\qecho 'select h.value' -\qecho ' from authority.simple_heading h' -\qecho ' join authority.control_set_auth_field_metabib_field_map_refs a on (a.authority_field = h.atag)' -\qecho ' join config.metabib_field m on (a.metabib_field=m.id and m.field_class=\'title\');' -\qecho '\\o author' -\qecho 'select value from metabib.author_field_entry;' -\qecho 'select h.value' -\qecho ' from authority.simple_heading h' -\qecho ' join authority.control_set_auth_field_metabib_field_map_refs a on (a.authority_field = h.atag)' -\qecho ' join config.metabib_field m on (a.metabib_field=m.id and m.field_class=\'author\');' -\qecho '\\o subject' -\qecho 'select value from metabib.subject_field_entry;' -\qecho 'select h.value' -\qecho ' from authority.simple_heading h' -\qecho ' join authority.control_set_auth_field_metabib_field_map_refs a on (a.authority_field = h.atag)' -\qecho ' join config.metabib_field m on (a.metabib_field=m.id and m.field_class=\'subject\');' -\qecho '\\o series' -\qecho 'select value from metabib.series_field_entry;' -\qecho '\\o identifier' -\qecho 'select value from metabib.identifier_field_entry;' -\qecho '\\o keyword' -\qecho 'select value from metabib.keyword_field_entry;' -\qecho '' -\qecho '\\o' -\qecho '\\a' -\qecho '\\t' -\qecho '' -\qecho '// Then, at the command line:' -\qecho '' -\qecho '$ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl title > title.sql' -\qecho '$ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl author > author.sql' -\qecho '$ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl subject > subject.sql' -\qecho '$ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl series > series.sql' -\qecho '$ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl identifier > identifier.sql' -\qecho '$ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl keyword > keyword.sql' -\qecho '' -\qecho '// And, back in psql' -\qecho '' -\qecho 'ALTER TABLE search.symspell_dictionary SET UNLOGGED;' -\qecho 'TRUNCATE search.symspell_dictionary;' -\qecho '' -\qecho '\\i identifier.sql' -\qecho '\\i author.sql' -\qecho '\\i title.sql' -\qecho '\\i subject.sql' -\qecho '\\i series.sql' -\qecho '\\i keyword.sql' -\qecho '' -\qecho 'CLUSTER search.symspell_dictionary USING symspell_dictionary_pkey;' -\qecho 'REINDEX TABLE search.symspell_dictionary;' -\qecho 'ALTER TABLE search.symspell_dictionary SET LOGGED;' -\qecho 'VACUUM ANALYZE search.symspell_dictionary;' -\qecho '' -\qecho 'DROP TABLE search.symspell_dictionary_partial_title;' -\qecho 'DROP TABLE search.symspell_dictionary_partial_author;' -\qecho 'DROP TABLE search.symspell_dictionary_partial_subject;' -\qecho 'DROP TABLE search.symspell_dictionary_partial_series;' -\qecho 'DROP TABLE search.symspell_dictionary_partial_identifier;' -\qecho 'DROP TABLE search.symspell_dictionary_partial_keyword;' - -/* -\a -\t - -\o title -select value from metabib.title_field_entry; -select h.value - from authority.simple_heading h - join authority.control_set_auth_field_metabib_field_map_refs a on (a.authority_field = h.atag) - join config.metabib_field m on (a.metabib_field=m.id and m.field_class='title'); -\o author -select value from metabib.author_field_entry; -select h.value - from authority.simple_heading h - join authority.control_set_auth_field_metabib_field_map_refs a on (a.authority_field = h.atag) - join config.metabib_field m on (a.metabib_field=m.id and m.field_class='author'); -\o subject -select value from metabib.subject_field_entry; -select h.value - from authority.simple_heading h - join authority.control_set_auth_field_metabib_field_map_refs a on (a.authority_field = h.atag) - join config.metabib_field m on (a.metabib_field=m.id and m.field_class='subject'); -\o series -select value from metabib.series_field_entry; -\o identifier -select value from metabib.identifier_field_entry; -\o keyword -select value from metabib.keyword_field_entry; - -\o -\a -\t - -// Then, at the command line: - -$ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl title > title.sql -$ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl author > author.sql -$ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl subject > subject.sql -$ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl series > series.sql -$ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl identifier > identifier.sql -$ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl keyword > keyword.sql - -// And, back in psql - -ALTER TABLE search.symspell_dictionary SET UNLOGGED; -TRUNCATE search.symspell_dictionary; - -\i identifier.sql -\i author.sql -\i title.sql -\i subject.sql -\i series.sql -\i keyword.sql - -CLUSTER search.symspell_dictionary USING symspell_dictionary_pkey; -REINDEX TABLE search.symspell_dictionary; -ALTER TABLE search.symspell_dictionary SET LOGGED; -VACUUM ANALYZE search.symspell_dictionary; - -DROP TABLE search.symspell_dictionary_partial_title; -DROP TABLE search.symspell_dictionary_partial_author; -DROP TABLE search.symspell_dictionary_partial_subject; -DROP TABLE search.symspell_dictionary_partial_series; -DROP TABLE search.symspell_dictionary_partial_identifier; -DROP TABLE search.symspell_dictionary_partial_keyword; -*/