LP#1931162: DYM needs some situational optimization

author Mike Rylander <mrylander@gmail.com>

Mon, 7 Jun 2021 19:57:32 +0000 (15:57 -0400)

committer Jason Boyer <JBoyer@equinoxOLI.org>

Fri, 27 Aug 2021 18:40:27 +0000 (14:40 -0400)
author Mike Rylander <mrylander@gmail.com>
Mon, 7 Jun 2021 19:57:32 +0000 (15:57 -0400)
committer Jason Boyer <JBoyer@equinoxOLI.org>
Fri, 27 Aug 2021 18:40:27 +0000 (14:40 -0400)
diff --git a/Open-ILS/src/sql/Pg/300.schema.staged_search.sql b/Open-ILS/src/sql/Pg/300.schema.staged_search.sql

index eba7b96..ece2408 100644 (file)
--- a/Open-ILS/src/sql/Pg/300.schema.staged_search.sql
+++ b/Open-ILS/src/sql/Pg/300.schema.staged_search.sql
@@ -1587,19 +1587,21 @@ CREATE TYPE search.symspell_lookup_output AS (
      word_pos            INT
  );
  
-CREATE OR REPLACE FUNCTION search.symspell_lookup (
-    raw_input       TEXT,
-    search_class    TEXT,
-    verbosity       INT DEFAULT 2,
-    xfer_case       BOOL DEFAULT FALSE,
-    count_threshold INT DEFAULT 1,
-    soundex_weight  INT DEFAULT 0,
-    pg_trgm_weight  INT DEFAULT 0,
-    kbdist_weight   INT DEFAULT 0
-) RETURNS SETOF search.symspell_lookup_output AS $F$
+
+CREATE OR REPLACE FUNCTION search.symspell_lookup(
+    raw_input text,
+    search_class text,
+    verbosity integer DEFAULT 2,
+    xfer_case boolean DEFAULT false,
+    count_threshold integer DEFAULT 1,
+    soundex_weight integer DEFAULT 0,
+    pg_trgm_weight integer DEFAULT 0,
+    kbdist_weight integer DEFAULT 0
+) RETURNS SETOF search.symspell_lookup_output LANGUAGE plpgsql AS $function$
  DECLARE
      prefix_length INT;
      maxED         INT;
+    good_suggs  HSTORE;
      word_list   TEXT[];
      edit_list   TEXT[] := '{}';
      seen_list   TEXT[] := '{}';
@@ -1614,6 +1616,8 @@ DECLARE
      w_pos       INT := -1;
      smallest_ed INT := -1;
      global_ed   INT;
+    i_len       INT;
+    l_maxED     INT;
  BEGIN
      SELECT value::INT INTO prefix_length FROM config.internal_flag WHERE name = 'symspell.prefix_length' AND enabled;
      prefix_length := COALESCE(prefix_length, 6);
@@ -1631,8 +1635,8 @@ BEGIN
                     prefix_key
               FROM  search.symspell_dictionary
               WHERE prefix_key = $1
-                   AND '||search_class||'_count >= $2 
-                   AND '||search_class||'_suggestions @> ARRAY[$1]' 
+                   AND '||search_class||'_count >= $2
+                   AND '||search_class||'_suggestions @> ARRAY[$1]'
            INTO entry USING evergreen.lowercase(word_list[1]), COALESCE(count_threshold,1);
          IF entry.prefix_key IS NOT NULL THEN
              output.lev_distance := 0; -- definitionally
@@ -1658,12 +1662,14 @@ BEGIN
      FOREACH word IN ARRAY word_list LOOP
          w_pos := w_pos + 1;
          input := evergreen.lowercase(word);
+        i_len := CHARACTER_LENGTH(input);
+        l_maxED := maxED;
  
          IF CHARACTER_LENGTH(input) > prefix_length THEN
              prefix_key := SUBSTRING(input FROM 1 FOR prefix_length);
-            edit_list := ARRAY[input,prefix_key] || search.symspell_generate_edits(prefix_key, 1, maxED);
+            edit_list := ARRAY[input,prefix_key] || search.symspell_generate_edits(prefix_key, 1, l_maxED);
          ELSE
-            edit_list := input || search.symspell_generate_edits(input, 1, maxED);
+            edit_list := input || search.symspell_generate_edits(input, 1, l_maxED);
          END IF;
  
          SELECT ARRAY_AGG(x ORDER BY CHARACTER_LENGTH(x) DESC) INTO edit_list FROM UNNEST(edit_list) x;
@@ -1678,106 +1684,109 @@ BEGIN
              IF global_ed IS NOT NULL THEN
                  smallest_ed := global_ed;
              END IF;
+
              FOR entry IN EXECUTE
                  'SELECT  '||search_class||'_suggestions AS suggestions,
                           '||search_class||'_count AS count,
                           prefix_key
                     FROM  search.symspell_dictionary
                     WHERE prefix_key = $1
-                         AND '||search_class||'_suggestions IS NOT NULL' 
+                         AND '||search_class||'_suggestions IS NOT NULL'
                  USING entry_key
              LOOP
-                FOREACH sugg IN ARRAY entry.suggestions LOOP
-                    IF NOT seen_list @> ARRAY[sugg] THEN
-                        seen_list := seen_list || sugg;
-                        IF input = sugg THEN -- exact match, no need to spend time on a call
-                            output.lev_distance := 0;
-                            output.suggestion_count = entry.count;
-                        ELSIF ABS(CHARACTER_LENGTH(input) - CHARACTER_LENGTH(sugg)) > maxED THEN
-                            -- They are definitionally too different to consider, just move on.
-                            CONTINUE;
-                        ELSE
-                            --output.lev_distance := levenshtein_less_equal(
-                            output.lev_distance := evergreen.levenshtein_damerau_edistance(
-                                input,
-                                sugg,
-                                maxED
-                            );
-                            IF output.lev_distance < 0 THEN
-                                -- The Perl module returns -1 for "more distant than max".
-                                output.lev_distance := maxED + 1;
-                                -- This short-circuit's the count test below for speed, bypassing
-                                -- a couple useless tests.
-                                output.suggestion_count := -1;
-                            ELSE
-                                EXECUTE 'SELECT '||search_class||'_count FROM search.symspell_dictionary WHERE prefix_key = $1'
-                                    INTO output.suggestion_count USING sugg;
-                            END IF;
-                        END IF;
  
-                        -- The caller passes a minimum suggestion count threshold (or uses
-                        -- the default of 0) and if the suggestion has that many or less uses
-                        -- then we move on to the next suggestion, since this one is too rare.
-                        CONTINUE WHEN output.suggestion_count < COALESCE(count_threshold,1);
+                SELECT  HSTORE(
+                            ARRAY_AGG(
+                                ARRAY[s, evergreen.levenshtein_damerau_edistance(input,s,l_maxED)::TEXT]
+                                    ORDER BY evergreen.levenshtein_damerau_edistance(input,s,l_maxED) DESC
+                            )
+                        )
+                  INTO  good_suggs
+                  FROM  UNNEST(entry.suggestions) s
+                  WHERE (ABS(CHARACTER_LENGTH(s) - i_len) <= maxEd AND evergreen.levenshtein_damerau_edistance(input,s,l_maxED) BETWEEN 0 AND l_maxED)
+                        AND NOT seen_list @> ARRAY[s];
+
+                CONTINUE WHEN good_suggs IS NULL;
+
+                FOR sugg, output.suggestion_count IN EXECUTE
+                    'SELECT  prefix_key, '||search_class||'_count
+                       FROM  search.symspell_dictionary
+                       WHERE prefix_key = ANY ($1)
+                             AND '||search_class||'_count >= $2'
+                    USING AKEYS(good_suggs), COALESCE(count_threshold,1)
+                LOOP
+
+                    output.lev_distance := good_suggs->sugg;
+                    seen_list := seen_list || sugg;
+
+                    -- Track the smallest edit distance among suggestions from this prefix key.
+                    IF smallest_ed = -1 OR output.lev_distance < smallest_ed THEN
+                        smallest_ed := output.lev_distance;
+                    END IF;
  
-                        -- Track the smallest edit distance among suggestions from this prefix key.
-                        IF smallest_ed = -1 OR output.lev_distance < smallest_ed THEN
-                            smallest_ed := output.lev_distance;
+                    -- Track the smallest edit distance for all prefix keys for this word.
+                    IF global_ed IS NULL OR smallest_ed < global_ed THEN
+                        global_ed = smallest_ed;
+                        -- And if low verbosity, ignore suggs with a larger distance from here on.
+                        IF verbosity <= 1 THEN
+                            l_maxED := global_ed;
                          END IF;
+                    END IF;
  
-                        -- Track the smallest edit distance for all prefix keys for this word.
-                        IF global_ed IS NULL OR smallest_ed < global_ed THEN
-                            global_ed = smallest_ed;
+                    -- Lev distance is our main similarity measure. While
+                    -- trgm or soundex similarity could be the main filter,
+                    -- Lev is both language agnostic and faster.
+                    --
+                    -- Here we will skip suggestions that have a longer edit distance
+                    -- than the shortest we've already found. This is simply an
+                    -- optimization that allows us to avoid further processing
+                    -- of this entry. It would be filtered out later.
+                    CONTINUE WHEN output.lev_distance > global_ed AND verbosity <= 1;
+
+                    -- If we have an exact match on the suggestion key we can also avoid
+                    -- some function calls.
+                    IF output.lev_distance = 0 THEN
+                        output.qwerty_kb_match := 1;
+                        output.pg_trgm_sim := 1;
+                        output.soundex_sim := 1;
+                    ELSE
+                        IF kbdist_weight THEN
+                            output.qwerty_kb_match := evergreen.qwerty_keyboard_distance_match(input, sugg);
+                        ELSE
+                            output.qwerty_kb_match := 0;
                          END IF;
+                        IF pg_trgm_weight THEN
+                            output.pg_trgm_sim := similarity(input, sugg);
+                        ELSE
+                            output.pg_trgm_sim := 0;
+                        END IF;
+                        IF soundex_weight THEN
+                            output.soundex_sim := difference(input, sugg) / 4.0;
+                        ELSE
+                            output.soundex_sim := 0;
+                        END IF;
+                    END IF;
  
-                        -- Only proceed if the edit distance is <= the max for the dictionary.
-                        IF output.lev_distance <= maxED THEN
-                            IF output.lev_distance > global_ed AND verbosity <= 1 THEN
-                                -- Lev distance is our main similarity measure. While
-                                -- trgm or soundex similarity could be the main filter,
-                                -- Lev is both language agnostic and faster.
-                                --
-                                -- Here we will skip suggestions that have a longer edit distance
-                                -- than the shortest we've already found. This is simply an
-                                -- optimization that allows us to avoid further processing
-                                -- of this entry. It would be filtered out later.
+                    -- Fill in some fields
+                    IF xfer_case AND input <> word THEN
+                        output.suggestion := search.symspell_transfer_casing(word, sugg);
+                    ELSE
+                        output.suggestion := sugg;
+                    END IF;
+                    output.prefix_key := entry.prefix_key;
+                    output.prefix_key_count := entry.count;
+                    output.input := word;
+                    output.norm_input := input;
+                    output.word_pos := w_pos;
  
-                                CONTINUE;
-                            END IF;
+                    -- We can't "cache" a set of generated records directly, so
+                    -- here we build up an array of search.symspell_lookup_output
+                    -- records that we can revivicate later as a table using UNNEST().
+                    output_list := output_list || output;
  
-                            -- If we have an exact match on the suggestion key we can also avoid
-                            -- some function calls.
-                            IF output.lev_distance = 0 THEN
-                                output.qwerty_kb_match := 1;
-                                output.pg_trgm_sim := 1;
-                                output.soundex_sim := 1;
-                            ELSE
-                                output.qwerty_kb_match := evergreen.qwerty_keyboard_distance_match(input, sugg);
-                                output.pg_trgm_sim := similarity(input, sugg);
-                                output.soundex_sim := difference(input, sugg) / 4.0;
-                            END IF;
+                    EXIT entry_key_loop WHEN smallest_ed = 0 AND verbosity = 0; -- exact match early exit
+                    CONTINUE entry_key_loop WHEN smallest_ed = 0 AND verbosity = 1; -- exact match early jump to the next key
  
-                            -- Fill in some fields
-                            IF xfer_case THEN
-                                output.suggestion := search.symspell_transfer_casing(word, sugg);
-                            ELSE
-                                output.suggestion := sugg;
-                            END IF;
-                            output.prefix_key := entry.prefix_key;
-                            output.prefix_key_count := entry.count;
-                            output.input := word;
-                            output.norm_input := input;
-                            output.word_pos := w_pos;
-
-                            -- We can't "cache" a set of generated records directly, so
-                            -- here we build up an array of search.symspell_lookup_output
-                            -- records that we can revivicate later as a table using UNNEST().
-                            output_list := output_list || output;
-
-                            EXIT entry_key_loop WHEN smallest_ed = 0 AND verbosity = 0; -- exact match early exit
-                            CONTINUE entry_key_loop WHEN smallest_ed = 0 AND verbosity = 1; -- exact match early jump to the next key
-                        END IF; -- maxED test
-                    END IF; -- suggestion not seen test
                  END LOOP; -- loop over suggestions
              END LOOP; -- loop over entries
          END LOOP; -- loop over entry_keys
@@ -1837,7 +1846,7 @@ BEGIN
          END IF;
      END LOOP; -- loop over words
  END;
-$F$ LANGUAGE PLPGSQL;
+$function$;
  
  CREATE OR REPLACE FUNCTION search.symspell_build_raw_entry (
      raw_input       TEXT,
diff --git a/Open-ILS/src/sql/Pg/upgrade/XXXX.function.did_you_mean_optimization.sql b/Open-ILS/src/sql/Pg/upgrade/XXXX.function.did_you_mean_optimization.sql

new file mode 100644 (file)

index 0000000..a72c76f
--- /dev/null
+++ b/Open-ILS/src/sql/Pg/upgrade/XXXX.function.did_you_mean_optimization.sql
@@ -0,0 +1,268 @@
+BEGIN;
+
+SELECT evergreen.upgrade_deps_block_check('XXXX', :eg_version);
+
+CREATE OR REPLACE FUNCTION search.symspell_lookup(
+        raw_input text,
+        search_class text,
+        verbosity integer DEFAULT 2,
+        xfer_case boolean DEFAULT false,
+        count_threshold integer DEFAULT 1,
+        soundex_weight integer DEFAULT 0,
+        pg_trgm_weight integer DEFAULT 0,
+        kbdist_weight integer DEFAULT 0
+) RETURNS SETOF search.symspell_lookup_output
+ LANGUAGE plpgsql
+AS $function$
+DECLARE
+    prefix_length INT;
+    maxED         INT;
+    good_suggs  HSTORE;
+    word_list   TEXT[];
+    edit_list   TEXT[] := '{}';
+    seen_list   TEXT[] := '{}';
+    output      search.symspell_lookup_output;
+    output_list search.symspell_lookup_output[];
+    entry       RECORD;
+    entry_key   TEXT;
+    prefix_key  TEXT;
+    sugg        TEXT;
+    input       TEXT;
+    word        TEXT;
+    w_pos       INT := -1;
+    smallest_ed INT := -1;
+    global_ed   INT;
+    i_len       INT;
+    l_maxED     INT;
+BEGIN
+    SELECT value::INT INTO prefix_length FROM config.internal_flag WHERE name = 'symspell.prefix_length' AND enabled;
+    prefix_length := COALESCE(prefix_length, 6);
+
+    SELECT value::INT INTO maxED FROM config.internal_flag WHERE name = 'symspell.max_edit_distance' AND enabled;
+    maxED := COALESCE(maxED, 3);
+
+    word_list := ARRAY_AGG(x) FROM search.symspell_parse_words(raw_input) x;
+
+    -- Common case exact match test for preformance
+    IF verbosity = 0 AND CARDINALITY(word_list) = 1 AND CHARACTER_LENGTH(word_list[1]) <= prefix_length THEN
+        EXECUTE
+          'SELECT  '||search_class||'_suggestions AS suggestions,
+                   '||search_class||'_count AS count,
+                   prefix_key
+             FROM  search.symspell_dictionary
+             WHERE prefix_key = $1
+                   AND '||search_class||'_count >= $2
+                   AND '||search_class||'_suggestions @> ARRAY[$1]'
+          INTO entry USING evergreen.lowercase(word_list[1]), COALESCE(count_threshold,1);
+        IF entry.prefix_key IS NOT NULL THEN
+            output.lev_distance := 0; -- definitionally
+            output.prefix_key := entry.prefix_key;
+            output.prefix_key_count := entry.count;
+            output.suggestion_count := entry.count;
+            output.input := word_list[1];
+            IF xfer_case THEN
+                output.suggestion := search.symspell_transfer_casing(output.input, entry.prefix_key);
+            ELSE
+                output.suggestion := entry.prefix_key;
+            END IF;
+            output.norm_input := entry.prefix_key;
+            output.qwerty_kb_match := 1;
+            output.pg_trgm_sim := 1;
+            output.soundex_sim := 1;
+            RETURN NEXT output;
+            RETURN;
+        END IF;
+    END IF;
+
+    <<word_loop>>
+    FOREACH word IN ARRAY word_list LOOP
+        w_pos := w_pos + 1;
+        input := evergreen.lowercase(word);
+        i_len := CHARACTER_LENGTH(input);
+        l_maxED := maxED;
+
+        IF CHARACTER_LENGTH(input) > prefix_length THEN
+            prefix_key := SUBSTRING(input FROM 1 FOR prefix_length);
+            edit_list := ARRAY[input,prefix_key] || search.symspell_generate_edits(prefix_key, 1, l_maxED);
+        ELSE
+            edit_list := input || search.symspell_generate_edits(input, 1, l_maxED);
+        END IF;
+
+        SELECT ARRAY_AGG(x ORDER BY CHARACTER_LENGTH(x) DESC) INTO edit_list FROM UNNEST(edit_list) x;
+
+        output_list := '{}';
+        seen_list := '{}';
+        global_ed := NULL;
+
+        <<entry_key_loop>>
+        FOREACH entry_key IN ARRAY edit_list LOOP
+            smallest_ed := -1;
+            IF global_ed IS NOT NULL THEN
+                smallest_ed := global_ed;
+            END IF;
+
+            FOR entry IN EXECUTE
+                'SELECT  '||search_class||'_suggestions AS suggestions,
+                         '||search_class||'_count AS count,
+                         prefix_key
+                   FROM  search.symspell_dictionary
+                   WHERE prefix_key = $1
+                         AND '||search_class||'_suggestions IS NOT NULL'
+                USING entry_key
+            LOOP
+
+                SELECT  HSTORE(
+                            ARRAY_AGG(
+                                ARRAY[s, evergreen.levenshtein_damerau_edistance(input,s,l_maxED)::TEXT]
+                                    ORDER BY evergreen.levenshtein_damerau_edistance(input,s,l_maxED) DESC
+                            )
+                        )
+                  INTO  good_suggs
+                  FROM  UNNEST(entry.suggestions) s
+                  WHERE (ABS(CHARACTER_LENGTH(s) - i_len) <= maxEd AND evergreen.levenshtein_damerau_edistance(input,s,l_maxED) BETWEEN 0 AND l_maxED)
+                        AND NOT seen_list @> ARRAY[s];
+
+                CONTINUE WHEN good_suggs IS NULL;
+
+                FOR sugg, output.suggestion_count IN EXECUTE
+                    'SELECT  prefix_key, '||search_class||'_count
+                       FROM  search.symspell_dictionary
+                       WHERE prefix_key = ANY ($1)
+                             AND '||search_class||'_count >= $2'
+                    USING AKEYS(good_suggs), COALESCE(count_threshold,1)
+                LOOP
+
+                    output.lev_distance := good_suggs->sugg;
+                    seen_list := seen_list || sugg;
+
+                    -- Track the smallest edit distance among suggestions from this prefix key.
+                    IF smallest_ed = -1 OR output.lev_distance < smallest_ed THEN
+                        smallest_ed := output.lev_distance;
+                    END IF;
+
+                    -- Track the smallest edit distance for all prefix keys for this word.
+                    IF global_ed IS NULL OR smallest_ed < global_ed THEN
+                        global_ed = smallest_ed;
+                        -- And if low verbosity, ignore suggs with a larger distance from here on.
+                        IF verbosity <= 1 THEN
+                            l_maxED := global_ed;
+                        END IF;
+                    END IF;
+
+                    -- Lev distance is our main similarity measure. While
+                    -- trgm or soundex similarity could be the main filter,
+                    -- Lev is both language agnostic and faster.
+                    --
+                    -- Here we will skip suggestions that have a longer edit distance
+                    -- than the shortest we've already found. This is simply an
+                    -- optimization that allows us to avoid further processing
+                    -- of this entry. It would be filtered out later.
+                    CONTINUE WHEN output.lev_distance > global_ed AND verbosity <= 1;
+
+                    -- If we have an exact match on the suggestion key we can also avoid
+                    -- some function calls.
+                    IF output.lev_distance = 0 THEN
+                        output.qwerty_kb_match := 1;
+                        output.pg_trgm_sim := 1;
+                        output.soundex_sim := 1;
+                    ELSE
+                        IF kbdist_weight THEN
+                            output.qwerty_kb_match := evergreen.qwerty_keyboard_distance_match(input, sugg);
+                        ELSE
+                            output.qwerty_kb_match := 0;
+                        END IF;
+                        IF pg_trgm_weight THEN
+                            output.pg_trgm_sim := similarity(input, sugg);
+                        ELSE
+                            output.pg_trgm_sim := 0;
+                        END IF;
+                        IF soundex_weight THEN
+                            output.soundex_sim := difference(input, sugg) / 4.0;
+                        ELSE
+                            output.soundex_sim := 0;
+                        END IF;
+                    END IF;
+
+                    -- Fill in some fields
+                    IF xfer_case AND input <> word THEN
+                        output.suggestion := search.symspell_transfer_casing(word, sugg);
+                    ELSE
+                        output.suggestion := sugg;
+                    END IF;
+                    output.prefix_key := entry.prefix_key;
+                    output.prefix_key_count := entry.count;
+                    output.input := word;
+                    output.norm_input := input;
+                    output.word_pos := w_pos;
+
+                    -- We can't "cache" a set of generated records directly, so
+                    -- here we build up an array of search.symspell_lookup_output
+                    -- records that we can revivicate later as a table using UNNEST().
+                    output_list := output_list || output;
+
+                    EXIT entry_key_loop WHEN smallest_ed = 0 AND verbosity = 0; -- exact match early exit
+                    CONTINUE entry_key_loop WHEN smallest_ed = 0 AND verbosity = 1; -- exact match early jump to the next key
+
+                END LOOP; -- loop over suggestions
+            END LOOP; -- loop over entries
+        END LOOP; -- loop over entry_keys
+
+        -- Now we're done examining this word
+        IF verbosity = 0 THEN
+            -- Return the "best" suggestion from the smallest edit
+            -- distance group.  We define best based on the weighting
+            -- of the non-lev similarity measures and use the suggestion
+            -- use count to break ties.
+            RETURN QUERY
+                SELECT * FROM UNNEST(output_list)
+                    ORDER BY lev_distance,
+                        (soundex_sim * COALESCE(soundex_weight,0))
+                            + (pg_trgm_sim * COALESCE(pg_trgm_weight,0))
+                            + (qwerty_kb_match * COALESCE(kbdist_weight,0)) DESC,
+                        suggestion_count DESC
+                        LIMIT 1;
+        ELSIF verbosity = 1 THEN
+            -- Return all suggestions from the smallest
+            -- edit distance group.
+            RETURN QUERY
+                SELECT * FROM UNNEST(output_list) WHERE lev_distance = smallest_ed
+                    ORDER BY (soundex_sim * COALESCE(soundex_weight,0))
+                            + (pg_trgm_sim * COALESCE(pg_trgm_weight,0))
+                            + (qwerty_kb_match * COALESCE(kbdist_weight,0)) DESC,
+                        suggestion_count DESC;
+        ELSIF verbosity = 2 THEN
+            -- Return everything we find, along with relevant stats
+            RETURN QUERY
+                SELECT * FROM UNNEST(output_list)
+                    ORDER BY lev_distance,
+                        (soundex_sim * COALESCE(soundex_weight,0))
+                            + (pg_trgm_sim * COALESCE(pg_trgm_weight,0))
+                            + (qwerty_kb_match * COALESCE(kbdist_weight,0)) DESC,
+                        suggestion_count DESC;
+        ELSIF verbosity = 3 THEN
+            -- Return everything we find from the two smallest edit distance groups
+            RETURN QUERY
+                SELECT * FROM UNNEST(output_list)
+                    WHERE lev_distance IN (SELECT DISTINCT lev_distance FROM UNNEST(output_list) ORDER BY 1 LIMIT 2)
+                    ORDER BY lev_distance,
+                        (soundex_sim * COALESCE(soundex_weight,0))
+                            + (pg_trgm_sim * COALESCE(pg_trgm_weight,0))
+                            + (qwerty_kb_match * COALESCE(kbdist_weight,0)) DESC,
+                        suggestion_count DESC;
+        ELSIF verbosity = 4 THEN
+            -- Return everything we find from the two smallest edit distance groups that are NOT 0 distance
+            RETURN QUERY
+                SELECT * FROM UNNEST(output_list)
+                    WHERE lev_distance IN (SELECT DISTINCT lev_distance FROM UNNEST(output_list) WHERE lev_distance > 0 ORDER BY 1 LIMIT 2)
+                    ORDER BY lev_distance,
+                        (soundex_sim * COALESCE(soundex_weight,0))
+                            + (pg_trgm_sim * COALESCE(pg_trgm_weight,0))
+                            + (qwerty_kb_match * COALESCE(kbdist_weight,0)) DESC,
+                        suggestion_count DESC;
+        END IF;
+    END LOOP; -- loop over words
+END;
+$function$;
+
+COMMIT;
+
author	Mike Rylander <mrylander@gmail.com>
	Mon, 7 Jun 2021 19:57:32 +0000 (15:57 -0400)
committer	Jason Boyer <JBoyer@equinoxOLI.org>
	Fri, 27 Aug 2021 18:40:27 +0000 (14:40 -0400)
Open-ILS/src/sql/Pg/300.schema.staged_search.sql		patch \| blob \| history
Open-ILS/src/sql/Pg/upgrade/XXXX.function.did_you_mean_optimization.sql	[new file with mode: 0644]	patch \| blob