LP#1893997: Did you mean? Single word, single class
authorMike Rylander <mrylander@gmail.com>
Thu, 24 Sep 2020 16:48:41 +0000 (12:48 -0400)
committerGalen Charlton <gmc@equinoxinitiative.org>
Mon, 15 Mar 2021 13:41:04 +0000 (09:41 -0400)
This commit embodies the first stage of a larger search suggestion
project.  The bulk of the code is dedicated to providing an
implementation of the SymSpell[1] algorithm as the basis for very fast
word similarity testing for spelling suggestions as well as alternate
search suggestions.

The native in-memory algorithm specifies a hash table lookup using a
runtime-created dictionary.  As it is untenable to create and maintain a
separate in-memory data structure in the distributed environment that
OpenSRF provides, and adds significantly to the administrative complexity
of such a configuration, we instead maintain a dictionary in the
authoritative Postgres database used by Evergreen.  This dictionary is
based directly on indexed terms used for general search, and aims to
avoid zero-hit suggestions wherever possible while imposing as little
performance impact as can be managed.

In addition to the core SymSpell similarity metric, Damerau-Levenshtein
edit distance, we provide Soundex, Trigram, and QWERTY Keyboard
similarity measures.  The importance of these can be adjusted relative
to one another, or turned off individually.

Global term frequncey data is captured for each of the Evergreen search
classes and is used to help decide when to use specific terms, and which
terms to use as suggestions.

Suggestions are provide in the OPAC, including the staff-embedded OPAC
view, the KPAC, and the Angular catalog.

Later development will add the ability to perform mult-word and
phrase-oriented suggestions, to suggest searching requested terms in
other search classes, and provide local thesaurus values and exclusion
term lists.

[1] https://medium.com/@wolfgarbe/1000x-faster-spelling-correction-algorithm-2012-8701fcd87a5f

NOTE: This development adds two new Perl module dependencies, and will
therefore require a dependency update at upgrade time.

Signed-off-by: Mike Rylander <mrylander@gmail.com>
Signed-off-by: Gina Monti <gmonti@biblio.org>
Signed-off-by: Jason Stephenson <jason@sigio.com>
Signed-off-by: Galen Charlton <gmc@equinoxinitiative.org>
20 files changed:
Open-ILS/src/eg2/src/app/staff/catalog/search-form.component.html
Open-ILS/src/extras/install/Makefile.debian-buster
Open-ILS/src/extras/install/Makefile.debian-jessie
Open-ILS/src/extras/install/Makefile.debian-stretch
Open-ILS/src/extras/install/Makefile.fedora
Open-ILS/src/extras/install/Makefile.ubuntu-bionic
Open-ILS/src/extras/install/Makefile.ubuntu-focal
Open-ILS/src/perlmods/lib/OpenILS/Application/Search/Biblio.pm
Open-ILS/src/perlmods/lib/OpenILS/WWW/EGCatLoader/Search.pm
Open-ILS/src/sql/Pg/000.functions.general.sql
Open-ILS/src/sql/Pg/300.schema.staged_search.sql
Open-ILS/src/sql/Pg/950.data.seed-values.sql
Open-ILS/src/sql/Pg/create_database_extensions.sql
Open-ILS/src/sql/Pg/upgrade/XXXX.schema.symspell.sql [new file with mode: 0644]
Open-ILS/src/support-scripts/symspell-sideload.pl [new file with mode: 0755]
Open-ILS/src/templates-bootstrap/opac/parts/result/lowhits.tt2
Open-ILS/src/templates-bootstrap/opac/parts/searchbar.tt2
Open-ILS/src/templates/kpac/parts/searchbox.tt2
Open-ILS/src/templates/opac/parts/result/lowhits.tt2
Open-ILS/src/templates/opac/parts/searchbar.tt2

index 03e75c3..b3f59c0 100644 (file)
@@ -16,7 +16,7 @@
     </div>
   </div>
   
-  <div *ngIf="showThyself" class="row pt-3 pb-3 mb-3">
+  <div *ngIf="showThyself" class="row pt-3 pb-1 mb-1">
   <div class="col-lg-8">
     <ngb-tabset #searchTabs [activeId]="searchTab" (tabChange)="onTabChange($event)">
       <ngb-tab title="Keyword Search" i18n-title id="term">
     </div>
   </div>
   </div>
+  <div class="row mb-3 pb-3">
+    <ng-container *ngIf="context.result.global_summary as gs">
+      <ng-container *ngIf="gs.suggestions as sugg">
+        <div *ngIf="sugg.one_class_one_term as ocot" class="col-lg-12 d-flex align-content-center">
+          <span class="font-weight-bold font-italic mr-1" i18n>Did you mean:</span>
+          <span *ngFor="let s of ocot.suggestions; first as isFirst">
+            <span *ngIf="!isFirst" i18n>&nbsp; or &nbsp;</span>
+            <a routerLink="/staff/catalog/search" queryParamsHandling="merge"
+             [queryParams]="{query : s.suggestion, fieldClass : ocot.class}">
+               {{s.suggestion}}
+            </a>
+          </span>
+        </div>
+      </ng-container>
+    </ng-container>
+  </div>
 </div>
 
index 682f650..456a402 100644 (file)
@@ -97,6 +97,8 @@ export DEB_APACHE_DISCONF = \
 export CPAN_MODULES = \
        Geo::Coder::Google \
        Business::OnlinePayment::PayPal \
+       String::KeyboardDistance \
+       Text::Levenshtein::Damerau::XS \
        Email::Send
 
 export CPAN_MODULES_FORCE = \
index a10484b..6917396 100644 (file)
@@ -98,6 +98,8 @@ export DEB_APACHE_DISCONF = \
 export CPAN_MODULES = \
        Geo::Coder::Google \
        Business::OnlinePayment::PayPal \
+       String::KeyboardDistance \
+       Text::Levenshtein::Damerau::XS \
        Email::Send
 
 export CPAN_MODULES_FORCE = \
index e773dd1..953eebf 100644 (file)
@@ -97,6 +97,8 @@ export DEB_APACHE_DISCONF = \
 export CPAN_MODULES = \
        Geo::Coder::Google \
        Business::OnlinePayment::PayPal \
+       String::KeyboardDistance \
+       Text::Levenshtein::Damerau::XS \
        Email::Send
 
 export CPAN_MODULES_FORCE = \
index 754e6e1..907bd99 100644 (file)
@@ -75,6 +75,8 @@ export CPAN_MODULES = \
        Geo::Coder::OSM \
        Geo::Coder::Google \
        Excel::Writer::XLSX \
+       String::KeyboardDistance \
+       Text::Levenshtein::Damerau::XS \
        Business::ISSN \
        Net::Z3950::ZOOM \
        Net::Z3950::Simple2ZOOM \
index 21525b2..aafbb33 100644 (file)
@@ -95,6 +95,8 @@ export CPAN_MODULES = \
        Business::OnlinePayment::PayPal \
        Email::Send \
        MARC::Charset \
+       String::KeyboardDistance \
+       Text::Levenshtein::Damerau::XS \
        Net::Z3950::Simple2ZOOM
 
 export CPAN_MODULES_FORCE = \
index eba60fe..b002042 100644 (file)
@@ -96,14 +96,18 @@ export CPAN_MODULES = \
        Email::Send \
        MARC::Charset \
        Net::Z3950::Simple2ZOOM \
-    Locale::Country
+    Locale::Country \
+       String::KeyboardDistance \
+       Text::Levenshtein::Damerau::XS
 
 export CPAN_MODULES_FORCE = \
        Business::Stripe \
        Class::DBI::Frozen::301
 
 export CPAN_MODULES_PGSQL = \
-       MARC::File::XML
+       MARC::File::XML \
+       String::KeyboardDistance \
+       Text::Levenshtein::Damerau::XS
 
 PGSQL_APT_REPO_DEBS = \
         wget \
index 633691a..58ca5af 100644 (file)
@@ -829,7 +829,7 @@ __PACKAGE__->register_method(
 
 sub multiclass_query {
     # arghash only really supports limit/offset anymore
-    my($self, $conn, $arghash, $query, $docache) = @_;
+    my($self, $conn, $arghash, $query, $docache, $phys_loc) = @_;
 
     if ($query) {
         $query =~ s/\+/ /go;
@@ -841,7 +841,7 @@ sub multiclass_query {
     $logger->debug("initial search query => $query") if $query;
 
     (my $method = $self->api_name) =~ s/\.query/.staged/o;
-    return $self->method_lookup($method)->dispatch($arghash, $docache);
+    return $self->method_lookup($method)->dispatch($arghash, $docache, $phys_loc);
 
 }
 
@@ -1130,7 +1130,9 @@ __PACKAGE__->register_method(
 
 my $estimation_strategy;
 sub staged_search {
-    my($self, $conn, $search_hash, $docache) = @_;
+    my($self, $conn, $search_hash, $docache, $phys_loc) = @_;
+
+    $phys_loc ||= $U->get_org_tree->id;
 
     my $IAmMetabib = ($self->api_name =~ /metabib/) ? 1 : 0;
 
@@ -1285,6 +1287,57 @@ sub staged_search {
         $cache->put_cache($key, $cache_data, $cache_timeout);
     }
 
+    my $setting_names = [ qw/
+             opac.did_you_mean.max_suggestions
+             opac.did_you_mean.low_result_threshold
+             search.symspell.min_suggestion_use_threshold
+             search.symspell.soundex.weight
+             search.symspell.pg_trgm.weight
+             search.symspell.keyboard_distance.weight/ ];
+    my %suggest_settings = $U->ou_ancestor_setting_batch_insecure(
+        $phys_loc, $setting_names
+    );
+
+    # Defaults...
+    $suggest_settings{$_} ||= {value=>undef} for @$setting_names;
+
+    # Pull this one off the front, it's not used for the function call
+    my $max_suggestions_setting = shift @$setting_names;
+    my $sugg_low_thresh_setting = shift @$setting_names;
+    $max_suggestions_setting = $suggest_settings{$max_suggestions_setting}{value} // -1;
+    my $suggest_low_threshold = $suggest_settings{$sugg_low_thresh_setting}{value} || 0;
+
+    if ($global_summary->{visible} <= $suggest_low_threshold and $max_suggestions_setting != 0) {
+        # For now, we're doing one-class/one-term suggestions only
+        my ($class, $term) = one_class_one_term($global_summary->{query_struct});
+        if ($class && $term) { # check for suggestions!
+            my $suggestion_verbosity = 4;
+            if ($max_suggestions_setting == -1) { # special value that means "only best suggestion, and not always"
+                $max_suggestions_setting = 1;
+                $suggestion_verbosity = 0;
+            }
+
+            my @settings_params = map { $suggest_settings{$_}{value} } @$setting_names;
+            my $suggs = new_editor()->json_query({
+                from  => [
+                    'search.symspell_lookup',
+                        $term, $class,
+                        $suggestion_verbosity,
+                        1, # case transfer
+                        @settings_params
+                ],
+                limit => $max_suggestions_setting
+            });
+            if (@$suggs and $$suggs[0]{suggestion} ne $term) {
+                $global_summary->{suggestions}{'one_class_one_term'} = {
+                    class       => $class,
+                    term        => $term,
+                    suggestions  => $suggs
+                };
+            }
+        }
+    }
+
     my @results = grep {defined $_} @$all_results[$user_offset..($user_offset + $user_limit - 1)];
 
     $conn->respond_complete(
@@ -1305,6 +1358,30 @@ sub staged_search {
     return cache_facets($facet_key, $new_ids, $IAmMetabib, $ignore_facet_classes) if $docache;
 }
 
+sub one_class_one_term {
+    my $qstruct = shift;
+    my $node = $$qstruct{children};
+
+    my $class = undef;
+    my $term = undef;
+    while ($node) {
+        last if (
+            $$node{'|'}
+            or @{$$node{'&'}} != 1
+            or ($$node{'&'}[0]{fields} and @{$$node{'&'}[0]{fields}} > 0)
+        );
+
+        $class ||= $$node{'&'}[0]{class};
+        $term ||= $$node{'&'}[0]{content};
+
+        last if ($term);
+
+        $node = $$node{'&'}[0]{children};
+    }
+
+    return ($class, $term);
+}
+
 sub fetch_display_fields {
     my $self = shift;
     my $conn = shift;
index b713323..f02783c 100644 (file)
@@ -464,7 +464,7 @@ sub load_rresults {
         my $ses = OpenSRF::AppSession->create('open-ils.search');
 
         $self->timelog("Firing off the multiclass query");
-        my $req = $ses->request($method, $args, $query, 1);
+        my $req = $ses->request($method, $args, $query, 1, $ctx->{physical_loc});
         $results = $req->gather(1);
         $self->timelog("Returned from the multiclass query");
 
@@ -549,7 +549,7 @@ sub load_rresults {
                 # constituents query
                 my $save_offset = $args->{offset};
                 $args->{offset} = 0;
-                $mr_contents{$rec_id} = $ses->request($method, $args, $query, 1);
+                $mr_contents{$rec_id} = $ses->request($method, $args, $query, 1, $ctx->{physical_loc});
                 $args->{offset} = $save_offset;
             } catch Error with {
                 my $err = shift;
index e0aae8d..b253e9e 100644 (file)
@@ -21,6 +21,10 @@ CREATE OR REPLACE FUNCTION evergreen.lowercase( TEXT ) RETURNS TEXT AS $$
     return lc(shift);
 $$ LANGUAGE PLPERLU STRICT IMMUTABLE;
 
+CREATE OR REPLACE FUNCTION evergreen.uppercase( TEXT ) RETURNS TEXT AS $$
+    return uc(shift);
+$$ LANGUAGE PLPERLU STRICT IMMUTABLE;
+
 CREATE OR REPLACE FUNCTION evergreen.xml_escape(str TEXT) RETURNS text AS $$
     SELECT REPLACE(REPLACE(REPLACE($1,
        '&', '&amp;'),
index ee91178..57bc7a4 100644 (file)
@@ -1470,5 +1470,678 @@ BEGIN
 END;
 $f$ LANGUAGE PLPGSQL ROWS 10;
 
+-- SymSpell implementation follows
+
+CREATE OR REPLACE FUNCTION evergreen.text_array_merge_unique (
+    TEXT[], TEXT[]
+) RETURNS TEXT[] AS $F$
+    SELECT NULLIF(ARRAY(
+        SELECT * FROM UNNEST($1) x WHERE x IS NOT NULL
+            UNION
+        SELECT * FROM UNNEST($2) y WHERE y IS NOT NULL
+    ),'{}');
+$F$ LANGUAGE SQL;
+
+CREATE OR REPLACE FUNCTION evergreen.qwerty_keyboard_distance ( a TEXT, b TEXT ) RETURNS NUMERIC AS $F$
+use String::KeyboardDistance qw(:all);
+return qwerty_keyboard_distance(@_);
+$F$ LANGUAGE PLPERLU STRICT IMMUTABLE;
+
+CREATE OR REPLACE FUNCTION evergreen.qwerty_keyboard_distance_match ( a TEXT, b TEXT ) RETURNS NUMERIC AS $F$
+use String::KeyboardDistance qw(:all);
+return qwerty_keyboard_distance_match(@_);
+$F$ LANGUAGE PLPERLU STRICT IMMUTABLE;
+
+CREATE OR REPLACE FUNCTION evergreen.levenshtein_damerau_edistance ( a TEXT, b TEXT, INT ) RETURNS NUMERIC AS $F$
+use Text::Levenshtein::Damerau::XS qw/xs_edistance/;
+return xs_edistance(@_);
+$F$ LANGUAGE PLPERLU STRICT IMMUTABLE;
+
+CREATE TABLE search.symspell_dictionary (
+    keyword_count           INT     NOT NULL DEFAULT 0,
+    title_count             INT     NOT NULL DEFAULT 0,
+    author_count            INT     NOT NULL DEFAULT 0,
+    subject_count           INT     NOT NULL DEFAULT 0,
+    series_count            INT     NOT NULL DEFAULT 0,
+    identifier_count        INT     NOT NULL DEFAULT 0,
+
+    prefix_key              TEXT    PRIMARY KEY,
+
+    keyword_suggestions     TEXT[],
+    title_suggestions       TEXT[],
+    author_suggestions      TEXT[],
+    subject_suggestions     TEXT[],
+    series_suggestions      TEXT[],
+    identifier_suggestions  TEXT[]
+) WITH (fillfactor = 80);
+
+CREATE OR REPLACE FUNCTION search.symspell_parse_words ( phrase TEXT )
+RETURNS SETOF TEXT AS $F$
+    SELECT UNNEST(x) FROM regexp_matches($1, '([[:alnum:]]+''*[[:alnum:]]*)', 'g') x;
+$F$ LANGUAGE SQL STRICT IMMUTABLE;
+
+-- This version does not preserve input word order!
+CREATE OR REPLACE FUNCTION search.symspell_parse_words_distinct ( phrase TEXT )
+RETURNS SETOF TEXT AS $F$
+    SELECT DISTINCT UNNEST(x) FROM regexp_matches($1, '([[:alnum:]]+''*[[:alnum:]]*)', 'g') x;
+$F$ LANGUAGE SQL STRICT IMMUTABLE;
+
+CREATE OR REPLACE FUNCTION search.symspell_transfer_casing ( withCase TEXT, withoutCase TEXT )
+RETURNS TEXT AS $F$
+DECLARE
+    woChars TEXT[];
+    curr    TEXT;
+    ind     INT := 1;
+BEGIN
+    woChars := regexp_split_to_array(withoutCase,'');
+    FOR curr IN SELECT x FROM regexp_split_to_table(withCase, '') x LOOP
+        IF curr = evergreen.uppercase(curr) THEN
+            woChars[ind] := evergreen.uppercase(woChars[ind]);
+        END IF;
+        ind := ind + 1;
+    END LOOP;
+    RETURN ARRAY_TO_STRING(woChars,'');
+END;
+$F$ LANGUAGE PLPGSQL STRICT IMMUTABLE;
+
+CREATE OR REPLACE FUNCTION search.symspell_generate_edits (
+    raw_word    TEXT,
+    dist        INT DEFAULT 1,
+    maxED       INT DEFAULT 3
+) RETURNS TEXT[] AS $F$
+DECLARE
+    item    TEXT;
+    list    TEXT[] := '{}';
+    sublist TEXT[] := '{}';
+BEGIN
+    FOR I IN 1 .. CHARACTER_LENGTH(raw_word) LOOP
+        item := SUBSTRING(raw_word FROM 1 FOR I - 1) || SUBSTRING(raw_word FROM I + 1);
+        IF NOT list @> ARRAY[item] THEN
+            list := item || list;
+            IF dist < maxED AND CHARACTER_LENGTH(raw_word) > dist + 1 THEN
+                sublist := search.symspell_generate_edits(item, dist + 1, maxED) || sublist;
+            END IF;
+        END IF;
+    END LOOP;
+
+    IF dist = 1 THEN
+        RETURN evergreen.text_array_merge_unique(list, sublist);
+    ELSE
+        RETURN list || sublist;
+    END IF;
+END;
+$F$ LANGUAGE PLPGSQL STRICT IMMUTABLE;
+
+-- DROP TYPE search.symspell_lookup_output CASCADE;
+CREATE TYPE search.symspell_lookup_output AS (
+    suggestion          TEXT,
+    suggestion_count    INT,
+    lev_distance        INT,
+    pg_trgm_sim         NUMERIC,
+    qwerty_kb_match     NUMERIC,
+    soundex_sim         NUMERIC,
+    input               TEXT,
+    norm_input          TEXT,
+    prefix_key          TEXT,
+    prefix_key_count    INT,
+    word_pos            INT
+);
+
+CREATE OR REPLACE FUNCTION search.symspell_lookup (
+    raw_input       TEXT,
+    search_class    TEXT,
+    verbosity       INT DEFAULT 2,
+    xfer_case       BOOL DEFAULT FALSE,
+    count_threshold INT DEFAULT 1,
+    soundex_weight  INT DEFAULT 0,
+    pg_trgm_weight  INT DEFAULT 0,
+    kbdist_weight   INT DEFAULT 0
+) RETURNS SETOF search.symspell_lookup_output AS $F$
+DECLARE
+    prefix_length INT;
+    maxED         INT;
+    word_list   TEXT[];
+    edit_list   TEXT[] := '{}';
+    seen_list   TEXT[] := '{}';
+    output      search.symspell_lookup_output;
+    output_list search.symspell_lookup_output[];
+    entry       RECORD;
+    entry_key   TEXT;
+    prefix_key  TEXT;
+    sugg        TEXT;
+    input       TEXT;
+    word        TEXT;
+    w_pos       INT := -1;
+    smallest_ed INT := -1;
+    global_ed   INT;
+BEGIN
+    SELECT value::INT INTO prefix_length FROM config.internal_flag WHERE name = 'symspell.prefix_length' AND enabled;
+    prefix_length := COALESCE(prefix_length, 6);
+
+    SELECT value::INT INTO maxED FROM config.internal_flag WHERE name = 'symspell.max_edit_distance' AND enabled;
+    maxED := COALESCE(maxED, 3);
+
+    word_list := ARRAY_AGG(x) FROM search.symspell_parse_words(raw_input) x;
+
+    -- Common case exact match test for preformance
+    IF verbosity = 0 AND CARDINALITY(word_list) = 1 AND CHARACTER_LENGTH(word_list[1]) <= prefix_length THEN
+        EXECUTE
+          'SELECT  '||search_class||'_suggestions AS suggestions,
+                   '||search_class||'_count AS count,
+                   prefix_key
+             FROM  search.symspell_dictionary
+             WHERE prefix_key = $1
+                   AND '||search_class||'_count >= $2 
+                   AND '||search_class||'_suggestions @> ARRAY[$1]' 
+          INTO entry USING evergreen.lowercase(word_list[1]), COALESCE(count_threshold,1);
+        IF entry.prefix_key IS NOT NULL THEN
+            output.lev_distance := 0; -- definitionally
+            output.prefix_key := entry.prefix_key;
+            output.prefix_key_count := entry.count;
+            output.suggestion_count := entry.count;
+            output.input := word_list[1];
+            IF xfer_case THEN
+                output.suggestion := search.symspell_transfer_casing(output.input, entry.prefix_key);
+            ELSE
+                output.suggestion := entry.prefix_key;
+            END IF;
+            output.norm_input := entry.prefix_key;
+            output.qwerty_kb_match := 1;
+            output.pg_trgm_sim := 1;
+            output.soundex_sim := 1;
+            RETURN NEXT output;
+            RETURN;
+        END IF;
+    END IF;
+
+    <<word_loop>>
+    FOREACH word IN ARRAY word_list LOOP
+        w_pos := w_pos + 1;
+        input := evergreen.lowercase(word);
+
+        IF CHARACTER_LENGTH(input) > prefix_length THEN
+            prefix_key := SUBSTRING(input FROM 1 FOR prefix_length);
+            edit_list := ARRAY[input,prefix_key] || search.symspell_generate_edits(prefix_key, 1, maxED);
+        ELSE
+            edit_list := input || search.symspell_generate_edits(input, 1, maxED);
+        END IF;
+
+        SELECT ARRAY_AGG(x ORDER BY CHARACTER_LENGTH(x) DESC) INTO edit_list FROM UNNEST(edit_list) x;
+
+        output_list := '{}';
+        seen_list := '{}';
+        global_ed := NULL;
+
+        <<entry_key_loop>>
+        FOREACH entry_key IN ARRAY edit_list LOOP
+            smallest_ed := -1;
+            IF global_ed IS NOT NULL THEN
+                smallest_ed := global_ed;
+            END IF;
+            FOR entry IN EXECUTE
+                'SELECT  '||search_class||'_suggestions AS suggestions,
+                         '||search_class||'_count AS count,
+                         prefix_key
+                   FROM  search.symspell_dictionary
+                   WHERE prefix_key = $1
+                         AND '||search_class||'_suggestions IS NOT NULL' 
+                USING entry_key
+            LOOP
+                FOREACH sugg IN ARRAY entry.suggestions LOOP
+                    IF NOT seen_list @> ARRAY[sugg] THEN
+                        seen_list := seen_list || sugg;
+                        IF input = sugg THEN -- exact match, no need to spend time on a call
+                            output.lev_distance := 0;
+                            output.suggestion_count = entry.count;
+                        ELSIF ABS(CHARACTER_LENGTH(input) - CHARACTER_LENGTH(sugg)) > maxED THEN
+                            -- They are definitionally too different to consider, just move on.
+                            CONTINUE;
+                        ELSE
+                            --output.lev_distance := levenshtein_less_equal(
+                            output.lev_distance := evergreen.levenshtein_damerau_edistance(
+                                input,
+                                sugg,
+                                maxED
+                            );
+                            IF output.lev_distance < 0 THEN
+                                -- The Perl module returns -1 for "more distant than max".
+                                output.lev_distance := maxED + 1;
+                                -- This short-circuit's the count test below for speed, bypassing
+                                -- a couple useless tests.
+                                output.suggestion_count := -1;
+                            ELSE
+                                EXECUTE 'SELECT '||search_class||'_count FROM search.symspell_dictionary WHERE prefix_key = $1'
+                                    INTO output.suggestion_count USING sugg;
+                            END IF;
+                        END IF;
+
+                        -- The caller passes a minimum suggestion count threshold (or uses
+                        -- the default of 0) and if the suggestion has that many or less uses
+                        -- then we move on to the next suggestion, since this one is too rare.
+                        CONTINUE WHEN output.suggestion_count < COALESCE(count_threshold,1);
+
+                        -- Track the smallest edit distance among suggestions from this prefix key.
+                        IF smallest_ed = -1 OR output.lev_distance < smallest_ed THEN
+                            smallest_ed := output.lev_distance;
+                        END IF;
+
+                        -- Track the smallest edit distance for all prefix keys for this word.
+                        IF global_ed IS NULL OR smallest_ed < global_ed THEN
+                            global_ed = smallest_ed;
+                        END IF;
+
+                        -- Only proceed if the edit distance is <= the max for the dictionary.
+                        IF output.lev_distance <= maxED THEN
+                            IF output.lev_distance > global_ed AND verbosity <= 1 THEN
+                                -- Lev distance is our main similarity measure. While
+                                -- trgm or soundex similarity could be the main filter,
+                                -- Lev is both language agnostic and faster.
+                                --
+                                -- Here we will skip suggestions that have a longer edit distance
+                                -- than the shortest we've already found. This is simply an
+                                -- optimization that allows us to avoid further processing
+                                -- of this entry. It would be filtered out later.
+
+                                CONTINUE;
+                            END IF;
+
+                            -- If we have an exact match on the suggestion key we can also avoid
+                            -- some function calls.
+                            IF output.lev_distance = 0 THEN
+                                output.qwerty_kb_match := 1;
+                                output.pg_trgm_sim := 1;
+                                output.soundex_sim := 1;
+                            ELSE
+                                output.qwerty_kb_match := evergreen.qwerty_keyboard_distance_match(input, sugg);
+                                output.pg_trgm_sim := similarity(input, sugg);
+                                output.soundex_sim := difference(input, sugg) / 4.0;
+                            END IF;
+
+                            -- Fill in some fields
+                            IF xfer_case THEN
+                                output.suggestion := search.symspell_transfer_casing(word, sugg);
+                            ELSE
+                                output.suggestion := sugg;
+                            END IF;
+                            output.prefix_key := entry.prefix_key;
+                            output.prefix_key_count := entry.count;
+                            output.input := word;
+                            output.norm_input := input;
+                            output.word_pos := w_pos;
+
+                            -- We can't "cache" a set of generated records directly, so
+                            -- here we build up an array of search.symspell_lookup_output
+                            -- records that we can revivicate later as a table using UNNEST().
+                            output_list := output_list || output;
+
+                            EXIT entry_key_loop WHEN smallest_ed = 0 AND verbosity = 0; -- exact match early exit
+                            CONTINUE entry_key_loop WHEN smallest_ed = 0 AND verbosity = 1; -- exact match early jump to the next key
+                        END IF; -- maxED test
+                    END IF; -- suggestion not seen test
+                END LOOP; -- loop over suggestions
+            END LOOP; -- loop over entries
+        END LOOP; -- loop over entry_keys
+
+        -- Now we're done examining this word
+        IF verbosity = 0 THEN
+            -- Return the "best" suggestion from the smallest edit
+            -- distance group.  We define best based on the weighting
+            -- of the non-lev similarity measures and use the suggestion
+            -- use count to break ties.
+            RETURN QUERY
+                SELECT * FROM UNNEST(output_list)
+                    ORDER BY lev_distance,
+                        (soundex_sim * COALESCE(soundex_weight,0))
+                            + (pg_trgm_sim * COALESCE(pg_trgm_weight,0))
+                            + (qwerty_kb_match * COALESCE(kbdist_weight,0)) DESC,
+                        suggestion_count DESC
+                        LIMIT 1;
+        ELSIF verbosity = 1 THEN
+            -- Return all suggestions from the smallest
+            -- edit distance group.
+            RETURN QUERY
+                SELECT * FROM UNNEST(output_list) WHERE lev_distance = smallest_ed
+                    ORDER BY (soundex_sim * COALESCE(soundex_weight,0))
+                            + (pg_trgm_sim * COALESCE(pg_trgm_weight,0))
+                            + (qwerty_kb_match * COALESCE(kbdist_weight,0)) DESC,
+                        suggestion_count DESC;
+        ELSIF verbosity = 2 THEN
+            -- Return everything we find, along with relevant stats
+            RETURN QUERY
+                SELECT * FROM UNNEST(output_list)
+                    ORDER BY lev_distance,
+                        (soundex_sim * COALESCE(soundex_weight,0))
+                            + (pg_trgm_sim * COALESCE(pg_trgm_weight,0))
+                            + (qwerty_kb_match * COALESCE(kbdist_weight,0)) DESC,
+                        suggestion_count DESC;
+        ELSIF verbosity = 3 THEN
+            -- Return everything we find from the two smallest edit distance groups
+            RETURN QUERY
+                SELECT * FROM UNNEST(output_list)
+                    WHERE lev_distance IN (SELECT DISTINCT lev_distance FROM UNNEST(output_list) ORDER BY 1 LIMIT 2)
+                    ORDER BY lev_distance,
+                        (soundex_sim * COALESCE(soundex_weight,0))
+                            + (pg_trgm_sim * COALESCE(pg_trgm_weight,0))
+                            + (qwerty_kb_match * COALESCE(kbdist_weight,0)) DESC,
+                        suggestion_count DESC;
+        ELSIF verbosity = 4 THEN
+            -- Return everything we find from the two smallest edit distance groups that are NOT 0 distance
+            RETURN QUERY
+                SELECT * FROM UNNEST(output_list)
+                    WHERE lev_distance IN (SELECT DISTINCT lev_distance FROM UNNEST(output_list) WHERE lev_distance > 0 ORDER BY 1 LIMIT 2)
+                    ORDER BY lev_distance,
+                        (soundex_sim * COALESCE(soundex_weight,0))
+                            + (pg_trgm_sim * COALESCE(pg_trgm_weight,0))
+                            + (qwerty_kb_match * COALESCE(kbdist_weight,0)) DESC,
+                        suggestion_count DESC;
+        END IF;
+    END LOOP; -- loop over words
+END;
+$F$ LANGUAGE PLPGSQL;
+
+CREATE OR REPLACE FUNCTION search.symspell_build_raw_entry (
+    raw_input       TEXT,
+    source_class    TEXT,
+    no_limit        BOOL DEFAULT FALSE,
+    prefix_length   INT DEFAULT 6,
+    maxED           INT DEFAULT 3
+) RETURNS SETOF search.symspell_dictionary AS $F$
+DECLARE
+    key         TEXT;
+    del_key     TEXT;
+    key_list    TEXT[];
+    entry       search.symspell_dictionary%ROWTYPE;
+BEGIN
+    key := raw_input;
+
+    IF NOT no_limit AND CHARACTER_LENGTH(raw_input) > prefix_length THEN
+        key := SUBSTRING(key FROM 1 FOR prefix_length);
+        key_list := ARRAY[raw_input, key];
+    ELSE
+        key_list := ARRAY[key];
+    END IF;
+
+    FOREACH del_key IN ARRAY key_list LOOP
+        entry.prefix_key := del_key;
+
+        entry.keyword_count := 0;
+        entry.title_count := 0;
+        entry.author_count := 0;
+        entry.subject_count := 0;
+        entry.series_count := 0;
+        entry.identifier_count := 0;
+
+        entry.keyword_suggestions := '{}';
+        entry.title_suggestions := '{}';
+        entry.author_suggestions := '{}';
+        entry.subject_suggestions := '{}';
+        entry.series_suggestions := '{}';
+        entry.identifier_suggestions := '{}';
+
+        IF source_class = 'keyword' THEN entry.keyword_suggestions := ARRAY[raw_input]; END IF;
+        IF source_class = 'title' THEN entry.title_suggestions := ARRAY[raw_input]; END IF;
+        IF source_class = 'author' THEN entry.author_suggestions := ARRAY[raw_input]; END IF;
+        IF source_class = 'subject' THEN entry.subject_suggestions := ARRAY[raw_input]; END IF;
+        IF source_class = 'series' THEN entry.series_suggestions := ARRAY[raw_input]; END IF;
+        IF source_class = 'identifier' THEN entry.identifier_suggestions := ARRAY[raw_input]; END IF;
+        IF source_class = 'keyword' THEN entry.keyword_suggestions := ARRAY[raw_input]; END IF;
+
+        IF del_key = raw_input THEN
+            IF source_class = 'keyword' THEN entry.keyword_count := 1; END IF;
+            IF source_class = 'title' THEN entry.title_count := 1; END IF;
+            IF source_class = 'author' THEN entry.author_count := 1; END IF;
+            IF source_class = 'subject' THEN entry.subject_count := 1; END IF;
+            IF source_class = 'series' THEN entry.series_count := 1; END IF;
+            IF source_class = 'identifier' THEN entry.identifier_count := 1; END IF;
+        END IF;
+
+        RETURN NEXT entry;
+    END LOOP;
+
+    FOR del_key IN SELECT x FROM UNNEST(search.symspell_generate_edits(key, 1, maxED)) x LOOP
+
+        entry.keyword_suggestions := '{}';
+        entry.title_suggestions := '{}';
+        entry.author_suggestions := '{}';
+        entry.subject_suggestions := '{}';
+        entry.series_suggestions := '{}';
+        entry.identifier_suggestions := '{}';
+
+        IF source_class = 'keyword' THEN entry.keyword_count := 0; END IF;
+        IF source_class = 'title' THEN entry.title_count := 0; END IF;
+        IF source_class = 'author' THEN entry.author_count := 0; END IF;
+        IF source_class = 'subject' THEN entry.subject_count := 0; END IF;
+        IF source_class = 'series' THEN entry.series_count := 0; END IF;
+        IF source_class = 'identifier' THEN entry.identifier_count := 0; END IF;
+
+        entry.prefix_key := del_key;
+
+        IF source_class = 'keyword' THEN entry.keyword_suggestions := ARRAY[raw_input]; END IF;
+        IF source_class = 'title' THEN entry.title_suggestions := ARRAY[raw_input]; END IF;
+        IF source_class = 'author' THEN entry.author_suggestions := ARRAY[raw_input]; END IF;
+        IF source_class = 'subject' THEN entry.subject_suggestions := ARRAY[raw_input]; END IF;
+        IF source_class = 'series' THEN entry.series_suggestions := ARRAY[raw_input]; END IF;
+        IF source_class = 'identifier' THEN entry.identifier_suggestions := ARRAY[raw_input]; END IF;
+        IF source_class = 'keyword' THEN entry.keyword_suggestions := ARRAY[raw_input]; END IF;
+
+        RETURN NEXT entry;
+    END LOOP;
+
+END;
+$F$ LANGUAGE PLPGSQL STRICT IMMUTABLE;
+
+CREATE OR REPLACE FUNCTION search.symspell_build_entries (
+    full_input      TEXT,
+    source_class    TEXT,
+    old_input       TEXT DEFAULT NULL,
+    include_phrases BOOL DEFAULT FALSE
+) RETURNS SETOF search.symspell_dictionary AS $F$
+DECLARE
+    prefix_length   INT;
+    maxED           INT;
+    word_list   TEXT[];
+    input       TEXT;
+    word        TEXT;
+    entry       search.symspell_dictionary;
+BEGIN
+    IF full_input IS NOT NULL THEN
+        SELECT value::INT INTO prefix_length FROM config.internal_flag WHERE name = 'symspell.prefix_length' AND enabled;
+        prefix_length := COALESCE(prefix_length, 6);
+
+        SELECT value::INT INTO maxED FROM config.internal_flag WHERE name = 'symspell.max_edit_distance' AND enabled;
+        maxED := COALESCE(maxED, 3);
+
+        input := evergreen.lowercase(full_input);
+        word_list := ARRAY_AGG(x) FROM search.symspell_parse_words_distinct(input) x;
+    
+        IF CARDINALITY(word_list) > 1 AND include_phrases THEN
+            RETURN QUERY SELECT * FROM search.symspell_build_raw_entry(input, source_class, TRUE, prefix_length, maxED);
+        END IF;
+
+        FOREACH word IN ARRAY word_list LOOP
+            RETURN QUERY SELECT * FROM search.symspell_build_raw_entry(word, source_class, FALSE, prefix_length, maxED);
+        END LOOP;
+    END IF;
+
+    IF old_input IS NOT NULL THEN
+        input := evergreen.lowercase(old_input);
+
+        FOR word IN SELECT x FROM search.symspell_parse_words_distinct(input) x LOOP
+            entry.prefix_key := word;
+
+            entry.keyword_count := 0;
+            entry.title_count := 0;
+            entry.author_count := 0;
+            entry.subject_count := 0;
+            entry.series_count := 0;
+            entry.identifier_count := 0;
+
+            entry.keyword_suggestions := '{}';
+            entry.title_suggestions := '{}';
+            entry.author_suggestions := '{}';
+            entry.subject_suggestions := '{}';
+            entry.series_suggestions := '{}';
+            entry.identifier_suggestions := '{}';
+
+            IF source_class = 'keyword' THEN entry.keyword_count := -1; END IF;
+            IF source_class = 'title' THEN entry.title_count := -1; END IF;
+            IF source_class = 'author' THEN entry.author_count := -1; END IF;
+            IF source_class = 'subject' THEN entry.subject_count := -1; END IF;
+            IF source_class = 'series' THEN entry.series_count := -1; END IF;
+            IF source_class = 'identifier' THEN entry.identifier_count := -1; END IF;
+
+            RETURN NEXT entry;
+        END LOOP;
+    END IF;
+END;
+$F$ LANGUAGE PLPGSQL;
+
+CREATE OR REPLACE FUNCTION search.symspell_build_and_merge_entries (
+    full_input      TEXT,
+    source_class    TEXT,
+    old_input       TEXT DEFAULT NULL,
+    include_phrases BOOL DEFAULT FALSE
+) RETURNS SETOF search.symspell_dictionary AS $F$
+DECLARE
+    new_entry       RECORD;
+    conflict_entry  RECORD;
+BEGIN
+
+    IF full_input = old_input THEN -- neither NULL, and are the same
+        RETURN;
+    END IF;
+
+    FOR new_entry IN EXECUTE $q$
+        SELECT  count,
+                prefix_key,
+                evergreen.text_array_merge_unique(s,'{}') suggestions
+          FROM  (SELECT prefix_key,
+                        ARRAY_AGG($q$ || source_class || $q$_suggestions[1]) s,
+                        SUM($q$ || source_class || $q$_count) count
+                  FROM  search.symspell_build_entries($1, $2, $3, $4)
+                  GROUP BY 1) x
+        $q$ USING full_input, source_class, old_input, include_phrases
+    LOOP
+        EXECUTE $q$
+            SELECT  prefix_key,
+                    $q$ || source_class || $q$_suggestions suggestions,
+                    $q$ || source_class || $q$_count count
+              FROM  search.symspell_dictionary
+              WHERE prefix_key = $1 $q$
+            INTO conflict_entry
+            USING new_entry.prefix_key;
+
+        IF new_entry.count <> 0 THEN -- Real word, and count changed
+            IF conflict_entry.prefix_key IS NOT NULL THEN -- we'll be updating
+                IF conflict_entry.count > 0 THEN -- it's a real word
+                    RETURN QUERY EXECUTE $q$
+                        UPDATE  search.symspell_dictionary
+                           SET  $q$ || source_class || $q$_count = $2
+                          WHERE prefix_key = $1
+                          RETURNING * $q$
+                        USING new_entry.prefix_key, GREATEST(0, new_entry.count + conflict_entry.count);
+                ELSE -- it was a prefix key or delete-emptied word before
+                    IF conflict_entry.suggestions @> new_entry.suggestions THEN -- already have all suggestions here...
+                        RETURN QUERY EXECUTE $q$
+                            UPDATE  search.symspell_dictionary
+                               SET  $q$ || source_class || $q$_count = $2
+                              WHERE prefix_key = $1
+                              RETURNING * $q$
+                            USING new_entry.prefix_key, GREATEST(0, new_entry.count);
+                    ELSE -- new suggestion!
+                        RETURN QUERY EXECUTE $q$
+                            UPDATE  search.symspell_dictionary
+                               SET  $q$ || source_class || $q$_count = $2,
+                                    $q$ || source_class || $q$_suggestions = $3
+                              WHERE prefix_key = $1
+                              RETURNING * $q$
+                            USING new_entry.prefix_key, GREATEST(0, new_entry.count), evergreen.text_array_merge_unique(conflict_entry.suggestions,new_entry.suggestions);
+                    END IF;
+                END IF;
+            ELSE
+                -- We keep the on-conflict clause just in case...
+                RETURN QUERY EXECUTE $q$
+                    INSERT INTO search.symspell_dictionary AS d (
+                        $q$ || source_class || $q$_count,
+                        prefix_key,
+                        $q$ || source_class || $q$_suggestions
+                    ) VALUES ( $1, $2, $3 ) ON CONFLICT (prefix_key) DO
+                        UPDATE SET  $q$ || source_class || $q$_count = d.$q$ || source_class || $q$_count + EXCLUDED.$q$ || source_class || $q$_count,
+                                    $q$ || source_class || $q$_suggestions = evergreen.text_array_merge_unique(d.$q$ || source_class || $q$_suggestions, EXCLUDED.$q$ || source_class || $q$_suggestions)
+                        RETURNING * $q$
+                    USING new_entry.count, new_entry.prefix_key, new_entry.suggestions;
+            END IF;
+        ELSE -- key only, or no change
+            IF conflict_entry.prefix_key IS NOT NULL THEN -- we'll be updating
+                IF NOT conflict_entry.suggestions @> new_entry.suggestions THEN -- There are new suggestions
+                    RETURN QUERY EXECUTE $q$
+                        UPDATE  search.symspell_dictionary
+                           SET  $q$ || source_class || $q$_suggestions = $2
+                          WHERE prefix_key = $1
+                          RETURNING * $q$
+                        USING new_entry.prefix_key, evergreen.text_array_merge_unique(conflict_entry.suggestions,new_entry.suggestions);
+                END IF;
+            ELSE
+                RETURN QUERY EXECUTE $q$
+                    INSERT INTO search.symspell_dictionary AS d (
+                        $q$ || source_class || $q$_count,
+                        prefix_key,
+                        $q$ || source_class || $q$_suggestions
+                    ) VALUES ( $1, $2, $3 ) ON CONFLICT (prefix_key) DO -- key exists, suggestions may be added due to this entry
+                        UPDATE SET  $q$ || source_class || $q$_suggestions = evergreen.text_array_merge_unique(d.$q$ || source_class || $q$_suggestions, EXCLUDED.$q$ || source_class || $q$_suggestions)
+                    RETURNING * $q$
+                    USING new_entry.count, new_entry.prefix_key, new_entry.suggestions;
+            END IF;
+        END IF;
+    END LOOP;
+END;
+$F$ LANGUAGE PLPGSQL;
+
+CREATE OR REPLACE FUNCTION search.symspell_maintain_entries () RETURNS TRIGGER AS $f$
+DECLARE
+    search_class    TEXT;
+    new_value       TEXT := NULL;
+    old_value       TEXT := NULL;
+BEGIN
+    search_class := COALESCE(TG_ARGV[0], SPLIT_PART(TG_TABLE_NAME,'_',1));
+
+    IF TG_OP IN ('INSERT', 'UPDATE') THEN
+        new_value := NEW.value;
+    END IF;
+
+    IF TG_OP IN ('DELETE', 'UPDATE') THEN
+        old_value := OLD.value;
+    END IF;
+
+    PERFORM * FROM search.symspell_build_and_merge_entries(new_value, search_class, old_value);
+
+    RETURN NULL; -- always fired AFTER
+END;
+$f$ LANGUAGE PLPGSQL;
+
+CREATE TRIGGER maintain_symspell_entries_tgr
+    AFTER INSERT OR UPDATE OR DELETE ON metabib.title_field_entry
+    FOR EACH ROW EXECUTE PROCEDURE search.symspell_maintain_entries();
+
+CREATE TRIGGER maintain_symspell_entries_tgr
+    AFTER INSERT OR UPDATE OR DELETE ON metabib.author_field_entry
+    FOR EACH ROW EXECUTE PROCEDURE search.symspell_maintain_entries();
+
+CREATE TRIGGER maintain_symspell_entries_tgr
+    AFTER INSERT OR UPDATE OR DELETE ON metabib.subject_field_entry
+    FOR EACH ROW EXECUTE PROCEDURE search.symspell_maintain_entries();
+
+CREATE TRIGGER maintain_symspell_entries_tgr
+    AFTER INSERT OR UPDATE OR DELETE ON metabib.series_field_entry
+    FOR EACH ROW EXECUTE PROCEDURE search.symspell_maintain_entries();
+
+CREATE TRIGGER maintain_symspell_entries_tgr
+    AFTER INSERT OR UPDATE OR DELETE ON metabib.keyword_field_entry
+    FOR EACH ROW EXECUTE PROCEDURE search.symspell_maintain_entries();
+
+CREATE TRIGGER maintain_symspell_entries_tgr
+    AFTER INSERT OR UPDATE OR DELETE ON metabib.identifier_field_entry
+    FOR EACH ROW EXECUTE PROCEDURE search.symspell_maintain_entries();
+
 COMMIT;
 
index 53dd16b..a23f217 100644 (file)
@@ -21516,3 +21516,77 @@ VALUES (
         'cgf', 'label'
     )
 );
+
+INSERT INTO config.internal_flag (name, value, enabled) VALUES ('symspell.prefix_length', '6', TRUE);
+INSERT INTO config.internal_flag (name, value, enabled) VALUES ('symspell.max_edit_distance', '3', TRUE);
+
+INSERT into config.org_unit_setting_type
+( name, grp, label, description, datatype )
+VALUES
+( 'opac.did_you_mean.low_result_threshold', 'opac',
+   oils_i18n_gettext(
+     'opac.did_you_mean.low_result_threshold',
+     'Maximum search result count at which spelling suggestions may be offered',
+     'coust', 'label'),
+   oils_i18n_gettext(
+     'opac.did_you_mean.low_result_threshold',
+     'If a search results in this number or fewer results, and there are correctable spelling mistakes, a suggested search may be provided.',
+     'coust', 'description'),
+   'integer' );
+
+INSERT into config.org_unit_setting_type
+( name, grp, label, description, datatype )
+VALUES
+( 'search.symspell.min_suggestion_use_threshold', 'opac',
+   oils_i18n_gettext(
+     'search.symspell.min_suggestion_use_threshold',
+     'Minimum required uses of a spelling suggestions that may be offered',
+     'coust', 'label'),
+   oils_i18n_gettext(
+     'search.symspell.min_suggestion_use_threshold',
+     'The number of bibliographic records (more or less) that a spelling suggestion must appear in to be considered before offering it to a user. Defaults to 1 (must appear in the bib data).',
+     'coust', 'description'),
+   'integer' );
+
+INSERT into config.org_unit_setting_type
+( name, grp, label, description, datatype )
+VALUES
+( 'search.symspell.soundex.weight', 'opac',
+   oils_i18n_gettext(
+     'search.symspell.soundex.weight',
+     'Soundex score weighting in OPAC spelling suggestions.',
+     'coust', 'label'),
+   oils_i18n_gettext(
+     'search.symspell.soundex.weight',
+     'Soundex, trgm, and keyboard distance similarity measures can be combined to form a secondary ordering parameter for spelling suggestions. This controls the relative weight of the scaled soundex component. Defaults to 0 for "off".',
+     'coust', 'description'),
+   'integer' );
+
+INSERT into config.org_unit_setting_type
+( name, grp, label, description, datatype )
+VALUES
+( 'search.symspell.pg_trgm.weight', 'opac',
+   oils_i18n_gettext(
+     'search.symspell.pg_trgm.weight',
+     'Pg_trgm score weighting in OPAC spelling suggestions.',
+     'coust', 'label'),
+   oils_i18n_gettext(
+     'search.symspell.pg_trgm.weight',
+     'Soundex, pg_trgm, and keyboard distance similarity measures can be combined to form a secondary ordering parameter for spelling suggestions. This controls the relative weight of the scaled pg_trgm component. Defaults to 0 for "off".',
+     'coust', 'description'),
+   'integer' );
+
+INSERT into config.org_unit_setting_type
+( name, grp, label, description, datatype )
+VALUES
+( 'search.symspell.keyboard_distance.weight', 'opac',
+   oils_i18n_gettext(
+     'search.symspell.keyboard_distance.weight',
+     'Keyboard distance score weighting in OPAC spelling suggestions.',
+     'coust', 'label'),
+   oils_i18n_gettext(
+     'search.symspell.keyboard_distance.weight',
+     'Soundex, trgm, and keyboard distance similarity measures can be combined to form a secondary ordering parameter for spelling suggestions. This controls the relative weight of the scaled keyboard distance component. Defaults to 0 for "off".',
+     'coust', 'description'),
+   'integer' );
+
index b37f79e..c1c8571 100644 (file)
@@ -22,3 +22,5 @@ CREATE EXTENSION intarray;
 CREATE EXTENSION pgcrypto;
 CREATE EXTENSION unaccent;
 CREATE EXTENSION earthdistance CASCADE;
+CREATE EXTENSION fuzzystrmatch;
+CREATE EXTENSION pg_trgm;
diff --git a/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.symspell.sql b/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.symspell.sql
new file mode 100644 (file)
index 0000000..5de9eaf
--- /dev/null
@@ -0,0 +1,903 @@
+BEGIN;
+
+CREATE EXTENSION IF NOT EXISTS fuzzystrmatch;
+CREATE EXTENSION IF NOT EXISTS pg_trgm;
+
+INSERT INTO config.internal_flag (name, value, enabled) VALUES ('symspell.prefix_length', '6', TRUE);
+INSERT INTO config.internal_flag (name, value, enabled) VALUES ('symspell.max_edit_distance', '3', TRUE);
+
+INSERT into config.org_unit_setting_type
+( name, grp, label, description, datatype )
+VALUES
+( 'opac.did_you_mean.max_suggestions', 'opac',
+   oils_i18n_gettext(
+     'opac.did_you_mean.max_suggestions',
+     'Maximum number of spelling suggestions that may be offered',
+     'coust', 'label'),
+   oils_i18n_gettext(
+     'opac.did_you_mean.max_suggestions',
+     'If set to -1, provide "best" suggestion if mispelled; if set higher than 0, the maximum suggestions that can be provided; if set to 0, disable suggestions.',
+     'coust', 'description'),
+   'integer' );
+
+INSERT into config.org_unit_setting_type
+( name, grp, label, description, datatype )
+VALUES
+( 'opac.did_you_mean.low_result_threshold', 'opac',
+   oils_i18n_gettext(
+     'opac.did_you_mean.low_result_threshold',
+     'Maximum search result count at which spelling suggestions may be offered',
+     'coust', 'label'),
+   oils_i18n_gettext(
+     'opac.did_you_mean.low_result_threshold',
+     'If a search results in this number or fewer results, and there are correctable spelling mistakes, a suggested search may be provided.',
+     'coust', 'description'),
+   'integer' );
+
+INSERT into config.org_unit_setting_type
+( name, grp, label, description, datatype )
+VALUES
+( 'search.symspell.min_suggestion_use_threshold', 'opac',
+   oils_i18n_gettext(
+     'search.symspell.min_suggestion_use_threshold',
+     'Minimum required uses of a spelling suggestions that may be offered',
+     'coust', 'label'),
+   oils_i18n_gettext(
+     'search.symspell.min_suggestion_use_threshold',
+     'The number of bibliographic records (more or less) that a spelling suggestion must appear in to be considered before offering it to a user. Defaults to 1 (must appear in the bib data).',
+     'coust', 'description'),
+   'integer' );
+
+INSERT into config.org_unit_setting_type
+( name, grp, label, description, datatype )
+VALUES
+( 'search.symspell.soundex.weight', 'opac',
+   oils_i18n_gettext(
+     'search.symspell.soundex.weight',
+     'Soundex score weighting in OPAC spelling suggestions.',
+     'coust', 'label'),
+   oils_i18n_gettext(
+     'search.symspell.soundex.weight',
+     'Soundex, trgm, and keyboard distance similarity measures can be combined to form a secondary ordering parameter for spelling suggestions. This controls the relative weight of the scaled soundex component. Defaults to 0 for "off".',
+     'coust', 'description'),
+   'integer' );
+
+INSERT into config.org_unit_setting_type
+( name, grp, label, description, datatype )
+VALUES
+( 'search.symspell.pg_trgm.weight', 'opac',
+   oils_i18n_gettext(
+     'search.symspell.pg_trgm.weight',
+     'Pg_trgm score weighting in OPAC spelling suggestions.',
+     'coust', 'label'),
+   oils_i18n_gettext(
+     'search.symspell.pg_trgm.weight',
+     'Soundex, pg_trgm, and keyboard distance similarity measures can be combined to form a secondary ordering parameter for spelling suggestions. This controls the relative weight of the scaled pg_trgm component. Defaults to 0 for "off".',
+     'coust', 'description'),
+   'integer' );
+
+INSERT into config.org_unit_setting_type
+( name, grp, label, description, datatype )
+VALUES
+( 'search.symspell.keyboard_distance.weight', 'opac',
+   oils_i18n_gettext(
+     'search.symspell.keyboard_distance.weight',
+     'Keyboard distance score weighting in OPAC spelling suggestions.',
+     'coust', 'label'),
+   oils_i18n_gettext(
+     'search.symspell.keyboard_distance.weight',
+     'Soundex, trgm, and keyboard distance similarity measures can be combined to form a secondary ordering parameter for spelling suggestions. This controls the relative weight of the scaled keyboard distance component. Defaults to 0 for "off".',
+     'coust', 'description'),
+   'integer' );
+
+CREATE OR REPLACE FUNCTION evergreen.uppercase( TEXT ) RETURNS TEXT AS $$
+    return uc(shift);
+$$ LANGUAGE PLPERLU STRICT IMMUTABLE;
+
+CREATE OR REPLACE FUNCTION evergreen.text_array_merge_unique (
+    TEXT[], TEXT[]
+) RETURNS TEXT[] AS $F$
+    SELECT NULLIF(ARRAY(
+        SELECT * FROM UNNEST($1) x WHERE x IS NOT NULL
+            UNION
+        SELECT * FROM UNNEST($2) y WHERE y IS NOT NULL
+    ),'{}');
+$F$ LANGUAGE SQL;
+
+CREATE OR REPLACE FUNCTION evergreen.qwerty_keyboard_distance ( a TEXT, b TEXT ) RETURNS NUMERIC AS $F$
+use String::KeyboardDistance qw(:all);
+return qwerty_keyboard_distance(@_);
+$F$ LANGUAGE PLPERLU STRICT IMMUTABLE;
+
+CREATE OR REPLACE FUNCTION evergreen.qwerty_keyboard_distance_match ( a TEXT, b TEXT ) RETURNS NUMERIC AS $F$
+use String::KeyboardDistance qw(:all);
+return qwerty_keyboard_distance_match(@_);
+$F$ LANGUAGE PLPERLU STRICT IMMUTABLE;
+
+CREATE OR REPLACE FUNCTION evergreen.levenshtein_damerau_edistance ( a TEXT, b TEXT, INT ) RETURNS NUMERIC AS $F$
+use Text::Levenshtein::Damerau::XS qw/xs_edistance/;
+return xs_edistance(@_);
+$F$ LANGUAGE PLPERLU STRICT IMMUTABLE;
+
+CREATE TABLE search.symspell_dictionary (
+    keyword_count           INT     NOT NULL DEFAULT 0,
+    title_count             INT     NOT NULL DEFAULT 0,
+    author_count            INT     NOT NULL DEFAULT 0,
+    subject_count           INT     NOT NULL DEFAULT 0,
+    series_count            INT     NOT NULL DEFAULT 0,
+    identifier_count        INT     NOT NULL DEFAULT 0,
+
+    prefix_key              TEXT    PRIMARY KEY,
+
+    keyword_suggestions     TEXT[],
+    title_suggestions       TEXT[],
+    author_suggestions      TEXT[],
+    subject_suggestions     TEXT[],
+    series_suggestions      TEXT[],
+    identifier_suggestions  TEXT[]
+) WITH (fillfactor = 80);
+
+CREATE OR REPLACE FUNCTION search.symspell_parse_words ( phrase TEXT )
+RETURNS SETOF TEXT AS $F$
+    SELECT UNNEST(x) FROM regexp_matches($1, '([[:alnum:]]+''*[[:alnum:]]*)', 'g') x;
+$F$ LANGUAGE SQL STRICT IMMUTABLE;
+
+-- This version does not preserve input word order!
+CREATE OR REPLACE FUNCTION search.symspell_parse_words_distinct ( phrase TEXT )
+RETURNS SETOF TEXT AS $F$
+    SELECT DISTINCT UNNEST(x) FROM regexp_matches($1, '([[:alnum:]]+''*[[:alnum:]]*)', 'g') x;
+$F$ LANGUAGE SQL STRICT IMMUTABLE;
+
+CREATE OR REPLACE FUNCTION search.symspell_transfer_casing ( withCase TEXT, withoutCase TEXT )
+RETURNS TEXT AS $F$
+DECLARE
+    woChars TEXT[];
+    curr    TEXT;
+    ind     INT := 1;
+BEGIN
+    woChars := regexp_split_to_array(withoutCase,'');
+    FOR curr IN SELECT x FROM regexp_split_to_table(withCase, '') x LOOP
+        IF curr = evergreen.uppercase(curr) THEN
+            woChars[ind] := evergreen.uppercase(woChars[ind]);
+        END IF;
+        ind := ind + 1;
+    END LOOP;
+    RETURN ARRAY_TO_STRING(woChars,'');
+END;
+$F$ LANGUAGE PLPGSQL STRICT IMMUTABLE;
+
+CREATE OR REPLACE FUNCTION search.symspell_generate_edits (
+    raw_word    TEXT,
+    dist        INT DEFAULT 1,
+    maxED       INT DEFAULT 3
+) RETURNS TEXT[] AS $F$
+DECLARE
+    item    TEXT;
+    list    TEXT[] := '{}';
+    sublist TEXT[] := '{}';
+BEGIN
+    FOR I IN 1 .. CHARACTER_LENGTH(raw_word) LOOP
+        item := SUBSTRING(raw_word FROM 1 FOR I - 1) || SUBSTRING(raw_word FROM I + 1);
+        IF NOT list @> ARRAY[item] THEN
+            list := item || list;
+            IF dist < maxED AND CHARACTER_LENGTH(raw_word) > dist + 1 THEN
+                sublist := search.symspell_generate_edits(item, dist + 1, maxED) || sublist;
+            END IF;
+        END IF;
+    END LOOP;
+
+    IF dist = 1 THEN
+        RETURN evergreen.text_array_merge_unique(list, sublist);
+    ELSE
+        RETURN list || sublist;
+    END IF;
+END;
+$F$ LANGUAGE PLPGSQL STRICT IMMUTABLE;
+
+-- DROP TYPE search.symspell_lookup_output CASCADE;
+CREATE TYPE search.symspell_lookup_output AS (
+    suggestion          TEXT,
+    suggestion_count    INT,
+    lev_distance        INT,
+    pg_trgm_sim         NUMERIC,
+    qwerty_kb_match     NUMERIC,
+    soundex_sim         NUMERIC,
+    input               TEXT,
+    norm_input          TEXT,
+    prefix_key          TEXT,
+    prefix_key_count    INT,
+    word_pos            INT
+);
+
+CREATE OR REPLACE FUNCTION search.symspell_lookup (
+    raw_input       TEXT,
+    search_class    TEXT,
+    verbosity       INT DEFAULT 2,
+    xfer_case       BOOL DEFAULT FALSE,
+    count_threshold INT DEFAULT 1,
+    soundex_weight  INT DEFAULT 0,
+    pg_trgm_weight  INT DEFAULT 0,
+    kbdist_weight   INT DEFAULT 0
+) RETURNS SETOF search.symspell_lookup_output AS $F$
+DECLARE
+    prefix_length INT;
+    maxED         INT;
+    word_list   TEXT[];
+    edit_list   TEXT[] := '{}';
+    seen_list   TEXT[] := '{}';
+    output      search.symspell_lookup_output;
+    output_list search.symspell_lookup_output[];
+    entry       RECORD;
+    entry_key   TEXT;
+    prefix_key  TEXT;
+    sugg        TEXT;
+    input       TEXT;
+    word        TEXT;
+    w_pos       INT := -1;
+    smallest_ed INT := -1;
+    global_ed   INT;
+BEGIN
+    SELECT value::INT INTO prefix_length FROM config.internal_flag WHERE name = 'symspell.prefix_length' AND enabled;
+    prefix_length := COALESCE(prefix_length, 6);
+
+    SELECT value::INT INTO maxED FROM config.internal_flag WHERE name = 'symspell.max_edit_distance' AND enabled;
+    maxED := COALESCE(maxED, 3);
+
+    word_list := ARRAY_AGG(x) FROM search.symspell_parse_words(raw_input) x;
+
+    -- Common case exact match test for preformance
+    IF verbosity = 0 AND CARDINALITY(word_list) = 1 AND CHARACTER_LENGTH(word_list[1]) <= prefix_length THEN
+        EXECUTE
+          'SELECT  '||search_class||'_suggestions AS suggestions,
+                   '||search_class||'_count AS count,
+                   prefix_key
+             FROM  search.symspell_dictionary
+             WHERE prefix_key = $1
+                   AND '||search_class||'_count >= $2 
+                   AND '||search_class||'_suggestions @> ARRAY[$1]' 
+          INTO entry USING evergreen.lowercase(word_list[1]), COALESCE(count_threshold,1);
+        IF entry.prefix_key IS NOT NULL THEN
+            output.lev_distance := 0; -- definitionally
+            output.prefix_key := entry.prefix_key;
+            output.prefix_key_count := entry.count;
+            output.suggestion_count := entry.count;
+            output.input := word_list[1];
+            IF xfer_case THEN
+                output.suggestion := search.symspell_transfer_casing(output.input, entry.prefix_key);
+            ELSE
+                output.suggestion := entry.prefix_key;
+            END IF;
+            output.norm_input := entry.prefix_key;
+            output.qwerty_kb_match := 1;
+            output.pg_trgm_sim := 1;
+            output.soundex_sim := 1;
+            RETURN NEXT output;
+            RETURN;
+        END IF;
+    END IF;
+
+    <<word_loop>>
+    FOREACH word IN ARRAY word_list LOOP
+        w_pos := w_pos + 1;
+        input := evergreen.lowercase(word);
+
+        IF CHARACTER_LENGTH(input) > prefix_length THEN
+            prefix_key := SUBSTRING(input FROM 1 FOR prefix_length);
+            edit_list := ARRAY[input,prefix_key] || search.symspell_generate_edits(prefix_key, 1, maxED);
+        ELSE
+            edit_list := input || search.symspell_generate_edits(input, 1, maxED);
+        END IF;
+
+        SELECT ARRAY_AGG(x ORDER BY CHARACTER_LENGTH(x) DESC) INTO edit_list FROM UNNEST(edit_list) x;
+
+        output_list := '{}';
+        seen_list := '{}';
+        global_ed := NULL;
+
+        <<entry_key_loop>>
+        FOREACH entry_key IN ARRAY edit_list LOOP
+            smallest_ed := -1;
+            IF global_ed IS NOT NULL THEN
+                smallest_ed := global_ed;
+            END IF;
+            FOR entry IN EXECUTE
+                'SELECT  '||search_class||'_suggestions AS suggestions,
+                         '||search_class||'_count AS count,
+                         prefix_key
+                   FROM  search.symspell_dictionary
+                   WHERE prefix_key = $1
+                         AND '||search_class||'_suggestions IS NOT NULL' 
+                USING entry_key
+            LOOP
+                FOREACH sugg IN ARRAY entry.suggestions LOOP
+                    IF NOT seen_list @> ARRAY[sugg] THEN
+                        seen_list := seen_list || sugg;
+                        IF input = sugg THEN -- exact match, no need to spend time on a call
+                            output.lev_distance := 0;
+                            output.suggestion_count = entry.count;
+                        ELSIF ABS(CHARACTER_LENGTH(input) - CHARACTER_LENGTH(sugg)) > maxED THEN
+                            -- They are definitionally too different to consider, just move on.
+                            CONTINUE;
+                        ELSE
+                            --output.lev_distance := levenshtein_less_equal(
+                            output.lev_distance := evergreen.levenshtein_damerau_edistance(
+                                input,
+                                sugg,
+                                maxED
+                            );
+                            IF output.lev_distance < 0 THEN
+                                -- The Perl module returns -1 for "more distant than max".
+                                output.lev_distance := maxED + 1;
+                                -- This short-circuit's the count test below for speed, bypassing
+                                -- a couple useless tests.
+                                output.suggestion_count := -1;
+                            ELSE
+                                EXECUTE 'SELECT '||search_class||'_count FROM search.symspell_dictionary WHERE prefix_key = $1'
+                                    INTO output.suggestion_count USING sugg;
+                            END IF;
+                        END IF;
+
+                        -- The caller passes a minimum suggestion count threshold (or uses
+                        -- the default of 0) and if the suggestion has that many or less uses
+                        -- then we move on to the next suggestion, since this one is too rare.
+                        CONTINUE WHEN output.suggestion_count < COALESCE(count_threshold,1);
+
+                        -- Track the smallest edit distance among suggestions from this prefix key.
+                        IF smallest_ed = -1 OR output.lev_distance < smallest_ed THEN
+                            smallest_ed := output.lev_distance;
+                        END IF;
+
+                        -- Track the smallest edit distance for all prefix keys for this word.
+                        IF global_ed IS NULL OR smallest_ed < global_ed THEN
+                            global_ed = smallest_ed;
+                        END IF;
+
+                        -- Only proceed if the edit distance is <= the max for the dictionary.
+                        IF output.lev_distance <= maxED THEN
+                            IF output.lev_distance > global_ed AND verbosity <= 1 THEN
+                                -- Lev distance is our main similarity measure. While
+                                -- trgm or soundex similarity could be the main filter,
+                                -- Lev is both language agnostic and faster.
+                                --
+                                -- Here we will skip suggestions that have a longer edit distance
+                                -- than the shortest we've already found. This is simply an
+                                -- optimization that allows us to avoid further processing
+                                -- of this entry. It would be filtered out later.
+
+                                CONTINUE;
+                            END IF;
+
+                            -- If we have an exact match on the suggestion key we can also avoid
+                            -- some function calls.
+                            IF output.lev_distance = 0 THEN
+                                output.qwerty_kb_match := 1;
+                                output.pg_trgm_sim := 1;
+                                output.soundex_sim := 1;
+                            ELSE
+                                output.qwerty_kb_match := evergreen.qwerty_keyboard_distance_match(input, sugg);
+                                output.pg_trgm_sim := similarity(input, sugg);
+                                output.soundex_sim := difference(input, sugg) / 4.0;
+                            END IF;
+
+                            -- Fill in some fields
+                            IF xfer_case THEN
+                                output.suggestion := search.symspell_transfer_casing(word, sugg);
+                            ELSE
+                                output.suggestion := sugg;
+                            END IF;
+                            output.prefix_key := entry.prefix_key;
+                            output.prefix_key_count := entry.count;
+                            output.input := word;
+                            output.norm_input := input;
+                            output.word_pos := w_pos;
+
+                            -- We can't "cache" a set of generated records directly, so
+                            -- here we build up an array of search.symspell_lookup_output
+                            -- records that we can revivicate later as a table using UNNEST().
+                            output_list := output_list || output;
+
+                            EXIT entry_key_loop WHEN smallest_ed = 0 AND verbosity = 0; -- exact match early exit
+                            CONTINUE entry_key_loop WHEN smallest_ed = 0 AND verbosity = 1; -- exact match early jump to the next key
+                        END IF; -- maxED test
+                    END IF; -- suggestion not seen test
+                END LOOP; -- loop over suggestions
+            END LOOP; -- loop over entries
+        END LOOP; -- loop over entry_keys
+
+        -- Now we're done examining this word
+        IF verbosity = 0 THEN
+            -- Return the "best" suggestion from the smallest edit
+            -- distance group.  We define best based on the weighting
+            -- of the non-lev similarity measures and use the suggestion
+            -- use count to break ties.
+            RETURN QUERY
+                SELECT * FROM UNNEST(output_list)
+                    ORDER BY lev_distance,
+                        (soundex_sim * COALESCE(soundex_weight,0))
+                            + (pg_trgm_sim * COALESCE(pg_trgm_weight,0))
+                            + (qwerty_kb_match * COALESCE(kbdist_weight,0)) DESC,
+                        suggestion_count DESC
+                        LIMIT 1;
+        ELSIF verbosity = 1 THEN
+            -- Return all suggestions from the smallest
+            -- edit distance group.
+            RETURN QUERY
+                SELECT * FROM UNNEST(output_list) WHERE lev_distance = smallest_ed
+                    ORDER BY (soundex_sim * COALESCE(soundex_weight,0))
+                            + (pg_trgm_sim * COALESCE(pg_trgm_weight,0))
+                            + (qwerty_kb_match * COALESCE(kbdist_weight,0)) DESC,
+                        suggestion_count DESC;
+        ELSIF verbosity = 2 THEN
+            -- Return everything we find, along with relevant stats
+            RETURN QUERY
+                SELECT * FROM UNNEST(output_list)
+                    ORDER BY lev_distance,
+                        (soundex_sim * COALESCE(soundex_weight,0))
+                            + (pg_trgm_sim * COALESCE(pg_trgm_weight,0))
+                            + (qwerty_kb_match * COALESCE(kbdist_weight,0)) DESC,
+                        suggestion_count DESC;
+        ELSIF verbosity = 3 THEN
+            -- Return everything we find from the two smallest edit distance groups
+            RETURN QUERY
+                SELECT * FROM UNNEST(output_list)
+                    WHERE lev_distance IN (SELECT DISTINCT lev_distance FROM UNNEST(output_list) ORDER BY 1 LIMIT 2)
+                    ORDER BY lev_distance,
+                        (soundex_sim * COALESCE(soundex_weight,0))
+                            + (pg_trgm_sim * COALESCE(pg_trgm_weight,0))
+                            + (qwerty_kb_match * COALESCE(kbdist_weight,0)) DESC,
+                        suggestion_count DESC;
+        ELSIF verbosity = 4 THEN
+            -- Return everything we find from the two smallest edit distance groups that are NOT 0 distance
+            RETURN QUERY
+                SELECT * FROM UNNEST(output_list)
+                    WHERE lev_distance IN (SELECT DISTINCT lev_distance FROM UNNEST(output_list) WHERE lev_distance > 0 ORDER BY 1 LIMIT 2)
+                    ORDER BY lev_distance,
+                        (soundex_sim * COALESCE(soundex_weight,0))
+                            + (pg_trgm_sim * COALESCE(pg_trgm_weight,0))
+                            + (qwerty_kb_match * COALESCE(kbdist_weight,0)) DESC,
+                        suggestion_count DESC;
+        END IF;
+    END LOOP; -- loop over words
+END;
+$F$ LANGUAGE PLPGSQL;
+
+CREATE OR REPLACE FUNCTION search.symspell_build_raw_entry (
+    raw_input       TEXT,
+    source_class    TEXT,
+    no_limit        BOOL DEFAULT FALSE,
+    prefix_length   INT DEFAULT 6,
+    maxED           INT DEFAULT 3
+) RETURNS SETOF search.symspell_dictionary AS $F$
+DECLARE
+    key         TEXT;
+    del_key     TEXT;
+    key_list    TEXT[];
+    entry       search.symspell_dictionary%ROWTYPE;
+BEGIN
+    key := raw_input;
+
+    IF NOT no_limit AND CHARACTER_LENGTH(raw_input) > prefix_length THEN
+        key := SUBSTRING(key FROM 1 FOR prefix_length);
+        key_list := ARRAY[raw_input, key];
+    ELSE
+        key_list := ARRAY[key];
+    END IF;
+
+    FOREACH del_key IN ARRAY key_list LOOP
+        entry.prefix_key := del_key;
+
+        entry.keyword_count := 0;
+        entry.title_count := 0;
+        entry.author_count := 0;
+        entry.subject_count := 0;
+        entry.series_count := 0;
+        entry.identifier_count := 0;
+
+        entry.keyword_suggestions := '{}';
+        entry.title_suggestions := '{}';
+        entry.author_suggestions := '{}';
+        entry.subject_suggestions := '{}';
+        entry.series_suggestions := '{}';
+        entry.identifier_suggestions := '{}';
+
+        IF source_class = 'keyword' THEN entry.keyword_suggestions := ARRAY[raw_input]; END IF;
+        IF source_class = 'title' THEN entry.title_suggestions := ARRAY[raw_input]; END IF;
+        IF source_class = 'author' THEN entry.author_suggestions := ARRAY[raw_input]; END IF;
+        IF source_class = 'subject' THEN entry.subject_suggestions := ARRAY[raw_input]; END IF;
+        IF source_class = 'series' THEN entry.series_suggestions := ARRAY[raw_input]; END IF;
+        IF source_class = 'identifier' THEN entry.identifier_suggestions := ARRAY[raw_input]; END IF;
+        IF source_class = 'keyword' THEN entry.keyword_suggestions := ARRAY[raw_input]; END IF;
+
+        IF del_key = raw_input THEN
+            IF source_class = 'keyword' THEN entry.keyword_count := 1; END IF;
+            IF source_class = 'title' THEN entry.title_count := 1; END IF;
+            IF source_class = 'author' THEN entry.author_count := 1; END IF;
+            IF source_class = 'subject' THEN entry.subject_count := 1; END IF;
+            IF source_class = 'series' THEN entry.series_count := 1; END IF;
+            IF source_class = 'identifier' THEN entry.identifier_count := 1; END IF;
+        END IF;
+
+        RETURN NEXT entry;
+    END LOOP;
+
+    FOR del_key IN SELECT x FROM UNNEST(search.symspell_generate_edits(key, 1, maxED)) x LOOP
+
+        entry.keyword_suggestions := '{}';
+        entry.title_suggestions := '{}';
+        entry.author_suggestions := '{}';
+        entry.subject_suggestions := '{}';
+        entry.series_suggestions := '{}';
+        entry.identifier_suggestions := '{}';
+
+        IF source_class = 'keyword' THEN entry.keyword_count := 0; END IF;
+        IF source_class = 'title' THEN entry.title_count := 0; END IF;
+        IF source_class = 'author' THEN entry.author_count := 0; END IF;
+        IF source_class = 'subject' THEN entry.subject_count := 0; END IF;
+        IF source_class = 'series' THEN entry.series_count := 0; END IF;
+        IF source_class = 'identifier' THEN entry.identifier_count := 0; END IF;
+
+        entry.prefix_key := del_key;
+
+        IF source_class = 'keyword' THEN entry.keyword_suggestions := ARRAY[raw_input]; END IF;
+        IF source_class = 'title' THEN entry.title_suggestions := ARRAY[raw_input]; END IF;
+        IF source_class = 'author' THEN entry.author_suggestions := ARRAY[raw_input]; END IF;
+        IF source_class = 'subject' THEN entry.subject_suggestions := ARRAY[raw_input]; END IF;
+        IF source_class = 'series' THEN entry.series_suggestions := ARRAY[raw_input]; END IF;
+        IF source_class = 'identifier' THEN entry.identifier_suggestions := ARRAY[raw_input]; END IF;
+        IF source_class = 'keyword' THEN entry.keyword_suggestions := ARRAY[raw_input]; END IF;
+
+        RETURN NEXT entry;
+    END LOOP;
+
+END;
+$F$ LANGUAGE PLPGSQL STRICT IMMUTABLE;
+
+CREATE OR REPLACE FUNCTION search.symspell_build_entries (
+    full_input      TEXT,
+    source_class    TEXT,
+    old_input       TEXT DEFAULT NULL,
+    include_phrases BOOL DEFAULT FALSE
+) RETURNS SETOF search.symspell_dictionary AS $F$
+DECLARE
+    prefix_length   INT;
+    maxED           INT;
+    word_list   TEXT[];
+    input       TEXT;
+    word        TEXT;
+    entry       search.symspell_dictionary;
+BEGIN
+    IF full_input IS NOT NULL THEN
+        SELECT value::INT INTO prefix_length FROM config.internal_flag WHERE name = 'symspell.prefix_length' AND enabled;
+        prefix_length := COALESCE(prefix_length, 6);
+
+        SELECT value::INT INTO maxED FROM config.internal_flag WHERE name = 'symspell.max_edit_distance' AND enabled;
+        maxED := COALESCE(maxED, 3);
+
+        input := evergreen.lowercase(full_input);
+        word_list := ARRAY_AGG(x) FROM search.symspell_parse_words_distinct(input) x;
+    
+        IF CARDINALITY(word_list) > 1 AND include_phrases THEN
+            RETURN QUERY SELECT * FROM search.symspell_build_raw_entry(input, source_class, TRUE, prefix_length, maxED);
+        END IF;
+
+        FOREACH word IN ARRAY word_list LOOP
+            RETURN QUERY SELECT * FROM search.symspell_build_raw_entry(word, source_class, FALSE, prefix_length, maxED);
+        END LOOP;
+    END IF;
+
+    IF old_input IS NOT NULL THEN
+        input := evergreen.lowercase(old_input);
+
+        FOR word IN SELECT x FROM search.symspell_parse_words_distinct(input) x LOOP
+            entry.prefix_key := word;
+
+            entry.keyword_count := 0;
+            entry.title_count := 0;
+            entry.author_count := 0;
+            entry.subject_count := 0;
+            entry.series_count := 0;
+            entry.identifier_count := 0;
+
+            entry.keyword_suggestions := '{}';
+            entry.title_suggestions := '{}';
+            entry.author_suggestions := '{}';
+            entry.subject_suggestions := '{}';
+            entry.series_suggestions := '{}';
+            entry.identifier_suggestions := '{}';
+
+            IF source_class = 'keyword' THEN entry.keyword_count := -1; END IF;
+            IF source_class = 'title' THEN entry.title_count := -1; END IF;
+            IF source_class = 'author' THEN entry.author_count := -1; END IF;
+            IF source_class = 'subject' THEN entry.subject_count := -1; END IF;
+            IF source_class = 'series' THEN entry.series_count := -1; END IF;
+            IF source_class = 'identifier' THEN entry.identifier_count := -1; END IF;
+
+            RETURN NEXT entry;
+        END LOOP;
+    END IF;
+END;
+$F$ LANGUAGE PLPGSQL;
+
+CREATE OR REPLACE FUNCTION search.symspell_build_and_merge_entries (
+    full_input      TEXT,
+    source_class    TEXT,
+    old_input       TEXT DEFAULT NULL,
+    include_phrases BOOL DEFAULT FALSE
+) RETURNS SETOF search.symspell_dictionary AS $F$
+DECLARE
+    new_entry       RECORD;
+    conflict_entry  RECORD;
+BEGIN
+
+    IF full_input = old_input THEN -- neither NULL, and are the same
+        RETURN;
+    END IF;
+
+    FOR new_entry IN EXECUTE $q$
+        SELECT  count,
+                prefix_key,
+                evergreen.text_array_merge_unique(s,'{}') suggestions
+          FROM  (SELECT prefix_key,
+                        ARRAY_AGG($q$ || source_class || $q$_suggestions[1]) s,
+                        SUM($q$ || source_class || $q$_count) count
+                  FROM  search.symspell_build_entries($1, $2, $3, $4)
+                  GROUP BY 1) x
+        $q$ USING full_input, source_class, old_input, include_phrases
+    LOOP
+        EXECUTE $q$
+            SELECT  prefix_key,
+                    $q$ || source_class || $q$_suggestions suggestions,
+                    $q$ || source_class || $q$_count count
+              FROM  search.symspell_dictionary
+              WHERE prefix_key = $1 $q$
+            INTO conflict_entry
+            USING new_entry.prefix_key;
+
+        IF new_entry.count <> 0 THEN -- Real word, and count changed
+            IF conflict_entry.prefix_key IS NOT NULL THEN -- we'll be updating
+                IF conflict_entry.count > 0 THEN -- it's a real word
+                    RETURN QUERY EXECUTE $q$
+                        UPDATE  search.symspell_dictionary
+                           SET  $q$ || source_class || $q$_count = $2
+                          WHERE prefix_key = $1
+                          RETURNING * $q$
+                        USING new_entry.prefix_key, GREATEST(0, new_entry.count + conflict_entry.count);
+                ELSE -- it was a prefix key or delete-emptied word before
+                    IF conflict_entry.suggestions @> new_entry.suggestions THEN -- already have all suggestions here...
+                        RETURN QUERY EXECUTE $q$
+                            UPDATE  search.symspell_dictionary
+                               SET  $q$ || source_class || $q$_count = $2
+                              WHERE prefix_key = $1
+                              RETURNING * $q$
+                            USING new_entry.prefix_key, GREATEST(0, new_entry.count);
+                    ELSE -- new suggestion!
+                        RETURN QUERY EXECUTE $q$
+                            UPDATE  search.symspell_dictionary
+                               SET  $q$ || source_class || $q$_count = $2,
+                                    $q$ || source_class || $q$_suggestions = $3
+                              WHERE prefix_key = $1
+                              RETURNING * $q$
+                            USING new_entry.prefix_key, GREATEST(0, new_entry.count), evergreen.text_array_merge_unique(conflict_entry.suggestions,new_entry.suggestions);
+                    END IF;
+                END IF;
+            ELSE
+                -- We keep the on-conflict clause just in case...
+                RETURN QUERY EXECUTE $q$
+                    INSERT INTO search.symspell_dictionary AS d (
+                        $q$ || source_class || $q$_count,
+                        prefix_key,
+                        $q$ || source_class || $q$_suggestions
+                    ) VALUES ( $1, $2, $3 ) ON CONFLICT (prefix_key) DO
+                        UPDATE SET  $q$ || source_class || $q$_count = d.$q$ || source_class || $q$_count + EXCLUDED.$q$ || source_class || $q$_count,
+                                    $q$ || source_class || $q$_suggestions = evergreen.text_array_merge_unique(d.$q$ || source_class || $q$_suggestions, EXCLUDED.$q$ || source_class || $q$_suggestions)
+                        RETURNING * $q$
+                    USING new_entry.count, new_entry.prefix_key, new_entry.suggestions;
+            END IF;
+        ELSE -- key only, or no change
+            IF conflict_entry.prefix_key IS NOT NULL THEN -- we'll be updating
+                IF NOT conflict_entry.suggestions @> new_entry.suggestions THEN -- There are new suggestions
+                    RETURN QUERY EXECUTE $q$
+                        UPDATE  search.symspell_dictionary
+                           SET  $q$ || source_class || $q$_suggestions = $2
+                          WHERE prefix_key = $1
+                          RETURNING * $q$
+                        USING new_entry.prefix_key, evergreen.text_array_merge_unique(conflict_entry.suggestions,new_entry.suggestions);
+                END IF;
+            ELSE
+                RETURN QUERY EXECUTE $q$
+                    INSERT INTO search.symspell_dictionary AS d (
+                        $q$ || source_class || $q$_count,
+                        prefix_key,
+                        $q$ || source_class || $q$_suggestions
+                    ) VALUES ( $1, $2, $3 ) ON CONFLICT (prefix_key) DO -- key exists, suggestions may be added due to this entry
+                        UPDATE SET  $q$ || source_class || $q$_suggestions = evergreen.text_array_merge_unique(d.$q$ || source_class || $q$_suggestions, EXCLUDED.$q$ || source_class || $q$_suggestions)
+                    RETURNING * $q$
+                    USING new_entry.count, new_entry.prefix_key, new_entry.suggestions;
+            END IF;
+        END IF;
+    END LOOP;
+END;
+$F$ LANGUAGE PLPGSQL;
+
+CREATE OR REPLACE FUNCTION search.symspell_maintain_entries () RETURNS TRIGGER AS $f$
+DECLARE
+    search_class    TEXT;
+    new_value       TEXT := NULL;
+    old_value       TEXT := NULL;
+BEGIN
+    search_class := COALESCE(TG_ARGV[0], SPLIT_PART(TG_TABLE_NAME,'_',1));
+
+    IF TG_OP IN ('INSERT', 'UPDATE') THEN
+        new_value := NEW.value;
+    END IF;
+
+    IF TG_OP IN ('DELETE', 'UPDATE') THEN
+        old_value := OLD.value;
+    END IF;
+
+    PERFORM * FROM search.symspell_build_and_merge_entries(new_value, search_class, old_value);
+
+    RETURN NULL; -- always fired AFTER
+END;
+$f$ LANGUAGE PLPGSQL;
+
+CREATE TRIGGER maintain_symspell_entries_tgr
+    AFTER INSERT OR UPDATE OR DELETE ON metabib.title_field_entry
+    FOR EACH ROW EXECUTE PROCEDURE search.symspell_maintain_entries();
+
+CREATE TRIGGER maintain_symspell_entries_tgr
+    AFTER INSERT OR UPDATE OR DELETE ON metabib.author_field_entry
+    FOR EACH ROW EXECUTE PROCEDURE search.symspell_maintain_entries();
+
+CREATE TRIGGER maintain_symspell_entries_tgr
+    AFTER INSERT OR UPDATE OR DELETE ON metabib.subject_field_entry
+    FOR EACH ROW EXECUTE PROCEDURE search.symspell_maintain_entries();
+
+CREATE TRIGGER maintain_symspell_entries_tgr
+    AFTER INSERT OR UPDATE OR DELETE ON metabib.series_field_entry
+    FOR EACH ROW EXECUTE PROCEDURE search.symspell_maintain_entries();
+
+CREATE TRIGGER maintain_symspell_entries_tgr
+    AFTER INSERT OR UPDATE OR DELETE ON metabib.keyword_field_entry
+    FOR EACH ROW EXECUTE PROCEDURE search.symspell_maintain_entries();
+
+CREATE TRIGGER maintain_symspell_entries_tgr
+    AFTER INSERT OR UPDATE OR DELETE ON metabib.identifier_field_entry
+    FOR EACH ROW EXECUTE PROCEDURE search.symspell_maintain_entries();
+
+COMMIT;
+
+/* This will generate the queries needed to generate the /file/ that can
+ * be used to populate the dictionary table.
+
+select $z$select $y$select $y$||x.id||$y$, '$z$||x.x||$z$', count(*) from search.symspell_build_and_merge_entries($x$$y$ || x.value||$y$$x$, '$z$||x||$z$');$y$ from metabib.$z$||x||$z$_field_entry x;$z$ from (select 'keyword'::text x union select 'title' union select 'author' union select 'subject' union select 'series' union select 'identifier') x;
+
+*/
+
+\qecho ''
+\qecho 'The following should be run at the end of the upgrade before any'
+\qecho 'reingest occurs.  Because new triggers are installed already,'
+\qecho 'updates to indexed strings will cause zero-count dictionary entries'
+\qecho 'to be recorded which will require updating every row again (or'
+\qecho 'starting from scratch) so best to do this before other batch'
+\qecho 'changes.  A later reingest that does not significantly change'
+\qecho 'indexed strings will /not/ cause table bloat here, and will be'
+\qecho 'as fast as normal.  A copy of the SQL in a ready-to-use, non-escaped'
+\qecho 'form is available inside a comment at the end of this upgrade sub-'
+\qecho 'script so you do not need to copy this comment from the psql ouptut.'
+\qecho ''
+\qecho '\\a'
+\qecho '\\t'
+\qecho ''
+\qecho '\\o title'
+\qecho 'select value from metabib.title_field_entry;'
+\qecho '\\o author'
+\qecho 'select value from metabib.author_field_entry;'
+\qecho '\\o subject'
+\qecho 'select value from metabib.subject_field_entry;'
+\qecho '\\o series'
+\qecho 'select value from metabib.series_field_entry;'
+\qecho '\\o identifier'
+\qecho 'select value from metabib.identifier_field_entry;'
+\qecho '\\o keyword'
+\qecho 'select value from metabib.keyword_field_entry;'
+\qecho ''
+\qecho '\\o'
+\qecho '\\a'
+\qecho '\\t'
+\qecho ''
+\qecho '// Then, at the command line:'
+\qecho ''
+\qecho '$ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl title > title.sql'
+\qecho ''
+\qecho '// And, back in psql'
+\qecho ''
+\qecho 'ALTER TABLE search.symspell_dictionary SET UNLOGGED;'
+\qecho 'TRUNCATE search.symspell_dictionary;'
+\qecho ''
+\qecho '\i identifier.sql'
+\qecho '\i author.sql'
+\qecho '\i title.sql'
+\qecho '\i subject.sql'
+\qecho '\i series.sql'
+\qecho '\i keyword.sql'
+\qecho ''
+\qecho 'CLUSTER search.symspell_dictionary USING symspell_dictionary_pkey;'
+\qecho 'REINDEX TABLE search.symspell_dictionary;'
+\qecho 'ALTER TABLE search.symspell_dictionary SET LOGGED;'
+\qecho 'VACUUM ANALYZE search.symspell_dictionary;'
+\qecho ''
+\qecho 'DROP TABLE search.search.symspell_dictionary_partial_title;'
+\qecho 'DROP TABLE search.search.symspell_dictionary_partial_author;'
+\qecho 'DROP TABLE search.search.symspell_dictionary_partial_subject;'
+\qecho 'DROP TABLE search.search.symspell_dictionary_partial_series;'
+\qecho 'DROP TABLE search.search.symspell_dictionary_partial_identifier;'
+\qecho 'DROP TABLE search.search.symspell_dictionary_partial_keyword;'
+
+/* To run by hand:
+
+\a
+\t
+
+\o title
+select value from metabib.title_field_entry;
+
+\o author
+select value from metabib.author_field_entry;
+
+\o subject
+select value from metabib.subject_field_entry;
+
+\o series
+select value from metabib.series_field_entry;
+
+\o identifier
+select value from metabib.identifier_field_entry;
+
+\o keyword
+select value from metabib.keyword_field_entry;
+
+\o
+\a
+\t
+
+// Then, at the command line:
+
+$ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl title > title.sql
+$ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl author > author.sql
+$ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl subject > subject.sql
+$ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl series > series.sql
+$ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl identifier > identifier.sql
+$ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl keyword > keyword.sql
+
+// To the extent your hardware allows, the above commands can be run in 
+// in parallel, in different shells.  Each will use a full CPU, and RAM
+// may be a limiting resource, so keep an eye on that with `top`.
+
+
+// And, back in psql
+
+ALTER TABLE search.symspell_dictionary SET UNLOGGED;
+TRUNCATE search.symspell_dictionary;
+
+\i identifier.sql
+\i author.sql
+\i title.sql
+\i subject.sql
+\i series.sql
+\i keyword.sql
+
+CLUSTER search.symspell_dictionary USING symspell_dictionary_pkey;
+REINDEX TABLE search.symspell_dictionary;
+ALTER TABLE search.symspell_dictionary SET LOGGED;
+VACUUM ANALYZE search.symspell_dictionary;
+
+DROP TABLE search.search.symspell_dictionary_partial_title;
+DROP TABLE search.search.symspell_dictionary_partial_author;
+DROP TABLE search.search.symspell_dictionary_partial_subject;
+DROP TABLE search.search.symspell_dictionary_partial_series;
+DROP TABLE search.search.symspell_dictionary_partial_identifier;
+DROP TABLE search.search.symspell_dictionary_partial_keyword;
+
+*/
+
diff --git a/Open-ILS/src/support-scripts/symspell-sideload.pl b/Open-ILS/src/support-scripts/symspell-sideload.pl
new file mode 100755 (executable)
index 0000000..af148a8
--- /dev/null
@@ -0,0 +1,120 @@
+#!/usr/bin/perl
+use warnings;
+use strict;
+use List::MoreUtils qw/uniq/;
+
+$| = 1;
+
+my $plen = 6;
+my $maxed = 3;
+
+my %dict;
+my $etime;
+my $secs;
+
+my $class = $ARGV[0];
+
+my $stime = time;
+while (my $data = <>) {
+    my $line = $.;
+
+    chomp($data); $data=lc($data);
+
+    my @words;
+    while( $data =~ m/([\w\d]+'*[\w\d]*)/g ) {
+        push @words, $1;
+    }
+
+    for my $raw (uniq @words) {
+        my $key = $raw;
+        $dict{$key} //= [0,[]];
+        $dict{$key}[0]++;
+
+        if ($dict{$key}[0] == 1) { # first time we've seen it, need to generate prefix keys
+            push @{$dict{$key}[1]}, $raw;
+
+            if (length($raw) > $plen) {
+                $key = substr($raw,0,$plen);
+                $dict{$key} //= [0,[]];
+                push @{$dict{$key}[1]}, $raw;
+            }
+
+            for my $edit (symspell_generate_edits($key, 1)) {
+                $dict{$edit} //= [0,[]];
+                push @{$dict{$edit}[1]}, $raw;
+            }
+        }
+    }
+
+    unless ($line % 10000) {
+        $etime = time;
+        $secs = $etime - $stime;
+        warn "$line lines consumed from input in $secs seconds...\n";
+    }
+}
+
+$etime = time;
+$secs = $etime - $stime;
+warn "Dictionary built in $secs seconds, writing...\n";
+
+$stime = time;
+my $counter = 0;
+
+print <<"SQL";
+CREATE UNLOGGED TABLE search.symspell_dictionary_partial_$class (
+    prefix_key TEXT,
+    ${class}_count INT,
+    ${class}_suggestions TEXT[]
+) FROM STDIN;
+
+COPY search.symspell_dictionary_partial_$class FROM STDIN;
+SQL
+
+while ( my ($key, $cl_dict) = each %dict ) {
+    $counter++;
+    print join( "\t", $key, $$cl_dict[0], (scalar(@{$$cl_dict[1]}) ? '{'.join(',', uniq @{$$cl_dict[1]}).'}' : '\N')) . "\n";
+    delete $dict{$key};
+}
+
+print <<"SQL";
+\\.
+
+INSERT INTO search.symspell_dictionary (prefix_key, ${class}_count, ${class}_suggestions)
+    SELECT * FROM search.symspell_dictionary_partial_$class
+    ON CONFLICT (prefix_key) DO UPDATE
+        SET ${class}_count = EXCLUDED.${class}_count,
+        ${class}_suggestions = EXCLUDED.${class}_suggestions;
+
+SQL
+
+$etime = time;
+$secs = $etime - $stime;
+warn "$counter dictionary prefix key entries written in $secs seconds.\n";
+
+sub symspell_generate_edits {
+    my $word = shift;
+    my $dist = shift;
+    my $c = 1;
+    my @list;
+    my @sublist;
+    my $len = length($word);
+
+    while ( $c <= $len ) {
+        my $item = substr($word, 0, $c - 1) . substr($word, $c);
+        push @list, $item;
+        if ($dist < $maxed) {
+            push @sublist, symspell_generate_edits($item, $dist + 1);
+        }
+        $c++;
+    }
+
+    push @list, @sublist;
+
+    if ($dist == 1) {
+            #warn join(', ', uniq @list) . "\n";
+        return uniq(@list);
+    }
+
+    return @list;
+}
+
index 0cfd2b7..52dbcc9 100755 (executable)
                        END;
                    END %]
             </p>
+            [% IF ctx.search_summary.suggestions.one_class_one_term %]
+            <div class="result_block_visible refine_search" id="did_you_mean">
+                <em><strong>[% s_list = ctx.search_summary.suggestions.one_class_one_term; l('Did you mean: ') %]</strong></em>
+                [% FOREACH s IN s_list.suggestions %]
+                    [% IF !loop.first(); l('or'); END %]
+                    <a href="[% mkurl(ctx.opac_root _ '/results', {qtype=>s_list.class, query=>s.suggestion}) %]">[% s.suggestion | html %]</a>
+                [% END %]
+            </div>
+            <br/>
+            [% END %]
         </div>
         <div id="lowhits_help">
         [% INCLUDE "opac/parts/result/lowhits_purchase.tt2" %]
@@ -65,4 +75,4 @@
             </p>
             [% INCLUDE "opac/parts/result/lowhits_purchase.tt2" %]
         </div>
-    </div>
\ No newline at end of file
+    </div>
index 696882e..70cfa35 100755 (executable)
@@ -186,6 +186,15 @@ END;
         </div>
         [% END %]
     [% END %]
+    [% IF ctx.search_summary.suggestions.one_class_one_term %]
+    <div class="result_block_visible refine_search" id="did_you_mean">
+        <em><strong>[% s_list = ctx.search_summary.suggestions.one_class_one_term; l('Did you mean: ') %]</strong></em>
+        [% FOREACH s IN s_list.suggestions %]
+            [% IF !loop.first(); l('or'); END %]
+            <a href="[% mkurl(ctx.opac_root _ '/results', {qtype=>s_list.class, query=>s.suggestion}) %]">[% s.suggestion | html %]</a>
+        [% END %]
+    </div>
+    [% END %]
 
 
     <script>
index a21dcba..75c8ef8 100644 (file)
     </div>
 </form>
 
+[% IF ctx.search_summary.suggestions.one_class_one_term %]
+<br/>
+<div class="result_block_visible refine_search" id="did_you_mean">
+    <em><strong>[% s_list = ctx.search_summary.suggestions.one_class_one_term; l('Did you mean: ') %]</strong></em>
+    [% FOREACH s IN s_list.suggestions %]
+        [% IF !loop.first(); l('or'); END %]
+        <a href="[% mkurl(ctx.kpac_root _ '/results', {qtype=>s_list.class, query=>s.suggestion}) %]">[% s.suggestion | html %]</a>
+    [% END %]
+</div>
+[% END %]
index cd6b5a9..bc63156 100644 (file)
                        END;
                    END %]
             </p>
+            [% IF ctx.search_summary.suggestions.one_class_one_term %]
+            <div class="result_block_visible refine_search" id="did_you_mean">
+                <em><strong>[% s_list = ctx.search_summary.suggestions.one_class_one_term; l('Did you mean: ') %]</strong></em>
+                [% FOREACH s IN s_list.suggestions %]
+                    [% IF !loop.first(); l('or'); END %]
+                    <a href="[% mkurl(ctx.opac_root _ '/results', {qtype=>s_list.class, query=>s.suggestion}) %]">[% s.suggestion | html %]</a>
+                [% END %]
+            </div>
+            <br/>
+            [% END %]
         </div>
         <div id="lowhits_help">
             [% INCLUDE "opac/parts/result/lowhits_purchase.tt2" %]
index d574ba4..1f1400f 100644 (file)
@@ -185,6 +185,16 @@ END;
         </div>
         [% END %]
     [% END %]
+    [% IF ctx.search_summary.suggestions.one_class_one_term %]
+    <div class="result_block_visible refine_search" id="did_you_mean">
+        <em><strong>[% s_list = ctx.search_summary.suggestions.one_class_one_term; l('Did you mean: ') %]</strong></em>
+        [% FOREACH s IN s_list.suggestions %]
+            [% IF !loop.first(); l('or'); END %] 
+            <a href="[% mkurl(ctx.opac_root _ '/results', {qtype=>s_list.class, query=>s.suggestion}) %]">[% s.suggestion | html %]</a>
+        [% END %]
+    </div>
+    <br/>
+    [% END %]
     [% IF (is_advanced AND NOT is_special) AND CGI.param('qtype') %]
     <div class="refine_search result_block_visible">
         <span id="refine_search_link">[ <a href="[% mkurl(ctx.opac_root _ '/advanced') %]">[%