Merge autosuggest normalization changes into version upgrade

author Dan Scott <dscott@laurentian.ca>

Sun, 4 Mar 2012 19:10:48 +0000 (14:10 -0500)

committer Dan Scott <dscott@laurentian.ca>

Sun, 4 Mar 2012 19:10:48 +0000 (14:10 -0500)
author Dan Scott <dscott@laurentian.ca>
Sun, 4 Mar 2012 19:10:48 +0000 (14:10 -0500)
committer Dan Scott <dscott@laurentian.ca>
Sun, 4 Mar 2012 19:10:48 +0000 (14:10 -0500)
diff --git a/Open-ILS/src/sql/Pg/version-upgrade/2.1-2.2-upgrade-db.sql b/Open-ILS/src/sql/Pg/version-upgrade/2.1-2.2-upgrade-db.sql

index ded1aac..ff322e0 100644 (file)
--- a/Open-ILS/src/sql/Pg/version-upgrade/2.1-2.2-upgrade-db.sql
+++ b/Open-ILS/src/sql/Pg/version-upgrade/2.1-2.2-upgrade-db.sql
@@ -11711,7 +11711,7 @@ CREATE TABLE metabib.browse_entry_def_map (
  ALTER TABLE config.metabib_field ADD COLUMN browse_field BOOLEAN DEFAULT TRUE NOT NULL;
  ALTER TABLE config.metabib_field ADD COLUMN browse_xpath TEXT;
  
-ALTER TABLE config.metabib_class ADD COLUMN bouyant BOOLEAN DEFAULT FALSE NOT NULL;
+ALTER TABLE config.metabib_class ADD COLUMN buoyant BOOLEAN DEFAULT FALSE NOT NULL;
  ALTER TABLE config.metabib_class ADD COLUMN restrict BOOLEAN DEFAULT FALSE NOT NULL;
  ALTER TABLE config.metabib_field ADD COLUMN restrict BOOLEAN DEFAULT FALSE NOT NULL;
  
@@ -12200,10 +12200,58 @@ BEGIN
  END;
  $func$ LANGUAGE PLPGSQL;
  
+SELECT evergreen.upgrade_deps_block_check('0679', :eg_version);
+
+-- The advantage of this over the stock regexp_split_to_array() is that it
+-- won't degrade unicode strings.
+CREATE OR REPLACE FUNCTION evergreen.regexp_split_to_array(TEXT, TEXT)
+RETURNS TEXT[] AS $$
+    return encode_array_literal([split $_[1], $_[0]]);
+$$ LANGUAGE PLPERLU STRICT IMMUTABLE;
+
+-- Given a string such as a user might type into a search box, prepare
+-- two changed variants for TO_TSQUERY(). See
+-- http://www.postgresql.org/docs/9.0/static/textsearch-controls.html
+-- The first variant is normalized to match indexed documents regardless
+-- of diacritics.  The second variant keeps its diacritics for proper
+-- highlighting via TS_HEADLINE().
+CREATE OR REPLACE
+    FUNCTION metabib.autosuggest_prepare_tsquery(orig TEXT) RETURNS TEXT[] AS
+$$
+DECLARE
+    orig_ended_in_space     BOOLEAN;
+    result                  RECORD;
+    plain                   TEXT;
+    normalized              TEXT;
+BEGIN
+    orig_ended_in_space := orig ~ E'\\s$';
+
+    orig := ARRAY_TO_STRING(
+        evergreen.regexp_split_to_array(orig, E'\\W+'), ' '
+    );
+
+    normalized := public.search_normalize(orig); -- also trim()s
+    plain := trim(orig);
+
+    IF NOT orig_ended_in_space THEN
+        plain := plain || ':*';
+        normalized := normalized || ':*';
+    END IF;
+
+    plain := ARRAY_TO_STRING(
+        evergreen.regexp_split_to_array(plain, E'\\s+'), ' & '
+    );
+    normalized := ARRAY_TO_STRING(
+        evergreen.regexp_split_to_array(normalized, E'\\s+'), ' & '
+    );
+
+    RETURN ARRAY[normalized, plain];
+END;
+$$ LANGUAGE PLPGSQL;
  
  CREATE OR REPLACE
      FUNCTION metabib.suggest_browse_entries(
-        query_text      TEXT,   -- 'foo' or 'foo & ba:*',ready for to_tsquery()
+        raw_query_text  TEXT,   -- actually typed by humans at the UI level
          search_class    TEXT,   -- 'alias' or 'class' or 'class|field..', etc
          headline_opts   TEXT,   -- markup options for ts_headline()
          visibility_org  INTEGER,-- null if you don't want opac visibility test
@@ -12212,20 +12260,25 @@ CREATE OR REPLACE
      ) RETURNS TABLE (
          value                   TEXT,   -- plain
          field                   INTEGER,
-        bouyant_and_class_match BOOL,
+        buoyant_and_class_match BOOL,
          field_match             BOOL,
          field_weight            INTEGER,
          rank                    REAL,
-        bouyant                 BOOL,
+        buoyant                 BOOL,
          match                   TEXT    -- marked up
      ) AS $func$
  DECLARE
+    prepared_query_texts    TEXT[];
      query                   TSQUERY;
+    plain_query             TSQUERY;
      opac_visibility_join    TEXT;
      search_class_join       TEXT;
      r_fields                RECORD;
  BEGIN
-    query := TO_TSQUERY('keyword', query_text);
+    prepared_query_texts := metabib.autosuggest_prepare_tsquery(raw_query_text);
+
+    query := TO_TSQUERY('keyword', prepared_query_texts[1]);
+    plain_query := TO_TSQUERY('keyword', prepared_query_texts[2]);
  
      IF visibility_org IS NOT NULL THEN
          opac_visibility_join := '
@@ -12278,14 +12331,14 @@ BEGIN
      ';
      END IF;
  
-    RETURN QUERY EXECUTE 'SELECT *, TS_HEADLINE(value, $1, $3) FROM (SELECT DISTINCT
+    RETURN QUERY EXECUTE 'SELECT *, TS_HEADLINE(value, $7, $3) FROM (SELECT DISTINCT
          mbe.value,
          cmf.id,
-        cmc.bouyant AND _registered.field_class IS NOT NULL,
+        cmc.buoyant AND _registered.field_class IS NOT NULL,
          _registered.field = cmf.id,
          cmf.weight,
          TS_RANK_CD(mbe.index_vector, $1, $6),
-        cmc.bouyant
+        cmc.buoyant
      FROM metabib.browse_entry_def_map mbedm
      JOIN metabib.browse_entry mbe ON (mbe.id = mbedm.entry)
      JOIN config.metabib_field cmf ON (cmf.id = mbedm.def)
@@ -12298,28 +12351,20 @@ BEGIN
      '   -- sic, repeat the order by clause in the outer select too
      USING
          query, search_class, headline_opts,
-        visibility_org, query_limit, normalization
+        visibility_org, query_limit, normalization, plain_query
          ;
  
      -- sort order:
-    --  bouyant AND chosen class = match class
+    --  buoyant AND chosen class = match class
      --  chosen field = match field
      --  field weight
      --  rank
-    --  bouyancy
+    --  buoyancy
      --  value itself
  
  END;
  $func$ LANGUAGE PLPGSQL;
  
--- The advantage of this over the stock regexp_split_to_array() is that it
--- won't degrade unicode strings.
-CREATE OR REPLACE FUNCTION evergreen.regexp_split_to_array(TEXT, TEXT)
-RETURNS TEXT[] AS $$
-    return encode_array_literal([split $_[1], $_[0]]);
-$$ LANGUAGE PLPERLU STRICT IMMUTABLE;
-
-
  -- Adds some logic for browse_entry to split on non-word chars for index_vector, post-normalize
  CREATE OR REPLACE FUNCTION oils_tsearch2 () RETURNS TRIGGER AS $$
  DECLARE
@@ -12381,6 +12426,7 @@ BEGIN
          value :=  ARRAY_TO_STRING(
              evergreen.regexp_split_to_array(value, E'\\W+'), ' '
          );
+        value := public.search_normalize(value);
      END IF;
  
      NEW.index_vector = to_tsvector((TG_ARGV[0])::regconfig, value);
author	Dan Scott <dscott@laurentian.ca>
	Sun, 4 Mar 2012 19:10:48 +0000 (14:10 -0500)
committer	Dan Scott <dscott@laurentian.ca>
	Sun, 4 Mar 2012 19:10:48 +0000 (14:10 -0500)