ALTER TABLE config.metabib_field ADD COLUMN browse_field BOOLEAN DEFAULT TRUE NOT NULL;
ALTER TABLE config.metabib_field ADD COLUMN browse_xpath TEXT;
-ALTER TABLE config.metabib_class ADD COLUMN bouyant BOOLEAN DEFAULT FALSE NOT NULL;
+ALTER TABLE config.metabib_class ADD COLUMN buoyant BOOLEAN DEFAULT FALSE NOT NULL;
ALTER TABLE config.metabib_class ADD COLUMN restrict BOOLEAN DEFAULT FALSE NOT NULL;
ALTER TABLE config.metabib_field ADD COLUMN restrict BOOLEAN DEFAULT FALSE NOT NULL;
END;
$func$ LANGUAGE PLPGSQL;
+SELECT evergreen.upgrade_deps_block_check('0679', :eg_version);
+
+-- The advantage of this over the stock regexp_split_to_array() is that it
+-- won't degrade unicode strings.
+CREATE OR REPLACE FUNCTION evergreen.regexp_split_to_array(TEXT, TEXT)
+RETURNS TEXT[] AS $$
+ return encode_array_literal([split $_[1], $_[0]]);
+$$ LANGUAGE PLPERLU STRICT IMMUTABLE;
+
+-- Given a string such as a user might type into a search box, prepare
+-- two changed variants for TO_TSQUERY(). See
+-- http://www.postgresql.org/docs/9.0/static/textsearch-controls.html
+-- The first variant is normalized to match indexed documents regardless
+-- of diacritics. The second variant keeps its diacritics for proper
+-- highlighting via TS_HEADLINE().
+CREATE OR REPLACE
+ FUNCTION metabib.autosuggest_prepare_tsquery(orig TEXT) RETURNS TEXT[] AS
+$$
+DECLARE
+ orig_ended_in_space BOOLEAN;
+ result RECORD;
+ plain TEXT;
+ normalized TEXT;
+BEGIN
+ orig_ended_in_space := orig ~ E'\\s$';
+
+ orig := ARRAY_TO_STRING(
+ evergreen.regexp_split_to_array(orig, E'\\W+'), ' '
+ );
+
+ normalized := public.search_normalize(orig); -- also trim()s
+ plain := trim(orig);
+
+ IF NOT orig_ended_in_space THEN
+ plain := plain || ':*';
+ normalized := normalized || ':*';
+ END IF;
+
+ plain := ARRAY_TO_STRING(
+ evergreen.regexp_split_to_array(plain, E'\\s+'), ' & '
+ );
+ normalized := ARRAY_TO_STRING(
+ evergreen.regexp_split_to_array(normalized, E'\\s+'), ' & '
+ );
+
+ RETURN ARRAY[normalized, plain];
+END;
+$$ LANGUAGE PLPGSQL;
CREATE OR REPLACE
FUNCTION metabib.suggest_browse_entries(
- query_text TEXT, -- 'foo' or 'foo & ba:*',ready for to_tsquery()
+ raw_query_text TEXT, -- actually typed by humans at the UI level
search_class TEXT, -- 'alias' or 'class' or 'class|field..', etc
headline_opts TEXT, -- markup options for ts_headline()
visibility_org INTEGER,-- null if you don't want opac visibility test
) RETURNS TABLE (
value TEXT, -- plain
field INTEGER,
- bouyant_and_class_match BOOL,
+ buoyant_and_class_match BOOL,
field_match BOOL,
field_weight INTEGER,
rank REAL,
- bouyant BOOL,
+ buoyant BOOL,
match TEXT -- marked up
) AS $func$
DECLARE
+ prepared_query_texts TEXT[];
query TSQUERY;
+ plain_query TSQUERY;
opac_visibility_join TEXT;
search_class_join TEXT;
r_fields RECORD;
BEGIN
- query := TO_TSQUERY('keyword', query_text);
+ prepared_query_texts := metabib.autosuggest_prepare_tsquery(raw_query_text);
+
+ query := TO_TSQUERY('keyword', prepared_query_texts[1]);
+ plain_query := TO_TSQUERY('keyword', prepared_query_texts[2]);
IF visibility_org IS NOT NULL THEN
opac_visibility_join := '
';
END IF;
- RETURN QUERY EXECUTE 'SELECT *, TS_HEADLINE(value, $1, $3) FROM (SELECT DISTINCT
+ RETURN QUERY EXECUTE 'SELECT *, TS_HEADLINE(value, $7, $3) FROM (SELECT DISTINCT
mbe.value,
cmf.id,
- cmc.bouyant AND _registered.field_class IS NOT NULL,
+ cmc.buoyant AND _registered.field_class IS NOT NULL,
_registered.field = cmf.id,
cmf.weight,
TS_RANK_CD(mbe.index_vector, $1, $6),
- cmc.bouyant
+ cmc.buoyant
FROM metabib.browse_entry_def_map mbedm
JOIN metabib.browse_entry mbe ON (mbe.id = mbedm.entry)
JOIN config.metabib_field cmf ON (cmf.id = mbedm.def)
' -- sic, repeat the order by clause in the outer select too
USING
query, search_class, headline_opts,
- visibility_org, query_limit, normalization
+ visibility_org, query_limit, normalization, plain_query
;
-- sort order:
- -- bouyant AND chosen class = match class
+ -- buoyant AND chosen class = match class
-- chosen field = match field
-- field weight
-- rank
- -- bouyancy
+ -- buoyancy
-- value itself
END;
$func$ LANGUAGE PLPGSQL;
--- The advantage of this over the stock regexp_split_to_array() is that it
--- won't degrade unicode strings.
-CREATE OR REPLACE FUNCTION evergreen.regexp_split_to_array(TEXT, TEXT)
-RETURNS TEXT[] AS $$
- return encode_array_literal([split $_[1], $_[0]]);
-$$ LANGUAGE PLPERLU STRICT IMMUTABLE;
-
-
-- Adds some logic for browse_entry to split on non-word chars for index_vector, post-normalize
CREATE OR REPLACE FUNCTION oils_tsearch2 () RETURNS TRIGGER AS $$
DECLARE
value := ARRAY_TO_STRING(
evergreen.regexp_split_to_array(value, E'\\W+'), ' '
);
+ value := public.search_normalize(value);
END IF;
NEW.index_vector = to_tsvector((TG_ARGV[0])::regconfig, value);