BEFORE INSERT OR UPDATE ON config.db_patch_dependencies
FOR EACH ROW EXECUTE PROCEDURE evergreen.array_overlap_check ('deprecates');
-INSERT INTO config.upgrade_log (version, applied_to) VALUES ('0678', :eg_version); -- berick/miker
+INSERT INTO config.upgrade_log (version, applied_to) VALUES ('0679', :eg_version); -- berick/miker
CREATE TABLE config.bib_source (
id SERIAL PRIMARY KEY,
--- /dev/null
+BEGIN;
+
+SELECT evergreen.upgrade_deps_block_check('0679', :eg_version);
+
+-- Address typo in column name
+ALTER TABLE config.metabib_class ADD COLUMN buoyant BOOL DEFAULT FALSE NOT NULL;
+UPDATE config.metabib_class SET buoyant = bouyant;
+ALTER TABLE config.metabib_class DROP COLUMN bouyant;
+
+CREATE OR REPLACE FUNCTION oils_tsearch2 () RETURNS TRIGGER AS $$
+DECLARE
+ normalizer RECORD;
+ value TEXT := '';
+BEGIN
+
+ value := NEW.value;
+
+ IF TG_TABLE_NAME::TEXT ~ 'field_entry$' THEN
+ FOR normalizer IN
+ SELECT n.func AS func,
+ n.param_count AS param_count,
+ m.params AS params
+ FROM config.index_normalizer n
+ JOIN config.metabib_field_index_norm_map m ON (m.norm = n.id)
+ WHERE field = NEW.field AND m.pos < 0
+ ORDER BY m.pos LOOP
+ EXECUTE 'SELECT ' || normalizer.func || '(' ||
+ quote_literal( value ) ||
+ CASE
+ WHEN normalizer.param_count > 0
+ THEN ',' || REPLACE(REPLACE(BTRIM(normalizer.params,'[]'),E'\'',E'\\\''),E'"',E'\'')
+ ELSE ''
+ END ||
+ ')' INTO value;
+
+ END LOOP;
+
+ NEW.value := value;
+ END IF;
+
+ IF NEW.index_vector = ''::tsvector THEN
+ RETURN NEW;
+ END IF;
+
+ IF TG_TABLE_NAME::TEXT ~ 'field_entry$' THEN
+ FOR normalizer IN
+ SELECT n.func AS func,
+ n.param_count AS param_count,
+ m.params AS params
+ FROM config.index_normalizer n
+ JOIN config.metabib_field_index_norm_map m ON (m.norm = n.id)
+ WHERE field = NEW.field AND m.pos >= 0
+ ORDER BY m.pos LOOP
+ EXECUTE 'SELECT ' || normalizer.func || '(' ||
+ quote_literal( value ) ||
+ CASE
+ WHEN normalizer.param_count > 0
+ THEN ',' || REPLACE(REPLACE(BTRIM(normalizer.params,'[]'),E'\'',E'\\\''),E'"',E'\'')
+ ELSE ''
+ END ||
+ ')' INTO value;
+
+ END LOOP;
+ END IF;
+
+ IF TG_TABLE_NAME::TEXT ~ 'browse_entry$' THEN
+ value := ARRAY_TO_STRING(
+ evergreen.regexp_split_to_array(value, E'\\W+'), ' '
+ );
+ value := public.search_normalize(value);
+ END IF;
+
+ NEW.index_vector = to_tsvector((TG_ARGV[0])::regconfig, value);
+
+ RETURN NEW;
+END;
+$$ LANGUAGE PLPGSQL;
+
+-- Given a string such as a user might type into a search box, prepare
+-- two changed variants for TO_TSQUERY(). See
+-- http://www.postgresql.org/docs/9.0/static/textsearch-controls.html
+-- The first variant is normalized to match indexed documents regardless
+-- of diacritics. The second variant keeps its diacritics for proper
+-- highlighting via TS_HEADLINE().
+CREATE OR REPLACE
+ FUNCTION metabib.autosuggest_prepare_tsquery(orig TEXT) RETURNS TEXT[] AS
+$$
+DECLARE
+ orig_ended_in_space BOOLEAN;
+ result RECORD;
+ plain TEXT;
+ normalized TEXT;
+BEGIN
+ orig_ended_in_space := orig ~ E'\\s$';
+
+ orig := ARRAY_TO_STRING(
+ evergreen.regexp_split_to_array(orig, E'\\W+'), ' '
+ );
+
+ normalized := public.search_normalize(orig); -- also trim()s
+ plain := trim(orig);
+
+ IF NOT orig_ended_in_space THEN
+ plain := plain || ':*';
+ normalized := normalized || ':*';
+ END IF;
+
+ plain := ARRAY_TO_STRING(
+ evergreen.regexp_split_to_array(plain, E'\\s+'), ' & '
+ );
+ normalized := ARRAY_TO_STRING(
+ evergreen.regexp_split_to_array(normalized, E'\\s+'), ' & '
+ );
+
+ RETURN ARRAY[normalized, plain];
+END;
+$$ LANGUAGE PLPGSQL;
+
+
+-- Definition of OUT parameters changes, so must drop first
+DROP FUNCTION IF EXISTS metabib.suggest_browse_entries (TEXT, TEXT, TEXT, INTEGER, INTEGER, INTEGER);
+
+CREATE OR REPLACE
+ FUNCTION metabib.suggest_browse_entries(
+ raw_query_text TEXT, -- actually typed by humans at the UI level
+ search_class TEXT, -- 'alias' or 'class' or 'class|field..', etc
+ headline_opts TEXT, -- markup options for ts_headline()
+ visibility_org INTEGER,-- null if you don't want opac visibility test
+ query_limit INTEGER,-- use in LIMIT clause of interal query
+ normalization INTEGER -- argument to TS_RANK_CD()
+ ) RETURNS TABLE (
+ value TEXT, -- plain
+ field INTEGER,
+ buoyant_and_class_match BOOL,
+ field_match BOOL,
+ field_weight INTEGER,
+ rank REAL,
+ buoyant BOOL,
+ match TEXT -- marked up
+ ) AS $func$
+DECLARE
+ prepared_query_texts TEXT[];
+ query TSQUERY;
+ plain_query TSQUERY;
+ opac_visibility_join TEXT;
+ search_class_join TEXT;
+ r_fields RECORD;
+BEGIN
+ prepared_query_texts := metabib.autosuggest_prepare_tsquery(raw_query_text);
+
+ query := TO_TSQUERY('keyword', prepared_query_texts[1]);
+ plain_query := TO_TSQUERY('keyword', prepared_query_texts[2]);
+
+ IF visibility_org IS NOT NULL THEN
+ opac_visibility_join := '
+ JOIN asset.opac_visible_copies aovc ON (
+ aovc.record = mbedm.source AND
+ aovc.circ_lib IN (SELECT id FROM actor.org_unit_descendants($4))
+ )';
+ ELSE
+ opac_visibility_join := '';
+ END IF;
+
+ -- The following determines whether we only provide suggestsons matching
+ -- the user's selected search_class, or whether we show other suggestions
+ -- too. The reason for MIN() is that for search_classes like
+ -- 'title|proper|uniform' you would otherwise get multiple rows. The
+ -- implication is that if title as a class doesn't have restrict,
+ -- nor does the proper field, but the uniform field does, you're going
+ -- to get 'false' for your overall evaluation of 'should we restrict?'
+ -- To invert that, change from MIN() to MAX().
+
+ SELECT
+ INTO r_fields
+ MIN(cmc.restrict::INT) AS restrict_class,
+ MIN(cmf.restrict::INT) AS restrict_field
+ FROM metabib.search_class_to_registered_components(search_class)
+ AS _registered (field_class TEXT, field INT)
+ JOIN
+ config.metabib_class cmc ON (cmc.name = _registered.field_class)
+ LEFT JOIN
+ config.metabib_field cmf ON (cmf.id = _registered.field);
+
+ -- evaluate 'should we restrict?'
+ IF r_fields.restrict_field::BOOL OR r_fields.restrict_class::BOOL THEN
+ search_class_join := '
+ JOIN
+ metabib.search_class_to_registered_components($2)
+ AS _registered (field_class TEXT, field INT) ON (
+ (_registered.field IS NULL AND
+ _registered.field_class = cmf.field_class) OR
+ (_registered.field = cmf.id)
+ )
+ ';
+ ELSE
+ search_class_join := '
+ LEFT JOIN
+ metabib.search_class_to_registered_components($2)
+ AS _registered (field_class TEXT, field INT) ON (
+ _registered.field_class = cmc.name
+ )
+ ';
+ END IF;
+
+ RETURN QUERY EXECUTE 'SELECT *, TS_HEADLINE(value, $7, $3) FROM (SELECT DISTINCT
+ mbe.value,
+ cmf.id,
+ cmc.buoyant AND _registered.field_class IS NOT NULL,
+ _registered.field = cmf.id,
+ cmf.weight,
+ TS_RANK_CD(mbe.index_vector, $1, $6),
+ cmc.buoyant
+ FROM metabib.browse_entry_def_map mbedm
+ JOIN metabib.browse_entry mbe ON (mbe.id = mbedm.entry)
+ JOIN config.metabib_field cmf ON (cmf.id = mbedm.def)
+ JOIN config.metabib_class cmc ON (cmf.field_class = cmc.name)
+ ' || search_class_join || opac_visibility_join ||
+ ' WHERE $1 @@ mbe.index_vector
+ ORDER BY 3 DESC, 4 DESC NULLS LAST, 5 DESC, 6 DESC, 7 DESC, 1 ASC
+ LIMIT $5) x
+ ORDER BY 3 DESC, 4 DESC NULLS LAST, 5 DESC, 6 DESC, 7 DESC, 1 ASC
+ ' -- sic, repeat the order by clause in the outer select too
+ USING
+ query, search_class, headline_opts,
+ visibility_org, query_limit, normalization, plain_query
+ ;
+
+ -- sort order:
+ -- buoyant AND chosen class = match class
+ -- chosen field = match field
+ -- field weight
+ -- rank
+ -- buoyancy
+ -- value itself
+
+END;
+$func$ LANGUAGE PLPGSQL;
+
+
+\qecho
+\qecho The following takes about a minute per 100,000 rows in
+\qecho metabib.browse_entry on my development system, which is only a VM with
+\qecho 4 GB of memory and 2 cores.
+\qecho
+\qecho The following is a very loose estimate of how long the next UPDATE
+\qecho statement would take to finish on MY machine, based on YOUR number
+\qecho of rows in metabib.browse_entry:
+\qecho
+
+SELECT (COUNT(id) / 100000.0) * INTERVAL '1 minute'
+ AS "approximate duration of following UPDATE statement"
+ FROM metabib.browse_entry;
+
+UPDATE metabib.browse_entry SET index_vector = TO_TSVECTOR(
+ 'keyword',
+ public.search_normalize(
+ ARRAY_TO_STRING(
+ evergreen.regexp_split_to_array(value, E'\\W+'), ' '
+ )
+ )
+);
+
+COMMIT;
+++ /dev/null
-BEGIN;
-
-SELECT evergreen.upgrade_deps_block_check('XXXX', :eg_version);
-
-CREATE OR REPLACE FUNCTION oils_tsearch2 () RETURNS TRIGGER AS $$
-DECLARE
- normalizer RECORD;
- value TEXT := '';
-BEGIN
-
- value := NEW.value;
-
- IF TG_TABLE_NAME::TEXT ~ 'field_entry$' THEN
- FOR normalizer IN
- SELECT n.func AS func,
- n.param_count AS param_count,
- m.params AS params
- FROM config.index_normalizer n
- JOIN config.metabib_field_index_norm_map m ON (m.norm = n.id)
- WHERE field = NEW.field AND m.pos < 0
- ORDER BY m.pos LOOP
- EXECUTE 'SELECT ' || normalizer.func || '(' ||
- quote_literal( value ) ||
- CASE
- WHEN normalizer.param_count > 0
- THEN ',' || REPLACE(REPLACE(BTRIM(normalizer.params,'[]'),E'\'',E'\\\''),E'"',E'\'')
- ELSE ''
- END ||
- ')' INTO value;
-
- END LOOP;
-
- NEW.value := value;
- END IF;
-
- IF NEW.index_vector = ''::tsvector THEN
- RETURN NEW;
- END IF;
-
- IF TG_TABLE_NAME::TEXT ~ 'field_entry$' THEN
- FOR normalizer IN
- SELECT n.func AS func,
- n.param_count AS param_count,
- m.params AS params
- FROM config.index_normalizer n
- JOIN config.metabib_field_index_norm_map m ON (m.norm = n.id)
- WHERE field = NEW.field AND m.pos >= 0
- ORDER BY m.pos LOOP
- EXECUTE 'SELECT ' || normalizer.func || '(' ||
- quote_literal( value ) ||
- CASE
- WHEN normalizer.param_count > 0
- THEN ',' || REPLACE(REPLACE(BTRIM(normalizer.params,'[]'),E'\'',E'\\\''),E'"',E'\'')
- ELSE ''
- END ||
- ')' INTO value;
-
- END LOOP;
- END IF;
-
- IF TG_TABLE_NAME::TEXT ~ 'browse_entry$' THEN
- value := ARRAY_TO_STRING(
- evergreen.regexp_split_to_array(value, E'\\W+'), ' '
- );
- value := public.search_normalize(value);
- END IF;
-
- NEW.index_vector = to_tsvector((TG_ARGV[0])::regconfig, value);
-
- RETURN NEW;
-END;
-$$ LANGUAGE PLPGSQL;
-
--- Given a string such as a user might type into a search box, prepare
--- two changed variants for TO_TSQUERY(). See
--- http://www.postgresql.org/docs/9.0/static/textsearch-controls.html
--- The first variant is normalized to match indexed documents regardless
--- of diacritics. The second variant keeps its diacritics for proper
--- highlighting via TS_HEADLINE().
-CREATE OR REPLACE
- FUNCTION metabib.autosuggest_prepare_tsquery(orig TEXT) RETURNS TEXT[] AS
-$$
-DECLARE
- orig_ended_in_space BOOLEAN;
- result RECORD;
- plain TEXT;
- normalized TEXT;
-BEGIN
- orig_ended_in_space := orig ~ E'\\s$';
-
- orig := ARRAY_TO_STRING(
- evergreen.regexp_split_to_array(orig, E'\\W+'), ' '
- );
-
- normalized := public.search_normalize(orig); -- also trim()s
- plain := trim(orig);
-
- IF NOT orig_ended_in_space THEN
- plain := plain || ':*';
- normalized := normalized || ':*';
- END IF;
-
- plain := ARRAY_TO_STRING(
- evergreen.regexp_split_to_array(plain, E'\\s+'), ' & '
- );
- normalized := ARRAY_TO_STRING(
- evergreen.regexp_split_to_array(normalized, E'\\s+'), ' & '
- );
-
- RETURN ARRAY[normalized, plain];
-END;
-$$ LANGUAGE PLPGSQL;
-
-
--- Definition of OUT parameters changes, so must drop first
-DROP FUNCTION IF EXISTS metabib.suggest_browse_entries (TEXT, TEXT, TEXT, INTEGER, INTEGER, INTEGER);
-
-CREATE OR REPLACE
- FUNCTION metabib.suggest_browse_entries(
- raw_query_text TEXT, -- actually typed by humans at the UI level
- search_class TEXT, -- 'alias' or 'class' or 'class|field..', etc
- headline_opts TEXT, -- markup options for ts_headline()
- visibility_org INTEGER,-- null if you don't want opac visibility test
- query_limit INTEGER,-- use in LIMIT clause of interal query
- normalization INTEGER -- argument to TS_RANK_CD()
- ) RETURNS TABLE (
- value TEXT, -- plain
- field INTEGER,
- buoyant_and_class_match BOOL,
- field_match BOOL,
- field_weight INTEGER,
- rank REAL,
- buoyant BOOL,
- match TEXT -- marked up
- ) AS $func$
-DECLARE
- prepared_query_texts TEXT[];
- query TSQUERY;
- plain_query TSQUERY;
- opac_visibility_join TEXT;
- search_class_join TEXT;
- r_fields RECORD;
-BEGIN
- prepared_query_texts := metabib.autosuggest_prepare_tsquery(raw_query_text);
-
- query := TO_TSQUERY('keyword', prepared_query_texts[1]);
- plain_query := TO_TSQUERY('keyword', prepared_query_texts[2]);
-
- IF visibility_org IS NOT NULL THEN
- opac_visibility_join := '
- JOIN asset.opac_visible_copies aovc ON (
- aovc.record = mbedm.source AND
- aovc.circ_lib IN (SELECT id FROM actor.org_unit_descendants($4))
- )';
- ELSE
- opac_visibility_join := '';
- END IF;
-
- -- The following determines whether we only provide suggestsons matching
- -- the user's selected search_class, or whether we show other suggestions
- -- too. The reason for MIN() is that for search_classes like
- -- 'title|proper|uniform' you would otherwise get multiple rows. The
- -- implication is that if title as a class doesn't have restrict,
- -- nor does the proper field, but the uniform field does, you're going
- -- to get 'false' for your overall evaluation of 'should we restrict?'
- -- To invert that, change from MIN() to MAX().
-
- SELECT
- INTO r_fields
- MIN(cmc.restrict::INT) AS restrict_class,
- MIN(cmf.restrict::INT) AS restrict_field
- FROM metabib.search_class_to_registered_components(search_class)
- AS _registered (field_class TEXT, field INT)
- JOIN
- config.metabib_class cmc ON (cmc.name = _registered.field_class)
- LEFT JOIN
- config.metabib_field cmf ON (cmf.id = _registered.field);
-
- -- evaluate 'should we restrict?'
- IF r_fields.restrict_field::BOOL OR r_fields.restrict_class::BOOL THEN
- search_class_join := '
- JOIN
- metabib.search_class_to_registered_components($2)
- AS _registered (field_class TEXT, field INT) ON (
- (_registered.field IS NULL AND
- _registered.field_class = cmf.field_class) OR
- (_registered.field = cmf.id)
- )
- ';
- ELSE
- search_class_join := '
- LEFT JOIN
- metabib.search_class_to_registered_components($2)
- AS _registered (field_class TEXT, field INT) ON (
- _registered.field_class = cmc.name
- )
- ';
- END IF;
-
- RETURN QUERY EXECUTE 'SELECT *, TS_HEADLINE(value, $7, $3) FROM (SELECT DISTINCT
- mbe.value,
- cmf.id,
- cmc.buoyant AND _registered.field_class IS NOT NULL,
- _registered.field = cmf.id,
- cmf.weight,
- TS_RANK_CD(mbe.index_vector, $1, $6),
- cmc.buoyant
- FROM metabib.browse_entry_def_map mbedm
- JOIN metabib.browse_entry mbe ON (mbe.id = mbedm.entry)
- JOIN config.metabib_field cmf ON (cmf.id = mbedm.def)
- JOIN config.metabib_class cmc ON (cmf.field_class = cmc.name)
- ' || search_class_join || opac_visibility_join ||
- ' WHERE $1 @@ mbe.index_vector
- ORDER BY 3 DESC, 4 DESC NULLS LAST, 5 DESC, 6 DESC, 7 DESC, 1 ASC
- LIMIT $5) x
- ORDER BY 3 DESC, 4 DESC NULLS LAST, 5 DESC, 6 DESC, 7 DESC, 1 ASC
- ' -- sic, repeat the order by clause in the outer select too
- USING
- query, search_class, headline_opts,
- visibility_org, query_limit, normalization, plain_query
- ;
-
- -- sort order:
- -- buoyant AND chosen class = match class
- -- chosen field = match field
- -- field weight
- -- rank
- -- buoyancy
- -- value itself
-
-END;
-$func$ LANGUAGE PLPGSQL;
-
-
-\qecho
-\qecho The following takes about a minute per 100,000 rows in
-\qecho metabib.browse_entry on my development system, which is only a VM with
-\qecho 4 GB of memory and 2 cores.
-\qecho
-\qecho The following is a very loose estimate of how long the next UPDATE
-\qecho statement would take to finish on MY machine, based on YOUR number
-\qecho of rows in metabib.browse_entry:
-\qecho
-
-SELECT (COUNT(id) / 100000.0) * INTERVAL '1 minute'
- AS "approximate duration of following UPDATE statement"
- FROM metabib.browse_entry;
-
-UPDATE metabib.browse_entry SET index_vector = TO_TSVECTOR(
- 'keyword',
- public.search_normalize(
- ARRAY_TO_STRING(
- evergreen.regexp_split_to_array(value, E'\\W+'), ' '
- )
- )
-);
-
-COMMIT;