From: Mike Rylander Date: Mon, 20 Feb 2012 19:24:49 +0000 (-0500) Subject: Stamping autosuggest upgrade script X-Git-Url: https://old-git.evergreen-ils.org/?a=commitdiff_plain;h=86e831e1ad85f6f5ed89ebf6bda4fe379808cd41;p=contrib%2FConifer.git Stamping autosuggest upgrade script Signed-off-by: Mike Rylander --- diff --git a/Open-ILS/src/sql/Pg/002.schema.config.sql b/Open-ILS/src/sql/Pg/002.schema.config.sql index ce262201fd..9e56f3d04d 100644 --- a/Open-ILS/src/sql/Pg/002.schema.config.sql +++ b/Open-ILS/src/sql/Pg/002.schema.config.sql @@ -86,7 +86,7 @@ CREATE TRIGGER no_overlapping_deps BEFORE INSERT OR UPDATE ON config.db_patch_dependencies FOR EACH ROW EXECUTE PROCEDURE evergreen.array_overlap_check ('deprecates'); -INSERT INTO config.upgrade_log (version, applied_to) VALUES ('0675', :eg_version); -- gmc/berick +INSERT INTO config.upgrade_log (version, applied_to) VALUES ('0676', :eg_version); -- senator/miker CREATE TABLE config.bib_source ( id SERIAL PRIMARY KEY, diff --git a/Open-ILS/src/sql/Pg/upgrade/0676.schema.bib_autosuggest.sql b/Open-ILS/src/sql/Pg/upgrade/0676.schema.bib_autosuggest.sql new file mode 100644 index 0000000000..3a1caf1f21 --- /dev/null +++ b/Open-ILS/src/sql/Pg/upgrade/0676.schema.bib_autosuggest.sql @@ -0,0 +1,711 @@ +BEGIN; + +SELECT evergreen.upgrade_deps_block_check('0676', :eg_version); + +INSERT INTO config.global_flag (name, label, enabled, value) VALUES ( + 'opac.use_autosuggest', + 'OPAC: Show auto-completing suggestions dialog under basic search box (put ''opac_visible'' into the value field to limit suggestions to OPAC-visible items, or blank the field for a possible performance improvement)', + TRUE, + 'opac_visible' +); + +CREATE TABLE metabib.browse_entry ( + id BIGSERIAL PRIMARY KEY, + value TEXT unique, + index_vector tsvector +); +CREATE INDEX metabib_browse_entry_index_vector_idx ON metabib.browse_entry USING GIST (index_vector); +CREATE TRIGGER metabib_browse_entry_fti_trigger + BEFORE INSERT OR UPDATE ON metabib.browse_entry + FOR EACH ROW EXECUTE PROCEDURE oils_tsearch2('keyword'); + + +CREATE TABLE metabib.browse_entry_def_map ( + id BIGSERIAL PRIMARY KEY, + entry BIGINT REFERENCES metabib.browse_entry (id), + def INT REFERENCES config.metabib_field (id), + source BIGINT REFERENCES biblio.record_entry (id) +); + +ALTER TABLE config.metabib_field ADD COLUMN browse_field BOOLEAN DEFAULT TRUE NOT NULL; +ALTER TABLE config.metabib_field ADD COLUMN browse_xpath TEXT; + +ALTER TABLE config.metabib_class ADD COLUMN bouyant BOOLEAN DEFAULT FALSE NOT NULL; +ALTER TABLE config.metabib_class ADD COLUMN restrict BOOLEAN DEFAULT FALSE NOT NULL; +ALTER TABLE config.metabib_field ADD COLUMN restrict BOOLEAN DEFAULT FALSE NOT NULL; + +-- one good exception to default true: +UPDATE config.metabib_field + SET browse_field = FALSE + WHERE (field_class = 'keyword' AND name = 'keyword') OR + (field_class = 'subject' AND name = 'complete'); + +-- AFTER UPDATE OR INSERT trigger for biblio.record_entry +-- We're only touching it here to add a DELETE statement to the IF NEW.deleted +-- block. + +CREATE OR REPLACE FUNCTION biblio.indexing_ingest_or_delete () RETURNS TRIGGER AS $func$ +DECLARE + transformed_xml TEXT; + prev_xfrm TEXT; + normalizer RECORD; + xfrm config.xml_transform%ROWTYPE; + attr_value TEXT; + new_attrs HSTORE := ''::HSTORE; + attr_def config.record_attr_definition%ROWTYPE; +BEGIN + + IF NEW.deleted IS TRUE THEN -- If this bib is deleted + DELETE FROM metabib.metarecord_source_map WHERE source = NEW.id; -- Rid ourselves of the search-estimate-killing linkage + DELETE FROM metabib.record_attr WHERE id = NEW.id; -- Kill the attrs hash, useless on deleted records + DELETE FROM authority.bib_linking WHERE bib = NEW.id; -- Avoid updating fields in bibs that are no longer visible + DELETE FROM biblio.peer_bib_copy_map WHERE peer_record = NEW.id; -- Separate any multi-homed items + DELETE FROM metabib.browse_entry_def_map WHERE source = NEW.id; -- Don't auto-suggest deleted bibs + RETURN NEW; -- and we're done + END IF; + + IF TG_OP = 'UPDATE' THEN -- re-ingest? + PERFORM * FROM config.internal_flag WHERE name = 'ingest.reingest.force_on_same_marc' AND enabled; + + IF NOT FOUND AND OLD.marc = NEW.marc THEN -- don't do anything if the MARC didn't change + RETURN NEW; + END IF; + END IF; + + -- Record authority linking + PERFORM * FROM config.internal_flag WHERE name = 'ingest.disable_authority_linking' AND enabled; + IF NOT FOUND THEN + PERFORM biblio.map_authority_linking( NEW.id, NEW.marc ); + END IF; + + -- Flatten and insert the mfr data + PERFORM * FROM config.internal_flag WHERE name = 'ingest.disable_metabib_full_rec' AND enabled; + IF NOT FOUND THEN + PERFORM metabib.reingest_metabib_full_rec(NEW.id); + + -- Now we pull out attribute data, which is dependent on the mfr for all but XPath-based fields + PERFORM * FROM config.internal_flag WHERE name = 'ingest.disable_metabib_rec_descriptor' AND enabled; + IF NOT FOUND THEN + FOR attr_def IN SELECT * FROM config.record_attr_definition ORDER BY format LOOP + + IF attr_def.tag IS NOT NULL THEN -- tag (and optional subfield list) selection + SELECT ARRAY_TO_STRING(ARRAY_ACCUM(value), COALESCE(attr_def.joiner,' ')) INTO attr_value + FROM (SELECT * FROM metabib.full_rec ORDER BY tag, subfield) AS x + WHERE record = NEW.id + AND tag LIKE attr_def.tag + AND CASE + WHEN attr_def.sf_list IS NOT NULL + THEN POSITION(subfield IN attr_def.sf_list) > 0 + ELSE TRUE + END + GROUP BY tag + ORDER BY tag + LIMIT 1; + + ELSIF attr_def.fixed_field IS NOT NULL THEN -- a named fixed field, see config.marc21_ff_pos_map.fixed_field + attr_value := biblio.marc21_extract_fixed_field(NEW.id, attr_def.fixed_field); + + ELSIF attr_def.xpath IS NOT NULL THEN -- and xpath expression + + SELECT INTO xfrm * FROM config.xml_transform WHERE name = attr_def.format; + + -- See if we can skip the XSLT ... it's expensive + IF prev_xfrm IS NULL OR prev_xfrm <> xfrm.name THEN + -- Can't skip the transform + IF xfrm.xslt <> '---' THEN + transformed_xml := oils_xslt_process(NEW.marc,xfrm.xslt); + ELSE + transformed_xml := NEW.marc; + END IF; + + prev_xfrm := xfrm.name; + END IF; + + IF xfrm.name IS NULL THEN + -- just grab the marcxml (empty) transform + SELECT INTO xfrm * FROM config.xml_transform WHERE xslt = '---' LIMIT 1; + prev_xfrm := xfrm.name; + END IF; + + attr_value := oils_xpath_string(attr_def.xpath, transformed_xml, COALESCE(attr_def.joiner,' '), ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]]); + + ELSIF attr_def.phys_char_sf IS NOT NULL THEN -- a named Physical Characteristic, see config.marc21_physical_characteristic_*_map + SELECT m.value INTO attr_value + FROM biblio.marc21_physical_characteristics(NEW.id) v + JOIN config.marc21_physical_characteristic_value_map m ON (m.id = v.value) + WHERE v.subfield = attr_def.phys_char_sf + LIMIT 1; -- Just in case ... + + END IF; + + -- apply index normalizers to attr_value + FOR normalizer IN + SELECT n.func AS func, + n.param_count AS param_count, + m.params AS params + FROM config.index_normalizer n + JOIN config.record_attr_index_norm_map m ON (m.norm = n.id) + WHERE attr = attr_def.name + ORDER BY m.pos LOOP + EXECUTE 'SELECT ' || normalizer.func || '(' || + COALESCE( quote_literal( attr_value ), 'NULL' ) || + CASE + WHEN normalizer.param_count > 0 + THEN ',' || REPLACE(REPLACE(BTRIM(normalizer.params,'[]'),E'\'',E'\\\''),E'"',E'\'') + ELSE '' + END || + ')' INTO attr_value; + + END LOOP; + + -- Add the new value to the hstore + new_attrs := new_attrs || hstore( attr_def.name, attr_value ); + + END LOOP; + + IF TG_OP = 'INSERT' OR OLD.deleted THEN -- initial insert OR revivication + INSERT INTO metabib.record_attr (id, attrs) VALUES (NEW.id, new_attrs); + ELSE + UPDATE metabib.record_attr SET attrs = new_attrs WHERE id = NEW.id; + END IF; + + END IF; + END IF; + + -- Gather and insert the field entry data + PERFORM metabib.reingest_metabib_field_entries(NEW.id); + + -- Located URI magic + IF TG_OP = 'INSERT' THEN + PERFORM * FROM config.internal_flag WHERE name = 'ingest.disable_located_uri' AND enabled; + IF NOT FOUND THEN + PERFORM biblio.extract_located_uris( NEW.id, NEW.marc, NEW.editor ); + END IF; + ELSE + PERFORM * FROM config.internal_flag WHERE name = 'ingest.disable_located_uri' AND enabled; + IF NOT FOUND THEN + PERFORM biblio.extract_located_uris( NEW.id, NEW.marc, NEW.editor ); + END IF; + END IF; + + -- (re)map metarecord-bib linking + IF TG_OP = 'INSERT' THEN -- if not deleted and performing an insert, check for the flag + PERFORM * FROM config.internal_flag WHERE name = 'ingest.metarecord_mapping.skip_on_insert' AND enabled; + IF NOT FOUND THEN + PERFORM metabib.remap_metarecord_for_bib( NEW.id, NEW.fingerprint ); + END IF; + ELSE -- we're doing an update, and we're not deleted, remap + PERFORM * FROM config.internal_flag WHERE name = 'ingest.metarecord_mapping.skip_on_update' AND enabled; + IF NOT FOUND THEN + PERFORM metabib.remap_metarecord_for_bib( NEW.id, NEW.fingerprint ); + END IF; + END IF; + + RETURN NEW; +END; +$func$ LANGUAGE PLPGSQL; + +CREATE OR REPLACE FUNCTION metabib.browse_normalize(facet_text TEXT, mapped_field INT) RETURNS TEXT AS $$ +DECLARE + normalizer RECORD; +BEGIN + + FOR normalizer IN + SELECT n.func AS func, + n.param_count AS param_count, + m.params AS params + FROM config.index_normalizer n + JOIN config.metabib_field_index_norm_map m ON (m.norm = n.id) + WHERE m.field = mapped_field AND m.pos < 0 + ORDER BY m.pos LOOP + + EXECUTE 'SELECT ' || normalizer.func || '(' || + quote_literal( facet_text ) || + CASE + WHEN normalizer.param_count > 0 + THEN ',' || REPLACE(REPLACE(BTRIM(normalizer.params,'[]'),E'\'',E'\\\''),E'"',E'\'') + ELSE '' + END || + ')' INTO facet_text; + + END LOOP; + + RETURN facet_text; +END; + +$$ LANGUAGE PLPGSQL; + +DROP FUNCTION biblio.extract_metabib_field_entry(bigint, text); +DROP FUNCTION biblio.extract_metabib_field_entry(bigint); + +DROP TYPE metabib.field_entry_template; +CREATE TYPE metabib.field_entry_template AS ( + field_class TEXT, + field INT, + facet_field BOOL, + search_field BOOL, + browse_field BOOL, + source BIGINT, + value TEXT +); + + +CREATE OR REPLACE FUNCTION biblio.extract_metabib_field_entry ( rid BIGINT, default_joiner TEXT ) RETURNS SETOF metabib.field_entry_template AS $func$ +DECLARE + bib biblio.record_entry%ROWTYPE; + idx config.metabib_field%ROWTYPE; + xfrm config.xml_transform%ROWTYPE; + prev_xfrm TEXT; + transformed_xml TEXT; + xml_node TEXT; + xml_node_list TEXT[]; + facet_text TEXT; + browse_text TEXT; + raw_text TEXT; + curr_text TEXT; + joiner TEXT := default_joiner; -- XXX will index defs supply a joiner? + output_row metabib.field_entry_template%ROWTYPE; +BEGIN + + -- Get the record + SELECT INTO bib * FROM biblio.record_entry WHERE id = rid; + + -- Loop over the indexing entries + FOR idx IN SELECT * FROM config.metabib_field ORDER BY format LOOP + + SELECT INTO xfrm * from config.xml_transform WHERE name = idx.format; + + -- See if we can skip the XSLT ... it's expensive + IF prev_xfrm IS NULL OR prev_xfrm <> xfrm.name THEN + -- Can't skip the transform + IF xfrm.xslt <> '---' THEN + transformed_xml := oils_xslt_process(bib.marc,xfrm.xslt); + ELSE + transformed_xml := bib.marc; + END IF; + + prev_xfrm := xfrm.name; + END IF; + + xml_node_list := oils_xpath( idx.xpath, transformed_xml, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] ); + + raw_text := NULL; + FOR xml_node IN SELECT x FROM unnest(xml_node_list) AS x LOOP + CONTINUE WHEN xml_node !~ E'^\\s*<'; + + curr_text := ARRAY_TO_STRING( + oils_xpath( '//text()', + REGEXP_REPLACE( -- This escapes all &s not followed by "amp;". Data ise returned from oils_xpath (above) in UTF-8, not entity encoded + REGEXP_REPLACE( -- This escapes embeded [^<]+)(<)([^>]+<)$re$, + E'\\1<\\3', + 'g' + ), + '&(?!amp;)', + '&', + 'g' + ) + ), + ' ' + ); + + CONTINUE WHEN curr_text IS NULL OR curr_text = ''; + + IF raw_text IS NOT NULL THEN + raw_text := raw_text || joiner; + END IF; + + raw_text := COALESCE(raw_text,'') || curr_text; + + -- autosuggest/metabib.browse_entry + IF idx.browse_field THEN + + IF idx.browse_xpath IS NOT NULL AND idx.browse_xpath <> '' THEN + browse_text := oils_xpath_string( idx.browse_xpath, xml_node, joiner, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] ); + ELSE + browse_text := curr_text; + END IF; + + output_row.field_class = idx.field_class; + output_row.field = idx.id; + output_row.source = rid; + output_row.value = BTRIM(REGEXP_REPLACE(browse_text, E'\\s+', ' ', 'g')); + + output_row.browse_field = TRUE; + RETURN NEXT output_row; + output_row.browse_field = FALSE; + END IF; + + -- insert raw node text for faceting + IF idx.facet_field THEN + + IF idx.facet_xpath IS NOT NULL AND idx.facet_xpath <> '' THEN + facet_text := oils_xpath_string( idx.facet_xpath, xml_node, joiner, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] ); + ELSE + facet_text := curr_text; + END IF; + + output_row.field_class = idx.field_class; + output_row.field = -1 * idx.id; + output_row.source = rid; + output_row.value = BTRIM(REGEXP_REPLACE(facet_text, E'\\s+', ' ', 'g')); + + output_row.facet_field = TRUE; + RETURN NEXT output_row; + output_row.facet_field = FALSE; + END IF; + + END LOOP; + + CONTINUE WHEN raw_text IS NULL OR raw_text = ''; + + -- insert combined node text for searching + IF idx.search_field THEN + output_row.field_class = idx.field_class; + output_row.field = idx.id; + output_row.source = rid; + output_row.value = BTRIM(REGEXP_REPLACE(raw_text, E'\\s+', ' ', 'g')); + + output_row.search_field = TRUE; + RETURN NEXT output_row; + END IF; + + END LOOP; + +END; +$func$ LANGUAGE PLPGSQL; + +-- default to a space joiner +CREATE OR REPLACE FUNCTION biblio.extract_metabib_field_entry ( BIGINT ) RETURNS SETOF metabib.field_entry_template AS $func$ + SELECT * FROM biblio.extract_metabib_field_entry($1, ' '); + $func$ LANGUAGE SQL; + + +CREATE OR REPLACE FUNCTION metabib.reingest_metabib_field_entries( bib_id BIGINT ) RETURNS VOID AS $func$ +DECLARE + fclass RECORD; + ind_data metabib.field_entry_template%ROWTYPE; + mbe_row metabib.browse_entry%ROWTYPE; + mbe_id BIGINT; +BEGIN + PERFORM * FROM config.internal_flag WHERE name = 'ingest.assume_inserts_only' AND enabled; + IF NOT FOUND THEN + FOR fclass IN SELECT * FROM config.metabib_class LOOP + -- RAISE NOTICE 'Emptying out %', fclass.name; + EXECUTE $$DELETE FROM metabib.$$ || fclass.name || $$_field_entry WHERE source = $$ || bib_id; + END LOOP; + DELETE FROM metabib.facet_entry WHERE source = bib_id; + DELETE FROM metabib.browse_entry_def_map WHERE source = bib_id; + END IF; + + FOR ind_data IN SELECT * FROM biblio.extract_metabib_field_entry( bib_id ) LOOP + IF ind_data.field < 0 THEN + ind_data.field = -1 * ind_data.field; + END IF; + + IF ind_data.facet_field THEN + INSERT INTO metabib.facet_entry (field, source, value) + VALUES (ind_data.field, ind_data.source, ind_data.value); + END IF; + + IF ind_data.browse_field THEN + SELECT INTO mbe_row * FROM metabib.browse_entry WHERE value = ind_data.value; + IF FOUND THEN + mbe_id := mbe_row.id; + ELSE + INSERT INTO metabib.browse_entry (value) VALUES + (metabib.browse_normalize(ind_data.value, ind_data.field)); + mbe_id := CURRVAL('metabib.browse_entry_id_seq'::REGCLASS); + END IF; + + INSERT INTO metabib.browse_entry_def_map (entry, def, source) + VALUES (mbe_id, ind_data.field, ind_data.source); + END IF; + + IF ind_data.search_field THEN + EXECUTE $$ + INSERT INTO metabib.$$ || ind_data.field_class || $$_field_entry (field, source, value) + VALUES ($$ || + quote_literal(ind_data.field) || $$, $$ || + quote_literal(ind_data.source) || $$, $$ || + quote_literal(ind_data.value) || + $$);$$; + END IF; + + END LOOP; + + RETURN; +END; +$func$ LANGUAGE PLPGSQL; + +-- This mimics a specific part of QueryParser, turning the first part of a +-- classed search (search_class) into a set of classes and possibly fields. +-- search_class might look like "author" or "title|proper" or "ti|uniform" +-- or "au" or "au|corporate|personal" or anything like that, where the first +-- element of the list you get by separating on the "|" character is either +-- a registered class (config.metabib_class) or an alias +-- (config.metabib_search_alias), and the rest of any such elements are +-- fields (config.metabib_field). +CREATE OR REPLACE + FUNCTION metabib.search_class_to_registered_components(search_class TEXT) + RETURNS SETOF RECORD AS $func$ +DECLARE + search_parts TEXT[]; + field_name TEXT; + search_part_count INTEGER; + rec RECORD; + registered_class config.metabib_class%ROWTYPE; + registered_alias config.metabib_search_alias%ROWTYPE; + registered_field config.metabib_field%ROWTYPE; +BEGIN + search_parts := REGEXP_SPLIT_TO_ARRAY(search_class, E'\\|'); + + search_part_count := ARRAY_LENGTH(search_parts, 1); + IF search_part_count = 0 THEN + RETURN; + ELSE + SELECT INTO registered_class + * FROM config.metabib_class WHERE name = search_parts[1]; + IF FOUND THEN + IF search_part_count < 2 THEN -- all fields + rec := (registered_class.name, NULL::INTEGER); + RETURN NEXT rec; + RETURN; -- done + END IF; + FOR field_name IN SELECT * + FROM UNNEST(search_parts[2:search_part_count]) LOOP + SELECT INTO registered_field + * FROM config.metabib_field + WHERE name = field_name AND + field_class = registered_class.name; + IF FOUND THEN + rec := (registered_class.name, registered_field.id); + RETURN NEXT rec; + END IF; + END LOOP; + ELSE + -- maybe we have an alias? + SELECT INTO registered_alias + * FROM config.metabib_search_alias WHERE alias=search_parts[1]; + IF NOT FOUND THEN + RETURN; + ELSE + IF search_part_count < 2 THEN -- return w/e the alias says + rec := ( + registered_alias.field_class, registered_alias.field + ); + RETURN NEXT rec; + RETURN; -- done + ELSE + FOR field_name IN SELECT * + FROM UNNEST(search_parts[2:search_part_count]) LOOP + SELECT INTO registered_field + * FROM config.metabib_field + WHERE name = field_name AND + field_class = registered_alias.field_class; + IF FOUND THEN + rec := ( + registered_alias.field_class, + registered_field.id + ); + RETURN NEXT rec; + END IF; + END LOOP; + END IF; + END IF; + END IF; + END IF; +END; +$func$ LANGUAGE PLPGSQL; + + +CREATE OR REPLACE + FUNCTION metabib.suggest_browse_entries( + query_text TEXT, -- 'foo' or 'foo & ba:*',ready for to_tsquery() + search_class TEXT, -- 'alias' or 'class' or 'class|field..', etc + headline_opts TEXT, -- markup options for ts_headline() + visibility_org INTEGER,-- null if you don't want opac visibility test + query_limit INTEGER,-- use in LIMIT clause of interal query + normalization INTEGER -- argument to TS_RANK_CD() + ) RETURNS TABLE ( + value TEXT, -- plain + field INTEGER, + bouyant_and_class_match BOOL, + field_match BOOL, + field_weight INTEGER, + rank REAL, + bouyant BOOL, + match TEXT -- marked up + ) AS $func$ +DECLARE + query TSQUERY; + opac_visibility_join TEXT; + search_class_join TEXT; + r_fields RECORD; +BEGIN + query := TO_TSQUERY('keyword', query_text); + + IF visibility_org IS NOT NULL THEN + opac_visibility_join := ' + JOIN asset.opac_visible_copies aovc ON ( + aovc.record = mbedm.source AND + aovc.circ_lib IN (SELECT id FROM actor.org_unit_descendants($4)) + )'; + ELSE + opac_visibility_join := ''; + END IF; + + -- The following determines whether we only provide suggestsons matching + -- the user's selected search_class, or whether we show other suggestions + -- too. The reason for MIN() is that for search_classes like + -- 'title|proper|uniform' you would otherwise get multiple rows. The + -- implication is that if title as a class doesn't have restrict, + -- nor does the proper field, but the uniform field does, you're going + -- to get 'false' for your overall evaluation of 'should we restrict?' + -- To invert that, change from MIN() to MAX(). + + SELECT + INTO r_fields + MIN(cmc.restrict::INT) AS restrict_class, + MIN(cmf.restrict::INT) AS restrict_field + FROM metabib.search_class_to_registered_components(search_class) + AS _registered (field_class TEXT, field INT) + JOIN + config.metabib_class cmc ON (cmc.name = _registered.field_class) + LEFT JOIN + config.metabib_field cmf ON (cmf.id = _registered.field); + + -- evaluate 'should we restrict?' + IF r_fields.restrict_field::BOOL OR r_fields.restrict_class::BOOL THEN + search_class_join := ' + JOIN + metabib.search_class_to_registered_components($2) + AS _registered (field_class TEXT, field INT) ON ( + (_registered.field IS NULL AND + _registered.field_class = cmf.field_class) OR + (_registered.field = cmf.id) + ) + '; + ELSE + search_class_join := ' + LEFT JOIN + metabib.search_class_to_registered_components($2) + AS _registered (field_class TEXT, field INT) ON ( + _registered.field_class = cmc.name + ) + '; + END IF; + + RETURN QUERY EXECUTE 'SELECT *, TS_HEADLINE(value, $1, $3) FROM (SELECT DISTINCT + mbe.value, + cmf.id, + cmc.bouyant AND _registered.field_class IS NOT NULL, + _registered.field = cmf.id, + cmf.weight, + TS_RANK_CD(mbe.index_vector, $1, $6), + cmc.bouyant + FROM metabib.browse_entry_def_map mbedm + JOIN metabib.browse_entry mbe ON (mbe.id = mbedm.entry) + JOIN config.metabib_field cmf ON (cmf.id = mbedm.def) + JOIN config.metabib_class cmc ON (cmf.field_class = cmc.name) + ' || search_class_join || opac_visibility_join || + ' WHERE $1 @@ mbe.index_vector + ORDER BY 3 DESC, 4 DESC NULLS LAST, 5 DESC, 6 DESC, 7 DESC, 1 ASC + LIMIT $5) x + ORDER BY 3 DESC, 4 DESC NULLS LAST, 5 DESC, 6 DESC, 7 DESC, 1 ASC + ' -- sic, repeat the order by clause in the outer select too + USING + query, search_class, headline_opts, + visibility_org, query_limit, normalization + ; + + -- sort order: + -- bouyant AND chosen class = match class + -- chosen field = match field + -- field weight + -- rank + -- bouyancy + -- value itself + +END; +$func$ LANGUAGE PLPGSQL; + +-- The advantage of this over the stock regexp_split_to_array() is that it +-- won't degrade unicode strings. +CREATE OR REPLACE FUNCTION evergreen.regexp_split_to_array(TEXT, TEXT) +RETURNS TEXT[] AS $$ + return encode_array_literal([split $_[1], $_[0]]); +$$ LANGUAGE PLPERLU STRICT IMMUTABLE; + + +-- Adds some logic for browse_entry to split on non-word chars for index_vector, post-normalize +CREATE OR REPLACE FUNCTION oils_tsearch2 () RETURNS TRIGGER AS $$ +DECLARE + normalizer RECORD; + value TEXT := ''; +BEGIN + + value := NEW.value; + + IF TG_TABLE_NAME::TEXT ~ 'field_entry$' THEN + FOR normalizer IN + SELECT n.func AS func, + n.param_count AS param_count, + m.params AS params + FROM config.index_normalizer n + JOIN config.metabib_field_index_norm_map m ON (m.norm = n.id) + WHERE field = NEW.field AND m.pos < 0 + ORDER BY m.pos LOOP + EXECUTE 'SELECT ' || normalizer.func || '(' || + quote_literal( value ) || + CASE + WHEN normalizer.param_count > 0 + THEN ',' || REPLACE(REPLACE(BTRIM(normalizer.params,'[]'),E'\'',E'\\\''),E'"',E'\'') + ELSE '' + END || + ')' INTO value; + + END LOOP; + + NEW.value := value; + END IF; + + IF NEW.index_vector = ''::tsvector THEN + RETURN NEW; + END IF; + + IF TG_TABLE_NAME::TEXT ~ 'field_entry$' THEN + FOR normalizer IN + SELECT n.func AS func, + n.param_count AS param_count, + m.params AS params + FROM config.index_normalizer n + JOIN config.metabib_field_index_norm_map m ON (m.norm = n.id) + WHERE field = NEW.field AND m.pos >= 0 + ORDER BY m.pos LOOP + EXECUTE 'SELECT ' || normalizer.func || '(' || + quote_literal( value ) || + CASE + WHEN normalizer.param_count > 0 + THEN ',' || REPLACE(REPLACE(BTRIM(normalizer.params,'[]'),E'\'',E'\\\''),E'"',E'\'') + ELSE '' + END || + ')' INTO value; + + END LOOP; + END IF; + + IF TG_TABLE_NAME::TEXT ~ 'browse_entry$' THEN + value := ARRAY_TO_STRING( + evergreen.regexp_split_to_array(value, E'\\W+'), ' ' + ); + END IF; + + NEW.index_vector = to_tsvector((TG_ARGV[0])::regconfig, value); + + RETURN NEW; +END; +$$ LANGUAGE PLPGSQL; + +COMMIT; diff --git a/Open-ILS/src/sql/Pg/upgrade/YYYY.schema.bib_autosuggest.sql b/Open-ILS/src/sql/Pg/upgrade/YYYY.schema.bib_autosuggest.sql deleted file mode 100644 index ce652b51a0..0000000000 --- a/Open-ILS/src/sql/Pg/upgrade/YYYY.schema.bib_autosuggest.sql +++ /dev/null @@ -1,711 +0,0 @@ -BEGIN; - -SELECT evergreen.upgrade_deps_block_check('YYYY', :eg_version); - -INSERT INTO config.global_flag (name, label, enabled, value) VALUES ( - 'opac.use_autosuggest', - 'OPAC: Show auto-completing suggestions dialog under basic search box (put ''opac_visible'' into the value field to limit suggestions to OPAC-visible items, or blank the field for a possible performance improvement)', - TRUE, - 'opac_visible' -); - -CREATE TABLE metabib.browse_entry ( - id BIGSERIAL PRIMARY KEY, - value TEXT unique, - index_vector tsvector -); -CREATE INDEX metabib_browse_entry_index_vector_idx ON metabib.browse_entry USING GIST (index_vector); -CREATE TRIGGER metabib_browse_entry_fti_trigger - BEFORE INSERT OR UPDATE ON metabib.browse_entry - FOR EACH ROW EXECUTE PROCEDURE oils_tsearch2('keyword'); - - -CREATE TABLE metabib.browse_entry_def_map ( - id BIGSERIAL PRIMARY KEY, - entry BIGINT REFERENCES metabib.browse_entry (id), - def INT REFERENCES config.metabib_field (id), - source BIGINT REFERENCES biblio.record_entry (id) -); - -ALTER TABLE config.metabib_field ADD COLUMN browse_field BOOLEAN DEFAULT TRUE NOT NULL; -ALTER TABLE config.metabib_field ADD COLUMN browse_xpath TEXT; - -ALTER TABLE config.metabib_class ADD COLUMN bouyant BOOLEAN DEFAULT FALSE NOT NULL; -ALTER TABLE config.metabib_class ADD COLUMN restrict BOOLEAN DEFAULT FALSE NOT NULL; -ALTER TABLE config.metabib_field ADD COLUMN restrict BOOLEAN DEFAULT FALSE NOT NULL; - --- one good exception to default true: -UPDATE config.metabib_field - SET browse_field = FALSE - WHERE (field_class = 'keyword' AND name = 'keyword') OR - (field_class = 'subject' AND name = 'complete'); - --- AFTER UPDATE OR INSERT trigger for biblio.record_entry --- We're only touching it here to add a DELETE statement to the IF NEW.deleted --- block. - -CREATE OR REPLACE FUNCTION biblio.indexing_ingest_or_delete () RETURNS TRIGGER AS $func$ -DECLARE - transformed_xml TEXT; - prev_xfrm TEXT; - normalizer RECORD; - xfrm config.xml_transform%ROWTYPE; - attr_value TEXT; - new_attrs HSTORE := ''::HSTORE; - attr_def config.record_attr_definition%ROWTYPE; -BEGIN - - IF NEW.deleted IS TRUE THEN -- If this bib is deleted - DELETE FROM metabib.metarecord_source_map WHERE source = NEW.id; -- Rid ourselves of the search-estimate-killing linkage - DELETE FROM metabib.record_attr WHERE id = NEW.id; -- Kill the attrs hash, useless on deleted records - DELETE FROM authority.bib_linking WHERE bib = NEW.id; -- Avoid updating fields in bibs that are no longer visible - DELETE FROM biblio.peer_bib_copy_map WHERE peer_record = NEW.id; -- Separate any multi-homed items - DELETE FROM metabib.browse_entry_def_map WHERE source = NEW.id; -- Don't auto-suggest deleted bibs - RETURN NEW; -- and we're done - END IF; - - IF TG_OP = 'UPDATE' THEN -- re-ingest? - PERFORM * FROM config.internal_flag WHERE name = 'ingest.reingest.force_on_same_marc' AND enabled; - - IF NOT FOUND AND OLD.marc = NEW.marc THEN -- don't do anything if the MARC didn't change - RETURN NEW; - END IF; - END IF; - - -- Record authority linking - PERFORM * FROM config.internal_flag WHERE name = 'ingest.disable_authority_linking' AND enabled; - IF NOT FOUND THEN - PERFORM biblio.map_authority_linking( NEW.id, NEW.marc ); - END IF; - - -- Flatten and insert the mfr data - PERFORM * FROM config.internal_flag WHERE name = 'ingest.disable_metabib_full_rec' AND enabled; - IF NOT FOUND THEN - PERFORM metabib.reingest_metabib_full_rec(NEW.id); - - -- Now we pull out attribute data, which is dependent on the mfr for all but XPath-based fields - PERFORM * FROM config.internal_flag WHERE name = 'ingest.disable_metabib_rec_descriptor' AND enabled; - IF NOT FOUND THEN - FOR attr_def IN SELECT * FROM config.record_attr_definition ORDER BY format LOOP - - IF attr_def.tag IS NOT NULL THEN -- tag (and optional subfield list) selection - SELECT ARRAY_TO_STRING(ARRAY_ACCUM(value), COALESCE(attr_def.joiner,' ')) INTO attr_value - FROM (SELECT * FROM metabib.full_rec ORDER BY tag, subfield) AS x - WHERE record = NEW.id - AND tag LIKE attr_def.tag - AND CASE - WHEN attr_def.sf_list IS NOT NULL - THEN POSITION(subfield IN attr_def.sf_list) > 0 - ELSE TRUE - END - GROUP BY tag - ORDER BY tag - LIMIT 1; - - ELSIF attr_def.fixed_field IS NOT NULL THEN -- a named fixed field, see config.marc21_ff_pos_map.fixed_field - attr_value := biblio.marc21_extract_fixed_field(NEW.id, attr_def.fixed_field); - - ELSIF attr_def.xpath IS NOT NULL THEN -- and xpath expression - - SELECT INTO xfrm * FROM config.xml_transform WHERE name = attr_def.format; - - -- See if we can skip the XSLT ... it's expensive - IF prev_xfrm IS NULL OR prev_xfrm <> xfrm.name THEN - -- Can't skip the transform - IF xfrm.xslt <> '---' THEN - transformed_xml := oils_xslt_process(NEW.marc,xfrm.xslt); - ELSE - transformed_xml := NEW.marc; - END IF; - - prev_xfrm := xfrm.name; - END IF; - - IF xfrm.name IS NULL THEN - -- just grab the marcxml (empty) transform - SELECT INTO xfrm * FROM config.xml_transform WHERE xslt = '---' LIMIT 1; - prev_xfrm := xfrm.name; - END IF; - - attr_value := oils_xpath_string(attr_def.xpath, transformed_xml, COALESCE(attr_def.joiner,' '), ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]]); - - ELSIF attr_def.phys_char_sf IS NOT NULL THEN -- a named Physical Characteristic, see config.marc21_physical_characteristic_*_map - SELECT m.value INTO attr_value - FROM biblio.marc21_physical_characteristics(NEW.id) v - JOIN config.marc21_physical_characteristic_value_map m ON (m.id = v.value) - WHERE v.subfield = attr_def.phys_char_sf - LIMIT 1; -- Just in case ... - - END IF; - - -- apply index normalizers to attr_value - FOR normalizer IN - SELECT n.func AS func, - n.param_count AS param_count, - m.params AS params - FROM config.index_normalizer n - JOIN config.record_attr_index_norm_map m ON (m.norm = n.id) - WHERE attr = attr_def.name - ORDER BY m.pos LOOP - EXECUTE 'SELECT ' || normalizer.func || '(' || - COALESCE( quote_literal( attr_value ), 'NULL' ) || - CASE - WHEN normalizer.param_count > 0 - THEN ',' || REPLACE(REPLACE(BTRIM(normalizer.params,'[]'),E'\'',E'\\\''),E'"',E'\'') - ELSE '' - END || - ')' INTO attr_value; - - END LOOP; - - -- Add the new value to the hstore - new_attrs := new_attrs || hstore( attr_def.name, attr_value ); - - END LOOP; - - IF TG_OP = 'INSERT' OR OLD.deleted THEN -- initial insert OR revivication - INSERT INTO metabib.record_attr (id, attrs) VALUES (NEW.id, new_attrs); - ELSE - UPDATE metabib.record_attr SET attrs = new_attrs WHERE id = NEW.id; - END IF; - - END IF; - END IF; - - -- Gather and insert the field entry data - PERFORM metabib.reingest_metabib_field_entries(NEW.id); - - -- Located URI magic - IF TG_OP = 'INSERT' THEN - PERFORM * FROM config.internal_flag WHERE name = 'ingest.disable_located_uri' AND enabled; - IF NOT FOUND THEN - PERFORM biblio.extract_located_uris( NEW.id, NEW.marc, NEW.editor ); - END IF; - ELSE - PERFORM * FROM config.internal_flag WHERE name = 'ingest.disable_located_uri' AND enabled; - IF NOT FOUND THEN - PERFORM biblio.extract_located_uris( NEW.id, NEW.marc, NEW.editor ); - END IF; - END IF; - - -- (re)map metarecord-bib linking - IF TG_OP = 'INSERT' THEN -- if not deleted and performing an insert, check for the flag - PERFORM * FROM config.internal_flag WHERE name = 'ingest.metarecord_mapping.skip_on_insert' AND enabled; - IF NOT FOUND THEN - PERFORM metabib.remap_metarecord_for_bib( NEW.id, NEW.fingerprint ); - END IF; - ELSE -- we're doing an update, and we're not deleted, remap - PERFORM * FROM config.internal_flag WHERE name = 'ingest.metarecord_mapping.skip_on_update' AND enabled; - IF NOT FOUND THEN - PERFORM metabib.remap_metarecord_for_bib( NEW.id, NEW.fingerprint ); - END IF; - END IF; - - RETURN NEW; -END; -$func$ LANGUAGE PLPGSQL; - -CREATE OR REPLACE FUNCTION metabib.browse_normalize(facet_text TEXT, mapped_field INT) RETURNS TEXT AS $$ -DECLARE - normalizer RECORD; -BEGIN - - FOR normalizer IN - SELECT n.func AS func, - n.param_count AS param_count, - m.params AS params - FROM config.index_normalizer n - JOIN config.metabib_field_index_norm_map m ON (m.norm = n.id) - WHERE m.field = mapped_field AND m.pos < 0 - ORDER BY m.pos LOOP - - EXECUTE 'SELECT ' || normalizer.func || '(' || - quote_literal( facet_text ) || - CASE - WHEN normalizer.param_count > 0 - THEN ',' || REPLACE(REPLACE(BTRIM(normalizer.params,'[]'),E'\'',E'\\\''),E'"',E'\'') - ELSE '' - END || - ')' INTO facet_text; - - END LOOP; - - RETURN facet_text; -END; - -$$ LANGUAGE PLPGSQL; - -DROP FUNCTION biblio.extract_metabib_field_entry(bigint, text); -DROP FUNCTION biblio.extract_metabib_field_entry(bigint); - -DROP TYPE metabib.field_entry_template; -CREATE TYPE metabib.field_entry_template AS ( - field_class TEXT, - field INT, - facet_field BOOL, - search_field BOOL, - browse_field BOOL, - source BIGINT, - value TEXT -); - - -CREATE OR REPLACE FUNCTION biblio.extract_metabib_field_entry ( rid BIGINT, default_joiner TEXT ) RETURNS SETOF metabib.field_entry_template AS $func$ -DECLARE - bib biblio.record_entry%ROWTYPE; - idx config.metabib_field%ROWTYPE; - xfrm config.xml_transform%ROWTYPE; - prev_xfrm TEXT; - transformed_xml TEXT; - xml_node TEXT; - xml_node_list TEXT[]; - facet_text TEXT; - browse_text TEXT; - raw_text TEXT; - curr_text TEXT; - joiner TEXT := default_joiner; -- XXX will index defs supply a joiner? - output_row metabib.field_entry_template%ROWTYPE; -BEGIN - - -- Get the record - SELECT INTO bib * FROM biblio.record_entry WHERE id = rid; - - -- Loop over the indexing entries - FOR idx IN SELECT * FROM config.metabib_field ORDER BY format LOOP - - SELECT INTO xfrm * from config.xml_transform WHERE name = idx.format; - - -- See if we can skip the XSLT ... it's expensive - IF prev_xfrm IS NULL OR prev_xfrm <> xfrm.name THEN - -- Can't skip the transform - IF xfrm.xslt <> '---' THEN - transformed_xml := oils_xslt_process(bib.marc,xfrm.xslt); - ELSE - transformed_xml := bib.marc; - END IF; - - prev_xfrm := xfrm.name; - END IF; - - xml_node_list := oils_xpath( idx.xpath, transformed_xml, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] ); - - raw_text := NULL; - FOR xml_node IN SELECT x FROM unnest(xml_node_list) AS x LOOP - CONTINUE WHEN xml_node !~ E'^\\s*<'; - - curr_text := ARRAY_TO_STRING( - oils_xpath( '//text()', - REGEXP_REPLACE( -- This escapes all &s not followed by "amp;". Data ise returned from oils_xpath (above) in UTF-8, not entity encoded - REGEXP_REPLACE( -- This escapes embeded [^<]+)(<)([^>]+<)$re$, - E'\\1<\\3', - 'g' - ), - '&(?!amp;)', - '&', - 'g' - ) - ), - ' ' - ); - - CONTINUE WHEN curr_text IS NULL OR curr_text = ''; - - IF raw_text IS NOT NULL THEN - raw_text := raw_text || joiner; - END IF; - - raw_text := COALESCE(raw_text,'') || curr_text; - - -- autosuggest/metabib.browse_entry - IF idx.browse_field THEN - - IF idx.browse_xpath IS NOT NULL AND idx.browse_xpath <> '' THEN - browse_text := oils_xpath_string( idx.browse_xpath, xml_node, joiner, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] ); - ELSE - browse_text := curr_text; - END IF; - - output_row.field_class = idx.field_class; - output_row.field = idx.id; - output_row.source = rid; - output_row.value = BTRIM(REGEXP_REPLACE(browse_text, E'\\s+', ' ', 'g')); - - output_row.browse_field = TRUE; - RETURN NEXT output_row; - output_row.browse_field = FALSE; - END IF; - - -- insert raw node text for faceting - IF idx.facet_field THEN - - IF idx.facet_xpath IS NOT NULL AND idx.facet_xpath <> '' THEN - facet_text := oils_xpath_string( idx.facet_xpath, xml_node, joiner, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] ); - ELSE - facet_text := curr_text; - END IF; - - output_row.field_class = idx.field_class; - output_row.field = -1 * idx.id; - output_row.source = rid; - output_row.value = BTRIM(REGEXP_REPLACE(facet_text, E'\\s+', ' ', 'g')); - - output_row.facet_field = TRUE; - RETURN NEXT output_row; - output_row.facet_field = FALSE; - END IF; - - END LOOP; - - CONTINUE WHEN raw_text IS NULL OR raw_text = ''; - - -- insert combined node text for searching - IF idx.search_field THEN - output_row.field_class = idx.field_class; - output_row.field = idx.id; - output_row.source = rid; - output_row.value = BTRIM(REGEXP_REPLACE(raw_text, E'\\s+', ' ', 'g')); - - output_row.search_field = TRUE; - RETURN NEXT output_row; - END IF; - - END LOOP; - -END; -$func$ LANGUAGE PLPGSQL; - --- default to a space joiner -CREATE OR REPLACE FUNCTION biblio.extract_metabib_field_entry ( BIGINT ) RETURNS SETOF metabib.field_entry_template AS $func$ - SELECT * FROM biblio.extract_metabib_field_entry($1, ' '); - $func$ LANGUAGE SQL; - - -CREATE OR REPLACE FUNCTION metabib.reingest_metabib_field_entries( bib_id BIGINT ) RETURNS VOID AS $func$ -DECLARE - fclass RECORD; - ind_data metabib.field_entry_template%ROWTYPE; - mbe_row metabib.browse_entry%ROWTYPE; - mbe_id BIGINT; -BEGIN - PERFORM * FROM config.internal_flag WHERE name = 'ingest.assume_inserts_only' AND enabled; - IF NOT FOUND THEN - FOR fclass IN SELECT * FROM config.metabib_class LOOP - -- RAISE NOTICE 'Emptying out %', fclass.name; - EXECUTE $$DELETE FROM metabib.$$ || fclass.name || $$_field_entry WHERE source = $$ || bib_id; - END LOOP; - DELETE FROM metabib.facet_entry WHERE source = bib_id; - DELETE FROM metabib.browse_entry_def_map WHERE source = bib_id; - END IF; - - FOR ind_data IN SELECT * FROM biblio.extract_metabib_field_entry( bib_id ) LOOP - IF ind_data.field < 0 THEN - ind_data.field = -1 * ind_data.field; - END IF; - - IF ind_data.facet_field THEN - INSERT INTO metabib.facet_entry (field, source, value) - VALUES (ind_data.field, ind_data.source, ind_data.value); - END IF; - - IF ind_data.browse_field THEN - SELECT INTO mbe_row * FROM metabib.browse_entry WHERE value = ind_data.value; - IF FOUND THEN - mbe_id := mbe_row.id; - ELSE - INSERT INTO metabib.browse_entry (value) VALUES - (metabib.browse_normalize(ind_data.value, ind_data.field)); - mbe_id := CURRVAL('metabib.browse_entry_id_seq'::REGCLASS); - END IF; - - INSERT INTO metabib.browse_entry_def_map (entry, def, source) - VALUES (mbe_id, ind_data.field, ind_data.source); - END IF; - - IF ind_data.search_field THEN - EXECUTE $$ - INSERT INTO metabib.$$ || ind_data.field_class || $$_field_entry (field, source, value) - VALUES ($$ || - quote_literal(ind_data.field) || $$, $$ || - quote_literal(ind_data.source) || $$, $$ || - quote_literal(ind_data.value) || - $$);$$; - END IF; - - END LOOP; - - RETURN; -END; -$func$ LANGUAGE PLPGSQL; - --- This mimics a specific part of QueryParser, turning the first part of a --- classed search (search_class) into a set of classes and possibly fields. --- search_class might look like "author" or "title|proper" or "ti|uniform" --- or "au" or "au|corporate|personal" or anything like that, where the first --- element of the list you get by separating on the "|" character is either --- a registered class (config.metabib_class) or an alias --- (config.metabib_search_alias), and the rest of any such elements are --- fields (config.metabib_field). -CREATE OR REPLACE - FUNCTION metabib.search_class_to_registered_components(search_class TEXT) - RETURNS SETOF RECORD AS $func$ -DECLARE - search_parts TEXT[]; - field_name TEXT; - search_part_count INTEGER; - rec RECORD; - registered_class config.metabib_class%ROWTYPE; - registered_alias config.metabib_search_alias%ROWTYPE; - registered_field config.metabib_field%ROWTYPE; -BEGIN - search_parts := REGEXP_SPLIT_TO_ARRAY(search_class, E'\\|'); - - search_part_count := ARRAY_LENGTH(search_parts, 1); - IF search_part_count = 0 THEN - RETURN; - ELSE - SELECT INTO registered_class - * FROM config.metabib_class WHERE name = search_parts[1]; - IF FOUND THEN - IF search_part_count < 2 THEN -- all fields - rec := (registered_class.name, NULL::INTEGER); - RETURN NEXT rec; - RETURN; -- done - END IF; - FOR field_name IN SELECT * - FROM UNNEST(search_parts[2:search_part_count]) LOOP - SELECT INTO registered_field - * FROM config.metabib_field - WHERE name = field_name AND - field_class = registered_class.name; - IF FOUND THEN - rec := (registered_class.name, registered_field.id); - RETURN NEXT rec; - END IF; - END LOOP; - ELSE - -- maybe we have an alias? - SELECT INTO registered_alias - * FROM config.metabib_search_alias WHERE alias=search_parts[1]; - IF NOT FOUND THEN - RETURN; - ELSE - IF search_part_count < 2 THEN -- return w/e the alias says - rec := ( - registered_alias.field_class, registered_alias.field - ); - RETURN NEXT rec; - RETURN; -- done - ELSE - FOR field_name IN SELECT * - FROM UNNEST(search_parts[2:search_part_count]) LOOP - SELECT INTO registered_field - * FROM config.metabib_field - WHERE name = field_name AND - field_class = registered_alias.field_class; - IF FOUND THEN - rec := ( - registered_alias.field_class, - registered_field.id - ); - RETURN NEXT rec; - END IF; - END LOOP; - END IF; - END IF; - END IF; - END IF; -END; -$func$ LANGUAGE PLPGSQL; - - -CREATE OR REPLACE - FUNCTION metabib.suggest_browse_entries( - query_text TEXT, -- 'foo' or 'foo & ba:*',ready for to_tsquery() - search_class TEXT, -- 'alias' or 'class' or 'class|field..', etc - headline_opts TEXT, -- markup options for ts_headline() - visibility_org INTEGER,-- null if you don't want opac visibility test - query_limit INTEGER,-- use in LIMIT clause of interal query - normalization INTEGER -- argument to TS_RANK_CD() - ) RETURNS TABLE ( - value TEXT, -- plain - field INTEGER, - bouyant_and_class_match BOOL, - field_match BOOL, - field_weight INTEGER, - rank REAL, - bouyant BOOL, - match TEXT -- marked up - ) AS $func$ -DECLARE - query TSQUERY; - opac_visibility_join TEXT; - search_class_join TEXT; - r_fields RECORD; -BEGIN - query := TO_TSQUERY('keyword', query_text); - - IF visibility_org IS NOT NULL THEN - opac_visibility_join := ' - JOIN asset.opac_visible_copies aovc ON ( - aovc.record = mbedm.source AND - aovc.circ_lib IN (SELECT id FROM actor.org_unit_descendants($4)) - )'; - ELSE - opac_visibility_join := ''; - END IF; - - -- The following determines whether we only provide suggestsons matching - -- the user's selected search_class, or whether we show other suggestions - -- too. The reason for MIN() is that for search_classes like - -- 'title|proper|uniform' you would otherwise get multiple rows. The - -- implication is that if title as a class doesn't have restrict, - -- nor does the proper field, but the uniform field does, you're going - -- to get 'false' for your overall evaluation of 'should we restrict?' - -- To invert that, change from MIN() to MAX(). - - SELECT - INTO r_fields - MIN(cmc.restrict::INT) AS restrict_class, - MIN(cmf.restrict::INT) AS restrict_field - FROM metabib.search_class_to_registered_components(search_class) - AS _registered (field_class TEXT, field INT) - JOIN - config.metabib_class cmc ON (cmc.name = _registered.field_class) - LEFT JOIN - config.metabib_field cmf ON (cmf.id = _registered.field); - - -- evaluate 'should we restrict?' - IF r_fields.restrict_field::BOOL OR r_fields.restrict_class::BOOL THEN - search_class_join := ' - JOIN - metabib.search_class_to_registered_components($2) - AS _registered (field_class TEXT, field INT) ON ( - (_registered.field IS NULL AND - _registered.field_class = cmf.field_class) OR - (_registered.field = cmf.id) - ) - '; - ELSE - search_class_join := ' - LEFT JOIN - metabib.search_class_to_registered_components($2) - AS _registered (field_class TEXT, field INT) ON ( - _registered.field_class = cmc.name - ) - '; - END IF; - - RETURN QUERY EXECUTE 'SELECT *, TS_HEADLINE(value, $1, $3) FROM (SELECT DISTINCT - mbe.value, - cmf.id, - cmc.bouyant AND _registered.field_class IS NOT NULL, - _registered.field = cmf.id, - cmf.weight, - TS_RANK_CD(mbe.index_vector, $1, $6), - cmc.bouyant - FROM metabib.browse_entry_def_map mbedm - JOIN metabib.browse_entry mbe ON (mbe.id = mbedm.entry) - JOIN config.metabib_field cmf ON (cmf.id = mbedm.def) - JOIN config.metabib_class cmc ON (cmf.field_class = cmc.name) - ' || search_class_join || opac_visibility_join || - ' WHERE $1 @@ mbe.index_vector - ORDER BY 3 DESC, 4 DESC NULLS LAST, 5 DESC, 6 DESC, 7 DESC, 1 ASC - LIMIT $5) x - ORDER BY 3 DESC, 4 DESC NULLS LAST, 5 DESC, 6 DESC, 7 DESC, 1 ASC - ' -- sic, repeat the order by clause in the outer select too - USING - query, search_class, headline_opts, - visibility_org, query_limit, normalization - ; - - -- sort order: - -- bouyant AND chosen class = match class - -- chosen field = match field - -- field weight - -- rank - -- bouyancy - -- value itself - -END; -$func$ LANGUAGE PLPGSQL; - --- The advantage of this over the stock regexp_split_to_array() is that it --- won't degrade unicode strings. -CREATE OR REPLACE FUNCTION evergreen.regexp_split_to_array(TEXT, TEXT) -RETURNS TEXT[] AS $$ - return encode_array_literal([split $_[1], $_[0]]); -$$ LANGUAGE PLPERLU STRICT IMMUTABLE; - - --- Adds some logic for browse_entry to split on non-word chars for index_vector, post-normalize -CREATE OR REPLACE FUNCTION oils_tsearch2 () RETURNS TRIGGER AS $$ -DECLARE - normalizer RECORD; - value TEXT := ''; -BEGIN - - value := NEW.value; - - IF TG_TABLE_NAME::TEXT ~ 'field_entry$' THEN - FOR normalizer IN - SELECT n.func AS func, - n.param_count AS param_count, - m.params AS params - FROM config.index_normalizer n - JOIN config.metabib_field_index_norm_map m ON (m.norm = n.id) - WHERE field = NEW.field AND m.pos < 0 - ORDER BY m.pos LOOP - EXECUTE 'SELECT ' || normalizer.func || '(' || - quote_literal( value ) || - CASE - WHEN normalizer.param_count > 0 - THEN ',' || REPLACE(REPLACE(BTRIM(normalizer.params,'[]'),E'\'',E'\\\''),E'"',E'\'') - ELSE '' - END || - ')' INTO value; - - END LOOP; - - NEW.value := value; - END IF; - - IF NEW.index_vector = ''::tsvector THEN - RETURN NEW; - END IF; - - IF TG_TABLE_NAME::TEXT ~ 'field_entry$' THEN - FOR normalizer IN - SELECT n.func AS func, - n.param_count AS param_count, - m.params AS params - FROM config.index_normalizer n - JOIN config.metabib_field_index_norm_map m ON (m.norm = n.id) - WHERE field = NEW.field AND m.pos >= 0 - ORDER BY m.pos LOOP - EXECUTE 'SELECT ' || normalizer.func || '(' || - quote_literal( value ) || - CASE - WHEN normalizer.param_count > 0 - THEN ',' || REPLACE(REPLACE(BTRIM(normalizer.params,'[]'),E'\'',E'\\\''),E'"',E'\'') - ELSE '' - END || - ')' INTO value; - - END LOOP; - END IF; - - IF TG_TABLE_NAME::TEXT ~ 'browse_entry$' THEN - value := ARRAY_TO_STRING( - evergreen.regexp_split_to_array(value, E'\\W+'), ' ' - ); - END IF; - - NEW.index_vector = to_tsvector((TG_ARGV[0])::regconfig, value); - - RETURN NEW; -END; -$$ LANGUAGE PLPGSQL; - -COMMIT;