From f5bc354c5d6dffd1a89514bd159b2051bfb90271 Mon Sep 17 00:00:00 2001 From: Mike Rylander Date: Wed, 19 Feb 2014 17:11:29 -0500 Subject: [PATCH] LP#1053397: Use the new data structures for looking up language indexing configuration Also, from Dan Wells: First, we don't want to fully exclude records which don't have a metabib.record_attr_vector_list entry, so we do a LEFT JOIN instead for that table. Otherwise, some records error out when ingesting (see placeholder record -1 in the stock data set). Second, let's add a check for 'active' which appears to have been always missing. Signed-off-by: Mike Rylander Signed-off-by: Dan Wells --- Open-ILS/src/sql/Pg/030.schema.metabib.sql | 41 ++++++-- .../src/sql/Pg/upgrade/QQQQ.MVF_CRA-upgrade.sql | 111 +++++++++++++++++++++ 2 files changed, 142 insertions(+), 10 deletions(-) diff --git a/Open-ILS/src/sql/Pg/030.schema.metabib.sql b/Open-ILS/src/sql/Pg/030.schema.metabib.sql index ba9818ba4d..554deb4e5b 100644 --- a/Open-ILS/src/sql/Pg/030.schema.metabib.sql +++ b/Open-ILS/src/sql/Pg/030.schema.metabib.sql @@ -2054,31 +2054,52 @@ BEGIN END IF; IF TG_TABLE_NAME::TEXT ~ 'browse_entry$' THEN + value := ARRAY_TO_STRING( evergreen.regexp_split_to_array(value, E'\\W+'), ' ' ); value := public.search_normalize(value); NEW.index_vector = to_tsvector(TG_ARGV[0]::regconfig, value); + ELSIF TG_TABLE_NAME::TEXT ~ 'field_entry$' THEN FOR ts_rec IN - SELECT ts_config, index_weight - FROM config.metabib_class_ts_map - WHERE field_class = TG_ARGV[0] - AND index_lang IS NULL OR EXISTS (SELECT 1 FROM metabib.record_attr WHERE id = NEW.source AND index_lang IN(attrs->'item_lang',attrs->'language')) - AND always OR NOT EXISTS (SELECT 1 FROM config.metabib_field_ts_map WHERE metabib_field = NEW.field) - UNION - SELECT ts_config, index_weight - FROM config.metabib_field_ts_map - WHERE metabib_field = NEW.field - AND index_lang IS NULL OR EXISTS (SELECT 1 FROM metabib.record_attr WHERE id = NEW.source AND index_lang IN(attrs->'item_lang',attrs->'language')) + + SELECT DISTINCT m.ts_config, m.index_weight + FROM config.metabib_class_ts_map m + LEFT JOIN metabib.record_attr_vector_list r ON (r.source = NEW.source) + LEFT JOIN config.coded_value_map ccvm ON ( + ccvm.ctype IN ('item_lang', 'language') AND + ccvm.code = m.index_lang AND + r.vlist @> intset(ccvm.id) + ) + WHERE m.field_class = TG_ARGV[0] + AND m.active + AND (m.always OR NOT EXISTS (SELECT 1 FROM config.metabib_field_ts_map WHERE metabib_field = NEW.field)) + AND (m.index_lang IS NULL OR ccvm.id IS NOT NULL) + UNION + SELECT DISTINCT m.ts_config, m.index_weight + FROM config.metabib_field_ts_map m + LEFT JOIN metabib.record_attr_vector_list r ON (r.source = NEW.source) + LEFT JOIN config.coded_value_map ccvm ON ( + ccvm.ctype IN ('item_lang', 'language') AND + ccvm.code = m.index_lang AND + r.vlist @> intset(ccvm.id) + ) + WHERE m.metabib_field = NEW.field + AND m.active + AND (m.index_lang IS NULL OR ccvm.id IS NOT NULL) ORDER BY index_weight ASC + LOOP + IF cur_weight IS NOT NULL AND cur_weight != ts_rec.index_weight THEN NEW.index_vector = NEW.index_vector || setweight(temp_vector::tsvector,cur_weight); temp_vector = ''; END IF; + cur_weight = ts_rec.index_weight; SELECT INTO temp_vector temp_vector || ' ' || to_tsvector(ts_rec.ts_config::regconfig, value)::TEXT; + END LOOP; NEW.index_vector = NEW.index_vector || setweight(temp_vector::tsvector,cur_weight); ELSE diff --git a/Open-ILS/src/sql/Pg/upgrade/QQQQ.MVF_CRA-upgrade.sql b/Open-ILS/src/sql/Pg/upgrade/QQQQ.MVF_CRA-upgrade.sql index 4148fa45c8..dd84a49c71 100644 --- a/Open-ILS/src/sql/Pg/upgrade/QQQQ.MVF_CRA-upgrade.sql +++ b/Open-ILS/src/sql/Pg/upgrade/QQQQ.MVF_CRA-upgrade.sql @@ -553,6 +553,117 @@ BEGIN END; $func$ LANGUAGE PLPGSQL; +CREATE OR REPLACE FUNCTION public.oils_tsearch2 () RETURNS TRIGGER AS $$ +DECLARE + normalizer RECORD; + value TEXT := ''; + temp_vector TEXT := ''; + ts_rec RECORD; + cur_weight "char"; +BEGIN + + value := NEW.value; + NEW.index_vector = ''::tsvector; + + IF TG_TABLE_NAME::TEXT ~ 'field_entry$' THEN + FOR normalizer IN + SELECT n.func AS func, + n.param_count AS param_count, + m.params AS params + FROM config.index_normalizer n + JOIN config.metabib_field_index_norm_map m ON (m.norm = n.id) + WHERE field = NEW.field AND m.pos < 0 + ORDER BY m.pos LOOP + EXECUTE 'SELECT ' || normalizer.func || '(' || + quote_literal( value ) || + CASE + WHEN normalizer.param_count > 0 + THEN ',' || REPLACE(REPLACE(BTRIM(normalizer.params,'[]'),E'\'',E'\\\''),E'"',E'\'') + ELSE '' + END || + ')' INTO value; + + END LOOP; + + NEW.value = value; + + FOR normalizer IN + SELECT n.func AS func, + n.param_count AS param_count, + m.params AS params + FROM config.index_normalizer n + JOIN config.metabib_field_index_norm_map m ON (m.norm = n.id) + WHERE field = NEW.field AND m.pos >= 0 + ORDER BY m.pos LOOP + EXECUTE 'SELECT ' || normalizer.func || '(' || + quote_literal( value ) || + CASE + WHEN normalizer.param_count > 0 + THEN ',' || REPLACE(REPLACE(BTRIM(normalizer.params,'[]'),E'\'',E'\\\''),E'"',E'\'') + ELSE '' + END || + ')' INTO value; + + END LOOP; + END IF; + + IF TG_TABLE_NAME::TEXT ~ 'browse_entry$' THEN + + value := ARRAY_TO_STRING( + evergreen.regexp_split_to_array(value, E'\\W+'), ' ' + ); + value := public.search_normalize(value); + NEW.index_vector = to_tsvector(TG_ARGV[0]::regconfig, value); + + ELSIF TG_TABLE_NAME::TEXT ~ 'field_entry$' THEN + FOR ts_rec IN + + SELECT DISTINCT m.ts_config, m.index_weight + FROM config.metabib_class_ts_map m + LEFT JOIN metabib.record_attr_vector_list r ON (r.source = NEW.source) + LEFT JOIN config.coded_value_map ccvm ON ( + ccvm.ctype IN ('item_lang', 'language') AND + ccvm.code = m.index_lang AND + r.vlist @> intset(ccvm.id) + ) + WHERE m.field_class = TG_ARGV[0] + AND m.active + AND (m.always OR NOT EXISTS (SELECT 1 FROM config.metabib_field_ts_map WHERE metabib_field = NEW.field)) + AND (m.index_lang IS NULL OR ccvm.id IS NOT NULL) + UNION + SELECT DISTINCT m.ts_config, m.index_weight + FROM config.metabib_field_ts_map m + LEFT JOIN metabib.record_attr_vector_list r ON (r.source = NEW.source) + LEFT JOIN config.coded_value_map ccvm ON ( + ccvm.ctype IN ('item_lang', 'language') AND + ccvm.code = m.index_lang AND + r.vlist @> intset(ccvm.id) + ) + WHERE m.metabib_field = NEW.field + AND m.active + AND (m.index_lang IS NULL OR ccvm.id IS NOT NULL) + ORDER BY index_weight ASC + + LOOP + + IF cur_weight IS NOT NULL AND cur_weight != ts_rec.index_weight THEN + NEW.index_vector = NEW.index_vector || setweight(temp_vector::tsvector,cur_weight); + temp_vector = ''; + END IF; + + cur_weight = ts_rec.index_weight; + SELECT INTO temp_vector temp_vector || ' ' || to_tsvector(ts_rec.ts_config::regconfig, value)::TEXT; + + END LOOP; + NEW.index_vector = NEW.index_vector || setweight(temp_vector::tsvector,cur_weight); + ELSE + NEW.index_vector = to_tsvector(TG_ARGV[0]::regconfig, value); + END IF; + + RETURN NEW; +END; +$$ LANGUAGE PLPGSQL; + -- add new sr_format attribute definition INSERT INTO config.record_attr_definition (name, label, phys_char_sf) -- 2.11.0