--- /dev/null
+BEGIN;
+
+SET search_path = public, pg_catalog;
+
+DO $$
+DECLARE
+lang TEXT;
+BEGIN
+FOR lang IN SELECT substring(pptsd.dictname from '(.*)_stem$') AS lang FROM pg_catalog.pg_ts_dict pptsd JOIN pg_catalog.pg_namespace ppn ON ppn.oid = pptsd.dictnamespace
+WHERE ppn.nspname = 'pg_catalog' AND pptsd.dictname LIKE '%_stem' LOOP
+RAISE NOTICE 'FOUND LANGUAGE %', lang;
+
+EXECUTE 'DROP TEXT SEARCH DICTIONARY IF EXISTS ' || lang || '_nostop CASCADE;
+CREATE TEXT SEARCH DICTIONARY ' || lang || '_nostop (TEMPLATE=pg_catalog.snowball, language=''' || lang || ''');
+COMMENT ON TEXT SEARCH DICTIONARY ' || lang || '_nostop IS ''' ||lang || ' snowball stemmer with no stopwords for ASCII words only.'';
+CREATE TEXT SEARCH CONFIGURATION ' || lang || '_nostop ( COPY = pg_catalog.' || lang || ' );
+ALTER TEXT SEARCH CONFIGURATION ' || lang || '_nostop ALTER MAPPING FOR word, hword, hword_part WITH pg_catalog.simple;
+ALTER TEXT SEARCH CONFIGURATION ' || lang || '_nostop ALTER MAPPING FOR asciiword, asciihword, hword_asciipart WITH ' || lang || '_nostop;';
+
+END LOOP;
+END;
+$$;
+CREATE TEXT SEARCH CONFIGURATION keyword ( COPY = english_nostop );
+CREATE TEXT SEARCH CONFIGURATION "default" ( COPY = english_nostop );
+
+SET search_path = evergreen, public, pg_catalog;
+
+ALTER TABLE config.metabib_class
+ ADD COLUMN a_weight NUMERIC DEFAULT 1.0 NOT NULL,
+ ADD COLUMN b_weight NUMERIC DEFAULT 0.4 NOT NULL,
+ ADD COLUMN c_weight NUMERIC DEFAULT 0.2 NOT NULL,
+ ADD COLUMN d_weight NUMERIC DEFAULT 0.1 NOT NULL;
+
+CREATE TABLE config.ts_config_list (
+ id TEXT PRIMARY KEY,
+ name TEXT NOT NULL
+);
+COMMENT ON TABLE config.ts_config_list IS $$
+Full Text Configs
+
+A list of full text configs with names and descriptions.
+$$;
+
+CREATE TABLE config.metabib_class_ts_map (
+ id SERIAL PRIMARY KEY,
+ field_class TEXT NOT NULL REFERENCES config.metabib_class (name),
+ ts_config TEXT NOT NULL REFERENCES config.ts_config_list (id),
+ active BOOL NOT NULL DEFAULT TRUE,
+ index_weight CHAR(1) NOT NULL DEFAULT 'C' CHECK (index_weight IN ('A','B','C','D')),
+ index_lang TEXT NULL,
+ search_lang TEXT NULL,
+ always BOOL NOT NULL DEFAULT true
+);
+COMMENT ON TABLE config.metabib_class_ts_map IS $$
+Text Search Configs for metabib class indexing
+
+This table contains text search config definitions for
+storing index_vector values.
+$$;
+
+CREATE TABLE config.metabib_field_ts_map (
+ id SERIAL PRIMARY KEY,
+ metabib_field INT NOT NULL REFERENCES config.metabib_field (id),
+ ts_config TEXT NOT NULL REFERENCES config.ts_config_list (id),
+ active BOOL NOT NULL DEFAULT TRUE,
+ index_weight CHAR(1) NOT NULL DEFAULT 'C' CHECK (index_weight IN ('A','B','C','D')),
+ index_lang TEXT NULL,
+ search_lang TEXT NULL
+);
+COMMENT ON TABLE config.metabib_field_ts_map IS $$
+Text Search Configs for metabib field indexing
+
+This table contains text search config definitions for
+storing index_vector values.
+$$;
+
+CREATE TABLE metabib.combined_identifier_field_entry (
+ record BIGINT NOT NULL,
+ metabib_field INT NULL,
+ index_vector tsvector NOT NULL
+);
+CREATE UNIQUE INDEX metabib_combined_identifier_field_entry_fakepk_idx ON metabib.combined_identifier_field_entry (record, COALESCE(metabib_field::TEXT,''));
+CREATE INDEX metabib_combined_identifier_field_entry_index_vector_idx ON metabib.combined_identifier_field_entry USING GIST (index_vector);
+CREATE INDEX metabib_combined_identifier_field_source_idx ON metabib.combined_identifier_field_entry (metabib_field);
+
+CREATE TABLE metabib.combined_title_field_entry (
+ record BIGINT NOT NULL,
+ metabib_field INT NULL,
+ index_vector tsvector NOT NULL
+);
+CREATE UNIQUE INDEX metabib_combined_title_field_entry_fakepk_idx ON metabib.combined_title_field_entry (record, COALESCE(metabib_field::TEXT,''));
+CREATE INDEX metabib_combined_title_field_entry_index_vector_idx ON metabib.combined_title_field_entry USING GIST (index_vector);
+CREATE INDEX metabib_combined_title_field_source_idx ON metabib.combined_title_field_entry (metabib_field);
+
+CREATE TABLE metabib.combined_author_field_entry (
+ record BIGINT NOT NULL,
+ metabib_field INT NULL,
+ index_vector tsvector NOT NULL
+);
+CREATE UNIQUE INDEX metabib_combined_author_field_entry_fakepk_idx ON metabib.combined_author_field_entry (record, COALESCE(metabib_field::TEXT,''));
+CREATE INDEX metabib_combined_author_field_entry_index_vector_idx ON metabib.combined_author_field_entry USING GIST (index_vector);
+CREATE INDEX metabib_combined_author_field_source_idx ON metabib.combined_author_field_entry (metabib_field);
+
+CREATE TABLE metabib.combined_subject_field_entry (
+ record BIGINT NOT NULL,
+ metabib_field INT NULL,
+ index_vector tsvector NOT NULL
+);
+CREATE UNIQUE INDEX metabib_combined_subject_field_entry_fakepk_idx ON metabib.combined_subject_field_entry (record, COALESCE(metabib_field::TEXT,''));
+CREATE INDEX metabib_combined_subject_field_entry_index_vector_idx ON metabib.combined_subject_field_entry USING GIST (index_vector);
+CREATE INDEX metabib_combined_subject_field_source_idx ON metabib.combined_subject_field_entry (metabib_field);
+
+CREATE TABLE metabib.combined_keyword_field_entry (
+ record BIGINT NOT NULL,
+ metabib_field INT NULL,
+ index_vector tsvector NOT NULL
+);
+CREATE UNIQUE INDEX metabib_combined_keyword_field_entry_fakepk_idx ON metabib.combined_keyword_field_entry (record, COALESCE(metabib_field::TEXT,''));
+CREATE INDEX metabib_combined_keyword_field_entry_index_vector_idx ON metabib.combined_keyword_field_entry USING GIST (index_vector);
+CREATE INDEX metabib_combined_keyword_field_source_idx ON metabib.combined_keyword_field_entry (metabib_field);
+
+CREATE TABLE metabib.combined_series_field_entry (
+ record BIGINT NOT NULL,
+ metabib_field INT NULL,
+ index_vector tsvector NOT NULL
+);
+CREATE UNIQUE INDEX metabib_combined_series_field_entry_fakepk_idx ON metabib.combined_series_field_entry (record, COALESCE(metabib_field::TEXT,''));
+CREATE INDEX metabib_combined_series_field_entry_index_vector_idx ON metabib.combined_series_field_entry USING GIST (index_vector);
+CREATE INDEX metabib_combined_series_field_source_idx ON metabib.combined_series_field_entry (metabib_field);
+
+CREATE OR REPLACE FUNCTION metabib.update_combined_index_vectors(bib_id BIGINT) RETURNS VOID AS $func$
+BEGIN
+ DELETE FROM metabib.combined_keyword_field_entry WHERE record = bib_id;
+ INSERT INTO metabib.combined_keyword_field_entry(record, metabib_field, index_vector)
+ SELECT bib_id, field, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
+ FROM metabib.keyword_field_entry WHERE source = bib_id GROUP BY field;
+ INSERT INTO metabib.combined_keyword_field_entry(record, metabib_field, index_vector)
+ SELECT bib_id, NULL, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
+ FROM metabib.keyword_field_entry WHERE source = bib_id;
+
+ DELETE FROM metabib.combined_title_field_entry WHERE record = bib_id;
+ INSERT INTO metabib.combined_title_field_entry(record, metabib_field, index_vector)
+ SELECT bib_id, field, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
+ FROM metabib.title_field_entry WHERE source = bib_id GROUP BY field;
+ INSERT INTO metabib.combined_title_field_entry(record, metabib_field, index_vector)
+ SELECT bib_id, NULL, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
+ FROM metabib.title_field_entry WHERE source = bib_id;
+
+ DELETE FROM metabib.combined_author_field_entry WHERE record = bib_id;
+ INSERT INTO metabib.combined_author_field_entry(record, metabib_field, index_vector)
+ SELECT bib_id, field, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
+ FROM metabib.author_field_entry WHERE source = bib_id GROUP BY field;
+ INSERT INTO metabib.combined_author_field_entry(record, metabib_field, index_vector)
+ SELECT bib_id, NULL, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
+ FROM metabib.author_field_entry WHERE source = bib_id;
+
+ DELETE FROM metabib.combined_subject_field_entry WHERE record = bib_id;
+ INSERT INTO metabib.combined_subject_field_entry(record, metabib_field, index_vector)
+ SELECT bib_id, field, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
+ FROM metabib.subject_field_entry WHERE source = bib_id GROUP BY field;
+ INSERT INTO metabib.combined_subject_field_entry(record, metabib_field, index_vector)
+ SELECT bib_id, NULL, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
+ FROM metabib.subject_field_entry WHERE source = bib_id;
+
+ DELETE FROM metabib.combined_series_field_entry WHERE record = bib_id;
+ INSERT INTO metabib.combined_series_field_entry(record, metabib_field, index_vector)
+ SELECT bib_id, field, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
+ FROM metabib.series_field_entry WHERE source = bib_id GROUP BY field;
+ INSERT INTO metabib.combined_series_field_entry(record, metabib_field, index_vector)
+ SELECT bib_id, NULL, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
+ FROM metabib.series_field_entry WHERE source = bib_id;
+
+ DELETE FROM metabib.combined_identifier_field_entry WHERE record = bib_id;
+ INSERT INTO metabib.combined_identifier_field_entry(record, metabib_field, index_vector)
+ SELECT bib_id, field, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
+ FROM metabib.identifier_field_entry WHERE source = bib_id GROUP BY field;
+ INSERT INTO metabib.combined_identifier_field_entry(record, metabib_field, index_vector)
+ SELECT bib_id, NULL, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
+ FROM metabib.identifier_field_entry WHERE source = bib_id;
+
+END;
+$func$ LANGUAGE PLPGSQL;
+
+CREATE OR REPLACE FUNCTION metabib.reingest_metabib_field_entries( bib_id BIGINT, skip_facet BOOL DEFAULT FALSE, skip_browse BOOL DEFAULT FALSE, skip_search BOOL DEFAULT FALSE ) RETURNS VOID AS $func$
+DECLARE
+ fclass RECORD;
+ ind_data metabib.field_entry_template%ROWTYPE;
+ mbe_row metabib.browse_entry%ROWTYPE;
+ mbe_id BIGINT;
+BEGIN
+ PERFORM * FROM config.internal_flag WHERE name = 'ingest.assume_inserts_only' AND enabled;
+ IF NOT FOUND THEN
+ IF NOT skip_search THEN
+ FOR fclass IN SELECT * FROM config.metabib_class LOOP
+ -- RAISE NOTICE 'Emptying out %', fclass.name;
+ EXECUTE $$DELETE FROM metabib.$$ || fclass.name || $$_field_entry WHERE source = $$ || bib_id;
+ END LOOP;
+ END IF;
+ IF NOT skip_facet THEN
+ DELETE FROM metabib.facet_entry WHERE source = bib_id;
+ END IF;
+ IF NOT skip_browse THEN
+ DELETE FROM metabib.browse_entry_def_map WHERE source = bib_id;
+ END IF;
+ END IF;
+
+ FOR ind_data IN SELECT * FROM biblio.extract_metabib_field_entry( bib_id ) LOOP
+ IF ind_data.field < 0 THEN
+ ind_data.field = -1 * ind_data.field;
+ END IF;
+
+ IF ind_data.facet_field AND NOT skip_facet THEN
+ INSERT INTO metabib.facet_entry (field, source, value)
+ VALUES (ind_data.field, ind_data.source, ind_data.value);
+ END IF;
+
+ IF ind_data.browse_field AND NOT skip_browse THEN
+ -- A caveat about this SELECT: this should take care of replacing
+ -- old mbe rows when data changes, but not if normalization (by
+ -- which I mean specifically the output of
+ -- evergreen.oils_tsearch2()) changes. It may or may not be
+ -- expensive to add a comparison of index_vector to index_vector
+ -- to the WHERE clause below.
+ SELECT INTO mbe_row * FROM metabib.browse_entry WHERE value = ind_data.value;
+ IF FOUND THEN
+ mbe_id := mbe_row.id;
+ ELSE
+ INSERT INTO metabib.browse_entry (value) VALUES
+ (metabib.browse_normalize(ind_data.value, ind_data.field));
+ mbe_id := CURRVAL('metabib.browse_entry_id_seq'::REGCLASS);
+ END IF;
+
+ INSERT INTO metabib.browse_entry_def_map (entry, def, source)
+ VALUES (mbe_id, ind_data.field, ind_data.source);
+ END IF;
+
+ IF ind_data.search_field AND NOT skip_search THEN
+ EXECUTE $$
+ INSERT INTO metabib.$$ || ind_data.field_class || $$_field_entry (field, source, value)
+ VALUES ($$ ||
+ quote_literal(ind_data.field) || $$, $$ ||
+ quote_literal(ind_data.source) || $$, $$ ||
+ quote_literal(ind_data.value) ||
+ $$);$$;
+ END IF;
+
+ END LOOP;
+
+ IF NOT skip_search THEN
+ PERFORM metabib.update_combined_index_vectors(bib_id);
+ END IF;
+
+ RETURN;
+END;
+$func$ LANGUAGE PLPGSQL;
+
+DROP FUNCTION IF EXISTS evergreen.oils_tsearch2() CASCADE;
+DROP FUNCTION IF EXISTS public.oils_tsearch2() CASCADE;
+
+CREATE OR REPLACE FUNCTION public.oils_tsearch2 () RETURNS TRIGGER AS $$
+DECLARE
+ normalizer RECORD;
+ value TEXT := '';
+ temp_vector TEXT := '';
+ ts_rec RECORD;
+ cur_weight "char";
+BEGIN
+ value := NEW.value;
+ NEW.index_vector = ''::tsvector;
+
+ IF TG_TABLE_NAME::TEXT ~ 'field_entry$' THEN
+ FOR normalizer IN
+ SELECT n.func AS func,
+ n.param_count AS param_count,
+ m.params AS params
+ FROM config.index_normalizer n
+ JOIN config.metabib_field_index_norm_map m ON (m.norm = n.id)
+ WHERE field = NEW.field
+ ORDER BY m.pos LOOP
+ EXECUTE 'SELECT ' || normalizer.func || '(' ||
+ quote_literal( value ) ||
+ CASE
+ WHEN normalizer.param_count > 0
+ THEN ',' || REPLACE(REPLACE(BTRIM(normalizer.params,'[]'),E'\'',E'\\\''),E'"',E'\'')
+ ELSE ''
+ END ||
+ ')' INTO value;
+
+ END LOOP;
+ NEW.value = value;
+ END IF;
+
+ IF TG_TABLE_NAME::TEXT ~ 'browse_entry$' THEN
+ value := ARRAY_TO_STRING(
+ evergreen.regexp_split_to_array(value, E'\\W+'), ' '
+ );
+ value := public.search_normalize(value);
+ NEW.index_vector = to_tsvector(TG_ARGV[0]::regconfig, value);
+ ELSIF TG_TABLE_NAME::TEXT ~ 'field_entry$' THEN
+ FOR ts_rec IN
+ SELECT ts_config, index_weight
+ FROM config.metabib_class_ts_map
+ WHERE field_class = TG_ARGV[0]
+ AND index_lang IS NULL OR EXISTS (SELECT 1 FROM metabib.record_attr WHERE id = NEW.source AND index_lang IN(attrs->'item_lang',attrs->'language'))
+ AND always OR NOT EXISTS (SELECT 1 FROM config.metabib_field_ts_map WHERE metabib_field = NEW.field)
+ UNION
+ SELECT ts_config, index_weight
+ FROM config.metabib_field_ts_map
+ WHERE metabib_field = NEW.field
+ AND index_lang IS NULL OR EXISTS (SELECT 1 FROM metabib.record_attr WHERE id = NEW.source AND index_lang IN(attrs->'item_lang',attrs->'language'))
+ ORDER BY index_weight ASC
+ LOOP
+ IF cur_weight IS NOT NULL AND cur_weight != ts_rec.index_weight THEN
+ NEW.index_vector = NEW.index_vector || setweight(temp_vector::tsvector,cur_weight);
+ temp_vector = '';
+ END IF;
+ cur_weight = ts_rec.index_weight;
+ SELECT INTO temp_vector temp_vector || ' ' || to_tsvector(ts_rec.ts_config::regconfig, value)::TEXT;
+ END LOOP;
+ NEW.index_vector = NEW.index_vector || setweight(temp_vector::tsvector,cur_weight);
+ ELSE
+ NEW.index_vector = to_tsvector(TG_ARGV[0]::regconfig, value);
+ END IF;
+
+ RETURN NEW;
+END;
+$$ LANGUAGE PLPGSQL;
+
+CREATE TRIGGER authority_full_rec_fti_trigger
+ BEFORE UPDATE OR INSERT ON authority.full_rec
+ FOR EACH ROW EXECUTE PROCEDURE oils_tsearch2('keyword');
+
+CREATE TRIGGER authority_simple_heading_fti_trigger
+ BEFORE UPDATE OR INSERT ON authority.simple_heading
+ FOR EACH ROW EXECUTE PROCEDURE oils_tsearch2('keyword');
+
+CREATE TRIGGER metabib_identifier_field_entry_fti_trigger
+ BEFORE UPDATE OR INSERT ON metabib.identifier_field_entry
+ FOR EACH ROW EXECUTE PROCEDURE oils_tsearch2('identifier');
+
+CREATE TRIGGER metabib_title_field_entry_fti_trigger
+ BEFORE UPDATE OR INSERT ON metabib.title_field_entry
+ FOR EACH ROW EXECUTE PROCEDURE oils_tsearch2('title');
+
+CREATE TRIGGER metabib_author_field_entry_fti_trigger
+ BEFORE UPDATE OR INSERT ON metabib.author_field_entry
+ FOR EACH ROW EXECUTE PROCEDURE oils_tsearch2('author');
+
+CREATE TRIGGER metabib_subject_field_entry_fti_trigger
+ BEFORE UPDATE OR INSERT ON metabib.subject_field_entry
+ FOR EACH ROW EXECUTE PROCEDURE oils_tsearch2('subject');
+
+CREATE TRIGGER metabib_keyword_field_entry_fti_trigger
+ BEFORE UPDATE OR INSERT ON metabib.keyword_field_entry
+ FOR EACH ROW EXECUTE PROCEDURE oils_tsearch2('keyword');
+
+CREATE TRIGGER metabib_series_field_entry_fti_trigger
+ BEFORE UPDATE OR INSERT ON metabib.series_field_entry
+ FOR EACH ROW EXECUTE PROCEDURE oils_tsearch2('series');
+
+CREATE TRIGGER metabib_browse_entry_fti_trigger
+ BEFORE INSERT OR UPDATE ON metabib.browse_entry
+ FOR EACH ROW EXECUTE PROCEDURE oils_tsearch2('keyword');
+
+CREATE TRIGGER metabib_full_rec_fti_trigger
+ BEFORE UPDATE OR INSERT ON metabib.real_full_rec
+ FOR EACH ROW EXECUTE PROCEDURE oils_tsearch2('default');
+
+INSERT INTO config.ts_config_list(id, name) VALUES
+ ('simple','Non-Stemmed Simple'),
+ ('danish_nostop','Danish Stemmed'),
+ ('dutch_nostop','Dutch Stemmed'),
+ ('english_nostop','English Stemmed'),
+ ('finnish_nostop','Finnish Stemmed'),
+ ('french_nostop','French Stemmed'),
+ ('german_nostop','German Stemmed'),
+ ('hungarian_nostop','Hungarian Stemmed'),
+ ('italian_nostop','Italian Stemmed'),
+ ('norwegian_nostop','Norwegian Stemmed'),
+ ('portuguese_nostop','Portuguese Stemmed'),
+ ('romanian_nostop','Romanian Stemmed'),
+ ('russian_nostop','Russian Stemmed'),
+ ('spanish_nostop','Spanish Stemmed'),
+ ('swedish_nostop','Swedish Stemmed'),
+ ('turkish_nostop','Turkish Stemmed');
+
+INSERT INTO config.metabib_class_ts_map(field_class, ts_config, index_weight, always) VALUES
+ ('keyword','simple','A',true),
+ ('keyword','english_nostop','C',true),
+ ('title','simple','A',true),
+ ('title','english_nostop','C',true),
+ ('author','simple','A',true),
+ ('author','english_nostop','C',true),
+ ('series','simple','A',true),
+ ('series','english_nostop','C',true),
+ ('subject','simple','A',true),
+ ('subject','english_nostop','C',true),
+ ('identifier','simple','A',true);
+
+CREATE OR REPLACE FUNCTION evergreen.rel_bump(terms TEXT[], value TEXT, bumps TEXT[], mults NUMERIC[]) RETURNS NUMERIC AS
+$BODY$
+use strict;
+my ($terms,$value,$bumps,$mults) = @_;
+
+my $retval = 1;
+
+for (my $id = 0; $id < @$bumps; $id++) {
+ if ($bumps->[$id] eq 'first_word') {
+ $retval *= $mults->[$id] if ($value =~ /^$terms->[0]/);
+ } elsif ($bumps->[$id] eq 'full_match') {
+ my $fullmatch = join(' ', @$terms);
+ $retval *= $mults->[$id] if ($value =~ /^$fullmatch$/);
+ } elsif ($bumps->[$id] eq 'word_order') {
+ my $wordorder = join('.*', @$terms);
+ $retval *= $mults->[$id] if ($value =~ /$wordorder/);
+ }
+}
+return $retval;
+$BODY$ LANGUAGE plperlu IMMUTABLE STRICT COST 100;
+
+UPDATE metabib.identifier_field_entry set value = value;
+UPDATE metabib.title_field_entry set value = value;
+UPDATE metabib.author_field_entry set value = value;
+UPDATE metabib.subject_field_entry set value = value;
+UPDATE metabib.keyword_field_entry set value = value;
+UPDATE metabib.series_field_entry set value = value;
+
+SELECT metabib.update_combined_index_vectors(id)
+ FROM biblio.record_entry
+ WHERE NOT deleted;
+
+COMMIT;