CREATE TYPE metabib.field_entry_template AS (
field_class TEXT,
field INT,
+ facet_field BOOL,
+ search_field BOOL,
+ suggest_field BOOL,
source BIGINT,
value TEXT
);
+
CREATE OR REPLACE FUNCTION biblio.extract_metabib_field_entry ( rid BIGINT, default_joiner TEXT ) RETURNS SETOF metabib.field_entry_template AS $func$
DECLARE
bib biblio.record_entry%ROWTYPE;
xml_node TEXT;
xml_node_list TEXT[];
facet_text TEXT;
+ suggest_text TEXT;
raw_text TEXT;
curr_text TEXT;
joiner TEXT := default_joiner; -- XXX will index defs supply a joiner?
-- Loop over the indexing entries
FOR idx IN SELECT * FROM config.metabib_field ORDER BY format LOOP
+ output_row.facet_field = idx.facet_field;
+ output_row.suggest_field = idx.suggest_field;
+ output_row.search_field = idx.search_field;
+
SELECT INTO xfrm * from config.xml_transform WHERE name = idx.format;
-- See if we can skip the XSLT ... it's expensive
raw_text := COALESCE(raw_text,'') || curr_text;
+ -- autosuggest/metabib.browse_entry
+ IF idx.suggest_field THEN
+
+ IF idx.suggest_xpath IS NOT NULL AND idx.suggest_xpath <> '' THEN
+ suggest_text := oils_xpath_string( idx.suggest_xpath, xml_node, joiner, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] );
+ ELSE
+ suggest_text := curr_text;
+ END IF;
+
+ output_row.field_class = idx.field_class;
+ output_row.field = idx.id;
+ output_row.source = rid;
+ output_row.value = BTRIM(REGEXP_REPLACE(suggest_text, E'\\s+', ' ', 'g'));
+
+ RETURN NEXT output_row;
+ END IF;
+
-- insert raw node text for faceting
IF idx.facet_field THEN
END LOOP;
END;
+
+$func$ LANGUAGE PLPGSQL;
+
+CREATE OR REPLACE FUNCTION metabib.reingest_metabib_field_entries( bib_id BIGINT ) RETURNS VOID AS $func$
+DECLARE
+ fclass RECORD;
+ ind_data metabib.field_entry_template%ROWTYPE;
+ mbe_row metabib.browse_entry%ROWTYPE;
+ mbe_id BIGINT;
+BEGIN
+ PERFORM * FROM config.internal_flag WHERE name = 'ingest.assume_inserts_only' AND enabled;
+ IF NOT FOUND THEN
+ FOR fclass IN SELECT * FROM config.metabib_class LOOP
+ -- RAISE NOTICE 'Emptying out %', fclass.name;
+ EXECUTE $$DELETE FROM metabib.$$ || fclass.name || $$_field_entry WHERE source = $$ || bib_id;
+ END LOOP;
+ DELETE FROM metabib.facet_entry WHERE source = bib_id;
+ DELETE FROM metabib.browse_entry_def_map WHERE source = bib_id;
+ END IF;
+
+ FOR ind_data IN SELECT * FROM biblio.extract_metabib_field_entry( bib_id ) LOOP
+ IF ind_data.field < 0 THEN
+ ind_data.field = -1 * ind_data.field;
+ END IF;
+
+ IF ind_data.facet_field THEN
+ INSERT INTO metabib.facet_entry (field, source, value)
+ VALUES (ind_data.field, ind_data.source, ind_data.value);
+ END IF;
+
+ IF ind_data.suggest_field THEN
+ SELECT INTO mbe_row * FROM metabib.browse_entry WHERE value = ind_data.value;
+ IF FOUND THEN
+ mbe_id := mbe_row.id;
+ ELSE
+ INSERT INTO metabib.browse_entry (value) VALUES
+ (metabib.browse_normalize(ind_data.value, ind_data.field));
+ mbe_id := CURRVAL('metabib.browse_entry_id_seq'::REGCLASS);
+ END IF;
+
+ INSERT INTO metabib.browse_entry_def_map (entry, def, source)
+ VALUES (mbe_id, ind_data.field, ind_data.source);
+ END IF;
+
+ IF ind_data.search_field THEN
+ EXECUTE $$
+ INSERT INTO metabib.$$ || ind_data.field_class || $$_field_entry (field, source, value)
+ VALUES ($$ ||
+ quote_literal(ind_data.field) || $$, $$ ||
+ quote_literal(ind_data.source) || $$, $$ ||
+ quote_literal(ind_data.value) ||
+ $$);$$;
+ END IF;
+
+ END LOOP;
+
+ RETURN;
+END;
$func$ LANGUAGE PLPGSQL;
-- default to a space joiner
END;
$func$ LANGUAGE PLPGSQL;
-CREATE OR REPLACE FUNCTION metabib.reingest_metabib_field_entries( bib_id BIGINT ) RETURNS VOID AS $func$
-DECLARE
- fclass RECORD;
- ind_data metabib.field_entry_template%ROWTYPE;
-BEGIN
- PERFORM * FROM config.internal_flag WHERE name = 'ingest.assume_inserts_only' AND enabled;
- IF NOT FOUND THEN
- FOR fclass IN SELECT * FROM config.metabib_class LOOP
- -- RAISE NOTICE 'Emptying out %', fclass.name;
- EXECUTE $$DELETE FROM metabib.$$ || fclass.name || $$_field_entry WHERE source = $$ || bib_id;
- END LOOP;
- DELETE FROM metabib.facet_entry WHERE source = bib_id;
- END IF;
-
- FOR ind_data IN SELECT * FROM biblio.extract_metabib_field_entry( bib_id ) LOOP
- IF ind_data.field < 0 THEN
- ind_data.field = -1 * ind_data.field;
- INSERT INTO metabib.facet_entry (field, source, value)
- VALUES (ind_data.field, ind_data.source, ind_data.value);
- ELSE
- EXECUTE $$
- INSERT INTO metabib.$$ || ind_data.field_class || $$_field_entry (field, source, value)
- VALUES ($$ ||
- quote_literal(ind_data.field) || $$, $$ ||
- quote_literal(ind_data.source) || $$, $$ ||
- quote_literal(ind_data.value) ||
- $$);$$;
- END IF;
-
- END LOOP;
-
- RETURN;
-END;
-$func$ LANGUAGE PLPGSQL;
-
CREATE OR REPLACE FUNCTION biblio.extract_located_uris( bib_id BIGINT, marcxml TEXT, editor_id INT ) RETURNS VOID AS $func$
DECLARE
uris TEXT[];
END;
$func$ LANGUAGE PLPGSQL;
+CREATE TABLE metabib.browse_entry (
+ id BIGSERIAL PRIMARY KEY,
+ value TEXT unique,
+ index_vector tsvector
+);
+
+CREATE TABLE metabib.browse_entry_def_map (
+ id BIGSERIAL PRIMARY KEY,
+ entry BIGINT REFERENCES metabib.browse_entry (id),
+ def INT REFERENCES config.metabib_field (id),
+ source BIGINT REFERENCES biblio.record_entry (id)
+);
+
+CREATE OR REPLACE FUNCTION metabib.browse_normalize(facet_text TEXT, mapped_field INT) RETURNS TEXT AS $$
+DECLARE
+ normalizer RECORD;
+BEGIN
+
+ FOR normalizer IN
+ SELECT n.func AS func,
+ n.param_count AS param_count,
+ m.params AS params
+ FROM config.index_normalizer n
+ JOIN config.metabib_field_index_norm_map m ON (m.norm = n.id)
+ WHERE m.field = mapped_field AND m.pos < 0
+ ORDER BY m.pos LOOP
+
+ EXECUTE 'SELECT ' || normalizer.func || '(' ||
+ quote_literal( facet_text ) ||
+ CASE
+ WHEN normalizer.param_count > 0
+ THEN ',' || REPLACE(REPLACE(BTRIM(normalizer.params,'[]'),E'\'',E'\\\''),E'"',E'\'')
+ ELSE ''
+ END ||
+ ')' INTO facet_text;
+
+ END LOOP;
+
+ RETURN facet_text;
+END;
+
+$$ LANGUAGE PLPGSQL;
+
+CREATE TRIGGER metabib_browse_entry_fti_trigger
+ BEFORE INSERT OR UPDATE ON metabib.browse_entry
+ FOR EACH ROW EXECUTE PROCEDURE oils_tsearch2('default');
+
+
COMMIT;
--- /dev/null
+BEGIN;
+
+SELECT evergreen.upgrade_deps_block_check('YYYY', :eg_version);
+
+CREATE TABLE metabib.browse_entry (
+ id BIGSERIAL PRIMARY KEY,
+ value TEXT unique,
+ index_vector tsvector
+);
+
+CREATE TABLE metabib.browse_entry_def_map (
+ id BIGSERIAL PRIMARY KEY,
+ entry BIGINT REFERENCES metabib.browse_entry (id),
+ def INT REFERENCES config.metabib_field (id),
+ source BIGINT REFERENCES biblio.record_entry (id)
+);
+
+ALTER TABLE config.metabib_field ADD COLUMN suggest_field BOOLEAN DEFAULT TRUE NOT NULL;
+ALTER TABLE config.metabib_field ADD COLUMN suggest_xpath TEXT;
+
+CREATE OR REPLACE FUNCTION metabib.browse_normalize(facet_text TEXT, mapped_field INT) RETURNS TEXT AS $$
+DECLARE
+ normalizer RECORD;
+BEGIN
+
+ FOR normalizer IN
+ SELECT n.func AS func,
+ n.param_count AS param_count,
+ m.params AS params
+ FROM config.index_normalizer n
+ JOIN config.metabib_field_index_norm_map m ON (m.norm = n.id)
+ WHERE m.field = mapped_field AND m.pos < 0
+ ORDER BY m.pos LOOP
+
+ EXECUTE 'SELECT ' || normalizer.func || '(' ||
+ quote_literal( facet_text ) ||
+ CASE
+ WHEN normalizer.param_count > 0
+ THEN ',' || REPLACE(REPLACE(BTRIM(normalizer.params,'[]'),E'\'',E'\\\''),E'"',E'\'')
+ ELSE ''
+ END ||
+ ')' INTO facet_text;
+
+ END LOOP;
+
+ RETURN facet_text;
+END;
+
+$$ LANGUAGE PLPGSQL;
+
+CREATE TRIGGER metabib_browse_entry_fti_trigger
+ BEFORE INSERT OR UPDATE ON metabib.browse_entry
+ FOR EACH ROW EXECUTE PROCEDURE oils_tsearch2('default');
+
+DROP FUNCTION biblio.extract_metabib_field_entry(bigint, text);
+DROP FUNCTION biblio.extract_metabib_field_entry(bigint);
+
+DROP TYPE metabib.field_entry_template;
+CREATE TYPE metabib.field_entry_template AS (
+ field_class TEXT,
+ field INT,
+ facet_field BOOL,
+ search_field BOOL,
+ suggest_field BOOL,
+ source BIGINT,
+ value TEXT
+);
+
+
+CREATE OR REPLACE FUNCTION biblio.extract_metabib_field_entry ( rid BIGINT, default_joiner TEXT ) RETURNS SETOF metabib.field_entry_template AS $func$
+DECLARE
+ bib biblio.record_entry%ROWTYPE;
+ idx config.metabib_field%ROWTYPE;
+ xfrm config.xml_transform%ROWTYPE;
+ prev_xfrm TEXT;
+ transformed_xml TEXT;
+ xml_node TEXT;
+ xml_node_list TEXT[];
+ facet_text TEXT;
+ suggest_text TEXT;
+ raw_text TEXT;
+ curr_text TEXT;
+ joiner TEXT := default_joiner; -- XXX will index defs supply a joiner?
+ output_row metabib.field_entry_template%ROWTYPE;
+BEGIN
+
+ -- Get the record
+ SELECT INTO bib * FROM biblio.record_entry WHERE id = rid;
+
+ -- Loop over the indexing entries
+ FOR idx IN SELECT * FROM config.metabib_field ORDER BY format LOOP
+
+ output_row.facet_field = idx.facet_field;
+ output_row.suggest_field = idx.suggest_field;
+ output_row.search_field = idx.search_field;
+
+ SELECT INTO xfrm * from config.xml_transform WHERE name = idx.format;
+
+ -- See if we can skip the XSLT ... it's expensive
+ IF prev_xfrm IS NULL OR prev_xfrm <> xfrm.name THEN
+ -- Can't skip the transform
+ IF xfrm.xslt <> '---' THEN
+ transformed_xml := oils_xslt_process(bib.marc,xfrm.xslt);
+ ELSE
+ transformed_xml := bib.marc;
+ END IF;
+
+ prev_xfrm := xfrm.name;
+ END IF;
+
+ xml_node_list := oils_xpath( idx.xpath, transformed_xml, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] );
+
+ raw_text := NULL;
+ FOR xml_node IN SELECT x FROM unnest(xml_node_list) AS x LOOP
+ CONTINUE WHEN xml_node !~ E'^\\s*<';
+
+ curr_text := ARRAY_TO_STRING(
+ oils_xpath( '//text()',
+ REGEXP_REPLACE( -- This escapes all &s not followed by "amp;". Data ise returned from oils_xpath (above) in UTF-8, not entity encoded
+ REGEXP_REPLACE( -- This escapes embeded <s
+ xml_node,
+ $re$(>[^<]+)(<)([^>]+<)$re$,
+ E'\\1<\\3',
+ 'g'
+ ),
+ '&(?!amp;)',
+ '&',
+ 'g'
+ )
+ ),
+ ' '
+ );
+
+ CONTINUE WHEN curr_text IS NULL OR curr_text = '';
+
+ IF raw_text IS NOT NULL THEN
+ raw_text := raw_text || joiner;
+ END IF;
+
+ raw_text := COALESCE(raw_text,'') || curr_text;
+
+ -- autosuggest/metabib.browse_entry
+ IF idx.suggest_field THEN
+
+ IF idx.suggest_xpath IS NOT NULL AND idx.suggest_xpath <> '' THEN
+ suggest_text := oils_xpath_string( idx.suggest_xpath, xml_node, joiner, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] );
+ ELSE
+ suggest_text := curr_text;
+ END IF;
+
+ output_row.field_class = idx.field_class;
+ output_row.field = idx.id;
+ output_row.source = rid;
+ output_row.value = BTRIM(REGEXP_REPLACE(suggest_text, E'\\s+', ' ', 'g'));
+
+ RETURN NEXT output_row;
+ END IF;
+
+ -- insert raw node text for faceting
+ IF idx.facet_field THEN
+
+ IF idx.facet_xpath IS NOT NULL AND idx.facet_xpath <> '' THEN
+ facet_text := oils_xpath_string( idx.facet_xpath, xml_node, joiner, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] );
+ ELSE
+ facet_text := curr_text;
+ END IF;
+
+ output_row.field_class = idx.field_class;
+ output_row.field = -1 * idx.id;
+ output_row.source = rid;
+ output_row.value = BTRIM(REGEXP_REPLACE(facet_text, E'\\s+', ' ', 'g'));
+
+ RETURN NEXT output_row;
+ END IF;
+
+ END LOOP;
+
+ CONTINUE WHEN raw_text IS NULL OR raw_text = '';
+
+ -- insert combined node text for searching
+ IF idx.search_field THEN
+ output_row.field_class = idx.field_class;
+ output_row.field = idx.id;
+ output_row.source = rid;
+ output_row.value = BTRIM(REGEXP_REPLACE(raw_text, E'\\s+', ' ', 'g'));
+
+ RETURN NEXT output_row;
+ END IF;
+
+ END LOOP;
+
+END;
+$func$ LANGUAGE PLPGSQL;
+
+-- default to a space joiner
+CREATE OR REPLACE FUNCTION biblio.extract_metabib_field_entry ( BIGINT ) RETURNS SETOF metabib.field_entry_template AS $func$
+ SELECT * FROM biblio.extract_metabib_field_entry($1, ' ');
+ $func$ LANGUAGE SQL;
+
+
+CREATE OR REPLACE FUNCTION metabib.reingest_metabib_field_entries( bib_id BIGINT ) RETURNS VOID AS $func$
+DECLARE
+ fclass RECORD;
+ ind_data metabib.field_entry_template%ROWTYPE;
+ mbe_row metabib.browse_entry%ROWTYPE;
+ mbe_id BIGINT;
+BEGIN
+ PERFORM * FROM config.internal_flag WHERE name = 'ingest.assume_inserts_only' AND enabled;
+ IF NOT FOUND THEN
+ FOR fclass IN SELECT * FROM config.metabib_class LOOP
+ -- RAISE NOTICE 'Emptying out %', fclass.name;
+ EXECUTE $$DELETE FROM metabib.$$ || fclass.name || $$_field_entry WHERE source = $$ || bib_id;
+ END LOOP;
+ DELETE FROM metabib.facet_entry WHERE source = bib_id;
+ DELETE FROM metabib.browse_entry_def_map WHERE source = bib_id;
+ END IF;
+
+ FOR ind_data IN SELECT * FROM biblio.extract_metabib_field_entry( bib_id ) LOOP
+ IF ind_data.field < 0 THEN
+ ind_data.field = -1 * ind_data.field;
+ END IF;
+
+ IF ind_data.facet_field THEN
+ INSERT INTO metabib.facet_entry (field, source, value)
+ VALUES (ind_data.field, ind_data.source, ind_data.value);
+ END IF;
+
+ IF ind_data.suggest_field THEN
+ SELECT INTO mbe_row * FROM metabib.browse_entry WHERE value = ind_data.value;
+ IF FOUND THEN
+ mbe_id := mbe_row.id;
+ ELSE
+ INSERT INTO metabib.browse_entry (value) VALUES
+ (metabib.browse_normalize(ind_data.value, ind_data.field));
+ mbe_id := CURRVAL('metabib.browse_entry_id_seq'::REGCLASS);
+ END IF;
+
+ INSERT INTO metabib.browse_entry_def_map (entry, def, source)
+ VALUES (mbe_id, ind_data.field, ind_data.source);
+ END IF;
+
+ IF ind_data.search_field THEN
+ EXECUTE $$
+ INSERT INTO metabib.$$ || ind_data.field_class || $$_field_entry (field, source, value)
+ VALUES ($$ ||
+ quote_literal(ind_data.field) || $$, $$ ||
+ quote_literal(ind_data.source) || $$, $$ ||
+ quote_literal(ind_data.value) ||
+ $$);$$;
+ END IF;
+
+ END LOOP;
+
+ RETURN;
+END;
+$func$ LANGUAGE PLPGSQL;
+
+COMMIT;