END;
$f$ LANGUAGE PLPGSQL;
+CREATE OR REPLACE FUNCTION biblio.extract_metabib_field_entry ( rid BIGINT, default_joiner TEXT ) RETURNS SETOF metabib.field_entry_template AS $func$
+DECLARE
+ bib biblio.record_entry%ROWTYPE;
+ idx config.metabib_field%ROWTYPE;
+ xfrm config.xml_transform%ROWTYPE;
+ prev_xfrm TEXT;
+ transformed_xml TEXT;
+ xml_node TEXT;
+ xml_node_list TEXT[];
+ facet_text TEXT;
+ browse_text TEXT;
+ raw_text TEXT;
+ curr_text TEXT;
+ joiner TEXT := default_joiner; -- XXX will index defs supply a joiner?
+ output_row metabib.field_entry_template%ROWTYPE;
+BEGIN
+
+ -- Start out with no field-use bools set
+ output_row.browse_field = FALSE;
+ output_row.facet_field = FALSE;
+ output_row.search_field = FALSE;
+
+ -- Get the record
+ SELECT INTO bib * FROM biblio.record_entry WHERE id = rid;
+
+ -- Loop over the indexing entries
+ FOR idx IN SELECT * FROM config.metabib_field ORDER BY format LOOP
+
+ SELECT INTO xfrm * from config.xml_transform WHERE name = idx.format;
+
+ -- See if we can skip the XSLT ... it's expensive
+ IF prev_xfrm IS NULL OR prev_xfrm <> xfrm.name THEN
+ -- Can't skip the transform
+ IF xfrm.xslt <> '---' THEN
+ transformed_xml := oils_xslt_process(bib.marc,xfrm.xslt);
+ ELSE
+ transformed_xml := bib.marc;
+ END IF;
--- done! we hope...
+ prev_xfrm := xfrm.name;
+ END IF;
+
+ xml_node_list := oils_xpath( idx.xpath, transformed_xml, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] );
+
+ raw_text := NULL;
+ FOR xml_node IN SELECT x FROM unnest(xml_node_list) AS x LOOP
+ CONTINUE WHEN xml_node !~ E'^\\s*<';
+
+ curr_text := ARRAY_TO_STRING(
+ oils_xpath( '//text()',
+ REGEXP_REPLACE( -- This escapes all &s not followed by "amp;". Data ise returned from oils_xpath (above) in UTF-8, not entity encoded
+ REGEXP_REPLACE( -- This escapes embeded <s
+ xml_node,
+ $re$(>[^<]+)(<)([^>]+<)$re$,
+ E'\\1<\\3',
+ 'g'
+ ),
+ '&(?!amp;)',
+ '&',
+ 'g'
+ )
+ ),
+ ' '
+ );
+
+ CONTINUE WHEN curr_text IS NULL OR curr_text = '';
+
+ IF raw_text IS NOT NULL THEN
+ raw_text := raw_text || joiner;
+ END IF;
+
+ raw_text := COALESCE(raw_text,'') || curr_text;
+
+ -- autosuggest/metabib.browse_entry
+ IF idx.browse_field THEN
+
+ IF idx.browse_xpath IS NOT NULL AND idx.browse_xpath <> '' THEN
+ browse_text := oils_xpath_string( idx.browse_xpath, xml_node, joiner, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] );
+ ELSE
+ browse_text := curr_text;
+ END IF;
+
+ output_row.field_class = idx.field_class;
+ output_row.field = idx.id;
+ output_row.source = rid;
+ output_row.value = BTRIM(REGEXP_REPLACE(browse_text, E'\\s+', ' ', 'g'));
+
+ output_row.browse_field = TRUE;
+ RETURN NEXT output_row;
+ output_row.browse_field = FALSE;
+ END IF;
+
+ -- insert raw node text for faceting
+ IF idx.facet_field THEN
+
+ IF idx.facet_xpath IS NOT NULL AND idx.facet_xpath <> '' THEN
+ facet_text := oils_xpath_string( idx.facet_xpath, xml_node, joiner, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] );
+ ELSE
+ facet_text := curr_text;
+ END IF;
+
+ output_row.field_class = idx.field_class;
+ output_row.field = -1 * idx.id;
+ output_row.source = rid;
+ output_row.value = BTRIM(REGEXP_REPLACE(facet_text, E'\\s+', ' ', 'g'));
+
+ output_row.facet_field = TRUE;
+ RETURN NEXT output_row;
+ output_row.facet_field = FALSE;
+ END IF;
+
+ END LOOP;
+
+ CONTINUE WHEN raw_text IS NULL OR raw_text = '';
+
+ -- insert combined node text for searching
+ IF idx.search_field THEN
+ output_row.field_class = idx.field_class;
+ output_row.field = idx.id;
+ output_row.source = rid;
+ output_row.value = BTRIM(REGEXP_REPLACE(raw_text, E'\\s+', ' ', 'g'));
+
+ output_row.search_field = TRUE;
+ RETURN NEXT output_row;
+ output_row.search_field = FALSE;
+ END IF;
+
+ END LOOP;
+
+END;
+
+$func$ LANGUAGE PLPGSQL;
COMMIT;
DROP FUNCTION IF EXISTS evergreen.oils_tsearch2() CASCADE;
DROP FUNCTION IF EXISTS public.oils_tsearch2() CASCADE;
+-- commit 1d05b0bc14b4e91 - bring back split brain indexing
CREATE OR REPLACE FUNCTION public.oils_tsearch2 () RETURNS TRIGGER AS $$
DECLARE
normalizer RECORD;
ts_rec RECORD;
cur_weight "char";
BEGIN
+
value := NEW.value;
NEW.index_vector = ''::tsvector;
m.params AS params
FROM config.index_normalizer n
JOIN config.metabib_field_index_norm_map m ON (m.norm = n.id)
- WHERE field = NEW.field
+ WHERE field = NEW.field AND m.pos < 0
ORDER BY m.pos LOOP
EXECUTE 'SELECT ' || normalizer.func || '(' ||
quote_literal( value ) ||
')' INTO value;
END LOOP;
+
NEW.value = value;
- END IF;
+
+ FOR normalizer IN
+ SELECT n.func AS func,
+ n.param_count AS param_count,
+ m.params AS params
+ FROM config.index_normalizer n
+ JOIN config.metabib_field_index_norm_map m ON (m.norm = n.id)
+ WHERE field = NEW.field AND m.pos >= 0
+ ORDER BY m.pos LOOP
+ EXECUTE 'SELECT ' || normalizer.func || '(' ||
+ quote_literal( value ) ||
+ CASE
+ WHEN normalizer.param_count > 0
+ THEN ',' || REPLACE(REPLACE(BTRIM(normalizer.params,'[]'),E'\'',E'\\\''),E'"',E'\'')
+ ELSE ''
+ END ||
+ ')' INTO value;
+
+ END LOOP;
+ END IF;
IF TG_TABLE_NAME::TEXT ~ 'browse_entry$' THEN
value := ARRAY_TO_STRING(
INSERT INTO config.internal_flag (name) VALUES ('ingest.skip_search_indexing');
INSERT INTO config.internal_flag (name) VALUES ('ingest.skip_facet_indexing');
+-- commit 8d772605b30b - reduce index bloat but keep search granularity
CREATE OR REPLACE FUNCTION metabib.reingest_metabib_field_entries( bib_id BIGINT, skip_facet BOOL DEFAULT FALSE, skip_browse BOOL DEFAULT FALSE, skip_search BOOL DEFAULT FALSE ) RETURNS VOID AS $func$
DECLARE
fclass RECORD;
VALUES (mbe_id, ind_data.field, ind_data.source);
END IF;
- IF ind_data.search_field AND NOT b_skip_search THEN
- EXECUTE $$
+ -- Avoid inserting duplicate rows, but retain granularity of being
+ -- able to search browse fields with "starts with" type operators
+ -- (for example, for titles of songs in music albums)
+ IF (ind_data.search_field OR ind_data.browse_field) AND NOT b_skip_search THEN
+ EXECUTE 'SELECT 1 FROM metabib.' || ind_data.field_class ||
+ '_field_entry WHERE field = $1 source = $2 AND value = $3'
+ INTO mbe_id USING ind_data.field, ind_data.source, ind_data.value;
+ -- RAISE NOTICE 'Search for an already matching row returned %', mbe_id;
+ IF mbe_id IS NULL THEN
+ EXECUTE $$
INSERT INTO metabib.$$ || ind_data.field_class || $$_field_entry (field, source, value)
VALUES ($$ ||
quote_literal(ind_data.field) || $$, $$ ||
quote_literal(ind_data.source) || $$, $$ ||
quote_literal(ind_data.value) ||
$$);$$;
+ END IF;
END IF;
END LOOP;
END;
$func$ LANGUAGE PLPGSQL;
-
SELECT evergreen.upgrade_deps_block_check('0763', :eg_version);
INSERT INTO config.org_unit_setting_type (