From: Chris Sharp Date: Thu, 23 Sep 2021 16:02:56 +0000 (-0400) Subject: LP#1864516: stamp DB upgrade script X-Git-Url: https://old-git.evergreen-ils.org/?a=commitdiff_plain;h=921159054219dab0c39261e200a6c4bc911e7355;p=Evergreen.git LP#1864516: stamp DB upgrade script Signed-off-by: Chris Sharp --- diff --git a/Open-ILS/src/sql/Pg/002.schema.config.sql b/Open-ILS/src/sql/Pg/002.schema.config.sql index 7d78804f36..6339ed5ccc 100644 --- a/Open-ILS/src/sql/Pg/002.schema.config.sql +++ b/Open-ILS/src/sql/Pg/002.schema.config.sql @@ -92,7 +92,7 @@ CREATE TRIGGER no_overlapping_deps BEFORE INSERT OR UPDATE ON config.db_patch_dependencies FOR EACH ROW EXECUTE PROCEDURE evergreen.array_overlap_check ('deprecates'); -INSERT INTO config.upgrade_log (version, applied_to) VALUES ('1297', :eg_version); -- dbriem / gmonti / gmcharlt +INSERT INTO config.upgrade_log (version, applied_to) VALUES ('1298', :eg_version); -- miker / ehardy / csharp CREATE TABLE config.bib_source ( id SERIAL PRIMARY KEY, diff --git a/Open-ILS/src/sql/Pg/upgrade/1298.schema.case-insensitive-browse-field-match.sql b/Open-ILS/src/sql/Pg/upgrade/1298.schema.case-insensitive-browse-field-match.sql new file mode 100644 index 0000000000..5f2aeb4d6c --- /dev/null +++ b/Open-ILS/src/sql/Pg/upgrade/1298.schema.case-insensitive-browse-field-match.sql @@ -0,0 +1,355 @@ +BEGIN; + +SELECT evergreen.upgrade_deps_block_check('1298', :eg_version); + +ALTER TYPE metabib.field_entry_template ADD ATTRIBUTE browse_nocase BOOL CASCADE; + +ALTER TABLE config.metabib_field ADD COLUMN browse_nocase BOOL NOT NULL DEFAULT FALSE; + +CREATE OR REPLACE FUNCTION biblio.extract_metabib_field_entry ( + rid BIGINT, + default_joiner TEXT, + field_types TEXT[], + only_fields INT[] +) RETURNS SETOF metabib.field_entry_template AS $func$ +DECLARE + bib biblio.record_entry%ROWTYPE; + idx config.metabib_field%ROWTYPE; + xfrm config.xml_transform%ROWTYPE; + prev_xfrm TEXT; + transformed_xml TEXT; + xml_node TEXT; + xml_node_list TEXT[]; + facet_text TEXT; + display_text TEXT; + browse_text TEXT; + sort_value TEXT; + raw_text TEXT; + curr_text TEXT; + joiner TEXT := default_joiner; -- XXX will index defs supply a joiner? + authority_text TEXT; + authority_link BIGINT; + output_row metabib.field_entry_template%ROWTYPE; + process_idx BOOL; +BEGIN + + -- Start out with no field-use bools set + output_row.browse_nocase = FALSE; + output_row.browse_field = FALSE; + output_row.facet_field = FALSE; + output_row.display_field = FALSE; + output_row.search_field = FALSE; + + -- Get the record + SELECT INTO bib * FROM biblio.record_entry WHERE id = rid; + + -- Loop over the indexing entries + FOR idx IN SELECT * FROM config.metabib_field WHERE id = ANY (only_fields) ORDER BY format LOOP + CONTINUE WHEN idx.xpath IS NULL OR idx.xpath = ''; -- pure virtual field + + process_idx := FALSE; + IF idx.display_field AND 'display' = ANY (field_types) THEN process_idx = TRUE; END IF; + IF idx.browse_field AND 'browse' = ANY (field_types) THEN process_idx = TRUE; END IF; + IF idx.search_field AND 'search' = ANY (field_types) THEN process_idx = TRUE; END IF; + IF idx.facet_field AND 'facet' = ANY (field_types) THEN process_idx = TRUE; END IF; + CONTINUE WHEN process_idx = FALSE; -- disabled for all types + + joiner := COALESCE(idx.joiner, default_joiner); + + SELECT INTO xfrm * from config.xml_transform WHERE name = idx.format; + + -- See if we can skip the XSLT ... it's expensive + IF prev_xfrm IS NULL OR prev_xfrm <> xfrm.name THEN + -- Can't skip the transform + IF xfrm.xslt <> '---' THEN + transformed_xml := oils_xslt_process(bib.marc,xfrm.xslt); + ELSE + transformed_xml := bib.marc; + END IF; + + prev_xfrm := xfrm.name; + END IF; + + xml_node_list := oils_xpath( idx.xpath, transformed_xml, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] ); + + raw_text := NULL; + FOR xml_node IN SELECT x FROM unnest(xml_node_list) AS x LOOP + CONTINUE WHEN xml_node !~ E'^\\s*<'; + + -- XXX much of this should be moved into oils_xpath_string... + curr_text := ARRAY_TO_STRING(evergreen.array_remove_item_by_value(evergreen.array_remove_item_by_value( + oils_xpath( '//text()', -- get the content of all the nodes within the main selected node + REGEXP_REPLACE( xml_node, E'\\s+', ' ', 'g' ) -- Translate adjacent whitespace to a single space + ), ' '), ''), -- throw away morally empty (bankrupt?) strings + joiner + ); + + CONTINUE WHEN curr_text IS NULL OR curr_text = ''; + + IF raw_text IS NOT NULL THEN + raw_text := raw_text || joiner; + END IF; + + raw_text := COALESCE(raw_text,'') || curr_text; + + -- autosuggest/metabib.browse_entry + IF idx.browse_field THEN + output_row.browse_nocase = idx.browse_nocase; + + IF idx.browse_xpath IS NOT NULL AND idx.browse_xpath <> '' THEN + browse_text := oils_xpath_string( idx.browse_xpath, xml_node, joiner, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] ); + ELSE + browse_text := curr_text; + END IF; + + IF idx.browse_sort_xpath IS NOT NULL AND + idx.browse_sort_xpath <> '' THEN + + sort_value := oils_xpath_string( + idx.browse_sort_xpath, xml_node, joiner, + ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] + ); + ELSE + sort_value := browse_text; + END IF; + + output_row.field_class = idx.field_class; + output_row.field = idx.id; + output_row.source = rid; + output_row.value = BTRIM(REGEXP_REPLACE(browse_text, E'\\s+', ' ', 'g')); + output_row.sort_value := + public.naco_normalize(sort_value); + + output_row.authority := NULL; + + IF idx.authority_xpath IS NOT NULL AND idx.authority_xpath <> '' THEN + authority_text := oils_xpath_string( + idx.authority_xpath, xml_node, joiner, + ARRAY[ + ARRAY[xfrm.prefix, xfrm.namespace_uri], + ARRAY['xlink','http://www.w3.org/1999/xlink'] + ] + ); + + IF authority_text ~ '^\d+$' THEN + authority_link := authority_text::BIGINT; + PERFORM * FROM authority.record_entry WHERE id = authority_link; + IF FOUND THEN + output_row.authority := authority_link; + END IF; + END IF; + + END IF; + + output_row.browse_field = TRUE; + -- Returning browse rows with search_field = true for search+browse + -- configs allows us to retain granularity of being able to search + -- browse fields with "starts with" type operators (for example, for + -- titles of songs in music albums) + IF idx.search_field THEN + output_row.search_field = TRUE; + END IF; + RETURN NEXT output_row; + output_row.browse_nocase = FALSE; + output_row.browse_field = FALSE; + output_row.search_field = FALSE; + output_row.sort_value := NULL; + END IF; + + -- insert raw node text for faceting + IF idx.facet_field THEN + + IF idx.facet_xpath IS NOT NULL AND idx.facet_xpath <> '' THEN + facet_text := oils_xpath_string( idx.facet_xpath, xml_node, joiner, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] ); + ELSE + facet_text := curr_text; + END IF; + + output_row.field_class = idx.field_class; + output_row.field = -1 * idx.id; + output_row.source = rid; + output_row.value = BTRIM(REGEXP_REPLACE(facet_text, E'\\s+', ' ', 'g')); + + output_row.facet_field = TRUE; + RETURN NEXT output_row; + output_row.facet_field = FALSE; + END IF; + + -- insert raw node text for display + IF idx.display_field THEN + + IF idx.display_xpath IS NOT NULL AND idx.display_xpath <> '' THEN + display_text := oils_xpath_string( idx.display_xpath, xml_node, joiner, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] ); + ELSE + display_text := curr_text; + END IF; + + output_row.field_class = idx.field_class; + output_row.field = -1 * idx.id; + output_row.source = rid; + output_row.value = BTRIM(REGEXP_REPLACE(display_text, E'\\s+', ' ', 'g')); + + output_row.display_field = TRUE; + RETURN NEXT output_row; + output_row.display_field = FALSE; + END IF; + + END LOOP; + + CONTINUE WHEN raw_text IS NULL OR raw_text = ''; + + -- insert combined node text for searching + IF idx.search_field THEN + output_row.field_class = idx.field_class; + output_row.field = idx.id; + output_row.source = rid; + output_row.value = BTRIM(REGEXP_REPLACE(raw_text, E'\\s+', ' ', 'g')); + + output_row.search_field = TRUE; + RETURN NEXT output_row; + output_row.search_field = FALSE; + END IF; + + END LOOP; + +END; +$func$ LANGUAGE PLPGSQL; + +CREATE OR REPLACE FUNCTION metabib.reingest_metabib_field_entries( + bib_id BIGINT, + skip_facet BOOL DEFAULT FALSE, + skip_display BOOL DEFAULT FALSE, + skip_browse BOOL DEFAULT FALSE, + skip_search BOOL DEFAULT FALSE, + only_fields INT[] DEFAULT '{}'::INT[] +) RETURNS VOID AS $func$ +DECLARE + fclass RECORD; + ind_data metabib.field_entry_template%ROWTYPE; + mbe_row metabib.browse_entry%ROWTYPE; + mbe_id BIGINT; + b_skip_facet BOOL; + b_skip_display BOOL; + b_skip_browse BOOL; + b_skip_search BOOL; + value_prepped TEXT; + field_list INT[] := only_fields; + field_types TEXT[] := '{}'::TEXT[]; +BEGIN + + IF field_list = '{}'::INT[] THEN + SELECT ARRAY_AGG(id) INTO field_list FROM config.metabib_field; + END IF; + + SELECT COALESCE(NULLIF(skip_facet, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name = 'ingest.skip_facet_indexing' AND enabled)) INTO b_skip_facet; + SELECT COALESCE(NULLIF(skip_display, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name = 'ingest.skip_display_indexing' AND enabled)) INTO b_skip_display; + SELECT COALESCE(NULLIF(skip_browse, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name = 'ingest.skip_browse_indexing' AND enabled)) INTO b_skip_browse; + SELECT COALESCE(NULLIF(skip_search, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name = 'ingest.skip_search_indexing' AND enabled)) INTO b_skip_search; + + IF NOT b_skip_facet THEN field_types := field_types || '{facet}'; END IF; + IF NOT b_skip_display THEN field_types := field_types || '{display}'; END IF; + IF NOT b_skip_browse THEN field_types := field_types || '{browse}'; END IF; + IF NOT b_skip_search THEN field_types := field_types || '{search}'; END IF; + + PERFORM * FROM config.internal_flag WHERE name = 'ingest.assume_inserts_only' AND enabled; + IF NOT FOUND THEN + IF NOT b_skip_search THEN + FOR fclass IN SELECT * FROM config.metabib_class LOOP + -- RAISE NOTICE 'Emptying out %', fclass.name; + EXECUTE $$DELETE FROM metabib.$$ || fclass.name || $$_field_entry WHERE source = $$ || bib_id; + END LOOP; + END IF; + IF NOT b_skip_facet THEN + DELETE FROM metabib.facet_entry WHERE source = bib_id; + END IF; + IF NOT b_skip_display THEN + DELETE FROM metabib.display_entry WHERE source = bib_id; + END IF; + IF NOT b_skip_browse THEN + DELETE FROM metabib.browse_entry_def_map WHERE source = bib_id; + END IF; + END IF; + + FOR ind_data IN SELECT * FROM biblio.extract_metabib_field_entry( bib_id, ' ', field_types, field_list ) LOOP + + -- don't store what has been normalized away + CONTINUE WHEN ind_data.value IS NULL; + + IF ind_data.field < 0 THEN + ind_data.field = -1 * ind_data.field; + END IF; + + IF ind_data.facet_field AND NOT b_skip_facet THEN + INSERT INTO metabib.facet_entry (field, source, value) + VALUES (ind_data.field, ind_data.source, ind_data.value); + END IF; + + IF ind_data.display_field AND NOT b_skip_display THEN + INSERT INTO metabib.display_entry (field, source, value) + VALUES (ind_data.field, ind_data.source, ind_data.value); + END IF; + + + IF ind_data.browse_field AND NOT b_skip_browse THEN + -- A caveat about this SELECT: this should take care of replacing + -- old mbe rows when data changes, but not if normalization (by + -- which I mean specifically the output of + -- evergreen.oils_tsearch2()) changes. It may or may not be + -- expensive to add a comparison of index_vector to index_vector + -- to the WHERE clause below. + + CONTINUE WHEN ind_data.sort_value IS NULL; + + value_prepped := metabib.browse_normalize(ind_data.value, ind_data.field); + IF ind_data.browse_nocase THEN + SELECT INTO mbe_row * FROM metabib.browse_entry + WHERE evergreen.lowercase(value) = evergreen.lowercase(value_prepped) AND sort_value = ind_data.sort_value + ORDER BY sort_value, value LIMIT 1; -- gotta pick something, I guess + ELSE + SELECT INTO mbe_row * FROM metabib.browse_entry + WHERE value = value_prepped AND sort_value = ind_data.sort_value; + END IF; + + IF FOUND THEN + mbe_id := mbe_row.id; + ELSE + INSERT INTO metabib.browse_entry + ( value, sort_value ) VALUES + ( value_prepped, ind_data.sort_value ); + + mbe_id := CURRVAL('metabib.browse_entry_id_seq'::REGCLASS); + END IF; + + INSERT INTO metabib.browse_entry_def_map (entry, def, source, authority) + VALUES (mbe_id, ind_data.field, ind_data.source, ind_data.authority); + END IF; + + IF ind_data.search_field AND NOT b_skip_search THEN + -- Avoid inserting duplicate rows + EXECUTE 'SELECT 1 FROM metabib.' || ind_data.field_class || + '_field_entry WHERE field = $1 AND source = $2 AND value = $3' + INTO mbe_id USING ind_data.field, ind_data.source, ind_data.value; + -- RAISE NOTICE 'Search for an already matching row returned %', mbe_id; + IF mbe_id IS NULL THEN + EXECUTE $$ + INSERT INTO metabib.$$ || ind_data.field_class || $$_field_entry (field, source, value) + VALUES ($$ || + quote_literal(ind_data.field) || $$, $$ || + quote_literal(ind_data.source) || $$, $$ || + quote_literal(ind_data.value) || + $$);$$; + END IF; + END IF; + + END LOOP; + + IF NOT b_skip_search THEN + PERFORM metabib.update_combined_index_vectors(bib_id); + END IF; + + RETURN; +END; +$func$ LANGUAGE PLPGSQL; + +COMMIT; + diff --git a/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.case-insensitive-browse-field-match.sql b/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.case-insensitive-browse-field-match.sql deleted file mode 100644 index de482c3196..0000000000 --- a/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.case-insensitive-browse-field-match.sql +++ /dev/null @@ -1,353 +0,0 @@ -BEGIN; - -ALTER TYPE metabib.field_entry_template ADD ATTRIBUTE browse_nocase BOOL CASCADE; - -ALTER TABLE config.metabib_field ADD COLUMN browse_nocase BOOL NOT NULL DEFAULT FALSE; - -CREATE OR REPLACE FUNCTION biblio.extract_metabib_field_entry ( - rid BIGINT, - default_joiner TEXT, - field_types TEXT[], - only_fields INT[] -) RETURNS SETOF metabib.field_entry_template AS $func$ -DECLARE - bib biblio.record_entry%ROWTYPE; - idx config.metabib_field%ROWTYPE; - xfrm config.xml_transform%ROWTYPE; - prev_xfrm TEXT; - transformed_xml TEXT; - xml_node TEXT; - xml_node_list TEXT[]; - facet_text TEXT; - display_text TEXT; - browse_text TEXT; - sort_value TEXT; - raw_text TEXT; - curr_text TEXT; - joiner TEXT := default_joiner; -- XXX will index defs supply a joiner? - authority_text TEXT; - authority_link BIGINT; - output_row metabib.field_entry_template%ROWTYPE; - process_idx BOOL; -BEGIN - - -- Start out with no field-use bools set - output_row.browse_nocase = FALSE; - output_row.browse_field = FALSE; - output_row.facet_field = FALSE; - output_row.display_field = FALSE; - output_row.search_field = FALSE; - - -- Get the record - SELECT INTO bib * FROM biblio.record_entry WHERE id = rid; - - -- Loop over the indexing entries - FOR idx IN SELECT * FROM config.metabib_field WHERE id = ANY (only_fields) ORDER BY format LOOP - CONTINUE WHEN idx.xpath IS NULL OR idx.xpath = ''; -- pure virtual field - - process_idx := FALSE; - IF idx.display_field AND 'display' = ANY (field_types) THEN process_idx = TRUE; END IF; - IF idx.browse_field AND 'browse' = ANY (field_types) THEN process_idx = TRUE; END IF; - IF idx.search_field AND 'search' = ANY (field_types) THEN process_idx = TRUE; END IF; - IF idx.facet_field AND 'facet' = ANY (field_types) THEN process_idx = TRUE; END IF; - CONTINUE WHEN process_idx = FALSE; -- disabled for all types - - joiner := COALESCE(idx.joiner, default_joiner); - - SELECT INTO xfrm * from config.xml_transform WHERE name = idx.format; - - -- See if we can skip the XSLT ... it's expensive - IF prev_xfrm IS NULL OR prev_xfrm <> xfrm.name THEN - -- Can't skip the transform - IF xfrm.xslt <> '---' THEN - transformed_xml := oils_xslt_process(bib.marc,xfrm.xslt); - ELSE - transformed_xml := bib.marc; - END IF; - - prev_xfrm := xfrm.name; - END IF; - - xml_node_list := oils_xpath( idx.xpath, transformed_xml, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] ); - - raw_text := NULL; - FOR xml_node IN SELECT x FROM unnest(xml_node_list) AS x LOOP - CONTINUE WHEN xml_node !~ E'^\\s*<'; - - -- XXX much of this should be moved into oils_xpath_string... - curr_text := ARRAY_TO_STRING(evergreen.array_remove_item_by_value(evergreen.array_remove_item_by_value( - oils_xpath( '//text()', -- get the content of all the nodes within the main selected node - REGEXP_REPLACE( xml_node, E'\\s+', ' ', 'g' ) -- Translate adjacent whitespace to a single space - ), ' '), ''), -- throw away morally empty (bankrupt?) strings - joiner - ); - - CONTINUE WHEN curr_text IS NULL OR curr_text = ''; - - IF raw_text IS NOT NULL THEN - raw_text := raw_text || joiner; - END IF; - - raw_text := COALESCE(raw_text,'') || curr_text; - - -- autosuggest/metabib.browse_entry - IF idx.browse_field THEN - output_row.browse_nocase = idx.browse_nocase; - - IF idx.browse_xpath IS NOT NULL AND idx.browse_xpath <> '' THEN - browse_text := oils_xpath_string( idx.browse_xpath, xml_node, joiner, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] ); - ELSE - browse_text := curr_text; - END IF; - - IF idx.browse_sort_xpath IS NOT NULL AND - idx.browse_sort_xpath <> '' THEN - - sort_value := oils_xpath_string( - idx.browse_sort_xpath, xml_node, joiner, - ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] - ); - ELSE - sort_value := browse_text; - END IF; - - output_row.field_class = idx.field_class; - output_row.field = idx.id; - output_row.source = rid; - output_row.value = BTRIM(REGEXP_REPLACE(browse_text, E'\\s+', ' ', 'g')); - output_row.sort_value := - public.naco_normalize(sort_value); - - output_row.authority := NULL; - - IF idx.authority_xpath IS NOT NULL AND idx.authority_xpath <> '' THEN - authority_text := oils_xpath_string( - idx.authority_xpath, xml_node, joiner, - ARRAY[ - ARRAY[xfrm.prefix, xfrm.namespace_uri], - ARRAY['xlink','http://www.w3.org/1999/xlink'] - ] - ); - - IF authority_text ~ '^\d+$' THEN - authority_link := authority_text::BIGINT; - PERFORM * FROM authority.record_entry WHERE id = authority_link; - IF FOUND THEN - output_row.authority := authority_link; - END IF; - END IF; - - END IF; - - output_row.browse_field = TRUE; - -- Returning browse rows with search_field = true for search+browse - -- configs allows us to retain granularity of being able to search - -- browse fields with "starts with" type operators (for example, for - -- titles of songs in music albums) - IF idx.search_field THEN - output_row.search_field = TRUE; - END IF; - RETURN NEXT output_row; - output_row.browse_nocase = FALSE; - output_row.browse_field = FALSE; - output_row.search_field = FALSE; - output_row.sort_value := NULL; - END IF; - - -- insert raw node text for faceting - IF idx.facet_field THEN - - IF idx.facet_xpath IS NOT NULL AND idx.facet_xpath <> '' THEN - facet_text := oils_xpath_string( idx.facet_xpath, xml_node, joiner, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] ); - ELSE - facet_text := curr_text; - END IF; - - output_row.field_class = idx.field_class; - output_row.field = -1 * idx.id; - output_row.source = rid; - output_row.value = BTRIM(REGEXP_REPLACE(facet_text, E'\\s+', ' ', 'g')); - - output_row.facet_field = TRUE; - RETURN NEXT output_row; - output_row.facet_field = FALSE; - END IF; - - -- insert raw node text for display - IF idx.display_field THEN - - IF idx.display_xpath IS NOT NULL AND idx.display_xpath <> '' THEN - display_text := oils_xpath_string( idx.display_xpath, xml_node, joiner, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] ); - ELSE - display_text := curr_text; - END IF; - - output_row.field_class = idx.field_class; - output_row.field = -1 * idx.id; - output_row.source = rid; - output_row.value = BTRIM(REGEXP_REPLACE(display_text, E'\\s+', ' ', 'g')); - - output_row.display_field = TRUE; - RETURN NEXT output_row; - output_row.display_field = FALSE; - END IF; - - END LOOP; - - CONTINUE WHEN raw_text IS NULL OR raw_text = ''; - - -- insert combined node text for searching - IF idx.search_field THEN - output_row.field_class = idx.field_class; - output_row.field = idx.id; - output_row.source = rid; - output_row.value = BTRIM(REGEXP_REPLACE(raw_text, E'\\s+', ' ', 'g')); - - output_row.search_field = TRUE; - RETURN NEXT output_row; - output_row.search_field = FALSE; - END IF; - - END LOOP; - -END; -$func$ LANGUAGE PLPGSQL; - -CREATE OR REPLACE FUNCTION metabib.reingest_metabib_field_entries( - bib_id BIGINT, - skip_facet BOOL DEFAULT FALSE, - skip_display BOOL DEFAULT FALSE, - skip_browse BOOL DEFAULT FALSE, - skip_search BOOL DEFAULT FALSE, - only_fields INT[] DEFAULT '{}'::INT[] -) RETURNS VOID AS $func$ -DECLARE - fclass RECORD; - ind_data metabib.field_entry_template%ROWTYPE; - mbe_row metabib.browse_entry%ROWTYPE; - mbe_id BIGINT; - b_skip_facet BOOL; - b_skip_display BOOL; - b_skip_browse BOOL; - b_skip_search BOOL; - value_prepped TEXT; - field_list INT[] := only_fields; - field_types TEXT[] := '{}'::TEXT[]; -BEGIN - - IF field_list = '{}'::INT[] THEN - SELECT ARRAY_AGG(id) INTO field_list FROM config.metabib_field; - END IF; - - SELECT COALESCE(NULLIF(skip_facet, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name = 'ingest.skip_facet_indexing' AND enabled)) INTO b_skip_facet; - SELECT COALESCE(NULLIF(skip_display, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name = 'ingest.skip_display_indexing' AND enabled)) INTO b_skip_display; - SELECT COALESCE(NULLIF(skip_browse, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name = 'ingest.skip_browse_indexing' AND enabled)) INTO b_skip_browse; - SELECT COALESCE(NULLIF(skip_search, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name = 'ingest.skip_search_indexing' AND enabled)) INTO b_skip_search; - - IF NOT b_skip_facet THEN field_types := field_types || '{facet}'; END IF; - IF NOT b_skip_display THEN field_types := field_types || '{display}'; END IF; - IF NOT b_skip_browse THEN field_types := field_types || '{browse}'; END IF; - IF NOT b_skip_search THEN field_types := field_types || '{search}'; END IF; - - PERFORM * FROM config.internal_flag WHERE name = 'ingest.assume_inserts_only' AND enabled; - IF NOT FOUND THEN - IF NOT b_skip_search THEN - FOR fclass IN SELECT * FROM config.metabib_class LOOP - -- RAISE NOTICE 'Emptying out %', fclass.name; - EXECUTE $$DELETE FROM metabib.$$ || fclass.name || $$_field_entry WHERE source = $$ || bib_id; - END LOOP; - END IF; - IF NOT b_skip_facet THEN - DELETE FROM metabib.facet_entry WHERE source = bib_id; - END IF; - IF NOT b_skip_display THEN - DELETE FROM metabib.display_entry WHERE source = bib_id; - END IF; - IF NOT b_skip_browse THEN - DELETE FROM metabib.browse_entry_def_map WHERE source = bib_id; - END IF; - END IF; - - FOR ind_data IN SELECT * FROM biblio.extract_metabib_field_entry( bib_id, ' ', field_types, field_list ) LOOP - - -- don't store what has been normalized away - CONTINUE WHEN ind_data.value IS NULL; - - IF ind_data.field < 0 THEN - ind_data.field = -1 * ind_data.field; - END IF; - - IF ind_data.facet_field AND NOT b_skip_facet THEN - INSERT INTO metabib.facet_entry (field, source, value) - VALUES (ind_data.field, ind_data.source, ind_data.value); - END IF; - - IF ind_data.display_field AND NOT b_skip_display THEN - INSERT INTO metabib.display_entry (field, source, value) - VALUES (ind_data.field, ind_data.source, ind_data.value); - END IF; - - - IF ind_data.browse_field AND NOT b_skip_browse THEN - -- A caveat about this SELECT: this should take care of replacing - -- old mbe rows when data changes, but not if normalization (by - -- which I mean specifically the output of - -- evergreen.oils_tsearch2()) changes. It may or may not be - -- expensive to add a comparison of index_vector to index_vector - -- to the WHERE clause below. - - CONTINUE WHEN ind_data.sort_value IS NULL; - - value_prepped := metabib.browse_normalize(ind_data.value, ind_data.field); - IF ind_data.browse_nocase THEN - SELECT INTO mbe_row * FROM metabib.browse_entry - WHERE evergreen.lowercase(value) = evergreen.lowercase(value_prepped) AND sort_value = ind_data.sort_value - ORDER BY sort_value, value LIMIT 1; -- gotta pick something, I guess - ELSE - SELECT INTO mbe_row * FROM metabib.browse_entry - WHERE value = value_prepped AND sort_value = ind_data.sort_value; - END IF; - - IF FOUND THEN - mbe_id := mbe_row.id; - ELSE - INSERT INTO metabib.browse_entry - ( value, sort_value ) VALUES - ( value_prepped, ind_data.sort_value ); - - mbe_id := CURRVAL('metabib.browse_entry_id_seq'::REGCLASS); - END IF; - - INSERT INTO metabib.browse_entry_def_map (entry, def, source, authority) - VALUES (mbe_id, ind_data.field, ind_data.source, ind_data.authority); - END IF; - - IF ind_data.search_field AND NOT b_skip_search THEN - -- Avoid inserting duplicate rows - EXECUTE 'SELECT 1 FROM metabib.' || ind_data.field_class || - '_field_entry WHERE field = $1 AND source = $2 AND value = $3' - INTO mbe_id USING ind_data.field, ind_data.source, ind_data.value; - -- RAISE NOTICE 'Search for an already matching row returned %', mbe_id; - IF mbe_id IS NULL THEN - EXECUTE $$ - INSERT INTO metabib.$$ || ind_data.field_class || $$_field_entry (field, source, value) - VALUES ($$ || - quote_literal(ind_data.field) || $$, $$ || - quote_literal(ind_data.source) || $$, $$ || - quote_literal(ind_data.value) || - $$);$$; - END IF; - END IF; - - END LOOP; - - IF NOT b_skip_search THEN - PERFORM metabib.update_combined_index_vectors(bib_id); - END IF; - - RETURN; -END; -$func$ LANGUAGE PLPGSQL; - -COMMIT; -