From c964cd4ce0fc97db9b726d6ada4902f6388c255e Mon Sep 17 00:00:00 2001 From: Mike Rylander Date: Mon, 7 Apr 2014 14:03:51 -0400 Subject: [PATCH] LP#1303940: Protect against bogus data that can breaking indexing In at least the case of browse indexing, some MARC contents can cause us problems when normalization (such as stripping all punctuation) can leave us with a NULL value for index-intended data. The patch avoids the specific case of browse data being null and causing an ingest failure. Signed-off-by: Mike Rylander Jason Stephenson Signed-off-by: Galen Charlton --- Open-ILS/src/sql/Pg/030.schema.metabib.sql | 2 + .../XXXX.schema.browse-ingest-null-protect.sql | 103 +++++++++++++++++++++ 2 files changed, 105 insertions(+) create mode 100644 Open-ILS/src/sql/Pg/upgrade/XXXX.schema.browse-ingest-null-protect.sql diff --git a/Open-ILS/src/sql/Pg/030.schema.metabib.sql b/Open-ILS/src/sql/Pg/030.schema.metabib.sql index fccde87da1..97750e7e07 100644 --- a/Open-ILS/src/sql/Pg/030.schema.metabib.sql +++ b/Open-ILS/src/sql/Pg/030.schema.metabib.sql @@ -826,6 +826,8 @@ BEGIN -- expensive to add a comparison of index_vector to index_vector -- to the WHERE clause below. + CONTINUE WHEN ind_data.value IS NULL OR ind_data.sort_value IS NULL; + value_prepped := metabib.browse_normalize(ind_data.value, ind_data.field); SELECT INTO mbe_row * FROM metabib.browse_entry WHERE value = value_prepped AND sort_value = ind_data.sort_value; diff --git a/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.browse-ingest-null-protect.sql b/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.browse-ingest-null-protect.sql new file mode 100644 index 0000000000..4a3759deaa --- /dev/null +++ b/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.browse-ingest-null-protect.sql @@ -0,0 +1,103 @@ +BEGIN; + +SELECT evergreen.upgrade_deps_block_check('XXXX', :eg_version); + +CREATE OR REPLACE FUNCTION metabib.reingest_metabib_field_entries( bib_id BIGINT, skip_facet BOOL DEFAULT FALSE, skip_browse BOOL DEFAULT FALSE, skip_search BOOL DEFAULT FALSE ) RETURNS VOID AS $func$ +DECLARE + fclass RECORD; + ind_data metabib.field_entry_template%ROWTYPE; + mbe_row metabib.browse_entry%ROWTYPE; + mbe_id BIGINT; + b_skip_facet BOOL; + b_skip_browse BOOL; + b_skip_search BOOL; + value_prepped TEXT; +BEGIN + + SELECT COALESCE(NULLIF(skip_facet, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name = 'ingest.skip_facet_indexing' AND enabled)) INTO b_skip_facet; + SELECT COALESCE(NULLIF(skip_browse, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name = 'ingest.skip_browse_indexing' AND enabled)) INTO b_skip_browse; + SELECT COALESCE(NULLIF(skip_search, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name = 'ingest.skip_search_indexing' AND enabled)) INTO b_skip_search; + + PERFORM * FROM config.internal_flag WHERE name = 'ingest.assume_inserts_only' AND enabled; + IF NOT FOUND THEN + IF NOT b_skip_search THEN + FOR fclass IN SELECT * FROM config.metabib_class LOOP + -- RAISE NOTICE 'Emptying out %', fclass.name; + EXECUTE $$DELETE FROM metabib.$$ || fclass.name || $$_field_entry WHERE source = $$ || bib_id; + END LOOP; + END IF; + IF NOT b_skip_facet THEN + DELETE FROM metabib.facet_entry WHERE source = bib_id; + END IF; + IF NOT b_skip_browse THEN + DELETE FROM metabib.browse_entry_def_map WHERE source = bib_id; + END IF; + END IF; + + FOR ind_data IN SELECT * FROM biblio.extract_metabib_field_entry( bib_id ) LOOP + IF ind_data.field < 0 THEN + ind_data.field = -1 * ind_data.field; + END IF; + + IF ind_data.facet_field AND NOT b_skip_facet THEN + INSERT INTO metabib.facet_entry (field, source, value) + VALUES (ind_data.field, ind_data.source, ind_data.value); + END IF; + + IF ind_data.browse_field AND NOT b_skip_browse THEN + -- A caveat about this SELECT: this should take care of replacing + -- old mbe rows when data changes, but not if normalization (by + -- which I mean specifically the output of + -- evergreen.oils_tsearch2()) changes. It may or may not be + -- expensive to add a comparison of index_vector to index_vector + -- to the WHERE clause below. + + CONTINUE WHEN ind_data.value IS NULL OR ind_data.sort_value IS NULL; + + value_prepped := metabib.browse_normalize(ind_data.value, ind_data.field); + SELECT INTO mbe_row * FROM metabib.browse_entry + WHERE value = value_prepped AND sort_value = ind_data.sort_value; + + IF FOUND THEN + mbe_id := mbe_row.id; + ELSE + INSERT INTO metabib.browse_entry + ( value, sort_value ) VALUES + ( value_prepped, ind_data.sort_value ); + + mbe_id := CURRVAL('metabib.browse_entry_id_seq'::REGCLASS); + END IF; + + INSERT INTO metabib.browse_entry_def_map (entry, def, source, authority) + VALUES (mbe_id, ind_data.field, ind_data.source, ind_data.authority); + END IF; + + IF ind_data.search_field AND NOT b_skip_search THEN + -- Avoid inserting duplicate rows + EXECUTE 'SELECT 1 FROM metabib.' || ind_data.field_class || + '_field_entry WHERE field = $1 AND source = $2 AND value = $3' + INTO mbe_id USING ind_data.field, ind_data.source, ind_data.value; + -- RAISE NOTICE 'Search for an already matching row returned %', mbe_id; + IF mbe_id IS NULL THEN + EXECUTE $$ + INSERT INTO metabib.$$ || ind_data.field_class || $$_field_entry (field, source, value) + VALUES ($$ || + quote_literal(ind_data.field) || $$, $$ || + quote_literal(ind_data.source) || $$, $$ || + quote_literal(ind_data.value) || + $$);$$; + END IF; + END IF; + + END LOOP; + + IF NOT b_skip_search THEN + PERFORM metabib.update_combined_index_vectors(bib_id); + END IF; + + RETURN; +END; +$func$ LANGUAGE PLPGSQL; + +COMMIT; + -- 2.11.0