From: Mike Rylander Date: Fri, 18 Aug 2017 19:32:34 +0000 (-0400) Subject: LP#1251394: Reingest streamlining, schema realigning, rebasing X-Git-Url: https://old-git.evergreen-ils.org/?a=commitdiff_plain;h=c7598ec8d3d4cf442ed2c3cb153b00ec4cf58ce4;p=working%2FEvergreen.git LP#1251394: Reingest streamlining, schema realigning, rebasing I've rebased this to master and undertaken some work to allow streamlined reingest of specific index definitions. The API remains essentially backwards compatible, but reingest_metabib_field_entries() is extended to accept a list of index definition IDs to which it should restrict its work. If that optional parameter is not passed, all index definitions are processed. This has the benefit of skipping XML transformation for index definitions which won't produce output useful to the requested reingest process, which should provide a significant speed boost for some situations. The upgrade schema has now been realigned with some missing bits from the baseline, but the baseline may still be lacking some from the upgrade. The data upgrade script has been made to use the existing index definitions where they match the needs of the new ones, and to use the new reindexing options. 
Signed-off-by: Mike Rylander Signed-off-by: Bill Erickson Signed-off-by: Mike Rylander --- diff --git a/Open-ILS/src/sql/Pg/030.schema.metabib.sql b/Open-ILS/src/sql/Pg/030.schema.metabib.sql index c9935bae2d..eea6a25b93 100644 --- a/Open-ILS/src/sql/Pg/030.schema.metabib.sql +++ b/Open-ILS/src/sql/Pg/030.schema.metabib.sql @@ -624,7 +624,12 @@ CREATE TYPE metabib.field_entry_template AS ( sort_value TEXT ); -CREATE OR REPLACE FUNCTION biblio.extract_metabib_field_entry ( rid BIGINT, default_joiner TEXT ) RETURNS SETOF metabib.field_entry_template AS $func$ +CREATE OR REPLACE FUNCTION biblio.extract_metabib_field_entry ( + rid BIGINT, + default_joiner TEXT, + field_types TEXT[], + only_fields INT[] +) RETURNS SETOF metabib.field_entry_template AS $func$ DECLARE bib biblio.record_entry%ROWTYPE; idx config.metabib_field%ROWTYPE; @@ -643,6 +648,7 @@ DECLARE authority_text TEXT; authority_link BIGINT; output_row metabib.field_entry_template%ROWTYPE; + process_idx BOOL; BEGIN -- Start out with no field-use bools set @@ -655,7 +661,14 @@ BEGIN SELECT INTO bib * FROM biblio.record_entry WHERE id = rid; -- Loop over the indexing entries - FOR idx IN SELECT * FROM config.metabib_field ORDER BY format LOOP + FOR idx IN SELECT * FROM config.metabib_field WHERE id = ANY (only_fields) ORDER BY format LOOP + + process_idx := FALSE; + IF idx.display_field AND 'display' = ANY (field_types) THEN process_idx = TRUE; END IF; + IF idx.browse_field AND 'browse' = ANY (field_types) THEN process_idx = TRUE; END IF; + IF idx.search_field AND 'search' = ANY (field_types) THEN process_idx = TRUE; END IF; + IF idx.facet_field AND 'facet' = ANY (field_types) THEN process_idx = TRUE; END IF; + CONTINUE WHEN process_idx = FALSE; joiner := COALESCE(idx.joiner, default_joiner); @@ -871,9 +884,13 @@ END; $func$ LANGUAGE PLPGSQL; CREATE OR REPLACE FUNCTION metabib.reingest_metabib_field_entries( - bib_id BIGINT, skip_facet BOOL DEFAULT FALSE, - skip_display BOOL DEFAULT FALSE, skip_browse BOOL 
DEFAULT FALSE, - skip_search BOOL DEFAULT FALSE ) RETURNS VOID AS $func$ + bib_id BIGINT, + skip_facet BOOL DEFAULT FALSE, + skip_display BOOL DEFAULT FALSE, + skip_browse BOOL DEFAULT FALSE, + skip_search BOOL DEFAULT FALSE, + only_fields INT[] DEFAULT '{}'::INT[] +) RETURNS VOID AS $func$ DECLARE fclass RECORD; ind_data metabib.field_entry_template%ROWTYPE; @@ -884,13 +901,24 @@ DECLARE b_skip_browse BOOL; b_skip_search BOOL; value_prepped TEXT; + field_list INT[] := only_fields; + field_types TEXT[] := '{}'::TEXT[]; BEGIN + IF field_list = '{}'::INT[] THEN + SELECT ARRAY_AGG(id) INTO field_list FROM config.metabib_field; + END IF; + SELECT COALESCE(NULLIF(skip_facet, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name = 'ingest.skip_facet_indexing' AND enabled)) INTO b_skip_facet; SELECT COALESCE(NULLIF(skip_display, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name = 'ingest.skip_display_indexing' AND enabled)) INTO b_skip_display; SELECT COALESCE(NULLIF(skip_browse, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name = 'ingest.skip_browse_indexing' AND enabled)) INTO b_skip_browse; SELECT COALESCE(NULLIF(skip_search, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name = 'ingest.skip_search_indexing' AND enabled)) INTO b_skip_search; + IF NOT b_skip_facet THEN field_types := field_types || 'facet'; END IF; + IF NOT b_skip_display THEN field_types := field_types || 'display'; END IF; + IF NOT b_skip_browse THEN field_types := field_types || 'browse'; END IF; + IF NOT b_skip_search THEN field_types := field_types || 'search'; END IF; + PERFORM * FROM config.internal_flag WHERE name = 'ingest.assume_inserts_only' AND enabled; IF NOT FOUND THEN IF NOT b_skip_search THEN @@ -910,7 +938,7 @@ BEGIN END IF; END IF; - FOR ind_data IN SELECT * FROM biblio.extract_metabib_field_entry( bib_id ) LOOP + FOR ind_data IN SELECT * FROM biblio.extract_metabib_field_entry( bib_id, ' ', field_types, field_list ) 
LOOP -- don't store what has been normalized away CONTINUE WHEN ind_data.value IS NULL; @@ -985,11 +1013,6 @@ BEGIN END; $func$ LANGUAGE PLPGSQL; --- default to a space joiner -CREATE OR REPLACE FUNCTION biblio.extract_metabib_field_entry ( BIGINT ) RETURNS SETOF metabib.field_entry_template AS $func$ - SELECT * FROM biblio.extract_metabib_field_entry($1, ' '); -$func$ LANGUAGE SQL; - CREATE OR REPLACE FUNCTION authority.flatten_marc ( rid BIGINT ) RETURNS SETOF authority.full_rec AS $func$ DECLARE auth authority.record_entry%ROWTYPE; @@ -1710,17 +1733,8 @@ BEGIN PERFORM metabib.reingest_metabib_field_entries(NEW.id); -- Located URI magic - IF TG_OP = 'INSERT' THEN - PERFORM * FROM config.internal_flag WHERE name = 'ingest.disable_located_uri' AND enabled; - IF NOT FOUND THEN - PERFORM biblio.extract_located_uris( NEW.id, NEW.marc, NEW.editor ); - END IF; - ELSE - PERFORM * FROM config.internal_flag WHERE name = 'ingest.disable_located_uri' AND enabled; - IF NOT FOUND THEN - PERFORM biblio.extract_located_uris( NEW.id, NEW.marc, NEW.editor ); - END IF; - END IF; + PERFORM * FROM config.internal_flag WHERE name = 'ingest.disable_located_uri' AND enabled; + IF NOT FOUND THEN PERFORM biblio.extract_located_uris( NEW.id, NEW.marc, NEW.editor ); END IF; -- (re)map metarecord-bib linking IF TG_OP = 'INSERT' THEN -- if not deleted and performing an insert, check for the flag diff --git a/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.metabib-display-field.sql b/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.metabib-display-field.sql index 467ef60c4c..5493d653fe 100644 --- a/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.metabib-display-field.sql +++ b/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.metabib-display-field.sql @@ -1,6 +1,26 @@ BEGIN; +CREATE OR REPLACE FUNCTION + config.metabib_representative_field_is_valid(INTEGER, TEXT) RETURNS BOOLEAN AS $$ + SELECT EXISTS (SELECT 1 FROM config.metabib_field WHERE id = $1 AND field_class = $2); +$$ LANGUAGE SQL STRICT IMMUTABLE; + +COMMENT ON 
FUNCTION config.metabib_representative_field_is_valid(INTEGER, TEXT) IS $$ +Ensure the field_class value on the selected representative field matches +the class name. +$$; + +ALTER TABLE config.metabib_class + ADD COLUMN representative_field + INTEGER REFERENCES config.metabib_field(id), + ADD CONSTRAINT rep_field_unique UNIQUE(representative_field), + ADD CONSTRAINT rep_field_is_valid CHECK ( + representative_field IS NULL OR + config.metabib_representative_field_is_valid(representative_field, name) + ) +; + ALTER TABLE config.metabib_field ADD COLUMN display_xpath TEXT, ADD COLUMN display_field BOOL NOT NULL DEFAULT FALSE; @@ -130,7 +150,16 @@ CREATE TRIGGER display_field_force_nfc_tgr ALTER TYPE metabib.field_entry_template ADD ATTRIBUTE display_field BOOL; -CREATE OR REPLACE FUNCTION biblio.extract_metabib_field_entry ( rid BIGINT, default_joiner TEXT ) RETURNS SETOF metabib.field_entry_template AS $func$ +DROP FUNCTION metabib.reingest_metabib_field_entries(BIGINT, BOOL, BOOL, BOOL); +DROP FUNCTION biblio.extract_metabib_field_entry(BIGINT); +DROP FUNCTION biblio.extract_metabib_field_entry(BIGINT, TEXT); + +CREATE OR REPLACE FUNCTION biblio.extract_metabib_field_entry ( + rid BIGINT, + default_joiner TEXT, + field_types TEXT[], + only_fields INT[] +) RETURNS SETOF metabib.field_entry_template AS $func$ DECLARE bib biblio.record_entry%ROWTYPE; idx config.metabib_field%ROWTYPE; @@ -149,6 +178,7 @@ DECLARE authority_text TEXT; authority_link BIGINT; output_row metabib.field_entry_template%ROWTYPE; + process_idx BOOL; BEGIN -- Start out with no field-use bools set @@ -161,7 +191,14 @@ BEGIN SELECT INTO bib * FROM biblio.record_entry WHERE id = rid; -- Loop over the indexing entries - FOR idx IN SELECT * FROM config.metabib_field ORDER BY format LOOP + FOR idx IN SELECT * FROM config.metabib_field WHERE id = ANY (only_fields) ORDER BY format LOOP + + process_idx := FALSE; + IF idx.display_field AND 'display' = ANY (field_types) THEN process_idx = TRUE; END 
IF; + IF idx.browse_field AND 'browse' = ANY (field_types) THEN process_idx = TRUE; END IF; + IF idx.search_field AND 'search' = ANY (field_types) THEN process_idx = TRUE; END IF; + IF idx.facet_field AND 'facet' = ANY (field_types) THEN process_idx = TRUE; END IF; + CONTINUE WHEN process_idx = FALSE; joiner := COALESCE(idx.joiner, default_joiner); @@ -323,12 +360,14 @@ END; $func$ LANGUAGE PLPGSQL; -DROP FUNCTION metabib.reingest_metabib_field_entries(BIGINT, BOOL, BOOL, BOOL); - CREATE OR REPLACE FUNCTION metabib.reingest_metabib_field_entries( - bib_id BIGINT, skip_facet BOOL DEFAULT FALSE, - skip_display BOOL DEFAULT FALSE, skip_browse BOOL DEFAULT FALSE, - skip_search BOOL DEFAULT FALSE ) RETURNS VOID AS $func$ + bib_id BIGINT, + skip_facet BOOL DEFAULT FALSE, + skip_display BOOL DEFAULT FALSE, + skip_browse BOOL DEFAULT FALSE, + skip_search BOOL DEFAULT FALSE, + only_fields INT[] DEFAULT '{}'::INT[] +) RETURNS VOID AS $func$ DECLARE fclass RECORD; ind_data metabib.field_entry_template%ROWTYPE; @@ -339,13 +378,24 @@ DECLARE b_skip_browse BOOL; b_skip_search BOOL; value_prepped TEXT; + field_list INT[] := only_fields; + field_types TEXT[] := '{}'::TEXT[]; BEGIN + IF field_list = '{}'::INT[] THEN + SELECT ARRAY_AGG(id) INTO field_list FROM config.metabib_field; + END IF; + SELECT COALESCE(NULLIF(skip_facet, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name = 'ingest.skip_facet_indexing' AND enabled)) INTO b_skip_facet; SELECT COALESCE(NULLIF(skip_display, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name = 'ingest.skip_display_indexing' AND enabled)) INTO b_skip_display; SELECT COALESCE(NULLIF(skip_browse, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name = 'ingest.skip_browse_indexing' AND enabled)) INTO b_skip_browse; SELECT COALESCE(NULLIF(skip_search, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name = 'ingest.skip_search_indexing' AND enabled)) INTO b_skip_search; + IF NOT b_skip_facet 
THEN field_types := field_types || 'facet'; END IF; + IF NOT b_skip_display THEN field_types := field_types || 'display'; END IF; + IF NOT b_skip_browse THEN field_types := field_types || 'browse'; END IF; + IF NOT b_skip_search THEN field_types := field_types || 'search'; END IF; + PERFORM * FROM config.internal_flag WHERE name = 'ingest.assume_inserts_only' AND enabled; IF NOT FOUND THEN IF NOT b_skip_search THEN @@ -365,7 +415,7 @@ BEGIN END IF; END IF; - FOR ind_data IN SELECT * FROM biblio.extract_metabib_field_entry( bib_id ) LOOP + FOR ind_data IN SELECT * FROM biblio.extract_metabib_field_entry( bib_id, ' ', field_types, field_list ) LOOP -- don't store what has been normalized away CONTINUE WHEN ind_data.value IS NULL; @@ -440,7 +490,83 @@ BEGIN END; $func$ LANGUAGE PLPGSQL; -COMMIT; +-- AFTER UPDATE OR INSERT trigger for biblio.record_entry +CREATE OR REPLACE FUNCTION biblio.indexing_ingest_or_delete () RETURNS TRIGGER AS $func$ +DECLARE + tmp_bool BOOL; +BEGIN + + IF NEW.deleted THEN -- If this bib is deleted + + PERFORM * FROM config.internal_flag WHERE + name = 'ingest.metarecord_mapping.preserve_on_delete' AND enabled; + + tmp_bool := FOUND; -- Just in case this is changed by some other statement + + PERFORM metabib.remap_metarecord_for_bib( NEW.id, NEW.fingerprint, TRUE, tmp_bool ); + + IF NOT tmp_bool THEN + -- One needs to keep these around to support searches + -- with the #deleted modifier, so one should turn on the named + -- internal flag for that functionality. + DELETE FROM metabib.record_attr_vector_list WHERE source = NEW.id; + END IF; + + DELETE FROM authority.bib_linking WHERE bib = NEW.id; -- Avoid updating fields in bibs that are no longer visible + DELETE FROM biblio.peer_bib_copy_map WHERE peer_record = NEW.id; -- Separate any multi-homed items + DELETE FROM metabib.browse_entry_def_map WHERE source = NEW.id; -- Don't auto-suggest deleted bibs + RETURN NEW; -- and we're done + END IF; + + IF TG_OP = 'UPDATE' THEN -- re-ingest? 
+ PERFORM * FROM config.internal_flag WHERE name = 'ingest.reingest.force_on_same_marc' AND enabled; + IF NOT FOUND AND OLD.marc = NEW.marc THEN -- don't do anything if the MARC didn't change + RETURN NEW; + END IF; + END IF; + + -- Record authority linking + PERFORM * FROM config.internal_flag WHERE name = 'ingest.disable_authority_linking' AND enabled; + IF NOT FOUND THEN + PERFORM biblio.map_authority_linking( NEW.id, NEW.marc ); + END IF; + + -- Flatten and insert the mfr data + PERFORM * FROM config.internal_flag WHERE name = 'ingest.disable_metabib_full_rec' AND enabled; + IF NOT FOUND THEN + PERFORM metabib.reingest_metabib_full_rec(NEW.id); + + -- Now we pull out attribute data, which is dependent on the mfr for all but XPath-based fields + PERFORM * FROM config.internal_flag WHERE name = 'ingest.disable_metabib_rec_descriptor' AND enabled; + IF NOT FOUND THEN + PERFORM metabib.reingest_record_attributes(NEW.id, NULL, NEW.marc, TG_OP = 'INSERT' OR OLD.deleted); + END IF; + END IF; + + -- Gather and insert the field entry data + PERFORM metabib.reingest_metabib_field_entries(NEW.id); + -- Located URI magic + PERFORM * FROM config.internal_flag WHERE name = 'ingest.disable_located_uri' AND enabled; + IF NOT FOUND THEN PERFORM biblio.extract_located_uris( NEW.id, NEW.marc, NEW.editor ); END IF; + + -- (re)map metarecord-bib linking + IF TG_OP = 'INSERT' THEN -- if not deleted and performing an insert, check for the flag + PERFORM * FROM config.internal_flag WHERE name = 'ingest.metarecord_mapping.skip_on_insert' AND enabled; + IF NOT FOUND THEN + PERFORM metabib.remap_metarecord_for_bib( NEW.id, NEW.fingerprint ); + END IF; + ELSE -- we're doing an update, and we're not deleted, remap + PERFORM * FROM config.internal_flag WHERE name = 'ingest.metarecord_mapping.skip_on_update' AND enabled; + IF NOT FOUND THEN + PERFORM metabib.remap_metarecord_for_bib( NEW.id, NEW.fingerprint ); + END IF; + END IF; + + RETURN NEW; +END; +$func$ LANGUAGE PLPGSQL; + +COMMIT; 
diff --git a/Open-ILS/src/sql/Pg/upgrade/YYYY.data.metabib-display-field.sql b/Open-ILS/src/sql/Pg/upgrade/YYYY.data.metabib-display-field.sql index 09de901806..08832be41e 100644 --- a/Open-ILS/src/sql/Pg/upgrade/YYYY.data.metabib-display-field.sql +++ b/Open-ILS/src/sql/Pg/upgrade/YYYY.data.metabib-display-field.sql @@ -6,46 +6,21 @@ INSERT INTO config.internal_flag (name, enabled) -- Adds seed data to replace (for now) values from the 'mvr' class -INSERT INTO config.metabib_field (id, field_class, name, format, - display_field, search_field, browse_field, label, xpath) -VALUES - (37, 'title', 'display|title', 'mods32', TRUE, FALSE, FALSE, - oils_i18n_gettext(37, 'Title', 'cmf', 'label'), - '//mods32:mods/mods32:titleNonfiling[mods32:title and not (@type)]'), - (38, 'author', 'display|author', 'mods32', TRUE, FALSE, FALSE, - oils_i18n_gettext(38, 'Author', 'cmf', 'label'), - $$//mods32:mods/mods32:name[@type='personal' and mods32:role/mods32:roleTerm[text()='creator']]$$), - (39, 'subject', 'display|subject', 'mods32', TRUE, FALSE, FALSE, - oils_i18n_gettext(39, 'Subject', 'cmf', 'label'), - '//mods32:mods/mods32:subject'), - (40, 'subject', 'display|topic_subject', 'mods32', TRUE, FALSE, FALSE, - oils_i18n_gettext(40, 'Subject', 'cmf', 'label'), - '//mods32:mods/mods32:subject/mods32:topic'), - (41, 'identifier', 'display|isbn', 'marcxml', TRUE, FALSE, FALSE, - oils_i18n_gettext(41, 'ISBN', 'cmf', 'label'), - $$//marc:datafield[@tag='020']/marc:subfield[@code='a' or @code='z']$$) - -; +UPDATE config.metabib_field SET display_field = TRUE WHERE id IN (6, 8, 14, 16, 18); INSERT INTO config.display_field_map (name, field, multi) VALUES - ('title', 37, FALSE), - ('author', 38, FALSE), - ('subject', 39, TRUE), - ('topic_subject', 40, TRUE), - ('isbn', 41, TRUE) + ('title', 6, FALSE), + ('author', 8, FALSE), + ('subject', 16, TRUE), + ('topic_subject', 14, TRUE), + ('isbn', 18, TRUE) ; +UPDATE config.metabib_class SET representative_field = 6 WHERE name = 'title'; 
+UPDATE config.metabib_class SET representative_field = 8 WHERE name = 'author'; + COMMIT; -- REINGEST DISPLAY ENTRIES - -BEGIN; -UPDATE config.internal_flag SET enabled = TRUE WHERE name IN ( -'ingest.assume_inserts_only','ingest.disable_authority_auto_update','ingest.disable_authority_linking','ingest.disable_located_uri','ingest.disable_metabib_field_entry','ingest.disable_metabib_full_rec','ingest.disable_metabib_rec_descriptor','ingest.metarecord_mapping.preserve_on_delete','ingest.metarecord_mapping.skip_on_insert','ingest.metarecord_mapping.skip_on_update','ingest.reingest.force_on_same_marc','ingest.skip_browse_indexing','ingest.skip_facet_indexing','ingest.skip_search_indexing'); - -UPDATE biblio.record_entry SET marc = marc; - -UPDATE config.internal_flag SET enabled = FALSE WHERE name IN ( -'ingest.assume_inserts_only','ingest.disable_authority_auto_update','ingest.disable_authority_linking','ingest.disable_located_uri','ingest.disable_metabib_field_entry','ingest.disable_metabib_full_rec','ingest.disable_metabib_rec_descriptor','ingest.metarecord_mapping.preserve_on_delete','ingest.metarecord_mapping.skip_on_insert','ingest.metarecord_mapping.skip_on_update','ingest.reingest.force_on_same_marc','ingest.skip_browse_indexing','ingest.skip_facet_indexing','ingest.skip_search_indexing'); -COMMIT; +SELECT metabib.reingest_metabib_field_entries(id, TRUE, FALSE, TRUE, TRUE, '{6,8,14,16,18}'::INT[]) FROM biblio.record_entry WHERE NOT deleted AND id > 0;