From 5f0306865e871aab3e072bfb1cdf191340efb680 Mon Sep 17 00:00:00 2001 From: Mike Rylander Date: Fri, 7 Mar 2014 15:48:35 -0500 Subject: [PATCH] LP#1284864: Don't leak deleted constituent records There were times when deleted constituent records were leaking into the display of metarecord results. In particular, when a metarecord's lead record was deleted, the lead was not recalculated. We address this situation directly now in the remapper. Signed-off-by: Mike Rylander Signed-off-by: Bill Erickson Signed-off-by: Ben Shum --- Open-ILS/src/sql/Pg/030.schema.metabib.sql | 100 ++++-- Open-ILS/src/sql/Pg/990.schema.unapi.sql | 2 +- ...X.functions.metarecord-deleted-constituents.sql | 343 +++++++++++++++++++++ 3 files changed, 416 insertions(+), 29 deletions(-) create mode 100644 Open-ILS/src/sql/Pg/upgrade/XXXX.functions.metarecord-deleted-constituents.sql diff --git a/Open-ILS/src/sql/Pg/030.schema.metabib.sql b/Open-ILS/src/sql/Pg/030.schema.metabib.sql index 554deb4e5b..b8ad51a44e 100644 --- a/Open-ILS/src/sql/Pg/030.schema.metabib.sql +++ b/Open-ILS/src/sql/Pg/030.schema.metabib.sql @@ -1359,49 +1359,85 @@ BEGIN END; $func$ LANGUAGE PLPGSQL; -CREATE OR REPLACE FUNCTION metabib.remap_metarecord_for_bib( bib_id BIGINT, fp TEXT ) RETURNS BIGINT AS $func$ +CREATE OR REPLACE FUNCTION metabib.remap_metarecord_for_bib( bib_id BIGINT, fp TEXT, bib_is_deleted BOOL DEFAULT FALSE, retain_deleted BOOL DEFAULT FALSE ) RETURNS BIGINT AS $func$ DECLARE + new_mapping BOOL := TRUE; source_count INT; old_mr BIGINT; tmp_mr metabib.metarecord%ROWTYPE; deleted_mrs BIGINT[]; BEGIN - DELETE FROM metabib.metarecord_source_map WHERE source = bib_id; -- Rid ourselves of the search-estimate-killing linkage + -- We need to make sure we're not a deleted master record of an MR + IF bib_is_deleted THEN + FOR old_mr IN SELECT id FROM metabib.metarecord WHERE master_record = bib_id LOOP - FOR tmp_mr IN SELECT m.* FROM metabib.metarecord m JOIN metabib.metarecord_source_map s ON (s.metarecord = m.id) WHERE s.source = bib_id LOOP + IF NOT retain_deleted THEN -- Go away for any MR that we're master of, unless retained + DELETE FROM metabib.metarecord_source_map WHERE source = bib_id; + END IF; - IF old_mr IS NULL AND fp = tmp_mr.fingerprint THEN -- Find the first fingerprint-matching - old_mr := tmp_mr.id; - ELSE - SELECT COUNT(*) INTO source_count FROM metabib.metarecord_source_map WHERE metarecord = tmp_mr.id; - IF source_count = 0 THEN -- No other records - deleted_mrs := ARRAY_APPEND(deleted_mrs, tmp_mr.id); - DELETE FROM metabib.metarecord WHERE id = tmp_mr.id; + -- Now, are there any more sources on this MR? + SELECT COUNT(*) INTO source_count FROM metabib.metarecord_source_map WHERE metarecord = old_mr; + + IF source_count = 0 AND NOT retain_deleted THEN -- No other records + deleted_mrs := ARRAY_APPEND(deleted_mrs, old_mr); -- Just in case... + DELETE FROM metabib.metarecord WHERE id = old_mr; + + ELSE -- indeed there are. Update it with a null cache and recalcualated master record + UPDATE metabib.metarecord + SET mods = NULL, + master_record = ( SELECT id FROM biblio.record_entry WHERE fingerprint = fp AND NOT deleted ORDER BY quality DESC LIMIT 1) + WHERE id = old_mr; END IF; - END IF; + END LOOP; - END LOOP; + ELSE -- insert or update + + FOR tmp_mr IN SELECT m.* FROM metabib.metarecord m JOIN metabib.metarecord_source_map s ON (s.metarecord = m.id) WHERE s.source = bib_id LOOP + + -- Find the first fingerprint-matching + IF old_mr IS NULL AND fp = tmp_mr.fingerprint THEN + old_mr := tmp_mr.id; + new_mapping := FALSE; + + ELSE -- Our fingerprint changed ... maybe remove the old MR + DELETE FROM metabib.metarecord_source_map WHERE metarecord = old_mr AND source = bib_id; -- remove the old source mapping + SELECT COUNT(*) INTO source_count FROM metabib.metarecord_source_map WHERE metarecord = tmp_mr.id; + IF source_count = 0 THEN -- No other records + deleted_mrs := ARRAY_APPEND(deleted_mrs, tmp_mr.id); + DELETE FROM metabib.metarecord WHERE id = tmp_mr.id; + END IF; + END IF; + + END LOOP; + + -- we found no suitable, preexisting MR based on old source maps + IF old_mr IS NULL THEN + SELECT id INTO old_mr FROM metabib.metarecord WHERE fingerprint = fp; -- is there one for our current fingerprint? - IF old_mr IS NULL THEN -- we found no suitable, preexisting MR based on old source maps - SELECT id INTO old_mr FROM metabib.metarecord WHERE fingerprint = fp; -- is there one for our current fingerprint? - IF old_mr IS NULL THEN -- nope, create one and grab its id - INSERT INTO metabib.metarecord ( fingerprint, master_record ) VALUES ( fp, bib_id ); - SELECT id INTO old_mr FROM metabib.metarecord WHERE fingerprint = fp; - ELSE -- indeed there is. update it with a null cache and recalcualated master record + IF old_mr IS NULL THEN -- nope, create one and grab its id + INSERT INTO metabib.metarecord ( fingerprint, master_record ) VALUES ( fp, bib_id ); + SELECT id INTO old_mr FROM metabib.metarecord WHERE fingerprint = fp; + + ELSE -- indeed there is. update it with a null cache and recalcualated master record + UPDATE metabib.metarecord + SET mods = NULL, + master_record = ( SELECT id FROM biblio.record_entry WHERE fingerprint = fp AND NOT deleted ORDER BY quality DESC LIMIT 1) + WHERE id = old_mr; + END IF; + + ELSE -- there was one we already attached to, update its mods cache and master_record UPDATE metabib.metarecord SET mods = NULL, - master_record = ( SELECT id FROM biblio.record_entry WHERE fingerprint = fp ORDER BY quality DESC LIMIT 1) + master_record = ( SELECT id FROM biblio.record_entry WHERE fingerprint = fp AND NOT deleted ORDER BY quality DESC LIMIT 1) WHERE id = old_mr; END IF; - ELSE -- there was one we already attached to, update its mods cache and master_record - UPDATE metabib.metarecord - SET mods = NULL, - master_record = ( SELECT id FROM biblio.record_entry WHERE fingerprint = fp ORDER BY quality DESC LIMIT 1) - WHERE id = old_mr; - END IF; - INSERT INTO metabib.metarecord_source_map (metarecord, source) VALUES (old_mr, bib_id); -- new source mapping + IF new_mapping THEN + INSERT INTO metabib.metarecord_source_map (metarecord, source) VALUES (old_mr, bib_id); -- new source mapping + END IF; + + END IF; IF ARRAY_UPPER(deleted_mrs,1) > 0 THEN UPDATE action.hold_request SET target = old_mr WHERE target IN ( SELECT unnest(deleted_mrs) ) AND hold_type = 'M'; -- if we had to delete any MRs above, make sure their holds are moved @@ -1412,6 +1448,7 @@ BEGIN END; $func$ LANGUAGE PLPGSQL; + CREATE OR REPLACE FUNCTION biblio.map_authority_linking (bibid BIGINT, marc TEXT) RETURNS BIGINT AS $func$ DELETE FROM authority.bib_linking WHERE bib = $1; INSERT INTO authority.bib_linking (bib, authority) @@ -1639,16 +1676,23 @@ $func$ LANGUAGE PLPGSQL; -- AFTER UPDATE OR INSERT trigger for biblio.record_entry CREATE OR REPLACE FUNCTION biblio.indexing_ingest_or_delete () RETURNS TRIGGER AS $func$ +DECLARE + tmp_bool BOOL; BEGIN IF NEW.deleted THEN -- If this bib is deleted + PERFORM * FROM config.internal_flag WHERE name = 'ingest.metarecord_mapping.preserve_on_delete' AND enabled; - IF NOT FOUND THEN + + tmp_bool := FOUND; -- Just in case this is changed by some other statement + + PERFORM metabib.remap_metarecord_for_bib( NEW.id, NEW.fingerprint, TRUE, tmp_bool ); + + IF NOT tmp_bool THEN -- One needs to keep these around to support searches -- with the #deleted modifier, so one should turn on the named -- internal flag for that functionality. - DELETE FROM metabib.metarecord_source_map WHERE source = NEW.id; DELETE FROM metabib.record_attr_vector_list WHERE source = NEW.id; END IF; diff --git a/Open-ILS/src/sql/Pg/990.schema.unapi.sql b/Open-ILS/src/sql/Pg/990.schema.unapi.sql index 02d87c7da9..2c7c0b7dae 100644 --- a/Open-ILS/src/sql/Pg/990.schema.unapi.sql +++ b/Open-ILS/src/sql/Pg/990.schema.unapi.sql @@ -1440,7 +1440,7 @@ BEGIN FOR subrec IN SELECT bre.* FROM biblio.record_entry bre JOIN metabib.metarecord_source_map mmsm ON (mmsm.source = bre.id) JOIN metabib.metarecord mmr ON (mmr.id = mmsm.metarecord) - WHERE mmr.id = obj_id + WHERE mmr.id = obj_id AND NOT bre.deleted ORDER BY CASE WHEN bre.id = mmr.master_record THEN 0 ELSE bre.id END LIMIT COALESCE((slimit->'bre')::INT, 5) LOOP diff --git a/Open-ILS/src/sql/Pg/upgrade/XXXX.functions.metarecord-deleted-constituents.sql b/Open-ILS/src/sql/Pg/upgrade/XXXX.functions.metarecord-deleted-constituents.sql new file mode 100644 index 0000000000..7b98009340 --- /dev/null +++ b/Open-ILS/src/sql/Pg/upgrade/XXXX.functions.metarecord-deleted-constituents.sql @@ -0,0 +1,343 @@ + +CREATE OR REPLACE FUNCTION metabib.remap_metarecord_for_bib( bib_id BIGINT, fp TEXT, bib_is_deleted BOOL DEFAULT FALSE, retain_deleted BOOL DEFAULT FALSE ) RETURNS BIGINT AS $func$ +DECLARE + new_mapping BOOL := TRUE; + source_count INT; + old_mr BIGINT; + tmp_mr metabib.metarecord%ROWTYPE; + deleted_mrs BIGINT[]; +BEGIN + + -- We need to make sure we're not a deleted master record of an MR + IF bib_is_deleted THEN + FOR old_mr IN SELECT id FROM metabib.metarecord WHERE master_record = bib_id LOOP + + IF NOT retain_deleted THEN -- Go away for any MR that we're master of, unless retained + DELETE FROM metabib.metarecord_source_map WHERE source = bib_id; + END IF; + + -- Now, are there any more sources on this MR? + SELECT COUNT(*) INTO source_count FROM metabib.metarecord_source_map WHERE metarecord = old_mr; + + IF source_count = 0 AND NOT retain_deleted THEN -- No other records + deleted_mrs := ARRAY_APPEND(deleted_mrs, old_mr); -- Just in case... + DELETE FROM metabib.metarecord WHERE id = old_mr; + + ELSE -- indeed there are. Update it with a null cache and recalcualated master record + UPDATE metabib.metarecord + SET mods = NULL, + master_record = ( SELECT id FROM biblio.record_entry WHERE fingerprint = fp AND NOT deleted ORDER BY quality DESC LIMIT 1) + WHERE id = old_mr; + END IF; + END LOOP; + + ELSE -- insert or update + + FOR tmp_mr IN SELECT m.* FROM metabib.metarecord m JOIN metabib.metarecord_source_map s ON (s.metarecord = m.id) WHERE s.source = bib_id LOOP + + -- Find the first fingerprint-matching + IF old_mr IS NULL AND fp = tmp_mr.fingerprint THEN + old_mr := tmp_mr.id; + new_mapping := FALSE; + + ELSE -- Our fingerprint changed ... maybe remove the old MR + DELETE FROM metabib.metarecord_source_map WHERE metarecord = old_mr AND source = bib_id; -- remove the old source mapping + SELECT COUNT(*) INTO source_count FROM metabib.metarecord_source_map WHERE metarecord = tmp_mr.id; + IF source_count = 0 THEN -- No other records + deleted_mrs := ARRAY_APPEND(deleted_mrs, tmp_mr.id); + DELETE FROM metabib.metarecord WHERE id = tmp_mr.id; + END IF; + END IF; + + END LOOP; + + -- we found no suitable, preexisting MR based on old source maps + IF old_mr IS NULL THEN + SELECT id INTO old_mr FROM metabib.metarecord WHERE fingerprint = fp; -- is there one for our current fingerprint? + + IF old_mr IS NULL THEN -- nope, create one and grab its id + INSERT INTO metabib.metarecord ( fingerprint, master_record ) VALUES ( fp, bib_id ); + SELECT id INTO old_mr FROM metabib.metarecord WHERE fingerprint = fp; + + ELSE -- indeed there is. update it with a null cache and recalcualated master record + UPDATE metabib.metarecord + SET mods = NULL, + master_record = ( SELECT id FROM biblio.record_entry WHERE fingerprint = fp AND NOT deleted ORDER BY quality DESC LIMIT 1) + WHERE id = old_mr; + END IF; + + ELSE -- there was one we already attached to, update its mods cache and master_record + UPDATE metabib.metarecord + SET mods = NULL, + master_record = ( SELECT id FROM biblio.record_entry WHERE fingerprint = fp AND NOT deleted ORDER BY quality DESC LIMIT 1) + WHERE id = old_mr; + END IF; + + IF new_mapping THEN + INSERT INTO metabib.metarecord_source_map (metarecord, source) VALUES (old_mr, bib_id); -- new source mapping + END IF; + + END IF; + + IF ARRAY_UPPER(deleted_mrs,1) > 0 THEN + UPDATE action.hold_request SET target = old_mr WHERE target IN ( SELECT unnest(deleted_mrs) ) AND hold_type = 'M'; -- if we had to delete any MRs above, make sure their holds are moved + END IF; + + RETURN old_mr; + +END; +$func$ LANGUAGE PLPGSQL; + +DROP FUNCTION metabib.remap_metarecord_for_bib( bib_id BIGINT, fp TEXT ); + +CREATE OR REPLACE FUNCTION biblio.indexing_ingest_or_delete () RETURNS TRIGGER AS $func$ +DECLARE + tmp_bool BOOL; +BEGIN + + IF NEW.deleted THEN -- If this bib is deleted + + PERFORM * FROM config.internal_flag WHERE + name = 'ingest.metarecord_mapping.preserve_on_delete' AND enabled; + + tmp_bool := FOUND; -- Just in case this is changed by some other statement + + PERFORM metabib.remap_metarecord_for_bib( NEW.id, NEW.fingerprint, TRUE, tmp_bool ); + + IF NOT tmp_bool THEN + -- One needs to keep these around to support searches + -- with the #deleted modifier, so one should turn on the named + -- internal flag for that functionality. + DELETE FROM metabib.record_attr_vector_list WHERE source = NEW.id; + END IF; + + DELETE FROM authority.bib_linking WHERE bib = NEW.id; -- Avoid updating fields in bibs that are no longer visible + DELETE FROM biblio.peer_bib_copy_map WHERE peer_record = NEW.id; -- Separate any multi-homed items + DELETE FROM metabib.browse_entry_def_map WHERE source = NEW.id; -- Don't auto-suggest deleted bibs + RETURN NEW; -- and we're done + END IF; + + IF TG_OP = 'UPDATE' THEN -- re-ingest? + PERFORM * FROM config.internal_flag WHERE name = 'ingest.reingest.force_on_same_marc' AND enabled; + + IF NOT FOUND AND OLD.marc = NEW.marc THEN -- don't do anything if the MARC didn't change + RETURN NEW; + END IF; + END IF; + + -- Record authority linking + PERFORM * FROM config.internal_flag WHERE name = 'ingest.disable_authority_linking' AND enabled; + IF NOT FOUND THEN + PERFORM biblio.map_authority_linking( NEW.id, NEW.marc ); + END IF; + + -- Flatten and insert the mfr data + PERFORM * FROM config.internal_flag WHERE name = 'ingest.disable_metabib_full_rec' AND enabled; + IF NOT FOUND THEN + PERFORM metabib.reingest_metabib_full_rec(NEW.id); + + -- Now we pull out attribute data, which is dependent on the mfr for all but XPath-based fields + PERFORM * FROM config.internal_flag WHERE name = 'ingest.disable_metabib_rec_descriptor' AND enabled; + IF NOT FOUND THEN + PERFORM metabib.reingest_record_attributes(NEW.id, NULL, NEW.marc, TG_OP = 'INSERT' OR OLD.deleted); + END IF; + END IF; + + -- Gather and insert the field entry data + PERFORM metabib.reingest_metabib_field_entries(NEW.id); + + -- Located URI magic + IF TG_OP = 'INSERT' THEN + PERFORM * FROM config.internal_flag WHERE name = 'ingest.disable_located_uri' AND enabled; + IF NOT FOUND THEN + PERFORM biblio.extract_located_uris( NEW.id, NEW.marc, NEW.editor ); + END IF; + ELSE + PERFORM * FROM config.internal_flag WHERE name = 'ingest.disable_located_uri' AND enabled; + IF NOT FOUND THEN + PERFORM biblio.extract_located_uris( NEW.id, NEW.marc, NEW.editor ); + END IF; + END IF; + + -- (re)map metarecord-bib linking + IF TG_OP = 'INSERT' THEN -- if not deleted and performing an insert, check for the flag + PERFORM * FROM config.internal_flag WHERE name = 'ingest.metarecord_mapping.skip_on_insert' AND enabled; + IF NOT FOUND THEN + PERFORM metabib.remap_metarecord_for_bib( NEW.id, NEW.fingerprint ); + END IF; + ELSE -- we're doing an update, and we're not deleted, remap + PERFORM * FROM config.internal_flag WHERE name = 'ingest.metarecord_mapping.skip_on_update' AND enabled; + IF NOT FOUND THEN + PERFORM metabib.remap_metarecord_for_bib( NEW.id, NEW.fingerprint ); + END IF; + END IF; + + RETURN NEW; +END; +$func$ LANGUAGE PLPGSQL; + +CREATE OR REPLACE FUNCTION unapi.mmr ( + obj_id BIGINT, + format TEXT, + ename TEXT, + includes TEXT[], + org TEXT, + depth INT DEFAULT NULL, + slimit HSTORE DEFAULT NULL, + soffset HSTORE DEFAULT NULL, + include_xmlns BOOL DEFAULT TRUE, + pref_lib INT DEFAULT NULL +) +RETURNS XML AS $F$ +DECLARE + mmrec metabib.metarecord%ROWTYPE; + leadrec biblio.record_entry%ROWTYPE; + subrec biblio.record_entry%ROWTYPE; + layout unapi.bre_output_layout%ROWTYPE; + xfrm config.xml_transform%ROWTYPE; + ouid INT; + xml_buf TEXT; -- growing XML document + tmp_xml TEXT; -- single-use XML string + xml_frag TEXT; -- single-use XML fragment + top_el TEXT; + output XML; + hxml XML; + axml XML; + subxml XML; -- subordinate records elements + sub_xpath TEXT; + parts TEXT[]; +BEGIN + + -- xpath for extracting bre.marc values from subordinate records + -- so they may be appended to the MARC of the master record prior + -- to XSLT processing. + -- subjects, isbn, issn, upc -- anything else? + sub_xpath := + '//*[starts-with(@tag, "6") or @tag="020" or @tag="022" or @tag="024"]'; + + IF org = '-' OR org IS NULL THEN + SELECT shortname INTO org FROM evergreen.org_top(); + END IF; + + SELECT id INTO ouid FROM actor.org_unit WHERE shortname = org; + + IF ouid IS NULL THEN + RETURN NULL::XML; + END IF; + + SELECT INTO mmrec * FROM metabib.metarecord WHERE id = obj_id; + IF NOT FOUND THEN + RETURN NULL::XML; + END IF; + + -- TODO: aggregate holdings from constituent records + IF format = 'holdings_xml' THEN -- the special case + output := unapi.mmr_holdings_xml( + obj_id, ouid, org, depth, + evergreen.array_remove_item_by_value(includes,'holdings_xml'), + slimit, soffset, include_xmlns, pref_lib); + RETURN output; + END IF; + + SELECT * INTO layout FROM unapi.bre_output_layout WHERE name = format; + + IF layout.name IS NULL THEN + RETURN NULL::XML; + END IF; + + SELECT * INTO xfrm FROM config.xml_transform WHERE name = layout.transform; + + SELECT INTO leadrec * FROM biblio.record_entry WHERE id = mmrec.master_record; + + -- Grab distinct MVF for all records if requested + IF ('mra' = ANY (includes)) THEN + axml := unapi.mmr_mra(obj_id,NULL,NULL,NULL,org,depth,NULL,NULL,TRUE,pref_lib); + ELSE + axml := NULL::XML; + END IF; + + xml_buf = leadrec.marc; + + hxml := NULL::XML; + IF ('holdings_xml' = ANY (includes)) THEN + hxml := unapi.mmr_holdings_xml( + obj_id, ouid, org, depth, + evergreen.array_remove_item_by_value(includes,'holdings_xml'), + slimit, soffset, include_xmlns, pref_lib); + END IF; + + subxml := NULL::XML; + parts := '{}'::TEXT[]; + FOR subrec IN SELECT bre.* FROM biblio.record_entry bre + JOIN metabib.metarecord_source_map mmsm ON (mmsm.source = bre.id) + JOIN metabib.metarecord mmr ON (mmr.id = mmsm.metarecord) + WHERE mmr.id = obj_id AND NOT bre.deleted + ORDER BY CASE WHEN bre.id = mmr.master_record THEN 0 ELSE bre.id END + LIMIT COALESCE((slimit->'bre')::INT, 5) LOOP + + IF subrec.id = leadrec.id THEN CONTINUE; END IF; + -- Append choice data from the the non-lead records to the + -- the lead record document + + parts := parts || xpath(sub_xpath, subrec.marc::XML)::TEXT[]; + END LOOP; + + SELECT ARRAY_TO_STRING( ARRAY_AGG( DISTINCT p ), '' )::XML INTO subxml FROM UNNEST(parts) p; + + -- append data from the subordinate records to the + -- main record document before applying the XSLT + + IF subxml IS NOT NULL THEN + xml_buf := REGEXP_REPLACE(xml_buf, + '(.*?)$', subxml || '' || E'\\1'); + END IF; + + IF format = 'marcxml' THEN + -- If we're not using the prefixed namespace in + -- this record, then remove all declarations of it + IF xml_buf !~ E' mods) + top_el := REGEXP_REPLACE(xml_buf, E'^.*?<((?:\\S+:)?' || + layout.holdings_element || ').*$', E'\\1'); + + IF axml IS NOT NULL THEN + xml_buf := REGEXP_REPLACE(xml_buf, + '(.*?)$', axml || '\\1'); + END IF; + + IF hxml IS NOT NULL THEN + xml_buf := REGEXP_REPLACE(xml_buf, + '(.*?)$', hxml || '\\1'); + END IF; + + IF ('mmr.unapi' = ANY (includes)) THEN + output := REGEXP_REPLACE( + xml_buf, + '(.*?)', + XMLELEMENT( + name abbr, + XMLATTRIBUTES( + 'http://www.w3.org/1999/xhtml' AS xmlns, + 'unapi-id' AS class, + 'tag:open-ils.org:U2@mmr/' || obj_id || '/' || org AS title + ) + )::TEXT || '\\1' + ); + ELSE + output := xml_buf; + END IF; + + -- remove ignorable whitesace + output := REGEXP_REPLACE(output::TEXT,E'>\\s+<','><','gs')::XML; + RETURN output; +END; +$F$ LANGUAGE PLPGSQL STABLE; + -- 2.11.0