From 75c5b2f95deedc9996f8ea42fc0e9f0428af6778 Mon Sep 17 00:00:00 2001 From: Bill Erickson Date: Mon, 23 Oct 2017 11:39:11 -0400 Subject: [PATCH] JBAS-1764 A/T purge schema and data updates Signed-off-by: Bill Erickson --- KCLS/sql/schema/deploy/at-purge-interval-data.sql | 127 ++++++++++++++++++++++ KCLS/sql/schema/deploy/at-purge-interval.sql | 94 ++++++++++++++++ KCLS/sql/schema/revert/at-purge-interval-data.sql | 7 ++ KCLS/sql/schema/revert/at-purge-interval.sql | 12 ++ KCLS/sql/schema/sqitch.plan | 2 + KCLS/sql/schema/verify/at-purge-interval-data.sql | 7 ++ KCLS/sql/schema/verify/at-purge-interval.sql | 7 ++ 7 files changed, 256 insertions(+) create mode 100644 KCLS/sql/schema/deploy/at-purge-interval-data.sql create mode 100644 KCLS/sql/schema/deploy/at-purge-interval.sql create mode 100644 KCLS/sql/schema/revert/at-purge-interval-data.sql create mode 100644 KCLS/sql/schema/revert/at-purge-interval.sql create mode 100644 KCLS/sql/schema/verify/at-purge-interval-data.sql create mode 100644 KCLS/sql/schema/verify/at-purge-interval.sql diff --git a/KCLS/sql/schema/deploy/at-purge-interval-data.sql b/KCLS/sql/schema/deploy/at-purge-interval-data.sql new file mode 100644 index 0000000000..e58347eae0 --- /dev/null +++ b/KCLS/sql/schema/deploy/at-purge-interval-data.sql @@ -0,0 +1,127 @@ +-- Deploy kcls-evergreen:at-purge-interval-data to pg +-- requires: at-purge-interval + +BEGIN; + +SET STATEMENT_TIMEOUT = 0; + +SELECT CLOCK_TIMESTAMP(), 'getting started'; + +UPDATE action_trigger.hook SET passive = FALSE WHERE key IN ( + 'format.po.html', + 'format.po.pdf', + 'format.selfcheck.checkout', + 'format.selfcheck.items_out', + 'format.selfcheck.holds', + 'format.selfcheck.fines', + 'format.acqcle.html', + 'format.acqinv.html', + 'format.acqli.html', + 'aur.ordered', + 'aur.received', + 'aur.cancelled', + 'aur.created', + 'aur.rejected' +); + +-- Apply some max_delay values where needed. +-- These defs are unused, but best to apply a max_delay now so we can +-- apply a retention_interval that won't be forgotten later. +UPDATE action_trigger.event_definition SET max_delay = '10 days' + WHERE id IN (2, 9, 24, 36, 49) AND max_delay IS NULL; + +-- most events are purged after a short period +UPDATE action_trigger.event_definition SET retention_interval = '1 day'; + +-- keep these events forever +UPDATE action_trigger.event_definition + SET retention_interval = NULL + WHERE hook IN ( + -- keep these event types forever + 'acqpo.activated', + 'format.acqcle.html', + 'format.acqinv.html', + 'format.po.html' + ); + +-- keep these events for 1 year +UPDATE action_trigger.event_definition + SET retention_interval = '1 year' + WHERE hook IN ( + 'checkout.due', + 'hold.available', + 'hold_request.cancel.expire_holds_shelf', + 'hold_request.cancel.expire_no_target', + 'hold_request.cancel.staff', + 'hold_request.long_wait', + 'hold_request.shelf_expires_soon', + 'penalty.PATRON_EXCEEDS_COLLECTIONS_WARNING', + 'password.reset_request', + 'money.format.payment_receipt.email', + 'money.format.payment_receipt.print', + 'lost.auto', + 'longoverdue.auto', + 'circ.staff_age_to_lost', + 'circ.format.missing_pieces.slip.print', + 'circ.format.missing_pieces.letter.print' + ); + +CREATE TEMPORARY TABLE tmp_at_event ON COMMIT DROP AS + SELECT evt.* FROM action_trigger.event evt + JOIN action_trigger.event_definition def ON (def.id = evt.event_def) + WHERE + evt.state = 'pending' + OR def.retention_interval IS NULL -- these never expire + OR evt.update_time > (NOW() - def.retention_interval) +; + +SELECT CLOCK_TIMESTAMP(), 'tmp_at_event created', + (SELECT COUNT(*) FROM tmp_at_event); + +--SELECT COUNT(*), DATE_TRUNC('year', update_time), state +-- FROM tmp_at_event GROUP BY 2, 3 ORDER BY 2, 3; + +-- keep only template outputs that are linked from saved events +CREATE TEMPORARY TABLE tmp_at_event_output ON COMMIT DROP AS + WITH linked_outputs AS ( + SELECT templates.id AS id FROM ( + SELECT template_output AS id + FROM tmp_at_event WHERE template_output IS NOT NULL + UNION + SELECT error_output AS id + FROM tmp_at_event WHERE error_output IS NOT NULL + UNION + SELECT async_output AS id + FROM tmp_at_event WHERE async_output IS NOT NULL + ) templates + ) + SELECT * FROM action_trigger.event_output + WHERE id IN (SELECT id FROM linked_outputs); + +SELECT CLOCK_TIMESTAMP(), 'tmp_at_event_output created', + (SELECT COUNT(*) FROM tmp_at_event_output); + +-- TRUNCATE AND REBUILD TABLES FROM TMP DATA +TRUNCATE action_trigger.event, action_trigger.event_output; + +-- rebuild event_output first since event depends on it. + +INSERT INTO action_trigger.event_output SELECT * FROM tmp_at_event_output; +SELECT CLOCK_TIMESTAMP(), 'action_trigger.event_output rebuilt', + (SELECT COUNT(*) FROM action_trigger.event_output); + +INSERT INTO action_trigger.event SELECT * FROM tmp_at_event; +SELECT CLOCK_TIMESTAMP(), 'action_trigger.event rebuilt', + (SELECT COUNT(*) FROM action_trigger.event); + +-- TESTING +ROLLBACK; + +/* +COMMIT; + +ANALYZE action_trigger.event; +ANALYZE action_trigger.event_output; +*/ + + diff --git a/KCLS/sql/schema/deploy/at-purge-interval.sql b/KCLS/sql/schema/deploy/at-purge-interval.sql new file mode 100644 index 0000000000..edf225839f --- /dev/null +++ b/KCLS/sql/schema/deploy/at-purge-interval.sql @@ -0,0 +1,94 @@ +-- Deploy kcls-evergreen:at-purge-interval to pg +-- requires: 2.9-to-2.10-upgrade-reingest + +BEGIN; + +ALTER TABLE action_trigger.event_definition + ADD COLUMN retention_interval INTERVAL; + +CREATE OR REPLACE FUNCTION action_trigger.check_valid_retention_interval() + RETURNS TRIGGER AS $_$ +BEGIN + + /* + * 1. Retention intervals are alwyas allowed on active hooks. + * 2. On passive hooks, retention intervals are only allowed + * when the event definition has a max_delay value and the + * retention_interval value is greater than the difference + * beteween the delay and max_delay values. + */ + PERFORM TRUE FROM action_trigger.hook + WHERE key = NEW.hook AND NOT passive; + + IF FOUND THEN + RETURN NEW; + END IF; + + IF NEW.max_delay IS NOT NULL THEN + IF EXTRACT(EPOCH FROM NEW.retention_interval) > + ABS(EXTRACT(EPOCH FROM (NEW.max_delay - NEW.delay))) THEN + RETURN NEW; -- all good + ELSE + RAISE EXCEPTION 'retention_interval is too short'; + END IF; + ELSE + RAISE EXCEPTION 'retention_interval requires max_delay'; + END IF; +END; +$_$ LANGUAGE PLPGSQL; + +CREATE TRIGGER is_valid_retention_interval + BEFORE INSERT OR UPDATE ON action_trigger.event_definition + FOR EACH ROW WHEN (NEW.retention_interval IS NOT NULL) + EXECUTE PROCEDURE action_trigger.check_valid_retention_interval(); + +CREATE OR REPLACE FUNCTION action_trigger.purge_events() RETURNS VOID AS $_$ +/** + * Deleting expired events without simultaneously deleting their outputs + * creates orphaned outputs. Deleting their outputs and all of the events + * linking back to them, plus any outputs those events link to is messy and + * inefficient. It's simpler to handle them in 2 sweeping steps. + * + * 1. Delete expired events. + * 2. Delete orphaned event outputs. + * + * This has the added benefit of removing outputs that may have been + * orphaned by some other process. Such outputs are not usuable by + * the system. + * + * This does not guarantee that all events within an event group are + * purged at the same time. In such cases, the remaining events will + * be purged with the next instance of the purge (or soon thereafter). + * This is another nod toward efficiency over completeness of old + * data that's circling the bit bucket anyway. + */ +BEGIN + + DELETE FROM action_trigger.event WHERE id IN ( + SELECT evt.id + FROM action_trigger.event evt + JOIN action_trigger.event_definition def ON (def.id = evt.event_def) + WHERE def.retention_interval IS NOT NULL + AND evt.state <> 'pending' + AND evt.update_time < (NOW() - def.retention_interval) + ); + + WITH linked_outputs AS ( + SELECT templates.id AS id FROM ( + SELECT template_output AS id + FROM action_trigger.event WHERE template_output IS NOT NULL + UNION + SELECT error_output AS id + FROM action_trigger.event WHERE error_output IS NOT NULL + UNION + SELECT async_output AS id + FROM action_trigger.event WHERE async_output IS NOT NULL + ) templates + ) DELETE FROM action_trigger.event_output + WHERE id NOT IN (SELECT id FROM linked_outputs); + +END; +$_$ LANGUAGE PLPGSQL; + + +COMMIT; diff --git a/KCLS/sql/schema/revert/at-purge-interval-data.sql b/KCLS/sql/schema/revert/at-purge-interval-data.sql new file mode 100644 index 0000000000..6b0869be53 --- /dev/null +++ b/KCLS/sql/schema/revert/at-purge-interval-data.sql @@ -0,0 +1,7 @@ +-- Revert kcls-evergreen:at-purge-interval-data from pg + +BEGIN; + +-- DATA COMMITS CANNOT BE UNDONE + +COMMIT; diff --git a/KCLS/sql/schema/revert/at-purge-interval.sql b/KCLS/sql/schema/revert/at-purge-interval.sql new file mode 100644 index 0000000000..d2b42eb277 --- /dev/null +++ b/KCLS/sql/schema/revert/at-purge-interval.sql @@ -0,0 +1,12 @@ +-- Revert kcls-evergreen:at-purge-interval from pg + +BEGIN; + +DROP FUNCTION action_trigger.purge_events(); +DROP TRIGGER is_valid_retention_interval ON action_trigger.event_definition; +DROP FUNCTION action_trigger.check_valid_retention_interval(); +ALTER TABLE action_trigger.event_definition DROP COLUMN retention_interval; + +COMMIT; + + diff --git a/KCLS/sql/schema/sqitch.plan b/KCLS/sql/schema/sqitch.plan index da71759cd1..813d9a44f8 100644 --- a/KCLS/sql/schema/sqitch.plan +++ b/KCLS/sql/schema/sqitch.plan @@ -57,3 +57,5 @@ checkout-ok-2.10-recovery [2.9-to-2.10-upgrade] 2017-12-20T20:32:10Z Bill Ericks new-headings-mattype [2.9-to-2.10-upgrade] 2017-11-30T15:41:38Z Bill Erickson,,, # Add mattype filter for new headings report browse-mattype-filter [2.9-to-2.10-upgrade] 2017-11-28T20:39:59Z Bill Erickson,,, # Add mattype filter for browse search auth-prop-partial-matches [2.9-to-2.10-upgrade-reingest] 2017-09-29T15:47:04Z Bill Erickson,,, # Authority propagation partial matches +at-purge-interval [2.9-to-2.10-upgrade-reingest] 2017-10-23T15:31:00Z Bill Erickson,,, # A/T Purge schema changes +at-purge-interval-data [at-purge-interval] 2017-10-23T15:31:23Z Bill Erickson,,, # A/T Purge data cleanup diff --git a/KCLS/sql/schema/verify/at-purge-interval-data.sql b/KCLS/sql/schema/verify/at-purge-interval-data.sql new file mode 100644 index 0000000000..f06b6791d7 --- /dev/null +++ b/KCLS/sql/schema/verify/at-purge-interval-data.sql @@ -0,0 +1,7 @@ +-- Verify kcls-evergreen:at-purge-interval-data on pg + +BEGIN; + +-- XXX Add verifications here. + +ROLLBACK; diff --git a/KCLS/sql/schema/verify/at-purge-interval.sql b/KCLS/sql/schema/verify/at-purge-interval.sql new file mode 100644 index 0000000000..f8bbc4e5ee --- /dev/null +++ b/KCLS/sql/schema/verify/at-purge-interval.sql @@ -0,0 +1,7 @@ +-- Verify kcls-evergreen:at-purge-interval on pg + +BEGIN; + +-- XXX Add verifications here. + +ROLLBACK; -- 2.11.0