JBAS-1764 A/T purge schema and data updates
authorBill Erickson <berickxx@gmail.com>
Mon, 23 Oct 2017 15:39:11 +0000 (11:39 -0400)
committerBill Erickson <berickxx@gmail.com>
Thu, 21 Mar 2019 19:46:23 +0000 (15:46 -0400)
Signed-off-by: Bill Erickson <berickxx@gmail.com>
KCLS/sql/schema/deploy/at-purge-interval-data.sql [new file with mode: 0644]
KCLS/sql/schema/deploy/at-purge-interval.sql [new file with mode: 0644]
KCLS/sql/schema/revert/at-purge-interval-data.sql [new file with mode: 0644]
KCLS/sql/schema/revert/at-purge-interval.sql [new file with mode: 0644]
KCLS/sql/schema/sqitch.plan
KCLS/sql/schema/verify/at-purge-interval-data.sql [new file with mode: 0644]
KCLS/sql/schema/verify/at-purge-interval.sql [new file with mode: 0644]

diff --git a/KCLS/sql/schema/deploy/at-purge-interval-data.sql b/KCLS/sql/schema/deploy/at-purge-interval-data.sql
new file mode 100644 (file)
index 0000000..e58347e
--- /dev/null
@@ -0,0 +1,127 @@
+-- Deploy kcls-evergreen:at-purge-interval-data to pg
+-- requires: at-purge-interval
+
+BEGIN;
+
+SET STATEMENT_TIMEOUT = 0;
+
+SELECT CLOCK_TIMESTAMP(), 'getting started';
+
+UPDATE action_trigger.hook SET passive = FALSE WHERE key IN (
+    'format.po.html',
+    'format.po.pdf',
+    'format.selfcheck.checkout',
+    'format.selfcheck.items_out',
+    'format.selfcheck.holds',
+    'format.selfcheck.fines',
+    'format.acqcle.html',
+    'format.acqinv.html',
+    'format.acqli.html',
+    'aur.ordered',
+    'aur.received',
+    'aur.cancelled',
+    'aur.created',
+    'aur.rejected'
+);
+
+-- Apply some max_delay values where needed.
+-- These defs are unused, but best to apply a max_delay now so we can
+-- apply a retention_interval that won't be forgotten later.
+UPDATE action_trigger.event_definition SET max_delay = '10 days' 
+    WHERE id IN (2, 9, 24, 36, 49) AND max_delay IS NULL;
+
+-- most events are purged after a short period
+UPDATE action_trigger.event_definition SET retention_interval = '1 day';
+
+-- keep these events forever
+UPDATE action_trigger.event_definition 
+    SET retention_interval = NULL
+    WHERE hook IN (
+        -- keep these event types forever
+        'acqpo.activated',
+        'format.acqcle.html',
+        'format.acqinv.html',
+        'format.po.html'
+    );
+
+-- keep these events for 1 year
+UPDATE action_trigger.event_definition 
+    SET retention_interval = '1 year'
+    WHERE hook IN (
+        'checkout.due',                                                
+        'hold.available',                                              
+        'hold_request.cancel.expire_holds_shelf',                      
+        'hold_request.cancel.expire_no_target',                        
+        'hold_request.cancel.staff',                                   
+        'hold_request.long_wait',
+        'hold_request.shelf_expires_soon',
+        'penalty.PATRON_EXCEEDS_COLLECTIONS_WARNING',                  
+        'password.reset_request',
+        'money.format.payment_receipt.email',
+        'money.format.payment_receipt.print',
+        'lost.auto',
+        'longoverdue.auto',
+        'circ.staff_age_to_lost',
+        'circ.format.missing_pieces.slip.print',
+        'circ.format.missing_pieces.letter.print'
+    );
+
+CREATE TEMPORARY TABLE tmp_at_event ON COMMIT DROP AS 
+    SELECT evt.* FROM action_trigger.event evt
+    JOIN action_trigger.event_definition def ON (def.id = evt.event_def)
+    WHERE 
+        evt.state = 'pending'
+        OR def.retention_interval IS NULL -- these never expire
+        OR evt.update_time > (NOW() - def.retention_interval)    
+;
+
+SELECT CLOCK_TIMESTAMP(), 'tmp_at_event created',
+    (SELECT COUNT(*) FROM tmp_at_event);
+
+--SELECT COUNT(*), DATE_TRUNC('year', update_time), state 
+--    FROM tmp_at_event GROUP BY 2, 3 ORDER BY 2, 3;
+
+-- keep only template outputs that are linked from saved events
+CREATE TEMPORARY TABLE tmp_at_event_output ON COMMIT DROP AS 
+    WITH linked_outputs AS (
+        SELECT templates.id AS id FROM (
+            SELECT template_output AS id
+                FROM tmp_at_event WHERE template_output IS NOT NULL
+            UNION
+            SELECT error_output AS id
+                FROM tmp_at_event WHERE error_output IS NOT NULL
+            UNION
+            SELECT async_output AS id
+                FROM tmp_at_event WHERE async_output IS NOT NULL
+        ) templates
+    )
+    SELECT * FROM action_trigger.event_output
+    WHERE id IN (SELECT id FROM linked_outputs);
+
+SELECT CLOCK_TIMESTAMP(), 'tmp_at_event_output created',
+    (SELECT COUNT(*) FROM tmp_at_event_output);
+
+-- TRUNCATE AND REBUILD TABLES FROM TMP DATA
+TRUNCATE action_trigger.event, action_trigger.event_output;
+
+-- rebuild event_output first since event depends on it.
+
+INSERT INTO action_trigger.event_output SELECT * FROM tmp_at_event_output;
+SELECT CLOCK_TIMESTAMP(), 'action_trigger.event_output rebuilt',
+    (SELECT COUNT(*) FROM action_trigger.event_output);
+
+INSERT INTO action_trigger.event SELECT * FROM tmp_at_event;
+SELECT CLOCK_TIMESTAMP(), 'action_trigger.event rebuilt',
+    (SELECT COUNT(*) FROM action_trigger.event);
+
+-- TESTING
+ROLLBACK;
+
+/*
+COMMIT;
+
+ANALYZE action_trigger.event;
+ANALYZE action_trigger.event_output;
+*/
+
+
diff --git a/KCLS/sql/schema/deploy/at-purge-interval.sql b/KCLS/sql/schema/deploy/at-purge-interval.sql
new file mode 100644 (file)
index 0000000..edf2258
--- /dev/null
@@ -0,0 +1,94 @@
+-- Deploy kcls-evergreen:at-purge-interval to pg
+-- requires: 2.9-to-2.10-upgrade-reingest
+
+BEGIN;
+
+ALTER TABLE action_trigger.event_definition
+    ADD COLUMN retention_interval INTERVAL;
+
+CREATE OR REPLACE FUNCTION action_trigger.check_valid_retention_interval() 
+    RETURNS TRIGGER AS $_$
+BEGIN
+
+    /*
+     * 1. Retention intervals are alwyas allowed on active hooks.
+     * 2. On passive hooks, retention intervals are only allowed
+     *    when the event definition has a max_delay value and the
+     *    retention_interval value is greater than the difference 
+     *    beteween the delay and max_delay values.
+     */ 
+    PERFORM TRUE FROM action_trigger.hook 
+        WHERE key = NEW.hook AND NOT passive;
+
+    IF FOUND THEN
+        RETURN NEW;
+    END IF;
+
+    IF NEW.max_delay IS NOT NULL THEN
+        IF EXTRACT(EPOCH FROM NEW.retention_interval) > 
+            ABS(EXTRACT(EPOCH FROM (NEW.max_delay - NEW.delay))) THEN
+            RETURN NEW; -- all good
+        ELSE
+            RAISE EXCEPTION 'retention_interval is too short';
+        END IF;
+    ELSE
+        RAISE EXCEPTION 'retention_interval requires max_delay';
+    END IF;
+END;
+$_$ LANGUAGE PLPGSQL;
+
+CREATE TRIGGER is_valid_retention_interval 
+    BEFORE INSERT OR UPDATE ON action_trigger.event_definition
+    FOR EACH ROW WHEN (NEW.retention_interval IS NOT NULL)
+    EXECUTE PROCEDURE action_trigger.check_valid_retention_interval();
+
+CREATE OR REPLACE FUNCTION action_trigger.purge_events() RETURNS VOID AS $_$
+/**
+  * Deleting expired events without simultaneously deleting their outputs
+  * creates orphaned outputs.  Deleting their outputs and all of the events 
+  * linking back to them, plus any outputs those events link to is messy and 
+  * inefficient.  It's simpler to handle them in 2 sweeping steps.
+  *
+  * 1. Delete expired events.
+  * 2. Delete orphaned event outputs.
+  *
+  * This has the added benefit of removing outputs that may have been
+  * orphaned by some other process.  Such outputs are not usuable by
+  * the system.
+  *
+  * This does not guarantee that all events within an event group are
+  * purged at the same time.  In such cases, the remaining events will
+  * be purged with the next instance of the purge (or soon thereafter).
+  * This is another nod toward efficiency over completeness of old 
+  * data that's circling the bit bucket anyway.
+  */
+BEGIN
+
+    DELETE FROM action_trigger.event WHERE id IN (
+        SELECT evt.id
+        FROM action_trigger.event evt
+        JOIN action_trigger.event_definition def ON (def.id = evt.event_def)
+        WHERE def.retention_interval IS NOT NULL 
+            AND evt.state <> 'pending'
+            AND evt.update_time < (NOW() - def.retention_interval)
+    );
+
+    WITH linked_outputs AS (
+        SELECT templates.id AS id FROM (
+            SELECT template_output AS id
+                FROM action_trigger.event WHERE template_output IS NOT NULL
+            UNION
+            SELECT error_output AS id
+                FROM action_trigger.event WHERE error_output IS NOT NULL
+            UNION
+            SELECT async_output AS id
+                FROM action_trigger.event WHERE async_output IS NOT NULL
+        ) templates
+    ) DELETE FROM action_trigger.event_output
+        WHERE id NOT IN (SELECT id FROM linked_outputs);
+
+END;
+$_$ LANGUAGE PLPGSQL;
+
+
+COMMIT;
diff --git a/KCLS/sql/schema/revert/at-purge-interval-data.sql b/KCLS/sql/schema/revert/at-purge-interval-data.sql
new file mode 100644 (file)
index 0000000..6b0869b
--- /dev/null
@@ -0,0 +1,7 @@
+-- Revert kcls-evergreen:at-purge-interval-data from pg
+
+BEGIN;
+
+-- DATA COMMITS CANNOT BE UNDONE
+
+COMMIT;
diff --git a/KCLS/sql/schema/revert/at-purge-interval.sql b/KCLS/sql/schema/revert/at-purge-interval.sql
new file mode 100644 (file)
index 0000000..d2b42eb
--- /dev/null
@@ -0,0 +1,12 @@
+-- Revert kcls-evergreen:at-purge-interval from pg
+
+BEGIN;
+
+DROP FUNCTION action_trigger.purge_events();
+DROP TRIGGER is_valid_retention_interval ON action_trigger.event_definition;
+DROP FUNCTION action_trigger.check_valid_retention_interval();
+ALTER TABLE action_trigger.event_definition DROP COLUMN retention_interval;
+
+COMMIT;
+
+
index da71759..813d9a4 100644 (file)
@@ -57,3 +57,5 @@ checkout-ok-2.10-recovery [2.9-to-2.10-upgrade] 2017-12-20T20:32:10Z Bill Ericks
 new-headings-mattype [2.9-to-2.10-upgrade] 2017-11-30T15:41:38Z Bill Erickson,,, <berick@kcls-dev-local> # Add mattype filter for new headings report
 browse-mattype-filter [2.9-to-2.10-upgrade] 2017-11-28T20:39:59Z Bill Erickson,,, <berick@kcls-dev-local> # Add mattype filter for browse search
 auth-prop-partial-matches [2.9-to-2.10-upgrade-reingest] 2017-09-29T15:47:04Z Bill Erickson,,, <berick@kcls-dev-local> # Authority propagation partial matches
+at-purge-interval [2.9-to-2.10-upgrade-reingest] 2017-10-23T15:31:00Z Bill Erickson,,, <berick@kcls-dev-local> # A/T Purge schema changes
+at-purge-interval-data [at-purge-interval] 2017-10-23T15:31:23Z Bill Erickson,,, <berick@kcls-dev-local> # A/T Purge data cleanup
diff --git a/KCLS/sql/schema/verify/at-purge-interval-data.sql b/KCLS/sql/schema/verify/at-purge-interval-data.sql
new file mode 100644 (file)
index 0000000..f06b679
--- /dev/null
@@ -0,0 +1,7 @@
+-- Verify kcls-evergreen:at-purge-interval-data on pg
+
+BEGIN;
+
+-- XXX Add verifications here.
+
+ROLLBACK;
diff --git a/KCLS/sql/schema/verify/at-purge-interval.sql b/KCLS/sql/schema/verify/at-purge-interval.sql
new file mode 100644 (file)
index 0000000..f8bbc4e
--- /dev/null
@@ -0,0 +1,7 @@
+-- Verify kcls-evergreen:at-purge-interval on pg
+
+BEGIN;
+
+-- XXX Add verifications here.
+
+ROLLBACK;