From 1c2fefe6399680439074c64b7ce91a83e00eafff Mon Sep 17 00:00:00 2001 From: Bill Erickson Date: Wed, 26 Sep 2018 16:38:44 -0400 Subject: [PATCH] JBAS-1832 3.2 data updates extracted for parallelizing Move name keywords, billing updates, and browse data migration to dedicated scripts so they can be manually run in parallel. Signed-off-by: Bill Erickson --- .../sql/schema/deploy/2.12-3.2-db-upgrade-data.sql | 78 +--------------------- .../schema/deploy/3.2-data-billing-timestamps.sql | 75 +++++++++++++++++++++ KCLS/sql/schema/deploy/3.2-data-name-keywords.sql | 24 +++++++ .../schema/revert/3.2-data-billing-timestamps.sql | 7 ++ KCLS/sql/schema/revert/3.2-data-name-keywords.sql | 7 ++ KCLS/sql/schema/sqitch.plan | 2 + 6 files changed, 118 insertions(+), 75 deletions(-) create mode 100644 KCLS/sql/schema/deploy/3.2-data-billing-timestamps.sql create mode 100644 KCLS/sql/schema/deploy/3.2-data-name-keywords.sql create mode 100644 KCLS/sql/schema/revert/3.2-data-billing-timestamps.sql create mode 100644 KCLS/sql/schema/revert/3.2-data-name-keywords.sql diff --git a/KCLS/sql/schema/deploy/2.12-3.2-db-upgrade-data.sql b/KCLS/sql/schema/deploy/2.12-3.2-db-upgrade-data.sql index 43bc36ea56..c0a7d3b749 100644 --- a/KCLS/sql/schema/deploy/2.12-3.2-db-upgrade-data.sql +++ b/KCLS/sql/schema/deploy/2.12-3.2-db-upgrade-data.sql @@ -56,87 +56,12 @@ CREATE TRIGGER z_opac_vis_mat_view_tgr AFTER INSERT OR UPDATE CREATE TRIGGER z_opac_vis_mat_view_tgr AFTER INSERT OR UPDATE ON serial.unit FOR EACH ROW EXECUTE PROCEDURE asset.cache_copy_visibility(); -SELECT CLOCK_TIMESTAMP(), 'Updating billing timestamps'; - -ALTER TABLE money.billing - ADD COLUMN create_date TIMESTAMP WITH TIME ZONE, - ADD COLUMN period_start TIMESTAMP WITH TIME ZONE, - ADD COLUMN period_end TIMESTAMP WITH TIME ZONE; - --- TODO: back-fill aged billing data, indexes, and null constraints --- for consistency after deciding whether we want to delete any of them. -ALTER TABLE money.aged_billing - ADD COLUMN create_date TIMESTAMP WITH TIME ZONE, - ADD COLUMN period_start TIMESTAMP WITH TIME ZONE, - ADD COLUMN period_end TIMESTAMP WITH TIME ZONE; - ---Disable materialized update trigger ---It takes forever, and doesn't matter yet for what we are doing, as the ---view definition is unchanged (still using billing_ts) -ALTER TABLE money.billing DISABLE TRIGGER mat_summary_upd_tgr; - ---Limit to btype=1 / 'Overdue Materials' ---Update day-granular fines first (i.e. 24 hour, 1 day, 2 day, etc., all of which are multiples of 86400 seconds), and simply remove the time portion of timestamp -UPDATE money.billing mb - SET period_start = date_trunc('day', billing_ts), period_end = date_trunc('day', billing_ts) + (ac.fine_interval - '1 second') - FROM action.circulation ac -WHERE mb.xact = ac.id - AND mb.btype = 1 - AND (EXTRACT(EPOCH FROM ac.fine_interval))::integer % 86400 = 0; - ---Update fines for non-day intervals -UPDATE money.billing mb - SET period_start = billing_ts - ac.fine_interval + interval '1 sec', period_end = billing_ts - FROM action.circulation ac -WHERE mb.xact = ac.id - AND mb.btype = 1 - AND (EXTRACT(EPOCH FROM ac.fine_interval))::integer % 86400 > 0; - -SET CONSTRAINTS ALL IMMEDIATE; -UPDATE money.billing SET create_date = COALESCE(period_start, billing_ts); - ---Re-enable update trigger -ALTER TABLE money.billing ENABLE TRIGGER mat_summary_upd_tgr; - -ALTER TABLE money.billing ALTER COLUMN create_date SET DEFAULT NOW(); -ALTER TABLE money.billing ALTER COLUMN create_date SET NOT NULL; - -CREATE INDEX m_b_create_date_idx ON money.billing (create_date); -CREATE INDEX m_b_period_start_idx ON money.billing (period_start); -CREATE INDEX m_b_period_end_idx ON money.billing (period_end); - -CREATE OR REPLACE FUNCTION money.maintain_billing_ts () RETURNS TRIGGER AS $$ -BEGIN - NEW.billing_ts := COALESCE(NEW.period_end, NEW.create_date); - RETURN NEW; -END; -$$ LANGUAGE PLPGSQL; -CREATE TRIGGER maintain_billing_ts_tgr BEFORE INSERT OR UPDATE ON money.billing FOR EACH ROW EXECUTE PROCEDURE money.maintain_billing_ts(); - SELECT CLOCK_TIMESTAMP(), 'Making copy alert types active'; UPDATE config.copy_alert_type SET active = TRUE WHERE id IN (4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17); - --- Update keyword indexes for existing patrons -SELECT CLOCK_TIMESTAMP(), 'Updating patron name keyword vector'; - -UPDATE actor.usr SET name_kw_tsvector = - TO_TSVECTOR( - COALESCE(prefix, '') || ' ' || - COALESCE(first_given_name, '') || ' ' || - COALESCE(evergreen.unaccent_and_squash(first_given_name), '') || ' ' || - COALESCE(second_given_name, '') || ' ' || - COALESCE(evergreen.unaccent_and_squash(second_given_name), '') || ' ' || - COALESCE(family_name, '') || ' ' || - COALESCE(evergreen.unaccent_and_squash(family_name), '') || ' ' || - COALESCE(suffix, '') - ); - --- Authority record re-ingest - SELECT CLOCK_TIMESTAMP(), 'Updating copy alert messages'; SELECT CLOCK_TIMESTAMP(), 'Copying copy alert messages to normal checkout copy alerts...'; @@ -157,6 +82,8 @@ SELECT CLOCK_TIMESTAMP(), 'Clearing legacy copy alert field; this may take a whi UPDATE asset.copy SET alert_message = NULL WHERE alert_message IS NOT NULL; + +/* SELECT CLOCK_TIMESTAMP(), 'Reingesting authority records'; -- add the flag ingest.disable_authority_full_rec if it does not exist @@ -201,4 +128,5 @@ FROM internal_flag_state b WHERE a.name = b.name; SELECT CLOCK_TIMESTAMP(), 'Done reingesting authority records'; +*/ diff --git a/KCLS/sql/schema/deploy/3.2-data-billing-timestamps.sql b/KCLS/sql/schema/deploy/3.2-data-billing-timestamps.sql new file mode 100644 index 0000000000..268c56f361 --- /dev/null +++ b/KCLS/sql/schema/deploy/3.2-data-billing-timestamps.sql @@ -0,0 +1,75 @@ +-- Deploy kcls-evergreen:3.2-data-billing-timestamps to pg +-- requires: 2.12-3.2-db-upgrade-data + +BEGIN; + +SET STATEMENT_TIMEOUT = 0; + +SELECT CLOCK_TIMESTAMP(), 'Updating billing timestamps: ~5 hours'; + +ALTER TABLE money.billing + ADD COLUMN create_date TIMESTAMP WITH TIME ZONE, + ADD COLUMN period_start TIMESTAMP WITH TIME ZONE, + ADD COLUMN period_end TIMESTAMP WITH TIME ZONE; + +-- TODO: back-fill aged billing data, indexes, and null constraints +-- for consistency after deciding whether we want to delete any of them. +ALTER TABLE money.aged_billing + ADD COLUMN create_date TIMESTAMP WITH TIME ZONE, + ADD COLUMN period_start TIMESTAMP WITH TIME ZONE, + ADD COLUMN period_end TIMESTAMP WITH TIME ZONE; + +--Disable materialized update trigger +--It takes forever, and doesn't matter yet for what we are doing, as the +--view definition is unchanged (still using billing_ts) +ALTER TABLE money.billing DISABLE TRIGGER mat_summary_upd_tgr; + +SELECT CLOCK_TIMESTAMP(), 'Updating billing period start/end timestamps'; + +--Limit to btype=1 / 'Overdue Materials' +--Update day-granular fines first (i.e. 24 hour, 1 day, 2 day, etc., all of which are multiples of 86400 seconds), and simply remove the time portion of timestamp +UPDATE money.billing mb + SET period_start = date_trunc('day', billing_ts), period_end = date_trunc('day', billing_ts) + (ac.fine_interval - '1 second') + FROM action.circulation ac +WHERE mb.xact = ac.id + AND mb.btype = 1 + AND (EXTRACT(EPOCH FROM ac.fine_interval))::integer % 86400 = 0; + +SELECT CLOCK_TIMESTAMP(), 'Updating billing period start/end timestamps (non-daily)'; + +--Update fines for non-day intervals +UPDATE money.billing mb + SET period_start = billing_ts - ac.fine_interval + interval '1 sec', period_end = billing_ts + FROM action.circulation ac +WHERE mb.xact = ac.id + AND mb.btype = 1 + AND (EXTRACT(EPOCH FROM ac.fine_interval))::integer % 86400 > 0; + +SELECT CLOCK_TIMESTAMP(), 'Updating billing create_date timestamps'; + +SET CONSTRAINTS ALL IMMEDIATE; +UPDATE money.billing SET create_date = COALESCE(period_start, billing_ts); + +--Re-enable update trigger +ALTER TABLE money.billing ENABLE TRIGGER mat_summary_upd_tgr; + +ALTER TABLE money.billing ALTER COLUMN create_date SET DEFAULT NOW(); +ALTER TABLE money.billing ALTER COLUMN create_date SET NOT NULL; + +SELECT CLOCK_TIMESTAMP(), 'Creating billing indexes (x3)'; + +CREATE INDEX m_b_create_date_idx ON money.billing (create_date); +CREATE INDEX m_b_period_start_idx ON money.billing (period_start); +CREATE INDEX m_b_period_end_idx ON money.billing (period_end); + +CREATE OR REPLACE FUNCTION money.maintain_billing_ts () RETURNS TRIGGER AS $$ +BEGIN + NEW.billing_ts := COALESCE(NEW.period_end, NEW.create_date); + RETURN NEW; +END; +$$ LANGUAGE PLPGSQL; +CREATE TRIGGER maintain_billing_ts_tgr BEFORE INSERT OR UPDATE ON money.billing FOR EACH ROW EXECUTE PROCEDURE money.maintain_billing_ts(); + +COMMIT; + + diff --git a/KCLS/sql/schema/deploy/3.2-data-name-keywords.sql b/KCLS/sql/schema/deploy/3.2-data-name-keywords.sql new file mode 100644 index 0000000000..d214decc65 --- /dev/null +++ b/KCLS/sql/schema/deploy/3.2-data-name-keywords.sql @@ -0,0 +1,24 @@ +-- Deploy kcls-evergreen:3.2-data-name-keywords to pg +-- requires: 2.12-3.2-db-upgrade-data + +BEGIN; + +SET STATEMENT_TIMEOUT = 0; + +-- Update keyword indexes for existing patrons +SELECT CLOCK_TIMESTAMP(), 'Updating patron name keyword vector: ~1 hour'; + +UPDATE actor.usr SET name_kw_tsvector = + TO_TSVECTOR( + COALESCE(prefix, '') || ' ' || + COALESCE(first_given_name, '') || ' ' || + COALESCE(evergreen.unaccent_and_squash(first_given_name), '') || ' ' || + COALESCE(second_given_name, '') || ' ' || + COALESCE(evergreen.unaccent_and_squash(second_given_name), '') || ' ' || + COALESCE(family_name, '') || ' ' || + COALESCE(evergreen.unaccent_and_squash(family_name), '') || ' ' || + COALESCE(suffix, '') + ); + +COMMIT; + diff --git a/KCLS/sql/schema/revert/3.2-data-billing-timestamps.sql b/KCLS/sql/schema/revert/3.2-data-billing-timestamps.sql new file mode 100644 index 0000000000..ff7ada214b --- /dev/null +++ b/KCLS/sql/schema/revert/3.2-data-billing-timestamps.sql @@ -0,0 +1,7 @@ +-- Revert kcls-evergreen:3.2-data-billing-timestamps from pg + +BEGIN; + +-- XXX Add DDLs here. + +COMMIT; diff --git a/KCLS/sql/schema/revert/3.2-data-name-keywords.sql b/KCLS/sql/schema/revert/3.2-data-name-keywords.sql new file mode 100644 index 0000000000..089e20b07c --- /dev/null +++ b/KCLS/sql/schema/revert/3.2-data-name-keywords.sql @@ -0,0 +1,7 @@ +-- Revert kcls-evergreen:3.2-data-name-keywords from pg + +BEGIN; + +-- XXX Add DDLs here. + +COMMIT; diff --git a/KCLS/sql/schema/sqitch.plan b/KCLS/sql/schema/sqitch.plan index ad0dfc7fa9..2905d34fc0 100644 --- a/KCLS/sql/schema/sqitch.plan +++ b/KCLS/sql/schema/sqitch.plan @@ -82,3 +82,5 @@ stock-browse-headings-report [stock-browse-schema] 2018-10-04T15:56:18Z Bill Eri stock-browse-cleanup [stock-browse-schema] 2018-10-03T18:05:49Z Bill Erickson,,, # Delete old browse data 2.12-3.2-db-upgrade-schema [ecard-notice-validator] 2018-09-12T14:46:28Z Bill Erickson,,, # 2.12 to 3.2 DB update 2.12-3.2-db-upgrade-data [2.12-3.2-db-upgrade-schema] 2018-09-12T14:46:52Z Bill Erickson,,, # 2.12 to 3.2 post-update data +3.2-data-name-keywords [2.12-3.2-db-upgrade-data] 2018-09-26T20:29:00Z Bill Erickson,,, # Extract name keywords +3.2-data-billing-timestamps [2.12-3.2-db-upgrade-data] 2018-09-26T20:29:38Z Bill Erickson,,, # Billing timestamp updates -- 2.11.0