From d3071bfbf2ac60ccc5626ccab74487808a2c6527 Mon Sep 17 00:00:00 2001 From: Mike Rylander Date: Thu, 7 Jan 2016 22:00:20 -0500 Subject: [PATCH] LP#1549505: schema and IDL for statistical poularity ratings Signed-off-by: Mike Rylander Signed-off-by: Galen Charlton --- Open-ILS/examples/fm_IDL.xml | 92 ++++ .../Pg/upgrade/XXXX.schema.statisitcal-ratings.sql | 542 +++++++++++++++++++++ 2 files changed, 634 insertions(+) create mode 100644 Open-ILS/src/sql/Pg/upgrade/XXXX.schema.statisitcal-ratings.sql diff --git a/Open-ILS/examples/fm_IDL.xml b/Open-ILS/examples/fm_IDL.xml index 42b66f27a9..06dfb63fa2 100644 --- a/Open-ILS/examples/fm_IDL.xml +++ b/Open-ILS/examples/fm_IDL.xml @@ -239,6 +239,98 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 0.0), + recalc_interval INTERVAL NOT NULL DEFAULT '1 month', + attr_filter TEXT, + src_filter INT REFERENCES config.bib_source (id) ON UPDATE CASCADE ON DELETE SET NULL DEFERRABLE INITIALLY DEFERRED, + circ_mod_filter TEXT REFERENCES config.circ_modifier (code) ON UPDATE CASCADE ON DELETE SET NULL DEFERRABLE INITIALLY DEFERRED, + loc_grp_filter INT REFERENCES asset.copy_location_group (id) ON UPDATE CASCADE ON DELETE SET NULL DEFERRABLE INITIALLY DEFERRED, + popularity_parameter INT NOT NULL REFERENCES rating.popularity_parameter (id) ON UPDATE CASCADE ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED, + fixed_rating INT CHECK (fixed_rating IS NULL OR fixed_rating BETWEEN -5 AND 5), + percentile NUMERIC CHECK (percentile IS NULL OR (percentile >= 50.0 AND percentile < 100.0)), + discard INT NOT NULL DEFAULT 0, + last_calc TIMESTAMPTZ, + CONSTRAINT unique_name_scope UNIQUE (name,scope) +); + +CREATE TABLE rating.record_badge_score ( + id BIGSERIAL PRIMARY KEY, + record BIGINT NOT NULL REFERENCES biblio.record_entry (id) ON UPDATE CASCADE ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED, + badge INT NOT NULL REFERENCES rating.badge (id) ON UPDATE CASCADE ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED, + score INT NOT NULL CHECK (score BETWEEN -5 AND 5), + CONSTRAINT unique_record_badge UNIQUE (record,badge) +); +CREATE INDEX record_badge_score_badge_idx ON rating.record_badge_score (badge); +CREATE INDEX record_badge_score_record_idx ON rating.record_badge_score (record); + +CREATE VIEW rating.generic_bib_population_global AS + WITH org_scope AS ( + SELECT id, + array_agg(tree) AS orgs + FROM (SELECT id, + (actor.org_unit_descendants(id)).id AS tree + FROM actor.org_unit + ) x + GROUP BY 1 + ), + badge_data AS ( + SELECT *, + metabib.compile_composite_attr(COALESCE(attr_filter,'{}')) AS afilter + FROM rating.badge + ) + SELECT DISTINCT ON (b.id) + b.id AS bib, + badge_data.id AS badge, + s.orgs + FROM biblio.record_entry b + JOIN metabib.record_attr_vector_list r ON (b.id = r.source) + JOIN badge_data ON ( + badge_data.circ_mod_filter IS NULL + AND (badge_data.src_filter IS NULL OR b.source = badge_data.src_filter) + AND (badge_data.attr_filter IS NULL OR r.vlist @@ badge_data.afilter) + ) + JOIN org_scope s ON (badge_data.scope = s.id); + +CREATE VIEW rating.generic_bib_population_by_copy AS + WITH org_scope AS ( + SELECT id, + array_agg(tree) AS orgs + FROM (SELECT id, + (actor.org_unit_descendants(id)).id AS tree + FROM actor.org_unit + ) x + GROUP BY 1 + ), + badge_data AS ( + SELECT *, + metabib.compile_composite_attr(COALESCE(attr_filter,'{}')) AS afilter + FROM rating.badge + ) + SELECT DISTINCT ON (b.id) + b.id AS bib, + badge_data.id AS badge, + s.orgs + FROM biblio.record_entry b + JOIN metabib.record_attr_vector_list r ON (b.id = r.source) + JOIN asset.call_number cn ON (cn.record = b.id AND NOT cn.deleted) + JOIN asset.copy cp ON (cp.call_number = cn.id AND NOT cp.deleted) + JOIN org_scope s ON (cn.owning_lib = ANY (s.orgs)) + JOIN badge_data ON ( + badge_data.scope = s.id + AND (badge_data.src_filter IS NULL OR b.source = badge_data.src_filter) + AND (badge_data.circ_mod_filter IS NULL OR cp.circ_modifier = badge_data.circ_mod_filter) + AND (badge_data.attr_filter IS NULL OR r.vlist @@ badge_data.afilter) + ) + LEFT JOIN asset.copy_location_group_map lg ON (cp.location = lg.location) + WHERE (badge_data.loc_grp_filter IS NULL OR lg.lgroup = badge_data.loc_grp_filter); + +CREATE VIEW rating.generic_bib_population_by_uri AS + WITH org_scope AS ( + SELECT id, + array_agg(tree) AS orgs + FROM (SELECT id, + (actor.org_unit_descendants(id)).id AS tree + FROM actor.org_unit + ) x + GROUP BY 1 + ), + badge_data AS ( + SELECT *, + metabib.compile_composite_attr(COALESCE(attr_filter,'{}')) AS afilter + FROM rating.badge + ) + SELECT DISTINCT ON (b.id) + b.id AS bib, + badge_data.id AS badge, + s.orgs + FROM biblio.record_entry b + JOIN metabib.record_attr_vector_list r ON (b.id = r.source) + JOIN asset.call_number cn ON (cn.record = b.id AND cn.label = '##URI##' AND NOT cn.deleted) + JOIN asset.uri_call_number_map urim ON (urim.call_number = cn.id) + JOIN asset.uri uri ON (urim.uri = uri.id AND uri.active) + JOIN org_scope s ON (cn.owning_lib = ANY (s.orgs)) + JOIN badge_data ON ( + badge_data.scope = s.id + AND badge_data.circ_mod_filter IS NULL + AND (badge_data.src_filter IS NULL OR b.source = badge_data.src_filter) + AND (badge_data.attr_filter IS NULL OR r.vlist @@ badge_data.afilter) + ); + +CREATE VIEW rating.generic_bib_population_by_copy_or_uri AS + WITH org_scope AS ( + SELECT id, + array_agg(tree) AS orgs + FROM (SELECT id, + (actor.org_unit_descendants(id)).id AS tree + FROM actor.org_unit + ) x + GROUP BY 1 + ), + badge_data AS ( + SELECT *, + metabib.compile_composite_attr(COALESCE(attr_filter,'{}')) AS afilter + FROM rating.badge + ) + SELECT DISTINCT ON (b.id) + b.id AS bib, + badge_data.id AS badge, + s.orgs + FROM biblio.record_entry b + JOIN metabib.record_attr_vector_list r ON (b.id = r.source) + JOIN asset.call_number cn ON (cn.record = b.id AND NOT cn.deleted) + JOIN org_scope s ON (cn.owning_lib = ANY (s.orgs)) + JOIN badge_data ON ( + badge_data.scope = s.id + AND (badge_data.src_filter IS NULL OR b.source = badge_data.src_filter) + AND (badge_data.attr_filter IS NULL OR r.vlist @@ badge_data.afilter) + ) + LEFT JOIN asset.copy cp ON (cp.call_number = cn.id AND NOT cp.deleted) + LEFT JOIN asset.uri_call_number_map urim ON (urim.call_number = cn.id) + LEFT JOIN asset.uri uri ON (urim.uri = uri.id AND uri.active) + LEFT JOIN asset.copy_location_group_map lg ON (cp.location = lg.location) + WHERE (badge_data.loc_grp_filter IS NULL OR lg.lgroup = badge_data.loc_grp_filter) + AND (uri.id IS NOT NULL OR cp.id IS NOT NULL) + AND (badge_data.circ_mod_filter IS NULL OR cp.circ_modifier = badge_data.circ_mod_filter); + + +CREATE OR REPLACE FUNCTION rating.recalculate_badge_score ( badge_id INT ) RETURNS VOID AS $f$ +DECLARE + badge_row rating.badge%ROWTYPE; + param rating.popularity_parameter%ROWTYPE; +BEGIN + + -- Find what we're doing + SELECT * INTO badge_row FROM rating.badge WHERE id = badge_id; + SELECT * INTO param FROM rating.popularity_parameter WHERE id = badge_row.popularity_parameter; + + -- If it's a fixed-rating badge, just do it ... + IF badge_row.fixed_rating IS NOT NULL THEN + DELETE FROM rating.record_badge_score WHERE badge = badge_id; + EXECUTE $e$ + INSERT INTO rating.record_badge_score (record, badge, score) + SELECT bib, $1, $2, FROM $e$ || param.func || $e$($1)$e$ + USING badge_id, badge_row.fixed_rating; + + RETURN; + END IF; + -- else, calculate! + + -- Make a session-local scratchpad for calculating scores + CREATE TEMP TABLE record_score_scratchpad ( + bib BIGINT, + value NUMERIC + ) ON COMMIT DROP; + + -- Gather raw values + EXECUTE $e$ + INSERT INTO record_score_scratchpad (bib, value) + SELECT * FROM $e$ || param.func || $e$($1)$e$ + USING badge_id; + + IF badge_row.discard > 0 OR badge_row.percentile IS NOT NULL THEN + -- To speed up discard-common + CREATE INDEX record_score_scratchpad_score_idx ON record_score_scratchpad (value); + ANALYZE record_score_scratchpad; + END IF; + + IF badge_row.discard > 0 THEN -- Remove common low values (trim the long tail) + DELETE FROM record_score_scratchpad WHERE value IN ( + SELECT DISTINCT value FROM record_score_scratchpad ORDER BY value LIMIT badge_row.discard + ); + END IF; + + IF badge_row.percentile IS NOT NULL THEN -- Cut population down to exceptional records + DELETE FROM record_score_scratchpad WHERE value <= ( + SELECT value FROM ( + SELECT value, + CUME_DIST() OVER (ORDER BY value) AS p + FROM record_score_scratchpad + ) x WHERE p < badge_row.percentile / 100.0 ORDER BY p DESC LIMIT 1 + ); + END IF; + + + -- And, finally, push new data in + DELETE FROM rating.record_badge_score WHERE badge = badge_id; + INSERT INTO rating.record_badge_score (badge, record, score) + SELECT badge_id, + bib, + GREATEST(ROUND((CUME_DIST() OVER (ORDER BY value)) * 5), 1) AS value + FROM record_score_scratchpad; + + DROP TABLE record_score_scratchpad; + + RETURN; +END; +$f$ LANGUAGE PLPGSQL; + +CREATE OR REPLACE FUNCTION rating.holds_filled_over_time(badge_id INT) + RETURNS TABLE (record BIGINT, value NUMERIC) AS $f$ +DECLARE + badge rating.badge%ROWTYPE; + iage INT := 1; + iint INT := NULL; + iscale NUMERIC := NULL; +BEGIN + + SELECT * INTO badge FROM rating.badge WHERE id = badge_id; + + IF badge.horizon_age IS NULL THEN + RAISE EXCEPTION 'Badge "%" with id % requires a horizon age but has none.', + badge.name, + badge.id; + END IF; + + iint := EXTRACT(EPOCH FROM badge.importance_interval); + IF badge.importance_age IS NOT NULL THEN + iage := (EXTRACT(EPOCH FROM badge.importance_age) / iint)::INT; + END IF; + + -- if iscale is smaller than 1, scaling slope will be shallow ... BEWARE! + iscale := COALESCE(badge.importance_scale, 1.0); + + RETURN QUERY + SELECT bib, + SUM( holds * GREATEST( iscale * (iage - hage), 1.0 )) + FROM ( + SELECT pop.bib AS bib, + (1 + EXTRACT(EPOCH FROM h.fulfillment_time) / iint)::INT AS hage, + COUNT(h.id)::INT AS holds + FROM action.hold_request h + JOIN reporter.hold_request_record rhrr ON (rhrr.id = h.id) + JOIN rating.generic_bib_population_by_copy pop ON (rhrr.bib_record = pop.bib AND pop.badge = badge.id) + WHERE h.fulfillment_time >= NOW() - badge.horizon_age + AND h.request_lib = ANY (pop.orgs) + GROUP BY 1, 2 + ) x + GROUP BY 1; +END; +$f$ LANGUAGE PLPGSQL STABLE STRICT; + +CREATE OR REPLACE FUNCTION rating.holds_placed_over_time(badge_id INT) + RETURNS TABLE (record BIGINT, value NUMERIC) AS $f$ +DECLARE + badge rating.badge%ROWTYPE; + iage INT := 1; + iint INT := NULL; + iscale NUMERIC := NULL; +BEGIN + + SELECT * INTO badge FROM rating.badge WHERE id = badge_id; + + IF badge.horizon_age IS NULL THEN + RAISE EXCEPTION 'Badge "%" with id % requires a horizon age but has none.', + badge.name, + badge.id; + END IF; + + iint := EXTRACT(EPOCH FROM badge.importance_interval); + IF badge.importance_age IS NOT NULL THEN + iage := (EXTRACT(EPOCH FROM badge.importance_age) / iint)::INT; + END IF; + + -- if iscale is smaller than 1, scaling slope will be shallow ... BEWARE! + iscale := COALESCE(badge.importance_scale, 1.0); + + RETURN QUERY + SELECT bib, + SUM( holds * GREATEST( iscale * (iage - hage), 1.0 )) + FROM ( + SELECT pop.bib AS bib, + (1 + EXTRACT(EPOCH FROM h.fulfillment_time) / iint)::INT AS hage, + COUNT(h.id)::INT AS holds + FROM action.hold_request h + JOIN reporter.hold_request_record rhrr ON (rhrr.id = h.id) + JOIN rating.generic_bib_population_by_copy pop ON (rhrr.bib_record = pop.bib AND pop.badge = badge.id) + WHERE h.request_time >= NOW() - badge.horizon_age + AND h.request_lib = ANY (pop.orgs) + GROUP BY 1, 2 + ) x + GROUP BY 1; +END; +$f$ LANGUAGE PLPGSQL STABLE STRICT; + +CREATE OR REPLACE FUNCTION rating.current_hold_count(INT) + RETURNS TABLE (record BIGINT, value NUMERIC) AS $f$ + SELECT pop.bib AS bib, + COUNT(h.id)::NUMERIC AS holds + FROM action.hold_request h + JOIN reporter.hold_request_record rhrr ON (rhrr.id = h.id) + JOIN rating.generic_bib_population_by_copy pop ON (rhrr.bib_record = pop.bib AND pop.badge = $1) + WHERE h.fulfillment_time IS NULL + AND h.request_lib = ANY (pop.orgs) + GROUP BY 1 +$f$ LANGUAGE SQL STABLE STRICT; + +CREATE OR REPLACE FUNCTION rating.circs_over_time(badge_id INT) + RETURNS TABLE (record BIGINT, value NUMERIC) AS $f$ +DECLARE + badge rating.badge%ROWTYPE; + iage INT := 1; + iint INT := NULL; + iscale NUMERIC := NULL; +BEGIN + + SELECT * INTO badge FROM rating.badge WHERE id = badge_id; + + IF badge.horizon_age IS NULL THEN + RAISE EXCEPTION 'Badge "%" with id % requires a horizon age but has none.', + badge.name, + badge.id; + END IF; + + iint := EXTRACT(EPOCH FROM badge.importance_interval); + IF badge.importance_age IS NOT NULL THEN + iage := (EXTRACT(EPOCH FROM badge.importance_age) / iint)::INT; + END IF; + + -- if iscale is smaller than 1, scaling slope will be shallow ... BEWARE! + iscale := COALESCE(badge.importance_scale, 1.0); + + RETURN QUERY + SELECT bib, + SUM( circs * GREATEST( iscale * (iage - cage), 1.0 )) + FROM ( + SELECT b.id AS bib, + (1 + EXTRACT(EPOCH FROM c.xact_start) / iint)::INT AS cage, + COUNT(c.id)::INT AS circs + FROM action.circulation c + JOIN asset.copy cp ON (cp.id = c.target_copy) + JOIN asset.call_number cn ON (cn.id = cp.call_number) + JOIN rating.generic_bib_population_by_copy pop ON (cn.record = pop.bib AND pop.badge = badge.id) + WHERE c.xact_start >= NOW() - badge.horizon_age + AND cn.owning_lib = ANY (pop.orgs) + AND c.phone_renewal IS FALSE -- we don't count renewals + AND c.desk_renewal IS FALSE + AND c.opac_renewal IS FALSE + GROUP BY 1, 2 + ) x + GROUP BY 1; +END; +$f$ LANGUAGE PLPGSQL STABLE STRICT; + +CREATE OR REPLACE FUNCTION rating.current_circ_count(INT) + RETURNS TABLE (record BIGINT, value NUMERIC) AS $f$ + SELECT pop.bib AS bib, + COUNT(c.id)::NUMERIC AS circs + FROM action.circulation c + JOIN asset.copy cp ON (cp.id = c.target_copy) + JOIN asset.call_number cn ON (cn.id = cp.call_number) + JOIN rating.generic_bib_population_by_copy pop ON (cn.record = pop.bib AND pop.badge = $1) + WHERE c.checkin_time IS NULL + AND cn.owning_lib = ANY (pop.orgs) + GROUP BY 1 +$f$ LANGUAGE SQL STABLE STRICT; + +CREATE OR REPLACE FUNCTION rating.checked_out_total_ratio(INT) + RETURNS TABLE (record BIGINT, value NUMERIC) AS $f$ + SELECT bib, + SUM(checked_out)::NUMERIC / SUM(total)::NUMERIC + FROM (SELECT cn.record AS bib, + (cp.status = 1)::INT AS checked_out, + 1 AS total + FROM asset.copy cp + JOIN asset.call_number cn ON (cn.id = cp.call_number) + JOIN rating.generic_bib_population_by_copy pop ON (cn.record = pop.bib AND pop.badge = $1) + WHERE cn.owning_lib = ANY (pop.orgs) + ) x + GROUP BY 1 +$f$ LANGUAGE SQL STABLE STRICT; + +CREATE OR REPLACE FUNCTION rating.holds_total_ratio(INT) + RETURNS TABLE (record BIGINT, value NUMERIC) AS $f$ + SELECT cn.record AS bib, + COUNT(DISTINCT m.hold)::NUMERIC / COUNT(DISTINCT cp.id)::NUMERIC + FROM asset.copy cp + JOIN asset.call_number cn ON (cn.id = cp.call_number) + JOIN rating.generic_bib_population_by_copy pop ON (cn.record = pop.bib AND pop.badge = $1) + LEFT JOIN action.hold_copy_map m ON (m.target_copy = cp.id) + WHERE cn.owning_lib = ANY (pop.orgs) + GROUP BY 1 +$f$ LANGUAGE SQL STABLE STRICT; + +CREATE OR REPLACE FUNCTION rating.holds_holdable_ratio(INT) + RETURNS TABLE (record BIGINT, value NUMERIC) AS $f$ + SELECT cn.record AS bib, + COUNT(DISTINCT m.hold)::NUMERIC / COUNT(DISTINCT cp.id)::NUMERIC + FROM asset.copy cp + JOIN asset.copy_location cl ON (cl.id = cp.location) + JOIN config.copy_status cs ON (cs.id = cp.status) + JOIN asset.call_number cn ON (cn.id = cp.call_number) + JOIN rating.generic_bib_population_by_copy pop ON (cn.record = pop.bib AND pop.badge = $1) + LEFT JOIN action.hold_copy_map m ON (m.target_copy = cp.id) + WHERE cn.owning_lib = ANY (pop.orgs) + AND cp.holdable IS TRUE + AND cl.holdable IS TRUE + AND cs.holdable IS TRUE + GROUP BY 1 +$f$ LANGUAGE SQL STABLE STRICT; + +CREATE OR REPLACE FUNCTION rating.bib_record_age(INT) + RETURNS TABLE (record BIGINT, value NUMERIC) AS $f$ + SELECT pop.bib AS bib, + 1.0 / EXTRACT(EPOCH FROM AGE(b.create_date))::NUMERIC + 1.0 + FROM rating.generic_bib_population_by_copy_or_uri pop + JOIN biblio.record_entry b ON (b.id = pop.bib) + WHERE pop.badge = $1 +$f$ LANGUAGE SQL STABLE STRICT; + +CREATE OR REPLACE FUNCTION rating.bib_pub_age(INT) + RETURNS TABLE (record BIGINT, value NUMERIC) AS $f$ + SELECT pop.bib AS bib, + s.value::NUMERIC + FROM rating.generic_bib_population_by_copy_or_uri pop + JOIN metabib.record_sorter s ON ( + s.source = pop.bib + AND s.attr = 'pubdate' + AND s.value ~ '^\d+$' + ) + WHERE pop.badge = $1 +$f$ LANGUAGE SQL STABLE STRICT; + +CREATE OR REPLACE FUNCTION rating.percent_time_circulating(INT) + RETURNS TABLE (record BIGINT, value NUMERIC) AS $f$ + SELECT bib, + SUM(circ_time)::NUMERIC / SUM(age)::NUMERIC + FROM (SELECT cn.record AS bib, + cp.id, + EXTRACT( EPOCH FROM AGE(cp.active_date) ) + 1 AS age, + SUM( -- time copy spent circulating + EXTRACT( + EPOCH FROM + AGE( + COALESCE(circ.checkin_time, NOW()), + circ.xact_start + ) + ) + )::NUMERIC AS circ_time + FROM asset.copy cp + JOIN asset.call_number cn ON (cn.id = cp.call_number) + JOIN rating.generic_bib_population_by_copy pop ON (cn.record = pop.bib AND pop.badge = $1) + LEFT JOIN action.all_circulation circ ON (circ.target_copy = cp.id) + WHERE cn.owning_lib = ANY (pop.orgs) + GROUP BY 1,2,3 + ) x + GROUP BY 1 +$f$ LANGUAGE SQL STABLE STRICT; + +CREATE OR REPLACE FUNCTION rating.generic_fixed_rating_by_copy(INT) + RETURNS TABLE (record BIGINT, value NUMERIC) AS $f$ +SELECT bib, 1.0 + FROM rating.generic_bib_population_by_copy pop + WHERE pop.badge = $1; +$f$ LANGUAGE SQL STABLE STRICT; + +CREATE OR REPLACE FUNCTION rating.generic_fixed_rating_by_uri(INT) + RETURNS TABLE (record BIGINT, value NUMERIC) AS $f$ +SELECT bib, 1.0 + FROM rating.generic_bib_population_by_uri pop + WHERE pop.badge = $1; +$f$ LANGUAGE SQL STABLE STRICT; + +CREATE OR REPLACE FUNCTION rating.generic_fixed_rating_global(INT) + RETURNS TABLE (record BIGINT, value NUMERIC) AS $f$ +SELECT bib, 1.0 + FROM rating.generic_bib_population_global pop + WHERE pop.badge = $1; +$f$ LANGUAGE SQL STABLE STRICT; + +CREATE INDEX hold_fulfillment_time_idx ON action.hold_request (fulfillment_time) WHERE fulfillment_time IS NOT NULL; +CREATE INDEX hold_request_time_idx ON action.hold_request (request_time); + +COMMIT; + -- 2.11.0