From ecf728c843ee370ba25b060d718133b30d04e112 Mon Sep 17 00:00:00 2001 From: Mike Rylander Date: Tue, 8 Oct 2013 13:45:37 -0400 Subject: [PATCH] Function-based bibs-by-item-age enhancement squashing of commits from collab/miker/faster_bibs_by_item_age_depesz so I can patch a non-git system more easily for testing Signed-off-by: Jason Etheridge --- .../perlmods/lib/OpenILS/Application/SuperCat.pm | 29 +++-- Open-ILS/src/sql/Pg/999.functions.global.sql | 116 ++++++++++++++++++++ .../Pg/upgrade/XXXX.function.bibs_by_item_age.sql | 121 +++++++++++++++++++++ 3 files changed, 250 insertions(+), 16 deletions(-) create mode 100644 Open-ILS/src/sql/Pg/upgrade/XXXX.function.bibs_by_item_age.sql diff --git a/Open-ILS/src/perlmods/lib/OpenILS/Application/SuperCat.pm b/Open-ILS/src/perlmods/lib/OpenILS/Application/SuperCat.pm index 64687720ea..0a63048750 100644 --- a/Open-ILS/src/perlmods/lib/OpenILS/Application/SuperCat.pm +++ b/Open-ILS/src/perlmods/lib/OpenILS/Application/SuperCat.pm @@ -623,6 +623,10 @@ sub new_books_by_item { my $statuses = shift || []; my $copy_locations = shift || []; + # id's only, please! + $statuses = [ grep { /^\d+$/ } @$statuses ]; + $copy_locations = [ grep { /^\d+$/ } @$copy_locations ]; + my $offset = $page_size * ($page - 1); my $_storage = OpenSRF::AppSession->create( 'open-ils.cstore' ); @@ -642,22 +646,15 @@ sub new_books_by_item { $logger->debug("Searching for records with new copies at orgs [".join(',',@ou_ids)."], based on $ou"); my $cns = $_storage->request( "open-ils.cstore.json_query.atomic", - { select => { acn => ['record'], - acp => [{ aggregate => 1 => transform => max => column => create_date => alias => 'create_date'}] - }, - from => { 'acn' => { 'acp' => { field => call_number => fkey => 'id' } } }, - where => - { '+acp' => - { deleted => 'f', - ((@ou_ids) ? ( circ_lib => \@ou_ids) : ()), - ((@$statuses) ? ( status => $statuses) : ()), - ((@$copy_locations) ? ( location => $copy_locations) : ()) - }, - '+acn' => { record => { '>' => 0 } }, - }, - order_by => { acp => { create_date => { transform => 'max', direction => 'desc' } } }, - limit => $page_size, - offset => $offset + { + from => [ + 'biblio.records_by_item_age', + $page_size, + $offset, + '{' . join(',', @ou_ids) . '}', + '{' . join(',', @$statuses) . '}', + '{' . join(',', @$copy_locations) . '}' + ] } )->gather(1); diff --git a/Open-ILS/src/sql/Pg/999.functions.global.sql b/Open-ILS/src/sql/Pg/999.functions.global.sql index e57158d9f9..1ac4231e76 100644 --- a/Open-ILS/src/sql/Pg/999.functions.global.sql +++ b/Open-ILS/src/sql/Pg/999.functions.global.sql @@ -14,6 +14,122 @@ * */ +CREATE OR REPLACE FUNCTION biblio.records_by_item_age( p_limit INT4, p_offset INT4, p_circ_libs INT4[], p_copy_statuses INT4[], p_copy_locations INT4[] ) + RETURNS TABLE ( record INT8, create_date timestamptz ) as $$ +DECLARE + v_results hstore := ''; + v_records hstore := ''; + v_seen hstore; + v_circ_lib INT4; + a_circ_libs INT4[]; + v_record int8; + v_temprec record; + v_found INT4; + v_cursor REFCURSOR; + v_oldest timestamptz := NUll; + v_c_oldest timestamptz := NULL; + v_key_count INT8; + v_circ_libs_size INT4; +BEGIN + + IF array_length(p_circ_libs,1) > 0 THEN + a_circ_libs = p_circ_libs; + ELSE -- just collect them all + SELECT ARRAY_AGG(id) INTO a_circ_libs FROM actor.org_unit; + END IF; + + v_circ_libs_size := array_length(a_circ_libs, 1); + + FOREACH v_circ_lib IN ARRAY a_circ_libs LOOP + v_found := 0; + v_seen := ''; + v_c_oldest := NULL; + + + -- If there is only one circ lib in the array then we do not need + -- to get the key count, so set v_key_count to 0 to force second + -- query. + IF v_circ_libs_size > 1 THEN + SELECT COUNT(keys) FROM skeys(v_results) AS keys INTO v_key_count; + ELSE + v_key_count = 0; + END IF; + + -- If the number of keys in v_results is greater than or equal to + -- the number of results we need for the current page, then only look + -- for records that are newer than items currently in v_results + IF v_key_count >= p_limit + p_offset THEN + open v_cursor NO SCROLL FOR + SELECT c.call_number, c.create_date, c.status, c.location + FROM asset.copy c + WHERE c.circ_lib = v_circ_lib AND NOT c.deleted + AND c.create_date > v_oldest + ORDER BY c.create_date DESC; + ELSE + open v_cursor NO SCROLL FOR + SELECT c.call_number, c.create_date, c.status, c.location + FROM asset.copy c + WHERE c.circ_lib = v_circ_lib AND NOT c.deleted + ORDER BY c.create_date DESC; + END IF; + + LOOP -- This loop gets up to a page worth of copies from each circ lib, if that many exist for each. + FETCH v_cursor INTO v_temprec; + EXIT WHEN NOT FOUND; + + -- Ignore if this copy doesn't match a requested status or location + CONTINUE WHEN array_length(p_copy_statuses, 1) > 0 AND NOT ( v_temprec.status = ANY ( p_copy_statuses ) ); + CONTINUE WHEN array_length(p_copy_locations, 1) > 0 AND NOT ( v_temprec.location = ANY ( p_copy_locations ) ); + + -- Ignore if we've seen given call number in current query (for current circ_lib) + CONTINUE WHEN v_seen ? v_temprec.call_number::TEXT; + v_seen := v_seen || hstore( v_temprec.call_number::TEXT, '1' ); + + -- If we don't have yet record for given call_number, we need to get it + IF v_records ? v_temprec.call_number::TEXT THEN + v_record := v_records -> v_temprec.call_number::TEXT; + ELSE + SELECT cn.record INTO v_record FROM asset.call_number cn WHERE cn.id = v_temprec.call_number AND NOT cn.deleted; + CONTINUE WHEN NOT FOUND; + CONTINUE WHEN v_record <= 0; + v_records := v_records || hstore( v_temprec.call_number::TEXT, v_record::TEXT ); + END IF; + + -- If results already contain "better" date for given record, next row + IF v_results ? v_record::TEXT THEN + CONTINUE WHEN ( v_results -> v_record::TEXT )::timestamptz > v_temprec.create_date; + END IF; + + v_found := v_found + 1; + v_results := v_results || hstore( v_record::TEXT, v_temprec.create_date::TEXT ); + + IF v_c_oldest IS NULL OR v_c_oldest > v_temprec.create_date THEN + v_c_oldest := v_temprec.create_date; + END IF; + + EXIT WHEN v_found = p_limit + p_offset; + END LOOP; + + CLOSE v_cursor; + + -- Update oldest information based on oldest row added in current loop + -- only if we have more than one circ lib + IF (v_oldest IS NULL OR v_oldest < v_c_oldest) AND v_circ_libs_size > 1 THEN + v_oldest := v_c_oldest; + END IF; + + END LOOP; + RETURN QUERY SELECT KEY::INT8, value::timestamptz FROM each(v_results) ORDER BY value::timestamptz DESC, KEY::INT8 LIMIT p_limit OFFSET p_offset; +RETURN; +END; +$$ language plpgsql STRICT; + +create index copy_circ_lib on asset.copy (circ_lib); +create index unit_circ_lib on serial.unit (circ_lib); + +create index copy_circ_lib_create_date on asset.copy (circ_lib, create_date); +create index unit_circ_lib_create_date on serial.unit (circ_lib, create_date); + CREATE OR REPLACE FUNCTION actor.usr_merge_rows( table_name TEXT, col_name TEXT, src_usr INT, dest_usr INT ) RETURNS VOID AS $$ DECLARE sel TEXT; diff --git a/Open-ILS/src/sql/Pg/upgrade/XXXX.function.bibs_by_item_age.sql b/Open-ILS/src/sql/Pg/upgrade/XXXX.function.bibs_by_item_age.sql new file mode 100644 index 0000000000..96a0d58688 --- /dev/null +++ b/Open-ILS/src/sql/Pg/upgrade/XXXX.function.bibs_by_item_age.sql @@ -0,0 +1,121 @@ +BEGIN; + +SELECT evergreen.upgrade_deps_block_check('XXXX', :eg_version); + +CREATE OR REPLACE FUNCTION biblio.records_by_item_age( p_limit INT4, p_offset INT4, p_circ_libs INT4[], p_copy_statuses INT4[], p_copy_locations INT4[] ) + RETURNS TABLE ( record INT8, create_date timestamptz ) as $$ +DECLARE + v_results hstore := ''; + v_records hstore := ''; + v_seen hstore; + v_circ_lib INT4; + a_circ_libs INT4[]; + v_record int8; + v_temprec record; + v_found INT4; + v_cursor REFCURSOR; + v_oldest timestamptz := NUll; + v_c_oldest timestamptz := NULL; + v_key_count INT8; + v_circ_libs_size INT4; +BEGIN + + IF array_length(p_circ_libs,1) > 0 THEN + a_circ_libs = p_circ_libs; + ELSE -- just collect them all + SELECT ARRAY_AGG(id) INTO a_circ_libs FROM actor.org_unit; + END IF; + + v_circ_libs_size := array_length(a_circ_libs, 1); + + FOREACH v_circ_lib IN ARRAY a_circ_libs LOOP + v_found := 0; + v_seen := ''; + v_c_oldest := NULL; + + + -- If there is only one circ lib in the array then we do not need + -- to get the key count, so set v_key_count to 0 to force second + -- query. + IF v_circ_libs_size > 1 THEN + SELECT COUNT(keys) FROM skeys(v_results) AS keys INTO v_key_count; + ELSE + v_key_count = 0; + END IF; + + -- If the number of keys in v_results is greater than or equal to + -- the number of results we need for the current page, then only look + -- for records that are newer than items currently in v_results + IF v_key_count >= p_limit + p_offset THEN + open v_cursor NO SCROLL FOR + SELECT c.call_number, c.create_date, c.status, c.location + FROM asset.copy c + WHERE c.circ_lib = v_circ_lib AND NOT c.deleted + AND c.create_date > v_oldest + ORDER BY c.create_date DESC; + ELSE + open v_cursor NO SCROLL FOR + SELECT c.call_number, c.create_date, c.status, c.location + FROM asset.copy c + WHERE c.circ_lib = v_circ_lib AND NOT c.deleted + ORDER BY c.create_date DESC; + END IF; + + LOOP -- This loop gets up to a page worth of copies from each circ lib, if that many exist for each. + FETCH v_cursor INTO v_temprec; + EXIT WHEN NOT FOUND; + + -- Ignore if this copy doesn't match a requested status or location + CONTINUE WHEN array_length(p_copy_statuses, 1) > 0 AND NOT ( v_temprec.status = ANY ( p_copy_statuses ) ); + CONTINUE WHEN array_length(p_copy_locations, 1) > 0 AND NOT ( v_temprec.location = ANY ( p_copy_locations ) ); + + -- Ignore if we've seen given call number in current query (for current circ_lib) + CONTINUE WHEN v_seen ? v_temprec.call_number::TEXT; + v_seen := v_seen || hstore( v_temprec.call_number::TEXT, '1' ); + + -- If we don't have yet record for given call_number, we need to get it + IF v_records ? v_temprec.call_number::TEXT THEN + v_record := v_records -> v_temprec.call_number::TEXT; + ELSE + SELECT cn.record INTO v_record FROM asset.call_number cn WHERE cn.id = v_temprec.call_number AND NOT cn.deleted; + CONTINUE WHEN NOT FOUND; + CONTINUE WHEN v_record <= 0; + v_records := v_records || hstore( v_temprec.call_number::TEXT, v_record::TEXT ); + END IF; + + -- If results already contain "better" date for given record, next row + IF v_results ? v_record::TEXT THEN + CONTINUE WHEN ( v_results -> v_record::TEXT )::timestamptz > v_temprec.create_date; + END IF; + + v_found := v_found + 1; + v_results := v_results || hstore( v_record::TEXT, v_temprec.create_date::TEXT ); + + IF v_c_oldest IS NULL OR v_c_oldest > v_temprec.create_date THEN + v_c_oldest := v_temprec.create_date; + END IF; + + EXIT WHEN v_found = p_limit + p_offset; + END LOOP; + + CLOSE v_cursor; + + -- Update oldest information based on oldest row added in current loop + -- only if we have more than one circ lib + IF (v_oldest IS NULL OR v_oldest < v_c_oldest) AND v_circ_libs_size > 1 THEN + v_oldest := v_c_oldest; + END IF; + + END LOOP; + RETURN QUERY SELECT KEY::INT8, value::timestamptz FROM each(v_results) ORDER BY value::timestamptz DESC, KEY::INT8 LIMIT p_limit OFFSET p_offset; +RETURN; +END; +$$ language plpgsql STRICT; + +create index copy_circ_lib on asset.copy (circ_lib); +create index unit_circ_lib on serial.unit (circ_lib); + +create index copy_circ_lib_create_date on asset.copy (circ_lib, create_date); +create index unit_circ_lib_create_date on serial.unit (circ_lib, create_date); + +COMMIT; -- 2.11.0