From 1d26f1b17a87e6786a376ef105d85cbab29893cf Mon Sep 17 00:00:00 2001 From: Bill Erickson Date: Tue, 30 Oct 2018 14:15:44 -0400 Subject: [PATCH] JBAS-1839 More 3.2 browse SQL syncing Signed-off-by: Bill Erickson --- .../schema/deploy/2.12-3.2-db-upgrade-schema.sql | 353 +++------------------ 1 file changed, 44 insertions(+), 309 deletions(-) diff --git a/KCLS/sql/schema/deploy/2.12-3.2-db-upgrade-schema.sql b/KCLS/sql/schema/deploy/2.12-3.2-db-upgrade-schema.sql index dc8d6d2122..1759868a23 100644 --- a/KCLS/sql/schema/deploy/2.12-3.2-db-upgrade-schema.sql +++ b/KCLS/sql/schema/deploy/2.12-3.2-db-upgrade-schema.sql @@ -1726,300 +1726,6 @@ SELECT DISTINCT END; $f$ LANGUAGE plpgsql ROWS 10; -/* COMMENT OUT THE FIRST UPDATE SO WE THERE IS NOT NEED TO PATCH BOTH -CREATE OR REPLACE FUNCTION metabib.browse(search_field integer[], browse_term text, context_org integer DEFAULT NULL::integer, context_loc_group integer DEFAULT NULL::integer, staff boolean DEFAULT false, pivot_id bigint DEFAULT NULL::bigint, result_limit integer DEFAULT 10) - RETURNS SETOF metabib.flat_browse_entry_appearance -AS $f$ -DECLARE - core_query TEXT; - back_query TEXT; - forward_query TEXT; - pivot_sort_value TEXT; - pivot_sort_fallback TEXT; - context_locations INT[]; - browse_superpage_size INT; - results_skipped INT := 0; - back_limit INT; - back_to_pivot INT; - forward_limit INT; - forward_to_pivot INT; -BEGIN - -- First, find the pivot if we were given a browse term but not a pivot. - IF pivot_id IS NULL THEN - pivot_id := metabib.browse_pivot(search_field, browse_term); - END IF; - - SELECT INTO pivot_sort_value, pivot_sort_fallback - sort_value, value FROM metabib.browse_entry WHERE id = pivot_id; - - -- Bail if we couldn't find a pivot. - IF pivot_sort_value IS NULL THEN - RETURN; - END IF; - - -- Transform the context_loc_group argument (if any) (logc at the - -- TPAC layer) into a form we'll be able to use. - IF context_loc_group IS NOT NULL THEN - SELECT INTO context_locations ARRAY_AGG(location) - FROM asset.copy_location_group_map - WHERE lgroup = context_loc_group; - END IF; - - -- Get the configured size of browse superpages. - SELECT INTO browse_superpage_size COALESCE(value::INT,100) -- NULL ok - FROM config.global_flag - WHERE enabled AND name = 'opac.browse.holdings_visibility_test_limit'; - - -- First we're going to search backward from the pivot, then we're going - -- to search forward. In each direction, we need two limits. At the - -- lesser of the two limits, we delineate the edge of the result set - -- we're going to return. At the greater of the two limits, we find the - -- pivot value that would represent an offset from the current pivot - -- at a distance of one "page" in either direction, where a "page" is a - -- result set of the size specified in the "result_limit" argument. - -- - -- The two limits in each direction make four derived values in total, - -- and we calculate them now. - back_limit := CEIL(result_limit::FLOAT / 2); - back_to_pivot := result_limit; - forward_limit := result_limit / 2; - forward_to_pivot := result_limit - 1; - - -- This is the meat of the SQL query that finds browse entries. We'll - -- pass this to a function which uses it with a cursor, so that individual - -- rows may be fetched in a loop until some condition is satisfied, without - -- waiting for a result set of fixed size to be collected all at once. - core_query := ' -SELECT mbe.id, - mbe.value, - mbe.sort_value - FROM metabib.browse_entry mbe - WHERE ( - EXISTS ( -- are there any bibs using this mbe via the requested fields? - SELECT 1 - FROM metabib.browse_entry_def_map mbedm - WHERE mbedm.entry = mbe.id AND mbedm.def = ANY(' || quote_literal(search_field) || ') - ) OR EXISTS ( -- are there any authorities using this mbe via the requested fields? - SELECT 1 - FROM metabib.browse_entry_simple_heading_map mbeshm - JOIN authority.simple_heading ash ON ( mbeshm.simple_heading = ash.id ) - JOIN authority.control_set_auth_field_metabib_field_map_refs map ON ( - ash.atag = map.authority_field - AND map.metabib_field = ANY(' || quote_literal(search_field) || ') - ) - WHERE mbeshm.entry = mbe.id - ) - ) AND '; - - -- This is the variant of the query for browsing backward. - back_query := core_query || - ' mbe.sort_value <= ' || quote_literal(pivot_sort_value) || - ' ORDER BY mbe.sort_value DESC, mbe.value DESC LIMIT 1000'; - - -- This variant browses forward. - forward_query := core_query || - ' mbe.sort_value > ' || quote_literal(pivot_sort_value) || - ' ORDER BY mbe.sort_value, mbe.value LIMIT 1000'; - - -- We now call the function which applies a cursor to the provided - -- queries, stopping at the appropriate limits and also giving us - -- the next page's pivot. - RETURN QUERY - SELECT * FROM metabib.staged_browse( - back_query, search_field, context_org, context_locations, - staff, browse_superpage_size, TRUE, back_limit, back_to_pivot - ) UNION - SELECT * FROM metabib.staged_browse( - forward_query, search_field, context_org, context_locations, - staff, browse_superpage_size, FALSE, forward_limit, forward_to_pivot - ) ORDER BY row_number DESC; -END; $f$ LANGUAGE plpgsql ROWS 10; - -CREATE OR REPLACE FUNCTION metabib.staged_browse(query text, fields integer[], context_org integer, context_locations integer[], staff boolean, browse_superpage_size integer, count_up_from_zero boolean, result_limit integer, next_pivot_pos integer) - RETURNS SETOF metabib.flat_browse_entry_appearance -AS $f$ -DECLARE - curs REFCURSOR; - rec RECORD; - qpfts_query TEXT; - aqpfts_query TEXT; - afields INT[]; - bfields INT[]; - result_row metabib.flat_browse_entry_appearance%ROWTYPE; - results_skipped INT := 0; - row_counter INT := 0; - row_number INT; - slice_start INT; - slice_end INT; - full_end INT; - all_records BIGINT[]; - all_brecords BIGINT[]; - all_arecords BIGINT[]; - superpage_of_records BIGINT[]; - superpage_size INT; - c_tests TEXT := ''; - b_tests TEXT := ''; - c_orgs INT[]; -BEGIN - IF count_up_from_zero THEN - row_number := 0; - ELSE - row_number := -1; - END IF; - - IF NOT staff THEN - SELECT x.c_attrs, x.b_attrs INTO c_tests, b_tests FROM asset.patron_default_visibility_mask() x; - END IF; - - IF c_tests <> '' THEN c_tests := c_tests || '&'; END IF; - IF b_tests <> '' THEN b_tests := b_tests || '&'; END IF; - - SELECT ARRAY_AGG(id) INTO c_orgs FROM actor.org_unit_descendants(context_org); - - c_tests := c_tests || search.calculate_visibility_attribute_test('circ_lib',c_orgs) - || '&' || search.calculate_visibility_attribute_test('owning_lib',c_orgs); - - PERFORM 1 FROM config.internal_flag WHERE enabled AND name = 'opac.located_uri.act_as_copy'; - IF FOUND THEN - b_tests := b_tests || search.calculate_visibility_attribute_test( - 'luri_org', - (SELECT ARRAY_AGG(id) FROM actor.org_unit_full_path(context_org) x) - ); - ELSE - b_tests := b_tests || search.calculate_visibility_attribute_test( - 'luri_org', - (SELECT ARRAY_AGG(id) FROM actor.org_unit_ancestors(context_org) x) - ); - END IF; - - IF context_locations THEN - IF c_tests <> '' THEN c_tests := c_tests || '&'; END IF; - c_tests := c_tests || search.calculate_visibility_attribute_test('location',context_locations); - END IF; - - OPEN curs NO SCROLL FOR EXECUTE query; - - LOOP - FETCH curs INTO rec; - IF NOT FOUND THEN - IF result_row.pivot_point IS NOT NULL THEN - RETURN NEXT result_row; - END IF; - RETURN; - END IF; - - -- Gather aggregate data based on the MBE row we're looking at now, authority axis - SELECT INTO all_arecords, result_row.sees, afields - ARRAY_AGG(DISTINCT abl.bib), -- bibs to check for visibility - STRING_AGG(DISTINCT aal.source::TEXT, $$,$$), -- authority record ids - ARRAY_AGG(DISTINCT map.metabib_field) -- authority-tag-linked CMF rows - - FROM metabib.browse_entry_simple_heading_map mbeshm - JOIN authority.simple_heading ash ON ( mbeshm.simple_heading = ash.id ) - JOIN authority.authority_linking aal ON ( ash.record = aal.source ) - JOIN authority.bib_linking abl ON ( aal.target = abl.authority ) - JOIN authority.control_set_auth_field_metabib_field_map_refs map ON ( - ash.atag = map.authority_field - AND map.metabib_field = ANY(fields) - ) - WHERE mbeshm.entry = rec.id; - - -- Gather aggregate data based on the MBE row we're looking at now, bib axis - SELECT INTO all_brecords, result_row.authorities, bfields - ARRAY_AGG(DISTINCT source), - STRING_AGG(DISTINCT authority::TEXT, $$,$$), - ARRAY_AGG(DISTINCT def) - FROM metabib.browse_entry_def_map - WHERE entry = rec.id - AND def = ANY(fields); - - SELECT INTO result_row.fields STRING_AGG(DISTINCT x::TEXT, $$,$$) FROM UNNEST(afields || bfields) x; - - result_row.sources := 0; - result_row.asources := 0; - - -- Bib-linked vis checking - IF ARRAY_UPPER(all_brecords,1) IS NOT NULL THEN - - SELECT INTO result_row.sources COUNT(DISTINCT b.id) - FROM biblio.record_entry b - JOIN asset.copy_vis_attr_cache acvac ON (acvac.record = b.id) - WHERE b.id = ANY(all_brecords[1:browse_superpage_size]) - AND ( - acvac.vis_attr_vector @@ c_tests::query_int - OR b.vis_attr_vector @@ b_tests::query_int - ); - - result_row.accurate := TRUE; - - END IF; - - -- Authority-linked vis checking - IF ARRAY_UPPER(all_arecords,1) IS NOT NULL THEN - - SELECT INTO result_row.asources COUNT(DISTINCT b.id) - FROM biblio.record_entry b - JOIN asset.copy_vis_attr_cache acvac ON (acvac.record = b.id) - WHERE b.id = ANY(all_arecords[1:browse_superpage_size]) - AND ( - acvac.vis_attr_vector @@ c_tests::query_int - OR b.vis_attr_vector @@ b_tests::query_int - ); - - result_row.aaccurate := TRUE; - - END IF; - - IF result_row.sources > 0 OR result_row.asources > 0 THEN - - -- The function that calls this function needs row_number in order - -- to correctly order results from two different runs of this - -- functions. - result_row.row_number := row_number; - - -- Now, if row_counter is still less than limit, return a row. If - -- not, but it is less than next_pivot_pos, continue on without - -- returning actual result rows until we find - -- that next pivot, and return it. - - IF row_counter < result_limit THEN - result_row.browse_entry := rec.id; - result_row.value := rec.value; - - RETURN NEXT result_row; - ELSE - result_row.browse_entry := NULL; - result_row.authorities := NULL; - result_row.fields := NULL; - result_row.value := NULL; - result_row.sources := NULL; - result_row.sees := NULL; - result_row.accurate := NULL; - result_row.aaccurate := NULL; - result_row.pivot_point := rec.id; - - IF row_counter >= next_pivot_pos THEN - RETURN NEXT result_row; - RETURN; - END IF; - END IF; - - IF count_up_from_zero THEN - row_number := row_number + 1; - ELSE - row_number := row_number - 1; - END IF; - - -- row_counter is different from row_number. - -- It simply counts up from zero so that we know when - -- we've reached our limit. - row_counter := row_counter + 1; - END IF; - END LOOP; -END; -$f$ LANGUAGE plpgsql ROWS 10; -*/ - CREATE OR REPLACE FUNCTION asset.opac_ou_record_copy_count (org INT, rid BIGINT) RETURNS TABLE (depth INT, org_unit INT, visible BIGINT, available BIGINT, unshadow BIGINT, transcendant INT) AS $f$ DECLARE ans RECORD; @@ -5684,8 +5390,8 @@ BEGIN pivot_id := metabib.browse_pivot(search_field, browse_term); END IF; - SELECT INTO pivot_sort_value, pivot_sort_fallback - sort_value, value FROM metabib.browse_entry WHERE id = pivot_id; + SELECT INTO pivot_sort_value combo_sort_value + FROM metabib.browse_entry WHERE id = pivot_id; -- Bail if we couldn't find a pivot. IF pivot_sort_value IS NULL THEN @@ -5953,23 +5659,43 @@ DROP FUNCTION metabib.reingest_metabib_field_entries(BIGINT, BOOL, BOOL, BOOL); DROP FUNCTION biblio.extract_metabib_field_entry(BIGINT); DROP FUNCTION biblio.extract_metabib_field_entry(BIGINT, TEXT); -CREATE OR REPLACE FUNCTION metabib.reingest_metabib_field_entries( bib_id BIGINT, skip_facet BOOL DEFAULT FALSE, skip_browse BOOL DEFAULT FALSE, skip_search BOOL DEFAULT FALSE ) RETURNS VOID AS $func$ + +CREATE OR REPLACE FUNCTION metabib.reingest_metabib_field_entries( + bib_id BIGINT, + skip_facet BOOL DEFAULT FALSE, + skip_display BOOL DEFAULT FALSE, + skip_browse BOOL DEFAULT FALSE, + skip_search BOOL DEFAULT FALSE, + only_fields INT[] DEFAULT '{}'::INT[] +) RETURNS VOID AS $func$ DECLARE fclass RECORD; ind_data metabib.field_entry_template%ROWTYPE; mbe_row metabib.browse_entry%ROWTYPE; mbe_id BIGINT; b_skip_facet BOOL; + b_skip_display BOOL; b_skip_browse BOOL; b_skip_search BOOL; value_prepped TEXT; - combo_sort_val TEXT; + field_list INT[] := only_fields; + field_types TEXT[] := '{}'::TEXT[]; BEGIN + IF field_list = '{}'::INT[] THEN + SELECT ARRAY_AGG(id) INTO field_list FROM config.metabib_field; + END IF; + SELECT COALESCE(NULLIF(skip_facet, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name = 'ingest.skip_facet_indexing' AND enabled)) INTO b_skip_facet; + SELECT COALESCE(NULLIF(skip_display, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name = 'ingest.skip_display_indexing' AND enabled)) INTO b_skip_display; SELECT COALESCE(NULLIF(skip_browse, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name = 'ingest.skip_browse_indexing' AND enabled)) INTO b_skip_browse; SELECT COALESCE(NULLIF(skip_search, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name = 'ingest.skip_search_indexing' AND enabled)) INTO b_skip_search; + IF NOT b_skip_facet THEN field_types := field_types || '{facet}'; END IF; + IF NOT b_skip_display THEN field_types := field_types || '{display}'; END IF; + IF NOT b_skip_browse THEN field_types := field_types || '{browse}'; END IF; + IF NOT b_skip_search THEN field_types := field_types || '{search}'; END IF; + PERFORM * FROM config.internal_flag WHERE name = 'ingest.assume_inserts_only' AND enabled; IF NOT FOUND THEN IF NOT b_skip_search THEN @@ -5981,14 +5707,17 @@ BEGIN IF NOT b_skip_facet THEN DELETE FROM metabib.facet_entry WHERE source = bib_id; END IF; + IF NOT b_skip_display THEN + DELETE FROM metabib.display_entry WHERE source = bib_id; + END IF; IF NOT b_skip_browse THEN DELETE FROM metabib.browse_entry_def_map WHERE source = bib_id; END IF; END IF; - FOR ind_data IN SELECT * FROM biblio.extract_metabib_field_entry( bib_id ) LOOP + FOR ind_data IN SELECT * FROM biblio.extract_metabib_field_entry( bib_id, ' ', field_types, field_list ) LOOP - -- don't store what has been normalized away + -- don't store what has been normalized away CONTINUE WHEN ind_data.value IS NULL; IF ind_data.field < 0 THEN @@ -6000,6 +5729,12 @@ BEGIN VALUES (ind_data.field, ind_data.source, ind_data.value); END IF; + IF ind_data.display_field AND NOT b_skip_display THEN + INSERT INTO metabib.display_entry (field, source, value) + VALUES (ind_data.field, ind_data.source, ind_data.value); + END IF; + + IF ind_data.browse_field AND NOT b_skip_browse THEN -- A caveat about this SELECT: this should take care of replacing -- old mbe rows when data changes, but not if normalization (by @@ -6011,21 +5746,22 @@ BEGIN CONTINUE WHEN ind_data.sort_value IS NULL; value_prepped := metabib.browse_normalize(ind_data.value, ind_data.field); - - combo_sort_val := - SUBSTR((ind_data.sort_value || value_prepped), 1, 2048); - SELECT INTO mbe_row * FROM metabib.browse_entry - WHERE value = value_prepped AND sort_value = sort_value; + WHERE + MD5(value) = MD5(value_prepped) AND + MD5(sort_value) = MD5(ind_data.sort_value); IF FOUND THEN mbe_id := mbe_row.id; ELSE INSERT INTO metabib.browse_entry ( value, sort_value, combo_sort_value, metabib_fields_cache) - VALUES - ( value_prepped, sort_value_prepped, combo_sort_val, '{}' ); - + VALUES ( + value_prepped, + ind_data.sort_value, + SUBSTR((ind_data.sort_value || value_prepped), 1, 2048), + '{}' + ); mbe_id := CURRVAL('metabib.browse_entry_id_seq'::REGCLASS); END IF; @@ -6060,7 +5796,6 @@ BEGIN END; $func$ LANGUAGE PLPGSQL; - CREATE OR REPLACE FUNCTION biblio.extract_metabib_field_entry ( rid BIGINT, default_joiner TEXT, -- 2.11.0