From f5262ddc9f7c0e742b0fb07454ee24a34b08fec0 Mon Sep 17 00:00:00 2001 From: Bill Erickson Date: Mon, 26 Oct 2015 12:08:15 -0400 Subject: [PATCH] JBAS-944 staged_browse() and reingest_metabib_field_entries * Avoid replacing staged browse. * Cross-port local changes from reingest_metabib_field_entries into the stock code. Signed-off-by: Bill Erickson --- KCLS/sql/schema/deploy/2.5-to-2.7-upgrade.sql | 352 ++++++++++---------------- 1 file changed, 130 insertions(+), 222 deletions(-) diff --git a/KCLS/sql/schema/deploy/2.5-to-2.7-upgrade.sql b/KCLS/sql/schema/deploy/2.5-to-2.7-upgrade.sql index 8abdd9c2dc..a31a7c6f78 100644 --- a/KCLS/sql/schema/deploy/2.5-to-2.7-upgrade.sql +++ b/KCLS/sql/schema/deploy/2.5-to-2.7-upgrade.sql @@ -1930,213 +1930,8 @@ SELECT r.id, SELECT evergreen.upgrade_deps_block_check('0856', :eg_version); - -CREATE OR REPLACE FUNCTION metabib.staged_browse( - query TEXT, - fields INT[], - context_org INT, - context_locations INT[], - staff BOOL, - browse_superpage_size INT, - count_up_from_zero BOOL, -- if false, count down from -1 - result_limit INT, - next_pivot_pos INT -) RETURNS SETOF metabib.flat_browse_entry_appearance AS $p$ -DECLARE - curs REFCURSOR; - rec RECORD; - qpfts_query TEXT; - aqpfts_query TEXT; - afields INT[]; - bfields INT[]; - result_row metabib.flat_browse_entry_appearance%ROWTYPE; - results_skipped INT := 0; - row_counter INT := 0; - row_number INT; - slice_start INT; - slice_end INT; - full_end INT; - all_records BIGINT[]; - all_brecords BIGINT[]; - all_arecords BIGINT[]; - superpage_of_records BIGINT[]; - superpage_size INT; -BEGIN - IF count_up_from_zero THEN - row_number := 0; - ELSE - row_number := -1; - END IF; - - OPEN curs FOR EXECUTE query; - - LOOP - FETCH curs INTO rec; - IF NOT FOUND THEN - IF result_row.pivot_point IS NOT NULL THEN - RETURN NEXT result_row; - END IF; - RETURN; - END IF; - - - -- Gather aggregate data based on the MBE row we're looking at now, authority axis - SELECT INTO all_arecords, 
result_row.sees, afields - ARRAY_AGG(DISTINCT abl.bib), -- bibs to check for visibility - STRING_AGG(DISTINCT aal.source::TEXT, $$,$$), -- authority record ids - ARRAY_AGG(DISTINCT map.metabib_field) -- authority-tag-linked CMF rows - - FROM metabib.browse_entry_simple_heading_map mbeshm - JOIN authority.simple_heading ash ON ( mbeshm.simple_heading = ash.id ) - JOIN authority.authority_linking aal ON ( ash.record = aal.source ) - JOIN authority.bib_linking abl ON ( aal.target = abl.authority ) - JOIN authority.control_set_auth_field_metabib_field_map_refs map ON ( - ash.atag = map.authority_field - AND map.metabib_field = ANY(fields) - ) - WHERE mbeshm.entry = rec.id; - - - -- Gather aggregate data based on the MBE row we're looking at now, bib axis - SELECT INTO all_brecords, result_row.authorities, bfields - ARRAY_AGG(DISTINCT source), - STRING_AGG(DISTINCT authority::TEXT, $$,$$), - ARRAY_AGG(DISTINCT def) - FROM metabib.browse_entry_def_map - WHERE entry = rec.id - AND def = ANY(fields); - - SELECT INTO result_row.fields STRING_AGG(DISTINCT x::TEXT, $$,$$) FROM UNNEST(afields || bfields) x; - - result_row.sources := 0; - result_row.asources := 0; - - -- Bib-linked vis checking - IF ARRAY_UPPER(all_brecords,1) IS NOT NULL THEN - - full_end := ARRAY_LENGTH(all_brecords, 1); - superpage_size := COALESCE(browse_superpage_size, full_end); - slice_start := 1; - slice_end := superpage_size; - - WHILE result_row.sources = 0 AND slice_start <= full_end LOOP - superpage_of_records := all_brecords[slice_start:slice_end]; - qpfts_query := - 'SELECT NULL::BIGINT AS id, ARRAY[r] AS records, ' || - '1::INT AS rel FROM (SELECT UNNEST(' || - quote_literal(superpage_of_records) || '::BIGINT[]) AS r) rr'; - - -- We use search.query_parser_fts() for visibility testing. - -- We're calling it once per browse-superpage worth of records - -- out of the set of records related to a given mbe, until we've - -- either exhausted that set of records or found at least 1 - -- visible record. 
- - SELECT INTO result_row.sources visible - FROM search.query_parser_fts( - context_org, NULL, qpfts_query, NULL, - context_locations, 0, NULL, NULL, FALSE, staff, FALSE - ) qpfts - WHERE qpfts.rel IS NULL; - - slice_start := slice_start + superpage_size; - slice_end := slice_end + superpage_size; - END LOOP; - - -- Accurate? Well, probably. - result_row.accurate := browse_superpage_size IS NULL OR - browse_superpage_size >= full_end; - - END IF; - - -- Authority-linked vis checking - IF ARRAY_UPPER(all_arecords,1) IS NOT NULL THEN - - full_end := ARRAY_LENGTH(all_arecords, 1); - superpage_size := COALESCE(browse_superpage_size, full_end); - slice_start := 1; - slice_end := superpage_size; - - WHILE result_row.asources = 0 AND slice_start <= full_end LOOP - superpage_of_records := all_arecords[slice_start:slice_end]; - qpfts_query := - 'SELECT NULL::BIGINT AS id, ARRAY[r] AS records, ' || - '1::INT AS rel FROM (SELECT UNNEST(' || - quote_literal(superpage_of_records) || '::BIGINT[]) AS r) rr'; - - -- We use search.query_parser_fts() for visibility testing. - -- We're calling it once per browse-superpage worth of records - -- out of the set of records related to a given mbe, via - -- authority until we've either exhausted that set of records - -- or found at least 1 visible record. - - SELECT INTO result_row.asources visible - FROM search.query_parser_fts( - context_org, NULL, qpfts_query, NULL, - context_locations, 0, NULL, NULL, FALSE, staff, FALSE - ) qpfts - WHERE qpfts.rel IS NULL; - - slice_start := slice_start + superpage_size; - slice_end := slice_end + superpage_size; - END LOOP; - - - -- Accurate? Well, probably. - result_row.aaccurate := browse_superpage_size IS NULL OR - browse_superpage_size >= full_end; - - END IF; - - IF result_row.sources > 0 OR result_row.asources > 0 THEN - - -- The function that calls this function needs row_number in order - -- to correctly order results from two different runs of this - -- functions. 
- result_row.row_number := row_number; - - -- Now, if row_counter is still less than limit, return a row. If - -- not, but it is less than next_pivot_pos, continue on without - -- returning actual result rows until we find - -- that next pivot, and return it. - - IF row_counter < result_limit THEN - result_row.browse_entry := rec.id; - result_row.value := rec.value; - - RETURN NEXT result_row; - ELSE - result_row.browse_entry := NULL; - result_row.authorities := NULL; - result_row.fields := NULL; - result_row.value := NULL; - result_row.sources := NULL; - result_row.sees := NULL; - result_row.accurate := NULL; - result_row.aaccurate := NULL; - result_row.pivot_point := rec.id; - - IF row_counter >= next_pivot_pos THEN - RETURN NEXT result_row; - RETURN; - END IF; - END IF; - - IF count_up_from_zero THEN - row_number := row_number + 1; - ELSE - row_number := row_number - 1; - END IF; - - -- row_counter is different from row_number. - -- It simply counts up from zero so that we know when - -- we've reached our limit. - row_counter := row_counter + 1; - END IF; - END LOOP; -END; -$p$ LANGUAGE PLPGSQL; - +-- KCLS metabib.staged_browse() update went here, but our version conflicts +-- heavily with stock. Required change (STRING_AGG casting) already covered. /* * Copyright (C) 2014 Equinox Software, Inc. @@ -5383,7 +5178,13 @@ BEGIN output_row.field_class = idx.field_class; output_row.field = idx.id; output_row.source = rid; - output_row.value = BTRIM(REGEXP_REPLACE(browse_text, E'\\s+', ' ', 'g')); + -- KCLS / Catalyst + -- output_row.value = BTRIM(REGEXP_REPLACE(browse_text, E'\\s+', ' ', 'g')); + -- outer regexp_replace keeps every '.' except a trailing one at the very end of the value. 
+ -- inner regexp_replace removes all connecting whitespace and replaces it with a single space + output_row.value = BTRIM(REGEXP_REPLACE(REGEXP_REPLACE(browse_text, E'\\s+', ' ', 'g'), E'\\.$', '', 'g')); + -- /KCLS + output_row.sort_value := public.naco_normalize(sort_value); @@ -5447,10 +5248,24 @@ BEGIN -- insert combined node text for searching IF idx.search_field THEN + -- KCLS + IF idx.field_class = 'identifier' AND idx.name = 'bibcn' THEN + output_row.field_class = 'call_number'; + output_row.browse_field = TRUE; + output_row.sort_value = public.naco_normalize_keep_decimal(raw_text,''); + output_row.value = raw_text; + ELSE + output_row.value = BTRIM(REGEXP_REPLACE( + REGEXP_REPLACE(raw_text, E'\\s+', ' ', 'g'), E'\\.+', '', 'g')); + END IF; + -- /KCLS + output_row.field_class = idx.field_class; output_row.field = idx.id; output_row.source = rid; - output_row.value = BTRIM(REGEXP_REPLACE(raw_text, E'\\s+', ' ', 'g')); + -- KCLS -- value is set above + -- output_row.value = BTRIM(REGEXP_REPLACE(raw_text, E'\\s+', ' ', 'g')); + -- /KCLS output_row.search_field = TRUE; RETURN NEXT output_row; @@ -5751,6 +5566,7 @@ DECLARE b_skip_browse BOOL; b_skip_search BOOL; value_prepped TEXT; + field_class TEXT; BEGIN SELECT COALESCE(NULLIF(skip_facet, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name = 'ingest.skip_facet_indexing' AND enabled)) INTO b_skip_facet; @@ -5769,7 +5585,11 @@ BEGIN DELETE FROM metabib.facet_entry WHERE source = bib_id; END IF; IF NOT b_skip_browse THEN - DELETE FROM metabib.browse_entry_def_map WHERE source = bib_id; + DELETE FROM metabib.browse_author_entry_def_map WHERE source = bib_id; + DELETE FROM metabib.browse_title_entry_def_map WHERE source = bib_id; + DELETE FROM metabib.browse_subject_entry_def_map WHERE source = bib_id; + DELETE FROM metabib.browse_series_entry_def_map WHERE source = bib_id; + DELETE FROM metabib.browse_call_number_entry_def_map WHERE source = bib_id; END IF; END IF; @@ -5798,22 +5618,110 @@ BEGIN 
CONTINUE WHEN ind_data.sort_value IS NULL; value_prepped := metabib.browse_normalize(ind_data.value, ind_data.field); - SELECT INTO mbe_row * FROM metabib.browse_entry - WHERE value = value_prepped AND sort_value = ind_data.sort_value; - IF FOUND THEN - mbe_id := mbe_row.id; - ELSE - INSERT INTO metabib.browse_entry - ( value, sort_value ) VALUES - ( value_prepped, ind_data.sort_value ); + -- KCLS + IF char_length(value_prepped) > 0 THEN + CASE ind_data.field_class - mbe_id := CURRVAL('metabib.browse_entry_id_seq'::REGCLASS); - END IF; + WHEN 'author' THEN + + SELECT INTO mbe_row * FROM metabib.browse_author_entry + WHERE sort_value = ind_data.sort_value + ORDER BY id; + + IF FOUND THEN + mbe_id := mbe_row.id; + ELSE + INSERT INTO metabib.browse_author_entry + ( value, sort_value, truncated_sort_value ) VALUES + ( value_prepped, ind_data.sort_value, substr(ind_data.sort_value, 1, 2700) ); + + mbe_id := CURRVAL('metabib.browse_author_entry_id_seq'::REGCLASS); + END IF; + + INSERT INTO metabib.browse_author_entry_def_map (entry, def, source, authority) + VALUES (mbe_id, ind_data.field, ind_data.source, ind_data.authority); + + WHEN 'title' THEN + + SELECT INTO mbe_row * FROM metabib.browse_title_entry + WHERE sort_value = ind_data.sort_value + ORDER BY id; + + IF FOUND THEN + mbe_id := mbe_row.id; + ELSE + INSERT INTO metabib.browse_title_entry + ( value, sort_value, truncated_sort_value ) VALUES + ( value_prepped, ind_data.sort_value, substr(ind_data.sort_value, 1, 2700) ); + + mbe_id := CURRVAL('metabib.browse_title_entry_id_seq'::REGCLASS); + END IF; - INSERT INTO metabib.browse_entry_def_map (entry, def, source, authority) - VALUES (mbe_id, ind_data.field, ind_data.source, ind_data.authority); + INSERT INTO metabib.browse_title_entry_def_map (entry, def, source, authority) + VALUES (mbe_id, ind_data.field, ind_data.source, ind_data.authority); + + WHEN 'subject' THEN + + SELECT INTO mbe_row * FROM metabib.browse_subject_entry + WHERE sort_value = 
ind_data.sort_value + ORDER BY id; + + IF FOUND THEN + mbe_id := mbe_row.id; + ELSE + INSERT INTO metabib.browse_subject_entry + ( value, sort_value, truncated_sort_value ) VALUES + ( value_prepped, ind_data.sort_value, substr(ind_data.sort_value, 1, 2700) ); + + mbe_id := CURRVAL('metabib.browse_subject_entry_id_seq'::REGCLASS); + END IF; + + INSERT INTO metabib.browse_subject_entry_def_map (entry, def, source, authority) + VALUES (mbe_id, ind_data.field, ind_data.source, ind_data.authority); + + WHEN 'series' THEN + + SELECT INTO mbe_row * FROM metabib.browse_series_entry + WHERE sort_value = ind_data.sort_value + ORDER BY id; + + IF FOUND THEN + mbe_id := mbe_row.id; + ELSE + INSERT INTO metabib.browse_series_entry + ( value, sort_value, truncated_sort_value ) VALUES + ( value_prepped, ind_data.sort_value, substr(ind_data.sort_value, 1, 2700) ); + + mbe_id := CURRVAL('metabib.browse_series_entry_id_seq'::REGCLASS); + END IF; + + INSERT INTO metabib.browse_series_entry_def_map (entry, def, source, authority) + VALUES (mbe_id, ind_data.field, ind_data.source, ind_data.authority); + + WHEN 'call_number' THEN + + SELECT INTO mbe_row * FROM metabib.browse_call_number_entry + WHERE sort_value = ind_data.sort_value + ORDER BY id; + + IF FOUND THEN + mbe_id := mbe_row.id; + ELSE + INSERT INTO metabib.browse_call_number_entry + ( value, sort_value, truncated_sort_value ) VALUES + ( value_prepped, ind_data.sort_value, substr(ind_data.sort_value, 1, 2700) ); + + mbe_id := CURRVAL('metabib.browse_call_number_entry_id_seq'::REGCLASS); + END IF; + + INSERT INTO metabib.browse_call_number_entry_def_map (entry, def, source, authority) + VALUES (mbe_id, ind_data.field, ind_data.source, ind_data.authority); + ELSE + END CASE; + END IF; END IF; + -- /KCLS IF ind_data.search_field AND NOT b_skip_search THEN -- Avoid inserting duplicate rows -- 2.11.0