JBAS-944 staged_browse() and reingest_metabib_field_entries
authorBill Erickson <berickxx@gmail.com>
Mon, 26 Oct 2015 16:08:15 +0000 (12:08 -0400)
committerBill Erickson <berickxx@gmail.com>
Thu, 21 Mar 2019 19:46:23 +0000 (15:46 -0400)
* Avoid replacing staged browse.
* Cross-port local changes from reingest_metabib_field_entries
  into the stock code.

Signed-off-by: Bill Erickson <berickxx@gmail.com>
KCLS/sql/schema/deploy/2.5-to-2.7-upgrade.sql

index 8abdd9c..a31a7c6 100644 (file)
@@ -1930,213 +1930,8 @@ SELECT  r.id,
 
 
 SELECT evergreen.upgrade_deps_block_check('0856', :eg_version);
-
-CREATE OR REPLACE FUNCTION metabib.staged_browse(
-    query                   TEXT,
-    fields                  INT[],
-    context_org             INT,
-    context_locations       INT[],
-    staff                   BOOL,
-    browse_superpage_size   INT,
-    count_up_from_zero      BOOL,   -- if false, count down from -1
-    result_limit            INT,
-    next_pivot_pos          INT
-) RETURNS SETOF metabib.flat_browse_entry_appearance AS $p$
-DECLARE
-    curs                    REFCURSOR;
-    rec                     RECORD;
-    qpfts_query             TEXT;
-    aqpfts_query            TEXT;
-    afields                 INT[];
-    bfields                 INT[];
-    result_row              metabib.flat_browse_entry_appearance%ROWTYPE;
-    results_skipped         INT := 0;
-    row_counter             INT := 0;
-    row_number              INT;
-    slice_start             INT;
-    slice_end               INT;
-    full_end                INT;
-    all_records             BIGINT[];
-    all_brecords             BIGINT[];
-    all_arecords            BIGINT[];
-    superpage_of_records    BIGINT[];
-    superpage_size          INT;
-BEGIN
-    IF count_up_from_zero THEN
-        row_number := 0;
-    ELSE
-        row_number := -1;
-    END IF;
-
-    OPEN curs FOR EXECUTE query;
-
-    LOOP
-        FETCH curs INTO rec;
-        IF NOT FOUND THEN
-            IF result_row.pivot_point IS NOT NULL THEN
-                RETURN NEXT result_row;
-            END IF;
-            RETURN;
-        END IF;
-
-
-        -- Gather aggregate data based on the MBE row we're looking at now, authority axis
-        SELECT INTO all_arecords, result_row.sees, afields
-                ARRAY_AGG(DISTINCT abl.bib), -- bibs to check for visibility
-                STRING_AGG(DISTINCT aal.source::TEXT, $$,$$), -- authority record ids
-                ARRAY_AGG(DISTINCT map.metabib_field) -- authority-tag-linked CMF rows
-
-          FROM  metabib.browse_entry_simple_heading_map mbeshm
-                JOIN authority.simple_heading ash ON ( mbeshm.simple_heading = ash.id )
-                JOIN authority.authority_linking aal ON ( ash.record = aal.source )
-                JOIN authority.bib_linking abl ON ( aal.target = abl.authority )
-                JOIN authority.control_set_auth_field_metabib_field_map_refs map ON (
-                    ash.atag = map.authority_field
-                    AND map.metabib_field = ANY(fields)
-                )
-          WHERE mbeshm.entry = rec.id;
-
-
-        -- Gather aggregate data based on the MBE row we're looking at now, bib axis
-        SELECT INTO all_brecords, result_row.authorities, bfields
-                ARRAY_AGG(DISTINCT source),
-                STRING_AGG(DISTINCT authority::TEXT, $$,$$),
-                ARRAY_AGG(DISTINCT def)
-          FROM  metabib.browse_entry_def_map
-          WHERE entry = rec.id
-                AND def = ANY(fields);
-
-        SELECT INTO result_row.fields STRING_AGG(DISTINCT x::TEXT, $$,$$) FROM UNNEST(afields || bfields) x;
-
-        result_row.sources := 0;
-        result_row.asources := 0;
-
-        -- Bib-linked vis checking
-        IF ARRAY_UPPER(all_brecords,1) IS NOT NULL THEN
-
-            full_end := ARRAY_LENGTH(all_brecords, 1);
-            superpage_size := COALESCE(browse_superpage_size, full_end);
-            slice_start := 1;
-            slice_end := superpage_size;
-
-            WHILE result_row.sources = 0 AND slice_start <= full_end LOOP
-                superpage_of_records := all_brecords[slice_start:slice_end];
-                qpfts_query :=
-                    'SELECT NULL::BIGINT AS id, ARRAY[r] AS records, ' ||
-                    '1::INT AS rel FROM (SELECT UNNEST(' ||
-                    quote_literal(superpage_of_records) || '::BIGINT[]) AS r) rr';
-
-                -- We use search.query_parser_fts() for visibility testing.
-                -- We're calling it once per browse-superpage worth of records
-                -- out of the set of records related to a given mbe, until we've
-                -- either exhausted that set of records or found at least 1
-                -- visible record.
-
-                SELECT INTO result_row.sources visible
-                    FROM search.query_parser_fts(
-                        context_org, NULL, qpfts_query, NULL,
-                        context_locations, 0, NULL, NULL, FALSE, staff, FALSE
-                    ) qpfts
-                    WHERE qpfts.rel IS NULL;
-
-                slice_start := slice_start + superpage_size;
-                slice_end := slice_end + superpage_size;
-            END LOOP;
-
-            -- Accurate?  Well, probably.
-            result_row.accurate := browse_superpage_size IS NULL OR
-                browse_superpage_size >= full_end;
-
-        END IF;
-
-        -- Authority-linked vis checking
-        IF ARRAY_UPPER(all_arecords,1) IS NOT NULL THEN
-
-            full_end := ARRAY_LENGTH(all_arecords, 1);
-            superpage_size := COALESCE(browse_superpage_size, full_end);
-            slice_start := 1;
-            slice_end := superpage_size;
-
-            WHILE result_row.asources = 0 AND slice_start <= full_end LOOP
-                superpage_of_records := all_arecords[slice_start:slice_end];
-                qpfts_query :=
-                    'SELECT NULL::BIGINT AS id, ARRAY[r] AS records, ' ||
-                    '1::INT AS rel FROM (SELECT UNNEST(' ||
-                    quote_literal(superpage_of_records) || '::BIGINT[]) AS r) rr';
-
-                -- We use search.query_parser_fts() for visibility testing.
-                -- We're calling it once per browse-superpage worth of records
-                -- out of the set of records related to a given mbe, via
-                -- authority until we've either exhausted that set of records
-                -- or found at least 1 visible record.
-
-                SELECT INTO result_row.asources visible
-                    FROM search.query_parser_fts(
-                        context_org, NULL, qpfts_query, NULL,
-                        context_locations, 0, NULL, NULL, FALSE, staff, FALSE
-                    ) qpfts
-                    WHERE qpfts.rel IS NULL;
-
-                slice_start := slice_start + superpage_size;
-                slice_end := slice_end + superpage_size;
-            END LOOP;
-
-
-            -- Accurate?  Well, probably.
-            result_row.aaccurate := browse_superpage_size IS NULL OR
-                browse_superpage_size >= full_end;
-
-        END IF;
-
-        IF result_row.sources > 0 OR result_row.asources > 0 THEN
-
-            -- The function that calls this function needs row_number in order
-            -- to correctly order results from two different runs of this
-            -- functions.
-            result_row.row_number := row_number;
-
-            -- Now, if row_counter is still less than limit, return a row.  If
-            -- not, but it is less than next_pivot_pos, continue on without
-            -- returning actual result rows until we find
-            -- that next pivot, and return it.
-
-            IF row_counter < result_limit THEN
-                result_row.browse_entry := rec.id;
-                result_row.value := rec.value;
-
-                RETURN NEXT result_row;
-            ELSE
-                result_row.browse_entry := NULL;
-                result_row.authorities := NULL;
-                result_row.fields := NULL;
-                result_row.value := NULL;
-                result_row.sources := NULL;
-                result_row.sees := NULL;
-                result_row.accurate := NULL;
-                result_row.aaccurate := NULL;
-                result_row.pivot_point := rec.id;
-
-                IF row_counter >= next_pivot_pos THEN
-                    RETURN NEXT result_row;
-                    RETURN;
-                END IF;
-            END IF;
-
-            IF count_up_from_zero THEN
-                row_number := row_number + 1;
-            ELSE
-                row_number := row_number - 1;
-            END IF;
-
-            -- row_counter is different from row_number.
-            -- It simply counts up from zero so that we know when
-            -- we've reached our limit.
-            row_counter := row_counter + 1;
-        END IF;
-    END LOOP;
-END;
-$p$ LANGUAGE PLPGSQL;
-
+-- KCLS metabib.staged_browse() update went here, but our version conflicts
+-- heavily with stock.  Required change (STRING_AGG casting) already covered.
 
 /*
  * Copyright (C) 2014  Equinox Software, Inc.
@@ -5383,7 +5178,13 @@ BEGIN
                 output_row.field_class = idx.field_class;
                 output_row.field = idx.id;
                 output_row.source = rid;
-                output_row.value = BTRIM(REGEXP_REPLACE(browse_text, E'\\s+', ' ', 'g'));
+                -- KCLS / Catalyst
+                -- output_row.value = BTRIM(REGEXP_REPLACE(browse_text, E'\\s+', ' ', 'g'));
+                -- outer regexp_replace keeps all '.' expect the last one.
+                -- inner regexp_replace removes all connecting whitespace and replaces it with a single space
+                output_row.value = BTRIM(REGEXP_REPLACE(REGEXP_REPLACE(browse_text, E'\\s+', ' ', 'g'), E'\\.$', '', 'g'));
+                -- /KCLS 
+
                 output_row.sort_value :=
                     public.naco_normalize(sort_value);
 
@@ -5447,10 +5248,24 @@ BEGIN
 
         -- insert combined node text for searching
         IF idx.search_field THEN
+            -- KCLS
+            IF idx.field_class = 'identifier' AND idx.name = 'bibcn' THEN
+                output_row.field_class = 'call_number';
+                output_row.browse_field = TRUE;
+                output_row.sort_value = public.naco_normalize_keep_decimal(raw_text,'');
+                output_row.value = raw_text;
+            ELSE
+                output_row.value = BTRIM(REGEXP_REPLACE(
+                    REGEXP_REPLACE(raw_text, E'\\s+', ' ', 'g'), E'\\.+', '', 'g'));
+            END IF;
+            -- /KCLS
+
             output_row.field_class = idx.field_class;
             output_row.field = idx.id;
             output_row.source = rid;
-            output_row.value = BTRIM(REGEXP_REPLACE(raw_text, E'\\s+', ' ', 'g'));
+            -- KCLS -- value is set above
+            -- output_row.value = BTRIM(REGEXP_REPLACE(raw_text, E'\\s+', ' ', 'g'));
+            -- /KCLS
 
             output_row.search_field = TRUE;
             RETURN NEXT output_row;
@@ -5751,6 +5566,7 @@ DECLARE
     b_skip_browse   BOOL;
     b_skip_search   BOOL;
     value_prepped   TEXT;
+    field_class     TEXT;
 BEGIN
 
     SELECT COALESCE(NULLIF(skip_facet, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name =  'ingest.skip_facet_indexing' AND enabled)) INTO b_skip_facet;
@@ -5769,7 +5585,11 @@ BEGIN
             DELETE FROM metabib.facet_entry WHERE source = bib_id;
         END IF;
         IF NOT b_skip_browse THEN
-            DELETE FROM metabib.browse_entry_def_map WHERE source = bib_id;
+            DELETE FROM metabib.browse_author_entry_def_map WHERE source = bib_id;
+            DELETE FROM metabib.browse_title_entry_def_map WHERE source = bib_id;
+            DELETE FROM metabib.browse_subject_entry_def_map WHERE source = bib_id;
+            DELETE FROM metabib.browse_series_entry_def_map WHERE source = bib_id;
+            DELETE FROM metabib.browse_call_number_entry_def_map WHERE source = bib_id;
         END IF;
     END IF;
 
@@ -5798,22 +5618,110 @@ BEGIN
             CONTINUE WHEN ind_data.sort_value IS NULL;
 
             value_prepped := metabib.browse_normalize(ind_data.value, ind_data.field);
-            SELECT INTO mbe_row * FROM metabib.browse_entry
-                WHERE value = value_prepped AND sort_value = ind_data.sort_value;
 
-            IF FOUND THEN
-                mbe_id := mbe_row.id;
-            ELSE
-                INSERT INTO metabib.browse_entry
-                    ( value, sort_value ) VALUES
-                    ( value_prepped, ind_data.sort_value );
+            -- KCLS
+            IF char_length(value_prepped) > 0 THEN
+                CASE ind_data.field_class
 
-                mbe_id := CURRVAL('metabib.browse_entry_id_seq'::REGCLASS);
-            END IF;
+                  WHEN 'author' THEN
+
+                    SELECT INTO mbe_row * FROM metabib.browse_author_entry
+                        WHERE sort_value = ind_data.sort_value
+                        ORDER BY id;
+
+                    IF FOUND THEN
+                        mbe_id := mbe_row.id;
+                    ELSE
+                        INSERT INTO metabib.browse_author_entry
+                            ( value, sort_value, truncated_sort_value ) VALUES
+                            ( value_prepped, ind_data.sort_value, substr(ind_data.sort_value, 1, 2700) );
+
+                        mbe_id := CURRVAL('metabib.browse_author_entry_id_seq'::REGCLASS);
+                    END IF;
+
+                    INSERT INTO metabib.browse_author_entry_def_map (entry, def, source, authority)
+                        VALUES (mbe_id, ind_data.field, ind_data.source, ind_data.authority);
+
+                  WHEN 'title' THEN
+
+                    SELECT INTO mbe_row * FROM metabib.browse_title_entry
+                        WHERE sort_value = ind_data.sort_value
+                        ORDER BY id;
+
+                    IF FOUND THEN
+                        mbe_id := mbe_row.id;
+                    ELSE
+                        INSERT INTO metabib.browse_title_entry
+                            ( value, sort_value, truncated_sort_value ) VALUES
+                            ( value_prepped, ind_data.sort_value, substr(ind_data.sort_value, 1, 2700) );
+
+                        mbe_id := CURRVAL('metabib.browse_title_entry_id_seq'::REGCLASS);
+                    END IF;
 
-            INSERT INTO metabib.browse_entry_def_map (entry, def, source, authority)
-                VALUES (mbe_id, ind_data.field, ind_data.source, ind_data.authority);
+                    INSERT INTO metabib.browse_title_entry_def_map (entry, def, source, authority)
+                        VALUES (mbe_id, ind_data.field, ind_data.source, ind_data.authority);
+
+                  WHEN 'subject' THEN
+
+                    SELECT INTO mbe_row * FROM metabib.browse_subject_entry
+                        WHERE sort_value = ind_data.sort_value
+                        ORDER BY id;
+
+                    IF FOUND THEN
+                        mbe_id := mbe_row.id;
+                    ELSE
+                        INSERT INTO metabib.browse_subject_entry
+                            ( value, sort_value, truncated_sort_value ) VALUES
+                            ( value_prepped, ind_data.sort_value, substr(ind_data.sort_value, 1, 2700) );
+
+                        mbe_id := CURRVAL('metabib.browse_subject_entry_id_seq'::REGCLASS);
+                    END IF;
+
+                    INSERT INTO metabib.browse_subject_entry_def_map (entry, def, source, authority)
+                        VALUES (mbe_id, ind_data.field, ind_data.source, ind_data.authority);
+
+                  WHEN 'series' THEN
+
+                    SELECT INTO mbe_row * FROM metabib.browse_series_entry
+                        WHERE sort_value = ind_data.sort_value
+                        ORDER BY id;
+
+                    IF FOUND THEN
+                        mbe_id := mbe_row.id;
+                    ELSE
+                        INSERT INTO metabib.browse_series_entry
+                            ( value, sort_value, truncated_sort_value ) VALUES
+                            ( value_prepped, ind_data.sort_value, substr(ind_data.sort_value, 1, 2700) );
+
+                        mbe_id := CURRVAL('metabib.browse_series_entry_id_seq'::REGCLASS);
+                    END IF;
+
+                    INSERT INTO metabib.browse_series_entry_def_map (entry, def, source, authority)
+                        VALUES (mbe_id, ind_data.field, ind_data.source, ind_data.authority);
+
+                  WHEN 'call_number' THEN
+
+                    SELECT INTO mbe_row * FROM metabib.browse_call_number_entry
+                        WHERE sort_value = ind_data.sort_value
+                        ORDER BY id;
+
+                    IF FOUND THEN
+                        mbe_id := mbe_row.id;
+                    ELSE
+                        INSERT INTO metabib.browse_call_number_entry
+                            ( value, sort_value, truncated_sort_value ) VALUES
+                            ( value_prepped, ind_data.sort_value, substr(ind_data.sort_value, 1, 2700) );
+
+                        mbe_id := CURRVAL('metabib.browse_call_number_entry_id_seq'::REGCLASS);
+                    END IF;
+
+                    INSERT INTO metabib.browse_call_number_entry_def_map (entry, def, source, authority)
+                        VALUES (mbe_id, ind_data.field, ind_data.source, ind_data.authority);
+                  ELSE
+                END CASE;
+            END IF;
         END IF;
+        -- /KCLS
 
         IF ind_data.search_field AND NOT b_skip_search THEN
             -- Avoid inserting duplicate rows