LP#1949892: join up non-multi display entries user/gmcharlt/lp1949892_join_up_display_field
author: Galen Charlton <gmc@equinoxOLI.org>
Thu, 4 Nov 2021 19:17:11 +0000 (15:17 -0400)
committer: Galen Charlton <gmc@equinoxOLI.org>
Thu, 4 Nov 2021 19:17:11 +0000 (15:17 -0400)
This patch ensures that if a display field that is
not marked as "multi" in config.display_field_map has
an XPath expression that could result in multiple XML
nodes, the text of those nodes gets joined up into just
a single display entry.

To test
-------
[1] Update the title|proper metabib display field config
    to change the format to 'marcxml' and the XPath to the following:

//marc:datafield[@tag="245"]/marc:subfield[@code="a" or @code="b" or @code="n" or @code="p" or @code="k" or @code="f" or @code="g" or @code="s"]

[2] Load or save a bib record that has at least subfields $a and $b
    in the 245.
[3] Search for the title in the Angular staff catalog. Note that only
    one of the subfields is displayed in the record summary.
[4] Apply the patch and reindex the record, then repeat step 3. This
    time, the whole title should be displayed.
[5] By querying (say) metabib.display_entry, verify that display
    fields such as the ISBN that _are_ marked as multi do have
    a display entry for each ISBN.

Signed-off-by: Galen Charlton <gmc@equinoxOLI.org>
Open-ILS/src/sql/Pg/030.schema.metabib.sql
Open-ILS/src/sql/Pg/upgrade/XXXX.schema.join-up-non-multi-display-fields.sql [new file with mode: 0644]

index 7cb4e8c..dfe4cbd 100644 (file)
@@ -768,6 +768,7 @@ DECLARE
     authority_link BIGINT;
     output_row  metabib.field_entry_template%ROWTYPE;
     process_idx BOOL;
+    display_multi BOOL;
 BEGIN
 
     -- Start out with no field-use bools set
@@ -794,6 +795,8 @@ BEGIN
         joiner := COALESCE(idx.joiner, default_joiner);
 
         SELECT INTO xfrm * from config.xml_transform WHERE name = idx.format;
+        SELECT INTO display_multi multi from config.display_field_map WHERE field = idx.id;
+        display_text := '';
 
         -- See if we can skip the XSLT ... it's expensive
         IF prev_xfrm IS NULL OR prev_xfrm <> xfrm.name THEN
@@ -915,24 +918,43 @@ BEGIN
             -- insert raw node text for display
             IF idx.display_field THEN
 
+                IF display_text <> '' THEN
+                    display_text := display_text || joiner;
+                END IF;
+
                 IF idx.display_xpath IS NOT NULL AND idx.display_xpath <> '' THEN
-                    display_text := oils_xpath_string( idx.display_xpath, xml_node, joiner, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] );
+                    display_text := display_text || oils_xpath_string( idx.display_xpath, xml_node, joiner, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] );
                 ELSE
-                    display_text := curr_text;
+                    display_text := display_text || curr_text;
                 END IF;
 
-                output_row.field_class = idx.field_class;
-                output_row.field = -1 * idx.id;
-                output_row.source = rid;
-                output_row.value = BTRIM(REGEXP_REPLACE(display_text, E'\\s+', ' ', 'g'));
+                IF (display_multi OR display_multi IS NULL) AND display_text <> '' THEN
+                    output_row.field_class = idx.field_class;
+                    output_row.field = -1 * idx.id;
+                    output_row.source = rid;
+                    output_row.value = BTRIM(REGEXP_REPLACE(display_text, E'\\s+', ' ', 'g'));
+
+                    output_row.display_field = TRUE;
+                    RETURN NEXT output_row;
+                    output_row.display_field = FALSE;
+                    display_text := '';
+                END IF;
 
-                output_row.display_field = TRUE;
-                RETURN NEXT output_row;
-                output_row.display_field = FALSE;
             END IF;
 
         END LOOP;
 
+        IF display_text <> '' AND (NOT display_multi OR display_multi IS NULL) THEN
+            output_row.field_class = idx.field_class;
+            output_row.field = -1 * idx.id;
+            output_row.source = rid;
+            output_row.value = BTRIM(REGEXP_REPLACE(display_text, E'\\s+', ' ', 'g'));
+
+            output_row.display_field = TRUE;
+            RETURN NEXT output_row;
+            output_row.display_field = FALSE;
+        END IF;
+
         CONTINUE WHEN raw_text IS NULL OR raw_text = '';
 
         -- insert combined node text for searching
diff --git a/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.join-up-non-multi-display-fields.sql b/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.join-up-non-multi-display-fields.sql
new file mode 100644 (file)
index 0000000..795849a
--- /dev/null
@@ -0,0 +1,236 @@
+BEGIN; -- open the transaction that wraps this whole upgrade script
+
+SELECT evergreen.upgrade_deps_block_check('XXXX', :eg_version); -- NOTE(review): 'XXXX' looks like a placeholder upgrade number and :eg_version a psql variable; presumably this validates/records the upgrade -- confirm
+
+-- biblio.extract_metabib_field_entry
+--
+-- Extracts browse, facet, display, and search entries from one bib record
+-- and returns them as metabib.field_entry_template rows.
+--
+-- Parameters:
+--   rid            - id of the biblio.record_entry row to extract from
+--   default_joiner - joiner used when a field definition has no joiner
+--   field_types    - entry types requested ('display', 'browse', 'search',
+--                    'facet'); a field definition is skipped unless it is
+--                    enabled for at least one requested type.  Once a
+--                    definition is processed, rows are emitted for every use
+--                    it is enabled for; field_types is not re-checked per row.
+--   only_fields    - ids of the config.metabib_field definitions to process
+--
+-- Row conventions:
+--   browse rows  - field = idx.id, browse_field = TRUE (and search_field =
+--                  TRUE when the definition is also a search field)
+--   facet rows   - field = -1 * idx.id, facet_field = TRUE
+--   display rows - field = -1 * idx.id, display_field = TRUE
+--   search rows  - field = idx.id, search_field = TRUE, one per definition,
+--                  built from the joined text of all matching nodes
+--
+-- Display rows: when config.display_field_map marks the field as multi (or
+-- has no row for it), one display row is emitted per XML node that yields
+-- text.  Otherwise the text of all nodes is joined with the joiner and a
+-- single display row is emitted after the node loop.
+CREATE OR REPLACE FUNCTION biblio.extract_metabib_field_entry (
+    rid BIGINT,
+    default_joiner TEXT,
+    field_types TEXT[],
+    only_fields INT[]
+) RETURNS SETOF metabib.field_entry_template AS $func$
+DECLARE
+    bib     biblio.record_entry%ROWTYPE;
+    idx     config.metabib_field%ROWTYPE;
+    xfrm        config.xml_transform%ROWTYPE;
+    prev_xfrm   TEXT;
+    transformed_xml TEXT;
+    xml_node    TEXT;
+    xml_node_list   TEXT[];
+    facet_text  TEXT;
+    display_text TEXT;
+    node_display_text TEXT; -- display text contributed by the current node only
+    browse_text TEXT;
+    sort_value  TEXT;
+    raw_text    TEXT;
+    curr_text   TEXT;
+    joiner      TEXT := default_joiner; -- XXX will index defs supply a joiner?
+    authority_text TEXT;
+    authority_link BIGINT;
+    output_row  metabib.field_entry_template%ROWTYPE;
+    process_idx BOOL;
+    display_multi BOOL;
+BEGIN
+
+    -- Start out with no field-use bools set
+    output_row.browse_nocase = FALSE;
+    output_row.browse_field = FALSE;
+    output_row.facet_field = FALSE;
+    output_row.display_field = FALSE;
+    output_row.search_field = FALSE;
+
+    -- Get the record
+    SELECT INTO bib * FROM biblio.record_entry WHERE id = rid;
+
+    -- Loop over the indexing entries; ordering by format lets consecutive
+    -- definitions that share a transform reuse the transformed XML
+    FOR idx IN SELECT * FROM config.metabib_field WHERE id = ANY (only_fields) ORDER BY format LOOP
+        CONTINUE WHEN idx.xpath IS NULL OR idx.xpath = ''; -- pure virtual field
+
+        process_idx := FALSE;
+        IF idx.display_field AND 'display' = ANY (field_types) THEN process_idx = TRUE; END IF;
+        IF idx.browse_field AND 'browse' = ANY (field_types) THEN process_idx = TRUE; END IF;
+        IF idx.search_field AND 'search' = ANY (field_types) THEN process_idx = TRUE; END IF;
+        IF idx.facet_field AND 'facet' = ANY (field_types) THEN process_idx = TRUE; END IF;
+        CONTINUE WHEN process_idx = FALSE; -- disabled for all types
+
+        joiner := COALESCE(idx.joiner, default_joiner);
+
+        SELECT INTO xfrm * from config.xml_transform WHERE name = idx.format;
+        -- display_multi becomes NULL when the field has no display_field_map
+        -- row; NULL is treated like multi below
+        SELECT INTO display_multi multi from config.display_field_map WHERE field = idx.id;
+        display_text := '';
+
+        -- See if we can skip the XSLT ... it's expensive
+        IF prev_xfrm IS NULL OR prev_xfrm <> xfrm.name THEN
+            -- Can't skip the transform
+            IF xfrm.xslt <> '---' THEN
+                transformed_xml := oils_xslt_process(bib.marc,xfrm.xslt);
+            ELSE
+                transformed_xml := bib.marc;
+            END IF;
+
+            prev_xfrm := xfrm.name;
+        END IF;
+
+        xml_node_list := oils_xpath( idx.xpath, transformed_xml, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] );
+
+        raw_text := NULL;
+        FOR xml_node IN SELECT x FROM unnest(xml_node_list) AS x LOOP
+            CONTINUE WHEN xml_node !~ E'^\\s*<';
+
+            -- XXX much of this should be moved into oils_xpath_string...
+            curr_text := ARRAY_TO_STRING(array_remove(array_remove(
+                oils_xpath( '//text()', -- get the content of all the nodes within the main selected node
+                    REGEXP_REPLACE( xml_node, E'\\s+', ' ', 'g' ) -- Translate adjacent whitespace to a single space
+                ), ' '), ''),  -- throw away morally empty (bankrupt?) strings
+                joiner
+            );
+
+            CONTINUE WHEN curr_text IS NULL OR curr_text = '';
+
+            IF raw_text IS NOT NULL THEN
+                raw_text := raw_text || joiner;
+            END IF;
+
+            raw_text := COALESCE(raw_text,'') || curr_text;
+
+            -- autosuggest/metabib.browse_entry
+            IF idx.browse_field THEN
+                output_row.browse_nocase = idx.browse_nocase;
+
+                IF idx.browse_xpath IS NOT NULL AND idx.browse_xpath <> '' THEN
+                    browse_text := oils_xpath_string( idx.browse_xpath, xml_node, joiner, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] );
+                ELSE
+                    browse_text := curr_text;
+                END IF;
+
+                IF idx.browse_sort_xpath IS NOT NULL AND
+                    idx.browse_sort_xpath <> '' THEN
+
+                    sort_value := oils_xpath_string(
+                        idx.browse_sort_xpath, xml_node, joiner,
+                        ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]]
+                    );
+                ELSE
+                    sort_value := browse_text;
+                END IF;
+
+                output_row.field_class = idx.field_class;
+                output_row.field = idx.id;
+                output_row.source = rid;
+                output_row.value = BTRIM(REGEXP_REPLACE(browse_text, E'\\s+', ' ', 'g'));
+                output_row.sort_value :=
+                    public.naco_normalize(sort_value);
+
+                output_row.authority := NULL;
+
+                IF idx.authority_xpath IS NOT NULL AND idx.authority_xpath <> '' THEN
+                    authority_text := oils_xpath_string(
+                        idx.authority_xpath, xml_node, joiner,
+                        ARRAY[
+                            ARRAY[xfrm.prefix, xfrm.namespace_uri],
+                            ARRAY['xlink','http://www.w3.org/1999/xlink']
+                        ]
+                    );
+
+                    -- only link to an authority record that actually exists
+                    IF authority_text ~ '^\d+$' THEN
+                        authority_link := authority_text::BIGINT;
+                        PERFORM * FROM authority.record_entry WHERE id = authority_link;
+                        IF FOUND THEN
+                            output_row.authority := authority_link;
+                        END IF;
+                    END IF;
+
+                END IF;
+
+                output_row.browse_field = TRUE;
+                -- Returning browse rows with search_field = true for search+browse
+                -- configs allows us to retain granularity of being able to search
+                -- browse fields with "starts with" type operators (for example, for
+                -- titles of songs in music albums)
+                IF idx.search_field THEN
+                    output_row.search_field = TRUE;
+                END IF;
+                RETURN NEXT output_row;
+                output_row.browse_nocase = FALSE;
+                output_row.browse_field = FALSE;
+                output_row.search_field = FALSE;
+                output_row.sort_value := NULL;
+            END IF;
+
+            -- insert raw node text for faceting
+            IF idx.facet_field THEN
+
+                IF idx.facet_xpath IS NOT NULL AND idx.facet_xpath <> '' THEN
+                    facet_text := oils_xpath_string( idx.facet_xpath, xml_node, joiner, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] );
+                ELSE
+                    facet_text := curr_text;
+                END IF;
+
+                output_row.field_class = idx.field_class;
+                output_row.field = -1 * idx.id;
+                output_row.source = rid;
+                output_row.value = BTRIM(REGEXP_REPLACE(facet_text, E'\\s+', ' ', 'g'));
+
+                output_row.facet_field = TRUE;
+                RETURN NEXT output_row;
+                output_row.facet_field = FALSE;
+            END IF;
+
+            -- insert raw node text for display
+            IF idx.display_field THEN
+
+                -- Work out what this node contributes before touching the
+                -- accumulator
+                IF idx.display_xpath IS NOT NULL AND idx.display_xpath <> '' THEN
+                    node_display_text := oils_xpath_string( idx.display_xpath, xml_node, joiner, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] );
+                ELSE
+                    node_display_text := curr_text;
+                END IF;
+
+                -- Only append (and only add a joiner) when the node yields
+                -- text.  Appending the joiner first would leave a dangling or
+                -- doubled joiner when display_xpath matches nothing in a node,
+                -- and a NULL result would turn the accumulator NULL and drop
+                -- every later display row for this field.
+                IF node_display_text IS NOT NULL AND node_display_text <> '' THEN
+                    IF display_text <> '' THEN
+                        display_text := display_text || joiner;
+                    END IF;
+                    display_text := display_text || node_display_text;
+                END IF;
+
+                -- multi (or unmapped) fields get one display row per node
+                IF (display_multi OR display_multi IS NULL) AND display_text <> '' THEN
+                    output_row.field_class = idx.field_class;
+                    output_row.field = -1 * idx.id;
+                    output_row.source = rid;
+                    output_row.value = BTRIM(REGEXP_REPLACE(display_text, E'\\s+', ' ', 'g'));
+
+                    output_row.display_field = TRUE;
+                    RETURN NEXT output_row;
+                    output_row.display_field = FALSE;
+                    display_text := '';
+                END IF;
+
+            END IF;
+
+        END LOOP;
+
+        -- non-multi fields: emit the joined text of all nodes as one display
+        -- row (for multi/unmapped fields display_text is already empty here)
+        IF display_text <> '' AND (NOT display_multi OR display_multi IS NULL) THEN
+            output_row.field_class = idx.field_class;
+            output_row.field = -1 * idx.id;
+            output_row.source = rid;
+            output_row.value = BTRIM(REGEXP_REPLACE(display_text, E'\\s+', ' ', 'g'));
+
+            output_row.display_field = TRUE;
+            RETURN NEXT output_row;
+            output_row.display_field = FALSE;
+        END IF;
+
+        CONTINUE WHEN raw_text IS NULL OR raw_text = '';
+
+        -- insert combined node text for searching
+        IF idx.search_field THEN
+            output_row.field_class = idx.field_class;
+            output_row.field = idx.id;
+            output_row.source = rid;
+            output_row.value = BTRIM(REGEXP_REPLACE(raw_text, E'\\s+', ' ', 'g'));
+
+            output_row.search_field = TRUE;
+            RETURN NEXT output_row;
+            output_row.search_field = FALSE;
+        END IF;
+
+    END LOOP;
+
+END;
+$func$ LANGUAGE PLPGSQL;
+
+COMMIT; -- commit the transaction opened by BEGIN at the top of this upgrade script