From: Galen Charlton Date: Thu, 4 Nov 2021 19:17:11 +0000 (-0400) Subject: LP#1949892: join up non-multi display entries (3.7.x version) X-Git-Url: https://old-git.evergreen-ils.org/?a=commitdiff_plain;h=e364709672bae53477a71d7aa93733cf8a0bb534;p=working%2FEvergreen.git LP#1949892: join up non-multi display entries (3.7.x version) This patch ensures that if a display field that is not marked as "multi" in config.display_entry_map has an XPath expression that could result in multiple XML nodes, that it gets joined up into just a single display entry. To test ------- [1] Update the title|proper metabib display field config to change the format to 'marcxml' and the XPath to the following: //marc:datafield[@tag="245"]/marc:subfield[@code="a" or @code="b" or @code="n" or @code="p" or @code="k" or @code="f" or @code="g" or @code="s"] [2] Load or save a bib record that has at least subfields $a and $b in the 245. [3] Search for the title in the Angular staff catalog. Note that only one of the subfields is displayed in the record summary. [4] Apply the patch and reindex the record, then repeat step 4. This time, the whole title should be displayed. [5] By querying (say) metabib.display_field, verify that display entries such as the ISBN that _are_ marked as multi do have a display entry for each ISBN. Signed-off-by: Galen Charlton --- diff --git a/Open-ILS/src/sql/Pg/030.schema.metabib.sql b/Open-ILS/src/sql/Pg/030.schema.metabib.sql index 2cede48920..f897ba17de 100644 --- a/Open-ILS/src/sql/Pg/030.schema.metabib.sql +++ b/Open-ILS/src/sql/Pg/030.schema.metabib.sql @@ -767,6 +767,7 @@ DECLARE authority_link BIGINT; output_row metabib.field_entry_template%ROWTYPE; process_idx BOOL; + display_multi BOOL; BEGIN -- Start out with no field-use bools set @@ -792,6 +793,8 @@ BEGIN joiner := COALESCE(idx.joiner, default_joiner); SELECT INTO xfrm * from config.xml_transform WHERE name = idx.format; + SELECT INTO display_multi multi from config.display_field_map WHERE field = idx.id; + display_text := ''; -- See if we can skip the XSLT ... it's expensive IF prev_xfrm IS NULL OR prev_xfrm <> xfrm.name THEN @@ -911,24 +914,43 @@ BEGIN -- insert raw node text for display IF idx.display_field THEN + IF display_text <> '' THEN + display_text := display_text || joiner; + END IF; + IF idx.display_xpath IS NOT NULL AND idx.display_xpath <> '' THEN - display_text := oils_xpath_string( idx.display_xpath, xml_node, joiner, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] ); + display_text := display_text || oils_xpath_string( idx.display_xpath, xml_node, joiner, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] ); ELSE - display_text := curr_text; + display_text := display_text || curr_text; END IF; - output_row.field_class = idx.field_class; - output_row.field = -1 * idx.id; - output_row.source = rid; - output_row.value = BTRIM(REGEXP_REPLACE(display_text, E'\\s+', ' ', 'g')); + IF (display_multi OR display_multi IS NULL) AND display_text <> '' THEN + output_row.field_class = idx.field_class; + output_row.field = -1 * idx.id; + output_row.source = rid; + output_row.value = BTRIM(REGEXP_REPLACE(display_text, E'\\s+', ' ', 'g')); + + output_row.display_field = TRUE; + RETURN NEXT output_row; + output_row.display_field = FALSE; + display_text := ''; + END IF; - output_row.display_field = TRUE; - RETURN NEXT output_row; - output_row.display_field = FALSE; END IF; END LOOP; + IF display_text <> '' AND (NOT display_multi OR display_multi IS NULL) THEN + output_row.field_class = idx.field_class; + output_row.field = -1 * idx.id; + output_row.source = rid; + output_row.value = BTRIM(REGEXP_REPLACE(display_text, E'\\s+', ' ', 'g')); + + output_row.display_field = TRUE; + RETURN NEXT output_row; + output_row.display_field = FALSE; + END IF; + CONTINUE WHEN raw_text IS NULL OR raw_text = ''; -- insert combined node text for searching diff --git a/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.join-up-non-multi-display-fields.sql b/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.join-up-non-multi-display-fields.sql new file mode 100644 index 0000000000..6142278590 --- /dev/null +++ b/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.join-up-non-multi-display-fields.sql @@ -0,0 +1,233 @@ +BEGIN; + +SELECT evergreen.upgrade_deps_block_check('XXXX', :eg_version); + +CREATE OR REPLACE FUNCTION biblio.extract_metabib_field_entry ( + rid BIGINT, + default_joiner TEXT, + field_types TEXT[], + only_fields INT[] +) RETURNS SETOF metabib.field_entry_template AS $func$ +DECLARE + bib biblio.record_entry%ROWTYPE; + idx config.metabib_field%ROWTYPE; + xfrm config.xml_transform%ROWTYPE; + prev_xfrm TEXT; + transformed_xml TEXT; + xml_node TEXT; + xml_node_list TEXT[]; + facet_text TEXT; + display_text TEXT; + browse_text TEXT; + sort_value TEXT; + raw_text TEXT; + curr_text TEXT; + joiner TEXT := default_joiner; -- XXX will index defs supply a joiner? + authority_text TEXT; + authority_link BIGINT; + output_row metabib.field_entry_template%ROWTYPE; + process_idx BOOL; + display_multi BOOL; +BEGIN + + -- Start out with no field-use bools set + output_row.browse_field = FALSE; + output_row.facet_field = FALSE; + output_row.display_field = FALSE; + output_row.search_field = FALSE; + + -- Get the record + SELECT INTO bib * FROM biblio.record_entry WHERE id = rid; + + -- Loop over the indexing entries + FOR idx IN SELECT * FROM config.metabib_field WHERE id = ANY (only_fields) ORDER BY format LOOP + CONTINUE WHEN idx.xpath IS NULL OR idx.xpath = ''; -- pure virtual field + + process_idx := FALSE; + IF idx.display_field AND 'display' = ANY (field_types) THEN process_idx = TRUE; END IF; + IF idx.browse_field AND 'browse' = ANY (field_types) THEN process_idx = TRUE; END IF; + IF idx.search_field AND 'search' = ANY (field_types) THEN process_idx = TRUE; END IF; + IF idx.facet_field AND 'facet' = ANY (field_types) THEN process_idx = TRUE; END IF; + CONTINUE WHEN process_idx = FALSE; -- disabled for all types + + joiner := COALESCE(idx.joiner, default_joiner); + + SELECT INTO xfrm * from config.xml_transform WHERE name = idx.format; + SELECT INTO display_multi multi from config.display_field_map WHERE field = idx.id; + display_text := ''; + + -- See if we can skip the XSLT ... it's expensive + IF prev_xfrm IS NULL OR prev_xfrm <> xfrm.name THEN + -- Can't skip the transform + IF xfrm.xslt <> '---' THEN + transformed_xml := oils_xslt_process(bib.marc,xfrm.xslt); + ELSE + transformed_xml := bib.marc; + END IF; + + prev_xfrm := xfrm.name; + END IF; + + xml_node_list := oils_xpath( idx.xpath, transformed_xml, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] ); + + raw_text := NULL; + FOR xml_node IN SELECT x FROM unnest(xml_node_list) AS x LOOP + CONTINUE WHEN xml_node !~ E'^\\s*<'; + + -- XXX much of this should be moved into oils_xpath_string... + curr_text := ARRAY_TO_STRING(array_remove(array_remove( + oils_xpath( '//text()', -- get the content of all the nodes within the main selected node + REGEXP_REPLACE( xml_node, E'\\s+', ' ', 'g' ) -- Translate adjacent whitespace to a single space + ), ' '), ''), -- throw away morally empty (bankrupt?) strings + joiner + ); + + CONTINUE WHEN curr_text IS NULL OR curr_text = ''; + + IF raw_text IS NOT NULL THEN + raw_text := raw_text || joiner; + END IF; + + raw_text := COALESCE(raw_text,'') || curr_text; + + -- autosuggest/metabib.browse_entry + IF idx.browse_field THEN + + IF idx.browse_xpath IS NOT NULL AND idx.browse_xpath <> '' THEN + browse_text := oils_xpath_string( idx.browse_xpath, xml_node, joiner, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] ); + ELSE + browse_text := curr_text; + END IF; + + IF idx.browse_sort_xpath IS NOT NULL AND + idx.browse_sort_xpath <> '' THEN + + sort_value := oils_xpath_string( + idx.browse_sort_xpath, xml_node, joiner, + ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] + ); + ELSE + sort_value := browse_text; + END IF; + + output_row.field_class = idx.field_class; + output_row.field = idx.id; + output_row.source = rid; + output_row.value = BTRIM(REGEXP_REPLACE(browse_text, E'\\s+', ' ', 'g')); + output_row.sort_value := + public.naco_normalize(sort_value); + + output_row.authority := NULL; + + IF idx.authority_xpath IS NOT NULL AND idx.authority_xpath <> '' THEN + authority_text := oils_xpath_string( + idx.authority_xpath, xml_node, joiner, + ARRAY[ + ARRAY[xfrm.prefix, xfrm.namespace_uri], + ARRAY['xlink','http://www.w3.org/1999/xlink'] + ] + ); + + IF authority_text ~ '^\d+$' THEN + authority_link := authority_text::BIGINT; + PERFORM * FROM authority.record_entry WHERE id = authority_link; + IF FOUND THEN + output_row.authority := authority_link; + END IF; + END IF; + + END IF; + + output_row.browse_field = TRUE; + -- Returning browse rows with search_field = true for search+browse + -- configs allows us to retain granularity of being able to search + -- browse fields with "starts with" type operators (for example, for + -- titles of songs in music albums) + IF idx.search_field THEN + output_row.search_field = TRUE; + END IF; + RETURN NEXT output_row; + output_row.browse_field = FALSE; + output_row.search_field = FALSE; + output_row.sort_value := NULL; + END IF; + + -- insert raw node text for faceting + IF idx.facet_field THEN + + IF idx.facet_xpath IS NOT NULL AND idx.facet_xpath <> '' THEN + facet_text := oils_xpath_string( idx.facet_xpath, xml_node, joiner, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] ); + ELSE + facet_text := curr_text; + END IF; + + output_row.field_class = idx.field_class; + output_row.field = -1 * idx.id; + output_row.source = rid; + output_row.value = BTRIM(REGEXP_REPLACE(facet_text, E'\\s+', ' ', 'g')); + + output_row.facet_field = TRUE; + RETURN NEXT output_row; + output_row.facet_field = FALSE; + END IF; + + -- insert raw node text for display + IF idx.display_field THEN + + IF display_text <> '' THEN + display_text := display_text || joiner; + END IF; + + IF idx.display_xpath IS NOT NULL AND idx.display_xpath <> '' THEN + display_text := display_text || oils_xpath_string( idx.display_xpath, xml_node, joiner, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] ); + ELSE + display_text := display_text || curr_text; + END IF; + + IF (display_multi OR display_multi IS NULL) AND display_text <> '' THEN + output_row.field_class = idx.field_class; + output_row.field = -1 * idx.id; + output_row.source = rid; + output_row.value = BTRIM(REGEXP_REPLACE(display_text, E'\\s+', ' ', 'g')); + + output_row.display_field = TRUE; + RETURN NEXT output_row; + output_row.display_field = FALSE; + display_text := ''; + END IF; + + END IF; + + END LOOP; + + IF display_text <> '' AND (NOT display_multi OR display_multi IS NULL) THEN + output_row.field_class = idx.field_class; + output_row.field = -1 * idx.id; + output_row.source = rid; + output_row.value = BTRIM(REGEXP_REPLACE(display_text, E'\\s+', ' ', 'g')); + + output_row.display_field = TRUE; + RETURN NEXT output_row; + output_row.display_field = FALSE; + END IF; + + CONTINUE WHEN raw_text IS NULL OR raw_text = ''; + + -- insert combined node text for searching + IF idx.search_field THEN + output_row.field_class = idx.field_class; + output_row.field = idx.id; + output_row.source = rid; + output_row.value = BTRIM(REGEXP_REPLACE(raw_text, E'\\s+', ' ', 'g')); + + output_row.search_field = TRUE; + RETURN NEXT output_row; + output_row.search_field = FALSE; + END IF; + + END LOOP; + +END; +$func$ LANGUAGE PLPGSQL; + +COMMIT;