--- /dev/null
+BEGIN;
+
+SELECT evergreen.upgrade_deps_block_check('1301', :eg_version);
+
+CREATE OR REPLACE FUNCTION biblio.extract_metabib_field_entry (
+ rid BIGINT,
+ default_joiner TEXT,
+ field_types TEXT[],
+ only_fields INT[]
+) RETURNS SETOF metabib.field_entry_template AS $func$
+DECLARE
+ bib biblio.record_entry%ROWTYPE;
+ idx config.metabib_field%ROWTYPE;
+ xfrm config.xml_transform%ROWTYPE;
+ prev_xfrm TEXT;
+ transformed_xml TEXT;
+ xml_node TEXT;
+ xml_node_list TEXT[];
+ facet_text TEXT;
+ display_text TEXT;
+ browse_text TEXT;
+ sort_value TEXT;
+ raw_text TEXT;
+ curr_text TEXT;
+ joiner TEXT := default_joiner; -- XXX will index defs supply a joiner?
+ authority_text TEXT;
+ authority_link BIGINT;
+ output_row metabib.field_entry_template%ROWTYPE;
+ process_idx BOOL;
+BEGIN
+
+ -- Start out with no field-use bools set
+ output_row.browse_nocase = FALSE;
+ output_row.browse_field = FALSE;
+ output_row.facet_field = FALSE;
+ output_row.display_field = FALSE;
+ output_row.search_field = FALSE;
+
+ -- Get the record
+ SELECT INTO bib * FROM biblio.record_entry WHERE id = rid;
+
+ -- Loop over the indexing entries
+ FOR idx IN SELECT * FROM config.metabib_field WHERE id = ANY (only_fields) ORDER BY format LOOP
+ CONTINUE WHEN idx.xpath IS NULL OR idx.xpath = ''; -- pure virtual field
+
+ process_idx := FALSE;
+ IF idx.display_field AND 'display' = ANY (field_types) THEN process_idx = TRUE; END IF;
+ IF idx.browse_field AND 'browse' = ANY (field_types) THEN process_idx = TRUE; END IF;
+ IF idx.search_field AND 'search' = ANY (field_types) THEN process_idx = TRUE; END IF;
+ IF idx.facet_field AND 'facet' = ANY (field_types) THEN process_idx = TRUE; END IF;
+ CONTINUE WHEN process_idx = FALSE; -- disabled for all types
+
+ joiner := COALESCE(idx.joiner, default_joiner);
+
+ SELECT INTO xfrm * from config.xml_transform WHERE name = idx.format;
+
+ -- See if we can skip the XSLT ... it's expensive
+ IF prev_xfrm IS NULL OR prev_xfrm <> xfrm.name THEN
+ -- Can't skip the transform
+ IF xfrm.xslt <> '---' THEN
+ transformed_xml := oils_xslt_process(bib.marc,xfrm.xslt);
+ ELSE
+ transformed_xml := bib.marc;
+ END IF;
+
+ prev_xfrm := xfrm.name;
+ END IF;
+
+ xml_node_list := oils_xpath( idx.xpath, transformed_xml, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] );
+
+ raw_text := NULL;
+ FOR xml_node IN SELECT x FROM unnest(xml_node_list) AS x LOOP
+ CONTINUE WHEN xml_node !~ E'^\\s*<';
+
+ -- XXX much of this should be moved into oils_xpath_string...
+ curr_text := ARRAY_TO_STRING(array_remove(array_remove(
+ oils_xpath( '//text()', -- get the content of all the nodes within the main selected node
+ REGEXP_REPLACE( xml_node, E'\\s+', ' ', 'g' ) -- Translate adjacent whitespace to a single space
+ ), ' '), ''), -- throw away morally empty (bankrupt?) strings
+ joiner
+ );
+
+ CONTINUE WHEN curr_text IS NULL OR curr_text = '';
+
+ IF raw_text IS NOT NULL THEN
+ raw_text := raw_text || joiner;
+ END IF;
+
+ raw_text := COALESCE(raw_text,'') || curr_text;
+
+ -- autosuggest/metabib.browse_entry
+ IF idx.browse_field THEN
+ output_row.browse_nocase = idx.browse_nocase;
+
+ IF idx.browse_xpath IS NOT NULL AND idx.browse_xpath <> '' THEN
+ browse_text := oils_xpath_string( idx.browse_xpath, xml_node, joiner, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] );
+ ELSE
+ browse_text := curr_text;
+ END IF;
+
+ IF idx.browse_sort_xpath IS NOT NULL AND
+ idx.browse_sort_xpath <> '' THEN
+
+ sort_value := oils_xpath_string(
+ idx.browse_sort_xpath, xml_node, joiner,
+ ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]]
+ );
+ ELSE
+ sort_value := browse_text;
+ END IF;
+
+ output_row.field_class = idx.field_class;
+ output_row.field = idx.id;
+ output_row.source = rid;
+ output_row.value = BTRIM(REGEXP_REPLACE(browse_text, E'\\s+', ' ', 'g'));
+ output_row.sort_value :=
+ public.naco_normalize(sort_value);
+
+ output_row.authority := NULL;
+
+ IF idx.authority_xpath IS NOT NULL AND idx.authority_xpath <> '' THEN
+ authority_text := oils_xpath_string(
+ idx.authority_xpath, xml_node, joiner,
+ ARRAY[
+ ARRAY[xfrm.prefix, xfrm.namespace_uri],
+ ARRAY['xlink','http://www.w3.org/1999/xlink']
+ ]
+ );
+
+ IF authority_text ~ '^\d+$' THEN
+ authority_link := authority_text::BIGINT;
+ PERFORM * FROM authority.record_entry WHERE id = authority_link;
+ IF FOUND THEN
+ output_row.authority := authority_link;
+ END IF;
+ END IF;
+
+ END IF;
+
+ output_row.browse_field = TRUE;
+ -- Returning browse rows with search_field = true for search+browse
+ -- configs allows us to retain granularity of being able to search
+ -- browse fields with "starts with" type operators (for example, for
+ -- titles of songs in music albums)
+ IF idx.search_field THEN
+ output_row.search_field = TRUE;
+ END IF;
+ RETURN NEXT output_row;
+ output_row.browse_nocase = FALSE;
+ output_row.browse_field = FALSE;
+ output_row.search_field = FALSE;
+ output_row.sort_value := NULL;
+ END IF;
+
+ -- insert raw node text for faceting
+ IF idx.facet_field THEN
+
+ IF idx.facet_xpath IS NOT NULL AND idx.facet_xpath <> '' THEN
+ facet_text := oils_xpath_string( idx.facet_xpath, xml_node, joiner, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] );
+ ELSE
+ facet_text := curr_text;
+ END IF;
+
+ output_row.field_class = idx.field_class;
+ output_row.field = -1 * idx.id;
+ output_row.source = rid;
+ output_row.value = BTRIM(REGEXP_REPLACE(facet_text, E'\\s+', ' ', 'g'));
+
+ output_row.facet_field = TRUE;
+ RETURN NEXT output_row;
+ output_row.facet_field = FALSE;
+ END IF;
+
+ -- insert raw node text for display
+ IF idx.display_field THEN
+
+ IF idx.display_xpath IS NOT NULL AND idx.display_xpath <> '' THEN
+ display_text := oils_xpath_string( idx.display_xpath, xml_node, joiner, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] );
+ ELSE
+ display_text := curr_text;
+ END IF;
+
+ output_row.field_class = idx.field_class;
+ output_row.field = -1 * idx.id;
+ output_row.source = rid;
+ output_row.value = BTRIM(REGEXP_REPLACE(display_text, E'\\s+', ' ', 'g'));
+
+ output_row.display_field = TRUE;
+ RETURN NEXT output_row;
+ output_row.display_field = FALSE;
+ END IF;
+
+ END LOOP;
+
+ CONTINUE WHEN raw_text IS NULL OR raw_text = '';
+
+ -- insert combined node text for searching
+ IF idx.search_field THEN
+ output_row.field_class = idx.field_class;
+ output_row.field = idx.id;
+ output_row.source = rid;
+ output_row.value = BTRIM(REGEXP_REPLACE(raw_text, E'\\s+', ' ', 'g'));
+
+ output_row.search_field = TRUE;
+ RETURN NEXT output_row;
+ output_row.search_field = FALSE;
+ END IF;
+
+ END LOOP;
+
+END;
+$func$ LANGUAGE PLPGSQL;
+
+COMMIT;