--- /dev/null
+BEGIN;
+
+-- subset of types listed in https://www.loc.gov/marc/authority/ad1xx3xx.html
+-- for now, ignoring subdivisions
+CREATE TYPE authority.heading_type AS ENUM (
+ 'personal_name',
+ 'corporate_name',
+ 'meeting_name',
+ 'uniform_title',
+ 'named_event',
+ 'chronological_term',
+ 'topical_term',
+ 'geographic_name',
+ 'genre_form_term',
+ 'medium_of_performance_term'
+);
+
+CREATE TYPE authority.variant_heading_type AS ENUM (
+ 'abbreviation',
+ 'acronym',
+ 'translation',
+ 'expansion',
+ 'other',
+ 'hidden'
+);
+
+CREATE TYPE authority.related_heading_type AS ENUM (
+ 'earlier',
+ 'later',
+ 'parent organization',
+ 'broader',
+ 'narrower',
+ 'equivalent'
+);
+
+CREATE TYPE authority.heading_purpose AS ENUM (
+ 'main',
+ 'variant',
+ 'related'
+);
+
+CREATE TABLE authority.heading_field (
+ id SERIAL PRIMARY KEY,
+ heading_type authority.heading_type NOT NULL,
+ heading_purpose authority.heading_purpose NOT NULL,
+ label TEXT NOT NULL,
+ format TEXT NOT NULL REFERENCES config.xml_transform (name) DEFAULT 'mads21',
+ heading_xpath TEXT NOT NULL,
+ component_xpath TEXT NOT NULL,
+ type_xpath TEXT NULL, -- to extract related or variant type
+ thesaurus_xpath TEXT NULL,
+ joiner TEXT NULL
+);
+
+INSERT INTO authority.heading_field(heading_type, heading_purpose, label, heading_xpath, component_xpath, type_xpath, thesaurus_xpath) VALUES
+( 'topical_term', 'main', 'Main Topical Term', '/mads21:mads/mads21:authority', '//mads21:topic', NULL, '//mads21:topic[1]/@authority' ),
+( 'topical_term', 'variant', 'Variant Topical Term', '/mads21:mads/mads21:variant', '//mads21:topic', '/mads21:variant/@type', '//mads21:topic[1]/@authority'),
+( 'topical_term', 'related', 'Broader Topical Term', '/mads21:mads/mads21:related', '//mads21:topic', '/mads21:related/@type', '//mads21:topic[1]/@authority'),
+( 'personal_name', 'main', 'Main Personal name', '/mads21:mads/mads21:authority', '//mads21:name', NULL, NULL ),
+( 'personal_name', 'variant', 'Variant Personal name', '/mads21:mads/mads21:variant', '//mads21:name', NULL, NULL )
+;
+
+CREATE TYPE authority.heading AS (
+ type authority.heading_type,
+ purpose authority.heading_purpose,
+ variant_type authority.variant_heading_type,
+ related_type authority.related_heading_type,
+ thesaurus TEXT,
+ heading TEXT,
+ normalized_heading TEXT
+);
+
+CREATE OR REPLACE FUNCTION authority.extract_headings(rid BIGINT) RETURNS SETOF authority.heading AS $func$
+DECLARE
+ auth authority.record_entry%ROWTYPE;
+ idx authority.heading_field%ROWTYPE;
+ xfrm config.xml_transform%ROWTYPE;
+ prev_xfrm TEXT;
+ transformed_xml TEXT;
+ heading_node TEXT;
+ heading_node_list TEXT[];
+ component_node TEXT;
+ component_node_list TEXT[];
+ raw_text TEXT;
+ curr_text TEXT;
+ joiner TEXT;
+ type_value TEXT;
+ output_row authority.heading;
+BEGIN
+
+ -- Get the record
+ SELECT INTO auth * FROM authority.record_entry WHERE id = rid;
+
+ -- Loop over the indexing entries
+ FOR idx IN SELECT * FROM authority.heading_field ORDER BY format LOOP
+
+ output_row.type := idx.heading_type;
+ output_row.purpose := idx.heading_purpose;
+
+ joiner := COALESCE(idx.joiner, ' ');
+
+ SELECT INTO xfrm * from config.xml_transform WHERE name = idx.format;
+
+ -- See if we can skip the XSLT ... it's expensive
+ IF prev_xfrm IS NULL OR prev_xfrm <> xfrm.name THEN
+ -- Can't skip the transform
+ IF xfrm.xslt <> '---' THEN
+ transformed_xml := oils_xslt_process(auth.marc, xfrm.xslt);
+ ELSE
+ transformed_xml := auth.marc;
+ END IF;
+
+ prev_xfrm := xfrm.name;
+ END IF;
+
+ heading_node_list := oils_xpath( idx.heading_xpath, transformed_xml, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] );
+
+ FOR heading_node IN SELECT x FROM unnest(heading_node_list) AS x LOOP
+
+ CONTINUE WHEN heading_node !~ E'^\\s*<';
+
+ output_row.variant_type := NULL;
+ output_row.related_type := NULL;
+ output_row.thesaurus := NULL;
+ output_row.heading := NULL;
+
+ IF idx.heading_purpose = 'variant' AND idx.type_xpath IS NOT NULL THEN
+ type_value := ARRAY_TO_STRING(oils_xpath(idx.type_xpath, heading_node, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]]), '');
+ BEGIN
+ output_row.variant_type := type_value;
+ EXCEPTION WHEN invalid_text_representation THEN
+ RAISE NOTICE 'Do not recognize variant heading type %', type_value;
+ END;
+ END IF;
+ IF idx.heading_purpose = 'related' AND idx.type_xpath IS NOT NULL THEN
+ type_value := ARRAY_TO_STRING(oils_xpath(idx.type_xpath, heading_node, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]]), '');
+ BEGIN
+ output_row.related_type := type_value;
+ EXCEPTION WHEN invalid_text_representation THEN
+ RAISE NOTICE 'Do not recognize related heading type %', type_value;
+ END;
+ END IF;
+
+ IF idx.thesaurus_xpath IS NOT NULL THEN
+ output_row.thesaurus = ARRAY_TO_STRING(oils_xpath(idx.thesaurus_xpath, heading_node, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]]), '');
+ END IF;
+
+ raw_text := NULL;
+
+ -- now iterate over components of heading
+ component_node_list := oils_xpath( idx.component_xpath, heading_node, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] );
+ FOR component_node IN SELECT x FROM unnest(component_node_list) AS x LOOP
+ -- XXX much of this should be moved into oils_xpath_string...
+ curr_text := ARRAY_TO_STRING(evergreen.array_remove_item_by_value(evergreen.array_remove_item_by_value(
+ oils_xpath( '//text()', -- get the content of all the nodes within the main selected node
+ REGEXP_REPLACE( component_node, E'\\s+', ' ', 'g' ) -- Translate adjacent whitespace to a single space
+ ), ' '), ''), -- throw away morally empty (bankrupt?) strings
+ joiner
+ );
+
+ CONTINUE WHEN curr_text IS NULL OR curr_text = '';
+
+ IF raw_text IS NOT NULL THEN
+ raw_text := raw_text || joiner;
+ END IF;
+
+ raw_text := COALESCE(raw_text,'') || curr_text;
+ END LOOP;
+
+ IF raw_text IS NOT NULL THEN
+ output_row.heading = raw_text;
+ RETURN NEXT output_row;
+ END IF;
+ END LOOP;
+
+ END LOOP;
+END;
+$func$ LANGUAGE PLPGSQL;
+
+SELECT * FROM authority.extract_headings(152);
+SELECT * FROM authority.extract_headings(164);
+SELECT * FROM authority.extract_headings(149);