From: Mike Rylander Date: Tue, 21 Feb 2017 18:47:14 +0000 (-0500) Subject: Adding Related Name seed data; Optimizing a.extract_headings when only a subset is... X-Git-Url: https://old-git.evergreen-ils.org/?a=commitdiff_plain;h=33850b6c1cf3dc2ee5f7287882a6c3ff45925c52;p=working%2FEvergreen.git Adding Related Name seed data; Optimizing a.extract_headings when only a subset is needed; Fixing a couple copy-paste-os Signed-off-by: Mike Rylander --- diff --git a/Open-ILS/src/sql/Pg/upgrade/YYYY.schema.authority.sql b/Open-ILS/src/sql/Pg/upgrade/YYYY.schema.authority.sql index 328e8b1e69..fd8979de3e 100644 --- a/Open-ILS/src/sql/Pg/upgrade/YYYY.schema.authority.sql +++ b/Open-ILS/src/sql/Pg/upgrade/YYYY.schema.authority.sql @@ -67,13 +67,16 @@ INSERT INTO authority.heading_field(heading_type, heading_purpose, label, headin ,( 'topical_term', 'related', 'Related Topical Term', '/mads21:mads/mads21:related', '//mads21:topic', '/mads21:related/@type', '/mads21:mads/mads21:authority/mads21:topic[1]/@authority', '//mads21:topic[1]/@authority') ,( 'personal_name', 'main', 'Main Personal Name', '/mads21:mads/mads21:authority', '//mads21:name[@type="personal"]', NULL, NULL, NULL ) ,( 'personal_name', 'variant', 'Variant Personal Name', '/mads21:mads/mads21:variant', '//mads21:name[@type="personal"]', NULL, NULL, NULL ) +,( 'personal_name', 'related', 'Related Personal Name', '/mads21:mads/mads21:related', '//mads21:name[@type="personal"]', '/mads21:related/@type', NULL, NULL ) ,( 'corporate_name', 'main', 'Main Corporate name', '/mads21:mads/mads21:authority', '//mads21:name[@type="corporate"]', NULL, NULL, NULL ) ,( 'corporate_name', 'variant', 'Variant Corporate Name', '/mads21:mads/mads21:variant', '//mads21:name[@type="corporate"]', NULL, NULL, NULL ) +,( 'corporate_name', 'related', 'Related Corporate Name', '/mads21:mads/mads21:related', '//mads21:name[@type="corporate"]', '/mads21:related/@type', NULL, NULL ) ,( 'meeting_name', 'main', 'Main Meeting name', '/mads21:mads/mads21:authority', '//mads21:name[@type="conference"]', NULL, NULL, NULL ) ,( 'meeting_name', 'variant', 'Variant Meeting Name', '/mads21:mads/mads21:variant', '//mads21:name[@type="conference"]', NULL, NULL, NULL ) -,( 'geographic_name', 'main', 'Main Topical Term', '/mads21:mads/mads21:authority', '//mads21:geographic', NULL, '/mads21:mads/mads21:authority/mads21:geographic[1]/@authority', NULL ) -,( 'geographic_name', 'variant', 'Variant Topical Term', '/mads21:mads/mads21:variant', '//mads21:geographic', '/mads21:variant/@type', '/mads21:mads/mads21:authority/mads21:geographic[1]/@authority', '//mads21:geographic[1]/@authority') -,( 'geographic_name', 'related', 'Related Topical Term', '/mads21:mads/mads21:related', '//mads21:geographic', '/mads21:related/@type', '/mads21:mads/mads21:authority/mads21:geographic[1]/@authority', '//mads21:geographic[1]/@authority') +,( 'meeting_name', 'related', 'Related Meeting Name', '/mads21:mads/mads21:related', '//mads21:name[@type="meeting"]', '/mads21:related/@type', NULL, NULL ) +,( 'geographic_name', 'main', 'Main Geographic Term', '/mads21:mads/mads21:authority', '//mads21:geographic', NULL, '/mads21:mads/mads21:authority/mads21:geographic[1]/@authority', NULL ) +,( 'geographic_name', 'variant', 'Variant Geographic Term', '/mads21:mads/mads21:variant', '//mads21:geographic', '/mads21:variant/@type', '/mads21:mads/mads21:authority/mads21:geographic[1]/@authority', '//mads21:geographic[1]/@authority') +,( 'geographic_name', 'related', 'Related Geographic Term', '/mads21:mads/mads21:related', '//mads21:geographic', '/mads21:related/@type', '/mads21:mads/mads21:authority/mads21:geographic[1]/@authority', '//mads21:geographic[1]/@authority') ,( 'genre_form_term', 'main', 'Main Genre/Form Term', '/mads21:mads/mads21:authority', '//mads21:genre', NULL, '/mads21:mads/mads21:authority/mads21:genre[1]/@authority', NULL ) ,( 'genre_form_term', 'variant', 'Variant Genre/Form Term', '/mads21:mads/mads21:variant', '//mads21:genre', '/mads21:variant/@type', '/mads21:mads/mads21:authority/mads21:genre[1]/@authority', '//mads21:genre[1]/@authority') ,( 'genre_form_term', 'related', 'Related Genre/Form Term', '/mads21:mads/mads21:related', '//mads21:genre', '/mads21:related/@type', '/mads21:mads/mads21:authority/mads21:genre[1]/@authority', '//mads21:genre[1]/@authority') @@ -101,7 +104,7 @@ CREATE TYPE authority.heading AS ( normalized_heading TEXT ); -CREATE OR REPLACE FUNCTION authority.extract_headings(marc TEXT) RETURNS SETOF authority.heading AS $func$ +CREATE OR REPLACE FUNCTION authority.extract_headings(marc TEXT, restrict INT[] DEFAULT NULL) RETURNS SETOF authority.heading AS $func$ DECLARE idx authority.heading_field%ROWTYPE; xfrm config.xml_transform%ROWTYPE; @@ -122,7 +125,7 @@ DECLARE BEGIN -- Loop over the indexing entries - FOR idx IN SELECT * FROM authority.heading_field ORDER BY format LOOP + FOR idx IN SELECT * FROM authority.heading_field WHERE restrict IS NULL OR id = ANY (restrict) ORDER BY format LOOP output_row.field := idx.id; output_row.type := idx.heading_type; @@ -239,7 +242,7 @@ BEGIN END; $func$ LANGUAGE PLPGSQL; -CREATE OR REPLACE FUNCTION authority.extract_headings(rid BIGINT) RETURNS SETOF authority.heading AS $func$ +CREATE OR REPLACE FUNCTION authority.extract_headings(rid BIGINT, restrict INT[] DEFAULT NULL) RETURNS SETOF authority.heading AS $func$ DECLARE auth authority.record_entry%ROWTYPE; output_row authority.heading; @@ -247,10 +250,99 @@ BEGIN -- Get the record SELECT INTO auth * FROM authority.record_entry WHERE id = rid; - RETURN QUERY SELECT * FROM authority.extract_headings(auth.marc); + RETURN QUERY SELECT * FROM authority.extract_headings(auth.marc, restrict); END; $func$ LANGUAGE PLPGSQL; +CREATE OR REPLACE FUNCTION authority.simple_heading_set( marcxml TEXT ) RETURNS SETOF authority.simple_heading AS $func$ +DECLARE + res authority.simple_heading%ROWTYPE; + acsaf authority.control_set_authority_field%ROWTYPE; + heading_row authority.heading%ROWTYPE; + tag_used TEXT; + nfi_used TEXT; + sf TEXT; + cset INT; + heading_text TEXT; + joiner_text TEXT; + sort_text TEXT; + tmp_text TEXT; + tmp_xml TEXT; + first_sf BOOL; + auth_id INT DEFAULT COALESCE(NULLIF(oils_xpath_string('//*[@tag="901"]/*[local-name()="subfield" and @code="c"]', marcxml), ''), '0')::INT; +BEGIN + + SELECT control_set INTO cset FROM authority.record_entry WHERE id = auth_id; + + IF cset IS NULL THEN + SELECT control_set INTO cset + FROM authority.control_set_authority_field + WHERE tag IN ( SELECT UNNEST(XPATH('//*[starts-with(@tag,"1")]/@tag',marcxml::XML)::TEXT[])) + LIMIT 1; + END IF; + + res.record := auth_id; + res.thesaurus := authority.extract_thesaurus(marcxml); + + FOR acsaf IN SELECT * FROM authority.control_set_authority_field WHERE control_set = cset LOOP + res.atag := acsaf.id; + + IF acsaf.heading_field IS NULL THEN + tag_used := acsaf.tag; + nfi_used := acsaf.nfi; + joiner_text := COALESCE(acsaf.joiner, ' '); + + FOR tmp_xml IN SELECT UNNEST(XPATH('//*[@tag="'||tag_used||'"]', marcxml::XML)::TEXT[]) LOOP + + heading_text := COALESCE( + oils_xpath_string('./*[contains("'||acsaf.display_sf_list||'",@code)]', tmp_xml, joiner_text), + '' + ); + + IF nfi_used IS NOT NULL THEN + + sort_text := SUBSTRING( + heading_text FROM + COALESCE( + NULLIF( + REGEXP_REPLACE( + oils_xpath_string('./@ind'||nfi_used, tmp_xml::TEXT), + $$\D+$$, + '', + 'g' + ), + '' + )::INT, + 0 + ) + 1 + ); + + ELSE + sort_text := heading_text; + END IF; + + IF heading_text IS NOT NULL AND heading_text <> '' THEN + res.value := heading_text; + res.sort_value := public.naco_normalize(sort_text); + res.index_vector = to_tsvector('keyword'::regconfig, res.sort_value); + RETURN NEXT res; + END IF; + + END LOOP; + ELSE + FOR heading_row IN SELECT * FROM authority.extract_headings(marcxml, ARRAY[acsaf.heading_field]) LOOP + res.value := heading_row.heading; + res.sort_value := heading_row.normalized_heading; + res.index_vector = to_tsvector('keyword'::regconfig, res.sort_value); + RETURN NEXT res; + END LOOP; + END IF; + END LOOP; + + RETURN; +END; +$func$ LANGUAGE PLPGSQL STABLE STRICT; + ALTER TABLE authority.control_set_authority_field ADD COLUMN heading_field INTEGER REFERENCES authority.heading_field(id); UPDATE authority.control_set_authority_field acsaf @@ -317,7 +409,7 @@ FROM authority.heading_field ahf WHERE tag = '511' AND control_set = 1 AND ahf.heading_purpose = 'related' -AND ahf.heading_type = 'personal_name'; +AND ahf.heading_type = 'meeting_name'; UPDATE authority.control_set_authority_field acsaf SET heading_field = ahf.id @@ -416,98 +508,6 @@ AND ahf.heading_type = 'genre_form_term'; -- SELECT * FROM authority.extract_headings(174); -- SELECT * FROM authority.extract_headings(151); -CREATE OR REPLACE FUNCTION authority.simple_heading_set( marcxml TEXT ) RETURNS SETOF authority.simple_heading AS $func$ -DECLARE - res authority.simple_heading%ROWTYPE; - acsaf authority.control_set_authority_field%ROWTYPE; - heading authority.heading%ROWTYPE; - tag_used TEXT; - nfi_used TEXT; - sf TEXT; - cset INT; - heading_text TEXT; - joiner_text TEXT; - sort_text TEXT; - tmp_text TEXT; - tmp_xml TEXT; - first_sf BOOL; - auth_id INT DEFAULT COALESCE(NULLIF(oils_xpath_string('//*[@tag="901"]/*[local-name()="subfield" and @code="c"]', marcxml), ''), '0')::INT; -BEGIN - - SELECT control_set INTO cset FROM authority.record_entry WHERE id = auth_id; - - IF cset IS NULL THEN - SELECT control_set INTO cset - FROM authority.control_set_authority_field - WHERE tag IN ( SELECT UNNEST(XPATH('//*[starts-with(@tag,"1")]/@tag',marcxml::XML)::TEXT[])) - LIMIT 1; - END IF; - - res.record := auth_id; - res.thesaurus := authority.extract_thesaurus(marcxml); - - FOR acsaf IN SELECT * FROM authority.control_set_authority_field WHERE control_set = cset LOOP - - res.atag := acsaf.id; - tag_used := acsaf.tag; - nfi_used := acsaf.nfi; - joiner_text := COALESCE(acsaf.joiner, ' '); - - IF acsaf.heading_field IS NOT NULL THEN - FOR heading IN SELECT * FROM authority.extract_headings(marcxml) LOOP - IF heading.field = acsaf.heading_field THEN - res.value := heading.heading; - res.sort_value := heading.normalized_heading; - res.index_vector = to_tsvector('keyword'::regconfig, res.sort_value); - RETURN NEXT res; - END IF; - END LOOP; - ELSE - FOR tmp_xml IN SELECT UNNEST(XPATH('//*[@tag="'||tag_used||'"]', marcxml::XML)::TEXT[]) LOOP - - heading_text := COALESCE( - oils_xpath_string('./*[contains("'||acsaf.display_sf_list||'",@code)]', tmp_xml, joiner_text), - '' - ); - - IF nfi_used IS NOT NULL THEN - - sort_text := SUBSTRING( - heading_text FROM - COALESCE( - NULLIF( - REGEXP_REPLACE( - oils_xpath_string('./@ind'||nfi_used, tmp_xml::TEXT), - $$\D+$$, - '', - 'g' - ), - '' - )::INT, - 0 - ) + 1 - ); - - ELSE - sort_text := heading_text; - END IF; - - IF heading_text IS NOT NULL AND heading_text <> '' THEN - res.value := heading_text; - res.sort_value := public.naco_normalize(sort_text); - res.index_vector = to_tsvector('keyword'::regconfig, res.sort_value); - RETURN NEXT res; - END IF; - - END LOOP; - END IF; - - END LOOP; - - RETURN; -END; -$func$ LANGUAGE PLPGSQL STABLE STRICT; - -- select value from metabib.browse_entry where value ~ '^Shakespeare, William' and value ~ '1564-1616$'; -- UPDATE config.internal_flag SET enabled = TRUE where name = 'ingest.reingest.force_on_same_marc';