Adding Related Name seed data; Optimizing a.extract_headings when only a subset is...
authorMike Rylander <mrylander@gmail.com>
Tue, 21 Feb 2017 18:47:14 +0000 (13:47 -0500)
committerMike Rylander <mrylander@gmail.com>
Tue, 21 Feb 2017 18:47:14 +0000 (13:47 -0500)
Signed-off-by: Mike Rylander <mrylander@gmail.com>
Open-ILS/src/sql/Pg/upgrade/YYYY.schema.authority.sql

index 328e8b1..fd8979d 100644 (file)
@@ -67,13 +67,16 @@ INSERT INTO authority.heading_field(heading_type, heading_purpose, label, headin
 ,( 'topical_term', 'related', 'Related Topical Term', '/mads21:mads/mads21:related',   '//mads21:topic', '/mads21:related/@type', '/mads21:mads/mads21:authority/mads21:topic[1]/@authority', '//mads21:topic[1]/@authority')
 ,( 'personal_name', 'main', 'Main Personal Name',     '/mads21:mads/mads21:authority', '//mads21:name[@type="personal"]', NULL, NULL, NULL )
 ,( 'personal_name', 'variant', 'Variant Personal Name',     '/mads21:mads/mads21:variant', '//mads21:name[@type="personal"]', NULL, NULL, NULL )
+,( 'personal_name', 'related', 'Related Personal Name',     '/mads21:mads/mads21:related', '//mads21:name[@type="personal"]', '/mads21:related/@type', NULL, NULL )
 ,( 'corporate_name', 'main', 'Main Corporate name',     '/mads21:mads/mads21:authority', '//mads21:name[@type="corporate"]', NULL, NULL, NULL )
 ,( 'corporate_name', 'variant', 'Variant Corporate Name',     '/mads21:mads/mads21:variant', '//mads21:name[@type="corporate"]', NULL, NULL, NULL )
+,( 'corporate_name', 'related', 'Related Corporate Name',     '/mads21:mads/mads21:related', '//mads21:name[@type="corporate"]', '/mads21:related/@type', NULL, NULL )
 ,( 'meeting_name', 'main', 'Main Meeting name',     '/mads21:mads/mads21:authority', '//mads21:name[@type="conference"]', NULL, NULL, NULL )
 ,( 'meeting_name', 'variant', 'Variant Meeting Name',     '/mads21:mads/mads21:variant', '//mads21:name[@type="conference"]', NULL, NULL, NULL )
-,( 'geographic_name', 'main',    'Main Topical Term',    '/mads21:mads/mads21:authority', '//mads21:geographic', NULL, '/mads21:mads/mads21:authority/mads21:geographic[1]/@authority', NULL )
-,( 'geographic_name', 'variant', 'Variant Topical Term', '/mads21:mads/mads21:variant',   '//mads21:geographic', '/mads21:variant/@type', '/mads21:mads/mads21:authority/mads21:geographic[1]/@authority', '//mads21:geographic[1]/@authority')
-,( 'geographic_name', 'related', 'Related Topical Term', '/mads21:mads/mads21:related',   '//mads21:geographic', '/mads21:related/@type', '/mads21:mads/mads21:authority/mads21:geographic[1]/@authority', '//mads21:geographic[1]/@authority')
+,( 'meeting_name', 'related', 'Related Meeting Name',     '/mads21:mads/mads21:related', '//mads21:name[@type="meeting"]', '/mads21:related/@type', NULL, NULL )
+,( 'geographic_name', 'main',    'Main Geographic Term',    '/mads21:mads/mads21:authority', '//mads21:geographic', NULL, '/mads21:mads/mads21:authority/mads21:geographic[1]/@authority', NULL )
+,( 'geographic_name', 'variant', 'Variant Geographic Term', '/mads21:mads/mads21:variant',   '//mads21:geographic', '/mads21:variant/@type', '/mads21:mads/mads21:authority/mads21:geographic[1]/@authority', '//mads21:geographic[1]/@authority')
+,( 'geographic_name', 'related', 'Related Geographic Term', '/mads21:mads/mads21:related',   '//mads21:geographic', '/mads21:related/@type', '/mads21:mads/mads21:authority/mads21:geographic[1]/@authority', '//mads21:geographic[1]/@authority')
 ,( 'genre_form_term', 'main',    'Main Genre/Form Term',    '/mads21:mads/mads21:authority', '//mads21:genre', NULL, '/mads21:mads/mads21:authority/mads21:genre[1]/@authority', NULL )
 ,( 'genre_form_term', 'variant', 'Variant Genre/Form Term', '/mads21:mads/mads21:variant',   '//mads21:genre', '/mads21:variant/@type', '/mads21:mads/mads21:authority/mads21:genre[1]/@authority', '//mads21:genre[1]/@authority')
 ,( 'genre_form_term', 'related', 'Related Genre/Form Term', '/mads21:mads/mads21:related',   '//mads21:genre', '/mads21:related/@type', '/mads21:mads/mads21:authority/mads21:genre[1]/@authority', '//mads21:genre[1]/@authority')
@@ -101,7 +104,7 @@ CREATE TYPE authority.heading AS (
     normalized_heading  TEXT
 );
 
-CREATE OR REPLACE FUNCTION authority.extract_headings(marc TEXT) RETURNS SETOF authority.heading AS $func$
+CREATE OR REPLACE FUNCTION authority.extract_headings(marc TEXT, restrict INT[] DEFAULT NULL) RETURNS SETOF authority.heading AS $func$
 DECLARE
     idx         authority.heading_field%ROWTYPE;
     xfrm        config.xml_transform%ROWTYPE;
@@ -122,7 +125,7 @@ DECLARE
 BEGIN
 
     -- Loop over the indexing entries
-    FOR idx IN SELECT * FROM authority.heading_field ORDER BY format LOOP
+    FOR idx IN SELECT * FROM authority.heading_field WHERE restrict IS NULL OR id = ANY (restrict) ORDER BY format LOOP
 
         output_row.field   := idx.id;
         output_row.type    := idx.heading_type;
@@ -239,7 +242,7 @@ BEGIN
 END;
 $func$ LANGUAGE PLPGSQL;
 
-CREATE OR REPLACE FUNCTION authority.extract_headings(rid BIGINT) RETURNS SETOF authority.heading AS $func$
+CREATE OR REPLACE FUNCTION authority.extract_headings(rid BIGINT, restrict INT[] DEFAULT NULL) RETURNS SETOF authority.heading AS $func$
 DECLARE
     auth        authority.record_entry%ROWTYPE;
     output_row  authority.heading;
@@ -247,10 +250,99 @@ BEGIN
     -- Get the record
     SELECT INTO auth * FROM authority.record_entry WHERE id = rid;
 
-    RETURN QUERY SELECT * FROM authority.extract_headings(auth.marc);
+    RETURN QUERY SELECT * FROM authority.extract_headings(auth.marc, restrict);
 END;
 $func$ LANGUAGE PLPGSQL;
 
+CREATE OR REPLACE FUNCTION authority.simple_heading_set( marcxml TEXT ) RETURNS SETOF authority.simple_heading AS $func$
+DECLARE
+    res             authority.simple_heading%ROWTYPE;
+    acsaf           authority.control_set_authority_field%ROWTYPE;
+    heading_row     authority.heading%ROWTYPE;
+    tag_used        TEXT;
+    nfi_used        TEXT;
+    sf              TEXT;
+    cset            INT;
+    heading_text    TEXT;
+    joiner_text     TEXT;
+    sort_text       TEXT;
+    tmp_text        TEXT;
+    tmp_xml         TEXT;
+    first_sf        BOOL;
+    auth_id         INT DEFAULT COALESCE(NULLIF(oils_xpath_string('//*[@tag="901"]/*[local-name()="subfield" and @code="c"]', marcxml), ''), '0')::INT; 
+BEGIN
+
+    SELECT control_set INTO cset FROM authority.record_entry WHERE id = auth_id;
+
+    IF cset IS NULL THEN
+        SELECT  control_set INTO cset
+          FROM  authority.control_set_authority_field
+          WHERE tag IN ( SELECT  UNNEST(XPATH('//*[starts-with(@tag,"1")]/@tag',marcxml::XML)::TEXT[]))
+          LIMIT 1;
+    END IF;
+
+    res.record := auth_id;
+    res.thesaurus := authority.extract_thesaurus(marcxml);
+
+    FOR acsaf IN SELECT * FROM authority.control_set_authority_field WHERE control_set = cset LOOP
+        res.atag := acsaf.id;
+
+        IF acsaf.heading_field IS NULL THEN
+            tag_used := acsaf.tag;
+            nfi_used := acsaf.nfi;
+            joiner_text := COALESCE(acsaf.joiner, ' ');
+    
+            FOR tmp_xml IN SELECT UNNEST(XPATH('//*[@tag="'||tag_used||'"]', marcxml::XML)::TEXT[]) LOOP
+    
+                heading_text := COALESCE(
+                    oils_xpath_string('./*[contains("'||acsaf.display_sf_list||'",@code)]', tmp_xml, joiner_text),
+                    ''
+                );
+    
+                IF nfi_used IS NOT NULL THEN
+    
+                    sort_text := SUBSTRING(
+                        heading_text FROM
+                        COALESCE(
+                            NULLIF(
+                                REGEXP_REPLACE(
+                                    oils_xpath_string('./@ind'||nfi_used, tmp_xml::TEXT),
+                                    $$\D+$$,
+                                    '',
+                                    'g'
+                                ),
+                                ''
+                            )::INT,
+                            0
+                        ) + 1
+                    );
+    
+                ELSE
+                    sort_text := heading_text;
+                END IF;
+    
+                IF heading_text IS NOT NULL AND heading_text <> '' THEN
+                    res.value := heading_text;
+                    res.sort_value := public.naco_normalize(sort_text);
+                    res.index_vector = to_tsvector('keyword'::regconfig, res.sort_value);
+                    RETURN NEXT res;
+                END IF;
+    
+            END LOOP;
+        ELSE
+            FOR heading_row IN SELECT * FROM authority.extract_headings(marcxml, ARRAY[acsaf.heading_field]) LOOP
+                res.value := heading_row.heading;
+                res.sort_value := heading_row.normalized_heading;
+                res.index_vector = to_tsvector('keyword'::regconfig, res.sort_value);
+                RETURN NEXT res;
+            END LOOP;
+        END IF;
+    END LOOP;
+
+    RETURN;
+END;
+$func$ LANGUAGE PLPGSQL STABLE STRICT;
+
 ALTER TABLE authority.control_set_authority_field ADD COLUMN heading_field INTEGER REFERENCES authority.heading_field(id);
 
 UPDATE authority.control_set_authority_field acsaf
@@ -317,7 +409,7 @@ FROM authority.heading_field ahf
 WHERE tag = '511'
 AND control_set = 1
 AND ahf.heading_purpose = 'related'
-AND ahf.heading_type = 'personal_name';
+AND ahf.heading_type = 'meeting_name';
 
 UPDATE authority.control_set_authority_field acsaf
 SET heading_field = ahf.id
@@ -416,98 +508,6 @@ AND ahf.heading_type = 'genre_form_term';
 -- SELECT * FROM authority.extract_headings(174);
 -- SELECT * FROM authority.extract_headings(151);
 
-CREATE OR REPLACE FUNCTION authority.simple_heading_set( marcxml TEXT ) RETURNS SETOF authority.simple_heading AS $func$
-DECLARE
-    res             authority.simple_heading%ROWTYPE;
-    acsaf           authority.control_set_authority_field%ROWTYPE;
-    heading         authority.heading%ROWTYPE;
-    tag_used        TEXT;
-    nfi_used        TEXT;
-    sf              TEXT;
-    cset            INT;
-    heading_text    TEXT;
-    joiner_text     TEXT;
-    sort_text       TEXT;
-    tmp_text        TEXT;
-    tmp_xml         TEXT;
-    first_sf        BOOL;
-    auth_id         INT DEFAULT COALESCE(NULLIF(oils_xpath_string('//*[@tag="901"]/*[local-name()="subfield" and @code="c"]', marcxml), ''), '0')::INT; 
-BEGIN
-
-    SELECT control_set INTO cset FROM authority.record_entry WHERE id = auth_id;
-
-    IF cset IS NULL THEN
-        SELECT  control_set INTO cset
-          FROM  authority.control_set_authority_field
-          WHERE tag IN ( SELECT  UNNEST(XPATH('//*[starts-with(@tag,"1")]/@tag',marcxml::XML)::TEXT[]))
-          LIMIT 1;
-    END IF;
-
-    res.record := auth_id;
-    res.thesaurus := authority.extract_thesaurus(marcxml);
-
-    FOR acsaf IN SELECT * FROM authority.control_set_authority_field WHERE control_set = cset LOOP
-
-        res.atag := acsaf.id;
-        tag_used := acsaf.tag;
-        nfi_used := acsaf.nfi;
-        joiner_text := COALESCE(acsaf.joiner, ' ');
-
-        IF acsaf.heading_field IS NOT NULL THEN
-            FOR heading IN SELECT * FROM authority.extract_headings(marcxml) LOOP
-                IF heading.field = acsaf.heading_field THEN
-                    res.value := heading.heading;
-                    res.sort_value := heading.normalized_heading;
-                    res.index_vector = to_tsvector('keyword'::regconfig, res.sort_value);
-                    RETURN NEXT res;
-                END IF;
-            END LOOP;
-        ELSE
-            FOR tmp_xml IN SELECT UNNEST(XPATH('//*[@tag="'||tag_used||'"]', marcxml::XML)::TEXT[]) LOOP
-
-                heading_text := COALESCE(
-                    oils_xpath_string('./*[contains("'||acsaf.display_sf_list||'",@code)]', tmp_xml, joiner_text),
-                    ''
-                );
-
-                IF nfi_used IS NOT NULL THEN
-
-                    sort_text := SUBSTRING(
-                        heading_text FROM
-                        COALESCE(
-                            NULLIF(
-                                REGEXP_REPLACE(
-                                    oils_xpath_string('./@ind'||nfi_used, tmp_xml::TEXT),
-                                    $$\D+$$,
-                                    '',
-                                    'g'
-                                ),
-                                ''
-                            )::INT,
-                            0
-                        ) + 1
-                    );
-
-                ELSE
-                    sort_text := heading_text;
-                END IF;
-
-                IF heading_text IS NOT NULL AND heading_text <> '' THEN
-                    res.value := heading_text;
-                    res.sort_value := public.naco_normalize(sort_text);
-                    res.index_vector = to_tsvector('keyword'::regconfig, res.sort_value);
-                    RETURN NEXT res;
-                END IF;
-
-            END LOOP;
-        END IF;
-
-    END LOOP;
-
-    RETURN;
-END;
-$func$ LANGUAGE PLPGSQL STABLE STRICT;
-
 
 -- select value from metabib.browse_entry where value ~ '^Shakespeare, William' and value ~ '1564-1616$';
 -- UPDATE config.internal_flag SET enabled = TRUE where name = 'ingest.reingest.force_on_same_marc';