Extract multiple like values from lineitem
authorBill Erickson <berick@esilibrary.com>
Tue, 22 Jan 2013 19:35:22 +0000 (14:35 -0500)
committerLebbeous Fogle-Weekley <lebbeous@esilibrary.com>
Wed, 13 Mar 2013 19:12:13 +0000 (15:12 -0400)
Make it possible to extract multiple values with the same MARC tag
during acquisitions lineitem ingest.  Prior to this, only one (e.g.)
ISBN for each record would be extracted as a lineitem attribute.

Signed-off-by: Bill Erickson <berick@esilibrary.com>
Signed-off-by: Lebbeous Fogle-Weekley <lebbeous@esilibrary.com>
Open-ILS/src/sql/Pg/002.functions.config.sql
Open-ILS/src/sql/Pg/200.schema.acq.sql
Open-ILS/src/sql/Pg/upgrade/YYYY.schema.acq-multi-attrs.sql [new file with mode: 0644]

index 87b6e28..47f97c3 100644 (file)
@@ -346,6 +346,29 @@ BEGIN
 END;
 $$ LANGUAGE PLPGSQL IMMUTABLE;
 
+CREATE OR REPLACE FUNCTION extract_marc_field_set
+        (TEXT, BIGINT, TEXT, TEXT) RETURNS SETOF TEXT AS $$
+DECLARE
+    query TEXT;
+    output TEXT;
+BEGIN
+    FOR output IN
+        SELECT x.t FROM (
+            SELECT id,t
+                FROM  oils_xpath_table(
+                    'id', 'marc', $1, $3, 'id = ' || $2)
+                AS t(id int, t text))x
+        LOOP
+        IF $4 IS NOT NULL THEN
+            SELECT INTO output (SELECT regexp_replace(output, $4, '', 'g'));
+        END IF;
+        RETURN NEXT output;
+    END LOOP;
+    RETURN;
+END;
+$$ LANGUAGE PLPGSQL IMMUTABLE;
+
+
 CREATE OR REPLACE FUNCTION extract_marc_field ( TEXT, BIGINT, TEXT ) RETURNS TEXT AS $$
     SELECT extract_marc_field($1,$2,$3,'');
 $$ LANGUAGE SQL IMMUTABLE;
index 89c573a..21fc092 100644 (file)
@@ -1019,6 +1019,11 @@ CREATE OR REPLACE FUNCTION public.extract_acq_marc_field ( BIGINT, TEXT, TEXT) R
        SELECT extract_marc_field('acq.lineitem', $1, $2, $3);
 $$ LANGUAGE SQL;
 
+CREATE OR REPLACE FUNCTION public.extract_acq_marc_field_set ( BIGINT, TEXT, TEXT) RETURNS SETOF TEXT AS $$
+       SELECT extract_marc_field_set('acq.lineitem', $1, $2, $3);
+$$ LANGUAGE SQL;
+
+
 /*
 CREATE OR REPLACE FUNCTION public.extract_bib_marc_field ( BIGINT, TEXT ) RETURNS TEXT AS $$
        SELECT public.extract_marc_field('biblio.record_entry', $1, $2);
@@ -1080,19 +1085,26 @@ BEGIN
                 END IF;
             ELSE
                 pos := 1;
-
                 LOOP
-                           SELECT extract_acq_marc_field(id, xpath_string || '[' || pos || ']', adef.remove) INTO value FROM acq.lineitem WHERE id = NEW.id;
-
-                           IF (value IS NOT NULL AND value <> '') THEN
-                                   INSERT INTO acq.lineitem_attr (lineitem, definition, attr_type, attr_name, attr_value)
-                                           VALUES (NEW.id, adef.id, atype, adef.code, value);
-                    ELSE
+                    -- each application of the regex may produce multiple values
+                    FOR value IN
+                        SELECT * FROM extract_acq_marc_field_set(
+                            NEW.id, xpath_string || '[' || pos || ']', adef.remove)
+                        LOOP
+
+                        IF (value IS NOT NULL AND value <> '') THEN
+                            INSERT INTO acq.lineitem_attr
+                                (lineitem, definition, attr_type, attr_name, attr_value)
+                                VALUES (NEW.id, adef.id, atype, adef.code, value);
+                        ELSE
+                            EXIT;
+                        END IF;
+                    END LOOP;
+                    IF NOT FOUND THEN
                         EXIT;
-                               END IF;
-
+                    END IF;
                     pos := pos + 1;
-                END LOOP;
+               END LOOP;
             END IF;
 
                END IF;
diff --git a/Open-ILS/src/sql/Pg/upgrade/YYYY.schema.acq-multi-attrs.sql b/Open-ILS/src/sql/Pg/upgrade/YYYY.schema.acq-multi-attrs.sql
new file mode 100644 (file)
index 0000000..cbe837f
--- /dev/null
@@ -0,0 +1,104 @@
+
+BEGIN;
+
+CREATE OR REPLACE FUNCTION extract_marc_field_set
+        (TEXT, BIGINT, TEXT, TEXT) RETURNS SETOF TEXT AS $$
+DECLARE
+    query TEXT;
+    output TEXT;
+BEGIN
+    FOR output IN
+        SELECT x.t FROM (
+            SELECT id,t
+                FROM  oils_xpath_table(
+                    'id', 'marc', $1, $3, 'id = ' || $2)
+                AS t(id int, t text))x
+        LOOP
+        IF $4 IS NOT NULL THEN
+            SELECT INTO output (SELECT regexp_replace(output, $4, '', 'g'));
+        END IF;
+        RETURN NEXT output;
+    END LOOP;
+    RETURN;
+END;
+$$ LANGUAGE PLPGSQL IMMUTABLE;
+
+
+CREATE OR REPLACE FUNCTION 
+        public.extract_acq_marc_field_set ( BIGINT, TEXT, TEXT) 
+        RETURNS SETOF TEXT AS $$
+       SELECT extract_marc_field_set('acq.lineitem', $1, $2, $3);
+$$ LANGUAGE SQL;
+
+
+CREATE OR REPLACE FUNCTION public.ingest_acq_marc ( ) RETURNS TRIGGER AS $function$
+DECLARE
+       value           TEXT;
+       atype           TEXT;
+       prov            INT;
+       pos             INT;
+       adef            RECORD;
+       xpath_string    TEXT;
+BEGIN
+       FOR adef IN SELECT *,tableoid FROM acq.lineitem_attr_definition LOOP
+
+               SELECT relname::TEXT INTO atype FROM pg_class WHERE oid = adef.tableoid;
+
+               IF (atype NOT IN ('lineitem_usr_attr_definition','lineitem_local_attr_definition')) THEN
+                       IF (atype = 'lineitem_provider_attr_definition') THEN
+                               SELECT provider INTO prov FROM acq.lineitem_provider_attr_definition WHERE id = adef.id;
+                               CONTINUE WHEN NEW.provider IS NULL OR prov <> NEW.provider;
+                       END IF;
+                       
+                       IF (atype = 'lineitem_provider_attr_definition') THEN
+                               SELECT xpath INTO xpath_string FROM acq.lineitem_provider_attr_definition WHERE id = adef.id;
+                       ELSIF (atype = 'lineitem_marc_attr_definition') THEN
+                               SELECT xpath INTO xpath_string FROM acq.lineitem_marc_attr_definition WHERE id = adef.id;
+                       ELSIF (atype = 'lineitem_generated_attr_definition') THEN
+                               SELECT xpath INTO xpath_string FROM acq.lineitem_generated_attr_definition WHERE id = adef.id;
+                       END IF;
+
+            xpath_string := REGEXP_REPLACE(xpath_string,$re$//?text\(\)$$re$,'');
+
+            IF (adef.code = 'title' OR adef.code = 'author') THEN
+                -- title and author should not be split
+                -- FIXME: once oils_xpath can grok XPATH 2.0 functions, we can use
+                -- string-join in the xpath and remove this special case
+                       SELECT extract_acq_marc_field(id, xpath_string, adef.remove) INTO value FROM acq.lineitem WHERE id = NEW.id;
+                       IF (value IS NOT NULL AND value <> '') THEN
+                                   INSERT INTO acq.lineitem_attr (lineitem, definition, attr_type, attr_name, attr_value)
+                                   VALUES (NEW.id, adef.id, atype, adef.code, value);
+                END IF;
+            ELSE
+                pos := 1;
+                LOOP
+                    -- each application of the regex may produce multiple values
+                    FOR value IN
+                        SELECT * FROM extract_acq_marc_field_set(
+                            NEW.id, xpath_string || '[' || pos || ']', adef.remove)
+                        LOOP
+
+                        IF (value IS NOT NULL AND value <> '') THEN
+                            INSERT INTO acq.lineitem_attr
+                                (lineitem, definition, attr_type, attr_name, attr_value)
+                                VALUES (NEW.id, adef.id, atype, adef.code, value);
+                        ELSE
+                            EXIT;
+                        END IF;
+                    END LOOP;
+                    IF NOT FOUND THEN
+                        EXIT;
+                    END IF;
+                    pos := pos + 1;
+               END LOOP;
+            END IF;
+
+               END IF;
+
+       END LOOP;
+
+       RETURN NULL;
+END;
+$function$ LANGUAGE PLPGSQL;
+
+COMMIT;