LP#1744996 Track bib record merges
authorBill Erickson <berickxx@gmail.com>
Thu, 25 Jan 2018 16:05:23 +0000 (11:05 -0500)
committerDan Wells <dbw2@calvin.edu>
Tue, 27 Feb 2018 16:40:42 +0000 (11:40 -0500)
Adds "merge_date" and "merged_to" fields for bib records.  Merge date
is applied to all records involved in a record merge at merge time.
merged_to is applied to all subordinate (merged) records and points to
the target record.

Signed-off-by: Bill Erickson <berickxx@gmail.com>
Signed-off-by: Remington Steed <rjs7@calvin.edu>
Signed-off-by: Dan Wells <dbw2@calvin.edu>
Open-ILS/examples/fm_IDL.xml
Open-ILS/src/sql/Pg/010.schema.biblio.sql
Open-ILS/src/sql/Pg/999.functions.global.sql
Open-ILS/src/sql/Pg/upgrade/XXXX.schema.record-merge-tracking.sql [new file with mode: 0644]

index 2635b8c..19bce3d 100644 (file)
@@ -3136,6 +3136,8 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA
                        <field reporter:label="Flat Display Entries" name="flat_display_entries" oils_persist:virtual="true" reporter:datatype="link"/>
                        <field reporter:label="Compressed Display Entries" name="compressed_display_entries" oils_persist:virtual="true" reporter:datatype="link"/>
                        <field reporter:label="Wide Display Entries" name="wide_display_entry" oils_persist:virtual="true" reporter:datatype="link"/>
+                       <field reporter:label="Merge Date" name="merge_date" reporter:datatype="timestamp"/>
+                       <field reporter:label="Merged To" name="merged_to" reporter:datatype="link"/>
                </fields>
                <links>
                        <link field="owner" reltype="has_a" key="id" map="" class="aou"/>
@@ -3163,6 +3165,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA
                        <link field="flat_display_entries" reltype="has_many" key="source" map="" class="mfde"/>
                        <link field="compressed_display_entries" reltype="has_many" key="source" map="" class="mcde"/>
                        <link field="wide_display_entry" reltype="might_have" key="source" map="" class="mwde"/>
+                       <link field="merged_to" reltype="has_a" key="id" map="" class="bre"/>
                </links>
         <permacrud xmlns="http://open-ils.org/spec/opensrf/IDL/permacrud/v1">
             <actions>
index 13620a4..857307e 100644 (file)
@@ -54,7 +54,9 @@ CREATE TABLE biblio.record_entry (
        last_xact_id    TEXT            NOT NULL,
     vis_attr_vector INT[],
     owner       INT,
-    share_depth INT
+    share_depth INT,
+    merge_date  TIMESTAMP WITH TIME ZONE,
+    merged_to   BIGINT REFERENCES biblio.record_entry(id)
 );
 CREATE INDEX biblio_record_entry_creator_idx ON biblio.record_entry ( creator );
 CREATE INDEX biblio_record_entry_create_date_idx ON biblio.record_entry ( create_date );
index dc72926..dd36aa9 100644 (file)
@@ -1212,6 +1212,14 @@ BEGIN
                FROM asset.copy WHERE id = target_copy
        );
 
+    -- Apply merge tracking
+    UPDATE biblio.record_entry 
+        SET merge_date = NOW() WHERE id = target_record;
+
+    UPDATE biblio.record_entry
+        SET merge_date = NOW(), merged_to = target_record
+        WHERE id = source_record;
+
     -- Finally, "delete" the source record
     DELETE FROM biblio.record_entry WHERE id = source_record;
 
diff --git a/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.record-merge-tracking.sql b/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.record-merge-tracking.sql
new file mode 100644 (file)
index 0000000..d781df3
--- /dev/null
@@ -0,0 +1,277 @@
+
+BEGIN;
+
+-- SELECT evergreen.upgrade_deps_block_check('XXX', :eg_version);
+
+ALTER TABLE biblio.record_entry
+    ADD COLUMN merge_date TIMESTAMP WITH TIME ZONE,
+    ADD COLUMN merged_to BIGINT REFERENCES biblio.record_entry(id);
+
+CREATE OR REPLACE FUNCTION asset.merge_record_assets( target_record BIGINT, source_record BIGINT ) RETURNS INT AS $func$
+DECLARE
+    moved_objects INT := 0;
+    source_cn     asset.call_number%ROWTYPE;
+    target_cn     asset.call_number%ROWTYPE;
+    metarec       metabib.metarecord%ROWTYPE;
+    hold          action.hold_request%ROWTYPE;
+    ser_rec       serial.record_entry%ROWTYPE;
+    ser_sub       serial.subscription%ROWTYPE;
+    acq_lineitem  acq.lineitem%ROWTYPE;
+    acq_request   acq.user_request%ROWTYPE;
+    booking       booking.resource_type%ROWTYPE;
+    source_part   biblio.monograph_part%ROWTYPE;
+    target_part   biblio.monograph_part%ROWTYPE;
+    multi_home    biblio.peer_bib_copy_map%ROWTYPE;
+    uri_count     INT := 0;
+    counter       INT := 0;
+    uri_datafield TEXT;
+    uri_text      TEXT := '';
+BEGIN
+
+    -- move any 856 entries on records that have at least one MARC-mapped URI entry
+    SELECT  INTO uri_count COUNT(*)
+      FROM  asset.uri_call_number_map m
+            JOIN asset.call_number cn ON (m.call_number = cn.id)
+      WHERE cn.record = source_record;
+
+    IF uri_count > 0 THEN
+        
+        -- This returns more nodes than you might expect:
+        -- 7 instead of 1 for an 856 with $u $y $9
+        SELECT  COUNT(*) INTO counter
+          FROM  oils_xpath_table(
+                    'id',
+                    'marc',
+                    'biblio.record_entry',
+                    '//*[@tag="856"]',
+                    'id=' || source_record
+                ) as t(i int,c text);
+    
+        FOR i IN 1 .. counter LOOP
+            SELECT  '<datafield xmlns="http://www.loc.gov/MARC21/slim"' || 
+                       ' tag="856"' ||
+                       ' ind1="' || FIRST(ind1) || '"'  ||
+                       ' ind2="' || FIRST(ind2) || '">' ||
+                        STRING_AGG(
+                            '<subfield code="' || subfield || '">' ||
+                            regexp_replace(
+                                regexp_replace(
+                                    regexp_replace(data,'&','&amp;','g'),
+                                    '>', '&gt;', 'g'
+                                ),
+                                '<', '&lt;', 'g'
+                            ) || '</subfield>', ''
+                        ) || '</datafield>' INTO uri_datafield
+              FROM  oils_xpath_table(
+                        'id',
+                        'marc',
+                        'biblio.record_entry',
+                        '//*[@tag="856"][position()=' || i || ']/@ind1|' ||
+                        '//*[@tag="856"][position()=' || i || ']/@ind2|' ||
+                        '//*[@tag="856"][position()=' || i || ']/*/@code|' ||
+                        '//*[@tag="856"][position()=' || i || ']/*[@code]',
+                        'id=' || source_record
+                    ) as t(id int,ind1 text, ind2 text,subfield text,data text);
+
+            -- As most of the results will be NULL, protect against NULLifying
+            -- the valid content that we do generate
+            uri_text := uri_text || COALESCE(uri_datafield, '');
+        END LOOP;
+
+        IF uri_text <> '' THEN
+            UPDATE  biblio.record_entry
+              SET   marc = regexp_replace(marc,'(</[^>]*record>)', uri_text || E'\\1')
+              WHERE id = target_record;
+        END IF;
+
+    END IF;
+
+       -- Find and move metarecords to the target record
+       SELECT  INTO metarec *
+         FROM  metabib.metarecord
+         WHERE master_record = source_record;
+
+       IF FOUND THEN
+               UPDATE  metabib.metarecord
+                 SET   master_record = target_record,
+                       mods = NULL
+                 WHERE id = metarec.id;
+
+               moved_objects := moved_objects + 1;
+       END IF;
+
+       -- Find call numbers attached to the source ...
+       FOR source_cn IN SELECT * FROM asset.call_number WHERE record = source_record LOOP
+
+               SELECT  INTO target_cn *
+                 FROM  asset.call_number
+                 WHERE label = source_cn.label
+            AND prefix = source_cn.prefix
+            AND suffix = source_cn.suffix
+                       AND owning_lib = source_cn.owning_lib
+                       AND record = target_record
+                       AND NOT deleted;
+
+               -- ... and if there's a conflicting one on the target ...
+               IF FOUND THEN
+
+                       -- ... move the copies to that, and ...
+                       UPDATE  asset.copy
+                         SET   call_number = target_cn.id
+                         WHERE call_number = source_cn.id;
+
+                       -- ... move V holds to the move-target call number
+                       FOR hold IN SELECT * FROM action.hold_request WHERE target = source_cn.id AND hold_type = 'V' LOOP
+               
+                               UPDATE  action.hold_request
+                                 SET   target = target_cn.id
+                                 WHERE id = hold.id;
+               
+                               moved_objects := moved_objects + 1;
+                       END LOOP;
+        
+            UPDATE asset.call_number SET deleted = TRUE WHERE id = source_cn.id;
+
+               -- ... if not ...
+               ELSE
+                       -- ... just move the call number to the target record
+                       UPDATE  asset.call_number
+                         SET   record = target_record
+                         WHERE id = source_cn.id;
+               END IF;
+
+               moved_objects := moved_objects + 1;
+       END LOOP;
+
+       -- Find T holds targeting the source record ...
+       FOR hold IN SELECT * FROM action.hold_request WHERE target = source_record AND hold_type = 'T' LOOP
+
+               -- ... and move them to the target record
+               UPDATE  action.hold_request
+                 SET   target = target_record
+                 WHERE id = hold.id;
+
+               moved_objects := moved_objects + 1;
+       END LOOP;
+
+       -- Find serial records targeting the source record ...
+       FOR ser_rec IN SELECT * FROM serial.record_entry WHERE record = source_record LOOP
+               -- ... and move them to the target record
+               UPDATE  serial.record_entry
+                 SET   record = target_record
+                 WHERE id = ser_rec.id;
+
+               moved_objects := moved_objects + 1;
+       END LOOP;
+
+       -- Find serial subscriptions targeting the source record ...
+       FOR ser_sub IN SELECT * FROM serial.subscription WHERE record_entry = source_record LOOP
+               -- ... and move them to the target record
+               UPDATE  serial.subscription
+                 SET   record_entry = target_record
+                 WHERE id = ser_sub.id;
+
+               moved_objects := moved_objects + 1;
+       END LOOP;
+
+       -- Find booking resource types targeting the source record ...
+       FOR booking IN SELECT * FROM booking.resource_type WHERE record = source_record LOOP
+               -- ... and move them to the target record
+               UPDATE  booking.resource_type
+                 SET   record = target_record
+                 WHERE id = booking.id;
+
+               moved_objects := moved_objects + 1;
+       END LOOP;
+
+       -- Find acq lineitems targeting the source record ...
+       FOR acq_lineitem IN SELECT * FROM acq.lineitem WHERE eg_bib_id = source_record LOOP
+               -- ... and move them to the target record
+               UPDATE  acq.lineitem
+                 SET   eg_bib_id = target_record
+                 WHERE id = acq_lineitem.id;
+
+               moved_objects := moved_objects + 1;
+       END LOOP;
+
+       -- Find acq user purchase requests targeting the source record ...
+       FOR acq_request IN SELECT * FROM acq.user_request WHERE eg_bib = source_record LOOP
+               -- ... and move them to the target record
+               UPDATE  acq.user_request
+                 SET   eg_bib = target_record
+                 WHERE id = acq_request.id;
+
+               moved_objects := moved_objects + 1;
+       END LOOP;
+
+       -- Find parts attached to the source ...
+       FOR source_part IN SELECT * FROM biblio.monograph_part WHERE record = source_record LOOP
+
+               SELECT  INTO target_part *
+                 FROM  biblio.monograph_part
+                 WHERE label = source_part.label
+                       AND record = target_record;
+
+               -- ... and if there's a conflicting one on the target ...
+               IF FOUND THEN
+
+                       -- ... move the copy-part maps to that, and ...
+                       UPDATE  asset.copy_part_map
+                         SET   part = target_part.id
+                         WHERE part = source_part.id;
+
+                       -- ... move P holds to the move-target part
+                       FOR hold IN SELECT * FROM action.hold_request WHERE target = source_part.id AND hold_type = 'P' LOOP
+               
+                               UPDATE  action.hold_request
+                                 SET   target = target_part.id
+                                 WHERE id = hold.id;
+               
+                               moved_objects := moved_objects + 1;
+                       END LOOP;
+
+               -- ... if not ...
+               ELSE
+                       -- ... just move the part to the target record
+                       UPDATE  biblio.monograph_part
+                         SET   record = target_record
+                         WHERE id = source_part.id;
+               END IF;
+
+               moved_objects := moved_objects + 1;
+       END LOOP;
+
+       -- Find multi_home items attached to the source ...
+       FOR multi_home IN SELECT * FROM biblio.peer_bib_copy_map WHERE peer_record = source_record LOOP
+               -- ... and move them to the target record
+               UPDATE  biblio.peer_bib_copy_map
+                 SET   peer_record = target_record
+                 WHERE id = multi_home.id;
+
+               moved_objects := moved_objects + 1;
+       END LOOP;
+
+       -- And delete mappings where the item's home bib was merged with the peer bib
+       DELETE FROM biblio.peer_bib_copy_map WHERE peer_record = (
+               SELECT (SELECT record FROM asset.call_number WHERE id = call_number)
+               FROM asset.copy WHERE id = target_copy
+       );
+
+    -- Apply merge tracking
+    UPDATE biblio.record_entry 
+        SET merge_date = NOW() WHERE id = target_record;
+
+    UPDATE biblio.record_entry
+        SET merge_date = NOW(), merged_to = target_record
+        WHERE id = source_record;
+
+    -- Finally, "delete" the source record
+    DELETE FROM biblio.record_entry WHERE id = source_record;
+
+       -- That's all, folks!
+       RETURN moved_objects;
+END;
+$func$ LANGUAGE plpgsql;
+
+
+COMMIT;