LP#1878984: Fix Preserve specification user/miker/lp-1878984-vandelay-preserve
authorMike Rylander <mrylander@gmail.com>
Mon, 26 Oct 2020 20:28:24 +0000 (16:28 -0400)
committerMike Rylander <mrylander@gmail.com>
Mon, 26 Oct 2020 20:30:28 +0000 (16:30 -0400)
When processing a Preserve specification, vandelay.replace_field fails
to account for the situation where the target record does not have any
instances of the field to be preserved.  This commit copies fields from
the source to the target when there are no like target fields and the
source fields are not restricted by a match requirement.

It additionally adds multi-subfield match requirement capabilities to
the add_field and replace_field functions, as implemented in the branch
submitted for LP#1901597 that does the same for strip_field.

Signed-off-by: Mike Rylander <mrylander@gmail.com>
Open-ILS/src/sql/Pg/012.schema.vandelay.sql
Open-ILS/src/sql/Pg/upgrade/XXXX.function.vandelay_replace_field.sql [new file with mode: 0644]

index 64fd6cd..7f6435e 100644 (file)
@@ -1150,15 +1150,18 @@ CREATE OR REPLACE FUNCTION vandelay.add_field ( target_xml TEXT, source_xml TEXT
             $field =~ s/\s+//;
             my $sf = $2;
             $sf =~ s/\s+//;
-            my $match = $3;
-            $match =~ s/^\s*//; $match =~ s/\s*$//;
+            my $matches = $3;
+            $matches =~ s/^\s*//; $matches =~ s/\s*$//;
             $fields{$field} = { sf => [ split('', $sf) ] };
-            if ($match) {
-                my ($msf,$mre) = split('~', $match);
-                if (length($msf) > 0 and length($mre) > 0) {
-                    $msf =~ s/^\s*//; $msf =~ s/\s*$//;
-                    $mre =~ s/^\s*//; $mre =~ s/\s*$//;
-                    $fields{$field}{match} = { sf => $msf, re => qr/$mre/ };
+            if ($matches) {
+                for my $match (split('&&', $matches)) {
+                    $match =~ s/^\s*//; $match =~ s/\s*$//;
+                    my ($msf,$mre) = split('~', $match);
+                    if (length($msf) > 0 and length($mre) > 0) {
+                        $msf =~ s/^\s*//; $msf =~ s/\s*$//;
+                        $mre =~ s/^\s*//; $mre =~ s/\s*$//;
+                        $fields{$field}{match}{$msf} = qr/$mre/;
+                    }
                 }
             }
         }
@@ -1175,7 +1178,8 @@ CREATE OR REPLACE FUNCTION vandelay.add_field ( target_xml TEXT, source_xml TEXT
                 } else {
                     for my $to_field (@tos) {
                         if (exists($fields{$f}{match})) {
-                            next unless (grep { $_ =~ $fields{$f}{match}{re} } $to_field->subfield($fields{$f}{match}{sf}));
+                            my @match_list = grep { $to_field->subfield($_) =~ $fields{$f}{match}{$_} } keys %{$fields{$f}{match}};
+                            next unless (scalar(@match_list) == scalar(keys %{$fields{$f}{match}}));
                         }
                         for my $old_sf ($from_field->subfields) {
                             $to_field->add_subfields( @$old_sf ) if grep(/$$old_sf[0]/,@{$fields{$f}{sf}});
@@ -1296,16 +1300,19 @@ CREATE OR REPLACE FUNCTION vandelay.replace_field
             $field =~ s/\s+//;
             my $sf = $2;
             $sf =~ s/\s+//;
-            my $match = $3;
-            $match =~ s/^\s*//; $match =~ s/\s*$//;
+            my $matches = $3;
+            $matches =~ s/^\s*//; $matches =~ s/\s*$//;
             $fields{$field} = { sf => [ split('', $sf) ] };
-            if ($match) {
-                my ($msf,$mre) = split('~', $match);
-                if (length($msf) > 0 and length($mre) > 0) {
-                    $msf =~ s/^\s*//; $msf =~ s/\s*$//;
-                    $mre =~ s/^\s*//; $mre =~ s/\s*$//;
-                    $fields{$field}{match} = { sf => $msf, re => qr/$mre/ };
-                }
+            if ($matches) {
+                for my $match (split('&&', $matches)) {
+                    $match =~ s/^\s*//; $match =~ s/\s*$//;
+                    my ($msf,$mre) = split('~', $match);
+                    if (length($msf) > 0 and length($mre) > 0) {
+                        $msf =~ s/^\s*//; $msf =~ s/\s*$//;
+                        $mre =~ s/^\s*//; $mre =~ s/\s*$//;
+                        $fields{$field}{match}{$msf} = qr/$mre/;
+                    }
+                 }
             }
         }
     }
@@ -1394,13 +1401,22 @@ CREATE OR REPLACE FUNCTION vandelay.replace_field
     # MARC tag loop
     for my $f (keys %fields) {
         my $tag_idx = -1;
-        for my $target_field ($target_r->field($f)) {
+        my @target_fields = $target_r->field($f);
+
+        if (!@target_fields and !defined($fields{$f}{match})) {
+            # we will just add the source fields
+            # unless they require a target match.
+            my @add_these = map { $_->clone } $source_r->field($f);
+            $target_r->insert_fields_ordered( @add_these );
+        }
+
+        for my $target_field (@target_fields) { # This will not run when the above "if" does.
 
             # field spec contains a regex for this field.  Confirm field on 
             # target record matches the specified regex before replacing.
             if (exists($fields{$f}{match})) {
-                next unless (grep { $_ =~ $fields{$f}{match}{re} } 
-                    $target_field->subfield($fields{$f}{match}{sf}));
+                my @match_list = grep { $target_field->subfield($_) =~ $fields{$f}{match}{$_} } keys %{$fields{$f}{match}};
+                next unless (scalar(@match_list) == scalar(keys %{$fields{$f}{match}}));
             }
 
             my @new_subfields;
diff --git a/Open-ILS/src/sql/Pg/upgrade/XXXX.function.vandelay_replace_field.sql b/Open-ILS/src/sql/Pg/upgrade/XXXX.function.vandelay_replace_field.sql
new file mode 100644 (file)
index 0000000..39f3c53
--- /dev/null
@@ -0,0 +1,295 @@
+BEGIN;
+
+SELECT evergreen.upgrade_deps_block_check('XXXX', :eg_version);
+
+CREATE OR REPLACE FUNCTION vandelay.add_field ( target_xml TEXT, source_xml TEXT, field TEXT, force_add INT ) RETURNS TEXT AS $_$
+
+    use MARC::Record;
+    use MARC::File::XML (BinaryEncoding => 'UTF-8');
+    use MARC::Charset;
+    use strict;
+
+    MARC::Charset->assume_unicode(1);
+
+    my $target_xml = shift;
+    my $source_xml = shift;
+    my $field_spec = shift;
+    my $force_add = shift || 0;
+
+    my $target_r = MARC::Record->new_from_xml( $target_xml );
+    my $source_r = MARC::Record->new_from_xml( $source_xml );
+
+    return $target_xml unless ($target_r && $source_r);
+
+    my @field_list = split(',', $field_spec);
+
+    my %fields;
+    for my $f (@field_list) {
+        $f =~ s/^\s*//; $f =~ s/\s*$//;
+        if ($f =~ /^(.{3})(\w*)(?:\[([^]]*)\])?$/) {
+            my $field = $1;
+            $field =~ s/\s+//;
+            my $sf = $2;
+            $sf =~ s/\s+//;
+            my $matches = $3;
+            $matches =~ s/^\s*//; $matches =~ s/\s*$//;
+            $fields{$field} = { sf => [ split('', $sf) ] };
+            if ($matches) {
+                for my $match (split('&&', $matches)) {
+                    $match =~ s/^\s*//; $match =~ s/\s*$//;
+                    my ($msf,$mre) = split('~', $match);
+                    if (length($msf) > 0 and length($mre) > 0) {
+                        $msf =~ s/^\s*//; $msf =~ s/\s*$//;
+                        $mre =~ s/^\s*//; $mre =~ s/\s*$//;
+                        $fields{$field}{match}{$msf} = qr/$mre/;
+                    }
+                }
+            }
+        }
+    }
+
+    for my $f ( keys %fields) {
+        if ( @{$fields{$f}{sf}} ) {
+            for my $from_field ($source_r->field( $f )) {
+                my @tos = $target_r->field( $f );
+                if (!@tos) {
+                    next if (exists($fields{$f}{match}) and !$force_add);
+                    my @new_fields = map { $_->clone } $source_r->field( $f );
+                    $target_r->insert_fields_ordered( @new_fields );
+                } else {
+                    for my $to_field (@tos) {
+                        if (exists($fields{$f}{match})) {
+                            my @match_list = grep { $to_field->subfield($_) =~ $fields{$f}{match}{$_} } keys %{$fields{$f}{match}};
+                            next unless (scalar(@match_list) == scalar(keys %{$fields{$f}{match}}));
+                        }
+                        for my $old_sf ($from_field->subfields) {
+                            $to_field->add_subfields( @$old_sf ) if grep(/$$old_sf[0]/,@{$fields{$f}{sf}});
+                        }
+                    }
+                }
+            }
+        } else {
+            my @new_fields = map { $_->clone } $source_r->field( $f );
+            $target_r->insert_fields_ordered( @new_fields );
+        }
+    }
+
+    $target_xml = $target_r->as_xml_record;
+    $target_xml =~ s/^<\?.+?\?>$//mo;
+    $target_xml =~ s/\n//sgo;
+    $target_xml =~ s/>\s+</></sgo;
+
+    return $target_xml;
+
+$_$ LANGUAGE PLPERLU;
+
+CREATE OR REPLACE FUNCTION vandelay.replace_field 
+    (target_xml TEXT, source_xml TEXT, field TEXT) RETURNS TEXT AS $_$
+
+    use strict;
+    use MARC::Record;
+    use MARC::Field;
+    use MARC::File::XML (BinaryEncoding => 'UTF-8');
+    use MARC::Charset;
+
+    MARC::Charset->assume_unicode(1);
+
+    my $target_xml = shift;
+    my $source_xml = shift;
+    my $field_spec = shift;
+
+    my $target_r = MARC::Record->new_from_xml($target_xml);
+    my $source_r = MARC::Record->new_from_xml($source_xml);
+
+    return $target_xml unless $target_r && $source_r;
+
+    # Extract the field_spec components into MARC tags, subfields, 
+    # and regex matches.  Copied wholesale from vandelay.strip_field()
+
+    my @field_list = split(',', $field_spec);
+    my %fields;
+    for my $f (@field_list) {
+        $f =~ s/^\s*//; $f =~ s/\s*$//;
+        if ($f =~ /^(.{3})(\w*)(?:\[([^]]*)\])?$/) {
+            my $field = $1;
+            $field =~ s/\s+//;
+            my $sf = $2;
+            $sf =~ s/\s+//;
+            my $matches = $3;
+            $matches =~ s/^\s*//; $matches =~ s/\s*$//;
+            $fields{$field} = { sf => [ split('', $sf) ] };
+            if ($matches) {
+                for my $match (split('&&', $matches)) {
+                    $match =~ s/^\s*//; $match =~ s/\s*$//;
+                    my ($msf,$mre) = split('~', $match);
+                    if (length($msf) > 0 and length($mre) > 0) {
+                        $msf =~ s/^\s*//; $msf =~ s/\s*$//;
+                        $mre =~ s/^\s*//; $mre =~ s/\s*$//;
+                        $fields{$field}{match}{$msf} = qr/$mre/;
+                    }
+                 }
+            }
+        }
+    }
+
+    # Returns a flat list of subfield (code, value, code, value, ...)
+    # suitable for adding to a MARC::Field.
+    sub generate_replacement_subfields {
+        my ($source_field, $target_field, @controlled_subfields) = @_;
+
+        # Performing a wholesale field replacment.  
+        # Use the entire source field as-is.
+        return map {$_->[0], $_->[1]} $source_field->subfields
+            unless @controlled_subfields;
+
+        my @new_subfields;
+
+        # Iterate over all target field subfields:
+        # 1. Keep uncontrolled subfields as is.
+        # 2. Replace values for controlled subfields when a
+        #    replacement value exists on the source record.
+        # 3. Delete values for controlled subfields when no 
+        #    replacement value exists on the source record.
+
+        for my $target_sf ($target_field->subfields) {
+            my $subfield = $target_sf->[0];
+            my $target_val = $target_sf->[1];
+
+            if (grep {$_ eq $subfield} @controlled_subfields) {
+                if (my $source_val = $source_field->subfield($subfield)) {
+                    # We have a replacement value
+                    push(@new_subfields, $subfield, $source_val);
+                } else {
+                    # no replacement value for controlled subfield, drop it.
+                }
+            } else {
+                # Field is not controlled.  Copy it over as-is.
+                push(@new_subfields, $subfield, $target_val);
+            }
+        }
+
+        # Iterate over all subfields in the source field and back-fill
+        # any values that exist only in the source field.  Insert these
+        # subfields in the same relative position they exist in the
+        # source field.
+                
+        my @seen_subfields;
+        for my $source_sf ($source_field->subfields) {
+            my $subfield = $source_sf->[0];
+            my $source_val = $source_sf->[1];
+            push(@seen_subfields, $subfield);
+
+            # target field already contains this subfield, 
+            # so it would have been addressed above.
+            next if $target_field->subfield($subfield);
+
+            # Ignore uncontrolled subfields.
+            next unless grep {$_ eq $subfield} @controlled_subfields;
+
+            # Adding a new subfield.  Find its relative position and add
+            # it to the list under construction.  Work backwards from
+            # the list of already seen subfields to find the best slot.
+
+            my $done = 0;
+            for my $seen_sf (reverse(@seen_subfields)) {
+                my $idx = @new_subfields;
+                for my $new_sf (reverse(@new_subfields)) {
+                    $idx--;
+                    next if $idx % 2 == 1; # sf codes are in the even slots
+
+                    if ($new_subfields[$idx] eq $seen_sf) {
+                        splice(@new_subfields, $idx + 2, 0, $subfield, $source_val);
+                        $done = 1;
+                        last;
+                    }
+                }
+                last if $done;
+            }
+
+            # if no slot was found, add to the end of the list.
+            push(@new_subfields, $subfield, $source_val) unless $done;
+        }
+
+        return @new_subfields;
+    }
+
+    # MARC tag loop
+    for my $f (keys %fields) {
+        my $tag_idx = -1;
+        my @target_fields = $target_r->field($f);
+
+        if (!@target_fields and !defined($fields{$f}{match})) {
+            # we will just add the source fields
+            # unless they require a target match.
+            my @add_these = map { $_->clone } $source_r->field($f);
+            $target_r->insert_fields_ordered( @add_these );
+        }
+
+        for my $target_field (@target_fields) { # This will not run when the above "if" does.
+
+            # field spec contains a regex for this field.  Confirm field on 
+            # target record matches the specified regex before replacing.
+            if (exists($fields{$f}{match})) {
+                my @match_list = grep { $target_field->subfield($_) =~ $fields{$f}{match}{$_} } keys %{$fields{$f}{match}};
+                next unless (scalar(@match_list) == scalar(keys %{$fields{$f}{match}}));
+            }
+
+            my @new_subfields;
+            my @controlled_subfields = @{$fields{$f}{sf}};
+
+            # If the target record has multiple matching bib fields,
+            # replace them from matching fields on the source record
+            # in a predictable order to avoid replacing with them with
+            # same source field repeatedly.
+            my @source_fields = $source_r->field($f);
+            my $source_field = $source_fields[++$tag_idx];
+
+            if (!$source_field && @controlled_subfields) {
+                # When there are more target fields than source fields
+                # and we are replacing values for subfields and not
+                # performing wholesale field replacment, use the last
+                # available source field as the input for all remaining
+                # target fields.
+                $source_field = $source_fields[$#source_fields];
+            }
+
+            if (!$source_field) {
+                # No source field exists.  Delete all affected target
+                # data.  This is a little bit counterintuitive, but is
+                # backwards compatible with the previous version of this
+                # function which first deleted all affected data, then
+                # replaced values where possible.
+                if (@controlled_subfields) {
+                    $target_field->delete_subfield($_) for @controlled_subfields;
+                } else {
+                    $target_r->delete_field($target_field);
+                }
+                next;
+            }
+
+            my @new_subfields = generate_replacement_subfields(
+                $source_field, $target_field, @controlled_subfields);
+
+            # Build the replacement field from scratch.  
+            my $replacement_field = MARC::Field->new(
+                $target_field->tag,
+                $target_field->indicator(1),
+                $target_field->indicator(2),
+                @new_subfields
+            );
+
+            $target_field->replace_with($replacement_field);
+        }
+    }
+
+    $target_xml = $target_r->as_xml_record;
+    $target_xml =~ s/^<\?.+?\?>$//mo;
+    $target_xml =~ s/\n//sgo;
+    $target_xml =~ s/>\s+</></sgo;
+
+    return $target_xml;
+
+$_$ LANGUAGE PLPERLU;
+
+COMMIT;
+