LP1754455: Add strip-tag option and simplify strip code. collab/dyrcona/LP1754455_marc_export_strip
author Jason Stephenson <jason@sigio.com>
Thu, 29 Mar 2018 20:37:03 +0000 (16:37 -0400)
committer Jason Stephenson <jason@sigio.com>
Fri, 30 Mar 2018 00:33:02 +0000 (20:33 -0400)
Add a --strip-tag option that piggybacks on the strip code added by
Dan Pearl in a previous commit.

Also, simplify Dan's code to fit better in the framework of what was
added for the --strip-tag option.

Signed-off-by: Jason Stephenson <jason@sigio.com>
Open-ILS/src/support-scripts/marc_export.in

index 576349c..7934ee4 100755 (executable)
@@ -87,6 +87,7 @@ sub new {
                'all',
                'replace_001',
                'strip=s@',
+               'strip-tag=s@',
                'location=s',
                'money=s',
                'config=s',
@@ -128,7 +129,18 @@ Usage: $0 [options]
  --all or -a        Export all records; ignores input list
  --replace_001      Replace the 001 field value with the record ID
  --strip tagRE/subRE 
-                    Strip fields or subfields matching regular expressions
+                    Strip fields or subfields matching regular expressions.
+                    This will remove whole fields or just subfields where
+                    the field tag and/or sufield code match the give regular
+                    expressions.
+ --strip-tag tagRE/subRe=valRE
+                    Strip whole fields based on tag and subfield value.
+                    This will remove whole fields where the given subfield's
+                    value matches a regular expression.
+ --strip-tag tagRE/subRe!valRE
+                    Strip whole fields based on tag and subfield value.
+                    This will remove whole fields where the given subfield's
+                    value does not match a regular expression.
  --store            Use the given storage backend to connect to the database.
                     Choices are (reporter, cstore, storage) [reporter]
  --since            Export records modified since a certain date and time.
@@ -225,7 +237,27 @@ HELP
         }
         push @strip, $trec;
     }
-
+    # Process --strip-tag arguments.
+    foreach my $strip_value (@{$opts{'strip-tag'}}) {
+        my $trec = {};
+        if ($strip_value =~ /\//) {
+            $trec->{tag} = $`;
+            $trec->{tag} = "..." if ($` eq "");
+            my $str = $';
+            if ($str =~ /(!|=)/) {
+                $trec->{subfield} = $`;
+                $trec->{value} = $';
+                if ($1 eq "!") {
+                    $trec->{not} = 1;
+                }
+            } else {
+                die "Malformed --strip-tag argument, $strip_value: must contain = or !.";
+            }
+            push @strip, $trec;
+        } else {
+            die "Malformed --strip-tag argument, $strip_value: must contain /.";
+        }
+    }
     if ($opts{format} eq 'ARE' && $opts{type} ne 'authority') {
         die "Format ARE is not compatible with type " . $opts{type};
     }
@@ -559,39 +591,26 @@ sub next {
                     }
                 }
 
-                my $strip_arg_ref = $Marque::config->option_value('strip');
-                my @strip = @{$strip_arg_ref};
-
-                foreach my $strip_ref (@strip) {
-                    my $tagRE = $strip_ref->{tag};
-                    my $subfieldRE = $strip_ref->{subfield};
-
-                    if ( $subfieldRE eq "") {
-                        # Case 1: Field only check, e.g. "--strip 5.[0,1]"
-                        # If the supplied regexp matches the field, then
-                        # delete that field.
-                        foreach my $test_field ($marc->fields()) {
-                            if ($test_field->tag() =~ /$tagRE/) {
-                                # A hit!
-                                $marc->delete_field($test_field);
+                foreach my $strip_ref (@{$Marque::config->option_value('strip')}) {
+                    if (defined($strip_ref->{value})) {
+                        # strip-tag option was used
+                        foreach my $field ($marc->fields($strip_ref->{tag})) {
+                            foreach my $subfield ($field->subfield($strip_ref->{subfield})) {
+                                if (defined($strip_ref->{not})) {
+                                    $marc->delete_field($field) if ($subfield !~ /$strip_ref->{value}/);
+                                } else {
+                                    $marc->delete_field($field) if ($subfield =~ /$strip_ref->{value}/);
+                                }
                             }
                         }
-                    } elsif ($subfieldRE ne "" && $tagRE ne "") {
-                        # Case 2: Field & subfield supplied.
-                        # Note a blank tag will be wildcarded to "*".
-                        # Traverse fields, then traverse subfields if match
-                        # is found.
-                        foreach my $test_field ($marc->fields()) {
-                            if ( !$test_field->is_control_field() &&
-                                     $test_field->tag() =~ /$tagRE/) {
-                                # Traverse all subfields:
-                                foreach my $test_subfield ($test_field->subfields()) {
-                                    my $sfcode = @{$test_subfield}[0];
-                                    if ($sfcode =~ /$subfieldRE/) {
-                                        $test_field->delete_subfield($sfcode);
-                                    }
-                                }
+                    } else {
+                        # strip option was used.
+                        if ($strip_ref->{subfield}) {
+                            foreach my $field ($marc->fields($strip_ref->{tag})) {
+                                $field->delete_subfield(code => /$strip_ref->{subfield}/);
                             }
+                        } else {
+                            $marc->delete_fields($strip_ref->{tag})
                         }
                     }
                 }
@@ -962,42 +981,29 @@ sub next {
                 }
             }
 
-            my $strip_arg_ref = $Marque::config->option_value('strip');
-            my @strip = @{$strip_arg_ref};
-
-           foreach my $strip_ref (@strip) {
-               my $tagRE = $strip_ref->{tag};
-               my $subfieldRE = $strip_ref->{subfield};
-
-               if ( $subfieldRE eq "") {
-                    # Case 1: Field only check, e.g. "--strip 5.[0,1]"
-                    # If the supplied regexp matches the field, then
-                    # delete that field.
-                    foreach my $test_field ($marc->fields()) {
-                        if ($test_field->tag() =~ /$tagRE/) {
-                            # A hit!
-                            $marc->delete_field($test_field);
+            foreach my $strip_ref (@{$Marque::config->option_value('strip')}) {
+                if (defined($strip_ref->{value})) {
+                    # strip-tag option was used
+                    foreach my $field ($marc->fields($strip_ref->{tag})) {
+                        foreach my $subfield ($field->subfield($strip_ref->{subfield})) {
+                            if (defined($strip_ref->{not})) {
+                                $marc->delete_field($field) if ($subfield !~ /$strip_ref->{value}/);
+                            } else {
+                                $marc->delete_field($field) if ($subfield =~ /$strip_ref->{value}/);
+                            }
                         }
                     }
-               } elsif ($subfieldRE ne "" && $tagRE ne "") {
-                    # Case 2: Field & subfield supplied.
-                    # Note a blank tag will be wildcarded to "*".
-                    # Traverse fields, then traverse subfields if match
-                    # is found.
-                    foreach my $test_field ($marc->fields()) {
-                       if ( !$test_field->is_control_field() &&
-                                 $test_field->tag() =~ /$tagRE/) {
-                           # Traverse all subfields:
-                           foreach my $test_subfield ($test_field->subfields()) {
-                                my $sfcode = @{$test_subfield}[0];
-                                if ($sfcode =~ /$subfieldRE/) {
-                                   $test_field->delete_subfield($sfcode);
-                                }
-                           }
-                       }
+                } else {
+                    # strip option was used.
+                    if ($strip_ref->{subfield}) {
+                        foreach my $field ($marc->fields($strip_ref->{tag})) {
+                            $field->delete_subfield(code => /$strip_ref->{subfield}/);
+                        }
+                    } else {
+                        $marc->delete_fields($strip_ref->{tag})
                     }
-               }
-           }
+                }
+            }
 
             if ($Marque::config->option_value('since')) {
                 my $leader = $marc->leader();