From 029c6c855b62e667e77cecc61226e864826ecd62 Mon Sep 17 00:00:00 2001 From: Dan Pearl Date: Fri, 16 Mar 2018 13:14:03 -0400 Subject: [PATCH] LP1754455 Add Ability to remove fields or subfields in marc_export This commit adds a --strip option to marc_export that allows fields and/or subfields to be removed from MARC records on export. To test: Try all forms of --strip: --strip tagRE e.g. --strip 856 --strip tagRE/subRE e.g. --strip 856/0 --strip /subRE e.g. --strip /0 Try some regular expressions: --strip 8.. --strip /[ab0] For all tests, compare the regular output to the stripped output and the plain (unstripped) output. Signed-off-by: Dan Pearl Signed-off-by: Jason Boyer Signed-off-by: Galen Charlton --- Open-ILS/src/support-scripts/marc_export.in | 109 +++++++++++++++++++++ .../Administration/marc_export_strip.adoc | 34 +++++++ 2 files changed, 143 insertions(+) create mode 100644 docs/RELEASE_NOTES_NEXT/Administration/marc_export_strip.adoc diff --git a/Open-ILS/src/support-scripts/marc_export.in b/Open-ILS/src/support-scripts/marc_export.in index 843ddff1ca..78c9c4f8ff 100755 --- a/Open-ILS/src/support-scripts/marc_export.in +++ b/Open-ILS/src/support-scripts/marc_export.in @@ -86,6 +86,7 @@ sub new { 'mfhd', 'all', 'replace_001', + 'strip=s@', 'location=s', 'money=s', 'config=s', @@ -126,6 +127,8 @@ Usage: $0 [options] --type or -t Record type (BIBLIO, AUTHORITY) [BIBLIO] --all or -a Export all records; ignores input list --replace_001 Replace the 001 field value with the record ID + --strip tagRE/subRE + Strip fields or subfields matching regular expressions --store Use the given storage backend to connect to the database. Choices are (reporter, cstore, storage) [reporter] --since Export records modified since a certain date and time. @@ -162,6 +165,14 @@ libraries with the short names "BR1" and "BR2": $0 --library BR1 --library BR2 --encoding UTF-8 > sys1_bibs.mrc +The --strip option can be used more than once which implies an "OR" operation. 
+If the field argument is omitted (e.g., "/0"), it is treated like "..." (all fields). +If the subfield argument is omitted (e.g., "100/"), it is treated like "." +(all subfields). Examples: + + --strip /0 Remove all 0 subfields + --strip 1[23]. Remove fields with tags 120 through 139, inclusive. + --strip / Remove all subfields (probably not useful). HELP exit; } @@ -193,6 +204,27 @@ HELP "Right now that means one of [". join('|',(FORMATS)). "]\n"; } + # Process --strip arguments. They are in the form tagRE/subRE. Note + # that the RE pieces cannot contain a slash (/), as this would create + # ambiguity. If there is no /subRE, then it's OK, and just the tagRE + # is specified. + # $opts{strip} is an array-ref for this multi-valued option. + my @strip = (); + + foreach my $strip_value (@{$opts{strip}}) { + my $trec = {}; + if ($strip_value =~ /\//) { + $trec->{tag} = $`; + $trec->{tag} = "..." if ($` eq ""); + $trec->{subfield} = $'; + $trec->{subfield} = "." if ($' eq ""); + } else { + # No slash case + $trec->{tag} = $strip_value; + $trec->{subfield} = ''; + } + push @strip, $trec; + } if ($opts{format} eq 'ARE' && $opts{type} ne 'authority') { die "Format ARE is not compatible with type " . $opts{type}; @@ -234,6 +266,8 @@ HELP } $opts{encoding} = uc($opts{encoding}); + $opts{strip} = \@strip; + $self->{'options'} = \%opts; bless $self, $class; return $self; @@ -524,6 +558,43 @@ sub next { $marc->insert_fields_ordered($tcn); } } + + my $strip_arg_ref = $Marque::config->option_value('strip'); + my @strip = @{$strip_arg_ref}; + foreach my $strip_ref (@strip) { + my $tagRE = $strip_ref->{tag}; + my $subfieldRE = $strip_ref->{subfield}; + + if ( $subfieldRE eq "") { + # Case 1: Field only check, e.g. "--strip 5.[01]" + # If the supplied regexp matches the field, then + # delete that field. + foreach my $test_field ($marc->fields()) { + if ($test_field->tag() =~ /$tagRE/) { + # A hit! 
+ $marc->delete_field($test_field); + } + } + } elsif ($subfieldRE ne "" && $tagRE ne "") { + # Case 2: Field & subfield supplied. + # Note a blank tag will be wildcarded to "...". + # Traverse fields, then traverse subfields if match + # is found. + foreach my $test_field ($marc->fields()) { + if ( !$test_field->is_control_field() && + $test_field->tag() =~ /$tagRE/) { + # Traverse all subfields: + foreach my $test_subfield ($test_field->subfields()) { + my $sfcode = @{$test_subfield}[0]; + if ($sfcode =~ /$subfieldRE/) { + $test_field->delete_subfield($sfcode); + } + } + } + } + } + } + if ($Marque::config->option_value('items')) { my @acps = $self->acps_for_bre($r); foreach my $acp (@acps) { @@ -913,6 +984,44 @@ sub next { $marc->insert_fields_ordered($tcn); } } + + my $strip_arg_ref = $Marque::config->option_value('strip'); + my @strip = @{$strip_arg_ref}; + + foreach my $strip_ref (@strip) { + my $tagRE = $strip_ref->{tag}; + my $subfieldRE = $strip_ref->{subfield}; + + if ( $subfieldRE eq "") { + # Case 1: Field only check, e.g. "--strip 5.[01]" + # If the supplied regexp matches the field, then + # delete that field. + foreach my $test_field ($marc->fields()) { + if ($test_field->tag() =~ /$tagRE/) { + # A hit! + $marc->delete_field($test_field); + } + } + } elsif ($subfieldRE ne "" && $tagRE ne "") { + # Case 2: Field & subfield supplied. + # Note a blank tag will be wildcarded to "...". + # Traverse fields, then traverse subfields if match + # is found. 
+ foreach my $test_field ($marc->fields()) { + if ( !$test_field->is_control_field() && + $test_field->tag() =~ /$tagRE/) { + # Traverse all subfields: + foreach my $test_subfield ($test_field->subfields()) { + my $sfcode = @{$test_subfield}[0]; + if ($sfcode =~ /$subfieldRE/) { + $test_field->delete_subfield($sfcode); + } + } + } + } + } + } + if ($Marque::config->option_value('since')) { my $leader = $marc->leader(); if ($U->is_true($r->deleted())) { diff --git a/docs/RELEASE_NOTES_NEXT/Administration/marc_export_strip.adoc b/docs/RELEASE_NOTES_NEXT/Administration/marc_export_strip.adoc new file mode 100644 index 0000000000..4fd11535ba --- /dev/null +++ b/docs/RELEASE_NOTES_NEXT/Administration/marc_export_strip.adoc @@ -0,0 +1,34 @@ +--strip option for marc_export +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The --strip option is used to suppress generation of specified elements +from the marc_export output. The option, which can be specified more than +once, is in one of these forms: + +[source] +-------- + --strip tagRE/subRE + --strip /subRE + --strip tagRE + +Some examples: + + --strip 856/0 [Delete subfield 0's in fields with tag 856.] + +Regular expressions are accepted: + + --strip 8../0 [Delete subfield 0's in fields with tag 800-899.] + +If the field is omitted, it is as if you specified "..." for the field RE. + + --strip /0 [Delete subfield 0's in all fields.] + --strip /[abc] [Delete subfield a, b or c in all fields.] + +If the slash and subfield are omitted, it means to delete the given fields. + + --strip 856 [Delete fields with tag 856] + +If the slash is present, but the subfield is omitted, it means "all subfields" + + --strip 856/ [Delete all subfields in fields with tag 856] +-------- -- 2.11.0