LP1844418 Move isbn/issn extraction to Normalize mod

author Bill Erickson <berickxx@gmail.com>

Wed, 5 Feb 2020 17:23:02 +0000 (12:23 -0500)

committer Bill Erickson <berickxx@gmail.com>

Wed, 5 Feb 2020 17:23:05 +0000 (12:23 -0500)
author Bill Erickson <berickxx@gmail.com>
Wed, 5 Feb 2020 17:23:02 +0000 (12:23 -0500)
committer Bill Erickson <berickxx@gmail.com>
Wed, 5 Feb 2020 17:23:05 +0000 (12:23 -0500)
diff --git a/Open-ILS/src/perlmods/lib/OpenILS/Elastic/BibSearch.pm b/Open-ILS/src/perlmods/lib/OpenILS/Elastic/BibSearch.pm

index 8384baf..1f782fc 100644 (file)
--- a/Open-ILS/src/perlmods/lib/OpenILS/Elastic/BibSearch.pm
+++ b/Open-ILS/src/perlmods/lib/OpenILS/Elastic/BibSearch.pm
@@ -18,14 +18,13 @@ use warnings;
  use Encode;
  use DateTime;
  use Clone 'clone';
-use Business::ISBN;
-use Business::ISSN;
  use Time::HiRes qw/time/;
  use OpenSRF::Utils::Logger qw/:logger/;
  use OpenSRF::Utils::JSON;
  use OpenILS::Utils::CStoreEditor qw/:funcs/;
  use OpenILS::Utils::DateTime qw/interval_to_seconds/;
  use OpenILS::Elastic;
+use OpenILS::Utils::Normalize;
  use base qw/OpenILS::Elastic/;
  
  # default number of bibs to index per batch.
@@ -473,22 +472,16 @@ sub index_isbns {
      return unless $value;
      
      my %seen; # deduplicate values
+    my @isbns = OpenILS::Utils::Normalize::clean_isbns($value);
  
-    # Chop up the collected raw values into parts and let
-    # Business::* tell us which parts looks like ISBNs.
-    for my $token (split(/ /, $value)) {
-        if (length($token) > 8) {
-            my $isbn = Business::ISBN->new($token);
-            if ($isbn && $isbn->is_valid) {
-                if ($isbn->as_isbn10) {
-                    $seen{$isbn->as_isbn10->isbn} = 1;
-                    $seen{$isbn->as_isbn10->as_string} = 1;
-                }
-                if ($isbn->as_isbn13) {
-                    $seen{$isbn->as_isbn13->isbn} = 1;
-                    $seen{$isbn->as_isbn13->as_string} = 1;
-                }
-            }
+    for my $isbn (@isbns) {
+        if ($isbn->as_isbn10) {
+            $seen{$isbn->as_isbn10->isbn} = 1; # compact
+            $seen{$isbn->as_isbn10->as_string} = 1; # with hyphens
+        }
+        if ($isbn->as_isbn13) {
+            $seen{$isbn->as_isbn13->isbn} = 1;
+            $seen{$isbn->as_isbn13->as_string} = 1;
          }
      }
  
@@ -501,17 +494,13 @@ sub index_issns {
      return unless $value;
  
      my %seen; # deduplicate values
+    my @issns = OpenILS::Utils::Normalize::clean_issns($value);
      
-    # Chop up the collected raw values into parts and let
-    # Business::* tell us which parts looks valid.
-    for my $token (split(/ /, $value)) {
-        my $issn = Business::ISSN->new($token);
-        if ($issn && $issn->is_valid) {
-            # no option in business::issn to get the unformatted value.
-            (my $unformatted = $issn->as_string) =~ s/-//g;
-            $seen{$unformatted} = 1;
-            $seen{$issn->as_string} = 1;
-        }
+    for my $issn (@issns) {
+        # no option in business::issn to get the unformatted value.
+        (my $unformatted = $issn->as_string) =~ s/-//g;
+        $seen{$unformatted} = 1;
+        $seen{$issn->as_string} = 1;
      }
  
      append_field_value($body, 'identifier|issn', $_) foreach keys %seen;
diff --git a/Open-ILS/src/perlmods/lib/OpenILS/Utils/Normalize.pm b/Open-ILS/src/perlmods/lib/OpenILS/Utils/Normalize.pm

index 4a217d7..1d70521 100644 (file)
--- a/Open-ILS/src/perlmods/lib/OpenILS/Utils/Normalize.pm
+++ b/Open-ILS/src/perlmods/lib/OpenILS/Utils/Normalize.pm
@@ -7,6 +7,8 @@ use UNIVERSAL;
  use MARC::Record;
  use MARC::File::XML ( BinaryEncoding => 'UTF-8' );
  use OpenILS::Application::AppUtils;
+use Business::ISBN;
+use Business::ISSN;
  
  use Exporter 'import';
  our @EXPORT_OK = qw( clean_marc naco_normalize search_normalize );
@@ -125,4 +127,41 @@ sub clean_marc {
      return $xml;
  }
  
+# Returns a list of Business::ISBN objects from the provided string,
+# which is assumed to be a space-separated list of ISBN-ish data.
+# Each resulting object can be used to extract isbn10 and isbn13 values:
+# $isbn->as_isbn10->isbn # compact
+# $isbn->as_isbn13->as_string # with hyphens
+sub clean_isbns {
+    my $value = shift;
+    return () unless $value;
+    my @isbns;
+
+    # Chop up the collected raw values into parts and let
+    # Business::* tell us which parts look like ISBNs.
+    for my $token (split(/ /, $value)) {
+        if (length($token) > 8) {
+            my $isbn = Business::ISBN->new($token);
+            push(@isbns, $isbn) if $isbn && $isbn->is_valid;
+        }
+    }
+
+    return @isbns;
+}
+
+sub clean_issns {
+    my $value = shift;
+    return () unless $value;
+    my @issns;
+
+    # Chop up the collected raw values into parts and let
+    # Business::* tell us which parts look valid.
+    for my $token (split(/ /, $value)) {
+        my $issn = Business::ISSN->new($token);
+        push(@issns, $issn) if $issn && $issn->is_valid;
+    }
+
+    return @issns;
+}
+
  1;
author	Bill Erickson <berickxx@gmail.com>
	Wed, 5 Feb 2020 17:23:02 +0000 (12:23 -0500)
committer	Bill Erickson <berickxx@gmail.com>
	Wed, 5 Feb 2020 17:23:05 +0000 (12:23 -0500)
Open-ILS/src/perlmods/lib/OpenILS/Elastic/BibSearch.pm		patch \| blob \| history
Open-ILS/src/perlmods/lib/OpenILS/Utils/Normalize.pm		patch \| blob \| history