From ca78a1905107340387c97028abcc6f361c160708 Mon Sep 17 00:00:00 2001
From: Bill Erickson <berickxx@gmail.com>
Date: Wed, 5 Feb 2020 12:23:02 -0500
Subject: [PATCH] LP1844418 Move isbn/issn extraction to Normalize module

Signed-off-by: Bill Erickson <berickxx@gmail.com>
---
 .../src/perlmods/lib/OpenILS/Elastic/BibSearch.pm  | 43 ++++++++--------------
 .../src/perlmods/lib/OpenILS/Utils/Normalize.pm    | 39 ++++++++++++++++++++
 2 files changed, 55 insertions(+), 27 deletions(-)

diff --git a/Open-ILS/src/perlmods/lib/OpenILS/Elastic/BibSearch.pm b/Open-ILS/src/perlmods/lib/OpenILS/Elastic/BibSearch.pm
index 8384baf086..1f782fca71 100644
--- a/Open-ILS/src/perlmods/lib/OpenILS/Elastic/BibSearch.pm
+++ b/Open-ILS/src/perlmods/lib/OpenILS/Elastic/BibSearch.pm
@@ -18,14 +18,13 @@ use warnings;
 use Encode;
 use DateTime;
 use Clone 'clone';
-use Business::ISBN;
-use Business::ISSN;
 use Time::HiRes qw/time/;
 use OpenSRF::Utils::Logger qw/:logger/;
 use OpenSRF::Utils::JSON;
 use OpenILS::Utils::CStoreEditor qw/:funcs/;
 use OpenILS::Utils::DateTime qw/interval_to_seconds/;
 use OpenILS::Elastic;
+use OpenILS::Utils::Normalize;
 use base qw/OpenILS::Elastic/;
 
 # default number of bibs to index per batch.
@@ -473,22 +472,16 @@ sub index_isbns {
     return unless $value;
     
     my %seen; # deduplicate values
+    my @isbns = OpenILS::Utils::Normalize::clean_isbns($value);
 
-    # Chop up the collected raw values into parts and let
-    # Business::* tell us which parts looks like ISBNs.
-    for my $token (split(/ /, $value)) {
-        if (length($token) > 8) {
-            my $isbn = Business::ISBN->new($token);
-            if ($isbn && $isbn->is_valid) {
-                if ($isbn->as_isbn10) {
-                    $seen{$isbn->as_isbn10->isbn} = 1;
-                    $seen{$isbn->as_isbn10->as_string} = 1;
-                }
-                if ($isbn->as_isbn13) {
-                    $seen{$isbn->as_isbn13->isbn} = 1;
-                    $seen{$isbn->as_isbn13->as_string} = 1;
-                }
-            }
+    for my $isbn (@isbns) {
+        if ($isbn->as_isbn10) {
+            $seen{$isbn->as_isbn10->isbn} = 1; # compact
+            $seen{$isbn->as_isbn10->as_string} = 1; # with hyphens
+        }
+        if ($isbn->as_isbn13) {
+            $seen{$isbn->as_isbn13->isbn} = 1;
+            $seen{$isbn->as_isbn13->as_string} = 1;
         }
     }
 
@@ -501,17 +494,13 @@ sub index_issns {
     return unless $value;
 
     my %seen; # deduplicate values
+    my @issns = OpenILS::Utils::Normalize::clean_issns($value);
     
-    # Chop up the collected raw values into parts and let
-    # Business::* tell us which parts looks valid.
-    for my $token (split(/ /, $value)) {
-        my $issn = Business::ISSN->new($token);
-        if ($issn && $issn->is_valid) {
-            # no option in business::issn to get the unformatted value.
-            (my $unformatted = $issn->as_string) =~ s/-//g;
-            $seen{$unformatted} = 1;
-            $seen{$issn->as_string} = 1;
-        }
+    for my $issn (@issns) {
+        # Business::ISSN has no accessor for the unformatted value; strip hyphens.
+        (my $unformatted = $issn->as_string) =~ s/-//g;
+        $seen{$unformatted} = 1;
+        $seen{$issn->as_string} = 1;
     }
 
     append_field_value($body, 'identifier|issn', $_) foreach keys %seen;
diff --git a/Open-ILS/src/perlmods/lib/OpenILS/Utils/Normalize.pm b/Open-ILS/src/perlmods/lib/OpenILS/Utils/Normalize.pm
index 4a217d7636..1d70521420 100644
--- a/Open-ILS/src/perlmods/lib/OpenILS/Utils/Normalize.pm
+++ b/Open-ILS/src/perlmods/lib/OpenILS/Utils/Normalize.pm
@@ -7,6 +7,8 @@ use UNIVERSAL;
 use MARC::Record;
 use MARC::File::XML ( BinaryEncoding => 'UTF-8' );
 use OpenILS::Application::AppUtils;
+use Business::ISBN;
+use Business::ISSN;
 
 use Exporter 'import';
 our @EXPORT_OK = qw( clean_marc naco_normalize search_normalize );
@@ -125,4 +127,41 @@ sub clean_marc {
     return $xml;
 }
 
+# Returns a list of Business::ISBN objects from the provided string,
+# which is assumed to be a space-separated list of ISBN-ish data.
+# Each resulting object can be used to extract isbn10 and isbn13 values:
+# $isbn->as_isbn10->isbn # compact
+# $isbn->as_isbn13->as_string # with hyphens
+sub clean_isbns {
+    my $value = shift;
+    return () unless $value;
+    my @isbns;
+
+    # Chop up the collected raw values into parts and let
+    # Business::* tell us which parts look like ISBNs.
+    for my $token (split(/ /, $value)) {
+        if (length($token) > 8) {
+            my $isbn = Business::ISBN->new($token);
+            push(@isbns, $isbn) if $isbn && $isbn->is_valid;
+        }
+    }
+
+    return @isbns;
+}
+
+sub clean_issns {
+    my $value = shift;
+    return () unless $value;
+    my @issns;
+
+    # Chop up the collected raw values into parts and let
+    # Business::* tell us which parts look valid.
+    for my $token (split(/ /, $value)) {
+        my $issn = Business::ISSN->new($token);
+        push(@issns, $issn) if $issn && $issn->is_valid;
+    }
+
+    return @issns;
+}
+
 1;
-- 
2.11.0