From ca78a1905107340387c97028abcc6f361c160708 Mon Sep 17 00:00:00 2001 From: Bill Erickson Date: Wed, 5 Feb 2020 12:23:02 -0500 Subject: [PATCH] LP1844418 Move isbn/issn extraction to Normalize mod Signed-off-by: Bill Erickson --- .../src/perlmods/lib/OpenILS/Elastic/BibSearch.pm | 43 ++++++++-------------- .../src/perlmods/lib/OpenILS/Utils/Normalize.pm | 39 ++++++++++++++++++++ 2 files changed, 55 insertions(+), 27 deletions(-) diff --git a/Open-ILS/src/perlmods/lib/OpenILS/Elastic/BibSearch.pm b/Open-ILS/src/perlmods/lib/OpenILS/Elastic/BibSearch.pm index 8384baf086..1f782fca71 100644 --- a/Open-ILS/src/perlmods/lib/OpenILS/Elastic/BibSearch.pm +++ b/Open-ILS/src/perlmods/lib/OpenILS/Elastic/BibSearch.pm @@ -18,14 +18,13 @@ use warnings; use Encode; use DateTime; use Clone 'clone'; -use Business::ISBN; -use Business::ISSN; use Time::HiRes qw/time/; use OpenSRF::Utils::Logger qw/:logger/; use OpenSRF::Utils::JSON; use OpenILS::Utils::CStoreEditor qw/:funcs/; use OpenILS::Utils::DateTime qw/interval_to_seconds/; use OpenILS::Elastic; +use OpenILS::Utils::Normalize; use base qw/OpenILS::Elastic/; # default number of bibs to index per batch. @@ -473,22 +472,16 @@ sub index_isbns { return unless $value; my %seen; # deduplicate values + my @isbns = OpenILS::Utils::Normalize::clean_isbns($value); - # Chop up the collected raw values into parts and let - # Business::* tell us which parts looks like ISBNs. 
- for my $token (split(/ /, $value)) { - if (length($token) > 8) { - my $isbn = Business::ISBN->new($token); - if ($isbn && $isbn->is_valid) { - if ($isbn->as_isbn10) { - $seen{$isbn->as_isbn10->isbn} = 1; - $seen{$isbn->as_isbn10->as_string} = 1; - } - if ($isbn->as_isbn13) { - $seen{$isbn->as_isbn13->isbn} = 1; - $seen{$isbn->as_isbn13->as_string} = 1; - } - } + for my $isbn (@isbns) { + if ($isbn->as_isbn10) { + $seen{$isbn->as_isbn10->isbn} = 1; # compact + $seen{$isbn->as_isbn10->as_string} = 1; # with hyphens + } + if ($isbn->as_isbn13) { + $seen{$isbn->as_isbn13->isbn} = 1; + $seen{$isbn->as_isbn13->as_string} = 1; } } @@ -501,17 +494,13 @@ sub index_issns { return unless $value; my %seen; # deduplicate values + my @issns = OpenILS::Utils::Normalize::clean_issns($value); - # Chop up the collected raw values into parts and let - # Business::* tell us which parts looks valid. - for my $token (split(/ /, $value)) { - my $issn = Business::ISSN->new($token); - if ($issn && $issn->is_valid) { - # no option in business::issn to get the unformatted value. - (my $unformatted = $issn->as_string) =~ s/-//g; - $seen{$unformatted} = 1; - $seen{$issn->as_string} = 1; - } + for my $issn (@issns) { + # no option in business::issn to get the unformatted value. 
+ (my $unformatted = $issn->as_string) =~ s/-//g; + $seen{$unformatted} = 1; + $seen{$issn->as_string} = 1; } append_field_value($body, 'identifier|issn', $_) foreach keys %seen; diff --git a/Open-ILS/src/perlmods/lib/OpenILS/Utils/Normalize.pm b/Open-ILS/src/perlmods/lib/OpenILS/Utils/Normalize.pm index 4a217d7636..1d70521420 100644 --- a/Open-ILS/src/perlmods/lib/OpenILS/Utils/Normalize.pm +++ b/Open-ILS/src/perlmods/lib/OpenILS/Utils/Normalize.pm @@ -7,6 +7,8 @@ use UNIVERSAL; use MARC::Record; use MARC::File::XML ( BinaryEncoding => 'UTF-8' ); use OpenILS::Application::AppUtils; +use Business::ISBN; +use Business::ISSN; use Exporter 'import'; our @EXPORT_OK = qw( clean_marc naco_normalize search_normalize ); @@ -125,4 +127,41 @@ sub clean_marc { return $xml; } +# Returns a list of Business::ISBN objects from the provided string, +# which is assumed to be a space-separated list of ISBN-ish data. +# The resulting object can be used to extract isbn10 and isbn13 values. +# $isbn->as_isbn10->isbn # compact +# $isbn->as_isbn13->as_string # with hyphens +sub clean_isbns { + my $value = shift; + return () unless $value; + my @isbns; + + # Chop up the collected raw values into parts and let + # Business::* tell us which parts look like ISBNs. + for my $token (split(/ /, $value)) { + if (length($token) > 8) { + my $isbn = Business::ISBN->new($token); + push(@isbns, $isbn) if $isbn && $isbn->is_valid; + } + } + + return @isbns; +} + +sub clean_issns { + my $value = shift; + return () unless $value; + my @issns; + + # Chop up the collected raw values into parts and let + # Business::* tell us which parts look valid. + for my $token (split(/ /, $value)) { + my $issn = Business::ISSN->new($token); + push(@issns, $issn) if $issn && $issn->is_valid; + } + + return @issns; +} + 1; -- 2.11.0