From 36319cec3f239091b0640cbe0c50c8c8cf346234 Mon Sep 17 00:00:00 2001 From: Bill Erickson Date: Wed, 28 Aug 2019 12:16:17 -0400 Subject: [PATCH] bib marc record index / searching Signed-off-by: Bill Erickson --- .../src/perlmods/lib/OpenILS/Application/Search.pm | 1 - .../lib/OpenILS/Application/Search/Biblio.pm | 10 +- .../Search/{Elastic.pm => ElasticMapper.pm} | 138 ++++++++++++++++++--- .../src/perlmods/lib/OpenILS/Elastic/BibMarc.pm | 30 ++++- 4 files changed, 156 insertions(+), 23 deletions(-) rename Open-ILS/src/perlmods/lib/OpenILS/Application/Search/{Elastic.pm => ElasticMapper.pm} (82%) diff --git a/Open-ILS/src/perlmods/lib/OpenILS/Application/Search.pm b/Open-ILS/src/perlmods/lib/OpenILS/Application/Search.pm index bcd3fbfad5..78d4a4e2db 100644 --- a/Open-ILS/src/perlmods/lib/OpenILS/Application/Search.pm +++ b/Open-ILS/src/perlmods/lib/OpenILS/Application/Search.pm @@ -36,7 +36,6 @@ sub initialize { sub child_init { OpenILS::Application::Search::Z3950->child_init; OpenILS::Application::Search::Browse->child_init; - OpenILS::Application::Search::Elastic->child_init; } diff --git a/Open-ILS/src/perlmods/lib/OpenILS/Application/Search/Biblio.pm b/Open-ILS/src/perlmods/lib/OpenILS/Application/Search/Biblio.pm index a0ac123bfd..883dfd80cc 100644 --- a/Open-ILS/src/perlmods/lib/OpenILS/Application/Search/Biblio.pm +++ b/Open-ILS/src/perlmods/lib/OpenILS/Application/Search/Biblio.pm @@ -10,7 +10,7 @@ use OpenSRF::Utils::SettingsClient; use OpenILS::Utils::CStoreEditor q/:funcs/; use OpenSRF::Utils::Cache; use Encode; -use OpenILS::Application::Search::Elastic; +use OpenILS::Application::Search::ElasticMapper; use OpenSRF::Utils::Logger qw/:logger/; @@ -1157,11 +1157,11 @@ sub staged_search { $user_offset = ($user_offset >= 0) ? $user_offset : 0; $user_limit = ($user_limit >= 0) ? 
$user_limit : 10; - return OpenILS::Application::Search::Elastic->bib_search( + return OpenILS::Application::Search::ElasticMapper->bib_search( $search_hash->{query}, # query string ($method =~ /staff/ ? 1 : 0), $user_offset, $user_limit - ) if OpenILS::Application::Search::Elastic->is_enabled; + ) if OpenILS::Application::Search::ElasticMapper->is_enabled('bib-search'); # we're grabbing results on a per-superpage basis, which means the # limit and offset should coincide with superpage boundaries @@ -2133,6 +2133,10 @@ sub marc_search { my $limit = $args->{limit} || 10; my $offset = $args->{offset} || 0; + return OpenILS::Application::Search::ElasticMapper->marc_search( + $args, ($method =~ /staff/ ? 1 : 0), $limit, $offset + ) if OpenILS::Application::Search::ElasticMapper->is_enabled('bib-marc'); + # allow caller to pass in a call timeout since MARC searches # can take longer than the default 60-second timeout. # Default to 2 mins. Arbitrarily cap at 5 mins. diff --git a/Open-ILS/src/perlmods/lib/OpenILS/Application/Search/Elastic.pm b/Open-ILS/src/perlmods/lib/OpenILS/Application/Search/ElasticMapper.pm similarity index 82% rename from Open-ILS/src/perlmods/lib/OpenILS/Application/Search/Elastic.pm rename to Open-ILS/src/perlmods/lib/OpenILS/Application/Search/ElasticMapper.pm index bb6caa14a5..566d57a9d6 100644 --- a/Open-ILS/src/perlmods/lib/OpenILS/Application/Search/Elastic.pm +++ b/Open-ILS/src/perlmods/lib/OpenILS/Application/Search/ElasticMapper.pm @@ -1,4 +1,4 @@ -package OpenILS::Application::Search::Elastic; +package OpenILS::Application::Search::ElasticMapper; # --------------------------------------------------------------- # Copyright (C) 2018 King County Library System # Author: Bill Erickson @@ -21,6 +21,7 @@ use OpenILS::Utils::Fieldmapper; use OpenSRF::Utils::SettingsClient; use OpenILS::Utils::CStoreEditor q/:funcs/; use OpenILS::Elastic::BibSearch; +use OpenILS::Elastic::BibMarc; use List::Util qw/min/; use Digest::MD5 qw(md5_hex); @@ -39,31 
+40,34 @@ my $bib_fields; my $hidden_copy_statuses; my $hidden_copy_locations; my $avail_copy_statuses; -our $enabled = undef; +our $enabled = {}; # Returns true if the Elasticsearch 'bib-search' index is active. sub is_enabled { + my ($class, $index) = @_; - return $enabled if defined $enabled; + $class->init; - # Elastic bib search is enabled if a "bib-search" index is enabled. - my $index = new_editor()->search_elastic_index( - {active => 't', code => 'bib-search'})->[0]; + return $enabled->{$index} if exists $enabled->{$index}; - if ($index) { + # The requested index is enabled if an active elastic index config exists for its code; the result is cached per index code in $enabled. + my $config = new_editor()->search_elastic_index( + {active => 't', code => $index})->[0]; - $logger->info("ES bib-search index is enabled"); - $enabled = 1; + if ($config) { + $logger->info("ES '$index' index is enabled"); + $enabled->{$index} = 1; } else { - $enabled = 0; + $enabled->{$index} = 0; } - return $enabled; + return $enabled->{$index}; } -sub child_init { +my $init_complete = 0; +sub init { my $class = shift; - return unless $class->is_enabled(); + return if $init_complete; my $e = new_editor(); @@ -93,6 +97,7 @@ sub child_init { $hidden_copy_locations = [map {$_->{id}} @$locs]; + $init_complete = 1; return 1; } @@ -257,7 +262,9 @@ sub translate_query_node { my $field_class = $node->{class}; # e.g. subject my @fields = @{$node->{fields}}; # e.g. temporal (optional) - # class-level searches are OR/should searches across all + $logger->info("ES query node field_class=$field_class fields=@fields"); + + # class-level searches are OR ("should") searches across all # fields in the selected class. @fields = map {$_->name} grep {$_->search_group eq $field_class} @$bib_fields @@ -313,7 +320,6 @@ sub translate_query_node { for my $field (@fields) { my $key = "$field_class|$field"; - if ($text_search) { # use the full-text indices @@ -330,8 +336,10 @@ sub translate_query_node { } } - $logger->info("ES content = $content / bools = ". 
- OpenSRF::Utils::JSON->perl2JSON($field_nodes)); + $logger->info( + "ES content = ". OpenSRF::Utils::JSON->perl2JSON($content) . + "; bools = ". OpenSRF::Utils::JSON->perl2JSON($field_nodes) + ); my $query; if (scalar(@$field_nodes) == 1) { @@ -346,6 +354,8 @@ sub translate_query_node { $query = {bool => {must_not => $query}}; } + $logger->info("ES sub-query = ". OpenSRF::Utils::JSON->perl2JSON($query)); + return $query; } } @@ -545,5 +555,99 @@ sub add_elastic_holdings_filter { } + +# Build the Elasticsearch request for a MARC tag/subfield search: one nested 'marc' query per entry in args->{searches}, all of which must match. $staff is accepted but not read here. +sub compile_elastic_marc_query { + my ($args, $staff, $offset, $limit) = @_; + + # args->{searches} is expected to look like: + # [{term => "harry", restrict => [{tag => 245, subfield => "a"}]}] + + my $root_and = []; + for my $search (@{$args->{searches}}) { + + # NOTE: Assumes only one tag/subfield is queried per search term; only restrict->[0] is read and any further restrict entries are ignored. + my $tag = $search->{restrict}->[0]->{tag}; + my $sf = $search->{restrict}->[0]->{subfield}; + my $value = $search->{term}; + + # Use text searching on the value field: either the 'text' or the 'text_folded' sub-field may match (should), and every word of the term must be present (operator 'and'). + my $value_query = { + bool => { + should => [ + {match => {'marc.value.text' => + {query => $value, operator => 'and'}}}, + {match => {'marc.value.text_folded' => + {query => $value, operator => 'and'}}} + ] + } + }; + + my $sub_query = { + bool => { + must => [ + {term => {'marc.tag' => $tag}}, + {term => {'marc.subfield.lower' => $sf}}, + $value_query + ] + } + }; + + push (@$root_and, { + nested => { + path => 'marc', + query => {bool => {must => $sub_query}} + } + }); + } + + return { + _source => ['id'], # Fetch bib ID only + size => $limit, + from => $offset, + sort => [], + query => { + bool => { + must => $root_and, + filter => [] + } + } + }; +} + + + +# Translate a MARC search API call into something consumable by Elasticsearch, +# then translate the search results into a structure consistent with a bib search +# API response: {ids => [...], count => N}, or {count => 0} when no searches are given or the search returns nothing. NOTE(review): count is hits->{total} passed through as-is; confirm it is a plain number for the Elasticsearch version in use. 
+sub marc_search { + my ($class, $args, $staff, $limit, $offset) = @_; + + return {count => 0} unless $args->{searches} && @{$args->{searches}}; + + my $elastic_query = + compile_elastic_marc_query($args, $staff, $offset, $limit); + + my $es = OpenILS::Elastic::BibMarc->new('main'); + + $es->connect; + my $results = $es->search($elastic_query); + + $logger->debug("ES elasticsearch returned: ". + OpenSRF::Utils::JSON->perl2JSON($results)); + + return {count => 0} unless $results; + + my @bib_ids = map {$_->{_id}} + grep {defined $_} @{$results->{hits}->{hits}}; + + return { + ids => \@bib_ids, + count => $results->{hits}->{total} + }; +} + + + 1; diff --git a/Open-ILS/src/perlmods/lib/OpenILS/Elastic/BibMarc.pm b/Open-ILS/src/perlmods/lib/OpenILS/Elastic/BibMarc.pm index bf02b66d92..86bfc9f425 100644 --- a/Open-ILS/src/perlmods/lib/OpenILS/Elastic/BibMarc.pm +++ b/Open-ILS/src/perlmods/lib/OpenILS/Elastic/BibMarc.pm @@ -60,9 +60,35 @@ my $BASE_PROPERTIES = { marc => { type => 'nested', properties => { + # tag is assumed to be composed of numbers, so no lowercase sub-field is defined for it. tag => {type => 'keyword'}, - subfield => {type => 'keyword'}, - value => {type => 'text'} + subfield => { + type => 'keyword', + fields => { + lower => { + type => 'keyword', + normalizer => 'custom_lowercase' + } + } + }, + value => { + type => 'keyword', + fields => { + lower => { + type => 'keyword', + normalizer => 'custom_lowercase' + }, + text => { + type => 'text', + analyzer => $LANG_ANALYZER + }, + text_folded => { + type => 'text', + analyzer => 'folding' + } + } + } + } } }; -- 2.11.0