From 8657895178af442971d3c37dbae08f753ccaaf47 Mon Sep 17 00:00:00 2001 From: Bill Erickson Date: Mon, 9 Sep 2019 15:24:26 -0400 Subject: [PATCH] move away from mapper to pure client Signed-off-by: Bill Erickson --- .../lib/OpenILS/Application/Search/Biblio.pm | 10 - .../OpenILS/Application/Search/ElasticMapper.pm | 645 --------------------- 2 files changed, 655 deletions(-) delete mode 100644 Open-ILS/src/perlmods/lib/OpenILS/Application/Search/ElasticMapper.pm diff --git a/Open-ILS/src/perlmods/lib/OpenILS/Application/Search/Biblio.pm b/Open-ILS/src/perlmods/lib/OpenILS/Application/Search/Biblio.pm index af73b793ca..159ecd78b9 100644 --- a/Open-ILS/src/perlmods/lib/OpenILS/Application/Search/Biblio.pm +++ b/Open-ILS/src/perlmods/lib/OpenILS/Application/Search/Biblio.pm @@ -10,7 +10,6 @@ use OpenSRF::Utils::SettingsClient; use OpenILS::Utils::CStoreEditor q/:funcs/; use OpenSRF::Utils::Cache; use Encode; -use OpenILS::Application::Search::ElasticMapper; use OpenSRF::Utils::Logger qw/:logger/; @@ -1157,11 +1156,6 @@ sub staged_search { $user_offset = ($user_offset >= 0) ? $user_offset : 0; $user_limit = ($user_limit >= 0) ? $user_limit : 10; - return OpenILS::Application::Search::ElasticMapper->bib_search( - $search_hash->{query}, # query string - ($method =~ /staff/ ? 1 : 0), - $user_offset, $user_limit - ) if OpenILS::Application::Search::ElasticMapper->is_enabled('bib-search'); # we're grabbing results on a per-superpage basis, which means the # limit and offset should coincide with superpage boundaries @@ -2133,10 +2127,6 @@ sub marc_search { my $limit = $args->{limit} || 10; my $offset = $args->{offset} || 0; - return OpenILS::Application::Search::ElasticMapper->marc_search( - $args, ($method =~ /staff/ ? 1 : 0), $limit, $offset - ) if OpenILS::Application::Search::ElasticMapper->is_enabled('bib-search'); - # allow caller to pass in a call timeout since MARC searches # can take longer than the default 60-second timeout. # Default to 2 mins. Arbitrarily cap at 5 mins. diff --git a/Open-ILS/src/perlmods/lib/OpenILS/Application/Search/ElasticMapper.pm b/Open-ILS/src/perlmods/lib/OpenILS/Application/Search/ElasticMapper.pm deleted file mode 100644 index 291fe4241a..0000000000 --- a/Open-ILS/src/perlmods/lib/OpenILS/Application/Search/ElasticMapper.pm +++ /dev/null @@ -1,645 +0,0 @@ -package OpenILS::Application::Search::ElasticMapper; -# --------------------------------------------------------------- -# Copyright (C) 2018 King County Library System -# Author: Bill Erickson -# -# This program is free software; you can redistribute it and/or -# modify it under the terms of the GNU General Public License -# as published by the Free Software Foundation; either version 2 -# of the License, or (at your option) any later version. - -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# --------------------------------------------------------------- -use strict; -use warnings; -use OpenSRF::Utils::JSON; -use OpenSRF::Utils::Logger qw/:logger/; -use OpenILS::Utils::Fieldmapper; -use OpenSRF::Utils::SettingsClient; -use OpenILS::Utils::CStoreEditor q/:funcs/; -use OpenILS::Elastic::Bib::Search; -#use OpenILS::Elastic::Bib::Marc; -use List::Util qw/min/; -use Digest::MD5 qw(md5_hex); - -use OpenILS::Application::AppUtils; -my $U = "OpenILS::Application::AppUtils"; - -# Use the QueryParser module to make sense of the inbound search query. -use OpenILS::Application::Storage::Driver::Pg::QueryParser; - - -# avoid repetitive calls to DB for org info. -my %org_data_cache = (by_shortname => {}, ancestors_at => {}); - -# bib fields defined in the elastic bib-search index -my $bib_fields; -my $hidden_copy_statuses; -my $hidden_copy_locations; -my $avail_copy_statuses; -our $enabled = {}; - -# Returns true if the Elasticsearch 'bib-search' index is active. -sub is_enabled { - my ($class, $index) = @_; - - $class->init; - - return $enabled->{$index} if exists $enabled->{$index}; - - # Elastic bib search is enabled if a "bib-search" index is enabled. - my $config = new_editor()->search_elastic_index( - {active => 't', code => $index})->[0]; - - if ($config) { - $logger->info("ES '$index' index is enabled"); - $enabled->{$index} = 1; - } else { - $enabled->{$index} = 0; - } - - return $enabled->{$index}; -} - -my $init_complete = 0; -sub init { - my $class = shift; - return if $init_complete; - - my $e = new_editor(); - - $bib_fields = $e->retrieve_all_elastic_bib_field; - - my $stats = $e->json_query({ - select => {ccs => ['id', 'opac_visible', 'is_available']}, - from => 'ccs', - where => {'-or' => [ - {opac_visible => 'f'}, - {is_available => 't'} - ]} - }); - - $hidden_copy_statuses = - [map {$_->{id}} grep {$_->{opac_visible} eq 'f'} @$stats]; - - $avail_copy_statuses = - [map {$_->{id}} grep {$_->{is_available} eq 't'} @$stats]; - - # Include deleted copy locations since this is an exclusion set. - my $locs = $e->json_query({ - select => {acpl => ['id']}, - from => 'acpl', - where => {opac_visible => 'f'} - }); - - $hidden_copy_locations = [map {$_->{id}} @$locs]; - - $init_complete = 1; - return 1; -} - -# Translate a bib search API call into something consumable by Elasticsearch -# Translate search results into a structure consistent with a bib search -# API response. -sub bib_search { - my ($class, $query, $staff, $offset, $limit) = @_; - - $logger->info("ES parsing API query $query staff=$staff"); - - my ($elastic_query, $cache_key) = - compile_elastic_query($query, $staff, $offset, $limit); - - my $es = OpenILS::Elastic::Bib::Search->new('main'); - - $es->connect; - my $results = $es->search($elastic_query); - - $logger->debug("ES elasticsearch returned: ". - OpenSRF::Utils::JSON->perl2JSON($results)); - - return {count => 0, ids => []} unless $results; - - return { - count => $results->{hits}->{total}, - ids => [ - map { [$_->{_id}, undef, $_->{_score}] } - grep {defined $_} @{$results->{hits}->{hits}} - ], - facets => format_facets($results->{aggregations}), - # Elastic has its own search cacheing, so external caching is - # performed, but providing cache keys allows the caller to - # know if this search matches another search. - cache_key => $cache_key, - facet_key => $cache_key.'_facets' - }; -} - -sub compile_elastic_query { - my ($query, $staff, $offset, $limit) = @_; - - my $parser = init_query_parser($query); - - $parser->parse; - my $query_struct = $parser->parse_tree->to_abstract_query; - - my $elastic = { - _source => ['id'], # Fetch bib ID only - size => $limit, - from => $offset, - sort => [], - query => { - bool => { - must => [], - filter => [] - } - } - }; - - $elastic->{query}->{bool}->{must} = - [translate_query_node($elastic, $query_struct)]; - - add_elastic_holdings_filter($elastic, $staff, - $elastic->{__site}, $elastic->{__depth}, $elastic->{__available}); - - add_elastic_facet_aggregations($elastic); - - # delete __-prefixed state maintenance keys. - delete $elastic->{$_} for (grep {$_ =~ /^__/} keys %$elastic); - - $elastic->{sort} = ['_score'] unless @{$elastic->{sort}}; - - return $elastic; -} - -sub translate_query_node { - my ($elastic, $node) = @_; - - if ($node->{type} eq 'query_plan') { - - my ($joiner) = keys %{$node->{children}}; - my $children = $node->{children}->{$joiner}; - my $filters = $node->{filters}; - my $modifiers = $node->{modifiers}; - - if (grep {$_ eq 'descending'} @$modifiers) { - $elastic->{__sort_dir} = 'desc'; - } - - if (grep {$_ eq 'available'} @$modifiers) { - $elastic->{__available} = 1; - } - - return unless @$children || @$filters; - - my $bool_op = $joiner eq '&' ? 'must' : 'should'; - my $bool_nodes = []; - my $filter_nodes = []; - my $query = { - bool => { - $bool_op => $bool_nodes, - filter => $filter_nodes - } - }; - - for my $child (@$children) { - my $type = $child->{type}; - - if ($type eq 'node' || $type eq 'query_plan') { - my $subq = translate_query_node($elastic, $child); - push(@$bool_nodes, $subq) if defined $subq; - - } elsif ($type eq 'facet') { - - for my $value (@{$child->{values}}) { - push(@$filter_nodes, {term => {$child->{name} => $value}}); - } - } - } - - for my $filter (@$filters) { - my $name = $filter->{name}; - my @values = @{$filter->{args}}; - - # Sorting is managed at the root of the ES search structure. - # QP assumes all sorts are ascending or descending -- possible - # only one sort filter per struct is supported? - if ($name eq 'sort') { - my $dir = $elastic->{__sort_dir} || 'asc'; - push(@{$elastic->{sort}}, {$_ => $dir}) for @values; - - } elsif ($name =~ /site|depth/) { - # site and depth are copy-level filters. - # Apply those after the main structure is built. - $elastic->{"__$name"} = $values[0]; - - } else { - if (@values > 1) { - push(@$filter_nodes, {terms => {$name => \@values}}); - } else { - push(@$filter_nodes, {term => {$name => $values[0]}}); - } - } - } - - # trim and compress branches - if (!@$filter_nodes) { - delete $query->{bool}{filter}; - return $bool_nodes->[0] if scalar(@$bool_nodes) == 1; - - } elsif (!@$bool_nodes) { - # If this is a filter-only node, add a match-all - # query for the filter to have something to match on. - $query->{bool}{must} = {match_all => {}}; - } - - return $query; - - } elsif ($node->{type} eq 'node') { - - my $field_class = $node->{class}; # e.g. subject - my @fields = @{$node->{fields}}; # e.g. temporal (optional) - - $logger->info("ES query node field_class=$field_class fields=@fields"); - - # class-level searches are OR ("should") searches across all - # fields in the selected class. - @fields = map {$_->name} - grep {$_->search_group eq $field_class} @$bib_fields - unless @fields; - - # note: $joiner is always '&' for type=node - my ($joiner) = keys %{$node->{children}}; - my $children = $node->{children}->{$joiner}; - - # Content is only split across children when multiple words - # are part of the same query structure, e.g. kw:piano music - # This equates to a match search with multiple words in ES. - my $content = join(' ', map {$_->{content}} @$children); - - # not sure how/why this happens sometimes. - return undef unless $content; - - my $first_char = substr($content, 0, 1); - my $last_char = substr($content, -1, 1); - my $prefix = $children->[0]->{prefix}; - - my $match_type = 'most_fields'; - - # "Contains Phrase" - $match_type = 'phrase' if $prefix eq '"'; - - my @field_nodes; - - # Matchiness specificiers embedded in the content override - # the query node prefix. - if ($first_char eq '^') { - $content = substr($content, 1); - - if ($last_char eq '$') { # "Matches Exactly" - - $match_type = undef; - $content = substr($content, 0, -1); - - for my $field (@fields) { - my $key = "$field_class|$field"; - # Use the lowercase normalized keyword index for - # exact match searches. - push(@field_nodes, {term => {"$key.lower" => $content}}); - } - - } else { # "Starts With" - - $match_type = 'phrase_prefix'; - } - } - - if ($match_type) { - - push(@field_nodes, { - multi_match => { - query => $content, - operator => 'and', - fields => ["$field_class|*.text*"], - type => $match_type - } - }); - } - - $logger->info( - "ES content = ". OpenSRF::Utils::JSON->perl2JSON($content) . - "; bools = ". OpenSRF::Utils::JSON->perl2JSON(\@field_nodes) - ); - - my $query; - if (scalar(@field_nodes) == 1) { - $query = {bool => {must => \@field_nodes}}; - } else { - # Query multiple fields within a search class via OR query. - $query = {bool => {should => \@field_nodes}}; - } - - if ($prefix eq '-"') { - # Negation query. Wrap the whole shebang in a must_not - $query = {bool => {must_not => $query}}; - } - - $logger->info("ES sub-query = ". OpenSRF::Utils::JSON->perl2JSON($query)); - - return $query; - } -} - -sub init_query_parser { - my $query = shift; - - my $query_parser = - OpenILS::Application::Storage::Driver::Pg::QueryParser->new( - query => $query - ); - - my %attrs = get_qp_attrs(); - $query_parser->initialize(%attrs); - - return $query_parser; -} - -my %qp_attrs; -sub get_qp_attrs { - return %qp_attrs if %qp_attrs; - - # Fetch and cache the QP configuration attributes - # TODO: call this in service child_init()? - - $logger->debug("ES initializing query parser attributes"); - my $e = new_editor(); - - %qp_attrs = ( - config_record_attr_index_norm_map => - $e->search_config_record_attr_index_norm_map([ - { id => { "!=" => undef } }, - { flesh => 1, flesh_fields => { crainm => [qw/norm/] }, - order_by => [{ class => "crainm", field => "pos" }] } - ]), - search_relevance_adjustment => - $e->retrieve_all_search_relevance_adjustment, - config_metabib_field => - $e->retrieve_all_config_metabib_field, - config_metabib_field_virtual_map => - $e->retrieve_all_config_metabib_field_virtual_map, - config_metabib_search_alias => - $e->retrieve_all_config_metabib_search_alias, - config_metabib_field_index_norm_map => - $e->search_config_metabib_field_index_norm_map([ - { id => { "!=" => undef } }, - { flesh => 1, flesh_fields => { cmfinm => [qw/norm/] }, - order_by => [{ class => "cmfinm", field => "pos" }] } - ]), - config_record_attr_definition => - $e->retrieve_all_config_record_attr_definition - ); - - return %qp_attrs; -} - - -# Format ES search aggregations to match the API response facet structure -# {$cmf_id => {"Value" => $count}, $cmf_id2 => {"Value Two" => $count2}, ...} -sub format_facets { - my $aggregations = shift; - my $facets = {}; - - for my $fname (keys %$aggregations) { - - my ($field_class, $name) = split(/\|/, $fname); - - my ($bib_field) = grep { - $_->name eq $name && $_->search_group eq $field_class - } @$bib_fields; - - my $hash = $facets->{$bib_field->metabib_field} = {}; - - my $values = $aggregations->{$fname}->{buckets}; - for my $bucket (@$values) { - $hash->{$bucket->{key}} = $bucket->{doc_count}; - } - } - - return $facets; -} - -sub add_elastic_facet_aggregations { - my ($elastic_query) = @_; - - my @facet_fields = grep {$_->facet_field eq 't'} @$bib_fields; - return unless @facet_fields; - - $elastic_query->{aggs} = {}; - - for my $facet (@facet_fields) { - my $fname = $facet->name; - my $fgrp = $facet->search_group; - $fname = "$fgrp|$fname" if $fgrp; - - $elastic_query->{aggs}{$fname} = {terms => {field => "$fname.raw"}}; - } -} - -sub add_elastic_holdings_filter { - my ($elastic_query, $staff, $shortname, $depth, $available) = @_; - - # in non-staff mode, ensure at least on copy in scope is visible - my $visible = !$staff; - - my $org; - if ($shortname) { - - if (!$org_data_cache{by_shortname}{$shortname}) { - $org_data_cache{by_shortname}{$shortname} = - $U->find_org_by_shortname($U->get_org_tree, $shortname); - } - - $org = $org_data_cache{by_shortname}{$shortname}; - - my $types = $U->get_org_types; # pulls from cache - my ($type) = grep {$_->id == $org->ou_type} @$types; - $depth = defined $depth ? min($depth, $type->depth) : $type->depth; - } - - my $visible_filters = { - query => { - bool => { - must_not => [ - {terms => {'holdings.status' => $hidden_copy_statuses}}, - {terms => {'holdings.location' => $hidden_copy_locations}} - ] - } - } - }; - - my $filter = {nested => {path => 'holdings', query => {bool => {}}}}; - - if ($depth > 0) { - - if (!$org_data_cache{ancestors_at}{$shortname}) { - $org_data_cache{ancestors_at}{$shortname} = {}; - } - - if (!$org_data_cache{ancestors_at}{$shortname}{$depth}) { - $org_data_cache{ancestors_at}{$shortname}{$depth} = - $U->get_org_descendants($org->id, $depth); - } - - my $org_ids = $org_data_cache{ancestors_at}{$shortname}{$depth}; - - # Add a boolean OR-filter on holdings circ lib and optionally - # add a boolean AND-filter on copy status for availability - # checking. - - my $should = []; - $filter->{nested}->{query}->{bool}->{should} = $should; - - for my $org_id (@$org_ids) { - - # Ensure at least one copy exists at the selected org unit - my $and = { - bool => { - must => [ - {term => {'holdings.circ_lib' => $org_id}} - ] - } - }; - - # When limiting to visible/available, ensure at least one of the - # copies from the above org-limited set is visible/available. - if ($available) { - push( - @{$and->{bool}{must}}, - {terms => {'holdings.status' => $avail_copy_statuses}} - ); - - } elsif ($visible) { - push(@{$and->{bool}{must}}, $visible_filters); - } - - push(@$should, $and); - } - - } elsif ($available) { - # Limit to results that have an available copy, but don't worry - # about where the copy lives, since we're searching globally. - - $filter->{nested}->{query}->{bool}->{must} = - [{terms => {'holdings.status' => $avail_copy_statuses}}]; - - } elsif ($visible) { - - $filter->{nested}->{query} = $visible_filters->{query}; - } - - $logger->info("ES holdings filter is " . - OpenSRF::Utils::JSON->perl2JSON($filter)); - - # array of filters in progress - push(@{$elastic_query->{query}->{bool}->{filter}}, $filter); - -} - - - -sub compile_elastic_marc_query { - my ($args, $staff, $offset, $limit) = @_; - - # args->{searches} = - # [{term => "harry", restrict => [{tag => 245, subfield => "a"}]}] - - my $root_and = []; - for my $search (@{$args->{searches}}) { - - # NOTE Assume only one tag/subfield will be queried per search term. - my $tag = $search->{restrict}->[0]->{tag}; - my $sf = $search->{restrict}->[0]->{subfield}; - my $value = $search->{term}; - - # Use text searching on the value field - my $value_query = { - multi_match => { - query => $value, - fields => ['marc.value*'], - type => 'most_fields', - operator => 'and' - } - }; - - my @must = ($value_query); - - # tag (ES-only) and subfield are both optional - push (@must, {term => {'marc.tag' => $tag}}) if $tag; - push (@must, {term => {'marc.subfield' => $sf}}) if $sf && $sf ne '_'; - - my $sub_query = {bool => {must => \@must}}; - - push (@$root_and, { - nested => { - path => 'marc', - query => {bool => {must => $sub_query}} - } - }); - } - - return { - _source => ['id'], # Fetch bib ID only - size => $limit, - from => $offset, - sort => [], - query => { - bool => { - must => $root_and, - filter => [] - } - } - }; -} - - - -# Translate a MARC search API call into something consumable by Elasticsearch -# Translate search results into a structure consistent with a bib search -# API response. -# TODO: This version is not currently holdings-aware, meaning it will return -# results for all non-deleted bib records that match the query. However, -# the data does exist in the EL index. Just need to integrate. -sub marc_search { - my ($class, $args, $staff, $limit, $offset) = @_; - - return {count => 0, ids => []} - unless $args->{searches} && @{$args->{searches}}; - - my $elastic_query = - compile_elastic_marc_query($args, $staff, $offset, $limit); - - my $es = OpenILS::Elastic::Bib::Search->new('main'); - - $es->connect; - my $results = $es->search($elastic_query); - - $logger->debug("ES elasticsearch returned: ". - OpenSRF::Utils::JSON->perl2JSON($results)); - - return {count => 0, ids => []} unless $results; - - my @bib_ids = map {$_->{_id}} - grep {defined $_} @{$results->{hits}->{hits}}; - - return { - ids => \@bib_ids, - count => $results->{hits}->{total} - }; -} - - - -1; - -- 2.11.0