From: Bill Erickson
Date: Fri, 30 Aug 2019 16:26:04 +0000 (-0400)
Subject: initial tighter catalog integration / cat side needs more data
X-Git-Url: https://old-git.evergreen-ils.org/?a=commitdiff_plain;h=8d83c66f2ffabbbead30cd5c93df20c364fc88b1;p=working%2FEvergreen.git

initial tighter catalog integration / cat side needs more data

Signed-off-by: Bill Erickson
---

diff --git a/Open-ILS/src/eg2/src/app/share/catalog/catalog-url.service.ts b/Open-ILS/src/eg2/src/app/share/catalog/catalog-url.service.ts
index 7b9698f2d3..7c03489725 100644
--- a/Open-ILS/src/eg2/src/app/share/catalog/catalog-url.service.ts
+++ b/Open-ILS/src/eg2/src/app/share/catalog/catalog-url.service.ts
@@ -5,6 +5,7 @@ import {CatalogSearchContext, CatalogBrowseContext,
     CatalogMarcContext, CatalogTermContext, FacetFilter} from './search-context';
 import {CATALOG_CCVM_FILTERS} from './search-context';
 import {HashParams} from '@eg/share/util/hash-params';
+import {ElasticSearchContext} from './elastic-search-context';
 
 @Injectable()
 export class CatalogUrlService {
@@ -140,7 +141,9 @@ export class CatalogUrlService {
      * Creates a new search context from the active route params.
      */
     fromUrlParams(params: ParamMap): CatalogSearchContext {
-        const context = new CatalogSearchContext();
+        //const context = new CatalogSearchContext();
+        // TODO: hard code for now
+        const context = new ElasticSearchContext();
 
         this.applyUrlParams(context, params);
 
diff --git a/Open-ILS/src/eg2/src/app/share/catalog/catalog.service.ts b/Open-ILS/src/eg2/src/app/share/catalog/catalog.service.ts
index 7f7c5b50c2..5869945e7c 100644
--- a/Open-ILS/src/eg2/src/app/share/catalog/catalog.service.ts
+++ b/Open-ILS/src/eg2/src/app/share/catalog/catalog.service.ts
@@ -110,7 +110,6 @@ export class CatalogService {
 
     termSearch(ctx: CatalogSearchContext): Promise<void> {
 
-        let method = 'open-ils.search.biblio.multiclass.query';
         let fullQuery;
 
         if (ctx.identSearch.isSearchable()) {
@@ -119,21 +118,14 @@ export class CatalogService {
         } else {
             fullQuery = ctx.compileTermSearchQuery();
 
-            if (ctx.termSearch.groupByMetarecord
-                && !ctx.termSearch.fromMetarecord) {
-                method = 'open-ils.search.metabib.multiclass.query';
-            }
-
             if (ctx.termSearch.hasBrowseEntry) {
                 this.fetchBrowseEntry(ctx);
             }
         }
 
-        console.debug(`search query: ${fullQuery}`);
+        console.debug('search query', JSON.stringify(fullQuery));
 
-        if (ctx.isStaff) {
-            method += '.staff';
-        }
+        const method = ctx.getApiName();
 
         return this.net.request(
             'open-ils.search', method, {
diff --git a/Open-ILS/src/eg2/src/app/share/catalog/elastic-search-context.ts b/Open-ILS/src/eg2/src/app/share/catalog/elastic-search-context.ts
new file mode 100644
index 0000000000..de19ca59d0
--- /dev/null
+++ b/Open-ILS/src/eg2/src/app/share/catalog/elastic-search-context.ts
@@ -0,0 +1,200 @@
+import {IdlObject} from '@eg/core/idl.service';
+import {CatalogSearchContext} from './search-context';
+
+// Parameters sent to the open-ils.search.elastic.bib_search API.
+// 'sort' and 'query' are passed along to Elasticsearch; the other
+// values drive the server-side holdings filter.
+class ElasticSearchParams {
+    search_org: number;
+    search_depth: number;
+    available: boolean;
+    sort: any[] = [];
+    query: any = {bool: {must: [], filter: []}};
+}
+
+// Search context which compiles term searches into Elasticsearch
+// query structures and routes them to the Elastic search API.
+export class ElasticSearchContext extends CatalogSearchContext {
+
+    // Compiles the term search rows into one boolean node and adds
+    // it to the query's 'must' list.
+    //
+    // The UI is ambiguous re: mixing ANDs and ORs.
+    // Here booleans are grouped ANDs first, then each OR is given its own node.
+    compileTerms(params: ElasticSearchParams) {
+
+        const ts = this.termSearch;
+        const terms: any = {
+            bool: {
+                should: [
+                    {bool: {must: []}} // ANDs
+                    // ORs
+                ]
+            }
+        };
+
+        ts.joinOp.forEach((op, idx) => {
+            let matchOp = 'match';
+
+            // The 'and' operator here tells EL to treat multi-word search
+            // terms as an ANDed pair (e.g. "harry potter" = "harry and potter")
+            let operator = 'and';
+
+            // True when matching records must be excluded.
+            let negate = false;
+
+            switch (ts.matchOp[idx]) {
+                case 'phrase':
+                    matchOp = 'match_phrase';
+                    operator = null;
+                    break;
+                case 'nocontains':
+                    // Negation is a regular 'match' clause wrapped in
+                    // a must_not node (see below), not a query type.
+                    negate = true;
+                    break;
+                case 'exact':
+                    matchOp = 'term';
+                    operator = null;
+                    break;
+                case 'starts':
+                    matchOp = 'match_phrase_prefix';
+                    operator = null;
+                    break;
+            }
+
+            let node: any = {};
+            node[matchOp] = {};
+
+            if (operator) {
+                node[matchOp][ts.fieldClass[idx]] =
+                    {query: ts.query[idx], operator: operator};
+            } else {
+                node[matchOp][ts.fieldClass[idx]] = ts.query[idx];
+            }
+
+            if (negate) {
+                // adds a boolean sub-node
+                node = {bool: {must_not: [node]}};
+            }
+
+            if (op === 'or') {
+                terms.bool.should.push(node);
+            } else {
+                terms.bool.should[0].bool.must.push(node);
+            }
+        });
+
+        params.query.bool.must.push(terms);
+    }
+
+    // Adds an exact-match (term) filter on the named field.
+    // Empty, null, and undefined values are ignored.
+    addFilter(params: ElasticSearchParams, name: string, value: any) {
+        if (value === '' ||
+            value === null ||
+            value === undefined) { return; }
+
+        const node: any = {term: {}};
+        node.term[name] = value;
+        params.query.bool.filter.push(node);
+    }
+
+    // Adds a range filter on the named field, where 'bounds' is
+    // an Elasticsearch range definition, e.g. {gt: 1990, lt: 2000}.
+    addRangeFilter(params: ElasticSearchParams, name: string, bounds: any) {
+        const node: any = {range: {}};
+        node.range[name] = bounds;
+        params.query.bool.filter.push(node);
+    }
+
+    // Compiles the term search into the parameter structure expected
+    // by the Elastic search API.
+    compileTermSearchQuery(): any {
+        const ts = this.termSearch;
+        const params = new ElasticSearchParams();
+
+        params.available = ts.available;
+
+        if (this.sort) {
+            // e.g. title, title.descending => [{title => 'desc'}]
+            const parts = this.sort.split(/\./);
+            const sort: any = {};
+            sort[parts[0]] = parts[1] ? 'desc' : 'asc';
+            params.sort = [sort];
+        }
+
+        if (ts.date1 && ts.dateOp) {
+            switch (ts.dateOp) {
+                case 'is':
+                    this.addFilter(params, 'date1', ts.date1);
+                    break;
+                case 'before':
+                    this.addRangeFilter(params, 'date1', {lt: ts.date1});
+                    break;
+                case 'after':
+                    this.addRangeFilter(params, 'date1', {gt: ts.date1});
+                    break;
+                case 'between':
+                    if (ts.date2) {
+                        this.addRangeFilter(params, 'date1',
+                            {gt: ts.date1, lt: ts.date2});
+                    }
+                    break;
+            }
+        }
+
+        this.compileTerms(params);
+        params.search_org = this.searchOrg.id();
+
+        if (this.global) {
+            params.search_depth = this.org.root().ou_type().depth();
+        }
+
+        // PENDING DEV
+        /*
+        if (ts.copyLocations[0] !== '') {
+            str += ' locations(' + ts.copyLocations + ')';
+        }
+        */
+
+        if (ts.format) {
+            this.addFilter(params, ts.formatCtype, ts.format);
+        }
+
+        Object.keys(ts.ccvmFilters).forEach(field => {
+            ts.ccvmFilters[field].forEach(value => {
+                if (value !== '') {
+                    this.addFilter(params, field, value);
+                }
+            });
+        });
+
+        // The search API aggregates facets on fields named "group|name",
+        // so facet filters are applied to the same field name.
+        ts.facetFilters.forEach(f => {
+            this.addFilter(params,
+                `${f.facetClass}|${f.facetName}`, f.facetValue);
+        });
+
+        return params;
+    }
+
+    // Returns the Elastic search API name when the search can be
+    // handled by Elasticsearch, otherwise the stock API name.
+    getApiName(): string {
+
+        // Elastic covers only a subset of available search types.
+        if (!this.termSearch.isSearchable() ||
+            this.termSearch.groupByMetarecord ||
+            this.termSearch.fromMetarecord
+        ) {
+            return super.getApiName();
+        }
+
+        return this.isStaff ?
+            'open-ils.search.elastic.bib_search.staff' :
+            'open-ils.search.elastic.bib_search';
+    }
+}
+
diff --git a/Open-ILS/src/eg2/src/app/share/catalog/search-context.ts b/Open-ILS/src/eg2/src/app/share/catalog/search-context.ts
index a3995c6828..2ef9bcc3c2 100644
--- a/Open-ILS/src/eg2/src/app/share/catalog/search-context.ts
+++ b/Open-ILS/src/eg2/src/app/share/catalog/search-context.ts
@@ -558,7 +558,7 @@ export class CatalogSearchContext {
         return query;
     }
 
-    compileTermSearchQuery(): string {
+    compileTermSearchQuery(): any {
         const ts = this.termSearch;
         let str = '';
 
@@ -693,5 +693,22 @@ export class CatalogSearchContext {
                 break;
         }
     }
+
+    // Returns the name of the open-ils.search API to call, based on
+    // the metarecord grouping options and the staff flag.
+    getApiName(): string {
+        let method = 'open-ils.search.biblio.multiclass.query';
+
+        if (this.termSearch.groupByMetarecord
+            && !this.termSearch.fromMetarecord) {
+            method = 'open-ils.search.metabib.multiclass.query';
+        }
+
+        if (this.isStaff) {
+            method += '.staff';
+        }
+
+        return method;
+    }
 }
 
diff --git a/Open-ILS/src/perlmods/lib/OpenILS/Application/Search.pm b/Open-ILS/src/perlmods/lib/OpenILS/Application/Search.pm
index 78d4a4e2db..b1d34d90f1 100644
--- a/Open-ILS/src/perlmods/lib/OpenILS/Application/Search.pm
+++ b/Open-ILS/src/perlmods/lib/OpenILS/Application/Search.pm
@@ -17,6 +17,7 @@ use OpenILS::Application::Search::Zips;
 use OpenILS::Application::Search::CNBrowse;
 use OpenILS::Application::Search::Serial;
 use OpenILS::Application::Search::Browse;
+use OpenILS::Application::Search::Elastic;
 
 use OpenILS::Application::AppUtils;
 
diff --git a/Open-ILS/src/perlmods/lib/OpenILS/Application/Search/Biblio.pm b/Open-ILS/src/perlmods/lib/OpenILS/Application/Search/Biblio.pm
index af73b793ca..47d95d7089 100644
--- a/Open-ILS/src/perlmods/lib/OpenILS/Application/Search/Biblio.pm
+++ b/Open-ILS/src/perlmods/lib/OpenILS/Application/Search/Biblio.pm
@@ -10,6 +10,7 @@ use OpenSRF::Utils::SettingsClient;
 use OpenILS::Utils::CStoreEditor q/:funcs/;
 use OpenSRF::Utils::Cache;
 use Encode;
+use OpenILS::Application::Search::Elastic;
 use OpenILS::Application::Search::ElasticMapper;
 
 use OpenSRF::Utils::Logger qw/:logger/;
@@ -1161,7 +1162,7 @@ sub staged_search {
         $search_hash->{query}, # query string
         ($method =~ /staff/ ? 1 : 0),
         $user_offset, $user_limit
-    ) if OpenILS::Application::Search::ElasticMapper->is_enabled('bib-search');
+    ) if OpenILS::Application::Search::Elastic->is_enabled('bib-search');
 
     # we're grabbing results on a per-superpage basis, which means the
     # limit and offset should coincide with superpage boundaries
diff --git a/Open-ILS/src/perlmods/lib/OpenILS/Application/Search/Elastic.pm b/Open-ILS/src/perlmods/lib/OpenILS/Application/Search/Elastic.pm
new file mode 100644
index 0000000000..be47a75dde
--- /dev/null
+++ b/Open-ILS/src/perlmods/lib/OpenILS/Application/Search/Elastic.pm
@@ -0,0 +1,433 @@
+package OpenILS::Application::Search::Elastic;
+use base qw/OpenILS::Application/;
+# ---------------------------------------------------------------
+# Copyright (C) 2019 King County Library System
+# Author: Bill Erickson
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+# ---------------------------------------------------------------
+use strict;
+use warnings;
+use OpenSRF::Utils::JSON;
+use OpenSRF::Utils::Logger qw/:logger/;
+use OpenILS::Utils::Fieldmapper;
+use OpenSRF::Utils::SettingsClient;
+use OpenILS::Utils::CStoreEditor q/:funcs/;
+use OpenILS::Elastic::BibSearch;
+use OpenILS::Elastic::BibMarc;
+use List::Util qw/min/;
+
+use OpenILS::Application::AppUtils;
+my $U = "OpenILS::Application::AppUtils";
+
+# avoid repetitive calls to DB for org info.
+# NOTE: despite the key name, 'ancestors_at' caches org unit descendants.
+my %org_data_cache = (ancestors_at => {});
+
+# bib fields defined in the elastic bib-search index
+my $bib_fields;
+my $hidden_copy_statuses;
+my $hidden_copy_locations;
+my $avail_copy_statuses;
+our $enabled = {};
+
+# Returns true if the Elasticsearch index with the provided code
+# (e.g. 'bib-search') is active.  The result is cached per index code.
+sub is_enabled {
+    my ($class, $index) = @_;
+
+    $class->init;
+
+    return $enabled->{$index} if exists $enabled->{$index};
+
+    # Elastic bib search is enabled if a "bib-search" index is enabled.
+    my $config = new_editor()->search_elastic_index(
+        {active => 't', code => $index})->[0];
+
+    if ($config) {
+        $logger->info("ES '$index' index is enabled");
+        $enabled->{$index} = 1;
+    } else {
+        $enabled->{$index} = 0;
+    }
+
+    return $enabled->{$index};
+}
+
+# Loads the bib field definitions plus the copy status and copy
+# location ID lists used for visibility and availability filtering.
+# The data is loaded once; subsequent calls return immediately.
+my $init_complete = 0;
+sub init {
+    my $class = shift;
+    return if $init_complete;
+
+    my $e = new_editor();
+
+    $bib_fields = $e->retrieve_all_elastic_bib_field;
+
+    my $stats = $e->json_query({
+        select => {ccs => ['id', 'opac_visible', 'is_available']},
+        from => 'ccs',
+        where => {'-or' => [
+            {opac_visible => 'f'},
+            {is_available => 't'}
+        ]}
+    });
+
+    $hidden_copy_statuses =
+        [map {$_->{id}} grep {$_->{opac_visible} eq 'f'} @$stats];
+
+    $avail_copy_statuses =
+        [map {$_->{id}} grep {$_->{is_available} eq 't'} @$stats];
+
+    # Include deleted copy locations since this is an exclusion set.
+    my $locs = $e->json_query({
+        select => {acpl => ['id']},
+        from => 'acpl',
+        where => {opac_visible => 'f'}
+    });
+
+    $hidden_copy_locations = [map {$_->{id}} @$locs];
+
+    $init_complete = 1;
+    return 1;
+}
+
+__PACKAGE__->register_method(
+    method => 'bib_search',
+    api_name => 'open-ils.search.elastic.bib_search'
+);
+__PACKAGE__->register_method(
+    method => 'bib_search',
+    api_name => 'open-ils.search.elastic.bib_search.staff'
+);
+
+# Translate a bib search API call into something consumable by Elasticsearch
+# Translate search results into a structure consistent with a bib search
+# API response.
+# $options may contain 'limit' and 'offset'.  $query contains the
+# 'query', 'sort', 'search_org', 'search_depth', and 'available' values.
+sub bib_search {
+    my ($self, $client, $options, $query) = @_;
+    $options ||= {};
+
+    # Make sure the field and copy visibility data are loaded.
+    __PACKAGE__->init;
+
+    my $staff = ($self->api_name =~ /staff/) ? 1 : 0;
+
+    $logger->info("ES parsing API query " .
+        OpenSRF::Utils::JSON->perl2JSON($query) . " staff=$staff");
+
+    my ($elastic_query, $cache_key) =
+        compile_elastic_query($query, $options, $staff);
+
+    my $es = OpenILS::Elastic::BibSearch->new('main');
+
+    $es->connect;
+    my $results = $es->search($elastic_query);
+
+    $logger->debug("ES elasticsearch returned: ".
+        OpenSRF::Utils::JSON->perl2JSON($results));
+
+    return {count => 0, ids => []} unless $results;
+
+    return {
+        count => $results->{hits}->{total},
+        ids => [
+            map { [$_->{_id}, undef, $_->{_score}] }
+                grep {defined $_} @{$results->{hits}->{hits}}
+        ],
+        facets => format_facets($results->{aggregations}),
+        # Elastic has its own search caching, so no external caching is
+        # performed, but providing cache keys allows the caller to
+        # know if this search matches another search.
+        # TODO: compile_elastic_query() does not generate a cache key
+        # yet, so $cache_key is undefined here.
+        cache_key => $cache_key,
+        facet_key => ($cache_key // '') . '_facets'
+    };
+}
+
+# Builds the Elasticsearch request for a bib search: the caller's
+# query and sort plus paging, the holdings filter, and the facet
+# aggregations.
+sub compile_elastic_query {
+    my ($query, $options, $staff) = @_;
+
+    my $elastic = {
+        _source => ['id'], # Fetch bib ID only
+        size => $options->{limit},
+        from => $options->{offset},
+        sort => $query->{sort},
+        query => $query->{query}
+    };
+
+    add_elastic_holdings_filter($elastic, $staff,
+        $query->{search_org}, $query->{search_depth}, $query->{available});
+
+    add_elastic_facet_aggregations($elastic);
+
+    # Sort by score when the caller provides no sort.
+    $elastic->{sort} = ['_score']
+        unless $elastic->{sort} && @{$elastic->{sort}};
+
+    return $elastic;
+}
+
+# Format ES search aggregations to match the API response facet structure
+# {$cmf_id => {"Value" => $count}, $cmf_id2 => {"Value Two" => $count2}, ...}
+sub format_facets {
+    my $aggregations = shift;
+    my $facets = {};
+
+    for my $fname (keys %$aggregations) {
+
+        my ($field_class, $name) = split(/\|/, $fname);
+
+        # Fields with no search group are aggregated under their
+        # bare name (see add_elastic_facet_aggregations()).
+        ($field_class, $name) = ('', $field_class) unless defined $name;
+
+        my ($bib_field) = grep {
+            $_->name eq $name && ($_->search_group || '') eq $field_class
+        } @$bib_fields;
+
+        # Skip aggregations that do not map to a known bib field.
+        next unless $bib_field;
+
+        my $hash = $facets->{$bib_field->metabib_field} = {};
+
+        my $values = $aggregations->{$fname}->{buckets};
+        for my $bucket (@$values) {
+            $hash->{$bucket->{key}} = $bucket->{doc_count};
+        }
+    }
+
+    return $facets;
+}
+
+# Adds a terms aggregation to the query for every bib field
+# flagged as a facet field.
+sub add_elastic_facet_aggregations {
+    my ($elastic_query) = @_;
+
+    my @facet_fields = grep {$_->facet_field eq 't'} @$bib_fields;
+    return unless @facet_fields;
+
+    $elastic_query->{aggs} = {};
+
+    for my $facet (@facet_fields) {
+        my $fname = $facet->name;
+        my $fgrp = $facet->search_group;
+        $fname = "$fgrp|$fname" if $fgrp;
+
+        $elastic_query->{aggs}{$fname} = {terms => {field => $fname}};
+    }
+}
+
+# Adds a nested holdings filter to the query which limits results by
+# org unit scope, copy availability, and (for non-staff searches)
+# copy visibility.
+sub add_elastic_holdings_filter {
+    my ($elastic_query, $staff, $org_id, $depth, $available) = @_;
+
+    # in non-staff mode, ensure at least one copy in scope is visible
+    my $visible = !$staff;
+
+    if ($org_id) {
+        my ($org) = $U->fetch_org_unit($org_id);
+        my $types = $U->get_org_types; # pulls from cache
+        my ($type) = grep {$_->id == $org->ou_type} @$types;
+        $depth = defined $depth ? min($depth, $type->depth) : $type->depth;
+    }
+
+    my $visible_filters = {
+        query => {
+            bool => {
+                must_not => [
+                    {terms => {'holdings.status' => $hidden_copy_statuses}},
+                    {terms => {'holdings.location' => $hidden_copy_locations}}
+                ]
+            }
+        }
+    };
+
+    my $filter = {nested => {path => 'holdings', query => {bool => {}}}};
+
+    if ($org_id && $depth && $depth > 0) {
+
+        if (!$org_data_cache{ancestors_at}{$org_id}) {
+            $org_data_cache{ancestors_at}{$org_id} = {};
+        }
+
+        if (!$org_data_cache{ancestors_at}{$org_id}{$depth}) {
+            $org_data_cache{ancestors_at}{$org_id}{$depth} =
+                $U->get_org_descendants($org_id, $depth);
+        }
+
+        my $org_ids = $org_data_cache{ancestors_at}{$org_id}{$depth};
+
+        # Add a boolean OR-filter on holdings circ lib and optionally
+        # add a boolean AND-filter on copy status for availability
+        # checking.
+
+        my $should = [];
+        $filter->{nested}->{query}->{bool}->{should} = $should;
+
+        for my $aou_id (@$org_ids) {
+
+            # Ensure at least one copy exists at the selected org unit
+            my $and = {
+                bool => {
+                    must => [
+                        {term => {'holdings.circ_lib' => $aou_id}}
+                    ]
+                }
+            };
+
+            # When limiting to visible/available, ensure at least one of the
+            # copies from the above org-limited set is visible/available.
+            if ($available) {
+                push(
+                    @{$and->{bool}{must}},
+                    {terms => {'holdings.status' => $avail_copy_statuses}}
+                );
+
+            } elsif ($visible) {
+                push(@{$and->{bool}{must}}, $visible_filters->{query});
+            }
+
+            push(@$should, $and);
+        }
+
+    } elsif ($available) {
+        # Limit to results that have an available copy, but don't worry
+        # about where the copy lives, since we're searching globally.
+
+        $filter->{nested}->{query}->{bool}->{must} =
+            [{terms => {'holdings.status' => $avail_copy_statuses}}];
+
+    } elsif ($visible) {
+
+        $filter->{nested}->{query} = $visible_filters->{query};
+
+    } elsif ($staff) {
+
+        $logger->info("ES skipping holdings filter on global staff search");
+        return;
+    }
+
+    $logger->info("ES holdings filter is " .
+        OpenSRF::Utils::JSON->perl2JSON($filter));
+
+    # array of filters in progress
+    push(@{$elastic_query->{query}->{bool}->{filter}}, $filter);
+}
+
+# Builds the Elasticsearch request for a MARC search, adding one
+# nested 'marc' query per search term.
+sub compile_elastic_marc_query {
+    my ($args, $staff, $offset, $limit) = @_;
+
+    # args->{searches} =
+    #   [{term => "harry", restrict => [{tag => 245, subfield => "a"}]}]
+
+    my $root_and = [];
+    for my $search (@{$args->{searches}}) {
+
+        # NOTE Assume only one tag/subfield will be queried per search term.
+        my $tag = $search->{restrict}->[0]->{tag};
+        my $sf = $search->{restrict}->[0]->{subfield};
+        my $value = $search->{term};
+
+        # Use text searching on the value field
+        my $value_query = {
+            bool => {
+                should => [
+                    {match => {'marc.value.text' =>
+                        {query => $value, operator => 'and'}}},
+                    {match => {'marc.value.text_folded' =>
+                        {query => $value, operator => 'and'}}}
+                ]
+            }
+        };
+
+        my @must = ($value_query);
+
+        # tag (ES-only) and subfield are both optional
+        push (@must, {term => {'marc.tag' => $tag}}) if $tag;
+        push (@must, {term => {'marc.subfield' => $sf}}) if $sf && $sf ne '_';
+
+        my $sub_query = {bool => {must => \@must}};
+
+        push (@$root_and, {
+            nested => {
+                path => 'marc',
+                query => {bool => {must => $sub_query}}
+            }
+        });
+    }
+
+    return {
+        _source => ['id'], # Fetch bib ID only
+        size => $limit,
+        from => $offset,
+        sort => [],
+        query => {
+            bool => {
+                must => $root_and,
+                filter => []
+            }
+        }
+    };
+}
+
+
+
+# Translate a MARC search API call into something consumable by Elasticsearch
+# Translate search results into a structure consistent with a bib search
+# API response.
+# TODO: This version is not currently holdings-aware, meaning it will return
+# results for all non-deleted bib records that match the query.
+sub marc_search {
+    my ($class, $args, $staff, $limit, $offset) = @_;
+
+    return {count => 0, ids => []}
+        unless $args->{searches} && @{$args->{searches}};
+
+    my $elastic_query =
+        compile_elastic_marc_query($args, $staff, $offset, $limit);
+
+    my $es = OpenILS::Elastic::BibMarc->new('main');
+
+    $es->connect;
+    my $results = $es->search($elastic_query);
+
+    $logger->debug("ES elasticsearch returned: ".
+        OpenSRF::Utils::JSON->perl2JSON($results));
+
+    return {count => 0, ids => []} unless $results;
+
+    my @bib_ids = map {$_->{_id}}
+        grep {defined $_} @{$results->{hits}->{hits}};
+
+    return {
+        ids => \@bib_ids,
+        count => $results->{hits}->{total}
+    };
+}
+
+
+
+1;
+