From c40e397069afb93fa77310215a784b25234ea1cc Mon Sep 17 00:00:00 2001 From: Bill Erickson Date: Tue, 18 Feb 2020 11:06:08 -0500 Subject: [PATCH] LP1844418 Direct indexing experiment WIP Signed-off-by: Bill Erickson --- .../src/perlmods/lib/OpenILS/Elastic/BibSearch.pm | 34 ++-- .../perlmods/lib/OpenILS/Elastic/BibSearch/XSLT.pm | 176 +++++++++++++++++++++ .../sql/Pg/upgrade/XXXX.schema.elastic-search.sql | 1 - 3 files changed, 198 insertions(+), 13 deletions(-) create mode 100644 Open-ILS/src/perlmods/lib/OpenILS/Elastic/BibSearch/XSLT.pm diff --git a/Open-ILS/src/perlmods/lib/OpenILS/Elastic/BibSearch.pm b/Open-ILS/src/perlmods/lib/OpenILS/Elastic/BibSearch.pm index 1f782fca71..8ae28ffbd0 100644 --- a/Open-ILS/src/perlmods/lib/OpenILS/Elastic/BibSearch.pm +++ b/Open-ILS/src/perlmods/lib/OpenILS/Elastic/BibSearch.pm @@ -213,6 +213,16 @@ sub language_analyzers { return ("english"); } +sub get_dynamic_fields { + my $self = shift; + + # elastic.bib_field has no primary key field, so retrieve_all won't work. + # Note the name value may be repeated across search group depending + # on local configuration. + return new_editor()->search_elastic_bib_field({name => {'!=' => undef}}); +} + + sub create_index_properties { my ($self) = @_; @@ -234,10 +244,7 @@ sub create_index_properties { } foreach qw/title subject series keyword/; } - # elastic.bib_field has no primary key field, so retrieve_all won't work. - # Note the name value may be repeated across search group depending - # on local configuration. - my $fields = new_editor()->search_elastic_bib_field({name => {'!=' => undef}}); + my $fields = $self->get_dynamic_fields; for my $field (@$fields) { @@ -248,15 +255,17 @@ sub create_index_properties { my $def; if ($search_group) { + if ($field->search_field eq 't') { - # Use the same fields and analysis as the 'grouped' field. 
- $def = clone($properties->{$search_group}); - $def->{copy_to} = [$search_group, $SHORT_GROUP_MAP{$search_group}]; + # Use the same fields and analysis as the 'grouped' field. + $def = clone($properties->{$search_group}); + $def->{copy_to} = [$search_group, $SHORT_GROUP_MAP{$search_group}]; - # Apply ranking boost to each analysis variation. - my $flds = $def->{fields}; - if ($flds && (my $boost = ($field->weight || 1)) > 1) { - $flds->{$_}->{boost} = $boost foreach keys %$flds; + # Apply ranking boost to each analysis variation. + my $flds = $def->{fields}; + if ($flds && (my $boost = ($field->weight || 1)) > 1) { + $flds->{$_}->{boost} = $boost foreach keys %$flds; + } } } else { @@ -271,7 +280,8 @@ sub create_index_properties { }; } - if ($field->facet_field eq 't' && $def->{fields}) { + if ($field->facet_field eq 't') { + $def->{fields} = {} unless $def->{fields}; # facet only? # Facet fields are used for aggregation which requires # an additional unaltered keyword field. $def->{fields}->{facet} = { diff --git a/Open-ILS/src/perlmods/lib/OpenILS/Elastic/BibSearch/XSLT.pm b/Open-ILS/src/perlmods/lib/OpenILS/Elastic/BibSearch/XSLT.pm new file mode 100644 index 0000000000..fda4682257 --- /dev/null +++ b/Open-ILS/src/perlmods/lib/OpenILS/Elastic/BibSearch/XSLT.pm @@ -0,0 +1,176 @@ +# --------------------------------------------------------------- +# Copyright (C) 2020 King County Library System +# Author: Bill Erickson +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# ---------------------------------------------------------------
+package OpenILS::Elastic::BibSearch::BibField;
+# Helper class for modeling an elastic bib field.
+# This is what OpenILS::Elastic::BibSearch expects.
+#
+# Instances are plain blessed hashes built from the named arguments
+# given to new().  add_dynamic_field() in the XSLT package below passes
+# purpose, class and name.  The flag accessors answer with 't' / 'f'
+# strings because create_index_properties compares them with eq 't'.
+#
+# NOTE(review): create_index_properties also calls ->weight on each
+# field; that accessor (and possibly others) is not modeled here yet.
+# TODO confirm the full set of methods BibSearch needs.
+use strict;
+use warnings;
+
+# Constructor.  %args is stored as-is as the object's state.
+sub new {
+    my ($class, %args) = @_;
+    return bless(\%args, $class);
+}
+
+# Returns 't' when the field was built with a true value under the
+# $flag key, or with a purpose equal to $purpose; returns 'f' otherwise.
+sub _flag {
+    my ($self, $flag, $purpose) = @_;
+    return 't' if $self->{$flag};
+    return 't' if defined $self->{purpose} && $self->{purpose} eq $purpose;
+    return 'f';
+}
+
+# 't' if this is a search field, else 'f'.
+sub search_field {
+    my $self = shift;
+    return $self->_flag('search_field', 'search');
+}
+
+# 't' if this is a facet field, else 'f'.
+sub facet_field {
+    my $self = shift;
+    return $self->_flag('facet_field', 'facet');
+}
+
+# 't' if this is a sorter field, else 'f'.
+sub sorter {
+    my $self = shift;
+    return $self->_flag('sorter', 'sorter');
+}
+
+package OpenILS::Elastic::BibSearch::XSLT;
+use strict;
+use warnings;
+use XML::LibXML;
+use XML::LibXSLT;
+use OpenSRF::Utils::Logger qw/:logger/;
+use OpenILS::Utils::CStoreEditor qw/:funcs/;
+use OpenILS::Elastic::BibSearch;
+use OpenILS::Utils::Normalize;
+use base qw/OpenILS::Elastic::BibSearch/;
+
+
+# Get/set the path of the XSL stylesheet file.
+# A true $filename replaces the stored path; the current path is
+# always returned.
+sub xsl_file {
+    my ($self, $filename) = @_;
+    $self->{xsl_file} = $filename if $filename;
+    return $self->{xsl_file};
+}
+
+# Returns the stylesheet parsed as an XML::LibXML document.
+# The file named by xsl_file is loaded on the first call and cached.
+# Dies if no stylesheet file has been set.
+sub xsl_doc {
+    my ($self) = @_;
+
+    unless ($self->{xsl_doc}) {
+        my $file = $self->xsl_file
+            or die "xsl_file must be set before the stylesheet is loaded\n";
+        $self->{xsl_doc} = XML::LibXML->load_xml(location => $file);
+    }
+
+    return $self->{xsl_doc};
+}
+
+# Returns the compiled XML::LibXSLT stylesheet built from xsl_doc.
+# Compiled on the first call and cached.
+sub xsl_sheet {
+    my $self = shift;
+
+    $self->{xsl_sheet} = XML::LibXSLT->new->parse_stylesheet($self->xsl_doc)
+        unless $self->{xsl_sheet};
+
+    return $self->{xsl_sheet};
+}
+
+
+# Tags of the fields added so far.  get_dynamic_fields empties this
+# list at the start of every run.
+my @seen_fields;
+
+# Appends a BibField for ($purpose, $class, $name) to the $fields array
+# ref unless an identical combination was already added in this run.
+# Returns the new field, or nothing when it was a duplicate.
+sub add_dynamic_field {
+    my ($self, $fields, $purpose, $class, $name) = @_;
+
+    # Callers may hand in XML::LibXML node lists (findnodes() called in
+    # scalar context); keep plain strings in the field object instead.
+    $class = "$class" if defined $class;
+    $name  = "$name"  if defined $name;
+
+    # A separator keeps e.g. ('ab', 'c') and ('a', 'bc') distinct.
+    my $tag = join('|',
+        $purpose,
+        (defined $class ? $class : ''),
+        (defined $name  ? $name  : '')
+    );
+
+    return if grep {$_ eq $tag} @seen_fields;
+    push(@seen_fields, $tag); # remember it so repeats are skipped
+
+    my $field = OpenILS::Elastic::BibSearch::BibField->new(
+        purpose => $purpose,
+        class => $class,
+        name => $name
+    );
+
+    push(@$fields, $field);
+    return $field;
+}
+
+# TODO: what to do about fields that have the same class/name
+# and are both search and facet fields, but the facet values
+# are different than the searched value?
+
+# Builds the list of dynamic index fields by scanning the XSL stylesheet
+# for calls to the add_*_entry templates.
+# Returns an array ref of the field objects collected by
+# add_dynamic_field, in the order: search, facet, filter,
+# composite filter, sorter.
+sub get_dynamic_fields {
+    my $self = shift;
+    my $fields = [];
+
+    @seen_fields = (); # reset with each run
+
+    my $doc = $self->xsl_doc;
+
+    # [template name, field purpose, class parameter, name parameter]
+    # The class parameter is undef for templates that carry no class.
+    my @templates = (
+        ['add_search_entry',           'search', 'field_class', 'index_name'],
+        ['add_facet_entry',            'facet',  'field_class', 'index_name'],
+        ['add_filter_entry',           'filter', undef,         'name'],
+        ['add_composite_filter_entry', 'filter', undef,         'name'],
+        ['add_sorter_entry',           'sorter', undef,         'name'],
+    );
+
+    my $param_path = './xsl:with-param[@name="%s"]/text()';
+
+    for my $tmpl (@templates) {
+        my ($template, $purpose, $class_param, $name_param) = @$tmpl;
+
+        my $call_path =
+            sprintf('//xsl:call-template[@name="%s"]', $template);
+
+        for my $node ($doc->findnodes($call_path)) {
+            # findvalue() yields a plain string rather than a node list.
+            my $class = $class_param
+                ? $node->findvalue(sprintf($param_path, $class_param))
+                : undef;
+            my $name = $node->findvalue(sprintf($param_path, $name_param));
+
+            $self->add_dynamic_field($fields, $purpose, $class, $name);
+        }
+    }
+
+    return $fields;
+}
+
+# Runs the stylesheet over the MARC XML of each requested record.
+# $record_ids is an array ref of bib record ids.
+# Returns the $bib_data array ref.  The per-field-type handling is still
+# unimplemented (work in progress), so the list is currently empty.
+sub get_bib_data {
+    my ($self, $record_ids) = @_;
+
+    my $bib_data = [];
+    my $db_data = $self->get_bib_db_data($record_ids);
+    my $sheet = $self->xsl_sheet;
+
+    for my $db_rec (@$db_data) {
+        my $marc_doc = XML::LibXML->load_xml(string => $db_rec->{marc});
+        my $result = $sheet->transform($marc_doc);
+        my $output = $sheet->output_as_chars($result);
+
+        # One output line per field; the first space-separated token
+        # is the field type.
+        for my $line (split(/\n/, $output)) {
+            my @parts = split(/ /, $line);
+            my $field_type = $parts[0];
+            next unless defined $field_type; # blank line
+
+            if ($field_type eq 'search') {
+                # TODO: not implemented yet
+            } elsif ($field_type eq 'facet') {
+                # TODO: not implemented yet
+            } elsif ($field_type eq 'filter') {
+                # TODO: not implemented yet
+            } elsif ($field_type eq 'sorter') {
+                # TODO: not implemented yet
+            }
+        }
+    }
+
+    return $bib_data;
+}
+
+# Fetches the database rows for the given bib record ids via
+# get_db_rows.  Returns an empty array ref when no ids are given.
+sub get_bib_db_data {
+    my ($self, $record_ids) = @_;
+
+    # An empty list would produce an invalid "IN ()" clause.
+    return [] unless @{$record_ids || []};
+
+    my $ids_str = join(',', @$record_ids);
+
+    # NOTE(review): the body of this heredoc was lost in this copy of
+    # the patch.  The query below is a minimal reconstruction: it selects
+    # the marc column that get_bib_data reads, and presumably the source
+    # table is biblio.record_entry.  Confirm against the upstream commit.
+    my $sql = <<SQL;
+SELECT bre.id, bre.marc
+FROM biblio.record_entry bre
+WHERE bre.id IN ($ids_str)
+SQL
+
+    return $self->get_db_rows($sql);
+}
+
+
+1;
+
diff --git a/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.elastic-search.sql b/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.elastic-search.sql
index 356ecd9d1a..fc5430f459 100644
--- a/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.elastic-search.sql
+++ b/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.elastic-search.sql
@@ -279,7 +279,6 @@ WHERE name NOT IN (
     'date1',
     'date2',
     'bib_level',
-    'icon_format',
     'item_form',
     'item_lang',
     'item_type',
-- 
2.11.0