controller="open-ils.cstore open-ils.pcrud"
oils_obj:fieldmapper="elastic::bib_field"
oils_persist:tablename="elastic.bib_field"
- reporter:label="Elastic Bib Index field"
- oils_persist:readonly="true">
- <fields> <!-- no guaranteed unique field => no oils_persist:primary -->
- <field reporter:label="Metabib Field" name="metabib_field" reporter:datatype="link"/>
+ reporter:label="Elastic Bib Index field">
+ <fields oils_persist:primary="id" oils_persist:sequence="elastic.bib_field_id_seq">
+ <field reporter:label="ID" name="id" reporter:datatype="id"/>
<field reporter:label="Name" name="name" reporter:datatype="text"/>
<field reporter:label="Label" name="label" reporter:datatype="text"/>
- <field reporter:label="Search Group" name="search_group" reporter:datatype="text"/>
- <field reporter:label="Is Sort Field" name="sorter" reporter:datatype="bool"/>
+ <field reporter:label="Field Class" name="field_class" reporter:datatype="text"/>
<field reporter:label="Is Search Field" name="search_field" reporter:datatype="bool"/>
<field reporter:label="Is Facet Field" name="facet_field" reporter:datatype="bool"/>
+ <field reporter:label="Is Filter Field" name="filter" reporter:datatype="bool"/>
+ <field reporter:label="Is Sort Field" name="sorter" reporter:datatype="bool"/>
<field reporter:label="Field Weight (Boost)" name="weight" reporter:datatype="int"/>
</fields>
<links>
use OpenSRF::Utils::JSON;
use OpenSRF::Utils::Logger qw/:logger/;
use OpenILS::Utils::Fieldmapper;
-use OpenSRF::Utils::SettingsClient;
use OpenILS::Utils::CStoreEditor q/:funcs/;
use OpenILS::Elastic::BibSearch;
use Digest::MD5 qw(md5_hex);
my $e = new_editor();
- # no pkey
- $bib_fields = $e->search_elastic_bib_field({name => {'!=' => undef}});
+ $bib_fields = $e->retrieve_all_elastic_bib_field;
my $stats = $e->json_query({
select => {ccs => ['id', 'opac_visible', 'is_available']},
my ($field_class, $name) = split(/\|/, $fname);
my ($bib_field) = grep {
- $_->name eq $name && $_->search_group eq $field_class
+ $_->name eq $name && $_->field_class eq $field_class
} @$bib_fields;
my $hash = $facets->{$bib_field->metabib_field} = {};
for my $facet (@facet_fields) {
my $fname = $facet->name;
- my $fgrp = $facet->search_group;
+ my $fgrp = $facet->field_class;
$fname = "$fgrp|$fname" if $fgrp;
$elastic_query->{aggs}{$fname} = {terms => {field => "$fname|facet"}};
# MERCHANTABILITY or FITNESS FOR A PARTICULAR code. See the
# GNU General Public License for more details.
# ---------------------------------------------------------------
-package OpenILS::Elastic::BibSearch::BibField;
-# Models a single indexable field.
-use strict;
-use warnings;
-
-sub new {
- my ($class, %args) = @_;
- return bless(\%args, $class);
-}
-sub name {
- my $self = shift;
- return $self->{name};
-}
-sub field_class {
- my $self = shift;
- return $self->{field_class};
-}
-sub search_field {
- my $self = shift;
- return $self->{purpose} eq 'search';
-}
-sub facet_field {
- my $self = shift;
- return $self->{purpose} eq 'facet';
-}
-sub sorter {
- my $self = shift;
- return $self->{purpose} eq 'sorter';
-}
-sub filter {
- my $self = shift;
- return $self->{purpose} eq 'filter';
-}
-sub weight {
- my $self = shift;
- return $self->{weight} || 1;
-}
-
-# ---------------------------------------------------------------
package OpenILS::Elastic::BibSearch;
use strict;
use warnings;
use Time::HiRes qw/time/;
use OpenSRF::Utils::Logger qw/:logger/;
use OpenSRF::Utils::JSON;
+use OpenSRF::Utils::SettingsClient;
use OpenILS::Utils::CStoreEditor qw/:funcs/;
use OpenILS::Utils::DateTime qw/interval_to_seconds/;
use OpenILS::Elastic;
}
sub xsl_file {
- my ($self, $filename) = @_;
- $self->{xsl_file} = $filename if $filename;
+    # Returns the path of the XSL transform file, resolving it on first use.
+    #
+    # $filename (optional): explicit path to use; when supplied it replaces
+    # any cached or configured value.
+    #
+    # Without a cached or explicit path, the path is built from the "xsl"
+    # directory setting plus the value of the enabled global flag
+    # 'elastic.bib_search.transform_file'.  Dies when no usable path can
+    # be determined.
+    my ($self, $filename) = @_;
+
+    # Keep the setter so callers that pass a path are not silently ignored.
+    $self->{xsl_file} = $filename if $filename;
+
+    if (!$self->{xsl_file}) {
+
+        my $client = OpenSRF::Utils::SettingsClient->new;
+        my $dir = $client->config_value("dirs", "xsl");
+
+        # Tolerate an undef result (e.g. a failed lookup) rather than
+        # dereferencing it.
+        my $flags = new_editor()->search_config_global_flag({
+            name => 'elastic.bib_search.transform_file',
+            enabled => 't'
+        }) || [];
+
+        my $flag = $flags->[0];
+        my $value = $flag ? $flag->value : undef;
+
+        # Require both halves of the path; a missing directory or an empty
+        # flag value would otherwise produce a path that points nowhere.
+        if ($dir && defined $value && $value ne '') {
+            $self->{xsl_file} = "$dir/$value";
+
+        } else {
+            # The terminator line must match the quoted tag exactly,
+            # leading whitespace included.
+            die <<' TEXT';
+ No XSL file provided for Elastic::BibSearch. Confirm
+ config.global_flag "elastic.bib_search.transform_file"
+ is enabled, contains a valid value, and the file exists
+ in the XSL directory.
+ TEXT
+        }
+    }
+
return $self->{xsl_file};
}
return $self->{xsl_sheet};
}
-my @seen_fields;
-sub add_dynamic_field {
- my ($self, $fields, $purpose, $field_class, $name, $weight) = @_;
- return unless $name;
-
- $weight = '' if !$weight || $weight eq '_';
- $field_class = '' if !$field_class || $field_class eq '_';
-
- my $tag = $purpose . $field_class . $name;
- return if grep {$_ eq $tag} @seen_fields;
- push(@seen_fields, $tag);
-
- $logger->info("ES adding dynamic field purpose=$purpose ".
- "field_class=$field_class name=$name weight=$weight");
-
- my $field = OpenILS::Elastic::BibSearch::BibField->new(
- purpose => $purpose,
- field_class => $field_class,
- name => $name,
- weight => $weight
- );
-
- push(@$fields, $field);
-}
-
-sub get_dynamic_fields {
- my $self = shift;
- my $fields = [];
-
- @seen_fields = (); # reset with each run
-
- # Apply the transform in "target=index-fields" mode to extract just
- # the field definitions.
- my $null_doc = XML::LibXML->load_xml(string => '<root/>');
- my $result = $self->xsl_sheet->transform($null_doc, target => '"index-fields"');
- my $output = $self->xsl_sheet->output_as_chars($result);
-
- my @rows = split(/\n/, $output);
- for my $row (@rows) {
- my @parts = split(/ /, $row);
- $self->add_dynamic_field($fields, @parts);
- }
-
- return $fields;
-}
-
sub get_bib_data {
my ($self, $record_ids) = @_;
}
my $marc_doc = XML::LibXML->load_xml(string => $db_rec->{marc});
- my $result =
- $self->xsl_sheet->transform($marc_doc, target => '"index-values"');
+ my $result = $self->xsl_sheet->transform($marc_doc, target => '"index-values"');
my $output = $self->xsl_sheet->output_as_chars($result);
my @rows = split(/\n/, $output);
} foreach qw/title subject series keyword/;
}
- my $fields = $self->get_dynamic_fields;
+ my $fields = new_editor()->retrieve_all_elastic_bib_field;
for my $field (@$fields) {
my $def;
if ($field_class) {
- if ($field->search_field) {
+ if ($field->search_field eq 't') {
# Use the same fields and analysis as the 'grouped' field.
$def = clone($properties->{$field_class});
# Long sorter values are not necessarily unexpected,
# e.g. long titles.
- $def->{ignore_above} = $IGNORE_ABOVE unless $field->sorter;
+ $def->{ignore_above} = $IGNORE_ABOVE unless $field->sorter eq 't';
}
if ($def) {
# Search and facet fields can have the same name/group pair,
# but are stored as separate fields in ES since the content
# may vary between the two.
- if ($field->facet_field) {
+ if ($field->facet_field eq 't') {
# Facet fields are stored as separate fields, because their
# content may differ from the matching search field.
$self->create_one_field_index($field, $properties->{$field});
}
- # Now that we've added the static (and dynamic) fields,
+ # Now that we've added the configured fields,
# add the shortened field_class aliases.
while (my ($alias, $field) = each %SEARCH_CLASS_ALIAS_MAP) {
return 0 unless $self->create_one_field_index(
BEGIN;
-INSERT INTO config.global_flag (name, enabled, label)
+INSERT INTO config.global_flag (name, enabled, label, value)
VALUES (
'elastic.bib_search.enabled', FALSE,
- 'Elasticsearch Enable Bib Searching'
+ 'Elasticsearch Enable Bib Searching', NULL
+), (
+ 'elastic.bib_search.transform_file', FALSE,
+ 'Elasticsearch Bib Transform File [Relative to xsl directory]',
+ 'elastic-bib-transform.xsl'
);
CREATE SCHEMA elastic;
CONSTRAINT valid_index_class CHECK (index_class IN ('bib-search'))
);
+-- XXX consider storing the xsl chunk directly on the field,
+-- then stitching the chunks together for indexing. This would
+-- require a search chunk and a facet chunk.
+CREATE TABLE elastic.bib_field ( -- one row per configured bib index field
+ id SERIAL PRIMARY KEY, -- surrogate key; SERIAL creates elastic.bib_field_id_seq
+ name TEXT NOT NULL, -- field name, unique within its field_class
+ field_class TEXT REFERENCES config.metabib_class(name) ON DELETE CASCADE, -- nullable; rows are removed when their class is deleted
+ label TEXT NOT NULL UNIQUE, -- display label; must be unique across all fields
+ search_field BOOLEAN NOT NULL DEFAULT FALSE, -- field is used as a search field
+ facet_field BOOLEAN NOT NULL DEFAULT FALSE, -- field is used as a facet field
+ filter BOOLEAN NOT NULL DEFAULT FALSE, -- field is used as a filter field
+ sorter BOOLEAN NOT NULL DEFAULT FALSE, -- field is used as a sort field
+ weight INTEGER NOT NULL DEFAULT 1, -- field weight (boost)
+ CONSTRAINT name_class_once_per_field UNIQUE (name, field_class) -- NOTE: NULL field_class values compare as distinct, so class-less names are not deduplicated here
+);
+
CREATE OR REPLACE VIEW elastic.bib_last_mod_date AS
/**
* Last update date for each bib, which is taken from most recent
INSERT INTO elastic.node (label, host, proto, port, active, cluster)
VALUES ('Localhost', 'localhost', 'http', 9200, TRUE, 'main');
+-- Seed the stock bib index fields.
+--
+-- * One row per (field_class, name) pair: a field that is both searched
+--   and faceted sets both flags on the same row, as required by the
+--   name_class_once_per_field constraint.
+-- * Filters and sorters have no field class (NULL).
+-- * Labels must be unique and non-NULL; the values below are placeholders
+--   derived from the class and field name -- review before release.
+INSERT INTO elastic.bib_field
+    (field_class, name, label, search_field, facet_field, filter, sorter, weight)
+VALUES
+    -- filters
+    (NULL, 'audience',      'Filter: Audience',      FALSE, FALSE, TRUE, FALSE, 1),
+    (NULL, 'bib_level',     'Filter: Bib Level',     FALSE, FALSE, TRUE, FALSE, 1),
+    (NULL, 'date1',         'Filter: Date1',         FALSE, FALSE, TRUE, FALSE, 1),
+    (NULL, 'date2',         'Filter: Date2',         FALSE, FALSE, TRUE, FALSE, 1),
+    (NULL, 'item_form',     'Filter: Item Form',     FALSE, FALSE, TRUE, FALSE, 1),
+    (NULL, 'item_lang',     'Filter: Item Lang',     FALSE, FALSE, TRUE, FALSE, 1),
+    (NULL, 'item_type',     'Filter: Item Type',     FALSE, FALSE, TRUE, FALSE, 1),
+    (NULL, 'lit_form',      'Filter: Lit Form',      FALSE, FALSE, TRUE, FALSE, 1),
+    (NULL, 'search_format', 'Filter: Search Format', FALSE, FALSE, TRUE, FALSE, 1),
+    (NULL, 'sr_format',     'Filter: Sr Format',     FALSE, FALSE, TRUE, FALSE, 1),
+    (NULL, 'vr_format',     'Filter: Vr Format',     FALSE, FALSE, TRUE, FALSE, 1),
+    -- author
+    ('author', 'added_personal',    'Author: Added Personal',    TRUE, FALSE, FALSE, FALSE, 1),
+    ('author', 'conference',        'Author: Conference',        TRUE, TRUE,  FALSE, FALSE, 1),
+    ('author', 'conference_series', 'Author: Conference Series', TRUE, FALSE, FALSE, FALSE, 1),
+    ('author', 'corporate',         'Author: Corporate',         TRUE, TRUE,  FALSE, FALSE, 1),
+    ('author', 'corporate_series',  'Author: Corporate Series',  TRUE, FALSE, FALSE, FALSE, 1),
+    ('author', 'meeting',           'Author: Meeting',           TRUE, FALSE, FALSE, FALSE, 1),
+    ('author', 'personal',          'Author: Personal',          TRUE, TRUE,  FALSE, FALSE, 1),
+    ('author', 'personal_series',   'Author: Personal Series',   TRUE, FALSE, FALSE, FALSE, 1),
+    ('author', 'responsibility',    'Author: Responsibility',    TRUE, FALSE, FALSE, FALSE, 1),
+    -- identifier
+    ('identifier', 'bibcn',       'Identifier: Bibcn',       TRUE, FALSE, FALSE, FALSE, 1),
+    ('identifier', 'isbn',        'Identifier: Isbn',        TRUE, FALSE, FALSE, FALSE, 1),
+    ('identifier', 'issn',        'Identifier: Issn',        TRUE, FALSE, FALSE, FALSE, 1),
+    ('identifier', 'lccn',        'Identifier: Lccn',        TRUE, FALSE, FALSE, FALSE, 1),
+    ('identifier', 'match_isbn',  'Identifier: Match Isbn',  TRUE, FALSE, FALSE, FALSE, 1),
+    ('identifier', 'sudoc',       'Identifier: Sudoc',       TRUE, FALSE, FALSE, FALSE, 1),
+    ('identifier', 'tech_number', 'Identifier: Tech Number', TRUE, FALSE, FALSE, FALSE, 1),
+    ('identifier', 'upc',         'Identifier: Upc',         TRUE, FALSE, FALSE, FALSE, 1),
+    -- keyword
+    ('keyword', 'keyword',   'Keyword: Keyword',   TRUE, FALSE, FALSE, FALSE, 1),
+    ('keyword', 'publisher', 'Keyword: Publisher', TRUE, FALSE, FALSE, FALSE, 1),
+    -- series
+    ('series', 'seriestitle', 'Series: Seriestitle', TRUE, TRUE, FALSE, FALSE, 1),
+    -- subject
+    ('subject', 'corpname',   'Subject: Corpname',   TRUE, FALSE, FALSE, FALSE, 1),
+    ('subject', 'genre',      'Subject: Genre',      TRUE, FALSE, FALSE, FALSE, 1),
+    ('subject', 'geographic', 'Subject: Geographic', TRUE, TRUE,  FALSE, FALSE, 1),
+    ('subject', 'meeting',    'Subject: Meeting',    TRUE, FALSE, FALSE, FALSE, 1),
+    ('subject', 'name',       'Subject: Name',       TRUE, TRUE,  FALSE, FALSE, 1),
+    ('subject', 'topic',      'Subject: Topic',      TRUE, TRUE,  FALSE, FALSE, 1),
+    ('subject', 'uniftitle',  'Subject: Uniftitle',  TRUE, FALSE, FALSE, FALSE, 1),
+    -- title
+    ('title', 'abbreviated', 'Title: Abbreviated', TRUE, FALSE, FALSE, FALSE, 1),
+    ('title', 'added',       'Title: Added',       TRUE, FALSE, FALSE, FALSE, 1),
+    ('title', 'alternative', 'Title: Alternative', TRUE, FALSE, FALSE, FALSE, 1),
+    ('title', 'former',      'Title: Former',      TRUE, FALSE, FALSE, FALSE, 1),
+    ('title', 'magazine',    'Title: Magazine',    TRUE, FALSE, FALSE, FALSE, 1),
+    ('title', 'maintitle',   'Title: Maintitle',   TRUE, FALSE, FALSE, FALSE, 10),
+    ('title', 'previous',    'Title: Previous',    TRUE, FALSE, FALSE, FALSE, 1),
+    ('title', 'proper',      'Title: Proper',      TRUE, FALSE, FALSE, FALSE, 1),
+    ('title', 'seriestitle', 'Title: Seriestitle', TRUE, TRUE,  FALSE, FALSE, 1),
+    ('title', 'succeeding',  'Title: Succeeding',  TRUE, FALSE, FALSE, FALSE, 1),
+    ('title', 'uniform',     'Title: Uniform',     TRUE, FALSE, FALSE, FALSE, 1),
+    -- sorters
+    (NULL, 'author',  'Sort: Author',  FALSE, FALSE, FALSE, TRUE, 1),
+    (NULL, 'pubdate', 'Sort: Pubdate', FALSE, FALSE, FALSE, TRUE, 1),
+    (NULL, 'title',   'Sort: Title',   FALSE, FALSE, FALSE, TRUE, 1);
+
COMMIT;
/* UNDO
my $help;
my $osrf_config = '/openils/conf/opensrf_core.xml';
-my $bib_transform = '/openils/var/xsl/elastic-bib-transform.xsl';
+my $bib_transform;
my $cluster;
my $create_index;
my $delete_index;
Values default to their PG* environment variable equivalent.
+ --bib-transform <path_to_file>
+ Override the configured global config value for
+ 'elastic.bib_search.transform_file'
+
--cluster <name>
Specify a cluster name. Defaults to 'main'.
<xsl:output encoding="UTF-8" method="text"/>
<!--
-
- XSLT for transforming bib records into indexable fields / data
- suitable for consumption by the Elasticsearch BibSearch indexer.
-
- TRANSFORM REQUIREMENTS ===
-
- Transform operates in one of two modes:
-
- 1. target == 'index-fields'
-
- Prints one index definition per line without any record-specific
- data, In this mode, any valid XML string/file (e.g. '<root />')
- may be used for the transform.
-
- Output:
-
- $index_purpose $index_class $index_name $index_weight
-
- - Fields that have no value should use '_' as the value.
-
- e.g.
-
- search title proper 5
-
- 2. target == 'index-values'
-
Prints one index value per line for data found by transforming
a MARCXML record.
search subject topic South America
facet author personal Janey Jam "Jojo" Jones
-
- INDEX REQUIREMENTS ===
-
- For searches, the index name can be anything, but all indexes must
- use one of the following search classes:
-
- title
- author
- subject
- series
- keyword
- identifier
-
- Required Filters for Bib Transform Based on Staff Catalog Options
- at Time of Writing:
-
- item_type
- item_form
- item_lang
- audience
- vr_format
- bib_level
- lit_form
- search_format
-->
<xsl:template match="@*|node()">