From: Bill Erickson
Date: Mon, 24 Feb 2020 17:31:04 +0000 (-0500)
Subject: LP1844418 Direct indexing WIP
X-Git-Url: https://old-git.evergreen-ils.org/?a=commitdiff_plain;h=907c1e93304fd3e611028bc64f84987d3b132859;p=working%2FEvergreen.git

LP1844418 Direct indexing WIP

Signed-off-by: Bill Erickson
---

diff --git a/Open-ILS/src/perlmods/lib/OpenILS/Elastic.pm b/Open-ILS/src/perlmods/lib/OpenILS/Elastic.pm
index 316520a7f6..9b878d0658 100644
--- a/Open-ILS/src/perlmods/lib/OpenILS/Elastic.pm
+++ b/Open-ILS/src/perlmods/lib/OpenILS/Elastic.pm
@@ -388,6 +388,43 @@ sub index_document {
     return $result;
 }
 
+# Partially update an existing document in the index.
+#
+#   $id   - ID of the document to update.
+#   $body - Request body handed to the ES client's update() call as-is.
+#
+# Returns the ES response on success, undef on failure (the failure
+# is logged).
+# NOTE(review): the Elasticsearch update API normally expects the body
+# to carry a "doc" or "script" key -- confirm callers wrap their data.
+sub update_document {
+    my ($self, $id, $body) = @_;
+
+    my $result;
+
+    eval {
+        $result = $self->es->update(
+            index => $self->index_name,
+            type => 'record',
+            id => $id,
+            body => $body
+        );
+    };
+
+    if ($@) {
+        $logger->error("ES update_document failed with $@");
+        return undef;
+    }
+
+    if ($result->{failed}) {
+        $logger->error("ES update document $id failed " . Dumper($result));
+        return undef;
+    }
+
+    $logger->debug("ES update => $id succeeded");
+    return $result;
+}
+
 sub search {
     my ($self, $query) = @_;
 
diff --git a/Open-ILS/src/perlmods/lib/OpenILS/Elastic/BibSearch.pm b/Open-ILS/src/perlmods/lib/OpenILS/Elastic/BibSearch.pm
index d92329ce67..6c89f1e1ec 100644
--- a/Open-ILS/src/perlmods/lib/OpenILS/Elastic/BibSearch.pm
+++ b/Open-ILS/src/perlmods/lib/OpenILS/Elastic/BibSearch.pm
@@ -218,6 +218,18 @@ sub language_analyzers {
     return ("english");
 }
 
+# Returns the skip_marc setting stored on this object.
+sub skip_marc {
+    my $self = shift;
+    return $self->{skip_marc};
+}
+
+# Returns the skip_holdings setting stored on this object.
+sub skip_holdings {
+    my $self = shift;
+    return $self->{skip_holdings};
+}
+
 sub xsl_file {
     my ($self) = @_;
 
@@ -563,8 +575,11 @@ sub populate_bib_index_batch {
 
     $bib_ids = [@active_ids];
 
-    my $holdings = $self->load_holdings($bib_ids);
-    my $marc = $self->load_marc($bib_ids);
+    # Ternaries instead of "my ... unless": a my() declaration with a
+    # statement modifier has undefined behavior (see perlsyn).
+    my $holdings = $self->skip_holdings ? undef : $self->load_holdings($bib_ids);
+    my $marc = $self->skip_marc ? undef : $self->load_marc($bib_ids);
+
     my $bib_fields = new_editor()->retrieve_all_elastic_bib_field;
 
     for my $bib_id (@$bib_ids) {
@@ -572,11 +587,12 @@ sub populate_bib_index_batch {
 
         my $body = {
             bib_source => $rec->{bib_source},
-            metarecord => $rec->{metarecord},
-            marc => $marc->{$bib_id} || [],
-            holdings => $holdings->{$bib_id} || []
+            metarecord => $rec->{metarecord}
         };
 
+        $body->{marc} = $marc->{$bib_id} || [] unless $self->skip_marc;
+        $body->{holdings} = $holdings->{$bib_id} || [] unless $self->skip_holdings;
+
         # ES likes the "T" separator for ISO dates
         ($body->{create_date} = $rec->{create_date}) =~ s/ /T/g;
         ($body->{edit_date} = $rec->{edit_date}) =~ s/ /T/g;
@@ -608,7 +624,13 @@ sub populate_bib_index_batch {
                 }
             }
 
-        return 0 unless $self->index_document($bib_id, $body);
+        if ($self->skip_marc || $self->skip_holdings) {
+            # TODO: In skip mode, assume we are updating documents instead
+            # of creating new ones.  This may need to be more flexible.
+            return 0 unless $self->update_document($bib_id, $body);
+        } else {
+            return 0 unless $self->index_document($bib_id, $body);
+        }
 
         $state->{start_record} = $bib_id + 1;
         $index_count++;
diff --git a/Open-ILS/src/support-scripts/elastic-index.pl b/Open-ILS/src/support-scripts/elastic-index.pl
index 64faff7804..baa03216f3 100755
--- a/Open-ILS/src/support-scripts/elastic-index.pl
+++ b/Open-ILS/src/support-scripts/elastic-index.pl
@@ -23,6 +23,8 @@ my $stop_record;
 my $modified_since;
 my $max_duration;
 my $batch_size = 500;
+my $skip_marc;
+my $skip_holdings;
 
 # Database settings read from ENV by default.
 my $db_host = $ENV{PGHOST} || 'localhost';
@@ -48,6 +50,8 @@ GetOptions(
     'max-duration=s' => \$max_duration,
     'batch-size=s' => \$batch_size,
     'bib-transform=s' => \$bib_transform,
+    'skip-marc' => \$skip_marc,
+    'skip-holdings' => \$skip_holdings,
     'db-name=s' => \$db_name,
     'db-host=s' => \$db_host,
     'db-port=s' => \$db_port,
@@ -123,6 +127,12 @@ sub help {
             at regular intervals to keep the ES-indexed data in sync
             with the EG data.
 
+        --skip-marc
+        --skip-holdings
+            Bypass indexing the MARC and/or holdings data.  This is
+            useful when reindexing for configuration changes, where
+            the underlying bib data has not changed.
+
         --max-duration
             Stop indexing once the process has been running for this
             amount of time.
@@ -150,8 +160,10 @@ if ($index_class eq 'bib-search') {
     $es = OpenILS::Elastic::BibSearch->new(
         cluster => $cluster,
         index_name => $index_name,
+        write_mode => 1,
         xsl_file => $bib_transform,
-        write_mode => 1
+        skip_marc => $skip_marc,
+        skip_holdings => $skip_holdings
     );
 }
 