<!-- number of parallel processes to use during fine generation -->
<parallel>1</parallel>
</fine_generator>
-
- <elastic_search>
- <database>
- <driver>Pg</driver>
- <host>localhost</host>
- <port>5432</port>
- <db>evergreen</db>
- <user>evergreen</user>
- <pw>evergreen</pw>
- <application_name>Elastic Search Indexer</application_name>
- </database>
- </elastic_search>
<reporter>
<!--
use DBI;
use Time::HiRes qw/time/;
use OpenSRF::Utils::Logger qw/:logger/;
-use OpenSRF::Utils::SettingsClient;
use OpenILS::Utils::CStoreEditor qw/:funcs/;
use Search::Elasticsearch;
use OpenSRF::Utils::JSON;
my ($self) = @_;
return $self->{db} if $self->{db};
-
- my $client = OpenSRF::Utils::SettingsClient->new;
- my $settings = $client->config_value('elastic_search');
- my $db_name = $settings->{database}->{db};
- my $db_host = $settings->{database}->{host};
- my $db_port = $settings->{database}->{port};
- my $db_user = $settings->{database}->{user};
- my $db_pass = $settings->{database}->{pw};
- my $db_appn = $settings->{database}->{application_name};
+
+ my $db_name = $self->{db_name};
+ my $db_host = $self->{db_host};
+ my $db_port = $self->{db_port};
+ my $db_user = $self->{db_user};
+ my $db_pass = $self->{db_pass};
+ my $db_appn = $self->{db_appn} || 'Elastic Indexer';
# TODO Add application_name to dsn
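A hedged sketch of one way that TODO might be addressed (not part of the patch; it assumes a DBD::Pg recent enough to pass extra DSN keys through to libpq):

    my $dsn = "dbi:Pg:dbname=$db_name;host=$db_host;port=$db_port";
    # libpq recognizes application_name; values containing spaces
    # may need conninfo-style quoting.
    $dsn .= ";application_name=$db_appn" if $db_appn;

    $self->{db} = DBI->connect($dsn, $db_user, $db_pass,
        {AutoCommit => 1, pg_enable_utf8 => 1})
        or $logger->error("ES indexer could not connect: $DBI::errstr");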
# ---------------------------------------------------------------
use strict;
use warnings;
+use DateTime;
+use Time::HiRes qw/time/;
use OpenSRF::Utils::Logger qw/:logger/;
use OpenSRF::Utils::JSON;
use OpenILS::Utils::CStoreEditor qw/:funcs/;
+use OpenILS::Utils::DateTime qw/interval_to_seconds/;
use OpenILS::Elastic;
use base qw/OpenILS::Elastic/;
# Add data to the bib-search index
sub populate_index {
- my ($self) = @_;
+ my ($self, $settings) = @_;
+ $settings ||= {};
my $index_count = 0;
my $total_indexed = 0;
- my $state = {last_bib_id => 0};
- do {
- $index_count =
- $self->populate_bib_search_index_page($state);
+ # extract the database settings.
+ for my $db_key (grep {$_ =~ /^db_/} keys %$settings) {
+ $self->{$db_key} = $settings->{$db_key};
+ }
+
+ # TODO $settings->{stop_record}
+ # TODO $settings->{start_date}
+
+ my $end_time;
+ my $duration = $settings->{max_duration};
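+ # max_duration is an interval string, e.g. "1 hour", parsed by interval_to_seconds()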
+ if ($duration) {
+ my $seconds = interval_to_seconds($duration);
+ $end_time = DateTime->now;
+ $end_time->add(seconds => $seconds);
+ }
+
+ while (1) {
+
+ $index_count = $self->populate_bib_index_batch($settings);
$total_indexed += $index_count;
$logger->info("ES indexed $total_indexed bib records");
- } while ($index_count > 0);
+ # exit if we're only indexing a single record or if the
+ # batch indexer says there are no more records to index.
+ last if !$index_count || $settings->{index_record};
+
+ if ($end_time && DateTime->now > $end_time) {
+ $logger->info(
+ "ES index populate exiting early on max_duration $duration");
+ last;
+ }
+ }
$logger->info("ES bib indexing complete with $total_indexed records");
}
sub get_bib_ids {
- my ($self, $state, $record_id) = @_;
- return [$record_id] if $record_id;
+ my ($self, $state) = @_;
- # TODO add support for last_edit_date
- my $last_id = $state->{last_bib_id};
+ # A specific record is selected for indexing.
+ return [$state->{index_record}] if $state->{index_record};
+
+ my $start_id = $state->{start_record} || 0;
+ my $stop_id = $state->{stop_record}; # TODO
+
+ # TODO: implement start_date filtering.
+ # Requires checking edit dates on bibs, call numbers, and copies!
+ my $start_date = $state->{start_date};
my $sql = <<SQL;
SELECT bre.id
FROM biblio.record_entry bre
-WHERE NOT bre.deleted AND bre.active AND bre.id > $last_id
+WHERE NOT bre.deleted AND bre.active AND bre.id >= $start_id
ORDER BY bre.edit_date, bre.id LIMIT $BIB_BATCH_SIZE
SQL
return $self->get_db_rows($sql);
}
-sub populate_bib_search_index_page {
+sub populate_bib_index_batch {
my ($self, $state) = @_;
my $index_count = 0;
- my $last_id = $state->{last_bib_id};
my $bib_ids = $self->get_bib_ids($state);
return 0 unless @$bib_ids;
+ $logger->info("ES indexing ".scalar(@$bib_ids)." records");
+
my $bib_data = $self->get_bib_data($bib_ids);
my $holdings = $self->load_holdings($bib_ids);
return 0 unless $self->index_document($bib_id, $body);
- $state->{last_bib_id} = $bib_id;
+ $state->{start_record} = $bib_id + 1;
$index_count++;
}
use strict;
use warnings;
use Getopt::Long;
+use OpenSRF::Utils::JSON;
use OpenILS::Utils::Fieldmapper;
use OpenILS::Utils::CStoreEditor;
use OpenILS::Elastic::BibSearch;
my $cluster = 'main';
my $create_index;
my $delete_index;
-my $index_name;
+my $index_name = 'bib-search'; # only supported index at time of writing
my $populate;
+my $index_record;
+my $start_record;
+my $stop_record;
+my $start_date;
+my $max_duration;
+my $batch_size = 500;
+
+# Database settings read from ENV by default.
+my $db_host = $ENV{PGHOST} || 'localhost';
+my $db_port = $ENV{PGPORT} || 5432;
+my $db_user = $ENV{PGUSER} || 'evergreen';
+my $db_pass = $ENV{PGPASSWORD} || 'evergreen';
+my $db_name = $ENV{PGDATABASE} || 'evergreen';
+my $db_appn = 'Elastic Indexer';
GetOptions(
'help' => \$help,
'create-index' => \$create_index,
'delete-index' => \$delete_index,
'index=s' => \$index_name,
+ 'index-record=s' => \$index_record,
+ 'start-record=s' => \$start_record,
+ 'stop-record=s' => \$stop_record,
+ 'start-date=s' => \$start_date,
+ 'max-duration=s' => \$max_duration,
+ 'batch-size=s' => \$batch_size,
+ 'db-name=s' => \$db_name,
+ 'db-host=s' => \$db_host,
+ 'db-port=s' => \$db_port,
+ 'db-user=s' => \$db_user,
+ 'db-pass=s' => \$db_pass,
+ 'db-appn=s' => \$db_appn,
'populate' => \$populate
) || die "\nSee --help for more\n";
+sub help {
+ print <<HELP;
+ Synopsis:
+
+ $0 --delete-index --create-index --index bib-search --populate
+
+ Options:
+
+ --osrf-config <file-path>
+ Path to the OpenSRF core configuration file.
+
+ --db-name <$db_name>
+ --db-host <$db_host>
+ --db-port <$db_port>
+ --db-user <$db_user>
+ --db-pass <PASSWORD>
+ --db-appn <$db_appn>
+ Database connection values for the Evergreen database from
+ which data is extracted for Elasticsearch indexing.
+
+ Values default to their PG* environment variable equivalents.
+
+ --cluster <name>
+ Specify a cluster name. Defaults to 'main'.
+
+ --index <name>
+ Specify an index name. Defaults to 'bib-search'.
+
+ --delete-index
+ Delete the specified index and all of its data.
+
+ --create-index
+ Create the index specified by --index.
+
+ --batch-size <number>
+ Index at most this many records per batch.
+ Default is 500.
+
+ --index-record <id>
+ Index a specific record by identifier.
+
+ --start-record <id>
+ Start indexing at the record with this ID.
+
+ --stop-record <id>
+ Stop indexing after the record with this ID has been indexed.
+
+ --start-date <YYYY-MM-DD[Thh:mm:ss]>
+ Start indexing records whose last edit date falls after
+ the provided date.
+
+ --max-duration <duration>
+ Stop indexing once the process has been running for this
+ amount of time, specified as an interval string (e.g. "1 hour").
+
+ --populate
+ Populate the selected index with data. If no filters are
+ provided (e.g. --start-record), then all applicable records
+ will be indexed.
+
+HELP
+ exit(0);
+}
+
+help() if $help;
+
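For illustration, a hedged example of a time-boxed partial reindex using the new options (the script name elastic-index.pl is an assumption; use the actual installed path):

    perl elastic-index.pl --populate --start-record 1000000 \
        --max-duration "2 hours" --db-host db1.example.org

A single record can be reindexed by combining --index-record <id> with --populate.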
# connect to osrf...
OpenSRF::System->bootstrap_client(config_file => $osrf_config);
Fieldmapper->import(
IDL => OpenSRF::Utils::SettingsClient->new->config_value("IDL"));
OpenILS::Utils::CStoreEditor::init();
-my $es = OpenILS::Elastic::BibSearch->new($cluster);
+my $es;
+
+if ($index_name eq 'bib-search') {
+ $es = OpenILS::Elastic::BibSearch->new($cluster);
+}
+
+if (!$es) {
+ die "Unknown index type: $index_name\n";
+}
$es->connect;
}
if ($populate) {
- $es->populate_index or die "Index populate failed.\n";
+
+ my $settings = {
+ db_name => $db_name,
+ db_host => $db_host,
+ db_port => $db_port,
+ db_user => $db_user,
+ db_pass => 'REDACTED',
+ db_appn => $db_appn,
+ index_record => $index_record,
+ start_record => $start_record,
+ stop_record => $stop_record,
+ start_date => $start_date,
+ max_duration => $max_duration,
+ batch_size => $batch_size
+ };
+
+ print "Commencing index populate with settings: " .
+ OpenSRF::Utils::JSON->perl2JSON($settings) . "\n";
+
+ # Restore the real password now that $settings has been logged.
+ $settings->{db_pass} = $db_pass;
+
+ $es->populate_index($settings) or die "Index populate failed.\n";
}