--- /dev/null
+#!/usr/bin/perl
+# ----------------------------------------------------------------------
+# Find bib records matching the requested criteria for linking.
+# Bib IDs are exported to one or more batch files for future processing.
+# ----------------------------------------------------------------------
+use strict;
+use warnings;
+use DBI;
+use Getopt::Long;
+use DateTime;
+
+# Shared database handle; assigned by connect_db().
+my $db_handle;
+
+# ----------------------------------------------------------------------
+# Command-line options.
+# Database settings fall back to the PG* environment variables and then
+# to the literal defaults below.  The database name has no command-line
+# flag; it is taken from PGDATABASE only.
+# ----------------------------------------------------------------------
+my $help;
+my $modified_since;
+my $exported_since;
+my $batch_size = 10000;
+my $start_id;
+my $end_id;
+my $count_only;
+my $out_dir = '/tmp';
+my $db_host = $ENV{PGHOST} || 'localhost';
+my $db_port = $ENV{PGPORT} || '5432';
+my $db_user = $ENV{PGUSER} || 'evergreen';
+my $db_name = $ENV{PGDATABASE} || 'evergreen';
+my $db_pass = $ENV{PGPASSWORD};
+
+# GetOptions returns false when the command line could not be parsed;
+# the result is kept so the caller can react to bad invocations.
+my $opt_result = GetOptions(
+    'modified-since=s' => \$modified_since,
+    'exported-since=s' => \$exported_since,
+    'start-id=i'       => \$start_id,
+    'end-id=i'         => \$end_id,
+    'batch-size=i'     => \$batch_size,
+    'count-only'       => \$count_only,
+    'out-dir=s'        => \$out_dir,
+    'db-host=s'        => \$db_host,
+    'db-user=s'        => \$db_user,
+    'db-pass=s'        => \$db_pass,
+    'db-port=s'        => \$db_port,
+    'help'             => \$help,
+);
+
+# Print $msg to STDOUT, prefixed with the current local date and time
+# formatted as '%F %T'.
+sub announce {
+    my ($msg) = @_;
+    my $stamp = DateTime->now(time_zone => 'local')->strftime('%F %T');
+    print "$stamp $msg\n";
+}
+
+# Print usage information and exit.
+#
+# Parameters:
+#   $exit_status - optional process exit code (default 0).  When it is
+#                  non-zero the text is written to STDERR instead of
+#                  STDOUT.
+# Does not return.
+sub help {
+    my $exit_status = shift;
+    $exit_status = 0 unless defined $exit_status;
+    my $out = $exit_status ? \*STDERR : \*STDOUT;
+
+    # The heredoc interpolates, so the shell line continuation must be
+    # written as "\\" to survive as a literal backslash.
+    print $out <<HELP;
+    Find IDs for bib records based on various criteria.  Write bib
+    IDs to batch files.  Batch files are placed into --out-dir and
+    named bib-ids.001, bib-ids.002, etc.
+
+    Usage:
+
+        Find bibs modified since a given date, 100 IDs per batch file:
+
+        $0 --modified-since 2016-11-01 --batch-size 100 \\
+            --out-dir /openils/var/data/linkbibs/2016-12-01
+
+    Options:
+
+        --modified-since <YYYY-MM-DD>
+            Limit bibs to those modified since the specified date.
+
+        --exported-since <YYYY-MM-DD>
+            Limit bibs to those exported since the specified date.
+            Export date is based on data found in the
+            metabib.bib_export_data table.
+
+        --start-id <id>
+            Limit bibs to those whose ID is no less than <id>
+
+        --end-id <id>
+            Limit bibs to those whose ID is no greater than <id>
+
+        --out-dir [/tmp]
+            Output directory.
+
+        --batch-size
+            Number of bib IDs to write to each batch file.
+
+        --count-only
+            Print the total number of records that would be added
+            to batch files without adding to any batch files.
+
+        --db-host
+        --db-user
+        --db-pass
+        --db-port
+            Database connection params.  PG environment variables are
+            also inspected for values.  When all else fails, try to
+            connect to database evergreen\@localhost
+HELP
+    exit $exit_status;
+}
+
+# Show usage on request (exit 0) or when option parsing failed (exit 1),
+# so a bad invocation is distinguishable from a successful --help.
+help($opt_result ? 0 : 1) if $help || !$opt_result;
+
+# Open the PostgreSQL connection and store it in the file-level
+# $db_handle.  Dies if the connection cannot be established.
+sub connect_db {
+    # NOTE(review): the options string presumably disables the server
+    # statement timeout for this session -- confirm.
+    my $dsn =
+        "dbi:Pg:db=$db_name;host=$db_host;port=$db_port;options='--statement-timeout=0'";
+
+    my %attrs = (
+        RaiseError      => 1,
+        PrintError      => 0,
+        AutoCommit      => 1,
+        pg_expand_array => 0,
+        pg_enable_utf8  => 1,
+    );
+
+    $db_handle = DBI->connect($dsn, $db_user, $db_pass, \%attrs)
+        or die "Connection to database failed: $DBI::err : $DBI::errstr";
+}
+
+connect_db();
+
+# ----------------------------------------------------------------------
+# Build and run the bib ID query.  Every user-supplied value is passed
+# as a bind parameter instead of being interpolated into the SQL text.
+# ----------------------------------------------------------------------
+my $from = 'FROM biblio.record_entry bre';
+my @conditions = ('NOT bre.deleted');
+my @binds;
+
+# Test with defined() so that an explicit ID of 0 still acts as a bound.
+if (defined $start_id) {
+    push @conditions, 'bre.id >= ?';
+    push @binds, $start_id;
+}
+
+if (defined $end_id) {
+    push @conditions, 'bre.id <= ?';
+    push @binds, $end_id;
+}
+
+# --modified-since was accepted and documented but never applied.
+# NOTE(review): assumes biblio.record_entry.edit_date holds the record's
+# last-modified timestamp -- confirm against the schema.
+if ($modified_since) {
+    push @conditions, 'bre.edit_date > ?';
+    push @binds, $modified_since;
+}
+
+if ($exported_since) {
+    $from .= ' JOIN metabib.bib_export_data bed ON (bed.bib = bre.id)';
+    push @conditions, 'bed.export_date > ?';
+    push @binds, $exported_since;
+}
+
+my $where = 'WHERE ' . join(' AND ', @conditions);
+
+my $sql = <<SQL;
+    SELECT bre.id
+    $from
+    $where
+    ORDER BY bre.id DESC
+SQL
+
+my $sth = $db_handle->prepare($sql);
+$sth->execute(@binds);
+
+# Handle of the batch file currently being written; undef until the
+# first batch file has been opened.
+my $batch_file;
+
+# Close the current batch file, if any, and open $path for writing as
+# the new current batch file.
+#
+# Parameters:
+#   $path - path of the batch file to create (truncated if it exists).
+# Dies if the previous file cannot be closed or $path cannot be opened.
+sub open_batch_file {
+    my $path = shift;
+    announce("Starting new batch file: $path");
+
+    # Buffered write errors only surface at close time, so check it.
+    if ($batch_file) {
+        close $batch_file
+            or die "Cannot close previous batch file: $!\n";
+    }
+
+    open $batch_file, '>', $path
+        or die "Cannot open batch file '$path' for writing: $!\n";
+}
+
+# ----------------------------------------------------------------------
+# Walk the result set, counting every bib ID and, unless --count-only
+# was given, writing the IDs to batch files of $batch_size entries each.
+# ----------------------------------------------------------------------
+
+# A non-positive batch size would make the modulus below fail mid-run.
+die "--batch-size must be a positive integer\n"
+    unless $count_only || $batch_size > 0;
+
+my $ctr = 0;
+my $batch = 0;
+
+# List assignment in the condition keeps the loop running even for a
+# false-looking ID such as 0.
+while (my ($bib_id) = $sth->fetchrow_array) {
+    $ctr++;
+    next if $count_only;
+
+    # Start a new file on the first ID of every batch.  Numbering starts
+    # at 001, as documented in the usage text.
+    if (($ctr - 1) % $batch_size == 0) {
+        $batch++;
+
+        # Pass $out_dir as an argument so a '%' in the directory name is
+        # not interpreted as a format directive.
+        open_batch_file(sprintf('%s/bib-ids.%03d', $out_dir, $batch));
+    }
+
+    print {$batch_file} "$bib_id\n"
+        or die "Cannot write to batch file: $!\n";
+}
+
+# Buffered write errors only surface at close time, so check it.
+if ($batch_file) {
+    close $batch_file
+        or die "Cannot close batch file: $!\n";
+}
+$sth->finish;
+
+announce("Found $ctr bib records");
+