# options
my $help;
-my $new_since;
my $modified_since;
+my $max_auth_count;
+my $start_auth_id;
my $print_auth_ids;
my $print_bib_ids;
my $link_auths;
my $db_pass = $ENV{PGPASSWORD};
my $opt_result = GetOptions(
- 'new-since=i' => \$new_since,
'modified-since=i' => \$modified_since,
+ 'max-auth-count=i' => \$max_auth_count,
+ 'start-auth-id=i' => \$start_auth_id,
'print-bib-ids' => \$print_bib_ids,
'print-auth-ids' => \$print_auth_ids,
'link-bibs' => \$link_bibs,
$0 --modified-since 1 --link-auths --link-bibs
Options:
-
- --new-since <days>
- Process authority records created within the last <days> days
- If --modified-since is also applied, the union of both record
- sets are processed (via OR query).
--modified-since <days>
- Process authority records modified within the last <days> days
- If --new-since is also applied, the union of both record
- sets are processed (via OR query).
+ Process authority records created or modified within the
+ last <days> days.
+
+ --max-auth-count <count>
+ Process <count> authority records in total. Use with
+ --start-auth-id to process batches of records across
+ multiple instances of the script.
+
+ --start-auth-id <id>
+ Process authority records whose ID is equal to or greater
+ than <id>. Use with --max-auth-count to process batches
+ of records across multiple runs of the script.
--print-auth-ids
Print authority record IDs to process to STDOUT
$dbh->do('SET statement_timeout = 0');
# ----------------------------------------------------------------------
-# Find the new authority record IDs
-my $where = 'WHERE ';
-
-$where .= "DATE(create_date) >= DATE(NOW() - '$new_since day'::INTERVAL)"
- if $new_since;
-
-$where .= ' OR ' if $new_since && $modified_since;
-
-$where .= "DATE(edit_date) >= DATE(NOW() - '$modified_since day'::INTERVAL)"
- if $modified_since;
+# Load the IDs of authority records edited within the last $modified_since days, lowest ID first
+my $where2 = $start_auth_id ? "AND id >= $start_auth_id" : ''; # optional inclusive lower bound on id; empty when --start-auth-id is unset or 0
+my $limit = $max_auth_count ? "LIMIT $max_auth_count" : ''; # optional cap on rows returned; empty when --max-auth-count is unset or 0
+
+my $sth = $dbh->prepare(<<SQL);
+ SELECT id FROM authority.record_entry
+ WHERE DATE(edit_date) >= DATE(NOW() - '$modified_since day'::INTERVAL)
+ $where2
+ ORDER BY id
+ $limit
+SQL
-my $sth = $dbh->prepare("SELECT id FROM authority.record_entry $where");
$sth->execute;
while (my $ref = $sth->fetchrow_hashref()) {
$sth->finish;
my $auth_rec_count = scalar(@auth_ids);
-announce("Auth IDs: @auth_ids") if $print_auth_ids;
+# Emit one authority record ID per line on STDOUT. Every ID, including the
+# last, is newline-terminated so it cannot run together with later output.
+if ($print_auth_ids) {
+    print "$_\n" for @auth_ids;
+}
+
+# Let the caller know what the last record processed will be,
+# so the next iteration of the script can start there.
+# NOTE: --start-auth-id is inclusive (id >= start), so passing this exact ID
+# to the next run re-processes that one record.
+# Skipped when no records matched, since $auth_ids[-1] would be undef and
+# the message would carry an empty ID (plus a warning under 'use warnings').
+announce("Final auth ID will be: " . $auth_ids[-1])
+    if $max_auth_count && @auth_ids;
if (!@auth_ids) {
- announce("No authority records created in the last $new_since days");
+ announce("No authority records edited in the last $modified_since days");
exit 0;
}
if ($link_auths) {
# Pass all new authority records to the auth-to-auth linker
for my $rec_id (@auth_ids) {
+
system(
'./authority_authority_linker.pl',
'--db-host', $db_host,
}
}
-announce("Bib IDs: @bib_ids") if $print_bib_ids;
+# Emit one bib record ID per line on STDOUT. Every ID, including the last,
+# is newline-terminated so the output ends cleanly for line-based consumers.
+if ($print_bib_ids) {
+    print "$_\n" for @bib_ids;
+}