From: Bill Erickson Date: Thu, 30 Jun 2016 21:36:21 +0000 (-0400) Subject: LP#1596595 HoldTargeter parallel targeting support X-Git-Url: https://old-git.evergreen-ils.org/?a=commitdiff_plain;h=2413c55d1f014f2f578840b963b6833dd5a17aba;p=working%2FEvergreen.git LP#1596595 HoldTargeter parallel targeting support Teach the targeter to process a subset of holds based on the number of parallel targeters at play and the parallel targeting slot each targeter instance occupies. As with the existing hold targeter, group holds by their metarecord to avoid multiple targeter processes targeting the same sets of potential copies. Signed-off-by: Bill Erickson --- diff --git a/Open-ILS/src/perlmods/lib/OpenILS/Utils/HoldTargeter.pm b/Open-ILS/src/perlmods/lib/OpenILS/Utils/HoldTargeter.pm index e55e276e61..2a871801bb 100644 --- a/Open-ILS/src/perlmods/lib/OpenILS/Utils/HoldTargeter.pm +++ b/Open-ILS/src/perlmods/lib/OpenILS/Utils/HoldTargeter.pm @@ -70,12 +70,19 @@ sub new { # target_all => 1 # USE WITH CAUTION. Forces (re)targeting of all active holds. This # is primarily useful or testing. +# +# parallel_count => n +# Number of parallel targeters running. This acts as the indication +# that other targeter instances are running. +# +# parallel_slot => n [starts at 1] +# Sets the parallel targeter instance position/slot. Used to determine +# which holds to process to avoid conflicts with other running instances. +# sub target { my ($self, %args) = @_; - foreach (qw/hold retarget_interval newest_first target_all return_count/) { - $self->{$_} = $args{$_} if exists $args{$_}; - } + $self->{$_} = $args{$_} for keys %args; $self->init; @@ -130,6 +137,29 @@ sub find_holds_to_target { ]; } + # parallel_count < 2 means no parallel + my $parallel = ($self->{parallel_count} || 0) > 1 ? + $self->{parallel_count} : 0; + + if ($parallel) { + # In parallel mode, we need to also grab the metarecord for each hold. 
+ $query->{select}->{mmrsm} = ['metarecord']; + $query->{from} = { + ahr => { + rhrr => { + fkey => 'id', + field => 'id', + join => { + mmrsm => { + field => 'source', + fkey => 'bib_record' + } + } + } + } + }; + } + # Newest-first sorting cares only about hold create_time. $query->{order_by} = [{class => 'ahr', field => 'request_time', direction => 'DESC'}] @@ -137,6 +167,30 @@ sub find_holds_to_target { my $holds = $self->editor->json_query($query, {substream => 1}); + # In parallel mode, only process holds within the current process + # whose metarecord ID modulo the parallel targeter count matches + # our parallel targeting slot. This ensures that no 2 processes + # will be operating on the same potential copy sets. + # + # E.g. Running 5 parallel and we are slot 3 (0-based slot 2) of 5, + # process holds whose metarecord IDs are 2, 7, 12, 17, ... + if ($parallel) { + + # Slots are 1-based at the API level, but 0-based for modulo. + my $slot = $self->{parallel_slot} - 1; + + my @slot_holds = + grep { ($_->{metarecord} % $parallel) == $slot } @$holds; + + $logger->info(sprintf( + "targeter: parallel targeter (slot %d of %d) trimmed ". + "targetable holds set down to %d from %d holds", + $slot + 1, $parallel, scalar(@slot_holds), scalar(@$holds) + )); + + $holds = \@slot_holds; + } + return map {$_->{id}} @$holds; }