From 1f69637809a3c267f33a6b542f05ebe30865cb46 Mon Sep 17 00:00:00 2001 From: dbwells Date: Mon, 3 May 2010 12:50:40 +0000 Subject: [PATCH] This changeset deals with enhancements to the MFHD/Caption/Holding Perl modules. Along with a few smaller changes, it: - Forces MARC::Record to be newer than 2.0.0, as that version had an issue with classes derived from MARC::Field - Augments MFHD to properly deal with inserting, appending, and deleting Caption and Holding objects - Replaces the issuance-table specific version of generate_predictions() with something more general and flexible (the diff butchers this!) - Adds new methods for getting a compressed or decompressed set of Holdings for a given Caption - Splits increment() into increment() and extend(), with the second being meant for compressed holdings - Adds compressed_to_first(), a companion method for compressed_to_last() - Overloads the 'cmp' operator for Holdings to aid in sorting, compressing, and deduping of Holding objects git-svn-id: svn://svn.open-ils.org/ILS/branches/seials-integration@16373 dcc99617-32d9-48b4-a31d-7c20da2025e4 --- Open-ILS/src/perlmods/OpenILS/Utils/MFHD.pm | 341 +++++++++++++++++---- .../src/perlmods/OpenILS/Utils/MFHD/Holding.pm | 189 +++++++++++- .../perlmods/OpenILS/Utils/MFHD/test/testlib.pm | 1 - 3 files changed, 459 insertions(+), 72 deletions(-) diff --git a/Open-ILS/src/perlmods/OpenILS/Utils/MFHD.pm b/Open-ILS/src/perlmods/OpenILS/Utils/MFHD.pm index 682d2ebfa2..4b3a30fe56 100755 --- a/Open-ILS/src/perlmods/OpenILS/Utils/MFHD.pm +++ b/Open-ILS/src/perlmods/OpenILS/Utils/MFHD.pm @@ -6,6 +6,9 @@ use Carp; use DateTime::Format::Strptime; use Data::Dumper; +# for inherited methods to work properly, we need to force a +# MARC::Record version greater than 2.0.0 +use MARC::Record 2.0.1; use base 'MARC::Record'; use OpenILS::Utils::MFHD::Caption; @@ -84,30 +87,148 @@ sub caption_link_ids { return sort keys %{$self->{_mfhd_CAPTIONS}->{$field}}; } +# optional argument to get back a 'hashref' or an 'array' (default) sub captions { my $self = shift; - my $field = shift; + my $tag = shift; + my $return_type = shift; # TODO: add support for caption types as argument? (base, index, supplement) - my @captions; - my @sorted_ids = $self->caption_link_ids($field); + my @sorted_ids = $self->caption_link_ids($tag); - foreach my $link_id (@sorted_ids) { - push(@captions, $self->{_mfhd_CAPTIONS}{$field}{$link_id}); + if (defined($return_type) and $return_type eq 'hashref') { + my %captions; + foreach my $link_id (@sorted_ids) { + $captions{$link_id} = $self->{_mfhd_CAPTIONS}{$tag}{$link_id}; + } + return \%captions; + } else { + my @captions; + foreach my $link_id (@sorted_ids) { + push(@captions, $self->{_mfhd_CAPTIONS}{$tag}{$link_id}); + } + return @captions; } +} - return @captions; +sub append_fields { + my $self = shift; + + my $field_count = $self->SUPER::append_fields(@_); + if ($field_count) { + foreach my $field (@_) { + $self->_avoid_link_collision($field); + my $field_type = ref $field; + if ($field_type eq 'MFHD::Holding') { + $self->{_mfhd_HOLDINGS}{$field->tag}{$field->caption->link_id}{$field->seqno} = $field; + } elsif ($field_type eq 'MFHD::Caption') { + $self->{_mfhd_CAPTIONS}{$field->tag}{$field->link_id} = $field; + } + } + return $field_count; + } else { + return; + } +} + +sub delete_field { + my $self = shift; + my $field = shift; + + my $field_count = $self->SUPER::delete_field($field); + if ($field_count) { + my $field_type = ref($field); + if ($field_type eq 'MFHD::Holding') { + delete($self->{_mfhd_HOLDINGS}{$field->tag}{$field->caption->link_id}{$field->seqno}); + } elsif ($field_type eq 'MFHD::Caption') { + delete($self->{_mfhd_CAPTIONS}{$field->tag}{$field->link_id}); + } + return $field_count; + } else { + return; + } +} + +sub insert_fields_before { + my $self = shift; + my $before = shift; + + my $field_count = $self->SUPER::insert_fields_before($before, @_); + if ($field_count) { + foreach my $field (@_) { + $self->_avoid_link_collision($field); + my $field_type = ref $field; + if ($field_type eq 'MFHD::Holding') { + $self->{_mfhd_HOLDINGS}{$field->tag}{$field->caption->link_id}{$field->seqno} = $field; + } elsif ($field_type eq 'MFHD::Caption') { + $self->{_mfhd_CAPTIONS}{$field->tag}{$field->link_id} = $field; + } + } + return $field_count; + } else { + return; + } +} + +sub insert_fields_after { + my $self = shift; + my $after = shift; + + my $field_count = $self->SUPER::insert_fields_after($after, @_); + if ($field_count) { + foreach my $field (@_) { + $self->_avoid_link_collision($field); + my $field_type = ref $field; + if ($field_type eq 'MFHD::Holding') { + $self->{_mfhd_HOLDINGS}{$field->tag}{$field->caption->link_id}{$field->seqno} = $field; + } elsif ($field_type eq 'MFHD::Caption') { + $self->{_mfhd_CAPTIONS}{$field->tag}{$field->link_id} = $field; + } + } + return $field_count; + } else { + return; + } +} + +sub _avoid_link_collision { + my $self = shift; + my $field = shift; + + my $fieldref = ref($field); + if ($fieldref eq 'MFHD::Holding') { + my $seqno = $field->seqno; + my $changed_seqno = 0; + if (exists($self->{_mfhd_HOLDINGS}{$field->tag}{$field->caption->link_id}{$seqno})) { + $changed_seqno = 1; + do { + $seqno++; + } while (exists($self->{_mfhd_HOLDINGS}{$field->tag}{$field->caption->link_id}{$seqno})); + } + $field->seqno($seqno) if $changed_seqno; + } elsif ($fieldref eq 'MFHD::Caption') { + my $link_id = $field->link_id; + my $changed_link_id = 0; + if (exists($self->{_mfhd_CAPTIONS}{$field->tag}{$link_id})) { + $link_id++; + $changed_link_id = 1; + do { + $link_id++; + } while (exists($self->{_mfhd_CAPTIONS}{$field->tag}{$link_id})); + } + $field->link_id($link_id) if $changed_link_id; + } } sub active_captions { my $self = shift; - my $field = shift; + my $tag = shift; # TODO: add support for caption types as argument? (base, index, supplement) my @captions; my @active_captions; - @captions = $self->captions($field); + @captions = $self->captions($tag); # TODO: for now, we will assume the last 85X field is active # and the rest are historical. The standard is hazy about @@ -130,74 +251,178 @@ sub holdings { } # -# generate_predictions() is an initial attempt at a function which can be used -# to populate an issuance table with a list of predicted issues. It accepts -# a hash ref of options initially defined as: -# field : the caption field to predict on (853, 854, or 855) +# generate_predictions() +# Accepts a hash ref of options initially defined as: +# base_holding : reference to the holding field to predict from # num_to_predict : the number of issues you wish to predict -# last_rec_date : the date of the last received issue, to be used as an offset -# for predicting future issues +# OR +# end_holding : holding field ref, keep predicting until you meet or exceed it # # The basic method is to first convert to a single holding if compressed, then # increment the holding and save the resulting values to @predictions. # -# returns @preditions, an array of array refs containing (link id, formatted -# label, formatted chronology date, formatted estimated arrival date, and an -# array ref of holding subfields as (key, value, key, value ...)) (not a hash -# to protect order and possible duplicate keys). -# +# returns @predictions, an array of holding field refs (including end_holding +# if applicable but NOT base_holding) +# sub generate_predictions { my ($self, $options) = @_; - my $field = $options->{field}; - my $num_to_predict = $options->{num_to_predict}; - my $last_rec_date = - $options->{last_rec_date}; # expected or actual, according to preference - # TODO: add support for predicting serials with no chronology by passing in - # a last_pub_date option? + my $base_holding = $options->{base_holding}; + my $num_to_predict = $options->{num_to_predict}; + my $end_holding = $options->{end_holding}; + my $max_to_predict = $options->{max_to_predict} || 10000; # fail-safe - my $strp = new DateTime::Format::Strptime(pattern => '%F'); + if (!defined($base_holding)) { + carp("Base holding not defined in generate_predictions, returning empty set"); + return (); + } + if ($base_holding->is_compressed) { + carp("Ambiguous compressed base holding in generate_predictions, returning empty set"); + return (); + } + my $curr_holding = $base_holding->clone; # prevent side-effects + + my @predictions; + + if ($num_to_predict) { + for (my $i = 0; $i < $num_to_predict; $i++) { + push(@predictions, $curr_holding->increment->clone); + } + } elsif (defined($end_holding)) { + $end_holding = $end_holding->clone; # prevent side-effects + my $next_holding = $curr_holding->increment->clone; + my $num_predicted = 0; + while ($next_holding le $end_holding) { + push(@predictions, $next_holding); + $num_predicted++; + if ($num_predicted >= $max_to_predict) { + carp("Maximum prediction count exceeded"); + last; + } + $next_holding = $curr_holding->increment->clone; + } + } - my $receival_date = $strp->parse_datetime($last_rec_date); + return @predictions; +} - my @active_captions = $self->active_captions($field); +# +# create an array of compressed holdings from all holdings for a given caption, +# compressing as needed +# +# Optionally you can skip sorting, but the resulting compression will be compromised +# if the current holdings are out of order +# +# TODO: gap marking, gap preservation +# +# TODO: some of this could be moved to the Caption object to allow for +# decompression in the absense of an overarching MFHD object +# +sub get_compressed_holdings { + my $self = shift; + my $caption = shift; + my $opts = shift; + my $skip_sort = $opts->{'skip_sort'}; + + # make sure none are compressed + my @decomp_holdings; + if ($skip_sort) { + @decomp_holdings = $self->get_decompressed_holdings($caption, {'skip_sort' => 1}); + } else { + # sort for best algorithm + @decomp_holdings = $self->get_decompressed_holdings($caption, {'dedupe' => 1}); + } - my @predictions; - foreach my $caption (@active_captions) { - my $htag = $caption->tag; - my $link_id = $caption->link_id; - $htag =~ s/^85/86/; - my @holdings = $self->holdings($htag, $link_id); - my $last_holding = $holdings[-1]; - - if ($last_holding->is_compressed) { - $last_holding->compressed_to_last; # convert to last in range + my $runner = $decomp_holdings[0]->clone->increment; + my $curr_holding = shift(@decomp_holdings); + $curr_holding = $curr_holding->clone; + my $seqno = 1; + $curr_holding->seqno($seqno); + my @comp_holdings; +# my $last_holding; + foreach my $holding (@decomp_holdings) { + if ($runner eq $holding) { + $curr_holding->extend; + $runner->increment; +# } elsif ($holding eq $last_holding) { +# carp("Found duplicate holding in compression set, skipping"); + } elsif ($runner gt $holding) { # should not happen unless holding is not in series + carp("Found unexpected holding, skipping"); + } else { + push(@comp_holdings, $curr_holding); + while ($runner le $holding) { + $runner->increment; + } + $curr_holding = $holding->clone; + $seqno++; + $curr_holding->seqno($seqno); } +# $last_holding = $holding; + } + push(@comp_holdings, $curr_holding); - my $pub_date = $strp->parse_datetime($last_holding->chron_to_date); - my $date_diff = $receival_date - $pub_date; + return @comp_holdings; +} + +# +# create an array of single holdings from all holdings for a given caption, +# decompressing as needed +# +# resulting array is returned as they come in the record, unsorted +# +# optional argument will reorder and renumber the holdings before returning +# +# TODO: some of this could be moved to the Caption (and/or Holding) object to +# allow for decompression in the absense of an overarching MFHD object +# +sub get_decompressed_holdings { + my $self = shift; + my $caption = shift; + my $opts = shift; + my $skip_sort = $opts->{'skip_sort'}; + my $dedupe = $opts->{'dedupe'}; - $last_holding->notes('public', []); - # add a note marker for system use - $last_holding->notes('private', ['AUTOGEN']); + if ($dedupe and $skip_sort) { + carp("Attempted deduplication without sorting, failure likely"); + } - for (my $i = 0; $i < $num_to_predict; $i++) { - $last_holding->increment; - $pub_date = $strp->parse_datetime($last_holding->chron_to_date); - my $arrival_date = $pub_date + $date_diff; - push( - @predictions, - [ - $link_id, - $last_holding->format, - $pub_date->strftime('%F'), - $arrival_date->strftime('%F'), - [$last_holding->subfields_list] - ] - ); + my $htag = $caption->tag; + my $link_id = $caption->link_id; + $htag =~ s/^85/86/; + my @holdings = $self->holdings($htag, $link_id); + my @decomp_holdings; + + foreach my $holding (@holdings) { + if (!$holding->is_compressed) { + push(@decomp_holdings, $holding->clone); + } else { + my $base_holding = $holding->clone->compressed_to_first; + my @new_holdings = $self->generate_predictions( + {'base_holding' => $base_holding, + 'end_holding' => $holding->clone->compressed_to_last}); + push(@decomp_holdings, $base_holding, @new_holdings); } } - return @predictions; + + unless ($skip_sort) { + my @temp_holdings = sort {$a cmp $b} @decomp_holdings; + @decomp_holdings = @temp_holdings; + } + + my @return_holdings = (shift(@decomp_holdings)); + $return_holdings[0]->seqno(1); + my $seqno = 2; + foreach my $holding (@decomp_holdings) { # renumber sequence + if ($holding eq $return_holdings[-1] and $dedupe) { + carp("Found duplicate holding in decompression set, discarding"); + next; + } + $holding->seqno($seqno); + $seqno++; + push(@return_holdings, $holding); + } + + return @return_holdings; } # diff --git a/Open-ILS/src/perlmods/OpenILS/Utils/MFHD/Holding.pm b/Open-ILS/src/perlmods/OpenILS/Utils/MFHD/Holding.pm index 4ce15f256d..efd6027ba2 100755 --- a/Open-ILS/src/perlmods/OpenILS/Utils/MFHD/Holding.pm +++ b/Open-ILS/src/perlmods/OpenILS/Utils/MFHD/Holding.pm @@ -39,6 +39,10 @@ sub new { my ($key, $val) = @$subfield; if ($key =~ /[a-m]/) { + if (exists($self->{_mfhdh_FIELDS}->{$key})) { + carp("Duplicate, non-repeatable subfield '$key' found, ignoring"); + next; + } if ($self->{_mfhdh_COMPRESSED}) { $self->{_mfhdh_FIELDS}->{$key}{HOLDINGS} = [split(/\-/, $val)]; } else { @@ -466,29 +470,21 @@ sub validate { # Replace a single holding with it's next prediction # and return itself # -# If the holding is compressed, the range is expanded -# sub increment { my $self = shift; if ($self->is_open_ended) { carp "Holding is open-ended, cannot increment"; return $self; + } elsif ($self->is_compressed) { + carp "Incrementing a compressed holding is deprecated, use extend instead"; + return $self->extend; } my $next = $self->next(); - if ($self->is_compressed) { # expand range - foreach my $key (keys %{$next}) { - my @values = @{$self->field_values($key)}; - $values[1] = $next->{$key}; - $self->fields->{$key}{HOLDINGS} = \@values; - $next->{$key} = join('-', @values); - } - } else { - foreach my $key (keys %{$next}) { - $self->fields->{$key}{HOLDINGS}[0] = $next->{$key}; - } + foreach my $key (keys %{$next}) { + $self->fields->{$key}{HOLDINGS}[0] = $next->{$key}; } $self->seqno($self->seqno + 1); @@ -497,6 +493,60 @@ sub increment { } # +# Extends a holding (compressing if needed) to include the next +# prediction and returns itself +# +sub extend { + my $self = shift; + + if ($self->is_open_ended) { + carp "Holding is open-ended, cannot extend"; + return $self; + } + + my $next = $self->next(); + + if (!$self->is_compressed) { + $self->is_compressed(1); # add compressed state + } + + foreach my $key (keys %{$next}) { + my @values = @{$self->field_values($key)}; + $values[1] = $next->{$key}; + $self->fields->{$key}{HOLDINGS} = \@values; + $next->{$key} = join('-', @values); + } + + $self->update(%{$next}); # update underlying subfields + return $self; +} + +# +# Turns a compressed holding into the singular form of the first member +# in the range +# +sub compressed_to_first { + my $self = shift; + + if (!$self->is_compressed) { + carp "Holding not compressed, cannot convert to first member"; + return $self; + } + + my %changes; + foreach my $key (keys %{$self->fields}) { + my @values = @{$self->field_values($key)}; + $self->fields->{$key}{HOLDINGS} = [$values[0]]; + $changes{$key} = $values[0]; + } + + $self->update(%changes); # update underlying subfields + $self->is_compressed(0); # remove compressed state + + return $self; +} + +# # Turns a compressed holding into the singular form of the last member # in the range # @@ -627,4 +677,117 @@ sub _uncombine { my @parts = split('/', $combo); return $parts[$pos]; } + +# +# Overload string comparison operators +# +# We are not overloading '<=>' because '==' is used liberally in MARC::Record +# to compare field identity (i.e. is this the same exact Field object?), not value +# +# Other string operators are auto-generated from 'cmp' +# +# Please note that this comparison is based on what the holding represents, +# not whether it is strictly identical (e.g. the seqno and link may vary) +# +use overload ('cmp' => \&_compare, + 'fallback' => 1); +sub _compare { + my ($holding_1, $holding_2) = @_; + + # TODO: this needs some more consideration + # fall back to 'built-in' comparison + if (!UNIVERSAL::isa($holding_2, ref $holding_1)) { + if (defined $holding_2) { + carp("Use of non-holding in holding comparison operation"); + return ( "$holding_1" cmp "$holding_2" ); + } else { + carp("Use of undefined value in holding comparison operation"); + return 1; # similar to built-in, something is "greater than" nothing + } + } + + # special cases for compressed holdings + my ($holding_1_first, $holding_1_last, $holding_2_first, $holding_2_last, $found_compressed); + # 0 for no compressed, 1 for first compressed, 2 for second compressed, 3 for both compressed + $found_compressed = 0; + if ($holding_1->is_compressed) { + $holding_1_last = $holding_1->clone->compressed_to_last; + $found_compressed += 1; + } else { + $holding_1_first = $holding_1; + $holding_1_last = $holding_1; + } + if ($holding_2->is_compressed) { + $holding_2_first = $holding_2->clone->compressed_to_first; + $found_compressed += 2; + } else { + $holding_2_first = $holding_2; + $holding_2_last = $holding_2; + } + + if ($found_compressed) { + my $cmp = ($holding_1_last cmp $holding_2_first); # 1 ends before 2 starts + if ($cmp == -1) { + return -1; # 1 is fully lt + } elsif ($cmp == 0) { + carp("Overlapping holdings in comparison, lt and gt based on start value only"); + return -1; + } else { # check the opposite, 2 ends before 1 starts + # clone is expensive, wait until we need it (here) + if (!defined($holding_2_last)) { + $holding_2_last = $holding_2->clone->compressed_to_last; + } + if (!defined($holding_1_first)) { + $holding_1_first = $holding_1->clone->compressed_to_first; + } + $cmp = ($holding_2_last cmp $holding_1_first); + if ($cmp == -1) { + return 1; # 1 is fully gt + } elsif ($cmp == 0) { + carp("Overlapping holdings in comparison, lt and gt based on start value only"); + return 1; + } else { + $cmp = ($holding_1_first cmp $holding_2_first); + if (!$cmp) { # they are not equal + carp("Overlapping holdings in comparison, lt and gt based on start value only"); + return $cmp; + } elsif ($found_compressed == 1) { + carp("Compressed holding found with start equal to non-compressed holding"); + return 1; # compressed (first holding) is 'greater than' non-compressed + } elsif ($found_compressed == 2) { + carp("Compressed holding found with start equal to non-compressed holding"); + return -1; # compressed (second holding) is 'greater than' non-compressed + } else { # both holdings compressed, check for full equality + $cmp = ($holding_1_last cmp $holding_2_last); + if (!$cmp) { # they are not equal + carp("Compressed holdings in comparison have equal starts, lt and gt based on end value only"); + return $cmp; + } else { + return 0; # both are compressed, both ends are equal + } + } + } + } + } + + # start doing the actual comparison + my $result; + foreach my $key ('a'..'f') { + if (defined($holding_1->field_values($key))) { + if (!defined($holding_2->field_values($key))) { + return 1; # more details equals 'greater' (?) + } else { + $result = $holding_1->field_values($key)->[0] <=> $holding_2->field_values($key)->[0]; + } + } elsif (defined($holding_2->field_values($key))) { + return -1; # more details equals 'greater' (?) + } + + return $result if $result; + } + + # got through, return 0 for equal + return 0; +} + 1; diff --git a/Open-ILS/src/perlmods/OpenILS/Utils/MFHD/test/testlib.pm b/Open-ILS/src/perlmods/OpenILS/Utils/MFHD/test/testlib.pm index a00023142f..9953a569d1 100644 --- a/Open-ILS/src/perlmods/OpenILS/Utils/MFHD/test/testlib.pm +++ b/Open-ILS/src/perlmods/OpenILS/Utils/MFHD/test/testlib.pm @@ -55,7 +55,6 @@ sub load_MARC_rec { $field = MARC::Field->new( $fieldno, $inds[0], $inds[1], - a => 'scratch', @subfields ); -- 2.11.0