From e9cd992fa4dbf1013346336193cb7469ecee10a8 Mon Sep 17 00:00:00 2001 From: Dan Wells Date: Tue, 19 Jul 2011 12:02:17 -0400 Subject: [PATCH] MFHD compressed holding fixes 1. Add new utility method to MFHD.pm: holdings_by_caption(): return all holdings attached to a given caption 2. Add 'passthru_open_ended' option to get_decompressed_holdings() Previously, attempts to decompress an open-ended holding would error out. Now, in the absence of this option, open-ended holdings are discarded and you get a warning (since they cannot logically be decompressed), while if this option is 'true' they are passed back unaffected 3. compressed_to_last() on an open-ended holding now more correctly returns 'undef' rather than the unaltered holding 4. get_compressed_holdings() will now honor an open-ended holding by treating it as "infinite", and thus absorbing any holdings which would follow 5. Overloaded comparison operator now correctly detects "swap" cases (where only the second operand is a holding) 6. Overloaded comparison operator now consistently treats open-ended holdings as "greater-than" a single or closed holding which has the same starting point 7. Fix 2 thinkos in comparison operator for overlapping compressed holdings 8. Add new compressed_end() method to Holding.pm which can add or set a new ending to a holding, making compressed if needed 9. Correctly recognize partially compressed holdings If a holding is defined as: 863 40 $81.1 $a1 $b2-10 this actually means: 863 40 $81.1 $a1-1 $b2-10 so let's make sure to treat it that way. 9. Expand the MFHD test suite to better cover the improvements in this commit There is also the beginnings of a _get_truncated_holdings() method for handling odd cases where an open-ended holding is followed by a single/closed holding, but it is currently commented out, pending further design consideration. Signed-off-by: Dan Wells --- Open-ILS/src/perlmods/lib/OpenILS/Utils/MFHD.pm | 105 ++++++++++++++++++--- .../src/perlmods/lib/OpenILS/Utils/MFHD/Holding.pm | 69 ++++++++++++-- .../perlmods/lib/OpenILS/Utils/MFHD/test/mfhd.t | 63 +++++++++++++ .../lib/OpenILS/Utils/MFHD/test/mfhddata2.txt | 57 +++++++++++ 4 files changed, 270 insertions(+), 24 deletions(-) create mode 100644 Open-ILS/src/perlmods/lib/OpenILS/Utils/MFHD/test/mfhddata2.txt diff --git a/Open-ILS/src/perlmods/lib/OpenILS/Utils/MFHD.pm b/Open-ILS/src/perlmods/lib/OpenILS/Utils/MFHD.pm index 5b728c039d..6964c56d1b 100644 --- a/Open-ILS/src/perlmods/lib/OpenILS/Utils/MFHD.pm +++ b/Open-ILS/src/perlmods/lib/OpenILS/Utils/MFHD.pm @@ -252,6 +252,16 @@ sub holdings { values %{$self->{_mfhd_HOLDINGS}->{$field}->{$capid}}; } +sub holdings_by_caption { + my $self = shift; + my $caption = shift; + + my $htag = $caption->tag; + my $link_id = $caption->link_id; + $htag =~ s/^85/86/; + return $self->holdings($htag, $link_id); +} + sub _holding_date { my $self = shift; my $holding = shift; @@ -348,32 +358,44 @@ sub get_compressed_holdings { my $opts = shift; my $skip_sort = $opts->{'skip_sort'}; - # make sure none are compressed + # make sure none are compressed (except for open-ended) my @decomp_holdings; if ($skip_sort) { - @decomp_holdings = $self->get_decompressed_holdings($caption, {'skip_sort' => 1}); + @decomp_holdings = $self->get_decompressed_holdings($caption, {'skip_sort' => 1, 'passthru_open_ended' => 1}); } else { # sort for best algorithm - @decomp_holdings = $self->get_decompressed_holdings($caption, {'dedupe' => 1}); + @decomp_holdings = $self->get_decompressed_holdings($caption, {'dedupe' => 1, 'passthru_open_ended' => 1}); } return () if !@decomp_holdings; + # if first holding is open-ended, it 'includes' all the rest, so return + if ($decomp_holdings[0]->is_open_ended) { + return ($decomp_holdings[0]); + } + my $runner = $decomp_holdings[0]->clone->increment; my $curr_holding = shift(@decomp_holdings); $curr_holding = $curr_holding->clone; my $seqno = 1; $curr_holding->seqno($seqno); my @comp_holdings; -# my $last_holding; foreach my $holding (@decomp_holdings) { if ($runner eq $holding) { $curr_holding->extend; $runner->increment; -# } elsif ($holding eq $last_holding) { -# carp("Found duplicate holding in compression set, skipping"); } elsif ($runner gt $holding) { # should not happen unless holding is not in series carp("Found unexpected holding, skipping"); + } elsif ($holding->is_open_ended) { # special case, as it will always be the last + if ($runner eq $holding->clone->compressed_to_first) { + $curr_holding->compressed_end(); + } else { + push(@comp_holdings, $curr_holding); + $curr_holding = $holding->clone; + $seqno++; + $curr_holding->seqno($seqno); + } + last; } else { push(@comp_holdings, $curr_holding); while ($runner le $holding) { @@ -383,7 +405,6 @@ sub get_compressed_holdings { $seqno++; $curr_holding->seqno($seqno); } -# $last_holding = $holding; } push(@comp_holdings, $curr_holding); @@ -394,10 +415,12 @@ sub get_compressed_holdings { # create an array of single holdings from all holdings for a given caption, # decompressing as needed # -# resulting array is returned as they come in the record, unsorted -# -# optional argument will reorder and renumber the holdings before returning -# +# optional arguments: +# skip_sort: do not sort the returned holdings +# dedupe: remove any duplicate holdings from the set +# passthru_open_ended: open-ended compressed holdings cannot be logically +# decompressed (they are infinite); if set to true these holdings are passed +# thru rather than skipped # TODO: some of this could be moved to the Caption (and/or Holding) object to # allow for decompression in the absense of an overarching MFHD object # @@ -407,15 +430,13 @@ sub get_decompressed_holdings { my $opts = shift; my $skip_sort = $opts->{'skip_sort'}; my $dedupe = $opts->{'dedupe'}; + my $passthru_open_ended = $opts->{'passthru_open_ended'}; if ($dedupe and $skip_sort) { carp("Attempted deduplication without sorting, failure likely"); } - my $htag = $caption->tag; - my $link_id = $caption->link_id; - $htag =~ s/^85/86/; - my @holdings = $self->holdings($htag, $link_id); + my @holdings = $self->holdings_by_caption($caption); return () if !@holdings; @@ -424,6 +445,12 @@ sub get_decompressed_holdings { foreach my $holding (@holdings) { if (!$holding->is_compressed) { push(@decomp_holdings, $holding->clone); + } elsif ($holding->is_open_ended) { + if ($passthru_open_ended) { + push(@decomp_holdings, $holding->clone); + } else { + carp("Open-ended holdings cannot be decompressed, skipping"); + } } else { my $base_holding = $holding->clone->compressed_to_first; my @new_holdings = $self->generate_predictions( @@ -454,6 +481,54 @@ sub get_decompressed_holdings { return @return_holdings; } +## +## close any open-ended holdings which are followed by another holding by +## combining them +## +## This needs more thought about concerning usability (e.g. should it be a +## mutator?), commenting out for now +#sub _get_truncated_holdings { +# my $self = shift; +# my $caption = shift; +# +# my @holdings = $self->holdings_by_caption($caption); +# +# return () if !@holdings; +# +# @holdings = sort {$a cmp $b} @holdings; +# +# my $current_open_holding; +# my @truncated_holdings; +# foreach my $holding (@holdings) { +# if ($current_open_holding) { +# if ($holding->is_open_ended) { +# next; # consecutive open holdings are meaningless, as they are contained by the previous +# } elsif ($holding->is_compressed) { +# $current_open_holding->compressed_end($holding->compressed_to_last); +# } else { +# $current_open_holding->compressed_end($holding); +# } +# push(@truncated_holdings, $current_open_holding); +# $current_open_holding = undef; +# } elsif ($holding->is_open_ended) { +# $current_open_holding = $holding; +# } else { +# push(@truncated_holdings, $holding); +# } +# } +# +# # catch possible open holding at end +# push(@truncated_holdings, $current_open_holding) if $current_open_holding; +# +# my $seqno = 1; +# foreach my $holding (@truncated_holdings) { # renumber sequence +# $holding->seqno($seqno); +# $seqno++; +# } +# +# return @truncated_holdings; +#} + # # format_holdings(): Generate textual display of all holdings in record # for given type of caption (853--855) taking into account all the diff --git a/Open-ILS/src/perlmods/lib/OpenILS/Utils/MFHD/Holding.pm b/Open-ILS/src/perlmods/lib/OpenILS/Utils/MFHD/Holding.pm index ce08d4f4a1..9b673c2722 100644 --- a/Open-ILS/src/perlmods/lib/OpenILS/Utils/MFHD/Holding.pm +++ b/Open-ILS/src/perlmods/lib/OpenILS/Utils/MFHD/Holding.pm @@ -44,7 +44,10 @@ sub new { next; } if ($self->{_mfhdh_COMPRESSED}) { - $self->{_mfhdh_FIELDS}->{$key}{HOLDINGS} = [split(/\-/, $val)]; + $self->{_mfhdh_FIELDS}->{$key}{HOLDINGS} = [split(/\-/, $val, -1)]; + if (!defined($self->{_mfhdh_FIELDS}->{$key}{HOLDINGS}[1])) { + $self->{_mfhdh_FIELDS}->{$key}{HOLDINGS}[1] = $self->{_mfhdh_FIELDS}->{$key}{HOLDINGS}[0]; + } } else { $self->{_mfhdh_FIELDS}->{$key}{HOLDINGS} = [$val]; } @@ -561,7 +564,7 @@ sub compressed_to_last { return $self; } elsif ($self->is_open_ended) { carp "Holding is open-ended, cannot convert to last member"; - return $self; + return undef; } my %changes; @@ -578,6 +581,40 @@ sub compressed_to_last { } # +# Creates or replaces an end of a compressed holding +# +sub compressed_end { + my $self = shift; + my $end_holding = shift; + + my %changes; + if ($end_holding) { + foreach my $key (keys %{$self->fields}) { + my @values = @{$self->field_values($key)}; + my @end_values = @{$end_holding->field_values($key)}; + $values[1] = $end_values[0]; + $self->fields->{$key}{HOLDINGS} = \@values; + $changes{$key} = join('-', @values); + } + } elsif (!$self->is_open_ended) { # make open-ended if no $end_holding + foreach my $key (keys %{$self->fields}) { + my @values = @{$self->field_values($key)}; + $self->fields->{$key}{HOLDINGS} = [$values[0]]; + $changes{$key} = $values[0] . '-'; + } + $self->{_mfhdh_OPEN_ENDED} = 1; #TODO: setter for this value + } + + $self->update(%changes); # update underlying subfields + + if (!$self->is_compressed) { + $self->is_compressed(1); # add compressed state + } + + return $self; +} + +# # Basic, working, unoptimized clone operation # sub clone { @@ -707,17 +744,23 @@ sub _uncombine { # Please note that this comparison is based on what the holding represents, # not whether it is strictly identical (e.g. the seqno and link may vary) # +# XXX: sorting using this operator is currently not deterministic for +# nonsensical holdings (e.g. V.10-V.5), and may require further consideration use overload ('cmp' => \&_compare, 'fallback' => 1); sub _compare { - my ($holding_1, $holding_2) = @_; + my ($holding_1, $holding_2, $swap) = @_; # TODO: this needs some more consideration # fall back to 'built-in' comparison if (!UNIVERSAL::isa($holding_2, ref $holding_1)) { if (defined $holding_2) { - carp("Use of non-holding in holding comparison operation"); - return ( "$holding_1" cmp "$holding_2" ); + carp("Use of non-holding in holding comparison operation") if $holding_2 ne '~~~'; + if ($swap) { + return ( "$holding_2" cmp "$holding_1" ); + } else { + return ( "$holding_1" cmp "$holding_2" ); + } } else { carp("Use of undefined value in holding comparison operation"); return 1; # similar to built-in, something is "greater than" nothing @@ -729,7 +772,11 @@ sub _compare { # 0 for no compressed, 1 for first compressed, 2 for second compressed, 3 for both compressed $found_compressed = 0; if ($holding_1->is_compressed) { - $holding_1_last = $holding_1->clone->compressed_to_last; + if (!$holding_1->is_open_ended) { + $holding_1_last = $holding_1->clone->compressed_to_last; + } else { + $holding_1_last = '~~~'; # take advantage of string sort fallback + } $found_compressed += 1; } else { $holding_1_first = $holding_1; @@ -753,7 +800,11 @@ sub _compare { } else { # check the opposite, 2 ends before 1 starts # clone is expensive, wait until we need it (here) if (!defined($holding_2_last)) { - $holding_2_last = $holding_2->clone->compressed_to_last; + if (!$holding_2->is_open_ended) { + $holding_2_last = $holding_2->clone->compressed_to_last; + } else { + $holding_2_last = '~~~'; # take advantage of string sort fallback + } } if (!defined($holding_1_first)) { $holding_1_first = $holding_1->clone->compressed_to_first; @@ -766,7 +817,7 @@ sub _compare { return 1; } else { $cmp = ($holding_1_first cmp $holding_2_first); - if (!$cmp) { # they are not equal + if ($cmp) { # they are not equal carp("Overlapping holdings in comparison, lt and gt based on start value only"); return $cmp; } elsif ($found_compressed == 1) { @@ -777,7 +828,7 @@ sub _compare { return -1; # compressed (second holding) is 'greater than' non-compressed } else { # both holdings compressed, check for full equality $cmp = ($holding_1_last cmp $holding_2_last); - if (!$cmp) { # they are not equal + if ($cmp) { # they are not equal carp("Compressed holdings in comparison have equal starts, lt and gt based on end value only"); return $cmp; } else { diff --git a/Open-ILS/src/perlmods/lib/OpenILS/Utils/MFHD/test/mfhd.t b/Open-ILS/src/perlmods/lib/OpenILS/Utils/MFHD/test/mfhd.t index a478b32dfa..d9ad34bc6f 100644 --- a/Open-ILS/src/perlmods/lib/OpenILS/Utils/MFHD/test/mfhd.t +++ b/Open-ILS/src/perlmods/lib/OpenILS/Utils/MFHD/test/mfhd.t @@ -63,4 +63,67 @@ while ($rec = testlib::load_MARC_rec($testdata, $testno++)) { } } +close $testdata; + +# test: passthru_open_ended +open($testdata, "new(testlib::load_MARC_rec($testdata, $testno)); +my $rec2 = MFHD->new(testlib::load_MARC_rec($testdata, $testno)); + +my @holdings_a = $rec->get_decompressed_holdings(($rec->captions('853'))[0], {'passthru_open_ended' => 1}); +my @holdings_b = $rec2->holdings_by_caption(($rec2->captions('853'))[0]); + +is_deeply(\@holdings_a, \@holdings_b, 'passthru open ended'); + +# test: compressed to last +$testno++; + +$rec = MFHD->new(testlib::load_MARC_rec($testdata, $testno)); +$rec2 = MFHD->new(testlib::load_MARC_rec($testdata, $testno)); + +@holdings_a = $rec->holdings_by_caption(($rec->captions('853'))[0]); +@holdings_b = $rec2->holdings_by_caption(($rec2->captions('853'))[0]); + +is_deeply($holdings_a[0]->compressed_to_last, $holdings_b[0], 'compressed to last, normal'); +is($holdings_a[1]->compressed_to_last, undef, 'compressed to last, open ended'); + +# test: get compressed holdings, open ended member +$testno++; + +$rec = MFHD->new(testlib::load_MARC_rec($testdata, $testno)); +$rec2 = MFHD->new(testlib::load_MARC_rec($testdata, $testno)); + +@holdings_a = $rec->get_compressed_holdings(($rec->captions('853'))[0]); +@holdings_b = $rec2->holdings_by_caption(($rec2->captions('853'))[0]); + +is_deeply(\@holdings_a, \@holdings_b, 'get compressed holdings, open ended member'); + +# test comparisons, for all operands, for all types of holdings +$testno++; + +$rec = MFHD->new(testlib::load_MARC_rec($testdata, $testno)); +$rec2 = MFHD->new(testlib::load_MARC_rec($testdata, $testno)); + +@holdings_a = $rec->holdings_by_caption(($rec->captions('853'))[0]); +@holdings_b = $rec2->holdings_by_caption(($rec2->captions('853'))[0]); + +unshift(@holdings_a, "zzz I am NOT a holding"); +push(@holdings_b, "zzz I am NOT a holding"); + +push(@holdings_a, undef); +unshift(@holdings_b, undef); + +@holdings_a = sort { $a cmp $b } @holdings_a; +my $seqno = 1; +foreach my $holding (@holdings_a) { + if (ref $holding) { + $holding->seqno($seqno); + $seqno++; + } +} + +is_deeply(\@holdings_a, \@holdings_b, 'comparison testing via sort'); + +close $testdata; 1; diff --git a/Open-ILS/src/perlmods/lib/OpenILS/Utils/MFHD/test/mfhddata2.txt b/Open-ILS/src/perlmods/lib/OpenILS/Utils/MFHD/test/mfhddata2.txt new file mode 100644 index 0000000000..a450bb41f8 --- /dev/null +++ b/Open-ILS/src/perlmods/lib/OpenILS/Utils/MFHD/test/mfhddata2.txt @@ -0,0 +1,57 @@ +245 00 $aPassthru Open Ended, test data +853 20 $81$av.$bno.$u12$vr$i(year)$j(month)$wm$x01 +863 40 $81.1$a1$b6-8$i1990$j06-08 +863 40 $81.2$a1-$b11-$i1990-$j11- + +245 00 $aPassthru Open Ended, test results +853 20 $81$av.$bno.$u12$vr$i(year)$j(month)$wm$x01 +863 41 $81.1$a1$b6$i1990$j06 +863 41 $81.2$a1$b7$i1990$j07 +863 41 $81.3$a1$b8$i1990$j08 +863 40 $81.4$a1-$b11-$i1990-$j11- + +245 00 $aCompressed to last, part 1 +853 20 $81$av.$bno.$u12$vr$i(year)$j(month)$wm$x01 +863 40 $81.1$a1$b6-8$i1990$j06-08 +863 40 $81.2$a1-$b11-$i1990-$j11- + +245 00 $aCompressed to last, part 2 +853 20 $81$av.$bno.$u12$vr$i(year)$j(month)$wm$x01 +863 41 $81.1$a1$b8$i1990$j08 + +245 00 $aGet compressed holdings, open ended member +853 20 $81$av.$bno.$u12$vr$i(year)$j(month)$wm$x01 +863 41 $81.1$a1$b4$i1990$j04 +863 41 $81.2$a1$b5$i1990$j05 +863 40 $81.3$a1-$b6-$i1990-$j06- +863 41 $81.4$a1$b8$i1990$j08 + +245 00 $aGet compressed holdings, open ended member, result +853 20 $81$av.$bno.$u12$vr$i(year)$j(month)$wm$x01 +863 40 $81.1$a1-$b4-$i1990-$j04- + +245 00 $aComparison test, unsorted +853 20 $81$av.$bno.$u12$vr$i(year)$j(month)$wm$x01 +863 40 $81.1$a1$b4-7$i1990$j04-07 +863 40 $81.2$a1$b4-7$i1990$j04-07 +863 41 $81.3$a1$b4$i1990$j04 +863 41 $81.4$a1$b4$i1990$j04 +863 40 $81.5$a1-$b3-$i1990-$j03- +863 40 $81.6$a1$b3-6$i1990$j03-06 +863 40 $81.7$a1$b3-5$i1990$j03-05 +863 41 $81.8$a1$b3$i1990$j03 +863 41 $81.9$a1$b2$i1990$j02 + +245 00 $aComparison test, sorted +853 20 $81$av.$bno.$u12$vr$i(year)$j(month)$wm$x01 +863 41 $81.1$a1$b2$i1990$j02 +863 41 $81.2$a1$b3$i1990$j03 +863 40 $81.3$a1$b3-5$i1990$j03-05 +863 40 $81.4$a1$b3-6$i1990$j03-06 +863 40 $81.5$a1-$b3-$i1990-$j03- +863 41 $81.6$a1$b4$i1990$j04 +863 41 $81.7$a1$b4$i1990$j04 +863 40 $81.8$a1$b4-7$i1990$j04-07 +863 40 $81.9$a1$b4-7$i1990$j04-07 + + -- 2.11.0