From 570c7224da3a59663f11d43908529b253fcb6f12 Mon Sep 17 00:00:00 2001 From: erickson Date: Fri, 14 Mar 2008 15:14:41 +0000 Subject: [PATCH] staged search caching and paging git-svn-id: svn://svn.open-ils.org/ILS/trunk@9012 dcc99617-32d9-48b4-a31d-7c20da2025e4 --- .../perlmods/OpenILS/Application/Search/Biblio.pm | 128 +++++++++++++-------- 1 file changed, 82 insertions(+), 46 deletions(-) diff --git a/Open-ILS/src/perlmods/OpenILS/Application/Search/Biblio.pm b/Open-ILS/src/perlmods/OpenILS/Application/Search/Biblio.pm index dd11164d94..5e160d2513 100644 --- a/Open-ILS/src/perlmods/OpenILS/Application/Search/Biblio.pm +++ b/Open-ILS/src/perlmods/OpenILS/Application/Search/Biblio.pm @@ -737,9 +737,8 @@ __PACKAGE__->register_method( api_name => 'open-ils.search.metabib.multiclass.staged.staff', signature => q/@see open-ils.search.biblio.multiclass.staged/); -my $CACHE_LIMIT = 200; -my $CHECK_LIMIT = 1000; - +my $CHECK_SIZE = 1000; +my $SEARCH_PAGES = 25; sub staged_search { my($self, $conn, $search_hash, $nocache) = @_; @@ -750,53 +749,87 @@ sub staged_search { $method .= '.staff' if $self->api_name =~ /staff$/; $method .= '.atomic'; - $search_hash->{skip_check} ||= 0; + my $user_offset = $search_hash->{offset} || 0; # user-specified offset + my $user_limit = $search_hash->{limit} || 10; + + # we're grabbing results on a per-superpage basis, which means the + # limit and offset should coincide with superpage boundaries + $search_hash->{offset} = 0; + $search_hash->{limit} = $CHECK_SIZE; + + # pull any existing results from the cache + my $key = search_cache_key($method, $search_hash); + my $cache_data = $cache->get_cache($key) || {}; + + # keep retrieving results until we find enough to + # fulfill the user-specified limit and offset + my $all_results = []; + my $avg_hit_count = 0; + my $page; # current superpage + + for($page = 0; $page < $SEARCH_PAGES; $page++) { - my ($hit_count, $results) = try_staged_search_cache($method, $search_hash); + my $data = $cache_data->{$page}; + my $results; + my $summary; - if($results) { - $nocache = 1; + $logger->debug("staged search: analyzing superpage $page"); - } else { - $results = $U->storagereq($method, %$search_hash); - my $summary = shift(@$results); - $hit_count = $summary->{estimated_hit_count}; + if($data) { + # this window of results is already cached + $logger->debug("staged search: found cached results"); + $summary = $data->{summary}; + $results = $data->{results}; - # Clean up the results - if($self->api_name =~ /biblio/) { - $results = [map {$_->{id}} @$results]; } else { - delete $_->{rel} for @$results; + # retrieve the window of results from the database + $logger->debug("staged search: fetching results from the database"); + $search_hash->{skip_check} = $page * $CHECK_SIZE; + $results = $U->storagereq($method, %$search_hash); + $summary = shift(@$results); + + # Clean up the raw search results + if($self->api_name =~ /biblio/) { + $results = [map {$_->{id}} @$results]; + } else { + delete $_->{rel} for @$results; + } + + cache_staged_search_page($key, $page, $summary, $results) unless $nocache; } - - cache_staged_search($method, $search_hash, $summary, $results) unless $nocache; - } - return { - count => $hit_count, - results => $results - }; -} + # add the new set of results to the set under construction + push(@$all_results, grep {defined $_} @$results); -sub try_staged_search_cache { - my $method = shift; - my $search_hash = shift; + my $current_count = scalar(@$all_results); + $avg_hit_count += $summary->{estimated_hit_count} || $summary->{visible}; - my $key = search_cache_key($method, $search_hash); - my $start = $search_hash->{offset}; - my $end = $start + $search_hash->{limit} - 1; - my $data = $cache->get_cache($key); + $logger->debug("staged search: located $current_count, with estimated hits=". + $summary->{estimated_hit_count}." : visible=".$summary->{visible}); - $logger->info("searching search cache $key with skip_check $$search_hash{skip_check}"); - return undef unless $data; - $logger->info("searching search cache $key with skip_check $$search_hash{skip_check}"); - return undef unless $data = $data->{$$search_hash{skip_check}}; - $logger->info("returning search cache $key with skip_check $$search_hash{skip_check}"); + # no results for this search + last if $current_count == 0; - return ( - $data->{summary}->{estimated_hit_count}, - $data->{results} - ); + # we've found all the possible hits + last if $current_count == $summary->{visible} + and not defined $summary->{estimated_hit_count}; + + # we've found enough results to satisfy the requested limit/offset + last if $current_count >= ($user_limit + $user_offset); + + + } + + # calculate the average estimated hit count from the data we've collected thus far + $avg_hit_count = int($avg_hit_count / ++$page); + $avg_hit_count = scalar(@$all_results) if scalar(@$all_results) > $avg_hit_count; + + my @results = grep {defined $_} @$all_results[$user_offset..($user_offset + $user_limit - 1)]; + + return { + count => $avg_hit_count, + results => \@results + }; } # creates a unique token to represent the query in the cache @@ -814,17 +847,20 @@ sub search_cache_key { return $pfx . md5_hex($method . $s); } -sub cache_staged_search { - my($method, $search_hash, $summary, $results) = @_; - my $cache_key = search_cache_key($method, $search_hash); - my $data = $cache->get_cache($cache_key); +sub cache_staged_search_page { + # puts this set of results into the cache + my($key, $page, $summary, $results) = @_; + my $data = $cache->get_cache($key); $data ||= {}; - $data->{$search_hash->{skip_check}} = { + $data->{$page} = { summary => $summary, results => $results }; - $logger->info("cached ranged search with skip_check $$search_hash{skip_check} and key $cache_key"); - $cache->put_cache($data); + + $logger->info("staged search: cached with key=$key, superpage=$page, estimated=". + $summary->{estimated_hit_count}.", visible=".$summary->{visible}); + + $cache->put_cache($key, $data); } sub search_cache { -- 2.11.0