backporting staged search
authorerickson <erickson@dcc99617-32d9-48b4-a31d-7c20da2025e4>
Thu, 20 Mar 2008 12:07:29 +0000 (12:07 +0000)
committererickson <erickson@dcc99617-32d9-48b4-a31d-7c20da2025e4>
Thu, 20 Mar 2008 12:07:29 +0000 (12:07 +0000)
git-svn-id: svn://svn.open-ils.org/ILS/branches/rel_1_2@9097 dcc99617-32d9-48b4-a31d-7c20da2025e4

Open-ILS/src/perlmods/OpenILS/Application/Search/Biblio.pm
Open-ILS/web/opac/locale/en-US/opac.dtd
Open-ILS/web/opac/skin/default/js/result_common.js
Open-ILS/web/opac/skin/default/js/sidebar_extras.js
Open-ILS/web/opac/skin/default/xml/result/result_info.xml
Open-ILS/web/opac/skin/default/xml/result/result_table.xml

index 86cfdb3..d798c5f 100644 (file)
@@ -1,5 +1,5 @@
 package OpenILS::Application::Search::Biblio;
-use base qw/OpenSRF::Application/;
+use base qw/OpenILS::Application/;
 use strict; use warnings;
 
 
@@ -428,7 +428,7 @@ __PACKAGE__->register_method(
                 subclasses, specified with a "|". For example, "title|proper:gone with the wind" 
                 For more, see config.metabib_field
 
-        @param nocache @see open-ils.search.biblio.multiclass
+        @param docache @see open-ils.search.biblio.multiclass
     #
 );
 __PACKAGE__->register_method(
@@ -448,6 +448,7 @@ sub multiclass_query {
     my($self, $conn, $arghash, $query, $docache) = @_;
 
     $logger->debug("initial search query => $query");
+    my $orig_query = $query;
 
     $query = decode_utf8($query);
     $query =~ s/\+/ /go;
@@ -513,12 +514,21 @@ sub multiclass_query {
     # capture the original limit because the search method alters the limit internally
     my $ol = $arghash->{limit};
 
+       my $sclient = OpenSRF::Utils::SettingsClient->new;
+
     (my $method = $self->api_name) =~ s/\.query//o;
+
+    $method =~ s/multiclass/multiclass.staged/
+        if $sclient->config_value(apps => 'open-ils.search',
+            app_settings => 'use_staged_search') =~ /true/i;
+
+
        $method = $self->method_lookup($method);
     my ($data) = $method->run($arghash, $docache);
 
     $arghash->{limit} = $ol if $ol;
     $data->{compiled_search} = $arghash;
+    $data->{query} = $orig_query;
 
     $logger->info("compiled search is " . OpenSRF::Utils::JSON->perl2JSON($arghash));
 
@@ -654,6 +664,146 @@ sub the_quest_for_knowledge {
 }
 
 
+# Publish the staged_search handler under four API names: a biblio and a
+# metabib variant, each with a ".staff" counterpart.  staged_search inspects
+# its own api_name (for "metabib" and a trailing "staff") to choose which
+# storage method it calls.  The three later registrations point their
+# signature at the first API name.
+__PACKAGE__->register_method(
+       method          => 'staged_search',
+       api_name        => 'open-ils.search.biblio.multiclass.staged');
+__PACKAGE__->register_method(
+       method          => 'staged_search',
+       api_name        => 'open-ils.search.biblio.multiclass.staged.staff',
+       signature       => q/@see open-ils.search.biblio.multiclass.staged/);
+__PACKAGE__->register_method(
+       method          => 'staged_search',
+       api_name        => 'open-ils.search.metabib.multiclass.staged',
+       signature       => q/@see open-ils.search.biblio.multiclass.staged/);
+__PACKAGE__->register_method(
+       method          => 'staged_search',
+       api_name        => 'open-ils.search.metabib.multiclass.staged.staff',
+       signature       => q/@see open-ils.search.biblio.multiclass.staged/);
+
+# Size of one "superpage": the limit / check_limit sent with every storage
+# call, and the step by which skip_check advances from page to page.
+my $PAGE_SIZE = 1000;
+# Upper bound on the number of superpages examined for a single request.
+my $SEARCH_PAGES = 25;
+
+# staged_search($self, $conn, $search_hash, $docache)
+#
+# Runs a search one superpage at a time until enough hits have been collected
+# to satisfy the caller's limit/offset, the result set is exhausted, or
+# $SEARCH_PAGES superpages have been examined.
+#
+#   $search_hash  search arguments; its offset, limit, check_limit and
+#                 skip_check entries are overwritten in place by this sub
+#   $docache      when true, freshly fetched superpages are written to the
+#                 cache (previously cached superpages are always consulted)
+#
+# Returns a hashref:
+#   count => estimated_hit_count (or, failing that, visible) reported in the
+#            summary of the first superpage
+#   ids   => arrayref holding the requested window of results; each entry is
+#            [id] for the biblio API names and [id, rel, record] otherwise
+sub staged_search {
+    my($self, $conn, $search_hash, $docache) = @_;
+
+    my $method = ($self->api_name =~ /metabib/) ?
+        'open-ils.storage.metabib.multiclass.staged.search_fts':
+        'open-ils.storage.biblio.multiclass.staged.search_fts';
+
+    $method .= '.staff' if $self->api_name =~ /staff$/;
+    $method .= '.atomic';
+
+    my $user_offset = $search_hash->{offset} || 0; # user-specified offset
+    my $user_limit = $search_hash->{limit} || 10;
+
+    # we're grabbing results on a per-superpage basis, which means the 
+    # limit and offset should coincide with superpage boundaries
+    $search_hash->{offset} = 0;
+    $search_hash->{limit} = $PAGE_SIZE;
+    $search_hash->{check_limit} = $PAGE_SIZE; # force a well-known check_limit
+
+    # pull any existing results from the cache
+    my $key = search_cache_key($method, $search_hash);
+    my $cache_data = $cache->get_cache($key) || {};
+
+    # keep retrieving results until we find enough to 
+    # fulfill the user-specified limit and offset
+    my $all_results = [];
+    my $est_hit_count;
+
+    for my $page (0 .. $SEARCH_PAGES - 1) {
+
+        my $data = $cache_data->{$page};
+        my $results;
+        my $summary;
+
+        $logger->debug("staged search: analyzing superpage $page");
+
+        if($data) {
+            # this window of results is already cached
+            $logger->debug("staged search: found cached results");
+            $summary = $data->{summary};
+            $results = $data->{results};
+
+        } else {
+            # retrieve the window of results from the database
+            $logger->debug("staged search: fetching results from the database");
+            $search_hash->{skip_check} = $page * $PAGE_SIZE;
+            $results = $U->storagereq($method, %$search_hash);
+
+            # the first element of the response is the summary, not a hit
+            $summary = shift(@$results);
+
+            # Create backwards-compatible result structures
+            if($self->api_name =~ /biblio/) {
+                $results = [map {[$_->{id}]} @$results];
+            } else {
+                $results = [map {[$_->{id}, $_->{rel}, $_->{record}]} @$results];
+            }
+
+            $results = [grep {defined $_->[0]} @$results];
+            cache_staged_search_page($key, $page, $summary, $results) if $docache;
+        }
+
+        # add the new set of results to the set under construction
+        push(@$all_results, @$results);
+
+        my $current_count = scalar(@$all_results);
+
+        # the hit count reported to the caller comes from the first superpage
+        $est_hit_count = $summary->{estimated_hit_count} || $summary->{visible}
+            if $page == 0;
+
+        # Summary fields may be undefined (estimated_hit_count is explicitly
+        # tested for that below).  Render undefined values as empty strings,
+        # which is what plain concatenation would print anyway, so the log
+        # text is unchanged but no "uninitialized value" warning is raised.
+        my ($log_est, $log_visible, $log_checked) = map {
+            defined $summary->{$_} ? $summary->{$_} : ''
+        } qw/estimated_hit_count visible checked/;
+
+        $logger->debug("staged search: located $current_count, with estimated hits=".
+            $log_est." : visible=".$log_visible.", checked=".$log_checked);
+
+        # we've found all the possible hits
+        # ("|| 0" keeps the numeric comparison warning-free when the field is undefined)
+        last if $current_count == ($summary->{visible} || 0)
+            and not defined $summary->{estimated_hit_count};
+
+        # we've found enough results to satisfy the requested limit/offset
+        last if $current_count >= ($user_limit + $user_offset);
+
+        # we've scanned all possible hits
+        last if ($summary->{checked} || 0) < $PAGE_SIZE;
+    }
+
+    # slice out the window the user asked for; positions past the end of the
+    # collected results come back undefined and are dropped
+    my @results = grep {defined $_} @$all_results[$user_offset..($user_offset + $user_limit - 1)];
+
+    return {
+        count => $est_hit_count,
+        ids => \@results
+    };
+}
+
+# Builds the cache token that identifies a query.
+#
+# The search arguments are flattened into a key-ordered list of key/value
+# pairs, leaving out the paging entries (limit, offset, skip_check).  The
+# list is serialized to JSON, prepended with the method name, hashed with
+# md5_hex, and prefixed with $pfx.
+sub search_cache_key {
+    my($method, $search_hash) = @_;
+
+    # paging arguments do not take part in the key
+    my %paging = map { $_ => 1 } qw/limit offset skip_check/;
+
+    # key-ordered, flattened (key, value, key, value, ...) list
+    my @pairs =
+        map  { ($_ => $search_hash->{$_}) }
+        grep { not $paging{$_} }
+        sort keys %$search_hash;
+
+    my $serialized = OpenSRF::Utils::JSON->perl2JSON(\@pairs);
+    return $pfx . md5_hex($method . $serialized);
+}
+
+# cache_staged_search_page($key, $page, $summary, $results)
+#
+# Puts one superpage of results into the cache.  The entry stored under $key
+# is a hash indexed by superpage number; each value holds that page's
+# summary and results.  The entry is read, updated with this page, and
+# written back.
+sub cache_staged_search_page {
+    my($key, $page, $summary, $results) = @_;
+
+    # start from whatever pages are already cached under this key
+    my $data = $cache->get_cache($key) || {};
+    $data->{$page} = {
+        summary => $summary,
+        results => $results
+    };
+
+    # estimated_hit_count may legitimately be undefined; render undefined
+    # values as empty strings (what concatenation would print anyway) so the
+    # log line is unchanged but raises no "uninitialized value" warning
+    my ($log_est, $log_visible) = map {
+        defined $summary->{$_} ? $summary->{$_} : ''
+    } qw/estimated_hit_count visible/;
+
+    $logger->info("staged search: cached with key=$key, superpage=$page, estimated=".
+        $log_est.", visible=".$log_visible);
+
+    $cache->put_cache($key, $data);
+}
 
 sub search_cache {
 
@@ -675,7 +825,6 @@ sub search_cache {
 
        return undef unless $offset < $count;
 
-
        my @result;
        for( my $i = $offset; $i <= $end; $i++ ) {
                last unless my $d = $$data[$i];
index f7c564a..e985dc9 100644 (file)
@@ -42,7 +42,7 @@
 <!ENTITY common.user_not_found "User not found">
 <!ENTITY opac.advanced.wizard.title "Advanced Search">
 <!ENTITY common.nowSearching "Now searching ">
-<!ENTITY common.ofAtLeast " of at least ">
+<!ENTITY common.ofAtLeast " of about ">
 <!ENTITY common.relevancy "Match Score: ">
 <!ENTITY common.tips "Tips:">
 <!ENTITY common.tips.tip1 "Click on a folder icon in the sidebar to access related quick searches">
index 0d9d02e..2229c48 100644 (file)
@@ -77,6 +77,8 @@ function resultCollectSearchIds( type, method, handler ) {
        if(getLitForm()) args.lit_form  = getLitForm().split(/,/);
        if(getLanguage()) args.language = getLanguage().split(/,/);
 
+    TERM = (TERM+'').replace(/\\/g,'');
+
        _debug('Search args: ' + js2JSON(args));
        _debug('Raw query: ' + getTerm());
 
@@ -340,11 +342,13 @@ function resultPaginate() {
                if( getHitCount() % getDisplayCount() == 0 ) 
                        args[PARAM_OFFSET] -= getDisplayCount();
 
+        /*
                G.ui.result.end_link.setAttribute("href", buildOPACLink(args)); 
                addCSSClass(G.ui.result.end_link, config.css.result.nav_active);
 
                $('end_link2').setAttribute("href", buildOPACLink(args)); 
                addCSSClass($('end_link2'), config.css.result.nav_active);
+        */
        }
 
        if( o > 0 ) {
index cb35d88..b64c810 100644 (file)
@@ -101,6 +101,7 @@ function resultFireXRefBatch(treeName, xrefCache, stype) {
        var query = [];
        for( var i = 0; i != xrefCache.length; i++ ) {
                var topic = xrefCache[i];
+        topic.term = (topic.term+'').replace(/\\/g,'');
                query.push( [ topic.type, topic.term ] );
        }
        var req = new Request(FETCH_CROSSREF_BATCH, query);
index fd1201e..0bdac8e 100644 (file)
                                        <span class='search_page_nav_link' id='page_numbers'> </span>
 
                                        <a  class='search_page_nav_link' id='next_link' 
-                                               title='&rdetail.page.next;'>&#187;</a><a class='search_page_nav_link' 
+                                               title='&rdetail.page.next;'>&#187;</a>
+                        <!--
+                        <a class='search_page_nav_link' 
                                                        id='end_link' title="&rdetail.page.last;">&rdetail.end;</a>
+                            -->
                                </span>
 
                        </td>
index cd1137b..495806b 100644 (file)
             
                    <span class='search_page_nav_link' id='page_numbers2'> </span>
                       <a  class='search_page_nav_link' id='next_link2' 
-                         title='&rdetail.page.next;'>&#187;</a><a class='search_page_nav_link' 
+                         title='&rdetail.page.next;'>&#187;</a>
+                      <!--
+                      <a class='search_page_nav_link' 
                          id='end_link2' title="&rdetail.page.last;">&rdetail.end;</a>
+                      -->
                    </span>
                         
                 </td>