OPAC Browse: use superpage concept for performance; fix other counting bug
authorLebbeous Fogle-Weekley <lebbeous@esilibrary.com>
Thu, 23 May 2013 22:23:54 +0000 (18:23 -0400)
committerBen Shum <bshum@biblio.org>
Mon, 15 Jul 2013 15:45:24 +0000 (11:45 -0400)
This commit does two totally different things.

1) It implements a superpages concept in the visibility testing that
happens for records linked to displayed browse headings. By default now
we will only test 100 records at at time for visible holdings w/in the
browse scope until at least 1 visible record is found (or the set of
records is exhausted).

In practical terms this will result in display of headings followed by
"at least 100" or "at least 92" instead of larger numbers, but there
should be a big speed win. Yes it's possible, though very unlikely, that
you'd get cases of "at least 1".

When there's less than a browse superpage worth of records to check
anyway, the count should be exact and won't say "at least."

2) It fixes a bug where the counts for the number of bib records linked
to an authority reference heading were not cached properly on the server
side, so upon page reloads or back/forward operations those counts could
disappear.

Signed-off-by: Lebbeous Fogle-Weekley <lebbeous@esilibrary.com>
Open-ILS/src/perlmods/lib/OpenILS/WWW/EGCatLoader/Browse.pm
Open-ILS/src/sql/Pg/030.schema.metabib.sql
Open-ILS/src/sql/Pg/950.data.seed-values.sql
Open-ILS/src/sql/Pg/upgrade/YYYY.schema.bib-auth-browse.sql
Open-ILS/src/templates/opac/browse.tt2

index ab98c46..cc9d73c 100644 (file)
@@ -174,7 +174,7 @@ sub find_authority_headings_and_notes {
 }
 
 sub map_authority_headings_to_results {
-    my ($self, $linked, $results) = @_;
+    my ($self, $linked, $results, $auth_ids) = @_;
 
     # Use the linked authority records' control sets to find and pick
     # out non-main-entry headings. Build the headings and make a
@@ -190,6 +190,46 @@ sub map_authority_headings_to_results {
             map { $linked_headings_by_auth_id{$_} } @{$row->{authorities}}
         ];
     }
+
+    # Get linked-bib counts for each of those authorities, and put THAT
+    # information into place in the data structure.
+    my $counts = $self->editor->json_query({
+        select => {
+            abl => [
+                {column => "id", transform => "count",
+                    alias => "count", aggregate => 1},
+                "authority"
+            ]
+        },
+        from => {abl => {}},
+        where => {
+            "+abl" => {
+                authority => [
+                    @$auth_ids,
+                    $U->unique_unnested_numbers(map { $_->{target} } @$linked)
+                ]
+            }
+        }
+    }) or return;
+
+    my %auth_counts = map { $_->{authority} => $_->{count} } @$counts;
+
+    # Soooo nesty!  We look for places where we'll need a count of bibs
+    # linked to an authority record, and put it there for the template to find.
+    for my $row (@$results) {
+        for my $auth (@{$row->{authorities}}) {
+            if ($auth->{headings}) {
+                for my $outer_heading (@{$auth->{headings}}) {
+                    for my $heading_blob (@{(values %$outer_heading)[0]}) {
+                        if ($heading_blob->{target}) {
+                            $heading_blob->{target_count} =
+                                $auth_counts{$heading_blob->{target}};
+                        }
+                    }
+                }
+            }
+        }
+    }
 }
 
 # flesh_browse_results() attaches data from authority records. It
@@ -226,34 +266,7 @@ sub flesh_browse_results {
             where => {"+are" => {id => \@auth_ids}}
         }) or return;
 
-
-        $self->map_authority_headings_to_results($linked, $results);
-
-        # Get use counts of authority records, i.e. number of bibs linked to
-        # them. - XXX refine later to consider holdings visibility.
-        my $counts = $self->editor->json_query({
-            select => {
-                abl => [
-                    {column => "id", transform => "count",
-                        alias => "count", aggregate => 1},
-                    "authority"
-                ]
-            },
-            from => {abl => {}},
-            where => {
-                "+abl" => {
-                    authority => [
-                        @auth_ids, $U->unique_unnested_numbers(
-                            map { $_->{target} } @$linked
-                        )
-                    ]
-                }
-            }
-        }) or return;
-
-        $self->ctx->{authority_counts} = {
-            map { $_->{authority} => $_->{count} } @$counts
-        };
+        $self->map_authority_headings_to_results($linked, $results, \@auth_ids);
     }
 
     return 1;
index 57976f5..ae74fba 100644 (file)
@@ -1780,7 +1780,10 @@ CREATE TYPE metabib.flat_browse_entry_appearance AS (
     fields          TEXT,
     authorities     TEXT,
     sources         INT,        -- visible ones, that is
-    row_number      INT         -- internal use, sort of
+    row_number      INT,        -- internal use, sort of
+    accurate        BOOL        -- Count in sources field is accurate? Not
+                                -- if we had more than a browse superpage
+                                -- of records to look at.
 );
 
 
@@ -1808,6 +1811,7 @@ CREATE OR REPLACE FUNCTION metabib.staged_browse(
     context_org             INT,
     context_locations       INT[],
     staff                   BOOL,
+    browse_superpage_size   INT,
     result_limit            INT,
     use_offset              INT
 ) RETURNS SETOF metabib.flat_browse_entry_appearance AS $p$
@@ -1818,6 +1822,11 @@ DECLARE
     result_row              metabib.flat_browse_entry_appearance%ROWTYPE;
     results_skipped         INT := 0;
     results_returned        INT := 0;
+    slice_start             INT;
+    slice_end               INT;
+    full_end                INT;
+    superpage_of_records    BIGINT[];
+    superpage_size          INT;
 BEGIN
     OPEN core_cursor FOR EXECUTE core_query;
 
@@ -1825,20 +1834,40 @@ BEGIN
         FETCH core_cursor INTO core_record;
         EXIT WHEN NOT FOUND;
 
-        qpfts_query :=
-            'SELECT NULL::BIGINT AS id, ARRAY[r] AS records, 1::INT AS rel ' ||
-            'FROM (SELECT UNNEST(' ||
-            quote_literal(core_record.records) || '::BIGINT[]) AS r) rr';
-
-        -- We use search.query_parser_fts() for visibility testing. Yes there
-        -- is a reason we feed it the records for one mbe at a time instead of
-        -- the records for `result_limit` mbe's at a time.
-        SELECT INTO result_row.sources visible
-            FROM search.query_parser_fts(
-                context_org, NULL, qpfts_query, NULL,
-                context_locations, 0, NULL, NULL, FALSE, staff, FALSE
-            ) qpfts
-            WHERE qpfts.rel IS NULL;
+        result_row.sources := 0;
+
+        full_end := ARRAY_LENGTH(core_record.records, 1);
+        superpage_size := COALESCE(browse_superpage_size, full_end);
+        slice_start := 1;
+        slice_end := superpage_size;
+
+        WHILE result_row.sources = 0 AND slice_start <= full_end LOOP
+            superpage_of_records := core_record.records[slice_start:slice_end];
+            qpfts_query :=
+                'SELECT NULL::BIGINT AS id, ARRAY[r] AS records, ' ||
+                '1::INT AS rel FROM (SELECT UNNEST(' ||
+                quote_literal(superpage_of_records) || '::BIGINT[]) AS r) rr';
+
+            -- We use search.query_parser_fts() for visibility testing.
+            -- We're calling it once per browse-superpage worth of records
+            -- out of the set of records related to a given mbe, until we've
+            -- either exhausted that set of records or found at least 1
+            -- visible record.
+
+            SELECT INTO result_row.sources visible
+                FROM search.query_parser_fts(
+                    context_org, NULL, qpfts_query, NULL,
+                    context_locations, 0, NULL, NULL, FALSE, staff, FALSE
+                ) qpfts
+                WHERE qpfts.rel IS NULL;
+
+            slice_start := slice_start + superpage_size;
+            slice_end := slice_end + superpage_size;
+        END LOOP;
+
+        -- Accurate?  Well, probably.
+        result_row.accurate := browse_superpage_size IS NULL OR
+            browse_superpage_size >= full_end;
 
         IF result_row.sources > 0 THEN
             IF results_skipped < use_offset THEN
@@ -1883,6 +1912,7 @@ DECLARE
     pivot_sort_fallback     TEXT;
     context_locations       INT[];
     use_offset              INT;
+    browse_superpage_size   INT;
     results_skipped         INT := 0;
 BEGIN
     IF pivot_id IS NULL THEN
@@ -1890,7 +1920,7 @@ BEGIN
     END IF;
 
     SELECT INTO pivot_sort_value, pivot_sort_fallback
-        sort_value, value FROM metabib.browse_entry where id = pivot_id;
+        sort_value, value FROM metabib.browse_entry WHERE id = pivot_id;
 
     IF pivot_sort_value IS NULL THEN
         RETURN;
@@ -1902,6 +1932,10 @@ BEGIN
             WHERE lgroup = context_loc_group;
     END IF;
 
+    SELECT INTO browse_superpage_size value     -- NULL ok
+        FROM config.global_flag
+        WHERE enabled AND name = 'opac.browse.holdings_visibility_test_limit';
+
     core_query := '
     SELECT
         mbe.id,
@@ -1932,7 +1966,7 @@ BEGIN
 
         RETURN QUERY SELECT * FROM metabib.staged_browse(
             core_query, context_org, context_locations,
-            staff, result_limit, use_offset
+            staff, browse_superpage_size, result_limit, use_offset
         );
     ELSE
         -- Part 1 of 2 to deliver what the user wants with a negative offset:
@@ -1943,7 +1977,7 @@ BEGIN
         -- Part 2 of 2 to deliver what the user wants with a negative offset:
         RETURN QUERY SELECT * FROM (SELECT * FROM metabib.staged_browse(
             core_query, context_org, context_locations,
-            staff, result_limit, use_offset
+            staff, browse_superpage_size, result_limit, use_offset
         )) sb ORDER BY row_number DESC;
 
     END IF;
index 45dc921..5e34913 100644 (file)
@@ -9185,6 +9185,17 @@ VALUES (
         'cgf',
         'label'
     )
+),
+(
+    'opac.browse.holdings_visibility_test_limit',
+    '100',
+    TRUE,
+    oils_i18n_gettext(
+        'opac.browse.holdings_visibility_test_limit',
+        'Don''t look for more than this number of records with holdings when displaying browse headings with visible record counts.',
+        'cgf',
+        'label'
+    )
 );
 
 INSERT INTO config.usr_setting_type (name,opac_visible,label,description,datatype)
index e0e3a01..66c8fcb 100644 (file)
@@ -7089,8 +7089,8 @@ Revision 1.2 - Added Log Comment  2003/03/24 19:37:42  ckeith
 </xsl:stylesheet>$$ WHERE name = 'mods33';
 
 
-INSERT INTO config.global_flag (name, value, enabled, label)
-VALUES (
+INSERT INTO config.global_flag (name, value, enabled, label) VALUES
+(
     'opac.browse.warnable_regexp_per_class',
     '{"title": "^(a|the|an)\\s"}',
     FALSE,
@@ -7100,6 +7100,17 @@ VALUES (
         'cgf',
         'label'
     )
+),
+(
+    'opac.browse.holdings_visibility_test_limit',
+    '100',
+    TRUE,
+    oils_i18n_gettext(
+        'opac.browse.holdings_visibility_test_limit',
+        'Don''t look for more than this number of records with holdings when displaying browse headings with visible record counts.',
+        'cgf',
+        'label'
+    )
 );
 
 ALTER TABLE metabib.browse_entry DROP CONSTRAINT browse_entry_value_key;
@@ -7128,7 +7139,10 @@ CREATE TYPE metabib.flat_browse_entry_appearance AS (
     fields          TEXT,
     authorities     TEXT,
     sources         INT,        -- visible ones, that is
-    row_number      INT         -- internal use, sort of
+    row_number      INT,        -- internal use, sort of
+    accurate        BOOL        -- Count in sources field is accurate? Not
+                                -- if we had more than a browse superpage
+                                -- of records to look at.
 );
 
 
@@ -7156,6 +7170,7 @@ CREATE OR REPLACE FUNCTION metabib.staged_browse(
     context_org             INT,
     context_locations       INT[],
     staff                   BOOL,
+    browse_superpage_size   INT,
     result_limit            INT,
     use_offset              INT
 ) RETURNS SETOF metabib.flat_browse_entry_appearance AS $p$
@@ -7166,6 +7181,11 @@ DECLARE
     result_row              metabib.flat_browse_entry_appearance%ROWTYPE;
     results_skipped         INT := 0;
     results_returned        INT := 0;
+    slice_start             INT;
+    slice_end               INT;
+    full_end                INT;
+    superpage_of_records    BIGINT[];
+    superpage_size          INT;
 BEGIN
     OPEN core_cursor FOR EXECUTE core_query;
 
@@ -7173,20 +7193,40 @@ BEGIN
         FETCH core_cursor INTO core_record;
         EXIT WHEN NOT FOUND;
 
-        qpfts_query :=
-            'SELECT NULL::BIGINT AS id, ARRAY[r] AS records, 1::INT AS rel ' ||
-            'FROM (SELECT UNNEST(' ||
-            quote_literal(core_record.records) || '::BIGINT[]) AS r) rr';
-
-        -- We use search.query_parser_fts() for visibility testing. Yes there
-        -- is a reason we feed it the records for one mbe at a time instead of
-        -- the records for `result_limit` mbe's at a time.
-        SELECT INTO result_row.sources visible
-            FROM search.query_parser_fts(
-                context_org, NULL, qpfts_query, NULL,
-                context_locations, 0, NULL, NULL, FALSE, staff, FALSE
-            ) qpfts
-            WHERE qpfts.rel IS NULL;
+        result_row.sources := 0;
+
+        full_end := ARRAY_LENGTH(core_record.records, 1);
+        superpage_size := COALESCE(browse_superpage_size, full_end);
+        slice_start := 1;
+        slice_end := superpage_size;
+
+        WHILE result_row.sources = 0 AND slice_start <= full_end LOOP
+            superpage_of_records := core_record.records[slice_start:slice_end];
+            qpfts_query :=
+                'SELECT NULL::BIGINT AS id, ARRAY[r] AS records, ' ||
+                '1::INT AS rel FROM (SELECT UNNEST(' ||
+                quote_literal(superpage_of_records) || '::BIGINT[]) AS r) rr';
+
+            -- We use search.query_parser_fts() for visibility testing.
+            -- We're calling it once per browse-superpage worth of records
+            -- out of the set of records related to a given mbe, until we've
+            -- either exhausted that set of records or found at least 1
+            -- visible record.
+
+            SELECT INTO result_row.sources visible
+                FROM search.query_parser_fts(
+                    context_org, NULL, qpfts_query, NULL,
+                    context_locations, 0, NULL, NULL, FALSE, staff, FALSE
+                ) qpfts
+                WHERE qpfts.rel IS NULL;
+
+            slice_start := slice_start + superpage_size;
+            slice_end := slice_end + superpage_size;
+        END LOOP;
+
+        -- Accurate?  Well, probably.
+        result_row.accurate := browse_superpage_size IS NULL OR
+            browse_superpage_size >= full_end;
 
         IF result_row.sources > 0 THEN
             IF results_skipped < use_offset THEN
@@ -7231,6 +7271,7 @@ DECLARE
     pivot_sort_fallback     TEXT;
     context_locations       INT[];
     use_offset              INT;
+    browse_superpage_size   INT;
     results_skipped         INT := 0;
 BEGIN
     IF pivot_id IS NULL THEN
@@ -7238,7 +7279,7 @@ BEGIN
     END IF;
 
     SELECT INTO pivot_sort_value, pivot_sort_fallback
-        sort_value, value FROM metabib.browse_entry where id = pivot_id;
+        sort_value, value FROM metabib.browse_entry WHERE id = pivot_id;
 
     IF pivot_sort_value IS NULL THEN
         RETURN;
@@ -7250,6 +7291,10 @@ BEGIN
             WHERE lgroup = context_loc_group;
     END IF;
 
+    SELECT INTO browse_superpage_size value     -- NULL ok
+        FROM config.global_flag
+        WHERE enabled AND name = 'opac.browse.holdings_visibility_test_limit';
+
     core_query := '
     SELECT
         mbe.id,
@@ -7280,7 +7325,7 @@ BEGIN
 
         RETURN QUERY SELECT * FROM metabib.staged_browse(
             core_query, context_org, context_locations,
-            staff, result_limit, use_offset
+            staff, browse_superpage_size, result_limit, use_offset
         );
     ELSE
         -- Part 1 of 2 to deliver what the user wants with a negative offset:
@@ -7291,7 +7336,7 @@ BEGIN
         -- Part 2 of 2 to deliver what the user wants with a negative offset:
         RETURN QUERY SELECT * FROM (SELECT * FROM metabib.staged_browse(
             core_query, context_org, context_locations,
-            staff, result_limit, use_offset
+            staff, browse_superpage_size, result_limit, use_offset
         )) sb ORDER BY row_number DESC;
 
     END IF;
index 7797444..9171685 100644 (file)
                                         'fi:has_browse_entry' => (result.browse_entry _ ',' _ result.fields)
                                     }) %]">[% result.value | html %]</a>
                             </span>
-                            <span class="browse-result-sources">([% result.sources %])</span>
-                            [% IF result.authorities %]
+                            <span class="browse-result-sources">([%
+                                IF result.accurate == 'f';
+                                    l("At least"); " ";
+                                END;
+                                result.sources %])</span>
+                            [% IF result.authorities.size %]
                             <ul class="browse-result-authority-headings">
                                 [% FOR a IN result.authorities;
                                     PROCESS authority_notes authority=a;
                                         field = acs.$field_id;
                                         headings = field_group.values.0;
                                         FOR h IN headings;
-                                            target_auth_id = h.target;
-
                                             # We could display headings without
-                                            # links here when target_auth_id is
-                                            # undef, if we wanted to.
+                                            # links here when h.target is
+                                            # undef, if we wanted to, but note
+                                            # that h.target_count is only
+                                            # defined when h.target is.
 
-                                            IF target_auth_id %]
+                                            IF h.target %]
                                             <li><span class="browse-result-authority-field-name">[% field.name %]</span>
                                             <a href="[% mkurl(ctx.opac_root _ '/results', {query => 'identifier|authority_id[' _ target_auth_id _ ']'}) %]">[% h.heading | html %]</a>
-                                            <span class="browse-result-authority-bib-links">([% ctx.authority_counts.$target_auth_id %])</span>
+                                            <span class="browse-result-authority-bib-links">([% h.target_count %])</span>
                                             </li>
                                             [% END %]
                                         [% END %]