From: Jason Stephenson Date: Fri, 6 Jun 2014 20:07:11 +0000 (-0400) Subject: LP 1208875: Fix circ history CSV download for many circulations. X-Git-Url: https://old-git.evergreen-ils.org/?a=commitdiff_plain;h=d3448167ca277366bb839a402f8285f5f81d31e8;p=evergreen%2Fmasslnc.git LP 1208875: Fix circ history CSV download for many circulations. We no longer retrieve a user's circ history for download via action/trigger and instead build the CSV data right in the TPAC. The reason for this change is that action/trigger imposes just too much of a delay between initiating the retrieval and getting the data, particulary for patrons who have a large number of circulation history entries, for certain values of large. The new code uses the CStoreEditor to make JSON queries to retrieve only the information needed for CSV. Testing revealed that using the existing fetch_user_circs method in EGCatLoader/Account.pm was still too slow for the more extreme patrons. The new code also caches most of the retrieved bibliographic data. Testing revealed that most patrons get multiple checkouts of the same things, or multiple parts of a multiple part television series. Caching the bib data for these records has shaved several seconds off retrieval time in testing. This branch makes use of a new, MVF, view when retrieving format information. It is thus unsuitable as-is for backport before 2.6. Along the way, we have accreted a generically reusable CSV filter for Template Toolkit. That could be useful not only in other parts of Evergreen, but in other projects. Finally, this commit leaves the old action/trigger code in the database. Right now, it makes a good reference if anyone wants to study what has been changed. It can be removed later, if so desired. Signed-off-by: Jason Stephenson Signed-off-by: Galen Charlton Conflicts: Open-ILS/src/perlmods/lib/OpenILS/WWW/EGCatLoader/Account.pm Signed-off-by: Jason Etheridge Signed-off-by: Galen Charlton Signed-off-by: Jason Stephenson --- diff --git a/Open-ILS/src/perlmods/MANIFEST b/Open-ILS/src/perlmods/MANIFEST index 594e944cf4..cb980a6e22 100644 --- a/Open-ILS/src/perlmods/MANIFEST +++ b/Open-ILS/src/perlmods/MANIFEST @@ -172,4 +172,5 @@ lib/OpenILS/WWW/SuperCat/Feed.pm lib/OpenILS/WWW/TemplateBatchBibUpdate.pm lib/OpenILS/WWW/Vandelay.pm lib/OpenILS/WWW/XMLRPCGateway.pm +lib/Template/Plugin/CSVFilter.pm MANIFEST This list of files diff --git a/Open-ILS/src/perlmods/lib/OpenILS/WWW/EGCatLoader/Account.pm b/Open-ILS/src/perlmods/lib/OpenILS/WWW/EGCatLoader/Account.pm index 9896ff23a4..b5afffba64 100644 --- a/Open-ILS/src/perlmods/lib/OpenILS/WWW/EGCatLoader/Account.pm +++ b/Open-ILS/src/perlmods/lib/OpenILS/WWW/EGCatLoader/Account.pm @@ -2718,10 +2718,8 @@ sub load_myopac_circ_history_export { my $circs = $self->fetch_user_circ_history; - $self->ctx->{csv} = $U->fire_object_event( - undef, - 'circ.format.history.csv', $circs, - $self->editor->requestor->home_ou + $self->ctx->{csv}->{rows} = $self->_get_circ_history_csv( + [map {$_->{id}} @$ids] ); return $self->set_file_download_headers($filename); @@ -2796,4 +2794,182 @@ sub load_password_reset { return Apache2::Const::OK; } +# fetch_user_circs is too slow for our purposes. (Yes, I tested it.) +# It also does a lot more than we need in order to produce our CSV +# output of a patron's circ history. So, _get_circ_history_csv was +# concocted to get only the information that we need in a more timely +# manner, though it is still not as speedy as one would like. +# +# Given an array ref of circulation ids, the function returns an array +# ref of hash ref objects with fields, xact_start, due_date, +# checkin_time, title, author, call_number, barcode, and format. +# There is one array entry for each circulation. This is the +# information presently required for the circ_history.csv file. +sub _get_circ_history_csv { + my $self = shift; + my $ids = shift; + my $rows = []; + my $e = $self->editor; + + if ($ids && @$ids) { + my $results = $e->json_query( + { + 'select' => { + 'circ' => ['xact_start','due_date','checkin_time'], + 'acp' => ['barcode'], + 'acn' => ['label','record'] + }, + 'from' => {'circ' => {'acp' => {'join' => 'acn'}}}, + 'where' => {'+circ' => {'id' => $ids}}, + 'order_by' => {'circ' => ['xact_start']} + } + ); + foreach my $r (@$results) { + my $row = {}; + my $record = $self->_get_record_data($r->{record}); + $row->{title} = $record->{title}; + $row->{author} = $record->{author}; + $row->{format} = join('+', @{$record->{formats}}); + $row->{xact_start} = $r->{xact_start}; + $row->{due_date} = $r->{due_date}; + $row->{checkin_time} = $r->{checkin_time}; + $row->{barcode} = $r->{barcode}; + $row->{call_number} = $r->{label}; + push(@$rows, $row); + } + } + + return $rows; +} + +# Utility functions used by _get_circ_history_csv: + +# This funtion retrieves the information that we need from a bre for +# our circ_history.csv output. It gathers, the title, author, and +# icon formats into a hashref using the _get_title_proper, +# _get_author, and _get_icon_formats functions defined below. Once +# the data are retrieved, it is cached for 90 seconds in the +# global cache. Testing has shown that it takes about 82 seconds to +# retrieve the information for a patron with 4,196 retained +# circulations on a slow development system. Using the cache with a +# timeout of 90 seconds, dropped 4 seconds from the retrieval time, +# and led to 371 cache hits. The cached information is, of course, +# used if available rather than doing a database lookup. +sub _get_record_data { + my $self = shift; + my $record = shift; + my $e = $self->editor; + my $obj; + my $key = 'TPAC_csv_info_' . $record; + + # First, we try the global cache. + my $cache = OpenSRF::Utils::Cache->new('global'); + $obj = $cache->get_cache($key); + + unless ($obj) { + $obj = {}; + + $obj->{author} = $self->_get_author($record); + $obj->{formats} = $self->_get_icon_formats($record); + $obj->{title} = $self->_get_title_proper($record); + + $cache->put_cache($key, $obj, 90); + } + + return $obj; +} + +# This function looks up the text for the icon formats of a given +# bre.id. It does so using a JSON query and a cached hash map of +# coded value map code => value. The hash map is cached for 24 hours +# because we have no idea how often people ask for their circ history +# in a csv, nor how often they might want to retrieve their lists/book +# bags in a csv. +sub _get_icon_formats { + my $self = shift; + my $record = shift; + my $e = $self->editor; + my $formats = []; + + # First, let's try using a cache: + my $cache = OpenSRF::Utils::Cache->new('global'); + my $icon_formats = $cache->get_cache('CCVM_icon_format_code_map'); + unless ($icon_formats) { + # Build a map of item_type coded value map: + my $ccvms = $e->search_config_coded_value_map({ctype => 'icon_format'}); + if ($ccvms && @$ccvms) { + foreach my $ccvm (@$ccvms) { + $icon_formats->{$ccvm->code} = $ccvm->value; + } + } + $cache->put_cache('CCVM_icon_format_code_map', $icon_formats); + } + + + my $query = { + 'select' => {'mraf' => ['value']}, + 'from' => 'mraf', + 'where' => {'id' => $record, 'attr' => 'icon_format'}, + 'distinct' => 'true' + }; + + my $result = $e->json_query($query); + foreach (@$result) { + push(@$formats, $$icon_formats{$_->{value}}); + } + + return $formats; +} + +# This function retrieves the "first" author for a bre using a JSON +# query on metabib.author_field_entry. It deterimines "first" by +# ordering by "field" and then "id" and using a limit of 1 on the +# result set. +sub _get_author { + my $self = shift; + my $record = shift; + my $e = $self->editor; + my $author; + + my $query = { + 'select' => {'mafe' => ['value']}, + 'from' => 'mafe', + 'where' => {'source' => $record}, + 'order_by' => {'mafe' => ['field','id']}, + 'limit' => 1 + }; + + my $result = $e->json_query($query); + if ($result && @$result) { + $author = $result->[0]->{value}; + } + + return $author; +} + +# This function looks up a "title proper" for a bre using a JSON query +# on metabib.author_field_entry where the "field" is equal to 6, the +# code for "title proper." +sub _get_title_proper { + my $self = shift; + my $record = shift; + my $e = $self->editor; + my $title; + + my $query = { + 'select' => {'mtfe' => ['value']}, + 'from' => 'mtfe', + 'where' => {'source' => $record, 'field' => 6}, + 'limit' => 1 + }; + + my $result = $e->json_query($query); + if ($result && @$result) { + $title = $result->[0]->{value}; + } + + return $title; +} + + 1; diff --git a/Open-ILS/src/perlmods/lib/Template/Plugin/CSVFilter.pm b/Open-ILS/src/perlmods/lib/Template/Plugin/CSVFilter.pm new file mode 100644 index 0000000000..6f716e0c58 --- /dev/null +++ b/Open-ILS/src/perlmods/lib/Template/Plugin/CSVFilter.pm @@ -0,0 +1,146 @@ +# --------------------------------------------------------------- +# Copyright © 2014 Merrimack Valley Library Consortium +# Jason Stephenson +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# --------------------------------------------------------------- +package Template::Plugin::CSVFilter; +use Template::Plugin::Filter; + +use base qw(Template::Plugin::Filter); + +our $VERSION = "1.00"; + +sub init { + my $self = shift; + + $self->{_DYNAMIC} = 1; + $self->install_filter($self->{_ARGS}->[0] || 'CSVFilter'); + + return $self; +} + +sub filter { + my ($self, $text, $args, $conf) = @_; + + $args = $self->merge_args($args); + $conf = $self->merge_config($conf); + + my $quote_char = $conf->{quote_char} || '"'; + my $delim = $conf->{separator} || ','; + my $force_quotes = grep {$_ eq 'force_quotes'} @$args; + if ($text) { + $text =~ s/$quote_char/$quote_char$quote_char/g; + $text = $quote_char . $text . $quote_char + if ($force_quotes || $text =~ /[$delim$quote_char\r\n]/); + } + $text .= $delim unless(grep {$_ eq 'last'} @$args); + + return $text; +} + +1; +__END__ + +=pod + +=head1 NAME + +Template::Plugin::CSVFilter - Template Toolkit2 Filter for CSV fields + +=head1 SYNOPSIS + + [%- USE CSVFilter 'csv'; + FOREACH row IN rows; + FOREACH field IN row; + IF loop.count == loop.size; + field | csv 'last'; + ELSE; + field | csv; + END; + END; %] + [% END -%] + +You can use the above as a template for a CSV output file, +provided that you arrange your data variable to have a 'rows' member +that is an array ref containing array refs of the fields for each row: + + $var = {rows => [[...], ...]}; + +If you want headers in the first row of the output, then make sure the +header fields make up the first sub array of 'rows' array. + +=head1 DESCRIPTION + +Template::Plugin::CSVFilter adds a filter that you can use in Template +Toolkit2 templates to output data that is suitably formatted for +inclusion in a CSV type file. The filter will see to it that the +field is properly quoted and that any included quote symbols are +properly escaped. It will also add the separator character after the +field's text. + +=head1 OPTIONS + +CSVFilter accepts the following configuration options that require a +single character argument: + +=over + +=item * + +C - The argument is the character to use as the field +delimiter. If this is not specified, a default of , is used. + +=item * + +C - The argument is the character to for quoting fields. +If this is not specified, a default of " is used. + +=back + +The above are best set when you use the filter in your template. +However, you can set them at any or every time you use the filter in +your template. + +Each call to the filter can be modified by the following two arguments +that do not themselves take an argument: + +=over + +=item * + +C - If present, this argument causes the field output to +be quoted even if it would not ordinarily be. If you use this where +you C the filter, you will need to specify a name for the filter or +the name of the filter will be C. If you specify this +option when initializing the filter, then every output field will be +quoted. + +=item * + +C - This argument must be used on the last field in an output +row in order to suppress output of the C character. This +argument must never be used when initializing the filter. + +=back + +=head1 SEE ALSO + + Template + Template::Plugin + Template::Plugin::Filter + +=head1 AUTHOR + +Jason Stephenson + + +=cut diff --git a/Open-ILS/src/templates/opac/myopac/circ_history/export.tt2 b/Open-ILS/src/templates/opac/myopac/circ_history/export.tt2 index fa3988f7f0..9bffe35f63 100644 --- a/Open-ILS/src/templates/opac/myopac/circ_history/export.tt2 +++ b/Open-ILS/src/templates/opac/myopac/circ_history/export.tt2 @@ -1 +1,25 @@ -[%- ctx.csv.template_output.data -%] +[%- USE CSVFilter 'csv'; + USE date; + SET DATE_FORMAT = l('%m/%d/%Y'); -%] +[%- l('Title') | csv -%] +[%- l('Author') | csv -%] +[%- l('Checkout Date') | csv -%] +[%- l('Due Date') | csv -%] +[%- l('Date Returned') | csv -%] +[%- l('Barcode') | csv -%] +[%- l('Call Number') | csv -%] +[%- l('Format') | csv 'last' %] +[% FOREACH row IN ctx.csv.rows; -%] +[%- row.title | csv -%] +[%- row.author | csv -%] +[%- date.format(ctx.parse_datetime(row.xact_start), DATE_FORMAT) | csv-%] +[%- date.format(ctx.parse_datetime(row.due_date), DATE_FORMAT) | csv -%] +[%- IF row.checkin_time; + date.format(ctx.parse_datetime(row.checkin_time), DATE_FORMAT) | csv; + ELSE; -%] +, +[%- END -%] +[%- row.barcode | csv -%] +[%- row.call_number | csv -%] +[%- row.format | csv 'last' %] +[% END -%]