--- /dev/null
+#!/usr/bin/perl
+use warnings;
+use strict;
+use lib '/home/opensrf/svn/OpenSRF/trunk/src/perl/lib/';
+use lib '/home/opensrf/svn/ILS/trunk/Open-ILS/src/perlmods/';
+use OpenILS::Application::Storage::Driver::Pg::QueryParser;
+use JSON::XS;
+use Getopt::Long;
+use Data::Dumper;
+$Data::Dumper::Indent = 1;
+use Time::HiRes qw/time/;
+
+my $qpconfig;
+
+# Render one abstract filter ({name, negate, args}) as "name(arg1,arg2)".
+# A negated filter is prefixed with the configured "disallowed" operator
+# taken from the file-scoped $qpconfig.
+sub _abstract_query2str_filter {
+    my ($filter) = @_;
+
+    my $negation = $filter->{negate} ? $qpconfig->{operators}{disallowed} : "";
+    my $arglist = join(",", @{$filter->{args}});
+
+    return $negation . $filter->{name} . "(" . $arglist . ")";
+}
+
+# Render one abstract modifier (a plain string) by prepending the
+# configured modifier operator from the file-scoped $qpconfig.
+sub _abstract_query2str_modifier {
+    my ($modifier) = @_;
+
+    my $sigil = $qpconfig->{operators}{modifier};
+
+    return $sigil . $modifier;
+}
+
+# Given an abstract_query that carries a qp config, produce a query string
+# that should be equivalent to the original query.
+#
+# Arguments:
+#   $abstract_query - hash ref: a "query_plan", "node" or "atom" (see its
+#                     {type}), or a bare { '&' => [...] } / { '|' => [...] }
+#                     joiner group.
+#   $depth          - optional recursion depth, defaults to 0. Query plans
+#                     at depth > 0 are wrapped in the configured group_start
+#                     and group_end operators.
+#
+# The config is read from the first abstract query that has one and is then
+# kept in the file-scoped $qpconfig for all nested calls.
+#
+# Returns the rendered string.
+sub abstract_query2str {
+    my $abstract_query = shift;
+    my $depth = shift || 0;
+
+    $qpconfig ||= $abstract_query->{config};
+
+    my $gs = $qpconfig->{operators}{group_start};
+    my $ge = $qpconfig->{operators}{group_end};
+    my $and = $qpconfig->{operators}{and};
+    my $or = $qpconfig->{operators}{or};
+
+    my $type = $abstract_query->{type};
+    $type = '' unless defined $type;
+
+    # Only nested query plans get explicit grouping operators.
+    my $grouped = ($type eq 'query_plan' and $depth);
+
+    my $q = "";
+    $q .= $gs if $grouped;
+
+    if ($type eq 'query_plan') {
+        $q .= join(" ", map { _abstract_query2str_filter($_) } @{$abstract_query->{filters}}) if
+            exists $abstract_query->{filters};
+        $q .= " ";
+
+        $q .= join(" ", map { _abstract_query2str_modifier($_) } @{$abstract_query->{modifiers}}) if
+            exists $abstract_query->{modifiers};
+    } elsif ($type eq 'node') {
+        $q .= " " . $abstract_query->{class};
+        $q .= "|$_" foreach @{$abstract_query->{fields}};
+        $q .= ":";
+    } elsif ($type eq 'atom') {
+        # Test for definedness, not truth: a term such as "0" is legitimate
+        # content and must not be rendered as an empty string.
+        my $prefix = defined $abstract_query->{prefix} ? $abstract_query->{prefix} : '';
+        my $content = defined $abstract_query->{content} ? $abstract_query->{content} : '';
+        my $suffix = defined $abstract_query->{suffix} ? $abstract_query->{suffix} : '';
+
+        $prefix = $qpconfig->{operators}{disallowed} if $prefix eq '!';
+        $q .= $prefix . $content . $suffix;
+    }
+
+    # The joiner group is either stored under {children} or, for a nested
+    # group, is the hash itself. Look the joiner up by name rather than
+    # taking "the first key", which is only well-defined for one-key hashes.
+    my $group = exists $abstract_query->{children}
+        ? $abstract_query->{children}
+        : $abstract_query;
+    my ($op) = grep { $group->{$_} } ('&', '|');
+
+    if (defined $op) {
+        $q .= join(
+            " " . ($op eq '&' ? $and : $or) . " ",
+            map { abstract_query2str($_, $depth + 1) } @{$group->{$op}}
+        );
+    }
+    $q .= " ";
+
+    $q .= $ge if $grouped;
+
+    return $q;
+}
+
+# Driver: parse a query $runs times for timing, then parse it once more and
+# print the parse tree, the generated SQL, the abstract query, and the
+# abstract query rendered back to a string.
+OpenILS::Application::Storage::Driver::Pg::QueryParser->TEST_SETUP;
+
+my $query = '#available title: foo bar* || (-baz || (subject:"1900'.
+    '-1910 junk" "and another thing" se:stuff #available '.
+    'statuses(0,7,12))) && && && au:malarky || au|'.
+    'corporate|personal:gonzo && dc.identifier:+123456789X'.
+    ' dc.contributor=rowling #metarecord estimation_'.
+    'strategy(exclusion) item_type(a, t) item_form(d) '.
+    'bib.subjectTitle=potter bib.subjectName=harry '.
+    'keyword|mapscale:1:250000';
+my $superpage = 1;
+my $superpage_size = 1000;
+my $core_limit = 25000;
+my $debug;
+my $quiet;
+my $runs = 100;
+
+# GetOptions returns false when the command line could not be parsed;
+# stop instead of silently running with a partly applied configuration.
+GetOptions(
+    'superpage=i' => \$superpage,
+    'superpage-size=i' => \$superpage_size,
+    'core-limit=i' => \$core_limit,
+    'query=s' => \$query,
+    'debug' => \$debug,
+    'quiet' => \$quiet,
+    'runs=i' => \$runs
+) or die "Usage: $0 [--query=STR] [--superpage=N] [--superpage-size=N] " .
+    "[--core-limit=N] [--runs=N] [--debug] [--quiet]\n";
+
+# The average below divides by $runs, so reject 0 (and negatives) up front
+# rather than failing with a division by zero after all the work is done.
+die "--runs must be at least 1\n" if $runs < 1;
+
+print "Original query: $query\n";
+
+# Same constructor arguments for the timing loop and the inspected parse.
+my @qp_args = (
+    superpage_size => $superpage_size,
+    superpage => $superpage,
+    core_limit => $core_limit,
+    debug => $debug,
+    query => $query
+);
+
+my $start = time();
+OpenILS::Application::Storage::Driver::Pg::QueryParser->new(@qp_args)->parse->parse_tree for (1 .. $runs);
+my $end = time();
+
+my $plan = OpenILS::Application::Storage::Driver::Pg::QueryParser->new(@qp_args);
+$plan->parse;
+print "Parsed query tree:\n" . Dumper( $plan->parse_tree) if (!$quiet);
+#print "Parsed query tree:\n" . Dumper( QueryParser->new( superpage_size => $superpage_size, superpage => $superpage, core_limit => $core_limit, query => $query, debug => $debug )->parse->parse_tree);
+my $sql = $plan->toSQL;
+$sql =~ s/^\s*$//gm;
+print "SQL:\n$sql\n\n" if (!$quiet);
+
+my $abstract_query = $plan->parse_tree->to_abstract_query(with_config => 1);
+print "abstract_query: " . Dumper($abstract_query) . "\n";
+print "abstract_query back to string: " . abstract_query2str($abstract_query) . "\n";
+print "Simple plan: " . ($plan->simple_plan ? 'yes' : 'no') . "\n";
+print "Total parse time, $runs runs: " . ($end - $start) . "s\n";
+print "Average parse time, $runs runs: " . sprintf('%0.3f',(($end - $start) / $runs) * 1000) . "ms\n";
+
my $page; # current superpage
my $est_hit_count = 0;
my $current_page_summary = {};
+ my $current_abstract = {};
my $global_summary = {checked => 0, visible => 0, excluded => 0, deleted => 0, total => 0};
my $is_real_hit_count = 0;
my $new_ids = [];
my $data = $cache_data->{$page};
my $results;
- my $summary;
+ my ($summary, $abstract);
$logger->debug("staged search: analyzing superpage $page");
$logger->debug("staged search: found cached results");
$summary = $data->{summary};
$results = $data->{results};
-
+ $abstract = pop @$results;
} else {
# retrieve the window of results from the database
$logger->debug("staged search: fetching results from the database");
$search_hash->{skip_check} = $page * $superpage_size;
my $start = time;
$results = $U->storagereq($method, %$search_hash);
+ $abstract = pop @$results;
$search_duration = time - $start;
$logger->info("staged search: DB call took $search_duration seconds and returned ".scalar(@$results)." rows, including summary");
$summary = shift(@$results) if $results;
}
$current_page_summary = $summary;
+ $current_abstract = $abstract;
# add the new set of results to the set under construction
push(@$all_results, @$results);
superpage_size => $search_hash->{check_limit},
superpage_summary => $current_page_summary,
facet_key => $facet_key,
- ids => \@results
+ ids => \@results,
+ abstract_query => $current_abstract
}
);
#-------------------------------
+package _util;
+
+# At this level, joiners are always & or |. This is not the external,
+# configurable representation of joiners, which defaults to && and ||.
+#
+# Returns true only for a non-reference value equal to '&' or '|'.
+sub is_joiner {
+    my ($candidate) = @_;
+
+    # References (nodes, nested plans) are never joiners.
+    return '' if ref $candidate;
+
+    return ($candidate eq '&' || $candidate eq '|');
+}
+
+# The joiner used when a list of children has no explicit joiner.
+sub default_joiner {
+    return '&';
+}
+
+# Compare two abstract atoms field by field (prefix, suffix, content).
+# Returns 1 when all three fields are string-equal, 0 otherwise.
+sub compare_abstract_atoms {
+    my ($left, $right) = @_;
+
+    no warnings; # a missing (undef) field is compared as ''
+
+    for my $field (qw/prefix suffix content/) {
+        return 0 if $left->{$field} ne $right->{$field};
+    }
+
+    return 1;
+}
+
+# Build a single abstract atom that stands for a whole quoted phrase.
+#
+#   $phrase - the phrase text, stored as the atom's content
+#   $neg    - when true, the configured "disallowed" operator is put in
+#             front of the opening quote in the atom's prefix
+#
+# Returns a hash ref with type, prefix, suffix and content.
+sub fake_abstract_atom_from_phrase {
+    my ($phrase, $neg) = @_;
+
+    my $opening = '"';
+    $opening = $QueryParser::parser_config{QueryParser}{operators}{disallowed} . $opening
+        if $neg;
+
+    my %atom = (
+        "type" => "atom",
+        "prefix" => $opening,
+        "suffix" => '"',
+        "content" => $phrase
+    );
+
+    return \%atom;
+}
+
+# Collect array references reachable from an abstract-query hash.
+#
+# Every value of $hash that is an array ref is returned, followed by the
+# arrays found by applying the same search to each element of that array.
+# Hash values that are not arrays are not descended into.
+#
+# Anything that is not a plain hash ref (undef, a string such as an entry
+# of a "fields" list, ...) contributes nothing, instead of being
+# dereferenced as a hash.
+#
+# Returns a list of array refs (the originals, not copies).
+sub find_arrays_in_abstract {
+    my ($hash) = @_;
+
+    my @arrays;
+    return @arrays unless ref($hash) eq 'HASH';
+
+    foreach my $value (values %$hash) {
+        next unless ref($value) eq 'ARRAY';
+
+        push @arrays, $value;
+        push @arrays, find_arrays_in_abstract($_) foreach @$value;
+    }
+
+    return @arrays;
+}
+
+#-------------------------------
package QueryParser::query_plan;
sub QueryParser {
return $self;
}
+# Convert this query plan into a plain data structure of type "query_plan"
+# with its filters, modifiers and children.
+#
+# %opts supports two options at this time:
+#   no_phrases :
+#       If true, do not do anything to the phrases and unphrases
+#       fields on any discovered nodes.
+#   with_config :
+#       If true, also return the query parser config as part of the blob.
+#       This is reset to 0 before recursing so the config is not repeated.
+#
+# Children are stored as { joiner => [ ... ] }. When a joiner different
+# from the currently open one is met, the existing group becomes the first
+# member of a new group under the new joiner.
+sub to_abstract_query {
+    my ($self, %opts) = @_;
+
+    my @filters = map { $_->to_abstract_query } @{$self->filters};
+    my @modifiers = map { $_->to_abstract_query } @{$self->modifiers};
+
+    my $abstract_query = {
+        type => "query_plan",
+        filters => \@filters,
+        modifiers => \@modifiers
+    };
+
+    if ($opts{with_config}) {
+        $opts{with_config} = 0;
+        $abstract_query->{config} = $QueryParser::parser_config{QueryParser};
+    }
+
+    my $kids = [];
+
+    foreach my $element (@{$self->query_nodes}) {
+        # Remember: an element can be a joiner string, a node, or another
+        # query_plan. Anything that is not a joiner is converted and added
+        # to the currently open group.
+        unless (_util::is_joiner($element)) {
+            push @$kids, $element->to_abstract_query(%opts);
+            next;
+        }
+
+        my $root = $abstract_query->{children};
+        if ($root) {
+            my ($open_joiner) = keys %$root;
+            next if $open_joiner eq $element;
+
+            # Joiner changed: nest the old group inside a fresh one.
+            $kids = [$root];
+        }
+
+        $abstract_query->{children} = {$element => $kids};
+    }
+
+    $abstract_query->{children} ||= { _util::default_joiner() => $kids };
+    return $abstract_query;
+}
+
+
#-------------------------------
package QueryParser::query_plan::node;
+use Data::Dumper;
+$Data::Dumper::Indent = 0;
sub new {
my $pkg = shift;
return $self;
}
+# This will find up to one occurrence of @$short_list within @$long_list,
+# and replace it (in place) with the single atom $replacement.
+#
+#   $short_list  - array ref of abstract atoms to look for, in order
+#   $long_list   - array ref to search; modified when a match is found
+#   $replacement - the element spliced in instead of the matched run
+#
+# Elements are compared with _util::compare_abstract_atoms. Every index of
+# @$long_list is tried as a possible start, so a failed partial match does
+# not hide a match that begins inside it (e.g. [a, b] within [a, a, b]).
+#
+# Returns 1 if a replacement was made, 0 otherwise. A missing or empty
+# $short_list never matches.
+sub replace_phrase_in_abstract_query {
+    my ($self, $short_list, $long_list, $replacement) = @_;
+
+    return 0 unless ref($short_list) eq 'ARRAY' and ref($long_list) eq 'ARRAY';
+
+    my $goal = scalar @$short_list;
+    return 0 unless $goal;
+
+    # Last index at which a run of $goal elements still fits; the range
+    # below is empty when @$long_list is shorter than @$short_list.
+    my $last_start = scalar(@$long_list) - $goal;
+
+    START: for my $start (0 .. $last_start) {
+        for my $offset (0 .. $goal - 1) {
+            next START unless _util::compare_abstract_atoms(
+                $short_list->[$offset], $long_list->[$start + $offset]
+            );
+        }
+
+        splice @$long_list, $start, $goal, $replacement;
+        return 1;
+    }
+
+    return 0;
+}
+
+
+# Replace, inside $abstract_query->{children}, the run of atoms that makes
+# up each phrase in @$phrases with one quoted phrase atom.
+#
+# Phrases appear in duplicate in a real QP tree (as a phrase and as the
+# atoms it breaks into), and we don't want that duplication in our abstract
+# query. So each phrase is parsed on its own, as QP would, and the first
+# matching sequence of atoms in the abstract query is replaced.
+#
+#   $abstract_query - the node's abstract query under construction
+#   $phrases        - array ref of phrase strings
+#   $negated        - true for unphrases (negated phrases): the configured
+#                     "disallowed" operator is prepended to the parsed query
+#                     and to the replacement atom's prefix
+sub _collapse_phrases_in_abstract_query {
+    my ($self, $abstract_query, $phrases, $negated) = @_;
+
+    my $negation = $negated
+        ? $QueryParser::parser_config{QueryParser}{operators}{disallowed}
+        : '';
+
+    PHRASE: for my $phrase (@$phrases) {
+        my $tmptree = $self->{plan}->{QueryParser}->new(
+            query => $negation . '"' . $phrase . '"'
+        )->parse->parse_tree;
+
+        # For a well-behaved phrase, we should now have only one node
+        # in the $tmptree query plan, and that node should have an
+        # orderly list of atoms and joiners.
+        next PHRASE unless $tmptree;
+        next PHRASE unless $tmptree->{query} and scalar(@{$tmptree->{query}}) == 1;
+
+        my $tmplist;
+
+        eval {
+            $tmplist = $tmptree->{query}->[0]->to_abstract_query(
+                no_phrases => 1
+            )->{children}->{'&'}->[0]->{children}->{'&'};
+        };
+        next PHRASE if $@;
+
+        # Walking through missing keys raises no error, so the eval can
+        # succeed and still leave nothing usable to search for.
+        next PHRASE unless ref($tmplist) eq 'ARRAY' and @$tmplist;
+
+        foreach my $candidate (
+            _util::find_arrays_in_abstract($abstract_query->{children})
+        ) {
+            last if $self->replace_phrase_in_abstract_query(
+                $tmplist,
+                $candidate,
+                _util::fake_abstract_atom_from_phrase($phrase, $negated)
+            );
+        }
+    }
+
+    return;
+}
+
+# Convert this node into a plain data structure of type "node" with its
+# class, fields and children.
+#
+# %opts:
+#   no_phrases : if true, leave the atoms belonging to this node's phrases
+#                and unphrases as they are instead of collapsing each run
+#                into a single phrase atom.
+#
+# Children are stored as { joiner => [ ... ] }. When a joiner different
+# from the currently open one is met, the existing group becomes the first
+# member of a new group under the new joiner.
+sub to_abstract_query {
+    my $self = shift;
+    my %opts = @_;
+
+    my $abstract_query = {
+        "type" => "node",
+        "class" => $self->classname,
+        "fields" => $self->fields
+    };
+
+    my $kids = [];
+
+    for my $qatom (@{$self->query_atoms}) {
+        if (_util::is_joiner($qatom)) {
+            if ($abstract_query->{children}) {
+                my $open_joiner = (keys(%{$abstract_query->{children}}))[0];
+                next if $open_joiner eq $qatom;
+
+                my $oldroot = $abstract_query->{children};
+                $kids = [$oldroot];
+                $abstract_query->{children} = {$qatom => $kids};
+            } else {
+                $abstract_query->{children} = {$qatom => $kids};
+            }
+        } else {
+            push @$kids, $qatom->to_abstract_query;
+        }
+    }
+
+    # NOTE(review): phrases are collapsed before the default joiner group
+    # is installed below, so a node whose atoms had no joiner at all has no
+    # {children} to search at this point - confirm that is intended.
+    unless ($opts{no_phrases}) {
+        $self->_collapse_phrases_in_abstract_query(
+            $abstract_query, $self->{phrases}, 0
+        ) if $self->{phrases};
+
+        # Do the same for unphrases (negated phrases).
+        $self->_collapse_phrases_in_abstract_query(
+            $abstract_query, $self->{unphrases}, 1
+        ) if $self->{unphrases};
+    }
+
+    $abstract_query->{children} ||= { _util::default_joiner() => $kids };
+    return $abstract_query;
+}
+
#-------------------------------
package QueryParser::query_plan::node::atom;
return $self->{suffix};
}
+# Convert this atom into a plain hash of type "atom" holding the values of
+# its prefix, suffix and content accessors.
+sub to_abstract_query {
+    my $self = shift;
+
+    return {
+        "prefix" => $self->prefix,
+        "suffix" => $self->suffix,
+        "content" => $self->content,
+        "type" => "atom"
+    };
+}
#-------------------------------
package QueryParser::query_plan::filter;
return $self->{args};
}
+# Convert this filter into a plain hash holding the values of its name,
+# negate and args accessors.
+sub to_abstract_query {
+    my $self = shift;
+
+    return {
+        "name" => $self->name,
+        "negate" => $self->negate,
+        "args" => $self->args
+    };
+}
+
#-------------------------------
package QueryParser::query_plan::facet;
return $$self;
}
+# The abstract form of this object is simply the value of its name
+# accessor.
+sub to_abstract_query {
+    my $self = shift;
+
+    return $self->name;
+}
1;