From: Mike Rylander Date: Wed, 15 Apr 2015 16:10:27 +0000 (-0400) Subject: LP#1438136: Teach QP to be smart about attribute selectivity X-Git-Url: https://old-git.evergreen-ils.org/?a=commitdiff_plain;h=e3486397bb616b230da2bb8a458767bfff07eddb;p=evergreen%2Fmasslnc.git LP#1438136: Teach QP to be smart about attribute selectivity Signed-off-by: Mike Rylander Signed-off-by: Chris Sharp Signed-off-by: Jason Stephenson --- diff --git a/Open-ILS/src/perlmods/lib/OpenILS/Application/Storage/Driver/Pg/QueryParser.pm b/Open-ILS/src/perlmods/lib/OpenILS/Application/Storage/Driver/Pg/QueryParser.pm index 147b16571e..ac3174facf 100644 --- a/Open-ILS/src/perlmods/lib/OpenILS/Application/Storage/Driver/Pg/QueryParser.pm +++ b/Open-ILS/src/perlmods/lib/OpenILS/Application/Storage/Driver/Pg/QueryParser.pm @@ -696,6 +696,7 @@ use OpenILS::Application::AppUtils; my $apputils = "OpenILS::Application::AppUtils"; our %_dfilter_controlled_cache = (); +our %_dfilter_stats_cache = (); sub dynamic_filter_compile { my ($self, $filter, $params, $negate) = @_; @@ -703,6 +704,13 @@ sub dynamic_filter_compile { $negate = $negate ? '!' : ''; + if (!scalar keys %_dfilter_stats_cache) { + my $data = $e->json_query({from => ['evergreen.pg_statistics', 'record_attr_vector_list', 'vlist']}); + %_dfilter_stats_cache = map { + ( $_->{element}, $_->{frequency} ) + } grep { $_->{frequency} > 5 } @$data; # Pin floor to 5% of the table + } + if (!exists($_dfilter_controlled_cache{$filter})) { my $crad = $e->retrieve_config_record_attr_definition($filter); my $ccvm_list = $e->search_config_coded_value_map({ctype =>$filter}); @@ -718,16 +726,15 @@ sub dynamic_filter_compile { my $value_field = $_dfilter_controlled_cache{$filter}{controlled} ? 'code' : 'value'; - return sprintf('%s(%s)', $negate, + my $attr_objects = $e->$method({ $attr_field => $filter, $value_field => $params }); + my $common = scalar(grep { exists($_dfilter_stats_cache{$_->id}) } @$attr_objects); + + return (sprintf('%s(%s)', $negate, join( '|', - map { - $_->id - } @{ - $e->$method({ $attr_field => $filter, $value_field => $params }) - } + map { $_->id } @$attr_objects ) - ); + ), $common); } sub toSQL { @@ -1059,6 +1066,7 @@ sub flatten { my $joiner = "\n" . ${spc} x ( $self->plan_level + 5 ) . ($self->joiner eq '&' ? 'AND ' : 'OR '); my @dlist = (); + my $common = 0; # for each dynamic filter, build more of the WHERE clause for my $filter (@{$self->filters}) { my $NOT = $filter->negate ? 'NOT ' : ''; @@ -1070,7 +1078,8 @@ sub flatten { warn "flatten(): processing dynamic filter ". $filter->name ."\n" if $self->QueryParser->debug; - my $vlist_query = $self->dynamic_filter_compile( $fname, $filter->args, $filter->negate ); + my $vlist_query; + ($vlist_query, $common) = $self->dynamic_filter_compile( $fname, $filter->args, $filter->negate ); # bool joiner for intra-plan nodes/filters push(@dlist, $self->joiner) if @dlist; @@ -1230,10 +1239,17 @@ sub flatten { if (@dlist) { $where .= $joiner if $where ne ''; - $where .= sprintf( - 'mrv.vlist @@ \'%s\'', - join('', @dlist) - ); + if ($common) { # Use a function wrapper to inform PG of the non-rareness of one or more filter elements + $where .= sprintf( + 'evergreen.query_int_wrapper(mrv.vlist, \'%s\')', + join('', @dlist) + ); + } else { + $where .= sprintf( + 'mrv.vlist @@ \'%s\'', + join('', @dlist) + ); + } } warn "flatten(): full filter where => $where\n" if $self->QueryParser->debug; diff --git a/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.vlist_freq.sql b/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.vlist_freq.sql new file mode 100644 index 0000000000..cff061a1da --- /dev/null +++ b/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.vlist_freq.sql @@ -0,0 +1,36 @@ +BEGIN; + +SELECT evergreen.upgrade_deps_block_check('XXXX', :eg_version); + +CREATE OR REPLACE FUNCTION evergreen.pg_statistics (tab TEXT, col TEXT) RETURNS TABLE(element TEXT, frequency INT) AS $$ +BEGIN + RETURN QUERY + SELECT e, + f + FROM (SELECT ROW_NUMBER() OVER (), + (f * 100)::INT AS f + FROM (SELECT UNNEST(most_common_elem_freqs) AS f + FROM pg_stats + WHERE tablename = tab + AND attname = col + )x + ) AS f + JOIN (SELECT ROW_NUMBER() OVER (), + e + FROM (SELECT UNNEST(most_common_elems::text::text[]) AS e + FROM pg_stats + WHERE tablename = tab + AND attname = col + )y + ) AS elems USING (row_number); +END; +$$ LANGUAGE PLPGSQL; + +CREATE FUNCTION evergreen.query_int_wrapper (INT[],TEXT) RETURNS BOOL AS $$ +BEGIN + RETURN $1 @@ $2::query_int; +END; +$$ LANGUAGE PLPGSQL STABLE; + +COMMIT; +