From 0057131d799a1b8229722fe802bcfa05fdfded9f Mon Sep 17 00:00:00 2001 From: miker <miker@dcc99617-32d9-48b4-a31d-7c20da2025e4> Date: Wed, 24 Mar 2010 18:13:52 +0000 Subject: [PATCH] split search/fact metadata; add facet syntax git-svn-id: svn://svn.open-ils.org/ILS/trunk@15952 dcc99617-32d9-48b4-a31d-7c20da2025e4 --- .../Application/Storage/Driver/Pg/QueryParser.pm | 165 +++++++++++++++++---- .../OpenILS/Application/Storage/QueryParser.pm | 151 ++++++++++++++++++- 2 files changed, 285 insertions(+), 31 deletions(-) diff --git a/Open-ILS/src/perlmods/OpenILS/Application/Storage/Driver/Pg/QueryParser.pm b/Open-ILS/src/perlmods/OpenILS/Application/Storage/Driver/Pg/QueryParser.pm index c2091fd989..dd565f95a3 100644 --- a/Open-ILS/src/perlmods/OpenILS/Application/Storage/Driver/Pg/QueryParser.pm +++ b/Open-ILS/src/perlmods/OpenILS/Application/Storage/Driver/Pg/QueryParser.pm @@ -2,6 +2,8 @@ package OpenILS::Application::Storage::Driver::Pg::QueryParser; use OpenILS::Application::Storage::QueryParser; use base 'QueryParser'; use OpenSRF::Utils::JSON; +use OpenILS::Application::AppUtils; +my $U = 'OpenILS::Application::AppUtils'; sub init { my $class = shift; @@ -44,16 +46,63 @@ sub toSQL { return $self->parse_tree->toSQL; } -sub field_id_map { +sub facet_field_id_map { my $self = shift; my $map = shift; - $self->custom_data->{field_id_map} ||= {}; - $self->custom_data->{field_id_map} = $map if ($map); - return $self->custom_data->{field_id_map}; + $self->custom_data->{facet_field_id_map} ||= {}; + $self->custom_data->{facet_field_id_map} = $map if ($map); + return $self->custom_data->{facet_field_id_map}; } -sub add_field_id_map { +sub add_facet_field_id_map { + my $self = shift; + my $class = shift; + my $field = shift; + my $id = shift; + my $weight = shift; + + $self->add_facet_field( $class => $field ); + $self->facet_field_id_map->{by_id}{$id} = { classname => $class, field => $field, weight => $weight }; + $self->facet_field_id_map->{by_class}{$class}{$field} = $id; + + return { + by_id => { $id => { classname => $class, field => $field, weight => $weight } }, + by_class => { $class => { $field => $id } } + }; +} + +sub facet_field_class_by_id { + my $self = shift; + my $id = shift; + + return $self->facet_field_id_map->{by_id}{$id}; +} + +sub facet_field_ids_by_class { + my $self = shift; + my $class = shift; + my $field = shift; + + return undef unless ($class); + + if ($field) { + return [$self->facet_field_id_map->{by_class}{$class}{$field}]; + } + + return [values( %{ $self->facet_field_id_map->{by_class}{$class} } )]; +} + +sub search_field_id_map { + my $self = shift; + my $map = shift; + + $self->custom_data->{search_field_id_map} ||= {}; + $self->custom_data->{search_field_id_map} = $map if ($map); + return $self->custom_data->{search_field_id_map}; +} + +sub add_search_field_id_map { my $self = shift; my $class = shift; my $field = shift; @@ -61,8 +110,8 @@ sub add_field_id_map { my $weight = shift; $self->add_search_field( $class => $field ); - $self->field_id_map->{by_id}{$id} = { classname => $class, field => $field, weight => $weight }; - $self->field_id_map->{by_class}{$class}{$field} = $id; + $self->search_field_id_map->{by_id}{$id} = { classname => $class, field => $field, weight => $weight }; + $self->search_field_id_map->{by_class}{$class}{$field} = $id; return { by_id => { $id => { classname => $class, field => $field, weight => $weight } }, @@ -70,14 +119,14 @@ sub add_field_id_map { }; } -sub field_class_by_id { +sub search_field_class_by_id { my $self = shift; my $id = shift; - return $self->field_id_map->{by_id}{$id}; + return $self->search_field_id_map->{by_id}{$id}; } -sub field_ids_by_class { +sub search_field_ids_by_class { my $self = shift; my $class = shift; my $field = shift; @@ -85,10 +134,10 @@ sub field_ids_by_class { return undef unless ($class); if ($field) { - return [$self->field_id_map->{by_class}{$class}{$field}]; + return [$self->search_field_id_map->{by_class}{$class}{$field}]; } - return [values( %{ $self->field_id_map->{by_class}{$class} } )]; + return [values( %{ $self->search_field_id_map->{by_class}{$class} } )]; } sub relevance_bumps { @@ -124,15 +173,16 @@ sub add_relevance_bump { } -sub initialize_field_id_map { +sub initialize_search_field_id_map { my $self = shift; my $cmf_list = shift; for my $cmf (@$cmf_list) { - __PACKAGE__->add_field_id_map( $cmf->field_class, $cmf->name, $cmf->id, $cmf->weight ); + __PACKAGE__->add_search_field_id_map( $cmf->field_class, $cmf->name, $cmf->id, $cmf->weight ) if ($U->is_true($cmf->search_field)); + __PACKAGE__->add_facet_field_id_map( $cmf->field_class, $cmf->name, $cmf->id, $cmf->weight ) if ($U->is_true($cmf->facet_field)); } - return $self->field_id_map; + return $self->search_field_id_map; } sub initialize_relevance_bumps { @@ -140,7 +190,7 @@ sub initialize_relevance_bumps { my $sra_list = shift; for my $sra (@$sra_list) { - my $c = $self->field_class_by_id( $sra->field ); + my $c = $self->search_field_class_by_id( $sra->field ); __PACKAGE__->add_relevance_bump( $c->{classname}, $c->{field}, $sra->bump_type, $sra->multiplier ); } @@ -152,7 +202,7 @@ sub initialize_normalizers { my $tree = shift; # open-ils.cstore.direct.config.metabib_field_index_norm_map.search.atomic { "id" : { "!=" : null } }, { "flesh" : 1, "flesh_fields" : { "cmfinm" : ["norm"] }, "order_by" : [{ "class" : "cmfinm", "field" : "pos" }] } for my $cmfinm ( @$tree ) { - my $field_info = $self->field_class_by_id( $cmfinm->field ); + my $field_info = $self->search_field_class_by_id( $cmfinm->field ); __PACKAGE__->add_query_normalizer( $field_info->{classname}, $field_info->{field}, $cmfinm->norm->func, OpenSRF::Utils::JSON->JSON2perl($cmfinm->params) ); } } @@ -168,7 +218,7 @@ sub initialize { return $_complete if ($_complete); - $self->initialize_field_id_map( $args{config_metabib_field} ) + $self->initialize_search_field_id_map( $args{config_metabib_field} ) if ($args{config_metabib_field}); $self->initialize_relevance_bumps( $args{search_relevance_adjustment} ) @@ -188,43 +238,49 @@ sub initialize { sub TEST_SETUP { - __PACKAGE__->add_field_id_map( series => seriestitle => 1 => 1 ); + __PACKAGE__->add_search_field_id_map( series => seriestitle => 1 => 1 ); + + __PACKAGE__->add_search_field_id_map( series => seriestitle => 1 => 1 ); __PACKAGE__->add_relevance_bump( series => seriestitle => first_word => 1.5 ); __PACKAGE__->add_relevance_bump( series => seriestitle => full_match => 20 ); - __PACKAGE__->add_field_id_map( title => abbreviated => 2 => 1 ); + __PACKAGE__->add_search_field_id_map( title => abbreviated => 2 => 1 ); __PACKAGE__->add_relevance_bump( title => abbreviated => first_word => 1.5 ); __PACKAGE__->add_relevance_bump( title => abbreviated => full_match => 20 ); - __PACKAGE__->add_field_id_map( title => translated => 3 => 1 ); + __PACKAGE__->add_search_field_id_map( title => translated => 3 => 1 ); __PACKAGE__->add_relevance_bump( title => translated => first_word => 1.5 ); __PACKAGE__->add_relevance_bump( title => translated => full_match => 20 ); - __PACKAGE__->add_field_id_map( title => proper => 6 => 1 ); + __PACKAGE__->add_search_field_id_map( title => proper => 6 => 1 ); __PACKAGE__->add_query_normalizer( title => proper => 'naco_normalize' ); __PACKAGE__->add_relevance_bump( title => proper => first_word => 1.5 ); __PACKAGE__->add_relevance_bump( title => proper => full_match => 20 ); __PACKAGE__->add_relevance_bump( title => proper => word_order => 10 ); - __PACKAGE__->add_field_id_map( author => coporate => 7 => 1 ); + __PACKAGE__->add_search_field_id_map( author => coporate => 7 => 1 ); __PACKAGE__->add_relevance_bump( author => coporate => first_word => 1.5 ); __PACKAGE__->add_relevance_bump( author => coporate => full_match => 20 ); - __PACKAGE__->add_field_id_map( author => personal => 8 => 1 ); + __PACKAGE__->add_facet_field_id_map( author => personal => 8 => 1 ); + + __PACKAGE__->add_search_field_id_map( author => personal => 8 => 1 ); __PACKAGE__->add_relevance_bump( author => personal => first_word => 1.5 ); __PACKAGE__->add_relevance_bump( author => personal => full_match => 20 ); __PACKAGE__->add_query_normalizer( author => personal => 'naco_normalize' ); __PACKAGE__->add_query_normalizer( author => personal => 'split_date_range' ); - __PACKAGE__->add_field_id_map( subject => topic => 14 => 1 ); + __PACKAGE__->add_facet_field_id_map( subject => topic => 14 => 1 ); + + __PACKAGE__->add_search_field_id_map( subject => topic => 14 => 1 ); __PACKAGE__->add_relevance_bump( subject => topic => first_word => 1 ); __PACKAGE__->add_relevance_bump( subject => topic => full_match => 1 ); - __PACKAGE__->add_field_id_map( subject => complete => 16 => 1 ); + __PACKAGE__->add_search_field_id_map( subject => complete => 16 => 1 ); __PACKAGE__->add_relevance_bump( subject => complete => first_word => 1 ); __PACKAGE__->add_relevance_bump( subject => complete => full_match => 1 ); - __PACKAGE__->add_field_id_map( keyword => keyword => 15 => 1 ); + __PACKAGE__->add_search_field_id_map( keyword => keyword => 15 => 1 ); __PACKAGE__->add_relevance_bump( keyword => keyword => first_word => 1 ); __PACKAGE__->add_relevance_bump( keyword => keyword => full_match => 1 ); @@ -506,7 +562,7 @@ sub flatten { my $node_rank = $node->rank . " * ${talias}_weight.weight"; - $from .= "\n\tLEFT JOIN (\n\t\tSELECT *\n\t\t FROM $table\n\t\t WHERE index_vector @@ (" .$node->tsquery . ')'; + $from .= "\n\tLEFT JOIN (\n\t\tSELECT * /* search */\n\t\t FROM $table\n\t\t WHERE index_vector @@ (" .$node->tsquery . ')'; my @bump_fields; if (@{$node->fields} > 0) { @@ -541,6 +597,26 @@ sub flatten { push @rank_list, $node_rank; + } elsif ($node->isa( 'QueryParser::query_plan::facet' )) { + + my $table = $node->table; + my $talias = $node->table_alias; + + $from .= "\n\tJOIN (\n\t\tSELECT * /* facet */\n\t\t FROM $table\n\t\t WHERE value IN (\$_$$\$" . join("\$_$$\$,\$_$$\$", @{$node->values}) . "\$_$$\$)". + "\n\t\t\tAND field IN (SELECT id FROM config.metabib_field WHERE field_class = \$_$$\$". $node->classname ."\$_$$\$ AND facet_field"; + + if (@{$node->fields} > 0) { + $from .= " AND name IN ("; + $from .= "\$_$$\$" . join("\$_$$\$,\$_$$\$", @{$node->fields}) . "\$_$$\$)"; + } + + $from .= ")"; + + my $core_limit = $self->QueryParser->core_limit || 25000; + $from .= "\n\t\tLIMIT $core_limit\n\t) AS $talias ON (m.source = $talias.source)"; + + $where .= 'TRUE'; + } else { my $subnode = $node->flatten; @@ -565,6 +641,39 @@ package OpenILS::Application::Storage::Driver::Pg::QueryParser::query_plan::filt use base 'QueryParser::query_plan::filter'; #------------------------------- +package OpenILS::Application::Storage::Driver::Pg::QueryParser::query_plan::facet; +use base 'QueryParser::query_plan::facet'; + +sub classname { + my $self = shift; + my ($classname) = split '\|', $self->name; + return $classname; +} + +sub table { + my $self = shift; + return 'metabib.' . $self->classname . '_field_entry'; +} + +sub fields { + my $self = shift; + my ($classname,@fields) = split '\|', $self->name; + return \@fields; +} + +sub table_alias { + my $self = shift; + + my $table_alias = "$self"; + $table_alias =~ s/^.*\(0(x[0-9a-fA-F]+)\)$/$1/go; + $table_alias .= '_' . $self->name; + $table_alias =~ s/\|/_/go; + + return $table_alias; +} + + +#------------------------------- package OpenILS::Application::Storage::Driver::Pg::QueryParser::query_plan::modifier; use base 'QueryParser::query_plan::modifier'; diff --git a/Open-ILS/src/perlmods/OpenILS/Application/Storage/QueryParser.pm b/Open-ILS/src/perlmods/OpenILS/Application/Storage/QueryParser.pm index 8b8307b6f1..390cc264bc 100755 --- a/Open-ILS/src/perlmods/OpenILS/Application/Storage/QueryParser.pm +++ b/Open-ILS/src/perlmods/OpenILS/Application/Storage/QueryParser.pm @@ -14,6 +14,11 @@ our %parser_config = ( } ); +sub facet_class_count { + my $self = shift; + return @{$self->facet_classes}; +} + sub search_class_count { my $self = shift; return @{$self->search_classes}; @@ -106,6 +111,19 @@ sub add_search_modifier { return $modifier; } +sub add_facet_class { + my $pkg = shift; + $pkg = ref($pkg) || $pkg; + my $class = shift; + + return $class if (grep { $_ eq $class } @{$pkg->facet_classes}); + + push @{$pkg->facet_classes}, $class; + $pkg->facet_fields->{$class} = []; + + return $class; +} + sub add_search_class { my $pkg = shift; $pkg = ref($pkg) || $pkg; @@ -134,6 +152,16 @@ sub operator { return $parser_config{$class}{operators}{$opname}; } +sub facet_classes { + my $class = shift; + $class = ref($class) || $class; + my $classes = shift; + + $parser_config{$class}{facet_classes} ||= []; + $parser_config{$class}{facet_classes} = $classes if (ref($classes) && @$classes); + return $parser_config{$class}{facet_classes}; +} + sub search_classes { my $class = shift; $class = ref($class) || $class; @@ -188,6 +216,19 @@ sub default_search_class { return $QueryParser::parser_config{$pkg}{default_class}; } +sub remove_facet_class { + my $pkg = shift; + $pkg = ref($pkg) || $pkg; + my $class = shift; + + return $class if (!grep { $_ eq $class } @{$pkg->facet_classes}); + + $pkg->facet_classes( [ grep { $_ ne $class } @{$pkg->facet_classes} ] ); + delete $QueryParser::parser_config{$pkg}{facet_fields}{$class}; + + return $class; +} + sub remove_search_class { my $pkg = shift; $pkg = ref($pkg) || $pkg; @@ -201,6 +242,29 @@ sub remove_search_class { return $class; } +sub add_facet_field { + my $pkg = shift; + $pkg = ref($pkg) || $pkg; + my $class = shift; + my $field = shift; + + $pkg->add_facet_class( $class ); + + return { $class => $field } if (grep { $_ eq $field } @{$pkg->facet_fields->{$class}}); + + push @{$pkg->facet_fields->{$class}}, $field; + + return { $class => $field }; +} + +sub facet_fields { + my $class = shift; + $class = ref($class) || $class; + + $parser_config{$class}{facet_fields} ||= {}; + return $parser_config{$class}{facet_fields}; +} + sub add_search_field { my $pkg = shift; $pkg = ref($pkg) || $pkg; @@ -269,6 +333,19 @@ sub search_field_aliases { return $parser_config{$class}{field_alias_map}; } +sub remove_facet_field { + my $pkg = shift; + $pkg = ref($pkg) || $pkg; + my $class = shift; + my $field = shift; + + return { $class => $field } if (!$pkg->facet_fields->{$class} || !grep { $_ eq $field } @{$pkg->facet_fields->{$class}}); + + $pkg->facet_fields->{$class} = [ grep { $_ ne $field } @{$pkg->facet_fields->{$class}} ]; + + return { $class => $field }; +} + sub remove_search_field { my $pkg = shift; $pkg = ref($pkg) || $pkg; @@ -372,7 +449,7 @@ sub decompose { $search_class_re .= '|' unless ($first_class); $first_class = 0; $search_class_re .= $class . '(?:\|\w+)*'; - $seeen_class{$class} = 1; + $seen_classes{$class} = 1; } for my $class ( keys %{$pkg->search_class_aliases} ) { @@ -386,8 +463,8 @@ sub decompose { $search_class_re .= '|' unless ($first_class); $first_class = 0; - $search_class_re .= $class . '(?:\|\w+)*' if (!$seeen_class{$class}); - $seeen_class{$class} = 1; + $search_class_re .= $class . '(?:\|\w+)*' if (!$seen_classes{$class}); + $seen_classes{$class} = 1; } $search_class_re .= '):'; @@ -412,6 +489,9 @@ sub decompose { # Build the filter and modifier uber-regexps + my $facet_re = '^\s*((?:' . join( '|', @{$pkg->facet_classes}) . ')(?:\|\w+)*)\[(.+?)\]'; + warn " Facet RE: $facet_re\n" if $self->debug; + my $filter_re = '^\s*(' . join( '|', @{$pkg->filters}) . ')\(([^()]+)\)'; my $filter_as_class_re = '^\s*(' . join( '|', @{$pkg->filters}) . '):\s*(\S+)'; @@ -494,6 +574,15 @@ sub decompose { $struct->joiner( '|' ); $last_type = 'OR'; + } elsif ($self->facet_class_count && /$facet_re/) { # changing current class + warn "Encountered facet: $1 => $2\n" if $self->debug; + + my $facet = $1; + my $facet_value = [ split '\s*#\s*', $2 ]; + $struct->new_facet( $facet => $facet_value ); + $_ = $'; + + $last_type = ''; } elsif ($self->search_class_count && /$search_class_re/) { # changing current class warn "Encountered class change: $1\n" if $self->debug; @@ -613,6 +702,18 @@ sub new_node { return $node; } +sub new_facet { + my $self = shift; + my $pkg = ref($self) || $self; + my $name = shift; + my $args = shift; + + my $node = do{$pkg.'::facet'}->new( plan => $self, name => $name, 'values' => $args ); + $self->add_node( $node ); + + return $node; +} + sub new_filter { my $self = shift; my $pkg = ref($self) || $self; @@ -724,6 +825,24 @@ sub add_modifier { return $self; } +sub facets { + my $self = shift; + $self->{facets} ||= []; + return $self->{facets}; +} + +sub add_facet { + my $self = shift; + my $facet = shift; + + $self->{facets} ||= []; + return $self if (grep {$_->name eq $facet->name} @{$self->{facets}}); + + push(@{$self->{facets}}, $facet); + + return $self; +} + sub filters { my $self = shift; $self->{filters} ||= []; @@ -907,6 +1026,32 @@ sub args { } #------------------------------- +package QueryParser::query_plan::facet; + +sub new { + my $pkg = shift; + $pkg = ref($pkg) || $pkg; + my %args = @_; + + return bless \%args => $pkg; +} + +sub plan { + my $self = shift; + return $self->{plan}; +} + +sub name { + my $self = shift; + return $self->{name}; +} + +sub values { + my $self = shift; + return $self->{'values'}; +} + +#------------------------------- package QueryParser::query_plan::modifier; sub new { -- 2.11.0