From 788760e831c540bb88e11324ded23ce97c005f4c Mon Sep 17 00:00:00 2001 From: miker <miker@dcc99617-32d9-48b4-a31d-7c20da2025e4> Date: Fri, 12 Mar 2010 19:12:49 +0000 Subject: [PATCH] get the new query parser into the repo git-svn-id: svn://svn.open-ils.org/ILS/trunk@15833 dcc99617-32d9-48b4-a31d-7c20da2025e4 --- .../Application/Storage/Driver/Pg/QueryParser.pm | 601 ++++++++++++++ .../OpenILS/Application/Storage/QueryParser.pm | 911 +++++++++++++++++++++ 2 files changed, 1512 insertions(+) create mode 100644 Open-ILS/src/perlmods/OpenILS/Application/Storage/Driver/Pg/QueryParser.pm create mode 100755 Open-ILS/src/perlmods/OpenILS/Application/Storage/QueryParser.pm diff --git a/Open-ILS/src/perlmods/OpenILS/Application/Storage/Driver/Pg/QueryParser.pm b/Open-ILS/src/perlmods/OpenILS/Application/Storage/Driver/Pg/QueryParser.pm new file mode 100644 index 0000000000..56c1fc0aaa --- /dev/null +++ b/Open-ILS/src/perlmods/OpenILS/Application/Storage/Driver/Pg/QueryParser.pm @@ -0,0 +1,601 @@ +package OpenILS::Application::Storage::Driver::Pg::QueryParser; +use OpenILS::Application::Storage::QueryParser; +use base 'QueryParser'; +use OpenSRF::Utils::JSON; + +sub simple_plan { + my $self = shift; + + return 0 unless $self->parse_tree; + return 0 if @{$self->parse_tree->filters}; + return 0 if @{$self->parse_tree->modifiers}; + for my $node ( @{ $self->parse_tree->query_nodes } ) { + return 0 if (!ref($node) && $node eq '|'); + next unless (ref($node)); + return 0 if ($node->isa('QueryParser::query_plan')); + } + + return 1; +} + +sub toSQL { + my $self = shift; + return $self->parse_tree->toSQL; +} + +sub field_id_map { + my $self = shift; + my $map = shift; + + $self->custom_data->{field_id_map} ||= {}; + $self->custom_data->{field_id_map} = $map if ($map); + return $self->custom_data->{field_id_map}; +} + +sub add_field_id_map { + my $self = shift; + my $class = shift; + my $field = shift; + my $id = shift; + my $weight = shift; + + $self->add_search_field( $class => $field ); + $self->field_id_map->{by_id}{$id} = { classname => $class, field => $field, weight => $weight }; + $self->field_id_map->{by_class}{$class}{$field} = $id; + + return { + by_id => { $id => { classname => $class, field => $field, weight => $weight } }, + by_class => { $class => { $field => $id } } + }; +} + +sub field_class_by_id { + my $self = shift; + my $id = shift; + + return $self->field_id_map->{by_id}{$id}; +} + +sub field_ids_by_class { + my $self = shift; + my $class = shift; + my $field = shift; + + return undef unless ($class); + + if ($field) { + return [$self->field_id_map->{by_class}{$class}{$field}]; + } + + return [values( %{ $self->field_id_map->{by_class}{$class} } )]; +} + +sub relevance_bumps { + my $self = shift; + my $bumps = shift; + + $self->custom_data->{rel_bumps} ||= {}; + $self->custom_data->{rel_bumps} = $bumps if ($bumps); + return $self->custom_data->{rel_bumps}; +} + +sub find_relevance_bumps { + my $self = shift; + my $class = shift; + my $field = shift; + + return $self->relevance_bumps->{$class}{$field}; +} + +sub add_relevance_bump { + my $self = shift; + my $class = shift; + my $field = shift; + my $type = shift; + my $multiplier = shift; + my $active = shift; + + $active = 1 if (!defined($active)); + + $self->relevance_bumps->{$class}{$field}{$type} = { multiplier => $multiplier, active => $active }; + + return { $class => { $field => { $type => { multiplier => $multiplier, active => $active } } } }; +} + + +sub initialize_field_id_map { + my $self = shift; + my $cmf_list = shift; + + for my $cmf (@$cmf_list) { + $self->add_field_id_map( $cmf->field_class, $cmf->field, $cmf->id, $cmf->weight ); + } + + return $self->field_id_map; +} + +sub initialize_relevance_bumps { + my $self = shift; + my $sra_list = shift; + + for my $sra (@$sra_list) { + my $c = $self->field_class_by_id( $sra->field ); + $self->add_relevance_bump( $c->{classname}, $c->{field}, $sra->bump_type, $sra->multiplier ); + } + + return $self->relevance_bumps; +} + +sub initialize_normalizers { + my $self = shift; + my $tree = shift; # open-ils.cstore.direct.config.metabib_field_index_norm_map.search.atomic { "id" : { "!=" : null } }, { "flesh" : 1, "flesh_fields" : { "cmfinm" : ["norm"] }, "order_by" : [{ "class" : "cmfinm", "field" : "pos" }] } + + for my $cmfinm ( @$tree ) { + my $field_info = $self->field_class_by_id( $cmfinm->field ); + $self->add_query_normalizer( $field_info->{classname}, $field_info->{field}, $cmfinm->norm->func, OpenSRF::Utils::JSON->JSON2perl($cmfinm->params) ); + } +} + +our $_complete = 0; +sub initialization_complete { + return $_complete; +} + +sub initialize { + my $self = shift; + my %args = @_; + + return $_complete if ($_complete); + + $self->initialize_field_id_map( $args{config_metabib_field} ) + if ($args{config_metabib_field}); + + $self->initialize_relevance_bumps( $args{search_relevance_adjustment} ) + if ($args{search_relevance_adjustment}); + + $self->initialize_normalizers( $args{config_metabib_field_index_norm_map} ) + if ($args{config_metabib_field_index_norm_map}); + + $_complete = 1 if ( + $args{config_metabib_field_index_norm_map} && + $args{search_relevance_adjustment} && + $args{config_metabib_field} + ); + + return $_complete; +} + +sub TEST_SETUP { + + __PACKAGE__->add_field_id_map( series => seriestitle => 1 => 1 ); + __PACKAGE__->add_relevance_bump( series => seriestitle => first_word => 1.5 ); + __PACKAGE__->add_relevance_bump( series => seriestitle => full_match => 20 ); + + __PACKAGE__->add_field_id_map( title => abbreviated => 2 => 1 ); + __PACKAGE__->add_relevance_bump( title => abbreviated => first_word => 1.5 ); + __PACKAGE__->add_relevance_bump( title => abbreviated => full_match => 20 ); + + __PACKAGE__->add_field_id_map( title => translated => 3 => 1 ); + __PACKAGE__->add_relevance_bump( title => translated => first_word => 1.5 ); + __PACKAGE__->add_relevance_bump( title => translated => full_match => 20 ); + + __PACKAGE__->add_field_id_map( title => proper => 6 => 1 ); + __PACKAGE__->add_query_normalizer( title => proper => 'naco_normalize' ); + __PACKAGE__->add_relevance_bump( title => proper => first_word => 1.5 ); + __PACKAGE__->add_relevance_bump( title => proper => full_match => 20 ); + __PACKAGE__->add_relevance_bump( title => proper => word_order => 10 ); + + __PACKAGE__->add_field_id_map( author => coporate => 7 => 1 ); + __PACKAGE__->add_relevance_bump( author => coporate => first_word => 1.5 ); + __PACKAGE__->add_relevance_bump( author => coporate => full_match => 20 ); + + __PACKAGE__->add_field_id_map( author => personal => 8 => 1 ); + __PACKAGE__->add_relevance_bump( author => personal => first_word => 1.5 ); + __PACKAGE__->add_relevance_bump( author => personal => full_match => 20 ); + __PACKAGE__->add_query_normalizer( author => personal => 'naco_normalize' ); + __PACKAGE__->add_query_normalizer( author => personal => 'split_date_range' ); + + __PACKAGE__->add_field_id_map( subject => topic => 14 => 1 ); + __PACKAGE__->add_relevance_bump( subject => topic => first_word => 1 ); + __PACKAGE__->add_relevance_bump( subject => topic => full_match => 1 ); + + __PACKAGE__->add_field_id_map( subject => complete => 16 => 1 ); + __PACKAGE__->add_relevance_bump( subject => complete => first_word => 1 ); + __PACKAGE__->add_relevance_bump( subject => complete => full_match => 1 ); + + __PACKAGE__->add_field_id_map( keyword => keyword => 15 => 1 ); + __PACKAGE__->add_relevance_bump( keyword => keyword => first_word => 1 ); + __PACKAGE__->add_relevance_bump( keyword => keyword => full_match => 1 ); + + + __PACKAGE__->add_search_class_alias( keyword => 'kw' ); + __PACKAGE__->add_search_class_alias( title => 'ti' ); + __PACKAGE__->add_search_class_alias( author => 'au' ); + __PACKAGE__->add_search_class_alias( author => 'name' ); + __PACKAGE__->add_search_class_alias( author => 'dc.contributor' ); + __PACKAGE__->add_search_class_alias( subject => 'su' ); + __PACKAGE__->add_search_class_alias( subject => 'bib.subject(?:Title|Place|Occupation)' ); + __PACKAGE__->add_search_class_alias( series => 'se' ); + __PACKAGE__->add_search_class_alias( keyword => 'dc.identifier' ); + + __PACKAGE__->add_query_normalizer( author => corporate => 'naco_normalize' ); + __PACKAGE__->add_query_normalizer( keyword => keyword => 'naco_normalize' ); + + __PACKAGE__->add_search_field_alias( subject => name => 'bib.subjectName' ); + +} + +__PACKAGE__->default_search_class( 'keyword' ); + +__PACKAGE__->add_search_filter( 'audience' ); +__PACKAGE__->add_search_filter( 'vr_format' ); +__PACKAGE__->add_search_filter( 'format' ); +__PACKAGE__->add_search_filter( 'item_type' ); +__PACKAGE__->add_search_filter( 'item_form' ); +__PACKAGE__->add_search_filter( 'lit_form' ); +__PACKAGE__->add_search_filter( 'location' ); +__PACKAGE__->add_search_filter( 'site' ); +__PACKAGE__->add_search_filter( 'depth' ); +__PACKAGE__->add_search_filter( 'sort' ); +__PACKAGE__->add_search_filter( 'language' ); +__PACKAGE__->add_search_filter( 'preferred_language' ); +__PACKAGE__->add_search_filter( 'preferred_language_weight' ); +__PACKAGE__->add_search_filter( 'statuses' ); +__PACKAGE__->add_search_filter( 'bib_level' ); +__PACKAGE__->add_search_filter( 'before' ); +__PACKAGE__->add_search_filter( 'after' ); +__PACKAGE__->add_search_filter( 'during' ); +__PACKAGE__->add_search_filter( 'core_limit' ); +__PACKAGE__->add_search_filter( 'check_limit' ); +__PACKAGE__->add_search_filter( 'skip_check' ); +__PACKAGE__->add_search_filter( 'estimation_strategy' ); + +__PACKAGE__->add_search_modifier( 'available' ); +__PACKAGE__->add_search_modifier( 'descending' ); +__PACKAGE__->add_search_modifier( 'ascending' ); +__PACKAGE__->add_search_modifier( 'metarecord' ); +__PACKAGE__->add_search_modifier( 'metabib' ); +__PACKAGE__->add_search_modifier( 'staff' ); + + +#------------------------------- +package OpenILS::Application::Storage::Driver::Pg::QueryParser::query_plan; +use base 'QueryParser::query_plan'; + +sub toSQL { + my $self = shift; + my $flat_plan = $self->flatten; + + # generate the relevance ranking + my $rel = "AVG(\n\t\t(" . join(")+\n\t\t(", @{$$flat_plan{rank_list}}) . ")\n\t)"; + + # find any supplied sort option + my ($sort_filter) = $self->find_filter('sort'); + if ($sort_filter) { + $sort_filter = $sort_filter->args->[0]; + } else { + $sort_filter = 'rel'; + } + + my %filters; + my ($format) = $self->find_filter('format'); + if ($format) { + my ($t,$f) = split('-', $format->args->[0]); + $self->new_filter( item_type => [ split '', $t ] ) if ($t); + $self->new_filter( item_form => [ split '', $f ] ) if ($f); + } + + for my $f ( qw/audience vr_format item_type item_form lit_form language bib_level/ ) { + my $col = $f; + $col = 'item_lang' if ($f eq 'language'); + $filters{$f} = ''; + my ($filter) = $self->find_filter($f); + if ($filter) { + $filters{$f} = "AND mrd.$col in (\$_$$\$" . join("\$_$$\$,\$_$$\$",@{$filter->args}) . "\$_$$\$)"; + } + } + + my $audience = $filters{audience}; + my $vr_format = $filters{vr_format}; + my $item_type = $filters{item_type}; + my $item_form = $filters{item_form}; + my $lit_form = $filters{lit_form}; + my $language = $filters{language}; + my $bib_level = $filters{bib_level}; + + my $rank = $rel; + + my $desc = 'ASC'; + $desc = 'DESC' if ($self->find_modifier('descending')); + + if ($sort_filter eq 'rel') { # relevance ranking flips sort dir + if ($desc eq 'ASC') { + $desc = 'DESC'; + } else { + $desc = 'ASC'; + } + } else { + if ($sort_filter eq 'title') { + my $default = $desc eq 'DESC' ? ' ' : 'zzzzzz'; + $rank = <<" SQL"; +( COALESCE( FIRST (( + SELECT LTRIM(SUBSTR( frt.value, COALESCE(SUBSTRING(frt.ind2 FROM E'\\\\d+'),'0')::INT + 1 )) + FROM metabib.full_rec frt + WHERE frt.record = m.source + AND frt.tag = 'tnf' + AND frt.subfield = 'a' + LIMIT 1 + )),'$default')) + SQL + } elsif ($sort_filter eq 'pubdate') { + $rank = "COALESCE( FIRST(NULLIF(REGEXP_REPLACE(mrd.date1, E'\\\\D+', '0', 'g'),'')), '0' )::INT"; + } elsif ($sort_filter eq 'create_date') { + $rank = "( FIRST (( SELECT create_date FROM biblio.record_entry rbr WHERE rbr.id = m.source)) )"; + } elsif ($sort_filter eq 'edit_date') { + $rank = "( FIRST (( SELECT edit_date FROM biblio.record_entry rbr WHERE rbr.id = m.source)) )"; + } elsif ($sort_filter eq 'author') { + my $default = $desc eq 'DESC' ? ' ' : 'zzzzzz'; + $rank = <<" SQL" +( COALESCE( FIRST (( + SELECT LTRIM(fra.value) + FROM metabib.full_rec fra + WHERE fra.record = m.source + AND fra.tag LIKE '1%' + AND fra.subfield = 'a' + ORDER BY fra.tag::text::int + LIMIT 1 + )),'$default')) + SQL + } else { + # default to rel ranking + $rank = $rel; + } + } + + + my $key = 'm.source'; + $key = 'm.metarecord' if (grep {$_->name eq 'metarecord'} @{$self->modifiers}); + + my $sp_size = $self->QueryParser->superpage_size; + my $sp = $self->QueryParser->superpage; + + my $offset = ''; + if ($sp > 1) { + $offset = 'OFFSET ' . ($sp - 1) * $sp_size; + } + + return <<SQL +SELECT $key AS id, + ARRAY_ACCUM(DISTINCT m.source) AS records, + $rel AS rel, + $rank AS rank, + COALESCE( FIRST(NULLIF(REGEXP_REPLACE(mrd.date1, E'\\\\D+', '0', 'g'),'')), '0' )::INT AS tie_break + FROM metabib.metarecord_source_map m + JOIN metabib.rec_descriptor mrd ON (m.source = mrd.record) + $$flat_plan{from} + WHERE 1=1 + $audience + $vr_format + $item_type + $item_form + $lit_form + $language + $bib_level + AND $$flat_plan{where} + GROUP BY 1 + ORDER BY 4 $desc, 5 DESC + LIMIT $sp_size + $offset +SQL + +} + + +sub rel_bump { + my $self = shift; + my $node = shift; + my $bump = shift; + my $multiplier = shift; + + my $only_atoms = $node->only_atoms; + return '' if (!@$only_atoms); + + if ($bump eq 'first_word') { + return "/* first_word */ CASE WHEN naco_normalize(".$node->table_alias.".value) ". + "LIKE naco_normalize(\$_$$\$".$only_atoms->[0]->content."\$_$$\$) \|\| '\%' ". + "THEN $multiplier ELSE 1 END"; + } elsif ($bump eq 'full_match') { + return "/* full_match */ CASE WHEN naco_normalize(".$node->table_alias.".value) ". + "LIKE". join( '||\'%\'||', map { " naco_normalize(\$_$$\$".$_->content."\$_$$\$) " } @$only_atoms ) . + "THEN $multiplier ELSE 1 END"; + } elsif ($bump eq 'word_order') { + return "/* word_order */ CASE WHEN naco_normalize(".$node->table_alias.".value) ". + "LIKE '\%'||". join( '||\'%\'||', map { " naco_normalize(\$_$$\$".$_->content."\$_$$\$) " } @$only_atoms ) . '||\'%\' '. + "THEN $multiplier ELSE 1 END"; + } + + return ''; +} + +sub flatten { + my $self = shift; + + my $from = shift || ''; + my $where = shift || ''; + + my @rank_list; + for my $node ( @{$self->query_nodes} ) { + if (ref($node)) { + if ($node->isa( 'QueryParser::query_plan::node' )) { + + my $table = $node->table; + my $talias = $node->table_alias; + + my $node_rank = $node->rank . " * ${talias}_weight.weight"; + + $from .= "\n\tLEFT JOIN (\n\t\tSELECT *\n\t\t FROM $table\n\t\t WHERE index_vector @@ (" .$node->tsquery . ')'; + + my @bump_fields; + if (@{$node->fields} > 0) { + @bump_fields = @{$node->fields}; + $from .= "\n\t\t\tAND field IN (SELECT id FROM config.metabib_field WHERE field_class = \$_$$\$". $node->classname ."\$_$$\$ AND name IN ("; + $from .= "\$_$$\$" . join("\$_$$\$,\$_$$\$", @{$node->fields}) . "\$_$$\$))"; + + } else { + @bump_fields = @{$self->QueryParser->search_fields->{$node->classname}}; + } + + my %used_bumps; + for my $field ( @bump_fields ) { + my $bumps = $self->QueryParser->find_relevance_bumps( $node->classname => $field ); + for my $b (keys %$bumps) { + next if (!$$bumps{$b}{active}); + next if ($used_bumps{$b}); + $used_bumps{$b} = 1; + + my $bump_case = $self->rel_bump( $node, $b, $$bumps{$b}{multiplier} ); + $node_rank .= "\n\t\t\t\t * " . $bump_case if ($bump_case); + } + } + + $from .= "\n\t\tLIMIT " . $self->QueryParser->core_limit . "\n\t) AS " . $node->table_alias . ' ON (m.source = ' . $node->table_alias . ".source)"; + $from .= "\n\tJOIN config.metabib_field AS ${talias}_weight ON (${talias}_weight.id = $talias.field)\n"; + + $where .= $node->table_alias . ".id IS NOT NULL "; + + push @rank_list, $node_rank; + + } else { + my $subnode = $node->flatten; + + push(@rank_list, @{$$subnode{rank_list}}); + $from .= $$subnode{from}; + $where .= "($$subnode{where})"; + } + } else { + $where .= ' AND ' if ($node eq '&'); + $where .= ' OR ' if ($node eq '|'); + # ... stitching the WHERE together ... + } + } + + return { rank_list => \@rank_list, from => $from, where => $where }; + +} + + +#------------------------------- +package OpenILS::Application::Storage::Driver::Pg::QueryParser::query_plan::filter; +use base 'QueryParser::query_plan::filter'; + +#------------------------------- +package OpenILS::Application::Storage::Driver::Pg::QueryParser::query_plan::modifier; +use base 'QueryParser::query_plan::modifier'; + +#------------------------------- +package OpenILS::Application::Storage::Driver::Pg::QueryParser::query_plan::node::atom; +use base 'QueryParser::query_plan::node::atom'; + +sub sql { + my $self = shift; + my $sql = shift; + + $self->{sql} = $sql if ($sql); + + return $self->{sql} if ($self->{sql}); + return $self->buildSQL; +} + +sub buildSQL { + my $self = shift; + + my $classname = $self->node->classname; + + my $normalizers = $self->node->plan->QueryParser->query_normalizers( $classname ); + my $fields = $self->node->fields; + + $fields = $self->node->plan->QueryParser->search_fields->{$classname} if (!@$fields); + + my @norm_list; + for my $field (@$fields) { + for my $nfield (keys %$normalizers) { + for my $nizer ( @{$$normalizers{$nfield}} ) { + push(@norm_list, $nizer) if ($field eq $nfield && !(grep {$_ eq $nizer} @norm_list)); + } + } + } + + my $sql = "\$_$$\$" . $self->content . "\$_$$\$";; + + for my $n ( @norm_list ) { + $sql = join(', ', $sql, map { "\$_$$\$" . $_ . "\$_$$\$" } @{ $n->{params} }); + $sql = $n->{function}."($sql)"; + } + + $sql = "to_tsquery('$classname'," . ($self->prefix ? "\$_$$\$" . $self->prefix . "\$_$$\$||" : '') . "'('||regexp_replace($sql,E'(?:\\\\s+|:)','&','g')||')')"; + + return $self->sql($sql); +} + +#------------------------------- +package OpenILS::Application::Storage::Driver::Pg::QueryParser::query_plan::node; +use base 'QueryParser::query_plan::node'; + +sub only_atoms { + my $self = shift; + + my $atoms = $self->query_atoms; + my @only_atoms; + for my $a (@$atoms) { + push(@only_atoms, $a) if (ref($a) && $a->isa('QueryParser::query_plan::node::atom')); + } + + return \@only_atoms; +} + +sub table { + my $self = shift; + my $table = shift; + $self->{table} = $table if ($table); + return $self->{table} if $self->{table}; + return $self->table( 'metabib.' . $self->classname . '_field_entry' ); +} + +sub table_alias { + my $self = shift; + my $table_alias = shift; + $self->{table_alias} = $table_alias if ($table_alias); + return $self->{table_alias} if ($self->{table_alias}); + + $table_alias = "$self"; + $table_alias =~ s/^.*\(0(x[0-9a-fA-F]+)\)$/$1/go; + $table_alias .= '_' . $self->requested_class; + $table_alias =~ s/\|/_/go; + + return $self->table_alias( $table_alias ); +} + +sub tsquery { + my $self = shift; + return $self->{tsquery} if ($self->{tsquery}); + + for my $atom (@{$self->query_atoms}) { + if (ref($atom)) { + $self->{tsquery} .= "\n\t\t\t" .$atom->sql; + } else { + $self->{tsquery} .= $atom x 2; + } + } + + return $self->{tsquery}; +} + +sub rank { + my $self = shift; + return $self->{rank} if ($self->{rank}); + return $self->{rank} = 'rank(' . $self->table_alias . '.index_vector, ' . $self->tsquery . ')'; +} + + +1; + diff --git a/Open-ILS/src/perlmods/OpenILS/Application/Storage/QueryParser.pm b/Open-ILS/src/perlmods/OpenILS/Application/Storage/QueryParser.pm new file mode 100755 index 0000000000..3b7fb335d2 --- /dev/null +++ b/Open-ILS/src/perlmods/OpenILS/Application/Storage/QueryParser.pm @@ -0,0 +1,911 @@ +package QueryParser; +our %parser_config = ( + QueryParser => { + filters => [], + modifiers => [], + operators => { + 'and' => '&&', + 'or' => '||', + group_start => '(', + group_end => ')', + required => '+', + modifier => '#' + } + } +); + +sub search_class_count { + my $self = shift; + return @{$self->search_classes}; +} + +sub filter_count { + my $self = shift; + return @{$self->filters}; +} + +sub modifier_count { + my $self = shift; + return @{$self->modifiers}; +} + +sub custom_data { + my $class = shift; + $class = ref($class) || $class; + + $parser_config{$class}{custom_data} ||= {}; + return $parser_config{$class}{custom_data}; +} + +sub operators { + my $class = shift; + $class = ref($class) || $class; + + $parser_config{$class}{operators} ||= {}; + return $parser_config{$class}{operators}; +} + +sub filters { + my $class = shift; + $class = ref($class) || $class; + + $parser_config{$class}{filters} ||= []; + return $parser_config{$class}{filters}; +} + +sub modifiers { + my $class = shift; + $class = ref($class) || $class; + + $parser_config{$class}{modifiers} ||= []; + return $parser_config{$class}{modifiers}; +} + +sub new { + my $class = shift; + $class = ref($class) || $class; + + my %opts = @_; + + my $self = bless {} => $class; + + for my $o (keys %{QueryParser->operators}) { + $class->operator($o => QueryParser->operator($o)) unless ($class->operator($o)); + } + + for my $opt ( keys %opts) { + $self->$opt( $opts{$opt} ) if ($self->can($opt)); + } + + return $self; +} + +sub new_plan { + my $self = shift; + my $pkg = ref($self) || $self; + return do{$pkg.'::query_plan'}->new( QueryParser => $self, @_ ); +} + +sub add_search_filter { + my $pkg = shift; + $pkg = ref($pkg) || $pkg; + my $filter = shift; + + return $filter if (grep { $_ eq $filter } @{$pkg->filters}); + push @{$pkg->filters}, $filter; + return $filter; +} + +sub add_search_modifier { + my $pkg = shift; + $pkg = ref($pkg) || $pkg; + my $modifier = shift; + + return $modifier if (grep { $_ eq $modifier } @{$pkg->modifiers}); + push @{$pkg->modifiers}, $modifier; + return $modifier; +} + +sub add_search_class { + my $pkg = shift; + $pkg = ref($pkg) || $pkg; + my $class = shift; + + return $class if (grep { $_ eq $class } @{$pkg->search_classes}); + + push @{$pkg->search_classes}, $class; + $pkg->search_fields->{$class} = []; + $pkg->default_search_class( $pkg->search_classes->[0] ) if (@{$pkg->search_classes} == 1); + + return $class; +} + +sub operator { + my $class = shift; + $class = ref($class) || $class; + my $opname = shift; + my $op = shift; + + return undef unless ($opname); + + $parser_config{$class}{operators} ||= {}; + $parser_config{$class}{operators}{$opname} = $op if ($op); + + return $parser_config{$class}{operators}{$opname}; +} + +sub search_classes { + my $class = shift; + $class = ref($class) || $class; + my $classes = shift; + + $parser_config{$class}{classes} ||= []; + $parser_config{$class}{classes} = $classes if (ref($classes) && @$classes); + return $parser_config{$class}{classes}; +} + +sub add_query_normalizer { + my $pkg = shift; + $pkg = ref($pkg) || $pkg; + my $class = shift; + my $field = shift; + my $func = shift; + my $params = shift || []; + + return $func if (grep { $_ eq $func } @{$pkg->query_normalizers->{$class}->{$field}}); + + push(@{$pkg->query_normalizers->{$class}->{$field}}, { function => $func, params => $params }); + + return $func; +} + +sub query_normalizers { + my $pkg = shift; + $pkg = ref($pkg) || $pkg; + + my $class = shift; + my $field = shift; + + $parser_config{$pkg}{normalizers} ||= {}; + if ($class) { + if ($field) { + $parser_config{$pkg}{normalizers}{$class}{$field} ||= []; + return $parser_config{$pkg}{normalizers}{$class}{$field}; + } else { + return $parser_config{$pkg}{normalizers}{$class}; + } + } + + return $parser_config{$pkg}{normalizers}; +} + +sub default_search_class { + my $pkg = shift; + $pkg = ref($pkg) || $pkg; + my $class = shift; + $QueryParser::parser_config{$pkg}{default_class} = $pkg->add_search_class( $class ) if $class; + + return $QueryParser::parser_config{$pkg}{default_class}; +} + +sub remove_search_class { + my $pkg = shift; + $pkg = ref($pkg) || $pkg; + my $class = shift; + + return $class if (!grep { $_ eq $class } @{$pkg->search_classes}); + + $pkg->search_classes( [ grep { $_ ne $class } @{$pkg->search_classes} ] ); + delete $QueryParser::parser_config{$pkg}{fields}{$class}; + + return $class; +} + +sub add_search_field { + my $pkg = shift; + $pkg = ref($pkg) || $pkg; + my $class = shift; + my $field = shift; + + $pkg->add_search_class( $class ); + + return { $class => $field } if (grep { $_ eq $field } @{$pkg->search_fields->{$class}}); + + push @{$pkg->search_fields->{$class}}, $field; + + return { $class => $field }; +} + +sub search_fields { + my $class = shift; + $class = ref($class) || $class; + + $parser_config{$class}{fields} ||= {}; + return $parser_config{$class}{fields}; +} + +sub add_search_class_alias { + my $pkg = shift; + $pkg = ref($pkg) || $pkg; + my $class = shift; + my $alias = shift; + + $pkg->add_search_class( $class ); + + return { $class => $alias } if (grep { $_ eq $alias } @{$pkg->search_class_aliases->{$class}}); + + push @{$pkg->search_class_aliases->{$class}}, $alias; + + return { $class => $alias }; +} + +sub search_class_aliases { + my $class = shift; + $class = ref($class) || $class; + + $parser_config{$class}{class_map} ||= {}; + return $parser_config{$class}{class_map}; +} + +sub add_search_field_alias { + my $pkg = shift; + $pkg = ref($pkg) || $pkg; + my $class = shift; + my $field = shift; + my $alias = shift; + + return { $class => { $field => $alias } } if (grep { $_ eq $alias } @{$pkg->search_field_aliases->{$class}{$field}}); + + push @{$pkg->search_field_aliases->{$class}{$field}}, $alias; + + return { $class => { $field => $alias } }; +} + +sub search_field_aliases { + my $class = shift; + $class = ref($class) || $class; + + $parser_config{$class}{field_alias_map} ||= {}; + return $parser_config{$class}{field_alias_map}; +} + +sub remove_search_field { + my $pkg = shift; + $pkg = ref($pkg) || $pkg; + my $class = shift; + my $field = shift; + + return { $class => $field } if (!$pkg->search_fields->{$class} || !grep { $_ eq $field } @{$pkg->search_fields->{$class}}); + + $pkg->search_fields->{$class} = [ grep { $_ ne $field } @{$pkg->search_fields->{$class}} ]; + + return { $class => $field }; +} + +sub remove_search_field_alias { + my $pkg = shift; + $pkg = ref($pkg) || $pkg; + my $class = shift; + my $field = shift; + my $alias = shift; + + return { $class => { $field => $alias } } if (!$pkg->search_field_aliases->{$class}{$field} || !grep { $_ eq $alias } @{$pkg->search_field_aliases->{$class}{$field}}); + + $pkg->search_field_aliases->{$class}{$field} = [ grep { $_ ne $alias } @{$pkg->search_field_aliases->{$class}{$field}} ]; + + return { $class => { $field => $alias } }; +} + +sub remove_search_class_alias { + my $pkg = shift; + $pkg = ref($pkg) || $pkg; + my $class = shift; + my $alias = shift; + + return { $class => $alias } if (!$pkg->search_class_aliases->{$class} || !grep { $_ eq $alias } @{$pkg->search_class_aliases->{$class}}); + + $pkg->search_class_aliases->{$class} = [ grep { $_ ne $alias } @{$pkg->search_class_aliases->{$class}} ]; + + return { $class => $alias }; +} + +sub debug { + my $self = shift; + my $q = shift; + $self->{_debug} = $q if (defined $q); + return $self->{_debug}; +} + +sub query { + my $self = shift; + my $q = shift; + $self->{_query} = $q if (defined $q); + return $self->{_query}; +} + +sub parse_tree { + my $self = shift; + my $q = shift; + $self->{_parse_tree} = $q if (defined $q); + return $self->{_parse_tree}; +} + +sub parse { + my $self = shift; + $self->parse_tree( + $self->decompose( + $self->query( shift() ) + ) + ); + + return $self; +} + +sub decompose { + my $self = shift; + my $pkg = ref($self) || $self;; + + $_ = shift; + my $current_class = shift || $self->default_search_class; + + my $recursing = shift || 0; + + # Build the search class+field uber-regexp + my $search_class_re = '^\s*('; + my $first_class = 1; + + for my $class ( keys %{$pkg->search_field_aliases} ) { + + for my $field ( keys %{$pkg->search_field_aliases->{$class}} ) { + + for my $alias ( @{$pkg->search_field_aliases->{$class}{$field}} ) { + $alias = qr/$alias/; + s/\b$alias[:=]/$class\|$field:/g; + } + + $search_class_re .= '|' unless ($first_class); + $first_class = 0; + + $search_class_re .= $class; + } + } + + for my $class ( keys %{$pkg->search_class_aliases} ) { + + for my $alias ( @{$pkg->search_class_aliases->{$class}} ) { + $alias = qr/$alias/; + s/(^|[^|])\b$alias\|/$1$class\|/g; + s/(^|[^|])\b$alias[:=]/$1$class:/g; + } + + $search_class_re .= '|' unless ($first_class); + $first_class = 0; + + $search_class_re .= $class . '(?:\|\w+)*'; + } + $search_class_re .= '):'; + + my $required_re = $pkg->operator('required'); + $required_re = qr/^\s*\Q$required_re\E/; + my $and_re = $pkg->operator('and'); + $and_re = qr/^\s*\Q$and_re\E/; + + my $or_re = $pkg->operator('or'); + $or_re = qr/^\s*\Q$or_re\E/; + + my $group_start_re = $pkg->operator('group_start'); + $group_start_re = qr/^\s*\Q$group_start_re\E/; + + my $group_end = $pkg->operator('group_end'); + my $group_end_re = qr/^\s*\Q$group_end\E/; + + my $modifier_tag_re = $pkg->operator('modifier'); + $modifier_tag_re = qr/^\s*\Q$modifier_tag_re\E/; + + + # Build the filter and modifier uber-regexps + my $filter_re = '^\s*(' . join( '|', @{$pkg->filters}) . ')\(([^()]+)\)'; + my $filter_as_class_re = '^\s*(' . join( '|', @{$pkg->filters}) . '):\s*(\S+)'; + + my $modifier_re = '^\s*'.$modifier_tag_re.'(' . join( '|', @{$pkg->modifiers}) . ')\b'; + my $modifier_as_class_re = '^\s*(' . join( '|', @{$pkg->modifiers}) . '):\s*(\S+)'; + + my $struct = $self->new_plan( level => $recursing ); + my $remainder = ''; + + my $last_type = ''; + while (!$remainder) { + if (/$group_end_re/) { # end of an explicit group + warn "Encountered explicit group end\n" if $self->debug; + + $_ = $'; + $remainder = $'; + + $last_type = ''; + } elsif ($self->filter_count && /$filter_re/) { # found a filter + warn "Encountered search filter: $1 set to $2\n" if $self->debug; + + $_ = $'; + $struct->new_filter( $1 => [ split '[, ]+', $2 ] ); + + $last_type = ''; + } elsif ($self->filter_count && /$filter_as_class_re/) { # found a filter + warn "Encountered search filter: $1 set to $2\n" if $self->debug; + + $_ = $'; + $struct->new_filter( $1 => [ split '[, ]+', $2 ] ); + + $last_type = ''; + } elsif ($self->modifier_count && /$modifier_re/) { # found a modifier + warn "Encountered search modifier: $1\n" if $self->debug; + + $_ = $'; + if (!$struct->top_plan) { + warn " Search modifiers only allowed at the top level of the query\n" if $self->debug; + } else { + $struct->new_modifier($1); + } + + $last_type = ''; + } elsif ($self->modifier_count && /$modifier_as_class_re/) { # found a modifier + warn "Encountered search modifier: $1\n" if $self->debug; + + my $mod = $1; + + $_ = $'; + if (!$struct->top_plan) { + warn " Search modifiers only allowed at the top level of the query\n" if $self->debug; + } elsif ($2 =~ /^[ty1]/i) { + $struct->new_modifier($mod); + } + + $last_type = ''; + } elsif (/$group_start_re/) { # start of an explicit group + warn "Encountered explicit group start\n" if $self->debug; + + my ($substruct, $subremainder) = $self->decompose( $', $current_class, $recursing + 1 ); + $struct->add_node( $substruct ); + $_ = $subremainder; + + $last_type = ''; + } elsif (/$and_re/) { # ANDed expression + $_ = $'; + next if ($last_type eq 'AND'); + next if ($last_type eq 'OR'); + warn "Encountered AND\n" if $self->debug; + + $struct->joiner( '&' ); + + $last_type = 'AND'; + } elsif (/$or_re/) { # ORed expression + $_ = $'; + next if ($last_type eq 'AND'); + next if ($last_type eq 'OR'); + warn "Encountered OR\n" if $self->debug; + + $struct->joiner( '|' ); + + $last_type = 'OR'; + } elsif ($self->search_class_count && /$search_class_re/) { # changing current class + warn "Encountered class change: $1\n" if $self->debug; + + $current_class = $1; + $struct->classed_node( $current_class ); + $_ = $'; + + $last_type = ''; + } elsif (/^\s*"([^"]+)"/) { # phrase, always anded + warn "Encountered phrase: $1\n" if $self->debug; + + $struct->joiner( '&' ); + my $phrase = $1; + + my $class_node = $struct->classed_node($current_class); + $class_node->add_phrase( $phrase ); + $_ = $phrase . $'; + + $last_type = ''; + } elsif (/$required_re([^\s)]+)/) { # phrase, always anded + warn "Encountered required atom (mini phrase): $1\n" if $self->debug; + + my $phrase = $1; + + my $class_node = $struct->classed_node($current_class); + $class_node->add_phrase( $phrase ); + $_ = $phrase . $'; + $struct->joiner( '&' ); + + $last_type = ''; + } elsif (/^\s*([^$group_end\s]+)/o) { # atom + warn "Encountered atom: $1\n" if $self->debug; + warn "Remainder: $'\n" if $self->debug; + + my $atom = $1; + my $after = $'; + + my $class_node = $struct->classed_node($current_class); + my $negator = ($atom =~ s/^-//o) ? '!' : ''; + + $class_node->add_fts_atom( $atom, prefix => $negator, node => $class_node ); + $struct->joiner( '&' ); + + $_ = $after; + $last_type = ''; + } + + last unless ($_); + + } + + return $struct if !wantarray; + return ($struct, $remainder); +} + +sub find_class_index { + my $class = shift; + my $query = shift; + + my ($class_part, @field_parts) = split '\|', $class; + $class_part ||= $class; + + for my $idx ( 0 .. scalar(@$query) - 1 ) { + next unless ref($$query[$idx]); + return $idx if ( $$query[$idx]{requested_class} && $class eq $$query[$idx]{requested_class} ); + } + + push(@$query, { classname => $class_part, (@field_parts ? (fields => \@field_parts) : ()), requested_class => $class, ftsquery => [], phrases => [] }); + return -1; +} + +sub core_limit { + my $self = shift; + my $l = shift; + $self->{core_limit} = $l if ($l); + return $self->{core_limit}; +} + +sub superpage { + my $self = shift; + my $l = shift; + $self->{superpage} = $l if ($l); + return $self->{superpage}; +} + +sub superpage_size { + my $self = shift; + my $l = shift; + $self->{superpage_size} = $l if ($l); + return $self->{superpage_size}; +} + + +#------------------------------- +package QueryParser::query_plan; + +sub QueryParser { + my $self = shift; + return undef unless ref($self); + return $self->{QueryParser}; +} + +sub new { + my $pkg = shift; + $pkg = ref($pkg) || $pkg; + my %args = (joiner => '&', @_); + + return bless \%args => $pkg; +} + +sub new_node { + my $self = shift; + my $pkg = ref($self) || $self; + my $node = do{$pkg.'::node'}->new( plan => $self, @_ ); + $self->add_node( $node ); + return $node; +} + +sub new_filter { + my $self = shift; + my $pkg = ref($self) || $self; + my $name = shift; + my $args = shift; + + my $node = do{$pkg.'::filter'}->new( plan => $self, name => $name, args => $args ); + $self->add_filter( $node ); + + return $node; +} + +sub find_filter { + my $self = shift; + my $needle = shift;; + return undef unless ($needle); + return grep { $_->name eq $needle } @{ $self->filters }; +} + +sub find_modifier { + my $self = shift; + my $needle = shift;; + return undef unless ($needle); + return grep { $_->name eq $needle } @{ $self->modifiers }; +} + +sub new_modifier { + my $self = shift; + my $pkg = ref($self) || $self; + my $name = shift; + + my $node = do{$pkg.'::modifier'}->new( $name ); + $self->add_modifier( $node ); + + return $node; +} + +sub classed_node { + my $self = shift; + my $requested_class = shift; + + my $node; + for my $n (@{$self->{query}}) { + next unless (ref($n) && $n->isa( 'QueryParser::query_plan::node' )); + if ($n->requested_class eq $requested_class) { + $node = $n; + last; + } + } + + if (!$node) { + $node = $self->new_node; + $node->requested_class( $requested_class ); + } + + return $node; +} + +sub query_nodes { + my $self = shift; + return $self->{query}; +} + +sub add_node { + my $self = shift; + my $node = shift; + + $self->{query} ||= []; + push(@{$self->{query}}, $self->joiner) if (@{$self->{query}}); + push(@{$self->{query}}, $node); + + return $self; +} + +sub top_plan { + my $self = shift; + + return $self->{level} ? 0 : 1; +} + +sub plan_level { + my $self = shift; + return $self->{level}; +} + +sub joiner { + my $self = shift; + my $joiner = shift; + + $self->{joiner} = $joiner if ($joiner); + return $self->{joiner}; +} + +sub modifiers { + my $self = shift; + $self->{modifiers} ||= []; + return $self->{modifiers}; +} + +sub add_modifier { + my $self = shift; + my $modifier = shift; + + $self->{modifiers} ||= []; + return $self if (grep {$$_ eq $$modifier} @{$self->{modifiers}}); + + push(@{$self->{modifiers}}, $modifier); + + return $self; +} + +sub filters { + my $self = shift; + $self->{filters} ||= []; + return $self->{filters}; +} + +sub add_filter { + my $self = shift; + my $filter = shift; + + $self->{filters} ||= []; + return $self if (grep {$_->name eq $filter->name} @{$self->{filters}}); + + push(@{$self->{filters}}, $filter); + + return $self; +} + + +#------------------------------- +package QueryParser::query_plan::node; + +sub new { + my $pkg = shift; + $pkg = ref($pkg) || $pkg; + my %args = @_; + + return bless \%args => $pkg; +} + +sub new_atom { + my $self = shift; + my $pkg = ref($self) || $self; + return do{$pkg.'::atom'}->new( @_ ); +} + +sub requested_class { # also split into classname and fields + my $self = shift; + my $class = shift; + + if ($class) { + my ($class_part, @field_parts) = split '\|', $class; + $class_part ||= $class; + + $self->{requested_class} = $class; + $self->{classname} = $class_part; + $self->{fields} = \@field_parts; + } + + return $self->{requested_class}; +} + +sub plan { + my $self = shift; + my $plan = shift; + + $self->{plan} = $plan if ($plan); + return $self->{plan}; +} + +sub classname { + my $self = shift; + my $class = shift; + + $self->{classname} = $class if ($class); + return $self->{classname}; +} + +sub fields { + my $self = shift; + my @fields = @_; + + $self->{fields} ||= []; + $self->{fields} = \@fields if (@fields); + return $self->{fields}; +} + +sub phrases { + my $self = shift; + my @phrases = @_; + + $self->{phrases} ||= []; + $self->{phrases} = \@phrases if (@phrases); + return $self->{phrases}; +} + +sub add_phrase { + my $self = shift; + my $phrase = shift; + + push(@{$self->phrases}, $phrase); + + return $self; +} + +sub query_atoms { + my $self = shift; + my @query_atoms = @_; + + $self->{query_atoms} ||= []; + $self->{query_atoms} = \@query_atoms if (@query_atoms); + return $self->{query_atoms}; +} + +sub add_fts_atom { + my $self = shift; + my $atom = shift; + + if (!ref($atom)) { + my $content = $atom; + my @parts = @_; + + $atom = $self->new_atom( content => $content, @parts ); + } + + push(@{$self->query_atoms}, $self->plan->joiner) if (@{$self->query_atoms}); + push(@{$self->query_atoms}, $atom); + + return $self; +} + +#------------------------------- +package QueryParser::query_plan::node::atom; + +sub new { + my $pkg = shift; + $pkg = ref($pkg) || $pkg; + my %args = @_; + + return bless \%args => $pkg; +} + +sub node { + my $self = shift; + return undef unless (ref $self); + return $self->{node}; +} + +sub content { + my $self = shift; + return undef unless (ref $self); + return $self->{content}; +} + +sub prefix { + my $self = shift; + return undef unless (ref $self); + return $self->{prefix}; +} + +#------------------------------- +package QueryParser::query_plan::filter; + +sub new { + my $pkg = shift; + $pkg = ref($pkg) || $pkg; + my %args = @_; + + return bless \%args => $pkg; +} + +sub plan { + my $self = shift; + return $self->{plan}; +} + +sub name { + my $self = shift; + return $self->{name}; +} + +sub args { + my $self = shift; + return $self->{args}; +} + +#------------------------------- +package QueryParser::query_plan::modifier; + +sub new { + my $pkg = shift; + $pkg = ref($pkg) || $pkg; + my $modifier = shift; + + return bless \$modifier => $pkg; +} + +sub name { + my $self = shift; + return $$self; +} + +1; + -- 2.11.0