From: Thomas Berezansky Date: Thu, 11 Oct 2012 15:43:25 +0000 (-0400) Subject: QueryParser Driver: Much work X-Git-Url: https://old-git.evergreen-ils.org/?a=commitdiff_plain;h=e1149c69cf994f9f04441056b7e0b0538ac3d0ce;p=evergreen%2Fequinox.git QueryParser Driver: Much work Switch to configurable fts configs Add "combined" index vectors Add word boundary checks for phrase searches Use combined rel_bump function And probably more I forgot about Signed-off-by: Thomas Berezansky Signed-off-by: Lebbeous Fogle-Weekley --- diff --git a/Open-ILS/examples/fm_IDL.xml b/Open-ILS/examples/fm_IDL.xml index 1510df1779..ce64b5fb9b 100644 --- a/Open-ILS/examples/fm_IDL.xml +++ b/Open-ILS/examples/fm_IDL.xml @@ -2311,6 +2311,10 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + + + + @@ -9821,6 +9825,68 @@ SELECT usr, + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/Open-ILS/src/perlmods/lib/OpenILS/Application/Storage/Driver/Pg/QueryParser.pm b/Open-ILS/src/perlmods/lib/OpenILS/Application/Storage/Driver/Pg/QueryParser.pm index 9afb7540ac..b5fc7948ab 100644 --- a/Open-ILS/src/perlmods/lib/OpenILS/Application/Storage/Driver/Pg/QueryParser.pm +++ b/Open-ILS/src/perlmods/lib/OpenILS/Application/Storage/Driver/Pg/QueryParser.pm @@ -98,14 +98,19 @@ sub quote_value { sub quote_phrase_value { my $self = shift; my $value = shift; - - my $left_anchored = $value =~ m/^\^/; - my $right_anchored = $value =~ m/\$$/; - $value =~ s/\^// if $left_anchored; - $value =~ s/\$$// if $right_anchored; + my $wb = shift; + + my $left_anchored = ''; + my $right_anchored = ''; + $left_anchored = $1 if $value =~ m/^([*\^])/; + $right_anchored = $1 if $value =~ m/([*\$])$/; + $value =~ s/^[*\^]// if $left_anchored; + $value =~ s/[*\$]$// if $right_anchored; $value = quotemeta($value); - $value = '^' . $value if $left_anchored; - $value = "$value\$" if $right_anchored; + $value = '^' . 
$value if $left_anchored eq '^'; + $value = "$value\$" if $right_anchored eq '$'; + $value = '[[:<:]]' . $value if $wb && !$left_anchored; + $value .= '[[:>:]]' if $wb && !$right_anchored; return $self->quote_value($value); } @@ -297,6 +302,78 @@ sub add_relevance_bump { return { $class => { $field => { $type => { multiplier => $multiplier, active => $active } } } }; } +sub search_class_weights { + my $self = shift; + my $class = shift; + my $a_weight = shift; + my $b_weight = shift; + my $c_weight = shift; + my $d_weight = shift; + + $self->custom_data->{class_weights} ||= {}; + # Note: This reverses the A-D order, putting D first, because that is how the call actually works in PG + $self->custom_data->{class_weights}->{$class} ||= [0.1, 0.2, 0.4, 1.0]; + $self->custom_data->{class_weights}->{$class} = [$d_weight, $c_weight, $b_weight, $a_weight] if $a_weight; + return $self->custom_data->{class_weights}->{$class}; +} + +sub class_ts_config { + my $self = shift; + my $class = shift; + my $lang = shift || 'DEFAULT'; + my $always = shift; + my $ts_config = shift; + + $self->custom_data->{class_ts_config} ||= {}; + $self->custom_data->{class_ts_config}->{$class} ||= {}; + $self->custom_data->{class_ts_config}->{$class}->{$lang} ||= {}; + $self->custom_data->{class_ts_config}->{$class}->{$lang}->{normal} ||= []; + $self->custom_data->{class_ts_config}->{$class}->{$lang}->{always} ||= []; + $self->custom_data->{class_ts_config}->{$class}->{'DEFAULT'} ||= {}; + $self->custom_data->{class_ts_config}->{$class}->{'DEFAULT'}->{normal} ||= []; + $self->custom_data->{class_ts_config}->{$class}->{'DEFAULT'}->{always} ||= []; + + if ($ts_config) { + push @{$self->custom_data->{class_ts_config}->{$class}->{$lang}->{normal}}, $ts_config unless $always; + push @{$self->custom_data->{class_ts_config}->{$class}->{$lang}->{always}}, $ts_config if $always; + } + + my $return = []; + push @$return, @{$self->custom_data->{class_ts_config}->{$class}->{$lang}->{always}}; + push @$return, 
@{$self->custom_data->{class_ts_config}->{$class}->{$lang}->{normal}} unless $always; + if($lang ne 'DEFAULT') { + push @$return, @{$self->custom_data->{class_ts_config}->{$class}->{'DEFAULT'}->{always}}; + push @$return, @{$self->custom_data->{class_ts_config}->{$class}->{'DEFAULT'}->{normal}} unless $always; + } + return $return; +} + +sub field_ts_config { + my $self = shift; + my $class = shift; + my $field = shift; + my $lang = shift || 'DEFAULT'; + my $ts_config = shift; + + $self->custom_data->{field_ts_config} ||= {}; + $self->custom_data->{field_ts_config}->{$class} ||= {}; + $self->custom_data->{field_ts_config}->{$class}->{$field} ||= {}; + $self->custom_data->{field_ts_config}->{$class}->{$field}->{$lang} ||= []; + $self->custom_data->{field_ts_config}->{$class}->{$field}->{'DEFAULT'} ||= []; + + if ($ts_config) { + push @{$self->custom_data->{field_ts_config}->{$class}->{$field}->{$lang}}, $ts_config; + } + + my $return = []; + push @$return, @{$self->custom_data->{field_ts_config}->{$class}->{$field}->{$lang}}; + if($lang ne 'DEFAULT') { + push @$return, @{$self->custom_data->{field_ts_config}->{$class}->{$field}->{'DEFAULT'}}; + } + # Make it easy on us: Grab any "always" for the class here. If we have none we grab them all. 
+ push @$return, @{$self->class_ts_config($class, $lang, scalar(@$return))}; + return $return; +} sub initialize_search_field_id_map { my $self = shift; @@ -365,6 +442,36 @@ sub initialize_filter_normalizers { } } +sub initialize_class_weights { + my $self = shift; + my $classes = shift; + + for my $search_class (@$classes) { + __PACKAGE__->search_class_weights( $search_class->name, $search_class->a_weight, $search_class->b_weight, $search_class->c_weight, $search_class->d_weight ); + } +} + +sub initialize_class_ts_config { + my $self = shift; + my $class_entries = shift; + + for my $search_class_entry (@$class_entries) { + __PACKAGE__->class_ts_config($search_class_entry->field_class,$search_class_entry->search_lang,$U->is_true($search_class_entry->always),$search_class_entry->ts_config); + } +} + +sub initialize_field_ts_config { + my $self = shift; + my $field_entries = shift; + my $field_objects = shift; + my %field_hash = map { $_->id => $_ } @$field_objects; + + for my $search_field_entry (@$field_entries) { + my $field_object = $field_hash{$search_field_entry->metabib_field}; + __PACKAGE__->field_ts_config($field_object->field_class,$field_object->name,$search_field_entry->search_lang,$search_field_entry->ts_config); + } +} + our $_complete = 0; sub initialization_complete { return $_complete; @@ -406,6 +513,15 @@ sub initialize { $self->initialize_filter_normalizers( $args{config_record_attr_index_norm_map} ) if ($args{config_record_attr_index_norm_map}); + $self->initialize_class_weights( $args{config_metabib_class} ) + if ($args{config_metabib_class}); + + $self->initialize_class_ts_config( $args{config_metabib_class_ts_map} ) + if ($args{config_metabib_class_ts_map}); + + $self->initialize_field_ts_config( $args{config_metabib_field_ts_map}, $args{config_metabib_field} ) + if ($args{config_metabib_field_ts_map} && $args{config_metabib_field}); + $_complete = 1 if ( $args{config_metabib_field_index_norm_map} && $args{search_relevance_adjustment} 
&& @@ -467,6 +583,27 @@ sub TEST_SETUP { __PACKAGE__->add_relevance_bump( keyword => keyword => first_word => 1 ); __PACKAGE__->add_relevance_bump( keyword => keyword => full_match => 1 ); + __PACKAGE__->class_ts_config( 'series', undef, 1, 'english_nostop' ); + __PACKAGE__->class_ts_config( 'title', undef, 1, 'english_nostop' ); + __PACKAGE__->class_ts_config( 'author', undef, 1, 'english_nostop' ); + __PACKAGE__->class_ts_config( 'subject', undef, 1, 'english_nostop' ); + __PACKAGE__->class_ts_config( 'keyword', undef, 1, 'english_nostop' ); + __PACKAGE__->class_ts_config( 'series', undef, 1, 'simple' ); + __PACKAGE__->class_ts_config( 'title', undef, 1, 'simple' ); + __PACKAGE__->class_ts_config( 'author', undef, 1, 'simple' ); + __PACKAGE__->class_ts_config( 'subject', undef, 1, 'simple' ); + __PACKAGE__->class_ts_config( 'keyword', undef, 1, 'simple' ); + + # French! To test language limiters + __PACKAGE__->class_ts_config( 'series', 'fre', 1, 'french_nostop' ); + __PACKAGE__->class_ts_config( 'title', 'fre', 1, 'french_nostop' ); + __PACKAGE__->class_ts_config( 'author', 'fre', 1, 'french_nostop' ); + __PACKAGE__->class_ts_config( 'subject', 'fre', 1, 'french_nostop' ); + __PACKAGE__->class_ts_config( 'keyword', 'fre', 1, 'french_nostop' ); + + # Not a default config by any means, but good for some testing + __PACKAGE__->field_ts_config( 'author', 'personal', 'eng', 'english' ); + __PACKAGE__->field_ts_config( 'author', 'personal', 'fre', 'french' ); __PACKAGE__->add_search_class_alias( keyword => 'kw' ); __PACKAGE__->add_search_class_alias( title => 'ti' ); @@ -831,29 +968,6 @@ SQL } - -sub rel_bump { - my $self = shift; - my $node = shift; - my $bump = shift; - my $multiplier = shift; - - my $only_atoms = $node->only_real_atoms; - return '' if (!@$only_atoms); - - if ($bump eq 'first_word') { - return "/* first_word */ COALESCE(NULLIF( (search_normalize(".$node->table_alias.".value) ~ 
('^'||search_normalize(".$self->QueryParser->quote_phrase_value($only_atoms->[0]->content)."))), FALSE )::INT * $multiplier, 1)"; - } elsif ($bump eq 'full_match') { - return "/* full_match */ COALESCE(NULLIF( (search_normalize(".$node->table_alias.".value) ~ ('^'||". - join( "||' '||", map { "search_normalize(".$self->QueryParser->quote_phrase_value($_->content).")" } @$only_atoms )."||'\$')), FALSE )::INT * $multiplier, 1)"; - } elsif ($bump eq 'word_order') { - return "/* word_order */ COALESCE(NULLIF( (search_normalize(".$node->table_alias.".value) ~ (". - join( "||'.*'||", map { "search_normalize(".$self->QueryParser->quote_phrase_value($_->content).")" } @$only_atoms ).")), FALSE )::INT * $multiplier, 1)"; - } - - return ''; -} - sub flatten { my $self = shift; @@ -874,21 +988,27 @@ sub flatten { } my $table = $node->table; + my $ctable = $node->combined_table; my $talias = $node->table_alias; my $node_rank = 'COALESCE(' . $node->rank . " * ${talias}.weight, 0.0)"; $from .= "\n" . ${spc} x 4 ."LEFT JOIN (\n" - . ${spc} x 5 . "SELECT fe.*, fe_weight.weight, ${talias}_xq.tsq /* search */\n" + . ${spc} x 5 . "SELECT fe.*, fe_weight.weight, ${talias}_xq.tsq, ${talias}_xq.tsq_rank /* search */\n" . ${spc} x 6 . "FROM $table AS fe"; $from .= "\n" . ${spc} x 7 . "JOIN config.metabib_field AS fe_weight ON (fe_weight.id = fe.field)"; if ($node->dummy_count < @{$node->only_atoms} ) { $with .= ",\n " if $with; - $with .= "${talias}_xq AS (SELECT ". $node->tsquery ." AS tsq )"; - $from .= "\n" . ${spc} x 6 . "JOIN ${talias}_xq ON (fe.index_vector @@ ${talias}_xq.tsq)"; + $with .= "${talias}_xq AS (SELECT ". $node->tsquery ." AS tsq,". $node->tsquery_rank ." AS tsq_rank )"; + $from .= "\n" . ${spc} x 6 . "JOIN $ctable AS com ON (com.record = fe.source)"; + if (@{$node->fields} > 0) { + $from .= "\n" . ${spc} x 6 . "JOIN ${talias}_xq ON (com.index_vector @@ ${talias}_xq.tsq_rank AND fe.index_vector @@ ${talias}_xq.tsq)"; + } else { + $from .= "\n" . ${spc} x 6 . 
"JOIN ${talias}_xq ON (com.index_vector @@ ${talias}_xq.tsq)"; + } } else { - $from .= "\n" . ${spc} x 6 . ", (SELECT NULL::tsquery AS tsq ) AS ${talias}_xq"; + $from .= "\n" . ${spc} x 6 . ", (SELECT NULL::tsquery AS tsq, NULL::tsquery AS tsq_rank ) AS ${talias}_xq"; } my @bump_fields; @@ -915,6 +1035,8 @@ sub flatten { my %used_bumps; + my @bumps; + my @bumpmults; for my $field ( @bump_fields ) { my $bumps = $self->QueryParser->find_relevance_bumps( $node->classname => $field ); for my $b (keys %$bumps) { @@ -923,24 +1045,31 @@ sub flatten { $used_bumps{$b} = 1; next if ($$bumps{$b}{multiplier} == 1); # optimization to remove unneeded bumps - - my $bump_case = $self->rel_bump( $node, $b, $$bumps{$b}{multiplier} ); - $node_rank .= "\n" . ${spc} x 5 . "* " . $bump_case if ($bump_case); + push @bumps, $b; + push @bumpmults, $$bumps{$b}{multiplier}; } } + if(scalar @bumps > 0 && scalar @{$node->only_positive_atoms} > 0) { + # Note: Previous rank function used search_normalize outright. Duplicating that here. + $node_rank .= "\n" . ${spc} x 5 . "* evergreen.rel_bump(('{' || search_normalize("; + $node_rank .= join(") || ',' || search_normalize(",map { $self->QueryParser->quote_phrase_value($_->content) } @{$node->only_positive_atoms}); + $node_rank .= ") || '}')::TEXT[], " . $node->table_alias . ".value, '{" . join(",",@bumps) . "}'::TEXT[], '{" . join(",",@bumpmults) . "}'::NUMERIC[])"; + } + my $NOT = ''; $NOT = 'NOT ' if $node->negate; $where .= "$NOT(" . $talias . ".id IS NOT NULL"; if (@{$node->phrases}) { $where .= ' AND ' . 
join(' AND ', map { - "${talias}.value ~* ".$self->QueryParser->quote_phrase_value($_) + "${talias}.value ~* ".$self->QueryParser->quote_phrase_value($_, 1) } @{$node->phrases}); - } - for my $atom (@{$node->only_real_atoms}) { - next unless $atom->{content} && $atom->{content} =~ /(^\^|\$$)/; - $where .= " AND ${talias}.value ~* ".$self->QueryParser->quote_phrase_value($atom->{content}); + } else { + for my $atom (@{$node->only_real_atoms}) { + next unless $atom->{content} && $atom->{content} =~ /(^\^|\$$)/; + $where .= " AND ${talias}.value ~* ".$self->QueryParser->quote_phrase_value($atom->{content}); + } } $where .= ')'; @@ -948,7 +1077,6 @@ sub flatten { } elsif ($node->isa( 'QueryParser::query_plan::facet' )) { - my $table = $node->table; my $talias = $node->table_alias; my @field_ids; @@ -1211,11 +1339,6 @@ sub classname { return $classname; } -sub table { - my $self = shift; - return 'metabib.' . $self->classname . '_field_entry'; -} - sub fields { my $self = shift; my ($classname,@fields) = split '\|', $self->name; @@ -1262,6 +1385,30 @@ sub buildSQL { my $normalizers = $self->node->plan->QueryParser->query_normalizers( $classname ); my $fields = $self->node->fields; + my $lang; + my $filter = $self->node->plan->find_filter('preferred_language'); + $lang ||= $filter->args->[0] if ($filter && $filter->args); + $lang ||= $self->node->plan->QueryParser->default_preferred_language; + my $ts_configs = []; + + if (@{$self->node->phrases}) { + # We assume we want 'simple' for phrases. Gives us less to match against later. + $ts_configs = ['simple']; + } else { + if (!@$fields) { + $ts_configs = $self->node->plan->QueryParser->class_ts_config($classname, $lang); + } else { + for my $field (@$fields) { + push @$ts_configs, @{$self->node->plan->QueryParser->field_ts_config($classname, $field, $lang)}; + } + } + $ts_configs = [keys %{{map { $_ => 1 } @$ts_configs}}]; + } + + # Assume we want exact if none otherwise provided. 
+ # Because we can reasonably expect this to exist + $ts_configs = ['simple'] unless (scalar @$ts_configs); + $fields = $self->node->plan->QueryParser->search_fields->{$classname} if (!@$fields); my %norms; @@ -1288,6 +1435,8 @@ sub buildSQL { my $prefix = $self->prefix || ''; my $suffix = $self->suffix || ''; + my $joiner = ' || '; + $joiner = ' && ' if $self->prefix eq '!'; # Negative atoms should be "none of the variants" instead of "any of the variants" $prefix = "'$prefix' ||" if $prefix; my $suffix_op = ''; @@ -1296,7 +1445,13 @@ sub buildSQL { $suffix_op = ":$suffix" if $suffix; $suffix_after = "|| '$suffix_op'" if $suffix; - $sql = "to_tsquery('$classname', COALESCE(NULLIF($prefix '(' || btrim(regexp_replace($sql,E'(?:\\\\s+|:)','$suffix_op&','g'),'&|') $suffix_after || ')', '()'), ''))"; + my @sql_set = (); + for my $ts_config (@$ts_configs) { + push @sql_set, "to_tsquery('$ts_config', COALESCE(NULLIF($prefix '(' || btrim(regexp_replace($sql,E'(?:\\\\s+|:)','$suffix_op&','g'),'&|') $suffix_after || ')', '()'), ''))"; + } + + $sql = join($joiner, @sql_set); + $sql = '(' . $sql . ')' if (scalar(@$ts_configs) > 1); return $self->sql($sql); } @@ -1332,6 +1487,18 @@ sub only_real_atoms { return \@only_real_atoms; } +sub only_positive_atoms { + my $self = shift; + + my $atoms = $self->query_atoms; + my @only_positive_atoms; + for my $a (@$atoms) { + push(@only_positive_atoms, $a) if (ref($a) && $a->isa('QueryParser::query_plan::node::atom') && !($a->{dummy}) && ($a->{prefix} ne '!')); + } + + return \@only_positive_atoms; +} + sub dummy_count { my $self = shift; return $self->{dummy_count}; @@ -1345,6 +1512,14 @@ sub table { return $self->table( 'metabib.' . $self->classname . '_field_entry' ); } +sub combined_table { + my $self = shift; + my $ctable = shift; + $self->{ctable} = $ctable if ($ctable); + return $self->{ctable} if $self->{ctable}; + return $self->combined_table( 'metabib.combined_' . $self->classname . 
'_field_entry' ); +} + sub table_alias { my $self = shift; my $table_alias = shift; @@ -1374,8 +1549,21 @@ sub tsquery { return $self->{tsquery}; } +sub tsquery_rank { + my $self = shift; + return $self->{tsquery_rank} if ($self->{tsquery_rank}); + my @atomlines; + + for my $atom (@{$self->only_positive_atoms}) { + push @atomlines, "\n" . ${spc} x 3 . $atom->sql; + } + $self->{tsquery_rank} = join(' ||', @atomlines) || "''::tsquery"; + return $self->{tsquery_rank}; +} + sub rank { my $self = shift; + return $self->{rank} if ($self->{rank}); my $rank_norm_map = $self->plan->QueryParser->custom_data->{rank_cd_weight_map}; @@ -1384,8 +1572,9 @@ sub rank { $cover_density += $$rank_norm_map{$norm} if ($self->plan->find_modifier($norm)); } - return $self->{rank} if ($self->{rank}); - return $self->{rank} = 'ts_rank_cd(' . $self->table_alias . '.index_vector, ' . $self->table_alias . ".tsq, $cover_density)"; + my $weights = join(', ', @{$self->plan->QueryParser->search_class_weights($self->classname)}); + + return $self->{rank} = "ts_rank_cd('{" . $weights . "}', " . $self->table_alias . '.index_vector, ' . $self->table_alias . 
".tsq_rank, $cover_density)"; } diff --git a/Open-ILS/src/perlmods/lib/OpenILS/Application/Storage/Publisher/metabib.pm b/Open-ILS/src/perlmods/lib/OpenILS/Application/Storage/Publisher/metabib.pm index 3241bc38bd..96a70cf9c1 100644 --- a/Open-ILS/src/perlmods/lib/OpenILS/Application/Storage/Publisher/metabib.pm +++ b/Open-ILS/src/perlmods/lib/OpenILS/Application/Storage/Publisher/metabib.pm @@ -53,6 +53,21 @@ sub _initialize_parser { 'open-ils.cstore.direct.config.record_attr_definition.search.atomic', { name => { "!=" => undef } } )->gather(1), + config_metabib_class_ts_map => + $cstore->request( + 'open-ils.cstore.direct.config.metabib_class_ts_map.search.atomic', + { active => "t" } + )->gather(1), + config_metabib_field_ts_map => + $cstore->request( + 'open-ils.cstore.direct.config.metabib_field_ts_map.search.atomic', + { active => "t" } + )->gather(1), + config_metabib_class => + $cstore->request( + 'open-ils.cstore.direct.config.metabib_class.search.atomic', + { name => { "!=" => undef } } + )->gather(1), ); $cstore->disconnect; diff --git a/Open-ILS/src/sql/Pg/000.english.pg90.fts-config.sql b/Open-ILS/src/sql/Pg/000.english.pg90.fts-config.sql deleted file mode 100644 index 7ddce06b73..0000000000 --- a/Open-ILS/src/sql/Pg/000.english.pg90.fts-config.sql +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Copyright (C) 2004-2008 Georgia Public Library Service - * Copyright (C) 2008 Equinox Software, Inc., Laurentian University - * Mike Rylander - * Dan Scott - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - */ - -BEGIN; - -SET search_path = public, pg_catalog; - -CREATE OR REPLACE FUNCTION oils_tsearch2 () RETURNS TRIGGER AS $$ -BEGIN - NEW.index_vector = to_tsvector((TG_ARGV[0])::regconfig, NEW.value); - RETURN NEW; -END; -$$ LANGUAGE PLPGSQL; - -DROP TEXT SEARCH DICTIONARY IF EXISTS english_nostop CASCADE; - -CREATE TEXT SEARCH DICTIONARY english_nostop (TEMPLATE=pg_catalog.snowball, language='english'); -COMMENT ON TEXT SEARCH DICTIONARY english_nostop IS 'English snowball stemmer with no stopwords for ASCII words only.'; - -CREATE TEXT SEARCH CONFIGURATION title ( COPY = pg_catalog.english ); -ALTER TEXT SEARCH CONFIGURATION title ALTER MAPPING FOR word, hword, hword_part WITH pg_catalog.simple; -ALTER TEXT SEARCH CONFIGURATION title ALTER MAPPING FOR asciiword, asciihword, hword_asciipart WITH english_nostop; -CREATE TEXT SEARCH CONFIGURATION author ( COPY = title ); -CREATE TEXT SEARCH CONFIGURATION subject ( COPY = title ); -CREATE TEXT SEARCH CONFIGURATION keyword ( COPY = title ); -CREATE TEXT SEARCH CONFIGURATION identifier ( COPY = title ); -CREATE TEXT SEARCH CONFIGURATION series ( COPY = title ); -CREATE TEXT SEARCH CONFIGURATION "default" ( COPY = title ); - -COMMIT; diff --git a/Open-ILS/src/sql/Pg/000.english.pg91.fts-config.sql b/Open-ILS/src/sql/Pg/000.english.pg91.fts-config.sql deleted file mode 120000 index fd3fe587ae..0000000000 --- a/Open-ILS/src/sql/Pg/000.english.pg91.fts-config.sql +++ /dev/null @@ -1 +0,0 @@ -000.english.pg90.fts-config.sql \ No newline at end of file diff --git a/Open-ILS/src/sql/Pg/000.english.pg91.fts-config.sql b/Open-ILS/src/sql/Pg/000.english.pg91.fts-config.sql new file mode 100644 index 0000000000..0419aa727f --- /dev/null +++ b/Open-ILS/src/sql/Pg/000.english.pg91.fts-config.sql @@ -0,0 +1,57 @@ +/* + * Copyright (C) 2004-2008 Georgia Public Library Service + * Copyright (C) 2008 Equinox Software, Inc., Laurentian University + * Mike Rylander + * Dan Scott + * + * This program is free software; you can 
redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +BEGIN; + +SET search_path = public, pg_catalog; + +CREATE OR REPLACE FUNCTION oils_tsearch2 () RETURNS TRIGGER AS $$ +BEGIN + NEW.index_vector = to_tsvector((TG_ARGV[0])::regconfig, NEW.value); + RETURN NEW; +END; +$$ LANGUAGE PLPGSQL; + +DO $$ +DECLARE +lang TEXT; +BEGIN +FOR lang IN SELECT substring(pptsd.dictname from '(.*)_stem$') AS lang FROM pg_catalog.pg_ts_dict pptsd JOIN pg_catalog.pg_namespace ppn ON ppn.oid = pptsd.dictnamespace +WHERE ppn.nspname = 'pg_catalog' AND pptsd.dictname LIKE '%_stem' LOOP +RAISE NOTICE 'FOUND LANGUAGE %', lang; + +EXECUTE 'DROP TEXT SEARCH DICTIONARY IF EXISTS ' || lang || '_nostop CASCADE; +CREATE TEXT SEARCH DICTIONARY ' || lang || '_nostop (TEMPLATE=pg_catalog.snowball, language=''' || lang || '''); +COMMENT ON TEXT SEARCH DICTIONARY ' || lang || '_nostop IS ''' ||lang || ' snowball stemmer with no stopwords for ASCII words only.''; +CREATE TEXT SEARCH CONFIGURATION ' || lang || '_nostop ( COPY = pg_catalog.' 
|| lang || ' ); +ALTER TEXT SEARCH CONFIGURATION ' || lang || '_nostop ALTER MAPPING FOR word, hword, hword_part WITH pg_catalog.simple; +ALTER TEXT SEARCH CONFIGURATION ' || lang || '_nostop ALTER MAPPING FOR asciiword, asciihword, hword_asciipart WITH ' || lang || '_nostop;'; + +END LOOP; +END; +$$; +--CREATE TEXT SEARCH CONFIGURATION title ( COPY = english_nostop ); +--CREATE TEXT SEARCH CONFIGURATION author ( COPY = english_nostop ); +--CREATE TEXT SEARCH CONFIGURATION subject ( COPY = english_nostop ); +CREATE TEXT SEARCH CONFIGURATION keyword ( COPY = english_nostop ); +--CREATE TEXT SEARCH CONFIGURATION identifier ( COPY = english_nostop ); +--CREATE TEXT SEARCH CONFIGURATION series ( COPY = english_nostop ); +CREATE TEXT SEARCH CONFIGURATION "default" ( COPY = english_nostop ); + + +COMMIT; diff --git a/Open-ILS/src/sql/Pg/000.english.pg92.fts-config.sql b/Open-ILS/src/sql/Pg/000.english.pg92.fts-config.sql index fd3fe587ae..0b24fd90f5 120000 --- a/Open-ILS/src/sql/Pg/000.english.pg92.fts-config.sql +++ b/Open-ILS/src/sql/Pg/000.english.pg92.fts-config.sql @@ -1 +1 @@ -000.english.pg90.fts-config.sql \ No newline at end of file +000.english.pg91.fts-config.sql \ No newline at end of file diff --git a/Open-ILS/src/sql/Pg/002.schema.config.sql b/Open-ILS/src/sql/Pg/002.schema.config.sql index eb7b60e159..11dd989628 100644 --- a/Open-ILS/src/sql/Pg/002.schema.config.sql +++ b/Open-ILS/src/sql/Pg/002.schema.config.sql @@ -169,7 +169,11 @@ CREATE TABLE config.metabib_class ( name TEXT PRIMARY KEY, label TEXT NOT NULL UNIQUE, buoyant BOOL DEFAULT FALSE NOT NULL, - restrict BOOL DEFAULT FALSE NOT NULL + restrict BOOL DEFAULT FALSE NOT NULL, + a_weight NUMERIC DEFAULT 1.0 NOT NULL, + b_weight NUMERIC DEFAULT 0.4 NOT NULL, + c_weight NUMERIC DEFAULT 0.2 NOT NULL, + d_weight NUMERIC DEFAULT 0.1 NOT NULL ); CREATE TABLE config.metabib_field ( @@ -198,6 +202,49 @@ $$; CREATE UNIQUE INDEX config_metabib_field_class_name_idx ON config.metabib_field (field_class, name); 
+CREATE TABLE config.ts_config_list ( + id TEXT PRIMARY KEY, + name TEXT NOT NULL +); +COMMENT ON TABLE config.ts_config_list IS $$ +Full Text Configs + +A list of full text configs with names and descriptions. +$$; + +CREATE TABLE config.metabib_class_ts_map ( + id SERIAL PRIMARY KEY, + field_class TEXT NOT NULL REFERENCES config.metabib_class (name), + ts_config TEXT NOT NULL REFERENCES config.ts_config_list (id), + active BOOL NOT NULL DEFAULT TRUE, + index_weight CHAR(1) NOT NULL DEFAULT 'C' CHECK (index_weight IN ('A','B','C','D')), + index_lang TEXT NULL, + search_lang TEXT NULL, + always BOOL NOT NULL DEFAULT true +); +COMMENT ON TABLE config.metabib_class_ts_map IS $$ +Text Search Configs for metabib class indexing + +This table contains text search config definitions for +storing index_vector values. +$$; + +CREATE TABLE config.metabib_field_ts_map ( + id SERIAL PRIMARY KEY, + metabib_field INT NOT NULL REFERENCES config.metabib_field (id), + ts_config TEXT NOT NULL REFERENCES config.ts_config_list (id), + active BOOL NOT NULL DEFAULT TRUE, + index_weight CHAR(1) NOT NULL DEFAULT 'C' CHECK (index_weight IN ('A','B','C','D')), + index_lang TEXT NULL, + search_lang TEXT NULL +); +COMMENT ON TABLE config.metabib_field_ts_map IS $$ +Text Search Configs for metabib field indexing + +This table contains text search config definitions for +storing index_vector values. 
+$$; + CREATE TABLE config.metabib_search_alias ( alias TEXT PRIMARY KEY, field_class TEXT NOT NULL REFERENCES config.metabib_class (name), @@ -784,75 +831,6 @@ BEGIN END; $f$ LANGUAGE PLPGSQL; -CREATE OR REPLACE FUNCTION oils_tsearch2 () RETURNS TRIGGER AS $$ -DECLARE - normalizer RECORD; - value TEXT := ''; -BEGIN - - value := NEW.value; - - IF TG_TABLE_NAME::TEXT ~ 'field_entry$' THEN - FOR normalizer IN - SELECT n.func AS func, - n.param_count AS param_count, - m.params AS params - FROM config.index_normalizer n - JOIN config.metabib_field_index_norm_map m ON (m.norm = n.id) - WHERE field = NEW.field AND m.pos < 0 - ORDER BY m.pos LOOP - EXECUTE 'SELECT ' || normalizer.func || '(' || - quote_literal( value ) || - CASE - WHEN normalizer.param_count > 0 - THEN ',' || REPLACE(REPLACE(BTRIM(normalizer.params,'[]'),E'\'',E'\\\''),E'"',E'\'') - ELSE '' - END || - ')' INTO value; - - END LOOP; - - NEW.value := value; - END IF; - - IF NEW.index_vector = ''::tsvector THEN - RETURN NEW; - END IF; - - IF TG_TABLE_NAME::TEXT ~ 'field_entry$' THEN - FOR normalizer IN - SELECT n.func AS func, - n.param_count AS param_count, - m.params AS params - FROM config.index_normalizer n - JOIN config.metabib_field_index_norm_map m ON (m.norm = n.id) - WHERE field = NEW.field AND m.pos >= 0 - ORDER BY m.pos LOOP - EXECUTE 'SELECT ' || normalizer.func || '(' || - quote_literal( value ) || - CASE - WHEN normalizer.param_count > 0 - THEN ',' || REPLACE(REPLACE(BTRIM(normalizer.params,'[]'),E'\'',E'\\\''),E'"',E'\'') - ELSE '' - END || - ')' INTO value; - - END LOOP; - END IF; - - IF TG_TABLE_NAME::TEXT ~ 'browse_entry$' THEN - value := ARRAY_TO_STRING( - evergreen.regexp_split_to_array(value, E'\\W+'), ' ' - ); - value := public.search_normalize(value); - END IF; - - NEW.index_vector = to_tsvector((TG_ARGV[0])::regconfig, value); - - RETURN NEW; -END; -$$ LANGUAGE PLPGSQL; - -- List applied db patches that are deprecated by (and block the application of) my_db_patch CREATE OR REPLACE 
FUNCTION evergreen.upgrade_list_applied_deprecates ( my_db_patch TEXT ) RETURNS SETOF evergreen.patch AS $$
     SELECT DISTINCT l.version
diff --git a/Open-ILS/src/sql/Pg/030.schema.metabib.sql b/Open-ILS/src/sql/Pg/030.schema.metabib.sql
index e833ef22cf..da9d4ddca1 100644
--- a/Open-ILS/src/sql/Pg/030.schema.metabib.sql
+++ b/Open-ILS/src/sql/Pg/030.schema.metabib.sql
@@ -44,6 +44,14 @@ CREATE INDEX metabib_identifier_field_entry_index_vector_idx ON metabib.identifi
 CREATE INDEX metabib_identifier_field_entry_value_idx ON metabib.identifier_field_entry (SUBSTRING(value,1,1024)) WHERE index_vector = ''::TSVECTOR;
 CREATE INDEX metabib_identifier_field_entry_source_idx ON metabib.identifier_field_entry (source);
 
+CREATE TABLE metabib.combined_identifier_field_entry ( -- per-record merged index vectors for the identifier class
+    record          BIGINT      NOT NULL, -- bib record id; filled from identifier_field_entry.source
+    metabib_field   INT         NULL, -- field the vector was aggregated for; NULL marks the row that covers all fields
+    index_vector    tsvector    NOT NULL
+);
+CREATE UNIQUE INDEX metabib_combined_identifier_field_entry_fakepk_idx ON metabib.combined_identifier_field_entry (record, COALESCE(metabib_field::TEXT,'')); -- COALESCE folds NULL to '' so the all-fields row is unique per record too
+CREATE INDEX metabib_combined_identifier_field_entry_index_vector_idx ON metabib.combined_identifier_field_entry USING GIST (index_vector);
+CREATE INDEX metabib_combined_identifier_field_source_idx ON metabib.combined_identifier_field_entry (metabib_field); -- NOTE: named "source" but the indexed column is metabib_field
 
 CREATE TABLE metabib.title_field_entry (
     id BIGSERIAL PRIMARY KEY,
@@ -60,6 +68,14 @@ CREATE INDEX metabib_title_field_entry_index_vector_idx ON metabib.title_field_e
 CREATE INDEX metabib_title_field_entry_value_idx ON metabib.title_field_entry (SUBSTRING(value,1,1024)) WHERE index_vector = ''::TSVECTOR;
 CREATE INDEX metabib_title_field_entry_source_idx ON metabib.title_field_entry (source);
 
+CREATE TABLE metabib.combined_title_field_entry ( -- per-record merged index vectors for the title class
+    record          BIGINT      NOT NULL, -- bib record id; filled from title_field_entry.source
+    metabib_field   INT         NULL, -- NULL marks the row that covers all fields
+    index_vector    tsvector    NOT NULL
+);
+CREATE UNIQUE INDEX metabib_combined_title_field_entry_fakepk_idx ON metabib.combined_title_field_entry (record, COALESCE(metabib_field::TEXT,'')); -- stand-in primary key that tolerates the NULL field
+CREATE INDEX metabib_combined_title_field_entry_index_vector_idx ON metabib.combined_title_field_entry USING GIST (index_vector);
+CREATE INDEX metabib_combined_title_field_source_idx ON metabib.combined_title_field_entry (metabib_field); -- NOTE: named "source" but the indexed column is metabib_field
 
 CREATE TABLE metabib.author_field_entry (
     id BIGSERIAL PRIMARY KEY,
@@ -76,6 +92,14 @@ CREATE INDEX metabib_author_field_entry_index_vector_idx ON metabib.author_field
 CREATE INDEX metabib_author_field_entry_value_idx ON metabib.author_field_entry (SUBSTRING(value,1,1024)) WHERE index_vector = ''::TSVECTOR;
 CREATE INDEX metabib_author_field_entry_source_idx ON metabib.author_field_entry (source);
 
+CREATE TABLE metabib.combined_author_field_entry ( -- per-record merged index vectors for the author class
+    record          BIGINT      NOT NULL, -- bib record id; filled from author_field_entry.source
+    metabib_field   INT         NULL, -- NULL marks the row that covers all fields
+    index_vector    tsvector    NOT NULL
+);
+CREATE UNIQUE INDEX metabib_combined_author_field_entry_fakepk_idx ON metabib.combined_author_field_entry (record, COALESCE(metabib_field::TEXT,'')); -- stand-in primary key that tolerates the NULL field
+CREATE INDEX metabib_combined_author_field_entry_index_vector_idx ON metabib.combined_author_field_entry USING GIST (index_vector);
+CREATE INDEX metabib_combined_author_field_source_idx ON metabib.combined_author_field_entry (metabib_field); -- NOTE: named "source" but the indexed column is metabib_field
 
 CREATE TABLE metabib.subject_field_entry (
     id BIGSERIAL PRIMARY KEY,
@@ -92,6 +116,14 @@ CREATE INDEX metabib_subject_field_entry_index_vector_idx ON metabib.subject_fie
 CREATE INDEX metabib_subject_field_entry_value_idx ON metabib.subject_field_entry (SUBSTRING(value,1,1024)) WHERE index_vector = ''::TSVECTOR;
 CREATE INDEX metabib_subject_field_entry_source_idx ON metabib.subject_field_entry (source);
 
+CREATE TABLE metabib.combined_subject_field_entry ( -- per-record merged index vectors for the subject class
+    record          BIGINT      NOT NULL, -- bib record id; filled from subject_field_entry.source
+    metabib_field   INT         NULL, -- NULL marks the row that covers all fields
+    index_vector    tsvector    NOT NULL
+);
+CREATE UNIQUE INDEX metabib_combined_subject_field_entry_fakepk_idx ON metabib.combined_subject_field_entry (record, COALESCE(metabib_field::TEXT,'')); -- stand-in primary key that tolerates the NULL field
+CREATE INDEX metabib_combined_subject_field_entry_index_vector_idx ON metabib.combined_subject_field_entry USING GIST (index_vector);
+CREATE INDEX metabib_combined_subject_field_source_idx ON metabib.combined_subject_field_entry (metabib_field); -- NOTE: named "source" but the indexed column is metabib_field
 
 CREATE TABLE metabib.keyword_field_entry (
     id BIGSERIAL PRIMARY KEY,
@@ -108,6 +140,14 @@ CREATE INDEX metabib_keyword_field_entry_index_vector_idx ON metabib.keyword_fie
 CREATE INDEX metabib_keyword_field_entry_value_idx ON metabib.keyword_field_entry (SUBSTRING(value,1,1024)) WHERE index_vector = ''::TSVECTOR;
 CREATE INDEX metabib_keyword_field_entry_source_idx ON metabib.keyword_field_entry (source);
 
+CREATE TABLE metabib.combined_keyword_field_entry ( -- per-record merged index vectors for the keyword class
+    record          BIGINT      NOT NULL, -- bib record id; filled from keyword_field_entry.source
+    metabib_field   INT         NULL, -- NULL marks the row that covers all fields
+    index_vector    tsvector    NOT NULL
+);
+CREATE UNIQUE INDEX metabib_combined_keyword_field_entry_fakepk_idx ON metabib.combined_keyword_field_entry (record, COALESCE(metabib_field::TEXT,'')); -- stand-in primary key that tolerates the NULL field
+CREATE INDEX metabib_combined_keyword_field_entry_index_vector_idx ON metabib.combined_keyword_field_entry USING GIST (index_vector);
+CREATE INDEX metabib_combined_keyword_field_source_idx ON metabib.combined_keyword_field_entry (metabib_field); -- NOTE: named "source" but the indexed column is metabib_field
 
 CREATE TABLE metabib.series_field_entry (
     id BIGSERIAL PRIMARY KEY,
@@ -124,6 +164,14 @@ CREATE INDEX metabib_series_field_entry_index_vector_idx ON metabib.series_field
 CREATE INDEX metabib_series_field_entry_value_idx ON metabib.series_field_entry (SUBSTRING(value,1,1024)) WHERE index_vector = ''::TSVECTOR;
 CREATE INDEX metabib_series_field_entry_source_idx ON metabib.series_field_entry (source);
 
+CREATE TABLE metabib.combined_series_field_entry ( -- per-record merged index vectors for the series class
+    record          BIGINT      NOT NULL, -- bib record id; filled from series_field_entry.source
+    metabib_field   INT         NULL, -- NULL marks the row that covers all fields
+    index_vector    tsvector    NOT NULL
+);
+CREATE UNIQUE INDEX metabib_combined_series_field_entry_fakepk_idx ON metabib.combined_series_field_entry (record, COALESCE(metabib_field::TEXT,'')); -- stand-in primary key that tolerates the NULL field
+CREATE INDEX metabib_combined_series_field_entry_index_vector_idx ON metabib.combined_series_field_entry USING GIST (index_vector);
+CREATE INDEX metabib_combined_series_field_source_idx ON metabib.combined_series_field_entry (metabib_field); -- NOTE: named "source" but the indexed column is metabib_field
 
 CREATE TABLE metabib.facet_entry (
     id BIGSERIAL PRIMARY KEY,
@@ -473,6 +521,59 @@ END;
 
 $func$ LANGUAGE PLPGSQL;
 
+CREATE OR REPLACE FUNCTION metabib.update_combined_index_vectors(bib_id BIGINT) RETURNS VOID AS $func$ -- rebuilds every combined_*_field_entry row of one bib record from its *_field_entry rows
+BEGIN
+    DELETE FROM metabib.combined_keyword_field_entry WHERE record = bib_id; -- keyword class: drop the record's old rows first
+    INSERT INTO metabib.combined_keyword_field_entry(record, metabib_field, index_vector) -- one row per field present on the record
+        SELECT bib_id, field, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector) -- vectors are joined as text, re-parsed, then strip() drops positions and weights
+        FROM metabib.keyword_field_entry WHERE source = bib_id GROUP BY field;
+    INSERT INTO metabib.combined_keyword_field_entry(record, metabib_field, index_vector) -- plus the all-fields row (metabib_field NULL)
+        SELECT bib_id, NULL, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector) -- no GROUP BY: always yields one row, an empty vector when the record has no entries
+        FROM metabib.keyword_field_entry WHERE source = bib_id;
+
+    DELETE FROM metabib.combined_title_field_entry WHERE record = bib_id; -- title class: same three steps
+    INSERT INTO metabib.combined_title_field_entry(record, metabib_field, index_vector)
+        SELECT bib_id, field, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
+        FROM metabib.title_field_entry WHERE source = bib_id GROUP BY field;
+    INSERT INTO metabib.combined_title_field_entry(record, metabib_field, index_vector)
+        SELECT bib_id, NULL, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
+        FROM metabib.title_field_entry WHERE source = bib_id;
+
+    DELETE FROM metabib.combined_author_field_entry WHERE record = bib_id; -- author class: same three steps
+    INSERT INTO metabib.combined_author_field_entry(record, metabib_field, index_vector)
+        SELECT bib_id, field, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
+        FROM metabib.author_field_entry WHERE source = bib_id GROUP BY field;
+    INSERT INTO metabib.combined_author_field_entry(record, metabib_field, index_vector)
+        SELECT bib_id, NULL, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
+        FROM metabib.author_field_entry WHERE source = bib_id;
+
+    DELETE FROM metabib.combined_subject_field_entry WHERE record = bib_id; -- subject class: same three steps
+    INSERT INTO metabib.combined_subject_field_entry(record, metabib_field, index_vector)
+        SELECT bib_id, field, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
+        FROM metabib.subject_field_entry WHERE source = bib_id GROUP BY field;
+    INSERT INTO metabib.combined_subject_field_entry(record, metabib_field, index_vector)
+        SELECT bib_id, NULL, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
+        FROM metabib.subject_field_entry WHERE source = bib_id;
+
+    DELETE FROM metabib.combined_series_field_entry WHERE record = bib_id; -- series class: same three steps
+    INSERT INTO metabib.combined_series_field_entry(record, metabib_field, index_vector)
+        SELECT bib_id, field, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
+        FROM metabib.series_field_entry WHERE source = bib_id GROUP BY field;
+    INSERT INTO metabib.combined_series_field_entry(record, metabib_field, index_vector)
+        SELECT bib_id, NULL, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
+        FROM metabib.series_field_entry WHERE source = bib_id;
+
+    DELETE FROM metabib.combined_identifier_field_entry WHERE record = bib_id; -- identifier class: same three steps
+    INSERT INTO metabib.combined_identifier_field_entry(record, metabib_field, index_vector)
+        SELECT bib_id, field, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
+        FROM metabib.identifier_field_entry WHERE source = bib_id GROUP BY field;
+    INSERT INTO metabib.combined_identifier_field_entry(record, metabib_field, index_vector)
+        SELECT bib_id, NULL, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
+        FROM metabib.identifier_field_entry WHERE source = bib_id;
+
+END;
+$func$ LANGUAGE PLPGSQL;
+
 CREATE OR REPLACE FUNCTION metabib.reingest_metabib_field_entries( bib_id BIGINT, skip_facet BOOL DEFAULT FALSE, skip_browse BOOL DEFAULT FALSE, skip_search BOOL DEFAULT FALSE ) RETURNS VOID AS $func$
 DECLARE
     fclass RECORD;
@@ -538,6 +639,10 @@ BEGIN
 
     END LOOP;
 
+    IF NOT skip_search THEN -- combined vectors are refreshed only when search indexing was not skipped
+        PERFORM metabib.update_combined_index_vectors(bib_id);
+    END IF;
+
     RETURN;
END;
 $func$ LANGUAGE PLPGSQL;
@@ -1491,4 +1596,92 @@ SELECT DISTINCT
 END;
 $func$ LANGUAGE PLPGSQL;
 
+-- Trigger function that populates NEW.index_vector (and, for *field_entry
+-- tables, the normalized NEW.value) on the row being written.
+--
+--  * Tables named *field_entry: TG_ARGV[0] is the field class.  NEW.value is
+--    passed through each normalizer mapped to NEW.field, in m.pos order, then
+--    indexed with every text search configuration selected from
+--    config.metabib_class_ts_map / config.metabib_field_ts_map, each weighted
+--    with that row's index_weight.
+--  * Tables named *browse_entry, and any other table: TG_ARGV[0] is the text
+--    search configuration to index with.
+CREATE OR REPLACE FUNCTION public.oils_tsearch2 () RETURNS TRIGGER AS $$
+DECLARE
+    normalizer      RECORD;
+    value           TEXT := '';
+    temp_vector     TEXT := '';
+    ts_rec          RECORD;
+    cur_weight      "char";
+BEGIN
+
+    value := NEW.value;
+    NEW.index_vector = ''::tsvector;
+
+    IF TG_TABLE_NAME::TEXT ~ 'field_entry$' THEN
+        FOR normalizer IN
+            SELECT  n.func AS func,
+                    n.param_count AS param_count,
+                    m.params AS params
+              FROM  config.index_normalizer n
+                    JOIN config.metabib_field_index_norm_map m ON (m.norm = n.id)
+              WHERE field = NEW.field
+              ORDER BY m.pos LOOP
+                EXECUTE 'SELECT ' || normalizer.func || '(' ||
+                    quote_literal( value ) ||
+                    CASE
+                        WHEN normalizer.param_count > 0
+                            THEN ',' || REPLACE(REPLACE(BTRIM(normalizer.params,'[]'),E'\'',E'\\\''),E'"',E'\'')
+                            ELSE ''
+                        END ||
+                    ')' INTO value;
+
+        END LOOP;
+        NEW.value = value;
+    END IF;
+
+    IF TG_TABLE_NAME::TEXT ~ 'browse_entry$' THEN
+        value := ARRAY_TO_STRING(
+            evergreen.regexp_split_to_array(value, E'\\W+'), ' '
+        );
+        value := public.search_normalize(value);
+        NEW.index_vector = to_tsvector(TG_ARGV[0]::regconfig, value);
+    ELSIF TG_TABLE_NAME::TEXT ~ 'field_entry$' THEN
+        -- Each OR alternative is parenthesized so it stays ANDed with the
+        -- class/field restriction; AND binds tighter than OR in SQL.
+        FOR ts_rec IN
+            SELECT  ts_config, index_weight
+              FROM  config.metabib_class_ts_map
+              WHERE field_class = TG_ARGV[0]
+                AND (index_lang IS NULL OR EXISTS (SELECT 1 FROM metabib.record_attr WHERE id = NEW.source AND index_lang IN(attrs->'item_lang',attrs->'language')))
+                AND (always OR NOT EXISTS (SELECT 1 FROM config.metabib_field_ts_map WHERE metabib_field = NEW.field))
+            UNION
+            SELECT  ts_config, index_weight
+              FROM  config.metabib_field_ts_map
+              WHERE metabib_field = NEW.field
+                AND (index_lang IS NULL OR EXISTS (SELECT 1 FROM metabib.record_attr WHERE id = NEW.source AND index_lang IN(attrs->'item_lang',attrs->'language')))
+            ORDER BY index_weight ASC
+        LOOP
+            -- Rows arrive ordered by weight; flush the accumulated text whenever the weight changes.
+            IF cur_weight IS NOT NULL AND cur_weight != ts_rec.index_weight THEN
+                NEW.index_vector = NEW.index_vector || setweight(temp_vector::tsvector,cur_weight);
+                temp_vector = '';
+            END IF;
+            cur_weight = ts_rec.index_weight;
+            SELECT INTO temp_vector temp_vector || ' ' || to_tsvector(ts_rec.ts_config::regconfig, value)::TEXT;
+        END LOOP;
+        -- Flush the final weight group.  cur_weight is still NULL when no
+        -- configuration was selected; setweight() with a NULL weight would
+        -- make the whole vector NULL, so keep the empty vector assigned above.
+        IF cur_weight IS NOT NULL THEN
+            NEW.index_vector = NEW.index_vector || setweight(temp_vector::tsvector,cur_weight);
+        END IF;
+    ELSE
+        NEW.index_vector = to_tsvector(TG_ARGV[0]::regconfig, value);
+    END IF;
+
+    RETURN NEW;
+END;
+$$ LANGUAGE PLPGSQL;
+
 COMMIT;
diff --git a/Open-ILS/src/sql/Pg/950.data.seed-values.sql b/Open-ILS/src/sql/Pg/950.data.seed-values.sql
index 8ee95dfefd..9beb73e864 100644
--- a/Open-ILS/src/sql/Pg/950.data.seed-values.sql
+++ b/Open-ILS/src/sql/Pg/950.data.seed-values.sql
@@ -12362,3 +12362,35 @@ VALUES (
     'bool',
     NULL
 );
+
+
+INSERT INTO config.ts_config_list(id, name) VALUES
+    ('simple','Non-Stemmed Simple'),
+    ('danish_nostop','Danish Stemmed'),
+    ('dutch_nostop','Dutch Stemmed'),
+    ('english_nostop','English Stemmed'),
+    ('finnish_nostop','Finnish Stemmed'),
+    ('french_nostop','French Stemmed'),
+    ('german_nostop','German Stemmed'),
+    ('hungarian_nostop','Hungarian Stemmed'),
+    ('italian_nostop','Italian Stemmed'),
+    ('norwegian_nostop','Norwegian Stemmed'),
+    ('portuguese_nostop','Portuguese Stemmed'),
+    ('romanian_nostop','Romanian Stemmed'),
+    ('russian_nostop','Russian Stemmed'),
+    ('spanish_nostop','Spanish Stemmed'),
+    ('swedish_nostop','Swedish Stemmed'),
+    ('turkish_nostop','Turkish Stemmed');
+
+INSERT INTO config.metabib_class_ts_map(field_class, ts_config, index_weight, always) VALUES
+    ('keyword','simple','A',true),
+    ('keyword','english_nostop','C',true),
+    ('title','simple','A',true),
+    ('title','english_nostop','C',true),
+    ('author','simple','A',true),
+    ('author','english_nostop','C',true),
+    ('series','simple','A',true),
+    ('series','english_nostop','C',true),
+    ('subject','simple','A',true),
+    ('subject','english_nostop','C',true),
+    ('identifier','simple','A',true);
diff --git a/Open-ILS/src/sql/Pg/950.data.seed-values.sql
b/Open-ILS/src/sql/Pg/999.functions.global.sql
index 02f2861482..d6ed10b628 100644
--- a/Open-ILS/src/sql/Pg/999.functions.global.sql
+++ b/Open-ILS/src/sql/Pg/999.functions.global.sql
@@ -2165,4 +2165,25 @@ BEGIN
 END;
 $$ LANGUAGE plpgsql;
 
+CREATE OR REPLACE FUNCTION evergreen.rel_bump(terms TEXT[], value TEXT, bumps TEXT[], mults NUMERIC[]) RETURNS NUMERIC AS
+$BODY$
+# Returns the product of mults[i] over every bumps[i] whose pattern (built from terms) matches value; 1 if none match.
+use strict;
+my ($terms, $value, $bumps, $mults) = @_;
+# Terms are used as regex source, as before.  Each pattern is built once, outside the loop.
+my %pattern_for = (
+    first_word => '^' . (defined $terms->[0] ? $terms->[0] : ''),  # value starts with the first term
+    full_match => '^' . join(' ', @$terms) . '$',                   # value is exactly the space-joined terms
+    word_order => join('.*', @$terms),                              # terms occur in value in the given order
+);
+my $retval = 1;
+for my $id (0 .. $#$bumps) {
+    my $pattern = $pattern_for{ $bumps->[$id] };
+    next unless defined $pattern;                                   # unrecognized bump names contribute nothing
+    my $re = eval { qr/$pattern/ };                                 # undef when a term is not a valid regex
+    $retval *= $mults->[$id] if defined $re && $value =~ $re;       # an uncompilable pattern is a non-match, not a query error
+}
+return $retval;
+$BODY$ LANGUAGE plperlu IMMUTABLE STRICT COST 100;
+
 -- user activity functions --