From 1fd8103bf33430fe0b4b5ae8d2b2b051802ac473 Mon Sep 17 00:00:00 2001 From: Galen Charlton Date: Fri, 11 Mar 2016 13:27:03 -0500 Subject: [PATCH] LP#1549505: add flag to tweak popularity-adjusted relevance This adds a new global_flag, search.max_popularity_importance_multiplier, to control the factor by which popularity affects Popularity Adjusted Relevance ranking. The value should be a decimal number, typically between 1.0 and 2.0: * 1.0 would be equivalent to not adjusting relevance for popularity at all. * 1.1 would mean that the multiplier would range from 1 (for zero popularity) to 1.1 (for maximum popularity), for a maximum boost of 10% of the base relevance value of the record. What's happening in the code: Scale the 0-5 effect of popularity badges by providing a multiplier for the badge average based on the overall maximum multiplier. Two examples, comparing the effect to the default $max_mult value of 2.0, which causes a $adjusted_scale value of 0.2: * Given the default $max_mult of 2.0, the value of $adjusted_scale will be 0.2 [($max_mult - 1.0) / 5.0]. For a record whose average badge score is the maximum of 5.0, that would make the relevance multiplier be 2.0: 1.0 + (5.0 [average score] * 0.2 [ $adjusted_scale ]). This would have the effect of doubling the effective relevance of highly popular items. * Given a $max_mult of 1.1, the value of $adjusted_scale will be 0.02, meaning that the average badge value will be multiplied by 0.02 rather than 0.2, then added to 1.0 and used as a multiplier against the base relevance. Thus a change of at most 10% to the base relevance for a record with a 5.0 average badge score. This will allow records that are naturally very relevant to avoid being pushed below badge-heavy records. * Given a $max_mult of 3.0, the value of $adjusted_scale will be 0.4, meaning that the average badge value will be multiplied by 0.4 rather than 0.2, then added to 1.0 and used as a multiplier against the base relevance. 
Thus a change of as much as 200% to (or three times the size of) the base relevance for a record with a 5.0 average badge score. This in turn will cause badges to outweigh relevance to a very large degree. The maximum badge multiplier can be set to a value less than 1.0; this would have the effect of making less popular items show up higher in the results. While this is not a likely option for production use, it could be useful for identifying interesting long-tail hits, particularly in a database where enough badges are configured so that very few records have an average badge score of zero. Signed-off-by: Galen Charlton Signed-off-by: Mike Rylander Signed-off-by: Kathy Lussier --- .../Application/Storage/Driver/Pg/QueryParser.pm | 63 +++++++++++++++++++++- .../Application/Storage/Publisher/metabib.pm | 13 +++++ Open-ILS/src/sql/Pg/950.data.seed-values.sql | 12 +++++ .../Pg/upgrade/XXXX.schema.statisitcal-ratings.sql | 12 +++++ 4 files changed, 99 insertions(+), 1 deletion(-) diff --git a/Open-ILS/src/perlmods/lib/OpenILS/Application/Storage/Driver/Pg/QueryParser.pm b/Open-ILS/src/perlmods/lib/OpenILS/Application/Storage/Driver/Pg/QueryParser.pm index f06d097f62..9265a8891e 100644 --- a/Open-ILS/src/perlmods/lib/OpenILS/Application/Storage/Driver/Pg/QueryParser.pm +++ b/Open-ILS/src/perlmods/lib/OpenILS/Application/Storage/Driver/Pg/QueryParser.pm @@ -126,6 +126,14 @@ sub default_preferred_language_multiplier { return $self->custom_data->{default_preferred_language_multiplier}; } +sub max_popularity_importance_multiplier { + my $self = shift; + my $max = shift; + + $self->custom_data->{max_popularity_importance_multiplier} = $max if defined($max); + return $self->custom_data->{max_popularity_importance_multiplier}; +} + sub simple_plan { my $self = shift; @@ -884,7 +892,60 @@ sub toSQL { } elsif ($sort_filter eq 'edit_date') { $rank = "FIRST((SELECT edit_date FROM biblio.record_entry rbr WHERE rbr.id = m.source))"; } elsif ($sort_filter eq 'poprel') { - $rank = 
'1.0/((' . $rel . ') * (1.0 + AVG(COALESCE(pop_with.total_score::NUMERIC,0.0)) / 5.0))::NUMERIC'; + my $max_mult = $self->QueryParser->max_popularity_importance_multiplier() // 2.0; + $max_mult = 0.1 if $max_mult < 0.1; # keep it within reasonable bounds, + # and avoid the division-by-zero error + # you'd get if you allowed it to be + # zero + + if ( $max_mult == 1.0 ) { # no adjustment requested by the configuration + $rank = "1.0/($rel)::NUMERIC"; + } else { # calculate adjustment + + # Scale the 0-5 effect of popularity badges by providing a multiplier + # for the badge average based on the overall maximum + # multiplier. Two examples, comparing the effect to the default + # $max_mult value of 2.0, which causes a $adjusted_scale value + # of 0.2: + # + # * Given the default $max_mult of 2.0, the value of + # $adjusted_scale will be 0.2 [($max_mult - 1.0) / 5.0]. + # For a record whose average badge score is the maximum + # of 5.0, that would make the relevance multiplier be + # 2.0: + # 1.0 + (5.0 [average score] * 0.2 [ $adjusted_scale ], + # This would have the effect of doubling the effective + # relevance of highly popular items. + # + # * Given a $max_mult of 1.1, the value of $adjusted_scale + # will be 0.02, meaning that the average badge value will be + # multiplied by 0.02 rather than 0.2, then added to 1.0 and + # used as a multiplier against the base relevance. Thus a + # change of at most 10% to the base relevance for a record + # with a 5.0 average badge score. This will allow records + # that are naturally very relevant to avoid being pushed + # below badge-heavy records. + # + # * Given a $max_mult of 3.0, the value of $adjusted_scale + # will be 0.4, meaning that the average badge value will be + # multiplied by 0.4 rather than 0.2, then added to 1.0 and + # used as a multiplier against the base relevance. Thus a + # change of as much as 200% to (or three times the size of) + # the base relevance for a record with a 5.0 average badge + # score. 
This in turn will cause badges to outweigh + # relevance to a very large degree. + # + # The maximum badge multiplier can be set to a value less than + # 1.0; this would have the effect of making less popular items + # show up higher in the results. While this is not a likely + # option for production use, it could be useful for identifying + # interesting long-tail hits, particularly in a database + # where enough badges are configured so that very few records + # have an average badge score of zero. + + my $adjusted_scale = ( $max_mult - 1.0 ) / 5.0; + $rank = "1.0/(( $rel ) * (1.0 + (AVG(COALESCE(pop_with.total_score::NUMERIC,0.0)) * $adjusted_scale)))::NUMERIC"; + } } elsif ($sort_filter =~ /^pop/) { $rank = '1.0/(AVG(COALESCE(pop_with.total_score::NUMERIC,0.0)) + 5.0)::NUMERIC'; my $pop_desc = $desc eq 'ASC' ? 'DESC' : 'ASC'; diff --git a/Open-ILS/src/perlmods/lib/OpenILS/Application/Storage/Publisher/metabib.pm b/Open-ILS/src/perlmods/lib/OpenILS/Application/Storage/Publisher/metabib.pm index aebd348284..debc865b04 100644 --- a/Open-ILS/src/perlmods/lib/OpenILS/Application/Storage/Publisher/metabib.pm +++ b/Open-ILS/src/perlmods/lib/OpenILS/Application/Storage/Publisher/metabib.pm @@ -5,6 +5,7 @@ use OpenSRF::EX qw/:try/; use OpenILS::Application::Storage::FTS; use OpenILS::Utils::Fieldmapper; use OpenSRF::Utils::Logger qw/:level/; +use OpenILS::Application::AppUtils; use OpenSRF::Utils::Cache; use OpenSRF::Utils::JSON; use Data::Dumper; @@ -12,6 +13,8 @@ use Digest::MD5 qw/md5_hex/; use OpenILS::Application::Storage::QueryParser; +my $U = 'OpenILS::Application::AppUtils'; + my $log = 'OpenSRF::Utils::Logger'; $VERSION = 1; @@ -70,6 +73,16 @@ sub _initialize_parser { )->gather(1), ); + my $max_mult; + my $cgf = $cstore->request( + 'open-ils.cstore.direct.config.global_flag.retrieve', + 'search.max_popularity_importance_multiplier' + )->gather(1); + $max_mult = $cgf->value if $cgf && $U->is_true($cgf->enabled); + $max_mult //= 2.0; + $max_mult = 2.0 unless 
$max_mult =~ /^-?(?:\d+\.?|\.\d)\d*\z/; # just in case + $parser->max_popularity_importance_multiplier($max_mult); + $cstore->disconnect; die("Cannot initialize $parser!") unless ($parser->initialization_complete); } diff --git a/Open-ILS/src/sql/Pg/950.data.seed-values.sql b/Open-ILS/src/sql/Pg/950.data.seed-values.sql index 31974ca386..feb6cb433a 100644 --- a/Open-ILS/src/sql/Pg/950.data.seed-values.sql +++ b/Open-ILS/src/sql/Pg/950.data.seed-values.sql @@ -11724,6 +11724,18 @@ INSERT INTO config.global_flag (name, label, value, enabled) VALUES ( '', TRUE ); + +INSERT INTO config.global_flag (name, label, value, enabled) VALUES ( + 'search.max_popularity_importance_multiplier', + oils_i18n_gettext( + 'search.max_popularity_importance_multiplier', + 'Maximum popularity importance multiplier for popularity-adjusted relevance searches (decimal value between 1.0 and 2.0)', + 'cgf', + 'label' + ), + '2.0', + TRUE +); */ INSERT INTO config.usr_setting_type (name,opac_visible,label,description,datatype) diff --git a/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.statisitcal-ratings.sql b/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.statisitcal-ratings.sql index 1bcbe63404..d6e5a22a09 100644 --- a/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.statisitcal-ratings.sql +++ b/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.statisitcal-ratings.sql @@ -135,6 +135,18 @@ INSERT INTO config.global_flag (name, label, value, enabled) VALUES ( TRUE ); +INSERT INTO config.global_flag (name, label, value, enabled) VALUES ( + 'search.max_popularity_importance_multiplier', + oils_i18n_gettext( + 'search.max_popularity_importance_multiplier', + 'Maximum popularity importance multiplier for popularity-adjusted relevance searches (decimal value between 1.0 and 2.0)', + 'cgf', + 'label' + ), + '2.0', + TRUE +); + CREATE TABLE rating.popularity_parameter ( id INT PRIMARY KEY, name TEXT NOT NULL UNIQUE, -- i18n -- 2.11.0