From 893626b092ef6a8cafc8b7064fe9bbad99fda9ad Mon Sep 17 00:00:00 2001 From: dbs Date: Thu, 14 Jan 2010 04:54:48 +0000 Subject: [PATCH] Move the decode_utf8() call for various feeds to entityize() MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit decode_utf8() is special in that it won't transform any string once its 'utf8' flag has been set, so it is a safe subroutine to call multiple times for a given string. As it turns out, SRU (and in turn Z39.50) is suffering from a double-encoding of the search terms, so we have to forcefully double-decode the terms with the decode('utf8') variation that does not respect the 'utf8' flag. This will enable Z39.50 and SRU queries to actually return results for queries like 'Montréal' and 'Québec'. Eventually we need to figure out where in the SRU/CGI stack the strings are being incorrectly encoded in the first place, but for now a much-improved Z39.50 server seems acceptable. git-svn-id: svn://svn.open-ils.org/ILS/trunk@15320 dcc99617-32d9-48b4-a31d-7c20da2025e4 --- Open-ILS/src/perlmods/OpenILS/Application/AppUtils.pm | 5 +++++ Open-ILS/src/perlmods/OpenILS/WWW/SuperCat.pm | 13 +++++++++---- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/Open-ILS/src/perlmods/OpenILS/Application/AppUtils.pm b/Open-ILS/src/perlmods/OpenILS/Application/AppUtils.pm index 55546dc63e..b96521c96e 100644 --- a/Open-ILS/src/perlmods/OpenILS/Application/AppUtils.pm +++ b/Open-ILS/src/perlmods/OpenILS/Application/AppUtils.pm @@ -14,6 +14,7 @@ use OpenILS::Const qw/:const/; use Unicode::Normalize; use OpenSRF::Utils::SettingsClient; use UUID::Tiny; +use Encode; # --------------------------------------------------------------------------- # Pile of utilty methods used accross applications. 
@@ -1485,6 +1486,10 @@ sub entityize { my($self, $string, $form) = @_; $form ||= ""; + # If we're going to convert non-ASCII characters to XML entities, + # we had better be dealing with a UTF8 string to begin with + $string = decode_utf8($string); + if ($form eq 'D') { $string = NFD($string); } else { diff --git a/Open-ILS/src/perlmods/OpenILS/WWW/SuperCat.pm b/Open-ILS/src/perlmods/OpenILS/WWW/SuperCat.pm index 4418c72d58..71ced48c9a 100644 --- a/Open-ILS/src/perlmods/OpenILS/WWW/SuperCat.pm +++ b/Open-ILS/src/perlmods/OpenILS/WWW/SuperCat.pm @@ -874,7 +874,7 @@ sub bookbag_feed { print "Content-type: ". $feed->type ."; charset=utf-8\n\n"; - print $U->entityize(decode_utf8($feed->toString)) . "\n"; + print $U->entityize($feed->toString) . "\n"; return Apache2::Const::OK; } @@ -951,7 +951,7 @@ sub changes_feed { print "Content-type: ". $feed->type ."; charset=utf-8\n\n"; - print $U->entityize(decode_utf8($feed->toString)) . "\n"; + print $U->entityize($feed->toString) . "\n"; return Apache2::Const::OK; } @@ -1676,8 +1676,13 @@ sub sru_search { my ($shortname, $holdings) = $url =~ m#/?([^/]*)(/holdings)?#; if ( $resp->type eq 'searchRetrieve' ) { - my $cql_query = decode_utf8($req->query); - my $search_string = decode_utf8($req->cql->toEvergreen); + + # These terms are arriving to us double-encoded, so until we + # figure out where in the CGI/SRU chain that's happening, we + # have to forcefully double-decode them a second time with + # the outer decode('utf8', $string) call + my $cql_query = decode('utf8', decode_utf8($req->query)); + my $search_string = decode('utf8', decode_utf8($req->cql->toEvergreen)); # Ensure the search string overrides the default site if ($shortname and $search_string !~ m#site:#) { -- 2.11.0