From: dbs Date: Thu, 14 Jan 2010 04:54:48 +0000 (+0000) Subject: Move the decode_utf8() call for various feeds to entityize() X-Git-Url: https://old-git.evergreen-ils.org/?a=commitdiff_plain;h=893626b092ef6a8cafc8b7064fe9bbad99fda9ad;p=evergreen%2Ftadl.git Move the decode_utf8() call for various feeds to entityize() decode_utf8() is special in that it won't transform any string once its 'utf8' flag has been set, so it is a safe subroutine to call multiple times for a given string. As it turns out, SRU (and in turn Z39.50) is suffering from a double-encoding of the search terms, so we have to forcefully double-decode the terms with the decode('utf8') variation that does not respect the 'utf8' flag. This will enable Z39.50 and SRU queries to actually return results for queries like 'Montréal' and 'Québec'. Eventually we need to figure out where in the SRU/CGI stack the strings are being incorrectly encoded in the first place, but for now a much-improved Z39.50 server seems acceptable. git-svn-id: svn://svn.open-ils.org/ILS/trunk@15320 dcc99617-32d9-48b4-a31d-7c20da2025e4 --- diff --git a/Open-ILS/src/perlmods/OpenILS/Application/AppUtils.pm b/Open-ILS/src/perlmods/OpenILS/Application/AppUtils.pm index 55546dc63e..b96521c96e 100644 --- a/Open-ILS/src/perlmods/OpenILS/Application/AppUtils.pm +++ b/Open-ILS/src/perlmods/OpenILS/Application/AppUtils.pm @@ -14,6 +14,7 @@ use OpenILS::Const qw/:const/; use Unicode::Normalize; use OpenSRF::Utils::SettingsClient; use UUID::Tiny; +use Encode; # --------------------------------------------------------------------------- # Pile of utilty methods used accross applications. 
@@ -1485,6 +1486,10 @@ sub entityize { my($self, $string, $form) = @_; $form ||= ""; + # If we're going to convert non-ASCII characters to XML entities, + # we had better be dealing with a UTF8 string to begin with + $string = decode_utf8($string); + if ($form eq 'D') { $string = NFD($string); } else { diff --git a/Open-ILS/src/perlmods/OpenILS/WWW/SuperCat.pm b/Open-ILS/src/perlmods/OpenILS/WWW/SuperCat.pm index 4418c72d58..71ced48c9a 100644 --- a/Open-ILS/src/perlmods/OpenILS/WWW/SuperCat.pm +++ b/Open-ILS/src/perlmods/OpenILS/WWW/SuperCat.pm @@ -874,7 +874,7 @@ sub bookbag_feed { print "Content-type: ". $feed->type ."; charset=utf-8\n\n"; - print $U->entityize(decode_utf8($feed->toString)) . "\n"; + print $U->entityize($feed->toString) . "\n"; return Apache2::Const::OK; } @@ -951,7 +951,7 @@ sub changes_feed { print "Content-type: ". $feed->type ."; charset=utf-8\n\n"; - print $U->entityize(decode_utf8($feed->toString)) . "\n"; + print $U->entityize($feed->toString) . "\n"; return Apache2::Const::OK; } @@ -1676,8 +1676,13 @@ sub sru_search { my ($shortname, $holdings) = $url =~ m#/?([^/]*)(/holdings)?#; if ( $resp->type eq 'searchRetrieve' ) { - my $cql_query = decode_utf8($req->query); - my $search_string = decode_utf8($req->cql->toEvergreen); + + # These terms are arriving to us double-encoded, so until we + # figure out where in the CGI/SRU chain that's happening, we + # have to forcefully double-decode them a second time with + # the outer decode('utf8', $string) call + my $cql_query = decode('utf8', decode_utf8($req->query)); + my $search_string = decode('utf8', decode_utf8($req->cql->toEvergreen)); # Ensure the search string overrides the default site if ($shortname and $search_string !~ m#site:#) {