From: miker Date: Tue, 2 Jan 2007 22:01:15 +0000 (+0000) Subject: new utf-8 escaping code, backpatched from head; printf format fix X-Git-Url: https://old-git.evergreen-ils.org/?a=commitdiff_plain;h=cd53e9a5df21db2f6c5380c7d5585e331867b973;p=Evergreen.git new utf-8 escaping code, backpatched from head; printf format fix git-svn-id: svn://svn.open-ils.org/ILS/branches/rel_1_0@6719 dcc99617-32d9-48b4-a31d-7c20da2025e4 --- diff --git a/OpenSRF/src/utils/utils.c b/OpenSRF/src/utils/utils.c index cdb8cab006..a226cabedc 100644 --- a/OpenSRF/src/utils/utils.c +++ b/OpenSRF/src/utils/utils.c @@ -17,7 +17,6 @@ GNU General Public License for more details. #include "utils.h" #include - inline void* safe_malloc( int size ) { void* ptr = (void*) malloc( size ); if( ptr == NULL ) { @@ -255,56 +254,44 @@ int buffer_add_char(growing_buffer* gb, char c) { char* uescape( const char* string, int size, int full_escape ) { growing_buffer* buf = buffer_init(size + 64); + int clen = 0; int idx = 0; - long unsigned int c = 0; + unsigned long int c = 0x0; while (string[idx]) { - - c ^= c; - - if ((string[idx] & 0xF0) == 0xF0) { - c = string[idx]<<18; - - if( size - idx < 4 ) return NULL; - - idx++; - c |= (string[idx] & 0x3F)<<12; - - idx++; - c |= (string[idx] & 0x3F)<<6; - - idx++; - c |= (string[idx] & 0x3F); - - c ^= 0xFF000000; - - buffer_fadd(buf, "\\u%0.4x", c); - - } else if ((string[idx] & 0xE0) == 0xE0) { - c = string[idx]<<12; - if( size - idx < 3 ) return NULL; - - idx++; - c |= (string[idx] & 0x3F)<<6; - - idx++; - c |= (string[idx] & 0x3F); - - c ^= 0xFFF80000; - - buffer_fadd(buf, "\\u%0.4x", c); - - } else if ((string[idx] & 0xC0) == 0xC0) { - // Two byte char - c = string[idx]<<6; - if( size - idx < 2 ) return NULL; - - idx++; - c |= (string[idx] & 0x3F); - - c ^= 0xFFFFF000; - - buffer_fadd(buf, "\\u%0.4x", c); + + c = 0x0; + + if ((unsigned char)string[idx] >= 0x80) { // not ASCII + + if ((unsigned char)string[idx] >= 0xC0 && (unsigned char)string[idx] <= 0xF4) { // starts a UTF8 string + + clen = 1; + if (((unsigned char)string[idx] & 0xF0) == 0xF0) { + clen = 3; + c = (unsigned char)string[idx] ^ 0xF0; + + } else if (((unsigned char)string[idx] & 0xE0) == 0xE0) { + clen = 2; + c = (unsigned char)string[idx] ^ 0xE0; + + } else if (((unsigned char)string[idx] & 0xC0) == 0xC0) { + clen = 1; + c = (unsigned char)string[idx] ^ 0xC0; + } + + for (;clen;clen--) { + + idx++; // look at the next byte + c = (c << 6) | ((unsigned char)string[idx] & 0x3F); // add this byte worth + + } + + buffer_fadd(buf, "\\u%04x", c); + + } else { + return NULL; + } } else { c = string[idx]; @@ -347,9 +334,9 @@ char* uescape( const char* string, int size, int full_escape ) { OSRF_BUFFER_ADD_CHAR(buf, '\\'); break; - default: - if( c < 32 ) buffer_fadd(buf, "\\u%0.4x", c); - else OSRF_BUFFER_ADD_CHAR(buf, c); + default: + if( c < 32 ) buffer_fadd(buf, "\\u%04x", c); + else OSRF_BUFFER_ADD_CHAR(buf, c); } } else {