From: miker Date: Wed, 22 Nov 2006 20:08:29 +0000 (+0000) Subject: switching to some UTF8 code from perl X-Git-Url: https://old-git.evergreen-ils.org/?a=commitdiff_plain;h=ba1fbb875fc0de41735ce54174a7d8b351d53a2a;p=opensrf%2Fbjwebb.git switching to some UTF8 code from perl git-svn-id: svn://svn.open-ils.org/OpenSRF/trunk@797 9efc2488-bf62-4759-914b-345cdb29e865 --- diff --git a/src/utils/utils.c b/src/utils/utils.c index 4a7e1b3..e4963ba 100644 --- a/src/utils/utils.c +++ b/src/utils/utils.c @@ -17,7 +17,6 @@ GNU General Public License for more details. #include "utils.h" #include - inline void* safe_malloc( int size ) { void* ptr = (void*) malloc( size ); if( ptr == NULL ) { @@ -259,53 +258,16 @@ char* uescape( const char* string, int size, int full_escape ) { long unsigned int c = 0; while (string[idx]) { - + c ^= c; - - if ((string[idx] & 0xF0) == 0xF0) { - c = string[idx]<<18; - - if( size - idx < 4 ) return NULL; - - idx++; - c |= (string[idx] & 0x3F)<<12; - - idx++; - c |= (string[idx] & 0x3F)<<6; - - idx++; - c |= (string[idx] & 0x3F); - - c ^= 0xFF000000; - - buffer_fadd(buf, "\\u%0.4x", c); - - } else if ((string[idx] & 0xE0) == 0xE0) { - c = string[idx]<<12; - if( size - idx < 3 ) return NULL; - - idx++; - c |= (string[idx] & 0x3F)<<6; - - idx++; - c |= (string[idx] & 0x3F); - - c ^= 0xFFF80000; - - buffer_fadd(buf, "\\u%0.4x", c); - - } else if ((string[idx] & 0xC0) == 0xC0) { - // Two byte char - c = string[idx]<<6; - if( size - idx < 2 ) return NULL; - - idx++; - c |= (string[idx] & 0x3F); - - c ^= 0xFFFFF000; - - buffer_fadd(buf, "\\u%0.4x", c); + if (!OSRF_UTF8_IS_ASCII(string[idx])) { + if (OSRF_UTF8_IS_START) { + do { + OSRF_UTF8_ACCUMULATE(c, string[idx]); + } while (OSRF_UTF8_IS_CONTINUATION(string[idx++])); + buffer_fadd(buf, "\\u%0.4x", c); + } else return NULL; } else { c = string[idx]; diff --git a/src/utils/utils.h b/src/utils/utils.h index 41aa488..1e00168 100644 --- a/src/utils/utils.h +++ b/src/utils/utils.h @@ -31,6 +31,15 @@ GNU General 
Public License for more details. #include "md5.h" +#define OSRF_UTF8_IS_ASCII(c) ((c) < 0x80) +#define OSRF_UTF8_IS_START(c) ((c) >= 0xc0 && ((c) <= 0xfd)) +#define OSRF_UTF8_IS_CONTINUATION(c) ((c) >= 0x80 && ((c) <= 0xbf)) +#define OSRF_UTF8_IS_CONTINUED(c) ((c) & 0x80) + +#define OSRF_UTF8_CONTINUATION_MASK (0x3f) +#define OSRF_UTF8_ACCUMULATION_SHIFT 6 +#define OSRF_UTF8_ACCUMULATE(_o, _n) (((_o) << OSRF_UTF8_ACCUMULATION_SHIFT) | ((_n) & OSRF_UTF8_CONTINUATION_MASK)) /* yields (_o << 6) | (low 6 bits of _n); does not assign to _o, so callers must store the result */ + #define OSRF_MALLOC(ptr, size) \ ptr = (void*) malloc( size ); \ if( ptr == NULL ) { \