Switch uescape() to UTF-8 handling macros adapted from Perl
author miker <miker@9efc2488-bf62-4759-914b-345cdb29e865>
Wed, 22 Nov 2006 20:08:29 +0000 (20:08 +0000)
committer miker <miker@9efc2488-bf62-4759-914b-345cdb29e865>
Wed, 22 Nov 2006 20:08:29 +0000 (20:08 +0000)
git-svn-id: svn://svn.open-ils.org/OpenSRF/trunk@797 9efc2488-bf62-4759-914b-345cdb29e865

src/utils/utils.c
src/utils/utils.h

index 4a7e1b3..e4963ba 100644 (file)
@@ -17,7 +17,6 @@ GNU General Public License for more details.
 #include "utils.h"
 #include <errno.h>
 
-
 inline void* safe_malloc( int size ) {
        void* ptr = (void*) malloc( size );
        if( ptr == NULL ) {
@@ -259,53 +258,16 @@ char* uescape( const char* string, int size, int full_escape ) {
        long unsigned int c = 0;
 
        while (string[idx]) {
-       
+
                c ^= c;
-               
-               if ((string[idx] & 0xF0) == 0xF0) {
-                       c = string[idx]<<18;
-
-                       if( size - idx < 4 ) return NULL;
-                       
-                       idx++;
-                       c |= (string[idx] & 0x3F)<<12;
-                       
-                       idx++;
-                       c |= (string[idx] & 0x3F)<<6;
-                       
-                       idx++;
-                       c |= (string[idx] & 0x3F);
-                       
-                       c ^= 0xFF000000;
-                       
-                       buffer_fadd(buf, "\\u%0.4x", c);
-
-               } else if ((string[idx] & 0xE0) == 0xE0) {
-                       c = string[idx]<<12;
-                       if( size - idx < 3 ) return NULL;
-                       
-                       idx++;
-                       c |= (string[idx] & 0x3F)<<6;
-                       
-                       idx++;
-                       c |= (string[idx] & 0x3F);
-                       
-                       c ^= 0xFFF80000;
-                       
-                       buffer_fadd(buf, "\\u%0.4x", c);
-
-               } else if ((string[idx] & 0xC0) == 0xC0) {
-                       // Two byte char
-                       c = string[idx]<<6;
-                       if( size - idx < 2 ) return NULL;
-                       
-                       idx++;
-                       c |= (string[idx] & 0x3F);
-                       
-                       c ^= 0xFFFFF000;
-                       
-                       buffer_fadd(buf, "\\u%0.4x", c);
 
+               if (!OSRF_UTF8_IS_ASCII(string[idx])) {
+                       if (OSRF_UTF8_IS_START) {
+                               do {
+                                       OSRF_UTF8_ACCUMULATE(c, string[idx]);
+                               } while (OSRF_UTF8_IS_CONTINUATION(string[idx++]));
+                               buffer_fadd(buf, "\\u%0.4x", c);
+                       } else return NULL;
                } else {
                        c = string[idx];
 
index 41aa488..1e00168 100644 (file)
@@ -31,6 +31,15 @@ GNU General Public License for more details.
 
 #include "md5.h"
 
+#define OSRF_UTF8_IS_ASCII(c)          ((unsigned char)(c) <  0x80)   /* 0x00..0x7f; argument is narrowed to one unsigned byte so a signed char >= 0x80 is not mistaken for ASCII */
+#define OSRF_UTF8_IS_START(c)          ((unsigned char)((unsigned char)(c) - 0xc0) <= 0x3d)   /* 0xc0..0xfd as a single range test, so (c) is evaluated exactly once */
+#define OSRF_UTF8_IS_CONTINUATION(c)           (((unsigned char)(c) & 0xc0) == 0x80)   /* 0x80..0xbf (bit pattern 10xxxxxx); (c) is evaluated exactly once */
+#define OSRF_UTF8_IS_CONTINUED(c)              ((c) &  0x80)   /* non-zero when the high bit of the byte is set */
+
+#define OSRF_UTF8_CONTINUATION_MASK            (0x3f)   /* selects the low six bits of a byte */
+#define OSRF_UTF8_ACCUMULATION_SHIFT           6   /* number of bits the accumulator is shifted per byte */
+#define OSRF_UTF8_ACCUMULATE(_o, _n)   (((_o) << OSRF_UTF8_ACCUMULATION_SHIFT) | ((_n) & OSRF_UTF8_CONTINUATION_MASK))   /* yields the combined value; does NOT assign to _o, the caller must store the result */
+
 #define OSRF_MALLOC(ptr, size) \
        ptr = (void*) malloc( size ); \
        if( ptr == NULL ) { \