new utf-8 escaping code, backpatched from head; printf format fix
authormiker <miker@dcc99617-32d9-48b4-a31d-7c20da2025e4>
Tue, 2 Jan 2007 22:01:15 +0000 (22:01 +0000)
committermiker <miker@dcc99617-32d9-48b4-a31d-7c20da2025e4>
Tue, 2 Jan 2007 22:01:15 +0000 (22:01 +0000)
git-svn-id: svn://svn.open-ils.org/ILS/branches/rel_1_0@6719 dcc99617-32d9-48b4-a31d-7c20da2025e4

OpenSRF/src/utils/utils.c

index cdb8cab..a226cab 100644 (file)
@@ -17,7 +17,6 @@ GNU General Public License for more details.
 #include "utils.h"
 #include <errno.h>
 
-
 inline void* safe_malloc( int size ) {
        void* ptr = (void*) malloc( size );
        if( ptr == NULL ) {
@@ -255,56 +254,44 @@ int buffer_add_char(growing_buffer* gb, char c) {
 char* uescape( const char* string, int size, int full_escape ) {
 
        growing_buffer* buf = buffer_init(size + 64);
+       int clen = 0;
        int idx = 0;
-       long unsigned int c = 0;
+       unsigned long int c = 0x0;
 
        while (string[idx]) {
-       
-               c ^= c;
-               
-               if ((string[idx] & 0xF0) == 0xF0) {
-                       c = string[idx]<<18;
-
-                       if( size - idx < 4 ) return NULL;
-                       
-                       idx++;
-                       c |= (string[idx] & 0x3F)<<12;
-                       
-                       idx++;
-                       c |= (string[idx] & 0x3F)<<6;
-                       
-                       idx++;
-                       c |= (string[idx] & 0x3F);
-                       
-                       c ^= 0xFF000000;
-                       
-                       buffer_fadd(buf, "\\u%0.4x", c);
-
-               } else if ((string[idx] & 0xE0) == 0xE0) {
-                       c = string[idx]<<12;
-                       if( size - idx < 3 ) return NULL;
-                       
-                       idx++;
-                       c |= (string[idx] & 0x3F)<<6;
-                       
-                       idx++;
-                       c |= (string[idx] & 0x3F);
-                       
-                       c ^= 0xFFF80000;
-                       
-                       buffer_fadd(buf, "\\u%0.4x", c);
-
-               } else if ((string[idx] & 0xC0) == 0xC0) {
-                       // Two byte char
-                       c = string[idx]<<6;
-                       if( size - idx < 2 ) return NULL;
-                       
-                       idx++;
-                       c |= (string[idx] & 0x3F);
-                       
-                       c ^= 0xFFFFF000;
-                       
-                       buffer_fadd(buf, "\\u%0.4x", c);
+
+               c = 0x0;
+
+               if ((unsigned char)string[idx] >= 0x80) { // not ASCII
+
+                       if ((unsigned char)string[idx] >= 0xC0 && (unsigned char)string[idx] <= 0xF4) { // starts a UTF8 string
+
+                               clen = 1;
+                               if (((unsigned char)string[idx] & 0xF0) == 0xF0) {
+                                       clen = 3;
+                                       c = (unsigned char)string[idx] ^ 0xF0;
+
+                               } else if (((unsigned char)string[idx] & 0xE0) == 0xE0) {
+                                       clen = 2;
+                                       c = (unsigned char)string[idx] ^ 0xE0;
+
+                               } else if (((unsigned char)string[idx] & 0xC0) == 0xC0) {
+                                       clen = 1;
+                                       c = (unsigned char)string[idx] ^ 0xC0;
+                               }
+
+                               for (;clen;clen--) {
+
+                                       idx++; // look at the next byte
+                                       c = (c << 6) | ((unsigned char)string[idx] & 0x3F); // add this byte worth
+
+                               }
+
+                               buffer_fadd(buf, "\\u%04x", c);
+
+                       } else {
+                               return NULL;
+                       }
 
                } else {
                        c = string[idx];
@@ -347,9 +334,9 @@ char* uescape( const char* string, int size, int full_escape ) {
                                                OSRF_BUFFER_ADD_CHAR(buf, '\\');
                                                break;
 
-               default:
-                  if( c < 32 ) buffer_fadd(buf, "\\u%0.4x", c);
-                  else OSRF_BUFFER_ADD_CHAR(buf, c);
+                                       default:
+                                               if( c < 32 ) buffer_fadd(buf, "\\u%04x", c);
+                                               else OSRF_BUFFER_ADD_CHAR(buf, c);
                                }
 
                        } else {