From 4a02ad9f39356a92105fdd1491a95e98634e5ea3 Mon Sep 17 00:00:00 2001 From: erickson Date: Tue, 18 Nov 2008 22:23:29 +0000 Subject: [PATCH] 2 Patches from Scott McKellar, with slight modification: This patch adds a new function buffer_append_uescape(), a streamlined replacement for uescape(). The optimizations are as follows: 1. As described in an earlier post, the new function appends the uescaped text to an existing growing_buffer, rather than into an internal one that it must create and destroy. It thereby saves two mallocs and two frees. In addition, the calling code doesn't have to do a buffer_add(). 2. Because it doesn't create an internal growing_buffer, it doesn't need to do a strlen() to determine how big a buffer to allocate. 3. Since the new version doesn't have a boolean parameter, it doesn't have to test the boolean. 4. For characters < 128 (i.e. ASCII characters), I rearranged the order of the tests so that non-control characters are recognized immediately. In uescape() we first go through a switch/case looking for several specific control characters like '\b' and '\n'. In practice most characters are not control characters, so this rearrangement saves a few CPU cycles. 5. For control characters, uescape() uses buffer_fadd() to format the character into hex. Now, buffer_fadd is slow because it uses vsnprintf() twice, once to determine a buffer size and once to do the formatting. In turn vsnprintf() is slow because it has to parse the format string. In this case we don't need vsnprintf() because we already know exactly how big the buffer needs to be, and the formatting is simple. I eliminated buffer_fadd() and formatted the hex manually with a little bit-twiddling. Some of these optimizations could be applied to uescape(), but I haven't bothered, partly because I wanted a clean comparison for benchmarking purposes and partly because I expect uescape() to disappear from use (though I am leaving it available). ===== This patch is a rewrite of the jsonObjectToJSON and jsonObjectToJSONRaw functions. It is dependent on my previous patch to utils.c and utils.h, adding the new buffer_append_uescape function. One purpose is to replace a call to the uescape function with a call to the faster buffer_append_uescape function. The other purpose is to introduce a faster way to translate a jsonObject into a string. (Also in one spot I broke up a very long string literal into several smaller pieces so that it wouldn't wrap around in the editor.) In the existing jsonObjectToJSON function, we receive a pointer to a jsonObject and return a string of JSON. However we don't translate the original jsonObject directly. Instead, we create a modified clone of the original, inserting an additional JSON_HASH node wherever we find a classname. Then we translate the clone, and finally destroy it. It always struck me as an egregious waste to create and destroy a whole parallel object just so that we could turn it into a string. So I looked for a way to eliminate the cloning. The result is a modification of add_json_to_buffer(), a local function that recursively traverses and translates the jsonObject. When it sees a classname (and it has been asked to expand classnames), the new version inserts additional gibberish into the output text and then continues the traversal, without modifying or copying the original jsonObject at all. In my benchmark, this new approach was faster than the original by a factor of about 5. When I combined this change with the use of the new buffer_append_uencode function, it was faster by a factor of about 7.2. The benchmark used a moderately complex jsonObject about 5 or 6 levels deep, with both hashes and arrays, with classnames at several levels. The performance gain will no doubt depend on the contents of the jsonObject,but I haven't tried to isolate the variables. The new version is a bit trickier and harder to read than the old. In my opinion the speedup is worth the obscurity, because a lot of places in Evergreen will benefit. git-svn-id: svn://svn.open-ils.org/OpenSRF/trunk@1498 9efc2488-bf62-4759-914b-345cdb29e865 --- src/libopensrf/osrf_json_object.c | 105 +++++++++++++++++++++++++----------- src/libopensrf/utils.c | 109 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 183 insertions(+), 31 deletions(-) diff --git a/src/libopensrf/osrf_json_object.c b/src/libopensrf/osrf_json_object.c index 335d70e..26c3250 100644 --- a/src/libopensrf/osrf_json_object.c +++ b/src/libopensrf/osrf_json_object.c @@ -61,7 +61,8 @@ typedef union unusedObjUnion unusedObj; static unusedObj* freeObjList = NULL; -static void add_json_to_buffer( const jsonObject* obj, growing_buffer * buf ); +static void add_json_to_buffer( const jsonObject* obj, + growing_buffer * buf, int do_classname, int second_pass ); /** * Return all unused jsonObjects to the heap @@ -194,7 +195,10 @@ void jsonObjectFree( jsonObject* o ) { unusedObjCapture++; currentListLen++; if (unusedObjCapture > 1 && !(unusedObjCapture % 1000)) - osrfLogDebug( OSRF_LOG_MARK, "Objects malloc()'d: %d, Reusable objects captured: %d, Objects reused: %d, Current List Length: %d", mallocObjCreate, unusedObjCapture, unusedObjRelease, currentListLen ); + osrfLogDebug( OSRF_LOG_MARK, "Objects malloc()'d: %d, " + "Reusable objects captured: %d, Objects reused: %d, " + "Current List Length: %d", + mallocObjCreate, unusedObjCapture, unusedObjRelease, currentListLen ); } static void _jsonFreeHashItem(char* key, void* item){ @@ -256,21 +260,49 @@ const jsonObject* jsonObjectGetKeyConst( const jsonObject* obj, const char* key return osrfHashGet( obj->value.h, key); } -char* jsonObjectToJSON( const jsonObject* obj ) { - jsonObject* obj2 = jsonObjectEncodeClass( obj ); - char* json = jsonObjectToJSONRaw(obj2); - jsonObjectFree(obj2); - return json; -} +/** + * Recursively traverse a jsonObject, formatting it into a JSON string. + * + * The last two parameters are booleans. + * + * If do_classname is true, examine each node for a classname, and if you + * find one, pretend that the node is under an extra layer of JSON_HASH, with + * JSON_CLASS_KEY and JSON_DATA_KEY as keys. + * + * second_pass should always be false except for some recursive calls. It + * is used when expanding classnames, to distinguish between the first and + * second passes through a given node. + * + * @return Nothing + */ +static void add_json_to_buffer( const jsonObject* obj, + growing_buffer * buf, int do_classname, int second_pass ) { -char* jsonObjectToJSONRaw( const jsonObject* obj ) { - if(!obj) return NULL; - growing_buffer* buf = buffer_init(32); - add_json_to_buffer( obj, buf ); - return buffer_release( buf ); -} + if(NULL == obj) { + OSRF_BUFFER_ADD(buf, "null"); + return; + } -static void add_json_to_buffer( const jsonObject* obj, growing_buffer * buf ) { + if( obj->classname && do_classname ) + { + if( second_pass ) + second_pass = 0; + else + { + // Pretend we see an extra layer of JSON_HASH + + OSRF_BUFFER_ADD( buf, "{\"" ); + OSRF_BUFFER_ADD( buf, JSON_CLASS_KEY ); + OSRF_BUFFER_ADD( buf, "\":\"" ); + OSRF_BUFFER_ADD( buf, obj->classname ); + OSRF_BUFFER_ADD( buf, "\",\"" ); + OSRF_BUFFER_ADD( buf, JSON_DATA_KEY ); + OSRF_BUFFER_ADD( buf, "\":" ); + add_json_to_buffer( obj, buf, 1, 1 ); + buffer_add_char( buf, '}' ); + return; + } + } switch(obj->type) { @@ -279,11 +311,11 @@ static void add_json_to_buffer( const jsonObject* obj, growing_buffer * buf ) { else OSRF_BUFFER_ADD(buf, "false"); break; - case JSON_NUMBER: { - if(obj->value.s) OSRF_BUFFER_ADD( buf, obj->value.s ); - else OSRF_BUFFER_ADD_CHAR( buf, '0' ); - break; - } + case JSON_NUMBER: { + if(obj->value.s) OSRF_BUFFER_ADD( buf, obj->value.s ); + else OSRF_BUFFER_ADD_CHAR( buf, '0' ); + break; + } case JSON_NULL: OSRF_BUFFER_ADD(buf, "null"); @@ -291,12 +323,7 @@ static void add_json_to_buffer( const jsonObject* obj, growing_buffer * buf ) { case JSON_STRING: OSRF_BUFFER_ADD_CHAR(buf, '"'); - char* data = obj->value.s; - int len = strlen(data); - - char* output = uescape(data, len, 1); - OSRF_BUFFER_ADD(buf, output); - free(output); + buffer_append_uescape(buf, obj->value.s); OSRF_BUFFER_ADD_CHAR(buf, '"'); break; @@ -306,7 +333,8 @@ static void add_json_to_buffer( const jsonObject* obj, growing_buffer * buf ) { int i; for( i = 0; i != obj->value.l->size; i++ ) { if(i > 0) OSRF_BUFFER_ADD(buf, ","); - add_json_to_buffer( OSRF_LIST_GET_INDEX(obj->value.l, i), buf ); + add_json_to_buffer( + OSRF_LIST_GET_INDEX(obj->value.l, i), buf, do_classname, second_pass ); } } OSRF_BUFFER_ADD_CHAR(buf, ']'); @@ -314,16 +342,18 @@ static void add_json_to_buffer( const jsonObject* obj, growing_buffer * buf ) { } case JSON_HASH: { - + OSRF_BUFFER_ADD_CHAR(buf, '{'); osrfHashIterator* itr = osrfNewHashIterator(obj->value.h); jsonObject* item; int i = 0; while( (item = osrfHashIteratorNext(itr)) ) { - if(i++ > 0) OSRF_BUFFER_ADD(buf, ","); - buffer_fadd(buf, "\"%s\":", osrfHashIteratorKey(itr)); - add_json_to_buffer( item, buf ); + if(i++ > 0) OSRF_BUFFER_ADD_CHAR(buf, ','); + OSRF_BUFFER_ADD_CHAR(buf, '"'); + OSRF_BUFFER_ADD(buf, osrfHashIteratorKey(itr)); + OSRF_BUFFER_ADD(buf, "\":"); + add_json_to_buffer( item, buf, do_classname, second_pass ); } osrfHashIteratorFree(itr); @@ -333,6 +363,19 @@ static void add_json_to_buffer( const jsonObject* obj, growing_buffer * buf ) { } } +char* jsonObjectToJSONRaw( const jsonObject* obj ) { + if(!obj) return NULL; + growing_buffer* buf = buffer_init(32); + add_json_to_buffer( obj, buf, 0, 0 ); + return buffer_release( buf ); +} + +char* jsonObjectToJSON( const jsonObject* obj ) { + if(!obj) return NULL; + growing_buffer* buf = buffer_init(32); + add_json_to_buffer( obj, buf, 1, 0 ); + return buffer_release( buf ); +} jsonIterator* jsonNewIterator(const jsonObject* obj) { if(!obj) return NULL; diff --git a/src/libopensrf/utils.c b/src/libopensrf/utils.c index 778afab..491f168 100644 --- a/src/libopensrf/utils.c +++ b/src/libopensrf/utils.c @@ -16,6 +16,9 @@ GNU General Public License for more details. #include #include +static const char hex_chars[] = "0123456789abcdef"; +static unsigned char hex_code[7] = "\\u00"; + inline void* safe_malloc( int size ) { void* ptr = (void*) malloc( size ); if( ptr == NULL ) { @@ -424,6 +427,112 @@ char* uescape( const char* string, int size, int full_escape ) { return buffer_release(buf); } +int buffer_append_uescape( growing_buffer* buf, const char* string ) { + + if(NULL == string) + return 0; // Nothing to add? Nothing to do + + if( NULL == buf ) + return -1; // Nothing to add to + + int idx = 0; + unsigned long int c; + + while (string[idx]) { + + c = 0x0; + + if ((unsigned char)string[idx] >= 0x80) { // not ASCII + + if ((unsigned char)string[idx] >= 0xC0 && (unsigned char)string[idx] <= 0xF4) { // starts a UTF8 string + + int clen = 1; + if (((unsigned char)string[idx] & 0xF0) == 0xF0) { + clen = 3; + c = (unsigned char)string[idx] ^ 0xF0; + + } else if (((unsigned char)string[idx] & 0xE0) == 0xE0) { + clen = 2; + c = (unsigned char)string[idx] ^ 0xE0; + + } else if (((unsigned char)string[idx] & 0xC0) == 0xC0) { + clen = 1; + c = (unsigned char)string[idx] ^ 0xC0; + } + + for (;clen;clen--) { + + idx++; // look at the next byte + c = (c << 6) | ((unsigned char)string[idx] & 0x3F); // add this byte worth + } + + buffer_fadd(buf, "\\u%04x", c); + + } else { + return idx + 1; + } + + } else if(string[idx] >= ' ' ) { // printable ASCII character + + c = string[idx]; + switch(c) { + case '"': + case '\\': + OSRF_BUFFER_ADD_CHAR(buf, '\\'); + OSRF_BUFFER_ADD_CHAR(buf, c); + break; + + default: + OSRF_BUFFER_ADD_CHAR(buf, c); + } + + } else { + c = string[idx]; + + /* escape the usual suspects */ + switch(c) { + case '\b': + OSRF_BUFFER_ADD_CHAR(buf, '\\'); + OSRF_BUFFER_ADD_CHAR(buf, 'b'); + break; + + case '\f': + OSRF_BUFFER_ADD_CHAR(buf, '\\'); + OSRF_BUFFER_ADD_CHAR(buf, 'f'); + break; + + case '\t': + OSRF_BUFFER_ADD_CHAR(buf, '\\'); + OSRF_BUFFER_ADD_CHAR(buf, 't'); + break; + + case '\n': + OSRF_BUFFER_ADD_CHAR(buf, '\\'); + OSRF_BUFFER_ADD_CHAR(buf, 'n'); + break; + + case '\r': + OSRF_BUFFER_ADD_CHAR(buf, '\\'); + OSRF_BUFFER_ADD_CHAR(buf, 'r'); + break; + + default: + { + // Represent as \u followed by four hex characters + hex_code[ 4 ] = hex_chars[ c >> 4 ]; // high nybble + hex_code[ 5 ] = hex_chars[ c & 0x0F ]; // low nybble + hex_code[ 6 ] = '\0'; + OSRF_BUFFER_ADD(buf, (char *) hex_code); + break; + } + } + } + + idx++; + } + + return 0; +} // A function to turn a process into a daemon int daemonize( void ) { -- 2.11.0