From 850bf4a5659d56ff967fc40b7dfa77692855e8ed Mon Sep 17 00:00:00 2001
From: erickson <erickson@9efc2488-bf62-4759-914b-345cdb29e865>
Date: Fri, 16 May 2008 12:45:11 +0000
Subject: [PATCH] Patch from Scott McKellar:

These patches provide a new and more efficient implementation of
osrfHash, using a hash table for random lookups and a doubly linked
list for iterations.

It is more efficient for two main reasons.  First, for iterations
it doesn't maintain a separate copy of the keys, that then have to go
through the hashing algorithm.  Second, when updating an existing
item, it updates it in place rather than deleting it and creating
a new one.


git-svn-id: svn://svn.open-ils.org/OpenSRF/trunk@1323 9efc2488-bf62-4759-914b-345cdb29e865
---
 include/opensrf/osrf_hash.h |  24 +---
 src/libopensrf/osrf_hash.c  | 304 +++++++++++++++++++++++++++-----------------
 2 files changed, 193 insertions(+), 135 deletions(-)

diff --git a/include/opensrf/osrf_hash.h b/include/opensrf/osrf_hash.h
index edf8d45..544805a 100644
--- a/include/opensrf/osrf_hash.h
+++ b/include/opensrf/osrf_hash.h
@@ -5,21 +5,10 @@
 #include <opensrf/string_array.h>
 #include <opensrf/osrf_list.h>
 
-struct _osrfHashStruct {
-	osrfList* hash; /* this hash */
-	void (*freeItem) (char* key, void* item);	/* callback for freeing stored items */
-	unsigned int size;
-	osrfStringArray* keys;
-};
+struct _osrfHashStruct;
 typedef struct _osrfHashStruct osrfHash;
 
-struct _osrfHashIteratorStruct {
-	char* current;
-	size_t currsize;  // length of "current" buffer
-	int currentIdx;
-	osrfHash* hash;
-	osrfStringArray* keys;
-};
+struct _osrfHashIteratorStruct;
 typedef struct _osrfHashIteratorStruct osrfHashIterator;
 
 /**
@@ -28,7 +17,7 @@ typedef struct _osrfHashIteratorStruct osrfHashIterator;
 osrfHash* osrfNewHash();
 
 /** Installs a callback function for freeing stored items
-    */
+ */
 void osrfHashSetCallback( osrfHash* hash, void (*callback) (char* key, void* item) );
 
 /**
@@ -59,8 +48,6 @@ void* osrfHashGet( osrfHash* hash, const char* key, ... );
   */
 osrfStringArray* osrfHashKeys( osrfHash* hash );
 
-osrfStringArray* osrfHashKeysInc( osrfHash* hash );
-
 /**
   Frees a hash
   */
@@ -71,9 +58,6 @@ void osrfHashFree( osrfHash* hash );
   */
 unsigned long osrfHashGetCount( osrfHash* hash );
 
-
-
-
 /**
   Creates a new list iterator with the given list
   */
@@ -89,7 +73,7 @@ void* osrfHashIteratorNext( osrfHashIterator* itr );
 
 /**
   Returns a pointer to the key of the current hash item
- */
+  */
 const char* osrfHashIteratorKey( const osrfHashIterator* itr );
 
 /**
diff --git a/src/libopensrf/osrf_hash.c b/src/libopensrf/osrf_hash.c
index 47cc0ee..2a0a341 100644
--- a/src/libopensrf/osrf_hash.c
+++ b/src/libopensrf/osrf_hash.c
@@ -1,36 +1,79 @@
+/*
+Copyright (C) 2007, 2008  Georgia Public Library Service
+Bill Erickson <erickson@esilibrary.com>
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+-----------
+
+An osrfHash is a hybrid between a hash table and a doubly linked
+list.  The hash table supports random lookups by key.  The list
+supports iterative traversals.  The sequence of entries in the
+list reflects the sequence in which the entries were added.
+
+osrfHashIterators are somewhat unusual in that, if an iterator
+is positioned on a given entry, deletion of that entry does
+not invalidate the iterator.  The entry to which it points is
+logically but not physically deleted.  You can still advance
+the iterator to the next entry in the list.
+
+*/
+
 #include <opensrf/osrf_hash.h>
 
 struct _osrfHashNodeStruct {
 	char* key;
 	void* item;
+	struct _osrfHashNodeStruct* prev;
+	struct _osrfHashNodeStruct* next;
 };
 typedef struct _osrfHashNodeStruct osrfHashNode;
 
+struct _osrfHashStruct {
+	osrfList* hash; /* this hash */
+	void (*freeItem) (char* key, void* item);	/* callback for freeing stored items */
+	unsigned int size;
+	osrfHashNode* first_key;
+	osrfHashNode* last_key;
+};
+
+struct _osrfHashIteratorStruct {
+	osrfHash* hash;
+	osrfHashNode* curr_node;
+};
+
 /* 0x100 is a good size for small hashes */
 //#define OSRF_HASH_LIST_SIZE 0x100  /* size of the main hash list */
 #define OSRF_HASH_LIST_SIZE 0x10  /* size of the main hash list */
 
+
 /* used internally */
 #define OSRF_HASH_NODE_FREE(h, n) \
 	if(h && n) { \
-		if(h->freeItem) h->freeItem(n->key, n->item);\
+		if(h->freeItem && n->key) h->freeItem(n->key, n->item);\
 		free(n->key); free(n); \
 }
 
-static osrfHashNode* osrfNewHashNode(char* key, void* item);
-static void* osrfHashNodeFree(osrfHash*, osrfHashNode*);
-
 osrfHash* osrfNewHash() {
 	osrfHash* hash;
 	OSRF_MALLOC(hash, sizeof(osrfHash));
 	hash->hash		= osrfNewList();
-	hash->freeItem  = NULL;
-	hash->size      = 0;
-	hash->keys		= osrfNewStringArray(64);
+	hash->first_key = NULL;
+	hash->last_key  = NULL;
 	return hash;
 }
 
-/* algorithm proposed by Donald E. Knuth
+static osrfHashNode* osrfNewHashNode(char* key, void* item);
+
+/* algorithm proposed by Donald E. Knuth 
  * in The Art Of Computer Programming Volume 3 (more or less..)*/
 /*
 static unsigned int osrfHashMakeKey(char* str) {
@@ -44,53 +87,63 @@ static unsigned int osrfHashMakeKey(char* str) {
 }
 */
 
-
 /* macro version of the above function */
 #define OSRF_HASH_MAKE_KEY(str,num) \
    do {\
-      char* __k = str;\
-      unsigned int __len = strlen(__k); \
-      unsigned int __h = __len;\
-      unsigned int __i = 0;\
-      for(__i = 0; __i < __len; __k++, __i++)\
-         __h = ((__h << 5) ^ (__h >> 27)) ^ (*__k);\
-      num = (__h & (OSRF_HASH_LIST_SIZE-1));\
+      const char* k__ = str;\
+      unsigned int len__ = strlen(k__); \
+      unsigned int h__ = len__;\
+      unsigned int i__ = 0;\
+      for(i__ = 0; i__ < len__; k__++, i__++)\
+         h__ = ((h__ << 5) ^ (h__ >> 27)) ^ (*k__);\
+      num = (h__ & (OSRF_HASH_LIST_SIZE-1));\
    } while(0)
 
-/** Installs a callback function for freeing stored items
-    */
+/* Installs a callback function for freeing stored items */
 void osrfHashSetCallback( osrfHash* hash, void (*callback) (char* key, void* item) )
 {
 	if( hash ) hash->freeItem = callback;
 }
 
-/* returns the index of the item and points l to the sublist the item
- * lives in if the item and points n to the hashnode the item 
- * lives in if the item is found.  Otherwise -1 is returned */
-static unsigned int osrfHashFindItem( osrfHash* hash, char* key, osrfList** l, osrfHashNode** n ) {
-	if(!(hash && key)) return -1;
+/* Returns a pointer to the item's node if found; otherwise returns NULL. */
+static osrfHashNode* find_item( const osrfHash* hash,
+		const char* key, unsigned int* bucketkey ) {
+
+	// Find the sub-list in the hash table
 
+	if( hash->size < 6 && !bucketkey )
+	{
+		// For only a few entries, when we don't need to identify
+		// the hash bucket, it's probably faster to search the
+		// linked list instead of hashing
 
+		osrfHashNode* currnode = hash->first_key;
+		while( currnode && strcmp( currnode->key, key ) )
+			 currnode = currnode->next;
+
+		return currnode;
+	}
+				
 	unsigned int i = 0;
 	OSRF_HASH_MAKE_KEY(key,i);
 
+	// If asked, report which slot the key hashes to
+	if( bucketkey ) *bucketkey = i;
+
 	osrfList* list = OSRF_LIST_GET_INDEX( hash->hash, i );
-	if( !list ) { return -1; }
+	if( !list ) { return NULL; }
 
+	// Search the sub-list
+	
 	int k;
 	osrfHashNode* node = NULL;
 	for( k = 0; k < list->size; k++ ) {
 		node = OSRF_LIST_GET_INDEX(list, k);
 		if( node && node->key && !strcmp(node->key, key) )
-			break;
-		node = NULL;
+			return node;
 	}
 
-	if(!node) return -1;
-
-	if(l) *l = list;
-	if(n) *n = node;
-	return k;
+	return NULL;
 }
 
 static osrfHashNode* osrfNewHashNode(char* key, void* item) {
@@ -99,28 +152,36 @@ static osrfHashNode* osrfNewHashNode(char* key, void* item) {
 	OSRF_MALLOC(n, sizeof(osrfHashNode));
 	n->key = strdup(key);
 	n->item = item;
+	n->prev = NULL;
+	n->prev = NULL;
 	return n;
 }
 
-static void* osrfHashNodeFree(osrfHash* hash, osrfHashNode* node) {
-	if(!(node && hash)) return NULL;
-	void* item = NULL;
-	if( hash->freeItem )
-		hash->freeItem( node->key, node->item );
-	else item = node->item;
-	free(node->key);
-	free(node);
-	return item;
-}
-
+/* If an entry exists for a given key, update it; otherwise create it.
+   If an entry exists, and there is no callback function to destroy it,
+   return a pointer to it so that the calling code has the option of
+   destroying it.  Otherwise return NULL.
+*/
 void* osrfHashSet( osrfHash* hash, void* item, const char* key, ... ) {
 	if(!(hash && item && key )) return NULL;
 
+	void* olditem = NULL;
+	unsigned int bucketkey;
+	
 	VA_LIST_TO_STRING(key);
-	void* olditem = osrfHashRemove( hash, VA_BUF );
+	osrfHashNode* node = find_item( hash, VA_BUF, &bucketkey );
+	if( node ) {
+
+		// We already have an item for this key.  Update it in place.
+		if( hash->freeItem ) {
+			hash->freeItem( node->key, node->item );
+		}
+		else
+			olditem = node->item;
 
-	unsigned int bucketkey = 0;
-	OSRF_HASH_MAKE_KEY(VA_BUF,bucketkey);
+		node->item = item;
+		return olditem;
+	}
 	
 	osrfList* bucket;
 	if( !(bucket = OSRF_LIST_GET_INDEX(hash->hash, bucketkey)) ) {
@@ -128,31 +189,65 @@ void* osrfHashSet( osrfHash* hash, void* item, const char* key, ... ) {
 		osrfListSet( hash->hash, bucket, bucketkey );
 	}
 
-	osrfHashNode* node = osrfNewHashNode(VA_BUF, item);
+	node = osrfNewHashNode(VA_BUF, item);
 	osrfListPushFirst( bucket, node );
 
-	if(!osrfStringArrayContains(hash->keys, VA_BUF))
-		osrfStringArrayAdd( hash->keys, VA_BUF );
-
 	hash->size++;
+
+	// Add the new hash node to the end of the linked list
+
+	if( NULL == hash->first_key )
+		hash->first_key = hash->last_key = node;
+	else {
+		node->prev = hash->last_key;
+		hash->last_key->next = node;
+		hash->last_key = node;
+	}
+	
 	return olditem;
 }
 
+/* Delete the entry for a specified key.  If the entry exists,
+   and there is no callback function to destroy the associated
+   item, return a pointer to the formerly associated item.
+   Otherwise return NULL.
+*/
 void* osrfHashRemove( osrfHash* hash, const char* key, ... ) {
 	if(!(hash && key )) return NULL;
 
 	VA_LIST_TO_STRING(key);
 
-	osrfList* list = NULL;
-	osrfHashNode* node;
-	int index = osrfHashFindItem( hash, (char*) VA_BUF, &list, &node );
-	if( index == -1 ) return NULL;
+	osrfHashNode* node = find_item( hash, VA_BUF, NULL );
+	if( !node ) return NULL;
 
-	osrfListRemove( list, index );
 	hash->size--;
 
-	void* item = osrfHashNodeFree(hash, node);
-	osrfStringArrayRemove(hash->keys, VA_BUF);
+	void* item = NULL;  // to be returned
+	if( hash->freeItem )
+		hash->freeItem( node->key, node->item );
+	else
+		item = node->item;
+
+	// Mark the node as logically deleted
+	
+	free(node->key);
+	node->key = NULL;
+	node->item = NULL;
+
+	// Make the node unreachable from the rest of the linked list.
+	// We leave the next and prev pointers in place so that an
+	// iterator parked here can find its way to an adjacent node.
+
+	if( node->prev )
+		node->prev->next = node->next;
+	else
+		hash->first_key = node->next;
+
+	if( node->next )
+		node->next->prev = node->prev;
+	else
+		hash->last_key = node->prev;
+	
 	return item;
 }
 
@@ -161,36 +256,26 @@ void* osrfHashGet( osrfHash* hash, const char* key, ... ) {
 	if(!(hash && key )) return NULL;
 	VA_LIST_TO_STRING(key);
 
-	osrfHashNode* node = NULL;
-	int index = osrfHashFindItem( hash, (char*) VA_BUF, NULL, &node );
-	if( index == -1 ) return NULL;
+	osrfHashNode* node = find_item( hash, (char*) VA_BUF, NULL );
+	if( !node ) return NULL;
 	return node->item;
 }
 
-
-osrfStringArray* osrfHashKeysInc( osrfHash* hash ) {
-	if(!hash) return NULL;
-	return hash->keys;
-}
-
 osrfStringArray* osrfHashKeys( osrfHash* hash ) {
 	if(!hash) return NULL;
 	
-	int i, k;
-	osrfList* list;
 	osrfHashNode* node;
-	osrfStringArray* strings = osrfNewStringArray(8);
+	osrfStringArray* strings = osrfNewStringArray( hash->size );
 
-	for( i = 0; i != hash->hash->size; i++ ) {
-		list = OSRF_LIST_GET_INDEX( hash->hash, i );
-		if(list) {
-			for( k = 0; k != list->size; k++ ) {
-				node = OSRF_LIST_GET_INDEX( list, k );	
-				if( node ) osrfStringArrayAdd( strings, node->key );
-			}
-		}
+	// Add every key on the linked list
+	
+	node = hash->first_key;
+	while( node ) {
+		if( node->key )  // should always be true
+			osrfStringArrayAdd( strings, node->key );
+		node = node->next;
 	}
-
+	
 	return strings;
 }
 
@@ -219,68 +304,57 @@ void osrfHashFree( osrfHash* hash ) {
 	}
 
 	osrfListFree(hash->hash);
-    OSRF_STRING_ARRAY_FREE(hash->keys);
 	free(hash);
 }
 
-
-
 osrfHashIterator* osrfNewHashIterator( osrfHash* hash ) {
 	if(!hash) return NULL;
 	osrfHashIterator* itr;
 	OSRF_MALLOC(itr, sizeof(osrfHashIterator));
 	itr->hash = hash;
-	itr->currentIdx = 0;
-	itr->current = NULL;
-	itr->currsize = 0;
-	itr->keys = osrfHashKeysInc(hash);
+	itr->curr_node = NULL;
 	return itr;
 }
 
 void* osrfHashIteratorNext( osrfHashIterator* itr ) {
 	if(!(itr && itr->hash)) return NULL;
-	if( itr->currentIdx >= itr->keys->size ) return NULL;
-
-	// Copy the string to iter->current
-	const char * curr = osrfStringArrayGetString(itr->keys, itr->currentIdx++);
-	size_t new_len = strlen(curr);
-	if( new_len >= itr->currsize ) {
-		// We need a bigger buffer
-
-		if(0 == itr->currsize) itr->currsize = 64; //default size
-		do {
-			itr->currsize *= 2;
-		} while( new_len >= itr->currsize );
-
-		if(itr->current)
-			free(itr->current);
-		itr->current = safe_malloc(itr->currsize);
-	}
-	strcpy(itr->current, curr);
+
+	// Advance to the next node in the linked list
 	
-	char* val = osrfHashGet( itr->hash, itr->current );
-	return val;
+	if( NULL == itr->curr_node )
+		itr->curr_node = itr->hash->first_key;
+	else
+		itr->curr_node = itr->curr_node->next;
+
+	if( itr->curr_node )
+		return itr->curr_node->item;
+	else
+		return NULL;
 }
 
-void osrfHashIteratorFree( osrfHashIterator* itr ) {
-	if(!itr) return;
-	free(itr->current);
-	free(itr);
+const char* osrfHashIteratorKey( const osrfHashIterator* itr ) {
+	if( itr && itr->curr_node )
+		return itr->curr_node->key;
+	else
+		return NULL;
 }
 
-const char* osrfHashIteratorKey( const osrfHashIterator* itr ) {
-    if( ! itr ) return NULL;
-    return itr->current;
+void osrfHashIteratorFree( osrfHashIterator* itr ) {
+	if(itr)
+		free(itr);
 }
 
 void osrfHashIteratorReset( osrfHashIterator* itr ) {
 	if(!itr) return;
-    if(itr->current) itr->current[0] = '\0';
-	itr->keys = osrfHashKeysInc(itr->hash);
-	itr->currentIdx = 0;
+	itr->curr_node = NULL;
 }
 
 
 int osrfHashIteratorHasNext( osrfHashIterator* itr ) {
-	return ( itr->currentIdx < itr->keys->size ) ? 1 : 0;
+	if( !itr )
+		return 0;
+	else if( itr->curr_node )
+		return itr->curr_node->next ? 1 : 0;
+	else
+		return itr->hash->first_key ? 1 : 0;
 }
-- 
2.11.0