Patch from Scott McKellar:

author erickson <erickson@9efc2488-bf62-4759-914b-345cdb29e865>

Fri, 16 May 2008 12:45:11 +0000 (12:45 +0000)

committer erickson <erickson@9efc2488-bf62-4759-914b-345cdb29e865>

Fri, 16 May 2008 12:45:11 +0000 (12:45 +0000)
author erickson <erickson@9efc2488-bf62-4759-914b-345cdb29e865>
Fri, 16 May 2008 12:45:11 +0000 (12:45 +0000)
committer erickson <erickson@9efc2488-bf62-4759-914b-345cdb29e865>
Fri, 16 May 2008 12:45:11 +0000 (12:45 +0000)
diff --git a/include/opensrf/osrf_hash.h b/include/opensrf/osrf_hash.h

index edf8d45..544805a 100644 (file)
--- a/include/opensrf/osrf_hash.h
+++ b/include/opensrf/osrf_hash.h
@@ -5,21 +5,10 @@
  #include <opensrf/string_array.h>
  #include <opensrf/osrf_list.h>
  
-struct _osrfHashStruct {
-       osrfList* hash; /* this hash */
-       void (*freeItem) (char* key, void* item);       /* callback for freeing stored items */
-       unsigned int size;
-       osrfStringArray* keys;
-};
+struct _osrfHashStruct;
  typedef struct _osrfHashStruct osrfHash;
  
-struct _osrfHashIteratorStruct {
-       char* current;
-       size_t currsize;  // length of "current" buffer
-       int currentIdx;
-       osrfHash* hash;
-       osrfStringArray* keys;
-};
+struct _osrfHashIteratorStruct;
  typedef struct _osrfHashIteratorStruct osrfHashIterator;
  
  /**
@@ -28,7 +17,7 @@ typedef struct _osrfHashIteratorStruct osrfHashIterator;
  osrfHash* osrfNewHash();
  
  /** Installs a callback function for freeing stored items
-    */
+ */
  void osrfHashSetCallback( osrfHash* hash, void (*callback) (char* key, void* item) );
  
  /**
@@ -59,8 +48,6 @@ void* osrfHashGet( osrfHash* hash, const char* key, ... );
    */
  osrfStringArray* osrfHashKeys( osrfHash* hash );
  
-osrfStringArray* osrfHashKeysInc( osrfHash* hash );
-
  /**
    Frees a hash
    */
@@ -71,9 +58,6 @@ void osrfHashFree( osrfHash* hash );
    */
  unsigned long osrfHashGetCount( osrfHash* hash );
  
-
-
-
  /**
    Creates a new list iterator with the given list
    */
@@ -89,7 +73,7 @@ void* osrfHashIteratorNext( osrfHashIterator* itr );
  
  /**
    Returns a pointer to the key of the current hash item
- */
+  */
  const char* osrfHashIteratorKey( const osrfHashIterator* itr );
  
  /**
diff --git a/src/libopensrf/osrf_hash.c b/src/libopensrf/osrf_hash.c

index 47cc0ee..2a0a341 100644 (file)
--- a/src/libopensrf/osrf_hash.c
+++ b/src/libopensrf/osrf_hash.c
@@ -1,36 +1,79 @@
+/*
+Copyright (C) 2007, 2008  Georgia Public Library Service
+Bill Erickson <erickson@esilibrary.com>
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+-----------
+
+An osrfHash is a hybrid between a hash table and a doubly linked
+list.  The hash table supports random lookups by key.  The list
+supports iterative traversals.  The sequence of entries in the
+list reflects the sequence in which the entries were added.
+
+osrfHashIterators are somewhat unusual in that, if an iterator
+is positioned on a given entry, deletion of that entry does
+not invalidate the iterator.  The entry to which it points is
+logically but not physically deleted.  You can still advance
+the iterator to the next entry in the list.
+
+*/
+
  #include <opensrf/osrf_hash.h>
  
  struct _osrfHashNodeStruct {
         char* key;
         void* item;
+       struct _osrfHashNodeStruct* prev;
+       struct _osrfHashNodeStruct* next;
  };
  typedef struct _osrfHashNodeStruct osrfHashNode;
  
+struct _osrfHashStruct {
+       osrfList* hash; /* this hash */
+       void (*freeItem) (char* key, void* item);       /* callback for freeing stored items */
+       unsigned int size;
+       osrfHashNode* first_key;
+       osrfHashNode* last_key;
+};
+
+struct _osrfHashIteratorStruct {
+       osrfHash* hash;
+       osrfHashNode* curr_node;
+};
+
  /* 0x100 is a good size for small hashes */
  //#define OSRF_HASH_LIST_SIZE 0x100  /* size of the main hash list */
  #define OSRF_HASH_LIST_SIZE 0x10  /* size of the main hash list */
  
+
  /* used internally */
  #define OSRF_HASH_NODE_FREE(h, n) \
         if(h && n) { \
-               if(h->freeItem) h->freeItem(n->key, n->item);\
+               if(h->freeItem && n->key) h->freeItem(n->key, n->item);\
                 free(n->key); free(n); \
  }
  
-static osrfHashNode* osrfNewHashNode(char* key, void* item);
-static void* osrfHashNodeFree(osrfHash*, osrfHashNode*);
-
  osrfHash* osrfNewHash() {
         osrfHash* hash;
         OSRF_MALLOC(hash, sizeof(osrfHash));
         hash->hash              = osrfNewList();
-       hash->freeItem  = NULL;
-       hash->size      = 0;
-       hash->keys              = osrfNewStringArray(64);
+       hash->first_key = NULL;
+       hash->last_key  = NULL;
         return hash;
  }
  
-/* algorithm proposed by Donald E. Knuth
+static osrfHashNode* osrfNewHashNode(char* key, void* item);
+
+/* algorithm proposed by Donald E. Knuth 
   * in The Art Of Computer Programming Volume 3 (more or less..)*/
  /*
  static unsigned int osrfHashMakeKey(char* str) {
@@ -44,53 +87,63 @@ static unsigned int osrfHashMakeKey(char* str) {
  }
  */
  
-
  /* macro version of the above function */
  #define OSRF_HASH_MAKE_KEY(str,num) \
     do {\
-      char* __k = str;\
-      unsigned int __len = strlen(__k); \
-      unsigned int __h = __len;\
-      unsigned int __i = 0;\
-      for(__i = 0; __i < __len; __k++, __i++)\
-         __h = ((__h << 5) ^ (__h >> 27)) ^ (*__k);\
-      num = (__h & (OSRF_HASH_LIST_SIZE-1));\
+      const char* k__ = str;\
+      unsigned int len__ = strlen(k__); \
+      unsigned int h__ = len__;\
+      unsigned int i__ = 0;\
+      for(i__ = 0; i__ < len__; k__++, i__++)\
+         h__ = ((h__ << 5) ^ (h__ >> 27)) ^ (*k__);\
+      num = (h__ & (OSRF_HASH_LIST_SIZE-1));\
     } while(0)
  
-/** Installs a callback function for freeing stored items
-    */
+/* Installs a callback function for freeing stored items */
  void osrfHashSetCallback( osrfHash* hash, void (*callback) (char* key, void* item) )
  {
         if( hash ) hash->freeItem = callback;
  }
  
-/* returns the index of the item and points l to the sublist the item
- * lives in if the item and points n to the hashnode the item 
- * lives in if the item is found.  Otherwise -1 is returned */
-static unsigned int osrfHashFindItem( osrfHash* hash, char* key, osrfList** l, osrfHashNode** n ) {
-       if(!(hash && key)) return -1;
+/* Returns a pointer to the item's node if found; otherwise returns NULL. */
+static osrfHashNode* find_item( const osrfHash* hash,
+               const char* key, unsigned int* bucketkey ) {
+
+       // Find the sub-list in the hash table
  
+       if( hash->size < 6 && !bucketkey )
+       {
+               // For only a few entries, when we don't need to identify
+               // the hash bucket, it's probably faster to search the
+               // linked list instead of hashing
  
+               osrfHashNode* currnode = hash->first_key;
+               while( currnode && strcmp( currnode->key, key ) )
+                        currnode = currnode->next;
+
+               return currnode;
+       }
+                               
         unsigned int i = 0;
         OSRF_HASH_MAKE_KEY(key,i);
  
+       // If asked, report which slot the key hashes to
+       if( bucketkey ) *bucketkey = i;
+
         osrfList* list = OSRF_LIST_GET_INDEX( hash->hash, i );
-       if( !list ) { return -1; }
+       if( !list ) { return NULL; }
  
+       // Search the sub-list
+       
         int k;
         osrfHashNode* node = NULL;
         for( k = 0; k < list->size; k++ ) {
                 node = OSRF_LIST_GET_INDEX(list, k);
                 if( node && node->key && !strcmp(node->key, key) )
-                       break;
-               node = NULL;
+                       return node;
         }
  
-       if(!node) return -1;
-
-       if(l) *l = list;
-       if(n) *n = node;
-       return k;
+       return NULL;
  }
  
  static osrfHashNode* osrfNewHashNode(char* key, void* item) {
@@ -99,28 +152,36 @@ static osrfHashNode* osrfNewHashNode(char* key, void* item) {
         OSRF_MALLOC(n, sizeof(osrfHashNode));
         n->key = strdup(key);
         n->item = item;
+       n->prev = NULL;  /* not on the key list yet; osrfHashSet() links it */
+       n->next = NULL;  /* list walks stop on a NULL next pointer */
         return n;
  }
  
-static void* osrfHashNodeFree(osrfHash* hash, osrfHashNode* node) {
-       if(!(node && hash)) return NULL;
-       void* item = NULL;
-       if( hash->freeItem )
-               hash->freeItem( node->key, node->item );
-       else item = node->item;
-       free(node->key);
-       free(node);
-       return item;
-}
-
+/* If an entry exists for a given key, update it; otherwise create it.
+   If an entry exists, and there is no callback function to destroy it,
+   return a pointer to it so that the calling code has the option of
+   destroying it.  Otherwise return NULL.
+*/
  void* osrfHashSet( osrfHash* hash, void* item, const char* key, ... ) {
         if(!(hash && item && key )) return NULL;
  
+       void* olditem = NULL;
+       unsigned int bucketkey;
+       
         VA_LIST_TO_STRING(key);
-       void* olditem = osrfHashRemove( hash, VA_BUF );
+       osrfHashNode* node = find_item( hash, VA_BUF, &bucketkey );
+       if( node ) {
+
+               // We already have an item for this key.  Update it in place.
+               if( hash->freeItem ) {
+                       hash->freeItem( node->key, node->item );
+               }
+               else
+                       olditem = node->item;
  
-       unsigned int bucketkey = 0;
-       OSRF_HASH_MAKE_KEY(VA_BUF,bucketkey);
+               node->item = item;
+               return olditem;
+       }
         
         osrfList* bucket;
         if( !(bucket = OSRF_LIST_GET_INDEX(hash->hash, bucketkey)) ) {
@@ -128,31 +189,65 @@ void* osrfHashSet( osrfHash* hash, void* item, const char* key, ... ) {
                 osrfListSet( hash->hash, bucket, bucketkey );
         }
  
-       osrfHashNode* node = osrfNewHashNode(VA_BUF, item);
+       node = osrfNewHashNode(VA_BUF, item);
         osrfListPushFirst( bucket, node );
  
-       if(!osrfStringArrayContains(hash->keys, VA_BUF))
-               osrfStringArrayAdd( hash->keys, VA_BUF );
-
         hash->size++;
+
+       // Add the new hash node to the end of the linked list
+
+       if( NULL == hash->first_key )
+               hash->first_key = hash->last_key = node;
+       else {
+               node->prev = hash->last_key;
+               hash->last_key->next = node;
+               hash->last_key = node;
+       }
+       
         return olditem;
  }
  
+/* Delete the entry for a specified key.  If the entry exists,
+   and there is no callback function to destroy the associated
+   item, return a pointer to the formerly associated item.
+   Otherwise return NULL.
+*/
  void* osrfHashRemove( osrfHash* hash, const char* key, ... ) {
         if(!(hash && key )) return NULL;
  
         VA_LIST_TO_STRING(key);
  
-       osrfList* list = NULL;
-       osrfHashNode* node;
-       int index = osrfHashFindItem( hash, (char*) VA_BUF, &list, &node );
-       if( index == -1 ) return NULL;
+       osrfHashNode* node = find_item( hash, VA_BUF, NULL );
+       if( !node ) return NULL;
  
-       osrfListRemove( list, index );
         hash->size--;
  
-       void* item = osrfHashNodeFree(hash, node);
-       osrfStringArrayRemove(hash->keys, VA_BUF);
+       void* item = NULL;  // to be returned
+       if( hash->freeItem )
+               hash->freeItem( node->key, node->item );
+       else
+               item = node->item;
+
+       // Mark the node as logically deleted
+       
+       free(node->key);
+       node->key = NULL;
+       node->item = NULL;
+
+       // Make the node unreachable from the rest of the linked list.
+       // We leave the next and prev pointers in place so that an
+       // iterator parked here can find its way to an adjacent node.
+
+       if( node->prev )
+               node->prev->next = node->next;
+       else
+               hash->first_key = node->next;
+
+       if( node->next )
+               node->next->prev = node->prev;
+       else
+               hash->last_key = node->prev;
+       
         return item;
  }
  
@@ -161,36 +256,26 @@ void* osrfHashGet( osrfHash* hash, const char* key, ... ) {
         if(!(hash && key )) return NULL;
         VA_LIST_TO_STRING(key);
  
-       osrfHashNode* node = NULL;
-       int index = osrfHashFindItem( hash, (char*) VA_BUF, NULL, &node );
-       if( index == -1 ) return NULL;
+       osrfHashNode* node = find_item( hash, (char*) VA_BUF, NULL );
+       if( !node ) return NULL;
         return node->item;
  }
  
-
-osrfStringArray* osrfHashKeysInc( osrfHash* hash ) {
-       if(!hash) return NULL;
-       return hash->keys;
-}
-
  osrfStringArray* osrfHashKeys( osrfHash* hash ) {
         if(!hash) return NULL;
         
-       int i, k;
-       osrfList* list;
         osrfHashNode* node;
-       osrfStringArray* strings = osrfNewStringArray(8);
+       osrfStringArray* strings = osrfNewStringArray( hash->size );
  
-       for( i = 0; i != hash->hash->size; i++ ) {
-               list = OSRF_LIST_GET_INDEX( hash->hash, i );
-               if(list) {
-                       for( k = 0; k != list->size; k++ ) {
-                               node = OSRF_LIST_GET_INDEX( list, k );  
-                               if( node ) osrfStringArrayAdd( strings, node->key );
-                       }
-               }
+       // Add every key on the linked list
+       
+       node = hash->first_key;
+       while( node ) {
+               if( node->key )  // should always be true
+                       osrfStringArrayAdd( strings, node->key );
+               node = node->next;
         }
-
+       
         return strings;
  }
  
@@ -219,68 +304,57 @@ void osrfHashFree( osrfHash* hash ) {
         }
  
         osrfListFree(hash->hash);
-    OSRF_STRING_ARRAY_FREE(hash->keys);
         free(hash);
  }
  
-
-
  osrfHashIterator* osrfNewHashIterator( osrfHash* hash ) {
         if(!hash) return NULL;
         osrfHashIterator* itr;
         OSRF_MALLOC(itr, sizeof(osrfHashIterator));
         itr->hash = hash;
-       itr->currentIdx = 0;
-       itr->current = NULL;
-       itr->currsize = 0;
-       itr->keys = osrfHashKeysInc(hash);
+       itr->curr_node = NULL;
         return itr;
  }
  
  void* osrfHashIteratorNext( osrfHashIterator* itr ) {
         if(!(itr && itr->hash)) return NULL;
-       if( itr->currentIdx >= itr->keys->size ) return NULL;
-
-       // Copy the string to iter->current
-       const char * curr = osrfStringArrayGetString(itr->keys, itr->currentIdx++);
-       size_t new_len = strlen(curr);
-       if( new_len >= itr->currsize ) {
-               // We need a bigger buffer
-
-               if(0 == itr->currsize) itr->currsize = 64; //default size
-               do {
-                       itr->currsize *= 2;
-               } while( new_len >= itr->currsize );
-
-               if(itr->current)
-                       free(itr->current);
-               itr->current = safe_malloc(itr->currsize);
-       }
-       strcpy(itr->current, curr);
+
+       // Advance to the next node in the linked list
         
-       char* val = osrfHashGet( itr->hash, itr->current );
-       return val;
+       if( NULL == itr->curr_node )
+               itr->curr_node = itr->hash->first_key;
+       else
+               itr->curr_node = itr->curr_node->next;
+
+       if( itr->curr_node )
+               return itr->curr_node->item;
+       else
+               return NULL;
  }
  
-void osrfHashIteratorFree( osrfHashIterator* itr ) {
-       if(!itr) return;
-       free(itr->current);
-       free(itr);
+const char* osrfHashIteratorKey( const osrfHashIterator* itr ) {
+       if( itr && itr->curr_node )
+               return itr->curr_node->key;
+       else
+               return NULL;
  }
  
-const char* osrfHashIteratorKey( const osrfHashIterator* itr ) {
-    if( ! itr ) return NULL;
-    return itr->current;
+void osrfHashIteratorFree( osrfHashIterator* itr ) {
+       if(itr)
+               free(itr);
  }
  
  void osrfHashIteratorReset( osrfHashIterator* itr ) {
         if(!itr) return;
-    if(itr->current) itr->current[0] = '\0';
-       itr->keys = osrfHashKeysInc(itr->hash);
-       itr->currentIdx = 0;
+       itr->curr_node = NULL;
  }
  
  
  int osrfHashIteratorHasNext( osrfHashIterator* itr ) {
-       return ( itr->currentIdx < itr->keys->size ) ? 1 : 0;
+       if( !itr )
+               return 0;
+       else if( itr->curr_node )
+               return itr->curr_node->next ? 1 : 0;
+       else
+               return itr->hash->first_key ? 1 : 0;
  }
author	erickson <erickson@9efc2488-bf62-4759-914b-345cdb29e865>
	Fri, 16 May 2008 12:45:11 +0000 (12:45 +0000)
committer	erickson <erickson@9efc2488-bf62-4759-914b-345cdb29e865>
	Fri, 16 May 2008 12:45:11 +0000 (12:45 +0000)
include/opensrf/osrf_hash.h		patch \| blob \| history
src/libopensrf/osrf_hash.c		patch \| blob \| history