Hash Tables With Open Addressing

August 24, 2012

NOTE: This version of the code has a BUG. See the comments below for a fix. Thanks to madscifi for finding the BUG.

We store our hash table in a vector, using #f for nil, the empty list () for deleted items, and a (key . value) pair for items in use:

(define (make-hash m) (make-vector m #f))

The lookup function illustrates all of the possible situations we might encounter in insertion or deletion. The initial hash location is in j, the offset as the search marches through the hash table is in i, and j+i is the hash table index. The first cond clause checks if the search reaches the original hash point, meaning that the table is full but the search key is not present. The second clause checks for nil, which also means the search key is not present in the table. The third clause checks for deleted, which doesn’t match but requires the search to continue. The fourth clause identifies a match and returns the key/value pair stored in the table. And the else clause is indeterminate, so the search continues:

(define (lookup eql? h t k)
  (let ((m (vector-length t)) (j (h k)))
    (let loop ((i 0))
      (let ((j+i (modulo (+ j i) m)))
        (cond ((= i m) #f)
              ((not (vector-ref t j+i)) #f)
              ((null? (vector-ref t j+i)) (loop (+ i 1)))
              ((eql? (car (vector-ref t j+i)) k)
                (vector-ref t j+i))
              (else (loop (+ i 1))))))))

The code for insert and delete follows the same structure as lookup, with five clauses in the cond, the last clause being indeterminate; only the actions are different. Both functions return the modified hash table; insert raises an error if it runs out of room:

(define (insert eql? h t k v)
  (let ((m (vector-length t)) (j (h k)))
    (let loop ((i 0))
      (let ((j+i (modulo (+ j i) m)))
        (cond ((= i m) (error 'insert "overflow"))
              ((not (vector-ref t j+i))
                (vector-set! t j+i (cons k v)) t)
              ((null? (vector-ref t j+i))
                (vector-set! t j+i (cons k v)) t)
              ((eql? (car (vector-ref t j+i)) k)
                (vector-set! t j+i (cons k v)) t)
              (else (loop (+ i 1))))))))

(define (delete eql? h t k)
  (let ((m (vector-length t)) (j (h k)))
    (let loop ((i 0))
      (let ((j+i (modulo (+ j i) m)))
        (cond ((= i m) t)
              ((not (vector-ref t j+i)) t)
              ((null? (vector-ref t j+i))
                (loop (+ i 1)))
              ((eql? (car (vector-ref t j+i)) k)
                (vector-set! t j+i (list)) t)
              (else (loop (+ i 1))))))))

The enlist function is different; it loops through the table from back to front, ignoring nil and deleted items, collecting the rest in a list:

(define (enlist t)
  (let loop ((m (vector-length t)) (kvs (list)))
    (cond ((zero? m) kvs)
          ((not (vector-ref t (- m 1))) (loop (- m 1) kvs))
          ((null? (vector-ref t (- m 1))) (loop (- m 1) kvs))
          (else (loop (- m 1) (cons (vector-ref t (- m 1)) kvs))))))

Here are some examples:

> (define t (make-hash 20))
> (set! t (insert string=? string-hash t "aaa" 1))
> (set! t (insert string=? string-hash t "bbb" 2))
> (set! t (insert string=? string-hash t "ccc" 3))
> (set! t (insert string=? string-hash t "ddd" 4))
> (set! t (insert string=? string-hash t "eee" 5))
> t
#20(("ddd" . 4) ("aaa" . 1) #f #f #f #f #f ("ccc" . 3) #f #f
    #f #f #f ("eee" . 5) ("bbb" . 2) #f)
> (set! t (insert string=? string-hash t "xxx" 24))
> (set! t (delete string=? string-hash t "ccc"))
> t
#20(("ddd" . 4) ("aaa" . 1) ("xxx" . 24) #f #f #f #f () #f
    #f #f #f #f ("eee" . 5) ("bbb" . 2) #f)
> (lookup string=? string-hash t "ddd")
("ddd" . 4)
> (lookup string=? string-hash t "ccc")
#f
> (enlist t)
(("ddd" . 4) ("aaa" . 1) ("xxx" . 24) ("eee" . 5) ("bbb" . 2))

We used string-hash from the Standard Prelude. You can run the program at http://programmingpraxis.codepad.org/bCRLwrVT.

Pages: 1 2

9 Responses to “Hash Tables With Open Addressing”

  1. cage said

    My try in Common lisp.

  2. madscifi said

    Needs -std=c++0x if compiling with g++.

    #include <iostream>
    #include <vector>
    #include <tuple>
    #include <stdexcept>
    
    template< class Key, class Value,
    	  class Hasher = std::hash<Key>,
    	  class Equals = std::equal_to<Key> >
    class Hash_OpenAddress
    {
    public:
      Hash_OpenAddress( size_t size, Hasher hasher = Hasher(), Equals equals = Equals() )
        : data( size ), hasher( hasher ), equals( equals )
      {}
    
      void Insert( const Key& key, const Value& value )
      {
        int i = FindIndex( key );
        if( i == -1 ) throw std::runtime_error( "hash full" );
        data[i] = std::make_tuple( Full, key, value );
      }
      
      void Insert( Key&& key, Value&& value )
      {
        int i = FindIndex( key );
        if( i == -1 ) throw std::runtime_error( "hash full" );
        data[i] = std::make_tuple( Full, std::move(key), std::move(value) );
      }
    
      void Delete( const Key& key )
      {
        int i = FindIndex( key );
        if( i != -1 && std::get<STATE>(data[i]) == Full ) {
          data[i] = std::make_tuple( Deleted, Key(), Value() );
        }
      }
    
      // should really be returning a iterator
      const Value* Find( const Key& key ) const
      {
        int i = FindIndex( key );
        const Value* r = 0;
        if( i != -1 && std::get<STATE>(data[i]) == Full ) {
          r = &std::get<VALUE>(data[i]);
        }
        return r;
      }
    
    private:
      template<class K, class V, class H, class E>
      friend std::ostream& operator<<( std::ostream&s, Hash_OpenAddress<K,V,H,E>& h);
    
      enum StateType { Empty, Deleted, Full };
      typedef std::tuple<StateType,Key,Value> Slot;
      enum TupleIndexes { STATE, KEY, VALUE };
    
      std::vector< Slot > data;
    
      Hasher hasher;
      Equals equals;
      
      // some minor inefficiencies due to the common search function
      int FindIndex( const Key& key ) const
      {
        int firstDeleted = -1;
        int i = hasher( key ) % data.size();
        int end = i;
        do {
          if( std::get<STATE>(data[i]) == Empty) {
    	if( firstDeleted != -1 ) {
    	    i = firstDeleted;
    	}
    	return i;
          }
          else if( std::get<STATE>(data[i]) == Deleted && firstDeleted == -1 ) {
    	firstDeleted = i;
          }
          else if( std::get<STATE>(data[i]) == Full
    	       && equals(std::get<KEY>(data[i]),key) ) {
    	return i;
          }
          i = (i + 1) % data.size();
        } while( i != end );
        return firstDeleted;
      }
    };
    
    template<class K, class V, class H, class E>
    std::ostream& operator<<( std::ostream& s, Hash_OpenAddress<K,V,H,E>& h )
    {
      typedef Hash_OpenAddress<K,V,H,E> HT;
      for( auto& e : h.data ) {
        s << "[" << std::get<HT::STATE>(e) << ","
          << std::get<HT::KEY>(e) << ","
          << std::get<HT::VALUE>(e) << "] ";
      }
      return s;
    }
    
    int main(int argc, char* argv[])
    {
      Hash_OpenAddress< std::string, std::string > h(5);
      h.Insert( "aaa", "aaa_value" );
      h.Insert( "bbb", "bbb_value" );
      std::cout << h << std::endl;
    
      const std::string* p = h.Find( "aaa" );
      std::cout << (p?*p:"--not found--") << std::endl;
    
      h.Delete( "aaa" );
      std::cout << h << std::endl;
    
      p = h.Find( "aaa" );
      std::cout << (p?*p:"--not found--") << std::endl;
    
      return 0;
    }
    
  3. madscifi said

    programmingpraxis, there is a bug in the sample implementation. Consider the following sequence of operations (hash forced to simplify demo):

    > (define t (make-hash 5))
    > (set! t (insert string=? (lambda (x) 1) t "aaa" 1))
    > (set! t (insert string=? (lambda (x) 1) t "bbb" 1))
    > (set! t (delete string=? (lambda (x) 1) t "aaa"))
    > (set! t (insert string=? (lambda (x) 1) t "bbb" 2))
    > t
    #(#f ("bbb" . 2) ("bbb" . 1) #f #f ..... )
    
  4. […] day, another post from Programming Praxis. This time, the goal is to write a hash table with open addressing. Basically, whenever there is a […]

  5. programmingpraxis said

    I’m embarrassed at the bug found by madscifi. Instead of fixing the bug directly, I offer an improved version of hash tables with open addressing, due to Knuth (AoCP3, Algo 6.4L and 6.4R). The primary difference between Knuth’s version and the version described above is that during deletion Knuth moves items in the table instead of marking them deleted, so that there is no longer a “deleted” indicator. The code below doesn’t quite follow Knuth; Knuth always leaves one element of the table empty, as a sentinel, at the cost of keeping track of the number of items in the table separately, but we use the entire table, counting items during searching to check for overflow. An empty hash table is a vector of null lists; when an item is inserted into the hash table, the appropriate null list is replaced by a key/value cons pair:

    (define (make-hash len) (make-vector len (list)))

    Each of the lookup, insert and delete functions tests four conditions. If the search wraps around the table back to the starting point, it overflows. If the search finds a null list in the hash table, the sought item does not exist in the table. If the current key equals the sought key, then we perform the appropriate action and terminate. Otherwise, the hash table has a key/value pair at the current location, but it isn’t the one we seek, so the search continues. Note that we worked front to back in the original code, but Knuth works the other way, from back to front.

    The lookup function combines the first two conditions, returning #f indicating that the search key is not present in the table if the search wraps around the table or reaches a null item. If the key is found in the third condition, it is returned, otherwise the next item is checked:

    (define (lookup eql? hash table key)
      (let* ((len (vector-length table)) (n len))
        (let loop ((i (modulo (hash key) len)) (n n))
          (cond ((or (zero? n) (null? (vector-ref table i))) #f)
                ((eql? (car (vector-ref table i)) key) (vector-ref table i))
                (else (loop (modulo (- i 1) len) (- n 1)))))))

    Insertion combines the conditions differently. The first condition reports an overflow and quits. The second and third conditions are combined, and the new key/value pair is inserted in the table, either in a previously-empty location or associating a new value with an existing key. The function returns the modified hash table:

    (define (insert eql? hash table key value)
      (let* ((len (vector-length table)) (n len))
        (let loop ((i (modulo (hash key) len)) (n n))
          (cond ((zero? n) (error 'insert "overflow"))
                ((or (null? (vector-ref table i))
                     (eql? (car (vector-ref table i)) key))
                  (vector-set! table i (cons key value)) table)
                (else (loop (modulo (- i 1) len) (- n 1)))))))

    Deletion is tricky. As in lookup, the first two conditions are combined; if the key is not in the table, the table is returned unchanged. In the third condition, when the key is found, it is deleted, and some item farther down the chain replaces it. The replacement process is recursive, with moved items being replaced by some item farther down the chain; eventually, the chain is reformed so that it has no empty items. Loop1 loops over the items in the chain; loop2 finds an item to replace:

    (define (delete eql? hash table key)
      (let* ((len (vector-length table)) (n len))
        (let loop ((i (modulo (hash key) len)) (n n))
          (cond ((or (zero? n) (null? (vector-ref table i))) table)
                ((eql? (car (vector-ref table i)) key)
                  (let loop1 ((i i))
                    (vector-set! table i (list))
                    (let ((j i))
                      (let loop2 ((i (modulo (- i 1) len)))
                        (if (null? (vector-ref table i)) table
                          (let ((r (modulo (hash (car (vector-ref table i))) len)))
                            (cond ((or (< (- i 1) r j) (< r j i) (< j i (+ r 1)))
                                    (loop2 (modulo (- i 1) len)))
                            (else (vector-set! table j (vector-ref table i))
                                  (loop1 i)))))))))
                (else (loop (modulo (- i 1) len) (- n 1)))))))

    Converting the hash table to a list is simple:

    (define (enlist table) (filter pair? (vector->list table)))

    You can run the program at http://programmingpraxis.codepad.org/Ngle0U3T. It includes a somewhat larger test suite than the original, and passes the madscifi test.

Leave a comment