Ordered Hash Tables

August 2, 2013

We assume two functions hash and lt? are provided:

(define (hash k) k)

(define (lt? k1 k2) (< k1 k2))

A hash table is a vector of lists, all initially empty; the length of the vector is chosen based on the expected number of records to be stored, and is best prime (we make it small for testing):

(define len 5)

(define ht (make-vector len (list)))

The easiest place to start is the lookup function. The function performs linear search through the appropriate bucket and returns a null list to indicate failure in two places: if the bucket is exhausted, or if the key being sought is greater than the current key:

(define (lookup k) (let* ((index (modulo (hash k) len)) (bucket (vector-ref ht index))) (let loop ((bucket bucket)) (cond ((null? bucket) (list)) ((lt? k (caar bucket)) (list)) ((lt? (caar bucket) k) (loop (cdr bucket))) (else (car bucket))))))

Insertion has a structure similar to lookup. The added loop parameter, xs, collects bucket items as they are seen so they can be included in the final output:

(define (insert k v) (let* ((index (modulo (hash k) len)) (bucket (vector-ref ht index))) (let loop ((bucket bucket) (xs (list))) (cond ((null? bucket) (vector-set! ht index (append (reverse xs) (list (cons k v))))) ((lt? k (caar bucket)) (vector-set! ht index (append (reverse xs) (list (cons k v)) bucket))) ((lt? (caar bucket) k) (loop (cdr bucket) (cons (car bucket) xs))) (else (vector-set! ht index (append (reverse xs) (list (cons k v)) (cdr bucket))))))))

Deletion uses the same four-way switch as lookup and insert;
in the first two cases, when the key is not present in the
hash table, it does nothing, leaving the hash table unchanged:

(define (delete k) (let* ((index (modulo (hash k) len)) (bucket (vector-ref ht index))) (let loop ((bucket bucket) (xs (list))) (cond ((null? bucket)) ((lt? k (caar bucket))) ((lt? (caar bucket) k) (loop (cdr bucket) (cons (car bucket) xs))) (else (vector-set! ht index (append (reverse xs) (cdr bucket))))))))

We also provide a function to create a list of the key/value
pairs in the hash table:

(define (enlist) (let ((xs (list))) (do ((i 0 (+ i 1))) ((= i len) xs) (set! xs (append (vector-ref ht i) xs)))))

We test the hash table functions by building a small database,
asserting various facts about it, deleting all the entries in
the database, then checking that it is empty; note the
difference in the two ranges, by which we test
deletion of a non-existent key:

(define alphabet '(alpha bravo charlie delta echo foxtrot golf hotel india juliet kilo lima mike november oscar papa quebec romeo sierra tango uniform victor whiskey xray yankee zulu))

(define (test) (let ((xs (shuffle (map cons (range 1 27) alphabet)))) (do ((xs xs (cdr xs))) ((null? xs)) (insert (caar xs) (cdar xs)))) (assert (length (enlist)) 26) (assert (cdr (lookup 3)) 'charlie) (assert (cdr (lookup 17)) 'quebec) (assert (lookup 43) '()) (let ((xs (shuffle (range 27)))) (do ((xs xs (cdr xs))) ((null? xs)) (delete (car xs)))) (assert (length (enlist)) 0) (assert (lookup 3) '()) (assert (lookup 17) '()) (assert (lookup 43) '()))

> (test) ; no news is good news

We used range, rand, randint, shuffle and assert from the Standard Prelude, but only for testing; the hash-table functions are self-contained. You can run the program at http://programmingpraxis.codepad.org/ae17jWwR.

Posted by programmingpraxis

Filed in Exercises

4 Comments »

4 Responses to “Ordered Hash Tables”

mvaneerde said
August 2, 2013 at 10:46 AM
Since you have a comparison function, you could go further and have each bucket be an auto-growing array; this would allow lookup to use a binary search within the bucket. But at that point it might be better to re-evaluate the hash function.
Erlend said
August 2, 2013 at 2:24 PM
A simple Haskell solution: http://hamberg.no/erlend/posts/2013-08-02-ordered-hash-table.html
Ram said
August 5, 2013 at 6:31 AM
Can we implement the solution in Java?

Daniel said

August 7, 2013 at 3:51 AM

Here’s a solution in Common Lisp. The operations on the sorted alist could be improved by using tail recursion.

An argument can be used to specify the number of buckets. Dynamic resizing is not implemented.

This solution uses separate chaining with linked lists, although a search tree would seemingly improve performance.

;;; Sorted alist operations
;;; These could be improved with tail recursion

(defun sorted-acons (key datum alist &key (comp #'<))
  "Inserts (key . datum) pair into a sorted alist"
  (if (null alist)
      (list (cons key datum))
      (let ((head (car alist))
            (tail (cdr alist)))
        (let ((head-key (car head)))
          (cond ((funcall comp key head-key)
                 (acons key datum alist))
                ((funcall comp head-key key)
                 (cons head (sorted-acons key datum tail :comp comp)))
                (t (acons key datum tail)))))))

(defun sorted-assoc (key alist &key (comp #'<))
  "Return the cons in a sorted alist with key matching argument"
  (if (null alist)
      nil
      (let ((head (car alist))
            (tail (cdr alist)))
        (let ((head-key (car head)))
          (cond ((funcall comp key head-key)
                 nil)
                ((funcall comp head-key key)
                 (sorted-assoc key tail :comp comp))
                (t head))))))

(defun sorted-alist-remove (key alist &key (comp #'<))
  "Remove pair (key . datum) from a sorted alist if key matches argument"
  (if (null alist)
      nil
      (let ((head (car alist))
            (tail (cdr alist)))
        (let ((head-key (car head)))
          (cond ((funcall comp key head-key)
                 alist)
                ((funcall comp head-key key)
                 (cons head (sorted-alist-remove key tail :comp comp)))
                (t tail))))))

;;; Ordered hash table structure and operations

(defun make-ordered-hash (&key
                          (hash-function #'sxhash)
                          (comp #'<)
                          (buckets 16))
  (let ((array (make-array buckets :initial-element nil)))
    (macrolet ((bucket (key)
                 `(aref array (rem (funcall hash-function ,key) buckets))))
      (vector
       #'(lambda (key datum)
           (setf (bucket key)
                 (sorted-acons key datum (bucket key) :comp comp))
           datum)
       #'(lambda (key)
           (let ((pair (sorted-assoc key (bucket key) :comp comp)))
             (if pair
                 (values (cdr pair) t)
                 (values nil nil))))
       #'(lambda (key)
           (setf (bucket key)
                 (sorted-alist-remove key (bucket key) :comp comp)))))))

(defun insert (key value ordered-hash)
  (funcall (aref ordered-hash 0) key value))

(defun lookup (key ordered-hash)
  (funcall (aref ordered-hash 1) key))

(defun del (key ordered-hash)
  (funcall (aref ordered-hash 2) key))

;;; test

(defun test ()
  (let ((table (make-ordered-hash)))
    (insert 25 78 table)
    (insert 95 18 table)
    (insert 10 2 table)
    (assert (= (lookup 25 table) 78))
    (del 95 table)
    (assert (null (lookup 95 table)))))

Programming Praxis