Dynamic Hash Tables

July 12, 2013

We begin with the code for the growable array. Bucket i is stored in element (modulo i w) of the segment at index (+ (quotient i w) 1) of the growable array (vectors are indexed from zero but growable arrays are indexed from one). At any given moment there are u buckets in the data structure, stored in (ceiling (/ u w)) segments in the growable array. A node in the growable array consists of a vector of buckets and the two child nodes.

The growable array expands and shrinks in two ways. If there is room in the last segment, which can be identified when (modulo u w) is not zero, the array grows simply by increasing u by one and placing the new value in the last segment. Likewise, if the last segment will not become empty when an item is removed, which can be identified when (modulo u w) is not one, the array shrinks by setting the u‘th element to a null list and decreasing u by one. If the array itself must grow or shrink, it is done like any normal growable array; when growing, a new segment of w null lists is added to the end of the array, and when shrinking, the last segment is unlinked from the array and becomes garbage.

Here’s the code, with w set small for testing; w should be something like 16, or 64, or 256, or 1024 during production use, depending on expected usage. We refer to the exercise on growable arrays for an explanation:

(define w 2) ; width of a segment of the growable array (define u 2) ; number of buckets currently in use

(define (empty) (vector (make-vector w (list)) (list) (list))) (define (vect t) (vector-ref t 0)) (define (lkid t) (vector-ref t 1)) (define (rkid t) (vector-ref t 2))

(define (get t i) ; fetch value from bucket i of tree t (if (<= u i) (error 'get "out of bounds") (let loop ((t t) (q (+ (quotient i w) 1))) (if (= q 1) (vector-ref (vect t) (modulo i w)) (loop (if (even? (modulo q w)) (lkid t) (rkid t)) (quotient q 2))))))

(define (put t i v) ; store value v in bucket i, return new t (cond ((< u i) (error 'put "out of bounds")) ((< i u) ; replace current value (let loop ((t t) (q (+ (quotient i w) 1))) (cond ((= q 1) (let ((x (vect t))) (vector-set! x (modulo i w) v) (vector x (lkid t) (rkid t)))) ((even? q) (vector (vect t) (loop (lkid t) (quotient q 2)) (rkid t))) (else (vector (vect t) (lkid t) (loop (rkid t) (quotient q 2))))))) ((zero? (modulo u w)) (set! u (+ u 1)) ; expand growable array (let loop ((t t) (q (+ (quotient i w) 1))) (cond ((= q 1) (let ((x (make-vector w (list)))) (vector-set! x 0 v) (vector x (list) (list)))) ((even? q) (vector (vect t) (loop (lkid t) (quotient q 2)) (rkid t))) (else (vector (vect t) (lkid t) (loop (rkid t) (quotient q 2))))))) (else (set! u (+ u 1)) ; expand within current growable array (let loop ((t t) (q (+ (quotient i w) 1))) (cond ((= q 1) (let ((x (vect t))) (vector-set! x (modulo i w) v) (vector x (lkid t) (rkid t)))) ((even? q) (vector (vect t) (loop (lkid t) (quotient q 2)) (rkid t))) (else (vector (vect t) (lkid t) (loop (rkid t) (quotient q 2)))))))))

(define (hirem t) ; remove last bucket from t, return new t (if (zero? u) (error 'hirem "out of bounds")) (set! u (- u 1)) (if (zero? (modulo u w)) (let loop ((t t) (q (+ (quotient u w) 1))) ; reduce growable array by one segment (cond ((= q 1) (list)) ((even? q) (vector (vect t) (loop (lkid t) (quotient q 2)) (rkid t))) (else (vector (vect t) (lkid t) (loop (rkid t) (quotient q 2)))))) (let loop ((t t) (q (+ (quotient u w) 1))) ; shrink within current segment (cond ((= q 1) (let ((x (vect t))) (vector-set! x (modulo u w) (list)) (vector x (lkid t) (rkid t)))) ((even? q) (vector (vect t) (loop (lkid t) (quotient q 2)) (rkid t))) (else (vector (vect t) (lkid t) (loop (rkid t) (quotient q 2))))))))

The index function calculates the bucket number at which a key, if it exists in the table, will be found:

(define (index k) ; index of bucket, whether before or after split (let* ((h (scramble (hash k))) (h-mod-m (modulo h m))) (if (< h-mod-m p) (modulo h (+ m m)) h-mod-m)))

The functions that grow and shrink buckets are fundamental to the operation of the dynamic hash table; both reset p and m, then grow rehashes all key/value pairs in the old bucket and moves some to the new bucket, while shrink merely combines the two buckets:

(define (grow t) (let ((old p) (new (+ p m))) (set! p (+ p 1)) (when (= p m) (set! m (* 2 m)) (set! p 0)) (let loop ((xs (get t old)) (ys (list)) (zs (list))) (cond ((null? xs) (set! t (put t old ys)) (set! t (put t new zs))) ((= (index (caar xs)) new) (loop (cdr xs) ys (cons (car xs) zs))) (else (loop (cdr xs) (cons (car xs) ys) zs)))) t))

(define (shrink t) (set! p (- p 1)) (when (< p 0) (set! m (quotient m 2)) (set! p (- m 1))) (set! t (put t p (append (get t p) (get t (- u 1))))) (set! t (hirem t)) t)

The two functions that simply scan the hash table without modifying it are lookup and enlist, which are both trivial; note that lookup can’t use the built-in function member because the user can define his own eql? predicate, and enlist uses a do-loop that is short and to the point:

(define (lookup t k) (let loop ((bs (get t (index k)))) (cond ((null? bs) (list)) ; not found ((eql? (caar bs) k) (car bs)) ; found (else (loop (cdr bs)))))) ; keep looking

(define (enlist t) (do ((i 0 (+ i 1)) (xs (list) (append (get t i) xs))) ((= i u) xs)))

The functions that perform insertions, deletions and updates are all straight forward. The only trick is keeping track of the number s of key/value pairs currently in the table, because an insertion doesn’t necessarily increase s, a deletion doesn’t necessarily decrease s, and an update doesn’t necessarily leave s unchanged. All three functions either grow or shrink the table if needed:

(define (insert t k v) (if (and (positive? u) (< hi (/ s u))) (set! t (grow t))) (let ((b (index k))) (let loop ((bs (get t b)) (xs (list))) (cond ((null? bs) ; insert new key/value pair (set! s (+ s 1)) (set! t (put t b (cons (cons k v) xs))) t) ((eql? (caar bs) k) ; replace existing value (set! t (put t b (cons (cons k v) (append (cdr bs) xs)))) t) (else (loop (cdr bs) (cons (car bs) xs))))))) ; keep looking

(define (delete t k) (if (and (< n u) (< (/ s u) lo)) (set! t (shrink t))) (let ((b (index k))) (let loop ((bs (get t b)) (xs (list))) (cond ((null? bs) xs) ; not in table, nothing to do ((eql? (caar bs) k) ; in table, delete (set! s (- s 1)) (set! t (put t b (append (cdr bs) xs))) t) (else (loop (cdr bs) (cons (car bs) xs))))))) ; keep looking

(define (update t k p v) (if (and (positive? u) (< hi (/ s u))) (set! t (grow t))) (let ((b (index k))) (let loop ((bs (get t b)) (xs (list))) (cond ((null? bs) ; not in table, insert (set! s (+ s 1)) (set! t (put t b (cons (cons k v) xs))) t) ((eql? (caar bs) k) ; in table, update (set! t (put t b (cons (cons k (p k (cdar bs))) (append (cdr bs) xs)))) t) (else (loop (cdr bs) (cons (car bs) xs))))))) ; keep looking

The rest of the code makes the hash table an abstract data type, with the table itself hidden inside a function closure. We won’t show it here, because the details are both specific to Scheme and rather esoteric, but you can see the entire code assembled on the next page and run the code at http://programmingpraxis.codepad.org/RA6g2R3T.

Posted by programmingpraxis

Filed in Exercises

S	M	T	W	T	F	S
	1	2	3	4	5	6
7	8	9	10	11	12	13
14	15	16	17	18	19	20
21	22	23	24	25	26	27
28	29	30	31

Programming Praxis