## Dynamic Hash Tables

### July 12, 2013

`; dynamic hash tables`

; based on Per-Ake Larson, CACM 4/1988

```
```(define (make-hash . args)

; (make-hash . hash eql?) -- return a newly-allocated empty hash table;

; the hash and eql? functions are optional, but if either is provided

; both must be; defaults are a universal hash function and equal?

; a hash table h is a function that takes a message and zero or more

; arguments; the insert, delete and update messages return a new function,

; so (set! h (h 'message args)) updates hash table h as requested

; (h 'lookup key) -- retrieves from hash table h the (cons key value)

; pair with the given key, or null

; (h 'insert key value) -- inserts a (cons key value) pair in hash table

; h, overwriting any previous value associated with the key

; (h 'delete key) -- removes from hash table h the (cons key value) pair

; with the given key, if it exists

; (h 'update key proc default) -- proc is a function that takes a key and

; value as arguments and returns a new value; if the key is present in

; hash table h, update calls proc with the key and its associated value

; and stores the value returned by proc in place of the original value,;

; otherwise update inserts a new (cons key default) pair in hash table h

; (h 'enlist) -- returns the (cons key value) pairs in hash table h as a list

; (h 'size) -- returns the number of (cons key value) pairs in hash table h

(define (uhash x) ; universal hash function

(define (mod n) (modulo n 4294967296))

(cond ((boolean? x) (if x 357913941 460175067))

((symbol? x) (uhash (symbol->string x)))

((char? x) (char->integer x))

((integer? x) (mod x))

((real? x)

(let* ((r (inexact->exact x))

(n (numerator r))

(d (denominator r)))

(mod (+ n (* 37 d)))))

((rational? x) (mod (+ (numerator x) (* 37 (denominator x)))))

((complex? x)

(mod (+ (uhash (real-part x)) (* 37 (uhash (imag-part x))))))

((null? x) 477338855)

((pair? x)

(let loop ((x x) (s 0))

(if (null? x) s

(loop (cdr x) (mod (+ (* 31 s) (uhash (car x))))))))

((vector? x)

(let loop ((i (- (vector-length x) 1)) (s 0))

(if (negative? i) s

(loop (- i 1) (mod (+ (* 31 s) (uhash (vector-ref x i))))))))

((string? x)

(let loop ((i (- (string-length x) 1)) (s 0))

(if (negative? i) s

(loop (- i 1) (mod (+ (* 31 s) (uhash (string-ref x i))))))))

((procedure? x) (error 'uhash "can't hash procedure"))

((port? x) (error 'uhash "can't hash port"))

(else (error 'uhash "don't know how to hash object"))))

(define (scramble h) ; ensure minimum 20 bit result from hash function

(if (< h 4096) (* h 1048573) (if (< h 1048576) (* h 4093) h)))

(define (empty) (vector (make-vector w (list)) (list) (list)))

(define (vect t) (vector-ref t 0))

(define (lkid t) (vector-ref t 1))

(define (rkid t) (vector-ref t 2))

(define (get t i) ; fetch value from bucket i of tree t

(if (<= u i) (error 'get "out of bounds")

(let loop ((t t) (q (+ (quotient i w) 1)))

(if (= q 1) (vector-ref (vect t) (modulo i w))

(loop (if (even? (modulo q w)) (lkid t) (rkid t))

(quotient q 2))))))

(define (put t i v) ; store value v in bucket i, return new t

(cond ((< u i) (error 'put "out of bounds"))

((< i u) ; replace current value

(let loop ((t t) (q (+ (quotient i w) 1)))

(cond ((= q 1) (let ((x (vect t)))

(vector-set! x (modulo i w) v)

(vector x (lkid t) (rkid t))))

((even? q) (vector (vect t)

(loop (lkid t) (quotient q 2)) (rkid t)))

(else (vector (vect t)

(lkid t) (loop (rkid t) (quotient q 2)))))))

((zero? (modulo u w)) (set! u (+ u 1)) ; add new segment

(let loop ((t t) (q (+ (quotient i w) 1)))

(cond ((= q 1) (let ((x (make-vector w (list))))

(vector-set! x 0 v) (vector x (list) (list))))

((even? q) (vector (vect t)

(loop (lkid t) (quotient q 2)) (rkid t)))

(else (vector (vect t)

(lkid t) (loop (rkid t) (quotient q 2)))))))

(else (set! u (+ u 1)) ; expand within current segment

(let loop ((t t) (q (+ (quotient i w) 1)))

(cond ((= q 1) (let ((x (vect t)))

(vector-set! x (modulo i w) v)

(vector x (lkid t) (rkid t))))

((even? q) (vector (vect t)

(loop (lkid t) (quotient q 2)) (rkid t)))

(else (vector (vect t)

(lkid t) (loop (rkid t) (quotient q 2)))))))))

(define (hirem t) ; remove last bucket from t, return new t

(if (zero? u) (error 'hirem "out of bounds"))

(set! u (- u 1))

(if (zero? (modulo u w))

(let loop ((t t) (q (+ (quotient u w) 1))) ; remove last segment

(cond ((= q 1) (list))

((even? q) (vector (vect t)

(loop (lkid t) (quotient q 2)) (rkid t)))

(else (vector (vect t)

(lkid t) (loop (rkid t) (quotient q 2))))))

(let loop ((t t) (q (+ (quotient u w) 1)))

; remove last bucket within last segment

(cond ((= q 1) (let ((x (vect t)))

(vector-set! x (modulo u w) (list))

(vector x (lkid t) (rkid t))))

((even? q) (vector (vect t)

(loop (lkid t) (quotient q 2)) (rkid t)))

(else (vector (vect t)

(lkid t) (loop (rkid t) (quotient q 2))))))))

(define (index k) ; index of bucket, whether before or after split

(let* ((h (scramble (hash k))) (h-mod-m (modulo h m)))

(if (< h-mod-m p) (modulo h (+ m m)) h-mod-m)))

(define (grow t) ; split bucket, move some keys to new bucket

(let ((old p) (new (+ p m)))

(set! p (+ p 1))

(when (= p m) (set! m (* 2 m)) (set! p 0))

(let loop ((xs (get t old)) (ys (list)) (zs (list)))

(cond ((null? xs)

(set! t (put t old ys))

(set! t (put t new zs)))

((= (index (caar xs)) new)

(loop (cdr xs) ys (cons (car xs) zs)))

(else (loop (cdr xs) (cons (car xs) ys) zs))))

t))

(define (shrink t) ; coalesce last bucket, move all keys

(set! p (- p 1))

(when (< p 0) (set! m (quotient m 2)) (set! p (- m 1)))

(set! t (put t p (append (get t p) (get t (- u 1)))))

(set! t (hirem t))

t)

(define (lookup t k) ; return key/value pair, or null

(let loop ((bs (get t (index k))))

(cond ((null? bs) (list)) ; not found

((eql? (caar bs) k) (car bs)) ; found

(else (loop (cdr bs)))))) ; keep looking

(define (enlist t) ; return all key/value pairs in a list

(do ((i 0 (+ i 1)) (xs (list) (append (get t i) xs))) ((= i u) xs)))

(define (insert t k v) ; insert new key/value pair, or replace value

(if (and (positive? u) (< hi (/ s u))) (set! t (grow t)))

(let ((b (index k)))

(let loop ((bs (get t b)) (xs (list)))

(cond ((null? bs) ; insert new key/value pair

(set! s (+ s 1)) (set! t (put t b (cons (cons k v) xs))) t)

((eql? (caar bs) k) ; replace existing value

(set! t (put t b (cons (cons k v) (append (cdr bs) xs)))) t)

(else (loop (cdr bs) (cons (car bs) xs))))))) ; keep looking

(define (delete t k) ; delete key/value pair if key exists

(if (and (< n u) (< (/ s u) lo)) (set! t (shrink t)))

(let ((b (index k)))

(let loop ((bs (get t b)) (xs (list)))

(cond ((null? bs) xs) ; not in table, nothing to do

((eql? (caar bs) k) ; in table, delete

(set! s (- s 1)) (set! t (put t b (append (cdr bs) xs))) t)

(else (loop (cdr bs) (cons (car bs) xs))))))) ; keep looking

(define (update t k p v) ; update value, or add new key/value pair

(if (and (positive? u) (< hi (/ s u))) (set! t (grow t)))

(let ((b (index k)))

(let loop ((bs (get t b)) (xs (list)))

(cond ((null? bs) ; not in table, insert

(set! s (+ s 1)) (set! t (put t b (cons (cons k v) xs))) t)

((eql? (caar bs) k) ; in table, update

(set! t (put t b (cons (cons k (p k (cdar bs)))

(append (cdr bs) xs)))) t)

(else (loop (cdr bs) (cons (car bs) xs))))))) ; keep looking

(define (new t) (lambda (message . args) (dispatch t message args)))

(define (dispatch t message args) ; perform requested function

(define (arity n)

(if (not (= (length args) n)) (error 'dispatch "incorrect arity")))

(case message ; includes synonymns for some messages

((display debug) ; for debugging

(display "u = ") (display u)

(display "; m = ") (display m)

(display "; p = ") (display p)

(display "; s = ") (display s) (newline)

(do ((i 0 (+ i 1))) ((= i u))

(display i) (display ": ")

(display (get t i)) (newline)))

((lookup fetch get) (arity 1) (apply lookup t args))

((insert store put insert! store! put!)

(arity 2) (new (apply insert t args)))

((delete remove delete! remove!)

(arity 1) (new (apply delete t args)))

((update update!)

(arity 3) (new (apply update t args)))

((size count length) (arity 0) s)

((enlist to-list) (arity 0) (enlist t))))

(define w 64) ; width of a segment of the growable array

(define u 64) ; number of buckets currently in use

(define n 64) ; minimum number of buckets in hash table

(define m 64) ; current maximum number of buckets (Larson's maxp = n * 2^l)

; initialize u, n and m to w; 64 or 256 are good values to use

(define p 0) ; pointer to next bucket to be split 0 .. m-1

(define s 0) ; number of key/value pairs currently in table

(define lo 1) ; minimum load factor (average chain length is 2)

(define hi 3) ; maximum load factor (average chain length is 2)

; (/ hi lo) must be strictly greater than 2

; set hash and eql? based on arguments or default

(define hash #f) (define eql? #f) ; placeholders

(cond ((= (length args) 2) (set! hash (car args)) (set! eql? (cadr args)))

(else (set! hash uhash) (set! eql? equal?)))

(new (empty))) ; main function

```
```

```
```

```
```