Dynamic Hash Tables

July 12, 2013

; dynamic hash tables
; based on Per-Ake Larson, CACM 4/1988

(define (make-hash . args)

; (make-hash . hash eql?) -- return a newly-allocated empty hash table;
;     the hash and eql? functions are optional, but if either is provided
;     both must be; defaults are a universal hash function and equal?

; a hash table h is a function that takes a message and zero or more
; arguments; the insert, delete and update messages return a new function,
; so (set! h (h 'message args)) updates hash table h as requested

; (h 'lookup key) -- retrieves from hash table h the (cons key value)
;     pair with the given key, or null
; (h 'insert key value) -- inserts a (cons key value) pair in hash table
;     h, overwriting any previous value associated with the key
; (h 'delete key) -- removes from hash table h the (cons key value) pair
;     with the given key, if it exists
; (h 'update key proc default) -- proc is a function that takes a key and
;     value as arguments and returns a new value; if the key is present in
;     hash table h, update calls proc with the key and its associated value
;     and stores the value returned by proc in place of the original value,;
;     otherwise update inserts a new (cons key default) pair in hash table h
; (h 'enlist) -- returns the (cons key value) pairs in hash table h as a list
; (h 'size) -- returns the number of (cons key value) pairs in hash table h

  (define (uhash x) ; universal hash function
    (define (mod n) (modulo n 4294967296))
    (cond ((boolean? x) (if x 357913941 460175067))
          ((symbol? x) (uhash (symbol->string x)))
          ((char? x) (char->integer x))
          ((integer? x) (mod x))
          ((real? x)
            (let* ((r (inexact->exact x))
                   (n (numerator r))
                   (d (denominator r)))
              (mod (+ n (* 37 d)))))
          ((rational? x) (mod (+ (numerator x) (* 37 (denominator x)))))
          ((complex? x)
            (mod (+ (uhash (real-part x)) (* 37 (uhash (imag-part x))))))
          ((null? x) 477338855)
          ((pair? x)
            (let loop ((x x) (s 0))
              (if (null? x) s
                (loop (cdr x) (mod (+ (* 31 s) (uhash (car x))))))))
          ((vector? x)
            (let loop ((i (- (vector-length x) 1)) (s 0))
              (if (negative? i) s
                  (loop (- i 1) (mod (+ (* 31 s) (uhash (vector-ref x i))))))))
          ((string? x)
            (let loop ((i (- (string-length x) 1)) (s 0))
              (if (negative? i) s
                (loop (- i 1) (mod (+ (* 31 s) (uhash (string-ref x i))))))))
          ((procedure? x) (error 'uhash "can't hash procedure"))
          ((port? x) (error 'uhash "can't hash port"))
          (else (error 'uhash "don't know how to hash object"))))

  (define (scramble h) ; ensure minimum 20 bit result from hash function
    (if (< h 4096) (* h 1048573) (if (< h 1048576) (* h 4093) h)))

  (define (empty) (vector (make-vector w (list)) (list) (list)))
  (define (vect t) (vector-ref t 0))
  (define (lkid t) (vector-ref t 1))
  (define (rkid t) (vector-ref t 2))

  (define (get t i) ; fetch value from bucket i of tree t
    (if (<= u i) (error 'get "out of bounds")
      (let loop ((t t) (q (+ (quotient i w) 1)))
        (if (= q 1) (vector-ref (vect t) (modulo i w))
          (loop (if (even? (modulo q w)) (lkid t) (rkid t))
                (quotient q 2))))))

  (define (put t i v) ; store value v in bucket i, return new t
    (cond ((< u i) (error 'put "out of bounds"))
          ((< i u) ; replace current value
            (let loop ((t t) (q (+ (quotient i w) 1)))
              (cond ((= q 1) (let ((x (vect t)))
                      (vector-set! x (modulo i w) v)
                      (vector x (lkid t) (rkid t))))
                    ((even? q) (vector (vect t)
                      (loop (lkid t) (quotient q 2)) (rkid t)))
                    (else (vector (vect t)
                      (lkid t) (loop (rkid t) (quotient q 2)))))))
          ((zero? (modulo u w)) (set! u (+ u 1)) ; add new segment
            (let loop ((t t) (q (+ (quotient i w) 1)))
              (cond ((= q 1) (let ((x (make-vector w (list))))
                      (vector-set! x 0 v) (vector x (list) (list))))
                    ((even? q) (vector (vect t)
                      (loop (lkid t) (quotient q 2)) (rkid t)))
                    (else (vector (vect t)
                      (lkid t) (loop (rkid t) (quotient q 2)))))))
          (else (set! u (+ u 1)) ; expand within current segment
            (let loop ((t t) (q (+ (quotient i w) 1)))
              (cond ((= q 1) (let ((x (vect t)))
                      (vector-set! x (modulo i w) v)
                      (vector x (lkid t) (rkid t))))
                    ((even? q) (vector (vect t)
                      (loop (lkid t) (quotient q 2)) (rkid t)))
                    (else (vector (vect t)
                      (lkid t) (loop (rkid t) (quotient q 2)))))))))

  (define (hirem t) ; remove last bucket from t, return new t
    (if (zero? u) (error 'hirem "out of bounds"))
    (set! u (- u 1))
    (if (zero? (modulo u w))
        (let loop ((t t) (q (+ (quotient u w) 1))) ; remove last segment
          (cond ((= q 1) (list))
                ((even? q) (vector (vect t)
                  (loop (lkid t) (quotient q 2)) (rkid t)))
                (else (vector (vect t)
                  (lkid t) (loop (rkid t) (quotient q 2))))))
        (let loop ((t t) (q (+ (quotient u w) 1)))
          ; remove last bucket within last segment
          (cond ((= q 1) (let ((x (vect t)))
                  (vector-set! x (modulo u w) (list))
                  (vector x (lkid t) (rkid t))))
                ((even? q) (vector (vect t)
                  (loop (lkid t) (quotient q 2)) (rkid t)))
                (else (vector (vect t)
                  (lkid t) (loop (rkid t) (quotient q 2))))))))

  (define (index k) ; index of bucket, whether before or after split
    (let* ((h (scramble (hash k))) (h-mod-m (modulo h m)))
      (if (< h-mod-m p) (modulo h (+ m m)) h-mod-m)))

  (define (grow t) ; split bucket, move some keys to new bucket
    (let ((old p) (new (+ p m)))
      (set! p (+ p 1))
      (when (= p m) (set! m (* 2 m)) (set! p 0))
      (let loop ((xs (get t old)) (ys (list)) (zs (list)))
        (cond ((null? xs)
                (set! t (put t old ys))
                (set! t (put t new zs)))
              ((= (index (caar xs)) new)
                (loop (cdr xs) ys (cons (car xs) zs)))
              (else (loop (cdr xs) (cons (car xs) ys) zs))))
      t))

  (define (shrink t) ; coalesce last bucket, move all keys
    (set! p (- p 1))
    (when (< p 0) (set! m (quotient m 2)) (set! p (- m 1)))
    (set! t (put t p (append (get t p) (get t (- u 1)))))
    (set! t (hirem t))
    t)

  (define (lookup t k) ; return key/value pair, or null
    (let loop ((bs (get t (index k))))
      (cond ((null? bs) (list)) ; not found
            ((eql? (caar bs) k) (car bs)) ; found
            (else (loop (cdr bs)))))) ; keep looking

  (define (enlist t) ; return all key/value pairs in a list
    (do ((i 0 (+ i 1)) (xs (list) (append (get t i) xs))) ((= i u) xs)))

  (define (insert t k v) ; insert new key/value pair, or replace value
    (if (and (positive? u) (< hi (/ s u))) (set! t (grow t)))
    (let ((b (index k)))
      (let loop ((bs (get t b)) (xs (list)))
        (cond ((null? bs) ; insert new key/value pair
               (set! s (+ s 1)) (set! t (put t b (cons (cons k v) xs))) t)
              ((eql? (caar bs) k) ; replace existing value
                (set! t (put t b (cons (cons k v) (append (cdr bs) xs)))) t)
              (else (loop (cdr bs) (cons (car bs) xs))))))) ; keep looking

  (define (delete t k) ; delete key/value pair if key exists
    (if (and (< n u) (< (/ s u) lo)) (set! t (shrink t)))
    (let ((b (index k)))
      (let loop ((bs (get t b)) (xs (list)))
        (cond ((null? bs) xs) ; not in table, nothing to do
              ((eql? (caar bs) k) ; in table, delete
                (set! s (- s 1)) (set! t (put t b (append (cdr bs) xs))) t)
              (else (loop (cdr bs) (cons (car bs) xs))))))) ; keep looking

  (define (update t k p v) ; update value, or add new key/value pair
    (if (and (positive? u) (< hi (/ s u))) (set! t (grow t)))
    (let ((b (index k)))
      (let loop ((bs (get t b)) (xs (list)))
        (cond ((null? bs) ; not in table, insert
                (set! s (+ s 1)) (set! t (put t b (cons (cons k v) xs))) t)
              ((eql? (caar bs) k) ; in table, update
                (set! t (put t b (cons (cons k (p k (cdar bs)))
                                       (append (cdr bs) xs)))) t)
              (else (loop (cdr bs) (cons (car bs) xs))))))) ; keep looking

  (define (new t) (lambda (message . args) (dispatch t message args)))

  (define (dispatch t message args) ; perform requested function
    (define (arity n)
      (if (not (= (length args) n)) (error 'dispatch "incorrect arity")))
    (case message ; includes synonymns for some messages
      ((display debug) ; for debugging
        (display "u = ") (display u)
        (display "; m = ") (display m)
        (display "; p = ") (display p)
        (display "; s = ") (display s) (newline)
        (do ((i 0 (+ i 1))) ((= i u))
          (display i) (display ": ")
          (display (get t i)) (newline)))
      ((lookup fetch get) (arity 1) (apply lookup t args))
      ((insert store put insert! store! put!)
        (arity 2) (new (apply insert t args)))
      ((delete remove delete! remove!)
        (arity 1) (new (apply delete t args)))
      ((update update!)
        (arity 3) (new (apply update t args)))
      ((size count length) (arity 0) s)
      ((enlist to-list) (arity 0) (enlist t))))

  (define w 64) ; width of a segment of the growable array
  (define u 64) ; number of buckets currently in use
  (define n 64) ; minimum number of buckets in hash table
  (define m 64) ; current maximum number of buckets (Larson's maxp = n * 2^l)
  ; initialize u, n and m to w; 64 or 256 are good values to use
  (define p 0) ; pointer to next bucket to be split 0 .. m-1
  (define s 0) ; number of key/value pairs currently in table
  (define lo 1) ; minimum load factor (average chain length is 2)
  (define hi 3) ; maximum load factor (average chain length is 2)
  ; (/ hi lo) must be strictly greater than 2

  ; set hash and eql? based on arguments or default
  (define hash #f) (define eql? #f) ; placeholders
  (cond ((= (length args) 2) (set! hash (car args)) (set! eql? (cadr args)))
  (else (set! hash uhash) (set! eql? equal?)))

  (new (empty))) ; main function

About these ads

Pages: 1 2 3

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s

Follow

Get every new post delivered to your Inbox.

Join 598 other followers