Sets

June 7, 2013

Since we can’t use lists, the obvious choices for the underlying representation of sets are either hash tables or some sort of trees. We choose hash tables, because the use of trees requires an ordering comparison but hash tables require only an equality comparison, which is exactly what sets require. We will use code based on the hash tables provided by the Standard Prelude, with equal? for the equality comparison and the universal hash function from a previous exercise; we’ll store only a key and not an associated value, because sets don’t have an associated value.

Our other choice is to make sets abstract instead of concrete, so they can interact with their environment only through the defined interface. In Scheme this is done by making a closure; object-oriented languages might design a class instead. Here is the complete set library, all in a single function called as (make-set limit):

(define (make-set limit)

  (define (hash x) ; universal hash function
    (define (mod n) (modulo n 4294967296))
    (cond ((boolean? x) (if x 1 0))
          ((symbol? x) (hash (symbol->string x)))
          ((char? x) (char->integer x))
          ((integer? x) (mod x))
          ((real? x)
            (let* ((r (inexact->exact x))
                   (n (numerator r))
                   (d (denominator r)))
              (mod (+ n (* 37 d)))))
          ((rational? x) (mod (+ (numerator x) (* 37 (denominator x)))))
          ((complex? x)
            (mod (+ (hash (real-part x))
                    (* 37 (hash (imag-part x))))))
          ((null? x) 4294967295)
          ((pair? x)
            (let loop ((x x) (s 0))
              (if (null? x) s
                (loop (cdr x) (mod (+ (* 31 s) (hash (car x))))))))
          ((vector? x)
            (let loop ((i (- (vector-length x) 1)) (s 0))
              (if (negative? i) s
                  (loop (- i 1) (mod (+ (* 31 s) (hash (vector-ref x i))))))))
          ((string? x)
            (let loop ((i (- (string-length x) 1)) (s 0))
              (if (negative? i) s
                (loop (- i 1) (mod (+ (* 31 s) (hash (string-ref x i))))))))
          ((procedure? x) (error 'hash "can't hash procedure"))
          ((port? x) (error 'hash "can't hash port"))
          (else (error 'hash "don't know how to hash object"))))

  (define (member? set key)
    (let* ((h (modulo (hash key) limit))
           (b (vector-ref set h)))
      (if (member key b) #t #f)))

  (define (adjoin set key)
    (let* ((h (modulo (hash key) limit))
           (b (vector-ref set h)))
      (cond ((member key b) set)
      (else (vector-set! set h (cons key b)) set))))

  (define (delete set key)
    (define (remove x xs)
      (let loop ((xs xs) (zs (list)))
        (cond ((null? xs) zs)
              ((equal? (car xs) x)
                (append (cdr xs) zs))
              (else (loop (cdr xs) (cons (car xs) zs))))))
    (let* ((h (modulo (hash key) limit))
           (b (vector-ref set h)))
      (cond ((not (member key b)) set)
      (else (vector-set! set h (remove key b)) set))))

  (define (intersect set1 set2)
    (let ((set (make-set limit)))
      (let loop ((keys (enlist set1)))
        (cond ((null? keys) set)
              ((set2 'member? (car keys))
                (set! set (set 'adjoin (car keys)))
                (loop (cdr keys)))
              (else (loop (cdr keys)))))))

  (define (union set1 set2)
    (let ((set (make-set limit)))
      (let loop ((keys (enlist set1)))
        (when (pair? keys)
          (set! set (set 'adjoin (car keys)))
          (loop (cdr keys))))
      (let loop ((keys (set2 'enlist)))
        (when (pair? keys)
          (set! set (set 'adjoin (car keys)))
          (loop (cdr keys))))
      set))

  (define (minus set1 set2)
    (let ((set (make-set limit)))
      (let loop ((keys (enlist set1)))
        (when (pair? keys)
          (set! set (set 'adjoin (car keys)))
          (loop (cdr keys))))
      (let loop ((keys (set2 'enlist)))
        (when (pair? keys)
          (set! set (set 'delete (car keys)))
          (loop (cdr keys))))
      set))

  (define (enlist set)
    (let loop ((i 0) (s (list)))
      (if (= i limit) s
        (loop (+ i 1) (append (vector-ref set i) s)))))

  (define (size set)
    (let loop ((i 0) (s 0))
      (if (= i limit) s
        (loop (+ i 1) (+ s (length (vector-ref set i)))))))

  (define (new set)
    (lambda (message . arg) (dispatch set message arg)))

  (define (dispatch set message arg)
    (case message
      ((member?) (member? set (car arg)))
      ((adjoin) (new (adjoin set (car arg))))
      ((delete) (new (delete set (car arg))))
      ((intersect) (intersect set (car arg)))
      ((union) (union set (car arg)))
      ((minus) (minus set (car arg)))
      ((enlist) (enlist set))
      ((size) (size set))
      (else (error 'set "invalid message"))))

  (new (make-vector limit (list))))

Let’s first discuss the closure. A call to make-set returns a function; it is normally used like (define s (make-set 997)). Then you can add an element to the set by (set! s (s 'adjoin item); the inner function call returns a new s function that replaces the old s in the outer set!. Function dispatch handles the arguments in the newly-defined function; function new is a wrapper that forms a function (with lambda) and returns it as the value returned to the caller. The hash table that defines the items in the set is closed inside the function where the caller has no access to it, which is why such a function is called a closure.

The rest of the functions implement the hash table that stores the set, and are generally straight forward. The only tricky part is that the intersect, union and minus functions deal with three sets in different forms. Set1 is the “base” set, and is in internal form, not within a closure, so it is operated on by the functions defined inside the closure. But set2 was passed to the function from the outside and is its own closure, so it is called like any other use of a set outside the closure rather than using the functions defined inside the closure. The result set is also in “external” form. Thus, to make a list of the items in set1 we say (enlist set1) but to make a list of the items in set2 we say (set2 'enlist).

Here are some examples:

> (define s (make-set 97))
> (set! s (s 'adjoin 1))
> (set! s (s 'adjoin 2))
> (set! s (s 'adjoin 3))
> (define t (make-set 97))
> (set! t (t 'adjoin 3))
> (set! t (t 'adjoin 4))
> (set! t (t 'adjoin 5))
> (s 'enlist)
(3 2 1)
> (s 'size)
3
> (t 'enlist)
(5 4 3)
> (t 'size)
3
> ((s 'intersect t) 'enlist)
(3)
> ((s 'union t) 'enlist)
(5 4 3 2 1)
> ((s 'minus t) 'enlist)
(2 1)

The universal hash function comes from a previous exercise. You can run the program at http://programmingpraxis.codepad.org/i82KOZRE.

Advertisement

Pages: 1 2

2 Responses to “Sets”

  1. […] today’s Programming Praxis exercise, our goal is to implement a Set data structure. Let’s get […]

  2. My Haskell solution (see http://bonsaicode.wordpress.com/2013/06/07/programming-praxis-sets/ for a version with comments):

    import Data.Hashable
    import qualified Data.HashTable.IO as H
    import Data.List (sort)
    
    data Set a = Set (H.BasicHashTable a ())
    
    new :: IO (Set a)
    new = fmap Set H.new
    
    member :: (Eq a, Hashable a) => a -> Set a -> IO Bool
    member x (Set s) = fmap (maybe False $ const True) $ H.lookup s x
    
    adjoin :: (Eq a, Hashable a) => a -> Set a -> IO (Set a)
    adjoin x (Set s) = H.insert s x () >> return (Set s)
    
    delete :: (Eq a, Hashable a) => a -> Set a -> IO (Set a)
    delete x (Set s) = H.delete s x >> return (Set s)
    
    fold :: (a -> b -> IO b) -> Set a -> b -> IO b
    fold f (Set s) x = H.foldM (\a (k,_) -> f k a) x s
    
    union :: (Eq a, Hashable a) => Set a -> Set a -> IO (Set a)
    union s1 s2 = fold adjoin s2 =<< fold adjoin s1 =<< new
    
    combine :: (Eq a, Hashable a) => (Bool -> Bool) -> Set a -> Set a -> IO (Set a)
    combine cond s1 s2 = fold (\k a -> member k s2 >>= \b ->
        if cond b then adjoin k a else return a) s1 =<< new 
    
    intersect :: (Eq a, Hashable a) => Set a -> Set a -> IO (Set a)
    intersect = combine id
    
    minus :: (Eq a, Hashable a) => Set a -> Set a -> IO (Set a)
    minus = combine not
    
    toList :: Set a -> IO [a]
    toList s = fold ((return .) . (:)) s []
    
    size :: Set a -> IO Int
    size = fmap length . toList
    

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out /  Change )

Twitter picture

You are commenting using your Twitter account. Log Out /  Change )

Facebook photo

You are commenting using your Facebook account. Log Out /  Change )

Connecting to %s

%d bloggers like this: