Spell Checking

April 17, 2009

Our word list will be stored in a trie that provides t-null for the empty trie, t-look to determine if word is in the trie, and t-bind to bind a new word to the trie. The branches of the trie are stored in association lists, but instead of the standard Scheme a-lists we use our own, in which characters are stored in sorted order. All comparisons are case-insensitive. First the a-list functions:

(define a-null '())

(define (a-look c a)
  (cond ((null? a) #f)
        ((char-ci<? c (caar a)) #f)
        ((char-ci<? (caar a) c) (a-look c (cdr a)))
        (else (car a))))

(define (a-bind c x a)
  (cond ((null? a) (list (cons c x)))
        ((char-ci<? c (caar a)) (cons (cons c x) a))
        ((char-ci<? (caar a) c) (cons (car a) (a-bind c x (cdr a))))
        (else (cons (cons c x) (cdr a)))))

A trie is a pair consisting of a value and an a-list of successors. The value is boxed in a list, so '() indicates no-value and '(x) indicates the value x. The null trie conses the null list to the null a-list:

(define t-null (cons '() a-null))

Trie lookup goes one character at a time. There are two ways for lookup to fail: if the end of the key finds a '() value, or if an intermediate node does not exist:

(define (t-look ks t)
  (if (null? ks)
      (if (pair? (car t)) (caar t) #f)
      (let ((x (a-look (car ks) (cdr t))))
        (if x (t-look (cdr ks) (cdr x))#f))))

Adding or replacing a value in a trie is similar to lookup, except that it never fails; if necessary, the trie is extended with a null node:

(define (t-bind ks x t)
  (if (null? ks) (cons (list x) (cdr t))
    (let* ((a1 (a-look (car ks) (cdr t)))
           (t2 (t-bind (cdr ks) x (if (pair? a1) (cdr a1) t-null))))
      (cons (car t) (a-bind (car ks) t2 (cdr t))))))

Make-dict reads a word list, one word per line, binding each word to #t; read-line comes from the Standard Prelude:

(define (make-dict filename)
  (with-input-from-file filename
    (lambda ()
      (let loop ((word (read-line)) (dict t-null))
        (if (eof-object? word) dict
          (loop (read-line) (t-bind (string->list word) #t dict)))))))

The dictionary is loaded from the standard word list:

(define dict (make-dict "/usr/dict/words"))

Spell-checking a word is done by submitting the key to the dictionary:

(define (spell word)
  (t-look (string->list word) dict))

Programming is spelled correctly, but my dictionary thinks praxis is somehow akin to cwm:

> (spell "Programming")
#t
> (spell "Praxis")
#f

You can see the program at http://codepad.org/fK2CUpEQ.

Pages: 1 2

6 Responses to “Spell Checking”

  1. FalconNL said

    In Haskell:

    import qualified Data.ByteString.Char8 as B
    import Data.Trie

    main = do trie <- fmap (fromList . map (flip (,) ()) . B.lines) $ B.readFile "words.txt" print $ valid trie "qarter" valid trie = flip member trie . B.pack [/sourcecode]

  2. Eric H said

    Another in scheme:


    #! /bin/sh
    #| Hey Emacs, this is -*-scheme-*- code!
    #$Id: v4-script-template.ss 5887 2008-12-30 18:12:50Z erich $
    exec mzscheme -l errortrace –require "$0" –main — ${1+"$@"}
    |#
    #lang scheme
    (require "trie.ss"
    (planet schematics/schemeunit:3)
    (planet schematics/schemeunit:3/text-ui))
    (define snarf-dictionary
    (match-lambda
    [(? string? inp)
    (snarf-dictionary (build-path inp))]
    [(? path? inp)
    (fprintf (current-error-port) "Reading dictionary ~s … " inp)
    (let ((dict (call-with-input-file inp snarf-dictionary)))
    (fprintf (current-error-port) "done; ~s words~%"
    (dict-count dict))
    dict)]
    [(? input-port? inp)
    (for/fold ([dict (make-immutable-trie)])
    ([word (in-lines inp)])
    (dict-set dict word word ))]))
    (define dict-tests
    (test-suite
    "dictionary"
    (let ((d (snarf-dictionary "/usr/share/dict/words")))
    (check-not-false (dict-ref d "dog"))
    (check-false (dict-ref d "I bet this word isn't in the dictionary"))
    (printf "Hey: ~a~%"
    (for/list ((w (in-list '("sam" "Sam" "snord" "flutter" "butter" "smith" "Smith"))))
    (cons w (dict-ref d w))))
    )))
    (define trie-tests
    (test-suite
    "top"
    (test-case
    "tries"
    (let ((t (make-immutable-trie)))
    (check-true (trie? t))
    (check-equal? (dict-count t) 0)
    (let ((t (dict-set t "c" "The letter 'c'")))
    (check-equal? (dict-count t) 1)
    (check-equal? (dict-ref t "c") "The letter 'c'")
    (let* ((exp "The furry 'cat'")
    (t (dict-set t "cat" exp)))
    (check-false (dict-ref t "ca" #f))
    (check-equal? (dict-count t) 2)
    (check-equal? (dict-ref t "cat") exp)
    ))
    )
    (let ((t (make-immutable-trie)))
    (check-true (dict? t))
    (check-false (dict-ref t "cat" #f))
    (check-equal? (dict-count t) 0)
    (let ((t (dict-set t "c" 'plurgh)))
    (check-equal? (dict-ref t "c" #f) 'plurgh)
    (check-equal? (dict-count t) 1)
    (let ((t (dict-set t "cat" "hat")))
    (check-equal? (dict-ref t "cat" #f) "hat")
    (let ((t (dict-set t "cats" "mats")))
    (check-equal? (dict-ref t "cat" #f) "hat")
    (check-equal? (dict-ref t "cats" #f) "mats")
    (check-equal? (dict-count t) 3))
    )
    )))
    dict-tests))
    (provide main)
    (define (main . args)
    (exit (run-tests
    trie-tests
    'verbose)))

    view raw

    trie-tests.ss

    hosted with ❤ by GitHub


    #lang scheme
    (require mzlib/trace)
    (define (ref t key [failure-result #f])
    (define (ref-inner t chars)
    (if (null? chars)
    (if (box? (trie-value t))
    (unbox (trie-value t))
    failure-result)
    (let ((probe (dict-ref (trie-alist t) (car chars) #f)))
    (cond
    ((not probe)
    failure-result)
    ((box? probe)
    (unbox probe))
    (#t
    (ref-inner probe
    (cdr chars)))))))
    ;; (trace ref-inner)
    (ref-inner t (string->list key)))
    ;; (trace ref)
    (define (set t key value)
    (define (set-inner t chars value)
    ;; chars: () => set the box
    ;; chars: (ch . rest) => lookup ch => old value; (set-inner oldvalue (cdr chars) value)
    (let ((new (make-trie '() (box-immutable value))))
    (if (null? chars)
    new
    (let ((probe (dict-ref (trie-alist t) (car chars) (make-trie '() #f))))
    (make-trie
    (dict-set (trie-alist t) (car chars)
    (set-inner probe (cdr chars) value))
    (trie-value t))))))
    ;; (trace set-inner)
    (set-inner t (string->list key) value))
    ;; (trace set)
    (define (count t)
    (foldl +
    (if (box? (trie-value t))
    1
    0)
    (map (compose count cdr) (trie-alist t))))
    ;; (trace count)
    (define (iterate-first t)
    (and (not (null? t))
    0))
    ;; (trace iterate-first)
    (define (iterate-next t pos)
    (if (= pos (sub1 (length t)))
    #f
    (add1 pos)))
    ;; (trace iterate-next)
    (define (iterate-key t pos)
    (car (list-ref t pos)))
    ;; (trace iterate-key)
    (define (iterate-value t pos)
    (cdr (list-ref t pos)))
    ;; (trace iterate-value)
    (define-struct trie
    (alist
    value ;;either #f or a box
    )
    #:property prop:dict (vector
    ref
    #f set
    #f remove
    count
    iterate-first iterate-next
    iterate-key iterate-value)
    #:transparent)
    (provide make-immutable-trie trie?)
    (define (make-immutable-trie)
    (make-trie '() #f))

    view raw

    trie.ss

    hosted with ❤ by GitHub

  3. Vikas Tandi said

    implemented in c language:
    http://codepad.org/PmYVlSN6

Leave a comment