Word Count
December 8, 2009
Here’s our version:
#! /usr/bin/scheme --script
;; Output switches: which of the three counts (lines, words, characters)
;; get printed.  All of them start out enabled.
(define l-flag #t)
(define w-flag #t)
(define c-flag #t)

;; update-flags: FS is a list of option characters.  Every switch whose
;; letter (#\l, #\w or #\c) is absent from FS is turned off; switches
;; whose letter is present are left untouched.
(define (update-flags fs)
  (unless (memv #\l fs) (set! l-flag #f))
  (unless (memv #\w fs) (set! w-flag #f))
  (unless (memv #\c fs) (set! c-flag #f)))
;; put-dec: display the number N right-justified in a field of WIDTH
;; columns, padding on the left with spaces.  When N needs more than
;; WIDTH characters it is printed in full with no padding (the padding
;; length is clamped at zero so make-string never sees a negative size).
(define (put-dec n width)
  (let* ((n-str (number->string n))
         (pad (max 0 (- width (string-length n-str)))))
    (display (make-string pad #\space))
    (display n-str)))
;; wc: consume the current input port up to end of file and return three
;; values: the number of newline characters, the number of words, and the
;; number of characters read.  A word is a maximal run of characters
;; other than space, tab and newline.
(define (wc)
  (let scan ((ch (read-char)) (in-word? #f) (lines 0) (words 0) (chars 0))
    (if (eof-object? ch)
        (values lines words chars)
        (let ((blank? (memv ch '(#\space #\newline #\tab))))
          (scan (read-char)
                ;; the next iteration is "inside a word" exactly when
                ;; this character is not a separator
                (not blank?)
                (if (char=? ch #\newline) (+ lines 1) lines)
                ;; a word starts on a non-separator that follows a
                ;; separator (or the start of input)
                (if (or blank? in-word?) words (+ words 1))
                (+ chars 1))))))
;; main: ARGS is the list of command-line argument strings.
;; An optional first argument beginning with "-" (for example "-lw")
;; selects which counts are printed.  With no file names the counts for
;; the current input port are printed on one line; otherwise one line is
;; printed per file, followed by a line of totals.
(define (main args)
  ;; The length check keeps string-ref from failing on an empty argument.
  (when (and (pair? args)
             (positive? (string-length (car args)))
             (char=? (string-ref (car args) 0) #\-))
    (update-flags (cdr (string->list (car args))))
    (set! args (cdr args)))
  (if (null? args)
      (let-values (((ls ws cs) (wc)))
        (when l-flag (display ls) (display " "))
        (when w-flag (display ws) (display " "))
        (when c-flag (display cs) (display " "))
        (newline))
      (let loop ((args args) (l-tot 0) (w-tot 0) (c-tot 0))
        (if (null? args)
            (begin
              (when l-flag (put-dec l-tot 12))
              (when w-flag (put-dec w-tot 12))
              (when c-flag (put-dec c-tot 12))
              ;; label and terminate the totals line
              (display " total")
              (newline))
            ;; Count inside with-input-from-file, but print and recurse
            ;; outside it, so one file is finished before the next one
            ;; is opened.
            (let-values (((ls ws cs) (with-input-from-file (car args) wc)))
              (when l-flag (put-dec ls 12))
              (when w-flag (put-dec ws 12))
              (when c-flag (put-dec cs 12))
              (display " ") (display (car args)) (newline)
              (loop (cdr args) (+ l-tot ls) (+ w-tot ws) (+ c-tot cs)))))))
;; Script entry point: call main with every element of (command-line)
;; except the first (presumably the script name -- confirm for your
;; Scheme system).
(main (cdr (command-line)))
The code for handling the optional flags and filenames is tedious but straightforward; the shebang line is specific to Chez Scheme, as is the command-line procedure, but most Scheme systems have something similar. The wc function performs the actual counting; each input character adds 1 to cs, each newline character adds 1 to ls, and ws is incremented each time the inword variable, which is #t if and only if the current character is not a blank, tab or newline, goes from #f to #t.
We use add1 from the Standard Prelude. You can see the collected code at http://programmingpraxis.codepad.org/ZxPjiEvw.
[…] Praxis – Word Count By Remco Niemeijer In today’s Programming Praxis exercise, we have to implement the Unix wc command line utility. Let’s get […]
My Haskell solution (see http://bonsaicode.wordpress.com/2009/12/08/programming-praxis-word-count/ for a version with comments):
GNU wc has three major improvements on the naive algorithm:
1) It does its own block buffering rather than using stdio buffering, which means that characters are counted a block at a time.
2) It has multiple inner loops, deciding on the basis of the options which one to run (thus the inword logic is not executed at all if -w is not specified).
3) If -c is the only option, then it attempts to fstat() files rather than reading them, being careful to make sure the file is a regular file (not a device file) and taking into account the possibility that stdin may be a file that isn’t positioned at its beginning. This allows O(1) behavior in favorable circumstances.
It also extends classic wc semantics by being able to count bytes with -b and possibly-multibyte characters with -c, though if characters are known to be single-byte in the current encoding it will treat -c and -b the same (optimization #3 above really applies to -b). It also provides -L which returns the length of the longest line and -W to return the count of words, which are obvious and useful extensions.
# Word count: prints the line, word and character counts of a file.
#
# Usage: script [-lwc] [filename]
# A first argument starting with "-" selects which counts to print
# (l = lines, w = words, c = characters); without it all three are
# printed.  When no filename is given, standard input is read.

# Init some variables
count_lines = true
count_words = true
count_chars = true
line_count = 0
word_count = 0
char_count = 0
filename = ARGV[0]

# Check arguments: an option string replaces the defaults, and the
# filename then comes from the second argument.
if ARGV[0] =~ /^-.+$/
  count_lines = ARGV[0].include?('l')
  count_words = ARGV[0].include?('w')
  count_chars = ARGV[0].include?('c')
  filename = ARGV[1]
end

# Count with Regexp: a word is a maximal run of characters other than
# space, newline and tab.
tally = lambda do |io|
  io.each_line do |line|
    line_count += 1
    word_count += line.scan(/[^ \n\t]+/).size
    char_count += line.size
  end
end

if filename
  # Read-only mode is enough for counting; the block form closes the file.
  File.open(filename, 'r') { |file| tally.call(file) }
else
  tally.call($stdin)
end

puts "Line Count = #{line_count}" if count_lines
puts "Word Count = #{word_count}" if count_words
puts "Character Count = #{char_count}" if count_chars
Sorry about the previous post:
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
/* Output selection flags: non-zero means print that count. */
int lflag = 0;          /* lines */
int wflag = 0;          /* words */
int cflag = 0;          /* characters */

/* Name of the file being counted; stays empty when reading standard input. */
char *fname = "";

/* Shared read buffer filled by wc(). */
char buf[BUFSIZ];

/* errno is provided by <errno.h>; it must not be redeclared by hand. */
int wc(int fd);         /* defined further down in this file */

/*
 * main: parse leading option arguments, then count either standard input
 * (no file arguments) or each named file in turn.
 *
 * Options may be given separately ("-l -w") or combined ("-lw").  When no
 * option is given, all three counts are printed.  Exits with status 1 on
 * an unknown option or when a file cannot be opened, 0 otherwise.
 */
int main(int argc, char *argv[])
{
    /* Saved up front: argv is advanced while options are consumed. */
    const char *progname = argv[0];
    const char *opt;
    int i, fd;

    while (argc > 1 && argv[1][0] == '-') {
        opt = argv[1] + 1;
        if (*opt == '\0') {
            fprintf(stderr, "usage: %s [-lwc] [file ...]\n", progname);
            exit(1);
        }
        /* Examine every letter so combined options such as -lw work. */
        for (; *opt != '\0'; opt++) {
            switch (*opt) {
            case 'l':
                lflag = 1;
                break;
            case 'w':
                wflag = 1;
                break;
            case 'c':
                cflag = 1;
                break;
            default:
                fprintf(stderr, "usage: %s [-lwc] [file ...]\n", progname);
                exit(1);
            }
        }
        argc--;
        argv++;
    }

    /* No option given: print everything. */
    if (lflag == 0 && wflag == 0 && cflag == 0)
        lflag = wflag = cflag = 1;

    if (argc == 1) {
        wc(STDIN_FILENO);
    } else {
        for (i = 1; i < argc; i++) {
            if ((fd = open(argv[i], O_RDONLY)) == -1) {
                fprintf(stderr, "%s: can not open %s, errno=%d\n",
                        progname, argv[i], errno);
                exit(1);
            }
            fname = argv[i];
            wc(fd);
            close(fd);
        }
    }
    return 0;
}
/*
 * wc: read descriptor fd to end of file and print the counts selected by
 * lflag / wflag / cflag, followed by the current file name when one is set.
 *
 * A word is a maximal run of bytes other than space, tab and newline.
 * Returns 0 on success, -1 if read() failed (the counts gathered up to
 * that point are still printed).
 */
int wc(int fd)
{
    long lines = 0, words = 0, total = 0;
    int in_space = 1;       /* previous byte was a separator (or start) */
    ssize_t n;
    char *cp;

    /* "> 0" matters: read() returns -1 on error, which must end the loop. */
    while ((n = read(fd, buf, BUFSIZ)) > 0) {
        total += n;
        for (cp = buf; cp != buf + n; cp++) {
            char c = *cp;

            if (c == '\n') {
                lines++;
                in_space = 1;
            } else if (c != ' ' && c != '\t') {
                if (in_space) {
                    in_space = 0;
                    words++;
                }
            } else {
                in_space = 1;
            }
        }
    }
    if (n < 0)
        perror("read");

    if (lflag)
        printf("%ld ", lines);
    if (wflag)
        printf("%ld ", words);
    if (cflag)
        printf("%ld ", total);
    /* fname is empty (or unset) when counting standard input. */
    if (fname && *fname)
        printf("%s", fname);
    printf("\n");

    return n < 0 ? -1 : 0;
}
I too posted before reading the instructions. Sorry about that.
[…] to the RSS feed or email list for updates on this topic. The goal for this Programming Praxis was to implement the Unix wc function. This one took me a couple days (I haven’t had a lot of time recently) to complete, but I […]
A ruby version that a) doesn’t look all that rubyish and b) doesn’t format the output nicely but does work more or less like the unix version
A Ruby solution that only takes a filename (http://codepad.org/CC98XPaa); it returns the line, character and word counts, in that order.