Word Count
December 8, 2009
Here’s our version:
#! /usr/bin/scheme --script
;; Output-selection flags: when true, the corresponding count (lines,
;; words, characters) is printed.  All three default to on.
(define l-flag #t)
(define w-flag #t)
(define c-flag #t)

;; update-flags: given FS, the list of option characters that followed
;; the leading dash, switch off every count whose letter is absent.
(define (update-flags fs)
  (unless (memv #\l fs) (set! l-flag #f))
  (unless (memv #\w fs) (set! w-flag #f))
  (unless (memv #\c fs) (set! c-flag #f)))
;; put-dec: display the number N right-justified in a field of WIDTH
;; columns.  When N needs more than WIDTH columns it is printed in full
;; with no padding (the pad length is clamped at zero so make-string is
;; never handed a negative count).
(define (put-dec n width)
  (let* ((n-str (number->string n))
         (pad (max 0 (- width (string-length n-str)))))
    (display (make-string pad #\space))
    (display n-str)))
;; wc: read characters from the current input port until end of file and
;; return three values: the number of newlines, the number of words, and
;; the number of characters.  A word is a maximal run of characters that
;; are not space, tab, or newline.
(define (wc)
  (let scan ((in-word? #f) (ch (read-char)) (lines 0) (words 0) (chars 0))
    (if (eof-object? ch)
        (values lines words chars)
        (let* ((newline? (char=? ch #\newline))
               (blank? (or newline? (char=? ch #\space) (char=? ch #\tab)))
               ;; a word begins on the first non-blank after a blank
               (new-word? (and (not blank?) (not in-word?))))
          (scan (not blank?)
                (read-char)
                (if newline? (+ lines 1) lines)
                (if new-word? (+ words 1) words)
                (+ chars 1))))))
;; main: entry point.  ARGS is the command line without the program name.
;; An optional first argument beginning with "-" selects which counts to
;; print (any of l, w, c); the remaining arguments are file names.
;;
;; With no file names, standard input is counted and the selected counts
;; are printed separated by spaces.  Otherwise each file is counted and
;; printed in 12-column fields followed by its name; when more than one
;; file is given, a final line labelled "total" follows.
(define (main args)
  ;; Guard against an empty-string argument before looking at its first char.
  (when (and (pair? args)
             (> (string-length (car args)) 0)
             (char=? (string-ref (car args) 0) #\-))
    (update-flags (cdr (string->list (car args))))
    (set! args (cdr args)))
  (if (null? args)
      (let-values (((ls ws cs) (wc)))
        (when l-flag (display ls) (display " "))
        (when w-flag (display ws) (display " "))
        (when c-flag (display cs) (display " "))
        (newline))
      (let ((several? (pair? (cdr args))))
        (let loop ((files args) (l-tot 0) (w-tot 0) (c-tot 0))
          (if (null? files)
              ;; A totals line is only meaningful for two or more files.
              (when several?
                (when l-flag (put-dec l-tot 12))
                (when w-flag (put-dec w-tot 12))
                (when c-flag (put-dec c-tot 12))
                (display " total")
                (newline))
              ;; Count inside the thunk, but continue the loop outside it so
              ;; each file is closed before the next one is opened.
              (let* ((counts (with-input-from-file (car files)
                               (lambda () (call-with-values wc list))))
                     (ls (car counts))
                     (ws (cadr counts))
                     (cs (caddr counts)))
                (when l-flag (put-dec ls 12))
                (when w-flag (put-dec ws 12))
                (when c-flag (put-dec cs 12))
                (display " ") (display (car files)) (newline)
                (loop (cdr files) (+ l-tot ls) (+ w-tot ws) (+ c-tot cs))))))))

(main (cdr (command-line)))
The code for handling the optional flags and filenames is tedious but straightforward; the shebang line is specific to Chez Scheme, as is the command-line procedure, but most Scheme systems have something similar. The wc function performs the actual counting: each input character adds 1 to cs, and each newline character adds 1 to ls. The inword variable is #t if and only if the current character is not a blank, tab, or newline, and ws is incremented each time inword goes from #f to #t.
We use add1 from the Standard Prelude. You can see the collected code at http://programmingpraxis.codepad.org/ZxPjiEvw.
[…] Praxis – Word Count By Remco Niemeijer In today’s Programming Praxis exercise, we have to implement the Unix wc command line utility. Let’s get […]
My Haskell solution (see http://bonsaicode.wordpress.com/2009/12/08/programming-praxis-word-count/ for a version with comments):
import System.Environment import Text.Printf parseOpts :: [String] -> ([Bool], [String]) parseOpts (('-':ps):args) = (map (`elem` ps) "lwc", args) parseOpts args = (replicate 3 True, args) count :: [Bool] -> [(String, String)] -> [String] count opts = map (\(name, text) -> concat [printf "%8s" $ if opt then show . length $ f text else "-" | (f, opt) <- zip [lines, words, map return] opts] ++ " " ++ name) main :: IO () main = do args <- getArgs let (opts, files) = parseOpts args mapM_ putStrLn . count opts =<< if null files then fmap (\x -> [("", x)]) getContents else fmap (zip files) $ mapM readFile files# # returns, for the given file, an array with the number of lines, # words and character # def word_count file lines = 0 chars = 0 words = 0 open(file) do |f| f.each do |line| lines += 1 chars += line.length words += line.split.length end end [lines, words, chars] end # retrieve command line options options = ['w', 'l', 'c'] if ARGV[0] =~ /-([lwc])([lwc])?([lwc])?/ options = [$1, $2, $3] ARGV.shift end unless ARGV[0] abort("Usage: #{$0} file1 file2 ...") end cumulate = ARGV.length > 1 # process each file and output the count total_lines = 0 total_chars = 0 total_words = 0 ARGV.each do |file| unless File.exist?(file) $stderr.puts "File not found: #{file}" next end lines, words, chars = word_count(file) print "#{'%7d' % lines}\t" if options.include?('l') print "#{'%7d' % words}\t" if options.include?('w') print "#{'%7d' % chars}\t" if options.include?('c') print "\t#{file}" if cumulate puts total_lines += lines total_chars += chars total_words += words end if cumulate print "#{'%7d' % total_lines}\t" if options.include?('l') print "#{'%7d' % total_words}\t" if options.include?('w') print "#{'%7d' % total_chars}\t" if options.include?('c') puts "total" endGNU wc has three major improvements on the naive algorithm:
1) It does its own block buffering rather than using stdio buffering, which means that characters are counted a block at a time.
2) It has multiple inner loops, deciding on the basis of the options which one to run (thus the inword logic is not executed at all if -w is not specified).
3) If -c is the only option, then it attempts to fstat() files rather than reading them, being careful to make sure the file is a regular file (not a device file) and taking into account the possibility that stdin may be a file that isn’t positioned at its beginning. This allows O(1) behavior in favorable circumstances.
It also extends classic wc semantics by being able to count bytes with -b and possibly-multibyte characters with -c, though if characters are known to be single-byte in the current encoding it will treat -c and -b the same (optimization #3 above really applies to -b). It also provides -L which returns the length of the longest line and -W to return the count of words, which are obvious and useful extensions.
# Count lines, words and characters of a single file.
# Usage: script [-lwc] FILE
# With no option all three counts are printed; with an option argument
# only the counts whose letters appear in it are printed.

# Init some variables
count_lines = true
count_words = true
count_chars = true
line_count = 0
word_count = 0
char_count = 0
filename = ARGV[0]

# Check arguments: a first argument starting with "-" selects the counts
# and pushes the file name to the second position.
if ARGV[0] =~ /^-.+$/
  count_lines = ARGV[0].include?('l')
  count_words = ARGV[0].include?('w')
  count_chars = ARGV[0].include?('c')
  filename = ARGV[1]
end

# Fail with a usage message instead of a TypeError from File.open(nil).
abort("Usage: #{$0} [-lwc] file") if filename.nil?

# Count with Regexp.  The file is opened read-only (counting never writes)
# and the block form closes it even if an exception is raised.
File.open(filename, 'r') do |file|
  while (line = file.gets)
    line_count += 1
    word_count += line.scan(/[^ \n\t]+/).size
    char_count += line.size
  end
end

puts "Line Count = #{line_count}" if count_lines
puts "Word Count = #{word_count}" if count_words
puts "Character Count = #{char_count}" if count_chars
Sorry about the previous post:
# Count lines, words and characters of a single file.
# Usage: script [-lwc] FILE

# Init Some Variables
count_lines = true
count_words = true
count_chars = true
line_count = 0
word_count = 0
char_count = 0
filename = ARGV[0]

# Check Arguments: a leading "-xyz" argument selects which counts to print.
if ARGV[0] =~ /^-.+$/
  count_lines = ARGV[0].include?('l')
  count_words = ARGV[0].include?('w')
  count_chars = ARGV[0].include?('c')
  filename = ARGV[1]
end

# Report a missing file name explicitly rather than crashing in File.open.
abort("Usage: #{$0} [-lwc] file") if filename.nil?

# Count with Regexp.  Read-only mode is sufficient, and the block form
# guarantees the handle is closed.
File.open(filename, 'r') do |file|
  while (line = file.gets)
    line_count += 1
    word_count += line.scan(/[^ \n\t]+/).size
    char_count += line.size
  end
end

puts "Line Count = #{line_count}" if count_lines
puts "Word Count = #{word_count}" if count_words
puts "Character Count = #{char_count}" if count_chars
#include
#include <errno.h>   /* errno; replaces the non-portable "extern int errno;" */
#include <fcntl.h>   /* open, O_RDONLY */
#include <stdio.h>   /* printf, BUFSIZ */
#include <stdlib.h>  /* exit */
#include <unistd.h>  /* read, close, STDIN_FILENO */

/* Output-selection flags: non-zero means print that count. */
int lflag = 0;  /* lines */
int wflag = 0;  /* words */
int cflag = 0;  /* characters */

/* Name of the file currently being counted; NULL while reading stdin. */
char *fname = NULL;

/* Shared read buffer used by wc(). */
char buf[BUFSIZ];
int wc(int fd);  /* defined later in this file */

/*
 * Entry point: wc [-lwc] [name...]
 *
 * Leading arguments that start with '-' select which counts to print;
 * letters may be combined ("-lw") or given separately ("-l -w").  With
 * no option, all three counts are printed.  With no file operands,
 * standard input is counted; otherwise each named file is counted in
 * turn.  Exits with status 1 on an unknown option or an unopenable file.
 */
int main(int argc, char *argv[])
{
    /* Saved before argv is advanced, so diagnostics name the program. */
    const char *prog = argv[0];
    const char *opt;
    int i, fd;

    while (argc > 1 && argv[1][0] == '-') {
        if (argv[1][1] == '\0') {
            /* A bare "-" carries no option letter. */
            printf("usage: wc [-lwc] [name...]\n");
            exit(1);
        }
        /* Walk every letter so combined options such as -lw work. */
        for (opt = argv[1] + 1; *opt != '\0'; opt++) {
            switch (*opt) {
            case 'l':
                lflag = 1;
                break;
            case 'w':
                wflag = 1;
                break;
            case 'c':
                cflag = 1;
                break;
            default:
                printf("usage: wc [-lwc] [name...]\n");
                exit(1);
            }
        }
        argc--;
        argv++;
    }

    /* No option given: print everything. */
    if (lflag == 0 && wflag == 0 && cflag == 0)
        lflag = wflag = cflag = 1;

    if (argc == 1) {
        wc(STDIN_FILENO);
    } else {
        for (i = 1; i < argc; i++) {
            if ((fd = open(argv[i], O_RDONLY)) == -1) {
                printf("%s: can not open %s, errno=%d\n", prog, argv[i], errno);
                exit(1);
            }
            fname = argv[i];
            wc(fd);
            close(fd);
        }
    }
    exit(0);
}
/*
 * Count newlines, words and bytes read from file descriptor fd and print
 * the counts selected by lflag/wflag/cflag, followed by fname when it is
 * set.  A word is a maximal run of bytes other than space, tab and
 * newline.
 *
 * Returns 0 on success, or -1 if read() fails (a diagnostic is written
 * with perror and no counts are printed).
 */
int wc(int fd)
{
    long lines = 0, words = 0, total = 0;
    int in_space = 1;   /* 1 while the previous byte was whitespace */
    ssize_t n;
    char *cp, c;

    /* Stop on end of file (0) AND on error (-1); a bare truth test would
     * treat -1 as "data" and walk cp past the start of buf. */
    while ((n = read(fd, buf, BUFSIZ)) > 0) {
        total += n;
        for (cp = buf; cp != buf + n; cp++) {
            c = *cp;
            if (c == '\n') {
                lines++;
                in_space = 1;
            } else if (c != ' ' && c != '\t') {
                if (in_space) {
                    in_space = 0;
                    words++;
                }
            } else {
                in_space = 1;
            }
        }
    }
    if (n < 0) {
        perror("read");
        return -1;
    }

    if (lflag)
        printf("%ld ", lines);
    if (wflag)
        printf("%ld ", words);
    if (cflag)
        printf("%ld ", total);
    if (fname)
        printf("%s", fname);
    printf("\n");
    return 0;
}
I too posted before reading the instructions. Sorry about that.
#include <stdio.h> #include <sys/fcntl.h> #include <stdlib.h> #include <unistd.h> int lflag = 0; int wflag = 0; int cflag = 0; char *fname = '\0'; char buf[BUFSIZ]; extern int errno; main(argc, argv) int argc; char *argv[]; { int i, fd; while (argc > 1 && argv[1][0] == '-') { switch (argv[1][1]) { case 'l' : lflag = 1; break; case 'w' : wflag = 1; break; case 'c' : cflag = 1; break; default : printf("usage: wc [-lwc] [name...]\n"); exit(1); } argc--; argv++; } if (lflag == 0 && wflag == 0 && cflag == 0) lflag = wflag = cflag = 1; if (argc == 1) wc(STDIN_FILENO); else for (i = 1; i < argc; i++) if ((fd = open(argv[i], O_RDONLY)) == -1) { printf("%s: can not open %s, errno=%d\n", argv[0], argv[i],errno); exit(1); } else { fname = argv[i]; wc(fd); close(fd); } exit(0); } wc(fd) int fd; { int n, l = 0, w = 0, t = 0, ws = 1; char *cp, c; while (n = read(fd, buf, BUFSIZ)) { t += n; for (cp = buf; cp != (buf + n); cp++) { c = *cp; if (c == '\n') { l++; ws = 1; } else if (c != ' ' && c != '\t') { if (ws) { ws = 0; w++; } } else ws = 1; } } if (lflag) printf ("%d ", l); if (wflag) printf("%d ", w); if (cflag) printf("%d ", t); if (fname) printf("%s", fname); printf("\n"); }[…] to the RSS feed or email list for updates on this topic.The goal for this Programming Praxis was to implement the Unix wc function. This one took me a couple days (I haven’t had a lot of time recently) to complete, but I […]
A ruby version that a) doesn’t look all that rubyish and b) doesn’t format the output nicely but does work more or less like the unix version
require 'getoptlong' opts = GetoptLong.new( ["--words", "-w", GetoptLong::NO_ARGUMENT], ["--chars", "-c", GetoptLong::NO_ARGUMENT], ["--lines", "-l", GetoptLong::NO_ARGUMENT], ["--verbose", "-v", GetoptLong::NO_ARGUMENT] ) words = false lines = false chars = false $verbose = false begin opts.each do | opt, arg| case opt when "--words" words = true when "--chars" chars = true when "--lines" lines = true when "--verbose" $verbose = true end end rescue puts "Illegal command line option." exit end accumulate = false if ARGV.length > 1 accumulate = true wcl_totals = Hash.new(0) end puts "accumulate = #{accumulate}" ARGV.each do |file_name| File.open(file_name) do | file | wcl = Hash.new(0) while line = file.gets wcl[:words] += line.split.length wcl[:chars] += line.length wcl[:lines] += 1 end if accumulate wcl_totals[:words] += wcl[:words] wcl_totals[:chars] += wcl[:chars] wcl_totals[:lines] += wcl[:lines] end puts "#{wcl[:words] if words} #{wcl[:lines] if lines} #{wcl[:chars] if chars} #{file_name}" end end puts "#{wcl_totals[:words] if words} #{wcl_totals[:lines] if lines} #{wcl_totals[:chars] if chars} Total" if accumulateruby solution that only takes a filename (http://codepad.org/CC98XPaa) returns values lines,characters,words in that order