Cut
August 17, 2010
We begin with a function to expand ranges, which is reminiscent of a previous exercise:
; expand-ranges: turn a cut-style list such as "1-3,5" into a flat list of
; numbers.  STR is split on commas; each item is either a single number or
; two numbers joined by a hyphen, in which case every number from the first
; to the second (inclusive) is produced.  Items are expanded in the order
; written; nothing is sorted or de-duplicated.
; string-split and range come from the Standard Prelude.
(define (expand-ranges str)
  ; Expand one comma-separated item into a list of numbers.
  (define (item->numbers item)
    (let* ((parts (string-split #\- item))
           (low (string->number (car parts))))
      (if (pair? (cdr parts))
          ; range is called with the upper endpoint plus one so that the
          ; endpoint itself is included in the result.
          (range low (+ (string->number (cadr parts)) 1))
          (list low))))
  (let loop ((items (string-split #\, str)) (expanded '()))
    (if (null? items)
        (apply append (reverse expanded))
        (loop (cdr items)
              (cons (item->numbers (car items)) expanded)))))
Cut operates in two modes. In character mode, it writes the characters corresponding to the expanded range (remember that character positions are counted from one, not zero), followed by a newline. Field mode is harder, because first the fields must be split on the delimiter, then the delimiter must be inserted between fields (but not at the end of the line):
; write-chars: display the characters of STR found at the one-based
; positions listed in CS, in the order given, followed by a newline.
; Positions that fall outside the line (less than 1, or past its end) are
; skipped rather than passed to string-ref, so a line shorter than the
; requested positions yields a shorter output line instead of an error.
(define (write-chars cs str)
  (let ((len (string-length str)))
    (do ((cs cs (cdr cs))) ((null? cs) (newline))
      (let ((pos (car cs)))
        (if (<= 1 pos len)
            ; positions are counted from one, string-ref from zero
            (display (string-ref str (- pos 1))))))))
; write-fields: split STR on the character DELIM and display the fields at
; the one-based positions listed in FS, in the order given, separated by
; DELIM and followed by a newline.
; Field numbers that the line does not have are skipped rather than passed
; to list-ref, and the delimiter is written only between fields that were
; actually printed, so short lines produce neither an error nor a stray
; leading or trailing delimiter.  A newline is written even when nothing
; is selected, as write-chars does.
; string-split comes from the Standard Prelude.
(define (write-fields fs str delim)
  (let* ((fields (string-split delim str))
         (count (length fields)))
    (let loop ((fs fs) (first? #t))
      (cond ((null? fs) (newline))
            ((<= 1 (car fs) count)
             (if (not first?) (display delim))
             ; field numbers are counted from one, list-ref from zero
             (display (list-ref fields (- (car fs) 1)))
             (loop (cdr fs) #f))
            (else (loop (cdr fs) first?))))))
Do-file handles a single file, regardless of character mode or field mode, leaving the task of setting the current input port to the caller. The two legs of the if each handle one mode, using a do loop to process each line individually:
; do-file: cut every line read from the current input port.
; OPTS is the association list of options, keyed by option character, whose
; values are the option arguments as strings.
;   #\c  list of character positions -> character mode
;   #\f  list of field numbers       -> field mode
;   #\d  field delimiter; only its first character is used.  Optional, as
;        the usage message says: when absent (or empty) it defaults to tab.
; Setting the current input port is left to the caller.  An error is
; raised when neither #\c nor #\f is present.
; read-line comes from the Standard Prelude.
(define (do-file opts)
  ; Argument of option KEY, or #f when the option was not given.
  (define (option key)
    (let ((entry (assoc key opts)))
      (and entry (cdr entry))))
  ; Apply PROC to each line of the current input port until end of file.
  (define (each-line proc)
    (do ((line (read-line) (read-line)))
        ((eof-object? line))
      (proc line)))
  (let ((chars (option #\c))
        (fields (option #\f))
        (delim (option #\d)))
    (cond (chars
           (let ((cs (expand-ranges chars)))
             (each-line (lambda (line) (write-chars cs line)))))
          (fields
           (let ((fs (expand-ranges fields))
                 (d (if (and delim (> (string-length delim) 0))
                        (string-ref delim 0)
                        #\tab)))
             (each-line (lambda (line) (write-fields fs line d)))))
          (else
           (error 'do-file "either -c or -f must be given")))))
All that’s left is the main program, which extracts parameters from the command line, then calls do-file to handle the current input port if there are no files on the command line, or processes the files individually in a do loop if one or more files are named on the command line:
; Main program: parse the command line with getopt, then run do-file on
; the current input port when no files are named, or on each named file in
; turn with that file as the current input port.
(call-with-values
  (lambda ()
    (getopt "c:d:f:"
            "usage: cut -clist [file ...] or cut -flist [-dchar] [file ...]"
            (cdr (command-line))))
  (lambda (opts files)
    (if (pair? files)
        (for-each
          (lambda (file)
            (with-input-from-file file (lambda () (do-file opts))))
          files)
        (do-file opts))))
Note that command-line is specific to Chez Scheme, and must change for other Scheme implementations. We used range, read-line, and string-split from the Standard Prelude, and getopt from an earlier exercise. You can see the program assembled at http://programmingpraxis.codepad.org/U3Z6l5bV.
This was pretty fun to write.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""A small clone of Unix cut: select characters (-c) or fields (-f, -d)."""

from optparse import OptionParser
from sys import stdout, stdin, exit


def parse_list(_list):
    """Expand a cut-style list into sorted, unique one-based positions.

    _list is a comma-separated string whose items are either a single
    number ("5") or an inclusive range ("2-4").  The result is returned in
    ascending order; iterating a bare set does not guarantee that, so the
    positions are sorted explicitly.
    """
    positions = set()
    for item in _list.split(","):
        if "-" in item:
            bounds = item.split("-")
            positions.update(range(int(bounds[0]), int(bounds[1]) + 1))
        else:
            positions.add(int(item))
    return sorted(positions)


def parse_options():
    """Parse the command line and return (options, args).

    Exits through parser.error() when -c and -f are both given, when
    neither is given, or when a named file cannot be opened.
    """
    parser = OptionParser(
        usage="usage: %prog OPTION... [FILE]...",
        version="%prog 0.1"
    )
    parser.add_option(
        "-c", "--characters",
        dest="character_list",
        help="select only these characters",
        metavar="LIST",
        default=None
    )
    parser.add_option(
        "-d", "--delimiter",
        dest="delimiter",
        help="use CHARACTER instead of TAB as a filed delimiter",
        metavar="CHARACTER",
        default="\t"
    )
    parser.add_option(
        "-f", "--fields",
        dest="field_list",
        help="select only these fields",
        metavar="LIST",
        default=None
    )
    options, args = parser.parse_args()
    if options.character_list and options.field_list:
        parser.error("options -c and -f are mutually exclusive.")
    if not options.character_list and not options.field_list:
        parser.error("you must specify a list of characters or fields.")
    # Probe every named file up front, closing each handle straight away.
    for name in args:
        try:
            open(name, "r").close()
        except IOError:
            parser.error("file '%s' does not exist." % name)
    return options, args


def _read_lines(args):
    """Return the lines of every file in args, or of stdin when args is empty."""
    if not args:
        return stdin.readlines()
    lines = []
    for name in args:
        with open(name, "r") as handle:
            lines.extend(handle.readlines())
    return lines


def _select_chars(text, positions):
    """Return the characters of text at the given one-based positions."""
    return "".join(text[i - 1] for i in positions if 1 <= i <= len(text))


def _select_fields(text, positions, delimiter):
    """Return the chosen one-based fields of text, re-joined with delimiter."""
    fields = text.split(delimiter)
    return delimiter.join(
        fields[i - 1] for i in positions if 1 <= i <= len(fields)
    )


def main():
    """Run cut over the named files, or over stdin when none are named."""
    options, args = parse_options()
    if options.character_list:
        positions = parse_list(options.character_list)
    else:
        positions = parse_list(options.field_list)
    for line in _read_lines(args):
        # Drop the line terminator first so it is neither selected as a
        # character nor left glued to the last field.
        text = line.rstrip("\n")
        if options.character_list:
            stdout.write(_select_chars(text, positions))
        else:
            stdout.write(_select_fields(text, positions, options.delimiter))
        stdout.write("\n")


if __name__ == "__main__":
    main()

# I couldn’t help but have a try at it in Elisp. Of course I think it’s useless, but there it is:
;;; cut.el --- Dimitri Fontaine
;;
;; https://programmingpraxis.com/2010/08/17/cut/
;;
(require 'cl-lib)

(defun dim:cut--parse-ranges (list)
  "Parse LIST, e.g. \"1-4,5,6-8\", into a list of (BEGIN END) pairs.
A single number N becomes the pair (N N)."
  (mapcar (lambda (x)
            (if (string-match "-" x)
                (mapcar #'string-to-number (split-string x "-"))
              (list (string-to-number x) (string-to-number x))))
          (split-string list ",")))

(defun dim:cut--chars (line ranges)
  "Return the characters of LINE covered by RANGES.
RANGES holds one-based, inclusive (BEGIN END) pairs.  Ranges are clipped
to the length of LINE; ranges that start past its end select nothing."
  (let ((len (length line)))
    (cl-loop for (b e) in ranges
             when (and (>= b 1) (<= b (min e len)))
             ;; `substring' takes a zero-based start and an exclusive end,
             ;; so the one-based inclusive range B..E is (B-1, E).
             concat (substring line (1- b) (min e len)))))

(defun dim:cut--fields (line ranges delim)
  "Return the fields of LINE covered by RANGES, joined with DELIM.
LINE is split on the literal string DELIM.  RANGES holds one-based,
inclusive (BEGIN END) pairs; field numbers LINE does not have are skipped."
  (let ((fields (split-string line (regexp-quote delim)))
        (picked '()))
    (cl-loop for (b e) in ranges
             do (cl-loop for i from (max b 1) to (min e (length fields))
                         do (push (nth (1- i) fields) picked)))
    (mapconcat #'identity (nreverse picked) delim)))

(defun dim:cut (mode list &optional delimiter)
  "Implement Unix cut in Emacs Lisp.  For the fun of it.
MODE is the symbol `char' or `field'.  LIST is a string of comma-separated
positions or ranges such as \"1-4,5,6-8\".  DELIMITER is the field
separator string used in `field' mode; it defaults to a tab.

Each line of the current buffer is cut, and the result is written to the
buffer *cut*, which is then shown in the selected window."
  (unless (member mode '(char field))
    (error "Cut operates in `char' or `field' mode only."))
  (let* ((delim (or delimiter "\t"))
         (output (get-buffer-create "*cut*"))
         (ranges (dim:cut--parse-ranges list))
         (lines (split-string
                 (buffer-substring-no-properties (point-min) (point-max))
                 "\n"))
         text)
    ;; A buffer ending in a newline splits into a final empty string that
    ;; is not a line of its own; drop it.
    (when (and lines (string= (car (last lines)) ""))
      (setq lines (butlast lines)))
    (setq text
          (mapconcat (lambda (line)
                       (concat (if (eq mode 'char)
                                   (dim:cut--chars line ranges)
                                 (dim:cut--fields line ranges delim))
                               "\n"))
                     lines ""))
    (with-current-buffer output
      (erase-buffer)
      (insert text))
    (set-window-buffer (selected-window) output)))

(defun cut (ranges &optional delimiter)
  "Interactive caller for `dim:cut'.
Prompts for RANGES.  With a prefix argument, cut by character; otherwise
prompt for a DELIMITER character and cut by field.  When called from Lisp,
DELIMITER may be a character, a string, or nil for the default tab."
  (interactive
   (list (read-string "ranges: ")
         (unless current-prefix-arg (read-char "delimiter: "))))
  (dim:cut (if current-prefix-arg 'char 'field)
           ranges
           (cond (current-prefix-arg nil)
                 ((characterp delimiter) (char-to-string delimiter))
                 (t delimiter))))

;; Here it is in ruby …
require 'getoptlong'

# Expand a cut-style list such as "1-3,7" into [1, 2, 3, 7].
# Items are kept in the order given on the command line; nothing is sorted
# or de-duplicated.
def parse_list(list)
  list.split(',').flat_map do |element|
    if element =~ /-/
      first, last = element.split('-')
      (first.to_i..last.to_i).to_a
    else
      [element.to_i]
    end
  end
end

# Print the one-based fields of +line+ named in +print_list+, joined by
# +separator+ and followed by a newline.  The line terminator is removed
# first, the separator is matched literally, trailing empty fields are
# kept, and field numbers the line does not have are skipped.
def parse_line_fields(line, print_list, separator)
  # A Regexp avoids String#split's whitespace special case for " ";
  # the -1 limit keeps trailing empty fields.
  all_fields = line.chomp.split(/#{Regexp.escape(separator)}/, -1)
  wanted = print_list.select { |f| f.between?(1, all_fields.size) }
  puts wanted.map { |f| all_fields[f - 1] }.join(separator)
end

# Print the one-based columns of +line+ named in +print_list+, followed by
# a newline.  The line terminator is removed first and columns past the
# end of the line are skipped.
def parse_line_columns(line, print_list)
  text = line.chomp
  wanted = print_list.select { |f| f.between?(1, text.size) }
  puts wanted.map { |f| text[f - 1, 1] }.join
end

# Set up the command line options
opts = GetoptLong.new(
  ["--field", "-f", GetoptLong::REQUIRED_ARGUMENT],
  ["--separator", "-d", GetoptLong::REQUIRED_ARGUMENT],
  ["--column", "-c", GetoptLong::REQUIRED_ARGUMENT],
  ["--verbose", "-v", GetoptLong::NO_ARGUMENT]
)

# Set the default values for the options
field = false
separator = "\t"
column = false
list = ""
$verbose = false

# Parse the command line options. If we find one we don't recognize
# an exception will be thrown and we'll rescue with a message.
begin
  opts.each do |opt, arg|
    case opt
    when "--field"
      field = true
      list = arg
    when "--separator"
      separator = arg
    when "--column"
      column = true
      list = arg
    when "--verbose"
      $verbose = true
    end
  end
rescue
  puts "Illegal command line option."
  exit 1
end

print_list = parse_list(list)

# Cut every line of one input stream in the selected mode.
cut_stream = lambda do |io|
  io.each_line do |line|
    if field
      parse_line_fields(line, print_list, separator)
    else
      parse_line_columns(line, print_list)
    end
  end
end

if ARGV.empty?
  # With no files named, read standard input, but only when a list was
  # actually given, so a bare invocation does not sit waiting for input.
  cut_stream.call($stdin) if field || column
else
  ARGV.each do |file_name|
    File.open(file_name) { |file| cut_stream.call(file) }
  end
end

# This one prints the fields/columns in the order that they are given in the
# command line unlike the Unix cut. I like it better this way but if you
# don’t then just sort the print_list on the return from parse_list(). The
# only other oddity is printing the separator in the field. Basically, if we
# decide to print a field and we’re past the first element, we’ll print the
# separator before the field.
This makes it so we don’t have to have a separator hanging out there after the last field is printed.
Here is my complete implementation in C:
http://codepad.org/QZL317EK