Brainfuck
October 4, 2011
Our brainfuck interpreter “compiles” a brainfuck program by stripping comment characters and storing the program in a vector:
(define (compile prog)
(list->vector
(filter (lambda (c) (member c (string->list "><+-.,[]")))
(string->list prog))))
The interpreter keeps two global vectors: iv is the compiled instruction vector, and dv is the data store. The ip instruction pointer and dp data pointer are handled in the controlling loop that runs through the instructions, decoding each one and performing the appropriate action as long as the instruction pointer doesn’t run off the end of the instruction vector:
(define (brainfuck prog)
(let* ((iv (compile prog))
(len (vector-length iv))
(dv (make-vector 30000 0)))
(let loop ((ip 0) (dp 0))
(when (< ip len)
(case (vector-ref iv ip)
((#\>) (loop (+ ip 1) (+ dp 1)))
((#\<) (loop (+ ip 1) (- dp 1)))
((#\+) (vector-set! dv dp (+ (vector-ref dv dp) 1))
(loop (+ ip 1) dp))
((#\-) (vector-set! dv dp (- (vector-ref dv dp) 1))
(loop (+ ip 1) dp))
((#\.) (display (integer->char (vector-ref dv dp)))
(loop (+ ip 1) dp))
((#\,) (vector-set! dv dp (char->integer (read-char)))
(loop (+ ip 1) dp))
((#\[) (if (zero? (vector-ref dv dp))
(loop (+ (forward-match iv ip) 1) dp)
(loop (+ ip 1) dp)))
((#\]) (if (zero? (vector-ref dv dp))
(loop (+ ip 1) dp)
(loop (+ (backward-match iv ip) 1) dp))))))))
Two auxiliary functions find matching brackets:
(define (forward-match iv ip)
(let loop ((ip (+ ip 1)) (n 0))
(cond ((char=? (vector-ref iv ip) #\[)
(loop (+ ip 1) (+ n 1)))
((char=? (vector-ref iv ip) #\])
(if (zero? n) ip
(loop (+ ip 1) (- n 1))))
(else (loop (+ ip 1) n)))))
(define (backward-match iv ip)
(let loop ((ip (- ip 1)) (n 0))
(cond ((char=? (vector-ref iv ip) #\])
(loop (- ip 1) (+ n 1)))
((char=? (vector-ref iv ip) #\[)
(if (zero? n) ip
(loop (- ip 1) (- n 1))))
(else (loop (- ip 1) n)))))
You can run the program at
http://programmingpraxis.codepad.org/aetegm3n
.
It looks like that the above brainfuck code is slightly wrong. It should be:
++++++++++[>+++++++>++++++++++>+++>+<<<++.>+.+++++++..+++.>++.<.+++.——.——–.>+.>.
You replaced <<<< by >>. The correct code is on page 2.
My last post does not print well. For the correct code see http://en.wikipedia.org/wiki/Brainfuck
I think this works.
This was my first prolog program…
% Everyone a depth further! inc_dist([],[]). inc_dist([N|XS],[M|RS]) :- M is N+1, inc_dist(XS,RS). dec_dist([],[]). dec_dist([N|XS],[M|RS]) :- M is N-1, dec_dist(XS,RS). %% First, BF without loops, input, outputs. % No more command bf_exec([], L,H,R,_S,_T) :- reverse(L,M), append(M,[H|[]],X), append(X,R,Res), print(Res). %% Move to the right % Create a new cell with a 0 in it bf_exec([right|N], L, H, [],S,T) :- inc_dist(T,T2), bf_exec(N,[H|L],0,[],[right|S],T2). bf_exec([right|N], L, H, [X|XS],S,T) :- inc_dist(T,T2), bf_exec(N,[H|L],X,XS,[right|S],T2). % Move to the left % Create a new cell with a 0 in it bf_exec([left|N], [], H, R, S, T) :- inc_dist(T,T2), bf_exec(N,[],0,[H|R],[left|S],T2). bf_exec([left|N], [X|XS], H, R, S, T) :- inc_dist(T,T2), bf_exec(N,XS,X,[H|R],[left|S],T2). % Increment bf_exec([incr|N], L, H, R,S,T) :- inc_dist(T,T2), M is H+1, bf_exec(N,L,M,R,[incr|S],T2). % Decrement bf_exec([decr|N], L, H, R,S,T) :- inc_dist(T,T2), M is H-1, bf_exec(N,L,M,R,[decr|S],T2). %% Now with I/O % Input bf_exec([input|N], L, _H, R, S, T) :- inc_dist(T,T2), get_char(O), char_code(O,C), bf_exec(N, L, C, R,[input|S],T2). % Output bf_exec([output|N], L, H, R, S, T) :- inc_dist(T,T2), char_code(O,H), print(O), bf_exec(N, L, H, R,[output|S],T2). %% And now loops % Push a new loop handler bf_exec([startloop|N], L, H, R,S,T) :- inc_dist(T,T2), bf_exec(N, L, H, R,[startloop|S],[1|T2]). % End loop % Pop off the handler bf_exec([endloop|N], L, 0, R,S,[Top|Rest]) :- inc_dist([Top|Rest],[_Tap|NewRest]), bf_exec(N, L, 0, R, [endloop|S],NewRest). % Rewind, restart bf_exec([endloop|N], L, H, R,S,T) :- bf_exec([rewind,endloop|N],L,H,R,S,T). % Reach loopstart. % Restart, No need to reset the handler. bf_exec([rewind|CL], L, H, R,[startloop|CS],[1|Rest]) :- bf_exec(CL,L,H,R,[startloop|CS],[1|Rest]). % Move back! bf_exec([rewind|CL], L, H, R,[X|XS],T) :- dec_dist(T,T2), bf_exec([rewind,X|CL],L,H,R,XS,T2). bf_eval(X) :- bf_exec(X,[],0,[],[],[]), !. % Hello World % bf_eval([incr,incr,incr,incr,incr,incr,incr,incr,incr,incr,startloop,right,incr,incr,incr,incr,incr,incr,incr,right,incr,incr,incr,incr,incr,incr,incr,incr,incr,incr,right,incr,incr,incr,right,incr,left,left,left,left,decr,endloop,right,incr,incr,output,right,incr,output,incr,incr,incr,incr,incr,incr,incr,output,output,incr,incr,incr,output,right,incr,incr,output,left,left,incr,incr,incr,incr,incr,incr,incr,incr,incr,incr,incr,incr,incr,incr,incr,output,right,output,incr,incr,incr,output,decr,decr,decr,decr,decr,decr,output,decr,decr,decr,decr,decr,decr,decr,decr,output,right,incr,output,right,output]).This works on the Hello World! program.
def parse(txt): data = [0] * 30000 ptr = 0 pos = 0 output = [] while pos < len(txt): c = txt[pos] if c == '<': ptr -= 1 elif c == '>': ptr += 1 elif c == '+': data[ptr] += 1 elif c == '-': data[ptr] -= 1 elif c == '.': output.append(chr(data[ptr])) elif c == ',': try: data[ptr] = ord(txt[pos+1]) pos += 1 except IndexError: data[ptr] = '\0' elif c == "[" and data[ptr] == 0: pos = txt.find("]", pos) elif c == "]": pos = txt.rfind("[", 0, pos) - 1 pos += 1 print("".join(output)) parse("++++++++++[>+++++++>++++++++++>+++>+<<<<-]>++.>+.+++++++..+++.>++.<<+++++++++++++++.>.+++.------.--------.>+.>.")Paul: Fixed. Thanks.
My earlier code did not work for nested loops. This code is slow, but it works. It is better to compile the function for speed.
NUMBER_CELLS = 30000 class BrainFuckException(Exception): pass def parse(txt, afile=None): data = [0] * NUMBER_CELLS ptr = 0 pos = 0 output = [] while pos < len(txt): c = txt[pos] if c == '<': ptr -= 1 elif c == '>': ptr += 1 elif c == '+': data[ptr] += 1 elif c == '-': data[ptr] -= 1 elif c == '.': output.append(chr(data[ptr])) elif c == ',': data[ptr] = ord(afile.read(1) or '\0') elif c == "[" and data[ptr] == 0: openbracket = 1 while pos < len(txt) - 1: pos += 1 openbracket += txt[pos] == '[' openbracket -= txt[pos] == ']' if openbracket == 0: break else: raise BrainFuckException("unmatched close brace") elif c == "]": closebracket = 1 while pos > 1: pos -= 1 closebracket += txt[pos] == ']' closebracket -= txt[pos] == '[' if closebracket == 0: break else: raise BrainFuckException("unmatched open brace") pos -= 1 pos += 1 assert 0 <= ptr < NUMBER_CELLS, "ptr=%d" % (ptr) print("".join(output)) parse("++++++++++[>+++++++>++++++++++>+++>+<<<<-]>++.>+.+++++++..+++.>++.<<+++++++++++++++.>.+++.------.--------.>+.>.")[...] today’s Programming Praxis exercise, our goal is to implement a Brainfuck interpreter. Let’s get [...]
My Haskell solution (see http://bonsaicode.wordpress.com/2011/10/04/programming-praxis-brainfuck/ for a version with comments):
import Data.List.Zipper run :: String -> IO String run = fmap toList . flip step (fromList $ replicate 30000 '\NUL') . fromList step :: Zipper Char -> Zipper Char -> IO (Zipper Char) step prog s = if endp prog then return s else uncurry step =<< instruction (cursor prog) prog s instruction :: Char -> Zipper Char -> Zipper Char -> IO (Zipper Char, Zipper Char) instruction '<' prog s = return (right prog, left s) instruction '>' prog s = return (right prog, right s) instruction '+' prog s = return (right prog, replace (succ $ cursor s) s) instruction '-' prog s = return (right prog, replace (pred $ cursor s) s) instruction '.' prog s = putStr [cursor s] >> return (right prog, s) instruction ',' prog s = fmap ((,) (right prog) . flip replace s) getChar instruction '[' prog s = return $ (if cursor s == '\NUL' then right $ move right '[' ']' prog else right prog, s) instruction ']' prog s = return (move left ']' '[' prog, s) instruction _ prog s = return (right prog, s) move :: (Zipper Char -> Zipper Char) -> Char -> Char -> Zipper Char -> Zipper Char move dir open close = f 0 . dir where f 0 z | cursor z == close = z f n z = f (if cursor z == open then n + 1 else if cursor z == close then n - 1 else n) $ dir zHere’s my python 3 version. It is essentially the same as other’s above.
A minor optimization is to implement looping using a loop stack. When ‘[' is encountered, ip (the instruction pointer)
is pushed on the loop stack (ls)(at this point ip points to the instruction after the '['). When a ']‘ is encounterd,
d[dp] is tested. If d[dp] is non-zero a jump to the top of the loop is done by loading ip from the top of the loop stack.
If d[dp] is zero, the loop stack is popped and execution continues after the ‘]’.
import sys def bf(source): d = [0]*30000 dp = 0 ip = 0 ls = [] while 0 <= ip < len(source): inst = source[ip] ip += 1 if inst in '<>': dp += 1 if inst == '>' else -1 elif inst in '+-': d[dp] += 1 if inst == '+' else -1 elif inst == '.': sys.stdout.write(chr(d[dp])) elif inst == ',': d[dp] = ord(sys.stdin.read(1)) elif inst == '[': ls.append(ip) if not d[dp]: nest = 0 while source[ip] != ']' or nest: if source[ip] == '[': nest += 1 elif source[ip] == ']': nest -= 1 ip += 1 elif inst == ']': if d[dp]: ip = ls[-1] else: ls.pop()Just because, here is a bf-to-python cross-compiler.
It optimizes runs of ‘+’, ‘-’, ” to single statements. E.g., ‘+++’ is translated to ‘d[dp] += 3′.
import re import sys def bf2py(source): d = [0]*30000 dp = 0 ip = 0 indent = 0 fmt = "{0:>{1}}{2}{3}".format src = [] code = re.sub(r"[^][<>.,+-]", '', source) for tok in re.findall(r"(\++|-+|<+|>+|,+|[].[])", code): if tok[0] == '>': src.append(fmt('', indent, "dp +=", len(tok))) elif tok[0] == '<': src.append(fmt('', indent, "dp -=", len(tok))) elif tok[0] == '+': src.append(fmt('', indent, "d[dp] +=", len(tok))) elif tok[0] == '-': src.append(fmt('', indent, "d[dp] -=", len(tok))) elif tok[0] == '.': src.append(fmt('', indent, "sys.stdout.write(chr(d[dp]))", ' ' )) elif tok[0] == ',': src.append(fmt('', indent, "d[dp] = ord(sys.stdin.read(1))", ' ')) elif tok[0] == '[': src.append(fmt('', indent, "while d[dp]:", ' ')) indent += 2 elif tok[0] == ']': indent -= 2 exec('\n'.join(src))Well, a loop is just a subroutine, so getting in/out of it should be handled by the evaluator.
Parse the string into a tree, then eval the tree. Loops are then just subtrees that need to be evaluated before we can continue.
(define (parse str) (let ((len (string-length str)) (i 0)) (define (aux) (if (= i len) '() (case (string-ref str i) ((#\[ ) (incr i) (let ((res (aux))) (cons res (aux)))) ((#\]) (incr i) '()) (else (let ((c (string-ref str i))) (incr i) (if (member c (string->list "><+-.,[]")) (cons c (aux)) (aux))))))) (aux))) (define (bf-eval str) (let ((*tape* (make-table test: =)) (*tc* 0) (l (parse str))) (define (aux l) (when (not (null? l)) (case (car l) ((#\>) (incr *tc*) (aux (cdr l))) ((#\<) (decr *tc*) (aux (cdr l))) ((#\+) (table-set! *tape* *tc* (1+ (table-ref *tape* *tc* 0))) (aux (cdr l))) ((#\-) (table-set! *tape* *tc* (1- (table-ref *tape* *tc* 0))) (aux (cdr l))) ((#\.) (display (integer->char (table-ref *tape* *tc* 0))) (aux (cdr l))) ((#\,) (table-set! *tape* *tc* (char->integer (string-ref (read-line) 0))) (aux (cdr l))) (else ;; it's a loop, with more body (if (zero? (table-ref *tape* *tc* 0)) ;; next (aux (cdr l)) ;; body, and then self again (begin (aux (car l)) (aux l))))))) (pp l) (aux l) ;(table->list *tape*) ))My slightly larger than necessary version. Note that I got the idea of using a loop stack from Mike, my first idea was similar to Paul Hofstra’s first answer, and wouldn’t have worked properly.
func parse_bf(raw_instr []byte) { re, err := regexp.Compile(`[^<>\+\-\,\.\[\]]`) if err != nil { fmt.Println("couldn't compile regexp:", err) } instr := re.ReplaceAll(raw_instr, []byte("")) dp := 0 ip := 0 var loopstack vector.IntVector data := make([]byte, 30000) in_out := make([]byte, 1) for ip < len(instr) { switch instr[ip] { case '+': if dp >= 0 && dp < 30000 { data[dp]++ } else { fmt.Println("Error: data pointer outside memory:", dp) return } case '-': if dp >= 0 && dp < 30000 { data[dp]-- } else { fmt.Println("Error: data pointer outside memory:", dp) return } case '>': dp++ case '<': dp-- case '[': loopstack.Push(ip) if data[dp] == 0 { nestLevel := 1 for nestLevel > 0 { ip++ if instr[ip] == '[' {nestLevel++} if instr[ip] == ']' {nestLevel--} } ip-- // ensure the loop gets popped } case ']': if (data[dp] > 0) { ip = loopstack.Last() } else { if len(loopstack) > 0 { loopstack.Pop() } else { fmt.Println("Error: unmatched close loop statement") return; } } case '.': in_out[0] = data[dp] _, err := os.Stdout.Write(in_out) if err != nil {fmt.Println("Error: can't write do stdout: ", err); return} case ',': _, err := os.Stdin.Read(in_out) if err != nil {fmt.Println("Error: can't read from stdin: ", err); return} data[dp] = in_out[0] } ip++ } }(* Brainfuck “compiler” in OCaml *)
let src = open_in Sys.argv.(1)
let right (f, i) = f, i + 1
let left (f, i) = f, i – 1
let set f i x j = if j = i then x else f j
let inc (f, i) = set f i (f i + 1), i
let dec (f, i) = set f i (f i – 1), i
let inp (f, i) = set f i (try input_byte stdin with End_of_file -> 0), i
let cur (f, i) = f i
let out s = output_byte stdout (cur s); s
let rec loop p s = if cur s 0 then loop p (p s) else s
let id x = x
let (>>) f g x = g (f x)
let rec compile () =
let emit q = q >> compile () in
try match (input_char src) with
| ‘>’ -> emit right
| ‘ emit left
| ‘+’ -> emit inc
| ‘-’ -> emit dec
| ‘.’ -> emit out
| ‘,’ -> emit inp
| ‘[' -> emit (loop (compile ()))
| ']‘ -> id
| _ -> compile ()
with End_of_file -> id
let const c x = c
let _ = (compile ()) (const 0, 0)
In ruby (I believe it handles everything) …
def bf_parse(program) data = Array.new(30000, 0) data_pointer = 0 program_pointer = 0 program_array = program.split(//) output = "" while program_pointer < program.size do c = program_array[program_pointer] case c when '<' # < move the data pointer one cell left data_pointer -= 1 when '>' # > move the data pointer one cell right data_pointer += 1 when '+' # + increment the cell at the current data pointer data[data_pointer] += 1 when '-' # - decrement the cell at the current data pointer data[data_pointer] -= 1 when '.' # . output the cell at the current data pointer as a character output << data[data_pointer] when ',' # , input the next character to the cell at the current data pointer c = STDIN.getc data[data_pointer] = c.ord if c != nil when '[' # [ if the cell at the current data pointer is zero, move the instruction pointer to the matching ] if data[data_pointer] == 0 then bracket_count = 1 while program_pointer < program.size do program_pointer +=1 case program_array[program_pointer] when '[' bracket_count += 1 when ']' bracket_count -= 1 end break if bracket_count == 0 end end when ']' # ] move the instruction pointer to the matching [ bracket_count = -1 while program_pointer < program.size do program_pointer -=1 case program_array[program_pointer] when '[' bracket_count += 1 when ']' bracket_count -= 1 end break if bracket_count == 0 end program_pointer -=1 else puts "Illegal BrainFuck character in program #{c}" end program_pointer += 1 end output end