JSON: Parsing Input
July 12, 2011
This exercise is long but straightforward. Mapping JSON into Scheme requires a few decisions: Strings are mapped to strings. Numbers are mapped to numbers, which are integers if only the integer part of the number is given but real numbers if a decimal fraction or exponent is included. Booleans are mapped to #t
and #f
, but there is no obvious mapping for null, so we map it to the symbol the-json-null-object
. Arrays are mapped to vectors, objects are mapped to a-lists. Here is the definition of the null object:
;; Sentinel value standing in for JSON's null.  It is an ordinary interned
;; symbol, built with string->symbol from the text "the-json-null-object".
(define json-null
  (string->symbol "the-json-null-object"))

;; Predicate: #t exactly when OBJ is the JSON null sentinel, #f otherwise.
(define json-null?
  (lambda (obj)
    (eq? obj json-null)))
The parser is a single large function with internal defines for the different types of JSON elements. The parser is simple: at each step only a single character of lookahead is required to decide what to do next. Eat
consumes a single character, reporting an error if it is unexpected. Skip-white-space
does what it says. Each of the other internal defines parses a particular element type, called by read-value
.
;; read-json [port] -> value
;;
;; Reads one JSON value from PORT (default: the current input port) and
;; returns its Scheme representation:
;;   string  -> string
;;   number  -> exact integer when written with neither fraction nor
;;              exponent, otherwise an inexact real
;;   true / false -> #t / #f
;;   null    -> json-null
;;   array   -> vector
;;   object  -> association list of (key-string . value), in source order
;; If the port is already at end of file the end-of-file object is returned.
;; Malformed input is reported through (error 'read-json message).
;; \uXXXX escapes inside strings are not supported and raise an error.
;;
;; The parser needs only one character of lookahead (peek-char) throughout.
(define (read-json . args)
  (let ((port (if (pair? args) (car args) (current-input-port))))

    ;; Consume the next character, which must be CHAR; error otherwise.
    (define (eat char)
      (let ((c (peek-char port)))
        (if (eof-object? c)
            (error 'read-json "eat unexpected end of file")
            (if (char=? c char)
                (read-char port)
                (error 'read-json "eat unexpected character")))))

    ;; Discard any run of whitespace characters at the front of the port.
    (define (skip-white-space)
      (let ((c (peek-char port)))
        (if (and (not (eof-object? c)) (char-whitespace? c))
            (begin (eat c) (skip-white-space)))))

    ;; Parse one `"key" : value` pair of an object into (key . value).
    (define (read-member)
      (skip-white-space)
      (let ((key (read-string)))
        (skip-white-space)
        (eat #\:)
        (skip-white-space)
        (let ((val (read-value)))
          (skip-white-space)
          (cons key val))))

    ;; Parse `{ ... }` into an association list.
    (define (read-object)
      (eat #\{)
      (skip-white-space)
      (let ((c (peek-char port)))
        (if (eof-object? c)
            (error 'read-json "read-object unexpected end of file")
            (if (char=? c #\})
                (begin (eat #\}) (list))
                (let loop ((xs (list (read-member))))
                  (skip-white-space)
                  (let ((c (peek-char port)))
                    (cond ((eof-object? c)
                           (error 'read-json
                                  "read-object unexpected end of file"))
                          ((char=? c #\})
                           ;; The closing brace must be consumed here, or an
                           ;; enclosing object/array would see it and stop
                           ;; early.
                           (eat #\})
                           (reverse xs))
                          ((char=? c #\,)
                           (eat #\,)
                           (skip-white-space)
                           (loop (cons (read-member) xs)))
                          (else
                           (error 'read-json
                                  "read-object syntax error")))))))))

    ;; Parse `[ ... ]` into a vector.
    (define (read-array)
      (eat #\[)
      (skip-white-space)
      (let ((c (peek-char port)))
        (if (eof-object? c)
            (error 'read-json "read-array unexpected end of file")
            (if (char=? c #\])
                (begin (eat #\]) (vector))
                (let loop ((xs (list (read-value))))
                  (skip-white-space)
                  (let ((c (peek-char port)))
                    (cond ((eof-object? c)
                           (error 'read-json
                                  "read-array unexpected end of file"))
                          ((char=? c #\])
                           ;; Consume the closing bracket (see read-object).
                           (eat #\])
                           (list->vector (reverse xs)))
                          ((char=? c #\,)
                           (eat #\,)
                           (skip-white-space)
                           (loop (cons (read-value) xs)))
                          (else
                           (error 'read-json
                                  "read-array syntax error")))))))))

    ;; Read one logical character of a string body: either a plain character
    ;; or a backslash escape translated to the character it denotes.
    (define (read-escape)
      (let ((c (peek-char port)))
        (cond ((eof-object? c)
               (error 'read-json "read-escape unexpected end of file"))
              ((char=? c #\\)
               (eat #\\)
               (let ((c (peek-char port)))
                 (cond
                  ((eof-object? c)
                   (error 'read-json "read-escape unexpected end of file"))
                  ((char=? c #\") (read-char port) #\")
                  ((char=? c #\\) (read-char port) #\\)
                  ((char=? c #\/) (read-char port) #\/)
                  ((char=? c #\b) (read-char port) #\backspace)
                  ((char=? c #\f) (read-char port) #\page)
                  ((char=? c #\n) (read-char port) #\newline)
                  ((char=? c #\r) (read-char port) #\return)
                  ((char=? c #\t) (read-char port) #\tab)
                  ((char=? c #\u) (error 'read-json "unicode not supported"))
                  (else (error 'read-json "unrecognized escape sequence")))))
              (else (read-char port)))))

    ;; Parse a double-quoted string, returning its decoded contents.
    (define (read-string)
      (eat #\")
      (let loop ((c (peek-char port)) (cs '()))
        (cond ((eof-object? c)
               (error 'read-json "read-string unexpected end of file"))
              ((char=? c #\")
               (eat c) (skip-white-space) (list->string (reverse cs)))
              (else (let ((c (read-escape)))
                      (loop (peek-char port) (cons c cs)))))))

    ;; Parse a number: optional '-', integer digits, optional fraction,
    ;; optional exponent.
    (define (read-number)
      (define (char->digit c) (- (char->integer c) 48))

      ;; Consume a (possibly empty) run of digits and return its value.
      (define (read-digits)
        (let loop ((c (peek-char port)) (n 0))
          (if (and (not (eof-object? c)) (char-numeric? c))
              (begin (eat c)
                     (loop (peek-char port)
                           (+ (* n 10) (char->digit c))))
              n)))

      (let* ((sign (let ((c (peek-char port)))
                     (cond ((eof-object? c)
                            (error 'read-json
                                   "read-number unexpected end of file"))
                           ((char=? c #\-)
                            (eat #\-)
                            ;; Only peek: the digit after '-' belongs to the
                            ;; integer part and must stay in the port.
                            (let ((c (peek-char port)))
                              (if (and (not (eof-object? c))
                                       (char-numeric? c))
                                  -1
                                  (error 'read-json
                                         "read-number syntax error"))))
                           ((char-numeric? c) 1)
                           (else (error 'read-json
                                        "read-number syntax error")))))
             (numb (read-digits))
             ;; frac is #f when no fraction was written, so that "1.0" can be
             ;; told apart from "1".
             (frac (let ((c (peek-char port)))
                     (if (or (eof-object? c) (not (char=? c #\.)))
                         #f
                         (begin
                           (eat #\.)
                           (let ((c (peek-char port)))
                             (if (or (eof-object? c) (not (char-numeric? c)))
                                 (error 'read-json "read-number syntax error")
                                 (let loop ((c c) (tens 10) (n 0))
                                   (if (and (not (eof-object? c))
                                            (char-numeric? c))
                                       (begin
                                         (eat c)
                                         (loop (peek-char port)
                                               (* tens 10)
                                               (+ n (/ (char->digit c) tens))))
                                       n))))))))
             ;; expo is #f when no exponent was written.
             (expo (let ((c (peek-char port)))
                     (if (or (eof-object? c) (not (char-ci=? c #\E)))
                         #f
                         (begin
                           (eat c)
                           (let* ((sign
                                   (let ((c (peek-char port)))
                                     (cond ((eof-object? c)
                                            (error 'read-json
                                                   "read-number unexpected end of file"))
                                           ((char=? c #\+) (eat c) 1)
                                           ((char=? c #\-) (eat c) -1)
                                           ((char-numeric? c) 1)
                                           (else
                                            (error 'read-json
                                                   "read-number syntax error")))))
                                  (numb
                                   ;; At least one exponent digit is required.
                                   (let ((c (peek-char port)))
                                     (if (or (eof-object? c)
                                             (not (char-numeric? c)))
                                         (error 'read-json
                                                "read-number syntax error")
                                         (read-digits)))))
                             (* sign numb)))))))
        (if (and (not frac) (not expo))
            (* sign numb)
            (exact->inexact (* sign
                               (+ numb (or frac 0))
                               (expt 10 (or expo 0)))))))

    ;; Parse one of the bare words true, false or null.
    (define (read-constant)
      (let loop ((c (peek-char port)) (cs '()))
        (if (and (not (eof-object? c)) (char-alphabetic? c))
            (begin (eat c) (loop (peek-char port) (cons c cs)))
            (let ((str (list->string (reverse cs))))
              (cond ((string=? str "true") (skip-white-space) #t)
                    ((string=? str "false") (skip-white-space) #f)
                    ((string=? str "null") (skip-white-space) json-null)
                    (else (error 'read-json "unrecognized constant")))))))

    ;; Dispatch on the next non-blank character to the matching sub-parser.
    (define (read-value)
      (skip-white-space)
      (let ((c (peek-char port)))
        (cond ((eof-object? c)
               (error 'read-json "read-value unexpected end of file"))
              ((char=? c #\{) (read-object))
              ((char=? c #\[) (read-array))
              ((char=? c #\") (read-string))
              ((or (char-numeric? c) (char=? c #\-)) (read-number))
              ((char-alphabetic? c) (read-constant))
              (else (error 'read-json "read-value syntax error")))))

    ;; An exhausted port yields the end-of-file object rather than an error.
    (if (eof-object? (peek-char port)) (read-char port) (read-value))))
It’s long, but the pieces are individually simple, with the complication coming from checking all possible error conditions.
It is convenient to provide a conversion from strings to JSON objects:
;; string->json str -> value
;; Parses the JSON text held in STR by making it the current input port
;; and running read-json (with no arguments) against it.
(define string->json
  (lambda (str)
    (with-input-from-string str read-json)))
You can run the program at http://programmingpraxis.codepad.org/iq1vyzGY; unfortunately, the Scheme system used at Codepad doesn’t provide string ports, so the code won’t work.
Here is my implementation in Python. It may contain some bugs; I only ran a few tests:
import exceptions
class json_parser:
    """Small recursive-descent JSON parser.

    Usage: ``json_parser(text).parse()``.

    ``parse()`` returns the decoded value (dict or list at the top level),
    ``None`` when the text does not start with ``{`` or ``[``, or ``-1`` after
    printing a message when the text is malformed.  The ``-1`` result is only
    ever used for the whole document; inside the parser, errors travel as
    exceptions so that a genuine JSON value of -1 is never mistaken for a
    failure.

    Attributes:
        json_data: the input with all whitespace outside string literals
            removed.  Because blanks are stripped up front, tokens separated
            only by whitespace are fused (``[1 2]`` is read as ``[12]``).
        pointer: index into ``json_data`` of the next unread character.

    Numbers are converted with ``int``/``float``, so a few forms outside the
    strict JSON grammar (for example leading zeros) are accepted.
    """

    # Characters JSON treats as insignificant whitespace between tokens.
    _WHITESPACE = ' \t\n\r'
    # Characters that may appear in a numeric token.
    _NUMBER_CHARS = '0123456789+-.eE'
    _HEX_DIGITS = '0123456789abcdefABCDEF'
    # Single-character escapes and the characters they denote.
    _ESCAPES = {
        '"': '"', '\\': '\\', '/': '/',
        'b': '\b', 'f': '\f', 'n': '\n', 'r': '\r', 't': '\t',
    }

    def __init__(self, string):
        self.json_data = self.__remove_blanks(string)
        self.pointer = 0

    def __remove_blanks(self, string):
        """Return ``string`` without whitespace that lies outside strings."""
        kept = []
        inside_string = False
        escaped = False  # True right after a backslash inside a string
        for ch in string:
            if inside_string:
                kept.append(ch)
                if escaped:
                    escaped = False
                elif ch == '\\':
                    escaped = True
                elif ch == '"':
                    inside_string = False
            elif ch not in self._WHITESPACE:
                kept.append(ch)
                if ch == '"':
                    inside_string = True
        return "".join(kept)

    def __peek(self):
        """Return the current character; raise ValueError at end of input."""
        if self.pointer >= len(self.json_data):
            raise ValueError('unexpected end of input')
        return self.json_data[self.pointer]

    def __parse_obj(self):
        """Parse ``{...}`` starting at the opening brace; return a dict."""
        new_dic = {}
        self.pointer += 1  # skip '{'
        if self.__peek() == '}':
            self.pointer += 1
            return new_dic
        while True:
            # The only possible type for a key is a string.
            if self.__peek() != '"':
                raise ValueError('object key must be a string')
            key = self.__parse_string()
            if self.__peek() != ':':
                raise ValueError("expected ':' after object key")
            self.pointer += 1
            new_dic[key] = self.__parse_value()
            separator = self.__peek()
            if separator == '}':
                self.pointer += 1
                return new_dic
            if separator != ',':
                raise ValueError("expected ',' or '}' in object")
            self.pointer += 1

    def __parse_array(self):
        """Parse ``[...]`` starting at the opening bracket; return a list."""
        new_array = []
        self.pointer += 1  # skip '['
        if self.__peek() == ']':
            self.pointer += 1
            return new_array
        while True:
            new_array.append(self.__parse_value())
            separator = self.__peek()
            if separator == ']':
                self.pointer += 1
                return new_array
            if separator != ',':
                raise ValueError("expected ',' or ']' in array")
            self.pointer += 1

    def __parse_string(self):
        """Parse a quoted string starting at the opening quote.

        Backslash escapes are decoded.  ``\\uXXXX`` yields the single code
        unit it names; surrogate pairs are not combined.
        """
        try:
            to_char = unichr  # Python 2: chr() is limited to 0..255
        except NameError:
            to_char = chr
        self.pointer += 1  # skip opening quote
        chars = []
        while True:
            ch = self.__peek()
            if ch == '"':
                self.pointer += 1
                return "".join(chars)
            if ch == '\\':
                self.pointer += 1
                escape = self.__peek()
                if escape == 'u':
                    digits = self.json_data[self.pointer + 1:self.pointer + 5]
                    if len(digits) != 4 or not all(
                            d in self._HEX_DIGITS for d in digits):
                        raise ValueError('invalid \\u escape')
                    chars.append(to_char(int(digits, 16)))
                    self.pointer += 5
                    continue
                if escape not in self._ESCAPES:
                    raise ValueError('invalid escape sequence')
                chars.append(self._ESCAPES[escape])
            else:
                chars.append(ch)
            self.pointer += 1

    def __parse_other(self):
        """Parse ``true``, ``false``, ``null`` or a number."""
        data = self.json_data
        for literal, value in (('true', True), ('false', False),
                               ('null', None)):
            if data.startswith(literal, self.pointer):
                self.pointer += len(literal)
                return value
        start = self.pointer
        while (self.pointer < len(data)
               and data[self.pointer] in self._NUMBER_CHARS):
            self.pointer += 1
        token = data[start:self.pointer]
        try:
            if not token:
                raise ValueError('unexpected character')
            # A fraction or an exponent makes the value a float.
            if '.' in token or 'e' in token or 'E' in token:
                return float(token)
            return int(token)
        except ValueError:
            # Point the error position at the start of the bad token.
            self.pointer = start
            raise

    def __parse_value(self):
        """Parse whatever value starts at the current position."""
        ch = self.__peek()
        if ch == '{':
            return self.__parse_obj()
        if ch == '[':
            return self.__parse_array()
        if ch == '"':
            return self.__parse_string()
        return self.__parse_other()

    def parse(self):
        """Parse the whole document.

        Returns the decoded dict/list, ``None`` (after printing a message)
        if the text does not begin with ``{`` or ``[``, or ``-1`` (after
        printing a message) if the text is malformed.
        """
        self.pointer = 0  # allow parse() to be called more than once
        # Slicing instead of indexing keeps empty input from raising.
        if self.json_data[:1] not in ('{', '['):
            print('Error:: Invalid inicial Data Format')
            return None
        try:
            final_object = self.__parse_value()
            if self.pointer != len(self.json_data):
                raise ValueError('unexpected data after top-level value')
        except (ValueError, RuntimeError):
            # RuntimeError covers recursion-depth overflow on deep nesting.
            print('Error:: Invalid Data Format, unknown character at position %d'
                  % self.pointer)
            return -1
        return final_object
[…] one of their problems (an old one, from 2009). So today I’m posting here my solution for the problem of this week. Basically they ask us to write a JSON parser in our favorite computer language, so I chose […]