JSON: Parsing Input

July 12, 2011

This exercise is long but straightforward. Mapping JSON into Scheme requires a few decisions: Strings are mapped to strings. Numbers are mapped to numbers, which are integers if only the integer part of the number is given but real numbers if a decimal fraction or exponent is included. Booleans are mapped to #t and #f, but there is no obvious mapping for null, so we map it to the symbol the-json-null-object. Arrays are mapped to vectors, and objects are mapped to association lists (a-lists). Here is the definition of the null object:

;; Distinguished symbol used as the Scheme value of the JSON literal null.
(define json-null (string->symbol "the-json-null-object"))

;; Predicate: true exactly when OBJ is the JSON null marker.  Symbols are
;; interned, so an identity comparison is sufficient.
(define json-null?
  (lambda (obj)
    (eq? obj json-null)))

The parser is a single large function with internal defines for the different types of JSON elements. The parser is simple: at each step only a single character of lookahead is required to decide what to do next. Eat consumes a single character, reporting an error if it is unexpected. Skip-white-space does what it says. Each of the other internal defines parses a particular element type, called by read-value.

(define (read-json . args)
  ;; Read one JSON value from a port and return its Scheme representation.
  ;;
  ;; args:    optionally a single input port; when omitted the value is read
  ;;          from (current-input-port).
  ;; returns: a string, a number (exact integer when the text has neither a
  ;;          fraction nor an exponent, inexact otherwise), #t, #f, json-null,
  ;;          a vector (JSON array), or an a-list of (key . value) pairs in
  ;;          source order (JSON object).  If the port is already at end of
  ;;          file the eof object is returned.
  ;; errors:  signals (error 'read-json ...) on malformed or truncated input.
  ;;          \uXXXX escapes inside strings are not supported.
  (let ((port (if (pair? args) (car args) (current-input-port))))

    ;; True only for the ASCII digits 0-9 (and false for the eof object).
    ;; char-numeric? may also accept other Unicode digits, which the
    ;; arithmetic in char->digit below would convert incorrectly.
    (define (digit? c)
      (and (char? c) (char<=? #\0 c) (char<=? c #\9)))

    ;; Consume the next character, which must be CHAR; signal an error if
    ;; the input ends or a different character is found.
    (define (eat char)
      (let ((c (peek-char port)))
        (cond ((eof-object? c)
                (error 'read-json "eat unexpected end of file"))
              ((char=? c char) (read-char port))
              (else (error 'read-json "eat unexpected character")))))

    ;; Discard whitespace up to the next significant character or eof.
    (define (skip-white-space)
      (let ((c (peek-char port)))
        (if (and (not (eof-object? c)) (char-whitespace? c))
            (begin (read-char port) (skip-white-space)))))

    ;; Parse one  "key" : value  member of an object into (key . value).
    (define (read-member)
      (skip-white-space)
      (let ((key (read-string)))
        (skip-white-space)
        (eat #\:)
        (let ((val (read-value)))
          (skip-white-space)
          (cons key val))))

    ;; Parse  { member , member ... }  into an a-list in source order.
    (define (read-object)
      (eat #\{)
      (skip-white-space)
      (let ((c (peek-char port)))
        (cond ((eof-object? c)
                (error 'read-json "read-object unexpected end of file"))
              ((char=? c #\}) (eat #\}) '())
              (else
                (let loop ((xs (list (read-member))))
                  (skip-white-space)
                  (let ((c (peek-char port)))
                    (cond ((eof-object? c)
                            (error 'read-json "read-object unexpected end of file"))
                          ((char=? c #\})
                            ;; The closing brace must be consumed here, or
                            ;; the enclosing array/object sees it next.
                            (eat #\})
                            (reverse xs))
                          ((char=? c #\,)
                            (eat #\,)
                            (loop (cons (read-member) xs)))
                          (else (error 'read-json "read-object syntax error")))))))))

    ;; Parse  [ value , value ... ]  into a vector.
    (define (read-array)
      (eat #\[)
      (skip-white-space)
      (let ((c (peek-char port)))
        (cond ((eof-object? c)
                (error 'read-json "read-array unexpected end of file"))
              ((char=? c #\]) (eat #\]) (vector))
              (else
                (let loop ((xs (list (read-value))))
                  (skip-white-space)
                  (let ((c (peek-char port)))
                    (cond ((eof-object? c)
                            (error 'read-json "read-array unexpected end of file"))
                          ((char=? c #\])
                            ;; Consume the bracket so nested arrays terminate
                            ;; only their own level.
                            (eat #\])
                            (list->vector (reverse xs)))
                          ((char=? c #\,)
                            (eat #\,)
                            (loop (cons (read-value) xs)))
                          (else (error 'read-json "read-array syntax error")))))))))

    ;; Read one character of string content, decoding a backslash escape
    ;; into the character it denotes.
    (define (read-escape)
      (let ((c (read-char port)))
        (cond ((eof-object? c)
                (error 'read-json "read-escape unexpected end of file"))
              ((not (char=? c #\\)) c)
              (else
                (let ((e (read-char port)))
                  (cond ((eof-object? e)
                          (error 'read-json "read-escape unexpected end of file"))
                        ((char=? e #\") #\")
                        ((char=? e #\\) #\\)
                        ((char=? e #\/) #\/)
                        ;; Backspace (8) and form feed (12) are built from
                        ;; their codes because the character names for them
                        ;; differ between Scheme standards.
                        ((char=? e #\b) (integer->char 8))
                        ((char=? e #\f) (integer->char 12))
                        ((char=? e #\n) #\newline)
                        ((char=? e #\r) #\return)
                        ((char=? e #\t) #\tab)
                        ((char=? e #\u)
                          (error 'read-json "unicode not supported"))
                        (else
                          (error 'read-json "unrecognized escape sequence"))))))))

    ;; Parse a double-quoted string; trailing whitespace is also consumed.
    (define (read-string)
      (eat #\")
      (let loop ((cs '()))
        (let ((c (peek-char port)))
          (cond ((eof-object? c)
                  (error 'read-json "read-string unexpected end of file"))
                ((char=? c #\")
                  (read-char port)
                  (skip-white-space)
                  (list->string (reverse cs)))
                (else (loop (cons (read-escape) cs)))))))

    ;; Parse  [-] digits [. digits] [(e|E) [+|-] digits].
    (define (read-number)
      (define (char->digit c) (- (char->integer c) 48))
      ;; Is the next character CH?  False at end of file.
      (define (next-is? ch)
        (let ((c (peek-char port)))
          (and (char? c) (char=? c ch))))
      ;; Insist that a digit comes next, without consuming it.
      (define (require-digit)
        (if (not (digit? (peek-char port)))
            (error 'read-json "read-number syntax error")))
      ;; Fold a (possibly empty) run of digits onto the integer N.
      (define (read-digits n)
        (if (digit? (peek-char port))
            (read-digits (+ (* n 10) (char->digit (read-char port))))
            n))
      ;; Sum fractional digits as exact rationals; TENS is the divisor for
      ;; the next digit.
      (define (read-fraction tens n)
        (if (digit? (peek-char port))
            (read-fraction (* tens 10)
                           (+ n (/ (char->digit (read-char port)) tens)))
            n))
      ;; let* fixes the order in which the parts are taken from the port.
      (let* ((sign (let ((c (peek-char port)))
                     (cond ((eof-object? c)
                             (error 'read-json "read-number unexpected end of file"))
                           ((char=? c #\-)
                             (eat #\-)
                             ;; Only peek at what follows the minus sign so
                             ;; the first digit is still there for NUMB.
                             (require-digit)
                             -1)
                           ((digit? c) 1)
                           (else (error 'read-json "read-number syntax error")))))
             (numb (read-digits 0))
             ;; FRAC and EXPO are #f when that part is absent, so that
             ;; "1.0" and "1e0" can be told apart from plain "1".
             (frac (and (next-is? #\.)
                        (begin (eat #\.)
                               (require-digit)
                               (read-fraction 10 0))))
             (expo (and (or (next-is? #\e) (next-is? #\E))
                        (begin
                          (read-char port)
                          (let ((esign (cond ((next-is? #\+) (read-char port) 1)
                                             ((next-is? #\-) (read-char port) -1)
                                             (else 1))))
                            (require-digit)
                            (* esign (read-digits 0)))))))
        (if (or frac expo)
            (exact->inexact
              (* sign (+ numb (or frac 0)) (expt 10 (or expo 0))))
            (* sign numb))))

    ;; Parse one of the bare words true, false or null.
    (define (read-constant)
      (let loop ((c (peek-char port)) (cs '()))
        (if (and (not (eof-object? c)) (char-alphabetic? c))
            (begin (eat c) (loop (peek-char port) (cons c cs)))
            (let ((str (list->string (reverse cs))))
              (cond ((string=? str "true") (skip-white-space) #t)
                    ((string=? str "false") (skip-white-space) #f)
                    ((string=? str "null") (skip-white-space) json-null)
                    (else (error 'read-json "unrecognized constant")))))))

    ;; Dispatch on a single character of lookahead.
    (define (read-value)
      (skip-white-space)
      (let ((c (peek-char port)))
        (cond ((eof-object? c)
                (error 'read-json "read-value unexpected end of file"))
              ((char=? c #\{) (read-object))
              ((char=? c #\[) (read-array))
              ((char=? c #\") (read-string))
              ((or (digit? c) (char=? c #\-)) (read-number))
              ((char-alphabetic? c) (read-constant))
              (else (error 'read-json "read-value syntax error")))))

    (if (eof-object? (peek-char port))
        (read-char port)
        (read-value))))

It’s long, but the pieces are individually simple, with the complication coming from checking all possible error conditions.

It is convenient to provide a conversion from strings to JSON objects:

;; Parse the JSON text held in the string STR and return the resulting
;; Scheme value.  read-json takes its input from the current input port
;; when called with no arguments, so it can serve directly as the thunk.
(define string->json
  (lambda (str)
    (with-input-from-string str read-json)))

You can run the program at http://programmingpraxis.codepad.org/iq1vyzGY; unfortunately, the Scheme system used at Codepad doesn’t provide string ports, so the code won’t work.

Pages: 1 2

2 Responses to “JSON: Parsing Input”

  1. dethos said

    Here is my implementation in Python. It may contain some bugs; I only ran a few tests:

    import exceptions

    class json_parser:
        """Minimal recursive-descent JSON parser.

        Usage: ``json_parser(text).parse()``.

        Objects become dicts, arrays become lists, strings become strings,
        numbers become int (or float when a fraction or exponent is present)
        and true/false/null become True/False/None.

        Whitespace outside string literals is stripped before parsing, so
        the parser itself never has to skip blanks.  A side effect of that
        design is that tokens separated only by whitespace are fused
        (``[1 2]`` is read as ``[12]``).  A trailing comma before a closing
        bracket is tolerated.
        """

        _WHITESPACE = ' \t\r\n'
        _NUMBER_CHARS = '0123456789+-.eE'
        _HEX_DIGITS = '0123456789abcdefABCDEF'
        # Single-character escapes allowed after a backslash in a string.
        _ESCAPES = {'"': '"', '\\': '\\', '/': '/', 'b': '\b',
                    'f': '\f', 'n': '\n', 'r': '\r', 't': '\t'}

        def __init__(self, string):
            """Store the whitespace-stripped text and reset the read position."""
            self.json_data = self.__remove_blanks(string)
            self.pointer = 0

        def __remove_blanks(self, string):
            """Return `string` without whitespace that lies outside strings."""
            kept = []
            inside_string = False
            escaped = False
            for ch in string:
                if inside_string:
                    kept.append(ch)
                    if escaped:
                        # Character after a backslash: never ends the string.
                        escaped = False
                    elif ch == '\\':
                        escaped = True
                    elif ch == '"':
                        inside_string = False
                elif ch == '"':
                    inside_string = True
                    kept.append(ch)
                elif ch not in self._WHITESPACE:
                    kept.append(ch)
            return ''.join(kept)

        def __parse_obj(self):
            """Parse an object starting at '{' and return it as a dict."""
            new_dic = {}
            self.pointer += 1  # skip '{'
            while self.json_data[self.pointer] != '}':
                if self.json_data[self.pointer] != '"':
                    # The only possible type for a key is a string.
                    raise Exception('object key must be a string')
                key = self.__parse_string()

                if self.json_data[self.pointer] != ':':
                    raise Exception("expected ':' after object key")
                self.pointer += 1

                new_dic[key] = self.__parse_value()

                if self.json_data[self.pointer] == ',':
                    self.pointer += 1
                elif self.json_data[self.pointer] != '}':
                    raise Exception("expected ',' or '}' in object")
            self.pointer += 1  # skip '}'
            return new_dic

        def __parse_array(self):
            """Parse an array starting at '[' and return it as a list."""
            new_array = []
            self.pointer += 1  # skip '['
            while self.json_data[self.pointer] != ']':
                new_array.append(self.__parse_value())

                if self.json_data[self.pointer] == ',':
                    self.pointer += 1
                elif self.json_data[self.pointer] != ']':
                    raise Exception("expected ',' or ']' in array")
            self.pointer += 1  # skip ']'
            return new_array

        def __parse_hex4(self):
            """Consume exactly four hex digits and return their value."""
            digits = self.json_data[self.pointer:self.pointer + 4]
            if len(digits) != 4 or any(d not in self._HEX_DIGITS for d in digits):
                raise Exception('invalid \\u escape')
            self.pointer += 4
            return int(digits, 16)

        def __parse_escape(self):
            """Decode the escape whose backslash has just been consumed."""
            code = self.json_data[self.pointer]
            self.pointer += 1
            if code != 'u':
                if code not in self._ESCAPES:
                    raise Exception('unrecognized escape sequence')
                return self._ESCAPES[code]

            point = self.__parse_hex4()
            # A high surrogate followed by an escaped low surrogate encodes
            # one character outside the Basic Multilingual Plane.
            if (0xD800 <= point <= 0xDBFF
                    and self.json_data[self.pointer:self.pointer + 2] == '\\u'):
                saved = self.pointer
                self.pointer += 2
                low = self.__parse_hex4()
                if 0xDC00 <= low <= 0xDFFF:
                    point = 0x10000 + ((point - 0xD800) << 10) + (low - 0xDC00)
                else:
                    self.pointer = saved  # not a pair; reread it separately
            # Going through the codec works on both Python 2 and Python 3.
            return ('\\U%08x' % point).encode('ascii').decode('unicode_escape')

        def __parse_string(self):
            """Parse a string starting at its opening quote; return its text."""
            self.pointer += 1  # skip opening quote
            chars = []
            while True:
                if self.pointer >= len(self.json_data):
                    raise Exception('unterminated string')
                ch = self.json_data[self.pointer]
                self.pointer += 1
                if ch == '"':
                    return ''.join(chars)
                if ch == '\\':
                    chars.append(self.__parse_escape())
                else:
                    chars.append(ch)

        def __parse_other(self):
            """Parse true, false, null or a number."""
            for literal, value in (('true', True), ('false', False),
                                   ('null', None)):
                if self.json_data.startswith(literal, self.pointer):
                    self.pointer += len(literal)
                    return value

            start = self.pointer
            while (self.pointer < len(self.json_data)
                   and self.json_data[self.pointer] in self._NUMBER_CHARS):
                self.pointer += 1

            text = self.json_data[start:self.pointer]
            # int()/float() raise ValueError for empty or malformed text.
            if '.' in text or 'e' in text or 'E' in text:
                return float(text)
            return int(text)

        def __parse_value(self):
            """Parse whatever value starts at the current position.

            Raises an exception on malformed input; `parse` turns that into
            the error result.  Errors travel as exceptions rather than as a
            -1 return value so that the JSON number -1 remains a valid value.
            """
            ch = self.json_data[self.pointer]
            if ch == '{':
                return self.__parse_obj()
            if ch == '[':
                return self.__parse_array()
            if ch == '"':
                return self.__parse_string()
            return self.__parse_other()

        def parse(self):
            """Parse the stored text, which must be an object or an array.

            Returns the parsed dict or list.  Returns None (after printing a
            message) when the text does not start with '{' or '[', and -1
            (after printing a message) when the data is malformed.
            """
            if self.json_data[:1] not in ('{', '['):
                print('Error:: Invalid inicial Data Format')
                return None

            try:
                return self.__parse_value()
            except Exception:
                print('Error:: Invalid Data Format, unknown character at position %d'
                      % self.pointer)
                return -1

  2. […] one of their problems (an old one, from 2009) . So today i’m posting here my solution for the problem of this week. Basically they ask us to write a JSON parser in our favorite computer language, so i chose […]

Leave a comment