UNB/ CS/ David Bremner/ teaching/ cs4613/ lectures/ lecture2/ json-lexer.rkt
#lang racket/base
(require brag/support)
(provide tokenize tokenize-string)

;; Limitation: unicode escapes of the form \u1234 inside strings are not supported.

;; Lexer abbreviations for the pieces of a JSON string body.
;;   escaped    -- a backslash followed by one of the single-character
;;                 escape codes: " \ / b f n r t
;;   un-escaped -- any graphic character other than the double quote
;;                 and the backslash
(define-lex-abbrevs
  [escaped
   (:: #\\
       (:or #\" #\\ #\/ #\b #\f #\n #\r #\t))]
  [un-escaped
   (:- graphic #\" #\\)])

;; tokenize-string : string -> (-> token)
;; Convenience wrapper: opens a string input port over `str` and hands it
;; to `tokenize`, returning whatever `tokenize` returns for that port.
(define (tokenize-string str)
  (tokenize (open-input-string str)))

;; tokenize : input-port -> (-> token)
;; Builds a lexer over the input port `ip` and returns a zero-argument
;; procedure; each call to that procedure runs the lexer once on `ip` and
;; returns the result for the next lexeme.
;;
;; Token rules:
;;   punctuation  [ ] { } ( ) : ,   -- token whose type and value are both
;;                                     the matched text
;;   NUMBER       one or more `numeric` characters, converted with
;;                string->number (no sign, fraction or exponent)
;;   STRING       a double-quoted run of spaces, un-escaped characters and
;;                backslash escapes; the value is the raw lexeme, i.e. it
;;                still includes the surrounding quotes and the escapes
;;   WHITESPACE   marked #:skip? #t
;;   end of input the action returns (void)
(define (tokenize ip)
  (define my-lexer
    (lexer-src-pos
     [(:or "[" "]" "{" "}" "(" ")" ":" ",")
      (token lexeme lexeme)]
     [(:+ numeric)
      (token 'NUMBER (string->number lexeme))]
     ;; Zero or more body characters (:* rather than :+) so that the
     ;; empty string "" is accepted as a STRING token.
     [(:: #\" (:* (:or #\space un-escaped escaped)) #\")
      (token 'STRING lexeme)]
     [whitespace
      (token 'WHITESPACE lexeme #:skip? #t)]
     [(eof)
      (void)]))
  (define (next-token) (my-lexer ip))
  next-token)
;;