571 lines
18 KiB
Scheme
571 lines
18 KiB
Scheme
;;; Copyright 2017 Cisco Systems, Inc.
|
|
;;;
|
|
;;; Licensed under the Apache License, Version 2.0 (the "License");
|
|
;;; you may not use this file except in compliance with the License.
|
|
;;; You may obtain a copy of the License at
|
|
;;;
|
|
;;; http://www.apache.org/licenses/LICENSE-2.0
|
|
;;;
|
|
;;; Unless required by applicable law or agreed to in writing, software
|
|
;;; distributed under the License is distributed on an "AS IS" BASIS,
|
|
;;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
;;; See the License for the specific language governing permissions and
|
|
;;; limitations under the License.
|
|
|
|
;;; This file contains a sample parser defined via the ez-grammar system
|
|
;;; and a simple test of the parser.
|
|
|
|
;;; This file is organized as follows:
|
|
;;;
|
|
;;; - (streams) library providing the required exports for ez-grammar and
|
|
;;; the parser.
|
|
;;;
|
|
;;; - (state-case) library exporting the state-case macro, copped from
|
|
;;; cmacros.ss, for use by the lexer.
|
|
;;;
|
|
;;; - (lexer) library providing a simple lexer that reads characters
|
|
;;; from a port and produces a corresponding stream of tokens.
|
|
;;;
|
|
;;; - (parser) library providing the sample parser.
|
|
;;;
|
|
;;; - ez-grammar-test procedure that tests the sample parser.
|
|
;;;
|
|
;;; Instructions for running the test are at the end of this file.
|
|
|
|
(library (streams)
|
|
(export stream-cons stream-car stream-cdr stream-nil stream-null?
|
|
stream-map stream stream-append2 stream-append-all stream-last-forced)
|
|
(import (chezscheme))
|
|
|
|
(define stream-cons
|
|
(lambda (x thunk)
|
|
(cons x thunk)))
|
|
|
|
(define stream-car
|
|
(lambda (x)
|
|
(car x)))
|
|
|
|
(define stream-cdr
|
|
(lambda (x)
|
|
(when (procedure? (cdr x)) (set-cdr! x ((cdr x))))
|
|
(cdr x)))
|
|
|
|
(define stream-nil '())
|
|
|
|
(define stream-null?
|
|
(lambda (x)
|
|
(null? x)))
|
|
|
|
(define stream-map
|
|
(lambda (f x)
|
|
(if (stream-null? x)
|
|
'()
|
|
(stream-cons (f (stream-car x))
|
|
(lambda ()
|
|
(stream-map f (stream-cdr x)))))))
|
|
|
|
(define stream
|
|
(lambda xs
|
|
xs))
|
|
|
|
(define stream-append2
|
|
(lambda (xs thunk)
|
|
(if (null? xs)
|
|
(thunk)
|
|
(stream-cons (stream-car xs)
|
|
(lambda ()
|
|
(stream-append2 (stream-cdr xs) thunk))))))
|
|
|
|
(define stream-append-all
|
|
(lambda (stream$) ;; stream of streams
|
|
(if (stream-null? stream$)
|
|
stream$
|
|
(stream-append2 (stream-car stream$)
|
|
(lambda () (stream-append-all (stream-cdr stream$)))))))
|
|
|
|
(define stream-last-forced
|
|
(lambda (x)
|
|
(and (not (null? x))
|
|
(let loop ([x x])
|
|
(let ([next (cdr x)])
|
|
(if (pair? next)
|
|
(loop next)
|
|
(car x)))))))
|
|
)
|
|
|
|
(library (state-case)
|
|
(export state-case eof)
|
|
(import (chezscheme))
|
|
|
|
;;; from Chez Scheme Version 9.5.1 cmacros.ss
|
|
(define-syntax state-case
|
|
(lambda (x)
|
|
(define state-case-test
|
|
(lambda (cvar k)
|
|
(with-syntax ((cvar cvar))
|
|
(syntax-case k (-)
|
|
(char
|
|
(char? (datum char))
|
|
#'(char=? cvar char))
|
|
((char1 - char2)
|
|
(and (char? (datum char1)) (char? (datum char2)))
|
|
#'(char<=? char1 cvar char2))
|
|
(predicate
|
|
(identifier? #'predicate)
|
|
#'(predicate cvar))))))
|
|
(define state-case-help
|
|
(lambda (cvar clauses)
|
|
(syntax-case clauses (else)
|
|
(((else exp1 exp2 ...))
|
|
#'(begin exp1 exp2 ...))
|
|
((((k ...) exp1 exp2 ...) . more)
|
|
(with-syntax (((test ...)
|
|
(map (lambda (k) (state-case-test cvar k))
|
|
#'(k ...)))
|
|
(rest (state-case-help cvar #'more)))
|
|
#'(if (or test ...) (begin exp1 exp2 ...) rest)))
|
|
(((k exp1 exp2 ...) . more)
|
|
(with-syntax ((test (state-case-test cvar #'k))
|
|
(rest (state-case-help cvar #'more)))
|
|
#'(if test (begin exp1 exp2 ...) rest))))))
|
|
(syntax-case x (eof)
|
|
((_ cvar (eof exp1 exp2 ...) more ...)
|
|
(identifier? #'cvar)
|
|
(with-syntax ((rest (state-case-help #'cvar #'(more ...))))
|
|
#'(if (eof-object? cvar)
|
|
(begin exp1 exp2 ...)
|
|
rest))))))
|
|
|
|
(define-syntax eof
|
|
(lambda (x)
|
|
(syntax-error x "misplaced aux keyword")))
|
|
)
|
|
|
|
(library (lexer)
|
|
(export token? token-type token-value token-bfp token-efp lexer)
|
|
(import (chezscheme) (state-case) (streams))
|
|
|
|
(define-record-type token
|
|
(nongenerative)
|
|
(fields type value bfp efp))
|
|
|
|
;; test lexer
|
|
(define lexer
|
|
(lambda (fn ip)
|
|
(define $prev-pos 0)
|
|
(define $pos 0)
|
|
(define ($get-char)
|
|
(set! $pos (+ $pos 1))
|
|
(get-char ip))
|
|
(define ($unread-char c)
|
|
(set! $pos (- $pos 1))
|
|
(unread-char c ip))
|
|
(define ($ws!) (set! $prev-pos $pos))
|
|
(define ($make-token type value)
|
|
(let ([tok (make-token type value $prev-pos $pos)])
|
|
(set! $prev-pos $pos)
|
|
tok))
|
|
(define ($lex-error c)
|
|
(errorf #f "unexpected ~a at character ~s of ~a"
|
|
(if (eof-object? c)
|
|
"eof"
|
|
(format "character '~c'" c))
|
|
$pos fn))
|
|
(define-syntax lex-error
|
|
(syntax-rules ()
|
|
[(_ ?c)
|
|
(let ([c ?c])
|
|
($lex-error c)
|
|
(void))]))
|
|
(let-values ([(sp get-buf) (open-string-output-port)])
|
|
(define (return-token type value)
|
|
(stream-cons ($make-token type value) lex))
|
|
(module (identifier-initial? identifier-subsequent?)
|
|
(define identifier-initial?
|
|
(lambda (c)
|
|
(char-alphabetic? c)))
|
|
(define identifier-subsequent?
|
|
(lambda (c)
|
|
(or (char-alphabetic? c)
|
|
(char-numeric? c)))))
|
|
(define-syntax define-state-case
|
|
(syntax-rules ()
|
|
[(_ ?def-id ?char-id clause ...)
|
|
(define (?def-id)
|
|
(let ([?char-id ($get-char)])
|
|
(state-case ?char-id clause ...)))]))
|
|
(define-state-case lex c
|
|
[eof stream-nil]
|
|
[char-whitespace? ($ws!) (lex)]
|
|
[char-numeric? (lex-number c)]
|
|
[#\/ (seen-slash)]
|
|
[identifier-initial? (put-char sp c) (lex-identifier)]
|
|
[#\( (return-token 'lparen #\()]
|
|
[#\) (return-token 'rparen #\))]
|
|
[#\! (return-token 'bang #\!)]
|
|
[#\+ (seen-plus)]
|
|
[#\- (seen-minus)]
|
|
[#\= (seen-equals)]
|
|
[#\* (return-token 'binop '*)]
|
|
[#\, (return-token 'sep #\,)]
|
|
[#\; (return-token 'sep #\;)]
|
|
[else (lex-error c)])
|
|
(module (lex-identifier)
|
|
(define (id) (return-token 'id (string->symbol (get-buf))))
|
|
(define-state-case next c
|
|
[eof (id)]
|
|
[identifier-subsequent? (put-char sp c) (next)]
|
|
[else ($unread-char c) (id)])
|
|
(define (lex-identifier) (next)))
|
|
(define-state-case seen-plus c
|
|
[eof (return-token 'binop '+)]
|
|
[char-numeric? (lex-signed-number #\+ c)]
|
|
[else (return-token 'binop '+)])
|
|
(define-state-case seen-minus c
|
|
[eof (return-token 'binop '-)]
|
|
[char-numeric? (lex-signed-number #\- c)]
|
|
[else (return-token 'binop '-)])
|
|
(define-state-case seen-equals c
|
|
[eof (return-token 'binop '=)]
|
|
[#\> (return-token 'big-arrow #f)]
|
|
[else (return-token 'binop '=)])
|
|
(module (lex-number lex-signed-number)
|
|
(define (finish-number)
|
|
(let ([str (get-buf)])
|
|
(let ([n (string->number str 10)])
|
|
(unless n (errorf 'lexer "unexpected number literal ~a" str))
|
|
(return-token 'integer n))))
|
|
(define (num)
|
|
(let ([c ($get-char)])
|
|
(state-case c
|
|
[eof (finish-number)]
|
|
[char-numeric? (put-char sp c) (num)]
|
|
[else ($unread-char c) (finish-number)])))
|
|
(define (lex-signed-number s c)
|
|
(put-char sp s)
|
|
(lex-number c))
|
|
(define (lex-number c)
|
|
(state-case c
|
|
[eof (assert #f)]
|
|
[char-numeric? (put-char sp c) (num)]
|
|
[else (assert #f)])))
|
|
(define-state-case seen-slash c
|
|
[eof (return-token 'binop '/)]
|
|
[#\* (lex-block-comment)]
|
|
[#\/ (lex-comment)]
|
|
[else (return-token 'binop '/)])
|
|
(define-state-case lex-comment c
|
|
[eof (lex)]
|
|
[#\newline ($ws!) (lex)]
|
|
[else (lex-comment)])
|
|
(define (lex-block-comment)
|
|
(define-state-case maybe-end-comment c
|
|
[eof (lex-error c)]
|
|
[#\/ ($ws!) (lex)]
|
|
[else (lex-block-comment)])
|
|
(let ([c ($get-char)])
|
|
(state-case c
|
|
[eof (lex-error c)]
|
|
[#\* (maybe-end-comment)]
|
|
[else (lex-block-comment)])))
|
|
(lex))))
|
|
|
|
(record-writer (record-type-descriptor token)
|
|
(lambda (x p wr)
|
|
(put-char p #\[)
|
|
(wr (token-type x) p)
|
|
(put-char p #\,)
|
|
(put-char p #\space)
|
|
(wr (token-value x) p)
|
|
(put-char p #\])
|
|
(put-char p #\:)
|
|
(wr (token-bfp x) p)
|
|
(put-char p #\-)
|
|
(wr (token-efp x) p)))
|
|
)
|
|
|
|
(module parser ()
|
|
(export parse *sfd*)
|
|
(import (chezscheme) (streams) (lexer))
|
|
(define *sfd*)
|
|
(module (define-grammar is sat parse-consumed-all? parse-result-value grammar-trace make-src)
|
|
(define (sep->parser sep)
|
|
(cond
|
|
[(char? sep) (sat (lambda (x) (and (eq? (token-type x) 'sep) (eq? (token-value x) sep))))]
|
|
[(symbol? sep) (sat (lambda (x) (eq? (token-type x) sep)))]
|
|
[else (errorf "don't know how to parse separator: ~s" sep)]))
|
|
(meta define (constant? x) (let ([x (syntax->datum x)]) (or (string? x) (char? x))))
|
|
(define constant->parser
|
|
(lambda (const)
|
|
(define (token-sat type val)
|
|
(sat (lambda (x)
|
|
(let ([ans (and (token? x) (eqv? (token-type x) type) (eqv? (token-value x) val))])
|
|
(when (grammar-trace) (printf " ~s is [~s, ~a]? => ~s~%" x type val ans))
|
|
ans))))
|
|
(if (string? const)
|
|
(case const
|
|
[else (token-sat 'id (string->symbol const))])
|
|
(case const
|
|
[#\( (token-sat 'lparen const)]
|
|
[#\) (token-sat 'rparen const)]
|
|
[#\! (token-sat 'bang const)]
|
|
[else (errorf 'constant->parser "don't know how to construct a parser for ~a" const)]))))
|
|
(meta define (constant->markdown k)
|
|
(format "~a" k))
|
|
(define binop->parser
|
|
(lambda (binop)
|
|
(define (binop-sat type val)
|
|
(is val
|
|
(where [x <- item] (and (token? x) (eq? (token-type x) type) (eq? (token-value x) val)))))
|
|
(define (unexpected) (errorf 'binop->parser "don't know how to construct a parser for ~a" binop))
|
|
(if (string? binop)
|
|
(binop-sat 'binop
|
|
(case binop
|
|
["=" '=]
|
|
["+" '+]
|
|
["-" '-]
|
|
["*" '*]
|
|
["/" '/]
|
|
[else (unexpected)]))
|
|
(unexpected))))
|
|
(define make-src
|
|
(lambda (bfp efp)
|
|
(make-source-object *sfd* bfp efp)))
|
|
(include "ez-grammar.ss"))
|
|
|
|
(define token
|
|
(case-lambda
|
|
[(type)
|
|
(is (token-value x)
|
|
(where
|
|
[x <- (sat (lambda (x)
|
|
(let ([ans (eq? (token-type x) type)])
|
|
(when (grammar-trace) (printf " ~s is ~s? => ~s~%" x type ans))
|
|
ans)))]))]
|
|
[(type val)
|
|
(is (token-value x)
|
|
(where
|
|
[x <- (sat (lambda (x)
|
|
(let ([ans (and
|
|
(eq? (token-type x) type)
|
|
(eqv? (token-value x) val))])
|
|
(when (grammar-trace) (printf " ~s is [~s, ~s]? => ~s~%" x type val ans))
|
|
ans)))]))]))
|
|
|
|
(define identifier (token 'id))
|
|
|
|
(define integer (token 'integer))
|
|
|
|
(define-grammar expr (markdown-directory ".")
|
|
(TERMINALS
|
|
(identifier (x y) (DESCRIPTION ("An identifier is ...")))
|
|
(integer (i) (DESCRIPTION ("An integer literal is ..."))))
|
|
(expr (e)
|
|
(BINOP src ((RIGHT "=") (LEFT "+" "-") (LEFT "*" "/")) t) =>
|
|
(lambda (src op x y)
|
|
(make-annotation `(,op ,x ,y) src `(,op ,(annotation-stripped x) ,(annotation-stripped y)))))
|
|
(term (t)
|
|
[test-SEP+ :: src "sepplus" #\( (SEP+ e #\;) #\) =>
|
|
(lambda (src e+)
|
|
(make-annotation `(SEP+ ,@e+) src `(SEP+ ,@(map annotation-stripped e+))))]
|
|
[test-SEP* :: src "sepstar" #\( (SEP* e #\,) #\) =>
|
|
(lambda (src e*)
|
|
(make-annotation `(SEP* ,@e*) src `(SEP* ,@(map annotation-stripped e*))))]
|
|
[test-OPT :: src "opt" #\( (OPT e #f) #\) =>
|
|
(lambda (src maybe-e)
|
|
(if maybe-e
|
|
(make-annotation `(OPT ,maybe-e) src `(OPT ,(annotation-stripped maybe-e)))
|
|
(make-annotation `(OPT) src `(OPT))))]
|
|
[test-K+ :: src "kplus" #\( (K+ e) #\) =>
|
|
(lambda (src e+)
|
|
(make-annotation `(K+ ,@e+) src `(K+ ,@(map annotation-stripped e+))))]
|
|
[test-K* :: src "kstar" #\( (K* e) #\) =>
|
|
(lambda (src e*)
|
|
(make-annotation `(K* ,@e*) src `(K* ,@(map annotation-stripped e*))))]
|
|
[varref :: src x =>
|
|
(lambda (src id)
|
|
(make-annotation `(id ,id) src `(id ,id)))]
|
|
[intref :: src i =>
|
|
(lambda (src n)
|
|
(make-annotation `(int ,n) src `(int ,n)))]
|
|
[group :: src #\( e #\) =>
|
|
(lambda (src e)
|
|
`(group ,src ,e))]))
|
|
|
|
(define parse
|
|
(lambda (fn ip)
|
|
(let ([token-stream (lexer fn ip)])
|
|
(define (oops)
|
|
(let ([last-token (stream-last-forced token-stream)])
|
|
(if last-token
|
|
(errorf 'parse "parse error at or before character ~s of ~a" (token-bfp last-token) fn)
|
|
(errorf 'parse "no expressions found in ~a" fn))))
|
|
;;; return the first result, if any, for which the input stream was entirely consumed.
|
|
(let loop ([res* (expr token-stream)])
|
|
(if (null? res*)
|
|
(oops)
|
|
(let ([res (car res*)])
|
|
(if (parse-consumed-all? res)
|
|
(parse-result-value res)
|
|
(loop (cdr res*))))))))))
|
|
|
|
(define run
|
|
(lambda (fn)
|
|
(import parser)
|
|
(let* ([ip (open-file-input-port fn)]
|
|
[sfd (make-source-file-descriptor fn ip #t)]
|
|
[ip (transcoded-port ip (native-transcoder))])
|
|
(fluid-let ([*sfd* sfd])
|
|
(eval
|
|
`(let ()
|
|
(define-syntax define-ops
|
|
(lambda (x)
|
|
(syntax-case x ()
|
|
[(_ op ...)
|
|
#`(begin
|
|
(define-syntax op
|
|
(lambda (x)
|
|
(let ([src (annotation-source (syntax->annotation x))])
|
|
(with-syntax ([bfp (source-object-bfp src)] [efp (source-object-efp src)])
|
|
(syntax-case x ()
|
|
[(_ e (... ...)) #'`(op (bfp . efp) ,e (... ...))])))))
|
|
...)])))
|
|
(define-ops SEP+ SEP* OPT K+ K* id int group)
|
|
(define-ops = + - * /)
|
|
(define x 'x)
|
|
(define y 'y)
|
|
(define z 'z)
|
|
,(dynamic-wind
|
|
void
|
|
(lambda () (parse fn ip))
|
|
(lambda () (close-input-port ip)))))))))
|
|
|
|
(define (ez-grammar-test)
|
|
(define n 0)
|
|
(define test
|
|
(lambda (line* okay?)
|
|
(set! n (+ n 1))
|
|
(let ([fn (format "testfile~s" n)])
|
|
(with-output-to-file fn
|
|
(lambda () (for-each (lambda (line) (printf "~a\n" line)) line*))
|
|
'replace)
|
|
(let ([result (parameterize ([compile-profile #t] [compile-interpret-simple #f])
|
|
(guard (c [else c]) (run fn)))])
|
|
(guard (c [else #f]) (profile-dump-html))
|
|
(delete-file fn)
|
|
(delete-file "profile.html")
|
|
(delete-file (format "~a.html" fn))
|
|
(unless (okay? result)
|
|
(printf "test ~s failed\n" n)
|
|
(printf " test code:")
|
|
(for-each (lambda (line) (printf " ~a\n" line)) line*)
|
|
(printf " result:\n ")
|
|
(if (condition? result)
|
|
(begin (display-condition result) (newline))
|
|
(parameterize ([pretty-initial-indent 4])
|
|
(pretty-print result)))
|
|
(newline))))))
|
|
|
|
(define-syntax returns
|
|
(syntax-rules ()
|
|
[(_ k) (lambda (x) (equal? x 'k))]))
|
|
|
|
(define-syntax oops
|
|
(syntax-rules ()
|
|
[(_ (c) e1 e2 ...)
|
|
(lambda (c) (and (condition? c) e1 e2 ...))]))
|
|
|
|
(test
|
|
'(
|
|
"1347"
|
|
)
|
|
(returns
|
|
(int (0 . 4) 1347)))
|
|
|
|
(test
|
|
'(
|
|
"3 /*"
|
|
)
|
|
(oops (c)
|
|
(equal? (condition-message c) "unexpected ~a at character ~s of ~a")
|
|
(equal? (condition-irritants c) '("eof" 6 "testfile2"))))
|
|
|
|
(test
|
|
'(
|
|
"3 / 4 + 5 opt(6)"
|
|
)
|
|
(oops (c)
|
|
(equal? (condition-message c) "parse error at or before character ~s of ~a")
|
|
(equal? (condition-irritants c) '(10 "testfile3"))))
|
|
|
|
(test
|
|
'(
|
|
"x = y = 5"
|
|
)
|
|
(returns
|
|
(=
|
|
(0 . 9)
|
|
(id (0 . 1) x)
|
|
(= (4 . 9) (id (4 . 5) y) (int (8 . 9) 5)))))
|
|
|
|
(test
|
|
'(
|
|
"x = y = x + 5 - z * 7 + 8 / z"
|
|
)
|
|
(returns
|
|
(=
|
|
(0 . 29)
|
|
(id (0 . 1) x)
|
|
(=
|
|
(4 . 29)
|
|
(id (4 . 5) y)
|
|
(+
|
|
(8 . 29)
|
|
(-
|
|
(8 . 21)
|
|
(+ (8 . 13) (id (8 . 9) x) (int (12 . 13) 5))
|
|
(* (16 . 21) (id (16 . 17) z) (int (20 . 21) 7)))
|
|
(/ (24 . 29) (int (24 . 25) 8) (id (28 . 29) z)))))))
|
|
|
|
(test
|
|
'(
|
|
"opt(opt(opt()))"
|
|
)
|
|
(returns
|
|
(OPT (0 . 15) (OPT (4 . 14) (OPT (8 . 13))))))
|
|
|
|
(test
|
|
'(
|
|
"kstar(3 4 kplus(1 2 3 kstar()))"
|
|
)
|
|
(returns
|
|
(K* (0 . 31)
|
|
(int (6 . 7) 3)
|
|
(int (8 . 9) 4)
|
|
(K+ (10 . 30)
|
|
(int (16 . 17) 1)
|
|
(int (18 . 19) 2)
|
|
(int (20 . 21) 3)
|
|
(K* (22 . 29))))))
|
|
|
|
(test
|
|
'(
|
|
"sepplus( opt() ; opt(5) ; sepstar(17, 34) ; sepstar())"
|
|
)
|
|
(returns
|
|
(SEP+ (0 . 54)
|
|
(OPT (9 . 14))
|
|
(OPT (17 . 23) (int (21 . 22) 5))
|
|
(SEP* (26 . 41) (int (34 . 36) 17) (int (38 . 40) 34))
|
|
(SEP* (44 . 53)))))
|
|
|
|
(delete-file "expr.md")
|
|
(printf "~s tests ran\n" n)
|
|
)
|
|
|
|
#!eof
|
|
|
|
The following should print only "<n> tests ran".
|
|
|
|
echo '(ez-grammar-test)' | ../bin/scheme -q ez-grammar-test.ss
|