You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
This repo is archived. You can view files and clone it, but cannot push or open issues/pull-requests.

571 lines
18 KiB
Scheme

;;; Copyright 2017 Cisco Systems, Inc.
;;;
;;; Licensed under the Apache License, Version 2.0 (the "License");
;;; you may not use this file except in compliance with the License.
;;; You may obtain a copy of the License at
;;;
;;; http://www.apache.org/licenses/LICENSE-2.0
;;;
;;; Unless required by applicable law or agreed to in writing, software
;;; distributed under the License is distributed on an "AS IS" BASIS,
;;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
;;; See the License for the specific language governing permissions and
;;; limitations under the License.
;;; This file contains a sample parser defined via the ez-grammar system
;;; and a simple test of the parser.
;;; This file is organized as follows:
;;;
;;; - (streams) library providing the required exports for ez-grammar and
;;; the parser.
;;;
;;; - (state-case) library exporting the state-case macro, copped from
;;; cmacros.ss, for use by the lexer.
;;;
;;; - (lexer) library providing a simple lexer that reads characters
;;; from a port and produces a corresponding stream of tokens.
;;;
;;; - (parser) library providing the sample parser.
;;;
;;; - ez-grammar-test procedure that tests the sample parser.
;;;
;;; Instructions for running the test are at the end of this file.
(library (streams)
(export stream-cons stream-car stream-cdr stream-nil stream-null?
stream-map stream stream-append2 stream-append-all stream-last-forced)
(import (chezscheme))
(define stream-cons
(lambda (x thunk)
(cons x thunk)))
(define stream-car
(lambda (x)
(car x)))
(define stream-cdr
(lambda (x)
(when (procedure? (cdr x)) (set-cdr! x ((cdr x))))
(cdr x)))
(define stream-nil '())
(define stream-null?
(lambda (x)
(null? x)))
(define stream-map
(lambda (f x)
(if (stream-null? x)
'()
(stream-cons (f (stream-car x))
(lambda ()
(stream-map f (stream-cdr x)))))))
(define stream
(lambda xs
xs))
(define stream-append2
(lambda (xs thunk)
(if (null? xs)
(thunk)
(stream-cons (stream-car xs)
(lambda ()
(stream-append2 (stream-cdr xs) thunk))))))
(define stream-append-all
(lambda (stream$) ;; stream of streams
(if (stream-null? stream$)
stream$
(stream-append2 (stream-car stream$)
(lambda () (stream-append-all (stream-cdr stream$)))))))
(define stream-last-forced
(lambda (x)
(and (not (null? x))
(let loop ([x x])
(let ([next (cdr x)])
(if (pair? next)
(loop next)
(car x)))))))
)
(library (state-case)
(export state-case eof)
(import (chezscheme))
;;; from Chez Scheme Version 9.5.1 cmacros.ss
(define-syntax state-case
(lambda (x)
(define state-case-test
(lambda (cvar k)
(with-syntax ((cvar cvar))
(syntax-case k (-)
(char
(char? (datum char))
#'(char=? cvar char))
((char1 - char2)
(and (char? (datum char1)) (char? (datum char2)))
#'(char<=? char1 cvar char2))
(predicate
(identifier? #'predicate)
#'(predicate cvar))))))
(define state-case-help
(lambda (cvar clauses)
(syntax-case clauses (else)
(((else exp1 exp2 ...))
#'(begin exp1 exp2 ...))
((((k ...) exp1 exp2 ...) . more)
(with-syntax (((test ...)
(map (lambda (k) (state-case-test cvar k))
#'(k ...)))
(rest (state-case-help cvar #'more)))
#'(if (or test ...) (begin exp1 exp2 ...) rest)))
(((k exp1 exp2 ...) . more)
(with-syntax ((test (state-case-test cvar #'k))
(rest (state-case-help cvar #'more)))
#'(if test (begin exp1 exp2 ...) rest))))))
(syntax-case x (eof)
((_ cvar (eof exp1 exp2 ...) more ...)
(identifier? #'cvar)
(with-syntax ((rest (state-case-help #'cvar #'(more ...))))
#'(if (eof-object? cvar)
(begin exp1 exp2 ...)
rest))))))
(define-syntax eof
(lambda (x)
(syntax-error x "misplaced aux keyword")))
)
(library (lexer)
(export token? token-type token-value token-bfp token-efp lexer)
(import (chezscheme) (state-case) (streams))
(define-record-type token
(nongenerative)
(fields type value bfp efp))
;; test lexer
(define lexer
(lambda (fn ip)
(define $prev-pos 0)
(define $pos 0)
(define ($get-char)
(set! $pos (+ $pos 1))
(get-char ip))
(define ($unread-char c)
(set! $pos (- $pos 1))
(unread-char c ip))
(define ($ws!) (set! $prev-pos $pos))
(define ($make-token type value)
(let ([tok (make-token type value $prev-pos $pos)])
(set! $prev-pos $pos)
tok))
(define ($lex-error c)
(errorf #f "unexpected ~a at character ~s of ~a"
(if (eof-object? c)
"eof"
(format "character '~c'" c))
$pos fn))
(define-syntax lex-error
(syntax-rules ()
[(_ ?c)
(let ([c ?c])
($lex-error c)
(void))]))
(let-values ([(sp get-buf) (open-string-output-port)])
(define (return-token type value)
(stream-cons ($make-token type value) lex))
(module (identifier-initial? identifier-subsequent?)
(define identifier-initial?
(lambda (c)
(char-alphabetic? c)))
(define identifier-subsequent?
(lambda (c)
(or (char-alphabetic? c)
(char-numeric? c)))))
(define-syntax define-state-case
(syntax-rules ()
[(_ ?def-id ?char-id clause ...)
(define (?def-id)
(let ([?char-id ($get-char)])
(state-case ?char-id clause ...)))]))
(define-state-case lex c
[eof stream-nil]
[char-whitespace? ($ws!) (lex)]
[char-numeric? (lex-number c)]
[#\/ (seen-slash)]
[identifier-initial? (put-char sp c) (lex-identifier)]
[#\( (return-token 'lparen #\()]
[#\) (return-token 'rparen #\))]
[#\! (return-token 'bang #\!)]
[#\+ (seen-plus)]
[#\- (seen-minus)]
[#\= (seen-equals)]
[#\* (return-token 'binop '*)]
[#\, (return-token 'sep #\,)]
[#\; (return-token 'sep #\;)]
[else (lex-error c)])
(module (lex-identifier)
(define (id) (return-token 'id (string->symbol (get-buf))))
(define-state-case next c
[eof (id)]
[identifier-subsequent? (put-char sp c) (next)]
[else ($unread-char c) (id)])
(define (lex-identifier) (next)))
(define-state-case seen-plus c
[eof (return-token 'binop '+)]
[char-numeric? (lex-signed-number #\+ c)]
[else (return-token 'binop '+)])
(define-state-case seen-minus c
[eof (return-token 'binop '-)]
[char-numeric? (lex-signed-number #\- c)]
[else (return-token 'binop '-)])
(define-state-case seen-equals c
[eof (return-token 'binop '=)]
[#\> (return-token 'big-arrow #f)]
[else (return-token 'binop '=)])
(module (lex-number lex-signed-number)
(define (finish-number)
(let ([str (get-buf)])
(let ([n (string->number str 10)])
(unless n (errorf 'lexer "unexpected number literal ~a" str))
(return-token 'integer n))))
(define (num)
(let ([c ($get-char)])
(state-case c
[eof (finish-number)]
[char-numeric? (put-char sp c) (num)]
[else ($unread-char c) (finish-number)])))
(define (lex-signed-number s c)
(put-char sp s)
(lex-number c))
(define (lex-number c)
(state-case c
[eof (assert #f)]
[char-numeric? (put-char sp c) (num)]
[else (assert #f)])))
(define-state-case seen-slash c
[eof (return-token 'binop '/)]
[#\* (lex-block-comment)]
[#\/ (lex-comment)]
[else (return-token 'binop '/)])
(define-state-case lex-comment c
[eof (lex)]
[#\newline ($ws!) (lex)]
[else (lex-comment)])
(define (lex-block-comment)
(define-state-case maybe-end-comment c
[eof (lex-error c)]
[#\/ ($ws!) (lex)]
[else (lex-block-comment)])
(let ([c ($get-char)])
(state-case c
[eof (lex-error c)]
[#\* (maybe-end-comment)]
[else (lex-block-comment)])))
(lex))))
(record-writer (record-type-descriptor token)
(lambda (x p wr)
(put-char p #\[)
(wr (token-type x) p)
(put-char p #\,)
(put-char p #\space)
(wr (token-value x) p)
(put-char p #\])
(put-char p #\:)
(wr (token-bfp x) p)
(put-char p #\-)
(wr (token-efp x) p)))
)
(module parser ()
(export parse *sfd*)
(import (chezscheme) (streams) (lexer))
(define *sfd*)
(module (define-grammar is sat parse-consumed-all? parse-result-value grammar-trace make-src)
(define (sep->parser sep)
(cond
[(char? sep) (sat (lambda (x) (and (eq? (token-type x) 'sep) (eq? (token-value x) sep))))]
[(symbol? sep) (sat (lambda (x) (eq? (token-type x) sep)))]
[else (errorf "don't know how to parse separator: ~s" sep)]))
(meta define (constant? x) (let ([x (syntax->datum x)]) (or (string? x) (char? x))))
(define constant->parser
(lambda (const)
(define (token-sat type val)
(sat (lambda (x)
(let ([ans (and (token? x) (eqv? (token-type x) type) (eqv? (token-value x) val))])
(when (grammar-trace) (printf " ~s is [~s, ~a]? => ~s~%" x type val ans))
ans))))
(if (string? const)
(case const
[else (token-sat 'id (string->symbol const))])
(case const
[#\( (token-sat 'lparen const)]
[#\) (token-sat 'rparen const)]
[#\! (token-sat 'bang const)]
[else (errorf 'constant->parser "don't know how to construct a parser for ~a" const)]))))
(meta define (constant->markdown k)
(format "~a" k))
(define binop->parser
(lambda (binop)
(define (binop-sat type val)
(is val
(where [x <- item] (and (token? x) (eq? (token-type x) type) (eq? (token-value x) val)))))
(define (unexpected) (errorf 'binop->parser "don't know how to construct a parser for ~a" binop))
(if (string? binop)
(binop-sat 'binop
(case binop
["=" '=]
["+" '+]
["-" '-]
["*" '*]
["/" '/]
[else (unexpected)]))
(unexpected))))
(define make-src
(lambda (bfp efp)
(make-source-object *sfd* bfp efp)))
(include "ez-grammar.ss"))
(define token
(case-lambda
[(type)
(is (token-value x)
(where
[x <- (sat (lambda (x)
(let ([ans (eq? (token-type x) type)])
(when (grammar-trace) (printf " ~s is ~s? => ~s~%" x type ans))
ans)))]))]
[(type val)
(is (token-value x)
(where
[x <- (sat (lambda (x)
(let ([ans (and
(eq? (token-type x) type)
(eqv? (token-value x) val))])
(when (grammar-trace) (printf " ~s is [~s, ~s]? => ~s~%" x type val ans))
ans)))]))]))
(define identifier (token 'id))
(define integer (token 'integer))
(define-grammar expr (markdown-directory ".")
(TERMINALS
(identifier (x y) (DESCRIPTION ("An identifier is ...")))
(integer (i) (DESCRIPTION ("An integer literal is ..."))))
(expr (e)
(BINOP src ((RIGHT "=") (LEFT "+" "-") (LEFT "*" "/")) t) =>
(lambda (src op x y)
(make-annotation `(,op ,x ,y) src `(,op ,(annotation-stripped x) ,(annotation-stripped y)))))
(term (t)
[test-SEP+ :: src "sepplus" #\( (SEP+ e #\;) #\) =>
(lambda (src e+)
(make-annotation `(SEP+ ,@e+) src `(SEP+ ,@(map annotation-stripped e+))))]
[test-SEP* :: src "sepstar" #\( (SEP* e #\,) #\) =>
(lambda (src e*)
(make-annotation `(SEP* ,@e*) src `(SEP* ,@(map annotation-stripped e*))))]
[test-OPT :: src "opt" #\( (OPT e #f) #\) =>
(lambda (src maybe-e)
(if maybe-e
(make-annotation `(OPT ,maybe-e) src `(OPT ,(annotation-stripped maybe-e)))
(make-annotation `(OPT) src `(OPT))))]
[test-K+ :: src "kplus" #\( (K+ e) #\) =>
(lambda (src e+)
(make-annotation `(K+ ,@e+) src `(K+ ,@(map annotation-stripped e+))))]
[test-K* :: src "kstar" #\( (K* e) #\) =>
(lambda (src e*)
(make-annotation `(K* ,@e*) src `(K* ,@(map annotation-stripped e*))))]
[varref :: src x =>
(lambda (src id)
(make-annotation `(id ,id) src `(id ,id)))]
[intref :: src i =>
(lambda (src n)
(make-annotation `(int ,n) src `(int ,n)))]
[group :: src #\( e #\) =>
(lambda (src e)
`(group ,src ,e))]))
(define parse
(lambda (fn ip)
(let ([token-stream (lexer fn ip)])
(define (oops)
(let ([last-token (stream-last-forced token-stream)])
(if last-token
(errorf 'parse "parse error at or before character ~s of ~a" (token-bfp last-token) fn)
(errorf 'parse "no expressions found in ~a" fn))))
;;; return the first result, if any, for which the input stream was entirely consumed.
(let loop ([res* (expr token-stream)])
(if (null? res*)
(oops)
(let ([res (car res*)])
(if (parse-consumed-all? res)
(parse-result-value res)
(loop (cdr res*))))))))))
(define run
(lambda (fn)
(import parser)
(let* ([ip (open-file-input-port fn)]
[sfd (make-source-file-descriptor fn ip #t)]
[ip (transcoded-port ip (native-transcoder))])
(fluid-let ([*sfd* sfd])
(eval
`(let ()
(define-syntax define-ops
(lambda (x)
(syntax-case x ()
[(_ op ...)
#`(begin
(define-syntax op
(lambda (x)
(let ([src (annotation-source (syntax->annotation x))])
(with-syntax ([bfp (source-object-bfp src)] [efp (source-object-efp src)])
(syntax-case x ()
[(_ e (... ...)) #'`(op (bfp . efp) ,e (... ...))])))))
...)])))
(define-ops SEP+ SEP* OPT K+ K* id int group)
(define-ops = + - * /)
(define x 'x)
(define y 'y)
(define z 'z)
,(dynamic-wind
void
(lambda () (parse fn ip))
(lambda () (close-input-port ip)))))))))
(define (ez-grammar-test)
(define n 0)
(define test
(lambda (line* okay?)
(set! n (+ n 1))
(let ([fn (format "testfile~s" n)])
(with-output-to-file fn
(lambda () (for-each (lambda (line) (printf "~a\n" line)) line*))
'replace)
(let ([result (parameterize ([compile-profile #t] [compile-interpret-simple #f])
(guard (c [else c]) (run fn)))])
(guard (c [else #f]) (profile-dump-html))
(delete-file fn)
(delete-file "profile.html")
(delete-file (format "~a.html" fn))
(unless (okay? result)
(printf "test ~s failed\n" n)
(printf " test code:")
(for-each (lambda (line) (printf " ~a\n" line)) line*)
(printf " result:\n ")
(if (condition? result)
(begin (display-condition result) (newline))
(parameterize ([pretty-initial-indent 4])
(pretty-print result)))
(newline))))))
(define-syntax returns
(syntax-rules ()
[(_ k) (lambda (x) (equal? x 'k))]))
(define-syntax oops
(syntax-rules ()
[(_ (c) e1 e2 ...)
(lambda (c) (and (condition? c) e1 e2 ...))]))
(test
'(
"1347"
)
(returns
(int (0 . 4) 1347)))
(test
'(
"3 /*"
)
(oops (c)
(equal? (condition-message c) "unexpected ~a at character ~s of ~a")
(equal? (condition-irritants c) '("eof" 6 "testfile2"))))
(test
'(
"3 / 4 + 5 opt(6)"
)
(oops (c)
(equal? (condition-message c) "parse error at or before character ~s of ~a")
(equal? (condition-irritants c) '(10 "testfile3"))))
(test
'(
"x = y = 5"
)
(returns
(=
(0 . 9)
(id (0 . 1) x)
(= (4 . 9) (id (4 . 5) y) (int (8 . 9) 5)))))
(test
'(
"x = y = x + 5 - z * 7 + 8 / z"
)
(returns
(=
(0 . 29)
(id (0 . 1) x)
(=
(4 . 29)
(id (4 . 5) y)
(+
(8 . 29)
(-
(8 . 21)
(+ (8 . 13) (id (8 . 9) x) (int (12 . 13) 5))
(* (16 . 21) (id (16 . 17) z) (int (20 . 21) 7)))
(/ (24 . 29) (int (24 . 25) 8) (id (28 . 29) z)))))))
(test
'(
"opt(opt(opt()))"
)
(returns
(OPT (0 . 15) (OPT (4 . 14) (OPT (8 . 13))))))
(test
'(
"kstar(3 4 kplus(1 2 3 kstar()))"
)
(returns
(K* (0 . 31)
(int (6 . 7) 3)
(int (8 . 9) 4)
(K+ (10 . 30)
(int (16 . 17) 1)
(int (18 . 19) 2)
(int (20 . 21) 3)
(K* (22 . 29))))))
(test
'(
"sepplus( opt() ; opt(5) ; sepstar(17, 34) ; sepstar())"
)
(returns
(SEP+ (0 . 54)
(OPT (9 . 14))
(OPT (17 . 23) (int (21 . 22) 5))
(SEP* (26 . 41) (int (34 . 36) 17) (int (38 . 40) 34))
(SEP* (44 . 53)))))
(delete-file "expr.md")
(printf "~s tests ran\n" n)
)
#!eof
The following should print only "<n> tests ran".
echo '(ez-grammar-test)' | ../bin/scheme -q ez-grammar-test.ss