You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
This repo is archived. You can view files and clone it, but cannot push or open issues/pull-requests.

411 lines
22 KiB
Scheme

;;; Copyright (c) 2000-2015 Dipanwita Sarkar, Andrew W. Keep, R. Kent Dybvig, Oscar Waddell
;;; See the accompanying file Copyright for details
(library (nanopass meta-parser)
(export make-meta-parser rhs-in-context-quasiquote meta-parse-term
make-quasiquote-transformer make-in-context-transformer
output-records->syntax parse-cata)
(import (rnrs)
(nanopass helpers)
(nanopass records)
(nanopass syntaxconvert)
(nanopass meta-syntax-dispatch))
(define make-ntspec-meta-parser-assoc
(lambda (tid)
(lambda (ntspec)
(cons ntspec (construct-unique-id tid "meta-parse-" (ntspec-name ntspec))))))
(define make-meta-parser
(lambda (desc)
(let* ([lang-name (language-name desc)]
[ntspecs (language-ntspecs desc)]
[tspecs (language-tspecs desc)]
[ntspec-meta-parsers (map (make-ntspec-meta-parser-assoc lang-name) ntspecs)])
(define lookup-meta-parser
(lambda (ntspec)
(cond
[(assq ntspec ntspec-meta-parsers) => cdr]
[else (syntax-violation 'define-language
(format "unexpected nonterminal ~s in langauge ~s while building meta-parser, expected on of ~s"
(syntax->datum (ntspec-name ntspec)) (syntax->datum lang-name)
(map (lambda (nt) (syntax->datum (ntspec-name nt))) ntspecs))
lang-name)])))
(define make-meta-parse-proc
(lambda (ntspec cata?)
(define parse-field
(lambda (m level maybe?)
(cond
[(meta-name->tspec m tspecs) =>
(lambda (name)
(let f ([level level] [x m])
(if (= level 0)
#`(meta-parse-term '#,name #,x #,cata? #,maybe?)
#`(map (lambda (x)
(if (nano-dots? x)
(make-nano-dots #,(f (- level 1)
#'(nano-dots-x x)))
#,(f (- level 1) #'x)))
#,x))))]
[(meta-name->ntspec m ntspecs) =>
(lambda (spec)
(with-syntax ([proc-name (lookup-meta-parser spec)])
(let f ([level level] [x m])
(if (= level 0)
#`(proc-name #,x #t #t #,maybe?)
#`(map (lambda (x)
(if (nano-dots? x)
(make-nano-dots #,(f (- level 1)
#'(nano-dots-x x)))
#,(f (- level 1) #'x)))
#,x)))))]
[else (syntax-violation 'define-language
(format "unrecognized meta variable ~s in language ~s, when building meta parser" m lang-name)
lang-name)])))
(define make-term-clause
(lambda (x)
(lambda (alt)
#`[(memq (meta-var->raw-meta-var (syntax->datum #,x))
(quote #,(tspec-meta-vars (terminal-alt-tspec alt))))
(make-nano-meta '#,alt (list (make-nano-unquote #,x)))])))
(define make-nonterm-unquote
(lambda (x)
(lambda (alt)
#`[(memq (meta-var->raw-meta-var (syntax->datum #,x))
(quote #,(ntspec-meta-vars (nonterminal-alt-ntspec alt))))
(make-nano-meta '#,alt (list (make-nano-unquote #,x)))])))
(define make-nonterm-clause
(lambda (x maybe?)
(lambda (alt)
#`(#,(lookup-meta-parser (nonterminal-alt-ntspec alt)) #,x #f nested? maybe?))))
(define make-pair-clause
(lambda (stx first-stx rest-stx)
(lambda (alt)
(with-syntax ([(field-var ...) (pair-alt-field-names alt)])
(with-syntax ([(parsed-field ...)
(map parse-field #'(field-var ...)
(pair-alt-field-levels alt)
(pair-alt-field-maybes alt))]
[field-pats (datum->syntax #'* (pair-alt-pattern alt))])
#`[#,(if (pair-alt-implicit? alt)
#`(meta-syntax-dispatch #,stx 'field-pats)
#`(and (eq? (syntax->datum #,first-stx) '#,(car (alt-syn alt)))
(meta-syntax-dispatch #,rest-stx 'field-pats)))
=> (lambda (ls)
(apply
(lambda (field-var ...)
(make-nano-meta '#,alt (list parsed-field ...)))
ls))])))))
(define separate-syn
(lambda (ls)
(let loop ([ls ls] [pair* '()] [pair-imp* '()] [term* '()] [imp* '()] [nonimp* '()])
(if (null? ls)
(values (reverse pair*) (reverse pair-imp*) (reverse term*) (reverse imp*) (reverse nonimp*))
(let ([v (car ls)])
(cond
[(nonterminal-alt? v)
(if (has-implicit-alt? (nonterminal-alt-ntspec v))
(loop (cdr ls) pair* pair-imp* term* (cons v imp*) nonimp*)
(loop (cdr ls) pair* pair-imp* term* imp* (cons v nonimp*)))]
[(terminal-alt? v) (loop (cdr ls) pair* pair-imp* (cons v term*) imp* nonimp*)]
[(pair-alt-implicit? v) (loop (cdr ls) pair* (cons v pair-imp*) term* imp* nonimp*)]
[else (loop (cdr ls) (cons v pair*) pair-imp* term* imp* nonimp*)]))))))
(let-values ([(pair-alt* pair-imp-alt* term-alt* nonterm-imp-alt* nonterm-nonimp-alt*)
(separate-syn (ntspec-alts ntspec))])
#`(lambda (stx error? nested? maybe?)
(or (syntax-case stx (unquote)
[(unquote id)
(identifier? #'id)
(if nested?
(make-nano-unquote #'id)
(cond
#,@(map (make-term-clause #'#'id) term-alt*)
; TODO: right now we can match the meta for this item, but we
; cannot generate the needed nano-meta because we have no
; alt record to put into it. (perhaps the current model is
; just pushed as far as it can be right now, and we need to
; rework it.)
#,@(map (make-nonterm-unquote #'#'id) nonterm-imp-alt*)
#,@(map (make-nonterm-unquote #'#'id) nonterm-nonimp-alt*)
[else #f]))]
[(unquote x)
(if nested?
(if #,cata?
(parse-cata #'x '#,(ntspec-name ntspec) maybe?)
(make-nano-unquote #'x))
(syntax-violation #f "cata unsupported at top-level of pattern" stx))]
[_ #f])
#,@(map (make-nonterm-clause #'stx #'maybe?) nonterm-nonimp-alt*)
(syntax-case stx ()
[(a . d)
(cond
#,@(map (make-pair-clause #'stx #'#'a #'#'d) pair-alt*)
#,@(map (make-pair-clause #'stx #'#'a #'#'d) pair-imp-alt*)
[else #f])]
; if we find something here that is not a pair, assume it should
; be treated as a quoted constant, and will be checked appropriately
; by the run-time constructor check
[atom (make-nano-quote #''atom)])
#,@(map (make-nonterm-clause #'stx #'maybe?) nonterm-imp-alt*)
(and error? (syntax-violation who "unrecognized pattern or template" stx)))))))
(with-syntax ([cata? (gentemp)])
(with-syntax ([(ntspec-id ...) (map ntspec-name ntspecs)]
[(parse-name ...) (map cdr ntspec-meta-parsers)]
[(parse-proc ...)
(map (lambda (ntspec) (make-meta-parse-proc ntspec #'cata?)) ntspecs)])
#`(lambda (ntspec-name stx input?)
(let ([cata? input?])
(define-who parse-name parse-proc) ...
(case ntspec-name
[(ntspec-id) (parse-name stx #t (not input?) #f)] ...
[else (syntax-violation '#,(construct-id lang-name "meta-parse-" lang-name)
(format "unexpected nonterminal ~s passed to meta parser for language ~s while meta-parsing, expected one of ~s"
ntspec-name '#,lang-name '#,(map ntspec-name ntspecs))
stx)]))))))))
;; used to handle output of meta-parsers
(define meta-parse-term
(lambda (tname stx cata? maybe?)
(syntax-case stx (unquote)
[(unquote x)
(if (and cata? (not (identifier? #'x)))
(parse-cata #'x (tspec-type tname) maybe?)
(make-nano-unquote #'x))]
[(a . d)
(syntax-violation 'meta-parse-term "invalid pattern or template" stx)]
[stx
; treat everything else we find as ,'foo because if we don't
; `(primapp void) is interpreted as:
; `(primapp #<procedure void>)
; instead we want it to treat it as:
; `(primapp ,'void)
; which is how it would have to be written without this.
; Note that we don't care what literal expression we find here
; because at runtime it will be checked like every other element
; used to construct the output record, and anything invalid will
; be caught then. (If we check earlier, then it forces us to use
; the terminal predicates at compile-time, which means that can't
; be in the same library, and that is a bummer for other reasons,
; so better to be flexible and let something invalid go through
; here to be caught later.)
(make-nano-quote #''stx)])))
;; used in the input meta parser to parse cata syntax
;; TODO: support for multiple input terms.
(define parse-cata
; should be more picky if nonterminal is specified--see 10/08/2007 NOTES
(lambda (x itype maybe?)
(define (serror) (syntax-violation 'define-pass "invalid cata syntax" x))
(define (s0 stuff)
(syntax-case stuff ()
[(: . stuff) (colon? #':) (s2 #f #'stuff)]
[(-> . stuff) (arrow? #'->) (s4 #f #f '() #'stuff)]
[(e . stuff) (s1 #'e #'stuff)]
[() (make-nano-cata itype x #f #f '() maybe?)]
[_ (serror)]))
(define (s1 e stuff)
(syntax-case stuff ()
[(: . stuff) (colon? #':) (s2 e #'stuff)]
[(-> . stuff)
(and (arrow? #'->) (identifier? e))
(s4 #f (list e) '() #'stuff)]
[(expr . stuff)
; it is pre-mature to check for identifier here since these could be input exprs
#;(and (identifier? #'id) (identifier? e))
(identifier? e)
(s3 #f (list #'expr e) #'stuff)]
[() (identifier? e) (make-nano-cata itype x #f #f (list e) maybe?)]
[_ (serror)]))
(define (s2 f stuff)
(syntax-case stuff ()
[(-> . stuff)
(arrow? #'->)
(s4 f #f '() #'stuff)]
[(id . stuff)
(identifier? #'id)
(s3 f (list #'id) #'stuff)]
[_ (serror)]))
(define (s3 f e* stuff)
(syntax-case stuff ()
[(-> . stuff)
(arrow? #'->)
(s4 f (reverse e*) '() #'stuff)]
[(e . stuff)
; this check is premature, since these could be input expressions
#;(identifier? #'id)
(s3 f (cons #'e e*) #'stuff)]
[()
; now we want to check if these are identifiers, because they are our return ids
(for-all identifier? e*)
(make-nano-cata itype x f #f (reverse e*) maybe?)]
[_ (serror)]))
(define (s4 f maybe-inid* routid* stuff)
(syntax-case stuff ()
[(id . stuff)
(identifier? #'id)
(s4 f maybe-inid* (cons #'id routid*) #'stuff)]
[() (make-nano-cata itype x f maybe-inid* (reverse routid*) maybe?)]
[_ (serror)]))
(syntax-case x ()
[(stuff ...) (s0 #'(stuff ...))])))
;; used in the output of the input metaparser and in the output of
;; define-pass
(define rhs-in-context-quasiquote
(lambda (id type omrec ometa-parser body)
(if type
(with-syntax ([quasiquote (datum->syntax id 'quasiquote)]
[in-context (datum->syntax id 'in-context)])
#`(let-syntax ([quasiquote
'#,(make-quasiquote-transformer id type omrec ometa-parser)]
[in-context
'#,(make-in-context-transformer id omrec ometa-parser)])
#,body))
(with-syntax ([in-context (datum->syntax id 'in-context)])
#`(let-syntax ([in-context
'#,(make-in-context-transformer id omrec ometa-parser)])
#,body)))))
;; Done to do allow a programmer to specify what the context for
;; their quasiquote is, incase it is different from the current
;; expression.
;; bug fix #8 (not sure what this refers to)
(define make-in-context-transformer
(lambda (pass-name omrec ometa-parser)
(lambda (x)
(syntax-case x ()
[(_ ntname stuff ...)
(with-syntax ([quasiquote (datum->syntax pass-name 'quasiquote)])
#`(let-syntax ([quasiquote '#,(make-quasiquote-transformer
pass-name #'ntname
omrec ometa-parser)])
stuff ...))]))))
;; Used to make quasiquote transformers in the in-context transformer
;; and in the normal right hand side transformer in do-define-pass and
;; make-rhs
(define make-quasiquote-transformer
(lambda (pass-name ntname omrec ometa-parser)
(lambda (x)
(syntax-case x ()
[(_ stuff)
; TODO move error message like this into wherever the template doesn't match is
(output-records->syntax pass-name ntname omrec ometa-parser
(ometa-parser (syntax->datum ntname) #'stuff #f))
#;(let ([stx #f])
(trace-let quasiquote-transformer ([t (syntax->datum #'stuff)])
(let ([t (output-records->syntax pass-name ntname omrec ometa-parser
(ometa-parser (syntax->datum ntname) #'stuff #f))])
(set! stx t)
(syntax->datum t)))
stx)]))))
;; helper function used by the output metaparser in the meta-parsing
;; two step
;; TODO:
;; - defeated (for now) at getting rid of the unnecessary bindings. still convinced this is possible and to be fixed.
;; - we are using bound-id-union to append lists of variables that are unique by construction (unless I am misreading the code) this is pointless.
;; - we are mapping over the field-names to find the specs for the fields. this seems waistful in a small way (building an unnecessary list) and a big way (lookup something that could be cached)
;; - we are always building the checking version of the pair-alt constructor here, but could probably be avoiding that.
(define output-records->syntax
(lambda (pass-name ntname omrec ometa-parser rhs-rec)
(define id->msg
(lambda (id)
(cond
[(fx=? (optimize-level) 3) #f]
[(syntax->source-info id) =>
(lambda (si) (format "expression ~s ~a" (syntax->datum id) si))]
[else (format "expression ~s" (syntax->datum id))])))
(define process-nano-fields
(lambda (elt* spec* binding*)
(if (null? elt*)
(values '() '() '() binding*)
(let-values ([(elt elt-id elt-var* binding*) (process-nano-elt (car elt*) (car spec*) binding*)])
(let-values ([(elt* elt*-id elt*-var* binding*)
(process-nano-fields (cdr elt*) (cdr spec*) binding*)])
(values (cons elt elt*) (cons elt-id elt*-id) (bound-id-union elt-var* elt*-var*) binding*))))))
(define process-nano-dots
(lambda (orig-elt spec binding*)
; ought to check that each of var* are bound to proper lists
; and that they have the same lengths
(let-values ([(elt id var* binding*) (process-nano-elt (nano-dots-x orig-elt) spec binding*)])
(if (null? var*)
; TODO: store original syntax object in nano-dots record and use it here
(syntax-violation (syntax->datum pass-name)
"no variables within ellipsis pattern"
(let f ([elt (nano-dots-x orig-elt)])
(cond
[(nano-meta? elt) (map f (nano-meta-fields elt))]
[(nano-quote? elt) (cadr (nano-quote-x elt))]
[(nano-unquote? elt) (nano-unquote-x elt)]
[(nano-cata? elt) (nano-cata-syntax elt)]
[(list? elt) (map f elt)]
[else elt])))
(values
(if (null? (cdr var*))
(let ([t (car var*)])
(if (eq? t elt)
t
#`(map (lambda (#,t) #,elt) #,t)))
#`(map (lambda #,var* #,elt) #,@var*))
id var* binding*)))))
(define process-nano-list
(lambda (elt* spec binding*)
(let f ([elt* elt*] [binding* binding*])
(if (null? elt*)
(values #''() '() '() binding*)
(let ([elt (car elt*)] [elt* (cdr elt*)])
(if (nano-dots? elt)
(if (null? elt*)
(process-nano-dots elt spec binding*)
(let-values ([(elt elt-id elt-var* binding*)
(process-nano-dots elt spec binding*)])
(let-values ([(elt* elt*-id* elt*-var* binding*) (f elt* binding*)])
(values #`(append #,elt #,elt*)
(cons elt-id elt*-id*)
(bound-id-union elt-var* elt*-var*)
binding*))))
(let-values ([(elt elt-id elt-var* binding*) (process-nano-elt elt spec binding*)])
(let-values ([(elt* elt*-id* elt*-var* binding*) (f elt* binding*)])
(values #`(cons #,elt #,elt*)
(cons elt-id elt*-id*)
(bound-id-union elt-var* elt*-var*)
binding*)))))))))
(define process-nano-meta
(lambda (x binding*)
(let ([prec-alt (nano-meta-alt x)])
(let-values ([(field* id* var* binding*)
(process-nano-fields (nano-meta-fields x)
(map (lambda (x) (find-spec x omrec)) (pair-alt-field-names prec-alt))
binding*)])
(values
#`(#,(pair-alt-maker prec-alt) '#,pass-name #,@field* #,@(map id->msg id*))
#f var* binding*)))))
(define process-nano-elt
(lambda (elt spec binding*)
(cond
[(nano-meta? elt)
(assert (pair-alt? (nano-meta-alt elt)))
(process-nano-meta elt binding*)]
[(nano-quote? elt) (let ([x (nano-quote-x elt)]) (values x x '() binding*))]
[(nano-unquote? elt)
(let ([x (nano-unquote-x elt)])
(with-syntax ([expr (if (ntspec? spec)
; TODO: when we eventually turn these processors into named entities (either
; directly with meta define, define-syntax or some sort of property, replace
; this with the appropriate call. In the meantime this should allow us to
; remove some of our in-contexts
(with-syntax ([quasiquote (datum->syntax pass-name 'quasiquote)])
#`(let-syntax ([quasiquote '#,(make-quasiquote-transformer
pass-name (spec-type spec)
omrec ometa-parser)])
#,x))
x)]
[tmp (car (generate-temporaries '(x)))])
(values #'tmp x (list #'tmp) (cons #'(tmp expr) binding*))))]
[(list? elt) (process-nano-list elt spec binding*)]
[else (values elt elt '() binding*)])))
(let-values ([(elt id var* binding*)
(process-nano-elt rhs-rec (nonterm-id->ntspec 'define-pass ntname (language-ntspecs omrec)) '())])
#`(let #,binding* #,elt)))))