diff options
author | Noah Lavine <nlavine@haverford.edu> | 2011-03-28 15:13:35 -0400 |
---|---|---|
committer | Noah Lavine <nlavine@haverford.edu> | 2011-09-05 21:51:07 -0400 |
commit | e3e43d6a0460e277ae3f337c6ad22343fb7d21f2 (patch) | |
tree | 2c588527b353e2d8f24c9ee79966bf10972ee73d | |
parent | f1b75a66871c95d583736a7b04e9f45b28f3b07b (diff) | |
download | guile-e3e43d6a0460e277ae3f337c6ad22343fb7d21f2.tar.gz |
Move define-nonterm
* module/ice-9/peg/string-peg.scm: remove define-nonterm and make a simpler
macro called `define-sexp-parser' to make the PEG grammar
* module/ice-9/peg.scm: move define-nonterm macro to this file
* module/ice-9/peg/codegen.scm: move code to wrap a parser result nicely to
this file, under name `wrap-parser-for-users'
-rw-r--r-- | module/ice-9/peg.scm | 33 | ||||
-rw-r--r-- | module/ice-9/peg/codegen.scm | 29 | ||||
-rw-r--r-- | module/ice-9/peg/string-peg.scm | 107 |
3 files changed, 89 insertions, 80 deletions
diff --git a/module/ice-9/peg.scm b/module/ice-9/peg.scm index 644af6d79..4f4bbf877 100644 --- a/module/ice-9/peg.scm +++ b/module/ice-9/peg.scm @@ -19,7 +19,7 @@ (define-module (ice-9 peg) #:export (peg-parse -; define-nonterm + define-nonterm ; define-nonterm-f peg-match) ; #:export-syntax (define-nonterm) @@ -30,7 +30,7 @@ #:re-export (peg-sexp-compile define-grammar define-grammar-f - define-nonterm +; define-nonterm keyword-flatten context-flatten peg:start @@ -67,6 +67,35 @@ execute the STMTs and try again." #f (make-prec 0 (car res) string (string-collapse (cadr res)))))) +;; The results of parsing using a nonterminal are cached. Think of it like a +;; hash with no conflict resolution. Process for deciding on the cache size +;; wasn't very scientific; just ran the benchmarks and stopped a little after +;; the point of diminishing returns on my box. +(define *cache-size* 512) + +;; Defines a new nonterminal symbol accumulating with ACCUM. +(define-syntax define-nonterm + (lambda (x) + (syntax-case x () + ((_ sym accum pat) + (let ((matchf (peg-sexp-compile #'pat (syntax->datum #'accum))) + (accumsym (syntax->datum #'accum)) + (c (datum->syntax x (gensym))));; the cache + ;; CODE is the code to parse the string if the result isn't cached. + (let ((syn (wrap-parser-for-users x matchf accumsym #'sym))) + #`(begin + (define #,c (make-vector *cache-size* #f));; the cache + (define (sym str strlen at) + (let* ((vref (vector-ref #,c (modulo at *cache-size*)))) + ;; Check to see whether the value is cached. + (if (and vref (eq? (car vref) str) (= (cadr vref) at)) + (caddr vref);; If it is return it. + (let ((fres ;; Else calculate it and cache it. + (#,syn str strlen at))) + (vector-set! #,c (modulo at *cache-size*) + (list str at fres)) + fres))))))))))) + ;; Searches through STRING for something that parses to PEG-MATCHER. Think ;; regexp search. (define-syntax peg-match diff --git a/module/ice-9/peg/codegen.scm b/module/ice-9/peg/codegen.scm index 2c85ccca9..0804d1ed4 100644 --- a/module/ice-9/peg/codegen.scm +++ b/module/ice-9/peg/codegen.scm @@ -18,7 +18,7 @@ ;;;; (define-module (ice-9 peg codegen) - #:export (peg-sexp-compile) + #:export (peg-sexp-compile wrap-parser-for-users) #:use-module (ice-9 peg) #:use-module (ice-9 peg string-peg) #:use-module (ice-9 pretty-print) @@ -244,3 +244,30 @@ return EXP." (lit #`(and success #,(cggr accum 'cg-body #'(reverse body) #'new-end))))))))))) + +;; Packages the results of a parser +(define (wrap-parser-for-users for-syntax parser accumsym s-syn) + #`(lambda (str strlen at) + (let ((res (#,parser str strlen at))) + ;; Try to match the nonterminal. + (if res + ;; If we matched, do some post-processing to figure out + ;; what data to propagate upward. + (let ((at (car res)) + (body (cadr res))) + #,(cond + ((eq? accumsym 'name) + #`(list at '#,s-syn)) + ((eq? accumsym 'all) + #`(list (car res) + (cond + ((not (list? body)) + (list '#,s-syn body)) + ((null? body) '#,s-syn) + ((symbol? (car body)) + (list '#,s-syn body)) + (else (cons '#,s-syn body))))) + ((eq? accumsym 'none) #`(list (car res) '())) + (else #`(begin res)))) + ;; If we didn't match, just return false. + #f)))) diff --git a/module/ice-9/peg/string-peg.scm b/module/ice-9/peg/string-peg.scm index f7e21f6b6..a899727b4 100644 --- a/module/ice-9/peg/string-peg.scm +++ b/module/ice-9/peg/string-peg.scm @@ -22,16 +22,11 @@ peg-as-peg define-grammar define-grammar-f - define-nonterm peg-grammar) #:use-module (ice-9 peg) - #:use-module (ice-9 peg codegen)) - -;; The results of parsing using a nonterminal are cached. Think of it like a -;; hash with no conflict resolution. Process for deciding on the cache size -;; wasn't very scientific; just ran the benchmarks and stopped a little after -;; the point of diminishing returns on my box. -(define *cache-size* 512) + #:use-module (ice-9 peg codegen) + #:use-module (ice-9 peg match-record) + #:use-module (ice-9 peg simplify-tree)) ;; Gets the left-hand depth of a list. (define (depth lst) @@ -39,58 +34,6 @@ 0 (+ 1 (depth (car lst))))) -(eval-when (compile load eval) -(define (syntax-for-non-cache-case for-syntax matchf-syn accumsym s-syn) -; (let ((matchf-syn (datum->syntax for-syntax matchf))) - #`(lambda (str strlen at) - (let ((res (#,matchf-syn str strlen at))) - ;; Try to match the nonterminal. - (if res - ;; If we matched, do some post-processing to figure out - ;; what data to propagate upward. - (let ((at (car res)) - (body (cadr res))) - #,(cond - ((eq? accumsym 'name) - #`(list at '#,s-syn)) - ((eq? accumsym 'all) - #`(list (car res) - (cond - ((not (list? body)) - (list '#,s-syn body)) - ((null? body) '#,s-syn) - ((symbol? (car body)) - (list '#,s-syn body)) - (else (cons '#,s-syn body))))) - ((eq? accumsym 'none) #`(list (car res) '())) - (else #`(begin res)))) - ;; If we didn't match, just return false. - #f)))) -) - -;; Defines a new nonterminal symbol accumulating with ACCUM. -(define-syntax define-nonterm - (lambda (x) - (syntax-case x () - ((_ sym accum pat) - (let ((matchf (peg-sexp-compile #'pat (syntax->datum #'accum))) - (accumsym (syntax->datum #'accum)) - (c (datum->syntax x (gensym))));; the cache - ;; CODE is the code to parse the string if the result isn't cached. - (let ((syn (syntax-for-non-cache-case x matchf accumsym #'sym))) - #`(begin - (define #,c (make-vector *cache-size* #f));; the cache - (define (sym str strlen at) - (let* ((vref (vector-ref #,c (modulo at *cache-size*)))) - ;; Check to see whether the value is cached. - (if (and vref (eq? (car vref) str) (= (cadr vref) at)) - (caddr vref);; If it is return it. - (let ((fres ;; Else calculate it and cache it. - (#,syn str strlen at))) - (vector-set! #,c (modulo at *cache-size*) - (list str at fres)) - fres))))))))))) - ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;; Parse string PEGs using sexp PEGs. ;; See the variable PEG-AS-PEG for an easier-to-read syntax. @@ -114,34 +57,43 @@ LB < '[' RB < ']' ") -(define-nonterm peg-grammar all +(define-syntax define-sexp-parser + (lambda (x) + (syntax-case x () + ((_ sym accum pat) + (let* ((matchf (peg-sexp-compile #'pat (syntax->datum #'accum))) + (accumsym (syntax->datum #'accum)) + (syn (wrap-parser-for-users x matchf accumsym #'sym))) + #`(define sym #,syn)))))) + +(define-sexp-parser peg-grammar all (body lit (and peg-nonterminal (or "<--" "<-" "<") peg-sp peg-pattern) +)) -(define-nonterm peg-pattern all +(define-sexp-parser peg-pattern all (and peg-alternative (body lit (and (ignore "/") peg-sp peg-alternative) *))) -(define-nonterm peg-alternative all +(define-sexp-parser peg-alternative all (body lit (and (body lit (or "!" "&") ?) peg-sp peg-suffix) +)) -(define-nonterm peg-suffix all +(define-sexp-parser peg-suffix all (and peg-primary (body lit (and (or "*" "+" "?") peg-sp) *))) -(define-nonterm peg-primary all +(define-sexp-parser peg-primary all (or (and "(" peg-sp peg-pattern ")" peg-sp) (and "." peg-sp) peg-literal peg-charclass (and peg-nonterminal (body ! "<" 1)))) -(define-nonterm peg-literal all +(define-sexp-parser peg-literal all (and "'" (body lit (and (body ! "'" 1) peg-any) *) "'" peg-sp)) -(define-nonterm peg-charclass all +(define-sexp-parser peg-charclass all (and (ignore "[") (body lit (and (body ! "]" 1) (or charclass-range charclass-single)) *) (ignore "]") peg-sp)) -(define-nonterm charclass-range all (and peg-any "-" peg-any)) -(define-nonterm charclass-single all peg-any) -(define-nonterm peg-nonterminal all +(define-sexp-parser charclass-range all (and peg-any "-" peg-any)) +(define-sexp-parser charclass-single all peg-any) +(define-sexp-parser peg-nonterminal all (and (body lit (or (range #\a #\z) (range #\A #\Z) (range #\0 #\9) "-") +) peg-sp)) -(define-nonterm peg-sp none +(define-sexp-parser peg-sp none (body lit (or " " "\t" "\n") *)) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -297,9 +249,10 @@ RB < ']' ;; Builds a lambda-expressions for the pattern STR using accum. (define (peg-string-compile str-stx accum) - (peg-sexp-compile - (compressor - (peg-pattern->defn - (peg:tree (peg-parse peg-pattern (syntax->datum str-stx))) str-stx) - str-stx) - accum)) + (let ((string (syntax->datum str-stx))) + (peg-sexp-compile + (compressor + (peg-pattern->defn + (peg:tree (peg-parse peg-pattern string)) str-stx) + str-stx) + accum))) |