summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNoah Lavine <nlavine@haverford.edu>2011-03-28 15:13:35 -0400
committerNoah Lavine <nlavine@haverford.edu>2011-09-05 21:51:07 -0400
commite3e43d6a0460e277ae3f337c6ad22343fb7d21f2 (patch)
tree2c588527b353e2d8f24c9ee79966bf10972ee73d
parentf1b75a66871c95d583736a7b04e9f45b28f3b07b (diff)
downloadguile-e3e43d6a0460e277ae3f337c6ad22343fb7d21f2.tar.gz
Move define-nonterm
* module/ice-9/peg/string-peg.scm: remove define-nonterm and make a simpler macro called `define-sexp-parser' to make the PEG grammar * module/ice-9/peg.scm: move define-nonterm macro to this file * module/ice-9/peg/codegen.scm: move code to wrap a parser result nicely to this file, under name `wrap-parser-for-users'
-rw-r--r--module/ice-9/peg.scm33
-rw-r--r--module/ice-9/peg/codegen.scm29
-rw-r--r--module/ice-9/peg/string-peg.scm107
3 files changed, 89 insertions, 80 deletions
diff --git a/module/ice-9/peg.scm b/module/ice-9/peg.scm
index 644af6d79..4f4bbf877 100644
--- a/module/ice-9/peg.scm
+++ b/module/ice-9/peg.scm
@@ -19,7 +19,7 @@
(define-module (ice-9 peg)
#:export (peg-parse
-; define-nonterm
+ define-nonterm
; define-nonterm-f
peg-match)
; #:export-syntax (define-nonterm)
@@ -30,7 +30,7 @@
#:re-export (peg-sexp-compile
define-grammar
define-grammar-f
- define-nonterm
+; define-nonterm
keyword-flatten
context-flatten
peg:start
@@ -67,6 +67,35 @@ execute the STMTs and try again."
#f
(make-prec 0 (car res) string (string-collapse (cadr res))))))
+;; The results of parsing using a nonterminal are cached. Think of it like a
+;; hash with no conflict resolution. Process for deciding on the cache size
+;; wasn't very scientific; just ran the benchmarks and stopped a little after
+;; the point of diminishing returns on my box.
+(define *cache-size* 512)
+
+;; Defines a new nonterminal symbol accumulating with ACCUM.
+(define-syntax define-nonterm
+ (lambda (x)
+ (syntax-case x ()
+ ((_ sym accum pat)
+ (let ((matchf (peg-sexp-compile #'pat (syntax->datum #'accum)))
+ (accumsym (syntax->datum #'accum))
+ (c (datum->syntax x (gensym))));; the cache
+ ;; CODE is the code to parse the string if the result isn't cached.
+ (let ((syn (wrap-parser-for-users x matchf accumsym #'sym)))
+ #`(begin
+ (define #,c (make-vector *cache-size* #f));; the cache
+ (define (sym str strlen at)
+ (let* ((vref (vector-ref #,c (modulo at *cache-size*))))
+ ;; Check to see whether the value is cached.
+ (if (and vref (eq? (car vref) str) (= (cadr vref) at))
+ (caddr vref);; If it is return it.
+ (let ((fres ;; Else calculate it and cache it.
+ (#,syn str strlen at)))
+ (vector-set! #,c (modulo at *cache-size*)
+ (list str at fres))
+ fres)))))))))))
+
;; Searches through STRING for something that parses to PEG-MATCHER. Think
;; regexp search.
(define-syntax peg-match
diff --git a/module/ice-9/peg/codegen.scm b/module/ice-9/peg/codegen.scm
index 2c85ccca9..0804d1ed4 100644
--- a/module/ice-9/peg/codegen.scm
+++ b/module/ice-9/peg/codegen.scm
@@ -18,7 +18,7 @@
;;;;
(define-module (ice-9 peg codegen)
- #:export (peg-sexp-compile)
+ #:export (peg-sexp-compile wrap-parser-for-users)
#:use-module (ice-9 peg)
#:use-module (ice-9 peg string-peg)
#:use-module (ice-9 pretty-print)
@@ -244,3 +244,30 @@ return EXP."
(lit
#`(and success
#,(cggr accum 'cg-body #'(reverse body) #'new-end)))))))))))
+
+;; Packages the results of a parser
+(define (wrap-parser-for-users for-syntax parser accumsym s-syn)
+ #`(lambda (str strlen at)
+ (let ((res (#,parser str strlen at)))
+ ;; Try to match the nonterminal.
+ (if res
+ ;; If we matched, do some post-processing to figure out
+ ;; what data to propagate upward.
+ (let ((at (car res))
+ (body (cadr res)))
+ #,(cond
+ ((eq? accumsym 'name)
+ #`(list at '#,s-syn))
+ ((eq? accumsym 'all)
+ #`(list (car res)
+ (cond
+ ((not (list? body))
+ (list '#,s-syn body))
+ ((null? body) '#,s-syn)
+ ((symbol? (car body))
+ (list '#,s-syn body))
+ (else (cons '#,s-syn body)))))
+ ((eq? accumsym 'none) #`(list (car res) '()))
+ (else #`(begin res))))
+ ;; If we didn't match, just return false.
+ #f))))
diff --git a/module/ice-9/peg/string-peg.scm b/module/ice-9/peg/string-peg.scm
index f7e21f6b6..a899727b4 100644
--- a/module/ice-9/peg/string-peg.scm
+++ b/module/ice-9/peg/string-peg.scm
@@ -22,16 +22,11 @@
peg-as-peg
define-grammar
define-grammar-f
- define-nonterm
peg-grammar)
#:use-module (ice-9 peg)
- #:use-module (ice-9 peg codegen))
-
-;; The results of parsing using a nonterminal are cached. Think of it like a
-;; hash with no conflict resolution. Process for deciding on the cache size
-;; wasn't very scientific; just ran the benchmarks and stopped a little after
-;; the point of diminishing returns on my box.
-(define *cache-size* 512)
+ #:use-module (ice-9 peg codegen)
+ #:use-module (ice-9 peg match-record)
+ #:use-module (ice-9 peg simplify-tree))
;; Gets the left-hand depth of a list.
(define (depth lst)
@@ -39,58 +34,6 @@
0
(+ 1 (depth (car lst)))))
-(eval-when (compile load eval)
-(define (syntax-for-non-cache-case for-syntax matchf-syn accumsym s-syn)
-; (let ((matchf-syn (datum->syntax for-syntax matchf)))
- #`(lambda (str strlen at)
- (let ((res (#,matchf-syn str strlen at)))
- ;; Try to match the nonterminal.
- (if res
- ;; If we matched, do some post-processing to figure out
- ;; what data to propagate upward.
- (let ((at (car res))
- (body (cadr res)))
- #,(cond
- ((eq? accumsym 'name)
- #`(list at '#,s-syn))
- ((eq? accumsym 'all)
- #`(list (car res)
- (cond
- ((not (list? body))
- (list '#,s-syn body))
- ((null? body) '#,s-syn)
- ((symbol? (car body))
- (list '#,s-syn body))
- (else (cons '#,s-syn body)))))
- ((eq? accumsym 'none) #`(list (car res) '()))
- (else #`(begin res))))
- ;; If we didn't match, just return false.
- #f))))
-)
-
-;; Defines a new nonterminal symbol accumulating with ACCUM.
-(define-syntax define-nonterm
- (lambda (x)
- (syntax-case x ()
- ((_ sym accum pat)
- (let ((matchf (peg-sexp-compile #'pat (syntax->datum #'accum)))
- (accumsym (syntax->datum #'accum))
- (c (datum->syntax x (gensym))));; the cache
- ;; CODE is the code to parse the string if the result isn't cached.
- (let ((syn (syntax-for-non-cache-case x matchf accumsym #'sym)))
- #`(begin
- (define #,c (make-vector *cache-size* #f));; the cache
- (define (sym str strlen at)
- (let* ((vref (vector-ref #,c (modulo at *cache-size*))))
- ;; Check to see whether the value is cached.
- (if (and vref (eq? (car vref) str) (= (cadr vref) at))
- (caddr vref);; If it is return it.
- (let ((fres ;; Else calculate it and cache it.
- (#,syn str strlen at)))
- (vector-set! #,c (modulo at *cache-size*)
- (list str at fres))
- fres)))))))))))
-
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;; Parse string PEGs using sexp PEGs.
;; See the variable PEG-AS-PEG for an easier-to-read syntax.
@@ -114,34 +57,43 @@ LB < '['
RB < ']'
")
-(define-nonterm peg-grammar all
+(define-syntax define-sexp-parser
+ (lambda (x)
+ (syntax-case x ()
+ ((_ sym accum pat)
+ (let* ((matchf (peg-sexp-compile #'pat (syntax->datum #'accum)))
+ (accumsym (syntax->datum #'accum))
+ (syn (wrap-parser-for-users x matchf accumsym #'sym)))
+ #`(define sym #,syn))))))
+
+(define-sexp-parser peg-grammar all
(body lit (and peg-nonterminal (or "<--" "<-" "<") peg-sp peg-pattern) +))
-(define-nonterm peg-pattern all
+(define-sexp-parser peg-pattern all
(and peg-alternative
(body lit (and (ignore "/") peg-sp peg-alternative) *)))
-(define-nonterm peg-alternative all
+(define-sexp-parser peg-alternative all
(body lit (and (body lit (or "!" "&") ?) peg-sp peg-suffix) +))
-(define-nonterm peg-suffix all
+(define-sexp-parser peg-suffix all
(and peg-primary (body lit (and (or "*" "+" "?") peg-sp) *)))
-(define-nonterm peg-primary all
+(define-sexp-parser peg-primary all
(or (and "(" peg-sp peg-pattern ")" peg-sp)
(and "." peg-sp)
peg-literal
peg-charclass
(and peg-nonterminal (body ! "<" 1))))
-(define-nonterm peg-literal all
+(define-sexp-parser peg-literal all
(and "'" (body lit (and (body ! "'" 1) peg-any) *) "'" peg-sp))
-(define-nonterm peg-charclass all
+(define-sexp-parser peg-charclass all
(and (ignore "[")
(body lit (and (body ! "]" 1)
(or charclass-range charclass-single)) *)
(ignore "]")
peg-sp))
-(define-nonterm charclass-range all (and peg-any "-" peg-any))
-(define-nonterm charclass-single all peg-any)
-(define-nonterm peg-nonterminal all
+(define-sexp-parser charclass-range all (and peg-any "-" peg-any))
+(define-sexp-parser charclass-single all peg-any)
+(define-sexp-parser peg-nonterminal all
(and (body lit (or (range #\a #\z) (range #\A #\Z) (range #\0 #\9) "-") +) peg-sp))
-(define-nonterm peg-sp none
+(define-sexp-parser peg-sp none
(body lit (or " " "\t" "\n") *))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -297,9 +249,10 @@ RB < ']'
;; Builds a lambda-expressions for the pattern STR using accum.
(define (peg-string-compile str-stx accum)
- (peg-sexp-compile
- (compressor
- (peg-pattern->defn
- (peg:tree (peg-parse peg-pattern (syntax->datum str-stx))) str-stx)
- str-stx)
- accum))
+ (let ((string (syntax->datum str-stx)))
+ (peg-sexp-compile
+ (compressor
+ (peg-pattern->defn
+ (peg:tree (peg-parse peg-pattern string)) str-stx)
+ str-stx)
+ accum)))