summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNoah Lavine <nlavine@haverford.edu>2011-03-05 23:54:50 -0500
committerNoah Lavine <noah.b.lavine@gmail.com>2012-01-20 08:26:56 -0500
commit72514626135a15ea53564d3b8b21e96ce052f4f5 (patch)
tree8f49d8ff0c681543f68fa05380019993cc5b2029
parentcedad179cffcf357ed6f1fba2c830a02f0418888 (diff)
downloadguile-72514626135a15ea53564d3b8b21e96ce052f4f5.tar.gz
Separate PEG Strings
* module/ice-9/peg.scm: remove functions dealing with PEGs as strings * module/ice-9/peg/string-peg.scm: and put them here
-rw-r--r--module/ice-9/peg.scm291
-rw-r--r--module/ice-9/peg/codegen.scm1
-rw-r--r--module/ice-9/peg/string-peg.scm305
3 files changed, 315 insertions, 282 deletions
diff --git a/module/ice-9/peg.scm b/module/ice-9/peg.scm
index 9bf152c86..b96104a23 100644
--- a/module/ice-9/peg.scm
+++ b/module/ice-9/peg.scm
@@ -18,14 +18,11 @@
;;;;
(define-module (ice-9 peg)
- #:export (peg-string-compile
- context-flatten
+ #:export (context-flatten
peg-parse
- define-nonterm
- define-nonterm-f
+; define-nonterm
+; define-nonterm-f
peg-match
- define-grammar
- define-grammar-f
peg:start
peg:end
peg:string
@@ -33,8 +30,13 @@
peg:substring
peg-record?
keyword-flatten)
+; #:export-syntax (define-nonterm)
#:use-module (ice-9 peg codegen)
- #:re-export (peg-sexp-compile)
+ #:use-module (ice-9 peg string-peg)
+ #:re-export (peg-sexp-compile
+ define-grammar
+ define-grammar-f
+ define-nonterm)
#:use-module (system base pmatch))
;;;
@@ -64,62 +66,6 @@ execute the STMTs and try again."
;;;;; FOR DEFINING AND USING NONTERMINALS
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; The results of parsing using a nonterminal are cached. Think of it like a
-;; hash with no conflict resolution. Process for deciding on the cache size
-;; wasn't very scientific; just ran the benchmarks and stopped a little after
-;; the point of diminishing returns on my box.
-(define *cache-size* 512)
-
-(define (syntax-for-non-cache-case for-syntax matchf-syn accumsym s-syn)
-; (let ((matchf-syn (datum->syntax for-syntax matchf)))
- #`(lambda (str strlen at)
- (let ((res (#,matchf-syn str strlen at)))
- ;; Try to match the nonterminal.
- (if res
- ;; If we matched, do some post-processing to figure out
- ;; what data to propagate upward.
- (let ((at (car res))
- (body (cadr res)))
- #,(cond
- ((eq? accumsym 'name)
- #`(list at '#,s-syn))
- ((eq? accumsym 'all)
- #`(list (car res)
- (cond
- ((not (list? body))
- (list '#,s-syn body))
- ((null? body) '#,s-syn)
- ((symbol? (car body))
- (list '#,s-syn body))
- (else (cons '#,s-syn body)))))
- ((eq? accumsym 'none) #`(list (car res) '()))
- (else #`(begin res))))
- ;; If we didn't match, just return false.
- #f))))
-
-;; Defines a new nonterminal symbol accumulating with ACCUM.
-(define-syntax define-nonterm
- (lambda (x)
- (syntax-case x ()
- ((_ sym accum pat)
- (let ((matchf (peg-sexp-compile #'pat (syntax->datum #'accum)))
- (accumsym (syntax->datum #'accum))
- (c (datum->syntax x (gensym))));; the cache
- ;; CODE is the code to parse the string if the result isn't cached.
- (let ((syn (syntax-for-non-cache-case x matchf accumsym #'sym)))
- #`(begin
- (define #,c (make-vector *cache-size* #f));; the cache
- (define (sym str strlen at)
- (let* ((vref (vector-ref #,c (modulo at *cache-size*))))
- ;; Check to see whether the value is cached.
- (if (and vref (eq? (car vref) str) (= (cadr vref) at))
- (caddr vref);; If it is return it.
- (let ((fres ;; Else calculate it and cache it.
- (#,syn str strlen at)))
- (vector-set! #,c (modulo at *cache-size*)
- (list str at fres))
- fres)))))))))))
-
;; Parses STRING using NONTERM
(define (peg-parse nonterm string)
;; We copy the string before using it because it might have been modified
@@ -225,225 +171,6 @@ execute the STMTs and try again."
(member (car x) keyword-lst)))
lst))
-;; Gets the left-hand depth of a list.
-(define (depth lst)
- (if (or (not (list? lst)) (null? lst))
- 0
- (+ 1 (depth (car lst)))))
-
-;; Trims characters off the front and end of STR.
-;; (trim-1chars "'ab'") -> "ab"
-(define (trim-1chars str) (substring str 1 (- (string-length str) 1)))
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;;;;; Parse string PEGs using sexp PEGs.
-;; See the variable PEG-AS-PEG for an easier-to-read syntax.
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-;; Grammar for PEGs in PEG grammar.
-(define peg-as-peg
-"grammar <-- (nonterminal ('<--' / '<-' / '<') sp pattern)+
-pattern <-- alternative (SLASH sp alternative)*
-alternative <-- ([!&]? sp suffix)+
-suffix <-- primary ([*+?] sp)*
-primary <-- '(' sp pattern ')' sp / '.' sp / literal / charclass / nonterminal !'<'
-literal <-- ['] (!['] .)* ['] sp
-charclass <-- LB (!']' (CCrange / CCsingle))* RB sp
-CCrange <-- . '-' .
-CCsingle <-- .
-nonterminal <-- [a-zA-Z0-9-]+ sp
-sp < [ \t\n]*
-SLASH < '/'
-LB < '['
-RB < ']'
-")
-
-(define-nonterm peg-grammar all
- (body lit (and peg-nonterminal (or "<--" "<-" "<") peg-sp peg-pattern) +))
-(define-nonterm peg-pattern all
- (and peg-alternative
- (body lit (and (ignore "/") peg-sp peg-alternative) *)))
-(define-nonterm peg-alternative all
- (body lit (and (body lit (or "!" "&") ?) peg-sp peg-suffix) +))
-(define-nonterm peg-suffix all
- (and peg-primary (body lit (and (or "*" "+" "?") peg-sp) *)))
-(define-nonterm peg-primary all
- (or (and "(" peg-sp peg-pattern ")" peg-sp)
- (and "." peg-sp)
- peg-literal
- peg-charclass
- (and peg-nonterminal (body ! "<" 1))))
-(define-nonterm peg-literal all
- (and "'" (body lit (and (body ! "'" 1) peg-any) *) "'" peg-sp))
-(define-nonterm peg-charclass all
- (and (ignore "[")
- (body lit (and (body ! "]" 1)
- (or charclass-range charclass-single)) *)
- (ignore "]")
- peg-sp))
-(define-nonterm charclass-range all (and peg-any "-" peg-any))
-(define-nonterm charclass-single all peg-any)
-(define-nonterm peg-nonterminal all
- (and (body lit (or (range #\a #\z) (range #\A #\Z) (range #\0 #\9) "-") +) peg-sp))
-(define-nonterm peg-sp none
- (body lit (or " " "\t" "\n") *))
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;;;;; PARSE STRING PEGS
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-;; Pakes a string representing a PEG grammar and defines all the nonterminals in
-;; it as the associated PEGs.
-(define (peg-parser str for-syntax)
- (let ((parsed (peg-parse peg-grammar str)))
- (if (not parsed)
- (begin
- ;; (display "Invalid PEG grammar!\n")
- #f)
- (let ((lst (peg:tree parsed)))
- (cond
- ((or (not (list? lst)) (null? lst))
- lst)
- ((eq? (car lst) 'peg-grammar)
- #`(begin
- #,@(map (lambda (x) (peg-nonterm->defn x for-syntax))
- (context-flatten (lambda (lst) (<= (depth lst) 2))
- (cdr lst))))))))))
-
-;; Macro wrapper for PEG-PARSER. Parses PEG grammars expressed as strings and
-;; defines all the appropriate nonterminals.
-(define-syntax define-grammar
- (lambda (x)
- (syntax-case x ()
- ((_ str)
- (peg-parser (syntax->datum #'str) x)))))
-(define define-grammar-f peg-parser)
-
-;; Parse a nonterminal and pattern listed in LST.
-(define (peg-nonterm->defn lst for-syntax)
- (let* ((nonterm (car lst))
- (grabber (cadr lst))
- (pattern (caddr lst))
- (nonterm-name (datum->syntax for-syntax
- (string->symbol (cadr nonterm)))))
- #`(define-nonterm #,nonterm-name
- #,(cond
- ((string=? grabber "<--") (datum->syntax for-syntax 'all))
- ((string=? grabber "<-") (datum->syntax for-syntax 'body))
- (else (datum->syntax for-syntax 'none)))
- #,(compressor (peg-pattern->defn pattern for-syntax) for-syntax))))
-
-;; Parse a pattern.
-(define (peg-pattern->defn lst for-syntax)
- #`(or #,@(map (lambda (x) (peg-alternative->defn x for-syntax))
- (context-flatten (lambda (x) (eq? (car x) 'peg-alternative))
- (cdr lst)))))
-
-;; Parse an alternative.
-(define (peg-alternative->defn lst for-syntax)
- #`(and #,@(map (lambda (x) (peg-body->defn x for-syntax))
- (context-flatten (lambda (x) (or (string? (car x))
- (eq? (car x) 'peg-suffix)))
- (cdr lst)))))
-
-;; Parse a body.
-(define (peg-body->defn lst for-syntax)
- (let ((suffix '())
- (front (datum->syntax for-syntax 'lit)))
- (cond
- ((eq? (car lst) 'peg-suffix)
- (set! suffix lst))
- ((string? (car lst))
- (begin (set! front (datum->syntax for-syntax
- (string->symbol (car lst))))
- (set! suffix (cadr lst))))
- (else `(peg-parse-body-fail ,lst)))
- #`(body #,front #,@(peg-suffix->defn
- suffix
- for-syntax))))
-
-;; Parse a suffix.
-(define (peg-suffix->defn lst for-syntax)
- #`(#,(peg-primary->defn (cadr lst) for-syntax)
- #,(if (null? (cddr lst))
- 1
- (datum->syntax for-syntax (string->symbol (caddr lst))))))
-
-;; Parse a primary.
-(define (peg-primary->defn lst for-syntax)
- (let ((el (cadr lst)))
- (cond
- ((list? el)
- (cond
- ((eq? (car el) 'peg-literal)
- (peg-literal->defn el for-syntax))
- ((eq? (car el) 'peg-charclass)
- (peg-charclass->defn el for-syntax))
- ((eq? (car el) 'peg-nonterminal)
- (datum->syntax for-syntax (string->symbol (cadr el))))))
- ((string? el)
- (cond
- ((equal? el "(")
- (peg-pattern->defn (caddr lst) for-syntax))
- ((equal? el ".")
- (datum->syntax for-syntax 'peg-any))
- (else (datum->syntax for-syntax
- `(peg-parse-any unknown-string ,lst)))))
- (else (datum->syntax for-syntax
- `(peg-parse-any unknown-el ,lst))))))
-
-;; Parses a literal.
-(define (peg-literal->defn lst for-syntax)
- (datum->syntax for-syntax (trim-1chars (cadr lst))))
-
-;; Parses a charclass.
-(define (peg-charclass->defn lst for-syntax)
- #`(or
- #,@(map
- (lambda (cc)
- (cond
- ((eq? (car cc) 'charclass-range)
- #`(range #,(datum->syntax
- for-syntax
- (string-ref (cadr cc) 0))
- #,(datum->syntax
- for-syntax
- (string-ref (cadr cc) 2))))
- ((eq? (car cc) 'charclass-single)
- (datum->syntax for-syntax (cadr cc)))))
- (context-flatten
- (lambda (x) (or (eq? (car x) 'charclass-range)
- (eq? (car x) 'charclass-single)))
- (cdr lst)))))
-
-;; Compresses a list to save the optimizer work.
-;; e.g. (or (and a)) -> a
-(define (compressor-core lst)
- (if (or (not (list? lst)) (null? lst))
- lst
- (cond
- ((and (or (eq? (car lst) 'or) (eq? (car lst) 'and))
- (null? (cddr lst)))
- (compressor-core (cadr lst)))
- ((and (eq? (car lst) 'body)
- (eq? (cadr lst) 'lit)
- (eq? (cadddr lst) 1))
- (compressor-core (caddr lst)))
- (else (map compressor-core lst)))))
-
-(define (compressor syn for-syntax)
- (datum->syntax for-syntax
- (compressor-core (syntax->datum syn))))
-
-;; Builds a lambda-expressions for the pattern STR using accum.
-(define (peg-string-compile str-stx accum)
- (peg-sexp-compile
- (compressor
- (peg-pattern->defn
- (peg:tree (peg-parse peg-pattern (syntax->datum str-stx))) str-stx)
- str-stx)
- accum))
-
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;; PMATCH STRUCTURE MUNGING
;; Pretty self-explanatory.
diff --git a/module/ice-9/peg/codegen.scm b/module/ice-9/peg/codegen.scm
index 43f44cc66..2c85ccca9 100644
--- a/module/ice-9/peg/codegen.scm
+++ b/module/ice-9/peg/codegen.scm
@@ -20,6 +20,7 @@
(define-module (ice-9 peg codegen)
#:export (peg-sexp-compile)
#:use-module (ice-9 peg)
+ #:use-module (ice-9 peg string-peg)
#:use-module (ice-9 pretty-print)
#:use-module (system base pmatch))
diff --git a/module/ice-9/peg/string-peg.scm b/module/ice-9/peg/string-peg.scm
new file mode 100644
index 000000000..f7e21f6b6
--- /dev/null
+++ b/module/ice-9/peg/string-peg.scm
@@ -0,0 +1,305 @@
+;;;; string-peg.scm --- representing PEG grammars as strings
+;;;;
+;;;; Copyright (C) 2010, 2011 Free Software Foundation, Inc.
+;;;;
+;;;; This library is free software; you can redistribute it and/or
+;;;; modify it under the terms of the GNU Lesser General Public
+;;;; License as published by the Free Software Foundation; either
+;;;; version 3 of the License, or (at your option) any later version.
+;;;;
+;;;; This library is distributed in the hope that it will be useful,
+;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;;;; Lesser General Public License for more details.
+;;;;
+;;;; You should have received a copy of the GNU Lesser General Public
+;;;; License along with this library; if not, write to the Free Software
+;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;;;;
+
+(define-module (ice-9 peg string-peg)
+ #:export (peg-string-compile
+ peg-as-peg
+ define-grammar
+ define-grammar-f
+ define-nonterm
+ peg-grammar)
+ #:use-module (ice-9 peg)
+ #:use-module (ice-9 peg codegen))
+
+;; The results of parsing using a nonterminal are cached. Think of it like a
+;; hash with no conflict resolution. Process for deciding on the cache size
+;; wasn't very scientific; just ran the benchmarks and stopped a little after
+;; the point of diminishing returns on my box.
+(define *cache-size* 512)
+
+;; Gets the left-hand depth of a list.
+(define (depth lst)
+ (if (or (not (list? lst)) (null? lst))
+ 0
+ (+ 1 (depth (car lst)))))
+
+(eval-when (compile load eval)
+(define (syntax-for-non-cache-case for-syntax matchf-syn accumsym s-syn)
+; (let ((matchf-syn (datum->syntax for-syntax matchf)))
+ #`(lambda (str strlen at)
+ (let ((res (#,matchf-syn str strlen at)))
+ ;; Try to match the nonterminal.
+ (if res
+ ;; If we matched, do some post-processing to figure out
+ ;; what data to propagate upward.
+ (let ((at (car res))
+ (body (cadr res)))
+ #,(cond
+ ((eq? accumsym 'name)
+ #`(list at '#,s-syn))
+ ((eq? accumsym 'all)
+ #`(list (car res)
+ (cond
+ ((not (list? body))
+ (list '#,s-syn body))
+ ((null? body) '#,s-syn)
+ ((symbol? (car body))
+ (list '#,s-syn body))
+ (else (cons '#,s-syn body)))))
+ ((eq? accumsym 'none) #`(list (car res) '()))
+ (else #`(begin res))))
+ ;; If we didn't match, just return false.
+ #f))))
+)
+
+;; Defines a new nonterminal symbol accumulating with ACCUM.
+(define-syntax define-nonterm
+ (lambda (x)
+ (syntax-case x ()
+ ((_ sym accum pat)
+ (let ((matchf (peg-sexp-compile #'pat (syntax->datum #'accum)))
+ (accumsym (syntax->datum #'accum))
+ (c (datum->syntax x (gensym))));; the cache
+ ;; CODE is the code to parse the string if the result isn't cached.
+ (let ((syn (syntax-for-non-cache-case x matchf accumsym #'sym)))
+ #`(begin
+ (define #,c (make-vector *cache-size* #f));; the cache
+ (define (sym str strlen at)
+ (let* ((vref (vector-ref #,c (modulo at *cache-size*))))
+ ;; Check to see whether the value is cached.
+ (if (and vref (eq? (car vref) str) (= (cadr vref) at))
+ (caddr vref);; If it is return it.
+ (let ((fres ;; Else calculate it and cache it.
+ (#,syn str strlen at)))
+ (vector-set! #,c (modulo at *cache-size*)
+ (list str at fres))
+ fres)))))))))))
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;; Parse string PEGs using sexp PEGs.
+;; See the variable PEG-AS-PEG for an easier-to-read syntax.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; Grammar for PEGs in PEG grammar.
+(define peg-as-peg
+"grammar <-- (nonterminal ('<--' / '<-' / '<') sp pattern)+
+pattern <-- alternative (SLASH sp alternative)*
+alternative <-- ([!&]? sp suffix)+
+suffix <-- primary ([*+?] sp)*
+primary <-- '(' sp pattern ')' sp / '.' sp / literal / charclass / nonterminal !'<'
+literal <-- ['] (!['] .)* ['] sp
+charclass <-- LB (!']' (CCrange / CCsingle))* RB sp
+CCrange <-- . '-' .
+CCsingle <-- .
+nonterminal <-- [a-zA-Z0-9-]+ sp
+sp < [ \t\n]*
+SLASH < '/'
+LB < '['
+RB < ']'
+")
+
+(define-nonterm peg-grammar all
+ (body lit (and peg-nonterminal (or "<--" "<-" "<") peg-sp peg-pattern) +))
+(define-nonterm peg-pattern all
+ (and peg-alternative
+ (body lit (and (ignore "/") peg-sp peg-alternative) *)))
+(define-nonterm peg-alternative all
+ (body lit (and (body lit (or "!" "&") ?) peg-sp peg-suffix) +))
+(define-nonterm peg-suffix all
+ (and peg-primary (body lit (and (or "*" "+" "?") peg-sp) *)))
+(define-nonterm peg-primary all
+ (or (and "(" peg-sp peg-pattern ")" peg-sp)
+ (and "." peg-sp)
+ peg-literal
+ peg-charclass
+ (and peg-nonterminal (body ! "<" 1))))
+(define-nonterm peg-literal all
+ (and "'" (body lit (and (body ! "'" 1) peg-any) *) "'" peg-sp))
+(define-nonterm peg-charclass all
+ (and (ignore "[")
+ (body lit (and (body ! "]" 1)
+ (or charclass-range charclass-single)) *)
+ (ignore "]")
+ peg-sp))
+(define-nonterm charclass-range all (and peg-any "-" peg-any))
+(define-nonterm charclass-single all peg-any)
+(define-nonterm peg-nonterminal all
+ (and (body lit (or (range #\a #\z) (range #\A #\Z) (range #\0 #\9) "-") +) peg-sp))
+(define-nonterm peg-sp none
+ (body lit (or " " "\t" "\n") *))
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;; PARSE STRING PEGS
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; Pakes a string representing a PEG grammar and defines all the nonterminals in
+;; it as the associated PEGs.
+(define (peg-parser str for-syntax)
+ (let ((parsed (peg-parse peg-grammar str)))
+ (if (not parsed)
+ (begin
+ ;; (display "Invalid PEG grammar!\n")
+ #f)
+ (let ((lst (peg:tree parsed)))
+ (cond
+ ((or (not (list? lst)) (null? lst))
+ lst)
+ ((eq? (car lst) 'peg-grammar)
+ #`(begin
+ #,@(map (lambda (x) (peg-nonterm->defn x for-syntax))
+ (context-flatten (lambda (lst) (<= (depth lst) 2))
+ (cdr lst))))))))))
+
+;; Macro wrapper for PEG-PARSER. Parses PEG grammars expressed as strings and
+;; defines all the appropriate nonterminals.
+(define-syntax define-grammar
+ (lambda (x)
+ (syntax-case x ()
+ ((_ str)
+ (peg-parser (syntax->datum #'str) x)))))
+(define define-grammar-f peg-parser)
+
+;; Parse a nonterminal and pattern listed in LST.
+(define (peg-nonterm->defn lst for-syntax)
+ (let* ((nonterm (car lst))
+ (grabber (cadr lst))
+ (pattern (caddr lst))
+ (nonterm-name (datum->syntax for-syntax
+ (string->symbol (cadr nonterm)))))
+ #`(define-nonterm #,nonterm-name
+ #,(cond
+ ((string=? grabber "<--") (datum->syntax for-syntax 'all))
+ ((string=? grabber "<-") (datum->syntax for-syntax 'body))
+ (else (datum->syntax for-syntax 'none)))
+ #,(compressor (peg-pattern->defn pattern for-syntax) for-syntax))))
+
+;; Parse a pattern.
+(define (peg-pattern->defn lst for-syntax)
+ #`(or #,@(map (lambda (x) (peg-alternative->defn x for-syntax))
+ (context-flatten (lambda (x) (eq? (car x) 'peg-alternative))
+ (cdr lst)))))
+
+;; Parse an alternative.
+(define (peg-alternative->defn lst for-syntax)
+ #`(and #,@(map (lambda (x) (peg-body->defn x for-syntax))
+ (context-flatten (lambda (x) (or (string? (car x))
+ (eq? (car x) 'peg-suffix)))
+ (cdr lst)))))
+
+;; Parse a body.
+(define (peg-body->defn lst for-syntax)
+ (let ((suffix '())
+ (front (datum->syntax for-syntax 'lit)))
+ (cond
+ ((eq? (car lst) 'peg-suffix)
+ (set! suffix lst))
+ ((string? (car lst))
+ (begin (set! front (datum->syntax for-syntax
+ (string->symbol (car lst))))
+ (set! suffix (cadr lst))))
+ (else `(peg-parse-body-fail ,lst)))
+ #`(body #,front #,@(peg-suffix->defn
+ suffix
+ for-syntax))))
+
+;; Parse a suffix.
+(define (peg-suffix->defn lst for-syntax)
+ #`(#,(peg-primary->defn (cadr lst) for-syntax)
+ #,(if (null? (cddr lst))
+ 1
+ (datum->syntax for-syntax (string->symbol (caddr lst))))))
+
+;; Parse a primary.
+(define (peg-primary->defn lst for-syntax)
+ (let ((el (cadr lst)))
+ (cond
+ ((list? el)
+ (cond
+ ((eq? (car el) 'peg-literal)
+ (peg-literal->defn el for-syntax))
+ ((eq? (car el) 'peg-charclass)
+ (peg-charclass->defn el for-syntax))
+ ((eq? (car el) 'peg-nonterminal)
+ (datum->syntax for-syntax (string->symbol (cadr el))))))
+ ((string? el)
+ (cond
+ ((equal? el "(")
+ (peg-pattern->defn (caddr lst) for-syntax))
+ ((equal? el ".")
+ (datum->syntax for-syntax 'peg-any))
+ (else (datum->syntax for-syntax
+ `(peg-parse-any unknown-string ,lst)))))
+ (else (datum->syntax for-syntax
+ `(peg-parse-any unknown-el ,lst))))))
+
+;; Trims characters off the front and end of STR.
+;; (trim-1chars "'ab'") -> "ab"
+(define (trim-1chars str) (substring str 1 (- (string-length str) 1)))
+
+;; Parses a literal.
+(define (peg-literal->defn lst for-syntax)
+ (datum->syntax for-syntax (trim-1chars (cadr lst))))
+
+;; Parses a charclass.
+(define (peg-charclass->defn lst for-syntax)
+ #`(or
+ #,@(map
+ (lambda (cc)
+ (cond
+ ((eq? (car cc) 'charclass-range)
+ #`(range #,(datum->syntax
+ for-syntax
+ (string-ref (cadr cc) 0))
+ #,(datum->syntax
+ for-syntax
+ (string-ref (cadr cc) 2))))
+ ((eq? (car cc) 'charclass-single)
+ (datum->syntax for-syntax (cadr cc)))))
+ (context-flatten
+ (lambda (x) (or (eq? (car x) 'charclass-range)
+ (eq? (car x) 'charclass-single)))
+ (cdr lst)))))
+
+;; Compresses a list to save the optimizer work.
+;; e.g. (or (and a)) -> a
+(define (compressor-core lst)
+ (if (or (not (list? lst)) (null? lst))
+ lst
+ (cond
+ ((and (or (eq? (car lst) 'or) (eq? (car lst) 'and))
+ (null? (cddr lst)))
+ (compressor-core (cadr lst)))
+ ((and (eq? (car lst) 'body)
+ (eq? (cadr lst) 'lit)
+ (eq? (cadddr lst) 1))
+ (compressor-core (caddr lst)))
+ (else (map compressor-core lst)))))
+
+(define (compressor syn for-syntax)
+ (datum->syntax for-syntax
+ (compressor-core (syntax->datum syn))))
+
+;; Builds a lambda-expressions for the pattern STR using accum.
+(define (peg-string-compile str-stx accum)
+ (peg-sexp-compile
+ (compressor
+ (peg-pattern->defn
+ (peg:tree (peg-parse peg-pattern (syntax->datum str-stx))) str-stx)
+ str-stx)
+ accum))