summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNoah Lavine <nlavine@haverford.edu>2011-03-05 15:23:59 -0500
committerAndy Wingo <wingo@pobox.com>2011-03-24 21:12:26 +0100
commit43de09f94cb5c6055d4a38c81996b968eb508dfa (patch)
treea543921590e1a038a73394162646506698f610b9
parent4c7622eb6ece4fbafbd09498e78315c75a815842 (diff)
downloadguile-43de09f94cb5c6055d4a38c81996b968eb508dfa.tar.gz
Split peg.scm
* module/ice-9/peg.scm: move code generators to new module * module/ice-9/peg/codegen.scm: new module for PEG code generators * module/Makefile.am (ICE_9_SOURCES): Add codegen.scm.
-rw-r--r--module/Makefile.am1
-rw-r--r--module/ice-9/peg.scm223
-rw-r--r--module/ice-9/peg/codegen.scm245
3 files changed, 251 insertions, 218 deletions
diff --git a/module/Makefile.am b/module/Makefile.am
index 6cebff195..89fbe2d34 100644
--- a/module/Makefile.am
+++ b/module/Makefile.am
@@ -202,6 +202,7 @@ ICE_9_SOURCES = \
ice-9/null.scm \
ice-9/occam-channel.scm \
ice-9/optargs.scm \
+ ice-9/peg/codegen.scm \
ice-9/peg.scm \
ice-9/poe.scm \
ice-9/poll.scm \
diff --git a/module/ice-9/peg.scm b/module/ice-9/peg.scm
index d91a74e57..0acc4598b 100644
--- a/module/ice-9/peg.scm
+++ b/module/ice-9/peg.scm
@@ -18,8 +18,7 @@
;;;;
(define-module (ice-9 peg)
- #:export (peg-sexp-compile
- peg-string-compile
+ #:export (peg-string-compile
context-flatten
peg-parse
define-nonterm
@@ -34,8 +33,9 @@
peg:substring
peg-record?
keyword-flatten)
- #:use-module (system base pmatch)
- #:use-module (ice-9 pretty-print))
+ #:use-module (ice-9 peg codegen)
+ #:re-export (peg-sexp-compile)
+ #:use-module (system base pmatch))
;;;
;;; Helper Macros
@@ -58,222 +58,9 @@ execute the STMTs and try again."
((_) #t)
(else #f)))))
-(define-syntax push!
- (syntax-rules ()
- "Push an object onto a list."
- ((_ lst obj)
- (set! lst (cons obj lst)))))
-
-(define-syntax single-filter
- (syntax-rules ()
- "If EXP is a list of one element, return the element. Otherwise
-return EXP."
- ((_ exp)
- (pmatch exp
- ((,elt) elt)
- (,elts elts)))))
-
-(define-syntax push-not-null!
- (syntax-rules ()
- "If OBJ is non-null, push it onto LST, otherwise do nothing."
- ((_ lst obj)
- (if (not (null? obj))
- (push! lst obj)))))
-
(eval-when (compile load eval)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;;;;; CODE GENERATORS
-;; These functions generate scheme code for parsing PEGs.
-;; Conventions:
-;; accum: (all name body none)
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-;; Code we generate will have a certain return structure depending on how we're
-;; accumulating (the ACCUM variable).
-(define (cg-generic-ret accum name body-uneval at)
- ;; name, body-uneval and at are syntax
- #`(let ((body #,body-uneval))
- #,(cond
- ((and (eq? accum 'all) name)
- #`(list #,at
- (cond
- ((not (list? body)) (list '#,name body))
- ((null? body) '#,name)
- ((symbol? (car body)) (list '#,name body))
- (else (cons '#,name body)))))
- ((eq? accum 'name)
- #`(list #,at '#,name))
- ((eq? accum 'body)
- #`(list #,at
- (cond
- ((single? body) (car body))
- (else body))))
- ((eq? accum 'none)
- #`(list #,at '()))
- (else
- (begin
- (pretty-print `(cg-generic-ret-error ,accum ,name ,body-uneval ,at))
- (pretty-print "Defaulting to accum of none.\n")
- #`(list #,at '()))))))
-
-;; The short name makes the formatting below much easier to read.
-(define cggr cg-generic-ret)
-
-;; Generates code that matches a particular string.
-;; E.g.: (cg-string syntax "abc" 'body)
-(define (cg-string pat accum)
- (let ((plen (string-length pat)))
- #`(lambda (str len pos)
- (let ((end (+ pos #,plen)))
- (and (<= end len)
- (string= str #,pat pos end)
- #,(case accum
- ((all) #`(list end (list 'cg-string #,pat)))
- ((name) #`(list end 'cg-string))
- ((body) #`(list end #,pat))
- ((none) #`(list end '()))
- (else (error "bad accum" accum))))))))
-
-;; Generates code for matching any character.
-;; E.g.: (cg-peg-any syntax 'body)
-(define (cg-peg-any accum)
- #`(lambda (str len pos)
- (and (< pos len)
- #,(case accum
- ((all) #`(list (1+ pos)
- (list 'cg-peg-any (substring str pos (1+ pos)))))
- ((name) #`(list (1+ pos) 'cg-peg-any))
- ((body) #`(list (1+ pos) (substring str pos (1+ pos))))
- ((none) #`(list (1+ pos) '()))
- (else (error "bad accum" accum))))))
-
-;; Generates code for matching a range of characters between start and end.
-;; E.g.: (cg-range syntax #\a #\z 'body)
-(define (cg-range start end accum)
- #`(lambda (str len pos)
- (and (< pos len)
- (let ((c (string-ref str pos)))
- (and (char>=? c #,start)
- (char<=? c #,end)
- #,(case accum
- ((all) #`(list (1+ pos) (list 'cg-range (string c))))
- ((name) #`(list (1+ pos) 'cg-range))
- ((body) #`(list (1+ pos) (string c)))
- ((none) #`(list (1+ pos) '()))
- (else (error "bad accum" accum))))))))
-
-;; Filters the accum argument to peg-sexp-compile for buildings like string
-;; literals (since we don't want to tag them with their name if we're doing an
-;; "all" accum).
-(define (builtin-accum-filter accum)
- (cond
- ((eq? accum 'all) 'body)
- ((eq? accum 'name) 'name)
- ((eq? accum 'body) 'body)
- ((eq? accum 'none) 'none)))
-(define baf builtin-accum-filter)
-
-;; Takes an arbitrary expressions and accumulation variable, then parses it.
-;; E.g.: (peg-sexp-compile syntax '(and "abc" (or "-" (range #\a #\z))) 'all)
-(define (peg-sexp-compile pat accum)
- (syntax-case pat (peg-any range ignore capture peg and or body)
- (peg-any
- (cg-peg-any (baf accum)))
- (sym (identifier? #'sym) ;; nonterminal
- #'sym)
- (str (string? (syntax->datum #'str)) ;; literal string
- (cg-string (syntax->datum #'str) (baf accum)))
- ((range start end) ;; range of characters (e.g. [a-z])
- (and (char? (syntax->datum #'start)) (char? (syntax->datum #'end)))
- (cg-range (syntax->datum #'start) (syntax->datum #'end) (baf accum)))
- ((ignore pat) ;; match but don't parse
- (peg-sexp-compile #'pat 'none))
- ((capture pat) ;; parse
- (peg-sexp-compile #'pat 'body))
- ((peg pat) ;; embedded PEG string
- (string? (syntax->datum #'pat))
- (peg-string-compile #'pat (baf accum)))
- ((and pat ...)
- (cg-and #'(pat ...) (baf accum)))
- ((or pat ...)
- (cg-or #'(pat ...) (baf accum)))
- ((body type pat num)
- (cg-body (baf accum) #'type #'pat #'num))))
-
-;; Top-level function builder for AND. Reduces to a call to CG-AND-INT.
-(define (cg-and clauses accum)
- #`(lambda (str len pos)
- (let ((body '()))
- #,(cg-and-int clauses accum #'str #'len #'pos #'body))))
-
-;; Internal function builder for AND (calls itself).
-(define (cg-and-int clauses accum str strlen at body)
- (syntax-case clauses ()
- (()
- (cggr accum 'cg-and #`(reverse #,body) at))
- ((first rest ...)
- #`(let ((res (#,(peg-sexp-compile #'first accum) #,str #,strlen #,at)))
- (and res
- ;; update AT and BODY then recurse
- (let ((newat (car res))
- (newbody (cadr res)))
- (set! #,at newat)
- (push-not-null! #,body (single-filter newbody))
- #,(cg-and-int #'(rest ...) accum str strlen at body)))))))
-
-;; Top-level function builder for OR. Reduces to a call to CG-OR-INT.
-(define (cg-or clauses accum)
- #`(lambda (str len pos)
- #,(cg-or-int clauses accum #'str #'len #'pos)))
-
-;; Internal function builder for OR (calls itself).
-(define (cg-or-int clauses accum str strlen at)
- (syntax-case clauses ()
- (()
- #f)
- ((first rest ...)
- #`(or (#,(peg-sexp-compile #'first accum) #,str #,strlen #,at)
- #,(cg-or-int #'(rest ...) accum str strlen at)))))
-
-;; Returns a function that parses a BODY element.
-(define (cg-body accum type pat num)
- #`(lambda (str strlen at)
- (let ((body '()))
- (let lp ((end at) (count 0))
- (let* ((match (#,(peg-sexp-compile pat accum) str strlen end))
- (new-end (if match (car match) end))
- (count (if (> new-end end) (1+ count) count)))
- (if (> new-end end)
- (push-not-null! body (single-filter (cadr match))))
- (if (and (> new-end end)
- #,(syntax-case num (+ * ?)
- (n (number? (syntax->datum #'n))
- #'(< count n))
- (+ #t)
- (* #t)
- (? #'(< count 1))))
- (lp new-end count)
- (let ((success #,(syntax-case num (+ * ?)
- (n (number? (syntax->datum #'n))
- #'(= count n))
- (+ #'(>= count 1))
- (* #t)
- (? #t))))
- #,(syntax-case type (! & lit)
- (!
- #`(if success
- #f
- #,(cggr accum 'cg-body #''() #'at)))
- (&
- #`(and success
- #,(cggr accum 'cg-body #''() #'at)))
- (lit
- #`(and success
- #,(cggr accum 'cg-body #'(reverse body) #'new-end)))))))))))
-
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;; FOR DEFINING AND USING NONTERMINALS
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -511,7 +298,7 @@ RB < ']'
(let ((parsed (peg-parse peg-grammar str)))
(if (not parsed)
(begin
- ;; (pretty-print "Invalid PEG grammar!\n")
+ ;; (display "Invalid PEG grammar!\n")
#f)
(let ((lst (peg:tree parsed)))
(cond
diff --git a/module/ice-9/peg/codegen.scm b/module/ice-9/peg/codegen.scm
new file mode 100644
index 000000000..43f44cc66
--- /dev/null
+++ b/module/ice-9/peg/codegen.scm
@@ -0,0 +1,245 @@
+;;;; codegen.scm --- code generation for composable parsers
+;;;;
+;;;; Copyright (C) 2011 Free Software Foundation, Inc.
+;;;;
+;;;; This library is free software; you can redistribute it and/or
+;;;; modify it under the terms of the GNU Lesser General Public
+;;;; License as published by the Free Software Foundation; either
+;;;; version 3 of the License, or (at your option) any later version.
+;;;;
+;;;; This library is distributed in the hope that it will be useful,
+;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;;;; Lesser General Public License for more details.
+;;;;
+;;;; You should have received a copy of the GNU Lesser General Public
+;;;; License along with this library; if not, write to the Free Software
+;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;;;;
+
+(define-module (ice-9 peg codegen)
+ #:export (peg-sexp-compile)
+ #:use-module (ice-9 peg)
+ #:use-module (ice-9 pretty-print)
+ #:use-module (system base pmatch))
+
+(define-syntax single?
+ (syntax-rules ()
+ "Return #t if X is a list of one element."
+ ((_ x)
+ (pmatch x
+ ((_) #t)
+ (else #f)))))
+
+(define-syntax single-filter
+ (syntax-rules ()
+ "If EXP is a list of one element, return the element. Otherwise
+return EXP."
+ ((_ exp)
+ (pmatch exp
+ ((,elt) elt)
+ (,elts elts)))))
+
+(define-syntax push-not-null!
+ (syntax-rules ()
+ "If OBJ is non-null, push it onto LST, otherwise do nothing."
+ ((_ lst obj)
+ (if (not (null? obj))
+ (push! lst obj)))))
+
+(define-syntax push!
+ (syntax-rules ()
+ "Push an object onto a list."
+ ((_ lst obj)
+ (set! lst (cons obj lst)))))
+
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;; CODE GENERATORS
+;; These functions generate scheme code for parsing PEGs.
+;; Conventions:
+;; accum: (all name body none)
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; Code we generate will have a certain return structure depending on how we're
+;; accumulating (the ACCUM variable).
+(define (cg-generic-ret accum name body-uneval at)
+ ;; name, body-uneval and at are syntax
+ #`(let ((body #,body-uneval))
+ #,(cond
+ ((and (eq? accum 'all) name)
+ #`(list #,at
+ (cond
+ ((not (list? body)) (list '#,name body))
+ ((null? body) '#,name)
+ ((symbol? (car body)) (list '#,name body))
+ (else (cons '#,name body)))))
+ ((eq? accum 'name)
+ #`(list #,at '#,name))
+ ((eq? accum 'body)
+ #`(list #,at
+ (cond
+ ((single? body) (car body))
+ (else body))))
+ ((eq? accum 'none)
+ #`(list #,at '()))
+ (else
+ (begin
+ (pretty-print `(cg-generic-ret-error ,accum ,name ,body-uneval ,at))
+ (pretty-print "Defaulting to accum of none.\n")
+ #`(list #,at '()))))))
+
+;; The short name makes the formatting below much easier to read.
+(define cggr cg-generic-ret)
+
+;; Generates code that matches a particular string.
+;; E.g.: (cg-string syntax "abc" 'body)
+(define (cg-string pat accum)
+ (let ((plen (string-length pat)))
+ #`(lambda (str len pos)
+ (let ((end (+ pos #,plen)))
+ (and (<= end len)
+ (string= str #,pat pos end)
+ #,(case accum
+ ((all) #`(list end (list 'cg-string #,pat)))
+ ((name) #`(list end 'cg-string))
+ ((body) #`(list end #,pat))
+ ((none) #`(list end '()))
+ (else (error "bad accum" accum))))))))
+
+;; Generates code for matching any character.
+;; E.g.: (cg-peg-any syntax 'body)
+(define (cg-peg-any accum)
+ #`(lambda (str len pos)
+ (and (< pos len)
+ #,(case accum
+ ((all) #`(list (1+ pos)
+ (list 'cg-peg-any (substring str pos (1+ pos)))))
+ ((name) #`(list (1+ pos) 'cg-peg-any))
+ ((body) #`(list (1+ pos) (substring str pos (1+ pos))))
+ ((none) #`(list (1+ pos) '()))
+ (else (error "bad accum" accum))))))
+
+;; Generates code for matching a range of characters between start and end.
+;; E.g.: (cg-range syntax #\a #\z 'body)
+(define (cg-range start end accum)
+ #`(lambda (str len pos)
+ (and (< pos len)
+ (let ((c (string-ref str pos)))
+ (and (char>=? c #,start)
+ (char<=? c #,end)
+ #,(case accum
+ ((all) #`(list (1+ pos) (list 'cg-range (string c))))
+ ((name) #`(list (1+ pos) 'cg-range))
+ ((body) #`(list (1+ pos) (string c)))
+ ((none) #`(list (1+ pos) '()))
+ (else (error "bad accum" accum))))))))
+
+;; Filters the accum argument to peg-sexp-compile for buildings like string
+;; literals (since we don't want to tag them with their name if we're doing an
+;; "all" accum).
+(define (builtin-accum-filter accum)
+ (cond
+ ((eq? accum 'all) 'body)
+ ((eq? accum 'name) 'name)
+ ((eq? accum 'body) 'body)
+ ((eq? accum 'none) 'none)))
+(define baf builtin-accum-filter)
+
+;; Takes an arbitrary expressions and accumulation variable, then parses it.
+;; E.g.: (peg-sexp-compile syntax '(and "abc" (or "-" (range #\a #\z))) 'all)
+(define (peg-sexp-compile pat accum)
+ (syntax-case pat (peg-any range ignore capture peg and or body)
+ (peg-any
+ (cg-peg-any (baf accum)))
+ (sym (identifier? #'sym) ;; nonterminal
+ #'sym)
+ (str (string? (syntax->datum #'str)) ;; literal string
+ (cg-string (syntax->datum #'str) (baf accum)))
+ ((range start end) ;; range of characters (e.g. [a-z])
+ (and (char? (syntax->datum #'start)) (char? (syntax->datum #'end)))
+ (cg-range (syntax->datum #'start) (syntax->datum #'end) (baf accum)))
+ ((ignore pat) ;; match but don't parse
+ (peg-sexp-compile #'pat 'none))
+ ((capture pat) ;; parse
+ (peg-sexp-compile #'pat 'body))
+ ((peg pat) ;; embedded PEG string
+ (string? (syntax->datum #'pat))
+ (peg-string-compile #'pat (baf accum)))
+ ((and pat ...)
+ (cg-and #'(pat ...) (baf accum)))
+ ((or pat ...)
+ (cg-or #'(pat ...) (baf accum)))
+ ((body type pat num)
+ (cg-body (baf accum) #'type #'pat #'num))))
+
+;; Top-level function builder for AND. Reduces to a call to CG-AND-INT.
+(define (cg-and clauses accum)
+ #`(lambda (str len pos)
+ (let ((body '()))
+ #,(cg-and-int clauses accum #'str #'len #'pos #'body))))
+
+;; Internal function builder for AND (calls itself).
+(define (cg-and-int clauses accum str strlen at body)
+ (syntax-case clauses ()
+ (()
+ (cggr accum 'cg-and #`(reverse #,body) at))
+ ((first rest ...)
+ #`(let ((res (#,(peg-sexp-compile #'first accum) #,str #,strlen #,at)))
+ (and res
+ ;; update AT and BODY then recurse
+ (let ((newat (car res))
+ (newbody (cadr res)))
+ (set! #,at newat)
+ (push-not-null! #,body (single-filter newbody))
+ #,(cg-and-int #'(rest ...) accum str strlen at body)))))))
+
+;; Top-level function builder for OR. Reduces to a call to CG-OR-INT.
+(define (cg-or clauses accum)
+ #`(lambda (str len pos)
+ #,(cg-or-int clauses accum #'str #'len #'pos)))
+
+;; Internal function builder for OR (calls itself).
+(define (cg-or-int clauses accum str strlen at)
+ (syntax-case clauses ()
+ (()
+ #f)
+ ((first rest ...)
+ #`(or (#,(peg-sexp-compile #'first accum) #,str #,strlen #,at)
+ #,(cg-or-int #'(rest ...) accum str strlen at)))))
+
+;; Returns a function that parses a BODY element.
+(define (cg-body accum type pat num)
+ #`(lambda (str strlen at)
+ (let ((body '()))
+ (let lp ((end at) (count 0))
+ (let* ((match (#,(peg-sexp-compile pat accum) str strlen end))
+ (new-end (if match (car match) end))
+ (count (if (> new-end end) (1+ count) count)))
+ (if (> new-end end)
+ (push-not-null! body (single-filter (cadr match))))
+ (if (and (> new-end end)
+ #,(syntax-case num (+ * ?)
+ (n (number? (syntax->datum #'n))
+ #'(< count n))
+ (+ #t)
+ (* #t)
+ (? #'(< count 1))))
+ (lp new-end count)
+ (let ((success #,(syntax-case num (+ * ?)
+ (n (number? (syntax->datum #'n))
+ #'(= count n))
+ (+ #'(>= count 1))
+ (* #t)
+ (? #t))))
+ #,(syntax-case type (! & lit)
+ (!
+ #`(if success
+ #f
+ #,(cggr accum 'cg-body #''() #'at)))
+ (&
+ #`(and success
+ #,(cggr accum 'cg-body #''() #'at)))
+ (lit
+ #`(and success
+ #,(cggr accum 'cg-body #'(reverse body) #'new-end)))))))))))