summaryrefslogtreecommitdiff
path: root/module/ice-9/peg
diff options
context:
space:
mode:
authorNoah Lavine <nlavine@haverford.edu>2011-03-05 23:54:50 -0500
committerAndy Wingo <wingo@pobox.com>2013-01-16 10:11:43 +0100
commit5e16c41703fd01162075c974939c7a97c632f33c (patch)
tree9d1acb3b8ce2de8305c9f44129cbe3d339f69395 /module/ice-9/peg
parent00923497d2b83943e642860d15a431d65340e7c8 (diff)
downloadguile-5e16c41703fd01162075c974939c7a97c632f33c.tar.gz
Separate PEG Strings
* module/ice-9/peg.scm: remove functions dealing with PEGs as strings * module/ice-9/peg/string-peg.scm: and put them here
Diffstat (limited to 'module/ice-9/peg')
-rw-r--r--module/ice-9/peg/codegen.scm1
-rw-r--r--module/ice-9/peg/string-peg.scm305
2 files changed, 306 insertions, 0 deletions
diff --git a/module/ice-9/peg/codegen.scm b/module/ice-9/peg/codegen.scm
index 43f44cc66..2c85ccca9 100644
--- a/module/ice-9/peg/codegen.scm
+++ b/module/ice-9/peg/codegen.scm
@@ -20,6 +20,7 @@
(define-module (ice-9 peg codegen)
#:export (peg-sexp-compile)
#:use-module (ice-9 peg)
+ #:use-module (ice-9 peg string-peg)
#:use-module (ice-9 pretty-print)
#:use-module (system base pmatch))
diff --git a/module/ice-9/peg/string-peg.scm b/module/ice-9/peg/string-peg.scm
new file mode 100644
index 000000000..f7e21f6b6
--- /dev/null
+++ b/module/ice-9/peg/string-peg.scm
@@ -0,0 +1,305 @@
+;;;; string-peg.scm --- representing PEG grammars as strings
+;;;;
+;;;; Copyright (C) 2010, 2011 Free Software Foundation, Inc.
+;;;;
+;;;; This library is free software; you can redistribute it and/or
+;;;; modify it under the terms of the GNU Lesser General Public
+;;;; License as published by the Free Software Foundation; either
+;;;; version 3 of the License, or (at your option) any later version.
+;;;;
+;;;; This library is distributed in the hope that it will be useful,
+;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;;;; Lesser General Public License for more details.
+;;;;
+;;;; You should have received a copy of the GNU Lesser General Public
+;;;; License along with this library; if not, write to the Free Software
+;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;;;;
+
+(define-module (ice-9 peg string-peg)
+ #:export (peg-string-compile
+ peg-as-peg
+ define-grammar
+ define-grammar-f
+ define-nonterm
+ peg-grammar)
+ #:use-module (ice-9 peg)
+ #:use-module (ice-9 peg codegen))
+
+;; The results of parsing using a nonterminal are cached. Think of it like a
+;; hash with no conflict resolution. Process for deciding on the cache size
+;; wasn't very scientific; just ran the benchmarks and stopped a little after
+;; the point of diminishing returns on my box.
+(define *cache-size* 512)
+
+;; Gets the left-hand depth of a list.
+(define (depth lst)
+ (if (or (not (list? lst)) (null? lst))
+ 0
+ (+ 1 (depth (car lst)))))
+
+(eval-when (compile load eval)
+(define (syntax-for-non-cache-case for-syntax matchf-syn accumsym s-syn)
+; (let ((matchf-syn (datum->syntax for-syntax matchf)))
+ #`(lambda (str strlen at)
+ (let ((res (#,matchf-syn str strlen at)))
+ ;; Try to match the nonterminal.
+ (if res
+ ;; If we matched, do some post-processing to figure out
+ ;; what data to propagate upward.
+ (let ((at (car res))
+ (body (cadr res)))
+ #,(cond
+ ((eq? accumsym 'name)
+ #`(list at '#,s-syn))
+ ((eq? accumsym 'all)
+ #`(list (car res)
+ (cond
+ ((not (list? body))
+ (list '#,s-syn body))
+ ((null? body) '#,s-syn)
+ ((symbol? (car body))
+ (list '#,s-syn body))
+ (else (cons '#,s-syn body)))))
+ ((eq? accumsym 'none) #`(list (car res) '()))
+ (else #`(begin res))))
+ ;; If we didn't match, just return false.
+ #f))))
+)
+
+;; Defines a new nonterminal symbol accumulating with ACCUM.
+(define-syntax define-nonterm
+ (lambda (x)
+ (syntax-case x ()
+ ((_ sym accum pat)
+ (let ((matchf (peg-sexp-compile #'pat (syntax->datum #'accum)))
+ (accumsym (syntax->datum #'accum))
+ (c (datum->syntax x (gensym))));; the cache
+ ;; CODE is the code to parse the string if the result isn't cached.
+ (let ((syn (syntax-for-non-cache-case x matchf accumsym #'sym)))
+ #`(begin
+ (define #,c (make-vector *cache-size* #f));; the cache
+ (define (sym str strlen at)
+ (let* ((vref (vector-ref #,c (modulo at *cache-size*))))
+ ;; Check to see whether the value is cached.
+ (if (and vref (eq? (car vref) str) (= (cadr vref) at))
+ (caddr vref);; If it is return it.
+ (let ((fres ;; Else calculate it and cache it.
+ (#,syn str strlen at)))
+ (vector-set! #,c (modulo at *cache-size*)
+ (list str at fres))
+ fres)))))))))))
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;; Parse string PEGs using sexp PEGs.
+;; See the variable PEG-AS-PEG for an easier-to-read syntax.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; Grammar for PEGs in PEG grammar.
+(define peg-as-peg
+"grammar <-- (nonterminal ('<--' / '<-' / '<') sp pattern)+
+pattern <-- alternative (SLASH sp alternative)*
+alternative <-- ([!&]? sp suffix)+
+suffix <-- primary ([*+?] sp)*
+primary <-- '(' sp pattern ')' sp / '.' sp / literal / charclass / nonterminal !'<'
+literal <-- ['] (!['] .)* ['] sp
+charclass <-- LB (!']' (CCrange / CCsingle))* RB sp
+CCrange <-- . '-' .
+CCsingle <-- .
+nonterminal <-- [a-zA-Z0-9-]+ sp
+sp < [ \t\n]*
+SLASH < '/'
+LB < '['
+RB < ']'
+")
+
+(define-nonterm peg-grammar all
+ (body lit (and peg-nonterminal (or "<--" "<-" "<") peg-sp peg-pattern) +))
+(define-nonterm peg-pattern all
+ (and peg-alternative
+ (body lit (and (ignore "/") peg-sp peg-alternative) *)))
+(define-nonterm peg-alternative all
+ (body lit (and (body lit (or "!" "&") ?) peg-sp peg-suffix) +))
+(define-nonterm peg-suffix all
+ (and peg-primary (body lit (and (or "*" "+" "?") peg-sp) *)))
+(define-nonterm peg-primary all
+ (or (and "(" peg-sp peg-pattern ")" peg-sp)
+ (and "." peg-sp)
+ peg-literal
+ peg-charclass
+ (and peg-nonterminal (body ! "<" 1))))
+(define-nonterm peg-literal all
+ (and "'" (body lit (and (body ! "'" 1) peg-any) *) "'" peg-sp))
+(define-nonterm peg-charclass all
+ (and (ignore "[")
+ (body lit (and (body ! "]" 1)
+ (or charclass-range charclass-single)) *)
+ (ignore "]")
+ peg-sp))
+(define-nonterm charclass-range all (and peg-any "-" peg-any))
+(define-nonterm charclass-single all peg-any)
+(define-nonterm peg-nonterminal all
+ (and (body lit (or (range #\a #\z) (range #\A #\Z) (range #\0 #\9) "-") +) peg-sp))
+(define-nonterm peg-sp none
+ (body lit (or " " "\t" "\n") *))
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;; PARSE STRING PEGS
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; Pakes a string representing a PEG grammar and defines all the nonterminals in
+;; it as the associated PEGs.
+(define (peg-parser str for-syntax)
+ (let ((parsed (peg-parse peg-grammar str)))
+ (if (not parsed)
+ (begin
+ ;; (display "Invalid PEG grammar!\n")
+ #f)
+ (let ((lst (peg:tree parsed)))
+ (cond
+ ((or (not (list? lst)) (null? lst))
+ lst)
+ ((eq? (car lst) 'peg-grammar)
+ #`(begin
+ #,@(map (lambda (x) (peg-nonterm->defn x for-syntax))
+ (context-flatten (lambda (lst) (<= (depth lst) 2))
+ (cdr lst))))))))))
+
+;; Macro wrapper for PEG-PARSER. Parses PEG grammars expressed as strings and
+;; defines all the appropriate nonterminals.
+(define-syntax define-grammar
+ (lambda (x)
+ (syntax-case x ()
+ ((_ str)
+ (peg-parser (syntax->datum #'str) x)))))
+(define define-grammar-f peg-parser)
+
+;; Parse a nonterminal and pattern listed in LST.
+(define (peg-nonterm->defn lst for-syntax)
+ (let* ((nonterm (car lst))
+ (grabber (cadr lst))
+ (pattern (caddr lst))
+ (nonterm-name (datum->syntax for-syntax
+ (string->symbol (cadr nonterm)))))
+ #`(define-nonterm #,nonterm-name
+ #,(cond
+ ((string=? grabber "<--") (datum->syntax for-syntax 'all))
+ ((string=? grabber "<-") (datum->syntax for-syntax 'body))
+ (else (datum->syntax for-syntax 'none)))
+ #,(compressor (peg-pattern->defn pattern for-syntax) for-syntax))))
+
+;; Parse a pattern.
+(define (peg-pattern->defn lst for-syntax)
+ #`(or #,@(map (lambda (x) (peg-alternative->defn x for-syntax))
+ (context-flatten (lambda (x) (eq? (car x) 'peg-alternative))
+ (cdr lst)))))
+
+;; Parse an alternative.
+(define (peg-alternative->defn lst for-syntax)
+ #`(and #,@(map (lambda (x) (peg-body->defn x for-syntax))
+ (context-flatten (lambda (x) (or (string? (car x))
+ (eq? (car x) 'peg-suffix)))
+ (cdr lst)))))
+
+;; Parse a body.
+(define (peg-body->defn lst for-syntax)
+ (let ((suffix '())
+ (front (datum->syntax for-syntax 'lit)))
+ (cond
+ ((eq? (car lst) 'peg-suffix)
+ (set! suffix lst))
+ ((string? (car lst))
+ (begin (set! front (datum->syntax for-syntax
+ (string->symbol (car lst))))
+ (set! suffix (cadr lst))))
+ (else `(peg-parse-body-fail ,lst)))
+ #`(body #,front #,@(peg-suffix->defn
+ suffix
+ for-syntax))))
+
+;; Parse a suffix.
+(define (peg-suffix->defn lst for-syntax)
+ #`(#,(peg-primary->defn (cadr lst) for-syntax)
+ #,(if (null? (cddr lst))
+ 1
+ (datum->syntax for-syntax (string->symbol (caddr lst))))))
+
+;; Parse a primary.
+(define (peg-primary->defn lst for-syntax)
+ (let ((el (cadr lst)))
+ (cond
+ ((list? el)
+ (cond
+ ((eq? (car el) 'peg-literal)
+ (peg-literal->defn el for-syntax))
+ ((eq? (car el) 'peg-charclass)
+ (peg-charclass->defn el for-syntax))
+ ((eq? (car el) 'peg-nonterminal)
+ (datum->syntax for-syntax (string->symbol (cadr el))))))
+ ((string? el)
+ (cond
+ ((equal? el "(")
+ (peg-pattern->defn (caddr lst) for-syntax))
+ ((equal? el ".")
+ (datum->syntax for-syntax 'peg-any))
+ (else (datum->syntax for-syntax
+ `(peg-parse-any unknown-string ,lst)))))
+ (else (datum->syntax for-syntax
+ `(peg-parse-any unknown-el ,lst))))))
+
+;; Trims characters off the front and end of STR.
+;; (trim-1chars "'ab'") -> "ab"
+(define (trim-1chars str) (substring str 1 (- (string-length str) 1)))
+
+;; Parses a literal.
+(define (peg-literal->defn lst for-syntax)
+ (datum->syntax for-syntax (trim-1chars (cadr lst))))
+
+;; Parses a charclass.
+(define (peg-charclass->defn lst for-syntax)
+ #`(or
+ #,@(map
+ (lambda (cc)
+ (cond
+ ((eq? (car cc) 'charclass-range)
+ #`(range #,(datum->syntax
+ for-syntax
+ (string-ref (cadr cc) 0))
+ #,(datum->syntax
+ for-syntax
+ (string-ref (cadr cc) 2))))
+ ((eq? (car cc) 'charclass-single)
+ (datum->syntax for-syntax (cadr cc)))))
+ (context-flatten
+ (lambda (x) (or (eq? (car x) 'charclass-range)
+ (eq? (car x) 'charclass-single)))
+ (cdr lst)))))
+
+;; Compresses a list to save the optimizer work.
+;; e.g. (or (and a)) -> a
+(define (compressor-core lst)
+ (if (or (not (list? lst)) (null? lst))
+ lst
+ (cond
+ ((and (or (eq? (car lst) 'or) (eq? (car lst) 'and))
+ (null? (cddr lst)))
+ (compressor-core (cadr lst)))
+ ((and (eq? (car lst) 'body)
+ (eq? (cadr lst) 'lit)
+ (eq? (cadddr lst) 1))
+ (compressor-core (caddr lst)))
+ (else (map compressor-core lst)))))
+
+(define (compressor syn for-syntax)
+ (datum->syntax for-syntax
+ (compressor-core (syntax->datum syn))))
+
+;; Builds a lambda-expressions for the pattern STR using accum.
+(define (peg-string-compile str-stx accum)
+ (peg-sexp-compile
+ (compressor
+ (peg-pattern->defn
+ (peg:tree (peg-parse peg-pattern (syntax->datum str-stx))) str-stx)
+ str-stx)
+ accum))