summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLudovic Courtès <ludo@gnu.org>2010-03-20 00:08:36 +0100
committerLudovic Courtès <ludo@gnu.org>2010-03-31 00:42:01 +0200
commit0ecd70a2714c184b57aa92c6c061c0ee7b51df79 (patch)
tree5a2e0a1540777ff45fc966ae3034bc218a35ddab
parentbd7131d3adf60b98837bd8bc3711ec7cf9069569 (diff)
downloadguile-0ecd70a2714c184b57aa92c6c061c0ee7b51df79.tar.gz
Adapt ECMAScript parser and lexer to `(system base lalr)'.
* module/language/ecmascript/tokenize.scm: Use `make-lexical-token' and related procedures instead of pairs as tokens passed to the parser. Pass source location information in the form of `source-location' objects. * module/language/ecmascript/parse.scm (read-ecmascript, read-ecmascript/1): Instantiate a new parser at each call. (parse-ecmascript): Rename to... (make-parser): ... this. Change `->' to `:' in the grammar syntax. * module/language/ecmascript/parse-lalr.scm: Remove. * module/Makefile.am (ECMASCRIPT_LANG_SOURCES): Remove `language/ecmascript/parse-lalr.scm'.
-rw-r--r--module/Makefile.am1
-rw-r--r--module/language/ecmascript/parse-lalr.scm1731
-rw-r--r--module/language/ecmascript/parse.scm522
-rw-r--r--module/language/ecmascript/tokenize.scm60
4 files changed, 297 insertions, 2017 deletions
diff --git a/module/Makefile.am b/module/Makefile.am
index bae73168d..ca3852417 100644
--- a/module/Makefile.am
+++ b/module/Makefile.am
@@ -117,7 +117,6 @@ VALUE_LANG_SOURCES = \
language/value/spec.scm
ECMASCRIPT_LANG_SOURCES = \
- language/ecmascript/parse-lalr.scm \
language/ecmascript/tokenize.scm \
language/ecmascript/parse.scm \
language/ecmascript/impl.scm \
diff --git a/module/language/ecmascript/parse-lalr.scm b/module/language/ecmascript/parse-lalr.scm
deleted file mode 100644
index b702511ca..000000000
--- a/module/language/ecmascript/parse-lalr.scm
+++ /dev/null
@@ -1,1731 +0,0 @@
-;; (language ecmascript parse-lalr) -- yacc's parser generator, in Guile
-;; Copyright (C) 1984,1989,1990 Free Software Foundation, Inc.
-;; Copyright (C) 1996-2002 Dominique Boucher
-
-;;;; This library is free software; you can redistribute it and/or
-;;;; modify it under the terms of the GNU Lesser General Public
-;;;; License as published by the Free Software Foundation; either
-;;;; version 3 of the License, or (at your option) any later version.
-;;;;
-;;;; This library is distributed in the hope that it will be useful,
-;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
-;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-;;;; Lesser General Public License for more details.
-;;;;
-;;;; You should have received a copy of the GNU Lesser General Public
-;;;; License along with this library; if not, write to the Free Software
-;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-
-
-;; ---------------------------------------------------------------------- ;;
-#!
-;;; Commentary:
-This file contains yet another LALR(1) parser generator written in
-Scheme. In contrast to other such parser generators, this one
-implements a more efficient algorithm for computing the lookahead sets.
-The algorithm is the same as used in Bison (GNU yacc) and is described
-in the following paper:
-
-"Efficient Computation of LALR(1) Look-Ahead Set", F. DeRemer and
-T. Pennello, TOPLAS, vol. 4, no. 4, october 1982.
-
-As a consequence, it is not written in a fully functional style.
-In fact, much of the code is a direct translation from C to Scheme
-of the Bison sources.
-
-@section Defining a parser
-
-The module @code{(language ecmascript parse-lalr)} declares a macro
-called @code{lalr-parser}:
-
-@lisp
- (lalr-parser tokens rules ...)
-@end lisp
-
-This macro, when given appropriate arguments, generates an LALR(1)
-syntax analyzer. The macro accepts at least two arguments. The first
-is a list of symbols which represent the terminal symbols of the
-grammar. The remaining arguments are the grammar production rules.
-
-@section Running the parser
-
-The parser generated by the @code{lalr-parser} macro is a function that
-takes two parameters. The first parameter is a lexical analyzer while
-the second is an error procedure.
-
-The lexical analyzer is zero-argument function (a thunk)
-invoked each time the parser needs to look-ahead in the token stream.
-A token is usually a pair whose @code{car} is the symbol corresponding to
-the token (the same symbol as used in the grammar definition). The
-@code{cdr} of the pair is the semantic value associated with the token. For
-example, a string token would have the @code{car} set to @code{'string}
-while the @code{cdr} is set to the string value @code{"hello"}.
-
-Once the end of file is encountered, the lexical analyzer must always
-return the symbol @code{'*eoi*} each time it is invoked.
-
-The error procedure must be a function that accepts at least two
-parameters.
-
-@section The grammar format
-
-The grammar is specified by first giving the list of terminals and the
-list of non-terminal definitions. Each non-terminal definition
-is a list where the first element is the non-terminal and the other
-elements are the right-hand sides (lists of grammar symbols). In
-addition to this, each rhs can be followed by a semantic action.
-
-For example, consider the following (yacc) grammar for a very simple
-expression language:
-@example
- e : e '+' t
- | e '-' t
- | t
- ;
- t : t '*' f
- : t '/' f
- | f
- ;
- f : ID
- ;
-@end example
-The same grammar, written for the scheme parser generator, would look
-like this (with semantic actions)
-@lisp
-(define expr-parser
- (lalr-parser
- ; Terminal symbols
- (ID + - * /)
- ; Productions
- (e (e + t) -> (+ $1 $3)
- (e - t) -> (- $1 $3)
- (t) -> $1)
- (t (t * f) -> (* $1 $3)
- (t / f) -> (/ $1 $3)
- (f) -> $1)
- (f (ID) -> $1)))
-@end lisp
-In semantic actions, the symbol @code{$n} refers to the synthesized
-attribute value of the nth symbol in the production. The value
-associated with the non-terminal on the left is the result of
-evaluating the semantic action (it defaults to @code{#f}).
-
-The above grammar implicitly handles operator precedences. It is also
-possible to explicitly assign precedences and associativity to
-terminal symbols and productions a la Yacc. Here is a modified
-(and augmented) version of the grammar:
-@lisp
-(define expr-parser
- (lalr-parser
- ; Terminal symbols
- (ID
- (left: + -)
- (left: * /)
- (nonassoc: uminus))
- (e (e + e) -> (+ $1 $3)
- (e - e) -> (- $1 $3)
- (e * e) -> (* $1 $3)
- (e / e) -> (/ $1 $3)
- (- e (prec: uminus)) -> (- $2)
- (ID) -> $1)))
-@end lisp
-The @code{left:} directive is used to specify a set of left-associative
-operators of the same precedence level, the @code{right:} directive for
-right-associative operators, and @code{nonassoc:} for operators that
-are not associative. Note the use of the (apparently) useless
-terminal @code{uminus}. It is only defined in order to assign to the
-penultimate rule a precedence level higher than that of @code{*} and
-@code{/}. The @code{prec:} directive can only appear as the last element of a
-rule. Finally, note that precedence levels are incremented from
-left to right, i.e. the precedence level of @code{+} and @code{-} is less
-than the precedence level of @code{*} and @code{/} since the formers appear
-first in the list of terminal symbols (token definitions).
-
-@section A final note on conflict resolution
-
-Conflicts in the grammar are handled in a conventional way.
-In the absence of precedence directives,
-Shift/Reduce conflicts are resolved by shifting, and Reduce/Reduce
-conflicts are resolved by choosing the rule listed first in the
-grammar definition.
-
-You can print the states of the generated parser by evaluating
-@code{(print-states)}. The format of the output is similar to the one
-produced by bison when given the -v command-line option.
-;;; Code:
-!#
-
-;;; ---------- SYSTEM DEPENDENT SECTION -----------------
-;; put in a module by Richard Todd
-(define-module (language ecmascript parse-lalr)
- #:export (lalr-parser
- print-states))
-
-;; this code is by Thien-Thi Nguyen, found in a google search
-(begin
- (defmacro def-macro (form . body)
- `(defmacro ,(car form) ,(cdr form) ,@body))
- (def-macro (BITS-PER-WORD) 28)
- (def-macro (lalr-error msg obj) `(throw 'lalr-error ,msg ,obj))
- (def-macro (logical-or x . y) `(logior ,x ,@y)))
-
-;;; ---------- END OF SYSTEM DEPENDENT SECTION ------------
-
-;; - Macros pour la gestion des vecteurs de bits
-
-(def-macro (set-bit v b)
- `(let ((x (quotient ,b (BITS-PER-WORD)))
- (y (expt 2 (remainder ,b (BITS-PER-WORD)))))
- (vector-set! ,v x (logical-or (vector-ref ,v x) y))))
-
-(def-macro (bit-union v1 v2 n)
- `(do ((i 0 (+ i 1)))
- ((= i ,n))
- (vector-set! ,v1 i (logical-or (vector-ref ,v1 i)
- (vector-ref ,v2 i)))))
-
-;; - Macro pour les structures de donnees
-
-(def-macro (new-core) `(make-vector 4 0))
-(def-macro (set-core-number! c n) `(vector-set! ,c 0 ,n))
-(def-macro (set-core-acc-sym! c s) `(vector-set! ,c 1 ,s))
-(def-macro (set-core-nitems! c n) `(vector-set! ,c 2 ,n))
-(def-macro (set-core-items! c i) `(vector-set! ,c 3 ,i))
-(def-macro (core-number c) `(vector-ref ,c 0))
-(def-macro (core-acc-sym c) `(vector-ref ,c 1))
-(def-macro (core-nitems c) `(vector-ref ,c 2))
-(def-macro (core-items c) `(vector-ref ,c 3))
-
-(def-macro (new-shift) `(make-vector 3 0))
-(def-macro (set-shift-number! c x) `(vector-set! ,c 0 ,x))
-(def-macro (set-shift-nshifts! c x) `(vector-set! ,c 1 ,x))
-(def-macro (set-shift-shifts! c x) `(vector-set! ,c 2 ,x))
-(def-macro (shift-number s) `(vector-ref ,s 0))
-(def-macro (shift-nshifts s) `(vector-ref ,s 1))
-(def-macro (shift-shifts s) `(vector-ref ,s 2))
-
-(def-macro (new-red) `(make-vector 3 0))
-(def-macro (set-red-number! c x) `(vector-set! ,c 0 ,x))
-(def-macro (set-red-nreds! c x) `(vector-set! ,c 1 ,x))
-(def-macro (set-red-rules! c x) `(vector-set! ,c 2 ,x))
-(def-macro (red-number c) `(vector-ref ,c 0))
-(def-macro (red-nreds c) `(vector-ref ,c 1))
-(def-macro (red-rules c) `(vector-ref ,c 2))
-
-
-
-(def-macro (new-set nelem)
- `(make-vector ,nelem 0))
-
-
-(def-macro (vector-map f v)
- `(let ((vm-n (- (vector-length ,v) 1)))
- (let loop ((vm-low 0) (vm-high vm-n))
- (if (= vm-low vm-high)
- (vector-set! ,v vm-low (,f (vector-ref ,v vm-low) vm-low))
- (let ((vm-middle (quotient (+ vm-low vm-high) 2)))
- (loop vm-low vm-middle)
- (loop (+ vm-middle 1) vm-high))))))
-
-
-;; - Constantes
-(define STATE-TABLE-SIZE 1009)
-
-
-;; - Tableaux
-(define rrhs #f)
-(define rlhs #f)
-(define ritem #f)
-(define nullable #f)
-(define derives #f)
-(define fderives #f)
-(define firsts #f)
-(define kernel-base #f)
-(define kernel-end #f)
-(define shift-symbol #f)
-(define shift-set #f)
-(define red-set #f)
-(define state-table #f)
-(define acces-symbol #f)
-(define reduction-table #f)
-(define shift-table #f)
-(define consistent #f)
-(define lookaheads #f)
-(define LA #f)
-(define LAruleno #f)
-(define lookback #f)
-(define goto-map #f)
-(define from-state #f)
-(define to-state #f)
-(define includes #f)
-(define F #f)
-(define action-table #f)
-
-;; - Variables
-(define nitems #f)
-(define nrules #f)
-(define nvars #f)
-(define nterms #f)
-(define nsyms #f)
-(define nstates #f)
-(define first-state #f)
-(define last-state #f)
-(define final-state #f)
-(define first-shift #f)
-(define last-shift #f)
-(define first-reduction #f)
-(define last-reduction #f)
-(define nshifts #f)
-(define maxrhs #f)
-(define ngotos #f)
-(define token-set-size #f)
-
-(define (gen-tables! tokens gram)
- (initialize-all)
- (rewrite-grammar
- tokens
- gram
- (lambda (terms terms/prec vars gram gram/actions)
- (set! the-terminals/prec (list->vector terms/prec))
- (set! the-terminals (list->vector terms))
- (set! the-nonterminals (list->vector vars))
- (set! nterms (length terms))
- (set! nvars (length vars))
- (set! nsyms (+ nterms nvars))
- (let ((no-of-rules (length gram/actions))
- (no-of-items (let loop ((l gram/actions) (count 0))
- (if (null? l)
- count
- (loop (cdr l) (+ count (length (caar l))))))))
- (pack-grammar no-of-rules no-of-items gram)
- (set-derives)
- (set-nullable)
- (generate-states)
- (lalr)
- (build-tables)
- (compact-action-table terms)
- gram/actions))))
-
-
-(define (initialize-all)
- (set! rrhs #f)
- (set! rlhs #f)
- (set! ritem #f)
- (set! nullable #f)
- (set! derives #f)
- (set! fderives #f)
- (set! firsts #f)
- (set! kernel-base #f)
- (set! kernel-end #f)
- (set! shift-symbol #f)
- (set! shift-set #f)
- (set! red-set #f)
- (set! state-table (make-vector STATE-TABLE-SIZE '()))
- (set! acces-symbol #f)
- (set! reduction-table #f)
- (set! shift-table #f)
- (set! consistent #f)
- (set! lookaheads #f)
- (set! LA #f)
- (set! LAruleno #f)
- (set! lookback #f)
- (set! goto-map #f)
- (set! from-state #f)
- (set! to-state #f)
- (set! includes #f)
- (set! F #f)
- (set! action-table #f)
- (set! nstates #f)
- (set! first-state #f)
- (set! last-state #f)
- (set! final-state #f)
- (set! first-shift #f)
- (set! last-shift #f)
- (set! first-reduction #f)
- (set! last-reduction #f)
- (set! nshifts #f)
- (set! maxrhs #f)
- (set! ngotos #f)
- (set! token-set-size #f)
- (set! rule-precedences '()))
-
-
-(define (pack-grammar no-of-rules no-of-items gram)
- (set! nrules (+ no-of-rules 1))
- (set! nitems no-of-items)
- (set! rlhs (make-vector nrules #f))
- (set! rrhs (make-vector nrules #f))
- (set! ritem (make-vector (+ 1 nitems) #f))
-
- (let loop ((p gram) (item-no 0) (rule-no 1))
- (if (not (null? p))
- (let ((nt (caar p)))
- (let loop2 ((prods (cdar p)) (it-no2 item-no) (rl-no2 rule-no))
- (if (null? prods)
- (loop (cdr p) it-no2 rl-no2)
- (begin
- (vector-set! rlhs rl-no2 nt)
- (vector-set! rrhs rl-no2 it-no2)
- (let loop3 ((rhs (car prods)) (it-no3 it-no2))
- (if (null? rhs)
- (begin
- (vector-set! ritem it-no3 (- rl-no2))
- (loop2 (cdr prods) (+ it-no3 1) (+ rl-no2 1)))
- (begin
- (vector-set! ritem it-no3 (car rhs))
- (loop3 (cdr rhs) (+ it-no3 1))))))))))))
-
-
-;; Fonction set-derives
-;; --------------------
-(define (set-derives)
- (define delts (make-vector (+ nrules 1) 0))
- (define dset (make-vector nvars -1))
-
- (let loop ((i 1) (j 0)) ; i = 0
- (if (< i nrules)
- (let ((lhs (vector-ref rlhs i)))
- (if (>= lhs 0)
- (begin
- (vector-set! delts j (cons i (vector-ref dset lhs)))
- (vector-set! dset lhs j)
- (loop (+ i 1) (+ j 1)))
- (loop (+ i 1) j)))))
-
- (set! derives (make-vector nvars 0))
-
- (let loop ((i 0))
- (if (< i nvars)
- (let ((q (let loop2 ((j (vector-ref dset i)) (s '()))
- (if (< j 0)
- s
- (let ((x (vector-ref delts j)))
- (loop2 (cdr x) (cons (car x) s)))))))
- (vector-set! derives i q)
- (loop (+ i 1))))))
-
-
-
-(define (set-nullable)
- (set! nullable (make-vector nvars #f))
- (let ((squeue (make-vector nvars #f))
- (rcount (make-vector (+ nrules 1) 0))
- (rsets (make-vector nvars #f))
- (relts (make-vector (+ nitems nvars 1) #f)))
- (let loop ((r 0) (s2 0) (p 0))
- (let ((*r (vector-ref ritem r)))
- (if *r
- (if (< *r 0)
- (let ((symbol (vector-ref rlhs (- *r))))
- (if (and (>= symbol 0)
- (not (vector-ref nullable symbol)))
- (begin
- (vector-set! nullable symbol #t)
- (vector-set! squeue s2 symbol)
- (loop (+ r 1) (+ s2 1) p))))
- (let loop2 ((r1 r) (any-tokens #f))
- (let* ((symbol (vector-ref ritem r1)))
- (if (> symbol 0)
- (loop2 (+ r1 1) (or any-tokens (>= symbol nvars)))
- (if (not any-tokens)
- (let ((ruleno (- symbol)))
- (let loop3 ((r2 r) (p2 p))
- (let ((symbol (vector-ref ritem r2)))
- (if (> symbol 0)
- (begin
- (vector-set! rcount ruleno
- (+ (vector-ref rcount ruleno) 1))
- (vector-set! relts p2
- (cons (vector-ref rsets symbol)
- ruleno))
- (vector-set! rsets symbol p2)
- (loop3 (+ r2 1) (+ p2 1)))
- (loop (+ r2 1) s2 p2)))))
- (loop (+ r1 1) s2 p))))))
- (let loop ((s1 0) (s3 s2))
- (if (< s1 s3)
- (let loop2 ((p (vector-ref rsets (vector-ref squeue s1))) (s4 s3))
- (if p
- (let* ((x (vector-ref relts p))
- (ruleno (cdr x))
- (y (- (vector-ref rcount ruleno) 1)))
- (vector-set! rcount ruleno y)
- (if (= y 0)
- (let ((symbol (vector-ref rlhs ruleno)))
- (if (and (>= symbol 0)
- (not (vector-ref nullable symbol)))
- (begin
- (vector-set! nullable symbol #t)
- (vector-set! squeue s4 symbol)
- (loop2 (car x) (+ s4 1)))
- (loop2 (car x) s4)))
- (loop2 (car x) s4))))
- (loop (+ s1 1) s4)))))))))
-
-
-
-; Fonction set-firsts qui calcule un tableau de taille
-; nvars et qui donne, pour chaque non-terminal X, une liste des
-; non-terminaux pouvant apparaitre au debut d'une derivation a
-; partir de X.
-
-(define (set-firsts)
- (set! firsts (make-vector nvars '()))
-
- ;; -- initialization
- (let loop ((i 0))
- (if (< i nvars)
- (let loop2 ((sp (vector-ref derives i)))
- (if (null? sp)
- (loop (+ i 1))
- (let ((sym (vector-ref ritem (vector-ref rrhs (car sp)))))
- (if (< -1 sym nvars)
- (vector-set! firsts i (sinsert sym (vector-ref firsts i))))
- (loop2 (cdr sp)))))))
-
- ;; -- reflexive and transitive closure
- (let loop ((continue #t))
- (if continue
- (let loop2 ((i 0) (cont #f))
- (if (>= i nvars)
- (loop cont)
- (let* ((x (vector-ref firsts i))
- (y (let loop3 ((l x) (z x))
- (if (null? l)
- z
- (loop3 (cdr l)
- (sunion (vector-ref firsts (car l)) z))))))
- (if (equal? x y)
- (loop2 (+ i 1) cont)
- (begin
- (vector-set! firsts i y)
- (loop2 (+ i 1) #t))))))))
-
- (let loop ((i 0))
- (if (< i nvars)
- (begin
- (vector-set! firsts i (sinsert i (vector-ref firsts i)))
- (loop (+ i 1))))))
-
-
-
-
-; Fonction set-fderives qui calcule un tableau de taille
-; nvars et qui donne, pour chaque non-terminal, une liste des regles pouvant
-; etre derivees a partir de ce non-terminal. (se sert de firsts)
-
-(define (set-fderives)
- (set! fderives (make-vector nvars #f))
-
- (set-firsts)
-
- (let loop ((i 0))
- (if (< i nvars)
- (let ((x (let loop2 ((l (vector-ref firsts i)) (fd '()))
- (if (null? l)
- fd
- (loop2 (cdr l)
- (sunion (vector-ref derives (car l)) fd))))))
- (vector-set! fderives i x)
- (loop (+ i 1))))))
-
-
-; Fonction calculant la fermeture d'un ensemble d'items LR0
-; ou core est une liste d'items
-
-(define (closure core)
- ;; Initialization
- (define ruleset (make-vector nrules #f))
-
- (let loop ((csp core))
- (if (not (null? csp))
- (let ((sym (vector-ref ritem (car csp))))
- (if (< -1 sym nvars)
- (let loop2 ((dsp (vector-ref fderives sym)))
- (if (not (null? dsp))
- (begin
- (vector-set! ruleset (car dsp) #t)
- (loop2 (cdr dsp))))))
- (loop (cdr csp)))))
-
- (let loop ((ruleno 1) (csp core) (itemsetv '())) ; ruleno = 0
- (if (< ruleno nrules)
- (if (vector-ref ruleset ruleno)
- (let ((itemno (vector-ref rrhs ruleno)))
- (let loop2 ((c csp) (itemsetv2 itemsetv))
- (if (and (pair? c)
- (< (car c) itemno))
- (loop2 (cdr c) (cons (car c) itemsetv2))
- (loop (+ ruleno 1) c (cons itemno itemsetv2)))))
- (loop (+ ruleno 1) csp itemsetv))
- (let loop2 ((c csp) (itemsetv2 itemsetv))
- (if (pair? c)
- (loop2 (cdr c) (cons (car c) itemsetv2))
- (reverse itemsetv2))))))
-
-
-
-(define (allocate-item-sets)
- (set! kernel-base (make-vector nsyms 0))
- (set! kernel-end (make-vector nsyms #f)))
-
-
-(define (allocate-storage)
- (allocate-item-sets)
- (set! red-set (make-vector (+ nrules 1) 0)))
-
-;; --
-
-
-(define (initialize-states)
- (let ((p (new-core)))
- (set-core-number! p 0)
- (set-core-acc-sym! p #f)
- (set-core-nitems! p 1)
- (set-core-items! p '(0))
-
- (set! first-state (list p))
- (set! last-state first-state)
- (set! nstates 1)))
-
-
-
-(define (generate-states)
- (allocate-storage)
- (set-fderives)
- (initialize-states)
- (let loop ((this-state first-state))
- (if (pair? this-state)
- (let* ((x (car this-state))
- (is (closure (core-items x))))
- (save-reductions x is)
- (new-itemsets is)
- (append-states)
- (if (> nshifts 0)
- (save-shifts x))
- (loop (cdr this-state))))))
-
-
-;; Fonction calculant les symboles sur lesquels il faut "shifter"
-;; et regroupe les items en fonction de ces symboles
-
-(define (new-itemsets itemset)
- ;; - Initialization
- (set! shift-symbol '())
- (let loop ((i 0))
- (if (< i nsyms)
- (begin
- (vector-set! kernel-end i '())
- (loop (+ i 1)))))
-
- (let loop ((isp itemset))
- (if (pair? isp)
- (let* ((i (car isp))
- (sym (vector-ref ritem i)))
- (if (>= sym 0)
- (begin
- (set! shift-symbol (sinsert sym shift-symbol))
- (let ((x (vector-ref kernel-end sym)))
- (if (null? x)
- (begin
- (vector-set! kernel-base sym (cons (+ i 1) x))
- (vector-set! kernel-end sym (vector-ref kernel-base sym)))
- (begin
- (set-cdr! x (list (+ i 1)))
- (vector-set! kernel-end sym (cdr x)))))))
- (loop (cdr isp)))))
-
- (set! nshifts (length shift-symbol)))
-
-
-
-(define (get-state sym)
- (let* ((isp (vector-ref kernel-base sym))
- (n (length isp))
- (key (let loop ((isp1 isp) (k 0))
- (if (null? isp1)
- (modulo k STATE-TABLE-SIZE)
- (loop (cdr isp1) (+ k (car isp1))))))
- (sp (vector-ref state-table key)))
- (if (null? sp)
- (let ((x (new-state sym)))
- (vector-set! state-table key (list x))
- (core-number x))
- (let loop ((sp1 sp))
- (if (and (= n (core-nitems (car sp1)))
- (let loop2 ((i1 isp) (t (core-items (car sp1))))
- (if (and (pair? i1)
- (= (car i1)
- (car t)))
- (loop2 (cdr i1) (cdr t))
- (null? i1))))
- (core-number (car sp1))
- (if (null? (cdr sp1))
- (let ((x (new-state sym)))
- (set-cdr! sp1 (list x))
- (core-number x))
- (loop (cdr sp1))))))))
-
-
-(define (new-state sym)
- (let* ((isp (vector-ref kernel-base sym))
- (n (length isp))
- (p (new-core)))
- (set-core-number! p nstates)
- (set-core-acc-sym! p sym)
- (if (= sym nvars) (set! final-state nstates))
- (set-core-nitems! p n)
- (set-core-items! p isp)
- (set-cdr! last-state (list p))
- (set! last-state (cdr last-state))
- (set! nstates (+ nstates 1))
- p))
-
-
-;; --
-
-(define (append-states)
- (set! shift-set
- (let loop ((l (reverse shift-symbol)))
- (if (null? l)
- '()
- (cons (get-state (car l)) (loop (cdr l)))))))
-
-;; --
-
-(define (save-shifts core)
- (let ((p (new-shift)))
- (set-shift-number! p (core-number core))
- (set-shift-nshifts! p nshifts)
- (set-shift-shifts! p shift-set)
- (if last-shift
- (begin
- (set-cdr! last-shift (list p))
- (set! last-shift (cdr last-shift)))
- (begin
- (set! first-shift (list p))
- (set! last-shift first-shift)))))
-
-(define (save-reductions core itemset)
- (let ((rs (let loop ((l itemset))
- (if (null? l)
- '()
- (let ((item (vector-ref ritem (car l))))
- (if (< item 0)
- (cons (- item) (loop (cdr l)))
- (loop (cdr l))))))))
- (if (pair? rs)
- (let ((p (new-red)))
- (set-red-number! p (core-number core))
- (set-red-nreds! p (length rs))
- (set-red-rules! p rs)
- (if last-reduction
- (begin
- (set-cdr! last-reduction (list p))
- (set! last-reduction (cdr last-reduction)))
- (begin
- (set! first-reduction (list p))
- (set! last-reduction first-reduction)))))))
-
-
-;; --
-
-(define (lalr)
- (set! token-set-size (+ 1 (quotient nterms (BITS-PER-WORD))))
- (set-accessing-symbol)
- (set-shift-table)
- (set-reduction-table)
- (set-max-rhs)
- (initialize-LA)
- (set-goto-map)
- (initialize-F)
- (build-relations)
- (digraph includes)
- (compute-lookaheads))
-
-(define (set-accessing-symbol)
- (set! acces-symbol (make-vector nstates #f))
- (let loop ((l first-state))
- (if (pair? l)
- (let ((x (car l)))
- (vector-set! acces-symbol (core-number x) (core-acc-sym x))
- (loop (cdr l))))))
-
-(define (set-shift-table)
- (set! shift-table (make-vector nstates #f))
- (let loop ((l first-shift))
- (if (pair? l)
- (let ((x (car l)))
- (vector-set! shift-table (shift-number x) x)
- (loop (cdr l))))))
-
-(define (set-reduction-table)
- (set! reduction-table (make-vector nstates #f))
- (let loop ((l first-reduction))
- (if (pair? l)
- (let ((x (car l)))
- (vector-set! reduction-table (red-number x) x)
- (loop (cdr l))))))
-
-(define (set-max-rhs)
- (let loop ((p 0) (curmax 0) (length 0))
- (let ((x (vector-ref ritem p)))
- (if x
- (if (>= x 0)
- (loop (+ p 1) curmax (+ length 1))
- (loop (+ p 1) (max curmax length) 0))
- (set! maxrhs curmax)))))
-
-(define (initialize-LA)
- (define (last l)
- (if (null? (cdr l))
- (car l)
- (last (cdr l))))
-
- (set! consistent (make-vector nstates #f))
- (set! lookaheads (make-vector (+ nstates 1) #f))
-
- (let loop ((count 0) (i 0))
- (if (< i nstates)
- (begin
- (vector-set! lookaheads i count)
- (let ((rp (vector-ref reduction-table i))
- (sp (vector-ref shift-table i)))
- (if (and rp
- (or (> (red-nreds rp) 1)
- (and sp
- (not
- (< (vector-ref acces-symbol
- (last (shift-shifts sp)))
- nvars)))))
- (loop (+ count (red-nreds rp)) (+ i 1))
- (begin
- (vector-set! consistent i #t)
- (loop count (+ i 1))))))
-
- (begin
- (vector-set! lookaheads nstates count)
- (let ((c (max count 1)))
- (set! LA (make-vector c #f))
- (do ((j 0 (+ j 1))) ((= j c)) (vector-set! LA j (new-set token-set-size)))
- (set! LAruleno (make-vector c -1))
- (set! lookback (make-vector c #f)))
- (let loop ((i 0) (np 0))
- (if (< i nstates)
- (if (vector-ref consistent i)
- (loop (+ i 1) np)
- (let ((rp (vector-ref reduction-table i)))
- (if rp
- (let loop2 ((j (red-rules rp)) (np2 np))
- (if (null? j)
- (loop (+ i 1) np2)
- (begin
- (vector-set! LAruleno np2 (car j))
- (loop2 (cdr j) (+ np2 1)))))
- (loop (+ i 1) np))))))))))
-
-
-(define (set-goto-map)
- (set! goto-map (make-vector (+ nvars 1) 0))
- (let ((temp-map (make-vector (+ nvars 1) 0)))
- (let loop ((ng 0) (sp first-shift))
- (if (pair? sp)
- (let loop2 ((i (reverse (shift-shifts (car sp)))) (ng2 ng))
- (if (pair? i)
- (let ((symbol (vector-ref acces-symbol (car i))))
- (if (< symbol nvars)
- (begin
- (vector-set! goto-map symbol
- (+ 1 (vector-ref goto-map symbol)))
- (loop2 (cdr i) (+ ng2 1)))
- (loop2 (cdr i) ng2)))
- (loop ng2 (cdr sp))))
-
- (let loop ((k 0) (i 0))
- (if (< i nvars)
- (begin
- (vector-set! temp-map i k)
- (loop (+ k (vector-ref goto-map i)) (+ i 1)))
-
- (begin
- (do ((i 0 (+ i 1)))
- ((>= i nvars))
- (vector-set! goto-map i (vector-ref temp-map i)))
-
- (set! ngotos ng)
- (vector-set! goto-map nvars ngotos)
- (vector-set! temp-map nvars ngotos)
- (set! from-state (make-vector ngotos #f))
- (set! to-state (make-vector ngotos #f))
-
- (do ((sp first-shift (cdr sp)))
- ((null? sp))
- (let* ((x (car sp))
- (state1 (shift-number x)))
- (do ((i (shift-shifts x) (cdr i)))
- ((null? i))
- (let* ((state2 (car i))
- (symbol (vector-ref acces-symbol state2)))
- (if (< symbol nvars)
- (let ((k (vector-ref temp-map symbol)))
- (vector-set! temp-map symbol (+ k 1))
- (vector-set! from-state k state1)
- (vector-set! to-state k state2))))))))))))))
-
-
-(define (map-goto state symbol)
- (let loop ((low (vector-ref goto-map symbol))
- (high (- (vector-ref goto-map (+ symbol 1)) 1)))
- (if (> low high)
- (begin
- (display (list "Error in map-goto" state symbol) (current-error-port))
- (newline (current-error-port))
- 0)
- (let* ((middle (quotient (+ low high) 2))
- (s (vector-ref from-state middle)))
- (cond
- ((= s state)
- middle)
- ((< s state)
- (loop (+ middle 1) high))
- (else
- (loop low (- middle 1))))))))
-
-
-(define (initialize-F)
- (set! F (make-vector ngotos #f))
- (do ((i 0 (+ i 1))) ((= i ngotos)) (vector-set! F i (new-set token-set-size)))
-
- (let ((reads (make-vector ngotos #f)))
-
- (let loop ((i 0) (rowp 0))
- (if (< i ngotos)
- (let* ((rowf (vector-ref F rowp))
- (stateno (vector-ref to-state i))
- (sp (vector-ref shift-table stateno)))
- (if sp
- (let loop2 ((j (shift-shifts sp)) (edges '()))
- (if (pair? j)
- (let ((symbol (vector-ref acces-symbol (car j))))
- (if (< symbol nvars)
- (if (vector-ref nullable symbol)
- (loop2 (cdr j) (cons (map-goto stateno symbol)
- edges))
- (loop2 (cdr j) edges))
- (begin
- (set-bit rowf (- symbol nvars))
- (loop2 (cdr j) edges))))
- (if (pair? edges)
- (vector-set! reads i (reverse edges))))))
- (loop (+ i 1) (+ rowp 1)))))
- (digraph reads)))
-
-(define (add-lookback-edge stateno ruleno gotono)
- (let ((k (vector-ref lookaheads (+ stateno 1))))
- (let loop ((found #f) (i (vector-ref lookaheads stateno)))
- (if (and (not found) (< i k))
- (if (= (vector-ref LAruleno i) ruleno)
- (loop #t i)
- (loop found (+ i 1)))
-
- (if (not found)
- (begin (display "Error in add-lookback-edge : " (current-error-port))
- (display (list stateno ruleno gotono) (current-error-port))
- (newline (current-error-port)))
- (vector-set! lookback i
- (cons gotono (vector-ref lookback i))))))))
-
-
-(define (transpose r-arg n)
- (let ((new-end (make-vector n #f))
- (new-R (make-vector n #f)))
- (do ((i 0 (+ i 1)))
- ((= i n))
- (let ((x (list 'bidon)))
- (vector-set! new-R i x)
- (vector-set! new-end i x)))
- (do ((i 0 (+ i 1)))
- ((= i n))
- (let ((sp (vector-ref r-arg i)))
- (if (pair? sp)
- (let loop ((sp2 sp))
- (if (pair? sp2)
- (let* ((x (car sp2))
- (y (vector-ref new-end x)))
- (set-cdr! y (cons i (cdr y)))
- (vector-set! new-end x (cdr y))
- (loop (cdr sp2))))))))
- (do ((i 0 (+ i 1)))
- ((= i n))
- (vector-set! new-R i (cdr (vector-ref new-R i))))
-
- new-R))
-
-
-
-(define (build-relations)
-
- (define (get-state stateno symbol)
- (let loop ((j (shift-shifts (vector-ref shift-table stateno)))
- (stno stateno))
- (if (null? j)
- stno
- (let ((st2 (car j)))
- (if (= (vector-ref acces-symbol st2) symbol)
- st2
- (loop (cdr j) st2))))))
-
- (set! includes (make-vector ngotos #f))
- (do ((i 0 (+ i 1)))
- ((= i ngotos))
- (let ((state1 (vector-ref from-state i))
- (symbol1 (vector-ref acces-symbol (vector-ref to-state i))))
- (let loop ((rulep (vector-ref derives symbol1))
- (edges '()))
- (if (pair? rulep)
- (let ((*rulep (car rulep)))
- (let loop2 ((rp (vector-ref rrhs *rulep))
- (stateno state1)
- (states (list state1)))
- (let ((*rp (vector-ref ritem rp)))
- (if (> *rp 0)
- (let ((st (get-state stateno *rp)))
- (loop2 (+ rp 1) st (cons st states)))
- (begin
-
- (if (not (vector-ref consistent stateno))
- (add-lookback-edge stateno *rulep i))
-
- (let loop2 ((done #f)
- (stp (cdr states))
- (rp2 (- rp 1))
- (edgp edges))
- (if (not done)
- (let ((*rp (vector-ref ritem rp2)))
- (if (< -1 *rp nvars)
- (loop2 (not (vector-ref nullable *rp))
- (cdr stp)
- (- rp2 1)
- (cons (map-goto (car stp) *rp) edgp))
- (loop2 #t stp rp2 edgp)))
-
- (loop (cdr rulep) edgp))))))))
- (vector-set! includes i edges)))))
- (set! includes (transpose includes ngotos)))
-
-
-
-(define (compute-lookaheads)
- (let ((n (vector-ref lookaheads nstates)))
- (let loop ((i 0))
- (if (< i n)
- (let loop2 ((sp (vector-ref lookback i)))
- (if (pair? sp)
- (let ((LA-i (vector-ref LA i))
- (F-j (vector-ref F (car sp))))
- (bit-union LA-i F-j token-set-size)
- (loop2 (cdr sp)))
- (loop (+ i 1))))))))
-
-
-
-(define (digraph relation)
- (define infinity (+ ngotos 2))
- (define INDEX (make-vector (+ ngotos 1) 0))
- (define VERTICES (make-vector (+ ngotos 1) 0))
- (define top 0)
- (define R relation)
-
- (define (traverse i)
- (set! top (+ 1 top))
- (vector-set! VERTICES top i)
- (let ((height top))
- (vector-set! INDEX i height)
- (let ((rp (vector-ref R i)))
- (if (pair? rp)
- (let loop ((rp2 rp))
- (if (pair? rp2)
- (let ((j (car rp2)))
- (if (= 0 (vector-ref INDEX j))
- (traverse j))
- (if (> (vector-ref INDEX i)
- (vector-ref INDEX j))
- (vector-set! INDEX i (vector-ref INDEX j)))
- (let ((F-i (vector-ref F i))
- (F-j (vector-ref F j)))
- (bit-union F-i F-j token-set-size))
- (loop (cdr rp2))))))
- (if (= (vector-ref INDEX i) height)
- (let loop ()
- (let ((j (vector-ref VERTICES top)))
- (set! top (- top 1))
- (vector-set! INDEX j infinity)
- (if (not (= i j))
- (begin
- (bit-union (vector-ref F i)
- (vector-ref F j)
- token-set-size)
- (loop)))))))))
-
- (let loop ((i 0))
- (if (< i ngotos)
- (begin
- (if (and (= 0 (vector-ref INDEX i))
- (pair? (vector-ref R i)))
- (traverse i))
- (loop (+ i 1))))))
-
-
-;; ---------------------------------------------------------------------- ;;
-;; operator precedence management ;;
-;; ---------------------------------------------------------------------- ;;
-
-; a vector of precedence descriptors where each element
-; is of the form (terminal type precedence)
-(define the-terminals/prec #f) ; terminal symbols with precedence
-; the precedence is an integer >= 0
-(define (get-symbol-precedence sym)
- (caddr (vector-ref the-terminals/prec sym)))
-; the operator type is either 'none, 'left, 'right, or 'nonassoc
-(define (get-symbol-assoc sym)
- (cadr (vector-ref the-terminals/prec sym)))
-
-(define rule-precedences '())
-(define (add-rule-precedence! rule sym)
- (set! rule-precedences
- (cons (cons rule sym) rule-precedences)))
-
-(define (get-rule-precedence ruleno)
- (cond
- ((assq ruleno rule-precedences)
- => (lambda (p)
- (get-symbol-precedence (cdr p))))
- (else
- ;; process the rule symbols from left to right
- (let loop ((i (vector-ref rrhs ruleno))
- (prec 0))
- (let ((item (vector-ref ritem i)))
- ;; end of rule
- (if (< item 0)
- prec
- (let ((i1 (+ i 1)))
- (if (>= item nvars)
- ;; it's a terminal symbol
- (loop i1 (get-symbol-precedence (- item nvars)))
- (loop i1 prec)))))))))
-
-;; ---------------------------------------------------------------------- ;;
-;; Build the various tables ;;
-;; ---------------------------------------------------------------------- ;;
-(define (build-tables)
-
- (define (resolve-conflict sym rule)
- (let ((sym-prec (get-symbol-precedence sym))
- (sym-assoc (get-symbol-assoc sym))
- (rule-prec (get-rule-precedence rule)))
- (cond
- ((> sym-prec rule-prec) 'shift)
- ((< sym-prec rule-prec) 'reduce)
- ((eq? sym-assoc 'left) 'reduce)
- ((eq? sym-assoc 'right) 'shift)
- (else 'shift))))
-
- ;; --- Add an action to the action table ------------------------------ ;;
- (define (add-action St Sym Act)
- (let* ((x (vector-ref action-table St))
- (y (assv Sym x)))
- (if y
- (if (not (= Act (cdr y)))
- ;; -- there is a conflict
- (begin
- (if (and (<= (cdr y) 0)
- (<= Act 0))
- ;; --- reduce/reduce conflict ----------------------- ;;
- (begin
- (display "%% Reduce/Reduce conflict " (current-error-port))
- (display "(reduce " (current-error-port))
- (display (- Act) (current-error-port))
- (display ", reduce " (current-error-port))
- (display (- (cdr y)) (current-error-port))
- (display ") on " (current-error-port))
- (print-symbol (+ Sym nvars) (current-error-port))
- (display " in state " (current-error-port))
- (display St (current-error-port))
- (newline (current-error-port))
- (set-cdr! y (max (cdr y) Act)))
- ;; --- shift/reduce conflict ------------------------ ;;
- ;; can we resolve the conflict using precedences?
- (case (resolve-conflict Sym (- (cdr y)))
- ;; -- shift
- ((shift)
- (set-cdr! y Act))
- ;; -- reduce
- ((reduce)
- #f) ; well, nothing to do...
- ;; -- signal a conflict!
- (else
- (display "%% Shift/Reduce conflict " (current-error-port))
- (display "(shift " (current-error-port))
- (display Act (current-error-port))
- (display ", reduce " (current-error-port))
- (display (- (cdr y)) (current-error-port))
- (display ") on " (current-error-port))
- (print-symbol (+ Sym nvars) (current-error-port))
- (display " in state " (current-error-port))
- (display St (current-error-port))
- (newline (current-error-port))
- (set-cdr! y Act))))))
-
- (vector-set! action-table St (cons (cons Sym Act) x)))))
-
- (set! action-table (make-vector nstates '()))
-
- (do ((i 0 (+ i 1))) ; i = state
- ((= i nstates))
- (let ((red (vector-ref reduction-table i)))
- (if (and red (>= (red-nreds red) 1))
- (if (and (= (red-nreds red) 1) (vector-ref consistent i))
- (add-action i 'default (- (car (red-rules red))))
- (let ((k (vector-ref lookaheads (+ i 1))))
- (let loop ((j (vector-ref lookaheads i)))
- (if (< j k)
- (let ((rule (- (vector-ref LAruleno j)))
- (lav (vector-ref LA j)))
- (let loop2 ((token 0) (x (vector-ref lav 0)) (y 1) (z 0))
- (if (< token nterms)
- (begin
- (let ((in-la-set? (modulo x 2)))
- (if (= in-la-set? 1)
- (add-action i token rule)))
- (if (= y (BITS-PER-WORD))
- (loop2 (+ token 1)
- (vector-ref lav (+ z 1))
- 1
- (+ z 1))
- (loop2 (+ token 1) (quotient x 2) (+ y 1) z)))))
- (loop (+ j 1)))))))))
-
- (let ((shiftp (vector-ref shift-table i)))
- (if shiftp
- (let loop ((k (shift-shifts shiftp)))
- (if (pair? k)
- (let* ((state (car k))
- (symbol (vector-ref acces-symbol state)))
- (if (>= symbol nvars)
- (add-action i (- symbol nvars) state))
- (loop (cdr k))))))))
-
- (add-action final-state 0 'accept))
-
-(define (compact-action-table terms)
- (define (most-common-action acts)
- (let ((accums '()))
- (let loop ((l acts))
- (if (pair? l)
- (let* ((x (cdar l))
- (y (assv x accums)))
- (if (and (number? x) (< x 0))
- (if y
- (set-cdr! y (+ 1 (cdr y)))
- (set! accums (cons `(,x . 1) accums))))
- (loop (cdr l)))))
-
- (let loop ((l accums) (max 0) (sym #f))
- (if (null? l)
- sym
- (let ((x (car l)))
- (if (> (cdr x) max)
- (loop (cdr l) (cdr x) (car x))
- (loop (cdr l) max sym)))))))
-
- (define (translate-terms acts)
- (map (lambda (act)
- (cons (list-ref terms (car act))
- (cdr act)))
- acts))
-
- (do ((i 0 (+ i 1)))
- ((= i nstates))
- (let ((acts (vector-ref action-table i)))
- (if (vector? (vector-ref reduction-table i))
- (let ((act (most-common-action acts)))
- (vector-set! action-table i
- (cons `(*default* . ,(if act act 'error))
- (translate-terms
- (lalr-filter (lambda (x)
- (not (eq? (cdr x) act)))
- acts)))))
- (vector-set! action-table i
- (cons `(*default* . *error*)
- (translate-terms acts)))))))
-
-
-
-;; --
-
-(define (rewrite-grammar tokens grammar k)
-
- (define eoi '*eoi*)
-
- (define (check-terminal term terms)
- (cond
- ((not (valid-terminal? term))
- (lalr-error "invalid terminal: " term))
- ((member term terms)
- (lalr-error "duplicate definition of terminal: " term))))
-
- (define (prec->type prec)
- (cdr (assq prec '((left: . left)
- (right: . right)
- (nonassoc: . nonassoc)))))
-
- (cond
- ;; --- a few error conditions ---------------------------------------- ;;
- ((not (list? tokens))
- (lalr-error "Invalid token list: " tokens))
- ((not (pair? grammar))
- (lalr-error "Grammar definition must have a non-empty list of productions" '()))
-
- (else
- ;; --- check the terminals ---------------------------------------- ;;
- (let loop1 ((lst tokens)
- (rev-terms '())
- (rev-terms/prec '())
- (prec-level 0))
- (if (pair? lst)
- (let ((term (car lst)))
- (cond
- ((pair? term)
- (if (and (memq (car term) '(left: right: nonassoc:))
- (not (null? (cdr term))))
- (let ((prec (+ prec-level 1))
- (optype (prec->type (car term))))
- (let loop-toks ((l (cdr term))
- (rev-terms rev-terms)
- (rev-terms/prec rev-terms/prec))
- (if (null? l)
- (loop1 (cdr lst) rev-terms rev-terms/prec prec)
- (let ((term (car l)))
- (check-terminal term rev-terms)
- (loop-toks
- (cdr l)
- (cons term rev-terms)
- (cons (list term optype prec) rev-terms/prec))))))
-
- (lalr-error "invalid operator precedence specification: " term)))
-
- (else
- (check-terminal term rev-terms)
- (loop1 (cdr lst)
- (cons term rev-terms)
- (cons (list term 'none 0) rev-terms/prec)
- prec-level))))
-
- ;; --- check the grammar rules ------------------------------ ;;
- (let loop2 ((lst grammar) (rev-nonterm-defs '()))
- (if (pair? lst)
- (let ((def (car lst)))
- (if (not (pair? def))
- (lalr-error "Nonterminal definition must be a non-empty list" '())
- (let ((nonterm (car def)))
- (cond ((not (valid-nonterminal? nonterm))
- (lalr-error "Invalid nonterminal:" nonterm))
- ((or (member nonterm rev-terms)
- (assoc nonterm rev-nonterm-defs))
- (lalr-error "Nonterminal previously defined:" nonterm))
- (else
- (loop2 (cdr lst)
- (cons def rev-nonterm-defs)))))))
- (let* ((terms (cons eoi (reverse rev-terms)))
- (terms/prec (cons '(eoi none 0) (reverse rev-terms/prec)))
- (nonterm-defs (reverse rev-nonterm-defs))
- (nonterms (cons '*start* (map car nonterm-defs))))
- (if (= (length nonterms) 1)
- (lalr-error "Grammar must contain at least one nonterminal" '())
- (let loop-defs ((defs (cons `(*start* (,(cadr nonterms) ,eoi) -> $1)
- nonterm-defs))
- (ruleno 0)
- (comp-defs '()))
- (if (pair? defs)
- (let* ((nonterm-def (car defs))
- (compiled-def (rewrite-nonterm-def
- nonterm-def
- ruleno
- terms nonterms)))
- (loop-defs (cdr defs)
- (+ ruleno (length compiled-def))
- (cons compiled-def comp-defs)))
-
- (let ((compiled-nonterm-defs (reverse comp-defs)))
- (k terms
- terms/prec
- nonterms
- (map (lambda (x) (cons (caaar x) (map cdar x)))
- compiled-nonterm-defs)
- (apply append compiled-nonterm-defs))))))))))))))
-
-
-(define *arrow* '->)
-
-(define (rewrite-nonterm-def nonterm-def ruleno terms nonterms)
-
- (define No-NT (length nonterms))
-
- (define (encode x)
- (let ((PosInNT (pos-in-list x nonterms)))
- (if PosInNT
- PosInNT
- (let ((PosInT (pos-in-list x terms)))
- (if PosInT
- (+ No-NT PosInT)
- (lalr-error "undefined symbol : " x))))))
-
- (define (process-prec-directive rhs ruleno)
- (let loop ((l rhs))
- (if (null? l)
- '()
- (let ((first (car l))
- (rest (cdr l)))
- (cond
- ((or (member first terms) (member first nonterms))
- (cons first (loop rest)))
- ((and (pair? first)
- (eq? (car first) 'prec:))
- (pair? (cdr first))
- (if (and (pair? (cdr first))
- (member (cadr first) terms))
- (if (null? (cddr first))
- (begin
- (add-rule-precedence! ruleno (pos-in-list (cadr first) terms))
- (loop rest))
- (lalr-error "prec: directive should be at end of rule: " rhs))
- (lalr-error "Invalid prec: directive: " first)))
- (else
- (lalr-error "Invalid terminal or nonterminal: " first)))))))
-
-
- (if (not (pair? (cdr nonterm-def)))
- (lalr-error "At least one production needed for nonterminal" (car nonterm-def))
- (let ((name (symbol->string (car nonterm-def))))
- (let loop1 ((lst (cdr nonterm-def))
- (i 1)
- (rev-productions-and-actions '()))
- (if (not (pair? lst))
- (reverse rev-productions-and-actions)
- (let* ((rhs (process-prec-directive (car lst) (+ ruleno i -1)))
- (rest (cdr lst))
- (prod (map encode (cons (car nonterm-def) rhs))))
- (for-each (lambda (x)
- (if (not (or (member x terms) (member x nonterms)))
- (lalr-error "Invalid terminal or nonterminal" x)))
- rhs)
- (if (and (pair? rest)
- (eq? (car rest) *arrow*)
- (pair? (cdr rest)))
- (loop1 (cddr rest)
- (+ i 1)
- (cons (cons prod (cadr rest))
- rev-productions-and-actions))
- (let* ((rhs-length (length rhs))
- (action
- (cons 'vector
- (cons (list 'quote (string->symbol
- (string-append
- name
- "-"
- (number->string i))))
- (let loop-j ((j 1))
- (if (> j rhs-length)
- '()
- (cons (string->symbol
- (string-append
- "$"
- (number->string j)))
- (loop-j (+ j 1)))))))))
- (loop1 rest
- (+ i 1)
- (cons (cons prod action)
- rev-productions-and-actions))))))))))
-
-(define (valid-nonterminal? x)
- (symbol? x))
-
-(define (valid-terminal? x)
- (symbol? x)) ; DB
-
-;; ---------------------------------------------------------------------- ;;
-;; Miscellaneous ;;
-;; ---------------------------------------------------------------------- ;;
-(define (pos-in-list x lst)
- (let loop ((lst lst) (i 0))
- (cond ((not (pair? lst)) #f)
- ((equal? (car lst) x) i)
- (else (loop (cdr lst) (+ i 1))))))
-
-(define (sunion lst1 lst2) ; union of sorted lists
- (let loop ((L1 lst1)
- (L2 lst2))
- (cond ((null? L1) L2)
- ((null? L2) L1)
- (else
- (let ((x (car L1)) (y (car L2)))
- (cond
- ((> x y)
- (cons y (loop L1 (cdr L2))))
- ((< x y)
- (cons x (loop (cdr L1) L2)))
- (else
- (loop (cdr L1) L2))
- ))))))
-
-(define (sinsert elem lst)
- (let loop ((l1 lst))
- (if (null? l1)
- (cons elem l1)
- (let ((x (car l1)))
- (cond ((< elem x)
- (cons elem l1))
- ((> elem x)
- (cons x (loop (cdr l1))))
- (else
- l1))))))
-
-(define (lalr-filter p lst)
- (let loop ((l lst))
- (if (null? l)
- '()
- (let ((x (car l)) (y (cdr l)))
- (if (p x)
- (cons x (loop y))
- (loop y))))))
-
-;; ---------------------------------------------------------------------- ;;
-;; Debugging tools ... ;;
-;; ---------------------------------------------------------------------- ;;
-(define the-terminals #f) ; names of terminal symbols
-(define the-nonterminals #f) ; non-terminals
-
-(define (print-item item-no)
- (let loop ((i item-no))
- (let ((v (vector-ref ritem i)))
- (if (>= v 0)
- (loop (+ i 1))
- (let* ((rlno (- v))
- (nt (vector-ref rlhs rlno)))
- (display (vector-ref the-nonterminals nt)) (display " --> ")
- (let loop ((i (vector-ref rrhs rlno)))
- (let ((v (vector-ref ritem i)))
- (if (= i item-no)
- (display ". "))
- (if (>= v 0)
- (begin
- (print-symbol v)
- (display " ")
- (loop (+ i 1)))
- (begin
- (display " (rule ")
- (display (- v))
- (display ")")
- (newline))))))))))
-
-(define (print-symbol n . port)
- (display (if (>= n nvars)
- (vector-ref the-terminals (- n nvars))
- (vector-ref the-nonterminals n))
- (if (null? port)
- (current-output-port)
- (car port))))
-
-(define (print-states)
-"Print the states of a generated parser."
- (define (print-action act)
- (cond
- ((eq? act '*error*)
- (display " : Error"))
- ((eq? act 'accept)
- (display " : Accept input"))
- ((< act 0)
- (display " : reduce using rule ")
- (display (- act)))
- (else
- (display " : shift and goto state ")
- (display act)))
- (newline)
- #t)
-
- (define (print-actions acts)
- (let loop ((l acts))
- (if (null? l)
- #t
- (let ((sym (caar l))
- (act (cdar l)))
- (display " ")
- (cond
- ((eq? sym 'default)
- (display "default action"))
- (else
- (if (number? sym)
- (print-symbol (+ sym nvars))
- (display sym))))
- (print-action act)
- (loop (cdr l))))))
-
- (if (not action-table)
- (begin
- (display "No generated parser available!")
- (newline)
- #f)
- (begin
- (display "State table") (newline)
- (display "-----------") (newline) (newline)
-
- (let loop ((l first-state))
- (if (null? l)
- #t
- (let* ((core (car l))
- (i (core-number core))
- (items (core-items core))
- (actions (vector-ref action-table i)))
- (display "state ") (display i) (newline)
- (newline)
- (for-each (lambda (x) (display " ") (print-item x))
- items)
- (newline)
- (print-actions actions)
- (newline)
- (loop (cdr l))))))))
-
-
-
-;; ---------------------------------------------------------------------- ;;
-
-(define build-goto-table
- (lambda ()
- `(vector
- ,@(map
- (lambda (shifts)
- (list 'quote
- (if shifts
- (let loop ((l (shift-shifts shifts)))
- (if (null? l)
- '()
- (let* ((state (car l))
- (symbol (vector-ref acces-symbol state)))
- (if (< symbol nvars)
- (cons `(,symbol . ,state)
- (loop (cdr l)))
- (loop (cdr l))))))
- '())))
- (vector->list shift-table)))))
-
-
-(define build-reduction-table
- (lambda (gram/actions)
- `(vector
- '()
- ,@(map
- (lambda (p)
- (let ((act (cdr p)))
- `(lambda (___stack ___sp ___goto-table ___k)
- ,(let* ((nt (caar p)) (rhs (cdar p)) (n (length rhs)))
- `(let* (,@(if act
- (let loop ((i 1) (l rhs))
- (if (pair? l)
- (let ((rest (cdr l)))
- (cons
- `(,(string->symbol
- (string-append
- "$"
- (number->string
- (+ (- n i) 1))))
- (vector-ref ___stack (- ___sp ,(- (* i 2) 1))))
- (loop (+ i 1) rest)))
- '()))
- '()))
- ,(if (= nt 0)
- '$1
- `(___push ___stack (- ___sp ,(* 2 n))
- ,nt ___goto-table ,(cdr p) ___k)))))))
-
- gram/actions))))
-
-
-;; @section (api "API")
-
-(define-macro (lalr-parser tokens . rules)
- (let* ((gram/actions (gen-tables! tokens rules))
- (code
- `(letrec ((___max-stack-size 500)
-
- (___atable ',action-table)
- (___gtable ,(build-goto-table))
- (___grow-stack (lambda (stack)
- ;; make a new stack twice as big as the original
- (let ((new-stack (make-vector (* 2 (vector-length stack)) #f)))
- ;; then copy the elements...
- (let loop ((i (- (vector-length stack) 1)))
- (if (< i 0)
- new-stack
- (begin
- (vector-set! new-stack i (vector-ref stack i))
- (loop (- i 1))))))))
-
- (___push (lambda (stack sp new-cat goto-table lval k)
- (let* ((state (vector-ref stack sp))
- (new-state (cdr (assq new-cat (vector-ref goto-table state))))
- (new-sp (+ sp 2))
- (stack (if (< new-sp (vector-length stack))
- stack
- (___grow-stack stack))))
- (vector-set! stack new-sp new-state)
- (vector-set! stack (- new-sp 1) lval)
- (k stack new-sp))))
-
- (___action (lambda (x l)
- (let ((y (assq x l)))
- (if y (cdr y) (cdar l)))))
-
- (___rtable ,(build-reduction-table gram/actions)))
-
- (lambda (lexerp errorp)
-
- (let ((stack (make-vector ___max-stack-size 0)))
- (let loop ((stack stack) (sp 0) (input (lexerp)))
- (let* ((state (vector-ref stack sp))
- (i (if (pair? input) (car input) input))
- (attr (if (pair? input) (cdr input) #f))
- (act (___action i (vector-ref ___atable state))))
-
- (if (not (symbol? i))
- (errorp "PARSE ERROR: invalid token: " input))
-
- (cond
-
- ;; Input succesfully parsed
- ((eq? act 'accept)
- (vector-ref stack 1))
-
- ;; Syntax error in input
- ((eq? act '*error*)
- (if (eq? i '*eoi*)
- (errorp "PARSE ERROR : unexpected end of input ")
- (errorp "PARSE ERROR : unexpected token : " input)))
-
- ;; Shift current token on top of the stack
- ((>= act 0)
- (let ((stack (if (< (+ sp 2) (vector-length stack))
- stack
- (___grow-stack stack))))
- (vector-set! stack (+ sp 1) attr)
- (vector-set! stack (+ sp 2) act)
- (loop stack (+ sp 2) (lexerp))))
-
- ;; Reduce by rule (- act)
- (else
- ((vector-ref ___rtable (- act))
- stack sp ___gtable
- (lambda (stack sp)
- (loop stack sp input))))))))))))
- code))
-
-;; arch-tag: 4FE771DE-F56D-11D8-8B77-000A95B4C7DC
diff --git a/module/language/ecmascript/parse.scm b/module/language/ecmascript/parse.scm
index ce731a736..e9d6673ce 100644
--- a/module/language/ecmascript/parse.scm
+++ b/module/language/ecmascript/parse.scm
@@ -1,6 +1,6 @@
;;; ECMAScript for Guile
-;; Copyright (C) 2009 Free Software Foundation, Inc.
+;; Copyright (C) 2009, 2010 Free Software Foundation, Inc.
;;;; This library is free software; you can redistribute it and/or
;;;; modify it under the terms of the GNU Lesser General Public
@@ -19,23 +19,29 @@
;;; Code:
(define-module (language ecmascript parse)
- #:use-module (language ecmascript parse-lalr)
+ #:use-module (system base lalr)
#:use-module (language ecmascript tokenize)
- #:export (read-ecmascript read-ecmascript/1 parse-ecmascript))
+ #:export (read-ecmascript read-ecmascript/1 make-parser))
(define (syntax-error message . args)
(apply throw 'SyntaxError message args))
(define (read-ecmascript port)
- (parse-ecmascript (make-tokenizer port) syntax-error))
+ (let ((parse (make-parser)))
+ (parse (make-tokenizer port) syntax-error)))
(define (read-ecmascript/1 port)
- (parse-ecmascript (make-tokenizer/1 port) syntax-error))
+ (let ((parse (make-parser)))
+ (parse (make-tokenizer/1 port) syntax-error)))
(define *eof-object*
(call-with-input-string "" read-char))
-(define parse-ecmascript
+(define (make-parser)
+ ;; Return a fresh ECMAScript parser. Parsers produced by `lalr-scm' are now
+ ;; stateful (e.g., they won't invoke the tokenizer any more once it has
+ ;; returned `*eoi*'), hence the need to instantiate new parsers.
+
(lalr-parser
;; terminal (i.e. input) token types
(lbrace rbrace lparen rparen lbracket rbracket dot semicolon comma <
@@ -49,289 +55,289 @@
Identifier StringLiteral NumericLiteral RegexpLiteral)
- (Program (SourceElements) -> $1
- (*eoi*) -> *eof-object*)
+ (Program (SourceElements) : $1
+ (*eoi*) : *eof-object*)
;;
;; Verily, here we define statements. Expressions are defined
;; afterwards.
;;
- (SourceElement (Statement) -> $1
- (FunctionDeclaration) -> $1)
-
- (FunctionDeclaration (function Identifier lparen rparen lbrace FunctionBody rbrace) -> `(var (,$2 (lambda () ,$6)))
- (function Identifier lparen FormalParameterList rparen lbrace FunctionBody rbrace) -> `(var (,$2 (lambda ,$4 ,$7))))
- (FunctionExpression (function lparen rparen lbrace FunctionBody rbrace) -> `(lambda () ,$5)
- (function Identifier lparen rparen lbrace FunctionBody rbrace) -> `(lambda () ,$6)
- (function lparen FormalParameterList rparen lbrace FunctionBody rbrace) -> `(lambda ,$3 ,$6)
- (function Identifier lparen FormalParameterList rparen lbrace FunctionBody rbrace) -> `(lambda ,$4 ,$7))
- (FormalParameterList (Identifier) -> `(,$1)
- (FormalParameterList comma Identifier) -> `(,@$1 ,$3))
- (SourceElements (SourceElement) -> $1
- (SourceElements SourceElement) -> (if (and (pair? $1) (eq? (car $1) 'begin))
+ (SourceElement (Statement) : $1
+ (FunctionDeclaration) : $1)
+
+ (FunctionDeclaration (function Identifier lparen rparen lbrace FunctionBody rbrace) : `(var (,$2 (lambda () ,$6)))
+ (function Identifier lparen FormalParameterList rparen lbrace FunctionBody rbrace) : `(var (,$2 (lambda ,$4 ,$7))))
+ (FunctionExpression (function lparen rparen lbrace FunctionBody rbrace) : `(lambda () ,$5)
+ (function Identifier lparen rparen lbrace FunctionBody rbrace) : `(lambda () ,$6)
+ (function lparen FormalParameterList rparen lbrace FunctionBody rbrace) : `(lambda ,$3 ,$6)
+ (function Identifier lparen FormalParameterList rparen lbrace FunctionBody rbrace) : `(lambda ,$4 ,$7))
+ (FormalParameterList (Identifier) : `(,$1)
+ (FormalParameterList comma Identifier) : `(,@$1 ,$3))
+ (SourceElements (SourceElement) : $1
+ (SourceElements SourceElement) : (if (and (pair? $1) (eq? (car $1) 'begin))
`(begin ,@(cdr $1) ,$2)
`(begin ,$1 ,$2)))
- (FunctionBody (SourceElements) -> $1)
-
- (Statement (Block) -> $1
- (VariableStatement) -> $1
- (EmptyStatement) -> $1
- (ExpressionStatement) -> $1
- (IfStatement) -> $1
- (IterationStatement) -> $1
- (ContinueStatement) -> $1
- (BreakStatement) -> $1
- (ReturnStatement) -> $1
- (WithStatement) -> $1
- (LabelledStatement) -> $1
- (SwitchStatement) -> $1
- (ThrowStatement) -> $1
- (TryStatement) -> $1)
-
- (Block (lbrace StatementList rbrace) -> `(block ,$2))
- (StatementList (Statement) -> $1
- (StatementList Statement) -> (if (and (pair? $1) (eq? (car $1) 'begin))
+ (FunctionBody (SourceElements) : $1)
+
+ (Statement (Block) : $1
+ (VariableStatement) : $1
+ (EmptyStatement) : $1
+ (ExpressionStatement) : $1
+ (IfStatement) : $1
+ (IterationStatement) : $1
+ (ContinueStatement) : $1
+ (BreakStatement) : $1
+ (ReturnStatement) : $1
+ (WithStatement) : $1
+ (LabelledStatement) : $1
+ (SwitchStatement) : $1
+ (ThrowStatement) : $1
+ (TryStatement) : $1)
+
+ (Block (lbrace StatementList rbrace) : `(block ,$2))
+ (StatementList (Statement) : $1
+ (StatementList Statement) : (if (and (pair? $1) (eq? (car $1) 'begin))
`(begin ,@(cdr $1) ,$2)
`(begin ,$1 ,$2)))
- (VariableStatement (var VariableDeclarationList) -> `(var ,@$2))
- (VariableDeclarationList (VariableDeclaration) -> `(,$1)
- (VariableDeclarationList comma VariableDeclaration) -> `(,@$1 ,$2))
- (VariableDeclarationListNoIn (VariableDeclarationNoIn) -> `(,$1)
- (VariableDeclarationListNoIn comma VariableDeclarationNoIn) -> `(,@$1 ,$2))
- (VariableDeclaration (Identifier) -> `(,$1)
- (Identifier Initialiser) -> `(,$1 ,$2))
- (VariableDeclarationNoIn (Identifier) -> `(,$1)
- (Identifier Initialiser) -> `(,$1 ,$2))
- (Initialiser (= AssignmentExpression) -> $2)
- (InitialiserNoIn (= AssignmentExpressionNoIn) -> $2)
+ (VariableStatement (var VariableDeclarationList) : `(var ,@$2))
+ (VariableDeclarationList (VariableDeclaration) : `(,$1)
+ (VariableDeclarationList comma VariableDeclaration) : `(,@$1 ,$2))
+ (VariableDeclarationListNoIn (VariableDeclarationNoIn) : `(,$1)
+ (VariableDeclarationListNoIn comma VariableDeclarationNoIn) : `(,@$1 ,$2))
+ (VariableDeclaration (Identifier) : `(,$1)
+ (Identifier Initialiser) : `(,$1 ,$2))
+ (VariableDeclarationNoIn (Identifier) : `(,$1)
+ (Identifier Initialiser) : `(,$1 ,$2))
+ (Initialiser (= AssignmentExpression) : $2)
+ (InitialiserNoIn (= AssignmentExpressionNoIn) : $2)
- (EmptyStatement (semicolon) -> '(begin))
+ (EmptyStatement (semicolon) : '(begin))
- (ExpressionStatement (Expression semicolon) -> $1)
+ (ExpressionStatement (Expression semicolon) : $1)
- (IfStatement (if lparen Expression rparen Statement else Statement) -> `(if ,$3 ,$5 ,$7)
- (if lparen Expression rparen Statement) -> `(if ,$3 ,$5))
+ (IfStatement (if lparen Expression rparen Statement else Statement) : `(if ,$3 ,$5 ,$7)
+ (if lparen Expression rparen Statement) : `(if ,$3 ,$5))
- (IterationStatement (do Statement while lparen Expression rparen semicolon) -> `(do ,$2 ,$5)
+ (IterationStatement (do Statement while lparen Expression rparen semicolon) : `(do ,$2 ,$5)
- (while lparen Expression rparen Statement) -> `(while ,$3 ,$5)
+ (while lparen Expression rparen Statement) : `(while ,$3 ,$5)
- (for lparen semicolon semicolon rparen Statement) -> `(for #f #f #f ,$6)
- (for lparen semicolon semicolon Expression rparen Statement) -> `(for #f #f ,$5 ,$7)
- (for lparen semicolon Expression semicolon rparen Statement) -> `(for #f ,$4 #f ,$7)
- (for lparen semicolon Expression semicolon Expression rparen Statement) -> `(for #f ,$4 ,$6 ,$8)
+ (for lparen semicolon semicolon rparen Statement) : `(for #f #f #f ,$6)
+ (for lparen semicolon semicolon Expression rparen Statement) : `(for #f #f ,$5 ,$7)
+ (for lparen semicolon Expression semicolon rparen Statement) : `(for #f ,$4 #f ,$7)
+ (for lparen semicolon Expression semicolon Expression rparen Statement) : `(for #f ,$4 ,$6 ,$8)
- (for lparen ExpressionNoIn semicolon semicolon rparen Statement) -> `(for ,$3 #f #f ,$7)
- (for lparen ExpressionNoIn semicolon semicolon Expression rparen Statement) -> `(for ,$3 #f ,$6 ,$8)
- (for lparen ExpressionNoIn semicolon Expression semicolon rparen Statement) -> `(for ,$3 ,$5 #f ,$8)
- (for lparen ExpressionNoIn semicolon Expression semicolon Expression rparen Statement) -> `(for ,$3 ,$5 ,$7 ,$9)
+ (for lparen ExpressionNoIn semicolon semicolon rparen Statement) : `(for ,$3 #f #f ,$7)
+ (for lparen ExpressionNoIn semicolon semicolon Expression rparen Statement) : `(for ,$3 #f ,$6 ,$8)
+ (for lparen ExpressionNoIn semicolon Expression semicolon rparen Statement) : `(for ,$3 ,$5 #f ,$8)
+ (for lparen ExpressionNoIn semicolon Expression semicolon Expression rparen Statement) : `(for ,$3 ,$5 ,$7 ,$9)
- (for lparen var VariableDeclarationListNoIn semicolon semicolon rparen Statement) -> `(for (var ,@$4) #f #f ,$8)
- (for lparen var VariableDeclarationListNoIn semicolon semicolon Expression rparen Statement) -> `(for (var ,@$4) #f ,$7 ,$9)
- (for lparen var VariableDeclarationListNoIn semicolon Expression semicolon rparen Statement) -> `(for (var ,@$4) ,$6 #f ,$9)
- (for lparen var VariableDeclarationListNoIn semicolon Expression semicolon Expression rparen Statement) -> `(for (var ,@$4) ,$6 ,$8 ,$10)
+ (for lparen var VariableDeclarationListNoIn semicolon semicolon rparen Statement) : `(for (var ,@$4) #f #f ,$8)
+ (for lparen var VariableDeclarationListNoIn semicolon semicolon Expression rparen Statement) : `(for (var ,@$4) #f ,$7 ,$9)
+ (for lparen var VariableDeclarationListNoIn semicolon Expression semicolon rparen Statement) : `(for (var ,@$4) ,$6 #f ,$9)
+ (for lparen var VariableDeclarationListNoIn semicolon Expression semicolon Expression rparen Statement) : `(for (var ,@$4) ,$6 ,$8 ,$10)
- (for lparen LeftHandSideExpression in Expression rparen Statement) -> `(for-in ,$3 ,$5 ,$7)
- (for lparen var VariableDeclarationNoIn in Expression rparen Statement) -> `(begin (var ,$4) (for-in (ref ,@$4) ,$6 ,$8)))
+ (for lparen LeftHandSideExpression in Expression rparen Statement) : `(for-in ,$3 ,$5 ,$7)
+ (for lparen var VariableDeclarationNoIn in Expression rparen Statement) : `(begin (var ,$4) (for-in (ref ,@$4) ,$6 ,$8)))
- (ContinueStatement (continue Identifier semicolon) -> `(continue ,$2)
- (continue semicolon) -> `(continue))
+ (ContinueStatement (continue Identifier semicolon) : `(continue ,$2)
+ (continue semicolon) : `(continue))
- (BreakStatement (break Identifier semicolon) -> `(break ,$2)
- (break semicolon) -> `(break))
+ (BreakStatement (break Identifier semicolon) : `(break ,$2)
+ (break semicolon) : `(break))
- (ReturnStatement (return Expression semicolon) -> `(return ,$2)
- (return semicolon) -> `(return))
+ (ReturnStatement (return Expression semicolon) : `(return ,$2)
+ (return semicolon) : `(return))
- (WithStatement (with lparen Expression rparen Statement) -> `(with ,$3 ,$5))
+ (WithStatement (with lparen Expression rparen Statement) : `(with ,$3 ,$5))
- (SwitchStatement (switch lparen Expression rparen CaseBlock) -> `(switch ,$3 ,@$5))
- (CaseBlock (lbrace rbrace) -> '()
- (lbrace CaseClauses rbrace) -> $2
- (lbrace CaseClauses DefaultClause rbrace) -> `(,@$2 ,@$3)
- (lbrace DefaultClause rbrace) -> `(,$2)
- (lbrace DefaultClause CaseClauses rbrace) -> `(,@$2 ,@$3))
- (CaseClauses (CaseClause) -> `(,$1)
- (CaseClauses CaseClause) -> `(,@$1 ,$2))
- (CaseClause (case Expression colon) -> `(case ,$2)
- (case Expression colon StatementList) -> `(case ,$2 ,$4))
- (DefaultClause (default colon) -> `(default)
- (default colon StatementList) -> `(default ,$3))
+ (SwitchStatement (switch lparen Expression rparen CaseBlock) : `(switch ,$3 ,@$5))
+ (CaseBlock (lbrace rbrace) : '()
+ (lbrace CaseClauses rbrace) : $2
+ (lbrace CaseClauses DefaultClause rbrace) : `(,@$2 ,@$3)
+ (lbrace DefaultClause rbrace) : `(,$2)
+ (lbrace DefaultClause CaseClauses rbrace) : `(,@$2 ,@$3))
+ (CaseClauses (CaseClause) : `(,$1)
+ (CaseClauses CaseClause) : `(,@$1 ,$2))
+ (CaseClause (case Expression colon) : `(case ,$2)
+ (case Expression colon StatementList) : `(case ,$2 ,$4))
+ (DefaultClause (default colon) : `(default)
+ (default colon StatementList) : `(default ,$3))
- (LabelledStatement (Identifier colon Statement) -> `(label ,$1 ,$3))
+ (LabelledStatement (Identifier colon Statement) : `(label ,$1 ,$3))
- (ThrowStatement (throw Expression semicolon) -> `(throw ,$2))
+ (ThrowStatement (throw Expression semicolon) : `(throw ,$2))
- (TryStatement (try Block Catch) -> `(try ,$2 ,$3 #f)
- (try Block Finally) -> `(try ,$2 #f ,$3)
- (try Block Catch Finally) -> `(try ,$2 ,$3 ,$4))
- (Catch (catch lparen Identifier rparen Block) -> `(catch ,$3 ,$5))
- (Finally (finally Block) -> `(finally ,$2))
+ (TryStatement (try Block Catch) : `(try ,$2 ,$3 #f)
+ (try Block Finally) : `(try ,$2 #f ,$3)
+ (try Block Catch Finally) : `(try ,$2 ,$3 ,$4))
+ (Catch (catch lparen Identifier rparen Block) : `(catch ,$3 ,$5))
+ (Finally (finally Block) : `(finally ,$2))
;;
;; As promised, expressions. We build up to Expression bottom-up, so
;; as to get operator precedence right.
;;
- (PrimaryExpression (this) -> 'this
- (null) -> 'null
- (true) -> 'true
- (false) -> 'false
- (Identifier) -> `(ref ,$1)
- (StringLiteral) -> `(string ,$1)
- (RegexpLiteral) -> `(regexp ,$1)
- (NumericLiteral) -> `(number ,$1)
- (ArrayLiteral) -> $1
- (ObjectLiteral) -> $1
- (lparen Expression rparen) -> $2)
-
- (ArrayLiteral (lbracket rbracket) -> '(array)
- (lbracket Elision rbracket) -> '(array ,@$2)
- (lbracket ElementList rbracket) -> `(array ,@$2)
- (lbracket ElementList comma rbracket) -> `(array ,@$2)
- (lbracket ElementList comma Elision rbracket) -> `(array ,@$2))
- (ElementList (AssignmentExpression) -> `(,$1)
- (Elision AssignmentExpression) -> `(,@$1 ,$2)
- (ElementList comma AssignmentExpression) -> `(,@$1 ,$3)
- (ElementList comma Elision AssignmentExpression) -> `(,@$1 ,@$3 ,$4))
- (Elision (comma) -> '((number 0))
- (Elision comma) -> `(,@$1 (number 0)))
-
- (ObjectLiteral (lbrace rbrace) -> `(object)
- (lbrace PropertyNameAndValueList rbrace) -> `(object ,@$2))
- (PropertyNameAndValueList (PropertyName colon AssignmentExpression) -> `((,$1 ,$3))
- (PropertyNameAndValueList comma PropertyName colon AssignmentExpression) -> `(,@$1 (,$3 ,$5)))
- (PropertyName (Identifier) -> $1
- (StringLiteral) -> (string->symbol $1)
- (NumericLiteral) -> $1)
-
- (MemberExpression (PrimaryExpression) -> $1
- (FunctionExpression) -> $1
- (MemberExpression lbracket Expression rbracket) -> `(aref ,$1 ,$3)
- (MemberExpression dot Identifier) -> `(pref ,$1 ,$3)
- (new MemberExpression Arguments) -> `(new ,$2 ,$3))
-
- (NewExpression (MemberExpression) -> $1
- (new NewExpression) -> `(new ,$2 ()))
-
- (CallExpression (MemberExpression Arguments) -> `(call ,$1 ,$2)
- (CallExpression Arguments) -> `(call ,$1 ,$2)
- (CallExpression lbracket Expression rbracket) -> `(aref ,$1 ,$3)
- (CallExpression dot Identifier) -> `(pref ,$1 ,$3))
- (Arguments (lparen rparen) -> '()
- (lparen ArgumentList rparen) -> $2)
- (ArgumentList (AssignmentExpression) -> `(,$1)
- (ArgumentList comma AssignmentExpression) -> `(,@$1 ,$3))
-
- (LeftHandSideExpression (NewExpression) -> $1
- (CallExpression) -> $1)
-
- (PostfixExpression (LeftHandSideExpression) -> $1
- (LeftHandSideExpression ++) -> `(postinc ,$1)
- (LeftHandSideExpression --) -> `(postdec ,$1))
-
- (UnaryExpression (PostfixExpression) -> $1
- (delete UnaryExpression) -> `(delete ,$2)
- (void UnaryExpression) -> `(void ,$2)
- (typeof UnaryExpression) -> `(typeof ,$2)
- (++ UnaryExpression) -> `(preinc ,$2)
- (-- UnaryExpression) -> `(predec ,$2)
- (+ UnaryExpression) -> `(+ ,$2)
- (- UnaryExpression) -> `(- ,$2)
- (~ UnaryExpression) -> `(~ ,$2)
- (! UnaryExpression) -> `(! ,$2))
-
- (MultiplicativeExpression (UnaryExpression) -> $1
- (MultiplicativeExpression * UnaryExpression) -> `(* ,$1 ,$3)
- (MultiplicativeExpression / UnaryExpression) -> `(/ ,$1 ,$3)
- (MultiplicativeExpression % UnaryExpression) -> `(% ,$1 ,$3))
-
- (AdditiveExpression (MultiplicativeExpression) -> $1
- (AdditiveExpression + MultiplicativeExpression) -> `(+ ,$1 ,$3)
- (AdditiveExpression - MultiplicativeExpression) -> `(- ,$1 ,$3))
-
- (ShiftExpression (AdditiveExpression) -> $1
- (ShiftExpression << MultiplicativeExpression) -> `(<< ,$1 ,$3)
- (ShiftExpression >> MultiplicativeExpression) -> `(>> ,$1 ,$3)
- (ShiftExpression >>> MultiplicativeExpression) -> `(>>> ,$1 ,$3))
-
- (RelationalExpression (ShiftExpression) -> $1
- (RelationalExpression < ShiftExpression) -> `(< ,$1 ,$3)
- (RelationalExpression > ShiftExpression) -> `(> ,$1 ,$3)
- (RelationalExpression <= ShiftExpression) -> `(<= ,$1 ,$3)
- (RelationalExpression >= ShiftExpression) -> `(>= ,$1 ,$3)
- (RelationalExpression instanceof ShiftExpression) -> `(instanceof ,$1 ,$3)
- (RelationalExpression in ShiftExpression) -> `(in ,$1 ,$3))
-
- (RelationalExpressionNoIn (ShiftExpression) -> $1
- (RelationalExpressionNoIn < ShiftExpression) -> `(< ,$1 ,$3)
- (RelationalExpressionNoIn > ShiftExpression) -> `(> ,$1 ,$3)
- (RelationalExpressionNoIn <= ShiftExpression) -> `(<= ,$1 ,$3)
- (RelationalExpressionNoIn >= ShiftExpression) -> `(>= ,$1 ,$3)
- (RelationalExpressionNoIn instanceof ShiftExpression) -> `(instanceof ,$1 ,$3))
-
- (EqualityExpression (RelationalExpression) -> $1
- (EqualityExpression == RelationalExpression) -> `(== ,$1 ,$3)
- (EqualityExpression != RelationalExpression) -> `(!= ,$1 ,$3)
- (EqualityExpression === RelationalExpression) -> `(=== ,$1 ,$3)
- (EqualityExpression !== RelationalExpression) -> `(!== ,$1 ,$3))
-
- (EqualityExpressionNoIn (RelationalExpressionNoIn) -> $1
- (EqualityExpressionNoIn == RelationalExpressionNoIn) -> `(== ,$1 ,$3)
- (EqualityExpressionNoIn != RelationalExpressionNoIn) -> `(!= ,$1 ,$3)
- (EqualityExpressionNoIn === RelationalExpressionNoIn) -> `(=== ,$1 ,$3)
- (EqualityExpressionNoIn !== RelationalExpressionNoIn) -> `(!== ,$1 ,$3))
-
- (BitwiseANDExpression (EqualityExpression) -> $1
- (BitwiseANDExpression & EqualityExpression) -> `(& ,$1 ,$3))
- (BitwiseANDExpressionNoIn (EqualityExpressionNoIn) -> $1
- (BitwiseANDExpressionNoIn & EqualityExpressionNoIn) -> `(& ,$1 ,$3))
-
- (BitwiseXORExpression (BitwiseANDExpression) -> $1
- (BitwiseXORExpression ^ BitwiseANDExpression) -> `(^ ,$1 ,$3))
- (BitwiseXORExpressionNoIn (BitwiseANDExpressionNoIn) -> $1
- (BitwiseXORExpressionNoIn ^ BitwiseANDExpressionNoIn) -> `(^ ,$1 ,$3))
-
- (BitwiseORExpression (BitwiseXORExpression) -> $1
- (BitwiseORExpression bor BitwiseXORExpression) -> `(bor ,$1 ,$3))
- (BitwiseORExpressionNoIn (BitwiseXORExpressionNoIn) -> $1
- (BitwiseORExpressionNoIn bor BitwiseXORExpressionNoIn) -> `(bor ,$1 ,$3))
-
- (LogicalANDExpression (BitwiseORExpression) -> $1
- (LogicalANDExpression && BitwiseORExpression) -> `(and ,$1 ,$3))
- (LogicalANDExpressionNoIn (BitwiseORExpressionNoIn) -> $1
- (LogicalANDExpressionNoIn && BitwiseORExpressionNoIn) -> `(and ,$1 ,$3))
-
- (LogicalORExpression (LogicalANDExpression) -> $1
- (LogicalORExpression or LogicalANDExpression) -> `(or ,$1 ,$3))
- (LogicalORExpressionNoIn (LogicalANDExpressionNoIn) -> $1
- (LogicalORExpressionNoIn or LogicalANDExpressionNoIn) -> `(or ,$1 ,$3))
-
- (ConditionalExpression (LogicalORExpression) -> $1
- (LogicalORExpression ? AssignmentExpression colon AssignmentExpression) -> `(if ,$1 ,$3 ,$5))
- (ConditionalExpressionNoIn (LogicalORExpressionNoIn) -> $1
- (LogicalORExpressionNoIn ? AssignmentExpressionNoIn colon AssignmentExpressionNoIn) -> `(if ,$1 ,$3 ,$5))
-
- (AssignmentExpression (ConditionalExpression) -> $1
- (LeftHandSideExpression AssignmentOperator AssignmentExpression) -> `(,$2 ,$1 ,$3))
- (AssignmentExpressionNoIn (ConditionalExpressionNoIn) -> $1
- (LeftHandSideExpression AssignmentOperator AssignmentExpressionNoIn) -> `(,$2 ,$1 ,$3))
- (AssignmentOperator (=) -> '=
- (*=) -> '*=
- (/=) -> '/=
- (%=) -> '%=
- (+=) -> '+=
- (-=) -> '-=
- (<<=) -> '<<=
- (>>=) -> '>>=
- (>>>=) -> '>>>=
- (&=) -> '&=
- (^=) -> '^=
- (bor=) -> 'bor=)
-
- (Expression (AssignmentExpression) -> $1
- (Expression comma AssignmentExpression) -> `(begin ,$1 ,$3))
- (ExpressionNoIn (AssignmentExpressionNoIn) -> $1
- (ExpressionNoIn comma AssignmentExpressionNoIn) -> `(begin ,$1 ,$3))))
+ (PrimaryExpression (this) : 'this
+ (null) : 'null
+ (true) : 'true
+ (false) : 'false
+ (Identifier) : `(ref ,$1)
+ (StringLiteral) : `(string ,$1)
+ (RegexpLiteral) : `(regexp ,$1)
+ (NumericLiteral) : `(number ,$1)
+ (ArrayLiteral) : $1
+ (ObjectLiteral) : $1
+ (lparen Expression rparen) : $2)
+
+ (ArrayLiteral (lbracket rbracket) : '(array)
+ (lbracket Elision rbracket) : '(array ,@$2)
+ (lbracket ElementList rbracket) : `(array ,@$2)
+ (lbracket ElementList comma rbracket) : `(array ,@$2)
+ (lbracket ElementList comma Elision rbracket) : `(array ,@$2))
+ (ElementList (AssignmentExpression) : `(,$1)
+ (Elision AssignmentExpression) : `(,@$1 ,$2)
+ (ElementList comma AssignmentExpression) : `(,@$1 ,$3)
+ (ElementList comma Elision AssignmentExpression) : `(,@$1 ,@$3 ,$4))
+ (Elision (comma) : '((number 0))
+ (Elision comma) : `(,@$1 (number 0)))
+
+ (ObjectLiteral (lbrace rbrace) : `(object)
+ (lbrace PropertyNameAndValueList rbrace) : `(object ,@$2))
+ (PropertyNameAndValueList (PropertyName colon AssignmentExpression) : `((,$1 ,$3))
+ (PropertyNameAndValueList comma PropertyName colon AssignmentExpression) : `(,@$1 (,$3 ,$5)))
+ (PropertyName (Identifier) : $1
+ (StringLiteral) : (string->symbol $1)
+ (NumericLiteral) : $1)
+
+ (MemberExpression (PrimaryExpression) : $1
+ (FunctionExpression) : $1
+ (MemberExpression lbracket Expression rbracket) : `(aref ,$1 ,$3)
+ (MemberExpression dot Identifier) : `(pref ,$1 ,$3)
+ (new MemberExpression Arguments) : `(new ,$2 ,$3))
+
+ (NewExpression (MemberExpression) : $1
+ (new NewExpression) : `(new ,$2 ()))
+
+ (CallExpression (MemberExpression Arguments) : `(call ,$1 ,$2)
+ (CallExpression Arguments) : `(call ,$1 ,$2)
+ (CallExpression lbracket Expression rbracket) : `(aref ,$1 ,$3)
+ (CallExpression dot Identifier) : `(pref ,$1 ,$3))
+ (Arguments (lparen rparen) : '()
+ (lparen ArgumentList rparen) : $2)
+ (ArgumentList (AssignmentExpression) : `(,$1)
+ (ArgumentList comma AssignmentExpression) : `(,@$1 ,$3))
+
+ (LeftHandSideExpression (NewExpression) : $1
+ (CallExpression) : $1)
+
+ (PostfixExpression (LeftHandSideExpression) : $1
+ (LeftHandSideExpression ++) : `(postinc ,$1)
+ (LeftHandSideExpression --) : `(postdec ,$1))
+
+ (UnaryExpression (PostfixExpression) : $1
+ (delete UnaryExpression) : `(delete ,$2)
+ (void UnaryExpression) : `(void ,$2)
+ (typeof UnaryExpression) : `(typeof ,$2)
+ (++ UnaryExpression) : `(preinc ,$2)
+ (-- UnaryExpression) : `(predec ,$2)
+ (+ UnaryExpression) : `(+ ,$2)
+ (- UnaryExpression) : `(- ,$2)
+ (~ UnaryExpression) : `(~ ,$2)
+ (! UnaryExpression) : `(! ,$2))
+
+ (MultiplicativeExpression (UnaryExpression) : $1
+ (MultiplicativeExpression * UnaryExpression) : `(* ,$1 ,$3)
+ (MultiplicativeExpression / UnaryExpression) : `(/ ,$1 ,$3)
+ (MultiplicativeExpression % UnaryExpression) : `(% ,$1 ,$3))
+
+ (AdditiveExpression (MultiplicativeExpression) : $1
+ (AdditiveExpression + MultiplicativeExpression) : `(+ ,$1 ,$3)
+ (AdditiveExpression - MultiplicativeExpression) : `(- ,$1 ,$3))
+
+ (ShiftExpression (AdditiveExpression) : $1
+ (ShiftExpression << MultiplicativeExpression) : `(<< ,$1 ,$3)
+ (ShiftExpression >> MultiplicativeExpression) : `(>> ,$1 ,$3)
+ (ShiftExpression >>> MultiplicativeExpression) : `(>>> ,$1 ,$3))
+
+ (RelationalExpression (ShiftExpression) : $1
+ (RelationalExpression < ShiftExpression) : `(< ,$1 ,$3)
+ (RelationalExpression > ShiftExpression) : `(> ,$1 ,$3)
+ (RelationalExpression <= ShiftExpression) : `(<= ,$1 ,$3)
+ (RelationalExpression >= ShiftExpression) : `(>= ,$1 ,$3)
+ (RelationalExpression instanceof ShiftExpression) : `(instanceof ,$1 ,$3)
+ (RelationalExpression in ShiftExpression) : `(in ,$1 ,$3))
+
+ (RelationalExpressionNoIn (ShiftExpression) : $1
+ (RelationalExpressionNoIn < ShiftExpression) : `(< ,$1 ,$3)
+ (RelationalExpressionNoIn > ShiftExpression) : `(> ,$1 ,$3)
+ (RelationalExpressionNoIn <= ShiftExpression) : `(<= ,$1 ,$3)
+ (RelationalExpressionNoIn >= ShiftExpression) : `(>= ,$1 ,$3)
+ (RelationalExpressionNoIn instanceof ShiftExpression) : `(instanceof ,$1 ,$3))
+
+ (EqualityExpression (RelationalExpression) : $1
+ (EqualityExpression == RelationalExpression) : `(== ,$1 ,$3)
+ (EqualityExpression != RelationalExpression) : `(!= ,$1 ,$3)
+ (EqualityExpression === RelationalExpression) : `(=== ,$1 ,$3)
+ (EqualityExpression !== RelationalExpression) : `(!== ,$1 ,$3))
+
+ (EqualityExpressionNoIn (RelationalExpressionNoIn) : $1
+ (EqualityExpressionNoIn == RelationalExpressionNoIn) : `(== ,$1 ,$3)
+ (EqualityExpressionNoIn != RelationalExpressionNoIn) : `(!= ,$1 ,$3)
+ (EqualityExpressionNoIn === RelationalExpressionNoIn) : `(=== ,$1 ,$3)
+ (EqualityExpressionNoIn !== RelationalExpressionNoIn) : `(!== ,$1 ,$3))
+
+ (BitwiseANDExpression (EqualityExpression) : $1
+ (BitwiseANDExpression & EqualityExpression) : `(& ,$1 ,$3))
+ (BitwiseANDExpressionNoIn (EqualityExpressionNoIn) : $1
+ (BitwiseANDExpressionNoIn & EqualityExpressionNoIn) : `(& ,$1 ,$3))
+
+ (BitwiseXORExpression (BitwiseANDExpression) : $1
+ (BitwiseXORExpression ^ BitwiseANDExpression) : `(^ ,$1 ,$3))
+ (BitwiseXORExpressionNoIn (BitwiseANDExpressionNoIn) : $1
+ (BitwiseXORExpressionNoIn ^ BitwiseANDExpressionNoIn) : `(^ ,$1 ,$3))
+
+ (BitwiseORExpression (BitwiseXORExpression) : $1
+ (BitwiseORExpression bor BitwiseXORExpression) : `(bor ,$1 ,$3))
+ (BitwiseORExpressionNoIn (BitwiseXORExpressionNoIn) : $1
+ (BitwiseORExpressionNoIn bor BitwiseXORExpressionNoIn) : `(bor ,$1 ,$3))
+
+ (LogicalANDExpression (BitwiseORExpression) : $1
+ (LogicalANDExpression && BitwiseORExpression) : `(and ,$1 ,$3))
+ (LogicalANDExpressionNoIn (BitwiseORExpressionNoIn) : $1
+ (LogicalANDExpressionNoIn && BitwiseORExpressionNoIn) : `(and ,$1 ,$3))
+
+ (LogicalORExpression (LogicalANDExpression) : $1
+ (LogicalORExpression or LogicalANDExpression) : `(or ,$1 ,$3))
+ (LogicalORExpressionNoIn (LogicalANDExpressionNoIn) : $1
+ (LogicalORExpressionNoIn or LogicalANDExpressionNoIn) : `(or ,$1 ,$3))
+
+ (ConditionalExpression (LogicalORExpression) : $1
+ (LogicalORExpression ? AssignmentExpression colon AssignmentExpression) : `(if ,$1 ,$3 ,$5))
+ (ConditionalExpressionNoIn (LogicalORExpressionNoIn) : $1
+ (LogicalORExpressionNoIn ? AssignmentExpressionNoIn colon AssignmentExpressionNoIn) : `(if ,$1 ,$3 ,$5))
+
+ (AssignmentExpression (ConditionalExpression) : $1
+ (LeftHandSideExpression AssignmentOperator AssignmentExpression) : `(,$2 ,$1 ,$3))
+ (AssignmentExpressionNoIn (ConditionalExpressionNoIn) : $1
+ (LeftHandSideExpression AssignmentOperator AssignmentExpressionNoIn) : `(,$2 ,$1 ,$3))
+ (AssignmentOperator (=) : '=
+ (*=) : '*=
+ (/=) : '/=
+ (%=) : '%=
+ (+=) : '+=
+ (-=) : '-=
+ (<<=) : '<<=
+ (>>=) : '>>=
+ (>>>=) : '>>>=
+ (&=) : '&=
+ (^=) : '^=
+ (bor=) : 'bor=)
+
+ (Expression (AssignmentExpression) : $1
+ (Expression comma AssignmentExpression) : `(begin ,$1 ,$3))
+ (ExpressionNoIn (AssignmentExpressionNoIn) : $1
+ (ExpressionNoIn comma AssignmentExpressionNoIn) : `(begin ,$1 ,$3))))
diff --git a/module/language/ecmascript/tokenize.scm b/module/language/ecmascript/tokenize.scm
index 2ab8045cc..65a8b1e62 100644
--- a/module/language/ecmascript/tokenize.scm
+++ b/module/language/ecmascript/tokenize.scm
@@ -1,6 +1,6 @@
;;; ECMAScript for Guile
-;; Copyright (C) 2009 Free Software Foundation, Inc.
+;; Copyright (C) 2009, 2010 Free Software Foundation, Inc.
;;;; This library is free software; you can redistribute it and/or
;;;; modify it under the terms of the GNU Lesser General Public
@@ -21,6 +21,7 @@
(define-module (language ecmascript tokenize)
#:use-module (ice-9 rdelim)
#:use-module ((srfi srfi-1) #:select (unfold-right))
+ #:use-module (system base lalr)
#:export (next-token make-tokenizer make-tokenizer/1 tokenize tokenize/1))
(define (syntax-error message . args)
@@ -75,8 +76,8 @@
(lp (read-char port))))))
(div?
(case c1
- ((#\=) (read-char port) `(/= . #f))
- (else `(/ . #f))))
+ ((#\=) (read-char port) (make-lexical-token '/= #f #f))
+ (else (make-lexical-token '/ #f #f))))
(else
(read-regexp port)))))
@@ -95,7 +96,9 @@
(char-numeric? c)
(char=? c #\$)
(char=? c #\_))))
- `(RegexpLiteral . (,(string-append head str) . ,(reverse flags)))
+ (make-lexical-token 'RegexpLiteral #f
+ (cons (string-append head str)
+ (reverse flags)))
(begin (read-char port)
(lp (peek-char port) (cons c flags))))))
((char=? terminator #\\)
@@ -216,7 +219,7 @@
("import" . import)
("public" . public)))
-(define (read-identifier port)
+(define (read-identifier port loc)
(let lp ((c (peek-char port)) (chars '()))
(if (or (eof-object? c)
(not (or (char-alphabetic? c)
@@ -225,10 +228,11 @@
(char=? c #\_))))
(let ((word (list->string (reverse chars))))
(cond ((assoc-ref *keywords* word)
- => (lambda (x) `(,x . #f)))
+ => (lambda (x) (make-lexical-token x loc #f)))
((assoc-ref *future-reserved-words* word)
(syntax-error "word is reserved for the future, dude." word))
- (else `(Identifier . ,(string->symbol word)))))
+ (else (make-lexical-token 'Identifier loc
+ (string->symbol word)))))
(begin (read-char port)
(lp (peek-char port) (cons c chars))))))
@@ -368,7 +372,7 @@
(else
(lp (cons (list (string-ref (caar puncs) 0) #f) nodes)
puncs))))))
- (lambda (port)
+ (lambda (port loc)
(let lp ((c (peek-char port)) (tree punc-tree) (candidate #f))
(cond
((assv-ref tree c)
@@ -376,15 +380,17 @@
(read-char port)
(lp (peek-char port) (cdr node-tail) (car node-tail))))
(candidate
- `(,candidate . #f))
+ (make-lexical-token candidate loc #f))
(else
(syntax-error "bad syntax: character not allowed" c)))))))
(define (next-token port div?)
- (let ((c (peek-char port))
- (props `((filename . ,(port-filename port))
- (line . ,(port-line port))
- (column . ,(port-column port)))))
+ (let ((c (peek-char port))
+ (loc (make-source-location (port-filename port)
+ (port-line port)
+ (port-column port)
+ (false-if-exception (seek port 0 SEEK_CUR))
+ #f)))
(let ((tok
(case c
((#\ht #\vt #\np #\space)
@@ -400,7 +406,7 @@
(read-slash port div?))
((#\" #\')
; string literal
- `(StringLiteral . ,(read-string port)))
+ (make-lexical-token 'StringLiteral loc (read-string port)))
(else
(cond
((eof-object? c)
@@ -409,15 +415,14 @@
(char=? c #\$)
(char=? c #\_))
;; reserved word or identifier
- (read-identifier port))
+ (read-identifier port loc))
((char-numeric? c)
;; numeric -- also accept . FIXME, requires lookahead
- `(NumericLiteral . ,(read-numeric port)))
+ (make-lexical-token 'NumericLiteral loc (read-numeric port)))
(else
;; punctuation
- (read-punctuation port)))))))
- (if (pair? tok)
- (set-source-properties! tok props))
+ (read-punctuation port loc)))))))
+
tok)))
(define (make-tokenizer port)
@@ -435,31 +440,32 @@
(if eoi?
'*eoi*
(let ((tok (next-token port div?)))
- (case (if (pair? tok) (car tok) tok)
+ (case (if (lexical-token? tok) (lexical-token-category tok) tok)
((lparen)
- (set! stack (cons 'lparen stack)))
+ (set! stack (make-lexical-token 'lparen #f stack)))
((rparen)
(if (and (pair? stack) (eq? (car stack) 'lparen))
(set! stack (cdr stack))
(syntax-error "unexpected right parenthesis")))
((lbracket)
- (set! stack (cons 'lbracket stack)))
+ (set! stack (make-lexical-token 'lbracket #f stack)))
((rbracket)
(if (and (pair? stack) (eq? (car stack) 'lbracket))
(set! stack (cdr stack))
(syntax-error "unexpected right bracket" stack)))
((lbrace)
- (set! stack (cons 'lbrace stack)))
+ (set! stack (make-lexical-token 'lbrace #f stack)))
((rbrace)
(if (and (pair? stack) (eq? (car stack) 'lbrace))
(set! stack (cdr stack))
(syntax-error "unexpected right brace" stack)))
((semicolon)
(set! eoi? (null? stack))))
- (set! div? (and (pair? tok)
- (or (eq? (car tok) 'Identifier)
- (eq? (car tok) 'NumericLiteral)
- (eq? (car tok) 'StringLiteral))))
+ (set! div? (and (lexical-token? tok)
+ (let ((cat (lexical-token-category tok)))
+ (or (eq? cat 'Identifier)
+ (eq? cat 'NumericLiteral)
+ (eq? cat 'StringLiteral)))))
tok)))))
(define (tokenize port)