diff options
-rw-r--r-- | doc/guile-vm.texi | 9 | ||||
-rw-r--r-- | module/Makefile.am | 6 | ||||
-rw-r--r-- | module/language/brainfuck/compile-scheme.scm | 127 | ||||
-rw-r--r-- | module/language/brainfuck/parse.scm | 98 | ||||
-rw-r--r-- | module/language/brainfuck/spec.scm | 42 |
5 files changed, 282 insertions, 0 deletions
diff --git a/doc/guile-vm.texi b/doc/guile-vm.texi index 927c09e88..240c4f8b5 100644 --- a/doc/guile-vm.texi +++ b/doc/guile-vm.texi @@ -811,6 +811,15 @@ translate)} modules. Language front-ends can then be retrieved using the @code{lookup-language} procedure of the @code{(system base language)} module. +In order to integrate a new language @code{lang} into Guile's compiler system, +one has to create the module @code{(language lang spec)} containing the +language definition and referencing the parser, compiler and other +routines processing it. The module hierarchy in @code{(language brainfuck)} +defines a very basic Brainfuck implementation meant to serve as +easy-to-understand example on how to do this. See for instance +@url{http://en.wikipedia.org/wiki/Brainfuck} for more information about the +Brainfuck language itself. + @deftp @scmrec{} <language> name title version reader printer read-file expander translator evaluator environment Denotes a language front-end specification a various methods used by the compiler to handle source written in that language. Of particular diff --git a/module/Makefile.am b/module/Makefile.am index bcc4864e5..244746481 100644 --- a/module/Makefile.am +++ b/module/Makefile.am @@ -41,6 +41,7 @@ SOURCES = \ $(SCHEME_LANG_SOURCES) \ $(TREE_IL_LANG_SOURCES) \ $(GHIL_LANG_SOURCES) $(GLIL_LANG_SOURCES) \ + $(BRAINFUCK_LANG_SOURCES) \ $(ASSEMBLY_LANG_SOURCES) $(BYTECODE_LANG_SOURCES) \ $(OBJCODE_LANG_SOURCES) $(VALUE_LANG_SOURCES) \ \ @@ -112,6 +113,11 @@ ECMASCRIPT_LANG_SOURCES = \ language/ecmascript/array.scm \ language/ecmascript/compile-ghil.scm +BRAINFUCK_LANG_SOURCES = \ + language/brainfuck/spec.scm \ + language/brainfuck/parse.scm \ + language/brainfuck/compile-scheme.scm + SCRIPTS_SOURCES = \ scripts/PROGRAM.scm \ scripts/autofrisk.scm \ diff --git a/module/language/brainfuck/compile-scheme.scm b/module/language/brainfuck/compile-scheme.scm new file mode 100644 index 000000000..97c32d0b7 --- /dev/null +++ b/module/language/brainfuck/compile-scheme.scm @@ -0,0 +1,127 @@ +;;; Brainfuck for GNU Guile + +;; Copyright (C) 2009 Free Software Foundation, Inc. + +;; This program is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 2, or (at your option) +;; any later version. +;; +;; This program is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with this program; see the file COPYING. If not, write to +;; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, +;; Boston, MA 02111-1307, USA. + +;;; Code: + +(define-module (language brainfuck compile-scheme) + #:export (compile-scheme)) + +; Compilation of Brainfuck to Scheme is pretty straight-forward. For all of +; brainfuck's instructions, there are basic representations in Scheme we +; only have to generate. +; +; Brainfuck's pointer and data-tape are stored in the variables pointer and +; tape, where tape is a vector of integer values initially set to zero. Pointer +; starts out at position 0. +; Our tape is thus of finite length, with an address range of 0..n for +; some defined upper bound n depending on the length of our tape. + + +; Define the length to use for the tape. + +(define tape-size 30000) + + +; This compiles a whole brainfuck program. This constructs a Scheme code like: +; (let ((pointer 0) +; (tape (make-vector tape-size 0))) +; (begin +; <body> +; (write-char #\newline))) +; +; So first the pointer and tape variables are set up correctly, then the +; program's body is executed in this context, and finally we output an +; additional newline character in case the program does not output one. +; +; TODO: Find out and explain the details about env, the three return values and +; how to use the options. Implement options to set the tape-size, maybe. + +(define (compile-scheme exp env opts) + (values + `(let ((pointer 0) + (tape (make-vector ,tape-size 0))) + ,@(if (not (eq? '<brainfuck> (car exp))) + (error "expected brainfuck program") + `(begin + ,@(compile-body (cdr exp)) + (write-char #\newline)))) + env + env)) + + +; Compile a list of instructions to get a list of Scheme codes. As we always +; strip off the car of the instructions-list and cons the result onto the +; result-list, it will get out in reversed order first; so we have to (reverse) +; it on return. + +(define (compile-body instructions) + (let iterate ((cur instructions) + (result '())) + (if (null? cur) + (reverse result) + (let ((compiled (compile-instruction (car cur)))) + (iterate (cdr cur) (cons compiled result)))))) + + +; Compile a single instruction to Scheme, using the direct representations +; all of Brainfuck's instructions have. + +(define (compile-instruction ins) + (case (car ins) + + ; Pointer moval >< is done simply by something like: + ; (set! pointer (+ pointer +-1)) + ((<bf-move>) + (let ((dir (cadr ins))) + `(set! pointer (+ pointer ,dir)))) + + ; Cell increment +- is done as: + ; (vector-set! tape pointer (+ (vector-ref tape pointer) +-1)) + ((<bf-increment>) + (let ((inc (cadr ins))) + `(vector-set! tape pointer (+ (vector-ref tape pointer) ,inc)))) + + ; Output . is done by converting the cell's integer value to a character + ; first and then printing out this character: + ; (write-char (integer->char (vector-ref tape pointer))) + ((<bf-print>) + '(write-char (integer->char (vector-ref tape pointer)))) + + ; Input , is done similarly, read in a character, get its ASCII code and + ; store it into the current cell: + ; (vector-set! tape pointer (char->integer (read-char))) + ((<bf-read>) + '(vector-set! tape pointer (char->integer (read-char)))) + + ; For loops [...] we use a named let construction to execute the body until + ; the current cell gets zero. The body is compiled via a recursive call + ; back to (compile-body). + ; (let iterate () + ; (if (not (= (vector-ref! tape pointer) 0)) + ; (begin + ; <body> + ; (iterate)))) + ((<bf-loop>) + `(let iterate () + (if (not (= (vector-ref tape pointer) 0)) + (begin + ,@(compile-body (cdr ins)) + (iterate))))) + + (else (error "unknown brainfuck instruction " (car ins))))) diff --git a/module/language/brainfuck/parse.scm b/module/language/brainfuck/parse.scm new file mode 100644 index 000000000..54dbaeecc --- /dev/null +++ b/module/language/brainfuck/parse.scm @@ -0,0 +1,98 @@ +;;; Brainfuck for GNU Guile. + +;; Copyright (C) 2009 Free Software Foundation, Inc. + +;; This program is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 2, or (at your option) +;; any later version. +;; +;; This program is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with this program; see the file COPYING. If not, write to +;; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, +;; Boston, MA 02111-1307, USA. + +;;; Code: + +(define-module (language brainfuck parse) + #:export (read-brainfuck)) + +; Purpose of the parse module is to read in brainfuck in text form and produce +; the corresponding tree representing the brainfuck code. +; +; Each object (representing basically a single instruction) is structured like: +; (<instruction> [arguments]) +; where <instruction> is a symbolic name representing the type of instruction +; and the optional arguments represent further data (for instance, the body of +; a [...] loop as a number of nested instructions). +; +; A full brainfuck program is represented by the (<brainfuck> instructions) +; object. + + +; Read a brainfuck program from an input port. We construct the <brainfuck> +; program and read in the instructions using (read-body). + +(define (read-brainfuck p) + `(<brainfuck> ,@(read-body p))) + + +; While reading a number of instructions in sequence, all of them are cons'ed +; onto a list of instructions; thus this list gets out in reverse order. +; Additionally, for "comment characters" (everything not an instruction) we +; generate <bf-nop> NOP instructions. +; +; This routine reverses a list of instructions and removes all <bf-nop>'s on the +; way to fix these two issues for a read-in list. + +(define (reverse-without-nops lst) + (let iterate ((cur lst) + (result '())) + (if (null? cur) + result + (let ((head (car cur)) + (tail (cdr cur))) + (if (eq? (car head) '<bf-nop>) + (iterate tail result) + (iterate tail (cons head result))))))) + + +; Read in a set of instructions until a terminating ] character is found (or +; end of file is reached). This is used both for loop bodies and whole +; programs, so that a program has to be either terminated by EOF or an +; additional ], too. +; +; For instance, the basic program so just echo one character would be: +; ,.] + +(define (read-body p) + (let iterate ((parsed '())) + (let ((chr (read-char p))) + (if (or (eof-object? chr) (eq? #\] chr)) + (reverse-without-nops parsed) + (iterate (cons (process-input-char chr p) parsed)))))) + + +; This routine processes a single character of input and builds the +; corresponding instruction. Loop bodies are read by recursively calling +; back (read-body). +; +; For the poiner movement commands >< and the cell increment/decrement +- +; commands, we only use one instruction form each and specify the direction of +; the pointer/value increment using an argument to the instruction form. + +(define (process-input-char chr p) + (case chr + ((#\>) '(<bf-move> 1)) + ((#\<) '(<bf-move> -1)) + ((#\+) '(<bf-increment> 1)) + ((#\-) '(<bf-increment> -1)) + ((#\.) '(<bf-print>)) + ((#\,) '(<bf-read>)) + ((#\[) `(<bf-loop> ,@(read-body p))) + (else '(<bf-nop>)))) diff --git a/module/language/brainfuck/spec.scm b/module/language/brainfuck/spec.scm new file mode 100644 index 000000000..a303984b2 --- /dev/null +++ b/module/language/brainfuck/spec.scm @@ -0,0 +1,42 @@ +;;; Brainfuck for GNU Guile. + +;; Copyright (C) 2009 Free Software Foundation, Inc. + +;; This program is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 2, or (at your option) +;; any later version. +;; +;; This program is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with this program; see the file COPYING. If not, write to +;; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, +;; Boston, MA 02111-1307, USA. + +;;; Code: + +(define-module (language brainfuck spec) + #:use-module (language brainfuck compile-scheme) + #:use-module (language brainfuck parse) + #:use-module (system base language) + #:export (brainfuck)) + + +; The new language is integrated into Guile via this (define-language) +; specification in the special module (language [lang] spec). +; Provided is a parser-routine in #:reader, a output routine in #:printer +; and one or more compiler routines (as target-language - routine pairs) +; in #:compilers. This is the basic set of fields needed to specify a new +; language. + +(define-language brainfuck + #:title "Guile Brainfuck" + #:version "1.0" + #:reader (lambda () (read-brainfuck (current-input-port))) + #:compilers `((scheme . ,compile-scheme)) + #:printer write + ) |