summaryrefslogtreecommitdiff
path: root/module/ice-9/read.scm
diff options
context:
space:
mode:
authorAndy Wingo <wingo@pobox.com>2021-02-13 22:22:33 +0100
committerAndy Wingo <wingo@pobox.com>2021-02-13 22:36:05 +0100
commit40e4e3b2a40bc44bf72a9ae05ea686cbdaf79e05 (patch)
tree4e15e2dfde0a54a5d05a3ced73d192f9fb3bb2c0 /module/ice-9/read.scm
parent7f8149b4de16fba955e2c3bb1ca71e49a62ac95d (diff)
downloadguile-40e4e3b2a40bc44bf72a9ae05ea686cbdaf79e05.tar.gz
Add "read" implementation in Scheme
* module/Makefile.am (SOURCES): Add ice-9/read. * module/ice-9/read.scm: New file. The idea is to move the compiler to use this "read", after proving that it does the same as C. Then we can switch to read-syntax that returns syntax objects with source locations, allowing us to annotate any datum.
Diffstat (limited to 'module/ice-9/read.scm')
-rw-r--r--module/ice-9/read.scm866
1 files changed, 866 insertions, 0 deletions
diff --git a/module/ice-9/read.scm b/module/ice-9/read.scm
new file mode 100644
index 000000000..bf4ed2f94
--- /dev/null
+++ b/module/ice-9/read.scm
@@ -0,0 +1,866 @@
+;;; Scheme reader
+;;; Copyright (C) 1995-1997,1999-2001,2003-2004,2006-2012,2014-2021
+;;; Free Software Foundation, Inc.
+;;;
+;;; This library is free software: you can redistribute it and/or modify
+;;; it under the terms of the GNU Lesser General Public License as
+;;; published by the Free Software Foundation, either version 3 of the
+;;; License, or (at your option) any later version.
+;;;
+;;; This library is distributed in the hope that it will be useful, but
+;;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;;; Lesser General Public License for more details.
+;;;
+;;; You should have received a copy of the GNU Lesser General Public
+;;; License along with this program. If not, see
+;;; <http://www.gnu.org/licenses/>.
+
+;;; Commentary:
+;;;
+;;; Implementation of Scheme's "read".
+;;;
+;;; Code:
+
+
+;; While porting read.c to Scheme, I found these expressions that result
+;; in undesirable behavior in the C reader. Most all of them are also
+;; present in the Scheme reader. Probably I should fix all of them, but
+;; I would first like to prove that the Scheme reader is good enough.
+;;
+;; (call-with-input-string "," read)
+;; (read-disable 'square-brackets), then (call-with-input-string "]" read)
+;; (call-with-input-string "(#tru1)" read) => '(#t ru1)
+;; (call-with-input-string "(#true1)" read) => '(#t 1)
+;; (call-with-input-string "(#fAlse)" read) => '(#f Alse)
+;; (call-with-input-string "(#f1 #f2 #f3)" read) => error reading array
+;; #: foo
+;; #:#|what|#foo
+;; #@-(1 2 3) => #(1 2 3)
+;; (#*10101010102) => (#*1010101010 2)
+
+(define-module (ice-9 read)
+ #:use-module (srfi srfi-11)
+ #:use-module (ice-9 textual-ports)
+ #:use-module (rnrs bytevectors)
+ #:replace (read))
+
+(define read-hash-procedures
+ (fluid->parameter %read-hash-procedures))
+
+(define (read-hash-procedure ch)
+ (assq-ref (read-hash-procedures) ch))
+
+(define (read-hash-extend ch proc)
+ (let ((alist (read-hash-procedures)))
+ (read-hash-procedures
+ (if proc
+ (assq-set! alist ch proc)
+ (assq-remove! alist ch)))))
+
+(define bitfield:record-positions? 0)
+(define bitfield:case-insensitive? 2)
+(define bitfield:keyword-style 4)
+(define bitfield:r6rs-escapes? 6)
+(define bitfield:square-brackets? 8)
+(define bitfield:hungry-eol-escapes? 10)
+(define bitfield:curly-infix? 12)
+(define bitfield:r7rs-symbols? 14)
+(define read-option-bits 16)
+
+(define read-option-mask #b11)
+(define read-option-inherit #b11)
+(define read-options-inherit-all (1- (ash 1 read-option-bits)))
+
+(define keyword-style-hash-prefix 0)
+(define keyword-style-prefix 1)
+(define keyword-style-postfix 2)
+
+(define (compute-reader-options port)
+ (let ((options (read-options))
+ (port-options (or (%port-property port 'port-read-options)
+ read-options-inherit-all)))
+ (define-syntax-rule (option field exp)
+ (let ((port-option (logand port-options (ash read-option-mask field))))
+ (if (= port-option (ash read-option-inherit field))
+ exp
+ port-option)))
+ (define (bool key field)
+ (option field
+ (if (memq key options) (ash 1 field) 0)))
+ (define (enum key values field)
+ (option field
+ (ash (assq-ref values (and=> (memq key options) cadr)) field)))
+ (logior (bool 'positions bitfield:record-positions?)
+ (bool 'case-insensitive bitfield:case-insensitive?)
+ (enum 'keyword-style '((#f . 0) (prefix . 1) (postfix . 2))
+ bitfield:keyword-style)
+ (bool 'r6rs-hex-escapes bitfield:r6rs-escapes?)
+ (bool 'square-brackets bitfield:square-brackets?)
+ (bool 'hungry-eol-escapes bitfield:hungry-eol-escapes?)
+ (bool 'curly-infix bitfield:curly-infix?)
+ (bool 'r7rs-symbols bitfield:r7rs-symbols?))))
+
+(define (set-option options field new)
+ (logior new (logand options (lognot (ash #b11 field)))))
+
+(define (set-port-read-option! port field value)
+ (let ((options (or (%port-property port 'port-read-options)
+ read-options-inherit-all))
+ (new (ash value field)))
+ (%set-port-property! port 'port-read-options
+ (set-option options field new)
+ )))
+
+(define* (read #:optional (port (current-input-port)))
+ ;; init read options
+ (define opts (compute-reader-options port))
+ (define (enabled? field)
+ (not (zero? (logand (ash 1 field) opts))))
+ (define (set-reader-option! field value)
+ (set! opts (set-option opts field value))
+ (set-port-read-option! port field value))
+ (define (record-positions?) (enabled? bitfield:record-positions?))
+ (define (case-insensitive?) (enabled? bitfield:case-insensitive?))
+ (define (keyword-style) (logand read-option-mask
+ (ash opts (- bitfield:keyword-style))))
+ (define (r6rs-escapes?) (enabled? bitfield:r6rs-escapes?))
+ (define (square-brackets?) (enabled? bitfield:square-brackets?))
+ (define (hungry-eol-escapes?) (enabled? bitfield:hungry-eol-escapes?))
+ (define (curly-infix?) (enabled? bitfield:curly-infix?))
+ (define (r7rs-symbols?) (enabled? bitfield:r7rs-symbols?))
+ (define neoteric 0)
+ (define (next) (get-char port))
+ (define (peek) (lookahead-char port))
+ (define filename (port-filename port))
+ (define (get-pos) (cons (port-line port) (port-column port)))
+
+ (define (annotate line column datum)
+ ;; FIXME: Return a syntax object instead, so we can avoid the
+ ;; srcprops side table.
+ (when (and (supports-source-properties? datum)
+ ;; Line or column can be invalid via set-port-column! or
+ ;; ungetting chars beyond start of line.
+ (<= 0 line)
+ (<= 1 column))
+ ;; We always capture the column after one char of lookahead;
+ ;; subtract off that lookahead value.
+ (set-source-properties! datum `((filename . ,filename)
+ (line . ,line)
+ (column . ,(1- column)))))
+ datum)
+
+ (define (input-error msg . args)
+ (apply error msg args))
+
+ (define (read-semicolon-comment)
+ (let ((ch (next)))
+ (cond
+ ((eof-object? ch) ch)
+ ((eqv? ch #\newline) (next))
+ (else (read-semicolon-comment)))))
+
+ (define-syntax-rule (take-until first pred)
+ (let ((acc (open-output-string)))
+ (put-char acc first)
+ (let lp ()
+ (let ((ch (peek)))
+ (cond
+ ((or (eof-object? ch)
+ (pred ch))
+ (get-output-string acc))
+ (else
+ (put-char acc ch)
+ (next)
+ (lp)))))))
+ (define-syntax-rule (take-while first pred)
+ (take-until first (lambda (ch) (not (pred ch)))))
+
+ (define (delimiter? ch)
+ (or (memv ch '(#\( #\) #\; #\"
+ #\space #\return #\ff #\newline #\tab))
+ (and (memv ch '(#\[ #\])) (or (square-brackets?) (curly-infix?)))
+ (and (memv ch '(#\{ #\})) (curly-infix?))))
+
+ (define (read-token ch)
+ (take-until ch delimiter?))
+
+ (define (read-mixed-case-symbol ch)
+ (let* ((str (read-token ch))
+ (len (string-length str)))
+ (cond
+ ((and (eq? (keyword-style) keyword-style-postfix)
+ (> len 0) (eqv? #\: (string-ref str (1- len))))
+ (let ((str (substring str 0 (1- len))))
+ (symbol->keyword
+ (string->symbol
+ (if (case-insensitive?)
+ (string-downcase str)
+ str)))))
+ (else
+ (string->symbol
+ (if (case-insensitive?)
+ (string-downcase str)
+ str))))))
+
+ (define (read-parenthesized rdelim)
+ (define (finish-curly-infix ret)
+ ;; Perform syntactic transformations on {...} lists.
+ (define (extract-infix-list ls)
+ (and (pair? ls)
+ (let ((x (car ls))
+ (ls (cdr ls)))
+ (and (pair? ls)
+ (let ((op (car ls))
+ (ls (cdr ls)))
+ (if (null? ls)
+ (list op x)
+ (let ((tail (extract-infix-list ls)))
+ (and tail
+ (equal? op (car tail))
+ (cons* op x (cdr tail))))))))))
+ (cond
+ ((or (not (eqv? rdelim #\}))) ret) ; Only on {...} lists.
+ ((null? ret) ret) ; {} => ()
+ ((null? (cdr ret)) (car ret)) ; {x} => x
+ ((null? (cddr ret)) ret) ; {x y} => (x y)
+ ((extract-infix-list ret)) ; {x + y + ... + z} => (+ x y ... z)
+ (else (cons '$nfx$ ret)))) ; {x y . z} => ($nfx$ x y . z)
+ (define curly? (eqv? rdelim #\}))
+ (finish-curly-infix
+ (let lp ((ch (next-non-whitespace)))
+ (when (eof-object? ch)
+ (input-error "unexpected end of input while searching for " rdelim))
+ (cond
+ ((eqv? ch rdelim) '())
+ ((or (eqv? ch #\))
+ (and (eqv? ch #\]) (or (square-brackets?) (curly-infix?)))
+ (and (eqv? ch #\}) (curly-infix?)))
+ (input-error "mismatched close paren" ch))
+ (else
+ (let ((expr (read-expr ch)))
+ ;; Note that it is possible for scm_read_expression to
+ ;; return `.', but not as part of a dotted pair: as in
+ ;; #{.}#. Indeed an example is here!
+ (if (and (eqv? ch #\.) (eq? expr '#{.}#))
+ (let* ((tail (read-expr (next-non-whitespace)))
+ (close (next-non-whitespace)))
+ (unless (eqv? close rdelim)
+ (input-error "missing close paren" rdelim))
+ tail)
+ (cons expr (lp (next-non-whitespace))))))))))
+
+ (define (hex-digit ch)
+ (case ch
+ ((#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9)
+ (- (char->integer ch) (char->integer #\0)))
+ ((#\a #\b #\c #\d #\e #\f)
+ (+ 10 (- (char->integer ch) (char->integer #\a))))
+ ((#\A #\B #\C #\D #\E #\F)
+ (+ 10 (- (char->integer ch) (char->integer #\A))))
+ (else #f)))
+
+ (define (read-r6rs-hex-escape)
+ (let ((ch (next)))
+ (cond
+ ((hex-digit ch) =>
+ (lambda (res)
+ (let lp ((res res))
+ (let ((ch (next)))
+ (cond
+ ((hex-digit ch) => (lambda (digit) (lp (+ (* 16 res) digit))))
+ ((eqv? ch #\;) (integer->char res))
+ (else
+ (input-error "invalid character in escape sequence: ~S" ch)))))))
+ (else
+ (input-error "invalid character in escape sequence: ~S" ch)))))
+
+ (define (read-fixed-hex-escape len)
+ (let lp ((len len) (res 0))
+ (if (zero? len)
+ (integer->char res)
+ (let ((ch (next)))
+ (cond
+ ((hex-digit ch) =>
+ (lambda (digit)
+ (lp (1- len) (+ (* res 16) digit))))
+ (else
+ (input-error "invalid character in escape sequence: ~S" ch)))))))
+
+ (define (read-string rdelim)
+ (let ((acc (open-output-string)))
+ (let lp ()
+ (let ((ch (next)))
+ (cond
+ ((eof-object? ch)
+ (input-error "unexpected end of input while reading string"))
+ ((eqv? ch rdelim)
+ (get-output-string acc))
+ ((eqv? ch #\\)
+ (let ((ch (next)))
+ (when (eof-object? ch)
+ (input-error "unexpected end of input while reading string"))
+ (case ch
+ ((#\newline)
+ (when (hungry-eol-escapes?)
+ ;; Skip intraline whitespace before continuing.
+ (let lp ()
+ (let ((ch (peek)))
+ (unless (or (eof-object? ch)
+ (eqv? ch #\tab)
+ (eq? (char-general-category ch) 'Zs))
+ (next)
+ (lp))))))
+ ;; Accept "\(" for use at the beginning of
+ ;; lines in multiline strings to avoid
+ ;; confusing emacs lisp modes.
+ ((#\| #\\ #\() (put-char acc ch))
+ ((#\0) (put-char acc #\nul))
+ ((#\f) (put-char acc #\ff))
+ ((#\n) (put-char acc #\newline))
+ ((#\r) (put-char acc #\return))
+ ((#\t) (put-char acc #\tab))
+ ((#\a) (put-char acc #\alarm))
+ ((#\v) (put-char acc #\vtab))
+ ((#\b) (put-char acc #\backspace))
+ ((#\x)
+ (let ((ch (if (or (r6rs-escapes?) (eqv? rdelim #\|))
+ (read-r6rs-hex-escape)
+ (read-fixed-hex-escape 2))))
+ (put-char acc ch)))
+ ((#\u)
+ (put-char acc (read-fixed-hex-escape 4)))
+ ((#\U)
+ (put-char acc (read-fixed-hex-escape 8)))
+ (else
+ (unless (eqv? ch rdelim)
+ (input-error "invalid character in escape sequence: ~S" ch))
+ (put-char acc ch)))
+ (lp)))
+ (else
+ (put-char acc ch)
+ (lp)))))))
+
+ (define (read-character)
+ (let ((ch (next)))
+ (cond
+ ((eof-object? ch)
+ (input-error "unexpected end of input after #\\"))
+ (else
+ (let* ((tok (read-token ch))
+ (len (string-length tok)))
+ (define dotted-circle #\x25cc)
+ (define r5rs-charnames
+ '(("space" . #\x20) ("newline" . #\x0a)))
+ (define r6rs-charnames
+ '(("nul" . #\x00) ("alarm" . #\x07) ("backspace" . #\x08)
+ ("tab" . #\x09) ("linefeed" . #\x0a) ("vtab" . #\x0b)
+ ("page" . #\x0c) ("return" . #\x0d) ("esc" . #\x1b)
+ ("delete" . #\x7f)))
+ (define r7rs-charnames
+ '(("escape" . #\x1b)))
+ (define C0-control-charnames
+ '(("nul" . #\x00) ("soh" . #\x01) ("stx" . #\x02)
+ ("etx" . #\x03) ("eot" . #\x04) ("enq" . #\x05)
+ ("ack" . #\x06) ("bel" . #\x07) ("bs" . #\x08)
+ ("ht" . #\x09) ("lf" . #\x0a) ("vt" . #\x0b)
+ ("ff" . #\x0c) ("cr" . #\x0d) ("so" . #\x0e)
+ ("si" . #\x0f) ("dle" . #\x10) ("dc1" . #\x11)
+ ("dc2" . #\x12) ("dc3" . #\x13) ("dc4" . #\x14)
+ ("nak" . #\x15) ("syn" . #\x16) ("etb" . #\x17)
+ ("can" . #\x18) ("em" . #\x19) ("sub" . #\x1a)
+ ("esc" . #\x1b) ("fs" . #\x1c) ("gs" . #\x1d)
+ ("rs" . #\x1e) ("us" . #\x1f) ("sp" . #\x20)
+ ("del" . #\x7f)))
+ (define alt-charnames
+ '(("null" . #\x0) ("nl" . #\x0a) ("np" . #\x0c)))
+ ;; Although R6RS and R7RS charnames specified as being
+ ;; case-sensitive, Guile matches them case-insensitively, like
+ ;; other char names.
+ (define (named-char tok alist)
+ (let lp ((alist alist))
+ (and (pair? alist)
+ (if (string-ci=? tok (caar alist))
+ (cdar alist)
+ (lp (cdr alist))))))
+ (cond
+ ((= len 1) ch)
+ ((and (= len 2) (eqv? (string-ref tok 1) dotted-circle))
+ ;; Ignore dotted circles, which may be used to keep
+ ;; combining characters from combining with the backslash in
+ ;; #\charname.
+ ch)
+ ((and (<= (char->integer #\0) (char->integer ch) (char->integer #\7))
+ (string->number tok 8))
+ ;; Specifying a codepoint as an octal value.
+ => integer->char)
+ ((and (eqv? ch #\x) (> len 1)
+ (string->number (substring tok 1) 16))
+ ;; Specifying a codepoint as an hexadecimal value. Skip
+ ;; initial "x".
+ => integer->char)
+ ((named-char tok r5rs-charnames))
+ ((named-char tok r6rs-charnames))
+ ((named-char tok r7rs-charnames))
+ ((named-char tok C0-control-charnames))
+ ((named-char tok alt-charnames))
+ (else
+ (input-error "unknown character name ~a" tok))))))))
+
+ (define (read-vector)
+ (list->vector (read-parenthesized #\))))
+
+ (define (read-srfi-4-vector ch)
+ (read-array ch))
+
+ (define (maybe-read-boolean-tail tail)
+ (let ((len (string-length tail)))
+ (let lp ((i 0))
+ (or (= i len)
+ (let ((ch (peek)))
+ (and (not (eof-object? ch))
+ (eqv? (char-downcase ch) (string-ref tail i))
+ (or (begin
+ (next)
+ (lp (1+ i)))
+ (begin
+ (unget-char port ch)
+ #f))))))))
+
+ (define (read-false-or-srfi-4-vector)
+ (let ((ch (peek)))
+ (if (or (eqv? ch #\3)
+ (eqv? ch #\6))
+ (read-srfi-4-vector #\f)
+ (begin
+ (maybe-read-boolean-tail "alse")
+ #f))))
+
+ (define (read-bytevector)
+ (define (expect ch)
+ (unless (eqv? (next) ch)
+ (input-error "invalid bytevector prefix" ch)))
+ (expect #\u)
+ (expect #\8)
+ (expect #\()
+ (u8-list->bytevector (read-parenthesized #\))))
+
+ ;; FIXME: We should require a terminating delimiter.
+ (define (read-bitvector)
+ (list->bitvector
+ (let lp ()
+ (let ((ch (peek)))
+ (case ch
+ ((#\0) (next) (cons #f (lp)))
+ ((#\1) (next) (cons #t (lp)))
+ (else '()))))))
+
+ (define (read-boolean ch)
+ ;; Historically, Guile hasn't required a delimiter after #f / #t.
+ ;; When the longer #false / #true forms were added, we kept this
+ ;; behavior. It is terrible and we should change it!!
+ (case ch
+ ((#\t #\T)
+ (maybe-read-boolean-tail "rue")
+ #t)
+ (else
+ (maybe-read-boolean-tail "alse")
+ #f)))
+
+ (define (read-keyword)
+ (let ((ch (next-non-whitespace)))
+ (when (eof-object? ch)
+ (input-error "end of input while reading keyword"))
+ (let ((expr (read-expr ch)))
+ (unless (symbol? expr)
+ (input-error "keyword prefix #: not followed by a symbol: ~a"
+ expr))
+ (symbol->keyword expr))))
+
+ (define (read-array ch)
+ (define (read-decimal-integer ch alt)
+ ;; This parser has problems but it's what Guile's read.c does. Any
+ ;; fix should come later and to both of them.
+ (define (decimal-digit ch)
+ (and (not (eof-object? ch))
+ (let ((digit (- (char->integer ch) (char->integer #\0))))
+ (and (<= 0 digit 9) digit))))
+ (let-values (((sign ch) (if (eqv? ch #\-)
+ (values -1 (next))
+ (values 1 ch))))
+ (let lp ((ch ch) (res #f))
+ (cond
+ ((decimal-digit ch)
+ => (lambda (digit)
+ (lp (next) (if res (+ (* 10 res) digit) digit))))
+ (else
+ (values ch (if res (* res sign) alt)))))))
+ (define (read-rank ch)
+ (let-values (((ch rank) (read-decimal-integer ch 1)))
+ (when (< rank 0)
+ (input-error "array rank must be non-negative"))
+ (when (eof-object? ch)
+ (input-error "unexpected end of input while reading array"))
+ (values ch rank)))
+ (define (read-tag ch)
+ (let lp ((ch ch) (chars '()))
+ (when (eof-object? ch)
+ (input-error "unexpected end of input while reading array"))
+ (if (memv ch '(#\( #\@ @\:))
+ (values ch
+ (if (null? chars)
+ #t
+ (string->symbol (list->string (reverse chars)))))
+ (lp (next) (cons ch chars)))))
+ (define (read-dimension ch)
+ (let*-values (((ch lbnd) (if (eqv? ch #\@)
+ (read-decimal-integer (next) 0)
+ (values ch 0)))
+ ((ch len) (if (eqv? ch #\:)
+ (read-decimal-integer (next) 0)
+ (values ch #f))))
+ (when (and len (< len 0))
+ (input-error "array length must be non-negative"))
+ (when (eof-object? ch)
+ (input-error "unexpected end of input while reading array"))
+ (values ch
+ (if len
+ (if (zero? lbnd)
+ len
+ (list lbnd (+ lbnd (1- len))))
+ lbnd))))
+ (define (read-shape ch alt)
+ (if (memv ch '(#\@ @\:))
+ (let*-values (((ch head) (read-dimension ch))
+ ((ch tail) (read-shape ch '())))
+ (values ch (cons head tail)))
+ (values ch alt)))
+ (define (read-elements ch rank)
+ (unless (eqv? ch #\()
+ (input-error "missing '(' in vector or array literal"))
+ (let ((elts (read-parenthesized #\))))
+ (if (zero? rank)
+ (begin
+ ;; Handle special print syntax of rank zero arrays; see
+ ;; scm_i_print_array for a rationale.
+ (when (null? elts)
+ (input-error "too few elements in array literal, need 1"))
+ (unless (null? (cdr elts))
+ (input-error "too many elements in array literal, need 1"))
+ (car elts))
+ elts)))
+ (let*-values (((ch rank) (read-rank ch))
+ ((ch tag) (read-tag ch))
+ ((ch shape) (read-shape ch rank))
+ ((elts) (read-elements ch rank)))
+ (when (and (pair? shape) (not (eqv? (length shape) rank)))
+ (input-error
+ "the number of shape specifications must match the array rank"))
+ (list->typed-array tag shape elts)))
+
+ (define (read-number-and-radix ch)
+ (let ((tok (string-append "#" (read-token ch))))
+ (or (string->number tok)
+ (input-error "unknown # object"))))
+
+ (define (read-extended-symbol)
+ (define (next-not-eof)
+ (let ((ch (next)))
+ (when (eof-object? ch)
+ (input-error "end of input while reading symbol"))
+ ch))
+ (string->symbol
+ (list->string
+ (let lp ((saw-brace? #f))
+ (let ((ch (next-not-eof)))
+ (cond
+ (saw-brace?
+ (if (eqv? ch #\#)
+ '()
+ (cons #\} (lp #f))))
+ ((eqv? ch #\})
+ (lp #t))
+ ((eqv? ch #\\)
+ ;; It used to be that print.c would print extended-read-syntax
+ ;; symbols with backslashes before "non-standard" chars, but
+ ;; this routine wouldn't do anything with those escapes.
+ ;; Bummer. What we've done is to change print.c to output
+ ;; R6RS hex escapes for those characters, relying on the fact
+ ;; that the extended read syntax would never put a `\' before
+ ;; an `x'. For now, we just ignore other instances of
+ ;; backslash in the string.
+ (let* ((ch (next-not-eof))
+ (ch (if (eqv? ch #\x)
+ (read-r6rs-hex-escape)
+ ch)))
+ (cons ch (lp #f))))
+ (else
+ (cons ch (lp #f)))))))))
+
+ (define (read-nil)
+ ;; Have already read "#\n" -- now read "il".
+ (let ((id (read-mixed-case-symbol #\n)))
+ (unless (eq? id 'nil)
+ (input-error "unexpected input while reading #nil: ~a" id))
+ #nil))
+
+ (define (read-sharp)
+ (let* ((ch (next)))
+ (cond
+ ((eof-object? ch)
+ (input-error "unexpected end of input after #"))
+ ((read-hash-procedure ch)
+ => (lambda (proc) (proc ch)))
+ (else
+ (case ch
+ ((#\\) (read-character))
+ ((#\() (read-vector))
+ ((#\s #\u \c) (read-srfi-4-vector ch))
+ ((#\f) (read-false-or-srfi-4-vector))
+ ((#\v) (read-bytevector))
+ ((#\*) (read-bitvector))
+ ((#\t #\T #\F) (read-boolean ch))
+ ((#\:) (read-keyword))
+ ((#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9 #\@)
+ (read-array ch))
+ ((#\i #\e #\b #\B #\o #\O #\d #\D #\x #\X #\I #\E)
+ (read-number-and-radix ch))
+ ((#\{) (read-extended-symbol))
+ ((#\') (list 'syntax (read-expr (next-non-whitespace))))
+ ((#\`) (list 'quasisyntax (read-expr (next-non-whitespace))))
+ ((#\,)
+ (if (eqv? #\@ (peek))
+ (begin
+ (next)
+ (list 'unsyntax-splicing (read-expr (next-non-whitespace))))
+ (list 'unsyntax (read-expr (next-non-whitespace)))))
+ ((#\n) (read-nil))
+ (else
+ (input-error "Unknown # object: ~S" ch)))))))
+
+ (define (read-number ch)
+ (let* ((str (read-token ch)))
+ (or (string->number str)
+ (string->symbol (if (case-insensitive?)
+ (string-downcase str)
+ str)))))
+
+ (define (read-expr* ch)
+ (case ch
+ ((#\{)
+ (cond
+ ((curly-infix?)
+ (set! neoteric (1+ neoteric))
+ (let ((expr (read-parenthesized #\})))
+ (set! neoteric (1- neoteric))
+ expr))
+ (else
+ (read-mixed-case-symbol ch))))
+ ((#\[)
+ (cond
+ ((square-brackets?)
+ (read-parenthesized #\]))
+ ((curly-infix?)
+ ;; The syntax of neoteric expressions requires that '[' be a
+ ;; delimiter when curly-infix is enabled, so it cannot be part
+ ;; of an unescaped symbol. We might as well do something
+ ;; useful with it, so we adopt Kawa's convention: [...] =>
+ ;; ($bracket-list$ ...)
+ ;; FIXME: source locations for this cons
+ (cons '$bracket-list$ (read-parenthesized #\])))
+ (else
+ (read-mixed-case-symbol ch))))
+ ((#\()
+ (read-parenthesized #\)))
+ ((#\")
+ (read-string ch))
+ ((#\|)
+ (if (r7rs-symbols?)
+ (string->symbol (read-string ch))
+ (read-mixed-case-symbol ch)))
+ ((#\')
+ (list 'quote (read-expr (next-non-whitespace))))
+ ((#\`)
+ (list 'quasiquote (read-expr (next-non-whitespace))))
+ ((#\,)
+ (cond
+ ((eqv? #\@ (peek))
+ (next)
+ (list 'unquote-splicing (read-expr (next-non-whitespace))))
+ (else
+ (list 'unquote (read-expr (next-non-whitespace))))))
+ ((#\#)
+ ;; FIXME: read-sharp should recur if we read a comment
+ (read-sharp))
+ ((#\))
+ (input-error "unexpected \")\""))
+ ((#\})
+ (if (curly-infix?)
+ (input-error "unexpected \"}\"")
+ (read-mixed-case-symbol ch)))
+ ((#\])
+ (if (square-brackets?)
+ (input-error "unexpected \"]\"")
+ (read-mixed-case-symbol ch)))
+ ((#f)
+ ;; EOF.
+ the-eof-object)
+ ((#\:)
+ (if (eq? (keyword-style) keyword-style-prefix)
+ ;; FIXME: Don't skip whitespace here.
+ (let ((ch (next-non-whitespace)))
+ (when (eof-object? ch)
+ (input-error "unexpected end of input while reading :keyword"))
+ (symbol->keyword (read-expr ch)))
+ (read-mixed-case-symbol ch)))
+ ((#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9 #\+ #\- #\.)
+ (read-number ch))
+ (else
+ (read-mixed-case-symbol ch))))
+
+ (define (read-neoteric ch)
+ (let lp ((expr (read-expr ch)))
+ ;; 'expr' is the first component of the neoteric expression. If
+ ;; the next character is '(', '[', or '{', (without any
+ ;; intervening whitespace), we use it to construct a new
+ ;; expression, and loop. For example:
+ ;; f{n - 1}(x) => ((f (- n 1)) x).
+ (case (peek)
+ ((#\() ;; e(...) => (e ...)
+ (next)
+ (lp (cons expr (read-parenthesized #\)))))
+ ((#\[) ;; e[...] => ($bracket-apply$ e ...)
+ (next)
+ (lp (cons* '$bracket-apply$ expr (read-parenthesized #\]))))
+ ((#\{) ;; e{} => (e); e{...} => (e {...})
+ (next)
+ (let ((args (read-parenthesized #\})))
+ (lp (if (null? args)
+ (list expr)
+ (list expr args)))))
+ (else
+ expr))))
+
+ (define (read-expr ch)
+ (let ((line (port-line port))
+ (column (port-column port)))
+ (annotate
+ line
+ column
+ (if (zero? neoteric)
+ (read-expr* ch)
+ (read-neoteric ch)))))
+
+ (define (read-directive)
+ (let ((ch (next)))
+ (cond
+ ((eof-object? ch)
+ (input-error "unexpected end of input after #!"))
+ (else
+ (string->symbol
+ (take-while ch (lambda (ch)
+ (or (eqv? ch #\-) (char-alphabetic? ch)))))))))
+
+ (define (skip-scsh-comment)
+ (let lp ((ch (next)))
+ (cond
+ ((eof-object? ch)
+ (input-error "unexpected end of input while looking for !#"))
+ ((eqv? ch #\!)
+ (let ((ch (next)))
+ (if (eqv? ch #\#)
+ (next)
+ (lp ch))))
+ (else
+ (lp (next))))))
+
+ (define (process-shebang)
+ ;; After having read #!, we complete either with #!r6rs,
+ ;; #!fold-case, #!no-fold-case, #!curly-infix,
+ ;; #!curly-infix-and-bracket-lists, or a SCSH block comment
+ ;; terminated by !#.
+ (let ((sym (read-directive)))
+ (cond
+ ((eq? sym 'r6rs)
+ (set-reader-option! bitfield:case-insensitive? 0)
+ (set-reader-option! bitfield:r6rs-escapes? 1)
+ (set-reader-option! bitfield:square-brackets? 1)
+ (set-reader-option! bitfield:keyword-style keyword-style-hash-prefix)
+ (set-reader-option! bitfield:hungry-eol-escapes? 1)
+ (next))
+ ((eq? sym 'fold-case)
+ (set-reader-option! bitfield:case-insensitive? 1)
+ (next))
+ ((eq? sym 'no-fold-case)
+ (set-reader-option! bitfield:case-insensitive? 0)
+ (next))
+ ((eq? sym 'curly-infix)
+ (set-reader-option! bitfield:curly-infix? 1)
+ (next))
+ ((eq? sym 'curly-infix-and-bracket-lists)
+ (set-reader-option! bitfield:curly-infix? 1)
+ (set-reader-option! bitfield:square-brackets? 0)
+ (next))
+ (else
+ (skip-scsh-comment)))))
+
+ (define (skip-eol-comment)
+ (let ((ch (next)))
+ (cond
+ ((eof-object? ch) ch)
+ ((eq? ch #\newline) (next))
+ (else (skip-eol-comment)))))
+
+ ;; Unlike SCSH-style block comments, SRFI-30/R6RS block comments may be
+ ;; nested.
+ (define (skip-r6rs-block-comment)
+ ;; We have read #|, now looking for |#.
+ (let ((ch (next)))
+ (when (eof-object? ch)
+ (input-error "unterminated `#| ... |#' comment"))
+ (cond
+ ((and (eqv? ch #\|) (eqv? (peek) #\#))
+ ;; Done.
+ (next)
+ (values))
+ ((and (eqv? ch #\#) (eqv? (peek) #\|))
+ ;; A nested comment.
+ (next)
+ (skip-r6rs-block-comment)
+ (skip-r6rs-block-comment))
+ (else
+ (skip-r6rs-block-comment)))))
+
+ (define (next-non-whitespace)
+ (let lp ((ch (next)))
+ (case ch
+ ((#\;)
+ (lp (skip-eol-comment)))
+ ((#\#)
+ (case (peek)
+ ((#\!)
+ (next)
+ (lp (process-shebang)))
+ ((#\;)
+ (next)
+ (let ((ch (next-non-whitespace)))
+ (when (eof-object? ch)
+ (input-error "no expression after #; comment"))
+ (read-expr ch))
+ (next-non-whitespace))
+ ((#\|)
+ (if (read-hash-procedure #\|)
+ ch
+ (begin
+ (next)
+ (skip-r6rs-block-comment)
+ (next-non-whitespace))))
+ (else ch)))
+ ((#\space #\return #\ff #\newline #\tab)
+ (next-non-whitespace))
+ (else ch))))
+
+ (let ((ch (next-non-whitespace)))
+ (if (eof-object? ch)
+ ch
+ (read-expr ch))))