summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndy Wingo <wingo@oblong.net>2009-08-12 16:33:49 +0200
committerAndy Wingo <wingo@oblong.net>2009-08-12 16:34:05 +0200
commit94ff26b96b555f0263fab2221cd55801119ffddd (patch)
tree6911dde6ef41d2b87eb9cfa6b857e7692bc21ac1
parent6cf48307989d2552f2215ef8406ea92745d2d3e9 (diff)
downloadguile-94ff26b96b555f0263fab2221cd55801119ffddd.tar.gz
rework the vm support for wide strings
* libguile/_scm.h (SCM_OBJCODE_MINOR_VERSION): Bump. * libguile/vm-engine.c (vm_error_bad_wide_string_length): New error case. * libguile/vm-i-loader.c (load-unsigned-integer, load-integer) (load-keyword): Remove these instructions. The former two are obsoleted by make-int64/make-uint64, the latter via make-keyword. (load-string): Only handle narrow strings. (load-symbol): Only handle narrow symbols. The wide case is handled via make-symbol. (load-wide-string): New instruction, for wide strings. * libguile/vm-i-system.c (define): Move here from loaders.c, as now it just takes a sym on the stack. (make-keyword, make-symbol): New instructions. * module/language/assembly.scm: Remove removed instructions. No more width byte in load-string etc. * module/language/assembly/compile-bytecode.scm (write-bytecode): Adapt to change in instruction set. * module/language/glil/compile-assembly.scm (glil->assembly): Compile define by pushing the sym then emitting (define). (dump-object): Dump narrow and wide strings differently. Use make-keyword and make-symbol as appropriate. * module/language/tree-il/compile-glil.scm (flatten): When compiling a ref to a primitive (not a call), first see if the primitive is actually bound in the root module. (That's not the case with e.g. bytevector-u8-ref). * module/system/xref.scm (program-callee-rev-vars): Don't parse out "nexts". * test-suite/tests/asm-to-bytecode.test ("compiler"): Adapt to bytecode format change.
-rw-r--r--libguile/_scm.h2
-rw-r--r--libguile/vm-engine.c4
-rw-r--r--libguile/vm-i-loader.c153
-rw-r--r--libguile/vm-i-system.c28
-rw-r--r--module/language/assembly.scm18
-rw-r--r--module/language/assembly/compile-bytecode.scm19
-rw-r--r--module/language/assembly/decompile-bytecode.scm27
-rw-r--r--module/language/glil/compile-assembly.scm19
-rw-r--r--module/language/tree-il/compile-glil.scm9
-rw-r--r--module/system/xref.scm2
-rw-r--r--test-suite/tests/asm-to-bytecode.test17
11 files changed, 99 insertions, 199 deletions
diff --git a/libguile/_scm.h b/libguile/_scm.h
index ff16a8587..737e01edd 100644
--- a/libguile/_scm.h
+++ b/libguile/_scm.h
@@ -172,7 +172,7 @@
// major and minor versions must be single characters
#define SCM_OBJCODE_MAJOR_VERSION 0
-#define SCM_OBJCODE_MINOR_VERSION A
+#define SCM_OBJCODE_MINOR_VERSION B
#define SCM_OBJCODE_MAJOR_VERSION_STRING \
SCM_CPP_STRINGIFY(SCM_OBJCODE_MAJOR_VERSION)
#define SCM_OBJCODE_MINOR_VERSION_STRING \
diff --git a/libguile/vm-engine.c b/libguile/vm-engine.c
index 98a6e491b..b0888c1ec 100644
--- a/libguile/vm-engine.c
+++ b/libguile/vm-engine.c
@@ -220,6 +220,10 @@ VM_NAME (struct scm_vm *vp, SCM program, SCM *argv, int nargs)
finish_args = SCM_EOL;
goto vm_error;
+ vm_error_bad_wide_string_length:
+ err_msg = scm_from_locale_string ("VM: Bad wide string length: ~S");
+ goto vm_error;
+
#if VM_CHECK_IP
vm_error_invalid_address:
err_msg = scm_from_locale_string ("VM: Invalid program address");
diff --git a/libguile/vm-i-loader.c b/libguile/vm-i-loader.c
index 8de7f0036..e242ef9bf 100644
--- a/libguile/vm-i-loader.c
+++ b/libguile/vm-i-loader.c
@@ -20,42 +20,6 @@
/* This file is included in vm_engine.c */
-VM_DEFINE_LOADER (80, load_unsigned_integer, "load-unsigned-integer")
-{
- size_t len;
-
- FETCH_LENGTH (len);
- if (SCM_LIKELY (len <= 8))
- {
- scm_t_uint64 val = 0;
- while (len-- > 0)
- val = (val << 8U) + FETCH ();
- SYNC_REGISTER ();
- PUSH (scm_from_uint64 (val));
- NEXT;
- }
- else
- SCM_MISC_ERROR ("load-unsigned-integer: not implemented yet", SCM_EOL);
-}
-
-VM_DEFINE_LOADER (81, load_integer, "load-integer")
-{
- size_t len;
-
- FETCH_LENGTH (len);
- if (SCM_LIKELY (len <= 4))
- {
- int val = 0;
- while (len-- > 0)
- val = (val << 8) + FETCH ();
- SYNC_REGISTER ();
- PUSH (scm_from_int (val));
- NEXT;
- }
- else
- SCM_MISC_ERROR ("load-integer: not implemented yet", SCM_EOL);
-}
-
VM_DEFINE_LOADER (82, load_number, "load-number")
{
size_t len;
@@ -72,82 +36,24 @@ VM_DEFINE_LOADER (82, load_number, "load-number")
VM_DEFINE_LOADER (83, load_string, "load-string")
{
size_t len;
- int width;
- SCM str;
+ char *buf;
FETCH_LENGTH (len);
- FETCH_WIDTH (width);
SYNC_REGISTER ();
- if (width == 1)
- {
- char *buf;
- str = scm_i_make_string (len, &buf);
- memcpy (buf, (char *) ip, len);
- }
- else if (width == 4)
- {
- scm_t_wchar *wbuf;
- str = scm_i_make_wide_string (len, &wbuf);
- memcpy ((char *) wbuf, (char *) ip, len * width);
- }
- else
- SCM_MISC_ERROR ("load-string: invalid character width", SCM_EOL);
- PUSH (str);
- ip += len * width;
+ PUSH (scm_i_make_string (len, &buf));
+ memcpy (buf, (char *) ip, len);
+ ip += len;
NEXT;
}
VM_DEFINE_LOADER (84, load_symbol, "load-symbol")
{
size_t len;
- int width;
- SCM str;
- FETCH_LENGTH (len);
- FETCH_WIDTH (width);
- SYNC_REGISTER ();
- if (width == 1)
- {
- char *buf;
- str = scm_i_make_string (len, &buf);
- memcpy (buf, (char *) ip, len);
- }
- else if (width == 4)
- {
- scm_t_wchar *wbuf;
- str = scm_i_make_wide_string (len, &wbuf);
- memcpy ((char *) wbuf, (char *) ip, len * width);
- }
- else
- SCM_MISC_ERROR ("load-symbol: invalid character width", SCM_EOL);
- PUSH (scm_string_to_symbol (str));
- ip += len * width;
- NEXT;
-}
-
-VM_DEFINE_LOADER (85, load_keyword, "load-keyword")
-{
- size_t len;
- int width;
- SCM str;
FETCH_LENGTH (len);
- FETCH_WIDTH (width);
SYNC_REGISTER ();
- if (width == 1)
- {
- char *buf;
- str = scm_i_make_string (len, &buf);
- memcpy (buf, (char *) ip, len);
- }
- else if (width == 4)
- {
- scm_t_wchar *wbuf;
- str = scm_i_make_wide_string (len, &wbuf);
- memcpy ((char *) wbuf, (char *) ip, len * width);
- }
- else
- SCM_MISC_ERROR ("load-keyword: invalid character width", SCM_EOL);
- PUSH (scm_symbol_to_keyword (scm_string_to_symbol (str)));
- ip += len * width;
+ /* FIXME: should be scm_from_latin1_symboln */
+ PUSH (scm_from_locale_symboln ((const char*)ip, len));
+ ip += len;
NEXT;
}
@@ -181,46 +87,33 @@ VM_DEFINE_INSTRUCTION (87, link_now, "link-now", 0, 1, 1)
NEXT;
}
-VM_DEFINE_LOADER (88, define, "define")
+VM_DEFINE_LOADER (89, load_array, "load-array")
{
- SCM str, sym;
+ SCM type, shape;
size_t len;
-
- int width;
FETCH_LENGTH (len);
- FETCH_WIDTH (width);
- SYNC_REGISTER ();
- if (width == 1)
- {
- char *buf;
- str = scm_i_make_string (len, &buf);
- memcpy (buf, (char *) ip, len);
- }
- else if (width == 4)
- {
- scm_t_wchar *wbuf;
- str = scm_i_make_wide_string (len, &wbuf);
- memcpy ((char *) wbuf, (char *) ip, len * width);
- }
- else
- SCM_MISC_ERROR ("load define: invalid character width", SCM_EOL);
- sym = scm_string_to_symbol (str);
- ip += len * width;
-
+ POP (shape);
+ POP (type);
SYNC_REGISTER ();
- PUSH (scm_sym2var (sym, scm_current_module_lookup_closure (), SCM_BOOL_T));
+ PUSH (scm_from_contiguous_typed_array (type, shape, ip, len));
+ ip += len;
NEXT;
}
-VM_DEFINE_LOADER (89, load_array, "load-array")
+VM_DEFINE_LOADER (90, load_wide_string, "load-wide-string")
{
- SCM type, shape;
size_t len;
+ scm_t_wchar *wbuf;
+
FETCH_LENGTH (len);
- POP (shape);
- POP (type);
+ if (SCM_UNLIKELY (len % 4))
+ { finish_args = scm_list_1 (scm_from_size_t (len));
+ goto vm_error_bad_wide_string_length;
+ }
+
SYNC_REGISTER ();
- PUSH (scm_from_contiguous_typed_array (type, shape, ip, len));
+ PUSH (scm_i_make_wide_string (len / 4, &wbuf));
+ memcpy ((char *) wbuf, (char *) ip, len);
ip += len;
NEXT;
}
diff --git a/libguile/vm-i-system.c b/libguile/vm-i-system.c
index 9604ce55a..b298c88a6 100644
--- a/libguile/vm-i-system.c
+++ b/libguile/vm-i-system.c
@@ -1246,6 +1246,34 @@ VM_DEFINE_INSTRUCTION (65, fix_closure, "fix-closure", 2, 0, 1)
NEXT;
}
+VM_DEFINE_INSTRUCTION (66, define, "define", 0, 0, 2)
+{
+ SCM sym, val;
+ POP (sym);
+ POP (val);
+ SYNC_REGISTER ();
+ VARIABLE_SET (scm_sym2var (sym, scm_current_module_lookup_closure (),
+ SCM_BOOL_T),
+ val);
+ NEXT;
+}
+
+VM_DEFINE_INSTRUCTION (67, make_keyword, "make-keyword", 0, 1, 1)
+{
+ CHECK_UNDERFLOW ();
+ SYNC_REGISTER ();
+ *sp = scm_symbol_to_keyword (*sp);
+ NEXT;
+}
+
+VM_DEFINE_INSTRUCTION (68, make_symbol, "make-symbol", 0, 1, 1)
+{
+ CHECK_UNDERFLOW ();
+ SYNC_REGISTER ();
+ *sp = scm_string_to_symbol (*sp);
+ NEXT;
+}
+
/*
(defun renumber-ops ()
diff --git a/module/language/assembly.scm b/module/language/assembly.scm
index 5571bee61..683da6cc1 100644
--- a/module/language/assembly.scm
+++ b/module/language/assembly.scm
@@ -34,30 +34,21 @@
;; lengths are encoded in 3 bytes
(define *len-len* 3)
-;; the number of bytes per string character is encoded in 1 byte
-(define *width-len* 1)
-
(define (byte-length assembly)
(pmatch assembly
(,label (guard (not (pair? label)))
0)
- ((load-unsigned-integer ,str)
- (+ 1 *len-len* (string-length str)))
- ((load-integer ,str)
- (+ 1 *len-len* (string-length str)))
((load-number ,str)
(+ 1 *len-len* (string-length str)))
((load-string ,str)
- (+ 1 *len-len* *width-len* (* (string-width str) (string-length str))))
+ (+ 1 *len-len* (string-length str)))
+ ((load-wide-string ,str)
+ (+ 1 *len-len* (* 4 (string-length str))))
((load-symbol ,str)
- (+ 1 *len-len* *width-len* (* (string-width str) (string-length str))))
- ((load-keyword ,str)
- (+ 1 *len-len* *width-len* (* (string-width str) (string-length str))))
+ (+ 1 *len-len* (string-length str)))
((load-array ,bv)
(+ 1 *len-len* (bytevector-length bv)))
- ((define ,str)
- (+ 1 *len-len* *width-len* (* (string-width str) (string-length str))))
((load-program ,nargs ,nrest ,nlocs ,labels ,len ,meta . ,code)
(+ 1 *program-header-len* len (if meta (1- (byte-length meta)) 0)))
((,inst . _) (guard (>= (instruction-length inst) 0))
@@ -171,5 +162,4 @@
n4)))
((load-string ,s) s)
((load-symbol ,s) (string->symbol s))
- ((load-keyword ,s) (symbol->keyword (string->symbol s)))
(else #f)))
diff --git a/module/language/assembly/compile-bytecode.scm b/module/language/assembly/compile-bytecode.scm
index 840c73b3a..c49c20081 100644
--- a/module/language/assembly/compile-bytecode.scm
+++ b/module/language/assembly/compile-bytecode.scm
@@ -65,11 +65,13 @@
(write-byte (logand (ash x -8) 255))
(write-byte (logand (ash x -16) 255))
(write-byte (logand (ash x -24) 255)))
- (define (write-uint32 x) (case byte-order
- ((1234) (write-uint32-le x))
- ((4321) (write-uint32-be x))
- (else (error "unknown endianness" byte-order))))
+ (define (write-uint32 x)
+ (case byte-order
+ ((1234) (write-uint32-le x))
+ ((4321) (write-uint32-be x))
+ (else (error "unknown endianness" byte-order))))
(define (write-wide-string s)
+ (write-loader-len (* 4 (string-length s)))
(string-for-each (lambda (c) (write-uint32 (char->integer c))) s))
(define (write-loader-len len)
(write-byte (ash len -16))
@@ -133,14 +135,11 @@
;; `scm_c_make_objcode_slice ()'.
(write-bytecode meta write get-addr '()))))
((make-char32 ,x) (write-uint32-be x))
- ((load-unsigned-integer ,str) (write-loader str))
- ((load-integer ,str) (write-loader str))
((load-number ,str) (write-loader str))
- ((load-string ,str) (write-sized-loader str))
- ((load-symbol ,str) (write-sized-loader str))
- ((load-keyword ,str) (write-sized-loader str))
+ ((load-string ,str) (write-loader str))
+ ((load-wide-string ,str) (write-wide-string str))
+ ((load-symbol ,str) (write-loader str))
((load-array ,bv) (write-bytevector bv))
- ((define ,str) (write-sized-loader str))
((br ,l) (write-break l))
((br-if ,l) (write-break l))
((br-if-not ,l) (write-break l))
diff --git a/module/language/assembly/decompile-bytecode.scm b/module/language/assembly/decompile-bytecode.scm
index a05db537d..8cdebcfd0 100644
--- a/module/language/assembly/decompile-bytecode.scm
+++ b/module/language/assembly/decompile-bytecode.scm
@@ -96,16 +96,6 @@
(lp (cons exp out))))))))))
(define (decode-bytecode pop)
- (define (get1 bytes-per-char)
- (if (= bytes-per-char 1)
- (pop)
- (let* ((a (pop))
- (b (pop))
- (c (pop))
- (d (pop)))
- (if (= byte-order 1234)
- (+ (ash d 24) (ash c 16) (ash b 8) a)
- (+ (ash a 24) (ash b 16) (ash c 8) d)))))
(and=> (pop)
(lambda (opcode)
(let ((inst (opcode->instruction opcode)))
@@ -117,29 +107,24 @@
;; the negative length indicates a variable length
;; instruction
(let* ((make-sequence
- (if (eq? inst 'load-array)
+ (if (or (memq inst '(load-array load-wide-string)))
make-bytevector
make-string))
(sequence-set!
- (if (eq? inst 'load-array)
+ (if (or (memq inst '(load-array load-wide-string)))
bytevector-u8-set!
(lambda (str pos value)
(string-set! str pos (integer->char value)))))
(len (let* ((a (pop)) (b (pop)) (c (pop)))
(+ (ash a 16) (ash b 8) c)))
- (bytes-per-count
- (if (or (eq? inst 'load-string)
- (eq? inst 'load-symbol)
- (eq? inst 'load-keyword)
- (eq? inst 'define))
- (pop)
- 1))
(seq (make-sequence len)))
(let lp ((i 0))
(if (= i len)
- `(,inst ,seq)
+ `(,inst ,(if (eq? inst 'load-wide-string)
+ (utf32->string seq)
+ seq))
(begin
- (sequence-set! seq i (get1 bytes-per-count))
+ (sequence-set! seq i (pop))
(lp (1+ i)))))))
(else
;; fixed length
diff --git a/module/language/glil/compile-assembly.scm b/module/language/glil/compile-assembly.scm
index 4bd6c4f04..c67ef694b 100644
--- a/module/language/glil/compile-assembly.scm
+++ b/module/language/glil/compile-assembly.scm
@@ -318,8 +318,8 @@
,(modulo i 256))))
object-alist)))))
((define)
- (emit-code `((define ,(symbol->string name))
- (variable-set))))
+ (emit-code `(,@(dump-object name addr)
+ (define))))
(else
(error "unknown toplevel var kind" op name))))
@@ -391,11 +391,20 @@
((number? x)
`((load-number ,(number->string x))))
((string? x)
- `((load-string ,x)))
+ (case (string-width x)
+ ((1) `((load-string ,x)))
+ ((4) (align-code `(load-wide-string ,x) addr 4 4))
+ (else (error "bad string width" x))))
((symbol? x)
- `((load-symbol ,(symbol->string x))))
+ (let ((str (symbol->string x)))
+ (case (string-width str)
+ ((1) `((load-symbol ,str)))
+ ((4) `(,@(dump-object str addr)
+ (make-symbol)))
+ (else (error "bad string width" str)))))
((keyword? x)
- `((load-keyword ,(symbol->string (keyword->symbol x)))))
+ `(,@(dump-object (keyword->symbol x) addr)
+ (make-keyword)))
((list? x)
(let ((tail (let ((len (length x)))
(if (>= len 65536) (too-long "list"))
diff --git a/module/language/tree-il/compile-glil.scm b/module/language/tree-il/compile-glil.scm
index 48db6f6c4..503e0a44f 100644
--- a/module/language/tree-il/compile-glil.scm
+++ b/module/language/tree-il/compile-glil.scm
@@ -492,11 +492,16 @@
((tail push vals)
(emit-code src (make-glil-toplevel 'ref name))))
(maybe-emit-return))
- (else
- (pk 'ew-the-badness x (current-module) (fluid-ref *comp-module*))
+ ((module-variable the-root-module name)
(case context
((tail push vals)
(emit-code src (make-glil-module 'ref '(guile) name #f))))
+ (maybe-emit-return))
+ (else
+ (case context
+ ((tail push vals)
+ (emit-code src (make-glil-module
+ 'ref (module-name (fluid-ref *comp-module*)) name #f))))
(maybe-emit-return))))
((<lexical-ref> src name gensym)
diff --git a/module/system/xref.scm b/module/system/xref.scm
index 0613754ab..906ec8e4a 100644
--- a/module/system/xref.scm
+++ b/module/system/xref.scm
@@ -35,7 +35,7 @@
(progv (make-vector (vector-length objects) #f))
(asm (decompile (program-objcode prog) #:to 'assembly)))
(pmatch asm
- ((load-program ,nargs ,nrest ,nlocs ,next ,labels ,len . ,body)
+ ((load-program ,nargs ,nrest ,nlocs ,labels ,len . ,body)
(for-each
(lambda (x)
(pmatch x
diff --git a/test-suite/tests/asm-to-bytecode.test b/test-suite/tests/asm-to-bytecode.test
index d01e93c43..a8e251b83 100644
--- a/test-suite/tests/asm-to-bytecode.test
+++ b/test-suite/tests/asm-to-bytecode.test
@@ -65,31 +65,18 @@
(comp-test '(make-int8 3)
#(make-int8 3))
- (comp-test `(load-integer ,(string (integer->char 0)))
- #(load-integer 0 0 1 0))
-
- (comp-test `(load-integer ,(string (integer->char 255)))
- #(load-integer 0 0 1 255))
-
- (comp-test `(load-integer ,(string (integer->char 1) (integer->char 0)))
- #(load-integer 0 0 2 1 0))
-
(comp-test '(load-number "3.14")
(vector 'load-number 0 0 4 (char->integer #\3) (char->integer #\.)
(char->integer #\1) (char->integer #\4)))
(comp-test '(load-string "foo")
- (vector 'load-string 0 0 3 1 (char->integer #\f) (char->integer #\o)
+ (vector 'load-string 0 0 3 (char->integer #\f) (char->integer #\o)
(char->integer #\o)))
(comp-test '(load-symbol "foo")
- (vector 'load-symbol 0 0 3 1 (char->integer #\f) (char->integer #\o)
+ (vector 'load-symbol 0 0 3 (char->integer #\f) (char->integer #\o)
(char->integer #\o)))
- (comp-test '(load-keyword "qux")
- (vector 'load-keyword 0 0 3 1 (char->integer #\q) (char->integer #\u)
- (char->integer #\x)))
-
(comp-test '(load-program 3 2 1 () 3 #f (make-int8 3) (return))
#(load-program
3 2 (uint16 1) ;; nargs, nrest, nlocs