diff options
-rw-r--r-- | libguile/_scm.h | 2 | ||||
-rw-r--r-- | libguile/vm-engine.c | 4 | ||||
-rw-r--r-- | libguile/vm-i-loader.c | 153 | ||||
-rw-r--r-- | libguile/vm-i-system.c | 28 | ||||
-rw-r--r-- | module/language/assembly.scm | 18 | ||||
-rw-r--r-- | module/language/assembly/compile-bytecode.scm | 19 | ||||
-rw-r--r-- | module/language/assembly/decompile-bytecode.scm | 27 | ||||
-rw-r--r-- | module/language/glil/compile-assembly.scm | 19 | ||||
-rw-r--r-- | module/language/tree-il/compile-glil.scm | 9 | ||||
-rw-r--r-- | module/system/xref.scm | 2 | ||||
-rw-r--r-- | test-suite/tests/asm-to-bytecode.test | 17 |
11 files changed, 99 insertions, 199 deletions
diff --git a/libguile/_scm.h b/libguile/_scm.h index ff16a8587..737e01edd 100644 --- a/libguile/_scm.h +++ b/libguile/_scm.h @@ -172,7 +172,7 @@ // major and minor versions must be single characters #define SCM_OBJCODE_MAJOR_VERSION 0 -#define SCM_OBJCODE_MINOR_VERSION A +#define SCM_OBJCODE_MINOR_VERSION B #define SCM_OBJCODE_MAJOR_VERSION_STRING \ SCM_CPP_STRINGIFY(SCM_OBJCODE_MAJOR_VERSION) #define SCM_OBJCODE_MINOR_VERSION_STRING \ diff --git a/libguile/vm-engine.c b/libguile/vm-engine.c index 98a6e491b..b0888c1ec 100644 --- a/libguile/vm-engine.c +++ b/libguile/vm-engine.c @@ -220,6 +220,10 @@ VM_NAME (struct scm_vm *vp, SCM program, SCM *argv, int nargs) finish_args = SCM_EOL; goto vm_error; + vm_error_bad_wide_string_length: + err_msg = scm_from_locale_string ("VM: Bad wide string length: ~S"); + goto vm_error; + #if VM_CHECK_IP vm_error_invalid_address: err_msg = scm_from_locale_string ("VM: Invalid program address"); diff --git a/libguile/vm-i-loader.c b/libguile/vm-i-loader.c index 8de7f0036..e242ef9bf 100644 --- a/libguile/vm-i-loader.c +++ b/libguile/vm-i-loader.c @@ -20,42 +20,6 @@ /* This file is included in vm_engine.c */ -VM_DEFINE_LOADER (80, load_unsigned_integer, "load-unsigned-integer") -{ - size_t len; - - FETCH_LENGTH (len); - if (SCM_LIKELY (len <= 8)) - { - scm_t_uint64 val = 0; - while (len-- > 0) - val = (val << 8U) + FETCH (); - SYNC_REGISTER (); - PUSH (scm_from_uint64 (val)); - NEXT; - } - else - SCM_MISC_ERROR ("load-unsigned-integer: not implemented yet", SCM_EOL); -} - -VM_DEFINE_LOADER (81, load_integer, "load-integer") -{ - size_t len; - - FETCH_LENGTH (len); - if (SCM_LIKELY (len <= 4)) - { - int val = 0; - while (len-- > 0) - val = (val << 8) + FETCH (); - SYNC_REGISTER (); - PUSH (scm_from_int (val)); - NEXT; - } - else - SCM_MISC_ERROR ("load-integer: not implemented yet", SCM_EOL); -} - VM_DEFINE_LOADER (82, load_number, "load-number") { size_t len; @@ -72,82 +36,24 @@ VM_DEFINE_LOADER (82, load_number, "load-number") VM_DEFINE_LOADER (83, load_string, "load-string") { size_t len; - int width; - SCM str; + char *buf; FETCH_LENGTH (len); - FETCH_WIDTH (width); SYNC_REGISTER (); - if (width == 1) - { - char *buf; - str = scm_i_make_string (len, &buf); - memcpy (buf, (char *) ip, len); - } - else if (width == 4) - { - scm_t_wchar *wbuf; - str = scm_i_make_wide_string (len, &wbuf); - memcpy ((char *) wbuf, (char *) ip, len * width); - } - else - SCM_MISC_ERROR ("load-string: invalid character width", SCM_EOL); - PUSH (str); - ip += len * width; + PUSH (scm_i_make_string (len, &buf)); + memcpy (buf, (char *) ip, len); + ip += len; NEXT; } VM_DEFINE_LOADER (84, load_symbol, "load-symbol") { size_t len; - int width; - SCM str; - FETCH_LENGTH (len); - FETCH_WIDTH (width); - SYNC_REGISTER (); - if (width == 1) - { - char *buf; - str = scm_i_make_string (len, &buf); - memcpy (buf, (char *) ip, len); - } - else if (width == 4) - { - scm_t_wchar *wbuf; - str = scm_i_make_wide_string (len, &wbuf); - memcpy ((char *) wbuf, (char *) ip, len * width); - } - else - SCM_MISC_ERROR ("load-symbol: invalid character width", SCM_EOL); - PUSH (scm_string_to_symbol (str)); - ip += len * width; - NEXT; -} - -VM_DEFINE_LOADER (85, load_keyword, "load-keyword") -{ - size_t len; - int width; - SCM str; FETCH_LENGTH (len); - FETCH_WIDTH (width); SYNC_REGISTER (); - if (width == 1) - { - char *buf; - str = scm_i_make_string (len, &buf); - memcpy (buf, (char *) ip, len); - } - else if (width == 4) - { - scm_t_wchar *wbuf; - str = scm_i_make_wide_string (len, &wbuf); - memcpy ((char *) wbuf, (char *) ip, len * width); - } - else - SCM_MISC_ERROR ("load-keyword: invalid character width", SCM_EOL); - PUSH (scm_symbol_to_keyword (scm_string_to_symbol (str))); - ip += len * width; + /* FIXME: should be scm_from_latin1_symboln */ + PUSH (scm_from_locale_symboln ((const char*)ip, len)); + ip += len; NEXT; } @@ -181,46 +87,33 @@ VM_DEFINE_INSTRUCTION (87, link_now, "link-now", 0, 1, 1) NEXT; } -VM_DEFINE_LOADER (88, define, "define") +VM_DEFINE_LOADER (89, load_array, "load-array") { - SCM str, sym; + SCM type, shape; size_t len; - - int width; FETCH_LENGTH (len); - FETCH_WIDTH (width); - SYNC_REGISTER (); - if (width == 1) - { - char *buf; - str = scm_i_make_string (len, &buf); - memcpy (buf, (char *) ip, len); - } - else if (width == 4) - { - scm_t_wchar *wbuf; - str = scm_i_make_wide_string (len, &wbuf); - memcpy ((char *) wbuf, (char *) ip, len * width); - } - else - SCM_MISC_ERROR ("load define: invalid character width", SCM_EOL); - sym = scm_string_to_symbol (str); - ip += len * width; - + POP (shape); + POP (type); SYNC_REGISTER (); - PUSH (scm_sym2var (sym, scm_current_module_lookup_closure (), SCM_BOOL_T)); + PUSH (scm_from_contiguous_typed_array (type, shape, ip, len)); + ip += len; NEXT; } -VM_DEFINE_LOADER (89, load_array, "load-array") +VM_DEFINE_LOADER (90, load_wide_string, "load-wide-string") { - SCM type, shape; size_t len; + scm_t_wchar *wbuf; + FETCH_LENGTH (len); - POP (shape); - POP (type); + if (SCM_UNLIKELY (len % 4)) + { finish_args = scm_list_1 (scm_from_size_t (len)); + goto vm_error_bad_wide_string_length; + } + SYNC_REGISTER (); - PUSH (scm_from_contiguous_typed_array (type, shape, ip, len)); + PUSH (scm_i_make_wide_string (len / 4, &wbuf)); + memcpy ((char *) wbuf, (char *) ip, len); ip += len; NEXT; } diff --git a/libguile/vm-i-system.c b/libguile/vm-i-system.c index 9604ce55a..b298c88a6 100644 --- a/libguile/vm-i-system.c +++ b/libguile/vm-i-system.c @@ -1246,6 +1246,34 @@ VM_DEFINE_INSTRUCTION (65, fix_closure, "fix-closure", 2, 0, 1) NEXT; } +VM_DEFINE_INSTRUCTION (66, define, "define", 0, 0, 2) +{ + SCM sym, val; + POP (sym); + POP (val); + SYNC_REGISTER (); + VARIABLE_SET (scm_sym2var (sym, scm_current_module_lookup_closure (), + SCM_BOOL_T), + val); + NEXT; +} + +VM_DEFINE_INSTRUCTION (67, make_keyword, "make-keyword", 0, 1, 1) +{ + CHECK_UNDERFLOW (); + SYNC_REGISTER (); + *sp = scm_symbol_to_keyword (*sp); + NEXT; +} + +VM_DEFINE_INSTRUCTION (68, make_symbol, "make-symbol", 0, 1, 1) +{ + CHECK_UNDERFLOW (); + SYNC_REGISTER (); + *sp = scm_string_to_symbol (*sp); + NEXT; +} + /* (defun renumber-ops () diff --git a/module/language/assembly.scm b/module/language/assembly.scm index 5571bee61..683da6cc1 100644 --- a/module/language/assembly.scm +++ b/module/language/assembly.scm @@ -34,30 +34,21 @@ ;; lengths are encoded in 3 bytes (define *len-len* 3) -;; the number of bytes per string character is encoded in 1 byte -(define *width-len* 1) - (define (byte-length assembly) (pmatch assembly (,label (guard (not (pair? label))) 0) - ((load-unsigned-integer ,str) - (+ 1 *len-len* (string-length str))) - ((load-integer ,str) - (+ 1 *len-len* (string-length str))) ((load-number ,str) (+ 1 *len-len* (string-length str))) ((load-string ,str) - (+ 1 *len-len* *width-len* (* (string-width str) (string-length str)))) + (+ 1 *len-len* (string-length str))) + ((load-wide-string ,str) + (+ 1 *len-len* (* 4 (string-length str)))) ((load-symbol ,str) - (+ 1 *len-len* *width-len* (* (string-width str) (string-length str)))) - ((load-keyword ,str) - (+ 1 *len-len* *width-len* (* (string-width str) (string-length str)))) + (+ 1 *len-len* (string-length str))) ((load-array ,bv) (+ 1 *len-len* (bytevector-length bv))) - ((define ,str) - (+ 1 *len-len* *width-len* (* (string-width str) (string-length str)))) ((load-program ,nargs ,nrest ,nlocs ,labels ,len ,meta . ,code) (+ 1 *program-header-len* len (if meta (1- (byte-length meta)) 0))) ((,inst . _) (guard (>= (instruction-length inst) 0)) @@ -171,5 +162,4 @@ n4))) ((load-string ,s) s) ((load-symbol ,s) (string->symbol s)) - ((load-keyword ,s) (symbol->keyword (string->symbol s))) (else #f))) diff --git a/module/language/assembly/compile-bytecode.scm b/module/language/assembly/compile-bytecode.scm index 840c73b3a..c49c20081 100644 --- a/module/language/assembly/compile-bytecode.scm +++ b/module/language/assembly/compile-bytecode.scm @@ -65,11 +65,13 @@ (write-byte (logand (ash x -8) 255)) (write-byte (logand (ash x -16) 255)) (write-byte (logand (ash x -24) 255))) - (define (write-uint32 x) (case byte-order - ((1234) (write-uint32-le x)) - ((4321) (write-uint32-be x)) - (else (error "unknown endianness" byte-order)))) + (define (write-uint32 x) + (case byte-order + ((1234) (write-uint32-le x)) + ((4321) (write-uint32-be x)) + (else (error "unknown endianness" byte-order)))) (define (write-wide-string s) + (write-loader-len (* 4 (string-length s))) (string-for-each (lambda (c) (write-uint32 (char->integer c))) s)) (define (write-loader-len len) (write-byte (ash len -16)) @@ -133,14 +135,11 @@ ;; `scm_c_make_objcode_slice ()'. (write-bytecode meta write get-addr '())))) ((make-char32 ,x) (write-uint32-be x)) - ((load-unsigned-integer ,str) (write-loader str)) - ((load-integer ,str) (write-loader str)) ((load-number ,str) (write-loader str)) - ((load-string ,str) (write-sized-loader str)) - ((load-symbol ,str) (write-sized-loader str)) - ((load-keyword ,str) (write-sized-loader str)) + ((load-string ,str) (write-loader str)) + ((load-wide-string ,str) (write-wide-string str)) + ((load-symbol ,str) (write-loader str)) ((load-array ,bv) (write-bytevector bv)) - ((define ,str) (write-sized-loader str)) ((br ,l) (write-break l)) ((br-if ,l) (write-break l)) ((br-if-not ,l) (write-break l)) diff --git a/module/language/assembly/decompile-bytecode.scm b/module/language/assembly/decompile-bytecode.scm index a05db537d..8cdebcfd0 100644 --- a/module/language/assembly/decompile-bytecode.scm +++ b/module/language/assembly/decompile-bytecode.scm @@ -96,16 +96,6 @@ (lp (cons exp out)))))))))) (define (decode-bytecode pop) - (define (get1 bytes-per-char) - (if (= bytes-per-char 1) - (pop) - (let* ((a (pop)) - (b (pop)) - (c (pop)) - (d (pop))) - (if (= byte-order 1234) - (+ (ash d 24) (ash c 16) (ash b 8) a) - (+ (ash a 24) (ash b 16) (ash c 8) d))))) (and=> (pop) (lambda (opcode) (let ((inst (opcode->instruction opcode))) @@ -117,29 +107,24 @@ ;; the negative length indicates a variable length ;; instruction (let* ((make-sequence - (if (eq? inst 'load-array) + (if (or (memq inst '(load-array load-wide-string))) make-bytevector make-string)) (sequence-set! - (if (eq? inst 'load-array) + (if (or (memq inst '(load-array load-wide-string))) bytevector-u8-set! (lambda (str pos value) (string-set! str pos (integer->char value))))) (len (let* ((a (pop)) (b (pop)) (c (pop))) (+ (ash a 16) (ash b 8) c))) - (bytes-per-count - (if (or (eq? inst 'load-string) - (eq? inst 'load-symbol) - (eq? inst 'load-keyword) - (eq? inst 'define)) - (pop) - 1)) (seq (make-sequence len))) (let lp ((i 0)) (if (= i len) - `(,inst ,seq) + `(,inst ,(if (eq? inst 'load-wide-string) + (utf32->string seq) + seq)) (begin - (sequence-set! seq i (get1 bytes-per-count)) + (sequence-set! seq i (pop)) (lp (1+ i))))))) (else ;; fixed length diff --git a/module/language/glil/compile-assembly.scm b/module/language/glil/compile-assembly.scm index 4bd6c4f04..c67ef694b 100644 --- a/module/language/glil/compile-assembly.scm +++ b/module/language/glil/compile-assembly.scm @@ -318,8 +318,8 @@ ,(modulo i 256)))) object-alist))))) ((define) - (emit-code `((define ,(symbol->string name)) - (variable-set)))) + (emit-code `(,@(dump-object name addr) + (define)))) (else (error "unknown toplevel var kind" op name)))) @@ -391,11 +391,20 @@ ((number? x) `((load-number ,(number->string x)))) ((string? x) - `((load-string ,x))) + (case (string-width x) + ((1) `((load-string ,x))) + ((4) (align-code `(load-wide-string ,x) addr 4 4)) + (else (error "bad string width" x)))) ((symbol? x) - `((load-symbol ,(symbol->string x)))) + (let ((str (symbol->string x))) + (case (string-width str) + ((1) `((load-symbol ,str))) + ((4) `(,@(dump-object str addr) + (make-symbol))) + (else (error "bad string width" str))))) ((keyword? x) - `((load-keyword ,(symbol->string (keyword->symbol x))))) + `(,@(dump-object (keyword->symbol x) addr) + (make-keyword))) ((list? x) (let ((tail (let ((len (length x))) (if (>= len 65536) (too-long "list")) diff --git a/module/language/tree-il/compile-glil.scm b/module/language/tree-il/compile-glil.scm index 48db6f6c4..503e0a44f 100644 --- a/module/language/tree-il/compile-glil.scm +++ b/module/language/tree-il/compile-glil.scm @@ -492,11 +492,16 @@ ((tail push vals) (emit-code src (make-glil-toplevel 'ref name)))) (maybe-emit-return)) - (else - (pk 'ew-the-badness x (current-module) (fluid-ref *comp-module*)) + ((module-variable the-root-module name) (case context ((tail push vals) (emit-code src (make-glil-module 'ref '(guile) name #f)))) + (maybe-emit-return)) + (else + (case context + ((tail push vals) + (emit-code src (make-glil-module + 'ref (module-name (fluid-ref *comp-module*)) name #f)))) (maybe-emit-return)))) ((<lexical-ref> src name gensym) diff --git a/module/system/xref.scm b/module/system/xref.scm index 0613754ab..906ec8e4a 100644 --- a/module/system/xref.scm +++ b/module/system/xref.scm @@ -35,7 +35,7 @@ (progv (make-vector (vector-length objects) #f)) (asm (decompile (program-objcode prog) #:to 'assembly))) (pmatch asm - ((load-program ,nargs ,nrest ,nlocs ,next ,labels ,len . ,body) + ((load-program ,nargs ,nrest ,nlocs ,labels ,len . ,body) (for-each (lambda (x) (pmatch x diff --git a/test-suite/tests/asm-to-bytecode.test b/test-suite/tests/asm-to-bytecode.test index d01e93c43..a8e251b83 100644 --- a/test-suite/tests/asm-to-bytecode.test +++ b/test-suite/tests/asm-to-bytecode.test @@ -65,31 +65,18 @@ (comp-test '(make-int8 3) #(make-int8 3)) - (comp-test `(load-integer ,(string (integer->char 0))) - #(load-integer 0 0 1 0)) - - (comp-test `(load-integer ,(string (integer->char 255))) - #(load-integer 0 0 1 255)) - - (comp-test `(load-integer ,(string (integer->char 1) (integer->char 0))) - #(load-integer 0 0 2 1 0)) - (comp-test '(load-number "3.14") (vector 'load-number 0 0 4 (char->integer #\3) (char->integer #\.) (char->integer #\1) (char->integer #\4))) (comp-test '(load-string "foo") - (vector 'load-string 0 0 3 1 (char->integer #\f) (char->integer #\o) + (vector 'load-string 0 0 3 (char->integer #\f) (char->integer #\o) (char->integer #\o))) (comp-test '(load-symbol "foo") - (vector 'load-symbol 0 0 3 1 (char->integer #\f) (char->integer #\o) + (vector 'load-symbol 0 0 3 (char->integer #\f) (char->integer #\o) (char->integer #\o))) - (comp-test '(load-keyword "qux") - (vector 'load-keyword 0 0 3 1 (char->integer #\q) (char->integer #\u) - (char->integer #\x))) - (comp-test '(load-program 3 2 1 () 3 #f (make-int8 3) (return)) #(load-program 3 2 (uint16 1) ;; nargs, nrest, nlocs |