summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndy Wingo <wingo@pobox.com>2013-05-01 22:45:19 +0200
committerAndy Wingo <wingo@pobox.com>2013-05-01 22:45:38 +0200
commitbe41919594ecce43fb34f2c46a7f90ac0266cde7 (patch)
tree5fbdb7e6fe2b9be380af737de59d66f3c34b77a0
parentc20e6d6a83422348b3e550420f7044700d13b6a9 (diff)
downloadguile-be41919594ecce43fb34f2c46a7f90ac0266cde7.tar.gz
disassemble-program for rtl
* module/system/vm/assembler.scm: * module/system/vm/disassembler.scm: * module/system/vm/rtl.scm: Split rtl.scm into two modules: an assembler and a disassembler. The disassembler works now. Fixed a couple bugs related to symbol table creation. * module/Makefile.am: * test-suite/tests/rtl.test: Adapt.
-rw-r--r--module/Makefile.am3
-rw-r--r--module/system/vm/assembler.scm (renamed from module/system/vm/rtl.scm)209
-rw-r--r--module/system/vm/disassembler.scm301
-rw-r--r--test-suite/tests/rtl.test2
4 files changed, 322 insertions, 193 deletions
diff --git a/module/Makefile.am b/module/Makefile.am
index c696b5961..9e10f20f0 100644
--- a/module/Makefile.am
+++ b/module/Makefile.am
@@ -354,7 +354,8 @@ SYSTEM_SOURCES = \
system/vm/trace.scm \
system/vm/traps.scm \
system/vm/trap-state.scm \
- system/vm/rtl.scm \
+ system/vm/assembler.scm \
+ system/vm/disassembler.scm \
system/vm/vm.scm \
system/foreign.scm \
system/xref.scm \
diff --git a/module/system/vm/rtl.scm b/module/system/vm/assembler.scm
index ea0cbc2f4..7c7fb488c 100644
--- a/module/system/vm/rtl.scm
+++ b/module/system/vm/assembler.scm
@@ -1,4 +1,4 @@
-;;; Guile VM program functions
+;;; Guile RTL assembler
;;; Copyright (C) 2001, 2009, 2010, 2012, 2013 Free Software Foundation, Inc.
;;;
@@ -18,12 +18,11 @@
;;; Code:
-(define-module (system vm rtl)
+(define-module (system vm assembler)
#:use-module (system base target)
#:use-module (system vm instruction)
#:use-module (system vm elf)
#:use-module (system vm linker)
- #:use-module (system vm program)
#:use-module (system vm objcode)
#:use-module (rnrs bytevectors)
#:use-module (ice-9 vlist)
@@ -36,30 +35,6 @@
link-assembly
assemble-program))
-;;; TODO:
-;;;
-;;; * Make it possible to disassemble a function
-;;; ** Writing function ranges into an ELF section
-;;; *** sorted .symtab section, writing ELF symbols
-;;; ** Being able to determine the bounds of a function
-;;; ** Applying the existing disassemble-buffer function
-;;; ** Write table mapping function IP to name
-;;; ** Making disassemble-buffer better
-;;;
-;;; * Provide line number information
-;;; ** Provide additional macro-assembly for this
-;;; ** Write to separate ELF section: .debug_lines
-;;;
-;;; * More metadata
-;;; Arities, local variable names and ranges, other literal procedure
-;;; metadata
-;;; ** Write to separate ELF section: .debug_info
-;;;
-;;; .symtab and .debug_info (and to an extent, .debug_aranges et al) are
-;;; redundant, but since .symtab is so much smaller and easier it's
-;;; probably OK to duplicate the information, at least while we
-;;; bootstrap the new tools.
-
(define-syntax-rule (pack-u8-u24 x y)
(logior x (ash y 8)))
@@ -334,160 +309,6 @@
(visit-opcodes define-assembler)
-(define-syntax disassembler
- (lambda (x)
- (define (parse-first-word exp type)
- #`(let ((word #,exp))
- #,(case type
- ((U8_X24)
- #'(list))
- ((U8_U24)
- #'(list (ash word -8)))
- ((U8_L24)
- ;; Fixme: translate back to label
- #'(list (ash word -8)))
- ((U8_R24)
- ;; FIXME: parse rest instructions correctly
- #'(list (ash word -8)))
- ((U8_U8_I16)
- #'(list (logand (ash word -8) #xff)
- (ash word -16)))
- ((U8_U12_U12)
- #'(list (logand (ash word -8) #xfff)
- (ash word -20)))
- ((U8_U8_U8_U8)
- #'(list (logand (ash word -8) #xff)
- (logand (ash word -16) #xff)
- (ash word -24)))
- (else
- (error "bad kind" type)))))
-
- (define (parse-tail-word buf offset n type)
- #`(let ((word (u32-ref #,buf (+ #,offset #,n))))
- #,(case type
- ((U8_X24)
- #'(list (logand word #ff)))
- ((U8_U24)
- #'(list (logand word #xff)
- (ash word -8)))
- ((U8_L24)
- ;; Fixme: translate back to label
- #'(list (logand word #xff)
- (ash word -8)))
- ((U8_R24)
- ;; FIXME: parse rest instructions correctly
- #'(list (logand word #xff)
- (ash word -8)))
- ((U8_U8_I16)
- ;; FIXME: immediates
- #'(list (logand word #xff)
- (logand (ash word -8) #xff)
- (ash word -16)))
- ((U8_U12_U12)
- #'(list (logand word #xff)
- (logand (ash word -8) #xfff)
- (ash word -20)))
- ((U8_U8_U8_U8)
- #'(list (logand word #xff)
- (logand (ash word -8) #xff)
- (logand (ash word -16) #xff)
- (ash word -24)))
- ((U32)
- #'(list word))
- ((I32)
- ;; FIXME: immediates
- #'(list word))
- ((A32)
- ;; FIXME: long immediates
- #'(list word))
- ((B32)
- ;; FIXME: long immediates
- #'(list word))
- ((N32)
- ;; FIXME: non-immediate
- #'(list word))
- ((S32)
- ;; FIXME: indirect access
- #'(list word))
- ((L32)
- ;; FIXME: offset
- #'(list word))
- ((LO32)
- ;; FIXME: offset
- #'(list word))
- ((X8_U24)
- #'(list (ash word -8)))
- ((X8_U12_U12)
- #'(list (logand (ash word -8) #xfff)
- (ash word -20)))
- ((X8_R24)
- ;; FIXME: rest
- #'(list (ash word -8)))
- ((X8_L24)
- ;; FIXME: label
- #'(list (ash word -8)))
- ((U1_X7_L24)
- ;; FIXME: label
- #'(list (logand word #x1)
- (ash word -8)))
- ((U1_U7_L24)
- ;; FIXME: label
- #'(list (logand word #x1)
- (logand (ash word -1) #x7f)
- (ash word -8)))
- (else
- (error "bad kind" type)))))
-
- (syntax-case x ()
- ((_ name opcode word0 word* ...)
- (with-syntax ((asm
- (parse-first-word #'first
- (syntax->datum #'word0)))
- ((asm* ...)
- (map (lambda (word n)
- (parse-tail-word #'buf #'offset (1+ n)
- word))
- (syntax->datum #'(word* ...))
- (iota (length #'(word* ...))))))
- #'(lambda (buf offset first)
- (values (+ 1 (length '(word* ...)))
- (cons 'name (append asm asm* ...)))))))))
-
-(define (disasm-invalid buf offset first)
- (error "bad instruction" (logand first #xff) first buf offset))
-
-(define disassemblers (make-vector 256 disasm-invalid))
-
-(define-syntax define-disassembler
- (lambda (x)
- (syntax-case x ()
- ((_ name opcode arg ...)
- (with-syntax ((parse (id-append #'name #'parse- #'name)))
- #'(let ((parse (disassembler name opcode arg ...)))
- (vector-set! disassemblers opcode parse)))))))
-
-(visit-opcodes define-disassembler)
-
-;; -> len list
-(define (disassemble-one buf offset)
- (let ((first (u32-ref buf offset)))
- ((vector-ref disassemblers (logand first #xff)) buf offset first)))
-
-;; -> list
-(define* (disassemble-buffer buf #:optional
- (offset 0)
- (end (u32vector-length buf)))
-
- (let ((locals (u32-ref buf offset))
- (meta (s32-ref buf (1+ offset))))
- (let lp ((offset (+ offset 2))
- (out '()))
- (if (< offset end)
- (call-with-values (lambda () (disassemble-one buf offset))
- (lambda (len elt)
- (lp (+ offset len) (cons elt out))))
- (cons* locals meta (reverse out))))))
-
(define-inlinable (immediate? x)
(not (zero? (logand (object-address x) 6))))
@@ -1012,20 +833,26 @@
(write-elf-symbol bv (* n size) endianness word-size
(make-elf-symbol
#:name name
- #:value (meta-low-pc meta)
- #:size (- (meta-high-pc meta) (meta-low-pc meta))
+ ;; Symbol value and size are measured in
+ ;; bytes, not u32s.
+ #:value (* 4 (meta-low-pc meta))
+ #:size (* 4 (- (meta-high-pc meta)
+ (meta-low-pc meta)))
#:type STT_FUNC
#:visibility STV_HIDDEN
#:shndx (elf-section-index text-section)))))
meta (iota n))
- (values (make-object asm '.symtab
- bv
- '() '()
- #:type SHT_SYMTAB #:flags 0)
- (make-object asm '.strtab
- (link-string-table strtab)
- '() '()
- #:type SHT_STRTAB #:flags 0))))
+ (let ((strtab (make-object asm '.strtab
+ (link-string-table strtab)
+ '() '()
+ #:type SHT_STRTAB #:flags 0)))
+ (values (make-object asm '.symtab
+ bv
+ '() '()
+ #:type SHT_SYMTAB #:flags 0 #:entsize size
+ #:link (elf-section-index
+ (linker-object-section strtab)))
+ strtab))))
(define (link-objects asm)
(let*-values (((ro rw rw-init) (link-constants asm))
diff --git a/module/system/vm/disassembler.scm b/module/system/vm/disassembler.scm
new file mode 100644
index 000000000..1c3a30513
--- /dev/null
+++ b/module/system/vm/disassembler.scm
@@ -0,0 +1,301 @@
+;;; Guile RTL disassembler
+
+;;; Copyright (C) 2001, 2009, 2010, 2012, 2013 Free Software Foundation, Inc.
+;;;
+;;; This library is free software; you can redistribute it and/or
+;;; modify it under the terms of the GNU Lesser General Public
+;;; License as published by the Free Software Foundation; either
+;;; version 3 of the License, or (at your option) any later version.
+;;;
+;;; This library is distributed in the hope that it will be useful,
+;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;;; Lesser General Public License for more details.
+;;;
+;;; You should have received a copy of the GNU Lesser General Public
+;;; License along with this library; if not, write to the Free Software
+;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+;;; Code:
+
+(define-module (system vm disassembler)
+ #:use-module (system vm instruction)
+ #:use-module (system vm elf)
+ #:use-module (system vm program)
+ #:use-module (system vm objcode)
+ #:use-module (system foreign)
+ #:use-module (rnrs bytevectors)
+ #:use-module (ice-9 format)
+ #:use-module (ice-9 vlist)
+ #:use-module (srfi srfi-1)
+ #:use-module (srfi srfi-4)
+ #:export (disassemble-program))
+
+(define-syntax-rule (u32-ref buf n)
+ (bytevector-u32-native-ref buf (* n 4)))
+
+(define-syntax-rule (s32-ref buf n)
+ (bytevector-s32-native-ref buf (* n 4)))
+
+(define-syntax visit-opcodes
+ (lambda (x)
+ (syntax-case x ()
+ ((visit-opcodes macro arg ...)
+ (with-syntax (((inst ...)
+ (map (lambda (x) (datum->syntax #'macro x))
+ (rtl-instruction-list))))
+ #'(begin
+ (macro arg ... . inst)
+ ...))))))
+
+(eval-when (expand compile load eval)
+ (define (id-append ctx a b)
+ (datum->syntax ctx (symbol-append (syntax->datum a) (syntax->datum b)))))
+
+(define-syntax join-subformats
+ (lambda (x)
+ (syntax-case x ()
+ ((_)
+ #f)
+ ((_ #f rest ...)
+ #'(join-subformats rest ...))
+ ((_ (fmt arg ...))
+ (string? (syntax->datum #'fmt))
+ #'(list fmt arg ...))
+ ((_ (fmt arg ...) #f rest ...)
+ (string? (syntax->datum #'fmt))
+ #'(join-subformats (fmt arg ...) rest ...))
+ ((_ (fmt arg ...) (fmt* arg* ...) rest ...)
+ (and (string? (syntax->datum #'fmt)) (string? (syntax->datum #'fmt*)))
+ (let ((fmt** (string-append (syntax->datum #'fmt)
+ ", "
+ (syntax->datum #'fmt*))))
+ #`(join-subformats (#,fmt** arg ... arg* ...) rest ...))))))
+
+(define (make-immediate n)
+ (pointer->scm (make-pointer n)))
+
+(define-syntax disassembler
+ (lambda (x)
+ (define (parse-first-word word type)
+ (with-syntax ((word word))
+ (case type
+ ((U8_X24)
+ #'(()
+ #f))
+ ((U8_U24)
+ #'(((ash word -8))
+ #f))
+ ((U8_L24)
+ ;; Fixme: translate back to label
+ #'(((ash word -8))
+ #f))
+ ((U8_R24)
+ ;; FIXME: parse rest instructions correctly
+ #'(((ash word -8))
+ #f))
+ ((U8_U8_I16)
+ #'(((logand (ash word -8) #xff)
+ (ash word -16))
+ ("~S" (make-immediate (ash word -16)))))
+ ((U8_U12_U12)
+ #'(((logand (ash word -8) #xfff)
+ (ash word -20))
+ #f))
+ ((U8_U8_U8_U8)
+ #'(((logand (ash word -8) #xff)
+ (logand (ash word -16) #xff)
+ (ash word -24))
+ #f))
+ (else
+ (error "bad kind" type)))))
+
+ (define (parse-tail-word word type)
+ (with-syntax ((word word))
+ (case type
+ ((U8_X24)
+ #'(((logand word #ff))
+ #f))
+ ((U8_U24)
+ #'(((logand word #xff)
+ (ash word -8))
+ #f))
+ ((U8_L24)
+ ;; Fixme: translate back to label
+ #'(((logand word #xff)
+ (ash word -8))
+ #f))
+ ((U8_R24)
+ ;; FIXME: parse rest instructions correctly
+ #'(((logand word #xff)
+ (ash word -8))
+ #f))
+ ((U8_U8_I16)
+ ;; FIXME: immediates
+ #'(((logand word #xff)
+ (logand (ash word -8) #xff)
+ (ash word -16))
+ #f))
+ ((U8_U12_U12)
+ #'(((logand word #xff)
+ (logand (ash word -8) #xfff)
+ (ash word -20))
+ #f))
+ ((U8_U8_U8_U8)
+ #'(((logand word #xff)
+ (logand (ash word -8) #xff)
+ (logand (ash word -16) #xff)
+ (ash word -24))
+ #f))
+ ((U32)
+ #'((word)
+ #f))
+ ((I32)
+ ;; FIXME: immediates
+ #'((word)
+ #f))
+ ((A32)
+ ;; FIXME: long immediates
+ #'((word)
+ #f))
+ ((B32)
+ ;; FIXME: long immediates
+ #'((word)
+ #f))
+ ((N32)
+ ;; FIXME: non-immediate
+ #'((word)
+ #f))
+ ((S32)
+ ;; FIXME: indirect access
+ #'((word)
+ #f))
+ ((L32)
+ ;; FIXME: offset
+ #'((word)
+ #f))
+ ((LO32)
+ ;; FIXME: offset
+ #'((word)
+ #f))
+ ((X8_U24)
+ #'(((ash word -8))
+ #f))
+ ((X8_U12_U12)
+ #'(((logand (ash word -8) #xfff)
+ (ash word -20))
+ #f))
+ ((X8_R24)
+ ;; FIXME: rest
+ #'(((ash word -8))
+ #f))
+ ((X8_L24)
+ ;; FIXME: label
+ #'(((ash word -8))
+ #f))
+ ((U1_X7_L24)
+ ;; FIXME: label
+ #'(((logand word #x1)
+ (ash word -8))
+ #f))
+ ((U1_U7_L24)
+ ;; FIXME: label
+ #'(((logand word #x1)
+ (logand (ash word -1) #x7f)
+ (ash word -8))
+ #f))
+ (else
+ (error "bad kind" type)))))
+
+ (syntax-case x ()
+ ((_ name opcode word0 word* ...)
+ (let ((vars (generate-temporaries #'(word* ...))))
+ (with-syntax (((word* ...) vars)
+ ((n ...) (map 1+ (iota (length #'(word* ...)))))
+ (((asm ...) note)
+ (parse-first-word #'first (syntax->datum #'word0)))
+ ((((asm* ...) note*) ...)
+ (map (lambda (word type)
+ (parse-tail-word word type))
+ vars
+ (syntax->datum #'(word* ...)))))
+ #'(lambda (buf offset first)
+ (let ((word* (u32-ref buf (+ offset n)))
+ ...)
+ (values (+ 1 (length '(word* ...)))
+ (list 'name asm ... asm* ... ...)
+ (join-subformats note note* ...))))))))))
+
+(define (disasm-invalid buf offset first)
+ (error "bad instruction" (logand first #xff) first buf offset))
+
+(define disassemblers (make-vector 256 disasm-invalid))
+
+(define-syntax define-disassembler
+ (lambda (x)
+ (syntax-case x ()
+ ((_ name opcode arg ...)
+ (with-syntax ((parse (id-append #'name #'parse- #'name)))
+ #'(let ((parse (disassembler name opcode arg ...)))
+ (vector-set! disassemblers opcode parse)))))))
+
+(visit-opcodes define-disassembler)
+
+;; -> len list
+(define (disassemble-one buf offset)
+ (let ((first (u32-ref buf offset)))
+ ((vector-ref disassemblers (logand first #xff)) buf offset first)))
+
+(define (find-elf-symbol elf text-offset)
+ (and=>
+ (elf-section-by-name elf ".symtab")
+ (lambda (symtab)
+ (let ((len (elf-symbol-table-len symtab))
+ (strtab (elf-section elf (elf-section-link symtab))))
+ ;; The symbols should be sorted, but maybe somehow that fails
+ ;; (for example if multiple objects are relinked together). So,
+ ;; a modicum of tolerance.
+ (define (bisect)
+ #f)
+ (define (linear-search)
+ (let lp ((n 0))
+ (and (< n len)
+ (let ((sym (elf-symbol-table-ref elf symtab n strtab)))
+ (if (and (<= (elf-symbol-value sym) text-offset)
+ (< text-offset (+ (elf-symbol-value sym)
+ (elf-symbol-size sym))))
+ sym
+ (lp (1+ n)))))))
+ (or (bisect) (linear-search))))))
+
+(define (print-info port addr info extra src)
+ (format port "~4@S ~32S~@[;; ~1{~@?~}~]~@[~61t at ~a~]\n"
+ addr info extra src))
+
+(define* (disassemble-program program #:optional (port (current-output-port)))
+ (let* ((code (rtl-program-code program))
+ (bv (find-mapped-elf-image code))
+ (elf (parse-elf bv))
+ (base (pointer-address (bytevector->pointer (elf-bytes elf))))
+ (text-base (elf-section-offset
+ (or (elf-section-by-name elf ".rtl-text")
+ (error "ELF object has no text section")))))
+ (cond
+ ((find-elf-symbol elf (- code base text-base))
+ => (lambda (sym)
+ ;; The text-base, symbol value, and symbol size are in bytes,
+ ;; but the disassembler operates on u32 units.
+ (let ((start (/ (+ (elf-symbol-value sym) text-base) 4))
+ (size (/ (elf-symbol-size sym) 4)))
+ (format port "Disassembly of ~A at #x~X:\n\n"
+ (elf-symbol-name sym) code)
+ (let lp ((offset 0))
+ (when (< offset size)
+ (call-with-values (lambda ()
+ (disassemble-one bv (+ start offset)))
+ (lambda (len elt extra)
+ (print-info port offset elt extra #f)
+ (lp (+ offset len)))))))))
+ (else
+ (format port "Debugging information unavailable.~%")))
+ (values)))
diff --git a/test-suite/tests/rtl.test b/test-suite/tests/rtl.test
index 219407c3b..74a7ff334 100644
--- a/test-suite/tests/rtl.test
+++ b/test-suite/tests/rtl.test
@@ -18,7 +18,7 @@
(define-module (tests rtl)
#:use-module (test-suite lib)
- #:use-module (system vm rtl))
+ #:use-module (system vm assembler))
(define-syntax-rule (assert-equal val expr)
(let ((x val))