diff options
author | Andy Wingo <wingo@pobox.com> | 2013-05-01 22:45:19 +0200 |
---|---|---|
committer | Andy Wingo <wingo@pobox.com> | 2013-05-01 22:45:38 +0200 |
commit | be41919594ecce43fb34f2c46a7f90ac0266cde7 (patch) | |
tree | 5fbdb7e6fe2b9be380af737de59d66f3c34b77a0 | |
parent | c20e6d6a83422348b3e550420f7044700d13b6a9 (diff) | |
download | guile-be41919594ecce43fb34f2c46a7f90ac0266cde7.tar.gz |
disassemble-program for rtl
* module/system/vm/assembler.scm:
* module/system/vm/disassembler.scm:
* module/system/vm/rtl.scm: Split rtl.scm into two modules: an assembler
and a disassembler. The disassembler works now. Fixed a couple bugs
related to symbol table creation.
* module/Makefile.am:
* test-suite/tests/rtl.test: Adapt.
-rw-r--r-- | module/Makefile.am | 3 | ||||
-rw-r--r-- | module/system/vm/assembler.scm (renamed from module/system/vm/rtl.scm) | 209 | ||||
-rw-r--r-- | module/system/vm/disassembler.scm | 301 | ||||
-rw-r--r-- | test-suite/tests/rtl.test | 2 |
4 files changed, 322 insertions, 193 deletions
diff --git a/module/Makefile.am b/module/Makefile.am index c696b5961..9e10f20f0 100644 --- a/module/Makefile.am +++ b/module/Makefile.am @@ -354,7 +354,8 @@ SYSTEM_SOURCES = \ system/vm/trace.scm \ system/vm/traps.scm \ system/vm/trap-state.scm \ - system/vm/rtl.scm \ + system/vm/assembler.scm \ + system/vm/disassembler.scm \ system/vm/vm.scm \ system/foreign.scm \ system/xref.scm \ diff --git a/module/system/vm/rtl.scm b/module/system/vm/assembler.scm index ea0cbc2f4..7c7fb488c 100644 --- a/module/system/vm/rtl.scm +++ b/module/system/vm/assembler.scm @@ -1,4 +1,4 @@ -;;; Guile VM program functions +;;; Guile RTL assembler ;;; Copyright (C) 2001, 2009, 2010, 2012, 2013 Free Software Foundation, Inc. ;;; @@ -18,12 +18,11 @@ ;;; Code: -(define-module (system vm rtl) +(define-module (system vm assembler) #:use-module (system base target) #:use-module (system vm instruction) #:use-module (system vm elf) #:use-module (system vm linker) - #:use-module (system vm program) #:use-module (system vm objcode) #:use-module (rnrs bytevectors) #:use-module (ice-9 vlist) @@ -36,30 +35,6 @@ link-assembly assemble-program)) -;;; TODO: -;;; -;;; * Make it possible to disassemble a function -;;; ** Writing function ranges into an ELF section -;;; *** sorted .symtab section, writing ELF symbols -;;; ** Being able to determine the bounds of a function -;;; ** Applying the existing disassemble-buffer function -;;; ** Write table mapping function IP to name -;;; ** Making disassemble-buffer better -;;; -;;; * Provide line number information -;;; ** Provide additional macro-assembly for this -;;; ** Write to separate ELF section: .debug_lines -;;; -;;; * More metadata -;;; Arities, local variable names and ranges, other literal procedure -;;; metadata -;;; ** Write to separate ELF section: .debug_info -;;; -;;; .symtab and .debug_info (and to an extent, .debug_aranges et al) are -;;; redundant, but since .symtab is so much smaller and easier it's -;;; probably OK to duplicate the information, at least while we -;;; bootstrap the new tools. - (define-syntax-rule (pack-u8-u24 x y) (logior x (ash y 8))) @@ -334,160 +309,6 @@ (visit-opcodes define-assembler) -(define-syntax disassembler - (lambda (x) - (define (parse-first-word exp type) - #`(let ((word #,exp)) - #,(case type - ((U8_X24) - #'(list)) - ((U8_U24) - #'(list (ash word -8))) - ((U8_L24) - ;; Fixme: translate back to label - #'(list (ash word -8))) - ((U8_R24) - ;; FIXME: parse rest instructions correctly - #'(list (ash word -8))) - ((U8_U8_I16) - #'(list (logand (ash word -8) #xff) - (ash word -16))) - ((U8_U12_U12) - #'(list (logand (ash word -8) #xfff) - (ash word -20))) - ((U8_U8_U8_U8) - #'(list (logand (ash word -8) #xff) - (logand (ash word -16) #xff) - (ash word -24))) - (else - (error "bad kind" type))))) - - (define (parse-tail-word buf offset n type) - #`(let ((word (u32-ref #,buf (+ #,offset #,n)))) - #,(case type - ((U8_X24) - #'(list (logand word #ff))) - ((U8_U24) - #'(list (logand word #xff) - (ash word -8))) - ((U8_L24) - ;; Fixme: translate back to label - #'(list (logand word #xff) - (ash word -8))) - ((U8_R24) - ;; FIXME: parse rest instructions correctly - #'(list (logand word #xff) - (ash word -8))) - ((U8_U8_I16) - ;; FIXME: immediates - #'(list (logand word #xff) - (logand (ash word -8) #xff) - (ash word -16))) - ((U8_U12_U12) - #'(list (logand word #xff) - (logand (ash word -8) #xfff) - (ash word -20))) - ((U8_U8_U8_U8) - #'(list (logand word #xff) - (logand (ash word -8) #xff) - (logand (ash word -16) #xff) - (ash word -24))) - ((U32) - #'(list word)) - ((I32) - ;; FIXME: immediates - #'(list word)) - ((A32) - ;; FIXME: long immediates - #'(list word)) - ((B32) - ;; FIXME: long immediates - #'(list word)) - ((N32) - ;; FIXME: non-immediate - #'(list word)) - ((S32) - ;; FIXME: indirect access - #'(list word)) - ((L32) - ;; FIXME: offset - #'(list word)) - ((LO32) - ;; FIXME: offset - #'(list word)) - ((X8_U24) - #'(list (ash word -8))) - ((X8_U12_U12) - #'(list (logand (ash word -8) #xfff) - (ash word -20))) - ((X8_R24) - ;; FIXME: rest - #'(list (ash word -8))) - ((X8_L24) - ;; FIXME: label - #'(list (ash word -8))) - ((U1_X7_L24) - ;; FIXME: label - #'(list (logand word #x1) - (ash word -8))) - ((U1_U7_L24) - ;; FIXME: label - #'(list (logand word #x1) - (logand (ash word -1) #x7f) - (ash word -8))) - (else - (error "bad kind" type))))) - - (syntax-case x () - ((_ name opcode word0 word* ...) - (with-syntax ((asm - (parse-first-word #'first - (syntax->datum #'word0))) - ((asm* ...) - (map (lambda (word n) - (parse-tail-word #'buf #'offset (1+ n) - word)) - (syntax->datum #'(word* ...)) - (iota (length #'(word* ...)))))) - #'(lambda (buf offset first) - (values (+ 1 (length '(word* ...))) - (cons 'name (append asm asm* ...))))))))) - -(define (disasm-invalid buf offset first) - (error "bad instruction" (logand first #xff) first buf offset)) - -(define disassemblers (make-vector 256 disasm-invalid)) - -(define-syntax define-disassembler - (lambda (x) - (syntax-case x () - ((_ name opcode arg ...) - (with-syntax ((parse (id-append #'name #'parse- #'name))) - #'(let ((parse (disassembler name opcode arg ...))) - (vector-set! disassemblers opcode parse))))))) - -(visit-opcodes define-disassembler) - -;; -> len list -(define (disassemble-one buf offset) - (let ((first (u32-ref buf offset))) - ((vector-ref disassemblers (logand first #xff)) buf offset first))) - -;; -> list -(define* (disassemble-buffer buf #:optional - (offset 0) - (end (u32vector-length buf))) - - (let ((locals (u32-ref buf offset)) - (meta (s32-ref buf (1+ offset)))) - (let lp ((offset (+ offset 2)) - (out '())) - (if (< offset end) - (call-with-values (lambda () (disassemble-one buf offset)) - (lambda (len elt) - (lp (+ offset len) (cons elt out)))) - (cons* locals meta (reverse out)))))) - (define-inlinable (immediate? x) (not (zero? (logand (object-address x) 6)))) @@ -1012,20 +833,26 @@ (write-elf-symbol bv (* n size) endianness word-size (make-elf-symbol #:name name - #:value (meta-low-pc meta) - #:size (- (meta-high-pc meta) (meta-low-pc meta)) + ;; Symbol value and size are measured in + ;; bytes, not u32s. + #:value (* 4 (meta-low-pc meta)) + #:size (* 4 (- (meta-high-pc meta) + (meta-low-pc meta))) #:type STT_FUNC #:visibility STV_HIDDEN #:shndx (elf-section-index text-section))))) meta (iota n)) - (values (make-object asm '.symtab - bv - '() '() - #:type SHT_SYMTAB #:flags 0) - (make-object asm '.strtab - (link-string-table strtab) - '() '() - #:type SHT_STRTAB #:flags 0)))) + (let ((strtab (make-object asm '.strtab + (link-string-table strtab) + '() '() + #:type SHT_STRTAB #:flags 0))) + (values (make-object asm '.symtab + bv + '() '() + #:type SHT_SYMTAB #:flags 0 #:entsize size + #:link (elf-section-index + (linker-object-section strtab))) + strtab)))) (define (link-objects asm) (let*-values (((ro rw rw-init) (link-constants asm)) diff --git a/module/system/vm/disassembler.scm b/module/system/vm/disassembler.scm new file mode 100644 index 000000000..1c3a30513 --- /dev/null +++ b/module/system/vm/disassembler.scm @@ -0,0 +1,301 @@ +;;; Guile RTL disassembler + +;;; Copyright (C) 2001, 2009, 2010, 2012, 2013 Free Software Foundation, Inc. +;;; +;;; This library is free software; you can redistribute it and/or +;;; modify it under the terms of the GNU Lesser General Public +;;; License as published by the Free Software Foundation; either +;;; version 3 of the License, or (at your option) any later version. +;;; +;;; This library is distributed in the hope that it will be useful, +;;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;;; Lesser General Public License for more details. +;;; +;;; You should have received a copy of the GNU Lesser General Public +;;; License along with this library; if not, write to the Free Software +;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + +;;; Code: + +(define-module (system vm disassembler) + #:use-module (system vm instruction) + #:use-module (system vm elf) + #:use-module (system vm program) + #:use-module (system vm objcode) + #:use-module (system foreign) + #:use-module (rnrs bytevectors) + #:use-module (ice-9 format) + #:use-module (ice-9 vlist) + #:use-module (srfi srfi-1) + #:use-module (srfi srfi-4) + #:export (disassemble-program)) + +(define-syntax-rule (u32-ref buf n) + (bytevector-u32-native-ref buf (* n 4))) + +(define-syntax-rule (s32-ref buf n) + (bytevector-s32-native-ref buf (* n 4))) + +(define-syntax visit-opcodes + (lambda (x) + (syntax-case x () + ((visit-opcodes macro arg ...) + (with-syntax (((inst ...) + (map (lambda (x) (datum->syntax #'macro x)) + (rtl-instruction-list)))) + #'(begin + (macro arg ... . inst) + ...)))))) + +(eval-when (expand compile load eval) + (define (id-append ctx a b) + (datum->syntax ctx (symbol-append (syntax->datum a) (syntax->datum b))))) + +(define-syntax join-subformats + (lambda (x) + (syntax-case x () + ((_) + #f) + ((_ #f rest ...) + #'(join-subformats rest ...)) + ((_ (fmt arg ...)) + (string? (syntax->datum #'fmt)) + #'(list fmt arg ...)) + ((_ (fmt arg ...) #f rest ...) + (string? (syntax->datum #'fmt)) + #'(join-subformats (fmt arg ...) rest ...)) + ((_ (fmt arg ...) (fmt* arg* ...) rest ...) + (and (string? (syntax->datum #'fmt)) (string? (syntax->datum #'fmt*))) + (let ((fmt** (string-append (syntax->datum #'fmt) + ", " + (syntax->datum #'fmt*)))) + #`(join-subformats (#,fmt** arg ... arg* ...) rest ...)))))) + +(define (make-immediate n) + (pointer->scm (make-pointer n))) + +(define-syntax disassembler + (lambda (x) + (define (parse-first-word word type) + (with-syntax ((word word)) + (case type + ((U8_X24) + #'(() + #f)) + ((U8_U24) + #'(((ash word -8)) + #f)) + ((U8_L24) + ;; Fixme: translate back to label + #'(((ash word -8)) + #f)) + ((U8_R24) + ;; FIXME: parse rest instructions correctly + #'(((ash word -8)) + #f)) + ((U8_U8_I16) + #'(((logand (ash word -8) #xff) + (ash word -16)) + ("~S" (make-immediate (ash word -16))))) + ((U8_U12_U12) + #'(((logand (ash word -8) #xfff) + (ash word -20)) + #f)) + ((U8_U8_U8_U8) + #'(((logand (ash word -8) #xff) + (logand (ash word -16) #xff) + (ash word -24)) + #f)) + (else + (error "bad kind" type))))) + + (define (parse-tail-word word type) + (with-syntax ((word word)) + (case type + ((U8_X24) + #'(((logand word #ff)) + #f)) + ((U8_U24) + #'(((logand word #xff) + (ash word -8)) + #f)) + ((U8_L24) + ;; Fixme: translate back to label + #'(((logand word #xff) + (ash word -8)) + #f)) + ((U8_R24) + ;; FIXME: parse rest instructions correctly + #'(((logand word #xff) + (ash word -8)) + #f)) + ((U8_U8_I16) + ;; FIXME: immediates + #'(((logand word #xff) + (logand (ash word -8) #xff) + (ash word -16)) + #f)) + ((U8_U12_U12) + #'(((logand word #xff) + (logand (ash word -8) #xfff) + (ash word -20)) + #f)) + ((U8_U8_U8_U8) + #'(((logand word #xff) + (logand (ash word -8) #xff) + (logand (ash word -16) #xff) + (ash word -24)) + #f)) + ((U32) + #'((word) + #f)) + ((I32) + ;; FIXME: immediates + #'((word) + #f)) + ((A32) + ;; FIXME: long immediates + #'((word) + #f)) + ((B32) + ;; FIXME: long immediates + #'((word) + #f)) + ((N32) + ;; FIXME: non-immediate + #'((word) + #f)) + ((S32) + ;; FIXME: indirect access + #'((word) + #f)) + ((L32) + ;; FIXME: offset + #'((word) + #f)) + ((LO32) + ;; FIXME: offset + #'((word) + #f)) + ((X8_U24) + #'(((ash word -8)) + #f)) + ((X8_U12_U12) + #'(((logand (ash word -8) #xfff) + (ash word -20)) + #f)) + ((X8_R24) + ;; FIXME: rest + #'(((ash word -8)) + #f)) + ((X8_L24) + ;; FIXME: label + #'(((ash word -8)) + #f)) + ((U1_X7_L24) + ;; FIXME: label + #'(((logand word #x1) + (ash word -8)) + #f)) + ((U1_U7_L24) + ;; FIXME: label + #'(((logand word #x1) + (logand (ash word -1) #x7f) + (ash word -8)) + #f)) + (else + (error "bad kind" type))))) + + (syntax-case x () + ((_ name opcode word0 word* ...) + (let ((vars (generate-temporaries #'(word* ...)))) + (with-syntax (((word* ...) vars) + ((n ...) (map 1+ (iota (length #'(word* ...))))) + (((asm ...) note) + (parse-first-word #'first (syntax->datum #'word0))) + ((((asm* ...) note*) ...) + (map (lambda (word type) + (parse-tail-word word type)) + vars + (syntax->datum #'(word* ...))))) + #'(lambda (buf offset first) + (let ((word* (u32-ref buf (+ offset n))) + ...) + (values (+ 1 (length '(word* ...))) + (list 'name asm ... asm* ... ...) + (join-subformats note note* ...)))))))))) + +(define (disasm-invalid buf offset first) + (error "bad instruction" (logand first #xff) first buf offset)) + +(define disassemblers (make-vector 256 disasm-invalid)) + +(define-syntax define-disassembler + (lambda (x) + (syntax-case x () + ((_ name opcode arg ...) + (with-syntax ((parse (id-append #'name #'parse- #'name))) + #'(let ((parse (disassembler name opcode arg ...))) + (vector-set! disassemblers opcode parse))))))) + +(visit-opcodes define-disassembler) + +;; -> len list +(define (disassemble-one buf offset) + (let ((first (u32-ref buf offset))) + ((vector-ref disassemblers (logand first #xff)) buf offset first))) + +(define (find-elf-symbol elf text-offset) + (and=> + (elf-section-by-name elf ".symtab") + (lambda (symtab) + (let ((len (elf-symbol-table-len symtab)) + (strtab (elf-section elf (elf-section-link symtab)))) + ;; The symbols should be sorted, but maybe somehow that fails + ;; (for example if multiple objects are relinked together). So, + ;; a modicum of tolerance. + (define (bisect) + #f) + (define (linear-search) + (let lp ((n 0)) + (and (< n len) + (let ((sym (elf-symbol-table-ref elf symtab n strtab))) + (if (and (<= (elf-symbol-value sym) text-offset) + (< text-offset (+ (elf-symbol-value sym) + (elf-symbol-size sym)))) + sym + (lp (1+ n))))))) + (or (bisect) (linear-search)))))) + +(define (print-info port addr info extra src) + (format port "~4@S ~32S~@[;; ~1{~@?~}~]~@[~61t at ~a~]\n" + addr info extra src)) + +(define* (disassemble-program program #:optional (port (current-output-port))) + (let* ((code (rtl-program-code program)) + (bv (find-mapped-elf-image code)) + (elf (parse-elf bv)) + (base (pointer-address (bytevector->pointer (elf-bytes elf)))) + (text-base (elf-section-offset + (or (elf-section-by-name elf ".rtl-text") + (error "ELF object has no text section"))))) + (cond + ((find-elf-symbol elf (- code base text-base)) + => (lambda (sym) + ;; The text-base, symbol value, and symbol size are in bytes, + ;; but the disassembler operates on u32 units. + (let ((start (/ (+ (elf-symbol-value sym) text-base) 4)) + (size (/ (elf-symbol-size sym) 4))) + (format port "Disassembly of ~A at #x~X:\n\n" + (elf-symbol-name sym) code) + (let lp ((offset 0)) + (when (< offset size) + (call-with-values (lambda () + (disassemble-one bv (+ start offset))) + (lambda (len elt extra) + (print-info port offset elt extra #f) + (lp (+ offset len))))))))) + (else + (format port "Debugging information unavailable.~%"))) + (values))) diff --git a/test-suite/tests/rtl.test b/test-suite/tests/rtl.test index 219407c3b..74a7ff334 100644 --- a/test-suite/tests/rtl.test +++ b/test-suite/tests/rtl.test @@ -18,7 +18,7 @@ (define-module (tests rtl) #:use-module (test-suite lib) - #:use-module (system vm rtl)) + #:use-module (system vm assembler)) (define-syntax-rule (assert-equal val expr) (let ((x val)) |