From 041f11b353c14246f24ee6330b14d27126a2e2ee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludovic=20Court=C3=A8s?= Date: Sat, 7 Jan 2023 21:52:06 +0100 Subject: linker, assembler: Avoid intermediate bytevectors. This reduces the amount of memory used during linking and reduces the number of copies to be done between bytevectors. * module/system/vm/linker.scm (): Remove 'bv' field and add 'size' and 'writer'. (make-linker-object): Adjust accordingly. (string-table-size): New procedure. (link-string-table!): Remove. (string-table-writer): New procedure. (allocate-segment): Adjust 'make-linker-object' call. (find-shstrndx): Call the 'linker-object-writer' of O. (add-elf-objects): Adjust 'make-linker-object' call. Remove 'make-bytevector' allocations and move serialization to lazy 'writer' procedures. Define 'segments' and 'add-header-segment!'. Return the latter as the first value. * module/system/vm/assembler.scm (make-object): Remove 'bv' parameter and add 'size' and 'writer'. (link-data): Remove 'make-bytevector' call and move serialization to a lazy 'writer' procedure. (link-text-object): Likewise. (link-frame-maps): Likewise. (link-dynamic-section): Likewise. (link-shstrtab): Likewise. (link-symtab): Likewise. (link-arities): Likewise, and remove 'bytevector-append'. (link-docstrs): Likewise. (link-procprops): Likewise. (link-debug): Likewise, and define 'copy-writer'. * test-suite/tests/linker.test (link-elf-with-one-main-section): Adjust accordingly. --- module/system/vm/assembler.scm | 143 ++++++++++++++++++++++++++--------------- module/system/vm/linker.scm | 119 ++++++++++++++++++++-------------- test-suite/tests/linker.test | 21 ++++-- 3 files changed, 176 insertions(+), 107 deletions(-) diff --git a/module/system/vm/assembler.scm b/module/system/vm/assembler.scm index e82eb953a..2ecfce78c 100644 --- a/module/system/vm/assembler.scm +++ b/module/system/vm/assembler.scm @@ -51,6 +51,7 @@ #:use-module (system syntax internal) #:use-module (language bytecode) #:use-module (rnrs bytevectors) + #:use-module (rnrs bytevectors gnu) #:use-module (ice-9 binary-ports) #:use-module (ice-9 vlist) #:use-module (ice-9 match) @@ -1762,7 +1763,7 @@ returned instead." ;;; Helper for linking objects. ;;; -(define (make-object asm name bv relocs labels . kwargs) +(define (make-object asm name size writer relocs labels . kwargs) "Make a linker object. This helper handles interning the name in the shstrtab, assigning the size, allocating a fresh index, and defining a corresponding linker symbol for the start of the section." @@ -1773,9 +1774,9 @@ corresponding linker symbol for the start of the section." (apply make-elf-section #:index index #:name name-idx - #:size (bytevector-length bv) + #:size size kwargs) - bv relocs + size writer relocs (cons (make-linker-symbol name 0) labels)))) @@ -2102,18 +2103,27 @@ should be .data or .rodata), and return the resulting linker object. (else (let* ((byte-len (vhash-fold (lambda (k v len) (+ (byte-length k) (align len 8))) - 0 data)) - (buf (make-bytevector byte-len 0))) + 0 data))) (let lp ((i 0) (pos 0) (relocs '()) (symbols '())) (if (< i (vlist-length data)) (match (vlist-ref data i) ((obj . obj-label) - (write buf pos obj) (lp (1+ i) (align (+ (byte-length obj) pos) 8) (add-relocs obj pos relocs) (cons (make-linker-symbol obj-label pos) symbols)))) - (make-object asm name buf relocs symbols + (make-object asm name byte-len + (lambda (bv offset) + (let loop ((i 0) (pos offset)) + (when (< i (vlist-length data)) + (match (vlist-ref data i) + ((obj . obj-label) + (write bv pos obj) + (loop (1+ i) + (align + (+ (byte-length obj) pos) + 8))))))) + relocs symbols #:flags (match name ('.data (logior SHF_ALLOC SHF_WRITE)) ('.rodata SHF_ALLOC)))))))))) @@ -2219,13 +2229,14 @@ The offsets are expected to be expressed in words." (define (link-text-object asm) "Link the .rtl-text section, swapping the endianness of the bytes if needed." - (let ((buf (make-bytevector (asm-pos asm)))) - (bytevector-copy! (asm-buf asm) 0 buf 0 (bytevector-length buf)) - (unless (eq? (asm-endianness asm) (native-endianness)) - (byte-swap/4! buf)) - (patch-relocs! buf (asm-relocs asm) (asm-labels asm)) - (make-object asm '.rtl-text - buf + (let ((size (asm-pos asm))) + (make-object asm '.rtl-text size + (lambda (bv offset) + (let ((buf (bytevector-slice bv offset size))) + (bytevector-copy! (asm-buf asm) 0 buf 0 size) + (unless (eq? (asm-endianness asm) (native-endianness)) + (byte-swap/4! buf)) + (patch-relocs! buf (asm-relocs asm) (asm-labels asm)))) (process-relocs (asm-relocs asm) (asm-labels asm)) (process-labels (asm-labels asm))))) @@ -2261,7 +2272,7 @@ needed." (let* ((endianness (asm-endianness asm)) (header-pos frame-maps-prefix-len) (map-pos (+ header-pos (* count frame-map-header-len))) - (bv (make-bytevector (+ map-pos map-len) 0))) + (size (+ map-pos map-len))) (define (write! bv) (bytevector-u32-set! bv 4 map-pos endianness) (let lp ((maps maps) (header-pos header-pos) (map-pos map-pos)) @@ -2281,9 +2292,9 @@ needed." (write-bytes (1+ map-pos) (ash map -8) (1- byte-length))))))))) - (write! bv) - (make-object asm '.guile.frame-maps - bv + (make-object asm '.guile.frame-maps size + (lambda (bv offset) + (write! (bytevector-slice bv offset))) (list (make-linker-reloc 'abs32/1 0 0 '.rtl-text)) '() #:type SHT_PROGBITS #:flags SHF_ALLOC))) (match (asm-slot-maps asm) @@ -2319,7 +2330,7 @@ procedure with label @var{rw-init}. @var{rw-init} may be false. If (words (if rw (+ words 4) words)) (words (if rw-init (+ words 2) words)) (words (if frame-maps (+ words 2) words)) - (bv (make-bytevector (* word-size words) 0))) + (size (* word-size words))) (define relocs ;; This must match the 'set-label!' calls below. @@ -2353,7 +2364,7 @@ procedure with label @var{rw-init}. @var{rw-init} may be false. If (set-uword! 4 DT_GUILE_GC_ROOT) (set-label! 5 '.data) (set-uword! 6 DT_GUILE_GC_ROOT_SZ) - (set-uword! 7 (bytevector-length (linker-object-bv rw))) + (set-uword! 7 (linker-object-size rw)) (when rw-init (set-uword! 8 DT_INIT) ; constants (set-label! 9 rw-init))) @@ -2363,8 +2374,10 @@ procedure with label @var{rw-init}. @var{rw-init} may be false. If (set-uword! (- words 2) DT_NULL) (set-uword! (- words 1) 0)) - (write! bv) - (make-object asm '.dynamic bv relocs '() + (make-object asm '.dynamic size + (lambda (bv offset) + (write! (bytevector-slice bv offset))) + relocs '() #:type SHT_DYNAMIC #:flags SHF_ALLOC))) (case (asm-word-size asm) ((4) (emit-dynamic-section 4 bytevector-u32-set! abs32/1)) @@ -2375,7 +2388,8 @@ procedure with label @var{rw-init}. @var{rw-init} may be false. If "Link the string table for the section headers." (intern-section-name! asm ".shstrtab") (make-object asm '.shstrtab - (link-string-table! (asm-shstrtab asm)) + (string-table-size (asm-shstrtab asm)) + (string-table-writer (asm-shstrtab asm)) '() '() #:type SHT_STRTAB #:flags 0)) @@ -2385,8 +2399,7 @@ procedure with label @var{rw-init}. @var{rw-init} may be false. If (size (elf-symbol-len word-size)) (meta (reverse (asm-meta asm))) (n (length meta)) - (strtab (make-string-table)) - (bv (make-bytevector (* n size) 0))) + (strtab (make-string-table))) (define (intern-string! name) (string-table-intern! strtab (if name (symbol->string name) ""))) (define names @@ -2410,13 +2423,13 @@ procedure with label @var{rw-init}. @var{rw-init} may be false. If text-section)))) names meta (iota n))) - (write-symbols! bv 0) (let ((strtab (make-object asm '.strtab - (link-string-table! strtab) + (string-table-size strtab) + (string-table-writer strtab) '() '() #:type SHT_STRTAB #:flags 0))) (values (make-object asm '.symtab - bv + (* n size) write-symbols! '() '() #:type SHT_SYMTAB #:flags 0 #:entsize size #:link (elf-section-index @@ -2626,13 +2639,6 @@ procedure with label @var{rw-init}. @var{rw-init} may be false. If ((arity) (lambda-size arity)) (arities (case-lambda-size arities)))) - (define (bytevector-append a b) - (let ((out (make-bytevector (+ (bytevector-length a) - (bytevector-length b))))) - (bytevector-copy! a 0 out 0 (bytevector-length a)) - (bytevector-copy! b 0 out (bytevector-length a) (bytevector-length b)) - out)) - (let* ((endianness (asm-endianness asm)) (metas (reverse (asm-meta asm))) (header-size (fold (lambda (meta size) @@ -2644,12 +2650,23 @@ procedure with label @var{rw-init}. @var{rw-init} may be false. If (bytevector-u32-set! headers 0 (bytevector-length headers) endianness) (let-values (((names-port get-name-bv) (open-bytevector-output-port))) (let* ((relocs (write-arities asm metas headers names-port strtab)) + (name-bv (get-name-bv)) (strtab (make-object asm '.guile.arities.strtab - (link-string-table! strtab) + (string-table-size strtab) + (string-table-writer strtab) '() '() #:type SHT_STRTAB #:flags 0))) (values (make-object asm '.guile.arities - (bytevector-append headers (get-name-bv)) + (+ header-size (bytevector-length name-bv)) + (lambda (bv offset) + ;; FIXME: Avoid extra allocation + copy. + (bytevector-copy! headers 0 + bv offset + header-size) + (bytevector-copy! name-bv 0 + bv + (+ offset header-size) + (bytevector-length name-bv))) relocs '() #:type SHT_PROGBITS #:flags 0 #:link (elf-section-index @@ -2681,28 +2698,31 @@ procedure with label @var{rw-init}. @var{rw-init} may be false. If (cons (meta-low-pc meta) (cdar tail))))) (reverse (asm-meta asm)))) (let* ((endianness (asm-endianness asm)) - (docstrings (find-docstrings)) (strtab (make-string-table)) - (bv (make-bytevector (* (length docstrings) docstr-size) 0))) + (docstrings (map (match-lambda + ((pc . str) + (cons pc (string-table-intern! strtab str)))) + (find-docstrings)))) (define (write-docstrings! bv offset) (fold (lambda (pair pos) (match pair - ((pc . string) + ((pc . string-pos) (bytevector-u32-set! bv pos pc endianness) (bytevector-u32-set! bv (+ pos 4) - (string-table-intern! strtab string) + string-pos endianness) (+ pos docstr-size)))) offset docstrings)) - (write-docstrings! bv 0) (let ((strtab (make-object asm '.guile.docstrs.strtab - (link-string-table! strtab) + (string-table-size strtab) + (string-table-writer strtab) '() '() #:type SHT_STRTAB #:flags 0))) (values (make-object asm '.guile.docstrs - bv + (* (length docstrings) docstr-size) + write-docstrings! '() '() #:type SHT_PROGBITS #:flags 0 #:link (elf-section-index @@ -2751,7 +2771,7 @@ procedure with label @var{rw-init}. @var{rw-init} may be false. If (reverse (asm-meta asm)))) (let* ((endianness (asm-endianness asm)) (procprops (find-procprops)) - (bv (make-bytevector (* (length procprops) procprops-size) 0))) + (size (* (length procprops) procprops-size))) (define (write-procprops! bv offset) (let lp ((procprops procprops) (pos offset)) (match procprops @@ -2773,9 +2793,8 @@ procedure with label @var{rw-init}. @var{rw-init} may be false. If (intern-constant asm props)) relocs)))))) - (write-procprops! bv 0) (make-object asm '.guile.procprops - bv + size write-procprops! relocs '() #:type SHT_PROGBITS #:flags 0))) @@ -3094,6 +3113,11 @@ procedure with label @var{rw-init}. @var{rw-init} may be false. If (for-each write-die children) (put-uleb128 die-port 0)))))) + (define (copy-writer source) + (lambda (bv offset) + (bytevector-copy! source 0 bv offset + (bytevector-length source)))) + ;; Compilation unit header. (put-u32 die-port 0) ; Length; will patch later. (put-u16 die-port 4) ; DWARF 4. @@ -3111,19 +3135,32 @@ procedure with label @var{rw-init}. @var{rw-init} may be false. If ;; Patch DWARF32 length. (bytevector-u32-set! bv 0 (- (bytevector-length bv) 4) (asm-endianness asm)) - (make-object asm '.debug_info bv die-relocs '() + (make-object asm '.debug_info + (bytevector-length bv) + (copy-writer bv) + die-relocs '() #:type SHT_PROGBITS #:flags 0)) - (make-object asm '.debug_abbrev (get-abbrev-bv) '() '() - #:type SHT_PROGBITS #:flags 0) - (make-object asm '.debug_str (link-string-table! strtab) '() '() + (let ((bv (get-abbrev-bv))) + (make-object asm '.debug_abbrev + (bytevector-length bv) (copy-writer bv) + '() '() + #:type SHT_PROGBITS #:flags 0)) + (make-object asm '.debug_str + (string-table-size strtab) + (string-table-writer strtab) + '() '() #:type SHT_PROGBITS #:flags 0) - (make-object asm '.debug_loc #vu8() '() '() + (make-object asm '.debug_loc + 0 (lambda (bv offset) #t) + '() '() #:type SHT_PROGBITS #:flags 0) (let ((bv (get-line-bv))) ;; Patch DWARF32 length. (bytevector-u32-set! bv 0 (- (bytevector-length bv) 4) (asm-endianness asm)) - (make-object asm '.debug_line bv line-relocs '() + (make-object asm '.debug_line + (bytevector-length bv) (copy-writer bv) + line-relocs '() #:type SHT_PROGBITS #:flags 0))))) (define (link-objects asm) diff --git a/module/system/vm/linker.scm b/module/system/vm/linker.scm index a618958f6..56e19c285 100644 --- a/module/system/vm/linker.scm +++ b/module/system/vm/linker.scm @@ -66,6 +66,7 @@ (define-module (system vm linker) #:use-module (rnrs bytevectors) + #:use-module (rnrs bytevectors gnu) #:use-module (system foreign) #:use-module (system base target) #:use-module ((srfi srfi-1) #:select (append-map)) @@ -81,13 +82,15 @@ linker-object? linker-object-name linker-object-section - linker-object-bv + linker-object-size + linker-object-writer linker-object-relocs (linker-object-symbols* . linker-object-symbols) make-string-table string-table-intern! - link-string-table! + string-table-size + string-table-writer link-elf)) @@ -134,20 +137,22 @@ (address linker-symbol-address)) (define-record-type - (%make-linker-object name section bv relocs symbols) + (%make-linker-object name section size writer relocs symbols) linker-object? (name linker-object-name) (section linker-object-section) - (bv linker-object-bv) + (size linker-object-size) + (writer linker-object-writer set-linker-object-writer!) (relocs linker-object-relocs) (symbols linker-object-symbols)) -(define (make-linker-object name section bv relocs symbols) +(define (make-linker-object name section size writer relocs symbols) "Create a linker object named @var{name} (a string, or #f for no name), -@code{} header @var{section}, bytevector contents @var{bv}, +@code{} header @var{section}, its @var{size} in bytes, +a procedure @code{writer} to write its contents to a bytevector, a list of linker relocations @var{relocs}, and list of linker symbols @var{symbols}." - (%make-linker-object name section bv relocs + (%make-linker-object name section size writer relocs ;; Hide a symbol to the beginning of the section ;; in the symbols. (cons (make-linker-symbol (gensym "*section*") 0) @@ -169,6 +174,10 @@ list of linker relocations @var{relocs}, and list of linker symbols "Return a string table with one entry: the empty string." (%make-string-table '(("" 0 #vu8())) #f)) +(define (string-table-size strtab) + "Return the size in bytes of the wire representation of @var{strtab}." + (string-table-length (string-table-strings strtab))) + (define (string-table-length strings) "Return the number of bytes needed for the @var{strings}." (match strings @@ -192,19 +201,19 @@ Returns the byte index of the string in that table." strings)) next)))))) -(define (link-string-table! table) - "Link the functional string table @var{table} into a sequence of -bytes, suitable for use as the contents of an ELF string table section." - (match table - (($ strings #f) - (let ((out (make-bytevector (string-table-length strings) 0))) - (for-each - (match-lambda - ((_ pos bytes) - (bytevector-copy! bytes 0 out pos (bytevector-length bytes)))) - strings) - (set-string-table-linked?! table #t) - out)))) +(define (string-table-writer table) + "Return a \"writer\" procedure that links the string +table @var{table} into a sequence of bytes, suitable for use as the +contents of an ELF string table section." + (lambda (bv offset) + (match table + (($ strings #f) + (for-each (match-lambda + ((_ pos bytes) + (bytevector-copy! bytes 0 bv (+ pos offset) + (bytevector-length bytes)))) + strings) + (set-string-table-linked?! table #t))))) (define (segment-kind section) "Return the type of segment needed to store @var{section}, as a pair. @@ -401,7 +410,8 @@ the segment table using @code{write-segment-header!}." (cons (make-linker-object (linker-object-name o) (relocate-section-header section addr) - (linker-object-bv o) + (linker-object-size o) + (linker-object-writer o) (linker-object-relocs o) (linker-object-symbols o)) out) @@ -458,7 +468,6 @@ locations, as given in @var{symtab}." (let* ((section (linker-object-section o)) (offset (elf-section-offset section)) (len (elf-section-size section)) - (bytes (linker-object-bv o)) (relocs (linker-object-relocs o))) (if (zero? (logand SHF_ALLOC (elf-section-flags section))) (unless (zero? (elf-section-addr section)) @@ -467,9 +476,9 @@ locations, as given in @var{symtab}." (error "loadable section has offset != addr" section))) (if (not (= (elf-section-type section) SHT_NOBITS)) (begin - (if (not (= len (bytevector-length bytes))) - (error "unexpected length" section bytes)) - (bytevector-copy! bytes 0 bv offset len) + (unless (= len (linker-object-size o)) + (error "unexpected length" section o)) + ((linker-object-writer o) bv offset) (for-each (lambda (reloc) (process-reloc reloc bv offset symtab endianness)) relocs))))) @@ -515,7 +524,7 @@ list of objects, augmented with objects for the special ELF sections." (make-linker-object "" (make-elf-section #:index 0 #:type SHT_NULL #:flags 0 #:addralign 0) - #vu8() '() '())) + 0 (lambda (bv offset) #t) '() '())) ;; The ELF header and the segment table. ;; @@ -529,15 +538,15 @@ list of objects, augmented with objects for the special ELF sections." (elf-header-shoff-offset word-size) 0 shoff-label)) - (size (+ phoff (* phnum phentsize))) - (bv (make-bytevector size 0))) - (write-elf-header bv header) + (size (+ phoff (* phnum phentsize)))) ;; Leave the segment table uninitialized; it will be filled in ;; later by calls to the write-segment-header! closure. (make-linker-object #f (make-elf-section #:index index #:type SHT_PROGBITS #:flags SHF_ALLOC #:size size) - bv + size + (lambda (bv offset) + (write-elf-header (bytevector-slice bv offset) header)) (list shoff-reloc) '()))) @@ -545,7 +554,6 @@ list of objects, augmented with objects for the special ELF sections." ;; (define (make-footer objects shoff-label) (let* ((size (* shentsize shnum)) - (bv (make-bytevector size 0)) (section-table (make-elf-section #:index (length objects) #:type SHT_PROGBITS #:flags 0 @@ -578,10 +586,6 @@ list of objects, augmented with objects for the special ELF sections." (* shentsize (elf-section-index section))))) (write-elf-section-header bv offset endianness word-size section)))) - (for-each (lambda (object) - (write-object-elf-header! bv 0 object)) - objects) - (let ((relocs (fold-values (lambda (object relocs) (compute-reloc @@ -591,7 +595,14 @@ list of objects, augmented with objects for the special ELF sections." relocs)) objects (compute-reloc shoff-label section-table '())))) - (%make-linker-object #f section-table bv relocs + (%make-linker-object #f section-table size + (lambda (bv offset) + (for-each (lambda (object) + (write-object-elf-header! bv + offset + object)) + objects)) + relocs (list (make-linker-symbol shoff-label 0)))))) (let* ((null-section (make-null-section)) @@ -602,7 +613,8 @@ list of objects, augmented with objects for the special ELF sections." (objects (cons header objects)) (footer (make-footer objects shoff)) - (objects (cons footer objects))) + (objects (cons footer objects)) + (segments '())) ;; The header includes the segment table, which needs offsets and ;; sizes of the segments. Normally we would use relocs to rewrite @@ -611,16 +623,27 @@ list of objects, augmented with objects for the special ELF sections." ;; between two symbols, and it's probably a bad idea architecturally ;; to create one. ;; - ;; So instead we return a closure to patch up the segment table. - ;; Normally we'd shy away from such destructive interfaces, but it's - ;; OK as we create the header section ourselves. - ;; - (define (write-segment-header! segment) - (let ((bv (linker-object-bv header)) - (offset (+ phoff (* (elf-segment-index segment) phentsize)))) - (write-elf-program-header bv offset endianness word-size segment))) - - (values write-segment-header! objects))) + ;; So instead change HEADER's writer to patch up the segment table. + (define (add-header-segment! segment) + (set! segments (cons segment segments))) + + (define write-header! + (linker-object-writer header)) + + (define (write-header+segments! bv offset) + (for-each (lambda (segment) + (let ((offset (+ offset + phoff + (* (elf-segment-index segment) phentsize)))) + (write-elf-program-header bv offset + endianness + word-size + segment))) + segments) + (write-header! bv offset)) + + (set-linker-object-writer! header write-header+segments!) + (values add-header-segment! objects))) (define (record-special-segments write-segment-header! phidx all-objects) (let lp ((phidx phidx) (objects all-objects)) @@ -735,7 +758,7 @@ Returns a bytevector." (receive (size objects symtab) (allocate-elf objects page-aligned? endianness word-size abi type machine-type) - (let ((bv (make-bytevector size 0))) + (let ((bv (make-bytevector size 0))) ;TODO: Remove allocation. (for-each (lambda (object) (write-linker-object bv object symtab endianness)) diff --git a/test-suite/tests/linker.test b/test-suite/tests/linker.test index e7ecc291e..ea54618b4 100644 --- a/test-suite/tests/linker.test +++ b/test-suite/tests/linker.test @@ -1,6 +1,6 @@ ;;;; linker.test -*- scheme -*- ;;;; -;;;; Copyright 2013, 2019 Free Software Foundation, Inc. +;;;; Copyright 2013, 2019, 2023 Free Software Foundation, Inc. ;;;; ;;;; This library is free software; you can redistribute it and/or ;;;; modify it under the terms of the GNU Lesser General Public @@ -25,23 +25,32 @@ (define (link-elf-with-one-main-section name bytes) (let ((strtab (make-string-table))) - (define (make-object index name bv relocs . kwargs) + (define (make-object index name size writer relocs . kwargs) (let ((name-idx (string-table-intern! strtab (symbol->string name)))) (make-linker-object (symbol->string name) (apply make-elf-section #:index index #:name name-idx - #:size (bytevector-length bv) + #:size size kwargs) - bv relocs + size writer relocs (list (make-linker-symbol name 0))))) (define (make-shstrtab) (string-table-intern! strtab ".shstrtab") - (make-object 2 '.shstrtab (link-string-table! strtab) '() + (make-object 2 '.shstrtab + (string-table-size strtab) + (string-table-writer strtab) + '() #:type SHT_STRTAB #:flags 0)) (let* ((word-size (target-word-size)) (endianness (target-endianness)) - (sec (make-object 1 name bytes '())) + (sec (make-object 1 name + (bytevector-length bytes) + (lambda (bv offset) + (bytevector-copy! bytes 0 bv offset + (bytevector-length + bytes))) + '())) ;; This needs to be linked last, because linking other ;; sections adds entries to the string table. (shstrtab (make-shstrtab))) -- cgit v1.2.1