diff options
authorAlexander Yermolovich <>2023-01-24 15:54:28 -0800
committerAlexander Yermolovich <>2023-01-24 15:55:03 -0800
commitf230099c132722a3065ac6a00e1cd6052b1a7365 (patch)
parent94f255c2c4d5c6733819affac5d1acb19e3f5e94 (diff)
[BOLT][DWARF] Reuse entries in .debug_addr when not modified
In some binaries produced with ThinLTO there are CUs that share an entry in .debug_addr. Previously we would generate a new entry for each CU, which led to a binary size increase. This changes the behavior so that we reuse entries in .debug_addr. Reviewed By: maksfb Differential Revision:
5 files changed, 474 insertions, 0 deletions
diff --git a/bolt/lib/Core/DebugData.cpp b/bolt/lib/Core/DebugData.cpp
index 1cc7cda7b744..a1250177f928 100644
--- a/bolt/lib/Core/DebugData.cpp
+++ b/bolt/lib/Core/DebugData.cpp
@@ -480,6 +480,7 @@ AddressSectionBuffer DebugAddrWriterDwarf5::finalize() {
DWARFDebugAddrTable AddrTable;
DIDumpOptions DumpOpts;
constexpr uint32_t HeaderSize = 8;
+ DenseMap<uint64_t, uint64_t> UnmodifiedAddressOffsets;
for (std::unique_ptr<DWARFUnit> &CU : BC->DwCtx->compile_units()) {
const uint64_t CUID = getCUID(*CU.get());
const uint8_t AddrSize = CU->getAddressByteSize();
@@ -494,11 +495,18 @@ AddressSectionBuffer DebugAddrWriterDwarf5::finalize() {
// Address base offset is to the first entry.
// The size of header is 8 bytes.
uint64_t Offset = *BaseOffset - HeaderSize;
+ auto Iter = UnmodifiedAddressOffsets.find(Offset);
+ if (Iter != UnmodifiedAddressOffsets.end()) {
+ DWOIdToOffsetMap[CUID] = Iter->getSecond();
+ continue;
+ }
+ UnmodifiedAddressOffsets[Offset] = Buffer.size() + HeaderSize;
if (Error Err = AddrTable.extract(AddrData, &Offset, 5, AddrSize,
DumpOpts.WarningHandler)) {
uint32_t Index = 0;
for (uint64_t Addr : AddrTable.getAddressEntries())
AMIter->second.insert(Addr, Index++);
diff --git a/bolt/test/X86/Inputs/dwarf5-helper1-addr-section-reuse.s b/bolt/test/X86/Inputs/dwarf5-helper1-addr-section-reuse.s
new file mode 100644
index 000000000000..6227ebb1daeb
--- /dev/null
+++ b/bolt/test/X86/Inputs/dwarf5-helper1-addr-section-reuse.s
@@ -0,0 +1,144 @@
+ # int foo() {
+ # return 0;
+ #}
+ .text
+ .file "helper1.cpp"
+ .globl _Z3foov # -- Begin function _Z3foov
+ .p2align 4, 0x90
+ .type _Z3foov,@function
+_Z3foov: # @_Z3foov
+ .file 0 "testAddr" "helper1.cpp" md5 0xd7d0e6ab5d3b7dbefa5b3fd3cd035697
+ .loc 0 1 0 # helper1.cpp:1:0
+ .cfi_startproc
+# %bb.0:
+ pushq %rbp
+ .cfi_def_cfa_offset 16
+ .cfi_offset %rbp, -16
+ movq %rsp, %rbp
+ .cfi_def_cfa_register %rbp
+ .loc 0 2 3 prologue_end # helper1.cpp:2:3
+ xorl %eax, %eax
+ .loc 0 2 3 epilogue_begin is_stmt 0 # helper1.cpp:2:3
+ popq %rbp
+ .cfi_def_cfa %rsp, 8
+ retq
+ .size _Z3foov, .Lfunc_end0-_Z3foov
+ .cfi_endproc
+ # -- End function
+ .section .debug_abbrev,"",@progbits
+ .byte 1 # Abbreviation Code
+ .byte 17 # DW_TAG_compile_unit
+ .byte 1 # DW_CHILDREN_yes
+ .byte 37 # DW_AT_producer
+ .byte 37 # DW_FORM_strx1
+ .byte 19 # DW_AT_language
+ .byte 5 # DW_FORM_data2
+ .byte 3 # DW_AT_name
+ .byte 37 # DW_FORM_strx1
+ .byte 114 # DW_AT_str_offsets_base
+ .byte 23 # DW_FORM_sec_offset
+ .byte 16 # DW_AT_stmt_list
+ .byte 23 # DW_FORM_sec_offset
+ .byte 27 # DW_AT_comp_dir
+ .byte 37 # DW_FORM_strx1
+ .byte 115 # DW_AT_addr_base
+ .byte 23 # DW_FORM_sec_offset
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 2 # Abbreviation Code
+ .byte 46 # DW_TAG_subprogram
+ .byte 0 # DW_CHILDREN_no
+ .byte 64 # DW_AT_frame_base
+ .byte 24 # DW_FORM_exprloc
+ .byte 110 # DW_AT_linkage_name
+ .byte 37 # DW_FORM_strx1
+ .byte 3 # DW_AT_name
+ .byte 37 # DW_FORM_strx1
+ .byte 58 # DW_AT_decl_file
+ .byte 11 # DW_FORM_data1
+ .byte 59 # DW_AT_decl_line
+ .byte 11 # DW_FORM_data1
+ .byte 73 # DW_AT_type
+ .byte 19 # DW_FORM_ref4
+ .byte 63 # DW_AT_external
+ .byte 25 # DW_FORM_flag_present
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 3 # Abbreviation Code
+ .byte 36 # DW_TAG_base_type
+ .byte 0 # DW_CHILDREN_no
+ .byte 3 # DW_AT_name
+ .byte 37 # DW_FORM_strx1
+ .byte 62 # DW_AT_encoding
+ .byte 11 # DW_FORM_data1
+ .byte 11 # DW_AT_byte_size
+ .byte 11 # DW_FORM_data1
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 0 # EOM(3)
+ .section .debug_info,"",@progbits
+ .long .Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit
+ .short 5 # DWARF version number
+ .byte 1 # DWARF Unit Type
+ .byte 8 # Address Size (in bytes)
+ .long .debug_abbrev # Offset Into Abbrev. Section
+ .byte 1 # Abbrev [1] 0xc:0x2c DW_TAG_compile_unit
+ .byte 0 # DW_AT_producer
+ .short 33 # DW_AT_language
+ .byte 1 # DW_AT_name
+ .long .Lstr_offsets_base0 # DW_AT_str_offsets_base
+ .long .Lline_table_start0 # DW_AT_stmt_list
+ .byte 2 # DW_AT_comp_dir
+ .long 0x8 # DW_AT_addr_base
+ .byte 2 # Abbrev [2] 0x23:0x10 DW_TAG_subprogram
+ .byte 1 # DW_AT_frame_base
+ .byte 86
+ .byte 3 # DW_AT_linkage_name
+ .byte 4 # DW_AT_name
+ .byte 0 # DW_AT_decl_file
+ .byte 1 # DW_AT_decl_line
+ .long 51 # DW_AT_type
+ # DW_AT_external
+ .byte 3 # Abbrev [3] 0x33:0x4 DW_TAG_base_type
+ .byte 5 # DW_AT_name
+ .byte 5 # DW_AT_encoding
+ .byte 4 # DW_AT_byte_size
+ .byte 0 # End Of Children Mark
+ .section .debug_str_offsets,"",@progbits
+ .long 28 # Length of String Offsets Set
+ .short 5
+ .short 0
+ .section .debug_str,"MS",@progbits,1
+ .asciz "clang version 15.0.0" # string offset=0
+ .asciz "helper1.cpp" # string offset=146
+ .asciz "testAddr" # string offset=158
+ .asciz "_Z3foov" # string offset=205
+ .asciz "foo" # string offset=213
+ .asciz "int" # string offset=217
+ .section .debug_str_offsets,"",@progbits
+ .long .Linfo_string0
+ .long .Linfo_string1
+ .long .Linfo_string2
+ .long .Linfo_string3
+ .long .Linfo_string4
+ .long .Linfo_string5
+ .ident "clang version 15.0.0"
+ .section ".note.GNU-stack","",@progbits
+ .addrsig
+ .section .debug_line,"",@progbits
diff --git a/bolt/test/X86/Inputs/dwarf5-helper2-addr-section-reuse.s b/bolt/test/X86/Inputs/dwarf5-helper2-addr-section-reuse.s
new file mode 100644
index 000000000000..468cb82f0029
--- /dev/null
+++ b/bolt/test/X86/Inputs/dwarf5-helper2-addr-section-reuse.s
@@ -0,0 +1,144 @@
+ # int foo2() {
+ # return 0;
+ #}
+ .text
+ .file "helper2.cpp"
+ .globl _Z4foo2v # -- Begin function _Z4foo2v
+ .p2align 4, 0x90
+ .type _Z4foo2v,@function
+_Z4foo2v: # @_Z4foo2v
+ .file 0 "testAddr" "helper2.cpp" md5 0xd625613508584d1558af439616b0e2d3
+ .loc 0 1 0 # helper2.cpp:1:0
+ .cfi_startproc
+# %bb.0:
+ pushq %rbp
+ .cfi_def_cfa_offset 16
+ .cfi_offset %rbp, -16
+ movq %rsp, %rbp
+ .cfi_def_cfa_register %rbp
+ .loc 0 2 3 prologue_end # helper2.cpp:2:3
+ xorl %eax, %eax
+ .loc 0 2 3 epilogue_begin is_stmt 0 # helper2.cpp:2:3
+ popq %rbp
+ .cfi_def_cfa %rsp, 8
+ retq
+ .size _Z4foo2v, .Lfunc_end0-_Z4foo2v
+ .cfi_endproc
+ # -- End function
+ .section .debug_abbrev,"",@progbits
+ .byte 1 # Abbreviation Code
+ .byte 17 # DW_TAG_compile_unit
+ .byte 1 # DW_CHILDREN_yes
+ .byte 37 # DW_AT_producer
+ .byte 37 # DW_FORM_strx1
+ .byte 19 # DW_AT_language
+ .byte 5 # DW_FORM_data2
+ .byte 3 # DW_AT_name
+ .byte 37 # DW_FORM_strx1
+ .byte 114 # DW_AT_str_offsets_base
+ .byte 23 # DW_FORM_sec_offset
+ .byte 16 # DW_AT_stmt_list
+ .byte 23 # DW_FORM_sec_offset
+ .byte 27 # DW_AT_comp_dir
+ .byte 37 # DW_FORM_strx1
+ .byte 115 # DW_AT_addr_base
+ .byte 23 # DW_FORM_sec_offset
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 2 # Abbreviation Code
+ .byte 46 # DW_TAG_subprogram
+ .byte 0 # DW_CHILDREN_no
+ .byte 64 # DW_AT_frame_base
+ .byte 24 # DW_FORM_exprloc
+ .byte 110 # DW_AT_linkage_name
+ .byte 37 # DW_FORM_strx1
+ .byte 3 # DW_AT_name
+ .byte 37 # DW_FORM_strx1
+ .byte 58 # DW_AT_decl_file
+ .byte 11 # DW_FORM_data1
+ .byte 59 # DW_AT_decl_line
+ .byte 11 # DW_FORM_data1
+ .byte 73 # DW_AT_type
+ .byte 19 # DW_FORM_ref4
+ .byte 63 # DW_AT_external
+ .byte 25 # DW_FORM_flag_present
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 3 # Abbreviation Code
+ .byte 36 # DW_TAG_base_type
+ .byte 0 # DW_CHILDREN_no
+ .byte 3 # DW_AT_name
+ .byte 37 # DW_FORM_strx1
+ .byte 62 # DW_AT_encoding
+ .byte 11 # DW_FORM_data1
+ .byte 11 # DW_AT_byte_size
+ .byte 11 # DW_FORM_data1
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 0 # EOM(3)
+ .section .debug_info,"",@progbits
+ .long .Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit
+ .short 5 # DWARF version number
+ .byte 1 # DWARF Unit Type
+ .byte 8 # Address Size (in bytes)
+ .long .debug_abbrev # Offset Into Abbrev. Section
+ .byte 1 # Abbrev [1] 0xc:0x2c DW_TAG_compile_unit
+ .byte 0 # DW_AT_producer
+ .short 33 # DW_AT_language
+ .byte 1 # DW_AT_name
+ .long .Lstr_offsets_base0 # DW_AT_str_offsets_base
+ .long .Lline_table_start0 # DW_AT_stmt_list
+ .byte 2 # DW_AT_comp_dir
+ .long 0x8 # DW_AT_addr_base
+ .byte 2 # Abbrev [2] 0x23:0x10 DW_TAG_subprogram
+ .byte 1 # DW_AT_frame_base
+ .byte 86
+ .byte 3 # DW_AT_linkage_name
+ .byte 4 # DW_AT_name
+ .byte 0 # DW_AT_decl_file
+ .byte 1 # DW_AT_decl_line
+ .long 51 # DW_AT_type
+ # DW_AT_external
+ .byte 3 # Abbrev [3] 0x33:0x4 DW_TAG_base_type
+ .byte 5 # DW_AT_name
+ .byte 5 # DW_AT_encoding
+ .byte 4 # DW_AT_byte_size
+ .byte 0 # End Of Children Mark
+ .section .debug_str_offsets,"",@progbits
+ .long 28 # Length of String Offsets Set
+ .short 5
+ .short 0
+ .section .debug_str,"MS",@progbits,1
+ .asciz "clang version 15.0.0" # string offset=0
+ .asciz "helper2.cpp" # string offset=146
+ .asciz "testAddr" # string offset=158
+ .asciz "_Z4foo2v" # string offset=205
+ .asciz "foo2" # string offset=214
+ .asciz "int" # string offset=219
+ .section .debug_str_offsets,"",@progbits
+ .long .Linfo_string0
+ .long .Linfo_string1
+ .long .Linfo_string2
+ .long .Linfo_string3
+ .long .Linfo_string4
+ .long .Linfo_string5
+ .ident "clang version 15.0.0"
+ .section ".note.GNU-stack","",@progbits
+ .addrsig
+ .section .debug_line,"",@progbits
diff --git a/bolt/test/X86/Inputs/dwarf5-main-addr-section-reuse.s b/bolt/test/X86/Inputs/dwarf5-main-addr-section-reuse.s
new file mode 100644
index 000000000000..c2738eadea9a
--- /dev/null
+++ b/bolt/test/X86/Inputs/dwarf5-main-addr-section-reuse.s
@@ -0,0 +1,160 @@
+ #int main() {
+ # return 0;
+ #}
+ .text
+ .file "main.cpp"
+ .globl main # -- Begin function main
+ .p2align 4, 0x90
+ .type main,@function
+main: # @main
+ .file 0 "testAddr" "main.cpp" md5 0xd7b10b0a4a7fb8e0790feb7a5cd9b69a
+ .loc 0 1 0 # main.cpp:1:0
+ .cfi_startproc
+# %bb.0:
+ pushq %rbp
+ .cfi_def_cfa_offset 16
+ .cfi_offset %rbp, -16
+ movq %rsp, %rbp
+ .cfi_def_cfa_register %rbp
+ movl $0, -4(%rbp)
+ .loc 0 2 3 prologue_end # main.cpp:2:3
+ xorl %eax, %eax
+ .loc 0 2 3 epilogue_begin is_stmt 0 # main.cpp:2:3
+ popq %rbp
+ .cfi_def_cfa %rsp, 8
+ retq
+ .size main, .Lfunc_end0-main
+ .cfi_endproc
+ # -- End function
+ .section .debug_abbrev,"",@progbits
+ .byte 1 # Abbreviation Code
+ .byte 17 # DW_TAG_compile_unit
+ .byte 1 # DW_CHILDREN_yes
+ .byte 37 # DW_AT_producer
+ .byte 37 # DW_FORM_strx1
+ .byte 19 # DW_AT_language
+ .byte 5 # DW_FORM_data2
+ .byte 3 # DW_AT_name
+ .byte 37 # DW_FORM_strx1
+ .byte 114 # DW_AT_str_offsets_base
+ .byte 23 # DW_FORM_sec_offset
+ .byte 16 # DW_AT_stmt_list
+ .byte 23 # DW_FORM_sec_offset
+ .byte 27 # DW_AT_comp_dir
+ .byte 37 # DW_FORM_strx1
+ .byte 17 # DW_AT_low_pc
+ .byte 27 # DW_FORM_addrx
+ .byte 18 # DW_AT_high_pc
+ .byte 6 # DW_FORM_data4
+ .byte 115 # DW_AT_addr_base
+ .byte 23 # DW_FORM_sec_offset
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 2 # Abbreviation Code
+ .byte 46 # DW_TAG_subprogram
+ .byte 0 # DW_CHILDREN_no
+ .byte 17 # DW_AT_low_pc
+ .byte 27 # DW_FORM_addrx
+ .byte 18 # DW_AT_high_pc
+ .byte 6 # DW_FORM_data4
+ .byte 64 # DW_AT_frame_base
+ .byte 24 # DW_FORM_exprloc
+ .byte 3 # DW_AT_name
+ .byte 37 # DW_FORM_strx1
+ .byte 58 # DW_AT_decl_file
+ .byte 11 # DW_FORM_data1
+ .byte 59 # DW_AT_decl_line
+ .byte 11 # DW_FORM_data1
+ .byte 73 # DW_AT_type
+ .byte 19 # DW_FORM_ref4
+ .byte 63 # DW_AT_external
+ .byte 25 # DW_FORM_flag_present
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 3 # Abbreviation Code
+ .byte 36 # DW_TAG_base_type
+ .byte 0 # DW_CHILDREN_no
+ .byte 3 # DW_AT_name
+ .byte 37 # DW_FORM_strx1
+ .byte 62 # DW_AT_encoding
+ .byte 11 # DW_FORM_data1
+ .byte 11 # DW_AT_byte_size
+ .byte 11 # DW_FORM_data1
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 0 # EOM(3)
+ .section .debug_info,"",@progbits
+ .long .Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit
+ .short 5 # DWARF version number
+ .byte 1 # DWARF Unit Type
+ .byte 8 # Address Size (in bytes)
+ .long .debug_abbrev # Offset Into Abbrev. Section
+ .byte 1 # Abbrev [1] 0xc:0x2b DW_TAG_compile_unit
+ .byte 0 # DW_AT_producer
+ .short 33 # DW_AT_language
+ .byte 1 # DW_AT_name
+ .long .Lstr_offsets_base0 # DW_AT_str_offsets_base
+ .long .Lline_table_start0 # DW_AT_stmt_list
+ .byte 2 # DW_AT_comp_dir
+ .byte 0 # DW_AT_low_pc
+ .long .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc
+ .long .Laddr_table_base0 # DW_AT_addr_base
+ .byte 2 # Abbrev [2] 0x23:0xf DW_TAG_subprogram
+ .byte 0 # DW_AT_low_pc
+ .long .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc
+ .byte 1 # DW_AT_frame_base
+ .byte 86
+ .byte 3 # DW_AT_name
+ .byte 0 # DW_AT_decl_file
+ .byte 1 # DW_AT_decl_line
+ .long 50 # DW_AT_type
+ # DW_AT_external
+ .byte 3 # Abbrev [3] 0x32:0x4 DW_TAG_base_type
+ .byte 4 # DW_AT_name
+ .byte 5 # DW_AT_encoding
+ .byte 4 # DW_AT_byte_size
+ .byte 0 # End Of Children Mark
+ .section .debug_str_offsets,"",@progbits
+ .long 24 # Length of String Offsets Set
+ .short 5
+ .short 0
+ .section .debug_str,"MS",@progbits,1
+ .asciz "clang version 15.0.0" # string offset=0
+ .asciz "main.cpp" # string offset=146
+ .asciz "testAddr" # string offset=155
+ .asciz "main" # string offset=202
+ .asciz "int" # string offset=207
+ .section .debug_str_offsets,"",@progbits
+ .long .Linfo_string0
+ .long .Linfo_string1
+ .long .Linfo_string2
+ .long .Linfo_string3
+ .long .Linfo_string4
+ .section .debug_addr,"",@progbits
+ .long .Ldebug_addr_end0-.Ldebug_addr_start0 # Length of contribution
+ .short 5 # DWARF version number
+ .byte 8 # Address size
+ .byte 0 # Segment selector size
+ .quad .Lfunc_begin0
+ .ident "clang version 15.0.0"
+ .section ".note.GNU-stack","",@progbits
+ .addrsig
+ .section .debug_line,"",@progbits
diff --git a/bolt/test/X86/dwarf5-addr-section-reuse.s b/bolt/test/X86/dwarf5-addr-section-reuse.s
new file mode 100644
index 000000000000..bc747e0657b5
--- /dev/null
+++ b/bolt/test/X86/dwarf5-addr-section-reuse.s
@@ -0,0 +1,18 @@
+# RUN: llvm-mc -dwarf-version=5 -filetype=obj -triple x86_64-unknown-linux %p/Inputs/dwarf5-main-addr-section-reuse.s -o %tmain.o
+# RUN: llvm-mc -dwarf-version=5 -filetype=obj -triple x86_64-unknown-linux %p/Inputs/dwarf5-helper1-addr-section-reuse.s -o %thelper1.o
+# RUN: llvm-mc -dwarf-version=5 -filetype=obj -triple x86_64-unknown-linux %p/Inputs/dwarf5-helper2-addr-section-reuse.s -o %thelper2.o
+# RUN: %clang %cflags -dwarf-5 %tmain.o %thelper1.o %thelper2.o -o %t.exe -Wl,-q
+# RUN: llvm-dwarfdump --debug-info %t.exe | FileCheck --check-prefix=PRECHECK %s
+# RUN: llvm-bolt %t.exe -o %t.exe.bolt --update-debug-sections
+# RUN: llvm-dwarfdump --debug-info %t.exe.bolt | FileCheck --check-prefix=POSTCHECK %s
+# This test checks that when a binary is processed by BOLT, CUs that are not modified and that share a
+# DW_AT_addr_base still share the same entry in .debug_addr in the output binary.
+# PRECHECK: DW_AT_addr_base (0x00000008)
+# PRECHECK: DW_AT_addr_base (0x00000008)
+# PRECHECK: DW_AT_addr_base (0x00000008)
+# POSTCHECK: DW_AT_addr_base (0x00000008)
+# POSTCHECK: DW_AT_addr_base (0x00000020)
+# POSTCHECK: DW_AT_addr_base (0x00000020)