summaryrefslogtreecommitdiff
path: root/bolt/runtime
diff options
context:
space:
mode:
authorVasily Leonenko <vasily.leonenko@huawei.com>2021-06-19 04:08:35 +0800
committerMaksim Panchenko <maks@fb.com>2021-06-19 04:08:35 +0800
commitad79d51778e37293502b7a8246fa387092771969 (patch)
tree34a5b705a9fa5f00ac93e8f7c72dadeea547bf12 /bolt/runtime
parent60b10a8eade65114c7e6600675a11beed6f94091 (diff)
downloadllvm-ad79d51778e37293502b7a8246fa387092771969.tar.gz
[PR] Instrumentation: Generate and use _start and _fini trampolines
Summary: This commit implements a new method of hooking the _start & _fini functions that allows the use of relative jumps, in preparation for future PIE & .so library support. Instead of using the absolute addresses of the _start & _fini functions known at link time, we use dynamically created trampoline functions and reference the corresponding symbols in the instrumentation runtime library. Because we would like to use instrumentation for dynamically loaded binaries (PIE & .so), we need to compile the instrumentation library with the "-fPIC" flag to support relative address resolution for functions and data. For shared libraries, we need to handle initialization of the instrumentation library by using the DT_INIT section entry point. This commit also adds detection of whether the binary is an executable or a shared library, based on the existence of a PT_INTERP header. In the case of a shared library, we save the address of the real library init function for later use when creating the instrumentation library init trampoline function, and we also update DT_INIT to point to the instrumentation library init function. Functions called from the init/fini functions should be called with forced stack alignment to avoid issues with instructions that rely on it, e.g. optimized string operations. Vasily Leonenko, Advanced Software Technology Lab, Huawei (cherry picked from FBD30092316)
Diffstat (limited to 'bolt/runtime')
-rw-r--r--bolt/runtime/CMakeLists.txt2
-rw-r--r--bolt/runtime/instr.cpp20
2 files changed, 13 insertions, 9 deletions
diff --git a/bolt/runtime/CMakeLists.txt b/bolt/runtime/CMakeLists.txt
index 9ea769f88a2a..cc679c31ff6d 100644
--- a/bolt/runtime/CMakeLists.txt
+++ b/bolt/runtime/CMakeLists.txt
@@ -22,7 +22,7 @@ add_library(bolt_rt_hugify STATIC
)
# Don't let the compiler think it can create calls to standard libs
-target_compile_options(bolt_rt_instr PRIVATE -ffreestanding -fno-exceptions -fno-rtti)
+target_compile_options(bolt_rt_instr PRIVATE -ffreestanding -fno-exceptions -fno-rtti -fPIE)
target_include_directories(bolt_rt_instr PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
target_compile_options(bolt_rt_hugify PRIVATE -ffreestanding -fno-exceptions -fno-rtti)
target_include_directories(bolt_rt_hugify PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
diff --git a/bolt/runtime/instr.cpp b/bolt/runtime/instr.cpp
index 3ad7a8cca6ff..bab860f26274 100644
--- a/bolt/runtime/instr.cpp
+++ b/bolt/runtime/instr.cpp
@@ -102,10 +102,10 @@ extern bool __bolt_instr_use_pid;
// TODO: We need better linking support to make that happen.
extern void (*__bolt_trampoline_ind_call)();
extern void (*__bolt_trampoline_ind_tailcall)();
-// Function pointers to init/fini routines in the binary, so we can resume
-// regular execution of these functions that we hooked
-extern void (*__bolt_instr_init_ptr)();
-extern void (*__bolt_instr_fini_ptr)();
+// Function pointers to init/fini trampoline routines in the binary, so we can
+// resume regular execution of these functions that we hooked
+extern void (*__bolt_start_trampoline)();
+extern void (*__bolt_fini_trampoline)();
#endif
@@ -1366,7 +1366,8 @@ extern "C" void __bolt_instr_clear_counters() {
/// call this function directly to get your profile written to disk
/// on demand.
///
-extern "C" void __bolt_instr_data_dump() {
+extern "C" void __attribute((force_align_arg_pointer))
+__bolt_instr_data_dump() {
// Already dumping
if (!GlobalWriteProfileMutex->acquire())
return;
@@ -1451,7 +1452,7 @@ extern "C" void __bolt_instr_indirect_call();
extern "C" void __bolt_instr_indirect_tailcall();
/// Initialization code
-extern "C" void __bolt_instr_setup() {
+extern "C" void __attribute((force_align_arg_pointer)) __bolt_instr_setup() {
const uint64_t CountersStart =
reinterpret_cast<uint64_t>(&__bolt_instr_locations[0]);
const uint64_t CountersEnd = alignTo(
@@ -1526,13 +1527,16 @@ extern "C" __attribute((naked)) void __bolt_instr_start()
__asm__ __volatile__(SAVE_ALL
"call __bolt_instr_setup\n"
RESTORE_ALL
- "jmp *__bolt_instr_init_ptr(%%rip)\n"
+ "jmp __bolt_start_trampoline\n"
:::);
}
/// This is hooking into ELF's DT_FINI
extern "C" void __bolt_instr_fini() {
- __bolt_instr_fini_ptr();
+ // Call the trampoline via inline assembly for now: calling it from C
+ // as a function pointer runs into issues with function pointer
+ // dereferencing.
+ __asm__ __volatile__("call __bolt_fini_trampoline\n" :::);
if (__bolt_instr_sleep_time == 0)
__bolt_instr_data_dump();
DEBUG(report("Finished.\n"));