/* -------------------------------------------------------------------------- * PEi386(+) specifics (Win32 targets) * ------------------------------------------------------------------------*/ /* The information for this linker comes from Microsoft Portable Executable and Common Object File Format Specification revision 8.3 February 2013 It can be found online at: https://msdn.microsoft.com/en-us/windows/hardware/gg463119.aspx Things move, so if that fails, try searching for it via http://www.google.com/search?q=PE+COFF+specification The ultimate reference for the PE format is the Winnt.h header file that comes with the Platform SDKs; as always, implementations will drift wrt their documentation. A good background article on the PE format is Matt Pietrek's March 1994 article in Microsoft System Journal (MSJ) (Vol.9, No. 3): "Peering Inside the PE: A Tour of the Win32 Portable Executable File Format." The info in there has recently been updated in a two part article in MSDN magazine, issues Feb and March 2002, "Inside Windows: An In-Depth Look into the Win32 Portable Executable File Format" John Levine's book "Linkers and Loaders" contains useful info on PE too. The PE specification doesn't specify how to do the actual relocations. For this reason, and because both PE and ELF are based on COFF, the relocations for the PEi386+ code is based on the ELF relocations for the equivalent relocation type. The ELF ABI can be found at http://www.x86-64.org/documentation/abi.pdf The current code is based on version 0.99.6 - October 2013 The current GHCi linker supports the following four object file formats: * PE/PE+ obj - The normal COFF_ANON_OBJ format which is generated by default from Windows compilers * PE/PE+ big-obj - The big object format COFF_ANON_BIG_OBJ which extends the number of sections to 2^31 and the number of symbols in each section. This requires a flag but all Windows compilers can produce it. * PE Import format - The import library format defined in the PE standard COFF_IMPORT_LIB and commonly has the file extension .lib * GNU BFD import format - The import library format defined and used by GNU tools and commonly has the file extension .dll.a . See note below. Note [The need for import libraries] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In its original incarnation, PE had no native support for dynamic linking. Let's examine how dynamic linking is now implemented. Consider a simple program with a reference to function and data symbols provided by a DLL: // myprogram.c #include int do_something() { libfoo_function(); return libfoo_data; } The header file shipped with libfoo will look like the following: // libfoo.h __declspec(dllimport) int libfoo_function(); __declspec(dllimport) int libfoo_data; When the C compiler is compiling myprogram.c, it will see these dllimport declarations and use them to produce a module definition (.def) file which summarizes the symbols that we expect the DLL to export. This will look like: EXPORTS libfoo_function libfoo_data DATA The C compiler will pass this file to the `dlltool` utility, which will generate an *import library*. The import library will contain placeholder symbols (with names starting with `__imp_`), along with instructions for the dynamic linker to fix-up these references to point to the "real" symbol definition. For historical reasons involving lack of documentation, NDAs, and (probably) Steve Balmer, there are two flavours of import flavours: * Native Windows-style import libraries. These typically bear the .lib file extension and encode their relocation information in the `.idata` section. Documentation for this format is not available [here](https://docs.microsoft.com/en-us/windows/win32/debug/pe-format#import-library-format). These are handled in `checkAndLoadImportLibrary()` * GNU BFD-style import libraries. These typically have the .dll.a extension and encode the relocation information in a set of sections named `.idata$` where `` is an integer which encodes the section's meaning. Somewhat ironically, despite being devised in response to the native Windows format having no public documentation, there is no official documentation for this format but Note [BFD import library] attempts to summarize what we know. These are handled in `ocGetNames_PEi386()`. Note [BFD import library] ~~~~~~~~~~~~~~~~~~~~~~~~~ On Windows, compilers don't link directly to dynamic libraries. The reason for this is that the exports are not always by symbol, the Import Address Table (IAT) also allows exports by ordinal number or raw addresses. So to solve the linking issue, import libraries were added. Import libraries can be seen as a specification of how to link implicitly against a dynamic library. As a side note, import libraries are also the mechanism which can be used to break mutual dependencies between shared libraries and to implement delay loading or override the location of a shared library at startup. Linkers use these import libraries to populate the IAT of the resulting binary. At startup the system dynamic loader processes the IAT entries and populates the symbols with the correct addresses. Anyway, the Windows PE format specifies a simple and efficient format for this: It's essentially a list, saying these X symbols can be found in DLL y. Commonly, y is a versioned name. e.g. `liby_43.dll`. This is an artifact of the days when Windows did not support side-by-side assemblies. So the solution was to version the DLLs by renaming them to include explicit version numbers, and to then use the import libraries to point to the right version, having the linker do the leg work. The format in the PE specification is commonly named using the suffix .lib. Unfortunately, GCC/binutils decided not to implement this format, and instead have created their own format. This format is either named using the suffix .dll.a or .a depending on the tool that makes them. This format is undocumented. However the source of dlltool.c in binutils is pretty handy to understand it (see binutils/dlltool.c; grep for ".idata section description"). To understand the implementation in GHC, this is what is important: The import library is generally an archive containing one object file for each imported symbol. In addition, there is a "head" object, which contains the name of the DLL which the symbols are imported from, among other things. The `.idata$` section group is used to hold this information. An import library object file will always have these section groups, but the specific configuration depends on what the purpose of the file is. They will also never have a CODE or DATA section, though depending on the tool that creates them they may have the section headers, which will mostly be empty. The import data sections consist of the following: * `.idata$2` contains the Import Directory Table (IDT), which contains an entry for each imported DLL. Each entry contains: a reference to the DLL's name (in `.idata$7`) and references to its entries in the ILT and IAT sections. This is contained in the head object. * `.idata$6` contains the Hint Name Table (HNT). This is a table of of (symbol ordinal, symbol name) pairs, which are referred to be the ILT and IAT as described below. * `.idata$5` contains the Import Address Table (IAT). This consists of an array of pointers (one array for each imported DLL) which the loader will update to point to the target symbol identified by the hint referenced by the corresponding ILT entry. Moreover, the IAT pointers' initial values also point to the corresponding HNT entry. * `.idata$4` contains the Import Lookup Table (ILT). This contains an array of references to HNT entries for each imported DLL. * `.idata$7` contains the names of the imported DLLs. This is contained in the head object. You have two different possible configurations: 1) Those that define a redirection. In this case the `.idata$7` section will contain the name of the actual dll to load. This will be the only content of the section. In the symbol table, the last symbol will be the name used to refer to the dll in the relocation tables. This name will always be in the format `symbol_name_iname`, however when referred to, the format `_head_symbol_name` is used. We record this symbol early on during `ocGetNames` and load the dll and use the module handle as the symbol address. 2) Symbol definitions. In this case the HNT (`.idata$6`) will contain the symbol to load. This is stored in the fixed format of 2-byte ordinals followed by (null-terminated) symbol name. The ordinal is to be used when the DLL does not export symbols by name. (note: We don't currently support this in the runtime linker, but it's easy to add should it be needed). The last symbol in the symbol table of the section will contain the name symbol which contains the dll name to use to resolve the reference. As a technicality, this also means that the GCC format will allow us to use one library to store references to multiple dlls. This can't be produced by dlltool, but it can be combined using ar. This is an important feature required for dynamic linking support for GHC. So the runtime linker now supports this too. Example: Dynamic code references -------------------------------- To see what such an import library looks like, let's first start with the case of a function (e.g. `libfoo_function` above) with bind-now semantics (lazy-loading will look much different). The import library will contain the following: .section .text # This stub (which Windows calls a thunk) is what calls to # libfoo_function will hit if the symbol isn't declared with # __declspec(dllimport) libfoo_function: jmp *0x0(%rip) .quad __imp_libfoo_function .section .idata$5 # IAT # This is the location which the loader will # update to point to the definition # of libfoo_function __imp_libfoo_function: .quad hint1 - __image_base__ .section .idata$4 # ILT # This (and hint1 below) is what tells the # loader where __imp_libfoo_function should point ilt1: .quad hint1 - __image_base__ .section .idata$6 # HNT hint1: .short ORDINAL_OF_libfoo_function .asciiz "libfoo_function" To handle a reference to an IAT entry like `__imp_libfoo_function`, the GHC linker will (in `lookupSymbolInDLLs`) first strip off the `__imp_` prefix to find the name of the referenced dynamic symbol. It then resolves the symbol's address and allocates an `IndirectAddr` where it can place the address, which it will return as the resolution of the `___libfoo_function`. Example: Dynamic data references -------------------------------- Let's now consider the import library for a data symbol. This is essentially equivalent to the code case, but without the need to emit a thunk: .section .idata$5 # IAT __imp_libfoo_data: .quad hint2 - __image_base__ .section .idata$4 # ILT ilt2: .quad hint2 - __image_base__ .section .idata$6 # ILT hint2: .short ORDINAL_OF_libfoo_data .asciiz "libfoo_data" Note [Memory allocation] ~~~~~~~~~~~~~~~~~~~~~~~~ The loading of an object begins in `preloadObjectFile`, which allocates a buffer, `oc->image`, into which the object file is read. It then calls `ocVerifyImage`, where we traverse the object file's header and populate `ObjectCode.sections`. Specifically, we create a Section for each of the object's sections such that: * the `.start` field points to its data in the mapped image * the `.size` field reflects its intended size * the .`info` field contains a `SectionFormatField` with other information from its section header entry (namely `VirtualSize`, `VirtualAddress`, and `Characteristics`) We then proceed to `ocGetNames`, where we again walk the section table header and determine which sections need to be mapped and how (e.g. as readable-writable or readable-executable). We then allocate memory for each section using the appropriate m32 allocator and, where necessary, copy the data from `section.start` (which points to the section in `oc->image`) into the new allocation. Finally, `addSection()` updates the `section.start` field to reflect the section's new home. In addition, we also allocate space for the global BSS section. At this point we have no further need for the preloaded image buffer, `oc->image` and therefore free it. Having populated the sections, we can proceed to add the object's symbols to the symbol table. This is a matter of walking the object file's symbol table, computing the symbol's address, and calling `ghciInsertSymbolTable`. Finally, we enter `ocResolve`, where we resolve relocations and and allocate jump islands (using the m32 allocator for backing storage) as necessary. */ #include "Rts.h" #if defined(x86_64_HOST_ARCH) #define USED_IF_x86_64_HOST_ARCH /* Nothing */ #else #define USED_IF_x86_64_HOST_ARCH STG_UNUSED #endif #if defined(mingw32_HOST_OS) #include "RtsUtils.h" #include "RtsSymbolInfo.h" #include "GetEnv.h" #include "CheckUnload.h" #include "LinkerInternals.h" #include "linker/PEi386.h" #include "linker/PEi386Types.h" #include "linker/SymbolExtras.h" #include #include /* SHGetFolderPathW */ #include #include #include #include #include #include #include #include #if defined(x86_64_HOST_ARCH) static size_t makeSymbolExtra_PEi386( ObjectCode* oc, uint64_t index, size_t s, SymbolName* symbol, SymType sym_type); #endif static void addDLLHandle( pathchar* dll_name, HINSTANCE instance); static bool verifyCOFFHeader( uint16_t machine, IMAGE_FILE_HEADER *hdr, pathchar *fileName); static bool checkIfDllLoaded( HINSTANCE instance); static uint32_t getSectionAlignment( Section section); static size_t getAlignedValue( size_t value, Section section); static void releaseOcInfo( ObjectCode* oc); static SymbolAddr *lookupSymbolInDLLs ( const SymbolName* lbl, ObjectCode *dependent ); const Alignments pe_alignments[] = { { IMAGE_SCN_ALIGN_1BYTES , 1 }, { IMAGE_SCN_ALIGN_2BYTES , 2 }, { IMAGE_SCN_ALIGN_4BYTES , 4 }, { IMAGE_SCN_ALIGN_8BYTES , 8 }, { IMAGE_SCN_ALIGN_16BYTES , 16 }, { IMAGE_SCN_ALIGN_32BYTES , 32 }, { IMAGE_SCN_ALIGN_64BYTES , 64 }, { IMAGE_SCN_ALIGN_128BYTES , 128 }, { IMAGE_SCN_ALIGN_256BYTES , 256 }, { IMAGE_SCN_ALIGN_512BYTES , 512 }, { IMAGE_SCN_ALIGN_1024BYTES, 1024}, { IMAGE_SCN_ALIGN_2048BYTES, 2048}, { IMAGE_SCN_ALIGN_4096BYTES, 4096}, { IMAGE_SCN_ALIGN_8192BYTES, 8192}, }; const int pe_alignments_cnt = sizeof (pe_alignments) / sizeof (Alignments); const int default_alignment = 8; /* See Note [_iob_func symbol] In order to emulate __iob_func the memory location needs to point the location of the I/O structures in memory. As such we need RODATA to contain the pointer as a redirect. Essentially it's a DATA DLL reference. */ const void* __rts_iob_func = (void*)&__acrt_iob_func; void initLinker_PEi386() { if (!ghciInsertSymbolTable(WSTR("(GHCi/Ld special symbols)"), symhash, "__image_base__", GetModuleHandleW (NULL), HS_BOOL_TRUE, SYM_TYPE_CODE, NULL)) { barf("ghciInsertSymbolTable failed"); } #if defined(mingw32_HOST_OS) addDLLHandle(WSTR("*.exe"), GetModuleHandle(NULL)); #endif /* Register the cleanup routine as an exit handler, this gives other exit handlers a chance to run which may need linker information. Exit handlers are ran in reverse registration order so this needs to be before the linker loads anything. */ atexit (exitLinker_PEi386); } void exitLinker_PEi386() { } /* A list thereof. */ static OpenedDLL* opened_dlls = NULL; /* Adds a DLL instance to the list of DLLs in which to search for symbols. */ static void addDLLHandle(pathchar* dll_name, HINSTANCE instance) { /* At this point, we actually know what was loaded. So bail out if it's already been loaded. */ if (checkIfDllLoaded(instance)) { return; } OpenedDLL* o_dll; o_dll = stgMallocBytes( sizeof(OpenedDLL), "addDLLHandle" ); o_dll->name = dll_name ? pathdup(dll_name) : NULL; o_dll->instance = instance; o_dll->next = opened_dlls; opened_dlls = o_dll; /* Now discover the dependencies of dll_name that were just loaded in our process space. The reason is we have access to them without the user having to explicitly specify them. */ PIMAGE_NT_HEADERS header = (PIMAGE_NT_HEADERS)((BYTE *)instance + ((PIMAGE_DOS_HEADER)instance)->e_lfanew); PIMAGE_IMPORT_DESCRIPTOR imports = (PIMAGE_IMPORT_DESCRIPTOR)((BYTE *)instance + header-> OptionalHeader.DataDirectory[IMAGE_DIRECTORY_ENTRY_IMPORT].VirtualAddress); bool importTableMissing = header->OptionalHeader.DataDirectory[IMAGE_DIRECTORY_ENTRY_IMPORT].Size == 0; if (importTableMissing) { return; } /* Ignore these compatibility shims. */ const pathchar* ms_dll = WSTR("api-ms-win-"); const int len = wcslen(ms_dll); do { pathchar* module = mkPath((char*)(BYTE *)instance + imports->Name); HINSTANCE module_instance = GetModuleHandleW(module); if (0 != wcsncmp(module, ms_dll, len) && module_instance && !checkIfDllLoaded(module_instance)) { IF_DEBUG(linker, debugBelch("Loading dependency %" PATH_FMT " -> %" PATH_FMT ".\n", dll_name, module)); /* Now recursively load dependencies too. */ addDLLHandle(module, module_instance); } stgFree(module); imports++; } while (imports->Name); } static OpenedDLL* findLoadedDll(HINSTANCE instance) { for (OpenedDLL* o_dll = opened_dlls; o_dll != NULL; o_dll = o_dll->next) { if (o_dll->instance == instance) { return o_dll; } } return NULL; } static bool checkIfDllLoaded(HINSTANCE instance) { return findLoadedDll (instance) != NULL; } void freePreloadObjectFile_PEi386(ObjectCode *oc) { if (oc->image) { stgFree (oc->image); oc->image = NULL; } if (oc->info) { /* Release the unwinder information. See Note [Exception Unwinding]. */ if (oc->info->xdata) { if (!RtlDeleteFunctionTable (oc->info->xdata->start)) debugBelch ("Unable to remove Exception handlers for %" PATH_FMT "\n", oc->fileName); oc->info->xdata = NULL; oc->info->pdata = NULL; } if (oc->info->ch_info) { stgFree (oc->info->ch_info); } stgFree (oc->info); oc->info = NULL; } } static void releaseOcInfo(ObjectCode* oc) { if (!oc) return; if (oc->info) { stgFree (oc->info->ch_info); stgFree (oc->info->str_tab); stgFree (oc->info->symbols); stgFree (oc->info); oc->info = NULL; } for (int i = 0; i < oc->n_sections; i++){ Section *section = &oc->sections[i]; if (section->info) { stgFree (section->info->name); if (section->info->relocs) { stgFree (section->info->relocs); section->info->relocs = NULL; } stgFree (section->info); section->info = NULL; } } } /************* * This function determines what kind of COFF image we are dealing with. * This is needed in order to correctly load and verify objects and their * sections. *************/ COFF_OBJ_TYPE getObjectType ( char* image, pathchar* fileName ) { /* {D1BAA1C7-BAEE-4ba9-AF20-FAF66AA4DCB8} */ static const char header_bigobj_classid[16] = { 0xC7, 0xA1, 0xBA, 0xD1, 0xEE, 0xBA, 0xa9, 0x4b, 0xAF, 0x20, 0xFA, 0xF6, 0x6A, 0xA4, 0xDC, 0xB8 }; WORD machine; COFF_OBJ_TYPE ret = COFF_UNKNOWN; /* First check if we have an ANON_OBJECT_HEADER signature. */ ANON_OBJECT_HEADER* anon = (ANON_OBJECT_HEADER*)image; if ( anon->Sig1 == IMAGE_FILE_MACHINE_UNKNOWN && anon->Sig2 == IMPORT_OBJECT_HDR_SIG2) { machine = anon->Machine; if (verifyCOFFHeader (machine, NULL, fileName)) { switch (anon->Version) { case 0: ret = COFF_IMPORT_LIB; break; case 1: ret = COFF_ANON_OBJ; break; case 2: if (memcmp (&anon->ClassID, header_bigobj_classid, 16) == 0) ret = COFF_ANON_BIG_OBJ; break; default: break; } } } else { /* If it's not an ANON_OBJECT then try an image file. */ IMAGE_FILE_HEADER* img = (IMAGE_FILE_HEADER*)image; machine = img->Machine; if (verifyCOFFHeader (machine, img, fileName)) ret = COFF_IMAGE; } return ret; } /************* * Retrieve common header information *************/ COFF_HEADER_INFO* getHeaderInfo ( ObjectCode* oc ) { COFF_OBJ_TYPE coff_type = getObjectType (oc->image, OC_INFORMATIVE_FILENAME(oc)); COFF_HEADER_INFO* info = stgMallocBytes (sizeof(COFF_HEADER_INFO), "getHeaderInfo"); memset (info, 0, sizeof(COFF_HEADER_INFO)); info->type = coff_type; switch (coff_type) { case COFF_IMAGE: { IMAGE_FILE_HEADER* hdr = (IMAGE_FILE_HEADER*)oc->image; info->sizeOfHeader = sizeof(IMAGE_FILE_HEADER); info->sizeOfOptionalHeader = hdr->SizeOfOptionalHeader; info->pointerToSymbolTable = hdr->PointerToSymbolTable; info->numberOfSymbols = hdr->NumberOfSymbols; info->numberOfSections = hdr->NumberOfSections; } break; case COFF_ANON_BIG_OBJ: { ANON_OBJECT_HEADER_BIGOBJ* hdr = (ANON_OBJECT_HEADER_BIGOBJ*)oc->image; info->sizeOfHeader = sizeof(ANON_OBJECT_HEADER_BIGOBJ); info->sizeOfOptionalHeader = 0; info->pointerToSymbolTable = hdr->PointerToSymbolTable; info->numberOfSymbols = hdr->NumberOfSymbols; info->numberOfSections = hdr->NumberOfSections; } break; default: { stgFree (info); info = NULL; errorBelch ("Unknown COFF %d type in getHeaderInfo.", coff_type); } break; } return info; } /************* * Symbol utility functions *************/ __attribute__ ((always_inline)) inline size_t getSymbolSize ( COFF_HEADER_INFO *info ) { ASSERT(info); switch (info->type) { case COFF_ANON_BIG_OBJ: return sizeof_COFF_symbol_ex; default: return sizeof_COFF_symbol_og; } } __attribute__ ((always_inline)) inline int32_t getSymSectionNumber ( COFF_HEADER_INFO *info, COFF_symbol* sym ) { ASSERT(info); ASSERT(sym); switch (info->type) { case COFF_ANON_BIG_OBJ: return sym->ex.SectionNumber; default: return sym->og.SectionNumber; } } __attribute__ ((always_inline)) inline uint32_t getSymValue ( COFF_HEADER_INFO *info, COFF_symbol* sym ) { ASSERT(info); ASSERT(sym); switch (info->type) { case COFF_ANON_BIG_OBJ: return sym->ex.Value; default: return sym->og.Value; } } __attribute__ ((always_inline)) inline uint8_t getSymStorageClass ( COFF_HEADER_INFO *info, COFF_symbol* sym ) { ASSERT(info); ASSERT(sym); switch (info->type) { case COFF_ANON_BIG_OBJ: return sym->ex.StorageClass; default: return sym->og.StorageClass; } } __attribute__ ((always_inline)) inline uint8_t getSymNumberOfAuxSymbols ( COFF_HEADER_INFO *info, COFF_symbol* sym ) { ASSERT(info); ASSERT(sym); switch (info->type) { case COFF_ANON_BIG_OBJ: return sym->ex.NumberOfAuxSymbols; default: return sym->og.NumberOfAuxSymbols; } } __attribute__ ((always_inline)) inline uint16_t getSymType ( COFF_HEADER_INFO *info, COFF_symbol* sym ) { ASSERT(info); ASSERT(sym); switch (info->type) { case COFF_ANON_BIG_OBJ: return sym->ex.Type; default: return sym->og.Type; } } __attribute__ ((always_inline)) inline uint8_t* getSymShortName ( COFF_HEADER_INFO *info, COFF_symbol* sym ) { ASSERT(info); ASSERT(sym); switch (info->type) { case COFF_ANON_BIG_OBJ: return sym->ex.N.ShortName; default: return sym->og.N.ShortName; } } const char * addDLL_PEi386( pathchar *dll_name, HINSTANCE *loaded ) { /* ------------------- Win32 DLL loader ------------------- */ pathchar* buf; HINSTANCE instance; IF_DEBUG(linker, debugBelch("addDLL; dll_name = `%" PATH_FMT "'\n", dll_name)); /* The file name has no suffix (yet) so that we can try both foo.dll and foo.drv The documentation for LoadLibrary says: If no file name extension is specified in the lpFileName parameter, the default library extension .dll is appended. However, the file name string can include a trailing point character (.) to indicate that the module name has no extension. */ size_t bufsize = pathlen(dll_name) + 10; buf = stgMallocBytes(bufsize * sizeof(wchar_t), "addDLL"); /* These are ordered by probability of success and order we'd like them. */ const wchar_t *formats[] = { L"%ls.DLL", L"%ls.DRV", L"lib%ls.DLL", L"%ls" }; const DWORD flags[] = { LOAD_LIBRARY_SEARCH_USER_DIRS | LOAD_LIBRARY_SEARCH_DEFAULT_DIRS, 0 }; int cFormat, cFlag; int flags_start = 1; /* Assume we don't support the new API. */ /* Detect if newer API are available, if not, skip the first flags entry. */ if (GetProcAddress((HMODULE)LoadLibraryW(L"Kernel32.DLL"), "AddDllDirectory")) { flags_start = 0; } /* Iterate through the possible flags and formats. */ for (cFlag = flags_start; cFlag < 2; cFlag++) { for (cFormat = 0; cFormat < 4; cFormat++) { snwprintf(buf, bufsize, formats[cFormat], dll_name); instance = LoadLibraryExW(buf, NULL, flags[cFlag]); if (instance == NULL) { if (GetLastError() != ERROR_MOD_NOT_FOUND) { goto error; } } else { break; /* We're done. DLL has been loaded. */ } } } /* Check if we managed to load the DLL. */ if (instance == NULL) { goto error; } addDLLHandle(buf, instance); if (loaded) { *loaded = instance; } stgFree(buf); return NULL; error: stgFree(buf); char* errormsg = stgMallocBytes(sizeof(char) * 80, "addDLL_PEi386"); snprintf(errormsg, 80, "addDLL: %" PATH_FMT " or dependencies not loaded. (Win32 error %lu)", dll_name, GetLastError()); /* LoadLibrary failed; return a ptr to the error msg. */ return errormsg; } pathchar* findSystemLibrary_PEi386( pathchar* dll_name ) { const unsigned int init_buf_size = 1024; unsigned int bufsize = init_buf_size; wchar_t* result = stgMallocBytes(sizeof(wchar_t) * bufsize, "findSystemLibrary_PEi386"); DWORD wResult = SearchPathW(NULL, dll_name, NULL, bufsize, result, NULL); if (wResult > bufsize) { result = realloc(result, sizeof(wchar_t) * wResult); wResult = SearchPathW(NULL, dll_name, NULL, wResult, result, NULL); } if (!wResult) { stgFree(result); return NULL; } return result; } HsPtr addLibrarySearchPath_PEi386(pathchar* dll_path) { // Make sure the path is an absolute path in UNC-style to ensure that we // aren't subject to the MAX_PATH restriction. See #21059. wchar_t *abs_path = __rts_create_device_name(dll_path); HsPtr result = AddDllDirectory(abs_path); if (!result) { sysErrorBelch("addLibrarySearchPath: %" PATH_FMT " (Win32 error %lu)", abs_path, GetLastError()); stgFree(abs_path); return NULL; } stgFree(abs_path); return result; } bool removeLibrarySearchPath_PEi386(HsPtr dll_path_index) { bool result = false; if (dll_path_index != NULL) { result = RemoveDllDirectory(dll_path_index); // dll_path_index is now invalid, do not use it after this point. if (!result) { sysErrorBelch("removeLibrarySearchPath: (Win32 error %lu)", GetLastError()); return false; } } return !result; } /* We assume file pointer is right at the beginning of COFF object. */ static uint32_t getSectionAlignment( Section section) { uint32_t c = section.info->props; for(int i = 0; i < pe_alignments_cnt; i++) { if ((c & 0xF00000) == pe_alignments[i].mask) return pe_alignments[i].value; } /* No alignment flag found, assume 8-byte aligned. */ return default_alignment; } /* ---------------------- * return a value aligned to the section requirements */ static size_t getAlignedValue( size_t value, Section section) { uint32_t alignment = getSectionAlignment(section); uint32_t mask = (uint32_t)alignment - 1; return (size_t)((value + mask) & ~mask); } /* ----------------------- * This loads import libraries following Microsoft's official standard in the PE * documentation. This is a smaller more efficient format which is just a list * of symbol name => dll. * * This function must fail gracefully and if it does, the filestream needs to * be reset to what it was when the function was called. */ bool checkAndLoadImportLibrary( pathchar* arch_name, char* member_name, FILE* f ) { char* image; static bool load_dll_warn = false; if (load_dll_warn) { return 0; } /* Based on Import Library specification. PE Spec section 7.1 */ COFF_import_header hdr; size_t n; n = fread(&hdr, 1, sizeof_COFF_import_Header, f); if (n != sizeof_COFF_import_Header) { errorBelch("loadImportLibrary: error whilst reading `%s' header " "in `%" PATH_FMT "'\n", member_name, arch_name); fseek(f, -(long int)sizeof_COFF_import_Header, SEEK_CUR); return false; } if ( hdr.Sig1 != IMAGE_FILE_MACHINE_UNKNOWN || hdr.Sig2 != IMPORT_OBJECT_HDR_SIG2 || getObjectType ((char*)&hdr, arch_name) != COFF_IMPORT_LIB) { fseek(f, -(long int)sizeof_COFF_import_Header, SEEK_CUR); IF_DEBUG(linker, debugBelch("loadArchive: Object `%s` is not an import lib. Skipping...\n", member_name)); return false; } IF_DEBUG(linker, debugBelch("loadArchive: reading %lu bytes at %ld\n", hdr.SizeOfData, ftell(f))); image = stgMallocBytes(hdr.SizeOfData, "checkAndLoadImportLibrary(image)"); n = fread(image, 1, hdr.SizeOfData, f); if (n != hdr.SizeOfData) { errorBelch("loadArchive: error whilst reading `%s' header in `%" PATH_FMT "'. Did not read enough bytes.\n", member_name, arch_name); fseek(f, -(n + sizeof_COFF_import_Header), SEEK_CUR); return false; } char* symbol = strtok(image, "\0"); int symLen = strlen(symbol) + 1; int nameLen = n - symLen; char* dllName = stgMallocBytes(sizeof(char) * nameLen, "checkAndLoadImportLibrary(dllname)"); dllName = strncpy(dllName, image + symLen, nameLen); pathchar* dll = stgMallocBytes(sizeof(wchar_t) * nameLen, "checkAndLoadImportLibrary(dll)"); mbstowcs(dll, dllName, nameLen); stgFree(dllName); IF_DEBUG(linker, debugBelch("loadArchive: read symbol %s from lib `%" PATH_FMT "'\n", symbol, dll)); const char* result = addDLL(dll); stgFree(image); if (result != NULL) { errorBelch("Could not load `%" PATH_FMT "'. Reason: %s\n", dll, result); load_dll_warn = true; stgFree(dll); fseek(f, -(n + sizeof_COFF_import_Header), SEEK_CUR); return false; } stgFree(dll); return true; } static void printName ( uint8_t* name, ObjectCode* oc ) { if (name[0]==0 && name[1]==0 && name[2]==0 && name[3]==0) { uint32_t strtab_offset = * (uint32_t*)(name + 4); debugBelch("%s", oc->info->str_tab + strtab_offset - PEi386_STRTAB_OFFSET); } else { int i; for (i = 0; i < 8; i++) { if (name[i] == 0) break; debugBelch("%c", name[i] ); } } } static void copyName ( uint8_t* name, ObjectCode* oc, uint8_t* dst, int dstSize ) { if (name[0]==0 && name[1]==0 && name[2]==0 && name[3]==0) { uint32_t strtab_offset = * (uint32_t*)(name + 4); strncpy ((char*)dst, oc->info->str_tab + strtab_offset - PEi386_STRTAB_OFFSET, dstSize); dst[dstSize-1] = 0; } else { int i = 0; while (1) { if (i >= 8) break; if (name[i] == 0) break; dst[i] = name[i]; i++; } dst[i] = 0; } } char* get_sym_name ( uint8_t* name, ObjectCode* oc ) { char* newstr; /* If the string is longer than 8 bytes, look in the string table for it -- this will be correctly zero terminated. */ if (name[0]==0 && name[1]==0 && name[2]==0 && name[3]==0) { uint32_t strtab_offset = * (uint32_t*)(name + 4); return oc->info->str_tab + strtab_offset - PEi386_STRTAB_OFFSET; } /* Otherwise, if shorter than 8 bytes, return the original, which by defn is correctly terminated. */ if (name[7]==0) return (char*)name; /* The annoying case: 8 bytes. Copy into a temporary (XXX which is never freed ...) */ newstr = stgMallocBytes(9, "get_sym_name"); ASSERT(newstr); strncpy (newstr, (char*)name,8); newstr[8] = 0; return newstr; } /* Getting the name of a section is mildly tricky, so we make a function for it. Sadly, in one case we have to copy the string (when it is exactly 8 bytes long there's no trailing '\0'), so for consistency we *always* copy the string; the caller must free it */ char * get_name_string (uint8_t* name, ObjectCode* oc) { char *newstr; if (name[0]=='/') { int strtab_offset = strtol((char*)name+1,NULL,10)-PEi386_STRTAB_OFFSET; char* str = oc->info->str_tab + strtab_offset; int len = strlen(str); newstr = stgMallocBytes(len + 1, "cstring_from_section_symbol_name"); strncpy(newstr, str, len + 1); return newstr; } else { newstr = stgMallocBytes(9, "cstring_from_section_symbol_name"); ASSERT(newstr); strncpy(newstr,(char*)name,8); newstr[8] = 0; return newstr; } } /* See Note [mingw-w64 name decoration scheme] */ #if !defined(x86_64_HOST_ARCH) static void zapTrailingAtSign ( SymbolName* sym ) { char* lst = strrchr (sym, '@'); if (lst) lst[0]='\0'; } #endif SymbolAddr* lookupSymbolInDLLs ( const SymbolName* lbl, ObjectCode *dependent ) { OpenedDLL* o_dll; SymbolAddr* sym; for (o_dll = opened_dlls; o_dll != NULL; o_dll = o_dll->next) { /* debugBelch("look in %ls for %s\n", o_dll->name, lbl); */ sym = GetProcAddress(o_dll->instance, lbl+STRIP_LEADING_UNDERSCORE); if (sym != NULL) { /*debugBelch("found %s in %s\n", lbl+1,o_dll->name);*/ return sym; } // TODO: Drop this /* Ticket #2283. Long description: http://support.microsoft.com/kb/132044 tl;dr: If C/C++ compiler sees __declspec(dllimport) ... foo ... it generates call *__imp_foo, and __imp_foo here has exactly the same semantics as in __imp_foo = GetProcAddress(..., "foo") */ if (sym == NULL && strncmp (lbl, "__imp_", 6) == 0) { sym = GetProcAddress(o_dll->instance, lbl + 6 + STRIP_LEADING_UNDERSCORE); if (sym != NULL) { SymbolAddr** indirect = m32_alloc(dependent->rw_m32, sizeof(SymbolAddr*), 8); if (indirect == NULL) { barf("lookupSymbolInDLLs: Failed to allocation indirection"); } *indirect = sym; IF_DEBUG(linker, debugBelch("warning: %s from %S is linked instead of %s\n", lbl+6+STRIP_LEADING_UNDERSCORE, o_dll->name, lbl)); return (void*) indirect; } } sym = GetProcAddress(o_dll->instance, lbl); if (sym != NULL) { /*debugBelch("found %s in %s\n", lbl,o_dll->name);*/ return sym; } } return NULL; } static bool verifyCOFFHeader ( uint16_t machine, IMAGE_FILE_HEADER *hdr, pathchar *fileName ) { #if defined(i386_HOST_ARCH) if (machine != IMAGE_FILE_MACHINE_I386) { errorBelch("%" PATH_FMT ": Not a x86 PE file.", fileName); return false; } #elif defined(x86_64_HOST_ARCH) if (machine != IMAGE_FILE_MACHINE_AMD64) { errorBelch("%" PATH_FMT ": Not a x86_64 PE+ file.", fileName); return false; } #else errorBelch("PE/PE+ not supported on this arch."); #endif if (!hdr) return true; if (hdr->SizeOfOptionalHeader != 0) { errorBelch("%" PATH_FMT ": PE/PE+ with nonempty optional header", fileName); return 0; } if ( (hdr->Characteristics & IMAGE_FILE_EXECUTABLE_IMAGE) || (hdr->Characteristics & IMAGE_FILE_DLL ) || (hdr->Characteristics & IMAGE_FILE_SYSTEM ) ) { errorBelch("%" PATH_FMT ": Not a PE/PE+ object file", fileName); return false; } if ( (hdr->Characteristics & IMAGE_FILE_BYTES_REVERSED_HI)) { errorBelch("%" PATH_FMT ": Invalid PE/PE+ word size or endianness: %d", fileName, (int)(hdr->Characteristics)); return false; } return true; } bool ocVerifyImage_PEi386 ( ObjectCode* oc ) { COFF_HEADER_INFO *info = getHeaderInfo (oc); /* If the header could not be read, then don't process the ObjectCode. This the case when the ObjectCode has been partially freed. */ if (!info) return false; uint32_t i, noRelocs; COFF_section* sectab; COFF_symbol* symtab; uint8_t* strtab; sectab = (COFF_section*) ( ((uint8_t*)(oc->image)) + info->sizeOfHeader + info->sizeOfOptionalHeader ); symtab = (COFF_symbol*) ( ((uint8_t*)(oc->image)) + info->pointerToSymbolTable ); strtab = ((uint8_t*)symtab) + info->numberOfSymbols * getSymbolSize (info); /* .BSS Section is initialized in ocGetNames_PEi386 but we need the Sections array initialized here already. */ Section *sections; sections = (Section*)stgCallocBytes( sizeof(Section), info->numberOfSections + 1, /* +1 for the global BSS section see ocGetNames_PEi386 */ "ocVerifyImage_PEi386(sections)"); oc->sections = sections; oc->n_sections = info->numberOfSections + 1; oc->info = stgCallocBytes (sizeof(struct ObjectCodeFormatInfo), 1, "ocVerifyImage_PEi386(info)"); oc->info->init = NULL; oc->info->finit = NULL; oc->info->ch_info = info; /* Copy the tables over from object-file. Copying these allows us to simplify the indexing and to release the object file immediately after this step as all information we need would be in available. After loading we can also release everything in the info structure as it won't be needed again further freeing up memory. COFF_symbol is a union type, so we have to "adjust" the array to be able to access it using normal subscript notation. This eliminates the complex indexing later on. */ uint32_t s_symbols = info->numberOfSymbols * sizeof(COFF_symbol); uint32_t sym_size = getSymbolSize (info); oc->info->symbols = stgMallocBytes (s_symbols, "ocVerifyImage_PEi386(oc->info->symbols)"); for (i = 0; i < info->numberOfSymbols; i++) memcpy (oc->info->symbols+i, (char*)symtab + sym_size * i, sym_size); uint32_t n_strtab = (*(uint32_t*)strtab) - PEi386_STRTAB_OFFSET; oc->info->str_tab = stgMallocBytes (n_strtab, "ocVerifyImage_PEi386(oc->info->str_tab)"); memcpy (oc->info->str_tab, strtab + PEi386_STRTAB_OFFSET, n_strtab); /* Initialize the Sections */ for (i = 0; i < info->numberOfSections; i++) { uint32_t relocs_offset; COFF_section* sectab_i = (COFF_section*) myindex(sizeof_COFF_section, sectab, i); Section *section = §ions[i]; /* Calculate the start of the section data. */ section->start = oc->image + sectab_i->PointerToRawData; section->size = sectab_i->SizeOfRawData; section->info = stgCallocBytes (sizeof(struct SectionFormatInfo), 1, "ocVerifyImage_PEi386(section.info)"); section->info->name = get_name_string (sectab_i->Name, oc); section->info->alignment = getSectionAlignment (*section); section->info->props = sectab_i->Characteristics; section->info->virtualSize = sectab_i->Misc.VirtualSize; section->info->virtualAddr = sectab_i->VirtualAddress; COFF_reloc* reltab = (COFF_reloc*) (oc->image + sectab_i->PointerToRelocations); if (section->info->props & IMAGE_SCN_LNK_NRELOC_OVFL ) { /* If the relocation field (a short) has overflowed, the * real count can be found in the first reloc entry. * * See Section 4.1 (last para) of the PE spec (rev6.0). */ COFF_reloc* rel = (COFF_reloc*) myindex ( sizeof_COFF_reloc, reltab, 0 ); noRelocs = rel->VirtualAddress - 1; relocs_offset = 1; } else { noRelocs = sectab_i->NumberOfRelocations; relocs_offset = 0; } section->info->noRelocs = noRelocs; section->info->relocs = NULL; if (noRelocs > 0) { section->info->relocs = stgMallocBytes (noRelocs * sizeof (COFF_reloc), "ocVerifyImage_PEi386(section->info->relocs)"); memcpy (section->info->relocs, reltab + relocs_offset, noRelocs * sizeof (COFF_reloc)); } } /* Initialize the last section's info field which contains the .bss section, the .info of which will be initialized by ocGetNames. Discard the .info that we computed above. */ stgFree(sections[info->numberOfSections].info); sections[info->numberOfSections].info = NULL; /* No further verification after this point; only debug printing. */ i = 0; IF_DEBUG(linker, i=1); if (i == 0) return true; debugBelch("sectab offset = %" FMT_SizeT "\n", ((uint8_t*)sectab) - ((uint8_t*)oc->image) ); debugBelch("symtab offset = %" FMT_SizeT "\n", ((uint8_t*)symtab) - ((uint8_t*)oc->image) ); debugBelch("strtab offset = %" FMT_SizeT "\n", ((uint8_t*)strtab) - ((uint8_t*)oc->image) ); debugBelch("\n" ); if (info->type == COFF_IMAGE) { IMAGE_FILE_HEADER* hdr = (IMAGE_FILE_HEADER*)oc->image; debugBelch( "COFF Type: IMAGE_FILE_HEADER\n"); debugBelch( "Machine: 0x%x\n", (uint32_t)(hdr->Machine) ); debugBelch( "# sections: %d\n", (uint32_t)(hdr->NumberOfSections) ); debugBelch( "time/date: 0x%x\n", (uint32_t)(hdr->TimeDateStamp) ); debugBelch( "symtab offset: %d\n", (uint32_t)(hdr->PointerToSymbolTable) ); debugBelch( "# symbols: %d\n", (uint32_t)(hdr->NumberOfSymbols) ); debugBelch( "sz of opt hdr: %d\n", (uint32_t)(hdr->SizeOfOptionalHeader) ); debugBelch( "characteristics: 0x%x\n", (uint32_t)(hdr->Characteristics) ); } else if (info->type == COFF_ANON_BIG_OBJ) { ANON_OBJECT_HEADER_BIGOBJ* hdr = (ANON_OBJECT_HEADER_BIGOBJ*)oc->image; debugBelch( "COFF Type: ANON_OBJECT_HEADER_BIGOBJ\n"); debugBelch( "Machine: 0x%x\n", (uint32_t)(hdr->Machine) ); debugBelch( "# sections: %d\n", (uint32_t)(hdr->NumberOfSections) ); debugBelch( "time/date: 0x%x\n", (uint32_t)(hdr->TimeDateStamp) ); debugBelch( "symtab offset: %d\n", (uint32_t)(hdr->PointerToSymbolTable) ); debugBelch( "# symbols: %d\n", (uint32_t)(hdr->NumberOfSymbols) ); } else { debugBelch( "COFF Type: UNKNOWN\n"); return false; } i = 0; IF_DEBUG(linker_verbose, i=1); if (i == 0) return true; /* Print the section table. */ debugBelch("\n" ); for (i = 0; i < info->numberOfSections; i++) { COFF_section* sectab_i = (COFF_section*) myindex ( sizeof_COFF_section, sectab, i ); Section section = sections[i]; debugBelch( "\n" "section %d\n" " name `", i ); printName (sectab_i->Name, oc); debugBelch( "'\n" " vsize %lu\n" " vaddr %lu\n" " data sz %lu\n" " data off 0x%p\n" " num rel %hu\n" " off rel %lu\n" " ptr raw 0x%lx\n" " align %u\n" " data adj %zu\n", sectab_i->Misc.VirtualSize, sectab_i->VirtualAddress, sectab_i->SizeOfRawData, section.start, sectab_i->NumberOfRelocations, sectab_i->PointerToRelocations, sectab_i->PointerToRawData, getSectionAlignment (section), getAlignedValue (section.size, section) ); noRelocs = section.info->noRelocs; for (uint32_t j = 0; j < noRelocs; j++) { COFF_reloc rel = section.info->relocs[j]; debugBelch( " type 0x%-4x vaddr 0x%-8lx name `", rel.Type, rel.VirtualAddress ); COFF_symbol sym = oc->info->symbols[rel.SymbolTableIndex]; printName (getSymShortName (info, &sym), oc); debugBelch("'\n" ); } debugBelch("\n" ); } debugBelch("\n" ); debugBelch("string table has size 0x%x\n", n_strtab + PEi386_STRTAB_OFFSET); debugBelch("---START of string table---\n"); for (i = 4; i < n_strtab; i++) { if (strtab[i] == 0) debugBelch("\n"); else debugBelch("%c", strtab[i] ); } debugBelch("--- END of string table---\n"); debugBelch("\n" ); for (i = 0; i < info->numberOfSymbols; i++) { COFF_symbol* symtab_i = &oc->info->symbols[i]; debugBelch( "symbol %d\n" " name `", i ); printName (getSymShortName (info, symtab_i), oc); debugBelch( "'\n" " value 0x%x\n" " 1+sec# %d\n" " type 0x%x\n" " sclass 0x%x\n" " nAux %d\n", getSymValue (info, symtab_i), getSymSectionNumber (info, symtab_i), getSymType (info, symtab_i), getSymStorageClass (info, symtab_i), getSymNumberOfAuxSymbols (info, symtab_i) ); i += getSymNumberOfAuxSymbols (info, symtab_i); } debugBelch("\n" ); return true; } bool ocGetNames_PEi386 ( ObjectCode* oc ) { bool has_code_section = false; COFF_HEADER_INFO *info = oc->info->ch_info; /* Copy section information into the ObjectCode. */ for (unsigned int i = 0; i < info->numberOfSections; i++) { /* By default consider all section as CODE or DATA, which means we want to load them. */ SectionKind kind = SECTIONKIND_CODE_OR_RODATA; Section *section = &oc->sections[i]; uint32_t alignment = getSectionAlignment(*section); // These will be computed below and determine how we will handle the // section size_t sz = section->size; bool do_copy = true; bool do_zero = false; IF_DEBUG(linker, debugBelch("section name = %s (%x)\n", section->info->name, section->info->props )); /* The PE file section flag indicates whether the section contains code or data. */ if (section->info->props & IMAGE_SCN_CNT_CODE) { has_code_section = has_code_section || section->size > 0; kind = SECTIONKIND_CODE_OR_RODATA; } if (section->info->props & IMAGE_SCN_MEM_WRITE) { kind = SECTIONKIND_RWDATA; } /* Check next if it contains any uninitialized data */ if (section->info->props & IMAGE_SCN_CNT_UNINITIALIZED_DATA) { kind = SECTIONKIND_RWDATA; do_copy = false; } /* Finally check if it can be discarded. This will also ignore .debug sections */ if ( section->info->props & IMAGE_SCN_MEM_DISCARDABLE || section->info->props & IMAGE_SCN_LNK_REMOVE) { kind = SECTIONKIND_OTHER; } if (0==strncmp(".ctors", section->info->name, 6)) { kind = SECTIONKIND_INIT_ARRAY; oc->info->init = &oc->sections[i]; } if (0==strncmp(".dtors", section->info->name, 6)) { kind = SECTIONKIND_FINIT_ARRAY; oc->info->finit = &oc->sections[i]; } if ( 0 == strncmp(".stab" , section->info->name, 5 ) || 0 == strncmp(".stabstr" , section->info->name, 8 ) || 0 == strncmp(".debug" , section->info->name, 6 ) || 0 == strncmp(".rdata$zzz", section->info->name, 10)) kind = SECTIONKIND_DEBUG; /* Exception Unwind information. See Note [Exception Unwinding]. */ if (0 == strncmp(".xdata" , section->info->name, 6 )) { kind = SECTIONKIND_EXCEPTION_UNWIND; oc->info->xdata = &oc->sections[i]; } /* Exception handler tables, See Note [Exception Unwinding]. */ if (0 == strncmp(".pdata" , section->info->name, 6 )) { kind = SECTIONKIND_EXCEPTION_TABLE; oc->info->pdata = &oc->sections[i]; } if (0==strncmp(".idata", section->info->name, 6)) { kind = SECTIONKIND_IMPORT; } /* See Note [BFD import library]. */ if (0==strncmp(".idata$7", section->info->name, 8)) { kind = SECTIONKIND_BFD_IMPORT_LIBRARY_HEAD; } if (0==strncmp(".idata$6", section->info->name, 8)) { kind = SECTIONKIND_BFD_IMPORT_LIBRARY; } /* Allocate space for any (local, anonymous) .bss sections. */ if (0==strncmp(".bss", section->info->name, 4)) { /* sof 10/05: the PE spec text isn't too clear regarding what * the SizeOfRawData field is supposed to hold for object * file sections containing just uninitialized data -- for executables, * it is supposed to be zero; unclear what it's supposed to be * for object files. However, VirtualSize is guaranteed to be * zero for object files, which definitely suggests that SizeOfRawData * will be non-zero (where else would the size of this .bss section be * stored?) Looking at the COFF_section info for incoming object files, * this certainly appears to be the case. * * => I suspect we've been incorrectly handling .bss sections in * (relocatable) object files up until now. This turned out to bite us * with ghc-6.4.1's use of gcc-3.4.x, which has started to emit * initially-zeroed-out local 'static' variable decls into the .bss * section. (The specific function in Q which triggered this is * libraries/base/cbits/dirUtils.c:__hscore_getFolderPath()) * * TODO: check if this comment is still relevant. */ if (section->info->virtualSize == 0 && section->size == 0) { IF_DEBUG(linker_verbose, debugBelch("skipping empty .bss section\n")); continue; } /* This is a non-empty .bss section. Allocate zeroed space for it */ kind = SECTIONKIND_RWDATA; do_zero = true; do_copy = false; IF_DEBUG(linker_verbose, debugBelch("BSS anon section\n")); } CHECK(section->size == 0 || section->info->virtualSize == 0); if (sz < section->info->virtualSize) { sz = section->info->virtualSize; } // Ignore these section types if (kind == SECTIONKIND_OTHER || sz == 0) { continue; } // Allocate memory for the section. uint8_t *start; if (kind == SECTIONKIND_CODE_OR_RODATA) { start = m32_alloc(oc->rx_m32, sz, alignment); } else { start = m32_alloc(oc->rw_m32, sz, alignment); } if (!start) { barf("Could not allocate any heap memory from private heap (requested %" FMT_SizeT " bytes).", sz); } if (do_copy) { memcpy(start, section->start, sz); } else if (do_zero) { memset(start, 0, sz); } addSection(section, kind, SECTION_NOMEM, start, sz, 0, 0, 0); addProddableBlock(oc, oc->sections[i].start, sz); } /* Copy exported symbols into the ObjectCode. */ oc->n_symbols = info->numberOfSymbols; oc->symbols = stgCallocBytes(sizeof(Symbol_t), oc->n_symbols, "ocGetNames_PEi386(oc->symbols)"); /* Work out the size of the global BSS section */ StgWord globalBssSize = 0; for (unsigned int i=0; i < info->numberOfSymbols; i++) { COFF_symbol* sym = &oc->info->symbols[i]; if (getSymSectionNumber (info, sym) == IMAGE_SYM_UNDEFINED && getSymValue (info, sym) > 0 && getSymStorageClass (info, sym) != IMAGE_SYM_CLASS_SECTION) { globalBssSize += getSymValue (info, sym); } i += getSymNumberOfAuxSymbols (info, sym); } /* Allocate BSS space */ SymbolAddr* bss = NULL; if (globalBssSize > 0) { bss = m32_alloc(oc->rw_m32, globalBssSize, 16); if (bss == NULL) { barf("ocGetNames_PEi386: Failed to allocate global bss section"); } addSection(&oc->sections[oc->n_sections-1], SECTIONKIND_RWDATA, SECTION_MALLOC, bss, globalBssSize, 0, 0, 0); IF_DEBUG(linker_verbose, debugBelch("bss @ %p %" FMT_Word "\n", bss, globalBssSize)); addProddableBlock(oc, bss, globalBssSize); } else { addSection(&oc->sections[oc->n_sections-1], SECTIONKIND_OTHER, SECTION_NOMEM, NULL, 0, 0, 0, 0); } /* At this point we're done with oc->image and all relevant memory have been copied. Release it to free up the memory. */ stgFree (oc->image); oc->image = NULL; for (unsigned int i = 0; i < (uint32_t)oc->n_symbols; i++) { COFF_symbol* sym = &oc->info->symbols[i]; int32_t secNumber = getSymSectionNumber (info, sym); uint32_t symValue = getSymValue (info, sym); uint8_t symStorageClass = getSymStorageClass (info, sym); SymbolAddr *addr = NULL; bool isWeak = false; SymbolName *sname = get_sym_name (getSymShortName (info, sym), oc); Section *section = secNumber > 0 ? &oc->sections[secNumber-1] : NULL; SymType type; switch (getSymType(oc->info->ch_info, sym)) { case 0x00: type = SYM_TYPE_DATA; break; case 0x20: type = SYM_TYPE_CODE; break; default: debugBelch("Invalid symbol type: 0x%x\n", getSymType(oc->info->ch_info, sym)); return 1; } if ( secNumber != IMAGE_SYM_UNDEFINED && secNumber > 0 && section && section->kind != SECTIONKIND_BFD_IMPORT_LIBRARY) { /* This symbol is global and defined, viz, exported */ /* for IMAGE_SYMCLASS_EXTERNAL && !IMAGE_SYM_UNDEFINED, the address of the symbol is: address of relevant section + offset in section */ if (symStorageClass == IMAGE_SYM_CLASS_EXTERNAL || ( symStorageClass == IMAGE_SYM_CLASS_STATIC && section->info->props & IMAGE_SCN_LNK_COMDAT) ) { addr = (SymbolAddr*)((size_t)section->start + symValue); isWeak = section->info->props & IMAGE_SCN_LNK_COMDAT; } } else if (symStorageClass == IMAGE_SYM_CLASS_WEAK_EXTERNAL) { isWeak = true; } else if ( secNumber == IMAGE_SYM_UNDEFINED && symValue > 0) { /* This symbol isn't in any section at all, ie, global bss. Allocate zeroed space for it from the BSS section */ addr = bss; bss = (SymbolAddr*)((StgWord)bss + (StgWord)symValue); IF_DEBUG(linker_verbose, debugBelch("bss symbol @ %p %u\n", addr, symValue)); } else if (section && section->kind == SECTIONKIND_BFD_IMPORT_LIBRARY) { setImportSymbol(oc, sname); // There is nothing that we need to resolve in this object since we // will never call the import stubs in its text section oc->status = OBJECT_DONT_RESOLVE; IF_DEBUG(linker_verbose, debugBelch("import symbol %s\n", sname)); } else if (secNumber > 0 && section && section->kind == SECTIONKIND_BFD_IMPORT_LIBRARY_HEAD) { /* This is an Gnu BFD import section. We should load the dll and lookup the symbols. See Note [BFD import library]. */ char* dllName = section->start; if (strlen(dllName) == 0 || dllName[0] == 0 || has_code_section) continue; pathchar* dirName = pathdir(oc->fileName); HsPtr token = addLibrarySearchPath(dirName); stgFree(dirName); sym = &oc->info->symbols[oc->n_symbols-1]; sname = get_sym_name (getSymShortName (info, sym), oc); IF_DEBUG(linker_verbose, debugBelch("loading symbol `%s' from dll: '%ls' => `%s'\n", sname, oc->fileName, dllName)); pathchar* dll = mkPath(dllName); HINSTANCE dllInstance = 0; const char* result = addDLL_PEi386(dll, &dllInstance); removeLibrarySearchPath(token); stgFree(dll); if (result != NULL || dllInstance == 0) { errorBelch("Could not load `%s'. Reason: %s\n", (char*)dllName, result); return false; } /* Set the _dll_iname symbol to the dll's handle. */ addr = (SymbolAddr*)dllInstance; /* the symbols are named _iname when defined, but are named _head_ when looked up. (Ugh. thanks GCC.) So correct it when stored so we don't have to correct it each time when retrieved. */ int size = strlen(sname)+1; char *tmp = stgMallocBytes(size * sizeof(char), "ocGetNames_PEi386"); strncpy (tmp, sname, size); char *pos = strstr(tmp, "_iname"); /* drop anything after the name. There are some inconsistencies with whitespaces trailing the name. */ if (pos) pos[0] = '\0'; int start = 0; /* msys2 project's import lib builder has some inconsistent name mangling. Their names start with _ or __ yet they drop this when making the _head_ symbol. So do the same. */ while (tmp[start]=='_') start++; snprintf (sname, size, "_head_%s", tmp+start); sname[size-start]='\0'; stgFree(tmp); sname = strdup (sname); if (!ghciInsertSymbolTable(oc->fileName, symhash, sname, addr, false, type, oc)) return false; break; } if ((addr != NULL || isWeak) && (!section || (section && section->kind != SECTIONKIND_IMPORT))) { /* debugBelch("addSymbol %p `%s' Weak:%lld \n", addr, sname, isWeak); */ sname = strdup (sname); IF_DEBUG(linker_verbose, debugBelch("addSymbol %p `%s'\n", addr, sname)); ASSERT(i < (uint32_t)oc->n_symbols); oc->symbols[i].name = sname; oc->symbols[i].addr = addr; oc->symbols[i].type = type; if (isWeak) { setWeakSymbol(oc, sname); } if (! ghciInsertSymbolTable(oc->fileName, symhash, sname, addr, isWeak, type, oc)) return false; } else { /* We're skipping the symbol, but if we ever load this object file we'll want to skip it then too. */ oc->symbols[i].name = NULL; oc->symbols[i].addr = NULL; } i += getSymNumberOfAuxSymbols (info, sym); } return true; } #if defined(x86_64_HOST_ARCH) static size_t makeSymbolExtra_PEi386( ObjectCode* oc, uint64_t index STG_UNUSED, size_t s, char* symbol STG_UNUSED, SymType type ) { SymbolExtra *extra; if (type == SYM_TYPE_CODE) { // jmp *-14(%rip) extra = m32_alloc(oc->rx_m32, sizeof(SymbolExtra), 8); CHECK(extra); extra->addr = (uint64_t)s; static uint8_t jmp[] = { 0xFF, 0x25, 0xF2, 0xFF, 0xFF, 0xFF }; memcpy(extra->jumpIsland, jmp, 6); IF_DEBUG(linker_verbose, debugBelch("makeSymbolExtra(code): %s -> %p\n", symbol, &extra->jumpIsland)); return (size_t)&extra->jumpIsland; } else if (type == SYM_TYPE_INDIRECT_DATA) { extra = m32_alloc(oc->rw_m32, sizeof(SymbolExtra), 8); CHECK(extra); void *v = *(void**) s; extra->addr = (uint64_t)v; IF_DEBUG(linker_verbose, debugBelch("makeSymbolExtra(data): %s -> %p\n", symbol, &extra->addr)); return (size_t)&extra->addr; } else { extra = m32_alloc(oc->rw_m32, sizeof(SymbolExtra), 8); CHECK(extra); extra->addr = (uint64_t)s; IF_DEBUG(linker_verbose, debugBelch("makeSymbolExtra(indirect-data): %s -> %p\n", symbol, &extra->addr)); return (size_t)&extra->addr; } } void ocProtectExtras(ObjectCode* oc STG_UNUSED) { } #endif /* x86_64_HOST_ARCH */ bool ocResolve_PEi386 ( ObjectCode* oc ) { uint64_t A; size_t S; SymbolAddr* pP; unsigned int i; uint32_t j, noRelocs; /* ToDo: should be variable-sized? But is at least safe in the sense of buffer-overrun-proof. */ uint8_t symbol[1000]; /* debugBelch("resolving for %"PATH_FMT "\n", oc->fileName); */ /* Such libraries have been partially freed and can't be resolved. */ if (oc->status == OBJECT_DONT_RESOLVE) return 1; COFF_HEADER_INFO *info = oc->info->ch_info; uint32_t numberOfSections = info->numberOfSections; for (i = 0; i < numberOfSections; i++) { Section section = oc->sections[i]; /* Ignore sections called which contain stabs debugging information. */ if (section.kind == SECTIONKIND_DEBUG) continue; noRelocs = section.info->noRelocs; for (j = 0; j < noRelocs; j++) { COFF_symbol* sym; COFF_reloc* reloc = §ion.info->relocs[j]; /* the location to patch */ pP = (SymbolAddr*)( (uintptr_t)section.start + (uintptr_t)reloc->VirtualAddress - (uintptr_t)section.info->virtualAddr ); /* the existing contents of pP */ A = *(uint32_t*)pP; /* the symbol to connect to */ uint64_t symIndex = reloc->SymbolTableIndex; sym = &oc->info->symbols[symIndex]; SymType sym_type; IF_DEBUG(linker_verbose, debugBelch( "reloc sec %2d num %3d: P=%p, type 0x%-4x " "vaddr 0x%-8lx name `", i, j, pP, reloc->Type, reloc->VirtualAddress ); printName (getSymShortName (info, sym), oc); debugBelch("'\n" )); if (getSymStorageClass (info, sym) == IMAGE_SYM_CLASS_STATIC) { Section section = oc->sections[getSymSectionNumber (info, sym)-1]; S = ((size_t)(section.start)) + ((size_t)(getSymValue (info, sym))); } else { copyName ( getSymShortName (info, sym), oc, symbol, sizeof(symbol)-1 ); S = (size_t) lookupDependentSymbol( (char*)symbol, oc, &sym_type ); if ((void*)S == NULL) { errorBelch(" | %" PATH_FMT ": unknown symbol `%s'", oc->fileName, symbol); releaseOcInfo (oc); return false; } } IF_DEBUG(linker_verbose, debugBelch("S=%zx\n", S)); /* All supported relocations write at least 4 bytes */ checkProddableBlock(oc, pP, 4); switch (reloc->Type) { #if defined(i386_HOST_ARCH) case IMAGE_REL_I386_DIR32: case IMAGE_REL_I386_DIR32NB: *(uint32_t *)pP = S + A; break; case IMAGE_REL_I386_REL32: /* Tricky. We have to insert a displacement at pP which, when added to the PC for the _next_ insn, gives the address of the target (S). Problem is to know the address of the next insn when we only know pP. We assume that this literal field is always the last in the insn, so that the address of the next insn is pP+4 -- hence the constant 4. Also I don't know if A should be added, but so far it has always been zero. SOF 05/2005: 'A' (old contents of *pP) have been observed to contain values other than zero (the 'wx' object file that came with wxhaskell-0.9.4; dunno how it was compiled..). So, add displacement to old value instead of asserting A to be zero. Fixes wxhaskell-related crashes, and no other ill effects have been observed. Update: the reason why we're seeing these more elaborate relocations is due to a switch in how the NCG compiles SRTs and offsets to them from info tables. SRTs live in .(ro)data, while info tables live in .text, causing GAS to emit REL32/DISP32 relocations with non-zero values. Adding the displacement is the right thing to do. */ *(uint32_t *)pP = ((uint32_t)S) + A - ((uint32_t)(size_t)pP) - 4; break; #elif defined(x86_64_HOST_ARCH) case 1: /* R_X86_64_64 (ELF constant 1) - IMAGE_REL_AMD64_ADDR64 (PE constant 1) */ { uint64_t A; checkProddableBlock(oc, pP, 8); A = *(uint64_t*)pP; *(uint64_t *)pP = S + A; break; } case 2: /* R_X86_64_32 (ELF constant 10) - IMAGE_REL_AMD64_ADDR32 (PE constant 2) */ case 3: /* IMAGE_REL_AMD64_ADDR32NB (PE constant 3) */ case 17: /* R_X86_64_32S ELF constant, no PE mapping. See note [ELF constant in PE file] */ { uint64_t v; v = S + A; /* If IMAGE_REL_AMD64_ADDR32NB then subtract the image base. */ if (reloc->Type == 3) v -= (uint64_t) GetModuleHandleW(NULL); // N.B. in the case of the sign-extended relocations we must ensure that v // fits in a signed 32-bit value. See #15808. if (((int64_t) v > (int64_t) INT32_MAX) || ((int64_t) v < (int64_t) INT32_MIN)) { copyName (getSymShortName (info, sym), oc, symbol, sizeof(symbol)-1); S = makeSymbolExtra_PEi386(oc, symIndex, S, (char *)symbol, sym_type); /* And retry */ v = S + A; /* If IMAGE_REL_AMD64_ADDR32NB then subtract the image base. */ if (reloc->Type == 3) v -= (uint64_t) GetModuleHandleW(NULL); if (((int64_t) v > (int64_t) INT32_MAX) || ((int64_t) v < (int64_t) INT32_MIN)) { barf("IMAGE_REL_AMD64_ADDR32[NB]: High bits are set in 0x%zx for %s", v, (char *)symbol); } } *(uint32_t *)pP = (uint32_t)v; break; } case 4: /* R_X86_64_PC32 (ELF constant 2) - IMAGE_REL_AMD64_REL32 (PE constant 4) */ { intptr_t v; v = S + (int32_t)A - ((intptr_t)pP) - 4; if ((v > (int64_t) INT32_MAX) || (v < (int64_t) INT32_MIN)) { /* Make the trampoline then */ copyName (getSymShortName (info, sym), oc, symbol, sizeof(symbol)-1); S = makeSymbolExtra_PEi386(oc, symIndex, S, (char *)symbol, sym_type); /* And retry */ v = S + (int32_t)A - ((intptr_t)pP) - 4; if ((v > (int64_t) INT32_MAX) || (v < (int64_t) INT32_MIN)) { barf("IMAGE_REL_AMD64_REL32: High bits are set in 0x%zx for %s", v, (char *)symbol); } } *(uint32_t *)pP = (uint32_t)v; break; } #endif default: debugBelch("%" PATH_FMT ": unhandled PEi386 relocation type %d\n", oc->fileName, reloc->Type); releaseOcInfo (oc); return false; } } /* Register the exceptions inside this OC. See Note [Exception Unwinding]. */ if (section.kind == SECTIONKIND_EXCEPTION_TABLE) { #if defined(x86_64_HOST_ARCH) unsigned numEntries = section.size / sizeof(RUNTIME_FUNCTION); if (numEntries == 0) continue; /* Now register the exception handler for the range and point it to the unwind data. */ if (!RtlAddFunctionTable (section.start, numEntries, (uintptr_t) GetModuleHandleW(NULL))) { sysErrorBelch("Unable to register Exception handler for %p for " "section %s in %" PATH_FMT " (Win32 error %lu)", section.start, section.info->name, oc->fileName, GetLastError()); releaseOcInfo (oc); return false; } #endif /* x86_64_HOST_ARCH. */ } } IF_DEBUG(linker, debugBelch("completed %" PATH_FMT "\n", oc->fileName)); return true; } /* Note [ELF constant in PE file] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ For some reason, the PE files produced by GHC contain a linux relocation constant 17 (0x11) in the object files. As far as I (Phyx-) can tell this constant doesn't seem like it's coming from GHC, or at least I could not find anything in the .s output that GHC produces which specifies the relocation type. This leads me to believe that this is a bug in GAS. However because this constant is there we must deal with it. This is done by mapping it to the equivalent in behaviour PE relocation constant 0x03. See #9907 */ /* Note [Exception Unwinding] ~~~~~~~~~~~~~~~~~~~~~~~~~~ Exception Unwinding on Windows is handle using two named sections. .pdata: Exception registration tables. The .pdata section contains an array of function table entries that are used for exception handling. The entries must be sorted according to the function addresses (the first field in each structure) before being emitted into the final image. It is pointed to by the exception table entry in the image data directory. For x64 each entry contains: Offset Size Field Description 0 4 Begin Address The RVA of the corresponding function. 4 4 End Address The RVA of the end of the function. 8 4 Unwind Information The RVA of the unwind information. Note that these are RVAs even after being resolved by the linker, they are however ImageBase relative rather than PC relative. These are typically filled in by an ADDR32NB relocation. On disk the section looks like: Function Table #6 (4) Begin End Info 00000000 00000000 000001A1 00000000 0000000C 000001A1 000001BF 00000034 00000018 000001BF 00000201 00000040 00000024 00000201 0000021F 0000004C RELOCATIONS #6 Symbol Symbol Offset Type Applied To Index Name -------- ---------------- ----------------- -------- ------ 00000000 ADDR32NB 00000000 E .text 00000004 ADDR32NB 000001A1 E .text 00000008 ADDR32NB 00000000 16 .xdata 0000000C ADDR32NB 000001A1 E .text 00000010 ADDR32NB 000001BF E .text 00000014 ADDR32NB 00000034 16 .xdata 00000018 ADDR32NB 000001BF E .text 0000001C ADDR32NB 00000201 E .text 00000020 ADDR32NB 00000040 16 .xdata 00000024 ADDR32NB 00000201 E .text 00000028 ADDR32NB 0000021F E .text 0000002C ADDR32NB 0000004C 16 .xdata This means that if we leave it up to the relocation processing to do the work we don't need to do anything special here. Note that every single function will have an entry in this table regardless whether they have an unwind code or not. The reason for this is that unwind handlers can be chained, and such another function may have registered an overlapping region. .xdata: Exception unwind codes. This section contains an array of entries telling the unwinder how to do unwinding. They are pointed to by the .pdata table enteries from the Info field. Each entry is very complicated but for now what is important is that the addresses are resolved by the relocs for us. Once we have resolved .pdata and .xdata we can simply pass the content of .pdata on to RtlAddFunctionTable and the OS will do the rest. When we're unloading the object we have to unregister them using RtlDeleteFunctionTable. */ bool ocRunInit_PEi386 ( ObjectCode *oc ) { if (!oc || !oc->info || !oc->info->init) { return true; } int argc, envc; char **argv, **envv; getProgArgv(&argc, &argv); getProgEnvv(&envc, &envv); Section section = *oc->info->init; CHECK(SECTIONKIND_INIT_ARRAY == section.kind); uint8_t *init_startC = section.start; init_t *init_start = (init_t*)init_startC; init_t *init_end = (init_t*)(init_startC + section.size); // ctors are run *backwards*! for (init_t *init = init_end - 1; init >= init_start; init--) (*init)(argc, argv, envv); freeProgEnvv(envc, envv); releaseOcInfo (oc); return true; } SymbolAddr *lookupSymbol_PEi386(SymbolName *lbl, ObjectCode *dependent, SymType *type) { RtsSymbolInfo *pinfo; if (!ghciLookupSymbolInfo(symhash, lbl, &pinfo)) { IF_DEBUG(linker, debugBelch("lookupSymbol: symbol '%s' not found\n", lbl)); SymbolAddr* sym; /* See Note [mingw-w64 name decoration scheme] */ #if !defined(x86_64_HOST_ARCH) zapTrailingAtSign ( lbl ); #endif if (type) { // Unfortunately we can only assume that this is the case. Ideally // the user would have given us an import library, which would allow // us to determine the symbol type precisely. *type = SYM_TYPE_CODE; } sym = lookupSymbolInDLLs(lbl, dependent); return sym; // might be NULL if not found } else { if (type) *type = pinfo->type; // If Windows, perform initialization of uninitialized // Symbols from the C runtime which was loaded above. // We do this on lookup to prevent the hit when // The symbol isn't being used. if (pinfo->value == (void*)0xBAADF00D) { char symBuffer[50]; const char *crt_impl = "ucrtbase"; sprintf(symBuffer, "_%s", lbl); static HMODULE crt = NULL; if (!crt) crt = GetModuleHandle(crt_impl); pinfo->value = GetProcAddress(crt, symBuffer); return pinfo->value; } else if (pinfo && pinfo->owner && isSymbolImport (pinfo->owner, lbl)) { /* See Note [BFD import library]. */ HINSTANCE dllInstance = (HINSTANCE)lookupDependentSymbol(pinfo->value, dependent, type); if (!dllInstance && pinfo->value) return pinfo->value; if (!dllInstance) { errorBelch("Unable to load import dll symbol `%s'. " "No _iname symbol.", lbl); return NULL; } IF_DEBUG(linker, debugBelch("indexing import %s => %s using dll instance %p\n", lbl, (char*)pinfo->value, dllInstance)); pinfo->value = GetProcAddress((HMODULE)dllInstance, lbl); clearImportSymbol (pinfo->owner, lbl); return pinfo->value; } else { if (dependent) { // Add dependent as symbol's owner's dependency ObjectCode *owner = pinfo->owner; if (owner) { // TODO: what does it mean for a symbol to not have an owner? insertHashSet(dependent->dependencies, (W_)owner); } } return loadSymbol(lbl, pinfo); } } } /* ----------------------------------------------------------------------------- * Debugging operations. */ typedef struct _SymX { SymbolName* name; uintptr_t loc; } SymX; static int comp (const void * elem1, const void * elem2) { SymX f = *((SymX*)elem1); SymX s = *((SymX*)elem2); if (f.loc > s.loc) return 1; if (f.loc < s.loc) return -1; return 0; } pathchar* resolveSymbolAddr_PEi386 (pathchar* buffer, int size, SymbolAddr* symbol, uintptr_t* top ){ SYMBOL_INFO sym; ZeroMemory (&sym, sizeof(SYMBOL_INFO)); sym.MaxNameLen = sizeof(char) * 1024; DWORD64 uDisplacement = 0; HANDLE hProcess = GetCurrentProcess(); ObjectCode* obj = NULL; uintptr_t start, end; *top = 0; pathprintf (buffer, size, WSTR("0x%" PRIxPTR), symbol); if (SymFromAddr (hProcess, (uintptr_t)symbol, &uDisplacement, &sym)) { /* Try using Windows symbols. */ wcscat (buffer, WSTR(" ")); pathchar* name = mkPath (sym.Name); wcscat (buffer, name); stgFree (name); if (uDisplacement != 0) { int64_t displacement = (int64_t)uDisplacement; pathchar s_disp[50]; if (displacement < 0) pathprintf ((pathchar*)s_disp, 50, WSTR("-%ld"), -displacement); else pathprintf ((pathchar*)s_disp, 50, WSTR("+%ld"), displacement); wcscat (buffer, s_disp); } } else { /* Try to calculate from information inside the rts. */ uintptr_t loc = (uintptr_t)symbol; for (ObjectCode* oc = objects; oc; oc = oc->next) { for (int i = 0; i < oc->n_sections; i++) { Section section = oc->sections[i]; start = (uintptr_t)section.start; end = start + section.size; if (loc > start && loc <= end) { wcscat (buffer, WSTR(" ")); if (oc->archiveMemberName) { wcscat (buffer, oc->archiveMemberName); } else { wcscat (buffer, oc->fileName); } pathchar s_disp[50]; pathprintf (s_disp, 50, WSTR("+0x%" PRIxPTR), loc - start); wcscat (buffer, s_disp); obj = oc; goto exit_loop; } } } /* If we managed to make it here, we must not have any symbols nor be dealing with code we've linked. The only thing left is an internal segfault or one in a dynamic library. So let's enumerate the module address space. */ HMODULE *hMods = NULL; DWORD cbNeeded; EnumProcessModules (hProcess, hMods, 0, &cbNeeded); hMods = stgMallocBytes (cbNeeded, "resolveSymbolAddr_PEi386"); if (EnumProcessModules (hProcess, hMods, cbNeeded, &cbNeeded)) { uintptr_t loc = (uintptr_t)symbol; MODULEINFO info; for (uint32_t i = 0; i < cbNeeded / sizeof(HMODULE); i++) { ZeroMemory (&info, sizeof (MODULEINFO)); if (GetModuleInformation (hProcess, hMods[i], &info, sizeof(MODULEINFO))) { uintptr_t start = (uintptr_t)info.lpBaseOfDll; uintptr_t end = start + info.SizeOfImage; if (loc >= start && loc < end) { /* Hoera, finally found some information. */ pathchar tmp[MAX_PATH]; if (GetModuleFileNameExW (hProcess, hMods[i], tmp, MAX_PATH)) { wcscat (buffer, WSTR(" ")); wcscat (buffer, tmp); pathprintf (tmp, MAX_PATH, WSTR("+0x%" PRIxPTR), loc - start); wcscat (buffer, tmp); } break; } } } } stgFree(hMods); } /* Finally any file/line number. */ IMAGEHLP_LINE64 lineInfo = {0}; DWORD dwDisplacement = 0; exit_loop: if (SymGetLineFromAddr64(hProcess, (uintptr_t)symbol, &dwDisplacement, &lineInfo)) { /* Try using Windows symbols. */ pathchar s_line[512]; pathprintf ((pathchar*) s_line, 512, WSTR(" %ls (%lu)"), lineInfo.FileName, lineInfo.LineNumber); wcscat (buffer, s_line); if (dwDisplacement != 0) { pathprintf ((pathchar*) s_line, 512, WSTR(" +%lu byte%s"), dwDisplacement, (dwDisplacement == 1 ? WSTR("") : WSTR("s"))); } wcscat (buffer, s_line); } else if (obj) { /* Try to calculate from information inside the rts. */ SymX* locs = stgCallocBytes (sizeof(SymX), obj->n_symbols, "resolveSymbolAddr"); int blanks = 0; for (int i = 0; i < obj->n_symbols; i++) { SymbolName* sym = obj->symbols[i].name; if (sym == NULL) { blanks++; continue; } RtsSymbolInfo* a = NULL; ghciLookupSymbolInfo(symhash, sym, &a); if (a) { SymX sx = {0}; sx.name = sym; sx.loc = (uintptr_t)a->value; locs[i] = sx; } } qsort (locs, obj->n_symbols, sizeof (SymX), comp); uintptr_t key = (uintptr_t)symbol; SymX* res = NULL; for (int x = blanks; x < obj->n_symbols; x++) { if (x < (obj->n_symbols -1)) { if (locs[x].loc >= key && key < locs[x+1].loc) { res = &locs[x]; break; } } else { if (locs[x].loc >= key) { res = &locs[x]; break; } } } if (res) { pathchar s_disp[512]; *top = (uintptr_t)res->loc; pathprintf ((pathchar*)s_disp, 512, WSTR("\n\t\t (%s+0x%" PRIxPTR ")"), res->name, res->loc - key); wcscat (buffer, s_disp); } stgFree (locs); } return buffer; } #endif /* mingw32_HOST_OS */