diff options
author | Ben Gamari <ben@smart-cactus.org> | 2022-03-21 18:56:42 -0400 |
---|---|---|
committer | Ben Gamari <ben@smart-cactus.org> | 2022-04-06 16:25:25 -0400 |
commit | 7e8cc293702354bec350c4d3c784a70cd8f7ffe4 (patch) | |
tree | 611f581939fa6dfd9c5e0d03f64fa2c973897ff1 /rts | |
parent | 655e7d8f968eeaabfc02329d7ac9361678364c06 (diff) | |
download | haskell-7e8cc293702354bec350c4d3c784a70cd8f7ffe4.tar.gz |
rts/PEi386: Rework linker
This is a significant rework of the PEi386 linker, making the linker
compatible with high image base addresses. Specifically, we now use the
m32 allocator instead of `HeapAllocate`.
In addition I found a number of latent bugs in our handling of import
libraries and relocations. I've added quite a few comments describing
what I've learned about Windows import libraries while fixing these.
Thanks to Tamar Christina (@Phyx) for providing the address space search
logic, countless hours of help while debugging, and his boundless
Windows knowledge.
Co-Authored-By: Tamar Christina <tamar@zhox.com>
Diffstat (limited to 'rts')
-rw-r--r-- | rts/Linker.c | 16 | ||||
-rw-r--r-- | rts/LinkerInternals.h | 18 | ||||
-rw-r--r-- | rts/linker/M32Alloc.c | 23 | ||||
-rw-r--r-- | rts/linker/MMap.c | 10 | ||||
-rw-r--r-- | rts/linker/PEi386.c | 777 | ||||
-rw-r--r-- | rts/linker/PEi386.h | 22 | ||||
-rw-r--r-- | rts/linker/PEi386Types.h | 4 |
7 files changed, 493 insertions, 377 deletions
diff --git a/rts/Linker.c b/rts/Linker.c index c1918ecbba..823d58cc67 100644 --- a/rts/Linker.c +++ b/rts/Linker.c @@ -817,9 +817,8 @@ HsInt insertSymbol(pathchar* obj_name, SymbolName* key, SymbolAddr* data) #if defined(OBJFORMAT_PEi386) SymbolAddr* lookupDependentSymbol (SymbolName* lbl, ObjectCode *dependent, SymType *type) { - (void)dependent; // TODO ASSERT_LOCK_HELD(&linker_mutex); - return lookupSymbol_PEi386(lbl, type); + return lookupSymbol_PEi386(lbl, dependent, type); } #else @@ -1482,7 +1481,7 @@ HsInt loadOc (ObjectCode* oc) { int r; - IF_DEBUG(linker, debugBelch("loadOc: start (%s)\n", oc->fileName)); + IF_DEBUG(linker, debugBelch("loadOc: start (%" PATH_FMT ")\n", oc->fileName)); /* verify the in-memory image */ # if defined(OBJFORMAT_ELF) @@ -1513,8 +1512,7 @@ HsInt loadOc (ObjectCode* oc) symbol table. Allocating space for jump tables in ocAllocateExtras would just be a waste then as we'll be stopping further processing of the library in the next few steps. If necessary, the actual allocation - happens in `ocGetNames_PEi386` and `ocAllocateExtras_PEi386` simply - set the correct pointers. + happens in `ocGetNames_PEi386` simply set the correct pointers. */ #if defined(NEED_SYMBOL_EXTRAS) @@ -1550,12 +1548,6 @@ HsInt loadOc (ObjectCode* oc) return r; } -#if defined(NEED_SYMBOL_EXTRAS) -# if defined(OBJFORMAT_PEi386) - ocAllocateExtras_PEi386 ( oc ); -# endif -#endif - /* Loaded, but not resolved yet, ensure the OC is in a consistent state. If a target has requested the ObjectCode not to be resolved then honor this requests. Usually this means the ObjectCode has not been initialized @@ -1567,7 +1559,7 @@ HsInt loadOc (ObjectCode* oc) oc->status = OBJECT_LOADED; } } - IF_DEBUG(linker, debugBelch("loadOc: done (%s).\n", oc->fileName)); + IF_DEBUG(linker, debugBelch("loadOc: done (%" PATH_FMT ").\n", oc->fileName)); return 1; } diff --git a/rts/LinkerInternals.h b/rts/LinkerInternals.h index b1cd3a8a9a..917984a5c6 100644 --- a/rts/LinkerInternals.h +++ b/rts/LinkerInternals.h @@ -41,7 +41,16 @@ typedef struct _Section Section; /* * Note [Processing overflowed relocations] * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - * TODO + * When processing relocations whose targets exceed the relocation's maximum + * displacement, we can take advantage of knowledge of the symbol type to avoid + * linker failures. In particular, if we know that a symbol is a code symbol + * then we can handle the relocation by creating a "jump island", a small bit + * of code which immediately jumps (with an instruction sequence capable of + * larger displacement) to the target. + * + * This is not possible for data symbols (or, for that matter, Haskell symbols + * when TNTC is in use). In these cases we have to rather fail and ask the user + * to recompile their program as position-independent. */ /* What kind of thing a symbol identifies. We need to know this to determine how @@ -49,7 +58,7 @@ typedef struct _Section Section; typedef enum _SymType { SYM_TYPE_CODE, /* the symbol is a function and can be relocated via a jump island */ SYM_TYPE_DATA, /* the symbol is data */ - SYM_TYPE_INDIRECT_DATA, /* see Note [Relocating iob_func on Windows] */ + SYM_TYPE_INDIRECT_DATA, /* see Note [_iob_func symbol] */ } SymType; @@ -104,8 +113,11 @@ typedef SECTIONKIND_EXCEPTION_UNWIND, /* Section belongs to an import section group. e.g. .idata$. */ SECTIONKIND_IMPORT, + /* Section defines the head section of a BFD-style import library, e.g. idata$7. */ + SECTIONKIND_BFD_IMPORT_LIBRARY_HEAD, /* Section defines an import library entry, e.g. idata$7. */ - SECTIONKIND_IMPORT_LIBRARY, + SECTIONKIND_BFD_IMPORT_LIBRARY, + /* Unknown section */ SECTIONKIND_NOINFOAVAIL } SectionKind; diff --git a/rts/linker/M32Alloc.c b/rts/linker/M32Alloc.c index 5ce0342482..b1138e032c 100644 --- a/rts/linker/M32Alloc.c +++ b/rts/linker/M32Alloc.c @@ -460,6 +460,15 @@ m32_is_large_object(size_t size, size_t alignment) return size >= getPageSize() - ROUND_UP(sizeof(struct m32_page_t), alignment); } +static void +m32_report_allocation(struct m32_allocator_t *alloc STG_UNUSED, void *addr STG_UNUSED, size_t size STG_UNUSED) +{ + IF_DEBUG(linker_verbose, debugBelch( + "m32_allocated(%p:%s): %p - %p\n", + alloc, alloc->executable ? "RX": "RW", + addr, (uint8_t*) addr + size)); +} + /** * Allocate `size` bytes of memory with the given alignment. * @@ -474,6 +483,8 @@ m32_alloc(struct m32_allocator_t *alloc, size_t size, size_t alignment) if (m32_is_large_object(size,alignment)) { // large object size_t alsize = ROUND_UP(sizeof(struct m32_page_t), alignment); + // TODO: lower-bound allocation size to allocation granularity and return + // remainder to free pool. struct m32_page_t *page = mmapAnonForLinker(alsize+size); if (page == NULL) { sysErrorBelch("m32_alloc: Failed to map pages for %zd bytes", size); @@ -486,7 +497,9 @@ m32_alloc(struct m32_allocator_t *alloc, size_t size, size_t alignment) SET_PAGE_TYPE(page, FILLED_PAGE); page->filled_page.size = alsize + size; m32_allocator_push_filled_list(&alloc->unprotected_list, (struct m32_page_t *) page); - return (char*) page + alsize; + uint8_t *res = (uint8_t *) page + alsize; + m32_report_allocation(alloc, res, size); + return res; } // small object @@ -508,6 +521,7 @@ m32_alloc(struct m32_allocator_t *alloc, size_t size, size_t alignment) if (size <= pgsz - alsize) { void * addr = (char*)alloc->pages[i] + alsize; alloc->pages[i]->current_size = alsize + size; + m32_report_allocation(alloc, addr, size); return addr; } @@ -534,9 +548,10 @@ m32_alloc(struct m32_allocator_t *alloc, size_t size, size_t alignment) SET_PAGE_TYPE(page, NURSERY_PAGE); alloc->pages[empty] = page; // Add header size and padding - alloc->pages[empty]->current_size = - size+ROUND_UP(sizeof(struct m32_page_t),alignment); - return (char*)page + ROUND_UP(sizeof(struct m32_page_t),alignment); + alloc->pages[empty]->current_size = size + ROUND_UP(sizeof(struct m32_page_t),alignment); + uint8_t *res = (uint8_t *) page + ROUND_UP(sizeof(struct m32_page_t), alignment); + m32_report_allocation(alloc, res, size); + return res; } #else diff --git a/rts/linker/MMap.c b/rts/linker/MMap.c index ff6cc720b9..1ecfbc6c7f 100644 --- a/rts/linker/MMap.c +++ b/rts/linker/MMap.c @@ -92,7 +92,15 @@ memoryAccessToProt(MemoryAccess access) void * mmapAnonForLinker (size_t bytes) { - return VirtualAlloc(NULL, bytes, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE); + size_t size = 0; + /* For linking purposes we want to load code within a 4GB range from the + load address of the application. As such we need to find a location to + allocate at. */ + void* region = allocaLocalBytes (bytes, &size); + if (region == NULL) { + return NULL; + } + return VirtualAlloc(region, size, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE); } void diff --git a/rts/linker/PEi386.c b/rts/linker/PEi386.c index b8bcc8abec..64e26a174c 100644 --- a/rts/linker/PEi386.c +++ b/rts/linker/PEi386.c @@ -55,7 +55,58 @@ COFF_IMPORT_LIB and commonly has the file extension .lib * GNU BFD import format - The import library format defined and used by GNU - tools. See note below. + tools and commonly has the file extension .dll.a . See note below. + + Note [The need for import libraries] + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + In its original incarnation, PE had no native support for dynamic linking. + Let's examine how dynamic linking is now implemented. Consider a simple + program with a reference to function and data symbols provided by a DLL: + + // myprogram.c + #include <libfoo.h> + int do_something() { + libfoo_function(); + return libfoo_data; + } + + The header file shipped with libfoo will look like the following: + + // libfoo.h + __declspec(dllimport) int libfoo_function(); + __declspec(dllimport) int libfoo_data; + + When the C compiler is compiling myprogram.c, it will see these dllimport + declarations and use them to produce a module definition (.def) file which + summarizes the symbols that we expect the DLL to export. This will look like: + + EXPORTS + libfoo_function + libfoo_data DATA + + The C compiler will pass this file to the `dlltool` utility, which will + generate an *import library*. The import library will contain + placeholder symbols (with names starting with `__imp_`), along with + instructions for the dynamic linker to fix-up these references to point to + the "real" symbol definition. + + For historical reasons involving lack of documentation, NDAs, and (probably) + Steve Balmer, there are two flavours of import flavours: + + * Native Windows-style import libraries. These typically bear the .lib file + extension and encode their relocation information in the `.idata` section. + Documentation for this format is not available + [here](https://docs.microsoft.com/en-us/windows/win32/debug/pe-format#import-library-format). + These are handled in `checkAndLoadImportLibrary()` + + * GNU BFD-style import libraries. These typically have the .dll.a + extension and encode the relocation information in a set of sections + named `.idata$<N>` where `<N>` is an integer which encodes the section's + meaning. Somewhat ironically, despite being devised in response to the + native Windows format having no public documentation, there is no official + documentation for this format but Note [BFD import library] attempts to + summarize what we know. These are handled in `ocGetNames_PEi386()`. + Note [BFD import library] ~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -77,7 +128,7 @@ Anyway, the Windows PE format specifies a simple and efficient format for this: It's essentially a list, saying these X symbols can be found in DLL y. - Commonly, y is a versioned name. e.g. liby_43.dll. This is an artifact of + Commonly, y is a versioned name. e.g. `liby_43.dll`. This is an artifact of the days when Windows did not support side-by-side assemblies. So the solution was to version the DLLs by renaming them to include explicit version numbers, and to then use the import libraries to point to the right @@ -88,35 +139,62 @@ have created their own format. This format is either named using the suffix .dll.a or .a depending on the tool that makes them. This format is undocumented. However the source of dlltool.c in binutils is pretty handy to - understant it. + understand it (see binutils/dlltool.c; grep for ".idata section description"). To understand the implementation in GHC, this is what is important: - the .idata section group is used to hold this information. An import library + The import library is generally an archive containing one object file for + each imported symbol. In addition, there is a "head" object, which contains + the name of the DLL which the symbols are imported from, among other things. + + The `.idata$` section group is used to hold this information. An import library object file will always have these section groups, but the specific configuration depends on what the purpose of the file is. They will also never have a CODE or DATA section, though depending on the tool that creates them they may have the section headers, which will mostly be empty. - You have to different possible configuration: + The import data sections consist of the following: + + * `.idata$2` contains the Import Directory Table (IDT), which contains an entry + for each imported DLL. Each entry contains: a reference to the DLL's name + (in `.idata$7`) and references to its entries in the ILT and IAT sections. + This is contained in the head object. + + * `.idata$6` contains the Hint Name Table (HNT). This is a table of + of (symbol ordinal, symbol name) pairs, which are referred to be the ILT + and IAT as described below. + + * `.idata$5` contains the Import Address Table (IAT). This consists of an + array of pointers (one array for each imported DLL) which the loader will + update to point to the target symbol identified by the hint referenced by + the corresponding ILT entry. Moreover, the IAT pointers' initial values + also point to the corresponding HNT entry. + + * `.idata$4` contains the Import Lookup Table (ILT). This contains an array + of references to HNT entries for each imported DLL. + + * `.idata$7` contains the names of the imported DLLs. This is contained + in the head object. - 1) Those that define a redirection. In this case the .idata$7 section will + You have two different possible configurations: + + 1) Those that define a redirection. In this case the `.idata$7` section will contain the name of the actual dll to load. This will be the only content of the section. In the symbol table, the last symbol will be the name used to refer to the dll in the relocation tables. This name will always - be in the format "symbol_name_iname", however when referred to, the format - "_head_symbol_name" is used. + be in the format `symbol_name_iname`, however when referred to, the format + `_head_symbol_name` is used. - We record this symbol early on during GetNames and load the dll and use + We record this symbol early on during `ocGetNames` and load the dll and use the module handle as the symbol address. - 2) Symbol definitions. In this case .idata$6 will contain the symbol to load. - This is stored in the fixed format of 2-byte ordinals followed by a null - terminated string with the symbol name. The ordinal is to be used when - the dll does not export symbols by name. (NOTE: We don't currently - support this in the runtime linker, but it's easy to add should it be - needed). The last symbol in the symbol table of the section will contain - the name symbol which contains the dll name to use to resolve the + 2) Symbol definitions. In this case the HNT (`.idata$6`) will contain the + symbol to load. This is stored in the fixed format of 2-byte ordinals + followed by (null-terminated) symbol name. The ordinal is + to be used when the DLL does not export symbols by name. (note: We don't + currently support this in the runtime linker, but it's easy to add should + it be needed). The last symbol in the symbol table of the section will + contain the name symbol which contains the dll name to use to resolve the reference. As a technicality, this also means that the GCC format will allow us to use @@ -125,48 +203,97 @@ required for dynamic linking support for GHC. So the runtime linker now supports this too. + + Example: Dynamic code references + -------------------------------- + To see what such an import library looks like, let's first start with the case + of a function (e.g. `libfoo_function` above) with bind-now semantics (lazy-loading + will look much different). The import library will contain the following: + + .section .text + # This stub (which Windows calls a thunk) is what calls to + # libfoo_function will hit if the symbol isn't declared with + # __declspec(dllimport) + libfoo_function: + jmp *0x0(%rip) + .quad __imp_libfoo_function + + .section .idata$5 # IAT + # This is the location which the loader will + # update to point to the definition + # of libfoo_function + __imp_libfoo_function: + .quad hint1 - __image_base__ + + .section .idata$4 # ILT + # This (and hint1 below) is what tells the + # loader where __imp_libfoo_function should point + ilt1: + .quad hint1 - __image_base__ + + .section .idata$6 # HNT + hint1: + .short ORDINAL_OF_libfoo_function + .asciiz "libfoo_function" + + To handle a reference to an IAT entry like `__imp_libfoo_function`, the GHC + linker will (in `lookupSymbolInDLLs`) first strip off the `__imp_` prefix to + find the name of the referenced dynamic symbol. It then resolves the + symbol's address and allocates an `IndirectAddr` where it can place the + address, which it will return as the resolution of the `___libfoo_function`. + + Example: Dynamic data references + -------------------------------- + Let's now consider the import library for a data symbol. This is essentially + equivalent to the code case, but without the need to emit a thunk: + + .section .idata$5 # IAT + __imp_libfoo_data: + .quad hint2 - __image_base__ + + .section .idata$4 # ILT + ilt2: + .quad hint2 - __image_base__ + + .section .idata$6 # ILT + hint2: + .short ORDINAL_OF_libfoo_data + .asciiz "libfoo_data" + + Note [Memory allocation] ~~~~~~~~~~~~~~~~~~~~~~~~ - Previously on Windows we would use VirtualAlloc to allocate enough space for - loading the entire object file into memory and keep it there for the duration - until the entire object file has been unloaded. - - This has a couple of problems, first of, VirtualAlloc and the other Virtual - functions interact directly with the memory manager. Requesting memory from - VirtualAlloc will always return whole pages (32k), aligned on a 4k boundary. - - This means for an object file of size N kbytes, we're always wasting 32-N - kbytes of memory. Nothing else can access this memory. - - Because of this we're now using HeapAlloc and other heap function to create - a private heap. Another solution would have been to write our own memory - manager to keep track of where we have free memory, but the private heap - solution is simpler. - - The private heap is created with full rights just as the pages we used to get - from VirtualAlloc (e.g. READ/WRITE/EXECUTE). In the end we end up using - memory much more efficiently than before. The downside is that heap memory - is always Allocated AND Committed, thus when the heap resizes the new size is - committed. It becomes harder to see how much we're actually using. This makes - it seem like for small programs that we're using more memory than before. - Certainly a clean GHCi startup will have a slightly higher commit count. - - The second major change in how we allocate memory is that we no longer need - the entire object file. We now allocate the object file using normal malloc - and instead read bits from it. All tables are stored in the Object file info - table and are discarded as soon as they are no longer needed, e.g. after - relocation is finished. Only section data is kept around, but this data is - copied into the private heap. - - The major knock on effect of this is that we have more memory to use in the - sub 2GB range, which means that Template Haskell should fail a lot less as we - will violate the small memory model much less than before. - - Note [Section alignment] - ~~~~~~~~~~~~~~~~~~~~~~~~ - The Windows linker aligns memory to it's section alignment requirement by - aligning it during the copying to the private heap. We also ensure that the - trampoline "region" we reserve is 8 bytes aligned. + The loading of an object begins in `preloadObjectFile`, which allocates a buffer, + `oc->image`, into which the object file is read. It then calls `ocVerifyImage`, + where we traverse the object file's header and populate `ObjectCode.sections`. + Specifically, we create a Section for each of the object's sections such + that: + + * the `.start` field points to its data in the mapped image + * the `.size` field reflects its intended size + * the .`info` field contains a `SectionFormatField` with other information + from its section header entry (namely `VirtualSize`, `VirtualAddress`, and + `Characteristics`) + + We then proceed to `ocGetNames`, where we again walk the section table header + and determine which sections need to be mapped and how (e.g. as readable-writable or + readable-executable). We then allocate memory for each section using the + appropriate m32 allocator and, where necessary, copy the data from + `section.start` (which points to the section in `oc->image`) + into the new allocation. Finally, `addSection()` updates the `section.start` field + to reflect the section's new home. In addition, we also allocate space for + the global BSS section. + + At this point we have no further need for the preloaded image buffer, + `oc->image` and therefore free it. + + Having populated the sections, we can proceed to add the object's symbols to + the symbol table. This is a matter of walking the object file's symbol table, + computing the symbol's address, and calling `ghciInsertSymbolTable`. + + Finally, we enter `ocResolve`, where we resolve relocations and and allocate + jump islands (using the m32 allocator for backing storage) as necessary. + */ #include "Rts.h" @@ -224,34 +351,14 @@ static bool checkIfDllLoaded( static uint32_t getSectionAlignment( Section section); -static uint8_t* getAlignedMemory( - uint8_t* value, - Section section); - static size_t getAlignedValue( size_t value, Section section); -static void addCopySection( - ObjectCode *oc, - Section *s, - SectionKind kind, - SectionAlloc alloc, - void* start, - StgWord size); - static void releaseOcInfo( ObjectCode* oc); -/* Add ld symbol for PE image base. */ -#if defined(__GNUC__) -#define __ImageBase __MINGW_LSYMBOL(_image_base__) -#endif - -/* Get the base of the module. */ -/* This symbol is defined by ld. */ -extern IMAGE_DOS_HEADER __ImageBase; -#define __image_base (void*)((HINSTANCE)&__ImageBase) +static SymbolAddr *lookupSymbolInDLLs ( const SymbolName* lbl, ObjectCode *dependent ); const Alignments pe_alignments[] = { { IMAGE_SCN_ALIGN_1BYTES , 1 }, @@ -272,8 +379,6 @@ const Alignments pe_alignments[] = { const int pe_alignments_cnt = sizeof (pe_alignments) / sizeof (Alignments); const int default_alignment = 8; -const int initHeapSizeMB = 15; -static HANDLE code_heap = NULL; /* See Note [_iob_func symbol] In order to emulate __iob_func the memory location needs to point the @@ -281,51 +386,120 @@ static HANDLE code_heap = NULL; the pointer as a redirect. Essentially it's a DATA DLL reference. */ const void* __rts_iob_func = (void*)&__acrt_iob_func; -/* Low Fragmentation Heap, try to prevent heap from increasing in size when - space can simply be reclaimed. These are enums missing from mingw-w64's - headers. */ -#define HEAP_LFH 2 -#define HeapOptimizeResources 3 +/******************************************** + * Memory Management functions + ********************************************/ + +static inline uintptr_t round_up(uintptr_t num, uint64_t factor) +{ + return num + factor - 1 - (num + factor - 1) % factor; +} + +/* A wrapper for VirtualQuery() providing useful debug output */ +static int virtualQuery(void *baseAddr, PMEMORY_BASIC_INFORMATION info) +{ + int res = VirtualQuery (baseAddr, info, sizeof (*info)); + IF_DEBUG(linker_verbose, + debugBelch("Probing region 0x%p (0x%p) - 0x%p (%" FMT_SizeT ") [%ld] with base 0x%p\n", + baseAddr, + info->BaseAddress, + (uint8_t *) info->BaseAddress + info->RegionSize, + info->RegionSize, info->State, + info->AllocationBase)); + if (!res) { + IF_DEBUG(linker_verbose, debugBelch("Querying 0x%p failed. Aborting..\n", baseAddr)); + return 1; + } + return 0; +} + + +/* + * Try and find a location in the VMMAP to allocate SZ bytes starting at + * BASEADDR. If successful then location to use is returned and the amount of + * bytes you *must* allocate is returned in REQ. You are free to use less but + * you must allocate the amount given in REQ. If not successful NULL. + */ +static void *allocateBytes(void* baseAddr, size_t sz, size_t *req) +{ + SYSTEM_INFO sys; + GetSystemInfo(&sys); + const uint64_t max_range = 4294967296UL; + IF_DEBUG(linker_verbose, debugBelch("Base Address 0x%p\n", baseAddr)); + IF_DEBUG(linker_verbose, debugBelch("Requesting mapping of %" FMT_SizeT " bytes within range %" + PRId64 " bytes\n", sz, max_range)); + + MEMORY_BASIC_INFORMATION info; + IF_DEBUG(linker_verbose, debugBelch("Initial query @ 0x%p...\n", baseAddr)); + int res = virtualQuery(baseAddr, &info); + if (res) { + return NULL; + } + + uint8_t *endAddr = (uint8_t *) baseAddr + max_range; + uint8_t *initialAddr = info.AllocationBase; + uint8_t *region = NULL; + while (!region + && (uint64_t) llabs(initialAddr - endAddr) <= max_range + && (void *) initialAddr < sys.lpMaximumApplicationAddress) + { + res = virtualQuery(initialAddr, &info); + if (res) { + return NULL; + } + + if ((info.State & MEM_FREE) == MEM_FREE) { + IF_DEBUG(linker_verbose, debugBelch("Free range at 0x%p of %zu bytes\n", + info.BaseAddress, info.RegionSize)); + + MEMORY_BASIC_INFORMATION info2; + res = virtualQuery(endAddr+1, &info2); + if (info.RegionSize >= sz) { + if (info.AllocationBase == 0) { + size_t needed_sz = round_up (sz, sys.dwAllocationGranularity); + if (info.RegionSize >= needed_sz) { + IF_DEBUG(linker_verbose, debugBelch("Range is unmapped, Allocation " + "required by granule...\n")); + *req = needed_sz; + region + = (void*)(uintptr_t)round_up ((uintptr_t)initialAddr, + sys.dwAllocationGranularity); + IF_DEBUG(linker_verbose, debugBelch("Requested %" PRId64 ", rounded: %" + PRId64 ".\n", sz, *req)); + IF_DEBUG(linker_verbose, debugBelch("Aligned region claimed 0x%p -> " + "0x%p.\n", initialAddr, region)); + } + } else { + IF_DEBUG(linker_verbose, debugBelch("Range is usable for us, claiming...\n")); + *req = sz; + region = initialAddr; + } + } + } + initialAddr = (uint8_t *) info.BaseAddress + info.RegionSize; + } + + return region; +} + +void *allocaLocalBytes(size_t sz, size_t *req) +{ + return allocateBytes (GetModuleHandleW (NULL), sz, req); +} void initLinker_PEi386() { if (!ghciInsertSymbolTable(WSTR("(GHCi/Ld special symbols)"), - symhash, "__image_base__", __image_base, HS_BOOL_TRUE, SYM_TYPE_CODE, NULL)) { + symhash, "__image_base__", + GetModuleHandleW (NULL), HS_BOOL_TRUE, + SYM_TYPE_CODE, NULL)) { barf("ghciInsertSymbolTable failed"); } #if defined(mingw32_HOST_OS) addDLLHandle(WSTR("*.exe"), GetModuleHandle(NULL)); - /* - * Most of these are included by base, but GCC always includes them - * So lets make sure we always have them too. - * - * In most cases they would have been loaded by the - * addDLLHandle above. - */ - addDLL(WSTR("msvcrt")); - addDLL(WSTR("kernel32")); - addDLL(WSTR("advapi32")); - addDLL(WSTR("shell32")); - addDLL(WSTR("user32")); #endif - /* See Note [Memory allocation]. */ - /* Create a private heap which we will use to store all code and data. */ - SYSTEM_INFO sSysInfo; - GetSystemInfo(&sSysInfo); - code_heap = HeapCreate (HEAP_CREATE_ENABLE_EXECUTE, - initHeapSizeMB * sSysInfo.dwPageSize , 0); - if (!code_heap) - barf ("Could not create private heap during initialization. Aborting."); - - /* Set some flags for the new code heap. */ - HeapSetInformation(code_heap, HeapEnableTerminationOnCorruption, NULL, 0); - unsigned long HeapInformation = HEAP_LFH; - HeapSetInformation(code_heap, HeapEnableTerminationOnCorruption, - &HeapInformation, sizeof(HeapInformation)); - HeapSetInformation(code_heap, HeapOptimizeResources, NULL, 0); - /* Register the cleanup routine as an exit handler, this gives other exit handlers a chance to run which may need linker information. Exit handlers are ran in reverse registration order so this needs to be before the linker loads anything. */ @@ -334,19 +508,11 @@ void initLinker_PEi386() void exitLinker_PEi386() { - /* See Note [Memory allocation]. */ - if (code_heap) { - HeapDestroy (code_heap); - code_heap = NULL; - } } /* A list thereof. */ static OpenedDLL* opened_dlls = NULL; -/* A list thereof. */ -static IndirectAddr* indirects = NULL; - /* Adds a DLL instance to the list of DLLs in which to search for symbols. */ static void addDLLHandle(pathchar* dll_name, HINSTANCE instance) { @@ -426,11 +592,6 @@ void freePreloadObjectFile_PEi386(ObjectCode *oc) } if (oc->info) { - if (oc->info->image) { - HeapFree(code_heap, 0, oc->info->image); - oc->info->image = NULL; - } - /* Release the unwinder information. See Note [Exception Unwinding]. */ if (oc->info->xdata) { @@ -440,20 +601,13 @@ void freePreloadObjectFile_PEi386(ObjectCode *oc) oc->info->xdata = NULL; oc->info->pdata = NULL; } - if (oc->info->ch_info) + + if (oc->info->ch_info) { stgFree (oc->info->ch_info); + } stgFree (oc->info); oc->info = NULL; } - - IndirectAddr *ia, *ia_next; - ia = indirects; - while (ia != NULL) { - ia_next = ia->next; - stgFree(ia); - ia = ia_next; - } - indirects = NULL; } static void releaseOcInfo(ObjectCode* oc) { @@ -828,16 +982,6 @@ static uint32_t getSectionAlignment( } /* ---------------------- - * return a memory location aligned to the section requirements - */ -static uint8_t* getAlignedMemory( - uint8_t* value, Section section) { - uint32_t alignment = getSectionAlignment(section); - uintptr_t mask = (uintptr_t)alignment - 1; - return (uint8_t*)(((uintptr_t)value + mask) & ~mask); -} - -/* ---------------------- * return a value aligned to the section requirements */ static size_t getAlignedValue( @@ -1028,7 +1172,7 @@ zapTrailingAtSign ( SymbolName* sym ) #endif SymbolAddr* -lookupSymbolInDLLs ( const SymbolName* lbl ) +lookupSymbolInDLLs ( const SymbolName* lbl, ObjectCode *dependent ) { OpenedDLL* o_dll; SymbolAddr* sym; @@ -1042,6 +1186,7 @@ lookupSymbolInDLLs ( const SymbolName* lbl ) return sym; } + // TODO: Drop this /* Ticket #2283. Long description: http://support.microsoft.com/kb/132044 tl;dr: @@ -1053,15 +1198,15 @@ lookupSymbolInDLLs ( const SymbolName* lbl ) sym = GetProcAddress(o_dll->instance, lbl + 6 + STRIP_LEADING_UNDERSCORE); if (sym != NULL) { - IndirectAddr* ret; - ret = stgMallocBytes( sizeof(IndirectAddr), "lookupSymbolInDLLs" ); - ret->addr = sym; - ret->next = indirects; - indirects = ret; + SymbolAddr** indirect = m32_alloc(dependent->rw_m32, sizeof(SymbolAddr*), 8); + if (indirect == NULL) { + barf("lookupSymbolInDLLs: Failed to allocation indirection"); + } + *indirect = sym; IF_DEBUG(linker, debugBelch("warning: %s from %S is linked instead of %s\n", lbl+6+STRIP_LEADING_UNDERSCORE, o_dll->name, lbl)); - return (void*) & ret->addr; + return (void*) indirect; } } @@ -1152,8 +1297,6 @@ ocVerifyImage_PEi386 ( ObjectCode* oc ) oc->n_sections = info->numberOfSections + 1; oc->info = stgCallocBytes (sizeof(struct ObjectCodeFormatInfo), 1, "ocVerifyImage_PEi386(info)"); - oc->info->secBytesTotal = 0; - oc->info->secBytesUsed = 0; oc->info->init = NULL; oc->info->finit = NULL; oc->info->ch_info = info; @@ -1224,8 +1367,6 @@ ocVerifyImage_PEi386 ( ObjectCode* oc ) memcpy (section->info->relocs, reltab + relocs_offset, noRelocs * sizeof (COFF_reloc)); } - - oc->info->secBytesTotal += getAlignedValue (section->size, *section); } /* Initialize the last section's info field which contains the .bss @@ -1234,23 +1375,6 @@ ocVerifyImage_PEi386 ( ObjectCode* oc ) stgFree(sections[info->numberOfSections].info); sections[info->numberOfSections].info = NULL; - /* Calculate space for trampolines nearby. - We get back 8-byte aligned memory (is that guaranteed?), but - the offsets to the sections within the file are all 4 mod 8 - (is that guaranteed?). We therefore need to offset the image - by 4, so that all the pointers are 8-byte aligned, so that - pointer tagging works. */ - /* For 32-bit case we don't need this, hence we use macro - PEi386_IMAGE_OFFSET, which equals to 4 for 64-bit case and 0 for - 32-bit case. */ - /* We allocate trampolines area for all symbols right behind - image data, aligned on 8. */ - oc->info->trampoline - = (PEi386_IMAGE_OFFSET + 2 * default_alignment - + oc->info->secBytesTotal) & ~0x7; - oc->info->secBytesTotal - = oc->info->trampoline + info->numberOfSymbols * sizeof(SymbolExtra); - /* No further verification after this point; only debug printing. */ i = 0; IF_DEBUG(linker, i=1); @@ -1402,123 +1526,93 @@ bool ocGetNames_PEi386 ( ObjectCode* oc ) { bool has_code_section = false; - - SymbolName* sname = NULL; - SymbolAddr* addr = NULL; - unsigned int i; - COFF_HEADER_INFO *info = oc->info->ch_info; /* Copy section information into the ObjectCode. */ - for (i = 0; i < info->numberOfSections; i++) { - uint8_t* start; - uint8_t* end; - uint32_t sz; - + for (unsigned int i = 0; i < info->numberOfSections; i++) { /* By default consider all section as CODE or DATA, which means we want to load them. */ SectionKind kind = SECTIONKIND_CODE_OR_RODATA; - Section section = oc->sections[i]; + Section *section = &oc->sections[i]; + uint32_t alignment = getSectionAlignment(*section); - IF_DEBUG(linker, debugBelch("section name = %s\n", section.info->name )); + // These will be computed below and determine how we will handle the + // section + size_t sz = section->size; + bool do_copy = true; + bool do_zero = false; + + IF_DEBUG(linker, debugBelch("section name = %s (%x)\n", section->info->name, section->info->props )); /* The PE file section flag indicates whether the section contains code or data. */ - if (section.info->props & IMAGE_SCN_CNT_CODE) { - has_code_section = has_code_section || section.size > 0; + if (section->info->props & IMAGE_SCN_CNT_CODE) { + has_code_section = has_code_section || section->size > 0; kind = SECTIONKIND_CODE_OR_RODATA; } - if (section.info->props & IMAGE_SCN_CNT_INITIALIZED_DATA) - kind = SECTIONKIND_CODE_OR_RODATA; + if (section->info->props & IMAGE_SCN_MEM_WRITE) { + kind = SECTIONKIND_RWDATA; + } /* Check next if it contains any uninitialized data */ - if (section.info->props & IMAGE_SCN_CNT_UNINITIALIZED_DATA) + if (section->info->props & IMAGE_SCN_CNT_UNINITIALIZED_DATA) { kind = SECTIONKIND_RWDATA; + do_copy = false; + } /* Finally check if it can be discarded. This will also ignore .debug sections */ - if ( section.info->props & IMAGE_SCN_MEM_DISCARDABLE - || section.info->props & IMAGE_SCN_LNK_REMOVE) + if ( section->info->props & IMAGE_SCN_MEM_DISCARDABLE + || section->info->props & IMAGE_SCN_LNK_REMOVE) { kind = SECTIONKIND_OTHER; + } - if (0==strncmp(".ctors", section.info->name, 6)) { + if (0==strncmp(".ctors", section->info->name, 6)) { kind = SECTIONKIND_INIT_ARRAY; oc->info->init = &oc->sections[i]; } - if (0==strncmp(".dtors", section.info->name, 6)) { + if (0==strncmp(".dtors", section->info->name, 6)) { kind = SECTIONKIND_FINIT_ARRAY; oc->info->finit = &oc->sections[i]; } - if ( 0 == strncmp(".stab" , section.info->name, 5 ) - || 0 == strncmp(".stabstr" , section.info->name, 8 ) - || 0 == strncmp(".debug" , section.info->name, 6 ) - || 0 == strncmp(".rdata$zzz", section.info->name, 10)) + if ( 0 == strncmp(".stab" , section->info->name, 5 ) + || 0 == strncmp(".stabstr" , section->info->name, 8 ) + || 0 == strncmp(".debug" , section->info->name, 6 ) + || 0 == strncmp(".rdata$zzz", section->info->name, 10)) kind = SECTIONKIND_DEBUG; /* Exception Unwind information. See Note [Exception Unwinding]. */ - if (0 == strncmp(".xdata" , section.info->name, 6 )) { + if (0 == strncmp(".xdata" , section->info->name, 6 )) { kind = SECTIONKIND_EXCEPTION_UNWIND; oc->info->xdata = &oc->sections[i]; } /* Exception handler tables, See Note [Exception Unwinding]. */ - if (0 == strncmp(".pdata" , section.info->name, 6 )) { + if (0 == strncmp(".pdata" , section->info->name, 6 )) { kind = SECTIONKIND_EXCEPTION_TABLE; oc->info->pdata = &oc->sections[i]; } - if (0==strncmp(".idata", section.info->name, 6)) + if (0==strncmp(".idata", section->info->name, 6)) { kind = SECTIONKIND_IMPORT; + } + /* See Note [BFD import library]. */ - if (0==strncmp(".idata$7", section.info->name, 8)) - kind = SECTIONKIND_IMPORT_LIBRARY; - - if (0==strncmp(".idata$6", section.info->name, 8)) { - /* The first two bytes contain the ordinal of the function - in the format of lowpart highpart. The two bytes combined - for the total range of 16 bits which is the function export limit - of DLLs. */ - sname = (SymbolName*)section.start+2; - COFF_symbol* sym = &oc->info->symbols[info->numberOfSymbols-1]; - addr = get_sym_name( getSymShortName (info, sym), oc); - - IF_DEBUG(linker, - debugBelch("addImportSymbol `%s' => `%s'\n", - sname, (char*)addr)); - /* We're going to free the any data associated with the import - library without copying the sections. So we have to duplicate - the symbol name and values before the pointers become invalid. */ - sname = strdup (sname); - addr = strdup (addr); - if (!ghciInsertSymbolTable(oc->fileName, symhash, sname, - addr, false, SYM_TYPE_DATA, oc)) { - releaseOcInfo (oc); - stgFree (oc->image); - oc->image = NULL; - return false; - } - setImportSymbol (oc, sname); - - /* Don't process this oc any further. Just exit. */ - oc->n_symbols = 0; - oc->symbols = NULL; - stgFree (oc->image); - oc->image = NULL; - releaseOcInfo (oc); - oc->status = OBJECT_DONT_RESOLVE; - return true; + if (0==strncmp(".idata$7", section->info->name, 8)) { + kind = SECTIONKIND_BFD_IMPORT_LIBRARY_HEAD; } - /* Allocate space for any (local, anonymous) .bss sections. */ - if (0==strncmp(".bss", section.info->name, 4)) { - uint32_t bss_sz; - uint8_t* zspace; + if (0==strncmp(".idata$6", section->info->name, 8)) { + kind = SECTIONKIND_BFD_IMPORT_LIBRARY; + } + /* Allocate space for any (local, anonymous) .bss sections. */ + if (0==strncmp(".bss", section->info->name, 4)) { /* sof 10/05: the PE spec text isn't too clear regarding what * the SizeOfRawData field is supposed to hold for object * file sections containing just uninitialized data -- for executables, @@ -1538,42 +1632,49 @@ ocGetNames_PEi386 ( ObjectCode* oc ) * * TODO: check if this comment is still relevant. */ - if (section.info->virtualSize == 0 && section.size == 0) continue; + if (section->info->virtualSize == 0 && section->size == 0) { + IF_DEBUG(linker_verbose, debugBelch("skipping empty .bss section\n")); + continue; + } + /* This is a non-empty .bss section. Allocate zeroed space for it */ - bss_sz = section.info->virtualSize; - if (bss_sz < section.size) { bss_sz = section.size; } - zspace = stgCallocBytes(1, bss_sz, "ocGetNames_PEi386(anonymous bss)"); - oc->sections[i].start = zspace; - oc->sections[i].size = bss_sz; - section = oc->sections[i]; - /* debugBelch("BSS anon section at 0x%x\n", zspace); */ + kind = SECTIONKIND_RWDATA; + do_zero = true; + do_copy = false; + IF_DEBUG(linker_verbose, debugBelch("BSS anon section\n")); } - /* Allocate space for the sections since we have a real oc. - We initially mark it the region as non-accessible. But will adjust - as we go along. */ - if (!oc->info->image) { - /* See Note [Memory allocation]. */ - ASSERT(code_heap); - oc->info->image - = HeapAlloc (code_heap, HEAP_ZERO_MEMORY, oc->info->secBytesTotal); - if (!oc->info->image) - barf ("Could not allocate any heap memory from private heap."); + CHECK(section->size == 0 || section->info->virtualSize == 0); + if (sz < section->info->virtualSize) { + sz = section->info->virtualSize; } - CHECK(section.size == 0 || section.info->virtualSize == 0); - sz = section.size; - if (sz < section.info->virtualSize) sz = section.info->virtualSize; + // Ignore these section types + if (kind == SECTIONKIND_OTHER || sz == 0) { + continue; + } - start = section.start; - end = start + sz; + // Allocate memory for the section. + uint8_t *start; + if (kind == SECTIONKIND_CODE_OR_RODATA) { + start = m32_alloc(oc->rx_m32, sz, alignment); + } else { + start = m32_alloc(oc->rw_m32, sz, alignment); + } + if (!start) { + barf("Could not allocate any heap memory from private heap (requested %" FMT_SizeT " bytes).", + sz); + } - if (kind != SECTIONKIND_OTHER && end > start) { - /* See Note [Section alignment]. */ - addCopySection(oc, &oc->sections[i], kind, SECTION_NOMEM, start, sz); - addProddableBlock(oc, oc->sections[i].start, sz); + if (do_copy) { + memcpy(start, section->start, sz); + } else if (do_zero) { + memset(start, 0, sz); } + + addSection(section, kind, SECTION_NOMEM, start, sz, 0, 0, 0); + addProddableBlock(oc, oc->sections[i].start, sz); } /* Copy exported symbols into the ObjectCode. */ @@ -1584,7 +1685,7 @@ ocGetNames_PEi386 ( ObjectCode* oc ) /* Work out the size of the global BSS section */ StgWord globalBssSize = 0; - for (i=0; i < info->numberOfSymbols; i++) { + for (unsigned int i=0; i < info->numberOfSymbols; i++) { COFF_symbol* sym = &oc->info->symbols[i]; if (getSymSectionNumber (info, sym) == IMAGE_SYM_UNDEFINED && getSymValue (info, sym) > 0 @@ -1597,12 +1698,14 @@ ocGetNames_PEi386 ( ObjectCode* oc ) /* Allocate BSS space */ SymbolAddr* bss = NULL; if (globalBssSize > 0) { - bss = stgCallocBytes(1, globalBssSize, - "ocGetNames_PEi386(non-anonymous bss)"); + bss = m32_alloc(oc->rw_m32, globalBssSize, 16); + if (bss == NULL) { + barf("ocGetNames_PEi386: Failed to allocate global bss section"); + } addSection(&oc->sections[oc->n_sections-1], SECTIONKIND_RWDATA, SECTION_MALLOC, bss, globalBssSize, 0, 0, 0); - IF_DEBUG(linker, debugBelch("bss @ %p %" FMT_Word "\n", bss, globalBssSize)); + IF_DEBUG(linker_verbose, debugBelch("bss @ %p %" FMT_Word "\n", bss, globalBssSize)); addProddableBlock(oc, bss, globalBssSize); } else { addSection(&oc->sections[oc->n_sections-1], @@ -1614,31 +1717,31 @@ ocGetNames_PEi386 ( ObjectCode* oc ) stgFree (oc->image); oc->image = NULL; - for (i = 0; i < (uint32_t)oc->n_symbols; i++) { + for (unsigned int i = 0; i < (uint32_t)oc->n_symbols; i++) { COFF_symbol* sym = &oc->info->symbols[i]; int32_t secNumber = getSymSectionNumber (info, sym); uint32_t symValue = getSymValue (info, sym); uint8_t symStorageClass = getSymStorageClass (info, sym); - addr = NULL; + SymbolAddr *addr = NULL; bool isWeak = false; - sname = get_sym_name (getSymShortName (info, sym), oc); + SymbolName *sname = get_sym_name (getSymShortName (info, sym), oc); Section *section = secNumber > 0 ? &oc->sections[secNumber-1] : NULL; SymType type; - switch (sym->og.Type) { + switch (getSymType(oc->info->ch_info, sym)) { case 0x00: type = SYM_TYPE_DATA; break; case 0x20: type = SYM_TYPE_CODE; break; default: - debugBelch("Invalid symbol type\n"); + debugBelch("Invalid symbol type: 0x%x\n", getSymType(oc->info->ch_info, sym)); return 1; } if ( secNumber != IMAGE_SYM_UNDEFINED && secNumber > 0 && section - && section->kind != SECTIONKIND_IMPORT_LIBRARY) { + && section->kind != SECTIONKIND_BFD_IMPORT_LIBRARY) { /* This symbol is global and defined, viz, exported */ /* for IMAGE_SYMCLASS_EXTERNAL && !IMAGE_SYM_UNDEFINED, @@ -1663,10 +1766,18 @@ ocGetNames_PEi386 ( ObjectCode* oc ) bss = (SymbolAddr*)((StgWord)bss + (StgWord)symValue); IF_DEBUG(linker_verbose, debugBelch("bss symbol @ %p %u\n", addr, symValue)); } + else if (section && section->kind == SECTIONKIND_BFD_IMPORT_LIBRARY) { + setImportSymbol(oc, sname); + // There is nothing that we need to resolve in this object since we + // will never call the import stubs in its text section + oc->status = OBJECT_DONT_RESOLVE; + + IF_DEBUG(linker_verbose, debugBelch("import symbol %s\n", sname)); + } else if (secNumber > 0 && section - && section->kind == SECTIONKIND_IMPORT_LIBRARY) { - /* This is an import section. We should load the dll and lookup + && section->kind == SECTIONKIND_BFD_IMPORT_LIBRARY_HEAD) { + /* This is an Gnu BFD import section. We should load the dll and lookup the symbols. See Note [BFD import library]. */ char* dllName = section->start; @@ -1760,42 +1871,32 @@ ocGetNames_PEi386 ( ObjectCode* oc ) #if defined(x86_64_HOST_ARCH) -/* We've already reserved a room for symbol extras in loadObj, - * so simply set correct pointer here. - */ -bool -ocAllocateExtras_PEi386 ( ObjectCode* oc ) -{ - /* If the ObjectCode was unloaded we don't need a trampoline, it's likely - an import library so we're discarding it earlier. */ - if (!oc->info) - return false; - - // These are allocated on-demand from m32 by makeSymbolExtra_PEi386 - oc->first_symbol_extra = 0; - oc->n_symbol_extras = 0; - oc->symbol_extras = NULL; - - return true; -} - static size_t makeSymbolExtra_PEi386( ObjectCode* oc, uint64_t index STG_UNUSED, size_t s, char* symbol STG_UNUSED, SymType type ) { - SymbolExtra *extra = m32_alloc(oc->rx_m32, sizeof(SymbolExtra), 8); + SymbolExtra *extra; if (type == SYM_TYPE_CODE) { // jmp *-14(%rip) + extra = m32_alloc(oc->rx_m32, sizeof(SymbolExtra), 8); + CHECK(extra); extra->addr = (uint64_t)s; static uint8_t jmp[] = { 0xFF, 0x25, 0xF2, 0xFF, 0xFF, 0xFF }; memcpy(extra->jumpIsland, jmp, 6); + IF_DEBUG(linker_verbose, debugBelch("makeSymbolExtra(code): %s -> %p\n", symbol, &extra->jumpIsland)); return (size_t)&extra->jumpIsland; } else if (type == SYM_TYPE_INDIRECT_DATA) { + extra = m32_alloc(oc->rw_m32, sizeof(SymbolExtra), 8); + CHECK(extra); void *v = *(void**) s; extra->addr = (uint64_t)v; + IF_DEBUG(linker_verbose, debugBelch("makeSymbolExtra(data): %s -> %p\n", symbol, &extra->addr)); return (size_t)&extra->addr; } else { + extra = m32_alloc(oc->rw_m32, sizeof(SymbolExtra), 8); + CHECK(extra); extra->addr = (uint64_t)s; + IF_DEBUG(linker_verbose, debugBelch("makeSymbolExtra(indirect-data): %s -> %p\n", symbol, &extra->addr)); return (size_t)&extra->addr; } } @@ -1831,7 +1932,7 @@ ocResolve_PEi386 ( ObjectCode* oc ) /* Ignore sections called which contain stabs debugging information. */ if (section.kind == SECTIONKIND_DEBUG) - continue; + continue; noRelocs = section.info->noRelocs; for (j = 0; j < noRelocs; j++) { @@ -1854,13 +1955,13 @@ ocResolve_PEi386 ( ObjectCode* oc ) IF_DEBUG(linker_verbose, debugBelch( - "reloc sec %2d num %3d: type 0x%-4x " + "reloc sec %2d num %3d: P=%p, type 0x%-4x " "vaddr 0x%-8lx name `", - i, j, + i, j, pP, reloc->Type, reloc->VirtualAddress ); printName (getSymShortName (info, sym), oc); - debugBelch("'\n" )); + debugBelch("'\n" )); if (getSymStorageClass (info, sym) == IMAGE_SYM_CLASS_STATIC) { Section section = oc->sections[getSymSectionNumber (info, sym)-1]; @@ -1876,6 +1977,8 @@ ocResolve_PEi386 ( ObjectCode* oc ) return false; } } + IF_DEBUG(linker_verbose, debugBelch("S=%zx\n", S)); + /* All supported relocations write at least 4 bytes */ checkProddableBlock(oc, pP, 4); switch (reloc->Type) { @@ -1930,7 +2033,7 @@ ocResolve_PEi386 ( ObjectCode* oc ) /* If IMAGE_REL_AMD64_ADDR32NB then subtract the image base. */ if (reloc->Type == 3) - v -= (uint64_t)__image_base; + v -= (uint64_t) GetModuleHandleW(NULL); // N.B. in the case of the sign-extended relocations we must ensure that v // fits in a signed 32-bit value. See #15808. @@ -1943,10 +2046,10 @@ ocResolve_PEi386 ( ObjectCode* oc ) /* If IMAGE_REL_AMD64_ADDR32NB then subtract the image base. */ if (reloc->Type == 3) - v -= (uint64_t)__image_base; + v -= (uint64_t) GetModuleHandleW(NULL); if (((int64_t) v > (int64_t) INT32_MAX) || ((int64_t) v < (int64_t) INT32_MIN)) { - barf("IMAGE_REL_AMD64_ADDR32[NB]: High bits are set in %zx for %s", + barf("IMAGE_REL_AMD64_ADDR32[NB]: High bits are set in 0x%zx for %s", v, (char *)symbol); } } @@ -1965,7 +2068,7 @@ ocResolve_PEi386 ( ObjectCode* oc ) /* And retry */ v = S + (int32_t)A - ((intptr_t)pP) - 4; if ((v > (int64_t) INT32_MAX) || (v < (int64_t) INT32_MIN)) { - barf("IMAGE_REL_AMD64_REL32: High bits are set in %zx for %s", + barf("IMAGE_REL_AMD64_REL32: High bits are set in 0x%zx for %s", v, (char *)symbol); } } @@ -1992,7 +2095,7 @@ ocResolve_PEi386 ( ObjectCode* oc ) /* Now register the exception handler for the range and point it to the unwind data. */ - if (!RtlAddFunctionTable (section.start, numEntries, (uintptr_t)__image_base)) { + if (!RtlAddFunctionTable (section.start, numEntries, (uintptr_t) GetModuleHandleW(NULL))) { sysErrorBelch("Unable to register Exception handler for %p for " "section %s in %" PATH_FMT " (Win32 error %lu)", section.start, section.info->name, oc->fileName, @@ -2123,7 +2226,7 @@ ocRunInit_PEi386 ( ObjectCode *oc ) return true; } -SymbolAddr *lookupSymbol_PEi386(SymbolName *lbl, SymType *type) +SymbolAddr *lookupSymbol_PEi386(SymbolName *lbl, ObjectCode *dependent, SymType *type) { RtsSymbolInfo *pinfo; @@ -2136,8 +2239,13 @@ SymbolAddr *lookupSymbol_PEi386(SymbolName *lbl, SymType *type) #if !defined(x86_64_HOST_ARCH) zapTrailingAtSign ( lbl ); #endif - if (type) *type = SYM_TYPE_CODE; // TODO - sym = lookupSymbolInDLLs(lbl); + if (type) { + // Unfortunately we can only assume that this is the case. Ideally + // the user would have given us an import library, which would allow + // us to determine the symbol type precisely. + *type = SYM_TYPE_CODE; + } + sym = lookupSymbolInDLLs(lbl, dependent); return sym; // might be NULL if not found } else { if (type) *type = pinfo->type; @@ -2153,11 +2261,12 @@ SymbolAddr *lookupSymbol_PEi386(SymbolName *lbl, SymType *type) static HMODULE msvcrt = NULL; if (!msvcrt) msvcrt = GetModuleHandle("msvcrt"); pinfo->value = GetProcAddress(msvcrt, symBuffer); + return pinfo->value; } else if (pinfo && pinfo->owner && isSymbolImport (pinfo->owner, lbl)) { /* See Note [BFD import library]. */ - HINSTANCE dllInstance = (HINSTANCE)lookupDependentSymbol(pinfo->value, NULL, type); + HINSTANCE dllInstance = (HINSTANCE)lookupDependentSymbol(pinfo->value, dependent, type); if (!dllInstance && pinfo->value) return pinfo->value; @@ -2173,39 +2282,21 @@ SymbolAddr *lookupSymbol_PEi386(SymbolName *lbl, SymType *type) pinfo->value = GetProcAddress((HMODULE)dllInstance, lbl); clearImportSymbol (pinfo->owner, lbl); return pinfo->value; + } else { + if (dependent) { + // Add dependent as symbol's owner's dependency + ObjectCode *owner = pinfo->owner; + if (owner) { + // TODO: what does it mean for a symbol to not have an owner? + insertHashSet(dependent->dependencies, (W_)owner); + } + } + return loadSymbol(lbl, pinfo); } - return loadSymbol(lbl, pinfo); } } /* ----------------------------------------------------------------------------- - * Section management. - */ - - /* See Note [Section alignment]. */ -static void -addCopySection (ObjectCode *oc, Section *s, SectionKind kind, - SectionAlloc alloc, void* start, StgWord size) { - char* pos = oc->info->image + oc->info->secBytesUsed; - char* newStart = (char*)getAlignedMemory ((uint8_t*)pos, *s); - memcpy (newStart, start, size); - uintptr_t offset = (uintptr_t)newStart - (uintptr_t)oc->info->image; - oc->info->secBytesUsed = (size_t)offset + size; - start = newStart; - - /* Initially I wanted to apply the right memory protection to the region and - which would leaved the gaps in between the regions as inaccessible memory - to prevent exploits. - The problem is protection is always on page granularity, so we can use - less memory and be insecure or use more memory and be secure. - For now, I've chosen lower memory over secure as the first pass, this - doesn't regress security over the current implementation. After this - patch I will change to different implementation that will fix the mem - protection and keep the memory size small. */ - addSection (s, kind, alloc, start, size, 0, 0, 0); -} - -/* ----------------------------------------------------------------------------- * Debugging operations. */ diff --git a/rts/linker/PEi386.h b/rts/linker/PEi386.h index 7148ba7436..2624ff6832 100644 --- a/rts/linker/PEi386.h +++ b/rts/linker/PEi386.h @@ -4,7 +4,9 @@ #include "LinkerInternals.h" #include "PathUtils.h" #include <windows.h> +#include <stdint.h> #include <stdbool.h> +#include <inttypes.h> #include "BeginPrivate.h" @@ -56,9 +58,8 @@ bool ocResolve_PEi386 ( ObjectCode* oc ); bool ocRunInit_PEi386 ( ObjectCode *oc ); bool ocGetNames_PEi386 ( ObjectCode* oc ); bool ocVerifyImage_PEi386 ( ObjectCode* oc ); -SymbolAddr *lookupSymbol_PEi386(SymbolName *lbl, SymType *type); -bool ocAllocateExtras_PEi386 ( ObjectCode* oc ); -SymbolAddr *lookupSymbolInDLLs ( const SymbolName* lbl ); +SymbolAddr *lookupSymbol_PEi386(SymbolName *lbl, ObjectCode *dependent, SymType *type); + /* See Note [mingw-w64 name decoration scheme] */ /* We use myindex to calculate array addresses, rather than simply doing the normal subscript thing. That's because @@ -124,13 +125,6 @@ struct _OpenedDLL { HINSTANCE instance; } OpenedDLL; -/* A record for storing indirectly linked functions from DLLs. */ -typedef -struct _IndirectAddr { - SymbolAddr* addr; - struct _IndirectAddr* next; -} IndirectAddr; - /* Some alignment information. */ typedef struct _Alignments { @@ -172,4 +166,12 @@ because we have no stdcall convention on 64 bits. See #9218 */ +/******************************************** + * Memory Management functions + ********************************************/ + +/* Same as the above, but use the current process's load address as the starting + point for memory allocations. */ +void *allocaLocalBytes(size_t sz, size_t *req); + #include "EndPrivate.h" diff --git a/rts/linker/PEi386Types.h b/rts/linker/PEi386Types.h index 9ad2103f92..792cc03fda 100644 --- a/rts/linker/PEi386Types.h +++ b/rts/linker/PEi386Types.h @@ -17,10 +17,6 @@ struct SectionFormatInfo { uint64_t virtualAddr; }; struct ObjectCodeFormatInfo { - size_t secBytesTotal; - size_t secBytesUsed; - char* image; - size_t trampoline; Section* init; Section* finit; Section* pdata; |