summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTamar Christina <tamar@zhox.com>2022-12-04 15:55:43 +0000
committerMatthew Pickering <matthewtpickering@gmail.com>2023-01-02 23:38:30 +0000
commit6658c45d43de1a85ac4e823a92613b5c3d04d868 (patch)
tree9ce811925327eaf5048ef1d5356a8ca90d548f76
parent14ee7a04efadcd687d87af6c6be3ef2b6e7b63d3 (diff)
downloadhaskell-wip/T22130.tar.gz
linker: Fix BFD import librarieswip/T22130
-rw-r--r--hadrian/src/Settings/Packages.hs2
-rw-r--r--libraries/base/System/Posix/Internals.hs1
-rw-r--r--rts/Linker.c26
-rw-r--r--rts/LinkerInternals.h13
-rw-r--r--rts/linker/PEi386.c96
-rw-r--r--testsuite/tests/ghci/linking/dyn/Makefile4
-rw-r--r--testsuite/tests/ghci/linking/dyn/all.T6
-rw-r--r--testsuite/tests/ghci/linking/dyn/bin_impl_gcc/ASimpL.dllbin0 -> 15508756 bytes
-rw-r--r--testsuite/tests/ghci/linking/dyn/bin_impl_gcc/libASx.dll.abin0 -> 29698010 bytes
-rw-r--r--testsuite/tests/rts/all.T2
10 files changed, 121 insertions, 29 deletions
diff --git a/hadrian/src/Settings/Packages.hs b/hadrian/src/Settings/Packages.hs
index 569a78ef1b..a0a2ef3bb5 100644
--- a/hadrian/src/Settings/Packages.hs
+++ b/hadrian/src/Settings/Packages.hs
@@ -404,7 +404,7 @@ rtsPackageArgs = package rts ? do
, builder HsCpp ? pure
[ "-DTOP=" ++ show top ]
- , builder HsCpp ? flag UseLibdw ? arg "-DUSE_LIBDW"
+ , builder HsCpp ? flag UseLibdw ? arg "-DUSE_LIBDW" ]
-- Compile various performance-critical pieces *without* -fPIC -dynamic
-- even when building a shared library. If we don't do this, then the
diff --git a/libraries/base/System/Posix/Internals.hs b/libraries/base/System/Posix/Internals.hs
index 6aae25ab5e..4a82bbae1c 100644
--- a/libraries/base/System/Posix/Internals.hs
+++ b/libraries/base/System/Posix/Internals.hs
@@ -904,6 +904,7 @@ foreign import capi unsafe "stdio.h value SEEK_END" sEEK_END :: CInt
{-
Note [Windows types]
+~~~~~~~~~~~~~~~~~~~~
Windows' _read and _write have types that differ from POSIX. They take an
unsigned int for length and return a signed int where POSIX uses size_t and
diff --git a/rts/Linker.c b/rts/Linker.c
index af91a098be..7eb0409709 100644
--- a/rts/Linker.c
+++ b/rts/Linker.c
@@ -135,8 +135,7 @@ extern void iconv();
This is to enable lazy loading of symbols. Eager loading is problematic
as it means that all symbols must be available, even those which we will
never use. This is especially painful on Windows, where the number of
- libraries required to link things like mingwex (TODO: We no longer depend
- on mingwex, so think of a different example here) grows to be quite high.
+ libraries required to link things like QT or WxWidgets grows to be quite high.
We proceed through these stages as follows,
@@ -194,8 +193,7 @@ extern void iconv();
1) Dependency chains, if A.o required a .o in libB but A.o isn't required to link
then we don't need to load libB. This means the dependency chain for libraries
- such as mingw32 and mingwex (TODO: We no longer depend on mingwex, so think of
- a different example here) can be broken down.
+ such as ucrt can be broken down.
2) The number of duplicate symbols, since now only symbols that are
true duplicates will display the error.
@@ -228,7 +226,7 @@ static void ghciRemoveSymbolTable(StrHashTable *table, const SymbolName* key,
static const char *
symbolTypeString (SymType type)
{
- switch (type) {
+ switch (type & ~SYM_TYPE_DUP_DISCARD) {
case SYM_TYPE_CODE: return "code";
case SYM_TYPE_DATA: return "data";
case SYM_TYPE_INDIRECT_DATA: return "indirect-data";
@@ -277,14 +275,18 @@ int ghciInsertSymbolTable(
insertStrHashTable(table, key, pinfo);
return 1;
}
- else if (pinfo->type != type)
+ else if (pinfo->type ^ type)
{
- debugBelch("Symbol type mismatch.\n");
- debugBelch("Symbol %s was defined by %" PATH_FMT " to be a %s symbol.\n",
- key, obj_name, symbolTypeString(type));
- debugBelch(" yet was defined by %" PATH_FMT " to be a %s symbol.\n",
- pinfo->owner ? pinfo->owner->fileName : WSTR("<builtin>"),
- symbolTypeString(pinfo->type));
+ /* We were asked to discard the symbol on duplicates, do so quietly. */
+ if (!(type & SYM_TYPE_DUP_DISCARD))
+ {
+ debugBelch("Symbol type mismatch.\n");
+ debugBelch("Symbol %s was defined by %" PATH_FMT " to be a %s symbol.\n",
+ key, obj_name, symbolTypeString(type));
+ debugBelch(" yet was defined by %" PATH_FMT " to be a %s symbol.\n",
+ pinfo->owner ? pinfo->owner->fileName : WSTR("<builtin>"),
+ symbolTypeString(pinfo->type));
+ }
return 1;
}
else if (pinfo->strength == STRENGTH_STRONG)
diff --git a/rts/LinkerInternals.h b/rts/LinkerInternals.h
index 8982318b2a..6797095d1e 100644
--- a/rts/LinkerInternals.h
+++ b/rts/LinkerInternals.h
@@ -54,11 +54,16 @@ typedef struct _Section Section;
*/
/* What kind of thing a symbol identifies. We need to know this to determine how
- * to process overflowing relocations. See Note [Processing overflowed relocations]. */
+ * to process overflowing relocations. See Note [Processing overflowed relocations].
+ * This is bitfield however only the option SYM_TYPE_DUP_DISCARD can be combined
+ * with the other values. */
typedef enum _SymType {
- SYM_TYPE_CODE, /* the symbol is a function and can be relocated via a jump island */
- SYM_TYPE_DATA, /* the symbol is data */
- SYM_TYPE_INDIRECT_DATA, /* see Note [_iob_func symbol] */
+ SYM_TYPE_CODE = 1 << 0, /* the symbol is a function and can be relocated via a jump island */
+ SYM_TYPE_DATA = 1 << 1, /* the symbol is data */
+ SYM_TYPE_INDIRECT_DATA = 1 << 2, /* see Note [_iob_func symbol] */
+ SYM_TYPE_DUP_DISCARD = 1 << 3, /* the symbol is a symbol in a BFD import library
+ however if a duplicate is found with a mismatching
+ SymType then discard this one. */
} SymType;
diff --git a/rts/linker/PEi386.c b/rts/linker/PEi386.c
index 8d5238f792..b5134235b0 100644
--- a/rts/linker/PEi386.c
+++ b/rts/linker/PEi386.c
@@ -261,6 +261,54 @@
.asciiz "libfoo_data"
+ Note [GHC Linking model and import libraries]
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ The above describes how import libraries work for static linking.
+ Fundamentally this does not apply to dynamic linking as we do in GHC.
+ The issue is two-folds:
+
+ 1. In the linking model above it is expected that the .idata sections be
+ materialized into PLTs during linking. However in GHC we never create
+ PLTs, but have out own mechanism for this which is the jump island
+ machinery. This is required for efficiency. For one materializing the
+ .idata sections would result in wasting pages. We'd use one page for
+ every ~100 bytes. This is extremely wasteful and also fragments the
+ memory. Secondly the dynamic linker is lazy. We only perform the final
+ loading if the symbol is used, however with an import library we can
+ discard the actual OC immediately after reading it. This prevents us from
+ keeping ~1k in memory per symbol for no reason.
+
+ 2. GHC itself does not observe symbol visibility correctly during NGC. This
+ in itself isn't an academic exercise. The issue stems from GHC using one
+ mechanism for providing two incompatible linking modes:
+ a) The first mode is generating Haskell shared libraries which are
+ intended to be used by other Haskell code. This requires us to
+ export the info, data and closures. For this GHC just re-exports
+ all symbols. But it doesn't correcly mark data/code. Symbol
+ visibility is overwritten by telling the linker to export all
+ symbols.
+ b) The second code is producing code that's supposed to be call-able
+ through a C insterface. This in reality does not require the
+ export of closures and info tables. But also does not require the
+ inclusion of the RTS inside the DLL. Hover this is done today
+ because we don't properly have the RTS as a dynamic library.
+ i.e. GHC does not only export symbols denoted by foreign export.
+ Also GHC should depend on an RTS library, but at the moment it
+ cannot because of TNTC is incompatible with dynamic linking.
+
+ These two issues mean that for GHC we need to take a different approach
+ to handling import libraries. For normal C libraries we have proper
+ differentiation between CODE and DATA. For GHC produced import libraries
+ we do not. As such the SYM_TYPE_DUP_DISCARD tells the linker that if a
+ duplicate symbol is found, and we were going to discard it anyway, just do
+ so quitely. This works because the RTS symbols themselves are provided by
+ the currently loaded RTS as built-in symbols.
+
+ Secondly we cannot rely on a text symbol being available. As such we
+ should only depend on the symbols as defined in the .idata sections,
+ otherwise we would not be able to correctly link against GHC produced
+ import libraries.
+
Note [Memory allocation]
~~~~~~~~~~~~~~~~~~~~~~~~
The loading of an object begins in `preloadObjectFile`, which allocates a buffer,
@@ -1658,7 +1706,10 @@ ocGetNames_PEi386 ( ObjectCode* oc )
if ( secNumber != IMAGE_SYM_UNDEFINED
&& secNumber > 0
&& section
- && section->kind != SECTIONKIND_BFD_IMPORT_LIBRARY) {
+ /* Skip all BFD import sections. */
+ && section->kind != SECTIONKIND_IMPORT
+ && section->kind != SECTIONKIND_BFD_IMPORT_LIBRARY
+ && section->kind != SECTIONKIND_BFD_IMPORT_LIBRARY_HEAD) {
/* This symbol is global and defined, viz, exported */
/* for IMAGE_SYMCLASS_EXTERNAL
&& !IMAGE_SYM_UNDEFINED,
@@ -1691,12 +1742,49 @@ ocGetNames_PEi386 ( ObjectCode* oc )
IF_DEBUG(linker_verbose, debugBelch("bss symbol @ %p %u\n", addr, symValue));
}
else if (section && section->kind == SECTIONKIND_BFD_IMPORT_LIBRARY) {
- setImportSymbol(oc, sname);
+ /* Disassembly of section .idata$5:
+
+ 0000000000000000 <__imp_Insert>:
+ ...
+ 0: IMAGE_REL_AMD64_ADDR32NB .idata$6
+
+ The first two bytes contain the ordinal of the function
+ in the format of lowpart highpart. The two bytes combined
+ for the total range of 16 bits which is the function export limit
+ of DLLs. See note [GHC Linking model and import libraries]. */
+ sname = (SymbolName*)section->start+2;
+ COFF_symbol* sym = &oc->info->symbols[info->numberOfSymbols-1];
+ addr = get_sym_name( getSymShortName (info, sym), oc);
+
+ IF_DEBUG(linker,
+ debugBelch("addImportSymbol `%s' => `%s'\n",
+ sname, (char*)addr));
+ /* We're going to free the any data associated with the import
+ library without copying the sections. So we have to duplicate
+ the symbol name and values before the pointers become invalid. */
+ sname = strdup (sname);
+ addr = strdup (addr);
+ type = has_code_section ? SYM_TYPE_CODE : SYM_TYPE_DATA;
+ type |= SYM_TYPE_DUP_DISCARD;
+ if (!ghciInsertSymbolTable(oc->fileName, symhash, sname,
+ addr, false, type, oc)) {
+ releaseOcInfo (oc);
+ stgFree (oc->image);
+ oc->image = NULL;
+ return false;
+ }
+ setImportSymbol (oc, sname);
+
+ /* Don't process this oc any further. Just exit. */
+ oc->n_symbols = 0;
+ oc->symbols = NULL;
+ stgFree (oc->image);
+ oc->image = NULL;
+ releaseOcInfo (oc);
// There is nothing that we need to resolve in this object since we
// will never call the import stubs in its text section
oc->status = OBJECT_DONT_RESOLVE;
-
- IF_DEBUG(linker_verbose, debugBelch("import symbol %s\n", sname));
+ return true;
}
else if (secNumber > 0
&& section
diff --git a/testsuite/tests/ghci/linking/dyn/Makefile b/testsuite/tests/ghci/linking/dyn/Makefile
index da8ee20790..a58e437448 100644
--- a/testsuite/tests/ghci/linking/dyn/Makefile
+++ b/testsuite/tests/ghci/linking/dyn/Makefile
@@ -88,10 +88,6 @@ compile_libAB_dyn:
.PHONY: compile_libAS_impl_gcc
compile_libAS_impl_gcc:
- rm -rf bin_impl_gcc
- mkdir bin_impl_gcc
- '$(TEST_HC)' $(MY_TEST_HC_OPTS) -odir "bin_impl_gcc" -shared A.c -o "bin_impl_gcc/$(call DLL,ASimpL)"
- mv bin_impl_gcc/libASimpL.dll.a bin_impl_gcc/libASx.dll.a
echo "main" | '$(TEST_HC)' $(TEST_HC_OPTS_INTERACTIVE) T11072.hs -lASx -L./bin_impl_gcc
.PHONY: compile_libAS_impl_msvc
diff --git a/testsuite/tests/ghci/linking/dyn/all.T b/testsuite/tests/ghci/linking/dyn/all.T
index e8b99d1f19..557087444e 100644
--- a/testsuite/tests/ghci/linking/dyn/all.T
+++ b/testsuite/tests/ghci/linking/dyn/all.T
@@ -41,9 +41,9 @@ test('T10458',
extra_hc_opts('-L"$PWD/T10458dir" -lAS')],
ghci_script, ['T10458.script'])
-test('T11072gcc', [extra_files(['A.c', 'T11072.hs']),
- expect_broken(18718),
- unless(doing_ghci, skip), unless(opsys('mingw32'), skip)],
+test('T11072gcc', [extra_files(['A.c', 'T11072.hs', 'bin_impl_gcc/']),
+ unless(doing_ghci, skip), unless(opsys('mingw32'), skip),
+ unless(arch('x86_64'), skip)],
makefile_test, ['compile_libAS_impl_gcc'])
test('T11072msvc', [extra_files(['A.c', 'T11072.hs', 'libAS.def', 'i686/', 'x86_64/']),
diff --git a/testsuite/tests/ghci/linking/dyn/bin_impl_gcc/ASimpL.dll b/testsuite/tests/ghci/linking/dyn/bin_impl_gcc/ASimpL.dll
new file mode 100644
index 0000000000..2debfc0d2d
--- /dev/null
+++ b/testsuite/tests/ghci/linking/dyn/bin_impl_gcc/ASimpL.dll
Binary files differ
diff --git a/testsuite/tests/ghci/linking/dyn/bin_impl_gcc/libASx.dll.a b/testsuite/tests/ghci/linking/dyn/bin_impl_gcc/libASx.dll.a
new file mode 100644
index 0000000000..3c13ab3e2f
--- /dev/null
+++ b/testsuite/tests/ghci/linking/dyn/bin_impl_gcc/libASx.dll.a
Binary files differ
diff --git a/testsuite/tests/rts/all.T b/testsuite/tests/rts/all.T
index 2b8ce66e3e..6dab35abcf 100644
--- a/testsuite/tests/rts/all.T
+++ b/testsuite/tests/rts/all.T
@@ -410,7 +410,7 @@ test('T10296b', [only_ways(['threaded2'])], compile_and_run, [''])
test('numa001', [ extra_run_opts('8'), unless(unregisterised(), extra_ways(['debug_numa'])) ]
, compile_and_run, [''])
-test('T12497', [ unless(opsys('mingw32'), skip)
+test('T12497', [ unless(opsys('mingw32'), skip), expect_broken(22694)
],
makefile_test, ['T12497'])