summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTamar Christina <tamar@zhox.com>2022-12-04 15:55:43 +0000
committerMarge Bot <ben+marge-bot@smart-cactus.org>2023-02-03 14:07:30 -0500
commit48e391952c17ff7eab10b0b1456e3f2a2af28a9b (patch)
tree3f1d77f2c01e70cff568a6c30acf3db421e3b566
parentde1d15127ac3f41ac3044215b0ea3398a36edc89 (diff)
downloadhaskell-48e391952c17ff7eab10b0b1456e3f2a2af28a9b.tar.gz
linker: Fix BFD import libraries
This commit fixes the BFD style import library support in the runtime linker. This was accidentally broken during the refactoring to clang and went unnoticed because clang itself is unable to generate the BFD style import libraries. With this change we can not link against both GCC or Clang produced libraries again and intermix code produced by both compilers.
-rw-r--r--hadrian/src/Settings/Packages.hs2
-rw-r--r--libraries/base/System/Posix/Internals.hs1
-rw-r--r--rts/Linker.c26
-rw-r--r--rts/LinkerInternals.h13
-rw-r--r--rts/linker/PEi386.c96
-rw-r--r--testsuite/tests/ghci/linking/dyn/Makefile4
-rw-r--r--testsuite/tests/ghci/linking/dyn/all.T6
-rw-r--r--testsuite/tests/ghci/linking/dyn/bin_impl_gcc/ASimpL.dllbin0 -> 15508756 bytes
-rw-r--r--testsuite/tests/ghci/linking/dyn/bin_impl_gcc/libASx.dll.abin0 -> 29698010 bytes
-rw-r--r--testsuite/tests/rts/all.T2
10 files changed, 121 insertions, 29 deletions
diff --git a/hadrian/src/Settings/Packages.hs b/hadrian/src/Settings/Packages.hs
index c441ca6cb3..39ba685255 100644
--- a/hadrian/src/Settings/Packages.hs
+++ b/hadrian/src/Settings/Packages.hs
@@ -403,7 +403,7 @@ rtsPackageArgs = package rts ? do
, builder HsCpp ? pure
[ "-DTOP=" ++ show top ]
- , builder HsCpp ? flag UseLibdw ? arg "-DUSE_LIBDW"
+ , builder HsCpp ? flag UseLibdw ? arg "-DUSE_LIBDW" ]
-- Compile various performance-critical pieces *without* -fPIC -dynamic
-- even when building a shared library. If we don't do this, then the
diff --git a/libraries/base/System/Posix/Internals.hs b/libraries/base/System/Posix/Internals.hs
index 6aae25ab5e..4a82bbae1c 100644
--- a/libraries/base/System/Posix/Internals.hs
+++ b/libraries/base/System/Posix/Internals.hs
@@ -904,6 +904,7 @@ foreign import capi unsafe "stdio.h value SEEK_END" sEEK_END :: CInt
{-
Note [Windows types]
+~~~~~~~~~~~~~~~~~~~~
Windows' _read and _write have types that differ from POSIX. They take an
unsigned int for length and return a signed int where POSIX uses size_t and
diff --git a/rts/Linker.c b/rts/Linker.c
index af91a098be..7eb0409709 100644
--- a/rts/Linker.c
+++ b/rts/Linker.c
@@ -135,8 +135,7 @@ extern void iconv();
This is to enable lazy loading of symbols. Eager loading is problematic
as it means that all symbols must be available, even those which we will
never use. This is especially painful on Windows, where the number of
- libraries required to link things like mingwex (TODO: We no longer depend
- on mingwex, so think of a different example here) grows to be quite high.
+ libraries required to link things like QT or WxWidgets grows to be quite high.
We proceed through these stages as follows,
@@ -194,8 +193,7 @@ extern void iconv();
1) Dependency chains, if A.o required a .o in libB but A.o isn't required to link
then we don't need to load libB. This means the dependency chain for libraries
- such as mingw32 and mingwex (TODO: We no longer depend on mingwex, so think of
- a different example here) can be broken down.
+ such as ucrt can be broken down.
2) The number of duplicate symbols, since now only symbols that are
true duplicates will display the error.
@@ -228,7 +226,7 @@ static void ghciRemoveSymbolTable(StrHashTable *table, const SymbolName* key,
static const char *
symbolTypeString (SymType type)
{
- switch (type) {
+ switch (type & ~SYM_TYPE_DUP_DISCARD) {
case SYM_TYPE_CODE: return "code";
case SYM_TYPE_DATA: return "data";
case SYM_TYPE_INDIRECT_DATA: return "indirect-data";
@@ -277,14 +275,18 @@ int ghciInsertSymbolTable(
insertStrHashTable(table, key, pinfo);
return 1;
}
- else if (pinfo->type != type)
+ else if (pinfo->type ^ type)
{
- debugBelch("Symbol type mismatch.\n");
- debugBelch("Symbol %s was defined by %" PATH_FMT " to be a %s symbol.\n",
- key, obj_name, symbolTypeString(type));
- debugBelch(" yet was defined by %" PATH_FMT " to be a %s symbol.\n",
- pinfo->owner ? pinfo->owner->fileName : WSTR("<builtin>"),
- symbolTypeString(pinfo->type));
+ /* We were asked to discard the symbol on duplicates, do so quietly. */
+ if (!(type & SYM_TYPE_DUP_DISCARD))
+ {
+ debugBelch("Symbol type mismatch.\n");
+ debugBelch("Symbol %s was defined by %" PATH_FMT " to be a %s symbol.\n",
+ key, obj_name, symbolTypeString(type));
+ debugBelch(" yet was defined by %" PATH_FMT " to be a %s symbol.\n",
+ pinfo->owner ? pinfo->owner->fileName : WSTR("<builtin>"),
+ symbolTypeString(pinfo->type));
+ }
return 1;
}
else if (pinfo->strength == STRENGTH_STRONG)
diff --git a/rts/LinkerInternals.h b/rts/LinkerInternals.h
index 8982318b2a..6797095d1e 100644
--- a/rts/LinkerInternals.h
+++ b/rts/LinkerInternals.h
@@ -54,11 +54,16 @@ typedef struct _Section Section;
*/
/* What kind of thing a symbol identifies. We need to know this to determine how
- * to process overflowing relocations. See Note [Processing overflowed relocations]. */
+ * to process overflowing relocations. See Note [Processing overflowed relocations].
+ * This is bitfield however only the option SYM_TYPE_DUP_DISCARD can be combined
+ * with the other values. */
typedef enum _SymType {
- SYM_TYPE_CODE, /* the symbol is a function and can be relocated via a jump island */
- SYM_TYPE_DATA, /* the symbol is data */
- SYM_TYPE_INDIRECT_DATA, /* see Note [_iob_func symbol] */
+ SYM_TYPE_CODE = 1 << 0, /* the symbol is a function and can be relocated via a jump island */
+ SYM_TYPE_DATA = 1 << 1, /* the symbol is data */
+ SYM_TYPE_INDIRECT_DATA = 1 << 2, /* see Note [_iob_func symbol] */
+ SYM_TYPE_DUP_DISCARD = 1 << 3, /* the symbol is a symbol in a BFD import library
+ however if a duplicate is found with a mismatching
+ SymType then discard this one. */
} SymType;
diff --git a/rts/linker/PEi386.c b/rts/linker/PEi386.c
index 8d5238f792..b5134235b0 100644
--- a/rts/linker/PEi386.c
+++ b/rts/linker/PEi386.c
@@ -261,6 +261,54 @@
.asciiz "libfoo_data"
+ Note [GHC Linking model and import libraries]
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ The above describes how import libraries work for static linking.
+ Fundamentally this does not apply to dynamic linking as we do in GHC.
+ The issue is two-folds:
+
+ 1. In the linking model above it is expected that the .idata sections be
+ materialized into PLTs during linking. However in GHC we never create
+ PLTs, but have out own mechanism for this which is the jump island
+ machinery. This is required for efficiency. For one materializing the
+ .idata sections would result in wasting pages. We'd use one page for
+ every ~100 bytes. This is extremely wasteful and also fragments the
+ memory. Secondly the dynamic linker is lazy. We only perform the final
+ loading if the symbol is used, however with an import library we can
+ discard the actual OC immediately after reading it. This prevents us from
+ keeping ~1k in memory per symbol for no reason.
+
+ 2. GHC itself does not observe symbol visibility correctly during NGC. This
+ in itself isn't an academic exercise. The issue stems from GHC using one
+ mechanism for providing two incompatible linking modes:
+ a) The first mode is generating Haskell shared libraries which are
+ intended to be used by other Haskell code. This requires us to
+ export the info, data and closures. For this GHC just re-exports
+ all symbols. But it doesn't correcly mark data/code. Symbol
+ visibility is overwritten by telling the linker to export all
+ symbols.
+ b) The second code is producing code that's supposed to be call-able
+ through a C insterface. This in reality does not require the
+ export of closures and info tables. But also does not require the
+ inclusion of the RTS inside the DLL. Hover this is done today
+ because we don't properly have the RTS as a dynamic library.
+ i.e. GHC does not only export symbols denoted by foreign export.
+ Also GHC should depend on an RTS library, but at the moment it
+ cannot because of TNTC is incompatible with dynamic linking.
+
+ These two issues mean that for GHC we need to take a different approach
+ to handling import libraries. For normal C libraries we have proper
+ differentiation between CODE and DATA. For GHC produced import libraries
+ we do not. As such the SYM_TYPE_DUP_DISCARD tells the linker that if a
+ duplicate symbol is found, and we were going to discard it anyway, just do
+ so quitely. This works because the RTS symbols themselves are provided by
+ the currently loaded RTS as built-in symbols.
+
+ Secondly we cannot rely on a text symbol being available. As such we
+ should only depend on the symbols as defined in the .idata sections,
+ otherwise we would not be able to correctly link against GHC produced
+ import libraries.
+
Note [Memory allocation]
~~~~~~~~~~~~~~~~~~~~~~~~
The loading of an object begins in `preloadObjectFile`, which allocates a buffer,
@@ -1658,7 +1706,10 @@ ocGetNames_PEi386 ( ObjectCode* oc )
if ( secNumber != IMAGE_SYM_UNDEFINED
&& secNumber > 0
&& section
- && section->kind != SECTIONKIND_BFD_IMPORT_LIBRARY) {
+ /* Skip all BFD import sections. */
+ && section->kind != SECTIONKIND_IMPORT
+ && section->kind != SECTIONKIND_BFD_IMPORT_LIBRARY
+ && section->kind != SECTIONKIND_BFD_IMPORT_LIBRARY_HEAD) {
/* This symbol is global and defined, viz, exported */
/* for IMAGE_SYMCLASS_EXTERNAL
&& !IMAGE_SYM_UNDEFINED,
@@ -1691,12 +1742,49 @@ ocGetNames_PEi386 ( ObjectCode* oc )
IF_DEBUG(linker_verbose, debugBelch("bss symbol @ %p %u\n", addr, symValue));
}
else if (section && section->kind == SECTIONKIND_BFD_IMPORT_LIBRARY) {
- setImportSymbol(oc, sname);
+ /* Disassembly of section .idata$5:
+
+ 0000000000000000 <__imp_Insert>:
+ ...
+ 0: IMAGE_REL_AMD64_ADDR32NB .idata$6
+
+ The first two bytes contain the ordinal of the function
+ in the format of lowpart highpart. The two bytes combined
+ for the total range of 16 bits which is the function export limit
+ of DLLs. See note [GHC Linking model and import libraries]. */
+ sname = (SymbolName*)section->start+2;
+ COFF_symbol* sym = &oc->info->symbols[info->numberOfSymbols-1];
+ addr = get_sym_name( getSymShortName (info, sym), oc);
+
+ IF_DEBUG(linker,
+ debugBelch("addImportSymbol `%s' => `%s'\n",
+ sname, (char*)addr));
+ /* We're going to free the any data associated with the import
+ library without copying the sections. So we have to duplicate
+ the symbol name and values before the pointers become invalid. */
+ sname = strdup (sname);
+ addr = strdup (addr);
+ type = has_code_section ? SYM_TYPE_CODE : SYM_TYPE_DATA;
+ type |= SYM_TYPE_DUP_DISCARD;
+ if (!ghciInsertSymbolTable(oc->fileName, symhash, sname,
+ addr, false, type, oc)) {
+ releaseOcInfo (oc);
+ stgFree (oc->image);
+ oc->image = NULL;
+ return false;
+ }
+ setImportSymbol (oc, sname);
+
+ /* Don't process this oc any further. Just exit. */
+ oc->n_symbols = 0;
+ oc->symbols = NULL;
+ stgFree (oc->image);
+ oc->image = NULL;
+ releaseOcInfo (oc);
// There is nothing that we need to resolve in this object since we
// will never call the import stubs in its text section
oc->status = OBJECT_DONT_RESOLVE;
-
- IF_DEBUG(linker_verbose, debugBelch("import symbol %s\n", sname));
+ return true;
}
else if (secNumber > 0
&& section
diff --git a/testsuite/tests/ghci/linking/dyn/Makefile b/testsuite/tests/ghci/linking/dyn/Makefile
index 1e000ff524..ea48f25af2 100644
--- a/testsuite/tests/ghci/linking/dyn/Makefile
+++ b/testsuite/tests/ghci/linking/dyn/Makefile
@@ -88,10 +88,6 @@ compile_libAB_dyn:
.PHONY: compile_libAS_impl_gcc
compile_libAS_impl_gcc:
- rm -rf bin_impl_gcc
- mkdir bin_impl_gcc
- '$(TEST_HC)' $(MY_TEST_HC_OPTS) -odir "bin_impl_gcc" -shared A.c -o "bin_impl_gcc/$(call DLL,ASimpL)"
- mv bin_impl_gcc/libASimpL.dll.a bin_impl_gcc/libASx.dll.a
echo "main" | '$(TEST_HC)' $(TEST_HC_OPTS_INTERACTIVE) T11072.hs -lASx -L./bin_impl_gcc
.PHONY: compile_libAS_impl_msvc
diff --git a/testsuite/tests/ghci/linking/dyn/all.T b/testsuite/tests/ghci/linking/dyn/all.T
index e8b99d1f19..557087444e 100644
--- a/testsuite/tests/ghci/linking/dyn/all.T
+++ b/testsuite/tests/ghci/linking/dyn/all.T
@@ -41,9 +41,9 @@ test('T10458',
extra_hc_opts('-L"$PWD/T10458dir" -lAS')],
ghci_script, ['T10458.script'])
-test('T11072gcc', [extra_files(['A.c', 'T11072.hs']),
- expect_broken(18718),
- unless(doing_ghci, skip), unless(opsys('mingw32'), skip)],
+test('T11072gcc', [extra_files(['A.c', 'T11072.hs', 'bin_impl_gcc/']),
+ unless(doing_ghci, skip), unless(opsys('mingw32'), skip),
+ unless(arch('x86_64'), skip)],
makefile_test, ['compile_libAS_impl_gcc'])
test('T11072msvc', [extra_files(['A.c', 'T11072.hs', 'libAS.def', 'i686/', 'x86_64/']),
diff --git a/testsuite/tests/ghci/linking/dyn/bin_impl_gcc/ASimpL.dll b/testsuite/tests/ghci/linking/dyn/bin_impl_gcc/ASimpL.dll
new file mode 100644
index 0000000000..2debfc0d2d
--- /dev/null
+++ b/testsuite/tests/ghci/linking/dyn/bin_impl_gcc/ASimpL.dll
Binary files differ
diff --git a/testsuite/tests/ghci/linking/dyn/bin_impl_gcc/libASx.dll.a b/testsuite/tests/ghci/linking/dyn/bin_impl_gcc/libASx.dll.a
new file mode 100644
index 0000000000..3c13ab3e2f
--- /dev/null
+++ b/testsuite/tests/ghci/linking/dyn/bin_impl_gcc/libASx.dll.a
Binary files differ
diff --git a/testsuite/tests/rts/all.T b/testsuite/tests/rts/all.T
index dcdd991817..f3349e0232 100644
--- a/testsuite/tests/rts/all.T
+++ b/testsuite/tests/rts/all.T
@@ -411,7 +411,7 @@ test('T10296b', [only_ways(['threaded2'])], compile_and_run, [''])
test('numa001', [ extra_run_opts('8'), unless(unregisterised(), extra_ways(['debug_numa'])) ]
, compile_and_run, [''])
-test('T12497', [ unless(opsys('mingw32'), skip)
+test('T12497', [ unless(opsys('mingw32'), skip), expect_broken(22694)
],
makefile_test, ['T12497'])