diff options
author | Peter Trommler <ptrommler@acm.org> | 2014-11-30 12:00:39 -0600 |
---|---|---|
committer | Austin Seipp <austin@well-typed.com> | 2014-11-30 12:00:40 -0600 |
commit | 383733b9191a36e2d3f757700842dbc3855911d9 (patch) | |
tree | 5d86bffc23aec991454ff506124e7a075e983bea | |
parent | 643635ea1d779054e1bb3b1825cd7894c5748811 (diff) | |
download | haskell-383733b9191a36e2d3f757700842dbc3855911d9.tar.gz |
Fix obscure problem with using the system linker (#8935)
Summary:
In a statically linked GHCi symbol `environ` resolves to NULL when
called from a Haskell script.
When resolving symbols in a Haskell script we need to search the
executable program and its dependent (DT_NEEDED) shared libraries
first and then search the loaded libraries.
We want to be able to override functions in loaded libraries later.
Libraries must be opened with local scope (RTLD_LOCAL) and not global.
The latter adds all symbols to the executable program's symbols where
they are then searched in loading order. We want reverse loading order.
When libraries are loaded with local scope the dynamic linker
cannot use symbols in that library when resolving the dependencies
in another shared library. This changes the way files compiled to
object code must be linked into temporary shared libraries. We link
with the last temporary shared library created so far if it exists.
Since each temporary shared library is linked to the previous temporary
shared library the dynamic linker finds the latest definition of a
symbol by following the dependency chain.
See also Note [RTLD_LOCAL] for a summary of the problem and solution.
Cherry-picked commit 2f8b4c
Changed linker argument ordering
On some ELF systems GNU ld (and others?) default to
--as-needed and the order of libraries in the link
matters.
The last temporary shared library, must appear
before all other libraries. Switching the position
of extra_ld_inputs and lib_path_objs does that.
Fixes #8935 and #9186
Reviewers: austin, hvr, rwbarton, simonmar
Reviewed By: simonmar
Subscribers: thomie, carter, simonmar
Differential Revision: https://phabricator.haskell.org/D349
GHC Trac Issues: #8935, #9186, #9480
-rw-r--r-- | compiler/ghci/Linker.hs | 74 | ||||
-rw-r--r-- | compiler/main/SysTools.lhs | 3 | ||||
-rw-r--r-- | rts/Linker.c | 43 |
3 files changed, 87 insertions, 33 deletions
diff --git a/compiler/ghci/Linker.hs b/compiler/ghci/Linker.hs index 8560310af1..8573f6a37b 100644 --- a/compiler/ghci/Linker.hs +++ b/compiler/ghci/Linker.hs @@ -117,8 +117,12 @@ data PersistentLinkerState -- The currently-loaded packages; always object code -- Held, as usual, in dependency order; though I am not sure if -- that is really important - pkgs_loaded :: ![PackageKey] - } + pkgs_loaded :: ![PackageKey], + + -- we need to remember the name of the last temporary DLL/.so + -- so we can link it + last_temp_so :: !(Maybe FilePath) } + emptyPLS :: DynFlags -> PersistentLinkerState emptyPLS _ = PersistentLinkerState { @@ -126,7 +130,8 @@ emptyPLS _ = PersistentLinkerState { itbl_env = emptyNameEnv, pkgs_loaded = init_pkgs, bcos_loaded = [], - objs_loaded = [] } + objs_loaded = [], + last_temp_so = Nothing } -- Packages that don't need loading, because the compiler -- shares them with the interpreted program. @@ -316,14 +321,15 @@ linkCmdLineLibs' dflags@(DynFlags { ldInputs = cmdline_ld_inputs ; if null cmdline_lib_specs then return pls else do - { mapM_ (preloadLib dflags lib_paths framework_paths) cmdline_lib_specs + { pls1 <- foldM (preloadLib dflags lib_paths framework_paths) pls + cmdline_lib_specs ; maybePutStr dflags "final link ... " ; ok <- resolveObjs ; if succeeded ok then maybePutStrLn dflags "done" else throwGhcExceptionIO (ProgramError "linking extra libraries/objects failed") - ; return pls + ; return pls1 }} @@ -362,19 +368,22 @@ classifyLdInput dflags f return Nothing where platform = targetPlatform dflags -preloadLib :: DynFlags -> [String] -> [String] -> LibrarySpec -> IO () -preloadLib dflags lib_paths framework_paths lib_spec +preloadLib :: DynFlags -> [String] -> [String] -> PersistentLinkerState + -> LibrarySpec -> IO (PersistentLinkerState) +preloadLib dflags lib_paths framework_paths pls lib_spec = do maybePutStr dflags ("Loading object " ++ showLS lib_spec ++ " ... ") case lib_spec of Object static_ish - -> do b <- preload_static lib_paths static_ish + -> do (b, pls1) <- preload_static lib_paths static_ish maybePutStrLn dflags (if b then "done" else "not found") + return pls1 Archive static_ish -> do b <- preload_static_archive lib_paths static_ish maybePutStrLn dflags (if b then "done" else "not found") + return pls DLL dll_unadorned -> do maybe_errstr <- loadDLL (mkSOName platform dll_unadorned) @@ -390,12 +399,14 @@ preloadLib dflags lib_paths framework_paths lib_spec case err2 of Nothing -> maybePutStrLn dflags "done" Just _ -> preloadFailed mm lib_paths lib_spec + return pls DLLPath dll_path -> do maybe_errstr <- loadDLL dll_path case maybe_errstr of Nothing -> maybePutStrLn dflags "done" Just mm -> preloadFailed mm lib_paths lib_spec + return pls Framework framework -> if platformUsesFrameworks (targetPlatform dflags) @@ -403,6 +414,7 @@ preloadLib dflags lib_paths framework_paths lib_spec case maybe_errstr of Nothing -> maybePutStrLn dflags "done" Just mm -> preloadFailed mm framework_paths lib_spec + return pls else panic "preloadLib Framework" where @@ -422,11 +434,13 @@ preloadLib dflags lib_paths framework_paths lib_spec -- Not interested in the paths in the static case. preload_static _paths name = do b <- doesFileExist name - if not b then return False - else do if dynamicGhc - then dynLoadObjs dflags [name] - else loadObj name - return True + if not b then return (False, pls) + else if dynamicGhc + then do pls1 <- dynLoadObjs dflags pls [name] + return (True, pls1) + else do loadObj name + return (True, pls) + preload_static_archive _paths name = do b <- doesFileExist name if not b then return False @@ -784,8 +798,8 @@ dynLinkObjs dflags pls objs = do wanted_objs = map nameOfObject unlinkeds if dynamicGhc - then do dynLoadObjs dflags wanted_objs - return (pls1, Succeeded) + then do pls2 <- dynLoadObjs dflags pls1 wanted_objs + return (pls2, Succeeded) else do mapM_ loadObj wanted_objs -- Link them all together @@ -799,9 +813,11 @@ dynLinkObjs dflags pls objs = do pls2 <- unload_wkr dflags [] pls1 return (pls2, Failed) -dynLoadObjs :: DynFlags -> [FilePath] -> IO () -dynLoadObjs _ [] = return () -dynLoadObjs dflags objs = do + +dynLoadObjs :: DynFlags -> PersistentLinkerState -> [FilePath] + -> IO PersistentLinkerState +dynLoadObjs _ pls [] = return pls +dynLoadObjs dflags pls objs = do let platform = targetPlatform dflags soFile <- newTempName dflags (soExt platform) let -- When running TH for a non-dynamic way, we still need to make @@ -809,10 +825,22 @@ dynLoadObjs dflags objs = do -- Opt_Static off dflags1 = gopt_unset dflags Opt_Static dflags2 = dflags1 { - -- We don't want to link the ldInputs in; we'll - -- be calling dynLoadObjs with any objects that - -- need to be linked. - ldInputs = [], + -- We don't want the original ldInputs in + -- (they're already linked in), but we do want + -- to link against the previous dynLoadObjs + -- library if there was one, so that the linker + -- can resolve dependencies when it loads this + -- library. + ldInputs = + case last_temp_so pls of + Nothing -> [] + Just so -> + let (lp, l) = splitFileName so in + [ Option ("-L" ++ lp) + , Option ("-Wl,-rpath") + , Option ("-Wl," ++ lp) + , Option ("-l:" ++ l) + ], -- Even if we're e.g. profiling, we still want -- the vanilla dynamic libraries, so we set the -- ways / build tag to be just WayDyn. @@ -824,7 +852,7 @@ dynLoadObjs dflags objs = do consIORef (filesToNotIntermediateClean dflags) soFile m <- loadDLL soFile case m of - Nothing -> return () + Nothing -> return pls { last_temp_so = Just soFile } Just err -> panic ("Loading temp shared object failed: " ++ err) rmDupLinkables :: [Linkable] -- Already loaded diff --git a/compiler/main/SysTools.lhs b/compiler/main/SysTools.lhs index c13790a4ff..4c7ab03664 100644 --- a/compiler/main/SysTools.lhs +++ b/compiler/main/SysTools.lhs @@ -1468,6 +1468,7 @@ linkDynLib dflags0 o_files dep_packages in package_hs_libs ++ extra_libs ++ other_flags -- probably _stub.o files + -- and last temporary shared object file let extra_ld_inputs = ldInputs dflags case os of @@ -1585,8 +1586,8 @@ linkDynLib dflags0 o_files dep_packages -- Set the library soname. We use -h rather than -soname as -- Solaris 10 doesn't support the latter: ++ [ Option ("-Wl,-h," ++ takeFileName output_fn) ] - ++ map Option lib_path_opts ++ extra_ld_inputs + ++ map Option lib_path_opts ++ map Option pkg_lib_path_opts ++ map Option pkg_link_opts ) diff --git a/rts/Linker.c b/rts/Linker.c index 0c390ae46c..9c9de61519 100644 --- a/rts/Linker.c +++ b/rts/Linker.c @@ -1839,7 +1839,7 @@ internal_dlopen(const char *dll_name) // (see POSIX also) ACQUIRE_LOCK(&dl_mutex); - hdl = dlopen(dll_name, RTLD_LAZY | RTLD_GLOBAL); + hdl = dlopen(dll_name, RTLD_LAZY|RTLD_LOCAL); /* see Note [RTLD_LOCAL] */ errmsg = NULL; if (hdl == NULL) { @@ -1849,11 +1849,12 @@ internal_dlopen(const char *dll_name) errmsg_copy = stgMallocBytes(strlen(errmsg)+1, "addDLL"); strcpy(errmsg_copy, errmsg); errmsg = errmsg_copy; + } else { + o_so = stgMallocBytes(sizeof(OpenedSO), "addDLL"); + o_so->handle = hdl; + o_so->next = openedSOs; + openedSOs = o_so; } - o_so = stgMallocBytes(sizeof(OpenedSO), "addDLL"); - o_so->handle = hdl; - o_so->next = openedSOs; - openedSOs = o_so; RELEASE_LOCK(&dl_mutex); //--------------- End critical section ------------------- @@ -1861,14 +1862,39 @@ internal_dlopen(const char *dll_name) return errmsg; } +/* + Note [RTLD_LOCAL] + + In GHCi we want to be able to override previous .so's with newly + loaded .so's when we recompile something. This further implies that + when we look up a symbol in internal_dlsym() we have to iterate + through the loaded libraries (in order from most recently loaded to + oldest) looking up the symbol in each one until we find it. + + However, this can cause problems for some symbols that are copied + by the linker into the executable image at runtime - see #8935 for a + lengthy discussion. To solve that problem we need to look up + symbols in the main executable *first*, before attempting to look + them up in the loaded .so's. But in order to make that work, we + have to always call dlopen with RTLD_LOCAL, so that the loaded + libraries don't populate the global symbol table. +*/ + static void * -internal_dlsym(void *hdl, const char *symbol) { +internal_dlsym(const char *symbol) { OpenedSO* o_so; void *v; // We acquire dl_mutex as concurrent dl* calls may alter dlerror ACQUIRE_LOCK(&dl_mutex); dlerror(); + // look in program first + v = dlsym(dl_prog_handle, symbol); + if (dlerror() == NULL) { + RELEASE_LOCK(&dl_mutex); + return v; + } + for (o_so = openedSOs; o_so != NULL; o_so = o_so->next) { v = dlsym(o_so->handle, symbol); if (dlerror() == NULL) { @@ -1876,7 +1902,6 @@ internal_dlsym(void *hdl, const char *symbol) { return v; } } - v = dlsym(hdl, symbol); RELEASE_LOCK(&dl_mutex); return v; } @@ -2036,7 +2061,7 @@ static void* lookupSymbol_ (char *lbl) if (!ghciLookupSymbolTable(symhash, lbl, &val)) { IF_DEBUG(linker, debugBelch("lookupSymbol: symbol not found\n")); # if defined(OBJFORMAT_ELF) - return internal_dlsym(dl_prog_handle, lbl); + return internal_dlsym(lbl); # elif defined(OBJFORMAT_MACHO) # if HAVE_DLFCN_H /* On OS X 10.3 and later, we use dlsym instead of the old legacy @@ -2050,7 +2075,7 @@ static void* lookupSymbol_ (char *lbl) */ IF_DEBUG(linker, debugBelch("lookupSymbol: looking up %s with dlsym\n", lbl)); ASSERT(lbl[0] == '_'); - return internal_dlsym(dl_prog_handle, lbl + 1); + return internal_dlsym(lbl + 1); # else if (NSIsSymbolNameDefined(lbl)) { NSSymbol symbol = NSLookupAndBindSymbol(lbl); |