6 files changed, 173 insertions, 23 deletions
diff --git a/configure.ac b/configure.ac
index f414308ae8..013f21f3ca 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1219,6 +1219,8 @@ dnl ~~~~~~~~~~~~~~~~~~~~
 dnl The portability situation here is complicated:
 dnl
 dnl * FreeBSD supports pthread_set_name_np in <pthread_np.h>
+dnl   and (if not _POSIX_SOURCE) pthread_setname_np() in <pthread.h>.
+dnl   Because of the conditional visibility, we prefer the former.
 dnl * glibc supports pthread_setname_np
 dnl * Darwin supports pthread_setname_np but does not take a
 dnl   pthread_t argument.
@@ -1270,7 +1272,7 @@ AC_LINK_IFELSE([
     [[
     #include <pthread_np.h>
     ]],
-    [[pthread_setname_np(pthread_self(), "name");]]
+    [[pthread_set_name_np(pthread_self(), "name");]]
   )],
   [
     AC_MSG_RESULT(yes)
diff --git a/rts/LinkerInternals.h b/rts/LinkerInternals.h
index f56eec47fa..e1da899c89 100644
--- a/rts/LinkerInternals.h
+++ b/rts/LinkerInternals.h
@@ -195,6 +195,7 @@ typedef struct {
     } jumpIsland;
 #elif defined(x86_64_HOST_ARCH)
     uint64_t    addr;
+    // See Note [TLSGD relocation] in elf_tlsgd.c
     uint8_t     jumpIsland[8];
 #elif defined(arm_HOST_ARCH)
     uint8_t     jumpIsland[16];
diff --git a/rts/linker/Elf.c b/rts/linker/Elf.c
index 9c4b1f9463..84cb72bd6b 100644
--- a/rts/linker/Elf.c
+++ b/rts/linker/Elf.c
@@ -1528,17 +1528,42 @@ do_Elf_Rela_relocations ( ObjectCode* oc, char* ehdrC,
          S = 0;
       } else {
          Elf_Sym sym = stab[ELF_R_SYM(info)];
-         /* First see if it is a local symbol. */
-         if (ELF_ST_BIND(sym.st_info) == STB_LOCAL) {
-            /* Yes, so we can get the address directly from the ELF symbol
-               table. */
-            symbol = sym.st_name==0 ? "(noname)" : strtab+sym.st_name;
-            if (ELF_R_TYPE(info) == COMPAT_R_X86_64_TLSGD) {
-               /* No support for TLSGD locals, requires new RTLD API */
-               errorBelch("%s: unhandled ELF TLSGD relocation for symbol `%s'",
-                          oc->fileName, symbol);
-               return 0;
+         if (ELF_R_TYPE(info) == COMPAT_R_X86_64_TLSGD) {
+            /*
+             * No support for TLSGD variables *defined* by the object,
+             * only references to *external* TLS variables in already
+             * loaded shared objects (the executable, libc, ...) are
+             * supported.  See Note [TLSGD relocation] in elf_tlsgd.c.
+             */
+            symbol = sym.st_name == 0 ? "(noname)" : strtab+sym.st_name;
+            if (ELF_ST_BIND(sym.st_info) == STB_LOCAL
+                || sym.st_value != 0 || sym.st_name == 0) {
+                errorBelch("%s: unsupported internal ELF TLSGD relocation for"
+                           " symbol `%s'", oc->fileName, symbol);
+                return 0;
             }
+#if defined(x86_64_HOST_ARCH) && defined(freebsd_HOST_OS)
+            S = lookupTlsgdSymbol(symbol, ELF_R_SYM(info), oc);
+#else
+            errorBelch("%s: ELF TLSGD relocation for symbol `%s'"
+                       " not supported on the target platform",
+                       oc->fileName, symbol);
+            return 0;
+#endif
+         } else if (ELF_ST_BIND(sym.st_info) == STB_LOCAL) {
+            /*
+             * For local symbols, we can get the address directly from the ELF
+             * symbol table.
+             *
+             * XXX: Is STB_LOCAL the right test here?  Should we instead be
+             * checking whether the symbol is *defined* by the current object?
+             * Defined globals also need relocation.  Perhaps the point is that
+             * conflicts are resolved in favour of any prior definition, so we
+             * must look at the accumulated symbol table instead (which has
+             * already been updated with our global symbols by the time we get
+             * here).
+             */
+            symbol = sym.st_name==0 ? "(noname)" : strtab+sym.st_name;
             /* See Note [Many ELF Sections] */
             Elf_Word secno = sym.st_shndx;
 #if defined(SHN_XINDEX)
@@ -1548,20 +1573,11 @@ do_Elf_Rela_relocations ( ObjectCode* oc, char* ehdrC,
 #endif
             S = (Elf_Addr)oc->sections[secno].start
                 + stab[ELF_R_SYM(info)].st_value;
-         } else if (ELF_R_TYPE(info) != COMPAT_R_X86_64_TLSGD) {
-            /* No, so look up the name in our global table. */
+         } else {
+            /* If not local, look up the name in our global table. */
             symbol = strtab + sym.st_name;
             S_tmp = lookupDependentSymbol( symbol, oc );
             S = (Elf_Addr)S_tmp;
-         } else {
-            symbol = strtab + sym.st_name;
-#if defined(x86_64_HOST_ARCH) && defined(freebsd_HOST_OS)
-            S = lookupTlsgdSymbol(symbol, ELF_R_SYM(info), oc);
-#else
-            errorBelch("%s: unhandled ELF TLSGD relocation for symbol `%s'",
-                       oc->fileName, symbol);
-            return 0;
-#endif
          }
          if (!S) {
            errorBelch("%s: unknown symbol `%s'", oc->fileName, symbol);
diff --git a/rts/linker/SymbolExtras.c b/rts/linker/SymbolExtras.c
index e209e211e1..ddb58e4a4e 100644
--- a/rts/linker/SymbolExtras.c
+++ b/rts/linker/SymbolExtras.c
@@ -183,6 +183,7 @@ SymbolExtra* makeSymbolExtra( ObjectCode const* oc,
     // jmp *-14(%rip)
     // 0xFF 25 is opcode + ModRM of near absolute indirect jump
     // Two bytes trailing padding, needed for TLSGD GOT entries
+    // See Note [TLSGD relocation] in elf_tlsgd.c
     static uint8_t jmp[] = { 0xFF, 0x25, 0xF2, 0xFF, 0xFF, 0xFF, 0x00, 0x00 };
     extra->addr = target;
     memcpy(extra->jumpIsland, jmp, 8);
diff --git a/rts/linker/elf_tlsgd.c b/rts/linker/elf_tlsgd.c
index 9e9d6a820f..ec42e29ac6 100644
--- a/rts/linker/elf_tlsgd.c
+++ b/rts/linker/elf_tlsgd.c
@@ -2,6 +2,123 @@
 
 #if defined(x86_64_HOST_ARCH) && defined(freebsd_HOST_OS)
 
+/*
+ * Note [TLSGD relocation]
+ *
+ * Quick background: FreeBSD's <ctype.h> is poisoned with static inline code
+ * that gets compiled into every program that uses functions like isdigit(3).
+ * When compiled "-c -fpic" for inclusion in position-independent ".a" files
+ * that are used in GHCi and HLS to load dependent packages at runtime, code
+ * that uses <ctype.h> in some FFI ends up with previously unsupported
+ * thread-specific variable (TLSGD) relocations.  This module narrowly addresses
+ * the issue for FreeBSD, where one often ends up using thread-local storage
+ * without meaning to.
+ *
+ * In the "General Dynamic" Thread-Local-Storage (TLSGD) model, relocations need
+ * an offset into a block of thread-local data associated with a particular
+ * module in which the given thread-local variable is defined.  Such blocks are
+ * not used directly, since after all, the variables are thread-specific.
+ * Rather, each module's initialized thread locals and uninitialised (zeroed)
+ * thread-locals are used to initialise a corresponding block of data in each
+ * thread, possibly on first use by a thread of a variable from a given module.
+ *
+ * A thread that needs the address of a particular TLS variable needs to pass
+ * the module id and offset to __tls_get_addr() (provided by the ELF runtime
+ * linker ld.so, a.k.a. the RTLD, which also manages the loading and unloading
+ * of modules, and dynamic creation of the backing storage for each thread's
+ * dynamic thread-local-storage vector (dtv)).
+ *
+ * The data to pass to __tls_get_addr() is found as two consecutive 64-bit
+ * values in the global offset table (GOT) of the object being relocated.
+ * (There are therefore many GOT tables, what's global is the addresses they
+ * point to, which are often outside the current object, not the tables
+ * themselves).
+ *
+ * The module id and offset are not known at compile time, and require
+ * relocation with assistance from the RTLD, because only the RTLD knows the
+ * logical module number for each loaded object (the main executable, and any
+ * shared libraries, such as libc).  Fortunately, modern RTLDs provide an
+ * iterator for the currently loaded modules of a program, which exposes
+ * the associated module id and ELF section headers of each loaded object.
+ * (For static executables, this is instead handled by the C library).
+ *
+ * The iterator in question is dl_iterate_phdr(3).  It repeatedly invokes
+ * the provided callback for each loaded module until the callback returns
+ * a non-zero value indicating that it has found what it was looking for
+ * and does not need to be called with any further modules.
+ *
+ * The "dl_phdr_info" structure provided to the callback contains the module
+ * id and a reference to the ELF program header list.  In the program header
+ * list the "dynamic" section contains a number of subsections, which include
+ * the symbol table, the string table and either or both the sysv or GNU-style
+ * symbol hash table.
+ *
+ * The size of the symbol table is not directly available, so linear search
+ * through the symbol table is not only inefficient, but in fact not really
+ * possible, since we don't reliably know where the table ends.  However, the
+ * hash tables (sysv and/or GNU) do have clear bounds, and substantially speed
+ * up symbol lookup, so we need to have code to use these tables.  For now,
+ * only the sysv table is supported, but it should be easy to also support the
+ * GNU table (which could be the only one present).  On FreeBSD it is rumoured
+ * (or at least anecdotally observed) that the tool chains ensure that the
+ * sysv table is always present.
+ *
+ * Thus armed with the symbol, string and hash table for a module, we can use
+ * our wanted symbol's hash to quickly find the relevant hash bucket, and from
+ * there traverse the list of symbols that share that hash, checking
+ * whether the name is in fact an exact match.
+ *
+ * Note that the name we want may also appear as an undefined entry in the
+ * symbol tables of other modules that also reference it as an external symbol.
+ * Thus the module we're looking for is the one where the symbol's st_value is
+ * non-zero (indicating that it is actually defined in that module).
+ *
+ * Since we're looking for a TLS variable, we also check the symbol type, just
+ * in case, to avoid erroneous bindings to some other sort of symbol.
+ *
+ * Once the right module is found, we need to push two values into a new slot
+ * in the GOT.  This is done via the makeSymbolExtra() function of the GHC RTS.
+ * Our GOT entries must therefore be wide enough to hold two 64-bit values, but
+ * previously their X86_64 incarnation was only 14 bytes wide.  It has now been
+ * expanded to 16 bytes, by adding two padding bytes to the jumpIsland slot
+ * that follows the `addr` field of the original GOT entry.  We store the
+ * module id in the `addr` field and the symbol's offset in the expanded
+ * jumpIsland field.  The address `S` of the start of the new GOT entry is
+ * then adjusted to form the relative address `S + A - P` which is stored at the
+ * relocation address `P`.
+ *
+ * The magic additional offsets `0x8000` and `0x800` for MIPS, ... and RISC-V,
+ * were suggested by Fangrui Song (a.k.a. @MaskRay) in a comment on the ticket
+ * discussing the motivating FreeBSD issue:
+ * <https://gitlab.haskell.org/ghc/ghc/-/issues/19086#note_347076>.
+ * His blog at <https://maskray.me/blog/2021-02-14-all-about-thread-local-storage>
+ * may shed more light on these.
+ *
+ * Finally, the bad news.  This code only works when the target TLS variable is
+ * defined by a preloaded shared object (.SO) that is known to the RTLD, has a
+ * module id, and TLS data and bss segments from which the RTLD initialises
+ * (perhaps lazily just-in-time) the per-thread TLS segments.  It is not
+ * presently possible to support TLS variables from runtime loaded ".o" files.
+ * These are not loaded via the RTLD, and don't get a new module id, and
+ * __tls_get_addr() cannot return an appropriate thread-specific address for
+ * these.
+ *
+ * The best solution is probably to deprecate runtime loading of ".o" files,
+ * all runtime loaded objects should be shared objects, loaded via dlopen(),
+ * in which case the RTLD will take care of all the TLS relocation details!
+ * Otherwise, packages with FFI code that uses the _Thread_local storage class
+ * will not be runtime loadable in GHCi, Haskell-language-server, and similar
+ * programs that use the GHC RTS runtime linker.  As the popularity of such
+ * variables increases, we'll need a more comprehensive approach to dealing
+ * with them, not limited to just "external references" as supported here.
+ *
+ * A much more complex approach would be to filter calls to __tls_get_addr(),
+ * using GHC-specific code to allocate per-thread storage for TLS variables in
+ * code loaded via ".o" files, delegating just external TLS variables to the
+ * RTLD.  It is far from clear how to do that, and likely unwise to even think
+ * about going there.
+ */
+
 #include "linker/Elf.h"
 #include "linker/SymbolExtras.h"
 #include <link.h>
diff --git a/rts/posix/itimer/Pthread.c b/rts/posix/itimer/Pthread.c
index 82379b9172..438bc2f69c 100644
--- a/rts/posix/itimer/Pthread.c
+++ b/rts/posix/itimer/Pthread.c
@@ -63,6 +63,9 @@
 #include <string.h>
 
 #include <pthread.h>
+#if defined(HAVE_PTHREAD_NP_H)
+#include <pthread_np.h>
+#endif
 #include <unistd.h>
 #include <fcntl.h>
 
@@ -175,10 +178,20 @@ initTicker (Time interval, TickProc handle_tick)
     /*
      * We can't use the RTS's createOSThread here as we need to remain attached
      * to the thread we create so we can later join to it if requested
+     *
+     * On FreeBSD 12.2 pthread_set_name_np() is unconditionally declared in
+     * <pthread_np.h>, while pthread_setname_np() is conditionally declared in
+     * <pthread.h> when _POSIX_SOURCE is not defined, but we're including
+     * <PosixSource.h>, so must use pthread_set_name_np() instead.  See similar
+     * code in "rts/posix/OSThreads.c".
      */
     if (! pthread_create(&thread, NULL, itimer_thread_func, (void*)handle_tick)) {
-#if defined(HAVE_PTHREAD_SETNAME_NP)
+#if defined(HAVE_PTHREAD_SET_NAME_NP)
+        pthread_set_name_np(thread, "ghc_ticker");
+#elif defined(HAVE_PTHREAD_SETNAME_NP)
         pthread_setname_np(thread, "ghc_ticker");
+#elif defined(HAVE_PTHREAD_SETNAME_NP_DARWIN)
+        pthread_setname_np("ghc_ticker");
 #endif
     } else {
         barf("Itimer: Failed to spawn thread: %s", strerror(errno));