summaryrefslogtreecommitdiff
path: root/rts/IPE.c
diff options
context:
space:
mode:
authorBen Gamari <ben@smart-cactus.org>2022-08-18 20:03:15 -0400
committerMarge Bot <ben+marge-bot@smart-cactus.org>2022-10-11 23:45:10 -0400
commit6b0d2022699d3d8b446d024ee837c0d07e2c1aa0 (patch)
tree3981c27bcbfd7ac99eb5c0b46cea5090d8a27ec9 /rts/IPE.c
parent866c736ef29a07c6f3aa68063ef98ee0ecea12f3 (diff)
downloadhaskell-6b0d2022699d3d8b446d024ee837c0d07e2c1aa0.tar.gz
Refactor IPE initialization
Here we refactor the representation of info table provenance information in object code to significantly reduce its size and link-time impact. Specifically, we deduplicate strings and represent them as 32-bit offsets into a common string table. In addition, we rework the registration logic to eliminate allocation from the registration path, which is run from a static initializer where things like allocation are technically undefined behavior (although it did previously seem to work). For similar reasons we eliminate lock usage from registration path, instead relying on atomic CAS. Closes #22077.
Diffstat (limited to 'rts/IPE.c')
-rw-r--r--rts/IPE.c145
1 files changed, 63 insertions, 82 deletions
diff --git a/rts/IPE.c b/rts/IPE.c
index 10ddee435b..82b5f50ab4 100644
--- a/rts/IPE.c
+++ b/rts/IPE.c
@@ -34,17 +34,22 @@ Unfortunately, inserting into the hash map is relatively expensive. To keep
startup times low, there's a temporary data structure that is optimized for
collecting IPE lists on registration.
-It's a singly linked list of IPE list buffers. Each buffer contains space for
-126 IPE lists. This number is a bit arbitrary, but leaves a few bytes so that
-the whole structure might fit into 1024 bytes.
-
-On registering a new IPE list, there are three cases:
-
-- It's the first entry at all: Allocate a new IpeBufferListNode and make it the
- buffer's first entry.
-- The current IpeBufferListNode has space in it's buffer: Add it to the buffer.
-- The current IpeBufferListNode's buffer is full: Allocate a new one and link it
-to the previous one, making this one the new current.
+It's a singly linked list of IPE list buffers (IpeBufferListNode). These are
+emitted by the code generator, with generally one produced per module. Each
+contains an array of IPE entries and a link field (which is used to link
+buffers onto the pending list.
+
+For reasons of space efficiency, IPE entries are represented slightly
+differently in the object file than the InfoProvEnt which we ultimately expose
+to the user. Specifically, the IPEs in IpeBufferListNode are represented by
+IpeBufferEntrys, along with a corresponding string table. The string fields
+of InfoProvEnt are represented in IpeBufferEntry as 32-bit offsets into the
+string table. This allows us to halve the size of the buffer entries on
+64-bit machines while significantly reducing the number of needed
+relocations, reducing linking cost. Moreover, the code generator takes care
+to deduplicate strings when generating the string table. When we insert a
+set of IpeBufferEntrys into the IPE hash-map we convert them to InfoProvEnts,
+which contain proper string pointers.
Building the hash map is done lazily, i.e. on first lookup or traversal. For
this all IPE lists of all IpeBufferListNode are traversed to insert all IPEs.
@@ -52,43 +57,55 @@ this all IPE lists of all IpeBufferListNode are traversed to insert all IPEs.
After the content of a IpeBufferListNode has been inserted, it's freed.
*/
+static Mutex ipeMapLock;
static HashTable *ipeMap = NULL;
+// Accessed atomically
static IpeBufferListNode *ipeBufferList = NULL;
-static Mutex ipeMapLock;
-
-void initIpeMapLock(void) { initMutex(&ipeMapLock); }
-
-void closeIpeMapLock(void) { closeMutex(&ipeMapLock); }
+void initIpe(void) { initMutex(&ipeMapLock); }
+
+void exitIpe(void) { closeMutex(&ipeMapLock); }
+
+static InfoProvEnt ipeBufferEntryToIpe(const IpeBufferListNode *node, const IpeBufferEntry *ent)
+{
+ const char *strings = node->string_table;
+ return (InfoProvEnt) {
+ .info = ent->info,
+ .prov = {
+ .table_name = &strings[ent->table_name],
+ .closure_desc = &strings[ent->closure_desc],
+ .ty_desc = &strings[ent->ty_desc],
+ .label = &strings[ent->label],
+ .module = &strings[ent->module_name],
+ .srcloc = &strings[ent->srcloc]
+ }
+ };
+}
#if defined(TRACING)
-static void traceIPEFromHashTable(void *data STG_UNUSED,
- StgWord key STG_UNUSED,
+static void traceIPEFromHashTable(void *data STG_UNUSED, StgWord key STG_UNUSED,
const void *value) {
InfoProvEnt *ipe = (InfoProvEnt *)value;
traceIPE(ipe);
}
void dumpIPEToEventLog(void) {
- ACQUIRE_LOCK(&ipeMapLock);
-
- IpeBufferListNode *cursor = ipeBufferList;
+ // Dump pending entries
+ IpeBufferListNode *cursor = RELAXED_LOAD(&ipeBufferList);
while (cursor != NULL) {
- for (int i = 0; i < cursor->count; i++) {
- for (InfoProvEnt **ipeList = cursor->buffer[i]; *ipeList != NULL; ipeList++) {
- InfoProvEnt *ipe = *ipeList;
- traceIPE(ipe);
- }
+ for (uint32_t i = 0; i < cursor->count; i++) {
+ const InfoProvEnt ent = ipeBufferEntryToIpe(cursor, &cursor->entries[i]);
+ traceIPE(&ent);
}
-
cursor = cursor->next;
}
+ // Dump entries already in hashmap
+ ACQUIRE_LOCK(&ipeMapLock);
if (ipeMap != NULL) {
mapHashTable(ipeMap, NULL, &traceIPEFromHashTable);
}
-
RELEASE_LOCK(&ipeMapLock);
}
@@ -105,50 +122,20 @@ Note [The Info Table Provenance Entry (IPE) Map].
Statically initialized IPE lists are registered at startup by a C constructor
function generated by the compiler (CodeOutput.hs) in a *.c file for each
-module.
+module. Since this is called in a static initializer we cannot rely on
+ipeMapLock; we instead use atomic CAS operations to add to the list.
A performance test for IPE registration and lookup can be found here:
https://gitlab.haskell.org/ghc/ghc/-/merge_requests/5724#note_370806
*/
-void registerInfoProvList(InfoProvEnt **ent_list) {
- // The list must be dereferenceable.
- ASSERT(ent_list[0] == NULL || ent_list[0] != NULL);
-
- // Ignore empty lists
- if (ent_list[0] == NULL) {
- return;
- }
-
- ACQUIRE_LOCK(&ipeMapLock);
-
- if (ipeBufferList == NULL) {
- ASSERT(ipeBufferList == NULL);
-
- ipeBufferList = stgMallocBytes(sizeof(IpeBufferListNode),
- "registerInfoProvList-firstNode");
- ipeBufferList->buffer[0] = ent_list;
- ipeBufferList->count = 1;
- ipeBufferList->next = NULL;
- } else {
- if (ipeBufferList->count < IPE_LIST_NODE_BUFFER_SIZE) {
- ipeBufferList->buffer[ipeBufferList->count] = ent_list;
- ipeBufferList->count = ipeBufferList->count + 1;
-
- ASSERT(ipeBufferList->next == NULL ||
- ipeBufferList->next->count == IPE_LIST_NODE_BUFFER_SIZE);
- } else {
- IpeBufferListNode *newNode = stgMallocBytes(
- sizeof(IpeBufferListNode), "registerInfoProvList-nextNode");
- newNode->buffer[0] = ent_list;
- newNode->count = 1;
- newNode->next = ipeBufferList;
- ipeBufferList = newNode;
-
- ASSERT(ipeBufferList->next->count == IPE_LIST_NODE_BUFFER_SIZE);
+void registerInfoProvList(IpeBufferListNode *node) {
+ while (true) {
+ IpeBufferListNode *old = RELAXED_LOAD(&ipeBufferList);
+ node->next = old;
+ if (cas_ptr((volatile void **) &ipeBufferList, old, node) == (void *) old) {
+ return;
}
}
-
- RELEASE_LOCK(&ipeMapLock);
}
InfoProvEnt *lookupIPE(const StgInfoTable *info) {
@@ -159,7 +146,8 @@ InfoProvEnt *lookupIPE(const StgInfoTable *info) {
void updateIpeMap() {
// Check if there's any work at all. If not so, we can circumvent locking,
// which decreases performance.
- if (ipeMap != NULL && ipeBufferList == NULL) {
+ IpeBufferListNode *pending = xchg_ptr((void **) &ipeBufferList, NULL);
+ if (ipeMap != NULL && pending == NULL) {
return;
}
@@ -169,23 +157,16 @@ void updateIpeMap() {
ipeMap = allocHashTable();
}
- while (ipeBufferList != NULL) {
- ASSERT(ipeBufferList->next == NULL ||
- ipeBufferList->next->count == IPE_LIST_NODE_BUFFER_SIZE);
- ASSERT(ipeBufferList->count > 0 &&
- ipeBufferList->count <= IPE_LIST_NODE_BUFFER_SIZE);
-
- IpeBufferListNode *currentNode = ipeBufferList;
-
- for (int i = 0; i < currentNode->count; i++) {
- for (InfoProvEnt **ipeList = currentNode->buffer[i];
- *ipeList != NULL; ipeList++) {
- insertHashTable(ipeMap, (StgWord)(*ipeList)->info, *ipeList);
- }
+ while (pending != NULL) {
+ IpeBufferListNode *currentNode = pending;
+ InfoProvEnt *ip_ents = stgMallocBytes(sizeof(InfoProvEnt) * currentNode->count, "updateIpeMap");
+ for (uint32_t i = 0; i < currentNode->count; i++) {
+ const IpeBufferEntry *ent = &currentNode->entries[i];
+ ip_ents[i] = ipeBufferEntryToIpe(currentNode, ent);
+ insertHashTable(ipeMap, (StgWord) ent->info, &ip_ents[i]);
}
- ipeBufferList = currentNode->next;
- stgFree(currentNode);
+ pending = currentNode->next;
}
RELEASE_LOCK(&ipeMapLock);