author     Phuong Trinh <lolotp@fb.com>  2019-04-01 20:05:47 +0100
committer  Marge Bot <ben+marge-bot@smart-cactus.org>  2019-05-30 16:43:31 -0400
commit     f81f3964b718eab21f0cfe65067c195f2f2a84bd (patch)
tree       b14805c7ea1882f711de92e77fc08e6d6339c205 /rts/CheckUnload.c
parent     4ad37a323b9cdb830d718dec08c2960e34410a43 (diff)
Use binary search to speed up checkUnload
We iterate through all object code for each heap object when checking whether object code can be unloaded. For large projects in GHCi, this can be very expensive because of the large number of object files that need to be loaded and unloaded. To speed it up, this change arranges all mapped sections of unloaded object code in an array sorted by start address and uses binary search to check whether an address falls within any of them.
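
The lookup strategy is easiest to see in isolation. Below is a minimal, self-contained sketch of the same idea using plain C types (uintptr_t instead of the RTS's W_, and an integer tag instead of an ObjectCode pointer); the names Section, cmpStart and findSection are illustrative and do not appear in the patch, whose actual implementation follows in the diff.

/* Standalone sketch of the sorted-array + binary-search lookup described
 * above. Illustrative only; not the RTS code. */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

typedef struct {
    uintptr_t start;   /* first address of a mapped section      */
    uintptr_t end;     /* one past the last address of the section */
    int       owner;   /* stand-in for the owning object code     */
} Section;

/* Order sections by start address so they can be binary-searched. */
static int cmpStart(const void *a, const void *b)
{
    uintptr_t s1 = ((const Section *)a)->start;
    uintptr_t s2 = ((const Section *)b)->start;
    return (s1 > s2) - (s1 < s2);
}

/* Find the section whose [start, end) range contains addr, or NULL.
 * Because sections occupy mutually exclusive memory regions, it is enough
 * to find the greatest start <= addr and check addr against that
 * section's end. */
static const Section *findSection(const Section *xs, int n, uintptr_t addr)
{
    if (n <= 0 || addr < xs[0].start) return NULL;
    int left = 0, right = n;           /* invariant: xs[left].start <= addr */
    while (left + 1 < right) {
        int mid = left + (right - left) / 2;
        if (xs[mid].start <= addr) left = mid; else right = mid;
    }
    return (addr < xs[left].end) ? &xs[left] : NULL;
}

int main(void)
{
    Section secs[] = {
        { 0x4000, 0x5000, 1 },
        { 0x1000, 0x2000, 0 },
        { 0x8000, 0x9000, 2 },
    };
    int n = sizeof(secs) / sizeof(secs[0]);
    qsort(secs, n, sizeof(Section), cmpStart);

    const Section *hit = findSection(secs, n, 0x4123);
    printf("0x4123 -> %s\n", hit ? "inside a section" : "not mapped");
    hit = findSection(secs, n, 0x3000);
    printf("0x3000 -> %s\n", hit ? "inside a section" : "not mapped");
    return 0;
}
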
Diffstat (limited to 'rts/CheckUnload.c')
-rw-r--r--  rts/CheckUnload.c  167
1 file changed, 135 insertions(+), 32 deletions(-)
diff --git a/rts/CheckUnload.c b/rts/CheckUnload.c
index fa4843d8e4..de8180526b 100644
--- a/rts/CheckUnload.c
+++ b/rts/CheckUnload.c
@@ -38,30 +38,129 @@
// object as referenced so that it won't get unloaded in this round.
//
-static void checkAddress (HashTable *addrs, const void *addr)
+//
+// In certain circumstances, there may be a lot of unloaded ObjectCode structs
+// chained in `unloaded_objects` (such as when users `:load` a module in a very
+// big repo in GHCi). To speed up checking whether an address lies within any of
+// these objects, we populate the addresses of their mapped sections in
+// an array sorted by their `start` address and do binary search for our address
+// on that array. Note that this works because the sections are mapped to
+// mutually exclusive memory regions, so we can simply find the largest lower
+// bound among the `start` addresses of the sections and then check if our
+// address is inside that section. In particular, we store the start address
+// and end address of each mapped section in an OCSectionIndex, arrange them
+// all in a contiguous memory range and then sort by start address. We then
+// put this array in an
+// memory range and then sort by start address. We then put this array in an
+// OCSectionIndices struct to be passed into `checkAddress` to do binary search
+// on.
+//
+
+typedef struct {
+ W_ start;
+ W_ end;
+ ObjectCode *oc;
+} OCSectionIndex;
+
+typedef struct {
+ int n_sections;
+ OCSectionIndex *indices;
+} OCSectionIndices;
+
+static OCSectionIndices *createOCSectionIndices(int n_sections)
+{
+ OCSectionIndices *s_indices;
+ s_indices = stgMallocBytes(sizeof(OCSectionIndices), "OCSectionIndices");
+ s_indices->n_sections = n_sections;
+ s_indices->indices = stgMallocBytes(n_sections*sizeof(OCSectionIndex),
+ "OCSectionIndices::indices");
+ return s_indices;
+}
+
+static int cmpSectionIndex(const void* indexa, const void *indexb)
+{
+ W_ s1 = ((OCSectionIndex*)indexa)->start;
+ W_ s2 = ((OCSectionIndex*)indexb)->start;
+ if (s1 < s2) {
+ return -1;
+ } else if (s1 > s2) {
+ return 1;
+ }
+ return 0;
+}
+
+static OCSectionIndices* buildOCSectionIndices(ObjectCode *ocs)
+{
+ int cnt_sections = 0;
+ ObjectCode *oc;
+ for (oc = ocs; oc; oc = oc->next) {
+ cnt_sections += oc->n_sections;
+ }
+ OCSectionIndices* s_indices = createOCSectionIndices(cnt_sections);
+ int s_i = 0, i;
+ for (oc = ocs; oc; oc = oc->next) {
+ for (i = 0; i < oc->n_sections; i++) {
+ if (oc->sections[i].kind != SECTIONKIND_OTHER) {
+ s_indices->indices[s_i].start = (W_)oc->sections[i].start;
+ s_indices->indices[s_i].end = (W_)oc->sections[i].start
+ + oc->sections[i].size;
+ s_indices->indices[s_i].oc = oc;
+ s_i++;
+ }
+ }
+ }
+ s_indices->n_sections = s_i;
+ qsort(s_indices->indices,
+ s_indices->n_sections,
+ sizeof(OCSectionIndex),
+ cmpSectionIndex);
+ return s_indices;
+}
+
+static void freeOCSectionIndices(OCSectionIndices *section_indices)
+{
+ free(section_indices->indices);
+ free(section_indices);
+}
+
+static ObjectCode *findOC(OCSectionIndices *s_indices, const void *addr) {
+ W_ w_addr = (W_)addr;
+ if (s_indices->n_sections <= 0) return NULL;
+ if (w_addr < s_indices->indices[0].start) return NULL;
+
+ int left = 0, right = s_indices->n_sections;
+ while (left + 1 < right) {
+ int mid = (left + right)/2;
+ W_ w_mid = s_indices->indices[mid].start;
+ if (w_mid <= w_addr) {
+ left = mid;
+ } else {
+ right = mid;
+ }
+ }
+ ASSERT(w_addr >= s_indices->indices[left].start);
+ if (w_addr < s_indices->indices[left].end) {
+ return s_indices->indices[left].oc;
+ }
+ return NULL;
+}
+
+static void checkAddress (HashTable *addrs, const void *addr,
+ OCSectionIndices *s_indices)
{
ObjectCode *oc;
- int i;
if (!lookupHashTable(addrs, (W_)addr)) {
insertHashTable(addrs, (W_)addr, addr);
- for (oc = unloaded_objects; oc; oc = oc->next) {
- for (i = 0; i < oc->n_sections; i++) {
- if (oc->sections[i].kind != SECTIONKIND_OTHER) {
- if ((W_)addr >= (W_)oc->sections[i].start &&
- (W_)addr < (W_)oc->sections[i].start
- + oc->sections[i].size) {
- oc->referenced = 1;
- return;
- }
- }
- }
+ oc = findOC(s_indices, addr);
+ if (oc != NULL) {
+ oc->referenced = 1;
+ return;
}
}
}
-static void searchStackChunk (HashTable *addrs, StgPtr sp, StgPtr stack_end)
+static void searchStackChunk (HashTable *addrs, StgPtr sp, StgPtr stack_end,
+ OCSectionIndices *s_indices)
{
StgPtr p;
const StgRetInfoTable *info;
@@ -73,7 +172,7 @@ static void searchStackChunk (HashTable *addrs, StgPtr sp, StgPtr stack_end)
switch (info->i.type) {
case RET_SMALL:
case RET_BIG:
- checkAddress(addrs, (const void*)info);
+ checkAddress(addrs, (const void*)info, s_indices);
break;
default:
@@ -85,7 +184,8 @@ static void searchStackChunk (HashTable *addrs, StgPtr sp, StgPtr stack_end)
}
-static void searchHeapBlocks (HashTable *addrs, bdescr *bd)
+static void searchHeapBlocks (HashTable *addrs, bdescr *bd,
+ OCSectionIndices *s_indices)
{
StgPtr p;
const StgInfoTable *info;
@@ -189,7 +289,7 @@ static void searchHeapBlocks (HashTable *addrs, bdescr *bd)
prim = true;
size = ap_stack_sizeW(ap);
searchStackChunk(addrs, (StgPtr)ap->payload,
- (StgPtr)ap->payload + ap->size);
+ (StgPtr)ap->payload + ap->size, s_indices);
break;
}
@@ -223,7 +323,7 @@ static void searchHeapBlocks (HashTable *addrs, bdescr *bd)
StgStack *stack = (StgStack*)p;
prim = true;
searchStackChunk(addrs, stack->sp,
- stack->stack + stack->stack_size);
+ stack->stack + stack->stack_size, s_indices);
size = stack_sizeW(stack);
break;
}
@@ -238,7 +338,7 @@ static void searchHeapBlocks (HashTable *addrs, bdescr *bd)
}
if (!prim) {
- checkAddress(addrs,info);
+ checkAddress(addrs,info, s_indices);
}
p += size;
@@ -251,15 +351,16 @@ static void searchHeapBlocks (HashTable *addrs, bdescr *bd)
// Do not unload the object if the CCS tree refers to a CCS or CC which
// originates in the object.
//
-static void searchCostCentres (HashTable *addrs, CostCentreStack *ccs)
+static void searchCostCentres (HashTable *addrs, CostCentreStack *ccs,
+ OCSectionIndices* s_indices)
{
IndexTable *i;
- checkAddress(addrs, ccs);
- checkAddress(addrs, ccs->cc);
+ checkAddress(addrs, ccs, s_indices);
+ checkAddress(addrs, ccs->cc, s_indices);
for (i = ccs->indexTable; i != NULL; i = i->next) {
if (!i->back_edge) {
- searchCostCentres(addrs, i->ccs);
+ searchCostCentres(addrs, i->ccs, s_indices);
}
}
}
@@ -288,6 +389,7 @@ void checkUnload (StgClosure *static_objects)
ACQUIRE_LOCK(&linker_unloaded_mutex);
+ OCSectionIndices *s_indices = buildOCSectionIndices(unloaded_objects);
// Mark every unloadable object as unreferenced initially
for (oc = unloaded_objects; oc; oc = oc->next) {
IF_DEBUG(linker, debugBelch("Checking whether to unload %" PATH_FMT "\n",
@@ -299,7 +401,7 @@ void checkUnload (StgClosure *static_objects)
for (p = static_objects; p != END_OF_STATIC_OBJECT_LIST; p = link) {
p = UNTAG_STATIC_LIST_PTR(p);
- checkAddress(addrs, p);
+ checkAddress(addrs, p, s_indices);
info = get_itbl(p);
link = *STATIC_LINK(info, p);
}
@@ -309,32 +411,33 @@ void checkUnload (StgClosure *static_objects)
p != END_OF_CAF_LIST;
p = ((StgIndStatic *)p)->static_link) {
p = UNTAG_STATIC_LIST_PTR(p);
- checkAddress(addrs, p);
+ checkAddress(addrs, p, s_indices);
}
for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
- searchHeapBlocks (addrs, generations[g].blocks);
- searchHeapBlocks (addrs, generations[g].large_objects);
+ searchHeapBlocks (addrs, generations[g].blocks, s_indices);
+ searchHeapBlocks (addrs, generations[g].large_objects, s_indices);
for (n = 0; n < n_capabilities; n++) {
ws = &gc_threads[n]->gens[g];
- searchHeapBlocks(addrs, ws->todo_bd);
- searchHeapBlocks(addrs, ws->part_list);
- searchHeapBlocks(addrs, ws->scavd_list);
+ searchHeapBlocks(addrs, ws->todo_bd, s_indices);
+ searchHeapBlocks(addrs, ws->part_list, s_indices);
+ searchHeapBlocks(addrs, ws->scavd_list, s_indices);
}
}
#if defined(PROFILING)
/* Traverse the cost centre tree, calling checkAddress on each CCS/CC */
- searchCostCentres(addrs, CCS_MAIN);
+ searchCostCentres(addrs, CCS_MAIN, s_indices);
/* Also check each cost centre in the CC_LIST */
CostCentre *cc;
for (cc = CC_LIST; cc != NULL; cc = cc->link) {
- checkAddress(addrs, cc);
+ checkAddress(addrs, cc, s_indices);
}
#endif /* PROFILING */
+ freeOCSectionIndices(s_indices);
// Look through the unloadable objects, and any object that is still
// marked as unreferenced can be physically unloaded, because we
// have no references to it.