Diffstat (limited to 'rts')
-rw-r--r-- | rts/CheckUnload.c        | 714
-rw-r--r-- | rts/CheckUnload.h        |  30
-rw-r--r-- | rts/Hash.c               |  27
-rw-r--r-- | rts/Hash.h               |  35
-rw-r--r-- | rts/Linker.c             | 140
-rw-r--r-- | rts/LinkerInternals.h    |  45
-rw-r--r-- | rts/RtsStartup.c         |   3
-rw-r--r-- | rts/linker/Elf.c         |   4
-rw-r--r-- | rts/linker/LoadArchive.c |  11
-rw-r--r-- | rts/linker/MachO.c       |  16
-rw-r--r-- | rts/linker/PEi386.c      |   3
-rw-r--r-- | rts/linker/elf_got.c     |   2
-rw-r--r-- | rts/sm/Evac.c            |   6
-rw-r--r-- | rts/sm/GC.c              |  32
-rw-r--r-- | rts/sm/GC.h              |   1
15 files changed, 584 insertions, 485 deletions
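The heart of the patch is a global table of (start, end, ObjectCode*) section ranges, kept sorted by start address so that `markObjectCode` can map an arbitrary static-closure address to the object code that owns it. As a rough orientation before the diff itself, here is a minimal, self-contained model of that lookup — simplified stand-in types and hypothetical names, not the RTS code:

/* interval_lookup.c — toy model of the "section index table" in
 * rts/CheckUnload.c below: keep (start, end, owner) ranges in one array
 * sorted by start, then map an address to its owner with a binary search
 * for the greatest start <= addr. */
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

typedef struct {
    uintptr_t start, end;   /* [start, end) of a mapped section */
    int owner;              /* stand-in for the owning ObjectCode* */
} Interval;

static int cmp_start(const void *a, const void *b)
{
    uintptr_t sa = ((const Interval *)a)->start;
    uintptr_t sb = ((const Interval *)b)->start;
    return (sa > sb) - (sa < sb);
}

/* Returns the owner of the interval containing addr, or -1 if none does. */
static int find_owner(const Interval *ivs, int n, uintptr_t addr)
{
    if (n <= 0 || addr < ivs[0].start) {
        return -1;
    }
    /* Invariant: ivs[left].start <= addr < ivs[right].start, where right == n
     * means "past the end".  This mirrors findSectionIdx in the patch. */
    int left = 0, right = n;
    while (left + 1 < right) {
        int mid = (left + right) / 2;
        if (ivs[mid].start <= addr) {
            left = mid;
        } else {
            right = mid;
        }
    }
    return addr < ivs[left].end ? ivs[left].owner : -1;
}

int main(void)
{
    Interval ivs[] = {
        { 0x4000, 0x5000, 1 },  /* a section of object 1 */
        { 0x1000, 0x2000, 0 },  /* a section of object 0 */
    };
    int n = sizeof(ivs) / sizeof(ivs[0]);
    qsort(ivs, n, sizeof(Interval), cmp_start);  /* cf. sortOCSectionIndices */
    printf("%d %d %d\n",
           find_owner(ivs, n, 0x1800),   /* 0: inside object 0's section  */
           find_owner(ivs, n, 0x4800),   /* 1: inside object 1's section  */
           find_owner(ivs, n, 0x3000));  /* -1: in a gap between sections */
    return 0;
}

Because sections of different objects never overlap, the greatest lower bound on `start` uniquely identifies the candidate section, and one end-check settles membership.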
diff --git a/rts/CheckUnload.c b/rts/CheckUnload.c
index f658d2c73d..fcbe0f6156 100644
--- a/rts/CheckUnload.c
+++ b/rts/CheckUnload.c
@@ -17,43 +17,99 @@
 #include "CheckUnload.h"
 #include "sm/Storage.h"
 #include "sm/GCThread.h"
+#include "sm/HeapUtils.h"
 
 //
-// Code that we unload may be referenced from:
-//   - info pointers in heap objects and stack frames
-//   - pointers to static objects from the heap
-//   - StablePtrs to static objects
-//   - pointers to cost centres from the cost centre tree
+// Note [Object unloading]
+// ~~~~~~~~~~~~~~~~~~~~~~~
 //
-// We can find live static objects after a major GC, so we don't have
-// to look at every closure pointer in the heap.  However, we do have
-// to look at every info pointer.  So this is like a heap census
-// traversal: we look at the header of every object, but not its
-// contents.
+// Overview of object unloading:
 //
-// On the assumption that there aren't many different info pointers in
-// a typical heap, we insert addresses into a hash table.  The
-// first time we see an address, we check it against the pending
-// unloadable objects and if it lies within any of them, we mark that
-// object as referenced so that it won't get unloaded in this round.
+// - In a major GC, for every static object we mark the object's object code
+//   and its dependencies as 'live'. This is done by `markObjectCode`, called
+//   by `evacuate`.
 //
-
-// Note [Speeding up checkUnload]
-// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-// In certain circumstances, there may be a lot of unloaded ObjectCode structs
-// chained in `unloaded_objects` (such as when users `:load` a module in a very
-// big repo in GHCi). To speed up checking whether an address lies within any of
-// these objects, we populate the addresses of their mapped sections in
-// an array sorted by their `start` address and do binary search for our address
-// on that array. Note that this works because the sections are mapped to mutual
-// exclusive memory regions, so we can simply find the largest lower bound among
-// the `start` addresses of the sections and then check if our address is inside
-// that section. In particular, we store the start address and end address of
-// each mapped section in a OCSectionIndex, arrange them all on a contiguous
-// memory range and then sort by start address. We then put this array in an
-// OCSectionIndices struct to be passed into `checkAddress` to do binary search
-// on.
+// - Marking object code is done using a global "section index table"
+//   (global_s_indices below). When we load an object code we add its section
+//   indices to the table. `markObjectCode` does binary search on this table to
+//   find the object code for the marked object, and marks it and its
+//   dependencies.
+//
+//   A dependency of an object code is simply another object code that it
+//   refers to in its code. We discover these dependencies from the relocations
+//   in the referring object; they are recorded by `lookupDependentSymbol`.
+//
+// - global_s_indices is updated as we load and unload objects. When we load an
+//   object code we add its section indices to the table, and we remove those
+//   indices when we unload.
+//
+//   The table is sorted and old indices are removed in `checkUnload`, instead
+//   of on every load/unload, to avoid quadratic behavior when we load a list
+//   of objects.
+//
+// - After a major GC `checkUnload` unloads objects that are (1) explicitly
+//   asked for unloading (via `unloadObj`) and (2) are not marked during GC.
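+//
+//   For example, on a GHCi `:reload` the linker calls `unloadObj` on stale
+//   module objects, but each one is actually freed only once a later major GC
+//   finds it unmarked.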
+//
+// Note that, crucially, we don't unload an object code even if it's not
+// reachable from the heap, unless it's explicitly asked for unloading (via
+// `unloadObj`). This is a feature and not a bug! Two use cases:
+//
+// - The user might request a symbol from a loaded object at any point with
+//   lookupSymbol (e.g. GHCi might do this).
+//
+// - Sometimes we load objects that are not Haskell objects.
+//
+// To avoid unloading objects that are unreachable but are not asked for
+// unloading we maintain a "root set" of object code, `loaded_objects` below.
+// `loadObj` adds the loaded object (and, transitively, its dependencies) to
+// the root set; `unloadObj` removes it. After a major GC, `checkUnload` first
+// marks the root set (`loaded_objects`) to avoid unloading objects that are
+// not asked for unloading.
+//
+// Two other lists, `objects` and `old_objects`, are similar to the large
+// object lists in GC. Before a major GC we move `objects` to `old_objects`,
+// and move marked objects back to `objects` during evacuation and when marking
+// roots in `checkUnload`. Any object still in `old_objects` after that is
+// unloaded.
+//
+// TODO: We currently don't unload objects when non-moving GC is enabled. The
+// implementation would be similar to `nonmovingGcCafs`:
+//
+// - Maintain a "snapshot":
+//
+//   - Copy `loaded_objects` as the root set of the snapshot
+//
+//   - Stash `objects` to `old_objects` as the snapshot. We don't need a new
+//     list for this as `old_objects` won't be used by any other code when
+//     non-moving GC is enabled.
+//
+//   - Copy the `global_s_indices` table to be able to mark objects while
+//     mutators call `loadObj_` and `unloadObj_` concurrently.
+//
+//   - Don't mark object code in `evacuate`; marking will be done in the
+//     non-moving collector.
+//
+// - After preparation, bump the object code mark bit (`object_code_mark_bit`
+//   below) and mark static objects using a version of `markObjectCode` that
+//   basically does the same thing but:
+//
+//   - Needs to update the `objects` list in a thread-safe way, as mutators
+//     will be concurrently calling `loadObj_` and adding new entries to
+//     `objects`. (Alternatively we could keep a separate list for the
+//     non-moving GC's objects and merge it into the global list in the pause
+//     before moving to the concurrent sweep phase.)
+//
+//   - Needs to use the copied `global_s_indices`
+//
+// - After marking, anything left in `old_objects` is unreachable within the
+//   snapshot; unload those objects. The unload loop will be the same as in
+//   `checkUnload`. This step needs to happen in the final sync (before sweep
+//   begins) to avoid races when updating `global_s_indices`.
+//
+// - NOTE: We don't need write barriers in loadObj/unloadObj as we don't
+//   introduce a dependency from an already-loaded object to a newly loaded
+//   object and we don't delete existing dependencies.
+//
+
+uint8_t object_code_mark_bit = 0;
 
 typedef struct {
     W_ start;
@@ -62,20 +118,85 @@ typedef struct {
 } OCSectionIndex;
 
 typedef struct {
+    int capacity; // Grown to the next power of 2 on resize
    int n_sections;
+    bool sorted; // Invalidated on insertion. Sorted in checkUnload.
+    bool unloaded; // Whether we removed anything from the table in
+                   // removeOCSectionIndices. If this is set we "compact" the
+                   // table (remove the unused entries) in
+                   // `removeRemovedOCSections`.
    OCSectionIndex *indices;
 } OCSectionIndices;
 
-static OCSectionIndices *createOCSectionIndices(int n_sections)
+// List of currently live objects. Moved to `old_objects` before unload check.
+// Marked objects moved back to this list in `markObjectLive`. Remaining objects +// are freed at the end of `checkUnload`. +// +// Double-linked list to be able to remove marked objects. List formed with +// `next` and `prev` fields of `ObjectCode`. +// +// Not static: used in Linker.c. +ObjectCode *objects = NULL; + +// `objects` list is moved here before unload check. Marked objects are moved +// back to `objects`. Remaining objects are freed. +static ObjectCode *old_objects = NULL; + +// Number of objects that we want to unload. When this value is 0 we skip static +// object marking during GC and `checkUnload`. +// +// Not static: we use this value to skip static object marking in evacuate when +// this is 0. +// +// Incremented in `unloadObj_`, decremented as we unload objects in +// `checkUnload`. +int n_unloaded_objects = 0; + +// List of objects that we don't want to unload (i.e. we haven't called +// unloadObj on these yet). Used as root set for unload check in checkUnload. +// Objects are added with loadObj_ and removed with unloadObj_. +// +// List formed with `next_loaded_object` field of `ObjectCode`. +// +// Not static: used in Linker.c. +ObjectCode *loaded_objects; + +// Section index table for currently loaded objects. New indices are added by +// `loadObj_`, indices of unloaded objects are removed in `checkUnload`. Used to +// map static closures to their ObjectCode. +static OCSectionIndices *global_s_indices = NULL; + +static OCSectionIndices *createOCSectionIndices(void) { - OCSectionIndices *s_indices; - s_indices = stgMallocBytes(sizeof(OCSectionIndices), "OCSectionIndices"); - s_indices->n_sections = n_sections; - s_indices->indices = stgMallocBytes(n_sections*sizeof(OCSectionIndex), + // TODO (osa): Maybe initialize as empty (without allocation) and allocate + // on first insertion? 
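+    //
+    // Growth policy: the table starts at a fixed capacity below;
+    // `reserveOCSectionIndices` grows it to the next power of two that fits
+    // whenever an insertion would overflow it.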
+    OCSectionIndices *s_indices = stgMallocBytes(sizeof(OCSectionIndices), "OCSectionIndices");
+    int capacity = 1024;
+    s_indices->capacity = capacity;
+    s_indices->n_sections = 0;
+    s_indices->sorted = true;
+    s_indices->unloaded = false;
+    s_indices->indices = stgMallocBytes(capacity * sizeof(OCSectionIndex),
                                         "OCSectionIndices::indices");
     return s_indices;
 }
 
+static void freeOCSectionIndices(OCSectionIndices *s_indices)
+{
+    free(s_indices->indices);
+    free(s_indices);
+}
+
+void initUnloadCheck(void)
+{
+    global_s_indices = createOCSectionIndices();
+}
+
+void exitUnloadCheck(void)
+{
+    freeOCSectionIndices(global_s_indices);
+    global_s_indices = NULL;
+}
+
 static int cmpSectionIndex(const void* indexa, const void *indexb)
 {
     W_ s1 = ((OCSectionIndex*)indexa)->start;
@@ -88,44 +209,124 @@ static int cmpSectionIndex(const void* indexa, const void *indexb)
     return 0;
 }
 
-static OCSectionIndices* buildOCSectionIndices(ObjectCode *ocs)
+static void reserveOCSectionIndices(OCSectionIndices *s_indices, int len)
 {
-    int cnt_sections = 0;
-    ObjectCode *oc;
-    for (oc = ocs; oc; oc = oc->next) {
-        cnt_sections += oc->n_sections;
+    int current_capacity = s_indices->capacity;
+    int current_len = s_indices->n_sections;
+    if (current_capacity - current_len >= len) {
+        return;
+    }
+
+    // Round up to the nearest power of 2
+    int new_capacity = 1 << (int)ceil(log2(current_len + len));
+
+    OCSectionIndex *old_indices = s_indices->indices;
+    OCSectionIndex *new_indices = stgMallocBytes(new_capacity * sizeof(OCSectionIndex),
+                                                 "reserveOCSectionIndices");
+
+    for (int i = 0; i < current_len; ++i) {
+        new_indices[i] = old_indices[i];
     }
-    OCSectionIndices* s_indices = createOCSectionIndices(cnt_sections);
-    int s_i = 0, i;
-    for (oc = ocs; oc; oc = oc->next) {
-        for (i = 0; i < oc->n_sections; i++) {
-            if (oc->sections[i].kind != SECTIONKIND_OTHER) {
-                s_indices->indices[s_i].start = (W_)oc->sections[i].start;
-                s_indices->indices[s_i].end = (W_)oc->sections[i].start
-                    + oc->sections[i].size;
-                s_indices->indices[s_i].oc = oc;
-                s_i++;
+
+    s_indices->capacity = new_capacity;
+    s_indices->indices = new_indices;
+
+    free(old_indices);
+}
+
+// Insert the section indices of a single ObjectCode. Invalidates the 'sorted'
+// state.
+void insertOCSectionIndices(ObjectCode *oc)
+{
+    reserveOCSectionIndices(global_s_indices, oc->n_sections);
+    global_s_indices->sorted = false;
+
+    int s_i = global_s_indices->n_sections;
+    for (int i = 0; i < oc->n_sections; i++) {
+        if (oc->sections[i].kind != SECTIONKIND_OTHER) {
+            global_s_indices->indices[s_i].start = (W_)oc->sections[i].start;
+            global_s_indices->indices[s_i].end = (W_)oc->sections[i].start
+                + oc->sections[i].size;
+            global_s_indices->indices[s_i].oc = oc;
+            s_i++;
+        }
+    }
+
+    global_s_indices->n_sections = s_i;
+
+    // Add the object to the 'objects' list
+    if (objects != NULL) {
+        objects->prev = oc;
+    }
+    oc->next = objects;
+    objects = oc;
+}
+
+static int findSectionIdx(OCSectionIndices *s_indices, const void *addr);
+
+static void removeOCSectionIndices(OCSectionIndices *s_indices, ObjectCode *oc)
+{
+    // To avoid quadratic behavior in checkUnload we set the `oc` fields of the
+    // unloaded objects' indices to NULL here. Removing the unused entries is
+    // done in `removeRemovedOCSections`.
+
+    s_indices->unloaded = true;
+
+    for (int i = 0; i < oc->n_sections; i++) {
+        if (oc->sections[i].kind != SECTIONKIND_OTHER) {
+            int section_idx = findSectionIdx(s_indices, oc->sections[i].start);
+            if (section_idx != -1) {
+                s_indices->indices[section_idx].oc = NULL;
+            }
+        }
+    }
-    s_indices->n_sections = s_i;
+}
+
+static void sortOCSectionIndices(OCSectionIndices *s_indices) {
+    if (s_indices->sorted) {
+        return;
+    }
+
     qsort(s_indices->indices, s_indices->n_sections, sizeof(OCSectionIndex), cmpSectionIndex);
-    return s_indices;
+
+    s_indices->sorted = true;
 }
 
-static void freeOCSectionIndices(OCSectionIndices *section_indices)
-{
-    free(section_indices->indices);
-    free(section_indices);
+static void removeRemovedOCSections(OCSectionIndices *s_indices) {
+    if (!s_indices->unloaded) {
+        return;
+    }
+
+    int next_free_idx = 0;
+    for (int i = 0; i < s_indices->n_sections; ++i) {
+        if (s_indices->indices[i].oc == NULL) {
+            // free entry, skip
+        } else if (i == next_free_idx) {
+            ++next_free_idx;
+        } else {
+            s_indices->indices[next_free_idx] = s_indices->indices[i];
+            ++next_free_idx;
+        }
+    }
+
+    s_indices->n_sections = next_free_idx;
+    // The table is compact again; clear the flag until the next unload.
+    s_indices->unloaded = false;
 }
 
-static ObjectCode *findOC(OCSectionIndices *s_indices, const void *addr) {
+// Returns -1 if not found
+static int findSectionIdx(OCSectionIndices *s_indices, const void *addr) {
+    ASSERT(s_indices->sorted);
+
     W_ w_addr = (W_)addr;
-    if (s_indices->n_sections <= 0) return NULL;
-    if (w_addr < s_indices->indices[0].start) return NULL;
+    if (s_indices->n_sections <= 0) {
+        return -1;
+    }
+    if (w_addr < s_indices->indices[0].start) {
+        return -1;
+    }
 
     int left = 0, right = s_indices->n_sections;
     while (left + 1 < right) {
@@ -139,330 +340,125 @@ static ObjectCode *findOC(OCSectionIndices *s_indices, const void *addr) {
     }
     ASSERT(w_addr >= s_indices->indices[left].start);
     if (w_addr < s_indices->indices[left].end) {
-        return s_indices->indices[left].oc;
+        return left;
     }
-    return NULL;
+    return -1;
 }
 
-static void checkAddress (HashTable *addrs, const void *addr,
-        OCSectionIndices *s_indices)
-{
-    ObjectCode *oc;
-
-    if (!lookupHashTable(addrs, (W_)addr)) {
-        insertHashTable(addrs, (W_)addr, addr);
+static ObjectCode *findOC(OCSectionIndices *s_indices, const void *addr) {
+    int oc_idx = findSectionIdx(s_indices, addr);
 
-        oc = findOC(s_indices, addr);
-        if (oc != NULL) {
-            oc->referenced = 1;
-            return;
-        }
+    if (oc_idx == -1) {
+        return NULL;
     }
+
+    return s_indices->indices[oc_idx].oc;
 }
 
-static void searchStackChunk (HashTable *addrs, StgPtr sp, StgPtr stack_end,
-        OCSectionIndices *s_indices)
-{
-    StgPtr p;
-    const StgRetInfoTable *info;
+static bool markObjectLive(void *data STG_UNUSED, StgWord key, const void *value STG_UNUSED) {
+    ObjectCode *oc = (ObjectCode*)key;
+    if (oc->mark == object_code_mark_bit) {
+        return true; // for hash table iteration
+    }
+
+    oc->mark = object_code_mark_bit;
+    // Remove from the 'old_objects' list
+    if (oc->prev != NULL) {
+        // TODO(osa): Maybe 'prev' should be a pointer to the referencing *field*?
(instead of referencing *object*) + oc->prev->next = oc->next; + } else { + old_objects = oc->next; + } + if (oc->next != NULL) { + oc->next->prev = oc->prev; + } - p = sp; - while (p < stack_end) { - info = get_ret_itbl((StgClosure *)p); + // Add it to 'objects' list + oc->prev = NULL; + oc->next = objects; + if (objects != NULL) { + objects->prev = oc; + } + objects = oc; - switch (info->i.type) { - case RET_SMALL: - case RET_BIG: - checkAddress(addrs, (const void*)info, s_indices); - break; + // Mark its dependencies + iterHashTable(oc->dependencies, NULL, markObjectLive); - default: - break; - } + return true; // for hash table iteration +} + +void markObjectCode(const void *addr) +{ + if (global_s_indices == NULL) { + return; + } - p += stack_frame_sizeW((StgClosure*)p); + // This should be checked at the call site + ASSERT(!HEAP_ALLOCED(addr)); + + ObjectCode *oc = findOC(global_s_indices, addr); + if (oc != NULL) { + // Mark the object code and its dependencies + markObjectLive(NULL, (W_)oc, NULL); } } - -static void searchHeapBlocks (HashTable *addrs, bdescr *bd, - OCSectionIndices *s_indices) +// Returns whether or not the GC that follows needs to mark code for potential +// unloading. +bool prepareUnloadCheck() { - StgPtr p; - const StgInfoTable *info; - uint32_t size; - bool prim; + if (global_s_indices == NULL) { + return false; + } - for (; bd != NULL; bd = bd->link) { + removeRemovedOCSections(global_s_indices); + sortOCSectionIndices(global_s_indices); - if (bd->flags & BF_PINNED) { - // Assume that objects in PINNED blocks cannot refer to - continue; - } + ASSERT(old_objects == NULL); - p = bd->start; - while (p < bd->free) { - info = get_itbl((StgClosure *)p); - prim = false; - - switch (info->type) { - - case THUNK: - size = thunk_sizeW_fromITBL(info); - break; - - case THUNK_1_1: - case THUNK_0_2: - case THUNK_2_0: - size = sizeofW(StgThunkHeader) + 2; - break; - - case THUNK_1_0: - case THUNK_0_1: - case THUNK_SELECTOR: - size = sizeofW(StgThunkHeader) + 1; - break; - - case FUN: - case FUN_1_0: - case FUN_0_1: - case FUN_1_1: - case FUN_0_2: - case FUN_2_0: - case CONSTR: - case CONSTR_NOCAF: - case CONSTR_1_0: - case CONSTR_0_1: - case CONSTR_1_1: - case CONSTR_0_2: - case CONSTR_2_0: - size = sizeW_fromITBL(info); - break; - - case BLACKHOLE: - case BLOCKING_QUEUE: - prim = true; - size = sizeW_fromITBL(info); - break; - - case IND: - // Special case/Delicate Hack: INDs don't normally - // appear, since we're doing this heap census right - // after GC. However, GarbageCollect() also does - // resurrectThreads(), which can update some - // blackholes when it calls raiseAsync() on the - // resurrected threads. So we know that any IND will - // be the size of a BLACKHOLE. 
-            prim = true;
-            size = BLACKHOLE_sizeW();
-            break;
-
-        case BCO:
-            prim = true;
-            size = bco_sizeW((StgBCO *)p);
-            break;
-
-        case MVAR_CLEAN:
-        case MVAR_DIRTY:
-        case TVAR:
-        case WEAK:
-        case PRIM:
-        case MUT_PRIM:
-        case MUT_VAR_CLEAN:
-        case MUT_VAR_DIRTY:
-            prim = true;
-            size = sizeW_fromITBL(info);
-            break;
-
-        case AP:
-            prim = true;
-            size = ap_sizeW((StgAP *)p);
-            break;
-
-        case PAP:
-            prim = true;
-            size = pap_sizeW((StgPAP *)p);
-            break;
-
-        case AP_STACK:
-        {
-            StgAP_STACK *ap = (StgAP_STACK *)p;
-            prim = true;
-            size = ap_stack_sizeW(ap);
-            searchStackChunk(addrs, (StgPtr)ap->payload,
-                             (StgPtr)ap->payload + ap->size, s_indices);
-            break;
-        }
+    object_code_mark_bit = ~object_code_mark_bit;
+    old_objects = objects;
+    objects = NULL;
+    return true;
+}
 
-        case ARR_WORDS:
-            prim = true;
-            size = arr_words_sizeW((StgArrBytes*)p);
-            break;
-
-        case MUT_ARR_PTRS_CLEAN:
-        case MUT_ARR_PTRS_DIRTY:
-        case MUT_ARR_PTRS_FROZEN_CLEAN:
-        case MUT_ARR_PTRS_FROZEN_DIRTY:
-            prim = true;
-            size = mut_arr_ptrs_sizeW((StgMutArrPtrs *)p);
-            break;
-
-        case SMALL_MUT_ARR_PTRS_CLEAN:
-        case SMALL_MUT_ARR_PTRS_DIRTY:
-        case SMALL_MUT_ARR_PTRS_FROZEN_CLEAN:
-        case SMALL_MUT_ARR_PTRS_FROZEN_DIRTY:
-            prim = true;
-            size = small_mut_arr_ptrs_sizeW((StgSmallMutArrPtrs *)p);
-            break;
-
-        case TSO:
-            prim = true;
-            size = sizeofW(StgTSO);
-            break;
-
-        case STACK: {
-            StgStack *stack = (StgStack*)p;
-            prim = true;
-            searchStackChunk(addrs, stack->sp,
-                             stack->stack + stack->stack_size, s_indices);
-            size = stack_sizeW(stack);
-            break;
-        }
+void checkUnload(void)
+{
+    if (global_s_indices == NULL) {
+        return;
+    }
 
-        case TREC_CHUNK:
-            prim = true;
-            size = sizeofW(StgTRecChunk);
-            break;
+    // At this point we've marked all dynamically loaded static objects
+    // (including their dependencies) during GC, but not the root set of object
+    // code (loaded_objects). Mark the roots first, then unload any unmarked
+    // objects.
 
-        default:
-            barf("searchHeapBlocks, unknown object: %d", info->type);
-        }
+    OCSectionIndices *s_indices = global_s_indices;
+    ASSERT(s_indices->sorted);
 
-        if (!prim) {
-            checkAddress(addrs,info, s_indices);
-        }
-
-        p += size;
-    }
+    // Mark roots
+    for (ObjectCode *oc = loaded_objects; oc != NULL; oc = oc->next_loaded_object) {
+        markObjectLive(NULL, (W_)oc, NULL);
     }
-}
 
-#if defined(PROFILING)
-//
-// Do not unload the object if the CCS tree refers to a CCS or CC which
-// originates in the object.
-//
-static void searchCostCentres (HashTable *addrs, CostCentreStack *ccs,
-        OCSectionIndices* s_indices)
-{
-    IndexTable *i;
+    // Free unmarked objects
+    ObjectCode *next = NULL;
+    for (ObjectCode *oc = old_objects; oc != NULL; oc = next) {
+        next = oc->next;
 
-    checkAddress(addrs, ccs, s_indices);
-    checkAddress(addrs, ccs->cc, s_indices);
-    for (i = ccs->indexTable; i != NULL; i = i->next) {
-        if (!i->back_edge) {
-            searchCostCentres(addrs, i->ccs, s_indices);
-        }
+        removeOCSectionIndices(s_indices, oc);
+
+        // Symbols should be removed by unloadObj_.
+        // NB (osa): If this assertion doesn't hold then freeObjectCode below
+        // will corrupt symhash as keys of that table live in ObjectCodes. If
+        // you see a segfault in a hash table operation in linker (in non-debug
+        // RTS) then it's probably because this assertion did not hold.
+        ASSERT(oc->symbols == NULL);
+
+        freeObjectCode(oc);
+        n_unloaded_objects -= 1;
    }
-}
-#endif
 
-//
-// Check whether we can unload any object code.
This is called at the -// appropriate point during a GC, where all the heap data is nice and -// packed together and we have a linked list of the static objects. -// -// The check involves a complete heap traversal, but you only pay for -// this (a) when you have called unloadObj(), and (b) at a major GC, -// which is much more expensive than the traversal we're doing here. -// -void checkUnload (StgClosure *static_objects) -{ - uint32_t g, n; - HashTable *addrs; - StgClosure* p; - const StgInfoTable *info; - ObjectCode *oc, *prev, *next; - gen_workspace *ws; - StgClosure* link; - - if (unloaded_objects == NULL) return; - - ACQUIRE_LOCK(&linker_unloaded_mutex); - - OCSectionIndices *s_indices = buildOCSectionIndices(unloaded_objects); - // Mark every unloadable object as unreferenced initially - for (oc = unloaded_objects; oc; oc = oc->next) { - IF_DEBUG(linker, debugBelch("Checking whether to unload %" PATH_FMT "\n", - oc->fileName)); - oc->referenced = false; - } - - addrs = allocHashTable(); - - for (p = static_objects; p != END_OF_STATIC_OBJECT_LIST; p = link) { - p = UNTAG_STATIC_LIST_PTR(p); - checkAddress(addrs, p, s_indices); - info = get_itbl(p); - checkAddress(addrs, info, s_indices); - link = *STATIC_LINK(info, p); - } - - // CAFs on revertible_caf_list are not on static_objects - for (p = (StgClosure*)revertible_caf_list; - p != END_OF_CAF_LIST; - p = ((StgIndStatic *)p)->static_link) { - p = UNTAG_STATIC_LIST_PTR(p); - checkAddress(addrs, p, s_indices); - } - - for (g = 0; g < RtsFlags.GcFlags.generations; g++) { - searchHeapBlocks (addrs, generations[g].blocks, s_indices); - searchHeapBlocks (addrs, generations[g].large_objects, s_indices); - - for (n = 0; n < n_capabilities; n++) { - ws = &gc_threads[n]->gens[g]; - searchHeapBlocks(addrs, ws->todo_bd, s_indices); - searchHeapBlocks(addrs, ws->part_list, s_indices); - searchHeapBlocks(addrs, ws->scavd_list, s_indices); - } - } - -#if defined(PROFILING) - /* Traverse the cost centre tree, calling checkAddress on each CCS/CC */ - searchCostCentres(addrs, CCS_MAIN, s_indices); - - /* Also check each cost centre in the CC_LIST */ - CostCentre *cc; - for (cc = CC_LIST; cc != NULL; cc = cc->link) { - checkAddress(addrs, cc, s_indices); - } -#endif /* PROFILING */ - - freeOCSectionIndices(s_indices); - // Look through the unloadable objects, and any object that is still - // marked as unreferenced can be physically unloaded, because we - // have no references to it. - prev = NULL; - for (oc = unloaded_objects; oc; oc = next) { - next = oc->next; - if (oc->referenced == 0) { - if (prev == NULL) { - unloaded_objects = oc->next; - } else { - prev->next = oc->next; - } - IF_DEBUG(linker, debugBelch("Unloading object file %" PATH_FMT "\n", - oc->fileName)); - freeObjectCode(oc); - } else { - IF_DEBUG(linker, debugBelch("Object file still in use: %" - PATH_FMT "\n", oc->fileName)); - prev = oc; - } - } - - freeHashTable(addrs, NULL); - - RELEASE_LOCK(&linker_unloaded_mutex); + old_objects = NULL; } diff --git a/rts/CheckUnload.h b/rts/CheckUnload.h index ab85ead852..de07aef1c3 100644 --- a/rts/CheckUnload.h +++ b/rts/CheckUnload.h @@ -12,6 +12,34 @@ #include "BeginPrivate.h" -void checkUnload (StgClosure *static_objects); +#include "LinkerInternals.h" + +// Currently live objects +extern ObjectCode *objects; + +// Root set for object collection +extern ObjectCode *loaded_objects; + +// Mark bit for live objects +extern uint8_t object_code_mark_bit; + +// Number of object code currently marked for unloading. 
See the definition in
+// CheckUnload.c for details.
+extern int n_unloaded_objects;
+
+void initUnloadCheck(void);
+void exitUnloadCheck(void);
+
+// Call before major GC to prepare section index table for marking
+bool prepareUnloadCheck(void);
+
+// Mark object code of a static closure address as 'live'
+void markObjectCode(const void *addr);
+
+// Call after major GC to unload unused and unmarked object code
+void checkUnload(void);
+
+// Call on loaded object code
+void insertOCSectionIndices(ObjectCode *oc);
 
 #include "EndPrivate.h"
diff --git a/rts/Hash.c b/rts/Hash.c
index 31a285aefa..6e1873ff29 100644
--- a/rts/Hash.c
+++ b/rts/Hash.c
@@ -492,6 +492,27 @@ mapHashTableKeys(HashTable *table, void *data, MapHashFnKeys fn)
     }
 }
 
+void
+iterHashTable(HashTable *table, void *data, IterHashFn fn)
+{
+    /* The last bucket with something in it is table->max + table->split - 1 */
+    long segment = (table->max + table->split - 1) / HSEGSIZE;
+    long index = (table->max + table->split - 1) % HSEGSIZE;
+
+    while (segment >= 0) {
+        while (index >= 0) {
+            for (HashList *hl = table->dir[segment][index]; hl != NULL; hl = hl->next) {
+                if (!fn(data, hl->key, hl->data)) {
+                    return;
+                }
+            }
+            index--;
+        }
+        segment--;
+        index = HSEGSIZE - 1;
+    }
+}
+
 /* -----------------------------------------------------------------------------
  * When we initialize a hash table, we set up the first segment as well,
  * initializing all of the first segment's hash buckets to NULL.
@@ -522,12 +543,6 @@ allocHashTable(void)
     return table;
 }
 
-void
-exitHashTable(void)
-{
-    /* nothing to do */
-}
-
 int keyCountHashTable (HashTable *table)
 {
     return table->kcount;
diff --git a/rts/Hash.h b/rts/Hash.h
index 8a605d11de..4f9d75d6be 100644
--- a/rts/Hash.h
+++ b/rts/Hash.h
@@ -19,7 +19,7 @@ typedef struct strhashtable StrHashTable;
 * `const` so that calling function can mutate what the pointer points to if it
 * needs to.
 */
-HashTable * allocHashTable ( void );
+HashTable * allocHashTable ( void );
 void        insertHashTable ( HashTable *table, StgWord key, const void *data );
 void *      lookupHashTable ( const HashTable *table, StgWord key );
 void *      removeHashTable ( HashTable *table, StgWord key, const void *data );
@@ -35,9 +35,12 @@ int keysHashTable(HashTable *table, StgWord keys[], int szKeys);
 
 typedef void (*MapHashFn)(void *data, StgWord key, const void *value);
 typedef void (*MapHashFnKeys)(void *data, StgWord *key, const void *value);
+// Return true -> continue; false -> stop
+typedef bool (*IterHashFn)(void *data, StgWord key, const void *value);
 
 void mapHashTable(HashTable *table, void *data, MapHashFn fn);
 void mapHashTableKeys(HashTable *table, void *data, MapHashFnKeys fn);
+void iterHashTable(HashTable *table, void *data, IterHashFn);
 
 /* Hash table access where the keys are C strings (the strings are
 * assumed to be allocated by the caller, and mustn't be deallocated
@@ -79,9 +82,33 @@ void * removeHashTable_ ( HashTable *table, StgWord key,
 
 /* Freeing hash tables
 */
 void freeHashTable ( HashTable *table, void (*freeDataFun)(void *) );
 
-#define freeStrHashTable(table, f) \
-    (freeHashTable((HashTable*) table, f))
 
-void exitHashTable ( void );
+INLINE_HEADER void freeStrHashTable ( StrHashTable *table, void (*freeDataFun)(void *) )
+{
+    freeHashTable((HashTable*)table, freeDataFun);
+}
+
+/*
+ * Hash set API
+ *
+ * A hash set is basically a hash table where values are NULL.
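+ * Elements are stored as keys and the value slot is always NULL; enumerate a
+ * set with iterHashTable over its keys (a value lookup would return NULL for
+ * members and non-members alike).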
+ */ + +typedef struct hashtable HashSet; + +INLINE_HEADER HashSet *allocHashSet ( void ) +{ + return (HashSet*)allocHashTable(); +} + +INLINE_HEADER void freeHashSet ( HashSet *set ) +{ + freeHashTable((HashTable*)set, NULL); +} + +INLINE_HEADER void insertHashSet ( HashSet *set, StgWord key ) +{ + insertHashTable((HashTable*)set, key, NULL); +} #include "EndPrivate.h" diff --git a/rts/Linker.c b/rts/Linker.c index 036c7937a4..f6a38e08dd 100644 --- a/rts/Linker.c +++ b/rts/Linker.c @@ -32,6 +32,7 @@ #include "linker/CacheFlush.h" #include "linker/SymbolExtras.h" #include "PathUtils.h" +#include "CheckUnload.h" // createOCSectionIndices #if !defined(mingw32_HOST_OS) #include "posix/Signals.h" @@ -161,23 +162,9 @@ */ StrHashTable *symhash; -/* List of currently loaded objects */ -ObjectCode *objects = NULL; /* initially empty */ - -/* List of objects that have been unloaded via unloadObj(), but are waiting - to be actually freed via checkUnload() */ -ObjectCode *unloaded_objects = NULL; /* initially empty */ - #if defined(THREADED_RTS) -/* This protects all the Linker's global state except unloaded_objects */ +/* This protects all the Linker's global state */ Mutex linker_mutex; -/* - * This protects unloaded_objects. We have a separate mutex for this, because - * the GC needs to access unloaded_objects in checkUnload, while the linker only - * needs to access unloaded_objects in unloadObj(), so this allows most linker - * operations proceed concurrently with the GC. - */ -Mutex linker_unloaded_mutex; #endif /* Generic wrapper function to try and Resolve and RunInit oc files */ @@ -441,12 +428,10 @@ initLinker_ (int retain_cafs) linker_init_done = 1; } - objects = NULL; - unloaded_objects = NULL; + initUnloadCheck(); #if defined(THREADED_RTS) initMutex(&linker_mutex); - initMutex(&linker_unloaded_mutex); #if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO) initMutex(&dl_mutex); #endif @@ -532,6 +517,7 @@ exitLinker( void ) { #endif if (linker_init_done == 1) { freeStrHashTable(symhash, free); + exitUnloadCheck(); } #if defined(THREADED_RTS) closeMutex(&linker_mutex); @@ -858,18 +844,24 @@ HsInt insertSymbol(pathchar* obj_name, SymbolName* key, SymbolAddr* data) } /* ----------------------------------------------------------------------------- - * lookup a symbol in the hash table + * Lookup a symbol in the hash table + * + * When 'dependent' is not NULL, adds it as a dependent to the owner of the + * symbol. */ #if defined(OBJFORMAT_PEi386) -SymbolAddr* lookupSymbol_ (SymbolName* lbl) +SymbolAddr* lookupDependentSymbol (SymbolName* lbl, ObjectCode *dependent) { + (void)dependent; // TODO + ASSERT_LOCK_HELD(&linker_mutex); return lookupSymbol_PEi386(lbl); } #else -SymbolAddr* lookupSymbol_ (SymbolName* lbl) +SymbolAddr* lookupDependentSymbol (SymbolName* lbl, ObjectCode *dependent) { + ASSERT_LOCK_HELD(&linker_mutex); IF_DEBUG(linker, debugBelch("lookupSymbol: looking up '%s'\n", lbl)); ASSERT(symhash != NULL); @@ -894,10 +886,18 @@ SymbolAddr* lookupSymbol_ (SymbolName* lbl) return internal_dlsym(lbl + 1); # else - ASSERT(2+2 == 5); + ASSERT(false); return NULL; # endif } else { + if (dependent) { + // Add dependent as symbol's owner's dependency + ObjectCode *owner = pinfo->owner; + if (owner) { + // TODO: what does it mean for a symbol to not have an owner? 
+ insertHashSet(dependent->dependencies, (W_)owner); + } + } return loadSymbol(lbl, pinfo); } } @@ -958,7 +958,9 @@ printLoadedObjects() { SymbolAddr* lookupSymbol( SymbolName* lbl ) { ACQUIRE_LOCK(&linker_mutex); - SymbolAddr* r = lookupSymbol_(lbl); + // NULL for "don't add dependent". When adding a dependency we call + // lookupDependentSymbol directly. + SymbolAddr* r = lookupDependentSymbol(lbl, NULL); if (!r) { errorBelch("^^ Could not load '%s', dependency unresolved. " "See top entry above.\n", lbl); @@ -1267,9 +1269,6 @@ void freeObjectCode (ObjectCode *oc) oc->sections[i].mapped_size); break; case SECTION_M32: - IF_DEBUG(zero_on_gc, - memset(oc->sections[i].start, - 0x00, oc->sections[i].size)); // Freed by m32_allocator_free break; #endif @@ -1323,6 +1322,8 @@ void freeObjectCode (ObjectCode *oc) stgFree(oc->fileName); stgFree(oc->archiveMemberName); + freeHashSet(oc->dependencies); + stgFree(oc); } @@ -1385,6 +1386,10 @@ mkOc( pathchar *path, char *image, int imageSize, /* chain it onto the list of objects */ oc->next = NULL; + oc->prev = NULL; + oc->next_loaded_object = NULL; + oc->mark = object_code_mark_bit; + oc->dependencies = allocHashSet(); #if RTS_LINKER_USE_MMAP oc->rw_m32 = m32_allocator_new(false); @@ -1403,9 +1408,9 @@ mkOc( pathchar *path, char *image, int imageSize, HsInt isAlreadyLoaded( pathchar *path ) { - ObjectCode *o; - for (o = objects; o; o = o->next) { - if (0 == pathcmp(o->fileName, path)) { + for (ObjectCode *o = objects; o; o = o->next) { + if (0 == pathcmp(o->fileName, path) + && o->status != OBJECT_UNLOADED) { return 1; /* already loaded */ } } @@ -1539,21 +1544,16 @@ preloadObjectFile (pathchar *path) */ static HsInt loadObj_ (pathchar *path) { - ObjectCode* oc; - IF_DEBUG(linker, debugBelch("loadObj: %" PATH_FMT "\n", path)); - - /* debugBelch("loadObj %s\n", path ); */ - - /* Check that we haven't already loaded this object. - Ignore requests to load multiple times */ + // Check that we haven't already loaded this object. + // Ignore requests to load multiple times if (isAlreadyLoaded(path)) { IF_DEBUG(linker, debugBelch("ignoring repeated load of %" PATH_FMT "\n", path)); - return 1; /* success */ + return 1; // success } - oc = preloadObjectFile(path); + ObjectCode *oc = preloadObjectFile(path); if (oc == NULL) return 0; if (! 
loadOc(oc)) { @@ -1564,8 +1564,10 @@ static HsInt loadObj_ (pathchar *path) return 0; } - oc->next = objects; - objects = oc; + insertOCSectionIndices(oc); + + oc->next_loaded_object = loaded_objects; + loaded_objects = oc; return 1; } @@ -1758,13 +1760,10 @@ int ocTryLoad (ObjectCode* oc) { */ static HsInt resolveObjs_ (void) { - ObjectCode *oc; - int r; - IF_DEBUG(linker, debugBelch("resolveObjs: start\n")); - for (oc = objects; oc; oc = oc->next) { - r = ocTryLoad(oc); + for (ObjectCode *oc = objects; oc; oc = oc->next) { + int r = ocTryLoad(oc); if (!r) { errorBelch("Could not load Object Code %" PATH_FMT ".\n", OC_INFORMATIVE_FILENAME(oc)); @@ -1796,45 +1795,35 @@ HsInt resolveObjs (void) */ static HsInt unloadObj_ (pathchar *path, bool just_purge) { - ObjectCode *oc, *prev, *next; - HsBool unloadedAnyObj = HS_BOOL_FALSE; - ASSERT(symhash != NULL); ASSERT(objects != NULL); IF_DEBUG(linker, debugBelch("unloadObj: %" PATH_FMT "\n", path)); - prev = NULL; - for (oc = objects; oc; oc = next) { - next = oc->next; // oc might be freed - - if (!pathcmp(oc->fileName,path)) { + bool unloadedAnyObj = false; + ObjectCode *prev = NULL; + // NOTE (osa): There may be more than one object with the same file name + // (happens when loading archive files) so we don't stop after unloading one + for (ObjectCode *oc = loaded_objects; oc; oc = oc->next_loaded_object) { + if (pathcmp(oc->fileName,path) == 0) { + oc->status = OBJECT_UNLOADED; - // these are both idempotent, so in just_purge mode we can - // later call unloadObj() to really unload the object. + // These are both idempotent, so in just_purge mode we can later + // call unloadObj() to really unload the object. removeOcSymbols(oc); freeOcStablePtrs(oc); + unloadedAnyObj = true; + if (!just_purge) { + n_unloaded_objects += 1; + // Remove object code from root set if (prev == NULL) { - objects = oc->next; + loaded_objects = oc->next_loaded_object; } else { - prev->next = oc->next; + prev->next_loaded_object = oc->next_loaded_object; } - ACQUIRE_LOCK(&linker_unloaded_mutex); - oc->next = unloaded_objects; - unloaded_objects = oc; - oc->status = OBJECT_UNLOADED; - RELEASE_LOCK(&linker_unloaded_mutex); - // We do not own oc any more; it can be released at any time by - // the GC in checkUnload(). - } else { - prev = oc; } - - /* This could be a member of an archive so continue - * unloading other members. 
 */
         } else {
             prev = oc;
         }
@@ -1842,8 +1831,7 @@ static HsInt unloadObj_ (pathchar *path, bool just_purge)
 
     if (unloadedAnyObj) {
         return 1;
-    }
-    else {
+    } else {
         errorBelch("unloadObj: can't find `%" PATH_FMT "' to unload", path);
         return 0;
     }
@@ -1867,13 +1855,7 @@ HsInt purgeObj (pathchar *path)
 static OStatus getObjectLoadStatus_ (pathchar *path)
 {
-    ObjectCode *o;
-    for (o = objects; o; o = o->next) {
-       if (0 == pathcmp(o->fileName, path)) {
-           return o->status;
-       }
-    }
-    for (o = unloaded_objects; o; o = o->next) {
+    for (ObjectCode *o = objects; o; o = o->next) {
        if (0 == pathcmp(o->fileName, path)) {
           return o->status;
       }
diff --git a/rts/LinkerInternals.h b/rts/LinkerInternals.h
index e8923fb7eb..6b726f7a27 100644
--- a/rts/LinkerInternals.h
+++ b/rts/LinkerInternals.h
@@ -190,9 +190,6 @@ typedef struct _ObjectCode {
     /* non-zero if the object file was mmap'd, otherwise malloc'd */
     int imageMapped;
 
-    /* flag used when deciding whether to unload an object file */
-    int referenced;
-
     /* record by how much image has been deliberately misaligned
        after allocation, so that we can use realloc */
     int misalignment;
@@ -204,8 +201,37 @@ typedef struct _ObjectCode {
     int n_segments;
     Segment *segments;
 
-    /* Allow a chain of these things */
-    struct _ObjectCode * next;
+    //
+    // Garbage collection fields
+    //
+
+    // Next object in `objects` list
+    struct _ObjectCode *next;
+
+    // Previous object in `objects` list
+    struct _ObjectCode *prev;
+
+    // Next object in `loaded_objects` list
+    struct _ObjectCode *next_loaded_object;
+
+    // Mark bit
+    uint8_t mark;
+
+    // Set of dependencies (ObjectCode*) of the object file. Traverse
+    // dependencies using `iterHashTable`.
+    //
+    // New entries are added as we resolve symbols in an object file, in
+    // `lookupDependentSymbol`. When an object file uses multiple symbols from
+    // another object file we would record the same dependency multiple times,
+    // so we use a hash set here rather than a list/array to avoid duplicates.
+    //
+    // Used when unloading object files. See Note [Object unloading] in
+    // CheckUnload.c.
+    HashSet *dependencies;
+
+    //
+    // End of garbage collection fields
+    //
 
     /* SANITY CHECK ONLY: a list of the only memory regions which may
        safely be prodded during relocation.  Any attempt to prod
@@ -249,12 +275,8 @@ typedef struct _ObjectCode {
             (OC)->fileName \
     )
 
-extern ObjectCode *objects;
-extern ObjectCode *unloaded_objects;
-
 #if defined(THREADED_RTS)
 extern Mutex linker_mutex;
-extern Mutex linker_unloaded_mutex;
 #endif
 
 /* Type of the initializer */
@@ -305,8 +327,9 @@ int ghciInsertSymbolTable(
    HsBool weak,
    ObjectCode *owner);
 
-/* lock-free version of lookupSymbol */
-SymbolAddr* lookupSymbol_ (SymbolName* lbl);
+/* Lock-free version of lookupSymbol. When 'dependent' is not NULL, adds it as
+ * a dependent to the owner of the symbol. */
+SymbolAddr* lookupDependentSymbol (SymbolName* lbl, ObjectCode *dependent);
 
 extern StrHashTable *symhash;
 
diff --git a/rts/RtsStartup.c b/rts/RtsStartup.c
index 5e2495844c..02e32106d2 100644
--- a/rts/RtsStartup.c
+++ b/rts/RtsStartup.c
@@ -585,9 +585,6 @@ hs_exit_(bool wait_foreign)
     /* tear down statistics subsystem */
     stat_exit();
 
-    /* free hash table storage */
-    exitHashTable();
-
     // Finally, free all our storage.
However, we only free the heap // memory if we have waited for foreign calls to complete; // otherwise a foreign call in progress may still be referencing diff --git a/rts/linker/Elf.c b/rts/linker/Elf.c index 73d68caa5f..023ce1f6ce 100644 --- a/rts/linker/Elf.c +++ b/rts/linker/Elf.c @@ -1100,7 +1100,7 @@ do_Elf_Rel_relocations ( ObjectCode* oc, char* ehdrC, if (ELF_ST_BIND(symbol->elf_sym->st_info) == STB_LOCAL || strncmp(symbol->name, "_GLOBAL_OFFSET_TABLE_", 21) == 0) { S = (Elf_Addr)symbol->addr; } else { - S_tmp = lookupSymbol_( symbol->name ); + S_tmp = lookupDependentSymbol( symbol->name, oc ); S = (Elf_Addr)S_tmp; } if (!S) { @@ -1520,7 +1520,7 @@ do_Elf_Rela_relocations ( ObjectCode* oc, char* ehdrC, } else { /* No, so look up the name in our global table. */ symbol = strtab + sym.st_name; - S_tmp = lookupSymbol_( symbol ); + S_tmp = lookupDependentSymbol( symbol, oc ); S = (Elf_Addr)S_tmp; } if (!S) { diff --git a/rts/linker/LoadArchive.c b/rts/linker/LoadArchive.c index 89350956de..0ad3d94725 100644 --- a/rts/linker/LoadArchive.c +++ b/rts/linker/LoadArchive.c @@ -5,6 +5,7 @@ #include "sm/OSMem.h" #include "RtsUtils.h" #include "LinkerInternals.h" +#include "CheckUnload.h" // loaded_objects, insertOCSectionIndices #include "linker/M32Alloc.h" /* Platform specific headers */ @@ -241,7 +242,6 @@ lookupGNUArchiveIndex(int gnuFileIndexSize, char **fileName_, static HsInt loadArchive_ (pathchar *path) { - ObjectCode* oc = NULL; char *image = NULL; HsInt retcode = 0; int memberSize; @@ -521,8 +521,8 @@ static HsInt loadArchive_ (pathchar *path) pathprintf(archiveMemberName, size, WSTR("%" PATH_FMT "(%.*s)"), path, (int)thisFileNameSize, fileName); - oc = mkOc(path, image, memberSize, false, archiveMemberName - , misalignment); + ObjectCode *oc = mkOc(path, image, memberSize, false, archiveMemberName, + misalignment); #if defined(OBJFORMAT_MACHO) ocInit_MachO( oc ); #endif @@ -537,8 +537,9 @@ static HsInt loadArchive_ (pathchar *path) fclose(f); return 0; } else { - oc->next = objects; - objects = oc; + insertOCSectionIndices(oc); // also adds the object to `objects` list + oc->next_loaded_object = loaded_objects; + loaded_objects = oc; } } else if (isGnuIndex) { diff --git a/rts/linker/MachO.c b/rts/linker/MachO.c index 09c240f1f0..b513c461db 100644 --- a/rts/linker/MachO.c +++ b/rts/linker/MachO.c @@ -242,7 +242,7 @@ resolveImports( addr = (SymbolAddr*) (symbol->nlist->n_value); IF_DEBUG(linker, debugBelch("resolveImports: undefined external %s has value %p\n", symbol->name, addr)); } else { - addr = lookupSymbol_(symbol->name); + addr = lookupDependentSymbol(symbol->name, oc); IF_DEBUG(linker, debugBelch("resolveImports: looking up %s, %p\n", symbol->name, addr)); } @@ -564,12 +564,12 @@ relocateSectionAarch64(ObjectCode * oc, Section * section) uint64_t value = 0; if(symbol->nlist->n_type & N_EXT) { /* external symbols should be able to be - * looked up via the lookupSymbol_ function. + * looked up via the lookupDependentSymbol function. 
* Either through the global symbol hashmap * or asking the system, if not found * in the symbol hashmap */ - value = (uint64_t)lookupSymbol_((char*)symbol->name); + value = (uint64_t)lookupDependentSymbol((char*)symbol->name, oc); if(!value) barf("Could not lookup symbol: %s!", symbol->name); } else { @@ -609,7 +609,7 @@ relocateSectionAarch64(ObjectCode * oc, Section * section) uint64_t pc = (uint64_t)section->start + ri->r_address; uint64_t value = 0; if(symbol->nlist->n_type & N_EXT) { - value = (uint64_t)lookupSymbol_((char*)symbol->name); + value = (uint64_t)lookupDependentSymbol((char*)symbol->name, oc); if(!value) barf("Could not lookup symbol: %s!", symbol->name); } else { @@ -792,7 +792,7 @@ relocateSection(ObjectCode* oc, int curSection) // symtab, or it is undefined, meaning dlsym must be used // to resolve it. - addr = lookupSymbol_(nm); + addr = lookupDependentSymbol(nm, oc); IF_DEBUG(linker, debugBelch("relocateSection: looked up %s, " "external X86_64_RELOC_GOT or X86_64_RELOC_GOT_LOAD\n" " : addr = %p\n", nm, addr)); @@ -853,7 +853,7 @@ relocateSection(ObjectCode* oc, int curSection) nm, (void *)value)); } else { - addr = lookupSymbol_(nm); + addr = lookupDependentSymbol(nm, oc); if (addr == NULL) { errorBelch("\nlookupSymbol failed in relocateSection (relocate external)\n" @@ -1353,7 +1353,7 @@ ocGetNames_MachO(ObjectCode* oc) if (oc->info->nlist[i].n_type & N_EXT) { if ( (oc->info->nlist[i].n_desc & N_WEAK_DEF) - && lookupSymbol_(nm)) { + && lookupDependentSymbol(nm, oc)) { // weak definition, and we already have a definition IF_DEBUG(linker, debugBelch(" weak: %s\n", nm)); } @@ -1508,7 +1508,7 @@ ocResolve_MachO(ObjectCode* oc) * have the address. */ if(NULL == symbol->addr) { - symbol->addr = lookupSymbol_((char*)symbol->name); + symbol->addr = lookupDependentSymbol((char*)symbol->name, oc); if(NULL == symbol->addr) barf("Failed to lookup symbol: %s", symbol->name); } else { diff --git a/rts/linker/PEi386.c b/rts/linker/PEi386.c index 66bd22e8e9..aa841c070e 100644 --- a/rts/linker/PEi386.c +++ b/rts/linker/PEi386.c @@ -185,6 +185,7 @@ #include "RtsUtils.h" #include "RtsSymbolInfo.h" #include "GetEnv.h" +#include "CheckUnload.h" #include "linker/PEi386.h" #include "linker/PEi386Types.h" #include "linker/SymbolExtras.h" @@ -1894,7 +1895,7 @@ ocResolve_PEi386 ( ObjectCode* oc ) } else { copyName ( getSymShortName (info, sym), oc, symbol, sizeof(symbol)-1 ); - S = (size_t) lookupSymbol_( (char*)symbol ); + S = (size_t) lookupDependentSymbol( (char*)symbol, oc ); if ((void*)S == NULL) { errorBelch(" | %" PATH_FMT ": unknown symbol `%s'", oc->fileName, symbol); releaseOcInfo (oc); diff --git a/rts/linker/elf_got.c b/rts/linker/elf_got.c index 58d5c93b64..bdb436ad21 100644 --- a/rts/linker/elf_got.c +++ b/rts/linker/elf_got.c @@ -88,7 +88,7 @@ fillGot(ObjectCode * oc) { if( STT_NOTYPE == ELF_ST_TYPE(symbol->elf_sym->st_info) || STB_WEAK == ELF_ST_BIND(symbol->elf_sym->st_info)) { if(0x0 == symbol->addr) { - symbol->addr = lookupSymbol_(symbol->name); + symbol->addr = lookupDependentSymbol(symbol->name, oc); if(0x0 == symbol->addr) { if(0 == strncmp(symbol->name,"_GLOBAL_OFFSET_TABLE_",21)) { symbol->addr = oc->info->got_start; diff --git a/rts/sm/Evac.c b/rts/sm/Evac.c index b324a59179..e660fad1d8 100644 --- a/rts/sm/Evac.c +++ b/rts/sm/Evac.c @@ -28,6 +28,7 @@ #include "CNF.h" #include "Scav.h" #include "NonMoving.h" +#include "CheckUnload.h" // n_unloaded_objects and markObjectCode #if defined(THREADED_RTS) && !defined(PARALLEL_GC) #define evacuate(p) evacuate1(p) @@ -596,6 
+597,11 @@ loop: if (!HEAP_ALLOCED_GC(q)) { if (!major_gc) return; + // Note [Object unloading] in CheckUnload.c + if (RTS_UNLIKELY(unload_mark_needed)) { + markObjectCode(q); + } + info = get_itbl(q); switch (info->type) { diff --git a/rts/sm/GC.c b/rts/sm/GC.c index a5aa7e1f4e..0dc92a29a0 100644 --- a/rts/sm/GC.c +++ b/rts/sm/GC.c @@ -97,6 +97,13 @@ * See also: Note [STATIC_LINK fields] in Storage.h. */ +/* Hot GC globals + * ~~~~~~~~~~~~~~ + * The globals below are quite hot during GC but read-only, initialized during + * the beginning of collection. It is important that they reside in the same + * cache-line to minimize unnecessary cache misses. + */ + /* N is the oldest generation being collected, where the generations * are numbered starting at 0. A major GC (indicated by the major_gc * flag) is when we're collecting all generations. We only attempt to @@ -105,6 +112,7 @@ uint32_t N; bool major_gc; bool deadlock_detect_gc; +bool unload_mark_needed; /* Data used for allocation area sizing. */ @@ -314,6 +322,12 @@ GarbageCollect (uint32_t collect_gen, static_flag == STATIC_FLAG_A ? STATIC_FLAG_B : STATIC_FLAG_A; } + if (major_gc) { + unload_mark_needed = prepareUnloadCheck(); + } else { + unload_mark_needed = false; + } + #if defined(THREADED_RTS) work_stealing = RtsFlags.ParFlags.parGcLoadBalancingEnabled && N >= RtsFlags.ParFlags.parGcLoadBalancingGen; @@ -831,9 +845,12 @@ GarbageCollect (uint32_t collect_gen, resetNurseries(); - // mark the garbage collected CAFs as dead #if defined(DEBUG) - if (major_gc && !RtsFlags.GcFlags.useNonmoving) { gcCAFs(); } + // Mark the garbage collected CAFs as dead. Done in `nonmovingGcCafs()` when + // non-moving GC is enabled. + if (major_gc && !RtsFlags.GcFlags.useNonmoving) { + gcCAFs(); + } #endif // Update the stable name hash table @@ -844,9 +861,14 @@ GarbageCollect (uint32_t collect_gen, // hs_free_stable_ptr(), both of which access the StablePtr table. stablePtrUnlock(); - // Must be after stablePtrUnlock(), because it might free stable ptrs. - if (major_gc) { - checkUnload (gct->scavenged_static_objects); + // Unload dynamically-loaded object code after a major GC. + // See Note [Object unloading] in CheckUnload.c for details. + // + // TODO: Similar to `nonmovingGcCafs` non-moving GC should have its own + // collector for these objects, but that's currently not implemented, so we + // simply don't unload object code when non-moving GC is enabled. + if (major_gc && !RtsFlags.GcFlags.useNonmoving) { + checkUnload(); } #if defined(PROFILING) diff --git a/rts/sm/GC.h b/rts/sm/GC.h index c5d5f6ac81..2c2d14a7d2 100644 --- a/rts/sm/GC.h +++ b/rts/sm/GC.h @@ -35,6 +35,7 @@ extern uint32_t N; extern bool major_gc; /* See Note [Deadlock detection under nonmoving collector]. */ extern bool deadlock_detect_gc; +extern bool unload_mark_needed; extern bdescr *mark_stack_bd; extern bdescr *mark_stack_top_bd; |
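The GC hooks above reduce to a small flip-and-sweep protocol over two intrusive lists. As a closing illustration, here is a compilable toy model of just that protocol — hypothetical, simplified types; the real code additionally consults the section index table and marks dependency sets:

/* flip_and_sweep.c — toy model of prepareUnloadCheck / markObjectLive / the
 * free loop in checkUnload: stash the live list, flip the global mark bit,
 * move marked objects back, free whatever is still stashed. */
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

typedef struct Obj {
    const char *name;
    uint8_t mark;
    struct Obj *next, *prev;
} Obj;

static uint8_t mark_bit = 0;        /* cf. object_code_mark_bit */
static Obj *objects = NULL;         /* currently live objects   */
static Obj *old_objects = NULL;     /* snapshot being swept     */

static void push(Obj **list, Obj *o)
{
    o->prev = NULL;
    o->next = *list;
    if (*list) (*list)->prev = o;
    *list = o;
}

static void prepare(void)           /* cf. prepareUnloadCheck */
{
    mark_bit = ~mark_bit;
    old_objects = objects;
    objects = NULL;
}

static void mark_live(Obj *o)       /* cf. markObjectLive */
{
    if (o->mark == mark_bit) return;        /* already marked this cycle */
    o->mark = mark_bit;
    /* Unlink from old_objects... */
    if (o->prev) o->prev->next = o->next; else old_objects = o->next;
    if (o->next) o->next->prev = o->prev;
    /* ...and move it back to the live list. */
    push(&objects, o);
}

static void sweep(void)             /* cf. the free loop in checkUnload */
{
    for (Obj *o = old_objects, *next; o != NULL; o = next) {
        next = o->next;
        printf("freeing %s\n", o->name);
        free(o);
    }
    old_objects = NULL;
}

int main(void)
{
    const char *names[] = { "A.o", "B.o", "C.o" };
    Obj *kept = NULL;
    for (int i = 0; i < 3; i++) {
        Obj *o = calloc(1, sizeof *o);
        o->name = names[i];
        o->mark = mark_bit;         /* cf. mkOc initializing oc->mark */
        push(&objects, o);
        if (i == 1) kept = o;       /* pretend B.o stays reachable */
    }
    prepare();
    mark_live(kept);                /* the "GC" marks only B.o */
    sweep();                        /* frees A.o and C.o */
    free(kept);                     /* tidy up the survivor */
    return 0;
}

Flipping the bit instead of clearing marks means the previous cycle's marks become stale automatically, so no per-object reset pass is needed between collections.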