/* ----------------------------------------------------------------------------
 *
 * (c) The GHC Team, 2013-
 *
 * Check whether dynamically-loaded object code can be safely
 * unloaded, by searching for references to it from the heap and RTS
 * data structures.
 *
 * --------------------------------------------------------------------------*/

#include "PosixSource.h"
#include "Rts.h"

#include "RtsUtils.h"
#include "Hash.h"
#include "LinkerInternals.h"
#include "CheckUnload.h"
#include "sm/Storage.h"
#include "sm/GCThread.h"
#include "sm/HeapUtils.h"

//
// Note [Object unloading]
// ~~~~~~~~~~~~~~~~~~~~~~~
//
// Overview of object unloading:
//
// - In a major GC, for every static object we mark the object's object code and
//   its dependencies as 'live'. This is done by `markObjectCode`, called by
//   `evacuate`.
//
// - Marking object code is done using a global "section index table"
//   (global_s_indices below). When we load an object code we add its section
//   indices to the table. `markObjectCode` does binary search on this table to
//   find the object code for the marked object, and marks it and its
//   dependencies.
//
//   A dependency of an object code is simply other object code that it refers
//   to in its code. We know these dependencies from the relocations present in
//   the referring object code. This is recorded by lookupSymbolDependent.
//
// - global_s_indices is updated as we load and unload objects. When we load an
//   object code we add its section indices to the table, and we remove those
//   indices when we unload it.
//
//   The table is sorted and old indices are removed in `checkUnload`, instead
//   of on every load/unload, to avoid quadratic behavior when we load a list
//   of objects.
//
// - After a major GC `checkUnload` unloads objects that are (1) explicitly
//   asked for unloading (via `unloadObj`) and (2) are not marked during GC.
//   (See the lifecycle sketch after this Note.)
//
// Note that, crucially, we don't unload an object code even if it's not
// reachable from the heap, unless it's explicitly asked for unloading (via
// `unloadObj`). This is a feature and not a bug! Two use cases:
//
// - The user might request a symbol from a loaded object at any point with
//   lookupSymbol (e.g. GHCi might do this).
//
// - Sometimes we load objects that are not Haskell objects.
//
// To avoid unloading objects that are unreachable but are not asked for
// unloading we maintain a "root set" of object code, `loaded_objects` below.
// `loadObj` adds the loaded object (and its dependencies) to the list;
// `unloadObj` removes it. After a major GC, `checkUnload` first marks the root
// set (`loaded_objects`) to avoid unloading objects that are not asked for
// unloading.
//
// Two other lists, `objects` and `old_objects`, are similar to the large
// object lists in the GC. Before a major GC we move `objects` to `old_objects`,
// and move marked objects back to `objects` during evacuation and when marking
// roots in `checkUnload`. Any object left in `old_objects` after that is
// unloaded.
//
// TODO: We currently don't unload objects when non-moving GC is enabled. The
// implementation would be similar to `nonmovingGcCafs`:
//
// - Maintain a "snapshot":
//
//   - Copy `loaded_objects` as the root set of the snapshot
//
//   - Stash `objects` to `old_objects` as the snapshot. We don't need a new
//     list for this as `old_objects` won't be used by any other code when
//     non-moving GC is enabled.
//
//   - Copy the `global_s_indices` table to be able to mark objects while
//     mutators call `loadObj_` and `unloadObj_` concurrently.
//
// - Don't mark object code in `evacuate`; marking will be done in the
//   non-moving collector.
//
// - After preparation, bump the object code mark bit (`object_code_mark_bit`
//   below) and mark static objects using a version of `markObjectCode` that
//   basically does the same thing but:
//
//   - Needs to update the `objects` list in a thread-safe way, as mutators will
//     be concurrently calling `loadObj_` and adding new entries to `objects`.
//     (Alternatively we could have a new list for the non-moving GC's objects,
//     and then merge it into the global list in the pause before moving to the
//     concurrent sweep phase.)
//
//   - Needs to use the copied `global_s_indices`
//
// - After marking, anything left in `old_objects` is unreachable within the
//   snapshot; unload those objects. The unload loop will be the same as in
//   `checkUnload`. This step needs to happen in the final sync (before sweep
//   begins) to avoid races when updating `global_s_indices`.
//
// - NOTE: We don't need write barriers in loadObj/unloadObj as we don't
//   introduce a dependency from an already-loaded object to a newly loaded
//   object and we don't delete existing dependencies.
//
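
// For orientation, here is a sketch of the lifecycle from the linker client's
// side. This is illustrative only (the exact GHCi driver code differs, and
// "foo.o" / "f_closure" are made-up names):
//
//     loadObj("foo.o");        // foo.o goes on `loaded_objects`, and its
//                              // section ranges go into `global_s_indices`
//     resolveObjs();           // relocation; dependencies are recorded
//     lookupSymbol("f_closure");
//     ... run code that may retain closures living in foo.o ...
//     unloadObj("foo.o");      // only removes foo.o from `loaded_objects`
//                              // and bumps `n_unloaded_objects`
//     performMajorGC();        // marking keeps foo.o on `objects` while the
//                              // heap still references it; once nothing does,
//                              // the next `checkUnload` frees it
//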

uint8_t object_code_mark_bit = 0;

typedef struct {
    W_ start;
    W_ end;
    ObjectCode *oc;
} OCSectionIndex;

typedef struct {
    int capacity; // Doubled on resize
    int n_sections;
    bool sorted; // Invalidated on insertion. Sorted in checkUnload.
    bool unloaded; // Whether we removed anything from the table in
                   // removeOCSectionIndices. If this is set we "compact" the
                   // table (remove unused entries) in `removeRemovedOCSections`.
    OCSectionIndex *indices;
} OCSectionIndices;

// List of currently live objects. Moved to `old_objects` before the unload
// check. Marked objects are moved back to this list in `markObjectLive`.
// Remaining objects are freed at the end of `checkUnload`.
//
// Doubly-linked list, so that marked objects can be removed. The list is
// formed with the `next` and `prev` fields of `ObjectCode`.
//
// Not static: used in Linker.c.
ObjectCode *objects = NULL;

// The `objects` list is moved here before the unload check. Marked objects are
// moved back to `objects`. Remaining objects are freed.
static ObjectCode *old_objects = NULL;

// Number of objects that we want to unload. When this value is 0 we skip
// static object marking during GC and `checkUnload`.
//
// Not static: we use this value to skip static object marking in evacuate when
// this is 0.
//
// Incremented in `unloadObj_`, decremented as we unload objects in
// `checkUnload`.
int n_unloaded_objects = 0;

// List of objects that we don't want to unload (i.e. we haven't called
// unloadObj on these yet). Used as the root set for the unload check in
// checkUnload. Objects are added with loadObj_ and removed with unloadObj_.
//
// The list is formed with the `next_loaded_object` field of `ObjectCode`.
//
// Not static: used in Linker.c.
ObjectCode *loaded_objects;

// Section index table for currently loaded objects. New indices are added by
// `loadObj_`, indices of unloaded objects are removed in `checkUnload`. Used
// to map static closures to their ObjectCode.
static OCSectionIndices *global_s_indices = NULL;
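
// For illustration, suppose two objects are loaded and their sections (other
// than SECTIONKIND_OTHER) happen to occupy these made-up address ranges:
//
//     start    end      oc
//     0x1000   0x2000   oc1
//     0x5000   0x5800   oc2
//
// Once the table is sorted, findOC(global_s_indices, (void*)0x1abc) below
// returns oc1 via binary search on `start`, while
// findOC(global_s_indices, (void*)0x4000) returns NULL because 0x4000 falls
// outside every [start, end) range.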

static OCSectionIndices *createOCSectionIndices(void)
{
    // TODO (osa): Maybe initialize as empty (without allocation) and allocate
    // on first insertion?
    OCSectionIndices *s_indices =
        stgMallocBytes(sizeof(OCSectionIndices), "OCSectionIndices");
    int capacity = 1024;
    s_indices->capacity = capacity;
    s_indices->n_sections = 0;
    s_indices->sorted = true;
    s_indices->unloaded = false;
    s_indices->indices =
        stgMallocBytes(capacity * sizeof(OCSectionIndex),
                       "OCSectionIndices::indices");
    return s_indices;
}

static void freeOCSectionIndices(OCSectionIndices *s_indices)
{
    free(s_indices->indices);
    free(s_indices);
}

void initUnloadCheck()
{
    global_s_indices = createOCSectionIndices();
}

void exitUnloadCheck()
{
    freeOCSectionIndices(global_s_indices);
    global_s_indices = NULL;
}

static int cmpSectionIndex(const void *indexa, const void *indexb)
{
    W_ s1 = ((OCSectionIndex*)indexa)->start;
    W_ s2 = ((OCSectionIndex*)indexb)->start;
    if (s1 < s2) {
        return -1;
    } else if (s1 > s2) {
        return 1;
    }
    return 0;
}

static void reserveOCSectionIndices(OCSectionIndices *s_indices, int len)
{
    int current_capacity = s_indices->capacity;
    int current_len = s_indices->n_sections;
    if (current_capacity - current_len >= len) {
        return;
    }

    // Round up to nearest power of 2
    int new_capacity = 1 << (int)ceil(log2(current_len + len));

    OCSectionIndex *old_indices = s_indices->indices;
    OCSectionIndex *new_indices =
        stgMallocBytes(new_capacity * sizeof(OCSectionIndex),
                       "reserveOCSectionIndices");

    for (int i = 0; i < current_len; ++i) {
        new_indices[i] = old_indices[i];
    }

    s_indices->capacity = new_capacity;
    s_indices->indices = new_indices;

    free(old_indices);
}

// Insert object section indices of a single ObjectCode. Invalidates 'sorted'
// state.
void insertOCSectionIndices(ObjectCode *oc)
{
    reserveOCSectionIndices(global_s_indices, oc->n_sections);
    global_s_indices->sorted = false;

    int s_i = global_s_indices->n_sections;
    for (int i = 0; i < oc->n_sections; i++) {
        if (oc->sections[i].kind != SECTIONKIND_OTHER) {
            global_s_indices->indices[s_i].start = (W_)oc->sections[i].start;
            global_s_indices->indices[s_i].end =
                (W_)oc->sections[i].start + oc->sections[i].size;
            global_s_indices->indices[s_i].oc = oc;
            s_i++;
        }
    }
    global_s_indices->n_sections = s_i;

    // Add object to 'objects' list
    if (objects != NULL) {
        objects->prev = oc;
    }
    oc->next = objects;
    objects = oc;
}

static int findSectionIdx(OCSectionIndices *s_indices, const void *addr);
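
// Removal is lazy. Unloading an object below only NULLs out the `oc` fields of
// its entries and sets `unloaded`; the entries stay in place until the next
// unload check (`prepareUnloadCheck`), where removeRemovedOCSections compacts
// the array and sortOCSectionIndices re-sorts it if needed. For example, with
// made-up ranges and oc2 unloaded:
//
//     before:  [0x1000,0x2000)->oc1  [0x3000,0x3800)->NULL  [0x5000,0x5800)->oc3
//     after:   [0x1000,0x2000)->oc1  [0x5000,0x5800)->oc3   (n_sections = 2)
//
// This keeps a burst of loads/unloads from re-sorting the table every time.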

static void removeOCSectionIndices(OCSectionIndices *s_indices, ObjectCode *oc)
{
    // To avoid quadratic behavior in checkUnload we set the `oc` fields of the
    // indices of unloaded objects to NULL here. Removing the unused entries is
    // done in `removeRemovedOCSections`.
    s_indices->unloaded = true;
    for (int i = 0; i < oc->n_sections; i++) {
        if (oc->sections[i].kind != SECTIONKIND_OTHER) {
            int section_idx = findSectionIdx(s_indices, oc->sections[i].start);
            if (section_idx != -1) {
                s_indices->indices[section_idx].oc = NULL;
            }
        }
    }
}

static void sortOCSectionIndices(OCSectionIndices *s_indices)
{
    if (s_indices->sorted) {
        return;
    }

    qsort(s_indices->indices,
          s_indices->n_sections,
          sizeof(OCSectionIndex),
          cmpSectionIndex);

    s_indices->sorted = true;
}

static void removeRemovedOCSections(OCSectionIndices *s_indices)
{
    if (!s_indices->unloaded) {
        return;
    }

    int next_free_idx = 0;
    for (int i = 0; i < s_indices->n_sections; ++i) {
        if (s_indices->indices[i].oc == NULL) {
            // free entry, skip
        } else if (i == next_free_idx) {
            ++next_free_idx;
        } else {
            s_indices->indices[next_free_idx] = s_indices->indices[i];
            ++next_free_idx;
        }
    }

    s_indices->n_sections = next_free_idx;
    s_indices->unloaded = false; // the table is compact again
}

// Returns -1 if not found
static int findSectionIdx(OCSectionIndices *s_indices, const void *addr)
{
    ASSERT(s_indices->sorted);

    W_ w_addr = (W_)addr;
    if (s_indices->n_sections <= 0) {
        return -1;
    }
    if (w_addr < s_indices->indices[0].start) {
        return -1;
    }

    int left = 0, right = s_indices->n_sections;
    while (left + 1 < right) {
        int mid = (left + right) / 2;
        W_ w_mid = s_indices->indices[mid].start;
        if (w_mid <= w_addr) {
            left = mid;
        } else {
            right = mid;
        }
    }
    ASSERT(w_addr >= s_indices->indices[left].start);
    if (w_addr < s_indices->indices[left].end) {
        return left;
    }
    return -1;
}

static ObjectCode *findOC(OCSectionIndices *s_indices, const void *addr)
{
    int oc_idx = findSectionIdx(s_indices, addr);
    if (oc_idx == -1) {
        return NULL;
    }

    return s_indices->indices[oc_idx].oc;
}

static bool markObjectLive(void *data STG_UNUSED, StgWord key, const void *value STG_UNUSED)
{
    ObjectCode *oc = (ObjectCode*)key;

    // N.B. we may be called by the parallel GC and therefore this must be
    // thread-safe. To avoid taking the linker_mutex in the fast path
    // (when the object is already marked) we do an atomic exchange here and
    // only take the lock in the case that the object is unmarked.
    if (xchg(&oc->mark, object_code_mark_bit) == object_code_mark_bit) {
        return true; // for hash table iteration
    }

    ACQUIRE_LOCK(&linker_mutex);
    // Remove from 'old_objects' list
    if (oc->prev != NULL) {
        // TODO(osa): Maybe 'prev' should be a pointer to the referencing
        // *field*? (instead of the referencing *object*)
        oc->prev->next = oc->next;
    } else {
        old_objects = oc->next;
    }
    if (oc->next != NULL) {
        oc->next->prev = oc->prev;
    }

    // Add it to 'objects' list
    oc->prev = NULL;
    oc->next = objects;
    if (objects != NULL) {
        objects->prev = oc;
    }
    objects = oc;
    RELEASE_LOCK(&linker_mutex);

    // Mark its dependencies
    iterHashTable(oc->dependencies, NULL, markObjectLive);

    return true; // for hash table iteration
}

void markObjectCode(const void *addr)
{
    if (global_s_indices == NULL) {
        return;
    }

    // This should be checked at the call site
    ASSERT(!HEAP_ALLOCED(addr));

    ObjectCode *oc = findOC(global_s_indices, addr);
    if (oc != NULL) {
        // Mark the object code and its dependencies
        markObjectLive(NULL, (W_)oc, NULL);
    }
}
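
// How markObjectCode is reached: the GC calls it for static closures while
// evacuating. A sketch of the call site (the real code lives in evacuate() in
// sm/Evac.c and is shaped slightly differently):
//
//     if (!HEAP_ALLOCED(q)) {            // static closure, lives in object code
//         if (n_unloaded_objects > 0) {  // skip the lookup when no unload is pending
//             markObjectCode(q);
//         }
//         ...
//     }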

// Returns whether or not the GC that follows needs to mark code for potential
// unloading.
bool prepareUnloadCheck()
{
    if (global_s_indices == NULL) {
        return false;
    }

    removeRemovedOCSections(global_s_indices);
    sortOCSectionIndices(global_s_indices);

    ASSERT(old_objects == NULL);

    object_code_mark_bit = ~object_code_mark_bit;
    old_objects = objects;
    objects = NULL;
    return true;
}

void checkUnload()
{
    if (global_s_indices == NULL) {
        return;
    }

    // At this point we've marked all dynamically loaded static objects
    // (including their dependencies) during GC, but not the root set of object
    // code (loaded_objects). Mark the roots first, then unload any unmarked
    // objects.

    OCSectionIndices *s_indices = global_s_indices;
    ASSERT(s_indices->sorted);

    // Mark roots
    for (ObjectCode *oc = loaded_objects; oc != NULL; oc = oc->next_loaded_object) {
        markObjectLive(NULL, (W_)oc, NULL);
    }

    // Free unmarked objects
    ObjectCode *next = NULL;
    for (ObjectCode *oc = old_objects; oc != NULL; oc = next) {
        next = oc->next;

        removeOCSectionIndices(s_indices, oc);

        // Symbols should be removed by unloadObj_.
        // NB (osa): If this assertion doesn't hold then freeObjectCode below
        // will corrupt symhash, as the keys of that table live in ObjectCodes.
        // If you see a segfault in a hash table operation in the linker (in a
        // non-debug RTS) then it's probably because this assertion did not
        // hold.
        ASSERT(oc->symbols == NULL);

        freeObjectCode(oc);
        n_unloaded_objects -= 1;
    }

    old_objects = NULL;
}
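
// Sequencing around a major GC, as a sketch (the real driver is the GC entry
// point in sm/GC.c; the local name below is made up):
//
//     bool unload_check = prepareUnloadCheck(); // flip mark bit, objects -> old_objects
//     ... collect; evacuate() marks object code reachable from static
//         closures, moving it back from old_objects to objects ...
//     if (unload_check) {
//         checkUnload();   // mark the loaded_objects roots, then free whatever
//     }                    // is still sitting in old_objects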