| field | value | |
|---|---|---|
| author | Peyton, Jonathan L <jonathan.l.peyton@intel.com> | 2021-09-13 15:44:04 -0500 |
| committer | Peyton, Jonathan L <jonathan.l.peyton@intel.com> | 2021-09-20 13:01:58 -0500 |
| commit | 1e45cd75dfb1df61892c1a26654c8997d8aeef66 (patch) | |
| tree | fa8e0d54ec60f6d8b9bf353a89234d44f0f810e0 /openmp | |
| parent | 01b097afd0eae593b3a11a88a34e8f50e845d3e7 (diff) | |
| download | llvm-1e45cd75dfb1df61892c1a26654c8997d8aeef66.tar.gz | |
[OpenMP][host runtime] Fix indirect lock table race condition
The indirect lock table can exhibit a race condition during initialization and
setting/unsetting of locks. This occurs if the lock table is resized by one
thread (during an omp_init_lock) and accessed (during an omp_set_lock or
omp_unset_lock) by another thread.
The runtime/test/lock/omp_init_lock.c test exposed this issue and will fail if
run enough times.
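For illustration, here is a minimal reproducer sketch of the racing pattern, not the actual omp_init_lock.c test; the lock count, thread count, and iteration count are made up, and the indirect lock table is only exercised when the runtime dispatches locks through it (e.g. dynamic lock dispatch). One thread keeps initializing fresh locks, which can grow the table, while other threads set/unset an already-initialized lock through the same table.

```c
#include <omp.h>
#include <stdlib.h>

#define NLOCKS 4096 /* enough initializations to force table growth */

int main(void) {
  omp_lock_t *locks = (omp_lock_t *)malloc(NLOCKS * sizeof(omp_lock_t));
  omp_init_lock(&locks[0]);
#pragma omp parallel num_threads(4)
  {
    if (omp_get_thread_num() == 0) {
      /* Resizer: each omp_init_lock may grow the indirect lock table. */
      for (int i = 1; i < NLOCKS; ++i)
        omp_init_lock(&locks[i]);
    } else {
      /* Accessors: repeatedly look up lock 0 through the table while it
         is being resized by thread 0. */
      for (int i = 0; i < 100000; ++i) {
        omp_set_lock(&locks[0]);
        omp_unset_lock(&locks[0]);
      }
    }
  }
  for (int i = 0; i < NLOCKS; ++i)
    omp_destroy_lock(&locks[i]);
  free(locks);
  return 0;
}
```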
This patch restructures the lock table so that pointer/iterator validity is
always preserved. Instead of reallocating a single table to a larger size, the
lock table begins preallocated to accommodate 8K locks. Each row of the
table is allocated as needed, with each row holding 1K locks. If the 8K
limit of the initial table is reached, another table, capable of
holding double the number of locks, is allocated and linked
as the next table. The indices stored in the user's locks take this
linked structure into account when finding the lock within the table.
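As a standalone sketch of that lookup scheme (a simplified illustration, not the runtime's code; the real implementation is the __kmp_get_i_lock helper in the diff below, and names such as lock_table_t, get_lock, and lock_s here are invented):

```c
#define CHUNK 1024   /* locks per row (KMP_I_LOCK_CHUNK) */
#define INIT_NROWS 8 /* row pointers in the first table: 8 * 1K = 8K locks */

typedef struct lock_s { void *impl; } lock_s; /* stand-in for a lock entry */

typedef struct lock_table {
  lock_s **rows;                 /* row pointers; rows are allocated lazily  */
  unsigned nrow_ptrs;            /* number of row pointers in this table     */
  unsigned next;                 /* next free slot within this table         */
  struct lock_table *next_table; /* doubled-capacity table, linked when full */
} lock_table_t;

/* Map a global index (as stored in the user's lock) to its lock entry.
   Walk the linked tables, subtracting each table's capacity from the index
   until the index falls inside one of them. */
static lock_s *get_lock(lock_table_t *t, unsigned idx) {
  while (t) {
    unsigned capacity = t->nrow_ptrs * CHUNK;
    if (idx < capacity) {
      unsigned row = idx / CHUNK, col = idx % CHUNK;
      return t->rows[row] ? &t->rows[row][col] : 0;
    }
    idx -= capacity; /* index belongs to a later (larger) table */
    t = t->next_table;
  }
  return 0; /* no such lock */
}
```

Because existing rows and tables are never moved or freed while locks are live, an index handed out earlier keeps resolving to the same address even while another thread appends a new row or links a larger table.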
Differential Revision: https://reviews.llvm.org/D109725
Diffstat (limited to 'openmp')
| -rw-r--r-- | openmp/runtime/src/kmp_lock.cpp | 105 |
| -rw-r--r-- | openmp/runtime/src/kmp_lock.h | 36 |

2 files changed, 90 insertions, 51 deletions
diff --git a/openmp/runtime/src/kmp_lock.cpp b/openmp/runtime/src/kmp_lock.cpp
index d138ba9ecf50..13bb7ace8682 100644
--- a/openmp/runtime/src/kmp_lock.cpp
+++ b/openmp/runtime/src/kmp_lock.cpp
@@ -3104,7 +3104,7 @@ kmp_indirect_lock_t *__kmp_allocate_indirect_lock(void **user_lock,
                                                   kmp_int32 gtid,
                                                   kmp_indirect_locktag_t tag) {
   kmp_indirect_lock_t *lck;
-  kmp_lock_index_t idx;
+  kmp_lock_index_t idx, table_idx;
 
   __kmp_acquire_lock(&__kmp_global_lock, gtid);
 
@@ -3117,26 +3117,41 @@
     KA_TRACE(20, ("__kmp_allocate_indirect_lock: reusing an existing lock %p\n",
                   lck));
   } else {
-    idx = __kmp_i_lock_table.next;
-    // Check capacity and double the size if it is full
-    if (idx == __kmp_i_lock_table.size) {
-      // Double up the space for block pointers
-      int row = __kmp_i_lock_table.size / KMP_I_LOCK_CHUNK;
-      kmp_indirect_lock_t **new_table = (kmp_indirect_lock_t **)__kmp_allocate(
-          2 * row * sizeof(kmp_indirect_lock_t *));
-      KMP_MEMCPY(new_table, __kmp_i_lock_table.table,
-                 row * sizeof(kmp_indirect_lock_t *));
-      kmp_indirect_lock_t **old_table = __kmp_i_lock_table.table;
-      __kmp_i_lock_table.table = new_table;
-      __kmp_free(old_table);
-      // Allocate new objects in the new blocks
-      for (int i = row; i < 2 * row; ++i)
-        *(__kmp_i_lock_table.table + i) = (kmp_indirect_lock_t *)__kmp_allocate(
-            KMP_I_LOCK_CHUNK * sizeof(kmp_indirect_lock_t));
-      __kmp_i_lock_table.size = 2 * idx;
+    kmp_uint32 row, col;
+    kmp_indirect_lock_table_t *lock_table = &__kmp_i_lock_table;
+    idx = 0;
+    // Find location in list of lock tables to put new lock
+    while (1) {
+      table_idx = lock_table->next; // index within this table
+      idx += lock_table->next; // global index within list of tables
+      if (table_idx < lock_table->nrow_ptrs * KMP_I_LOCK_CHUNK) {
+        row = table_idx / KMP_I_LOCK_CHUNK;
+        col = table_idx % KMP_I_LOCK_CHUNK;
+        // Allocate a new row of locks if necessary
+        if (!lock_table->table[row]) {
+          lock_table->table[row] = (kmp_indirect_lock_t *)__kmp_allocate(
+              sizeof(kmp_indirect_lock_t) * KMP_I_LOCK_CHUNK);
+        }
+        break;
+      }
+      // Allocate a new lock table if necessary with double the capacity
+      if (!lock_table->next_table) {
+        kmp_indirect_lock_table_t *next_table =
+            (kmp_indirect_lock_table_t *)__kmp_allocate(
+                sizeof(kmp_indirect_lock_table_t));
+        next_table->table = (kmp_indirect_lock_t **)__kmp_allocate(
+            sizeof(kmp_indirect_lock_t *) * 2 * lock_table->nrow_ptrs);
+        next_table->nrow_ptrs = 2 * lock_table->nrow_ptrs;
+        next_table->next = 0;
+        next_table->next_table = nullptr;
+        lock_table->next_table = next_table;
+      }
+      lock_table = lock_table->next_table;
+      KMP_ASSERT(lock_table);
     }
-    __kmp_i_lock_table.next++;
-    lck = KMP_GET_I_LOCK(idx);
+    lock_table->next++;
+
+    lck = &lock_table->table[row][col];
     // Allocate a new base lock object
     lck->lock = (kmp_user_lock_p)__kmp_allocate(__kmp_indirect_lock_size[tag]);
     KA_TRACE(20,
@@ -3167,10 +3182,7 @@ __kmp_lookup_indirect_lock(void **user_lock, const char *func) {
     }
     if (OMP_LOCK_T_SIZE < sizeof(void *)) {
       kmp_lock_index_t idx = KMP_EXTRACT_I_INDEX(user_lock);
-      if (idx >= __kmp_i_lock_table.size) {
-        KMP_FATAL(LockIsUninitialized, func);
-      }
-      lck = KMP_GET_I_LOCK(idx);
+      lck = __kmp_get_i_lock(idx);
     } else {
       lck = *((kmp_indirect_lock_t **)user_lock);
     }
@@ -3180,7 +3192,7 @@
     return lck;
   } else {
     if (OMP_LOCK_T_SIZE < sizeof(void *)) {
-      return KMP_GET_I_LOCK(KMP_EXTRACT_I_INDEX(user_lock));
+      return __kmp_get_i_lock(KMP_EXTRACT_I_INDEX(user_lock));
     } else {
       return *((kmp_indirect_lock_t **)user_lock);
     }
@@ -3323,12 +3335,13 @@ void __kmp_init_dynamic_user_locks() {
     return;
 
   // Initialize lock index table
-  __kmp_i_lock_table.size = KMP_I_LOCK_CHUNK;
-  __kmp_i_lock_table.table =
-      (kmp_indirect_lock_t **)__kmp_allocate(sizeof(kmp_indirect_lock_t *));
+  __kmp_i_lock_table.nrow_ptrs = KMP_I_LOCK_TABLE_INIT_NROW_PTRS;
+  __kmp_i_lock_table.table = (kmp_indirect_lock_t **)__kmp_allocate(
+      sizeof(kmp_indirect_lock_t *) * KMP_I_LOCK_TABLE_INIT_NROW_PTRS);
   *(__kmp_i_lock_table.table) = (kmp_indirect_lock_t *)__kmp_allocate(
       KMP_I_LOCK_CHUNK * sizeof(kmp_indirect_lock_t));
   __kmp_i_lock_table.next = 0;
+  __kmp_i_lock_table.next_table = nullptr;
 
   // Indirect lock size
   __kmp_indirect_lock_size[locktag_ticket] = sizeof(kmp_ticket_lock_t);
@@ -3393,7 +3406,6 @@ void __kmp_init_dynamic_user_locks() {
 
 // Clean up the lock table.
 void __kmp_cleanup_indirect_user_locks() {
-  kmp_lock_index_t i;
   int k;
 
   // Clean up locks in the pools first (they were already destroyed before going
@@ -3411,22 +3423,29 @@
     __kmp_indirect_lock_pool[k] = NULL;
   }
   // Clean up the remaining undestroyed locks.
-  for (i = 0; i < __kmp_i_lock_table.next; i++) {
-    kmp_indirect_lock_t *l = KMP_GET_I_LOCK(i);
-    if (l->lock != NULL) {
-      // Locks not destroyed explicitly need to be destroyed here.
-      KMP_I_LOCK_FUNC(l, destroy)(l->lock);
-      KA_TRACE(
-          20,
-          ("__kmp_cleanup_indirect_user_locks: destroy/freeing %p from table\n",
-           l));
-      __kmp_free(l->lock);
+  kmp_indirect_lock_table_t *ptr = &__kmp_i_lock_table;
+  while (ptr) {
+    for (kmp_uint32 row = 0; row < ptr->nrow_ptrs; ++row) {
+      if (!ptr->table[row])
+        continue;
+      for (kmp_uint32 col = 0; col < KMP_I_LOCK_CHUNK; ++col) {
+        kmp_indirect_lock_t *l = &ptr->table[row][col];
+        if (l->lock) {
+          // Locks not destroyed explicitly need to be destroyed here.
+          KMP_I_LOCK_FUNC(l, destroy)(l->lock);
+          KA_TRACE(20, ("__kmp_cleanup_indirect_user_locks: destroy/freeing %p "
+                        "from table\n",
+                        l));
+          __kmp_free(l->lock);
+        }
+      }
+      __kmp_free(ptr->table[row]);
     }
+    kmp_indirect_lock_table_t *next_table = ptr->next_table;
+    if (ptr != &__kmp_i_lock_table)
+      __kmp_free(ptr);
+    ptr = next_table;
   }
-  // Free the table
-  for (i = 0; i < __kmp_i_lock_table.size / KMP_I_LOCK_CHUNK; i++)
-    __kmp_free(__kmp_i_lock_table.table[i]);
-  __kmp_free(__kmp_i_lock_table.table);
 
   __kmp_init_user_locks = FALSE;
 }
diff --git a/openmp/runtime/src/kmp_lock.h b/openmp/runtime/src/kmp_lock.h
index 4f6ad6414e53..90afd8fd7eb3 100644
--- a/openmp/runtime/src/kmp_lock.h
+++ b/openmp/runtime/src/kmp_lock.h
@@ -1217,22 +1217,41 @@ extern kmp_lock_flags_t (*__kmp_indirect_get_flags[KMP_NUM_I_LOCKS])(
        ? __kmp_indirect_get_flags[(lck)->type]((lck)->lock)                    \
        : NULL)
 
-#define KMP_I_LOCK_CHUNK                                                       \
-  1024 // number of kmp_indirect_lock_t objects to be allocated together
+// number of kmp_indirect_lock_t objects to be allocated together
+#define KMP_I_LOCK_CHUNK 1024
+// Keep at a power of 2 since it is used in multiplication & division
+KMP_BUILD_ASSERT(KMP_I_LOCK_CHUNK % 2 == 0);
+// number of row entries in the initial lock table
+#define KMP_I_LOCK_TABLE_INIT_NROW_PTRS 8
 
 // Lock table for indirect locks.
 typedef struct kmp_indirect_lock_table {
   kmp_indirect_lock_t **table; // blocks of indirect locks allocated
-  kmp_lock_index_t size; // size of the indirect lock table
+  kmp_uint32 nrow_ptrs; // number *table pointer entries in table
   kmp_lock_index_t next; // index to the next lock to be allocated
+  struct kmp_indirect_lock_table *next_table;
 } kmp_indirect_lock_table_t;
 
 extern kmp_indirect_lock_table_t __kmp_i_lock_table;
 
 // Returns the indirect lock associated with the given index.
-#define KMP_GET_I_LOCK(index)                                                  \
-  (*(__kmp_i_lock_table.table + (index) / KMP_I_LOCK_CHUNK) +                  \
-   (index) % KMP_I_LOCK_CHUNK)
+// Returns nullptr if no lock at given index
+static inline kmp_indirect_lock_t *__kmp_get_i_lock(kmp_lock_index_t idx) {
+  kmp_indirect_lock_table_t *lock_table = &__kmp_i_lock_table;
+  while (lock_table) {
+    kmp_lock_index_t max_locks = lock_table->nrow_ptrs * KMP_I_LOCK_CHUNK;
+    if (idx < max_locks) {
+      kmp_lock_index_t row = idx / KMP_I_LOCK_CHUNK;
+      kmp_lock_index_t col = idx % KMP_I_LOCK_CHUNK;
+      if (!lock_table->table[row] || idx >= lock_table->next)
+        break;
+      return &lock_table->table[row][col];
+    }
+    idx -= max_locks;
+    lock_table = lock_table->next_table;
+  }
+  return nullptr;
+}
 
 // Number of locks in a lock block, which is fixed to "1" now.
 // TODO: No lock block implementation now. If we do support, we need to manage
@@ -1241,8 +1260,9 @@ extern int __kmp_num_locks_in_block;
 
 // Fast lock table lookup without consistency checking
 #define KMP_LOOKUP_I_LOCK(l)                                                   \
-  ((OMP_LOCK_T_SIZE < sizeof(void *)) ? KMP_GET_I_LOCK(KMP_EXTRACT_I_INDEX(l)) \
-   : *((kmp_indirect_lock_t **)(l)))
+  ((OMP_LOCK_T_SIZE < sizeof(void *))                                          \
+       ? __kmp_get_i_lock(KMP_EXTRACT_I_INDEX(l))                              \
+       : *((kmp_indirect_lock_t **)(l)))
 
 // Used once in kmp_error.cpp
 extern kmp_int32 __kmp_get_user_lock_owner(kmp_user_lock_p, kmp_uint32);