diff options
author | Luke Chen <luke.chen@mongodb.com> | 2018-04-23 13:32:45 +1000 |
---|---|---|
committer | Luke Chen <luke.chen@mongodb.com> | 2018-04-23 13:32:45 +1000 |
commit | 9dbed1bc8108798bebc8ae7a0b56fa4858335146 (patch) | |
tree | 1fb7a2fe795032e2da9e24a6bef611738303797b /src/third_party/wiredtiger/src | |
parent | b48579fcba7dfe3c7178b60c88feec96955c02f7 (diff) | |
download | mongo-9dbed1bc8108798bebc8ae7a0b56fa4858335146.tar.gz |
Import wiredtiger: 43c20b5583d8e38ae127beb8b3930d7577902ad0 from branch mongodb-3.8
ref: ad25980c88..43c20b5583
for: 4.0.0-rc0
WT-3851 Optimize wt_compare* routines with NEON instructions for ARM platform
WT-4037 WT_REF structures freed while still in use
Diffstat (limited to 'src/third_party/wiredtiger/src')
-rw-r--r-- | src/third_party/wiredtiger/src/btree/bt_split.c | 106 | ||||
-rw-r--r-- | src/third_party/wiredtiger/src/include/btree_cmp.i | 38 |
2 files changed, 90 insertions, 54 deletions
diff --git a/src/third_party/wiredtiger/src/btree/bt_split.c b/src/third_party/wiredtiger/src/btree/bt_split.c index d58851a2a23..381c0ee8a9b 100644 --- a/src/third_party/wiredtiger/src/btree/bt_split.c +++ b/src/third_party/wiredtiger/src/btree/bt_split.c @@ -326,17 +326,49 @@ err: if (addr != NULL) { } /* + * __split_ref_final -- + * Finalize the WT_REF move. + */ +static void +__split_ref_final(WT_SESSION_IMPL *session, WT_PAGE ***lockedp) +{ + WT_PAGE **locked; + size_t i; + + /* The parent page's page index has been updated. */ + WT_WRITE_BARRIER(); + + if ((locked = *lockedp) == NULL) + return; + *lockedp = NULL; + + /* + * The moved child pages are locked to prevent them from splitting + * before the parent move completes, unlock them as the final step. + */ + for (i = 0; locked[i] != NULL; ++i) + WT_PAGE_UNLOCK(session, locked[i]); + __wt_free(session, locked); +} + +/* * __split_ref_prepare -- * Prepare a set of WT_REFs for a move. */ -static void -__split_ref_prepare( - WT_SESSION_IMPL *session, WT_PAGE_INDEX *pindex, bool skip_first) +static int +__split_ref_prepare(WT_SESSION_IMPL *session, + WT_PAGE_INDEX *pindex, WT_PAGE ***lockedp, bool skip_first) { - WT_PAGE *child; + WT_DECL_RET; + WT_PAGE *child, **locked; WT_REF *child_ref, *ref; + size_t alloc, cnt; uint32_t i, j; + *lockedp = NULL; + + locked = NULL; + /* The newly created subtree is complete. */ WT_WRITE_BARRIER(); @@ -349,27 +381,43 @@ __split_ref_prepare( * page won't yet know about the created children pages. That's OK, we * spin there until the parent's page index is updated. * - * Lock the newly created page to ensure it doesn't split until all - * child pages have been updated. - */ + * Lock the newly created page to ensure none of its children can split. + * First, to ensure all of the child pages are updated before any pages + * can split. Second, to ensure the original split completes before any + * of the children can split. The latter involves split generations: + * the original split page has references to these children. If they + * split immediately, they could free WT_REF structures based on split + * generations earlier than the split generation we'll eventually choose + * to protect the original split page's previous page index. + */ + alloc = cnt = 0; for (i = skip_first ? 1 : 0; i < pindex->entries; ++i) { ref = pindex->index[i]; child = ref->page; + WT_PAGE_LOCK(session, child); + + /* Track the locked pages for cleanup. */ + WT_ERR(__wt_realloc_def(session, &alloc, cnt + 2, &locked)); + locked[cnt++] = child; + /* Switch the WT_REF's to their new page. */ j = 0; - WT_PAGE_LOCK(session, child); WT_INTL_FOREACH_BEGIN(session, child, child_ref) { child_ref->home = child; child_ref->pindex_hint = j++; } WT_INTL_FOREACH_END; - WT_PAGE_UNLOCK(session, child); #ifdef HAVE_DIAGNOSTIC WT_WITH_PAGE_INDEX(session, __split_verify_intl_key_order(session, child)); #endif } + *lockedp = locked; + return (0); + +err: __split_ref_final(session, &locked); + return (ret); } /* @@ -381,10 +429,9 @@ __split_root(WT_SESSION_IMPL *session, WT_PAGE *root) { WT_BTREE *btree; WT_DECL_RET; - WT_PAGE *child; + WT_PAGE *child, **locked; WT_PAGE_INDEX *alloc_index, *child_pindex, *pindex; - WT_REF **alloc_refp; - WT_REF **child_refp, *ref, **root_refp; + WT_REF **alloc_refp, **child_refp, *ref, **root_refp; WT_SPLIT_ERROR_PHASE complete; size_t child_incr, root_decr, root_incr, size; uint64_t split_gen; @@ -399,6 +446,7 @@ __split_root(WT_SESSION_IMPL *session, WT_PAGE *root) btree = S2BT(session); alloc_index = NULL; + locked = NULL; root_decr = root_incr = 0; complete = WT_ERR_RETURN; @@ -454,10 +502,9 @@ __split_root(WT_SESSION_IMPL *session, WT_PAGE *root) * thread might see a freed WT_REF. To ensure that doesn't happen, the * created pages are set to the current split generation and so can't be * evicted until all readers have left the old generation. - * - * Our thread has a stable split generation, get a copy. */ - split_gen = __wt_session_gen(session, WT_GEN_SPLIT); + split_gen = __wt_gen_next(session, WT_GEN_SPLIT); + WT_ASSERT(session, root->pg_intl_split_gen < split_gen); /* Allocate child pages, and connect them into the new page index. */ for (root_refp = pindex->index, @@ -519,7 +566,7 @@ __split_root(WT_SESSION_IMPL *session, WT_PAGE *root) complete = WT_ERR_PANIC; /* Prepare the WT_REFs for the move. */ - __split_ref_prepare(session, alloc_index, false); + WT_ERR(__split_ref_prepare(session, alloc_index, &locked, false)); /* Encourage a race */ __page_split_timing_stress(session, @@ -552,6 +599,9 @@ __split_root(WT_SESSION_IMPL *session, WT_PAGE *root) split_gen = __wt_gen_next(session, WT_GEN_SPLIT); root->pg_intl_split_gen = split_gen; + /* Finalize the WT_REF move. */ + __split_ref_final(session, &locked); + #ifdef HAVE_DIAGNOSTIC WT_WITH_PAGE_INDEX(session, ret = __split_verify_root(session, root)); @@ -579,7 +629,9 @@ __split_root(WT_SESSION_IMPL *session, WT_PAGE *root) __wt_cache_page_inmem_incr(session, root, root_incr); __wt_cache_page_inmem_decr(session, root, root_decr); -err: switch (complete) { +err: __split_ref_final(session, &locked); + + switch (complete) { case WT_ERR_RETURN: __wt_free_ref_index(session, root, alloc_index, true); break; @@ -915,10 +967,9 @@ __split_internal(WT_SESSION_IMPL *session, WT_PAGE *parent, WT_PAGE *page) { WT_BTREE *btree; WT_DECL_RET; - WT_PAGE *child; + WT_PAGE *child, **locked; WT_PAGE_INDEX *alloc_index, *child_pindex, *pindex, *replace_index; - WT_REF **alloc_refp; - WT_REF **child_refp, *page_ref, **page_refp, *ref; + WT_REF **alloc_refp, **child_refp, *page_ref, **page_refp, *ref; WT_SPLIT_ERROR_PHASE complete; size_t child_incr, page_decr, page_incr, parent_incr, size; uint64_t split_gen; @@ -936,6 +987,7 @@ __split_internal(WT_SESSION_IMPL *session, WT_PAGE *parent, WT_PAGE *page) btree = S2BT(session); alloc_index = replace_index = NULL; page_ref = page->pg_intl_parent_ref; + locked = NULL; page_decr = page_incr = parent_incr = 0; complete = WT_ERR_RETURN; @@ -1009,10 +1061,9 @@ __split_internal(WT_SESSION_IMPL *session, WT_PAGE *parent, WT_PAGE *page) * thread might see a freed WT_REF. To ensure that doesn't happen, the * created pages are set to the current split generation and so can't be * evicted until all readers have left the old generation. - * - * Our thread has a stable split generation, get a copy. */ - split_gen = __wt_session_gen(session, WT_GEN_SPLIT); + split_gen = __wt_gen_next(session, WT_GEN_SPLIT); + WT_ASSERT(session, page->pg_intl_split_gen < split_gen); /* Allocate child pages, and connect them into the new page index. */ WT_ASSERT(session, page_refp == pindex->index + chunk); @@ -1074,7 +1125,7 @@ __split_internal(WT_SESSION_IMPL *session, WT_PAGE *parent, WT_PAGE *page) complete = WT_ERR_PANIC; /* Prepare the WT_REFs for the move. */ - __split_ref_prepare(session, alloc_index, true); + WT_ERR(__split_ref_prepare(session, alloc_index, &locked, true)); /* Encourage a race */ __page_split_timing_stress(session, @@ -1106,6 +1157,9 @@ __split_internal(WT_SESSION_IMPL *session, WT_PAGE *parent, WT_PAGE *page) split_gen = __wt_gen_next(session, WT_GEN_SPLIT); page->pg_intl_split_gen = split_gen; + /* Finalize the WT_REF move. */ + __split_ref_final(session, &locked); + #ifdef HAVE_DIAGNOSTIC WT_WITH_PAGE_INDEX(session, __split_verify_intl_key_order(session, parent)); @@ -1141,7 +1195,9 @@ __split_internal(WT_SESSION_IMPL *session, WT_PAGE *parent, WT_PAGE *page) __wt_cache_page_inmem_incr(session, page, page_incr); __wt_cache_page_inmem_decr(session, page, page_decr); -err: switch (complete) { +err: __split_ref_final(session, &locked); + + switch (complete) { case WT_ERR_RETURN: __wt_free_ref_index(session, page, alloc_index, true); __wt_free_ref_index(session, page, replace_index, false); diff --git a/src/third_party/wiredtiger/src/include/btree_cmp.i b/src/third_party/wiredtiger/src/include/btree_cmp.i index f8679933210..da0f527dfc9 100644 --- a/src/third_party/wiredtiger/src/include/btree_cmp.i +++ b/src/third_party/wiredtiger/src/include/btree_cmp.i @@ -11,33 +11,13 @@ #include <x86intrin.h> #endif #endif - /* 16B alignment */ -#define WT_ALIGNED_16(p) (((uintptr_t)(p) & 0x0f) == 0) -#define WT_VECTOR_SIZE 16 /* chunk size */ #if defined(HAVE_ARM_NEON_INTRIN_H) #include <arm_neon.h> -/* - * _mm_movemask_epi8_neon -- - * Creates a 16-bit mask from the most significant bits of the 16 signed - * or unsigned 8-bit integers. - */ -static inline uint16_t -_mm_movemask_epi8_neon(const uint8x16_t data) -{ - uint64x1_t p; - p = vset_lane_u64(0x8040201008040201, p, 0); - uint8x16_t powers = vcombine_u8(p, p); - uint8x16_t zero8x16 = vdupq_n_s8(0); - int8x16_t input = vcltq_s8((int8x16_t)data, (int8x16_t)zero8x16); - uint64x2_t mask = vpaddlq_u32( - vpaddlq_u16(vpaddlq_u8(vandq_u8((uint8x16_t)input, powers)))); - uint16_t output; - output = - ((vgetq_lane_u8(mask, 8) << 8) | (vgetq_lane_u8(mask, 0) << 0)); - return (output); -} #endif + /* 16B alignment */ +#define WT_ALIGNED_16(p) (((uintptr_t)(p) & 0x0f) == 0) +#define WT_VECTOR_SIZE 16 /* chunk size */ /* * __wt_lex_compare -- @@ -102,12 +82,12 @@ __wt_lex_compare(const WT_ITEM *user_item, const WT_ITEM *tree_item) remain = len % WT_VECTOR_SIZE; len -= remain; for (; len > 0; - len -= WT_VECTOR_SIZE, - userp += WT_VECTOR_SIZE, treep += WT_VECTOR_SIZE) { + len -= WT_VECTOR_SIZE, + userp += WT_VECTOR_SIZE, treep += WT_VECTOR_SIZE) { u = vld1q_u8(userp); t = vld1q_u8(treep); res_eq = vceqq_u8(u, t); - if (_mm_movemask_epi8_neon(res_eq) != 65535) + if (vminvq_u8(res_eq) != 255) break; } len += remain; @@ -209,13 +189,13 @@ __wt_lex_compare_skip( len -= remain; if (WT_ALIGNED_16(userp) && WT_ALIGNED_16(treep)) for (; len > 0; - len -= WT_VECTOR_SIZE, - userp += WT_VECTOR_SIZE, treep += WT_VECTOR_SIZE, + len -= WT_VECTOR_SIZE, + userp += WT_VECTOR_SIZE, treep += WT_VECTOR_SIZE, *matchp += WT_VECTOR_SIZE) { u = vld1q_u8(userp); t = vld1q_u8(treep); res_eq = vceqq_u8(u, t); - if (_mm_movemask_epi8_neon(res_eq) != 65535) + if (vminvq_u8(res_eq) != 255) break; } len += remain; |