diff options
author | Alex Gorrod <alexander.gorrod@mongodb.com> | 2016-12-12 12:23:13 +1100 |
---|---|---|
committer | Alex Gorrod <alexander.gorrod@mongodb.com> | 2016-12-12 12:23:13 +1100 |
commit | 21a6f07d859c132154166bd3d83bbed238d5d719 (patch) | |
tree | bc261840853dda4307c68fd1c889caf4c89dd3d3 /src/third_party/wiredtiger/src/btree/bt_walk.c | |
parent | 7cf929f25638e4ad9525775c8ea0e18f3c86faf5 (diff) | |
download | mongo-21a6f07d859c132154166bd3d83bbed238d5d719.tar.gz |
Import wiredtiger: 1b6c815a3fd34f14c20d5cd627155799d1de535c from branch mongodb-3.6
ref: ca6eee06ff..1b6c815a3f
for: 3.5.1
WT-2336 Add a test validating schema operations via file system call monitoring
WT-2670 Add option to configure read-ahead per table and change default behavior
WT-2960 Inserting multi-megabyte values can cause pathological lookaside usage
WT-2969 Fix a bug that could cause snapshot corruption during compaction
WT-3014 Add GCC/clang support for ELF symbol visibility.
WT-3021 Fixes needed for Java log cursor example, Java raw mode cursors, log cursors in raw mode
WT-3025 fix error path in log_force_sync
WT-3028 Workloads with all dirty pages could trigger diagnostic stuck check
WT-3030 Test failure indicating invalid key order during traversal
WT-3034 Add support for single-writer named snapshots.
WT-3037 Fix some outdated comments in logging
WT-3048 WiredTiger maximum size warning uses the wrong format.
WT-3051 Remove external __wt_hex symbol.
WT-3052 Improve search if an index hint is wrong
WT-3053 Review Python and Java calls to internal WiredTiger functions
WT-3054 Java PackTest, PackTest03 do not compile
WT-3055 Java AsyncTest faults
WT-3057 WiredTiger hazard pointers should use the WT_REF, not the WT_PAGE.
WT-3064 minor tree cleanups: .gitignore, NEWS misspelling
Diffstat (limited to 'src/third_party/wiredtiger/src/btree/bt_walk.c')
-rw-r--r-- | src/third_party/wiredtiger/src/btree/bt_walk.c | 98 |
1 files changed, 52 insertions, 46 deletions
diff --git a/src/third_party/wiredtiger/src/btree/bt_walk.c b/src/third_party/wiredtiger/src/btree/bt_walk.c index fb0d2296823..049700952ee 100644 --- a/src/third_party/wiredtiger/src/btree/bt_walk.c +++ b/src/third_party/wiredtiger/src/btree/bt_walk.c @@ -17,54 +17,60 @@ __ref_index_slot(WT_SESSION_IMPL *session, WT_REF *ref, WT_PAGE_INDEX **pindexp, uint32_t *slotp) { WT_PAGE_INDEX *pindex; - uint32_t i; + WT_REF **start, **stop, **p, **t; + uint32_t entries, slot; - /* - * Copy the parent page's index value: the page can split at any time, - * but the index's value is always valid, even if it's not up-to-date. - */ -retry: WT_INTL_INDEX_GET(session, ref->home, pindex); + for (;;) { + /* + * Copy the parent page's index value: the page can split at + * any time, but the index's value is always valid, even if + * it's not up-to-date. + */ + WT_INTL_INDEX_GET(session, ref->home, pindex); + entries = pindex->entries; - /* - * Use the page's reference hint: it should be correct unless the page - * split before our slot. If the page splits after our slot, the hint - * will point earlier in the array than our actual slot, so the first - * loop is from the hint to the end of the list, and the second loop - * is from the start of the list to the end of the list. (The second - * loop overlaps the first, but that only happen in cases where we've - * split the tree and aren't going to find our slot at all, that's not - * worth optimizing.) - * - * It's not an error for the reference hint to be wrong, it just means - * the first retrieval (which sets the hint for subsequent retrievals), - * is slower. - */ - i = ref->pindex_hint; - if (i < pindex->entries && pindex->index[i] == ref) { - *pindexp = pindex; - *slotp = i; - return; - } - while (++i < pindex->entries) - if (pindex->index[i] == ref) { - *pindexp = pindex; - *slotp = ref->pindex_hint = i; - return; - } - for (i = 0; i < pindex->entries; ++i) - if (pindex->index[i] == ref) { - *pindexp = pindex; - *slotp = ref->pindex_hint = i; - return; + /* + * Use the page's reference hint: it should be correct unless + * there was a split or delete in the parent before our slot. + * If the hint is wrong, it can be either too big or too small, + * but often only by a small amount. Search up and down the + * index starting from the hint. + * + * It's not an error for the reference hint to be wrong, it + * just means the first retrieval (which sets the hint for + * subsequent retrievals), is slower. + */ + slot = ref->pindex_hint; + if (slot >= entries) + slot = entries - 1; + if (pindex->index[slot] == ref) + goto found; + for (start = &pindex->index[0], + stop = &pindex->index[entries - 1], + p = t = &pindex->index[slot]; + p > start || t < stop;) { + if (p > start && *--p == ref) { + slot = (uint32_t)(p - start); + goto found; + } + if (t < stop && *++t == ref) { + slot = (uint32_t)(t - start); + goto found; + } } - /* - * If we don't find our reference, the page split and our home pointer - * references the wrong page. When internal pages split, their WT_REF - * structure home values are updated; yield and wait for that to happen. - */ - __wt_yield(); - goto retry; + /* + * If we don't find our reference, the page split and our home + * pointer references the wrong page. When internal pages + * split, their WT_REF structure home values are updated; yield + * and wait for that to happen. + */ + __wt_yield(); + } + +found: WT_ASSERT(session, pindex->index[slot] == ref); + *pindexp = pindex; + *slotp = slot; } /* @@ -431,8 +437,8 @@ restart: /* /* * Move to the next slot, and set the reference hint if * it's wrong (used when we continue the walk). We don't - * update those hints when splitting, so it's common for - * them to be incorrect in some workloads. + * always update the hints when splitting, it's expected + * for them to be incorrect in some workloads. */ ref = pindex->index[slot]; if (ref->pindex_hint != slot) |