diff options
author | Ramon Fernandez <ramon@mongodb.com> | 2016-01-07 16:31:22 -0500 |
---|---|---|
committer | Ramon Fernandez <ramon@mongodb.com> | 2016-01-07 16:31:22 -0500 |
commit | d845b75e5f0837f801bdf371babd985308a1ad80 (patch) | |
tree | 080cd08b3d8fd1a04ff1787667a2f7b7546e371e /src/third_party/wiredtiger/src/btree/col_srch.c | |
parent | 6b95d2ee131a2b8e40f69106ac3f46921269ab46 (diff) | |
download | mongo-d845b75e5f0837f801bdf371babd985308a1ad80.tar.gz |
Import wiredtiger-wiredtiger-2.7.0-269-g44463c5.tar.gz from wiredtiger branch mongodb-3.4
ref: 3c2ad56..44463c5
SERVER-21833 Compact does not release space to the system with WiredTiger
WT-2060 Simplify aggregation of statistics
WT-2099 Seeing memory underflow messages
WT-2113 truncate01 sometimes fails
WT-2177 Add a per-thread seed to random number generator
WT-2198 bulk load and column store appends
WT-2231 pinned page cursor searches could check parent keys
WT-2235 wt printlog option without unicode
WT-2245 WTPERF Truncate has no ability to catch up when it falls behind
WT-2246 column-store append searches the leaf page; the maximum record number fails CRUD operations
WT-2256 WTPERFs throttle option fires in bursts
WT-2257 wtperf doesn't handle overriding workload config
WT-2259 __wt_evict_file_exclusive_on() should clear WT_BTREE_NO_EVICTION on error
WT-2260 Workloads evict internal pages unexpectedly
WT-2262 Random sampling is skewed by tree shape
WT-2265 Wiredtiger related change in ppc64le specific code block in gcc.h
WT-2266 Add wtperf config to set if perf thresholds are fatal
WT-2269 wtperf should dump its config everytime it runs
WT-2272 Stress test assertion in the sweep server
WT-2275 broken DB after application crash
WT-2276 tool to decode checkpoint addr
WT-2277 Remove WT check against big-endian systems
WT-2279 Define WT_PAUSE(), WT_FULL_BARRIER(), etc when s390x is defined
WT-2281 wtperf smoke.sh fails on ppc64le
WT-2282 error in wt_txn_update_oldest verbose message test
WT-2283 retry in txn_update_oldest results in a hang
WT-2285 configure should set BUFFER_ALIGNMENT_DEFAULT to 4kb on linux
WT-2289 failure in fast key check
WT-2290 WT_SESSION.compact could be more effective.
WT-2291 Random cursor walk inefficient in skip list only trees
WT-2297 Fix off-by-one error in Huffman config file parsing
WT-2299 upper-level WiredTiger code is reaching into the block manager
WT-2301 Add reading a range to wtperf
WT-2303 Build warning in wtperf
WT-2304 wtperf crash dumping config
WT-2307 Internal page splits can corrupt cursor iteration
WT-2311 Support Sparc
Diffstat (limited to 'src/third_party/wiredtiger/src/btree/col_srch.c')
-rw-r--r-- | src/third_party/wiredtiger/src/btree/col_srch.c | 117 |
1 files changed, 95 insertions, 22 deletions
diff --git a/src/third_party/wiredtiger/src/btree/col_srch.c b/src/third_party/wiredtiger/src/btree/col_srch.c index e9fa570f97b..c5e2abbe440 100644 --- a/src/third_party/wiredtiger/src/btree/col_srch.c +++ b/src/third_party/wiredtiger/src/btree/col_srch.c @@ -9,12 +9,60 @@ #include "wt_internal.h" /* + * __check_leaf_key_range -- + * Check the search key is in the leaf page's key range. + */ +static inline int +__check_leaf_key_range(WT_SESSION_IMPL *session, + uint64_t recno, WT_REF *leaf, WT_CURSOR_BTREE *cbt) +{ + WT_PAGE_INDEX *pindex; + uint32_t indx; + + /* + * There are reasons we can't do the fast checks, and we continue with + * the leaf page search in those cases, only skipping the complete leaf + * page search if we know it's not going to work. + */ + cbt->compare = 0; + + /* + * Check if the search key is smaller than the parent's starting key for + * this page. + */ + if (recno < leaf->key.recno) { + cbt->compare = 1; /* page keys > search key */ + return (0); + } + + /* + * Check if the search key is greater than or equal to the starting key + * for the parent's next page. + * + * !!! + * Check that "indx + 1" is a valid page-index entry first, because it + * also checks that "indx" is a valid page-index entry, and we have to + * do that latter check before looking at the indx slot of the array + * for a match to leaf (in other words, our page hint might be wrong). + */ + WT_INTL_INDEX_GET(session, leaf->home, pindex); + indx = leaf->pindex_hint; + if (indx + 1 < pindex->entries && pindex->index[indx] == leaf) + if (recno >= pindex->index[indx + 1]->key.recno) { + cbt->compare = -1; /* page keys < search key */ + return (0); + } + + return (0); +} + +/* * __wt_col_search -- * Search a column-store tree for a specific record-based key. */ int __wt_col_search(WT_SESSION_IMPL *session, - uint64_t recno, WT_REF *leaf, WT_CURSOR_BTREE *cbt) + uint64_t search_recno, WT_REF *leaf, WT_CURSOR_BTREE *cbt) { WT_BTREE *btree; WT_COL *cip; @@ -24,6 +72,7 @@ __wt_col_search(WT_SESSION_IMPL *session, WT_PAGE *page; WT_PAGE_INDEX *pindex, *parent_pindex; WT_REF *current, *descent; + uint64_t recno; uint32_t base, indx, limit; int depth; @@ -31,8 +80,38 @@ __wt_col_search(WT_SESSION_IMPL *session, __cursor_pos_clear(cbt); - /* We may only be searching a single leaf page, not the full tree. */ + /* + * When appending a new record, the search record number will be an + * out-of-band value, search for the largest key in the table instead. + */ + if ((recno = search_recno) == WT_RECNO_OOB) + recno = UINT64_MAX; + + /* + * We may be searching only a single leaf page, not the full tree. In + * the normal case where the page links to a parent, check the page's + * parent keys before doing the full search, it's faster when the + * cursor is being re-positioned. (One case where the page doesn't + * have a parent is if it is being re-instantiated in memory as part + * of a split). + */ if (leaf != NULL) { + WT_ASSERT(session, search_recno != WT_RECNO_OOB); + + if (leaf->home != NULL) { + WT_RET(__check_leaf_key_range( + session, recno, leaf, cbt)); + if (cbt->compare != 0) { + /* + * !!! + * WT_CURSOR.search_near uses the slot value to + * decide if there was an on-page match. + */ + cbt->slot = 0; + return (0); + } + } + current = leaf; goto leaf_only; } @@ -120,7 +199,17 @@ leaf_only: page = current->page; cbt->ref = current; cbt->recno = recno; - cbt->compare = 0; + + /* + * Don't bother searching if the caller is appending a new record where + * we'll allocate the record number; we're not going to find a match by + * definition, and we figure out the record number and position when we + * do the work. + */ + if (search_recno == WT_RECNO_OOB) { + cbt->compare = -1; + return (0); + } /* * Set the on-page slot to an impossible value larger than any possible @@ -142,6 +231,7 @@ leaf_only: * that's impossibly large for the page. We do have additional setup to * do in that case, the record may be appended to the page. */ + cbt->compare = 0; if (page->type == WT_PAGE_COL_FIX) { if (recno < page->pg_fix_recno) { cbt->compare = 1; @@ -190,18 +280,10 @@ past_end: * This is a rarely used path: we normally find exact matches, because * column-store files are dense, but in this case the caller searched * past the end of the table. - * - * Don't bother searching if the caller is appending a new record where - * we'll allocate the record number; we're not going to find a match by - * definition, and we figure out the position when we do the work. */ cbt->ins_head = WT_COL_APPEND(page); - if (recno == UINT64_MAX) - cbt->ins = NULL; - else - cbt->ins = __col_insert_search( - cbt->ins_head, cbt->ins_stack, cbt->next_stack, recno); - if (cbt->ins == NULL) + if ((cbt->ins = __col_insert_search( + cbt->ins_head, cbt->ins_stack, cbt->next_stack, recno)) == NULL) cbt->compare = -1; else { cbt->recno = WT_INSERT_RECNO(cbt->ins); @@ -212,14 +294,5 @@ past_end: else cbt->compare = -1; } - - /* - * Note if the record is past the maximum record in the tree, the cursor - * search functions need to know for fixed-length column-stores because - * appended records implicitly create any skipped records, and cursor - * search functions have to handle that case. - */ - if (cbt->compare == -1) - F_SET(cbt, WT_CBT_MAX_RECORD); return (0); } |