From d845b75e5f0837f801bdf371babd985308a1ad80 Mon Sep 17 00:00:00 2001 From: Ramon Fernandez Date: Thu, 7 Jan 2016 16:31:22 -0500 Subject: Import wiredtiger-wiredtiger-2.7.0-269-g44463c5.tar.gz from wiredtiger branch mongodb-3.4 ref: 3c2ad56..44463c5 SERVER-21833 Compact does not release space to the system with WiredTiger WT-2060 Simplify aggregation of statistics WT-2099 Seeing memory underflow messages WT-2113 truncate01 sometimes fails WT-2177 Add a per-thread seed to random number generator WT-2198 bulk load and column store appends WT-2231 pinned page cursor searches could check parent keys WT-2235 wt printlog option without unicode WT-2245 WTPERF Truncate has no ability to catch up when it falls behind WT-2246 column-store append searches the leaf page; the maximum record number fails CRUD operations WT-2256 WTPERFs throttle option fires in bursts WT-2257 wtperf doesn't handle overriding workload config WT-2259 __wt_evict_file_exclusive_on() should clear WT_BTREE_NO_EVICTION on error WT-2260 Workloads evict internal pages unexpectedly WT-2262 Random sampling is skewed by tree shape WT-2265 Wiredtiger related change in ppc64le specific code block in gcc.h WT-2266 Add wtperf config to set if perf thresholds are fatal WT-2269 wtperf should dump its config everytime it runs WT-2272 Stress test assertion in the sweep server WT-2275 broken DB after application crash WT-2276 tool to decode checkpoint addr WT-2277 Remove WT check against big-endian systems WT-2279 Define WT_PAUSE(), WT_FULL_BARRIER(), etc when s390x is defined WT-2281 wtperf smoke.sh fails on ppc64le WT-2282 error in wt_txn_update_oldest verbose message test WT-2283 retry in txn_update_oldest results in a hang WT-2285 configure should set BUFFER_ALIGNMENT_DEFAULT to 4kb on linux WT-2289 failure in fast key check WT-2290 WT_SESSION.compact could be more effective. WT-2291 Random cursor walk inefficient in skip list only trees WT-2297 Fix off-by-one error in Huffman config file parsing WT-2299 upper-level WiredTiger code is reaching into the block manager WT-2301 Add reading a range to wtperf WT-2303 Build warning in wtperf WT-2304 wtperf crash dumping config WT-2307 Internal page splits can corrupt cursor iteration WT-2311 Support Sparc --- src/third_party/wiredtiger/src/btree/col_srch.c | 117 +++++++++++++++++++----- 1 file changed, 95 insertions(+), 22 deletions(-) (limited to 'src/third_party/wiredtiger/src/btree/col_srch.c') diff --git a/src/third_party/wiredtiger/src/btree/col_srch.c b/src/third_party/wiredtiger/src/btree/col_srch.c index e9fa570f97b..c5e2abbe440 100644 --- a/src/third_party/wiredtiger/src/btree/col_srch.c +++ b/src/third_party/wiredtiger/src/btree/col_srch.c @@ -8,13 +8,61 @@ #include "wt_internal.h" +/* + * __check_leaf_key_range -- + * Check the search key is in the leaf page's key range. + */ +static inline int +__check_leaf_key_range(WT_SESSION_IMPL *session, + uint64_t recno, WT_REF *leaf, WT_CURSOR_BTREE *cbt) +{ + WT_PAGE_INDEX *pindex; + uint32_t indx; + + /* + * There are reasons we can't do the fast checks, and we continue with + * the leaf page search in those cases, only skipping the complete leaf + * page search if we know it's not going to work. + */ + cbt->compare = 0; + + /* + * Check if the search key is smaller than the parent's starting key for + * this page. + */ + if (recno < leaf->key.recno) { + cbt->compare = 1; /* page keys > search key */ + return (0); + } + + /* + * Check if the search key is greater than or equal to the starting key + * for the parent's next page. + * + * !!! + * Check that "indx + 1" is a valid page-index entry first, because it + * also checks that "indx" is a valid page-index entry, and we have to + * do that latter check before looking at the indx slot of the array + * for a match to leaf (in other words, our page hint might be wrong). + */ + WT_INTL_INDEX_GET(session, leaf->home, pindex); + indx = leaf->pindex_hint; + if (indx + 1 < pindex->entries && pindex->index[indx] == leaf) + if (recno >= pindex->index[indx + 1]->key.recno) { + cbt->compare = -1; /* page keys < search key */ + return (0); + } + + return (0); +} + /* * __wt_col_search -- * Search a column-store tree for a specific record-based key. */ int __wt_col_search(WT_SESSION_IMPL *session, - uint64_t recno, WT_REF *leaf, WT_CURSOR_BTREE *cbt) + uint64_t search_recno, WT_REF *leaf, WT_CURSOR_BTREE *cbt) { WT_BTREE *btree; WT_COL *cip; @@ -24,6 +72,7 @@ __wt_col_search(WT_SESSION_IMPL *session, WT_PAGE *page; WT_PAGE_INDEX *pindex, *parent_pindex; WT_REF *current, *descent; + uint64_t recno; uint32_t base, indx, limit; int depth; @@ -31,8 +80,38 @@ __wt_col_search(WT_SESSION_IMPL *session, __cursor_pos_clear(cbt); - /* We may only be searching a single leaf page, not the full tree. */ + /* + * When appending a new record, the search record number will be an + * out-of-band value, search for the largest key in the table instead. + */ + if ((recno = search_recno) == WT_RECNO_OOB) + recno = UINT64_MAX; + + /* + * We may be searching only a single leaf page, not the full tree. In + * the normal case where the page links to a parent, check the page's + * parent keys before doing the full search, it's faster when the + * cursor is being re-positioned. (One case where the page doesn't + * have a parent is if it is being re-instantiated in memory as part + * of a split). + */ if (leaf != NULL) { + WT_ASSERT(session, search_recno != WT_RECNO_OOB); + + if (leaf->home != NULL) { + WT_RET(__check_leaf_key_range( + session, recno, leaf, cbt)); + if (cbt->compare != 0) { + /* + * !!! + * WT_CURSOR.search_near uses the slot value to + * decide if there was an on-page match. + */ + cbt->slot = 0; + return (0); + } + } + current = leaf; goto leaf_only; } @@ -120,7 +199,17 @@ leaf_only: page = current->page; cbt->ref = current; cbt->recno = recno; - cbt->compare = 0; + + /* + * Don't bother searching if the caller is appending a new record where + * we'll allocate the record number; we're not going to find a match by + * definition, and we figure out the record number and position when we + * do the work. + */ + if (search_recno == WT_RECNO_OOB) { + cbt->compare = -1; + return (0); + } /* * Set the on-page slot to an impossible value larger than any possible @@ -142,6 +231,7 @@ leaf_only: * that's impossibly large for the page. We do have additional setup to * do in that case, the record may be appended to the page. */ + cbt->compare = 0; if (page->type == WT_PAGE_COL_FIX) { if (recno < page->pg_fix_recno) { cbt->compare = 1; @@ -190,18 +280,10 @@ past_end: * This is a rarely used path: we normally find exact matches, because * column-store files are dense, but in this case the caller searched * past the end of the table. - * - * Don't bother searching if the caller is appending a new record where - * we'll allocate the record number; we're not going to find a match by - * definition, and we figure out the position when we do the work. */ cbt->ins_head = WT_COL_APPEND(page); - if (recno == UINT64_MAX) - cbt->ins = NULL; - else - cbt->ins = __col_insert_search( - cbt->ins_head, cbt->ins_stack, cbt->next_stack, recno); - if (cbt->ins == NULL) + if ((cbt->ins = __col_insert_search( + cbt->ins_head, cbt->ins_stack, cbt->next_stack, recno)) == NULL) cbt->compare = -1; else { cbt->recno = WT_INSERT_RECNO(cbt->ins); @@ -212,14 +294,5 @@ past_end: else cbt->compare = -1; } - - /* - * Note if the record is past the maximum record in the tree, the cursor - * search functions need to know for fixed-length column-stores because - * appended records implicitly create any skipped records, and cursor - * search functions have to handle that case. - */ - if (cbt->compare == -1) - F_SET(cbt, WT_CBT_MAX_RECORD); return (0); } -- cgit v1.2.1