summaryrefslogtreecommitdiff
path: root/src/third_party/wiredtiger/src/btree/col_srch.c
diff options
context:
space:
mode:
authorRamon Fernandez <ramon@mongodb.com>2016-01-07 16:31:22 -0500
committerRamon Fernandez <ramon@mongodb.com>2016-01-07 16:31:22 -0500
commitd845b75e5f0837f801bdf371babd985308a1ad80 (patch)
tree080cd08b3d8fd1a04ff1787667a2f7b7546e371e /src/third_party/wiredtiger/src/btree/col_srch.c
parent6b95d2ee131a2b8e40f69106ac3f46921269ab46 (diff)
downloadmongo-d845b75e5f0837f801bdf371babd985308a1ad80.tar.gz
Import wiredtiger-wiredtiger-2.7.0-269-g44463c5.tar.gz from wiredtiger branch mongodb-3.4
ref: 3c2ad56..44463c5 SERVER-21833 Compact does not release space to the system with WiredTiger WT-2060 Simplify aggregation of statistics WT-2099 Seeing memory underflow messages WT-2113 truncate01 sometimes fails WT-2177 Add a per-thread seed to random number generator WT-2198 bulk load and column store appends WT-2231 pinned page cursor searches could check parent keys WT-2235 wt printlog option without unicode WT-2245 WTPERF Truncate has no ability to catch up when it falls behind WT-2246 column-store append searches the leaf page; the maximum record number fails CRUD operations WT-2256 WTPERFs throttle option fires in bursts WT-2257 wtperf doesn't handle overriding workload config WT-2259 __wt_evict_file_exclusive_on() should clear WT_BTREE_NO_EVICTION on error WT-2260 Workloads evict internal pages unexpectedly WT-2262 Random sampling is skewed by tree shape WT-2265 Wiredtiger related change in ppc64le specific code block in gcc.h WT-2266 Add wtperf config to set if perf thresholds are fatal WT-2269 wtperf should dump its config everytime it runs WT-2272 Stress test assertion in the sweep server WT-2275 broken DB after application crash WT-2276 tool to decode checkpoint addr WT-2277 Remove WT check against big-endian systems WT-2279 Define WT_PAUSE(), WT_FULL_BARRIER(), etc when s390x is defined WT-2281 wtperf smoke.sh fails on ppc64le WT-2282 error in wt_txn_update_oldest verbose message test WT-2283 retry in txn_update_oldest results in a hang WT-2285 configure should set BUFFER_ALIGNMENT_DEFAULT to 4kb on linux WT-2289 failure in fast key check WT-2290 WT_SESSION.compact could be more effective. WT-2291 Random cursor walk inefficient in skip list only trees WT-2297 Fix off-by-one error in Huffman config file parsing WT-2299 upper-level WiredTiger code is reaching into the block manager WT-2301 Add reading a range to wtperf WT-2303 Build warning in wtperf WT-2304 wtperf crash dumping config WT-2307 Internal page splits can corrupt cursor iteration WT-2311 Support Sparc
Diffstat (limited to 'src/third_party/wiredtiger/src/btree/col_srch.c')
-rw-r--r--src/third_party/wiredtiger/src/btree/col_srch.c117
1 files changed, 95 insertions, 22 deletions
diff --git a/src/third_party/wiredtiger/src/btree/col_srch.c b/src/third_party/wiredtiger/src/btree/col_srch.c
index e9fa570f97b..c5e2abbe440 100644
--- a/src/third_party/wiredtiger/src/btree/col_srch.c
+++ b/src/third_party/wiredtiger/src/btree/col_srch.c
@@ -9,12 +9,60 @@
#include "wt_internal.h"
/*
+ * __check_leaf_key_range --
+ * Check the search key is in the leaf page's key range.
+ */
+static inline int
+__check_leaf_key_range(WT_SESSION_IMPL *session,
+ uint64_t recno, WT_REF *leaf, WT_CURSOR_BTREE *cbt)
+{
+ WT_PAGE_INDEX *pindex;
+ uint32_t indx;
+
+ /*
+ * There are reasons we can't do the fast checks, and we continue with
+ * the leaf page search in those cases, only skipping the complete leaf
+ * page search if we know it's not going to work.
+ */
+ cbt->compare = 0;
+
+ /*
+ * Check if the search key is smaller than the parent's starting key for
+ * this page.
+ */
+ if (recno < leaf->key.recno) {
+ cbt->compare = 1; /* page keys > search key */
+ return (0);
+ }
+
+ /*
+ * Check if the search key is greater than or equal to the starting key
+ * for the parent's next page.
+ *
+ * !!!
+ * Check that "indx + 1" is a valid page-index entry first, because it
+ * also checks that "indx" is a valid page-index entry, and we have to
+ * do that latter check before looking at the indx slot of the array
+ * for a match to leaf (in other words, our page hint might be wrong).
+ */
+ WT_INTL_INDEX_GET(session, leaf->home, pindex);
+ indx = leaf->pindex_hint;
+ if (indx + 1 < pindex->entries && pindex->index[indx] == leaf)
+ if (recno >= pindex->index[indx + 1]->key.recno) {
+ cbt->compare = -1; /* page keys < search key */
+ return (0);
+ }
+
+ return (0);
+}
+
+/*
* __wt_col_search --
* Search a column-store tree for a specific record-based key.
*/
int
__wt_col_search(WT_SESSION_IMPL *session,
- uint64_t recno, WT_REF *leaf, WT_CURSOR_BTREE *cbt)
+ uint64_t search_recno, WT_REF *leaf, WT_CURSOR_BTREE *cbt)
{
WT_BTREE *btree;
WT_COL *cip;
@@ -24,6 +72,7 @@ __wt_col_search(WT_SESSION_IMPL *session,
WT_PAGE *page;
WT_PAGE_INDEX *pindex, *parent_pindex;
WT_REF *current, *descent;
+ uint64_t recno;
uint32_t base, indx, limit;
int depth;
@@ -31,8 +80,38 @@ __wt_col_search(WT_SESSION_IMPL *session,
__cursor_pos_clear(cbt);
- /* We may only be searching a single leaf page, not the full tree. */
+ /*
+ * When appending a new record, the search record number will be an
+ * out-of-band value, search for the largest key in the table instead.
+ */
+ if ((recno = search_recno) == WT_RECNO_OOB)
+ recno = UINT64_MAX;
+
+ /*
+ * We may be searching only a single leaf page, not the full tree. In
+ * the normal case where the page links to a parent, check the page's
+ * parent keys before doing the full search, it's faster when the
+ * cursor is being re-positioned. (One case where the page doesn't
+ * have a parent is if it is being re-instantiated in memory as part
+ * of a split).
+ */
if (leaf != NULL) {
+ WT_ASSERT(session, search_recno != WT_RECNO_OOB);
+
+ if (leaf->home != NULL) {
+ WT_RET(__check_leaf_key_range(
+ session, recno, leaf, cbt));
+ if (cbt->compare != 0) {
+ /*
+ * !!!
+ * WT_CURSOR.search_near uses the slot value to
+ * decide if there was an on-page match.
+ */
+ cbt->slot = 0;
+ return (0);
+ }
+ }
+
current = leaf;
goto leaf_only;
}
@@ -120,7 +199,17 @@ leaf_only:
page = current->page;
cbt->ref = current;
cbt->recno = recno;
- cbt->compare = 0;
+
+ /*
+ * Don't bother searching if the caller is appending a new record where
+ * we'll allocate the record number; we're not going to find a match by
+ * definition, and we figure out the record number and position when we
+ * do the work.
+ */
+ if (search_recno == WT_RECNO_OOB) {
+ cbt->compare = -1;
+ return (0);
+ }
/*
* Set the on-page slot to an impossible value larger than any possible
@@ -142,6 +231,7 @@ leaf_only:
* that's impossibly large for the page. We do have additional setup to
* do in that case, the record may be appended to the page.
*/
+ cbt->compare = 0;
if (page->type == WT_PAGE_COL_FIX) {
if (recno < page->pg_fix_recno) {
cbt->compare = 1;
@@ -190,18 +280,10 @@ past_end:
* This is a rarely used path: we normally find exact matches, because
* column-store files are dense, but in this case the caller searched
* past the end of the table.
- *
- * Don't bother searching if the caller is appending a new record where
- * we'll allocate the record number; we're not going to find a match by
- * definition, and we figure out the position when we do the work.
*/
cbt->ins_head = WT_COL_APPEND(page);
- if (recno == UINT64_MAX)
- cbt->ins = NULL;
- else
- cbt->ins = __col_insert_search(
- cbt->ins_head, cbt->ins_stack, cbt->next_stack, recno);
- if (cbt->ins == NULL)
+ if ((cbt->ins = __col_insert_search(
+ cbt->ins_head, cbt->ins_stack, cbt->next_stack, recno)) == NULL)
cbt->compare = -1;
else {
cbt->recno = WT_INSERT_RECNO(cbt->ins);
@@ -212,14 +294,5 @@ past_end:
else
cbt->compare = -1;
}
-
- /*
- * Note if the record is past the maximum record in the tree, the cursor
- * search functions need to know for fixed-length column-stores because
- * appended records implicitly create any skipped records, and cursor
- * search functions have to handle that case.
- */
- if (cbt->compare == -1)
- F_SET(cbt, WT_CBT_MAX_RECORD);
return (0);
}