summary refs log tree commit diff
path: root/src/third_party/wiredtiger/src/btree
diff options
context:
space:
mode:
author Ramon Fernandez <ramon@mongodb.com> 2016-05-05 07:32:30 -0400
committer Ramon Fernandez <ramon@mongodb.com> 2016-05-05 07:32:36 -0400
commit 150aa821caa327529a0996793c55a6b2e74acaf8 (patch)
tree bd116d3f08cad05243dcdbf90e4a45791e0da1b9 /src/third_party/wiredtiger/src/btree
parent 0ae4fb568aa6472a6030fd83a86fb2965d472095 (diff)
download mongo-150aa821caa327529a0996793c55a6b2e74acaf8.tar.gz
Import wiredtiger-wiredtiger-2.8.0-410-g636a7b2.tar.gz from wiredtiger branch mongodb-3.4
ref: eaa7b5f..636a7b2 WT-2103 add incremental backup testing to format WT-2223 Add stress testing for in-memory WT-2343 Assert we don't remove or rename when backup cursor is open WT-2349 Add ability to open databases read-only WT-2359 WiredTiger with Python will hang if a calloc failure occurs during __wt_connection_close WT-2360 Allow disjunctions and combinations of operations in join cursors WT-2446 Estimate WT cache hit ratio WT-2450 salvage releases pages, then explicitly evicts them. WT-2453 Throughput drop in wtperf evict Jenkins tests WT-2479 dump utility discards table config (JSON) WT-2504 Should READONLY always read basecfg file? WT-2505 Review clang analyzer warnings WT-2508 test programs should remove test directories on the "clean" target WT-2518 LSM checkpoint handle acquisition optimization WT-2520 WT_SESSION::verify should not alter tables WT-2526 mixing and matching readonly and read/write handles WT-2535 Extend test/format to test for transactions reading their writes WT-2537 cannot open DB written by WT2.6.1 with WT2.8.0 due to WT_NOTFOUND on recovery WT-2539 Implement file streaming above pluggable filesystems WT-2540 Separate stream and file handle methods WT-2542 fixed-length column store reconciliation overwrites original values WT-2544 Investigate any thread populating eviction queue WT-2546 Eviction server not help evict pages sometimes WT-2547 Add 1-eviction-worker jobs to Jenkins WT-2548 Cap the amount of data handed to raw compression. 
WT-2549 joins using recno keys return no values WT-2550 java ex_schema example fails WT-2552 Public API for pluggable filesystems WT-2553 Document in-memory configuration and WT_CACHE_FULL error return WT-2556 typo in the Java example code WT-2557 format test program should discard log files after incremental backup WT-2558 WT_PAGE structure reorganization WT-2559 Jenkins Windows segfault in logging code WT-2560 test/format workload stuck trying to update oldest transaction ID WT-2562 reconfig02 test failing sometimes on PPC WT-2565 item 3573 on page at [write-check] is a corrupted cell WT-2566 All lock operations should be barriers WT-2567 segfault in test/format log truncate WT-2568 Java PackTest.java compilation error WT-2569 win_handle_read should always call GetLastError on error WT-2570 Minor lint cleanups. WT-2571 join code cleanup WT-2572 don't select an in-memory format run if incompatible options configured WT-2573 free of stack-allocated WT_REF WT-2574 format doesn't free all allocated configure memory WT-2576 variable-length column-store out-of-order return WT-2577 core dump discarding non-existent addresses WT-2579 in-memory configurations break debugging support WT-2580 potential SWIG naming conflict in Java WT-2581 assert multi->disk_image == NULL WT-2582 cache eviction server error: WT_RESTART WT-2583 incremental backup can prevent future recovery WT-2584 don't use periods in error messages WT-2586 Remove ex_config.c until config cursors are supported WT-2592 Joins using non-recno key types not working WT-2593 disk full with pre-allocated log files WT-2595 Fix compiler warning in packing tests WT-2598 in-memory FS needs fast lookup on file names WT-2599 split out the checksum code from the support directory WT-2600 clean up test program #includes WT-2602 LSM stress hangs with very large uncompressed pages WT-2609 Incorrect "skips API_END call" error. WT-2612 The dist/s_prototypes script is creating a debugging file xxx. 
WT-2613 WT Compile windows Alt is returning a C4100 error WT-2615 Enabling checkpoints in test/format leads to reduced concurrency WT-2616 In-memory deadlock getting size WT-2621 WiredTiger fails to compile on MSVC 2013 SERVER-23661 $sample takes disproportionately long time on newly created collection SERVER-23904 WiredTiger changes for MongoDB 3.3.6
Diffstat (limited to 'src/third_party/wiredtiger/src/btree')
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_curnext.c 15
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_curprev.c 18
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_cursor.c 42
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_debug.c 96
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_delete.c 7
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_discard.c 53
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_handle.c 21
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_huffman.c 25
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_page.c 23
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_read.c 8
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_rebalance.c 5
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_ret.c 2
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_slvg.c 45
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_split.c 152
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_sync.c 4
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_vrfy.c 12
-rw-r--r-- src/third_party/wiredtiger/src/btree/col_modify.c 20
-rw-r--r-- src/third_party/wiredtiger/src/btree/col_srch.c 30
-rw-r--r-- src/third_party/wiredtiger/src/btree/row_key.c 6
-rw-r--r-- src/third_party/wiredtiger/src/btree/row_modify.c 13
-rw-r--r-- src/third_party/wiredtiger/src/btree/row_srch.c 13
21 files changed, 323 insertions, 287 deletions
diff --git a/src/third_party/wiredtiger/src/btree/bt_curnext.c b/src/third_party/wiredtiger/src/btree/bt_curnext.c
index 63b2e2abebc..70b3ba56e31 100644
--- a/src/third_party/wiredtiger/src/btree/bt_curnext.c
+++ b/src/third_party/wiredtiger/src/btree/bt_curnext.c
@@ -86,10 +86,10 @@ __cursor_fix_next(WT_CURSOR_BTREE *cbt, bool newpage)
/* Initialize for each new page. */
if (newpage) {
- cbt->last_standard_recno = __col_fix_last_recno(page);
+ cbt->last_standard_recno = __col_fix_last_recno(cbt->ref);
if (cbt->last_standard_recno == 0)
return (WT_NOTFOUND);
- __cursor_set_recno(cbt, page->pg_fix_recno);
+ __cursor_set_recno(cbt, cbt->ref->ref_recno);
goto new_page;
}
@@ -107,7 +107,7 @@ new_page:
cbt->ins = NULL;
upd = cbt->ins == NULL ? NULL : __wt_txn_read(session, cbt->ins->upd);
if (upd == NULL) {
- cbt->v = __bit_getv_recno(page, cbt->recno, btree->bitcnt);
+ cbt->v = __bit_getv_recno(cbt->ref, cbt->recno, btree->bitcnt);
val->data = &cbt->v;
} else
val->data = WT_UPDATE_DATA(upd);
@@ -179,10 +179,10 @@ __cursor_var_next(WT_CURSOR_BTREE *cbt, bool newpage)
/* Initialize for each new page. */
if (newpage) {
- cbt->last_standard_recno = __col_var_last_recno(page);
+ cbt->last_standard_recno = __col_var_last_recno(cbt->ref);
if (cbt->last_standard_recno == 0)
return (WT_NOTFOUND);
- __cursor_set_recno(cbt, page->pg_var_recno);
+ __cursor_set_recno(cbt, cbt->ref->ref_recno);
goto new_page;
}
@@ -194,7 +194,7 @@ __cursor_var_next(WT_CURSOR_BTREE *cbt, bool newpage)
new_page: /* Find the matching WT_COL slot. */
if ((cip =
- __col_var_search(page, cbt->recno, &rle_start)) == NULL)
+ __col_var_search(cbt->ref, cbt->recno, &rle_start)) == NULL)
return (WT_NOTFOUND);
cbt->slot = WT_COL_SLOT(page, cip);
@@ -558,7 +558,8 @@ __wt_btcur_iterate_setup(WT_CURSOR_BTREE *cbt)
* page.
*/
cbt->last_standard_recno = page->type == WT_PAGE_COL_VAR ?
- __col_var_last_recno(page) : __col_fix_last_recno(page);
+ __col_var_last_recno(cbt->ref) :
+ __col_fix_last_recno(cbt->ref);
/* If we're traversing the append list, set the reference. */
if (cbt->ins_head != NULL &&
diff --git a/src/third_party/wiredtiger/src/btree/bt_curprev.c b/src/third_party/wiredtiger/src/btree/bt_curprev.c
index 7475c0f1312..872f648446c 100644
--- a/src/third_party/wiredtiger/src/btree/bt_curprev.c
+++ b/src/third_party/wiredtiger/src/btree/bt_curprev.c
@@ -128,12 +128,10 @@ static inline int
__cursor_fix_append_prev(WT_CURSOR_BTREE *cbt, bool newpage)
{
WT_ITEM *val;
- WT_PAGE *page;
WT_SESSION_IMPL *session;
WT_UPDATE *upd;
session = (WT_SESSION_IMPL *)cbt->iface.session;
- page = cbt->ref->page;
val = &cbt->iface.value;
if (newpage) {
@@ -176,8 +174,8 @@ __cursor_fix_append_prev(WT_CURSOR_BTREE *cbt, bool newpage)
* to a record number matching the first record on the page.
*/
if (cbt->ins == NULL &&
- (cbt->recno == page->pg_fix_recno ||
- __col_fix_last_recno(page) != 0))
+ (cbt->recno == cbt->ref->ref_recno ||
+ __col_fix_last_recno(cbt->ref) != 0))
return (WT_NOTFOUND);
}
@@ -234,7 +232,7 @@ __cursor_fix_prev(WT_CURSOR_BTREE *cbt, bool newpage)
/* Initialize for each new page. */
if (newpage) {
- cbt->last_standard_recno = __col_fix_last_recno(page);
+ cbt->last_standard_recno = __col_fix_last_recno(cbt->ref);
if (cbt->last_standard_recno == 0)
return (WT_NOTFOUND);
__cursor_set_recno(cbt, cbt->last_standard_recno);
@@ -242,7 +240,7 @@ __cursor_fix_prev(WT_CURSOR_BTREE *cbt, bool newpage)
}
/* Move to the previous entry and return the item. */
- if (cbt->recno == page->pg_fix_recno)
+ if (cbt->recno == cbt->ref->ref_recno)
return (WT_NOTFOUND);
__cursor_set_recno(cbt, cbt->recno - 1);
@@ -255,7 +253,7 @@ new_page:
cbt->ins = NULL;
upd = cbt->ins == NULL ? NULL : __wt_txn_read(session, cbt->ins->upd);
if (upd == NULL) {
- cbt->v = __bit_getv_recno(page, cbt->recno, btree->bitcnt);
+ cbt->v = __bit_getv_recno(cbt->ref, cbt->recno, btree->bitcnt);
val->data = &cbt->v;
} else
val->data = WT_UPDATE_DATA(upd);
@@ -327,7 +325,7 @@ __cursor_var_prev(WT_CURSOR_BTREE *cbt, bool newpage)
/* Initialize for each new page. */
if (newpage) {
- cbt->last_standard_recno = __col_var_last_recno(page);
+ cbt->last_standard_recno = __col_var_last_recno(cbt->ref);
if (cbt->last_standard_recno == 0)
return (WT_NOTFOUND);
__cursor_set_recno(cbt, cbt->last_standard_recno);
@@ -338,12 +336,12 @@ __cursor_var_prev(WT_CURSOR_BTREE *cbt, bool newpage)
for (;;) {
__cursor_set_recno(cbt, cbt->recno - 1);
-new_page: if (cbt->recno < page->pg_var_recno)
+new_page: if (cbt->recno < cbt->ref->ref_recno)
return (WT_NOTFOUND);
/* Find the matching WT_COL slot. */
if ((cip =
- __col_var_search(page, cbt->recno, &rle_start)) == NULL)
+ __col_var_search(cbt->ref, cbt->recno, &rle_start)) == NULL)
return (WT_NOTFOUND);
cbt->slot = WT_COL_SLOT(page, cip);
diff --git a/src/third_party/wiredtiger/src/btree/bt_cursor.c b/src/third_party/wiredtiger/src/btree/bt_cursor.c
index 1f3ac443495..4b73b76c8c8 100644
--- a/src/third_party/wiredtiger/src/btree/bt_cursor.c
+++ b/src/third_party/wiredtiger/src/btree/bt_cursor.c
@@ -164,12 +164,12 @@ __cursor_valid(WT_CURSOR_BTREE *cbt, WT_UPDATE **updp)
* column-store pages don't have slots, but map one-to-one to
* keys, check for retrieval past the end of the page.
*/
- if (cbt->recno >= page->pg_fix_recno + page->pg_fix_entries)
+ if (cbt->recno >= cbt->ref->ref_recno + page->pg_fix_entries)
return (false);
/*
- * Updates aren't stored on the page, an update would have
- * appeared as an "insert" object; no further checks to do.
+ * An update would have appeared as an "insert" object; no
+ * further checks to do.
*/
break;
case BTREE_COL_VAR:
@@ -179,19 +179,18 @@ __cursor_valid(WT_CURSOR_BTREE *cbt, WT_UPDATE **updp)
WT_ASSERT(session, cbt->slot < page->pg_var_entries);
/*
- * Column-store updates aren't stored on the page, instead they
- * are stored as "insert" objects. If search returned an insert
- * object we can't return, the returned on-page object must be
- * checked for a match.
+ * Column-store updates are stored as "insert" objects. If
+ * search returned an insert object we can't return, the
+ * returned on-page object must be checked for a match.
*/
if (cbt->ins != NULL && !F_ISSET(cbt, WT_CBT_VAR_ONPAGE_MATCH))
return (false);
/*
- * Updates aren't stored on the page, an update would have
- * appeared as an "insert" object; however, variable-length
- * column store deletes are written into the backing store,
- * check the cell for a record already deleted when read.
+ * Although updates would have appeared as an "insert" objects,
+ * variable-length column store deletes are written into the
+ * backing store; check the cell for a record already deleted
+ * when read.
*/
cip = &page->pg_var_d[cbt->slot];
if ((cell = WT_COL_PTR(page, cip)) == NULL ||
@@ -211,9 +210,11 @@ __cursor_valid(WT_CURSOR_BTREE *cbt, WT_UPDATE **updp)
if (cbt->ins != NULL)
return (false);
- /* Updates are stored on the page, check for a delete. */
- if (page->pg_row_upd != NULL && (upd = __wt_txn_read(
- session, page->pg_row_upd[cbt->slot])) != NULL) {
+ /* Check for an update. */
+ if (page->modify != NULL &&
+ page->modify->mod_row_update != NULL &&
+ (upd = __wt_txn_read(session,
+ page->modify->mod_row_update[cbt->slot])) != NULL) {
if (WT_UPDATE_DELETED_ISSET(upd))
return (false);
if (updp != NULL)
@@ -325,7 +326,7 @@ __wt_btcur_search(WT_CURSOR_BTREE *cbt)
valid = false;
if (F_ISSET(cbt, WT_CBT_ACTIVE) &&
cbt->ref->page->read_gen != WT_READGEN_OLDEST) {
- __wt_txn_cursor_op(session);
+ WT_ERR(__wt_txn_cursor_op(session));
WT_ERR(btree->type == BTREE_ROW ?
__cursor_row_search(session, cbt, cbt->ref, false) :
@@ -405,7 +406,7 @@ __wt_btcur_search_near(WT_CURSOR_BTREE *cbt, int *exactp)
if (btree->type == BTREE_ROW &&
F_ISSET(cbt, WT_CBT_ACTIVE) &&
cbt->ref->page->read_gen != WT_READGEN_OLDEST) {
- __wt_txn_cursor_op(session);
+ WT_ERR(__wt_txn_cursor_op(session));
WT_ERR(__cursor_row_search(session, cbt, cbt->ref, true));
@@ -596,9 +597,12 @@ __curfile_update_check(WT_CURSOR_BTREE *cbt)
return (0);
if (cbt->ins != NULL)
return (__wt_txn_update_check(session, cbt->ins->upd));
- if (btree->type == BTREE_ROW && cbt->ref->page->pg_row_upd != NULL)
- return (__wt_txn_update_check(
- session, cbt->ref->page->pg_row_upd[cbt->slot]));
+
+ if (btree->type == BTREE_ROW &&
+ cbt->ref->page->modify != NULL &&
+ cbt->ref->page->modify->mod_row_update != NULL)
+ return (__wt_txn_update_check(session,
+ cbt->ref->page->modify->mod_row_update[cbt->slot]));
return (0);
}
diff --git a/src/third_party/wiredtiger/src/btree/bt_debug.c b/src/third_party/wiredtiger/src/btree/bt_debug.c
index 8ce1463a0db..bd5970ecf86 100644
--- a/src/third_party/wiredtiger/src/btree/bt_debug.c
+++ b/src/third_party/wiredtiger/src/btree/bt_debug.c
@@ -19,7 +19,7 @@ typedef struct {
* When using the standard event handlers, the debugging output has to
* do its own message handling because its output isn't line-oriented.
*/
- WT_FH *fh; /* Output file stream */
+ FILE *fp;
WT_ITEM *msg; /* Buffered message */
WT_ITEM *tmp; /* Temporary space */
@@ -36,17 +36,17 @@ static int __debug_config(WT_SESSION_IMPL *, WT_DBG *, const char *);
static int __debug_dsk_cell(WT_DBG *, const WT_PAGE_HEADER *);
static void __debug_dsk_col_fix(WT_DBG *, const WT_PAGE_HEADER *);
static void __debug_item(WT_DBG *, const char *, const void *, size_t);
-static int __debug_page(WT_DBG *, WT_PAGE *, uint32_t);
-static void __debug_page_col_fix(WT_DBG *, WT_PAGE *);
+static int __debug_page(WT_DBG *, WT_REF *, uint32_t);
+static void __debug_page_col_fix(WT_DBG *, WT_REF *);
static int __debug_page_col_int(WT_DBG *, WT_PAGE *, uint32_t);
-static int __debug_page_col_var(WT_DBG *, WT_PAGE *);
-static int __debug_page_metadata(WT_DBG *, WT_PAGE *);
+static int __debug_page_col_var(WT_DBG *, WT_REF *);
+static int __debug_page_metadata(WT_DBG *, WT_REF *);
static int __debug_page_row_int(WT_DBG *, WT_PAGE *, uint32_t);
static int __debug_page_row_leaf(WT_DBG *, WT_PAGE *);
static void __debug_ref(WT_DBG *, WT_REF *);
static void __debug_row_skip(WT_DBG *, WT_INSERT_HEAD *);
static int __debug_tree(
- WT_SESSION_IMPL *, WT_BTREE *, WT_PAGE *, const char *, uint32_t);
+ WT_SESSION_IMPL *, WT_BTREE *, WT_REF *, const char *, uint32_t);
static void __debug_update(WT_DBG *, WT_UPDATE *, bool);
static void __dmsg(WT_DBG *, const char *, ...)
WT_GCC_FUNC_DECL_ATTRIBUTE((format (printf, 2, 3)));
@@ -97,8 +97,11 @@ __debug_config(WT_SESSION_IMPL *session, WT_DBG *ds, const char *ofile)
if (ofile == NULL)
return (__wt_scr_alloc(session, 512, &ds->msg));
- return (__wt_open(session, ofile, WT_FILE_TYPE_REGULAR,
- WT_OPEN_CREATE | WT_STREAM_LINE_BUFFER | WT_STREAM_WRITE, &ds->fh));
+ if ((ds->fp = fopen(ofile, "w")) == NULL)
+ return (EIO);
+ __wt_stream_set_line_buffer(ds->fp);
+
+ return (0);
}
/*
@@ -127,7 +130,8 @@ __dmsg_wrapup(WT_DBG *ds)
}
/* Close any file we opened. */
- (void)__wt_close(session, &ds->fh);
+ if (ds->fp != NULL)
+ (void)fclose(ds->fp);
}
/*
@@ -152,7 +156,7 @@ __dmsg(WT_DBG *ds, const char *fmt, ...)
* the output chunk, and pass it to the event handler once we see a
* terminating newline.
*/
- if (ds->fh == NULL) {
+ if (ds->fp == NULL) {
msg = ds->msg;
for (;;) {
p = (char *)msg->mem + msg->size;
@@ -184,7 +188,7 @@ __dmsg(WT_DBG *ds, const char *fmt, ...)
}
} else {
va_start(ap, fmt);
- (void)__wt_vfprintf(session, ds->fh, fmt, ap);
+ (void)vfprintf(ds->fp, fmt, ap);
va_end(ap);
}
}
@@ -498,10 +502,10 @@ __wt_debug_tree_shape(
*/
int
__wt_debug_tree_all(
- WT_SESSION_IMPL *session, WT_BTREE *btree, WT_PAGE *page, const char *ofile)
+ WT_SESSION_IMPL *session, WT_BTREE *btree, WT_REF *ref, const char *ofile)
{
return (__debug_tree(session,
- btree, page, ofile, WT_DEBUG_TREE_LEAF | WT_DEBUG_TREE_WALK));
+ btree, ref, ofile, WT_DEBUG_TREE_LEAF | WT_DEBUG_TREE_WALK));
}
/*
@@ -513,9 +517,9 @@ __wt_debug_tree_all(
*/
int
__wt_debug_tree(
- WT_SESSION_IMPL *session, WT_BTREE *btree, WT_PAGE *page, const char *ofile)
+ WT_SESSION_IMPL *session, WT_BTREE *btree, WT_REF *ref, const char *ofile)
{
- return (__debug_tree(session, btree, page, ofile, WT_DEBUG_TREE_WALK));
+ return (__debug_tree(session, btree, ref, ofile, WT_DEBUG_TREE_WALK));
}
/*
@@ -523,7 +527,7 @@ __wt_debug_tree(
* Dump the in-memory information for a page.
*/
int
-__wt_debug_page(WT_SESSION_IMPL *session, WT_PAGE *page, const char *ofile)
+__wt_debug_page(WT_SESSION_IMPL *session, WT_REF *ref, const char *ofile)
{
WT_DBG *ds, _ds;
WT_DECL_RET;
@@ -533,7 +537,7 @@ __wt_debug_page(WT_SESSION_IMPL *session, WT_PAGE *page, const char *ofile)
ds = &_ds;
WT_RET(__debug_config(session, ds, ofile));
- ret = __debug_page(ds, page, WT_DEBUG_TREE_LEAF);
+ ret = __debug_page(ds, ref, WT_DEBUG_TREE_LEAF);
__dmsg_wrapup(ds);
@@ -549,9 +553,8 @@ __wt_debug_page(WT_SESSION_IMPL *session, WT_PAGE *page, const char *ofile)
* in this function
*/
static int
-__debug_tree(
- WT_SESSION_IMPL *session, WT_BTREE *btree,
- WT_PAGE *page, const char *ofile, uint32_t flags)
+__debug_tree(WT_SESSION_IMPL *session,
+ WT_BTREE *btree, WT_REF *ref, const char *ofile, uint32_t flags)
{
WT_DBG *ds, _ds;
WT_DECL_RET;
@@ -560,10 +563,10 @@ __debug_tree(
WT_RET(__debug_config(session, ds, ofile));
/* A NULL page starts at the top of the tree -- it's a convenience. */
- if (page == NULL)
- page = btree->root.page;
+ if (ref == NULL)
+ ref = &btree->root;
- WT_WITH_BTREE(session, btree, ret = __debug_page(ds, page, flags));
+ WT_WITH_BTREE(session, btree, ret = __debug_page(ds, ref, flags));
__dmsg_wrapup(ds);
@@ -575,7 +578,7 @@ __debug_tree(
* Dump the in-memory information for an in-memory page.
*/
static int
-__debug_page(WT_DBG *ds, WT_PAGE *page, uint32_t flags)
+__debug_page(WT_DBG *ds, WT_REF *ref, uint32_t flags)
{
WT_DECL_RET;
WT_SESSION_IMPL *session;
@@ -583,32 +586,32 @@ __debug_page(WT_DBG *ds, WT_PAGE *page, uint32_t flags)
session = ds->session;
/* Dump the page metadata. */
- WT_WITH_PAGE_INDEX(session, ret = __debug_page_metadata(ds, page));
+ WT_WITH_PAGE_INDEX(session, ret = __debug_page_metadata(ds, ref));
WT_RET(ret);
/* Dump the page. */
- switch (page->type) {
+ switch (ref->page->type) {
case WT_PAGE_COL_FIX:
if (LF_ISSET(WT_DEBUG_TREE_LEAF))
- __debug_page_col_fix(ds, page);
+ __debug_page_col_fix(ds, ref);
break;
case WT_PAGE_COL_INT:
WT_WITH_PAGE_INDEX(session,
- ret = __debug_page_col_int(ds, page, flags));
+ ret = __debug_page_col_int(ds, ref->page, flags));
WT_RET(ret);
break;
case WT_PAGE_COL_VAR:
if (LF_ISSET(WT_DEBUG_TREE_LEAF))
- WT_RET(__debug_page_col_var(ds, page));
+ WT_RET(__debug_page_col_var(ds, ref));
break;
case WT_PAGE_ROW_INT:
WT_WITH_PAGE_INDEX(session,
- ret = __debug_page_row_int(ds, page, flags));
+ ret = __debug_page_row_int(ds, ref->page, flags));
WT_RET(ret);
break;
case WT_PAGE_ROW_LEAF:
if (LF_ISSET(WT_DEBUG_TREE_LEAF))
- WT_RET(__debug_page_row_leaf(ds, page));
+ WT_RET(__debug_page_row_leaf(ds, ref->page));
break;
WT_ILLEGAL_VALUE(session);
}
@@ -621,30 +624,32 @@ __debug_page(WT_DBG *ds, WT_PAGE *page, uint32_t flags)
* Dump an in-memory page's metadata.
*/
static int
-__debug_page_metadata(WT_DBG *ds, WT_PAGE *page)
+__debug_page_metadata(WT_DBG *ds, WT_REF *ref)
{
+ WT_PAGE *page;
WT_PAGE_INDEX *pindex;
WT_PAGE_MODIFY *mod;
WT_SESSION_IMPL *session;
uint32_t entries;
session = ds->session;
+ page = ref->page;
mod = page->modify;
__dmsg(ds, "%p", page);
switch (page->type) {
case WT_PAGE_COL_INT:
- __dmsg(ds, " recno %" PRIu64, page->pg_intl_recno);
+ __dmsg(ds, " recno %" PRIu64, ref->ref_recno);
WT_INTL_INDEX_GET(session, page, pindex);
entries = pindex->entries;
break;
case WT_PAGE_COL_FIX:
- __dmsg(ds, " recno %" PRIu64, page->pg_fix_recno);
+ __dmsg(ds, " recno %" PRIu64, ref->ref_recno);
entries = page->pg_fix_entries;
break;
case WT_PAGE_COL_VAR:
- __dmsg(ds, " recno %" PRIu64, page->pg_var_recno);
+ __dmsg(ds, " recno %" PRIu64, ref->ref_recno);
entries = page->pg_var_entries;
break;
case WT_PAGE_ROW_INT:
@@ -707,10 +712,11 @@ __debug_page_metadata(WT_DBG *ds, WT_PAGE *page)
* Dump an in-memory WT_PAGE_COL_FIX page.
*/
static void
-__debug_page_col_fix(WT_DBG *ds, WT_PAGE *page)
+__debug_page_col_fix(WT_DBG *ds, WT_REF *ref)
{
WT_BTREE *btree;
WT_INSERT *ins;
+ WT_PAGE *page;
const WT_PAGE_HEADER *dsk;
WT_SESSION_IMPL *session;
uint64_t recno;
@@ -721,8 +727,9 @@ __debug_page_col_fix(WT_DBG *ds, WT_PAGE *page)
session = ds->session;
btree = S2BT(session);
+ page = ref->page;
dsk = page->dsk;
- recno = page->pg_fix_recno;
+ recno = ref->ref_recno;
if (dsk != NULL) {
ins = WT_SKIP_FIRST(WT_COL_UPDATE_SINGLE(page));
@@ -767,7 +774,7 @@ __debug_page_col_int(WT_DBG *ds, WT_PAGE *page, uint32_t flags)
session = ds->session;
WT_INTL_FOREACH_BEGIN(session, page, ref) {
- __dmsg(ds, "\trecno %" PRIu64 "\n", ref->key.recno);
+ __dmsg(ds, "\trecno %" PRIu64 "\n", ref->ref_recno);
__debug_ref(ds, ref);
} WT_INTL_FOREACH_END;
@@ -775,7 +782,7 @@ __debug_page_col_int(WT_DBG *ds, WT_PAGE *page, uint32_t flags)
WT_INTL_FOREACH_BEGIN(session, page, ref) {
if (ref->state == WT_REF_MEM) {
__dmsg(ds, "\n");
- WT_RET(__debug_page(ds, ref->page, flags));
+ WT_RET(__debug_page(ds, ref, flags));
}
} WT_INTL_FOREACH_END;
@@ -787,18 +794,20 @@ __debug_page_col_int(WT_DBG *ds, WT_PAGE *page, uint32_t flags)
* Dump an in-memory WT_PAGE_COL_VAR page.
*/
static int
-__debug_page_col_var(WT_DBG *ds, WT_PAGE *page)
+__debug_page_col_var(WT_DBG *ds, WT_REF *ref)
{
WT_CELL *cell;
WT_CELL_UNPACK *unpack, _unpack;
WT_COL *cip;
WT_INSERT_HEAD *update;
+ WT_PAGE *page;
uint64_t recno, rle;
uint32_t i;
char tag[64];
unpack = &_unpack;
- recno = page->pg_var_recno;
+ page = ref->page;
+ recno = ref->ref_recno;
WT_COL_FOREACH(page, cip, i) {
if ((cell = WT_COL_PTR(page, cip)) == NULL) {
@@ -849,7 +858,7 @@ __debug_page_row_int(WT_DBG *ds, WT_PAGE *page, uint32_t flags)
WT_INTL_FOREACH_BEGIN(session, page, ref) {
if (ref->state == WT_REF_MEM) {
__dmsg(ds, "\n");
- WT_RET(__debug_page(ds, ref->page, flags));
+ WT_RET(__debug_page(ds, ref, flags));
}
} WT_INTL_FOREACH_END;
return (0);
@@ -952,8 +961,7 @@ __debug_update(WT_DBG *ds, WT_UPDATE *upd, bool hexbyte)
__dmsg(ds, "\tvalue {deleted}\n");
else if (hexbyte) {
__dmsg(ds, "\t{");
- __debug_hex_byte(ds,
- ((uint8_t *)WT_UPDATE_DATA(upd))[0]);
+ __debug_hex_byte(ds, *(uint8_t *)WT_UPDATE_DATA(upd));
__dmsg(ds, "}\n");
} else
__debug_item(ds,
diff --git a/src/third_party/wiredtiger/src/btree/bt_delete.c b/src/third_party/wiredtiger/src/btree/bt_delete.c
index ba16dd204e8..54b7fedb31d 100644
--- a/src/third_party/wiredtiger/src/btree/bt_delete.c
+++ b/src/third_party/wiredtiger/src/btree/bt_delete.c
@@ -288,10 +288,9 @@ __wt_delete_page_instantiate(WT_SESSION_IMPL *session, WT_REF *ref)
* read-only or if the application never modifies the tree, we're not
* able to do so.)
*/
- if (btree->modified) {
- WT_RET(__wt_page_modify_init(session, page));
+ WT_RET(__wt_page_modify_init(session, page));
+ if (btree->modified)
__wt_page_modify_set(session, page);
- }
/*
* An operation is accessing a "deleted" page, and we're building an
@@ -326,7 +325,7 @@ __wt_delete_page_instantiate(WT_SESSION_IMPL *session, WT_REF *ref)
/* Allocate the per-page update array. */
WT_ERR(__wt_calloc_def(session, page->pg_row_entries, &upd_array));
- page->pg_row_upd = upd_array;
+ page->modify->mod_row_update = upd_array;
/*
* Fill in the per-reference update array with references to update
diff --git a/src/third_party/wiredtiger/src/btree/bt_discard.c b/src/third_party/wiredtiger/src/btree/bt_discard.c
index 1181d92609f..9807d5bc88f 100644
--- a/src/third_party/wiredtiger/src/btree/bt_discard.c
+++ b/src/third_party/wiredtiger/src/btree/bt_discard.c
@@ -40,7 +40,6 @@ __wt_ref_out(WT_SESSION_IMPL *session, WT_REF *ref)
void
__wt_page_out(WT_SESSION_IMPL *session, WT_PAGE **pagep)
{
- WT_FH *fh;
WT_PAGE *page;
WT_PAGE_HEADER *dsk;
WT_PAGE_MODIFY *mod;
@@ -134,10 +133,11 @@ __wt_page_out(WT_SESSION_IMPL *session, WT_PAGE **pagep)
dsk = (WT_PAGE_HEADER *)page->dsk;
if (F_ISSET_ATOMIC(page, WT_PAGE_DISK_ALLOC))
__wt_overwrite_and_free_len(session, dsk, dsk->mem_size);
- if (F_ISSET_ATOMIC(page, WT_PAGE_DISK_MAPPED)) {
- fh = S2BT(session)->bm->block->fh;
- (void)fh->fh_map_discard(session, fh, dsk, dsk->mem_size);
- }
+
+ /* Discard any mapped image. */
+ if (F_ISSET_ATOMIC(page, WT_PAGE_DISK_MAPPED))
+ (void)S2BT(session)->bm->map_discard(
+ S2BT(session)->bm, session, dsk, (size_t)dsk->mem_size);
__wt_overwrite_and_free(session, page);
}
@@ -194,16 +194,33 @@ __free_page_modify(WT_SESSION_IMPL *session, WT_PAGE *page)
__free_skip_list(
session, WT_SKIP_FIRST(append), update_ignore);
__wt_free(session, append);
- __wt_free(session, mod->mod_append);
+ __wt_free(session, mod->mod_col_append);
}
/* Free the insert/update array. */
- if (mod->mod_update != NULL)
- __free_skip_array(session, mod->mod_update,
+ if (mod->mod_col_update != NULL)
+ __free_skip_array(session, mod->mod_col_update,
page->type ==
WT_PAGE_COL_FIX ? 1 : page->pg_var_entries,
update_ignore);
break;
+ case WT_PAGE_ROW_LEAF:
+ /*
+ * Free the insert array.
+ *
+ * Row-store tables have one additional slot in the insert array
+ * (the insert array has an extra slot to hold keys that sort
+ * before keys found on the original page).
+ */
+ if (mod->mod_row_insert != NULL)
+ __free_skip_array(session, mod->mod_row_insert,
+ page->pg_row_entries + 1, update_ignore);
+
+ /* Free the update array. */
+ if (mod->mod_row_update != NULL)
+ __free_update(session, mod->mod_row_update,
+ page->pg_row_entries, update_ignore);
+ break;
}
/* Free the overflow on-page, reuse and transaction-cache skiplists. */
@@ -324,10 +341,6 @@ __free_page_row_leaf(WT_SESSION_IMPL *session, WT_PAGE *page)
WT_ROW *rip;
uint32_t i;
void *copy;
- bool update_ignore;
-
- /* In some failed-split cases, we can't discard updates. */
- update_ignore = F_ISSET_ATOMIC(page, WT_PAGE_UPDATE_IGNORE);
/*
* Free the in-memory index array.
@@ -342,22 +355,6 @@ __free_page_row_leaf(WT_SESSION_IMPL *session, WT_PAGE *page)
page, copy, &ikey, NULL, NULL, NULL);
__wt_free(session, ikey);
}
-
- /*
- * Free the insert array.
- *
- * Row-store tables have one additional slot in the insert array (the
- * insert array has an extra slot to hold keys that sort before keys
- * found on the original page).
- */
- if (page->pg_row_ins != NULL)
- __free_skip_array(session,
- page->pg_row_ins, page->pg_row_entries + 1, update_ignore);
-
- /* Free the update array. */
- if (page->pg_row_upd != NULL)
- __free_update(session,
- page->pg_row_upd, page->pg_row_entries, update_ignore);
}
/*
diff --git a/src/third_party/wiredtiger/src/btree/bt_handle.c b/src/third_party/wiredtiger/src/btree/bt_handle.c
index 02eea9c2f0c..ba545859d07 100644
--- a/src/third_party/wiredtiger/src/btree/bt_handle.c
+++ b/src/third_party/wiredtiger/src/btree/bt_handle.c
@@ -371,7 +371,7 @@ __wt_root_ref_init(WT_REF *root_ref, WT_PAGE *root, bool is_recno)
root_ref->page = root;
root_ref->state = WT_REF_MEM;
- root_ref->key.recno = is_recno ? 1 : WT_RECNO_OOB;
+ root_ref->ref_recno = is_recno ? 1 : WT_RECNO_OOB;
root->pg_intl_parent_ref = root_ref;
}
@@ -495,7 +495,7 @@ __btree_tree_open_empty(WT_SESSION_IMPL *session, bool creation)
case BTREE_COL_FIX:
case BTREE_COL_VAR:
WT_ERR(__wt_page_alloc(
- session, WT_PAGE_COL_INT, 1, 1, true, &root));
+ session, WT_PAGE_COL_INT, 1, true, &root));
root->pg_intl_parent_ref = &btree->root;
pindex = WT_INTL_INDEX_GET_SAFE(root);
@@ -504,11 +504,11 @@ __btree_tree_open_empty(WT_SESSION_IMPL *session, bool creation)
ref->page = NULL;
ref->addr = NULL;
ref->state = WT_REF_DELETED;
- ref->key.recno = 1;
+ ref->ref_recno = 1;
break;
case BTREE_ROW:
WT_ERR(__wt_page_alloc(
- session, WT_PAGE_ROW_INT, 0, 1, true, &root));
+ session, WT_PAGE_ROW_INT, 1, true, &root));
root->pg_intl_parent_ref = &btree->root;
pindex = WT_INTL_INDEX_GET_SAFE(root);
@@ -524,7 +524,7 @@ __btree_tree_open_empty(WT_SESSION_IMPL *session, bool creation)
/* Bulk loads require a leaf page for reconciliation: create it now. */
if (F_ISSET(btree, WT_BTREE_BULK)) {
- WT_ERR(__wt_btree_new_leaf_page(session, 1, &leaf));
+ WT_ERR(__wt_btree_new_leaf_page(session, &leaf));
ref->page = leaf;
ref->state = WT_REF_MEM;
WT_ERR(__wt_page_modify_init(session, leaf));
@@ -548,8 +548,7 @@ err: if (leaf != NULL)
* Create an empty leaf page.
*/
int
-__wt_btree_new_leaf_page(
- WT_SESSION_IMPL *session, uint64_t recno, WT_PAGE **pagep)
+__wt_btree_new_leaf_page(WT_SESSION_IMPL *session, WT_PAGE **pagep)
{
WT_BTREE *btree;
@@ -558,15 +557,15 @@ __wt_btree_new_leaf_page(
switch (btree->type) {
case BTREE_COL_FIX:
WT_RET(__wt_page_alloc(
- session, WT_PAGE_COL_FIX, recno, 0, false, pagep));
+ session, WT_PAGE_COL_FIX, 0, false, pagep));
break;
case BTREE_COL_VAR:
WT_RET(__wt_page_alloc(
- session, WT_PAGE_COL_VAR, recno, 0, false, pagep));
+ session, WT_PAGE_COL_VAR, 0, false, pagep));
break;
case BTREE_ROW:
WT_RET(__wt_page_alloc(
- session, WT_PAGE_ROW_LEAF, WT_RECNO_OOB, 0, false, pagep));
+ session, WT_PAGE_ROW_LEAF, 0, false, pagep));
break;
WT_ILLEGAL_VALUE(session);
}
@@ -639,7 +638,7 @@ __btree_get_last_recno(WT_SESSION_IMPL *session)
page = next_walk->page;
btree->last_recno = page->type == WT_PAGE_COL_VAR ?
- __col_var_last_recno(page) : __col_fix_last_recno(page);
+ __col_var_last_recno(next_walk) : __col_fix_last_recno(next_walk);
return (__wt_page_release(session, next_walk, 0));
}
diff --git a/src/third_party/wiredtiger/src/btree/bt_huffman.c b/src/third_party/wiredtiger/src/btree/bt_huffman.c
index a1aaf2c7ea0..9e9d69c342e 100644
--- a/src/third_party/wiredtiger/src/btree/bt_huffman.c
+++ b/src/third_party/wiredtiger/src/btree/bt_huffman.c
@@ -133,10 +133,10 @@ static int __wt_huffman_read(WT_SESSION_IMPL *,
* Check for a Huffman configuration file and return the file name.
*/
static int
-__huffman_confchk_file(
- WT_SESSION_IMPL *session, WT_CONFIG_ITEM *v, bool *is_utf8p, WT_FH **fhp)
+__huffman_confchk_file(WT_SESSION_IMPL *session,
+ WT_CONFIG_ITEM *v, bool *is_utf8p, WT_FSTREAM **fsp)
{
- WT_FH *fh;
+ WT_FSTREAM *fs;
WT_DECL_RET;
size_t len;
char *fname;
@@ -157,14 +157,13 @@ __huffman_confchk_file(
/* Check the file exists. */
WT_RET(__wt_strndup(session, v->str + len, v->len - len, &fname));
- WT_ERR(__wt_open(session, fname, WT_FILE_TYPE_REGULAR,
- WT_OPEN_FIXED | WT_OPEN_READONLY | WT_STREAM_READ, &fh));
+ WT_ERR(__wt_fopen(session, fname, WT_OPEN_FIXED, WT_STREAM_READ, &fs));
/* Optionally return the file handle. */
- if (fhp == NULL)
- (void)__wt_close(session, &fh);
+ if (fsp == NULL)
+ (void)__wt_fclose(session, &fs);
else
- *fhp = fh;
+ *fsp = fs;
err: __wt_free(session, fname);
@@ -300,7 +299,7 @@ __wt_huffman_read(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *ip,
struct __wt_huffman_table *table, *tp;
WT_DECL_ITEM(tmp);
WT_DECL_RET;
- WT_FH *fh;
+ WT_FSTREAM *fs;
int64_t symbol, frequency;
u_int entries, lineno;
int n;
@@ -309,13 +308,13 @@ __wt_huffman_read(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *ip,
*tablep = NULL;
*entriesp = *numbytesp = 0;
- fh = NULL;
+ fs = NULL;
table = NULL;
/*
* Try and open the backing file.
*/
- WT_RET(__huffman_confchk_file(session, ip, &is_utf8, &fh));
+ WT_RET(__huffman_confchk_file(session, ip, &is_utf8, &fs));
/*
* UTF-8 table is 256 bytes, with a range of 0-255.
@@ -333,7 +332,7 @@ __wt_huffman_read(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *ip,
WT_ERR(__wt_scr_alloc(session, 0, &tmp));
for (tp = table, lineno = 1;; ++tp, ++lineno) {
- WT_ERR(__wt_getline(session, tmp, fh));
+ WT_ERR(__wt_getline(session, fs, tmp));
if (tmp->size == 0)
break;
n = sscanf(
@@ -378,7 +377,7 @@ __wt_huffman_read(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *ip,
if (0) {
err: __wt_free(session, table);
}
- (void)__wt_close(session, &fh);
+ (void)__wt_fclose(session, &fs);
__wt_scr_free(session, &tmp);
return (ret);
diff --git a/src/third_party/wiredtiger/src/btree/bt_page.c b/src/third_party/wiredtiger/src/btree/bt_page.c
index 9fa0145bbdd..00ec8aa4494 100644
--- a/src/third_party/wiredtiger/src/btree/bt_page.c
+++ b/src/third_party/wiredtiger/src/btree/bt_page.c
@@ -10,7 +10,7 @@
static void __inmem_col_fix(WT_SESSION_IMPL *, WT_PAGE *);
static void __inmem_col_int(WT_SESSION_IMPL *, WT_PAGE *);
-static int __inmem_col_var(WT_SESSION_IMPL *, WT_PAGE *, size_t *);
+static int __inmem_col_var(WT_SESSION_IMPL *, WT_PAGE *, uint64_t, size_t *);
static int __inmem_row_int(WT_SESSION_IMPL *, WT_PAGE *, size_t *);
static int __inmem_row_leaf(WT_SESSION_IMPL *, WT_PAGE *);
static int __inmem_row_leaf_entries(
@@ -21,8 +21,8 @@ static int __inmem_row_leaf_entries(
* Create or read a page into the cache.
*/
int
-__wt_page_alloc(WT_SESSION_IMPL *session, uint8_t type,
- uint64_t recno, uint32_t alloc_entries, bool alloc_refs, WT_PAGE **pagep)
+__wt_page_alloc(WT_SESSION_IMPL *session,
+ uint8_t type, uint32_t alloc_entries, bool alloc_refs, WT_PAGE **pagep)
{
WT_CACHE *cache;
WT_DECL_RET;
@@ -67,13 +67,10 @@ __wt_page_alloc(WT_SESSION_IMPL *session, uint8_t type,
switch (type) {
case WT_PAGE_COL_FIX:
- page->pg_fix_recno = recno;
page->pg_fix_entries = alloc_entries;
break;
case WT_PAGE_COL_INT:
case WT_PAGE_ROW_INT:
- page->pg_intl_recno = recno;
-
/*
* Internal pages have an array of references to objects so they
* can split. Allocate the array of references and optionally,
@@ -105,7 +102,6 @@ err: if ((pindex = WT_INTL_INDEX_GET_SAFE(page)) != NULL) {
}
break;
case WT_PAGE_COL_VAR:
- page->pg_var_recno = recno;
page->pg_var_d = (WT_COL *)((uint8_t *)page + sizeof(WT_PAGE));
page->pg_var_entries = alloc_entries;
break;
@@ -191,8 +187,7 @@ __wt_page_inmem(WT_SESSION_IMPL *session, WT_REF *ref,
}
/* Allocate and initialize a new WT_PAGE. */
- WT_RET(__wt_page_alloc(
- session, dsk->type, dsk->recno, alloc_entries, true, &page));
+ WT_RET(__wt_page_alloc(session, dsk->type, alloc_entries, true, &page));
page->dsk = dsk;
F_SET_ATOMIC(page, flags);
@@ -211,7 +206,7 @@ __wt_page_inmem(WT_SESSION_IMPL *session, WT_REF *ref,
__inmem_col_int(session, page);
break;
case WT_PAGE_COL_VAR:
- WT_ERR(__inmem_col_var(session, page, &size));
+ WT_ERR(__inmem_col_var(session, page, dsk->recno, &size));
break;
case WT_PAGE_ROW_INT:
WT_ERR(__inmem_row_int(session, page, &size));
@@ -292,7 +287,7 @@ __inmem_col_int(WT_SESSION_IMPL *session, WT_PAGE *page)
__wt_cell_unpack(cell, unpack);
ref->addr = cell;
- ref->key.recno = unpack->v;
+ ref->ref_recno = unpack->v;
}
}
@@ -329,7 +324,8 @@ __inmem_col_var_repeats(WT_SESSION_IMPL *session, WT_PAGE *page, uint32_t *np)
* column-store trees.
*/
static int
-__inmem_col_var(WT_SESSION_IMPL *session, WT_PAGE *page, size_t *sizep)
+__inmem_col_var(
+ WT_SESSION_IMPL *session, WT_PAGE *page, uint64_t recno, size_t *sizep)
{
WT_BTREE *btree;
WT_COL *cip;
@@ -337,13 +333,12 @@ __inmem_col_var(WT_SESSION_IMPL *session, WT_PAGE *page, size_t *sizep)
WT_CELL *cell;
WT_CELL_UNPACK *unpack, _unpack;
const WT_PAGE_HEADER *dsk;
- uint64_t recno, rle;
+ uint64_t rle;
size_t bytes_allocated;
uint32_t i, indx, n, repeat_off;
btree = S2BT(session);
dsk = page->dsk;
- recno = page->pg_var_recno;
repeats = NULL;
repeat_off = 0;
diff --git a/src/third_party/wiredtiger/src/btree/bt_read.c b/src/third_party/wiredtiger/src/btree/bt_read.c
index 5cf6a9bf2bc..89d16a3f827 100644
--- a/src/third_party/wiredtiger/src/btree/bt_read.c
+++ b/src/third_party/wiredtiger/src/btree/bt_read.c
@@ -326,7 +326,7 @@ __evict_force_check(WT_SESSION_IMPL *session, WT_REF *ref)
__wt_page_evict_soon(page);
/* Bump the oldest ID, we're about to do some visibility checks. */
- __wt_txn_update_oldest(session, false);
+ WT_RET(__wt_txn_update_oldest(session, false));
/* If eviction cannot succeed, don't try. */
return (__wt_page_can_evict(session, ref, NULL));
@@ -377,9 +377,7 @@ __page_read(WT_SESSION_IMPL *session, WT_REF *ref)
if (addr == NULL) {
WT_ASSERT(session, previous_state == WT_REF_DELETED);
- WT_ERR(__wt_btree_new_leaf_page(session,
- btree->type == BTREE_ROW ? WT_RECNO_OOB : ref->key.recno,
- &page));
+ WT_ERR(__wt_btree_new_leaf_page(session, &page));
ref->page = page;
goto done;
}
@@ -463,6 +461,8 @@ __wt_page_in_func(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags
btree = S2BT(session);
+ WT_STAT_FAST_CONN_INCR(session, cache_pages_requested);
+ WT_STAT_FAST_DATA_INCR(session, cache_pages_requested);
for (evict_soon = stalled = false,
force_attempts = 0, sleep_cnt = wait_cnt = 0;;) {
switch (ref->state) {
diff --git a/src/third_party/wiredtiger/src/btree/bt_rebalance.c b/src/third_party/wiredtiger/src/btree/bt_rebalance.c
index d94eb2ddd80..de54e8433a8 100644
--- a/src/third_party/wiredtiger/src/btree/bt_rebalance.c
+++ b/src/third_party/wiredtiger/src/btree/bt_rebalance.c
@@ -90,7 +90,7 @@ __rebalance_leaf_append(WT_SESSION_IMPL *session,
if (recno == WT_RECNO_OOB)
WT_RET(__wt_row_ikey(session, 0, key, key_len, copy));
else
- copy->key.recno = recno;
+ copy->ref_recno = recno;
copy->page_del = NULL;
return (0);
@@ -147,8 +147,7 @@ __rebalance_internal(WT_SESSION_IMPL *session, WT_REBALANCE_STUFF *rs)
leaf_next = (uint32_t)rs->leaf_next;
/* Allocate a row-store root (internal) page and fill it in. */
- WT_RET(__wt_page_alloc(session, rs->type,
- rs->type == WT_PAGE_COL_INT ? 1 : 0, leaf_next, false, &page));
+ WT_RET(__wt_page_alloc(session, rs->type, leaf_next, false, &page));
page->pg_intl_parent_ref = &btree->root;
WT_ERR(__wt_page_modify_init(session, page));
__wt_page_modify_set(session, page);
diff --git a/src/third_party/wiredtiger/src/btree/bt_ret.c b/src/third_party/wiredtiger/src/btree/bt_ret.c
index ebc0499f6a2..8ef2db67e7b 100644
--- a/src/third_party/wiredtiger/src/btree/bt_ret.c
+++ b/src/third_party/wiredtiger/src/btree/bt_ret.c
@@ -46,7 +46,7 @@ __wt_kv_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_UPDATE *upd)
}
/* Take the value from the original page. */
- v = __bit_getv_recno(page, cbt->iface.recno, btree->bitcnt);
+ v = __bit_getv_recno(cbt->ref, cursor->recno, btree->bitcnt);
return (__wt_buf_set(session, &cursor->value, &v, 1));
case WT_PAGE_COL_VAR:
/*
diff --git a/src/third_party/wiredtiger/src/btree/bt_slvg.c b/src/third_party/wiredtiger/src/btree/bt_slvg.c
index 0e064d306b6..9b5e4daf74a 100644
--- a/src/third_party/wiredtiger/src/btree/bt_slvg.c
+++ b/src/third_party/wiredtiger/src/btree/bt_slvg.c
@@ -116,8 +116,8 @@ struct __wt_track {
static int __slvg_cleanup(WT_SESSION_IMPL *, WT_STUFF *);
static int __slvg_col_build_internal(WT_SESSION_IMPL *, uint32_t, WT_STUFF *);
static int __slvg_col_build_leaf(WT_SESSION_IMPL *, WT_TRACK *, WT_REF *);
-static int __slvg_col_ovfl(
- WT_SESSION_IMPL *, WT_TRACK *, WT_PAGE *, uint64_t, uint64_t);
+static int __slvg_col_ovfl(WT_SESSION_IMPL *,
+ WT_TRACK *, WT_PAGE *, uint64_t, uint64_t, uint64_t);
static int __slvg_col_range(WT_SESSION_IMPL *, WT_STUFF *);
static int __slvg_col_range_missing(WT_SESSION_IMPL *, WT_STUFF *);
static int __slvg_col_range_overlap(
@@ -166,11 +166,13 @@ __wt_bt_salvage(WT_SESSION_IMPL *session, WT_CKPT *ckptbase, const char *cfg[])
WT_DECL_RET;
WT_STUFF *ss, stuff;
uint32_t i, leaf_cnt;
+ bool evict_reset;
WT_UNUSED(cfg);
btree = S2BT(session);
bm = btree->bm;
+ evict_reset = false;
WT_CLEAR(stuff);
ss = &stuff;
@@ -182,6 +184,13 @@ __wt_bt_salvage(WT_SESSION_IMPL *session, WT_CKPT *ckptbase, const char *cfg[])
WT_ERR(__wt_scr_alloc(session, 0, &ss->tmp2));
/*
+ * Salvage handles its own page eviction; get exclusive access to the
+ * file, have eviction ignore the tree entirely.
+ */
+ WT_ERR(__wt_evict_file_exclusive_on(session));
+ evict_reset = true;
+
+ /*
* Step 1:
* Inform the underlying block manager that we're salvaging the file.
*/
@@ -295,13 +304,13 @@ __wt_bt_salvage(WT_SESSION_IMPL *session, WT_CKPT *ckptbase, const char *cfg[])
case WT_PAGE_COL_VAR:
WT_WITH_PAGE_INDEX(session,
ret = __slvg_col_build_internal(
- session, leaf_cnt, ss));
+ session, leaf_cnt, ss));
WT_ERR(ret);
break;
case WT_PAGE_ROW_LEAF:
WT_WITH_PAGE_INDEX(session,
ret = __slvg_row_build_internal(
- session, leaf_cnt, ss));
+ session, leaf_cnt, ss));
WT_ERR(ret);
break;
}
@@ -341,6 +350,9 @@ err: WT_TRET(bm->salvage_end(bm, session));
if (ss->root_ref.page != NULL)
__wt_ref_out(session, &ss->root_ref);
+ if (evict_reset)
+ __wt_evict_file_exclusive_off(session);
+
/* Discard the leaf and overflow page memory. */
WT_TRET(__slvg_cleanup(session, ss));
@@ -1159,7 +1171,7 @@ __slvg_col_build_internal(
/* Allocate a column-store root (internal) page and fill it in. */
WT_RET(__wt_page_alloc(
- session, WT_PAGE_COL_INT, 1, leaf_cnt, true, &page));
+ session, WT_PAGE_COL_INT, leaf_cnt, true, &page));
WT_ERR(__slvg_modify_init(session, page));
pindex = WT_INTL_INDEX_GET_SAFE(page);
@@ -1180,7 +1192,7 @@ __slvg_col_build_internal(
ref->addr = addr;
addr = NULL;
- ref->key.recno = trk->col_start;
+ ref->ref_recno = trk->col_start;
ref->state = WT_REF_DISK;
/*
@@ -1223,7 +1235,7 @@ __slvg_col_build_leaf(WT_SESSION_IMPL *session, WT_TRACK *trk, WT_REF *ref)
WT_DECL_RET;
WT_PAGE *page;
WT_SALVAGE_COOKIE *cookie, _cookie;
- uint64_t skip, take;
+ uint64_t recno, skip, take;
uint32_t *entriesp, save_entries;
cookie = &_cookie;
@@ -1243,7 +1255,8 @@ __slvg_col_build_leaf(WT_SESSION_IMPL *session, WT_TRACK *trk, WT_REF *ref)
* Calculate the number of K/V entries we are going to skip, and
* the total number of K/V entries we'll take from this page.
*/
- cookie->skip = skip = trk->col_start - page->pg_var_recno;
+ recno = page->dsk->recno;
+ cookie->skip = skip = trk->col_start - recno;
cookie->take = take = (trk->col_stop - trk->col_start) + 1;
WT_ERR(__wt_verbose(session, WT_VERB_SALVAGE,
@@ -1255,7 +1268,7 @@ __slvg_col_build_leaf(WT_SESSION_IMPL *session, WT_TRACK *trk, WT_REF *ref)
/* Set the referenced flag on overflow pages we're using. */
if (page->type == WT_PAGE_COL_VAR && trk->trk_ovfl_cnt != 0)
- WT_ERR(__slvg_col_ovfl(session, trk, page, skip, take));
+ WT_ERR(__slvg_col_ovfl(session, trk, page, recno, skip, take));
/*
* If we're missing some part of the range, the real start range is in
@@ -1263,9 +1276,9 @@ __slvg_col_build_leaf(WT_SESSION_IMPL *session, WT_TRACK *trk, WT_REF *ref)
* reference as well as the page itself.
*/
if (trk->col_missing == 0)
- page->pg_var_recno = trk->col_start;
+ ref->ref_recno = trk->col_start;
else {
- page->pg_var_recno = trk->col_missing;
+ ref->ref_recno = trk->col_missing;
cookie->missing = trk->col_start - trk->col_missing;
WT_ERR(__wt_verbose(session, WT_VERB_SALVAGE,
@@ -1274,7 +1287,6 @@ __slvg_col_build_leaf(WT_SESSION_IMPL *session, WT_TRACK *trk, WT_REF *ref)
session, trk->trk_addr, trk->trk_addr_size, trk->ss->tmp1),
cookie->missing));
}
- ref->key.recno = page->pg_var_recno;
/*
* We can't discard the original blocks associated with this page now.
@@ -1338,21 +1350,20 @@ __slvg_col_ovfl_single(
* Mark overflow items referenced by the merged page.
*/
static int
-__slvg_col_ovfl(WT_SESSION_IMPL *session,
- WT_TRACK *trk, WT_PAGE *page, uint64_t skip, uint64_t take)
+__slvg_col_ovfl(WT_SESSION_IMPL *session, WT_TRACK *trk,
+ WT_PAGE *page, uint64_t recno, uint64_t skip, uint64_t take)
{
WT_CELL_UNPACK unpack;
WT_CELL *cell;
WT_COL *cip;
WT_DECL_RET;
- uint64_t recno, start, stop;
+ uint64_t start, stop;
uint32_t i;
/*
* Merging a variable-length column-store page, and we took some number
* of records, figure out which (if any) overflow records we used.
*/
- recno = page->pg_var_recno;
start = recno + skip;
stop = (recno + skip + take) - 1;
@@ -1816,7 +1827,7 @@ __slvg_row_build_internal(
/* Allocate a row-store root (internal) page and fill it in. */
WT_RET(__wt_page_alloc(
- session, WT_PAGE_ROW_INT, WT_RECNO_OOB, leaf_cnt, true, &page));
+ session, WT_PAGE_ROW_INT, leaf_cnt, true, &page));
WT_ERR(__slvg_modify_init(session, page));
pindex = WT_INTL_INDEX_GET_SAFE(page);
diff --git a/src/third_party/wiredtiger/src/btree/bt_split.c b/src/third_party/wiredtiger/src/btree/bt_split.c
index 4f16a290958..2d7b0a0030f 100644
--- a/src/third_party/wiredtiger/src/btree/bt_split.c
+++ b/src/third_party/wiredtiger/src/btree/bt_split.c
@@ -207,8 +207,8 @@ __split_verify_intl_key_order(WT_SESSION_IMPL *session, WT_PAGE *page)
WT_INTL_FOREACH_BEGIN(session, page, ref) {
WT_ASSERT(session, ref->home == page);
- WT_ASSERT(session, ref->key.recno > recno);
- recno = ref->key.recno;
+ WT_ASSERT(session, ref->ref_recno > recno);
+ recno = ref->ref_recno;
} WT_INTL_FOREACH_END;
break;
case WT_PAGE_ROW_INT:
@@ -335,7 +335,7 @@ __split_ref_move(WT_SESSION_IMPL *session, WT_PAGE *from_home,
if ((ikey = __wt_ref_key_instantiated(ref)) == NULL) {
__wt_ref_key(from_home, ref, &key, &size);
WT_RET(__wt_row_ikey(session, 0, key, size, ref));
- ikey = ref->key.ikey;
+ ikey = ref->ref_ikey;
} else {
WT_RET(
__split_ovfl_key_cleanup(session, from_home, ref));
@@ -529,7 +529,7 @@ __split_root(WT_SESSION_IMPL *session, WT_PAGE *root)
WT_REF **child_refp, *ref, **root_refp;
WT_SPLIT_ERROR_PHASE complete;
size_t child_incr, root_decr, root_incr, size;
- uint64_t recno, split_gen;
+ uint64_t split_gen;
uint32_t children, chunk, i, j, remain;
uint32_t slots;
void *p;
@@ -593,10 +593,8 @@ __split_root(WT_SESSION_IMPL *session, WT_PAGE *root)
alloc_refp = alloc_index->index, i = 0; i < children; ++i) {
slots = i == children - 1 ? remain : chunk;
- recno = root->type == WT_PAGE_COL_INT ?
- (*root_refp)->key.recno : WT_RECNO_OOB;
WT_ERR(__wt_page_alloc(
- session, root->type, recno, slots, false, &child));
+ session, root->type, slots, false, &child));
/*
* Initialize the page's child reference; we need a copy of the
@@ -611,7 +609,7 @@ __split_root(WT_SESSION_IMPL *session, WT_PAGE *root)
WT_ERR(__wt_row_ikey(session, 0, p, size, ref));
root_incr += sizeof(WT_IKEY) + size;
} else
- ref->key.recno = recno;
+ ref->ref_recno = (*root_refp)->ref_recno;
ref->state = WT_REF_MEM;
/* Initialize the child page. */
@@ -737,7 +735,6 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new,
alloc_index = pindex = NULL;
parent_decr = 0;
- parent_entries = 0;
empty_parent = false;
complete = WT_ERR_RETURN;
@@ -1014,7 +1011,7 @@ __split_internal(WT_SESSION_IMPL *session, WT_PAGE *parent, WT_PAGE *page)
WT_REF **child_refp, *page_ref, **page_refp, *ref;
WT_SPLIT_ERROR_PHASE complete;
size_t child_incr, page_decr, page_incr, parent_incr, size;
- uint64_t recno, split_gen;
+ uint64_t split_gen;
uint32_t children, chunk, i, j, remain;
uint32_t slots;
void *p;
@@ -1099,10 +1096,8 @@ __split_internal(WT_SESSION_IMPL *session, WT_PAGE *parent, WT_PAGE *page)
for (alloc_refp = alloc_index->index + 1, i = 1; i < children; ++i) {
slots = i == children - 1 ? remain : chunk;
- recno = page->type == WT_PAGE_COL_INT ?
- (*page_refp)->key.recno : WT_RECNO_OOB;
WT_ERR(__wt_page_alloc(
- session, page->type, recno, slots, false, &child));
+ session, page->type, slots, false, &child));
/*
* Initialize the page's child reference; we need a copy of the
@@ -1117,7 +1112,7 @@ __split_internal(WT_SESSION_IMPL *session, WT_PAGE *parent, WT_PAGE *page)
WT_ERR(__wt_row_ikey(session, 0, p, size, ref));
parent_incr += sizeof(WT_IKEY) + size;
} else
- ref->key.recno = recno;
+ ref->ref_recno = (*page_refp)->ref_recno;
ref->state = WT_REF_MEM;
/* Initialize the child page. */
@@ -1483,6 +1478,15 @@ __split_multi_inmem(
uint32_t i, slot;
/*
+ * In 04/2016, we removed column-store record numbers from the WT_PAGE
+ * structure, leading to hard-to-debug problems because we corrupt the
+ * page if we search it using the wrong initial record number. For now,
+ * assert the record number is set.
+ */
+ WT_ASSERT(session,
+ orig->type != WT_PAGE_COL_VAR || ref->ref_recno != 0);
+
+ /*
* This code re-creates an in-memory page that is part of a set created
* while evicting a large page, and adds references to any unresolved
* update chains to the new page. We get here due to choosing to keep
@@ -1525,7 +1529,7 @@ __split_multi_inmem(
/* Build a key. */
if (supd->ins == NULL) {
slot = WT_ROW_SLOT(orig, supd->rip);
- upd = orig->pg_row_upd[slot];
+ upd = orig->modify->mod_row_update[slot];
WT_ERR(__wt_row_leaf_key(
session, orig, supd->rip, key, false));
@@ -1588,7 +1592,7 @@ __split_multi_inmem_final(WT_PAGE *orig, WT_MULTI *multi)
case WT_PAGE_ROW_LEAF:
if (supd->ins == NULL) {
slot = WT_ROW_SLOT(orig, supd->rip);
- orig->pg_row_upd[slot] = NULL;
+ orig->modify->mod_row_update[slot] = NULL;
} else
supd->ins->upd = NULL;
break;
@@ -1605,11 +1609,16 @@ __split_multi_inmem_fail(WT_SESSION_IMPL *session, WT_PAGE *orig, WT_REF *ref)
/*
* We failed creating new in-memory pages. For error-handling reasons,
* we've left the update chains referenced by both the original and
- * new pages. Discard the new pages, setting a flag so the discard code
- * doesn't discard the updates on the page.
+ * new pages. Discard the new allocated WT_REF structures and their
+ * pages (setting a flag so the discard code doesn't discard the updates
+ * on the page).
+ *
+ * Our callers allocate WT_REF arrays, then individual WT_REFs; check
+ * for uninitialized information.
*/
- if (ref->page != NULL) {
- F_SET_ATOMIC(ref->page, WT_PAGE_UPDATE_IGNORE);
+ if (ref != NULL) {
+ if (ref->page != NULL)
+ F_SET_ATOMIC(ref->page, WT_PAGE_UPDATE_IGNORE);
__wt_free_ref(session, ref, orig->type, true);
}
}
@@ -1627,7 +1636,6 @@ __wt_multi_to_ref(WT_SESSION_IMPL *session,
WT_REF *ref;
size_t incr;
- addr = NULL;
incr = 0;
/* Allocate an underlying WT_REF. */
@@ -1635,9 +1643,24 @@ __wt_multi_to_ref(WT_SESSION_IMPL *session,
ref = *refp;
incr += sizeof(WT_REF);
- /* Any parent reference is filled in by our caller. */
- ref->home = NULL;
+ /*
+ * Set the WT_REF key before (optionally) building the page, underlying
+ * column-store functions need the page's key space to search it.
+ */
+ switch (page->type) {
+ case WT_PAGE_ROW_INT:
+ case WT_PAGE_ROW_LEAF:
+ ikey = multi->key.ikey;
+ WT_RET(__wt_row_ikey(
+ session, 0, WT_IKEY_DATA(ikey), ikey->size, ref));
+ incr += sizeof(WT_IKEY) + ikey->size;
+ break;
+ default:
+ ref->ref_recno = multi->key.recno;
+ break;
+ }
+ /* If there's a disk image, build a page, otherwise set the address. */
if (multi->disk_image == NULL) {
/*
* Copy the address: we could simply take the buffer, but that
@@ -1651,28 +1674,13 @@ __wt_multi_to_ref(WT_SESSION_IMPL *session,
addr->type = multi->addr.type;
WT_RET(__wt_strndup(session,
multi->addr.addr, addr->size, &addr->addr));
- } else
+ ref->state = WT_REF_DISK;
+ } else {
WT_RET(__split_multi_inmem(session, page, ref, multi));
-
- switch (page->type) {
- case WT_PAGE_ROW_INT:
- case WT_PAGE_ROW_LEAF:
- ikey = multi->key.ikey;
- WT_RET(__wt_row_ikey(
- session, 0, WT_IKEY_DATA(ikey), ikey->size, ref));
- incr += sizeof(WT_IKEY) + ikey->size;
- break;
- default:
- ref->key.recno = multi->key.recno;
- break;
+ ref->state = WT_REF_MEM;
}
- ref->state = addr != NULL ? WT_REF_DISK : WT_REF_MEM;
-
- /*
- * If our caller wants to track the memory allocations, we have a return
- * reference.
- */
+ /* Optionally return changes in the memory footprint. */
if (incrp != NULL)
*incrp += incr;
return (0);
@@ -1773,17 +1781,12 @@ __split_insert(WT_SESSION_IMPL *session, WT_REF *ref)
parent_incr += sizeof(WT_IKEY) + key->size;
__wt_scr_free(session, &key);
} else
- child->key.recno = ref->key.recno;
+ child->ref_recno = ref->ref_recno;
/*
* The second page in the split is a new WT_REF/page pair.
*/
- if (type == WT_PAGE_ROW_LEAF)
- WT_ERR(__wt_page_alloc(session,
- type, WT_RECNO_OOB, 0, false, &right));
- else
- WT_ERR(__wt_page_alloc(session,
- type, WT_INSERT_RECNO(moved_ins), 0, false, &right));
+ WT_ERR(__wt_page_alloc(session, type, 0, false, &right));
/*
* The new page is dirty by definition, plus column-store splits update
@@ -1793,11 +1796,15 @@ __split_insert(WT_SESSION_IMPL *session, WT_REF *ref)
__wt_page_modify_set(session, right);
if (type == WT_PAGE_ROW_LEAF) {
- WT_ERR(__wt_calloc_one(session, &right->pg_row_ins));
- WT_ERR(__wt_calloc_one(session, &right->pg_row_ins[0]));
+ WT_ERR(__wt_calloc_one(
+ session, &right->modify->mod_row_insert));
+ WT_ERR(__wt_calloc_one(
+ session, &right->modify->mod_row_insert[0]));
} else {
- WT_ERR(__wt_calloc_one(session, &right->modify->mod_append));
- WT_ERR(__wt_calloc_one(session, &right->modify->mod_append[0]));
+ WT_ERR(__wt_calloc_one(
+ session, &right->modify->mod_col_append));
+ WT_ERR(__wt_calloc_one(
+ session, &right->modify->mod_col_append[0]));
}
right_incr += sizeof(WT_INSERT_HEAD);
right_incr += sizeof(WT_INSERT_HEAD *);
@@ -1814,7 +1821,7 @@ __split_insert(WT_SESSION_IMPL *session, WT_REF *ref)
child));
parent_incr += sizeof(WT_IKEY) + WT_INSERT_KEY_SIZE(moved_ins);
} else
- child->key.recno = WT_INSERT_RECNO(moved_ins);
+ child->ref_recno = WT_INSERT_RECNO(moved_ins);
/*
* Allocation operations completed, we're going to split.
@@ -1823,8 +1830,8 @@ __split_insert(WT_SESSION_IMPL *session, WT_REF *ref)
*/
if (type != WT_PAGE_ROW_LEAF) {
WT_ASSERT(session,
- page->modify->mod_split_recno == WT_RECNO_OOB);
- page->modify->mod_split_recno = child->key.recno;
+ page->modify->mod_col_split_recno == WT_RECNO_OOB);
+ page->modify->mod_col_split_recno = child->ref_recno;
}
/*
@@ -1848,7 +1855,7 @@ __split_insert(WT_SESSION_IMPL *session, WT_REF *ref)
* can be ignored.)
*/
tmp_ins_head = type == WT_PAGE_ROW_LEAF ?
- right->pg_row_ins[0] : right->modify->mod_append[0];
+ right->modify->mod_row_insert[0] : right->modify->mod_col_append[0];
tmp_ins_head->head[0] = tmp_ins_head->tail[0] = moved_ins;
/*
@@ -1970,7 +1977,7 @@ __split_insert(WT_SESSION_IMPL *session, WT_REF *ref)
* Reset the split column-store page record.
*/
if (type != WT_PAGE_ROW_LEAF)
- page->modify->mod_split_recno = WT_RECNO_OOB;
+ page->modify->mod_col_split_recno = WT_RECNO_OOB;
/*
* Clear the allocated page's reference to the moved insert list element
@@ -1983,11 +1990,11 @@ __split_insert(WT_SESSION_IMPL *session, WT_REF *ref)
* lists have.
*/
if (type == WT_PAGE_ROW_LEAF)
- right->pg_row_ins[0]->head[0] =
- right->pg_row_ins[0]->tail[0] = NULL;
+ right->modify->mod_row_insert[0]->head[0] =
+ right->modify->mod_row_insert[0]->tail[0] = NULL;
else
- right->modify->mod_append[0]->head[0] =
- right->modify->mod_append[0]->tail[0] = NULL;
+ right->modify->mod_col_append[0]->head[0] =
+ right->modify->mod_col_append[0]->tail[0] = NULL;
ins_head->tail[0]->next[0] = moved_ins;
ins_head->tail[0] = moved_ins;
@@ -1999,12 +2006,12 @@ err: if (split_ref[0] != NULL) {
ref->addr = split_ref[0]->addr;
if (type == WT_PAGE_ROW_LEAF)
- __wt_free(session, split_ref[0]->key.ikey);
+ __wt_free(session, split_ref[0]->ref_ikey);
__wt_free(session, split_ref[0]);
}
if (split_ref[1] != NULL) {
if (type == WT_PAGE_ROW_LEAF)
- __wt_free(session, split_ref[1]->key.ikey);
+ __wt_free(session, split_ref[1]->ref_ikey);
__wt_free(session, split_ref[1]);
}
if (right != NULL) {
@@ -2170,7 +2177,7 @@ __wt_split_rewrite(WT_SESSION_IMPL *session, WT_REF *ref)
WT_DECL_RET;
WT_PAGE *page;
WT_PAGE_MODIFY *mod;
- WT_REF new;
+ WT_REF *new;
page = ref->page;
mod = page->modify;
@@ -2187,9 +2194,15 @@ __wt_split_rewrite(WT_SESSION_IMPL *session, WT_REF *ref)
* exactly what we want to do.
*
* Build the new page.
+ *
+ * Allocate a WT_REF because the error path uses routines that will
+ * free memory. The only field we need to set is the record number, as
+ * it's used by the search routines.
*/
- memset(&new, 0, sizeof(new));
- WT_ERR(__split_multi_inmem(session, page, &new, &mod->mod_multi[0]));
+ WT_RET(__wt_calloc_one(session, &new));
+ new->ref_recno = ref->ref_recno;
+
+ WT_ERR(__split_multi_inmem(session, page, new, &mod->mod_multi[0]));
/*
* The rewrite succeeded, we can no longer fail.
@@ -2209,11 +2222,12 @@ __wt_split_rewrite(WT_SESSION_IMPL *session, WT_REF *ref)
__wt_ref_out(session, ref);
/* Swap the new page into place. */
- ref->page = new.page;
+ ref->page = new->page;
WT_PUBLISH(ref->state, WT_REF_MEM);
+ __wt_free(session, new);
return (0);
-err: __split_multi_inmem_fail(session, page, &new);
+err: __split_multi_inmem_fail(session, page, new);
return (ret);
}
diff --git a/src/third_party/wiredtiger/src/btree/bt_sync.c b/src/third_party/wiredtiger/src/btree/bt_sync.c
index 826589f8bdd..5d60c436a08 100644
--- a/src/third_party/wiredtiger/src/btree/bt_sync.c
+++ b/src/third_party/wiredtiger/src/btree/bt_sync.c
@@ -81,7 +81,7 @@ __sync_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop)
if (__wt_page_is_modified(page) &&
WT_TXNID_LT(page->modify->update_txn, oldest_id)) {
if (txn->isolation == WT_ISO_READ_COMMITTED)
- __wt_txn_get_snapshot(session);
+ WT_ERR(__wt_txn_get_snapshot(session));
leaf_bytes += page->memory_footprint;
++leaf_pages;
WT_ERR(__wt_reconcile(session, walk, NULL, 0));
@@ -100,7 +100,7 @@ __sync_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop)
* the metadata shouldn't be that big, and (b) if we do ever
*/
if (txn->isolation == WT_ISO_READ_COMMITTED)
- __wt_txn_get_snapshot(session);
+ WT_ERR(__wt_txn_get_snapshot(session));
/*
* We cannot check the tree modified flag in the case of a
diff --git a/src/third_party/wiredtiger/src/btree/bt_vrfy.c b/src/third_party/wiredtiger/src/btree/bt_vrfy.c
index 83dc7924312..531a0dc125a 100644
--- a/src/third_party/wiredtiger/src/btree/bt_vrfy.c
+++ b/src/third_party/wiredtiger/src/btree/bt_vrfy.c
@@ -355,7 +355,7 @@ __verify_tree(WT_SESSION_IMPL *session, WT_REF *ref, WT_VSTUFF *vs)
if (vs->dump_blocks)
WT_RET(__wt_debug_disk(session, page->dsk, NULL));
if (vs->dump_pages)
- WT_RET(__wt_debug_page(session, page, NULL));
+ WT_RET(__wt_debug_page(session, ref, NULL));
#endif
/*
@@ -364,13 +364,11 @@ __verify_tree(WT_SESSION_IMPL *session, WT_REF *ref, WT_VSTUFF *vs)
*/
switch (page->type) {
case WT_PAGE_COL_FIX:
- recno = page->pg_fix_recno;
- goto recno_chk;
case WT_PAGE_COL_INT:
- recno = page->pg_intl_recno;
+ recno = ref->ref_recno;
goto recno_chk;
case WT_PAGE_COL_VAR:
- recno = page->pg_var_recno;
+ recno = ref->ref_recno;
recno_chk: if (recno != vs->record_total + 1)
WT_RET_MSG(session, WT_ERROR,
"page at %s has a starting record of %" PRIu64
@@ -485,7 +483,7 @@ celltype_err: WT_RET_MSG(session, WT_ERROR,
* reviewed to this point.
*/
++entry;
- if (child_ref->key.recno != vs->record_total + 1) {
+ if (child_ref->ref_recno != vs->record_total + 1) {
WT_RET_MSG(session, WT_ERROR,
"the starting record number in entry %"
PRIu32 " of the column internal page at "
@@ -494,7 +492,7 @@ celltype_err: WT_RET_MSG(session, WT_ERROR,
entry,
__wt_page_addr_string(
session, child_ref, vs->tmp1),
- child_ref->key.recno,
+ child_ref->ref_recno,
vs->record_total + 1);
}
diff --git a/src/third_party/wiredtiger/src/btree/col_modify.c b/src/third_party/wiredtiger/src/btree/col_modify.c
index fd60b12538a..a7920da5267 100644
--- a/src/third_party/wiredtiger/src/btree/col_modify.c
+++ b/src/third_party/wiredtiger/src/btree/col_modify.c
@@ -55,7 +55,8 @@ __wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt,
*/
if (recno == WT_RECNO_OOB ||
recno > (btree->type == BTREE_COL_VAR ?
- __col_var_last_recno(page) : __col_fix_last_recno(page)))
+ __col_var_last_recno(cbt->ref) :
+ __col_fix_last_recno(cbt->ref)))
append = true;
}
@@ -107,17 +108,17 @@ __wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt,
/* Allocate the append/update list reference as necessary. */
if (append) {
WT_PAGE_ALLOC_AND_SWAP(session,
- page, mod->mod_append, ins_headp, 1);
- ins_headp = &mod->mod_append[0];
+ page, mod->mod_col_append, ins_headp, 1);
+ ins_headp = &mod->mod_col_append[0];
} else if (page->type == WT_PAGE_COL_FIX) {
WT_PAGE_ALLOC_AND_SWAP(session,
- page, mod->mod_update, ins_headp, 1);
- ins_headp = &mod->mod_update[0];
+ page, mod->mod_col_update, ins_headp, 1);
+ ins_headp = &mod->mod_col_update[0];
} else {
WT_PAGE_ALLOC_AND_SWAP(session,
- page, mod->mod_update, ins_headp,
+ page, mod->mod_col_update, ins_headp,
page->pg_var_entries);
- ins_headp = &mod->mod_update[cbt->slot];
+ ins_headp = &mod->mod_col_update[cbt->slot];
}
/* Allocate the WT_INSERT_HEAD structure as necessary. */
@@ -142,8 +143,9 @@ __wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt,
* it's easy (as opposed to in row-store) and a difficult bug to
* otherwise diagnose.
*/
- WT_ASSERT(session, mod->mod_split_recno == WT_RECNO_OOB ||
- (recno != WT_RECNO_OOB && mod->mod_split_recno > recno));
+ WT_ASSERT(session, mod->mod_col_split_recno == WT_RECNO_OOB ||
+ (recno != WT_RECNO_OOB &&
+ mod->mod_col_split_recno > recno));
if (upd_arg == NULL) {
WT_ERR(
diff --git a/src/third_party/wiredtiger/src/btree/col_srch.c b/src/third_party/wiredtiger/src/btree/col_srch.c
index 4730267a545..6c96181d3bf 100644
--- a/src/third_party/wiredtiger/src/btree/col_srch.c
+++ b/src/third_party/wiredtiger/src/btree/col_srch.c
@@ -30,7 +30,7 @@ __check_leaf_key_range(WT_SESSION_IMPL *session,
* Check if the search key is smaller than the parent's starting key for
* this page.
*/
- if (recno < leaf->key.recno) {
+ if (recno < leaf->ref_recno) {
cbt->compare = 1; /* page keys > search key */
return (0);
}
@@ -48,7 +48,7 @@ __check_leaf_key_range(WT_SESSION_IMPL *session,
WT_INTL_INDEX_GET(session, leaf->home, pindex);
indx = leaf->pindex_hint;
if (indx + 1 < pindex->entries && pindex->index[indx] == leaf)
- if (recno >= pindex->index[indx + 1]->key.recno) {
+ if (recno >= pindex->index[indx + 1]->ref_recno) {
cbt->compare = -1; /* page keys < search key */
return (0);
}
@@ -133,14 +133,12 @@ restart: /*
if (page->type != WT_PAGE_COL_INT)
break;
- WT_ASSERT(session, current->key.recno == page->pg_intl_recno);
-
WT_INTL_INDEX_GET(session, page, pindex);
base = pindex->entries;
descent = pindex->index[base - 1];
/* Fast path appends. */
- if (recno >= descent->key.recno) {
+ if (recno >= descent->ref_recno) {
/*
* If on the last slot (the key is larger than any key
* on the page), check for an internal page split race.
@@ -158,9 +156,9 @@ restart: /*
indx = base + (limit >> 1);
descent = pindex->index[indx];
- if (recno == descent->key.recno)
+ if (recno == descent->ref_recno)
break;
- if (recno < descent->key.recno)
+ if (recno < descent->ref_recno)
continue;
base = indx + 1;
--limit;
@@ -172,7 +170,7 @@ descend: /*
* (last + 1) index. The slot for descent is the one before
* base.
*/
- if (recno != descent->key.recno) {
+ if (recno != descent->ref_recno) {
/*
* We don't have to correct for base == 0 because the
* only way for base to be 0 is if recno is the page's
@@ -237,13 +235,13 @@ leaf_only:
* do in that case, the record may be appended to the page.
*/
if (page->type == WT_PAGE_COL_FIX) {
- if (recno < page->pg_fix_recno) {
- cbt->recno = page->pg_fix_recno;
+ if (recno < current->ref_recno) {
+ cbt->recno = current->ref_recno;
cbt->compare = 1;
return (0);
}
- if (recno >= page->pg_fix_recno + page->pg_fix_entries) {
- cbt->recno = page->pg_fix_recno + page->pg_fix_entries;
+ if (recno >= current->ref_recno + page->pg_fix_entries) {
+ cbt->recno = current->ref_recno + page->pg_fix_entries;
goto past_end;
} else {
cbt->recno = recno;
@@ -251,14 +249,14 @@ leaf_only:
ins_head = WT_COL_UPDATE_SINGLE(page);
}
} else {
- if (recno < page->pg_var_recno) {
- cbt->recno = page->pg_var_recno;
+ if (recno < current->ref_recno) {
+ cbt->recno = current->ref_recno;
cbt->slot = 0;
cbt->compare = 1;
return (0);
}
- if ((cip = __col_var_search(page, recno, NULL)) == NULL) {
- cbt->recno = __col_var_last_recno(page);
+ if ((cip = __col_var_search(current, recno, NULL)) == NULL) {
+ cbt->recno = __col_var_last_recno(current);
cbt->slot = page->pg_var_entries == 0 ?
0 : page->pg_var_entries - 1;
goto past_end;
diff --git a/src/third_party/wiredtiger/src/btree/row_key.c b/src/third_party/wiredtiger/src/btree/row_key.c
index 9fff092d079..83fd2dad9e4 100644
--- a/src/third_party/wiredtiger/src/btree/row_key.c
+++ b/src/third_party/wiredtiger/src/btree/row_key.c
@@ -517,7 +517,7 @@ __wt_row_ikey(WT_SESSION_IMPL *session,
{
uintptr_t oldv;
- oldv = (uintptr_t)ref->key.ikey;
+ oldv = (uintptr_t)ref->ref_ikey;
WT_DIAGNOSTIC_YIELD;
/*
@@ -527,10 +527,10 @@ __wt_row_ikey(WT_SESSION_IMPL *session,
WT_ASSERT(session, oldv == 0 || (oldv & WT_IK_FLAG) != 0);
WT_ASSERT(session, ref->state != WT_REF_SPLIT);
WT_ASSERT(session,
- __wt_atomic_cas_ptr(&ref->key.ikey, (WT_IKEY *)oldv, ikey));
+ __wt_atomic_cas_ptr(&ref->ref_ikey, (WT_IKEY *)oldv, ikey));
}
#else
- ref->key.ikey = ikey;
+ ref->ref_ikey = ikey;
#endif
return (0);
}
diff --git a/src/third_party/wiredtiger/src/btree/row_modify.c b/src/third_party/wiredtiger/src/btree/row_modify.c
index 176016bb340..f0424ff93b4 100644
--- a/src/third_party/wiredtiger/src/btree/row_modify.c
+++ b/src/third_party/wiredtiger/src/btree/row_modify.c
@@ -53,6 +53,7 @@ __wt_row_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt,
WT_INSERT *ins;
WT_INSERT_HEAD *ins_head, **ins_headp;
WT_PAGE *page;
+ WT_PAGE_MODIFY *mod;
WT_UPDATE *old_upd, *upd, **upd_entry;
size_t ins_size, upd_size;
uint32_t ins_slot;
@@ -70,6 +71,7 @@ __wt_row_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt,
/* If we don't yet have a modify structure, we'll need one. */
WT_RET(__wt_page_modify_init(session, page));
+ mod = page->modify;
/*
* Modify: allocate an update array as necessary, build a WT_UPDATE
@@ -83,11 +85,12 @@ __wt_row_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt,
if (cbt->compare == 0) {
if (cbt->ins == NULL) {
/* Allocate an update array as necessary. */
- WT_PAGE_ALLOC_AND_SWAP(session, page,
- page->pg_row_upd, upd_entry, page->pg_row_entries);
+ WT_PAGE_ALLOC_AND_SWAP(session,
+ page, mod->mod_row_update,
+ upd_entry, page->pg_row_entries);
/* Set the WT_UPDATE array reference. */
- upd_entry = &page->pg_row_upd[cbt->slot];
+ upd_entry = &mod->mod_row_update[cbt->slot];
} else
upd_entry = &cbt->ins->upd;
@@ -144,11 +147,11 @@ __wt_row_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt,
* slot. That's hard, so we set a flag.
*/
WT_PAGE_ALLOC_AND_SWAP(session, page,
- page->pg_row_ins, ins_headp, page->pg_row_entries + 1);
+ mod->mod_row_insert, ins_headp, page->pg_row_entries + 1);
ins_slot = F_ISSET(cbt, WT_CBT_SEARCH_SMALLEST) ?
page->pg_row_entries: cbt->slot;
- ins_headp = &page->pg_row_ins[ins_slot];
+ ins_headp = &mod->mod_row_insert[ins_slot];
/* Allocate the WT_INSERT_HEAD structure as necessary. */
WT_PAGE_ALLOC_AND_SWAP(session, page, *ins_headp, ins_head, 1);
diff --git a/src/third_party/wiredtiger/src/btree/row_srch.c b/src/third_party/wiredtiger/src/btree/row_srch.c
index 6169a0a810a..a631764be7e 100644
--- a/src/third_party/wiredtiger/src/btree/row_srch.c
+++ b/src/third_party/wiredtiger/src/btree/row_srch.c
@@ -634,6 +634,7 @@ __wt_row_random_leaf(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt)
WT_INSERT *ins, **start, **stop;
WT_INSERT_HEAD *ins_head;
WT_PAGE *page;
+ uint64_t samples;
uint32_t choice, entries, i;
int level;
@@ -688,7 +689,7 @@ __wt_row_random_leaf(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt)
* Step down the skip list levels, selecting a random chunk of the name
* space at each level.
*/
- while (level > 0) {
+ for (samples = entries; level > 0; samples += entries) {
/*
* There are (entries) or (entries + 1) chunks of the name space
* considered at each level. They are: between start and the 1st
@@ -765,6 +766,16 @@ __wt_row_random_leaf(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt)
cbt->ins_head = ins_head;
cbt->compare = 0;
+ /*
+ * Random lookups in newly created collections can be slow if a page
+ * consists of a large skiplist. Schedule the page for eviction if we
+ * encounter a large skiplist. This is worthwhile because applications
+ * that take a sample often take many samples, so the overhead of
+ * traversing the skip list each time accumulates to real time.
+ */
+ if (samples > 5000)
+ __wt_page_evict_soon(page);
+
return (0);
}