summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRamon Fernandez <ramon.fernandez@mongodb.com>2015-08-06 16:55:10 -0400
committerRamon Fernandez <ramon.fernandez@mongodb.com>2015-08-06 16:55:10 -0400
commit4b03c1c71f9657d0b84617ef32ea5b96faec576b (patch)
treea957278b79c29733ef0fe6aa87c09784aa7af890
parentda5744a4dba5bdba10e5e5fb63eb27fbf769de0d (diff)
downloadmongo-4b03c1c71f9657d0b84617ef32ea5b96faec576b.tar.gz
Import wiredtiger-wiredtiger-mongodb-3.0.4-49-g48648de.tar.gz from wiredtiger branch mongodb-3.0
-rw-r--r--src/third_party/wiredtiger/bench/wtperf/wtperf.c18
-rw-r--r--src/third_party/wiredtiger/bench/wtperf/wtperf.h2
-rw-r--r--src/third_party/wiredtiger/dist/log.py3
-rw-r--r--src/third_party/wiredtiger/dist/s_string.ok1
-rw-r--r--src/third_party/wiredtiger/dist/stat_data.py1
-rw-r--r--src/third_party/wiredtiger/src/block/block_ckpt.c11
-rw-r--r--src/third_party/wiredtiger/src/block/block_ext.c153
-rw-r--r--src/third_party/wiredtiger/src/block/block_open.c15
-rw-r--r--src/third_party/wiredtiger/src/block/block_read.c24
-rw-r--r--src/third_party/wiredtiger/src/block/block_slvg.c2
-rw-r--r--src/third_party/wiredtiger/src/block/block_vrfy.c14
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_debug.c8
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_discard.c4
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_handle.c4
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_page.c6
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_slvg.c4
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_split.c20
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_sync.c4
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_walk.c105
-rw-r--r--src/third_party/wiredtiger/src/btree/col_modify.c2
-rw-r--r--src/third_party/wiredtiger/src/btree/col_srch.c2
-rw-r--r--src/third_party/wiredtiger/src/btree/row_modify.c14
-rw-r--r--src/third_party/wiredtiger/src/btree/row_srch.c10
-rw-r--r--src/third_party/wiredtiger/src/conn/conn_api.c2
-rw-r--r--src/third_party/wiredtiger/src/conn/conn_log.c10
-rw-r--r--src/third_party/wiredtiger/src/evict/evict_lru.c27
-rw-r--r--src/third_party/wiredtiger/src/include/btmem.h17
-rw-r--r--src/third_party/wiredtiger/src/include/btree.i64
-rw-r--r--src/third_party/wiredtiger/src/include/cursor.i6
-rw-r--r--src/third_party/wiredtiger/src/include/extern.h13
-rw-r--r--src/third_party/wiredtiger/src/include/gcc.h15
-rw-r--r--src/third_party/wiredtiger/src/include/log.h28
-rw-r--r--src/third_party/wiredtiger/src/include/misc.h8
-rw-r--r--src/third_party/wiredtiger/src/include/mutex.h4
-rw-r--r--src/third_party/wiredtiger/src/include/os.h2
-rw-r--r--src/third_party/wiredtiger/src/include/serial.i121
-rw-r--r--src/third_party/wiredtiger/src/include/session.h4
-rw-r--r--src/third_party/wiredtiger/src/include/stat.h1
-rw-r--r--src/third_party/wiredtiger/src/include/txn.h8
-rw-r--r--src/third_party/wiredtiger/src/include/txn.i30
-rw-r--r--src/third_party/wiredtiger/src/include/wiredtiger.in144
-rw-r--r--src/third_party/wiredtiger/src/include/wt_internal.h2
-rw-r--r--src/third_party/wiredtiger/src/log/log.c40
-rw-r--r--src/third_party/wiredtiger/src/log/log_auto.c3
-rw-r--r--src/third_party/wiredtiger/src/log/log_slot.c129
-rw-r--r--src/third_party/wiredtiger/src/lsm/lsm_cursor.c19
-rw-r--r--src/third_party/wiredtiger/src/lsm/lsm_manager.c2
-rw-r--r--src/third_party/wiredtiger/src/lsm/lsm_work_unit.c4
-rw-r--r--src/third_party/wiredtiger/src/meta/meta_track.c105
-rw-r--r--src/third_party/wiredtiger/src/os_posix/os_mtx_rw.c2
-rw-r--r--src/third_party/wiredtiger/src/os_posix/os_thread.c6
-rw-r--r--src/third_party/wiredtiger/src/reconcile/rec_write.c52
-rw-r--r--src/third_party/wiredtiger/src/session/session_api.c8
-rw-r--r--src/third_party/wiredtiger/src/support/rand.c44
-rw-r--r--src/third_party/wiredtiger/src/support/stat.c2
-rw-r--r--src/third_party/wiredtiger/src/txn/txn.c52
-rw-r--r--src/third_party/wiredtiger/src/txn/txn_ckpt.c74
-rw-r--r--src/third_party/wiredtiger/src/txn/txn_ext.c4
58 files changed, 815 insertions, 664 deletions
diff --git a/src/third_party/wiredtiger/bench/wtperf/wtperf.c b/src/third_party/wiredtiger/bench/wtperf/wtperf.c
index 8780d270664..a4f679ae736 100644
--- a/src/third_party/wiredtiger/bench/wtperf/wtperf.c
+++ b/src/third_party/wiredtiger/bench/wtperf/wtperf.c
@@ -117,13 +117,13 @@ randomize_value(CONFIG_THREAD *thread, char *value_buf)
* randomly chosen byte (other than the trailing NUL).
* Make sure we don't write a NUL: keep the value the same length.
*/
- i = __wt_random(thread->rnd) % (thread->cfg->value_sz - 1);
+ i = __wt_random(&thread->rnd) % (thread->cfg->value_sz - 1);
while (value_buf[i] == '\0' && i > 0)
--i;
if (i > 0) {
vb = (uint8_t *)value_buf;
- vb[0] = (__wt_random(thread->rnd) % 255) + 1;
- vb[i] = (__wt_random(thread->rnd) % 255) + 1;
+ vb[0] = (__wt_random(&thread->rnd) % 255) + 1;
+ vb[i] = (__wt_random(&thread->rnd) % 255) + 1;
}
}
@@ -2155,13 +2155,11 @@ start_threads(CONFIG *cfg,
* new RNG state further along in the sequence.
*/
if (i == 0)
- __wt_random_init(thread->rnd);
- else {
- thread->rnd[0] = (thread - 1)->rnd[0];
- thread->rnd[1] = (thread - 1)->rnd[1];
- }
+ __wt_random_init(&thread->rnd);
+ else
+ thread->rnd = (thread - 1)->rnd;
for (j = 0; j < 1000; ++j)
- (void)__wt_random(thread->rnd);
+ (void)__wt_random(&thread->rnd);
/*
* Every thread gets a key/data buffer because we don't bother
@@ -2283,7 +2281,7 @@ wtperf_rand(CONFIG_THREAD *thread)
* Use WiredTiger's random number routine: it's lock-free and fairly
* good.
*/
- rval = (uint64_t)__wt_random(thread->rnd);
+ rval = (uint64_t)__wt_random(&thread->rnd);
/* Use Pareto distribution to give 80/20 hot/cold values. */
if (cfg->pareto) {
diff --git a/src/third_party/wiredtiger/bench/wtperf/wtperf.h b/src/third_party/wiredtiger/bench/wtperf/wtperf.h
index 201623c7859..7ae55c5ca19 100644
--- a/src/third_party/wiredtiger/bench/wtperf/wtperf.h
+++ b/src/third_party/wiredtiger/bench/wtperf/wtperf.h
@@ -209,7 +209,7 @@ typedef struct {
struct __config_thread { /* Per-thread structure */
CONFIG *cfg; /* Enclosing configuration */
- uint32_t rnd[2]; /* Random number generation state */
+ WT_RAND_STATE rnd; /* Random number generation state */
pthread_t handle; /* Handle */
diff --git a/src/third_party/wiredtiger/dist/log.py b/src/third_party/wiredtiger/dist/log.py
index 57b8fdc0f23..abe72cea5c4 100644
--- a/src/third_party/wiredtiger/dist/log.py
+++ b/src/third_party/wiredtiger/dist/log.py
@@ -114,7 +114,8 @@ __wt_logrec_alloc(WT_SESSION_IMPL *session, size_t size, WT_ITEM **logrecp)
{
\tWT_ITEM *logrec;
-\tWT_RET(__wt_scr_alloc(session, WT_ALIGN(size + 1, LOG_ALIGN), &logrec));
+\tWT_RET(
+\t __wt_scr_alloc(session, WT_ALIGN(size + 1, WT_LOG_ALIGN), &logrec));
\tWT_CLEAR(*(WT_LOG_RECORD *)logrec->data);
\tlogrec->size = offsetof(WT_LOG_RECORD, record);
diff --git a/src/third_party/wiredtiger/dist/s_string.ok b/src/third_party/wiredtiger/dist/s_string.ok
index a966134e5ce..2bdd1d88a54 100644
--- a/src/third_party/wiredtiger/dist/s_string.ok
+++ b/src/third_party/wiredtiger/dist/s_string.ok
@@ -809,6 +809,7 @@ minorp
minprefix
mkdir
mmap
+mmrand
mnt
msecs
msg
diff --git a/src/third_party/wiredtiger/dist/stat_data.py b/src/third_party/wiredtiger/dist/stat_data.py
index f133ab899ea..6e2efb66eb6 100644
--- a/src/third_party/wiredtiger/dist/stat_data.py
+++ b/src/third_party/wiredtiger/dist/stat_data.py
@@ -215,7 +215,6 @@ connection_stats = [
##########################################
# Logging statistics
##########################################
- LogStat('log_buffer_grow', 'log buffer size increases'),
LogStat('log_buffer_size', 'total log buffer size', 'no_clear,no_scale'),
LogStat('log_bytes_payload', 'log bytes of payload data'),
LogStat('log_bytes_written', 'log bytes written'),
diff --git a/src/third_party/wiredtiger/src/block/block_ckpt.c b/src/third_party/wiredtiger/src/block/block_ckpt.c
index 18c3978c90f..40bba8184a1 100644
--- a/src/third_party/wiredtiger/src/block/block_ckpt.c
+++ b/src/third_party/wiredtiger/src/block/block_ckpt.c
@@ -315,7 +315,7 @@ __ckpt_extlist_fblocks(
* file that contains a previous checkpoint's extents.
*/
return (__wt_block_insert_ext(
- session, &block->live.ckpt_avail, el->offset, el->size));
+ session, block, &block->live.ckpt_avail, el->offset, el->size));
}
#ifdef HAVE_DIAGNOSTIC
@@ -537,7 +537,7 @@ __ckpt_process(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_CKPT *ckptbase)
* must be paired in the checkpoint.
*/
if (a->root_offset != WT_BLOCK_INVALID_OFFSET)
- WT_ERR(__wt_block_insert_ext(session,
+ WT_ERR(__wt_block_insert_ext(session, block,
&a->discard, a->root_offset, a->root_size));
/*
@@ -554,10 +554,10 @@ __ckpt_process(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_CKPT *ckptbase)
*/
if (a->alloc.entries != 0)
WT_ERR(__wt_block_extlist_merge(
- session, &a->alloc, &b->alloc));
+ session, block, &a->alloc, &b->alloc));
if (a->discard.entries != 0)
WT_ERR(__wt_block_extlist_merge(
- session, &a->discard, &b->discard));
+ session, block, &a->discard, &b->discard));
/*
* If the "to" checkpoint is also being deleted, we're done with
@@ -775,7 +775,8 @@ __wt_block_checkpoint_resolve(WT_SESSION_IMPL *session, WT_BLOCK *block)
block->ckpt_inprogress = 0;
__wt_spin_lock(session, &block->live_lock);
- ret = __wt_block_extlist_merge(session, &ci->ckpt_avail, &ci->avail);
+ ret = __wt_block_extlist_merge(
+ session, block, &ci->ckpt_avail, &ci->avail);
__wt_spin_unlock(session, &block->live_lock);
/* Discard the lists remaining after the checkpoint call. */
diff --git a/src/third_party/wiredtiger/src/block/block_ext.c b/src/third_party/wiredtiger/src/block/block_ext.c
index e89c70060f3..d593537446b 100644
--- a/src/third_party/wiredtiger/src/block/block_ext.c
+++ b/src/third_party/wiredtiger/src/block/block_ext.c
@@ -8,12 +8,25 @@
#include "wt_internal.h"
-static int __block_append(WT_SESSION_IMPL *, WT_EXTLIST *, wt_off_t, wt_off_t);
+/*
+ * WT_BLOCK_RET --
+ * Handle extension list errors that would normally panic the system but
+ * which should fail gracefully when verifying.
+ */
+#define WT_BLOCK_RET(session, block, v, ...) do { \
+ int __ret = (v); \
+ __wt_err(session, __ret, __VA_ARGS__); \
+ return ((block)->verify ? __ret : __wt_panic(session)); \
+} while (0)
+
+static int __block_append(WT_SESSION_IMPL *,
+ WT_BLOCK *, WT_EXTLIST *, wt_off_t, wt_off_t);
static int __block_ext_overlap(WT_SESSION_IMPL *,
WT_BLOCK *, WT_EXTLIST *, WT_EXT **, WT_EXTLIST *, WT_EXT **);
static int __block_extlist_dump(
WT_SESSION_IMPL *, const char *, WT_EXTLIST *, int);
-static int __block_merge(WT_SESSION_IMPL *, WT_EXTLIST *, wt_off_t, wt_off_t);
+static int __block_merge(WT_SESSION_IMPL *,
+ WT_BLOCK *, WT_EXTLIST *, wt_off_t, wt_off_t);
/*
* __block_off_srch_last --
@@ -308,8 +321,8 @@ __wt_block_misplaced(WT_SESSION_IMPL *session,
* Remove a record from an extent list.
*/
static int
-__block_off_remove(
- WT_SESSION_IMPL *session, WT_EXTLIST *el, wt_off_t off, WT_EXT **extp)
+__block_off_remove(WT_SESSION_IMPL *session, WT_BLOCK *block,
+ WT_EXTLIST *el, wt_off_t off, WT_EXT **extp)
{
WT_EXT *ext, **astack[WT_SKIP_MAXDEPTH];
WT_SIZE *szp, **sstack[WT_SKIP_MAXDEPTH];
@@ -370,7 +383,7 @@ __block_off_remove(
return (0);
corrupt:
- WT_PANIC_RET(session, EINVAL,
+ WT_BLOCK_RET(session, block, EINVAL,
"attempt to remove non-existent offset from an extent list");
}
@@ -380,8 +393,8 @@ corrupt:
* overlapping entry.
*/
int
-__wt_block_off_remove_overlap(
- WT_SESSION_IMPL *session, WT_EXTLIST *el, wt_off_t off, wt_off_t size)
+__wt_block_off_remove_overlap(WT_SESSION_IMPL *session, WT_BLOCK *block,
+ WT_EXTLIST *el, wt_off_t off, wt_off_t size)
{
WT_EXT *before, *after, *ext;
wt_off_t a_off, a_size, b_off, b_size;
@@ -393,7 +406,8 @@ __wt_block_off_remove_overlap(
/* If "before" or "after" overlaps, retrieve the overlapping entry. */
if (before != NULL && before->off + before->size > off) {
- WT_RET(__block_off_remove(session, el, before->off, &ext));
+ WT_RET(__block_off_remove(
+ session, block, el, before->off, &ext));
/* Calculate overlapping extents. */
a_off = ext->off;
@@ -401,7 +415,8 @@ __wt_block_off_remove_overlap(
b_off = off + size;
b_size = ext->size - (a_size + size);
} else if (after != NULL && off + size > after->off) {
- WT_RET(__block_off_remove(session, el, after->off, &ext));
+ WT_RET(__block_off_remove(
+ session, block, el, after->off, &ext));
/*
* Calculate overlapping extents. There's no initial overlap
@@ -525,7 +540,7 @@ __wt_block_alloc(
__block_size_srch(block->live.avail.sz, size, sstack);
if ((szp = *sstack[0]) == NULL) {
append: WT_RET(__block_extend(session, block, offp, size));
- WT_RET(__block_append(session,
+ WT_RET(__block_append(session, block,
&block->live.alloc, *offp, (wt_off_t)size));
return (0);
}
@@ -535,7 +550,8 @@ append: WT_RET(__block_extend(session, block, offp, size));
}
/* Remove the record, and set the returned offset. */
- WT_RET(__block_off_remove(session, &block->live.avail, ext->off, &ext));
+ WT_RET(__block_off_remove(
+ session, block, &block->live.avail, ext->off, &ext));
*offp = ext->off;
/* If doing a partial allocation, adjust the record and put it back. */
@@ -561,7 +577,7 @@ append: WT_RET(__block_extend(session, block, offp, size));
/* Add the newly allocated extent to the list of allocations. */
WT_RET(__block_merge(
- session, &block->live.alloc, *offp, (wt_off_t)size));
+ session, block, &block->live.alloc, *offp, (wt_off_t)size));
return (0);
}
@@ -618,12 +634,12 @@ __wt_block_off_free(
* list.
*/
if ((ret = __wt_block_off_remove_overlap(
- session, &block->live.alloc, offset, size)) == 0)
- ret = __block_merge(
- session, &block->live.avail, offset, (wt_off_t)size);
+ session, block, &block->live.alloc, offset, size)) == 0)
+ ret = __block_merge(session, block,
+ &block->live.avail, offset, (wt_off_t)size);
else if (ret == WT_NOTFOUND)
- ret = __block_merge(
- session, &block->live.discard, offset, (wt_off_t)size);
+ ret = __block_merge(session, block,
+ &block->live.discard, offset, (wt_off_t)size);
return (ret);
}
@@ -770,9 +786,12 @@ __block_ext_overlap(WT_SESSION_IMPL *session,
*/
*ap = (*ap)->next[0];
*bp = (*bp)->next[0];
- WT_RET(__block_merge(session, avail, b->off, b->size));
- WT_RET(__block_off_remove(session, ael, a->off, NULL));
- WT_RET(__block_off_remove(session, bel, b->off, NULL));
+ WT_RET(__block_merge(
+ session, block, avail, b->off, b->size));
+ WT_RET(__block_off_remove(
+ session, block, ael, a->off, NULL));
+ WT_RET(__block_off_remove(
+ session, block, bel, b->off, NULL));
}
else if (a->size > b->size) { /* Case #4 */
/*
@@ -780,7 +799,8 @@ __block_ext_overlap(WT_SESSION_IMPL *session,
* Increment/Decrement A's offset/size by the size of B
* Insert A on its list
*/
- WT_RET(__block_off_remove(session, ael, a->off, &a));
+ WT_RET(__block_off_remove(
+ session, block, ael, a->off, &a));
a->off += b->size;
a->size -= b->size;
WT_RET(__block_ext_insert(session, ael, a));
@@ -791,15 +811,18 @@ __block_ext_overlap(WT_SESSION_IMPL *session,
* Delete B
*/
*bp = (*bp)->next[0];
- WT_RET(__block_merge(session, avail, b->off, b->size));
- WT_RET(__block_off_remove(session, bel, b->off, NULL));
+ WT_RET(__block_merge(
+ session, block, avail, b->off, b->size));
+ WT_RET(__block_off_remove(
+ session, block, bel, b->off, NULL));
} else { /* Case #9 */
/*
* Remove B from its list
* Increment/Decrement B's offset/size by the size of A
* Insert B on its list
*/
- WT_RET(__block_off_remove(session, bel, b->off, &b));
+ WT_RET(__block_off_remove(
+ session, block, bel, b->off, &b));
b->off += a->size;
b->size -= a->size;
WT_RET(__block_ext_insert(session, bel, b));
@@ -810,8 +833,10 @@ __block_ext_overlap(WT_SESSION_IMPL *session,
* Delete A
*/
*ap = (*ap)->next[0];
- WT_RET(__block_merge(session, avail, a->off, a->size));
- WT_RET(__block_off_remove(session, ael, a->off, NULL));
+ WT_RET(__block_merge(
+ session, block, avail, a->off, a->size));
+ WT_RET(__block_off_remove(
+ session, block, ael, a->off, NULL));
} /* Case #6 */
} else if (a->off + a->size == b->off + b->size) {
/*
@@ -819,7 +844,7 @@ __block_ext_overlap(WT_SESSION_IMPL *session,
* Decrement A's size by the size of B
* Insert A on its list
*/
- WT_RET(__block_off_remove(session, ael, a->off, &a));
+ WT_RET(__block_off_remove(session, block, ael, a->off, &a));
a->size -= b->size;
WT_RET(__block_ext_insert(session, ael, a));
@@ -829,8 +854,8 @@ __block_ext_overlap(WT_SESSION_IMPL *session,
* Delete B
*/
*bp = (*bp)->next[0];
- WT_RET(__block_merge(session, avail, b->off, b->size));
- WT_RET(__block_off_remove(session, bel, b->off, NULL));
+ WT_RET(__block_merge(session, block, avail, b->off, b->size));
+ WT_RET(__block_off_remove(session, block, bel, b->off, NULL));
} else if /* Case #3, #7 */
(a->off + a->size < b->off + b->size) {
/*
@@ -838,14 +863,14 @@ __block_ext_overlap(WT_SESSION_IMPL *session,
*/
off = b->off;
size = (a->off + a->size) - b->off;
- WT_RET(__block_merge(session, avail, off, size));
+ WT_RET(__block_merge(session, block, avail, off, size));
/*
* Remove A from its list
* Decrement A's size by the overlap
* Insert A on its list
*/
- WT_RET(__block_off_remove(session, ael, a->off, &a));
+ WT_RET(__block_off_remove(session, block, ael, a->off, &a));
a->size -= size;
WT_RET(__block_ext_insert(session, ael, a));
@@ -854,7 +879,7 @@ __block_ext_overlap(WT_SESSION_IMPL *session,
* Increment/Decrement B's offset/size by the overlap
* Insert B on its list
*/
- WT_RET(__block_off_remove(session, bel, b->off, &b));
+ WT_RET(__block_off_remove(session, block, bel, b->off, &b));
b->off += size;
b->size -= size;
WT_RET(__block_ext_insert(session, bel, b));
@@ -868,12 +893,12 @@ __block_ext_overlap(WT_SESSION_IMPL *session,
* Decrement A's size by trailing part of A plus B's size
* Insert A on its list
*/
- WT_RET(__block_off_remove(session, ael, a->off, &a));
+ WT_RET(__block_off_remove(session, block, ael, a->off, &a));
a->size = b->off - a->off;
WT_RET(__block_ext_insert(session, ael, a));
/* Add trailing part of A to A's list as a new element. */
- WT_RET(__block_merge(session, ael, off, size));
+ WT_RET(__block_merge(session, block, ael, off, size));
/*
* Move caller's B to the next element
@@ -881,8 +906,8 @@ __block_ext_overlap(WT_SESSION_IMPL *session,
* Delete B
*/
*bp = (*bp)->next[0];
- WT_RET(__block_merge(session, avail, b->off, b->size));
- WT_RET(__block_off_remove(session, bel, b->off, NULL));
+ WT_RET(__block_merge(session, block, avail, b->off, b->size));
+ WT_RET(__block_off_remove(session, block, bel, b->off, NULL));
}
return (0);
@@ -893,7 +918,8 @@ __block_ext_overlap(WT_SESSION_IMPL *session,
* Merge one extent list into another.
*/
int
-__wt_block_extlist_merge(WT_SESSION_IMPL *session, WT_EXTLIST *a, WT_EXTLIST *b)
+__wt_block_extlist_merge(WT_SESSION_IMPL *session, WT_BLOCK *block,
+ WT_EXTLIST *a, WT_EXTLIST *b)
{
WT_EXT *ext;
WT_EXTLIST tmp;
@@ -923,7 +949,7 @@ __wt_block_extlist_merge(WT_SESSION_IMPL *session, WT_EXTLIST *a, WT_EXTLIST *b)
}
WT_EXT_FOREACH(ext, a->off)
- WT_RET(__block_merge(session, b, ext->off, ext->size));
+ WT_RET(__block_merge(session, block, b, ext->off, ext->size));
return (0);
}
@@ -933,12 +959,13 @@ __wt_block_extlist_merge(WT_SESSION_IMPL *session, WT_EXTLIST *a, WT_EXTLIST *b)
* Append a new entry to the allocation list.
*/
static int
-__block_append(
- WT_SESSION_IMPL *session, WT_EXTLIST *el, wt_off_t off, wt_off_t size)
+__block_append(WT_SESSION_IMPL *session, WT_BLOCK *block,
+ WT_EXTLIST *el, wt_off_t off, wt_off_t size)
{
WT_EXT *ext, **astack[WT_SKIP_MAXDEPTH];
u_int i;
+ WT_UNUSED(block);
WT_ASSERT(session, el->track_size == 0);
/*
@@ -979,8 +1006,8 @@ __block_append(
* Insert an extent into an extent list, merging if possible.
*/
int
-__wt_block_insert_ext(
- WT_SESSION_IMPL *session, WT_EXTLIST *el, wt_off_t off, wt_off_t size)
+__wt_block_insert_ext(WT_SESSION_IMPL *session, WT_BLOCK *block,
+ WT_EXTLIST *el, wt_off_t off, wt_off_t size)
{
/*
* There are currently two copies of this function (this code is a one-
@@ -993,7 +1020,7 @@ __wt_block_insert_ext(
* Callers of this function are expected to have already acquired any
* locks required to manipulate the extent list.
*/
- return (__block_merge(session, el, off, size));
+ return (__block_merge(session, block, el, off, size));
}
/*
@@ -1002,8 +1029,8 @@ __wt_block_insert_ext(
* version).
*/
static int
-__block_merge(
- WT_SESSION_IMPL *session, WT_EXTLIST *el, wt_off_t off, wt_off_t size)
+__block_merge(WT_SESSION_IMPL *session, WT_BLOCK *block,
+ WT_EXTLIST *el, wt_off_t off, wt_off_t size)
{
WT_EXT *ext, *after, *before;
@@ -1014,7 +1041,7 @@ __block_merge(
__block_off_srch_pair(el, off, &before, &after);
if (before != NULL) {
if (before->off + before->size > off)
- WT_PANIC_RET(session, EINVAL,
+ WT_BLOCK_RET(session, block, EINVAL,
"%s: existing range %" PRIdMAX "-%" PRIdMAX
" overlaps with merge range %" PRIdMAX "-%" PRIdMAX,
el->name,
@@ -1025,8 +1052,8 @@ __block_merge(
before = NULL;
}
if (after != NULL) {
- if (off + size > after->off)
- WT_PANIC_RET(session, EINVAL,
+ if (off + size > after->off) {
+ WT_BLOCK_RET(session, block, EINVAL,
"%s: merge range %" PRIdMAX "-%" PRIdMAX
" overlaps with existing range %" PRIdMAX
"-%" PRIdMAX,
@@ -1034,6 +1061,7 @@ __block_merge(
(intmax_t)off, (intmax_t)(off + size),
(intmax_t)after->off,
(intmax_t)(after->off + after->size));
+ }
if (off + size != after->off)
after = NULL;
}
@@ -1053,7 +1081,8 @@ __block_merge(
* the record we're going to use, adjust it and re-insert it.
*/
if (before == NULL) {
- WT_RET(__block_off_remove(session, el, after->off, &ext));
+ WT_RET(__block_off_remove(
+ session, block, el, after->off, &ext));
WT_RET(__wt_verbose(session, WT_VERB_BLOCK,
"%s: range grows from %" PRIdMAX "-%" PRIdMAX ", to %"
@@ -1067,10 +1096,11 @@ __block_merge(
} else {
if (after != NULL) {
size += after->size;
- WT_RET(
- __block_off_remove(session, el, after->off, NULL));
+ WT_RET(__block_off_remove(
+ session, block, el, after->off, NULL));
}
- WT_RET(__block_off_remove(session, el, before->off, &ext));
+ WT_RET(__block_off_remove(
+ session, block, el, before->off, &ext));
WT_RET(__wt_verbose(session, WT_VERB_BLOCK,
"%s: range grows from %" PRIdMAX "-%" PRIdMAX ", to %"
@@ -1115,8 +1145,8 @@ __wt_block_extlist_read_avail(WT_SESSION_IMPL *session,
* Extent blocks are allocated from the available list: if reading the
* avail list, the extent blocks might be included, remove them.
*/
- WT_ERR_NOTFOUND_OK(
- __wt_block_off_remove_overlap(session, el, el->offset, el->size));
+ WT_ERR_NOTFOUND_OK(__wt_block_off_remove_overlap(
+ session, block, el, el->offset, el->size));
err:
#ifdef HAVE_DIAGNOSTIC
@@ -1137,7 +1167,8 @@ __wt_block_extlist_read(WT_SESSION_IMPL *session,
WT_DECL_ITEM(tmp);
WT_DECL_RET;
wt_off_t off, size;
- int (*func)(WT_SESSION_IMPL *, WT_EXTLIST *, wt_off_t, wt_off_t);
+ int (*func)(
+ WT_SESSION_IMPL *, WT_BLOCK *, WT_EXTLIST *, wt_off_t, wt_off_t);
const uint8_t *p;
/* If there isn't a list, we're done. */
@@ -1187,14 +1218,16 @@ __wt_block_extlist_read(WT_SESSION_IMPL *session,
if (off < block->allocsize ||
off % block->allocsize != 0 ||
size % block->allocsize != 0 ||
- off + size > ckpt_size)
-corrupted: WT_PANIC_RET(session, WT_ERROR,
+ off + size > ckpt_size) {
+corrupted: __wt_scr_free(session, &tmp);
+ WT_BLOCK_RET(session, block, WT_ERROR,
"file contains a corrupted %s extent list, range %"
PRIdMAX "-%" PRIdMAX " past end-of-file",
el->name,
(intmax_t)off, (intmax_t)(off + size));
+ }
- WT_ERR(func(session, el, off, size));
+ WT_ERR(func(session, block, el, off, size));
}
if (WT_VERBOSE_ISSET(session, WT_VERB_BLOCK))
@@ -1290,7 +1323,7 @@ __wt_block_extlist_write(WT_SESSION_IMPL *session,
* blocks never appear on any allocation list.
*/
WT_TRET(__wt_block_off_remove_overlap(
- session, &block->live.alloc, el->offset, el->size));
+ session, block, &block->live.alloc, el->offset, el->size));
WT_ERR(__wt_verbose(session, WT_VERB_BLOCK,
"%s written %" PRIdMAX "/%" PRIu32,
@@ -1331,7 +1364,7 @@ __wt_block_extlist_truncate(
*/
orig = fh->size;
size = ext->off;
- WT_RET(__block_off_remove(session, el, size, NULL));
+ WT_RET(__block_off_remove(session, block, el, size, NULL));
fh->size = size;
/*
diff --git a/src/third_party/wiredtiger/src/block/block_open.c b/src/third_party/wiredtiger/src/block/block_open.c
index 8e45ec85a97..4728066b487 100644
--- a/src/third_party/wiredtiger/src/block/block_open.c
+++ b/src/third_party/wiredtiger/src/block/block_open.c
@@ -133,8 +133,7 @@ __block_destroy(WT_SESSION_IMPL *session, WT_BLOCK *block)
bucket = block->name_hash % WT_HASH_ARRAY_SIZE;
WT_CONN_BLOCK_REMOVE(conn, block, bucket);
- if (block->name != NULL)
- __wt_free(session, block->name);
+ __wt_free(session, block->name);
if (block->fh != NULL)
WT_TRET(__wt_close(session, &block->fh));
@@ -196,14 +195,20 @@ __wt_block_open(WT_SESSION_IMPL *session,
}
}
- /* Basic structure allocation, initialization. */
+ /*
+ * Basic structure allocation, initialization.
+ *
+ * Note: set the block's name-hash value before any work that can fail
+ * because cleanup calls the block destroy code which uses that hash
+ * value to remove the block from the underlying linked lists.
+ */
WT_ERR(__wt_calloc_one(session, &block));
block->ref = 1;
+ block->name_hash = hash;
+ block->allocsize = allocsize;
WT_CONN_BLOCK_INSERT(conn, block, bucket);
WT_ERR(__wt_strdup(session, filename, &block->name));
- block->name_hash = hash;
- block->allocsize = allocsize;
WT_ERR(__wt_config_gets(session, cfg, "block_allocation", &cval));
block->allocfirst =
diff --git a/src/third_party/wiredtiger/src/block/block_read.c b/src/third_party/wiredtiger/src/block/block_read.c
index ef944fcb152..0d631396b41 100644
--- a/src/third_party/wiredtiger/src/block/block_read.c
+++ b/src/third_party/wiredtiger/src/block/block_read.c
@@ -192,21 +192,29 @@ __wt_block_read_off(WT_SESSION_IMPL *session, WT_BLOCK *block,
buf->size = size;
blk = WT_BLOCK_HEADER_REF(buf->mem);
- page_cksum = blk->cksum;
- if (page_cksum == cksum) {
+ if (blk->cksum == cksum) {
blk->cksum = 0;
page_cksum = __wt_cksum(buf->mem,
F_ISSET(blk, WT_BLOCK_DATA_CKSUM) ?
size : WT_BLOCK_COMPRESS_SKIP);
if (page_cksum == cksum)
return (0);
- }
- if (!F_ISSET(session, WT_SESSION_SALVAGE_CORRUPT_OK))
- __wt_errx(session,
- "read checksum error [%" PRIu32 "B @ %" PRIuMAX ", %"
- PRIu32 " != %" PRIu32 "]",
- size, (uintmax_t)offset, cksum, page_cksum);
+ if (!F_ISSET(session, WT_SESSION_SALVAGE_CORRUPT_OK))
+ __wt_errx(session,
+ "read checksum error for %" PRIu32 "B block at "
+ "offset %" PRIuMAX ": calculated block checksum "
+ "of %" PRIu32 " doesn't match expected checksum "
+ "of %" PRIu32,
+ size, (uintmax_t)offset, page_cksum, cksum);
+ } else
+ if (!F_ISSET(session, WT_SESSION_SALVAGE_CORRUPT_OK))
+ __wt_errx(session,
+ "read checksum error for %" PRIu32 "B block at "
+ "offset %" PRIuMAX ": block header checksum "
+ "of %" PRIu32 " doesn't match expected checksum "
+ "of %" PRIu32,
+ size, (uintmax_t)offset, blk->cksum, cksum);
/* Panic if a checksum fails during an ordinary read. */
return (block->verify ||
diff --git a/src/third_party/wiredtiger/src/block/block_slvg.c b/src/third_party/wiredtiger/src/block/block_slvg.c
index 517fb92491e..c78a6c39942 100644
--- a/src/third_party/wiredtiger/src/block/block_slvg.c
+++ b/src/third_party/wiredtiger/src/block/block_slvg.c
@@ -53,7 +53,7 @@ __wt_block_salvage_start(WT_SESSION_IMPL *session, WT_BLOCK *block)
* any blocks we don't want as we process the file.
*/
WT_RET(__wt_block_insert_ext(
- session, &block->live.alloc, allocsize, len - allocsize));
+ session, block, &block->live.alloc, allocsize, len - allocsize));
return (0);
}
diff --git a/src/third_party/wiredtiger/src/block/block_vrfy.c b/src/third_party/wiredtiger/src/block/block_vrfy.c
index 29a9e4950b4..c9df768a624 100644
--- a/src/third_party/wiredtiger/src/block/block_vrfy.c
+++ b/src/third_party/wiredtiger/src/block/block_vrfy.c
@@ -87,6 +87,12 @@ __wt_block_verify_start(WT_SESSION_IMPL *session,
WT_RET(__bit_alloc(session, block->frags, &block->fragfile));
/*
+ * Set this before reading any extent lists: don't panic if we see
+ * corruption.
+ */
+ block->verify = 1;
+
+ /*
* We maintain an allocation list that is rolled forward through the
* set of checkpoints.
*/
@@ -102,8 +108,6 @@ __wt_block_verify_start(WT_SESSION_IMPL *session,
/* Configuration: strict behavior on any error. */
WT_RET(__wt_config_gets(session, cfg, "strict", &cval));
block->verify_strict = cval.val ? 1 : 0;
-
- block->verify = 1;
return (0);
}
@@ -228,7 +232,7 @@ __wt_verify_ckpt_load(
WT_RET(__wt_block_extlist_read(
session, block, el, ci->file_size));
WT_RET(__wt_block_extlist_merge(
- session, el, &block->verify_alloc));
+ session, block, el, &block->verify_alloc));
__wt_block_extlist_free(session, el);
}
el = &ci->discard;
@@ -236,7 +240,7 @@ __wt_verify_ckpt_load(
WT_RET(__wt_block_extlist_read(
session, block, el, ci->file_size));
WT_EXT_FOREACH(ext, el->off)
- WT_RET(__wt_block_off_remove_overlap(session,
+ WT_RET(__wt_block_off_remove_overlap(session, block,
&block->verify_alloc, ext->off, ext->size));
__wt_block_extlist_free(session, el);
}
@@ -265,7 +269,7 @@ __wt_verify_ckpt_load(
* checkpoints.
*/
if (ci->root_offset != WT_BLOCK_INVALID_OFFSET)
- WT_RET(__wt_block_off_remove_overlap(session,
+ WT_RET(__wt_block_off_remove_overlap(session, block,
&block->verify_alloc, ci->root_offset, ci->root_size));
/*
diff --git a/src/third_party/wiredtiger/src/btree/bt_debug.c b/src/third_party/wiredtiger/src/btree/bt_debug.c
index dba2da223bd..041398d4e43 100644
--- a/src/third_party/wiredtiger/src/btree/bt_debug.c
+++ b/src/third_party/wiredtiger/src/btree/bt_debug.c
@@ -600,7 +600,7 @@ __debug_page_metadata(WT_DBG *ds, WT_PAGE *page)
switch (page->type) {
case WT_PAGE_COL_INT:
__dmsg(ds, " recno %" PRIu64, page->pg_intl_recno);
- pindex = WT_INTL_INDEX_COPY(page);
+ pindex = WT_INTL_INDEX_GET_SAFE(page);
entries = pindex->entries;
break;
case WT_PAGE_COL_FIX:
@@ -612,7 +612,7 @@ __debug_page_metadata(WT_DBG *ds, WT_PAGE *page)
entries = page->pg_var_entries;
break;
case WT_PAGE_ROW_INT:
- pindex = WT_INTL_INDEX_COPY(page);
+ pindex = WT_INTL_INDEX_GET_SAFE(page);
entries = pindex->entries;
break;
case WT_PAGE_ROW_LEAF:
@@ -634,8 +634,8 @@ __debug_page_metadata(WT_DBG *ds, WT_PAGE *page)
__dmsg(ds, ", evict-lru");
if (F_ISSET_ATOMIC(page, WT_PAGE_SCANNING))
__dmsg(ds, ", scanning");
- if (F_ISSET_ATOMIC(page, WT_PAGE_SPLITTING))
- __dmsg(ds, ", splitting");
+ if (F_ISSET_ATOMIC(page, WT_PAGE_SPLIT_LOCKED))
+ __dmsg(ds, ", split locked");
if (mod != NULL)
switch (F_ISSET(mod, WT_PM_REC_MASK)) {
diff --git a/src/third_party/wiredtiger/src/btree/bt_discard.c b/src/third_party/wiredtiger/src/btree/bt_discard.c
index 2a0a5e37f98..a05c6217338 100644
--- a/src/third_party/wiredtiger/src/btree/bt_discard.c
+++ b/src/third_party/wiredtiger/src/btree/bt_discard.c
@@ -56,7 +56,7 @@ __wt_page_out(WT_SESSION_IMPL *session, WT_PAGE **pagep)
*/
WT_ASSERT(session, !__wt_page_is_modified(page));
WT_ASSERT(session, !F_ISSET_ATOMIC(page, WT_PAGE_EVICT_LRU));
- WT_ASSERT(session, !F_ISSET_ATOMIC(page, WT_PAGE_SPLITTING));
+ WT_ASSERT(session, !F_ISSET_ATOMIC(page, WT_PAGE_SPLIT_LOCKED));
#ifdef HAVE_DIAGNOSTIC
{
@@ -210,7 +210,7 @@ __free_page_modify(WT_SESSION_IMPL *session, WT_PAGE *page)
static void
__free_page_int(WT_SESSION_IMPL *session, WT_PAGE *page)
{
- __wt_free_ref_index(session, page, WT_INTL_INDEX_COPY(page), 0);
+ __wt_free_ref_index(session, page, WT_INTL_INDEX_GET_SAFE(page), 0);
}
/*
diff --git a/src/third_party/wiredtiger/src/btree/bt_handle.c b/src/third_party/wiredtiger/src/btree/bt_handle.c
index 9d9ab66e0f7..e249f997d87 100644
--- a/src/third_party/wiredtiger/src/btree/bt_handle.c
+++ b/src/third_party/wiredtiger/src/btree/bt_handle.c
@@ -422,7 +422,7 @@ __btree_tree_open_empty(WT_SESSION_IMPL *session, int creation)
__wt_page_alloc(session, WT_PAGE_COL_INT, 1, 1, 1, &root));
root->pg_intl_parent_ref = &btree->root;
- pindex = WT_INTL_INDEX_COPY(root);
+ pindex = WT_INTL_INDEX_GET_SAFE(root);
ref = pindex->index[0];
ref->home = root;
ref->page = NULL;
@@ -435,7 +435,7 @@ __btree_tree_open_empty(WT_SESSION_IMPL *session, int creation)
__wt_page_alloc(session, WT_PAGE_ROW_INT, 0, 1, 1, &root));
root->pg_intl_parent_ref = &btree->root;
- pindex = WT_INTL_INDEX_COPY(root);
+ pindex = WT_INTL_INDEX_GET_SAFE(root);
ref = pindex->index[0];
ref->home = root;
ref->page = NULL;
diff --git a/src/third_party/wiredtiger/src/btree/bt_page.c b/src/third_party/wiredtiger/src/btree/bt_page.c
index 120220223f8..b8b67720fce 100644
--- a/src/third_party/wiredtiger/src/btree/bt_page.c
+++ b/src/third_party/wiredtiger/src/btree/bt_page.c
@@ -272,7 +272,7 @@ __wt_page_alloc(WT_SESSION_IMPL *session, uint8_t type,
size += sizeof(WT_REF);
}
if (0) {
-err: if ((pindex = WT_INTL_INDEX_COPY(page)) != NULL) {
+err: if ((pindex = WT_INTL_INDEX_GET_SAFE(page)) != NULL) {
for (i = 0; i < pindex->entries; ++i)
__wt_free(session, pindex->index[i]);
__wt_free(session, pindex);
@@ -459,7 +459,7 @@ __inmem_col_int(WT_SESSION_IMPL *session, WT_PAGE *page)
* Walk the page, building references: the page contains value items.
* The value items are on-page items (WT_CELL_VALUE).
*/
- pindex = WT_INTL_INDEX_COPY(page);
+ pindex = WT_INTL_INDEX_GET_SAFE(page);
refp = pindex->index;
WT_CELL_FOREACH(btree, dsk, cell, unpack, i) {
ref = *refp++;
@@ -594,7 +594,7 @@ __inmem_row_int(WT_SESSION_IMPL *session, WT_PAGE *page, size_t *sizep)
* location cookie pairs. Keys are on-page/overflow items and location
* cookies are WT_CELL_ADDR_XXX items.
*/
- pindex = WT_INTL_INDEX_COPY(page);
+ pindex = WT_INTL_INDEX_GET_SAFE(page);
refp = pindex->index;
WT_CELL_FOREACH(btree, dsk, cell, unpack, i) {
ref = *refp;
diff --git a/src/third_party/wiredtiger/src/btree/bt_slvg.c b/src/third_party/wiredtiger/src/btree/bt_slvg.c
index ba1802116d0..e493a84679a 100644
--- a/src/third_party/wiredtiger/src/btree/bt_slvg.c
+++ b/src/third_party/wiredtiger/src/btree/bt_slvg.c
@@ -1175,7 +1175,7 @@ __slvg_col_build_internal(
__wt_page_alloc(session, WT_PAGE_COL_INT, 1, leaf_cnt, 1, &page));
WT_ERR(__slvg_modify_init(session, page));
- pindex = WT_INTL_INDEX_COPY(page);
+ WT_INTL_INDEX_GET(session, page, pindex);
for (refp = pindex->index, i = 0; i < ss->pages_next; ++i) {
if ((trk = ss->pages[i]) == NULL)
continue;
@@ -1820,7 +1820,7 @@ __slvg_row_build_internal(
__wt_page_alloc(session, WT_PAGE_ROW_INT, 0, leaf_cnt, 1, &page));
WT_ERR(__slvg_modify_init(session, page));
- pindex = WT_INTL_INDEX_COPY(page);
+ WT_INTL_INDEX_GET(session, page, pindex);
for (refp = pindex->index, i = 0; i < ss->pages_next; ++i) {
if ((trk = ss->pages[i]) == NULL)
continue;
diff --git a/src/third_party/wiredtiger/src/btree/bt_split.c b/src/third_party/wiredtiger/src/btree/bt_split.c
index eb2382cd610..acef71f1d94 100644
--- a/src/third_party/wiredtiger/src/btree/bt_split.c
+++ b/src/third_party/wiredtiger/src/btree/bt_split.c
@@ -178,7 +178,7 @@ __split_should_deepen(
btree = S2BT(session);
page = ref->page;
- pindex = WT_INTL_INDEX_COPY(page);
+ pindex = WT_INTL_INDEX_GET_SAFE(page);
/*
* Deepen the tree if the page's memory footprint is larger than the
@@ -393,7 +393,7 @@ __split_deepen(WT_SESSION_IMPL *session, WT_PAGE *parent, uint32_t children)
parent_incr = parent_decr = 0;
panic = 0;
- pindex = WT_INTL_INDEX_COPY(parent);
+ pindex = WT_INTL_INDEX_GET_SAFE(parent);
WT_STAT_FAST_CONN_INCR(session, cache_eviction_deepen);
WT_STAT_FAST_DATA_INCR(session, cache_eviction_deepen);
@@ -491,7 +491,7 @@ __split_deepen(WT_SESSION_IMPL *session, WT_PAGE *parent, uint32_t children)
* to change.
*/
child_incr = 0;
- child_pindex = WT_INTL_INDEX_COPY(child);
+ child_pindex = WT_INTL_INDEX_GET_SAFE(child);
for (child_refp = child_pindex->index, j = 0; j < slots; ++j) {
WT_ERR(__split_ref_deepen_move(session,
parent, *parent_refp, &parent_decr, &child_incr));
@@ -518,7 +518,7 @@ __split_deepen(WT_SESSION_IMPL *session, WT_PAGE *parent, uint32_t children)
* footprint. From now on we've modified the parent page, attention
* needs to be paid.
*/
- WT_ASSERT(session, WT_INTL_INDEX_COPY(parent) == pindex);
+ WT_ASSERT(session, WT_INTL_INDEX_GET_SAFE(parent) == pindex);
WT_INTL_INDEX_SET(parent, alloc_index);
split_gen = WT_ATOMIC_ADD8(S2C(session)->split_gen, 1);
panic = 1;
@@ -567,7 +567,7 @@ __split_deepen(WT_SESSION_IMPL *session, WT_PAGE *parent, uint32_t children)
*/
if (child_ref->home == parent) {
child_ref->home = child;
- child_ref->ref_hint = 0;
+ child_ref->pindex_hint = 0;
}
} WT_INTL_FOREACH_END;
}
@@ -825,11 +825,11 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref,
*/
for (;;) {
parent = ref->home;
- F_CAS_ATOMIC(parent, WT_PAGE_SPLITTING, ret);
+ F_CAS_ATOMIC(parent, WT_PAGE_SPLIT_LOCKED, ret);
if (ret == 0) {
if (parent == ref->home)
break;
- F_CLR_ATOMIC(parent, WT_PAGE_SPLITTING);
+ F_CLR_ATOMIC(parent, WT_PAGE_SPLIT_LOCKED);
continue;
}
__wt_yield();
@@ -847,7 +847,7 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref,
hazard = 1;
}
- pindex = WT_INTL_INDEX_COPY(parent);
+ pindex = WT_INTL_INDEX_GET_SAFE(parent);
parent_entries = pindex->entries;
/*
@@ -906,7 +906,7 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref,
* Update the parent page's index: this update makes the split visible
* to threads descending the tree.
*/
- WT_ASSERT(session, WT_INTL_INDEX_COPY(parent) == pindex);
+ WT_ASSERT(session, WT_INTL_INDEX_GET_SAFE(parent) == pindex);
WT_INTL_INDEX_SET(parent, alloc_index);
split_gen = WT_ATOMIC_ADD8(S2C(session)->split_gen, 1);
alloc_index = NULL;
@@ -1037,7 +1037,7 @@ err: if (!complete)
if (next_ref->state == WT_REF_SPLIT)
next_ref->state = WT_REF_DELETED;
}
- F_CLR_ATOMIC(parent, WT_PAGE_SPLITTING);
+ F_CLR_ATOMIC(parent, WT_PAGE_SPLIT_LOCKED);
if (hazard)
WT_TRET(__wt_hazard_clear(session, parent));
diff --git a/src/third_party/wiredtiger/src/btree/bt_sync.c b/src/third_party/wiredtiger/src/btree/bt_sync.c
index ca3b8f327b3..0650f26e5e9 100644
--- a/src/third_party/wiredtiger/src/btree/bt_sync.c
+++ b/src/third_party/wiredtiger/src/btree/bt_sync.c
@@ -70,7 +70,7 @@ __sync_file(WT_SESSION_IMPL *session, int syncop)
if (__wt_page_is_modified(page) &&
__wt_txn_visible_all(
session, page->modify->update_txn)) {
- if (txn->isolation == TXN_ISO_READ_COMMITTED)
+ if (txn->isolation == WT_ISO_READ_COMMITTED)
__wt_txn_get_snapshot(session);
leaf_bytes += page->memory_footprint;
++leaf_pages;
@@ -185,7 +185,7 @@ err: /* On error, clear any left-over tree walk. */
if (walk != NULL)
WT_TRET(__wt_page_release(session, walk, flags));
- if (txn->isolation == TXN_ISO_READ_COMMITTED && session->ncursors == 0)
+ if (txn->isolation == WT_ISO_READ_COMMITTED && session->ncursors == 0)
__wt_txn_release_snapshot(session);
if (btree->checkpointing) {
diff --git a/src/third_party/wiredtiger/src/btree/bt_walk.c b/src/third_party/wiredtiger/src/btree/bt_walk.c
index d146850b505..2705f371fb5 100644
--- a/src/third_party/wiredtiger/src/btree/bt_walk.c
+++ b/src/third_party/wiredtiger/src/btree/bt_walk.c
@@ -9,6 +9,66 @@
#include "wt_internal.h"
/*
+ * __page_refp --
+ * Return the page's index and slot for a reference.
+ */
+static inline void
+__page_refp(WT_SESSION_IMPL *session,
+ WT_REF *ref, WT_PAGE_INDEX **pindexp, uint32_t *slotp)
+{
+ WT_PAGE_INDEX *pindex;
+ uint32_t i;
+
+ /*
+ * Copy the parent page's index value: the page can split at any time,
+ * but the index's value is always valid, even if it's not up-to-date.
+ */
+retry: WT_INTL_INDEX_GET(session, ref->home, pindex);
+
+ /*
+ * Use the page's reference hint: it should be correct unless the page
+ * split before our slot. If the page splits after our slot, the hint
+ * will point earlier in the array than our actual slot, so the first
+ * loop is from the hint to the end of the list, and the second loop
+ * is from the start of the list to the end of the list. (The second
+ * loop overlaps the first, but that only happens in cases where we've
+ * deepened the tree and aren't going to find our slot at all, that's
+ * not worth optimizing.)
+ *
+ * It's not an error for the reference hint to be wrong, it just means
+ * the first retrieval (which sets the hint for subsequent retrievals),
+ * is slower.
+ */
+ i = ref->pindex_hint;
+ if (i < pindex->entries && pindex->index[i]->page == ref->page) {
+ *pindexp = pindex;
+ *slotp = i;
+ return;
+ }
+ while (++i < pindex->entries)
+ if (pindex->index[i]->page == ref->page) {
+ *pindexp = pindex;
+ *slotp = ref->pindex_hint = i;
+ return;
+ }
+ for (i = 0; i < pindex->entries; ++i)
+ if (pindex->index[i]->page == ref->page) {
+ *pindexp = pindex;
+ *slotp = ref->pindex_hint = i;
+ return;
+ }
+
+ /*
+ * If we don't find our reference, the page split into a new level and
+ * our home pointer references the wrong page. After internal pages
+ * deepen, their reference structure home values are updated; yield and
+ * wait for that to happen.
+ */
+ __wt_yield();
+ goto retry;
+}
+
+/*
* __wt_tree_walk --
* Move to the next/previous page in the tree.
*/
@@ -21,10 +81,11 @@ __wt_tree_walk(WT_SESSION_IMPL *session,
WT_PAGE *page;
WT_PAGE_INDEX *pindex;
WT_REF *couple, *couple_orig, *ref;
- int prev, skip;
+ int empty_internal, prev, skip;
uint32_t slot;
btree = S2BT(session);
+ empty_internal = 0;
/*
* Tree walks are special: they look inside page structures that splits
@@ -99,7 +160,7 @@ ascend: /*
}
/* Figure out the current slot in the WT_REF array. */
- __wt_page_refp(session, ref, &pindex, &slot);
+ __page_refp(session, ref, &pindex, &slot);
for (;;) {
/*
@@ -111,6 +172,15 @@ ascend: /*
(!prev && slot == pindex->entries - 1)) {
ref = ref->home->pg_intl_parent_ref;
+ /*
+ * If we got all the way through an internal page and
+ * all of the child pages were deleted, evict it.
+ */
+ if (empty_internal) {
+ __wt_page_evict_soon(ref->page);
+ empty_internal = 0;
+ }
+
/* Optionally skip internal pages. */
if (LF_ISSET(WT_READ_SKIP_INTL))
goto ascend;
@@ -134,19 +204,13 @@ ascend: /*
* parent of the current child page, our parent
* reference can't have split or been evicted.
*/
- __wt_page_refp(session, ref, &pindex, &slot);
+ __page_refp(session, ref, &pindex, &slot);
if ((ret = __wt_page_swap(
session, couple, ref, flags)) != 0) {
WT_TRET(__wt_page_release(
session, couple, flags));
WT_ERR(ret);
}
-
- /*
- * Set the reference hint (used when we continue
- * the walk).
- */
- ref->ref_hint = slot;
}
*refp = ref;
@@ -162,13 +226,22 @@ ascend: /*
++*walkcntp;
for (;;) {
+ /*
+ * Move to the next slot, and set the reference hint if
+ * it's wrong (used when we continue the walk). We don't
+ * update those hints when splitting, so it's common for
+ * them to be incorrect in some workloads.
+ */
ref = pindex->index[slot];
+ if (ref->pindex_hint != slot)
+ ref->pindex_hint = slot;
/*
- * Set the reference hint (used when we continue the
- * walk).
+ * If we see any child states other than deleted, the
+ * page isn't empty.
*/
- ref->ref_hint = slot;
+ if (ref->state != WT_REF_DELETED)
+ empty_internal = 0;
if (LF_ISSET(WT_READ_CACHE)) {
/*
@@ -270,7 +343,7 @@ ascend: /*
couple == couple_orig ||
WT_PAGE_IS_INTERNAL(couple->page));
ref = couple;
- __wt_page_refp(session, ref, &pindex, &slot);
+ __page_refp(session, ref, &pindex, &slot);
if (couple == couple_orig)
break;
}
@@ -282,10 +355,10 @@ ascend: /*
*/
descend: couple = ref;
page = ref->page;
- if (page->type == WT_PAGE_ROW_INT ||
- page->type == WT_PAGE_COL_INT) {
- pindex = WT_INTL_INDEX_COPY(page);
+ if (WT_PAGE_IS_INTERNAL(page)) {
+ WT_INTL_INDEX_GET(session, page, pindex);
slot = prev ? pindex->entries - 1 : 0;
+ empty_internal = 1;
} else {
*refp = ref;
goto done;
diff --git a/src/third_party/wiredtiger/src/btree/col_modify.c b/src/third_party/wiredtiger/src/btree/col_modify.c
index dda56c19636..01db31057fc 100644
--- a/src/third_party/wiredtiger/src/btree/col_modify.c
+++ b/src/third_party/wiredtiger/src/btree/col_modify.c
@@ -160,7 +160,7 @@ __wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt,
* The serial mutex acts as our memory barrier to flush these
* writes before inserting them into the list.
*/
- if (WT_SKIP_FIRST(ins_head) == NULL || recno == 0)
+ if (cbt->ins_stack[0] == NULL || recno == 0)
for (i = 0; i < skipdepth; i++) {
cbt->ins_stack[i] = &ins_head->head[i];
ins->next[i] = cbt->next_stack[i] = NULL;
diff --git a/src/third_party/wiredtiger/src/btree/col_srch.c b/src/third_party/wiredtiger/src/btree/col_srch.c
index db1b565b439..a34a223168d 100644
--- a/src/third_party/wiredtiger/src/btree/col_srch.c
+++ b/src/third_party/wiredtiger/src/btree/col_srch.c
@@ -50,7 +50,7 @@ restart: page = current->page;
WT_ASSERT(session, current->key.recno == page->pg_intl_recno);
WT_ASSERT(session, session->split_gen != 0);
- pindex = WT_INTL_INDEX_COPY(page);
+ WT_INTL_INDEX_GET(session, page, pindex);
base = pindex->entries;
descent = pindex->index[base - 1];
diff --git a/src/third_party/wiredtiger/src/btree/row_modify.c b/src/third_party/wiredtiger/src/btree/row_modify.c
index d56b44bbd95..2dd42de5900 100644
--- a/src/third_party/wiredtiger/src/btree/row_modify.c
+++ b/src/third_party/wiredtiger/src/btree/row_modify.c
@@ -47,13 +47,13 @@ __wt_page_modify_alloc(WT_SESSION_IMPL *session, WT_PAGE *page)
*/
int
__wt_row_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt,
- WT_ITEM *key, WT_ITEM *value, WT_UPDATE *upd, int is_remove)
+ WT_ITEM *key, WT_ITEM *value, WT_UPDATE *upd_arg, int is_remove)
{
WT_DECL_RET;
WT_INSERT *ins;
WT_INSERT_HEAD *ins_head, **ins_headp;
WT_PAGE *page;
- WT_UPDATE *old_upd, **upd_entry;
+ WT_UPDATE *old_upd, *upd, **upd_entry;
size_t ins_size, upd_size;
uint32_t ins_slot;
u_int i, skipdepth;
@@ -61,6 +61,7 @@ __wt_row_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt,
ins = NULL;
page = cbt->ref->page;
+ upd = upd_arg;
logged = 0;
/* This code expects a remove to have a NULL value. */
@@ -90,7 +91,7 @@ __wt_row_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt,
} else
upd_entry = &cbt->ins->upd;
- if (upd == NULL) {
+ if (upd_arg == NULL) {
/* Make sure the update can proceed. */
WT_ERR(__wt_txn_update_check(
session, old_upd = *upd_entry));
@@ -165,7 +166,7 @@ __wt_row_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt,
cbt->ins_head = ins_head;
cbt->ins = ins;
- if (upd == NULL) {
+ if (upd_arg == NULL) {
WT_ERR(
__wt_update_alloc(session, value, &upd, &upd_size));
WT_ERR(__wt_txn_modify(session, upd));
@@ -191,7 +192,7 @@ __wt_row_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt,
* The serial mutex acts as our memory barrier to flush these
* writes before inserting them into the list.
*/
- if (WT_SKIP_FIRST(ins_head) == NULL)
+ if (cbt->ins_stack[0] == NULL)
for (i = 0; i < skipdepth; i++) {
cbt->ins_stack[i] = &ins_head->head[i];
ins->next[i] = cbt->next_stack[i] = NULL;
@@ -218,7 +219,8 @@ err: /*
__wt_txn_unmodify(session);
__wt_free(session, ins);
cbt->ins = NULL;
- __wt_free(session, upd);
+ if (upd_arg == NULL)
+ __wt_free(session, upd);
}
return (ret);
diff --git a/src/third_party/wiredtiger/src/btree/row_srch.c b/src/third_party/wiredtiger/src/btree/row_srch.c
index 9967c5ecb0c..6a8ca5f401c 100644
--- a/src/third_party/wiredtiger/src/btree/row_srch.c
+++ b/src/third_party/wiredtiger/src/btree/row_srch.c
@@ -196,7 +196,7 @@ restart: page = current->page;
break;
WT_ASSERT(session, session->split_gen != 0);
- pindex = WT_INTL_INDEX_COPY(page);
+ WT_INTL_INDEX_GET(session, page, pindex);
/*
* Fast-path internal pages with one child, a common case for
@@ -489,9 +489,9 @@ restart:
break;
WT_ASSERT(session, session->split_gen != 0);
- pindex = WT_INTL_INDEX_COPY(page);
+ WT_INTL_INDEX_GET(session, page, pindex);
descent = pindex->index[
- __wt_random(session->rnd) % pindex->entries];
+ __wt_random(&session->rnd) % pindex->entries];
/*
* Swap the parent page for the child page; return on error,
@@ -524,9 +524,9 @@ restart:
cbt->ref = current;
cbt->compare = 0;
WT_ASSERT(session, session->split_gen != 0);
- pindex = WT_INTL_INDEX_COPY(btree->root.page);
+ WT_INTL_INDEX_GET(session, btree->root.page, pindex);
cbt->slot = pindex->entries < 2 ?
- __wt_random(session->rnd) % page->pg_row_entries : 0;
+ __wt_random(&session->rnd) % page->pg_row_entries : 0;
return (__wt_row_leaf_key(session,
page, page->pg_row_d + cbt->slot, &cbt->search_key, 0));
diff --git a/src/third_party/wiredtiger/src/conn/conn_api.c b/src/third_party/wiredtiger/src/conn/conn_api.c
index b41cad25914..dacbb0539a9 100644
--- a/src/third_party/wiredtiger/src/conn/conn_api.c
+++ b/src/third_party/wiredtiger/src/conn/conn_api.c
@@ -1630,7 +1630,7 @@ wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler,
session = conn->default_session = &conn->dummy_session;
session->iface.connection = &conn->iface;
session->name = "wiredtiger_open";
- __wt_random_init(session->rnd);
+ __wt_random_init(&session->rnd);
__wt_event_handler_set(session, event_handler);
/* Remaining basic initialization of the connection structure. */
diff --git a/src/third_party/wiredtiger/src/conn/conn_log.c b/src/third_party/wiredtiger/src/conn/conn_log.c
index 85d9bb08d26..1e5e322016c 100644
--- a/src/third_party/wiredtiger/src/conn/conn_log.c
+++ b/src/third_party/wiredtiger/src/conn/conn_log.c
@@ -369,7 +369,7 @@ __log_wrlsn_server(void *arg)
WT_CONNECTION_IMPL *conn;
WT_DECL_RET;
WT_LOG *log;
- WT_LOG_WRLSN_ENTRY written[SLOT_POOL];
+ WT_LOG_WRLSN_ENTRY written[WT_SLOT_POOL];
WT_LOGSLOT *slot;
WT_SESSION_IMPL *session;
size_t written_i;
@@ -392,7 +392,7 @@ __log_wrlsn_server(void *arg)
* Walk the array once saving any slots that are in the
* WT_LOG_SLOT_WRITTEN state.
*/
- while (i < SLOT_POOL) {
+ while (i < WT_SLOT_POOL) {
save_i = i;
slot = &log->slot_pool[i++];
if (slot->slot_state != WT_LOG_SLOT_WRITTEN)
@@ -433,7 +433,7 @@ __log_wrlsn_server(void *arg)
/*
* Signal the close thread if needed.
*/
- if (F_ISSET(slot, SLOT_CLOSEFH))
+ if (F_ISSET(slot, WT_SLOT_CLOSEFH))
WT_ERR(__wt_cond_signal(session,
conn->log_file_cond));
WT_ERR(__wt_log_slot_free(session, slot));
@@ -541,9 +541,9 @@ __wt_logmgr_create(WT_SESSION_IMPL *session, const char *cfg[])
&log->log_archive_lock, "log archive lock"));
if (FLD_ISSET(conn->direct_io, WT_FILE_TYPE_LOG))
log->allocsize =
- WT_MAX((uint32_t)conn->buffer_alignment, LOG_ALIGN);
+ WT_MAX((uint32_t)conn->buffer_alignment, WT_LOG_ALIGN);
else
- log->allocsize = LOG_ALIGN;
+ log->allocsize = WT_LOG_ALIGN;
WT_INIT_LSN(&log->alloc_lsn);
WT_INIT_LSN(&log->ckpt_lsn);
WT_INIT_LSN(&log->first_lsn);
diff --git a/src/third_party/wiredtiger/src/evict/evict_lru.c b/src/third_party/wiredtiger/src/evict/evict_lru.c
index 63a905539ce..d99d90ec323 100644
--- a/src/third_party/wiredtiger/src/evict/evict_lru.c
+++ b/src/third_party/wiredtiger/src/evict/evict_lru.c
@@ -157,7 +157,6 @@ __evict_server(void *arg)
WT_CACHE *cache;
WT_CONNECTION_IMPL *conn;
WT_DECL_RET;
- WT_EVICT_WORKER *worker;
WT_SESSION_IMPL *session;
session = arg;
@@ -172,30 +171,6 @@ __evict_server(void *arg)
break;
/*
- * If we have caught up and there are more than the minimum
- * number of eviction workers running, shut one down.
- */
- if (conn->evict_workers > conn->evict_workers_min) {
- WT_TRET(__wt_verbose(session, WT_VERB_EVICTSERVER,
- "Stopping evict worker: %"PRIu32"\n",
- conn->evict_workers));
- worker = &conn->evict_workctx[--conn->evict_workers];
- F_CLR(worker, WT_EVICT_WORKER_RUN);
- WT_TRET(__wt_cond_signal(
- session, cache->evict_waiter_cond));
- WT_TRET(__wt_thread_join(session, worker->tid));
- /*
- * Flag errors here with a message, but don't shut down
- * the eviction server - that's fatal.
- */
- WT_ASSERT(session, ret == 0);
- if (ret != 0) {
- (void)__wt_msg(session,
- "Error stopping eviction worker: %d", ret);
- ret = 0;
- }
- }
- /*
* Clear the walks so we don't pin pages while asleep,
* otherwise we can block applications evicting large pages.
*/
@@ -692,7 +667,7 @@ __wt_evict_page(WT_SESSION_IMPL *session, WT_REF *ref)
__wt_txn_update_oldest(session, 1);
txn = &session->txn;
saved_iso = txn->isolation;
- txn->isolation = TXN_ISO_EVICTION;
+ txn->isolation = WT_ISO_EVICTION;
/*
* Sanity check: if a transaction has updates, its updates should not
diff --git a/src/third_party/wiredtiger/src/include/btmem.h b/src/third_party/wiredtiger/src/include/btmem.h
index 23b17ef2cd3..1c04af1aef3 100644
--- a/src/third_party/wiredtiger/src/include/btmem.h
+++ b/src/third_party/wiredtiger/src/include/btmem.h
@@ -422,8 +422,17 @@ struct __wt_page {
/*
* Macros to copy/set the index because the name is obscured to ensure
* the field isn't read multiple times.
+ *
+ * There are two versions of WT_INTL_INDEX_GET because the session split
+ * generation is usually set, but it's not always required: for example,
+ * if a page is locked for splitting, or being created or destroyed.
*/
-#define WT_INTL_INDEX_COPY(page) ((page)->u.intl.__index)
+#define WT_INTL_INDEX_GET_SAFE(page) \
+ ((page)->u.intl.__index)
+#define WT_INTL_INDEX_GET(session, page, pindex) do { \
+ WT_ASSERT(session, session->split_gen != 0); \
+ (pindex) = WT_INTL_INDEX_GET_SAFE(page); \
+} while (0)
#define WT_INTL_INDEX_SET(page, v) do { \
WT_WRITE_BARRIER(); \
((page)->u.intl.__index) = (v); \
@@ -439,7 +448,7 @@ struct __wt_page {
WT_PAGE_INDEX *__pindex; \
WT_REF **__refp; \
uint32_t __entries; \
- for (__pindex = WT_INTL_INDEX_COPY(page), \
+ for (__pindex = WT_INTL_INDEX_GET_SAFE(page), \
__refp = __pindex->index, \
__entries = __pindex->entries; __entries > 0; --__entries) {\
(ref) = *__refp++;
@@ -541,7 +550,7 @@ struct __wt_page {
#define WT_PAGE_EVICT_LRU 0x08 /* Page is on the LRU queue */
#define WT_PAGE_SCANNING 0x10 /* Obsolete updates are being scanned */
#define WT_PAGE_SPLIT_INSERT 0x20 /* A leaf page was split for append */
-#define WT_PAGE_SPLITTING 0x40 /* An internal page is growing */
+#define WT_PAGE_SPLIT_LOCKED 0x40 /* An internal page is growing */
uint8_t flags_atomic; /* Atomic flags, use F_*_ATOMIC */
/*
@@ -672,7 +681,7 @@ struct __wt_ref {
* up our slot in the page's index structure.
*/
WT_PAGE * volatile home; /* Reference page */
- uint32_t ref_hint; /* Reference page index hint */
+ uint32_t pindex_hint; /* Reference page index hint */
volatile WT_PAGE_STATE state; /* Page state */
diff --git a/src/third_party/wiredtiger/src/include/btree.i b/src/third_party/wiredtiger/src/include/btree.i
index aac430988be..23cb54a4179 100644
--- a/src/third_party/wiredtiger/src/include/btree.i
+++ b/src/third_party/wiredtiger/src/include/btree.i
@@ -269,62 +269,6 @@ __wt_page_evict_soon(WT_PAGE *page)
}
/*
- * __wt_page_refp --
- * Return the page's index and slot for a reference.
- */
-static inline void
-__wt_page_refp(WT_SESSION_IMPL *session,
- WT_REF *ref, WT_PAGE_INDEX **pindexp, uint32_t *slotp)
-{
- WT_PAGE_INDEX *pindex;
- uint32_t i;
-
- WT_ASSERT(session, session->split_gen != 0);
-
- /*
- * Copy the parent page's index value: the page can split at any time,
- * but the index's value is always valid, even if it's not up-to-date.
- */
-retry: pindex = WT_INTL_INDEX_COPY(ref->home);
-
- /*
- * Use the page's reference hint: it should be correct unless the page
- * split before our slot. If the page splits after our slot, the hint
- * will point earlier in the array than our actual slot, so the first
- * loop is from the hint to the end of the list, and the second loop
- * is from the start of the list to the end of the list. (The second
- * loop overlaps the first, but that only happen in cases where we've
- * deepened the tree and aren't going to find our slot at all, that's
- * not worth optimizing.)
- *
- * It's not an error for the reference hint to be wrong, it just means
- * the first retrieval (which sets the hint for subsequent retrievals),
- * is slower.
- */
- for (i = ref->ref_hint; i < pindex->entries; ++i)
- if (pindex->index[i]->page == ref->page) {
- *pindexp = pindex;
- *slotp = ref->ref_hint = i;
- return;
- }
- for (i = 0; i < pindex->entries; ++i)
- if (pindex->index[i]->page == ref->page) {
- *pindexp = pindex;
- *slotp = ref->ref_hint = i;
- return;
- }
-
- /*
- * If we don't find our reference, the page split into a new level and
- * our home pointer references the wrong page. After internal pages
- * deepen, their reference structure home value are updated; yield and
- * wait for that to happen.
- */
- __wt_yield();
- goto retry;
-}
-
-/*
* __wt_page_modify_init --
* A page is about to be modified, allocate the modification structure.
*/
@@ -1219,19 +1163,19 @@ __wt_skip_choose_depth(WT_SESSION_IMPL *session)
u_int d;
for (d = 1; d < WT_SKIP_MAXDEPTH &&
- __wt_random(session->rnd) < WT_SKIP_PROBABILITY; d++)
+ __wt_random(&session->rnd) < WT_SKIP_PROBABILITY; d++)
;
return (d);
}
/*
- * __wt_btree_size_overflow --
+ * __wt_btree_lsm_size --
* Check if the size of an in-memory tree with a single leaf page is over
* a specified maximum. If called on anything other than a simple tree with a
* single leaf page, returns true so the calling code will switch to a new tree.
*/
static inline int
-__wt_btree_size_overflow(WT_SESSION_IMPL *session, uint64_t maxsize)
+__wt_btree_lsm_size(WT_SESSION_IMPL *session, uint64_t maxsize)
{
WT_BTREE *btree;
WT_PAGE *child, *root;
@@ -1250,7 +1194,7 @@ __wt_btree_size_overflow(WT_SESSION_IMPL *session, uint64_t maxsize)
return (1);
/* Check for a tree with a single leaf page. */
- pindex = WT_INTL_INDEX_COPY(root);
+ pindex = WT_INTL_INDEX_GET_SAFE(root);
if (pindex->entries != 1) /* > 1 child page, switch */
return (1);
diff --git a/src/third_party/wiredtiger/src/include/cursor.i b/src/third_party/wiredtiger/src/include/cursor.i
index 606fee53749..47b772377c0 100644
--- a/src/third_party/wiredtiger/src/include/cursor.i
+++ b/src/third_party/wiredtiger/src/include/cursor.i
@@ -187,6 +187,12 @@ __cursor_func_init(WT_CURSOR_BTREE *cbt, int reenter)
if (reenter)
WT_RET(__curfile_leave(cbt));
+ /*
+ * Any old insert position is now invalid. We rely on this being
+ * cleared to detect if a new skiplist is installed after a search.
+ */
+ cbt->ins_stack[0] = NULL;
+
/* If the transaction is idle, check that the cache isn't full. */
WT_RET(__wt_txn_idle_cache_check(session));
diff --git a/src/third_party/wiredtiger/src/include/extern.h b/src/third_party/wiredtiger/src/include/extern.h
index 63b6bb2cbc5..a11f3dcd73c 100644
--- a/src/third_party/wiredtiger/src/include/extern.h
+++ b/src/third_party/wiredtiger/src/include/extern.h
@@ -26,14 +26,14 @@ extern int __wt_block_compact_end(WT_SESSION_IMPL *session, WT_BLOCK *block);
extern int __wt_block_compact_skip(WT_SESSION_IMPL *session, WT_BLOCK *block, int *skipp);
extern int __wt_block_compact_page_skip(WT_SESSION_IMPL *session, WT_BLOCK *block, const uint8_t *addr, size_t addr_size, int *skipp);
extern int __wt_block_misplaced(WT_SESSION_IMPL *session, WT_BLOCK *block, const char *tag, wt_off_t offset, uint32_t size, int live);
-extern int __wt_block_off_remove_overlap( WT_SESSION_IMPL *session, WT_EXTLIST *el, wt_off_t off, wt_off_t size);
+extern int __wt_block_off_remove_overlap(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_EXTLIST *el, wt_off_t off, wt_off_t size);
extern int __wt_block_alloc( WT_SESSION_IMPL *session, WT_BLOCK *block, wt_off_t *offp, wt_off_t size);
extern int __wt_block_free(WT_SESSION_IMPL *session, WT_BLOCK *block, const uint8_t *addr, size_t addr_size);
extern int __wt_block_off_free( WT_SESSION_IMPL *session, WT_BLOCK *block, wt_off_t offset, wt_off_t size);
extern int __wt_block_extlist_check( WT_SESSION_IMPL *session, WT_EXTLIST *al, WT_EXTLIST *bl);
extern int __wt_block_extlist_overlap( WT_SESSION_IMPL *session, WT_BLOCK *block, WT_BLOCK_CKPT *ci);
-extern int __wt_block_extlist_merge(WT_SESSION_IMPL *session, WT_EXTLIST *a, WT_EXTLIST *b);
-extern int __wt_block_insert_ext( WT_SESSION_IMPL *session, WT_EXTLIST *el, wt_off_t off, wt_off_t size);
+extern int __wt_block_extlist_merge(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_EXTLIST *a, WT_EXTLIST *b);
+extern int __wt_block_insert_ext(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_EXTLIST *el, wt_off_t off, wt_off_t size);
extern int __wt_block_extlist_read_avail(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_EXTLIST *el, wt_off_t ckpt_size);
extern int __wt_block_extlist_read(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_EXTLIST *el, wt_off_t ckpt_size);
extern int __wt_block_extlist_write(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_EXTLIST *el, WT_EXTLIST *additional);
@@ -171,7 +171,7 @@ extern int __wt_row_ikey_alloc(WT_SESSION_IMPL *session, uint32_t cell_offset, c
extern int __wt_row_ikey_incr(WT_SESSION_IMPL *session, WT_PAGE *page, uint32_t cell_offset, const void *key, size_t size, WT_REF *ref);
extern int __wt_row_ikey(WT_SESSION_IMPL *session, uint32_t cell_offset, const void *key, size_t size, WT_REF *ref);
extern int __wt_page_modify_alloc(WT_SESSION_IMPL *session, WT_PAGE *page);
-extern int __wt_row_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_ITEM *key, WT_ITEM *value, WT_UPDATE *upd, int is_remove);
+extern int __wt_row_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_ITEM *key, WT_ITEM *value, WT_UPDATE *upd_arg, int is_remove);
extern int __wt_row_insert_alloc(WT_SESSION_IMPL *session, WT_ITEM *key, u_int skipdepth, WT_INSERT **insp, size_t *ins_sizep);
extern int __wt_update_alloc( WT_SESSION_IMPL *session, WT_ITEM *value, WT_UPDATE **updp, size_t *sizep);
extern WT_UPDATE *__wt_update_obsolete_check( WT_SESSION_IMPL *session, WT_PAGE *page, WT_UPDATE *upd);
@@ -363,7 +363,6 @@ extern int __wt_log_slot_notify(WT_SESSION_IMPL *session, WT_LOGSLOT *slot);
extern int __wt_log_slot_wait(WT_SESSION_IMPL *session, WT_LOGSLOT *slot);
extern int64_t __wt_log_slot_release(WT_LOGSLOT *slot, uint64_t size);
extern int __wt_log_slot_free(WT_SESSION_IMPL *session, WT_LOGSLOT *slot);
-extern int __wt_log_slot_grow_buffers(WT_SESSION_IMPL *session, size_t newsize);
extern int __wt_clsm_request_switch(WT_CURSOR_LSM *clsm);
extern int __wt_clsm_await_switch(WT_CURSOR_LSM *clsm);
extern int __wt_clsm_init_merge( WT_CURSOR *cursor, u_int start_chunk, uint32_t start_id, u_int nchunks);
@@ -644,8 +643,8 @@ extern uint32_t __wt_nlpo2(uint32_t v);
extern uint32_t __wt_log2_int(uint32_t n);
extern int __wt_ispo2(uint32_t v);
extern uint32_t __wt_rduppo2(uint32_t n, uint32_t po2);
-extern void __wt_random_init(uint32_t *rnd);
-extern uint32_t __wt_random(uint32_t *rnd);
+extern void __wt_random_init(WT_RAND_STATE volatile *rnd_state);
+extern uint32_t __wt_random(WT_RAND_STATE volatile *rnd_state);
extern int __wt_buf_grow_worker(WT_SESSION_IMPL *session, WT_ITEM *buf, size_t size);
extern int __wt_buf_fmt(WT_SESSION_IMPL *session, WT_ITEM *buf, const char *fmt, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((format (printf, 3, 4)));
extern int __wt_buf_catfmt(WT_SESSION_IMPL *session, WT_ITEM *buf, const char *fmt, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((format (printf, 3, 4)));
diff --git a/src/third_party/wiredtiger/src/include/gcc.h b/src/third_party/wiredtiger/src/include/gcc.h
index 2efbb20b39a..7135bd479c7 100644
--- a/src/third_party/wiredtiger/src/include/gcc.h
+++ b/src/third_party/wiredtiger/src/include/gcc.h
@@ -87,18 +87,25 @@
* To avoid locking shared data structures such as statistics and to permit
* atomic state changes, we rely on the WT_ATOMIC_ADD and WT_ATOMIC_CAS
* (compare and swap) operations.
- *
- * Note that we avoid __sync_bool_compare_and_swap due to problems with
- * optimization with some versions of clang. See
- * http://llvm.org/bugs/show_bug.cgi?id=21499 for details.
*/
#define __WT_ATOMIC_ADD(v, val, n) \
(WT_STATIC_ASSERT(sizeof(v) == (n)), __sync_add_and_fetch(&(v), val))
#define __WT_ATOMIC_FETCH_ADD(v, val, n) \
(WT_STATIC_ASSERT(sizeof(v) == (n)), __sync_fetch_and_add(&(v), val))
+#ifdef __clang__
+/*
+ * We avoid __sync_bool_compare_and_swap due to problems with
+ * optimization with some versions of clang. See
+ * http://llvm.org/bugs/show_bug.cgi?id=21499 for details.
+ */
#define __WT_ATOMIC_CAS(v, old, new, n) \
(WT_STATIC_ASSERT(sizeof(v) == (n)), \
__sync_val_compare_and_swap(&(v), old, new) == (old))
+#else
+#define __WT_ATOMIC_CAS(v, old, new, n) \
+ (WT_STATIC_ASSERT(sizeof(v) == (n)), \
+ __sync_bool_compare_and_swap(&(v), old, new))
+#endif
#define __WT_ATOMIC_CAS_VAL(v, old, new, n) \
(WT_STATIC_ASSERT(sizeof(v) == (n)), \
__sync_val_compare_and_swap(&(v), old, new))
diff --git a/src/third_party/wiredtiger/src/include/log.h b/src/third_party/wiredtiger/src/include/log.h
index 66f346e2fc3..7a8a13327fa 100644
--- a/src/third_party/wiredtiger/src/include/log.h
+++ b/src/third_party/wiredtiger/src/include/log.h
@@ -11,8 +11,8 @@
#define WT_LOG_TMPNAME "WiredTigerTmplog" /* Log temporary name */
/* Logging subsystem declarations. */
-#define LOG_ALIGN 128
-#define WT_LOG_SLOT_BUF_INIT_SIZE 64 * 1024
+#define WT_LOG_ALIGN 128
+#define WT_LOG_SLOT_BUF_SIZE 256 * 1024
#define WT_INIT_LSN(l) do { \
(l)->file = 1; \
@@ -81,7 +81,7 @@ typedef WT_COMPILER_TYPE_ALIGN(WT_CACHE_LINE_ALIGNMENT) struct {
volatile int64_t slot_state; /* Slot state */
uint64_t slot_group_size; /* Group size */
int32_t slot_error; /* Error value */
-#define SLOT_INVALID_INDEX 0xffffffff
+#define WT_SLOT_INVALID_INDEX 0xffffffff
uint32_t slot_index; /* Active slot index */
wt_off_t slot_start_offset; /* Starting file offset */
WT_LSN slot_release_lsn; /* Slot release LSN */
@@ -91,15 +91,14 @@ typedef WT_COMPILER_TYPE_ALIGN(WT_CACHE_LINE_ALIGNMENT) struct {
WT_ITEM slot_buf; /* Buffer for grouped writes */
int32_t slot_churn; /* Active slots are scarce. */
-#define SLOT_BUF_GROW 0x01 /* Grow buffer on release */
-#define SLOT_BUFFERED 0x02 /* Buffer writes */
-#define SLOT_CLOSEFH 0x04 /* Close old fh on release */
-#define SLOT_SYNC 0x08 /* Needs sync on release */
-#define SLOT_SYNC_DIR 0x10 /* Directory sync on release */
+#define WT_SLOT_BUFFERED 0x01 /* Buffer writes */
+#define WT_SLOT_CLOSEFH 0x02 /* Close old fh on release */
+#define WT_SLOT_SYNC 0x04 /* Needs sync on release */
+#define WT_SLOT_SYNC_DIR 0x08 /* Directory sync on release */
uint32_t flags; /* Flags */
} WT_LOGSLOT;
-#define SLOT_INIT_FLAGS (SLOT_BUFFERED)
+#define WT_SLOT_INIT_FLAGS (WT_SLOT_BUFFERED)
typedef struct {
WT_LOGSLOT *slot;
@@ -150,16 +149,17 @@ typedef struct {
/*
* Consolidation array information
- * SLOT_ACTIVE must be less than SLOT_POOL.
+ * WT_SLOT_ACTIVE must be less than WT_SLOT_POOL.
* Our testing shows that the more consolidation we generate the
* better the performance we see which equates to an active slot
* slot count of one.
*/
-#define SLOT_ACTIVE 1
-#define SLOT_POOL 128
+#define WT_SLOT_ACTIVE 1
+#define WT_SLOT_POOL 128
uint32_t pool_index; /* Global pool index */
- WT_LOGSLOT *slot_array[SLOT_ACTIVE]; /* Active slots */
- WT_LOGSLOT slot_pool[SLOT_POOL]; /* Pool of all slots */
+ WT_LOGSLOT *slot_array[WT_SLOT_ACTIVE]; /* Active slots */
+ WT_LOGSLOT slot_pool[WT_SLOT_POOL]; /* Pool of all slots */
+ size_t slot_buf_size; /* Buffer size for slots */
#define WT_LOG_FORCE_CONSOLIDATE 0x01 /* Disable direct writes */
uint32_t flags;
diff --git a/src/third_party/wiredtiger/src/include/misc.h b/src/third_party/wiredtiger/src/include/misc.h
index 12cf2dec375..ba12f00f672 100644
--- a/src/third_party/wiredtiger/src/include/misc.h
+++ b/src/third_party/wiredtiger/src/include/misc.h
@@ -215,3 +215,11 @@
#define __wt_page_swap(session, held, want, flags) \
__wt_page_swap_func(session, held, want, flags)
#endif
+
+/* Random number generator state. */
+union __wt_rand_state {
+ uint64_t v;
+ struct {
+ uint32_t w, z;
+ } x;
+};
diff --git a/src/third_party/wiredtiger/src/include/mutex.h b/src/third_party/wiredtiger/src/include/mutex.h
index 07aa740c525..8f3cd168193 100644
--- a/src/third_party/wiredtiger/src/include/mutex.h
+++ b/src/third_party/wiredtiger/src/include/mutex.h
@@ -32,7 +32,9 @@ typedef union { /* Read/write lock */
WiredTiger read/write locks require modification for big-endian systems.
#else
uint64_t u;
- uint32_t us;
+ struct {
+ uint32_t us;
+ } i;
struct {
uint16_t writers;
uint16_t readers;
diff --git a/src/third_party/wiredtiger/src/include/os.h b/src/third_party/wiredtiger/src/include/os.h
index ba5d95657d5..edb59b0f521 100644
--- a/src/third_party/wiredtiger/src/include/os.h
+++ b/src/third_party/wiredtiger/src/include/os.h
@@ -56,7 +56,7 @@ typedef enum {
case EMFILE: \
case ENFILE: \
case ENOSPC: \
- __wt_sleep(0L, 500000L); \
+ __wt_sleep(0L, 50000L); \
continue; \
default: \
break; \
diff --git a/src/third_party/wiredtiger/src/include/serial.i b/src/third_party/wiredtiger/src/include/serial.i
index 9e6b0f7916c..0fc23348800 100644
--- a/src/third_party/wiredtiger/src/include/serial.i
+++ b/src/third_party/wiredtiger/src/include/serial.i
@@ -30,11 +30,11 @@ __page_write_gen_wrapped_check(WT_PAGE *page)
}
/*
- * __insert_serial_func --
- * Worker function to add a WT_INSERT entry to a skiplist.
+ * __insert_simple_func --
+ * Worker function to add a WT_INSERT entry to the middle of a skiplist.
*/
static inline int
-__insert_serial_func(WT_SESSION_IMPL *session, WT_INSERT_HEAD *ins_head,
+__insert_simple_func(WT_SESSION_IMPL *session,
WT_INSERT ***ins_stack, WT_INSERT *new_ins, u_int skipdepth)
{
u_int i;
@@ -42,31 +42,62 @@ __insert_serial_func(WT_SESSION_IMPL *session, WT_INSERT_HEAD *ins_head,
WT_UNUSED(session);
/*
- * Confirm we are still in the expected position, and no item has been
- * added where our insert belongs. Take extra care at the beginning
- * and end of the list (at each level): retry if we race there.
+ * Update the skiplist elements referencing the new WT_INSERT item.
+ * If we fail connecting one of the upper levels in the skiplist,
+ * return success: the levels we updated are correct and sufficient.
+ * Even though we don't get the benefit of the memory we allocated,
+ * we can't roll back.
*
- * !!!
- * Note the test for ins_stack[0] == NULL: that's the test for an
- * uninitialized cursor, ins_stack[0] is cleared as part of
- * initializing a cursor for a search.
+ * All structure setup must be flushed before the structure is entered
+ * into the list. We need a write barrier here, our callers depend on
+ * it. Don't pass complex arguments to the macro, some implementations
+ * read the old value multiple times.
*/
for (i = 0; i < skipdepth; i++) {
- if (ins_stack[i] == NULL ||
- *ins_stack[i] != new_ins->next[i])
- return (WT_RESTART);
- if (new_ins->next[i] == NULL &&
- ins_head->tail[i] != NULL &&
- ins_stack[i] != &ins_head->tail[i]->next[i])
- return (WT_RESTART);
+ WT_INSERT *old_ins = *ins_stack[i];
+ if (old_ins != new_ins->next[i] ||
+ !WT_ATOMIC_CAS8(*ins_stack[i], old_ins, new_ins))
+ return (i == 0 ? WT_RESTART : 0);
}
- /* Update the skiplist elements referencing the new WT_INSERT item. */
+ return (0);
+}
+
+/*
+ * __insert_serial_func --
+ * Worker function to add a WT_INSERT entry to a skiplist.
+ */
+static inline int
+__insert_serial_func(WT_SESSION_IMPL *session, WT_INSERT_HEAD *ins_head,
+ WT_INSERT ***ins_stack, WT_INSERT *new_ins, u_int skipdepth)
+{
+ u_int i;
+
+ /* The cursor should be positioned. */
+ WT_ASSERT(session, ins_stack[0] != NULL);
+
+ /*
+ * Update the skiplist elements referencing the new WT_INSERT item.
+ *
+ * Confirm we are still in the expected position, and no item has been
+ * added where our insert belongs. If we fail connecting one of the
+ * upper levels in the skiplist, return success: the levels we updated
+ * are correct and sufficient. Even though we don't get the benefit of
+ * the memory we allocated, we can't roll back.
+ *
+ * All structure setup must be flushed before the structure is entered
+ * into the list. We need a write barrier here, our callers depend on
+ * it. Don't pass complex arguments to the macro, some implementations
+ * read the old value multiple times.
+ */
for (i = 0; i < skipdepth; i++) {
+ WT_INSERT *old_ins = *ins_stack[i];
+ if (old_ins != new_ins->next[i] ||
+ !WT_ATOMIC_CAS8(*ins_stack[i], old_ins, new_ins))
+ return (i == 0 ? WT_RESTART : 0);
if (ins_head->tail[i] == NULL ||
ins_stack[i] == &ins_head->tail[i]->next[i])
ins_head->tail[i] = new_ins;
- *ins_stack[i] = new_ins;
}
return (0);
@@ -128,20 +159,20 @@ __wt_col_append_serial(WT_SESSION_IMPL *session, WT_PAGE *page,
WT_INSERT *new_ins = *new_insp;
WT_DECL_RET;
- /* Clear references to memory we now own. */
- *new_insp = NULL;
-
/* Check for page write generation wrap. */
WT_RET(__page_write_gen_wrapped_check(page));
+ /* Clear references to memory we now own and must free on error. */
+ *new_insp = NULL;
+
/* Acquire the page's spinlock, call the worker function. */
WT_PAGE_LOCK(session, page);
ret = __col_append_serial_func(
session, ins_head, ins_stack, new_ins, recnop, skipdepth);
WT_PAGE_UNLOCK(session, page);
- /* Free unused memory on error. */
if (ret != 0) {
+ /* Free unused memory on error. */
__wt_free(session, new_ins);
return (ret);
}
@@ -171,21 +202,32 @@ __wt_insert_serial(WT_SESSION_IMPL *session, WT_PAGE *page,
{
WT_INSERT *new_ins = *new_insp;
WT_DECL_RET;
-
- /* Clear references to memory we now own. */
- *new_insp = NULL;
+ int simple;
+ u_int i;
/* Check for page write generation wrap. */
WT_RET(__page_write_gen_wrapped_check(page));
- /* Acquire the page's spinlock, call the worker function. */
- WT_PAGE_LOCK(session, page);
- ret = __insert_serial_func(
- session, ins_head, ins_stack, new_ins, skipdepth);
- WT_PAGE_UNLOCK(session, page);
+ /* Clear references to memory we now own and must free on error. */
+ *new_insp = NULL;
+
+ simple = 1;
+ for (i = 0; i < skipdepth; i++)
+ if (new_ins->next[i] == NULL)
+ simple = 0;
+
+ if (simple)
+ ret = __insert_simple_func(
+ session, ins_stack, new_ins, skipdepth);
+ else {
+ WT_PAGE_LOCK(session, page);
+ ret = __insert_serial_func(
+ session, ins_head, ins_stack, new_ins, skipdepth);
+ WT_PAGE_UNLOCK(session, page);
+ }
- /* Free unused memory on error. */
if (ret != 0) {
+ /* Free unused memory on error. */
__wt_free(session, new_ins);
return (ret);
}
@@ -215,17 +257,19 @@ __wt_update_serial(WT_SESSION_IMPL *session, WT_PAGE *page,
WT_DECL_RET;
WT_UPDATE *obsolete, *upd = *updp;
- /* Clear references to memory we now own. */
- *updp = NULL;
-
/* Check for page write generation wrap. */
WT_RET(__page_write_gen_wrapped_check(page));
+ /* Clear references to memory we now own and must free on error. */
+ *updp = NULL;
+
/*
+ * All structure setup must be flushed before the structure is entered
+ * into the list. We need a write barrier here, our callers depend on
+ * it.
+ *
* Swap the update into place. If that fails, a new update was added
- * after our search, we raced. Check if our update is still permitted,
- * and if it is, do a full-barrier to ensure the update's next pointer
- * is set before we update the linked list and try again.
+ * after our search, we raced. Check if our update is still permitted.
*/
while (!WT_ATOMIC_CAS8(*srch_upd, upd->next, upd)) {
if ((ret = __wt_txn_update_check(
@@ -234,7 +278,6 @@ __wt_update_serial(WT_SESSION_IMPL *session, WT_PAGE *page,
__wt_free(session, upd);
return (ret);
}
- WT_WRITE_BARRIER();
}
/*
diff --git a/src/third_party/wiredtiger/src/include/session.h b/src/third_party/wiredtiger/src/include/session.h
index 2c88727c662..3efb8011e3b 100644
--- a/src/third_party/wiredtiger/src/include/session.h
+++ b/src/third_party/wiredtiger/src/include/session.h
@@ -146,9 +146,9 @@ struct WT_COMPILER_TYPE_ALIGN(WT_CACHE_LINE_ALIGNMENT) __wt_session_impl {
* to clear everything but the fields that persist.
*/
#define WT_SESSION_CLEAR_SIZE(s) \
- (WT_PTRDIFF(&(s)->rnd[0], s))
+ (WT_PTRDIFF(&(s)->rnd, s))
- uint32_t rnd[2]; /* Random number generation state */
+ WT_RAND_STATE rnd; /* Random number generation state */
/* Hashed handle reference list array */
SLIST_HEAD(__dhandles_hash, __wt_data_handle_cache) *dhhash;
diff --git a/src/third_party/wiredtiger/src/include/stat.h b/src/third_party/wiredtiger/src/include/stat.h
index 8624ebb456e..2acaad39b0e 100644
--- a/src/third_party/wiredtiger/src/include/stat.h
+++ b/src/third_party/wiredtiger/src/include/stat.h
@@ -203,7 +203,6 @@ struct __wt_connection_stats {
WT_STATS dh_session_handles;
WT_STATS dh_session_sweeps;
WT_STATS file_open;
- WT_STATS log_buffer_grow;
WT_STATS log_buffer_size;
WT_STATS log_bytes_payload;
WT_STATS log_bytes_written;
diff --git a/src/third_party/wiredtiger/src/include/txn.h b/src/third_party/wiredtiger/src/include/txn.h
index d2b369a41c4..7a31ed2f3fe 100644
--- a/src/third_party/wiredtiger/src/include/txn.h
+++ b/src/third_party/wiredtiger/src/include/txn.h
@@ -63,10 +63,10 @@ struct __wt_txn_global {
};
typedef enum __wt_txn_isolation {
- TXN_ISO_EVICTION, /* Internal: eviction context */
- TXN_ISO_READ_UNCOMMITTED,
- TXN_ISO_READ_COMMITTED,
- TXN_ISO_SNAPSHOT
+ WT_ISO_EVICTION, /* Internal: eviction context */
+ WT_ISO_READ_UNCOMMITTED,
+ WT_ISO_READ_COMMITTED,
+ WT_ISO_SNAPSHOT
} WT_TXN_ISOLATION;
/*
diff --git a/src/third_party/wiredtiger/src/include/txn.i b/src/third_party/wiredtiger/src/include/txn.i
index a9b19ca1ff5..95a8f99cf1b 100644
--- a/src/third_party/wiredtiger/src/include/txn.i
+++ b/src/third_party/wiredtiger/src/include/txn.i
@@ -170,7 +170,7 @@ __wt_txn_visible(WT_SESSION_IMPL *session, uint64_t id)
* Eviction only sees globally visible updates, or if there is a
* checkpoint transaction running, use its transaction.
*/
- if (txn->isolation == TXN_ISO_EVICTION)
+ if (txn->isolation == WT_ISO_EVICTION)
return (__wt_txn_visible_all(session, id));
/*
@@ -183,7 +183,7 @@ __wt_txn_visible(WT_SESSION_IMPL *session, uint64_t id)
* Metadata updates use non-transactional techniques (such as the
* schema and metadata locks) to protect access to in-flight updates.
*/
- if (txn->isolation == TXN_ISO_READ_UNCOMMITTED ||
+ if (txn->isolation == WT_ISO_READ_UNCOMMITTED ||
session->dhandle == session->meta_dhandle)
return (1);
@@ -192,7 +192,7 @@ __wt_txn_visible(WT_SESSION_IMPL *session, uint64_t id)
return (1);
/*
- * TXN_ISO_SNAPSHOT, TXN_ISO_READ_COMMITTED: the ID is visible if it is
+ * WT_ISO_SNAPSHOT, WT_ISO_READ_COMMITTED: the ID is visible if it is
* not the result of a concurrent transaction, that is, if was
* committed before the snapshot was taken.
*
@@ -222,19 +222,19 @@ __wt_txn_begin(WT_SESSION_IMPL *session, const char *cfg[])
txn->isolation = session->isolation;
txn->txn_logsync = S2C(session)->txn_logsync;
- if (cfg != NULL)
- WT_RET(__wt_txn_config(session, cfg));
+ if (cfg != NULL)
+ WT_RET(__wt_txn_config(session, cfg));
F_SET(txn, TXN_RUNNING);
- if (txn->isolation == TXN_ISO_SNAPSHOT) {
+ if (txn->isolation == WT_ISO_SNAPSHOT) {
if (session->ncursors > 0)
WT_RET(__wt_session_copy_values(session));
- /*
- * We're about to allocate a snapshot: if we need to block for
- * eviction, it's better to do it beforehand.
- */
- WT_RET(__wt_cache_full_check(session));
+ /*
+ * We're about to allocate a snapshot: if we need to block for
+ * eviction, it's better to do it beforehand.
+ */
+ WT_RET(__wt_cache_full_check(session));
__wt_txn_get_snapshot(session);
}
return (0);
@@ -385,7 +385,7 @@ __wt_txn_update_check(WT_SESSION_IMPL *session, WT_UPDATE *upd)
WT_TXN *txn;
txn = &session->txn;
- if (txn->isolation == TXN_ISO_SNAPSHOT)
+ if (txn->isolation == WT_ISO_SNAPSHOT)
while (upd != NULL && !__wt_txn_visible(session, upd->txnid)) {
if (upd->txnid != WT_TXN_ABORTED) {
WT_STAT_FAST_DATA_INCR(
@@ -411,7 +411,7 @@ __wt_txn_read_last(WT_SESSION_IMPL *session)
/* Release the snap_min ID we put in the global table. */
if (!F_ISSET(txn, TXN_RUNNING) ||
- txn->isolation != TXN_ISO_SNAPSHOT)
+ txn->isolation != WT_ISO_SNAPSHOT)
__wt_txn_release_snapshot(session);
}
@@ -446,12 +446,12 @@ __wt_txn_cursor_op(WT_SESSION_IMPL *session)
* further forward, so that once a read-uncommitted cursor is
* positioned on a value, it can't be freed.
*/
- if (txn->isolation == TXN_ISO_READ_UNCOMMITTED &&
+ if (txn->isolation == WT_ISO_READ_UNCOMMITTED &&
!F_ISSET(txn, TXN_HAS_ID) &&
TXNID_LT(txn_state->snap_min, txn_global->last_running))
txn_state->snap_min = txn_global->last_running;
- if (txn->isolation != TXN_ISO_READ_UNCOMMITTED &&
+ if (txn->isolation != WT_ISO_READ_UNCOMMITTED &&
!F_ISSET(txn, TXN_HAS_SNAPSHOT))
__wt_txn_get_snapshot(session);
}
diff --git a/src/third_party/wiredtiger/src/include/wiredtiger.in b/src/third_party/wiredtiger/src/include/wiredtiger.in
index 4804290acba..c28ce83d122 100644
--- a/src/third_party/wiredtiger/src/include/wiredtiger.in
+++ b/src/third_party/wiredtiger/src/include/wiredtiger.in
@@ -3345,150 +3345,148 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection);
#define WT_STAT_CONN_DH_SESSION_SWEEPS 1069
/*! connection: files currently open */
#define WT_STAT_CONN_FILE_OPEN 1070
-/*! log: log buffer size increases */
-#define WT_STAT_CONN_LOG_BUFFER_GROW 1071
/*! log: total log buffer size */
-#define WT_STAT_CONN_LOG_BUFFER_SIZE 1072
+#define WT_STAT_CONN_LOG_BUFFER_SIZE 1071
/*! log: log bytes of payload data */
-#define WT_STAT_CONN_LOG_BYTES_PAYLOAD 1073
+#define WT_STAT_CONN_LOG_BYTES_PAYLOAD 1072
/*! log: log bytes written */
-#define WT_STAT_CONN_LOG_BYTES_WRITTEN 1074
+#define WT_STAT_CONN_LOG_BYTES_WRITTEN 1073
/*! log: yields waiting for previous log file close */
-#define WT_STAT_CONN_LOG_CLOSE_YIELDS 1075
+#define WT_STAT_CONN_LOG_CLOSE_YIELDS 1074
/*! log: total size of compressed records */
-#define WT_STAT_CONN_LOG_COMPRESS_LEN 1076
+#define WT_STAT_CONN_LOG_COMPRESS_LEN 1075
/*! log: total in-memory size of compressed records */
-#define WT_STAT_CONN_LOG_COMPRESS_MEM 1077
+#define WT_STAT_CONN_LOG_COMPRESS_MEM 1076
/*! log: log records too small to compress */
-#define WT_STAT_CONN_LOG_COMPRESS_SMALL 1078
+#define WT_STAT_CONN_LOG_COMPRESS_SMALL 1077
/*! log: log records not compressed */
-#define WT_STAT_CONN_LOG_COMPRESS_WRITE_FAILS 1079
+#define WT_STAT_CONN_LOG_COMPRESS_WRITE_FAILS 1078
/*! log: log records compressed */
-#define WT_STAT_CONN_LOG_COMPRESS_WRITES 1080
+#define WT_STAT_CONN_LOG_COMPRESS_WRITES 1079
/*! log: maximum log file size */
-#define WT_STAT_CONN_LOG_MAX_FILESIZE 1081
+#define WT_STAT_CONN_LOG_MAX_FILESIZE 1080
/*! log: pre-allocated log files prepared */
-#define WT_STAT_CONN_LOG_PREALLOC_FILES 1082
+#define WT_STAT_CONN_LOG_PREALLOC_FILES 1081
/*! log: number of pre-allocated log files to create */
-#define WT_STAT_CONN_LOG_PREALLOC_MAX 1083
+#define WT_STAT_CONN_LOG_PREALLOC_MAX 1082
/*! log: pre-allocated log files used */
-#define WT_STAT_CONN_LOG_PREALLOC_USED 1084
+#define WT_STAT_CONN_LOG_PREALLOC_USED 1083
/*! log: log read operations */
-#define WT_STAT_CONN_LOG_READS 1085
+#define WT_STAT_CONN_LOG_READS 1084
/*! log: log release advances write LSN */
-#define WT_STAT_CONN_LOG_RELEASE_WRITE_LSN 1086
+#define WT_STAT_CONN_LOG_RELEASE_WRITE_LSN 1085
/*! log: records processed by log scan */
-#define WT_STAT_CONN_LOG_SCAN_RECORDS 1087
+#define WT_STAT_CONN_LOG_SCAN_RECORDS 1086
/*! log: log scan records requiring two reads */
-#define WT_STAT_CONN_LOG_SCAN_REREADS 1088
+#define WT_STAT_CONN_LOG_SCAN_REREADS 1087
/*! log: log scan operations */
-#define WT_STAT_CONN_LOG_SCANS 1089
+#define WT_STAT_CONN_LOG_SCANS 1088
/*! log: consolidated slot closures */
-#define WT_STAT_CONN_LOG_SLOT_CLOSES 1090
+#define WT_STAT_CONN_LOG_SLOT_CLOSES 1089
/*! log: logging bytes consolidated */
-#define WT_STAT_CONN_LOG_SLOT_CONSOLIDATED 1091
+#define WT_STAT_CONN_LOG_SLOT_CONSOLIDATED 1090
/*! log: consolidated slot joins */
-#define WT_STAT_CONN_LOG_SLOT_JOINS 1092
+#define WT_STAT_CONN_LOG_SLOT_JOINS 1091
/*! log: consolidated slot join races */
-#define WT_STAT_CONN_LOG_SLOT_RACES 1093
+#define WT_STAT_CONN_LOG_SLOT_RACES 1092
/*! log: slots selected for switching that were unavailable */
-#define WT_STAT_CONN_LOG_SLOT_SWITCH_FAILS 1094
+#define WT_STAT_CONN_LOG_SLOT_SWITCH_FAILS 1093
/*! log: record size exceeded maximum */
-#define WT_STAT_CONN_LOG_SLOT_TOOBIG 1095
+#define WT_STAT_CONN_LOG_SLOT_TOOBIG 1094
/*! log: failed to find a slot large enough for record */
-#define WT_STAT_CONN_LOG_SLOT_TOOSMALL 1096
+#define WT_STAT_CONN_LOG_SLOT_TOOSMALL 1095
/*! log: consolidated slot join transitions */
-#define WT_STAT_CONN_LOG_SLOT_TRANSITIONS 1097
+#define WT_STAT_CONN_LOG_SLOT_TRANSITIONS 1096
/*! log: log sync operations */
-#define WT_STAT_CONN_LOG_SYNC 1098
+#define WT_STAT_CONN_LOG_SYNC 1097
/*! log: log sync_dir operations */
-#define WT_STAT_CONN_LOG_SYNC_DIR 1099
+#define WT_STAT_CONN_LOG_SYNC_DIR 1098
/*! log: log server thread advances write LSN */
-#define WT_STAT_CONN_LOG_WRITE_LSN 1100
+#define WT_STAT_CONN_LOG_WRITE_LSN 1099
/*! log: log write operations */
-#define WT_STAT_CONN_LOG_WRITES 1101
+#define WT_STAT_CONN_LOG_WRITES 1100
/*! LSM: sleep for LSM checkpoint throttle */
-#define WT_STAT_CONN_LSM_CHECKPOINT_THROTTLE 1102
+#define WT_STAT_CONN_LSM_CHECKPOINT_THROTTLE 1101
/*! LSM: sleep for LSM merge throttle */
-#define WT_STAT_CONN_LSM_MERGE_THROTTLE 1103
+#define WT_STAT_CONN_LSM_MERGE_THROTTLE 1102
/*! LSM: rows merged in an LSM tree */
-#define WT_STAT_CONN_LSM_ROWS_MERGED 1104
+#define WT_STAT_CONN_LSM_ROWS_MERGED 1103
/*! LSM: application work units currently queued */
-#define WT_STAT_CONN_LSM_WORK_QUEUE_APP 1105
+#define WT_STAT_CONN_LSM_WORK_QUEUE_APP 1104
/*! LSM: merge work units currently queued */
-#define WT_STAT_CONN_LSM_WORK_QUEUE_MANAGER 1106
+#define WT_STAT_CONN_LSM_WORK_QUEUE_MANAGER 1105
/*! LSM: tree queue hit maximum */
-#define WT_STAT_CONN_LSM_WORK_QUEUE_MAX 1107
+#define WT_STAT_CONN_LSM_WORK_QUEUE_MAX 1106
/*! LSM: switch work units currently queued */
-#define WT_STAT_CONN_LSM_WORK_QUEUE_SWITCH 1108
+#define WT_STAT_CONN_LSM_WORK_QUEUE_SWITCH 1107
/*! LSM: tree maintenance operations scheduled */
-#define WT_STAT_CONN_LSM_WORK_UNITS_CREATED 1109
+#define WT_STAT_CONN_LSM_WORK_UNITS_CREATED 1108
/*! LSM: tree maintenance operations discarded */
-#define WT_STAT_CONN_LSM_WORK_UNITS_DISCARDED 1110
+#define WT_STAT_CONN_LSM_WORK_UNITS_DISCARDED 1109
/*! LSM: tree maintenance operations executed */
-#define WT_STAT_CONN_LSM_WORK_UNITS_DONE 1111
+#define WT_STAT_CONN_LSM_WORK_UNITS_DONE 1110
/*! connection: memory allocations */
-#define WT_STAT_CONN_MEMORY_ALLOCATION 1112
+#define WT_STAT_CONN_MEMORY_ALLOCATION 1111
/*! connection: memory frees */
-#define WT_STAT_CONN_MEMORY_FREE 1113
+#define WT_STAT_CONN_MEMORY_FREE 1112
/*! connection: memory re-allocations */
-#define WT_STAT_CONN_MEMORY_GROW 1114
+#define WT_STAT_CONN_MEMORY_GROW 1113
/*! thread-yield: page acquire busy blocked */
-#define WT_STAT_CONN_PAGE_BUSY_BLOCKED 1115
+#define WT_STAT_CONN_PAGE_BUSY_BLOCKED 1114
/*! thread-yield: page acquire eviction blocked */
-#define WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED 1116
+#define WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED 1115
/*! thread-yield: page acquire locked blocked */
-#define WT_STAT_CONN_PAGE_LOCKED_BLOCKED 1117
+#define WT_STAT_CONN_PAGE_LOCKED_BLOCKED 1116
/*! thread-yield: page acquire read blocked */
-#define WT_STAT_CONN_PAGE_READ_BLOCKED 1118
+#define WT_STAT_CONN_PAGE_READ_BLOCKED 1117
/*! thread-yield: page acquire time sleeping (usecs) */
-#define WT_STAT_CONN_PAGE_SLEEP 1119
+#define WT_STAT_CONN_PAGE_SLEEP 1118
/*! connection: total read I/Os */
-#define WT_STAT_CONN_READ_IO 1120
+#define WT_STAT_CONN_READ_IO 1119
/*! reconciliation: page reconciliation calls */
-#define WT_STAT_CONN_REC_PAGES 1121
+#define WT_STAT_CONN_REC_PAGES 1120
/*! reconciliation: page reconciliation calls for eviction */
-#define WT_STAT_CONN_REC_PAGES_EVICTION 1122
+#define WT_STAT_CONN_REC_PAGES_EVICTION 1121
/*! reconciliation: split bytes currently awaiting free */
-#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1123
+#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1122
/*! reconciliation: split objects currently awaiting free */
-#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1124
+#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1123
/*! connection: pthread mutex shared lock read-lock calls */
-#define WT_STAT_CONN_RWLOCK_READ 1125
+#define WT_STAT_CONN_RWLOCK_READ 1124
/*! connection: pthread mutex shared lock write-lock calls */
-#define WT_STAT_CONN_RWLOCK_WRITE 1126
+#define WT_STAT_CONN_RWLOCK_WRITE 1125
/*! session: open cursor count */
-#define WT_STAT_CONN_SESSION_CURSOR_OPEN 1127
+#define WT_STAT_CONN_SESSION_CURSOR_OPEN 1126
/*! session: open session count */
-#define WT_STAT_CONN_SESSION_OPEN 1128
+#define WT_STAT_CONN_SESSION_OPEN 1127
/*! transaction: transaction begins */
-#define WT_STAT_CONN_TXN_BEGIN 1129
+#define WT_STAT_CONN_TXN_BEGIN 1128
/*! transaction: transaction checkpoints */
-#define WT_STAT_CONN_TXN_CHECKPOINT 1130
+#define WT_STAT_CONN_TXN_CHECKPOINT 1129
/*! transaction: transaction checkpoint generation */
-#define WT_STAT_CONN_TXN_CHECKPOINT_GENERATION 1131
+#define WT_STAT_CONN_TXN_CHECKPOINT_GENERATION 1130
/*! transaction: transaction checkpoint currently running */
-#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1132
+#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1131
/*! transaction: transaction checkpoint max time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1133
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1132
/*! transaction: transaction checkpoint min time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1134
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1133
/*! transaction: transaction checkpoint most recent time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1135
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1134
/*! transaction: transaction checkpoint total time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1136
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1135
/*! transaction: transactions committed */
-#define WT_STAT_CONN_TXN_COMMIT 1137
+#define WT_STAT_CONN_TXN_COMMIT 1136
/*! transaction: transaction failures due to cache overflow */
-#define WT_STAT_CONN_TXN_FAIL_CACHE 1138
+#define WT_STAT_CONN_TXN_FAIL_CACHE 1137
/*! transaction: transaction range of IDs currently pinned by a checkpoint */
-#define WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE 1139
+#define WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE 1138
/*! transaction: transaction range of IDs currently pinned */
-#define WT_STAT_CONN_TXN_PINNED_RANGE 1140
+#define WT_STAT_CONN_TXN_PINNED_RANGE 1139
/*! transaction: transactions rolled back */
-#define WT_STAT_CONN_TXN_ROLLBACK 1141
+#define WT_STAT_CONN_TXN_ROLLBACK 1140
/*! connection: total write I/Os */
-#define WT_STAT_CONN_WRITE_IO 1142
+#define WT_STAT_CONN_WRITE_IO 1141
/*!
* @}
diff --git a/src/third_party/wiredtiger/src/include/wt_internal.h b/src/third_party/wiredtiger/src/include/wt_internal.h
index 576827bebcd..fa25fc872f4 100644
--- a/src/third_party/wiredtiger/src/include/wt_internal.h
+++ b/src/third_party/wiredtiger/src/include/wt_internal.h
@@ -265,6 +265,8 @@ struct __wt_upd_skipped;
typedef struct __wt_upd_skipped WT_UPD_SKIPPED;
struct __wt_update;
typedef struct __wt_update WT_UPDATE;
+union __wt_rand_state;
+ typedef union __wt_rand_state WT_RAND_STATE;
/*
* Forward type declarations for internal types: END
* DO NOT EDIT: automatically built by dist/s_typedef.
diff --git a/src/third_party/wiredtiger/src/log/log.c b/src/third_party/wiredtiger/src/log/log.c
index 5c1d76105cb..76cf5f55f7b 100644
--- a/src/third_party/wiredtiger/src/log/log.c
+++ b/src/third_party/wiredtiger/src/log/log.c
@@ -363,7 +363,7 @@ __log_acquire(WT_SESSION_IMPL *session, uint64_t recsize, WT_LOGSLOT *slot)
if (!__log_size_fit(session, &log->alloc_lsn, recsize)) {
WT_RET(__wt_log_newfile(session, 0, &created_log));
if (log->log_close_fh != NULL)
- F_SET(slot, SLOT_CLOSEFH);
+ F_SET(slot, WT_SLOT_CLOSEFH);
}
/*
@@ -456,8 +456,9 @@ __log_fill(WT_SESSION_IMPL *session,
logrec = (WT_LOG_RECORD *)record->mem;
/*
* Call __wt_write. For now the offset is the real byte offset.
- * If the offset becomes a unit of LOG_ALIGN this is where we would
- * multiply by LOG_ALIGN to get the real file byte offset for write().
+ * If the offset becomes a unit of WT_LOG_ALIGN this is where we would
+ * multiply by WT_LOG_ALIGN to get the real file byte offset for
+ * write().
*/
if (direct)
WT_ERR(__wt_write(session, myslot->slot->slot_fh,
@@ -567,7 +568,7 @@ __log_openfile(WT_SESSION_IMPL *session,
log = S2C(session)->log;
if (log == NULL)
- allocsize = LOG_ALIGN;
+ allocsize = WT_LOG_ALIGN;
else
allocsize = log->allocsize;
WT_RET(__wt_scr_alloc(session, 0, &buf));
@@ -943,7 +944,7 @@ __log_filesize(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t *eof)
*eof = 0;
WT_RET(__wt_filesize(session, fh, &log_size));
if (log == NULL)
- allocsize = LOG_ALIGN;
+ allocsize = WT_LOG_ALIGN;
else
allocsize = log->allocsize;
@@ -1031,7 +1032,7 @@ __log_release(WT_SESSION_IMPL *session, WT_LOGSLOT *slot, int *freep)
*freep = 1;
/* Write the buffered records */
- if (F_ISSET(slot, SLOT_BUFFERED)) {
+ if (F_ISSET(slot, WT_SLOT_BUFFERED)) {
write_size = (size_t)
(slot->slot_end_lsn.offset - slot->slot_start_offset);
WT_ERR(__wt_write(session, slot->slot_fh,
@@ -1045,8 +1046,8 @@ __log_release(WT_SESSION_IMPL *session, WT_LOGSLOT *slot, int *freep)
* off to the worker thread. The caller is responsible for freeing
* the slot in that case. Otherwise the worker thread will free it.
*/
- if (F_ISSET(slot, SLOT_BUFFERED) &&
- !F_ISSET(slot, SLOT_SYNC | SLOT_SYNC_DIR)) {
+ if (F_ISSET(slot, WT_SLOT_BUFFERED) &&
+ !F_ISSET(slot, WT_SLOT_SYNC | WT_SLOT_SYNC_DIR)) {
*freep = 0;
slot->slot_state = WT_LOG_SLOT_WRITTEN;
/*
@@ -1076,7 +1077,7 @@ __log_release(WT_SESSION_IMPL *session, WT_LOGSLOT *slot, int *freep)
/*
* Signal the close thread if needed.
*/
- if (F_ISSET(slot, SLOT_CLOSEFH))
+ if (F_ISSET(slot, WT_SLOT_CLOSEFH))
WT_ERR(__wt_cond_signal(session, conn->log_file_cond));
/*
@@ -1084,7 +1085,7 @@ __log_release(WT_SESSION_IMPL *session, WT_LOGSLOT *slot, int *freep)
* so that threads finishing writing to the log will wait while the
* current fsync completes and advance log->sync_lsn.
*/
- while (F_ISSET(slot, SLOT_SYNC | SLOT_SYNC_DIR)) {
+ while (F_ISSET(slot, WT_SLOT_SYNC | WT_SLOT_SYNC_DIR)) {
/*
* We have to wait until earlier log files have finished their
* sync operations. The most recent one will set the LSN to the
@@ -1109,7 +1110,7 @@ __log_release(WT_SESSION_IMPL *session, WT_LOGSLOT *slot, int *freep)
* not yet stable in its parent directory. Do that
* now if needed.
*/
- if (F_ISSET(slot, SLOT_SYNC_DIR) &&
+ if (F_ISSET(slot, WT_SLOT_SYNC_DIR) &&
(log->sync_dir_lsn.file < sync_lsn.file)) {
WT_ASSERT(session, log->log_dir_fh != NULL);
WT_ERR(__wt_verbose(session, WT_VERB_LOG,
@@ -1124,7 +1125,7 @@ __log_release(WT_SESSION_IMPL *session, WT_LOGSLOT *slot, int *freep)
/*
* Sync the log file if needed.
*/
- if (F_ISSET(slot, SLOT_SYNC) &&
+ if (F_ISSET(slot, WT_SLOT_SYNC) &&
LOG_CMP(&log->sync_lsn, &slot->slot_end_lsn) < 0) {
WT_ERR(__wt_verbose(session, WT_VERB_LOG,
"log_release: sync log %s", log->log_fh->name));
@@ -1136,7 +1137,7 @@ __log_release(WT_SESSION_IMPL *session, WT_LOGSLOT *slot, int *freep)
/*
* Clear the flags before leaving the loop.
*/
- F_CLR(slot, SLOT_SYNC | SLOT_SYNC_DIR);
+ F_CLR(slot, WT_SLOT_SYNC | WT_SLOT_SYNC_DIR);
locked = 0;
__wt_spin_unlock(session, &log->log_sync_lock);
break;
@@ -1421,7 +1422,7 @@ __wt_log_scan(WT_SESSION_IMPL *session, WT_LSN *lsnp, uint32_t flags,
* records and larger allocation boundaries should always be
* a multiple of this.
*/
- allocsize = LOG_ALIGN;
+ allocsize = WT_LOG_ALIGN;
lastlog = 0;
firstlog = UINT32_MAX;
WT_RET(__log_get_files(session,
@@ -1447,7 +1448,7 @@ __wt_log_scan(WT_SESSION_IMPL *session, WT_LSN *lsnp, uint32_t flags,
session, 0, &log_fh, WT_LOG_FILENAME, start_lsn.file));
WT_ERR(__log_filesize(session, log_fh, &log_size));
rd_lsn = start_lsn;
- WT_ERR(__wt_buf_initsize(session, &buf, LOG_ALIGN));
+ WT_ERR(__wt_buf_initsize(session, &buf, WT_LOG_ALIGN));
for (;;) {
if (rd_lsn.offset + allocsize > log_size) {
advance:
@@ -1620,9 +1621,9 @@ __log_direct_write(WT_SESSION_IMPL *session, WT_ITEM *record, WT_LSN *lsnp,
locked = 1;
if (LF_ISSET(WT_LOG_DSYNC | WT_LOG_FSYNC))
- F_SET(&tmp, SLOT_SYNC_DIR);
+ F_SET(&tmp, WT_SLOT_SYNC_DIR);
if (LF_ISSET(WT_LOG_FSYNC))
- F_SET(&tmp, SLOT_SYNC);
+ F_SET(&tmp, WT_SLOT_SYNC);
WT_ERR(__log_acquire(session, record->size, &tmp));
__wt_spin_unlock(session, &log->log_slot_lock);
locked = 0;
@@ -1820,11 +1821,6 @@ __log_write_internal(WT_SESSION_IMPL *session, WT_ITEM *record, WT_LSN *lsnp,
session, record, lsnp, flags)) == EAGAIN)
;
WT_ERR(ret);
- /*
- * Increase the buffer size of any slots we can get access
- * to, so future consolidations are likely to succeed.
- */
- WT_ERR(__wt_log_slot_grow_buffers(session, 4 * rdup_len));
return (0);
}
WT_ERR(ret);
diff --git a/src/third_party/wiredtiger/src/log/log_auto.c b/src/third_party/wiredtiger/src/log/log_auto.c
index f35a7058511..bd830687df2 100644
--- a/src/third_party/wiredtiger/src/log/log_auto.c
+++ b/src/third_party/wiredtiger/src/log/log_auto.c
@@ -7,7 +7,8 @@ __wt_logrec_alloc(WT_SESSION_IMPL *session, size_t size, WT_ITEM **logrecp)
{
WT_ITEM *logrec;
- WT_RET(__wt_scr_alloc(session, WT_ALIGN(size + 1, LOG_ALIGN), &logrec));
+ WT_RET(
+ __wt_scr_alloc(session, WT_ALIGN(size + 1, WT_LOG_ALIGN), &logrec));
WT_CLEAR(*(WT_LOG_RECORD *)logrec->data);
logrec->size = offsetof(WT_LOG_RECORD, record);
diff --git a/src/third_party/wiredtiger/src/log/log_slot.c b/src/third_party/wiredtiger/src/log/log_slot.c
index 02b3056be6f..a08a9aff001 100644
--- a/src/third_party/wiredtiger/src/log/log_slot.c
+++ b/src/third_party/wiredtiger/src/log/log_slot.c
@@ -35,15 +35,15 @@ __wt_log_slot_init(WT_SESSION_IMPL *session)
conn = S2C(session);
log = conn->log;
- for (i = 0; i < SLOT_POOL; i++) {
+ for (i = 0; i < WT_SLOT_POOL; i++) {
log->slot_pool[i].slot_state = WT_LOG_SLOT_FREE;
- log->slot_pool[i].slot_index = SLOT_INVALID_INDEX;
+ log->slot_pool[i].slot_index = WT_SLOT_INVALID_INDEX;
}
/*
* Set up the available slots from the pool the first time.
*/
- for (i = 0; i < SLOT_ACTIVE; i++) {
+ for (i = 0; i < WT_SLOT_ACTIVE; i++) {
slot = &log->slot_pool[i];
slot->slot_index = (uint32_t)i;
slot->slot_state = WT_LOG_SLOT_READY;
@@ -53,14 +53,18 @@ __wt_log_slot_init(WT_SESSION_IMPL *session)
/*
* Allocate memory for buffers now that the arrays are setup. Split
* this out to make error handling simpler.
+ *
+ * Cap the slot buffer to the log file size.
*/
- for (i = 0; i < SLOT_POOL; i++) {
+ log->slot_buf_size =
+ WT_MIN((size_t)conn->log_file_max, WT_LOG_SLOT_BUF_SIZE);
+ for (i = 0; i < WT_SLOT_POOL; i++) {
WT_ERR(__wt_buf_init(session,
- &log->slot_pool[i].slot_buf, WT_LOG_SLOT_BUF_INIT_SIZE));
- F_SET(&log->slot_pool[i], SLOT_INIT_FLAGS);
+ &log->slot_pool[i].slot_buf, log->slot_buf_size));
+ F_SET(&log->slot_pool[i], WT_SLOT_INIT_FLAGS);
}
WT_STAT_FAST_CONN_INCRV(session,
- log_buffer_size, WT_LOG_SLOT_BUF_INIT_SIZE * SLOT_POOL);
+ log_buffer_size, log->slot_buf_size * WT_SLOT_POOL);
if (0) {
err: while (--i >= 0)
__wt_buf_free(session, &log->slot_pool[i].slot_buf);
@@ -82,7 +86,7 @@ __wt_log_slot_destroy(WT_SESSION_IMPL *session)
conn = S2C(session);
log = conn->log;
- for (i = 0; i < SLOT_POOL; i++)
+ for (i = 0; i < WT_SLOT_POOL; i++)
__wt_buf_free(session, &log->slot_pool[i].slot_buf);
return (0);
}
@@ -101,13 +105,18 @@ __wt_log_slot_join(WT_SESSION_IMPL *session, uint64_t mysize,
WT_LOG *log;
WT_LOGSLOT *slot;
int64_t cur_state, new_state, old_state;
- uint32_t allocated_slot, slot_grow_attempts;
+ uint32_t allocated_slot, slot_attempts;
conn = S2C(session);
log = conn->log;
- slot_grow_attempts = 0;
+ slot_attempts = 0;
+
+ if (mysize >= (uint64_t)log->slot_buf_size) {
+ WT_STAT_FAST_CONN_INCR(session, log_slot_toobig);
+ return (ENOMEM);
+ }
find_slot:
- allocated_slot = __wt_random(session->rnd) % SLOT_ACTIVE;
+ allocated_slot = __wt_random(&session->rnd) % WT_SLOT_ACTIVE;
slot = log->slot_array[allocated_slot];
old_state = slot->slot_state;
join_slot:
@@ -131,12 +140,11 @@ join_slot:
goto find_slot;
}
/*
- * If the slot buffer isn't big enough to hold this update, mark
- * the slot for a buffer size increase and find another slot.
+ * If the slot buffer isn't big enough to hold this update, try
+ * to find another slot.
*/
if (new_state > (int64_t)slot->slot_buf.memsize) {
- F_SET(slot, SLOT_BUF_GROW);
- if (++slot_grow_attempts > 5) {
+ if (++slot_attempts > 5) {
WT_STAT_FAST_CONN_INCR(session, log_slot_toosmall);
return (ENOMEM);
}
@@ -159,9 +167,9 @@ join_slot:
*/
WT_STAT_FAST_CONN_INCR(session, log_slot_joins);
if (LF_ISSET(WT_LOG_DSYNC | WT_LOG_FSYNC))
- F_SET(slot, SLOT_SYNC_DIR);
+ F_SET(slot, WT_SLOT_SYNC_DIR);
if (LF_ISSET(WT_LOG_FSYNC))
- F_SET(slot, SLOT_SYNC);
+ F_SET(slot, WT_SLOT_SYNC);
myslotp->slot = slot;
myslotp->offset = (wt_off_t)old_state - WT_LOG_SLOT_READY;
return (0);
@@ -193,7 +201,7 @@ retry:
*/
pool_i = log->pool_index;
newslot = &log->slot_pool[pool_i];
- if (++log->pool_index >= SLOT_POOL)
+ if (++log->pool_index >= WT_SLOT_POOL)
log->pool_index = 0;
if (newslot->slot_state != WT_LOG_SLOT_FREE) {
WT_STAT_FAST_CONN_INCR(session, log_slot_switch_fails);
@@ -203,7 +211,7 @@ retry:
* churn is used to change how long we pause before closing
* the slot - which leads to more consolidation and less churn.
*/
- if (++switch_fails % SLOT_POOL == 0 && slot->slot_churn < 5)
+ if (++switch_fails % WT_SLOT_POOL == 0 && slot->slot_churn < 5)
++slot->slot_churn;
__wt_yield();
goto retry;
@@ -297,90 +305,13 @@ __wt_log_slot_release(WT_LOGSLOT *slot, uint64_t size)
int
__wt_log_slot_free(WT_SESSION_IMPL *session, WT_LOGSLOT *slot)
{
- WT_DECL_RET;
-
- ret = 0;
- /*
- * Grow the buffer if needed before returning it to the pool.
- */
- if (F_ISSET(slot, SLOT_BUF_GROW)) {
- WT_STAT_FAST_CONN_INCR(session, log_buffer_grow);
- WT_STAT_FAST_CONN_INCRV(session,
- log_buffer_size, slot->slot_buf.memsize);
- WT_ERR(__wt_buf_grow(session,
- &slot->slot_buf, slot->slot_buf.memsize * 2));
- }
-err:
- /*
- * No matter if there is an error, we always want to free
- * the slot back to the pool.
- */
+ WT_UNUSED(session);
/*
* Make sure flags don't get retained between uses.
* We have to reset them them here because multiple threads may
* change the flags when joining the slot.
*/
- slot->flags = SLOT_INIT_FLAGS;
+ slot->flags = WT_SLOT_INIT_FLAGS;
slot->slot_state = WT_LOG_SLOT_FREE;
- return (ret);
-}
-
-/*
- * __wt_log_slot_grow_buffers --
- * Increase the buffer size of all available slots in the buffer pool.
- * Go to some lengths to include active (but unused) slots to handle
- * the case where all log write record sizes exceed the size of the
- * active buffer.
- */
-int
-__wt_log_slot_grow_buffers(WT_SESSION_IMPL *session, size_t newsize)
-{
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- WT_LOG *log;
- WT_LOGSLOT *slot;
- int64_t orig_state;
- uint64_t old_size, total_growth;
- int i;
-
- conn = S2C(session);
- log = conn->log;
- total_growth = 0;
- WT_STAT_FAST_CONN_INCR(session, log_buffer_grow);
- /*
- * Take the log slot lock to prevent other threads growing buffers
- * at the same time. Could tighten the scope of this lock, or have
- * a separate lock if there is contention.
- */
- __wt_spin_lock(session, &log->log_slot_lock);
- for (i = 0; i < SLOT_POOL; i++) {
- slot = &log->slot_pool[i];
- /* Avoid atomic operations if they won't succeed. */
- if (slot->slot_state != WT_LOG_SLOT_FREE &&
- slot->slot_state != WT_LOG_SLOT_READY)
- continue;
- /* Don't keep growing unrelated buffers. */
- if (slot->slot_buf.memsize > (10 * newsize) &&
- !F_ISSET(slot, SLOT_BUF_GROW))
- continue;
- orig_state = WT_ATOMIC_CAS_VAL8(
- slot->slot_state, WT_LOG_SLOT_FREE, WT_LOG_SLOT_PENDING);
- if (orig_state != WT_LOG_SLOT_FREE) {
- orig_state = WT_ATOMIC_CAS_VAL8(slot->slot_state,
- WT_LOG_SLOT_READY, WT_LOG_SLOT_PENDING);
- if (orig_state != WT_LOG_SLOT_READY)
- continue;
- }
-
- /* We have a slot - now go ahead and grow the buffer. */
- old_size = slot->slot_buf.memsize;
- F_CLR(slot, SLOT_BUF_GROW);
- WT_ERR(__wt_buf_grow(session, &slot->slot_buf,
- WT_MAX(slot->slot_buf.memsize * 2, newsize)));
- slot->slot_state = orig_state;
- total_growth += slot->slot_buf.memsize - old_size;
- }
-err: __wt_spin_unlock(session, &log->log_slot_lock);
- WT_STAT_FAST_CONN_INCRV(session, log_buffer_size, total_growth);
- return (ret);
+ return (0);
}
diff --git a/src/third_party/wiredtiger/src/lsm/lsm_cursor.c b/src/third_party/wiredtiger/src/lsm/lsm_cursor.c
index 111de7a2be1..0962da7768b 100644
--- a/src/third_party/wiredtiger/src/lsm/lsm_cursor.c
+++ b/src/third_party/wiredtiger/src/lsm/lsm_cursor.c
@@ -132,10 +132,11 @@ __clsm_enter_update(WT_CURSOR_LSM *clsm)
hard_limit = F_ISSET(lsm_tree, WT_LSM_TREE_NEED_SWITCH) ? 1 : 0;
if (have_primary) {
+ WT_ENTER_PAGE_INDEX(session);
WT_WITH_BTREE(session, ((WT_CURSOR_BTREE *)primary)->btree,
- ovfl = __wt_btree_size_overflow(
- session, hard_limit ?
+ ovfl = __wt_btree_lsm_size(session, hard_limit ?
2 * lsm_tree->chunk_size : lsm_tree->chunk_size));
+ WT_LEAVE_PAGE_INDEX(session);
/* If there was no overflow, we're done. */
if (!ovfl)
@@ -206,7 +207,7 @@ __clsm_enter(WT_CURSOR_LSM *clsm, int reset, int update)
if (clsm->dsk_gen != clsm->lsm_tree->dsk_gen)
goto open;
- if (session->txn.isolation == TXN_ISO_SNAPSHOT)
+ if (session->txn.isolation == WT_ISO_SNAPSHOT)
__wt_txn_cursor_op(session);
/*
@@ -219,7 +220,7 @@ __clsm_enter(WT_CURSOR_LSM *clsm, int reset, int update)
* conflict.
*/
clsm->nupdates = 1;
- if (session->txn.isolation == TXN_ISO_SNAPSHOT &&
+ if (session->txn.isolation == WT_ISO_SNAPSHOT &&
F_ISSET(clsm, WT_CLSM_OPEN_SNAPSHOT)) {
WT_ASSERT(session,
F_ISSET(&session->txn, TXN_HAS_SNAPSHOT));
@@ -245,7 +246,7 @@ __clsm_enter(WT_CURSOR_LSM *clsm, int reset, int update)
* - a read operation and the cursor is open for reading.
*/
if ((!update ||
- session->txn.isolation != TXN_ISO_SNAPSHOT ||
+ session->txn.isolation != WT_ISO_SNAPSHOT ||
F_ISSET(clsm, WT_CLSM_OPEN_SNAPSHOT)) &&
((update && clsm->primary_chunk != NULL) ||
(!update && F_ISSET(clsm, WT_CLSM_OPEN_READ))))
@@ -417,7 +418,7 @@ __clsm_open_cursors(
* Ensure that any snapshot update has cursors on the right set of
* chunks to guarantee visibility is correct.
*/
- if (update && txn->isolation == TXN_ISO_SNAPSHOT)
+ if (update && txn->isolation == WT_ISO_SNAPSHOT)
F_SET(clsm, WT_CLSM_OPEN_SNAPSHOT);
/*
@@ -1533,9 +1534,11 @@ __wt_clsm_open(WT_SESSION_IMPL *session,
if (bulk && (ret == EBUSY || (ret == 0 && lsm_tree->nchunks > 1)))
WT_ERR_MSG(session, EINVAL,
"bulk-load is only supported on newly created LSM trees");
- WT_ASSERT(session, !bulk || lsm_tree->exclusive);
/* Flag any errors from the tree get. */
- WT_RET(ret);
+ WT_ERR(ret);
+
+ /* Make sure we have exclusive access if and only if we want it */
+ WT_ASSERT(session, !bulk || lsm_tree->exclusive);
WT_ERR(__wt_calloc_one(session, &clsm));
diff --git a/src/third_party/wiredtiger/src/lsm/lsm_manager.c b/src/third_party/wiredtiger/src/lsm/lsm_manager.c
index 12b24984fcb..84c509158d1 100644
--- a/src/third_party/wiredtiger/src/lsm/lsm_manager.c
+++ b/src/third_party/wiredtiger/src/lsm/lsm_manager.c
@@ -227,7 +227,7 @@ __wt_lsm_manager_start(WT_SESSION_IMPL *session)
for (i = 0; i < WT_LSM_MAX_WORKERS; i++) {
WT_ERR(__wt_open_internal_session(
S2C(session), "lsm-worker", 1, 0, &worker_session));
- worker_session->isolation = TXN_ISO_READ_UNCOMMITTED;
+ worker_session->isolation = WT_ISO_READ_UNCOMMITTED;
manager->lsm_worker_cookies[i].session = worker_session;
}
diff --git a/src/third_party/wiredtiger/src/lsm/lsm_work_unit.c b/src/third_party/wiredtiger/src/lsm/lsm_work_unit.c
index 99140f89c51..4f5e1516f1c 100644
--- a/src/third_party/wiredtiger/src/lsm/lsm_work_unit.c
+++ b/src/third_party/wiredtiger/src/lsm/lsm_work_unit.c
@@ -109,7 +109,7 @@ __wt_lsm_get_chunk_to_flush(WT_SESSION_IMPL *session,
* enough to trigger checkpoints.
*/
if (evict_chunk != NULL && flush_chunk != NULL) {
- chunk = (__wt_random(session->rnd) & 1) ?
+ chunk = (__wt_random(&session->rnd) & 1) ?
evict_chunk : flush_chunk;
WT_ERR(__wt_lsm_manager_push_entry(
session, WT_LSM_WORK_FLUSH, 0, lsm_tree));
@@ -307,7 +307,7 @@ __wt_lsm_checkpoint_chunk(WT_SESSION_IMPL *session,
if ((ret = __wt_session_get_btree(
session, chunk->uri, NULL, NULL, 0)) == 0) {
saved_isolation = session->txn.isolation;
- session->txn.isolation = TXN_ISO_EVICTION;
+ session->txn.isolation = WT_ISO_EVICTION;
ret = __wt_cache_op(session, NULL, WT_SYNC_WRITE_LEAVES);
session->txn.isolation = saved_isolation;
WT_TRET(__wt_session_release_btree(session));
diff --git a/src/third_party/wiredtiger/src/meta/meta_track.c b/src/third_party/wiredtiger/src/meta/meta_track.c
index 66e7e3977f4..189a095ae74 100644
--- a/src/third_party/wiredtiger/src/meta/meta_track.c
+++ b/src/third_party/wiredtiger/src/meta/meta_track.c
@@ -67,6 +67,18 @@ __meta_track_next(WT_SESSION_IMPL *session, WT_META_TRACK **trkp)
}
/*
+ * __meta_track_clear --
+ * Clear the structure.
+ */
+static void
+__meta_track_clear(WT_SESSION_IMPL *session, WT_META_TRACK *trk)
+{
+ __wt_free(session, trk->a);
+ __wt_free(session, trk->b);
+ memset(trk, 0, sizeof(WT_META_TRACK));
+}
+
+/*
* __wt_meta_track_discard --
* Cleanup metadata tracking when closing a session.
*/
@@ -96,31 +108,21 @@ __wt_meta_track_on(WT_SESSION_IMPL *session)
* Apply the changes in a metadata tracking record.
*/
static int
-__meta_track_apply(WT_SESSION_IMPL *session, WT_META_TRACK *trk, int unroll)
+__meta_track_apply(WT_SESSION_IMPL *session, WT_META_TRACK *trk)
{
WT_BM *bm;
WT_BTREE *btree;
WT_DECL_RET;
int tret;
- /*
- * Unlock handles and complete checkpoints regardless of whether we are
- * unrolling.
- */
- if (!unroll && trk->op != WT_ST_CHECKPOINT &&
- trk->op != WT_ST_DROP_COMMIT && trk->op != WT_ST_LOCK)
- goto free;
-
switch (trk->op) {
case WT_ST_EMPTY: /* Unused slot */
break;
case WT_ST_CHECKPOINT: /* Checkpoint, see above */
- if (!unroll) {
- btree = trk->dhandle->handle;
- bm = btree->bm;
- WT_WITH_DHANDLE(session, trk->dhandle,
- WT_TRET(bm->checkpoint_resolve(bm, session)));
- }
+ btree = trk->dhandle->handle;
+ bm = btree->bm;
+ WT_WITH_DHANDLE(session, trk->dhandle,
+ WT_TRET(bm->checkpoint_resolve(bm, session)));
break;
case WT_ST_DROP_COMMIT:
if ((tret = __wt_remove_if_exists(session, trk->a)) != 0) {
@@ -130,8 +132,40 @@ __meta_track_apply(WT_SESSION_IMPL *session, WT_META_TRACK *trk, int unroll)
WT_TRET(tret);
}
break;
+ case WT_ST_LOCK:
+ WT_WITH_DHANDLE(session, trk->dhandle,
+ WT_TRET(__wt_session_release_btree(session)));
+ break;
+ case WT_ST_FILEOP:
+ case WT_ST_REMOVE:
+ case WT_ST_SET:
+ break;
+ WT_ILLEGAL_VALUE(session);
+ }
+
+ __meta_track_clear(session, trk);
+ return (ret);
+}
+
+/*
+ * __meta_track_unroll --
+ * Undo the changes in a metadata tracking record.
+ */
+static int
+__meta_track_unroll(WT_SESSION_IMPL *session, WT_META_TRACK *trk)
+{
+ WT_DECL_RET;
+ int tret;
+
+ switch (trk->op) {
+ case WT_ST_EMPTY: /* Unused slot */
+ break;
+ case WT_ST_CHECKPOINT: /* Checkpoint, see above */
+ break;
+ case WT_ST_DROP_COMMIT:
+ break;
case WT_ST_LOCK: /* Handle lock, see above */
- if (unroll && trk->created)
+ if (trk->created)
F_SET(trk->dhandle, WT_DHANDLE_DISCARD);
WT_WITH_DHANDLE(session, trk->dhandle,
WT_TRET(__wt_session_release_btree(session)));
@@ -185,11 +219,7 @@ __meta_track_apply(WT_SESSION_IMPL *session, WT_META_TRACK *trk, int unroll)
WT_ILLEGAL_VALUE(session);
}
-free: trk->op = WT_ST_EMPTY;
- __wt_free(session, trk->a);
- __wt_free(session, trk->b);
- trk->dhandle = NULL;
-
+ __meta_track_clear(session, trk);
return (ret);
}
@@ -253,33 +283,38 @@ __wt_meta_track_off(WT_SESSION_IMPL *session, int need_sync, int unroll)
if (trk == trk_orig)
return (0);
- while (--trk >= trk_orig)
- WT_TRET(__meta_track_apply(session, trk, unroll));
+ if (unroll) {
+ while (--trk >= trk_orig)
+ WT_TRET(__meta_track_unroll(session, trk));
+ /* Unroll operations don't need to flush the metadata. */
+ return (ret);
+ }
/*
- * Unroll operations don't need to flush the metadata.
- *
- * Also, if we don't have the metadata handle (e.g, we're in the
- * process of creating the metadata), we can't sync it.
+ * If we don't have the metadata handle (e.g, we're in the process of
+ * creating the metadata), we can't sync it.
*/
- if (unroll || ret != 0 || !need_sync || session->meta_dhandle == NULL)
- return (ret);
+ if (!need_sync || session->meta_dhandle == NULL)
+ goto done;
/* If we're logging, make sure the metadata update was flushed. */
if (FLD_ISSET(S2C(session)->log_flags, WT_CONN_LOG_ENABLED)) {
- if (!FLD_ISSET(S2C(session)->txn_logsync,
- WT_LOG_DSYNC | WT_LOG_FSYNC))
- WT_WITH_DHANDLE(session, session->meta_dhandle,
- ret = __wt_txn_checkpoint_log(session,
- 0, WT_TXN_LOG_CKPT_SYNC, NULL));
+ WT_WITH_DHANDLE(session, session->meta_dhandle,
+ ret = __wt_txn_checkpoint_log(session,
+ 0, WT_TXN_LOG_CKPT_SYNC, NULL));
+ WT_RET(ret);
} else {
WT_WITH_DHANDLE(session, session->meta_dhandle,
ret = __wt_checkpoint(session, NULL));
WT_RET(ret);
WT_WITH_DHANDLE(session, session->meta_dhandle,
ret = __wt_checkpoint_sync(session, NULL));
+ WT_RET(ret);
}
+done: /* Apply any tracked operations post-commit. */
+ for (; trk_orig < trk; trk_orig++)
+ WT_TRET(__meta_track_apply(session, trk_orig));
return (ret);
}
@@ -316,7 +351,7 @@ __wt_meta_track_sub_off(WT_SESSION_IMPL *session)
session->meta_track_next = session->meta_track_sub = NULL;
while (--trk >= trk_orig)
- WT_TRET(__meta_track_apply(session, trk, 0));
+ WT_TRET(__meta_track_apply(session, trk));
session->meta_track_next = trk_orig;
return (ret);
diff --git a/src/third_party/wiredtiger/src/os_posix/os_mtx_rw.c b/src/third_party/wiredtiger/src/os_posix/os_mtx_rw.c
index 3a3b0e0d74f..c3ae43b605f 100644
--- a/src/third_party/wiredtiger/src/os_posix/os_mtx_rw.c
+++ b/src/third_party/wiredtiger/src/os_posix/os_mtx_rw.c
@@ -216,7 +216,7 @@ __wt_writeunlock(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock)
++copy.s.writers;
++copy.s.readers;
- l->us = copy.us;
+ l->i.us = copy.i.us;
return (0);
}
diff --git a/src/third_party/wiredtiger/src/os_posix/os_thread.c b/src/third_party/wiredtiger/src/os_posix/os_thread.c
index c70a04c8df7..10eeef558bc 100644
--- a/src/third_party/wiredtiger/src/os_posix/os_thread.c
+++ b/src/third_party/wiredtiger/src/os_posix/os_thread.c
@@ -19,7 +19,8 @@ __wt_thread_create(WT_SESSION_IMPL *session,
WT_DECL_RET;
/* Spawn a new thread of control. */
- if ((ret = pthread_create(tidret, NULL, func, arg)) == 0)
+ WT_SYSCALL_RETRY(pthread_create(tidret, NULL, func, arg), ret);
+ if (ret == 0)
return (0);
WT_RET_MSG(session, ret, "pthread_create");
}
@@ -33,7 +34,8 @@ __wt_thread_join(WT_SESSION_IMPL *session, wt_thread_t tid)
{
WT_DECL_RET;
- if ((ret = pthread_join(tid, NULL)) == 0)
+ WT_SYSCALL_RETRY(pthread_join(tid, NULL), ret);
+ if (ret == 0)
return (0);
WT_RET_MSG(session, ret, "pthread_join");
diff --git a/src/third_party/wiredtiger/src/reconcile/rec_write.c b/src/third_party/wiredtiger/src/reconcile/rec_write.c
index 14ab05fbb25..e11490ac7fc 100644
--- a/src/third_party/wiredtiger/src/reconcile/rec_write.c
+++ b/src/third_party/wiredtiger/src/reconcile/rec_write.c
@@ -343,11 +343,12 @@ __wt_reconcile(WT_SESSION_IMPL *session,
WT_PAGE *page;
WT_PAGE_MODIFY *mod;
WT_RECONCILE *r;
- int locked;
+ int page_lock, scan_lock, split_lock;
conn = S2C(session);
page = ref->page;
mod = page->modify;
+ page_lock = scan_lock = split_lock = 0;
/* We're shouldn't get called with a clean page, that's an error. */
if (!__wt_page_is_modified(page))
@@ -386,22 +387,38 @@ __wt_reconcile(WT_SESSION_IMPL *session,
/*
* The compaction process looks at the page's modification information;
- * if compaction is running, lock the page down.
- *
- * Otherwise, flip on the scanning flag: obsolete updates cannot be
- * freed while reconciliation is in progress.
+ * if compaction is running, acquire the page's lock.
*/
- locked = 0;
if (conn->compact_in_memory_pass) {
- locked = 1;
WT_PAGE_LOCK(session, page);
- } else
+ page_lock = 1;
+ }
+
+ /*
+ * Reconciliation reads the lists of updates, so obsolete updates cannot
+ * be discarded while reconciliation is in progress.
+ */
+ for (;;) {
+ F_CAS_ATOMIC(page, WT_PAGE_SCANNING, ret);
+ if (ret == 0)
+ break;
+ __wt_yield();
+ }
+ scan_lock = 1;
+
+ /*
+ * Mark internal pages as splitting to ensure we don't deadlock when
+ * performing an in-memory split during a checkpoint.
+ */
+ if (WT_PAGE_IS_INTERNAL(page)) {
for (;;) {
- F_CAS_ATOMIC(page, WT_PAGE_SCANNING, ret);
+ F_CAS_ATOMIC(page, WT_PAGE_SPLIT_LOCKED, ret);
if (ret == 0)
break;
__wt_yield();
}
+ split_lock = 1;
+ }
/* Reconcile the page. */
switch (page->type) {
@@ -434,11 +451,13 @@ __wt_reconcile(WT_SESSION_IMPL *session,
else
WT_TRET(__rec_write_wrapup_err(session, r, page));
- /* Release the page lock if we're holding one. */
- if (locked)
- WT_PAGE_UNLOCK(session, page);
- else
+ /* Release the locks we're holding. */
+ if (split_lock)
+ F_CLR_ATOMIC(page, WT_PAGE_SPLIT_LOCKED);
+ if (scan_lock)
F_CLR_ATOMIC(page, WT_PAGE_SCANNING);
+ if (page_lock)
+ WT_PAGE_UNLOCK(session, page);
/*
* Clean up the boundary structures: some workloads result in millions
@@ -523,7 +542,7 @@ __rec_root_write(WT_SESSION_IMPL *session, WT_PAGE *page, uint32_t flags)
}
WT_ASSERT(session, session->split_gen != 0);
- pindex = WT_INTL_INDEX_COPY(next);
+ WT_INTL_INDEX_GET(session, next, pindex);
for (i = 0; i < mod->mod_multi_entries; ++i) {
WT_ERR(__wt_multi_to_ref(session,
next, &mod->mod_multi[i], &pindex->index[i], NULL));
@@ -2961,7 +2980,7 @@ __wt_bulk_init(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk)
"bulk-load is only possible for newly created trees");
/* Get a reference to the empty leaf page. */
- pindex = WT_INTL_INDEX_COPY(btree->root.page);
+ pindex = WT_INTL_INDEX_GET_SAFE(btree->root.page);
cbulk->ref = pindex->index[0];
cbulk->leaf = cbulk->ref->page;
@@ -5046,6 +5065,9 @@ err: __wt_scr_free(session, &tkey);
WT_FULL_BARRIER();
} else {
mod->rec_max_txn = r->max_txn;
+ if (!F_ISSET(r, WT_EVICTING) &&
+ TXNID_LT(btree->rec_max_txn, r->max_txn))
+ btree->rec_max_txn = r->max_txn;
if (WT_ATOMIC_CAS4(mod->write_gen, r->orig_write_gen, 0))
__wt_cache_dirty_decr(session, page);
diff --git a/src/third_party/wiredtiger/src/session/session_api.c b/src/third_party/wiredtiger/src/session/session_api.c
index 599c7bdf44a..b042e73f7d5 100644
--- a/src/third_party/wiredtiger/src/session/session_api.c
+++ b/src/third_party/wiredtiger/src/session/session_api.c
@@ -202,9 +202,9 @@ __session_reconfigure(WT_SESSION *wt_session, const char *config)
if (cval.len != 0)
session->isolation = session->txn.isolation =
WT_STRING_MATCH("snapshot", cval.str, cval.len) ?
- TXN_ISO_SNAPSHOT :
+ WT_ISO_SNAPSHOT :
WT_STRING_MATCH("read-uncommitted", cval.str, cval.len) ?
- TXN_ISO_READ_UNCOMMITTED : TXN_ISO_READ_COMMITTED;
+ WT_ISO_READ_UNCOMMITTED : WT_ISO_READ_COMMITTED;
err: API_END_RET_NOTFOUND_MAP(session, ret);
}
@@ -1064,7 +1064,7 @@ __wt_open_session(WT_CONNECTION_IMPL *conn,
WT_ERR(__wt_cond_alloc(session, "session", 0, &session_ret->cond));
if (WT_SESSION_FIRST_USE(session_ret))
- __wt_random_init(session_ret->rnd);
+ __wt_random_init(&session_ret->rnd);
__wt_event_handler_set(session_ret,
event_handler == NULL ? session->event_handler : event_handler);
@@ -1087,7 +1087,7 @@ __wt_open_session(WT_CONNECTION_IMPL *conn,
}
/* Initialize transaction support: default to read-committed. */
- session_ret->isolation = TXN_ISO_READ_COMMITTED;
+ session_ret->isolation = WT_ISO_READ_COMMITTED;
WT_ERR(__wt_txn_init(session_ret));
/*
diff --git a/src/third_party/wiredtiger/src/support/rand.c b/src/third_party/wiredtiger/src/support/rand.c
index bd51b2ea0d5..4d0f90b87dc 100644
--- a/src/third_party/wiredtiger/src/support/rand.c
+++ b/src/third_party/wiredtiger/src/support/rand.c
@@ -29,19 +29,22 @@
#include "wt_internal.h"
#undef M_W
-#define M_W (rnd)[0]
+#define M_W(r) r.x.w
#undef M_Z
-#define M_Z (rnd)[1]
+#define M_Z(r) r.x.z
/*
* __wt_random_init --
* Initialize return of a 32-bit pseudo-random number.
*/
void
-__wt_random_init(uint32_t *rnd)
+__wt_random_init(WT_RAND_STATE volatile * rnd_state)
{
- M_W = 521288629;
- M_Z = 362436069;
+ WT_RAND_STATE rnd;
+
+ M_W(rnd) = 521288629;
+ M_Z(rnd) = 362436069;
+ *rnd_state = rnd;
}
/*
@@ -60,11 +63,32 @@ __wt_random_init(uint32_t *rnd)
* forever. Take local copies of the shared values to avoid this.
*/
uint32_t
-__wt_random(uint32_t *rnd)
+__wt_random(WT_RAND_STATE volatile * rnd_state)
{
- uint32_t w = M_W, z = M_Z;
+ WT_RAND_STATE rnd;
+ uint32_t w, z;
+
+ /*
+ * Take a copy of the random state so we can ensure that the
+ * calculation operates on the state consistently regardless of
+ * concurrent calls with the same random state.
+ */
+ rnd = *rnd_state;
+ w = M_W(rnd);
+ z = M_Z(rnd);
+
+ /*
+ * Check if the value goes to 0 (from which we won't recover), and reset
+ * to the initial state. This has additional benefits if a caller fails
+ * to initialize the state, or initializes with a seed that results in a
+ * short period.
+ */
+ if (z == 0 || w == 0)
+ __wt_random_init(rnd_state);
+
+ M_Z(rnd) = z = 36969 * (z & 65535) + (z >> 16);
+ M_W(rnd) = w = 18000 * (w & 65535) + (w >> 16);
+ *rnd_state = rnd;
- M_Z = z = 36969 * (z & 65535) + (z >> 16);
- M_W = w = 18000 * (w & 65535) + (w >> 16);
- return (z << 16) + (w & 65535);
+ return ((z << 16) + (w & 65535));
}
diff --git a/src/third_party/wiredtiger/src/support/stat.c b/src/third_party/wiredtiger/src/support/stat.c
index 824914bf8bf..8db47646b11 100644
--- a/src/third_party/wiredtiger/src/support/stat.c
+++ b/src/third_party/wiredtiger/src/support/stat.c
@@ -447,7 +447,6 @@ __wt_stat_init_connection_stats(WT_CONNECTION_STATS *stats)
stats->log_slot_joins.desc = "log: consolidated slot joins";
stats->log_slot_toosmall.desc =
"log: failed to find a slot large enough for record";
- stats->log_buffer_grow.desc = "log: log buffer size increases";
stats->log_bytes_payload.desc = "log: log bytes of payload data";
stats->log_bytes_written.desc = "log: log bytes written";
stats->log_reads.desc = "log: log read operations";
@@ -622,7 +621,6 @@ __wt_stat_refresh_connection_stats(void *stats_arg)
stats->log_slot_transitions.v = 0;
stats->log_slot_joins.v = 0;
stats->log_slot_toosmall.v = 0;
- stats->log_buffer_grow.v = 0;
stats->log_bytes_payload.v = 0;
stats->log_bytes_written.v = 0;
stats->log_reads.v = 0;
diff --git a/src/third_party/wiredtiger/src/txn/txn.c b/src/third_party/wiredtiger/src/txn/txn.c
index f6f5a695b4f..a391ec8be88 100644
--- a/src/third_party/wiredtiger/src/txn/txn.c
+++ b/src/third_party/wiredtiger/src/txn/txn.c
@@ -59,7 +59,7 @@ __wt_txn_release_snapshot(WT_SESSION_IMPL *session)
WT_ASSERT(session,
txn_state->snap_min == WT_TXN_NONE ||
- session->txn.isolation == TXN_ISO_READ_UNCOMMITTED ||
+ session->txn.isolation == WT_ISO_READ_UNCOMMITTED ||
!__wt_txn_visible_all(session, txn_state->snap_min));
txn_state->snap_min = WT_TXN_NONE;
@@ -87,20 +87,6 @@ __wt_txn_get_snapshot(WT_SESSION_IMPL *session)
txn_global = &conn->txn_global;
txn_state = WT_SESSION_TXN_STATE(session);
- current_id = snap_min = txn_global->current;
- prev_oldest_id = txn_global->oldest_id;
-
- /* For pure read-only workloads, avoid scanning. */
- if (prev_oldest_id == current_id) {
- txn_state->snap_min = current_id;
- __txn_sort_snapshot(session, 0, current_id);
-
- /* Check that the oldest ID has not moved in the meantime. */
- if (prev_oldest_id == txn_global->oldest_id &&
- txn_global->scan_count == 0)
- return;
- }
-
/*
* We're going to scan. Increment the count of scanners to prevent the
* oldest ID from moving forwards. Spin if the count is negative,
@@ -112,9 +98,21 @@ __wt_txn_get_snapshot(WT_SESSION_IMPL *session)
} while (count < 0 ||
!WT_ATOMIC_CAS4(txn_global->scan_count, count, count + 1));
- /* The oldest ID cannot change until the scan count goes to zero. */
- prev_oldest_id = txn_global->oldest_id;
current_id = snap_min = txn_global->current;
+ prev_oldest_id = txn_global->oldest_id;
+
+ /* For pure read-only workloads, avoid scanning. */
+ if (prev_oldest_id == current_id) {
+ txn_state->snap_min = current_id;
+ __txn_sort_snapshot(session, 0, current_id);
+
+ /* Check that the oldest ID has not moved in the meantime. */
+ if (prev_oldest_id == txn_global->oldest_id) {
+ WT_ASSERT(session, txn_global->scan_count > 0);
+ (void)WT_ATOMIC_SUB4(txn_global->scan_count, 1);
+ return;
+ }
+ }
/* Walk the array of concurrent transactions. */
WT_ORDERED_READ(session_cnt, conn->session_cnt);
@@ -299,9 +297,9 @@ __wt_txn_config(WT_SESSION_IMPL *session, const char *cfg[])
if (cval.len != 0)
txn->isolation =
WT_STRING_MATCH("snapshot", cval.str, cval.len) ?
- TXN_ISO_SNAPSHOT :
+ WT_ISO_SNAPSHOT :
WT_STRING_MATCH("read-committed", cval.str, cval.len) ?
- TXN_ISO_READ_COMMITTED : TXN_ISO_READ_UNCOMMITTED;
+ WT_ISO_READ_COMMITTED : WT_ISO_READ_UNCOMMITTED;
/*
* The default sync setting is inherited from the connection, but can
@@ -333,6 +331,7 @@ __wt_txn_release(WT_SESSION_IMPL *session)
WT_TXN *txn;
WT_TXN_GLOBAL *txn_global;
WT_TXN_STATE *txn_state;
+ int was_oldest;
txn = &session->txn;
WT_ASSERT(session, txn->mod_count == 0);
@@ -340,6 +339,7 @@ __wt_txn_release(WT_SESSION_IMPL *session)
txn_global = &S2C(session)->txn_global;
txn_state = WT_SESSION_TXN_STATE(session);
+ was_oldest = 0;
/* Clear the transaction's ID from the global table. */
if (WT_SESSION_IS_CHECKPOINT(session)) {
@@ -353,6 +353,9 @@ __wt_txn_release(WT_SESSION_IMPL *session)
WT_ASSERT(session, txn_state->id != WT_TXN_NONE &&
txn->id != WT_TXN_NONE);
WT_PUBLISH(txn_state->id, WT_TXN_NONE);
+
+ /* Quick check for the oldest transaction. */
+ was_oldest = (txn->id == txn_global->last_running);
txn->id = WT_TXN_NONE;
}
@@ -369,7 +372,16 @@ __wt_txn_release(WT_SESSION_IMPL *session)
*/
__wt_txn_release_snapshot(session);
txn->isolation = session->isolation;
- F_CLR(txn, TXN_ERROR | TXN_HAS_ID | TXN_RUNNING);
+ /* Ensure the transaction flags are cleared on exit */
+ txn->flags = 0;
+
+ /*
+ * When the oldest transaction in the system completes, bump the oldest
+ * ID. This is racy and so not guaranteed, but in practice it keeps
+ * the oldest ID from falling too far behind.
+ */
+ if (was_oldest)
+ __wt_txn_update_oldest(session, 1);
}
/*
diff --git a/src/third_party/wiredtiger/src/txn/txn_ckpt.c b/src/third_party/wiredtiger/src/txn/txn_ckpt.c
index 08d8b778371..1ae99fb1c97 100644
--- a/src/third_party/wiredtiger/src/txn/txn_ckpt.c
+++ b/src/third_party/wiredtiger/src/txn/txn_ckpt.c
@@ -358,10 +358,10 @@ __wt_txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
u_int i;
conn = S2C(session);
+ txn = &session->txn;
txn_global = &conn->txn_global;
txn_state = WT_SESSION_TXN_STATE(session);
saved_isolation = session->isolation;
- txn = &session->txn;
full = idle = logging = tracking = 0;
/* Ensure the metadata table is open before taking any locks. */
@@ -373,6 +373,9 @@ __wt_txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
*/
WT_RET(__checkpoint_apply_all(session, cfg, NULL, &full));
+ /* Configure logging only if doing a full checkpoint. */
+ logging = FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED);
+
/*
* Get a list of handles we want to flush; this may pull closed objects
* into the session cache, but we're going to do that eventually anyway.
@@ -400,7 +403,7 @@ __wt_txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
"starting write leaves", &verb_timer));
/* Flush dirty leaf pages before we start the checkpoint. */
- session->isolation = txn->isolation = TXN_ISO_READ_COMMITTED;
+ session->isolation = txn->isolation = WT_ISO_READ_COMMITTED;
WT_ERR(__checkpoint_apply(session, cfg, __checkpoint_write_leaves));
/*
@@ -421,7 +424,7 @@ __wt_txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
tracking = 1;
/* Tell logging that we are about to start a database checkpoint. */
- if (FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED) && full)
+ if (full && logging)
WT_ERR(__wt_txn_checkpoint_log(
session, full, WT_TXN_LOG_CKPT_PREPARE, NULL));
@@ -491,11 +494,9 @@ __wt_txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
txn_state->id = txn_state->snap_min = WT_TXN_NONE;
/* Tell logging that we have started a database checkpoint. */
- if (FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED) && full) {
+ if (full && logging)
WT_ERR(__wt_txn_checkpoint_log(
session, full, WT_TXN_LOG_CKPT_START, NULL));
- logging = 1;
- }
WT_ERR(__checkpoint_apply(session, cfg, __wt_checkpoint));
@@ -531,22 +532,29 @@ __wt_txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
WT_ERR(__wt_txn_commit(session, NULL));
/*
- * Disable metadata tracking during the metadata checkpoint.
- *
- * We don't lock old checkpoints in the metadata file: there is no way
- * to open one. We are holding other handle locks, it is not safe to
- * lock conn->spinlock.
+ * Ensure that the metadata changes are durable before the checkpoint
+ * is resolved. Do this by either checkpointing the metadata or syncing
+ * the log file.
+ * Recovery relies on the checkpoint LSN in the metadata only being
+ * updated by full checkpoints so only checkpoint the metadata for
+ * full or non-logged checkpoints.
*/
- session->isolation = txn->isolation = TXN_ISO_READ_UNCOMMITTED;
- saved_meta_next = session->meta_track_next;
- session->meta_track_next = NULL;
- WT_WITH_DHANDLE(session,
- session->meta_dhandle, ret = __wt_checkpoint(session, cfg));
- session->meta_track_next = saved_meta_next;
- WT_ERR(ret);
-
- WT_ERR(__checkpoint_verbose_track(session,
- "metadata sync completed", &verb_timer));
+ if (full || !logging) {
+ session->isolation = txn->isolation = WT_ISO_READ_UNCOMMITTED;
+ /* Disable metadata tracking during the metadata checkpoint. */
+ saved_meta_next = session->meta_track_next;
+ session->meta_track_next = NULL;
+ WT_WITH_DHANDLE(session,
+ session->meta_dhandle, ret = __wt_checkpoint(session, cfg));
+ session->meta_track_next = saved_meta_next;
+ WT_ERR(ret);
+
+ WT_ERR(__checkpoint_verbose_track(session,
+ "metadata sync completed", &verb_timer));
+ } else
+ WT_WITH_DHANDLE(session, session->meta_dhandle,
+ ret = __wt_txn_checkpoint_log(session,
+ 0, WT_TXN_LOG_CKPT_SYNC, NULL));
if (full) {
WT_ERR(__wt_epoch(session, &stop));
@@ -566,7 +574,7 @@ err: /*
* overwritten the checkpoint, so what ends up on disk is not
* consistent.
*/
- session->isolation = txn->isolation = TXN_ISO_READ_UNCOMMITTED;
+ session->isolation = txn->isolation = WT_ISO_READ_UNCOMMITTED;
if (tracking)
WT_TRET(__wt_meta_track_off(session, 0, ret != 0));
@@ -585,8 +593,8 @@ err: /*
* Tell logging that we have finished a database checkpoint. Do not
* write a log record if the database was idle.
*/
- if (logging) {
- if (ret == 0 && full &&
+ if (full && logging) {
+ if (ret == 0 &&
F_ISSET((WT_BTREE *)session->meta_dhandle->handle,
WT_BTREE_SKIP_CKPT))
idle = 1;
@@ -1170,19 +1178,21 @@ __wt_checkpoint_close(WT_SESSION_IMPL *session, int final)
}
/*
- * If closing a modified file, checkpoint the file and optionally flush
- * the writes (the checkpoint call will discard the blocks, there's no
- * additional step needed).
- *
* We should already have the schema lock unless we're finishing a bulk
* load -- the only other paths to closing files (sweep and LSM) have
* already checked for read-only trees.
*/
- if (!final)
- WT_ASSERT(session,
- bulk || F_ISSET(session, WT_SESSION_SCHEMA_LOCKED));
+ WT_ASSERT(session,
+ final || bulk || F_ISSET(session, WT_SESSION_SCHEMA_LOCKED));
+
+ /*
+ * Turn on metadata tracking if:
+ * - The session is not already doing metadata tracking.
+ * - The file was bulk loaded.
+ * - The close is not during connection close.
+ */
+ need_tracking = !WT_META_TRACKING(session) && !bulk && !final;
- need_tracking = !bulk && !final && !WT_META_TRACKING(session);
if (need_tracking)
WT_RET(__wt_meta_track_on(session));
diff --git a/src/third_party/wiredtiger/src/txn/txn_ext.c b/src/third_party/wiredtiger/src/txn/txn_ext.c
index e35b6f16ea1..36d42a8996f 100644
--- a/src/third_party/wiredtiger/src/txn/txn_ext.c
+++ b/src/third_party/wiredtiger/src/txn/txn_ext.c
@@ -40,9 +40,9 @@ __wt_ext_transaction_isolation_level(
session = (WT_SESSION_IMPL *)wt_session;
txn = &session->txn;
- if (txn->isolation == TXN_ISO_READ_COMMITTED)
+ if (txn->isolation == WT_ISO_READ_COMMITTED)
return (WT_TXN_ISO_READ_COMMITTED);
- if (txn->isolation == TXN_ISO_READ_UNCOMMITTED)
+ if (txn->isolation == WT_ISO_READ_UNCOMMITTED)
return (WT_TXN_ISO_READ_UNCOMMITTED);
return (WT_TXN_ISO_SNAPSHOT);
}