author     Alexander Gorrod <alexander.gorrod@mongodb.com>  2015-07-23 23:23:52 +0000
committer  Alexander Gorrod <alexander.gorrod@mongodb.com>  2015-07-23 23:23:52 +0000
commit     455aa3de2fe23454b8acd2a6d4ae575f2bb1aa74 (patch)
tree       5f06bd87e9e5fbbeb7f709f919965f998cc8ec1b /src/third_party
parent     92f1bacdb1dbc17919e7a0f77f0d6c5b981933da (diff)
download   mongo-455aa3de2fe23454b8acd2a6d4ae575f2bb1aa74.tar.gz
Import wiredtiger-wiredtiger-2.6.1-332-gfdedd36.tar.gz from wiredtiger branch mongodb-3.2
Diffstat (limited to 'src/third_party')
-rwxr-xr-x  src/third_party/wiredtiger/dist/s_all                    2
-rw-r--r--  src/third_party/wiredtiger/dist/s_string.ok              3
-rw-r--r--  src/third_party/wiredtiger/dist/stat_data.py             3
-rw-r--r--  src/third_party/wiredtiger/src/block/block_ckpt.c       11
-rw-r--r--  src/third_party/wiredtiger/src/block/block_ext.c       153
-rw-r--r--  src/third_party/wiredtiger/src/block/block_slvg.c        2
-rw-r--r--  src/third_party/wiredtiger/src/block/block_vrfy.c       14
-rw-r--r--  src/third_party/wiredtiger/src/btree/bt_io.c             8
-rw-r--r--  src/third_party/wiredtiger/src/btree/bt_walk.c          84
-rw-r--r--  src/third_party/wiredtiger/src/btree/row_modify.c       12
-rw-r--r--  src/third_party/wiredtiger/src/conn/conn_log.c         172
-rw-r--r--  src/third_party/wiredtiger/src/include/btree.i          54
-rw-r--r--  src/third_party/wiredtiger/src/include/extern.h          9
-rw-r--r--  src/third_party/wiredtiger/src/include/gcc.h            12
-rw-r--r--  src/third_party/wiredtiger/src/include/log.h             3
-rw-r--r--  src/third_party/wiredtiger/src/include/mutex.h           4
-rw-r--r--  src/third_party/wiredtiger/src/include/stat.h            2
-rw-r--r--  src/third_party/wiredtiger/src/include/wiredtiger.in    10
-rw-r--r--  src/third_party/wiredtiger/src/log/log.c                 1
-rw-r--r--  src/third_party/wiredtiger/src/log/log_slot.c           70
-rw-r--r--  src/third_party/wiredtiger/src/meta/meta_track.c        90
-rw-r--r--  src/third_party/wiredtiger/src/os_posix/os_mtx_rw.c      2
-rw-r--r--  src/third_party/wiredtiger/src/reconcile/rec_write.c     2
-rw-r--r--  src/third_party/wiredtiger/src/support/stat.c            5
-rw-r--r--  src/third_party/wiredtiger/src/txn/txn_ckpt.c           72
25 files changed, 475 insertions, 325 deletions
diff --git a/src/third_party/wiredtiger/dist/s_all b/src/third_party/wiredtiger/dist/s_all
index 60e8b8f1551..c624db06a97 100755
--- a/src/third_party/wiredtiger/dist/s_all
+++ b/src/third_party/wiredtiger/dist/s_all
@@ -42,7 +42,7 @@ errchk()
return
fi
- echo "####################### ERROR ############################"
+ echo "####################### MESSAGE ############################"
echo "s_all run of: \"$1\" resulted in:"
sed -e 's/^/ /' $2
echo "#######################"
diff --git a/src/third_party/wiredtiger/dist/s_string.ok b/src/third_party/wiredtiger/dist/s_string.ok
index ba5717d1b4a..1ed92b79ba8 100644
--- a/src/third_party/wiredtiger/dist/s_string.ok
+++ b/src/third_party/wiredtiger/dist/s_string.ok
@@ -506,6 +506,7 @@ dmsg
doxgen
doxygen
ds
+dsb
dsk
dsrc
dst
@@ -648,6 +649,7 @@ lang
latencies
lbrace
lbracket
+ld
len
lenp
level's
@@ -897,6 +899,7 @@ subtree
sunique
superset
sw
+sy
sys
t's
tV
diff --git a/src/third_party/wiredtiger/dist/stat_data.py b/src/third_party/wiredtiger/dist/stat_data.py
index ee9c8782594..caf68364696 100644
--- a/src/third_party/wiredtiger/dist/stat_data.py
+++ b/src/third_party/wiredtiger/dist/stat_data.py
@@ -241,12 +241,11 @@ connection_stats = [
LogStat('log_writes', 'log write operations'),
LogStat('log_write_lsn', 'log server thread advances write LSN'),
+ LogStat('log_slot_coalesced', 'written slots coalesced'),
LogStat('log_slot_consolidated', 'logging bytes consolidated'),
LogStat('log_slot_closes', 'consolidated slot closures'),
LogStat('log_slot_joins', 'consolidated slot joins'),
LogStat('log_slot_races', 'consolidated slot join races'),
- LogStat('log_slot_switch_fails',
- 'slots selected for switching that were unavailable'),
LogStat('log_slot_toobig', 'record size exceeded maximum'),
LogStat('log_slot_toosmall',
'failed to find a slot large enough for record'),
diff --git a/src/third_party/wiredtiger/src/block/block_ckpt.c b/src/third_party/wiredtiger/src/block/block_ckpt.c
index 15a9da169fc..c88c44fb9c3 100644
--- a/src/third_party/wiredtiger/src/block/block_ckpt.c
+++ b/src/third_party/wiredtiger/src/block/block_ckpt.c
@@ -315,7 +315,7 @@ __ckpt_extlist_fblocks(
* file that contains a previous checkpoint's extents.
*/
return (__wt_block_insert_ext(
- session, &block->live.ckpt_avail, el->offset, el->size));
+ session, block, &block->live.ckpt_avail, el->offset, el->size));
}
#ifdef HAVE_DIAGNOSTIC
@@ -537,7 +537,7 @@ __ckpt_process(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_CKPT *ckptbase)
* must be paired in the checkpoint.
*/
if (a->root_offset != WT_BLOCK_INVALID_OFFSET)
- WT_ERR(__wt_block_insert_ext(session,
+ WT_ERR(__wt_block_insert_ext(session, block,
&a->discard, a->root_offset, a->root_size));
/*
@@ -554,10 +554,10 @@ __ckpt_process(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_CKPT *ckptbase)
*/
if (a->alloc.entries != 0)
WT_ERR(__wt_block_extlist_merge(
- session, &a->alloc, &b->alloc));
+ session, block, &a->alloc, &b->alloc));
if (a->discard.entries != 0)
WT_ERR(__wt_block_extlist_merge(
- session, &a->discard, &b->discard));
+ session, block, &a->discard, &b->discard));
/*
* If the "to" checkpoint is also being deleted, we're done with
@@ -775,7 +775,8 @@ __wt_block_checkpoint_resolve(WT_SESSION_IMPL *session, WT_BLOCK *block)
block->ckpt_inprogress = 0;
__wt_spin_lock(session, &block->live_lock);
- ret = __wt_block_extlist_merge(session, &ci->ckpt_avail, &ci->avail);
+ ret = __wt_block_extlist_merge(
+ session, block, &ci->ckpt_avail, &ci->avail);
__wt_spin_unlock(session, &block->live_lock);
/* Discard the lists remaining after the checkpoint call. */
diff --git a/src/third_party/wiredtiger/src/block/block_ext.c b/src/third_party/wiredtiger/src/block/block_ext.c
index e89c70060f3..d593537446b 100644
--- a/src/third_party/wiredtiger/src/block/block_ext.c
+++ b/src/third_party/wiredtiger/src/block/block_ext.c
@@ -8,12 +8,25 @@
#include "wt_internal.h"
-static int __block_append(WT_SESSION_IMPL *, WT_EXTLIST *, wt_off_t, wt_off_t);
+/*
+ * WT_BLOCK_RET --
+ * Handle extension list errors that would normally panic the system but
+ * which should fail gracefully when verifying.
+ */
+#define WT_BLOCK_RET(session, block, v, ...) do { \
+ int __ret = (v); \
+ __wt_err(session, __ret, __VA_ARGS__); \
+ return ((block)->verify ? __ret : __wt_panic(session)); \
+} while (0)
+
+static int __block_append(WT_SESSION_IMPL *,
+ WT_BLOCK *, WT_EXTLIST *, wt_off_t, wt_off_t);
static int __block_ext_overlap(WT_SESSION_IMPL *,
WT_BLOCK *, WT_EXTLIST *, WT_EXT **, WT_EXTLIST *, WT_EXT **);
static int __block_extlist_dump(
WT_SESSION_IMPL *, const char *, WT_EXTLIST *, int);
-static int __block_merge(WT_SESSION_IMPL *, WT_EXTLIST *, wt_off_t, wt_off_t);
+static int __block_merge(WT_SESSION_IMPL *,
+ WT_BLOCK *, WT_EXTLIST *, wt_off_t, wt_off_t);
/*
* __block_off_srch_last --
@@ -308,8 +321,8 @@ __wt_block_misplaced(WT_SESSION_IMPL *session,
* Remove a record from an extent list.
*/
static int
-__block_off_remove(
- WT_SESSION_IMPL *session, WT_EXTLIST *el, wt_off_t off, WT_EXT **extp)
+__block_off_remove(WT_SESSION_IMPL *session, WT_BLOCK *block,
+ WT_EXTLIST *el, wt_off_t off, WT_EXT **extp)
{
WT_EXT *ext, **astack[WT_SKIP_MAXDEPTH];
WT_SIZE *szp, **sstack[WT_SKIP_MAXDEPTH];
@@ -370,7 +383,7 @@ __block_off_remove(
return (0);
corrupt:
- WT_PANIC_RET(session, EINVAL,
+ WT_BLOCK_RET(session, block, EINVAL,
"attempt to remove non-existent offset from an extent list");
}
@@ -380,8 +393,8 @@ corrupt:
* overlapping entry.
*/
int
-__wt_block_off_remove_overlap(
- WT_SESSION_IMPL *session, WT_EXTLIST *el, wt_off_t off, wt_off_t size)
+__wt_block_off_remove_overlap(WT_SESSION_IMPL *session, WT_BLOCK *block,
+ WT_EXTLIST *el, wt_off_t off, wt_off_t size)
{
WT_EXT *before, *after, *ext;
wt_off_t a_off, a_size, b_off, b_size;
@@ -393,7 +406,8 @@ __wt_block_off_remove_overlap(
/* If "before" or "after" overlaps, retrieve the overlapping entry. */
if (before != NULL && before->off + before->size > off) {
- WT_RET(__block_off_remove(session, el, before->off, &ext));
+ WT_RET(__block_off_remove(
+ session, block, el, before->off, &ext));
/* Calculate overlapping extents. */
a_off = ext->off;
@@ -401,7 +415,8 @@ __wt_block_off_remove_overlap(
b_off = off + size;
b_size = ext->size - (a_size + size);
} else if (after != NULL && off + size > after->off) {
- WT_RET(__block_off_remove(session, el, after->off, &ext));
+ WT_RET(__block_off_remove(
+ session, block, el, after->off, &ext));
/*
* Calculate overlapping extents. There's no initial overlap
@@ -525,7 +540,7 @@ __wt_block_alloc(
__block_size_srch(block->live.avail.sz, size, sstack);
if ((szp = *sstack[0]) == NULL) {
append: WT_RET(__block_extend(session, block, offp, size));
- WT_RET(__block_append(session,
+ WT_RET(__block_append(session, block,
&block->live.alloc, *offp, (wt_off_t)size));
return (0);
}
@@ -535,7 +550,8 @@ append: WT_RET(__block_extend(session, block, offp, size));
}
/* Remove the record, and set the returned offset. */
- WT_RET(__block_off_remove(session, &block->live.avail, ext->off, &ext));
+ WT_RET(__block_off_remove(
+ session, block, &block->live.avail, ext->off, &ext));
*offp = ext->off;
/* If doing a partial allocation, adjust the record and put it back. */
@@ -561,7 +577,7 @@ append: WT_RET(__block_extend(session, block, offp, size));
/* Add the newly allocated extent to the list of allocations. */
WT_RET(__block_merge(
- session, &block->live.alloc, *offp, (wt_off_t)size));
+ session, block, &block->live.alloc, *offp, (wt_off_t)size));
return (0);
}
@@ -618,12 +634,12 @@ __wt_block_off_free(
* list.
*/
if ((ret = __wt_block_off_remove_overlap(
- session, &block->live.alloc, offset, size)) == 0)
- ret = __block_merge(
- session, &block->live.avail, offset, (wt_off_t)size);
+ session, block, &block->live.alloc, offset, size)) == 0)
+ ret = __block_merge(session, block,
+ &block->live.avail, offset, (wt_off_t)size);
else if (ret == WT_NOTFOUND)
- ret = __block_merge(
- session, &block->live.discard, offset, (wt_off_t)size);
+ ret = __block_merge(session, block,
+ &block->live.discard, offset, (wt_off_t)size);
return (ret);
}
@@ -770,9 +786,12 @@ __block_ext_overlap(WT_SESSION_IMPL *session,
*/
*ap = (*ap)->next[0];
*bp = (*bp)->next[0];
- WT_RET(__block_merge(session, avail, b->off, b->size));
- WT_RET(__block_off_remove(session, ael, a->off, NULL));
- WT_RET(__block_off_remove(session, bel, b->off, NULL));
+ WT_RET(__block_merge(
+ session, block, avail, b->off, b->size));
+ WT_RET(__block_off_remove(
+ session, block, ael, a->off, NULL));
+ WT_RET(__block_off_remove(
+ session, block, bel, b->off, NULL));
}
else if (a->size > b->size) { /* Case #4 */
/*
@@ -780,7 +799,8 @@ __block_ext_overlap(WT_SESSION_IMPL *session,
* Increment/Decrement A's offset/size by the size of B
* Insert A on its list
*/
- WT_RET(__block_off_remove(session, ael, a->off, &a));
+ WT_RET(__block_off_remove(
+ session, block, ael, a->off, &a));
a->off += b->size;
a->size -= b->size;
WT_RET(__block_ext_insert(session, ael, a));
@@ -791,15 +811,18 @@ __block_ext_overlap(WT_SESSION_IMPL *session,
* Delete B
*/
*bp = (*bp)->next[0];
- WT_RET(__block_merge(session, avail, b->off, b->size));
- WT_RET(__block_off_remove(session, bel, b->off, NULL));
+ WT_RET(__block_merge(
+ session, block, avail, b->off, b->size));
+ WT_RET(__block_off_remove(
+ session, block, bel, b->off, NULL));
} else { /* Case #9 */
/*
* Remove B from its list
* Increment/Decrement B's offset/size by the size of A
* Insert B on its list
*/
- WT_RET(__block_off_remove(session, bel, b->off, &b));
+ WT_RET(__block_off_remove(
+ session, block, bel, b->off, &b));
b->off += a->size;
b->size -= a->size;
WT_RET(__block_ext_insert(session, bel, b));
@@ -810,8 +833,10 @@ __block_ext_overlap(WT_SESSION_IMPL *session,
* Delete A
*/
*ap = (*ap)->next[0];
- WT_RET(__block_merge(session, avail, a->off, a->size));
- WT_RET(__block_off_remove(session, ael, a->off, NULL));
+ WT_RET(__block_merge(
+ session, block, avail, a->off, a->size));
+ WT_RET(__block_off_remove(
+ session, block, ael, a->off, NULL));
} /* Case #6 */
} else if (a->off + a->size == b->off + b->size) {
/*
@@ -819,7 +844,7 @@ __block_ext_overlap(WT_SESSION_IMPL *session,
* Decrement A's size by the size of B
* Insert A on its list
*/
- WT_RET(__block_off_remove(session, ael, a->off, &a));
+ WT_RET(__block_off_remove(session, block, ael, a->off, &a));
a->size -= b->size;
WT_RET(__block_ext_insert(session, ael, a));
@@ -829,8 +854,8 @@ __block_ext_overlap(WT_SESSION_IMPL *session,
* Delete B
*/
*bp = (*bp)->next[0];
- WT_RET(__block_merge(session, avail, b->off, b->size));
- WT_RET(__block_off_remove(session, bel, b->off, NULL));
+ WT_RET(__block_merge(session, block, avail, b->off, b->size));
+ WT_RET(__block_off_remove(session, block, bel, b->off, NULL));
} else if /* Case #3, #7 */
(a->off + a->size < b->off + b->size) {
/*
@@ -838,14 +863,14 @@ __block_ext_overlap(WT_SESSION_IMPL *session,
*/
off = b->off;
size = (a->off + a->size) - b->off;
- WT_RET(__block_merge(session, avail, off, size));
+ WT_RET(__block_merge(session, block, avail, off, size));
/*
* Remove A from its list
* Decrement A's size by the overlap
* Insert A on its list
*/
- WT_RET(__block_off_remove(session, ael, a->off, &a));
+ WT_RET(__block_off_remove(session, block, ael, a->off, &a));
a->size -= size;
WT_RET(__block_ext_insert(session, ael, a));
@@ -854,7 +879,7 @@ __block_ext_overlap(WT_SESSION_IMPL *session,
* Increment/Decrement B's offset/size by the overlap
* Insert B on its list
*/
- WT_RET(__block_off_remove(session, bel, b->off, &b));
+ WT_RET(__block_off_remove(session, block, bel, b->off, &b));
b->off += size;
b->size -= size;
WT_RET(__block_ext_insert(session, bel, b));
@@ -868,12 +893,12 @@ __block_ext_overlap(WT_SESSION_IMPL *session,
* Decrement A's size by trailing part of A plus B's size
* Insert A on its list
*/
- WT_RET(__block_off_remove(session, ael, a->off, &a));
+ WT_RET(__block_off_remove(session, block, ael, a->off, &a));
a->size = b->off - a->off;
WT_RET(__block_ext_insert(session, ael, a));
/* Add trailing part of A to A's list as a new element. */
- WT_RET(__block_merge(session, ael, off, size));
+ WT_RET(__block_merge(session, block, ael, off, size));
/*
* Move caller's B to the next element
@@ -881,8 +906,8 @@ __block_ext_overlap(WT_SESSION_IMPL *session,
* Delete B
*/
*bp = (*bp)->next[0];
- WT_RET(__block_merge(session, avail, b->off, b->size));
- WT_RET(__block_off_remove(session, bel, b->off, NULL));
+ WT_RET(__block_merge(session, block, avail, b->off, b->size));
+ WT_RET(__block_off_remove(session, block, bel, b->off, NULL));
}
return (0);
@@ -893,7 +918,8 @@ __block_ext_overlap(WT_SESSION_IMPL *session,
* Merge one extent list into another.
*/
int
-__wt_block_extlist_merge(WT_SESSION_IMPL *session, WT_EXTLIST *a, WT_EXTLIST *b)
+__wt_block_extlist_merge(WT_SESSION_IMPL *session, WT_BLOCK *block,
+ WT_EXTLIST *a, WT_EXTLIST *b)
{
WT_EXT *ext;
WT_EXTLIST tmp;
@@ -923,7 +949,7 @@ __wt_block_extlist_merge(WT_SESSION_IMPL *session, WT_EXTLIST *a, WT_EXTLIST *b)
}
WT_EXT_FOREACH(ext, a->off)
- WT_RET(__block_merge(session, b, ext->off, ext->size));
+ WT_RET(__block_merge(session, block, b, ext->off, ext->size));
return (0);
}
@@ -933,12 +959,13 @@ __wt_block_extlist_merge(WT_SESSION_IMPL *session, WT_EXTLIST *a, WT_EXTLIST *b)
* Append a new entry to the allocation list.
*/
static int
-__block_append(
- WT_SESSION_IMPL *session, WT_EXTLIST *el, wt_off_t off, wt_off_t size)
+__block_append(WT_SESSION_IMPL *session, WT_BLOCK *block,
+ WT_EXTLIST *el, wt_off_t off, wt_off_t size)
{
WT_EXT *ext, **astack[WT_SKIP_MAXDEPTH];
u_int i;
+ WT_UNUSED(block);
WT_ASSERT(session, el->track_size == 0);
/*
@@ -979,8 +1006,8 @@ __block_append(
* Insert an extent into an extent list, merging if possible.
*/
int
-__wt_block_insert_ext(
- WT_SESSION_IMPL *session, WT_EXTLIST *el, wt_off_t off, wt_off_t size)
+__wt_block_insert_ext(WT_SESSION_IMPL *session, WT_BLOCK *block,
+ WT_EXTLIST *el, wt_off_t off, wt_off_t size)
{
/*
* There are currently two copies of this function (this code is a one-
@@ -993,7 +1020,7 @@ __wt_block_insert_ext(
* Callers of this function are expected to have already acquired any
* locks required to manipulate the extent list.
*/
- return (__block_merge(session, el, off, size));
+ return (__block_merge(session, block, el, off, size));
}
/*
@@ -1002,8 +1029,8 @@ __wt_block_insert_ext(
* version).
*/
static int
-__block_merge(
- WT_SESSION_IMPL *session, WT_EXTLIST *el, wt_off_t off, wt_off_t size)
+__block_merge(WT_SESSION_IMPL *session, WT_BLOCK *block,
+ WT_EXTLIST *el, wt_off_t off, wt_off_t size)
{
WT_EXT *ext, *after, *before;
@@ -1014,7 +1041,7 @@ __block_merge(
__block_off_srch_pair(el, off, &before, &after);
if (before != NULL) {
if (before->off + before->size > off)
- WT_PANIC_RET(session, EINVAL,
+ WT_BLOCK_RET(session, block, EINVAL,
"%s: existing range %" PRIdMAX "-%" PRIdMAX
" overlaps with merge range %" PRIdMAX "-%" PRIdMAX,
el->name,
@@ -1025,8 +1052,8 @@ __block_merge(
before = NULL;
}
if (after != NULL) {
- if (off + size > after->off)
- WT_PANIC_RET(session, EINVAL,
+ if (off + size > after->off) {
+ WT_BLOCK_RET(session, block, EINVAL,
"%s: merge range %" PRIdMAX "-%" PRIdMAX
" overlaps with existing range %" PRIdMAX
"-%" PRIdMAX,
@@ -1034,6 +1061,7 @@ __block_merge(
(intmax_t)off, (intmax_t)(off + size),
(intmax_t)after->off,
(intmax_t)(after->off + after->size));
+ }
if (off + size != after->off)
after = NULL;
}
@@ -1053,7 +1081,8 @@ __block_merge(
* the record we're going to use, adjust it and re-insert it.
*/
if (before == NULL) {
- WT_RET(__block_off_remove(session, el, after->off, &ext));
+ WT_RET(__block_off_remove(
+ session, block, el, after->off, &ext));
WT_RET(__wt_verbose(session, WT_VERB_BLOCK,
"%s: range grows from %" PRIdMAX "-%" PRIdMAX ", to %"
@@ -1067,10 +1096,11 @@ __block_merge(
} else {
if (after != NULL) {
size += after->size;
- WT_RET(
- __block_off_remove(session, el, after->off, NULL));
+ WT_RET(__block_off_remove(
+ session, block, el, after->off, NULL));
}
- WT_RET(__block_off_remove(session, el, before->off, &ext));
+ WT_RET(__block_off_remove(
+ session, block, el, before->off, &ext));
WT_RET(__wt_verbose(session, WT_VERB_BLOCK,
"%s: range grows from %" PRIdMAX "-%" PRIdMAX ", to %"
@@ -1115,8 +1145,8 @@ __wt_block_extlist_read_avail(WT_SESSION_IMPL *session,
* Extent blocks are allocated from the available list: if reading the
* avail list, the extent blocks might be included, remove them.
*/
- WT_ERR_NOTFOUND_OK(
- __wt_block_off_remove_overlap(session, el, el->offset, el->size));
+ WT_ERR_NOTFOUND_OK(__wt_block_off_remove_overlap(
+ session, block, el, el->offset, el->size));
err:
#ifdef HAVE_DIAGNOSTIC
@@ -1137,7 +1167,8 @@ __wt_block_extlist_read(WT_SESSION_IMPL *session,
WT_DECL_ITEM(tmp);
WT_DECL_RET;
wt_off_t off, size;
- int (*func)(WT_SESSION_IMPL *, WT_EXTLIST *, wt_off_t, wt_off_t);
+ int (*func)(
+ WT_SESSION_IMPL *, WT_BLOCK *, WT_EXTLIST *, wt_off_t, wt_off_t);
const uint8_t *p;
/* If there isn't a list, we're done. */
@@ -1187,14 +1218,16 @@ __wt_block_extlist_read(WT_SESSION_IMPL *session,
if (off < block->allocsize ||
off % block->allocsize != 0 ||
size % block->allocsize != 0 ||
- off + size > ckpt_size)
-corrupted: WT_PANIC_RET(session, WT_ERROR,
+ off + size > ckpt_size) {
+corrupted: __wt_scr_free(session, &tmp);
+ WT_BLOCK_RET(session, block, WT_ERROR,
"file contains a corrupted %s extent list, range %"
PRIdMAX "-%" PRIdMAX " past end-of-file",
el->name,
(intmax_t)off, (intmax_t)(off + size));
+ }
- WT_ERR(func(session, el, off, size));
+ WT_ERR(func(session, block, el, off, size));
}
if (WT_VERBOSE_ISSET(session, WT_VERB_BLOCK))
@@ -1290,7 +1323,7 @@ __wt_block_extlist_write(WT_SESSION_IMPL *session,
* blocks never appear on any allocation list.
*/
WT_TRET(__wt_block_off_remove_overlap(
- session, &block->live.alloc, el->offset, el->size));
+ session, block, &block->live.alloc, el->offset, el->size));
WT_ERR(__wt_verbose(session, WT_VERB_BLOCK,
"%s written %" PRIdMAX "/%" PRIu32,
@@ -1331,7 +1364,7 @@ __wt_block_extlist_truncate(
*/
orig = fh->size;
size = ext->off;
- WT_RET(__block_off_remove(session, el, size, NULL));
+ WT_RET(__block_off_remove(session, block, el, size, NULL));
fh->size = size;
/*
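
The block_ext.c hunks above thread a WT_BLOCK pointer through the extent-list helpers so the new WT_BLOCK_RET macro can return a plain error while block->verify is set instead of panicking. A minimal, self-contained sketch of that error-handling pattern follows (the names block, BLOCK_RET and PANIC are illustrative stand-ins, not the WiredTiger API):

#include <stdio.h>

/* Illustrative stand-ins for the real WiredTiger types and returns. */
struct block { int verify; };           /* 1 while verify is running */
#define PANIC   (-31804)                /* pretend "panic" return */

/*
 * Report the error; panic unless the block is being verified, in which
 * case return the plain error so verify can keep going and report it.
 */
#define BLOCK_RET(block, v, msg) do {                                   \
        int __ret = (v);                                                \
        fprintf(stderr, "%s: error %d\n", (msg), __ret);                \
        return ((block)->verify ? __ret : PANIC);                       \
} while (0)

static int
remove_offset(struct block *block, int found)
{
        if (!found)                     /* corruption detected */
                BLOCK_RET(block, -1, "non-existent offset");
        return (0);
}

int
main(void)
{
        struct block live = { 0 }, vrfy = { 1 };

        printf("live:   %d\n", remove_offset(&live, 0));   /* panics */
        printf("verify: %d\n", remove_offset(&vrfy, 0));   /* plain error */
        return (0);
}
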
diff --git a/src/third_party/wiredtiger/src/block/block_slvg.c b/src/third_party/wiredtiger/src/block/block_slvg.c
index 517fb92491e..c78a6c39942 100644
--- a/src/third_party/wiredtiger/src/block/block_slvg.c
+++ b/src/third_party/wiredtiger/src/block/block_slvg.c
@@ -53,7 +53,7 @@ __wt_block_salvage_start(WT_SESSION_IMPL *session, WT_BLOCK *block)
* any blocks we don't want as we process the file.
*/
WT_RET(__wt_block_insert_ext(
- session, &block->live.alloc, allocsize, len - allocsize));
+ session, block, &block->live.alloc, allocsize, len - allocsize));
return (0);
}
diff --git a/src/third_party/wiredtiger/src/block/block_vrfy.c b/src/third_party/wiredtiger/src/block/block_vrfy.c
index 2a279a174dc..dc9662bd5e0 100644
--- a/src/third_party/wiredtiger/src/block/block_vrfy.c
+++ b/src/third_party/wiredtiger/src/block/block_vrfy.c
@@ -87,6 +87,12 @@ __wt_block_verify_start(WT_SESSION_IMPL *session,
WT_RET(__bit_alloc(session, block->frags, &block->fragfile));
/*
+ * Set this before reading any extent lists: don't panic if we see
+ * corruption.
+ */
+ block->verify = 1;
+
+ /*
* We maintain an allocation list that is rolled forward through the
* set of checkpoints.
*/
@@ -102,8 +108,6 @@ __wt_block_verify_start(WT_SESSION_IMPL *session,
/* Configuration: strict behavior on any error. */
WT_RET(__wt_config_gets(session, cfg, "strict", &cval));
block->verify_strict = cval.val ? 1 : 0;
-
- block->verify = 1;
return (0);
}
@@ -228,7 +232,7 @@ __wt_verify_ckpt_load(
WT_RET(__wt_block_extlist_read(
session, block, el, ci->file_size));
WT_RET(__wt_block_extlist_merge(
- session, el, &block->verify_alloc));
+ session, block, el, &block->verify_alloc));
__wt_block_extlist_free(session, el);
}
el = &ci->discard;
@@ -236,7 +240,7 @@ __wt_verify_ckpt_load(
WT_RET(__wt_block_extlist_read(
session, block, el, ci->file_size));
WT_EXT_FOREACH(ext, el->off)
- WT_RET(__wt_block_off_remove_overlap(session,
+ WT_RET(__wt_block_off_remove_overlap(session, block,
&block->verify_alloc, ext->off, ext->size));
__wt_block_extlist_free(session, el);
}
@@ -265,7 +269,7 @@ __wt_verify_ckpt_load(
* checkpoints.
*/
if (ci->root_offset != WT_BLOCK_INVALID_OFFSET)
- WT_RET(__wt_block_off_remove_overlap(session,
+ WT_RET(__wt_block_off_remove_overlap(session, block,
&block->verify_alloc, ci->root_offset, ci->root_size));
/*
diff --git a/src/third_party/wiredtiger/src/btree/bt_io.c b/src/third_party/wiredtiger/src/btree/bt_io.c
index ec7d3109c0c..a8bbf8a0266 100644
--- a/src/third_party/wiredtiger/src/btree/bt_io.c
+++ b/src/third_party/wiredtiger/src/btree/bt_io.c
@@ -73,7 +73,13 @@ __wt_bt_read(WT_SESSION_IMPL *session,
ip = etmp;
dsk = ip->data;
- }
+ } else if (btree->kencryptor != NULL &&
+ !F_ISSET(btree, WT_BTREE_VERIFY) &&
+ !F_ISSET(session, WT_SESSION_SALVAGE_CORRUPT_OK))
+ WT_ERR_MSG(session, WT_ERROR,
+ "encryption configured, and existing file is not "
+ "encrypted");
+
if (F_ISSET(dsk, WT_PAGE_COMPRESSED)) {
if (btree->compressor == NULL ||
btree->compressor->decompress == NULL)
diff --git a/src/third_party/wiredtiger/src/btree/bt_walk.c b/src/third_party/wiredtiger/src/btree/bt_walk.c
index f5b7c4661b6..f257a955801 100644
--- a/src/third_party/wiredtiger/src/btree/bt_walk.c
+++ b/src/third_party/wiredtiger/src/btree/bt_walk.c
@@ -9,6 +9,66 @@
#include "wt_internal.h"
/*
+ * __page_refp --
+ * Return the page's index and slot for a reference.
+ */
+static inline void
+__page_refp(WT_SESSION_IMPL *session,
+ WT_REF *ref, WT_PAGE_INDEX **pindexp, uint32_t *slotp)
+{
+ WT_PAGE_INDEX *pindex;
+ uint32_t i;
+
+ /*
+ * Copy the parent page's index value: the page can split at any time,
+ * but the index's value is always valid, even if it's not up-to-date.
+ */
+retry: WT_INTL_INDEX_GET(session, ref->home, pindex);
+
+ /*
+ * Use the page's reference hint: it should be correct unless the page
+ * split before our slot. If the page splits after our slot, the hint
+ * will point earlier in the array than our actual slot, so the first
+ * loop is from the hint to the end of the list, and the second loop
+ * is from the start of the list to the end of the list. (The second
+ * loop overlaps the first, but that only happen in cases where we've
+ * deepened the tree and aren't going to find our slot at all, that's
+ * not worth optimizing.)
+ *
+ * It's not an error for the reference hint to be wrong, it just means
+ * the first retrieval (which sets the hint for subsequent retrievals),
+ * is slower.
+ */
+ i = ref->pindex_hint;
+ if (i < pindex->entries && pindex->index[i]->page == ref->page) {
+ *pindexp = pindex;
+ *slotp = i;
+ return;
+ }
+ while (++i < pindex->entries)
+ if (pindex->index[i]->page == ref->page) {
+ *pindexp = pindex;
+ *slotp = ref->pindex_hint = i;
+ return;
+ }
+ for (i = 0; i < pindex->entries; ++i)
+ if (pindex->index[i]->page == ref->page) {
+ *pindexp = pindex;
+ *slotp = ref->pindex_hint = i;
+ return;
+ }
+
+ /*
+ * If we don't find our reference, the page split into a new level and
+ * our home pointer references the wrong page. After internal pages
+ * deepen, their reference structure home value are updated; yield and
+ * wait for that to happen.
+ */
+ __wt_yield();
+ goto retry;
+}
+
+/*
* __wt_tree_walk --
* Move to the next/previous page in the tree.
*/
@@ -99,7 +159,7 @@ ascend: /*
}
/* Figure out the current slot in the WT_REF array. */
- __wt_page_refp(session, ref, &pindex, &slot);
+ __page_refp(session, ref, &pindex, &slot);
for (;;) {
/*
@@ -134,19 +194,13 @@ ascend: /*
* parent of the current child page, our parent
* reference can't have split or been evicted.
*/
- __wt_page_refp(session, ref, &pindex, &slot);
+ __page_refp(session, ref, &pindex, &slot);
if ((ret = __wt_page_swap(
session, couple, ref, flags)) != 0) {
WT_TRET(__wt_page_release(
session, couple, flags));
WT_ERR(ret);
}
-
- /*
- * Set the reference hint (used when we continue
- * the walk).
- */
- ref->pindex_hint = slot;
}
*refp = ref;
@@ -162,13 +216,15 @@ ascend: /*
++*walkcntp;
for (;;) {
- ref = pindex->index[slot];
-
/*
- * Set the reference hint (used when we continue the
- * walk).
+ * Move to the next slot, and set the reference hint if
+ * it's wrong (used when we continue the walk). We don't
+ * update those hints when splitting, so it's common for
+ * them to be incorrect in some workloads.
*/
- ref->pindex_hint = slot;
+ ref = pindex->index[slot];
+ if (ref->pindex_hint != slot)
+ ref->pindex_hint = slot;
if (LF_ISSET(WT_READ_CACHE)) {
/*
@@ -270,7 +326,7 @@ ascend: /*
couple == couple_orig ||
WT_PAGE_IS_INTERNAL(couple->page));
ref = couple;
- __wt_page_refp(session, ref, &pindex, &slot);
+ __page_refp(session, ref, &pindex, &slot);
if (couple == couple_orig)
break;
}
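
The bt_walk.c change moves the slot lookup into a private __page_refp and makes the pindex_hint check constant-time when the hint is still valid, falling back to a forward scan from the hint and then a full scan, with the caller yielding and retrying if the reference's home page has split. A small stand-alone sketch of that hint-then-scan lookup (find_slot and the raw pointer array are hypothetical, simplified stand-ins for WT_PAGE_INDEX):

#include <stdint.h>
#include <stdio.h>

/* Find target in index[], preferring a cached hint slot. */
static int
find_slot(void **index, uint32_t entries, uint32_t *hintp,
    void *target, uint32_t *slotp)
{
        uint32_t i;

        i = *hintp;                     /* fast path: hint still valid */
        if (i < entries && index[i] == target) {
                *slotp = i;
                return (0);
        }
        while (++i < entries)           /* hint too small: scan forward */
                if (index[i] == target) {
                        *slotp = *hintp = i;
                        return (0);
                }
        for (i = 0; i < entries; ++i)   /* last resort: full scan */
                if (index[i] == target) {
                        *slotp = *hintp = i;
                        return (0);
                }
        return (-1);                    /* caller yields and retries */
}

int
main(void)
{
        int a, b, c;
        void *index[] = { &a, &b, &c };
        uint32_t hint = 0, slot;

        if (find_slot(index, 3, &hint, &c, &slot) == 0)
                printf("slot %u (hint now %u)\n", slot, hint);
        return (0);
}
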
diff --git a/src/third_party/wiredtiger/src/btree/row_modify.c b/src/third_party/wiredtiger/src/btree/row_modify.c
index d56b44bbd95..f0a10cdf528 100644
--- a/src/third_party/wiredtiger/src/btree/row_modify.c
+++ b/src/third_party/wiredtiger/src/btree/row_modify.c
@@ -47,13 +47,13 @@ __wt_page_modify_alloc(WT_SESSION_IMPL *session, WT_PAGE *page)
*/
int
__wt_row_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt,
- WT_ITEM *key, WT_ITEM *value, WT_UPDATE *upd, int is_remove)
+ WT_ITEM *key, WT_ITEM *value, WT_UPDATE *upd_arg, int is_remove)
{
WT_DECL_RET;
WT_INSERT *ins;
WT_INSERT_HEAD *ins_head, **ins_headp;
WT_PAGE *page;
- WT_UPDATE *old_upd, **upd_entry;
+ WT_UPDATE *old_upd, *upd, **upd_entry;
size_t ins_size, upd_size;
uint32_t ins_slot;
u_int i, skipdepth;
@@ -61,6 +61,7 @@ __wt_row_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt,
ins = NULL;
page = cbt->ref->page;
+ upd = upd_arg;
logged = 0;
/* This code expects a remove to have a NULL value. */
@@ -90,7 +91,7 @@ __wt_row_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt,
} else
upd_entry = &cbt->ins->upd;
- if (upd == NULL) {
+ if (upd_arg == NULL) {
/* Make sure the update can proceed. */
WT_ERR(__wt_txn_update_check(
session, old_upd = *upd_entry));
@@ -165,7 +166,7 @@ __wt_row_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt,
cbt->ins_head = ins_head;
cbt->ins = ins;
- if (upd == NULL) {
+ if (upd_arg == NULL) {
WT_ERR(
__wt_update_alloc(session, value, &upd, &upd_size));
WT_ERR(__wt_txn_modify(session, upd));
@@ -218,7 +219,8 @@ err: /*
__wt_txn_unmodify(session);
__wt_free(session, ins);
cbt->ins = NULL;
- __wt_free(session, upd);
+ if (upd_arg == NULL)
+ __wt_free(session, upd);
}
return (ret);
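
The row_modify.c hunks rename the caller's update to upd_arg and, in the error path, free an update only when it was allocated inside the function, so a caller-supplied update is never freed out from under its owner. A tiny sketch of that ownership rule (modify and struct update are hypothetical stand-ins):

#include <stdio.h>
#include <stdlib.h>

struct update { int value; };

/*
 * Use the caller's update if one was passed in; otherwise allocate our
 * own. On error, free only the update allocated here -- a caller-owned
 * update must survive so the caller can retry or free it itself.
 */
static int
modify(struct update *upd_arg, int fail)
{
        struct update *upd = upd_arg;

        if (upd_arg == NULL && (upd = calloc(1, sizeof(*upd))) == NULL)
                return (-1);

        if (fail) {
                if (upd_arg == NULL)    /* ours: free it */
                        free(upd);
                return (-1);            /* caller still owns upd_arg */
        }

        /* Success: upd is linked into the page, ownership moves there. */
        printf("installed update %p\n", (void *)upd);
        return (0);
}

int
main(void)
{
        struct update mine = { 42 };

        (void)modify(&mine, 1);         /* error: "mine" is untouched */
        (void)modify(NULL, 1);          /* error: internal allocation freed */
        return (0);
}
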
diff --git a/src/third_party/wiredtiger/src/conn/conn_log.c b/src/third_party/wiredtiger/src/conn/conn_log.c
index be7ce2e9344..de4bf7268ed 100644
--- a/src/third_party/wiredtiger/src/conn/conn_log.c
+++ b/src/third_party/wiredtiger/src/conn/conn_log.c
@@ -392,100 +392,154 @@ typedef struct {
(entry1).lsn.offset < (entry2).lsn.offset))
/*
- * __log_wrlsn_server --
- * The log wrlsn server thread.
+ * __wt_log_wrlsn --
+ * Process written log slots and attempt to coalesce them if the LSNs
+ * are contiguous. Returns 1 if slots were freed, 0 if no slots were
+ * freed in the progress arg. Must be called with the log slot lock held.
*/
-static WT_THREAD_RET
-__log_wrlsn_server(void *arg)
+int
+__wt_log_wrlsn(WT_SESSION_IMPL *session, uint32_t *free_i, int *yield)
{
WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
WT_LOG *log;
WT_LOG_WRLSN_ENTRY written[WT_SLOT_POOL];
- WT_LOGSLOT *slot;
- WT_SESSION_IMPL *session;
+ WT_LOGSLOT *coalescing, *slot;
size_t written_i;
uint32_t i, save_i;
- int yield;
- session = arg;
conn = S2C(session);
log = conn->log;
- yield = 0;
- while (F_ISSET(conn, WT_CONN_LOG_SERVER_RUN)) {
- /*
- * No need to use the log_slot_lock because the slot pool
- * is statically allocated and any slot in the
- * WT_LOG_SLOT_WRITTEN state is exclusively ours for now.
- */
- i = 0;
- written_i = 0;
+ coalescing = NULL;
+ written_i = 0;
+ i = 0;
+ if (free_i != NULL)
+ *free_i = WT_SLOT_POOL;
+
+ /*
+ * Walk the array once saving any slots that are in the
+ * WT_LOG_SLOT_WRITTEN state.
+ */
+ while (i < WT_SLOT_POOL) {
+ save_i = i;
+ slot = &log->slot_pool[i++];
+ if (free_i != NULL && *free_i == WT_SLOT_POOL &&
+ slot->slot_state == WT_LOG_SLOT_FREE)
+ *free_i = save_i;
+ if (slot->slot_state != WT_LOG_SLOT_WRITTEN)
+ continue;
+ written[written_i].slot_index = save_i;
+ written[written_i++].lsn = slot->slot_release_lsn;
+ }
+ /*
+ * If we found any written slots process them. We sort them
+ * based on the release LSN, and then look for them in order.
+ */
+ if (written_i > 0) {
/*
- * Walk the array once saving any slots that are in the
- * WT_LOG_SLOT_WRITTEN state.
+ * If wanted, reset the yield variable to indicate that we
+ * have found written slots.
*/
- while (i < WT_SLOT_POOL) {
- save_i = i;
- slot = &log->slot_pool[i++];
- if (slot->slot_state != WT_LOG_SLOT_WRITTEN)
- continue;
- written[written_i].slot_index = save_i;
- written[written_i++].lsn = slot->slot_release_lsn;
- }
+ if (yield != NULL)
+ *yield = 0;
+ WT_INSERTION_SORT(written, written_i,
+ WT_LOG_WRLSN_ENTRY, WT_WRLSN_ENTRY_CMP_LT);
+
/*
- * If we found any written slots process them. We sort them
- * based on the release LSN, and then look for them in order.
+ * We know the written array is sorted by LSN. Go
+ * through them either advancing write_lsn or coalesce
+ * contiguous ranges of written slots.
*/
- if (written_i > 0) {
- yield = 0;
- WT_INSERTION_SORT(written, written_i,
- WT_LOG_WRLSN_ENTRY, WT_WRLSN_ENTRY_CMP_LT);
-
- /*
- * We know the written array is sorted by LSN. Go
- * through them either advancing write_lsn or stop
- * as soon as one is not in order.
- */
- for (i = 0; i < written_i; i++) {
- if (WT_LOG_CMP(&log->write_lsn,
- &written[i].lsn) != 0)
- break;
+ for (i = 0; i < written_i; i++) {
+ slot = &log->slot_pool[written[i].slot_index];
+ if (coalescing != NULL) {
+ if (WT_LOG_CMP(&coalescing->slot_end_lsn,
+ &written[i].lsn) != 0) {
+ coalescing = slot;
+ continue;
+ }
+ /*
+ * If we get here we have a slot to coalesce
+ * and free.
+ */
+ coalescing->slot_end_lsn = slot->slot_end_lsn;
+ WT_STAT_FAST_CONN_INCR(
+ session, log_slot_coalesced);
+ /*
+ * Copy the flag for later closing.
+ */
+ if (F_ISSET(slot, WT_SLOT_CLOSEFH))
+ F_SET(coalescing, WT_SLOT_CLOSEFH);
+ } else {
+ /*
+ * If this written slot is not the next LSN,
+ * try to start coalescing with later slots.
+ */
+ if (WT_LOG_CMP(
+ &log->write_lsn, &written[i].lsn) != 0) {
+ coalescing = slot;
+ continue;
+ }
/*
* If we get here we have a slot to process.
* Advance the LSN and process the slot.
*/
- slot = &log->slot_pool[written[i].slot_index];
WT_ASSERT(session, WT_LOG_CMP(&written[i].lsn,
&slot->slot_release_lsn) == 0);
log->write_start_lsn = slot->slot_start_lsn;
log->write_lsn = slot->slot_end_lsn;
- WT_ERR(__wt_cond_signal(session,
- log->log_write_cond));
+ WT_RET(__wt_cond_signal(
+ session, log->log_write_cond));
WT_STAT_FAST_CONN_INCR(session, log_write_lsn);
-
/*
* Signal the close thread if needed.
*/
if (F_ISSET(slot, WT_SLOT_CLOSEFH))
- WT_ERR(__wt_cond_signal(session,
- conn->log_file_cond));
- WT_ERR(__wt_log_slot_free(session, slot));
+ WT_RET(__wt_cond_signal(
+ session, conn->log_file_cond));
}
+ WT_RET(__wt_log_slot_free(session, slot));
+ if (free_i != NULL && *free_i == WT_SLOT_POOL &&
+ slot->slot_state == WT_LOG_SLOT_FREE)
+ *free_i = save_i;
}
- /*
- * If we saw a later write, we always want to yield because
- * we know something is in progress.
- */
- if (yield++ < 1000)
+ }
+ return (0);
+}
+
+/*
+ * __log_wrlsn_server --
+ * The log wrlsn server thread.
+ */
+static WT_THREAD_RET
+__log_wrlsn_server(void *arg)
+{
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_LOG *log;
+ WT_SESSION_IMPL *session;
+ int locked, yield;
+
+ session = arg;
+ conn = S2C(session);
+ log = conn->log;
+ locked = yield = 0;
+ while (F_ISSET(conn, WT_CONN_LOG_SERVER_RUN)) {
+ __wt_spin_lock(session, &log->log_slot_lock);
+ locked = 1;
+ WT_ERR(__wt_log_wrlsn(session, NULL, &yield));
+ locked = 0;
+ __wt_spin_unlock(session, &log->log_slot_lock);
+ if (++yield < 1000)
__wt_yield();
else
- /* Wait until the next event. */
WT_ERR(__wt_cond_wait(session,
conn->log_wrlsn_cond, 100000));
}
-
- if (0)
+ if (0) {
err: __wt_err(session, ret, "log wrlsn server error");
+ }
+ if (locked)
+ __wt_spin_unlock(session, &log->log_slot_lock);
return (WT_THREAD_RET_VALUE);
}
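
The conn_log.c change factors the write-LSN work into __wt_log_wrlsn, which sorts the written slots by release LSN and then either advances write_lsn or coalesces runs of slots whose LSNs are contiguous, leaving any run that has not yet reached write_lsn for a later pass. A compact sketch of that pass over sorted ranges (plain offsets stand in for LSNs; all names are illustrative):

#include <stdio.h>
#include <stdlib.h>

struct range { long start, end; };      /* stands in for a written slot */

static int
cmp_start(const void *a, const void *b)
{
        const struct range *x = a, *y = b;
        return (x->start < y->start ? -1 : x->start > y->start);
}

int
main(void)
{
        struct range w[] = { {30,40}, {0,10}, {10,20}, {50,60} };
        const int n = sizeof(w) / sizeof(w[0]);
        long write_pos = 0;             /* like log->write_lsn */
        struct range *coalescing = NULL;
        int i;

        qsort(w, n, sizeof(w[0]), cmp_start);
        for (i = 0; i < n; i++) {
                if (coalescing != NULL && coalescing->end == w[i].start) {
                        coalescing->end = w[i].end;     /* coalesce run */
                        continue;
                }
                if (w[i].start != write_pos) {
                        coalescing = &w[i];             /* gap: start a run */
                        continue;
                }
                write_pos = w[i].end;                   /* in order: advance */
        }
        /* Runs that never reached write_pos stay pending for a later pass. */
        printf("write_pos=%ld", write_pos);
        if (coalescing != NULL)
                printf(", pending run %ld-%ld", coalescing->start,
                    coalescing->end);
        printf("\n");
        return (0);
}
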
diff --git a/src/third_party/wiredtiger/src/include/btree.i b/src/third_party/wiredtiger/src/include/btree.i
index caa6c469b30..d13ec1972fb 100644
--- a/src/third_party/wiredtiger/src/include/btree.i
+++ b/src/third_party/wiredtiger/src/include/btree.i
@@ -274,60 +274,6 @@ __wt_page_evict_soon(WT_PAGE *page)
}
/*
- * __wt_page_refp --
- * Return the page's index and slot for a reference.
- */
-static inline void
-__wt_page_refp(WT_SESSION_IMPL *session,
- WT_REF *ref, WT_PAGE_INDEX **pindexp, uint32_t *slotp)
-{
- WT_PAGE_INDEX *pindex;
- uint32_t i;
-
- /*
- * Copy the parent page's index value: the page can split at any time,
- * but the index's value is always valid, even if it's not up-to-date.
- */
-retry: WT_INTL_INDEX_GET(session, ref->home, pindex);
-
- /*
- * Use the page's reference hint: it should be correct unless the page
- * split before our slot. If the page splits after our slot, the hint
- * will point earlier in the array than our actual slot, so the first
- * loop is from the hint to the end of the list, and the second loop
- * is from the start of the list to the end of the list. (The second
- * loop overlaps the first, but that only happen in cases where we've
- * deepened the tree and aren't going to find our slot at all, that's
- * not worth optimizing.)
- *
- * It's not an error for the reference hint to be wrong, it just means
- * the first retrieval (which sets the hint for subsequent retrievals),
- * is slower.
- */
- for (i = ref->pindex_hint; i < pindex->entries; ++i)
- if (pindex->index[i]->page == ref->page) {
- *pindexp = pindex;
- *slotp = ref->pindex_hint = i;
- return;
- }
- for (i = 0; i < pindex->entries; ++i)
- if (pindex->index[i]->page == ref->page) {
- *pindexp = pindex;
- *slotp = ref->pindex_hint = i;
- return;
- }
-
- /*
- * If we don't find our reference, the page split into a new level and
- * our home pointer references the wrong page. After internal pages
- * deepen, their reference structure home value are updated; yield and
- * wait for that to happen.
- */
- __wt_yield();
- goto retry;
-}
-
-/*
* __wt_page_modify_init --
* A page is about to be modified, allocate the modification structure.
*/
diff --git a/src/third_party/wiredtiger/src/include/extern.h b/src/third_party/wiredtiger/src/include/extern.h
index 87099ac839f..0826fa7b10b 100644
--- a/src/third_party/wiredtiger/src/include/extern.h
+++ b/src/third_party/wiredtiger/src/include/extern.h
@@ -26,14 +26,14 @@ extern int __wt_block_compact_end(WT_SESSION_IMPL *session, WT_BLOCK *block);
extern int __wt_block_compact_skip(WT_SESSION_IMPL *session, WT_BLOCK *block, int *skipp);
extern int __wt_block_compact_page_skip(WT_SESSION_IMPL *session, WT_BLOCK *block, const uint8_t *addr, size_t addr_size, int *skipp);
extern int __wt_block_misplaced(WT_SESSION_IMPL *session, WT_BLOCK *block, const char *tag, wt_off_t offset, uint32_t size, int live);
-extern int __wt_block_off_remove_overlap( WT_SESSION_IMPL *session, WT_EXTLIST *el, wt_off_t off, wt_off_t size);
+extern int __wt_block_off_remove_overlap(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_EXTLIST *el, wt_off_t off, wt_off_t size);
extern int __wt_block_alloc( WT_SESSION_IMPL *session, WT_BLOCK *block, wt_off_t *offp, wt_off_t size);
extern int __wt_block_free(WT_SESSION_IMPL *session, WT_BLOCK *block, const uint8_t *addr, size_t addr_size);
extern int __wt_block_off_free( WT_SESSION_IMPL *session, WT_BLOCK *block, wt_off_t offset, wt_off_t size);
extern int __wt_block_extlist_check( WT_SESSION_IMPL *session, WT_EXTLIST *al, WT_EXTLIST *bl);
extern int __wt_block_extlist_overlap( WT_SESSION_IMPL *session, WT_BLOCK *block, WT_BLOCK_CKPT *ci);
-extern int __wt_block_extlist_merge(WT_SESSION_IMPL *session, WT_EXTLIST *a, WT_EXTLIST *b);
-extern int __wt_block_insert_ext( WT_SESSION_IMPL *session, WT_EXTLIST *el, wt_off_t off, wt_off_t size);
+extern int __wt_block_extlist_merge(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_EXTLIST *a, WT_EXTLIST *b);
+extern int __wt_block_insert_ext(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_EXTLIST *el, wt_off_t off, wt_off_t size);
extern int __wt_block_extlist_read_avail(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_EXTLIST *el, wt_off_t ckpt_size);
extern int __wt_block_extlist_read(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_EXTLIST *el, wt_off_t ckpt_size);
extern int __wt_block_extlist_write(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_EXTLIST *el, WT_EXTLIST *additional);
@@ -171,7 +171,7 @@ extern int __wt_row_ikey_alloc(WT_SESSION_IMPL *session, uint32_t cell_offset, c
extern int __wt_row_ikey_incr(WT_SESSION_IMPL *session, WT_PAGE *page, uint32_t cell_offset, const void *key, size_t size, WT_REF *ref);
extern int __wt_row_ikey(WT_SESSION_IMPL *session, uint32_t cell_offset, const void *key, size_t size, WT_REF *ref);
extern int __wt_page_modify_alloc(WT_SESSION_IMPL *session, WT_PAGE *page);
-extern int __wt_row_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_ITEM *key, WT_ITEM *value, WT_UPDATE *upd, int is_remove);
+extern int __wt_row_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_ITEM *key, WT_ITEM *value, WT_UPDATE *upd_arg, int is_remove);
extern int __wt_row_insert_alloc(WT_SESSION_IMPL *session, WT_ITEM *key, u_int skipdepth, WT_INSERT **insp, size_t *ins_sizep);
extern int __wt_update_alloc( WT_SESSION_IMPL *session, WT_ITEM *value, WT_UPDATE **updp, size_t *sizep);
extern WT_UPDATE *__wt_update_obsolete_check( WT_SESSION_IMPL *session, WT_PAGE *page, WT_UPDATE *upd);
@@ -237,6 +237,7 @@ extern int __wt_conn_dhandle_discard(WT_SESSION_IMPL *session);
extern int __wt_connection_init(WT_CONNECTION_IMPL *conn);
extern int __wt_connection_destroy(WT_CONNECTION_IMPL *conn);
extern int __wt_log_truncate_files( WT_SESSION_IMPL *session, WT_CURSOR *cursor, const char *cfg[]);
+extern int __wt_log_wrlsn(WT_SESSION_IMPL *session, uint32_t *free_i, int *yield);
extern int __wt_logmgr_create(WT_SESSION_IMPL *session, const char *cfg[]);
extern int __wt_logmgr_open(WT_SESSION_IMPL *session);
extern int __wt_logmgr_destroy(WT_SESSION_IMPL *session);
diff --git a/src/third_party/wiredtiger/src/include/gcc.h b/src/third_party/wiredtiger/src/include/gcc.h
index 889fd908388..1c61768d372 100644
--- a/src/third_party/wiredtiger/src/include/gcc.h
+++ b/src/third_party/wiredtiger/src/include/gcc.h
@@ -169,6 +169,18 @@
#define WT_READ_BARRIER() WT_FULL_BARRIER()
#define WT_WRITE_BARRIER() WT_FULL_BARRIER()
+#elif defined(__aarch64__)
+#define WT_PAUSE() __asm__ volatile("yield" ::: "memory")
+#define WT_FULL_BARRIER() do { \
+ __asm__ volatile ("dsb sy" ::: "memory"); \
+} while (0)
+#define WT_READ_BARRIER() do { \
+ __asm__ volatile ("dsb ld" ::: "memory"); \
+} while (0)
+#define WT_WRITE_BARRIER() do { \
+ __asm__ volatile ("dsb st" ::: "memory"); \
+} while (0)
+
#else
#error "No write barrier implementation for this hardware"
#endif
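
The gcc.h hunk adds ARMv8 memory-ordering macros built on yield and dsb sy/ld/st. Below is a minimal example of the publish/consume pattern such write/read barriers are intended for; it uses plain GCC inline asm, builds on aarch64 only, and the names are illustrative rather than the WiredTiger macros:

#include <stdio.h>

#define WRITE_BARRIER() __asm__ volatile("dsb st" ::: "memory")
#define READ_BARRIER()  __asm__ volatile("dsb ld" ::: "memory")

static int payload;
static volatile int ready;

static void
publish(int v)
{
        payload = v;
        WRITE_BARRIER();        /* payload is visible before ready flips */
        ready = 1;
}

static int
consume(void)
{
        while (!ready)
                ;
        READ_BARRIER();         /* don't read payload before seeing ready */
        return (payload);
}

int
main(void)
{
        publish(42);
        printf("%d\n", consume());
        return (0);
}
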
diff --git a/src/third_party/wiredtiger/src/include/log.h b/src/third_party/wiredtiger/src/include/log.h
index 051f9fb262e..fbb0a3e3842 100644
--- a/src/third_party/wiredtiger/src/include/log.h
+++ b/src/third_party/wiredtiger/src/include/log.h
@@ -158,10 +158,9 @@ typedef struct {
*/
#define WT_SLOT_ACTIVE 1
#define WT_SLOT_POOL 128
- uint32_t pool_index; /* Global pool index */
WT_LOGSLOT *slot_array[WT_SLOT_ACTIVE]; /* Active slots */
WT_LOGSLOT slot_pool[WT_SLOT_POOL]; /* Pool of all slots */
- uint32_t slot_buf_size; /* Buffer size for slots */
+ size_t slot_buf_size; /* Buffer size for slots */
#define WT_LOG_FORCE_CONSOLIDATE 0x01 /* Disable direct writes */
uint32_t flags;
diff --git a/src/third_party/wiredtiger/src/include/mutex.h b/src/third_party/wiredtiger/src/include/mutex.h
index fd7fd16dea7..7a5028d6a28 100644
--- a/src/third_party/wiredtiger/src/include/mutex.h
+++ b/src/third_party/wiredtiger/src/include/mutex.h
@@ -32,7 +32,9 @@ typedef union { /* Read/write lock */
WiredTiger read/write locks require modification for big-endian systems.
#else
uint64_t u;
- uint32_t us;
+ struct {
+ uint32_t us;
+ } i;
struct {
uint16_t writers;
uint16_t readers;
diff --git a/src/third_party/wiredtiger/src/include/stat.h b/src/third_party/wiredtiger/src/include/stat.h
index f05d87c058b..6dc9282a613 100644
--- a/src/third_party/wiredtiger/src/include/stat.h
+++ b/src/third_party/wiredtiger/src/include/stat.h
@@ -221,10 +221,10 @@ struct __wt_connection_stats {
WT_STATS log_scan_rereads;
WT_STATS log_scans;
WT_STATS log_slot_closes;
+ WT_STATS log_slot_coalesced;
WT_STATS log_slot_consolidated;
WT_STATS log_slot_joins;
WT_STATS log_slot_races;
- WT_STATS log_slot_switch_fails;
WT_STATS log_slot_toobig;
WT_STATS log_slot_toosmall;
WT_STATS log_slot_transitions;
diff --git a/src/third_party/wiredtiger/src/include/wiredtiger.in b/src/third_party/wiredtiger/src/include/wiredtiger.in
index 096fea3eeb3..e8f3b9958ce 100644
--- a/src/third_party/wiredtiger/src/include/wiredtiger.in
+++ b/src/third_party/wiredtiger/src/include/wiredtiger.in
@@ -3720,14 +3720,14 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection);
#define WT_STAT_CONN_LOG_SCANS 1087
/*! log: consolidated slot closures */
#define WT_STAT_CONN_LOG_SLOT_CLOSES 1088
+/*! log: written slots coalesced */
+#define WT_STAT_CONN_LOG_SLOT_COALESCED 1089
/*! log: logging bytes consolidated */
-#define WT_STAT_CONN_LOG_SLOT_CONSOLIDATED 1089
+#define WT_STAT_CONN_LOG_SLOT_CONSOLIDATED 1090
/*! log: consolidated slot joins */
-#define WT_STAT_CONN_LOG_SLOT_JOINS 1090
+#define WT_STAT_CONN_LOG_SLOT_JOINS 1091
/*! log: consolidated slot join races */
-#define WT_STAT_CONN_LOG_SLOT_RACES 1091
-/*! log: slots selected for switching that were unavailable */
-#define WT_STAT_CONN_LOG_SLOT_SWITCH_FAILS 1092
+#define WT_STAT_CONN_LOG_SLOT_RACES 1092
/*! log: record size exceeded maximum */
#define WT_STAT_CONN_LOG_SLOT_TOOBIG 1093
/*! log: failed to find a slot large enough for record */
diff --git a/src/third_party/wiredtiger/src/log/log.c b/src/third_party/wiredtiger/src/log/log.c
index 77ae0383cbe..4242571fe53 100644
--- a/src/third_party/wiredtiger/src/log/log.c
+++ b/src/third_party/wiredtiger/src/log/log.c
@@ -1217,6 +1217,7 @@ __wt_log_newfile(WT_SESSION_IMPL *session, int conn_create, int *created)
*/
while (log->log_close_fh != NULL) {
WT_STAT_FAST_CONN_INCR(session, log_close_yields);
+ WT_RET(__wt_log_wrlsn(session, NULL, NULL));
__wt_yield();
}
log->log_close_fh = log->log_fh;
diff --git a/src/third_party/wiredtiger/src/log/log_slot.c b/src/third_party/wiredtiger/src/log/log_slot.c
index 8723d492e13..0b580af4526 100644
--- a/src/third_party/wiredtiger/src/log/log_slot.c
+++ b/src/third_party/wiredtiger/src/log/log_slot.c
@@ -53,15 +53,14 @@ __wt_log_slot_init(WT_SESSION_IMPL *session)
/*
* Allocate memory for buffers now that the arrays are setup. Split
* this out to make error handling simpler.
- */
- /*
+ *
* Cap the slot buffer to the log file size.
*/
- log->slot_buf_size = (uint32_t)WT_MIN(
- conn->log_file_max, WT_LOG_SLOT_BUF_SIZE);
+ log->slot_buf_size =
+ WT_MIN((size_t)conn->log_file_max, WT_LOG_SLOT_BUF_SIZE);
for (i = 0; i < WT_SLOT_POOL; i++) {
WT_ERR(__wt_buf_init(session,
- &log->slot_pool[i].slot_buf, (size_t)log->slot_buf_size));
+ &log->slot_pool[i].slot_buf, log->slot_buf_size));
F_SET(&log->slot_pool[i], WT_SLOT_INIT_FLAGS);
}
WT_STAT_FAST_CONN_INCRV(session,
@@ -189,6 +188,36 @@ join_slot:
}
/*
+ * __log_slot_find_free --
+ * Find and return a free log slot.
+ */
+static int
+__log_slot_find_free(WT_SESSION_IMPL *session, WT_LOGSLOT **slot)
+{
+ WT_CONNECTION_IMPL *conn;
+ WT_LOG *log;
+ uint32_t pool_i;
+
+ conn = S2C(session);
+ log = conn->log;
+ WT_ASSERT(session, slot != NULL);
+ /*
+ * Encourage processing and moving the write LSN forward.
+ * That process has to walk the slots anyway, so do that
+ * work and let it give us the index of a free slot along
+ * the way.
+ */
+ WT_RET(__wt_log_wrlsn(session, &pool_i, NULL));
+ while (pool_i == WT_SLOT_POOL) {
+ __wt_yield();
+ WT_RET(__wt_log_wrlsn(session, &pool_i, NULL));
+ }
+ *slot = &log->slot_pool[pool_i];
+ WT_ASSERT(session, (*slot)->slot_state == WT_LOG_SLOT_FREE);
+ return (0);
+}
+
+/*
* __wt_log_slot_close --
* Close a slot and do not allow any other threads to join this slot.
* Remove this from the active slot array and move a new slot from
@@ -202,40 +231,13 @@ __wt_log_slot_close(WT_SESSION_IMPL *session, WT_LOGSLOT *slot)
WT_LOG *log;
WT_LOGSLOT *newslot;
int64_t old_state;
- int32_t yields;
- uint32_t pool_i, switch_fails;
conn = S2C(session);
log = conn->log;
- switch_fails = 0;
-retry:
/*
* Find an unused slot in the pool.
*/
- pool_i = log->pool_index;
- newslot = &log->slot_pool[pool_i];
- if (++log->pool_index >= WT_SLOT_POOL)
- log->pool_index = 0;
- if (newslot->slot_state != WT_LOG_SLOT_FREE) {
- WT_STAT_FAST_CONN_INCR(session, log_slot_switch_fails);
- /*
- * If it takes a number of attempts to find an available slot
- * it's likely all slots are waiting to be released. This
- * churn is used to change how long we pause before closing
- * the slot - which leads to more consolidation and less churn.
- */
- if (++switch_fails % WT_SLOT_POOL == 0 && slot->slot_churn < 5)
- ++slot->slot_churn;
- __wt_yield();
- goto retry;
- } else if (slot->slot_churn > 0) {
- --slot->slot_churn;
- WT_ASSERT(session, slot->slot_churn >= 0);
- }
-
- /* Pause to allow other threads a chance to consolidate. */
- for (yields = slot->slot_churn; yields >= 0; yields--)
- __wt_yield();
+ WT_RET(__log_slot_find_free(session, &newslot));
/*
* Swap out the slot we're going to use and put a free one in the
@@ -244,7 +246,7 @@ retry:
WT_STAT_FAST_CONN_INCR(session, log_slot_closes);
newslot->slot_state = WT_LOG_SLOT_READY;
newslot->slot_index = slot->slot_index;
- log->slot_array[newslot->slot_index] = &log->slot_pool[pool_i];
+ log->slot_array[newslot->slot_index] = newslot;
old_state = WT_ATOMIC_STORE8(slot->slot_state, WT_LOG_SLOT_PENDING);
slot->slot_group_size = (uint64_t)(old_state - WT_LOG_SLOT_READY);
/*
diff --git a/src/third_party/wiredtiger/src/meta/meta_track.c b/src/third_party/wiredtiger/src/meta/meta_track.c
index c887af58540..2ac1bfa71a1 100644
--- a/src/third_party/wiredtiger/src/meta/meta_track.c
+++ b/src/third_party/wiredtiger/src/meta/meta_track.c
@@ -125,31 +125,21 @@ __wt_meta_track_on(WT_SESSION_IMPL *session)
* Apply the changes in a metadata tracking record.
*/
static int
-__meta_track_apply(WT_SESSION_IMPL *session, WT_META_TRACK *trk, int unroll)
+__meta_track_apply(WT_SESSION_IMPL *session, WT_META_TRACK *trk)
{
WT_BM *bm;
WT_BTREE *btree;
WT_DECL_RET;
int tret;
- /*
- * Unlock handles and complete checkpoints regardless of whether we are
- * unrolling.
- */
- if (!unroll && trk->op != WT_ST_CHECKPOINT &&
- trk->op != WT_ST_DROP_COMMIT && trk->op != WT_ST_LOCK)
- goto free;
-
switch (trk->op) {
case WT_ST_EMPTY: /* Unused slot */
break;
case WT_ST_CHECKPOINT: /* Checkpoint, see above */
- if (!unroll) {
- btree = trk->dhandle->handle;
- bm = btree->bm;
- WT_WITH_DHANDLE(session, trk->dhandle,
- WT_TRET(bm->checkpoint_resolve(bm, session)));
- }
+ btree = trk->dhandle->handle;
+ bm = btree->bm;
+ WT_WITH_DHANDLE(session, trk->dhandle,
+ WT_TRET(bm->checkpoint_resolve(bm, session)));
break;
case WT_ST_DROP_COMMIT:
if ((tret = __wt_remove_if_exists(session, trk->a)) != 0) {
@@ -159,8 +149,40 @@ __meta_track_apply(WT_SESSION_IMPL *session, WT_META_TRACK *trk, int unroll)
WT_TRET(tret);
}
break;
+ case WT_ST_LOCK:
+ WT_WITH_DHANDLE(session, trk->dhandle,
+ WT_TRET(__wt_session_release_btree(session)));
+ break;
+ case WT_ST_FILEOP:
+ case WT_ST_REMOVE:
+ case WT_ST_SET:
+ break;
+ WT_ILLEGAL_VALUE(session);
+ }
+
+ __meta_track_clear(session, trk);
+ return (ret);
+}
+
+/*
+ * __meta_track_unroll --
+ * Undo the changes in a metadata tracking record.
+ */
+static int
+__meta_track_unroll(WT_SESSION_IMPL *session, WT_META_TRACK *trk)
+{
+ WT_DECL_RET;
+ int tret;
+
+ switch (trk->op) {
+ case WT_ST_EMPTY: /* Unused slot */
+ break;
+ case WT_ST_CHECKPOINT: /* Checkpoint, see above */
+ break;
+ case WT_ST_DROP_COMMIT:
+ break;
case WT_ST_LOCK: /* Handle lock, see above */
- if (unroll && trk->created)
+ if (trk->created)
F_SET(trk->dhandle, WT_DHANDLE_DISCARD);
WT_WITH_DHANDLE(session, trk->dhandle,
WT_TRET(__wt_session_release_btree(session)));
@@ -214,8 +236,7 @@ __meta_track_apply(WT_SESSION_IMPL *session, WT_META_TRACK *trk, int unroll)
WT_ILLEGAL_VALUE(session);
}
-free: __meta_track_clear(session, trk);
-
+ __meta_track_clear(session, trk);
return (ret);
}
@@ -279,33 +300,38 @@ __wt_meta_track_off(WT_SESSION_IMPL *session, int need_sync, int unroll)
if (trk == trk_orig)
return (0);
- while (--trk >= trk_orig)
- WT_TRET(__meta_track_apply(session, trk, unroll));
+ if (unroll) {
+ while (--trk >= trk_orig)
+ WT_TRET(__meta_track_unroll(session, trk));
+ /* Unroll operations don't need to flush the metadata. */
+ return (ret);
+ }
/*
- * Unroll operations don't need to flush the metadata.
- *
- * Also, if we don't have the metadata handle (e.g, we're in the
- * process of creating the metadata), we can't sync it.
+ * If we don't have the metadata handle (e.g, we're in the process of
+ * creating the metadata), we can't sync it.
*/
- if (unroll || ret != 0 || !need_sync || session->meta_dhandle == NULL)
- return (ret);
+ if (!need_sync || session->meta_dhandle == NULL)
+ goto done;
/* If we're logging, make sure the metadata update was flushed. */
if (FLD_ISSET(S2C(session)->log_flags, WT_CONN_LOG_ENABLED)) {
- if (!FLD_ISSET(S2C(session)->txn_logsync,
- WT_LOG_DSYNC | WT_LOG_FSYNC))
- WT_WITH_DHANDLE(session, session->meta_dhandle,
- ret = __wt_txn_checkpoint_log(session,
- 0, WT_TXN_LOG_CKPT_SYNC, NULL));
+ WT_WITH_DHANDLE(session, session->meta_dhandle,
+ ret = __wt_txn_checkpoint_log(session,
+ 0, WT_TXN_LOG_CKPT_SYNC, NULL));
+ WT_RET(ret);
} else {
WT_WITH_DHANDLE(session, session->meta_dhandle,
ret = __wt_checkpoint(session, NULL));
WT_RET(ret);
WT_WITH_DHANDLE(session, session->meta_dhandle,
ret = __wt_checkpoint_sync(session, NULL));
+ WT_RET(ret);
}
+done: /* Apply any tracked operations post-commit. */
+ for (; trk_orig < trk; trk_orig++)
+ WT_TRET(__meta_track_apply(session, trk_orig));
return (ret);
}
@@ -342,7 +368,7 @@ __wt_meta_track_sub_off(WT_SESSION_IMPL *session)
session->meta_track_next = session->meta_track_sub = NULL;
while (--trk >= trk_orig)
- WT_TRET(__meta_track_apply(session, trk, 0));
+ WT_TRET(__meta_track_apply(session, trk));
session->meta_track_next = trk_orig;
return (ret);
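
The meta_track.c change splits the old unroll flag into two paths: __meta_track_unroll runs newest-first on rollback, while __meta_track_apply now runs oldest-first only after the metadata has been made durable. A short sketch of that commit/rollback walk over a track list (the op codes and helpers are hypothetical):

#include <stdio.h>

enum op { ST_EMPTY, ST_CHECKPOINT, ST_LOCK };
struct track { enum op op; };

static int
track_apply(struct track *t)            /* post-commit work */
{
        switch (t->op) {
        case ST_CHECKPOINT:
                printf("resolve checkpoint\n");
                break;
        case ST_LOCK:
                printf("release handle\n");
                break;
        default:
                break;
        }
        return (0);
}

static int
track_unroll(struct track *t)           /* rollback work */
{
        if (t->op == ST_LOCK)
                printf("discard and release handle\n");
        return (0);
}

static int
track_off(struct track *list, int n, int unroll)
{
        int i, ret = 0;

        if (unroll) {                   /* newest first, then done */
                for (i = n - 1; i >= 0; i--)
                        ret |= track_unroll(&list[i]);
                return (ret);
        }
        /* ... make the metadata durable here (checkpoint or log sync) ... */
        for (i = 0; i < n; i++)         /* then apply oldest first */
                ret |= track_apply(&list[i]);
        return (ret);
}

int
main(void)
{
        struct track list[] = { { ST_LOCK }, { ST_CHECKPOINT } };

        (void)track_off(list, 2, 0);    /* commit */
        (void)track_off(list, 2, 1);    /* rollback */
        return (0);
}
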
diff --git a/src/third_party/wiredtiger/src/os_posix/os_mtx_rw.c b/src/third_party/wiredtiger/src/os_posix/os_mtx_rw.c
index 19183ed9030..cdd4f8a24e1 100644
--- a/src/third_party/wiredtiger/src/os_posix/os_mtx_rw.c
+++ b/src/third_party/wiredtiger/src/os_posix/os_mtx_rw.c
@@ -216,7 +216,7 @@ __wt_writeunlock(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock)
++copy.s.writers;
++copy.s.readers;
- l->us = copy.us;
+ l->i.us = copy.i.us;
return (0);
}
diff --git a/src/third_party/wiredtiger/src/reconcile/rec_write.c b/src/third_party/wiredtiger/src/reconcile/rec_write.c
index 703bebb1597..53a73b44feb 100644
--- a/src/third_party/wiredtiger/src/reconcile/rec_write.c
+++ b/src/third_party/wiredtiger/src/reconcile/rec_write.c
@@ -5108,7 +5108,7 @@ err: __wt_scr_free(session, &tkey);
*/
mod->rec_max_txn = r->max_txn;
if (!F_ISSET(r, WT_EVICTING) &&
- !WT_TXNID_LT(btree->rec_max_txn, r->max_txn))
+ WT_TXNID_LT(btree->rec_max_txn, r->max_txn))
btree->rec_max_txn = r->max_txn;
/*
diff --git a/src/third_party/wiredtiger/src/support/stat.c b/src/third_party/wiredtiger/src/support/stat.c
index 0310fdc207c..b0e7d660587 100644
--- a/src/third_party/wiredtiger/src/support/stat.c
+++ b/src/third_party/wiredtiger/src/support/stat.c
@@ -484,12 +484,11 @@ __wt_stat_init_connection_stats(WT_CONNECTION_STATS *stats)
stats->log_prealloc_used.desc = "log: pre-allocated log files used";
stats->log_slot_toobig.desc = "log: record size exceeded maximum";
stats->log_scan_records.desc = "log: records processed by log scan";
- stats->log_slot_switch_fails.desc =
- "log: slots selected for switching that were unavailable";
stats->log_compress_mem.desc =
"log: total in-memory size of compressed records";
stats->log_buffer_size.desc = "log: total log buffer size";
stats->log_compress_len.desc = "log: total size of compressed records";
+ stats->log_slot_coalesced.desc = "log: written slots coalesced";
stats->log_close_yields.desc =
"log: yields waiting for previous log file close";
stats->lsm_work_queue_app.desc =
@@ -647,9 +646,9 @@ __wt_stat_refresh_connection_stats(void *stats_arg)
stats->log_prealloc_used.v = 0;
stats->log_slot_toobig.v = 0;
stats->log_scan_records.v = 0;
- stats->log_slot_switch_fails.v = 0;
stats->log_compress_mem.v = 0;
stats->log_compress_len.v = 0;
+ stats->log_slot_coalesced.v = 0;
stats->log_close_yields.v = 0;
stats->lsm_rows_merged.v = 0;
stats->lsm_checkpoint_throttle.v = 0;
diff --git a/src/third_party/wiredtiger/src/txn/txn_ckpt.c b/src/third_party/wiredtiger/src/txn/txn_ckpt.c
index f317a3dc697..49fcd69ffed 100644
--- a/src/third_party/wiredtiger/src/txn/txn_ckpt.c
+++ b/src/third_party/wiredtiger/src/txn/txn_ckpt.c
@@ -352,7 +352,7 @@ __wt_txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
WT_TXN_STATE *txn_state;
void *saved_meta_next;
u_int i;
- int full, fullckpt_logging, idle, tracking;
+ int full, idle, logging, tracking;
const char *txn_cfg[] = { WT_CONFIG_BASE(session,
WT_SESSION_begin_transaction), "isolation=snapshot", NULL };
@@ -361,7 +361,7 @@ __wt_txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
txn_global = &conn->txn_global;
txn_state = WT_SESSION_TXN_STATE(session);
saved_isolation = session->isolation;
- full = fullckpt_logging = idle = tracking = 0;
+ full = idle = logging = tracking = 0;
/* Ensure the metadata table is open before taking any locks. */
WT_RET(__wt_metadata_open(session));
@@ -373,8 +373,7 @@ __wt_txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
WT_RET(__checkpoint_apply_all(session, cfg, NULL, &full));
/* Configure logging only if doing a full checkpoint. */
- fullckpt_logging =
- full && FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED);
+ logging = FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED);
/*
* Get a list of handles we want to flush; this may pull closed objects
@@ -424,7 +423,7 @@ __wt_txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
tracking = 1;
/* Tell logging that we are about to start a database checkpoint. */
- if (fullckpt_logging)
+ if (full && logging)
WT_ERR(__wt_txn_checkpoint_log(
session, full, WT_TXN_LOG_CKPT_PREPARE, NULL));
@@ -494,7 +493,7 @@ __wt_txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
txn_state->id = txn_state->snap_min = WT_TXN_NONE;
/* Tell logging that we have started a database checkpoint. */
- if (fullckpt_logging)
+ if (full && logging)
WT_ERR(__wt_txn_checkpoint_log(
session, full, WT_TXN_LOG_CKPT_START, NULL));
@@ -532,26 +531,29 @@ __wt_txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
WT_ERR(__wt_txn_commit(session, NULL));
/*
- * If any tree was dirty, we will have updated the metadata with the
- * new checkpoint information. If the metadata is clean, all other
- * trees must have been clean.
- *
- * Disable metadata tracking during the metadata checkpoint.
- *
- * We don't lock old checkpoints in the metadata file: there is no way
- * to open one. We are holding other handle locks, it is not safe to
- * lock conn->spinlock.
+ * Ensure that the metadata changes are durable before the checkpoint
+ * is resolved. Do this by either checkpointing the metadata or syncing
+ * the log file.
+ * Recovery relies on the checkpoint LSN in the metadata only being
+ * updated by full checkpoints so only checkpoint the metadata for
+ * full or non-logged checkpoints.
*/
- session->isolation = txn->isolation = WT_ISO_READ_UNCOMMITTED;
- saved_meta_next = session->meta_track_next;
- session->meta_track_next = NULL;
- WT_WITH_DHANDLE(session,
- session->meta_dhandle, ret = __wt_checkpoint(session, cfg));
- session->meta_track_next = saved_meta_next;
- WT_ERR(ret);
-
- WT_ERR(__checkpoint_verbose_track(session,
- "metadata sync completed", &verb_timer));
+ if (full || !logging) {
+ session->isolation = txn->isolation = WT_ISO_READ_UNCOMMITTED;
+ /* Disable metadata tracking during the metadata checkpoint. */
+ saved_meta_next = session->meta_track_next;
+ session->meta_track_next = NULL;
+ WT_WITH_DHANDLE(session,
+ session->meta_dhandle, ret = __wt_checkpoint(session, cfg));
+ session->meta_track_next = saved_meta_next;
+ WT_ERR(ret);
+
+ WT_ERR(__checkpoint_verbose_track(session,
+ "metadata sync completed", &verb_timer));
+ } else
+ WT_WITH_DHANDLE(session, session->meta_dhandle,
+ ret = __wt_txn_checkpoint_log(session,
+ 0, WT_TXN_LOG_CKPT_SYNC, NULL));
if (full) {
WT_ERR(__wt_epoch(session, &stop));
@@ -590,7 +592,7 @@ err: /*
* Tell logging that we have finished a database checkpoint. Do not
* write a log record if the database was idle.
*/
- if (fullckpt_logging) {
+ if (full && logging) {
if (ret == 0 &&
F_ISSET((WT_BTREE *)session->meta_dhandle->handle,
WT_BTREE_SKIP_CKPT))
@@ -1174,19 +1176,21 @@ __wt_checkpoint_close(WT_SESSION_IMPL *session, int final)
}
/*
- * If closing a modified file, checkpoint the file and optionally flush
- * the writes (the checkpoint call will discard the blocks, there's no
- * additional step needed).
- *
* We should already have the schema lock unless we're finishing a bulk
* load -- the only other paths to closing files (sweep and LSM) have
* already checked for read-only trees.
*/
- if (!final)
- WT_ASSERT(session,
- bulk || F_ISSET(session, WT_SESSION_LOCKED_SCHEMA));
+ WT_ASSERT(session,
+ final || bulk || F_ISSET(session, WT_SESSION_LOCKED_SCHEMA));
+
+ /*
+ * Turn on metadata tracking if:
+ * - The session is not already doing metadata tracking.
+ * - The file was bulk loaded.
+ * - The close is not during connection close.
+ */
+ need_tracking = !WT_META_TRACKING(session) && !bulk && !final;
- need_tracking = !bulk && !final && !WT_META_TRACKING(session);
if (need_tracking)
WT_RET(__wt_meta_track_on(session));
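
The txn_ckpt.c change replaces fullckpt_logging with separate full and logging flags and, before resolving the checkpoint, makes the metadata durable either by checkpointing the metadata (full or non-logged checkpoints, since recovery expects the metadata checkpoint LSN to move only on full checkpoints) or by syncing the log. A tiny decision sketch with hypothetical helpers:

#include <stdio.h>

/* Stand-ins for the real operations. */
static void checkpoint_metadata(void) { printf("checkpoint metadata\n"); }
static void sync_log(void)            { printf("sync log\n"); }

/*
 * Make metadata changes durable before resolving a checkpoint: full or
 * non-logged checkpoints update the metadata checkpoint (and its LSN);
 * logged partial checkpoints only need a log sync.
 */
static void
make_metadata_durable(int full, int logging)
{
        if (full || !logging)
                checkpoint_metadata();
        else
                sync_log();
}

int
main(void)
{
        make_metadata_durable(1, 1);    /* full, logged    -> checkpoint */
        make_metadata_durable(0, 1);    /* partial, logged -> sync log */
        make_metadata_durable(0, 0);    /* partial, no log -> checkpoint */
        return (0);
}
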