summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Luke Chen <luke.chen@mongodb.com> 2020-01-16 15:56:44 +1100
committer Luke Chen <luke.chen@mongodb.com> 2020-01-16 15:56:44 +1100
commit 3d6953c361213c5bfab23e51ab274ce592edafe6 (patch)
tree c4e4d132ebb5f2f11b3dc27d55dcb73e7b0676bb
parent 0480b92d5c6150e34c19aae5d4b231a6e4f43a2e (diff)
download mongo-3d6953c361213c5bfab23e51ab274ce592edafe6.tar.gz
Import wiredtiger: 723a4c13292b0bc7e27be411db4d006a0b865bd8 from branch mongodb-3.6 (tags: r3.6.17-rc0, r3.6.17)
ref: 4c72feeb92..723a4c1329
for: 3.6.17

WT-4636 Fix strace in syscall test
WT-5042 Reduce configuration parsing overhead from checkpoints
WT-5120 Checkpoint hangs when reconciliation doesn't release the eviction generation
WT-5135 Change lookaside file inserts to use cursor.insert
WT-5218 Improve eviction to differentiate between clean and dirty pages with WT_CACHE_EVICT_NOKEEP readgen
WT-5247 Ensure that only idempotent modify operations are logged
WT-5277 Cursor key out-of-order detected in the lookaside file
-rw-r--r-- src/third_party/wiredtiger/dist/s_define.list 1
-rw-r--r-- src/third_party/wiredtiger/dist/s_string.ok 3
-rw-r--r-- src/third_party/wiredtiger/import.data 8
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_curnext.c 14
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_curprev.c 8
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_cursor.c 224
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_random.c 10
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_read.c 8
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_ret.c 25
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_split.c 9
-rw-r--r-- src/third_party/wiredtiger/src/btree/col_modify.c 4
-rw-r--r-- src/third_party/wiredtiger/src/btree/col_srch.c 24
-rw-r--r-- src/third_party/wiredtiger/src/btree/row_modify.c 6
-rw-r--r-- src/third_party/wiredtiger/src/btree/row_srch.c 19
-rw-r--r-- src/third_party/wiredtiger/src/cache/cache_las.c 13
-rw-r--r-- src/third_party/wiredtiger/src/conn/conn_dhandle.c 43
-rw-r--r-- src/third_party/wiredtiger/src/include/btree.i 7
-rw-r--r-- src/third_party/wiredtiger/src/include/cursor.i 9
-rw-r--r-- src/third_party/wiredtiger/src/include/dhandle.h 1
-rw-r--r-- src/third_party/wiredtiger/src/include/extern.h 19
-rw-r--r-- src/third_party/wiredtiger/src/include/wiredtiger.in 5
-rw-r--r-- src/third_party/wiredtiger/src/include/wt_internal.h 10
-rw-r--r-- src/third_party/wiredtiger/src/meta/meta_ckpt.c 59
-rw-r--r-- src/third_party/wiredtiger/src/reconcile/rec_write.c 13
-rw-r--r-- src/third_party/wiredtiger/src/support/modify.c 51
-rw-r--r-- src/third_party/wiredtiger/src/txn/txn_log.c 28
-rw-r--r-- src/third_party/wiredtiger/test/syscall/syscall.py 2
-rw-r--r-- src/third_party/wiredtiger/test/syscall/wt2336_base/base.run 41
28 files changed, 438 insertions, 226 deletions
diff --git a/src/third_party/wiredtiger/dist/s_define.list b/src/third_party/wiredtiger/dist/s_define.list
index f199900e860..d585c1e268d 100644
--- a/src/third_party/wiredtiger/dist/s_define.list
+++ b/src/third_party/wiredtiger/dist/s_define.list
@@ -72,6 +72,7 @@ WT_TRACK_OP_END
WT_TRACK_OP_INIT
WT_TRET_ERROR_OK
WT_UPDATE_SIZE
+WT_USE_OPENAT
WT_WITH_LOCK_NOWAIT
WT_WITH_LOCK_WAIT
__F
diff --git a/src/third_party/wiredtiger/dist/s_string.ok b/src/third_party/wiredtiger/dist/s_string.ok
index b3b95a4e50a..dc20a154981 100644
--- a/src/third_party/wiredtiger/dist/s_string.ok
+++ b/src/third_party/wiredtiger/dist/s_string.ok
@@ -153,6 +153,7 @@ Fsync
Fuerst
GCC
GIDs
+GLIBC
Gcc
Geoff
GetEnvironmentVariableA
@@ -1040,6 +1041,7 @@ online
onpage
oo
opcode
+openat
opendir
openfile
oplist
@@ -1200,6 +1202,7 @@ stdin
stdout
stepp
str
+strace
strcmp
strdup
strerror
diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data
index 602318b9214..ae855e43736 100644
--- a/src/third_party/wiredtiger/import.data
+++ b/src/third_party/wiredtiger/import.data
@@ -1,6 +1,6 @@
{
- "commit": "4c72feeb921607b30984301f4e007fc24b54e26b",
- "github": "wiredtiger/wiredtiger.git",
- "vendor": "wiredtiger",
- "branch": "mongodb-3.6"
+ "vendor": "wiredtiger",
+ "github": "wiredtiger/wiredtiger.git",
+ "branch": "mongodb-3.6",
+ "commit": "723a4c13292b0bc7e27be411db4d006a0b865bd8"
}
diff --git a/src/third_party/wiredtiger/src/btree/bt_curnext.c b/src/third_party/wiredtiger/src/btree/bt_curnext.c
index c9cccc63bf6..d80186ca91b 100644
--- a/src/third_party/wiredtiger/src/btree/bt_curnext.c
+++ b/src/third_party/wiredtiger/src/btree/bt_curnext.c
@@ -150,7 +150,7 @@ new_page: if (cbt->ins == NULL)
++cbt->page_deleted_count;
continue;
}
- return (__wt_value_return(session, cbt, upd));
+ return (__wt_value_return(cbt, upd));
}
/* NOTREACHED */
}
@@ -211,7 +211,7 @@ new_page: /* Find the matching WT_COL slot. */
++cbt->page_deleted_count;
continue;
}
- return (__wt_value_return(session, cbt, upd));
+ return (__wt_value_return(cbt, upd));
}
/*
@@ -331,7 +331,7 @@ new_insert: if ((ins = cbt->ins) != NULL) {
}
key->data = WT_INSERT_KEY(ins);
key->size = WT_INSERT_KEY_SIZE(ins);
- return (__wt_value_return(session, cbt, upd));
+ return (__wt_value_return(cbt, upd));
}
/* Check for the end of the page. */
@@ -468,8 +468,12 @@ __wt_cursor_key_order_check(
* search.
*/
int
-__wt_cursor_key_order_init(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt)
+__wt_cursor_key_order_init(WT_CURSOR_BTREE *cbt)
{
+ WT_SESSION_IMPL *session;
+
+ session = (WT_SESSION_IMPL *)cbt->iface.session;
+
/*
* Cursor searches set the position for cursor movements, set the
* last-key value for diagnostic checking.
@@ -610,7 +614,7 @@ __wt_btcur_next(WT_CURSOR_BTREE *cbt, bool truncating)
* If the update, which returned prepared conflict is
* visible, return the value.
*/
- return (__cursor_kv_return(session, cbt, upd));
+ return (__cursor_kv_return(cbt, upd));
}
}
diff --git a/src/third_party/wiredtiger/src/btree/bt_curprev.c b/src/third_party/wiredtiger/src/btree/bt_curprev.c
index f72b935c441..daee4cef8f4 100644
--- a/src/third_party/wiredtiger/src/btree/bt_curprev.c
+++ b/src/third_party/wiredtiger/src/btree/bt_curprev.c
@@ -296,7 +296,7 @@ new_page: if (cbt->ins == NULL)
++cbt->page_deleted_count;
continue;
}
- return (__wt_value_return(session, cbt, upd));
+ return (__wt_value_return(cbt, upd));
}
/* NOTREACHED */
}
@@ -358,7 +358,7 @@ new_page: if (cbt->recno < cbt->ref->ref_recno)
++cbt->page_deleted_count;
continue;
}
- return (__wt_value_return(session, cbt, upd));
+ return (__wt_value_return(cbt, upd));
}
/*
@@ -488,7 +488,7 @@ new_insert: if ((ins = cbt->ins) != NULL) {
}
key->data = WT_INSERT_KEY(ins);
key->size = WT_INSERT_KEY_SIZE(ins);
- return (__wt_value_return(session, cbt, upd));
+ return (__wt_value_return(cbt, upd));
}
/* Check for the beginning of the page. */
@@ -564,7 +564,7 @@ __wt_btcur_prev(WT_CURSOR_BTREE *cbt, bool truncating)
* If the update, which returned prepared conflict is
* visible, return the value.
*/
- return (__cursor_kv_return(session, cbt, upd));
+ return (__cursor_kv_return(cbt, upd));
}
}
diff --git a/src/third_party/wiredtiger/src/btree/bt_cursor.c b/src/third_party/wiredtiger/src/btree/bt_cursor.c
index 8f4f7982e3d..9d62ef50e92 100644
--- a/src/third_party/wiredtiger/src/btree/bt_cursor.c
+++ b/src/third_party/wiredtiger/src/btree/bt_cursor.c
@@ -362,13 +362,15 @@ __wt_cursor_valid(WT_CURSOR_BTREE *cbt, WT_UPDATE **updp, bool *valid)
* Column-store search from a cursor.
*/
static inline int
-__cursor_col_search(
- WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_REF *leaf)
+__cursor_col_search(WT_CURSOR_BTREE *cbt, WT_REF *leaf, bool *leaf_foundp)
{
WT_DECL_RET;
+ WT_SESSION_IMPL *session;
- WT_WITH_PAGE_INDEX(session,
- ret = __wt_col_search(session, cbt->iface.recno, leaf, cbt, false));
+ session = (WT_SESSION_IMPL *)cbt->iface.session;
+ WT_WITH_PAGE_INDEX(
+ session, ret = __wt_col_search(
+ cbt, cbt->iface.recno, leaf, false, leaf_foundp));
return (ret);
}
@@ -378,12 +380,15 @@ __cursor_col_search(
*/
static inline int
__cursor_row_search(
- WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_REF *leaf, bool insert)
+ WT_CURSOR_BTREE *cbt, bool insert, WT_REF *leaf, bool *leaf_foundp)
{
WT_DECL_RET;
+ WT_SESSION_IMPL *session;
- WT_WITH_PAGE_INDEX(session, ret = __wt_row_search(
- session, &cbt->iface.key, leaf, cbt, insert, false));
+ session = (WT_SESSION_IMPL *)cbt->iface.session;
+ WT_WITH_PAGE_INDEX(
+ session, ret = __wt_row_search(
+ cbt, &cbt->iface.key, insert, leaf, false, leaf_foundp));
return (ret);
}
@@ -392,11 +397,10 @@ __cursor_row_search(
* Column-store modify from a cursor, with a separate value.
*/
static inline int
-__cursor_col_modify_v(WT_SESSION_IMPL *session,
- WT_CURSOR_BTREE *cbt, WT_ITEM *value, u_int modify_type)
+__cursor_col_modify_v(WT_CURSOR_BTREE *cbt, WT_ITEM *value, u_int modify_type)
{
- return (__wt_col_modify(session, cbt,
- cbt->iface.recno, value, NULL, modify_type, false));
+ return (__wt_col_modify(
+ cbt, cbt->iface.recno, value, NULL, modify_type, false));
}
/*
@@ -404,11 +408,10 @@ __cursor_col_modify_v(WT_SESSION_IMPL *session,
* Row-store modify from a cursor, with a separate value.
*/
static inline int
-__cursor_row_modify_v(WT_SESSION_IMPL *session,
- WT_CURSOR_BTREE *cbt, WT_ITEM *value, u_int modify_type)
+__cursor_row_modify_v(WT_CURSOR_BTREE *cbt, WT_ITEM *value, u_int modify_type)
{
- return (__wt_row_modify(session, cbt,
- &cbt->iface.key, value, NULL, modify_type, false));
+ return (__wt_row_modify(
+ cbt, &cbt->iface.key, value, NULL, modify_type, false));
}
/*
@@ -416,11 +419,11 @@ __cursor_row_modify_v(WT_SESSION_IMPL *session,
* Column-store modify from a cursor.
*/
static inline int
-__cursor_col_modify(
- WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, u_int modify_type)
+__cursor_col_modify(WT_CURSOR_BTREE *cbt, u_int modify_type)
{
- return (__wt_col_modify(session, cbt,
- cbt->iface.recno, &cbt->iface.value, NULL, modify_type, false));
+ return (__wt_col_modify(
+ cbt, cbt->iface.recno, &cbt->iface.value,
+ NULL, modify_type, false));
}
/*
@@ -428,11 +431,11 @@ __cursor_col_modify(
* Row-store modify from a cursor.
*/
static inline int
-__cursor_row_modify(
- WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, u_int modify_type)
+__cursor_row_modify(WT_CURSOR_BTREE *cbt, u_int modify_type)
{
- return (__wt_row_modify(session, cbt,
- &cbt->iface.key, &cbt->iface.value, NULL, modify_type, false));
+ return (__wt_row_modify(
+ cbt, &cbt->iface.key, &cbt->iface.value,
+ NULL, modify_type, false));
}
/*
@@ -483,7 +486,7 @@ __wt_btcur_search(WT_CURSOR_BTREE *cbt)
WT_DECL_RET;
WT_SESSION_IMPL *session;
WT_UPDATE *upd;
- bool valid;
+ bool leaf_found, valid;
btree = cbt->btree;
cursor = &cbt->iface;
@@ -517,19 +520,19 @@ __wt_btcur_search(WT_CURSOR_BTREE *cbt)
__wt_txn_cursor_op(session);
WT_ERR(btree->type == BTREE_ROW ?
- __cursor_row_search(session, cbt, cbt->ref, false) :
- __cursor_col_search(session, cbt, cbt->ref));
+ __cursor_row_search(cbt, false, cbt->ref, &leaf_found) :
+ __cursor_col_search(cbt, cbt->ref, &leaf_found));
/* Return, if prepare conflict encountered. */
- if (cbt->compare == 0)
+ if (leaf_found && cbt->compare == 0)
WT_ERR(__wt_cursor_valid(cbt, &upd, &valid));
}
if (!valid) {
WT_ERR(__cursor_func_init(cbt, true));
WT_ERR(btree->type == BTREE_ROW ?
- __cursor_row_search(session, cbt, NULL, false) :
- __cursor_col_search(session, cbt, NULL));
+ __cursor_row_search(cbt, false, NULL, NULL) :
+ __cursor_col_search(cbt, NULL, NULL));
/* Return, if prepare conflict encountered. */
if (cbt->compare == 0)
@@ -537,7 +540,7 @@ __wt_btcur_search(WT_CURSOR_BTREE *cbt)
}
if (valid)
- ret = __cursor_kv_return(session, cbt, upd);
+ ret = __cursor_kv_return(cbt, upd);
else if (__cursor_fix_implicit(btree, cbt)) {
/*
* Creating a record past the end of the tree in a fixed-length
@@ -554,7 +557,7 @@ __wt_btcur_search(WT_CURSOR_BTREE *cbt)
#ifdef HAVE_DIAGNOSTIC
if (ret == 0)
- WT_ERR(__wt_cursor_key_order_init(session, cbt));
+ WT_ERR(__wt_cursor_key_order_init(cbt));
#endif
err: if (ret != 0) {
@@ -578,7 +581,7 @@ __wt_btcur_search_near(WT_CURSOR_BTREE *cbt, int *exactp)
WT_SESSION_IMPL *session;
WT_UPDATE *upd;
int exact;
- bool valid;
+ bool leaf_found, valid;
btree = cbt->btree;
cursor = &cbt->iface;
@@ -610,34 +613,43 @@ __wt_btcur_search_near(WT_CURSOR_BTREE *cbt, int *exactp)
* pages in the case of column-store, search-near isn't an interesting
* enough case for column-store to add the complexity needed to avoid
* the tree search.
- *
- * Set the "insert" flag for the btree row-store search; we may intend
- * to position the cursor at the end of the tree, rather than match an
- * existing record.
*/
valid = false;
if (btree->type == BTREE_ROW && __cursor_page_pinned(cbt)) {
__wt_txn_cursor_op(session);
-
- WT_ERR(__cursor_row_search(session, cbt, cbt->ref, true));
+ /*
+ * Set the "insert" flag for the btree row-store search; we may
+ * intend to position the cursor at the end of the tree, rather
+ * than match an existing record.
+ */
+ WT_ERR(__cursor_row_search(cbt, true, cbt->ref, &leaf_found));
/*
- * Search-near is trickier than search when searching an already
- * pinned page. If search returns the first or last page slots,
- * discard the results and search the full tree as the neighbor
- * pages might offer better matches. This test is simplistic as
- * we're ignoring append lists (there may be no page slots or we
- * might be legitimately positioned after the last page slot).
- * Ignore those cases, it makes things too complicated.
+ * Only use the pinned page search results if search returns an
+ * exact match or a slot other than the page's boundary slots,
+ * if that's not the case, a neighbor page might offer a better
+ * match. This test is simplistic as we're ignoring append
+ * lists (there may be no page slots or we might be
+ * legitimately positioned after the last page slot). Ignore
+ * those cases, it makes things too complicated.
*/
- if (cbt->slot != 0 && cbt->slot != cbt->ref->page->entries - 1)
+ if (leaf_found &&
+ (cbt->compare == 0 ||
+ (cbt->slot != 0 &&
+ cbt->slot != cbt->ref->page->entries - 1)))
WT_ERR(__wt_cursor_valid(cbt, &upd, &valid));
}
if (!valid) {
WT_ERR(__cursor_func_init(cbt, true));
+
+ /*
+ * Set the "insert" flag for the btree row-store search; we may
+ * intend to position the cursor at the end of the tree, rather
+ * than match an existing record.
+ */
WT_ERR(btree->type == BTREE_ROW ?
- __cursor_row_search(session, cbt, NULL, true) :
- __cursor_col_search(session, cbt, NULL));
+ __cursor_row_search(cbt, true, NULL, NULL) :
+ __cursor_col_search(cbt, NULL, NULL));
WT_ERR(__wt_cursor_valid(cbt, &upd, &valid));
}
@@ -660,7 +672,7 @@ __wt_btcur_search_near(WT_CURSOR_BTREE *cbt, int *exactp)
*/
if (valid) {
exact = cbt->compare;
- ret = __cursor_kv_return(session, cbt, upd);
+ ret = __cursor_kv_return(cbt, upd);
} else if (__cursor_fix_implicit(btree, cbt)) {
cbt->recno = cursor->recno;
cbt->v = 0;
@@ -711,7 +723,7 @@ err: if (ret == 0 && exactp != NULL)
#ifdef HAVE_DIAGNOSTIC
if (ret == 0)
- WT_TRET(__wt_cursor_key_order_init(session, cbt));
+ WT_TRET(__wt_cursor_key_order_init(cbt));
#endif
if (ret != 0) {
@@ -785,8 +797,8 @@ __wt_btcur_insert(WT_CURSOR_BTREE *cbt)
*/
cbt->compare = 0;
ret = btree->type == BTREE_ROW ?
- __cursor_row_modify(session, cbt, WT_UPDATE_STANDARD) :
- __cursor_col_modify(session, cbt, WT_UPDATE_STANDARD);
+ __cursor_row_modify(cbt, WT_UPDATE_STANDARD) :
+ __cursor_col_modify(cbt, WT_UPDATE_STANDARD);
if (ret == 0)
goto done;
@@ -814,7 +826,7 @@ __wt_btcur_insert(WT_CURSOR_BTREE *cbt)
retry: WT_ERR(__cursor_func_init(cbt, true));
if (btree->type == BTREE_ROW) {
- WT_ERR(__cursor_row_search(session, cbt, NULL, true));
+ WT_ERR(__cursor_row_search(cbt, true, NULL, NULL));
/*
* If not overwriting, fail if the key exists, else insert the
* key/value pair.
@@ -826,7 +838,7 @@ retry: WT_ERR(__cursor_func_init(cbt, true));
WT_ERR(WT_DUPLICATE_KEY);
}
- ret = __cursor_row_modify(session, cbt, WT_UPDATE_STANDARD);
+ ret = __cursor_row_modify(cbt, WT_UPDATE_STANDARD);
} else if (append_key) {
/*
* Optionally insert a new record (ignoring the application's
@@ -835,11 +847,11 @@ retry: WT_ERR(__cursor_func_init(cbt, true));
*/
cbt->iface.recno = WT_RECNO_OOB;
cbt->compare = 1;
- WT_ERR(__cursor_col_search(session, cbt, NULL));
- WT_ERR(__cursor_col_modify(session, cbt, WT_UPDATE_STANDARD));
+ WT_ERR(__cursor_col_search(cbt, NULL, NULL));
+ WT_ERR(__cursor_col_modify(cbt, WT_UPDATE_STANDARD));
cursor->recno = cbt->recno;
} else {
- WT_ERR(__cursor_col_search(session, cbt, NULL));
+ WT_ERR(__cursor_col_search(cbt, NULL, NULL));
/*
* If not overwriting, fail if the key exists. Creating a
@@ -856,7 +868,7 @@ retry: WT_ERR(__cursor_func_init(cbt, true));
WT_ERR(WT_DUPLICATE_KEY);
}
- WT_ERR(__cursor_col_modify(session, cbt, WT_UPDATE_STANDARD));
+ WT_ERR(__cursor_col_modify(cbt, WT_UPDATE_STANDARD));
}
err: if (ret == WT_RESTART) {
@@ -940,7 +952,7 @@ __wt_btcur_insert_check(WT_CURSOR_BTREE *cbt)
__cursor_novalue(cursor);
retry: WT_ERR(__cursor_func_init(cbt, true));
- WT_ERR(__cursor_row_search(session, cbt, NULL, true));
+ WT_ERR(__cursor_row_search(cbt, true, NULL, NULL));
/* Just check for conflicts. */
ret = __curfile_update_check(cbt);
@@ -1045,8 +1057,8 @@ __wt_btcur_remove(WT_CURSOR_BTREE *cbt)
*/
cbt->compare = 0;
ret = btree->type == BTREE_ROW ?
- __cursor_row_modify(session, cbt, WT_UPDATE_TOMBSTONE) :
- __cursor_col_modify(session, cbt, WT_UPDATE_TOMBSTONE);
+ __cursor_row_modify(cbt, WT_UPDATE_TOMBSTONE) :
+ __cursor_col_modify(cbt, WT_UPDATE_TOMBSTONE);
if (ret == 0)
goto done;
goto err;
@@ -1068,7 +1080,7 @@ retry: if (positioned == POSITIONED)
WT_ERR(__cursor_func_init(cbt, true));
if (btree->type == BTREE_ROW) {
- WT_ERR(__cursor_row_search(session, cbt, NULL, false));
+ WT_ERR(__cursor_row_search(cbt, false, NULL, NULL));
/* Check whether an update would conflict. */
WT_ERR(__curfile_update_check(cbt));
@@ -1079,9 +1091,9 @@ retry: if (positioned == POSITIONED)
if (!valid)
WT_ERR(WT_NOTFOUND);
- ret = __cursor_row_modify(session, cbt, WT_UPDATE_TOMBSTONE);
+ ret = __cursor_row_modify(cbt, WT_UPDATE_TOMBSTONE);
} else {
- WT_ERR(__cursor_col_search(session, cbt, NULL));
+ WT_ERR(__cursor_col_search(cbt, NULL, NULL));
/*
* If we find a matching record, check whether an update would
@@ -1109,8 +1121,7 @@ retry: if (positioned == POSITIONED)
*/
cbt->recno = cursor->recno;
} else
- ret = __cursor_col_modify(
- session, cbt, WT_UPDATE_TOMBSTONE);
+ ret = __cursor_col_modify(cbt, WT_UPDATE_TOMBSTONE);
}
err: if (ret == WT_RESTART) {
@@ -1139,7 +1150,7 @@ done: switch (positioned) {
* Positioned and we did a search anyway, get a key to
* return.
*/
- WT_TRET(__wt_key_return(session, cbt));
+ WT_TRET(__wt_key_return(cbt));
break;
}
}
@@ -1194,7 +1205,7 @@ __btcur_update(WT_CURSOR_BTREE *cbt, WT_ITEM *value, u_int modify_type)
WT_DECL_RET;
WT_SESSION_IMPL *session;
uint64_t yield_count, sleep_usecs;
- bool valid;
+ bool leaf_found, valid;
btree = cbt->btree;
cursor = &cbt->iface;
@@ -1227,8 +1238,8 @@ __btcur_update(WT_CURSOR_BTREE *cbt, WT_ITEM *value, u_int modify_type)
*/
cbt->compare = 0;
ret = btree->type == BTREE_ROW ?
- __cursor_row_modify_v(session, cbt, value, modify_type) :
- __cursor_col_modify_v(session, cbt, value, modify_type);
+ __cursor_row_modify_v(cbt, value, modify_type) :
+ __cursor_col_modify_v(cbt, value, modify_type);
if (ret == 0)
goto done;
@@ -1253,11 +1264,38 @@ __btcur_update(WT_CURSOR_BTREE *cbt, WT_ITEM *value, u_int modify_type)
WT_ERR(__cursor_localvalue(cursor));
__cursor_state_save(cursor, &state);
-retry: WT_ERR(__cursor_func_init(cbt, true));
+ /*
+ * If our caller configures for a local search and we have a page
+ * pinned, do that search.
+ */
+ if (F_ISSET(cursor, WT_CURSTD_UPDATE_LOCAL)
+ && __cursor_page_pinned(cbt)) {
+ __wt_txn_cursor_op(session);
+ WT_ERR(__wt_txn_autocommit_check(session));
+ WT_ERR(btree->type == BTREE_ROW ?
+ __cursor_row_search(cbt, true, cbt->ref, &leaf_found) :
+ __cursor_col_search(cbt, cbt->ref, &leaf_found));
+ /*
+ * Only use the pinned page search results if search returns an
+ * exact match or a slot other than the page's boundary slots,
+ * if that's not the case, a neighbor page might offer a better
+ * match. This test is simplistic as we're ignoring append
+ * lists (there may be no page slots or we might be
+ * legitimately positioned after the last page slot). Ignore
+ * those cases, it makes things too complicated.
+ */
+ if (leaf_found && (cbt->compare == 0
+ || (cbt->slot != 0 && cbt->slot != cbt->ref->page->entries - 1)))
+ goto update_local;
+ }
+retry:
+ WT_ERR(__cursor_func_init(cbt, true));
+ WT_ERR(btree->type == BTREE_ROW ?
+ __cursor_row_search(cbt, true, NULL, NULL) :
+ __cursor_col_search(cbt, NULL, NULL));
+update_local:
if (btree->type == BTREE_ROW) {
- WT_ERR(__cursor_row_search(session, cbt, NULL, true));
-
/*
* If not overwriting, check for conflicts and fail if the key
* does not exist.
@@ -1270,10 +1308,8 @@ retry: WT_ERR(__cursor_func_init(cbt, true));
if (!valid)
WT_ERR(WT_NOTFOUND);
}
- ret = __cursor_row_modify_v(session, cbt, value, modify_type);
+ ret = __cursor_row_modify_v(cbt, value, modify_type);
} else {
- WT_ERR(__cursor_col_search(session, cbt, NULL));
-
/*
* If not overwriting, fail if the key doesn't exist. If we
* find an update for the key, check for conflicts. Update the
@@ -1291,7 +1327,7 @@ retry: WT_ERR(__cursor_func_init(cbt, true));
!__cursor_fix_implicit(btree, cbt))
WT_ERR(WT_NOTFOUND);
}
- ret = __cursor_col_modify_v(session, cbt, value, modify_type);
+ ret = __cursor_col_modify_v(cbt, value, modify_type);
}
err: if (ret == WT_RESTART) {
@@ -1313,8 +1349,7 @@ done: switch (modify_type) {
/*
* WT_CURSOR.update returns a key and a value.
*/
- ret = __cursor_kv_return(
- session, cbt, cbt->modify_update);
+ ret = __cursor_kv_return(cbt, cbt->modify_update);
break;
case WT_UPDATE_RESERVE:
/*
@@ -1327,7 +1362,7 @@ done: switch (modify_type) {
* WT_CURSOR.modify has already created the return value
* and our job is to leave it untouched.
*/
- ret = __wt_key_return(session, cbt);
+ ret = __wt_key_return(cbt);
break;
case WT_UPDATE_BIRTHMARK:
case WT_UPDATE_TOMBSTONE:
@@ -1670,13 +1705,15 @@ __wt_btcur_equals(WT_CURSOR_BTREE *a_arg, WT_CURSOR_BTREE *b_arg, int *equalp)
* tree.
*/
static int
-__cursor_truncate(WT_SESSION_IMPL *session,
+__cursor_truncate(
WT_CURSOR_BTREE *start, WT_CURSOR_BTREE *stop,
- int (*rmfunc)(WT_SESSION_IMPL *, WT_CURSOR_BTREE *, u_int))
+ int (*rmfunc)(WT_CURSOR_BTREE *, u_int))
{
WT_DECL_RET;
+ WT_SESSION_IMPL *session;
uint64_t yield_count, sleep_usecs;
+ session = (WT_SESSION_IMPL *)start->iface.session;
yield_count = sleep_usecs = 0;
/*
@@ -1698,12 +1735,13 @@ __cursor_truncate(WT_SESSION_IMPL *session,
* instantiated the end cursor, so we know that page is pinned in memory
* and we can proceed without concern.
*/
-retry: WT_ERR(__wt_btcur_search(start));
+retry:
+ WT_ERR(__wt_btcur_search(start));
WT_ASSERT(session,
F_MASK((WT_CURSOR *)start, WT_CURSTD_KEY_SET) == WT_CURSTD_KEY_INT);
for (;;) {
- WT_ERR(rmfunc(session, start, WT_UPDATE_TOMBSTONE));
+ WT_ERR(rmfunc(start, WT_UPDATE_TOMBSTONE));
if (stop != NULL && __cursor_equals(start, stop))
return (0);
@@ -1727,14 +1765,16 @@ err: if (ret == WT_RESTART) {
* Discard a cursor range from fixed-width column-store tree.
*/
static int
-__cursor_truncate_fix(WT_SESSION_IMPL *session,
+__cursor_truncate_fix(
WT_CURSOR_BTREE *start, WT_CURSOR_BTREE *stop,
- int (*rmfunc)(WT_SESSION_IMPL *, WT_CURSOR_BTREE *, u_int))
+ int (*rmfunc)(WT_CURSOR_BTREE *, u_int))
{
WT_DECL_RET;
+ WT_SESSION_IMPL *session;
uint64_t yield_count, sleep_usecs;
const uint8_t *value;
+ session = (WT_SESSION_IMPL *)start->iface.session;
yield_count = sleep_usecs = 0;
/*
@@ -1763,7 +1803,7 @@ retry: WT_ERR(__wt_btcur_search(start));
for (;;) {
value = (const uint8_t *)start->iface.value.data;
if (*value != 0)
- WT_ERR(rmfunc(session, start, WT_UPDATE_TOMBSTONE));
+ WT_ERR(rmfunc(start, WT_UPDATE_TOMBSTONE));
if (stop != NULL && __cursor_equals(start, stop))
return (0);
@@ -1797,6 +1837,7 @@ __wt_btcur_range_truncate(WT_CURSOR_BTREE *start, WT_CURSOR_BTREE *stop)
btree = start->btree;
WT_STAT_DATA_INCR(session, cursor_truncate);
+ WT_RET(__wt_txn_autocommit_check(session));
/*
* For recovery, log the start and stop keys for a truncate operation,
* not the individual records removed. On the other hand, for rollback
@@ -1811,12 +1852,10 @@ __wt_btcur_range_truncate(WT_CURSOR_BTREE *start, WT_CURSOR_BTREE *stop)
switch (btree->type) {
case BTREE_COL_FIX:
- WT_ERR(__cursor_truncate_fix(
- session, start, stop, __cursor_col_modify));
+ WT_ERR(__cursor_truncate_fix(start, stop, __cursor_col_modify));
break;
case BTREE_COL_VAR:
- WT_ERR(__cursor_truncate(
- session, start, stop, __cursor_col_modify));
+ WT_ERR(__cursor_truncate(start, stop, __cursor_col_modify));
break;
case BTREE_ROW:
/*
@@ -1831,8 +1870,7 @@ __wt_btcur_range_truncate(WT_CURSOR_BTREE *start, WT_CURSOR_BTREE *stop)
* that ever changes, we'd need to do something here to ensure a
* fully instantiated cursor.
*/
- WT_ERR(__cursor_truncate(
- session, start, stop, __cursor_row_modify));
+ WT_ERR(__cursor_truncate(start, stop, __cursor_row_modify));
break;
}
diff --git a/src/third_party/wiredtiger/src/btree/bt_random.c b/src/third_party/wiredtiger/src/btree/bt_random.c
index ed68513b245..a66cdb2c3b4 100644
--- a/src/third_party/wiredtiger/src/btree/bt_random.c
+++ b/src/third_party/wiredtiger/src/btree/bt_random.c
@@ -13,16 +13,18 @@
* Return a random key from a row-store leaf page.
*/
int
-__wt_row_random_leaf(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt)
+__wt_row_random_leaf(WT_CURSOR_BTREE *cbt)
{
WT_INSERT *ins, **start, **stop;
WT_INSERT_HEAD *ins_head;
WT_PAGE *page;
+ WT_SESSION_IMPL *session;
uint64_t samples;
uint32_t choice, entries, i;
int level;
page = cbt->ref->page;
+ session = (WT_SESSION_IMPL *)cbt->iface.session;
start = stop = NULL; /* [-Wconditional-uninitialized] */
entries = 0; /* [-Wconditional-uninitialized] */
@@ -425,11 +427,11 @@ random_page_entry:
* Select a random entry from the leaf page. If it's not valid, move to
* the next entry, if that doesn't work, move to the previous entry.
*/
- WT_ERR(__wt_row_random_leaf(session, cbt));
+ WT_ERR(__wt_row_random_leaf(cbt));
WT_ERR(__wt_cursor_valid(cbt, &upd, &valid));
if (valid) {
- WT_ERR(__wt_key_return(session, cbt));
- WT_ERR(__wt_value_return(session, cbt, upd));
+ WT_ERR(__wt_key_return(cbt));
+ WT_ERR(__wt_value_return(cbt, upd));
} else {
if ((ret = __wt_btcur_next(cbt, false)) == WT_NOTFOUND)
ret = __wt_btcur_prev(cbt, false);
diff --git a/src/third_party/wiredtiger/src/btree/bt_read.c b/src/third_party/wiredtiger/src/btree/bt_read.c
index 69b0f95d205..5b0cba71c9c 100644
--- a/src/third_party/wiredtiger/src/btree/bt_read.c
+++ b/src/third_party/wiredtiger/src/btree/bt_read.c
@@ -31,9 +31,9 @@ __col_instantiate(WT_SESSION_IMPL *session,
__wt_free_update_list(session, upd);
/* Search the page and add updates. */
- WT_RET(__wt_col_search(session, recno, ref, cbt, true));
+ WT_RET(__wt_col_search(cbt, recno, ref, true, NULL));
WT_RET(__wt_col_modify(
- session, cbt, recno, NULL, updlist, WT_UPDATE_INVALID, false));
+ cbt, recno, NULL, updlist, WT_UPDATE_INVALID, false));
return (0);
}
@@ -60,9 +60,9 @@ __row_instantiate(WT_SESSION_IMPL *session,
__wt_free_update_list(session, upd);
/* Search the page and add updates. */
- WT_RET(__wt_row_search(session, key, ref, cbt, true, true));
+ WT_RET(__wt_row_search(cbt, key, true, ref, true, NULL));
WT_RET(__wt_row_modify(
- session, cbt, key, NULL, updlist, WT_UPDATE_INVALID, false));
+ cbt, key, NULL, updlist, WT_UPDATE_INVALID, false));
return (0);
}
diff --git a/src/third_party/wiredtiger/src/btree/bt_ret.c b/src/third_party/wiredtiger/src/btree/bt_ret.c
index 60cb3d53699..8613bf38585 100644
--- a/src/third_party/wiredtiger/src/btree/bt_ret.c
+++ b/src/third_party/wiredtiger/src/btree/bt_ret.c
@@ -13,15 +13,17 @@
* Change the cursor to reference an internal return key.
*/
static inline int
-__key_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt)
+__key_return(WT_CURSOR_BTREE *cbt)
{
WT_CURSOR *cursor;
WT_ITEM *tmp;
WT_PAGE *page;
WT_ROW *rip;
+ WT_SESSION_IMPL *session;
page = cbt->ref->page;
cursor = &cbt->iface;
+ session = (WT_SESSION_IMPL *)cbt->iface.session;
if (page->type == WT_PAGE_ROW_LEAF) {
rip = &page->pg_row[cbt->slot];
@@ -78,7 +80,7 @@ __key_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt)
* Change the cursor to reference an internal original-page return value.
*/
static inline int
-__value_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt)
+__value_return(WT_CURSOR_BTREE *cbt)
{
WT_BTREE *btree;
WT_CELL *cell;
@@ -86,8 +88,10 @@ __value_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt)
WT_CURSOR *cursor;
WT_PAGE *page;
WT_ROW *rip;
+ WT_SESSION_IMPL *session;
uint8_t v;
+ session = (WT_SESSION_IMPL *)cbt->iface.session;
btree = S2BT(session);
page = cbt->ref->page;
@@ -134,11 +138,12 @@ __value_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt)
* value.
*/
int
-__wt_value_return_upd(WT_SESSION_IMPL *session,
+__wt_value_return_upd(
WT_CURSOR_BTREE *cbt, WT_UPDATE *upd, bool ignore_visibility)
{
WT_CURSOR *cursor;
WT_DECL_RET;
+ WT_SESSION_IMPL *session;
WT_UPDATE **listp, *list[WT_MODIFY_ARRAY_SIZE];
size_t allocated_bytes;
u_int i;
@@ -146,6 +151,7 @@ __wt_value_return_upd(WT_SESSION_IMPL *session,
cursor = &cbt->iface;
allocated_bytes = 0;
+ session = (WT_SESSION_IMPL *)cbt->iface.session;
/*
* We're passed a "standard" or "modified" update that's visible to us.
@@ -237,7 +243,7 @@ __wt_value_return_upd(WT_SESSION_IMPL *session,
*/
WT_ASSERT(session, cbt->slot != UINT32_MAX);
- WT_ERR(__value_return(session, cbt));
+ WT_ERR(__value_return(cbt));
}
} else if (upd->type == WT_UPDATE_TOMBSTONE)
WT_ERR(__wt_buf_set(session, &cursor->value, "", 0));
@@ -262,7 +268,7 @@ err: if (allocated_bytes != 0)
* Change the cursor to reference an internal return key.
*/
int
-__wt_key_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt)
+__wt_key_return(WT_CURSOR_BTREE *cbt)
{
WT_CURSOR *cursor;
@@ -279,7 +285,7 @@ __wt_key_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt)
*/
F_CLR(cursor, WT_CURSTD_KEY_EXT);
if (!F_ISSET(cursor, WT_CURSTD_KEY_INT)) {
- WT_RET(__key_return(session, cbt));
+ WT_RET(__key_return(cbt));
F_SET(cursor, WT_CURSTD_KEY_INT);
}
return (0);
@@ -290,8 +296,7 @@ __wt_key_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt)
* Change the cursor to reference an internal return value.
*/
int
-__wt_value_return(
- WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_UPDATE *upd)
+__wt_value_return(WT_CURSOR_BTREE *cbt, WT_UPDATE *upd)
{
WT_CURSOR *cursor;
@@ -299,9 +304,9 @@ __wt_value_return(
F_CLR(cursor, WT_CURSTD_VALUE_EXT);
if (upd == NULL)
- WT_RET(__value_return(session, cbt));
+ WT_RET(__value_return(cbt));
else
- WT_RET(__wt_value_return_upd(session, cbt, upd, false));
+ WT_RET(__wt_value_return_upd(cbt, upd, false));
F_SET(cursor, WT_CURSTD_VALUE_INT);
return (0);
}
diff --git a/src/third_party/wiredtiger/src/btree/bt_split.c b/src/third_party/wiredtiger/src/btree/bt_split.c
index 0ea367fa360..80eacc95e02 100644
--- a/src/third_party/wiredtiger/src/btree/bt_split.c
+++ b/src/third_party/wiredtiger/src/btree/bt_split.c
@@ -1476,11 +1476,10 @@ __split_multi_inmem(
recno = WT_INSERT_RECNO(supd->ins);
/* Search the page. */
- WT_ERR(__wt_col_search(
- session, recno, ref, &cbt, true));
+ WT_ERR(__wt_col_search(&cbt, recno, ref, true, NULL));
/* Apply the modification. */
- WT_ERR(__wt_col_modify(session, &cbt,
+ WT_ERR(__wt_col_modify(&cbt,
recno, NULL, upd, WT_UPDATE_INVALID, true));
break;
case WT_PAGE_ROW_LEAF:
@@ -1500,7 +1499,7 @@ __split_multi_inmem(
/* Search the page. */
WT_ERR(__wt_row_search(
- session, key, ref, &cbt, true, true));
+ &cbt, key, true, ref, true, NULL));
/*
* Birthmarks should only be applied to on-page values.
@@ -1509,7 +1508,7 @@ __split_multi_inmem(
upd->type != WT_UPDATE_BIRTHMARK);
/* Apply the modification. */
- WT_ERR(__wt_row_modify(session,
+ WT_ERR(__wt_row_modify(
&cbt, key, NULL, upd, WT_UPDATE_INVALID, true));
break;
WT_ILLEGAL_VALUE_ERR(session, orig->type);
diff --git a/src/third_party/wiredtiger/src/btree/col_modify.c b/src/third_party/wiredtiger/src/btree/col_modify.c
index 233a88c9404..4183840a5f3 100644
--- a/src/third_party/wiredtiger/src/btree/col_modify.c
+++ b/src/third_party/wiredtiger/src/btree/col_modify.c
@@ -16,7 +16,7 @@ static int __col_insert_alloc(
* Column-store delete, insert, and update.
*/
int
-__wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt,
+__wt_col_modify(WT_CURSOR_BTREE *cbt,
uint64_t recno, const WT_ITEM *value,
WT_UPDATE *upd_arg, u_int modify_type, bool exclusive)
{
@@ -27,6 +27,7 @@ __wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt,
WT_INSERT_HEAD *ins_head, **ins_headp;
WT_PAGE *page;
WT_PAGE_MODIFY *mod;
+ WT_SESSION_IMPL *session;
WT_UPDATE *old_upd, *upd;
size_t ins_size, upd_size;
u_int i, skipdepth;
@@ -35,6 +36,7 @@ __wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt,
btree = cbt->btree;
ins = NULL;
page = cbt->ref->page;
+ session = (WT_SESSION_IMPL *)cbt->iface.session;
upd = upd_arg;
append = logged = false;
diff --git a/src/third_party/wiredtiger/src/btree/col_srch.c b/src/third_party/wiredtiger/src/btree/col_srch.c
index e72ee7455da..ee49b5340a4 100644
--- a/src/third_party/wiredtiger/src/btree/col_srch.c
+++ b/src/third_party/wiredtiger/src/btree/col_srch.c
@@ -61,8 +61,9 @@ __check_leaf_key_range(WT_SESSION_IMPL *session,
* Search a column-store tree for a specific record-based key.
*/
int
-__wt_col_search(WT_SESSION_IMPL *session,
- uint64_t search_recno, WT_REF *leaf, WT_CURSOR_BTREE *cbt, bool restore)
+__wt_col_search(
+ WT_CURSOR_BTREE *cbt, uint64_t search_recno,
+ WT_REF *leaf, bool leaf_safe, bool *leaf_foundp)
{
WT_BTREE *btree;
WT_COL *cip;
@@ -72,10 +73,12 @@ __wt_col_search(WT_SESSION_IMPL *session,
WT_PAGE *page;
WT_PAGE_INDEX *pindex, *parent_pindex;
WT_REF *current, *descent;
+ WT_SESSION_IMPL *session;
uint64_t recno;
uint32_t base, indx, limit, read_flags;
int depth;
+ session = (WT_SESSION_IMPL *)cbt->iface.session;
btree = S2BT(session);
current = NULL;
@@ -93,23 +96,20 @@ __wt_col_search(WT_SESSION_IMPL *session,
* the normal case where we are searching a tree, check the page's
* parent keys before doing the full search, it's faster when the
* cursor is being re-positioned. Skip this if the page is being
- * re-instantiated in memory.
+ * re-instantiated in memory. More generally, skip that check if we
+ * know the page is the right one (for example, when re-instantiating
+ * a page in memory, in that case we know the target must be on the
+ * current page).
*/
if (leaf != NULL) {
WT_ASSERT(session, search_recno != WT_RECNO_OOB);
- if (!restore) {
+ if (!leaf_safe) {
WT_RET(__check_leaf_key_range(
session, recno, leaf, cbt));
- if (cbt->compare != 0) {
- /*
- * !!!
- * WT_CURSOR.search_near uses the slot value to
- * decide if there was an on-page match.
- */
- cbt->slot = 0;
+ *leaf_foundp = cbt->compare == 0;
+ if (!*leaf_foundp)
return (0);
- }
}
current = leaf;
diff --git a/src/third_party/wiredtiger/src/btree/row_modify.c b/src/third_party/wiredtiger/src/btree/row_modify.c
index 0f89d09f948..c5904916e66 100644
--- a/src/third_party/wiredtiger/src/btree/row_modify.c
+++ b/src/third_party/wiredtiger/src/btree/row_modify.c
@@ -41,8 +41,8 @@ err: __wt_free(session, modify);
* Row-store insert, update and delete.
*/
int
-__wt_row_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt,
- const WT_ITEM *key, const WT_ITEM *value,
+__wt_row_modify(
+ WT_CURSOR_BTREE *cbt, const WT_ITEM *key, const WT_ITEM *value,
WT_UPDATE *upd_arg, u_int modify_type, bool exclusive)
{
WT_DECL_RET;
@@ -50,6 +50,7 @@ __wt_row_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt,
WT_INSERT_HEAD *ins_head, **ins_headp;
WT_PAGE *page;
WT_PAGE_MODIFY *mod;
+ WT_SESSION_IMPL *session;
WT_UPDATE *old_upd, *upd, **upd_entry;
size_t ins_size, upd_size;
uint32_t ins_slot;
@@ -58,6 +59,7 @@ __wt_row_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt,
ins = NULL;
page = cbt->ref->page;
+ session = (WT_SESSION_IMPL *)cbt->iface.session;
upd = upd_arg;
logged = false;
diff --git a/src/third_party/wiredtiger/src/btree/row_srch.c b/src/third_party/wiredtiger/src/btree/row_srch.c
index 5dff4b6fa60..5a582196557 100644
--- a/src/third_party/wiredtiger/src/btree/row_srch.c
+++ b/src/third_party/wiredtiger/src/btree/row_srch.c
@@ -205,9 +205,8 @@ __check_leaf_key_range(WT_SESSION_IMPL *session,
* Search a row-store tree for a specific key.
*/
int
-__wt_row_search(WT_SESSION_IMPL *session,
- WT_ITEM *srch_key, WT_REF *leaf, WT_CURSOR_BTREE *cbt,
- bool insert, bool restore)
+__wt_row_search(WT_CURSOR_BTREE *cbt, WT_ITEM *srch_key,
+ bool insert, WT_REF *leaf, bool leaf_safe, bool *leaf_foundp)
{
WT_BTREE *btree;
WT_COLLATOR *collator;
@@ -218,11 +217,13 @@ __wt_row_search(WT_SESSION_IMPL *session,
WT_PAGE_INDEX *pindex, *parent_pindex;
WT_REF *current, *descent;
WT_ROW *rip;
+ WT_SESSION_IMPL *session;
size_t match, skiphigh, skiplow;
uint32_t base, indx, limit, read_flags;
int cmp, depth;
bool append_check, descend_right, done;
+ session = (WT_SESSION_IMPL *)cbt->iface.session;
btree = S2BT(session);
collator = btree->collator;
item = cbt->tmp;
@@ -258,18 +259,12 @@ __wt_row_search(WT_SESSION_IMPL *session,
* re-instantiated in memory.
*/
if (leaf != NULL) {
- if (!restore) {
+ if (!leaf_safe) {
WT_RET(__check_leaf_key_range(
session, srch_key, leaf, cbt));
- if (cbt->compare != 0) {
- /*
- * !!!
- * WT_CURSOR.search_near uses the slot value to
- * decide if there was an on-page match.
- */
- cbt->slot = 0;
+ *leaf_foundp = cbt->compare == 0;
+ if (!*leaf_foundp)
return (0);
- }
}
current = leaf;
diff --git a/src/third_party/wiredtiger/src/cache/cache_las.c b/src/third_party/wiredtiger/src/cache/cache_las.c
index b38da22fc4a..35a76210a0c 100644
--- a/src/third_party/wiredtiger/src/cache/cache_las.c
+++ b/src/third_party/wiredtiger/src/cache/cache_las.c
@@ -704,6 +704,12 @@ __wt_las_insert_block(WT_CURSOR *cursor,
WT_ERR(__wt_txn_begin(session, NULL));
local_txn = true;
+ /*
+ * Inserts should be on the same page absent a split, search any pinned
+ * leaf page.
+ */
+ F_SET(cursor, WT_CURSTD_UPDATE_LOCAL);
+
/* Enter each update in the boundary's list into the lookaside store. */
for (las_counter = 0, i = 0,
list = multi->supd; i < multi->supd_entries; ++i, ++list) {
@@ -799,10 +805,8 @@ __wt_las_insert_block(WT_CURSOR *cursor,
upd->type, &las_value);
/*
- * Using update looks a little strange because the keys
- * are guaranteed to not exist, but since we're
- * appending, we want the cursor to stay positioned in
- * between inserts.
+ * Using update instead of insert so the page stays
+ * pinned and can be searched before the tree.
*/
WT_ERR(cursor->update(cursor));
++insert_cnt;
@@ -831,6 +835,7 @@ err: /* Resolve the transaction. */
}
__las_restore_isolation(session, saved_isolation);
+ F_CLR(cursor, WT_CURSTD_UPDATE_LOCAL);
if (ret == 0 && insert_cnt > 0) {
multi->page_las.las_pageid = las_pageid;
diff --git a/src/third_party/wiredtiger/src/conn/conn_dhandle.c b/src/third_party/wiredtiger/src/conn/conn_dhandle.c
index eeaa71683f1..aa1bd4cb08a 100644
--- a/src/third_party/wiredtiger/src/conn/conn_dhandle.c
+++ b/src/third_party/wiredtiger/src/conn/conn_dhandle.c
@@ -25,6 +25,7 @@ __conn_dhandle_config_clear(WT_SESSION_IMPL *session)
for (a = dhandle->cfg; *a != NULL; ++a)
__wt_free(session, *a);
__wt_free(session, dhandle->cfg);
+ __wt_free(session, dhandle->meta_base);
}
/*
@@ -36,9 +37,12 @@ __conn_dhandle_config_set(WT_SESSION_IMPL *session)
{
WT_DATA_HANDLE *dhandle;
WT_DECL_RET;
- char *metaconf;
+ const char *base, *cfg[3];
+ char *metaconf, *tmp;
dhandle = session->dhandle;
+ base = NULL;
+ tmp = NULL;
/*
* Read the object's entry from the metadata file, we're done if we
@@ -68,8 +72,40 @@ __conn_dhandle_config_set(WT_SESSION_IMPL *session)
WT_ERR(__wt_calloc_def(session, 3, &dhandle->cfg));
switch (dhandle->type) {
case WT_DHANDLE_TYPE_BTREE:
+ /*
+ * We are stripping out the checkpoint and checkpoint_lsn
+ * information from the config string. We save the rest of
+ * the metadata string, that is essentially static and
+ * unchanging and then concatenate the new checkpoint and
+ * LSN information on each checkpoint. The reason is
+ * performance and avoiding a lot of calls to the config
+ * parsing functions during a checkpoint for information
+ * that changes in a very well known way.
+ */
+ cfg[0] = metaconf;
+ cfg[1] = "checkpoint=()";
+ cfg[2] = NULL;
WT_ERR(__wt_strdup(session,
WT_CONFIG_BASE(session, file_meta), &dhandle->cfg[0]));
+ WT_ASSERT(session, dhandle->meta_base == NULL);
+ /*
+ * First collapse and overwrite any checkpoint information
+ * because we do not know the name or how many checkpoints
+ * may be in this metadata. So first we have to set the string
+ * to the empty checkpoint string and call collapse to
+ * overwrite anything existing.
+ */
+ WT_ERR(__wt_config_collapse(session, cfg, &tmp));
+ /*
+ * Now strip out the checkpoint and checkpoint LSN items
+ * from the configuration string and that is now our
+ * base metadata string.
+ */
+ cfg[0] = tmp;
+ cfg[1] = NULL;
+ WT_ERR(__wt_config_merge(session,
+ cfg, "checkpoint=,checkpoint_lsn=", &base));
+ __wt_free(session, tmp);
break;
case WT_DHANDLE_TYPE_TABLE:
WT_ERR(__wt_strdup(session,
@@ -77,9 +113,12 @@ __conn_dhandle_config_set(WT_SESSION_IMPL *session)
break;
}
dhandle->cfg[1] = metaconf;
+ dhandle->meta_base = base;
return (0);
-err: __wt_free(session, metaconf);
+err: __wt_free(session, base);
+ __wt_free(session, metaconf);
+ __wt_free(session, tmp);
return (ret);
}
diff --git a/src/third_party/wiredtiger/src/include/btree.i b/src/third_party/wiredtiger/src/include/btree.i
index 0984dc93d57..61fb79db907 100644
--- a/src/third_party/wiredtiger/src/include/btree.i
+++ b/src/third_party/wiredtiger/src/include/btree.i
@@ -516,6 +516,13 @@ __wt_page_only_modify_set(WT_SESSION_IMPL *session, WT_PAGE *page)
__wt_atomic_add32(&page->modify->page_state, 1) ==
WT_PAGE_DIRTY_FIRST) {
__wt_cache_dirty_incr(session, page);
+ /*
+ * In the event we dirty a page which is flagged for eviction
+ * soon, we update its read generation to avoid evicting a
+ * dirty page prematurely.
+ */
+ if (page->read_gen == WT_READGEN_WONT_NEED)
+ __wt_cache_read_gen_new(session, page);
/*
* We won the race to dirty the page, but another thread could
diff --git a/src/third_party/wiredtiger/src/include/cursor.i b/src/third_party/wiredtiger/src/include/cursor.i
index cb665e17f5b..52a9736e383 100644
--- a/src/third_party/wiredtiger/src/include/cursor.i
+++ b/src/third_party/wiredtiger/src/include/cursor.i
@@ -315,11 +315,10 @@ __wt_cursor_dhandle_decr_use(WT_SESSION_IMPL *session)
* Return a page referenced key/value pair to the application.
*/
static inline int
-__cursor_kv_return(
- WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_UPDATE *upd)
+__cursor_kv_return(WT_CURSOR_BTREE *cbt, WT_UPDATE *upd)
{
- WT_RET(__wt_key_return(session, cbt));
- WT_RET(__wt_value_return(session, cbt, upd));
+ WT_RET(__wt_key_return(cbt));
+ WT_RET(__wt_value_return(cbt, upd));
return (0);
}
@@ -457,7 +456,7 @@ value:
* (if any) is visible.
*/
if (upd != NULL)
- return (__wt_value_return(session, cbt, upd));
+ return (__wt_value_return(cbt, upd));
/* Else, simple values have their location encoded in the WT_ROW. */
if (__wt_row_leaf_value(page, rip, vb))
diff --git a/src/third_party/wiredtiger/src/include/dhandle.h b/src/third_party/wiredtiger/src/include/dhandle.h
index f47db3f762c..4b58ed126d9 100644
--- a/src/third_party/wiredtiger/src/include/dhandle.h
+++ b/src/third_party/wiredtiger/src/include/dhandle.h
@@ -74,6 +74,7 @@ struct __wt_data_handle {
uint64_t name_hash; /* Hash of name */
const char *checkpoint; /* Checkpoint name (or NULL) */
const char **cfg; /* Configuration information */
+ const char *meta_base; /* Base metadata configuration */
/*
* Sessions holding a connection's data handle will have a non-zero
diff --git a/src/third_party/wiredtiger/src/include/extern.h b/src/third_party/wiredtiger/src/include/extern.h
index afc89795500..251c9393c6c 100644
--- a/src/third_party/wiredtiger/src/include/extern.h
+++ b/src/third_party/wiredtiger/src/include/extern.h
@@ -95,7 +95,7 @@ extern int __wt_bloom_drop(WT_BLOOM *bloom, const char *config) WT_GCC_FUNC_DECL
extern int __wt_compact(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_compact_page_skip(WT_SESSION_IMPL *session, WT_REF *ref, void *context, bool *skipp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_cursor_key_order_check(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, bool next) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_cursor_key_order_init(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_cursor_key_order_init(WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern void __wt_cursor_key_order_reset(WT_CURSOR_BTREE *cbt);
extern void __wt_btcur_iterate_setup(WT_CURSOR_BTREE *cbt);
extern int __wt_btcur_next(WT_CURSOR_BTREE *cbt, bool truncating) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
@@ -157,7 +157,7 @@ extern int __wt_ovfl_remove(WT_SESSION_IMPL *session, WT_PAGE *page, WT_CELL_UNP
extern int __wt_ovfl_discard(WT_SESSION_IMPL *session, WT_CELL *cell) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_page_alloc(WT_SESSION_IMPL *session, uint8_t type, uint32_t alloc_entries, bool alloc_refs, WT_PAGE **pagep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_page_inmem(WT_SESSION_IMPL *session, WT_REF *ref, const void *image, uint32_t flags, WT_PAGE **pagep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_row_random_leaf(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_row_random_leaf(WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_random_descent(WT_SESSION_IMPL *session, WT_REF **refp, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_btcur_next_random(WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int
@@ -167,9 +167,9 @@ __wt_page_in_func(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags
#endif
);
extern int __wt_bt_rebalance(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_value_return_upd(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_UPDATE *upd, bool ignore_visibility) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_key_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_value_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_UPDATE *upd) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_value_return_upd(WT_CURSOR_BTREE *cbt, WT_UPDATE *upd, bool ignore_visibility) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_key_return(WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_value_return(WT_CURSOR_BTREE *cbt, WT_UPDATE *upd) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_bt_salvage(WT_SESSION_IMPL *session, WT_CKPT *ckptbase, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_multi_to_ref(WT_SESSION_IMPL *session, WT_PAGE *page, WT_MULTI *multi, WT_REF **refp, size_t *incrp, bool closing) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_split_insert(WT_SESSION_IMPL *session, WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
@@ -186,8 +186,8 @@ extern int __wt_tree_walk(WT_SESSION_IMPL *session, WT_REF **refp, uint32_t flag
extern int __wt_tree_walk_count(WT_SESSION_IMPL *session, WT_REF **refp, uint64_t *walkcntp, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_tree_walk_custom_skip(WT_SESSION_IMPL *session, WT_REF **refp, int (*skip_func)(WT_SESSION_IMPL *, WT_REF *, void *, bool *), void *func_cookie, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_tree_walk_skip(WT_SESSION_IMPL *session, WT_REF **refp, uint64_t *skipleafcntp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, uint64_t recno, const WT_ITEM *value, WT_UPDATE *upd_arg, u_int modify_type, bool exclusive) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_col_search(WT_SESSION_IMPL *session, uint64_t search_recno, WT_REF *leaf, WT_CURSOR_BTREE *cbt, bool restore) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_col_modify(WT_CURSOR_BTREE *cbt, uint64_t recno, const WT_ITEM *value, WT_UPDATE *upd_arg, u_int modify_type, bool exclusive) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_col_search(WT_CURSOR_BTREE *cbt, uint64_t search_recno, WT_REF *leaf, bool leaf_safe, bool *leaf_foundp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_row_leaf_keys(WT_SESSION_IMPL *session, WT_PAGE *page) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_row_leaf_key_copy(WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW *rip, WT_ITEM *key) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_row_leaf_key_work(WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW *rip_arg, WT_ITEM *keyb, bool instantiate) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
@@ -195,12 +195,12 @@ extern int __wt_row_ikey_alloc(WT_SESSION_IMPL *session, uint32_t cell_offset, c
extern int __wt_row_ikey_incr(WT_SESSION_IMPL *session, WT_PAGE *page, uint32_t cell_offset, const void *key, size_t size, WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_row_ikey(WT_SESSION_IMPL *session, uint32_t cell_offset, const void *key, size_t size, WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_page_modify_alloc(WT_SESSION_IMPL *session, WT_PAGE *page) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_row_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, const WT_ITEM *key, const WT_ITEM *value, WT_UPDATE *upd_arg, u_int modify_type, bool exclusive) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_row_modify(WT_CURSOR_BTREE *cbt, const WT_ITEM *key, const WT_ITEM *value, WT_UPDATE *upd_arg, u_int modify_type, bool exclusive) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_row_insert_alloc(WT_SESSION_IMPL *session, const WT_ITEM *key, u_int skipdepth, WT_INSERT **insp, size_t *ins_sizep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_update_alloc(WT_SESSION_IMPL *session, const WT_ITEM *value, WT_UPDATE **updp, size_t *sizep, u_int modify_type) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern WT_UPDATE *__wt_update_obsolete_check(WT_SESSION_IMPL *session, WT_PAGE *page, WT_UPDATE *upd);
extern int __wt_search_insert(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_INSERT_HEAD *ins_head, WT_ITEM *srch_key) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_row_search(WT_SESSION_IMPL *session, WT_ITEM *srch_key, WT_REF *leaf, WT_CURSOR_BTREE *cbt, bool insert, bool restore) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_row_search(WT_CURSOR_BTREE *cbt, WT_ITEM *srch_key, bool insert, WT_REF *leaf, bool leaf_safe, bool *leaf_foundp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_las_config(WT_SESSION_IMPL *session, const char **cfg) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern bool __wt_las_empty(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern void __wt_las_stats_update(WT_SESSION_IMPL *session);
@@ -731,6 +731,7 @@ extern void __wt_huffman_close(WT_SESSION_IMPL *session, void *huffman_arg);
extern void __wt_print_huffman_code(void *huffman_arg, uint16_t symbol);
extern int __wt_huffman_encode(WT_SESSION_IMPL *session, void *huffman_arg, const uint8_t *from_arg, size_t from_len, WT_ITEM *to_buf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_huffman_decode(WT_SESSION_IMPL *session, void *huffman_arg, const uint8_t *from_arg, size_t from_len, WT_ITEM *to_buf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern bool __wt_modify_idempotent(const void *modify) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_modify_pack(WT_SESSION_IMPL *session, WT_ITEM **modifyp, WT_MODIFY *entries, int nentries) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_modify_apply_api(WT_SESSION_IMPL *session, WT_CURSOR *cursor, WT_MODIFY *entries, int nentries) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_modify_apply(WT_SESSION_IMPL *session, WT_CURSOR *cursor, const void *modify) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
diff --git a/src/third_party/wiredtiger/src/include/wiredtiger.in b/src/third_party/wiredtiger/src/include/wiredtiger.in
index 36f9ecff5c7..8828dd31f80 100644
--- a/src/third_party/wiredtiger/src/include/wiredtiger.in
+++ b/src/third_party/wiredtiger/src/include/wiredtiger.in
@@ -716,8 +716,9 @@ struct __wt_cursor {
#define WT_CURSTD_OVERWRITE 0x02000u
#define WT_CURSTD_RAW 0x04000u
#define WT_CURSTD_RAW_SEARCH 0x08000u
-#define WT_CURSTD_VALUE_EXT 0x10000u /* Value points out of tree. */
-#define WT_CURSTD_VALUE_INT 0x20000u /* Value points into tree. */
+#define WT_CURSTD_UPDATE_LOCAL 0x10000u
+#define WT_CURSTD_VALUE_EXT 0x20000u /* Value points out of tree. */
+#define WT_CURSTD_VALUE_INT 0x40000u /* Value points into tree. */
/* AUTOMATIC FLAG VALUE GENERATION STOP */
#define WT_CURSTD_KEY_SET (WT_CURSTD_KEY_EXT | WT_CURSTD_KEY_INT)
#define WT_CURSTD_VALUE_SET (WT_CURSTD_VALUE_EXT | WT_CURSTD_VALUE_INT)
diff --git a/src/third_party/wiredtiger/src/include/wt_internal.h b/src/third_party/wiredtiger/src/include/wt_internal.h
index df3ee2da174..c5a4693319e 100644
--- a/src/third_party/wiredtiger/src/include/wt_internal.h
+++ b/src/third_party/wiredtiger/src/include/wt_internal.h
@@ -328,6 +328,16 @@ union __wt_rand_state;
#elif defined(_MSC_VER)
#include "msvc.h"
#endif
+/*
+ * GLIBC 2.26 and later use the openat syscall to implement open.
+ * Set this flag so that our strace tests know to expect this.
+ */
+#ifdef __GLIBC_PREREQ
+#if __GLIBC_PREREQ(2, 26)
+#define WT_USE_OPENAT 1
+#endif
+#endif
+
#include "hardware.h"
#include "swap.h"
diff --git a/src/third_party/wiredtiger/src/meta/meta_ckpt.c b/src/third_party/wiredtiger/src/meta/meta_ckpt.c
index 13e84efc199..13467a6e635 100644
--- a/src/third_party/wiredtiger/src/meta/meta_ckpt.c
+++ b/src/third_party/wiredtiger/src/meta/meta_ckpt.c
@@ -14,7 +14,7 @@ static int __ckpt_load(WT_SESSION_IMPL *,
WT_CONFIG_ITEM *, WT_CONFIG_ITEM *, WT_CKPT *);
static int __ckpt_named(
WT_SESSION_IMPL *, const char *, const char *, WT_CKPT *);
-static int __ckpt_set(WT_SESSION_IMPL *, const char *, const char *);
+static int __ckpt_set(WT_SESSION_IMPL *, const char *, const char *, bool);
static int __ckpt_version_chk(WT_SESSION_IMPL *, const char *, const char *);
/*
@@ -94,7 +94,7 @@ __wt_meta_checkpoint_clear(WT_SESSION_IMPL *session, const char *fname)
* metadata entry. If no entry is found to update and we're trying to
* clear the checkpoint, just ignore it.
*/
- WT_RET_NOTFOUND_OK(__ckpt_set(session, fname, NULL));
+ WT_RET_NOTFOUND_OK(__ckpt_set(session, fname, NULL, false));
return (0);
}
@@ -104,25 +104,46 @@ __wt_meta_checkpoint_clear(WT_SESSION_IMPL *session, const char *fname)
* Set a file's checkpoint.
*/
static int
-__ckpt_set(WT_SESSION_IMPL *session, const char *fname, const char *v)
+__ckpt_set(
+ WT_SESSION_IMPL *session, const char *fname, const char *v, bool use_base)
{
+ WT_DECL_ITEM(tmp);
WT_DECL_RET;
- const char *cfg[3];
+ const char *cfg[3], *str;
char *config, *newcfg;
+ /*
+ * If the caller knows we're on a path like checkpoints where we
+ * have a valid checkpoint and checkpoint LSN and should use the base,
+ * then use that faster path. Some paths don't have a dhandle or want
+ * to have the older value retained from the existing metadata.
+ * In those cases, use the slower path through configuration
+ * parsing functions.
+ */
config = newcfg = NULL;
-
- /* Retrieve the metadata for this file. */
- WT_ERR(__wt_metadata_search(session, fname, &config));
-
- /* Replace the checkpoint entry. */
- cfg[0] = config;
- cfg[1] = v == NULL ? "checkpoint=()" : v;
- cfg[2] = NULL;
- WT_ERR(__wt_config_collapse(session, cfg, &newcfg));
- WT_ERR(__wt_metadata_update(session, fname, newcfg));
-
-err: __wt_free(session, config);
+ str = v == NULL ? "checkpoint=(),checkpoint_lsn=" : v;
+ if (use_base && session->dhandle != NULL) {
+ WT_ERR(__wt_scr_alloc(session, 0, &tmp));
+ WT_ASSERT(session, strcmp(session->dhandle->name, fname) == 0);
+ /*
+ * Concatenate the metadata base string with the checkpoint
+ * string.
+ */
+ WT_ERR(__wt_buf_fmt(session,
+ tmp, "%s,%s", session->dhandle->meta_base, str));
+ WT_ERR(__wt_metadata_update(session, fname, tmp->mem));
+ } else {
+ /* Retrieve the metadata for this file. */
+ WT_ERR(__wt_metadata_search(session, fname, &config));
+ /* Replace the checkpoint entry. */
+ cfg[0] = config;
+ cfg[1] = str;
+ cfg[2] = NULL;
+ WT_ERR(__wt_config_collapse(session, cfg, &newcfg));
+ WT_ERR(__wt_metadata_update(session, fname, newcfg));
+ }
+err: __wt_scr_free(session, &tmp);
+ __wt_free(session, config);
__wt_free(session, newcfg);
return (ret);
}
@@ -375,6 +396,7 @@ __wt_meta_ckptlist_set(WT_SESSION_IMPL *session,
time_t secs;
int64_t maxorder;
const char *sep;
+ bool has_lsn;
WT_ERR(__wt_scr_alloc(session, 0, &buf));
maxorder = 0;
@@ -448,11 +470,14 @@ __wt_meta_ckptlist_set(WT_SESSION_IMPL *session,
sep = ",";
}
WT_ERR(__wt_buf_catfmt(session, buf, ")"));
+
+ has_lsn = ckptlsn != NULL;
if (ckptlsn != NULL)
WT_ERR(__wt_buf_catfmt(session, buf,
",checkpoint_lsn=(%" PRIu32 ",%" PRIuMAX ")",
ckptlsn->l.file, (uintmax_t)ckptlsn->l.offset));
- WT_ERR(__ckpt_set(session, fname, buf->mem));
+
+ WT_ERR(__ckpt_set(session, fname, buf->mem, has_lsn));
err: __wt_scr_free(session, &buf);
return (ret);
diff --git a/src/third_party/wiredtiger/src/reconcile/rec_write.c b/src/third_party/wiredtiger/src/reconcile/rec_write.c
index d1aaf901534..5821292f454 100644
--- a/src/third_party/wiredtiger/src/reconcile/rec_write.c
+++ b/src/third_party/wiredtiger/src/reconcile/rec_write.c
@@ -4727,7 +4727,6 @@ __rec_col_var(WT_SESSION_IMPL *session,
page = pageref->page;
last = r->last;
vpack = &_vpack;
- cbt = &r->update_modify_cbt;
WT_RET(__rec_split_init(session,
r, page, pageref->ref_recno, btree->maxleafpage_precomp));
@@ -4737,6 +4736,8 @@ __rec_col_var(WT_SESSION_IMPL *session,
size = 0;
upd = NULL;
+ cbt = &r->update_modify_cbt;
+ cbt->iface.session = (WT_SESSION *)session;
/*
* The salvage code may be calling us to reconcile a page where there
* were missing records in the column-store name space. If taking the
@@ -4856,7 +4857,7 @@ record_loop: /*
case WT_UPDATE_MODIFY:
cbt->slot = WT_COL_SLOT(page, cip);
WT_ERR(__wt_value_return_upd(
- session, cbt, upd,
+ cbt, upd,
F_ISSET(r, WT_REC_VISIBLE_ALL)));
data = cbt->iface.value.data;
size = (uint32_t)cbt->iface.value.size;
@@ -5101,7 +5102,7 @@ compare: /*
*/
cbt->slot = UINT32_MAX;
WT_ERR(__wt_value_return_upd(
- session, cbt, upd,
+ cbt, upd,
F_ISSET(r, WT_REC_VISIBLE_ALL)));
data = cbt->iface.value.data;
size = (uint32_t)cbt->iface.value.size;
@@ -5503,6 +5504,7 @@ __rec_row_leaf(WT_SESSION_IMPL *session,
btree = S2BT(session);
cbt = &r->update_modify_cbt;
+ cbt->iface.session = (WT_SESSION *)session;
slvg_skip = salvage == NULL ? 0 : salvage->skip;
key = &r->k;
@@ -5657,7 +5659,7 @@ __rec_row_leaf(WT_SESSION_IMPL *session,
switch (upd->type) {
case WT_UPDATE_MODIFY:
cbt->slot = WT_ROW_SLOT(page, rip);
- WT_ERR(__wt_value_return_upd(session, cbt, upd,
+ WT_ERR(__wt_value_return_upd(cbt, upd,
F_ISSET(r, WT_REC_VISIBLE_ALL)));
WT_ERR(__rec_cell_build_val(session, r,
cbt->iface.value.data,
@@ -5875,6 +5877,7 @@ __rec_row_leaf_insert(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins)
btree = S2BT(session);
cbt = &r->update_modify_cbt;
+ cbt->iface.session = (WT_SESSION *)session;
key = &r->k;
val = &r->v;
@@ -5915,7 +5918,7 @@ __rec_row_leaf_insert(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins)
*/
cbt->slot = UINT32_MAX;
WT_RET(__wt_value_return_upd(
- session, cbt, upd, F_ISSET(r, WT_REC_VISIBLE_ALL)));
+ cbt, upd, F_ISSET(r, WT_REC_VISIBLE_ALL)));
WT_RET(__rec_cell_build_val(session, r,
cbt->iface.value.data,
cbt->iface.value.size, (uint64_t)0));
diff --git a/src/third_party/wiredtiger/src/support/modify.c b/src/third_party/wiredtiger/src/support/modify.c
index 15f8a33772f..228cfff7fea 100644
--- a/src/third_party/wiredtiger/src/support/modify.c
+++ b/src/third_party/wiredtiger/src/support/modify.c
@@ -8,6 +8,57 @@
#include "wt_internal.h"
+#define WT_MODIFY_FOREACH_BEGIN(mod, p, nentries, napplied) \
+ do { \
+ const size_t *__p = p; \
+ const uint8_t *__data = (const uint8_t *)(__p + (size_t)(nentries)*3); \
+ int __i; \
+ for (__i = 0; __i < (nentries); ++__i) { \
+ memcpy(&(mod).data.size, __p++, sizeof(size_t)); \
+ memcpy(&(mod).offset, __p++, sizeof(size_t)); \
+ memcpy(&(mod).size, __p++, sizeof(size_t)); \
+ (mod).data.data = __data; \
+ __data += (mod).data.size; \
+ if (__i < (napplied)) \
+ continue;
+
+#define WT_MODIFY_FOREACH_END \
+ } \
+ } \
+ while (0)
+
+/*
+ * __wt_modify_idempotent --
+ * Check if a modify operation is idempotent.
+ */
+bool
+__wt_modify_idempotent(const void *modify)
+{
+ WT_MODIFY mod;
+ const size_t *p;
+ size_t tmp;
+ int nentries;
+
+ /* Get the number of modify entries. */
+ p = modify;
+ memcpy(&tmp, p++, sizeof(size_t));
+ nentries = (int)tmp;
+
+ WT_MODIFY_FOREACH_BEGIN (mod, p, nentries, 0)
+ {
+ /*
+ * If the number of bytes being replaced doesn't match the
+ * number of bytes being written, we're resizing and the
+ * operation isn't idempotent.
+ */
+ if (mod.size != mod.data.size)
+ return (false);
+ }
+ WT_MODIFY_FOREACH_END;
+
+ return (true);
+}
+
/*
* __wt_modify_pack --
* Pack a modify structure into a buffer.
diff --git a/src/third_party/wiredtiger/src/txn/txn_log.c b/src/third_party/wiredtiger/src/txn/txn_log.c
index cf8e464239a..50547034d02 100644
--- a/src/third_party/wiredtiger/src/txn/txn_log.c
+++ b/src/third_party/wiredtiger/src/txn/txn_log.c
@@ -91,12 +91,24 @@ __txn_op_log(WT_SESSION_IMPL *session,
#endif
switch (upd->type) {
case WT_UPDATE_MODIFY:
- WT_RET(__wt_logop_row_modify_pack(
- session, logrec, fileid, &cursor->key, &value));
+ /*
+ * Write full updates to the log for size-changing
+ * modify operations: they aren't idempotent and
+ * recovery cannot guarantee that they will be applied
+ * exactly once. We rely on the cursor value already
+ * having the modify applied.
+ */
+ if (__wt_modify_idempotent(upd->data))
+ WT_RET(__wt_logop_row_modify_pack(session,
+ logrec, fileid, &cursor->key, &value));
+ else
+ WT_RET(
+ __wt_logop_row_put_pack(session, logrec,
+ fileid, &cursor->key, &cursor->value));
break;
case WT_UPDATE_STANDARD:
- WT_RET(__wt_logop_row_put_pack(
- session, logrec, fileid, &cursor->key, &value));
+ WT_RET(__wt_logop_row_put_pack(session,
+ logrec, fileid, &cursor->key, &value));
break;
case WT_UPDATE_TOMBSTONE:
WT_RET(__wt_logop_row_remove_pack(
@@ -110,8 +122,12 @@ __txn_op_log(WT_SESSION_IMPL *session,
switch (upd->type) {
case WT_UPDATE_MODIFY:
- WT_RET(__wt_logop_col_modify_pack(
- session, logrec, fileid, recno, &value));
+ if (__wt_modify_idempotent(upd->data))
+ WT_RET(__wt_logop_col_modify_pack(session,
+ logrec, fileid, recno, &value));
+ else
+ WT_RET(__wt_logop_col_put_pack(session,
+ logrec, fileid, recno, &cursor->value));
break;
case WT_UPDATE_STANDARD:
WT_RET(__wt_logop_col_put_pack(
diff --git a/src/third_party/wiredtiger/test/syscall/syscall.py b/src/third_party/wiredtiger/test/syscall/syscall.py
index 1fdf157b0fb..23bb54be5ef 100644
--- a/src/third_party/wiredtiger/test/syscall/syscall.py
+++ b/src/third_party/wiredtiger/test/syscall/syscall.py
@@ -156,7 +156,7 @@ defines_used = [
'HAVE_FTRUNCATE', 'O_ACCMODE', 'O_APPEND', 'O_ASYNC',
'O_CLOEXEC', 'O_CREAT', 'O_EXCL', 'O_EXLOCK', 'O_NOATIME',
'O_NOFOLLOW', 'O_NONBLOCK', 'O_RDONLY', 'O_RDWR', 'O_SHLOCK',
- 'O_TRUNC', 'O_WRONLY' ]
+ 'O_TRUNC', 'O_WRONLY', 'WT_USE_OPENAT' ]
################################################################
diff --git a/src/third_party/wiredtiger/test/syscall/wt2336_base/base.run b/src/third_party/wiredtiger/test/syscall/wt2336_base/base.run
index 328d5b8b7dd..56794dc2777 100644
--- a/src/third_party/wiredtiger/test/syscall/wt2336_base/base.run
+++ b/src/third_party/wiredtiger/test/syscall/wt2336_base/base.run
@@ -32,11 +32,11 @@
*/
#ifdef __linux__
SYSTEM("Linux");
-#define OPEN_EXISTING(name, flags) open(name, flags)
+#define OPEN_EXISTING(name, flags) OPEN(name, flags)
#else /* __linux__ */
SYSTEM("Darwin");
#define O_NOATIME 0
-#define OPEN_EXISTING(name, flags) open(name, flags, 0)
+#define OPEN_EXISTING(name, flags) OPEN(name, flags, 0)
#endif /* __linux__ */
#ifdef HAVE_FTRUNCATE
@@ -49,14 +49,20 @@ SYSTEM("Darwin");
#define FTRUNCATE(fd, len) /* do nothing */
#endif
-TRACE("close,fdatasync,fsync,ftruncate,open,pwrite64,rename");
+#ifdef WT_USE_OPENAT
+#define OPEN(...) openat(AT_FDCWD, __VA_ARGS__)
+#else
+#define OPEN(...) open(__VA_ARGS__)
+#endif
+
+TRACE("close,fdatasync,fsync,ftruncate,open,openat,pwrite64,rename");
RUN("");
...
OUTPUT("--------------wiredtiger_open");
// lock == 3
-lock = open("./WiredTiger.lock", O_RDWR|O_CREAT|O_CLOEXEC, 0666);
+lock = OPEN("./WiredTiger.lock", O_RDWR|O_CREAT|O_CLOEXEC, 0666);
pwrite64(lock, "WiredTiger lock file\n", 0x15, 0x0);
-fd = open("./WiredTiger", O_RDWR|O_CREAT|O_CLOEXEC, 0666);
+fd = OPEN("./WiredTiger", O_RDWR|O_CREAT|O_CLOEXEC, 0666);
pwrite64(fd, "WiredTiger\nWiredTiger"..., ...);
#ifdef __linux__
fdatasync(fd);
@@ -65,7 +71,7 @@ close(fd);
... // On Linux, there are calls to open and read "/proc/meminfo" here.
-fd = open("./WiredTiger.basecfg.set", O_RDWR|O_CREAT|O_EXCL|O_CLOEXEC, 0666);
+fd = OPEN("./WiredTiger.basecfg.set", O_RDWR|O_CREAT|O_EXCL|O_CLOEXEC, 0666);
pwrite64(fd, "# Do not modify this file."..., ...);
#ifdef __linux__
fdatasync(fd);
@@ -74,15 +80,15 @@ close(fd);
rename("./WiredTiger.basecfg.set", "./WiredTiger.basecfg");
#ifdef __linux__
-dir = open("./", O_RDONLY);
+dir = OPEN("./", O_RDONLY);
fdatasync(dir);
close(dir);
#endif
-fd = open("./WiredTiger.wt", O_RDWR|O_CREAT|O_EXCL|O_NOATIME|O_CLOEXEC, 0666);
+fd = OPEN("./WiredTiger.wt", O_RDWR|O_CREAT|O_EXCL|O_NOATIME|O_CLOEXEC, 0666);
#ifdef __linux__
-dir = open("./", O_RDONLY);
+dir = OPEN("./", O_RDONLY);
fdatasync(dir);
close(dir);
#endif /* __linux__ */
@@ -96,7 +102,7 @@ close(fd);
wt = OPEN_EXISTING("./WiredTiger.wt\0", O_RDWR|O_NOATIME|O_CLOEXEC);
FTRUNCATE(wt, 0x1000);
-fd = open("./WiredTiger.turtle.set\0", O_RDWR|O_CREAT|O_EXCL|O_CLOEXEC, 0666);
+fd = OPEN("./WiredTiger.turtle.set\0", O_RDWR|O_CREAT|O_EXCL|O_CLOEXEC, 0666);
pwrite64(fd, "WiredTiger version string\nWiredTiger"..., ...);
#ifdef __linux__
fdatasync(fd);
@@ -106,10 +112,10 @@ rename("./WiredTiger.turtle.set", "./WiredTiger.turtle");
... // There is a second open of turtle here, is it important?
-fd = open("./WiredTigerLAS.wt", O_RDWR|O_CREAT|O_EXCL|O_NOATIME|O_CLOEXEC, 0666);
+fd = OPEN("./WiredTigerLAS.wt", O_RDWR|O_CREAT|O_EXCL|O_NOATIME|O_CLOEXEC, 0666);
#ifdef __linux__
-dir = open("./", O_RDONLY);
+dir = OPEN("./", O_RDONLY);
fdatasync(dir);
close(dir);
#endif /* __linux__ */
@@ -131,10 +137,7 @@ pwrite64(wt, ""..., 0x1000, 0x3000);
#ifdef __linux__
fdatasync(wt);
#endif /* __linux__ */
-fd = OPEN_EXISTING("./WiredTiger.turtle", O_RDWR|O_CLOEXEC);
-
-close(fd);
-fd = open("./WiredTiger.turtle.set", O_RDWR|O_CREAT|O_EXCL|O_CLOEXEC, 0666);
+fd = OPEN("./WiredTiger.turtle.set", O_RDWR|O_CREAT|O_EXCL|O_CLOEXEC, 0666);
pwrite64(fd, "WiredTiger version string\nWiredTiger"..., ...);
#ifdef __linux__
fdatasync(fd);
@@ -142,7 +145,7 @@ fdatasync(fd);
close(fd);
rename("./WiredTiger.turtle.set", "./WiredTiger.turtle");
#ifdef __linux__
-dir = open("./", O_RDONLY);
+dir = OPEN("./", O_RDONLY);
fdatasync(dir);
close(dir);
fdatasync(wt);
@@ -151,9 +154,9 @@ fdatasync(wt);
OUTPUT("--------------open_session");
OUTPUT("--------------create");
-hello = open("./hello.wt", O_RDWR|O_CREAT|O_EXCL|O_NOATIME|O_CLOEXEC, 0666);
+hello = OPEN("./hello.wt", O_RDWR|O_CREAT|O_EXCL|O_NOATIME|O_CLOEXEC, 0666);
#ifdef __linux__
-dir = open("./", O_RDONLY);
+dir = OPEN("./", O_RDONLY);
fdatasync(dir);
close(dir);
#endif /* __linux__ */