summary refs log tree commit diff
diff options
context:
space:
mode:
author Luke Chen <luke.chen@mongodb.com> 2017-11-22 20:21:34 +1100
committer Luke Chen <luke.chen@mongodb.com> 2017-11-22 20:21:34 +1100
commit 734205a16d1fb75f4db81108ed3eb40fe06a0a07 (patch)
tree c3be5b78b6a46912c0e3c4f63373e2ed7d09eb07
parent e35e66b50e4a0b362004730c8481025917c4e152 (diff)
download mongo-r3.6.0-rc5.tar.gz
Import wiredtiger: 7a7d6ffec48afeef4e1d7b23bceb4015986f47b9 from branch mongodb-3.6 (tag: r3.6.0-rc5)
ref: 923937e6b2..7a7d6ffec4
for: 3.6.0-rc5

WT-3751 Allow pages to split when nothing is visible
WT-3752 WiredTiger can walk long update chains for hot rows.
WT-3754 serial updates to the oldest should consider pinned timestamp for visibility
WT-3758 Turn on the snappy compression for the lookaside table
WT-3760 Avoid writing overflow values into the lookaside file
-rw-r--r-- src/third_party/wiredtiger/dist/s_define.list 1
-rw-r--r-- src/third_party/wiredtiger/import.data 2
-rw-r--r-- src/third_party/wiredtiger/src/btree/row_modify.c 29
-rw-r--r-- src/third_party/wiredtiger/src/include/btmem.h 10
-rw-r--r-- src/third_party/wiredtiger/src/include/serial.i 8
-rw-r--r-- src/third_party/wiredtiger/src/reconcile/rec_write.c 76
6 files changed, 92 insertions, 34 deletions
diff --git a/src/third_party/wiredtiger/dist/s_define.list b/src/third_party/wiredtiger/dist/s_define.list
index fb0162079d9..4be8ceee0e3 100644
--- a/src/third_party/wiredtiger/dist/s_define.list
+++ b/src/third_party/wiredtiger/dist/s_define.list
@@ -35,6 +35,7 @@ WT_LOG_SLOT_MASK_OFF
WT_LOG_SLOT_MASK_ON
WT_LOG_SLOT_MAXBITS
WT_LOG_SLOT_UNBUFFERED_ISSET
+WT_LOOKASIDE_COMPRESSOR
WT_PACKED_STRUCT_BEGIN
WT_PACKED_STRUCT_END
WT_PADDING_CHECK
diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data
index cf67d077b45..546c9967ece 100644
--- a/src/third_party/wiredtiger/import.data
+++ b/src/third_party/wiredtiger/import.data
@@ -1,5 +1,5 @@
{
- "commit": "923937e6b2bb7ffc1ff86bfaa8c2f130930286de",
+ "commit": "7a7d6ffec48afeef4e1d7b23bceb4015986f47b9",
"github": "wiredtiger/wiredtiger.git",
"vendor": "wiredtiger",
"branch": "mongodb-3.6"
diff --git a/src/third_party/wiredtiger/src/btree/row_modify.c b/src/third_party/wiredtiger/src/btree/row_modify.c
index 5e84899999a..a2aaeb7673f 100644
--- a/src/third_party/wiredtiger/src/btree/row_modify.c
+++ b/src/third_party/wiredtiger/src/btree/row_modify.c
@@ -299,9 +299,12 @@ WT_UPDATE *
__wt_update_obsolete_check(
WT_SESSION_IMPL *session, WT_PAGE *page, WT_UPDATE *upd)
{
+ WT_TXN_GLOBAL *txn_global;
WT_UPDATE *first, *next;
u_int count;
+ txn_global = &S2C(session)->txn_global;
+
/*
* This function identifies obsolete updates, and truncates them from
* the rest of the chain; because this routine is called from inside
@@ -313,13 +316,14 @@ __wt_update_obsolete_check(
* Only updates with globally visible, self-contained data can terminate
* update chains.
*/
- for (first = NULL, count = 0; upd != NULL; upd = upd->next, count++)
- if (WT_UPDATE_DATA_VALUE(upd) &&
- __wt_txn_upd_visible_all(session, upd)) {
- if (first == NULL)
- first = upd;
- } else if (upd->txnid != WT_TXN_ABORTED)
+ for (first = NULL, count = 0; upd != NULL; upd = upd->next, count++) {
+ if (upd->txnid == WT_TXN_ABORTED)
+ continue;
+ if (!__wt_txn_upd_visible_all(session, upd))
first = NULL;
+ else if (first == NULL && WT_UPDATE_DATA_VALUE(upd))
+ first = upd;
+ }
/*
* We cannot discard this WT_UPDATE structure, we can only discard
@@ -338,9 +342,16 @@ __wt_update_obsolete_check(
* trim update lists independently of the page state, ensure there
* is a modify structure.
*/
- if (count > 20 && page->modify != NULL)
- page->modify->obsolete_check_txn =
- S2C(session)->txn_global.last_running;
+ if (count > 20 && page->modify != NULL) {
+ page->modify->obsolete_check_txn = txn_global->last_running;
+#ifdef HAVE_TIMESTAMPS
+ if (txn_global->has_pinned_timestamp)
+ WT_WITH_TIMESTAMP_READLOCK(session, &txn_global->rwlock,
+ __wt_timestamp_set(
+ &page->modify->obsolete_check_timestamp,
+ &txn_global->pinned_timestamp));
+#endif
+ }
return (NULL);
}
diff --git a/src/third_party/wiredtiger/src/include/btmem.h b/src/third_party/wiredtiger/src/include/btmem.h
index bd881af0ecf..d45b68d1972 100644
--- a/src/third_party/wiredtiger/src/include/btmem.h
+++ b/src/third_party/wiredtiger/src/include/btmem.h
@@ -182,9 +182,16 @@ struct __wt_ovfl_reuse {
* makes the lookaside table's value more likely to overflow the page size when
* the row-store key is relatively large.
*/
+#ifdef HAVE_BUILTIN_EXTENSION_SNAPPY
+#define WT_LOOKASIDE_COMPRESSOR "snappy"
+#else
+#define WT_LOOKASIDE_COMPRESSOR "none"
+#endif
#define WT_LAS_CONFIG \
"key_format=" WT_UNCHECKED_STRING(QIQu) \
- ",value_format=" WT_UNCHECKED_STRING(QuBu)
+ ",value_format=" WT_UNCHECKED_STRING(QuBu) \
+ ",block_compressor=" WT_LOOKASIDE_COMPRESSOR \
+ ",leaf_value_max=64MB"
/*
* WT_PAGE_LOOKASIDE --
@@ -218,6 +225,7 @@ struct __wt_page_modify {
/* Avoid checking for obsolete updates during checkpoints. */
uint64_t obsolete_check_txn;
+ WT_DECL_TIMESTAMP(obsolete_check_timestamp)
/* The largest transaction seen on the page by reconciliation. */
uint64_t rec_max_txn;
diff --git a/src/third_party/wiredtiger/src/include/serial.i b/src/third_party/wiredtiger/src/include/serial.i
index c5758ee605a..d471ebb399c 100644
--- a/src/third_party/wiredtiger/src/include/serial.i
+++ b/src/third_party/wiredtiger/src/include/serial.i
@@ -263,6 +263,7 @@ __wt_update_serial(WT_SESSION_IMPL *session, WT_PAGE *page,
{
WT_DECL_RET;
WT_UPDATE *obsolete, *upd = *updp;
+ wt_timestamp_t *obsolete_timestamp;
uint64_t txn;
/* Clear references to memory we now own and must free on error. */
@@ -309,11 +310,14 @@ __wt_update_serial(WT_SESSION_IMPL *session, WT_PAGE *page,
* is used as an indicator of there being further updates on this page.
*/
if ((txn = page->modify->obsolete_check_txn) != WT_TXN_NONE) {
- if (!__wt_txn_visible_all(session, txn, NULL)) {
+ obsolete_timestamp =
+ WT_TIMESTAMP_NULL(&page->modify->obsolete_check_timestamp);
+ if (!__wt_txn_visible_all(session, txn, obsolete_timestamp)) {
/* Try to move the oldest ID forward and re-check. */
WT_RET(__wt_txn_update_oldest(session, 0));
- if (!__wt_txn_visible_all(session, txn, NULL))
+ if (!__wt_txn_visible_all(
+ session, txn, obsolete_timestamp))
return (0);
}
diff --git a/src/third_party/wiredtiger/src/reconcile/rec_write.c b/src/third_party/wiredtiger/src/reconcile/rec_write.c
index 7e9980e0887..77b8c2a2e78 100644
--- a/src/third_party/wiredtiger/src/reconcile/rec_write.c
+++ b/src/third_party/wiredtiger/src/reconcile/rec_write.c
@@ -862,10 +862,11 @@ __rec_raw_compression_config(WT_SESSION_IMPL *session,
/*
* XXX
- * Turn off if lookaside is configured: lookaside potentially writes
- * blocks without entries and raw compression isn't ready for that.
+ * Turn off if lookaside or update/restore are configured: those modes
+ * potentially write blocks without entries and raw compression isn't
+ * ready for that.
*/
- if (LF_ISSET(WT_REC_LOOKASIDE))
+ if (LF_ISSET(WT_REC_LOOKASIDE | WT_REC_UPDATE_RESTORE))
return (false);
/*
@@ -1249,7 +1250,8 @@ err: __wt_scr_free(session, &tmp);
*/
static int
__rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r,
- WT_INSERT *ins, void *ripcip, WT_CELL_UNPACK *vpack, WT_UPDATE **updp)
+ WT_INSERT *ins, void *ripcip, WT_CELL_UNPACK *vpack,
+ bool *upd_savedp, WT_UPDATE **updp)
{
WT_PAGE *page;
WT_UPDATE *first_txn_upd, *first_upd, *upd;
@@ -1263,6 +1265,8 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r,
first_ts_upd = NULL;
#endif
+ if (upd_savedp != NULL)
+ *upd_savedp = false;
*updp = NULL;
page = r->page;
@@ -1476,6 +1480,9 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r,
*/
WT_RET(__rec_update_save(session, r, ins, ripcip, *updp, upd_memsize));
+ if (upd_savedp != NULL)
+ *upd_savedp = true;
+
#ifdef HAVE_TIMESTAMPS
/* Track the oldest saved timestamp for lookaside. */
if (F_ISSET(r, WT_REC_LOOKASIDE)) {
@@ -1505,11 +1512,15 @@ check_original_value:
/*
* Returning an update means the original on-page value might be lost,
* and that's a problem if there's a reader that needs it. There are
- * three cases: any update from a modify operation (because the modify
- * has to be applied to a stable update, not the new on-page update),
- * any lookaside table eviction (because the backing disk image is
- * rewritten), or any reconciliation of a backing overflow record that
- * will be physically removed once it's no longer needed.
+ * several cases:
+ * - any update with no backing record (because we will store an empty
+ * value on page and returning that is wrong).
+ * - any update from a modify operation (because the modify has to be
+ * applied to a stable update, not the new on-page update),
+ * - any lookaside table eviction (because the backing disk image is
+ * rewritten),
+ * - or any reconciliation of a backing overflow record that will be
+ * physically removed once it's no longer needed.
*/
if (*updp != NULL && ((*updp)->type == WT_UPDATE_MODIFIED ||
F_ISSET(r, WT_REC_LOOKASIDE) || (vpack != NULL &&
@@ -4206,7 +4217,7 @@ __rec_col_fix(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REF *pageref)
/* Update any changes to the original on-page data items. */
WT_SKIP_FOREACH(ins, WT_COL_UPDATE_SINGLE(page)) {
- WT_RET(__rec_txn_read(session, r, ins, NULL, NULL, &upd));
+ WT_RET(__rec_txn_read(session, r, ins, NULL, NULL, NULL, &upd));
if (upd != NULL)
__bit_setv(r->first_free,
WT_INSERT_RECNO(ins) - pageref->ref_recno,
@@ -4250,8 +4261,8 @@ __rec_col_fix(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REF *pageref)
break;
upd = NULL;
} else {
- WT_RET(
- __rec_txn_read(session, r, ins, NULL, NULL, &upd));
+ WT_RET(__rec_txn_read(
+ session, r, ins, NULL, NULL, NULL, &upd));
recno = WT_INSERT_RECNO(ins);
}
for (;;) {
@@ -4602,7 +4613,7 @@ record_loop: /*
upd = NULL;
if (ins != NULL && WT_INSERT_RECNO(ins) == src_recno) {
WT_ERR(__rec_txn_read(
- session, r, ins, cip, vpack, &upd));
+ session, r, ins, cip, vpack, NULL, &upd));
ins = WT_SKIP_NEXT(ins);
}
@@ -4819,8 +4830,8 @@ compare: /*
upd = NULL;
} else {
- WT_ERR(
- __rec_txn_read(session, r, ins, NULL, NULL, &upd));
+ WT_ERR(__rec_txn_read(
+ session, r, ins, NULL, NULL, NULL, &upd));
n = WT_INSERT_RECNO(ins);
}
while (src_recno <= n) {
@@ -5318,7 +5329,8 @@ __rec_row_leaf(WT_SESSION_IMPL *session,
vpack = &_vpack;
__wt_cell_unpack(val_cell, vpack);
}
- WT_ERR(__rec_txn_read(session, r, NULL, rip, vpack, &upd));
+ WT_ERR(__rec_txn_read(
+ session, r, NULL, rip, vpack, NULL, &upd));
/* Build value cell. */
dictionary = false;
@@ -5632,7 +5644,7 @@ __rec_row_leaf_insert(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins)
WT_CURSOR_BTREE *cbt;
WT_KV *key, *val;
WT_UPDATE *upd;
- bool ovfl_key;
+ bool ovfl_key, upd_saved;
btree = S2BT(session);
cbt = &r->update_modify_cbt;
@@ -5641,11 +5653,32 @@ __rec_row_leaf_insert(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins)
val = &r->v;
for (; ins != NULL; ins = WT_SKIP_NEXT(ins)) {
- WT_RET(__rec_txn_read(session, r, ins, NULL, NULL, &upd));
+ WT_RET(__rec_txn_read(
+ session, r, ins, NULL, NULL, &upd_saved, &upd));
+
+ if (upd == NULL) {
+ /*
+ * If no update is visible but some were saved, check
+ * for splits.
+ */
+ if (!upd_saved)
+ continue;
+ if (!__rec_need_split(r, WT_INSERT_KEY_SIZE(ins)))
+ continue;
+
+ /* Copy the current key into place and then split. */
+ WT_RET(__wt_buf_set(session, r->cur,
+ WT_INSERT_KEY(ins), WT_INSERT_KEY_SIZE(ins)));
+ WT_RET(__rec_split_crossing_bnd(
+ session, r, WT_INSERT_KEY_SIZE(ins)));
- /* If no updates are visible there's no work to do. */
- if (upd == NULL)
+ /*
+ * Turn off prefix compression until a full key is
+ * written into the new page.
+ */
+ r->key_pfx_compress = false;
continue;
+ }
switch (upd->type) {
case WT_UPDATE_DELETED:
@@ -5671,7 +5704,8 @@ __rec_row_leaf_insert(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins)
break;
WT_ILLEGAL_VALUE(session);
}
- /* Build key cell. */
+
+ /* Build key cell. */
WT_RET(__rec_cell_build_leaf_key(session, r,
WT_INSERT_KEY(ins), WT_INSERT_KEY_SIZE(ins), &ovfl_key));