summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Luke Chen <luke.chen@mongodb.com> 2021-04-28 16:27:05 +1000
committer Luke Chen <luke.chen@mongodb.com> 2021-04-28 16:27:05 +1000
commit a83b7f8120c16b346e45a47a39cdf2543f94838a (patch)
tree 2af87c41a2fa31ad025ff6d08824f1b905660553
parent 552e3487e1227e37f9d91d3294895c0049dfce18 (diff)
download mongo-a83b7f8120c16b346e45a47a39cdf2543f94838a.tar.gz
Import wiredtiger: 38e948b9eb4ab532372ea62d0bc18b927007b079 from branch mongodb-4.4
ref: 885403077c..38e948b9eb for: 4.4.6 WT-7426 Set write generation number when the page image gets created
-rw-r--r-- src/third_party/wiredtiger/import.data 2
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_io.c 15
-rw-r--r-- src/third_party/wiredtiger/src/include/cell_inline.h 3
-rw-r--r-- src/third_party/wiredtiger/src/reconcile/rec_write.c 30
4 files changed, 33 insertions, 17 deletions
diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data
index 1c581648cb2..d952484a5f3 100644
--- a/src/third_party/wiredtiger/import.data
+++ b/src/third_party/wiredtiger/import.data
@@ -2,5 +2,5 @@
"vendor": "wiredtiger",
"github": "wiredtiger/wiredtiger.git",
"branch": "mongodb-4.4",
- "commit": "885403077cfdbabc8107b9522af57513952e6f85"
+ "commit": "38e948b9eb4ab532372ea62d0bc18b927007b079"
}
diff --git a/src/third_party/wiredtiger/src/btree/bt_io.c b/src/third_party/wiredtiger/src/btree/bt_io.c
index 611b4d3dc64..373cc7b71f1 100644
--- a/src/third_party/wiredtiger/src/btree/bt_io.c
+++ b/src/third_party/wiredtiger/src/btree/bt_io.c
@@ -303,20 +303,7 @@ __wt_bt_write(WT_SESSION_IMPL *session, WT_ITEM *buf, uint8_t *addr, size_t *add
if (encrypted)
F_SET(dsk, WT_PAGE_ENCRYPTED);
- /*
- * We increment the block's write generation so it's easy to identify newer versions of blocks
- * during salvage. (It's common in WiredTiger, at least for the default block manager, for
- * multiple blocks to be internally consistent with identical first and last keys, so we need a
- * way to know the most recent state of the block. We could check which leaf is referenced by a
- * valid internal page, but that implies salvaging internal pages, which I don't want to do, and
- * it's not as good anyway, because the internal page may not have been written after the leaf
- * page was updated. So, write generations it is.
- *
- * Nothing is locked at this point but two versions of a page with the same generation is pretty
- * unlikely, and if we did, they're going to be roughly identical for the purposes of salvage,
- * anyway.
- */
- dsk->write_gen = ++btree->write_gen;
+ WT_ASSERT(session, (dsk->write_gen != 0 && dsk->write_gen > btree->base_write_gen));
/*
* Checksum the data if the buffer isn't compressed or checksums are configured.
diff --git a/src/third_party/wiredtiger/src/include/cell_inline.h b/src/third_party/wiredtiger/src/include/cell_inline.h
index 70233e3f9cd..d3e0f85359d 100644
--- a/src/third_party/wiredtiger/src/include/cell_inline.h
+++ b/src/third_party/wiredtiger/src/include/cell_inline.h
@@ -970,7 +970,8 @@ __cell_unpack_window_cleanup(WT_SESSION_IMPL *session, const WT_PAGE_HEADER *dsk
* No delete txnid=MAX, ts=MAX, txnid=MAX, ts=MAX,
* durable_ts=NONE durable_ts=NONE
*/
- if (dsk->write_gen == 0 || dsk->write_gen > S2BT(session)->base_write_gen)
+ WT_ASSERT(session, dsk->write_gen != 0);
+ if (dsk->write_gen > S2BT(session)->base_write_gen)
return;
/* Tell reconciliation we cleared the transaction ids and the cell needs to be rebuilt. */
diff --git a/src/third_party/wiredtiger/src/reconcile/rec_write.c b/src/third_party/wiredtiger/src/reconcile/rec_write.c
index 3e21b7f3ca0..97743d7c3ee 100644
--- a/src/third_party/wiredtiger/src/reconcile/rec_write.c
+++ b/src/third_party/wiredtiger/src/reconcile/rec_write.c
@@ -1514,6 +1514,32 @@ err:
}
/*
+ * __rec_set_page_write_gen --
+ * Advance the btree's write generation and stamp the new value into the page header.
+ */
+static void
+__rec_set_page_write_gen(WT_PAGE_HEADER *dsk, WT_BTREE *btree)
+{
+ /*
+ * We increment the block's write generation so it's easy to identify newer versions of blocks
+ * during salvage. (It's common in WiredTiger, at least for the default block manager, for
+ * multiple blocks to be internally consistent with identical first and last keys, so we need a
+ * way to know the most recent state of the block. We could check which leaf is referenced by a
+ * valid internal page, but that implies salvaging internal pages, which I don't want to do, and
+ * it's not as good anyway, because the internal page may not have been written after the leaf
+ * page was updated. So, write generations it is.)
+ *
+ * Nothing is locked at this point, but two versions of a page sharing the same generation are
+ * pretty unlikely, and if it did happen, they would be roughly identical for the purposes of
+ * salvage anyway.
+ *
+ * Other than salvage, the write generation number is used to reset the stale transaction ids
+ * present on the page upon server restart.
+ */
+ dsk->write_gen = ++btree->write_gen;
+}
+
+/*
* __rec_split_write_header --
* Initialize a disk page's header.
*/
@@ -1528,7 +1554,8 @@ __rec_split_write_header(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REC_CHUNK
page = r->page;
dsk->recno = btree->type == BTREE_ROW ? WT_RECNO_OOB : multi->key.recno;
- dsk->write_gen = 0;
+
+ __rec_set_page_write_gen(dsk, btree);
dsk->mem_size = multi->size;
dsk->u.entries = chunk->entries;
dsk->type = page->type;
@@ -2341,6 +2368,7 @@ __wt_rec_cell_build_ovfl(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REC_KV *k
dsk = tmp->mem;
memset(dsk, 0, WT_PAGE_HEADER_SIZE);
dsk->type = WT_PAGE_OVFL;
+ __rec_set_page_write_gen(dsk, btree);
dsk->u.datalen = (uint32_t)kv->buf.size;
memcpy(WT_PAGE_HEADER_BYTE(btree, dsk), kv->buf.data, kv->buf.size);
dsk->mem_size = WT_PAGE_HEADER_BYTE_SIZE(btree) + (uint32_t)kv->buf.size;