diff options
author | Luke Chen <luke.chen@mongodb.com> | 2021-04-28 16:27:05 +1000 |
---|---|---|
committer | Luke Chen <luke.chen@mongodb.com> | 2021-04-28 16:27:05 +1000 |
commit | a83b7f8120c16b346e45a47a39cdf2543f94838a (patch) | |
tree | 2af87c41a2fa31ad025ff6d08824f1b905660553 | |
parent | 552e3487e1227e37f9d91d3294895c0049dfce18 (diff) | |
download | mongo-a83b7f8120c16b346e45a47a39cdf2543f94838a.tar.gz |
Import wiredtiger: 38e948b9eb4ab532372ea62d0bc18b927007b079 from branch mongodb-4.4
ref: 885403077c..38e948b9eb
for: 4.4.6
WT-7426 Set write generation number when the page image gets created
-rw-r--r-- | src/third_party/wiredtiger/import.data | 2 | ||||
-rw-r--r-- | src/third_party/wiredtiger/src/btree/bt_io.c | 15 | ||||
-rw-r--r-- | src/third_party/wiredtiger/src/include/cell_inline.h | 3 | ||||
-rw-r--r-- | src/third_party/wiredtiger/src/reconcile/rec_write.c | 30 |
4 files changed, 33 insertions, 17 deletions
diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data index 1c581648cb2..d952484a5f3 100644 --- a/src/third_party/wiredtiger/import.data +++ b/src/third_party/wiredtiger/import.data @@ -2,5 +2,5 @@ "vendor": "wiredtiger", "github": "wiredtiger/wiredtiger.git", "branch": "mongodb-4.4", - "commit": "885403077cfdbabc8107b9522af57513952e6f85" + "commit": "38e948b9eb4ab532372ea62d0bc18b927007b079" } diff --git a/src/third_party/wiredtiger/src/btree/bt_io.c b/src/third_party/wiredtiger/src/btree/bt_io.c index 611b4d3dc64..373cc7b71f1 100644 --- a/src/third_party/wiredtiger/src/btree/bt_io.c +++ b/src/third_party/wiredtiger/src/btree/bt_io.c @@ -303,20 +303,7 @@ __wt_bt_write(WT_SESSION_IMPL *session, WT_ITEM *buf, uint8_t *addr, size_t *add if (encrypted) F_SET(dsk, WT_PAGE_ENCRYPTED); - /* - * We increment the block's write generation so it's easy to identify newer versions of blocks - * during salvage. (It's common in WiredTiger, at least for the default block manager, for - * multiple blocks to be internally consistent with identical first and last keys, so we need a - * way to know the most recent state of the block. We could check which leaf is referenced by a - * valid internal page, but that implies salvaging internal pages, which I don't want to do, and - * it's not as good anyway, because the internal page may not have been written after the leaf - * page was updated. So, write generations it is. - * - * Nothing is locked at this point but two versions of a page with the same generation is pretty - * unlikely, and if we did, they're going to be roughly identical for the purposes of salvage, - * anyway. - */ - dsk->write_gen = ++btree->write_gen; + WT_ASSERT(session, (dsk->write_gen != 0 && dsk->write_gen > btree->base_write_gen)); /* * Checksum the data if the buffer isn't compressed or checksums are configured. diff --git a/src/third_party/wiredtiger/src/include/cell_inline.h b/src/third_party/wiredtiger/src/include/cell_inline.h index 70233e3f9cd..d3e0f85359d 100644 --- a/src/third_party/wiredtiger/src/include/cell_inline.h +++ b/src/third_party/wiredtiger/src/include/cell_inline.h @@ -970,7 +970,8 @@ __cell_unpack_window_cleanup(WT_SESSION_IMPL *session, const WT_PAGE_HEADER *dsk * No delete txnid=MAX, ts=MAX, txnid=MAX, ts=MAX, * durable_ts=NONE durable_ts=NONE */ - if (dsk->write_gen == 0 || dsk->write_gen > S2BT(session)->base_write_gen) + WT_ASSERT(session, dsk->write_gen != 0); + if (dsk->write_gen > S2BT(session)->base_write_gen) return; /* Tell reconciliation we cleared the transaction ids and the cell needs to be rebuilt. */ diff --git a/src/third_party/wiredtiger/src/reconcile/rec_write.c b/src/third_party/wiredtiger/src/reconcile/rec_write.c index 3e21b7f3ca0..97743d7c3ee 100644 --- a/src/third_party/wiredtiger/src/reconcile/rec_write.c +++ b/src/third_party/wiredtiger/src/reconcile/rec_write.c @@ -1514,6 +1514,32 @@ err: } /* + * __rec_set_page_write_gen -- + * Initialize the page write generation number. + */ +static void +__rec_set_page_write_gen(WT_PAGE_HEADER *dsk, WT_BTREE *btree) +{ + /* + * We increment the block's write generation so it's easy to identify newer versions of blocks + * during salvage. (It's common in WiredTiger, at least for the default block manager, for + * multiple blocks to be internally consistent with identical first and last keys, so we need a + * way to know the most recent state of the block. We could check which leaf is referenced by a + * valid internal page, but that implies salvaging internal pages, which I don't want to do, and + * it's not as good anyway, because the internal page may not have been written after the leaf + * page was updated. So, write generations it is. + * + * Nothing is locked at this point but two versions of a page with the same generation is pretty + * unlikely, and if we did, they're going to be roughly identical for the purposes of salvage, + * anyway. + * + * Other than salvage, the write generation number is used to reset the stale transaction id's + * present on the page upon server restart. + */ + dsk->write_gen = ++btree->write_gen; +} + +/* * __rec_split_write_header -- * Initialize a disk page's header. */ @@ -1528,7 +1554,8 @@ __rec_split_write_header(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REC_CHUNK page = r->page; dsk->recno = btree->type == BTREE_ROW ? WT_RECNO_OOB : multi->key.recno; - dsk->write_gen = 0; + + __rec_set_page_write_gen(dsk, btree); dsk->mem_size = multi->size; dsk->u.entries = chunk->entries; dsk->type = page->type; @@ -2341,6 +2368,7 @@ __wt_rec_cell_build_ovfl(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REC_KV *k dsk = tmp->mem; memset(dsk, 0, WT_PAGE_HEADER_SIZE); dsk->type = WT_PAGE_OVFL; + __rec_set_page_write_gen(dsk, btree); dsk->u.datalen = (uint32_t)kv->buf.size; memcpy(WT_PAGE_HEADER_BYTE(btree, dsk), kv->buf.data, kv->buf.size); dsk->mem_size = WT_PAGE_HEADER_BYTE_SIZE(btree) + (uint32_t)kv->buf.size; |