diff options
author | Luke Chen <luke.chen@mongodb.com> | 2019-04-16 14:36:51 +1000 |
---|---|---|
committer | Luke Chen <luke.chen@mongodb.com> | 2019-04-16 14:45:56 +1000 |
commit | a654dcf592ea7ed65426a0de96b4079ff4fc6716 (patch) | |
tree | a5256edad1bb219e6af72fd7e7525f58e235a307 /src/third_party/wiredtiger/src/btree/bt_rebalance.c | |
parent | 19b622ebfb42a525f38e278c09f440eb47b12f1e (diff) | |
download | mongo-a654dcf592ea7ed65426a0de96b4079ff4fc6716.tar.gz |
Import wiredtiger: 9416282c42d40328dfb7ff0f28831f639f98d3cb from branch mongodb-4.2
ref: 1768d66613..9416282c42
for: 4.1.11
WT-4317 Read checksum error in test_wt4156_metadata_salvage
WT-4579 Track the newest durable timestamp for each page
WT-4585 Add WT_WITH_HOTBACKUP_LOCK macro
WT-4598 Enable the assertion that the durable_timestamp is newer than or equals the commit timestamp.
WT-4640 Remove round_to_oldest in favour of roundup_timestamps
WT-4695 Python3: allow most tests to run with Python3 with small changes
WT-4696 Python3: change dist scripts to run under Python3
WT-4698 Python3: fix modify related tests
WT-4699 Python3: fix test_jsondump02.py
WT-4700 Python3: run with same source as Python2
WT-4703 Extend test/checkpoint to do removes and online checking
WT-4704 Add statistic tracking oldest active read timestamp
WT-4705 column-store no longer needs to handle WT_COL page offsets of 0
WT-4707 Failure in verifying cells with copied values
WT-4708 Coverity reported copy-paste error in WiredTiger error message
WT-4711 Python formatting errors reported while running "s_all"
WT-4714 Use the durable timestamp to determine if a page should stay dirty
WT-4724 Syntax error in wtperf_ckpt.sh when running 'dash' as default shell
Diffstat (limited to 'src/third_party/wiredtiger/src/btree/bt_rebalance.c')
-rw-r--r-- | src/third_party/wiredtiger/src/btree/bt_rebalance.c | 51 |
1 files changed, 30 insertions, 21 deletions
diff --git a/src/third_party/wiredtiger/src/btree/bt_rebalance.c b/src/third_party/wiredtiger/src/btree/bt_rebalance.c index 46dc96aedce..c04135ee82d 100644 --- a/src/third_party/wiredtiger/src/btree/bt_rebalance.c +++ b/src/third_party/wiredtiger/src/btree/bt_rebalance.c @@ -57,9 +57,9 @@ __rebalance_discard(WT_SESSION_IMPL *session, WT_REBALANCE_STUFF *rs) * Add a new entry to the list of leaf pages. */ static int -__rebalance_leaf_append(WT_SESSION_IMPL *session, - const uint8_t *key, size_t key_len, - WT_CELL_UNPACK *unpack, WT_REBALANCE_STUFF *rs) +__rebalance_leaf_append(WT_SESSION_IMPL *session, wt_timestamp_t durable_ts, + const uint8_t *key, size_t key_len, WT_CELL_UNPACK *unpack, + WT_REBALANCE_STUFF *rs) { WT_ADDR *copy_addr; WT_REF *copy; @@ -80,7 +80,7 @@ __rebalance_leaf_append(WT_SESSION_IMPL *session, WT_RET(__wt_calloc_one(session, &copy_addr)); copy->addr = copy_addr; copy_addr->oldest_start_ts = unpack->oldest_start_ts; - copy_addr->newest_start_ts = unpack->newest_start_ts; + copy_addr->newest_durable_ts = durable_ts; copy_addr->newest_stop_ts = unpack->newest_stop_ts; WT_RET(__wt_memdup( session, unpack->data, unpack->size, &copy_addr->addr)); @@ -194,8 +194,8 @@ __rebalance_free_original(WT_SESSION_IMPL *session, WT_REBALANCE_STUFF *rs) * Walk a column-store page and its descendants. */ static int -__rebalance_col_walk( - WT_SESSION_IMPL *session, const WT_PAGE_HEADER *dsk, WT_REBALANCE_STUFF *rs) +__rebalance_col_walk(WT_SESSION_IMPL *session, wt_timestamp_t durable_ts, + const WT_PAGE_HEADER *dsk, WT_REBALANCE_STUFF *rs) { WT_BTREE *btree; WT_CELL_UNPACK unpack; @@ -221,7 +221,8 @@ __rebalance_col_walk( /* An internal page: read it and recursively walk it. 
*/ WT_ERR(__wt_bt_read( session, buf, unpack.data, unpack.size)); - WT_ERR(__rebalance_col_walk(session, buf->data, rs)); + WT_ERR(__rebalance_col_walk( + session, unpack.newest_durable_ts, buf->data, rs)); __wt_verbose(session, WT_VERB_REBALANCE, "free-list append internal page: %s", __wt_addr_string( @@ -232,7 +233,7 @@ __rebalance_col_walk( case WT_CELL_ADDR_LEAF: case WT_CELL_ADDR_LEAF_NO: WT_ERR(__rebalance_leaf_append( - session, NULL, 0, &unpack, rs)); + session, durable_ts, NULL, 0, &unpack, rs)); break; WT_ILLEGAL_VALUE_ERR(session, unpack.type); } @@ -273,8 +274,8 @@ __rebalance_row_leaf_key(WT_SESSION_IMPL *session, * Walk a row-store page and its descendants. */ static int -__rebalance_row_walk( - WT_SESSION_IMPL *session, const WT_PAGE_HEADER *dsk, WT_REBALANCE_STUFF *rs) +__rebalance_row_walk(WT_SESSION_IMPL *session, wt_timestamp_t durable_ts, + const WT_PAGE_HEADER *dsk, WT_REBALANCE_STUFF *rs) { WT_BTREE *btree; WT_CELL_UNPACK key, unpack; @@ -347,7 +348,8 @@ __rebalance_row_walk( /* Read and recursively walk the page. */ WT_ERR(__wt_bt_read( session, buf, unpack.data, unpack.size)); - WT_ERR(__rebalance_row_walk(session, buf->data, rs)); + WT_ERR(__rebalance_row_walk( + session, unpack.newest_durable_ts, buf->data, rs)); break; case WT_CELL_ADDR_LEAF: case WT_CELL_ADDR_LEAF_NO: @@ -376,7 +378,7 @@ __rebalance_row_walk( len = key.size; } WT_ERR(__rebalance_leaf_append( - session, p, len, &unpack, rs)); + session, durable_ts, p, len, &unpack, rs)); first_cell = false; break; @@ -399,17 +401,19 @@ __wt_bt_rebalance(WT_SESSION_IMPL *session, const char *cfg[]) WT_BTREE *btree; WT_DECL_RET; WT_REBALANCE_STUFF *rs, _rstuff; + WT_REF *ref; WT_UNUSED(cfg); btree = S2BT(session); + ref = &btree->root; /* * If the tree has never been written to disk, we're done, rebalance * walks disk images, not in-memory pages. For the same reason, the * tree has to be clean. 
*/ - if (btree->root.page->dsk == NULL) + if (ref->page->dsk == NULL) return (0); if (btree->modified) WT_RET_MSG(session, EINVAL, @@ -422,17 +426,22 @@ __wt_bt_rebalance(WT_SESSION_IMPL *session, const char *cfg[]) WT_ERR(__wt_scr_alloc(session, 0, &rs->tmp2)); /* Set the internal page tree type. */ - rs->type = btree->root.page->type; + rs->type = ref->page->type; - /* Recursively walk the tree. */ + /* + * Recursively walk the tree. We start with a durable timestamp, but + * it should never be used (we'll accumulate durable timestamps from + * all the internal pages in our final write), so set it to something + * impossible. + */ switch (rs->type) { case WT_PAGE_ROW_INT: - WT_ERR( - __rebalance_row_walk(session, btree->root.page->dsk, rs)); + WT_ERR(__rebalance_row_walk( + session, WT_TS_MAX, ref->page->dsk, rs)); break; case WT_PAGE_COL_INT: - WT_ERR( - __rebalance_col_walk(session, btree->root.page->dsk, rs)); + WT_ERR(__rebalance_col_walk( + session, WT_TS_MAX, ref->page->dsk, rs)); break; WT_ILLEGAL_VALUE_ERR(session, rs->type); } @@ -450,8 +459,8 @@ __wt_bt_rebalance(WT_SESSION_IMPL *session, const char *cfg[]) * Swap the old root page for our newly built root page, writing the new * root page as part of a checkpoint will finish the rebalance. */ - __wt_page_out(session, &btree->root.page); - btree->root.page = rs->root; + __wt_page_out(session, &ref->page); + ref->page = rs->root; rs->root = NULL; err: /* Discard any leftover root page we created. */ |