diff options
author | Luke Chen <luke.chen@mongodb.com> | 2021-01-27 16:56:06 +1100 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2021-01-27 06:35:56 +0000 |
commit | e00fae3a427a480ea7090385392653a9fa6cd1d7 (patch) | |
tree | 22d2dcb6d50a45a24f4db747d3fc4ddfa3106551 | |
parent | 657fd55617da405757b94bc9973df40394a18e5b (diff) | |
download | mongo-e00fae3a427a480ea7090385392653a9fa6cd1d7.tar.gz |
Import wiredtiger: 462a8434b56a1274b2d8cf8dd91240021df294c7 from branch mongodb-5.0
ref: e39ffb5541..462a8434b5
for: 4.9.0
WT-6673 Rollback to stable to fix the inconsistent checkpoint by removing updates outside of the checkpoint snapshot
WT-7121 Include log-structured allocation python tests in WT
WT-7126 Coverity analysis defect 116991: Explicit null dereferenced
WT-7127 Coverity analysis defect 116992: Unchecked return value
WT-7128 Coverity analysis defect 116993: Resource leak
WT-7131 Tiered cursors should return error if configured with zero tiers
Reverted ticket(s):
WT-7091 Restrict usage of LSM to only operate in conjunction with compatible incremental backup mechanism
24 files changed, 678 insertions, 158 deletions
diff --git a/src/third_party/wiredtiger/dist/stat_data.py b/src/third_party/wiredtiger/dist/stat_data.py index 997337c7896..9e745e53163 100644 --- a/src/third_party/wiredtiger/dist/stat_data.py +++ b/src/third_party/wiredtiger/dist/stat_data.py @@ -860,6 +860,7 @@ conn_dsrc_stats = [ TxnStat('txn_rts_hs_removed', 'rollback to stable updates removed from history store'), TxnStat('txn_rts_hs_restore_tombstones', 'rollback to stable restored tombstones from history store'), TxnStat('txn_rts_hs_stop_older_than_newer_start', 'rollback to stable hs records with stop timestamps older than newer records'), + TxnStat('txn_rts_inconsistent_ckpt', 'rollback to stable inconsistent checkpoint'), TxnStat('txn_rts_keys_removed', 'rollback to stable keys removed'), TxnStat('txn_rts_keys_restored', 'rollback to stable keys restored'), TxnStat('txn_rts_sweep_hs_keys', 'rollback to stable sweeping history store keys'), diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data index a0113950ef4..9a018da1f30 100644 --- a/src/third_party/wiredtiger/import.data +++ b/src/third_party/wiredtiger/import.data @@ -2,5 +2,5 @@ "vendor": "wiredtiger", "github": "wiredtiger/wiredtiger.git", "branch": "mongodb-5.0", - "commit": "e39ffb554160de902060cd063c4b1547ff6d5e1e" + "commit": "462a8434b56a1274b2d8cf8dd91240021df294c7" } diff --git a/src/third_party/wiredtiger/src/conn/conn_dhandle.c b/src/third_party/wiredtiger/src/conn/conn_dhandle.c index b9302ccb602..7d8e4bdb8db 100644 --- a/src/third_party/wiredtiger/src/conn/conn_dhandle.c +++ b/src/third_party/wiredtiger/src/conn/conn_dhandle.c @@ -917,6 +917,37 @@ restart: } /* + * __wt_dhandle_update_write_gens -- + * Update the open dhandles write generation, run write generation and base write generation + * number. + */ +void +__wt_dhandle_update_write_gens(WT_SESSION_IMPL *session) +{ + WT_BTREE *btree; + WT_CONNECTION_IMPL *conn; + WT_DATA_HANDLE *dhandle; + + conn = S2C(session); + + for (dhandle = NULL;;) { + WT_WITH_HANDLE_LIST_WRITE_LOCK(session, WT_DHANDLE_NEXT(session, dhandle, &conn->dhqh, q)); + if (dhandle == NULL) + break; + btree = (WT_BTREE *)dhandle->handle; + + WT_ASSERT(session, btree != NULL); + + /* + * Initialize the btree write generation numbers after rollback to stable so that the + * transaction ids of the pages will be reset when loaded from disk to memory. + */ + btree->write_gen = btree->base_write_gen = btree->run_write_gen = + WT_MAX(btree->write_gen, conn->base_write_gen); + } +} + +/* * __wt_verbose_dump_handles -- * Dump information about all data handles. */ diff --git a/src/third_party/wiredtiger/src/cursor/cur_backup.c b/src/third_party/wiredtiger/src/cursor/cur_backup.c index f576a3ffa12..4cb1ae86f0f 100644 --- a/src/third_party/wiredtiger/src/cursor/cur_backup.c +++ b/src/third_party/wiredtiger/src/cursor/cur_backup.c @@ -611,11 +611,6 @@ __backup_config(WT_SESSION_IMPL *session, WT_CURSOR_BACKUP *cb, const char *cfg[ session, EINVAL, "Incremental primary cursor must have a known source identifier"); F_SET(cb, WT_CURBACKUP_INCR); } - - /* Return an error if block-based incremental backup is performed with open LSM trees. */ - if (incremental_config && !TAILQ_EMPTY(&conn->lsmqh)) - WT_ERR_MSG(session, ENOTSUP, "LSM does not work with block-based incremental backup"); - err: if (ret != 0 && cb->incr_src != NULL) { F_CLR(cb->incr_src, WT_BLKINCR_INUSE); diff --git a/src/third_party/wiredtiger/src/docs/backup.dox b/src/third_party/wiredtiger/src/docs/backup.dox index 265253d8959..82979f16423 100644 --- a/src/third_party/wiredtiger/src/docs/backup.dox +++ b/src/third_party/wiredtiger/src/docs/backup.dox @@ -171,9 +171,6 @@ database directory has not been opened and recovery run. Once recovery has run in a backup directory, you can no longer back up to that database directory. -Block-based incremental backup does not work with LSM trees. An error -will be returned in that case. - An example of opening the backup data source for block-based incremental backup: @snippet ex_all.c incremental block backup diff --git a/src/third_party/wiredtiger/src/include/cell_inline.h b/src/third_party/wiredtiger/src/include/cell_inline.h index 8a81adcd896..54a8790ac58 100644 --- a/src/third_party/wiredtiger/src/include/cell_inline.h +++ b/src/third_party/wiredtiger/src/include/cell_inline.h @@ -970,6 +970,16 @@ __cell_unpack_window_cleanup(WT_SESSION_IMPL *session, const WT_PAGE_HEADER *dsk * No delete txnid=MAX, ts=MAX, txnid=MAX, ts=MAX, * durable_ts=NONE durable_ts=NONE */ + + /* + * Don't reset the transaction ids in rollback to stable when called from recovery because + * rollback to stable in addition to stable timestamp also depends on transaction ids from the + * page that are read into cache to decide if an update needs to be rolled back. + */ + if (F_ISSET(S2C(session), WT_CONN_RECOVERING) && + F_ISSET(session, WT_SESSION_ROLLBACK_TO_STABLE)) + return; + if (dsk->write_gen == 0 || dsk->write_gen > S2BT(session)->base_write_gen) return; diff --git a/src/third_party/wiredtiger/src/include/extern.h b/src/third_party/wiredtiger/src/include/extern.h index 68e636cdebf..6f2ced383f4 100644 --- a/src/third_party/wiredtiger/src/include/extern.h +++ b/src/third_party/wiredtiger/src/include/extern.h @@ -1675,6 +1675,7 @@ extern void __wt_curstat_cache_walk(WT_SESSION_IMPL *session); extern void __wt_curstat_dsrc_final(WT_CURSOR_STAT *cst); extern void __wt_curtable_set_key(WT_CURSOR *cursor, ...); extern void __wt_curtable_set_value(WT_CURSOR *cursor, ...); +extern void __wt_dhandle_update_write_gens(WT_SESSION_IMPL *session); extern void __wt_encrypt_size( WT_SESSION_IMPL *session, WT_KEYED_ENCRYPTOR *kencryptor, size_t incoming_size, size_t *sizep); extern void __wt_err_func( diff --git a/src/third_party/wiredtiger/src/include/stat.h b/src/third_party/wiredtiger/src/include/stat.h index e1c6cea488a..3562800fcd0 100644 --- a/src/third_party/wiredtiger/src/include/stat.h +++ b/src/third_party/wiredtiger/src/include/stat.h @@ -780,6 +780,7 @@ struct __wt_connection_stats { int64_t rec_time_window_stop_txn; int64_t txn_read_race_prepare_update; int64_t txn_rts_hs_stop_older_than_newer_start; + int64_t txn_rts_inconsistent_ckpt; int64_t txn_rts_keys_removed; int64_t txn_rts_keys_restored; int64_t txn_rts_hs_restore_tombstones; @@ -993,6 +994,7 @@ struct __wt_dsrc_stats { int64_t rec_time_window_stop_txn; int64_t txn_read_race_prepare_update; int64_t txn_rts_hs_stop_older_than_newer_start; + int64_t txn_rts_inconsistent_ckpt; int64_t txn_rts_keys_removed; int64_t txn_rts_keys_restored; int64_t txn_rts_hs_restore_tombstones; diff --git a/src/third_party/wiredtiger/src/include/wiredtiger.in b/src/third_party/wiredtiger/src/include/wiredtiger.in index d59138e1bb0..69559d2c3ac 100644 --- a/src/third_party/wiredtiger/src/include/wiredtiger.in +++ b/src/third_party/wiredtiger/src/include/wiredtiger.in @@ -5869,18 +5869,20 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection); * than newer records */ #define WT_STAT_CONN_TXN_RTS_HS_STOP_OLDER_THAN_NEWER_START 1466 +/*! transaction: rollback to stable inconsistent checkpoint */ +#define WT_STAT_CONN_TXN_RTS_INCONSISTENT_CKPT 1467 /*! transaction: rollback to stable keys removed */ -#define WT_STAT_CONN_TXN_RTS_KEYS_REMOVED 1467 +#define WT_STAT_CONN_TXN_RTS_KEYS_REMOVED 1468 /*! transaction: rollback to stable keys restored */ -#define WT_STAT_CONN_TXN_RTS_KEYS_RESTORED 1468 +#define WT_STAT_CONN_TXN_RTS_KEYS_RESTORED 1469 /*! transaction: rollback to stable restored tombstones from history store */ -#define WT_STAT_CONN_TXN_RTS_HS_RESTORE_TOMBSTONES 1469 +#define WT_STAT_CONN_TXN_RTS_HS_RESTORE_TOMBSTONES 1470 /*! transaction: rollback to stable sweeping history store keys */ -#define WT_STAT_CONN_TXN_RTS_SWEEP_HS_KEYS 1470 +#define WT_STAT_CONN_TXN_RTS_SWEEP_HS_KEYS 1471 /*! transaction: rollback to stable updates removed from history store */ -#define WT_STAT_CONN_TXN_RTS_HS_REMOVED 1471 +#define WT_STAT_CONN_TXN_RTS_HS_REMOVED 1472 /*! transaction: update conflicts */ -#define WT_STAT_CONN_TXN_UPDATE_CONFLICT 1472 +#define WT_STAT_CONN_TXN_UPDATE_CONFLICT 1473 /*! * @} @@ -6478,18 +6480,20 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection); * than newer records */ #define WT_STAT_DSRC_TXN_RTS_HS_STOP_OLDER_THAN_NEWER_START 2199 +/*! transaction: rollback to stable inconsistent checkpoint */ +#define WT_STAT_DSRC_TXN_RTS_INCONSISTENT_CKPT 2200 /*! transaction: rollback to stable keys removed */ -#define WT_STAT_DSRC_TXN_RTS_KEYS_REMOVED 2200 +#define WT_STAT_DSRC_TXN_RTS_KEYS_REMOVED 2201 /*! transaction: rollback to stable keys restored */ -#define WT_STAT_DSRC_TXN_RTS_KEYS_RESTORED 2201 +#define WT_STAT_DSRC_TXN_RTS_KEYS_RESTORED 2202 /*! transaction: rollback to stable restored tombstones from history store */ -#define WT_STAT_DSRC_TXN_RTS_HS_RESTORE_TOMBSTONES 2202 +#define WT_STAT_DSRC_TXN_RTS_HS_RESTORE_TOMBSTONES 2203 /*! transaction: rollback to stable sweeping history store keys */ -#define WT_STAT_DSRC_TXN_RTS_SWEEP_HS_KEYS 2203 +#define WT_STAT_DSRC_TXN_RTS_SWEEP_HS_KEYS 2204 /*! transaction: rollback to stable updates removed from history store */ -#define WT_STAT_DSRC_TXN_RTS_HS_REMOVED 2204 +#define WT_STAT_DSRC_TXN_RTS_HS_REMOVED 2205 /*! transaction: update conflicts */ -#define WT_STAT_DSRC_TXN_UPDATE_CONFLICT 2205 +#define WT_STAT_DSRC_TXN_UPDATE_CONFLICT 2206 /*! * @} diff --git a/src/third_party/wiredtiger/src/meta/meta_ckpt.c b/src/third_party/wiredtiger/src/meta/meta_ckpt.c index 150880625a6..424f0dde8b9 100644 --- a/src/third_party/wiredtiger/src/meta/meta_ckpt.c +++ b/src/third_party/wiredtiger/src/meta/meta_ckpt.c @@ -1044,19 +1044,19 @@ __wt_meta_sysinfo_set(WT_SESSION_IMPL *session) } /* Record snapshot information in metadata for checkpoint. */ - if (txn->snapshot_count > 0) { - WT_ERR(__wt_buf_fmt(session, buf, - WT_SYSTEM_CKPT_SNAPSHOT_MIN "=%" PRIu64 "," WT_SYSTEM_CKPT_SNAPSHOT_MAX "=%" PRIu64 - "," WT_SYSTEM_CKPT_SNAPSHOT_COUNT "=%" PRIu32 - "," WT_SYSTEM_CKPT_SNAPSHOT "=[", - txn->snap_min, txn->snap_max, txn->snapshot_count)); + WT_ERR(__wt_buf_fmt(session, buf, + WT_SYSTEM_CKPT_SNAPSHOT_MIN "=%" PRIu64 "," WT_SYSTEM_CKPT_SNAPSHOT_MAX "=%" PRIu64 + "," WT_SYSTEM_CKPT_SNAPSHOT_COUNT "=%" PRIu32, + txn->snap_min, txn->snap_max, txn->snapshot_count)); + if (txn->snapshot_count > 0) { + WT_ERR(__wt_buf_catfmt(session, buf, "," WT_SYSTEM_CKPT_SNAPSHOT "=[")); for (snap_count = 0; snap_count < txn->snapshot_count - 1; ++snap_count) WT_ERR(__wt_buf_catfmt(session, buf, "%" PRIu64 "%s", txn->snapshot[snap_count], ",")); WT_ERR(__wt_buf_catfmt(session, buf, "%" PRIu64 "%s", txn->snapshot[snap_count], "]")); - WT_ERR(__wt_metadata_update(session, WT_SYSTEM_CKPT_SNAPSHOT_URI, buf->data)); } + WT_ERR(__wt_metadata_update(session, WT_SYSTEM_CKPT_SNAPSHOT_URI, buf->data)); /* Record the base write gen in metadata as part of checkpoint */ WT_ERR(__wt_buf_fmt( diff --git a/src/third_party/wiredtiger/src/os_posix/os_fs.c b/src/third_party/wiredtiger/src/os_posix/os_fs.c index ede22518c26..a472273de48 100644 --- a/src/third_party/wiredtiger/src/os_posix/os_fs.c +++ b/src/third_party/wiredtiger/src/os_posix/os_fs.c @@ -748,7 +748,10 @@ __posix_open_file(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, const cha f |= O_CLOEXEC; #endif WT_SYSCALL_RETRY(((pfh->fd = open(name, f, 0444)) == -1 ? -1 : 0), ret); - if (ret != 0) + /* Return error if the file not found during rollback to stable. */ + if (ret != 0 && F_ISSET(session, WT_SESSION_ROLLBACK_TO_STABLE)) + WT_ERR(__wt_errno()); + else if (ret != 0) WT_ERR_MSG(session, ret, "%s: handle-open: open-directory", name); WT_ERR(__posix_open_file_cloexec(session, pfh->fd, name)); goto directory_open; @@ -800,7 +803,10 @@ __posix_open_file(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, const cha /* Create/Open the file. */ WT_SYSCALL_RETRY(((pfh->fd = open(name, f, mode)) == -1 ? -1 : 0), ret); - if (ret != 0) + /* Return error if the file not found during rollback to stable. */ + if (ret != 0 && F_ISSET(session, WT_SESSION_ROLLBACK_TO_STABLE)) + WT_ERR(ENOENT); + else if (ret != 0) WT_ERR_MSG(session, ret, pfh->direct_io ? "%s: handle-open: open: failed with direct I/O configured, some " "filesystem types do not support direct I/O" : diff --git a/src/third_party/wiredtiger/src/support/stat.c b/src/third_party/wiredtiger/src/support/stat.c index 1ee255be706..90de7cc2e94 100644 --- a/src/third_party/wiredtiger/src/support/stat.c +++ b/src/third_party/wiredtiger/src/support/stat.c @@ -207,6 +207,7 @@ static const char *const __stats_dsrc_desc[] = { "reconciliation: records written including a stop transaction ID", "transaction: race to read prepared update retry", "transaction: rollback to stable hs records with stop timestamps older than newer records", + "transaction: rollback to stable inconsistent checkpoint", "transaction: rollback to stable keys removed", "transaction: rollback to stable keys restored", "transaction: rollback to stable restored tombstones from history store", @@ -453,6 +454,7 @@ __wt_stat_dsrc_clear_single(WT_DSRC_STATS *stats) stats->rec_time_window_stop_txn = 0; stats->txn_read_race_prepare_update = 0; stats->txn_rts_hs_stop_older_than_newer_start = 0; + stats->txn_rts_inconsistent_ckpt = 0; stats->txn_rts_keys_removed = 0; stats->txn_rts_keys_restored = 0; stats->txn_rts_hs_restore_tombstones = 0; @@ -686,6 +688,7 @@ __wt_stat_dsrc_aggregate_single(WT_DSRC_STATS *from, WT_DSRC_STATS *to) to->rec_time_window_stop_txn += from->rec_time_window_stop_txn; to->txn_read_race_prepare_update += from->txn_read_race_prepare_update; to->txn_rts_hs_stop_older_than_newer_start += from->txn_rts_hs_stop_older_than_newer_start; + to->txn_rts_inconsistent_ckpt += from->txn_rts_inconsistent_ckpt; to->txn_rts_keys_removed += from->txn_rts_keys_removed; to->txn_rts_keys_restored += from->txn_rts_keys_restored; to->txn_rts_hs_restore_tombstones += from->txn_rts_hs_restore_tombstones; @@ -925,6 +928,7 @@ __wt_stat_dsrc_aggregate(WT_DSRC_STATS **from, WT_DSRC_STATS *to) to->txn_read_race_prepare_update += WT_STAT_READ(from, txn_read_race_prepare_update); to->txn_rts_hs_stop_older_than_newer_start += WT_STAT_READ(from, txn_rts_hs_stop_older_than_newer_start); + to->txn_rts_inconsistent_ckpt += WT_STAT_READ(from, txn_rts_inconsistent_ckpt); to->txn_rts_keys_removed += WT_STAT_READ(from, txn_rts_keys_removed); to->txn_rts_keys_restored += WT_STAT_READ(from, txn_rts_keys_restored); to->txn_rts_hs_restore_tombstones += WT_STAT_READ(from, txn_rts_hs_restore_tombstones); @@ -1413,6 +1417,7 @@ static const char *const __stats_connection_desc[] = { "reconciliation: records written including a stop transaction ID", "transaction: race to read prepared update retry", "transaction: rollback to stable hs records with stop timestamps older than newer records", + "transaction: rollback to stable inconsistent checkpoint", "transaction: rollback to stable keys removed", "transaction: rollback to stable keys restored", "transaction: rollback to stable restored tombstones from history store", @@ -1926,6 +1931,7 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats) stats->rec_time_window_stop_txn = 0; stats->txn_read_race_prepare_update = 0; stats->txn_rts_hs_stop_older_than_newer_start = 0; + stats->txn_rts_inconsistent_ckpt = 0; stats->txn_rts_keys_removed = 0; stats->txn_rts_keys_restored = 0; stats->txn_rts_hs_restore_tombstones = 0; @@ -2450,6 +2456,7 @@ __wt_stat_connection_aggregate(WT_CONNECTION_STATS **from, WT_CONNECTION_STATS * to->txn_read_race_prepare_update += WT_STAT_READ(from, txn_read_race_prepare_update); to->txn_rts_hs_stop_older_than_newer_start += WT_STAT_READ(from, txn_rts_hs_stop_older_than_newer_start); + to->txn_rts_inconsistent_ckpt += WT_STAT_READ(from, txn_rts_inconsistent_ckpt); to->txn_rts_keys_removed += WT_STAT_READ(from, txn_rts_keys_removed); to->txn_rts_keys_restored += WT_STAT_READ(from, txn_rts_keys_restored); to->txn_rts_hs_restore_tombstones += WT_STAT_READ(from, txn_rts_hs_restore_tombstones); diff --git a/src/third_party/wiredtiger/src/tiered/tiered_cursor.c b/src/third_party/wiredtiger/src/tiered/tiered_cursor.c index 26c750fb496..1694e57dbc3 100644 --- a/src/third_party/wiredtiger/src/tiered/tiered_cursor.c +++ b/src/third_party/wiredtiger/src/tiered/tiered_cursor.c @@ -34,8 +34,7 @@ __curtiered_open_cursors(WT_CURSOR_TIERED *curtiered) dhandle = NULL; tiered = curtiered->tiered; - if (tiered->ntiers == 0) - return (0); + WT_ASSERT(session, tiered->ntiers > 0); /* * If the key is pointing to memory that is pinned by a chunk cursor, take a copy before closing @@ -1017,21 +1016,14 @@ err: * documents avoids biasing towards small chunks. Then return the cursor on the chunk we have * picked. */ -static int +static void __curtiered_random_chunk(WT_SESSION_IMPL *session, WT_CURSOR_TIERED *curtiered, WT_CURSOR **cursor) { - u_int i, ntiers; - - /* - * If the tree is empty we cannot do a random lookup, so return a WT_NOTFOUND. - */ - if ((ntiers = curtiered->tiered->ntiers) == 0) - return (WT_NOTFOUND); + u_int i; /* TODO: make randomness respect tree size. */ - i = __wt_random(&session->rnd) % ntiers; + i = __wt_random(&session->rnd) % curtiered->tiered->ntiers; *cursor = curtiered->cursors[i]; - return (0); } /* @@ -1055,7 +1047,7 @@ __curtiered_next_random(WT_CURSOR *cursor) WT_ERR(__curtiered_enter(curtiered, false)); for (;;) { - WT_ERR(__curtiered_random_chunk(session, curtiered, &c)); + __curtiered_random_chunk(session, curtiered, &c); /* * This call to next_random on the chunk can potentially end in WT_NOTFOUND if the chunk we * picked is empty. We want to retry in that case. diff --git a/src/third_party/wiredtiger/src/tiered/tiered_schema.c b/src/third_party/wiredtiger/src/tiered/tiered_schema.c index dc153b31e43..6e7dd84c0e3 100644 --- a/src/third_party/wiredtiger/src/tiered/tiered_schema.c +++ b/src/third_party/wiredtiger/src/tiered/tiered_schema.c @@ -15,12 +15,16 @@ int __wt_tiered_create(WT_SESSION_IMPL *session, const char *uri, bool exclusive, const char *config) { + WT_CONFIG cparser; + WT_CONFIG_ITEM ckey, cval, tierconf; WT_DECL_RET; + int ntiers; char *meta_value; const char *cfg[] = {WT_CONFIG_BASE(session, tiered_meta), config, NULL}; const char *metadata; metadata = NULL; + ntiers = 0; /* If it can be opened, it already exists. */ if ((ret = __wt_metadata_search(session, uri, &meta_value)) != WT_NOTFOUND) { @@ -30,12 +34,24 @@ __wt_tiered_create(WT_SESSION_IMPL *session, const char *uri, bool exclusive, co } WT_RET_NOTFOUND_OK(ret); + /* A tiered cursor must specify at least one underlying table */ + WT_RET(__wt_config_gets(session, cfg, "tiered.tiers", &tierconf)); + __wt_config_subinit(session, &cparser, &tierconf); + + while ((ret = __wt_config_next(&cparser, &ckey, &cval)) == 0) + ++ntiers; + WT_RET_NOTFOUND_OK(ret); + + if (ntiers == 0) + WT_RET_MSG(session, EINVAL, "tiered table must specify at least one tier"); + if (!F_ISSET(S2C(session), WT_CONN_READONLY)) { WT_ERR(__wt_config_merge(session, cfg, NULL, &metadata)); WT_ERR(__wt_metadata_insert(session, uri, metadata)); } err: + __wt_free(session, meta_value); __wt_free(session, metadata); return (ret); } @@ -188,14 +204,14 @@ __tiered_open(WT_SESSION_IMPL *session, const char *cfg[]) /* Point to some items in the copy to save re-parsing. */ WT_RET(__wt_config_gets(session, tiered_cfg, "tiered.tiers", &tierconf)); - /* - * Count the number of tiers. - */ + /* Count the number of tiers. */ __wt_config_subinit(session, &cparser, &tierconf); while ((ret = __wt_config_next(&cparser, &ckey, &cval)) == 0) ++tiered->ntiers; WT_RET_NOTFOUND_OK(ret); + WT_ASSERT(session, tiered->ntiers > 0); + WT_RET(__wt_scr_alloc(session, 0, &buf)); WT_ERR(__wt_calloc_def(session, tiered->ntiers, &tiered->tiers)); @@ -204,7 +220,7 @@ __tiered_open(WT_SESSION_IMPL *session, const char *cfg[]) WT_ERR(__wt_config_next(&cparser, &ckey, &cval)); WT_ERR(__wt_buf_fmt(session, buf, "%.*s", (int)ckey.len, ckey.str)); WT_ERR(__wt_session_get_dhandle(session, (const char *)buf->data, NULL, cfg, 0)); - __wt_atomic_addi32(&session->dhandle->session_inuse, 1); + (void)__wt_atomic_addi32(&session->dhandle->session_inuse, 1); /* Load in reverse order (based on LSM logic). */ tiered->tiers[(tiered->ntiers - 1) - i] = session->dhandle; WT_ERR(__wt_session_release_dhandle(session)); @@ -247,7 +263,7 @@ __wt_tiered_close(WT_SESSION_IMPL *session, WT_TIERED *tiered) __wt_free(session, tiered->value_format); if (tiered->tiers != NULL) { for (i = 0; i < tiered->ntiers; i++) - __wt_atomic_subi32(&tiered->tiers[i]->session_inuse, 1); + (void)__wt_atomic_subi32(&tiered->tiers[i]->session_inuse, 1); __wt_free(session, tiered->tiers); } diff --git a/src/third_party/wiredtiger/src/txn/txn_recover.c b/src/third_party/wiredtiger/src/txn/txn_recover.c index e81451a96b3..89d8e7528d9 100644 --- a/src/third_party/wiredtiger/src/txn/txn_recover.c +++ b/src/third_party/wiredtiger/src/txn/txn_recover.c @@ -506,9 +506,10 @@ __recovery_set_checkpoint_snapshot(WT_SESSION_IMPL *session) * snapshot max. */ WT_ASSERT(session, - conn->recovery_ckpt_snapshot_count == counter && - conn->recovery_ckpt_snapshot[0] == conn->recovery_ckpt_snap_min && - conn->recovery_ckpt_snapshot[counter - 1] < conn->recovery_ckpt_snap_max); + ((conn->recovery_ckpt_snapshot_count == 0) || + (conn->recovery_ckpt_snapshot_count == counter && + conn->recovery_ckpt_snapshot[0] == conn->recovery_ckpt_snap_min && + conn->recovery_ckpt_snapshot[counter - 1] < conn->recovery_ckpt_snap_max))); } err: @@ -742,12 +743,14 @@ __wt_txn_recover(WT_SESSION_IMPL *session, const char *cfg[]) char *config; char ts_string[2][WT_TS_INT_STRING_SIZE]; bool do_checkpoint, eviction_started, hs_exists, needs_rec, was_backup; + bool rts_executed, no_log_recovery; conn = S2C(session); WT_CLEAR(r); WT_INIT_LSN(&r.ckpt_lsn); config = NULL; do_checkpoint = hs_exists = true; + rts_executed = no_log_recovery = false; eviction_started = false; was_backup = F_ISSET(conn, WT_CONN_WAS_BACKUP); @@ -760,13 +763,14 @@ __wt_txn_recover(WT_SESSION_IMPL *session, const char *cfg[]) F_SET(conn, WT_CONN_RECOVERING); WT_ERR(__recovery_set_ckpt_base_write_gen(&r)); - WT_ERR(__recovery_set_checkpoint_snapshot(session)); WT_ERR(__wt_metadata_search(session, WT_METAFILE_URI, &config)); WT_ERR(__recovery_setup_file(&r, WT_METAFILE_URI, config)); WT_ERR(__wt_metadata_cursor_open(session, NULL, &metac)); metafile = &r.files[WT_METAFILE_ID]; metafile->c = metac; + WT_ERR(__recovery_set_checkpoint_timestamp(&r)); + WT_ERR(__recovery_set_oldest_timestamp(&r)); /* * If no log was found (including if logging is disabled), or if the last checkpoint was done * with logging disabled, recovery should not run. Scan the metadata to figure out the largest @@ -781,6 +785,8 @@ __wt_txn_recover(WT_SESSION_IMPL *session, const char *cfg[]) * earlier time. */ WT_ERR(__recovery_file_scan(&r)); + no_log_recovery = true; + /* * The array can be re-allocated in recovery_file_scan. Reset our pointer after scanning all * the files. @@ -793,7 +799,7 @@ __wt_txn_recover(WT_SESSION_IMPL *session, const char *cfg[]) else do_checkpoint = false; WT_ERR(__hs_exists(session, metac, cfg, &hs_exists)); - goto done; + goto rollback_to_stable; } /* @@ -867,6 +873,46 @@ __wt_txn_recover(WT_SESSION_IMPL *session, const char *cfg[]) r.files[0].c = NULL; WT_ERR(metac->close(metac)); +rollback_to_stable: + /* + * Perform rollback to stable only when the following conditions met. + * 1. The connection is not read-only. A read-only connection expects that there shouldn't be + * any changes that need to be done on the database other than reading. + * 2. The history store file was found in the metadata. + */ + if (hs_exists && !F_ISSET(conn, WT_CONN_READONLY)) { + /* Start the eviction threads for rollback to stable if not already started. */ + WT_ERR(__wt_evict_create(session)); + eviction_started = true; + + WT_ERR(__recovery_set_checkpoint_snapshot(session)); + WT_ASSERT(session, + conn->txn_global.has_stable_timestamp == false && + conn->txn_global.stable_timestamp == WT_TS_NONE); + + /* + * Set the stable timestamp from recovery timestamp and process the trees for rollback to + * stable. + */ + conn->txn_global.stable_timestamp = conn->txn_global.recovery_timestamp; + conn->txn_global.has_stable_timestamp = false; + + if (conn->txn_global.recovery_timestamp != WT_TS_NONE) + conn->txn_global.has_stable_timestamp = true; + + __wt_verbose(session, WT_VERB_RECOVERY | WT_VERB_RTS, + "performing recovery rollback_to_stable with stable timestamp: %s and oldest timestamp: " + "%s", + __wt_timestamp_to_string(conn->txn_global.stable_timestamp, ts_string[0]), + __wt_timestamp_to_string(conn->txn_global.oldest_timestamp, ts_string[1])); + rts_executed = true; + WT_ERR(__wt_rollback_to_stable(session, NULL, true)); + } + + /* Don't run recovery if no log was found. */ + if (no_log_recovery) + goto done; + /* * Now, recover all the files apart from the metadata. Pass WT_LOGSCAN_RECOVER so that old logs * get truncated. @@ -904,10 +950,13 @@ __wt_txn_recover(WT_SESSION_IMPL *session, const char *cfg[]) /* * Recovery can touch more data than fits in cache, so it relies on regular eviction to manage - * paging. Start eviction threads for recovery without history store cursors. + * paging. Start eviction threads if not already started for recovery without history store + * cursors. */ - WT_ERR(__wt_evict_create(session)); - eviction_started = true; + if (!eviction_started) { + WT_ERR(__wt_evict_create(session)); + eviction_started = true; + } /* * Always run recovery even if it was a clean shutdown only if this is not a read-only @@ -925,60 +974,26 @@ __wt_txn_recover(WT_SESSION_IMPL *session, const char *cfg[]) WT_ERR(ret); done: - WT_ERR(__recovery_set_checkpoint_timestamp(&r)); - WT_ERR(__recovery_set_oldest_timestamp(&r)); - /* - * Perform rollback to stable only when the following conditions met. - * 1. The connection is not read-only. A read-only connection expects that there shouldn't be - * any changes that need to be done on the database other than reading. - * 2. The history store file was found in the metadata. - */ - if (hs_exists && !F_ISSET(conn, WT_CONN_READONLY)) { - /* Start the eviction threads for rollback to stable if not already started. */ - if (!eviction_started) { - WT_ERR(__wt_evict_create(session)); - eviction_started = true; - } - - /* - * Currently, rollback to stable only needs to make changes to tables that use timestamps. - * That is because eviction does not run in parallel with a checkpoint, so content that is - * written never uses transaction IDs newer than the checkpoint's transaction ID and thus - * never needs to be rolled back. Once eviction is allowed while a checkpoint is active, it - * will be necessary to take the page write generation number into account during rollback - * to stable. For example, a page with write generation 10 and txnid 20 is written in one - * checkpoint, and in the next restart a new page with write generation 30 and txnid 20 is - * written. The rollback to stable operation should only rollback the latest page changes - * solely based on the write generation numbers. - */ - WT_ASSERT(session, - conn->txn_global.has_stable_timestamp == false && - conn->txn_global.stable_timestamp == WT_TS_NONE); - - /* - * Set the stable timestamp from recovery timestamp and process the trees for rollback to - * stable. - */ - conn->txn_global.stable_timestamp = conn->txn_global.recovery_timestamp; - conn->txn_global.has_stable_timestamp = false; - - if (conn->txn_global.recovery_timestamp != WT_TS_NONE) - conn->txn_global.has_stable_timestamp = true; - __wt_verbose(session, WT_VERB_RECOVERY | WT_VERB_RTS, - "Performing recovery rollback_to_stable with stable timestamp: %s and oldest timestamp: " - "%s", - __wt_timestamp_to_string(conn->txn_global.stable_timestamp, ts_string[0]), - __wt_timestamp_to_string(conn->txn_global.oldest_timestamp, ts_string[1])); - - WT_ERR(__wt_rollback_to_stable(session, NULL, false)); - } else if (do_checkpoint) + if (do_checkpoint || rts_executed) /* * Forcibly log a checkpoint so the next open is fast and keep the metadata up to date with * the checkpoint LSN and archiving. */ WT_ERR(session->iface.checkpoint(&session->iface, "force=1")); + if (rts_executed) { + /* Initialize the connection's base write generation after rollback to stable. */ + WT_ERR(__wt_metadata_init_base_write_gen(session)); + + /* + * Update the open dhandles write generations and base write generation with the + * connection's base write generation because the recovery checkpoint writes the pages to + * disk with new write generation number which contains transaction ids that are needed to + * reset later. + */ + __wt_dhandle_update_write_gens(session); + } /* * If we're downgrading and have newer log files, force an archive, no matter what the archive * setting is. diff --git a/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c b/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c index b30cf03be69..23a01efbd2e 100644 --- a/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c +++ b/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c @@ -8,9 +8,15 @@ #include "wt_internal.h" +#define WT_CHECK_RECOVERY_FLAG_TXNID_CKPT_SNAPMIN(session, txnid) \ + (F_ISSET(S2C(session), WT_CONN_RECOVERING) && (txnid) >= S2C(session)->recovery_ckpt_snap_min) + /* Enable rollback to stable verbose messaging during recovery. */ -#define WT_VERB_RECOVERY_RTS(session) \ - (F_ISSET(S2C(session), WT_CONN_RECOVERING) ? WT_VERB_RECOVERY | WT_VERB_RTS : WT_VERB_RTS) +#define WT_VERB_RECOVERY_RTS(session) \ + (F_ISSET(S2C(session), WT_CONN_RECOVERING) ? \ + WT_VERB_RECOVERY | WT_VERB_RECOVERY_PROGRESS | WT_VERB_RTS : \ + WT_VERB_RTS) + /* * __rollback_abort_newer_update -- * Abort updates in an update change with timestamps newer than the rollback timestamp. Also, @@ -149,6 +155,53 @@ err: } /* + * __rollback_check_if_txnid_non_committed -- + * Check if the transaction id is non committed. + */ +static bool +__rollback_check_if_txnid_non_committed(WT_SESSION_IMPL *session, uint64_t txnid) +{ + WT_CONNECTION_IMPL *conn; + bool found; + + conn = S2C(session); + + /* If not recovery then assume all the data as committed. */ + if (!F_ISSET(conn, WT_CONN_RECOVERING)) + return (false); + + /* + * Only full checkpoint writes the metadata with snapshot. If the recovered checkpoint snapshot + * details are zero then return false i.e, updates are committed. + */ + if (conn->recovery_ckpt_snap_min == 0 && conn->recovery_ckpt_snap_max == 0) + return (false); + + /* + * Snapshot data: + * ids < recovery_ckpt_snap_min are committed, + * ids > recovery_ckpt_snap_max are non committed, + * everything else is committed unless it is found in the recovery_ckpt_snapshot array. + */ + if (txnid < conn->recovery_ckpt_snap_min) + return (false); + else if (txnid > conn->recovery_ckpt_snap_max) + return (true); + + /* + * Return false when the recovery snapshot count is 0, which means there is no uncommitted + * transaction ids. + */ + if (conn->recovery_ckpt_snapshot_count == 0) + return (false); + + WT_BINARY_SEARCH( + txnid, conn->recovery_ckpt_snapshot, conn->recovery_ckpt_snapshot_count, found); + + return (found); +} + +/* * __rollback_row_ondisk_fixup_key -- * Abort updates in the history store and replace the on-disk value with an update that * satisfies the given timestamp. @@ -274,38 +327,48 @@ __rollback_row_ondisk_fixup_key(WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW /* * Stop processing when we find the newer version value of this key is stable according to - * the current version stop timestamp when it is not appending the selected update to the - * update chain. Also it confirms that history store doesn't contains any newer version than - * the current version for the key. + * the current version stop timestamp and transaction id when it is not appending the + * selected update to the update chain. Also it confirms that history store doesn't contains + * any newer version than the current version for the key. */ - if (!replace && hs_stop_durable_ts <= rollback_timestamp) { + if (!replace && + (!__rollback_check_if_txnid_non_committed(session, cbt->upd_value->tw.stop_txn)) && + (hs_stop_durable_ts <= rollback_timestamp)) { __wt_verbose(session, WT_VERB_RECOVERY_RTS(session), - "history store update valid with stop timestamp: %s and stable timestamp: %s", + "history store update valid with stop timestamp: %s, stable timestamp: %s and txnid: " + "%" PRIu64, __wt_timestamp_to_string(hs_stop_durable_ts, ts_string[0]), - __wt_timestamp_to_string(rollback_timestamp, ts_string[1])); + __wt_timestamp_to_string(rollback_timestamp, ts_string[1]), + cbt->upd_value->tw.stop_txn); break; } - /* Stop processing when we find a stable update according to the given timestamp. */ - if (hs_durable_ts <= rollback_timestamp) { + /* + * Stop processing when we find a stable update according to the given timestamp and + * transaction id. + */ + if (!__rollback_check_if_txnid_non_committed(session, cbt->upd_value->tw.start_txn) && + (hs_durable_ts <= rollback_timestamp)) { __wt_verbose(session, WT_VERB_RECOVERY_RTS(session), "history store update valid with start timestamp: %s, durable timestamp: %s, stop " - "timestamp: %s and stable timestamp: %s", + "timestamp: %s, stable timestamp: %s and txnid: %" PRIu64, __wt_timestamp_to_string(hs_start_ts, ts_string[0]), __wt_timestamp_to_string(hs_durable_ts, ts_string[1]), __wt_timestamp_to_string(hs_stop_durable_ts, ts_string[2]), - __wt_timestamp_to_string(rollback_timestamp, ts_string[3])); + __wt_timestamp_to_string(rollback_timestamp, ts_string[3]), + cbt->upd_value->tw.start_txn); valid_update_found = true; break; } __wt_verbose(session, WT_VERB_RECOVERY_RTS(session), "history store update aborted with start timestamp: %s, durable timestamp: %s, stop " - "timestamp: %s and stable timestamp: %s", + "timestamp: %s, stable timestamp: %s, start txnid: %" PRIu64 " and stop txnid: %" PRIu64, __wt_timestamp_to_string(hs_start_ts, ts_string[0]), __wt_timestamp_to_string(hs_durable_ts, ts_string[1]), __wt_timestamp_to_string(hs_stop_durable_ts, ts_string[2]), - __wt_timestamp_to_string(rollback_timestamp, ts_string[3])); + __wt_timestamp_to_string(rollback_timestamp, ts_string[3]), cbt->upd_value->tw.start_txn, + cbt->upd_value->tw.stop_txn); /* * Start time point of the current record may be used as stop time point of the previous @@ -331,7 +394,16 @@ __rollback_row_ondisk_fixup_key(WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW if (valid_update_found) { WT_ERR(__wt_upd_alloc(session, &full_value, WT_UPDATE_STANDARD, &upd, NULL)); - upd->txnid = cbt->upd_value->tw.start_txn; + /* + * Set the transaction id of updates to WT_TXN_NONE when called from recovery, because + * the connections write generation will be initialized after rollback to stable and the + * updates in the cache will be problematic. The transaction id of pages which are in + * disk will be automatically reset as part of unpacking cell when loaded to cache. + */ + if (F_ISSET(S2C(session), WT_CONN_RECOVERING)) + upd->txnid = WT_TXN_NONE; + else + upd->txnid = cbt->upd_value->tw.start_txn; upd->durable_ts = cbt->upd_value->tw.durable_start_ts; upd->start_ts = cbt->upd_value->tw.start_ts; __wt_verbose(session, WT_VERB_RECOVERY_RTS(session), @@ -353,7 +425,17 @@ __rollback_row_ondisk_fixup_key(WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW if (hs_stop_durable_ts <= rollback_timestamp && hs_stop_durable_ts < newer_hs_durable_ts) { WT_ERR(__wt_upd_alloc_tombstone(session, &tombstone, NULL)); - tombstone->txnid = cbt->upd_value->tw.stop_txn; + /* + * Set the transaction id of updates to WT_TXN_NONE when called from recovery, + * because the connections write generation will be initialized after rollback to + * stable and the updates in the cache will be problematic. The transaction id of + * pages which are in disk will be automatically reset as part of unpacking cell + * when loaded to cache. + */ + if (F_ISSET(S2C(session), WT_CONN_RECOVERING)) + tombstone->txnid = WT_TXN_NONE; + else + tombstone->txnid = cbt->upd_value->tw.stop_txn; tombstone->durable_ts = cbt->upd_value->tw.durable_stop_ts; tombstone->start_ts = cbt->upd_value->tw.stop_ts; __wt_verbose(session, WT_VERB_RECOVERY_RTS(session), @@ -443,14 +525,15 @@ __rollback_abort_row_ondisk_kv( WT_STAT_CONN_DATA_INCR(session, txn_rts_sweep_hs_keys); } else return (0); - } else if (vpack->tw.durable_start_ts > rollback_timestamp || + } else if (((vpack->tw.durable_start_ts > rollback_timestamp) || + __rollback_check_if_txnid_non_committed(session, vpack->tw.start_txn)) || (!WT_TIME_WINDOW_HAS_STOP(&vpack->tw) && prepared)) { __wt_verbose(session, WT_VERB_RECOVERY_RTS(session), "on-disk update aborted with start durable timestamp: %s, commit timestamp: %s, " - "prepared: %s and stable timestamp: %s", + "prepared: %s, stable timestamp: %s and txnid : %" PRIu64, __wt_timestamp_to_string(vpack->tw.durable_start_ts, ts_string[0]), __wt_timestamp_to_string(vpack->tw.start_ts, ts_string[1]), prepared ? "true" : "false", - __wt_timestamp_to_string(rollback_timestamp, ts_string[2])); + __wt_timestamp_to_string(rollback_timestamp, ts_string[2]), vpack->tw.start_txn); if (!F_ISSET(S2C(session), WT_CONN_IN_MEMORY)) return (__rollback_row_ondisk_fixup_key(session, page, rip, rollback_timestamp, true)); else { @@ -462,7 +545,9 @@ __rollback_abort_row_ondisk_kv( WT_STAT_CONN_DATA_INCR(session, txn_rts_keys_removed); } } else if (WT_TIME_WINDOW_HAS_STOP(&vpack->tw) && - (vpack->tw.durable_stop_ts > rollback_timestamp || prepared)) { + (((vpack->tw.durable_stop_ts > rollback_timestamp) || + __rollback_check_if_txnid_non_committed(session, vpack->tw.stop_txn)) || + prepared)) { /* * Clear the remove operation from the key by inserting the original on-disk value as a * standard update. @@ -470,17 +555,28 @@ __rollback_abort_row_ondisk_kv( WT_RET(__wt_page_cell_data_ref(session, page, vpack, &buf)); WT_ERR(__wt_upd_alloc(session, &buf, WT_UPDATE_STANDARD, &upd, NULL)); - upd->txnid = vpack->tw.start_txn; + /* + * Set the transaction id of updates to WT_TXN_NONE when called from recovery, because the + * connections write generation will be initialized after rollback to stable and the updates + * in the cache will be problematic. The transaction id of pages which are in disk will be + * automatically reset as part of unpacking cell when loaded to cache. + */ + if (F_ISSET(S2C(session), WT_CONN_RECOVERING)) + upd->txnid = WT_TXN_NONE; + else + upd->txnid = vpack->tw.start_txn; upd->durable_ts = vpack->tw.durable_start_ts; upd->start_ts = vpack->tw.start_ts; F_SET(upd, WT_UPDATE_RESTORED_FROM_DS); WT_STAT_CONN_DATA_INCR(session, txn_rts_keys_restored); __wt_verbose(session, WT_VERB_RECOVERY_RTS(session), - "key restored with commit timestamp: %s, durable timestamp: %s txnid: %" PRIu64 - "and removed commit timestamp: %s, durable timestamp: %s, txnid: %" PRIu64 + "key restored with commit timestamp: %s, durable timestamp: %s, stable timestamp: %s, " + "txnid: %" PRIu64 + " and removed commit timestamp: %s, durable timestamp: %s, txnid: %" PRIu64 ", prepared: %s", __wt_timestamp_to_string(upd->start_ts, ts_string[0]), - __wt_timestamp_to_string(upd->durable_ts, ts_string[1]), upd->txnid, + __wt_timestamp_to_string(upd->durable_ts, ts_string[1]), + __wt_timestamp_to_string(rollback_timestamp, ts_string[2]), upd->txnid, __wt_timestamp_to_string(vpack->tw.stop_ts, ts_string[2]), __wt_timestamp_to_string(vpack->tw.durable_stop_ts, ts_string[3]), vpack->tw.stop_txn, prepared ? "true" : "false"); @@ -735,6 +831,7 @@ __rollback_page_needs_abort( WT_MULTI *multi; WT_PAGE_MODIFY *mod; wt_timestamp_t durable_ts; + uint64_t newest_txn; uint32_t i; char ts_string[WT_TS_INT_STRING_SIZE]; const char *tag; @@ -743,12 +840,14 @@ __rollback_page_needs_abort( addr = ref->addr; mod = ref->page == NULL ? NULL : ref->page->modify; durable_ts = WT_TS_NONE; + newest_txn = WT_TXN_NONE; tag = "undefined state"; prepared = result = false; /* * The rollback operation should be performed on this page when any one of the following is - * greater than the given timestamp: + * greater than the given timestamp or during recovery if the newest transaction id on the page + * is greater than or equal to recovered checkpoint snapshot min: * 1. The reconciled replace page max durable timestamp. * 2. The reconciled multi page max durable timestamp. * 3. The on page address max durable timestamp. @@ -775,17 +874,22 @@ __rollback_page_needs_abort( __wt_cell_unpack_addr(session, ref->home->dsk, (WT_CELL *)addr, &vpack); durable_ts = __rollback_get_ref_max_durable_timestamp(session, &vpack.ta); prepared = vpack.ta.prepare; - result = (durable_ts > rollback_timestamp) || prepared; + newest_txn = vpack.ta.newest_txn; + result = (durable_ts > rollback_timestamp) || prepared || + WT_CHECK_RECOVERY_FLAG_TXNID_CKPT_SNAPMIN(session, newest_txn); } else if (addr != NULL) { tag = "address"; durable_ts = __rollback_get_ref_max_durable_timestamp(session, &addr->ta); prepared = addr->ta.prepare; - result = (durable_ts > rollback_timestamp) || prepared; + newest_txn = addr->ta.newest_txn; + result = (durable_ts > rollback_timestamp) || prepared || + WT_CHECK_RECOVERY_FLAG_TXNID_CKPT_SNAPMIN(session, newest_txn); } __wt_verbose(session, WT_VERB_RECOVERY_RTS(session), - "%p: page with %s durable timestamp: %s and prepared updates: %s", (void *)ref, tag, - __wt_timestamp_to_string(durable_ts, ts_string), prepared ? "true" : "false"); + "%p: page with %s durable timestamp: %s, newest txn: %" PRIu64 " and prepared updates: %s", + (void *)ref, tag, __wt_timestamp_to_string(durable_ts, ts_string), newest_txn, + prepared ? "true" : "false"); return (result); } @@ -904,6 +1008,9 @@ __rollback_to_stable_btree_walk(WT_SESSION_IMPL *session, wt_timestamp_t rollbac WT_DECL_RET; WT_REF *child_ref, *ref; + /* Set this flag to return error instead of panic if file is corrupted. */ + F_SET(session, WT_SESSION_QUIET_CORRUPT_FILE); + /* Walk the tree, marking commits aborted where appropriate. */ ref = NULL; while ((ret = __wt_tree_walk_custom_skip(session, &ref, __wt_rts_page_skip, &rollback_timestamp, @@ -917,6 +1024,7 @@ __rollback_to_stable_btree_walk(WT_SESSION_IMPL *session, wt_timestamp_t rollbac } else WT_RET(__rollback_abort_newer_updates(session, ref, rollback_timestamp)); + F_CLR(session, WT_SESSION_QUIET_CORRUPT_FILE); return (ret); } @@ -1151,12 +1259,14 @@ __rollback_to_stable_btree_apply(WT_SESSION_IMPL *session) WT_TXN_GLOBAL *txn_global; wt_timestamp_t max_durable_ts, newest_start_durable_ts, newest_stop_durable_ts, rollback_timestamp; + uint64_t rollback_txnid; size_t addr_size; char ts_string[2][WT_TS_INT_STRING_SIZE]; const char *config, *uri; - bool durable_ts_found, prepared_updates; + bool durable_ts_found, prepared_updates, has_txn_updates_gt_than_ckpt_snap; txn_global = &S2C(session)->txn_global; + rollback_txnid = 0; addr_size = 0; /* @@ -1173,6 +1283,13 @@ __rollback_to_stable_btree_apply(WT_SESSION_IMPL *session) WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_SCHEMA)); WT_RET(__wt_metadata_cursor(session, &cursor)); + if (F_ISSET(S2C(session), WT_CONN_RECOVERING)) + __wt_verbose(session, WT_VERB_RECOVERY_RTS(session), + "Recovered checkpoint snapshot min: %" PRIu64 ", snapshot max: %" PRIu64 + ", snapshot count: %" PRIu32, + S2C(session)->recovery_ckpt_snap_min, S2C(session)->recovery_ckpt_snap_max, + S2C(session)->recovery_ckpt_snapshot_count); + while ((ret = cursor->next(cursor)) == 0) { WT_ERR(cursor->get_key(cursor, &uri)); @@ -1187,7 +1304,7 @@ __rollback_to_stable_btree_apply(WT_SESSION_IMPL *session) /* Find out the max durable timestamp of the object from checkpoint. */ newest_start_durable_ts = newest_stop_durable_ts = WT_TS_NONE; - durable_ts_found = prepared_updates = false; + durable_ts_found = prepared_updates = has_txn_updates_gt_than_ckpt_snap = false; WT_ERR(__wt_config_getones(session, config, "checkpoint", &cval)); __wt_config_subinit(session, &ckptconf, &cval); for (; __wt_config_next(&ckptconf, &key, &cval) == 0;) { @@ -1210,12 +1327,22 @@ __rollback_to_stable_btree_apply(WT_SESSION_IMPL *session) prepared_updates = true; } WT_ERR_NOTFOUND_OK(ret, false); + ret = __wt_config_subgets(session, &cval, "newest_txn", &value); + if (value.len != 0) + rollback_txnid = (uint64_t)value.val; + WT_ERR_NOTFOUND_OK(ret, false); ret = __wt_config_subgets(session, &cval, "addr", &value); if (ret == 0) addr_size = value.len; WT_ERR_NOTFOUND_OK(ret, false); } max_durable_ts = WT_MAX(newest_start_durable_ts, newest_stop_durable_ts); + has_txn_updates_gt_than_ckpt_snap = + WT_CHECK_RECOVERY_FLAG_TXNID_CKPT_SNAPMIN(session, rollback_txnid); + + /* Increment the inconsistent checkpoint stats counter. */ + if (has_txn_updates_gt_than_ckpt_snap) + WT_STAT_CONN_DATA_INCR(session, txn_rts_inconsistent_ckpt); /* * The rollback to stable will skip the tables during recovery and shutdown in the following @@ -1257,15 +1384,18 @@ __rollback_to_stable_btree_apply(WT_SESSION_IMPL *session) * 1. The tree is modified. * 2. The checkpoint durable start/stop timestamp is greater than the rollback timestamp. * 3. There is no durable timestamp in any checkpoint. + * 4. The checkpoint newest txn is greater than snapshot min txn id */ if (S2BT(session)->modified || max_durable_ts > rollback_timestamp || prepared_updates || - !durable_ts_found) { + !durable_ts_found || has_txn_updates_gt_than_ckpt_snap) { __wt_verbose(session, WT_VERB_RECOVERY_RTS(session), "tree rolled back with durable timestamp: %s, or when tree is modified: %s or " - "prepared updates: %s or when durable time is not found: %s", + "prepared updates: %s or when durable time is not found: %s or txnid is greater than " + "recovery checkpoint snap min: %s", __wt_timestamp_to_string(max_durable_ts, ts_string[0]), S2BT(session)->modified ? "true" : "false", prepared_updates ? "true" : "false", - !durable_ts_found ? "true" : "false"); + !durable_ts_found ? "true" : "false", + has_txn_updates_gt_than_ckpt_snap ? "true" : "false"); WT_TRET(__rollback_to_stable_btree(session, rollback_timestamp)); } else __wt_verbose(session, WT_VERB_RECOVERY_RTS(session), @@ -1287,6 +1417,14 @@ __rollback_to_stable_btree_apply(WT_SESSION_IMPL *session) WT_TRET(__rollback_to_stable_btree_hs_truncate(session, S2BT(session)->id)); WT_TRET(__wt_session_release_dhandle(session)); + + /* + * Continue when the table is corrupted and proceed to perform rollback to stable on other + * tables. + */ + if (ret == WT_ERROR && F_ISSET(S2C(session), WT_CONN_DATA_CORRUPTION)) + continue; + WT_ERR(ret); } WT_ERR_NOTFOUND_OK(ret, false); diff --git a/src/third_party/wiredtiger/src/utilities/util_list.c b/src/third_party/wiredtiger/src/utilities/util_list.c index 13a3577745f..1c9cae21bbf 100644 --- a/src/third_party/wiredtiger/src/utilities/util_list.c +++ b/src/third_party/wiredtiger/src/utilities/util_list.c @@ -88,6 +88,12 @@ list_init_block(WT_SESSION *session, const char *key, WT_BLOCK *block) wt_api = session->connection->get_extension_api(session->connection); if ((ret = wt_api->metadata_search(wt_api, session, key, &config)) != 0) WT_ERR(util_err(session, ret, "%s: WT_EXTENSION_API.metadata_search", key)); + /* + * The config variable should be set and not NULL, but Coverity is convinced otherwise. This is + * an infrequent code path. Just add this extra conditional to make it happy. + */ + if (config == NULL) + goto err; if ((ret = wt_api->config_parser_open(wt_api, session, config, strlen(config), &parser)) != 0) WT_ERR(util_err(session, ret, "WT_EXTENSION_API.config_parser_open")); if ((ret = parser->get(parser, "allocation_size", &cval)) == 0) diff --git a/src/third_party/wiredtiger/test/format/config.c b/src/third_party/wiredtiger/test/format/config.c index 13d91b793e3..46b475b219d 100644 --- a/src/third_party/wiredtiger/test/format/config.c +++ b/src/third_party/wiredtiger/test/format/config.c @@ -784,25 +784,11 @@ config_lsm_reset(void) config_single("transaction.timestamps=off", false); } - /* - * LSM does not work with block-based incremental backup, change the incremental backup - * mechanism if block based in configured. - */ + /* LSM may not work with backups, turn off backups if lsm is configured. */ if (g.c_backups) { - if (config_is_perm("backup.incremental") && g.c_backup_incr_flag == INCREMENTAL_BLOCK) - testutil_die(EINVAL, "LSM does not work with backup.incremental=block configuration."); - - if (g.c_backup_incr_flag == INCREMENTAL_BLOCK) - switch (mmrand(NULL, 1, 2)) { - case 1: - /* 50% */ - config_single("backup.incremental=off", false); - break; - case 2: - /* 50% */ - config_single("backup.incremental=log", false); - break; - } + if (config_is_perm("backup")) + testutil_die(EINVAL, "LSM is incompatible with backup configurations"); + config_single("backup=off", false); } } diff --git a/src/third_party/wiredtiger/test/suite/test_checkpoint_snapshot02.py b/src/third_party/wiredtiger/test/suite/test_checkpoint_snapshot02.py new file mode 100644 index 00000000000..0f705301bcd --- /dev/null +++ b/src/third_party/wiredtiger/test/suite/test_checkpoint_snapshot02.py @@ -0,0 +1,147 @@ +#!/usr/bin/env python +# +# Public Domain 2014-2020 MongoDB, Inc. +# Public Domain 2008-2014 WiredTiger, Inc. +# +# This is free and unencumbered software released into the public domain. +# +# Anyone is free to copy, modify, publish, use, compile, sell, or +# distribute this software, either in source code form or as a compiled +# binary, for any purpose, commercial or non-commercial, and by any +# means. +# +# In jurisdictions that recognize copyright laws, the author or authors +# of this software dedicate any and all copyright interest in the +# software to the public domain. We make this dedication for the benefit +# of the public at large and to the detriment of our heirs and +# successors. We intend this dedication to be an overt act of +# relinquishment in perpetuity of all present and future rights to this +# software under copyright law. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. + +import fnmatch, os, shutil, threading, time +from wtthread import checkpoint_thread, op_thread +from helper import copy_wiredtiger_home +import wiredtiger, wttest +from wtdataset import SimpleDataSet +from wtscenario import make_scenarios +from wiredtiger import stat + +# test_checkpoint_snapshot02.py +# This test is to run checkpoint and eviction in parallel with timing +# stress for checkpoint and let eviction write more data than checkpoint. +# + +def timestamp_str(t): + return '%x' % t +class test_checkpoint_snapshot02(wttest.WiredTigerTestCase): + + # Create a table. + uri = "table:test_checkpoint_snapshot02" + nrows = 1000 + + def conn_config(self): + config = 'cache_size=5MB,statistics=(all),statistics_log=(json,on_close,wait=1),log=(enabled=true),timing_stress_for_test=[checkpoint_slow]' + return config + + def large_updates(self, uri, value, ds, nrows): + # Update a large number of records. + session = self.session + cursor = session.open_cursor(uri) + for i in range(0, nrows): + session.begin_transaction() + cursor[ds.key(i)] = value + session.commit_transaction() + cursor.close() + + def check(self, check_value, uri, nrows): + session = self.session + session.begin_transaction() + cursor = session.open_cursor(uri) + count = 0 + for k, v in cursor: + self.assertEqual(v, check_value) + count += 1 + session.commit_transaction() + self.assertEqual(count, nrows) + + def test_checkpoint_snapshot(self): + + ds = SimpleDataSet(self, self.uri, 0, key_format="S", value_format="S",config='log=(enabled=false)') + ds.populate() + valuea = "aaaaa" * 100 + valueb = "bbbbb" * 100 + valuec = "ccccc" * 100 + valued = "ddddd" * 100 + + cursor = self.session.open_cursor(self.uri) + self.large_updates(self.uri, valuea, ds, self.nrows) + + self.check(valuea, self.uri, self.nrows) + + self.session.begin_transaction() + + # Create a checkpoint thread + done = threading.Event() + ckpt = checkpoint_thread(self.conn, done) + try: + ckpt.start() + + # Check for the value to wait for checkpoint to start. + cursor = self.session.open_cursor(self.uri) + count = 0 + for k, v in cursor: + self.assertEqual(v, valuea) + count += 1 + self.assertEqual(count, self.nrows) + + # Insert some data from the transaction which is running before + # checkpoint started + for i in range(0, self.nrows): + cursor.set_key(ds.key(i)) + cursor.set_value(valueb) + self.assertEqual(cursor.insert(), 0) + self.session.commit_transaction() + + self.large_updates(self.uri, valuec, ds, self.nrows) + self.large_updates(self.uri, valued, ds, self.nrows) + + finally: + done.set() + ckpt.join() + + #Simulate a crash by copying to a new directory(RESTART). + copy_wiredtiger_home(".", "RESTART") + + # Open the new directory. + self.conn = self.setUpConnectionOpen("RESTART") + self.session = self.setUpSessionOpen(self.conn) + + # Check the table contains the last checkpointed value. + self.check(valuea, self.uri, self.nrows) + + stat_cursor = self.session.open_cursor('statistics:', None, None) + inconsistent_ckpt = stat_cursor[stat.conn.txn_rts_inconsistent_ckpt][2] + hs_removed = stat_cursor[stat.conn.txn_rts_hs_removed][2] + keys_removed = stat_cursor[stat.conn.txn_rts_keys_removed][2] + keys_restored = stat_cursor[stat.conn.txn_rts_keys_restored][2] + pages_visited = stat_cursor[stat.conn.txn_rts_pages_visited][2] + upd_aborted = stat_cursor[stat.conn.txn_rts_upd_aborted][2] + stat_cursor.close() + + self.assertGreater(inconsistent_ckpt, 0) + self.assertGreater(hs_removed, 0) + self.assertEqual(upd_aborted, 0) + self.assertEqual(keys_removed, 0) + self.assertEqual(keys_restored, 0) + self.assertGreaterEqual(pages_visited, 0) + +if __name__ == '__main__': + wttest.run() diff --git a/src/third_party/wiredtiger/test/suite/test_rollback_to_stable05.py b/src/third_party/wiredtiger/test/suite/test_rollback_to_stable05.py index f3c93509d63..7cfc3ba2fe7 100755 --- a/src/third_party/wiredtiger/test/suite/test_rollback_to_stable05.py +++ b/src/third_party/wiredtiger/test/suite/test_rollback_to_stable05.py @@ -133,12 +133,11 @@ class test_rollback_to_stable05(test_rollback_to_stable_base): self.assertEqual(calls, 1) self.assertEqual(keys_removed, 0) self.assertEqual(keys_restored, 0) + self.assertGreaterEqual(pages_visited, 0) if self.in_memory: - self.assertGreaterEqual(pages_visited, 0) self.assertEqual(upd_aborted, 0) self.assertEqual(hs_removed, 0) else: - self.assertEqual(pages_visited, 0) self.assertEqual(upd_aborted, 0) self.assertEqual(hs_removed, 0) diff --git a/src/third_party/wiredtiger/test/suite/test_rollback_to_stable12.py b/src/third_party/wiredtiger/test/suite/test_rollback_to_stable12.py index 4ac28066596..631213e665b 100755 --- a/src/third_party/wiredtiger/test/suite/test_rollback_to_stable12.py +++ b/src/third_party/wiredtiger/test/suite/test_rollback_to_stable12.py @@ -143,7 +143,7 @@ class test_rollback_to_stable12(test_rollback_to_stable_base): self.assertGreater(pages_visited, 0) self.assertGreaterEqual(hs_removed, 0) self.assertEqual(hs_sweep, 0) - self.assertGreater(pages_walk_skipped, 0) + self.assertGreaterEqual(pages_walk_skipped, 0) if __name__ == '__main__': wttest.run() diff --git a/src/third_party/wiredtiger/test/suite/test_tiered01.py b/src/third_party/wiredtiger/test/suite/test_tiered01.py index 2a41c3ff7ef..9a7066fd708 100644 --- a/src/third_party/wiredtiger/test/suite/test_tiered01.py +++ b/src/third_party/wiredtiger/test/suite/test_tiered01.py @@ -71,5 +71,12 @@ class test_tiered01(wttest.WiredTigerTestCase): # self.session.drop(self.uri) + # It is an error to configure a tiered table with no tiers + def test_no_tiers(self): + msg = '/tiered table must specify at least one tier/' + self.assertRaisesWithMessage(wiredtiger.WiredTigerError, + lambda: self.session.create(self.uri, 'type=tiered,key_format=S,tiered=(tiers=())'), + msg) + if __name__ == '__main__': wttest.run() diff --git a/src/third_party/wiredtiger/test/suite/test_tiered02.py b/src/third_party/wiredtiger/test/suite/test_tiered02.py new file mode 100644 index 00000000000..17eb3073c39 --- /dev/null +++ b/src/third_party/wiredtiger/test/suite/test_tiered02.py @@ -0,0 +1,63 @@ +#!/usr/bin/env python +# +# Public Domain 2014-2021 MongoDB, Inc. +# Public Domain 2008-2014 WiredTiger, Inc. +# +# This is free and unencumbered software released into the public domain. +# +# Anyone is free to copy, modify, publish, use, compile, sell, or +# distribute this software, either in source code form or as a compiled +# binary, for any purpose, commercial or non-commercial, and by any +# means. +# +# In jurisdictions that recognize copyright laws, the author or authors +# of this software dedicate any and all copyright interest in the +# software to the public domain. We make this dedication for the benefit +# of the public at large and to the detriment of our heirs and +# successors. We intend this dedication to be an overt act of +# relinquishment in perpetuity of all present and future rights to this +# software under copyright law. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. + +import wiredtiger, wtscenario, wttest +from wtdataset import SimpleDataSet + +# test_tiered02.py +# Test block-log-structured tree configuration options. +class test_tiered02(wttest.WiredTigerTestCase): + K = 1024 + M = 1024 * K + G = 1024 * M + uri = "file:test_tiered02" + + # Occasionally add a lot of records, so that merges (and bloom) happen. + record_count_scenarios = wtscenario.quick_scenarios( + 'nrecs', [10, 10000], [0.9, 0.1]) + + scenarios = wtscenario.make_scenarios(record_count_scenarios, prune=100, prunelong=500) + + # Test drop of an object. + def test_tiered(self): + args = 'key_format=S,block_allocation=log-structured' + self.verbose(3, + 'Test log-structured allocation with config: ' + args + ' count: ' + str(self.nrecs)) + #ds = SimpleDataSet(self, self.uri, self.nrecs, config=args) + ds = SimpleDataSet(self, self.uri, 10, config=args) + ds.populate() + self.session.checkpoint() + ds = SimpleDataSet(self, self.uri, 10000, config=args) + ds.populate() + + self.reopen_conn() + ds = SimpleDataSet(self, self.uri, 1000, config=args) + ds.populate() + +if __name__ == '__main__': + wttest.run() diff --git a/src/third_party/wiredtiger/test/suite/test_tiered03.py b/src/third_party/wiredtiger/test/suite/test_tiered03.py new file mode 100644 index 00000000000..624387c21a3 --- /dev/null +++ b/src/third_party/wiredtiger/test/suite/test_tiered03.py @@ -0,0 +1,97 @@ +#!/usr/bin/env python +# +# Public Domain 2014-2021 MongoDB, Inc. +# Public Domain 2008-2014 WiredTiger, Inc. +# +# This is free and unencumbered software released into the public domain. +# +# Anyone is free to copy, modify, publish, use, compile, sell, or +# distribute this software, either in source code form or as a compiled +# binary, for any purpose, commercial or non-commercial, and by any +# means. +# +# In jurisdictions that recognize copyright laws, the author or authors +# of this software dedicate any and all copyright interest in the +# software to the public domain. We make this dedication for the benefit +# of the public at large and to the detriment of our heirs and +# successors. We intend this dedication to be an overt act of +# relinquishment in perpetuity of all present and future rights to this +# software under copyright law. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. + +import os, re +import wiredtiger, wtscenario, wttest +from wtdataset import SimpleDataSet + +# test_tiered03.py +# Test block-log-structured tree configuration options. +class test_tiered03(wttest.WiredTigerTestCase): + K = 1024 + M = 1024 * K + G = 1024 * M + uri = 'file:test_tiered03' + + # Occasionally add a lot of records, so that merges (and bloom) happen. + record_count_scenarios = wtscenario.quick_scenarios( + 'nrecs', [10, 10000], [0.9, 0.1]) + + scenarios = wtscenario.make_scenarios(record_count_scenarios, prune=100, prunelong=500) + + # Test sharing data between a primary and a secondary + def test_sharing(self): + args = 'block_allocation=log-structured' + self.verbose(3, + 'Test log-structured allocation with config: ' + args + ' count: ' + str(self.nrecs)) + ds = SimpleDataSet(self, self.uri, 10, config=args) + ds.populate() + ds.check() + self.session.checkpoint() + ds.check() + + # Create a secondary database + dir2 = os.path.join(self.home, 'SECONDARY') + os.mkdir(dir2) + conn2 = self.setUpConnectionOpen(dir2) + session2 = conn2.open_session() + + # Reference the tree from the secondary: + metac = self.session.open_cursor('metadata:') + metac2 = session2.open_cursor('metadata:', None, 'readonly=0') + uri2 = self.uri[:5] + '../' + self.uri[5:] + metac2[uri2] = metac[self.uri] + ",readonly=1" + + cursor2 = session2.open_cursor(uri2) + ds.check_cursor(cursor2) + cursor2.close() + + newds = SimpleDataSet(self, self.uri, 10000, config=args) + newds.populate() + newds.check() + self.session.checkpoint() + newds.check() + + # Check we can still read from the last checkpoint + cursor2 = session2.open_cursor(uri2) + ds.check_cursor(cursor2) + cursor2.close() + + # Bump to new checkpoint + origmeta = metac[self.uri] + checkpoint = re.search(r',checkpoint=\(.+?\)\)', origmeta).group(0)[1:] + self.pr('Orig checkpoint: ' + checkpoint) + session2.alter(uri2, checkpoint) + self.pr('New metadata on secondaery: ' + metac2[uri2]) + + # Check that we can see the new data + cursor2 = session2.open_cursor(uri2) + newds.check_cursor(cursor2) + +if __name__ == '__main__': + wttest.run() |