diff options
16 files changed, 803 insertions, 51 deletions
diff --git a/src/third_party/wiredtiger/dist/api_data.py b/src/third_party/wiredtiger/dist/api_data.py index d6b8279bc37..35a735442c8 100644 --- a/src/third_party/wiredtiger/dist/api_data.py +++ b/src/third_party/wiredtiger/dist/api_data.py @@ -1143,6 +1143,12 @@ wiredtiger_open_common =\ wiredtiger_open_log_configuration +\ wiredtiger_open_tiered_storage_configuration +\ wiredtiger_open_statistics_log_configuration + [ + Config('backup_restore_target', '', r''' + If non-empty and restoring from a backup, restore only the table object targets listed. + WiredTiger will remove all the metadata entries for the tables that are not listed in th + list from the reconstructed metadata. The target list must include URIs matching of type + table:''', + type='list'), Config('buffer_alignment', '-1', r''' in-memory alignment (in bytes) for buffers used for I/O. The default value of -1 indicates a platform-specific alignment value diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data index 95ba62293be..125345c11e8 100644 --- a/src/third_party/wiredtiger/import.data +++ b/src/third_party/wiredtiger/import.data @@ -2,5 +2,5 @@ "vendor": "wiredtiger", "github": "wiredtiger/wiredtiger.git", "branch": "mongodb-master", - "commit": "a5d55eec2229a8da1233103d117871a7f51e1df8" + "commit": "746e435bb142b8ae482be38f8a7ed7f4a0180a96" } diff --git a/src/third_party/wiredtiger/src/config/config_def.c b/src/third_party/wiredtiger/src/config/config_def.c index 18b5df9b210..f771e55b661 100644 --- a/src/third_party/wiredtiger/src/config/config_def.c +++ b/src/third_party/wiredtiger/src/config/config_def.c @@ -846,6 +846,7 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_transaction_sync_subconfigs {NULL, NULL, NULL, NULL, NULL, 0}}; static const WT_CONFIG_CHECK confchk_wiredtiger_open[] = { + {"backup_restore_target", "list", NULL, NULL, NULL, 0}, {"block_cache", "category", NULL, NULL, confchk_wiredtiger_open_block_cache_subconfigs, 12}, 
{"buffer_alignment", "int", NULL, "min=-1,max=1MB", NULL, 0}, {"builtin_extension_config", "string", NULL, NULL, NULL, 0}, @@ -928,6 +929,7 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open[] = { {NULL, NULL, NULL, NULL, NULL, 0}}; static const WT_CONFIG_CHECK confchk_wiredtiger_open_all[] = { + {"backup_restore_target", "list", NULL, NULL, NULL, 0}, {"block_cache", "category", NULL, NULL, confchk_wiredtiger_open_block_cache_subconfigs, 12}, {"buffer_alignment", "int", NULL, "min=-1,max=1MB", NULL, 0}, {"builtin_extension_config", "string", NULL, NULL, NULL, 0}, @@ -1010,6 +1012,7 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_all[] = { {NULL, NULL, NULL, NULL, NULL, 0}}; static const WT_CONFIG_CHECK confchk_wiredtiger_open_basecfg[] = { + {"backup_restore_target", "list", NULL, NULL, NULL, 0}, {"block_cache", "category", NULL, NULL, confchk_wiredtiger_open_block_cache_subconfigs, 12}, {"buffer_alignment", "int", NULL, "min=-1,max=1MB", NULL, 0}, {"builtin_extension_config", "string", NULL, NULL, NULL, 0}, @@ -1088,6 +1091,7 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_basecfg[] = { {NULL, NULL, NULL, NULL, NULL, 0}}; static const WT_CONFIG_CHECK confchk_wiredtiger_open_usercfg[] = { + {"backup_restore_target", "list", NULL, NULL, NULL, 0}, {"block_cache", "category", NULL, NULL, confchk_wiredtiger_open_block_cache_subconfigs, 12}, {"buffer_alignment", "int", NULL, "min=-1,max=1MB", NULL, 0}, {"builtin_extension_config", "string", NULL, NULL, NULL, 0}, @@ -1462,6 +1466,7 @@ static const WT_CONFIG_ENTRY config_entries[] = {{"WT_CONNECTION.add_collator", "version=(major=0,minor=0),write_timestamp_usage=none", confchk_tiered_meta, 52}, {"wiredtiger_open", + "backup_restore_target=," "block_cache=(blkcache_eviction_aggression=1800," "cache_on_checkpoint=true,cache_on_writes=true,enabled=false," "full_target=95,hashsize=0,max_percent_overhead=10,nvram_path=," @@ -1500,8 +1505,9 @@ static const WT_CONFIG_ENTRY config_entries[] = 
{{"WT_CONNECTION.add_collator", "transaction_sync=(enabled=false,method=fsync)," "use_environment=true,use_environment_priv=false,verbose=[]," "verify_metadata=false,write_through=", - confchk_wiredtiger_open, 58}, + confchk_wiredtiger_open, 59}, {"wiredtiger_open_all", + "backup_restore_target=," "block_cache=(blkcache_eviction_aggression=1800," "cache_on_checkpoint=true,cache_on_writes=true,enabled=false," "full_target=95,hashsize=0,max_percent_overhead=10,nvram_path=," @@ -1540,8 +1546,9 @@ static const WT_CONFIG_ENTRY config_entries[] = {{"WT_CONNECTION.add_collator", "transaction_sync=(enabled=false,method=fsync)," "use_environment=true,use_environment_priv=false,verbose=[]," "verify_metadata=false,version=(major=0,minor=0),write_through=", - confchk_wiredtiger_open_all, 59}, + confchk_wiredtiger_open_all, 60}, {"wiredtiger_open_basecfg", + "backup_restore_target=," "block_cache=(blkcache_eviction_aggression=1800," "cache_on_checkpoint=true,cache_on_writes=true,enabled=false," "full_target=95,hashsize=0,max_percent_overhead=10,nvram_path=," @@ -1578,8 +1585,9 @@ static const WT_CONFIG_ENTRY config_entries[] = {{"WT_CONNECTION.add_collator", "object_target_size=10M),timing_stress_for_test=," "transaction_sync=(enabled=false,method=fsync),verbose=[]," "verify_metadata=false,version=(major=0,minor=0),write_through=", - confchk_wiredtiger_open_basecfg, 53}, + confchk_wiredtiger_open_basecfg, 54}, {"wiredtiger_open_usercfg", + "backup_restore_target=," "block_cache=(blkcache_eviction_aggression=1800," "cache_on_checkpoint=true,cache_on_writes=true,enabled=false," "full_target=95,hashsize=0,max_percent_overhead=10,nvram_path=," @@ -1616,7 +1624,7 @@ static const WT_CONFIG_ENTRY config_entries[] = {{"WT_CONNECTION.add_collator", "object_target_size=10M),timing_stress_for_test=," "transaction_sync=(enabled=false,method=fsync),verbose=[]," "verify_metadata=false,write_through=", - confchk_wiredtiger_open_usercfg, 52}, + confchk_wiredtiger_open_usercfg, 53}, {NULL, 
NULL, NULL, 0}}; int diff --git a/src/third_party/wiredtiger/src/conn/conn_api.c b/src/third_party/wiredtiger/src/conn/conn_api.c index 78ee338f34e..799b199bfb2 100644 --- a/src/third_party/wiredtiger/src/conn/conn_api.c +++ b/src/third_party/wiredtiger/src/conn/conn_api.c @@ -2980,7 +2980,7 @@ wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler, const char *c */ WT_ERR(__wt_config_gets(session, cfg, "verify_metadata", &cval)); verify_meta = cval.val; - WT_ERR(__wt_turtle_init(session, verify_meta)); + WT_ERR(__wt_turtle_init(session, verify_meta, cfg)); /* Verify the metadata file. */ if (verify_meta) { @@ -3045,6 +3045,15 @@ err: __wt_scr_discard(session); __wt_scr_discard(&conn->dummy_session); + /* + * Clean up the partial backup restore flag, backup btree id list. The backup id list was used + * in recovery to truncate the history store entries and the flag was used to allow schema drops + * to happen on tables to clean up the entries in the creation of the metadata file. + */ + F_CLR(conn, WT_CONN_BACKUP_PARTIAL_RESTORE); + if (conn->partial_backup_remove_ids != NULL) + __wt_free(session, conn->partial_backup_remove_ids); + if (ret != 0) { /* * Set panic if we're returning the run recovery error or if recovery did not complete so diff --git a/src/third_party/wiredtiger/src/include/connection.h b/src/third_party/wiredtiger/src/include/connection.h index b681d54c471..a22be3d578d 100644 --- a/src/third_party/wiredtiger/src/include/connection.h +++ b/src/third_party/wiredtiger/src/include/connection.h @@ -211,6 +211,18 @@ struct __wt_name_flag { } while (0) /* + * WT_BACKUP_TARGET -- + * A target URI entry indicating this URI should be restored during a partial backup. 
+ */ +struct __wt_backup_target { + const char *name; /* File name */ + + uint64_t name_hash; /* hash of name */ + TAILQ_ENTRY(__wt_backup_target) hashq; /* internal hash queue */ +}; +typedef TAILQ_HEAD(__wt_backuphash, __wt_backup_target) WT_BACKUPHASH; + +/* * WT_CONNECTION_IMPL -- * Implementation of WT_CONNECTION */ @@ -338,6 +350,7 @@ struct __wt_connection_impl { WT_RWLOCK hot_backup_lock; /* Hot backup serialization */ uint64_t hot_backup_start; /* Clock value of most recent checkpoint needed by hot backup */ char **hot_backup_list; /* Hot backup file list */ + uint32_t *partial_backup_remove_ids; /* Remove btree id list for partial backup */ WT_SESSION_IMPL *ckpt_session; /* Checkpoint thread session */ wt_thread_t ckpt_tid; /* Checkpoint thread */ @@ -618,28 +631,29 @@ struct __wt_connection_impl { uint32_t server_flags; /* AUTOMATIC FLAG VALUE GENERATION START 0 */ -#define WT_CONN_CACHE_CURSORS 0x000001u -#define WT_CONN_CACHE_POOL 0x000002u -#define WT_CONN_CKPT_GATHER 0x000004u -#define WT_CONN_CKPT_SYNC 0x000008u -#define WT_CONN_CLOSING 0x000010u -#define WT_CONN_CLOSING_CHECKPOINT 0x000020u -#define WT_CONN_CLOSING_NO_MORE_OPENS 0x000040u -#define WT_CONN_COMPATIBILITY 0x000080u -#define WT_CONN_DATA_CORRUPTION 0x000100u -#define WT_CONN_EVICTION_RUN 0x000200u -#define WT_CONN_HS_OPEN 0x000400u -#define WT_CONN_INCR_BACKUP 0x000800u -#define WT_CONN_IN_MEMORY 0x001000u -#define WT_CONN_LEAK_MEMORY 0x002000u -#define WT_CONN_LSM_MERGE 0x004000u -#define WT_CONN_OPTRACK 0x008000u -#define WT_CONN_PANIC 0x010000u -#define WT_CONN_READONLY 0x020000u -#define WT_CONN_RECONFIGURING 0x040000u -#define WT_CONN_RECOVERING 0x080000u -#define WT_CONN_SALVAGE 0x100000u -#define WT_CONN_WAS_BACKUP 0x200000u +#define WT_CONN_BACKUP_PARTIAL_RESTORE 0x000001u +#define WT_CONN_CACHE_CURSORS 0x000002u +#define WT_CONN_CACHE_POOL 0x000004u +#define WT_CONN_CKPT_GATHER 0x000008u +#define WT_CONN_CKPT_SYNC 0x000010u +#define WT_CONN_CLOSING 0x000020u +#define 
WT_CONN_CLOSING_CHECKPOINT 0x000040u +#define WT_CONN_CLOSING_NO_MORE_OPENS 0x000080u +#define WT_CONN_COMPATIBILITY 0x000100u +#define WT_CONN_DATA_CORRUPTION 0x000200u +#define WT_CONN_EVICTION_RUN 0x000400u +#define WT_CONN_HS_OPEN 0x000800u +#define WT_CONN_INCR_BACKUP 0x001000u +#define WT_CONN_IN_MEMORY 0x002000u +#define WT_CONN_LEAK_MEMORY 0x004000u +#define WT_CONN_LSM_MERGE 0x008000u +#define WT_CONN_OPTRACK 0x010000u +#define WT_CONN_PANIC 0x020000u +#define WT_CONN_READONLY 0x040000u +#define WT_CONN_RECONFIGURING 0x080000u +#define WT_CONN_RECOVERING 0x100000u +#define WT_CONN_SALVAGE 0x200000u +#define WT_CONN_WAS_BACKUP 0x400000u /* AUTOMATIC FLAG VALUE GENERATION STOP 32 */ uint32_t flags; }; diff --git a/src/third_party/wiredtiger/src/include/extern.h b/src/third_party/wiredtiger/src/include/extern.h index 0c3c27fe9f0..bba54392137 100644 --- a/src/third_party/wiredtiger/src/include/extern.h +++ b/src/third_party/wiredtiger/src/include/extern.h @@ -1523,7 +1523,7 @@ extern int __wt_try_writelock(WT_SESSION_IMPL *session, WT_RWLOCK *l) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_turtle_exists(WT_SESSION_IMPL *session, bool *existp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_turtle_init(WT_SESSION_IMPL *session, bool verify_meta) +extern int __wt_turtle_init(WT_SESSION_IMPL *session, bool verify_meta, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_turtle_read(WT_SESSION_IMPL *session, const char *key, char **valuep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); diff --git a/src/third_party/wiredtiger/src/include/wiredtiger.in b/src/third_party/wiredtiger/src/include/wiredtiger.in index b402a564eb7..09ebb18a72b 100644 --- a/src/third_party/wiredtiger/src/include/wiredtiger.in +++ b/src/third_party/wiredtiger/src/include/wiredtiger.in @@ -2760,6 +2760,10 @@ struct __wt_connection { * event handler is installed that writes error messages to stderr. 
See * @ref event_message_handling for more information. * @configstart{wiredtiger_open, see dist/api_data.py} + * @config{backup_restore_target, If non-empty and restoring from a backup\, restore only the table + * object targets listed. WiredTiger will remove all the metadata entries for the tables that are + * not listed in th list from the reconstructed metadata. The target list must include URIs + * matching of type table:., a list of strings; default empty.} * @config{block_cache = (, block cache configuration options., a set of related configuration * options defined below.} * @config{ blkcache_eviction_aggression, seconds an diff --git a/src/third_party/wiredtiger/src/include/wt_internal.h b/src/third_party/wiredtiger/src/include/wt_internal.h index f5871dbf788..1fa544ab841 100644 --- a/src/third_party/wiredtiger/src/include/wt_internal.h +++ b/src/third_party/wiredtiger/src/include/wt_internal.h @@ -69,6 +69,8 @@ struct __wt_addr; typedef struct __wt_addr WT_ADDR; struct __wt_addr_copy; typedef struct __wt_addr_copy WT_ADDR_COPY; +struct __wt_backup_target; +typedef struct __wt_backup_target WT_BACKUP_TARGET; struct __wt_blkcache; typedef struct __wt_blkcache WT_BLKCACHE; struct __wt_blkcache_item; diff --git a/src/third_party/wiredtiger/src/meta/meta_turtle.c b/src/third_party/wiredtiger/src/meta/meta_turtle.c index 85321e59795..0fb5bdb4436 100644 --- a/src/third_party/wiredtiger/src/meta/meta_turtle.c +++ b/src/third_party/wiredtiger/src/meta/meta_turtle.c @@ -53,18 +53,55 @@ __metadata_init(WT_SESSION_IMPL *session) } /* + * __metadata_backup_target_uri_search -- + * Search in the backup uri hash table if the given uri exists. 
+ */ +static bool +__metadata_backup_target_uri_search( + WT_SESSION_IMPL *session, WT_BACKUPHASH *backuphash, const char *uri) +{ + WT_BACKUP_TARGET *target_uri; + WT_CONNECTION_IMPL *conn; + uint64_t bucket, hash; + bool found; + + conn = S2C(session); + found = false; + + hash = __wt_hash_city64(uri, strlen(uri)); + bucket = hash & (conn->hash_size - 1); + + TAILQ_FOREACH (target_uri, &backuphash[bucket], hashq) + if (strcmp(uri, target_uri->name) == 0) { + found = true; + break; + } + return (found); +} + +/* * __metadata_load_hot_backup -- * Load the contents of any hot backup file. */ static int -__metadata_load_hot_backup(WT_SESSION_IMPL *session) +__metadata_load_hot_backup(WT_SESSION_IMPL *session, WT_BACKUPHASH *backuphash) { + WT_CONFIG_ITEM cval; + WT_CONNECTION_IMPL *conn; WT_DECL_ITEM(key); WT_DECL_ITEM(value); WT_DECL_RET; WT_FSTREAM *fs; + size_t allocated_name, file_len, max_len, slot; + char *filename, *metadata_conf, *metadata_key, **p, **partial_backup_names, *tablename; + const char *drop_cfg[] = {WT_CONFIG_BASE(session, WT_SESSION_drop), "remove_files=false", NULL}; bool exist; + allocated_name = file_len = max_len = slot = 0; + conn = S2C(session); + filename = NULL; + partial_backup_names = NULL; + /* Look for a hot backup file: if we find it, load it. */ WT_RET(__wt_fs_exist(session, WT_METADATA_BACKUP, &exist)); if (!exist) @@ -81,12 +118,84 @@ __metadata_load_hot_backup(WT_SESSION_IMPL *session) WT_ERR(__wt_getline(session, fs, value)); if (value->size == 0) WT_ERR_PANIC(session, EINVAL, "%s: zero-length value", WT_METADATA_BACKUP); + /* + * When performing partial backup restore, generate a list of tables that is not part of the + * target uri list so that we can drop all entries later. To do this, parse through all the + * table metadata entries and check if the metadata entry exists in the target uri hash + * table. If the metadata entry doesn't exist in the hash table, append the table name to + * the partial backup remove list. 
+ */ + metadata_key = (char *)key->data; + if (F_ISSET(conn, WT_CONN_BACKUP_PARTIAL_RESTORE) && + WT_PREFIX_MATCH(metadata_key, "table:")) { + /* Assert that there should be no WiredTiger tables with a table format. */ + WT_ASSERT( + session, __wt_name_check(session, (const char *)key->data, key->size, true) == 0); + /* + * The target uri will be the deciding factor if a specific metadata table entry needs + * to be dropped. If the metadata table entry does not exist in the target uri hash + * table, append the metadata key to the backup remove list. + */ + if (__metadata_backup_target_uri_search(session, backuphash, metadata_key) == false) { + if (key->size > max_len) + max_len = key->size; + WT_ERR(__wt_realloc_def(session, &allocated_name, slot + 2, &partial_backup_names)); + p = &partial_backup_names[slot]; + p[0] = p[1] = NULL; + + WT_ERR( + __wt_strndup(session, (char *)key->data, key->size, &partial_backup_names[slot])); + slot++; + } + } + + /* + * In the case of partial backup restore, add the entry to the metadata even if the table + * entry doesn't exist so that we can correctly drop all related entries via the schema code + * later. + */ WT_ERR(__wt_metadata_update(session, key->data, value->data)); } - F_SET(S2C(session), WT_CONN_WAS_BACKUP); + F_SET(conn, WT_CONN_WAS_BACKUP); + if (F_ISSET(conn, WT_CONN_BACKUP_PARTIAL_RESTORE) && partial_backup_names != NULL) { + WT_ERR(__wt_calloc_def(session, slot + 1, &conn->partial_backup_remove_ids)); + file_len = strlen("file:") + max_len + strlen(".wt") + 1; + WT_ERR(__wt_calloc_def(session, file_len, &filename)); + /* + * Parse through the partial backup list and attempt to clean up all metadata references + * relating to the file. To do so, perform a schema drop operation on the table to cleanly + * remove all linked references. At the same time generate a list of btree ids to be used in + * recovery to truncate all the history store records. 
+ */ + for (slot = 0; partial_backup_names[slot] != NULL; ++slot) { + tablename = partial_backup_names[slot]; + WT_PREFIX_SKIP_REQUIRED(session, tablename, "table:"); + WT_ERR(__wt_snprintf(filename, file_len, "file:%s.wt", tablename)); + WT_ERR(__wt_metadata_search(session, filename, &metadata_conf)); + WT_ERR(__wt_config_getones(session, metadata_conf, "id", &cval)); + conn->partial_backup_remove_ids[slot] = (uint32_t)cval.val; + + WT_WITH_SCHEMA_LOCK(session, + WT_WITH_TABLE_WRITE_LOCK( + session, ret = __wt_schema_drop(session, partial_backup_names[slot], drop_cfg))); + WT_ERR(ret); + } + } err: + if (filename != NULL) + __wt_free(session, filename); + + /* + * Free the partial backup names list. The backup id list is used in recovery to truncate the + * history store entries that do not exist as part of the database anymore. + */ + if (partial_backup_names != NULL) { + for (slot = 0; partial_backup_names[slot] != NULL; ++slot) + __wt_free(session, partial_backup_names[slot]); + __wt_free(session, partial_backup_names); + } WT_TRET(__wt_fclose(session, &fs)); __wt_scr_free(session, &key); __wt_scr_free(session, &value); @@ -219,18 +328,97 @@ __wt_turtle_exists(WT_SESSION_IMPL *session, bool *existp) } /* + * __metadata_add_backup_target_uri -- + * Add the target uri to the backup uri hash table. + */ +static int +__metadata_add_backup_target_uri( + WT_SESSION_IMPL *session, WT_BACKUPHASH *backuphash, const char *name, size_t len) +{ + WT_BACKUP_TARGET *new_target_uri; + WT_CONNECTION_IMPL *conn; + WT_DECL_RET; + uint64_t bucket, hash; + + conn = S2C(session); + + WT_ERR(__wt_calloc_one(session, &new_target_uri)); + WT_ERR(__wt_strndup(session, name, len, &new_target_uri->name)); + + hash = __wt_hash_city64(name, len); + bucket = hash & (conn->hash_size - 1); + new_target_uri->name_hash = hash; + /* Insert target uri entry into hashtable. 
*/ + TAILQ_INSERT_HEAD(&backuphash[bucket], new_target_uri, hashq); + + return (0); +err: + if (new_target_uri != NULL) + __wt_free(session, new_target_uri->name); + __wt_free(session, new_target_uri); + + return (ret); +} + +/* + * __metadata_load_target_uri_list -- + * Load the list of target uris and construct a hashtable from it. + */ +static int +__metadata_load_target_uri_list( + WT_SESSION_IMPL *session, bool exist_backup, const char *cfg[], WT_BACKUPHASH *backuphash) +{ + WT_CONFIG backup_config; + WT_CONFIG_ITEM cval, k, v; + WT_DECL_RET; + + WT_TRET(__wt_config_gets(session, cfg, "backup_restore_target", &cval)); + if (cval.len != 0) { + if (!exist_backup) + WT_RET_MSG(session, EINVAL, + "restoring a partial backup requires the WiredTiger metadata backup file."); + F_SET(S2C(session), WT_CONN_BACKUP_PARTIAL_RESTORE); + + /* + * Check that the configuration string only has table schema formats in the target list and + * construct the target hash table. + */ + __wt_config_subinit(session, &backup_config, &cval); + while ((ret = __wt_config_next(&backup_config, &k, &v)) == 0) { + if (!WT_PREFIX_MATCH(k.str, "table:")) + WT_RET_MSG(session, EINVAL, + "partial backup restore only supports objects of type \"table\" formats in the " + "target uri list, found %.*s instead.", + (int)k.len, k.str); + WT_RET(__metadata_add_backup_target_uri(session, backuphash, (char *)k.str, k.len)); + } + WT_RET_NOTFOUND_OK(ret); + } + return (0); +} + +/* * __wt_turtle_init -- * Check the turtle file and create if necessary. 
*/ int -__wt_turtle_init(WT_SESSION_IMPL *session, bool verify_meta) +__wt_turtle_init(WT_SESSION_IMPL *session, bool verify_meta, const char *cfg[]) { + WT_BACKUPHASH *backuphash; + WT_BACKUP_TARGET *target_uri; + WT_CONNECTION_IMPL *conn; WT_DECL_RET; + uint64_t i; char *metaconf, *unused_value; bool exist_backup, exist_incr, exist_isrc, exist_turtle; bool load, load_turtle, validate_turtle; + conn = S2C(session); load = load_turtle = validate_turtle = false; + /* Initialize target uri hashtable. */ + WT_ERR(__wt_calloc_def(session, conn->hash_size, &backuphash)); + for (i = 0; i < conn->hash_size; ++i) + TAILQ_INIT(&backuphash[i]); /* * Discard any turtle setup file left-over from previous runs. This doesn't matter for @@ -241,7 +429,7 @@ __wt_turtle_init(WT_SESSION_IMPL *session, bool verify_meta) /* If we're a readonly database, we can skip discarding the leftover file. */ if (ret == EACCES) ret = 0; - WT_RET(ret); + WT_ERR(ret); } /* @@ -256,23 +444,23 @@ __wt_turtle_init(WT_SESSION_IMPL *session, bool verify_meta) * turtle file and an incremental backup file, that is an error. Otherwise, if there's already a * turtle file, we're done. */ - WT_RET(__wt_fs_exist(session, WT_LOGINCR_BACKUP, &exist_incr)); - WT_RET(__wt_fs_exist(session, WT_LOGINCR_SRC, &exist_isrc)); - WT_RET(__wt_fs_exist(session, WT_METADATA_BACKUP, &exist_backup)); - WT_RET(__wt_fs_exist(session, WT_METADATA_TURTLE, &exist_turtle)); + WT_ERR(__wt_fs_exist(session, WT_LOGINCR_BACKUP, &exist_incr)); + WT_ERR(__wt_fs_exist(session, WT_LOGINCR_SRC, &exist_isrc)); + WT_ERR(__wt_fs_exist(session, WT_METADATA_BACKUP, &exist_backup)); + WT_ERR(__wt_fs_exist(session, WT_METADATA_TURTLE, &exist_turtle)); if (exist_turtle) { /* * Failure to read means a bad turtle file. Remove it and create a new turtle file. 
*/ - if (F_ISSET(S2C(session), WT_CONN_SALVAGE)) { + if (F_ISSET(conn, WT_CONN_SALVAGE)) { WT_WITH_TURTLE_LOCK( session, ret = __wt_turtle_read(session, WT_METAFILE_URI, &unused_value)); __wt_free(session, unused_value); } if (ret != 0) { - WT_RET(__wt_remove_if_exists(session, WT_METADATA_TURTLE, false)); + WT_ERR(__wt_remove_if_exists(session, WT_METADATA_TURTLE, false)); load_turtle = true; } else /* @@ -287,7 +475,7 @@ __wt_turtle_init(WT_SESSION_IMPL *session, bool verify_meta) * incremental backup file and a destination database that incorrectly ran recovery. */ if (exist_incr && !exist_isrc) - WT_RET_MSG(session, EINVAL, "Incremental backup after running recovery is not allowed"); + WT_ERR_MSG(session, EINVAL, "Incremental backup after running recovery is not allowed"); /* * If we have a backup file and metadata and turtle files, we want to recreate the metadata * from the backup. @@ -296,16 +484,16 @@ __wt_turtle_init(WT_SESSION_IMPL *session, bool verify_meta) __wt_verbose_notice(session, WT_VERB_METADATA, "Both %s and %s exist; recreating metadata from backup", WT_METADATA_TURTLE, WT_METADATA_BACKUP); - WT_RET(__wt_remove_if_exists(session, WT_METAFILE, false)); - WT_RET(__wt_remove_if_exists(session, WT_METADATA_TURTLE, false)); + WT_ERR(__wt_remove_if_exists(session, WT_METAFILE, false)); + WT_ERR(__wt_remove_if_exists(session, WT_METADATA_TURTLE, false)); load = true; } else if (validate_turtle) - WT_RET(__wt_turtle_validate_version(session)); + WT_ERR(__wt_turtle_validate_version(session)); } else load = true; if (load) { if (exist_incr) - F_SET(S2C(session), WT_CONN_WAS_BACKUP); + F_SET(conn, WT_CONN_WAS_BACKUP); /* * Verifying the metadata is incompatible with restarting from a backup because the verify @@ -313,27 +501,40 @@ __wt_turtle_init(WT_SESSION_IMPL *session, bool verify_meta) * here before creating the metadata file and reading in the backup file. 
*/ if (verify_meta && exist_backup) - WT_RET_MSG( + WT_ERR_MSG( session, EINVAL, "restoring a backup is incompatible with metadata verification"); + /* If partial backup target is non-empty, construct the target backup uri list. */ + WT_ERR(__metadata_load_target_uri_list(session, exist_backup, cfg, backuphash)); /* Create the metadata file. */ - WT_RET(__metadata_init(session)); + WT_ERR(__metadata_init(session)); /* Load any hot-backup information. */ - WT_RET(__metadata_load_hot_backup(session)); + WT_ERR(__metadata_load_hot_backup(session, backuphash)); /* Create any bulk-loaded file stubs. */ - WT_RET(__metadata_load_bulk(session)); + WT_ERR(__metadata_load_bulk(session)); } if (load || load_turtle) { /* Create the turtle file. */ - WT_RET(__metadata_config(session, &metaconf)); + WT_ERR(__metadata_config(session, &metaconf)); WT_WITH_TURTLE_LOCK(session, ret = __wt_turtle_update(session, WT_METAFILE_URI, metaconf)); __wt_free(session, metaconf); - WT_RET(ret); + WT_ERR(ret); } +err: + for (i = 0; i < conn->hash_size; ++i) + while (!TAILQ_EMPTY(&backuphash[i])) { + target_uri = TAILQ_FIRST(&backuphash[i]); + /* Remove target uri entry from the hashtable. */ + TAILQ_REMOVE(&backuphash[i], target_uri, hashq); + __wt_free(session, target_uri->name); + __wt_free(session, target_uri); + } + __wt_free(session, backuphash); + /* Remove the backup files, we'll never read them again. */ return (__wt_backup_file_remove(session)); } diff --git a/src/third_party/wiredtiger/src/schema/schema_drop.c b/src/third_party/wiredtiger/src/schema/schema_drop.c index a1420755926..294b458dbcf 100644 --- a/src/third_party/wiredtiger/src/schema/schema_drop.c +++ b/src/third_party/wiredtiger/src/schema/schema_drop.c @@ -300,7 +300,10 @@ __schema_drop(WT_SESSION_IMPL *session, const char *uri, const char *cfg[]) if (ret == WT_NOTFOUND || ret == ENOENT) ret = force ? 
0 : ENOENT; - WT_TRET(__wt_meta_track_off(session, true, ret != 0)); + if (F_ISSET(S2C(session), WT_CONN_BACKUP_PARTIAL_RESTORE)) + WT_TRET(__wt_meta_track_off(session, false, ret != 0)); + else + WT_TRET(__wt_meta_track_off(session, true, ret != 0)); return (ret); } diff --git a/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c b/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c index 8c3a36a7d5a..faa68261bce 100644 --- a/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c +++ b/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c @@ -1471,12 +1471,15 @@ __rollback_to_stable_hs_final_pass(WT_SESSION_IMPL *session, wt_timestamp_t roll { WT_CONFIG ckptconf; WT_CONFIG_ITEM cval, durableval, key; + WT_CONNECTION_IMPL *conn; WT_DECL_RET; wt_timestamp_t max_durable_ts, newest_stop_durable_ts, newest_stop_ts; + size_t i; char *config; char ts_string[2][WT_TS_INT_STRING_SIZE]; config = NULL; + conn = S2C(session); WT_RET(__wt_metadata_search(session, WT_HS_URI, &config)); @@ -1526,6 +1529,15 @@ __rollback_to_stable_hs_final_pass(WT_SESSION_IMPL *session, wt_timestamp_t roll WT_TRET(__wt_session_release_dhandle(session)); + /* + * Truncate history store entries from the partial backup remove list. The list holds all of the + * btree ids that do not exist as part of the database anymore due to performing a selective + * restore from backup. 
+ */ + if (F_ISSET(conn, WT_CONN_BACKUP_PARTIAL_RESTORE) && conn->partial_backup_remove_ids != NULL) + for (i = 0; conn->partial_backup_remove_ids[i] != 0; ++i) + WT_ERR( + __rollback_to_stable_btree_hs_truncate(session, conn->partial_backup_remove_ids[i])); err: __wt_free(session, config); return (ret); diff --git a/src/third_party/wiredtiger/test/suite/test_backup24.py b/src/third_party/wiredtiger/test/suite/test_backup24.py new file mode 100755 index 00000000000..319840be042 --- /dev/null +++ b/src/third_party/wiredtiger/test/suite/test_backup24.py @@ -0,0 +1,154 @@ +#!/usr/bin/env python +# +# Public Domain 2014-present MongoDB, Inc. +# Public Domain 2008-2014 WiredTiger, Inc. +# +# This is free and unencumbered software released into the public domain. +# +# Anyone is free to copy, modify, publish, use, compile, sell, or +# distribute this software, either in source code form or as a compiled +# binary, for any purpose, commercial or non-commercial, and by any +# means. +# +# In jurisdictions that recognize copyright laws, the author or authors +# of this software dedicate any and all copyright interest in the +# software to the public domain. We make this dedication for the benefit +# of the public at large and to the detriment of our heirs and +# successors. We intend this dedication to be an overt act of +# relinquishment in perpetuity of all present and future rights to this +# software under copyright law. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. 
+ +import os, wiredtiger, wttest +from wtbackup import backup_base + +# test_backup24.py +# Test recovering a selective backup with some logged tables, some not logged tables +# and creating more of each during backup. +class test_backup24(backup_base): + dir='backup_all.dir' # Backup directory name + config_log='key_format=S,value_format=S' + config_nolog='key_format=S,value_format=S,log=(enabled=false)' + log_t1="table:logged1" + log_t2="table:logged2" + log_tnew="table:loggednew" + log_tnew_file="loggednew.wt" + logmax="100K" + nolog_t1="table:not1" + nolog_t2="table:not2" + nolog_t2_file="not2.wt" + nolog_tnew="table:notnew" + nolog_tnew_file="notnew.wt" + newuri="table:newtable" + + def add_data(self, uri, key, val): + c = self.session.open_cursor(uri, None, self.data_cursor_config) + for i in range(0, self.nops): + k = key + str(i) + v = val + str(i) + c[k] = v + c.close() + + def check_data(self, uri, key, val): + c = self.session.open_cursor(uri, None, self.data_cursor_config) + for i in range(0, self.nops): + c.set_key(key + str(i)) + self.assertEqual(c.search(), 0) + self.assertEqual(c.get_value(), val + str(i)) + c.close() + + # Create a large cache, otherwise this test runs quite slowly. + def conn_config(self): + return 'debug_mode=(table_logging=true),cache_size=1G,log=(enabled,file_max=%s,remove=false)' % \ + self.logmax + + def test_backup24(self): + log2 = "WiredTigerLog.0000000002" + + # Create two logged and two not-logged tables. + self.session.create(self.log_t1, self.config_log) + self.session.create(self.log_t2, self.config_log) + self.session.create(self.nolog_t1, self.config_nolog) + self.session.create(self.nolog_t2, self.config_nolog) + + # Insert small amounts of data at a time stopping just after we + # cross into log file 2. 
+ while not os.path.exists(log2): + self.add_data(self.log_t1, 'key', 'value') + self.add_data(self.log_t2, 'key', 'value') + self.add_data(self.nolog_t1, 'key', 'value') + self.add_data(self.nolog_t2, 'key', 'value') + + self.session.checkpoint() + # Add more data after the checkpoint. + self.add_data(self.log_t1, 'newkey', 'newvalue') + self.add_data(self.log_t2, 'newkey', 'newvalue') + self.add_data(self.nolog_t1, 'newkey', 'newvalue') + self.add_data(self.nolog_t2, 'newkey', 'newvalue') + + # We allow creates during backup because the file doesn't exist + # when the backup metadata is created on cursor open and the newly + # created file is not in the cursor list. + + # Create and add data to a new table and then copy the files with a full backup. + os.mkdir(self.dir) + + # Open the backup cursor and then create new tables and add data to them. + # Then copy the files. + bkup_c = self.session.open_cursor('backup:', None, None) + + # Now create and populate the new table. Make sure the log records + # are on disk and will be copied to the backup. + self.session.create(self.log_tnew, self.config_log) + self.session.create(self.nolog_tnew, self.config_nolog) + self.add_data(self.log_tnew, 'key', 'value') + self.add_data(self.nolog_tnew, 'key', 'value') + self.session.log_flush('sync=on') + + # Now copy the files using full backup but as a selective backup. We want the logged + # tables but only the first not-logged table. Skip the second not-logged table. + all_files = self.take_selective_backup(self.dir, [self.nolog_t2_file], bkup_c) + orig_logs = [file for file in all_files if "WiredTigerLog" in file] + self.assertFalse(self.log_tnew in all_files) + self.assertFalse(self.nolog_tnew in all_files) + self.assertFalse(self.nolog_t2_file in all_files) + + # Take a log backup. 
+        self.take_log_backup(bkup_c, self.dir, orig_logs)
+        bkup_c.close()
+
+        target_uris = str([self.log_t1, self.log_t2, self.nolog_t1]).replace("\'", "\"")
+        backup_conn = self.wiredtiger_open(self.dir, 'backup_restore_target={0}'.format(target_uris))
+        flist = os.listdir(self.dir)
+        self.assertFalse(self.nolog_t2_file in flist)
+        self.assertFalse(self.nolog_tnew_file in flist)
+
+        # Test the files we didn't copy over during selective backup don't exist in the metadata.
+        bkup_session = backup_conn.open_session()
+        metadata_c = bkup_session.open_cursor('metadata:', None, None)
+        metadata_c.set_key(self.nolog_t2)
+        self.assertEqual(metadata_c.search(), wiredtiger.WT_NOTFOUND)
+        metadata_c.set_key(self.nolog_t2_file)
+        self.assertEqual(metadata_c.search(), wiredtiger.WT_NOTFOUND)
+
+        metadata_c.set_key(self.nolog_tnew)
+        self.assertEqual(metadata_c.search(), wiredtiger.WT_NOTFOUND)
+        metadata_c.set_key(self.nolog_tnew_file)
+        self.assertEqual(metadata_c.search(), wiredtiger.WT_NOTFOUND)
+        metadata_c.close()
+
+        # Test that the database partially recovered successfully.
+        self.check_data(self.log_t1, 'key', 'value')
+        self.check_data(self.log_t2, 'key', 'value')
+        self.check_data(self.nolog_t1, 'key', 'value')
+        backup_conn.close()
+
+if __name__ == '__main__':
+    wttest.run()
diff --git a/src/third_party/wiredtiger/test/suite/test_backup26.py b/src/third_party/wiredtiger/test/suite/test_backup26.py
new file mode 100644
index 00000000000..258a2a0f675
--- /dev/null
+++ b/src/third_party/wiredtiger/test/suite/test_backup26.py
@@ -0,0 +1,111 @@
+#!/usr/bin/env python
+#
+# Public Domain 2014-present MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+
+import wiredtiger, wttest
+import os, re, time
+from wtbackup import backup_base
+from wtscenario import make_scenarios
+from wtdataset import SimpleDataSet
+
+# test_backup26.py
+# Test selective backup with a large number of tables. Recovering a partial backup should take
+# longer when there are more active tables. Also test recovery correctness with both file and
+# table schemas in a partial backup.
+class test_backup26(backup_base):
+    dir='backup.dir' # Backup directory name
+    uri="table_backup"
+    ntables = 10000 if wttest.islongtest() else 500
+
+    # Reverse the backup restore list, WiredTiger should still succeed in this case.
+    reverse = [
+        ["reverse_target_list", dict(reverse=True)],
+        ["target_list", dict(reverse=False)],
+    ]
+
+    # Percentage of tables to not copy over in selective backup.
+ percentage = [ + ('hundred_precent', dict(percentage=1)), + ('ninety_percent', dict(percentage=0.9)), + ('fifty_percent', dict(percentage=0.5)), + ('ten_percent', dict(percentage=0.1)), + ('zero_percent', dict(percentage=0)), + ] + scenarios = make_scenarios(percentage, reverse) + + def test_backup26(self): + selective_remove_uri_file_list = [] + selective_remove_uri_list = [] + selective_uri_list = [] + + for i in range(0, self.ntables): + uri = "table:{0}".format(self.uri + str(i)) + dataset = SimpleDataSet(self, uri, 100, key_format="S") + dataset.populate() + # Append the table uri to the selective backup remove list until the set percentage. + # These tables will not be copied over in selective backup. + if (i <= int(self.ntables * self.percentage)): + selective_remove_uri_list.append(uri) + selective_remove_uri_file_list.append("{0}.wt".format(self.uri + str(i))) + else: + selective_uri_list.append(uri) + self.session.checkpoint() + + os.mkdir(self.dir) + + # Now copy the files using full backup. This should not include the tables inside the remove list. + all_files = self.take_selective_backup(self.dir, selective_remove_uri_file_list) + + target_uris = None + if self.reverse: + target_uris = str(selective_uri_list[::-1]).replace("\'", "\"") + else: + target_uris = str(selective_uri_list).replace("\'", "\"") + starttime = time.time() + # After the full backup, open and recover the backup database. + backup_conn = self.wiredtiger_open(self.dir, "backup_restore_target={0}".format(target_uris)) + elapsed = time.time() - starttime + self.pr("%s partial backup has taken %.2f seconds." % (str(self), elapsed)) + + bkup_session = backup_conn.open_session() + # Open the cursor from uris that were not part of the selective backup and expect failure + # since file doesn't exist. 
+ for remove_uri in selective_remove_uri_list: + self.assertRaisesException( + wiredtiger.WiredTigerError,lambda: bkup_session.open_cursor(remove_uri, None, None)) + + # Open the cursors on tables that copied over to the backup directory. They should still + # recover properly. + for uri in selective_uri_list: + c = bkup_session.open_cursor(uri, None, None) + ds = SimpleDataSet(self, uri, 100, key_format="S") + ds.check_cursor(c) + c.close() + backup_conn.close() + +if __name__ == '__main__': + wttest.run() diff --git a/src/third_party/wiredtiger/test/suite/test_backup27.py b/src/third_party/wiredtiger/test/suite/test_backup27.py new file mode 100755 index 00000000000..d1bb4d84106 --- /dev/null +++ b/src/third_party/wiredtiger/test/suite/test_backup27.py @@ -0,0 +1,100 @@ +#!/usr/bin/env python +# +# Public Domain 2014-present MongoDB, Inc. +# Public Domain 2008-2014 WiredTiger, Inc. +# +# This is free and unencumbered software released into the public domain. +# +# Anyone is free to copy, modify, publish, use, compile, sell, or +# distribute this software, either in source code form or as a compiled +# binary, for any purpose, commercial or non-commercial, and by any +# means. +# +# In jurisdictions that recognize copyright laws, the author or authors +# of this software dedicate any and all copyright interest in the +# software to the public domain. We make this dedication for the benefit +# of the public at large and to the detriment of our heirs and +# successors. We intend this dedication to be an overt act of +# relinquishment in perpetuity of all present and future rights to this +# software under copyright law. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. + +import os, wiredtiger, wttest +from wtbackup import backup_base +from wtscenario import make_scenarios + +# test_backup27.py +# Test selective backup with history store contents. Recovering a partial backup should +# clear the history entries of the table that does not exist in the backup directory. +class test_backup27(backup_base): + dir='backup.dir' # Backup directory name + newuri="table:table_no_hs" + newuri_file="table_no_hs.wt" + uri="table:table_hs" + + def add_timestamp_data(self, uri, key, val, timestamp): + self.session.begin_transaction() + c = self.session.open_cursor(uri, None, None) + for i in range(0, 1000): + k = key + str(i) + v = val + str(i) + c[k] = v + c.close() + self.session.commit_transaction('commit_timestamp=' + self.timestamp_str(timestamp)) + + def validate_timestamp_data(self, session, uri, key, expected_err, timestamp): + session.begin_transaction('read_timestamp=' + self.timestamp_str(timestamp)) + c = session.open_cursor(uri, None, None) + for i in range(0, 1000): + k = key + str(i) + c.set_key(k) + self.assertEqual(c.search(), expected_err) + c.close() + session.commit_transaction() + + def test_backup27(self): + log2 = "WiredTigerLog.0000000002" + + self.session.create(self.uri, "key_format=S,value_format=S") + self.session.create(self.newuri, "key_format=S,value_format=S") + + self.add_timestamp_data(self.uri, "key", "val", 1) + self.add_timestamp_data(self.newuri, "key", "val", 1) + + self.add_timestamp_data(self.uri, "key", "val5", 5) + self.add_timestamp_data(self.newuri, "key", "val5", 5) + + # Stable timestamp at 10, so that we can retain history store data. 
+ self.conn.set_timestamp('stable_timestamp=' + self.timestamp_str(10)) + self.session.checkpoint() + + os.mkdir(self.dir) + + # Now copy the files using selective backup. This should not include one of the tables. + all_files = self.take_selective_backup(self.dir, [self.newuri_file]) + + # After the full backup, open and partially recover the backup database on only one table. + backup_conn = self.wiredtiger_open(self.dir, "backup_restore_target=[\"{0}\"]".format(self.uri)) + bkup_session = backup_conn.open_session() + + # Test that the history store data still exists for the tables that got restored. + self.validate_timestamp_data(bkup_session, self.uri, "key", 0, 1) + self.validate_timestamp_data(bkup_session, self.uri, "key", 0, 10) + + # Open the cursor and expect failure since file doesn't exist. + self.assertRaisesException( + wiredtiger.WiredTigerError,lambda: bkup_session.open_cursor(self.newuri, None, None)) + bkup_session.create(self.newuri, "key_format=S,value_format=S") + self.validate_timestamp_data(bkup_session, self.newuri, "key", wiredtiger.WT_NOTFOUND, 1) + self.validate_timestamp_data(bkup_session, self.newuri, "key", wiredtiger.WT_NOTFOUND, 5) + backup_conn.close() + +if __name__ == '__main__': + wttest.run() diff --git a/src/third_party/wiredtiger/test/suite/test_backup28.py b/src/third_party/wiredtiger/test/suite/test_backup28.py new file mode 100644 index 00000000000..50aa0d4ae7f --- /dev/null +++ b/src/third_party/wiredtiger/test/suite/test_backup28.py @@ -0,0 +1,96 @@ +#!/usr/bin/env python +# +# Public Domain 2014-present MongoDB, Inc. +# Public Domain 2008-2014 WiredTiger, Inc. +# +# This is free and unencumbered software released into the public domain. +# +# Anyone is free to copy, modify, publish, use, compile, sell, or +# distribute this software, either in source code form or as a compiled +# binary, for any purpose, commercial or non-commercial, and by any +# means. 
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+
+import wiredtiger, wttest
+import os, re
+from wtbackup import backup_base
+from wtscenario import make_scenarios
+
+# test_backup28.py
+# Test selective backup with different schema types. Recovering a partial backup with target uris
+# including colgroups, index or lsm formats should fail with an error message. The only supported
+# types are table formats in the uri list.
+class test_backup28(backup_base): + dir='backup.dir' # Backup directory name + uri="table_backup" + + types = [ + ('file', dict(pfx='file:', target_uri_list=["file:table0"])), + ('lsm', dict(pfx='lsm:', target_uri_list=["lsm:table0"])), + ('table-simple', dict(pfx='table:', target_uri_list=["table:table0"])), + ('table-cg', dict(pfx='table:', target_uri_list=["index:table0:i0", "table:table0"])), + ('table-index', dict(pfx='table:', target_uri_list=["colgroup:table0:g0", "table:table0"])), + ] + + scenarios = make_scenarios(types) + + def test_backup28(self): + selective_remove_file_list = [] + uri = self.pfx + 'table0' + create_params = 'key_format=S,value_format=S,' + + cgparam = 'columns=(k,v),colgroups=(g0),' + # Create the main table. + self.session.create(uri, create_params + cgparam) + + if (self.pfx != "lsm:" and self.pfx != "file:"): + # Add in column group and index tables. + colgroup_param = 'columns=(v),' + suburi = 'colgroup:table0:g0' + self.session.create(suburi, colgroup_param) + + suburi = 'index:table0:i0' + self.session.create(suburi, cgparam) + self.session.checkpoint() + + os.mkdir(self.dir) + + # Now copy the files using full backup. Selectively don't copy files based on remove list. + all_files = self.take_selective_backup(self.dir, []) + + target_uri_list_format = str(self.target_uri_list).replace("\'", "\"") + if len(self.target_uri_list) and self.target_uri_list[0] == "table:table0": + # After the full backup, open and recover the backup database, and it should succeed. + backup_conn = self.wiredtiger_open(self.dir, "backup_restore_target={0}".format(target_uri_list_format)) + bkup_session = backup_conn.open_session() + + # Make sure that the table recovered properly. + c = bkup_session.open_cursor(uri, None, None) + c.close() + backup_conn.close() + else: + # After the full backup, perform partial backup restore adding the target uris of + # indexes, colgroups or lsm. 
This should fail and return with a message, as we only allow + # table formats. + self.assertRaisesHavingMessage(wiredtiger.WiredTigerError, + lambda: self.wiredtiger_open(self.dir, "backup_restore_target={0}".format(target_uri_list_format)), + '/partial backup restore only supports objects of type .* formats in the target uri list/') + + +if __name__ == '__main__': + wttest.run() diff --git a/src/third_party/wiredtiger/test/suite/wtbackup.py b/src/third_party/wiredtiger/test/suite/wtbackup.py index 25de77ba7fa..63df05acc4d 100644 --- a/src/third_party/wiredtiger/test/suite/wtbackup.py +++ b/src/third_party/wiredtiger/test/suite/wtbackup.py @@ -147,6 +147,38 @@ class backup_base(wttest.WiredTigerTestCase, suite_subprocess): shutil.copy(copy_from, copy_to) # + # Uses a backup cursor to perform a selective backup, by iterating through the cursor + # grabbing files that do not exist in the remove list to copy over into a given directory. + # When dealing with a test that performs multiple incremental backups, we need to perform a + # proper backup on each incremental directory as a starting base. + # + def take_selective_backup(self, backup_dir, remove_list, backup_cur=None): + self.pr('Selective backup to ' + backup_dir + ': ') + bkup_c = backup_cur + if backup_cur == None: + config = None + if self.initial_backup: + config = 'incremental=(granularity=1M,enabled=true,this_id=ID0)' + bkup_c = self.session.open_cursor('backup:', None, config) + all_files = [] + + # We cannot use 'for newfile in bkup_c:' usage because backup cursors don't have + # values and adding in get_values returns ENOTSUP and causes the usage to fail. + # If that changes then this, and the use of the duplicate below can change. 
+ while bkup_c.next() == 0: + newfile = bkup_c.get_key() + sz = os.path.getsize(newfile) + if (newfile in remove_list): + continue + self.pr('Copy from: ' + newfile + ' (' + str(sz) + ') to ' + self.dir) + self.copy_file(newfile, backup_dir) + all_files.append(newfile) + + if backup_cur == None: + bkup_c.close() + return all_files + + # # Uses a backup cursor to perform a full backup, by iterating through the cursor # grabbing files to copy over into a given directory. When dealing with a test # that performs multiple incremental backups, we initially perform a full backup |