summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEtienne Petrel <etienne.petrel@mongodb.com>2022-03-15 05:15:30 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2022-03-15 05:45:32 +0000
commitb7b6ca761e7939ddf75cdf96fa3f37ce5db73b7a (patch)
tree6731d478d6030f1d876ba4c4428745c2701de90f
parent619b7a4f876d1fec82e12ec544ae8ada22dcea17 (diff)
downloadmongo-b7b6ca761e7939ddf75cdf96fa3f37ce5db73b7a.tar.gz
Import wiredtiger: 746e435bb142b8ae482be38f8a7ed7f4a0180a96 from branch mongodb-master
ref: a5d55eec22..746e435bb1 for: 6.0.0 WT-8703 Extend recovering from backup to allow partial restores
-rw-r--r--src/third_party/wiredtiger/dist/api_data.py6
-rw-r--r--src/third_party/wiredtiger/import.data2
-rw-r--r--src/third_party/wiredtiger/src/config/config_def.c16
-rw-r--r--src/third_party/wiredtiger/src/conn/conn_api.c11
-rw-r--r--src/third_party/wiredtiger/src/include/connection.h58
-rw-r--r--src/third_party/wiredtiger/src/include/extern.h2
-rw-r--r--src/third_party/wiredtiger/src/include/wiredtiger.in4
-rw-r--r--src/third_party/wiredtiger/src/include/wt_internal.h2
-rw-r--r--src/third_party/wiredtiger/src/meta/meta_turtle.c243
-rw-r--r--src/third_party/wiredtiger/src/schema/schema_drop.c5
-rw-r--r--src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c12
-rwxr-xr-xsrc/third_party/wiredtiger/test/suite/test_backup24.py154
-rw-r--r--src/third_party/wiredtiger/test/suite/test_backup26.py111
-rwxr-xr-xsrc/third_party/wiredtiger/test/suite/test_backup27.py100
-rw-r--r--src/third_party/wiredtiger/test/suite/test_backup28.py96
-rw-r--r--src/third_party/wiredtiger/test/suite/wtbackup.py32
16 files changed, 803 insertions, 51 deletions
diff --git a/src/third_party/wiredtiger/dist/api_data.py b/src/third_party/wiredtiger/dist/api_data.py
index d6b8279bc37..35a735442c8 100644
--- a/src/third_party/wiredtiger/dist/api_data.py
+++ b/src/third_party/wiredtiger/dist/api_data.py
@@ -1143,6 +1143,12 @@ wiredtiger_open_common =\
wiredtiger_open_log_configuration +\
wiredtiger_open_tiered_storage_configuration +\
wiredtiger_open_statistics_log_configuration + [
+ Config('backup_restore_target', '', r'''
+ If non-empty and restoring from a backup, restore only the table object targets listed.
+    WiredTiger will remove all the metadata entries for the tables that are not listed in the
+    list from the reconstructed metadata. The target list must only include URIs of type
+ table:''',
+ type='list'),
Config('buffer_alignment', '-1', r'''
in-memory alignment (in bytes) for buffers used for I/O. The
default value of -1 indicates a platform-specific alignment value
diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data
index 95ba62293be..125345c11e8 100644
--- a/src/third_party/wiredtiger/import.data
+++ b/src/third_party/wiredtiger/import.data
@@ -2,5 +2,5 @@
"vendor": "wiredtiger",
"github": "wiredtiger/wiredtiger.git",
"branch": "mongodb-master",
- "commit": "a5d55eec2229a8da1233103d117871a7f51e1df8"
+ "commit": "746e435bb142b8ae482be38f8a7ed7f4a0180a96"
}
diff --git a/src/third_party/wiredtiger/src/config/config_def.c b/src/third_party/wiredtiger/src/config/config_def.c
index 18b5df9b210..f771e55b661 100644
--- a/src/third_party/wiredtiger/src/config/config_def.c
+++ b/src/third_party/wiredtiger/src/config/config_def.c
@@ -846,6 +846,7 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_transaction_sync_subconfigs
{NULL, NULL, NULL, NULL, NULL, 0}};
static const WT_CONFIG_CHECK confchk_wiredtiger_open[] = {
+ {"backup_restore_target", "list", NULL, NULL, NULL, 0},
{"block_cache", "category", NULL, NULL, confchk_wiredtiger_open_block_cache_subconfigs, 12},
{"buffer_alignment", "int", NULL, "min=-1,max=1MB", NULL, 0},
{"builtin_extension_config", "string", NULL, NULL, NULL, 0},
@@ -928,6 +929,7 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open[] = {
{NULL, NULL, NULL, NULL, NULL, 0}};
static const WT_CONFIG_CHECK confchk_wiredtiger_open_all[] = {
+ {"backup_restore_target", "list", NULL, NULL, NULL, 0},
{"block_cache", "category", NULL, NULL, confchk_wiredtiger_open_block_cache_subconfigs, 12},
{"buffer_alignment", "int", NULL, "min=-1,max=1MB", NULL, 0},
{"builtin_extension_config", "string", NULL, NULL, NULL, 0},
@@ -1010,6 +1012,7 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_all[] = {
{NULL, NULL, NULL, NULL, NULL, 0}};
static const WT_CONFIG_CHECK confchk_wiredtiger_open_basecfg[] = {
+ {"backup_restore_target", "list", NULL, NULL, NULL, 0},
{"block_cache", "category", NULL, NULL, confchk_wiredtiger_open_block_cache_subconfigs, 12},
{"buffer_alignment", "int", NULL, "min=-1,max=1MB", NULL, 0},
{"builtin_extension_config", "string", NULL, NULL, NULL, 0},
@@ -1088,6 +1091,7 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_basecfg[] = {
{NULL, NULL, NULL, NULL, NULL, 0}};
static const WT_CONFIG_CHECK confchk_wiredtiger_open_usercfg[] = {
+ {"backup_restore_target", "list", NULL, NULL, NULL, 0},
{"block_cache", "category", NULL, NULL, confchk_wiredtiger_open_block_cache_subconfigs, 12},
{"buffer_alignment", "int", NULL, "min=-1,max=1MB", NULL, 0},
{"builtin_extension_config", "string", NULL, NULL, NULL, 0},
@@ -1462,6 +1466,7 @@ static const WT_CONFIG_ENTRY config_entries[] = {{"WT_CONNECTION.add_collator",
"version=(major=0,minor=0),write_timestamp_usage=none",
confchk_tiered_meta, 52},
{"wiredtiger_open",
+ "backup_restore_target=,"
"block_cache=(blkcache_eviction_aggression=1800,"
"cache_on_checkpoint=true,cache_on_writes=true,enabled=false,"
"full_target=95,hashsize=0,max_percent_overhead=10,nvram_path=,"
@@ -1500,8 +1505,9 @@ static const WT_CONFIG_ENTRY config_entries[] = {{"WT_CONNECTION.add_collator",
"transaction_sync=(enabled=false,method=fsync),"
"use_environment=true,use_environment_priv=false,verbose=[],"
"verify_metadata=false,write_through=",
- confchk_wiredtiger_open, 58},
+ confchk_wiredtiger_open, 59},
{"wiredtiger_open_all",
+ "backup_restore_target=,"
"block_cache=(blkcache_eviction_aggression=1800,"
"cache_on_checkpoint=true,cache_on_writes=true,enabled=false,"
"full_target=95,hashsize=0,max_percent_overhead=10,nvram_path=,"
@@ -1540,8 +1546,9 @@ static const WT_CONFIG_ENTRY config_entries[] = {{"WT_CONNECTION.add_collator",
"transaction_sync=(enabled=false,method=fsync),"
"use_environment=true,use_environment_priv=false,verbose=[],"
"verify_metadata=false,version=(major=0,minor=0),write_through=",
- confchk_wiredtiger_open_all, 59},
+ confchk_wiredtiger_open_all, 60},
{"wiredtiger_open_basecfg",
+ "backup_restore_target=,"
"block_cache=(blkcache_eviction_aggression=1800,"
"cache_on_checkpoint=true,cache_on_writes=true,enabled=false,"
"full_target=95,hashsize=0,max_percent_overhead=10,nvram_path=,"
@@ -1578,8 +1585,9 @@ static const WT_CONFIG_ENTRY config_entries[] = {{"WT_CONNECTION.add_collator",
"object_target_size=10M),timing_stress_for_test=,"
"transaction_sync=(enabled=false,method=fsync),verbose=[],"
"verify_metadata=false,version=(major=0,minor=0),write_through=",
- confchk_wiredtiger_open_basecfg, 53},
+ confchk_wiredtiger_open_basecfg, 54},
{"wiredtiger_open_usercfg",
+ "backup_restore_target=,"
"block_cache=(blkcache_eviction_aggression=1800,"
"cache_on_checkpoint=true,cache_on_writes=true,enabled=false,"
"full_target=95,hashsize=0,max_percent_overhead=10,nvram_path=,"
@@ -1616,7 +1624,7 @@ static const WT_CONFIG_ENTRY config_entries[] = {{"WT_CONNECTION.add_collator",
"object_target_size=10M),timing_stress_for_test=,"
"transaction_sync=(enabled=false,method=fsync),verbose=[],"
"verify_metadata=false,write_through=",
- confchk_wiredtiger_open_usercfg, 52},
+ confchk_wiredtiger_open_usercfg, 53},
{NULL, NULL, NULL, 0}};
int
diff --git a/src/third_party/wiredtiger/src/conn/conn_api.c b/src/third_party/wiredtiger/src/conn/conn_api.c
index 78ee338f34e..799b199bfb2 100644
--- a/src/third_party/wiredtiger/src/conn/conn_api.c
+++ b/src/third_party/wiredtiger/src/conn/conn_api.c
@@ -2980,7 +2980,7 @@ wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler, const char *c
*/
WT_ERR(__wt_config_gets(session, cfg, "verify_metadata", &cval));
verify_meta = cval.val;
- WT_ERR(__wt_turtle_init(session, verify_meta));
+ WT_ERR(__wt_turtle_init(session, verify_meta, cfg));
/* Verify the metadata file. */
if (verify_meta) {
@@ -3045,6 +3045,15 @@ err:
__wt_scr_discard(session);
__wt_scr_discard(&conn->dummy_session);
+ /*
+ * Clean up the partial backup restore flag, backup btree id list. The backup id list was used
+ * in recovery to truncate the history store entries and the flag was used to allow schema drops
+ * to happen on tables to clean up the entries in the creation of the metadata file.
+ */
+ F_CLR(conn, WT_CONN_BACKUP_PARTIAL_RESTORE);
+ if (conn->partial_backup_remove_ids != NULL)
+ __wt_free(session, conn->partial_backup_remove_ids);
+
if (ret != 0) {
/*
* Set panic if we're returning the run recovery error or if recovery did not complete so
diff --git a/src/third_party/wiredtiger/src/include/connection.h b/src/third_party/wiredtiger/src/include/connection.h
index b681d54c471..a22be3d578d 100644
--- a/src/third_party/wiredtiger/src/include/connection.h
+++ b/src/third_party/wiredtiger/src/include/connection.h
@@ -211,6 +211,18 @@ struct __wt_name_flag {
} while (0)
/*
+ * WT_BACKUP_TARGET --
+ * A target URI entry indicating this URI should be restored during a partial backup.
+ */
+struct __wt_backup_target {
+ const char *name; /* File name */
+
+ uint64_t name_hash; /* hash of name */
+ TAILQ_ENTRY(__wt_backup_target) hashq; /* internal hash queue */
+};
+typedef TAILQ_HEAD(__wt_backuphash, __wt_backup_target) WT_BACKUPHASH;
+
+/*
* WT_CONNECTION_IMPL --
* Implementation of WT_CONNECTION
*/
@@ -338,6 +350,7 @@ struct __wt_connection_impl {
WT_RWLOCK hot_backup_lock; /* Hot backup serialization */
uint64_t hot_backup_start; /* Clock value of most recent checkpoint needed by hot backup */
char **hot_backup_list; /* Hot backup file list */
+ uint32_t *partial_backup_remove_ids; /* Remove btree id list for partial backup */
WT_SESSION_IMPL *ckpt_session; /* Checkpoint thread session */
wt_thread_t ckpt_tid; /* Checkpoint thread */
@@ -618,28 +631,29 @@ struct __wt_connection_impl {
uint32_t server_flags;
/* AUTOMATIC FLAG VALUE GENERATION START 0 */
-#define WT_CONN_CACHE_CURSORS 0x000001u
-#define WT_CONN_CACHE_POOL 0x000002u
-#define WT_CONN_CKPT_GATHER 0x000004u
-#define WT_CONN_CKPT_SYNC 0x000008u
-#define WT_CONN_CLOSING 0x000010u
-#define WT_CONN_CLOSING_CHECKPOINT 0x000020u
-#define WT_CONN_CLOSING_NO_MORE_OPENS 0x000040u
-#define WT_CONN_COMPATIBILITY 0x000080u
-#define WT_CONN_DATA_CORRUPTION 0x000100u
-#define WT_CONN_EVICTION_RUN 0x000200u
-#define WT_CONN_HS_OPEN 0x000400u
-#define WT_CONN_INCR_BACKUP 0x000800u
-#define WT_CONN_IN_MEMORY 0x001000u
-#define WT_CONN_LEAK_MEMORY 0x002000u
-#define WT_CONN_LSM_MERGE 0x004000u
-#define WT_CONN_OPTRACK 0x008000u
-#define WT_CONN_PANIC 0x010000u
-#define WT_CONN_READONLY 0x020000u
-#define WT_CONN_RECONFIGURING 0x040000u
-#define WT_CONN_RECOVERING 0x080000u
-#define WT_CONN_SALVAGE 0x100000u
-#define WT_CONN_WAS_BACKUP 0x200000u
+#define WT_CONN_BACKUP_PARTIAL_RESTORE 0x000001u
+#define WT_CONN_CACHE_CURSORS 0x000002u
+#define WT_CONN_CACHE_POOL 0x000004u
+#define WT_CONN_CKPT_GATHER 0x000008u
+#define WT_CONN_CKPT_SYNC 0x000010u
+#define WT_CONN_CLOSING 0x000020u
+#define WT_CONN_CLOSING_CHECKPOINT 0x000040u
+#define WT_CONN_CLOSING_NO_MORE_OPENS 0x000080u
+#define WT_CONN_COMPATIBILITY 0x000100u
+#define WT_CONN_DATA_CORRUPTION 0x000200u
+#define WT_CONN_EVICTION_RUN 0x000400u
+#define WT_CONN_HS_OPEN 0x000800u
+#define WT_CONN_INCR_BACKUP 0x001000u
+#define WT_CONN_IN_MEMORY 0x002000u
+#define WT_CONN_LEAK_MEMORY 0x004000u
+#define WT_CONN_LSM_MERGE 0x008000u
+#define WT_CONN_OPTRACK 0x010000u
+#define WT_CONN_PANIC 0x020000u
+#define WT_CONN_READONLY 0x040000u
+#define WT_CONN_RECONFIGURING 0x080000u
+#define WT_CONN_RECOVERING 0x100000u
+#define WT_CONN_SALVAGE 0x200000u
+#define WT_CONN_WAS_BACKUP 0x400000u
/* AUTOMATIC FLAG VALUE GENERATION STOP 32 */
uint32_t flags;
};
diff --git a/src/third_party/wiredtiger/src/include/extern.h b/src/third_party/wiredtiger/src/include/extern.h
index 0c3c27fe9f0..bba54392137 100644
--- a/src/third_party/wiredtiger/src/include/extern.h
+++ b/src/third_party/wiredtiger/src/include/extern.h
@@ -1523,7 +1523,7 @@ extern int __wt_try_writelock(WT_SESSION_IMPL *session, WT_RWLOCK *l)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_turtle_exists(WT_SESSION_IMPL *session, bool *existp)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_turtle_init(WT_SESSION_IMPL *session, bool verify_meta)
+extern int __wt_turtle_init(WT_SESSION_IMPL *session, bool verify_meta, const char *cfg[])
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_turtle_read(WT_SESSION_IMPL *session, const char *key, char **valuep)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
diff --git a/src/third_party/wiredtiger/src/include/wiredtiger.in b/src/third_party/wiredtiger/src/include/wiredtiger.in
index b402a564eb7..09ebb18a72b 100644
--- a/src/third_party/wiredtiger/src/include/wiredtiger.in
+++ b/src/third_party/wiredtiger/src/include/wiredtiger.in
@@ -2760,6 +2760,10 @@ struct __wt_connection {
* event handler is installed that writes error messages to stderr. See
* @ref event_message_handling for more information.
* @configstart{wiredtiger_open, see dist/api_data.py}
+ * @config{backup_restore_target, If non-empty and restoring from a backup\, restore only the table
+ * object targets listed. WiredTiger will remove all the metadata entries for the tables that are
+ * not listed in the list from the reconstructed metadata. The target list must only include
+ * URIs of type table:., a list of strings; default empty.}
* @config{block_cache = (, block cache configuration options., a set of related configuration
* options defined below.}
* @config{&nbsp;&nbsp;&nbsp;&nbsp;blkcache_eviction_aggression, seconds an
diff --git a/src/third_party/wiredtiger/src/include/wt_internal.h b/src/third_party/wiredtiger/src/include/wt_internal.h
index f5871dbf788..1fa544ab841 100644
--- a/src/third_party/wiredtiger/src/include/wt_internal.h
+++ b/src/third_party/wiredtiger/src/include/wt_internal.h
@@ -69,6 +69,8 @@ struct __wt_addr;
typedef struct __wt_addr WT_ADDR;
struct __wt_addr_copy;
typedef struct __wt_addr_copy WT_ADDR_COPY;
+struct __wt_backup_target;
+typedef struct __wt_backup_target WT_BACKUP_TARGET;
struct __wt_blkcache;
typedef struct __wt_blkcache WT_BLKCACHE;
struct __wt_blkcache_item;
diff --git a/src/third_party/wiredtiger/src/meta/meta_turtle.c b/src/third_party/wiredtiger/src/meta/meta_turtle.c
index 85321e59795..0fb5bdb4436 100644
--- a/src/third_party/wiredtiger/src/meta/meta_turtle.c
+++ b/src/third_party/wiredtiger/src/meta/meta_turtle.c
@@ -53,18 +53,55 @@ __metadata_init(WT_SESSION_IMPL *session)
}
/*
+ * __metadata_backup_target_uri_search --
+ * Search in the backup uri hash table if the given uri exists.
+ */
+static bool
+__metadata_backup_target_uri_search(
+ WT_SESSION_IMPL *session, WT_BACKUPHASH *backuphash, const char *uri)
+{
+ WT_BACKUP_TARGET *target_uri;
+ WT_CONNECTION_IMPL *conn;
+ uint64_t bucket, hash;
+ bool found;
+
+ conn = S2C(session);
+ found = false;
+
+ hash = __wt_hash_city64(uri, strlen(uri));
+ bucket = hash & (conn->hash_size - 1);
+
+ TAILQ_FOREACH (target_uri, &backuphash[bucket], hashq)
+ if (strcmp(uri, target_uri->name) == 0) {
+ found = true;
+ break;
+ }
+ return (found);
+}
+
+/*
* __metadata_load_hot_backup --
* Load the contents of any hot backup file.
*/
static int
-__metadata_load_hot_backup(WT_SESSION_IMPL *session)
+__metadata_load_hot_backup(WT_SESSION_IMPL *session, WT_BACKUPHASH *backuphash)
{
+ WT_CONFIG_ITEM cval;
+ WT_CONNECTION_IMPL *conn;
WT_DECL_ITEM(key);
WT_DECL_ITEM(value);
WT_DECL_RET;
WT_FSTREAM *fs;
+ size_t allocated_name, file_len, max_len, slot;
+ char *filename, *metadata_conf, *metadata_key, **p, **partial_backup_names, *tablename;
+ const char *drop_cfg[] = {WT_CONFIG_BASE(session, WT_SESSION_drop), "remove_files=false", NULL};
bool exist;
+ allocated_name = file_len = max_len = slot = 0;
+ conn = S2C(session);
+ filename = NULL;
+ partial_backup_names = NULL;
+
/* Look for a hot backup file: if we find it, load it. */
WT_RET(__wt_fs_exist(session, WT_METADATA_BACKUP, &exist));
if (!exist)
@@ -81,12 +118,84 @@ __metadata_load_hot_backup(WT_SESSION_IMPL *session)
WT_ERR(__wt_getline(session, fs, value));
if (value->size == 0)
WT_ERR_PANIC(session, EINVAL, "%s: zero-length value", WT_METADATA_BACKUP);
+ /*
+ * When performing partial backup restore, generate a list of tables that is not part of the
+ * target uri list so that we can drop all entries later. To do this, parse through all the
+ * table metadata entries and check if the metadata entry exists in the target uri hash
+ * table. If the metadata entry doesn't exist in the hash table, append the table name to
+ * the partial backup remove list.
+ */
+ metadata_key = (char *)key->data;
+ if (F_ISSET(conn, WT_CONN_BACKUP_PARTIAL_RESTORE) &&
+ WT_PREFIX_MATCH(metadata_key, "table:")) {
+ /* Assert that there should be no WiredTiger tables with a table format. */
+ WT_ASSERT(
+ session, __wt_name_check(session, (const char *)key->data, key->size, true) == 0);
+ /*
+ * The target uri will be the deciding factor if a specific metadata table entry needs
+ * to be dropped. If the metadata table entry does not exist in the target uri hash
+ * table, append the metadata key to the backup remove list.
+ */
+ if (__metadata_backup_target_uri_search(session, backuphash, metadata_key) == false) {
+ if (key->size > max_len)
+ max_len = key->size;
+ WT_ERR(__wt_realloc_def(session, &allocated_name, slot + 2, &partial_backup_names));
+ p = &partial_backup_names[slot];
+ p[0] = p[1] = NULL;
+
+ WT_ERR(
+ __wt_strndup(session, (char *)key->data, key->size, &partial_backup_names[slot]));
+ slot++;
+ }
+ }
+
+ /*
+ * In the case of partial backup restore, add the entry to the metadata even if the table
+ * entry doesn't exist so that we can correctly drop all related entries via the schema code
+ * later.
+ */
WT_ERR(__wt_metadata_update(session, key->data, value->data));
}
- F_SET(S2C(session), WT_CONN_WAS_BACKUP);
+ F_SET(conn, WT_CONN_WAS_BACKUP);
+ if (F_ISSET(conn, WT_CONN_BACKUP_PARTIAL_RESTORE) && partial_backup_names != NULL) {
+ WT_ERR(__wt_calloc_def(session, slot + 1, &conn->partial_backup_remove_ids));
+ file_len = strlen("file:") + max_len + strlen(".wt") + 1;
+ WT_ERR(__wt_calloc_def(session, file_len, &filename));
+ /*
+ * Parse through the partial backup list and attempt to clean up all metadata references
+ * relating to the file. To do so, perform a schema drop operation on the table to cleanly
+ * remove all linked references. At the same time generate a list of btree ids to be used in
+ * recovery to truncate all the history store records.
+ */
+ for (slot = 0; partial_backup_names[slot] != NULL; ++slot) {
+ tablename = partial_backup_names[slot];
+ WT_PREFIX_SKIP_REQUIRED(session, tablename, "table:");
+ WT_ERR(__wt_snprintf(filename, file_len, "file:%s.wt", tablename));
+ WT_ERR(__wt_metadata_search(session, filename, &metadata_conf));
+ WT_ERR(__wt_config_getones(session, metadata_conf, "id", &cval));
+ conn->partial_backup_remove_ids[slot] = (uint32_t)cval.val;
+
+ WT_WITH_SCHEMA_LOCK(session,
+ WT_WITH_TABLE_WRITE_LOCK(
+ session, ret = __wt_schema_drop(session, partial_backup_names[slot], drop_cfg)));
+ WT_ERR(ret);
+ }
+ }
err:
+ if (filename != NULL)
+ __wt_free(session, filename);
+
+ /*
+ * Free the partial backup names list. The backup id list is used in recovery to truncate the
+ * history store entries that do not exist as part of the database anymore.
+ */
+ if (partial_backup_names != NULL) {
+ for (slot = 0; partial_backup_names[slot] != NULL; ++slot)
+ __wt_free(session, partial_backup_names[slot]);
+ __wt_free(session, partial_backup_names);
+ }
WT_TRET(__wt_fclose(session, &fs));
__wt_scr_free(session, &key);
__wt_scr_free(session, &value);
@@ -219,18 +328,97 @@ __wt_turtle_exists(WT_SESSION_IMPL *session, bool *existp)
}
/*
+ * __metadata_add_backup_target_uri --
+ * Add the target uri to the backup uri hash table.
+ */
+static int
+__metadata_add_backup_target_uri(
+ WT_SESSION_IMPL *session, WT_BACKUPHASH *backuphash, const char *name, size_t len)
+{
+ WT_BACKUP_TARGET *new_target_uri;
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ uint64_t bucket, hash;
+
+ conn = S2C(session);
+
+ WT_ERR(__wt_calloc_one(session, &new_target_uri));
+ WT_ERR(__wt_strndup(session, name, len, &new_target_uri->name));
+
+ hash = __wt_hash_city64(name, len);
+ bucket = hash & (conn->hash_size - 1);
+ new_target_uri->name_hash = hash;
+ /* Insert target uri entry into hashtable. */
+ TAILQ_INSERT_HEAD(&backuphash[bucket], new_target_uri, hashq);
+
+ return (0);
+err:
+ if (new_target_uri != NULL)
+ __wt_free(session, new_target_uri->name);
+ __wt_free(session, new_target_uri);
+
+ return (ret);
+}
+
+/*
+ * __metadata_load_target_uri_list --
+ * Load the list of target uris and construct a hashtable from it.
+ */
+static int
+__metadata_load_target_uri_list(
+ WT_SESSION_IMPL *session, bool exist_backup, const char *cfg[], WT_BACKUPHASH *backuphash)
+{
+ WT_CONFIG backup_config;
+ WT_CONFIG_ITEM cval, k, v;
+ WT_DECL_RET;
+
+ WT_TRET(__wt_config_gets(session, cfg, "backup_restore_target", &cval));
+ if (cval.len != 0) {
+ if (!exist_backup)
+ WT_RET_MSG(session, EINVAL,
+ "restoring a partial backup requires the WiredTiger metadata backup file.");
+ F_SET(S2C(session), WT_CONN_BACKUP_PARTIAL_RESTORE);
+
+ /*
+ * Check that the configuration string only has table schema formats in the target list and
+ * construct the target hash table.
+ */
+ __wt_config_subinit(session, &backup_config, &cval);
+ while ((ret = __wt_config_next(&backup_config, &k, &v)) == 0) {
+ if (!WT_PREFIX_MATCH(k.str, "table:"))
+ WT_RET_MSG(session, EINVAL,
+ "partial backup restore only supports objects of type \"table\" formats in the "
+ "target uri list, found %.*s instead.",
+ (int)k.len, k.str);
+ WT_RET(__metadata_add_backup_target_uri(session, backuphash, (char *)k.str, k.len));
+ }
+ WT_RET_NOTFOUND_OK(ret);
+ }
+ return (0);
+}
+
+/*
* __wt_turtle_init --
* Check the turtle file and create if necessary.
*/
int
-__wt_turtle_init(WT_SESSION_IMPL *session, bool verify_meta)
+__wt_turtle_init(WT_SESSION_IMPL *session, bool verify_meta, const char *cfg[])
{
+ WT_BACKUPHASH *backuphash;
+ WT_BACKUP_TARGET *target_uri;
+ WT_CONNECTION_IMPL *conn;
WT_DECL_RET;
+ uint64_t i;
char *metaconf, *unused_value;
bool exist_backup, exist_incr, exist_isrc, exist_turtle;
bool load, load_turtle, validate_turtle;
+ conn = S2C(session);
load = load_turtle = validate_turtle = false;
+ /* Initialize target uri hashtable. */
+ WT_ERR(__wt_calloc_def(session, conn->hash_size, &backuphash));
+ for (i = 0; i < conn->hash_size; ++i)
+ TAILQ_INIT(&backuphash[i]);
/*
* Discard any turtle setup file left-over from previous runs. This doesn't matter for
@@ -241,7 +429,7 @@ __wt_turtle_init(WT_SESSION_IMPL *session, bool verify_meta)
/* If we're a readonly database, we can skip discarding the leftover file. */
if (ret == EACCES)
ret = 0;
- WT_RET(ret);
+ WT_ERR(ret);
}
/*
@@ -256,23 +444,23 @@ __wt_turtle_init(WT_SESSION_IMPL *session, bool verify_meta)
* turtle file and an incremental backup file, that is an error. Otherwise, if there's already a
* turtle file, we're done.
*/
- WT_RET(__wt_fs_exist(session, WT_LOGINCR_BACKUP, &exist_incr));
- WT_RET(__wt_fs_exist(session, WT_LOGINCR_SRC, &exist_isrc));
- WT_RET(__wt_fs_exist(session, WT_METADATA_BACKUP, &exist_backup));
- WT_RET(__wt_fs_exist(session, WT_METADATA_TURTLE, &exist_turtle));
+ WT_ERR(__wt_fs_exist(session, WT_LOGINCR_BACKUP, &exist_incr));
+ WT_ERR(__wt_fs_exist(session, WT_LOGINCR_SRC, &exist_isrc));
+ WT_ERR(__wt_fs_exist(session, WT_METADATA_BACKUP, &exist_backup));
+ WT_ERR(__wt_fs_exist(session, WT_METADATA_TURTLE, &exist_turtle));
if (exist_turtle) {
/*
* Failure to read means a bad turtle file. Remove it and create a new turtle file.
*/
- if (F_ISSET(S2C(session), WT_CONN_SALVAGE)) {
+ if (F_ISSET(conn, WT_CONN_SALVAGE)) {
WT_WITH_TURTLE_LOCK(
session, ret = __wt_turtle_read(session, WT_METAFILE_URI, &unused_value));
__wt_free(session, unused_value);
}
if (ret != 0) {
- WT_RET(__wt_remove_if_exists(session, WT_METADATA_TURTLE, false));
+ WT_ERR(__wt_remove_if_exists(session, WT_METADATA_TURTLE, false));
load_turtle = true;
} else
/*
@@ -287,7 +475,7 @@ __wt_turtle_init(WT_SESSION_IMPL *session, bool verify_meta)
* incremental backup file and a destination database that incorrectly ran recovery.
*/
if (exist_incr && !exist_isrc)
- WT_RET_MSG(session, EINVAL, "Incremental backup after running recovery is not allowed");
+ WT_ERR_MSG(session, EINVAL, "Incremental backup after running recovery is not allowed");
/*
* If we have a backup file and metadata and turtle files, we want to recreate the metadata
* from the backup.
@@ -296,16 +484,16 @@ __wt_turtle_init(WT_SESSION_IMPL *session, bool verify_meta)
__wt_verbose_notice(session, WT_VERB_METADATA,
"Both %s and %s exist; recreating metadata from backup", WT_METADATA_TURTLE,
WT_METADATA_BACKUP);
- WT_RET(__wt_remove_if_exists(session, WT_METAFILE, false));
- WT_RET(__wt_remove_if_exists(session, WT_METADATA_TURTLE, false));
+ WT_ERR(__wt_remove_if_exists(session, WT_METAFILE, false));
+ WT_ERR(__wt_remove_if_exists(session, WT_METADATA_TURTLE, false));
load = true;
} else if (validate_turtle)
- WT_RET(__wt_turtle_validate_version(session));
+ WT_ERR(__wt_turtle_validate_version(session));
} else
load = true;
if (load) {
if (exist_incr)
- F_SET(S2C(session), WT_CONN_WAS_BACKUP);
+ F_SET(conn, WT_CONN_WAS_BACKUP);
/*
* Verifying the metadata is incompatible with restarting from a backup because the verify
@@ -313,27 +501,40 @@ __wt_turtle_init(WT_SESSION_IMPL *session, bool verify_meta)
* here before creating the metadata file and reading in the backup file.
*/
if (verify_meta && exist_backup)
- WT_RET_MSG(
+ WT_ERR_MSG(
session, EINVAL, "restoring a backup is incompatible with metadata verification");
+ /* If partial backup target is non-empty, construct the target backup uri list. */
+ WT_ERR(__metadata_load_target_uri_list(session, exist_backup, cfg, backuphash));
/* Create the metadata file. */
- WT_RET(__metadata_init(session));
+ WT_ERR(__metadata_init(session));
/* Load any hot-backup information. */
- WT_RET(__metadata_load_hot_backup(session));
+ WT_ERR(__metadata_load_hot_backup(session, backuphash));
/* Create any bulk-loaded file stubs. */
- WT_RET(__metadata_load_bulk(session));
+ WT_ERR(__metadata_load_bulk(session));
}
if (load || load_turtle) {
/* Create the turtle file. */
- WT_RET(__metadata_config(session, &metaconf));
+ WT_ERR(__metadata_config(session, &metaconf));
WT_WITH_TURTLE_LOCK(session, ret = __wt_turtle_update(session, WT_METAFILE_URI, metaconf));
__wt_free(session, metaconf);
- WT_RET(ret);
+ WT_ERR(ret);
}
+err:
+ for (i = 0; i < conn->hash_size; ++i)
+ while (!TAILQ_EMPTY(&backuphash[i])) {
+ target_uri = TAILQ_FIRST(&backuphash[i]);
+ /* Remove target uri entry from the hashtable. */
+ TAILQ_REMOVE(&backuphash[i], target_uri, hashq);
+ __wt_free(session, target_uri->name);
+ __wt_free(session, target_uri);
+ }
+ __wt_free(session, backuphash);
+
/* Remove the backup files, we'll never read them again. */
return (__wt_backup_file_remove(session));
}
diff --git a/src/third_party/wiredtiger/src/schema/schema_drop.c b/src/third_party/wiredtiger/src/schema/schema_drop.c
index a1420755926..294b458dbcf 100644
--- a/src/third_party/wiredtiger/src/schema/schema_drop.c
+++ b/src/third_party/wiredtiger/src/schema/schema_drop.c
@@ -300,7 +300,10 @@ __schema_drop(WT_SESSION_IMPL *session, const char *uri, const char *cfg[])
if (ret == WT_NOTFOUND || ret == ENOENT)
ret = force ? 0 : ENOENT;
- WT_TRET(__wt_meta_track_off(session, true, ret != 0));
+ if (F_ISSET(S2C(session), WT_CONN_BACKUP_PARTIAL_RESTORE))
+ WT_TRET(__wt_meta_track_off(session, false, ret != 0));
+ else
+ WT_TRET(__wt_meta_track_off(session, true, ret != 0));
return (ret);
}
diff --git a/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c b/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c
index 8c3a36a7d5a..faa68261bce 100644
--- a/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c
+++ b/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c
@@ -1471,12 +1471,15 @@ __rollback_to_stable_hs_final_pass(WT_SESSION_IMPL *session, wt_timestamp_t roll
{
WT_CONFIG ckptconf;
WT_CONFIG_ITEM cval, durableval, key;
+ WT_CONNECTION_IMPL *conn;
WT_DECL_RET;
wt_timestamp_t max_durable_ts, newest_stop_durable_ts, newest_stop_ts;
+ size_t i;
char *config;
char ts_string[2][WT_TS_INT_STRING_SIZE];
config = NULL;
+ conn = S2C(session);
WT_RET(__wt_metadata_search(session, WT_HS_URI, &config));
@@ -1526,6 +1529,15 @@ __rollback_to_stable_hs_final_pass(WT_SESSION_IMPL *session, wt_timestamp_t roll
WT_TRET(__wt_session_release_dhandle(session));
+ /*
+ * Truncate history store entries from the partial backup remove list. The list holds all of the
+ * btree ids that do not exist as part of the database anymore due to performing a selective
+ * restore from backup.
+ */
+ if (F_ISSET(conn, WT_CONN_BACKUP_PARTIAL_RESTORE) && conn->partial_backup_remove_ids != NULL)
+ for (i = 0; conn->partial_backup_remove_ids[i] != 0; ++i)
+ WT_ERR(
+ __rollback_to_stable_btree_hs_truncate(session, conn->partial_backup_remove_ids[i]));
err:
__wt_free(session, config);
return (ret);
diff --git a/src/third_party/wiredtiger/test/suite/test_backup24.py b/src/third_party/wiredtiger/test/suite/test_backup24.py
new file mode 100755
index 00000000000..319840be042
--- /dev/null
+++ b/src/third_party/wiredtiger/test/suite/test_backup24.py
@@ -0,0 +1,154 @@
+#!/usr/bin/env python
+#
+# Public Domain 2014-present MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+
+import os, wiredtiger, wttest
+from wtbackup import backup_base
+
+# test_backup24.py
+# Test recovering a selective backup with some logged tables, some not logged tables
+# and creating more of each during backup.
+class test_backup24(backup_base):
+ dir='backup_all.dir' # Backup directory name
+ config_log='key_format=S,value_format=S'
+ config_nolog='key_format=S,value_format=S,log=(enabled=false)'
+ log_t1="table:logged1"
+ log_t2="table:logged2"
+ log_tnew="table:loggednew"
+ log_tnew_file="loggednew.wt"
+ logmax="100K"
+ nolog_t1="table:not1"
+ nolog_t2="table:not2"
+ nolog_t2_file="not2.wt"
+ nolog_tnew="table:notnew"
+ nolog_tnew_file="notnew.wt"
+ newuri="table:newtable"
+
+ def add_data(self, uri, key, val):
+ c = self.session.open_cursor(uri, None, self.data_cursor_config)
+ for i in range(0, self.nops):
+ k = key + str(i)
+ v = val + str(i)
+ c[k] = v
+ c.close()
+
+ def check_data(self, uri, key, val):
+ c = self.session.open_cursor(uri, None, self.data_cursor_config)
+ for i in range(0, self.nops):
+ c.set_key(key + str(i))
+ self.assertEqual(c.search(), 0)
+ self.assertEqual(c.get_value(), val + str(i))
+ c.close()
+
+ # Create a large cache, otherwise this test runs quite slowly.
+ def conn_config(self):
+ return 'debug_mode=(table_logging=true),cache_size=1G,log=(enabled,file_max=%s,remove=false)' % \
+ self.logmax
+
+ def test_backup24(self):
+ log2 = "WiredTigerLog.0000000002"
+
+ # Create two logged and two not-logged tables.
+ self.session.create(self.log_t1, self.config_log)
+ self.session.create(self.log_t2, self.config_log)
+ self.session.create(self.nolog_t1, self.config_nolog)
+ self.session.create(self.nolog_t2, self.config_nolog)
+
+ # Insert small amounts of data at a time stopping just after we
+ # cross into log file 2.
+ while not os.path.exists(log2):
+ self.add_data(self.log_t1, 'key', 'value')
+ self.add_data(self.log_t2, 'key', 'value')
+ self.add_data(self.nolog_t1, 'key', 'value')
+ self.add_data(self.nolog_t2, 'key', 'value')
+
+ self.session.checkpoint()
+ # Add more data after the checkpoint.
+ self.add_data(self.log_t1, 'newkey', 'newvalue')
+ self.add_data(self.log_t2, 'newkey', 'newvalue')
+ self.add_data(self.nolog_t1, 'newkey', 'newvalue')
+ self.add_data(self.nolog_t2, 'newkey', 'newvalue')
+
+ # We allow creates during backup because the file doesn't exist
+ # when the backup metadata is created on cursor open and the newly
+ # created file is not in the cursor list.
+
+ # Create and add data to a new table and then copy the files with a full backup.
+ os.mkdir(self.dir)
+
+ # Open the backup cursor and then create new tables and add data to them.
+ # Then copy the files.
+ bkup_c = self.session.open_cursor('backup:', None, None)
+
+ # Now create and populate the new table. Make sure the log records
+ # are on disk and will be copied to the backup.
+ self.session.create(self.log_tnew, self.config_log)
+ self.session.create(self.nolog_tnew, self.config_nolog)
+ self.add_data(self.log_tnew, 'key', 'value')
+ self.add_data(self.nolog_tnew, 'key', 'value')
+ self.session.log_flush('sync=on')
+
+ # Now copy the files using full backup but as a selective backup. We want the logged
+ # tables but only the first not-logged table. Skip the second not-logged table.
+ all_files = self.take_selective_backup(self.dir, [self.nolog_t2_file], bkup_c)
+ orig_logs = [file for file in all_files if "WiredTigerLog" in file]
+ self.assertFalse(self.log_tnew in all_files)
+ self.assertFalse(self.nolog_tnew in all_files)
+ self.assertFalse(self.nolog_t2_file in all_files)
+
+ # Take a log backup.
+ self.take_log_backup(bkup_c, self.dir, orig_logs)
+ bkup_c.close()
+
+ target_uris = str([self.log_t1, self.log_t2, self.nolog_t1]).replace("\'", "\"")
+ backup_conn = self.wiredtiger_open(self.dir, 'backup_restore_target={0}'.format(target_uris))
+ flist = os.listdir(self.dir)
+ self.assertFalse(self.nolog_t2_file in flist)
+ self.assertFalse(self.nolog_tnew_file in flist)
+
+ # Test the files we didn't copy over during selective backup don't exist in the metadata.
+ bkup_session = backup_conn.open_session()
+ metadata_c = bkup_session.open_cursor('metadata:', None, None)
+ metadata_c.set_key(self.nolog_t2)
+ self.assertEqual(metadata_c.search(), wiredtiger.WT_NOTFOUND)
+ metadata_c.set_key(self.nolog_t2_file)
+ self.assertEqual(metadata_c.search(), wiredtiger.WT_NOTFOUND)
+
+ metadata_c.set_key(self.nolog_tnew)
+ self.assertEqual(metadata_c.search(), wiredtiger.WT_NOTFOUND)
+ metadata_c.set_key(self.nolog_tnew_file)
+ self.assertEqual(metadata_c.search(), wiredtiger.WT_NOTFOUND)
+ metadata_c.close()
+
+        # Test that the database partially recovered successfully.
+ self.check_data(self.log_t1, 'key', 'value')
+ self.check_data(self.log_t2, 'key', 'value')
+ self.check_data(self.nolog_t1, 'key', 'value')
+ backup_conn.close()
+
+if __name__ == '__main__':
+ wttest.run()
diff --git a/src/third_party/wiredtiger/test/suite/test_backup26.py b/src/third_party/wiredtiger/test/suite/test_backup26.py
new file mode 100644
index 00000000000..258a2a0f675
--- /dev/null
+++ b/src/third_party/wiredtiger/test/suite/test_backup26.py
@@ -0,0 +1,111 @@
+#!/usr/bin/env python
+#
+# Public Domain 2014-present MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+
+import wiredtiger, wttest
+import os, re, time
+from wtbackup import backup_base
+from wtscenario import make_scenarios
+from wtdataset import SimpleDataSet
+
+# test_backup26.py
+# Test selective backup with large amount of tables. Recovering a partial backup should take
+# longer when there are more active tables. Also test recovery correctness with both file and
+# table schemas in a partial backup.
+class test_backup26(backup_base):
+ dir='backup.dir' # Backup directory name
+ uri="table_backup"
+ ntables = 10000 if wttest.islongtest() else 500
+
+ # Reverse the backup restore list, WiredTiger should still succeed in this case.
+ reverse = [
+ ["reverse_target_list", dict(reverse=True)],
+ ["target_list", dict(reverse=False)],
+ ]
+
+ # Percentage of tables to not copy over in selective backup.
+ percentage = [
+        ('hundred_percent', dict(percentage=1)),
+ ('ninety_percent', dict(percentage=0.9)),
+ ('fifty_percent', dict(percentage=0.5)),
+ ('ten_percent', dict(percentage=0.1)),
+ ('zero_percent', dict(percentage=0)),
+ ]
+ scenarios = make_scenarios(percentage, reverse)
+
+ def test_backup26(self):
+ selective_remove_uri_file_list = []
+ selective_remove_uri_list = []
+ selective_uri_list = []
+
+ for i in range(0, self.ntables):
+ uri = "table:{0}".format(self.uri + str(i))
+ dataset = SimpleDataSet(self, uri, 100, key_format="S")
+ dataset.populate()
+ # Append the table uri to the selective backup remove list until the set percentage.
+ # These tables will not be copied over in selective backup.
+ if (i <= int(self.ntables * self.percentage)):
+ selective_remove_uri_list.append(uri)
+ selective_remove_uri_file_list.append("{0}.wt".format(self.uri + str(i)))
+ else:
+ selective_uri_list.append(uri)
+ self.session.checkpoint()
+
+ os.mkdir(self.dir)
+
+ # Now copy the files using full backup. This should not include the tables inside the remove list.
+ all_files = self.take_selective_backup(self.dir, selective_remove_uri_file_list)
+
+ target_uris = None
+ if self.reverse:
+ target_uris = str(selective_uri_list[::-1]).replace("\'", "\"")
+ else:
+ target_uris = str(selective_uri_list).replace("\'", "\"")
+ starttime = time.time()
+ # After the full backup, open and recover the backup database.
+ backup_conn = self.wiredtiger_open(self.dir, "backup_restore_target={0}".format(target_uris))
+ elapsed = time.time() - starttime
+ self.pr("%s partial backup has taken %.2f seconds." % (str(self), elapsed))
+
+ bkup_session = backup_conn.open_session()
+ # Open the cursor from uris that were not part of the selective backup and expect failure
+ # since file doesn't exist.
+ for remove_uri in selective_remove_uri_list:
+ self.assertRaisesException(
+ wiredtiger.WiredTigerError,lambda: bkup_session.open_cursor(remove_uri, None, None))
+
+ # Open the cursors on tables that copied over to the backup directory. They should still
+ # recover properly.
+ for uri in selective_uri_list:
+ c = bkup_session.open_cursor(uri, None, None)
+ ds = SimpleDataSet(self, uri, 100, key_format="S")
+ ds.check_cursor(c)
+ c.close()
+ backup_conn.close()
+
+if __name__ == '__main__':
+ wttest.run()
diff --git a/src/third_party/wiredtiger/test/suite/test_backup27.py b/src/third_party/wiredtiger/test/suite/test_backup27.py
new file mode 100755
index 00000000000..d1bb4d84106
--- /dev/null
+++ b/src/third_party/wiredtiger/test/suite/test_backup27.py
@@ -0,0 +1,100 @@
+#!/usr/bin/env python
+#
+# Public Domain 2014-present MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+
+import os, wiredtiger, wttest
+from wtbackup import backup_base
+from wtscenario import make_scenarios
+
+# test_backup27.py
+# Test selective backup with history store contents. Recovering a partial backup should
+# clear the history entries of the table that does not exist in the backup directory.
+class test_backup27(backup_base):
+ dir='backup.dir' # Backup directory name
+ newuri="table:table_no_hs"
+ newuri_file="table_no_hs.wt"
+ uri="table:table_hs"
+
+ def add_timestamp_data(self, uri, key, val, timestamp):
+ self.session.begin_transaction()
+ c = self.session.open_cursor(uri, None, None)
+ for i in range(0, 1000):
+ k = key + str(i)
+ v = val + str(i)
+ c[k] = v
+ c.close()
+ self.session.commit_transaction('commit_timestamp=' + self.timestamp_str(timestamp))
+
+ def validate_timestamp_data(self, session, uri, key, expected_err, timestamp):
+ session.begin_transaction('read_timestamp=' + self.timestamp_str(timestamp))
+ c = session.open_cursor(uri, None, None)
+ for i in range(0, 1000):
+ k = key + str(i)
+ c.set_key(k)
+ self.assertEqual(c.search(), expected_err)
+ c.close()
+ session.commit_transaction()
+
+ def test_backup27(self):
+ log2 = "WiredTigerLog.0000000002"
+
+ self.session.create(self.uri, "key_format=S,value_format=S")
+ self.session.create(self.newuri, "key_format=S,value_format=S")
+
+ self.add_timestamp_data(self.uri, "key", "val", 1)
+ self.add_timestamp_data(self.newuri, "key", "val", 1)
+
+ self.add_timestamp_data(self.uri, "key", "val5", 5)
+ self.add_timestamp_data(self.newuri, "key", "val5", 5)
+
+ # Stable timestamp at 10, so that we can retain history store data.
+ self.conn.set_timestamp('stable_timestamp=' + self.timestamp_str(10))
+ self.session.checkpoint()
+
+ os.mkdir(self.dir)
+
+ # Now copy the files using selective backup. This should not include one of the tables.
+ all_files = self.take_selective_backup(self.dir, [self.newuri_file])
+
+ # After the full backup, open and partially recover the backup database on only one table.
+ backup_conn = self.wiredtiger_open(self.dir, "backup_restore_target=[\"{0}\"]".format(self.uri))
+ bkup_session = backup_conn.open_session()
+
+ # Test that the history store data still exists for the tables that got restored.
+ self.validate_timestamp_data(bkup_session, self.uri, "key", 0, 1)
+ self.validate_timestamp_data(bkup_session, self.uri, "key", 0, 10)
+
+ # Open the cursor and expect failure since file doesn't exist.
+ self.assertRaisesException(
+ wiredtiger.WiredTigerError,lambda: bkup_session.open_cursor(self.newuri, None, None))
+ bkup_session.create(self.newuri, "key_format=S,value_format=S")
+ self.validate_timestamp_data(bkup_session, self.newuri, "key", wiredtiger.WT_NOTFOUND, 1)
+ self.validate_timestamp_data(bkup_session, self.newuri, "key", wiredtiger.WT_NOTFOUND, 5)
+ backup_conn.close()
+
+if __name__ == '__main__':
+ wttest.run()
diff --git a/src/third_party/wiredtiger/test/suite/test_backup28.py b/src/third_party/wiredtiger/test/suite/test_backup28.py
new file mode 100644
index 00000000000..50aa0d4ae7f
--- /dev/null
+++ b/src/third_party/wiredtiger/test/suite/test_backup28.py
@@ -0,0 +1,96 @@
+#!/usr/bin/env python
+#
+# Public Domain 2014-present MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+
+import wiredtiger, wttest
+import os, re
+from wtbackup import backup_base
+from wtscenario import make_scenarios
+
+# test_backup28.py
+# Test selective backup with different schema types. Recovering a partial backup with target uris
+# including colgroups, index or lsm formats should raise a message. The only supported types are
+# table formats in the uri list.
+class test_backup28(backup_base):
+ dir='backup.dir' # Backup directory name
+ uri="table_backup"
+
+ types = [
+ ('file', dict(pfx='file:', target_uri_list=["file:table0"])),
+ ('lsm', dict(pfx='lsm:', target_uri_list=["lsm:table0"])),
+ ('table-simple', dict(pfx='table:', target_uri_list=["table:table0"])),
+        ('table-index', dict(pfx='table:', target_uri_list=["index:table0:i0", "table:table0"])),
+        ('table-cg', dict(pfx='table:', target_uri_list=["colgroup:table0:g0", "table:table0"])),
+ ]
+
+ scenarios = make_scenarios(types)
+
+ def test_backup28(self):
+ selective_remove_file_list = []
+ uri = self.pfx + 'table0'
+ create_params = 'key_format=S,value_format=S,'
+
+ cgparam = 'columns=(k,v),colgroups=(g0),'
+ # Create the main table.
+ self.session.create(uri, create_params + cgparam)
+
+ if (self.pfx != "lsm:" and self.pfx != "file:"):
+ # Add in column group and index tables.
+ colgroup_param = 'columns=(v),'
+ suburi = 'colgroup:table0:g0'
+ self.session.create(suburi, colgroup_param)
+
+ suburi = 'index:table0:i0'
+ self.session.create(suburi, cgparam)
+ self.session.checkpoint()
+
+ os.mkdir(self.dir)
+
+ # Now copy the files using full backup. Selectively don't copy files based on remove list.
+ all_files = self.take_selective_backup(self.dir, [])
+
+ target_uri_list_format = str(self.target_uri_list).replace("\'", "\"")
+ if len(self.target_uri_list) and self.target_uri_list[0] == "table:table0":
+ # After the full backup, open and recover the backup database, and it should succeed.
+ backup_conn = self.wiredtiger_open(self.dir, "backup_restore_target={0}".format(target_uri_list_format))
+ bkup_session = backup_conn.open_session()
+
+ # Make sure that the table recovered properly.
+ c = bkup_session.open_cursor(uri, None, None)
+ c.close()
+ backup_conn.close()
+ else:
+ # After the full backup, perform partial backup restore adding the target uris of
+ # indexes, colgroups or lsm. This should fail and return with a message, as we only allow
+ # table formats.
+ self.assertRaisesHavingMessage(wiredtiger.WiredTigerError,
+ lambda: self.wiredtiger_open(self.dir, "backup_restore_target={0}".format(target_uri_list_format)),
+ '/partial backup restore only supports objects of type .* formats in the target uri list/')
+
+
+if __name__ == '__main__':
+ wttest.run()
diff --git a/src/third_party/wiredtiger/test/suite/wtbackup.py b/src/third_party/wiredtiger/test/suite/wtbackup.py
index 25de77ba7fa..63df05acc4d 100644
--- a/src/third_party/wiredtiger/test/suite/wtbackup.py
+++ b/src/third_party/wiredtiger/test/suite/wtbackup.py
@@ -147,6 +147,38 @@ class backup_base(wttest.WiredTigerTestCase, suite_subprocess):
shutil.copy(copy_from, copy_to)
#
+    # Uses a backup cursor to perform a selective backup, by iterating through the cursor
+    # and copying into the given directory only the files that are not in the remove list.
+ # When dealing with a test that performs multiple incremental backups, we need to perform a
+ # proper backup on each incremental directory as a starting base.
+ #
+ def take_selective_backup(self, backup_dir, remove_list, backup_cur=None):
+ self.pr('Selective backup to ' + backup_dir + ': ')
+ bkup_c = backup_cur
+ if backup_cur == None:
+ config = None
+ if self.initial_backup:
+ config = 'incremental=(granularity=1M,enabled=true,this_id=ID0)'
+ bkup_c = self.session.open_cursor('backup:', None, config)
+ all_files = []
+
+ # We cannot use 'for newfile in bkup_c:' usage because backup cursors don't have
+ # values and adding in get_values returns ENOTSUP and causes the usage to fail.
+ # If that changes then this, and the use of the duplicate below can change.
+ while bkup_c.next() == 0:
+ newfile = bkup_c.get_key()
+ sz = os.path.getsize(newfile)
+ if (newfile in remove_list):
+ continue
+ self.pr('Copy from: ' + newfile + ' (' + str(sz) + ') to ' + self.dir)
+ self.copy_file(newfile, backup_dir)
+ all_files.append(newfile)
+
+ if backup_cur == None:
+ bkup_c.close()
+ return all_files
+
+ #
# Uses a backup cursor to perform a full backup, by iterating through the cursor
# grabbing files to copy over into a given directory. When dealing with a test
# that performs multiple incremental backups, we initially perform a full backup