diff options
-rw-r--r-- | src/third_party/wiredtiger/import.data | 2 | ||||
-rw-r--r-- | src/third_party/wiredtiger/src/block/block_open.c | 32 | ||||
-rw-r--r-- | src/third_party/wiredtiger/test/csuite/incr_backup/main.c | 10 | ||||
-rw-r--r-- | src/third_party/wiredtiger/test/format/backup.c | 327 | ||||
-rw-r--r-- | src/third_party/wiredtiger/test/format/config.c | 60 | ||||
-rw-r--r-- | src/third_party/wiredtiger/test/format/config.h | 3 | ||||
-rw-r--r-- | src/third_party/wiredtiger/test/format/format.h | 7 |
7 files changed, 420 insertions, 21 deletions
diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data index 5883a331087..3de62dcdfcd 100644 --- a/src/third_party/wiredtiger/import.data +++ b/src/third_party/wiredtiger/import.data @@ -2,5 +2,5 @@ "vendor": "wiredtiger", "github": "wiredtiger/wiredtiger.git", "branch": "mongodb-4.4", - "commit": "187983a50c696eb217a780bb6b29e4bd3433c13b" + "commit": "59c2abc4d95f7d29b8a4ed43c7f182cd3c515e90" } diff --git a/src/third_party/wiredtiger/src/block/block_open.c b/src/third_party/wiredtiger/src/block/block_open.c index 45229528905..c2a3f509701 100644 --- a/src/third_party/wiredtiger/src/block/block_open.c +++ b/src/third_party/wiredtiger/src/block/block_open.c @@ -318,13 +318,37 @@ __desc_read(WT_SESSION_IMPL *session, uint32_t allocsize, WT_BLOCK *block) /* * If a data file is smaller than the allocation size, we're not going to be able to read the - * descriptor block. We should treat this as if the file has been deleted; that is, to log an - * error but continue on. + * descriptor block. + * + * If we're performing rollback to stable as part of recovery, we should treat this as if the + * file has been deleted; that is, to log an error but continue on. + * + * In the general case, we should return a generic error and signal that we've detected data + * corruption. + * + * FIXME: MongoDB relies heavily on the error codes reported when opening cursors (which hits + * this logic if the relevant data handle isn't already open). However this code gets run in + * rollback to stable as part of recovery where we want to skip any corrupted data files + * temporarily to allow MongoDB to initiate salvage. This is why we've been forced into this + * situation. We should address this as part of WT-5832 and clarify what error codes we expect + * to be returning across the API boundary. */ - if (block->size < allocsize) - WT_RET_MSG(session, ENOENT, + if (block->size < allocsize) { + /* + * We use the "ignore history store tombstone" flag as of verify so we need to check that + * we're not performing a verify. + */ + if (F_ISSET(session, WT_SESSION_ROLLBACK_TO_STABLE_FLAGS) && + !F_ISSET(S2BT(session), WT_BTREE_VERIFY)) + ret = ENOENT; + else { + ret = WT_ERROR; + F_SET(S2C(session), WT_CONN_DATA_CORRUPTION); + } + WT_RET_MSG(session, ret, "File %s is smaller than allocation size; file size=%" PRId64 ", alloc size=%" PRIu32, block->name, block->size, allocsize); + } /* Use a scratch buffer to get correct alignment for direct I/O. */ WT_RET(__wt_scr_alloc(session, allocsize, &buf)); diff --git a/src/third_party/wiredtiger/test/csuite/incr_backup/main.c b/src/third_party/wiredtiger/test/csuite/incr_backup/main.c index 3b9ed1319bb..9892eaa373e 100644 --- a/src/third_party/wiredtiger/test/csuite/incr_backup/main.c +++ b/src/third_party/wiredtiger/test/csuite/incr_backup/main.c @@ -295,7 +295,7 @@ again: testutil_check( __wt_snprintf(filename, sizeof(filename), "%s/%s", dirname, prev->names[prevpos])); VERBOSE(3, "Removing file from backup: %s\n", filename); - remove(filename); + testutil_check(remove(filename)); } else { /* * There is something in the current list not in the prev list. Walk past it in the @@ -366,6 +366,7 @@ table_changes(WT_SESSION *session, TABLE *table) item.size = table->max_value_size; key_value(change_count, key, sizeof(key), &item, &op_type); cur->set_key(cur, key); + testutil_assert(op_type < _OPERATION_TYPE_COUNT); switch (op_type) { case INSERT: cur->set_value(cur, &item); @@ -388,7 +389,6 @@ table_changes(WT_SESSION *session, TABLE *table) testutil_check(cur->update(cur)); break; case _OPERATION_TYPE_COUNT: - testutil_assert(false); break; } } @@ -532,7 +532,7 @@ reopen_file(int *fdp, char *buf, size_t buflen, const char *filename, int oflag) if (strcmp(buf, filename) == 0 && *fdp != -1) return; if (*fdp != -1) - close(*fdp); + testutil_check(close(*fdp)); *fdp = open(filename, oflag, 0666); strncpy(buf, filename, buflen); testutil_assert(*fdp >= 0); @@ -653,10 +653,11 @@ check_table(WT_SESSION *session, TABLE *table) expect_records = 0; total_changes = table->change_count; boundary = total_changes % KEYS_PER_TABLE; - op_type = (OPERATION_TYPE)(total_changes % CHANGES_PER_CYCLE) / KEYS_PER_TABLE; + op_type = (OPERATION_TYPE)((total_changes % CHANGES_PER_CYCLE) / KEYS_PER_TABLE); value = dcalloc(1, table->max_value_size); VERBOSE(3, "Checking: %s\n", table->name); + testutil_assert(op_type < _OPERATION_TYPE_COUNT); switch (op_type) { case INSERT: expect_records = total_changes % KEYS_PER_TABLE; @@ -669,7 +670,6 @@ check_table(WT_SESSION *session, TABLE *table) expect_records = KEYS_PER_TABLE - (total_changes % KEYS_PER_TABLE); break; case _OPERATION_TYPE_COUNT: - testutil_assert(false); break; } diff --git a/src/third_party/wiredtiger/test/format/backup.c b/src/third_party/wiredtiger/test/format/backup.c index 074a254c481..5954c41ba22 100644 --- a/src/third_party/wiredtiger/test/format/backup.c +++ b/src/third_party/wiredtiger/test/format/backup.c @@ -54,6 +54,241 @@ check_copy(void) } /* + * The set of active files in a backup. This is our "memory" of files that are used in each backup, + * so we can remove any that are not mentioned in the next backup. + */ +typedef struct { + char **names; + uint32_t count; +} ACTIVE_FILES; + +/* + * active_files_init -- + * Initialize (clear) the active file struct. + */ +static void +active_files_init(ACTIVE_FILES *active) +{ + WT_CLEAR(*active); +} + +#if 0 +/* + * active_files_print -- + * Print the set of active files for debugging. + */ +static void +active_files_print(ACTIVE_FILES *active, const char *msg) +{ + uint32_t i; + + if (active == NULL) + return; + fprintf(stderr, "Active files: %s, %d entries\n", msg, (int)active->count); + for (i = 0; i < active->count; i++) + fprintf(stderr, " %s\n", active->names[i]); +} +#endif + +/* + * active_files_add -- + * Add a new name to the active file list. + */ +static void +active_files_add(ACTIVE_FILES *active, const char *name) +{ + uint32_t pos; + + if (active == NULL) + return; + pos = active->count++; + active->names = drealloc(active->names, sizeof(char *) * active->count); + active->names[pos] = strdup(name); +} + +/* + * active_files_sort_function -- + * Sort function for qsort. + */ +static int +active_files_sort_function(const void *left, const void *right) +{ + return (strcmp(*(const char **)left, *(const char **)right)); +} + +/* + * active_files_sort -- + * Sort the list of names in the active file list. + */ +static void +active_files_sort(ACTIVE_FILES *active) +{ + if (active == NULL) + return; + __wt_qsort(active->names, active->count, sizeof(char *), active_files_sort_function); +} + +/* + * active_files_remove_missing -- + * Files in the previous list that are missing from the current list are removed. + */ +static void +active_files_remove_missing(ACTIVE_FILES *prev, ACTIVE_FILES *cur) +{ + uint32_t curpos, prevpos; + int cmp; + char filename[1024]; + + if (prev == NULL) + return; +#if 0 + active_files_print(prev, "computing removals: previous list of active files"); + active_files_print(cur, "computing removals: current list of active files"); +#endif + curpos = 0; + + /* + * Walk through the two lists looking for non-matches. + */ + for (prevpos = 0; prevpos < prev->count; prevpos++) { +again: + if (curpos >= cur->count) + cmp = -1; /* There are extra entries at the end of the prev list */ + else + cmp = strcmp(prev->names[prevpos], cur->names[curpos]); + + if (cmp == 0) + curpos++; + else if (cmp < 0) { + /* + * There is something in the prev list not in the current list. Remove it, and continue + * - don't advance the current list. + */ + testutil_check( + __wt_snprintf(filename, sizeof(filename), "BACKUP/%s", prev->names[prevpos])); +#if 0 + fprintf(stderr, "Removing file from backup: %s\n", filename); +#endif + remove(filename); + testutil_check( + __wt_snprintf(filename, sizeof(filename), "BACKUP_COPY/%s", prev->names[prevpos])); + remove(filename); + } else { + /* + * There is something in the current list not in the prev list. Walk past it in the + * current list and try again. + */ + curpos++; + goto again; + } + } +} + +/* + * active_files_free -- + * Free the list of active files. + */ +static void +active_files_free(ACTIVE_FILES *active) +{ + uint32_t i; + + if (active == NULL) + return; + for (i = 0; i < active->count; i++) + free(active->names[i]); + free(active->names); + active_files_init(active); +} + +/* + * copy_blocks -- + * Perform a single block-based incremental backup of the given file. + */ +static void +copy_blocks(WT_SESSION *session, WT_CURSOR *bkup_c, const char *name) +{ + WT_CURSOR *incr_cur; + size_t len, tmp_sz; + ssize_t rdsize; + uint64_t offset, type; + u_int size; + int ret, rfd, wfd1, wfd2; + char buf[512], config[512], *first, *second, *tmp; + bool first_pass; + + /* + * We need to prepend the home directory name here because we are not using the WiredTiger + * internal functions that would prepend it for us. + */ + len = strlen(g.home) + strlen("BACKUP") + strlen(name) + 10; + first = dmalloc(len); + + /* + * Save another copy of the original file to make debugging recovery errors easier. + */ + len = strlen(g.home) + strlen("BACKUP_COPY") + strlen(name) + 10; + second = dmalloc(len); + testutil_check(__wt_snprintf(config, sizeof(config), "incremental=(file=%s)", name)); + + /* Open the duplicate incremental backup cursor with the file name given. */ + tmp_sz = 0; + tmp = NULL; + first_pass = true; + rfd = wfd1 = wfd2 = -1; + testutil_check(session->open_cursor(session, NULL, bkup_c, config, &incr_cur)); + while ((ret = incr_cur->next(incr_cur)) == 0) { + testutil_check(incr_cur->get_key(incr_cur, &offset, (uint64_t *)&size, &type)); + if (type == WT_BACKUP_RANGE) { + /* + * Since we are using system calls below instead of a WiredTiger function, we have to + * prepend the home directory to the file names ourselves. + */ + testutil_check(__wt_snprintf(first, len, "%s/BACKUP/%s", g.home, name)); + testutil_check(__wt_snprintf(second, len, "%s/BACKUP_COPY/%s", g.home, name)); + if (tmp_sz < size) { + tmp = drealloc(tmp, size); + tmp_sz = size; + } + if (first_pass) { + testutil_check(__wt_snprintf(buf, sizeof(buf), "%s/%s", g.home, name)); + error_sys_check(rfd = open(buf, O_RDONLY, 0)); + error_sys_check(wfd1 = open(first, O_WRONLY | O_CREAT, 0)); + error_sys_check(wfd2 = open(second, O_WRONLY | O_CREAT, 0)); + first_pass = false; + } + error_sys_check(lseek(rfd, (wt_off_t)offset, SEEK_SET)); + error_sys_check(rdsize = read(rfd, tmp, size)); + error_sys_check(lseek(wfd1, (wt_off_t)offset, SEEK_SET)); + error_sys_check(lseek(wfd2, (wt_off_t)offset, SEEK_SET)); + /* Use the read size since we may have read less than the granularity. */ + error_sys_check(write(wfd1, tmp, (size_t)rdsize)); + error_sys_check(write(wfd2, tmp, (size_t)rdsize)); + } else { + /* + * These operations are using a WiredTiger function so it will prepend the home + * directory to the name for us. + */ + testutil_check(__wt_snprintf(first, len, "BACKUP/%s", name)); + testutil_check(__wt_snprintf(second, len, "BACKUP_COPY/%s", name)); + testutil_assert(type == WT_BACKUP_FILE); + testutil_assert(rfd == -1); + testutil_assert(first_pass == true); + testutil_check(__wt_copy_and_sync(session, name, first)); + testutil_check(__wt_copy_and_sync(session, first, second)); + } + } + testutil_check(incr_cur->close(incr_cur)); + if (rfd != -1) { + error_sys_check(close(rfd)); + error_sys_check(close(wfd1)); + error_sys_check(close(wfd2)); + } + free(first); + free(second); + free(tmp); +} +/* * copy_file -- * Copy a single file into the backup directories. */ @@ -87,13 +322,16 @@ copy_file(WT_SESSION *session, const char *name) WT_THREAD_RET backup(void *arg) { + ACTIVE_FILES active[2], *active_now, *active_prev; WT_CONNECTION *conn; WT_CURSOR *backup_cursor; WT_DECL_RET; WT_SESSION *session; + uint32_t src_id, src_prev; u_int incremental, period; const char *config, *key; - bool full; + char cfg[512]; + bool block_full, full; (void)(arg); @@ -106,7 +344,11 @@ backup(void *arg) * Perform a full backup at somewhere under 10 seconds (that way there's at least one), then at * larger intervals, optionally do incremental backups between full backups. */ + block_full = full = true; incremental = 0; + active_files_init(&active[0]); + active_files_init(&active[1]); + active_now = active_prev = NULL; for (period = mmrand(NULL, 1, 10);; period = mmrand(NULL, 20, 45)) { /* Sleep for short periods so we don't make the run wait. */ while (period > 0 && !g.workers_finished) { @@ -125,17 +367,66 @@ backup(void *arg) break; } - if (incremental) { - config = "target=(\"log:\")"; + if (g.c_backup_incr_flag == INCREMENTAL_BLOCK) { + /* + * If we're doing a full backup as the start of the incremental backup, only send in an + * identifier for this one. + */ + if (block_full) { + active_files_free(&active[0]); + active_files_free(&active[1]); + active_now = &active[g.backup_id % 2]; + active_prev = NULL; + testutil_check(__wt_snprintf( + cfg, sizeof(cfg), "incremental=(enabled,this_id=ID%" PRIu32 ")", g.backup_id++)); + block_full = false; + full = true; + } else { + /* + * 75% of the time, use the most recent source id. 25% of the time, use the id + * that is from two incremental backups prior. The handling of the active files for + * the source one or two incrementals prior is unpleasant but necessary. + */ + src_prev = mmrand(NULL, 1, 4) == 2 && g.backup_id >= 2 && full == false ? 2 : 1; + if (src_prev == 2) { + /* + * If we're going back two incrementals ago, set active_prev to the other list + * of active files (i.e. the active list that is not the immediate previous + * list) and overwrite active_prev with the current one. + */ + active_now = active_prev; + if (active_prev == &active[0]) + active_prev = &active[1]; + else + active_prev = &active[0]; + } else if (active_prev == &active[0]) + active_now = &active[1]; + else + active_now = &active[0]; + src_id = g.backup_id - src_prev; + testutil_check(__wt_snprintf(cfg, sizeof(cfg), + "incremental=(enabled,src_id=ID%u,this_id=ID%" PRIu32 ")", src_id, + g.backup_id++)); + /* Restart a full incremental every once in a while. */ + block_full = mmrand(NULL, 1, 8) == 1; + full = false; + } + config = cfg; + /* Free up the old active file list we're going to overwrite. */ + active_files_free(active_now); + } else if (g.c_backup_incr_flag == INCREMENTAL_LOG) { + testutil_check(__wt_snprintf(cfg, sizeof(cfg), "target=(\"log:\")")); + config = cfg; full = false; } else { - /* Re-create the backup directory. */ - testutil_checkfmt(system(g.home_backup_init), "%s", "backup directory creation failed"); - config = NULL; full = true; } + /* If we're taking a full backup, create the backup directories. */ + if (full) + testutil_checkfmt(system(g.home_backup_init), "%s", "backup directory creation failed"); + /* * open_cursor can return EBUSY if concurrent with a metadata operation, retry in that case. */ @@ -147,17 +438,28 @@ backup(void *arg) while ((ret = backup_cursor->next(backup_cursor)) == 0) { testutil_check(backup_cursor->get_key(backup_cursor, &key)); - copy_file(session, key); + if (g.c_backup_incr_flag == INCREMENTAL_BLOCK) { + if (full) + copy_file(session, key); + else + copy_blocks(session, backup_cursor, key); + + } else + copy_file(session, key); + active_files_add(active_now, key); } if (ret != WT_NOTFOUND) testutil_die(ret, "backup-cursor"); - /* After an incremental backup, truncate the log files. */ - if (incremental) + /* After a log-based incremental backup, truncate the log files. */ + if (g.c_backup_incr_flag == INCREMENTAL_LOG) testutil_check(session->truncate(session, "log:", backup_cursor, NULL, NULL)); testutil_check(backup_cursor->close(backup_cursor)); testutil_check(pthread_rwlock_unlock(&g.backup_lock)); + active_files_sort(active_now); + active_files_remove_missing(active_prev, active_now); + active_prev = active_now; /* * If automatic log archival isn't configured, optionally do incremental backups after each @@ -167,13 +469,18 @@ backup(void *arg) */ if (full) incremental = g.c_logging_archive ? 1 : mmrand(NULL, 1, 5); - if (--incremental == 0) + if (--incremental == 0) { check_copy(); + /* We ran recovery in the backup directory, so next time it must be a full backup. */ + block_full = full = true; + } } if (incremental != 0) check_copy(); + active_files_free(&active[0]); + active_files_free(&active[1]); testutil_check(session->close(session, NULL)); return (WT_THREAD_RET_VALUE); diff --git a/src/third_party/wiredtiger/test/format/config.c b/src/third_party/wiredtiger/test/format/config.c index cad60906561..e367b09ae2f 100644 --- a/src/third_party/wiredtiger/test/format/config.c +++ b/src/third_party/wiredtiger/test/format/config.c @@ -29,6 +29,7 @@ #include "format.h" #include "config.h" +static void config_backup(void); static void config_cache(void); static void config_checkpoint(void); static void config_checksum(void); @@ -41,6 +42,7 @@ static void config_in_memory(void); static void config_in_memory_reset(void); static int config_is_perm(const char *); static void config_lsm_reset(void); +static void config_map_backup_incr(const char *, u_int *); static void config_map_checkpoint(const char *, u_int *); static void config_map_checksum(const char *, u_int *); static void config_map_compression(const char *, u_int *); @@ -176,6 +178,7 @@ config_setup(void) config_transaction(); /* Simple selection. */ + config_backup(); config_checkpoint(); config_checksum(); config_compression("compression"); @@ -237,6 +240,41 @@ config_setup(void) } /* + * config_backup -- + * Backup configuration. + */ +static void +config_backup(void) +{ + const char *cstr; + + /* + * Choose a type of incremental backup. + */ + if (!config_is_perm("backup_incremental")) { + cstr = "backup_incremental=off"; + switch (mmrand(NULL, 1, 10)) { + case 1: /* 30% full backup only */ + case 2: + case 3: + break; + case 4: /* 40% block based incremental */ + case 5: + case 6: + case 7: + cstr = "backup_incremental=block"; + break; + case 8: + case 9: + case 10: /* 30% log based incremental */ + cstr = "backup_incremental=log"; + break; + } + + config_single(cstr, false); + } +} +/* * config_cache -- * Cache configuration. */ @@ -987,7 +1025,10 @@ config_single(const char *s, bool perm) *cp->vstr = NULL; } - if (strncmp(s, "checkpoints", strlen("checkpoints")) == 0) { + if (strncmp(s, "backup_incremental", strlen("backup_incremental")) == 0) { + config_map_backup_incr(equalp, &g.c_backup_incr_flag); + *cp->vstr = dstrdup(equalp); + } else if (strncmp(s, "checkpoints", strlen("checkpoints")) == 0) { config_map_checkpoint(equalp, &g.c_checkpoint_flag); *cp->vstr = dstrdup(equalp); } else if (strncmp(s, "checksum", strlen("checksum")) == 0) { @@ -1101,6 +1142,23 @@ config_map_file_type(const char *s, u_int *vp) } /* + * config_map_backup_incr -- + * Map a incremental backup configuration to a flag. + */ +static void +config_map_backup_incr(const char *s, u_int *vp) +{ + if (strcmp(s, "block") == 0) + *vp = INCREMENTAL_BLOCK; + else if (strcmp(s, "log") == 0) + *vp = INCREMENTAL_LOG; + else if (strcmp(s, "off") == 0) + *vp = INCREMENTAL_OFF; + else + testutil_die(EINVAL, "illegal incremental backup configuration: %s", s); +} + +/* * config_map_checkpoint -- * Map a checkpoint configuration to a flag. */ diff --git a/src/third_party/wiredtiger/test/format/config.h b/src/third_party/wiredtiger/test/format/config.h index f7681cd2e2b..7eec635ec48 100644 --- a/src/third_party/wiredtiger/test/format/config.h +++ b/src/third_party/wiredtiger/test/format/config.h @@ -77,6 +77,9 @@ static CONFIG c[] = {{"abort", "if timed run should drop core", /* 0% */ {"backups", "if backups are enabled", /* 20% */ C_BOOL, 20, 0, 0, &g.c_backups, NULL}, + {"backup_incremental", "type of backup (block | log | off)", C_IGNORE | C_STRING, 0, 0, 0, NULL, + &g.c_backup_incremental}, + {"bitcnt", "number of bits for fixed-length column-store files", 0x0, 1, 8, 8, &g.c_bitcnt, NULL}, {"bloom", "if bloom filters are configured", /* 95% */ diff --git a/src/third_party/wiredtiger/test/format/format.h b/src/third_party/wiredtiger/test/format/format.h index 836e042a8ea..d846fc6f80f 100644 --- a/src/third_party/wiredtiger/test/format/format.h +++ b/src/third_party/wiredtiger/test/format/format.h @@ -90,6 +90,7 @@ typedef struct { bool workers_finished; /* Operations completed */ pthread_rwlock_t backup_lock; /* Backup running */ + uint32_t backup_id; /* Block incremental id */ WT_RAND_STATE rnd; /* Global RNG state */ @@ -118,6 +119,7 @@ typedef struct { uint32_t c_assert_read_timestamp; uint32_t c_auto_throttle; uint32_t c_backups; + char *c_backup_incremental; uint32_t c_bitcnt; uint32_t c_bloom; uint32_t c_bloom_bit_count; @@ -208,6 +210,11 @@ typedef struct { #define VAR 3 u_int type; /* File type's flag value */ +#define INCREMENTAL_BLOCK 1 +#define INCREMENTAL_LOG 2 +#define INCREMENTAL_OFF 3 + u_int c_backup_incr_flag; /* Incremental backup flag value */ + #define CHECKPOINT_OFF 1 #define CHECKPOINT_ON 2 #define CHECKPOINT_WIREDTIGER 3 |