diff options
| | | |
|---|---|---|
| author | Luke Chen <luke.chen@mongodb.com> | 2020-10-08 16:57:01 +1100 |
| committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2020-10-08 10:10:09 +0000 |
| commit | 5b525400af7fa7afe14f2568f21f505b7faf18ce (patch) | |
| tree | 35a582f459552548bd6b83b99340f40b2dbbde5a /src/third_party | |
| parent | 013c3f1bed2d31995bb1ca041b41b27496e0b1a7 (diff) | |
| download | mongo-5b525400af7fa7afe14f2568f21f505b7faf18ce.tar.gz | |
Import wiredtiger: fcccb8aaf3b82a84a58cd94eda550ae0e41b1409 from branch mongodb-5.0
ref: 0236242fe8..fcccb8aaf3
for: 4.9.0
WT-6765 Add more debugging and earlier detection of missing file
Diffstat (limited to 'src/third_party')
5 files changed, 68 insertions, 26 deletions
diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data index ec144dd8675..aaf05fb0b43 100644 --- a/src/third_party/wiredtiger/import.data +++ b/src/third_party/wiredtiger/import.data @@ -2,5 +2,5 @@ "vendor": "wiredtiger", "github": "wiredtiger/wiredtiger.git", "branch": "mongodb-5.0", - "commit": "0236242fe8ecd2e02d9473d9a6128e6998b96937" + "commit": "fcccb8aaf3b82a84a58cd94eda550ae0e41b1409" } diff --git a/src/third_party/wiredtiger/src/os_posix/os_fs.c b/src/third_party/wiredtiger/src/os_posix/os_fs.c index 2500e3d2865..ede22518c26 100644 --- a/src/third_party/wiredtiger/src/os_posix/os_fs.c +++ b/src/third_party/wiredtiger/src/os_posix/os_fs.c @@ -208,7 +208,9 @@ __posix_fs_remove( #ifdef __linux__ /* Flush the backing directory to guarantee the remove. */ + WT_RET(__wt_log_printf(session, "REMOVE: posix_directory_sync %s", name)); WT_RET(__posix_directory_sync(session, name)); + WT_RET(__wt_log_printf(session, "REMOVE: DONE posix_directory_sync %s", name)); #endif return (0); } @@ -248,7 +250,9 @@ __posix_fs_rename(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, const cha * not provide the guarantee or only provide the guarantee with specific mount options. Flush * both of the from/to directories until it's a performance problem. */ + WT_RET(__wt_log_printf(session, "RENAME: posix_directory_sync %s", from)); WT_RET(__posix_directory_sync(session, from)); + WT_RET(__wt_log_printf(session, "RENAME: DONE posix_directory_sync %s", from)); /* * In almost all cases, we're going to be renaming files in the same directory, we can at least @@ -807,8 +811,11 @@ __posix_open_file(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, const cha /* * Durability: some filesystems require a directory sync to be confident the file will appear. 
*/ - if (LF_ISSET(WT_FS_OPEN_DURABLE)) + if (LF_ISSET(WT_FS_OPEN_DURABLE)) { + WT_ERR(__wt_log_printf(session, "OPEN/CREATE: posix_directory_sync %s", name)); WT_ERR(__posix_directory_sync(session, name)); + WT_ERR(__wt_log_printf(session, "OPEN/CREATE: DONE posix_directory_sync %s", name)); + } #endif WT_ERR(__posix_open_file_cloexec(session, pfh->fd, name)); diff --git a/src/third_party/wiredtiger/test/csuite/random_directio/main.c b/src/third_party/wiredtiger/test/csuite/random_directio/main.c index 5e21751781d..2212befaadb 100644 --- a/src/third_party/wiredtiger/test/csuite/random_directio/main.c +++ b/src/third_party/wiredtiger/test/csuite/random_directio/main.c @@ -765,6 +765,24 @@ check_schema(WT_SESSION *session, uint64_t lastid, uint32_t threadid, uint32_t f } } +static void +kill_child(pid_t pid) +{ + int status; + + /* + * The child is stopped, it won't process an abort until it is continued. First signal the + * abort, then signal continue so that the child process will process the abort and dump core. + */ + printf("Send abort to child process ID %d\n", (int)pid); + if (kill(pid, SIGABRT) != 0) + testutil_die(errno, "kill"); + if (kill(pid, SIGCONT) != 0) + testutil_die(errno, "kill"); + if (waitpid(pid, &status, 0) == -1) + testutil_die(errno, "waitpid"); +} + /* * check_db -- * Make a copy of the database and verify its contents. 
@@ -779,10 +797,10 @@ check_db(uint32_t nth, uint32_t datasize, pid_t pid, bool directio, uint32_t fla uint64_t gotid, id; uint64_t *lastid; uint32_t gotth, kvsize, th, threadmap; - int status; char checkdir[4096], dbgdir[4096], savedir[4096]; char *gotkey, *gotvalue, *keybuf, *p; char **large_arr; + bool fatal; keybuf = dcalloc(datasize, 1); lastid = dcalloc(nth, sizeof(uint64_t)); @@ -803,29 +821,32 @@ check_db(uint32_t nth, uint32_t datasize, pid_t pid, bool directio, uint32_t fla printf( "Copy database home directory using direct I/O to run recovery,\n" "along with a saved 'pre-recovery' copy.\n"); - copy_directory(home, checkdir, directio); - /* Copy the original home directory explicitly without direct I/O. */ - copy_directory(home, dbgdir, false); - copy_directory(checkdir, savedir, false); + /* + * Copy the original home directory explicitly without direct I/O. Copy this first because + * copying with directio may abort and we want to see what the original copy saw. + */ + fatal = copy_directory(home, dbgdir, false); + if (fatal) { + printf("FATAL: Copying from %s to %s, directio %d\n", home, dbgdir, false); + kill_child(pid); + } + fatal = copy_directory(home, checkdir, directio); + if (fatal) { + printf("FATAL: Copying from %s to %s, directio %d\n", home, checkdir, directio); + kill_child(pid); + } + fatal = copy_directory(checkdir, savedir, false); + if (fatal) { + printf("FATAL: Copying from %s to %s, directio %d\n", checkdir, savedir, false); + kill_child(pid); + } printf("Open database, run recovery and verify content\n"); ret = wiredtiger_open(checkdir, NULL, ENV_CONFIG_REC, &conn); /* If this fails, abort the child process before we die so we can see what it was doing. */ if (ret != 0) { - if (pid != 0) { - /* - * The child is stopped, it won't process an abort until it is continued. First signal - * the abort, then signal continue so that the child process will process the abort and - * dump core. 
- */ - printf("Send abort to child process ID %d\n", (int)pid); - if (kill(pid, SIGABRT) != 0) - testutil_die(errno, "kill"); - if (kill(pid, SIGCONT) != 0) - testutil_die(errno, "kill"); - if (waitpid(pid, &status, 0) == -1) - testutil_die(errno, "waitpid"); - } + if (pid != 0) + kill_child(pid); testutil_check(ret); } testutil_check(conn->open_session(conn, NULL, NULL, &session)); diff --git a/src/third_party/wiredtiger/test/csuite/random_directio/util.c b/src/third_party/wiredtiger/test/csuite/random_directio/util.c index 2d4a4aa845b..df8b65ce248 100644 --- a/src/third_party/wiredtiger/test/csuite/random_directio/util.c +++ b/src/third_party/wiredtiger/test/csuite/random_directio/util.c @@ -43,7 +43,7 @@ * copy_directory -- * Copy a directory, using direct IO if indicated. */ -void +bool copy_directory(const char *fromdir, const char *todir, bool directio) { struct dirent *dp; @@ -52,9 +52,10 @@ copy_directory(const char *fromdir, const char *todir, bool directio) size_t blksize, bufsize, readbytes, n, remaining; ssize_t ioret; uintptr_t bufptr; - int openflags, rfd, wfd; + int enoent, openflags, rfd, wfd; u_char *buf, *orig_buf; char fromfile[4096], tofile[4096]; + bool fatal; #ifdef O_DIRECT openflags = directio ? O_DIRECT : 0; @@ -65,6 +66,8 @@ copy_directory(const char *fromdir, const char *todir, bool directio) orig_buf = dcalloc(COPY_BUF_SIZE, sizeof(u_char)); buf = NULL; blksize = bufsize = 0; + enoent = 0; + fatal = false; dirp = opendir(todir); if (dirp != NULL) { @@ -101,8 +104,19 @@ copy_directory(const char *fromdir, const char *todir, bool directio) * delivered in between those calls so the file may no longer exist but reading the * directory will still return its entry. Handle that case and skip the file if it happens. */ - if (rfd < 0 && errno == ENOENT) + if (rfd < 0 && errno == ENOENT) { + ++enoent; + /* + * At most there can be one thread in the middle of drop due to the schema lock. 
So if + * we find more than one missing file, we have a fatal and unexpected situation. We want + * to know all the files in this. So note them here and fail later. + */ + printf("COPY_DIR: direct:%d ENOENT %d: Source file %s not found.\n", directio, enoent, + dp->d_name); + if (enoent > 1) + fatal = true; continue; + } testutil_assertfmt(rfd >= 0, "Open of source %s failed with %d\n", fromfile, errno); wfd = open(tofile, O_WRONLY | O_CREAT, 0666); testutil_assertfmt(wfd >= 0, "Open of dest %s failed with %d\n", tofile, errno); @@ -152,4 +166,5 @@ copy_directory(const char *fromdir, const char *todir, bool directio) } testutil_check(closedir(dirp)); free(orig_buf); + return (fatal); } diff --git a/src/third_party/wiredtiger/test/csuite/random_directio/util.h b/src/third_party/wiredtiger/test/csuite/random_directio/util.h index c0d6cc6db8a..a849150d52c 100644 --- a/src/third_party/wiredtiger/test/csuite/random_directio/util.h +++ b/src/third_party/wiredtiger/test/csuite/random_directio/util.h @@ -30,5 +30,4 @@ * util.h * Utility functions for test that simulates system crashes. */ - -extern void copy_directory(const char *, const char *, bool); +extern bool copy_directory(const char *, const char *, bool); |