summary refs log tree commit diff
diff options
context:
space:
mode:
author Luke Chen <luke.chen@mongodb.com> 2020-10-08 16:57:01 +1100
committer Evergreen Agent <no-reply@evergreen.mongodb.com> 2020-10-08 10:10:09 +0000
commit 5b525400af7fa7afe14f2568f21f505b7faf18ce (patch)
tree 35a582f459552548bd6b83b99340f40b2dbbde5a
parent 013c3f1bed2d31995bb1ca041b41b27496e0b1a7 (diff)
download mongo-5b525400af7fa7afe14f2568f21f505b7faf18ce.tar.gz
Import wiredtiger: fcccb8aaf3b82a84a58cd94eda550ae0e41b1409 from branch mongodb-5.0
ref: 0236242fe8..fcccb8aaf3 for: 4.9.0 WT-6765 Add more debugging and earlier detection of missing file
-rw-r--r-- src/third_party/wiredtiger/import.data 2
-rw-r--r-- src/third_party/wiredtiger/src/os_posix/os_fs.c 9
-rw-r--r-- src/third_party/wiredtiger/test/csuite/random_directio/main.c 59
-rw-r--r-- src/third_party/wiredtiger/test/csuite/random_directio/util.c 21
-rw-r--r-- src/third_party/wiredtiger/test/csuite/random_directio/util.h 3
5 files changed, 68 insertions, 26 deletions
diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data
index ec144dd8675..aaf05fb0b43 100644
--- a/src/third_party/wiredtiger/import.data
+++ b/src/third_party/wiredtiger/import.data
@@ -2,5 +2,5 @@
"vendor": "wiredtiger",
"github": "wiredtiger/wiredtiger.git",
"branch": "mongodb-5.0",
- "commit": "0236242fe8ecd2e02d9473d9a6128e6998b96937"
+ "commit": "fcccb8aaf3b82a84a58cd94eda550ae0e41b1409"
}
diff --git a/src/third_party/wiredtiger/src/os_posix/os_fs.c b/src/third_party/wiredtiger/src/os_posix/os_fs.c
index 2500e3d2865..ede22518c26 100644
--- a/src/third_party/wiredtiger/src/os_posix/os_fs.c
+++ b/src/third_party/wiredtiger/src/os_posix/os_fs.c
@@ -208,7 +208,9 @@ __posix_fs_remove(
#ifdef __linux__
/* Flush the backing directory to guarantee the remove. */
+ WT_RET(__wt_log_printf(session, "REMOVE: posix_directory_sync %s", name));
WT_RET(__posix_directory_sync(session, name));
+ WT_RET(__wt_log_printf(session, "REMOVE: DONE posix_directory_sync %s", name));
#endif
return (0);
}
@@ -248,7 +250,9 @@ __posix_fs_rename(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, const cha
* not provide the guarantee or only provide the guarantee with specific mount options. Flush
* both of the from/to directories until it's a performance problem.
*/
+ WT_RET(__wt_log_printf(session, "RENAME: posix_directory_sync %s", from));
WT_RET(__posix_directory_sync(session, from));
+ WT_RET(__wt_log_printf(session, "RENAME: DONE posix_directory_sync %s", from));
/*
* In almost all cases, we're going to be renaming files in the same directory, we can at least
@@ -807,8 +811,11 @@ __posix_open_file(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, const cha
/*
* Durability: some filesystems require a directory sync to be confident the file will appear.
*/
- if (LF_ISSET(WT_FS_OPEN_DURABLE))
+ if (LF_ISSET(WT_FS_OPEN_DURABLE)) {
+ WT_ERR(__wt_log_printf(session, "OPEN/CREATE: posix_directory_sync %s", name));
WT_ERR(__posix_directory_sync(session, name));
+ WT_ERR(__wt_log_printf(session, "OPEN/CREATE: DONE posix_directory_sync %s", name));
+ }
#endif
WT_ERR(__posix_open_file_cloexec(session, pfh->fd, name));
diff --git a/src/third_party/wiredtiger/test/csuite/random_directio/main.c b/src/third_party/wiredtiger/test/csuite/random_directio/main.c
index 5e21751781d..2212befaadb 100644
--- a/src/third_party/wiredtiger/test/csuite/random_directio/main.c
+++ b/src/third_party/wiredtiger/test/csuite/random_directio/main.c
@@ -765,6 +765,24 @@ check_schema(WT_SESSION *session, uint64_t lastid, uint32_t threadid, uint32_t f
}
}
+static void
+kill_child(pid_t pid)
+{
+ int status;
+
+ /*
+ * The child is stopped, it won't process an abort until it is continued. First signal the
+ * abort, then signal continue so that the child process will process the abort and dump core.
+ */
+ printf("Send abort to child process ID %d\n", (int)pid);
+ if (kill(pid, SIGABRT) != 0)
+ testutil_die(errno, "kill");
+ if (kill(pid, SIGCONT) != 0)
+ testutil_die(errno, "kill");
+ if (waitpid(pid, &status, 0) == -1)
+ testutil_die(errno, "waitpid");
+}
+
/*
* check_db --
* Make a copy of the database and verify its contents.
@@ -779,10 +797,10 @@ check_db(uint32_t nth, uint32_t datasize, pid_t pid, bool directio, uint32_t fla
uint64_t gotid, id;
uint64_t *lastid;
uint32_t gotth, kvsize, th, threadmap;
- int status;
char checkdir[4096], dbgdir[4096], savedir[4096];
char *gotkey, *gotvalue, *keybuf, *p;
char **large_arr;
+ bool fatal;
keybuf = dcalloc(datasize, 1);
lastid = dcalloc(nth, sizeof(uint64_t));
@@ -803,29 +821,32 @@ check_db(uint32_t nth, uint32_t datasize, pid_t pid, bool directio, uint32_t fla
printf(
"Copy database home directory using direct I/O to run recovery,\n"
"along with a saved 'pre-recovery' copy.\n");
- copy_directory(home, checkdir, directio);
- /* Copy the original home directory explicitly without direct I/O. */
- copy_directory(home, dbgdir, false);
- copy_directory(checkdir, savedir, false);
+ /*
+ * Copy the original home directory explicitly without direct I/O. Copy this first because
+ * copying with directio may abort and we want to see what the original copy saw.
+ */
+ fatal = copy_directory(home, dbgdir, false);
+ if (fatal) {
+ printf("FATAL: Copying from %s to %s, directio %d\n", home, dbgdir, false);
+ kill_child(pid);
+ }
+ fatal = copy_directory(home, checkdir, directio);
+ if (fatal) {
+ printf("FATAL: Copying from %s to %s, directio %d\n", home, checkdir, directio);
+ kill_child(pid);
+ }
+ fatal = copy_directory(checkdir, savedir, false);
+ if (fatal) {
+ printf("FATAL: Copying from %s to %s, directio %d\n", checkdir, savedir, false);
+ kill_child(pid);
+ }
printf("Open database, run recovery and verify content\n");
ret = wiredtiger_open(checkdir, NULL, ENV_CONFIG_REC, &conn);
/* If this fails, abort the child process before we die so we can see what it was doing. */
if (ret != 0) {
- if (pid != 0) {
- /*
- * The child is stopped, it won't process an abort until it is continued. First signal
- * the abort, then signal continue so that the child process will process the abort and
- * dump core.
- */
- printf("Send abort to child process ID %d\n", (int)pid);
- if (kill(pid, SIGABRT) != 0)
- testutil_die(errno, "kill");
- if (kill(pid, SIGCONT) != 0)
- testutil_die(errno, "kill");
- if (waitpid(pid, &status, 0) == -1)
- testutil_die(errno, "waitpid");
- }
+ if (pid != 0)
+ kill_child(pid);
testutil_check(ret);
}
testutil_check(conn->open_session(conn, NULL, NULL, &session));
diff --git a/src/third_party/wiredtiger/test/csuite/random_directio/util.c b/src/third_party/wiredtiger/test/csuite/random_directio/util.c
index 2d4a4aa845b..df8b65ce248 100644
--- a/src/third_party/wiredtiger/test/csuite/random_directio/util.c
+++ b/src/third_party/wiredtiger/test/csuite/random_directio/util.c
@@ -43,7 +43,7 @@
* copy_directory --
* Copy a directory, using direct IO if indicated.
*/
-void
+bool
copy_directory(const char *fromdir, const char *todir, bool directio)
{
struct dirent *dp;
@@ -52,9 +52,10 @@ copy_directory(const char *fromdir, const char *todir, bool directio)
size_t blksize, bufsize, readbytes, n, remaining;
ssize_t ioret;
uintptr_t bufptr;
- int openflags, rfd, wfd;
+ int enoent, openflags, rfd, wfd;
u_char *buf, *orig_buf;
char fromfile[4096], tofile[4096];
+ bool fatal;
#ifdef O_DIRECT
openflags = directio ? O_DIRECT : 0;
@@ -65,6 +66,8 @@ copy_directory(const char *fromdir, const char *todir, bool directio)
orig_buf = dcalloc(COPY_BUF_SIZE, sizeof(u_char));
buf = NULL;
blksize = bufsize = 0;
+ enoent = 0;
+ fatal = false;
dirp = opendir(todir);
if (dirp != NULL) {
@@ -101,8 +104,19 @@ copy_directory(const char *fromdir, const char *todir, bool directio)
* delivered in between those calls so the file may no longer exist but reading the
* directory will still return its entry. Handle that case and skip the file if it happens.
*/
- if (rfd < 0 && errno == ENOENT)
+ if (rfd < 0 && errno == ENOENT) {
+ ++enoent;
+ /*
+ * At most there can be one thread in the middle of drop due to the schema lock. So if
+ * we find more than one missing file, we have a fatal and unexpected situation. We want
+ * to know all the files in this. So note them here and fail later.
+ */
+ printf("COPY_DIR: direct:%d ENOENT %d: Source file %s not found.\n", directio, enoent,
+ dp->d_name);
+ if (enoent > 1)
+ fatal = true;
continue;
+ }
testutil_assertfmt(rfd >= 0, "Open of source %s failed with %d\n", fromfile, errno);
wfd = open(tofile, O_WRONLY | O_CREAT, 0666);
testutil_assertfmt(wfd >= 0, "Open of dest %s failed with %d\n", tofile, errno);
@@ -152,4 +166,5 @@ copy_directory(const char *fromdir, const char *todir, bool directio)
}
testutil_check(closedir(dirp));
free(orig_buf);
+ return (fatal);
}
diff --git a/src/third_party/wiredtiger/test/csuite/random_directio/util.h b/src/third_party/wiredtiger/test/csuite/random_directio/util.h
index c0d6cc6db8a..a849150d52c 100644
--- a/src/third_party/wiredtiger/test/csuite/random_directio/util.h
+++ b/src/third_party/wiredtiger/test/csuite/random_directio/util.h
@@ -30,5 +30,4 @@
* util.h
* Utility functions for test that simulates system crashes.
*/
-
-extern void copy_directory(const char *, const char *, bool);
+extern bool copy_directory(const char *, const char *, bool);