summary | refs | log | tree | commit | diff
path: root/src/third_party/wiredtiger/src/os_posix
diff options
context:
space:
mode:
Diffstat (limited to 'src/third_party/wiredtiger/src/os_posix')
-rw-r--r-- src/third_party/wiredtiger/src/os_posix/os_dir.c 188
-rw-r--r-- src/third_party/wiredtiger/src/os_posix/os_dlopen.c 84
-rw-r--r-- src/third_party/wiredtiger/src/os_posix/os_fallocate.c 186
-rw-r--r-- src/third_party/wiredtiger/src/os_posix/os_fs.c 1123
-rw-r--r-- src/third_party/wiredtiger/src/os_posix/os_getenv.c 14
-rw-r--r-- src/third_party/wiredtiger/src/os_posix/os_map.c 231
-rw-r--r-- src/third_party/wiredtiger/src/os_posix/os_mtx_cond.c 294
-rw-r--r-- src/third_party/wiredtiger/src/os_posix/os_once.c 6
-rw-r--r-- src/third_party/wiredtiger/src/os_posix/os_pagesize.c 4
-rw-r--r-- src/third_party/wiredtiger/src/os_posix/os_path.c 8
-rw-r--r-- src/third_party/wiredtiger/src/os_posix/os_priv.c 6
-rw-r--r-- src/third_party/wiredtiger/src/os_posix/os_setvbuf.c 24
-rw-r--r-- src/third_party/wiredtiger/src/os_posix/os_sleep.c 26
-rw-r--r-- src/third_party/wiredtiger/src/os_posix/os_snprintf.c 19
-rw-r--r-- src/third_party/wiredtiger/src/os_posix/os_thread.c 115
-rw-r--r-- src/third_party/wiredtiger/src/os_posix/os_time.c 59
-rw-r--r-- src/third_party/wiredtiger/src/os_posix/os_yield.c 20
17 files changed, 1159 insertions, 1248 deletions
diff --git a/src/third_party/wiredtiger/src/os_posix/os_dir.c b/src/third_party/wiredtiger/src/os_posix/os_dir.c
index 11c6339177c..3f2b1bf6e2e 100644
--- a/src/third_party/wiredtiger/src/os_posix/os_dir.c
+++ b/src/third_party/wiredtiger/src/os_posix/os_dir.c
@@ -12,129 +12,121 @@
/*
* __directory_list_worker --
- * Get a list of files from a directory, POSIX version.
+ * Get a list of files from a directory, POSIX version.
*/
static int
-__directory_list_worker(WT_FILE_SYSTEM *file_system,
- WT_SESSION *wt_session, const char *directory,
- const char *prefix, char ***dirlistp, uint32_t *countp, bool single)
+__directory_list_worker(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, const char *directory,
+ const char *prefix, char ***dirlistp, uint32_t *countp, bool single)
{
- struct dirent *dp;
- DIR *dirp;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
- size_t dirallocsz;
- uint32_t count;
- int tret;
- char **entries;
-
- *dirlistp = NULL;
- *countp = 0;
-
- session = (WT_SESSION_IMPL *)wt_session;
- dirp = NULL;
- dirallocsz = 0;
- entries = NULL;
-
- /*
- * If opendir fails, we should have a NULL pointer with an error value,
- * but various static analysis programs remain unconvinced, check both.
- */
- WT_SYSCALL_RETRY(((dirp = opendir(directory)) == NULL ? -1 : 0), ret);
- if (dirp == NULL || ret != 0) {
- if (ret == 0)
- ret = EINVAL;
- WT_RET_MSG(session, ret,
- "%s: directory-list: opendir", directory);
- }
-
- for (count = 0; (dp = readdir(dirp)) != NULL;) {
- /*
- * Skip . and ..
- */
- if (strcmp(dp->d_name, ".") == 0 ||
- strcmp(dp->d_name, "..") == 0)
- continue;
-
- /* The list of files is optionally filtered by a prefix. */
- if (prefix != NULL && !WT_PREFIX_MATCH(dp->d_name, prefix))
- continue;
-
- WT_ERR(__wt_realloc_def(
- session, &dirallocsz, count + 1, &entries));
- WT_ERR(__wt_strdup(session, dp->d_name, &entries[count]));
- ++count;
-
- if (single)
- break;
- }
-
- *dirlistp = entries;
- *countp = count;
-
-err: WT_SYSCALL(closedir(dirp), tret);
- if (tret != 0) {
- __wt_err(session, tret,
- "%s: directory-list: closedir", directory);
- if (ret == 0)
- ret = tret;
- }
-
- if (ret == 0)
- return (0);
-
- WT_TRET(__wt_posix_directory_list_free(
- file_system, wt_session, entries, count));
-
- WT_RET_MSG(session, ret,
- "%s: directory-list, prefix \"%s\"",
- directory, prefix == NULL ? "" : prefix);
+ struct dirent *dp;
+ DIR *dirp;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+ size_t dirallocsz;
+ uint32_t count;
+ int tret;
+ char **entries;
+
+ *dirlistp = NULL;
+ *countp = 0;
+
+ session = (WT_SESSION_IMPL *)wt_session;
+ dirp = NULL;
+ dirallocsz = 0;
+ entries = NULL;
+
+ /*
+ * If opendir fails, we should have a NULL pointer with an error value, but various static
+ * analysis programs remain unconvinced, check both.
+ */
+ WT_SYSCALL_RETRY(((dirp = opendir(directory)) == NULL ? -1 : 0), ret);
+ if (dirp == NULL || ret != 0) {
+ if (ret == 0)
+ ret = EINVAL;
+ WT_RET_MSG(session, ret, "%s: directory-list: opendir", directory);
+ }
+
+ for (count = 0; (dp = readdir(dirp)) != NULL;) {
+ /*
+ * Skip . and ..
+ */
+ if (strcmp(dp->d_name, ".") == 0 || strcmp(dp->d_name, "..") == 0)
+ continue;
+
+ /* The list of files is optionally filtered by a prefix. */
+ if (prefix != NULL && !WT_PREFIX_MATCH(dp->d_name, prefix))
+ continue;
+
+ WT_ERR(__wt_realloc_def(session, &dirallocsz, count + 1, &entries));
+ WT_ERR(__wt_strdup(session, dp->d_name, &entries[count]));
+ ++count;
+
+ if (single)
+ break;
+ }
+
+ *dirlistp = entries;
+ *countp = count;
+
+err:
+ WT_SYSCALL(closedir(dirp), tret);
+ if (tret != 0) {
+ __wt_err(session, tret, "%s: directory-list: closedir", directory);
+ if (ret == 0)
+ ret = tret;
+ }
+
+ if (ret == 0)
+ return (0);
+
+ WT_TRET(__wt_posix_directory_list_free(file_system, wt_session, entries, count));
+
+ WT_RET_MSG(
+ session, ret, "%s: directory-list, prefix \"%s\"", directory, prefix == NULL ? "" : prefix);
}
/*
* __wt_posix_directory_list --
- * Get a list of files from a directory, POSIX version.
+ * Get a list of files from a directory, POSIX version.
*/
int
-__wt_posix_directory_list(WT_FILE_SYSTEM *file_system,
- WT_SESSION *wt_session, const char *directory,
- const char *prefix, char ***dirlistp, uint32_t *countp)
+__wt_posix_directory_list(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session,
+ const char *directory, const char *prefix, char ***dirlistp, uint32_t *countp)
{
- return (__directory_list_worker(file_system,
- wt_session, directory, prefix, dirlistp, countp, false));
+ return (
+ __directory_list_worker(file_system, wt_session, directory, prefix, dirlistp, countp, false));
}
/*
* __wt_posix_directory_list_single --
- * Get one file from a directory, POSIX version.
+ * Get one file from a directory, POSIX version.
*/
int
-__wt_posix_directory_list_single(WT_FILE_SYSTEM *file_system,
- WT_SESSION *wt_session, const char *directory,
- const char *prefix, char ***dirlistp, uint32_t *countp)
+__wt_posix_directory_list_single(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session,
+ const char *directory, const char *prefix, char ***dirlistp, uint32_t *countp)
{
- return (__directory_list_worker(file_system,
- wt_session, directory, prefix, dirlistp, countp, true));
+ return (
+ __directory_list_worker(file_system, wt_session, directory, prefix, dirlistp, countp, true));
}
/*
* __wt_posix_directory_list_free --
- * Free memory returned by __wt_posix_directory_list.
+ * Free memory returned by __wt_posix_directory_list.
*/
int
-__wt_posix_directory_list_free(WT_FILE_SYSTEM *file_system,
- WT_SESSION *wt_session, char **dirlist, uint32_t count)
+__wt_posix_directory_list_free(
+ WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, char **dirlist, uint32_t count)
{
- WT_SESSION_IMPL *session;
+ WT_SESSION_IMPL *session;
- WT_UNUSED(file_system);
+ WT_UNUSED(file_system);
- session = (WT_SESSION_IMPL *)wt_session;
+ session = (WT_SESSION_IMPL *)wt_session;
- if (dirlist != NULL) {
- while (count > 0)
- __wt_free(session, dirlist[--count]);
- __wt_free(session, dirlist);
- }
- return (0);
+ if (dirlist != NULL) {
+ while (count > 0)
+ __wt_free(session, dirlist[--count]);
+ __wt_free(session, dirlist);
+ }
+ return (0);
}
diff --git a/src/third_party/wiredtiger/src/os_posix/os_dlopen.c b/src/third_party/wiredtiger/src/os_posix/os_dlopen.c
index 4131b99a035..7ba37803a44 100644
--- a/src/third_party/wiredtiger/src/os_posix/os_dlopen.c
+++ b/src/third_party/wiredtiger/src/os_posix/os_dlopen.c
@@ -10,75 +10,73 @@
/*
* __wt_dlopen --
- * Open a dynamic library.
+ * Open a dynamic library.
*/
int
__wt_dlopen(WT_SESSION_IMPL *session, const char *path, WT_DLH **dlhp)
{
- WT_DECL_RET;
- WT_DLH *dlh;
+ WT_DECL_RET;
+ WT_DLH *dlh;
- WT_RET(__wt_calloc_one(session, &dlh));
- WT_ERR(__wt_strdup(session, path == NULL ? "local" : path, &dlh->name));
+ WT_RET(__wt_calloc_one(session, &dlh));
+ WT_ERR(__wt_strdup(session, path == NULL ? "local" : path, &dlh->name));
- if ((dlh->handle = dlopen(path, RTLD_LAZY)) == NULL)
- WT_ERR_MSG(
- session, __wt_errno(), "dlopen(%s): %s", path, dlerror());
+ if ((dlh->handle = dlopen(path, RTLD_LAZY)) == NULL)
+ WT_ERR_MSG(session, __wt_errno(), "dlopen(%s): %s", path, dlerror());
- *dlhp = dlh;
- if (0) {
-err: __wt_free(session, dlh->name);
- __wt_free(session, dlh);
- }
- return (ret);
+ *dlhp = dlh;
+ if (0) {
+err:
+ __wt_free(session, dlh->name);
+ __wt_free(session, dlh);
+ }
+ return (ret);
}
/*
* __wt_dlsym --
- * Lookup a symbol in a dynamic library.
+ * Lookup a symbol in a dynamic library.
*/
int
-__wt_dlsym(WT_SESSION_IMPL *session,
- WT_DLH *dlh, const char *name, bool fail, void *sym_ret)
+__wt_dlsym(WT_SESSION_IMPL *session, WT_DLH *dlh, const char *name, bool fail, void *sym_ret)
{
- void *sym;
+ void *sym;
- *(void **)sym_ret = NULL;
- if ((sym = dlsym(dlh->handle, name)) == NULL) {
- if (fail)
- WT_RET_MSG(session, __wt_errno(),
- "dlsym(%s in %s): %s", name, dlh->name, dlerror());
- return (0);
- }
+ *(void **)sym_ret = NULL;
+ if ((sym = dlsym(dlh->handle, name)) == NULL) {
+ if (fail)
+ WT_RET_MSG(session, __wt_errno(), "dlsym(%s in %s): %s", name, dlh->name, dlerror());
+ return (0);
+ }
- *(void **)sym_ret = sym;
- return (0);
+ *(void **)sym_ret = sym;
+ return (0);
}
/*
* __wt_dlclose --
- * Close a dynamic library
+ * Close a dynamic library.
*/
int
__wt_dlclose(WT_SESSION_IMPL *session, WT_DLH *dlh)
{
- WT_DECL_RET;
+ WT_DECL_RET;
- /*
- * FreeBSD dies inside __cxa_finalize when closing handles.
- *
- * For now, just skip the dlclose: this may leak some resources until
- * the process exits, but that is preferable to hard-to-debug crashes
- * during exit.
- */
+/*
+ * FreeBSD dies inside __cxa_finalize when closing handles.
+ *
+ * For now, just skip the dlclose: this may leak some resources until
+ * the process exits, but that is preferable to hard-to-debug crashes
+ * during exit.
+ */
#ifndef __FreeBSD__
- if (dlclose(dlh->handle) != 0) {
- ret = __wt_errno();
- __wt_err(session, ret, "dlclose: %s", dlerror());
- }
+ if (dlclose(dlh->handle) != 0) {
+ ret = __wt_errno();
+ __wt_err(session, ret, "dlclose: %s", dlerror());
+ }
#endif
- __wt_free(session, dlh->name);
- __wt_free(session, dlh);
- return (ret);
+ __wt_free(session, dlh->name);
+ __wt_free(session, dlh);
+ return (ret);
}
diff --git a/src/third_party/wiredtiger/src/os_posix/os_fallocate.c b/src/third_party/wiredtiger/src/os_posix/os_fallocate.c
index 6691e90dc88..06b65b2c921 100644
--- a/src/third_party/wiredtiger/src/os_posix/os_fallocate.c
+++ b/src/third_party/wiredtiger/src/os_posix/os_fallocate.c
@@ -15,145 +15,139 @@
/*
* __posix_std_fallocate --
- * Linux fallocate call.
+ * Linux fallocate call.
*/
static int
-__posix_std_fallocate(
- WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, wt_off_t offset)
+__posix_std_fallocate(WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, wt_off_t offset)
{
#if defined(HAVE_FALLOCATE)
- WT_DECL_RET;
- WT_FILE_HANDLE_POSIX *pfh;
+ WT_DECL_RET;
+ WT_FILE_HANDLE_POSIX *pfh;
- WT_UNUSED(wt_session);
+ WT_UNUSED(wt_session);
- pfh = (WT_FILE_HANDLE_POSIX *)file_handle;
+ pfh = (WT_FILE_HANDLE_POSIX *)file_handle;
- WT_SYSCALL_RETRY(fallocate(pfh->fd, 0, (wt_off_t)0, offset), ret);
- return (ret);
+ WT_SYSCALL_RETRY(fallocate(pfh->fd, 0, (wt_off_t)0, offset), ret);
+ return (ret);
#else
- WT_UNUSED(file_handle);
- WT_UNUSED(offset);
+ WT_UNUSED(file_handle);
+ WT_UNUSED(offset);
- return (__wt_set_return((WT_SESSION_IMPL *)wt_session, ENOTSUP));
+ return (__wt_set_return((WT_SESSION_IMPL *)wt_session, ENOTSUP));
#endif
}
/*
* __posix_sys_fallocate --
- * Linux fallocate call (system call version).
+ * Linux fallocate call (system call version).
*/
static int
-__posix_sys_fallocate(
- WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, wt_off_t offset)
+__posix_sys_fallocate(WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, wt_off_t offset)
{
#if defined(__linux__) && defined(SYS_fallocate)
- WT_DECL_RET;
- WT_FILE_HANDLE_POSIX *pfh;
-
- WT_UNUSED(wt_session);
-
- pfh = (WT_FILE_HANDLE_POSIX *)file_handle;
-
- /*
- * Try the system call for fallocate even if the C library wrapper was
- * not found. The system call actually exists in the kernel for some
- * Linux versions (RHEL 5.5), but not in the version of the C library.
- * This allows it to work everywhere the kernel supports it.
- */
- WT_SYSCALL_RETRY(
- syscall(SYS_fallocate, pfh->fd, 0, (wt_off_t)0, offset), ret);
- return (ret);
+ WT_DECL_RET;
+ WT_FILE_HANDLE_POSIX *pfh;
+
+ WT_UNUSED(wt_session);
+
+ pfh = (WT_FILE_HANDLE_POSIX *)file_handle;
+
+ /*
+ * Try the system call for fallocate even if the C library wrapper was not found. The system
+ * call actually exists in the kernel for some Linux versions (RHEL 5.5), but not in the version
+ * of the C library. This allows it to work everywhere the kernel supports it.
+ */
+ WT_SYSCALL_RETRY(syscall(SYS_fallocate, pfh->fd, 0, (wt_off_t)0, offset), ret);
+ return (ret);
#else
- WT_UNUSED(file_handle);
- WT_UNUSED(offset);
+ WT_UNUSED(file_handle);
+ WT_UNUSED(offset);
- return (__wt_set_return((WT_SESSION_IMPL *)wt_session, ENOTSUP));
+ return (__wt_set_return((WT_SESSION_IMPL *)wt_session, ENOTSUP));
#endif
}
/*
* __posix_posix_fallocate --
- * POSIX fallocate call.
+ * POSIX fallocate call.
*/
static int
-__posix_posix_fallocate(
- WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, wt_off_t offset)
+__posix_posix_fallocate(WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, wt_off_t offset)
{
#if defined(HAVE_POSIX_FALLOCATE)
- WT_DECL_RET;
- WT_FILE_HANDLE_POSIX *pfh;
+ WT_DECL_RET;
+ WT_FILE_HANDLE_POSIX *pfh;
- WT_UNUSED(wt_session);
+ WT_UNUSED(wt_session);
- pfh = (WT_FILE_HANDLE_POSIX *)file_handle;
+ pfh = (WT_FILE_HANDLE_POSIX *)file_handle;
- WT_SYSCALL_RETRY(posix_fallocate(pfh->fd, (wt_off_t)0, offset), ret);
- return (ret);
+ WT_SYSCALL_RETRY(posix_fallocate(pfh->fd, (wt_off_t)0, offset), ret);
+ return (ret);
#else
- WT_UNUSED(file_handle);
- WT_UNUSED(offset);
+ WT_UNUSED(file_handle);
+ WT_UNUSED(offset);
- return (__wt_set_return((WT_SESSION_IMPL *)wt_session, ENOTSUP));
+ return (__wt_set_return((WT_SESSION_IMPL *)wt_session, ENOTSUP));
#endif
}
/*
* __wt_posix_file_extend --
- * Extend the file.
+ * Extend the file.
*/
int
-__wt_posix_file_extend(
- WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, wt_off_t offset)
+__wt_posix_file_extend(WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, wt_off_t offset)
{
- /*
- * The first file extension call: figure out what this system has.
- *
- * This function is configured as a locking call, so we know we're
- * single-threaded through here. Set the nolock function first, then
- * publish the NULL replacement to ensure the handle functions are
- * always correct.
- *
- * We've seen Linux systems where posix_fallocate has corrupted existing
- * file data (even though that is explicitly disallowed by POSIX).
- * FreeBSD and Solaris support posix_fallocate, and so far we've seen
- * no problems leaving it unlocked. Check for fallocate (and the system
- * call version of fallocate) first to avoid locking on Linux if at all
- * possible.
- */
- if (__posix_std_fallocate(file_handle, wt_session, offset) == 0) {
- file_handle->fh_extend_nolock = __posix_std_fallocate;
- WT_PUBLISH(file_handle->fh_extend, NULL);
- return (0);
- }
- if (__posix_sys_fallocate(file_handle, wt_session, offset) == 0) {
- file_handle->fh_extend_nolock = __posix_sys_fallocate;
- WT_PUBLISH(file_handle->fh_extend, NULL);
- return (0);
- }
- if (__posix_posix_fallocate(file_handle, wt_session, offset) == 0) {
+ /*
+ * The first file extension call: figure out what this system has.
+ *
+ * This function is configured as a locking call, so we know we're
+ * single-threaded through here. Set the nolock function first, then
+ * publish the NULL replacement to ensure the handle functions are
+ * always correct.
+ *
+ * We've seen Linux systems where posix_fallocate has corrupted existing
+ * file data (even though that is explicitly disallowed by POSIX).
+ * FreeBSD and Solaris support posix_fallocate, and so far we've seen
+ * no problems leaving it unlocked. Check for fallocate (and the system
+ * call version of fallocate) first to avoid locking on Linux if at all
+ * possible.
+ */
+ if (__posix_std_fallocate(file_handle, wt_session, offset) == 0) {
+ file_handle->fh_extend_nolock = __posix_std_fallocate;
+ WT_PUBLISH(file_handle->fh_extend, NULL);
+ return (0);
+ }
+ if (__posix_sys_fallocate(file_handle, wt_session, offset) == 0) {
+ file_handle->fh_extend_nolock = __posix_sys_fallocate;
+ WT_PUBLISH(file_handle->fh_extend, NULL);
+ return (0);
+ }
+ if (__posix_posix_fallocate(file_handle, wt_session, offset) == 0) {
#if defined(__linux__)
- file_handle->fh_extend = __posix_posix_fallocate;
- WT_WRITE_BARRIER();
+ file_handle->fh_extend = __posix_posix_fallocate;
+ WT_WRITE_BARRIER();
#else
- file_handle->fh_extend_nolock = __posix_posix_fallocate;
- WT_PUBLISH(file_handle->fh_extend, NULL);
+ file_handle->fh_extend_nolock = __posix_posix_fallocate;
+ WT_PUBLISH(file_handle->fh_extend, NULL);
#endif
- return (0);
- }
-
- /*
- * Use the POSIX ftruncate call if there's nothing else, it can extend
- * files. Note ftruncate requires locking.
- */
- if (file_handle->fh_truncate != NULL &&
- file_handle->fh_truncate(file_handle, wt_session, offset) == 0) {
- file_handle->fh_extend = file_handle->fh_truncate;
- WT_WRITE_BARRIER();
- return (0);
- }
-
- file_handle->fh_extend = NULL;
- WT_WRITE_BARRIER();
- return (ENOTSUP);
+ return (0);
+ }
+
+ /*
+ * Use the POSIX ftruncate call if there's nothing else, it can extend files. Note ftruncate
+ * requires locking.
+ */
+ if (file_handle->fh_truncate != NULL &&
+ file_handle->fh_truncate(file_handle, wt_session, offset) == 0) {
+ file_handle->fh_extend = file_handle->fh_truncate;
+ WT_WRITE_BARRIER();
+ return (0);
+ }
+
+ file_handle->fh_extend = NULL;
+ WT_WRITE_BARRIER();
+ return (ENOTSUP);
}
diff --git a/src/third_party/wiredtiger/src/os_posix/os_fs.c b/src/third_party/wiredtiger/src/os_posix/os_fs.c
index f9771fb3860..a96c9e3fc5d 100644
--- a/src/third_party/wiredtiger/src/os_posix/os_fs.c
+++ b/src/third_party/wiredtiger/src/os_posix/os_fs.c
@@ -30,814 +30,767 @@
/*
* __posix_sync --
- * Underlying support function to flush a file descriptor.
- *
- * Fsync calls (or fsync-style calls, for example, fdatasync) are not retried
- * on failure, and failure halts the system.
- *
- * Excerpted from the LWN.net article https://lwn.net/Articles/752063/:
- * In short, PostgreSQL assumes that a successful call to fsync() indicates
- * that all data written since the last successful call made it safely to
- * persistent storage. But that is not what the kernel actually does. When
- * a buffered I/O write fails due to a hardware-level error, filesystems
- * will respond differently, but that behavior usually includes discarding
- * the data in the affected pages and marking them as being clean. So a read
- * of the blocks that were just written will likely return something other
- * than the data that was written.
- *
- * Given the shared history of UNIX filesystems, and the difficulty of knowing
- * what specific error will be returned under specific circumstances, we don't
- * retry fsync-style calls and panic if a flush operation fails.
+ * Underlying support function to flush a file descriptor. Fsync calls (or fsync-style calls,
+ * for example, fdatasync) are not retried on failure, and failure halts the system. Excerpted
+ * from the LWN.net article https://lwn.net/Articles/752063/: "In short, PostgreSQL assumes that
+ * a successful call to fsync() indicates that all data written since the last successful call
+ * made it safely to persistent storage. But that is not what the kernel actually does. When a
+ * buffered I/O write fails due to a hardware-level error, filesystems will respond differently,
+ * but that behavior usually includes discarding the data in the affected pages and marking them
+ * as being clean. So a read of the blocks that were just written will likely return something
+ * other than the data that was written." Given the shared history of UNIX filesystems, and the
+ * difficulty of knowing what specific error will be returned under specific circumstances, we
+ * don't retry fsync-style calls and panic if a flush operation fails.
*/
static int
-__posix_sync(
- WT_SESSION_IMPL *session, int fd, const char *name, const char *func)
+__posix_sync(WT_SESSION_IMPL *session, int fd, const char *name, const char *func)
{
- WT_DECL_RET;
+ WT_DECL_RET;
#if defined(F_FULLFSYNC)
- /*
- * OS X fsync documentation:
- * "Note that while fsync() will flush all data from the host to the
- * drive (i.e. the "permanent storage device"), the drive itself may
- * not physically write the data to the platters for quite some time
- * and it may be written in an out-of-order sequence. For applications
- * that require tighter guarantees about the integrity of their data,
- * Mac OS X provides the F_FULLFSYNC fcntl. The F_FULLFSYNC fcntl asks
- * the drive to flush all buffered data to permanent storage."
- *
- * OS X F_FULLFSYNC fcntl documentation:
- * "This is currently implemented on HFS, MS-DOS (FAT), and Universal
- * Disk Format (UDF) file systems."
- *
- * See comment in __posix_sync(): sync cannot be retried or fail.
- */
- static enum { FF_NOTSET, FF_IGNORE, FF_OK } ff_status = FF_NOTSET;
- switch (ff_status) {
- case FF_NOTSET:
- WT_SYSCALL(fcntl(fd, F_FULLFSYNC, 0) == -1 ? -1 : 0, ret);
- if (ret == 0) {
- ff_status = FF_OK;
- return (0);
- }
-
- /*
- * If the first F_FULLFSYNC fails, assume the file system
- * doesn't support it and fallback to fdatasync or fsync.
- */
- ff_status = FF_IGNORE;
- __wt_err(session, ret,
- "fcntl(F_FULLFSYNC) failed, falling back to fdatasync "
- "or fsync");
- break;
- case FF_IGNORE:
- break;
- case FF_OK:
- WT_SYSCALL(fcntl(fd, F_FULLFSYNC, 0) == -1 ? -1 : 0, ret);
- if (ret == 0)
- return (0);
- WT_PANIC_RET(session,
- ret, "%s: %s: fcntl(F_FULLFSYNC)", name, func);
- }
+ /*
+ * OS X fsync documentation:
+ * "Note that while fsync() will flush all data from the host to the
+ * drive (i.e. the "permanent storage device"), the drive itself may
+ * not physically write the data to the platters for quite some time
+ * and it may be written in an out-of-order sequence. For applications
+ * that require tighter guarantees about the integrity of their data,
+ * Mac OS X provides the F_FULLFSYNC fcntl. The F_FULLFSYNC fcntl asks
+ * the drive to flush all buffered data to permanent storage."
+ *
+ * OS X F_FULLFSYNC fcntl documentation:
+ * "This is currently implemented on HFS, MS-DOS (FAT), and Universal
+ * Disk Format (UDF) file systems."
+ *
+ * See comment in __posix_sync(): sync cannot be retried or fail.
+ */
+ static enum { FF_NOTSET, FF_IGNORE, FF_OK } ff_status = FF_NOTSET;
+ switch (ff_status) {
+ case FF_NOTSET:
+ WT_SYSCALL(fcntl(fd, F_FULLFSYNC, 0) == -1 ? -1 : 0, ret);
+ if (ret == 0) {
+ ff_status = FF_OK;
+ return (0);
+ }
+
+ /*
+ * If the first F_FULLFSYNC fails, assume the file system doesn't support it and fall back
+ * to fdatasync or fsync.
+ */
+ ff_status = FF_IGNORE;
+ __wt_err(session, ret,
+ "fcntl(F_FULLFSYNC) failed, falling back to fdatasync "
+ "or fsync");
+ break;
+ case FF_IGNORE:
+ break;
+ case FF_OK:
+ WT_SYSCALL(fcntl(fd, F_FULLFSYNC, 0) == -1 ? -1 : 0, ret);
+ if (ret == 0)
+ return (0);
+ WT_PANIC_RET(session, ret, "%s: %s: fcntl(F_FULLFSYNC)", name, func);
+ }
#endif
#if defined(HAVE_FDATASYNC)
- /* See comment in __posix_sync(): sync cannot be retried or fail. */
- WT_SYSCALL(fdatasync(fd), ret);
- if (ret == 0)
- return (0);
- WT_PANIC_RET(session, ret, "%s: %s: fdatasync", name, func);
+ /* See comment in __posix_sync(): sync cannot be retried or fail. */
+ WT_SYSCALL(fdatasync(fd), ret);
+ if (ret == 0)
+ return (0);
+ WT_PANIC_RET(session, ret, "%s: %s: fdatasync", name, func);
#else
- /* See comment in __posix_sync(): sync cannot be retried or fail. */
- WT_SYSCALL(fsync(fd), ret);
- if (ret == 0)
- return (0);
- WT_PANIC_RET(session, ret, "%s: %s: fsync", name, func);
+ /* See comment in __posix_sync(): sync cannot be retried or fail. */
+ WT_SYSCALL(fsync(fd), ret);
+ if (ret == 0)
+ return (0);
+ WT_PANIC_RET(session, ret, "%s: %s: fsync", name, func);
#endif
}
#ifdef __linux__
/*
* __posix_directory_sync --
- * Flush a directory to ensure file creation, remove or rename is durable.
+ * Flush a directory to ensure file creation, remove or rename is durable.
*/
static int
__posix_directory_sync(WT_SESSION_IMPL *session, const char *path)
{
- WT_DECL_ITEM(tmp);
- WT_DECL_RET;
- int fd, tret;
- char *dir;
-
- WT_RET(__wt_scr_alloc(session, 0, &tmp));
- WT_ERR(__wt_buf_setstr(session, tmp, path));
-
- /*
- * This layer should never see a path that doesn't include a trailing
- * path separator, this code asserts that fact.
- */
- dir = tmp->mem;
- strrchr(dir, '/')[1] = '\0';
-
- fd = 0; /* -Wconditional-uninitialized */
- WT_SYSCALL_RETRY((
- (fd = open(dir, O_RDONLY, 0444)) == -1 ? -1 : 0), ret);
- if (ret != 0)
- WT_ERR_MSG(session, ret, "%s: directory-sync: open", dir);
-
- ret = __posix_sync(session, fd, dir, "directory-sync");
-
- WT_SYSCALL(close(fd), tret);
- if (tret != 0) {
- __wt_err(session, tret, "%s: directory-sync: close", dir);
- WT_TRET(tret);
- }
-
-err: __wt_scr_free(session, &tmp);
- if (ret == 0)
- return (ret);
-
- /* See comment in __posix_sync(): sync cannot be retried or fail. */
- WT_PANIC_RET(session, ret, "%s: directory-sync", path);
+ WT_DECL_ITEM(tmp);
+ WT_DECL_RET;
+ int fd, tret;
+ char *dir;
+
+ WT_RET(__wt_scr_alloc(session, 0, &tmp));
+ WT_ERR(__wt_buf_setstr(session, tmp, path));
+
+ /*
+ * This layer should never see a path that doesn't include a path separator; the code below
+ * relies on that fact, since strrchr would otherwise return NULL.
+ */
+ dir = tmp->mem;
+ strrchr(dir, '/')[1] = '\0';
+
+ fd = 0; /* -Wconditional-uninitialized */
+ WT_SYSCALL_RETRY(((fd = open(dir, O_RDONLY, 0444)) == -1 ? -1 : 0), ret);
+ if (ret != 0)
+ WT_ERR_MSG(session, ret, "%s: directory-sync: open", dir);
+
+ ret = __posix_sync(session, fd, dir, "directory-sync");
+
+ WT_SYSCALL(close(fd), tret);
+ if (tret != 0) {
+ __wt_err(session, tret, "%s: directory-sync: close", dir);
+ WT_TRET(tret);
+ }
+
+err:
+ __wt_scr_free(session, &tmp);
+ if (ret == 0)
+ return (ret);
+
+ /* See comment in __posix_sync(): sync cannot be retried or fail. */
+ WT_PANIC_RET(session, ret, "%s: directory-sync", path);
}
#endif
/*
* __posix_fs_exist --
- * Return if the file exists.
+ * Return whether the file exists.
*/
static int
-__posix_fs_exist(WT_FILE_SYSTEM *file_system,
- WT_SESSION *wt_session, const char *name, bool *existp)
+__posix_fs_exist(
+ WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, const char *name, bool *existp)
{
- struct stat sb;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
-
- WT_UNUSED(file_system);
-
- session = (WT_SESSION_IMPL *)wt_session;
-
- WT_SYSCALL(stat(name, &sb), ret);
- if (ret == 0) {
- *existp = true;
- return (0);
- }
- if (ret == ENOENT) {
- *existp = false;
- return (0);
- }
- WT_RET_MSG(session, ret, "%s: file-exist: stat", name);
+ struct stat sb;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+
+ WT_UNUSED(file_system);
+
+ session = (WT_SESSION_IMPL *)wt_session;
+
+ WT_SYSCALL(stat(name, &sb), ret);
+ if (ret == 0) {
+ *existp = true;
+ return (0);
+ }
+ if (ret == ENOENT) {
+ *existp = false;
+ return (0);
+ }
+ WT_RET_MSG(session, ret, "%s: file-exist: stat", name);
}
/*
* __posix_fs_remove --
- * Remove a file.
+ * Remove a file.
*/
static int
-__posix_fs_remove(WT_FILE_SYSTEM *file_system,
- WT_SESSION *wt_session, const char *name, uint32_t flags)
+__posix_fs_remove(
+ WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, const char *name, uint32_t flags)
{
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
- WT_UNUSED(file_system);
+ WT_UNUSED(file_system);
- session = (WT_SESSION_IMPL *)wt_session;
+ session = (WT_SESSION_IMPL *)wt_session;
- /*
- * ISO C doesn't require remove return -1 on failure or set errno (note
- * POSIX 1003.1 extends C with those requirements). Regardless, use the
- * unlink system call, instead of remove, to simplify error handling;
- * where we're not doing any special checking for standards compliance,
- * using unlink may be marginally safer.
- */
- WT_SYSCALL(unlink(name), ret);
- if (ret != 0)
- WT_RET_MSG(session, ret, "%s: file-remove: unlink", name);
+ /*
+ * ISO C doesn't require remove return -1 on failure or set errno (note POSIX 1003.1 extends C
+ * with those requirements). Regardless, use the unlink system call, instead of remove, to
+ * simplify error handling; where we're not doing any special checking for standards compliance,
+ * using unlink may be marginally safer.
+ */
+ WT_SYSCALL(unlink(name), ret);
+ if (ret != 0)
+ WT_RET_MSG(session, ret, "%s: file-remove: unlink", name);
- if (!LF_ISSET(WT_FS_DURABLE))
- return (0);
+ if (!LF_ISSET(WT_FS_DURABLE))
+ return (0);
#ifdef __linux__
- /* Flush the backing directory to guarantee the remove. */
- WT_RET (__posix_directory_sync(session, name));
+ /* Flush the backing directory to guarantee the remove. */
+ WT_RET(__posix_directory_sync(session, name));
#endif
- return (0);
+ return (0);
}
/*
* __posix_fs_rename --
- * Rename a file.
+ * Rename a file.
*/
static int
-__posix_fs_rename(WT_FILE_SYSTEM *file_system,
- WT_SESSION *wt_session, const char *from, const char *to, uint32_t flags)
+__posix_fs_rename(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, const char *from,
+ const char *to, uint32_t flags)
{
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
-
- WT_UNUSED(file_system);
-
- session = (WT_SESSION_IMPL *)wt_session;
-
- /*
- * ISO C doesn't require rename return -1 on failure or set errno (note
- * POSIX 1003.1 extends C with those requirements). Be cautious, force
- * any non-zero return to -1 so we'll check errno. We can still end up
- * with the wrong errno (if errno is garbage), or the generic WT_ERROR
- * return (if errno is 0), but we've done the best we can.
- */
- WT_SYSCALL(rename(from, to) != 0 ? -1 : 0, ret);
- if (ret != 0)
- WT_RET_MSG(
- session, ret, "%s to %s: file-rename: rename", from, to);
-
- if (!LF_ISSET(WT_FS_DURABLE))
- return (0);
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+
+ WT_UNUSED(file_system);
+
+ session = (WT_SESSION_IMPL *)wt_session;
+
+ /*
+ * ISO C doesn't require rename return -1 on failure or set errno (note POSIX 1003.1 extends C
+ * with those requirements). Be cautious, force any non-zero return to -1 so we'll check errno.
+ * We can still end up with the wrong errno (if errno is garbage), or the generic WT_ERROR
+ * return (if errno is 0), but we've done the best we can.
+ */
+ WT_SYSCALL(rename(from, to) != 0 ? -1 : 0, ret);
+ if (ret != 0)
+ WT_RET_MSG(session, ret, "%s to %s: file-rename: rename", from, to);
+
+ if (!LF_ISSET(WT_FS_DURABLE))
+ return (0);
#ifdef __linux__
- /*
- * Flush the backing directory to guarantee the rename. My reading of
- * POSIX 1003.1 is there's no guarantee flushing only one of the from
- * or to directories, or flushing a common parent, is sufficient, and
- * even if POSIX were to make that guarantee, existing filesystems are
- * known to not provide the guarantee or only provide the guarantee
- * with specific mount options. Flush both of the from/to directories
- * until it's a performance problem.
- */
- WT_RET(__posix_directory_sync(session, from));
-
- /*
- * In almost all cases, we're going to be renaming files in the same
- * directory, we can at least fast-path that.
- */
- {
- bool same_directory;
- const char *fp, *tp;
-
- fp = strrchr(from, '/');
- tp = strrchr(to, '/');
- same_directory = (fp == NULL && tp == NULL) ||
- (fp != NULL && tp != NULL &&
- fp - from == tp - to && memcmp(from, to, (size_t)(fp - from)) == 0);
-
- if (!same_directory)
- WT_RET(__posix_directory_sync(session, to));
- }
+ /*
+ * Flush the backing directory to guarantee the rename. My reading of POSIX 1003.1 is there's no
+ * guarantee flushing only one of the from or to directories, or flushing a common parent, is
+ * sufficient, and even if POSIX were to make that guarantee, existing filesystems are known to
+ * not provide the guarantee or only provide the guarantee with specific mount options. Flush
+ * both of the from/to directories until it's a performance problem.
+ */
+ WT_RET(__posix_directory_sync(session, from));
+
+ /*
+ * In almost all cases, we're going to be renaming files in the same directory, we can at least
+ * fast-path that.
+ */
+ {
+ bool same_directory;
+ const char *fp, *tp;
+
+ fp = strrchr(from, '/');
+ tp = strrchr(to, '/');
+ same_directory =
+ (fp == NULL && tp == NULL) || (fp != NULL && tp != NULL && fp - from == tp - to &&
+ memcmp(from, to, (size_t)(fp - from)) == 0);
+
+ if (!same_directory)
+ WT_RET(__posix_directory_sync(session, to));
+ }
#endif
- return (0);
+ return (0);
}
/*
* __posix_fs_size --
- * Get the size of a file in bytes, by file name.
+ * Get the size of a file in bytes, by file name.
*/
static int
-__posix_fs_size(WT_FILE_SYSTEM *file_system,
- WT_SESSION *wt_session, const char *name, wt_off_t *sizep)
+__posix_fs_size(
+ WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, const char *name, wt_off_t *sizep)
{
- struct stat sb;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
+ struct stat sb;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
- WT_UNUSED(file_system);
+ WT_UNUSED(file_system);
- session = (WT_SESSION_IMPL *)wt_session;
+ session = (WT_SESSION_IMPL *)wt_session;
- WT_SYSCALL(stat(name, &sb), ret);
- if (ret == 0) {
- *sizep = sb.st_size;
- return (0);
- }
- WT_RET_MSG(session, ret, "%s: file-size: stat", name);
+ WT_SYSCALL(stat(name, &sb), ret);
+ if (ret == 0) {
+ *sizep = sb.st_size;
+ return (0);
+ }
+ WT_RET_MSG(session, ret, "%s: file-size: stat", name);
}
#if defined(HAVE_POSIX_FADVISE)
/*
* __posix_file_advise --
- * POSIX fadvise.
+ * POSIX fadvise.
*/
static int
-__posix_file_advise(WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session,
- wt_off_t offset, wt_off_t len, int advice)
+__posix_file_advise(
+ WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, wt_off_t offset, wt_off_t len, int advice)
{
- WT_DECL_RET;
- WT_FILE_HANDLE_POSIX *pfh;
- WT_SESSION_IMPL *session;
-
- session = (WT_SESSION_IMPL *)wt_session;
- pfh = (WT_FILE_HANDLE_POSIX *)file_handle;
-
- WT_SYSCALL(posix_fadvise(pfh->fd, offset, len, advice), ret);
- if (ret == 0)
- return (0);
-
- /*
- * Treat EINVAL as not-supported, some systems don't support some flags.
- * Quietly fail, callers expect not-supported failures, and reset the
- * handle method to prevent future calls.
- */
- if (ret == EINVAL) {
- file_handle->fh_advise = NULL;
- return (__wt_set_return(session, ENOTSUP));
- }
-
- WT_RET_MSG(session, ret,
- "%s: handle-advise: posix_fadvise", file_handle->name);
-
+ WT_DECL_RET;
+ WT_FILE_HANDLE_POSIX *pfh;
+ WT_SESSION_IMPL *session;
+
+ session = (WT_SESSION_IMPL *)wt_session;
+ pfh = (WT_FILE_HANDLE_POSIX *)file_handle;
+
+ WT_SYSCALL(posix_fadvise(pfh->fd, offset, len, advice), ret);
+ if (ret == 0)
+ return (0);
+
+ /*
+ * Treat EINVAL as not-supported, some systems don't support some flags. Quietly fail, callers
+ * expect not-supported failures, and reset the handle method to prevent future calls.
+ */
+ if (ret == EINVAL) {
+ file_handle->fh_advise = NULL;
+ return (__wt_set_return(session, ENOTSUP));
+ }
+
+ WT_RET_MSG(session, ret, "%s: handle-advise: posix_fadvise", file_handle->name);
}
#endif
/*
* __posix_file_close --
- * ANSI C close.
+ * ANSI C close.
*/
static int
__posix_file_close(WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session)
{
- WT_DECL_RET;
- WT_FILE_HANDLE_POSIX *pfh;
- WT_SESSION_IMPL *session;
-
- session = (WT_SESSION_IMPL *)wt_session;
- pfh = (WT_FILE_HANDLE_POSIX *)file_handle;
-
- /* Close the file handle. */
- if (pfh->fd != -1) {
- WT_SYSCALL(close(pfh->fd), ret);
- if (ret != 0)
- __wt_err(session, ret,
- "%s: handle-close: close", file_handle->name);
- }
-
- __wt_free(session, file_handle->name);
- __wt_free(session, pfh);
- return (ret);
+ WT_DECL_RET;
+ WT_FILE_HANDLE_POSIX *pfh;
+ WT_SESSION_IMPL *session;
+
+ session = (WT_SESSION_IMPL *)wt_session;
+ pfh = (WT_FILE_HANDLE_POSIX *)file_handle;
+
+ /* Close the file handle. */
+ if (pfh->fd != -1) {
+ WT_SYSCALL(close(pfh->fd), ret);
+ if (ret != 0)
+ __wt_err(session, ret, "%s: handle-close: close", file_handle->name);
+ }
+
+ __wt_free(session, file_handle->name);
+ __wt_free(session, pfh);
+ return (ret);
}
/*
* __posix_file_lock --
- * Lock/unlock a file.
+ * Lock/unlock a file.
*/
static int
-__posix_file_lock(
- WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, bool lock)
+__posix_file_lock(WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, bool lock)
{
- struct flock fl;
- WT_DECL_RET;
- WT_FILE_HANDLE_POSIX *pfh;
- WT_SESSION_IMPL *session;
-
- session = (WT_SESSION_IMPL *)wt_session;
- pfh = (WT_FILE_HANDLE_POSIX *)file_handle;
-
- /*
- * WiredTiger requires this function be able to acquire locks past
- * the end of file.
- *
- * Note we're using fcntl(2) locking: all fcntl locks associated with a
- * file for a given process are removed when any file descriptor for the
- * file is closed by the process, even if a lock was never requested for
- * that file descriptor.
- */
- fl.l_start = 0;
- fl.l_len = 1;
- fl.l_type = lock ? F_WRLCK : F_UNLCK;
- fl.l_whence = SEEK_SET;
-
- WT_SYSCALL(fcntl(pfh->fd, F_SETLK, &fl) == -1 ? -1 : 0, ret);
- if (ret == 0)
- return (0);
- WT_RET_MSG(session, ret, "%s: handle-lock: fcntl", file_handle->name);
+ struct flock fl;
+ WT_DECL_RET;
+ WT_FILE_HANDLE_POSIX *pfh;
+ WT_SESSION_IMPL *session;
+
+ session = (WT_SESSION_IMPL *)wt_session;
+ pfh = (WT_FILE_HANDLE_POSIX *)file_handle;
+
+ /*
+ * WiredTiger requires this function be able to acquire locks past
+ * the end of file.
+ *
+ * Note we're using fcntl(2) locking: all fcntl locks associated with a
+ * file for a given process are removed when any file descriptor for the
+ * file is closed by the process, even if a lock was never requested for
+ * that file descriptor.
+ */
+ fl.l_start = 0;
+ fl.l_len = 1;
+ fl.l_type = lock ? F_WRLCK : F_UNLCK;
+ fl.l_whence = SEEK_SET;
+
+ WT_SYSCALL(fcntl(pfh->fd, F_SETLK, &fl) == -1 ? -1 : 0, ret);
+ if (ret == 0)
+ return (0);
+ WT_RET_MSG(session, ret, "%s: handle-lock: fcntl", file_handle->name);
}
/*
* __posix_file_read --
- * POSIX pread.
+ * POSIX pread.
*/
static int
-__posix_file_read(WT_FILE_HANDLE *file_handle,
- WT_SESSION *wt_session, wt_off_t offset, size_t len, void *buf)
+__posix_file_read(
+ WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, wt_off_t offset, size_t len, void *buf)
{
- WT_FILE_HANDLE_POSIX *pfh;
- WT_SESSION_IMPL *session;
- size_t chunk;
- ssize_t nr;
- uint8_t *addr;
-
- session = (WT_SESSION_IMPL *)wt_session;
- pfh = (WT_FILE_HANDLE_POSIX *)file_handle;
-
- /* Assert direct I/O is aligned and a multiple of the alignment. */
- WT_ASSERT(session,
- !pfh->direct_io ||
- S2C(session)->buffer_alignment == 0 ||
- (!((uintptr_t)buf &
- (uintptr_t)(S2C(session)->buffer_alignment - 1)) &&
- len >= S2C(session)->buffer_alignment &&
- len % S2C(session)->buffer_alignment == 0));
-
- /* Break reads larger than 1GB into 1GB chunks. */
- for (addr = buf; len > 0; addr += nr, len -= (size_t)nr, offset += nr) {
- chunk = WT_MIN(len, WT_GIGABYTE);
- if ((nr = pread(pfh->fd, addr, chunk, offset)) <= 0) {
- if (nr == 0)
- F_SET(S2C(session), WT_CONN_DATA_CORRUPTION);
- WT_RET_MSG(session,
- nr == 0 ? WT_ERROR : __wt_errno(),
- "%s: handle-read: pread: failed to read %"
- WT_SIZET_FMT " bytes at offset %" PRIuMAX,
- file_handle->name, chunk, (uintmax_t)offset);
- }
- }
- return (0);
+ WT_FILE_HANDLE_POSIX *pfh;
+ WT_SESSION_IMPL *session;
+ size_t chunk;
+ ssize_t nr;
+ uint8_t *addr;
+
+ session = (WT_SESSION_IMPL *)wt_session;
+ pfh = (WT_FILE_HANDLE_POSIX *)file_handle;
+
+ /* Assert direct I/O is aligned and a multiple of the alignment. */
+ WT_ASSERT(
+ session, !pfh->direct_io || S2C(session)->buffer_alignment == 0 ||
+ (!((uintptr_t)buf & (uintptr_t)(S2C(session)->buffer_alignment - 1)) &&
+ len >= S2C(session)->buffer_alignment && len % S2C(session)->buffer_alignment == 0));
+
+ /* Break reads larger than 1GB into 1GB chunks. */
+ for (addr = buf; len > 0; addr += nr, len -= (size_t)nr, offset += nr) {
+ chunk = WT_MIN(len, WT_GIGABYTE);
+ if ((nr = pread(pfh->fd, addr, chunk, offset)) <= 0) {
+ if (nr == 0)
+ F_SET(S2C(session), WT_CONN_DATA_CORRUPTION);
+ WT_RET_MSG(session, nr == 0 ? WT_ERROR : __wt_errno(),
+ "%s: handle-read: pread: failed to read %" WT_SIZET_FMT " bytes at offset %" PRIuMAX,
+ file_handle->name, chunk, (uintmax_t)offset);
+ }
+ }
+ return (0);
}
/*
* __posix_file_size --
- * Get the size of a file in bytes, by file handle.
+ * Get the size of a file in bytes, by file handle.
*/
static int
-__posix_file_size(
- WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, wt_off_t *sizep)
+__posix_file_size(WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, wt_off_t *sizep)
{
- struct stat sb;
- WT_DECL_RET;
- WT_FILE_HANDLE_POSIX *pfh;
- WT_SESSION_IMPL *session;
-
- session = (WT_SESSION_IMPL *)wt_session;
- pfh = (WT_FILE_HANDLE_POSIX *)file_handle;
-
- WT_SYSCALL(fstat(pfh->fd, &sb), ret);
- if (ret == 0) {
- *sizep = sb.st_size;
- return (0);
- }
- WT_RET_MSG(session, ret, "%s: handle-size: fstat", file_handle->name);
+ struct stat sb;
+ WT_DECL_RET;
+ WT_FILE_HANDLE_POSIX *pfh;
+ WT_SESSION_IMPL *session;
+
+ session = (WT_SESSION_IMPL *)wt_session;
+ pfh = (WT_FILE_HANDLE_POSIX *)file_handle;
+
+ WT_SYSCALL(fstat(pfh->fd, &sb), ret);
+ if (ret == 0) {
+ *sizep = sb.st_size;
+ return (0);
+ }
+ WT_RET_MSG(session, ret, "%s: handle-size: fstat", file_handle->name);
}
/*
* __posix_file_sync --
- * POSIX fsync.
+ * POSIX fsync.
*/
static int
__posix_file_sync(WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session)
{
- WT_FILE_HANDLE_POSIX *pfh;
- WT_SESSION_IMPL *session;
+ WT_FILE_HANDLE_POSIX *pfh;
+ WT_SESSION_IMPL *session;
- session = (WT_SESSION_IMPL *)wt_session;
- pfh = (WT_FILE_HANDLE_POSIX *)file_handle;
+ session = (WT_SESSION_IMPL *)wt_session;
+ pfh = (WT_FILE_HANDLE_POSIX *)file_handle;
- return (
- __posix_sync(session, pfh->fd, file_handle->name, "handle-sync"));
+ return (__posix_sync(session, pfh->fd, file_handle->name, "handle-sync"));
}
#ifdef HAVE_SYNC_FILE_RANGE
/*
* __posix_file_sync_nowait --
- * POSIX fsync.
+ * POSIX fsync.
*/
static int
__posix_file_sync_nowait(WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session)
{
- WT_DECL_RET;
- WT_FILE_HANDLE_POSIX *pfh;
- WT_SESSION_IMPL *session;
+ WT_DECL_RET;
+ WT_FILE_HANDLE_POSIX *pfh;
+ WT_SESSION_IMPL *session;
- session = (WT_SESSION_IMPL *)wt_session;
- pfh = (WT_FILE_HANDLE_POSIX *)file_handle;
+ session = (WT_SESSION_IMPL *)wt_session;
+ pfh = (WT_FILE_HANDLE_POSIX *)file_handle;
- /* See comment in __posix_sync(): sync cannot be retried or fail. */
- WT_SYSCALL(sync_file_range(pfh->fd,
- (off64_t)0, (off64_t)0, SYNC_FILE_RANGE_WRITE), ret);
- if (ret == 0)
- return (0);
+ /* See comment in __posix_sync(): sync cannot be retried or fail. */
+ WT_SYSCALL(sync_file_range(pfh->fd, (off64_t)0, (off64_t)0, SYNC_FILE_RANGE_WRITE), ret);
+ if (ret == 0)
+ return (0);
- WT_PANIC_RET(session, ret,
- "%s: handle-sync-nowait: sync_file_range", file_handle->name);
+ WT_PANIC_RET(session, ret, "%s: handle-sync-nowait: sync_file_range", file_handle->name);
}
#endif
#ifdef HAVE_FTRUNCATE
/*
* __posix_file_truncate --
- * POSIX ftruncate.
+ * POSIX ftruncate.
*/
static int
-__posix_file_truncate(
- WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, wt_off_t len)
+__posix_file_truncate(WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, wt_off_t len)
{
- WT_DECL_RET;
- WT_FILE_HANDLE_POSIX *pfh;
- WT_SESSION_IMPL *session;
-
- session = (WT_SESSION_IMPL *)wt_session;
- pfh = (WT_FILE_HANDLE_POSIX *)file_handle;
-
- WT_SYSCALL_RETRY(ftruncate(pfh->fd, len), ret);
- if (ret == 0)
- return (0);
- WT_RET_MSG(session, ret,
- "%s: handle-truncate: ftruncate", file_handle->name);
+ WT_DECL_RET;
+ WT_FILE_HANDLE_POSIX *pfh;
+ WT_SESSION_IMPL *session;
+
+ session = (WT_SESSION_IMPL *)wt_session;
+ pfh = (WT_FILE_HANDLE_POSIX *)file_handle;
+
+ WT_SYSCALL_RETRY(ftruncate(pfh->fd, len), ret);
+ if (ret == 0)
+ return (0);
+ WT_RET_MSG(session, ret, "%s: handle-truncate: ftruncate", file_handle->name);
}
#endif
/*
* __posix_file_write --
- * POSIX pwrite.
+ * POSIX pwrite.
*/
static int
-__posix_file_write(WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session,
- wt_off_t offset, size_t len, const void *buf)
+__posix_file_write(
+ WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, wt_off_t offset, size_t len, const void *buf)
{
- WT_FILE_HANDLE_POSIX *pfh;
- WT_SESSION_IMPL *session;
- size_t chunk;
- ssize_t nw;
- const uint8_t *addr;
-
- session = (WT_SESSION_IMPL *)wt_session;
- pfh = (WT_FILE_HANDLE_POSIX *)file_handle;
-
- /* Assert direct I/O is aligned and a multiple of the alignment. */
- WT_ASSERT(session,
- !pfh->direct_io ||
- S2C(session)->buffer_alignment == 0 ||
- (!((uintptr_t)buf &
- (uintptr_t)(S2C(session)->buffer_alignment - 1)) &&
- len >= S2C(session)->buffer_alignment &&
- len % S2C(session)->buffer_alignment == 0));
-
- /* Break writes larger than 1GB into 1GB chunks. */
- for (addr = buf; len > 0; addr += nw, len -= (size_t)nw, offset += nw) {
- chunk = WT_MIN(len, WT_GIGABYTE);
- if ((nw = pwrite(pfh->fd, addr, chunk, offset)) < 0)
- WT_RET_MSG(session, __wt_errno(),
- "%s: handle-write: pwrite: failed to write %"
- WT_SIZET_FMT " bytes at offset %" PRIuMAX,
- file_handle->name, chunk, (uintmax_t)offset);
- }
- return (0);
+ WT_FILE_HANDLE_POSIX *pfh;
+ WT_SESSION_IMPL *session;
+ size_t chunk;
+ ssize_t nw;
+ const uint8_t *addr;
+
+ session = (WT_SESSION_IMPL *)wt_session;
+ pfh = (WT_FILE_HANDLE_POSIX *)file_handle;
+
+ /* Assert direct I/O is aligned and a multiple of the alignment. */
+ WT_ASSERT(
+ session, !pfh->direct_io || S2C(session)->buffer_alignment == 0 ||
+ (!((uintptr_t)buf & (uintptr_t)(S2C(session)->buffer_alignment - 1)) &&
+ len >= S2C(session)->buffer_alignment && len % S2C(session)->buffer_alignment == 0));
+
+ /* Break writes larger than 1GB into 1GB chunks. */
+ for (addr = buf; len > 0; addr += nw, len -= (size_t)nw, offset += nw) {
+ chunk = WT_MIN(len, WT_GIGABYTE);
+ if ((nw = pwrite(pfh->fd, addr, chunk, offset)) < 0)
+ WT_RET_MSG(session, __wt_errno(),
+ "%s: handle-write: pwrite: failed to write %" WT_SIZET_FMT
+ " bytes at offset %" PRIuMAX,
+ file_handle->name, chunk, (uintmax_t)offset);
+ }
+ return (0);
}
/*
* __posix_open_file_cloexec --
- * Prevent child access to file handles.
+ * Prevent child access to file handles.
*/
static inline int
__posix_open_file_cloexec(WT_SESSION_IMPL *session, int fd, const char *name)
{
#if defined(FD_CLOEXEC) && !defined(O_CLOEXEC)
- int f;
-
- /*
- * Security:
- * The application may spawn a new process, and we don't want another
- * process to have access to our file handles. There's an obvious race
- * between the open and this call, prefer the flag to open if available.
- */
- if ((f = fcntl(fd, F_GETFD)) == -1 ||
- fcntl(fd, F_SETFD, f | FD_CLOEXEC) == -1)
- WT_RET_MSG(session, __wt_errno(),
- "%s: handle-open: fcntl(FD_CLOEXEC)", name);
- return (0);
+ int f;
+
+ /*
+ * Security: The application may spawn a new process, and we don't want another process to have
+ * access to our file handles. There's an obvious race between the open and this call, prefer
+ * the flag to open if available.
+ */
+ if ((f = fcntl(fd, F_GETFD)) == -1 || fcntl(fd, F_SETFD, f | FD_CLOEXEC) == -1)
+ WT_RET_MSG(session, __wt_errno(), "%s: handle-open: fcntl(FD_CLOEXEC)", name);
+ return (0);
#else
- WT_UNUSED(session);
- WT_UNUSED(fd);
- WT_UNUSED(name);
- return (0);
+ WT_UNUSED(session);
+ WT_UNUSED(fd);
+ WT_UNUSED(name);
+ return (0);
#endif
}
/*
* __posix_open_file --
- * Open a file handle.
+ * Open a file handle.
*/
static int
-__posix_open_file(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session,
- const char *name, WT_FS_OPEN_FILE_TYPE file_type, uint32_t flags,
- WT_FILE_HANDLE **file_handlep)
+__posix_open_file(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, const char *name,
+ WT_FS_OPEN_FILE_TYPE file_type, uint32_t flags, WT_FILE_HANDLE **file_handlep)
{
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- WT_FILE_HANDLE *file_handle;
- WT_FILE_HANDLE_POSIX *pfh;
- WT_SESSION_IMPL *session;
- mode_t mode;
- int advise_flag, f;
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_FILE_HANDLE *file_handle;
+ WT_FILE_HANDLE_POSIX *pfh;
+ WT_SESSION_IMPL *session;
+ mode_t mode;
+ int advise_flag, f;
- WT_UNUSED(file_system);
+ WT_UNUSED(file_system);
- *file_handlep = NULL;
+ *file_handlep = NULL;
- session = (WT_SESSION_IMPL *)wt_session;
- conn = S2C(session);
+ session = (WT_SESSION_IMPL *)wt_session;
+ conn = S2C(session);
- WT_RET(__wt_calloc_one(session, &pfh));
+ WT_RET(__wt_calloc_one(session, &pfh));
- /* Set up error handling. */
- pfh->fd = -1;
+ /* Set up error handling. */
+ pfh->fd = -1;
- if (file_type == WT_FS_OPEN_FILE_TYPE_DIRECTORY) {
- f = O_RDONLY;
+ if (file_type == WT_FS_OPEN_FILE_TYPE_DIRECTORY) {
+ f = O_RDONLY;
#ifdef O_CLOEXEC
- /*
- * Security:
- * The application may spawn a new process, and we don't want
- * another process to have access to our file handles.
- */
- f |= O_CLOEXEC;
+ /*
+ * Security: The application may spawn a new process, and we don't want another process to
+ * have access to our file handles.
+ */
+ f |= O_CLOEXEC;
#endif
- WT_SYSCALL_RETRY((
- (pfh->fd = open(name, f, 0444)) == -1 ? -1 : 0), ret);
- if (ret != 0)
- WT_ERR_MSG(session, ret,
- "%s: handle-open: open-directory", name);
- WT_ERR(__posix_open_file_cloexec(session, pfh->fd, name));
- goto directory_open;
- }
-
- f = LF_ISSET(WT_FS_OPEN_READONLY) ? O_RDONLY : O_RDWR;
- if (LF_ISSET(WT_FS_OPEN_CREATE)) {
- f |= O_CREAT;
- if (LF_ISSET(WT_FS_OPEN_EXCLUSIVE))
- f |= O_EXCL;
- mode = 0666;
- } else
- mode = 0;
+ WT_SYSCALL_RETRY(((pfh->fd = open(name, f, 0444)) == -1 ? -1 : 0), ret);
+ if (ret != 0)
+ WT_ERR_MSG(session, ret, "%s: handle-open: open-directory", name);
+ WT_ERR(__posix_open_file_cloexec(session, pfh->fd, name));
+ goto directory_open;
+ }
+
+ f = LF_ISSET(WT_FS_OPEN_READONLY) ? O_RDONLY : O_RDWR;
+ if (LF_ISSET(WT_FS_OPEN_CREATE)) {
+ f |= O_CREAT;
+ if (LF_ISSET(WT_FS_OPEN_EXCLUSIVE))
+ f |= O_EXCL;
+ mode = 0666;
+ } else
+ mode = 0;
#ifdef O_BINARY
- /* Windows clones: we always want to treat the file as a binary. */
- f |= O_BINARY;
+ /* Windows clones: we always want to treat the file as a binary. */
+ f |= O_BINARY;
#endif
#ifdef O_CLOEXEC
- /*
- * Security:
- * The application may spawn a new process, and we don't want another
- * process to have access to our file handles.
- */
- f |= O_CLOEXEC;
+ /*
+ * Security: The application may spawn a new process, and we don't want another process to have
+ * access to our file handles.
+ */
+ f |= O_CLOEXEC;
#endif
#ifdef O_DIRECT
- /* Direct I/O. */
- if (LF_ISSET(WT_FS_OPEN_DIRECTIO)) {
- f |= O_DIRECT;
- pfh->direct_io = true;
- } else
- pfh->direct_io = false;
+ /* Direct I/O. */
+ if (LF_ISSET(WT_FS_OPEN_DIRECTIO)) {
+ f |= O_DIRECT;
+ pfh->direct_io = true;
+ } else
+ pfh->direct_io = false;
#endif
#ifdef O_NOATIME
- /* Avoid updating metadata for read-only workloads. */
- if (file_type == WT_FS_OPEN_FILE_TYPE_DATA)
- f |= O_NOATIME;
+ /* Avoid updating metadata for read-only workloads. */
+ if (file_type == WT_FS_OPEN_FILE_TYPE_DATA)
+ f |= O_NOATIME;
#endif
- if (file_type == WT_FS_OPEN_FILE_TYPE_LOG &&
- FLD_ISSET(conn->txn_logsync, WT_LOG_DSYNC)) {
+ if (file_type == WT_FS_OPEN_FILE_TYPE_LOG && FLD_ISSET(conn->txn_logsync, WT_LOG_DSYNC)) {
#ifdef O_DSYNC
- f |= O_DSYNC;
+ f |= O_DSYNC;
#elif defined(O_SYNC)
- f |= O_SYNC;
+ f |= O_SYNC;
#else
- WT_ERR_MSG(session, ENOTSUP,
- "unsupported log sync mode configured");
+ WT_ERR_MSG(session, ENOTSUP, "unsupported log sync mode configured");
#endif
- }
+ }
- /* Create/Open the file. */
- WT_SYSCALL_RETRY(((pfh->fd = open(name, f, mode)) == -1 ? -1 : 0), ret);
- if (ret != 0)
- WT_ERR_MSG(session, ret,
- pfh->direct_io ?
- "%s: handle-open: open: failed with direct I/O configured, "
- "some filesystem types do not support direct I/O" :
- "%s: handle-open: open", name);
+ /* Create/Open the file. */
+ WT_SYSCALL_RETRY(((pfh->fd = open(name, f, mode)) == -1 ? -1 : 0), ret);
+ if (ret != 0)
+ WT_ERR_MSG(session, ret,
+ pfh->direct_io ? "%s: handle-open: open: failed with direct I/O configured, "
+ "some filesystem types do not support direct I/O" :
+ "%s: handle-open: open",
+ name);
#ifdef __linux__
- /*
- * Durability: some filesystems require a directory sync to be confident
- * the file will appear.
- */
- if (LF_ISSET(WT_FS_OPEN_DURABLE))
- WT_ERR(__posix_directory_sync(session, name));
+ /*
+ * Durability: some filesystems require a directory sync to be confident the file will appear.
+ */
+ if (LF_ISSET(WT_FS_OPEN_DURABLE))
+ WT_ERR(__posix_directory_sync(session, name));
#endif
- WT_ERR(__posix_open_file_cloexec(session, pfh->fd, name));
+ WT_ERR(__posix_open_file_cloexec(session, pfh->fd, name));
#if defined(HAVE_POSIX_FADVISE)
- /*
- * If the user set an access pattern hint, call fadvise now.
- * Ignore fadvise when doing direct I/O, the kernel cache isn't
- * interesting.
- */
- if (!pfh->direct_io && file_type == WT_FS_OPEN_FILE_TYPE_DATA &&
- LF_ISSET(WT_FS_OPEN_ACCESS_RAND | WT_FS_OPEN_ACCESS_SEQ)) {
- advise_flag = 0;
- if (LF_ISSET(WT_FS_OPEN_ACCESS_RAND))
- advise_flag = POSIX_FADV_RANDOM;
- if (LF_ISSET(WT_FS_OPEN_ACCESS_SEQ))
- advise_flag = POSIX_FADV_SEQUENTIAL;
- WT_SYSCALL(posix_fadvise(pfh->fd, 0, 0, advise_flag), ret);
- if (ret != 0)
- WT_ERR_MSG(session, ret,
- "%s: handle-open: posix_fadvise", name);
- }
+ /*
+ * If the user set an access pattern hint, call fadvise now. Ignore fadvise when doing direct
+ * I/O, the kernel cache isn't interesting.
+ */
+ if (!pfh->direct_io && file_type == WT_FS_OPEN_FILE_TYPE_DATA &&
+ LF_ISSET(WT_FS_OPEN_ACCESS_RAND | WT_FS_OPEN_ACCESS_SEQ)) {
+ advise_flag = 0;
+ if (LF_ISSET(WT_FS_OPEN_ACCESS_RAND))
+ advise_flag = POSIX_FADV_RANDOM;
+ if (LF_ISSET(WT_FS_OPEN_ACCESS_SEQ))
+ advise_flag = POSIX_FADV_SEQUENTIAL;
+ WT_SYSCALL(posix_fadvise(pfh->fd, 0, 0, advise_flag), ret);
+ if (ret != 0)
+ WT_ERR_MSG(session, ret, "%s: handle-open: posix_fadvise", name);
+ }
#else
- WT_UNUSED(advise_flag);
+ WT_UNUSED(advise_flag);
#endif
directory_open:
- /* Initialize public information. */
- file_handle = (WT_FILE_HANDLE *)pfh;
- WT_ERR(__wt_strdup(session, name, &file_handle->name));
+ /* Initialize public information. */
+ file_handle = (WT_FILE_HANDLE *)pfh;
+ WT_ERR(__wt_strdup(session, name, &file_handle->name));
- file_handle->close = __posix_file_close;
+ file_handle->close = __posix_file_close;
#if defined(HAVE_POSIX_FADVISE)
- /*
- * Ignore fadvise when doing direct I/O, the kernel cache isn't
- * interesting.
- */
- if (!pfh->direct_io)
- file_handle->fh_advise = __posix_file_advise;
+ /*
+ * Ignore fadvise when doing direct I/O, the kernel cache isn't interesting.
+ */
+ if (!pfh->direct_io)
+ file_handle->fh_advise = __posix_file_advise;
#endif
- file_handle->fh_extend = __wt_posix_file_extend;
- file_handle->fh_lock = __posix_file_lock;
+ file_handle->fh_extend = __wt_posix_file_extend;
+ file_handle->fh_lock = __posix_file_lock;
#ifdef WORDS_BIGENDIAN
- /*
- * The underlying objects are little-endian, mapping objects isn't
- * currently supported on big-endian systems.
- */
+/*
+ * The underlying objects are little-endian, mapping objects isn't currently supported on big-endian
+ * systems.
+ */
#else
- file_handle->fh_map = __wt_posix_map;
+ file_handle->fh_map = __wt_posix_map;
#ifdef HAVE_POSIX_MADVISE
- file_handle->fh_map_discard = __wt_posix_map_discard;
- file_handle->fh_map_preload = __wt_posix_map_preload;
+ file_handle->fh_map_discard = __wt_posix_map_discard;
+ file_handle->fh_map_preload = __wt_posix_map_preload;
#endif
- file_handle->fh_unmap = __wt_posix_unmap;
+ file_handle->fh_unmap = __wt_posix_unmap;
#endif
- file_handle->fh_read = __posix_file_read;
- file_handle->fh_size = __posix_file_size;
- file_handle->fh_sync = __posix_file_sync;
+ file_handle->fh_read = __posix_file_read;
+ file_handle->fh_size = __posix_file_size;
+ file_handle->fh_sync = __posix_file_sync;
#ifdef HAVE_SYNC_FILE_RANGE
- file_handle->fh_sync_nowait = __posix_file_sync_nowait;
+ file_handle->fh_sync_nowait = __posix_file_sync_nowait;
#endif
#ifdef HAVE_FTRUNCATE
- file_handle->fh_truncate = __posix_file_truncate;
+ file_handle->fh_truncate = __posix_file_truncate;
#endif
- file_handle->fh_write = __posix_file_write;
+ file_handle->fh_write = __posix_file_write;
- *file_handlep = file_handle;
+ *file_handlep = file_handle;
- return (0);
+ return (0);
-err: WT_TRET(__posix_file_close((WT_FILE_HANDLE *)pfh, wt_session));
- return (ret);
+err:
+ WT_TRET(__posix_file_close((WT_FILE_HANDLE *)pfh, wt_session));
+ return (ret);
}
/*
* __posix_terminate --
- * Terminate a POSIX configuration.
+ * Terminate a POSIX configuration.
*/
static int
__posix_terminate(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session)
{
- WT_SESSION_IMPL *session;
+ WT_SESSION_IMPL *session;
- session = (WT_SESSION_IMPL *)wt_session;
+ session = (WT_SESSION_IMPL *)wt_session;
- __wt_free(session, file_system);
- return (0);
+ __wt_free(session, file_system);
+ return (0);
}
/*
* __wt_os_posix --
- * Initialize a POSIX configuration.
+ * Initialize a POSIX configuration.
*/
int
__wt_os_posix(WT_SESSION_IMPL *session)
{
- WT_CONNECTION_IMPL *conn;
- WT_FILE_SYSTEM *file_system;
+ WT_CONNECTION_IMPL *conn;
+ WT_FILE_SYSTEM *file_system;
- conn = S2C(session);
+ conn = S2C(session);
- WT_RET(__wt_calloc_one(session, &file_system));
+ WT_RET(__wt_calloc_one(session, &file_system));
- /* Initialize the POSIX jump table. */
- file_system->fs_directory_list = __wt_posix_directory_list;
- file_system->fs_directory_list_single =
- __wt_posix_directory_list_single;
- file_system->fs_directory_list_free = __wt_posix_directory_list_free;
- file_system->fs_exist = __posix_fs_exist;
- file_system->fs_open_file = __posix_open_file;
- file_system->fs_remove = __posix_fs_remove;
- file_system->fs_rename = __posix_fs_rename;
- file_system->fs_size = __posix_fs_size;
- file_system->terminate = __posix_terminate;
+ /* Initialize the POSIX jump table. */
+ file_system->fs_directory_list = __wt_posix_directory_list;
+ file_system->fs_directory_list_single = __wt_posix_directory_list_single;
+ file_system->fs_directory_list_free = __wt_posix_directory_list_free;
+ file_system->fs_exist = __posix_fs_exist;
+ file_system->fs_open_file = __posix_open_file;
+ file_system->fs_remove = __posix_fs_remove;
+ file_system->fs_rename = __posix_fs_rename;
+ file_system->fs_size = __posix_fs_size;
+ file_system->terminate = __posix_terminate;
- /* Switch it into place. */
- conn->file_system = file_system;
+ /* Switch it into place. */
+ conn->file_system = file_system;
- return (0);
+ return (0);
}
diff --git a/src/third_party/wiredtiger/src/os_posix/os_getenv.c b/src/third_party/wiredtiger/src/os_posix/os_getenv.c
index e2e3c6f3153..f7eb9001376 100644
--- a/src/third_party/wiredtiger/src/os_posix/os_getenv.c
+++ b/src/third_party/wiredtiger/src/os_posix/os_getenv.c
@@ -10,18 +10,18 @@
/*
* __wt_getenv --
- * Get a non-NULL, greater than zero-length environment variable.
+ * Get a non-NULL, greater than zero-length environment variable.
*/
int
__wt_getenv(WT_SESSION_IMPL *session, const char *variable, const char **envp)
- WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
+ WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
{
- const char *temp;
+ const char *temp;
- *envp = NULL;
+ *envp = NULL;
- if (((temp = getenv(variable)) != NULL) && strlen(temp) > 0)
- return (__wt_strdup(session, temp, envp));
+ if (((temp = getenv(variable)) != NULL) && strlen(temp) > 0)
+ return (__wt_strdup(session, temp, envp));
- return (0);
+ return (0);
}
diff --git a/src/third_party/wiredtiger/src/os_posix/os_map.c b/src/third_party/wiredtiger/src/os_posix/os_map.c
index f0c251bca59..3008ec28c95 100644
--- a/src/third_party/wiredtiger/src/os_posix/os_map.c
+++ b/src/third_party/wiredtiger/src/os_posix/os_map.c
@@ -10,163 +10,156 @@
/*
* __wt_posix_map --
- * Map a file into memory.
+ * Map a file into memory.
*/
int
-__wt_posix_map(WT_FILE_HANDLE *fh, WT_SESSION *wt_session,
- void *mapped_regionp, size_t *lenp, void *mapped_cookiep)
+__wt_posix_map(WT_FILE_HANDLE *fh, WT_SESSION *wt_session, void *mapped_regionp, size_t *lenp,
+ void *mapped_cookiep)
{
- WT_FILE_HANDLE_POSIX *pfh;
- WT_SESSION_IMPL *session;
- wt_off_t file_size;
- size_t len;
- void *map;
-
- WT_UNUSED(mapped_cookiep);
-
- session = (WT_SESSION_IMPL *)wt_session;
- pfh = (WT_FILE_HANDLE_POSIX *)fh;
-
- /*
- * Mapping isn't possible if direct I/O configured for the file, the
- * Linux open(2) documentation says applications should avoid mixing
- * mmap(2) of files with direct I/O to the same files.
- */
- if (pfh->direct_io)
- return (__wt_set_return(session, ENOTSUP));
-
- /*
- * There's no locking here to prevent the underlying file from changing
- * underneath us, our caller needs to ensure consistency of the mapped
- * region vs. any other file activity.
- */
- WT_RET(fh->fh_size(fh, wt_session, &file_size));
- len = (size_t)file_size;
-
- __wt_verbose(session, WT_VERB_HANDLEOPS,
- "%s: memory-map: %" WT_SIZET_FMT " bytes", fh->name, len);
-
- if ((map = mmap(NULL, len,
- PROT_READ,
+ WT_FILE_HANDLE_POSIX *pfh;
+ WT_SESSION_IMPL *session;
+ wt_off_t file_size;
+ size_t len;
+ void *map;
+
+ WT_UNUSED(mapped_cookiep);
+
+ session = (WT_SESSION_IMPL *)wt_session;
+ pfh = (WT_FILE_HANDLE_POSIX *)fh;
+
+ /*
+ * Mapping isn't possible if direct I/O configured for the file, the Linux open(2) documentation
+ * says applications should avoid mixing mmap(2) of files with direct I/O to the same files.
+ */
+ if (pfh->direct_io)
+ return (__wt_set_return(session, ENOTSUP));
+
+ /*
+ * There's no locking here to prevent the underlying file from changing underneath us, our
+ * caller needs to ensure consistency of the mapped region vs. any other file activity.
+ */
+ WT_RET(fh->fh_size(fh, wt_session, &file_size));
+ len = (size_t)file_size;
+
+ __wt_verbose(
+ session, WT_VERB_HANDLEOPS, "%s: memory-map: %" WT_SIZET_FMT " bytes", fh->name, len);
+
+ if ((map = mmap(NULL, len, PROT_READ,
#ifdef MAP_NOCORE
- MAP_NOCORE |
+ MAP_NOCORE |
#endif
- MAP_PRIVATE,
- pfh->fd, (wt_off_t)0)) == MAP_FAILED)
- WT_RET_MSG(session,
- __wt_errno(), "%s: memory-map: mmap", fh->name);
-
- *(void **)mapped_regionp = map;
- *lenp = len;
- return (0);
+ MAP_PRIVATE,
+ pfh->fd, (wt_off_t)0)) == MAP_FAILED)
+ WT_RET_MSG(session, __wt_errno(), "%s: memory-map: mmap", fh->name);
+
+ *(void **)mapped_regionp = map;
+ *lenp = len;
+ return (0);
}
#ifdef HAVE_POSIX_MADVISE
/*
* __wt_posix_map_preload --
- * Cause a section of a memory map to be faulted in.
+ * Cause a section of a memory map to be faulted in.
*/
int
-__wt_posix_map_preload(WT_FILE_HANDLE *fh,
- WT_SESSION *wt_session, const void *map, size_t length, void *mapped_cookie)
+__wt_posix_map_preload(
+ WT_FILE_HANDLE *fh, WT_SESSION *wt_session, const void *map, size_t length, void *mapped_cookie)
{
- WT_BM *bm;
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
- void *blk;
-
- WT_UNUSED(mapped_cookie);
-
- session = (WT_SESSION_IMPL *)wt_session;
-
- conn = S2C(session);
- bm = S2BT(session)->bm;
-
- /* Linux requires the address be aligned to a 4KB boundary. */
- blk = (void *)((uintptr_t)map & ~(uintptr_t)(conn->page_size - 1));
- length += WT_PTRDIFF(map, blk);
-
- /* XXX proxy for "am I doing a scan?" -- manual read-ahead */
- if (F_ISSET(session, WT_SESSION_READ_WONT_NEED)) {
- /* Read in 2MB blocks every 1MB of data. */
- if (((uintptr_t)((uint8_t *)blk + length) &
- (uintptr_t)((1<<20) - 1)) < (uintptr_t)blk)
- return (0);
- length = WT_MIN(WT_MAX(20 * length, 2 << 20),
- WT_PTRDIFF((uint8_t *)bm->map + bm->maplen, blk));
- }
-
- /*
- * Manual pages aren't clear on whether alignment is required for the
- * size, so we will be conservative.
- */
- length &= ~(size_t)(conn->page_size - 1);
- if (length <= (size_t)conn->page_size)
- return (0);
-
- WT_SYSCALL(posix_madvise(blk, length, POSIX_MADV_WILLNEED), ret);
- if (ret == 0)
- return (0);
-
- WT_RET_MSG(session, ret,
- "%s: memory-map preload: posix_madvise: POSIX_MADV_WILLNEED",
- fh->name);
+ WT_BM *bm;
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+ void *blk;
+
+ WT_UNUSED(mapped_cookie);
+
+ session = (WT_SESSION_IMPL *)wt_session;
+
+ conn = S2C(session);
+ bm = S2BT(session)->bm;
+
+ /* Linux requires the address be aligned to a 4KB boundary. */
+ blk = (void *)((uintptr_t)map & ~(uintptr_t)(conn->page_size - 1));
+ length += WT_PTRDIFF(map, blk);
+
+ /* XXX proxy for "am I doing a scan?" -- manual read-ahead */
+ if (F_ISSET(session, WT_SESSION_READ_WONT_NEED)) {
+ /* Read in 2MB blocks every 1MB of data. */
+ if (((uintptr_t)((uint8_t *)blk + length) & (uintptr_t)((1 << 20) - 1)) < (uintptr_t)blk)
+ return (0);
+ length =
+ WT_MIN(WT_MAX(20 * length, 2 << 20), WT_PTRDIFF((uint8_t *)bm->map + bm->maplen, blk));
+ }
+
+ /*
+ * Manual pages aren't clear on whether alignment is required for the size, so we will be
+ * conservative.
+ */
+ length &= ~(size_t)(conn->page_size - 1);
+ if (length <= (size_t)conn->page_size)
+ return (0);
+
+ WT_SYSCALL(posix_madvise(blk, length, POSIX_MADV_WILLNEED), ret);
+ if (ret == 0)
+ return (0);
+
+ WT_RET_MSG(
+ session, ret, "%s: memory-map preload: posix_madvise: POSIX_MADV_WILLNEED", fh->name);
}
#endif
#ifdef HAVE_POSIX_MADVISE
/*
* __wt_posix_map_discard --
- * Discard a chunk of the memory map.
+ * Discard a chunk of the memory map.
*/
int
-__wt_posix_map_discard(WT_FILE_HANDLE *fh,
- WT_SESSION *wt_session, void *map, size_t length, void *mapped_cookie)
+__wt_posix_map_discard(
+ WT_FILE_HANDLE *fh, WT_SESSION *wt_session, void *map, size_t length, void *mapped_cookie)
{
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
- void *blk;
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+ void *blk;
- WT_UNUSED(mapped_cookie);
+ WT_UNUSED(mapped_cookie);
- session = (WT_SESSION_IMPL *)wt_session;
- conn = S2C(session);
+ session = (WT_SESSION_IMPL *)wt_session;
+ conn = S2C(session);
- /* Linux requires the address be aligned to a 4KB boundary. */
- blk = (void *)((uintptr_t)map & ~(uintptr_t)(conn->page_size - 1));
- length += WT_PTRDIFF(map, blk);
+ /* Linux requires the address be aligned to a 4KB boundary. */
+ blk = (void *)((uintptr_t)map & ~(uintptr_t)(conn->page_size - 1));
+ length += WT_PTRDIFF(map, blk);
- WT_SYSCALL(posix_madvise(blk, length, POSIX_MADV_DONTNEED), ret);
- if (ret == 0)
- return (0);
+ WT_SYSCALL(posix_madvise(blk, length, POSIX_MADV_DONTNEED), ret);
+ if (ret == 0)
+ return (0);
- WT_RET_MSG(session, ret,
- "%s: memory-map discard: posix_madvise: POSIX_MADV_DONTNEED",
- fh->name);
+ WT_RET_MSG(
+ session, ret, "%s: memory-map discard: posix_madvise: POSIX_MADV_DONTNEED", fh->name);
}
#endif
/*
* __wt_posix_unmap --
- * Remove a memory mapping.
+ * Remove a memory mapping.
*/
int
-__wt_posix_unmap(WT_FILE_HANDLE *fh, WT_SESSION *wt_session,
- void *mapped_region, size_t len, void *mapped_cookie)
+__wt_posix_unmap(
+ WT_FILE_HANDLE *fh, WT_SESSION *wt_session, void *mapped_region, size_t len, void *mapped_cookie)
{
- WT_SESSION_IMPL *session;
+ WT_SESSION_IMPL *session;
- WT_UNUSED(mapped_cookie);
+ WT_UNUSED(mapped_cookie);
- session = (WT_SESSION_IMPL *)wt_session;
+ session = (WT_SESSION_IMPL *)wt_session;
- __wt_verbose(session, WT_VERB_HANDLEOPS,
- "%s: memory-unmap: %" WT_SIZET_FMT " bytes", fh->name, len);
+ __wt_verbose(
+ session, WT_VERB_HANDLEOPS, "%s: memory-unmap: %" WT_SIZET_FMT " bytes", fh->name, len);
- if (munmap(mapped_region, len) == 0)
- return (0);
+ if (munmap(mapped_region, len) == 0)
+ return (0);
- WT_RET_MSG(session, __wt_errno(), "%s: memory-unmap: munmap", fh->name);
+ WT_RET_MSG(session, __wt_errno(), "%s: memory-unmap: munmap", fh->name);
}
diff --git a/src/third_party/wiredtiger/src/os_posix/os_mtx_cond.c b/src/third_party/wiredtiger/src/os_posix/os_mtx_cond.c
index 71f208ce568..bd68c7afdbd 100644
--- a/src/third_party/wiredtiger/src/os_posix/os_mtx_cond.c
+++ b/src/third_party/wiredtiger/src/os_posix/os_mtx_cond.c
@@ -10,203 +10,199 @@
/*
* __wt_cond_alloc --
- * Allocate and initialize a condition variable.
+ * Allocate and initialize a condition variable.
*/
int
__wt_cond_alloc(WT_SESSION_IMPL *session, const char *name, WT_CONDVAR **condp)
{
- WT_CONDVAR *cond;
- WT_DECL_RET;
+ WT_CONDVAR *cond;
+ WT_DECL_RET;
- WT_RET(__wt_calloc_one(session, &cond));
- WT_ERR(pthread_mutex_init(&cond->mtx, NULL));
+ WT_RET(__wt_calloc_one(session, &cond));
+ WT_ERR(pthread_mutex_init(&cond->mtx, NULL));
#ifdef HAVE_PTHREAD_COND_MONOTONIC
- {
- pthread_condattr_t condattr;
-
- WT_ERR(pthread_condattr_init(&condattr));
- ret = pthread_condattr_setclock(&condattr, CLOCK_MONOTONIC);
- if (ret == 0)
- ret = pthread_cond_init(&cond->cond, &condattr);
- WT_TRET(pthread_condattr_destroy(&condattr));
- WT_ERR(ret);
- }
+ {
+ pthread_condattr_t condattr;
+
+ WT_ERR(pthread_condattr_init(&condattr));
+ ret = pthread_condattr_setclock(&condattr, CLOCK_MONOTONIC);
+ if (ret == 0)
+ ret = pthread_cond_init(&cond->cond, &condattr);
+ WT_TRET(pthread_condattr_destroy(&condattr));
+ WT_ERR(ret);
+ }
#else
- WT_ERR(pthread_cond_init(&cond->cond, NULL));
+ WT_ERR(pthread_cond_init(&cond->cond, NULL));
#endif
- cond->name = name;
- cond->waiters = 0;
+ cond->name = name;
+ cond->waiters = 0;
- *condp = cond;
- return (0);
+ *condp = cond;
+ return (0);
-err: __wt_free(session, cond);
- return (ret);
+err:
+ __wt_free(session, cond);
+ return (ret);
}
/*
* __wt_cond_wait_signal --
- * Wait on a mutex, optionally timing out. If we get it before the time
- * out period expires, let the caller know.
+ * Wait on a condition variable, optionally timing out. If we are signalled before the timeout
+ * period expires, let the caller know.
*/
void
-__wt_cond_wait_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond,
- uint64_t usecs, bool (*run_func)(WT_SESSION_IMPL *), bool *signalled)
+__wt_cond_wait_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond, uint64_t usecs,
+ bool (*run_func)(WT_SESSION_IMPL *), bool *signalled)
{
- struct timespec ts;
- WT_DECL_RET;
- WT_TRACK_OP_DECL;
- bool locked;
-
- WT_TRACK_OP_INIT(session);
-
- locked = false;
-
- /* Fast path if already signalled. */
- *signalled = true;
- if (__wt_atomic_addi32(&cond->waiters, 1) == 0) {
- WT_TRACK_OP_END(session);
- return;
- }
-
- __wt_verbose(session, WT_VERB_MUTEX, "wait %s", cond->name);
- WT_STAT_CONN_INCR(session, cond_wait);
-
- WT_ERR(pthread_mutex_lock(&cond->mtx));
- locked = true;
-
- /*
- * It's possible to race with threads waking us up. That's not a problem
- * if there are multiple wakeups because the next wakeup will get us, or
- * if we're only pausing for a short period. It's a problem if there's
- * only a single wakeup, our waker is likely waiting for us to exit.
- * After acquiring the mutex (so we're guaranteed to be awakened by any
- * future wakeup call), optionally check if we're OK to keep running.
- * This won't ensure our caller won't just loop and call us again, but
- * at least it's not our fault.
- *
- * Assert we're not waiting longer than a second if not checking the
- * run status.
- */
- WT_ASSERT(session, run_func != NULL || usecs <= WT_MILLION);
- if (run_func != NULL && !run_func(session))
- goto skipping;
-
- if (usecs > 0) {
- /*
- * Get the current time as the basis for calculating when the
- * wait should end. Prefer a monotonic clock source to avoid
- * unexpectedly long sleeps when the system clock is adjusted.
- *
- * Failing that, query the time directly and don't attempt to
- * correct for the clock moving backwards, which would result
- * in a sleep that is too long by however much the clock is
- * updated. This isn't as good as a monotonic clock source but
- * makes the window of vulnerability smaller (i.e., the
- * calculated time is only incorrect if the system clock
- * changes in between us querying it and waiting).
- */
+ struct timespec ts;
+ WT_DECL_RET;
+ WT_TRACK_OP_DECL;
+ bool locked;
+
+ WT_TRACK_OP_INIT(session);
+
+ locked = false;
+
+ /* Fast path if already signalled. */
+ *signalled = true;
+ if (__wt_atomic_addi32(&cond->waiters, 1) == 0) {
+ WT_TRACK_OP_END(session);
+ return;
+ }
+
+ __wt_verbose(session, WT_VERB_MUTEX, "wait %s", cond->name);
+ WT_STAT_CONN_INCR(session, cond_wait);
+
+ WT_ERR(pthread_mutex_lock(&cond->mtx));
+ locked = true;
+
+ /*
+ * It's possible to race with threads waking us up. That's not a problem
+ * if there are multiple wakeups because the next wakeup will get us, or
+ * if we're only pausing for a short period. It's a problem if there's
+ * only a single wakeup, our waker is likely waiting for us to exit.
+ * After acquiring the mutex (so we're guaranteed to be awakened by any
+ * future wakeup call), optionally check if we're OK to keep running.
+ * This won't ensure our caller won't just loop and call us again, but
+ * at least it's not our fault.
+ *
+ * Assert we're not waiting longer than a second if not checking the
+ * run status.
+ */
+ WT_ASSERT(session, run_func != NULL || usecs <= WT_MILLION);
+ if (run_func != NULL && !run_func(session))
+ goto skipping;
+
+ if (usecs > 0) {
+/*
+ * Get the current time as the basis for calculating when the
+ * wait should end. Prefer a monotonic clock source to avoid
+ * unexpectedly long sleeps when the system clock is adjusted.
+ *
+ * Failing that, query the time directly and don't attempt to
+ * correct for the clock moving backwards, which would result
+ * in a sleep that is too long by however much the clock is
+ * updated. This isn't as good as a monotonic clock source but
+ * makes the window of vulnerability smaller (i.e., the
+ * calculated time is only incorrect if the system clock
+ * changes in between us querying it and waiting).
+ */
#ifdef HAVE_PTHREAD_COND_MONOTONIC
- WT_SYSCALL_RETRY(clock_gettime(CLOCK_MONOTONIC, &ts), ret);
- if (ret != 0)
- WT_PANIC_MSG(session, ret, "clock_gettime");
+ WT_SYSCALL_RETRY(clock_gettime(CLOCK_MONOTONIC, &ts), ret);
+ if (ret != 0)
+ WT_PANIC_MSG(session, ret, "clock_gettime");
#else
- __wt_epoch_raw(session, &ts);
+ __wt_epoch_raw(session, &ts);
#endif
- ts.tv_sec += (time_t)
- (((uint64_t)ts.tv_nsec + WT_THOUSAND * usecs) / WT_BILLION);
- ts.tv_nsec = (long)
- (((uint64_t)ts.tv_nsec + WT_THOUSAND * usecs) % WT_BILLION);
- ret = pthread_cond_timedwait(&cond->cond, &cond->mtx, &ts);
- } else
- ret = pthread_cond_wait(&cond->cond, &cond->mtx);
-
- /*
- * Check pthread_cond_wait() return for EINTR, ETIME and
- * ETIMEDOUT, some systems return these errors.
- */
- if (ret == EINTR ||
+ ts.tv_sec += (time_t)(((uint64_t)ts.tv_nsec + WT_THOUSAND * usecs) / WT_BILLION);
+ ts.tv_nsec = (long)(((uint64_t)ts.tv_nsec + WT_THOUSAND * usecs) % WT_BILLION);
+ ret = pthread_cond_timedwait(&cond->cond, &cond->mtx, &ts);
+ } else
+ ret = pthread_cond_wait(&cond->cond, &cond->mtx);
+
+ /*
+ * Check pthread_cond_wait() return for EINTR, ETIME and ETIMEDOUT, some systems return these
+ * errors.
+ */
+ if (ret == EINTR ||
#ifdef ETIME
- ret == ETIME ||
+ ret == ETIME ||
#endif
- ret == ETIMEDOUT) {
-skipping: *signalled = false;
- ret = 0;
- }
+ ret == ETIMEDOUT) {
+skipping:
+ *signalled = false;
+ ret = 0;
+ }
-err: (void)__wt_atomic_subi32(&cond->waiters, 1);
+err:
+ (void)__wt_atomic_subi32(&cond->waiters, 1);
- if (locked)
- WT_TRET(pthread_mutex_unlock(&cond->mtx));
+ if (locked)
+ WT_TRET(pthread_mutex_unlock(&cond->mtx));
- WT_TRACK_OP_END(session);
- if (ret == 0)
- return;
+ WT_TRACK_OP_END(session);
+ if (ret == 0)
+ return;
- WT_PANIC_MSG(session, ret, "pthread_cond_wait: %s", cond->name);
+ WT_PANIC_MSG(session, ret, "pthread_cond_wait: %s", cond->name);
}
/*
* __wt_cond_signal --
- * Signal a waiting thread.
+ * Signal a waiting thread.
*/
void
__wt_cond_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond)
{
- WT_DECL_RET;
-
- __wt_verbose(session, WT_VERB_MUTEX, "signal %s", cond->name);
-
- /*
- * Our callers often set flags to cause a thread to exit. Add a barrier
- * to ensure exit flags are seen by the sleeping threads, otherwise we
- * can wake up a thread, it immediately goes back to sleep, and we'll
- * hang. Use a full barrier (we may not write before waiting on thread
- * join).
- */
- WT_FULL_BARRIER();
-
- /*
- * Fast path if we are in (or can enter), a state where the next waiter
- * will return immediately as already signaled.
- */
- if (cond->waiters == -1 ||
- (cond->waiters == 0 && __wt_atomic_casi32(&cond->waiters, 0, -1)))
- return;
-
- WT_ERR(pthread_mutex_lock(&cond->mtx));
- ret = pthread_cond_broadcast(&cond->cond);
- WT_TRET(pthread_mutex_unlock(&cond->mtx));
- if (ret == 0)
- return;
+ WT_DECL_RET;
+
+ __wt_verbose(session, WT_VERB_MUTEX, "signal %s", cond->name);
+
+ /*
+ * Our callers often set flags to cause a thread to exit. Add a barrier to ensure exit flags are
+ * seen by the sleeping threads, otherwise we can wake up a thread, it immediately goes back to
+ * sleep, and we'll hang. Use a full barrier (we may not write before waiting on thread join).
+ */
+ WT_FULL_BARRIER();
+
+ /*
+ * Fast path if we are in (or can enter) a state where the next waiter will return immediately
+ * as already signaled.
+ */
+ if (cond->waiters == -1 || (cond->waiters == 0 && __wt_atomic_casi32(&cond->waiters, 0, -1)))
+ return;
+
+ WT_ERR(pthread_mutex_lock(&cond->mtx));
+ ret = pthread_cond_broadcast(&cond->cond);
+ WT_TRET(pthread_mutex_unlock(&cond->mtx));
+ if (ret == 0)
+ return;
err:
- WT_PANIC_MSG(session, ret, "pthread_cond_broadcast: %s", cond->name);
+ WT_PANIC_MSG(session, ret, "pthread_cond_broadcast: %s", cond->name);
}
/*
* __wt_cond_destroy --
- * Destroy a condition variable.
+ * Destroy a condition variable.
*/
void
__wt_cond_destroy(WT_SESSION_IMPL *session, WT_CONDVAR **condp)
{
- WT_CONDVAR *cond;
- WT_DECL_RET;
+ WT_CONDVAR *cond;
+ WT_DECL_RET;
- cond = *condp;
- if (cond == NULL)
- return;
+ cond = *condp;
+ if (cond == NULL)
+ return;
- if ((ret = pthread_cond_destroy(&cond->cond)) != 0)
- WT_PANIC_MSG(
- session, ret, "pthread_cond_destroy: %s", cond->name);
+ if ((ret = pthread_cond_destroy(&cond->cond)) != 0)
+ WT_PANIC_MSG(session, ret, "pthread_cond_destroy: %s", cond->name);
- if ((ret = pthread_mutex_destroy(&cond->mtx)) != 0)
- WT_PANIC_MSG(
- session, ret, "pthread_mutex_destroy: %s", cond->name);
+ if ((ret = pthread_mutex_destroy(&cond->mtx)) != 0)
+ WT_PANIC_MSG(session, ret, "pthread_mutex_destroy: %s", cond->name);
- __wt_free(session, *condp);
+ __wt_free(session, *condp);
}
diff --git a/src/third_party/wiredtiger/src/os_posix/os_once.c b/src/third_party/wiredtiger/src/os_posix/os_once.c
index 9477ba614c5..96483e4aaab 100644
--- a/src/third_party/wiredtiger/src/os_posix/os_once.c
+++ b/src/third_party/wiredtiger/src/os_posix/os_once.c
@@ -10,12 +10,12 @@
/*
* __wt_once --
- * One-time initialization per process.
+ * One-time initialization per process.
*/
int
__wt_once(void (*init_routine)(void))
{
- static pthread_once_t once_control = PTHREAD_ONCE_INIT;
+ static pthread_once_t once_control = PTHREAD_ONCE_INIT;
- return (pthread_once(&once_control, init_routine));
+ return (pthread_once(&once_control, init_routine));
}
diff --git a/src/third_party/wiredtiger/src/os_posix/os_pagesize.c b/src/third_party/wiredtiger/src/os_posix/os_pagesize.c
index f4d04997d0b..07ac76c64cd 100644
--- a/src/third_party/wiredtiger/src/os_posix/os_pagesize.c
+++ b/src/third_party/wiredtiger/src/os_posix/os_pagesize.c
@@ -10,10 +10,10 @@
/*
* __wt_get_vm_pagesize --
- * Return the default page size of a virtual memory page.
+ * Return the default page size of a virtual memory page.
*/
int
__wt_get_vm_pagesize(void)
{
- return (getpagesize());
+ return (getpagesize());
}
diff --git a/src/third_party/wiredtiger/src/os_posix/os_path.c b/src/third_party/wiredtiger/src/os_posix/os_path.c
index 9c3fc69cc65..b21c1e5bf6d 100644
--- a/src/third_party/wiredtiger/src/os_posix/os_path.c
+++ b/src/third_party/wiredtiger/src/os_posix/os_path.c
@@ -10,20 +10,20 @@
/*
* __wt_absolute_path --
- * Return if a filename is an absolute path.
+ * Return if a filename is an absolute path.
*/
bool
__wt_absolute_path(const char *path)
{
- return (path[0] == '/');
+ return (path[0] == '/');
}
/*
* __wt_path_separator --
- * Return the path separator string.
+ * Return the path separator string.
*/
const char *
__wt_path_separator(void)
{
- return ("/");
+ return ("/");
}
diff --git a/src/third_party/wiredtiger/src/os_posix/os_priv.c b/src/third_party/wiredtiger/src/os_posix/os_priv.c
index 7f476c41c5a..07c603c0b68 100644
--- a/src/third_party/wiredtiger/src/os_posix/os_priv.c
+++ b/src/third_party/wiredtiger/src/os_posix/os_priv.c
@@ -10,11 +10,11 @@
/*
* __wt_has_priv --
- * Return if the process has special privileges, defined as having
- * different effective and read UIDs or GIDs.
+ * Return if the process has special privileges, defined as having different effective and real
+ * UIDs or GIDs.
*/
bool
__wt_has_priv(void)
{
- return (getuid() != geteuid() || getgid() != getegid());
+ return (getuid() != geteuid() || getgid() != getegid());
}
diff --git a/src/third_party/wiredtiger/src/os_posix/os_setvbuf.c b/src/third_party/wiredtiger/src/os_posix/os_setvbuf.c
index c9a2bff327e..462288accf2 100644
--- a/src/third_party/wiredtiger/src/os_posix/os_setvbuf.c
+++ b/src/third_party/wiredtiger/src/os_posix/os_setvbuf.c
@@ -10,27 +10,25 @@
/*
* __wt_stream_set_line_buffer --
- * Set line buffering on a stream.
+ * Set line buffering on a stream.
*/
void
-__wt_stream_set_line_buffer(FILE *fp)
- WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
+__wt_stream_set_line_buffer(FILE *fp) WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
{
- /*
- * This function exists because MSVC doesn't support buffer sizes of 0
- * to the setvbuf call. To avoid re-introducing the bug, we have helper
- * functions and disallow calling setvbuf directly in WiredTiger code.
- */
- (void)setvbuf(fp, NULL, _IOLBF, 1024);
+ /*
+ * This function exists because MSVC doesn't support buffer sizes of 0 to the setvbuf call. To
+ * avoid re-introducing the bug, we have helper functions and disallow calling setvbuf directly
+ * in WiredTiger code.
+ */
+ (void)setvbuf(fp, NULL, _IOLBF, 1024);
}
/*
* __wt_stream_set_no_buffer --
- * Turn off buffering on a stream.
+ * Turn off buffering on a stream.
*/
void
-__wt_stream_set_no_buffer(FILE *fp)
- WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
+__wt_stream_set_no_buffer(FILE *fp) WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
{
- (void)setvbuf(fp, NULL, _IONBF, 0);
+ (void)setvbuf(fp, NULL, _IONBF, 0);
}
diff --git a/src/third_party/wiredtiger/src/os_posix/os_sleep.c b/src/third_party/wiredtiger/src/os_posix/os_sleep.c
index c9676dcd585..1508755d504 100644
--- a/src/third_party/wiredtiger/src/os_posix/os_sleep.c
+++ b/src/third_party/wiredtiger/src/os_posix/os_sleep.c
@@ -10,24 +10,22 @@
/*
* __wt_sleep --
- * Pause the thread of control.
+ * Pause the thread of control.
*/
void
-__wt_sleep(uint64_t seconds, uint64_t micro_seconds)
- WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
+__wt_sleep(uint64_t seconds, uint64_t micro_seconds) WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
{
- struct timeval t;
+ struct timeval t;
- /*
- * Sleeping isn't documented as a memory barrier, and it's a reasonable
- * expectation to have. There's no reason not to explicitly include a
- * barrier since we're giving up the CPU, and ensures callers are never
- * surprised.
- */
- WT_FULL_BARRIER();
+ /*
+ * Sleeping isn't documented as a memory barrier, and it's a reasonable expectation to have.
+ * There's no reason not to explicitly include a barrier since we're giving up the CPU, and
+ * ensures callers are never surprised.
+ */
+ WT_FULL_BARRIER();
- t.tv_sec = (time_t)(seconds + micro_seconds / WT_MILLION);
- t.tv_usec = (suseconds_t)(micro_seconds % WT_MILLION);
+ t.tv_sec = (time_t)(seconds + micro_seconds / WT_MILLION);
+ t.tv_usec = (suseconds_t)(micro_seconds % WT_MILLION);
- (void)select(0, NULL, NULL, NULL, &t);
+ (void)select(0, NULL, NULL, NULL, &t);
}
diff --git a/src/third_party/wiredtiger/src/os_posix/os_snprintf.c b/src/third_party/wiredtiger/src/os_posix/os_snprintf.c
index cc532290b2c..651f570726d 100644
--- a/src/third_party/wiredtiger/src/os_posix/os_snprintf.c
+++ b/src/third_party/wiredtiger/src/os_posix/os_snprintf.c
@@ -10,18 +10,17 @@
/*
* __wt_vsnprintf_len_incr --
- * POSIX vsnprintf convenience function, incrementing the returned size.
+ * POSIX vsnprintf convenience function, incrementing the returned size.
*/
int
-__wt_vsnprintf_len_incr(
- char *buf, size_t size, size_t *retsizep, const char *fmt, va_list ap)
- WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
+__wt_vsnprintf_len_incr(char *buf, size_t size, size_t *retsizep, const char *fmt, va_list ap)
+ WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
{
- WT_DECL_RET;
+ WT_DECL_RET;
- if ((ret = vsnprintf(buf, size, fmt, ap)) >= 0) {
- *retsizep += (size_t)ret;
- return (0);
- }
- return (__wt_errno());
+ if ((ret = vsnprintf(buf, size, fmt, ap)) >= 0) {
+ *retsizep += (size_t)ret;
+ return (0);
+ }
+ return (__wt_errno());
}
diff --git a/src/third_party/wiredtiger/src/os_posix/os_thread.c b/src/third_party/wiredtiger/src/os_posix/os_thread.c
index 613df15d6d0..bc0e739c193 100644
--- a/src/third_party/wiredtiger/src/os_posix/os_thread.c
+++ b/src/third_party/wiredtiger/src/os_posix/os_thread.c
@@ -10,112 +10,105 @@
/*
* __wt_thread_create --
- * Create a new thread of control.
+ * Create a new thread of control.
*/
int
-__wt_thread_create(WT_SESSION_IMPL *session,
- wt_thread_t *tidret, WT_THREAD_CALLBACK(*func)(void *), void *arg)
- WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
+__wt_thread_create(WT_SESSION_IMPL *session, wt_thread_t *tidret,
+ WT_THREAD_CALLBACK (*func)(void *), void *arg) WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
{
- WT_DECL_RET;
+ WT_DECL_RET;
- /*
- * Creating a thread isn't a memory barrier, but WiredTiger commonly
- * sets flags and or state and then expects worker threads to start.
- * Include a barrier to ensure safety in those cases.
- */
- WT_FULL_BARRIER();
+ /*
+ * Creating a thread isn't a memory barrier, but WiredTiger commonly sets flags and/or state and
+ * then expects worker threads to start. Include a barrier to ensure safety in those cases.
+ */
+ WT_FULL_BARRIER();
- /* Spawn a new thread of control. */
- WT_SYSCALL_RETRY(pthread_create(&tidret->id, NULL, func, arg), ret);
- if (ret == 0) {
- tidret->created = true;
- return (0);
- }
- WT_RET_MSG(session, ret, "pthread_create");
+ /* Spawn a new thread of control. */
+ WT_SYSCALL_RETRY(pthread_create(&tidret->id, NULL, func, arg), ret);
+ if (ret == 0) {
+ tidret->created = true;
+ return (0);
+ }
+ WT_RET_MSG(session, ret, "pthread_create");
}
/*
* __wt_thread_join --
- * Wait for a thread of control to exit.
+ * Wait for a thread of control to exit.
*/
int
__wt_thread_join(WT_SESSION_IMPL *session, wt_thread_t *tid)
- WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
+ WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
{
- WT_DECL_RET;
+ WT_DECL_RET;
- /* Only attempt to join if thread was created successfully */
- if (!tid->created)
- return (0);
- tid->created = false;
+ /* Only attempt to join if thread was created successfully */
+ if (!tid->created)
+ return (0);
+ tid->created = false;
- /*
- * Joining a thread isn't a memory barrier, but WiredTiger commonly
- * sets flags and or state and then expects worker threads to halt.
- * Include a barrier to ensure safety in those cases.
- */
- WT_FULL_BARRIER();
+ /*
+ * Joining a thread isn't a memory barrier, but WiredTiger commonly sets flags and/or state and
+ * then expects worker threads to halt. Include a barrier to ensure safety in those cases.
+ */
+ WT_FULL_BARRIER();
- WT_SYSCALL(pthread_join(tid->id, NULL), ret);
- if (ret == 0)
- return (0);
+ WT_SYSCALL(pthread_join(tid->id, NULL), ret);
+ if (ret == 0)
+ return (0);
- WT_RET_MSG(session, ret, "pthread_join");
+ WT_RET_MSG(session, ret, "pthread_join");
}
/*
* __wt_thread_id --
- * Return an arithmetic representation of a thread ID on POSIX.
+ * Return an arithmetic representation of a thread ID on POSIX.
*/
void
-__wt_thread_id(uintmax_t *id)
- WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
+__wt_thread_id(uintmax_t *id) WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
{
- pthread_t self;
+ pthread_t self;
- /*
- * POSIX 1003.1 allows pthread_t to be an opaque type; on systems where
- * it's a pointer, print the pointer to match gdb output.
- */
- self = pthread_self();
+ /*
+ * POSIX 1003.1 allows pthread_t to be an opaque type; on systems where it's a pointer, print
+ * the pointer to match gdb output.
+ */
+ self = pthread_self();
#ifdef __sun
- *id = (uintmax_t)self;
+ *id = (uintmax_t)self;
#else
- *id = (uintmax_t)(void *)self;
+ *id = (uintmax_t)(void *)self;
#endif
}
/*
* __wt_thread_str --
- * Fill in a printable version of the process and thread IDs.
+ * Fill in a printable version of the process and thread IDs.
*/
int
-__wt_thread_str(char *buf, size_t buflen)
- WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
+__wt_thread_str(char *buf, size_t buflen) WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
{
- pthread_t self;
+ pthread_t self;
- /*
- * POSIX 1003.1 allows pthread_t to be an opaque type; on systems where
- * it's a pointer, print the pointer to match gdb output.
- */
- self = pthread_self();
+ /*
+ * POSIX 1003.1 allows pthread_t to be an opaque type; on systems where it's a pointer, print
+ * the pointer to match gdb output.
+ */
+ self = pthread_self();
#ifdef __sun
- return (__wt_snprintf(buf, buflen,
- "%" PRIuMAX ":%u", (uintmax_t)getpid(), self));
+ return (__wt_snprintf(buf, buflen, "%" PRIuMAX ":%u", (uintmax_t)getpid(), self));
#else
- return (__wt_snprintf(buf, buflen,
- "%" PRIuMAX ":%p", (uintmax_t)getpid(), (void *)self));
+ return (__wt_snprintf(buf, buflen, "%" PRIuMAX ":%p", (uintmax_t)getpid(), (void *)self));
#endif
}
/*
* __wt_process_id --
- * Return the process ID assigned by the operating system.
+ * Return the process ID assigned by the operating system.
*/
uintmax_t
__wt_process_id(void)
{
- return ((uintmax_t)getpid());
+ return ((uintmax_t)getpid());
}
diff --git a/src/third_party/wiredtiger/src/os_posix/os_time.c b/src/third_party/wiredtiger/src/os_posix/os_time.c
index 8fd63ada9e9..9b4729994df 100644
--- a/src/third_party/wiredtiger/src/os_posix/os_time.c
+++ b/src/third_party/wiredtiger/src/os_posix/os_time.c
@@ -10,54 +10,53 @@
/*
* __wt_epoch_raw --
- * Return the time since the Epoch as reported by a system call.
+ * Return the time since the Epoch as reported by a system call.
*/
void
__wt_epoch_raw(WT_SESSION_IMPL *session, struct timespec *tsp)
{
- WT_DECL_RET;
+ WT_DECL_RET;
- /*
- * This function doesn't return an error, but panics on failure (which
- * should never happen, it's done this way to simplify error handling
- * in the caller). However, some compilers complain about using garbage
- * values. Initializing the values avoids the complaint.
- */
- tsp->tv_sec = 0;
- tsp->tv_nsec = 0;
+ /*
+ * This function doesn't return an error, but panics on failure (which should never happen, it's
+ * done this way to simplify error handling in the caller). However, some compilers complain
+ * about using garbage values. Initializing the values avoids the complaint.
+ */
+ tsp->tv_sec = 0;
+ tsp->tv_nsec = 0;
#if defined(HAVE_CLOCK_GETTIME)
- WT_SYSCALL_RETRY(clock_gettime(CLOCK_REALTIME, tsp), ret);
- if (ret == 0)
- return;
- WT_PANIC_MSG(session, ret, "clock_gettime");
+ WT_SYSCALL_RETRY(clock_gettime(CLOCK_REALTIME, tsp), ret);
+ if (ret == 0)
+ return;
+ WT_PANIC_MSG(session, ret, "clock_gettime");
#elif defined(HAVE_GETTIMEOFDAY)
- {
- struct timeval v;
+ {
+ struct timeval v;
- WT_SYSCALL_RETRY(gettimeofday(&v, NULL), ret);
- if (ret == 0) {
- tsp->tv_sec = v.tv_sec;
- tsp->tv_nsec = v.tv_usec * WT_THOUSAND;
- return;
- }
- WT_PANIC_MSG(session, ret, "gettimeofday");
- }
+ WT_SYSCALL_RETRY(gettimeofday(&v, NULL), ret);
+ if (ret == 0) {
+ tsp->tv_sec = v.tv_sec;
+ tsp->tv_nsec = v.tv_usec * WT_THOUSAND;
+ return;
+ }
+ WT_PANIC_MSG(session, ret, "gettimeofday");
+ }
#else
- NO TIME-OF-DAY IMPLEMENTATION: see src/os_posix/os_time.c
+ NO TIME - OF - DAY IMPLEMENTATION : see src / os_posix / os_time.c
#endif
}
/*
* __wt_localtime --
- * Return the current local broken-down time.
+ * Return the current local broken-down time.
*/
int
__wt_localtime(WT_SESSION_IMPL *session, const time_t *timep, struct tm *result)
- WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
+ WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
{
- if (localtime_r(timep, result) != NULL)
- return (0);
+ if (localtime_r(timep, result) != NULL)
+ return (0);
- WT_RET_MSG(session, __wt_errno(), "localtime_r");
+ WT_RET_MSG(session, __wt_errno(), "localtime_r");
}
diff --git a/src/third_party/wiredtiger/src/os_posix/os_yield.c b/src/third_party/wiredtiger/src/os_posix/os_yield.c
index 080f6b29c97..2a5e07c4e99 100644
--- a/src/third_party/wiredtiger/src/os_posix/os_yield.c
+++ b/src/third_party/wiredtiger/src/os_posix/os_yield.c
@@ -10,19 +10,17 @@
/*
* __wt_yield --
- * Yield the thread of control.
+ * Yield the thread of control.
*/
void
-__wt_yield(void)
- WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
+__wt_yield(void) WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
{
- /*
- * Yielding the processor isn't documented as a memory barrier, and it's
- * a reasonable expectation to have. There's no reason not to explicitly
- * include a barrier since we're giving up the CPU, and ensures callers
- * aren't ever surprised.
- */
- WT_FULL_BARRIER();
+ /*
+ * Yielding the processor isn't documented as a memory barrier, and it's a reasonable
+ * expectation to have. There's no reason not to explicitly include a barrier since we're giving
+ * up the CPU, and ensures callers aren't ever surprised.
+ */
+ WT_FULL_BARRIER();
- sched_yield();
+ sched_yield();
}