summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--dist/s_string.ok1
-rw-r--r--examples/c/ex_file_system.c4
-rw-r--r--src/block/block_map.c2
-rw-r--r--src/block/block_write.c81
-rw-r--r--src/docs/custom-file-systems.dox43
-rw-r--r--src/docs/spell.ok4
-rw-r--r--src/include/extern_posix.h2
-rw-r--r--src/include/os_fhandle.i49
-rw-r--r--src/include/wiredtiger.in56
-rw-r--r--src/log/log.c14
-rw-r--r--src/os_common/os_fhandle.c16
-rw-r--r--src/os_posix/os_fallocate.c84
-rw-r--r--src/os_posix/os_fs.c2
-rw-r--r--src/os_win/os_fs.c5
14 files changed, 182 insertions, 181 deletions
diff --git a/dist/s_string.ok b/dist/s_string.ok
index 8c5f1e99bff..1c9891801ae 100644
--- a/dist/s_string.ok
+++ b/dist/s_string.ok
@@ -629,6 +629,7 @@ fcntl
fd
fdatasync
fdopen
+fextend
ffc
fflush
ffs
diff --git a/examples/c/ex_file_system.c b/examples/c/ex_file_system.c
index 55ee20e9331..524a5d03f89 100644
--- a/examples/c/ex_file_system.c
+++ b/examples/c/ex_file_system.c
@@ -352,8 +352,8 @@ demo_fs_open(WT_FILE_SYSTEM *file_system, WT_SESSION *session,
*/
file_handle->close = demo_file_close;
file_handle->fh_advise = NULL;
- file_handle->fh_allocate = NULL;
- file_handle->fh_allocate_nolock = NULL;
+ file_handle->fh_extend = NULL;
+ file_handle->fh_extend_nolock = NULL;
file_handle->fh_lock = demo_file_lock;
file_handle->fh_map = NULL;
file_handle->fh_map_discard = NULL;
diff --git a/src/block/block_map.c b/src/block/block_map.c
index d2c70fb4c49..b7afa61cc55 100644
--- a/src/block/block_map.c
+++ b/src/block/block_map.c
@@ -56,7 +56,7 @@ __wt_block_map(WT_SESSION_IMPL *session, WT_BLOCK *block,
*/
ret = handle->fh_map(handle,
(WT_SESSION *)session, mapped_regionp, lengthp, mapped_cookiep);
- if (ret == ENOTSUP) {
+ if (ret == EBUSY || ret == ENOTSUP) {
*(void **)mapped_regionp = NULL;
ret = 0;
}
diff --git a/src/block/block_write.c b/src/block/block_write.c
index 30d06e6259a..7e5b027ce2b 100644
--- a/src/block/block_write.c
+++ b/src/block/block_write.c
@@ -82,22 +82,18 @@ __wt_block_extend(WT_SESSION_IMPL *session, WT_BLOCK *block,
{
WT_DECL_RET;
WT_FILE_HANDLE *handle;
- bool locked;
/*
* The locking in this function is messy: by definition, the live system
* is locked when we're called, but that lock may have been acquired by
* our caller or our caller's caller. If our caller's lock, release_lock
- * comes in set, indicating this function can unlock it before returning
- * (either before extending the file or afterward, depending on the call
- * used). If it is our caller's caller, then release_lock comes in not
- * set, indicating it cannot be released here.
+ * comes in set and this function can unlock it before returning (so it
+ * isn't held while extending the file). If it is our caller's caller,
+ * then release_lock comes in not set, indicating it cannot be released
+ * here.
*
- * If we unlock here, we clear release_lock. But if we then find out we
- * need a lock after all, we re-acquire the lock and set release_lock so
- * our caller knows to release it.
+ * If we unlock here, we clear release_lock.
*/
- locked = true;
/* If not configured to extend the file, we're done. */
if (block->extend_len == 0)
@@ -122,62 +118,39 @@ __wt_block_extend(WT_SESSION_IMPL *session, WT_BLOCK *block,
* used to extend the file initialize the extended space. If a writing
* thread races with the extending thread, the extending thread might
* overwrite already written data, and that would be very, very bad.
- *
- * Some variants of the system call to extend the file fail at run-time
- * based on the filesystem type, fall back to ftruncate in that case,
- * and remember that ftruncate requires locking.
*/
handle = fh->handle;
- if (handle->fh_allocate != NULL ||
- handle->fh_allocate_nolock != NULL) {
- /*
- * Release any locally acquired lock if not needed to extend the
- * file, extending the file may require updating on-disk file's
- * metadata, which can be slow. (It may be a bad idea to
- * configure for file extension on systems that require locking
- * over the extend call.)
- */
- if (handle->fh_allocate_nolock != NULL && *release_lockp) {
- *release_lockp = locked = false;
- __wt_spin_unlock(session, &block->live_lock);
- }
-
- /*
- * Extend the file: there's a race between setting the value of
- * extend_size and doing the extension, but it should err on the
- * side of extend_size being smaller than the actual file size,
- * and that's OK, we simply may do another extension sooner than
- * otherwise.
- */
- block->extend_size = block->size + block->extend_len * 2;
- if ((ret = __wt_fallocate(
- session, fh, block->size, block->extend_len * 2)) == 0)
- return (0);
- WT_RET_ERROR_OK(ret, ENOTSUP);
- }
+ if (handle->fh_extend == NULL && handle->fh_extend_nolock == NULL)
+ return (0);
/*
- * We may have a caller lock or a locally acquired lock, but we need a
- * lock to call ftruncate.
+ * Set the extend_size before releasing the lock: we don't want to read
+ * and manipulate multiple values without holding a lock.
+ *
+ * There's a race between the calculation and doing the extension, but
+ * it should err on the side of extend_size being smaller than the
+ * actual file size, and that's OK, we simply may do another extension
+ * sooner than otherwise.
*/
- if (!locked) {
- __wt_spin_lock(session, &block->live_lock);
- *release_lockp = true;
- }
+ block->extend_size = block->size + block->extend_len * 2;
/*
- * The underlying truncate call initializes allocated space, reset the
- * extend length after locking so we don't overwrite already-written
- * blocks.
+ * Release any locally acquired lock if not needed to extend the file,
+ * extending the file may require updating on-disk file's metadata,
+ * which can be slow. (It may be a bad idea to configure for file
+ * extension on systems that require locking over the extend call.)
*/
- block->extend_size = block->size + block->extend_len * 2;
+ if (handle->fh_extend_nolock != NULL && *release_lockp) {
+ *release_lockp = false;
+ __wt_spin_unlock(session, &block->live_lock);
+ }
/*
- * The truncate might fail if there's a mapped file (in other words, if
- * there's an open checkpoint on the file), that's OK.
+ * The extend might fail (for example, the file is mapped into memory),
+ * or discover file extension isn't supported; both are OK.
*/
- WT_RET_BUSY_OK(__wt_ftruncate(session, fh, block->extend_size));
- return (0);
+ ret = __wt_fextend(session, fh, block->extend_size);
+ return (ret == EBUSY || ret == ENOTSUP ? 0 : ret);
}
/*
diff --git a/src/docs/custom-file-systems.dox b/src/docs/custom-file-systems.dox
index d496002b0fb..91bda3a23b5 100644
--- a/src/docs/custom-file-systems.dox
+++ b/src/docs/custom-file-systems.dox
@@ -9,13 +9,23 @@ in the WT_FILE_SYSTEM and WT_FILE_HANDLE structures, and documentation for
those structures indicate which methods are optional. Methods which are not
provided should be set to NULL.
-Generally, function pointers should not be changed once a handle is
-created. An exception to this are the WT_FILE_HANDLE::fallocate and
-WT_FILE_HANDLE::fallocate_nolock methods, because a file system
-implementation may not know what support the system provides until file
-allocation is attempted. See the WiredTiger POSIX file system
-implementation for an example of how the fallocate method might be
-changed after initialization.
+Function pointers should not be cleared once a handle is created.
+(WiredTiger might check for a non-NULL method and then call it, and
+clearing the function pointer could result in a core dump.)
+
+Function pointers are not expected to be cleared or set after a handle
+is created. The file extension methods are an exception, because
+existing file system implementations do not know the level of support
+the underlying system provides until after file extension is attempted.
+For this reason, these methods appear in both locking and non-locking
+versions. Custom file systems needing to discover system support before
+configuring non-locking methods should initialize only the locking
+version of the method. After discovery is complete, either set the
+non-locking version and clear the locking version, or clear both
+methods. Clearing the method value is safe because calls
+are serialized until a non-locking method is set. Note it is not
+possible to downgrade from a non-locking version of these methods to a
+locking version.
WT_FILE_SYSTEM and WT_FILE_HANDLE methods are expected to return POSIX
1003.1 or ANSI C standard error codes on failure. Custom file systems
@@ -23,13 +33,18 @@ on Windows systems can use the WT_EXTENSION_API::map_windows_error
method to translate Windows system errors into POSIX system errors for
return to WiredTiger.
-WT_FILE_SYSTEM and WT_FILE_HANDLE methods which fail but not fatally
-(for example, a WT_FILE_HANDLE::truncate method call which fails because
-the file is currently mapped into memory), should return EBUSY.
-
-WT_FILE_SYSTEM and WT_FILE_HANDLE methods which fail fatally, but not
-in all cases (for example, a WT_FILE_HANDLE::fadvise method call which
-only supports ::WT_FILE_HANDLE_WILLNEED), should return ENOTSUP.
+WT_FILE_SYSTEM and WT_FILE_HANDLE methods which fail, but where future
+calls may succeed (for example, a WT_FILE_HANDLE::fh_truncate method
+call which fails because the file is currently mapped into memory),
+should return EBUSY.
+
+WT_FILE_SYSTEM and WT_FILE_HANDLE methods which fail, and no future
+calls will succeed, should return ENOTSUP. This failure may describe
+either the entire method being unavailable or a particular mode failure.
+For example, a WT_FILE_HANDLE::fh_advise method call with an argument of
+::WT_FILE_HANDLE_DONTNEED, where the file handle doesn't support the
+WT_FILE_HANDLE::fh_advise method at all, or only supports the method
+argument ::WT_FILE_HANDLE_WILLNEED, should return ENOTSUP.
Additionally, custom file system functions may return ::WT_PANIC to
shut down the system.
diff --git a/src/docs/spell.ok b/src/docs/spell.ok
index e08eb7d1447..539705961a4 100644
--- a/src/docs/spell.ok
+++ b/src/docs/spell.ok
@@ -15,6 +15,7 @@ Coverity
Coverity's
DB's
DBTs
+DONTNEED
Datastore
DbCursor
DbEnv
@@ -216,6 +217,8 @@ failchk
fallocate
fd's
fdatasync
+fextend
+fh
fieldname
fileID
fileformats
@@ -230,6 +233,7 @@ forw
fput
freelist
fsync
+ftruncate
gcc
gdbm
ge
diff --git a/src/include/extern_posix.h b/src/include/extern_posix.h
index 6fde537f36b..18f0914bc58 100644
--- a/src/include/extern_posix.h
+++ b/src/include/extern_posix.h
@@ -5,7 +5,7 @@ extern int __wt_posix_directory_list_free(WT_FILE_SYSTEM *file_system, WT_SESSIO
extern int __wt_dlopen(WT_SESSION_IMPL *session, const char *path, WT_DLH **dlhp);
extern int __wt_dlsym(WT_SESSION_IMPL *session, WT_DLH *dlh, const char *name, bool fail, void *sym_ret);
extern int __wt_dlclose(WT_SESSION_IMPL *session, WT_DLH *dlh);
-extern int __wt_posix_file_fallocate(WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, wt_off_t offset, wt_off_t len);
+extern int __wt_posix_file_extend( WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, wt_off_t offset);
extern int __wt_os_posix(WT_SESSION_IMPL *session);
extern int __wt_getenv(WT_SESSION_IMPL *session, const char *variable, const char **envp);
extern int __wt_posix_map(WT_FILE_HANDLE *fh, WT_SESSION *wt_session, void *mapped_regionp, size_t *lenp, void *mapped_cookiep);
diff --git a/src/include/os_fhandle.i b/src/include/os_fhandle.i
index 9bf5ce0e60b..06eef2b7e9c 100644
--- a/src/include/os_fhandle.i
+++ b/src/include/os_fhandle.i
@@ -39,40 +39,32 @@ __wt_fsync(WT_SESSION_IMPL *session, WT_FH *fh, bool block)
}
/*
- * __wt_fallocate --
+ * __wt_fextend --
* Extend a file.
*/
static inline int
-__wt_fallocate(
- WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, wt_off_t len)
+__wt_fextend(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset)
{
- WT_DECL_RET;
WT_FILE_HANDLE *handle;
WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY));
WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_IN_MEMORY));
WT_RET(__wt_verbose(session, WT_VERB_HANDLEOPS,
- "%s: handle-allocate: %" PRIuMAX " at %" PRIuMAX,
- fh->handle->name, (uintmax_t)len, (uintmax_t)offset));
+ "%s: handle-extend: to %" PRIuMAX,
+ fh->handle->name, (uintmax_t)offset));
/*
* Our caller is responsible for handling any locking issues, all we
* have to do is find a function to call.
- *
- * Be cautious, the underlying system might have configured the nolock
- * flavor, that failed, and we have to fallback to the locking flavor.
*/
handle = fh->handle;
- if (handle->fh_allocate_nolock != NULL) {
- if ((ret = handle->fh_allocate_nolock(
- handle, (WT_SESSION *)session, offset, len)) == 0)
- return (0);
- WT_RET_ERROR_OK(ret, ENOTSUP);
- }
- if (handle->fh_allocate != NULL)
- return (handle->fh_allocate(
- handle, (WT_SESSION *)session, offset, len));
+ if (handle->fh_extend_nolock != NULL)
+ return (handle->fh_extend_nolock(
+ handle, (WT_SESSION *)session, offset));
+ if (handle->fh_extend != NULL)
+ return (handle->fh_extend(
+ handle, (WT_SESSION *)session, offset));
return (ENOTSUP);
}
@@ -132,19 +124,28 @@ __wt_filesize(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t *sizep)
/*
* __wt_ftruncate --
- * POSIX ftruncate.
+ * Truncate a file.
*/
static inline int
-__wt_ftruncate(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t len)
+__wt_ftruncate(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset)
{
+ WT_FILE_HANDLE *handle;
+
WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY));
WT_RET(__wt_verbose(session, WT_VERB_HANDLEOPS,
- "%s: handle-truncate: %" PRIuMAX,
- fh->handle->name, (uintmax_t)len));
+ "%s: handle-truncate: to %" PRIuMAX,
+ fh->handle->name, (uintmax_t)offset));
- return (fh->handle->fh_truncate(
- fh->handle, (WT_SESSION *)session, len));
+ /*
+ * Our caller is responsible for handling any locking issues, all we
+ * have to do is find a function to call.
+ */
+ handle = fh->handle;
+ if (handle->fh_truncate != NULL)
+ return (handle->fh_truncate(
+ handle, (WT_SESSION *)session, offset));
+ return (ENOTSUP);
}
/*
diff --git a/src/include/wiredtiger.in b/src/include/wiredtiger.in
index a230e382b17..006c8bccdff 100644
--- a/src/include/wiredtiger.in
+++ b/src/include/wiredtiger.in
@@ -3927,48 +3927,54 @@ struct __wt_file_handle {
WT_SESSION *session, wt_off_t offset, wt_off_t len, int advice);
/*!
- * Ensure disk space is allocated for the file, based on the POSIX
- * 1003.1 standard fallocate.
+ * Extend the file.
*
* This method is not required, and should be set to NULL when not
* supported by the file.
*
+ * Any allocated disk space must read as 0 bytes, and no existing file
+ * data may change. Allocating all necessary underlying storage (not
+ * just changing the file's metadata) is likely to result in increased
+ * performance.
+ *
* This method is not called by multiple threads concurrently (on the
- * same file handle). If the file handle's fallocate method supports
- * concurrent calls, set the WT_FILE_HANDLE::fallocate_nolock method
- * instead.
+ * same file handle). If the file handle's extension method supports
+ * concurrent calls, set the WT_FILE_HANDLE::fh_extend_nolock method
+ * instead. See @ref custom_file_systems for more information.
*
* @errors
*
* @param file_handle the WT_FILE_HANDLE
* @param session the current WiredTiger session
- * @param offset the file offset
- * @param len the size of the advisory
+ * @param offset desired file size after extension
*/
- int (*fh_allocate)(WT_FILE_HANDLE *file_handle,
- WT_SESSION *session, wt_off_t, wt_off_t);
+ int (*fh_extend)(
+ WT_FILE_HANDLE *file_handle, WT_SESSION *session, wt_off_t offset);
/*!
- * Ensure disk space is allocated for the file, based on the POSIX
- * 1003.1 standard fallocate.
+ * Extend the file.
*
* This method is not required, and should be set to NULL when not
* supported by the file.
*
+ * Any allocated disk space must read as 0 bytes, and no existing file
+ * data may change. Allocating all necessary underlying storage (not
+ * only changing the file's metadata) is likely to result in increased
+ * performance.
+ *
* This method may be called by multiple threads concurrently (on the
- * same file handle). If the file handle's fallocate method does not
- * support concurrent calls, set the WT_FILE_HANDLE::fallocate method
- * instead.
+ * same file handle). If the file handle's extension method does not
+ * support concurrent calls, set the WT_FILE_HANDLE::fh_extend method
+ * instead. See @ref custom_file_systems for more information.
*
* @errors
*
* @param file_handle the WT_FILE_HANDLE
* @param session the current WiredTiger session
- * @param offset the file offset
- * @param len the size of the advisory
+ * @param offset desired file size after extension
*/
- int (*fh_allocate_nolock)(WT_FILE_HANDLE *file_handle,
- WT_SESSION *session, wt_off_t, wt_off_t);
+ int (*fh_extend_nolock)(
+ WT_FILE_HANDLE *file_handle, WT_SESSION *session, wt_off_t offset);
/*!
* Lock/unlock a file from the perspective of other processes running
@@ -4115,20 +4121,22 @@ struct __wt_file_handle {
int (*fh_sync_nowait)(WT_FILE_HANDLE *file_handle, WT_SESSION *session);
/*!
- * Lengthen or shorten a file to the specified length, based on the
- * POSIX 1003.1 standard ftruncate.
+ * Truncate the file.
*
- * This method is not required for read-only files, and should be set
- * to NULL when not supported by the file.
+ * This method is not required, and should be set to NULL when not
+ * supported by the file.
+ *
+ * This method is not called by multiple threads concurrently (on the
+ * same file handle).
*
* @errors
*
* @param file_handle the WT_FILE_HANDLE
* @param session the current WiredTiger session
- * @param length desired file size after truncate
+ * @param offset desired file size after truncate
*/
int (*fh_truncate)(
- WT_FILE_HANDLE *file_handle, WT_SESSION *session, wt_off_t length);
+ WT_FILE_HANDLE *file_handle, WT_SESSION *session, wt_off_t offset);
/*!
* Write to a file, based on the POSIX 1003.1 standard pwrite.
diff --git a/src/log/log.c b/src/log/log.c
index 8ec910115ac..b6373c95a11 100644
--- a/src/log/log.c
+++ b/src/log/log.c
@@ -460,7 +460,7 @@ __log_prealloc(WT_SESSION_IMPL *session, WT_FH *fh)
/*
* If the user configured zero filling, pre-allocate the log file
- * manually. Otherwise use either fallocate or ftruncate to create
+ * manually. Otherwise use the file extension method to create
* and zero the log file based on what is available.
*/
if (FLD_ISSET(conn->log_flags, WT_CONN_LOG_ZERO_FILL))
@@ -471,13 +471,8 @@ __log_prealloc(WT_SESSION_IMPL *session, WT_FH *fh)
* We have exclusive access to the log file and there are no other
* writes happening concurrently, so there are no locking issues.
*/
- if ((ret = __wt_fallocate(
- session, fh, WT_LOG_FIRST_RECORD,
- conn->log_file_max - WT_LOG_FIRST_RECORD)) == 0)
- return (0);
- WT_RET_ERROR_OK(ret, ENOTSUP);
-
- return (__wt_ftruncate(session, fh, conn->log_file_max));
+ ret = __wt_fextend(session, fh, conn->log_file_max);
+ return (ret == EBUSY || ret == ENOTSUP ? 0 : ret);
}
/*
@@ -990,8 +985,7 @@ __log_truncate(WT_SESSION_IMPL *session,
/*
* Truncate the log file to the given LSN.
*/
- WT_ERR(__log_openfile(session,
- &log_fh, file_prefix, lsn->l.file, 0));
+ WT_ERR(__log_openfile(session, &log_fh, file_prefix, lsn->l.file, 0));
WT_ERR(__wt_ftruncate(session, log_fh, lsn->l.offset));
WT_ERR(__wt_fsync(session, log_fh, true));
WT_ERR(__wt_close(session, &log_fh));
diff --git a/src/os_common/os_fhandle.c b/src/os_common/os_fhandle.c
index 184a9df0e72..055d784a8db 100644
--- a/src/os_common/os_fhandle.c
+++ b/src/os_common/os_fhandle.c
@@ -24,19 +24,19 @@ __fhandle_method_finalize(
"a WT_FILE_HANDLE.%s method must be configured", #name)
WT_HANDLE_METHOD_REQ(close);
- /* not required: fadvise */
- /* not required: fallocate */
- /* not required: fallocate_nolock */
+ /* not required: fh_advise */
+ /* not required: fh_extend */
+ /* not required: fh_extend_nolock */
WT_HANDLE_METHOD_REQ(fh_lock);
- /* not required: map */
- /* not required: map_discard */
- /* not required: map_preload */
- /* not required: map_unmap */
+ /* not required: fh_map */
+ /* not required: fh_map_discard */
+ /* not required: fh_map_preload */
+ /* not required: fh_unmap */
WT_HANDLE_METHOD_REQ(fh_read);
WT_HANDLE_METHOD_REQ(fh_size);
if (!readonly)
WT_HANDLE_METHOD_REQ(fh_sync);
- /* not required: sync_nowait */
+ /* not required: fh_sync_nowait */
if (!readonly) {
WT_HANDLE_METHOD_REQ(fh_truncate);
WT_HANDLE_METHOD_REQ(fh_write);
diff --git a/src/os_posix/os_fallocate.c b/src/os_posix/os_fallocate.c
index 9e5d9519900..7395bf94a76 100644
--- a/src/os_posix/os_fallocate.c
+++ b/src/os_posix/os_fallocate.c
@@ -18,8 +18,8 @@
* Linux fallocate call.
*/
static int
-__posix_std_fallocate(WT_FILE_HANDLE *file_handle,
- WT_SESSION *wt_session, wt_off_t offset, wt_off_t len)
+__posix_std_fallocate(
+ WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, wt_off_t offset)
{
#if defined(HAVE_FALLOCATE)
WT_DECL_RET;
@@ -29,13 +29,12 @@ __posix_std_fallocate(WT_FILE_HANDLE *file_handle,
pfh = (WT_FILE_HANDLE_POSIX *)file_handle;
- WT_SYSCALL_RETRY(fallocate(pfh->fd, 0, offset, len), ret);
+ WT_SYSCALL_RETRY(fallocate(pfh->fd, 0, (wt_off_t)0, offset), ret);
return (ret);
#else
WT_UNUSED(file_handle);
WT_UNUSED(wt_session);
WT_UNUSED(offset);
- WT_UNUSED(len);
return (ENOTSUP);
#endif
}
@@ -45,8 +44,8 @@ __posix_std_fallocate(WT_FILE_HANDLE *file_handle,
* Linux fallocate call (system call version).
*/
static int
-__posix_sys_fallocate(WT_FILE_HANDLE *file_handle,
- WT_SESSION *wt_session, wt_off_t offset, wt_off_t len)
+__posix_sys_fallocate(
+ WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, wt_off_t offset)
{
#if defined(__linux__) && defined(SYS_fallocate)
WT_DECL_RET;
@@ -62,13 +61,13 @@ __posix_sys_fallocate(WT_FILE_HANDLE *file_handle,
* Linux versions (RHEL 5.5), but not in the version of the C library.
* This allows it to work everywhere the kernel supports it.
*/
- WT_SYSCALL_RETRY(syscall(SYS_fallocate, pfh->fd, 0, offset, len), ret);
+ WT_SYSCALL_RETRY(
+ syscall(SYS_fallocate, pfh->fd, 0, (wt_off_t)0, offset), ret);
return (ret);
#else
WT_UNUSED(file_handle);
WT_UNUSED(wt_session);
WT_UNUSED(offset);
- WT_UNUSED(len);
return (ENOTSUP);
#endif
}
@@ -78,8 +77,8 @@ __posix_sys_fallocate(WT_FILE_HANDLE *file_handle,
* POSIX fallocate call.
*/
static int
-__posix_posix_fallocate(WT_FILE_HANDLE *file_handle,
- WT_SESSION *wt_session, wt_off_t offset, wt_off_t len)
+__posix_posix_fallocate(
+ WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, wt_off_t offset)
{
#if defined(HAVE_POSIX_FALLOCATE)
WT_DECL_RET;
@@ -89,64 +88,71 @@ __posix_posix_fallocate(WT_FILE_HANDLE *file_handle,
pfh = (WT_FILE_HANDLE_POSIX *)file_handle;
- WT_SYSCALL_RETRY(posix_fallocate(pfh->fd, offset, len), ret);
+ WT_SYSCALL_RETRY(posix_fallocate(pfh->fd, (wt_off_t)0, offset), ret);
return (ret);
#else
WT_UNUSED(file_handle);
WT_UNUSED(wt_session);
WT_UNUSED(offset);
- WT_UNUSED(len);
return (ENOTSUP);
#endif
}
/*
- * __wt_posix_file_fallocate --
- * POSIX fallocate.
+ * __wt_posix_file_extend --
+ * Extend the file.
*/
int
-__wt_posix_file_fallocate(WT_FILE_HANDLE *file_handle,
- WT_SESSION *wt_session, wt_off_t offset, wt_off_t len)
+__wt_posix_file_extend(
+ WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, wt_off_t offset)
{
/*
- * The first fallocate call: figure out what fallocate call this system
- * supports, if any.
+ * The first file extension call: figure out what this system has.
*
- * The function is configured as a locking fallocate call, so we know
- * we're single-threaded through here. Set the nolock function first,
- * then publish the NULL replacement to ensure the handle functions are
+ * This function is configured as a locking call, so we know we're
+ * single-threaded through here. Set the nolock function first, then
+ * publish the NULL replacement to ensure the handle functions are
* always correct.
*
- * We've seen Linux systems where posix_fallocate has corrupted
- * existing file data (even though that is explicitly disallowed
- * by POSIX). FreeBSD and Solaris support posix_fallocate, and
- * so far we've seen no problems leaving it unlocked. Check for
- * fallocate (and the system call version of fallocate) first to
- * avoid locking on Linux if at all possible.
+ * We've seen Linux systems where posix_fallocate has corrupted existing
+ * file data (even though that is explicitly disallowed by POSIX).
+ * FreeBSD and Solaris support posix_fallocate, and so far we've seen
+ * no problems leaving it unlocked. Check for fallocate (and the system
+ * call version of fallocate) first to avoid locking on Linux if at all
+ * possible.
*/
- if (__posix_std_fallocate(file_handle, wt_session, offset, len) == 0) {
- file_handle->fh_allocate_nolock = __posix_std_fallocate;
- WT_PUBLISH(file_handle->fh_allocate, NULL);
+ if (__posix_std_fallocate(file_handle, wt_session, offset) == 0) {
+ file_handle->fh_extend_nolock = __posix_std_fallocate;
+ WT_PUBLISH(file_handle->fh_extend, NULL);
return (0);
}
- if (__posix_sys_fallocate(file_handle, wt_session, offset, len) == 0) {
- file_handle->fh_allocate_nolock = __posix_sys_fallocate;
- WT_PUBLISH(file_handle->fh_allocate, NULL);
+ if (__posix_sys_fallocate(file_handle, wt_session, offset) == 0) {
+ file_handle->fh_extend_nolock = __posix_sys_fallocate;
+ WT_PUBLISH(file_handle->fh_extend, NULL);
return (0);
}
- if (__posix_posix_fallocate(
- file_handle, wt_session, offset, len) == 0) {
+ if (__posix_posix_fallocate(file_handle, wt_session, offset) == 0) {
#if defined(__linux__)
- file_handle->fh_allocate = __posix_posix_fallocate;
+ file_handle->fh_extend = __posix_posix_fallocate;
WT_WRITE_BARRIER();
#else
- file_handle->fh_allocate_nolock = __posix_posix_fallocate;
- WT_PUBLISH(file_handle->fh_allocate, NULL);
+ file_handle->fh_extend_nolock = __posix_posix_fallocate;
+ WT_PUBLISH(file_handle->fh_extend, NULL);
#endif
return (0);
}
- file_handle->fh_allocate = NULL;
+ /*
+ * Use the POSIX ftruncate call if there's nothing else, it can extend
+ * files. Note ftruncate requires locking.
+ */
+ if (file_handle->fh_truncate(file_handle, wt_session, offset) == 0) {
+ file_handle->fh_extend = file_handle->fh_truncate;
+ WT_WRITE_BARRIER();
+ return (0);
+ }
+
+ file_handle->fh_extend = NULL;
WT_WRITE_BARRIER();
return (ENOTSUP);
}
diff --git a/src/os_posix/os_fs.c b/src/os_posix/os_fs.c
index 11f38ec063b..06ec521e1d9 100644
--- a/src/os_posix/os_fs.c
+++ b/src/os_posix/os_fs.c
@@ -701,7 +701,7 @@ directory_open:
if (!pfh->direct_io)
file_handle->fh_advise = __posix_file_advise;
#endif
- file_handle->fh_allocate = __wt_posix_file_fallocate;
+ file_handle->fh_extend = __wt_posix_file_extend;
file_handle->fh_lock = __posix_file_lock;
#ifdef WORDS_BIGENDIAN
/*
diff --git a/src/os_win/os_fs.c b/src/os_win/os_fs.c
index fc03e0a2595..e8a9fed15e1 100644
--- a/src/os_win/os_fs.c
+++ b/src/os_win/os_fs.c
@@ -526,9 +526,8 @@ __win_open_file(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session,
}
/*
- * Open a second handle to file to support allocation/truncation
- * concurrently with reads on the file. Writes would also move the file
- * pointer.
+ * Open a second handle to file to support truncation concurrently with
+ * reads on the file. Writes would also move the file pointer.
*/
if (!LF_ISSET(WT_FS_OPEN_READONLY)) {
win_fh->filehandle_secondary = CreateFileA(name, desired_access,