summary refs log tree commit diff
path: root/src/block
diff options
context:
space:
mode:
Diffstat (limited to 'src/block')
-rw-r--r-- src/block/block_write.c 123
1 files changed, 67 insertions, 56 deletions
diff --git a/src/block/block_write.c b/src/block/block_write.c
index e59264d495e..b99bdf97b22 100644
--- a/src/block/block_write.c
+++ b/src/block/block_write.c
@@ -45,46 +45,6 @@ __wt_block_write_size(WT_SESSION_IMPL *session, WT_BLOCK *block, size_t *sizep)
}
/*
- * __block_extend --
- * Extend the file.
- */
-static inline int
-__block_extend(
- WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, size_t size)
-{
- /*
- * Extend the file in chunks. We want to limit the number of threads
- * extending the file at the same time, so choose the one thread that's
- * crossing the extended boundary. We don't extend newly created files,
- * and it's theoretically possible we might wait so long our extension
- * of the file is passed by another thread writing single blocks, that's
- * why there's a check in case the extended file size becomes too small:
- * if the file size catches up, every thread tries to extend it.
- *
- * We require locking in the case of using an underlying ftruncate call
- * to extend the file: if a writing thread passes the extending thread,
- * it's possible the ftruncate call would delete written data, and that
- * would be very, very bad.
- *
- * We also lock in the case of using an underlying posix_fallocate call.
- * We've seen Linux systems where posix_fallocate corrupts existing data
- * (even though that is explicitly disallowed by POSIX). We've not had
- * problems with fallocate, it's unlocked for now.
- */
-#if defined(HAVE_FALLOCATE) ||\
- defined(HAVE_FTRUNCATE) || defined(HAVE_POSIX_FALLOCATE)
- if (fh->extend_size <= fh->size ||
- (offset + fh->extend_len <= fh->extend_size &&
- offset + fh->extend_len + (wt_off_t)size >= fh->extend_size)) {
- fh->extend_size = offset + fh->extend_len * 2;
- return (
- __wt_fallocate(session, fh, offset, fh->extend_len * 2));
- }
-#endif
- return (0);
-}
-
-/*
* __wt_block_write --
* Write a buffer into a block, returning the block's address cookie.
*/
@@ -114,16 +74,18 @@ __wt_block_write(WT_SESSION_IMPL *session, WT_BLOCK *block,
int
__wt_block_write_off(WT_SESSION_IMPL *session, WT_BLOCK *block,
WT_ITEM *buf, wt_off_t *offsetp, uint32_t *sizep, uint32_t *cksump,
- int data_cksum, int locked)
+ int data_cksum, int caller_locked)
{
WT_BLOCK_HEADER *blk;
WT_DECL_RET;
WT_FH *fh;
size_t align_size;
wt_off_t offset;
+ int local_locked;
blk = WT_BLOCK_HEADER_REF(buf->mem);
fh = block->fh;
+ local_locked = 0;
/* Buffers should be aligned for writing. */
if (!F_ISSET(buf, WT_ITEM_ALIGNED)) {
@@ -180,36 +142,85 @@ __wt_block_write_off(WT_SESSION_IMPL *session, WT_BLOCK *block,
blk->cksum = __wt_cksum(
buf->mem, data_cksum ? align_size : WT_BLOCK_COMPRESS_SKIP);
- if (!locked) {
+ if (!caller_locked) {
WT_RET(__wt_block_ext_prealloc(session, 5));
__wt_spin_lock(session, &block->live_lock);
+ local_locked = 1;
}
ret = __wt_block_alloc(session, block, &offset, (wt_off_t)align_size);
/*
- * File extension requires locking unless we have the Linux fallocate
- * system call (see __block_extend for the details). Avoid releasing
- * and re-acquiring the lock.
+ * Extend the file in chunks. We want to limit the number of threads
+ * extending the file at the same time, so choose the one thread that's
+ * crossing the extended boundary. We don't extend newly created files.
+ * It's theoretically possible we might wait so long that our extension
+ * of the file is passed by another thread writing single blocks; that's
+ * why there's a check in case the extended file size becomes too small:
+ * if the file size catches up, every thread tries to extend it.
+ *
+ * File extension may require locking: some variants of the system call
+ * used to extend the file initialize the extended space. If a writing
+ * thread races with the extending thread, the extending thread might
+ * overwrite already written data, and that would be very, very bad.
+ *
+ * Some variants of the system call used to extend the file fail at
+ * run-time depending on the filesystem type; fall back to ftruncate in
+ * that case, and remember that ftruncate requires locking.
*/
-#if defined(HAVE_FALLOCATE)
- if (!locked)
- __wt_spin_unlock(session, &block->live_lock);
-#endif
- if (ret == 0 && fh->extend_len != 0)
- WT_TRET(__block_extend(session, fh, offset, align_size));
-#if !defined(HAVE_FALLOCATE)
- if (!locked)
+ if (ret == 0 &&
+ fh->extend_len != 0 &&
+ (fh->extend_size <= fh->size ||
+ (offset + fh->extend_len <= fh->extend_size &&
+ offset +
+ fh->extend_len + (wt_off_t)align_size >= fh->extend_size))) {
+ fh->extend_size = offset + fh->extend_len * 2;
+ if (fh->fallocate_available) {
+ /*
+ * Release any locally acquired lock if it's not needed
+ * to extend the file: extending the file might require
+ * updating file metadata, which can be slow. (It may be
+ * a bad idea to configure for file extension on systems
+ * that require locking over the extend call.)
+ */
+ if (!fh->fallocate_requires_locking && local_locked) {
+ __wt_spin_unlock(session, &block->live_lock);
+ local_locked = 0;
+ }
+
+ /* Extend the file. */
+ if ((ret = __wt_fallocate(session,
+ fh, offset, fh->extend_len * 2)) == ENOTSUP) {
+ ret = 0;
+ goto extend_truncate;
+ }
+ } else {
+extend_truncate: /*
+ * We may have a caller lock or a locally acquired lock,
+ * but we need a lock to call ftruncate.
+ */
+ if (!caller_locked && local_locked == 0) {
+ __wt_spin_lock(session, &block->live_lock);
+ local_locked = 1;
+ }
+ ret = __wt_ftruncate(
+ session, fh, offset + fh->extend_len * 2);
+ }
+ }
+ /* Release any locally acquired lock. */
+ if (local_locked) {
__wt_spin_unlock(session, &block->live_lock);
-#endif
+ local_locked = 0;
+ }
WT_RET(ret);
+ /* Write the block. */
if ((ret =
__wt_write(session, fh, offset, align_size, buf->mem)) != 0) {
- if (!locked)
+ if (!caller_locked)
__wt_spin_lock(session, &block->live_lock);
WT_TRET(__wt_block_off_free(
session, block, offset, (wt_off_t)align_size));
- if (!locked)
+ if (!caller_locked)
__wt_spin_unlock(session, &block->live_lock);
WT_RET(ret);
}