diff options
Diffstat (limited to 'src/block')
-rw-r--r-- | src/block/block_write.c | 123 |
1 files changed, 67 insertions, 56 deletions
diff --git a/src/block/block_write.c b/src/block/block_write.c index e59264d495e..b99bdf97b22 100644 --- a/src/block/block_write.c +++ b/src/block/block_write.c @@ -45,46 +45,6 @@ __wt_block_write_size(WT_SESSION_IMPL *session, WT_BLOCK *block, size_t *sizep) } /* - * __block_extend -- - * Extend the file. - */ -static inline int -__block_extend( - WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, size_t size) -{ - /* - * Extend the file in chunks. We want to limit the number of threads - * extending the file at the same time, so choose the one thread that's - * crossing the extended boundary. We don't extend newly created files, - * and it's theoretically possible we might wait so long our extension - * of the file is passed by another thread writing single blocks, that's - * why there's a check in case the extended file size becomes too small: - * if the file size catches up, every thread tries to extend it. - * - * We require locking in the case of using an underlying ftruncate call - * to extend the file: if a writing thread passes the extending thread, - * it's possible the ftruncate call would delete written data, and that - * would be very, very bad. - * - * We also lock in the case of using an underlying posix_fallocate call. - * We've seen Linux systems where posix_fallocate corrupts existing data - * (even though that is explicitly disallowed by POSIX). We've not had - * problems with fallocate, it's unlocked for now. - */ -#if defined(HAVE_FALLOCATE) ||\ - defined(HAVE_FTRUNCATE) || defined(HAVE_POSIX_FALLOCATE) - if (fh->extend_size <= fh->size || - (offset + fh->extend_len <= fh->extend_size && - offset + fh->extend_len + (wt_off_t)size >= fh->extend_size)) { - fh->extend_size = offset + fh->extend_len * 2; - return ( - __wt_fallocate(session, fh, offset, fh->extend_len * 2)); - } -#endif - return (0); -} - -/* * __wt_block_write -- * Write a buffer into a block, returning the block's address cookie. */ @@ -114,16 +74,18 @@ __wt_block_write(WT_SESSION_IMPL *session, WT_BLOCK *block, int __wt_block_write_off(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_ITEM *buf, wt_off_t *offsetp, uint32_t *sizep, uint32_t *cksump, - int data_cksum, int locked) + int data_cksum, int caller_locked) { WT_BLOCK_HEADER *blk; WT_DECL_RET; WT_FH *fh; size_t align_size; wt_off_t offset; + int local_locked; blk = WT_BLOCK_HEADER_REF(buf->mem); fh = block->fh; + local_locked = 0; /* Buffers should be aligned for writing. */ if (!F_ISSET(buf, WT_ITEM_ALIGNED)) { @@ -180,36 +142,85 @@ __wt_block_write_off(WT_SESSION_IMPL *session, WT_BLOCK *block, blk->cksum = __wt_cksum( buf->mem, data_cksum ? align_size : WT_BLOCK_COMPRESS_SKIP); - if (!locked) { + if (!caller_locked) { WT_RET(__wt_block_ext_prealloc(session, 5)); __wt_spin_lock(session, &block->live_lock); + local_locked = 1; } ret = __wt_block_alloc(session, block, &offset, (wt_off_t)align_size); /* - * File extension requires locking unless we have the Linux fallocate - * system call (see __block_extend for the details). Avoid releasing - * and re-acquiring the lock. + * Extend the file in chunks. We want to limit the number of threads + * extending the file at the same time, so choose the one thread that's + * crossing the extended boundary. We don't extend newly created files, + * and it's theoretically possible we might wait so long our extension + * of the file is passed by another thread writing single blocks, that's + * why there's a check in case the extended file size becomes too small: + * if the file size catches up, every thread tries to extend it. + * + * File extension may require locking: some variants of the system call + * used to extend the file initialize the extended space. If a writing + * thread races with the extending thread, the extending thread might + * overwrite already written data, and that would be very, very bad. + * + * Some variants of the system call to extend the file fail at run-time + * based on the filesystem type, fall back to ftruncate in that case, + * and remember that ftruncate requires locking. */ -#if defined(HAVE_FALLOCATE) - if (!locked) - __wt_spin_unlock(session, &block->live_lock); -#endif - if (ret == 0 && fh->extend_len != 0) - WT_TRET(__block_extend(session, fh, offset, align_size)); -#if !defined(HAVE_FALLOCATE) - if (!locked) + if (ret == 0 && + fh->extend_len != 0 && + (fh->extend_size <= fh->size || + (offset + fh->extend_len <= fh->extend_size && + offset + + fh->extend_len + (wt_off_t)align_size >= fh->extend_size))) { + fh->extend_size = offset + fh->extend_len * 2; + if (fh->fallocate_available) { + /* + * Release any locally acquired lock if it's not needed + * to extend the file, extending the file might require + * updating file metadata, which can be slow. (It may be + * a bad idea to configure for file extension on systems + * that require locking over the extend call.) + */ + if (!fh->fallocate_requires_locking && local_locked) { + __wt_spin_unlock(session, &block->live_lock); + local_locked = 0; + } + + /* Extend the file. */ + if ((ret = __wt_fallocate(session, + fh, offset, fh->extend_len * 2)) == ENOTSUP) { + ret = 0; + goto extend_truncate; + } + } else { +extend_truncate: /* + * We may have a caller lock or a locally acquired lock, + * but we need a lock to call ftruncate. + */ + if (!caller_locked && local_locked == 0) { + __wt_spin_lock(session, &block->live_lock); + local_locked = 1; + } + ret = __wt_ftruncate( + session, fh, offset + fh->extend_len * 2); + } + } + /* Release any locally acquired lock. */ + if (local_locked) { __wt_spin_unlock(session, &block->live_lock); -#endif + local_locked = 0; + } WT_RET(ret); + /* Write the block. */ if ((ret = __wt_write(session, fh, offset, align_size, buf->mem)) != 0) { - if (!locked) + if (!caller_locked) __wt_spin_lock(session, &block->live_lock); WT_TRET(__wt_block_off_free( session, block, offset, (wt_off_t)align_size)); - if (!locked) + if (!caller_locked) __wt_spin_unlock(session, &block->live_lock); WT_RET(ret); } |