summary refs log tree commit diff
diff options
context:
space:
mode:
author Keith Bostic <keith.bostic@mongodb.com> 2016-08-12 03:02:58 -0400
committer Michael Cahill <michael.cahill@mongodb.com> 2016-08-12 17:02:58 +1000
commit 707807a81fbf06766006b8d63393414205fda181 (patch)
tree 8c039560c34d282489269877ce7a45244800e0e9
parent f69bbc18148c6fced50b01e04d552f0053fce20c (diff)
download mongo-707807a81fbf06766006b8d63393414205fda181.tar.gz
WT-2823 support file handles without a truncate method (#2940)
* Rework the block manager to ignore whether or not truncate works at a low level, rather than handling errors we don't care about in the callers.
-rw-r--r-- src/block/block_ckpt.c 17
-rw-r--r-- src/block/block_ext.c 16
-rw-r--r-- src/block/block_slvg.c 9
-rw-r--r-- src/block/block_write.c 30
-rw-r--r-- src/conn/conn_log.c 15
-rw-r--r-- src/include/error.h 1
-rw-r--r-- src/include/log.h 4
-rw-r--r-- src/log/log.c 16
-rw-r--r-- src/os_common/os_fhandle.c 5
-rw-r--r-- src/os_common/os_fs_inmemory.c 36
10 files changed, 58 insertions, 91 deletions
diff --git a/src/block/block_ckpt.c b/src/block/block_ckpt.c
index 3584efc7671..6d827cf697d 100644
--- a/src/block/block_ckpt.c
+++ b/src/block/block_ckpt.c
@@ -137,18 +137,9 @@ __wt_block_checkpoint_load(WT_SESSION_IMPL *session, WT_BLOCK *block,
* that was done when the checkpoint was first written (re-writing the
* checkpoint might possibly make it relevant here, but it's unlikely
* enough I don't bother).
- *
- * If in-memory, we don't read or write the object, and the truncate
- * will unnecessarily allocate buffer space.
*/
- if (!checkpoint && !F_ISSET(S2C(session), WT_CONN_IN_MEMORY)) {
- WT_ERR(__wt_verbose(session, WT_VERB_CHECKPOINT,
- "truncate file to %" PRIuMAX, (uintmax_t)ci->file_size));
-
- /* The truncate might fail, and that's OK. */
- WT_ERR_BUSY_OK(
- __wt_block_truncate(session, block, ci->file_size));
- }
+ if (!checkpoint)
+ WT_ERR(__wt_block_truncate(session, block, ci->file_size));
if (0) {
err: /*
@@ -190,9 +181,7 @@ __wt_block_checkpoint_unload(
* checkpoints.
*/
if (!checkpoint) {
- /* The truncate might fail, and that's OK. */
- WT_TRET_BUSY_OK(
- __wt_block_truncate(session, block, block->size));
+ WT_TRET(__wt_block_truncate(session, block, block->size));
__wt_spin_lock(session, &block->live_lock);
__wt_block_ckpt_destroy(session, &block->live);
diff --git a/src/block/block_ext.c b/src/block/block_ext.c
index bad4d8d7990..5e7616b8bcb 100644
--- a/src/block/block_ext.c
+++ b/src/block/block_ext.c
@@ -1336,7 +1336,7 @@ __wt_block_extlist_truncate(
WT_SESSION_IMPL *session, WT_BLOCK *block, WT_EXTLIST *el)
{
WT_EXT *ext, **astack[WT_SKIP_MAXDEPTH];
- wt_off_t orig, size;
+ wt_off_t size;
/*
* Check if the last available extent is at the end of the file, and if
@@ -1353,21 +1353,11 @@ __wt_block_extlist_truncate(
* the cached file size, and that can't happen until after the extent
* list removal succeeds.)
*/
- orig = block->size;
size = ext->off;
WT_RET(__block_off_remove(session, block, el, size, NULL));
- block->size = size;
- /*
- * Truncate the file. The truncate might fail, and that's OK, we simply
- * ignore those blocks.
- */
- WT_RET(__wt_verbose(session, WT_VERB_BLOCK,
- "truncate file from %" PRIdMAX " to %" PRIdMAX,
- (intmax_t)orig, (intmax_t)size));
- WT_RET_BUSY_OK(__wt_block_truncate(session, block, size));
-
- return (0);
+ /* Truncate the file. */
+ return (__wt_block_truncate(session, block, size));
}
/*
diff --git a/src/block/block_slvg.c b/src/block/block_slvg.c
index 6be3fa73f70..ea04f994874 100644
--- a/src/block/block_slvg.c
+++ b/src/block/block_slvg.c
@@ -33,13 +33,10 @@ __wt_block_salvage_start(WT_SESSION_IMPL *session, WT_BLOCK *block)
* Truncate the file to an allocation-size multiple of blocks (bytes
* trailing the last block must be garbage, by definition).
*/
- if (block->size > allocsize) {
+ len = allocsize;
+ if (block->size > allocsize)
len = (block->size / allocsize) * allocsize;
- if (len != block->size)
- WT_RET(__wt_block_truncate(session, block, len));
- } else
- len = allocsize;
- block->live.file_size = len;
+ WT_RET(__wt_block_truncate(session, block, len));
/*
* The file's first allocation-sized block is description information,
diff --git a/src/block/block_write.c b/src/block/block_write.c
index 7e5b027ce2b..d8001704080 100644
--- a/src/block/block_write.c
+++ b/src/block/block_write.c
@@ -15,6 +15,20 @@
int
__wt_block_truncate(WT_SESSION_IMPL *session, WT_BLOCK *block, wt_off_t len)
{
+ WT_DECL_RET;
+
+ WT_RET(__wt_verbose(session,
+ WT_VERB_BLOCK, "truncate file to %" PRIuMAX, (uintmax_t)len));
+
+ /*
+ * Truncate requires serialization, we depend on our caller for that.
+ *
+ * Truncation isn't a requirement of the block manager, it's only used
+ * to conserve disk space. Regardless of the underlying file system
+ * call's result, the in-memory understanding of the file size changes.
+ */
+ block->size = block->extend_size = len;
+
/*
* Backups are done by copying files outside of WiredTiger, potentially
* by system utilities. We cannot truncate the file during the backup
@@ -26,18 +40,16 @@ __wt_block_truncate(WT_SESSION_IMPL *session, WT_BLOCK *block, wt_off_t len)
* targeted solution at some point.
*/
if (S2C(session)->hot_backup)
- return (EBUSY);
+ return (0);
/*
- * Additionally, the truncate might fail if there's a file mapping (if
- * there's an open checkpoint on the file), in which case the underlying
- * function returns EBUSY.
+ * The truncate may fail temporarily or permanently (for example, there
+ * may be a file mapping if there's an open checkpoint on the file on a
+ * POSIX system, in which case the underlying function returns EBUSY).
+ * It's OK, we don't have to be able to truncate files.
*/
- WT_RET(__wt_ftruncate(session, block->fh, len));
-
- block->size = block->extend_size = len;
-
- return (0);
+ ret = __wt_ftruncate(session, block->fh, len);
+ return (ret == EBUSY || ret == ENOTSUP ? 0 : ret);
}
/*
diff --git a/src/conn/conn_log.c b/src/conn/conn_log.c
index 18ed71e4688..06b23086c69 100644
--- a/src/conn/conn_log.c
+++ b/src/conn/conn_log.c
@@ -353,8 +353,7 @@ __wt_log_truncate_files(
backup_file = WT_CURSOR_BACKUP_ID(cursor);
WT_ASSERT(session, backup_file <= log->alloc_lsn.l.file);
WT_RET(__wt_verbose(session, WT_VERB_LOG,
- "log_truncate_files: Archive once up to %" PRIu32,
- backup_file));
+ "log_truncate_files: Archive once up to %" PRIu32, backup_file));
WT_RET(__wt_writelock(session, log->log_archive_lock));
locked = true;
@@ -429,12 +428,14 @@ __log_file_server(void *arg)
*/
WT_ERR(__wt_fsync(session, close_fh, true));
/*
- * We want to make sure the file size reflects
- * actual data and has minimal pre-allocated
- * zeroed space.
+ * We want to have the file size reflect actual
+ * data with minimal pre-allocated zeroed space.
+ * The underlying file system may not support
+ * truncate, which is OK, it's just more work
+ * during cursor traversal.
*/
- WT_ERR(__wt_ftruncate(session,
- close_fh, close_end_lsn.l.offset));
+ WT_ERR_ERROR_OK(__wt_ftruncate(session,
+ close_fh, close_end_lsn.l.offset), ENOTSUP);
WT_SET_LSN(&close_end_lsn,
close_end_lsn.l.file + 1, 0);
__wt_spin_lock(session, &log->log_sync_lock);
diff --git a/src/include/error.h b/src/include/error.h
index e85220d5d3c..bbb7f989332 100644
--- a/src/include/error.h
+++ b/src/include/error.h
@@ -77,7 +77,6 @@
ret == 0 || ret == WT_DUPLICATE_KEY || ret == WT_NOTFOUND)) \
ret = __ret; \
} while (0)
-#define WT_TRET_BUSY_OK(a) WT_TRET_ERROR_OK(a, EBUSY)
#define WT_TRET_NOTFOUND_OK(a) WT_TRET_ERROR_OK(a, WT_NOTFOUND)
/* Return and branch-to-err-label cases for switch statements. */
diff --git a/src/include/log.h b/src/include/log.h
index 870c046252c..b91c7dffe60 100644
--- a/src/include/log.h
+++ b/src/include/log.h
@@ -256,7 +256,9 @@ struct __wt_log {
#ifdef HAVE_DIAGNOSTIC
uint64_t write_calls; /* Calls to log_write */
#endif
-#define WT_LOG_OPENED 0x01 /* Log subsystem successfully open */
+
+#define WT_LOG_OPENED 0x01 /* Log subsystem successfully open */
+#define WT_LOG_TRUNCATE_NOTSUP 0x02 /* File system truncate not supported */
uint32_t flags;
};
diff --git a/src/log/log.c b/src/log/log.c
index b6373c95a11..6ead11af438 100644
--- a/src/log/log.c
+++ b/src/log/log.c
@@ -984,9 +984,23 @@ __log_truncate(WT_SESSION_IMPL *session,
/*
* Truncate the log file to the given LSN.
+ *
+ * It's possible the underlying file system doesn't support truncate
+ * (there are existing examples), which is fine, but we don't want to
+ * repeatedly do the setup work just to find that out every time. Check
+ * before doing work, and if there's a not-supported error, turn off
+ * future truncates.
*/
+ if (F_ISSET(log, WT_LOG_TRUNCATE_NOTSUP))
+ return (0);
WT_ERR(__log_openfile(session, &log_fh, file_prefix, lsn->l.file, 0));
- WT_ERR(__wt_ftruncate(session, log_fh, lsn->l.offset));
+ if ((ret = __wt_ftruncate(session, log_fh, lsn->l.offset)) != 0) {
+ if (ret == ENOTSUP) {
+ F_SET(log, WT_LOG_TRUNCATE_NOTSUP);
+ ret = 0;
+ }
+ goto err;
+ }
WT_ERR(__wt_fsync(session, log_fh, true));
WT_ERR(__wt_close(session, &log_fh));
diff --git a/src/os_common/os_fhandle.c b/src/os_common/os_fhandle.c
index 055d784a8db..3466f0d4e9b 100644
--- a/src/os_common/os_fhandle.c
+++ b/src/os_common/os_fhandle.c
@@ -37,10 +37,9 @@ __fhandle_method_finalize(
if (!readonly)
WT_HANDLE_METHOD_REQ(fh_sync);
/* not required: fh_sync_nowait */
- if (!readonly) {
- WT_HANDLE_METHOD_REQ(fh_truncate);
+ /* not required: fh_truncate */
+ if (!readonly)
WT_HANDLE_METHOD_REQ(fh_write);
- }
return (0);
}
diff --git a/src/os_common/os_fs_inmemory.c b/src/os_common/os_fs_inmemory.c
index 178adc1dac8..70a82007300 100644
--- a/src/os_common/os_fs_inmemory.c
+++ b/src/os_common/os_fs_inmemory.c
@@ -391,41 +391,6 @@ __im_file_sync(WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session)
}
/*
- * __im_file_truncate --
- * POSIX ftruncate.
- */
-static int
-__im_file_truncate(
- WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, wt_off_t offset)
-{
- WT_DECL_RET;
- WT_FILE_HANDLE_INMEM *im_fh;
- WT_FILE_SYSTEM_INMEM *im_fs;
- WT_SESSION_IMPL *session;
- size_t off;
-
- im_fh = (WT_FILE_HANDLE_INMEM *)file_handle;
- im_fs = (WT_FILE_SYSTEM_INMEM *)file_handle->file_system;
- session = (WT_SESSION_IMPL *)wt_session;
-
- __wt_spin_lock(session, &im_fs->lock);
-
- /*
- * Grow the buffer as necessary, clear any new space in the file, and
- * reset the file's data length.
- */
- off = (size_t)offset;
- WT_ERR(__wt_buf_grow(session, &im_fh->buf, off));
- if (im_fh->buf.size < off)
- memset((uint8_t *)im_fh->buf.data + im_fh->buf.size,
- 0, off - im_fh->buf.size);
- im_fh->buf.size = off;
-
-err: __wt_spin_unlock(session, &im_fs->lock);
- return (ret);
-}
-
-/*
* __im_file_write --
* POSIX pwrite.
*/
@@ -526,7 +491,6 @@ __im_file_open(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session,
file_handle->fh_read = __im_file_read;
file_handle->fh_size = __im_file_size;
file_handle->fh_sync = __im_file_sync;
- file_handle->fh_truncate = __im_file_truncate;
file_handle->fh_write = __im_file_write;
*file_handlep = file_handle;