summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKeith Bostic <keith.bostic@mongodb.com>2016-09-06 13:43:13 -0400
committerGitHub <noreply@github.com>2016-09-06 13:43:13 -0400
commit5500b29ad0f2d2196179f9c6c0448abc1f100122 (patch)
treea52ea377fe3508f8539197dfafd0d5a9feb6d2e3
parent338d789506ffcd388f82a5551a070b7cc4c482ae (diff)
downloadmongo-5500b29ad0f2d2196179f9c6c0448abc1f100122.tar.gz
WT-2892 hot backup can race with block truncate (#3023)
* WT-2892 hot backup can race with block truncate Hold a readlock across the block truncate, otherwise a hot backup could potentially race with the truncation. * Don't truncate log files if there's a hot backup in progress. * Moving a pre-allocated log file into place is a rename, and so it shouldn't be affected by a hot backup. The only real change is removing the test of WT_CONNECTION_IMPL.hot_backup, but some minor restructuring for clarity. * Revert "Moving a pre-allocated log file into place is a rename, and so it shouldn't" This reverts commit 3b8b2a7a81b0542b2b87f29339c18c6e7a660ed2. * The log-file rename for a pre-allocated file can race with a hot-backup, we have to hold a read lock on the hot backup lock across the process. Minor shuffling & whitespace for clarity, move some variables closer to where they're used. * Tweak default settings, increase backups from 5% to 20%, logging from 30% to 50%. * __wt_cursor_close() doesn't need a local "ret" variable. * Minor tweak to catch more of the standard API_CALL macros.
-rw-r--r--dist/s_void3
-rw-r--r--src/block/block_write.c20
-rw-r--r--src/conn/conn_log.c20
-rw-r--r--src/cursor/cur_std.c3
-rw-r--r--src/log/log.c71
-rw-r--r--test/format/config.h8
6 files changed, 77 insertions, 48 deletions
diff --git a/dist/s_void b/dist/s_void
index 68c7ea06031..ab2f23a5fdd 100644
--- a/dist/s_void
+++ b/dist/s_void
@@ -44,7 +44,6 @@ func_ok()
-e '/int __posix_file_close$/d' \
-e '/int __posix_terminate$/d' \
-e '/int __rec_destroy_session$/d' \
- -e '/int __session_transaction_pinned_range$/d' \
-e '/int __win_terminate$/d' \
-e '/int __wt_block_compact_end$/d' \
-e '/int __wt_block_compact_start$/d' \
@@ -115,7 +114,7 @@ for f in `find bench ext src test -name '*.[ci]'`; do
# form of return assignment or call.
file_parse $f |
sed -e 's/return ([^)]*); }$//' \
- -e '/[A-Z]*_API_CALL(/d' \
+ -e '/[A-Z]*_API_CALL[A-Z_]*(/d' \
-e '/WT_CURSOR_NEEDKEY(/d' \
-e '/WT_CURSOR_NEEDVALUE(/d' \
-e '/WT_ERR[A-Z_]*(/d' \
diff --git a/src/block/block_write.c b/src/block/block_write.c
index d926a8b7c2d..36159314a4a 100644
--- a/src/block/block_write.c
+++ b/src/block/block_write.c
@@ -15,8 +15,11 @@
int
__wt_block_truncate(WT_SESSION_IMPL *session, WT_BLOCK *block, wt_off_t len)
{
+ WT_CONNECTION_IMPL *conn;
WT_DECL_RET;
+ conn = S2C(session);
+
__wt_verbose(session,
WT_VERB_BLOCK, "truncate file to %" PRIuMAX, (uintmax_t)len);
@@ -34,13 +37,17 @@ __wt_block_truncate(WT_SESSION_IMPL *session, WT_BLOCK *block, wt_off_t len)
* by system utilities. We cannot truncate the file during the backup
* window, we might surprise an application.
*
- * Stop block truncation. This affects files that aren't involved in the
- * backup (for example, doing incremental backups, which only copies log
- * files, or targeted backups, stops all truncation). We may want a more
- * targeted solution at some point.
+ * This affects files that aren't involved in the backup (for example,
+ * doing incremental backups, which only copies log files, or targeted
+ * backups, stops all block truncation unnecessarily). We may want a
+ * more targeted solution at some point.
*/
- if (S2C(session)->hot_backup)
- return (0);
+ if (!conn->hot_backup) {
+ __wt_readlock(session, conn->hot_backup_lock);
+ if (!conn->hot_backup)
+ ret = __wt_ftruncate(session, block->fh, len);
+ __wt_readunlock(session, conn->hot_backup_lock);
+ }
/*
* The truncate may fail temporarily or permanently (for example, there
@@ -48,7 +55,6 @@ __wt_block_truncate(WT_SESSION_IMPL *session, WT_BLOCK *block, wt_off_t len)
* POSIX system, in which case the underlying function returns EBUSY).
* It's OK, we don't have to be able to truncate files.
*/
- ret = __wt_ftruncate(session, block->fh, len);
return (ret == EBUSY || ret == ENOTSUP ? 0 : ret);
}
diff --git a/src/conn/conn_log.c b/src/conn/conn_log.c
index 6c05376f9ce..a8f0fe4810b 100644
--- a/src/conn/conn_log.c
+++ b/src/conn/conn_log.c
@@ -421,15 +421,27 @@ __log_file_server(void *arg)
* later syncs.
*/
WT_ERR(__wt_fsync(session, close_fh, true));
+
/*
* We want to have the file size reflect actual
* data with minimal pre-allocated zeroed space.
- * The underlying file system may not support
- * truncate, which is OK, it's just more work
+ * We can't truncate the file during hot backup,
+ * or the underlying file system may not support
+ * truncate: both are OK, it's just more work
* during cursor traversal.
*/
- WT_ERR_ERROR_OK(__wt_ftruncate(session,
- close_fh, close_end_lsn.l.offset), ENOTSUP);
+ if (!conn->hot_backup) {
+ __wt_readlock(
+ session, conn->hot_backup_lock);
+ if (!conn->hot_backup)
+ WT_ERR_ERROR_OK(
+ __wt_ftruncate(session,
+ close_fh,
+ close_end_lsn.l.offset),
+ ENOTSUP);
+ __wt_readunlock(
+ session, conn->hot_backup_lock);
+ }
WT_SET_LSN(&close_end_lsn,
close_end_lsn.l.file + 1, 0);
__wt_spin_lock(session, &log->log_sync_lock);
diff --git a/src/cursor/cur_std.c b/src/cursor/cur_std.c
index 86594179907..bf7280f796b 100644
--- a/src/cursor/cur_std.c
+++ b/src/cursor/cur_std.c
@@ -539,7 +539,6 @@ err: cursor->saved_err = ret;
int
__wt_cursor_close(WT_CURSOR *cursor)
{
- WT_DECL_RET;
WT_SESSION_IMPL *session;
session = (WT_SESSION_IMPL *)cursor->session;
@@ -557,7 +556,7 @@ __wt_cursor_close(WT_CURSOR *cursor)
__wt_free(session, cursor->internal_uri);
__wt_free(session, cursor->uri);
__wt_overwrite_and_free(session, cursor);
- return (ret);
+ return (0);
}
/*
diff --git a/src/log/log.c b/src/log/log.c
index 9514b2a2939..ec1bd10d0bc 100644
--- a/src/log/log.c
+++ b/src/log/log.c
@@ -799,14 +799,12 @@ __log_newfile(WT_SESSION_IMPL *session, bool conn_open, bool *created)
WT_FH *log_fh;
WT_LOG *log;
WT_LSN end_lsn;
- int yield_cnt;
+ u_int yield_cnt;
bool create_log;
conn = S2C(session);
log = conn->log;
- create_log = true;
- yield_cnt = 0;
/*
* Set aside the log file handle to be closed later. Other threads
* may still be using it to write to the log. If the log file size
@@ -814,7 +812,7 @@ __log_newfile(WT_SESSION_IMPL *session, bool conn_open, bool *created)
* Wait for that to close.
*/
WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_SLOT));
- while (log->log_close_fh != NULL) {
+ for (yield_cnt = 0; log->log_close_fh != NULL;) {
WT_STAT_FAST_CONN_INCR(session, log_close_yields);
__wt_log_wrlsn(session, NULL);
if (++yield_cnt > 10000)
@@ -834,30 +832,39 @@ __log_newfile(WT_SESSION_IMPL *session, bool conn_open, bool *created)
* Make sure everything we set above is visible.
*/
WT_FULL_BARRIER();
+
/*
- * If we're pre-allocating log files, look for one. If there aren't any
- * or we're not pre-allocating, or a backup cursor is open, then
- * create one.
+ * If pre-allocating log files look for one; otherwise, or if we don't
+ * find one create a log file. We can't use pre-allocated log files in
+ * while a hot backup is in progress: applications can copy the files
+ * in any way they choose, and a log file rename might confuse things.
*/
+ create_log = true;
if (conn->log_prealloc > 0 && !conn->hot_backup) {
- ret = __log_alloc_prealloc(session, log->fileid);
- /*
- * If ret is 0 it means we found a pre-allocated file.
- * If ret is non-zero but not WT_NOTFOUND, we return the error.
- * If ret is WT_NOTFOUND, we leave create_log set and create
- * the new log file.
- */
- if (ret == 0)
- create_log = false;
- /*
- * If we get any error other than WT_NOTFOUND, return it.
- */
- WT_RET_NOTFOUND_OK(ret);
+ __wt_readlock(session, conn->hot_backup_lock);
+ if (conn->hot_backup)
+ __wt_readunlock(session, conn->hot_backup_lock);
+ else {
+ ret = __log_alloc_prealloc(session, log->fileid);
+ __wt_readunlock(session, conn->hot_backup_lock);
- if (create_log) {
- WT_STAT_FAST_CONN_INCR(session, log_prealloc_missed);
- if (conn->log_cond != NULL)
- __wt_cond_auto_signal(session, conn->log_cond);
+ /*
+ * If ret is 0 it means we found a pre-allocated file.
+ * If ret is WT_NOTFOUND, create the new log file and
+ * signal the server, we missed our pre-allocation.
+ * If ret is non-zero but not WT_NOTFOUND, return the
+ * error.
+ */
+ WT_RET_NOTFOUND_OK(ret);
+ if (ret == 0)
+ create_log = false;
+ else {
+ WT_STAT_FAST_CONN_INCR(
+ session, log_prealloc_missed);
+ if (conn->log_cond != NULL)
+ __wt_cond_auto_signal(
+ session, conn->log_cond);
+ }
}
}
/*
@@ -968,11 +975,17 @@ __log_truncate_file(WT_SESSION_IMPL *session, WT_FH *log_fh, wt_off_t offset)
conn = S2C(session);
log = conn->log;
- if (!F_ISSET(log, WT_LOG_TRUNCATE_NOTSUP)) {
- if ((ret = __wt_ftruncate(session, log_fh, offset)) != ENOTSUP)
- return (ret);
-
- F_SET(log, WT_LOG_TRUNCATE_NOTSUP);
+ if (!F_ISSET(log, WT_LOG_TRUNCATE_NOTSUP) && !conn->hot_backup) {
+ __wt_readlock(session, conn->hot_backup_lock);
+ if (conn->hot_backup)
+ __wt_readunlock(session, conn->hot_backup_lock);
+ else {
+ ret = __wt_ftruncate(session, log_fh, offset);
+ __wt_readunlock(session, conn->hot_backup_lock);
+ if (ret != ENOTSUP)
+ return (ret);
+ F_SET(log, WT_LOG_TRUNCATE_NOTSUP);
+ }
}
return (__log_zero(session, log_fh, offset, conn->log_file_max));
diff --git a/test/format/config.h b/test/format/config.h
index 16fffb6fafe..6e3f5473479 100644
--- a/test/format/config.h
+++ b/test/format/config.h
@@ -70,8 +70,8 @@ static CONFIG c[] = {
C_BOOL, 90, 0, 0, &g.c_auto_throttle, NULL },
{ "backups",
- "if backups are enabled", /* 5% */
- C_BOOL, 5, 0, 0, &g.c_backups, NULL },
+ "if backups are enabled", /* 20% */
+ C_BOOL, 20, 0, 0, &g.c_backups, NULL },
{ "bitcnt",
"number of bits for fixed-length column-store files",
@@ -203,8 +203,8 @@ static CONFIG c[] = {
C_BOOL, 0, 0, 0, &g.c_leak_memory, NULL },
{ "logging",
- "if logging configured", /* 30% */
- C_BOOL, 30, 0, 0, &g.c_logging, NULL },
+ "if logging configured", /* 50% */
+ C_BOOL, 50, 0, 0, &g.c_logging, NULL },
{ "logging_archive",
"if log file archival configured", /* 50% */