diff options
author | Keith Bostic <keith.bostic@mongodb.com> | 2016-09-06 13:43:13 -0400 |
---|---|---|
committer | GitHub <noreply@github.com> | 2016-09-06 13:43:13 -0400 |
commit | 5500b29ad0f2d2196179f9c6c0448abc1f100122 (patch) | |
tree | a52ea377fe3508f8539197dfafd0d5a9feb6d2e3 | |
parent | 338d789506ffcd388f82a5551a070b7cc4c482ae (diff) | |
download | mongo-5500b29ad0f2d2196179f9c6c0448abc1f100122.tar.gz |
WT-2892 hot backup can race with block truncate (#3023)
* WT-2892 hot backup can race with block truncate
Hold a readlock across the block truncate, otherwise a hot backup
could potentially race with the truncation.
* Don't truncate log files if there's a hot backup in progress.
* Moving a pre-allocated log file into place is a rename, and so it shouldn't
be affected by a hot backup.
The only real change is removing the test of WT_CONNECTION_IMPL.hot_backup,
but some minor restructuring for clarity.
* Revert "Moving a pre-allocated log file into place is a rename, and so it shouldn't"
This reverts commit 3b8b2a7a81b0542b2b87f29339c18c6e7a660ed2.
* The log-file rename for a pre-allocated file can race with a hot-backup,
we have to hold a read lock on the hot backup lock across the process.
Minor shuffling & whitespace for clarity, move some variables closer to
where they're used.
* Tweak default settings, increase backups from 5% to 20%, logging
from 30% to 50%.
* __wt_cursor_close() doesn't need a local "ret" variable.
* Minor tweak to catch more of the standard API_CALL macros.
-rw-r--r-- | dist/s_void | 3 | ||||
-rw-r--r-- | src/block/block_write.c | 20 | ||||
-rw-r--r-- | src/conn/conn_log.c | 20 | ||||
-rw-r--r-- | src/cursor/cur_std.c | 3 | ||||
-rw-r--r-- | src/log/log.c | 71 | ||||
-rw-r--r-- | test/format/config.h | 8 |
6 files changed, 77 insertions, 48 deletions
diff --git a/dist/s_void b/dist/s_void index 68c7ea06031..ab2f23a5fdd 100644 --- a/dist/s_void +++ b/dist/s_void @@ -44,7 +44,6 @@ func_ok() -e '/int __posix_file_close$/d' \ -e '/int __posix_terminate$/d' \ -e '/int __rec_destroy_session$/d' \ - -e '/int __session_transaction_pinned_range$/d' \ -e '/int __win_terminate$/d' \ -e '/int __wt_block_compact_end$/d' \ -e '/int __wt_block_compact_start$/d' \ @@ -115,7 +114,7 @@ for f in `find bench ext src test -name '*.[ci]'`; do # form of return assignment or call. file_parse $f | sed -e 's/return ([^)]*); }$//' \ - -e '/[A-Z]*_API_CALL(/d' \ + -e '/[A-Z]*_API_CALL[A-Z_]*(/d' \ -e '/WT_CURSOR_NEEDKEY(/d' \ -e '/WT_CURSOR_NEEDVALUE(/d' \ -e '/WT_ERR[A-Z_]*(/d' \ diff --git a/src/block/block_write.c b/src/block/block_write.c index d926a8b7c2d..36159314a4a 100644 --- a/src/block/block_write.c +++ b/src/block/block_write.c @@ -15,8 +15,11 @@ int __wt_block_truncate(WT_SESSION_IMPL *session, WT_BLOCK *block, wt_off_t len) { + WT_CONNECTION_IMPL *conn; WT_DECL_RET; + conn = S2C(session); + __wt_verbose(session, WT_VERB_BLOCK, "truncate file to %" PRIuMAX, (uintmax_t)len); @@ -34,13 +37,17 @@ __wt_block_truncate(WT_SESSION_IMPL *session, WT_BLOCK *block, wt_off_t len) * by system utilities. We cannot truncate the file during the backup * window, we might surprise an application. * - * Stop block truncation. This affects files that aren't involved in the - * backup (for example, doing incremental backups, which only copies log - * files, or targeted backups, stops all truncation). We may want a more - * targeted solution at some point. + * This affects files that aren't involved in the backup (for example, + * doing incremental backups, which only copies log files, or targeted + * backups, stops all block truncation unnecessarily). We may want a + * more targeted solution at some point. */ - if (S2C(session)->hot_backup) - return (0); + if (!conn->hot_backup) { + __wt_readlock(session, conn->hot_backup_lock); + if (!conn->hot_backup) + ret = __wt_ftruncate(session, block->fh, len); + __wt_readunlock(session, conn->hot_backup_lock); + } /* * The truncate may fail temporarily or permanently (for example, there @@ -48,7 +55,6 @@ __wt_block_truncate(WT_SESSION_IMPL *session, WT_BLOCK *block, wt_off_t len) * POSIX system, in which case the underlying function returns EBUSY). * It's OK, we don't have to be able to truncate files. */ - ret = __wt_ftruncate(session, block->fh, len); return (ret == EBUSY || ret == ENOTSUP ? 0 : ret); } diff --git a/src/conn/conn_log.c b/src/conn/conn_log.c index 6c05376f9ce..a8f0fe4810b 100644 --- a/src/conn/conn_log.c +++ b/src/conn/conn_log.c @@ -421,15 +421,27 @@ __log_file_server(void *arg) * later syncs. */ WT_ERR(__wt_fsync(session, close_fh, true)); + /* * We want to have the file size reflect actual * data with minimal pre-allocated zeroed space. - * The underlying file system may not support - * truncate, which is OK, it's just more work + * We can't truncate the file during hot backup, + * or the underlying file system may not support + * truncate: both are OK, it's just more work * during cursor traversal. */ - WT_ERR_ERROR_OK(__wt_ftruncate(session, - close_fh, close_end_lsn.l.offset), ENOTSUP); + if (!conn->hot_backup) { + __wt_readlock( + session, conn->hot_backup_lock); + if (!conn->hot_backup) + WT_ERR_ERROR_OK( + __wt_ftruncate(session, + close_fh, + close_end_lsn.l.offset), + ENOTSUP); + __wt_readunlock( + session, conn->hot_backup_lock); + } WT_SET_LSN(&close_end_lsn, close_end_lsn.l.file + 1, 0); __wt_spin_lock(session, &log->log_sync_lock); diff --git a/src/cursor/cur_std.c b/src/cursor/cur_std.c index 86594179907..bf7280f796b 100644 --- a/src/cursor/cur_std.c +++ b/src/cursor/cur_std.c @@ -539,7 +539,6 @@ err: cursor->saved_err = ret; int __wt_cursor_close(WT_CURSOR *cursor) { - WT_DECL_RET; WT_SESSION_IMPL *session; session = (WT_SESSION_IMPL *)cursor->session; @@ -557,7 +556,7 @@ __wt_cursor_close(WT_CURSOR *cursor) __wt_free(session, cursor->internal_uri); __wt_free(session, cursor->uri); __wt_overwrite_and_free(session, cursor); - return (ret); + return (0); } /* diff --git a/src/log/log.c b/src/log/log.c index 9514b2a2939..ec1bd10d0bc 100644 --- a/src/log/log.c +++ b/src/log/log.c @@ -799,14 +799,12 @@ __log_newfile(WT_SESSION_IMPL *session, bool conn_open, bool *created) WT_FH *log_fh; WT_LOG *log; WT_LSN end_lsn; - int yield_cnt; + u_int yield_cnt; bool create_log; conn = S2C(session); log = conn->log; - create_log = true; - yield_cnt = 0; /* * Set aside the log file handle to be closed later. Other threads * may still be using it to write to the log. If the log file size @@ -814,7 +812,7 @@ __log_newfile(WT_SESSION_IMPL *session, bool conn_open, bool *created) * Wait for that to close. */ WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_SLOT)); - while (log->log_close_fh != NULL) { + for (yield_cnt = 0; log->log_close_fh != NULL;) { WT_STAT_FAST_CONN_INCR(session, log_close_yields); __wt_log_wrlsn(session, NULL); if (++yield_cnt > 10000) @@ -834,30 +832,39 @@ __log_newfile(WT_SESSION_IMPL *session, bool conn_open, bool *created) * Make sure everything we set above is visible. */ WT_FULL_BARRIER(); + /* - * If we're pre-allocating log files, look for one. If there aren't any - * or we're not pre-allocating, or a backup cursor is open, then - * create one. + * If pre-allocating log files look for one; otherwise, or if we don't + * find one create a log file. We can't use pre-allocated log files in + * while a hot backup is in progress: applications can copy the files + * in any way they choose, and a log file rename might confuse things. */ + create_log = true; if (conn->log_prealloc > 0 && !conn->hot_backup) { - ret = __log_alloc_prealloc(session, log->fileid); - /* - * If ret is 0 it means we found a pre-allocated file. - * If ret is non-zero but not WT_NOTFOUND, we return the error. - * If ret is WT_NOTFOUND, we leave create_log set and create - * the new log file. - */ - if (ret == 0) - create_log = false; - /* - * If we get any error other than WT_NOTFOUND, return it. - */ - WT_RET_NOTFOUND_OK(ret); + __wt_readlock(session, conn->hot_backup_lock); + if (conn->hot_backup) + __wt_readunlock(session, conn->hot_backup_lock); + else { + ret = __log_alloc_prealloc(session, log->fileid); + __wt_readunlock(session, conn->hot_backup_lock); - if (create_log) { - WT_STAT_FAST_CONN_INCR(session, log_prealloc_missed); - if (conn->log_cond != NULL) - __wt_cond_auto_signal(session, conn->log_cond); + /* + * If ret is 0 it means we found a pre-allocated file. + * If ret is WT_NOTFOUND, create the new log file and + * signal the server, we missed our pre-allocation. + * If ret is non-zero but not WT_NOTFOUND, return the + * error. + */ + WT_RET_NOTFOUND_OK(ret); + if (ret == 0) + create_log = false; + else { + WT_STAT_FAST_CONN_INCR( + session, log_prealloc_missed); + if (conn->log_cond != NULL) + __wt_cond_auto_signal( + session, conn->log_cond); + } } } /* @@ -968,11 +975,17 @@ __log_truncate_file(WT_SESSION_IMPL *session, WT_FH *log_fh, wt_off_t offset) conn = S2C(session); log = conn->log; - if (!F_ISSET(log, WT_LOG_TRUNCATE_NOTSUP)) { - if ((ret = __wt_ftruncate(session, log_fh, offset)) != ENOTSUP) - return (ret); - - F_SET(log, WT_LOG_TRUNCATE_NOTSUP); + if (!F_ISSET(log, WT_LOG_TRUNCATE_NOTSUP) && !conn->hot_backup) { + __wt_readlock(session, conn->hot_backup_lock); + if (conn->hot_backup) + __wt_readunlock(session, conn->hot_backup_lock); + else { + ret = __wt_ftruncate(session, log_fh, offset); + __wt_readunlock(session, conn->hot_backup_lock); + if (ret != ENOTSUP) + return (ret); + F_SET(log, WT_LOG_TRUNCATE_NOTSUP); + } } return (__log_zero(session, log_fh, offset, conn->log_file_max)); diff --git a/test/format/config.h b/test/format/config.h index 16fffb6fafe..6e3f5473479 100644 --- a/test/format/config.h +++ b/test/format/config.h @@ -70,8 +70,8 @@ static CONFIG c[] = { C_BOOL, 90, 0, 0, &g.c_auto_throttle, NULL }, { "backups", - "if backups are enabled", /* 5% */ - C_BOOL, 5, 0, 0, &g.c_backups, NULL }, + "if backups are enabled", /* 20% */ + C_BOOL, 20, 0, 0, &g.c_backups, NULL }, { "bitcnt", "number of bits for fixed-length column-store files", @@ -203,8 +203,8 @@ static CONFIG c[] = { C_BOOL, 0, 0, 0, &g.c_leak_memory, NULL }, { "logging", - "if logging configured", /* 30% */ - C_BOOL, 30, 0, 0, &g.c_logging, NULL }, + "if logging configured", /* 50% */ + C_BOOL, 50, 0, 0, &g.c_logging, NULL }, { "logging_archive", "if log file archival configured", /* 50% */ |