diff options
Diffstat (limited to 'src/third_party/wiredtiger/src/log/log.c')
-rw-r--r-- | src/third_party/wiredtiger/src/log/log.c | 170 |
1 files changed, 95 insertions, 75 deletions
diff --git a/src/third_party/wiredtiger/src/log/log.c b/src/third_party/wiredtiger/src/log/log.c index 8ec910115ac..14cb2b8c064 100644 --- a/src/third_party/wiredtiger/src/log/log.c +++ b/src/third_party/wiredtiger/src/log/log.c @@ -25,7 +25,7 @@ static int __log_write_internal( * Record the given LSN as the checkpoint LSN and signal the archive * thread as needed. */ -int +void __wt_log_ckpt(WT_SESSION_IMPL *session, WT_LSN *ckp_lsn) { WT_CONNECTION_IMPL *conn; @@ -35,8 +35,7 @@ __wt_log_ckpt(WT_SESSION_IMPL *session, WT_LSN *ckp_lsn) log = conn->log; log->ckpt_lsn = *ckp_lsn; if (conn->log_cond != NULL) - WT_RET(__wt_cond_auto_signal(session, conn->log_cond)); - return (0); + __wt_cond_auto_signal(session, conn->log_cond); } /* @@ -53,7 +52,7 @@ __wt_log_flush_lsn(WT_SESSION_IMPL *session, WT_LSN *lsn, bool start) conn = S2C(session); log = conn->log; WT_RET(__wt_log_force_write(session, 1, NULL)); - WT_RET(__wt_log_wrlsn(session, NULL)); + __wt_log_wrlsn(session, NULL); if (start) *lsn = log->write_start_lsn; else @@ -66,7 +65,7 @@ __wt_log_flush_lsn(WT_SESSION_IMPL *session, WT_LSN *lsn, bool start) * Record the given LSN as the background LSN and signal the * thread as needed. */ -int +void __wt_log_background(WT_SESSION_IMPL *session, WT_LSN *lsn) { WT_CONNECTION_IMPL *conn; @@ -78,7 +77,7 @@ __wt_log_background(WT_SESSION_IMPL *session, WT_LSN *lsn) * If a thread already set the LSN to a bigger LSN, we're done. */ if (__wt_log_cmp(&session->bg_sync_lsn, lsn) > 0) - return (0); + return; session->bg_sync_lsn = *lsn; /* @@ -89,7 +88,7 @@ __wt_log_background(WT_SESSION_IMPL *session, WT_LSN *lsn) if (__wt_log_cmp(lsn, &log->bg_sync_lsn) > 0) log->bg_sync_lsn = *lsn; __wt_spin_unlock(session, &log->log_sync_lock); - return (__wt_cond_signal(session, conn->log_file_cond)); + __wt_cond_signal(session, conn->log_file_cond); } /* @@ -115,9 +114,8 @@ __wt_log_force_sync(WT_SESSION_IMPL *session, WT_LSN *min_lsn) * log file ready to close. */ while (log->sync_lsn.l.file < min_lsn->l.file) { - WT_ERR(__wt_cond_signal(session, - S2C(session)->log_file_cond)); - WT_ERR(__wt_cond_wait(session, log->log_sync_cond, 10000)); + __wt_cond_signal(session, S2C(session)->log_file_cond); + __wt_cond_wait(session, log->log_sync_cond, 10000); } __wt_spin_lock(session, &log->log_sync_lock); WT_ASSERT(session, log->log_dir_fh != NULL); @@ -126,10 +124,10 @@ __wt_log_force_sync(WT_SESSION_IMPL *session, WT_LSN *min_lsn) * into the directory. */ if (log->sync_dir_lsn.l.file < min_lsn->l.file) { - WT_ERR(__wt_verbose(session, WT_VERB_LOG, + __wt_verbose(session, WT_VERB_LOG, "log_force_sync: sync directory %s to LSN %" PRIu32 "/%" PRIu32, - log->log_dir_fh->name, min_lsn->l.file, min_lsn->l.offset)); + log->log_dir_fh->name, min_lsn->l.file, min_lsn->l.offset); WT_ERR(__wt_epoch(session, &fsync_start)); WT_ERR(__wt_fsync(session, log->log_dir_fh, true)); WT_ERR(__wt_epoch(session, &fsync_stop)); @@ -151,9 +149,9 @@ __wt_log_force_sync(WT_SESSION_IMPL *session, WT_LSN *min_lsn) */ WT_ERR(__log_openfile(session, &log_fh, WT_LOG_FILENAME, min_lsn->l.file, 0)); - WT_ERR(__wt_verbose(session, WT_VERB_LOG, + __wt_verbose(session, WT_VERB_LOG, "log_force_sync: sync %s to LSN %" PRIu32 "/%" PRIu32, - log_fh->name, min_lsn->l.file, min_lsn->l.offset)); + log_fh->name, min_lsn->l.file, min_lsn->l.offset); WT_ERR(__wt_epoch(session, &fsync_start)); WT_ERR(__wt_fsync(session, log_fh, true)); WT_ERR(__wt_epoch(session, &fsync_stop)); @@ -163,7 +161,7 @@ __wt_log_force_sync(WT_SESSION_IMPL *session, WT_LSN *min_lsn) WT_STAT_FAST_CONN_INCRV(session, log_sync_duration, fsync_duration_usecs); WT_ERR(__wt_close(session, &log_fh)); - WT_ERR(__wt_cond_signal(session, log->log_sync_cond)); + __wt_cond_signal(session, log->log_sync_cond); } err: __wt_spin_unlock(session, &log->log_sync_lock); @@ -460,7 +458,7 @@ __log_prealloc(WT_SESSION_IMPL *session, WT_FH *fh) /* * If the user configured zero filling, pre-allocate the log file - * manually. Otherwise use either fallocate or ftruncate to create + * manually. Otherwise use the file extension method to create * and zero the log file based on what is available. */ if (FLD_ISSET(conn->log_flags, WT_CONN_LOG_ZERO_FILL)) @@ -471,13 +469,8 @@ __log_prealloc(WT_SESSION_IMPL *session, WT_FH *fh) * We have exclusive access to the log file and there are no other * writes happening concurrently, so there are no locking issues. */ - if ((ret = __wt_fallocate( - session, fh, WT_LOG_FIRST_RECORD, - conn->log_file_max - WT_LOG_FIRST_RECORD)) == 0) - return (0); - WT_RET_ERROR_OK(ret, ENOTSUP); - - return (__wt_ftruncate(session, fh, conn->log_file_max)); + ret = __wt_fextend(session, fh, conn->log_file_max); + return (ret == EBUSY || ret == ENOTSUP ? 0 : ret); } /* @@ -706,8 +699,8 @@ __log_openfile(WT_SESSION_IMPL *session, allocsize = log->allocsize; WT_RET(__wt_scr_alloc(session, 0, &buf)); WT_ERR(__log_filename(session, id, file_prefix, buf)); - WT_ERR(__wt_verbose(session, WT_VERB_LOG, - "opening log %s", (const char *)buf->data)); + __wt_verbose(session, WT_VERB_LOG, + "opening log %s", (const char *)buf->data); wtopen_flags = 0; if (LF_ISSET(WT_LOG_OPEN_CREATE_OK)) FLD_SET(wtopen_flags, WT_FS_OPEN_CREATE); @@ -778,9 +771,9 @@ __log_alloc_prealloc(WT_SESSION_IMPL *session, uint32_t to_num) WT_ERR(__wt_scr_alloc(session, 0, &to_path)); WT_ERR(__log_filename(session, from_num, WT_LOG_PREPNAME, from_path)); WT_ERR(__log_filename(session, to_num, WT_LOG_FILENAME, to_path)); - WT_ERR(__wt_verbose(session, WT_VERB_LOG, + __wt_verbose(session, WT_VERB_LOG, "log_alloc_prealloc: rename log %s to %s", - (char *)from_path->data, (char *)to_path->data)); + (const char *)from_path->data, (const char *)to_path->data); WT_STAT_FAST_CONN_INCR(session, log_prealloc_used); /* * All file setup, writing the header and pre-allocation was done @@ -823,7 +816,7 @@ __log_newfile(WT_SESSION_IMPL *session, bool conn_open, bool *created) WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_SLOT)); while (log->log_close_fh != NULL) { WT_STAT_FAST_CONN_INCR(session, log_close_yields); - WT_RET(__wt_log_wrlsn(session, NULL)); + __wt_log_wrlsn(session, NULL); if (++yield_cnt > 10000) return (EBUSY); __wt_yield(); @@ -864,8 +857,7 @@ __log_newfile(WT_SESSION_IMPL *session, bool conn_open, bool *created) if (create_log) { WT_STAT_FAST_CONN_INCR(session, log_prealloc_missed); if (conn->log_cond != NULL) - WT_RET(__wt_cond_auto_signal( - session, conn->log_cond)); + __wt_cond_auto_signal(session, conn->log_cond); } } /* @@ -960,6 +952,33 @@ __wt_log_acquire(WT_SESSION_IMPL *session, uint64_t recsize, WT_LOGSLOT *slot) } /* + * __log_truncate_file -- + * Truncate a log file to the specified offset. + * + * If the underlying file system doesn't support truncate then we need to + * zero out the rest of the file, doing an effective truncate. + */ +static int +__log_truncate_file(WT_SESSION_IMPL *session, WT_FH *log_fh, wt_off_t offset) +{ + WT_CONNECTION_IMPL *conn; + WT_DECL_RET; + WT_LOG *log; + + conn = S2C(session); + log = conn->log; + + if (!F_ISSET(log, WT_LOG_TRUNCATE_NOTSUP)) { + if ((ret = __wt_ftruncate(session, log_fh, offset)) != ENOTSUP) + return (ret); + + F_SET(log, WT_LOG_TRUNCATE_NOTSUP); + } + + return (__log_zero(session, log_fh, offset, conn->log_file_max)); +} + +/* * __log_truncate -- * Truncate the log to the given LSN. If this_log is set, it will only * truncate the log file indicated in the given LSN. If not set, @@ -989,10 +1008,15 @@ __log_truncate(WT_SESSION_IMPL *session, /* * Truncate the log file to the given LSN. - */ - WT_ERR(__log_openfile(session, - &log_fh, file_prefix, lsn->l.file, 0)); - WT_ERR(__wt_ftruncate(session, log_fh, lsn->l.offset)); + * + * It's possible the underlying file system doesn't support truncate + * (there are existing examples), which is fine, but we don't want to + * repeatedly do the setup work just to find that out every time. Check + * before doing work, and if there's a not-supported error, turn off + * future truncates. + */ + WT_ERR(__log_openfile(session, &log_fh, file_prefix, lsn->l.file, 0)); + WT_ERR(__log_truncate_file(session, log_fh, lsn->l.offset)); WT_ERR(__wt_fsync(session, log_fh, true)); WT_ERR(__wt_close(session, &log_fh)); @@ -1013,8 +1037,8 @@ __log_truncate(WT_SESSION_IMPL *session, * If there are intervening files pre-allocated, * truncate them to the end of the log file header. */ - WT_ERR(__wt_ftruncate(session, - log_fh, WT_LOG_FIRST_RECORD)); + WT_ERR(__log_truncate_file( + session, log_fh, WT_LOG_FIRST_RECORD)); WT_ERR(__wt_fsync(session, log_fh, true)); WT_ERR(__wt_close(session, &log_fh)); } @@ -1066,9 +1090,9 @@ __wt_log_allocfile( WT_ERR(__log_prealloc(session, log_fh)); WT_ERR(__wt_fsync(session, log_fh, true)); WT_ERR(__wt_close(session, &log_fh)); - WT_ERR(__wt_verbose(session, WT_VERB_LOG, + __wt_verbose(session, WT_VERB_LOG, "log_prealloc: rename %s to %s", - (char *)from_path->data, (char *)to_path->data)); + (const char *)from_path->data, (const char *)to_path->data); /* * Rename it into place and make it available. */ @@ -1093,8 +1117,8 @@ __wt_log_remove(WT_SESSION_IMPL *session, WT_RET(__wt_scr_alloc(session, 0, &path)); WT_ERR(__log_filename(session, lognum, file_prefix, path)); - WT_ERR(__wt_verbose(session, WT_VERB_LOG, - "log_remove: remove log %s", (char *)path->data)); + __wt_verbose(session, WT_VERB_LOG, + "log_remove: remove log %s", (const char *)path->data); WT_ERR(__wt_fs_remove(session, path->data, false)); err: __wt_scr_free(session, &path); return (ret); @@ -1128,8 +1152,8 @@ __wt_log_open(WT_SESSION_IMPL *session) * Open up a file handle to the log directory if we haven't. */ if (log->log_dir_fh == NULL) { - WT_RET(__wt_verbose(session, WT_VERB_LOG, - "log_open: open fh to directory %s", conn->log_path)); + __wt_verbose(session, WT_VERB_LOG, + "log_open: open fh to directory %s", conn->log_path); WT_RET(__wt_open(session, conn->log_path, WT_FS_OPEN_FILE_TYPE_DIRECTORY, 0, &log->log_dir_fh)); } @@ -1172,9 +1196,9 @@ __wt_log_open(WT_SESSION_IMPL *session) firstlog = WT_MIN(firstlog, lognum); } log->fileid = lastlog; - WT_ERR(__wt_verbose(session, WT_VERB_LOG, + __wt_verbose(session, WT_VERB_LOG, "log_open: first log %" PRIu32 " last log %" PRIu32, - firstlog, lastlog)); + firstlog, lastlog); if (firstlog == UINT32_MAX) { WT_ASSERT(session, logcount == 0); WT_INIT_LSN(&log->first_lsn); @@ -1217,23 +1241,23 @@ __wt_log_close(WT_SESSION_IMPL *session) log = conn->log; if (log->log_close_fh != NULL && log->log_close_fh != log->log_fh) { - WT_RET(__wt_verbose(session, WT_VERB_LOG, - "closing old log %s", log->log_close_fh->name)); + __wt_verbose(session, WT_VERB_LOG, + "closing old log %s", log->log_close_fh->name); if (!F_ISSET(conn, WT_CONN_READONLY)) WT_RET(__wt_fsync(session, log->log_close_fh, true)); WT_RET(__wt_close(session, &log->log_close_fh)); } if (log->log_fh != NULL) { - WT_RET(__wt_verbose(session, WT_VERB_LOG, - "closing log %s", log->log_fh->name)); + __wt_verbose(session, WT_VERB_LOG, + "closing log %s", log->log_fh->name); if (!F_ISSET(conn, WT_CONN_READONLY)) WT_RET(__wt_fsync(session, log->log_fh, true)); WT_RET(__wt_close(session, &log->log_fh)); log->log_fh = NULL; } if (log->log_dir_fh != NULL) { - WT_RET(__wt_verbose(session, WT_VERB_LOG, - "closing log directory %s", log->log_dir_fh->name)); + __wt_verbose(session, WT_VERB_LOG, + "closing log directory %s", log->log_dir_fh->name); if (!F_ISSET(conn, WT_CONN_READONLY)) WT_RET(__wt_fsync(session, log->log_dir_fh, true)); WT_RET(__wt_close(session, &log->log_dir_fh)); @@ -1384,28 +1408,27 @@ __wt_log_release(WT_SESSION_IMPL *session, WT_LOGSLOT *slot, bool *freep) */ if (F_ISSET(session, WT_SESSION_LOCKED_SLOT)) __wt_spin_unlock(session, &log->log_slot_lock); - WT_ERR(__wt_cond_auto_signal(session, conn->log_wrlsn_cond)); + __wt_cond_auto_signal(session, conn->log_wrlsn_cond); if (++yield_count < WT_THOUSAND) __wt_yield(); else - ret = __wt_cond_wait(session, log->log_write_cond, 200); + __wt_cond_wait(session, log->log_write_cond, 200); if (F_ISSET(session, WT_SESSION_LOCKED_SLOT)) __wt_spin_lock(session, &log->log_slot_lock); - WT_ERR(ret); } log->write_start_lsn = slot->slot_start_lsn; log->write_lsn = slot->slot_end_lsn; WT_ASSERT(session, slot != log->active_slot); - WT_ERR(__wt_cond_signal(session, log->log_write_cond)); + __wt_cond_signal(session, log->log_write_cond); F_CLR(slot, WT_SLOT_FLUSH); /* * Signal the close thread if needed. */ if (F_ISSET(slot, WT_SLOT_CLOSEFH)) - WT_ERR(__wt_cond_signal(session, conn->log_file_cond)); + __wt_cond_signal(session, conn->log_file_cond); /* * Try to consolidate calls to fsync to wait less. Acquire a spin lock @@ -1420,8 +1443,7 @@ __wt_log_release(WT_SESSION_IMPL *session, WT_LOGSLOT *slot, bool *freep) */ if (log->sync_lsn.l.file < slot->slot_end_lsn.l.file || __wt_spin_trylock(session, &log->log_sync_lock) != 0) { - WT_ERR(__wt_cond_wait( - session, log->log_sync_cond, 10000)); + __wt_cond_wait(session, log->log_sync_cond, 10000); continue; } locked = true; @@ -1440,11 +1462,11 @@ __wt_log_release(WT_SESSION_IMPL *session, WT_LOGSLOT *slot, bool *freep) if (F_ISSET(slot, WT_SLOT_SYNC_DIR) && (log->sync_dir_lsn.l.file < sync_lsn.l.file)) { WT_ASSERT(session, log->log_dir_fh != NULL); - WT_ERR(__wt_verbose(session, WT_VERB_LOG, + __wt_verbose(session, WT_VERB_LOG, "log_release: sync directory %s to LSN %" PRIu32 "/%" PRIu32, log->log_dir_fh->name, - sync_lsn.l.file, sync_lsn.l.offset)); + sync_lsn.l.file, sync_lsn.l.offset); WT_ERR(__wt_epoch(session, &fsync_start)); WT_ERR(__wt_fsync(session, log->log_dir_fh, true)); WT_ERR(__wt_epoch(session, &fsync_stop)); @@ -1461,11 +1483,11 @@ __wt_log_release(WT_SESSION_IMPL *session, WT_LOGSLOT *slot, bool *freep) */ if (F_ISSET(slot, WT_SLOT_SYNC) && __wt_log_cmp(&log->sync_lsn, &slot->slot_end_lsn) < 0) { - WT_ERR(__wt_verbose(session, WT_VERB_LOG, + __wt_verbose(session, WT_VERB_LOG, "log_release: sync log %s to LSN %" PRIu32 "/%" PRIu32, log->log_fh->name, - sync_lsn.l.file, sync_lsn.l.offset)); + sync_lsn.l.file, sync_lsn.l.offset); WT_STAT_FAST_CONN_INCR(session, log_sync); WT_ERR(__wt_epoch(session, &fsync_start)); WT_ERR(__wt_fsync(session, log->log_fh, true)); @@ -1475,7 +1497,7 @@ __wt_log_release(WT_SESSION_IMPL *session, WT_LOGSLOT *slot, bool *freep) WT_STAT_FAST_CONN_INCRV(session, log_sync_duration, fsync_duration_usecs); log->sync_lsn = sync_lsn; - WT_ERR(__wt_cond_signal(session, log->log_sync_cond)); + __wt_cond_signal(session, log->log_sync_cond); } /* * Clear the flags before leaving the loop. @@ -1535,9 +1557,9 @@ __wt_log_scan(WT_SESSION_IMPL *session, WT_LSN *lsnp, uint32_t flags, return (0); if (LF_ISSET(WT_LOGSCAN_RECOVER)) - WT_RET(__wt_verbose(session, WT_VERB_LOG, + __wt_verbose(session, WT_VERB_LOG, "__wt_log_scan truncating to %" PRIu32 "/%" PRIu32, - log->trunc_lsn.l.file, log->trunc_lsn.l.offset)); + log->trunc_lsn.l.file, log->trunc_lsn.l.offset); if (log != NULL) { allocsize = log->allocsize; @@ -2085,7 +2107,7 @@ __log_write_internal(WT_SESSION_IMPL *session, WT_ITEM *record, WT_LSN *lsnp, * XXX I've seen times when conditions are NULL. */ if (conn->log_cond != NULL) { - WT_ERR(__wt_cond_auto_signal(session, conn->log_cond)); + __wt_cond_auto_signal(session, conn->log_cond); __wt_yield(); } else WT_ERR(__wt_log_force_write(session, 1, NULL)); @@ -2094,21 +2116,19 @@ __log_write_internal(WT_SESSION_IMPL *session, WT_ITEM *record, WT_LSN *lsnp, /* Wait for our writes to reach the OS */ while (__wt_log_cmp(&log->write_lsn, &lsn) <= 0 && myslot.slot->slot_error == 0) - (void)__wt_cond_wait( - session, log->log_write_cond, 10000); + __wt_cond_wait(session, log->log_write_cond, 10000); } else if (LF_ISSET(WT_LOG_FSYNC)) { /* Wait for our writes to reach disk */ while (__wt_log_cmp(&log->sync_lsn, &lsn) <= 0 && myslot.slot->slot_error == 0) - (void)__wt_cond_wait( - session, log->log_sync_cond, 10000); + __wt_cond_wait(session, log->log_sync_cond, 10000); } /* * Advance the background sync LSN if needed. */ if (LF_ISSET(WT_LOG_BACKGROUND)) - WT_ERR(__wt_log_background(session, &lsn)); + __wt_log_background(session, &lsn); err: if (ret == 0 && lsnp != NULL) @@ -2176,8 +2196,8 @@ __wt_log_vprintf(WT_SESSION_IMPL *session, const char *fmt, va_list ap) (void)vsnprintf((char *)logrec->data + logrec->size, len, fmt, ap); - WT_ERR(__wt_verbose(session, WT_VERB_LOG, - "log_printf: %s", (char *)logrec->data + logrec->size)); + __wt_verbose(session, WT_VERB_LOG, + "log_printf: %s", (char *)logrec->data + logrec->size); logrec->size += len; WT_ERR(__wt_log_write(session, logrec, NULL, 0)); @@ -2223,16 +2243,16 @@ __wt_log_flush(WT_SESSION_IMPL *session, uint32_t flags) while (__wt_log_cmp(&last_lsn, &lsn) > 0) WT_RET(__wt_log_flush_lsn(session, &lsn, false)); - WT_RET(__wt_verbose(session, WT_VERB_LOG, + __wt_verbose(session, WT_VERB_LOG, "log_flush: flags %#" PRIx32 " LSN %" PRIu32 "/%" PRIu32, - flags, lsn.l.file, lsn.l.offset)); + flags, lsn.l.file, lsn.l.offset); /* * If the user wants write-no-sync, there is nothing more to do. * If the user wants background sync, set the LSN and we're done. * If the user wants sync, force it now. */ if (LF_ISSET(WT_LOG_BACKGROUND)) - WT_RET(__wt_log_background(session, &lsn)); + __wt_log_background(session, &lsn); else if (LF_ISSET(WT_LOG_FSYNC)) WT_RET(__wt_log_force_sync(session, &lsn)); return (0); |