summary refs log tree commit diff
path: root/src/conn/conn_log.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/conn/conn_log.c')
-rw-r--r-- src/conn/conn_log.c 322
1 files changed, 210 insertions, 112 deletions
diff --git a/src/conn/conn_log.c b/src/conn/conn_log.c
index de4bf7268ed..2b115190b06 100644
--- a/src/conn/conn_log.c
+++ b/src/conn/conn_log.c
@@ -287,8 +287,9 @@ __log_file_server(void *arg)
WT_DECL_RET;
WT_FH *close_fh;
WT_LOG *log;
- WT_LSN close_end_lsn, close_lsn, min_lsn;
+ WT_LSN close_end_lsn, min_lsn;
WT_SESSION_IMPL *session;
+ uint32_t filenum;
int locked;
session = arg;
@@ -300,66 +301,97 @@ __log_file_server(void *arg)
* If there is a log file to close, make sure any outstanding
* write operations have completed, then fsync and close it.
*/
- if ((close_fh = log->log_close_fh) != NULL &&
- (ret = __wt_log_extract_lognum(session, close_fh->name,
- &close_lsn.file)) == 0 &&
- close_lsn.file < log->write_lsn.file) {
+ if ((close_fh = log->log_close_fh) != NULL) {
+ WT_ERR(__wt_log_extract_lognum(session, close_fh->name,
+ &filenum));
/*
- * We've copied the file handle, clear out the one in
- * log structure to allow it to be set again.
+ * We update the close file handle before updating the
+ * close LSN when changing files. It is possible we
+ * could see mismatched settings. If we do, yield
+ * until it is set. This should rarely happen.
*/
- log->log_close_fh = NULL;
- /*
- * Set the close_end_lsn to the LSN immediately after
- * ours. That is, the beginning of the next log file.
- * We need to know the LSN file number of our own close
- * in case earlier calls are still in progress and the
- * next one to move the sync_lsn into the next file for
- * later syncs.
- */
- close_lsn.offset = 0;
- close_end_lsn = close_lsn;
- close_end_lsn.file++;
- WT_ERR(__wt_fsync(session, close_fh));
- __wt_spin_lock(session, &log->log_sync_lock);
- locked = 1;
- WT_ERR(__wt_close(session, &close_fh));
- WT_ASSERT(session,
- WT_LOG_CMP(&close_end_lsn, &log->sync_lsn) >= 0);
- log->sync_lsn = close_end_lsn;
- WT_ERR(__wt_cond_signal(session, log->log_sync_cond));
- locked = 0;
- __wt_spin_unlock(session, &log->log_sync_lock);
+ while (log->log_close_lsn.file < filenum)
+ __wt_yield();
+
+ if (__wt_log_cmp(
+ &log->write_lsn, &log->log_close_lsn) >= 0) {
+ /*
+ * We've copied the file handle, clear out the
+ * one in the log structure to allow it to be
+ * set again. Copy the LSN before clearing
+ * the file handle.
+ * Use a barrier to make sure the compiler does
+ * not reorder the following two statements.
+ */
+ close_end_lsn = log->log_close_lsn;
+ WT_FULL_BARRIER();
+ log->log_close_fh = NULL;
+ /*
+ * Set the close_end_lsn to the LSN immediately
+ * after ours. That is, the beginning of the
+ * next log file. We need to know the LSN
+ * file number of our own close in case earlier
+ * calls are still in progress and the next one
+ * to move the sync_lsn into the next file for
+ * later syncs.
+ */
+ close_end_lsn.file++;
+ close_end_lsn.offset = 0;
+ WT_ERR(__wt_fsync(session, close_fh));
+ __wt_spin_lock(session, &log->log_sync_lock);
+ locked = 1;
+ WT_ERR(__wt_close(session, &close_fh));
+ WT_ASSERT(session, __wt_log_cmp(
+ &close_end_lsn, &log->sync_lsn) >= 0);
+ log->sync_lsn = close_end_lsn;
+ WT_ERR(__wt_cond_signal(
+ session, log->log_sync_cond));
+ locked = 0;
+ __wt_spin_unlock(session, &log->log_sync_lock);
+ }
}
/*
* If a later thread asked for a background sync, do it now.
*/
- if (WT_LOG_CMP(&log->bg_sync_lsn, &log->sync_lsn) > 0) {
+ if (__wt_log_cmp(&log->bg_sync_lsn, &log->sync_lsn) > 0) {
/*
* Save the latest write LSN which is the minimum
* we will have written to disk.
*/
min_lsn = log->write_lsn;
/*
- * The sync LSN we asked for better be smaller than
- * the current written LSN.
+ * We have to wait until the LSN we asked for is
+ * written. If it isn't, signal the wrlsn thread
+ * to get it written.
*/
- WT_ASSERT(session,
- WT_LOG_CMP(&log->bg_sync_lsn, &min_lsn) <= 0);
- WT_ERR(__wt_fsync(session, log->log_fh));
- __wt_spin_lock(session, &log->log_sync_lock);
- locked = 1;
- /*
- * The sync LSN could have advanced while we were
- * writing to disk.
- */
- if (WT_LOG_CMP(&log->sync_lsn, &min_lsn) <= 0) {
- log->sync_lsn = min_lsn;
+ if (__wt_log_cmp(&log->bg_sync_lsn, &min_lsn) <= 0) {
+ WT_ERR(__wt_fsync(session, log->log_fh));
+ __wt_spin_lock(session, &log->log_sync_lock);
+ locked = 1;
+ /*
+ * The sync LSN could have advanced while we
+ * were writing to disk.
+ */
+ if (__wt_log_cmp(
+ &log->sync_lsn, &min_lsn) <= 0) {
+ log->sync_lsn = min_lsn;
+ WT_ERR(__wt_cond_signal(
+ session, log->log_sync_cond));
+ }
+ locked = 0;
+ __wt_spin_unlock(session, &log->log_sync_lock);
+ } else {
WT_ERR(__wt_cond_signal(
- session, log->log_sync_cond));
+ session, conn->log_wrlsn_cond));
+ /*
+ * We do not want to wait potentially a second
+ * to process this. Yield to give the wrlsn
+ * thread a chance to run and try again in
+ * this case.
+ */
+ __wt_yield();
+ continue;
}
- locked = 0;
- __wt_spin_unlock(session, &log->log_sync_lock);
}
/* Wait until the next event. */
WT_ERR(__wt_cond_wait(
@@ -394,26 +426,29 @@ typedef struct {
/*
* __wt_log_wrlsn --
* Process written log slots and attempt to coalesce them if the LSNs
- * are contiguous. Returns 1 if slots were freed, 0 if no slots were
- * freed in the progress arg. Must be called with the log slot lock held.
+ * are contiguous. The purpose of this function is to advance the
+ * write_lsn in LSN order after the buffer is written to the log file.
*/
int
-__wt_log_wrlsn(WT_SESSION_IMPL *session, uint32_t *free_i, int *yield)
+__wt_log_wrlsn(WT_SESSION_IMPL *session)
{
WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
WT_LOG *log;
WT_LOG_WRLSN_ENTRY written[WT_SLOT_POOL];
WT_LOGSLOT *coalescing, *slot;
+ WT_LSN save_lsn;
size_t written_i;
uint32_t i, save_i;
conn = S2C(session);
log = conn->log;
+ __wt_spin_lock(session, &log->log_writelsn_lock);
+restart:
coalescing = NULL;
+ WT_INIT_LSN(&save_lsn);
written_i = 0;
i = 0;
- if (free_i != NULL)
- *free_i = WT_SLOT_POOL;
/*
* Walk the array once saving any slots that are in the
@@ -422,9 +457,14 @@ __wt_log_wrlsn(WT_SESSION_IMPL *session, uint32_t *free_i, int *yield)
while (i < WT_SLOT_POOL) {
save_i = i;
slot = &log->slot_pool[i++];
- if (free_i != NULL && *free_i == WT_SLOT_POOL &&
- slot->slot_state == WT_LOG_SLOT_FREE)
- *free_i = save_i;
+ /*
+ * XXX - During debugging I saw slot 0 become orphaned.
+ * I believe it is fixed, but check for now.
+ * This assertion should catch that.
+ */
+ if (slot->slot_state == 0)
+ WT_ASSERT(session,
+ slot->slot_release_lsn.file >= log->write_lsn.file);
if (slot->slot_state != WT_LOG_SLOT_WRITTEN)
continue;
written[written_i].slot_index = save_i;
@@ -435,15 +475,8 @@ __wt_log_wrlsn(WT_SESSION_IMPL *session, uint32_t *free_i, int *yield)
* based on the release LSN, and then look for them in order.
*/
if (written_i > 0) {
- /*
- * If wanted, reset the yield variable to indicate that we
- * have found written slots.
- */
- if (yield != NULL)
- *yield = 0;
WT_INSERTION_SORT(written, written_i,
WT_LOG_WRLSN_ENTRY, WT_WRLSN_ENTRY_CMP_LT);
-
/*
* We know the written array is sorted by LSN. Go
* through them either advancing write_lsn or coalesce
@@ -451,8 +484,28 @@ __wt_log_wrlsn(WT_SESSION_IMPL *session, uint32_t *free_i, int *yield)
*/
for (i = 0; i < written_i; i++) {
slot = &log->slot_pool[written[i].slot_index];
+ /*
+ * The log server thread pushes out slots periodically.
+ * Sometimes they are empty slots. If we find an
+ * empty slot, where empty means the start and end LSN
+ * are the same, free it and continue.
+ */
+ if (__wt_log_cmp(&slot->slot_start_lsn,
+ &slot->slot_release_lsn) == 0 &&
+ __wt_log_cmp(&slot->slot_start_lsn,
+ &slot->slot_end_lsn) == 0) {
+ __wt_log_slot_free(session, slot);
+ continue;
+ }
if (coalescing != NULL) {
- if (WT_LOG_CMP(&coalescing->slot_end_lsn,
+ /*
+ * If the write_lsn changed, we may be able to
+ * process slots. Try again.
+ */
+ if (__wt_log_cmp(
+ &log->write_lsn, &save_lsn) != 0)
+ goto restart;
+ if (__wt_log_cmp(&coalescing->slot_end_lsn,
&written[i].lsn) != 0) {
coalescing = slot;
continue;
@@ -461,6 +514,8 @@ __wt_log_wrlsn(WT_SESSION_IMPL *session, uint32_t *free_i, int *yield)
* If we get here we have a slot to coalesce
* and free.
*/
+ coalescing->slot_last_offset =
+ slot->slot_last_offset;
coalescing->slot_end_lsn = slot->slot_end_lsn;
WT_STAT_FAST_CONN_INCR(
session, log_slot_coalesced);
@@ -473,8 +528,12 @@ __wt_log_wrlsn(WT_SESSION_IMPL *session, uint32_t *free_i, int *yield)
/*
* If this written slot is not the next LSN,
* try to start coalescing with later slots.
+ * A synchronous write may update write_lsn
+ * so save the last one we saw to check when
+ * coalescing slots.
*/
- if (WT_LOG_CMP(
+ save_lsn = log->write_lsn;
+ if (__wt_log_cmp(
&log->write_lsn, &written[i].lsn) != 0) {
coalescing = slot;
continue;
@@ -483,27 +542,29 @@ __wt_log_wrlsn(WT_SESSION_IMPL *session, uint32_t *free_i, int *yield)
* If we get here we have a slot to process.
* Advance the LSN and process the slot.
*/
- WT_ASSERT(session, WT_LOG_CMP(&written[i].lsn,
+ WT_ASSERT(session, __wt_log_cmp(&written[i].lsn,
&slot->slot_release_lsn) == 0);
+ if (slot->slot_start_lsn.offset !=
+ slot->slot_last_offset)
+ slot->slot_start_lsn.offset =
+ slot->slot_last_offset;
log->write_start_lsn = slot->slot_start_lsn;
log->write_lsn = slot->slot_end_lsn;
- WT_RET(__wt_cond_signal(
+ WT_ERR(__wt_cond_signal(
session, log->log_write_cond));
WT_STAT_FAST_CONN_INCR(session, log_write_lsn);
/*
* Signal the close thread if needed.
*/
if (F_ISSET(slot, WT_SLOT_CLOSEFH))
- WT_RET(__wt_cond_signal(
+ WT_ERR(__wt_cond_signal(
session, conn->log_file_cond));
}
- WT_RET(__wt_log_slot_free(session, slot));
- if (free_i != NULL && *free_i == WT_SLOT_POOL &&
- slot->slot_state == WT_LOG_SLOT_FREE)
- *free_i = save_i;
+ __wt_log_slot_free(session, slot);
}
}
- return (0);
+err: __wt_spin_unlock(session, &log->log_writelsn_lock);
+ return (ret);
}
/*
@@ -515,31 +576,26 @@ __log_wrlsn_server(void *arg)
{
WT_CONNECTION_IMPL *conn;
WT_DECL_RET;
- WT_LOG *log;
WT_SESSION_IMPL *session;
- int locked, yield;
session = arg;
conn = S2C(session);
- log = conn->log;
- locked = yield = 0;
while (F_ISSET(conn, WT_CONN_LOG_SERVER_RUN)) {
- __wt_spin_lock(session, &log->log_slot_lock);
- locked = 1;
- WT_ERR(__wt_log_wrlsn(session, NULL, &yield));
- locked = 0;
- __wt_spin_unlock(session, &log->log_slot_lock);
- if (++yield < 1000)
- __wt_yield();
- else
- WT_ERR(__wt_cond_wait(session,
- conn->log_wrlsn_cond, 100000));
+ /*
+ * Write out any log record buffers.
+ */
+ WT_ERR(__wt_log_wrlsn(session));
+ WT_ERR(__wt_cond_wait(session, conn->log_wrlsn_cond, 10000));
}
+ /*
+ * On close we need to do this one more time because there could
+ * be straggling log writes that need to be written.
+ */
+ WT_ERR(__wt_log_force_write(session, 1));
+ WT_ERR(__wt_log_wrlsn(session));
if (0) {
err: __wt_err(session, ret, "log wrlsn server error");
}
- if (locked)
- __wt_spin_unlock(session, &log->log_slot_lock);
return (WT_THREAD_RET_VALUE);
}
@@ -554,44 +610,81 @@ __log_server(void *arg)
WT_DECL_RET;
WT_LOG *log;
WT_SESSION_IMPL *session;
- u_int locked;
+ int freq_per_sec, signalled;
session = arg;
conn = S2C(session);
log = conn->log;
- locked = 0;
+ signalled = 0;
+
+ /*
+ * Set this to the number of times per second we want to force out the
+ * log slot buffer.
+ */
+#define WT_FORCE_PER_SECOND 20
+ freq_per_sec = WT_FORCE_PER_SECOND;
+
+ /*
+ * The log server thread does a variety of work. It forces out any
+ * buffered log writes. It pre-allocates log files and it performs
+ * log archiving. The reason the wrlsn thread does not force out
+ * the buffered writes is because we want to process and move the
+ * write_lsn forward as quickly as possible. The same reason applies
+ * to why the log file server thread does not force out the writes.
+ * That thread does fsync calls which can take a long time and we
+ * don't want log records sitting in the buffer over the time it
+ * takes to sync out an earlier file.
+ */
while (F_ISSET(conn, WT_CONN_LOG_SERVER_RUN)) {
/*
- * Perform log pre-allocation.
+ * Slots depend on future activity. Force out buffered
+ * writes in case we are idle. This cannot be part of the
+ * wrlsn thread because that thread advances the write_lsn,
+ * and a synchronous buffer may need to wait for the write_lsn
+ * to advance. If it were, we would end up with a hang.
*/
- if (conn->log_prealloc > 0)
- WT_ERR(__log_prealloc_once(session));
+ WT_ERR_BUSY_OK(__wt_log_force_write(session, 0));
/*
- * Perform the archive.
+ * We don't want to archive or pre-allocate files as often as
+ * we want to force out log buffers. Only do it once per second
+ * or if the condition was signalled.
*/
- if (FLD_ISSET(conn->log_flags, WT_CONN_LOG_ARCHIVE)) {
- if (__wt_try_writelock(
- session, log->log_archive_lock) == 0) {
- locked = 1;
- WT_ERR(__log_archive_once(session, 0));
- WT_ERR( __wt_writeunlock(
- session, log->log_archive_lock));
- locked = 0;
- } else
- WT_ERR(__wt_verbose(session, WT_VERB_LOG,
- "log_archive: Blocked due to open log "
- "cursor holding archive lock"));
+ if (--freq_per_sec <= 0 || signalled != 0) {
+ freq_per_sec = WT_FORCE_PER_SECOND;
+
+ /*
+ * Perform log pre-allocation.
+ */
+ if (conn->log_prealloc > 0)
+ WT_ERR(__log_prealloc_once(session));
+
+ /*
+ * Perform the archive.
+ */
+ if (FLD_ISSET(conn->log_flags, WT_CONN_LOG_ARCHIVE)) {
+ if (__wt_try_writelock(
+ session, log->log_archive_lock) == 0) {
+ ret = __log_archive_once(session, 0);
+ WT_TRET(__wt_writeunlock(
+ session, log->log_archive_lock));
+ WT_ERR(ret);
+ } else
+ WT_ERR(
+ __wt_verbose(session, WT_VERB_LOG,
+ "log_archive: Blocked due to open "
+ "log cursor holding archive lock"));
+ }
}
+
/* Wait until the next event. */
- WT_ERR(__wt_cond_wait(session, conn->log_cond, WT_MILLION));
+ WT_ERR(__wt_cond_wait_signal(session, conn->log_cond,
+ WT_MILLION / WT_FORCE_PER_SECOND, &signalled));
}
if (0) {
err: __wt_err(session, ret, "log server error");
}
- if (locked)
- (void)__wt_writeunlock(session, log->log_archive_lock);
return (WT_THREAD_RET_VALUE);
}
@@ -624,6 +717,8 @@ __wt_logmgr_create(WT_SESSION_IMPL *session, const char *cfg[])
WT_RET(__wt_spin_init(session, &log->log_lock, "log"));
WT_RET(__wt_spin_init(session, &log->log_slot_lock, "log slot"));
WT_RET(__wt_spin_init(session, &log->log_sync_lock, "log sync"));
+ WT_RET(__wt_spin_init(session, &log->log_writelsn_lock,
+ "log write LSN"));
WT_RET(__wt_rwlock_alloc(session,
&log->log_archive_lock, "log archive lock"));
if (FLD_ISSET(conn->direct_io, WT_FILE_TYPE_LOG))
@@ -755,13 +850,11 @@ __wt_logmgr_destroy(WT_SESSION_IMPL *session)
WT_TRET(__wt_thread_join(session, conn->log_tid));
conn->log_tid_set = 0;
}
- WT_TRET(__wt_cond_destroy(session, &conn->log_cond));
if (conn->log_file_tid_set) {
WT_TRET(__wt_cond_signal(session, conn->log_file_cond));
WT_TRET(__wt_thread_join(session, conn->log_file_tid));
conn->log_file_tid_set = 0;
}
- WT_TRET(__wt_cond_destroy(session, &conn->log_file_cond));
if (conn->log_file_session != NULL) {
wt_session = &conn->log_file_session->iface;
WT_TRET(wt_session->close(wt_session, NULL));
@@ -772,13 +865,13 @@ __wt_logmgr_destroy(WT_SESSION_IMPL *session)
WT_TRET(__wt_thread_join(session, conn->log_wrlsn_tid));
conn->log_wrlsn_tid_set = 0;
}
- WT_TRET(__wt_cond_destroy(session, &conn->log_wrlsn_cond));
if (conn->log_wrlsn_session != NULL) {
wt_session = &conn->log_wrlsn_session->iface;
WT_TRET(wt_session->close(wt_session, NULL));
conn->log_wrlsn_session = NULL;
}
+ WT_TRET(__wt_log_slot_destroy(session));
WT_TRET(__wt_log_close(session));
/* Close the server thread's session. */
@@ -788,13 +881,18 @@ __wt_logmgr_destroy(WT_SESSION_IMPL *session)
conn->log_session = NULL;
}
- WT_TRET(__wt_log_slot_destroy(session));
+ /* Destroy the condition variables now that all threads are stopped. */
+ WT_TRET(__wt_cond_destroy(session, &conn->log_cond));
+ WT_TRET(__wt_cond_destroy(session, &conn->log_file_cond));
+ WT_TRET(__wt_cond_destroy(session, &conn->log_wrlsn_cond));
+
WT_TRET(__wt_cond_destroy(session, &conn->log->log_sync_cond));
WT_TRET(__wt_cond_destroy(session, &conn->log->log_write_cond));
WT_TRET(__wt_rwlock_destroy(session, &conn->log->log_archive_lock));
__wt_spin_destroy(session, &conn->log->log_lock);
__wt_spin_destroy(session, &conn->log->log_slot_lock);
__wt_spin_destroy(session, &conn->log->log_sync_lock);
+ __wt_spin_destroy(session, &conn->log->log_writelsn_lock);
__wt_free(session, conn->log_path);
__wt_free(session, conn->log);
return (ret);