summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--dist/s_string.ok1
-rw-r--r--src/log/log.c78
-rw-r--r--test/recovery/random-abort.c82
-rwxr-xr-xtest/recovery/smoke.sh1
4 files changed, 125 insertions, 37 deletions
diff --git a/dist/s_string.ok b/dist/s_string.ok
index 65c74b61995..d19c05e802b 100644
--- a/dist/s_string.ok
+++ b/dist/s_string.ok
@@ -863,6 +863,7 @@ lu
lwsync
lz
lzo
+mT
madvise
majorp
malloc
diff --git a/src/log/log.c b/src/log/log.c
index 00e4ea5f441..96b593ec706 100644
--- a/src/log/log.c
+++ b/src/log/log.c
@@ -21,6 +21,60 @@ static int __log_write_internal(
#define WT_LOG_OPEN_VERIFY 0x02
/*
+ * __log_wait_for_earlier_slot --
+ * Wait for write_lsn to catch up to this slot.
+ */
+static void
+__log_wait_for_earlier_slot(WT_SESSION_IMPL *session, WT_LOGSLOT *slot)
+{
+ WT_CONNECTION_IMPL *conn;
+ WT_LOG *log;
+ int yield_count;
+
+ conn = S2C(session);
+ log = conn->log;
+ yield_count = 0;
+
+ while (__wt_log_cmp(&log->write_lsn, &slot->slot_release_lsn) != 0) {
+ /*
+ * If we're on a locked path and the write LSN is not advancing,
+ * unlock in case an earlier thread is trying to switch its
+ * slot and complete its operation.
+ */
+ if (F_ISSET(session, WT_SESSION_LOCKED_SLOT))
+ __wt_spin_unlock(session, &log->log_slot_lock);
+ __wt_cond_auto_signal(session, conn->log_wrlsn_cond);
+ if (++yield_count < WT_THOUSAND)
+ __wt_yield();
+ else
+ __wt_cond_wait(session, log->log_write_cond, 200);
+ if (F_ISSET(session, WT_SESSION_LOCKED_SLOT))
+ __wt_spin_lock(session, &log->log_slot_lock);
+ }
+}
+
+/*
+ * __log_fs_write --
+ * Wrapper when writing to a log file. If we're writing to a new log
+ * file for the first time wait for writes to the previous log file.
+ */
+static int
+__log_fs_write(WT_SESSION_IMPL *session,
+ WT_LOGSLOT *slot, wt_off_t offset, size_t len, const void *buf)
+{
+ /*
+ * If we're writing into a new log file, we have to wait for all
+ * writes to the previous log file to complete otherwise there could
+ * be a hole at the end of the previous log file that we cannot detect.
+ */
+ if (slot->slot_release_lsn.l.file < slot->slot_start_lsn.l.file) {
+ __log_wait_for_earlier_slot(session, slot);
+ WT_RET(__wt_log_force_sync(session, &slot->slot_release_lsn));
+ }
+ return (__wt_write(session, slot->slot_fh, offset, len, buf));
+}
+
+/*
* __wt_log_ckpt --
* Record the given LSN as the checkpoint LSN and signal the archive
* thread as needed.
@@ -576,7 +630,7 @@ __log_fill(WT_SESSION_IMPL *session,
/*
* If this is a force or unbuffered write, write it now.
*/
- WT_ERR(__wt_write(session, myslot->slot->slot_fh,
+ WT_ERR(__log_fs_write(session, myslot->slot,
myslot->offset + myslot->slot->slot_start_offset,
record->size, record->mem));
@@ -1352,13 +1406,11 @@ __wt_log_release(WT_SESSION_IMPL *session, WT_LOGSLOT *slot, bool *freep)
WT_LSN sync_lsn;
int64_t release_buffered, release_bytes;
uint64_t fsync_duration_usecs;
- int yield_count;
bool locked;
conn = S2C(session);
log = conn->log;
locked = false;
- yield_count = 0;
if (freep != NULL)
*freep = 1;
release_buffered = WT_LOG_SLOT_RELEASED_BUFFERED(slot->slot_state);
@@ -1379,8 +1431,7 @@ __wt_log_release(WT_SESSION_IMPL *session, WT_LOGSLOT *slot, bool *freep)
/* Write the buffered records */
if (release_buffered != 0)
- WT_ERR(__wt_write(session,
- slot->slot_fh, slot->slot_start_offset,
+ WT_ERR(__log_fs_write(session, slot, slot->slot_start_offset,
(size_t)release_buffered, slot->slot_buf.mem));
/*
@@ -1411,22 +1462,7 @@ __wt_log_release(WT_SESSION_IMPL *session, WT_LOGSLOT *slot, bool *freep)
* be holes in the log file.
*/
WT_STAT_CONN_INCR(session, log_release_write_lsn);
- while (__wt_log_cmp(&log->write_lsn, &slot->slot_release_lsn) != 0) {
- /*
- * If we're on a locked path and the write LSN is not advancing,
- * unlock in case an earlier thread is trying to switch its
- * slot and complete its operation.
- */
- if (F_ISSET(session, WT_SESSION_LOCKED_SLOT))
- __wt_spin_unlock(session, &log->log_slot_lock);
- __wt_cond_auto_signal(session, conn->log_wrlsn_cond);
- if (++yield_count < WT_THOUSAND)
- __wt_yield();
- else
- __wt_cond_wait(session, log->log_write_cond, 200);
- if (F_ISSET(session, WT_SESSION_LOCKED_SLOT))
- __wt_spin_lock(session, &log->log_slot_lock);
- }
+ __log_wait_for_earlier_slot(session, slot);
log->write_start_lsn = slot->slot_start_lsn;
log->write_lsn = slot->slot_end_lsn;
diff --git a/test/recovery/random-abort.c b/test/recovery/random-abort.c
index 03e67e2f723..21460646017 100644
--- a/test/recovery/random-abort.c
+++ b/test/recovery/random-abort.c
@@ -34,6 +34,7 @@
static char home[512]; /* Program working dir */
static const char *progname; /* Program name */
static const char * const uri = "table:main";
+bool inmem;
#define MAX_TH 12
#define MIN_TH 5
@@ -41,7 +42,9 @@ static const char * const uri = "table:main";
#define MIN_TIME 10
#define RECORDS_FILE "records-%" PRIu32
-#define ENV_CONFIG \
+#define ENV_CONFIG_DEF \
+ "create,log=(file_max=10M,archive=false,enabled)"
+#define ENV_CONFIG_TXNSYNC \
"create,log=(file_max=10M,archive=false,enabled)," \
"transaction_sync=(enabled,method=none)"
#define ENV_CONFIG_REC "log=(recover=on)"
@@ -73,11 +76,15 @@ thread_run(void *arg)
WT_THREAD_DATA *td;
uint64_t i;
int ret;
- char buf[MAX_VAL], kname[64];
+ size_t lsize;
+ char buf[MAX_VAL], kname[64], lgbuf[8];
+ char large[128*1024];
__wt_random_init(&rnd);
memset(buf, 0, sizeof(buf));
memset(kname, 0, sizeof(kname));
+ lsize = sizeof(large);
+ memset(large, 0, lsize);
td = (WT_THREAD_DATA *)arg;
/*
@@ -85,6 +92,13 @@ thread_run(void *arg)
*/
snprintf(buf, sizeof(buf), RECORDS_FILE, td->id);
/*
+ * Set up a large value putting our id in it. Write it in there a
+ * bunch of times, but the rest of the buffer can just be zero.
+ */
+ snprintf(lgbuf, sizeof(lgbuf), "th-%" PRIu32, td->id);
+ for (i = 0; i < 128; i += strlen(lgbuf))
+ snprintf(&large[i], lsize - i, "%s", lgbuf);
+ /*
* Keep a separate file with the records we wrote for checking.
*/
(void)unlink(buf);
@@ -107,8 +121,18 @@ thread_run(void *arg)
*/
for (i = td->start; ; ++i) {
snprintf(kname, sizeof(kname), "%" PRIu64, i);
- data.size = __wt_random(&rnd) % MAX_VAL;
cursor->set_key(cursor, kname);
+ /*
+ * Every 30th record write a very large record that exceeds the
+ * log buffer size. This forces us to use the unbuffered path.
+ */
+ if (i % 30 == 0) {
+ data.size = 128 * 1024;
+ data.data = large;
+ } else {
+ data.size = __wt_random(&rnd) % MAX_VAL;
+ data.data = buf;
+ }
cursor->set_value(cursor, &data);
if ((ret = cursor->insert(cursor)) != 0)
testutil_die(ret, "WT_CURSOR.insert");
@@ -136,12 +160,17 @@ fill_db(uint32_t nth)
WT_THREAD_DATA *td;
uint32_t i;
int ret;
+ const char *envconf;
thr = dcalloc(nth, sizeof(pthread_t));
td = dcalloc(nth, sizeof(WT_THREAD_DATA));
if (chdir(home) != 0)
testutil_die(errno, "Child chdir: %s", home);
- if ((ret = wiredtiger_open(NULL, NULL, ENV_CONFIG, &conn)) != 0)
+ if (inmem)
+ envconf = ENV_CONFIG_DEF;
+ else
+ envconf = ENV_CONFIG_TXNSYNC;
+ if ((ret = wiredtiger_open(NULL, NULL, envconf, &conn)) != 0)
testutil_die(ret, "wiredtiger_open");
if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0)
testutil_die(ret, "WT_CONNECTION:open_session");
@@ -187,11 +216,11 @@ main(int argc, char *argv[])
WT_CURSOR *cursor;
WT_SESSION *session;
WT_RAND_STATE rnd;
- uint64_t key, last_key;
- uint32_t absent, count, i, nth, timeout;
+ uint64_t absent, count, key, last_key, middle;
+ uint32_t i, nth, timeout;
int ch, status, ret;
pid_t pid;
- bool rand_th, rand_time, verify_only;
+ bool fatal, rand_th, rand_time, verify_only;
const char *working_dir;
char fname[64], kname[64];
@@ -200,17 +229,21 @@ main(int argc, char *argv[])
else
++progname;
+ inmem = false;
nth = MIN_TH;
rand_th = rand_time = true;
timeout = MIN_TIME;
verify_only = false;
working_dir = "WT_TEST.random-abort";
- while ((ch = __wt_getopt(progname, argc, argv, "h:T:t:v")) != EOF)
+ while ((ch = __wt_getopt(progname, argc, argv, "h:mT:t:v")) != EOF)
switch (ch) {
case 'h':
working_dir = __wt_optarg;
break;
+ case 'm':
+ inmem = true;
+ break;
case 'T':
rand_th = false;
nth = (uint32_t)atoi(__wt_optarg);
@@ -303,7 +336,9 @@ main(int argc, char *argv[])
testutil_die(ret, "WT_SESSION.open_cursor: %s", uri);
absent = count = 0;
+ fatal = false;
for (i = 0; i < nth; ++i) {
+ middle = 0;
snprintf(fname, sizeof(fname), RECORDS_FILE, i);
if ((fp = fopen(fname, "r")) == NULL) {
fprintf(stderr,
@@ -313,8 +348,10 @@ main(int argc, char *argv[])
/*
* For every key in the saved file, verify that the key exists
- * in the table after recovery. Since we did write-no-sync, we
- * expect every key to have been recovered.
+ * in the table after recovery. If we're doing in-memory
+ * log buffering we never expect a record missing in the middle,
+ * but records may be missing at the end. If we did
+ * write-no-sync, we expect every key to have been recovered.
*/
for (last_key = UINT64_MAX;; ++count, last_key = key) {
ret = fscanf(fp, "%" SCNu64 "\n", &key);
@@ -338,9 +375,20 @@ main(int argc, char *argv[])
if ((ret = cursor->search(cursor)) != 0) {
if (ret != WT_NOTFOUND)
testutil_die(ret, "search");
- printf("%s: no record with key %" PRIu64 "\n",
- fname, key);
- ++absent;
+ if (!inmem)
+ printf("%s: no record with key %"
+ PRIu64 "\n", fname, key);
+ absent++;
+ middle = key;
+ } else if (middle != 0) {
+ /*
+ * We should never find an existing key after
+ * we have detected one missing.
+ */
+ printf("%s: after absent record at %" PRIu64
+ " key %" PRIu64 " exists\n",
+ fname, middle, key);
+ fatal = true;
}
}
if (fclose(fp) != 0)
@@ -348,11 +396,13 @@ main(int argc, char *argv[])
}
if ((ret = conn->close(conn, NULL)) != 0)
testutil_die(ret, "WT_CONNECTION:close");
- if (absent) {
- printf("%" PRIu32 " record(s) absent from %" PRIu32 "\n",
+ if (fatal)
+ return (EXIT_FAILURE);
+ if (!inmem && absent) {
+ printf("%" PRIu64 " record(s) absent from %" PRIu64 "\n",
absent, count);
return (EXIT_FAILURE);
}
- printf("%" PRIu32 " records verified\n", count);
+ printf("%" PRIu64 " records verified\n", count);
return (EXIT_SUCCESS);
}
diff --git a/test/recovery/smoke.sh b/test/recovery/smoke.sh
index c7677b64503..ce0662d3b2b 100755
--- a/test/recovery/smoke.sh
+++ b/test/recovery/smoke.sh
@@ -5,4 +5,5 @@ set -e
# Smoke-test recovery as part of running "make check".
$TEST_WRAPPER ./random-abort -t 10 -T 5
+$TEST_WRAPPER ./random-abort -m -t 10 -T 5
$TEST_WRAPPER ./truncated-log