diff options
author | sueloverso <sue@mongodb.com> | 2017-05-22 12:47:10 -0400 |
---|---|---|
committer | GitHub <noreply@github.com> | 2017-05-22 12:47:10 -0400 |
commit | 9946ed24f2187bfc724edd8eb46069fcb94e090f (patch) | |
tree | e5565d192e023966a48853d9e3d4355d16ff2cf2 | |
parent | d61a94a99c87a9e94e4bd959e2269b32d1a7dd22 (diff) | |
download | mongo-9946ed24f2187bfc724edd8eb46069fcb94e090f.tar.gz |
WT-3264 Fix log file cleanup when toggling logging. (#3419)
* WT-3264 Fix log file cleanup when toggling logging.
* whitespace
* Fix test loop bug
* Remove debugging. Test fixes. Reset slot.
* Update comments. Remove unused var.
* Rename goto label
-rw-r--r-- | src/conn/conn_log.c | 2 | ||||
-rw-r--r-- | src/include/extern.h | 3 | ||||
-rw-r--r-- | src/log/log.c | 55 | ||||
-rw-r--r-- | src/log/log_slot.c | 20 | ||||
-rw-r--r-- | src/txn/txn_recover.c | 25 | ||||
-rw-r--r-- | test/suite/test_txn09.py | 4 | ||||
-rw-r--r-- | test/suite/test_txn16.py | 140 |
7 files changed, 234 insertions, 15 deletions
diff --git a/src/conn/conn_log.c b/src/conn/conn_log.c index 08b572244af..dac16cc9d00 100644 --- a/src/conn/conn_log.c +++ b/src/conn/conn_log.c @@ -901,7 +901,7 @@ __wt_logmgr_create(WT_SESSION_IMPL *session, const char *cfg[]) WT_RET(__wt_cond_alloc(session, "log sync", &log->log_sync_cond)); WT_RET(__wt_cond_alloc(session, "log write", &log->log_write_cond)); WT_RET(__wt_log_open(session)); - WT_RET(__wt_log_slot_init(session)); + WT_RET(__wt_log_slot_init(session, true)); return (0); } diff --git a/src/include/extern.h b/src/include/extern.h index 232ea6ff4e2..a3ce0f3746f 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -368,6 +368,7 @@ extern int __wt_log_needs_recovery(WT_SESSION_IMPL *session, WT_LSN *ckp_lsn, bo extern void __wt_log_written_reset(WT_SESSION_IMPL *session); extern int __wt_log_get_all_files(WT_SESSION_IMPL *session, char ***filesp, u_int *countp, uint32_t *maxid, bool active_only) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_log_extract_lognum( WT_SESSION_IMPL *session, const char *name, uint32_t *id) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_log_reset(WT_SESSION_IMPL *session, uint32_t lognum) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_log_acquire(WT_SESSION_IMPL *session, uint64_t recsize, WT_LOGSLOT *slot) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_log_allocfile( WT_SESSION_IMPL *session, uint32_t lognum, const char *dest) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_log_remove(WT_SESSION_IMPL *session, const char *file_prefix, uint32_t lognum) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); @@ -404,7 +405,7 @@ extern int __wt_logop_row_truncate_print(WT_SESSION_IMPL *session, const uint8_t extern int __wt_txn_op_printlog(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern void __wt_log_slot_activate(WT_SESSION_IMPL *session, WT_LOGSLOT *slot); extern int __wt_log_slot_switch(WT_SESSION_IMPL *session, WT_MYSLOT *myslot, bool retry, bool forced, bool *did_work) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_log_slot_init(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_log_slot_init(WT_SESSION_IMPL *session, bool alloc) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_log_slot_destroy(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern void __wt_log_slot_join(WT_SESSION_IMPL *session, uint64_t mysize, uint32_t flags, WT_MYSLOT *myslot); extern int64_t __wt_log_slot_release(WT_MYSLOT *myslot, int64_t size); diff --git a/src/log/log.c b/src/log/log.c index c8ba7366f3f..0de881660b2 100644 --- a/src/log/log.c +++ b/src/log/log.c @@ -8,6 +8,7 @@ #include "wt_internal.h" +static int __log_newfile(WT_SESSION_IMPL *, bool, bool *); static int __log_openfile( WT_SESSION_IMPL *, WT_FH **, const char *, uint32_t, uint32_t); static int __log_write_internal( @@ -438,6 +439,60 @@ __wt_log_extract_lognum( } /* + * __wt_log_reset -- + * Reset the existing log file to after the given file number. + * Called from recovery when toggling logging back on, it was off + * the previous open but it was on earlier before that toggle. + */ +int +__wt_log_reset(WT_SESSION_IMPL *session, uint32_t lognum) +{ + WT_CONNECTION_IMPL *conn; + WT_DECL_RET; + WT_LOG *log; + uint32_t old_lognum; + u_int i, logcount; + char **logfiles; + + conn = S2C(session); + log = conn->log; + + if (!FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED) || + log->fileid > lognum) + return (0); + + WT_ASSERT(session, F_ISSET(conn, WT_CONN_RECOVERING)); + WT_ASSERT(session, !F_ISSET(conn, WT_CONN_READONLY)); + /* + * We know we're single threaded and called from recovery only when + * toggling logging back on. Therefore the only log files we have are + * old and outdated and the new one created when logging opened before + * recovery. We have to remove all old log files first and then create + * the new one so that log file numbers are contiguous in the file + * system. + */ + WT_RET(__wt_close(session, &log->log_fh)); + WT_RET(__log_get_files(session, + WT_LOG_FILENAME, &logfiles, &logcount)); + for (i = 0; i < logcount; i++) { + WT_ERR(__wt_log_extract_lognum( + session, logfiles[i], &old_lognum)); + WT_ASSERT(session, old_lognum < lognum); + WT_ERR(__wt_log_remove( + session, WT_LOG_FILENAME, old_lognum)); + } + log->fileid = lognum; + + /* Send in true to update connection creation LSNs. */ + WT_WITH_SLOT_LOCK(session, log, + ret = __log_newfile(session, true, NULL)); + WT_ERR(__wt_log_slot_init(session, false)); +err: WT_TRET( + __wt_fs_directory_list_free(session, &logfiles, logcount)); + return (ret); +} + +/* * __log_zero -- * Zero a log file. */ diff --git a/src/log/log_slot.c b/src/log/log_slot.c index 444babfda92..5bd3d53a973 100644 --- a/src/log/log_slot.c +++ b/src/log/log_slot.c @@ -402,7 +402,7 @@ __wt_log_slot_switch(WT_SESSION_IMPL *session, * Initialize the slot array. */ int -__wt_log_slot_init(WT_SESSION_IMPL *session) +__wt_log_slot_init(WT_SESSION_IMPL *session, bool alloc) { WT_CONNECTION_IMPL *conn; WT_DECL_RET; @@ -424,15 +424,17 @@ __wt_log_slot_init(WT_SESSION_IMPL *session) * switch log files very aggressively. Scale back the buffer for * small log file sizes. */ - log->slot_buf_size = (uint32_t)WT_MIN( - (size_t)conn->log_file_max / 10, WT_LOG_SLOT_BUF_SIZE); - for (i = 0; i < WT_SLOT_POOL; i++) { - WT_ERR(__wt_buf_init(session, - &log->slot_pool[i].slot_buf, log->slot_buf_size)); - F_SET(&log->slot_pool[i], WT_SLOT_INIT_FLAGS); + if (alloc) { + log->slot_buf_size = (uint32_t)WT_MIN( + (size_t)conn->log_file_max / 10, WT_LOG_SLOT_BUF_SIZE); + for (i = 0; i < WT_SLOT_POOL; i++) { + WT_ERR(__wt_buf_init(session, + &log->slot_pool[i].slot_buf, log->slot_buf_size)); + F_SET(&log->slot_pool[i], WT_SLOT_INIT_FLAGS); + } + WT_STAT_CONN_SET(session, + log_buffer_size, log->slot_buf_size * WT_SLOT_POOL); } - WT_STAT_CONN_SET(session, - log_buffer_size, log->slot_buf_size * WT_SLOT_POOL); /* * Set up the available slot from the pool the first time. */ diff --git a/src/txn/txn_recover.c b/src/txn/txn_recover.c index 97e8e98f8e0..590e17b6a2a 100644 --- a/src/txn/txn_recover.c +++ b/src/txn/txn_recover.c @@ -20,6 +20,7 @@ typedef struct { } *files; size_t file_alloc; /* Allocated size of files array. */ u_int max_fileid; /* Maximum file ID seen. */ + WT_LSN max_lsn; /* Maximum checkpoint LSN seen. */ u_int nfiles; /* Number of files in the metadata. */ WT_LSN ckpt_lsn; /* Start LSN for main recovery loop. */ @@ -342,6 +343,10 @@ __recovery_setup_file(WT_RECOVERY *r, const char *uri, const char *config) "Recovering %s with id %" PRIu32 " @ (%" PRIu32 ", %" PRIu32 ")", uri, fileid, lsn.l.file, lsn.l.offset); + if ((!WT_IS_MAX_LSN(&lsn) && !WT_IS_INIT_LSN(&lsn)) && + (WT_IS_MAX_LSN(&r->max_lsn) || __wt_log_cmp(&lsn, &r->max_lsn) > 0)) + r->max_lsn = lsn; + return (0); } @@ -428,6 +433,7 @@ __wt_txn_recover(WT_SESSION_IMPL *session) WT_RET(__wt_open_internal_session(conn, "txn-recover", false, WT_SESSION_NO_LOGGING, &session)); r.session = session; + WT_MAX_LSN(&r.max_lsn); F_SET(conn, WT_CONN_RECOVERING); WT_ERR(__wt_metadata_search(session, WT_METAFILE_URI, &config)); @@ -443,9 +449,24 @@ __wt_txn_recover(WT_SESSION_IMPL *session) */ if (!FLD_ISSET(conn->log_flags, WT_CONN_LOG_EXISTED) || WT_IS_MAX_LSN(&metafile->ckpt_lsn)) { + /* + * Detect if we're going from logging disabled to enabled. + * We need to know this to verify LSNs and start at the correct + * log file later. If someone ran with logging, then disabled + * it and removed all the log files and then turned logging back + * on, we have to start logging in the log file number that is + * larger than any checkpoint LSN we have from the earlier time. + */ WT_ERR(__recovery_file_scan(&r)); conn->next_file_id = r.max_fileid; - goto done; + + if (FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED) && + WT_IS_MAX_LSN(&metafile->ckpt_lsn) && + !WT_IS_MAX_LSN(&r.max_lsn)) { + WT_ERR(__wt_log_reset(session, r.max_lsn.l.file)); + goto ckpt; + } else + goto done; } /* @@ -556,7 +577,7 @@ __wt_txn_recover(WT_SESSION_IMPL *session) * open is fast and keep the metadata up to date with the checkpoint * LSN and archiving. */ - WT_ERR(session->iface.checkpoint(&session->iface, "force=1")); +ckpt: WT_ERR(session->iface.checkpoint(&session->iface, "force=1")); done: FLD_SET(conn->log_flags, WT_CONN_LOG_RECOVER_DONE); err: WT_TRET(__recovery_free(&r)); diff --git a/test/suite/test_txn09.py b/test/suite/test_txn09.py index e0710803f36..cc5771ef681 100644 --- a/test/suite/test_txn09.py +++ b/test/suite/test_txn09.py @@ -26,8 +26,8 @@ # ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR # OTHER DEALINGS IN THE SOFTWARE. # -# test_txn02.py -# Transactions: commits and rollbacks +# test_txn09.py +# Transactions: recovery toggling logging # import fnmatch, os, shutil, time diff --git a/test/suite/test_txn16.py b/test/suite/test_txn16.py new file mode 100644 index 00000000000..929da2291c7 --- /dev/null +++ b/test/suite/test_txn16.py @@ -0,0 +1,140 @@ +#!/usr/bin/env python +# +# Public Domain 2014-2017 MongoDB, Inc. +# Public Domain 2008-2014 WiredTiger, Inc. +# +# This is free and unencumbered software released into the public domain. +# +# Anyone is free to copy, modify, publish, use, compile, sell, or +# distribute this software, either in source code form or as a compiled +# binary, for any purpose, commercial or non-commercial, and by any +# means. +# +# In jurisdictions that recognize copyright laws, the author or authors +# of this software dedicate any and all copyright interest in the +# software to the public domain. We make this dedication for the benefit +# of the public at large and to the detriment of our heirs and +# successors. We intend this dedication to be an overt act of +# relinquishment in perpetuity of all present and future rights to this +# software under copyright law. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# +# test_txn16.py +# Recovery: Test that toggling between logging and not logging does not +# continue to generate more log files. +# + +import fnmatch, os, shutil, time +from suite_subprocess import suite_subprocess +import wttest + +class test_txn16(wttest.WiredTigerTestCase, suite_subprocess): + t1 = 'table:test_txn16_1' + t2 = 'table:test_txn16_2' + t3 = 'table:test_txn16_3' + nentries = 1000 + create_params = 'key_format=i,value_format=i' + # Set the log file size small so we generate checkpoints + # with LSNs in different files. + conn_config = 'config_base=false,' + \ + 'log=(archive=false,enabled,file_max=100K),' + \ + 'transaction_sync=(method=dsync,enabled)' + conn_on = 'config_base=false,' + \ + 'log=(archive=false,enabled,file_max=100K),' + \ + 'transaction_sync=(method=dsync,enabled)' + conn_off = 'config_base=false,log=(enabled=false)' + + def populate_table(self, uri): + self.session.create(uri, self.create_params) + c = self.session.open_cursor(uri, None, None) + # Populate with an occasional checkpoint to generate + # some varying LSNs. + for i in range(self.nentries): + c[i] = i + 1 + if i % 900 == 0: + self.session.checkpoint() + c.close() + + def copy_dir(self, olddir, newdir): + ''' Simulate a crash from olddir and restart in newdir. ''' + # with the connection still open, copy files to new directory + shutil.rmtree(newdir, ignore_errors=True) + os.mkdir(newdir) + for fname in os.listdir(olddir): + fullname = os.path.join(olddir, fname) + # Skip lock file on Windows since it is locked + if os.path.isfile(fullname) and \ + "WiredTiger.lock" not in fullname and \ + "Tmplog" not in fullname and \ + "Preplog" not in fullname: + shutil.copy(fullname, newdir) + # close the original connection. + self.close_conn() + + def run_toggle(self, homedir): + loop = 0 + # Record original log files. There should never be overlap + # with these even after they're removed. + orig_logs = fnmatch.filter(os.listdir(homedir), "*Log*") + while loop < 3: + # Reopen with logging on to run recovery first time + on_conn = self.wiredtiger_open(homedir, self.conn_on) + on_conn.close() + if loop > 0: + # Get current log files. + cur_logs = fnmatch.filter(os.listdir(homedir), "*Log*") + scur = set(cur_logs) + sorig = set(orig_logs) + # There should never be overlap with the log files that + # were there originally. Mostly this checks that after + # opening with logging disabled and then re-enabled, we + # don't see log file 1. + self.assertEqual(scur.isdisjoint(sorig), True) + if loop > 1: + # We should be creating the same log files each time. + for l in cur_logs: + self.assertEqual(l in last_logs, True) + for l in last_logs: + self.assertEqual(l in cur_logs, True) + last_logs = cur_logs + loop += 1 + # Remove all log files before opening without logging. + cur_logs = fnmatch.filter(os.listdir(homedir), "*Log*") + for l in cur_logs: + path=homedir + "/" + l + os.remove(path) + off_conn = self.wiredtiger_open(homedir, self.conn_off) + off_conn.close() + + def test_recovery(self): + ''' Check log file creation when toggling. ''' + + # Here's the strategy: + # - With logging populate 4 tables. Checkpoint + # them at different times. + # - Copy to a new directory to simulate a crash. + # - Close the original connection. + # On both a "copy" to simulate a crash and the original (3x): + # - Record log files existing. + # - Reopen with logging to run recovery. Close connection. + # - Record log files existing. + # - Remove all log files. + # - Open connection with logging disabled. + # - Record log files existing. Verify we don't keep adding. + # + self.populate_table(self.t1) + self.populate_table(self.t2) + self.populate_table(self.t3) + self.copy_dir(".", "RESTART") + self.run_toggle(".") + self.run_toggle("RESTART") + +if __name__ == '__main__': + wttest.run() |