diff options
author | Luke Chen <luke.chen@mongodb.com> | 2018-07-23 15:53:49 +1000 |
---|---|---|
committer | Luke Chen <luke.chen@mongodb.com> | 2018-07-23 15:53:49 +1000 |
commit | e065e7daa99f75f19a73fc1b2b57a8947852a006 (patch) | |
tree | d2c141fab1e7334b6a552ecda525d29c1cbf20dd /src/third_party/wiredtiger/src/log/log.c | |
parent | 1c1535c9ee73ac4ed0d922855ccbe44335909082 (diff) | |
download | mongo-e065e7daa99f75f19a73fc1b2b57a8947852a006.tar.gz |
Import wiredtiger: 1be1b793becdfb5f4b2ae950449aa3710ca320ce from branch mongodb-4.2
ref: 3334609975..1be1b793be
for: 4.1.2
WT-3276 Add recover=salvage to recover from a corrupted log file
WT-3943 Include full error message when a python test asserts
WT-3955 Add verbose option to log more messages on error returns
WT-4160 Restore performance when timestamps are not in use
WT-4168 Update upgrading documentation for 3.1.0 release
WT-4169 Fix wt verify dump-pages failure
WT-4171 Enabling tree walk timing stress causes excessive slowdown
WT-4172 Add diagnostic hazard pointer checks in more places before freeing refs
WT-4174 Do not access the lookaside file in rollback_to_stable when running with in_memory=true
WT-4178 Fixes for wt_btree_immediately_durable needed for in-memory
WT-4179 Expose WiredTiger crc32c functions
WT-4182 Use conservative approach for log checksum errors
WT-4188 Coverity: unchecked return value complaints
Diffstat (limited to 'src/third_party/wiredtiger/src/log/log.c')
-rw-r--r-- | src/third_party/wiredtiger/src/log/log.c | 166 |
1 files changed, 140 insertions, 26 deletions
diff --git a/src/third_party/wiredtiger/src/log/log.c b/src/third_party/wiredtiger/src/log/log.c index 6ce26e03c5d..243af64cb18 100644 --- a/src/third_party/wiredtiger/src/log/log.c +++ b/src/third_party/wiredtiger/src/log/log.c @@ -10,6 +10,7 @@ static int __log_newfile(WT_SESSION_IMPL *, bool, bool *); static int __log_openfile(WT_SESSION_IMPL *, uint32_t, uint32_t, WT_FH **); +static int __log_truncate(WT_SESSION_IMPL *, WT_LSN *, bool, bool); static int __log_write_internal( WT_SESSION_IMPL *, WT_ITEM *, WT_LSN *, uint32_t); @@ -926,7 +927,7 @@ err: __wt_scr_free(session, &buf); */ static int __log_open_verify(WT_SESSION_IMPL *session, uint32_t id, WT_FH **fhp, - WT_LSN *lsnp, uint16_t *versionp) + WT_LSN *lsnp, uint16_t *versionp, bool *need_salvagep) { WT_CONNECTION_IMPL *conn; WT_DECL_ITEM(buf); @@ -937,11 +938,15 @@ __log_open_verify(WT_SESSION_IMPL *session, uint32_t id, WT_FH **fhp, WT_LOG_RECORD *logrec; uint32_t allocsize, rectype; const uint8_t *end, *p; + bool need_salvage, salvage_mode; conn = S2C(session); + fh = NULL; log = conn->log; + need_salvage = false; WT_RET(__wt_scr_alloc(session, 0, &buf)); - WT_ERR(__log_openfile(session, id, 0, &fh)); + salvage_mode = (need_salvagep != NULL && + FLD_ISSET(conn->log_flags, WT_CONN_LOG_RECOVER_SALVAGE)); if (log == NULL) allocsize = WT_LOG_ALIGN; @@ -953,17 +958,30 @@ __log_open_verify(WT_SESSION_IMPL *session, uint32_t id, WT_FH **fhp, memset(buf->mem, 0, allocsize); /* + * Any operation that fails from here on out indicates corruption + * that could be salvaged. + */ + need_salvage = true; + + /* * Read in the log file header and verify it. */ + WT_ERR(__log_openfile(session, id, 0, &fh)); WT_ERR(__wt_read(session, fh, 0, allocsize, buf->mem)); logrec = (WT_LOG_RECORD *)buf->mem; __wt_log_record_byteswap(logrec); desc = (WT_LOG_DESC *)logrec->record; __wt_log_desc_byteswap(desc); - if (desc->log_magic != WT_LOG_MAGIC) - WT_PANIC_RET(session, WT_ERROR, - "log file %s corrupted: Bad magic number %" PRIu32, - fh->name, desc->log_magic); + if (desc->log_magic != WT_LOG_MAGIC) { + if (salvage_mode) + WT_ERR_MSG(session, WT_ERROR, + "log file %s corrupted: Bad magic number %" PRIu32, + fh->name, desc->log_magic); + else + WT_PANIC_RET(session, WT_ERROR, + "log file %s corrupted: Bad magic number %" PRIu32, + fh->name, desc->log_magic); + } /* * We cannot read future log file formats. */ @@ -1043,7 +1061,14 @@ err: __wt_scr_free(session, &buf); */ if (fhp != NULL && ret == 0) *fhp = fh; - else + else if (ret != 0 && need_salvage && salvage_mode) { + /* Let the caller know this file must be salvaged. */ + ret = 0; + WT_TRET(__wt_close(session, &fh)); + if (fhp != NULL) + *fhp = NULL; + *need_salvagep = true; + } else WT_TRET(__wt_close(session, &fh)); return (ret); @@ -1131,7 +1156,7 @@ __log_newfile(WT_SESSION_IMPL *session, bool conn_open, bool *created) WT_STAT_CONN_INCR(session, log_close_yields); __wt_log_wrlsn(session, NULL); if (++yield_cnt > 10000) - return (EBUSY); + return (__wt_set_return(session, EBUSY)); __wt_yield(); } /* @@ -1200,7 +1225,8 @@ __log_newfile(WT_SESSION_IMPL *session, bool conn_open, bool *created) * we must pass in a local file handle. Otherwise there is a wide * window where another thread could see a NULL log file handle. */ - WT_RET(__log_open_verify(session, log->fileid, &log_fh, NULL, NULL)); + WT_RET(__log_open_verify(session, log->fileid, &log_fh, NULL, NULL, + NULL)); /* * Write the LSN at the end of the last record in the previous log file * as the first record in this log file. @@ -1412,25 +1438,31 @@ __log_truncate_file(WT_SESSION_IMPL *session, WT_FH *log_fh, wt_off_t offset) * it will truncate between the given LSN and the trunc_lsn. That is, * since we pre-allocate log files, it will free that space and allow the * log to be traversed. We use the trunc_lsn because logging has already - * opened the new/next log file before recovery ran. This function assumes - * we are in recovery or other dedicated time and not during live running. + * opened the new/next log file before recovery ran. If salvage_mode is + * set, we verify headers of log files visited and recreate them if they + * are damaged. This function assumes we are in recovery or other + * dedicated time and not during live running. */ static int -__log_truncate(WT_SESSION_IMPL *session, WT_LSN *lsn, bool this_log) +__log_truncate(WT_SESSION_IMPL *session, WT_LSN *lsn, bool this_log, + bool salvage_mode) { WT_CONNECTION_IMPL *conn; WT_DECL_RET; WT_FH *log_fh; WT_LOG *log; - uint32_t lognum; + uint32_t lognum, salvage_first, salvage_last; u_int i, logcount; char **logfiles; + bool need_salvage, opened; conn = S2C(session); log = conn->log; log_fh = NULL; logcount = 0; logfiles = NULL; + salvage_first = salvage_last = 0; + need_salvage = false; /* * Truncate the log file to the given LSN. @@ -1446,6 +1478,10 @@ __log_truncate(WT_SESSION_IMPL *session, WT_LSN *lsn, bool this_log) WT_ERR(__wt_fsync(session, log_fh, true)); WT_ERR(__wt_close(session, &log_fh)); + if (salvage_mode) + WT_ERR(__wt_msg(session, + "salvage: log file %" PRIu32 " truncated", lsn->l.file)); + /* * If we just want to truncate the current log, return and skip * looking for intervening logs. @@ -1456,7 +1492,32 @@ __log_truncate(WT_SESSION_IMPL *session, WT_LSN *lsn, bool this_log) for (i = 0; i < logcount; i++) { WT_ERR(__wt_log_extract_lognum(session, logfiles[i], &lognum)); if (lognum > lsn->l.file && lognum < log->trunc_lsn.l.file) { - WT_ERR(__log_openfile(session, lognum, 0, &log_fh)); + opened = false; + if (salvage_mode) { + /* + * When salvaging, we verify that the + * header of the log file is valid. + * If not, create a new, empty one. + */ + need_salvage = false; + WT_ERR(__log_open_verify(session, lognum, + &log_fh, NULL, NULL, &need_salvage)); + if (need_salvage) { + WT_ASSERT(session, log_fh == NULL); + WT_ERR(__wt_log_remove(session, + WT_LOG_FILENAME, lognum)); + WT_ERR(__wt_log_allocfile(session, + lognum, WT_LOG_FILENAME)); + } else + opened = true; + + if (salvage_first == 0) + salvage_first = lognum; + salvage_last = lognum; + } + if (!opened) + WT_ERR(__log_openfile(session, lognum, 0, + &log_fh)); /* * If there are intervening files pre-allocated, * truncate them to the end of the log file header. @@ -1469,6 +1530,17 @@ __log_truncate(WT_SESSION_IMPL *session, WT_LSN *lsn, bool this_log) } err: WT_TRET(__wt_close(session, &log_fh)); WT_TRET(__wt_fs_directory_list_free(session, &logfiles, logcount)); + if (salvage_first != 0) { + if (salvage_last > salvage_first) + WT_TRET(__wt_msg(session, + "salvage: log files %" PRIu32 "-%" PRIu32 + " truncated at beginning", salvage_first, + salvage_last)); + else + WT_TRET(__wt_msg(session, + "salvage: log file %" PRIu32 + " truncated at beginning", salvage_first)); + } return (ret); } @@ -1566,13 +1638,12 @@ __wt_log_open(WT_SESSION_IMPL *session) uint16_t version; u_int i, logcount; char **logfiles; + bool need_salvage; conn = S2C(session); log = conn->log; logfiles = NULL; logcount = 0; - lastlog = 0; - firstlog = UINT32_MAX; /* * Open up a file handle to the log directory if we haven't. @@ -1587,9 +1658,14 @@ __wt_log_open(WT_SESSION_IMPL *session) if (!F_ISSET(conn, WT_CONN_READONLY)) WT_ERR(__log_prealloc_remove(session)); +again: /* * Now look at the log files and set our LSNs. */ + lastlog = 0; + firstlog = UINT32_MAX; + need_salvage = false; + WT_ERR(__log_get_files(session, WT_LOG_FILENAME, &logfiles, &logcount)); for (i = 0; i < logcount; i++) { WT_ERR(__wt_log_extract_lognum(session, logfiles[i], &lognum)); @@ -1610,8 +1686,23 @@ __wt_log_open(WT_SESSION_IMPL *session) * we create a new log file so that we can detect an unsupported * version before modifying the file space. */ - WT_ERR(__log_open_verify(session, - lastlog, NULL, NULL, &version)); + WT_ERR(__log_open_verify(session, lastlog, NULL, NULL, + &version, &need_salvage)); + + /* + * If we were asked to salvage and the last log file was + * indeed corrupt, remove it and try all over again. + */ + if (need_salvage) { + WT_ERR(__wt_log_remove( + session, WT_LOG_FILENAME, lastlog)); + WT_ERR(__wt_msg(session, + "salvage: log file %" PRIu32 " removed", lastlog)); + WT_ERR(__wt_fs_directory_list_free(session, &logfiles, + logcount)); + logfiles = NULL; + goto again; + } } /* @@ -1641,7 +1732,7 @@ __wt_log_open(WT_SESSION_IMPL *session) * have to close the file. */ WT_ERR(__log_open_verify(session, - lognum, NULL, NULL, &version)); + lognum, NULL, NULL, &version, NULL)); /* * If we find any log file at the wrong version * set the flag and we're done. @@ -1957,7 +2048,7 @@ __wt_log_scan(WT_SESSION_IMPL *session, WT_LSN *lsnp, uint32_t flags, u_int i, logcount; int firstrecord; char **logfiles; - bool eol, partial_record; + bool eol, need_salvage, partial_record; conn = S2C(session); log = conn->log; @@ -1966,6 +2057,7 @@ __wt_log_scan(WT_SESSION_IMPL *session, WT_LSN *lsnp, uint32_t flags, logfiles = NULL; eol = false; firstrecord = 1; + need_salvage = false; /* * If the caller did not give us a callback function there is nothing @@ -2064,8 +2156,9 @@ __wt_log_scan(WT_SESSION_IMPL *session, WT_LSN *lsnp, uint32_t flags, if (!WT_IS_INIT_LSN(lsnp)) start_lsn = *lsnp; } - WT_ERR(__log_open_verify(session, - start_lsn.l.file, &log_fh, &prev_lsn, NULL)); + WT_ERR(__log_open_verify(session, start_lsn.l.file, &log_fh, &prev_lsn, + NULL, &need_salvage)); + WT_ERR_TEST(need_salvage, WT_ERROR); WT_ERR(__wt_filesize(session, log_fh, &log_size)); rd_lsn = start_lsn; if (LF_ISSET(WT_LOGSCAN_RECOVER)) @@ -2103,7 +2196,8 @@ advance: __wt_verbose(session, WT_VERB_LOG, "Truncate end of log %" PRIu32 "/%" PRIu32, rd_lsn.l.file, rd_lsn.l.offset); - WT_ERR(__log_truncate(session, &rd_lsn, true)); + WT_ERR(__log_truncate(session, &rd_lsn, true, + false)); } /* * If we had a partial record, we'll want to break @@ -2128,7 +2222,9 @@ advance: " through %" PRIu32, rd_lsn.l.file, end_lsn.l.file); WT_ERR(__log_open_verify(session, - rd_lsn.l.file, &log_fh, &prev_lsn, &version)); + rd_lsn.l.file, &log_fh, &prev_lsn, &version, + &need_salvage)); + WT_ERR_TEST(need_salvage, WT_ERROR); /* * Opening the log file reads with verify sets up the * previous LSN from the first record. This detects @@ -2161,10 +2257,15 @@ advance: } /* * Read the minimum allocation size a record could be. + * Conditionally set the need_salvage flag so that if the + * read fails, we know this is an situation we can salvage. */ WT_ASSERT(session, buf->memsize >= allocsize); + need_salvage = FLD_ISSET(conn->log_flags, + WT_CONN_LOG_RECOVER_SALVAGE); WT_ERR(__wt_read(session, log_fh, rd_lsn.l.offset, (size_t)allocsize, buf->mem)); + need_salvage = false; /* * See if we need to read more than the allocation size. We * expect that we rarely will have to read more. Most log @@ -2215,7 +2316,9 @@ advance: WT_STAT_CONN_INCR(session, log_scan_rereads); } /* - * We read in the record, verify checksum. + * We read in the record, verify checksum. A failed checksum + * does not imply corruption, it may be the result of a + * partial write. * * Handle little- and big-endian objects. Objects are written * in little-endian format: save the header checksum, and @@ -2291,11 +2394,22 @@ advance: __wt_verbose(session, WT_VERB_LOG, "End of recovery truncate end of log %" PRIu32 "/%" PRIu32, rd_lsn.l.file, rd_lsn.l.offset); - WT_ERR(__log_truncate(session, &rd_lsn, false)); + WT_ERR(__log_truncate(session, &rd_lsn, false, false)); } err: WT_STAT_CONN_INCR(session, log_scans); /* + * If we are salvaging and failed a salvageable operation, then + * truncate the log at the fail point. + */ + if (ret != 0 && need_salvage) { + WT_TRET(__wt_close(session, &log_fh)); + log_fh = NULL; + WT_TRET(__log_truncate(session, &rd_lsn, false, true)); + ret = 0; + } + + /* * If the first attempt to read a log record results in * an error recovery is likely going to fail. Try to provide * a helpful failure message. |