diff options
author | Mathias Stearn <redbeard0531@gmail.com> | 2014-09-16 18:29:29 -0400 |
---|---|---|
committer | Mathias Stearn <redbeard0531@gmail.com> | 2014-09-17 14:14:32 -0400 |
commit | 7bca29e784b536e90387974bfa5a451ce15161a5 (patch) | |
tree | 97777c7aec1a61455c8cac91f6ddd74985102561 /src | |
parent | 722777370bef11db921efff5b0a50242c881afd8 (diff) | |
download | mongo-7bca29e784b536e90387974bfa5a451ce15161a5.tar.gz |
SERVER-15111 Treat corruption of final journal section as an expected event
Manual backport of the following commits (combined):
8e1f5beabfad09c790e46826e8b3c7dcc5070d8d
6e93b33179e71abce820e534b3d32f1e593f71ca
Diffstat (limited to 'src')
-rw-r--r-- | src/mongo/db/dur_recover.cpp | 64 | ||||
-rw-r--r-- | src/mongo/shell/shell_utils_extended.cpp | 23 |
2 files changed, 50 insertions, 37 deletions
diff --git a/src/mongo/db/dur_recover.cpp b/src/mongo/db/dur_recover.cpp index 31626542b77..2e972b45d86 100644 --- a/src/mongo/db/dur_recover.cpp +++ b/src/mongo/db/dur_recover.cpp @@ -60,6 +60,15 @@ using namespace mongoutils; namespace mongo { + /** + * Thrown when a journal section is corrupt. This is considered OK as long as it occurs while + * processing the last file. Processing stops at the first corrupt section. + * + * Any logging about the nature of the corruption should happen before throwing as this class + * contains no data. + */ + class JournalSectionCorruptException {}; + namespace dur { struct ParsedJournalEntry { /*copyable*/ @@ -123,9 +132,10 @@ namespace mongo { verify( doDurOpsRecovering ); bool ok = uncompress((const char *)compressed, compressedLen, &_uncompressed); if( !ok ) { - // it should always be ok (i think?) as there is a previous check to see that the JSectFooter is ok + // We check the checksum before we uncompress, but this may still fail as the + // checksum isn't foolproof. log() << "couldn't uncompress journal section" << endl; - msgasserted(15874, "couldn't uncompress journal section"); + throw JournalSectionCorruptException(); } const char *p = _uncompressed.c_str(); verify( compressedLen == _h.sectionLen() - sizeof(JSectFooter) - sizeof(JSectHeader) ); @@ -174,7 +184,11 @@ namespace mongo { const unsigned limit = std::min((unsigned)Namespace::MaxNsLenWithNUL, _entries->remaining()); const unsigned len = strnlen(_lastDbName, limit); - massert(13533, "problem processing journal file during recovery", _lastDbName[len] == '\0'); + if (_lastDbName[len] != '\0') { + log() << "problem processing journal file during recovery"; + throw JournalSectionCorruptException(); + } + _entries->skip(len+1); // skip '\0' too _entries->read(lenOrOpCode); // read this for the fall through } @@ -371,10 +385,15 @@ namespace mongo { scoped_lock lk(_mx); RACECHECK - /** todo: we should really verify the checksum to see that seqNumber is ok? 
- that is expensive maybe there is some sort of checksum of just the header - within the header itself - */ + // Check the footer checksum before doing anything else. + if (_recovering) { + verify( ((const char *)h) + sizeof(JSectHeader) == p ); + if (!f->checkHash(h, len + sizeof(JSectHeader))) { + log() << "journal section checksum doesn't match"; + throw JournalSectionCorruptException(); + } + } + if( _recovering && _lastDataSyncedFromLastRun > h->seqNumber + ExtraKeepTimeMs ) { if( h->seqNumber != _lastSeqMentionedInConsoleLog ) { static int n; @@ -417,14 +436,6 @@ namespace mongo { entries.push_back(e); } - // after the entries check the footer checksum - if( _recovering ) { - verify( ((const char *)h) + sizeof(JSectHeader) == p ); - if( !f->checkHash(h, len + sizeof(JSectHeader)) ) { - msgasserted(13594, "journal checksum doesn't match"); - } - } - // got all the entries for one group commit. apply them: applyEntries(entries); } @@ -443,20 +454,20 @@ namespace mongo { JHeader h; br.read(h); - /* [dm] not automatically handled. we should eventually handle this automatically. i think: - (1) if this is the final journal file - (2) and the file size is just the file header in length (or less) -- this is a bit tricky to determine if prealloced - then can just assume recovery ended cleanly and not error out (still should log). - */ - uassert(13537, - "journal file header invalid. This could indicate corruption in a journal file, or perhaps a crash where sectors in file header were in flight written out of order at time of crash (unlikely but possible).", - h.valid()); + if (!h.valid()) { + log() << "Journal file header invalid. This could indicate corruption, or " + << "an unclean shutdown while writing the first section in a journal " + << "file."; + throw JournalSectionCorruptException(); + } if( !h.versionOk() ) { log() << "journal file version number mismatch got:" << hex << h._version << " expected:" << hex << (unsigned) JHeader::CurrentVersion << ". 
if you have just upgraded, recover with old version of mongod, terminate cleanly, then upgrade." << endl; + // Not using JournalSectionCorruptException as we don't want to ignore + // journal files on upgrade. uasserted(13536, str::stream() << "journal version number mismatch " << h._version); } fileId = h.fileId; @@ -489,7 +500,12 @@ namespace mongo { killCurrentOp.checkForInterrupt(false); } } - catch( BufReader::eof& ) { + catch (const BufReader::eof&) { + if (storageGlobalParams.durOptions & StorageGlobalParams::DurDumpJournal) + log() << "ABRUPT END" << endl; + return true; // abrupt end + } + catch (const JournalSectionCorruptException&) { if (storageGlobalParams.durOptions & StorageGlobalParams::DurDumpJournal) log() << "ABRUPT END" << endl; return true; // abrupt end diff --git a/src/mongo/shell/shell_utils_extended.cpp b/src/mongo/shell/shell_utils_extended.cpp index e46d69f1d8d..6073994a4e6 100644 --- a/src/mongo/shell/shell_utils_extended.cpp +++ b/src/mongo/shell/shell_utils_extended.cpp @@ -202,22 +202,19 @@ namespace mongo { } /** - * @param args - [ name, byte index ] - * In this initial implementation, all bits in the specified byte are flipped. + * @param args - [ source, destination ] + * copies file 'source' to 'destination'. Errors if the 'destination' file already exists. 
*/ - BSONObj fuzzFile(const BSONObj& args, void* data) { - uassert( 13619, "fuzzFile takes 2 arguments", args.nFields() == 2 ); - scoped_ptr< File > f( new File() ); - f->open( args.getStringField( "0" ) ); - uassert( 13620, "couldn't open file to fuzz", !f->bad() && f->is_open() ); + BSONObj copyFile(const BSONObj& args, void* data) { + uassert(13619, "copyFile takes 2 arguments", args.nFields() == 2); - char c; - f->read( args.getIntField( "1" ), &c, 1 ); - c = ~c; - f->write( args.getIntField( "1" ), &c, 1 ); + BSONObjIterator it(args); + const std::string source = it.next().str(); + const std::string destination = it.next().str(); + + boost::filesystem::copy_file(source, destination); return undefinedReturn; - // f close is implicit } BSONObj getHostName(const BSONObj& a, void* data) { @@ -231,7 +228,7 @@ namespace mongo { void installShellUtilsExtended( Scope& scope ) { scope.injectNative( "getHostName" , getHostName ); scope.injectNative( "removeFile" , removeFile ); - scope.injectNative( "fuzzFile" , fuzzFile ); + scope.injectNative( "copyFile" , copyFile ); scope.injectNative( "listFiles" , listFiles ); scope.injectNative( "ls" , ls ); scope.injectNative( "pwd", pwd ); |