Diffstat:
-rw-r--r--  SConstruct                   |   2
-rw-r--r--  client/dbclient_rs.cpp       |   4
-rw-r--r--  db/bufreader.h               |   6
-rw-r--r--  db/db.vcxproj                |   2
-rwxr-xr-x  db/db.vcxproj.filters        |   6
-rw-r--r--  db/dur.cpp                   | 204
-rw-r--r--  db/dur.h                     |  17
-rw-r--r--  db/dur_commitjob.cpp         |   2
-rw-r--r--  db/dur_commitjob.h           |  68
-rw-r--r--  db/dur_journal.cpp           |   3
-rw-r--r--  db/dur_journalformat.h       |  37
-rw-r--r--  db/dur_preplogbuffer.cpp     | 175
-rw-r--r--  db/dur_recover.cpp           | 187
-rw-r--r--  db/dur_stats.h               |   9
-rw-r--r--  db/dur_writetodatafiles.cpp  |  17
-rw-r--r--  db/oplog.cpp                 |  45
-rwxr-xr-x  jstests/dur/oplog.js         | 118
17 files changed, 670 insertions(+), 232 deletions(-)
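Three places in this patch finish a copied oplog object the same way: objAppend in dur.cpp, the JObjAppend replay in dur_recover.cpp, and WRITETODATAFILES in dur_writetodatafiles.cpp. A minimal standalone sketch of that byte-level idiom (the driver and free-standing names here are illustrative, not code from the patch): the three bytes just before the destination hold the BSON element header for the "o" field, and the byte after the payload terminates the enclosing document.

    #include <cstring>
    #include <cassert>

    const char Object = 0x03;   // BSON type byte for an embedded document
    const char EOO    = 0x00;   // BSON end-of-object marker

    // dst points at where the copied object's bytes land; the three bytes
    // just before dst are reserved for the "o" element header.
    void finishObjAppend(char *dst, const void *src, unsigned len) {
        memcpy(dst, src, len);  // the nested object, verbatim
        dst[-3] = Object;       // { ..., o: <copiedobj>, ..., EOO }
        dst[-2] = 'o';          // one-character field name...
        dst[-1] = 0;            // ...NUL-terminated
        dst[len] = EOO;         // terminate the enclosing document
    }

    int main() {
        char buf[16] = { 0 };
        const char emptyObj[5] = { 5, 0, 0, 0, 0 };  // minimal BSON object: int32 length, EOO
        finishObjAppend(buf + 3, emptyObj, sizeof(emptyObj));
        assert( buf[0] == Object && buf[1] == 'o' && buf[2] == 0 && buf[8] == EOO );
        return 0;
    }

This is why oplog.cpp can hand dur::objAppend a pointer three bytes past the end of the partially built header object and let the journal layer finish the document.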
diff --git a/SConstruct b/SConstruct index 5f9e087acba..01bb0123024 100644 --- a/SConstruct +++ b/SConstruct @@ -327,7 +327,7 @@ coreServerFiles = [ "util/message_server_port.cpp" , if has_option( "asio" ): coreServerFiles += [ "util/message_server_asio.cpp" ] -serverOnlyFiles = Split( "util/logfile.cpp util/alignedbuilder.cpp db/mongommf.cpp db/dur.cpp db/durop.cpp db/dur_writetodatafiles.cpp db/dur_commitjob.cpp db/dur_recover.cpp db/dur_journal.cpp db/query.cpp db/update.cpp db/introspect.cpp db/btree.cpp db/clientcursor.cpp db/tests.cpp db/repl.cpp db/repl/rs.cpp db/repl/consensus.cpp db/repl/rs_initiate.cpp db/repl/replset_commands.cpp db/repl/manager.cpp db/repl/health.cpp db/repl/heartbeat.cpp db/repl/rs_config.cpp db/repl/rs_rollback.cpp db/repl/rs_sync.cpp db/repl/rs_initialsync.cpp db/oplog.cpp db/repl_block.cpp db/btreecursor.cpp db/cloner.cpp db/namespace.cpp db/cap.cpp db/matcher_covered.cpp db/dbeval.cpp db/restapi.cpp db/dbhelpers.cpp db/instance.cpp db/client.cpp db/database.cpp db/pdfile.cpp db/cursor.cpp db/security_commands.cpp db/security.cpp db/queryoptimizer.cpp db/extsort.cpp db/cmdline.cpp" ) +serverOnlyFiles = Split( "util/logfile.cpp util/alignedbuilder.cpp db/mongommf.cpp db/dur.cpp db/durop.cpp db/dur_writetodatafiles.cpp db/dur_preplogbuffer.cpp db/dur_commitjob.cpp db/dur_recover.cpp db/dur_journal.cpp db/query.cpp db/update.cpp db/introspect.cpp db/btree.cpp db/clientcursor.cpp db/tests.cpp db/repl.cpp db/repl/rs.cpp db/repl/consensus.cpp db/repl/rs_initiate.cpp db/repl/replset_commands.cpp db/repl/manager.cpp db/repl/health.cpp db/repl/heartbeat.cpp db/repl/rs_config.cpp db/repl/rs_rollback.cpp db/repl/rs_sync.cpp db/repl/rs_initialsync.cpp db/oplog.cpp db/repl_block.cpp db/btreecursor.cpp db/cloner.cpp db/namespace.cpp db/cap.cpp db/matcher_covered.cpp db/dbeval.cpp db/restapi.cpp db/dbhelpers.cpp db/instance.cpp db/client.cpp db/database.cpp db/pdfile.cpp db/cursor.cpp db/security_commands.cpp db/security.cpp db/queryoptimizer.cpp db/extsort.cpp db/cmdline.cpp" ) serverOnlyFiles += [ "db/index.cpp" ] + Glob( "db/geo/*.cpp" ) diff --git a/client/dbclient_rs.cpp b/client/dbclient_rs.cpp index 7280d5a0a60..00e15e825e0 100644 --- a/client/dbclient_rs.cpp +++ b/client/dbclient_rs.cpp @@ -360,7 +360,7 @@ namespace mongo { try { return checkSlave()->query(ns,query,nToReturn,nToSkip,fieldsToReturn,queryOptions,batchSize); } - catch ( DBException & e ){ + catch ( DBException & ){ LOG(1) << "can't query replica set slave: " << _slaveHost << endl; } } @@ -378,7 +378,7 @@ namespace mongo { try { return checkSlave()->findOne(ns,query,fieldsToReturn,queryOptions); } - catch ( DBException & e ){ + catch ( DBException & ){ LOG(1) << "can't query replica set slave: " << _slaveHost << endl; } } diff --git a/db/bufreader.h b/db/bufreader.h index 8e351f29334..bcc4f4271e3 100644 --- a/db/bufreader.h +++ b/db/bufreader.h @@ -60,6 +60,12 @@ namespace mongo { /** return remaining bytes */ unsigned remaining() const { return (char*)_end -(char*)_pos; } + /** back up by nbytes */ + void rewind(unsigned nbytes) { + _pos = ((char *) _pos) - nbytes; + assert( _pos >= _start ); + } + /** return current position pointer, and advance by len */ const void* skip(unsigned len) { char *nxt = ((char *) _pos) + len; diff --git a/db/db.vcxproj b/db/db.vcxproj index c4f9e1edc70..7d1baae52d7 100644 --- a/db/db.vcxproj +++ b/db/db.vcxproj @@ -198,6 +198,7 @@ <ItemGroup>
<ClCompile Include="..\bson\oid.cpp" />
<ClCompile Include="..\client\dbclientcursor.cpp" />
+ <ClCompile Include="..\client\dbclient_rs.cpp" />
<ClCompile Include="..\client\distlock.cpp" />
<ClCompile Include="..\client\model.cpp" />
<ClCompile Include="..\pcre-7.4\pcrecpp.cc">
@@ -483,6 +484,7 @@ <ClCompile Include="durop.cpp" />
<ClCompile Include="dur_commitjob.cpp" />
<ClCompile Include="dur_journal.cpp" />
+ <ClCompile Include="dur_preplogbuffer.cpp" />
<ClCompile Include="dur_recover.cpp" />
<ClCompile Include="dur_writetodatafiles.cpp" />
<ClCompile Include="geo\2d.cpp" />
diff --git a/db/db.vcxproj.filters b/db/db.vcxproj.filters
index f77d11b4125..fd2f74753f8 100755
--- a/db/db.vcxproj.filters
+++ b/db/db.vcxproj.filters
@@ -451,6 +451,12 @@ <ClCompile Include="dur_writetodatafiles.cpp">
<Filter>_storage engine</Filter>
</ClCompile>
+ <ClCompile Include="dur_preplogbuffer.cpp">
+ <Filter>_storage engine</Filter>
+ </ClCompile>
+ <ClCompile Include="..\client\dbclient_rs.cpp">
+ <Filter>client</Filter>
+ </ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="repl\rs_config.h">
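The objAppend fast path added to dur.cpp below pays off only when the source record was noted in the current group commit window; commitJob.alreadyNoted() answers that from the small direct-mapped Already<127> cache in dur_commitjob.h, which this patch extends with a read-only check() beside checkAndSet(). A self-contained sketch of how that cache behaves -- hashPointer is a stand-in for mongoutils::hashPointer; false negatives merely cause a byte range to be journaled twice, while the exact-pointer-plus-covering-length match rules out false positives:

    #include <cstdint>

    // stand-in for mongoutils::hashPointer; any reasonable pointer hash works here
    inline unsigned hashPointer(void *p) {
        uintptr_t x = (uintptr_t) p;
        x ^= x >> 7; x ^= x << 11; x ^= x >> 17;
        return (unsigned) x;
    }

    struct Intent { void *p; unsigned len; };

    template <int N>   // N should be prime and small enough to stay cpu-cache resident
    struct Already {
        Intent nodes[N];
        void clear() { for( int i = 0; i < N; i++ ) { nodes[i].p = 0; nodes[i].len = 0; } }
        /** note w; @return true if a covering intent was already resident */
        bool checkAndSet(const Intent& w) {
            Intent& nd = nodes[hashPointer(w.p) % N];
            if( nd.p == w.p && nd.len >= w.len )
                return true;
            nd = w;   // evict whatever hashed to this slot before
            return false;
        }
        /** read-only probe, as objAppend's alreadyNoted() uses */
        bool check(const Intent& w) const {
            const Intent& nd = nodes[hashPointer(w.p) % N];
            return nd.p == w.p && nd.len >= w.len;
        }
    };

    int main() {
        Already<127> cache;
        cache.clear();
        int x = 0;
        Intent w = { &x, 4 };
        bool wasNoted = cache.checkAndSet(w);   // false: first sighting, now recorded
        bool resident = cache.check(w);         // true: still in its slot
        return (!wasNoted && resident) ? 0 : 1;
    }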
diff --git a/db/dur.cpp b/db/dur.cpp index d0d128ea910..b57015b7759 100644 --- a/db/dur.cpp +++ b/db/dur.cpp @@ -50,6 +50,7 @@ #include "../util/mongoutils/str.h" #include "../util/timer.h" #include "dur_stats.h" +#include "pdfile.h" // Record::HeaderSize using namespace mongoutils; @@ -57,7 +58,18 @@ namespace mongo { namespace dur { +#if defined(_DEBUG) + const bool DebugCheckLastDeclaredWrite = false; +#else + const bool DebugCheckLastDeclaredWrite = false; +#endif + void WRITETODATAFILES(); + void PREPLOGBUFFER(); + + static void groupCommit(); // later in this file + + CommitJob commitJob; Stats stats; @@ -65,6 +77,8 @@ namespace mongo { memset(this, 0, sizeof(*this)); } + DurableInterface* DurableInterface::_impl = new NonDurableImpl(); + void NonDurableImpl::startup() { if( haveJournalFiles() ) { log() << "Error: journal files are present in journal directory, yet starting without --dur enabled." << endl; @@ -74,13 +88,49 @@ namespace mongo { } } -#if defined(_DEBUG) - const bool DebugCheckLastDeclaredWrite = false; -#else - const bool DebugCheckLastDeclaredWrite = false; -#endif + /** base declare write intent function that all the helpers call. */ + void DurableImpl::declareWriteIntent(void *p, unsigned len) { + WriteIntent w; + w.p = p; + w.len = len; + commitJob.note(w); + } - DurableInterface* DurableInterface::_impl = new NonDurableImpl(); + bool DurableImpl::objAppend(void *dst, const void *src, unsigned len) { + { + char *srcRecord = (char*) src; + srcRecord -= Record::HeaderSize; + WriteIntent w; + w.p = srcRecord; + w.len = len+Record::HeaderSize; + if( !commitJob.alreadyNoted(w) ) { + + if( debug ) { + WriteIntent w; + w.p = (void*)src; + w.len = len; + if( commitJob.alreadyNoted(w) ) { + log() << "dur info it appears an optimization is possible that is not yet done" << endl; + } + } + + return false; + } + } + + BasicWriteOp b; + b.setObjAppend(dst, (void*)src, len); + commitJob.basicWrites().push_back(b); + if( testIntent ) + dst = MongoMMF::switchToPrivateView(dst); + memcpy(dst, src, len); + char *p = static_cast<char*>(dst); + p[-3] = (char) Object; // { ..., o: <copiedobj>, ..., EOO} + p[-2] = 'o'; + p[-1] = 0; + p[len] = EOO; + return true; + } void enableDurability() { // TODO: merge with startup() ? assert(typeid(*DurableInterface::_impl) == typeid(NonDurableImpl)); @@ -88,11 +138,6 @@ namespace mongo { DurableInterface::_impl = new DurableImpl(); } - // later in this file - static void groupCommit(); - - CommitJob commitJob; - bool DurableImpl::commitNow() { groupCommit(); return true; @@ -124,12 +169,6 @@ namespace mongo { groupCommit(); } - /** declare write intent. when already in the write view if testIntent is true. 
*/ - void DurableImpl::declareWriteIntent(void *p, unsigned len) { - WriteIntent w(p, len); - commitJob.note(w); - } - void* DurableImpl::writingPtr(void *x, unsigned len) { void *p = x; if( testIntent ) @@ -171,15 +210,15 @@ namespace mongo { ++n; assert(debug && cmdLine.dur); - vector<WriteIntent>& w = commitJob.writes(); + vector<BasicWriteOp>& w = commitJob.basicWrites(); if( w.size() == 0 ) return; - const WriteIntent &i = w[w.size()-1]; + const BasicWriteOp &i = w[w.size()-1]; size_t ofs; - MongoMMF *mmf = privateViews.find(i.p, ofs); + MongoMMF *mmf = privateViews.find(i.src, ofs); if( mmf == 0 ) return; - size_t past = ofs + i.len; + size_t past = ofs + i.len(); if( mmf->length() < past + 8 ) return; // too close to end of view char *priv = (char *) mmf->getView(); @@ -188,15 +227,15 @@ namespace mongo { unsigned long long *b = (unsigned long long *) (writ+past); if( *a != *b ) { for( unsigned z = 0; z < w.size() - 1; z++ ) { - const WriteIntent& wi = w[z]; - char *r1 = (char*) wi.p; - char *r2 = r1 + wi.len; + const BasicWriteOp& wi = w[z]; + char *r1 = (char*) wi.src; + char *r2 = r1 + wi.len(); if( r1 <= (((char*)a)+8) && r2 > (char*)a ) { //log() << "it's ok " << wi.p << ' ' << wi.len << endl; return; } } - log() << "dur data after write area " << i.p << " does not agree" << endl; + log() << "dur data after write area " << i.src << " does not agree" << endl; log() << " was: " << ((void*)b) << " " << hexdump((char*)b, 8) << endl; log() << " now: " << ((void*)a) << " " << hexdump((char*)a, 8) << endl; log() << " n: " << n << endl; @@ -205,91 +244,6 @@ namespace mongo { } #endif - RelativePath local = RelativePath::fromRelativePath("local"); - - /** we will build an output buffer ourself and then use O_DIRECT - we could be in read lock for this - caller handles locking - */ - static void PREPLOGBUFFER() { - assert( cmdLine.dur ); - AlignedBuilder& bb = commitJob._ab; - bb.reset(); - - unsigned lenOfs; // we will need to backfill the length when prep is wrapping up - // JSectHeader - { - bb.appendStr("\nHH\n", false); - lenOfs = bb.skip(4); - } - - // ops other than basic writes (DurOp's) - { - for( vector< shared_ptr<DurOp> >::iterator i = commitJob.ops().begin(); i != commitJob.ops().end(); ++i ) { - (*i)->serialize(bb); - } - } - - // write intents. note there is no particular order to these : if we have - // two writes to the same location during the group commit interval, it is likely - // (although not assured) that it is journalled here once. - { - scoped_lock lk(privateViews._mutex()); - RelativePath lastFilePath; - for( vector<WriteIntent>::iterator i = commitJob.writes().begin(); i != commitJob.writes().end(); i++ ) { - size_t ofs; - MongoMMF *mmf = privateViews._find(i->p, ofs); - if( mmf == 0 ) { - string s = str::stream() << "view pointer cannot be resolved " << (size_t) i->p; - journalingFailure(s.c_str()); // asserts - return; - } - - if( !mmf->willNeedRemap() ) { - // tag this mmf as needed a remap of its private view later. 
- // usually it will already be dirty/already set, so we do the if above first - // to avoid possibility of cpu cache line contention - mmf->willNeedRemap() = true; - } - i->w_ptr = ((char*)mmf->view_write()) + ofs; - JEntry e; - e.len = i->len; - assert( ofs <= 0x80000000 ); - e.ofs = (unsigned) ofs; - e.setFileNo( mmf->fileSuffixNo() ); - if( mmf->relativePath() == local ) { - e.setLocalDbContextBit(); - } - else if( mmf->relativePath() != lastFilePath ) { - lastFilePath = mmf->relativePath(); - //assert( !str::startsWith(lastFilePath, dbpath) ); // dbpath should be stripped this is a relative path - JDbContext c; - bb.appendStruct(c); - bb.appendStr(lastFilePath.toString()); - } - bb.appendStruct(e); - bb.appendBuf(i->p, i->len); - } - } - - { - JSectFooter f(bb.buf(), bb.len()); - bb.appendStruct(f); - } - - { - assert( 0xffffe000 == (~(Alignment-1)) ); - unsigned L = (bb.len() + Alignment-1) & (~(Alignment-1)); // fill to alignment - dassert( L >= (unsigned) bb.len() ); - *((unsigned*)bb.atOfs(lenOfs)) = L; - unsigned padding = L - bb.len(); - bb.skip(padding); - dassert( bb.len() % Alignment == 0 ); - } - - return; - } - /** write the buffer we have built to the journal and fsync it. outside of lock as that could be slow. */ @@ -326,9 +280,23 @@ namespace mongo { unsigned low = 0xffffffff; unsigned high = 0; + log() << "DurParanoid mismatch in " << mmf->filename() << endl; + int logged = 0; + unsigned lastMismatch = 0xffffffff; for( unsigned i = 0; i < mmf->length(); i++ ) { if( p[i] != w[i] ) { - log() << i << '\t' << (int) p[i] << '\t' << (int) w[i] << endl; + if( lastMismatch != 0xffffffff && lastMismatch+1 != i ) + log() << endl; // separate blocks of mismatches + lastMismatch= i; + if( ++logged < 60 ) { + stringstream ss; + ss << "mismatch ofs:" << hex << i << "\tfilemap:" << setw(2) << (unsigned) w[i] << "\tprivmap:" << setw(2) << (unsigned) p[i]; + if( p[i] > 32 && p[i] <= 126 ) + ss << '\t' << p[i]; + log() << ss.str() << endl; + } + if( logged == 60 ) + log() << "..." << endl; if( i < low ) low = i; if( i > high ) high = i; } @@ -337,9 +305,9 @@ namespace mongo { std::stringstream ss; ss << "dur error warning views mismatch " << mmf->filename() << ' ' << (hex) << low << ".." << high << " len:" << high-low+1; log() << ss.str() << endl; - log() << "priv loc: " << (void*)(p+low) << endl; - vector<WriteIntent>& w = commitJob.writes(); - (void)w; // mark as unused. Useful for inspection in debugger + log() << "priv loc: " << (void*)(p+low) << ' ' << stats.curr._objCopies << endl; + vector<BasicWriteOp>& b = commitJob.basicWrites(); + (void)b; // mark as unused. Useful for inspection in debugger massert(13599, "Written data does not match in-memory view. Missing WriteIntent?", false); } @@ -412,6 +380,9 @@ namespace mongo { stats.curr._commits++; dbMutex.assertAtLeastReadLocked(); + if( dbMutex.isWriteLocked() ) { + stats.curr._commitsInWriteLock++; + } if( !commitJob.hasWritten() ) return; @@ -514,6 +485,7 @@ namespace mongo { void unlinkThread(); void recover(); + void releasingWriteLock() { try { #if defined(_DEBUG) @@ -548,7 +520,5 @@ namespace mongo { } } // namespace dur - - + } // namespace mongo - @@ -28,6 +28,21 @@ namespace mongo { /** Declare a database is about to be dropped */ virtual void droppingDb(string db) = 0; + /** this function appends a nested bsonobj, with the fieldname, "o", to a partially built + bson object. it then terminates the bson object with EOO. The caller is responsible + for setting the outer bson object's total size correctly. 
+ + It also journals the operation. an optimization occurs in the journaling. + if the src was just journaled, a new copy is not written, rather, + JObjAppend is added to the journal which is small. + + If the src was not recently journaled, that is ok: in that case the function returns false + and you must do your traditional behavior. + + This is used to make writing to the replication oplog efficient. + */ + virtual bool objAppend(void *dst, const void *src, unsigned len) = 0; + /** Declarations of write intent. Use these methods to declare "i'm about to write to x and it should be logged for redo." @@ -143,6 +158,7 @@ namespace mongo { void declareWriteIntent(void *, unsigned) { } void createdFile(string filename, unsigned long long len) { } void droppingDb(string db) { } + bool objAppend(void *dst, const void *src, unsigned len) { return false; } bool awaitCommit() { return false; } bool commitNow() { return false; } #if defined(_DEBUG) @@ -157,6 +173,7 @@ namespace mongo { void declareWriteIntent(void *, unsigned); void createdFile(string filename, unsigned long long len); void droppingDb(string db); + bool objAppend(void *dst, const void *src, unsigned len); bool awaitCommit(); bool commitNow(); #if defined(_DEBUG) diff --git a/db/dur_commitjob.cpp b/db/dur_commitjob.cpp index 049ee41c3fa..ba74c66272d 100644 --- a/db/dur_commitjob.cpp +++ b/db/dur_commitjob.cpp @@ -24,7 +24,7 @@ namespace mongo { void Writes::clear() { _alreadyNoted.clear(); - _writes.clear(); + _basicWrites.clear(); _ops.clear(); } diff --git a/db/dur_commitjob.h b/db/dur_commitjob.h index e265f7121b4..ceca4a0da07 100644 --- a/db/dur_commitjob.h +++ b/db/dur_commitjob.h @@ -29,13 +29,31 @@ namespace mongo { namespace dur { - /** declaration of an intent to write to a region of a memory mapped view */ - struct WriteIntent /* copyable */ { - WriteIntent() : w_ptr(0), p(0) { } - WriteIntent(void *a, unsigned b) : w_ptr(0), p(a), len(b) { } - void *w_ptr; // p is mapped from private to equivalent location in the writable mmap - void *p; // intent to write at p - unsigned len; // up to this len + /** "I intend to write at p for up to len bytes" */ + struct WriteIntent { + void *p; + unsigned len; + }; + + /** declaration of an intent to write to a region of a memory mapped view + this could be either a JEntry or a JObjAppend in the journal + */ + struct BasicWriteOp /* copyable */ { + void *dst; + void *src; + unsigned len() const { return _len & 0x7fffffff; } + bool isObjAppend() const { return _len & 0x80000000; } + void set(void *Src, unsigned Len) { + dst = 0; + src = Src; + _len = Len; + } + void setObjAppend(void *Dst, void *Src, unsigned Len) { + dst = Dst; src = Src; + _len = Len | 0x80000000; + } + private: + unsigned _len; }; /** try to remember things we have already marked for journalling. false negatives are ok if infrequent - @@ -64,6 +82,15 @@ namespace mongo { nd = w; return false; // a new set } + + /** + @return true if already indicated. 
+ */ + bool check(const WriteIntent& w) { + unsigned x = mongoutils::hashPointer(w.p); + WriteIntent& nd = nodes[x % N]; + return nd.p == w.p && nd.len >= w.len; + } private: enum { N = Prime }; // this should be small the idea is that it fits in the cpu cache easily WriteIntent nodes[N]; @@ -73,8 +100,9 @@ namespace mongo { class Writes : boost::noncopyable { public: Already<127> _alreadyNoted; - vector<WriteIntent> _writes; + vector<BasicWriteOp> _basicWrites; vector< shared_ptr<DurOp> > _ops; // all the ops other than basic writes + /** reset the Writes structure (empties all the above) */ void clear(); }; @@ -97,7 +125,10 @@ namespace mongo { /** note an operation other than a "basic write" */ void noteOp(shared_ptr<DurOp> p); - vector<WriteIntent>& writes() { return _wi._writes; } + /** @return true if was already noted. false negatives possible (to be fast). */ + bool alreadyNoted(WriteIntent& w) { return _wi._alreadyNoted.check(w); } + + vector<BasicWriteOp>& basicWrites() { return _wi._basicWrites; } vector< shared_ptr<DurOp> >& ops() { return _wi._ops; } /** this method is safe to call outside of locks. when haswritten is false we don't do any group commit and avoid even @@ -117,13 +148,14 @@ namespace mongo { _notify.wait(); } + /** we check how much written and if it is getting to be a lot, we commit sooner. */ size_t bytes() const { return _bytes; } private: bool _hasWritten; Writes _wi; - NotifyAll _notify; size_t _bytes; + NotifyAll _notify; // for getlasterror fsync:true acknowledgements }; extern CommitJob commitJob; @@ -142,7 +174,10 @@ namespace mongo { dassert( cmdLine.dur ); if( !_wi._alreadyNoted.checkAndSet(w) ) { if( !_hasWritten ) { + // you can't be writing if one of these is pending, so this is a verification. assert( !dbMutex._remapPrivateViewRequested ); + + // we don't bother doing a group commit when nothing is written, so we have a var to track that _hasWritten = true; } @@ -164,6 +199,10 @@ namespace mongo { else { log() << "DEBUG note write intent " << w.p << ' ' << w.len << " NOT FOUND IN privateViews" << endl; } + /*if( w.len > 48 ) { + log() << "big TEMP" << endl; + log() << hexdump((char*) w.p, 48) << endl; + }*/ } else if( n == 10000 ) { log() << "DEBUG stopping write intent logging, too much to log" << endl; @@ -172,10 +211,11 @@ namespace mongo { #endif // remember intent. we will journal it in a bit - _wi._writes.push_back(w); - _bytes += w.len; - wassert( _wi._writes.size() < 2000000 ); - //assert( _wi._writes.size() < 20000000 ); + BasicWriteOp b; + b.set(w.p, w.len); + _wi._basicWrites.push_back(b); + wassert( _wi._basicWrites.size() < 2000000 ); + assert( _wi._basicWrites.size() < 20000000 ); } } } diff --git a/db/dur_journal.cpp b/db/dur_journal.cpp index f8ac1ff41d9..d7af952b0a4 100644 --- a/db/dur_journal.cpp +++ b/db/dur_journal.cpp @@ -32,8 +32,9 @@ #include "../util/mongoutils/str.h" #include "../util/concurrency/mvar.h" +using namespace mongoutils; + namespace mongo { - using namespace mongoutils; class AlignedBuilder; diff --git a/db/dur_journalformat.h b/db/dur_journalformat.h index 6f6d6d2f5d3..7c8473e830f 100644 --- a/db/dur_journalformat.h +++ b/db/dur_journalformat.h @@ -70,27 +70,58 @@ namespace mongo { OpCode_DbContext = 0xfffffffe, OpCode_FileCreated = 0xfffffffd, OpCode_DropDb = 0xfffffffc, + OpCode_ObjAppend = 0xfffffffb, OpCode_Min = 0xfffff000 }; - union { - unsigned len; + unsigned len; // length in bytes of the data of the JEntry. 
does not include the JEntry header OpCodes opcode; }; + unsigned ofs; // offset in file - enum { + // sentinel and masks for _fileNo + enum { DotNsSuffix = 0x7fffffff, // ".ns" file LocalDbBit = 0x80000000 // assuming "local" db instead of using the JDbContext }; int _fileNo; // high bit is set to indicate it should be the <dbpath>/local database // char data[] follows + const char * srcData() const { + const int *i = &_fileNo; + return (const char *) (i+1); + } + int getFileNo() const { return _fileNo & (~LocalDbBit); } void setFileNo(int f) { _fileNo = f; } + bool isNsSuffix() const { return getFileNo() == DotNsSuffix; } + void setLocalDbContextBit() { _fileNo |= LocalDbBit; } bool isLocalDbContext() const { return _fileNo & LocalDbBit; } void clearLocalDbContextBit() { _fileNo = getFileNo(); } + + static string suffix(int fileno) { + if( fileno == DotNsSuffix ) return "ns"; + stringstream ss; + ss << fileno; + return ss.str(); + } + }; + + /** append an object to a partial BSON buffer under construction. + adds + o : <data>, EOO + len is the length of the data + */ + struct JObjAppend { + JObjAppend() : opcode(JEntry::OpCode_ObjAppend) { } + unsigned opcode; + int dstFileNo; // destination in the local database. + unsigned dstOfs; + int srcFileNo; // source in the database of current context (JDbContext) + unsigned srcOfs; + unsigned len; }; /** group commit section footer. md5 is a key field. */ diff --git a/db/dur_preplogbuffer.cpp b/db/dur_preplogbuffer.cpp new file mode 100644 index 00000000000..dac7d62cf8f --- /dev/null +++ b/db/dur_preplogbuffer.cpp @@ -0,0 +1,175 @@ +// @file dur_preplogbuffer.cpp + +/** +* Copyright (C) 2009 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +/* + PREPLOGBUFFER + we will build an output buffer ourself and then use O_DIRECT + we could be in read lock for this + for very large objects write directly to redo log in situ? 
+ @see https://docs.google.com/drawings/edit?id=1TklsmZzm7ohIZkwgeK6rMvsdaR13KjtJYMsfLr175Zc +*/ + +#include "pch.h" +#include "cmdline.h" +#include "dur.h" +#include "dur_journal.h" +#include "dur_commitjob.h" +#include "../util/mongoutils/hash.h" +#include "../util/mongoutils/str.h" +#include "dur_stats.h" + +using namespace mongoutils; + +namespace mongo { + namespace dur { + + RelativePath local = RelativePath::fromRelativePath("local"); + + MongoMMF* findMMF(void *ptr, size_t &ofs) { + MongoMMF *f = privateViews._find(ptr, ofs); + if( f == 0 ) { + string s = str::stream() << "view pointer cannot be resolved " << (size_t) ptr; + journalingFailure(s.c_str()); // asserts + } + return f; + } + + /** we will build an output buffer ourself and then use O_DIRECT + we could be in read lock for this + caller handles locking + */ + void PREPLOGBUFFER() { + assert( cmdLine.dur ); + AlignedBuilder& bb = commitJob._ab; + bb.reset(); + + unsigned lenOfs; // we will need to backfill the length when prep is wrapping up + + // JSectHeader + { + bb.appendStr("\nHH\n", false); + lenOfs = bb.skip(4); + } + + // ops other than basic writes (DurOp's) + { + for( vector< shared_ptr<DurOp> >::iterator i = commitJob.ops().begin(); i != commitJob.ops().end(); ++i ) { + (*i)->serialize(bb); + } + } + + // basic write ops / write intents. note there is no particular order to these : if we have + // two writes to the same location during the group commit interval, it is likely + // (although not assured) that it is journaled here once. + // + // "objAppend" operations are herein too + // + { + scoped_lock lk(privateViews._mutex()); + + // each time events switch to a different database we journal a JDbContext + RelativePath lastDbPath; + + for( vector<BasicWriteOp>::iterator i = commitJob.basicWrites().begin(); i != commitJob.basicWrites().end(); i++ ) { + size_t ofs = 1; + MongoMMF *mmf = findMMF(i->src, /*out*/ofs); + + if( i->dst ) { + // objAppend operation + + if( mmf->relativePath() != lastDbPath ) { + lastDbPath = mmf->relativePath(); + dassert( lastDbPath != local ); // objAppend is not used for the local database + JDbContext c; + bb.appendStruct(c); + bb.appendStr(lastDbPath.toString()); + } + + size_t dstofs; + MongoMMF *dstmmf = findMMF(i->dst, dstofs); + if( !dstmmf->willNeedRemap() ) { + dstmmf->willNeedRemap() = true; + } + + // since we have already looked up the mmf, we go ahead and remember the write view location + // so we don't have to find the MongoMMF again later in WRITETODATAFILES() + i->dst = ((char*)dstmmf->view_write()) + dstofs; + + JObjAppend d; + d.dstFileNo = dstmmf->fileSuffixNo(); + d.dstOfs = (unsigned) dstofs; + d.srcFileNo = mmf->fileSuffixNo(); + d.srcOfs = ofs; + d.len = i->len(); + bb.appendStruct(d); + ++stats.curr._objCopies; + } + else { + if( !mmf->willNeedRemap() ) { + // tag this mmf as needed a remap of its private view later. 
+ // usually it will already be dirty/already set, so we do the if above first + // to avoid possibility of cpu cache line contention + mmf->willNeedRemap() = true; + } + + // since we have already looked up the mmf, we go ahead and remember the write view location + // so we don't have to find the MongoMMF again later in WRITETODATAFILES() + dassert( i->dst == 0 ); + i->dst = ((char*)mmf->view_write()) + ofs; + + JEntry e; + e.len = i->len(); + assert( ofs <= 0x80000000 ); + e.ofs = (unsigned) ofs; + e.setFileNo( mmf->fileSuffixNo() ); + if( mmf->relativePath() == local ) { + e.setLocalDbContextBit(); + } + else if( mmf->relativePath() != lastDbPath ) { + lastDbPath = mmf->relativePath(); + JDbContext c; + bb.appendStruct(c); + bb.appendStr(lastDbPath.toString()); + } + bb.appendStruct(e); + bb.appendBuf(i->src, i->len()); + } + } + } + + { + JSectFooter f(bb.buf(), bb.len()); + bb.appendStruct(f); + } + + { + // pad to alignment, and set the total section length in the JSectHeader + assert( 0xffffe000 == (~(Alignment-1)) ); + unsigned L = (bb.len() + Alignment-1) & (~(Alignment-1)); + dassert( L >= (unsigned) bb.len() ); + *((unsigned*)bb.atOfs(lenOfs)) = L; + unsigned padding = L - bb.len(); + bb.skip(padding); + dassert( bb.len() % Alignment == 0 ); + } + + return; + } + + } +}
\ No newline at end of file diff --git a/db/dur_recover.cpp b/db/dur_recover.cpp index 586fa6f7a39..9d55974a2d1 100644 --- a/db/dur_recover.cpp +++ b/db/dur_recover.cpp @@ -37,23 +37,19 @@ namespace mongo { namespace dur { - /** todo clean up : this is messy from refactoring. */ - struct FullyQualifiedJournalEntry { - bool isBasicWrite() const { return dbName != 0; } + struct ParsedJournalEntry /*copyable*/ { + ParsedJournalEntry() : e(0), d(0) { } - // relative path of database for the operation. - // might be a pointer into mmaped Journal file - const char *dbName; + // relative path of database for the operation. + // might be a pointer into mmaped Journal file + const char *dbName; + // thse are pointers into the memory mapped journal file + const JEntry *e; // local db sentinel is already parsed out here into dbName + const JObjAppend *d; - // local db sentinel is already parsed out here into dbName - JEntry e; - - // pointer into mmaped Journal file - const char *srcData; - - // if not a simple JEntry, the operation - shared_ptr<DurOp> op; + // if not one of the two simple JEntry's above, this is the operation: + shared_ptr<DurOp> op; }; void removeJournalFiles(); @@ -105,7 +101,9 @@ namespace mongo { * @return true if got an entry. false at successful end of section (and no entry returned). * throws on premature end of section. */ - bool next(FullyQualifiedJournalEntry& e) { + bool next(ParsedJournalEntry& e) { + assert( e.e == 0 ); + if( !_sectHead ) { _sectHead = static_cast<const JSectHeader*>(_br.pos()); _br.skip(sizeof(JSectHeader)); @@ -113,36 +111,36 @@ namespace mongo { unsigned lenOrOpCode; _br.read(lenOrOpCode); - if( lenOrOpCode >= JEntry::OpCode_Min ) { - // not a "basic write" - if( lenOrOpCode == JEntry::OpCode_Footer ) { + switch( lenOrOpCode ) { + + case JEntry::OpCode_Footer: + { const char* pos = (const char*) _br.pos(); pos -= sizeof(lenOrOpCode); // rewind to include OpCode const JSectFooter& footer = *(const JSectFooter*)pos; - int len = pos - (char*)_sectHead; if (!footer.checkHash(_sectHead, len)){ massert(13594, str::stream() << "Journal checksum doesn't match. 
recorded: " - << toHex(footer.hash, sizeof(footer.hash)) - << " actual: " << md5simpledigest(_sectHead, len) - , false); + << toHex(footer.hash, sizeof(footer.hash)) + << " actual: " << md5simpledigest(_sectHead, len) + , false); } - _br.skip(sizeof(JSectFooter) - 4); _br.align(Alignment); - _sectHead = NULL; - return false; + return false; // false return value denotes of section } - if( lenOrOpCode != JEntry::OpCode_DbContext ) { + case JEntry::OpCode_FileCreated: + case JEntry::OpCode_DropDb: + { e.dbName = 0; e.op = DurOp::read(lenOrOpCode, _br); return true; } - // JDbContext + case JEntry::OpCode_DbContext: { _lastDbName = (const char*) _br.pos(); const unsigned limit = std::min((unsigned)Namespace::MaxNsLen, _br.remaining()); @@ -151,19 +149,28 @@ namespace mongo { _br.skip(len+1); // skip '\0' too _br.read(lenOrOpCode); } + // fall through as a basic operation always follows jdbcontext, and we don't have anything to return yet + + case JEntry::OpCode_ObjAppend: + default: + // fall through + ; } - // JEntry - a basic write - assert( lenOrOpCode && lenOrOpCode < JEntry::OpCode_Min ); - e.dbName = _lastDbName; - e.e.len = lenOrOpCode; - _br.read(e.e.ofs); - _br.read(e.e._fileNo); - if( e.e.isLocalDbContext() ) { - e.dbName = "local"; - e.e.clearLocalDbContextBit(); + { + assert( lenOrOpCode && lenOrOpCode <= JEntry::OpCode_ObjAppend ); + _br.rewind(4); + if( lenOrOpCode == JEntry::OpCode_ObjAppend ) { + e.d = (JObjAppend *) _br.skip(sizeof(JObjAppend)); + e.dbName = _lastDbName; + } + else { + e.e = (JEntry *) _br.skip(sizeof(JEntry)); + e.dbName = e.e->isLocalDbContext() ? "local" : _lastDbName; + dassert( e.e->len == lenOrOpCode ); + _br.skip(e.e->len); + } } - e.srcData = (const char *) _br.skip(lenOrOpCode); return true; } private: @@ -179,7 +186,8 @@ namespace mongo { void go(vector<path>& files); ~RecoveryJob(); private: - void applyEntries(const vector<FullyQualifiedJournalEntry> &entries); + void applyEntry(const ParsedJournalEntry& entry, bool apply, bool dump); + void applyEntries(const vector<ParsedJournalEntry> &entries); bool processBuffer(void *, unsigned len); bool processFile(path journalfile); void close(); @@ -255,67 +263,98 @@ namespace mongo { _fileToPtr.clear(); } - void RecoveryJob::applyEntries(const vector<FullyQualifiedJournalEntry> &entries) { + void RecoveryJob::applyEntry(const ParsedJournalEntry& entry, bool apply, bool dump) { + if( entry.e ) { + if( dump ) { + stringstream ss; + ss << " BASICWRITE " << setw(20) << entry.dbName << '.'; + if( entry.e->isNsSuffix() ) + ss << "ns"; + else + ss << setw(2) << entry.e->getFileNo(); + ss << ' ' << setw(6) << entry.e->len << ' ' << /*hex << setw(8) << (size_t) fqe.srcData << dec <<*/ + " " << hexdump(entry.e->srcData(), entry.e->len); + log() << ss.str() << endl; + } + if( apply ) { + void *p = ptr(entry.dbName, entry.e->getFileNo(), entry.e->ofs); + memcpy(p, entry.e->srcData(), entry.e->len); + } + } + else if( entry.d ) { + // OpCode_ObjAppend (struct JObjAppend) + if( dump ) { + stringstream ss; + ss << " JObjAppend dst: local." << JEntry::suffix(entry.d->dstFileNo) << " ofs:" << entry.d->dstOfs; + ss << " src:" << entry.dbName << '.' 
<< JEntry::suffix(entry.d->srcFileNo) << " ofs:" << entry.d->srcOfs; + ss << " len:" << entry.d->len; + log() << ss.str() << endl; + } + if( apply ) { + void *dst = ptr("local", entry.d->dstFileNo, entry.d->dstOfs); + void *src = ptr(entry.dbName, entry.d->srcFileNo, entry.d->srcOfs); + memcpy(dst, src, entry.d->len); + char *p = (char *) dst; + p[-3] = (char) Object; // { ..., o: <copiedobj>, ..., EOO} + p[-2] = 'o'; + p[-1] = 0; + p[entry.d->len] = EOO; + } + } + else { + // a DurOp subclass operation + if( dump ) { + log() << " OP " << entry.op->toString() << endl; + } + if( apply ) { + if( entry.op->needFilesClosed() ) { + close(); + } + entry.op->replay(); + } + } + } + + void RecoveryJob::applyEntries(const vector<ParsedJournalEntry> &entries) { bool apply = (cmdLine.durOptions & CmdLine::DurScanOnly) == 0; bool dump = cmdLine.durOptions & CmdLine::DurDumpJournal; if( dump ) log() << "BEGIN section" << endl; - for( vector<FullyQualifiedJournalEntry>::const_iterator i = entries.begin(); i != entries.end(); ++i ) { - const FullyQualifiedJournalEntry& fqe = *i; - if( fqe.isBasicWrite() ) { - if( dump ) { - stringstream ss; - ss << " BASICWRITE " << setw(20) << fqe.dbName << '.'; - if( fqe.e._fileNo == JEntry::DotNsSuffix ) - ss << "ns"; - else - ss << setw(2) << fqe.e._fileNo; - ss << ' ' << setw(6) << fqe.e.len << ' ' << hex << setw(8) << (size_t) fqe.srcData << dec << " " << hexdump(fqe.srcData, fqe.e.len); - log() << ss.str() << endl; - } - if( apply ) { - void *p = ptr(fqe.dbName, fqe.e._fileNo, fqe.e.ofs); - memcpy(p, fqe.srcData, fqe.e.len); - } - } else { - if( dump ) { - log() << " OP " << fqe.op->toString() << endl; - } - if( apply ) { - if( fqe.op->needFilesClosed() ) { - close(); - } - fqe.op->replay(); - } - } + for( vector<ParsedJournalEntry>::const_iterator i = entries.begin(); i != entries.end(); ++i ) { + applyEntry(*i, apply, dump); } if( dump ) log() << "END section" << endl; } - /** @param p start of the memory mapped file + /** apply a specific journal file, that is already mmap'd + @param p start of the memory mapped file @return true if this is detected to be the last file (ends abruptly) */ bool RecoveryJob::processBuffer(void *p, unsigned len) { JournalIterator i(p, len); - vector<FullyQualifiedJournalEntry> entries; + vector<ParsedJournalEntry> entries; try { while( 1 ) { entries.clear(); - FullyQualifiedJournalEntry e; - while( i.next(e) ) + while( 1 ) { + ParsedJournalEntry e; + if( !i.next(e) ) + break; entries.push_back(e); + } // got all the entries for one group commit. apply them: applyEntries(entries); - // now do the next section (i.e. 
group commit) if( i.atEof() ) break; + + // loop back and do the new group commit section } } catch( BufReader::eof& ) { @@ -327,6 +366,7 @@ namespace mongo { return false; // non-abrupt end } + /** apply a specific journal file */ bool RecoveryJob::processFile(path journalfile) { log() << "recover " << journalfile.string() << endl; MemoryMappedFile f; @@ -335,6 +375,7 @@ namespace mongo { return processBuffer(p, (unsigned) f.length()); } + /** @param files all the j._0 style files we need to apply for recovery */ void RecoveryJob::go(vector<path>& files) { log() << "recover begin" << endl; @@ -392,7 +433,7 @@ namespace mongo { BufReader r((void*) "abcdabcdabcd", 12); char x; BufReaderY y; - r.read(x); cout << x; // a + r.read(x); //cout << x; // a assert( x == 'a' ); r.read(y); r.read(x); diff --git a/db/dur_stats.h b/db/dur_stats.h index f9fe3f7eb7b..cd76efb01a9 100644 --- a/db/dur_stats.h +++ b/db/dur_stats.h @@ -10,9 +10,16 @@ namespace mongo { Stats();
struct S {
unsigned _commits;
- unsigned _dittos;
+ unsigned _objCopies;
unsigned long long _journaledBytes;
unsigned long long _writeToDataFilesBytes;
+
+ // It is undesirable to be in the write lock for the group commit (it can be done in a
+ // read lock), so it is good to have visibility into when this happens. It can happen
+ // for a few reasons:
+ // - read lock starvation
+ // - a file being closed
+ // - data being written faster than the normal group commit interval
+ unsigned _commitsInWriteLock;
} curr;
};
extern Stats stats;
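To see why the oplog.cpp hunk below bothers with objAppend only when obj.objsize() >= 32, compare the journal record sizes from dur_journalformat.h: a basic write journals a JEntry header plus the entire payload, while JObjAppend is a fixed-size reference to bytes journaled a moment earlier. A rough comparison with mirrored layouts (assuming no struct padding, which holds for these all-4-byte-field records on common platforms):

    #include <cstdio>

    // field layouts mirrored from dur_journalformat.h; data bytes follow a
    // JEntry in the journal, while nothing follows a JObjAppend
    struct JEntryFixed   { unsigned len; unsigned ofs; int _fileNo; };
    struct JObjAppendRec { unsigned opcode; int dstFileNo; unsigned dstOfs;
                           int srcFileNo; unsigned srcOfs; unsigned len; };

    int main() {
        for( unsigned objLen = 32; objLen <= 512; objLen *= 2 ) {
            printf("obj %3u bytes: basic write journals %3zu bytes, objAppend %zu\n",
                   objLen, sizeof(JEntryFixed) + objLen, sizeof(JObjAppendRec));
        }
        // e.g. 512 payload bytes: 524 journaled vs a flat 24. For tiny objects the
        // fixed record plus the extra bookkeeping would erase the win -- hence the cutoff.
        return 0;
    }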
diff --git a/db/dur_writetodatafiles.cpp b/db/dur_writetodatafiles.cpp index 21b995c5732..fad947c6470 100644 --- a/db/dur_writetodatafiles.cpp +++ b/db/dur_writetodatafiles.cpp @@ -46,11 +46,18 @@ namespace mongo { */ void WRITETODATAFILES() { /* we go backwards as what is at the end is most likely in the cpu cache. it won't be much, but we'll take it. */ - for( int i = commitJob.writes().size() - 1; i >= 0; i-- ) { - const WriteIntent& intent = commitJob.writes()[i]; - char *dst = (char *) (intent.w_ptr); - memcpy(dst, intent.p, intent.len); - stats.curr._writeToDataFilesBytes += intent.len; + for( int i = commitJob.basicWrites().size() - 1; i >= 0; i-- ) { + const BasicWriteOp& b = commitJob.basicWrites()[i]; + stats.curr._writeToDataFilesBytes += b.len(); + dassert(b.dst); + memcpy(b.dst, b.src, b.len()); + if( b.isObjAppend() ) { + char *p = static_cast<char*>(b.dst); + p[-3] = (char) Object; // { ..., o: <copiedobj>, ..., EOO} + p[-2] = 'o'; + p[-1] = 0; + p[b.len()] = EOO; + } } debugValidateMapsMatch(); diff --git a/db/oplog.cpp b/db/oplog.cpp index 59dd8f01ade..b7dc8e5949d 100644 --- a/db/oplog.cpp +++ b/db/oplog.cpp @@ -217,9 +217,10 @@ namespace mongo { b.appendBool("b", *bb); if ( o2 ) b.append("o2", *o2); - BSONObj partial = b.done(); - int posz = partial.objsize(); - int len = posz + obj.objsize() + 1 + 2 /*o:*/; + BSONObj partial = b.done(); // partial is everything except the o:... part. + + int po_sz = partial.objsize(); + int len = po_sz + obj.objsize() + 1 + 2 /*o:*/; Record *r; if( logNS == 0 ) { @@ -236,21 +237,37 @@ namespace mongo { } else { Client::Context ctx( logNS, dbpath, 0, false ); assert( nsdetails( logNS ) ); + // first we allocate the space, then we fill it below. r = theDataFileMgr.fast_oplog_insert( nsdetails( logNS ), logNS, len); } { - const int size2 = obj.objsize() + 1 + 2; - char *p = (char *) getDur().writingPtr(r->data, size2+posz); - memcpy(p, partial.objdata(), posz); - *((unsigned *)p) += size2; - p += posz - 1; - *p++ = (char) Object; - *p++ = 'o'; // { o : ... } - *p++ = 0; - memcpy(p, obj.objdata(), obj.objsize()); - p += obj.objsize(); - *p = EOO; + const int size1 = po_sz-1; // less the EOO char + const int objOfs = size1+3; // 3 = byte BSONOBJTYPE + byte 'o' + byte \0 + char *objInsertPoint = r->data + objOfs; + + // don't bother if obj is tiny as there is some header overhead for the additional journal entry + // and also some computational administrative overhead. + bool objAppendWorked = obj.objsize() >= 32 && + getDur().objAppend(objInsertPoint, obj.objdata(), obj.objsize()); + + void *p = (char *) getDur().writingPtr(r->data, objAppendWorked ? size1 : objOfs+obj.objsize()+1); + + memcpy(p, partial.objdata(), size1); + + // adjust overall bson object size for the o: field + *(static_cast<unsigned*>(p)) += obj.objsize() + 1/*fieldtype byte*/ + 2/*"o" fieldname*/; + + if( !objAppendWorked ) { + char *b = static_cast<char *>(p); + b += size1; + *b++ = (char) Object; + *b++ = 'o'; // { o : ... } + *b++ = 0; // null terminate "o" fieldname + memcpy(b, obj.objdata(), obj.objsize()); + b += obj.objsize(); + *b = EOO; + } } context.getClient()->setLastOp( ts.asDate() ); diff --git a/jstests/dur/oplog.js b/jstests/dur/oplog.js new file mode 100755 index 00000000000..0887826263f --- /dev/null +++ b/jstests/dur/oplog.js @@ -0,0 +1,118 @@ +/* + test durability +*/
+
+var debugging = false;
+if (""+typeof(db) != "undefined")
+ debugging = true;
+var testname = "oplog";
+var step = 1;
+var conn = null;
+
+function log(str) {
+ if(str)
+ print(testname+" step " + step++ + " " + str);
+ else
+ print(testname+" step " + step++);
+}
+
+// if you do inserts here, you will want to set _id. otherwise they won't match on different
+// runs so we can't do a binary diff of the resulting files to check they are consistent.
+function work() {
+ log("work");
+ var d = conn.getDB("test");
+ d.foo.insert({ _id: 3, x: 22 });
+ d.foo.insert({ _id: 4, x: 22 });
+ d.a.insert({ _id: 3, x: 22, y: [1, 2, 3] });
+ d.a.insert({ _id: 4, x: 22, y: [1, 2, 3] });
+ d.a.update({ _id: 4 }, { $inc: { x: 1} });
+ // OpCode_ObjAppend fires on larger operations, so make one that isn't tiny
+ var big = "axxxxxxxxxxxxxxb";
+ big = big + big;
+ big = big + big;
+ big = big + big;
+ big = big + big;
+ big = big + big;
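+ // big is now 16 * 2^5 = 512 chars -- well past the objsize >= 32 cutoff
+ // in oplog.cpp, so this insert should exercise the objAppend path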
+ d.foo.insert({ _id: 5, q: "aaaaa", b: big, z: 3 });
+
+ // assure writes applied in case we kill -9 on return from this function
+ d.getLastError();
+
+ log("endwork");
+}
+
+function verify() {
+ log("verify");
+ var d = conn.getDB("local");
+ assert( d.oplog.$main.find({"o.z":3}).count() == 1, "oplog doesn't match" );
+}
+
+if( debugging ) {
+ // mongod already running in debugger
+ print("DOING DEBUG MODE BEHAVIOR AS 'db' IS DEFINED -- RUN mongo --nodb FOR REGULAR TEST BEHAVIOR");
+ conn = db.getMongo();
+ work();
+ sleep(30000);
+ quit();
+}
+
+log();
+
+// directories
+var path1 = "/data/db/" + testname+"nodur";
+var path2 = "/data/db/" + testname+"dur";
+
+// non-durable version
+log();
+conn = startMongodEmpty("--port", 30000, "--dbpath", path1, "--nodur", "--smallfiles", "--master");
+work();
+stopMongod(30000);
+
+// durable version
+log();
+conn = startMongodEmpty("--port", 30001, "--dbpath", path2, "--dur", "--smallfiles", "--durOptions", /*DurParanoid*/8, "--master");
+work();
+
+// wait for group commit. use getLastError(...) later when that is enhanced.
+sleep(400);
+
+// kill the process hard
+stopMongod(30001, /*signal*/9);
+
+// journal file should be present, and non-empty as we killed hard
+
+// restart and recover
+log();
+conn = startMongodNoReset("--port", 30002, "--dbpath", path2, "--dur", "--smallfiles", "--durOptions", 8, "--master");
+verify();
+
+log("stop");
+stopMongod(30002);
+
+// stopMongod seems to be asynchronous (hmmm) so we sleep here.
+sleep(5000);
+
+// at this point, after clean shutdown, there should be no journal files
+log("check no journal files");
+assert(ls(path2 + "/journal") == null);
+
+log("check data matches ns");
+var diff = run("diff", path1 + "/test.ns", path2 + "/test.ns");
+if (diff != "") {
+ print("\n\n\nDIFFERS\n");
+ print(diff);
+}
+assert(diff == "", "error test.ns files differ");
+
+log("check data matches .0");
+diff = run("diff", path1 + "/test.0", path2 + "/test.0");
+if (diff != "") {
+ print("\n\n\nDIFFERS\n");
+ print(diff);
+}
+assert(diff == "", "error test.0 files differ");
+
+log("check data matches done");
+
+print(testname + " SUCCESS");