diff options
Diffstat (limited to 'src/mongo')
10 files changed, 401 insertions, 433 deletions
diff --git a/src/mongo/db/storage/mmap_v1/btree/btree_logic_test.cpp b/src/mongo/db/storage/mmap_v1/btree/btree_logic_test.cpp index 7499c178d46..a84b135091b 100644 --- a/src/mongo/db/storage/mmap_v1/btree/btree_logic_test.cpp +++ b/src/mongo/db/storage/mmap_v1/btree/btree_logic_test.cpp @@ -2207,8 +2207,8 @@ namespace mongo { // too much work to try to make this happen through inserts and deletes // we are intentionally manipulating the btree bucket directly here BtreeBucket::Loc* L = const_cast< BtreeBucket::Loc* >( &bt()->keyNode( 1 ).prevChildBucket ); - getDur().writing(L)->Null(); - getDur().writingInt( const_cast< BtreeBucket::Loc& >( bt()->keyNode( 1 ).recordLoc ).GETOFS() ) |= 1; // make unused + writing(L)->Null(); + writingInt( const_cast< BtreeBucket::Loc& >( bt()->keyNode( 1 ).recordLoc ).GETOFS() ) |= 1; // make unused BSONObj k = BSON( "a" << toInsert ); Base::insert( k ); } diff --git a/src/mongo/db/storage/mmap_v1/dur.cpp b/src/mongo/db/storage/mmap_v1/dur.cpp index 02990c0fffe..099ff151829 100644 --- a/src/mongo/db/storage/mmap_v1/dur.cpp +++ b/src/mongo/db/storage/mmap_v1/dur.cpp @@ -1,32 +1,30 @@ -// @file dur.cpp durability in the storage engine (crash-safeness / journaling) - /** -* Copyright (C) 2009 10gen Inc. -* -* This program is free software: you can redistribute it and/or modify -* it under the terms of the GNU Affero General Public License, version 3, -* as published by the Free Software Foundation. -* -* This program is distributed in the hope that it will be useful, -* but WITHOUT ANY WARRANTY; without even the implied warranty of -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -* GNU Affero General Public License for more details. -* -* You should have received a copy of the GNU Affero General Public License -* along with this program. If not, see <http://www.gnu.org/licenses/>. -* -* As a special exception, the copyright holders give permission to link the -* code of portions of this program with the OpenSSL library under certain -* conditions as described in each individual source file and distribute -* linked combinations including the program with the OpenSSL library. You -* must comply with the GNU Affero General Public License in all respects for -* all of the code used other than as permitted herein. If you modify file(s) -* with this exception, you may extend this exception to your version of the -* file(s), but you are not obligated to do so. If you do not wish to do so, -* delete this exception statement from your version. If you delete this -* exception statement from all source files in the program, then also delete -* it in the license file. -*/ + * Copyright (C) 2009-2014 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ /* phases: @@ -73,58 +71,73 @@ #include "mongo/platform/basic.h" +#include "mongo/db/storage/mmap_v1/dur.h" + #include <boost/thread/condition_variable.hpp> #include <boost/thread/mutex.hpp> #include <boost/thread/thread.hpp> #include <iomanip> #include "mongo/db/client.h" -#include "mongo/db/commands/fsync.h" #include "mongo/db/commands/server_status.h" #include "mongo/db/concurrency/lock_state.h" #include "mongo/db/operation_context_impl.h" -#include "mongo/db/storage/storage_engine.h" -#include "mongo/db/storage/mmap_v1/dur.h" #include "mongo/db/storage/mmap_v1/dur_commitjob.h" #include "mongo/db/storage/mmap_v1/dur_journal.h" #include "mongo/db/storage/mmap_v1/dur_recover.h" #include "mongo/db/storage/mmap_v1/dur_stats.h" #include "mongo/db/storage/mmap_v1/mmap_v1_options.h" #include "mongo/db/storage_options.h" -#include "mongo/server.h" #include "mongo/util/exit.h" #include "mongo/util/log.h" -#include "mongo/util/mongoutils/str.h" -#include "mongo/util/stacktrace.h" #include "mongo/util/timer.h" -using namespace mongoutils; - namespace mongo { +namespace dur { - namespace dur { +namespace { - void PREPLOGBUFFER(JSectHeader& outParm, AlignedBuilder&); - void WRITETOJOURNAL(JSectHeader h, AlignedBuilder& uncompressed); - void WRITETODATAFILES(const JSectHeader& h, AlignedBuilder& uncompressed); + // Used to activate the flush thread + boost::mutex flushMutex; + boost::condition_variable flushRequested; - /** declared later in this file - only used in this file -- use DurableInterface::commitNow() outside - */ - static void groupCommit(); + // When set, the flush thread will exit + AtomicUInt32 shutdownRequested(0); - // Used to activate the flush thread - static boost::mutex flushMutex; - static boost::condition_variable flushRequested; + // One instance of each durability interface + DurableImpl durableImpl; + NonDurableImpl nonDurableImpl; - // When set, the flush thread will exit - static AtomicUInt32 shutdownRequested(0); + // Pre-allocated buffer for building the journal + AlignedBuilder journalBuilder(4 * 1024 * 1024); +} + // Declared in dur_preplogbuffer.cpp + void PREPLOGBUFFER(JSectHeader& outHeader, AlignedBuilder& outBuffer); + void WRITETOJOURNAL(const JSectHeader& h, const AlignedBuilder& uncompressed); + void WRITETODATAFILES(const JSectHeader& h, const AlignedBuilder& uncompressed); - CommitJob& commitJob = *(new CommitJob()); // don't destroy + // Declared in dur_journal.cpp + boost::filesystem::path getJournalDir(); + void preallocateFiles(); + // Declared and maintained in dur_commitjob.cpp + extern size_t privateMapBytes; + + // Durability activity statistics Stats stats; + // Reference to the write intents tracking object + CommitJob& commitJob = *(new CommitJob()); // don't destroy + + // The durability interface to use + DurableInterface* DurableInterface::_impl = &nonDurableImpl; + + + // + // Stats + // + void Stats::S::reset() { memset(this, 0, sizeof(*this)); } @@ -197,6 +210,28 @@ namespace mongo { } } + + // + // DurableInterface + // + + DurableInterface::DurableInterface() { + + } + + DurableInterface::~DurableInterface() { + + } + + void DurableInterface::enableDurability() { + _impl = &durableImpl; + } + + + // + // NonDurableImpl + // + void* NonDurableImpl::writingPtr(void *x, unsigned len) { dassert(shutdownRequested.load() == 0); return x; @@ -214,16 +249,9 @@ namespace mongo { } - - static DurableImpl* durableImpl = new DurableImpl(); - static NonDurableImpl* nonDurableImpl = new NonDurableImpl(); - DurableInterface* DurableInterface::_impl = nonDurableImpl; - - void DurableInterface::enableDurability() { - verify(_impl == nonDurableImpl); - _impl = durableImpl; - } - + // + // DurableImpl + // bool DurableImpl::commitNow(OperationContext* txn) { NotifyAll::When when = commitJob._notify.now(); @@ -242,11 +270,6 @@ namespace mongo { return true; } - /** Declare that a file has been created - Normally writes are applied only after journaling, for safety. But here the file - is created first, and the journal will just replay the creation if the create didn't - happen because of crashing. - */ void DurableImpl::createdFile(const std::string& filename, unsigned long long len) { shared_ptr<DurOp> op( new FileCreatedOp(filename, len) ); commitJob.noteOp(op); @@ -259,26 +282,6 @@ namespace mongo { return p; } - /** declare intent to write - @param ofs offset within buf at which we will write - @param len the length at ofs we will write - @return new buffer pointer. - */ - void* DurableImpl::writingAtOffset(void *buf, unsigned ofs, unsigned len) { - char *p = (char *) buf; - declareWriteIntent(p+ofs, len); - return p; - } - - void* DurableImpl::writingRangesAtOffsets(void *buf, const vector< pair< long long, unsigned > > &ranges ) { - char *p = (char *) buf; - for( vector< pair< long long, unsigned > >::const_iterator i = ranges.begin(); - i != ranges.end(); ++i ) { - declareWriteIntent( p + i->first, i->second ); - } - return p; - } - bool DurableImpl::commitIfNeeded() { if (MONGO_likely(commitJob.bytes() < UncommittedBytesLimit)) { return false; @@ -289,6 +292,23 @@ namespace mongo { return true; } + void DurableImpl::syncDataAndTruncateJournal(OperationContext* txn) { + invariant(txn->lockState()->isW()); + + // a commit from the commit thread won't begin while we are in the write lock, + // but it may already be in progress and the end of that work is done outside + // (dbMutex) locks. This line waits for that to complete if already underway. + { + SimpleMutex::scoped_lock lk(commitJob.groupCommitMutex); + } + + commitNow(txn); + MongoFile::flushAll(true); + journalCleanup(); + + invariant(!haveJournalFiles()); // Double check post-conditions + } + void DurableImpl::commitAndStopDurThread() { NotifyAll::When when = commitJob._notify.now(); @@ -302,9 +322,9 @@ namespace mongo { // Functor to be called over all MongoFiles - class validateSingleMapMatches { + class ValidateSingleMapMatches { public: - validateSingleMapMatches(unsigned long long& bytes) :_bytes(bytes) {} + ValidateSingleMapMatches(unsigned long long& bytes) :_bytes(bytes) {} void operator () (MongoFile *mf) { if( mf->isDurableMappedFile() ) { DurableMappedFile *mmf = (DurableMappedFile*) mf; @@ -362,6 +382,7 @@ namespace mongo { unsigned long long& _bytes; }; + /** (SLOW) diagnostic to check that the private view and the non-private view are in sync. */ void debugValidateAllMapsMatch() { @@ -370,13 +391,12 @@ namespace mongo { unsigned long long bytes = 0; Timer t; - MongoFile::forEach(validateSingleMapMatches(bytes)); + MongoFile::forEach(ValidateSingleMapMatches(bytes)); OCCASIONALLY log() << "DurParanoid map check " << t.millis() << "ms for " << (bytes / (1024*1024)) << "MB" << endl; } - extern size_t privateMapBytes; - static void _REMAPPRIVATEVIEW() { + static void _remapPrivateView() { // todo: Consider using ProcessInfo herein and watching for getResidentSize to drop. that could be a way // to assure very good behavior here. @@ -385,16 +405,16 @@ namespace mongo { LOG(4) << "journal REMAPPRIVATEVIEW" << endl; - invariant(!commitJob.hasWritten()); - // we want to remap all private views about every 2 seconds. there could be ~1000 views so // we do a little each pass; beyond the remap time, more significantly, there will be copy on write // faults after remapping, so doing a little bit at a time will avoid big load spikes on // remapping. unsigned long long now = curTimeMicros64(); double fraction = (now-lastRemap)/2000000.0; - if (mmapv1GlobalOptions.journalOptions & MMAPV1Options::JournalAlwaysRemap) + if (mmapv1GlobalOptions.journalOptions & MMAPV1Options::JournalAlwaysRemap) { fraction = 1; + } + lastRemap = now; #if defined(_WIN32) || defined(__sunos__) @@ -456,57 +476,75 @@ namespace mongo { << ' ' << t.millis() << "ms" << endl; } - /** We need to remap the private views periodically. otherwise they would become very large. - Call within write lock. See top of file for more commentary. - */ - static void REMAPPRIVATEVIEW() { - Timer t; - _REMAPPRIVATEVIEW(); - stats.curr->_remapPrivateViewMicros += t.micros(); - } - // this is a pseudo-local variable in the groupcommit functions - // below. however we don't truly do that so that we don't have to - // reallocate, and more importantly regrow it, on every single commit. - static AlignedBuilder __theBuilder(4 * 1024 * 1024); + static void remapPrivateView() { + // Remapping private views must occur after WRITETODATAFILES otherwise we wouldn't see + // any newly written data on reads. + invariant(!commitJob.hasWritten()); + try { + Timer t; + _remapPrivateView(); + stats.curr->_remapPrivateViewMicros += t.micros(); - static void _groupCommit() { - LOG(4) << "_groupCommit " << endl; + LOG(4) << "remapPrivateView end"; + return; + } + catch (DBException& e) { + severe() << "dbexception in remapPrivateView causing immediate shutdown: " + << e.toString(); + mongoAbort("gc1"); + } + catch (std::ios_base::failure& e) { + severe() << "ios_base exception in remapPrivateView causing immediate shutdown: " + << e.what(); + mongoAbort("gc2"); + } + catch (std::bad_alloc& e) { + severe() << "bad_alloc exception in remapPrivateView causing immediate shutdown: " + << e.what(); + mongoAbort("gc3"); + } + catch (std::exception& e) { + severe() << "exception in remapPrivateView causing immediate shutdown: " + << e.what(); + mongoAbort("gc4"); + } + } - { - AlignedBuilder &ab = __theBuilder; - // we need to make sure two group commits aren't running at the same time - // (and we are only read locked in the dbMutex, so it could happen -- while - // there is only one dur thread, "early commits" can be done by other threads) - SimpleMutex::scoped_lock lk(commitJob.groupCommitMutex); + static void _groupCommit() { + LOG(4) << "_groupCommit "; - commitJob.commitingBegin(); + // we need to make sure two group commits aren't running at the same time + // (and we are only read locked in the dbMutex, so it could happen -- while + // there is only one dur thread, "early commits" can be done by other threads) + SimpleMutex::scoped_lock lk(commitJob.groupCommitMutex); - if( !commitJob.hasWritten() ) { - // getlasterror request could have came after the data was already committed - commitJob.committingNotifyCommitted(); - } - else { - JSectHeader h; - PREPLOGBUFFER(h,ab); + commitJob.commitingBegin(); - // todo : write to the journal outside locks, as this write can be slow. - // however, be careful then about remapprivateview as that cannot be done - // if new writes are then pending in the private maps. - WRITETOJOURNAL(h, ab); + if (!commitJob.hasWritten()) { + // getlasterror request could have came after the data was already committed + commitJob.committingNotifyCommitted(); + } + else { + JSectHeader h; + PREPLOGBUFFER(h, journalBuilder); - // data is now in the journal, which is sufficient for acknowledging getLastError. - // (ok to crash after that) - commitJob.committingNotifyCommitted(); + // todo : write to the journal outside locks, as this write can be slow. + // however, be careful then about remapprivateview as that cannot be done + // if new writes are then pending in the private maps. + WRITETOJOURNAL(h, journalBuilder); - WRITETODATAFILES(h, ab); - debugValidateAllMapsMatch(); + // data is now in the journal, which is sufficient for acknowledging getLastError. + // (ok to crash after that) + commitJob.committingNotifyCommitted(); - commitJob.committingReset(); - ab.reset(); - } + WRITETODATAFILES(h, journalBuilder); + debugValidateAllMapsMatch(); + + commitJob.committingReset(); + journalBuilder.reset(); } } @@ -520,60 +558,30 @@ namespace mongo { static void groupCommit() { try { _groupCommit(); - } - catch(DBException& e ) { - log() << "dbexception in groupCommit causing immediate shutdown: " << e.toString() << endl; - mongoAbort("gc1"); - } - catch(std::ios_base::failure& e) { - log() << "ios_base exception in groupCommit causing immediate shutdown: " << e.what() << endl; - mongoAbort("gc2"); - } - catch(std::bad_alloc& e) { - log() << "bad_alloc exception in groupCommit causing immediate shutdown: " << e.what() << endl; - mongoAbort("gc3"); - } - catch(std::exception& e) { - log() << "exception in groupCommit causing immediate shutdown: " << e.what() << endl; - mongoAbort("gc4"); - } - LOG(4) << "groupCommit end" << endl; - } - static void remapPrivateView() { - try { - // Remapping private views must occur after WRITETODATAFILES otherwise we wouldn't - // see newly written data on reads. - invariant(!commitJob.hasWritten()); - - REMAPPRIVATEVIEW(); + LOG(4) << "groupCommit end"; + return; } catch (DBException& e) { - log() << "dbexception in remapPrivateView causing immediate shutdown: " - << e.toString() - << endl; + severe() << "dbexception in groupCommit causing immediate shutdown: " + << e.toString(); mongoAbort("gc1"); } - catch (std::ios_base::failure& e) { - log() << "ios_base exception in remapPrivateView causing immediate shutdown: " - << e.what() - << endl; + catch(std::ios_base::failure& e) { + severe() << "ios_base exception in groupCommit causing immediate shutdown: " + << e.what(); mongoAbort("gc2"); } - catch (std::bad_alloc& e) { - log() << "bad_alloc exception in remapPrivateView causing immediate shutdown: " - << e.what() - << endl; + catch(std::bad_alloc& e) { + severe() << "bad_alloc exception in groupCommit causing immediate shutdown: " + << e.what(); mongoAbort("gc3"); } - catch (std::exception& e) { - log() << "exception in remapPrivateView causing immediate shutdown: " - << e.what() - << endl; + catch(std::exception& e) { + severe() << "exception in groupCommit causing immediate shutdown: " + << e.what(); mongoAbort("gc4"); } - - LOG(4) << "remapPrivateView end" << endl; } @@ -596,8 +604,6 @@ namespace mongo { } } - extern int groupCommitIntervalMs; - boost::filesystem::path getJournalDir(); static void durThread() { Client::initThread("journal"); @@ -627,16 +633,20 @@ namespace mongo { // commit sooner if one or more getLastError j:true is pending for (unsigned i = 0; i <= 2; i++) { - if (flushRequested.timed_wait(lock, - Milliseconds(oneThird))) { + if (flushRequested.timed_wait(lock, Milliseconds(oneThird))) { // Someone forced a flush break; } - if (commitJob._notify.nWaiting()) + if (commitJob._notify.nWaiting()) { + // There are threads waiting for journaling break; - if (commitJob.bytes() > UncommittedBytesLimit / 10) + } + + if (commitJob.bytes() > UncommittedBytesLimit / 2) { + // The number of written bytes is growing break; + } } OperationContextImpl txn; @@ -651,11 +661,11 @@ namespace mongo { remapPrivateView(); } catch(std::exception& e) { - log() << "exception in durThread causing immediate shutdown: " << e.what() << endl; + severe() << "exception in durThread causing immediate shutdown: " << e.what(); mongoAbort("exception in durThread"); } catch (...) { - log() << "unhandled exception in durThread causing immediate shutdown" << endl; + severe() << "unhandled exception in durThread causing immediate shutdown"; mongoAbort("unhandled exception in durThread"); } } @@ -663,7 +673,6 @@ namespace mongo { cc().shutdown(); } - void preallocateFiles(); /** at startup, recover, and then start the journal threads */ void startup() { @@ -676,15 +685,15 @@ namespace mongo { replayJournalFilesAtStartup(); } catch(DBException& e) { - log() << "dbexception during recovery: " << e.toString() << endl; + severe() << "dbexception during recovery: " << e.toString(); throw; } catch(std::exception& e) { - log() << "std::exception during recovery: " << e.what() << endl; + severe() << "std::exception during recovery: " << e.what(); throw; } catch(...) { - log() << "exception during recovery" << endl; + severe() << "exception during recovery"; throw; } @@ -694,30 +703,11 @@ namespace mongo { boost::thread t(durThread); } - DurableInterface::~DurableInterface() { - log() << "ERROR warning ~DurableInterface not intended to be called" << std::endl; - } - - void DurableImpl::syncDataAndTruncateJournal(OperationContext* txn) { - invariant(txn->lockState()->isW()); - - // a commit from the commit thread won't begin while we are in the write lock, - // but it may already be in progress and the end of that work is done outside - // (dbMutex) locks. This line waits for that to complete if already underway. - { - SimpleMutex::scoped_lock lk(commitJob.groupCommitMutex); - } - - commitNow(txn); - MongoFile::flushAll(true); - journalCleanup(); - - invariant(!haveJournalFiles()); // Double check post-conditions - } class DurSSS : public ServerStatusSection { public: - DurSSS() : ServerStatusSection( "dur" ){} + DurSSS() : ServerStatusSection( "dur" ) { } + virtual bool includeByDefault() const { return true; } BSONObj generateSection(OperationContext* txn, @@ -732,7 +722,5 @@ namespace mongo { } durSSS; - - } // namespace dur - +} // namespace dur } // namespace mongo diff --git a/src/mongo/db/storage/mmap_v1/dur.h b/src/mongo/db/storage/mmap_v1/dur.h index 9b135ff78e9..c296ae4ad9b 100644 --- a/src/mongo/db/storage/mmap_v1/dur.h +++ b/src/mongo/db/storage/mmap_v1/dur.h @@ -1,38 +1,35 @@ -// @file dur.h durability support - /** -* Copyright (C) 2009 10gen Inc. -* -* This program is free software: you can redistribute it and/or modify -* it under the terms of the GNU Affero General Public License, version 3, -* as published by the Free Software Foundation. -* -* This program is distributed in the hope that it will be useful, -* but WITHOUT ANY WARRANTY; without even the implied warranty of -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -* GNU Affero General Public License for more details. -* -* You should have received a copy of the GNU Affero General Public License -* along with this program. If not, see <http://www.gnu.org/licenses/>. -* -* As a special exception, the copyright holders give permission to link the -* code of portions of this program with the OpenSSL library under certain -* conditions as described in each individual source file and distribute -* linked combinations including the program with the OpenSSL library. You -* must comply with the GNU Affero General Public License in all respects for -* all of the code used other than as permitted herein. If you modify file(s) -* with this exception, you may extend this exception to your version of the -* file(s), but you are not obligated to do so. If you do not wish to do so, -* delete this exception statement from your version. If you delete this -* exception statement from all source files in the program, then also delete -* it in the license file. -*/ + * Copyright (C) 2009-2014 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ #pragma once #include "mongo/db/storage/mmap_v1/durable_mapped_file.h" - namespace mongo { class OperationContext; @@ -40,188 +37,167 @@ namespace mongo { void mongoAbort(const char *msg); void abort(); // not defined -- use mongoAbort() instead - namespace dur { - // a smaller limit is likely better on 32 bit - const unsigned UncommittedBytesLimit = (sizeof(void*)==4) ? 50 * 1024 * 1024 : 512 * 1024 * 1024; +namespace dur { + + // a smaller limit is likely better on 32 bit + const unsigned UncommittedBytesLimit = (sizeof(void*) == 4) ? 50 * 1024 * 1024 : 512 * 1024 * 1024; + + /** + * Called during startup so durability module can initialize and start the durability thread. + * Does nothing if storageGlobalParams.dur is false + */ + void startup(); + + + class DurableInterface { + MONGO_DISALLOW_COPYING(DurableInterface); + public: + + DurableInterface(); + virtual ~DurableInterface(); - /** Call during startup so durability module can initialize - Throws if fatal error - Does nothing if storageGlobalParams.dur is false + /** + * Declare that a file has been created. Normally writes are applied only after journaling + * for safety. But here the file is created first, and the journal will just replay the + * creation if the create didn't happen due to a crash. */ - void startup(); + virtual void createdFile(const std::string& filename, unsigned long long len) = 0; + + // Declare write intents. Use these methods to declare "i'm about to write to x and it + // should be logged for redo." + // + // Failure to call writing...() is checked in _DEBUG mode by using a read only mapped view + // (i.e., you'll segfault if the code is covered in that situation). The _DEBUG check + // doesn't verify that your length is correct though. + + /** + * Declare intent to write to x for up to len. + * + * @return pointer where to write. + */ + virtual void* writingPtr(void *x, unsigned len) = 0; + + /** + * Declare write intent after write has been done. + */ + virtual void declareWriteIntent(void *x, unsigned len) = 0; - class DurableInterface : boost::noncopyable { - public: - virtual ~DurableInterface(); + /** + * Allows you to declare many write intents at once more efficiently than repeated calls + * to declareWriteIntent. + */ + virtual void declareWriteIntents( + const std::vector<std::pair<void*, unsigned> >& intents) = 0; - /** Declare that a file has been created - Normally writes are applied only after journaling, for safety. But here the file - is created first, and the journal will just replay the creation if the create didn't - happen because of crashing. + /** Wait for acknowledgement of the next group commit. + @return true if --dur is on. There will be delay. + @return false if --dur is off. */ - virtual void createdFile(const std::string& filename, unsigned long long len) = 0; + virtual bool awaitCommit() = 0; - /** Declarations of write intent. + /** Commit immediately. - Use these methods to declare "i'm about to write to x and it should be logged for redo." + Generally, you do not want to do this often, as highly granular committing may affect + performance. - Failure to call writing...() is checked in _DEBUG mode by using a read only mapped view - (i.e., you'll segfault if the code is covered in that situation). The _DEBUG check doesn't - verify that your length is correct though. - */ + Does not return until the commit is complete. - /** declare intent to write to x for up to len - @return pointer where to write. this is modified when testIntent is true. - */ - virtual void* writingPtr(void *x, unsigned len) = 0; + You must be at least read locked when you call this. Ideally, you are not write locked + and then read operations can occur concurrently. - /** declare write intent; should already be in the write view to work correctly when testIntent is true. - if you aren't, use writingPtr() instead. + Do not use this. Use commitIfNeeded() instead. + + @return true if --dur is on. + @return false if --dur is off. (in which case there is action) */ - virtual void declareWriteIntent(void *x, unsigned len) = 0; - - /** - * Allows you to declare many write intents at once more efficiently than repeated calls - * to declareWriteIntent. - */ - virtual void declareWriteIntents( - const std::vector<std::pair<void*, unsigned> >& intents) = 0; - - /** declare intent to write - @param ofs offset within buf at which we will write - @param len the length at ofs we will write - @return new buffer pointer. this is modified when testIntent is true. + virtual bool commitNow(OperationContext* txn) = 0; + + /** Commit if enough bytes have been modified. Current threshold is 50MB + + The idea is that long running write operations that don't yield + (like creating an index or update with $atomic) can call this + whenever the db is in a sane state and it will prevent commits + from growing too large. + @return true if commited */ - virtual void* writingAtOffset(void *buf, unsigned ofs, unsigned len) = 0; - - /** declare intent to write - @param ranges vector of pairs representing ranges. Each pair - comprises an offset from buf where a range begins, then the - range length. - @return new buffer pointer. this is modified when testIntent is true. - */ - virtual void* writingRangesAtOffsets(void *buf, const std::vector< std::pair< long long, unsigned > > &ranges ) = 0; - - /** Wait for acknowledgement of the next group commit. - @return true if --dur is on. There will be delay. - @return false if --dur is off. + virtual bool commitIfNeeded() = 0; + + /** + * Invoked at clean shutdown time. Performs one last commit/flush and terminates the + * flush thread. + * + * Must be called under the global X lock. */ - virtual bool awaitCommit() = 0; + virtual void commitAndStopDurThread() = 0; + + /** + * Commits pending changes, flushes all changes to main data files, then removes the + * journal. + * + * WARNING: Data *must* be in a crash-recoverable state when this is called and must + * not be inside of a write unit of work. + * + * This is useful as a "barrier" to ensure that writes before this call will never go + * through recovery and be applied to files that have had changes made after this call + * applied. + */ + virtual void syncDataAndTruncateJournal(OperationContext* txn) = 0; + + virtual bool isDurable() const = 0; - /** Commit immediately. + /** + * Declare intent to write to x for sizeof(*x) + */ + template <typename T> + T* writing(T *x) { + return static_cast<T*>(writingPtr(x, sizeof(T))); + } - Generally, you do not want to do this often, as highly granular committing may affect - performance. - Does not return until the commit is complete. + static DurableInterface& getDur() { return *_impl; } - You must be at least read locked when you call this. Ideally, you are not write locked - and then read operations can occur concurrently. + private: - Do not use this. Use commitIfNeeded() instead. + // Needs to be able to enable/disable Durability + friend void startup(); - @return true if --dur is on. - @return false if --dur is off. (in which case there is action) - */ - virtual bool commitNow(OperationContext* txn) = 0; + static void enableDurability(); // makes _impl a DurableImpl - /** Commit if enough bytes have been modified. Current threshold is 50MB + static DurableInterface* _impl; // NonDurableImpl at startup() + }; - The idea is that long running write operations that don't yield - (like creating an index or update with $atomic) can call this - whenever the db is in a sane state and it will prevent commits - from growing too large. - @return true if commited - */ - virtual bool commitIfNeeded() = 0; - /** - * Invoked at clean shutdown time. Performs one last commit/flush and terminates the - * flush thread. - * - * Must be called under the global X lock. - */ - virtual void commitAndStopDurThread() = 0; + class NonDurableImpl : public DurableInterface { + public: + void* writingPtr(void *x, unsigned len); + void declareWriteIntent(void *, unsigned); + void declareWriteIntents(const std::vector<std::pair<void*, unsigned> >& intents) { } + void createdFile(const std::string& filename, unsigned long long len) { } + bool awaitCommit() { return false; } + bool commitNow(OperationContext* txn); + bool commitIfNeeded(); + void syncDataAndTruncateJournal(OperationContext* txn) {} + bool isDurable() const { return false; } + void commitAndStopDurThread() { } + }; - /** Declare write intent for an int */ - inline int& writingInt(int& d) { return *static_cast<int*>(writingPtr( &d, sizeof(d))); } + class DurableImpl : public DurableInterface { + public: + void* writingPtr(void *x, unsigned len); + void declareWriteIntent(void *, unsigned); + void declareWriteIntents(const std::vector<std::pair<void*, unsigned> >& intents); + void createdFile(const std::string& filename, unsigned long long len); + bool awaitCommit(); + bool commitNow(OperationContext* txn); + bool commitIfNeeded(); + void syncDataAndTruncateJournal(OperationContext* txn); + bool isDurable() const { return true; } + void commitAndStopDurThread(); + }; + +} // namespace dur - /** "assume i've already indicated write intent, let me write" - redeclaration is fine too, but this is faster. - */ - template <typename T> - inline - T* alreadyDeclared(T *x) { -#if defined(_TESTINTENT) - return (T*) DurableMappedFile::switchToPrivateView(x); -#else - return x; -#endif - } - - /** declare intent to write to x for sizeof(*x) */ - template <typename T> - inline - T* writing(T *x) { - return (T*) writingPtr(x, sizeof(T)); - } - - /** - * Commits pending changes, flushes all changes to main data files, then removes the - * journal. - * - * WARNING: Data *must* be in a crash-recoverable state when this is called and must - * not be inside of a write unit of work. - * - * This is useful as a "barrier" to ensure that writes before this call will never go - * through recovery and be applied to files that have had changes made after this call - * applied. - */ - virtual void syncDataAndTruncateJournal(OperationContext* txn) = 0; - - virtual bool isDurable() const = 0; - - static DurableInterface& getDur() { return *_impl; } - - private: - static DurableInterface* _impl; // NonDurableImpl at startup() - static void enableDurability(); // makes _impl a DurableImpl - - // these need to be able to enable/disable Durability - friend void startup(); - }; // class DurableInterface - - class NonDurableImpl : public DurableInterface { - void* writingPtr(void *x, unsigned len); - void* writingAtOffset(void *buf, unsigned ofs, unsigned len) { return buf; } - void* writingRangesAtOffsets(void *buf, const std::vector< std::pair< long long, unsigned > > &ranges) { return buf; } - void declareWriteIntent(void *, unsigned); - void declareWriteIntents(const std::vector<std::pair<void*, unsigned> >& intents) {} - void createdFile(const std::string& filename, unsigned long long len) { } - bool awaitCommit() { return false; } - bool commitNow(OperationContext* txn); - bool commitIfNeeded(); - void syncDataAndTruncateJournal(OperationContext* txn) {} - bool isDurable() const { return false; } - void commitAndStopDurThread() { } - }; - - class DurableImpl : public DurableInterface { - void* writingPtr(void *x, unsigned len); - void* writingAtOffset(void *buf, unsigned ofs, unsigned len); - void* writingRangesAtOffsets(void *buf, const std::vector< std::pair< long long, unsigned > > &ranges); - void declareWriteIntent(void *, unsigned); - void declareWriteIntents(const std::vector<std::pair<void*, unsigned> >& intents); - void createdFile(const std::string& filename, unsigned long long len); - bool awaitCommit(); - bool commitNow(OperationContext* txn); - bool commitIfNeeded(); - void syncDataAndTruncateJournal(OperationContext* txn); - bool isDurable() const { return true; } - void commitAndStopDurThread(); - }; - - } // namespace dur inline dur::DurableInterface& getDur() { return dur::DurableInterface::getDur(); } -} + +} // namespace mongo diff --git a/src/mongo/db/storage/mmap_v1/dur_journal.cpp b/src/mongo/db/storage/mmap_v1/dur_journal.cpp index 89f16b62235..02889e3632b 100644 --- a/src/mongo/db/storage/mmap_v1/dur_journal.cpp +++ b/src/mongo/db/storage/mmap_v1/dur_journal.cpp @@ -724,7 +724,7 @@ namespace mongo { @param uncompressed - a buffer that will be written to the journal after compression will not return until on disk */ - void WRITETOJOURNAL(JSectHeader h, AlignedBuilder& uncompressed) { + void WRITETOJOURNAL(const JSectHeader& h, const AlignedBuilder& uncompressed) { Timer t; j.journal(h, uncompressed); stats.curr->_writeToJournalMicros += t.micros(); diff --git a/src/mongo/db/storage/mmap_v1/dur_preplogbuffer.cpp b/src/mongo/db/storage/mmap_v1/dur_preplogbuffer.cpp index 9323cb601ca..95bb53eabf4 100644 --- a/src/mongo/db/storage/mmap_v1/dur_preplogbuffer.cpp +++ b/src/mongo/db/storage/mmap_v1/dur_preplogbuffer.cpp @@ -193,10 +193,10 @@ namespace mongo { return; } - void PREPLOGBUFFER(/*out*/ JSectHeader& h, AlignedBuilder& ab) { + void PREPLOGBUFFER(/*out*/ JSectHeader& outHeader, AlignedBuilder& outBuffer) { Timer t; j.assureLogFileOpen(); // so fileId is set - _PREPLOGBUFFER(h, ab); + _PREPLOGBUFFER(outHeader, outBuffer); stats.curr->_prepLogBufferMicros += t.micros(); } diff --git a/src/mongo/db/storage/mmap_v1/dur_writetodatafiles.cpp b/src/mongo/db/storage/mmap_v1/dur_writetodatafiles.cpp index de690f6d123..85b04854dda 100644 --- a/src/mongo/db/storage/mmap_v1/dur_writetodatafiles.cpp +++ b/src/mongo/db/storage/mmap_v1/dur_writetodatafiles.cpp @@ -45,7 +45,8 @@ namespace mongo { void debugValidateAllMapsMatch(); - static void WRITETODATAFILES_Impl1(const JSectHeader& h, AlignedBuilder& uncompressed) { + static void WRITETODATAFILES_Impl1(const JSectHeader& h, + const AlignedBuilder& uncompressed) { LOG(3) << "journal WRITETODATAFILES 1" << endl; RecoveryJob::get().processSection(&h, uncompressed.buf(), uncompressed.len(), 0); LOG(3) << "journal WRITETODATAFILES 2" << endl; @@ -99,7 +100,7 @@ namespace mongo { @see https://docs.google.com/drawings/edit?id=1TklsmZzm7ohIZkwgeK6rMvsdaR13KjtJYMsfLr175Zc&hl=en */ - void WRITETODATAFILES(const JSectHeader& h, AlignedBuilder& uncompressed) { + void WRITETODATAFILES(const JSectHeader& h, const AlignedBuilder& uncompressed) { Timer t; WRITETODATAFILES_Impl1(h, uncompressed); long long m = t.micros(); diff --git a/src/mongo/db/storage/mmap_v1/mmap_v1_database_catalog_entry.cpp b/src/mongo/db/storage/mmap_v1/mmap_v1_database_catalog_entry.cpp index 8febc2bb83a..ca8efe1a38f 100644 --- a/src/mongo/db/storage/mmap_v1/mmap_v1_database_catalog_entry.cpp +++ b/src/mongo/db/storage/mmap_v1/mmap_v1_database_catalog_entry.cpp @@ -105,7 +105,7 @@ namespace mongo { void commit() { } private: - std::string _ns; + const std::string _ns; MMAPV1DatabaseCatalogEntry* const _entry; }; @@ -133,7 +133,7 @@ namespace mongo { } private: - std::string _ns; + const std::string _ns; MMAPV1DatabaseCatalogEntry* const _catalogEntry; Entry* const _cachedEntry; }; diff --git a/src/mongo/db/storage/mmap_v1/mmap_v1_database_catalog_entry.h b/src/mongo/db/storage/mmap_v1/mmap_v1_database_catalog_entry.h index 597efe7a0ed..62fcbe0fc25 100644 --- a/src/mongo/db/storage/mmap_v1/mmap_v1_database_catalog_entry.h +++ b/src/mongo/db/storage/mmap_v1/mmap_v1_database_catalog_entry.h @@ -121,6 +121,25 @@ namespace mongo { void createNamespaceForIndex(OperationContext* txn, const StringData& name); private: + + class EntryInsertion; + class EntryRemoval; + + friend class NamespaceDetailsCollectionCatalogEntry; + + // The _collections map is a cache for efficiently looking up namespace information. + // Access to the cache is protected by the _collectionsLock mutex. + // Once initialized, the cache must remain consistent with the data in the memory-mapped + // database files through _removeFromCache and _insertInCache_inlock. These methods + // use the RecoveryUnit to ensure correct handling of rollback. + struct Entry { + scoped_ptr<CollectionCatalogEntry> catalogEntry; + scoped_ptr<RecordStoreV1Base> recordStore; + }; + + typedef std::map<std::string, Entry*> CollectionMap; + + RecordStoreV1Base* _getIndexRecordStore_inlock(); RecordStoreV1Base* _getIndexRecordStore(); RecordStoreV1Base* _getNamespaceRecordStore_inlock() const; @@ -132,6 +151,7 @@ namespace mongo { void _addNamespaceToNamespaceCollection( OperationContext* txn, const StringData& ns, const BSONObj* options ); + void _addNamespaceToNamespaceCollection_inlock( OperationContext* txn, const StringData& ns, const BSONObj* options ); @@ -147,23 +167,8 @@ namespace mongo { void _ensureSystemCollection_inlock( OperationContext* txn, const StringData& ns ); - void _lazyInit( OperationContext* txn ); - - std::string _path; - MmapV1ExtentManager _extentManager; - NamespaceIndex _namespaceIndex; - - // The _collections map is a cache for efficiently looking up namespace information. - // Access to the cache is protected by the _collectionsLock mutex. - // Once initialized, the cache must remain consistent with the data in the memory-mapped - // database files through _removeFromCache and _insertInCache_inlock. These methods - // use the RecoveryUnit to ensure correct handling of rollback. - - struct Entry { - scoped_ptr<CollectionCatalogEntry> catalogEntry; - scoped_ptr<RecordStoreV1Base> recordStore; - }; + void _lazyInit( OperationContext* txn ); /** * Populate the _collections cache. @@ -176,13 +181,13 @@ namespace mongo { */ void _removeFromCache(RecoveryUnit* ru, const StringData& ns); - mutable boost::mutex _collectionsLock; - typedef std::map<std::string, Entry*> CollectionMap; - CollectionMap _collections; - class EntryInsertion; - class EntryRemoval; + std::string _path; - friend class NamespaceDetailsCollectionCatalogEntry; + MmapV1ExtentManager _extentManager; + NamespaceIndex _namespaceIndex; + + mutable boost::mutex _collectionsLock; + CollectionMap _collections; }; } diff --git a/src/mongo/db/storage/mmap_v1/mmap_v1_engine.cpp b/src/mongo/db/storage/mmap_v1/mmap_v1_engine.cpp index 5430b473c17..7e3fc759311 100644 --- a/src/mongo/db/storage/mmap_v1/mmap_v1_engine.cpp +++ b/src/mongo/db/storage/mmap_v1/mmap_v1_engine.cpp @@ -45,7 +45,6 @@ #include "mongo/db/mongod_options.h" #include "mongo/db/storage/mmap_v1/data_file_sync.h" #include "mongo/db/storage/mmap_v1/dur.h" -#include "mongo/db/storage/mmap_v1/dur_commitjob.h" #include "mongo/db/storage/mmap_v1/dur_journal.h" #include "mongo/db/storage/mmap_v1/dur_recover.h" #include "mongo/db/storage/mmap_v1/dur_recovery_unit.h" diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.h b/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.h index 86aa8f46dc0..4fc5fc93c58 100644 --- a/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.h +++ b/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.h @@ -74,7 +74,6 @@ namespace mongo { // un-used API virtual void* writingPtr(void* data, size_t len) { invariant(!"don't call writingPtr"); } - virtual void syncDataAndTruncateJournal() {} virtual uint64_t getMyTransactionCount() const; |