summaryrefslogtreecommitdiff
path: root/src/mongo
diff options
context:
space:
mode:
Diffstat (limited to 'src/mongo')
-rw-r--r-- src/mongo/db/storage/mmap_v1/btree/btree_logic_test.cpp 4
-rw-r--r-- src/mongo/db/storage/mmap_v1/dur.cpp 412
-rw-r--r-- src/mongo/db/storage/mmap_v1/dur.h 352
-rw-r--r-- src/mongo/db/storage/mmap_v1/dur_journal.cpp 2
-rw-r--r-- src/mongo/db/storage/mmap_v1/dur_preplogbuffer.cpp 4
-rw-r--r-- src/mongo/db/storage/mmap_v1/dur_writetodatafiles.cpp 5
-rw-r--r-- src/mongo/db/storage/mmap_v1/mmap_v1_database_catalog_entry.cpp 4
-rw-r--r-- src/mongo/db/storage/mmap_v1/mmap_v1_database_catalog_entry.h 49
-rw-r--r-- src/mongo/db/storage/mmap_v1/mmap_v1_engine.cpp 1
-rw-r--r-- src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.h 1
10 files changed, 401 insertions, 433 deletions
diff --git a/src/mongo/db/storage/mmap_v1/btree/btree_logic_test.cpp b/src/mongo/db/storage/mmap_v1/btree/btree_logic_test.cpp
index 7499c178d46..a84b135091b 100644
--- a/src/mongo/db/storage/mmap_v1/btree/btree_logic_test.cpp
+++ b/src/mongo/db/storage/mmap_v1/btree/btree_logic_test.cpp
@@ -2207,8 +2207,8 @@ namespace mongo {
// too much work to try to make this happen through inserts and deletes
// we are intentionally manipulating the btree bucket directly here
BtreeBucket::Loc* L = const_cast< BtreeBucket::Loc* >( &bt()->keyNode( 1 ).prevChildBucket );
- getDur().writing(L)->Null();
- getDur().writingInt( const_cast< BtreeBucket::Loc& >( bt()->keyNode( 1 ).recordLoc ).GETOFS() ) |= 1; // make unused
+ writing(L)->Null();
+ writingInt( const_cast< BtreeBucket::Loc& >( bt()->keyNode( 1 ).recordLoc ).GETOFS() ) |= 1; // make unused
BSONObj k = BSON( "a" << toInsert );
Base::insert( k );
}
diff --git a/src/mongo/db/storage/mmap_v1/dur.cpp b/src/mongo/db/storage/mmap_v1/dur.cpp
index 02990c0fffe..099ff151829 100644
--- a/src/mongo/db/storage/mmap_v1/dur.cpp
+++ b/src/mongo/db/storage/mmap_v1/dur.cpp
@@ -1,32 +1,30 @@
-// @file dur.cpp durability in the storage engine (crash-safeness / journaling)
-
/**
-* Copyright (C) 2009 10gen Inc.
-*
-* This program is free software: you can redistribute it and/or modify
-* it under the terms of the GNU Affero General Public License, version 3,
-* as published by the Free Software Foundation.
-*
-* This program is distributed in the hope that it will be useful,
-* but WITHOUT ANY WARRANTY; without even the implied warranty of
-* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-* GNU Affero General Public License for more details.
-*
-* You should have received a copy of the GNU Affero General Public License
-* along with this program. If not, see <http://www.gnu.org/licenses/>.
-*
-* As a special exception, the copyright holders give permission to link the
-* code of portions of this program with the OpenSSL library under certain
-* conditions as described in each individual source file and distribute
-* linked combinations including the program with the OpenSSL library. You
-* must comply with the GNU Affero General Public License in all respects for
-* all of the code used other than as permitted herein. If you modify file(s)
-* with this exception, you may extend this exception to your version of the
-* file(s), but you are not obligated to do so. If you do not wish to do so,
-* delete this exception statement from your version. If you delete this
-* exception statement from all source files in the program, then also delete
-* it in the license file.
-*/
+ * Copyright (C) 2009-2014 MongoDB Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the GNU Affero General Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
/*
phases:
@@ -73,58 +71,73 @@
#include "mongo/platform/basic.h"
+#include "mongo/db/storage/mmap_v1/dur.h"
+
#include <boost/thread/condition_variable.hpp>
#include <boost/thread/mutex.hpp>
#include <boost/thread/thread.hpp>
#include <iomanip>
#include "mongo/db/client.h"
-#include "mongo/db/commands/fsync.h"
#include "mongo/db/commands/server_status.h"
#include "mongo/db/concurrency/lock_state.h"
#include "mongo/db/operation_context_impl.h"
-#include "mongo/db/storage/storage_engine.h"
-#include "mongo/db/storage/mmap_v1/dur.h"
#include "mongo/db/storage/mmap_v1/dur_commitjob.h"
#include "mongo/db/storage/mmap_v1/dur_journal.h"
#include "mongo/db/storage/mmap_v1/dur_recover.h"
#include "mongo/db/storage/mmap_v1/dur_stats.h"
#include "mongo/db/storage/mmap_v1/mmap_v1_options.h"
#include "mongo/db/storage_options.h"
-#include "mongo/server.h"
#include "mongo/util/exit.h"
#include "mongo/util/log.h"
-#include "mongo/util/mongoutils/str.h"
-#include "mongo/util/stacktrace.h"
#include "mongo/util/timer.h"
-using namespace mongoutils;
-
namespace mongo {
+namespace dur {
- namespace dur {
+namespace {
- void PREPLOGBUFFER(JSectHeader& outParm, AlignedBuilder&);
- void WRITETOJOURNAL(JSectHeader h, AlignedBuilder& uncompressed);
- void WRITETODATAFILES(const JSectHeader& h, AlignedBuilder& uncompressed);
+ // Used to activate the flush thread
+ boost::mutex flushMutex;
+ boost::condition_variable flushRequested;
- /** declared later in this file
- only used in this file -- use DurableInterface::commitNow() outside
- */
- static void groupCommit();
+ // When set, the flush thread will exit
+ AtomicUInt32 shutdownRequested(0);
- // Used to activate the flush thread
- static boost::mutex flushMutex;
- static boost::condition_variable flushRequested;
+ // One instance of each durability interface
+ DurableImpl durableImpl;
+ NonDurableImpl nonDurableImpl;
- // When set, the flush thread will exit
- static AtomicUInt32 shutdownRequested(0);
+ // Pre-allocated buffer for building the journal
+ AlignedBuilder journalBuilder(4 * 1024 * 1024);
+}
+ // Declared in dur_preplogbuffer.cpp
+ void PREPLOGBUFFER(JSectHeader& outHeader, AlignedBuilder& outBuffer);
+ void WRITETOJOURNAL(const JSectHeader& h, const AlignedBuilder& uncompressed);
+ void WRITETODATAFILES(const JSectHeader& h, const AlignedBuilder& uncompressed);
- CommitJob& commitJob = *(new CommitJob()); // don't destroy
+ // Declared in dur_journal.cpp
+ boost::filesystem::path getJournalDir();
+ void preallocateFiles();
+ // Declared and maintained in dur_commitjob.cpp
+ extern size_t privateMapBytes;
+
+ // Durability activity statistics
Stats stats;
+ // Reference to the write intents tracking object
+ CommitJob& commitJob = *(new CommitJob()); // don't destroy
+
+ // The durability interface to use
+ DurableInterface* DurableInterface::_impl = &nonDurableImpl;
+
+
+ //
+ // Stats
+ //
+
void Stats::S::reset() {
memset(this, 0, sizeof(*this));
}
@@ -197,6 +210,28 @@ namespace mongo {
}
}
+
+ //
+ // DurableInterface
+ //
+
+ DurableInterface::DurableInterface() {
+
+ }
+
+ DurableInterface::~DurableInterface() {
+
+ }
+
+ void DurableInterface::enableDurability() {
+ _impl = &durableImpl;
+ }
+
+
+ //
+ // NonDurableImpl
+ //
+
void* NonDurableImpl::writingPtr(void *x, unsigned len) {
dassert(shutdownRequested.load() == 0);
return x;
@@ -214,16 +249,9 @@ namespace mongo {
}
-
- static DurableImpl* durableImpl = new DurableImpl();
- static NonDurableImpl* nonDurableImpl = new NonDurableImpl();
- DurableInterface* DurableInterface::_impl = nonDurableImpl;
-
- void DurableInterface::enableDurability() {
- verify(_impl == nonDurableImpl);
- _impl = durableImpl;
- }
-
+ //
+ // DurableImpl
+ //
bool DurableImpl::commitNow(OperationContext* txn) {
NotifyAll::When when = commitJob._notify.now();
@@ -242,11 +270,6 @@ namespace mongo {
return true;
}
- /** Declare that a file has been created
- Normally writes are applied only after journaling, for safety. But here the file
- is created first, and the journal will just replay the creation if the create didn't
- happen because of crashing.
- */
void DurableImpl::createdFile(const std::string& filename, unsigned long long len) {
shared_ptr<DurOp> op( new FileCreatedOp(filename, len) );
commitJob.noteOp(op);
@@ -259,26 +282,6 @@ namespace mongo {
return p;
}
- /** declare intent to write
- @param ofs offset within buf at which we will write
- @param len the length at ofs we will write
- @return new buffer pointer.
- */
- void* DurableImpl::writingAtOffset(void *buf, unsigned ofs, unsigned len) {
- char *p = (char *) buf;
- declareWriteIntent(p+ofs, len);
- return p;
- }
-
- void* DurableImpl::writingRangesAtOffsets(void *buf, const vector< pair< long long, unsigned > > &ranges ) {
- char *p = (char *) buf;
- for( vector< pair< long long, unsigned > >::const_iterator i = ranges.begin();
- i != ranges.end(); ++i ) {
- declareWriteIntent( p + i->first, i->second );
- }
- return p;
- }
-
bool DurableImpl::commitIfNeeded() {
if (MONGO_likely(commitJob.bytes() < UncommittedBytesLimit)) {
return false;
@@ -289,6 +292,23 @@ namespace mongo {
return true;
}
+ void DurableImpl::syncDataAndTruncateJournal(OperationContext* txn) {
+ invariant(txn->lockState()->isW());
+
+ // a commit from the commit thread won't begin while we are in the write lock,
+ // but it may already be in progress and the end of that work is done outside
+ // (dbMutex) locks. This line waits for that to complete if already underway.
+ {
+ SimpleMutex::scoped_lock lk(commitJob.groupCommitMutex);
+ }
+
+ commitNow(txn);
+ MongoFile::flushAll(true);
+ journalCleanup();
+
+ invariant(!haveJournalFiles()); // Double check post-conditions
+ }
+
void DurableImpl::commitAndStopDurThread() {
NotifyAll::When when = commitJob._notify.now();
@@ -302,9 +322,9 @@ namespace mongo {
// Functor to be called over all MongoFiles
- class validateSingleMapMatches {
+ class ValidateSingleMapMatches {
public:
- validateSingleMapMatches(unsigned long long& bytes) :_bytes(bytes) {}
+ ValidateSingleMapMatches(unsigned long long& bytes) :_bytes(bytes) {}
void operator () (MongoFile *mf) {
if( mf->isDurableMappedFile() ) {
DurableMappedFile *mmf = (DurableMappedFile*) mf;
@@ -362,6 +382,7 @@ namespace mongo {
unsigned long long& _bytes;
};
+
/** (SLOW) diagnostic to check that the private view and the non-private view are in sync.
*/
void debugValidateAllMapsMatch() {
@@ -370,13 +391,12 @@ namespace mongo {
unsigned long long bytes = 0;
Timer t;
- MongoFile::forEach(validateSingleMapMatches(bytes));
+ MongoFile::forEach(ValidateSingleMapMatches(bytes));
OCCASIONALLY log() << "DurParanoid map check " << t.millis() << "ms for " << (bytes / (1024*1024)) << "MB" << endl;
}
- extern size_t privateMapBytes;
- static void _REMAPPRIVATEVIEW() {
+ static void _remapPrivateView() {
// todo: Consider using ProcessInfo herein and watching for getResidentSize to drop. that could be a way
// to assure very good behavior here.
@@ -385,16 +405,16 @@ namespace mongo {
LOG(4) << "journal REMAPPRIVATEVIEW" << endl;
- invariant(!commitJob.hasWritten());
-
// we want to remap all private views about every 2 seconds. there could be ~1000 views so
// we do a little each pass; beyond the remap time, more significantly, there will be copy on write
// faults after remapping, so doing a little bit at a time will avoid big load spikes on
// remapping.
unsigned long long now = curTimeMicros64();
double fraction = (now-lastRemap)/2000000.0;
- if (mmapv1GlobalOptions.journalOptions & MMAPV1Options::JournalAlwaysRemap)
+ if (mmapv1GlobalOptions.journalOptions & MMAPV1Options::JournalAlwaysRemap) {
fraction = 1;
+ }
+
lastRemap = now;
#if defined(_WIN32) || defined(__sunos__)
@@ -456,57 +476,75 @@ namespace mongo {
<< ' ' << t.millis() << "ms" << endl;
}
- /** We need to remap the private views periodically. otherwise they would become very large.
- Call within write lock. See top of file for more commentary.
- */
- static void REMAPPRIVATEVIEW() {
- Timer t;
- _REMAPPRIVATEVIEW();
- stats.curr->_remapPrivateViewMicros += t.micros();
- }
- // this is a pseudo-local variable in the groupcommit functions
- // below. however we don't truly do that so that we don't have to
- // reallocate, and more importantly regrow it, on every single commit.
- static AlignedBuilder __theBuilder(4 * 1024 * 1024);
+ static void remapPrivateView() {
+ // Remapping private views must occur after WRITETODATAFILES otherwise we wouldn't see
+ // any newly written data on reads.
+ invariant(!commitJob.hasWritten());
+ try {
+ Timer t;
+ _remapPrivateView();
+ stats.curr->_remapPrivateViewMicros += t.micros();
- static void _groupCommit() {
- LOG(4) << "_groupCommit " << endl;
+ LOG(4) << "remapPrivateView end";
+ return;
+ }
+ catch (DBException& e) {
+ severe() << "dbexception in remapPrivateView causing immediate shutdown: "
+ << e.toString();
+ mongoAbort("gc1");
+ }
+ catch (std::ios_base::failure& e) {
+ severe() << "ios_base exception in remapPrivateView causing immediate shutdown: "
+ << e.what();
+ mongoAbort("gc2");
+ }
+ catch (std::bad_alloc& e) {
+ severe() << "bad_alloc exception in remapPrivateView causing immediate shutdown: "
+ << e.what();
+ mongoAbort("gc3");
+ }
+ catch (std::exception& e) {
+ severe() << "exception in remapPrivateView causing immediate shutdown: "
+ << e.what();
+ mongoAbort("gc4");
+ }
+ }
- {
- AlignedBuilder &ab = __theBuilder;
- // we need to make sure two group commits aren't running at the same time
- // (and we are only read locked in the dbMutex, so it could happen -- while
- // there is only one dur thread, "early commits" can be done by other threads)
- SimpleMutex::scoped_lock lk(commitJob.groupCommitMutex);
+ static void _groupCommit() {
+ LOG(4) << "_groupCommit ";
- commitJob.commitingBegin();
+ // we need to make sure two group commits aren't running at the same time
+ // (and we are only read locked in the dbMutex, so it could happen -- while
+ // there is only one dur thread, "early commits" can be done by other threads)
+ SimpleMutex::scoped_lock lk(commitJob.groupCommitMutex);
- if( !commitJob.hasWritten() ) {
- // getlasterror request could have came after the data was already committed
- commitJob.committingNotifyCommitted();
- }
- else {
- JSectHeader h;
- PREPLOGBUFFER(h,ab);
+ commitJob.commitingBegin();
- // todo : write to the journal outside locks, as this write can be slow.
- // however, be careful then about remapprivateview as that cannot be done
- // if new writes are then pending in the private maps.
- WRITETOJOURNAL(h, ab);
+ if (!commitJob.hasWritten()) {
+ // getlasterror request could have come after the data was already committed
+ commitJob.committingNotifyCommitted();
+ }
+ else {
+ JSectHeader h;
+ PREPLOGBUFFER(h, journalBuilder);
- // data is now in the journal, which is sufficient for acknowledging getLastError.
- // (ok to crash after that)
- commitJob.committingNotifyCommitted();
+ // todo : write to the journal outside locks, as this write can be slow.
+ // however, be careful then about remapprivateview as that cannot be done
+ // if new writes are then pending in the private maps.
+ WRITETOJOURNAL(h, journalBuilder);
- WRITETODATAFILES(h, ab);
- debugValidateAllMapsMatch();
+ // data is now in the journal, which is sufficient for acknowledging getLastError.
+ // (ok to crash after that)
+ commitJob.committingNotifyCommitted();
- commitJob.committingReset();
- ab.reset();
- }
+ WRITETODATAFILES(h, journalBuilder);
+ debugValidateAllMapsMatch();
+
+ commitJob.committingReset();
+ journalBuilder.reset();
}
}
@@ -520,60 +558,30 @@ namespace mongo {
static void groupCommit() {
try {
_groupCommit();
- }
- catch(DBException& e ) {
- log() << "dbexception in groupCommit causing immediate shutdown: " << e.toString() << endl;
- mongoAbort("gc1");
- }
- catch(std::ios_base::failure& e) {
- log() << "ios_base exception in groupCommit causing immediate shutdown: " << e.what() << endl;
- mongoAbort("gc2");
- }
- catch(std::bad_alloc& e) {
- log() << "bad_alloc exception in groupCommit causing immediate shutdown: " << e.what() << endl;
- mongoAbort("gc3");
- }
- catch(std::exception& e) {
- log() << "exception in groupCommit causing immediate shutdown: " << e.what() << endl;
- mongoAbort("gc4");
- }
- LOG(4) << "groupCommit end" << endl;
- }
- static void remapPrivateView() {
- try {
- // Remapping private views must occur after WRITETODATAFILES otherwise we wouldn't
- // see newly written data on reads.
- invariant(!commitJob.hasWritten());
-
- REMAPPRIVATEVIEW();
+ LOG(4) << "groupCommit end";
+ return;
}
catch (DBException& e) {
- log() << "dbexception in remapPrivateView causing immediate shutdown: "
- << e.toString()
- << endl;
+ severe() << "dbexception in groupCommit causing immediate shutdown: "
+ << e.toString();
mongoAbort("gc1");
}
- catch (std::ios_base::failure& e) {
- log() << "ios_base exception in remapPrivateView causing immediate shutdown: "
- << e.what()
- << endl;
+ catch(std::ios_base::failure& e) {
+ severe() << "ios_base exception in groupCommit causing immediate shutdown: "
+ << e.what();
mongoAbort("gc2");
}
- catch (std::bad_alloc& e) {
- log() << "bad_alloc exception in remapPrivateView causing immediate shutdown: "
- << e.what()
- << endl;
+ catch(std::bad_alloc& e) {
+ severe() << "bad_alloc exception in groupCommit causing immediate shutdown: "
+ << e.what();
mongoAbort("gc3");
}
- catch (std::exception& e) {
- log() << "exception in remapPrivateView causing immediate shutdown: "
- << e.what()
- << endl;
+ catch(std::exception& e) {
+ severe() << "exception in groupCommit causing immediate shutdown: "
+ << e.what();
mongoAbort("gc4");
}
-
- LOG(4) << "remapPrivateView end" << endl;
}
@@ -596,8 +604,6 @@ namespace mongo {
}
}
- extern int groupCommitIntervalMs;
- boost::filesystem::path getJournalDir();
static void durThread() {
Client::initThread("journal");
@@ -627,16 +633,20 @@ namespace mongo {
// commit sooner if one or more getLastError j:true is pending
for (unsigned i = 0; i <= 2; i++) {
- if (flushRequested.timed_wait(lock,
- Milliseconds(oneThird))) {
+ if (flushRequested.timed_wait(lock, Milliseconds(oneThird))) {
// Someone forced a flush
break;
}
- if (commitJob._notify.nWaiting())
+ if (commitJob._notify.nWaiting()) {
+ // There are threads waiting for journaling
break;
- if (commitJob.bytes() > UncommittedBytesLimit / 10)
+ }
+
+ if (commitJob.bytes() > UncommittedBytesLimit / 2) {
+ // The number of written bytes is growing
break;
+ }
}
OperationContextImpl txn;
@@ -651,11 +661,11 @@ namespace mongo {
remapPrivateView();
}
catch(std::exception& e) {
- log() << "exception in durThread causing immediate shutdown: " << e.what() << endl;
+ severe() << "exception in durThread causing immediate shutdown: " << e.what();
mongoAbort("exception in durThread");
}
catch (...) {
- log() << "unhandled exception in durThread causing immediate shutdown" << endl;
+ severe() << "unhandled exception in durThread causing immediate shutdown";
mongoAbort("unhandled exception in durThread");
}
}
@@ -663,7 +673,6 @@ namespace mongo {
cc().shutdown();
}
- void preallocateFiles();
/** at startup, recover, and then start the journal threads */
void startup() {
@@ -676,15 +685,15 @@ namespace mongo {
replayJournalFilesAtStartup();
}
catch(DBException& e) {
- log() << "dbexception during recovery: " << e.toString() << endl;
+ severe() << "dbexception during recovery: " << e.toString();
throw;
}
catch(std::exception& e) {
- log() << "std::exception during recovery: " << e.what() << endl;
+ severe() << "std::exception during recovery: " << e.what();
throw;
}
catch(...) {
- log() << "exception during recovery" << endl;
+ severe() << "exception during recovery";
throw;
}
@@ -694,30 +703,11 @@ namespace mongo {
boost::thread t(durThread);
}
- DurableInterface::~DurableInterface() {
- log() << "ERROR warning ~DurableInterface not intended to be called" << std::endl;
- }
-
- void DurableImpl::syncDataAndTruncateJournal(OperationContext* txn) {
- invariant(txn->lockState()->isW());
-
- // a commit from the commit thread won't begin while we are in the write lock,
- // but it may already be in progress and the end of that work is done outside
- // (dbMutex) locks. This line waits for that to complete if already underway.
- {
- SimpleMutex::scoped_lock lk(commitJob.groupCommitMutex);
- }
-
- commitNow(txn);
- MongoFile::flushAll(true);
- journalCleanup();
-
- invariant(!haveJournalFiles()); // Double check post-conditions
- }
class DurSSS : public ServerStatusSection {
public:
- DurSSS() : ServerStatusSection( "dur" ){}
+ DurSSS() : ServerStatusSection( "dur" ) { }
+
virtual bool includeByDefault() const { return true; }
BSONObj generateSection(OperationContext* txn,
@@ -732,7 +722,5 @@ namespace mongo {
} durSSS;
-
- } // namespace dur
-
+} // namespace dur
} // namespace mongo
diff --git a/src/mongo/db/storage/mmap_v1/dur.h b/src/mongo/db/storage/mmap_v1/dur.h
index 9b135ff78e9..c296ae4ad9b 100644
--- a/src/mongo/db/storage/mmap_v1/dur.h
+++ b/src/mongo/db/storage/mmap_v1/dur.h
@@ -1,38 +1,35 @@
-// @file dur.h durability support
-
/**
-* Copyright (C) 2009 10gen Inc.
-*
-* This program is free software: you can redistribute it and/or modify
-* it under the terms of the GNU Affero General Public License, version 3,
-* as published by the Free Software Foundation.
-*
-* This program is distributed in the hope that it will be useful,
-* but WITHOUT ANY WARRANTY; without even the implied warranty of
-* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-* GNU Affero General Public License for more details.
-*
-* You should have received a copy of the GNU Affero General Public License
-* along with this program. If not, see <http://www.gnu.org/licenses/>.
-*
-* As a special exception, the copyright holders give permission to link the
-* code of portions of this program with the OpenSSL library under certain
-* conditions as described in each individual source file and distribute
-* linked combinations including the program with the OpenSSL library. You
-* must comply with the GNU Affero General Public License in all respects for
-* all of the code used other than as permitted herein. If you modify file(s)
-* with this exception, you may extend this exception to your version of the
-* file(s), but you are not obligated to do so. If you do not wish to do so,
-* delete this exception statement from your version. If you delete this
-* exception statement from all source files in the program, then also delete
-* it in the license file.
-*/
+ * Copyright (C) 2009-2014 MongoDB Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the GNU Affero General Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
#pragma once
#include "mongo/db/storage/mmap_v1/durable_mapped_file.h"
-
namespace mongo {
class OperationContext;
@@ -40,188 +37,167 @@ namespace mongo {
void mongoAbort(const char *msg);
void abort(); // not defined -- use mongoAbort() instead
- namespace dur {
- // a smaller limit is likely better on 32 bit
- const unsigned UncommittedBytesLimit = (sizeof(void*)==4) ? 50 * 1024 * 1024 : 512 * 1024 * 1024;
+namespace dur {
+
+ // a smaller limit is likely better on 32 bit
+ const unsigned UncommittedBytesLimit = (sizeof(void*) == 4) ? 50 * 1024 * 1024 : 512 * 1024 * 1024;
+
+ /**
+ * Called during startup so durability module can initialize and start the durability thread.
+ * Does nothing if storageGlobalParams.dur is false
+ */
+ void startup();
+
+
+ class DurableInterface {
+ MONGO_DISALLOW_COPYING(DurableInterface);
+ public:
+
+ DurableInterface();
+ virtual ~DurableInterface();
- /** Call during startup so durability module can initialize
- Throws if fatal error
- Does nothing if storageGlobalParams.dur is false
+ /**
+ * Declare that a file has been created. Normally writes are applied only after journaling
+ * for safety. But here the file is created first, and the journal will just replay the
+ * creation if the create didn't happen due to a crash.
*/
- void startup();
+ virtual void createdFile(const std::string& filename, unsigned long long len) = 0;
+
+ // Declare write intents. Use these methods to declare "I'm about to write to x and it
+ // should be logged for redo."
+ //
+ // Failure to call writing...() is checked in _DEBUG mode by using a read only mapped view
+ // (i.e., you'll segfault if the code is covered in that situation). The _DEBUG check
+ // doesn't verify that your length is correct though.
+
+ /**
+ * Declare intent to write to x for up to len.
+ *
+ * @return pointer where to write.
+ */
+ virtual void* writingPtr(void *x, unsigned len) = 0;
+
+ /**
+ * Declare write intent after write has been done.
+ */
+ virtual void declareWriteIntent(void *x, unsigned len) = 0;
- class DurableInterface : boost::noncopyable {
- public:
- virtual ~DurableInterface();
+ /**
+ * Allows you to declare many write intents at once more efficiently than repeated calls
+ * to declareWriteIntent.
+ */
+ virtual void declareWriteIntents(
+ const std::vector<std::pair<void*, unsigned> >& intents) = 0;
- /** Declare that a file has been created
- Normally writes are applied only after journaling, for safety. But here the file
- is created first, and the journal will just replay the creation if the create didn't
- happen because of crashing.
+ /** Wait for acknowledgement of the next group commit.
+ @return true if --dur is on. There will be a delay.
+ @return false if --dur is off.
*/
- virtual void createdFile(const std::string& filename, unsigned long long len) = 0;
+ virtual bool awaitCommit() = 0;
- /** Declarations of write intent.
+ /** Commit immediately.
- Use these methods to declare "i'm about to write to x and it should be logged for redo."
+ Generally, you do not want to do this often, as highly granular committing may affect
+ performance.
- Failure to call writing...() is checked in _DEBUG mode by using a read only mapped view
- (i.e., you'll segfault if the code is covered in that situation). The _DEBUG check doesn't
- verify that your length is correct though.
- */
+ Does not return until the commit is complete.
- /** declare intent to write to x for up to len
- @return pointer where to write. this is modified when testIntent is true.
- */
- virtual void* writingPtr(void *x, unsigned len) = 0;
+ You must be at least read locked when you call this. Ideally, you are not write locked
+ and then read operations can occur concurrently.
- /** declare write intent; should already be in the write view to work correctly when testIntent is true.
- if you aren't, use writingPtr() instead.
+ Do not use this. Use commitIfNeeded() instead.
+
+ @return true if --dur is on.
+ @return false if --dur is off. (in which case there is action)
*/
- virtual void declareWriteIntent(void *x, unsigned len) = 0;
-
- /**
- * Allows you to declare many write intents at once more efficiently than repeated calls
- * to declareWriteIntent.
- */
- virtual void declareWriteIntents(
- const std::vector<std::pair<void*, unsigned> >& intents) = 0;
-
- /** declare intent to write
- @param ofs offset within buf at which we will write
- @param len the length at ofs we will write
- @return new buffer pointer. this is modified when testIntent is true.
+ virtual bool commitNow(OperationContext* txn) = 0;
+
+ /** Commit if enough bytes have been modified. The threshold is UncommittedBytesLimit (50MB on 32-bit builds, 512MB otherwise)
+
+ The idea is that long running write operations that don't yield
+ (like creating an index or update with $atomic) can call this
+ whenever the db is in a sane state and it will prevent commits
+ from growing too large.
+ @return true if committed
*/
- virtual void* writingAtOffset(void *buf, unsigned ofs, unsigned len) = 0;
-
- /** declare intent to write
- @param ranges vector of pairs representing ranges. Each pair
- comprises an offset from buf where a range begins, then the
- range length.
- @return new buffer pointer. this is modified when testIntent is true.
- */
- virtual void* writingRangesAtOffsets(void *buf, const std::vector< std::pair< long long, unsigned > > &ranges ) = 0;
-
- /** Wait for acknowledgement of the next group commit.
- @return true if --dur is on. There will be delay.
- @return false if --dur is off.
+ virtual bool commitIfNeeded() = 0;
+
+ /**
+ * Invoked at clean shutdown time. Performs one last commit/flush and terminates the
+ * flush thread.
+ *
+ * Must be called under the global X lock.
*/
- virtual bool awaitCommit() = 0;
+ virtual void commitAndStopDurThread() = 0;
+
+ /**
+ * Commits pending changes, flushes all changes to main data files, then removes the
+ * journal.
+ *
+ * WARNING: Data *must* be in a crash-recoverable state when this is called and must
+ * not be inside of a write unit of work.
+ *
+ * This is useful as a "barrier" to ensure that writes before this call will never go
+ * through recovery and be applied to files that have had changes made after this call
+ * applied.
+ */
+ virtual void syncDataAndTruncateJournal(OperationContext* txn) = 0;
+
+ virtual bool isDurable() const = 0;
- /** Commit immediately.
+ /**
+ * Declare intent to write to x for sizeof(*x)
+ */
+ template <typename T>
+ T* writing(T *x) {
+ return static_cast<T*>(writingPtr(x, sizeof(T)));
+ }
- Generally, you do not want to do this often, as highly granular committing may affect
- performance.
- Does not return until the commit is complete.
+ static DurableInterface& getDur() { return *_impl; }
- You must be at least read locked when you call this. Ideally, you are not write locked
- and then read operations can occur concurrently.
+ private:
- Do not use this. Use commitIfNeeded() instead.
+ // Needs to be able to enable/disable Durability
+ friend void startup();
- @return true if --dur is on.
- @return false if --dur is off. (in which case there is action)
- */
- virtual bool commitNow(OperationContext* txn) = 0;
+ static void enableDurability(); // makes _impl a DurableImpl
- /** Commit if enough bytes have been modified. Current threshold is 50MB
+ static DurableInterface* _impl; // NonDurableImpl at startup()
+ };
- The idea is that long running write operations that don't yield
- (like creating an index or update with $atomic) can call this
- whenever the db is in a sane state and it will prevent commits
- from growing too large.
- @return true if commited
- */
- virtual bool commitIfNeeded() = 0;
- /**
- * Invoked at clean shutdown time. Performs one last commit/flush and terminates the
- * flush thread.
- *
- * Must be called under the global X lock.
- */
- virtual void commitAndStopDurThread() = 0;
+ class NonDurableImpl : public DurableInterface {
+ public:
+ void* writingPtr(void *x, unsigned len);
+ void declareWriteIntent(void *, unsigned);
+ void declareWriteIntents(const std::vector<std::pair<void*, unsigned> >& intents) { }
+ void createdFile(const std::string& filename, unsigned long long len) { }
+ bool awaitCommit() { return false; }
+ bool commitNow(OperationContext* txn);
+ bool commitIfNeeded();
+ void syncDataAndTruncateJournal(OperationContext* txn) {}
+ bool isDurable() const { return false; }
+ void commitAndStopDurThread() { }
+ };
- /** Declare write intent for an int */
- inline int& writingInt(int& d) { return *static_cast<int*>(writingPtr( &d, sizeof(d))); }
+ class DurableImpl : public DurableInterface {
+ public:
+ void* writingPtr(void *x, unsigned len);
+ void declareWriteIntent(void *, unsigned);
+ void declareWriteIntents(const std::vector<std::pair<void*, unsigned> >& intents);
+ void createdFile(const std::string& filename, unsigned long long len);
+ bool awaitCommit();
+ bool commitNow(OperationContext* txn);
+ bool commitIfNeeded();
+ void syncDataAndTruncateJournal(OperationContext* txn);
+ bool isDurable() const { return true; }
+ void commitAndStopDurThread();
+ };
+
+} // namespace dur
- /** "assume i've already indicated write intent, let me write"
- redeclaration is fine too, but this is faster.
- */
- template <typename T>
- inline
- T* alreadyDeclared(T *x) {
-#if defined(_TESTINTENT)
- return (T*) DurableMappedFile::switchToPrivateView(x);
-#else
- return x;
-#endif
- }
-
- /** declare intent to write to x for sizeof(*x) */
- template <typename T>
- inline
- T* writing(T *x) {
- return (T*) writingPtr(x, sizeof(T));
- }
-
- /**
- * Commits pending changes, flushes all changes to main data files, then removes the
- * journal.
- *
- * WARNING: Data *must* be in a crash-recoverable state when this is called and must
- * not be inside of a write unit of work.
- *
- * This is useful as a "barrier" to ensure that writes before this call will never go
- * through recovery and be applied to files that have had changes made after this call
- * applied.
- */
- virtual void syncDataAndTruncateJournal(OperationContext* txn) = 0;
-
- virtual bool isDurable() const = 0;
-
- static DurableInterface& getDur() { return *_impl; }
-
- private:
- static DurableInterface* _impl; // NonDurableImpl at startup()
- static void enableDurability(); // makes _impl a DurableImpl
-
- // these need to be able to enable/disable Durability
- friend void startup();
- }; // class DurableInterface
-
- class NonDurableImpl : public DurableInterface {
- void* writingPtr(void *x, unsigned len);
- void* writingAtOffset(void *buf, unsigned ofs, unsigned len) { return buf; }
- void* writingRangesAtOffsets(void *buf, const std::vector< std::pair< long long, unsigned > > &ranges) { return buf; }
- void declareWriteIntent(void *, unsigned);
- void declareWriteIntents(const std::vector<std::pair<void*, unsigned> >& intents) {}
- void createdFile(const std::string& filename, unsigned long long len) { }
- bool awaitCommit() { return false; }
- bool commitNow(OperationContext* txn);
- bool commitIfNeeded();
- void syncDataAndTruncateJournal(OperationContext* txn) {}
- bool isDurable() const { return false; }
- void commitAndStopDurThread() { }
- };
-
- class DurableImpl : public DurableInterface {
- void* writingPtr(void *x, unsigned len);
- void* writingAtOffset(void *buf, unsigned ofs, unsigned len);
- void* writingRangesAtOffsets(void *buf, const std::vector< std::pair< long long, unsigned > > &ranges);
- void declareWriteIntent(void *, unsigned);
- void declareWriteIntents(const std::vector<std::pair<void*, unsigned> >& intents);
- void createdFile(const std::string& filename, unsigned long long len);
- bool awaitCommit();
- bool commitNow(OperationContext* txn);
- bool commitIfNeeded();
- void syncDataAndTruncateJournal(OperationContext* txn);
- bool isDurable() const { return true; }
- void commitAndStopDurThread();
- };
-
- } // namespace dur
inline dur::DurableInterface& getDur() { return dur::DurableInterface::getDur(); }
-}
+
+} // namespace mongo
diff --git a/src/mongo/db/storage/mmap_v1/dur_journal.cpp b/src/mongo/db/storage/mmap_v1/dur_journal.cpp
index 89f16b62235..02889e3632b 100644
--- a/src/mongo/db/storage/mmap_v1/dur_journal.cpp
+++ b/src/mongo/db/storage/mmap_v1/dur_journal.cpp
@@ -724,7 +724,7 @@ namespace mongo {
@param uncompressed - a buffer that will be written to the journal after compression
will not return until on disk
*/
- void WRITETOJOURNAL(JSectHeader h, AlignedBuilder& uncompressed) {
+ void WRITETOJOURNAL(const JSectHeader& h, const AlignedBuilder& uncompressed) {
Timer t;
j.journal(h, uncompressed);
stats.curr->_writeToJournalMicros += t.micros();
diff --git a/src/mongo/db/storage/mmap_v1/dur_preplogbuffer.cpp b/src/mongo/db/storage/mmap_v1/dur_preplogbuffer.cpp
index 9323cb601ca..95bb53eabf4 100644
--- a/src/mongo/db/storage/mmap_v1/dur_preplogbuffer.cpp
+++ b/src/mongo/db/storage/mmap_v1/dur_preplogbuffer.cpp
@@ -193,10 +193,10 @@ namespace mongo {
return;
}
- void PREPLOGBUFFER(/*out*/ JSectHeader& h, AlignedBuilder& ab) {
+ void PREPLOGBUFFER(/*out*/ JSectHeader& outHeader, AlignedBuilder& outBuffer) {
Timer t;
j.assureLogFileOpen(); // so fileId is set
- _PREPLOGBUFFER(h, ab);
+ _PREPLOGBUFFER(outHeader, outBuffer);
stats.curr->_prepLogBufferMicros += t.micros();
}
diff --git a/src/mongo/db/storage/mmap_v1/dur_writetodatafiles.cpp b/src/mongo/db/storage/mmap_v1/dur_writetodatafiles.cpp
index de690f6d123..85b04854dda 100644
--- a/src/mongo/db/storage/mmap_v1/dur_writetodatafiles.cpp
+++ b/src/mongo/db/storage/mmap_v1/dur_writetodatafiles.cpp
@@ -45,7 +45,8 @@ namespace mongo {
void debugValidateAllMapsMatch();
- static void WRITETODATAFILES_Impl1(const JSectHeader& h, AlignedBuilder& uncompressed) {
+ static void WRITETODATAFILES_Impl1(const JSectHeader& h,
+ const AlignedBuilder& uncompressed) {
LOG(3) << "journal WRITETODATAFILES 1" << endl;
RecoveryJob::get().processSection(&h, uncompressed.buf(), uncompressed.len(), 0);
LOG(3) << "journal WRITETODATAFILES 2" << endl;
@@ -99,7 +100,7 @@ namespace mongo {
@see https://docs.google.com/drawings/edit?id=1TklsmZzm7ohIZkwgeK6rMvsdaR13KjtJYMsfLr175Zc&hl=en
*/
- void WRITETODATAFILES(const JSectHeader& h, AlignedBuilder& uncompressed) {
+ void WRITETODATAFILES(const JSectHeader& h, const AlignedBuilder& uncompressed) {
Timer t;
WRITETODATAFILES_Impl1(h, uncompressed);
long long m = t.micros();
diff --git a/src/mongo/db/storage/mmap_v1/mmap_v1_database_catalog_entry.cpp b/src/mongo/db/storage/mmap_v1/mmap_v1_database_catalog_entry.cpp
index 8febc2bb83a..ca8efe1a38f 100644
--- a/src/mongo/db/storage/mmap_v1/mmap_v1_database_catalog_entry.cpp
+++ b/src/mongo/db/storage/mmap_v1/mmap_v1_database_catalog_entry.cpp
@@ -105,7 +105,7 @@ namespace mongo {
void commit() { }
private:
- std::string _ns;
+ const std::string _ns;
MMAPV1DatabaseCatalogEntry* const _entry;
};
@@ -133,7 +133,7 @@ namespace mongo {
}
private:
- std::string _ns;
+ const std::string _ns;
MMAPV1DatabaseCatalogEntry* const _catalogEntry;
Entry* const _cachedEntry;
};
diff --git a/src/mongo/db/storage/mmap_v1/mmap_v1_database_catalog_entry.h b/src/mongo/db/storage/mmap_v1/mmap_v1_database_catalog_entry.h
index 597efe7a0ed..62fcbe0fc25 100644
--- a/src/mongo/db/storage/mmap_v1/mmap_v1_database_catalog_entry.h
+++ b/src/mongo/db/storage/mmap_v1/mmap_v1_database_catalog_entry.h
@@ -121,6 +121,25 @@ namespace mongo {
void createNamespaceForIndex(OperationContext* txn, const StringData& name);
private:
+
+ class EntryInsertion;
+ class EntryRemoval;
+
+ friend class NamespaceDetailsCollectionCatalogEntry;
+
+ // The _collections map is a cache for efficiently looking up namespace information.
+ // Access to the cache is protected by the _collectionsLock mutex.
+ // Once initialized, the cache must remain consistent with the data in the memory-mapped
+ // database files through _removeFromCache and _insertInCache_inlock. These methods
+ // use the RecoveryUnit to ensure correct handling of rollback.
+ struct Entry {
+ scoped_ptr<CollectionCatalogEntry> catalogEntry;
+ scoped_ptr<RecordStoreV1Base> recordStore;
+ };
+
+ typedef std::map<std::string, Entry*> CollectionMap;
+
+
RecordStoreV1Base* _getIndexRecordStore_inlock();
RecordStoreV1Base* _getIndexRecordStore();
RecordStoreV1Base* _getNamespaceRecordStore_inlock() const;
@@ -132,6 +151,7 @@ namespace mongo {
void _addNamespaceToNamespaceCollection( OperationContext* txn,
const StringData& ns,
const BSONObj* options );
+
void _addNamespaceToNamespaceCollection_inlock( OperationContext* txn,
const StringData& ns,
const BSONObj* options );
@@ -147,23 +167,8 @@ namespace mongo {
void _ensureSystemCollection_inlock( OperationContext* txn,
const StringData& ns );
- void _lazyInit( OperationContext* txn );
-
- std::string _path;
- MmapV1ExtentManager _extentManager;
- NamespaceIndex _namespaceIndex;
-
- // The _collections map is a cache for efficiently looking up namespace information.
- // Access to the cache is protected by the _collectionsLock mutex.
- // Once initialized, the cache must remain consistent with the data in the memory-mapped
- // database files through _removeFromCache and _insertInCache_inlock. These methods
- // use the RecoveryUnit to ensure correct handling of rollback.
-
- struct Entry {
- scoped_ptr<CollectionCatalogEntry> catalogEntry;
- scoped_ptr<RecordStoreV1Base> recordStore;
- };
+ void _lazyInit( OperationContext* txn );
/**
* Populate the _collections cache.
@@ -176,13 +181,13 @@ namespace mongo {
*/
void _removeFromCache(RecoveryUnit* ru, const StringData& ns);
- mutable boost::mutex _collectionsLock;
- typedef std::map<std::string, Entry*> CollectionMap;
- CollectionMap _collections;
- class EntryInsertion;
- class EntryRemoval;
+ std::string _path;
- friend class NamespaceDetailsCollectionCatalogEntry;
+ MmapV1ExtentManager _extentManager;
+ NamespaceIndex _namespaceIndex;
+
+ mutable boost::mutex _collectionsLock;
+ CollectionMap _collections;
};
}
diff --git a/src/mongo/db/storage/mmap_v1/mmap_v1_engine.cpp b/src/mongo/db/storage/mmap_v1/mmap_v1_engine.cpp
index 5430b473c17..7e3fc759311 100644
--- a/src/mongo/db/storage/mmap_v1/mmap_v1_engine.cpp
+++ b/src/mongo/db/storage/mmap_v1/mmap_v1_engine.cpp
@@ -45,7 +45,6 @@
#include "mongo/db/mongod_options.h"
#include "mongo/db/storage/mmap_v1/data_file_sync.h"
#include "mongo/db/storage/mmap_v1/dur.h"
-#include "mongo/db/storage/mmap_v1/dur_commitjob.h"
#include "mongo/db/storage/mmap_v1/dur_journal.h"
#include "mongo/db/storage/mmap_v1/dur_recover.h"
#include "mongo/db/storage/mmap_v1/dur_recovery_unit.h"
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.h b/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.h
index 86aa8f46dc0..4fc5fc93c58 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.h
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.h
@@ -74,7 +74,6 @@ namespace mongo {
// un-used API
virtual void* writingPtr(void* data, size_t len) { invariant(!"don't call writingPtr"); }
- virtual void syncDataAndTruncateJournal() {}
virtual uint64_t getMyTransactionCount() const;