diff options
author | Eric Milkie <milkie@10gen.com> | 2015-01-21 11:30:57 -0500 |
---|---|---|
committer | Eric Milkie <milkie@10gen.com> | 2015-01-22 11:59:22 -0500 |
commit | 44fd8e4e903d65dceb583c84cc5ec430c9350ad8 (patch) | |
tree | 333f4e74524d2be72779fe19bef548a18b0d2365 /src/mongo/db | |
parent | 612f66c61c12b705fe3c5ed251d63525f1bf650a (diff) | |
download | mongo-44fd8e4e903d65dceb583c84cc5ec430c9350ad8.tar.gz |
SERVER-16908 disable MMAPv1 rollback during fg index build, for perf
Diffstat (limited to 'src/mongo/db')
9 files changed, 62 insertions, 12 deletions
diff --git a/src/mongo/db/index/btree_based_bulk_access_method.cpp b/src/mongo/db/index/btree_based_bulk_access_method.cpp index 12fcd1bfbd2..fa1121ac5e9 100644 --- a/src/mongo/db/index/btree_based_bulk_access_method.cpp +++ b/src/mongo/db/index/btree_based_bulk_access_method.cpp @@ -150,6 +150,11 @@ namespace mongo { } WriteUnitOfWork wunit(_txn); + // Improve performance in the btree-building phase by disabling rollback tracking. + // This avoids copying all the written bytes to a buffer that is only used to roll back. + // Note that this is safe to do, as this entire index-build-in-progress will be cleaned + // up by the index system. + _txn->recoveryUnit()->setRollbackWritesDisabled(); // Get the next datum and add it to the builder. BSONObjExternalSorter::Data d = i->next(); diff --git a/src/mongo/db/storage/in_memory/in_memory_recovery_unit.h b/src/mongo/db/storage/in_memory/in_memory_recovery_unit.h index 0407f7191af..ddf38dff73e 100644 --- a/src/mongo/db/storage/in_memory/in_memory_recovery_unit.h +++ b/src/mongo/db/storage/in_memory/in_memory_recovery_unit.h @@ -63,6 +63,8 @@ namespace mongo { invariant(!"don't call writingPtr"); } + virtual void setRollbackWritesDisabled() {} + private: typedef boost::shared_ptr<Change> ChangePtr; typedef std::vector<ChangePtr> Changes; diff --git a/src/mongo/db/storage/mmap_v1/dur_recovery_unit.cpp b/src/mongo/db/storage/mmap_v1/dur_recovery_unit.cpp index 900dfc8913e..5947cae00bd 100644 --- a/src/mongo/db/storage/mmap_v1/dur_recovery_unit.cpp +++ b/src/mongo/db/storage/mmap_v1/dur_recovery_unit.cpp @@ -42,7 +42,7 @@ namespace mongo { - DurRecoveryUnit::DurRecoveryUnit() : _mustRollback(false) { + DurRecoveryUnit::DurRecoveryUnit() : _mustRollback(false), _rollbackDisabled(false) { } @@ -78,6 +78,9 @@ namespace mongo { rollbackInnermostChanges(); } + // Reset back to default. 
+ _rollbackDisabled = false; + _startOfUncommittedChangesForLevel.pop_back(); } @@ -102,7 +105,7 @@ namespace mongo { (*it)->commit(); } - // We now reset to a "clean" state without any uncommited changes. + // We now reset to a "clean" state without any uncommitted changes. _changes.clear(); _writes.clear(); _preimageBuffer.clear(); @@ -144,17 +147,21 @@ namespace mongo { const int changesRollbackTo = _startOfUncommittedChangesForLevel.back().changeIndex; const int writesRollbackTo = _startOfUncommittedChangesForLevel.back().writeIndex; - LOG(2) << " ***** ROLLING BACK " << (_writes.size() - writesRollbackTo) << " disk writes" - << " and " << (_changes.size() - changesRollbackTo) << " custom changes"; - // First rollback disk writes, then Changes. This matches behavior in other storage engines // that either rollback a transaction or don't write a writebatch. - for (int i = _writes.size() - 1; i >= writesRollbackTo; i--) { - // TODO need to add these pages to our "dirty count" somehow. - _preimageBuffer.copy(_writes[i].addr, _writes[i].len, _writes[i].offset); + if (!_rollbackDisabled) { + LOG(2) << " ***** ROLLING BACK " << (_writes.size() - writesRollbackTo) + << " disk writes"; + + for (int i = _writes.size() - 1; i >= writesRollbackTo; i--) { + // TODO need to add these pages to our "dirty count" somehow. 
+ _preimageBuffer.copy(_writes[i].addr, _writes[i].len, _writes[i].offset); + } } + LOG(2) << " ***** ROLLING BACK " << (_changes.size() - changesRollbackTo) + << " custom changes"; for (int i = _changes.size() - 1; i >= changesRollbackTo; i--) { LOG(2) << "CUSTOM ROLLBACK " << demangleName(typeid(*_changes[i])); _changes[i]->rollback(); @@ -192,11 +199,18 @@ namespace mongo { privateViews.makeWritable(data, len); _writes.push_back(Write(static_cast<char*>(data), len, _preimageBuffer.size())); - _preimageBuffer.append(static_cast<char*>(data), len); + if (!_rollbackDisabled) { + _preimageBuffer.append(static_cast<char*>(data), len); + } return data; } + void DurRecoveryUnit::setRollbackWritesDisabled() { + invariant(inOutermostUnitOfWork()); + _rollbackDisabled = true; + } + void DurRecoveryUnit::registerChange(Change* change) { invariant(inAUnitOfWork()); _changes.push_back(change); diff --git a/src/mongo/db/storage/mmap_v1/dur_recovery_unit.h b/src/mongo/db/storage/mmap_v1/dur_recovery_unit.h index 37c106b3393..6307bf66b31 100644 --- a/src/mongo/db/storage/mmap_v1/dur_recovery_unit.h +++ b/src/mongo/db/storage/mmap_v1/dur_recovery_unit.h @@ -49,15 +49,17 @@ namespace mongo { virtual void commitUnitOfWork(); virtual void endUnitOfWork(); - virtual void commitAndRestart(); - virtual bool awaitCommit(); - virtual void* writingPtr(void* data, size_t len); + virtual void commitAndRestart(); // The recovery unit takes ownership of change. virtual void registerChange(Change* change); + virtual void* writingPtr(void* data, size_t len); + + virtual void setRollbackWritesDisabled(); + private: void commitChanges(); void pushChangesToDurSubSystem(); @@ -110,6 +112,11 @@ namespace mongo { // If true, this RU is in a "failed" state and all changes must be rolled back. Once the // outermost WUOW rolls back it reverts to false. bool _mustRollback; + + // Default is false. + // If true, no preimages are tracked. If rollback is subsequently attempted, disk writes + // are not rolled back; only the registered custom changes are rolled back. 
+ bool _rollbackDisabled; }; } // namespace mongo diff --git a/src/mongo/db/storage/mmap_v1/heap_record_store_btree.h b/src/mongo/db/storage/mmap_v1/heap_record_store_btree.h index 4bc7ca1c3a3..710a34a8eec 100644 --- a/src/mongo/db/storage/mmap_v1/heap_record_store_btree.h +++ b/src/mongo/db/storage/mmap_v1/heap_record_store_btree.h @@ -204,6 +204,8 @@ namespace mongo { virtual void* writingPtr(void* data, size_t len); + virtual void setRollbackWritesDisabled() {} + // ----------------------- void notifyInsert( HeapRecordStoreBtree* rs, const RecordId& loc ); diff --git a/src/mongo/db/storage/recovery_unit.h b/src/mongo/db/storage/recovery_unit.h index 9b35e8b63fe..ac456e02b7c 100644 --- a/src/mongo/db/storage/recovery_unit.h +++ b/src/mongo/db/storage/recovery_unit.h @@ -159,6 +159,21 @@ namespace mongo { return x; } + /** + * Sets a flag that declares this RecoveryUnit will skip rolling back writes, for the + * duration of the current outermost WriteUnitOfWork. This function can only be called + * between a pair of unnested beginUnitOfWork() / endUnitOfWork() calls. + * The flag is cleared when endUnitOfWork() is called. + * While the flag is set, rollback will skip rolling back writes, but custom rollback + * change functions are still called. Clearly, this functionality should only be used when + * writing to temporary collections that can be cleaned up externally. For example, + * foreground index builds write to a temporary collection; if something goes wrong that + * normally requires a rollback, we can instead clean up the index by dropping the entire + * index. + * Setting the flag may permit increased performance. 
+ */ + virtual void setRollbackWritesDisabled() = 0; + protected: RecoveryUnit() { } }; diff --git a/src/mongo/db/storage/recovery_unit_noop.h b/src/mongo/db/storage/recovery_unit_noop.h index 1e07b1f9c0b..e6f560a7871 100644 --- a/src/mongo/db/storage/recovery_unit_noop.h +++ b/src/mongo/db/storage/recovery_unit_noop.h @@ -53,6 +53,7 @@ namespace mongo { virtual void* writingPtr(void* data, size_t len) { return data; } + virtual void setRollbackWritesDisabled() {} }; } // namespace mongo diff --git a/src/mongo/db/storage/rocks/rocks_recovery_unit.h b/src/mongo/db/storage/rocks/rocks_recovery_unit.h index a86d6bc9b56..71bb93cfa0c 100644 --- a/src/mongo/db/storage/rocks/rocks_recovery_unit.h +++ b/src/mongo/db/storage/rocks/rocks_recovery_unit.h @@ -78,6 +78,8 @@ namespace mongo { virtual void registerChange(Change* change); + virtual void setRollbackWritesDisabled() {} + // local api rocksdb::WriteBatchWithIndex* writeBatch(); diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.h b/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.h index 556e0e25a1b..ed2b77bff00 100644 --- a/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.h +++ b/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.h @@ -75,6 +75,8 @@ namespace mongo { // un-used API virtual void* writingPtr(void* data, size_t len) { invariant(!"don't call writingPtr"); } + virtual void setRollbackWritesDisabled() {} + virtual uint64_t getMyTransactionCount() const; // ---- WT STUFF |