diff options
author | Geert Bosch <geert@mongodb.com> | 2014-10-15 12:37:26 -0400 |
---|---|---|
committer | Geert Bosch <geert@mongodb.com> | 2014-10-16 14:12:50 -0400 |
commit | 6bc93c0a8730511e021645b711079dd0ec4e9f61 (patch) | |
tree | c1ea33dcfbc057785cbf5852f72618716aafc331 | |
parent | ca1a8d5bb453a645b61a8ebcee8b40b7a51bdfdf (diff) | |
download | mongo-6bc93c0a8730511e021645b711079dd0ec4e9f61.tar.gz |
SERVER-15647: make Extent Manager thread-safe and enable MMAPv1 CLL
-rw-r--r-- | src/mongo/db/concurrency/d_concurrency.cpp | 10 | ||||
-rw-r--r-- | src/mongo/db/concurrency/d_concurrency.h | 18 | ||||
-rw-r--r-- | src/mongo/db/concurrency/lock_mgr_defs.h | 11 | ||||
-rw-r--r-- | src/mongo/db/storage/mmap_v1/extent_manager.h | 17 | ||||
-rw-r--r-- | src/mongo/db/storage/mmap_v1/mmap_v1_database_catalog_entry.cpp | 3 | ||||
-rw-r--r-- | src/mongo/db/storage/mmap_v1/mmap_v1_extent_manager.cpp | 76 | ||||
-rw-r--r-- | src/mongo/db/storage/mmap_v1/mmap_v1_extent_manager.h | 97 | ||||
-rw-r--r-- | src/mongo/db/storage/mmap_v1/record_store_v1_test_help.cpp | 6 | ||||
-rw-r--r-- | src/mongo/db/storage/mmap_v1/record_store_v1_test_help.h | 4 | ||||
-rw-r--r-- | src/mongo/s/d_migrate.cpp | 16 |
10 files changed, 176 insertions, 82 deletions
diff --git a/src/mongo/db/concurrency/d_concurrency.cpp b/src/mongo/db/concurrency/d_concurrency.cpp index 4a4dfcce3be..ff9d4e5fa60 100644 --- a/src/mongo/db/concurrency/d_concurrency.cpp +++ b/src/mongo/db/concurrency/d_concurrency.cpp @@ -270,6 +270,16 @@ namespace mongo { } } + Lock::ResourceLock::ResourceLock(Locker* lockState, ResourceId rid, LockMode mode) + : _rid(rid), + _lockState(lockState) { + _lockState->lock(_rid, mode); + } + + Lock::ResourceLock::~ResourceLock() { + _lockState->unlock(_rid); + } + Lock::DBRead::DBRead(Locker* lockState, const StringData& dbOrNs) : DBLock(lockState, nsToDatabaseSubstring(dbOrNs), MODE_S) { } diff --git a/src/mongo/db/concurrency/d_concurrency.h b/src/mongo/db/concurrency/d_concurrency.h index e3f5dcac647..82c41d56686 100644 --- a/src/mongo/db/concurrency/d_concurrency.h +++ b/src/mongo/db/concurrency/d_concurrency.h @@ -201,7 +201,7 @@ namespace mongo { */ class CollectionLock : boost::noncopyable { public: - CollectionLock (Locker* lockState, const StringData& ns, const LockMode); + CollectionLock(Locker* lockState, const StringData& ns, const LockMode); virtual ~CollectionLock(); private: const ResourceId _id; @@ -209,6 +209,22 @@ namespace mongo { }; /** + * General purpose RAII wrapper for a resource managed by the lock manager + * + * See LockMode for the supported modes. Unlike DBLock/Collection lock, this will not do + * any additional checks/upgrades or global locking. Use ResourceLock for locking + * resources other than RESOURCE_GLOBAL, RESOURCE_DATABASE and RESOURCE_COLLECTION. + */ + class ResourceLock : boost::noncopyable { + public: + ResourceLock(Locker* lockState, const ResourceId rid, const LockMode); + virtual ~ResourceLock(); + private: + const ResourceId _rid; + Locker* _lockState; + }; + + /** * Shared database lock -- DEPRECATED, please transition to DBLock and collection locks * * Allows concurrent read access to the given database, blocking any writers. diff --git a/src/mongo/db/concurrency/lock_mgr_defs.h b/src/mongo/db/concurrency/lock_mgr_defs.h index 09fd03f9c3f..e34acefb0ba 100644 --- a/src/mongo/db/concurrency/lock_mgr_defs.h +++ b/src/mongo/db/concurrency/lock_mgr_defs.h @@ -106,8 +106,11 @@ namespace mongo { /** * Hierarchy of resource types. The lock manager knows nothing about this hierarchy, it is - * purely logical. I.e., acquiring a RESOURCE_GLOBAL and then a RESOURCE_DATABASE won't block - * at the level of the lock manager. + * purely logical. Resources of different types will never conflict with each other. While the + * lock manager does not know or care about ordering, the general policy is that resources are + * acquired in the order below. For example, one might first acquire a RESOURCE_GLOBAL and then + * the desired RESOURCE_DATABASE, both using intent modes, and finally a RESOURCE_COLLECTION + * in exclusive mode. */ enum ResourceType { // Special (singleton) resources @@ -119,14 +122,14 @@ namespace mongo { RESOURCE_DATABASE, RESOURCE_COLLECTION, RESOURCE_DOCUMENT, + RESOURCE_MMAPv1_EXTENT_MANAGER, // Only for MMAPv1 engine, keyed on database name // Counts the rest. Always insert new resource types above this entry. ResourceTypesCount }; // We only use 3 bits for the resource type in the ResourceId hash - BOOST_STATIC_ASSERT(ResourceTypesCount < 8); - + BOOST_STATIC_ASSERT(ResourceTypesCount <= 8); /** * Uniquely identifies a lockable resource. diff --git a/src/mongo/db/storage/mmap_v1/extent_manager.h b/src/mongo/db/storage/mmap_v1/extent_manager.h index e655883d109..79c0c755f86 100644 --- a/src/mongo/db/storage/mmap_v1/extent_manager.h +++ b/src/mongo/db/storage/mmap_v1/extent_manager.h @@ -54,8 +54,9 @@ namespace mongo { * - responsible for figuring out how to get a new extent * - can use any method it wants to do so * - this structure is NOT stored on disk - * - this class is NOT thread safe, locking should be above (for now) - * + * - files will not be removed from the EM + * - extent size and loc are immutable + * - this class is thread safe, once constructed and init()-ialized */ class ExtentManager { MONGO_DISALLOW_COPYING( ExtentManager ); @@ -91,7 +92,15 @@ namespace mongo { */ virtual void freeExtent( OperationContext* txn, DiskLoc extent ) = 0; - virtual void freeListStats( int* numExtents, int64_t* totalFreeSize ) const = 0; + /** + * Retrieve statistics on the the free list managed by this ExtentManger. + * @param numExtents - non-null pointer to an int that will receive the number of extents + * @param totalFreeSizeBytes - non-null pointer to an int64_t receiving the total free + * space in the free list. + */ + virtual void freeListStats(OperationContext* txn, + int* numExtents, + int64_t* totalFreeSizeBytes) const = 0; /** * @param loc - has to be for a specific Record @@ -153,7 +162,7 @@ namespace mongo { }; /** * Tell the system that for this extent, it will have this kind of disk access. - * Owner takes owernship of CacheHint + * Caller takes owernship of CacheHint */ virtual CacheHint* cacheHint( const DiskLoc& extentLoc, const HintType& hint ) = 0; }; diff --git a/src/mongo/db/storage/mmap_v1/mmap_v1_database_catalog_entry.cpp b/src/mongo/db/storage/mmap_v1/mmap_v1_database_catalog_entry.cpp index c5f8e371120..e219c5cf765 100644 --- a/src/mongo/db/storage/mmap_v1/mmap_v1_database_catalog_entry.cpp +++ b/src/mongo/db/storage/mmap_v1/mmap_v1_database_catalog_entry.cpp @@ -183,7 +183,6 @@ namespace mongo { else { log() << e.what() << endl; } - _extentManager.reset(); throw; } } @@ -426,7 +425,7 @@ namespace mongo { int freeListSize = 0; int64_t freeListSpace = 0; - _extentManager.freeListStats( &freeListSize, &freeListSpace ); + _extentManager.freeListStats(opCtx, &freeListSize, &freeListSpace); BSONObjBuilder extentFreeList( output->subobjStart( "extentFreeList" ) ); extentFreeList.append( "num", freeListSize ); diff --git a/src/mongo/db/storage/mmap_v1/mmap_v1_extent_manager.cpp b/src/mongo/db/storage/mmap_v1/mmap_v1_extent_manager.cpp index 027ab609bda..f6d18f3090d 100644 --- a/src/mongo/db/storage/mmap_v1/mmap_v1_extent_manager.cpp +++ b/src/mongo/db/storage/mmap_v1/mmap_v1_extent_manager.cpp @@ -47,23 +47,13 @@ namespace mongo { - MmapV1ExtentManager::MmapV1ExtentManager( const StringData& dbname, - const StringData& path, - bool directoryPerDB ) - : _dbname( dbname.toString() ), - _path( path.toString() ), - _directoryPerDB( directoryPerDB ) { - } - - MmapV1ExtentManager::~MmapV1ExtentManager() { - reset(); - } - - void MmapV1ExtentManager::reset() { - for ( size_t i = 0; i < _files.size(); i++ ) { - delete _files[i]; - } - _files.clear(); + MmapV1ExtentManager::MmapV1ExtentManager(const StringData& dbname, + const StringData& path, + bool directoryPerDB) + : _dbname(dbname.toString()), + _path(path.toString()), + _directoryPerDB(directoryPerDB), + _rid(RESOURCE_MMAPv1_EXTENT_MANAGER, dbname) { } boost::filesystem::path MmapV1ExtentManager::fileName( int n ) const { @@ -78,7 +68,7 @@ namespace mongo { Status MmapV1ExtentManager::init(OperationContext* txn) { - verify( _files.size() == 0 ); + verify(_files.empty()); for ( int n = 0; n < DiskLoc::MaxFiles; n++ ) { boost::filesystem::path fullName = fileName( n ); @@ -126,7 +116,7 @@ namespace mongo { } const DataFile* MmapV1ExtentManager::_getOpenFile(int fileId) const { - if (fileId < 0 || fileId >= static_cast<int>(_files.size())) { + if (fileId < 0 || fileId >= _files.size()) { log() << "_getOpenFile() invalid file index requested " << fileId; invariant(false); } @@ -135,7 +125,7 @@ namespace mongo { } DataFile* MmapV1ExtentManager::_getOpenFile(int fileId) { - if (fileId < 0 || fileId >= static_cast<int>(_files.size())) { + if (fileId < 0 || fileId >= _files.size()) { log() << "_getOpenFile() invalid file index requested " << fileId; invariant(false); } @@ -192,11 +182,11 @@ namespace mongo { } // Returns the last file added - return *_files.rbegin(); + return _files[allocFileId]; } int MmapV1ExtentManager::numFiles() const { - return static_cast<int>( _files.size() ); + return _files.size(); } long long MmapV1ExtentManager::fileSize() const { @@ -401,11 +391,11 @@ namespace mongo { return best->myLoc; } - DiskLoc MmapV1ExtentManager::allocateExtent( OperationContext* txn, - bool capped, - int size, - bool enforceQuota ) { - + DiskLoc MmapV1ExtentManager::allocateExtent(OperationContext* txn, + bool capped, + int size, + bool enforceQuota) { + Lock::ResourceLock rlk(txn->lockState(), _rid, MODE_X); bool fromFreeList = true; DiskLoc eloc = _allocFromFreeList( txn, size, capped ); if ( eloc.isNull() ) { @@ -425,6 +415,7 @@ namespace mongo { } void MmapV1ExtentManager::freeExtent(OperationContext* txn, DiskLoc firstExt ) { + Lock::ResourceLock rlk(txn->lockState(), _rid, MODE_X); Extent* e = getExtent( firstExt ); txn->recoveryUnit()->writing( &e->xnext )->Null(); txn->recoveryUnit()->writing( &e->xprev )->Null(); @@ -447,6 +438,7 @@ namespace mongo { } void MmapV1ExtentManager::freeExtents(OperationContext* txn, DiskLoc firstExt, DiskLoc lastExt) { + Lock::ResourceLock rlk(txn->lockState(), _rid, MODE_X); if ( firstExt.isNull() && lastExt.isNull() ) return; @@ -472,7 +464,6 @@ namespace mongo { *txn->recoveryUnit()->writing( &getExtent( lastExt )->xnext ) = a; _setFreeListStart( txn, firstExt ); } - } DiskLoc MmapV1ExtentManager::_getFreeListStart() const { @@ -501,18 +492,22 @@ namespace mongo { *txn->recoveryUnit()->writing( &file->header()->freeListEnd ) = loc; } - void MmapV1ExtentManager::freeListStats( int* numExtents, int64_t* totalFreeSize ) const { - invariant( numExtents ); - invariant( totalFreeSize ); + void MmapV1ExtentManager::freeListStats(OperationContext* txn, + int* numExtents, + int64_t* totalFreeSizeBytes) const { + Lock::ResourceLock rlk(txn->lockState(), _rid, MODE_S); + + invariant(numExtents); + invariant(totalFreeSizeBytes); *numExtents = 0; - *totalFreeSize = 0; + *totalFreeSizeBytes = 0; DiskLoc a = _getFreeListStart(); while( !a.isNull() ) { Extent *e = getExtent( a ); (*numExtents)++; - (*totalFreeSize) += e->length; + (*totalFreeSizeBytes) += e->length; a = e->xnext; } @@ -553,6 +548,21 @@ namespace mongo { MAdvise::Sequential ); } + MmapV1ExtentManager::FilesArray::~FilesArray() { + for (int i = 0; i < size(); i++) { + delete _files[i]; + } + } + + void MmapV1ExtentManager::FilesArray::push_back(DataFile* val) { + scoped_lock lk(_writersMutex); + const int n = _size.load(); + invariant(n < DiskLoc::MaxFiles); + // Note ordering: _size update must come after updating the _files array + _files[n] = val; + _size.store(n + 1); + } + void MmapV1ExtentManager::getFileFormat( OperationContext* txn, int* major, int* minor ) const { if ( numFiles() == 0 ) return; diff --git a/src/mongo/db/storage/mmap_v1/mmap_v1_extent_manager.h b/src/mongo/db/storage/mmap_v1/mmap_v1_extent_manager.h index d41f8b0e21f..e1857db726c 100644 --- a/src/mongo/db/storage/mmap_v1/mmap_v1_extent_manager.h +++ b/src/mongo/db/storage/mmap_v1/mmap_v1_extent_manager.h @@ -31,14 +31,16 @@ #pragma once #include <string> -#include <vector> #include <boost/filesystem/path.hpp> +#include "mongo/platform/atomic_word.h" #include "mongo/base/status.h" #include "mongo/base/string_data.h" +#include "mongo/db/concurrency/lock_mgr_defs.h" #include "mongo/db/diskloc.h" #include "mongo/db/storage/mmap_v1/extent_manager.h" +#include "mongo/util/concurrency/mutex.h" namespace mongo { @@ -57,10 +59,16 @@ namespace mongo { * - responsible for figuring out how to get a new extent * - can use any method it wants to do so * - this structure is NOT stored on disk - * - this class is NOT thread safe, locking should be above (for now) + * - this class is thread safe, except as indicated below * - * implementation: - * - ExtentManager holds a list of DataFile + * Implementation: + * - ExtentManager holds a preallocated list of DataFile + * - files will not be removed from the EM, so _files access can be lock-free + * - extent size and loc are immutable + * - Any non-const public operations on an ExtentManager will acquire an MODE_X lock on its + * RESOURCE_MMAPv1_EXTENT_MANAGER resource from the lock-manager, which will extend life + * to during WriteUnitOfWorks that might need rollback. Private methods will only + * be called from public ones. */ class MmapV1ExtentManager : public ExtentManager { MONGO_DISALLOW_COPYING( MmapV1ExtentManager ); @@ -70,18 +78,11 @@ namespace mongo { * while a bit odd, this is not a layer violation as extents * are a peer to the .ns file, without any layering */ - MmapV1ExtentManager( const StringData& dbname, const StringData& path, - bool directoryPerDB ); - - virtual ~MmapV1ExtentManager(); - - /** - * deletes all state and puts back to original state - */ - void reset(); + MmapV1ExtentManager(const StringData& dbname, const StringData& path, + bool directoryPerDB); /** - * opens all current files + * opens all current files, not thread safe */ Status init(OperationContext* txn); @@ -105,9 +106,12 @@ namespace mongo { */ void freeExtent( OperationContext* txn, DiskLoc extent ); + // For debug only: not thread safe void printFreeList() const; - void freeListStats( int* numExtents, int64_t* totalFreeSize ) const; + void freeListStats(OperationContext* txn, + int* numExtents, + int64_t* totalFreeSizeBytes) const; /** * @param loc - has to be for a specific Record @@ -135,18 +139,19 @@ namespace mongo { */ Extent* getExtent( const DiskLoc& loc, bool doSanityCheck = true ) const; - void getFileFormat( OperationContext* txn, int* major, int* minor ) const; + /** + * Not thread safe, requires a database exclusive lock + */ + void getFileFormat(OperationContext* txn, int* major, int* minor) const; void setFileFormat(OperationContext* txn, int major, int minor); const DataFile* getOpenFile( int n ) const { return _getOpenFile( n ); } virtual int maxSize() const; - virtual CacheHint* cacheHint( const DiskLoc& extentLoc, const HintType& hint ); private: - /** * will return NULL if nothing suitable in free list */ @@ -176,15 +181,53 @@ namespace mongo { // ----- - std::string _dbname; // i.e. "test" - std::string _path; // i.e. "/data/db" - bool _directoryPerDB; - - // must be in the dbLock when touching this (and write locked when writing to of course) - // however during Database object construction we aren't, which is ok as it isn't yet visible - // to others and we are in the dbholder lock then. - std::vector<DataFile*> _files; + const std::string _dbname; // i.e. "test" + const std::string _path; // i.e. "/data/db" + const bool _directoryPerDB; + const ResourceId _rid; + /** + * Simple wrapper around an array object to allow append-only modification of the array, + * as well as concurrent read-accesses. This class has a minimal interface to keep + * implementation simple and easy to modify. + */ + class FilesArray { + public: + FilesArray() : _writersMutex("MmapV1ExtentManager"), _size(0) { } + ~FilesArray(); + + /** + * Returns file at location 'n' in the array, with 'n' less than number of files added. + * Will always return the same pointer for a given file. + */ + DataFile* operator[](int n) const { + invariant(n >= 0 && n < size()); + return _files[n]; + } + + /** + * Returns true iff no files were added + */ + bool empty() const { + return size() == 0; + } + + /** + * Returns number of files added to the array + */ + int size() const { + return _size.load(); + } + + // Appends val to the array, taking ownership of its pointer + void push_back(DataFile* val); + + private: + mutex _writersMutex; + AtomicInt32 _size; // number of files in the array + DataFile* _files[DiskLoc::MaxFiles]; + }; + + FilesArray _files; }; - } diff --git a/src/mongo/db/storage/mmap_v1/record_store_v1_test_help.cpp b/src/mongo/db/storage/mmap_v1/record_store_v1_test_help.cpp index cddabbe3fef..ec19da43b36 100644 --- a/src/mongo/db/storage/mmap_v1/record_store_v1_test_help.cpp +++ b/src/mongo/db/storage/mmap_v1/record_store_v1_test_help.cpp @@ -253,8 +253,10 @@ namespace mongo { void DummyExtentManager::freeExtent( OperationContext* txn, DiskLoc extent ) { // XXX } - void DummyExtentManager::freeListStats( int* numExtents, int64_t* totalFreeSize ) const { - invariant( false ); + void DummyExtentManager::freeListStats(OperationContext* txn, + int* numExtents, + int64_t* totalFreeSizeBytes) const { + invariant(false); } Record* DummyExtentManager::recordForV1( const DiskLoc& loc ) const { diff --git a/src/mongo/db/storage/mmap_v1/record_store_v1_test_help.h b/src/mongo/db/storage/mmap_v1/record_store_v1_test_help.h index c8b321ccd40..a04f9b40331 100644 --- a/src/mongo/db/storage/mmap_v1/record_store_v1_test_help.h +++ b/src/mongo/db/storage/mmap_v1/record_store_v1_test_help.h @@ -129,7 +129,9 @@ namespace mongo { virtual void freeExtent( OperationContext* txn, DiskLoc extent ); - virtual void freeListStats( int* numExtents, int64_t* totalFreeSize ) const; + virtual void freeListStats(OperationContext* txn, + int* numExtents, + int64_t* totalFreeSizeBytes) const; virtual Record* recordForV1( const DiskLoc& loc ) const; diff --git a/src/mongo/s/d_migrate.cpp b/src/mongo/s/d_migrate.cpp index eae740f2c79..cbf59ca1ada 100644 --- a/src/mongo/s/d_migrate.cpp +++ b/src/mongo/s/d_migrate.cpp @@ -2114,27 +2114,27 @@ namespace mongo { bool didAnything = false; if ( xfer["deleted"].isABSONObj() ) { + Lock::DBLock dlk(txn->lockState(), nsToDatabaseSubstring(ns), MODE_IX); Helpers::RemoveSaver rs( "moveChunk" , ns , "removedDuring" ); BSONObjIterator i( xfer["deleted"].Obj() ); while ( i.more() ) { - Client::WriteContext cx(txn, ns); + Lock::CollectionLock clk(txn->lockState(), ns, MODE_X); + WriteUnitOfWork wunit(txn); + Client::Context ctx(txn, ns); BSONObj id = i.next().Obj(); // do not apply deletes if they do not belong to the chunk being migrated BSONObj fullObj; - if ( Helpers::findById( txn, cx.ctx().db(), ns.c_str(), id, fullObj ) ) { - if ( ! isInRange( fullObj , min , max , shardKeyPattern ) ) { + if (Helpers::findById(txn, ctx.db(), ns.c_str(), id, fullObj)) { + if (!isInRange(fullObj , min , max , shardKeyPattern)) { log() << "not applying out of range deletion: " << fullObj << migrateLog; continue; } } - Lock::DBLock lk(txn->lockState(), nsToDatabaseSubstring(ns), MODE_X); - Client::Context ctx(txn, ns); - if (serverGlobalParams.moveParanoia) { rs.goingToDelete(fullObj); } @@ -2148,8 +2148,8 @@ namespace mongo { false /* god */, true /* fromMigrate */); - *lastOpApplied = cx.ctx().getClient()->getLastOp().asDate(); - cx.commit(); + *lastOpApplied = ctx.getClient()->getLastOp().asDate(); + wunit.commit(); didAnything = true; } } |