summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGeert Bosch <geert@mongodb.com>2014-10-15 12:37:26 -0400
committerGeert Bosch <geert@mongodb.com>2014-10-16 14:12:50 -0400
commit6bc93c0a8730511e021645b711079dd0ec4e9f61 (patch)
treec1ea33dcfbc057785cbf5852f72618716aafc331
parentca1a8d5bb453a645b61a8ebcee8b40b7a51bdfdf (diff)
downloadmongo-6bc93c0a8730511e021645b711079dd0ec4e9f61.tar.gz
SERVER-15647: make Extent Manager thread-safe and enable MMAPv1 CLL
-rw-r--r--src/mongo/db/concurrency/d_concurrency.cpp10
-rw-r--r--src/mongo/db/concurrency/d_concurrency.h18
-rw-r--r--src/mongo/db/concurrency/lock_mgr_defs.h11
-rw-r--r--src/mongo/db/storage/mmap_v1/extent_manager.h17
-rw-r--r--src/mongo/db/storage/mmap_v1/mmap_v1_database_catalog_entry.cpp3
-rw-r--r--src/mongo/db/storage/mmap_v1/mmap_v1_extent_manager.cpp76
-rw-r--r--src/mongo/db/storage/mmap_v1/mmap_v1_extent_manager.h97
-rw-r--r--src/mongo/db/storage/mmap_v1/record_store_v1_test_help.cpp6
-rw-r--r--src/mongo/db/storage/mmap_v1/record_store_v1_test_help.h4
-rw-r--r--src/mongo/s/d_migrate.cpp16
10 files changed, 176 insertions, 82 deletions
diff --git a/src/mongo/db/concurrency/d_concurrency.cpp b/src/mongo/db/concurrency/d_concurrency.cpp
index 4a4dfcce3be..ff9d4e5fa60 100644
--- a/src/mongo/db/concurrency/d_concurrency.cpp
+++ b/src/mongo/db/concurrency/d_concurrency.cpp
@@ -270,6 +270,16 @@ namespace mongo {
}
}
+ Lock::ResourceLock::ResourceLock(Locker* lockState, ResourceId rid, LockMode mode)
+ : _rid(rid),
+ _lockState(lockState) {
+ _lockState->lock(_rid, mode);
+ }
+
+ Lock::ResourceLock::~ResourceLock() {
+ _lockState->unlock(_rid);
+ }
+
Lock::DBRead::DBRead(Locker* lockState, const StringData& dbOrNs) :
DBLock(lockState, nsToDatabaseSubstring(dbOrNs), MODE_S) { }
diff --git a/src/mongo/db/concurrency/d_concurrency.h b/src/mongo/db/concurrency/d_concurrency.h
index e3f5dcac647..82c41d56686 100644
--- a/src/mongo/db/concurrency/d_concurrency.h
+++ b/src/mongo/db/concurrency/d_concurrency.h
@@ -201,7 +201,7 @@ namespace mongo {
*/
class CollectionLock : boost::noncopyable {
public:
- CollectionLock (Locker* lockState, const StringData& ns, const LockMode);
+ CollectionLock(Locker* lockState, const StringData& ns, const LockMode);
virtual ~CollectionLock();
private:
const ResourceId _id;
@@ -209,6 +209,22 @@ namespace mongo {
};
/**
+ * General purpose RAII wrapper for a resource managed by the lock manager
+ *
+ * See LockMode for the supported modes. Unlike DBLock/Collection lock, this will not do
+ * any additional checks/upgrades or global locking. Use ResourceLock for locking
+ * resources other than RESOURCE_GLOBAL, RESOURCE_DATABASE and RESOURCE_COLLECTION.
+ */
+ class ResourceLock : boost::noncopyable {
+ public:
+ ResourceLock(Locker* lockState, const ResourceId rid, const LockMode);
+ virtual ~ResourceLock();
+ private:
+ const ResourceId _rid;
+ Locker* _lockState;
+ };
+
+ /**
* Shared database lock -- DEPRECATED, please transition to DBLock and collection locks
*
* Allows concurrent read access to the given database, blocking any writers.
diff --git a/src/mongo/db/concurrency/lock_mgr_defs.h b/src/mongo/db/concurrency/lock_mgr_defs.h
index 09fd03f9c3f..e34acefb0ba 100644
--- a/src/mongo/db/concurrency/lock_mgr_defs.h
+++ b/src/mongo/db/concurrency/lock_mgr_defs.h
@@ -106,8 +106,11 @@ namespace mongo {
/**
* Hierarchy of resource types. The lock manager knows nothing about this hierarchy, it is
- * purely logical. I.e., acquiring a RESOURCE_GLOBAL and then a RESOURCE_DATABASE won't block
- * at the level of the lock manager.
+ * purely logical. Resources of different types will never conflict with each other. While the
+ * lock manager does not know or care about ordering, the general policy is that resources are
+ * acquired in the order below. For example, one might first acquire a RESOURCE_GLOBAL and then
+ * the desired RESOURCE_DATABASE, both using intent modes, and finally a RESOURCE_COLLECTION
+ * in exclusive mode.
*/
enum ResourceType {
// Special (singleton) resources
@@ -119,14 +122,14 @@ namespace mongo {
RESOURCE_DATABASE,
RESOURCE_COLLECTION,
RESOURCE_DOCUMENT,
+ RESOURCE_MMAPv1_EXTENT_MANAGER, // Only for MMAPv1 engine, keyed on database name
// Counts the rest. Always insert new resource types above this entry.
ResourceTypesCount
};
// We only use 3 bits for the resource type in the ResourceId hash
- BOOST_STATIC_ASSERT(ResourceTypesCount < 8);
-
+ BOOST_STATIC_ASSERT(ResourceTypesCount <= 8);
/**
* Uniquely identifies a lockable resource.
diff --git a/src/mongo/db/storage/mmap_v1/extent_manager.h b/src/mongo/db/storage/mmap_v1/extent_manager.h
index e655883d109..79c0c755f86 100644
--- a/src/mongo/db/storage/mmap_v1/extent_manager.h
+++ b/src/mongo/db/storage/mmap_v1/extent_manager.h
@@ -54,8 +54,9 @@ namespace mongo {
* - responsible for figuring out how to get a new extent
* - can use any method it wants to do so
* - this structure is NOT stored on disk
- * - this class is NOT thread safe, locking should be above (for now)
- *
+ * - files will not be removed from the EM
+ * - extent size and loc are immutable
+ * - this class is thread safe, once constructed and init()-ialized
*/
class ExtentManager {
MONGO_DISALLOW_COPYING( ExtentManager );
@@ -91,7 +92,15 @@ namespace mongo {
*/
virtual void freeExtent( OperationContext* txn, DiskLoc extent ) = 0;
- virtual void freeListStats( int* numExtents, int64_t* totalFreeSize ) const = 0;
+ /**
+ * Retrieve statistics on the the free list managed by this ExtentManger.
+ * @param numExtents - non-null pointer to an int that will receive the number of extents
+ * @param totalFreeSizeBytes - non-null pointer to an int64_t receiving the total free
+ * space in the free list.
+ */
+ virtual void freeListStats(OperationContext* txn,
+ int* numExtents,
+ int64_t* totalFreeSizeBytes) const = 0;
/**
* @param loc - has to be for a specific Record
@@ -153,7 +162,7 @@ namespace mongo {
};
/**
* Tell the system that for this extent, it will have this kind of disk access.
- * Owner takes owernship of CacheHint
+ * Caller takes owernship of CacheHint
*/
virtual CacheHint* cacheHint( const DiskLoc& extentLoc, const HintType& hint ) = 0;
};
diff --git a/src/mongo/db/storage/mmap_v1/mmap_v1_database_catalog_entry.cpp b/src/mongo/db/storage/mmap_v1/mmap_v1_database_catalog_entry.cpp
index c5f8e371120..e219c5cf765 100644
--- a/src/mongo/db/storage/mmap_v1/mmap_v1_database_catalog_entry.cpp
+++ b/src/mongo/db/storage/mmap_v1/mmap_v1_database_catalog_entry.cpp
@@ -183,7 +183,6 @@ namespace mongo {
else {
log() << e.what() << endl;
}
- _extentManager.reset();
throw;
}
}
@@ -426,7 +425,7 @@ namespace mongo {
int freeListSize = 0;
int64_t freeListSpace = 0;
- _extentManager.freeListStats( &freeListSize, &freeListSpace );
+ _extentManager.freeListStats(opCtx, &freeListSize, &freeListSpace);
BSONObjBuilder extentFreeList( output->subobjStart( "extentFreeList" ) );
extentFreeList.append( "num", freeListSize );
diff --git a/src/mongo/db/storage/mmap_v1/mmap_v1_extent_manager.cpp b/src/mongo/db/storage/mmap_v1/mmap_v1_extent_manager.cpp
index 027ab609bda..f6d18f3090d 100644
--- a/src/mongo/db/storage/mmap_v1/mmap_v1_extent_manager.cpp
+++ b/src/mongo/db/storage/mmap_v1/mmap_v1_extent_manager.cpp
@@ -47,23 +47,13 @@
namespace mongo {
- MmapV1ExtentManager::MmapV1ExtentManager( const StringData& dbname,
- const StringData& path,
- bool directoryPerDB )
- : _dbname( dbname.toString() ),
- _path( path.toString() ),
- _directoryPerDB( directoryPerDB ) {
- }
-
- MmapV1ExtentManager::~MmapV1ExtentManager() {
- reset();
- }
-
- void MmapV1ExtentManager::reset() {
- for ( size_t i = 0; i < _files.size(); i++ ) {
- delete _files[i];
- }
- _files.clear();
+ MmapV1ExtentManager::MmapV1ExtentManager(const StringData& dbname,
+ const StringData& path,
+ bool directoryPerDB)
+ : _dbname(dbname.toString()),
+ _path(path.toString()),
+ _directoryPerDB(directoryPerDB),
+ _rid(RESOURCE_MMAPv1_EXTENT_MANAGER, dbname) {
}
boost::filesystem::path MmapV1ExtentManager::fileName( int n ) const {
@@ -78,7 +68,7 @@ namespace mongo {
Status MmapV1ExtentManager::init(OperationContext* txn) {
- verify( _files.size() == 0 );
+ verify(_files.empty());
for ( int n = 0; n < DiskLoc::MaxFiles; n++ ) {
boost::filesystem::path fullName = fileName( n );
@@ -126,7 +116,7 @@ namespace mongo {
}
const DataFile* MmapV1ExtentManager::_getOpenFile(int fileId) const {
- if (fileId < 0 || fileId >= static_cast<int>(_files.size())) {
+ if (fileId < 0 || fileId >= _files.size()) {
log() << "_getOpenFile() invalid file index requested " << fileId;
invariant(false);
}
@@ -135,7 +125,7 @@ namespace mongo {
}
DataFile* MmapV1ExtentManager::_getOpenFile(int fileId) {
- if (fileId < 0 || fileId >= static_cast<int>(_files.size())) {
+ if (fileId < 0 || fileId >= _files.size()) {
log() << "_getOpenFile() invalid file index requested " << fileId;
invariant(false);
}
@@ -192,11 +182,11 @@ namespace mongo {
}
// Returns the last file added
- return *_files.rbegin();
+ return _files[allocFileId];
}
int MmapV1ExtentManager::numFiles() const {
- return static_cast<int>( _files.size() );
+ return _files.size();
}
long long MmapV1ExtentManager::fileSize() const {
@@ -401,11 +391,11 @@ namespace mongo {
return best->myLoc;
}
- DiskLoc MmapV1ExtentManager::allocateExtent( OperationContext* txn,
- bool capped,
- int size,
- bool enforceQuota ) {
-
+ DiskLoc MmapV1ExtentManager::allocateExtent(OperationContext* txn,
+ bool capped,
+ int size,
+ bool enforceQuota) {
+ Lock::ResourceLock rlk(txn->lockState(), _rid, MODE_X);
bool fromFreeList = true;
DiskLoc eloc = _allocFromFreeList( txn, size, capped );
if ( eloc.isNull() ) {
@@ -425,6 +415,7 @@ namespace mongo {
}
void MmapV1ExtentManager::freeExtent(OperationContext* txn, DiskLoc firstExt ) {
+ Lock::ResourceLock rlk(txn->lockState(), _rid, MODE_X);
Extent* e = getExtent( firstExt );
txn->recoveryUnit()->writing( &e->xnext )->Null();
txn->recoveryUnit()->writing( &e->xprev )->Null();
@@ -447,6 +438,7 @@ namespace mongo {
}
void MmapV1ExtentManager::freeExtents(OperationContext* txn, DiskLoc firstExt, DiskLoc lastExt) {
+ Lock::ResourceLock rlk(txn->lockState(), _rid, MODE_X);
if ( firstExt.isNull() && lastExt.isNull() )
return;
@@ -472,7 +464,6 @@ namespace mongo {
*txn->recoveryUnit()->writing( &getExtent( lastExt )->xnext ) = a;
_setFreeListStart( txn, firstExt );
}
-
}
DiskLoc MmapV1ExtentManager::_getFreeListStart() const {
@@ -501,18 +492,22 @@ namespace mongo {
*txn->recoveryUnit()->writing( &file->header()->freeListEnd ) = loc;
}
- void MmapV1ExtentManager::freeListStats( int* numExtents, int64_t* totalFreeSize ) const {
- invariant( numExtents );
- invariant( totalFreeSize );
+ void MmapV1ExtentManager::freeListStats(OperationContext* txn,
+ int* numExtents,
+ int64_t* totalFreeSizeBytes) const {
+ Lock::ResourceLock rlk(txn->lockState(), _rid, MODE_S);
+
+ invariant(numExtents);
+ invariant(totalFreeSizeBytes);
*numExtents = 0;
- *totalFreeSize = 0;
+ *totalFreeSizeBytes = 0;
DiskLoc a = _getFreeListStart();
while( !a.isNull() ) {
Extent *e = getExtent( a );
(*numExtents)++;
- (*totalFreeSize) += e->length;
+ (*totalFreeSizeBytes) += e->length;
a = e->xnext;
}
@@ -553,6 +548,21 @@ namespace mongo {
MAdvise::Sequential );
}
+ MmapV1ExtentManager::FilesArray::~FilesArray() {
+ for (int i = 0; i < size(); i++) {
+ delete _files[i];
+ }
+ }
+
+ void MmapV1ExtentManager::FilesArray::push_back(DataFile* val) {
+ scoped_lock lk(_writersMutex);
+ const int n = _size.load();
+ invariant(n < DiskLoc::MaxFiles);
+ // Note ordering: _size update must come after updating the _files array
+ _files[n] = val;
+ _size.store(n + 1);
+ }
+
void MmapV1ExtentManager::getFileFormat( OperationContext* txn, int* major, int* minor ) const {
if ( numFiles() == 0 )
return;
diff --git a/src/mongo/db/storage/mmap_v1/mmap_v1_extent_manager.h b/src/mongo/db/storage/mmap_v1/mmap_v1_extent_manager.h
index d41f8b0e21f..e1857db726c 100644
--- a/src/mongo/db/storage/mmap_v1/mmap_v1_extent_manager.h
+++ b/src/mongo/db/storage/mmap_v1/mmap_v1_extent_manager.h
@@ -31,14 +31,16 @@
#pragma once
#include <string>
-#include <vector>
#include <boost/filesystem/path.hpp>
+#include "mongo/platform/atomic_word.h"
#include "mongo/base/status.h"
#include "mongo/base/string_data.h"
+#include "mongo/db/concurrency/lock_mgr_defs.h"
#include "mongo/db/diskloc.h"
#include "mongo/db/storage/mmap_v1/extent_manager.h"
+#include "mongo/util/concurrency/mutex.h"
namespace mongo {
@@ -57,10 +59,16 @@ namespace mongo {
* - responsible for figuring out how to get a new extent
* - can use any method it wants to do so
* - this structure is NOT stored on disk
- * - this class is NOT thread safe, locking should be above (for now)
+ * - this class is thread safe, except as indicated below
*
- * implementation:
- * - ExtentManager holds a list of DataFile
+ * Implementation:
+ * - ExtentManager holds a preallocated list of DataFile
+ * - files will not be removed from the EM, so _files access can be lock-free
+ * - extent size and loc are immutable
+ * - Any non-const public operations on an ExtentManager will acquire an MODE_X lock on its
+ * RESOURCE_MMAPv1_EXTENT_MANAGER resource from the lock-manager, which will extend life
+ * to during WriteUnitOfWorks that might need rollback. Private methods will only
+ * be called from public ones.
*/
class MmapV1ExtentManager : public ExtentManager {
MONGO_DISALLOW_COPYING( MmapV1ExtentManager );
@@ -70,18 +78,11 @@ namespace mongo {
* while a bit odd, this is not a layer violation as extents
* are a peer to the .ns file, without any layering
*/
- MmapV1ExtentManager( const StringData& dbname, const StringData& path,
- bool directoryPerDB );
-
- virtual ~MmapV1ExtentManager();
-
- /**
- * deletes all state and puts back to original state
- */
- void reset();
+ MmapV1ExtentManager(const StringData& dbname, const StringData& path,
+ bool directoryPerDB);
/**
- * opens all current files
+ * opens all current files, not thread safe
*/
Status init(OperationContext* txn);
@@ -105,9 +106,12 @@ namespace mongo {
*/
void freeExtent( OperationContext* txn, DiskLoc extent );
+ // For debug only: not thread safe
void printFreeList() const;
- void freeListStats( int* numExtents, int64_t* totalFreeSize ) const;
+ void freeListStats(OperationContext* txn,
+ int* numExtents,
+ int64_t* totalFreeSizeBytes) const;
/**
* @param loc - has to be for a specific Record
@@ -135,18 +139,19 @@ namespace mongo {
*/
Extent* getExtent( const DiskLoc& loc, bool doSanityCheck = true ) const;
- void getFileFormat( OperationContext* txn, int* major, int* minor ) const;
+ /**
+ * Not thread safe, requires a database exclusive lock
+ */
+ void getFileFormat(OperationContext* txn, int* major, int* minor) const;
void setFileFormat(OperationContext* txn, int major, int minor);
const DataFile* getOpenFile( int n ) const { return _getOpenFile( n ); }
virtual int maxSize() const;
-
virtual CacheHint* cacheHint( const DiskLoc& extentLoc, const HintType& hint );
private:
-
/**
* will return NULL if nothing suitable in free list
*/
@@ -176,15 +181,53 @@ namespace mongo {
// -----
- std::string _dbname; // i.e. "test"
- std::string _path; // i.e. "/data/db"
- bool _directoryPerDB;
-
- // must be in the dbLock when touching this (and write locked when writing to of course)
- // however during Database object construction we aren't, which is ok as it isn't yet visible
- // to others and we are in the dbholder lock then.
- std::vector<DataFile*> _files;
+ const std::string _dbname; // i.e. "test"
+ const std::string _path; // i.e. "/data/db"
+ const bool _directoryPerDB;
+ const ResourceId _rid;
+ /**
+ * Simple wrapper around an array object to allow append-only modification of the array,
+ * as well as concurrent read-accesses. This class has a minimal interface to keep
+ * implementation simple and easy to modify.
+ */
+ class FilesArray {
+ public:
+ FilesArray() : _writersMutex("MmapV1ExtentManager"), _size(0) { }
+ ~FilesArray();
+
+ /**
+ * Returns file at location 'n' in the array, with 'n' less than number of files added.
+ * Will always return the same pointer for a given file.
+ */
+ DataFile* operator[](int n) const {
+ invariant(n >= 0 && n < size());
+ return _files[n];
+ }
+
+ /**
+ * Returns true iff no files were added
+ */
+ bool empty() const {
+ return size() == 0;
+ }
+
+ /**
+ * Returns number of files added to the array
+ */
+ int size() const {
+ return _size.load();
+ }
+
+ // Appends val to the array, taking ownership of its pointer
+ void push_back(DataFile* val);
+
+ private:
+ mutex _writersMutex;
+ AtomicInt32 _size; // number of files in the array
+ DataFile* _files[DiskLoc::MaxFiles];
+ };
+
+ FilesArray _files;
};
-
}
diff --git a/src/mongo/db/storage/mmap_v1/record_store_v1_test_help.cpp b/src/mongo/db/storage/mmap_v1/record_store_v1_test_help.cpp
index cddabbe3fef..ec19da43b36 100644
--- a/src/mongo/db/storage/mmap_v1/record_store_v1_test_help.cpp
+++ b/src/mongo/db/storage/mmap_v1/record_store_v1_test_help.cpp
@@ -253,8 +253,10 @@ namespace mongo {
void DummyExtentManager::freeExtent( OperationContext* txn, DiskLoc extent ) {
// XXX
}
- void DummyExtentManager::freeListStats( int* numExtents, int64_t* totalFreeSize ) const {
- invariant( false );
+ void DummyExtentManager::freeListStats(OperationContext* txn,
+ int* numExtents,
+ int64_t* totalFreeSizeBytes) const {
+ invariant(false);
}
Record* DummyExtentManager::recordForV1( const DiskLoc& loc ) const {
diff --git a/src/mongo/db/storage/mmap_v1/record_store_v1_test_help.h b/src/mongo/db/storage/mmap_v1/record_store_v1_test_help.h
index c8b321ccd40..a04f9b40331 100644
--- a/src/mongo/db/storage/mmap_v1/record_store_v1_test_help.h
+++ b/src/mongo/db/storage/mmap_v1/record_store_v1_test_help.h
@@ -129,7 +129,9 @@ namespace mongo {
virtual void freeExtent( OperationContext* txn, DiskLoc extent );
- virtual void freeListStats( int* numExtents, int64_t* totalFreeSize ) const;
+ virtual void freeListStats(OperationContext* txn,
+ int* numExtents,
+ int64_t* totalFreeSizeBytes) const;
virtual Record* recordForV1( const DiskLoc& loc ) const;
diff --git a/src/mongo/s/d_migrate.cpp b/src/mongo/s/d_migrate.cpp
index eae740f2c79..cbf59ca1ada 100644
--- a/src/mongo/s/d_migrate.cpp
+++ b/src/mongo/s/d_migrate.cpp
@@ -2114,27 +2114,27 @@ namespace mongo {
bool didAnything = false;
if ( xfer["deleted"].isABSONObj() ) {
+ Lock::DBLock dlk(txn->lockState(), nsToDatabaseSubstring(ns), MODE_IX);
Helpers::RemoveSaver rs( "moveChunk" , ns , "removedDuring" );
BSONObjIterator i( xfer["deleted"].Obj() );
while ( i.more() ) {
- Client::WriteContext cx(txn, ns);
+ Lock::CollectionLock clk(txn->lockState(), ns, MODE_X);
+ WriteUnitOfWork wunit(txn);
+ Client::Context ctx(txn, ns);
BSONObj id = i.next().Obj();
// do not apply deletes if they do not belong to the chunk being migrated
BSONObj fullObj;
- if ( Helpers::findById( txn, cx.ctx().db(), ns.c_str(), id, fullObj ) ) {
- if ( ! isInRange( fullObj , min , max , shardKeyPattern ) ) {
+ if (Helpers::findById(txn, ctx.db(), ns.c_str(), id, fullObj)) {
+ if (!isInRange(fullObj , min , max , shardKeyPattern)) {
log() << "not applying out of range deletion: " << fullObj << migrateLog;
continue;
}
}
- Lock::DBLock lk(txn->lockState(), nsToDatabaseSubstring(ns), MODE_X);
- Client::Context ctx(txn, ns);
-
if (serverGlobalParams.moveParanoia) {
rs.goingToDelete(fullObj);
}
@@ -2148,8 +2148,8 @@ namespace mongo {
false /* god */,
true /* fromMigrate */);
- *lastOpApplied = cx.ctx().getClient()->getLastOp().asDate();
- cx.commit();
+ *lastOpApplied = ctx.getClient()->getLastOp().asDate();
+ wunit.commit();
didAnything = true;
}
}