diff options
author | Mark Benvenuto <mark.benvenuto@mongodb.com> | 2014-11-25 16:58:34 -0500 |
---|---|---|
committer | Mark Benvenuto <mark.benvenuto@mongodb.com> | 2014-12-08 11:17:18 -0500 |
commit | 909bb8ed24c3c2448fc10532f59c54fb42033190 (patch) | |
tree | c9b3b323674767bd5693beaf2918d7de7233f241 /src | |
parent | 9b71a6b87626c0966ef0c0e1ffdde35acab74b89 (diff) | |
download | mongo-909bb8ed24c3c2448fc10532f59c54fb42033190.tar.gz |
SERVER-16285: Crash when dropping and creating databases concurrently on Windows
Diffstat (limited to 'src')
-rw-r--r-- | src/mongo/db/storage/mmap_v1/dur_commitjob.cpp | 2 | ||||
-rw-r--r-- | src/mongo/db/storage/mmap_v1/dur_recovery_unit.cpp | 2 | ||||
-rw-r--r-- | src/mongo/db/storage/mmap_v1/durable_mapped_file.cpp | 107 | ||||
-rw-r--r-- | src/mongo/db/storage/mmap_v1/durable_mapped_file.h | 117 | ||||
-rw-r--r-- | src/mongo/dbtests/mmaptests.cpp | 4 | ||||
-rw-r--r-- | src/mongo/util/mmap.h | 62 | ||||
-rw-r--r-- | src/mongo/util/mmap_win.cpp | 106 |
7 files changed, 250 insertions, 150 deletions
diff --git a/src/mongo/db/storage/mmap_v1/dur_commitjob.cpp b/src/mongo/db/storage/mmap_v1/dur_commitjob.cpp index 10c8acadd97..d270dfd0a76 100644 --- a/src/mongo/db/storage/mmap_v1/dur_commitjob.cpp +++ b/src/mongo/db/storage/mmap_v1/dur_commitjob.cpp @@ -48,7 +48,7 @@ namespace mongo { /** base declare write intent function that all the helpers call. */ /** we batch up our write intents so that we do not have to synchronize too often */ void DurableImpl::declareWriteIntent(void *p, unsigned len) { - MemoryMappedFile::makeWritable(p, len); + privateViews.makeWritable(p, len); SimpleMutex::scoped_lock lk(commitJob.groupCommitMutex); commitJob.note(p, len); } diff --git a/src/mongo/db/storage/mmap_v1/dur_recovery_unit.cpp b/src/mongo/db/storage/mmap_v1/dur_recovery_unit.cpp index f02392987f2..00ff4ba1943 100644 --- a/src/mongo/db/storage/mmap_v1/dur_recovery_unit.cpp +++ b/src/mongo/db/storage/mmap_v1/dur_recovery_unit.cpp @@ -192,7 +192,7 @@ namespace mongo { invariant(len < size_t(numeric_limits<int>::max())); // Windows requires us to adjust the address space *before* we write to anything. - MemoryMappedFile::makeWritable(data, len); + privateViews.makeWritable(data, len); _writes.push_back(Write(static_cast<char*>(data), len, _preimageBuffer.size())); _preimageBuffer.append(static_cast<char*>(data), len); diff --git a/src/mongo/db/storage/mmap_v1/durable_mapped_file.cpp b/src/mongo/db/storage/mmap_v1/durable_mapped_file.cpp index b71bf5ab780..a27376d6d01 100644 --- a/src/mongo/db/storage/mmap_v1/durable_mapped_file.cpp +++ b/src/mongo/db/storage/mmap_v1/durable_mapped_file.cpp @@ -46,6 +46,7 @@ #include "mongo/db/storage_options.h" #include "mongo/util/mongoutils/str.h" #include "mongo/util/log.h" +#include "mongo/util/processinfo.h" using namespace mongoutils; @@ -66,21 +67,113 @@ namespace mongo { } /** register view. threadsafe */ - void PointerToDurableMappedFile::add(void *view, DurableMappedFile *f) { + void PointerToDurableMappedFile::add_inlock(void *view, DurableMappedFile *f) { verify(view); verify(f); - mutex::scoped_lock lk(_m); - _views.insert( pair<void*,DurableMappedFile*>(view,f) ); + clearWritableBits_inlock(view, f->length()); + _views.insert(pair<void*, DurableMappedFile*>(view, f)); } /** de-register view. threadsafe */ - void PointerToDurableMappedFile::remove(void *view) { + void PointerToDurableMappedFile::remove(void *view, size_t len) { if( view ) { mutex::scoped_lock lk(_m); + clearWritableBits_inlock(view, len); _views.erase(view); } } +#ifdef _WIN32 + void PointerToDurableMappedFile::clearWritableBits(void *privateView, size_t len) { + mutex::scoped_lock lk(_m); + clearWritableBits_inlock(privateView, len); + } + + /** notification on unmapping so we can clear writable bits */ + void PointerToDurableMappedFile::clearWritableBits_inlock(void *privateView, size_t len) { + for (unsigned i = reinterpret_cast<size_t>(privateView) / MemoryMappedCOWBitset::ChunkSize; + i <= (reinterpret_cast<size_t>(privateView) + len) / MemoryMappedCOWBitset::ChunkSize; + ++i) { + writable.clear(i); + dassert(!writable.get(i)); + } + } + + extern mutex mapViewMutex; + + __declspec(noinline) void PointerToDurableMappedFile::makeChunkWritable(size_t chunkno) { + mutex::scoped_lock lkPrivateViews(_m); + + if (writable.get(chunkno)) // double check lock + return; + + // remap all maps in this chunk. + // common case is a single map, but could have more than one with smallfiles or .ns files + size_t chunkStart = chunkno * MemoryMappedCOWBitset::ChunkSize; + size_t chunkNext = chunkStart + MemoryMappedCOWBitset::ChunkSize; + + scoped_lock lkMapView(mapViewMutex); + + map<void*, DurableMappedFile*>::iterator i = _views.upper_bound((void*)(chunkNext - 1)); + while (1) { + const pair<void*, DurableMappedFile*> x = *(--i); + DurableMappedFile *mmf = x.second; + if (mmf == 0) + break; + + size_t viewStart = reinterpret_cast<size_t>(x.first); + size_t viewEnd = viewStart + mmf->length(); + if (viewEnd <= chunkStart) + break; + + size_t protectStart = std::max(viewStart, chunkStart); + dassert(protectStart < chunkNext); + + size_t protectEnd = std::min(viewEnd, chunkNext); + size_t protectSize = protectEnd - protectStart; + dassert(protectSize > 0 && protectSize <= MemoryMappedCOWBitset::ChunkSize); + + DWORD oldProtection; + bool ok = VirtualProtect(reinterpret_cast<void*>(protectStart), + protectSize, + PAGE_WRITECOPY, + &oldProtection); + if (!ok) { + DWORD dosError = GetLastError(); + + if (dosError == ERROR_COMMITMENT_LIMIT) { + // System has run out of memory between physical RAM & page file, tell the user + BSONObjBuilder bb; + + ProcessInfo p; + p.getExtraInfo(bb); + + severe() << "MongoDB has exhausted the system memory capacity."; + severe() << "Current Memory Status: " << bb.obj().toString(); + } + + severe() << "VirtualProtect for " << mmf->filename() + << " chunk " << chunkno + << " failed with " << errnoWithDescription(dosError) + << " (chunk size is " << protectSize + << ", address is " << hex << protectStart << dec << ")" + << " in mongo::makeChunkWritable, terminating" + << endl; + + fassertFailed(16362); + } + } + + writable.set(chunkno); + } +#else + void PointerToDurableMappedFile::clearWritableBits(void *privateView, size_t len) { + } + + void PointerToDurableMappedFile::clearWritableBits_inlock(void *privateView, size_t len) { + } +#endif + PointerToDurableMappedFile::PointerToDurableMappedFile() : _m("PointerToDurableMappedFile") { #if defined(SIZE_MAX) size_t max = SIZE_MAX; @@ -162,11 +255,13 @@ namespace mongo { LOG(3) << "mmf finishOpening " << (void*) _view_write << ' ' << filename() << " len:" << length() << endl; if( _view_write ) { if (storageGlobalParams.dur) { + scoped_lock lk2(privateViews._mutex()); + _view_private = createPrivateMap(); if( _view_private == 0 ) { msgasserted(13636, str::stream() << "file " << filename() << " open/create failed in createPrivateMap (look in log for more information)"); } - privateViews.add(_view_private, this); // note that testIntent builds use this, even though it points to view_write then... + privateViews.add_inlock(_view_private, this); // note that testIntent builds use this, even though it points to view_write then... } else { _view_private = _view_write; @@ -194,7 +289,7 @@ namespace mongo { } LockMongoFilesExclusive lk; - privateViews.remove(_view_private); + privateViews.remove(_view_private, length()); _view_write = _view_private = 0; MemoryMappedFile::close(); } diff --git a/src/mongo/db/storage/mmap_v1/durable_mapped_file.h b/src/mongo/db/storage/mmap_v1/durable_mapped_file.h index cf1fa59b9d6..53a211f609e 100644 --- a/src/mongo/db/storage/mmap_v1/durable_mapped_file.h +++ b/src/mongo/db/storage/mmap_v1/durable_mapped_file.h @@ -111,21 +111,83 @@ namespace mongo { bool finishOpening(); }; + +#ifdef _WIN32 + // Simple array based bitset to track COW chunks in memory mapped files on Windows + // A chunk is a 64MB granular region in virtual memory that we mark as COW everytime we need + // to write to a memory mapped files on Windows + // + class MemoryMappedCOWBitset { + MONGO_DISALLOW_COPYING(MemoryMappedCOWBitset); + public: + // Size of the chunks we mark Copy-On-Write with VirtualProtect + static const unsigned long long ChunkSize = 64 * 1024 * 1024; + + // Number of chunks we store in our bitset which are really 32-bit ints + static const unsigned long long NChunks = 64 * 1024; + + // Total Virtual Memory space we can cover with the bitset + static const unsigned long long MaxChunkMemory = ChunkSize * NChunks + * sizeof(unsigned int) * 8; + + // Size in bytes of the bitset we allocate + static const unsigned long long MaxChunkBytes = NChunks * sizeof(unsigned int); + + // 128 TB Virtual Memory space in Windows 8.1/2012 R2, 8TB before + static const unsigned long long MaxWinMemory = + 128ULL * 1024 * 1024 * 1024 * 1024; + + // Make sure that the chunk memory covers the Max Windows user process VM space + static_assert(MaxChunkMemory == MaxWinMemory, + "Need a larger bitset to cover max process VM space"); + public: + MemoryMappedCOWBitset() { + static_assert(MemoryMappedCOWBitset::MaxChunkBytes == sizeof(bits), + "Validate our predicted bitset size is correct"); + } + + bool get(uintptr_t i) const { + uintptr_t x = i / 32; + verify(x < MemoryMappedCOWBitset::NChunks); + return (bits[x].loadRelaxed() & (1 << (i % 32))) != 0; + } + + // Note: assumes caller holds privateViews.mutex + void set(uintptr_t i) { + uintptr_t x = i / 32; + verify(x < MemoryMappedCOWBitset::NChunks); + bits[x].store( bits[x].loadRelaxed() | (1 << (i % 32))); + } + + // Note: assumes caller holds privateViews.mutex + void clear(uintptr_t i) { + uintptr_t x = i / 32; + verify(x < MemoryMappedCOWBitset::NChunks); + bits[x].store(bits[x].loadRelaxed() & ~(1 << (i % 32))); + } + + private: + // atomic as we are doing double check locking + AtomicUInt32 bits[MemoryMappedCOWBitset::NChunks]; + }; +#endif + /** for durability support we want to be able to map pointers to specific DurableMappedFile objects. */ - class PointerToDurableMappedFile : boost::noncopyable { + class PointerToDurableMappedFile { + MONGO_DISALLOW_COPYING(PointerToDurableMappedFile); public: PointerToDurableMappedFile(); /** register view. - threadsafe - */ - void add(void *view, DurableMappedFile *f); + not-threadsafe, caller must hold _mutex() + */ + void add_inlock(void *view, DurableMappedFile *f); /** de-register view. threadsafe */ - void remove(void *view); + void remove(void *view, size_t length); /** find associated MMF object for a given pointer. threadsafe @@ -136,17 +198,58 @@ namespace mongo { /** for doing many finds in a row with one lock operation */ mutex& _mutex() { return _m; } - DurableMappedFile* find_inlock(void *p, /*out*/ size_t& ofs); - std::map<void*,DurableMappedFile*>::iterator finditer_inlock(void *p) { return _views.upper_bound(p); } + /** not-threadsafe, caller must hold _mutex() */ + DurableMappedFile* find_inlock(void *p, /*out*/ size_t& ofs); + /** not-threadsafe, caller must hold _mutex() */ unsigned numberOfViews_inlock() const { return _views.size(); } + /** make the private map range writable (necessary for our windows implementation) */ + void makeWritable(void *, unsigned len); + + void clearWritableBits(void *privateView, size_t len); + + private: + void clearWritableBits_inlock(void *privateView, size_t len); + +#ifdef _WIN32 + void makeChunkWritable(size_t chunkno); +#endif + private: + // PointerToDurableMappedFile Mutex + // + // Protects: + // Protects internal consistency of data structure + // Lock Ordering: + // Must be taken before MapViewMutex if both are taken to prevent deadlocks mutex _m; std::map<void*, DurableMappedFile*> _views; + +#ifdef _WIN32 + // Tracks which memory mapped regions are marked as Copy on Write + MemoryMappedCOWBitset writable; +#endif }; +#ifdef _WIN32 + inline void PointerToDurableMappedFile::makeWritable(void *privateView, unsigned len) { + size_t p = reinterpret_cast<size_t>(privateView); + unsigned a = p / MemoryMappedCOWBitset::ChunkSize; + unsigned b = (p + len) / MemoryMappedCOWBitset::ChunkSize; + + for (unsigned i = a; i <= b; i++) { + if (!writable.get(i)) { + makeChunkWritable(i); + } + } + } +#else + inline void PointerToDurableMappedFile::makeWritable(void *_p, unsigned len) { + } +#endif + // allows a pointer into any private view of a DurableMappedFile to be resolved to the DurableMappedFile object extern PointerToDurableMappedFile privateViews; } diff --git a/src/mongo/dbtests/mmaptests.cpp b/src/mongo/dbtests/mmaptests.cpp index ad232c72340..dec4e65702b 100644 --- a/src/mongo/dbtests/mmaptests.cpp +++ b/src/mongo/dbtests/mmaptests.cpp @@ -78,7 +78,7 @@ namespace MMapTests { verify(p); // write something to the private view as a test if (storageGlobalParams.dur) - MemoryMappedFile::makeWritable(p, 6); + privateViews.makeWritable(p, 6); strcpy(p, "hello"); } if (storageGlobalParams.dur) { @@ -109,7 +109,7 @@ namespace MMapTests { char *p = (char *) f.getView(); verify(p); if (storageGlobalParams.dur) - MemoryMappedFile::makeWritable(p, 4); + privateViews.makeWritable(p, 4); strcpy(p, "zzz"); } if (storageGlobalParams.dur) { diff --git a/src/mongo/util/mmap.h b/src/mongo/util/mmap.h index 868a3a4a4bf..ea83710fb8c 100644 --- a/src/mongo/util/mmap.h +++ b/src/mongo/util/mmap.h @@ -218,14 +218,6 @@ namespace mongo { void* createReadOnlyMap(); void* createPrivateMap(); - /** make the private map range writable (necessary for our windows implementation) */ - static void makeWritable(void *, unsigned len) -#if defined(_WIN32) - ; -#else - { } -#endif - virtual uint64_t getUniqueId() const { return _uniqueId; } private: @@ -237,15 +229,14 @@ namespace mongo { unsigned long long len; const uint64_t _uniqueId; #ifdef _WIN32 - // NOTE: Locking Order: - // LockMongoFilesShared must be taken before _flushMutex if both are taken + // flush Mutex + // + // Protects: + // Prevent flush() and close() from concurrently running. + // It ensures close() cannot complete while flush() is running + // Lock Ordering: + // LockMongoFilesShared must be taken before _flushMutex if both are taken boost::mutex _flushMutex; - void clearWritableBits(void *privateView); - public: - static const unsigned ChunkSize = 64 * 1024 * 1024; - static const unsigned NChunks = 1024 * 1024; -#else - void clearWritableBits(void *privateView) { } #endif protected: @@ -263,43 +254,4 @@ namespace mongo { p(*i); } -#if defined(_WIN32) - class ourbitset { - volatile unsigned bits[MemoryMappedFile::NChunks]; // volatile as we are doing double check locking - public: - ourbitset() { - memset((void*) bits, 0, sizeof(bits)); - } - bool get(unsigned i) const { - unsigned x = i / 32; - verify( x < MemoryMappedFile::NChunks ); - return (bits[x] & (1 << (i%32))) != 0; - } - void set(unsigned i) { - unsigned x = i / 32; - wassert( x < (MemoryMappedFile::NChunks*2/3) ); // warn if getting close to limit - verify( x < MemoryMappedFile::NChunks ); - bits[x] |= (1 << (i%32)); - } - void clear(unsigned i) { - unsigned x = i / 32; - verify( x < MemoryMappedFile::NChunks ); - bits[x] &= ~(1 << (i%32)); - } - }; - extern ourbitset writable; - void makeChunkWritable(size_t chunkno); - inline void MemoryMappedFile::makeWritable(void *_p, unsigned len) { - size_t p = (size_t) _p; - unsigned a = p/ChunkSize; - unsigned b = (p+len)/ChunkSize; - for( unsigned i = a; i <= b; i++ ) { - if( !writable.get(i) ) { - makeChunkWritable(i); - } - } - } - -#endif - } // namespace mongo diff --git a/src/mongo/util/mmap_win.cpp b/src/mongo/util/mmap_win.cpp index f4c6efa31fd..5d0d0aa1356 100644 --- a/src/mongo/util/mmap_win.cpp +++ b/src/mongo/util/mmap_win.cpp @@ -55,9 +55,17 @@ namespace mongo { } const size_t g_minOSPageSizeBytes = fetchMinOSPageSizeBytes(); - + // MapViewMutex + // + // Protects: + // 1. Ensures all MapViewOfFile/UnMapViewOfFile operations are serialized to reduce chance of + // "address in use" errors (error code 487) + // - These errors can still occur if the memory is used for other purposes + // (stack storage, heap) + // 2. Prevents calls to VirtualProtect while we remapping files. + // Lock Ordering: + // - If taken, must be after previewViews._m to prevent deadlocks mutex mapViewMutex("mapView"); - ourbitset writable; MAdvise::MAdvise(void *,unsigned, Advice) { } MAdvise::~MAdvise() { } @@ -65,6 +73,13 @@ namespace mongo { const unsigned long long memoryMappedFileLocationFloor = 256LL * 1024LL * 1024LL * 1024LL; static unsigned long long _nextMemoryMappedFileLocation = memoryMappedFileLocationFloor; + // nextMemoryMappedFileLocationMutex + // + // Protects: + // Windows 64-bit specific allocation of virtual memory regions for + // placing memory mapped files in memory + // Lock Ordering: + // No restrictions static SimpleMutex _nextMemoryMappedFileLocationMutex("nextMemoryMappedFileLocationMutex"); unsigned long long AlignNumber(unsigned long long number, unsigned long long granularity) @@ -130,14 +145,6 @@ namespace mongo { return reinterpret_cast<void*>(static_cast<uintptr_t>(thisMemoryMappedFileLocation)); } - /** notification on unmapping so we can clear writable bits */ - void MemoryMappedFile::clearWritableBits(void *p) { - for( unsigned i = ((size_t)p)/ChunkSize; i <= (((size_t)p)+len)/ChunkSize; i++ ) { - writable.clear(i); - verify( !writable.get(i) ); - } - } - MemoryMappedFile::MemoryMappedFile() : _uniqueId(mmfNextId.fetchAndAdd(1)) { fd = 0; @@ -152,10 +159,14 @@ namespace mongo { // Prevent flush and close from concurrently running boost::lock_guard<boost::mutex> lk(_flushMutex); - for( vector<void*>::iterator i = views.begin(); i != views.end(); i++ ) { - clearWritableBits(*i); - UnmapViewOfFile(*i); + { + scoped_lock lk(mapViewMutex); + + for (vector<void*>::iterator i = views.begin(); i != views.end(); i++) { + UnmapViewOfFile(*i); + } } + views.clear(); if ( maphandle ) CloseHandle(maphandle); @@ -345,69 +356,6 @@ namespace mongo { extern mutex mapViewMutex; - __declspec(noinline) void makeChunkWritable(size_t chunkno) { - scoped_lock lk(mapViewMutex); - - if( writable.get(chunkno) ) // double check lock - return; - - // remap all maps in this chunk. common case is a single map, but could have more than one with smallfiles or .ns files - size_t chunkStart = chunkno * MemoryMappedFile::ChunkSize; - size_t chunkNext = chunkStart + MemoryMappedFile::ChunkSize; - - scoped_lock lk2(privateViews._mutex()); - map<void*,DurableMappedFile*>::iterator i = privateViews.finditer_inlock((void*) (chunkNext-1)); - while( 1 ) { - const pair<void*,DurableMappedFile*> x = *(--i); - DurableMappedFile *mmf = x.second; - if( mmf == 0 ) - break; - - size_t viewStart = (size_t) x.first; - size_t viewEnd = (size_t) (viewStart + mmf->length()); - if( viewEnd <= chunkStart ) - break; - - size_t protectStart = max(viewStart, chunkStart); - dassert(protectStart<chunkNext); - - size_t protectEnd = min(viewEnd, chunkNext); - size_t protectSize = protectEnd - protectStart; - dassert(protectSize>0&&protectSize<=MemoryMappedFile::ChunkSize); - - DWORD oldProtection; - bool ok = VirtualProtect( reinterpret_cast<void*>( protectStart ), - protectSize, - PAGE_WRITECOPY, - &oldProtection ); - if ( !ok ) { - DWORD dosError = GetLastError(); - - if (dosError == ERROR_COMMITMENT_LIMIT) { - // System has run out of memory between physical RAM & page file, tell the user - BSONObjBuilder bb; - - ProcessInfo p; - p.getExtraInfo(bb); - - log() << "MongoDB has exhausted the system memory capacity."; - log() << "Current Memory Status: " << bb.obj().toString(); - } - - log() << "VirtualProtect for " << mmf->filename() - << " chunk " << chunkno - << " failed with " << errnoWithDescription( dosError ) - << " (chunk size is " << protectSize - << ", address is " << hex << protectStart << dec << ")" - << " in mongo::makeChunkWritable, terminating" - << endl; - fassertFailed( 16362 ); - } - } - - writable.set(chunkno); - } - void* MemoryMappedFile::createPrivateMap() { verify( maphandle ); @@ -450,7 +398,6 @@ namespace mongo { break; } - clearWritableBits( privateMapAddress ); views.push_back( privateMapAddress ); return privateMapAddress; } @@ -458,7 +405,10 @@ namespace mongo { void* MemoryMappedFile::remapPrivateView(void *oldPrivateAddr) { LockMongoFilesExclusive lockMongoFiles; - clearWritableBits(oldPrivateAddr); + privateViews.clearWritableBits(oldPrivateAddr, len); + + scoped_lock lk(mapViewMutex); + if( !UnmapViewOfFile(oldPrivateAddr) ) { DWORD dosError = GetLastError(); log() << "UnMapViewOfFile for " << filename() |