From c28079649e81e3beb96e124e27c22b66d525d480 Mon Sep 17 00:00:00 2001 From: Greg Studer Date: Mon, 12 Aug 2013 10:03:53 -0400 Subject: SERVER-10458 sanity check before critical section that all cloned docs sent --- s/d_migrate.cpp | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/s/d_migrate.cpp b/s/d_migrate.cpp index a0ccdd8df4d..1bc2a0a61ab 100644 --- a/s/d_migrate.cpp +++ b/s/d_migrate.cpp @@ -555,6 +555,11 @@ namespace mongo { long long mbUsed() const { return _memoryUsed / ( 1024 * 1024 ); } + std::size_t cloneLocsRemaining() { + scoped_spinlock lk( _trackerLocks ); + return _cloneLocs.size(); + } + bool getInCriticalSection() const { scoped_lock l(_m); return _inCriticalSection; } void setInCriticalSection( bool b ) { scoped_lock l(_m); _inCriticalSection = b; } @@ -908,11 +913,14 @@ namespace mongo { timing.done( 3 ); // 4. + + // Track last result from TO shard for sanity check + BSONObj res; for ( int i=0; i<86400; i++ ) { // don't want a single chunk move to take more than a day assert( dbMutex.getState() == 0 ); sleepsecs( 1 ); ScopedDbConnection conn( to ); - BSONObj res; + res = BSONObj(); bool ok = conn->runCommand( "admin" , BSON( "_recvChunkStatus" << 1 ) , res ); res = res.getOwned(); conn.done(); @@ -948,6 +956,26 @@ namespace mongo { timing.done(4); // 5. + + // Before we get into the critical section of the migration, let's double check + // that the docs have been cloned + log() << "About to check if it is safe to enter critical section" << endl; + + // Ensure all cloned docs have actually been transferred + std::size_t locsRemaining = migrateFromStatus.cloneLocsRemaining(); + if ( locsRemaining != 0 ) { + + errmsg = + str::stream() << "moveChunk cannot enter critical section before all data is" + << " cloned, " << locsRemaining << " locs were not transferred" + << " but to-shard reported " << res; + + // Should never happen, but safe to abort before critical section + error() << errmsg << migrateLog; + dassert( false ); + return false; + } + { // 5.a // we're under the collection lock here, so no other migrate can change maxVersion or ShardChunkManager state -- cgit v1.2.1