summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGreg Studer <greg@10gen.com>2013-08-09 15:52:05 -0400
committerMatt Kangas <matt.kangas@10gen.com>2013-08-09 16:09:17 -0500
commit4d957329dc47c05a0edfd6b85f8170508aa3321f (patch)
treebb03a7222fc30977764a9b87d22490ffc7dab514
parent484fc234656308135234cfca7c184f8f8520c497 (diff)
downloadmongo-4d957329dc47c05a0edfd6b85f8170508aa3321f.tar.gz
SERVER-10458 sanity check before critical section that all cloned docs sent
-rw-r--r--src/mongo/s/d_migrate.cpp33
1 files changed, 30 insertions, 3 deletions
diff --git a/src/mongo/s/d_migrate.cpp b/src/mongo/s/d_migrate.cpp
index ec528226b27..85eb18294dd 100644
--- a/src/mongo/s/d_migrate.cpp
+++ b/src/mongo/s/d_migrate.cpp
@@ -639,6 +639,11 @@ namespace mongo {
_cloneLocs.erase( dl );
}
+ std::size_t cloneLocsRemaining() { // Returns the current number of entries in _cloneLocs; the pre-critical-section check treats a non-zero value as "not all docs transferred".
+ scoped_spinlock lk( _trackerLocks ); // hold _trackerLocks for the duration of the read of _cloneLocs
+ return _cloneLocs.size();
+ }
+
long long mbUsed() const { return _memoryUsed / ( 1024 * 1024 ); }
bool getInCriticalSection() const {
@@ -1162,15 +1167,20 @@ namespace mongo {
timing.done( 3 );
// 4.
+
+ // Track last result from TO shard for sanity check
+ BSONObj res;
for ( int i=0; i<86400; i++ ) { // don't want a single chunk move to take more than a day
verify( !Lock::isLocked() );
// Exponential sleep backoff, up to 1024ms. Don't sleep much on the first few
// iterations, since we want empty chunk migrations to be fast.
sleepmillis( 1 << std::min( i , 10 ) );
+
scoped_ptr<ScopedDbConnection> conn(
ScopedDbConnection::getScopedDbConnection( toShard.getConnString() ) );
- BSONObj res;
+
bool ok;
+ res = BSONObj();
try {
ok = conn->get()->runCommand( "admin" , BSON( "_recvChunkStatus" << 1 ) , res );
res = res.getOwned();
@@ -1218,9 +1228,26 @@ namespace mongo {
// 5.
// Before we get into the critical section of the migration, let's double check
- // that the config servers are reachable and the lock is in place.
- log() << "About to check if it is safe to enter critical section";
+ // that the docs have been cloned, the config servers are reachable,
+ // and the lock is in place.
+ log() << "About to check if it is safe to enter critical section" << endl;
+
+ // Ensure all cloned docs have actually been transferred
+ std::size_t locsRemaining = migrateFromStatus.cloneLocsRemaining();
+ if ( locsRemaining != 0 ) {
+
+ errmsg =
+ str::stream() << "moveChunk cannot enter critical section before all data is"
+ << " cloned, " << locsRemaining << " locs were not transferred"
+ << " but to-shard reported " << res;
+
+ // Should never happen, but safe to abort before critical section
+ error() << errmsg << migrateLog;
+ dassert( false );
+ return false;
+ }
+ // Ensure distributed lock still held
string lockHeldMsg;
bool lockHeld = dlk.isLockHeld( 30.0 /* timeout */, &lockHeldMsg );
if ( !lockHeld ) {