diff options
author | Greg Studer <greg@10gen.com> | 2012-07-05 15:05:10 -0400 |
---|---|---|
committer | Greg Studer <greg@10gen.com> | 2012-07-30 16:07:09 -0400 |
commit | 424df087f9256b85e7b3a9b31d36518081ff2875 (patch) | |
tree | aba2fe1e3204be9f1bf282aa108c0ae918cfa357 /src/mongo | |
parent | 3ac0e57723cd6169e305b6795b686ec45b2d4b83 (diff) | |
download | mongo-424df087f9256b85e7b3a9b31d36518081ff2875.tar.gz |
SERVER-6282 wait-doubling backoff for local mongos errors requiring config server access
Diffstat (limited to 'src/mongo')
-rw-r--r-- | src/mongo/dbtests/basictests.cpp | 53 | ||||
-rw-r--r-- | src/mongo/s/strategy_shard.cpp | 12 | ||||
-rw-r--r-- | src/mongo/util/time_support.cpp | 36 | ||||
-rw-r--r-- | src/mongo/util/time_support.h | 23 |
4 files changed, 121 insertions, 3 deletions
```diff
diff --git a/src/mongo/dbtests/basictests.cpp b/src/mongo/dbtests/basictests.cpp
index 9d1ff713875..ded62a13049 100644
--- a/src/mongo/dbtests/basictests.cpp
+++ b/src/mongo/dbtests/basictests.cpp
@@ -27,6 +27,7 @@
 #include "../util/paths.h"
 #include "../util/stringutils.h"
 #include "../util/compress.h"
+#include "../util/time_support.h"
 #include "../db/db.h"
 
 namespace BasicTests {
@@ -260,6 +261,56 @@ namespace BasicTests {
 
     };
 
+    class SleepBackoffTest {
+    public:
+        void run() {
+
+            int maxSleepTimeMillis = 1000;
+            int lastSleepTimeMillis = -1;
+            int epsMillis = 50; // Allowable inprecision for timing
+
+            Backoff backoff( maxSleepTimeMillis, maxSleepTimeMillis * 2 );
+
+            Timer t;
+
+            // Make sure our backoff increases to the maximum value
+            int maxSleepCount = 0;
+            while( maxSleepCount < 3 ){
+
+                t.reset();
+
+                backoff.nextSleepMillis();
+
+                int elapsedMillis = t.millis();
+
+                log() << "Slept for " << elapsedMillis << endl;
+
+                ASSERT( almostGTE( elapsedMillis, lastSleepTimeMillis, epsMillis ) );
+                lastSleepTimeMillis = elapsedMillis;
+
+                if( almostEq( elapsedMillis, maxSleepTimeMillis, epsMillis ) ) maxSleepCount++;
+            }
+
+            // Make sure that our backoff gets reset if we wait much longer than the maximum wait
+            sleepmillis( maxSleepTimeMillis * 4 );
+
+            t.reset();
+            backoff.nextSleepMillis();
+
+            ASSERT( almostEq( t.millis(), 0, epsMillis ) );
+
+        }
+
+        bool almostEq( int a, int b, int eps ){
+            return std::abs( a - b ) <= eps;
+        }
+
+        bool almostGTE( int a, int b, int eps ){
+            if( almostEq( a, b, eps ) ) return true;
+            return a > b;
+        }
+    };
+
     class AssertTests {
     public:
 
@@ -723,7 +774,6 @@ namespace BasicTests {
         Tee _tee;
     };
 
-
     class All : public Suite {
     public:
         All() : Suite( "basic" ) {
@@ -739,6 +789,7 @@ namespace BasicTests {
             add< stringbuildertests::reset2 >();
 
             add< sleeptest >();
+            add< SleepBackoffTest >();
             add< AssertTests >();
 
             add< ArrayTests::basic1 >();
diff --git a/src/mongo/s/strategy_shard.cpp b/src/mongo/s/strategy_shard.cpp
index 23d2591c466..534defa8b17 100644
--- a/src/mongo/s/strategy_shard.cpp
+++ b/src/mongo/s/strategy_shard.cpp
@@ -216,6 +216,9 @@ namespace mongo {
         }
     }
 
+    static const int maxWaitMillis = 500;
+    boost::thread_specific_ptr<Backoff> perThreadBackoff;
+
     /**
      * Invoked before mongos needs to throw an error relating to an operation which cannot
      * be performed on a sharded collection.
@@ -223,10 +226,15 @@ namespace mongo {
      * This prevents mongos from refreshing config data too quickly in response to bad requests,
      * since doing so is expensive.
      *
-     * TODO: Can we restructure to make this simpler?
+     * Each thread gets its own backoff wait sequence, to avoid interfering with other valid
+     * operations.
      */
     void _sleepForVerifiedLocalError(){
-        sleepsecs( 1 );
+
+        if( ! perThreadBackoff.get() )
+            perThreadBackoff.reset( new Backoff( maxWaitMillis, maxWaitMillis * 2 ) );
+
+        perThreadBackoff.get()->nextSleepMillis();
     }
 
     void _handleRetries( const string& op,
diff --git a/src/mongo/util/time_support.cpp b/src/mongo/util/time_support.cpp
index 0021c577c3b..01cc9a1efcf 100644
--- a/src/mongo/util/time_support.cpp
+++ b/src/mongo/util/time_support.cpp
@@ -171,6 +171,42 @@ namespace mongo {
     }
 #endif
 
+    void Backoff::nextSleepMillis(){
+
+        // Get the current time
+        unsigned long long currTimeMillis = curTimeMillis64();
+
+        int lastSleepMillis = _lastSleepMillis;
+
+        if( _lastErrorTimeMillis == 0 || _lastErrorTimeMillis > currTimeMillis /* VM bugs exist */ )
+            _lastErrorTimeMillis = currTimeMillis;
+        unsigned long long lastErrorTimeMillis = _lastErrorTimeMillis;
+        _lastErrorTimeMillis = currTimeMillis;
+
+        // Backoff logic
+
+        // Get the time since the last error
+        unsigned long long timeSinceLastErrorMillis = currTimeMillis - lastErrorTimeMillis;
+
+        // Makes the cast below safe
+        verify( _resetAfterMillis >= 0 );
+
+        // If we haven't seen another error recently (3x the max wait time), reset our
+        // wait counter.
+        if( timeSinceLastErrorMillis > (unsigned)( _resetAfterMillis ) ) lastSleepMillis = 0;
+
+        // Makes the test below sane
+        verify( _maxSleepMillis > 0 );
+
+        // Wait a power of two millis
+        if( lastSleepMillis == 0 ) lastSleepMillis = 1;
+        else lastSleepMillis = std::min( lastSleepMillis * 2, _maxSleepMillis );
+
+        // Store the last slept time
+        _lastSleepMillis = lastSleepMillis;
+        sleepmillis( lastSleepMillis );
+    }
+
     extern long long jsTime_virtual_skew;
     extern boost::thread_specific_ptr<long long> jsTime_virtual_thread_skew;
 
diff --git a/src/mongo/util/time_support.h b/src/mongo/util/time_support.h
index 511bc092fb1..254401b01ad 100644
--- a/src/mongo/util/time_support.h
+++ b/src/mongo/util/time_support.h
@@ -43,6 +43,29 @@ namespace mongo {
     void sleepmillis(long long ms);
     void sleepmicros(long long micros);
 
+    class Backoff {
+    public:
+
+        Backoff( int maxSleepMillis, int resetAfter ) :
+            _maxSleepMillis( maxSleepMillis ),
+            _resetAfterMillis( maxSleepMillis + resetAfter ), // Don't reset < the max sleep
+            _lastSleepMillis( 0 ),
+            _lastErrorTimeMillis( 0 )
+        {}
+
+        void nextSleepMillis();
+
+    private:
+
+        // Parameters
+        int _maxSleepMillis;
+        int _resetAfterMillis;
+
+        // Last sleep information
+        int _lastSleepMillis;
+        unsigned long long _lastErrorTimeMillis;
+    };
+
     // DO NOT TOUCH except for testing
     void jsTimeVirtualSkew( long long skew );
 
```