summaryrefslogtreecommitdiff
path: root/src/mongo
diff options
context:
space:
mode:
authorGreg Studer <greg@10gen.com>2012-07-05 15:05:10 -0400
committerGreg Studer <greg@10gen.com>2012-07-30 16:07:09 -0400
commit424df087f9256b85e7b3a9b31d36518081ff2875 (patch)
treeaba2fe1e3204be9f1bf282aa108c0ae918cfa357 /src/mongo
parent3ac0e57723cd6169e305b6795b686ec45b2d4b83 (diff)
downloadmongo-424df087f9256b85e7b3a9b31d36518081ff2875.tar.gz
SERVER-6282 wait-doubling backoff for local mongos errors requiring config server access
Diffstat (limited to 'src/mongo')
-rw-r--r--src/mongo/dbtests/basictests.cpp53
-rw-r--r--src/mongo/s/strategy_shard.cpp12
-rw-r--r--src/mongo/util/time_support.cpp36
-rw-r--r--src/mongo/util/time_support.h23
4 files changed, 121 insertions, 3 deletions
diff --git a/src/mongo/dbtests/basictests.cpp b/src/mongo/dbtests/basictests.cpp
index 9d1ff713875..ded62a13049 100644
--- a/src/mongo/dbtests/basictests.cpp
+++ b/src/mongo/dbtests/basictests.cpp
@@ -27,6 +27,7 @@
#include "../util/paths.h"
#include "../util/stringutils.h"
#include "../util/compress.h"
+#include "../util/time_support.h"
#include "../db/db.h"
namespace BasicTests {
@@ -260,6 +261,56 @@ namespace BasicTests {
};
+ class SleepBackoffTest { // Timing test for Backoff: sleeps must grow up to the cap, then restart after a long idle gap
+ public:
+ void run() {
+ // Cap each sleep at 1000 ms; the second Backoff argument below is twice that cap
+ int maxSleepTimeMillis = 1000;
+ int lastSleepTimeMillis = -1; // -1 so the first almostGTE check passes for any non-negative elapsed time
+ int epsMillis = 50; // Allowable imprecision for timing
+
+ Backoff backoff( maxSleepTimeMillis, maxSleepTimeMillis * 2 );
+
+ Timer t;
+
+ // Make sure our backoff increases to the maximum value
+ int maxSleepCount = 0;
+ while( maxSleepCount < 3 ){ // NOTE(review): no iteration limit - loops until three sleeps land within epsMillis of the cap
+ // Time one backoff step
+ t.reset();
+
+ backoff.nextSleepMillis();
+
+ int elapsedMillis = t.millis();
+
+ log() << "Slept for " << elapsedMillis << endl;
+ // Each sleep must be at least as long as the previous one, within epsMillis
+ ASSERT( almostGTE( elapsedMillis, lastSleepTimeMillis, epsMillis ) );
+ lastSleepTimeMillis = elapsedMillis;
+ // Count the iterations that slept for roughly the cap
+ if( almostEq( elapsedMillis, maxSleepTimeMillis, epsMillis ) ) maxSleepCount++;
+ }
+
+ // Make sure that our backoff gets reset if we wait much longer than the maximum wait
+ sleepmillis( maxSleepTimeMillis * 4 );
+
+ t.reset();
+ backoff.nextSleepMillis();
+ // After the idle gap the next step should take roughly no time, i.e. within epsMillis of 0 ms
+ ASSERT( almostEq( t.millis(), 0, epsMillis ) );
+
+ }
+ // True when a and b differ by at most eps
+ bool almostEq( int a, int b, int eps ){
+ return std::abs( a - b ) <= eps;
+ }
+ // True when a >= b, or when a is below b by no more than eps
+ bool almostGTE( int a, int b, int eps ){
+ if( almostEq( a, b, eps ) ) return true;
+ return a > b;
+ }
+ };
+
class AssertTests {
public:
@@ -723,7 +774,6 @@ namespace BasicTests {
Tee _tee;
};
-
class All : public Suite {
public:
All() : Suite( "basic" ) {
@@ -739,6 +789,7 @@ namespace BasicTests {
add< stringbuildertests::reset2 >();
add< sleeptest >();
+ add< SleepBackoffTest >();
add< AssertTests >();
add< ArrayTests::basic1 >();
diff --git a/src/mongo/s/strategy_shard.cpp b/src/mongo/s/strategy_shard.cpp
index 23d2591c466..534defa8b17 100644
--- a/src/mongo/s/strategy_shard.cpp
+++ b/src/mongo/s/strategy_shard.cpp
@@ -216,6 +216,9 @@ namespace mongo {
}
}
+ static const int maxWaitMillis = 500; // Cap, in ms, on a single backoff sleep (passed as the first Backoff constructor argument below)
+ boost::thread_specific_ptr<Backoff> perThreadBackoff; // Holds each thread's Backoff, created on first use; NOTE(review): not static, unlike maxWaitMillis - confirm external linkage is intended
+
/**
* Invoked before mongos needs to throw an error relating to an operation which cannot
* be performed on a sharded collection.
@@ -223,10 +226,15 @@ namespace mongo {
* This prevents mongos from refreshing config data too quickly in response to bad requests,
* since doing so is expensive.
*
- * TODO: Can we restructure to make this simpler?
+ * Each thread gets its own backoff wait sequence, to avoid interfering with other valid
+ * operations.
*/
void _sleepForVerifiedLocalError(){
- sleepsecs( 1 );
+ // First call on this thread: create its Backoff (sleep cap maxWaitMillis, reset argument twice that)
+ if( ! perThreadBackoff.get() )
+ perThreadBackoff.reset( new Backoff( maxWaitMillis, maxWaitMillis * 2 ) );
+ // Sleep for this thread's next backoff interval
+ perThreadBackoff.get()->nextSleepMillis();
}
void _handleRetries( const string& op,
diff --git a/src/mongo/util/time_support.cpp b/src/mongo/util/time_support.cpp
index 0021c577c3b..01cc9a1efcf 100644
--- a/src/mongo/util/time_support.cpp
+++ b/src/mongo/util/time_support.cpp
@@ -171,6 +171,42 @@ namespace mongo {
}
#endif
+ void Backoff::nextSleepMillis(){
+ // Sleeps the caller for the next backoff interval: 1 ms after a reset, otherwise double the previous sleep, capped at _maxSleepMillis
+ // Get the current time
+ unsigned long long currTimeMillis = curTimeMillis64();
+
+ int lastSleepMillis = _lastSleepMillis;
+ // First call, or the clock appears to have gone backwards: treat the last error as happening now
+ if( _lastErrorTimeMillis == 0 || _lastErrorTimeMillis > currTimeMillis /* VM bugs exist */ )
+ _lastErrorTimeMillis = currTimeMillis;
+ unsigned long long lastErrorTimeMillis = _lastErrorTimeMillis;
+ _lastErrorTimeMillis = currTimeMillis;
+
+ // Backoff logic
+
+ // Get the time since the last error (this includes the time the previous call spent sleeping)
+ unsigned long long timeSinceLastErrorMillis = currTimeMillis - lastErrorTimeMillis;
+
+ // Makes the cast below safe
+ verify( _resetAfterMillis >= 0 );
+
+ // If we haven't seen another error recently (more than _resetAfterMillis ago), reset our
+ // wait counter.
+ if( timeSinceLastErrorMillis > (unsigned)( _resetAfterMillis ) ) lastSleepMillis = 0;
+
+ // Makes the test below sane
+ verify( _maxSleepMillis > 0 );
+
+ // Start at 1 ms, then double up to the cap; comparing against half the cap avoids signed overflow in "* 2"
+ if( lastSleepMillis == 0 ) lastSleepMillis = 1;
+ else lastSleepMillis = ( lastSleepMillis > _maxSleepMillis / 2 ) ? _maxSleepMillis : lastSleepMillis * 2;
+
+ // Store the last slept time
+ _lastSleepMillis = lastSleepMillis;
+ sleepmillis( lastSleepMillis );
+ }
+
extern long long jsTime_virtual_skew;
extern boost::thread_specific_ptr<long long> jsTime_virtual_thread_skew;
diff --git a/src/mongo/util/time_support.h b/src/mongo/util/time_support.h
index 511bc092fb1..254401b01ad 100644
--- a/src/mongo/util/time_support.h
+++ b/src/mongo/util/time_support.h
@@ -43,6 +43,29 @@ namespace mongo {
void sleepmillis(long long ms);
void sleepmicros(long long micros);
+ class Backoff { // Wait-doubling backoff: each nextSleepMillis() call sleeps, doubling the previous sleep up to a cap
+ public:
+ // maxSleepMillis: cap on one sleep, in ms. resetAfter: extra idle ms, added on top of the cap, after which the sequence restarts
+ Backoff( int maxSleepMillis, int resetAfter ) :
+ _maxSleepMillis( maxSleepMillis ),
+ _resetAfterMillis( maxSleepMillis + resetAfter ), // Don't reset < the max sleep
+ _lastSleepMillis( 0 ),
+ _lastErrorTimeMillis( 0 )
+ {}
+ // Blocks the caller for the next interval (despite the name, returns nothing); verify()s _maxSleepMillis > 0 and _resetAfterMillis >= 0
+ void nextSleepMillis();
+
+ private:
+
+ // Parameters
+ int _maxSleepMillis;
+ int _resetAfterMillis; // maxSleepMillis + resetAfter; gap (ms) between calls beyond which the next sleep restarts at 1 ms
+
+ // Last sleep information
+ int _lastSleepMillis; // 0 until the first sleep
+ unsigned long long _lastErrorTimeMillis; // curTimeMillis64() value at the most recent nextSleepMillis() call; 0 = never called
+ };
+
// DO NOT TOUCH except for testing
void jsTimeVirtualSkew( long long skew );