summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Randolph Tan <randolph@10gen.com> 2014-03-27 11:42:33 -0400
committer Randolph Tan <randolph@10gen.com> 2014-03-27 13:51:35 -0400
commit 0e1908763a942f33ff90ce30f0b7a5e90a8d947d (patch)
tree cf214ed0c6bb37d759bbc9ee19c48387fa5256a1
parent e8d714a7471249cc66d695a352f1012aa7871ff3 (diff)
download mongo-0e1908763a942f33ff90ce30f0b7a5e90a8d947d.tar.gz
SERVER-13376 Make sync7.js less sensitive to delays
(cherry picked from commit 06c40ab0bb2a36c709fb4d3b91961e91fb08b1f1)
-rw-r--r-- jstests/sharding/sync7.js 4
-rw-r--r-- src/mongo/client/distlock.h 25
2 files changed, 20 insertions, 9 deletions
diff --git a/jstests/sharding/sync7.js b/jstests/sharding/sync7.js
index 87eeaa0b36b..8c1430ea3c0 100644
--- a/jstests/sharding/sync7.js
+++ b/jstests/sharding/sync7.js
@@ -3,11 +3,11 @@
s = new ShardingTest( "moveDistLock", 3, 0, undefined, { sync : true } );
s._connections[0].getDB( "admin" ).runCommand( { _skewClockCommand : 1, skew : 15000 } )
-s._connections[1].getDB( "admin" ).runCommand( { _skewClockCommand : 1, skew : -16000 } )
+s._connections[1].getDB( "admin" ).runCommand( { _skewClockCommand : 1, skew : -32000 } )
// We need to start another mongos after skewing the clock, since the first mongos will have already
// tested the config servers (via the balancer) before we manually skewed them
-otherMongos = startMongos( { port : 30020, v : 0, configdb : s._configDB } );
+otherMongos = startMongos( { port : 30020, v : 2, configdb : s._configDB } );
// Initialize DB data
initDB = function(name) {
diff --git a/src/mongo/client/distlock.h b/src/mongo/client/distlock.h
index 183826afcce..774d94a0df8 100644
--- a/src/mongo/client/distlock.h
+++ b/src/mongo/client/distlock.h
@@ -57,12 +57,19 @@ namespace mongo {
};
/**
- * The distributed lock is a configdb backed way of synchronizing system-wide tasks. A task must be identified by a
- * unique name across the system (e.g., "balancer"). A lock is taken by writing a document in the configdb's locks
- * collection with that name.
+ * The distributed lock is a configdb backed way of synchronizing system-wide tasks. A task
+ * must be identified by a unique name across the system (e.g., "balancer"). A lock is taken
+ * by writing a document in the configdb's locks collection with that name.
*
- * To be maintained, each taken lock needs to be revalidated ("pinged") within a pre-established amount of time. This
- * class does this maintenance automatically once a DistributedLock object was constructed.
+ * To be maintained, each taken lock needs to be revalidated ("pinged") within a
+ * pre-established amount of time. This class does this maintenance automatically once a
+ * DistributedLock object is constructed. The ping procedure records the local time in
+ * the ping document, but that time is untrusted and is only used as a point of reference
+ * for whether the ping was refreshed or not. Ultimately, the clock of a configdb is the source
+ * of truth when determining whether a ping is still fresh or not. This is achieved by
+ * (1) remembering the ping document time along with config server time when unable to
+ * take a lock, and (2) ensuring all config servers report similar times and have similar
+ * time rates (the difference in times must start and stay small).
*/
class DistributedLock {
public:
@@ -147,9 +154,13 @@ namespace mongo {
const ConnectionString& getRemoteConnection();
/**
- * Check the skew between a cluster of servers
+ * Checks the skew among a cluster of servers and returns true if the min and max clock
+ * times among the servers are within maxClockSkew.
*/
- static bool checkSkew( const ConnectionString& cluster, unsigned skewChecks = NUM_LOCK_SKEW_CHECKS, unsigned long long maxClockSkew = MAX_LOCK_CLOCK_SKEW, unsigned long long maxNetSkew = MAX_LOCK_NET_SKEW );
+ static bool checkSkew( const ConnectionString& cluster,
+ unsigned skewChecks = NUM_LOCK_SKEW_CHECKS,
+ unsigned long long maxClockSkew = MAX_LOCK_CLOCK_SKEW,
+ unsigned long long maxNetSkew = MAX_LOCK_NET_SKEW );
/**
* Get the remote time from a server or cluster