diff options
author | Randolph Tan <randolph@10gen.com> | 2014-03-27 11:42:33 -0400 |
---|---|---|
committer | Randolph Tan <randolph@10gen.com> | 2014-03-27 13:51:35 -0400 |
commit | 0e1908763a942f33ff90ce30f0b7a5e90a8d947d (patch) | |
tree | cf214ed0c6bb37d759bbc9ee19c48387fa5256a1 | |
parent | e8d714a7471249cc66d695a352f1012aa7871ff3 (diff) | |
download | mongo-0e1908763a942f33ff90ce30f0b7a5e90a8d947d.tar.gz |
SERVER-13376 Make sync7.js less sensitive to delays
(cherry picked from commit 06c40ab0bb2a36c709fb4d3b91961e91fb08b1f1)
-rw-r--r-- | jstests/sharding/sync7.js | 4 | ||||
-rw-r--r-- | src/mongo/client/distlock.h | 25 |
2 files changed, 20 insertions, 9 deletions
diff --git a/jstests/sharding/sync7.js b/jstests/sharding/sync7.js index 87eeaa0b36b..8c1430ea3c0 100644 --- a/jstests/sharding/sync7.js +++ b/jstests/sharding/sync7.js @@ -3,11 +3,11 @@ s = new ShardingTest( "moveDistLock", 3, 0, undefined, { sync : true } ); s._connections[0].getDB( "admin" ).runCommand( { _skewClockCommand : 1, skew : 15000 } ) -s._connections[1].getDB( "admin" ).runCommand( { _skewClockCommand : 1, skew : -16000 } ) +s._connections[1].getDB( "admin" ).runCommand( { _skewClockCommand : 1, skew : -32000 } ) // We need to start another mongos after skewing the clock, since the first mongos will have already // tested the config servers (via the balancer) before we manually skewed them -otherMongos = startMongos( { port : 30020, v : 0, configdb : s._configDB } ); +otherMongos = startMongos( { port : 30020, v : 2, configdb : s._configDB } ); // Initialize DB data initDB = function(name) { diff --git a/src/mongo/client/distlock.h b/src/mongo/client/distlock.h index 183826afcce..774d94a0df8 100644 --- a/src/mongo/client/distlock.h +++ b/src/mongo/client/distlock.h @@ -57,12 +57,19 @@ namespace mongo { }; /** - * The distributed lock is a configdb backed way of synchronizing system-wide tasks. A task must be identified by a - * unique name across the system (e.g., "balancer"). A lock is taken by writing a document in the configdb's locks - * collection with that name. + * The distributed lock is a configdb backed way of synchronizing system-wide tasks. A task + * must be identified by a unique name across the system (e.g., "balancer"). A lock is taken + * by writing a document in the configdb's locks collection with that name. * - * To be maintained, each taken lock needs to be revalidated ("pinged") within a pre-established amount of time. This - * class does this maintenance automatically once a DistributedLock object was constructed. + * To be maintained, each taken lock needs to be revalidated ("pinged") within a + * pre-established amount of time. 
This class does this maintenance automatically once a + * DistributedLock object was constructed. The ping procedure records the local time to + * the ping document, but that time is untrusted and is only used as a point of reference + * of whether the ping was refreshed or not. Ultimately, the clock of a configdb is the source + * of truth when determining whether a ping is still fresh or not. This is achieved by + * (1) remembering the ping document time along with config server time when unable to + * take a lock, and (2) ensuring all config servers report similar times and have similar + * time rates (the difference in times must start and stay small). */ class DistributedLock { public: @@ -147,9 +154,13 @@ namespace mongo { const ConnectionString& getRemoteConnection(); /** - * Check the skew between a cluster of servers + * Checks the skew among a cluster of servers and returns true if the min and max clock + * times among the servers are within maxClockSkew. */ - static bool checkSkew( const ConnectionString& cluster, unsigned skewChecks = NUM_LOCK_SKEW_CHECKS, unsigned long long maxClockSkew = MAX_LOCK_CLOCK_SKEW, unsigned long long maxNetSkew = MAX_LOCK_NET_SKEW ); + static bool checkSkew( const ConnectionString& cluster, + unsigned skewChecks = NUM_LOCK_SKEW_CHECKS, + unsigned long long maxClockSkew = MAX_LOCK_CLOCK_SKEW, + unsigned long long maxNetSkew = MAX_LOCK_NET_SKEW ); /** * Get the remote time from a server or cluster |