summaryrefslogtreecommitdiff
path: root/client
diff options
context:
space:
mode:
authorEliot Horowitz <eliot@10gen.com>2011-02-17 19:59:33 -0500
committerEliot Horowitz <eliot@10gen.com>2011-02-17 19:59:33 -0500
commit8d4850ae65b5ca314a540090bd94714cf327b71b (patch)
tree0c71b2d70675340dbd29ecf90c6acc2651c83f07 /client
parentabbff52dbda3f7679bb86ce54c8899e953507da4 (diff)
downloadmongo-8d4850ae65b5ca314a540090bd94714cf327b71b.tar.gz
detect clock skew in DistributedLock SERVER-2584
Diffstat (limited to 'client')
-rw-r--r--client/distlock.cpp29
1 files changed, 19 insertions, 10 deletions
diff --git a/client/distlock.cpp b/client/distlock.cpp
index 307ae971f73..9ec98eaed18 100644
--- a/client/distlock.cpp
+++ b/client/distlock.cpp
@@ -182,7 +182,7 @@ namespace mongo {
{
// make sure its there so we can use simple update logic below
- BSONObj o = conn->findOne( _ns , _id );
+ BSONObj o = conn->findOne( _ns , _id ).getOwned();
if ( o.isEmpty() ) {
try {
log(4) << "dist_lock inserting initial doc in " << _ns << " for lock " << _name << endl;
@@ -192,32 +192,41 @@ namespace mongo {
log() << "dist_lock could not insert initial doc: " << e << endl;
}
}
-
+
else if ( o["state"].numberInt() > 0 ) {
BSONObj lastPing = conn->findOne( lockPingNS , o["process"].wrap( "_id" ) );
if ( lastPing.isEmpty() ) {
// if a lock is taken but there's no ping for it, we're in an inconsistent situation
// if the lock holder (mongos or d) does not exist anymore, the lock could safely be removed
// but we'd require analysis of the situation before a manual intervention
- log(LL_ERROR) << "config.locks: " << _name << " lock is taken by old process? "
- << "remove the following lock if the process is not active anymore: " << o << endl;
+ error() << "config.locks: " << _name << " lock is taken by old process? "
+ << "remove the following lock if the process is not active anymore: " << o << endl;
*other = o;
- other->getOwned();
conn.done();
return false;
}
- unsigned long long elapsed = jsTime() - lastPing["ping"].Date(); // in ms
+ unsigned long long now = jsTime();
+ unsigned long long pingTime = lastPing["ping"].Date();
+
+ if ( now < pingTime ) {
+ // clock skew
+ warning() << "dist_lock has detected clock skew of " << ( pingTime - now ) << "ms" << endl;
+ *other = o;
+ conn.done();
+ return false;
+ }
+
+ unsigned long long elapsed = now - pingTime;
elapsed = elapsed / ( 1000 * 60 ); // convert to minutes
if ( elapsed > ( 60 * 24 * 365 * 100 ) /* 100 years */ ) {
warning() << "distlock elapsed time seems impossible: " << lastPing << endl;
}
-
+
if ( elapsed <= _takeoverMinutes ) {
log(1) << "dist_lock lock failed because taken by: " << o << " elapsed minutes: " << elapsed << endl;
*other = o;
- other->getOwned();
conn.done();
return false;
}
@@ -226,8 +235,8 @@ namespace mongo {
conn->update( _ns , _id , BSON( "$set" << BSON( "state" << 0 ) ) );
string err = conn->getLastError();
if ( ! err.empty() ) {
- log( LL_WARNING ) << "dist_lock take over from: " << o << " failed: " << err << endl;
- *other = o;
+ warning() << "dist_lock take over from: " << o << " failed: " << err << endl;
+ *other = o.getOwned();
other->getOwned();
conn.done();
return false;