// distlock.h
/* Copyright 2009 10gen Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License, version 3,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* As a special exception, the copyright holders give permission to link the
* code of portions of this program with the OpenSSL library under certain
* conditions as described in each individual source file and distribute
* linked combinations including the program with the OpenSSL library. You
* must comply with the GNU Affero General Public License in all respects
* for all of the code used other than as permitted herein. If you modify
* file(s) with this exception, you may extend this exception to your
* version of the file(s), but you are not obligated to do so. If you do not
* wish to do so, delete this exception statement from your version. If you
* delete this exception statement from all source files in the program,
* then also delete it in the license file.
*/
#pragma once
#include "mongo/platform/basic.h"

#include <map>
#include <string>
#include <utility>

#include "mongo/client/connpool.h"
#include "mongo/client/export_macros.h"
#include "mongo/client/syncclusterconnection.h"
#include "mongo/logger/labeled_level.h"
namespace mongo {
namespace {

// Timing parameters used by the distributed lock code below.
//
// The maximum clock skew we need to handle between config servers is
// 2 * MAX_LOCK_NET_SKEW + MAX_LOCK_CLOCK_SKEW.
//
// Net effect of *this* clock being slow is effectively a multiplier on the max net skew
// and a linear increase or decrease of the max clock skew.
enum TimeConstants {
    // --- Base values ---

    // NOTE(review): presumably milliseconds, i.e. 15 minutes - confirm against the users.
    LOCK_TIMEOUT = 15 * 60 * 1000,

    // Divisor applied to LOCK_TIMEOUT to obtain the ping interval and the skew bounds.
    LOCK_SKEW_FACTOR = 30,

    // Default number of skew checks (see DistributedLock::checkSkew).
    NUM_LOCK_SKEW_CHECKS = 3,

    // --- Values derived from the base values (all equal LOCK_TIMEOUT / LOCK_SKEW_FACTOR) ---

    LOCK_PING = LOCK_TIMEOUT / LOCK_SKEW_FACTOR,
    MAX_LOCK_NET_SKEW = LOCK_TIMEOUT / LOCK_SKEW_FACTOR,
    MAX_LOCK_CLOCK_SKEW = LOCK_TIMEOUT / LOCK_SKEW_FACTOR
};

}  // namespace
/**
 * Exception type used for errors that occur while managing distributed locks.
 *
 * Both constructors simply forward the message and the error code to DBException.
 */
class MONGO_CLIENT_API LockException : public DBException {
public:
    // Overload taking the message as a std::string.
    LockException(const std::string& msg, int code)
        : DBException(msg, code) {}

    // Overload taking the message as a C string.
    LockException(const char* msg, int code)
        : DBException(msg, code) {}

    // Declared non-throwing.
    virtual ~LockException() throw() {}
};
/**
 * LockException specialization signalling that time values could not be retrieved from
 * remote servers.
 *
 * Both constructors simply forward the message and the error code to LockException.
 */
class MONGO_CLIENT_API TimeNotFoundException : public LockException {
public:
    // Overload taking the message as a std::string.
    TimeNotFoundException(const std::string& msg, int code)
        : LockException(msg, code) {}

    // Overload taking the message as a C string.
    TimeNotFoundException(const char* msg, int code)
        : LockException(msg, code) {}

    // Declared non-throwing.
    virtual ~TimeNotFoundException() throw() {}
};
/**
 * The distributed lock is a configdb backed way of synchronizing system-wide tasks. A task
 * must be identified by a unique name across the system (e.g., "balancer"). A lock is taken
 * by writing a document in the configdb's locks collection with that name.
 *
 * To be maintained, each taken lock needs to be revalidated ("pinged") within a
 * pre-established amount of time. This class does this maintenance automatically once a
 * DistributedLock object was constructed. The ping procedure records the local time to
 * the ping document, but that time is untrusted and is only used as a point of reference
 * of whether the ping was refreshed or not. Ultimately, the clock of a configdb is the source
 * of truth when determining whether a ping is still fresh or not. This is achieved by
 * (1) remembering the ping document time along with config server time when unable to
 * take a lock, and (2) ensuring all config servers report similar times and have similar
 * time rates (the difference in times must start and stay small).
 *
 * Lock states include:
 * 0: unlocked
 * 1: about to be locked
 * 2: locked
 *
 * Valid state transitions:
 * 0 -> 1
 * 1 -> 2
 * 2 -> 0
 *
 * Note that at any point in time, a lock can be force unlocked if the ping for the lock
 * becomes too stale.
 */
class MONGO_CLIENT_API DistributedLock {
public:
    static logger::LabeledLevel logLvl;

    /**
     * Record of the last ping observed for a lock.
     *
     * NOTE(review): 'lastPing' is presumably the time stored in the ping document and 'remote'
     * the config server time at which it was observed (see the class comment) - confirm
     * against the implementation.
     */
    struct PingData {
        PingData(const std::string& _id, Date_t _lastPing, Date_t _remote, OID _ts)
            : id(_id), lastPing(_lastPing), remote(_remote), ts(_ts) {}

        // Default state: empty id, zero times and a default-constructed OID.
        PingData() : id(""), lastPing(0), remote(0), ts() {}

        std::string id;
        Date_t lastPing;
        Date_t remote;
        OID ts;
    };

    /**
     * Container of PingData entries, looked up by connection string and lock name.
     */
    class LastPings {
    public:
        LastPings() : _mutex("DistributedLock::LastPings") {}
        ~LastPings() {}

        PingData getLastPing(const ConnectionString& conn, const std::string& lockName);

        void setLastPing(const ConnectionString& conn,
                         const std::string& lockName,
                         const PingData& pd);

        mongo::mutex _mutex;

        // NOTE(review): the key is presumably (connection string, lock name), mirroring the
        // parameters of the accessors above - confirm against the implementation file.
        std::map<std::pair<std::string, std::string>, PingData> _lastPings;
    };

    static LastPings lastPings;

    /**
     * The constructor does not connect to the configdb yet and constructing does not mean the
     * lock was acquired. Construction does trigger a lock "pinging" mechanism, though.
     *
     * @param conn address of config(s) server(s)
     * @param name identifier for the lock
     * @param lockTimeout how long can the lock go "unpinged" before a new attempt to lock
     *     steals it. NOTE(review): presumably expressed in the same unit as LOCK_TIMEOUT
     *     (which looks like milliseconds), with 0 selecting the default - confirm.
     * @param asProcess NOTE(review): semantics are not visible in this header - see the
     *     implementation.
     */
    DistributedLock(const ConnectionString& conn,
                    const std::string& name,
                    unsigned long long lockTimeout = 0,
                    bool asProcess = false);

    ~DistributedLock() {}

    /**
     * Attempts to acquire 'this' lock, checking if it could or should be stolen from the
     * previous holder. Please consider using the dist_lock_try construct to acquire this lock
     * in an exception safe way.
     *
     * @param why human readable description of why the lock is being taken (used to log)
     * @param reenter whether this is a lock re-entry or a new lock
     * @param other configdb's lock document that is currently holding the lock, if lock is
     *     taken, or our own lock details if not
     * @param timeout NOTE(review): presumably a limit in seconds, as for checkStatus() -
     *     confirm against the implementation.
     * @return true if it managed to grab the lock
     */
    bool lock_try(const std::string& why,
                  bool reenter = false,
                  BSONObj* other = 0,
                  double timeout = 0.0);

    /**
     * Returns OK if this lock is held (but does not guarantee that this owns it) and
     * it was possible to confirm that, within 'timeout' seconds, if provided, with the
     * config servers.
     */
    Status checkStatus(double timeout);

    /**
     * Releases a previously taken lock.
     */
    void unlock(BSONObj* oldLockPtr = NULL);

    Date_t getRemoteTime();

    bool isRemoteTimeSkewed();

    const std::string& getProcessId();

    const ConnectionString& getRemoteConnection();

    /**
     * Checks the skew among a cluster of servers and returns true if the min and max clock
     * times among the servers are within maxClockSkew.
     */
    static bool checkSkew(const ConnectionString& cluster,
                          unsigned skewChecks = NUM_LOCK_SKEW_CHECKS,
                          unsigned long long maxClockSkew = MAX_LOCK_CLOCK_SKEW,
                          unsigned long long maxNetSkew = MAX_LOCK_NET_SKEW);

    /**
     * Get the remote time from a server or cluster
     */
    static Date_t remoteTime(const ConnectionString& cluster,
                             unsigned long long maxNetSkew = MAX_LOCK_NET_SKEW);

    // NOTE(review): presumably stops the pinger associated with 'lock'; the meaning of the
    // return value is not visible in this header.
    static bool killPinger(DistributedLock& lock);

    /**
     * Namespace for lock pings
     */
    static const std::string lockPingNS;

    /**
     * Namespace for locks
     */
    static const std::string locksNS;

    const ConnectionString _conn;
    const std::string _name;
    const std::string _processId;

    // Timeout for lock, usually LOCK_TIMEOUT
    const unsigned long long _lockTimeout;
    const unsigned long long _maxClockSkew;
    const unsigned long long _maxNetSkew;
    const unsigned long long _lockPing;

private:
    // The helpers below forward to the shared 'lastPings' container using this lock's
    // connection and name.

    // Overwrites the stored ping data for this lock with a default-constructed PingData.
    void resetLastPing() {
        lastPings.setLastPing(_conn, _name, PingData());
    }

    // Stores 'pd' as the ping data for this lock.
    void setLastPing(const PingData& pd) {
        lastPings.setLastPing(_conn, _name, pd);
    }

    // Returns the ping data currently stored for this lock.
    PingData getLastPing() {
        return lastPings.getLastPing(_conn, _name);
    }

    // May or may not exist, depending on startup
    mongo::mutex _mutex;
    std::string _threadId;
};
// Helper functions for tests; they allow us to turn the creation of a lock pinger on and off.
// *NOT* thread-safe
// isLockPingerEnabled: presumably reports whether lock pinger creation is currently enabled.
bool MONGO_CLIENT_API isLockPingerEnabled();
// setLockPingerEnabled: presumably enables (true) or disables (false) lock pinger creation.
void MONGO_CLIENT_API setLockPingerEnabled(bool enabled);
class MONGO_CLIENT_API dist_lock_try {
public:
dist_lock_try() : _lock(NULL), _got(false) {}
dist_lock_try(const dist_lock_try& that)
: _lock(that._lock), _got(that._got), _other(that._other) {
_other.getOwned();
// Make sure the lock ownership passes to this object,
// so we only unlock once.
((dist_lock_try&)that)._got = false;
((dist_lock_try&)that)._lock = NULL;
((dist_lock_try&)that)._other = BSONObj();
}
// Needed so we can handle lock exceptions in context of lock try.
dist_lock_try& operator=(const dist_lock_try& that) {
if (this == &that)
return *this;
_lock = that._lock;
_got = that._got;
_other = that._other;
_other.getOwned();
_why = that._why;
// Make sure the lock ownership passes to this object,
// so we only unlock once.
((dist_lock_try&)that)._got = false;
((dist_lock_try&)that)._lock = NULL;
((dist_lock_try&)that)._other = BSONObj();
return *this;
}
dist_lock_try(DistributedLock* lock, const std::string& why, double timeout = 0.0)
: _lock(lock), _why(why) {
_got = _lock->lock_try(why, false, &_other, timeout);
}
~dist_lock_try() {
if (_got) {
verify(!_other.isEmpty());
_lock->unlock(&_other);
}
}
/**
* Returns not OK if the lock is known _not_ to be held.
*/
Status checkStatus(double timeout) {
if (!_lock) {
return Status(ErrorCodes::LockFailed, "Lock is not currently set up");
}
if (!_got) {
return Status(ErrorCodes::LockFailed,
str::stream() << "Lock " << _lock->_name << " is currently held by "
<< _other);
}
return _lock->checkStatus(timeout);
}
bool got() const {
return _got;
}
BSONObj other() const {
return _other;
}
private:
DistributedLock* _lock;
bool _got;
BSONObj _other;
std::string _why;
};
/**
* Scoped wrapper for a distributed lock acquisition attempt. One or more attempts to acquire
* the distributed lock are managed by this class, and the distributed lock is unlocked if
* successfully acquired on object destruction.
*/
class MONGO_CLIENT_API ScopedDistributedLock {
public:
static const long long kDefaultLockTryIntervalMillis;
static const long long kDefaultSocketTimeoutMillis;
ScopedDistributedLock(const ConnectionString& conn, const std::string& name);
~ScopedDistributedLock();
/**
* Tries to obtain the lock once.
*
* Returns OK if the lock was successfully acquired.
* Returns ErrorCodes::DistributedClockSkewed when a clock skew is detected.
* Returns ErrorCodes::LockBusy if the lock is being held.
*/
Status tryAcquire();
/**
* Tries to unlock the lock if acquired. Cannot report an error or block indefinitely
* (though it may log messages or continue retrying in a non-blocking way).
*/
void unlock();
/**
* Tries multiple times to unlock the lock, using the specified lock try interval, until
* a certain amount of time has passed.
*
* waitForMillis = 0 indicates there should only be one attempt to acquire the lock, and
* no waiting.
* waitForMillis = -1 indicates we should retry indefinitely.
*
* Returns OK if the lock was successfully acquired.
* Returns ErrorCodes::DistributedClockSkewed when a clock skew is detected.
* Returns ErrorCodes::LockBusy if the lock is being held.
*/
Status acquire(long long waitForMillis);
/**
* If lock is held, remotely verifies that the lock has not been forced as a sanity check.
* If the lock is not held or cannot be verified, returns not OK.
*/
Status checkStatus();
bool isAcquired() const {
return _acquired;
}
ConnectionString getConfigConnectionString() const {
return _lock._conn;
}
void setLockTryIntervalMillis(long long lockTryIntervalMillis) {
_lockTryIntervalMillis = lockTryIntervalMillis;
}
long long getLockTryIntervalMillis() const {
return _lockTryIntervalMillis;
}
void setLockMessage(const std::string& why) {
_why = why;
}
std::string getLockMessage() const {
return _why;
}
void setSocketTimeoutMillis(long long socketTimeoutMillis) {
_socketTimeoutMillis = socketTimeoutMillis;
}
long long getSocketTimeoutMillis() const {
return _socketTimeoutMillis;
}
private:
DistributedLock _lock;
std::string _why;
long long _lockTryIntervalMillis;
long long _socketTimeoutMillis;
bool _acquired;
BSONObj _other;
};
}