// distlock.h

/*    Copyright 2009 10gen Inc.
 *
 *    This program is free software: you can redistribute it and/or  modify
 *    it under the terms of the GNU Affero General Public License, version 3,
 *    as published by the Free Software Foundation.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU Affero General Public License for more details.
 *
 *    You should have received a copy of the GNU Affero General Public License
 *    along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 *    As a special exception, the copyright holders give permission to link the
 *    code of portions of this program with the OpenSSL library under certain
 *    conditions as described in each individual source file and distribute
 *    linked combinations including the program with the OpenSSL library. You
 *    must comply with the GNU Affero General Public License in all respects
 *    for all of the code used other than as permitted herein. If you modify
 *    file(s) with this exception, you may extend this exception to your
 *    version of the file(s), but you are not obligated to do so. If you do not
 *    wish to do so, delete this exception statement from your version. If you
 *    delete this exception statement from all source files in the program,
 *    then also delete it in the license file.
 */

#pragma once

#include "mongo/pch.h"
#include "mongo/client/connpool.h"
#include "mongo/client/export_macros.h"
#include "mongo/client/syncclusterconnection.h"
#include "mongo/logger/labeled_level.h"


namespace mongo {

    namespace {

        enum TimeConstants {
            LOCK_TIMEOUT = 15 * 60 * 1000,
            LOCK_SKEW_FACTOR = 30,
            LOCK_PING = LOCK_TIMEOUT / LOCK_SKEW_FACTOR,
            MAX_LOCK_NET_SKEW = LOCK_TIMEOUT / LOCK_SKEW_FACTOR,
            MAX_LOCK_CLOCK_SKEW = LOCK_TIMEOUT / LOCK_SKEW_FACTOR,
            NUM_LOCK_SKEW_CHECKS = 3,
        };

        // The maximum clock skew we need to handle between config servers is
        // 2 * MAX_LOCK_NET_SKEW + MAX_LOCK_CLOCK_SKEW.

        // Net effect of *this* clock being slow is effectively a multiplier on the max net skew
        // and a linear increase or decrease of the max clock skew.
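        // As a concrete illustration with the constants above (simple arithmetic, not new
        // tunables): LOCK_TIMEOUT is 900,000 ms (15 minutes), so LOCK_PING, MAX_LOCK_NET_SKEW
        // and MAX_LOCK_CLOCK_SKEW are each 900,000 / 30 = 30,000 ms, and the maximum handled
        // config server skew works out to 2 * 30,000 + 30,000 = 90,000 ms (90 seconds).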
    }

    /**
     * Exception class to encapsulate exceptions while managing distributed locks
     */
    class MONGO_CLIENT_API LockException : public DBException {
    public:
        LockException( const char * msg , int code ) : DBException( msg, code ) {}
        LockException( const std::string& msg, int code ) : DBException( msg, code ) {}
        virtual ~LockException() throw() { }
    };

    /**
     * Indicates an error in retrieving time values from remote servers.
     */
    class MONGO_CLIENT_API TimeNotFoundException : public LockException {
    public:
        TimeNotFoundException( const char * msg , int code ) : LockException( msg, code ) {}
        TimeNotFoundException( const std::string& msg, int code ) : LockException( msg, code ) {}
        virtual ~TimeNotFoundException() throw() { }
    };

    /**
     * The distributed lock is a configdb-backed way of synchronizing system-wide tasks. A task
     * must be identified by a unique name across the system (e.g., "balancer"). A lock is taken
     * by writing a document in the configdb's locks collection with that name.
     *
     * To be maintained, each taken lock needs to be revalidated ("pinged") within a
     * pre-established amount of time. This class does this maintenance automatically once a
     * DistributedLock object has been constructed. The ping procedure records the local time in
     * the ping document, but that time is untrusted and is only used as a point of reference
     * for whether the ping was refreshed or not. Ultimately, the configdb's clock is the source
     * of truth when determining whether a ping is still fresh. This is achieved by
     * (1) remembering the ping document time along with the config server time when unable to
     * take a lock, and (2) ensuring all config servers report similar times and have similar
     * time rates (the difference in times must start and stay small).
     *
     * Lock states include:
     * 0: unlocked
     * 1: about to be locked
     * 2: locked
     *
     * Valid state transitions:
     * 0 -> 1
     * 1 -> 2
     * 2 -> 0
     *
     * Note that at any point in time, a lock can be force unlocked if the ping for the lock
     * becomes too stale.
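     *
     * A minimal usage sketch (illustrative only; the configLoc connection string is an assumed
     * example value, not defined in this header):
     *
     *     DistributedLock myLock( configLoc, "balancer" );
     *     BSONObj otherHolder;
     *     if ( myLock.lock_try( "demonstrating the lock", false, &otherHolder ) ) {
     *         // ... perform the system-wide task while holding the lock ...
     *         myLock.unlock( &otherHolder );
     *     }
     *
     * In practice, prefer the dist_lock_try or ScopedDistributedLock wrappers declared below,
     * which release the lock automatically when they go out of scope.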
     */
    class MONGO_CLIENT_API DistributedLock {
    public:

        static logger::LabeledLevel logLvl;

        struct PingData {

            PingData( const std::string& _id , Date_t _lastPing , Date_t _remote , OID _ts )
                : id(_id), lastPing(_lastPing), remote(_remote), ts(_ts){
            }

            PingData()
                : id(""), lastPing(0), remote(0), ts(){
            }

            std::string id;
            Date_t lastPing;
            Date_t remote;
            OID ts;
        };

        class LastPings {
        public:
            LastPings() : _mutex( "DistributedLock::LastPings" ) {}
            ~LastPings(){}

            PingData getLastPing( const ConnectionString& conn, const std::string& lockName );
            void setLastPing( const ConnectionString& conn, const std::string& lockName, const PingData& pd );

            mongo::mutex _mutex;
            std::map< std::pair<std::string, std::string>, PingData > _lastPings;
        };

        static LastPings lastPings;

        /**
         * The constructor does not connect to the configdb yet, and constructing does not mean
         * the lock was acquired. Construction does trigger a lock "pinging" mechanism, though.
         *
         * @param conn address of the config server(s)
         * @param name identifier for the lock
         * @param lockTimeout how long the lock can go "unpinged" before a new attempt to lock
         *        steals it, in milliseconds; 0 means use the default LOCK_TIMEOUT
         */
        DistributedLock( const ConnectionString& conn , const std::string& name , unsigned long long lockTimeout = 0, bool asProcess = false );
        ~DistributedLock() {}

        /**
         * Attempts to acquire 'this' lock, checking if it could or should be stolen from the
         * previous holder. Please consider using the dist_lock_try construct to acquire this
         * lock in an exception-safe way.
         *
         * @param why human-readable description of why the lock is being taken (used for logging)
         * @param reenter whether this is a lock re-entry or a new lock
         * @param other the configdb's lock document currently holding the lock, if the lock is
         *        taken, or our own lock details if not
         * @return true if it managed to grab the lock
         */
        bool lock_try( const std::string& why , bool reenter = false, BSONObj * other = 0, double timeout = 0.0 );

        /**
         * Returns OK if this lock is held (though this does not guarantee that this object owns
         * it) and it was possible to confirm that with the config servers, within 'timeout'
         * seconds if provided.
         */
        Status checkStatus(double timeout);

        /**
         * Releases a previously taken lock.
         */
        void unlock( BSONObj* oldLockPtr = NULL );

        Date_t getRemoteTime();

        bool isRemoteTimeSkewed();

        const std::string& getProcessId();

        const ConnectionString& getRemoteConnection();

        /**
         * Checks the skew among a cluster of servers and returns true if the min and max clock
         * times among the servers are within maxClockSkew.
         */
        static bool checkSkew( const ConnectionString& cluster,
                               unsigned skewChecks = NUM_LOCK_SKEW_CHECKS,
                               unsigned long long maxClockSkew = MAX_LOCK_CLOCK_SKEW,
                               unsigned long long maxNetSkew = MAX_LOCK_NET_SKEW );
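
        // For example (sketch only; configLoc is an assumed, already-built ConnectionString):
        //
        //     if ( !DistributedLock::checkSkew( configLoc ) ) {
        //         // config server clocks are too far apart to trust lock timing
        //     }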

        /**
         * Get the remote time from a server or cluster
         */
        static Date_t remoteTime( const ConnectionString& cluster, unsigned long long maxNetSkew = MAX_LOCK_NET_SKEW );

        static bool killPinger( DistributedLock& lock );

        /**
         * Namespace for lock pings
         */
        static const std::string lockPingNS;

        /**
         * Namespace for locks
         */
        static const std::string locksNS;

        const ConnectionString _conn;
        const std::string _name;
        const std::string _processId;

        // Timeout for lock, usually LOCK_TIMEOUT
        const unsigned long long _lockTimeout;
        const unsigned long long _maxClockSkew;
        const unsigned long long _maxNetSkew;
        const unsigned long long _lockPing;

    private:

        void resetLastPing(){ lastPings.setLastPing( _conn, _name, PingData() ); }
        void setLastPing( const PingData& pd ){ lastPings.setLastPing( _conn, _name, pd ); }
        PingData getLastPing(){ return lastPings.getLastPing( _conn, _name ); }

        // May or may not exist, depending on startup
        mongo::mutex _mutex;
        std::string _threadId;

    };

    // Helper functions for tests; they allow us to turn the creation of a lock pinger on and off.
    // *NOT* thread-safe.
    bool MONGO_CLIENT_API isLockPingerEnabled();
    void MONGO_CLIENT_API setLockPingerEnabled(bool enabled);


    class MONGO_CLIENT_API dist_lock_try {
    public:

        dist_lock_try() : _lock(NULL), _got(false) {}

        dist_lock_try( const dist_lock_try& that ) :
                _lock(that._lock), _got(that._got), _other(that._other) {

            // Take ownership of the BSON buffer so it remains valid after 'that' goes away.
            _other = _other.getOwned();

            // Make sure the lock ownership passes to this object,
            // so we only unlock once.
            ((dist_lock_try&) that)._got = false;
            ((dist_lock_try&) that)._lock = NULL;
            ((dist_lock_try&) that)._other = BSONObj();
        }

        // Needed so we can handle lock exceptions in the context of a lock try.
        dist_lock_try& operator=( const dist_lock_try& that ){

            if( this == &that ) return *this;

            _lock = that._lock;
            _got = that._got;
            _other = that._other;
            _other = _other.getOwned(); // take ownership of the BSON buffer
            _why = that._why;

            // Make sure the lock ownership passes to this object,
            // so we only unlock once.
            ((dist_lock_try&) that)._got = false;
            ((dist_lock_try&) that)._lock = NULL;
            ((dist_lock_try&) that)._other = BSONObj();

            return *this;
        }

        dist_lock_try( DistributedLock * lock , const std::string& why, double timeout = 0.0 )
            : _lock(lock), _why(why) {
            _got = _lock->lock_try( why , false , &_other, timeout );
        }

        ~dist_lock_try() {
            if ( _got ) {
                verify( ! _other.isEmpty() );
                _lock->unlock( &_other );
            }
        }

        /**
         * Returns not OK if the lock is known _not_ to be held.
         */
        Status checkStatus(double timeout) {
            if ( !_lock ) {
                return Status(ErrorCodes::LockFailed, "Lock is not currently set up");
            }

            if ( !_got ) {
                return Status(ErrorCodes::LockFailed,
                        str::stream() << "Lock " << _lock->_name << " is currently held by "
                                      << _other);
            }

            return _lock->checkStatus(timeout);
        }

        bool got() const { return _got; }
        BSONObj other() const { return _other; }

    private:
        DistributedLock * _lock;
        bool _got;
        BSONObj _other;
        std::string _why;
    };
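
    // A minimal sketch of exception-safe locking with dist_lock_try (illustrative only;
    // myLock is assumed to be an existing DistributedLock):
    //
    //     dist_lock_try attempt( &myLock, "demonstrating dist_lock_try" );
    //     if ( attempt.got() ) {
    //         // ... work under the lock; unlocked automatically when 'attempt' is destroyed ...
    //     }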

    /**
     * Scoped wrapper for a distributed lock acquisition attempt. One or more attempts to acquire
     * the distributed lock are managed by this class, and the lock is released on object
     * destruction if it was successfully acquired.
     */
    class MONGO_CLIENT_API ScopedDistributedLock {
    public:

        static const long long kDefaultLockTryIntervalMillis;
        static const long long kDefaultSocketTimeoutMillis;

        ScopedDistributedLock(const ConnectionString& conn, const std::string& name);

        ~ScopedDistributedLock();

        /**
         * Tries to obtain the lock once.
         *
         * Returns OK if the lock was successfully acquired.
         * Returns ErrorCodes::DistributedClockSkewed when a clock skew is detected.
         * Returns ErrorCodes::LockBusy if the lock is being held.
         */
        Status tryAcquire();

        /**
         * Tries to unlock the lock if acquired.  Cannot report an error or block indefinitely
         * (though it may log messages or continue retrying in a non-blocking way).
         */
        void unlock();

        /**
         * Tries multiple times to acquire the lock, using the specified lock try interval, until
         * a certain amount of time has passed.
         *
         * waitForMillis = 0 indicates there should only be one attempt to acquire the lock, and
         * no waiting.
         * waitForMillis = -1 indicates we should retry indefinitely.
         *
         * Returns OK if the lock was successfully acquired.
         * Returns ErrorCodes::DistributedClockSkewed when a clock skew is detected.
         * Returns ErrorCodes::LockBusy if the lock is being held.
         */
        Status acquire(long long waitForMillis);

        /**
         * If lock is held, remotely verifies that the lock has not been forced as a sanity check.
         * If the lock is not held or cannot be verified, returns not OK.
         */
        Status checkStatus();

        bool isAcquired() const {
            return _acquired;
        }

        ConnectionString getConfigConnectionString() const {
            return _lock._conn;
        }

        void setLockTryIntervalMillis(long long lockTryIntervalMillis) {
            _lockTryIntervalMillis = lockTryIntervalMillis;
        }

        long long getLockTryIntervalMillis() const {
            return _lockTryIntervalMillis;
        }

        void setLockMessage(const std::string& why) {
            _why = why;
        }

        std::string getLockMessage() const {
            return _why;
        }

        void setSocketTimeoutMillis(long long socketTimeoutMillis) {
            _socketTimeoutMillis = socketTimeoutMillis;
        }

        long long getSocketTimeoutMillis() const {
            return _socketTimeoutMillis;
        }

    private:
        DistributedLock _lock;
        std::string _why;
        long long _lockTryIntervalMillis;
        long long _socketTimeoutMillis;

        bool _acquired;
        BSONObj _other;
    };
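
    // A minimal sketch of the scoped wrapper (illustrative only; configLoc is an assumed
    // ConnectionString and the 10-second wait is an arbitrary example value):
    //
    //     ScopedDistributedLock scopedLock( configLoc, "balancer" );
    //     scopedLock.setLockMessage( "demonstrating ScopedDistributedLock" );
    //     Status status = scopedLock.acquire( 10 * 1000 /* waitForMillis */ );
    //     if ( status.isOK() ) {
    //         // ... work under the lock; released automatically in the destructor ...
    //     }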

}