summaryrefslogtreecommitdiff
path: root/client/dbclient_rs.h
blob: b49e12f7b28edaf6212c12bc411aed82b37c0096 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
/** @file dbclient_rs.h Connect to a Replica Set, from C++ */

/*    Copyright 2009 10gen Inc.
 *
 *    Licensed under the Apache License, Version 2.0 (the "License");
 *    you may not use this file except in compliance with the License.
 *    You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 *    Unless required by applicable law or agreed to in writing, software
 *    distributed under the License is distributed on an "AS IS" BASIS,
 *    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *    See the License for the specific language governing permissions and
 *    limitations under the License.
 */

#pragma once

#include "../pch.h"
#include "dbclient.h"

namespace mongo {

    class ReplicaSetMonitor;
    typedef shared_ptr<ReplicaSetMonitor> ReplicaSetMonitorPtr;

    /**
     * manages state about a replica set for client
     * keeps tabs on whose master and what slaves are up
     * can hand a slave to someone for SLAVE_OK
     * one instace per process per replica set
     * TODO: we might be able to use a regular Node * to avoid _lock
     */
    class ReplicaSetMonitor {
    public:

        typedef boost::function1<void,const ReplicaSetMonitor*> ConfigChangeHook;

        /**
         * gets a cached Monitor per name or will create if doesn't exist
         */
        static ReplicaSetMonitorPtr get( const string& name , const vector<HostAndPort>& servers );

        /**
         * checks all sets for current master and new secondaries
         * usually only called from a BackgroundJob
         */
        static void checkAll();

        /**
         * this is called whenever the config of any repclia set changes
         * currently only 1 globally
         * asserts if one already exists
         * ownership passes to ReplicaSetMonitor and the hook will actually never be deleted
         */
        static void setConfigChangeHook( ConfigChangeHook hook );

        ~ReplicaSetMonitor();

        /** @return HostAndPort or throws an exception */
        HostAndPort getMaster();

        /**
         * notify the monitor that server has faild
         */
        void notifyFailure( const HostAndPort& server );

        /** @return prev if its still ok, and if not returns a random slave that is ok for reads */
        HostAndPort getSlave( const HostAndPort& prev );

        /** @return a random slave that is ok for reads */
        HostAndPort getSlave();


        /**
         * notify the monitor that server has faild
         */
        void notifySlaveFailure( const HostAndPort& server );

        /**
         * checks for current master and new secondaries
         */
        void check();

        string getName() const { return _name; }

        string getServerAddress() const;
        
        bool contains( const string& server ) const;

    private:
        /**
         * This populates a list of hosts from the list of seeds (discarding the
         * seed list).
         * @param name set name
         * @param servers seeds
         */
        ReplicaSetMonitor( const string& name , const vector<HostAndPort>& servers );

        void _check();

        /**
         * Use replSetGetStatus command to make sure hosts in host list are up
         * and readable.  Sets Node::ok appropriately.
         */
        void _checkStatus(DBClientConnection *conn);

        /**
         * Add array of hosts to host list. Doesn't do anything if hosts are
         * already in host list.
         * @param hostList the list of hosts to add
         * @param changed if new hosts were added
         */
        void _checkHosts(const BSONObj& hostList, bool& changed);

        /**
         * Updates host list.
         * @param c the connection to check
         * @param maybePrimary OUT
         * @param verbose
         * @return if the connection is good
         */
        bool _checkConnection( DBClientConnection * c , string& maybePrimary , bool verbose );

        int _find( const string& server ) const ;
        int _find_inlock( const string& server ) const ;
        int _find( const HostAndPort& server ) const ;

        mutable mongo::mutex _lock; // protects _nodes
        mutable mongo::mutex  _checkConnectionLock;

        string _name;
        struct Node {
            Node( const HostAndPort& a , DBClientConnection* c ) : addr( a ) , conn(c) , ok(true) {}
            HostAndPort addr;
            DBClientConnection* conn;

            // if this node is in a failure state
            // used for slave routing
            // this is too simple, should make it better
            bool ok;
        };

        /**
         * Host list.
         */
        vector<Node> _nodes;

        int _master; // which node is the current master.  -1 means no master is known
        int _nextSlave; // which node is the current slave

        static mongo::mutex _setsLock; // protects _sets
        static map<string,ReplicaSetMonitorPtr> _sets; // set name to Monitor

        static ConfigChangeHook _hook;
    };

    /** Use this class to connect to a replica set of servers.  The class will manage
       checking for which server in a replica set is master, and do failover automatically.

       This can also be used to connect to replica pairs since pairs are a subset of sets

       On a failover situation, expect at least one operation to return an error (throw
       an exception) before the failover is complete.  Operations are not retried.
    */
    class DBClientReplicaSet : public DBClientBase {

    public:
        /** Call connect() after constructing. autoReconnect is always on for DBClientReplicaSet connections. */
        DBClientReplicaSet( const string& name , const vector<HostAndPort>& servers );
        virtual ~DBClientReplicaSet();

        /** Returns false if nomember of the set were reachable, or neither is
         * master, although,
         * when false returned, you can still try to use this connection object, it will
         * try reconnects.
         */
        bool connect();

        /** Authorize.  Authorizes all nodes as needed
        */
        virtual bool auth(const string &dbname, const string &username, const string &pwd, string& errmsg, bool digestPassword = true );

        // ----------- simple functions --------------

        /** throws userassertion "no master found" */
        virtual auto_ptr<DBClientCursor> query(const string &ns, Query query, int nToReturn = 0, int nToSkip = 0,
                                               const BSONObj *fieldsToReturn = 0, int queryOptions = 0 , int batchSize = 0 );

        /** throws userassertion "no master found" */
        virtual BSONObj findOne(const string &ns, const Query& query, const BSONObj *fieldsToReturn = 0, int queryOptions = 0);

        virtual void insert( const string &ns , BSONObj obj , int flags=0);

        /** insert multiple objects.  Note that single object insert is asynchronous, so this version
            is only nominally faster and not worth a special effort to try to use.  */
        virtual void insert( const string &ns, const vector< BSONObj >& v , int flags=0);

        virtual void remove( const string &ns , Query obj , bool justOne = 0 );

        virtual void update( const string &ns , Query query , BSONObj obj , bool upsert = 0 , bool multi = 0 );

        virtual void killCursor( long long cursorID );

        // ---- access raw connections ----

        DBClientConnection& masterConn();
        DBClientConnection& slaveConn();

        // ---- callback pieces -------

        virtual void say( Message &toSend );
        virtual bool recv( Message &toRecv );
        virtual void checkResponse( const char* data, int nReturned, bool* retry = NULL, string* targetHost = NULL );

        /* this is the callback from our underlying connections to notify us that we got a "not master" error.
         */
        void isntMaster();

        /* this is used to indicate we got a "not master or secondary" error from a secondary.
         */
        void isntSecondary();

        // ----- status ------

        virtual bool isFailed() const { return ! _master || _master->isFailed(); }

        // ----- informational ----

        string toString() { return getServerAddress(); }

        string getServerAddress() const { return _monitor->getServerAddress(); }

        virtual ConnectionString::ConnectionType type() const { return ConnectionString::SET; }

        // ---- low level ------

        virtual bool call( Message &toSend, Message &response, bool assertOk=true , string * actualServer = 0 );
        virtual bool callRead( Message& toSend , Message& response ) { return checkMaster()->callRead( toSend , response ); }

    protected:
        virtual void sayPiggyBack( Message &toSend ) { checkMaster()->say( toSend ); }

    private:

        // Used to simplify slave-handling logic on errors
        auto_ptr<DBClientCursor> checkSlaveQueryResult( auto_ptr<DBClientCursor> result );

        DBClientConnection * checkMaster();
        DBClientConnection * checkSlave();

        void _auth( DBClientConnection * conn );

        ReplicaSetMonitorPtr _monitor;

        HostAndPort _masterHost;
        scoped_ptr<DBClientConnection> _master;

        HostAndPort _slaveHost;
        scoped_ptr<DBClientConnection> _slave;

        /**
         * for storing authentication info
         * fields are exactly for DBClientConnection::auth
         */
        struct AuthInfo {
            AuthInfo( string d , string u , string p , bool di )
                : dbname( d ) , username( u ) , pwd( p ) , digestPassword( di ) {}
            string dbname;
            string username;
            string pwd;
            bool digestPassword;
        };

        // we need to store so that when we connect to a new node on failure
        // we can re-auth
        // this could be a security issue, as the password is stored in memory
        // not sure if/how we should handle
        list<AuthInfo> _auths;

    protected:

        /**
         * for storing (non-threadsafe) information between lazy calls
         */
        class LazyState {
        public:
            LazyState() : _lastClient( NULL ), _lastOp( -1 ), _slaveOk( false ), _retries( 0 ) {}
            DBClientConnection* _lastClient;
            int _lastOp;
            bool _slaveOk;
            int _retries;

        } _lazyState;

    };


}