diff options
author | matt dannenberg <matt.dannenberg@10gen.com> | 2013-10-21 13:27:03 -0400 |
---|---|---|
committer | matt dannenberg <matt.dannenberg@10gen.com> | 2013-10-28 14:16:06 -0400 |
commit | 38b665b22722ab442a9c022b54558e3cd7a9b84f (patch) | |
tree | 7592e04b21d6c42b01e2e65db240fc49c70c3dc9 /src | |
parent | ba1c7d57ea1a5e82e5af93e41344cf068cdc675b (diff) | |
download | mongo-38b665b22722ab442a9c022b54558e3cd7a9b84f.tar.gz |
SERVER-6552 implement resync command for replicaset members
Diffstat (limited to 'src')
-rw-r--r-- | src/mongo/SConscript | 1 | ||||
-rw-r--r-- | src/mongo/db/repl/master_slave.cpp | 68 | ||||
-rw-r--r-- | src/mongo/db/repl/master_slave.h | 4 | ||||
-rw-r--r-- | src/mongo/db/repl/resync.cpp | 112 | ||||
-rw-r--r-- | src/mongo/db/repl/rs.cpp | 1 | ||||
-rw-r--r-- | src/mongo/db/repl/rs.h | 9 | ||||
-rw-r--r-- | src/mongo/db/repl/rs_initialsync.cpp | 4 | ||||
-rw-r--r-- | src/mongo/db/repl/rs_sync.cpp | 29 |
8 files changed, 160 insertions, 68 deletions
diff --git a/src/mongo/SConscript b/src/mongo/SConscript index 5a01fd0afe3..1503895bac3 100644 --- a/src/mongo/SConscript +++ b/src/mongo/SConscript @@ -493,6 +493,7 @@ serverOnlyFiles = [ "db/curop.cpp", "db/repl/oplogreader.cpp", "db/repl/replication_server_status.cpp", "db/repl/repl_reads_ok.cpp", + "db/repl/resync.cpp", "db/repl/oplog.cpp", "db/prefetch.cpp", "db/repl/write_concern.cpp", diff --git a/src/mongo/db/repl/master_slave.cpp b/src/mongo/db/repl/master_slave.cpp index 0b90942e14b..bc658574fa4 100644 --- a/src/mongo/db/repl/master_slave.cpp +++ b/src/mongo/db/repl/master_slave.cpp @@ -61,7 +61,7 @@ namespace mongo { /* if 1 sync() is running */ volatile int syncing = 0; - static volatile int relinquishSyncingSome = 0; + volatile int relinquishSyncingSome = 0; /* "dead" means something really bad happened like replication falling completely out of sync. when non-null, we are dead and the string is informational @@ -82,72 +82,6 @@ namespace mongo { }; - /* operator requested resynchronization of replication (on the slave). { resync : 1 } */ - class CmdResync : public Command { - public: - virtual bool slaveOk() const { - return true; - } - virtual bool adminOnly() const { - return true; - } - virtual bool logTheOp() { return false; } - virtual bool lockGlobally() const { return true; } - virtual LockType locktype() const { return WRITE; } - virtual void addRequiredPrivileges(const std::string& dbname, - const BSONObj& cmdObj, - std::vector<Privilege>* out) { - ActionSet actions; - actions.addAction(ActionType::resync); - out->push_back(Privilege(ResourcePattern::forClusterResource(), actions)); - } - void help(stringstream&h) const { h << "resync (from scratch) an out of date replica slave.\nhttp://dochub.mongodb.org/core/masterslave"; } - CmdResync() : Command("resync") { } - virtual bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) { - if (replSettings.usingReplSets()) { - errmsg = "resync command not currently supported with replica sets. See RS102 info in the mongodb documentations"; - result.append("info", "http://dochub.mongodb.org/core/resyncingaverystalereplicasetmember"); - return false; - } - - if ( cmdObj.getBoolField( "force" ) ) { - if ( !waitForSyncToFinish( errmsg ) ) - return false; - replAllDead = "resync forced"; - } - if ( !replAllDead ) { - errmsg = "not dead, no need to resync"; - return false; - } - if ( !waitForSyncToFinish( errmsg ) ) - return false; - - ReplSource::forceResyncDead( "client" ); - result.append( "info", "triggered resync for all sources" ); - return true; - } - bool waitForSyncToFinish( string &errmsg ) const { - // Wait for slave thread to finish syncing, so sources will be be - // reloaded with new saved state on next pass. - Timer t; - while ( 1 ) { - if ( syncing == 0 || t.millis() > 30000 ) - break; - { - Lock::TempRelease t; - relinquishSyncingSome = 1; - sleepmillis(1); - } - } - if ( syncing ) { - errmsg = "timeout waiting for sync() to finish"; - return false; - } - return true; - } - } cmdResync; - - ReplSource::ReplSource() { nClonedThisPass = 0; ensureMe(); diff --git a/src/mongo/db/repl/master_slave.h b/src/mongo/db/repl/master_slave.h index eae45fb0db2..79ed9d7044e 100644 --- a/src/mongo/db/repl/master_slave.h +++ b/src/mongo/db/repl/master_slave.h @@ -44,6 +44,10 @@ namespace mongo { // Main entry point for master/slave at startup time. void startMasterSlave(); + // externed for use with resync.cpp + extern volatile int relinquishSyncingSome; + extern volatile int syncing; + // Global variable that contains a string telling why master/slave halted extern const char *replAllDead; diff --git a/src/mongo/db/repl/resync.cpp b/src/mongo/db/repl/resync.cpp new file mode 100644 index 00000000000..8bdcca2594b --- /dev/null +++ b/src/mongo/db/repl/resync.cpp @@ -0,0 +1,112 @@ +/** +* Copyright (C) 2008 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +* +* As a special exception, the copyright holders give permission to link the +* code of portions of this program with the OpenSSL library under certain +* conditions as described in each individual source file and distribute +* linked combinations including the program with the OpenSSL library. You +* must comply with the GNU Affero General Public License in all respects for +* all of the code used other than as permitted herein. If you modify file(s) +* with this exception, you may extend this exception to your version of the +* file(s), but you are not obligated to do so. If you do not wish to do so, +* delete this exception statement from your version. If you delete this +* exception statement from all source files in the program, then also delete +* it in the license file. +*/ + +#include "mongo/db/commands.h" +#include "mongo/db/repl/master_slave.h" // replSettings +#include "mongo/db/repl/replication_server_status.h" // replSettings +#include "mongo/db/repl/rs.h" // replLocalAuth() + +namespace mongo { + + // operator requested resynchronization of replication (on a slave or secondary). {resync: 1} + class CmdResync : public Command { + public: + virtual bool slaveOk() const { + return true; + } + virtual bool adminOnly() const { + return true; + } + virtual bool logTheOp() { return false; } + virtual bool lockGlobally() const { return true; } + virtual LockType locktype() const { return WRITE; } + virtual void addRequiredPrivileges(const std::string& dbname, + const BSONObj& cmdObj, + std::vector<Privilege>* out) { + ActionSet actions; + actions.addAction(ActionType::resync); + out->push_back(Privilege(ResourcePattern::forClusterResource(), actions)); + } + + void help(stringstream& h) const { + h << "resync (from scratch) a stale slave or replica set secondary node.\n"; + } + + CmdResync() : Command("resync") { } + virtual bool run(const string&, + BSONObj& cmdObj, + int, + string& errmsg, + BSONObjBuilder& result, + bool fromRepl) { + if (replSettings.usingReplSets()) { + if (theReplSet->isPrimary()) { + errmsg = "primaries cannot resync"; + return false; + } + return theReplSet->resync(errmsg); + } + + // below this comment pertains only to master/slave replication + if ( cmdObj.getBoolField( "force" ) ) { + if ( !waitForSyncToFinish( errmsg ) ) + return false; + replAllDead = "resync forced"; + } + if ( !replAllDead ) { + errmsg = "not dead, no need to resync"; + return false; + } + if ( !waitForSyncToFinish( errmsg ) ) + return false; + + ReplSource::forceResyncDead( "client" ); + result.append( "info", "triggered resync for all sources" ); + return true; + } + bool waitForSyncToFinish( string &errmsg ) const { + // Wait for slave thread to finish syncing, so sources will be be + // reloaded with new saved state on next pass. + Timer t; + while ( 1 ) { + if ( syncing == 0 || t.millis() > 30000 ) + break; + { + Lock::TempRelease t; + relinquishSyncingSome = 1; + sleepmillis(1); + } + } + if ( syncing ) { + errmsg = "timeout waiting for sync() to finish"; + return false; + } + return true; + } + } cmdResync; +} diff --git a/src/mongo/db/repl/rs.cpp b/src/mongo/db/repl/rs.cpp index c7004a277a1..4dff8744c2b 100644 --- a/src/mongo/db/repl/rs.cpp +++ b/src/mongo/db/repl/rs.cpp @@ -488,6 +488,7 @@ namespace { _writerPool(replWriterThreadCount), _prefetcherPool(replPrefetcherThreadCount), oplogVersion(0), + initialSyncRequested(false), // only used for resync _indexPrefetchConfig(PREFETCH_ALL) { } diff --git a/src/mongo/db/repl/rs.h b/src/mongo/db/repl/rs.h index ce4940e0aeb..1d8fce9fc72 100644 --- a/src/mongo/db/repl/rs.h +++ b/src/mongo/db/repl/rs.h @@ -531,6 +531,11 @@ namespace mongo { const Member* findById(unsigned id) const; Member* getMutableMember(unsigned id); Member* findByName(const std::string& hostname) const; + + /** + * Cause the node to resync from scratch. + */ + bool resync(std::string& errmsg); private: void _getTargets(list<Target>&, int &configVersion); void getTargets(list<Target>&, int &configVersion); @@ -608,6 +613,10 @@ namespace mongo { static void setInitialSyncFlag(); int oplogVersion; + + // bool for indicating resync need on this node and the mutex that protects it + bool initialSyncRequested; + boost::mutex initialSyncMutex; private: IndexPrefetchConfig _indexPrefetchConfig; diff --git a/src/mongo/db/repl/rs_initialsync.cpp b/src/mongo/db/repl/rs_initialsync.cpp index 786209573d1..8e5b092d1e0 100644 --- a/src/mongo/db/repl/rs_initialsync.cpp +++ b/src/mongo/db/repl/rs_initialsync.cpp @@ -479,6 +479,10 @@ namespace mongo { cx.ctx().db()->flushFiles(true); } + { + boost::unique_lock<boost::mutex> lock(theReplSet->initialSyncMutex); + theReplSet->initialSyncRequested = false; + } // If we just cloned & there were no ops applied, we still want the primary to know where // we're up to diff --git a/src/mongo/db/repl/rs_sync.cpp b/src/mongo/db/repl/rs_sync.cpp index 995b4d51d92..89c6f4e8c52 100644 --- a/src/mongo/db/repl/rs_sync.cpp +++ b/src/mongo/db/repl/rs_sync.cpp @@ -415,6 +415,13 @@ namespace replset { // (always checked in the first iteration of this do-while loop, because // ops is empty) if (ops.empty() || now > lastTimeChecked) { + { + boost::unique_lock<boost::mutex> lock(theReplSet->initialSyncMutex); + if (theReplSet->initialSyncRequested) { + // got a resync command + return; + } + } lastTimeChecked = now; // can we become secondary? // we have to check this before calling mgr, as we must be a secondary to @@ -748,10 +755,16 @@ namespace replset { return; } + bool initialSyncRequested = false; + { + boost::unique_lock<boost::mutex> lock(theReplSet->initialSyncMutex); + initialSyncRequested = theReplSet->initialSyncRequested; + } // Check criteria for doing an initial sync: // 1. If the oplog is empty, do an initial sync // 2. If minValid has _initialSyncFlag set, do an initial sync - if (lastOpTimeWritten.isNull() || getInitialSyncFlag()) { + // 3. If initialSyncRequested is true + if (lastOpTimeWritten.isNull() || getInitialSyncFlag() || initialSyncRequested) { syncDoInitialSync(); return; // _syncThread will be recalled, starts from top again in case sync failed. } @@ -761,6 +774,20 @@ namespace replset { tail.oplogApplication(); } + bool ReplSetImpl::resync(string& errmsg) { + changeState(MemberState::RS_RECOVERING); + { + Client::Context ctx("local"); + cc().database()->dropCollection("local.oplog.rs"); + } + _veto.clear(); + { + boost::unique_lock<boost::mutex> lock(theReplSet->initialSyncMutex); + theReplSet->initialSyncRequested = true; + } + return true; + } + void ReplSetImpl::syncThread() { while( 1 ) { // After a reconfig, we may not be in the replica set anymore, so |