summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
author matt dannenberg <matt.dannenberg@10gen.com> 2013-10-21 13:27:03 -0400
committer matt dannenberg <matt.dannenberg@10gen.com> 2013-10-28 14:16:06 -0400
commit 38b665b22722ab442a9c022b54558e3cd7a9b84f (patch)
tree 7592e04b21d6c42b01e2e65db240fc49c70c3dc9 /src
parent ba1c7d57ea1a5e82e5af93e41344cf068cdc675b (diff)
download mongo-38b665b22722ab442a9c022b54558e3cd7a9b84f.tar.gz
SERVER-6552 implement resync command for replicaset members
Diffstat (limited to 'src')
-rw-r--r-- src/mongo/SConscript 1
-rw-r--r-- src/mongo/db/repl/master_slave.cpp 68
-rw-r--r-- src/mongo/db/repl/master_slave.h 4
-rw-r--r-- src/mongo/db/repl/resync.cpp 112
-rw-r--r-- src/mongo/db/repl/rs.cpp 1
-rw-r--r-- src/mongo/db/repl/rs.h 9
-rw-r--r-- src/mongo/db/repl/rs_initialsync.cpp 4
-rw-r--r-- src/mongo/db/repl/rs_sync.cpp 29
8 files changed, 160 insertions, 68 deletions
diff --git a/src/mongo/SConscript b/src/mongo/SConscript
index 5a01fd0afe3..1503895bac3 100644
--- a/src/mongo/SConscript
+++ b/src/mongo/SConscript
@@ -493,6 +493,7 @@ serverOnlyFiles = [ "db/curop.cpp",
"db/repl/oplogreader.cpp",
"db/repl/replication_server_status.cpp",
"db/repl/repl_reads_ok.cpp",
+ "db/repl/resync.cpp",
"db/repl/oplog.cpp",
"db/prefetch.cpp",
"db/repl/write_concern.cpp",
diff --git a/src/mongo/db/repl/master_slave.cpp b/src/mongo/db/repl/master_slave.cpp
index 0b90942e14b..bc658574fa4 100644
--- a/src/mongo/db/repl/master_slave.cpp
+++ b/src/mongo/db/repl/master_slave.cpp
@@ -61,7 +61,7 @@ namespace mongo {
/* if 1 sync() is running */
volatile int syncing = 0;
- static volatile int relinquishSyncingSome = 0;
+ volatile int relinquishSyncingSome = 0;
/* "dead" means something really bad happened like replication falling completely out of sync.
when non-null, we are dead and the string is informational
@@ -82,72 +82,6 @@ namespace mongo {
};
- /* operator requested resynchronization of replication (on the slave). { resync : 1 } */
- class CmdResync : public Command {
- public:
- virtual bool slaveOk() const {
- return true;
- }
- virtual bool adminOnly() const {
- return true;
- }
- virtual bool logTheOp() { return false; }
- virtual bool lockGlobally() const { return true; }
- virtual LockType locktype() const { return WRITE; }
- virtual void addRequiredPrivileges(const std::string& dbname,
- const BSONObj& cmdObj,
- std::vector<Privilege>* out) {
- ActionSet actions;
- actions.addAction(ActionType::resync);
- out->push_back(Privilege(ResourcePattern::forClusterResource(), actions));
- }
- void help(stringstream&h) const { h << "resync (from scratch) an out of date replica slave.\nhttp://dochub.mongodb.org/core/masterslave"; }
- CmdResync() : Command("resync") { }
- virtual bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
- if (replSettings.usingReplSets()) {
- errmsg = "resync command not currently supported with replica sets. See RS102 info in the mongodb documentations";
- result.append("info", "http://dochub.mongodb.org/core/resyncingaverystalereplicasetmember");
- return false;
- }
-
- if ( cmdObj.getBoolField( "force" ) ) {
- if ( !waitForSyncToFinish( errmsg ) )
- return false;
- replAllDead = "resync forced";
- }
- if ( !replAllDead ) {
- errmsg = "not dead, no need to resync";
- return false;
- }
- if ( !waitForSyncToFinish( errmsg ) )
- return false;
-
- ReplSource::forceResyncDead( "client" );
- result.append( "info", "triggered resync for all sources" );
- return true;
- }
- bool waitForSyncToFinish( string &errmsg ) const {
- // Wait for slave thread to finish syncing, so sources will be be
- // reloaded with new saved state on next pass.
- Timer t;
- while ( 1 ) {
- if ( syncing == 0 || t.millis() > 30000 )
- break;
- {
- Lock::TempRelease t;
- relinquishSyncingSome = 1;
- sleepmillis(1);
- }
- }
- if ( syncing ) {
- errmsg = "timeout waiting for sync() to finish";
- return false;
- }
- return true;
- }
- } cmdResync;
-
-
ReplSource::ReplSource() {
nClonedThisPass = 0;
ensureMe();
diff --git a/src/mongo/db/repl/master_slave.h b/src/mongo/db/repl/master_slave.h
index eae45fb0db2..79ed9d7044e 100644
--- a/src/mongo/db/repl/master_slave.h
+++ b/src/mongo/db/repl/master_slave.h
@@ -44,6 +44,10 @@ namespace mongo {
// Main entry point for master/slave at startup time.
void startMasterSlave();
+ // externed for use with resync.cpp
+ extern volatile int relinquishSyncingSome;
+ extern volatile int syncing;
+
// Global variable that contains a string telling why master/slave halted
extern const char *replAllDead;
diff --git a/src/mongo/db/repl/resync.cpp b/src/mongo/db/repl/resync.cpp
new file mode 100644
index 00000000000..8bdcca2594b
--- /dev/null
+++ b/src/mongo/db/repl/resync.cpp
@@ -0,0 +1,112 @@
+/**
+* Copyright (C) 2008 10gen Inc.
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU Affero General Public License, version 3,
+* as published by the Free Software Foundation.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU Affero General Public License for more details.
+*
+* You should have received a copy of the GNU Affero General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*
+* As a special exception, the copyright holders give permission to link the
+* code of portions of this program with the OpenSSL library under certain
+* conditions as described in each individual source file and distribute
+* linked combinations including the program with the OpenSSL library. You
+* must comply with the GNU Affero General Public License in all respects for
+* all of the code used other than as permitted herein. If you modify file(s)
+* with this exception, you may extend this exception to your version of the
+* file(s), but you are not obligated to do so. If you do not wish to do so,
+* delete this exception statement from your version. If you delete this
+* exception statement from all source files in the program, then also delete
+* it in the license file.
+*/
+
+#include "mongo/db/commands.h"
+#include "mongo/db/repl/master_slave.h" // replSettings
+#include "mongo/db/repl/replication_server_status.h" // replSettings
+#include "mongo/db/repl/rs.h" // replLocalAuth()
+
+namespace mongo {
+
+    /**
+     * Operator-requested resynchronization of replication, issued as { resync : 1 } against a
+     * master/slave slave or a replica set secondary.
+     *
+     * Replica sets: the command is rejected on a primary; otherwise it delegates to
+     * ReplSetImpl::resync() through the global theReplSet pointer.
+     *
+     * Master/slave: the command only proceeds when replication is already dead (replAllDead is
+     * non-null). Passing { force : true } makes the command mark replication dead itself, after
+     * waiting for any in-progress sync() pass to finish.
+     */
+    class CmdResync : public Command {
+    public:
+        virtual bool slaveOk() const {
+            return true;
+        }
+        virtual bool adminOnly() const {
+            return true;
+        }
+        virtual bool logTheOp() { return false; }
+        virtual bool lockGlobally() const { return true; }
+        virtual LockType locktype() const { return WRITE; }
+
+        /** Requires the `resync` action on the cluster resource. */
+        virtual void addRequiredPrivileges(const std::string& dbname,
+                                           const BSONObj& cmdObj,
+                                           std::vector<Privilege>* out) {
+            ActionSet actions;
+            actions.addAction(ActionType::resync);
+            out->push_back(Privilege(ResourcePattern::forClusterResource(), actions));
+        }
+
+        void help(stringstream& h) const {
+            h << "resync (from scratch) a stale slave or replica set secondary node.\n";
+        }
+
+        CmdResync() : Command("resync") { }
+
+        /**
+         * Runs the command.
+         *
+         * @param cmdObj  the command document; only the boolean field "force" is read, and only
+         *                on the master/slave path.
+         * @param errmsg  set to a human-readable reason whenever false is returned from this
+         *                function directly.
+         * @param result  receives an "info" field on master/slave success.
+         * @return true if a resync was triggered, false otherwise.
+         */
+        virtual bool run(const string&,
+                         BSONObj& cmdObj,
+                         int,
+                         string& errmsg,
+                         BSONObjBuilder& result,
+                         bool fromRepl) {
+            if (replSettings.usingReplSets()) {
+                // theReplSet is dereferenced below; refuse cleanly instead of crashing if the
+                // set object has not been created yet.
+                // NOTE(review): assumes theReplSet can still be null while usingReplSets() is
+                // true (e.g. set configured but not yet initiated) -- confirm against rs.cpp.
+                if (!theReplSet) {
+                    errmsg = "no replication yet active";
+                    return false;
+                }
+                if (theReplSet->isPrimary()) {
+                    errmsg = "primaries cannot resync";
+                    return false;
+                }
+                return theReplSet->resync(errmsg);
+            }
+
+            // below this comment pertains only to master/slave replication
+            if ( cmdObj.getBoolField( "force" ) ) {
+                if ( !waitForSyncToFinish( errmsg ) )
+                    return false;
+                replAllDead = "resync forced";
+            }
+            if ( !replAllDead ) {
+                errmsg = "not dead, no need to resync";
+                return false;
+            }
+            if ( !waitForSyncToFinish( errmsg ) )
+                return false;
+
+            ReplSource::forceResyncDead( "client" );
+            result.append( "info", "triggered resync for all sources" );
+            return true;
+        }
+
+        /**
+         * Waits up to 30 seconds for the slave thread to finish syncing, so that sources will
+         * be reloaded with new saved state on its next pass.
+         *
+         * @param errmsg  set to a timeout message when false is returned.
+         * @return true once `syncing` is observed to be 0, false if it is still non-zero after
+         *         the timeout.
+         */
+        bool waitForSyncToFinish( string &errmsg ) const {
+            const int timeoutMillis = 30000;
+
+            Timer elapsed;
+            while ( syncing != 0 && elapsed.millis() <= timeoutMillis ) {
+                // Presumably drops the lock this command holds for the duration of this scope,
+                // so the sync thread can make progress; relinquishSyncingSome asks it to yield.
+                // Named distinctly so it no longer shadows the timer.
+                Lock::TempRelease releasedLock;
+                relinquishSyncingSome = 1;
+                sleepmillis(1);
+            }
+            if ( syncing ) {
+                errmsg = "timeout waiting for sync() to finish";
+                return false;
+            }
+            return true;
+        }
+    } cmdResync;
+}
diff --git a/src/mongo/db/repl/rs.cpp b/src/mongo/db/repl/rs.cpp
index c7004a277a1..4dff8744c2b 100644
--- a/src/mongo/db/repl/rs.cpp
+++ b/src/mongo/db/repl/rs.cpp
@@ -488,6 +488,7 @@ namespace {
_writerPool(replWriterThreadCount),
_prefetcherPool(replPrefetcherThreadCount),
oplogVersion(0),
+ initialSyncRequested(false), // only used for resync
_indexPrefetchConfig(PREFETCH_ALL) {
}
diff --git a/src/mongo/db/repl/rs.h b/src/mongo/db/repl/rs.h
index ce4940e0aeb..1d8fce9fc72 100644
--- a/src/mongo/db/repl/rs.h
+++ b/src/mongo/db/repl/rs.h
@@ -531,6 +531,11 @@ namespace mongo {
const Member* findById(unsigned id) const;
Member* getMutableMember(unsigned id);
Member* findByName(const std::string& hostname) const;
+
+ /**
+ * Cause the node to resync from scratch.
+ *
+ * Moves this member to RECOVERING, drops local.oplog.rs and flags the sync thread to
+ * perform an initial sync; the resync itself happens asynchronously on that thread.
+ *
+ * @param errmsg intended for a failure description; the current implementation never
+ *               writes to it.
+ * @return true; the current implementation has no failure path.
+ */
+ bool resync(std::string& errmsg);
private:
void _getTargets(list<Target>&, int &configVersion);
void getTargets(list<Target>&, int &configVersion);
@@ -608,6 +613,10 @@ namespace mongo {
static void setInitialSyncFlag();
int oplogVersion;
+
+ // bool for indicating resync need on this node and the mutex that protects it
+ bool initialSyncRequested;
+ boost::mutex initialSyncMutex;
private:
IndexPrefetchConfig _indexPrefetchConfig;
diff --git a/src/mongo/db/repl/rs_initialsync.cpp b/src/mongo/db/repl/rs_initialsync.cpp
index 786209573d1..8e5b092d1e0 100644
--- a/src/mongo/db/repl/rs_initialsync.cpp
+++ b/src/mongo/db/repl/rs_initialsync.cpp
@@ -479,6 +479,10 @@ namespace mongo {
cx.ctx().db()->flushFiles(true);
}
+ {
+ boost::unique_lock<boost::mutex> lock(theReplSet->initialSyncMutex);
+ theReplSet->initialSyncRequested = false;
+ }
// If we just cloned & there were no ops applied, we still want the primary to know where
// we're up to
diff --git a/src/mongo/db/repl/rs_sync.cpp b/src/mongo/db/repl/rs_sync.cpp
index 995b4d51d92..89c6f4e8c52 100644
--- a/src/mongo/db/repl/rs_sync.cpp
+++ b/src/mongo/db/repl/rs_sync.cpp
@@ -415,6 +415,13 @@ namespace replset {
// (always checked in the first iteration of this do-while loop, because
// ops is empty)
if (ops.empty() || now > lastTimeChecked) {
+ {
+ boost::unique_lock<boost::mutex> lock(theReplSet->initialSyncMutex);
+ if (theReplSet->initialSyncRequested) {
+ // got a resync command
+ return;
+ }
+ }
lastTimeChecked = now;
// can we become secondary?
// we have to check this before calling mgr, as we must be a secondary to
@@ -748,10 +755,16 @@ namespace replset {
return;
}
+ bool initialSyncRequested = false;
+ {
+ boost::unique_lock<boost::mutex> lock(theReplSet->initialSyncMutex);
+ initialSyncRequested = theReplSet->initialSyncRequested;
+ }
// Check criteria for doing an initial sync:
// 1. If the oplog is empty, do an initial sync
// 2. If minValid has _initialSyncFlag set, do an initial sync
- if (lastOpTimeWritten.isNull() || getInitialSyncFlag()) {
+ // 3. If initialSyncRequested is true
+ if (lastOpTimeWritten.isNull() || getInitialSyncFlag() || initialSyncRequested) {
syncDoInitialSync();
return; // _syncThread will be recalled, starts from top again in case sync failed.
}
@@ -761,6 +774,20 @@ namespace replset {
tail.oplogApplication();
}
+ // Requests a full resync of this replica set member.
+ // Only requests the resync: the initial sync itself is run by the sync thread, which
+ // checks initialSyncRequested in this file before deciding between initial sync and
+ // normal oplog tailing.
+ // errmsg is never written and the function always returns true.
+ bool ReplSetImpl::resync(string& errmsg) {
+ // leave any readable state before the oplog is destroyed
+ changeState(MemberState::RS_RECOVERING);
+ {
+ // scoped so the "local" database context is released right after the drop
+ Client::Context ctx("local");
+ cc().database()->dropCollection("local.oplog.rs");
+ }
+ _veto.clear();
+ {
+ // flag is read by the sync thread under the same mutex
+ boost::unique_lock<boost::mutex> lock(theReplSet->initialSyncMutex);
+ theReplSet->initialSyncRequested = true;
+ }
+ return true;
+ }
+
void ReplSetImpl::syncThread() {
while( 1 ) {
// After a reconfig, we may not be in the replica set anymore, so