summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKristina <kristina@10gen.com>2011-11-14 13:10:52 -0500
committerKristina <kristina@10gen.com>2011-11-14 13:10:52 -0500
commit94b35f403221390f5dac1750a5c4b6302eaadf2b (patch)
tree6a42f61f65ab0a3004f5e2562d19059d357d4e4b
parent1519ef321f72b59dc8b586f049e7238728727e91 (diff)
downloadmongo-94b35f403221390f5dac1750a5c4b6302eaadf2b.tar.gz
Generalize recloning docs on initial oplog application SERVER-4270
Conflicts: db/repl.cpp db/repl.h db/repl/rs_sync.cpp
-rw-r--r--db/oplog.cpp34
-rw-r--r--db/oplog.h6
-rw-r--r--db/repl.cpp6
-rw-r--r--db/repl.h2
-rw-r--r--db/repl/rs_sync.cpp41
5 files changed, 48 insertions, 41 deletions
diff --git a/db/oplog.cpp b/db/oplog.cpp
index e248d5e11ec..151a4cd2269 100644
--- a/db/oplog.cpp
+++ b/db/oplog.cpp
@@ -480,6 +480,40 @@ namespace mongo {
}
}
+ bool shouldRetry(const BSONObj& o, const string& hn) { // o: op whose update could not be applied; hn: host to fetch the missing document from
+ OplogReader missingObjReader;
+ // Returns true once the missing document has been copied from hn and inserted locally; false if hn does not have it.
+ // we don't have the object yet, which is possible on initial sync. get it.
+ log() << "replication info adding missing object" << endl; // rare enough we can log
+ uassert(15916, str::stream() << "Can no longer connect to initial sync source: " << hn, missingObjReader.connect(hn));
+
+ const char *ns = o.getStringField("ns");
+ // the update criteria ("o2") might contain more than just _id, so query the source by _id alone
+ BSONObj query = BSONObjBuilder().append(o.getObjectField("o2")["_id"]).obj();
+ BSONObj missingObj;
+ try {
+ missingObj = missingObjReader.findOne(ns, query);
+ } catch(DBException& e) {
+ log() << "replication assertion fetching missing object: " << e.what() << endl;
+ throw; // logged for diagnostics only; the caller still sees the original exception
+ }
+
+ if( missingObj.isEmpty() ) {
+ log() << "replication missing object not found on source. presumably deleted later in oplog" << endl;
+ log() << "replication object: " << o.getObjectField("o2").toString() << endl;
+ log() << "replication o object: " << o.getObjectField("o").toString() << endl;
+ // nothing was inserted, so re-applying the op would not help
+ return false;
+ }
+ else {
+ Client::Context ctx(ns);
+ DiskLoc d = theDataFileMgr.insert(ns, (void*) missingObj.objdata(), missingObj.objsize());
+ uassert(15917, "Got bad disk location when attempting to insert", !d.isNull());
+ // the document now exists locally; tell the caller to apply the op again
+ return true;
+ }
+ }
+
/** @param fromRepl false if from ApplyOpsCmd
@return true if was and update should have happened and the document DNE. see replset initial sync code.
*/
diff --git a/db/oplog.h b/db/oplog.h
index cc8b4d5d972..2401e98d5fb 100644
--- a/db/oplog.h
+++ b/db/oplog.h
@@ -212,6 +212,12 @@ namespace mongo {
* take an op and apply locally
* used for applying from an oplog
* @param fromRepl really from replication or for testing/internal/command/etc...
+ * @return true if the op was an update that could not be applied (i.e. true on failure), false otherwise
*/
bool applyOperation_inlock(const BSONObj& op , bool fromRepl = true );
+
+ /**
+ * @return true if applyOperation_inlock should be called again after an update fails (the missing document was fetched from host hn and inserted locally)
+ */
+ bool shouldRetry(const BSONObj& op , const string& hn);
}
diff --git a/db/repl.cpp b/db/repl.cpp
index b14034d308a..4d648204a92 100644
--- a/db/repl.cpp
+++ b/db/repl.cpp
@@ -851,7 +851,10 @@ namespace mongo {
void ReplSource::applyOperation(const BSONObj& op) {
try {
- applyOperation_inlock( op );
+ bool failedUpdate = applyOperation_inlock( op ); // true means the update could not be applied
+ if (failedUpdate && shouldRetry(op, hostName)) {
+ uassert(15914, "Failure retrying initial sync update", !applyOperation_inlock(op)); // the retry must succeed, i.e. return false
+ }
}
catch ( UserException& e ) {
log() << "sync: caught user assertion " << e << " while applying op: " << op << endl;;
@@ -1351,6 +1354,7 @@ namespace mongo {
setLastSavedLocalTs( nextLastSaved );
}
}
+
if( oplogReader.awaitCapable() && tailing )
okResultCode = 0; // don't sleep
syncedTo = nextOpTime;
diff --git a/db/repl.h b/db/repl.h
index 45036fa30d7..e44607919e8 100644
--- a/db/repl.h
+++ b/db/repl.h
@@ -134,7 +134,7 @@ namespace mongo {
public:
OplogReader oplogReader;
- static void applyOperation(const BSONObj& op);
+ void applyOperation(const BSONObj& op);
bool replacing; // in "replace mode" -- see CmdReplacePeer
bool paired; // --pair in use
string hostName; // ip addr or hostname plus optionally, ":<port>"
diff --git a/db/repl/rs_sync.cpp b/db/repl/rs_sync.cpp
index 7f8522de44b..82c845e7145 100644
--- a/db/repl/rs_sync.cpp
+++ b/db/repl/rs_sync.cpp
@@ -58,7 +58,6 @@ namespace mongo {
const string hn = source->h().toString();
OplogReader r;
- OplogReader missingObjReader;
try {
if( !r.connect(hn) ) {
log() << "replSet initial sync error can't connect to " << hn << " to read " << rsoplog << rsLog;
@@ -137,44 +136,8 @@ namespace mongo {
if( ts >= applyGTE ) { // optimes before we started copying need not be applied.
bool failedUpdate = syncApply(o);
- if( failedUpdate ) {
- // we don't have the object yet, which is possible on initial sync. get it.
- log() << "replSet info adding missing object" << endl; // rare enough we can log
- if( !missingObjReader.connect(hn) ) { // ok to call more than once
- log() << "replSet initial sync fails, couldn't connect to " << hn << endl;
- return false;
- }
- const char *ns = o.getStringField("ns");
- BSONObj query = BSONObjBuilder().append(o.getObjectField("o2")["_id"]).obj(); // might be more than just _id in the update criteria
- BSONObj missingObj;
- try {
- missingObj = missingObjReader.findOne(
- ns,
- query );
- } catch(...) {
- log() << "replSet assertion fetching missing object" << endl;
- throw;
- }
- if( missingObj.isEmpty() ) {
- log() << "replSet missing object not found on source. presumably deleted later in oplog" << endl;
- log() << "replSet op: " << o.toString() << endl;
- }
- else {
- Client::Context ctx(ns);
- try {
- DiskLoc d = theDataFileMgr.insert(ns, (void*) missingObj.objdata(), missingObj.objsize());
- assert( !d.isNull() );
- } catch(...) {
- log() << "replSet assertion during insert of missing object" << endl;
- throw;
- }
- // now reapply the update from above
- bool failed = syncApply(o);
- if( failed ) {
- log() << "replSet update still fails after adding missing object " << ns << endl;
- assert(false);
- }
- }
+ if( failedUpdate && shouldRetry(o, hn)) { // shouldRetry inserted the missing document, so re-apply the op
+ uassert(15915, "replSet update still fails after adding missing object", !syncApply(o)); // syncApply returns true on failure
}
}
_logOpObjRS(o); /* with repl sets we write the ops to our oplog too */