diff options
author | Kristina <kristina@10gen.com> | 2011-11-14 13:10:52 -0500 |
---|---|---|
committer | Kristina <kristina@10gen.com> | 2011-11-14 13:10:52 -0500 |
commit | 94b35f403221390f5dac1750a5c4b6302eaadf2b (patch) | |
tree | 6a42f61f65ab0a3004f5e2562d19059d357d4e4b | |
parent | 1519ef321f72b59dc8b586f049e7238728727e91 (diff) | |
download | mongo-94b35f403221390f5dac1750a5c4b6302eaadf2b.tar.gz |
Generalize recloning docs on initial oplog application SERVER-4270
Conflicts:
db/repl.cpp
db/repl.h
db/repl/rs_sync.cpp
-rw-r--r-- | db/oplog.cpp | 34 |
-rw-r--r-- | db/oplog.h | 6 |
-rw-r--r-- | db/repl.cpp | 6 |
-rw-r--r-- | db/repl.h | 2 |
-rw-r--r-- | db/repl/rs_sync.cpp | 41 |
5 files changed, 48 insertions, 41 deletions
diff --git a/db/oplog.cpp b/db/oplog.cpp index e248d5e11ec..151a4cd2269 100644 --- a/db/oplog.cpp +++ b/db/oplog.cpp @@ -480,6 +480,40 @@ namespace mongo { } } + bool shouldRetry(const BSONObj& o, const string& hn) { + OplogReader missingObjReader; + + // we don't have the object yet, which is possible on initial sync. get it. + log() << "replication info adding missing object" << endl; // rare enough we can log + uassert(15916, str::stream() << "Can no longer connect to initial sync source: " << hn, missingObjReader.connect(hn)); + + const char *ns = o.getStringField("ns"); + // might be more than just _id in the update criteria + BSONObj query = BSONObjBuilder().append(o.getObjectField("o2")["_id"]).obj(); + BSONObj missingObj; + try { + missingObj = missingObjReader.findOne(ns, query); + } catch(DBException& e) { + log() << "replication assertion fetching missing object: " << e.what() << endl; + throw; + } + + if( missingObj.isEmpty() ) { + log() << "replication missing object not found on source. presumably deleted later in oplog" << endl; + log() << "replication object: " << o.getObjectField("o2").toString() << endl; + log() << "replication o object: " << o.getObjectField("o").toString() << endl; + + return false; + } + else { + Client::Context ctx(ns); + DiskLoc d = theDataFileMgr.insert(ns, (void*) missingObj.objdata(), missingObj.objsize()); + uassert(15917, "Got bad disk location when attempting to insert", !d.isNull()); + + return true; + } + } + /** @param fromRepl false if from ApplyOpsCmd @return true if was and update should have happened and the document DNE. see replset initial sync code. */ diff --git a/db/oplog.h b/db/oplog.h index cc8b4d5d972..2401e98d5fb 100644 --- a/db/oplog.h +++ b/db/oplog.h @@ -212,6 +212,12 @@ namespace mongo { * take an op and apply locally * used for applying from an oplog * @param fromRepl really from replication or for testing/internal/command/etc... 
+ * Returns if the op was an update that could not be applied (true on failure) */ bool applyOperation_inlock(const BSONObj& op , bool fromRepl = true ); + + /** + * If applyOperation_inlock should be called again after an update fails. + */ + bool shouldRetry(const BSONObj& op , const string& hn); } diff --git a/db/repl.cpp b/db/repl.cpp index b14034d308a..4d648204a92 100644 --- a/db/repl.cpp +++ b/db/repl.cpp @@ -851,7 +851,10 @@ namespace mongo { void ReplSource::applyOperation(const BSONObj& op) { try { - applyOperation_inlock( op ); + bool failedUpdate = applyOperation_inlock( op ); + if (failedUpdate && shouldRetry(op, hostName)) { + uassert(15914, "Failure retrying initial sync update", applyOperation_inlock(op)); + } } catch ( UserException& e ) { log() << "sync: caught user assertion " << e << " while applying op: " << op << endl;; @@ -1351,6 +1354,7 @@ namespace mongo { setLastSavedLocalTs( nextLastSaved ); } } + if( oplogReader.awaitCapable() && tailing ) okResultCode = 0; // don't sleep syncedTo = nextOpTime; diff --git a/db/repl.h b/db/repl.h index 45036fa30d7..e44607919e8 100644 --- a/db/repl.h +++ b/db/repl.h @@ -134,7 +134,7 @@ namespace mongo { public: OplogReader oplogReader; - static void applyOperation(const BSONObj& op); + void applyOperation(const BSONObj& op); bool replacing; // in "replace mode" -- see CmdReplacePeer bool paired; // --pair in use string hostName; // ip addr or hostname plus optionally, ":<port>" diff --git a/db/repl/rs_sync.cpp b/db/repl/rs_sync.cpp index 7f8522de44b..82c845e7145 100644 --- a/db/repl/rs_sync.cpp +++ b/db/repl/rs_sync.cpp @@ -58,7 +58,6 @@ namespace mongo { const string hn = source->h().toString(); OplogReader r; - OplogReader missingObjReader; try { if( !r.connect(hn) ) { log() << "replSet initial sync error can't connect to " << hn << " to read " << rsoplog << rsLog; @@ -137,44 +136,8 @@ namespace mongo { if( ts >= applyGTE ) { // optimes before we started copying need not be applied. 
bool failedUpdate = syncApply(o); - if( failedUpdate ) { - // we don't have the object yet, which is possible on initial sync. get it. - log() << "replSet info adding missing object" << endl; // rare enough we can log - if( !missingObjReader.connect(hn) ) { // ok to call more than once - log() << "replSet initial sync fails, couldn't connect to " << hn << endl; - return false; - } - const char *ns = o.getStringField("ns"); - BSONObj query = BSONObjBuilder().append(o.getObjectField("o2")["_id"]).obj(); // might be more than just _id in the update criteria - BSONObj missingObj; - try { - missingObj = missingObjReader.findOne( - ns, - query ); - } catch(...) { - log() << "replSet assertion fetching missing object" << endl; - throw; - } - if( missingObj.isEmpty() ) { - log() << "replSet missing object not found on source. presumably deleted later in oplog" << endl; - log() << "replSet op: " << o.toString() << endl; - } - else { - Client::Context ctx(ns); - try { - DiskLoc d = theDataFileMgr.insert(ns, (void*) missingObj.objdata(), missingObj.objsize()); - assert( !d.isNull() ); - } catch(...) { - log() << "replSet assertion during insert of missing object" << endl; - throw; - } - // now reapply the update from above - bool failed = syncApply(o); - if( failed ) { - log() << "replSet update still fails after adding missing object " << ns << endl; - assert(false); - } - } + if( failedUpdate && shouldRetry(o, hn)) { + uassert(15915, "replSet update still fails after adding missing object", syncApply(o)); } } _logOpObjRS(o); /* with repl sets we write the ops to our oplog too */ |