summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authordwight <dwight@10gen.com>2011-09-18 17:23:28 -0400
committerEliot Horowitz <eliot@10gen.com>2011-09-28 08:41:03 -0400
commitf5f0053a1f2e7f1ed042b028f495704f985f56ad (patch)
treed8f9b567da565c0e7a54680b6fdf8804cf16db5d
parent44da5dff2ad30390848bca21be7ca2950c68255b (diff)
downloadmongo-f5f0053a1f2e7f1ed042b028f495704f985f56ad.tar.gz
fix a possible issue with initial sync with replication when updates are in flight.
also correct UpdateResult in one case
-rw-r--r--db/oplog.cpp46
-rw-r--r--db/oplog.h2
-rw-r--r--db/ops/update.cpp3
-rw-r--r--db/repl/rs.h2
-rw-r--r--db/repl/rs_sync.cpp38
5 files changed, 78 insertions, 13 deletions
diff --git a/db/oplog.cpp b/db/oplog.cpp
index dc9db76d9d5..5cb16ec2ee5 100644
--- a/db/oplog.cpp
+++ b/db/oplog.cpp
@@ -625,9 +625,13 @@ namespace mongo {
}
}
- void applyOperation_inlock(const BSONObj& op , bool fromRepl ) {
+ /** @param fromRepl false if from ApplyOpsCmd
+ @return true if was and update should have happened and the document DNE. see replset initial sync code.
+ */
+ bool applyOperation_inlock(const BSONObj& op , bool fromRepl ) {
assertInWriteLock();
LOG(6) << "applying op: " << op << endl;
+ bool failedUpdate = false;
OpCounters * opCounters = fromRepl ? &replOpCounters : &globalOpCounters;
@@ -680,9 +684,45 @@ namespace mongo {
}
else if ( *opType == 'u' ) {
opCounters->gotUpdate();
+ // dm do we create this for a capped collection?
+ // - if not, updates would be slow
+ // - but if were by id would be slow on primary too so maybe ok
+ // - if on primary was by another key and there are other indexes, this could be very bad w/out an index
+ // - if do create, odd to have on secondary but not primary. also can cause secondary to block for
+ // quite a while on creation.
RARELY ensureHaveIdIndex(ns); // otherwise updates will be super slow
OpDebug debug;
- updateObjects(ns, o, op.getObjectField("o2"), /*upsert*/ fields[3].booleanSafe(), /*multi*/ false, /*logop*/ false , debug );
+ BSONObj updateCriteria = op.getObjectField("o2");
+ bool upsert = fields[3].booleanSafe();
+ UpdateResult ur = updateObjects(ns, o, updateCriteria, upsert, /*multi*/ false, /*logop*/ false , debug );
+ if( ur.num == 0 ) {
+ if( ur.mod ) {
+ if( updateCriteria.nFields() == 1 ) {
+ // was a simple { _id : ... } update criteria
+ failedUpdate = true;
+ // todo: probably should assert in these failedUpdate cases if not in initialSync
+ }
+ // need to check to see if it isn't present so we can set failedUpdate correctly.
+ // note that adds some overhead for this extra check in some cases, such as an updateCriteria
+ // of the form
+ // { _id:..., { x : {$size:...} }
+ // thus this is not ideal.
+ else if( Helpers::findById(nsdetails(ns), updateCriteria).isNull() ) {
+ failedUpdate = true;
+ }
+ else {
+ // it's present; zero objects were updated because of additional specifiers in the query for idempotence
+ }
+ }
+ else {
+ // this could happen benignly on an oplog duplicate replay of an upsert
+ // (because we are idempotent),
+ // if an regular non-mod update fails the item is (presumably) missing.
+ if( !upsert ) {
+ failedUpdate = true;
+ }
+ }
+ }
}
else if ( *opType == 'd' ) {
opCounters->gotDelete();
@@ -703,7 +743,7 @@ namespace mongo {
else {
throw MsgAssertionException( 14825 , ErrorMsg("error in applyOperation : unknown opType ", *opType) );
}
-
+ return failedUpdate;
}
class ApplyOpsCmd : public Command {
diff --git a/db/oplog.h b/db/oplog.h
index 2f2b2860478..1d651eaf6cc 100644
--- a/db/oplog.h
+++ b/db/oplog.h
@@ -130,5 +130,5 @@ namespace mongo {
* used for applying from an oplog
* @param fromRepl really from replication or for testing/internal/command/etc...
*/
- void applyOperation_inlock(const BSONObj& op , bool fromRepl = true );
+ bool applyOperation_inlock(const BSONObj& op , bool fromRepl = true );
}
diff --git a/db/ops/update.cpp b/db/ops/update.cpp
index fd9798a8f26..6a7aad49a83 100644
--- a/db/ops/update.cpp
+++ b/db/ops/update.cpp
@@ -1354,7 +1354,8 @@ namespace mongo {
logOp( "i", ns, no );
return UpdateResult( 0 , 0 , 1 , no );
}
- return UpdateResult( 0 , 0 , 0 );
+
+ return UpdateResult( 0 , isOperatorUpdate , 0 );
}
UpdateResult updateObjects(const char *ns, const BSONObj& updateobj, BSONObj patternOrig, bool upsert, bool multi, bool logop , OpDebug& debug ) {
diff --git a/db/repl/rs.h b/db/repl/rs.h
index 61041a67f8f..ba6a0c90829 100644
--- a/db/repl/rs.h
+++ b/db/repl/rs.h
@@ -491,7 +491,7 @@ namespace mongo {
void _syncThread();
bool tryToGoLiveAsASecondary(OpTime&); // readlocks
void syncTail();
- void syncApply(const BSONObj &o);
+ bool syncApply(const BSONObj &o);
unsigned _syncRollback(OplogReader& r);
void syncRollback(OplogReader& r);
void syncFixUp(HowToFixUp& h, OplogReader& r);
diff --git a/db/repl/rs_sync.cpp b/db/repl/rs_sync.cpp
index b29328b5935..7ec5d378b64 100644
--- a/db/repl/rs_sync.cpp
+++ b/db/repl/rs_sync.cpp
@@ -32,17 +32,19 @@ namespace mongo {
}
}
- /* apply the log op that is in param o */
- void ReplSetImpl::syncApply(const BSONObj &o) {
+ /* apply the log op that is in param o
+ @return bool failedUpdate
+ */
+ bool ReplSetImpl::syncApply(const BSONObj &o) {
const char *ns = o.getStringField("ns");
if ( *ns == '.' || *ns == 0 ) {
blank(o);
- return;
+ return false;
}
Client::Context ctx(ns);
ctx.getClient()->curop()->reset();
- applyOperation_inlock(o);
+ return applyOperation_inlock(o);
}
/* initial oplog application, during initial sync, after cloning.
@@ -57,6 +59,7 @@ namespace mongo {
const string hn = source->h().toString();
OplogReader r;
+ OplogReader missingObjReader;
try {
if( !r.connect(hn) ) {
log() << "replSet initial sync error can't connect to " << hn << " to read " << rsoplog << rsLog;
@@ -133,9 +136,30 @@ namespace mongo {
throw DBException("primary changed",0);
}
- if( ts >= applyGTE ) {
- // optimes before we started copying need not be applied.
- syncApply(o);
+ if( ts >= applyGTE ) { // optimes before we started copying need not be applied.
+ bool failedUpdate = syncApply(o);
+ if( failedUpdate ) {
+ // we don't have the object yet, which is possible on initial sync. get it.
+ log() << "replSet info adding missing object" << endl; // rare enough we can log
+ if( !missingObjReader.connect(hn) ) { // ok to call more than once
+ log() << "replSet initial sync fails, couldn't connect to " << hn << endl;
+ return false;
+ }
+ const char *ns = o.getStringField("ns");
+ BSONObj query = BSONObjBuilder().append(o.getObjectField("o2")[_id]).obj(); // might be more than just _id in the update criteria
+ BSONObj missingObj = missingObjReader.findOne(
+ ns,
+ query );
+ assert( !missingObj.isEmpty() );
+ DiskLoc d = theDataFileMgr.insert(ns, (void*) missingObj.objdata(), missingObj.objsize());
+ assert( !d.isNull() );
+ // now reapply the update from above
+ bool failed = syncApply(o);
+ if( failed ) {
+ log() << "replSet update still fails after adding missing object " << ns << endl;
+ assert(false);
+ }
+ }
}
_logOpObjRS(o); /* with repl sets we write the ops to our oplog too */
}