diff options
author | Kristina <kristina@10gen.com> | 2012-10-04 10:06:12 -0400 |
---|---|---|
committer | Eric Milkie <milkie@10gen.com> | 2012-10-04 10:52:24 -0400 |
commit | e0c65e3f6b8c57c3eb873c607a029dcb05b99e5f (patch) | |
tree | 026be60bbda6aa0420d63ecb5feb4a48fd31422e | |
parent | d390508cbf59251b96aacd2c6e5e726c6a0c759b (diff) | |
download | mongo-e0c65e3f6b8c57c3eb873c607a029dcb05b99e5f.tar.gz |
SERVER-7199 Bump minvalid correctly when batch goes past minvalid
-rw-r--r-- | src/mongo/db/repl/rs_initialsync.cpp | 2 | ||||
-rw-r--r-- | src/mongo/db/repl/rs_sync.cpp | 25 | ||||
-rw-r--r-- | src/mongo/db/repl/rs_sync.h | 33 |
3 files changed, 44 insertions, 16 deletions
diff --git a/src/mongo/db/repl/rs_initialsync.cpp b/src/mongo/db/repl/rs_initialsync.cpp index 3a56066e091..bc98c903167 100644 --- a/src/mongo/db/repl/rs_initialsync.cpp +++ b/src/mongo/db/repl/rs_initialsync.cpp @@ -295,7 +295,7 @@ namespace mongo { // apply startingTS..mvoptime portion of the oplog { try { - syncer.oplogApplication(lastOp, minValid); + minValid = syncer.oplogApplication(lastOp, minValid); } catch (const DBException&) { log() << "replSet initial sync failed during oplog application phase" << rsLog; diff --git a/src/mongo/db/repl/rs_sync.cpp b/src/mongo/db/repl/rs_sync.cpp index 7c0a5557122..bd7d24c6742 100644 --- a/src/mongo/db/repl/rs_sync.cpp +++ b/src/mongo/db/repl/rs_sync.cpp @@ -240,18 +240,17 @@ namespace replset { InitialSync::~InitialSync() {} - void SyncTail::oplogApplySegment(const BSONObj& applyGTEObj, const BSONObj& minValidObj, + BSONObj SyncTail::oplogApplySegment(const BSONObj& applyGTEObj, const BSONObj& minValidObj, MultiSyncApplyFunc func) { OpTime applyGTE = applyGTEObj["ts"]._opTime(); OpTime minValid = minValidObj["ts"]._opTime(); - // if there were no writes during the initial sync, there will be nothing in the queue so - // just go live - if (minValid == applyGTE) { - return; - } + // We have to keep track of the last op applied to the data, because there's no other easy + // way of getting this data synchronously. Batches may go past minValidObj, so we need to + // know to bump minValid past minValidObj. + BSONObj lastOp = applyGTEObj; + OpTime ts = applyGTE; - OpTime ts; time_t start = time(0); unsigned long long n = 0, lastN = 0; @@ -281,17 +280,19 @@ namespace replset { } // we want to keep a record of the last op applied, to compare with minvalid - const BSONObj& lastOp = ops.getDeque().back(); + lastOp = ops.getDeque().back(); OpTime tempTs = lastOp["ts"]._opTime(); applyOpsToOplog(&ops.getDeque()); ts = tempTs; } + + return lastOp; } /* initial oplog application, during initial sync, after cloning. 
*/ - void InitialSync::oplogApplication(const BSONObj& applyGTEObj, const BSONObj& minValidObj) { + BSONObj InitialSync::oplogApplication(const BSONObj& applyGTEObj, const BSONObj& minValidObj) { if (replSetForceInitialSyncFailure > 0) { log() << "replSet test code invoked, forced InitialSync failure: " << replSetForceInitialSyncFailure << rsLog; replSetForceInitialSyncFailure--; @@ -302,11 +303,11 @@ namespace replset { syncApply(applyGTEObj); _logOpObjRS(applyGTEObj); - oplogApplySegment(applyGTEObj, minValidObj, multiInitialSyncApply); + return oplogApplySegment(applyGTEObj, minValidObj, multiInitialSyncApply); } - void SyncTail::oplogApplication(const BSONObj& applyGTEObj, const BSONObj& minValidObj) { - oplogApplySegment(applyGTEObj, minValidObj, multiSyncApply); + BSONObj SyncTail::oplogApplication(const BSONObj& applyGTEObj, const BSONObj& minValidObj) { + return oplogApplySegment(applyGTEObj, minValidObj, multiSyncApply); } /* tail an oplog. ok to return, will be re-called. */ diff --git a/src/mongo/db/repl/rs_sync.h b/src/mongo/db/repl/rs_sync.h index aa3a5c9a2cb..1e52380b458 100644 --- a/src/mongo/db/repl/rs_sync.h +++ b/src/mongo/db/repl/rs_sync.h @@ -39,9 +39,31 @@ namespace replset { SyncTail(BackgroundSyncInterface *q); virtual ~SyncTail(); virtual bool syncApply(const BSONObj &o, bool convertUpdateToUpsert = false); - void oplogApplySegment(const BSONObj& applyGTEObj, const BSONObj& minValidObj, + + /** + * Apply ops from applyGTEObj's ts to at least minValidObj's ts. Note that, due to + * batching, this may end up applying ops beyond minValidObj's ts. + * + * @param applyGTEObj the op to start replicating at. This is actually not used except in + * comparison to minValidObj: the background sync thread keeps its own + * record of where we're synced to and starts providing ops from that + * point. + * @param minValidObj the op to finish syncing at. This function cannot return (other than + * fatally erroring out) without applying at least this op. 
+ * @param func whether this should use initial sync logic (recloning docs) or + * "normal" logic. + * @return BSONObj the op that was synced to. This may be greater than minValidObj, as a + * single batch might blow right by minvalid. If applyGTEObj is the same + * op as minValidObj, this will be applyGTEObj. + */ + BSONObj oplogApplySegment(const BSONObj& applyGTEObj, const BSONObj& minValidObj, MultiSyncApplyFunc func); - virtual void oplogApplication(const BSONObj& applyGTEObj, const BSONObj& minValidObj); + + /** + * Runs oplogApplySegment without allowing recloning documents. + */ + virtual BSONObj oplogApplication(const BSONObj& applyGTEObj, const BSONObj& minValidObj); + void oplogApplication(); bool peek(BSONObj* obj); @@ -107,7 +129,12 @@ namespace replset { public: virtual ~InitialSync(); InitialSync(BackgroundSyncInterface *q); - void oplogApplication(const BSONObj& applyGTEObj, const BSONObj& minValidObj); + + /** + * Creates the initial oplog entry: applies applyGTEObj and writes it to the oplog. Then + * this runs oplogApplySegment allowing recloning documents. + */ + BSONObj oplogApplication(const BSONObj& applyGTEObj, const BSONObj& minValidObj); }; // TODO: move hbmsg into an error-keeping class (SERVER-4444) |