diff options
author | Siyuan Zhou <siyuan.zhou@mongodb.com> | 2016-02-05 17:07:32 -0500 |
---|---|---|
committer | Siyuan Zhou <siyuan.zhou@mongodb.com> | 2016-02-08 17:41:02 -0500 |
commit | 6935119d85848bde5b189c2c0ccf49b1ce10f0ed (patch) | |
tree | 0387ceef50c1ea4c8d52d2f3d0553277521e9258 | |
parent | 3cc599e2c4e253981034b5a63d457c939cf27e09 (diff) | |
download | mongo-6935119d85848bde5b189c2c0ccf49b1ce10f0ed.tar.gz |
SERVER-22262 Do not truncate the last applied oplog entry during batch recovery
-rw-r--r-- | jstests/replsets/oplog_truncated_on_recovery.js | 33 |
-rw-r--r-- | src/mongo/db/repl/bgsync.cpp | 3 |
-rw-r--r-- | src/mongo/db/repl/oplog.cpp | 10 |
3 files changed, 24 insertions, 22 deletions
diff --git a/jstests/replsets/oplog_truncated_on_recovery.js b/jstests/replsets/oplog_truncated_on_recovery.js index b9fee8aefba..4fd4690f0c6 100644 --- a/jstests/replsets/oplog_truncated_on_recovery.js +++ b/jstests/replsets/oplog_truncated_on_recovery.js @@ -1,10 +1,10 @@ /** - * This test will ensure that a failed a batch apply will remove the any oplog + * This test will ensure that recovery from a failed batch application will remove the oplog * entries from that batch. * * To do this we: * -- Create single node replica set - * -- Set minvalid manually on primary way ahead (5 minutes) + * -- Set minvalid manually on primary way ahead (5 days) * -- Write some oplog entries newer than minvalid.start * -- Ensure restarted primary comes up in recovering and truncates the oplog * -- Success! @@ -54,29 +54,21 @@ } }))); - // Set minvalid to something far in the future for the current primary, to - // simulate recovery. - // Note: This is so far in the future (5 days) that it will never become - // secondary. + // Set minvalid to something far in the future for the current primary, to simulate recovery. + // Note: This is so far in the future (5 days) that it will never become secondary. var farFutureTS = new Timestamp(Math.floor(new Date().getTime() / 1000) + (60 * 60 * 24 * 5 /* in five days */), 0); var rsgs = assert.commandWorked(localDB.adminCommand("replSetGetStatus")); log(rsgs); var primaryOpTime = rsgs.members[0].optime; - var primaryLastTS = rsgs.members[0].optime.ts; - log(primaryLastTS); + log(primaryOpTime); // Set the start of the failed batch - primaryOpTime.ts = new Timestamp(primaryOpTime.ts.t, primaryOpTime.ts.i + 1); - log(primaryLastTS); jsTest.log("future TS: " + tojson(farFutureTS) + ", date:" + tsToDate(farFutureTS)); - // We do an update in case there is a minvalid document on the primary - // already. 
- // If the doc doesn't exist then upsert:true will create it, and the - // writeConcern ensures - // that update returns details of the write, like whether an update or - // insert was performed. + // We do an update in case there is a minvalid document on the primary already. + // If the doc doesn't exist then upsert:true will create it, and the writeConcern ensures + // that update returns details of the write, like whether an update or insert was performed. log(assert.writeOK(minvalidColl.update( {}, { @@ -92,12 +84,15 @@ } }))); + // Insert a diverged oplog entry that will be truncated after restart. + var divergedTS = new Timestamp(primaryOpTime.ts.t, primaryOpTime.ts.i + 1); log(assert.writeOK(localDB.oplog.rs.insert( { _id : 0, - ts : primaryOpTime.ts, + ts : divergedTS, op : "n", - term : -1 + h: NumberLong(0), + t : NumberLong(-1) }))); log(localDB.oplog.rs.find().toArray()); log(assert.commandWorked(localDB.adminCommand("replSetGetStatus"))); @@ -122,7 +117,7 @@ $natural : -1 }).limit(-1).next().ts; log(localDB.oplog.rs.find().toArray()); - assert.eq(primaryLastTS, lastTS); + assert.eq(primaryOpTime.ts, lastTS); return true; }); diff --git a/src/mongo/db/repl/bgsync.cpp b/src/mongo/db/repl/bgsync.cpp index 50a9e69c0e2..f24af61d261 100644 --- a/src/mongo/db/repl/bgsync.cpp +++ b/src/mongo/db/repl/bgsync.cpp @@ -773,7 +773,8 @@ long long BackgroundSync::_readLastAppliedHash(OperationContext* txn) { BSONElement hashElement = oplogEntry[hashFieldName]; if (hashElement.eoo()) { severe() << "Most recent entry in " << rsOplogName << " missing \"" << hashFieldName - << "\" field"; + << "\" field. 
Oplog entry: " << oplogEntry; + fassertFailed(18902); } if (hashElement.type() != NumberLong) { diff --git a/src/mongo/db/repl/oplog.cpp b/src/mongo/db/repl/oplog.cpp index 3d2e0fae885..eb8cb8764ea 100644 --- a/src/mongo/db/repl/oplog.cpp +++ b/src/mongo/db/repl/oplog.cpp @@ -305,7 +305,7 @@ unique_ptr<OplogDocWriter> _logOpWriter(OperationContext* txn, } } // end anon namespace -// Truncates the oplog to and including the "truncateTimestamp" entry. +// Truncates the oplog to but excluding the "truncateTimestamp" entry. void truncateOplogTo(OperationContext* txn, Timestamp truncateTimestamp) { const NamespaceString oplogNss(rsOplogName); ScopedTransaction transaction(txn, MODE_IX); @@ -339,8 +339,14 @@ void truncateOplogTo(OperationContext* txn, Timestamp truncateTimestamp) { first = false; } - if (tsElem.timestamp() < truncateTimestamp) { + if (tsElem.timestamp() == truncateTimestamp) { break; + } else if (tsElem.timestamp() < truncateTimestamp) { + fassertFailedWithStatusNoTrace(34411, + Status(ErrorCodes::OplogOutOfOrder, + str::stream() << "Can't find " + << truncateTimestamp.toString() + << " to truncate from!")); } foundSomethingToTruncate = true; |