summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSiyuan Zhou <siyuan.zhou@mongodb.com>2016-02-05 17:07:32 -0500
committerSiyuan Zhou <siyuan.zhou@mongodb.com>2016-02-08 17:41:02 -0500
commit6935119d85848bde5b189c2c0ccf49b1ce10f0ed (patch)
tree0387ceef50c1ea4c8d52d2f3d0553277521e9258
parent3cc599e2c4e253981034b5a63d457c939cf27e09 (diff)
downloadmongo-6935119d85848bde5b189c2c0ccf49b1ce10f0ed.tar.gz
SERVER-22262 Do not truncate the last applied oplog entry during batch recovery
-rw-r--r--jstests/replsets/oplog_truncated_on_recovery.js33
-rw-r--r--src/mongo/db/repl/bgsync.cpp3
-rw-r--r--src/mongo/db/repl/oplog.cpp10
3 files changed, 24 insertions, 22 deletions
diff --git a/jstests/replsets/oplog_truncated_on_recovery.js b/jstests/replsets/oplog_truncated_on_recovery.js
index b9fee8aefba..4fd4690f0c6 100644
--- a/jstests/replsets/oplog_truncated_on_recovery.js
+++ b/jstests/replsets/oplog_truncated_on_recovery.js
@@ -1,10 +1,10 @@
/**
- * This test will ensure that a failed a batch apply will remove the any oplog
+ * This test will ensure that recovery from a failed batch application will remove the oplog
* entries from that batch.
*
* To do this we:
* -- Create single node replica set
- * -- Set minvalid manually on primary way ahead (5 minutes)
+ * -- Set minvalid manually on primary way ahead (5 days)
* -- Write some oplog entries newer than minvalid.start
* -- Ensure restarted primary comes up in recovering and truncates the oplog
* -- Success!
@@ -54,29 +54,21 @@
}
})));
- // Set minvalid to something far in the future for the current primary, to
- // simulate recovery.
- // Note: This is so far in the future (5 days) that it will never become
- // secondary.
+ // Set minvalid to something far in the future for the current primary, to simulate recovery.
+ // Note: This is so far in the future (5 days) that it will never become secondary.
var farFutureTS = new Timestamp(Math.floor(new Date().getTime() / 1000)
+ (60 * 60 * 24 * 5 /* in five days */), 0);
var rsgs = assert.commandWorked(localDB.adminCommand("replSetGetStatus"));
log(rsgs);
var primaryOpTime = rsgs.members[0].optime;
- var primaryLastTS = rsgs.members[0].optime.ts;
- log(primaryLastTS);
+ log(primaryOpTime);
// Set the start of the failed batch
- primaryOpTime.ts = new Timestamp(primaryOpTime.ts.t, primaryOpTime.ts.i + 1);
- log(primaryLastTS);
jsTest.log("future TS: " + tojson(farFutureTS) + ", date:" + tsToDate(farFutureTS));
- // We do an update in case there is a minvalid document on the primary
- // already.
- // If the doc doesn't exist then upsert:true will create it, and the
- // writeConcern ensures
- // that update returns details of the write, like whether an update or
- // insert was performed.
+ // We do an update in case there is a minvalid document on the primary already.
+ // If the doc doesn't exist then upsert:true will create it, and the writeConcern ensures
+ // that update returns details of the write, like whether an update or insert was performed.
log(assert.writeOK(minvalidColl.update(
{},
{
@@ -92,12 +84,15 @@
}
})));
+ // Insert a diverged oplog entry that will be truncated after restart.
+ var divergedTS = new Timestamp(primaryOpTime.ts.t, primaryOpTime.ts.i + 1);
log(assert.writeOK(localDB.oplog.rs.insert(
{
_id : 0,
- ts : primaryOpTime.ts,
+ ts : divergedTS,
op : "n",
- term : -1
+ h: NumberLong(0),
+ t : NumberLong(-1)
})));
log(localDB.oplog.rs.find().toArray());
log(assert.commandWorked(localDB.adminCommand("replSetGetStatus")));
@@ -122,7 +117,7 @@
$natural : -1
}).limit(-1).next().ts;
log(localDB.oplog.rs.find().toArray());
- assert.eq(primaryLastTS, lastTS);
+ assert.eq(primaryOpTime.ts, lastTS);
return true;
});
diff --git a/src/mongo/db/repl/bgsync.cpp b/src/mongo/db/repl/bgsync.cpp
index 50a9e69c0e2..f24af61d261 100644
--- a/src/mongo/db/repl/bgsync.cpp
+++ b/src/mongo/db/repl/bgsync.cpp
@@ -773,7 +773,8 @@ long long BackgroundSync::_readLastAppliedHash(OperationContext* txn) {
BSONElement hashElement = oplogEntry[hashFieldName];
if (hashElement.eoo()) {
severe() << "Most recent entry in " << rsOplogName << " missing \"" << hashFieldName
- << "\" field";
+ << "\" field. Oplog entry: " << oplogEntry;
+
fassertFailed(18902);
}
if (hashElement.type() != NumberLong) {
diff --git a/src/mongo/db/repl/oplog.cpp b/src/mongo/db/repl/oplog.cpp
index 3d2e0fae885..eb8cb8764ea 100644
--- a/src/mongo/db/repl/oplog.cpp
+++ b/src/mongo/db/repl/oplog.cpp
@@ -305,7 +305,7 @@ unique_ptr<OplogDocWriter> _logOpWriter(OperationContext* txn,
}
} // end anon namespace
-// Truncates the oplog to and including the "truncateTimestamp" entry.
+// Truncates the oplog after the "truncateTimestamp" entry; that entry itself is kept.
void truncateOplogTo(OperationContext* txn, Timestamp truncateTimestamp) {
const NamespaceString oplogNss(rsOplogName);
ScopedTransaction transaction(txn, MODE_IX);
@@ -339,8 +339,14 @@ void truncateOplogTo(OperationContext* txn, Timestamp truncateTimestamp) {
first = false;
}
- if (tsElem.timestamp() < truncateTimestamp) {
+ if (tsElem.timestamp() == truncateTimestamp) {
break;
+ } else if (tsElem.timestamp() < truncateTimestamp) {
+ fassertFailedWithStatusNoTrace(34411,
+ Status(ErrorCodes::OplogOutOfOrder,
+ str::stream() << "Can't find "
+ << truncateTimestamp.toString()
+ << " to truncate from!"));
}
foundSomethingToTruncate = true;