SERVER-21483 Don't reorder capped inserts on secondaries

Manual backport of bbff16c4b196133718b3d3f5cf7ce2095cc6d2b9
author: Mathias Stearn <redbeard0531@gmail.com> 2015-12-02 18:34:34 -0500
committer: Mathias Stearn <redbeard0531@gmail.com> 2015-12-02 18:34:34 -0500
commit: c53eebbb6120ab0be19bf746d592ce426afa7682 (patch)
tree: 356915f1dd1b7105ed55b1a5e8a004b2bc79df29
parent: eb8bc24a0d36b673022faa83976a4b33bc8f676b (diff)
download: mongo-c53eebbb6120ab0be19bf746d592ce426afa7682.tar.gz
3 files changed, 97 insertions, 5 deletions
diff --git a/jstests/replsets/capped_insert_order.js b/jstests/replsets/capped_insert_order.js
new file mode 100644
index 00000000000..40294f0a8b7
--- /dev/null
+++ b/jstests/replsets/capped_insert_order.js
@@ -0,0 +1,49 @@
+// Check that inserts to capped collections have the same order on primary and secondary.
+// See SERVER-21483.
+
+(function() {
+    "use strict";
+
+    var replTest = new ReplSetTest({name: 'capped_insert_order', nodes: 2});
+    replTest.startSet();
+    replTest.initiate();
+
+    var master = replTest.getMaster();
+    var slave = replTest.liveNodes.slaves[0];
+
+    var dbName = "db";
+    var masterDb = master.getDB(dbName);
+    var slaveDb = slave.getDB(dbName);
+
+    var collectionName = "collection";
+    var masterColl = masterDb[collectionName];
+    var slaveColl = slaveDb[collectionName];
+
+    // Create a capped collection big enough (size: 1024*1024) that every document fits.
+    masterDb.createCollection(collectionName, {capped: true, size: 1024*1024});
+
+    // Insert nDocuments docs in one ordered bulk op, with _id running from 0 to nDocuments - 1.
+    var nDocuments = 1000;
+    var batch = masterColl.initializeOrderedBulkOp();
+    for (var i = 0; i < nDocuments; i++) {
+        batch.insert({_id: i});
+    }
+    assert.writeOK(batch.execute());
+    replTest.awaitReplication();
+
+    function checkCollection(coll) {
+        assert.eq(coll.count(), nDocuments);
+
+        var i = 0;
+        coll.find().forEach(function(doc) {
+            assert.eq(doc._id, i);
+            i++;
+        });
+        assert.eq(i, nDocuments);
+    }
+
+    checkCollection(masterColl);
+    checkCollection(slaveColl);
+
+    replTest.stopSet();
+})();
diff --git a/src/mongo/db/repl/sync_tail.cpp b/src/mongo/db/repl/sync_tail.cpp
index dbde6ca3549..b267e37ad43 100644
--- a/src/mongo/db/repl/sync_tail.cpp
+++ b/src/mongo/db/repl/sync_tail.cpp
@@ -39,10 +39,12 @@
 
 #include "mongo/base/counter.h"
 #include "mongo/db/auth/authorization_session.h"
+#include "mongo/db/catalog/collection.h"
 #include "mongo/db/catalog/database.h"
 #include "mongo/db/catalog/database_holder.h"
 #include "mongo/db/commands/fsync.h"
 #include "mongo/db/commands/server_status_metric.h"
+#include "mongo/db/concurrency/d_concurrency.h"
 #include "mongo/db/concurrency/write_conflict_exception.h"
 #include "mongo/db/curop.h"
 #include "mongo/db/global_environment_experiment.h"
@@ -250,7 +252,7 @@ OpTime SyncTail::multiApply(OperationContext* txn, std::deque<BSONObj>& ops) {
     }
 
     std::vector<std::vector<BSONObj>> writerVectors(replWriterThreadCount);
-    fillWriterVectors(ops, &writerVectors);
+    fillWriterVectors(txn, ops, &writerVectors);
     LOG(2) << "replication batch size is " << ops.size() << endl;
     // We must grab this because we're going to grab write locks later.
     // We hold this mutex the entire time we're writing; it doesn't matter
@@ -279,8 +281,45 @@ OpTime SyncTail::multiApply(OperationContext* txn, std::deque<BSONObj>& ops) {
     return lastOpTime;
 }
 
-void SyncTail::fillWriterVectors(const std::deque<BSONObj>& ops,
+/**
+ * A caching functor that returns true if a namespace refers to a capped collection.
+ * Namespaces whose database or collection doesn't exist are cached and reported as not capped.
+ */
+class CachingCappedChecker {
+public:
+    bool operator()(OperationContext* txn, StringData ns) {
+        auto it = _cache.find(ns);
+        if (it != _cache.end()) {
+            return it->second;
+        }
+
+        bool isCapped = isCappedImpl(txn, ns);
+        _cache[ns] = isCapped;
+        return isCapped;
+    }
+
+private:
+    bool isCappedImpl(OperationContext* txn, StringData ns) {
+        auto db = dbHolder().get(txn, ns);
+        if (!db)
+            return false;
+
+        auto collection = db->getCollection(ns);
+        return collection && collection->isCapped();
+    }
+
+    StringMap<bool> _cache;
+};
+
+void SyncTail::fillWriterVectors(OperationContext* txn,
+                                 const std::deque<BSONObj>& ops,
                                  std::vector<std::vector<BSONObj>>* writerVectors) {
+    const bool supportsDocLocking =
+        getGlobalEnvironment()->getGlobalStorageEngine()->supportsDocLocking();
+
+    Lock::GlobalRead globalReadLock(txn->lockState());
+    CachingCappedChecker isCapped;
+
     for (std::deque<BSONObj>::const_iterator it = ops.begin(); it != ops.end(); ++it) {
         const BSONElement e = it->getField("ns");
         verify(e.type() == String);
@@ -291,8 +330,11 @@ void SyncTail::fillWriterVectors(const std::deque<BSONObj>& ops,
 
         const char* opType = it->getField("op").valuestrsafe();
 
-        if (getGlobalEnvironment()->getGlobalStorageEngine()->supportsDocLocking() &&
-            isCrudOpType(opType)) {
+        // For doc locking engines, include the _id of the document in the hash so we get
+        // parallelism even if all writes are to a single collection. We can't do this for capped
+        // collections because the order of inserts is a guaranteed property, unlike for normal
+        // collections.
+        if (supportsDocLocking && isCrudOpType(opType) && !isCapped(txn, ns)) {
             BSONElement id;
             switch (opType[0]) {
                 case 'u':
diff --git a/src/mongo/db/repl/sync_tail.h b/src/mongo/db/repl/sync_tail.h
index 82db52c02c3..7815af0d4f1 100644
--- a/src/mongo/db/repl/sync_tail.h
+++ b/src/mongo/db/repl/sync_tail.h
@@ -129,7 +129,8 @@ private:
     // Doles out all the work to the writer pool threads and waits for them to complete
     void applyOps(const std::vector<std::vector<BSONObj>>& writerVectors);
 
-    void fillWriterVectors(const std::deque<BSONObj>& ops,
+    void fillWriterVectors(OperationContext* txn,
+                           const std::deque<BSONObj>& ops,
                            std::vector<std::vector<BSONObj>>* writerVectors);
     void handleSlaveDelay(const BSONObj& op);
author	Mathias Stearn <redbeard0531@gmail.com>	2015-12-02 18:34:34 -0500
committer	Mathias Stearn <redbeard0531@gmail.com>	2015-12-02 18:34:34 -0500
commit	c53eebbb6120ab0be19bf746d592ce426afa7682 (patch)
tree	356915f1dd1b7105ed55b1a5e8a004b2bc79df29
parent	eb8bc24a0d36b673022faa83976a4b33bc8f676b (diff)
download	mongo-c53eebbb6120ab0be19bf746d592ce426afa7682.tar.gz