author     Tess Avitabile <tess.avitabile@mongodb.com>   2016-11-09 14:59:30 -0500
committer  Tess Avitabile <tess.avitabile@mongodb.com>   2016-11-09 14:59:57 -0500
commit     2100b4dc48de18b5a7af7610a6fd7ab4a2b263bc (patch)
tree       99633d9c2d5be7fbb88e728a6cc7684ff9e4bcdc
parent     b3a7f6c5c9480b7713a09339b10d616f61765a48 (diff)
download   mongo-2100b4dc48de18b5a7af7610a6fd7ab4a2b263bc.tar.gz
Revert "SERVER-26834 replset7.js should cause documents to be seen twice in initial sync"
This reverts commit 80f8ffa2121c264ead069f3ed39a34a57ac3f5a7.
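Taken together, the revert removes the initialSyncHangDuringCollectionClone failpoint and the num3Dot2InitialSyncAttempts server parameter, deletes the failpoint-driven test initial_sync_move_forward.js, and restores the timing-based replset7.js. For reference, a minimal mongo-shell sketch of how the deleted test drove that failpoint (the "namespace" and "numDocsToClone" data fields are taken from the deleted file below; neither the failpoint nor the parameter exists after this revert):

    // Pre-revert only: pause the collection clone after two documents are copied.
    assert.commandWorked(secondary.adminCommand({
        configureFailPoint: "initialSyncHangDuringCollectionClone",
        data: {namespace: "test.coll", numDocsToClone: 2},
        mode: "alwaysOn"
    }));
    // ... grow/move documents on the primary while the clone is paused ...
    // Resume the clone.
    assert.commandWorked(secondary.adminCommand(
        {configureFailPoint: "initialSyncHangDuringCollectionClone", mode: "off"}));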
-rw-r--r--  jstests/replsets/initial_sync_move_forward.js   98
-rw-r--r--  jstests/replsets/replset7.js                    54
-rw-r--r--  src/mongo/db/cloner.cpp                         19
-rw-r--r--  src/mongo/db/repl/SConscript                     7
-rw-r--r--  src/mongo/db/repl/collection_cloner.cpp         14
-rw-r--r--  src/mongo/db/repl/data_replicator.cpp            4
-rw-r--r--  src/mongo/db/repl/data_replicator.h              4
-rw-r--r--  src/mongo/db/repl/rs_initialsync.cpp            12
8 files changed, 62 insertions, 150 deletions
diff --git a/jstests/replsets/initial_sync_move_forward.js b/jstests/replsets/initial_sync_move_forward.js
deleted file mode 100644
index 6b01a32cbfd..00000000000
--- a/jstests/replsets/initial_sync_move_forward.js
+++ /dev/null
@@ -1,98 +0,0 @@
-// Test initial sync with documents moving forward.
-//
-// This tests that initial sync succeeds when the clone phase encounters the same _id twice. We test
-// that the destination node has the correct document with that _id at the end of initial sync.
-//
-// We also test that the initial sync succeeds when the clone phase encounters the same 'x' value
-// twice, for a collection with a unique index {x: 1}.
-//
-// It works by deleting a document at the end of the range we are cloning, then growing a document
-// from the beginning of the range so that it moves to the hole in the end of the range.
-//
-// This also works for wiredTiger, because we grow the document by deleting and reinserting it, so
-// the newly inserted document is included in the cursor on the source.
-(function() {
- "use strict";
-
- load("jstests/libs/get_index_helpers.js");
-
- var rst = new ReplSetTest({name: "initial_sync_move_forward", nodes: 1});
- rst.startSet();
- rst.initiate();
-
- var masterColl = rst.getPrimary().getDB("test").coll;
-
- // Insert 500000 documents. Make the last two documents larger, so that {_id: 0, x: 0} and {_id:
- // 1, x: 1} will fit into their positions when we grow them.
- var count = 500000;
- var bulk = masterColl.initializeUnorderedBulkOp();
- for (var i = 0; i < count - 2; ++i) {
- bulk.insert({_id: i, x: i});
- }
- var longString = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
- bulk.insert({_id: count - 2, x: count - 2, longString: longString});
- bulk.insert({_id: count - 1, x: count - 1, longString: longString});
- assert.writeOK(bulk.execute());
-
- // Create a unique index on {x: 1}.
- assert.commandWorked(masterColl.ensureIndex({x: 1}, {unique: true}));
-
- // Add a secondary.
- var secondary = rst.add({setParameter: "num3Dot2InitialSyncAttempts=1"});
- secondary.setSlaveOk();
- var secondaryColl = secondary.getDB("test").coll;
-
- // Pause initial sync when the secondary has copied {_id: 0, x: 0} and {_id: 1, x: 1}.
- assert.commandWorked(secondary.adminCommand({
- configureFailPoint: "initialSyncHangDuringCollectionClone",
- data: {namespace: secondaryColl.getFullName(), numDocsToClone: 2},
- mode: "alwaysOn"
- }));
- rst.reInitiate();
- assert.soon(function() {
- var logMessages = assert.commandWorked(secondary.adminCommand({getLog: "global"})).log;
- for (var i = 0; i < logMessages.length; i++) {
- if (logMessages[i].indexOf(
- "initial sync - initialSyncHangDuringCollectionClone fail point enabled") !=
- -1) {
- return true;
- }
- }
- return false;
- });
-
- // Delete {_id: count - 2} to make a hole. Grow {_id: 0} so that it moves into that hole. This
- // will cause the secondary to clone {_id: 0} again.
- // Change the value for 'x' so that we are not testing the uniqueness of 'x' in this case.
- assert.writeOK(masterColl.remove({_id: 0, x: 0}));
- assert.writeOK(masterColl.remove({_id: count - 2, x: count - 2}));
- assert.writeOK(masterColl.insert({_id: 0, x: count, longString: longString}));
-
- // Delete {_id: count - 1} to make a hole. Grow {x: 1} so that it moves into that hole. This
- // will cause the secondary to clone {x: 1} again.
- // Change the value for _id so that we are not testing the uniqueness of _id in this case.
- assert.writeOK(masterColl.remove({_id: 1, x: 1}));
- assert.writeOK(masterColl.remove({_id: count - 1, x: count - 1}));
- assert.writeOK(masterColl.insert({_id: count, x: 1, longString: longString}));
-
- // Resume initial sync.
- assert.commandWorked(secondary.adminCommand(
- {configureFailPoint: "initialSyncHangDuringCollectionClone", mode: "off"}));
-
- // Wait for initial sync to finish.
- rst.awaitSecondaryNodes();
-
- // Check document count on secondary.
- assert.eq(count - 2, secondaryColl.find().itcount());
-
- // Check for {_id: 0} on secondary.
- assert.eq(1, secondaryColl.find({_id: 0, x: count}).itcount());
-
- // Check for {x: 1} on secondary.
- assert.eq(1, secondaryColl.find({_id: count, x: 1}).itcount());
-
- // Check for unique index on secondary.
- var indexSpec = GetIndexHelpers.findByKeyPattern(secondaryColl.getIndexes(), {x: 1});
- assert.neq(null, indexSpec);
- assert.eq(true, indexSpec.unique);
-})();
\ No newline at end of file
diff --git a/jstests/replsets/replset7.js b/jstests/replsets/replset7.js
new file mode 100644
index 00000000000..3dde7503fb5
--- /dev/null
+++ b/jstests/replsets/replset7.js
@@ -0,0 +1,54 @@
+
+// test for SERVER-5040 - if documents move forward during an initial sync.
+
+var rt = new ReplSetTest({name: "replset7tests", nodes: 1});
+
+var nodes = rt.startSet();
+rt.initiate();
+var master = rt.getPrimary();
+
+var md = master.getDB('d');
+var mdc = md['c'];
+
+// prep the data
+var doccount = 5000;
+var bulk = mdc.initializeUnorderedBulkOp();
+for (i = 0; i < doccount; ++i) {
+ bulk.insert({_id: i, x: i});
+}
+assert.writeOK(bulk.execute());
+
+assert.commandWorked(mdc.ensureIndex({x: 1}, {unique: true}));
+
+// add a secondary
+var slave = rt.add();
+rt.reInitiate();
+print("initiation complete!");
+var sc = slave.getDB('d')['c'];
+slave.setSlaveOk();
+
+// Wait for slave to start cloning.
+// assert.soon( function() { c = sc.find( { _id:1, x:1 } ); print( c ); return c > 0; } );
+
+// Move all documents to the end by growing it
+bulk = mdc.initializeUnorderedBulkOp();
+var bigStr = "ayayayayayayayayayayayayayayayayayayayayayayayayayayayayayayayayay" +
+ "ayayayayayayayayayayayay";
+for (i = 0; i < doccount; ++i) {
+ bulk.find({_id: i, x: i}).remove();
+ bulk.insert({_id: doccount + i, x: i, bigstring: bigStr});
+}
+assert.writeOK(bulk.execute());
+
+// Wait for replication to catch up.
+rt.awaitSecondaryNodes();
+
+// Do we have an index?
+assert.eq(1,
+ slave.getDB('d')['c']
+ .getIndexes()
+ .filter(function(doc) {
+ return (doc.v >= 1 && JSON.stringify(doc.key) === JSON.stringify({x: 1}) &&
+ doc.ns === 'd.c' && doc.name === 'x_1');
+ })
+ .length);
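Note that the restored test above only verifies that an index named x_1 on {x: 1} exists; unlike the deleted initial_sync_move_forward.js it does not assert that the index is unique, even though it is created with {unique: true}. A stricter check in the style of the deleted test would look like this (a sketch, assuming jstests/libs/get_index_helpers.js exists on this branch):

    // Verify the {x: 1} index on the secondary is present and unique.
    load("jstests/libs/get_index_helpers.js");
    var indexSpec = GetIndexHelpers.findByKeyPattern(sc.getIndexes(), {x: 1});
    assert.neq(null, indexSpec);
    assert.eq(true, indexSpec.unique);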
diff --git a/src/mongo/db/cloner.cpp b/src/mongo/db/cloner.cpp
index 331f5716b81..d848f981680 100644
--- a/src/mongo/db/cloner.cpp
+++ b/src/mongo/db/cloner.cpp
@@ -57,7 +57,6 @@
#include "mongo/db/jsobj.h"
#include "mongo/db/namespace_string.h"
#include "mongo/db/op_observer.h"
-#include "mongo/db/repl/data_replicator.h"
#include "mongo/db/repl/isself.h"
#include "mongo/db/repl/replication_coordinator_global.h"
#include "mongo/db/server_parameters.h"
@@ -65,13 +64,11 @@
#include "mongo/db/storage/storage_options.h"
#include "mongo/s/grid.h"
#include "mongo/util/assert_util.h"
-#include "mongo/util/fail_point_service.h"
#include "mongo/util/log.h"
#include "mongo/util/mongoutils/str.h"
namespace mongo {
-using repl::initialSyncHangDuringCollectionClone;
using std::endl;
using std::list;
using std::set;
@@ -272,27 +269,13 @@ struct Cloner::Fun {
<< redact(status) << " obj:" << redact(doc);
uassertStatusOK(status);
}
- if (status.isOK()) {
- wunit.commit();
- }
+ wunit.commit();
}
MONGO_WRITE_CONFLICT_RETRY_LOOP_END(txn, "cloner insert", to_collection.ns());
RARELY if (time(0) - saveLast > 60) {
log() << numSeen << " objects cloned so far from collection " << from_collection;
saveLast = time(0);
}
-
- MONGO_FAIL_POINT_BLOCK(initialSyncHangDuringCollectionClone, options) {
- const BSONObj& data = options.getData();
- if (data["namespace"].String() == to_collection.ns() &&
- numSeen >= data["numDocsToClone"].numberInt()) {
- log() << "initial sync - initialSyncHangDuringCollectionClone fail point "
- "enabled. Blocking until fail point is disabled.";
- while (MONGO_FAIL_POINT(initialSyncHangDuringCollectionClone)) {
- mongo::sleepsecs(1);
- }
- }
- }
}
}
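With the failpoint block removed from Cloner::Fun, the only externally visible sign of clone progress on this path is the throttled "objects cloned so far" log line above (emitted RARELY and at most once per minute, so a fast clone may finish without printing it). A rough shell-side wait modeled on the getLog polling in the deleted test; timing-dependent rather than deterministic:

    // Poll the secondary's global log for the cloner's progress message.
    assert.soon(function() {
        var logMessages = assert.commandWorked(slave.adminCommand({getLog: "global"})).log;
        for (var i = 0; i < logMessages.length; i++) {
            if (logMessages[i].indexOf("objects cloned so far from collection") != -1) {
                return true;
            }
        }
        return false;
    });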
diff --git a/src/mongo/db/repl/SConscript b/src/mongo/db/repl/SConscript
index 6e20ade7da9..7547ceab293 100644
--- a/src/mongo/db/repl/SConscript
+++ b/src/mongo/db/repl/SConscript
@@ -813,9 +813,8 @@ env.CppUnitTest(
target='collection_cloner_test',
source='collection_cloner_test.cpp',
LIBDEPS=[
- 'base_cloner_test_fixture',
'collection_cloner',
- 'data_replicator',
+ 'base_cloner_test_fixture',
'$BUILD_DIR/mongo/db/auth/authorization_manager_mock_init',
'$BUILD_DIR/mongo/db/commands_test_crutch',
'$BUILD_DIR/mongo/db/service_context_noop_init',
@@ -839,9 +838,8 @@ env.CppUnitTest(
target='database_cloner_test',
source='database_cloner_test.cpp',
LIBDEPS=[
- 'base_cloner_test_fixture',
- 'data_replicator',
'database_cloner',
+ 'base_cloner_test_fixture',
'$BUILD_DIR/mongo/db/auth/authorization_manager_mock_init',
'$BUILD_DIR/mongo/db/commands_test_crutch',
'$BUILD_DIR/mongo/db/service_context_noop_init',
@@ -863,7 +861,6 @@ env.CppUnitTest(
target='databases_cloner_test',
source='databases_cloner_test.cpp',
LIBDEPS=[
- 'data_replicator',
'databases_cloner',
'oplog_entry',
'replmocks',
diff --git a/src/mongo/db/repl/collection_cloner.cpp b/src/mongo/db/repl/collection_cloner.cpp
index cf6934b9988..aeb2f61da63 100644
--- a/src/mongo/db/repl/collection_cloner.cpp
+++ b/src/mongo/db/repl/collection_cloner.cpp
@@ -37,14 +37,12 @@
#include "mongo/client/remote_command_retry_scheduler.h"
#include "mongo/db/catalog/collection_options.h"
#include "mongo/db/namespace_string.h"
-#include "mongo/db/repl/data_replicator.h"
#include "mongo/db/repl/storage_interface.h"
#include "mongo/db/repl/storage_interface_mock.h"
#include "mongo/db/server_parameters.h"
#include "mongo/rpc/get_status_from_command_result.h"
#include "mongo/util/assert_util.h"
#include "mongo/util/destructor_guard.h"
-#include "mongo/util/fail_point_service.h"
#include "mongo/util/log.h"
#include "mongo/util/mongoutils/str.h"
@@ -493,18 +491,6 @@ void CollectionCloner::_insertDocumentsCallback(const executor::TaskExecutor::Ca
return;
}
- MONGO_FAIL_POINT_BLOCK(initialSyncHangDuringCollectionClone, options) {
- const BSONObj& data = options.getData();
- if (data["namespace"].String() == _destNss.ns() &&
- static_cast<int>(_stats.documentsCopied) >= data["numDocsToClone"].numberInt()) {
- log() << "initial sync - initialSyncHangDuringCollectionClone fail point "
- "enabled. Blocking until fail point is disabled.";
- while (MONGO_FAIL_POINT(initialSyncHangDuringCollectionClone)) {
- mongo::sleepsecs(1);
- }
- }
- }
-
if (!lastBatch) {
return;
}
diff --git a/src/mongo/db/repl/data_replicator.cpp b/src/mongo/db/repl/data_replicator.cpp
index ecdea5557d8..b9b7b35640f 100644
--- a/src/mongo/db/repl/data_replicator.cpp
+++ b/src/mongo/db/repl/data_replicator.cpp
@@ -84,10 +84,6 @@ MONGO_FP_DECLARE(initialSyncHangBeforeFinish);
// operation.
MONGO_FP_DECLARE(initialSyncHangBeforeGettingMissingDocument);
-// Failpoint which causes initial sync to hang when it has cloned 'numDocsToClone' documents to
-// collection 'namespace'. This failpoint also applies to 3.2 initial sync.
-MONGO_FP_DECLARE(initialSyncHangDuringCollectionClone);
-
// Failpoint which stops the applier.
MONGO_FP_DECLARE(rsSyncApplyStop);
diff --git a/src/mongo/db/repl/data_replicator.h b/src/mongo/db/repl/data_replicator.h
index a78ed3b481f..4003719bf24 100644
--- a/src/mongo/db/repl/data_replicator.h
+++ b/src/mongo/db/repl/data_replicator.h
@@ -76,10 +76,6 @@ MONGO_FP_FORWARD_DECLARE(initialSyncHangBeforeCopyingDatabases);
// operation.
MONGO_FP_FORWARD_DECLARE(initialSyncHangBeforeGettingMissingDocument);
-// Failpoint which causes initial sync to hang when it has cloned 'numDocsToClone' documents to
-// collection 'namespace'. This failpoint also applies to 3.2 initial sync.
-MONGO_FP_FORWARD_DECLARE(initialSyncHangDuringCollectionClone);
-
// Failpoint which stops the applier.
MONGO_FP_FORWARD_DECLARE(rsSyncApplyStop);
diff --git a/src/mongo/db/repl/rs_initialsync.cpp b/src/mongo/db/repl/rs_initialsync.cpp
index 17666866b02..c8ede456db4 100644
--- a/src/mongo/db/repl/rs_initialsync.cpp
+++ b/src/mongo/db/repl/rs_initialsync.cpp
@@ -56,7 +56,6 @@
#include "mongo/db/repl/replication_coordinator_external_state.h"
#include "mongo/db/repl/replication_coordinator_global.h"
#include "mongo/db/repl/storage_interface.h"
-#include "mongo/db/server_parameters.h"
#include "mongo/db/service_context.h"
#include "mongo/stdx/memory.h"
#include "mongo/util/exit.h"
@@ -75,8 +74,6 @@ using std::string;
const auto kInitialSyncMaxConnectRetries = 10;
-MONGO_EXPORT_SERVER_PARAMETER(num3Dot2InitialSyncAttempts, int, 10);
-
/**
* Truncates the oplog (removes any documents) and resets internal variables that were
* originally initialized or affected by using values from the oplog at startup time. These
@@ -459,6 +456,7 @@ Status _initialSync(OperationContext* txn, BackgroundSync* bgsync) {
}
stdx::mutex _initialSyncMutex;
+const auto kMaxFailedAttempts = 10;
const auto kInitialSyncRetrySleepDuration = Seconds{5};
} // namespace
@@ -540,7 +538,7 @@ void syncDoInitialSync(OperationContext* txn,
});
int failedAttempts = 0;
- while (failedAttempts < num3Dot2InitialSyncAttempts) {
+ while (failedAttempts < kMaxFailedAttempts) {
try {
// leave loop when successful
Status status = _initialSync(txn, bgsync.get());
@@ -561,13 +559,13 @@ void syncDoInitialSync(OperationContext* txn,
return;
}
- error() << "initial sync attempt failed, "
- << (num3Dot2InitialSyncAttempts - ++failedAttempts) << " attempts remaining";
+ error() << "initial sync attempt failed, " << (kMaxFailedAttempts - ++failedAttempts)
+ << " attempts remaining";
sleepmillis(durationCount<Milliseconds>(kInitialSyncRetrySleepDuration));
}
// No need to print a stack
- if (failedAttempts >= num3Dot2InitialSyncAttempts) {
+ if (failedAttempts >= kMaxFailedAttempts) {
severe() << "The maximum number of retries have been exhausted for initial sync.";
fassertFailedNoTrace(16233);
}
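After the revert, the retry budget is the hardcoded kMaxFailedAttempts = 10 restored above. Before it, the deleted test could make initial sync fail fast by lowering the server parameter at node startup, as in this pre-revert sketch (the parameter no longer exists after this commit):

    // Pre-revert only: allow a single 3.2-style initial sync attempt.
    var secondary = rst.add({setParameter: "num3Dot2InitialSyncAttempts=1"});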