diff options
author | Tess Avitabile <tess.avitabile@mongodb.com> | 2016-11-09 14:59:30 -0500 |
---|---|---|
committer | Tess Avitabile <tess.avitabile@mongodb.com> | 2016-11-09 14:59:57 -0500 |
commit | 2100b4dc48de18b5a7af7610a6fd7ab4a2b263bc (patch) | |
tree | 99633d9c2d5be7fbb88e728a6cc7684ff9e4bcdc | |
parent | b3a7f6c5c9480b7713a09339b10d616f61765a48 (diff) | |
download | mongo-2100b4dc48de18b5a7af7610a6fd7ab4a2b263bc.tar.gz |
Revert "SERVER-26834 replset7.js should cause documents to be seen twice in initial sync"
This reverts commit 80f8ffa2121c264ead069f3ed39a34a57ac3f5a7.
-rw-r--r-- | jstests/replsets/initial_sync_move_forward.js | 98 | ||||
-rw-r--r-- | jstests/replsets/replset7.js | 54 | ||||
-rw-r--r-- | src/mongo/db/cloner.cpp | 19 | ||||
-rw-r--r-- | src/mongo/db/repl/SConscript | 7 | ||||
-rw-r--r-- | src/mongo/db/repl/collection_cloner.cpp | 14 | ||||
-rw-r--r-- | src/mongo/db/repl/data_replicator.cpp | 4 | ||||
-rw-r--r-- | src/mongo/db/repl/data_replicator.h | 4 | ||||
-rw-r--r-- | src/mongo/db/repl/rs_initialsync.cpp | 12 |
8 files changed, 62 insertions, 150 deletions
diff --git a/jstests/replsets/initial_sync_move_forward.js b/jstests/replsets/initial_sync_move_forward.js deleted file mode 100644 index 6b01a32cbfd..00000000000 --- a/jstests/replsets/initial_sync_move_forward.js +++ /dev/null @@ -1,98 +0,0 @@ -// Test initial sync with documents moving forward. -// -// This tests that initial sync succeeds when the clone phase encounters the same _id twice. We test -// that the destination node has the correct document with that _id at the end of initial sync. -// -// We also test that the initial sync succeeds when the clone phase encounters the same 'x' value -// twice, for a collection with a unique index {x: 1}. -// -// It works by deleting a document at the end of the range we are cloning, then growing a document -// from the beginning of the range so that it moves to the hole in the end of the range. -// -// This also works for wiredTiger, because we grow the document by deleting and reinserting it, so -// the newly inserted document is included in the cursor on the source. -(function() { - "use strict"; - - load("jstests/libs/get_index_helpers.js"); - - var rst = new ReplSetTest({name: "initial_sync_move_forward", nodes: 1}); - rst.startSet(); - rst.initiate(); - - var masterColl = rst.getPrimary().getDB("test").coll; - - // Insert 500000 documents. Make the last two documents larger, so that {_id: 0, x: 0} and {_id: - // 1, x: 1} will fit into their positions when we grow them. - var count = 500000; - var bulk = masterColl.initializeUnorderedBulkOp(); - for (var i = 0; i < count - 2; ++i) { - bulk.insert({_id: i, x: i}); - } - var longString = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"; - bulk.insert({_id: count - 2, x: count - 2, longString: longString}); - bulk.insert({_id: count - 1, x: count - 1, longString: longString}); - assert.writeOK(bulk.execute()); - - // Create a unique index on {x: 1}. - assert.commandWorked(masterColl.ensureIndex({x: 1}, {unique: true})); - - // Add a secondary. - var secondary = rst.add({setParameter: "num3Dot2InitialSyncAttempts=1"}); - secondary.setSlaveOk(); - var secondaryColl = secondary.getDB("test").coll; - - // Pause initial sync when the secondary has copied {_id: 0, x: 0} and {_id: 1, x: 1}. - assert.commandWorked(secondary.adminCommand({ - configureFailPoint: "initialSyncHangDuringCollectionClone", - data: {namespace: secondaryColl.getFullName(), numDocsToClone: 2}, - mode: "alwaysOn" - })); - rst.reInitiate(); - assert.soon(function() { - var logMessages = assert.commandWorked(secondary.adminCommand({getLog: "global"})).log; - for (var i = 0; i < logMessages.length; i++) { - if (logMessages[i].indexOf( - "initial sync - initialSyncHangDuringCollectionClone fail point enabled") != - -1) { - return true; - } - } - return false; - }); - - // Delete {_id: count - 2} to make a hole. Grow {_id: 0} so that it moves into that hole. This - // will cause the secondary to clone {_id: 0} again. - // Change the value for 'x' so that we are not testing the uniqueness of 'x' in this case. - assert.writeOK(masterColl.remove({_id: 0, x: 0})); - assert.writeOK(masterColl.remove({_id: count - 2, x: count - 2})); - assert.writeOK(masterColl.insert({_id: 0, x: count, longString: longString})); - - // Delete {_id: count - 1} to make a hole. Grow {x: 1} so that it moves into that hole. This - // will cause the secondary to clone {x: 1} again. - // Change the value for _id so that we are not testing the uniqueness of _id in this case. - assert.writeOK(masterColl.remove({_id: 1, x: 1})); - assert.writeOK(masterColl.remove({_id: count - 1, x: count - 1})); - assert.writeOK(masterColl.insert({_id: count, x: 1, longString: longString})); - - // Resume initial sync. - assert.commandWorked(secondary.adminCommand( - {configureFailPoint: "initialSyncHangDuringCollectionClone", mode: "off"})); - - // Wait for initial sync to finish. - rst.awaitSecondaryNodes(); - - // Check document count on secondary. - assert.eq(count - 2, secondaryColl.find().itcount()); - - // Check for {_id: 0} on secondary. - assert.eq(1, secondaryColl.find({_id: 0, x: count}).itcount()); - - // Check for {x: 1} on secondary. - assert.eq(1, secondaryColl.find({_id: count, x: 1}).itcount()); - - // Check for unique index on secondary. - var indexSpec = GetIndexHelpers.findByKeyPattern(secondaryColl.getIndexes(), {x: 1}); - assert.neq(null, indexSpec); - assert.eq(true, indexSpec.unique); -})();
\ No newline at end of file diff --git a/jstests/replsets/replset7.js b/jstests/replsets/replset7.js new file mode 100644 index 00000000000..3dde7503fb5 --- /dev/null +++ b/jstests/replsets/replset7.js @@ -0,0 +1,54 @@ + +// test for SERVER-5040 - if documents move forward during an initial sync. + +var rt = new ReplSetTest({name: "replset7tests", nodes: 1}); + +var nodes = rt.startSet(); +rt.initiate(); +var master = rt.getPrimary(); + +var md = master.getDB('d'); +var mdc = md['c']; + +// prep the data +var doccount = 5000; +var bulk = mdc.initializeUnorderedBulkOp(); +for (i = 0; i < doccount; ++i) { + bulk.insert({_id: i, x: i}); +} +assert.writeOK(bulk.execute()); + +assert.commandWorked(mdc.ensureIndex({x: 1}, {unique: true})); + +// add a secondary +var slave = rt.add(); +rt.reInitiate(); +print("initiation complete!"); +var sc = slave.getDB('d')['c']; +slave.setSlaveOk(); + +// Wait for slave to start cloning. +// assert.soon( function() { c = sc.find( { _id:1, x:1 } ); print( c ); return c > 0; } ); + +// Move all documents to the end by growing it +bulk = mdc.initializeUnorderedBulkOp(); +var bigStr = "ayayayayayayayayayayayayayayayayayayayayayayayayayayayayayayayayay" + + "ayayayayayayayayayayayay"; +for (i = 0; i < doccount; ++i) { + bulk.find({_id: i, x: i}).remove(); + bulk.insert({_id: doccount + i, x: i, bigstring: bigStr}); +} +assert.writeOK(bulk.execute()); + +// Wait for replication to catch up. +rt.awaitSecondaryNodes(); + +// Do we have an index? +assert.eq(1, + slave.getDB('d')['c'] + .getIndexes() + .filter(function(doc) { + return (doc.v >= 1 && JSON.stringify(doc.key) === JSON.stringify({x: 1}) && + doc.ns === 'd.c' && doc.name === 'x_1'); + }) + .length); diff --git a/src/mongo/db/cloner.cpp b/src/mongo/db/cloner.cpp index 331f5716b81..d848f981680 100644 --- a/src/mongo/db/cloner.cpp +++ b/src/mongo/db/cloner.cpp @@ -57,7 +57,6 @@ #include "mongo/db/jsobj.h" #include "mongo/db/namespace_string.h" #include "mongo/db/op_observer.h" -#include "mongo/db/repl/data_replicator.h" #include "mongo/db/repl/isself.h" #include "mongo/db/repl/replication_coordinator_global.h" #include "mongo/db/server_parameters.h" @@ -65,13 +64,11 @@ #include "mongo/db/storage/storage_options.h" #include "mongo/s/grid.h" #include "mongo/util/assert_util.h" -#include "mongo/util/fail_point_service.h" #include "mongo/util/log.h" #include "mongo/util/mongoutils/str.h" namespace mongo { -using repl::initialSyncHangDuringCollectionClone; using std::endl; using std::list; using std::set; @@ -272,27 +269,13 @@ struct Cloner::Fun { << redact(status) << " obj:" << redact(doc); uassertStatusOK(status); } - if (status.isOK()) { - wunit.commit(); - } + wunit.commit(); } MONGO_WRITE_CONFLICT_RETRY_LOOP_END(txn, "cloner insert", to_collection.ns()); RARELY if (time(0) - saveLast > 60) { log() << numSeen << " objects cloned so far from collection " << from_collection; saveLast = time(0); } - - MONGO_FAIL_POINT_BLOCK(initialSyncHangDuringCollectionClone, options) { - const BSONObj& data = options.getData(); - if (data["namespace"].String() == to_collection.ns() && - numSeen >= data["numDocsToClone"].numberInt()) { - log() << "initial sync - initialSyncHangDuringCollectionClone fail point " - "enabled. Blocking until fail point is disabled."; - while (MONGO_FAIL_POINT(initialSyncHangDuringCollectionClone)) { - mongo::sleepsecs(1); - } - } - } } } diff --git a/src/mongo/db/repl/SConscript b/src/mongo/db/repl/SConscript index 6e20ade7da9..7547ceab293 100644 --- a/src/mongo/db/repl/SConscript +++ b/src/mongo/db/repl/SConscript @@ -813,9 +813,8 @@ env.CppUnitTest( target='collection_cloner_test', source='collection_cloner_test.cpp', LIBDEPS=[ - 'base_cloner_test_fixture', 'collection_cloner', - 'data_replicator', + 'base_cloner_test_fixture', '$BUILD_DIR/mongo/db/auth/authorization_manager_mock_init', '$BUILD_DIR/mongo/db/commands_test_crutch', '$BUILD_DIR/mongo/db/service_context_noop_init', @@ -839,9 +838,8 @@ env.CppUnitTest( target='database_cloner_test', source='database_cloner_test.cpp', LIBDEPS=[ - 'base_cloner_test_fixture', - 'data_replicator', 'database_cloner', + 'base_cloner_test_fixture', '$BUILD_DIR/mongo/db/auth/authorization_manager_mock_init', '$BUILD_DIR/mongo/db/commands_test_crutch', '$BUILD_DIR/mongo/db/service_context_noop_init', @@ -863,7 +861,6 @@ env.CppUnitTest( target='databases_cloner_test', source='databases_cloner_test.cpp', LIBDEPS=[ - 'data_replicator', 'databases_cloner', 'oplog_entry', 'replmocks', diff --git a/src/mongo/db/repl/collection_cloner.cpp b/src/mongo/db/repl/collection_cloner.cpp index cf6934b9988..aeb2f61da63 100644 --- a/src/mongo/db/repl/collection_cloner.cpp +++ b/src/mongo/db/repl/collection_cloner.cpp @@ -37,14 +37,12 @@ #include "mongo/client/remote_command_retry_scheduler.h" #include "mongo/db/catalog/collection_options.h" #include "mongo/db/namespace_string.h" -#include "mongo/db/repl/data_replicator.h" #include "mongo/db/repl/storage_interface.h" #include "mongo/db/repl/storage_interface_mock.h" #include "mongo/db/server_parameters.h" #include "mongo/rpc/get_status_from_command_result.h" #include "mongo/util/assert_util.h" #include "mongo/util/destructor_guard.h" -#include "mongo/util/fail_point_service.h" #include "mongo/util/log.h" #include "mongo/util/mongoutils/str.h" @@ -493,18 +491,6 @@ void CollectionCloner::_insertDocumentsCallback(const executor::TaskExecutor::Ca return; } - MONGO_FAIL_POINT_BLOCK(initialSyncHangDuringCollectionClone, options) { - const BSONObj& data = options.getData(); - if (data["namespace"].String() == _destNss.ns() && - static_cast<int>(_stats.documentsCopied) >= data["numDocsToClone"].numberInt()) { - log() << "initial sync - initialSyncHangDuringCollectionClone fail point " - "enabled. Blocking until fail point is disabled."; - while (MONGO_FAIL_POINT(initialSyncHangDuringCollectionClone)) { - mongo::sleepsecs(1); - } - } - } - if (!lastBatch) { return; } diff --git a/src/mongo/db/repl/data_replicator.cpp b/src/mongo/db/repl/data_replicator.cpp index ecdea5557d8..b9b7b35640f 100644 --- a/src/mongo/db/repl/data_replicator.cpp +++ b/src/mongo/db/repl/data_replicator.cpp @@ -84,10 +84,6 @@ MONGO_FP_DECLARE(initialSyncHangBeforeFinish); // operation. MONGO_FP_DECLARE(initialSyncHangBeforeGettingMissingDocument); -// Failpoint which causes initial sync to hang when it has cloned 'numDocsToClone' documents to -// collection 'namespace'. This failpoint also applies to 3.2 initial sync. -MONGO_FP_DECLARE(initialSyncHangDuringCollectionClone); - // Failpoint which stops the applier. MONGO_FP_DECLARE(rsSyncApplyStop); diff --git a/src/mongo/db/repl/data_replicator.h b/src/mongo/db/repl/data_replicator.h index a78ed3b481f..4003719bf24 100644 --- a/src/mongo/db/repl/data_replicator.h +++ b/src/mongo/db/repl/data_replicator.h @@ -76,10 +76,6 @@ MONGO_FP_FORWARD_DECLARE(initialSyncHangBeforeCopyingDatabases); // operation. MONGO_FP_FORWARD_DECLARE(initialSyncHangBeforeGettingMissingDocument); -// Failpoint which causes initial sync to hang when it has cloned 'numDocsToClone' documents to -// collection 'namespace'. This failpoint also applies to 3.2 initial sync. -MONGO_FP_FORWARD_DECLARE(initialSyncHangDuringCollectionClone); - // Failpoint which stops the applier. MONGO_FP_FORWARD_DECLARE(rsSyncApplyStop); diff --git a/src/mongo/db/repl/rs_initialsync.cpp b/src/mongo/db/repl/rs_initialsync.cpp index 17666866b02..c8ede456db4 100644 --- a/src/mongo/db/repl/rs_initialsync.cpp +++ b/src/mongo/db/repl/rs_initialsync.cpp @@ -56,7 +56,6 @@ #include "mongo/db/repl/replication_coordinator_external_state.h" #include "mongo/db/repl/replication_coordinator_global.h" #include "mongo/db/repl/storage_interface.h" -#include "mongo/db/server_parameters.h" #include "mongo/db/service_context.h" #include "mongo/stdx/memory.h" #include "mongo/util/exit.h" @@ -75,8 +74,6 @@ using std::string; const auto kInitialSyncMaxConnectRetries = 10; -MONGO_EXPORT_SERVER_PARAMETER(num3Dot2InitialSyncAttempts, int, 10); - /** * Truncates the oplog (removes any documents) and resets internal variables that were * originally initialized or affected by using values from the oplog at startup time. These @@ -459,6 +456,7 @@ Status _initialSync(OperationContext* txn, BackgroundSync* bgsync) { } stdx::mutex _initialSyncMutex; +const auto kMaxFailedAttempts = 10; const auto kInitialSyncRetrySleepDuration = Seconds{5}; } // namespace @@ -540,7 +538,7 @@ void syncDoInitialSync(OperationContext* txn, }); int failedAttempts = 0; - while (failedAttempts < num3Dot2InitialSyncAttempts) { + while (failedAttempts < kMaxFailedAttempts) { try { // leave loop when successful Status status = _initialSync(txn, bgsync.get()); @@ -561,13 +559,13 @@ void syncDoInitialSync(OperationContext* txn, return; } - error() << "initial sync attempt failed, " - << (num3Dot2InitialSyncAttempts - ++failedAttempts) << " attempts remaining"; + error() << "initial sync attempt failed, " << (kMaxFailedAttempts - ++failedAttempts) + << " attempts remaining"; sleepmillis(durationCount<Milliseconds>(kInitialSyncRetrySleepDuration)); } // No need to print a stack - if (failedAttempts >= num3Dot2InitialSyncAttempts) { + if (failedAttempts >= kMaxFailedAttempts) { severe() << "The maximum number of retries have been exhausted for initial sync."; fassertFailedNoTrace(16233); } |