author     Judah Schvimer <judah@mongodb.com>    2017-07-11 10:14:16 -0400
committer  Judah Schvimer <judah@mongodb.com>    2017-07-17 11:13:08 -0400
commit     cbd74166d8930657f63e8e90791d276e15c3d3cb (patch)
tree       0188f8c485433ffb622ba35d4cf7d9cbc7c93a15
parent     a109a23f12a957e127814cd9bd82ce18f8437f9b (diff)
download   mongo-cbd74166d8930657f63e8e90791d276e15c3d3cb.tar.gz
SERVER-27581 Only fetch missing documents on update oplog entries during initial sync

(cherry picked from commit f33e9a715cf3eb4d29d8d5c5a926e5edacf9f63d)
-rw-r--r--  jstests/replsets/initial_sync_update_missing_doc1.js |  5
-rw-r--r--  jstests/replsets/initial_sync_update_missing_doc2.js |  4
-rw-r--r--  src/mongo/base/error_codes.err                       |  1
-rw-r--r--  src/mongo/db/repl/master_slave.cpp                   | 24
-rw-r--r--  src/mongo/db/repl/oplog.cpp                          |  6
-rw-r--r--  src/mongo/db/repl/sync_tail.cpp                      | 94
-rw-r--r--  src/mongo/db/repl/sync_tail.h                        |  9
-rw-r--r--  src/mongo/db/repl/sync_tail_test.cpp                 | 50
-rw-r--r--  src/mongo/dbtests/repltests.cpp                      | 12
9 files changed, 84 insertions(+), 121 deletions(-)
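
The core change in this commit: a failed update during initial sync now returns the dedicated UpdateOperationFailed code, and the applier fetches the missing document from the sync source and inserts it locally instead of re-applying the update. The sketch below (not part of the commit; the wrapper name applyDuringInitialSync is hypothetical) summarizes the resulting flow that the real code implements in multiInitialSyncApply_noAbort in the diff that follows.

// Illustrative sketch only, assuming the types and calls shown in the diff below.
// Only a failed update (ErrorCodes::UpdateOperationFailed) triggers a fetch of the
// missing document; the update itself is not re-applied, since the document fetched
// from the sync source already reflects this update or a later state.
Status applyDuringInitialSync(OperationContext* txn,
                              SyncTail* st,
                              const OplogEntry& entry,
                              AtomicUInt32* fetchCount) {
    const Status s = SyncTail::syncApply(txn, entry.raw, false /* inSteadyStateReplication */);
    if (s.isOK()) {
        return s;
    }
    if (s != ErrorCodes::UpdateOperationFailed) {
        // Commands and other failed operations are surfaced immediately; no retry path.
        return s;
    }
    // The collection cloner may have missed this document; fetch it from the sync
    // source and insert it locally. This is a no-op if the document was deleted
    // later in the oplog or belongs to a capped collection.
    fetchCount->fetchAndAdd(1);
    st->fetchAndInsertMissingDocument(txn, entry.raw);
    return Status::OK();
}
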
diff --git a/jstests/replsets/initial_sync_update_missing_doc1.js b/jstests/replsets/initial_sync_update_missing_doc1.js
index d2d3c6aed58..4421b702bc2 100644
--- a/jstests/replsets/initial_sync_update_missing_doc1.js
+++ b/jstests/replsets/initial_sync_update_missing_doc1.js
@@ -54,10 +54,9 @@
{configureFailPoint: 'initialSyncHangBeforeCopyingDatabases', mode: 'off'}));
checkLog.contains(secondary, 'update of non-mod failed');
- checkLog.contains(secondary, 'adding missing object');
+ checkLog.contains(secondary, 'Fetching missing document');
checkLog.contains(
secondary, 'initial sync - initialSyncHangBeforeGettingMissingDocument fail point enabled');
-
var res = assert.commandWorked(secondary.adminCommand({replSetGetStatus: 1, initialSync: 1}));
assert.eq(res.initialSyncStatus.fetchedMissingDocs, 0);
var firstOplogEnd = res.initialSyncStatus.initialSyncOplogEnd;
@@ -68,7 +67,7 @@
assert.commandWorked(secondary.getDB('admin').runCommand(
{configureFailPoint: 'initialSyncHangBeforeGettingMissingDocument', mode: 'off'}));
checkLog.contains(secondary,
- 'missing object not found on source. presumably deleted later in oplog');
+ 'Missing document not found on source; presumably deleted later in oplog.');
checkLog.contains(secondary, 'initial sync done');
replSet.awaitReplication();
diff --git a/jstests/replsets/initial_sync_update_missing_doc2.js b/jstests/replsets/initial_sync_update_missing_doc2.js
index 9418757468c..6809b355a77 100644
--- a/jstests/replsets/initial_sync_update_missing_doc2.js
+++ b/jstests/replsets/initial_sync_update_missing_doc2.js
@@ -54,7 +54,7 @@
{configureFailPoint: 'initialSyncHangBeforeCopyingDatabases', mode: 'off'}));
checkLog.contains(secondary, 'update of non-mod failed');
- checkLog.contains(secondary, 'adding missing object');
+ checkLog.contains(secondary, 'Fetching missing document');
checkLog.contains(
secondary, 'initial sync - initialSyncHangBeforeGettingMissingDocument fail point enabled');
@@ -70,7 +70,7 @@
assert.commandWorked(secondary.getDB('admin').runCommand(
{configureFailPoint: 'initialSyncHangBeforeGettingMissingDocument', mode: 'off'}));
- checkLog.contains(secondary, 'inserted missing doc:');
+ checkLog.contains(secondary, 'Inserted missing document');
secondary.getDB('test').setLogLevel(0, 'replication');
checkLog.contains(secondary, 'initial sync done');
diff --git a/src/mongo/base/error_codes.err b/src/mongo/base/error_codes.err
index c090a203f77..c8def6bd695 100644
--- a/src/mongo/base/error_codes.err
+++ b/src/mongo/base/error_codes.err
@@ -201,6 +201,7 @@ error_code("ChunkRangeCleanupPending", 200)
error_code("CannotBuildIndexKeys", 201)
error_code("NetworkInterfaceExceededTimeLimit", 202)
error_code("TooManyLocks", 208)
+error_code("UpdateOperationFailed", 218)
# Non-sequential error codes (for compatibility only)
error_code("SocketException", 9001)
diff --git a/src/mongo/db/repl/master_slave.cpp b/src/mongo/db/repl/master_slave.cpp
index 05faad12599..96cfe58efdc 100644
--- a/src/mongo/db/repl/master_slave.cpp
+++ b/src/mongo/db/repl/master_slave.cpp
@@ -645,15 +645,7 @@ bool ReplSource::handleDuplicateDbName(OperationContext* txn,
void ReplSource::applyCommand(OperationContext* txn, const BSONObj& op) {
try {
Status status = applyCommand_inlock(txn, op, true);
- if (!status.isOK()) {
- SyncTail sync(nullptr, SyncTail::MultiSyncApplyFunc());
- sync.setHostname(hostName);
- if (sync.shouldRetry(txn, op)) {
- uassert(28639,
- "Failure retrying initial sync update",
- applyCommand_inlock(txn, op, true).isOK());
- }
- }
+ uassert(28639, "Failure applying initial sync command", status.isOK());
} catch (UserException& e) {
log() << "sync: caught user assertion " << redact(e) << " while applying op: " << redact(op)
<< endl;
@@ -669,13 +661,17 @@ void ReplSource::applyOperation(OperationContext* txn, Database* db, const BSONO
try {
Status status = applyOperation_inlock(txn, db, op);
if (!status.isOK()) {
+ uassert(15914,
+ "Failure applying initial sync operation",
+ status == ErrorCodes::UpdateOperationFailed);
+
+ // In initial sync, update operations can cause documents to be missed during
+ // collection cloning. As a result, it is possible that a document that we need to
+ // update is not present locally. In that case we fetch the document from the
+ // sync source.
SyncTail sync(nullptr, SyncTail::MultiSyncApplyFunc());
sync.setHostname(hostName);
- if (sync.shouldRetry(txn, op)) {
- uassert(15914,
- "Failure retrying initial sync update",
- applyOperation_inlock(txn, db, op).isOK());
- }
+ sync.fetchAndInsertMissingDocument(txn, op);
}
} catch (UserException& e) {
log() << "sync: caught user assertion " << redact(e) << " while applying op: " << redact(op)
diff --git a/src/mongo/db/repl/oplog.cpp b/src/mongo/db/repl/oplog.cpp
index 4868d7f4a75..69bc38cd3e5 100644
--- a/src/mongo/db/repl/oplog.cpp
+++ b/src/mongo/db/repl/oplog.cpp
@@ -916,7 +916,7 @@ Status applyOperation_inlock(OperationContext* txn,
// was a simple { _id : ... } update criteria
string msg = str::stream() << "failed to apply update: " << redact(op);
error() << msg;
- return Status(ErrorCodes::OperationFailed, msg);
+ return Status(ErrorCodes::UpdateOperationFailed, msg);
}
// Need to check to see if it isn't present so we can exit early with a
// failure. Note that adds some overhead for this extra check in some cases,
@@ -932,7 +932,7 @@ Status applyOperation_inlock(OperationContext* txn,
Helpers::findOne(txn, collection, updateCriteria, false).isNull())) {
string msg = str::stream() << "couldn't find doc: " << redact(op);
error() << msg;
- return Status(ErrorCodes::OperationFailed, msg);
+ return Status(ErrorCodes::UpdateOperationFailed, msg);
}
// Otherwise, it's present; zero objects were updated because of additional
@@ -944,7 +944,7 @@ Status applyOperation_inlock(OperationContext* txn,
if (!upsert) {
string msg = str::stream() << "update of non-mod failed: " << redact(op);
error() << msg;
- return Status(ErrorCodes::OperationFailed, msg);
+ return Status(ErrorCodes::UpdateOperationFailed, msg);
}
}
}
diff --git a/src/mongo/db/repl/sync_tail.cpp b/src/mongo/db/repl/sync_tail.cpp
index edb82cc82a4..8c0c0229508 100644
--- a/src/mongo/db/repl/sync_tail.cpp
+++ b/src/mongo/db/repl/sync_tail.cpp
@@ -935,17 +935,10 @@ OldThreadPool* SyncTail::getWriterPool() {
return _writerPool.get();
}
-BSONObj SyncTail::getMissingDoc(OperationContext* txn, Database* db, const BSONObj& o) {
+BSONObj SyncTail::getMissingDoc(OperationContext* txn, const BSONObj& o) {
OplogReader missingObjReader; // why are we using OplogReader to run a non-oplog query?
const char* ns = o.getStringField("ns");
- // capped collections
- Collection* collection = db->getCollection(ns);
- if (collection && collection->isCapped()) {
- log() << "missing doc, but this is okay for a capped collection (" << ns << ")";
- return BSONObj();
- }
-
if (MONGO_FAIL_POINT(initialSyncHangBeforeGettingMissingDocument)) {
log() << "initial sync - initialSyncHangBeforeGettingMissingDocument fail point enabled. "
"Blocking until fail point is disabled.";
@@ -1004,43 +997,52 @@ BSONObj SyncTail::getMissingDoc(OperationContext* txn, Database* db, const BSONO
str::stream() << "Can no longer connect to initial sync source: " << _hostname);
}
-bool SyncTail::shouldRetry(OperationContext* txn, const BSONObj& o) {
+bool SyncTail::fetchAndInsertMissingDocument(OperationContext* txn, const BSONObj& o) {
const NamespaceString nss(o.getStringField("ns"));
+
+ {
+ // If the document is in a capped collection then it's okay for it to be missing.
+ AutoGetCollectionForRead autoColl(txn, nss);
+ Collection* const collection = autoColl.getCollection();
+ if (collection && collection->isCapped()) {
+ log() << "Not fetching missing document in capped collection (" << nss << ")";
+ return false;
+ }
+ }
+
+ log() << "Fetching missing document: " << redact(o);
+ BSONObj missingObj = getMissingDoc(txn, o);
+
+ if (missingObj.isEmpty()) {
+ log() << "Missing document not found on source; presumably deleted later in oplog. o first "
+ "field: "
+ << o.getObjectField("o").firstElementFieldName()
+ << ", o2: " << redact(o.getObjectField("o2"));
+
+ return false;
+ }
+
MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
// Take an X lock on the database in order to preclude other modifications.
// Also, the database might not exist yet, so create it.
AutoGetOrCreateDb autoDb(txn, nss.db(), MODE_X);
Database* const db = autoDb.getDb();
- // we don't have the object yet, which is possible on initial sync. get it.
- log() << "adding missing object" << endl; // rare enough we can log
+ WriteUnitOfWork wunit(txn);
- BSONObj missingObj = getMissingDoc(txn, db, o);
+ Collection* const coll = db->getOrCreateCollection(txn, nss.toString());
+ invariant(coll);
- if (missingObj.isEmpty()) {
- log() << "missing object not found on source."
- " presumably deleted later in oplog";
- log() << "o2: " << redact(o.getObjectField("o2"));
- log() << "o firstfield: " << o.getObjectField("o").firstElementFieldName();
+ OpDebug* const nullOpDebug = nullptr;
+ Status status = coll->insertDocument(txn, missingObj, nullOpDebug, true);
+ uassert(15917,
+ str::stream() << "Failed to insert missing document: " << status.toString(),
+ status.isOK());
- return false;
- } else {
- WriteUnitOfWork wunit(txn);
-
- Collection* const coll = db->getOrCreateCollection(txn, nss.toString());
- invariant(coll);
+ LOG(1) << "Inserted missing document: " << redact(missingObj);
- OpDebug* const nullOpDebug = nullptr;
- Status status = coll->insertDocument(txn, missingObj, nullOpDebug, true);
- uassert(15917,
- str::stream() << "failed to insert missing doc: " << status.toString(),
- status.isOK());
-
- LOG(1) << "inserted missing doc: " << redact(missingObj);
-
- wunit.commit();
- return true;
- }
+ wunit.commit();
+ return true;
}
MONGO_WRITE_CONFLICT_RETRY_LOOP_END(txn, "InsertRetry", nss.ns());
@@ -1196,27 +1198,19 @@ Status multiInitialSyncApply_noAbort(OperationContext* txn,
try {
const Status s = SyncTail::syncApply(txn, entry.raw, inSteadyStateReplication);
if (!s.isOK()) {
- // Don't retry on commands.
- if (entry.isCommand()) {
- error() << "Error applying command (" << redact(entry.raw)
- << "): " << redact(s);
+ // In initial sync, update operations can cause documents to be missed during
+ // collection cloning. As a result, it is possible that a document that we need to
+ // update is not present locally. In that case we fetch the document from the
+ // sync source.
+ if (s != ErrorCodes::UpdateOperationFailed) {
+ error() << "Error applying operation: " << redact(s) << " ("
+ << redact(entry.raw) << ")";
return s;
}
// We might need to fetch the missing docs from the sync source.
fetchCount->fetchAndAdd(1);
- if (st->shouldRetry(txn, entry.raw)) {
- const Status s2 = SyncTail::syncApply(txn, entry.raw, inSteadyStateReplication);
- if (!s2.isOK()) {
- severe() << "Error applying operation (" << redact(entry.raw)
- << "): " << redact(s2);
- return s2;
- }
- }
-
- // If shouldRetry() returns false, fall through.
- // This can happen if the document that was moved and missed by Cloner
- // subsequently got deleted and no longer exists on the Sync Target at all
+ st->fetchAndInsertMissingDocument(txn, entry.raw);
}
} catch (const DBException& e) {
// SERVER-24927 If we have a NamespaceNotFound exception, then this document will be
diff --git a/src/mongo/db/repl/sync_tail.h b/src/mongo/db/repl/sync_tail.h
index ec133599bd3..1ad240c7890 100644
--- a/src/mongo/db/repl/sync_tail.h
+++ b/src/mongo/db/repl/sync_tail.h
@@ -200,12 +200,15 @@ public:
/**
* Fetch a single document referenced in the operation from the sync source.
*/
- virtual BSONObj getMissingDoc(OperationContext* txn, Database* db, const BSONObj& o);
+ virtual BSONObj getMissingDoc(OperationContext* txn, const BSONObj& o);
/**
- * If applyOperation_inlock should be called again after an update fails.
+ * If an update fails, fetches the missing document and inserts it into the local collection.
+ *
+ * Returns true if the document was fetched and inserted successfully.
*/
- virtual bool shouldRetry(OperationContext* txn, const BSONObj& o);
+ virtual bool fetchAndInsertMissingDocument(OperationContext* txn, const BSONObj& o);
+
void setHostname(const std::string& hostname);
/**
diff --git a/src/mongo/db/repl/sync_tail_test.cpp b/src/mongo/db/repl/sync_tail_test.cpp
index d73a7593829..1ea383ff9b3 100644
--- a/src/mongo/db/repl/sync_tail_test.cpp
+++ b/src/mongo/db/repl/sync_tail_test.cpp
@@ -91,19 +91,19 @@ protected:
class SyncTailWithLocalDocumentFetcher : public SyncTail {
public:
SyncTailWithLocalDocumentFetcher(const BSONObj& document);
- BSONObj getMissingDoc(OperationContext* txn, Database* db, const BSONObj& o) override;
+ BSONObj getMissingDoc(OperationContext* txn, const BSONObj& o) override;
private:
BSONObj _document;
};
/**
- * Testing-only SyncTail that checks the operation context in shouldRetry().
+ * Testing-only SyncTail that checks the operation context in fetchAndInsertMissingDocument().
*/
class SyncTailWithOperationContextChecker : public SyncTail {
public:
SyncTailWithOperationContextChecker();
- bool shouldRetry(OperationContext* txn, const BSONObj& o) override;
+ bool fetchAndInsertMissingDocument(OperationContext* txn, const BSONObj& o) override;
};
void SyncTailTest::setUp() {
@@ -142,16 +142,15 @@ void SyncTailTest::tearDown() {
SyncTailWithLocalDocumentFetcher::SyncTailWithLocalDocumentFetcher(const BSONObj& document)
: SyncTail(nullptr, SyncTail::MultiSyncApplyFunc(), nullptr), _document(document) {}
-BSONObj SyncTailWithLocalDocumentFetcher::getMissingDoc(OperationContext*,
- Database*,
- const BSONObj&) {
+BSONObj SyncTailWithLocalDocumentFetcher::getMissingDoc(OperationContext*, const BSONObj&) {
return _document;
}
SyncTailWithOperationContextChecker::SyncTailWithOperationContextChecker()
: SyncTail(nullptr, SyncTail::MultiSyncApplyFunc(), nullptr) {}
-bool SyncTailWithOperationContextChecker::shouldRetry(OperationContext* txn, const BSONObj&) {
+bool SyncTailWithOperationContextChecker::fetchAndInsertMissingDocument(OperationContext* txn,
+ const BSONObj&) {
ASSERT_FALSE(txn->writesAreReplicated());
ASSERT_FALSE(txn->lockState()->shouldConflictWithSecondaryBatchApplication());
ASSERT_TRUE(documentValidationDisabled(txn));
@@ -869,8 +868,7 @@ TEST_F(SyncTailTest, MultiInitialSyncApplyDisablesDocumentValidationWhileApplyin
ASSERT_EQUALS(fetchCount.load(), 1U);
}
-TEST_F(SyncTailTest,
- MultiInitialSyncApplyDoesNotRetryFailedUpdateIfDocumentIsMissingFromSyncSource) {
+TEST_F(SyncTailTest, MultiInitialSyncApplyIgnoresUpdateOperationIfDocumentIsMissingFromSyncSource) {
BSONObj emptyDoc;
SyncTailWithLocalDocumentFetcher syncTail(emptyDoc);
NamespaceString nss("local." + _agent.getSuiteName() + "_" + _agent.getTestName());
@@ -938,10 +936,11 @@ TEST_F(SyncTailTest, MultiInitialSyncApplySkipsIndexCreationOnNamespaceNotFound)
ASSERT_FALSE(AutoGetCollectionForRead(_opCtx.get(), badNss).getCollection());
}
-TEST_F(SyncTailTest, MultiInitialSyncApplyRetriesFailedUpdateIfDocumentIsAvailableFromSyncSource) {
+TEST_F(SyncTailTest,
+ MultiInitialSyncApplyFetchesMissingDocumentIfDocumentIsAvailableFromSyncSource) {
SyncTailWithLocalDocumentFetcher syncTail(BSON("_id" << 0 << "x" << 1));
NamespaceString nss("local." + _agent.getSuiteName() + "_" + _agent.getTestName());
- auto updatedDocument = BSON("_id" << 0 << "x" << 2);
+ auto updatedDocument = BSON("_id" << 0 << "x" << 1);
auto op = makeUpdateDocumentOplogEntry(
{Timestamp(Seconds(1), 0), 1LL}, nss, BSON("_id" << 0), updatedDocument);
MultiApplier::OperationPtrs ops = {&op};
@@ -958,35 +957,6 @@ TEST_F(SyncTailTest, MultiInitialSyncApplyRetriesFailedUpdateIfDocumentIsAvailab
ASSERT_EQUALS(ErrorCodes::CollectionIsEmpty, iter->next().getStatus());
}
-TEST_F(SyncTailTest, MultiInitialSyncApplyPassesThroughSyncApplyErrorAfterFailingToRetryBadOp) {
- SyncTailWithLocalDocumentFetcher syncTail(BSON("_id" << 0 << "x" << 1));
- NamespaceString nss("local." + _agent.getSuiteName() + "_" + _agent.getTestName());
- OplogEntry op(BSON("op"
- << "x"
- << "ns"
- << nss.ns()));
- MultiApplier::OperationPtrs ops = {&op};
- AtomicUInt32 fetchCount(0);
- ASSERT_EQUALS(ErrorCodes::BadValue,
- multiInitialSyncApply_noAbort(_opCtx.get(), &ops, &syncTail, &fetchCount));
- ASSERT_EQUALS(fetchCount.load(), 1U);
-}
-
-TEST_F(SyncTailTest, MultiInitialSyncApplyPassesThroughShouldSyncTailRetryError) {
- SyncTail syncTail(nullptr, SyncTail::MultiSyncApplyFunc(), nullptr);
- NamespaceString nss("local." + _agent.getSuiteName() + "_" + _agent.getTestName());
- auto op = makeUpdateDocumentOplogEntry(
- {Timestamp(Seconds(1), 0), 1LL}, nss, BSON("_id" << 0), BSON("_id" << 0 << "x" << 2));
- ASSERT_THROWS_CODE(syncTail.shouldRetry(_opCtx.get(), op.raw),
- mongo::UserException,
- ErrorCodes::FailedToParse);
- MultiApplier::OperationPtrs ops = {&op};
- AtomicUInt32 fetchCount(0);
- ASSERT_EQUALS(ErrorCodes::FailedToParse,
- multiInitialSyncApply_noAbort(_opCtx.get(), &ops, &syncTail, &fetchCount));
- ASSERT_EQUALS(fetchCount.load(), 1U);
-}
-
class IdempotencyTest : public SyncTailTest {
protected:
OplogEntry createCollection();
diff --git a/src/mongo/dbtests/repltests.cpp b/src/mongo/dbtests/repltests.cpp
index e10f536c21f..3a7c31a5eda 100644
--- a/src/mongo/dbtests/repltests.cpp
+++ b/src/mongo/dbtests/repltests.cpp
@@ -1381,7 +1381,7 @@ public:
bool returnEmpty;
SyncTest() : SyncTail(nullptr, SyncTail::MultiSyncApplyFunc()), returnEmpty(false) {}
virtual ~SyncTest() {}
- virtual BSONObj getMissingDoc(OperationContext* txn, Database* db, const BSONObj& o) {
+ virtual BSONObj getMissingDoc(OperationContext* txn, const BSONObj& o) {
if (returnEmpty) {
BSONObj o;
return o;
@@ -1393,7 +1393,7 @@ public:
}
};
-class ShouldRetry : public Base {
+class FetchAndInsertMissingDocument : public Base {
public:
void run() {
bool threw = false;
@@ -1414,7 +1414,7 @@ public:
badSource.setHostname("localhost:123");
OldClientContext ctx(&_txn, ns());
- badSource.getMissingDoc(&_txn, ctx.db(), o);
+ badSource.getMissingDoc(&_txn, o);
} catch (DBException&) {
threw = true;
}
@@ -1422,7 +1422,7 @@ public:
// now this should succeed
SyncTest t;
- verify(t.shouldRetry(&_txn, o));
+ verify(t.fetchAndInsertMissingDocument(&_txn, o));
verify(!_client
.findOne(ns(),
BSON("_id"
@@ -1431,7 +1431,7 @@ public:
// force it not to find an obj
t.returnEmpty = true;
- verify(!t.shouldRetry(&_txn, o));
+ verify(!t.fetchAndInsertMissingDocument(&_txn, o));
}
};
@@ -1498,7 +1498,7 @@ public:
add<DeleteOpIsIdBased>();
add<DatabaseIgnorerBasic>();
add<DatabaseIgnorerUpdate>();
- add<ShouldRetry>();
+ add<FetchAndInsertMissingDocument>();
}
};