summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLingzhi Deng <lingzhi.deng@mongodb.com>2020-02-19 18:12:23 -0500
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2020-02-20 16:22:02 +0000
commitf26fc48b099deb568cac3c2702cbc636991ead92 (patch)
tree7c42ed15d0eb00acbe449fb60bdc33a4c6237806
parent31426dd86fd67c7f4a0a8d586158a6aad5e59f27 (diff)
downloadmongo-f26fc48b099deb568cac3c2702cbc636991ead92.tar.gz
SERVER-44710: Add metrics.repl.network.getmores.numEmptyBatches
-rw-r--r--jstests/replsets/server_status_metrics.js17
-rw-r--r--src/mongo/db/repl/oplog_fetcher.cpp33
2 files changed, 46 insertions, 4 deletions
diff --git a/jstests/replsets/server_status_metrics.js b/jstests/replsets/server_status_metrics.js
index acf2df103a4..a88bb03c80d 100644
--- a/jstests/replsets/server_status_metrics.js
+++ b/jstests/replsets/server_status_metrics.js
@@ -255,6 +255,12 @@ jsTestLog(`Secondary ${secondary.host} metrics before restarting replication: ${
// Enable periodic noops to aid sync source selection.
assert.commandWorked(primary.adminCommand({setParameter: 1, writePeriodicNoops: true}));
+// Enable the setSmallOplogGetMoreMaxTimeMS failpoint on the secondary so that it starts using a
+// small awaitData timeout for oplog fetching after re-choosing the sync source. This makes the
+// sync source return empty batches more frequently, which is needed to test the numEmptyBatches
+// metric.
+configureFailPoint(secondary, 'setSmallOplogGetMoreMaxTimeMS');
+
// Repeatedly restart replication and wait for the sync source to be rechosen. If the sync source
// gets set to empty between stopping and restarting replication, then the secondary won't
// increment numTimesChoseSame, so we do this in a loop.
@@ -279,6 +285,17 @@ assert.soon(
assert.gt(ssNew.numSelections, ssOld.numSelections, "num selections not incremented");
assert.gt(ssNew.numTimesChoseSame, ssOld.numTimesChoseSame, "same sync source not chosen");
+// Get the base number of empty batches after the secondary is up to date. Assert that the secondary
+// eventually gets an empty batch due to awaitData timeout.
+rt.awaitLastOpCommitted();
+const targetNumEmptyBatches =
+ secondary.getDB("test").serverStatus().metrics.repl.network.getmores.numEmptyBatches + 1;
+assert.soon(
+ () => secondary.getDB("test").serverStatus().metrics.repl.network.getmores.numEmptyBatches >=
+ targetNumEmptyBatches,
+ `Timed out waiting for numEmptyBatches reach ${targetNumEmptyBatches}, current ${
+ secondary.getDB("test").serverStatus().metrics.repl.network.getmores.numEmptyBatches}`);
+
// Stop the primary so the secondary cannot choose a sync source.
ssOld = ssNew;
rt.stop(primary);
diff --git a/src/mongo/db/repl/oplog_fetcher.cpp b/src/mongo/db/repl/oplog_fetcher.cpp
index 94c212ce950..e02922be165 100644
--- a/src/mongo/db/repl/oplog_fetcher.cpp
+++ b/src/mongo/db/repl/oplog_fetcher.cpp
@@ -61,11 +61,36 @@ MONGO_FAIL_POINT_DEFINE(hangBeforeOplogFetcherRetries);
MONGO_FAIL_POINT_DEFINE(hangBeforeProcessingSuccessfulBatch);
namespace {
+// Combines a TimerStats for fetched batches with a counter of how many of those batches were
+// empty, and reports both as a single BSONObj.
+class OplogBatchStats {
+public:
+ // Records the elapsed time of one batch; when isEmptyBatch is true the batch is also counted
+ // as empty.
+ void recordMillis(int millis, bool isEmptyBatch);
+ // Returns the TimerStats report with an additional "numEmptyBatches" field.
+ BSONObj getReport() const;
+ // Implicit conversion yielding the same document as getReport().
+ // NOTE(review): presumably what lets ServerStatusMetricField<OplogBatchStats> serialize this
+ // object -- confirm against ServerStatusMetricField before making it explicit.
+ operator BSONObj() const {
+ return getReport();
+ }
+
+private:
+ // Timing statistics, fed by recordMillis().
+ TimerStats _getMores;
+ // Number of recordMillis() calls made with isEmptyBatch == true.
+ Counter64 _numEmptyBatches;
+};
+
+// Forwards millis to the underlying TimerStats and increments the empty-batch counter when
+// isEmptyBatch is set. The time is recorded for every batch, empty or not.
+void OplogBatchStats::recordMillis(int millis, bool isEmptyBatch) {
+ _getMores.recordMillis(millis);
+ if (isEmptyBatch) {
+ _numEmptyBatches.increment();
+ }
+}
+
+// Constructs a builder from the TimerStats report, appends the current empty-batch count under
+// "numEmptyBatches", and returns the resulting object.
+BSONObj OplogBatchStats::getReport() const {
+ BSONObjBuilder b(_getMores.getReport());
+ b.append("numEmptyBatches", _numEmptyBatches.get());
+ return b.obj();
+}
// The number and time spent reading batches off the network
-TimerStats getmoreReplStats;
-ServerStatusMetricField<TimerStats> displayBatchesRecieved("repl.network.getmores",
- &getmoreReplStats);
+// Registered under "repl.network.getmores"; besides the timer fields, the reported document
+// carries "numEmptyBatches".
+OplogBatchStats oplogBatchStats;
+ServerStatusMetricField<OplogBatchStats> displayBatchesRecieved("repl.network.getmores",
+ &oplogBatchStats);
// The oplog entries read via the oplog reader
Counter64 opsReadStats;
ServerStatusMetricField<Counter64> displayOpsRead("repl.network.ops", &opsReadStats);
@@ -876,7 +901,7 @@ Status OplogFetcher::_onSuccessfulBatch(const Documents& documents) {
opsReadStats.increment(info.networkDocumentCount);
networkByteStats.increment(info.networkDocumentBytes);
- getmoreReplStats.recordMillis(_lastBatchElapsedMS);
+ oplogBatchStats.recordMillis(_lastBatchElapsedMS, documents.empty());
auto status = _enqueueDocumentsFn(firstDocToApply, documents.cend(), info);
if (!status.isOK()) {