summaryrefslogtreecommitdiff
path: root/jstests/noPassthrough/write_concern_operation_metrics.js
blob: 78749b54cf51c81c3cf88b571189e7e55dfdc423 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
/**
 * Tests that operation metrics are not increased while waiting for write concern.
 *
 * @tags: [
 *   # j:true requires persistence
 *   requires_persistence,
 *   requires_replication,
 * ]
 */

// This test reproduces the diagram below, which shows how Writer 1 can end up reading the oplog
// entry generated by Writer 2 when waiting for write concern. On serverless environments this can
// cause a user to be improperly billed for RPUs caused by reading large entries written by
// other tenants.
//
//  | Writer 1          | Writer 2     |
//  |-------------------+--------------|
//  | BeginTxn          |              |
//  | Timestamp 10      |              |
//  |                   | BeginTxn     |
//  | Write A           |              |
//  |                   | Update B     |
//  |                   | Timestamp 11 |
//  | Commit            |              |
//  | OnCommit hooks    |              |
//  |                   | Commit       |
//  | WaitForTopOfOplog |              |

(function() {
"use strict";

load("jstests/libs/fail_point_util.js");

// Collects the $operationMetrics aggregation output into an object keyed by database
// name, stripping the ever-changing localTime field so snapshots stay comparable.
const getDBMetrics = (adminDB) => {
    const metricsByDb = {};
    const cursor = adminDB.aggregate([{$operationMetrics: {}}]);
    while (cursor.hasNext()) {
        const entry = cursor.next();
        // localTime changes on every read and would break object comparisons.
        delete entry.localTime;
        metricsByDb[entry.db] = entry;
    }
    return metricsByDb;
};

// Server parameter that enables per-operation resource consumption metrics, which back
// the $operationMetrics aggregation stage used by getDBMetrics.
const setParams = {
    "aggregateOperationResourceConsumptionMetrics": true,
};

// Single-node replica set; w:"majority" with j:true then only has to wait on the
// primary's own journal.
const replSet = new ReplSetTest({nodes: 1, nodeOptions: {setParameter: setParams}});
replSet.startSet();
replSet.initiate();

const primary = replSet.getPrimary();
const adminDB = primary.getDB('admin');

// Writer 1 inserts into db1.coll; db1's metrics are what the final assertion checks.
const db1Name = "db1";
const db1 = primary.getDB(db1Name);

// Create coll up front to avoid implicit creation during the failpoint-controlled insert.
db1.createCollection("coll");
// Insert the document that Writer 2 will later grow to ~100KB. It lives in a separate
// database so its writes are not accounted to db1.
primary.getDB("otherDB").othercoll.insert({_id: 1, a: 'a'});

// Writer 1: a journaled majority write whose shell will block waiting for write concern.
var doInsert = function() {
    jsTestLog("Writer 1 performing an insert.");
    const res =
        db.getSiblingDB("db1").coll.insertOne({a: 'a'}, {writeConcern: {w: "majority", j: true}});
    assert.commandWorked(res);
};

// Writer 2: grows the otherDB document so the resulting entry sits at the top of the oplog.
function doUpdate() {
    jsTestLog("Writer 2 performing an update.");
    // ~100KB payload: large enough that accidentally reading it would trip the
    // docBytesRead assertion at the end of the test.
    const payload = 'a'.repeat(100 * 1024);
    const res = db.getSiblingDB("otherDB").othercoll.update({_id: 1}, {a: payload});
    assert.commandWorked(res);
}

// Pause Writer 1 right after it reserves its oplog op time (Timestamp 10 in the diagram
// above) but before the insert commits. skipFail presumably suppresses the "fail" half
// of this hang-and-fail failpoint so the insert still succeeds — per the failpoint name;
// TODO confirm against the server-side failpoint definition.
const hangAfterReserveOptime =
    configureFailPoint(primary,
                       "hangAndFailAfterDocumentInsertsReserveOpTimes",
                       {collectionNS: db1.coll.getFullName(), skipFail: true});

var joinWriter1 = startParallelShell(doInsert, primary.port);
// Ensure Writer 1 has reserved its op time before starting up Writer 2, so Writer 2's
// large entry gets the later timestamp and ends up at the top of the oplog.
hangAfterReserveOptime.wait();

// We want Writer 2 to perform the update after Writer 1 has reserved its op time.
const hangAfterUpdate = configureFailPoint(primary, "hangAfterBatchUpdate");
var joinWriter2 = startParallelShell(doUpdate, primary.port);
hangAfterUpdate.wait();

// Stop the primary from calling into awaitReplication() so both writers can commit
// before Writer 1 starts waiting for write concern.
const hangBeforeWaitingForWriteConcern =
    configureFailPoint(primary, "hangBeforeWaitingForWriteConcern");

// Unblock Writer 1 so it performs the insert and then blocks on hangBeforeWaitingForWriteConcern.
hangAfterReserveOptime.off();
// Unblock Writer 2, which should commit after Writer 1.
hangAfterUpdate.off();

// Wait until Writer 1 is parked just before waiting for write concern.
hangBeforeWaitingForWriteConcern.wait();

assert.soon(() => {
    // Make sure waitForWriteConcernDurationMillis exists in the currentOp entry for
    // Writer 1's insert on db1.coll.
    let allOps = db1.currentOp({ns: "db1.coll"});
    for (const j in allOps.inprog) {
        let op = allOps.inprog[j];
        if (op.hasOwnProperty("waitForWriteConcernDurationMillis")) {
            return true;
        }
    }
    return false;
}, "did not find waitForWriteConcernDurationMillis in currentOp");

// Unblock write concern wait.
hangBeforeWaitingForWriteConcern.off();

// Wait for both parallel shells to exit; each asserts its own write succeeded.
joinWriter1();
joinWriter2();

// Check for the existence of the waitForWriteConcernDurationMillis field in the slow
// query log. The appName filter restricts the match to the parallel shells' operations.
const predicate = /Slow query.*"appName":"MongoDB Shell".*"waitForWriteConcernDurationMillis":.*/;
assert.soon(() => {
    return checkLog.checkContainsOnce(primary, predicate);
}, "did not find waitForWriteConcernDurationMillis in slow query log");

const metrics = getDBMetrics(adminDB);
jsTestLog(metrics);
// Core assertion: waiting for write concern must not charge Writer 1 for reading
// Writer 2's ~100KB oplog entry. docBytesRead should be much smaller than 100kb.
// A threshold at 10kb should be enough.
assert.lt(metrics.db1.primaryMetrics.docBytesRead,
          10 * 1024,
          "Writer 1 wait for write concern caused undue consumption metrics increase.");

replSet.stopSet();
})();