summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--jstests/free_mon/free_mon_rs_delete.js61
-rw-r--r--jstests/free_mon/free_mon_rs_halt.js62
-rw-r--r--jstests/free_mon/free_mon_rs_perm_del.js56
-rw-r--r--jstests/free_mon/free_mon_rs_register.js3
-rw-r--r--jstests/free_mon/libs/free_mon.js77
-rw-r--r--jstests/free_mon/libs/mock_http_common.py4
-rw-r--r--jstests/free_mon/libs/mock_http_control.py4
-rw-r--r--jstests/free_mon/libs/mock_http_server.py36
-rw-r--r--src/mongo/db/free_mon/free_mon_op_observer.cpp17
-rw-r--r--src/mongo/db/free_mon/free_mon_op_observer.h2
-rw-r--r--src/mongo/db/free_mon/free_mon_processor.cpp10
11 files changed, 322 insertions, 10 deletions
diff --git a/jstests/free_mon/free_mon_rs_delete.js b/jstests/free_mon/free_mon_rs_delete.js
new file mode 100644
index 00000000000..6f14d17d1ad
--- /dev/null
+++ b/jstests/free_mon/free_mon_rs_delete.js
@@ -0,0 +1,61 @@
+// Validate that a user deleting the free monitoring document in system.version
+// does not crash mongod
+load("jstests/free_mon/libs/free_mon.js");
+
+(function() {
+ 'use strict';
+
+ let mock_web = new FreeMonWebServer();
+
+ mock_web.start();
+
+ let options = {
+ setParameter: "cloudFreeMonitoringEndpointURL=" + mock_web.getURL(),
+ enableFreeMonitoring: "on",
+ verbose: 1,
+ };
+
+ const rst = new ReplSetTest({nodes: 2, nodeOptions: options});
+ rst.startSet();
+ rst.initiate();
+ rst.awaitReplication();
+
+ WaitForRegistration(rst.getPrimary());
+
+ mock_web.waitRegisters(2);
+
+ assert.eq(FreeMonGetStatus(rst.getPrimary()).state, 'enabled');
+ assert.eq(FreeMonGetStatus(rst.getSecondary()).state, 'enabled');
+
+ const qs1 = mock_web.queryStats();
+
+ // For kicks, delete the free monitoring storage state to knock free mon offline
+ // and make sure the node does not crash
+ rst.getPrimary().getDB("admin").system.version.remove({_id: "free_monitoring"});
+
+ sleep(20 * 1000);
+
+ const qs2 = mock_web.queryStats();
+
+ // Verify free monitoring stops but tolerate up to two additional metrics calls
+ assert.gte(qs1.metrics + 2, qs2.metrics);
+ assert.eq(qs1.registers, qs2.registers);
+
+ // Make sure we are back to the initial state.
+ assert.eq(FreeMonGetStatus(rst.getPrimary()).state, 'undecided');
+ assert.eq(FreeMonGetStatus(rst.getSecondary()).state, 'undecided');
+
+ // Enable it again to be sure we can resume
+ assert.commandWorked(rst.getPrimary().adminCommand({setFreeMonitoring: 1, action: "enable"}));
+ WaitForRegistration(rst.getPrimary());
+ WaitForRegistration(rst.getSecondary());
+
+ sleep(20 * 1000);
+
+ assert.eq(FreeMonGetStatus(rst.getPrimary()).state, 'enabled');
+ assert.eq(FreeMonGetStatus(rst.getSecondary()).state, 'enabled');
+
+ rst.stopSet();
+
+ mock_web.stop();
+})();
diff --git a/jstests/free_mon/free_mon_rs_halt.js b/jstests/free_mon/free_mon_rs_halt.js
new file mode 100644
index 00000000000..0694f504254
--- /dev/null
+++ b/jstests/free_mon/free_mon_rs_halt.js
@@ -0,0 +1,62 @@
+// Validate that if the endpoint returns halt = true that free monitoring halts
+load("jstests/free_mon/libs/free_mon.js");
+
+(function() {
+ 'use strict';
+
+ let mock_web = new FreeMonWebServer(FAULT_HALT_METRICS_5, true);
+
+ mock_web.start();
+
+ let options = {
+ setParameter: "cloudFreeMonitoringEndpointURL=" + mock_web.getURL(),
+ enableFreeMonitoring: "on",
+ verbose: 1,
+ };
+
+ const rst = new ReplSetTest({nodes: 2, nodeOptions: options});
+ rst.startSet();
+ rst.initiate();
+ rst.awaitReplication();
+
+ WaitForRegistration(rst.getPrimary());
+
+ mock_web.waitRegisters(2);
+
+ assert.eq(FreeMonGetStatus(rst.getPrimary()).state, 'enabled');
+ assert.eq(FreeMonGetStatus(rst.getSecondary()).state, 'enabled');
+
+ mock_web.enableFaults();
+ mock_web.waitFaults(1);
+
+ const qs1 = mock_web.queryStats();
+
+ sleep(20 * 1000);
+
+ const qs2 = mock_web.queryStats();
+
+ // Verify free monitoring stops but tolerate one additional collection
+ assert.gte(qs1.metrics + 1, qs2.metrics);
+ assert.eq(qs1.registers, qs2.registers);
+
+ // Halt causes us to disable free monitoring, not return it to initial state.
+ assert.eq(FreeMonGetStatus(rst.getPrimary()).state, 'disabled');
+ assert.eq(FreeMonGetStatus(rst.getSecondary()).state, 'disabled');
+
+ // Disable the fault so we can re-enable again
+ mock_web.disableFaults();
+
+ // Enable it again to be sure we can resume
+ assert.commandWorked(rst.getPrimary().adminCommand({setFreeMonitoring: 1, action: "enable"}));
+ WaitForRegistration(rst.getPrimary());
+ WaitForRegistration(rst.getSecondary());
+
+ sleep(20 * 1000);
+
+ assert.eq(FreeMonGetStatus(rst.getPrimary()).state, 'enabled');
+ assert.eq(FreeMonGetStatus(rst.getSecondary()).state, 'enabled');
+
+ rst.stopSet();
+
+ mock_web.stop();
+})();
diff --git a/jstests/free_mon/free_mon_rs_perm_del.js b/jstests/free_mon/free_mon_rs_perm_del.js
new file mode 100644
index 00000000000..be8176ef8a5
--- /dev/null
+++ b/jstests/free_mon/free_mon_rs_perm_del.js
@@ -0,0 +1,56 @@
+// Validate that if the endpoint says permanently delete that the state
+// document is deleted and replicated properly
+load("jstests/free_mon/libs/free_mon.js");
+
+(function() {
+ 'use strict';
+
+ let mock_web = new FreeMonWebServer(FAULT_PERMANENTLY_DELETE_AFTER_3, true);
+
+ mock_web.start();
+
+ let options = {
+ setParameter: "cloudFreeMonitoringEndpointURL=" + mock_web.getURL(),
+ enableFreeMonitoring: "on",
+ verbose: 1,
+ };
+
+ const rst = new ReplSetTest({nodes: 2, nodeOptions: options});
+ rst.startSet();
+ rst.initiate();
+ rst.awaitReplication();
+
+ WaitForRegistration(rst.getPrimary());
+
+ mock_web.waitRegisters(2);
+
+ assert.eq(FreeMonGetStatus(rst.getPrimary()).state, 'enabled');
+ assert.eq(FreeMonGetStatus(rst.getSecondary()).state, 'enabled');
+
+ mock_web.enableFaults();
+ mock_web.waitFaults(1);
+
+ sleep(20 * 1000);
+
+ // Make sure we are back to the initial state.
+ assert.eq(FreeMonGetStatus(rst.getPrimary()).state, 'undecided');
+
+ assert.eq(FreeMonGetStatus(rst.getSecondary()).state, 'undecided');
+
+ // Disable the fault so we can re-enable again
+ mock_web.disableFaults();
+
+ // Enable it again to be sure we can resume
+ assert.commandWorked(rst.getPrimary().adminCommand({setFreeMonitoring: 1, action: "enable"}));
+ WaitForRegistration(rst.getPrimary());
+ WaitForRegistration(rst.getSecondary());
+
+ sleep(20 * 1000);
+
+ assert.eq(FreeMonGetStatus(rst.getPrimary()).state, 'enabled');
+ assert.eq(FreeMonGetStatus(rst.getSecondary()).state, 'enabled');
+
+ rst.stopSet();
+
+ mock_web.stop();
+})();
diff --git a/jstests/free_mon/free_mon_rs_register.js b/jstests/free_mon/free_mon_rs_register.js
index 9002e3e3793..2c988e299fa 100644
--- a/jstests/free_mon/free_mon_rs_register.js
+++ b/jstests/free_mon/free_mon_rs_register.js
@@ -27,6 +27,9 @@ load("jstests/free_mon/libs/free_mon.js");
mock_web.waitRegisters(2);
+ assert.eq(FreeMonGetStatus(rst.getPrimary()).state, 'enabled');
+ assert.eq(FreeMonGetStatus(rst.getSecondary()).state, 'enabled');
+
const last_register = mock_web.query("last_register");
print(tojson(last_register));
diff --git a/jstests/free_mon/libs/free_mon.js b/jstests/free_mon/libs/free_mon.js
index 1fd3c3c01e1..21d42fb3e66 100644
--- a/jstests/free_mon/libs/free_mon.js
+++ b/jstests/free_mon/libs/free_mon.js
@@ -10,14 +10,19 @@ const FAULT_INVALID_REGISTER = "invalid_register";
const FAULT_HALT_METRICS_5 = "halt_metrics_5";
const FAULT_PERMANENTLY_DELETE_AFTER_3 = "permanently_delete_after_3";
+const DISABLE_FAULTS = "disable_faults";
+const ENABLE_FAULTS = "enable_faults";
+
class FreeMonWebServer {
/**
* Create a new webserver.
*
* @param {string} fault_type
+ * @param {bool} disableFaultsOnStartup optionally disable faults on startup
*/
- constructor(fault_type) {
+ constructor(fault_type, disableFaultsOnStartup) {
this.python = "/opt/mongodbtoolchain/v2/bin/python3";
+ this.disableFaultsOnStartup = disableFaultsOnStartup || false;
this.fault_type = fault_type;
if (_isWindows()) {
@@ -65,6 +70,9 @@ class FreeMonWebServer {
let args = [this.python, "-u", this.web_server_py, "--port=" + this.port];
if (this.fault_type) {
args.push("--fault=" + this.fault_type);
+ if (this.disableFaultsOnStartup) {
+ args.push("--disable-faults");
+ }
}
this.pid = _startMongoProgram({args: args});
@@ -118,6 +126,40 @@ class FreeMonWebServer {
}
/**
+ * Control the HTTP server.
+ *
+ * @param {string} query type
+ */
+ control(query) {
+ const out_file = "out_" + this.port + ".txt";
+ const python_command = this.python + " -u " + this.control_py + " --port=" + this.port +
+ " --query=" + query + " > " + out_file;
+
+ let ret = 0;
+ if (_isWindows()) {
+ ret = runProgram('cmd.exe', '/c', python_command);
+ } else {
+ ret = runProgram('/bin/sh', '-c', python_command);
+ }
+
+ assert.eq(ret, 0);
+ }
+
+ /**
+ * Disable Faults
+ */
+ disableFaults() {
+ this.control(DISABLE_FAULTS);
+ }
+
+ /**
+ * Enable Faults
+ */
+ enableFaults() {
+ this.control(ENABLE_FAULTS);
+ }
+
+ /**
* Query the stats page for the HTTP server.
*
* @return {object} Object representation of JSON from the server.
@@ -155,6 +197,21 @@ class FreeMonWebServer {
return stats.metrics >= count;
}, "Failed to web server metrics", 60 * 1000);
}
+
+ /**
+ * Wait for N fault calls to be received by web server.
+ *
+ * @throws assert.soon() exception
+ */
+ waitFaults(count) {
+ const qs = this.queryStats.bind(this);
+ // Wait for faults to be triggered
+ assert.soon(function() {
+ const stats = qs();
+ print("QS : " + tojson(stats));
+ return stats.faults >= count;
+ }, "Failed to web server faults", 60 * 1000);
+ }
}
/**
@@ -163,6 +220,8 @@ class FreeMonWebServer {
* @param {object} conn
*/
function WaitForRegistration(conn) {
+ 'use strict';
+
const admin = conn.getDB("admin");
// Wait for registration to occur
@@ -179,8 +238,22 @@ function WaitForRegistration(conn) {
* @param {object} registration document
*/
function FreeMonGetRegistration(conn) {
+ 'use strict';
+
const admin = conn.getDB("admin");
const docs = admin.system.version.find({_id: "free_monitoring"});
const da = docs.toArray();
return da[0];
-} \ No newline at end of file
+}
+
+/**
+ * Get current Free Monitoring Status via serverStatus.
+ *
+ * @return {object} serverStatus.freeMonitoring section
+ */
+function FreeMonGetStatus(conn) {
+ 'use strict';
+
+ const admin = conn.getDB("admin");
+ return assert.commandWorked(admin.runCommand({serverStatus: 1})).freeMonitoring;
+}
diff --git a/jstests/free_mon/libs/mock_http_common.py b/jstests/free_mon/libs/mock_http_common.py
index 7840894062c..fe4c428ef6f 100644
--- a/jstests/free_mon/libs/mock_http_common.py
+++ b/jstests/free_mon/libs/mock_http_common.py
@@ -4,6 +4,8 @@ import json
URL_PATH_STATS = "/stats"
URL_PATH_LAST_REGISTER = "/last_register"
URL_PATH_LAST_METRICS = "/last_metrics"
+URL_DISABLE_FAULTS = "/disable_faults"
+URL_ENABLE_FAULTS = "/enable_faults"
class Stats:
"""Stats class shared between client and server."""
@@ -11,9 +13,11 @@ class Stats:
def __init__(self):
self.register_calls = 0
self.metrics_calls = 0
+ self.fault_calls = 0
def __repr__(self):
return json.dumps({
'metrics': self.metrics_calls,
'registers': self.register_calls,
+ 'faults': self.fault_calls,
})
diff --git a/jstests/free_mon/libs/mock_http_control.py b/jstests/free_mon/libs/mock_http_control.py
index fc72021c143..8f2469155cb 100644
--- a/jstests/free_mon/libs/mock_http_control.py
+++ b/jstests/free_mon/libs/mock_http_control.py
@@ -32,6 +32,10 @@ def main():
url_str += mock_http_common.URL_PATH_LAST_REGISTER
elif args.query == "last_metrics":
url_str += mock_http_common.URL_PATH_LAST_METRICS
+ elif args.query == "disable_faults":
+ url_str += mock_http_common.URL_DISABLE_FAULTS
+ elif args.query == "enable_faults":
+ url_str += mock_http_common.URL_ENABLE_FAULTS
else:
print("Unknown query type")
sys.exit(1)
diff --git a/jstests/free_mon/libs/mock_http_server.py b/jstests/free_mon/libs/mock_http_server.py
index ea5d0f21d08..2005575463c 100644
--- a/jstests/free_mon/libs/mock_http_server.py
+++ b/jstests/free_mon/libs/mock_http_server.py
@@ -21,7 +21,7 @@ import mock_http_common
stats = mock_http_common.Stats()
last_metrics = None
last_register = None
-
+disable_faults = False
fault_type = None
"""Fault which causes the server to return an HTTP failure on register."""
@@ -65,6 +65,10 @@ class FreeMonHandler(http.server.BaseHTTPRequestHandler):
self._do_last_register()
elif path == mock_http_common.URL_PATH_LAST_METRICS:
self._do_last_metrics()
+ elif path == mock_http_common.URL_DISABLE_FAULTS:
+ self._do_disable_faults()
+ elif path == mock_http_common.URL_ENABLE_FAULTS:
+ self._do_enable_faults()
else:
self.send_response(http.HTTPStatus.NOT_FOUND)
self.end_headers()
@@ -100,14 +104,16 @@ class FreeMonHandler(http.server.BaseHTTPRequestHandler):
decoded_doc = bson.BSON.decode(raw_input)
last_register = dumps(decoded_doc)
- if fault_type == FAULT_FAIL_REGISTER:
+ if not disable_faults and fault_type == FAULT_FAIL_REGISTER:
+ stats.fault_calls += 1
self.send_response(http.HTTPStatus.INTERNAL_SERVER_ERROR)
self.send_header("content-type", "application/octet-stream")
self.end_headers()
self.wfile.write("Internal Error of some sort.".encode())
return
- if fault_type == FAULT_INVALID_REGISTER:
+ if not disable_faults and fault_type == FAULT_INVALID_REGISTER:
+ stats.fault_calls += 1
data = bson.BSON.encode({
'version': bson.int64.Int64(42),
'haltMetricsUploading': False,
@@ -148,7 +154,10 @@ You can disable monitoring at any time by running db.disableFreeMonitoring()."""
decoded_doc = bson.BSON.decode(raw_input)
last_metrics = dumps(decoded_doc)
- if stats.metrics_calls > 5 and fault_type == FAULT_HALT_METRICS_5:
+ if not disable_faults and \
+ stats.metrics_calls > 5 and \
+ fault_type == FAULT_HALT_METRICS_5:
+ stats.fault_calls += 1
data = bson.BSON.encode({
'version': bson.int64.Int64(1),
'haltMetricsUploading': True,
@@ -157,7 +166,9 @@ You can disable monitoring at any time by running db.disableFreeMonitoring()."""
'reportingInterval': bson.int64.Int64(1),
'message': 'Thanks for all the metrics',
})
- elif stats.metrics_calls > 3 and fault_type == FAULT_PERMANENTLY_DELETE_AFTER_3:
+ elif not disable_faults and \
+ stats.metrics_calls > 3 and fault_type == FAULT_PERMANENTLY_DELETE_AFTER_3:
+ stats.fault_calls += 1
data = bson.BSON.encode({
'version': bson.int64.Int64(1),
'haltMetricsUploading': False,
@@ -196,6 +207,15 @@ You can disable monitoring at any time by running db.disableFreeMonitoring()."""
self.wfile.write(str(last_metrics).encode('utf-8'))
+ def _do_disable_faults(self):
+ global disable_faults
+ disable_faults = True
+ self._send_header()
+
+ def _do_enable_faults(self):
+ global disable_faults
+ disable_faults = False
+ self._send_header()
def run(port, server_class=http.server.HTTPServer, handler_class=FreeMonHandler):
"""Run web server."""
@@ -213,6 +233,7 @@ def run(port, server_class=http.server.HTTPServer, handler_class=FreeMonHandler)
def main():
"""Main Method."""
global fault_type
+ global disable_faults
parser = argparse.ArgumentParser(description='MongoDB Mock Free Monitoring Endpoint.')
@@ -222,6 +243,8 @@ def main():
parser.add_argument('--fault', type=str, help="Type of fault to inject")
+ parser.add_argument('--disable-faults', action='store_true', help="Disable faults on startup")
+
args = parser.parse_args()
if args.verbose:
logging.basicConfig(level=logging.DEBUG)
@@ -233,6 +256,9 @@ def main():
fault_type = args.fault
+ if args.disable_faults:
+ disable_faults = True
+
run(args.port)
diff --git a/src/mongo/db/free_mon/free_mon_op_observer.cpp b/src/mongo/db/free_mon/free_mon_op_observer.cpp
index b3457a687a6..8dde8c8b53e 100644
--- a/src/mongo/db/free_mon/free_mon_op_observer.cpp
+++ b/src/mongo/db/free_mon/free_mon_op_observer.cpp
@@ -32,6 +32,7 @@
#include "mongo/db/free_mon/free_mon_controller.h"
#include "mongo/db/free_mon/free_mon_storage.h"
+#include "mongo/db/operation_context.h"
namespace mongo {
namespace {
@@ -44,6 +45,8 @@ bool isStandaloneOrPrimary(OperationContext* opCtx) {
repl::MemberState::RS_PRIMARY);
}
+const auto getFreeMonDeleteState = OperationContext::declareDecoration<bool>();
+
} // namespace
FreeMonOpObserver::FreeMonOpObserver() = default;
@@ -111,6 +114,18 @@ void FreeMonOpObserver::onUpdate(OperationContext* opCtx, const OplogUpdateEntry
}
}
+void FreeMonOpObserver::aboutToDelete(OperationContext* opCtx,
+ const NamespaceString& nss,
+ const BSONObj& doc) {
+
+ bool isFreeMonDoc = (nss == NamespaceString::kServerConfigurationNamespace) &&
+ (doc["_id"].str() == FreeMonStorage::kFreeMonDocIdKey);
+
+ // Set a flag that indicates whether the document to be deleted is the free monitoring state
+ // document
+ getFreeMonDeleteState(opCtx) = isFreeMonDoc;
+}
+
void FreeMonOpObserver::onDelete(OperationContext* opCtx,
const NamespaceString& nss,
OptionalCollectionUUID uuid,
@@ -125,7 +140,7 @@ void FreeMonOpObserver::onDelete(OperationContext* opCtx,
return;
}
- if (deletedDoc.get()["_id"].str() == FreeMonStorage::kFreeMonDocIdKey) {
+ if (getFreeMonDeleteState(opCtx) == true) {
auto controller = FreeMonController::get(opCtx->getServiceContext());
if (controller != nullptr) {
diff --git a/src/mongo/db/free_mon/free_mon_op_observer.h b/src/mongo/db/free_mon/free_mon_op_observer.h
index c084c440dd2..05d16a21e4a 100644
--- a/src/mongo/db/free_mon/free_mon_op_observer.h
+++ b/src/mongo/db/free_mon/free_mon_op_observer.h
@@ -61,7 +61,7 @@ public:
void aboutToDelete(OperationContext* opCtx,
const NamespaceString& nss,
- const BSONObj& doc) final {}
+ const BSONObj& doc) final;
void onDelete(OperationContext* opCtx,
const NamespaceString& nss,
diff --git a/src/mongo/db/free_mon/free_mon_processor.cpp b/src/mongo/db/free_mon/free_mon_processor.cpp
index c192ab21d93..4ff1411145e 100644
--- a/src/mongo/db/free_mon/free_mon_processor.cpp
+++ b/src/mongo/db/free_mon/free_mon_processor.cpp
@@ -791,6 +791,11 @@ void FreeMonProcessor::doAsyncMetricsComplete(
auto opCtxUnique = client->makeOperationContext();
FreeMonStorage::deleteState(opCtxUnique.get());
+ _state->setState(StorageStateEnum::pending);
+
+ // Clear out the in-memory state
+ _lastReadState = boost::none;
+
return;
}
@@ -945,7 +950,10 @@ void FreeMonProcessor::doNotifyOnDelete(Client* client) {
// the same and stop free monitoring. We continue collecting though.
// So we mark the internal state as disabled which stop registration and metrics send
- _state->setState(StorageStateEnum::disabled);
+ _state->setState(StorageStateEnum::pending);
+
+ // Clear out the in-memory state
+ _lastReadState = boost::none;
}
void FreeMonProcessor::doNotifyOnRollback(Client* client) {