diff options
author | Evan Broder <evan@stripe.com> | 2016-07-05 21:02:49 -0700 |
---|---|---|
committer | David Storch <david.storch@10gen.com> | 2017-01-26 18:35:29 -0500 |
commit | 94b37aac060bcc8b10c3eb41f178d84008136f9c (patch) | |
tree | f98c9f60273345ea880af48ab14ad7ac8ef167cd /src | |
parent | 249eaae3e1b78ed5c4b6b7425145a23dc658fd75 (diff) | |
download | mongo-94b37aac060bcc8b10c3eb41f178d84008136f9c.tar.gz |
SERVER-4786 Allow specifying sample rate of slow queries
Adds a sampleRate parameter to the profile command, a value
on the interval [0, 1] which indicates which fraction of
operations should be randomly sampled for profiling and
logging. This allows users to reduce their slowms threshold
or increase their profiling level with less performance
impact on the system.
Closes #1099
Signed-off-by: David Storch <david.storch@10gen.com>
Diffstat (limited to 'src')
-rw-r--r-- | src/mongo/bson/util/bson_extract.cpp | 12 | ||||
-rw-r--r-- | src/mongo/bson/util/bson_extract.h | 15 | ||||
-rw-r--r-- | src/mongo/db/assemble_response.cpp | 8 | ||||
-rw-r--r-- | src/mongo/db/commands/dbcommands.cpp | 13 | ||||
-rw-r--r-- | src/mongo/db/mongod_options.cpp | 11 | ||||
-rw-r--r-- | src/mongo/db/ops/write_ops_exec.cpp | 8 | ||||
-rw-r--r-- | src/mongo/db/server_options.h | 1 | ||||
-rw-r--r-- | src/mongo/shell/db.js | 38 |
8 files changed, 85 insertions, 21 deletions
diff --git a/src/mongo/bson/util/bson_extract.cpp b/src/mongo/bson/util/bson_extract.cpp index 84c9cc92de6..8a453c02472 100644 --- a/src/mongo/bson/util/bson_extract.cpp +++ b/src/mongo/bson/util/bson_extract.cpp @@ -185,6 +185,18 @@ Status bsonExtractDoubleField(const BSONObj& object, StringData fieldName, doubl return Status::OK(); } +Status bsonExtractDoubleFieldWithDefault(const BSONObj& object, + StringData fieldName, + double defaultValue, + double* out) { + Status status = bsonExtractDoubleField(object, fieldName, out); + if (status == ErrorCodes::NoSuchKey) { + *out = defaultValue; + status = Status::OK(); + } + return status; +} + Status bsonExtractIntegerFieldWithDefault(const BSONObj& object, StringData fieldName, long long defaultValue, diff --git a/src/mongo/bson/util/bson_extract.h b/src/mongo/bson/util/bson_extract.h index cedc84cd01f..a4e2b90d82d 100644 --- a/src/mongo/bson/util/bson_extract.h +++ b/src/mongo/bson/util/bson_extract.h @@ -158,6 +158,21 @@ Status bsonExtractIntegerFieldWithDefault(const BSONObj& object, long long* out); /** + * Finds a double-precision floating point element named "fieldName" in "object". + * + * If a field named "fieldName" is present, and is a double, stores the value of the field into + * "*out". If no field named fieldName is present, sets "*out" to "defaultValue". In these cases, + * returns Status::OK(). + * + * If "fieldName" is present more than once, behavior is undefined. If the found field is not a + * double, returns ErrorCodes::TypeMismatch. + */ +Status bsonExtractDoubleFieldWithDefault(const BSONObj& object, + StringData fieldName, + double defaultValue, + double* out); + +/** * Finds a std::string element named "fieldName" in "object". * * If a field named "fieldName" is present, and is a string, stores the value of the field into diff --git a/src/mongo/db/assemble_response.cpp b/src/mongo/db/assemble_response.cpp index c2da985db30..6e14055aeed 100644 --- a/src/mongo/db/assemble_response.cpp +++ b/src/mongo/db/assemble_response.cpp @@ -628,13 +628,17 @@ void assembleResponse(OperationContext* txn, .incrementGlobalLatencyStats( txn, currentOp.totalTimeMicros(), currentOp.getReadWriteType()); - if (shouldLogOpDebug || debug.executionTimeMicros > logThresholdMs * 1000LL) { + const bool shouldSample = serverGlobalParams.sampleRate == 1.0 + ? true + : c.getPrng().nextCanonicalDouble() < serverGlobalParams.sampleRate; + + if (shouldLogOpDebug || (shouldSample && debug.executionTimeMicros > logThresholdMs * 1000LL)) { Locker::LockerInfo lockerInfo; txn->lockState()->getLockerInfo(&lockerInfo); log() << debug.report(&c, currentOp, lockerInfo.stats); } - if (currentOp.shouldDBProfile()) { + if (shouldSample && currentOp.shouldDBProfile()) { // Performance profiling is on if (txn->lockState()->isReadLocked()) { LOG(1) << "note: not profiling because recursive read lock"; diff --git a/src/mongo/db/commands/dbcommands.cpp b/src/mongo/db/commands/dbcommands.cpp index 7b5714b95e0..b9e0fd07669 100644 --- a/src/mongo/db/commands/dbcommands.cpp +++ b/src/mongo/db/commands/dbcommands.cpp @@ -38,6 +38,7 @@ #include "mongo/base/status.h" #include "mongo/base/status_with.h" #include "mongo/bson/simple_bsonobj_comparator.h" +#include "mongo/bson/util/bson_extract.h" #include "mongo/bson/util/builder.h" #include "mongo/db/audit.h" #include "mongo/db/auth/action_set.h" @@ -349,7 +350,8 @@ public: const BSONObj& cmdObj) { AuthorizationSession* authzSession = AuthorizationSession::get(client); - if (cmdObj.firstElement().numberInt() == -1 && !cmdObj.hasField("slowms")) { + if (cmdObj.firstElement().numberInt() == -1 && !cmdObj.hasField("slowms") && + !cmdObj.hasField("sampleRate")) { // If you just want to get the current profiling level you can do so with just // read access to system.profile, even if you can't change the profiling level. if (authzSession->isAuthorizedForActionsOnResource( @@ -393,6 +395,7 @@ public: result.append("was", db ? db->getProfilingLevel() : serverGlobalParams.defaultProfile); result.append("slowms", serverGlobalParams.slowMS); + result.append("sampleRate", serverGlobalParams.sampleRate); if (!readOnly) { if (!db) { @@ -408,6 +411,14 @@ public: serverGlobalParams.slowMS = slow.numberInt(); } + double newSampleRate; + uassertStatusOK(bsonExtractDoubleFieldWithDefault( + cmdObj, "sampleRate"_sd, serverGlobalParams.sampleRate, &newSampleRate)); + uassert(ErrorCodes::BadValue, + "sampleRate must be between 0.0 and 1.0 inclusive", + newSampleRate >= 0.0 && newSampleRate <= 1.0); + serverGlobalParams.sampleRate = newSampleRate; + if (!status.isOK()) { errmsg = status.reason(); } diff --git a/src/mongo/db/mongod_options.cpp b/src/mongo/db/mongod_options.cpp index 921de16e082..3824340a934 100644 --- a/src/mongo/db/mongod_options.cpp +++ b/src/mongo/db/mongod_options.cpp @@ -146,6 +146,13 @@ Status addMongodOptions(moe::OptionSection* options) { "value of slow for profile and console log") .setDefault(moe::Value(100)); + general_options + .addOptionChaining("operationProfiling.slowOpSampleRate", + "slowOpSampleRate", + moe::Double, + "fraction of slow ops to include in the profile and console log") + .setDefault(moe::Value(1.0)); + general_options.addOptionChaining("profile", "profile", moe::Int, "0=off 1=slow, 2=all") .setSources(moe::SourceAllLegacy); @@ -1044,6 +1051,10 @@ Status storeMongodOptions(const moe::Environment& params) { serverGlobalParams.slowMS = params["operationProfiling.slowOpThresholdMs"].as<int>(); } + if (params.count("operationProfiling.slowOpSampleRate")) { + serverGlobalParams.sampleRate = params["operationProfiling.slowOpSampleRate"].as<double>(); + } + if (params.count("storage.syncPeriodSecs")) { storageGlobalParams.syncdelay = params["storage.syncPeriodSecs"].as<double>(); if (storageGlobalParams.syncdelay < 0 || diff --git a/src/mongo/db/ops/write_ops_exec.cpp b/src/mongo/db/ops/write_ops_exec.cpp index 347d8d1d486..c3e36232801 100644 --- a/src/mongo/db/ops/write_ops_exec.cpp +++ b/src/mongo/db/ops/write_ops_exec.cpp @@ -109,13 +109,17 @@ void finishCurOp(OperationContext* txn, CurOp* curOp) { const bool logSlow = executionTimeMicros > (serverGlobalParams.slowMS + curOp->getExpectedLatencyMs()) * 1000LL; - if (logAll || logSlow) { + const bool shouldSample = serverGlobalParams.sampleRate == 1.0 + ? true + : txn->getClient()->getPrng().nextCanonicalDouble() < serverGlobalParams.sampleRate; + + if (logAll || (shouldSample && logSlow)) { Locker::LockerInfo lockerInfo; txn->lockState()->getLockerInfo(&lockerInfo); log() << curOp->debug().report(txn->getClient(), *curOp, lockerInfo.stats); } - if (curOp->shouldDBProfile()) { + if (shouldSample && curOp->shouldDBProfile()) { profile(txn, CurOp::get(txn)->getNetworkOp()); } } catch (const DBException& ex) { diff --git a/src/mongo/db/server_options.h b/src/mongo/db/server_options.h index 13dc2705150..95bd873740c 100644 --- a/src/mongo/db/server_options.h +++ b/src/mongo/db/server_options.h @@ -65,6 +65,7 @@ struct ServerGlobalParams { int defaultProfile = 0; // --profile int slowMS = 100; // --time in ms that is "slow" + double sampleRate = 1.0; // --samplerate rate at which to sample slow queries int defaultLocalThresholdMillis = 15; // --localThreshold in ms to consider a node local bool moveParanoia = false; // for move chunk paranoia diff --git a/src/mongo/shell/db.js b/src/mongo/shell/db.js index be8c7321bf2..583ef4d33e9 100644 --- a/src/mongo/shell/db.js +++ b/src/mongo/shell/db.js @@ -509,7 +509,7 @@ var DB; "\tdb.runCommand(cmdObj) run a database command. if cmdObj is a string, turns it into { cmdObj : 1 }"); print("\tdb.serverStatus()"); print("\tdb.setLogLevel(level,<component>)"); - print("\tdb.setProfilingLevel(level,<slowms>) 0=off 1=slow 2=all"); + print("\tdb.setProfilingLevel(level,slowms) 0=off 1=slow 2=all"); print( "\tdb.setWriteConcern( <write concern doc> ) - sets the write concern for writes to the db"); print( @@ -546,22 +546,25 @@ var DB; }; /** - * <p> Set profiling level for your db. Profiling gathers stats on query performance. </p> + * Configures settings for capturing operations inside the system.profile collection and in the + * slow query log. * - * <p>Default is off, and resets to off on a database restart -- so if you want it on, - * turn it on periodically. </p> + * The 'level' can be 0, 1, or 2: + * - 0 means that profiling is off and nothing will be written to system.profile. + * - 1 means that profiling is on for operations slower than the currently configured 'slowms' + * threshold (more on 'slowms' below). + * - 2 means that profiling is on for all operations, regardless of whether or not they are + * slower than 'slowms'. * - * <p>Levels :</p> - * <ul> - * <li>0=off</li> - * <li>1=log very slow operations; optional argument slowms specifies slowness threshold</li> - * <li>2=log all</li> - * @param {String} level Desired level of profiling - * @param {String} slowms For slow logging, query duration that counts as slow (default 100ms) - * @return SOMETHING_FIXME or null on error + * The 'options' parameter, if a number, is interpreted as the 'slowms' value to send to the + * server. 'slowms' determines the threshold, in milliseconds, above which slow operations get + * profiled at profiling level 1 or logged at logLevel 0. + * + * If 'options' is not a number, it is expected to be an object containing additional parameters + * to get passed to the server. For example, db.setProfilingLevel(2, {foo: "bar"}) will issue + * the command {profile: 2, foo: "bar"} to the server. */ - DB.prototype.setProfilingLevel = function(level, slowms) { - + DB.prototype.setProfilingLevel = function(level, options) { if (level < 0 || level > 2) { var errorText = "input level " + level + " is out of range [0..2]"; var errorObject = new Error(errorText); @@ -570,8 +573,11 @@ var DB; } var cmd = {profile: level}; - if (isNumber(slowms)) - cmd["slowms"] = slowms; + if (isNumber(options)) { + cmd.slowms = options; + } else { + cmd = Object.extend(cmd, options); + } return assert.commandWorked(this._dbCommand(cmd)); }; |