// @file curop.h
/*
* Copyright (C) 2010 10gen Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License, version 3,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* As a special exception, the copyright holders give permission to link the
* code of portions of this program with the OpenSSL library under certain
* conditions as described in each individual source file and distribute
* linked combinations including the program with the OpenSSL library. You
* must comply with the GNU Affero General Public License in all respects for
* all of the code used other than as permitted herein. If you modify file(s)
* with this exception, you may extend this exception to your version of the
* file(s), but you are not obligated to do so. If you do not wish to do so,
* delete this exception statement from your version. If you delete this
* exception statement from all source files in the program, then also delete
* it in the license file.
*/
#pragma once
#include "mongo/base/disallow_copying.h"
#include "mongo/db/operation_context.h"
#include "mongo/db/server_options.h"
#include "mongo/platform/atomic_word.h"
#include "mongo/util/concurrency/spin_lock.h"
#include "mongo/util/progress_meter.h"
#include "mongo/util/thread_safe_string.h"
#include "mongo/util/time_support.h"
#include "mongo/util/net/message.h"
namespace mongo {
class Client;
class Command;
class CurOp;
class OperationContext;
struct PlanSummaryStats;
/**
* Stores a copy of a BSON object in a fixed-size buffer.
* If the object is too big for the buffer, the cache reports "too big" instead.
* Useful for keeping a copy around indefinitely without wasting a lot of space or doing malloc.
*
* CachedBSONObjBase is the non-template base of CachedBSONObj. It only declares the
* placeholder object that the CachedBSONObj template hands back for oversized objects.
*/
class CachedBSONObjBase {
public:
// Placeholder returned by CachedBSONObj in place of an object that did not fit in its buffer.
static BSONObj _tooBig; // { $msg : "query not recording (too large)" }
};
template
class CachedBSONObj : public CachedBSONObjBase {
public:
enum { TOO_BIG_SENTINEL = 1 };
CachedBSONObj() {
reset();
}
void reset(int sz = 0) {
_lock.lock();
_reset(sz);
_lock.unlock();
}
void set(const BSONObj& o) {
scoped_spinlock lk(_lock);
size_t sz = o.objsize();
if (sz > sizeof(_buf)) {
_reset(TOO_BIG_SENTINEL);
} else {
memcpy(_buf, o.objdata(), sz);
}
}
int size() const {
return ConstDataView(_buf).read>();
}
bool have() const {
return size() > 0;
}
bool tooBig() const {
return size() == TOO_BIG_SENTINEL;
}
BSONObj get() const {
scoped_spinlock lk(_lock);
return _get();
}
void append(BSONObjBuilder& b, StringData name) const {
scoped_spinlock lk(_lock);
BSONObj temp = _get();
b.append(name, temp);
}
private:
/** you have to be locked when you call this */
BSONObj _get() const {
int sz = size();
if (sz == 0)
return BSONObj();
if (sz == TOO_BIG_SENTINEL)
return _tooBig;
return BSONObj(_buf).copy();
}
/** you have to be locked when you call this */
void _reset(int sz) {
DataView(_buf).write>(sz);
}
mutable SpinLock _lock;
char _buf[BUFFER_SIZE];
};
/**
* Debugging and profiling data recorded for a single operation.
*
* Lifespan is different than CurOp because of recursives with DBDirectClient.
*/
class OpDebug {
public:
OpDebug() = default;
/**
* Returns a string report about the operation; the exact format is determined by the
* out-of-line definition.
*
* @param curop reference to the CurOp that owns this OpDebug
* @param lockStats lockStats object containing locking information about the operation
*/
std::string report(const CurOp& curop, const SingleThreadedLockStats& lockStats) const;
/**
* Appends information about the current operation to "builder"
*
* @param curop reference to the CurOp that owns this OpDebug
* @param lockStats lockStats object containing locking information about the operation
*/
void append(const CurOp& curop,
const SingleThreadedLockStats& lockStats,
BSONObjBuilder& builder) const;
/**
* Copies relevant plan summary metrics to this OpDebug instance.
*/
void setPlanSummaryMetrics(const PlanSummaryStats& planSummaryStats);
// -------------------
// basic options
// networkOp represents the network-level op code: OP_QUERY, OP_GET_MORE, OP_COMMAND, etc.
NetworkOp networkOp{opInvalid}; // only set this through CurOp::setNetworkOp_inlock() to keep synced
// logicalOp is the logical operation type, ie 'dbQuery' regardless of whether this is an
// OP_QUERY find, a find command using OP_QUERY, or a find command using OP_COMMAND.
// Similarly, the return value will be dbGetMore for both OP_GET_MORE and getMore command.
LogicalOp logicalOp{LogicalOp::opInvalid}; // only set this through CurOp::setLogicalOp_inlock()
bool iscommand{false};
BSONObj query{};
BSONObj updateobj{};
// detailed options
long long cursorid{-1};
long long ntoreturn{-1};
long long ntoskip{-1};
bool exhaust{false};
// debugging/profile info
long long keysExamined{-1};
long long docsExamined{-1};
// indicates short circuited code path on an update to make the update faster
bool idhack{false};
bool hasSortStage{false}; // true if the query plan involves an in-memory sort
// True if the plan came from the multi-planner (not from the plan cache and not a query with a
// single solution).
bool fromMultiPlanner{false};
// True if a replan was triggered during the execution of this operation.
bool replanned{false};
long long nMatched{-1}; // number of records that match the query
long long nModified{-1}; // number of records written (no no-ops)
long long nmoved{-1}; // updates resulted in a move (moves are expensive)
long long ninserted{-1};
long long ndeleted{-1};
bool fastmod{false};
bool fastmodinsert{false}; // upsert of an $operation. builds a default object
bool upsert{false}; // true if the update actually did an insert
bool cursorExhausted{
false}; // true if the cursor has been closed at the end of a find/getMore operation
int keyUpdates{-1};
long long writeConflicts{0};
// New Query Framework debugging/profiling info
// TODO: should this really be an opaque BSONObj? Not sure.
CachedBSONObj<4096> execStats;
// error handling
ExceptionInfo exceptionInfo;
// response info
int executionTime{0};
long long nreturned{-1};
int responseLength{-1};
private:
/**
* Returns true if this OpDebug instance was generated by a find command. Returns false for
* OP_QUERY find and all other operations.
*/
bool isFindCommand() const;
/**
* Returns true if this OpDebug instance was generated by a getMore command. Returns false for
* OP_GET_MORE and all other operations.
*/
bool isGetMoreCommand() const;
};
/**
 * Container for data used to report information about an OperationContext.
 *
 * Every OperationContext in a server with CurOp support has a stack of CurOp
 * objects. The entry at the top of the stack is used to record timing and
 * resource statistics for the executing operation or suboperation.
 *
 * All of the accessor methods on CurOp may be called by the thread executing
 * the associated OperationContext at any time, or by other threads that have
 * locked the context's owning Client object.
 *
 * The mutator methods on CurOp whose names end _inlock may only be called by the thread
 * executing the associated OperationContext and Client, and only when that thread has also
 * locked the Client object. All other mutators may only be called by the thread executing
 * CurOp, but do not require holding the Client lock. The exception to this is the kill()
 * method, which is self-synchronizing.
 * NOTE(review): no kill() method is declared on CurOp in this header, so the sentence above
 * may be stale -- confirm.
 *
 * The OpDebug member of a CurOp, accessed via the debug() accessor should *only* be accessed
 * from the thread executing an operation, and as a result its fields may be accessed without
 * any synchronization.
 */
class CurOp {
    MONGO_DISALLOW_COPYING(CurOp);

public:
    static CurOp* get(const OperationContext* opCtx);
    static CurOp* get(const OperationContext& opCtx);

    /**
     * Constructs a nested CurOp at the top of the given "opCtx"'s CurOp stack.
     */
    explicit CurOp(OperationContext* opCtx);
    ~CurOp();

    /** Returns true if the cached query reports a positive size. */
    bool haveQuery() const {
        return _query.have();
    }

    /** Returns the cached query object. */
    BSONObj query() const {
        return _query.get();
    }

    /** Appends the cached query object to "b" under the field name "name". */
    void appendQuery(BSONObjBuilder& b, StringData name) const {
        _query.append(b, name);
    }

    void enter_inlock(const char* ns, int dbProfileLevel);

    /**
     * Sets the type of the current network operation, keeping the OpDebug copy in sync.
     */
    void setNetworkOp_inlock(NetworkOp op) {
        _networkOp = op;
        _debug.networkOp = op;
    }

    /**
     * Sets the type of the current logical operation, keeping the OpDebug copy in sync.
     */
    void setLogicalOp_inlock(LogicalOp op) {
        _logicalOp = op;
        _debug.logicalOp = op;
    }

    /**
     * Marks the current operation as being a command.
     */
    void markCommand_inlock() {
        _isCommand = true;
    }

    /**
     * Returns a structure containing data used for profiling, accessed only by a thread
     * currently executing the operation context associated with this CurOp.
     */
    OpDebug& debug() {
        return _debug;
    }

    /**
     * Gets the name of the namespace on which the current operation operates.
     */
    std::string getNS() const {
        return _ns;
    }

    /**
     * Returns whether an operation that took "ms" milliseconds should be profiled: never when
     * the profile level is 0 or less, always at level 2 or higher, and otherwise only when
     * "ms" is at least serverGlobalParams.slowMS.
     */
    bool shouldDBProfile(int ms) const {
        if (_dbprofile <= 0)
            return false;

        return _dbprofile >= 2 || ms >= serverGlobalParams.slowMS;
    }

    /**
     * Raises the profiling level for this operation to "dbProfileLevel" if it was previously
     * less than "dbProfileLevel".
     *
     * This belongs on OpDebug, and so does not have the _inlock suffix.
     */
    void raiseDbProfileLevel(int dbProfileLevel);

    /**
     * Gets the network operation type. No lock is required if called by the thread executing
     * the operation, but the lock must be held if called from another thread.
     */
    NetworkOp getNetworkOp() const {
        return _networkOp;
    }

    /**
     * Gets the logical operation type. No lock is required if called by the thread executing
     * the operation, but the lock must be held if called from another thread.
     */
    LogicalOp getLogicalOp() const {
        return _logicalOp;
    }

    /**
     * Returns true if the current operation is known to be a command.
     */
    bool isCommand() const {
        return _isCommand;
    }

    //
    // Methods for controlling CurOp "max time".
    //

    /**
     * Sets the amount of time this operation should be allowed to run, units of microseconds.
     * The special value 0 is "allow to run indefinitely".
     */
    void setMaxTimeMicros(uint64_t maxTimeMicros);

    /**
     * Returns true if a time limit has been set on this operation, and false otherwise.
     */
    bool isMaxTimeSet() const;

    /**
     * Checks whether this operation has been running longer than its time limit. Returns
     * false if not, or if the operation has no time limit.
     */
    bool maxTimeHasExpired();

    /**
     * Returns the number of microseconds remaining for this operation's time limit, or the
     * special value 0 if the operation has no time limit.
     *
     * Calling this method is more expensive than calling its sibling "maxTimeHasExpired()",
     * since an accurate measure of remaining time needs to be calculated.
     */
    uint64_t getRemainingMaxTimeMicros() const;

    //
    // Methods for getting/setting elapsed time. Note that the observed elapsed time may be
    // negative, if the system time has been reset during the course of this operation.
    //

    void ensureStarted();

    /** Returns true once a positive start time has been recorded. */
    bool isStarted() const {
        return _start > 0;
    }

    /** Returns the start time in microseconds, calling ensureStarted() first. */
    long long startTime() {  // micros
        ensureStarted();
        return _start;
    }

    /** Records the current time as the end time of this operation. */
    void done() {
        _end = curTimeMicros64();
    }

    /**
     * Returns the microseconds between the start time and the end time recorded by done().
     * Raises assertion 12601 if no end time has been recorded.
     */
    long long totalTimeMicros() {
        massert(12601, "CurOp not marked done yet", _end);
        return _end - startTime();
    }

    /** totalTimeMicros() in whole milliseconds, narrowed to int. */
    int totalTimeMillis() {
        return static_cast<int>(totalTimeMicros() / 1000);
    }

    /** Returns the microseconds between the start time and now. */
    long long elapsedMicros() {
        return curTimeMicros64() - startTime();
    }

    /** elapsedMicros() in whole milliseconds, narrowed to int. */
    int elapsedMillis() {
        return static_cast<int>(elapsedMicros() / 1000);
    }

    /** elapsedMicros() in whole seconds, narrowed to int. */
    int elapsedSeconds() {
        // Derived directly from microseconds rather than from elapsedMillis(): the int
        // millisecond count overflows once an operation has run for more than ~24.8 days,
        // which would corrupt the seconds value long before seconds itself can overflow.
        return static_cast<int>(elapsedMicros() / (1000 * 1000));
    }

    /** Caches a copy of "query" for later reporting. */
    void setQuery_inlock(const BSONObj& query) {
        _query.set(query);
    }

    /** Returns the command pointer last passed to setCommand_inlock(), or nullptr. */
    Command* getCommand() const {
        return _command;
    }

    void setCommand_inlock(Command* command) {
        _command = command;
    }

    /**
     * Appends information about this CurOp to "builder".
     *
     * If called from a thread other than the one executing the operation associated with this
     * CurOp, it is necessary to lock the associated Client object before executing this method.
     */
    void reportState(BSONObjBuilder* builder);

    /**
     * Sets the message and the progress meter for this CurOp.
     *
     * While it is necessary to hold the lock while this method executes, the
     * "hit" and "finished" methods of ProgressMeter may be called safely from
     * the thread executing the operation without locking the Client.
     */
    ProgressMeter& setMessage_inlock(const char* msg,
                                     std::string name = "Progress",
                                     unsigned long long progressMeterTotal = 0,
                                     int secondsBetween = 3);

    /**
     * Gets the message for this CurOp.
     */
    const std::string& getMessage() const {
        return _message;
    }

    /** Gets a read-only reference to the progress meter of this CurOp. */
    const ProgressMeter& getProgressMeter() const {
        return _progressMeter;
    }

    /** Returns the parent pointer of this CurOp, which defaults to nullptr. */
    CurOp* parent() const {
        return _parent;
    }

    /** Increments the yield counter. */
    void yielded() {
        _numYields++;
    }  // Should be _inlock()?

    /**
     * Returns the number of times yielded() was called. Callers on threads other
     * than the one executing the operation must lock the client.
     */
    int numYields() const {
        return _numYields;
    }

    /**
     * Access to _expectedLatencyMs is not synchronized, so it is illegal for threads other than the
     * one executing the operation to call getExpectedLatencyMs() and setExpectedLatencyMs().
     */
    long long getExpectedLatencyMs() const {
        return _expectedLatencyMs;
    }

    void setExpectedLatencyMs(long long latency) {
        _expectedLatencyMs = latency;
    }

    /**
     * this should be used very sparingly
     * generally the Context should set this up
     * but sometimes you want to do it ahead of time
     */
    void setNS_inlock(StringData ns);

    /** Returns a view of the stored plan summary string. */
    StringData getPlanSummary() const {
        return _planSummary;
    }

    /** Stores a copy of "summary" as the plan summary. */
    void setPlanSummary_inlock(StringData summary) {
        _planSummary = summary.toString();
    }

    /** Stores "summary" as the plan summary, moving from the by-value argument. */
    void setPlanSummary_inlock(std::string summary) {
        _planSummary = std::move(summary);
    }

private:
    class CurOpStack;

    static const OperationContext::Decoration<CurOpStack> _curopStack;

    CurOp(OperationContext*, CurOpStack*);

    CurOpStack* _stack;
    CurOp* _parent{nullptr};
    Command* _command{nullptr};
    long long _start{0};
    long long _end{0};

    // _networkOp represents the network-level op code: OP_QUERY, OP_GET_MORE, OP_COMMAND, etc.
    NetworkOp _networkOp{opInvalid};  // only set this through setNetworkOp_inlock() to keep synced

    // _logicalOp is the logical operation type, ie 'dbQuery' regardless of whether this is an
    // OP_QUERY find, a find command using OP_QUERY, or a find command using OP_COMMAND.
    // Similarly, the return value will be dbGetMore for both OP_GET_MORE and getMore command.
    LogicalOp _logicalOp{LogicalOp::opInvalid};  // only set this through setLogicalOp_inlock()

    bool _isCommand{false};
    int _dbprofile{0};  // 0=off, 1=slow, 2=all
    std::string _ns;
    CachedBSONObj<512> _query;  // CachedBSONObj is thread safe
    OpDebug _debug;
    std::string _message;
    ProgressMeter _progressMeter;
    int _numYields{0};

    // this is how much "extra" time a query might take
    // a writebacklisten for example will block for 30s
    // so this should be 30000 in that case
    long long _expectedLatencyMs{0};

    // Time limit for this operation. 0 if the operation has no time limit.
    uint64_t _maxTimeMicros{0u};

    std::string _planSummary;

    /** Nested class that implements tracking of a time limit for a CurOp object. */
    class MaxTimeTracker {
        MONGO_DISALLOW_COPYING(MaxTimeTracker);

    public:
        /** Newly-constructed MaxTimeTracker objects have the time limit disabled. */
        MaxTimeTracker() = default;

        /** Returns whether or not time tracking is enabled. */
        bool isEnabled() const {
            return _enabled;
        }

        /**
         * Enables time tracking. The time limit is set to be "durationMicros" microseconds
         * from "startEpochMicros" (units of microseconds since the epoch).
         *
         * "durationMicros" must be nonzero.
         */
        void setTimeLimit(uint64_t startEpochMicros, uint64_t durationMicros);

        /**
         * Checks whether the time limit has been hit. Returns false if not, or if time
         * tracking is disabled.
         */
        bool checkTimeLimit();

        /**
         * Returns the number of microseconds remaining for the time limit, or the special
         * value 0 if time tracking is disabled.
         *
         * Calling this method is more expensive than calling its sibling "checkTimeLimit()",
         * since an accurate measure of remaining time needs to be calculated.
         */
        uint64_t getRemainingMicros() const;

    private:
        // Whether or not time tracking is enabled for this operation.
        bool _enabled{false};

        // Point in time at which the time limit is hit. Units of microseconds since the
        // epoch.
        uint64_t _targetEpochMicros{0};

        // Approximate point in time at which the time limit is hit. Units of milliseconds
        // since the server process was started.
        int64_t _approxTargetServerMillis{0};
    } _maxTimeTracker;
};
}