summary | refs | log | tree | commit | diff
path: root/src/mongo/db/query
diff options
context:
space:
mode:
author Mathias Stearn <mathias@10gen.com> 2014-11-04 13:13:38 -0500
committer Mathias Stearn <mathias@10gen.com> 2014-11-04 14:13:14 -0500
commit d5f6eb21a94a39bd39c3c7a7b0ac107aca33a7e4 (patch)
tree d34aff684828b74bbccd4f83aa4310f35e8391d2 /src/mongo/db/query
parent f0bafc7c171217b7541d337723c6390a793be359 (diff)
download mongo-d5f6eb21a94a39bd39c3c7a7b0ac107aca33a7e4.tar.gz
SERVER-15948 Fast OplogStart impl for RSs supporting arbitrary DiskLocs
Storage engines must opt-in to this functionality. For now, heap1 is the only one to do so.
Diffstat (limited to 'src/mongo/db/query')
-rw-r--r-- src/mongo/db/query/new_find.cpp 65
1 files changed, 44 insertions, 21 deletions
diff --git a/src/mongo/db/query/new_find.cpp b/src/mongo/db/query/new_find.cpp
index f546dc4db19..8474f796daf 100644
--- a/src/mongo/db/query/new_find.cpp
+++ b/src/mongo/db/query/new_find.cpp
@@ -50,6 +50,7 @@
#include "mongo/db/repl/repl_coordinator_global.h"
#include "mongo/db/server_options.h"
#include "mongo/db/server_parameters.h"
+#include "mongo/db/storage/oplog_hack.h"
#include "mongo/db/storage_options.h"
#include "mongo/db/catalog/collection.h"
#include "mongo/s/chunk_version.h"
@@ -105,6 +106,11 @@ namespace {
return mongoutils::str::equals(me->path().rawData(), "ts");
}
+ mongo::BSONElement extractOplogTsOptime(const mongo::MatchExpression* me) { // Returns the BSONElement held by an oplog 'ts' predicate.
+ invariant(isOplogTsPred(me)); // Precondition: 'me' must satisfy isOplogTsPred().
+ return static_cast<const mongo::ComparisonMatchExpression*>(me)->getData(); // NOTE(review): unchecked downcast; presumably isOplogTsPred() guarantees a comparison expression -- confirm.
+ }
+
} // namespace
namespace mongo {
@@ -451,31 +457,48 @@ namespace mongo {
"$gt or $gte over the 'ts' field.");
}
- // Make an oplog start finding stage.
- WorkingSet* oplogws = new WorkingSet();
- OplogStart* stage = new OplogStart(txn, collection, tsExpr, oplogws);
-
- PlanExecutor* rawExec;
- // Takes ownership of ws and stage.
- Status execStatus = PlanExecutor::make(txn, oplogws, stage, collection,
- PlanExecutor::YIELD_AUTO, &rawExec);
- invariant(execStatus.isOK());
- scoped_ptr<PlanExecutor> exec(rawExec);
+ DiskLoc startLoc = DiskLoc().setInvalid();
- // The stage returns a DiskLoc of where to start.
- DiskLoc startLoc;
- PlanExecutor::ExecState state = exec->getNext(NULL, &startLoc);
+ // See if the RecordStore supports the oplogStartHack
+ const BSONElement tsElem = extractOplogTsOptime(tsExpr);
+ if (tsElem.type() == Timestamp) {
+ StatusWith<DiskLoc> goal = oploghack::keyForOptime(tsElem._opTime());
+ if (goal.isOK()) {
+ startLoc = collection->getRecordStore()->oplogStartHack(txn, goal.getValue());
+ }
+ }
- // This is normal. The start of the oplog is the beginning of the collection.
- if (PlanExecutor::IS_EOF == state) {
- return getExecutor(txn, collection, autoCq.release(), PlanExecutor::YIELD_AUTO,
- execOut);
+ if (startLoc.isValid()) {
+ LOG(3) << "Using direct oplog seek";
}
+ else {
+ LOG(3) << "Using OplogStart stage";
+
+ // Fallback to trying the OplogStart stage.
+ WorkingSet* oplogws = new WorkingSet();
+ OplogStart* stage = new OplogStart(txn, collection, tsExpr, oplogws);
+ PlanExecutor* rawExec;
+
+ // Takes ownership of oplogws and stage.
+ Status execStatus = PlanExecutor::make(txn, oplogws, stage, collection,
+ PlanExecutor::YIELD_AUTO, &rawExec);
+ invariant(execStatus.isOK());
+ scoped_ptr<PlanExecutor> exec(rawExec);
+
+ // The stage returns a DiskLoc of where to start.
+ PlanExecutor::ExecState state = exec->getNext(NULL, &startLoc);
+
+ // This is normal. The start of the oplog is the beginning of the collection.
+ if (PlanExecutor::IS_EOF == state) {
+ return getExecutor(txn, collection, autoCq.release(), PlanExecutor::YIELD_AUTO,
+ execOut);
+ }
- // This is not normal. An error was encountered.
- if (PlanExecutor::ADVANCED != state) {
- return Status(ErrorCodes::InternalError,
- "quick oplog start location had error...?");
+ // This is not normal. An error was encountered.
+ if (PlanExecutor::ADVANCED != state) {
+ return Status(ErrorCodes::InternalError,
+ "quick oplog start location had error...?");
+ }
}
// cout << "diskloc is " << startLoc.toString() << endl;