author     Hari Khalsa <hkhalsa@10gen.com>    2013-10-05 13:04:39 -0400
committer  Hari Khalsa <hkhalsa@10gen.com>    2013-10-07 15:07:16 -0400
commit     a5c4104bb28f2870602a8c7a5e4a4b506dbc99a1 (patch)
tree       3617caf0663936d4b109f3ff6ba590f307078f41 /src/mongo
parent     b7660950c4888052f2c68a02f6667c114a72dfbd (diff)
download   mongo-a5c4104bb28f2870602a8c7a5e4a4b506dbc99a1.tar.gz
SERVER-10471 merge index bounds, enumerate based on indices, bug fixes
Diffstat (limited to 'src/mongo')
 src/mongo/db/exec/collection_scan.cpp       |   3
 src/mongo/db/exec/collection_scan.h         |   1
 src/mongo/db/exec/index_scan.cpp            |  38
 src/mongo/db/exec/plan_stats.h              |   7
 src/mongo/db/query/canonical_query.cpp      |  21
 src/mongo/db/query/explain_plan.cpp         |  21
 src/mongo/db/query/index_bounds.cpp         |   9
 src/mongo/db/query/index_bounds.h           |   4
 src/mongo/db/query/index_bounds_builder.cpp | 425
 src/mongo/db/query/index_bounds_builder.h   |  34
 src/mongo/db/query/index_entry.h            |  18
 src/mongo/db/query/index_tag.h              |   7
 src/mongo/db/query/interval.cpp             |  75
 src/mongo/db/query/interval.h               |  32
 src/mongo/db/query/interval_test.cpp        |  10
 src/mongo/db/query/plan_enumerator.cpp      | 405
 src/mongo/db/query/plan_enumerator.h        |  85
 src/mongo/db/query/query_planner.cpp        | 228
 src/mongo/db/query/query_planner.h          |   9
 src/mongo/db/query/query_planner_test.cpp   |   1
 src/mongo/db/query/query_solution.cpp       |   7
 src/mongo/db/query/query_solution.h         |  11
22 files changed, 967 insertions, 484 deletions
diff --git a/src/mongo/db/exec/collection_scan.cpp b/src/mongo/db/exec/collection_scan.cpp
index f381582c271..904187c6c9a 100644
--- a/src/mongo/db/exec/collection_scan.cpp
+++ b/src/mongo/db/exec/collection_scan.cpp
@@ -90,6 +90,8 @@ namespace mongo {
member->obj = member->loc.obj();
member->state = WorkingSetMember::LOC_AND_UNOWNED_OBJ;
+ ++_specificStats.docsTested;
+
if (Filter::passes(member, _filter)) {
*out = id;
++_commonStats.advanced;
@@ -134,6 +136,7 @@ namespace mongo {
PlanStageStats* CollectionScan::getStats() {
_commonStats.isEOF = isEOF();
auto_ptr<PlanStageStats> ret(new PlanStageStats(_commonStats, STAGE_COLLSCAN));
+ ret->specific.reset(new CollectionScanStats(_specificStats));
return ret.release();
}
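
The collection scan changes above follow a simple pattern: bump a cheap per-stage counter (docsTested) every time a document is checked against the filter, then copy the counters into the stats snapshot returned by getStats(). A minimal stand-alone sketch of that pattern, using simplified stand-in types rather than the real PlanStage interface:

    #include <cstdint>
    #include <memory>

    // Simplified stand-ins for CollectionScanStats / CollectionScan; illustrative only.
    struct ScanStatsSketch {
        uint64_t docsTested = 0;  // documents checked against the filter
    };

    class CollectionScanSketch {
    public:
        // Called once per document the scan looks at, whether or not it matches.
        void noteDocumentTested() { ++_specificStats.docsTested; }

        // Hands back a copy of the counters accumulated so far; caller owns it.
        std::unique_ptr<ScanStatsSketch> getStats() const {
            return std::unique_ptr<ScanStatsSketch>(new ScanStatsSketch(_specificStats));
        }

    private:
        ScanStatsSketch _specificStats;
    };
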
diff --git a/src/mongo/db/exec/collection_scan.h b/src/mongo/db/exec/collection_scan.h
index 004b73bd309..b9075a3ad97 100644
--- a/src/mongo/db/exec/collection_scan.h
+++ b/src/mongo/db/exec/collection_scan.h
@@ -74,6 +74,7 @@ namespace mongo {
// Stats
CommonStats _commonStats;
+ CollectionScanStats _specificStats;
};
} // namespace mongo
diff --git a/src/mongo/db/exec/index_scan.cpp b/src/mongo/db/exec/index_scan.cpp
index e9b2e2eafbd..d3ee4586069 100644
--- a/src/mongo/db/exec/index_scan.cpp
+++ b/src/mongo/db/exec/index_scan.cpp
@@ -109,9 +109,11 @@ namespace mongo {
_hitEnd = true;
return PlanStage::FAILURE;
}
+ if (!isEOF()) {
+ _specificStats.keysExamined = 1;
+ }
}
else {
- // XXX: must be actually a Btree
// "Fast" Btree-specific navigation.
_btreeCursor = static_cast<BtreeIndexCursor*>(_indexCursor.get());
_checker.reset(new IndexBoundsChecker(&_params.bounds,
@@ -123,11 +125,14 @@ namespace mongo {
vector<bool> inc;
key.resize(nFields);
inc.resize(nFields);
- _checker->getStartKey(&key, &inc);
- _btreeCursor->seek(key, inc);
-
- _keyElts.resize(nFields);
- _keyEltsInc.resize(nFields);
+ if (_checker->getStartKey(&key, &inc)) {
+ _btreeCursor->seek(key, inc);
+ _keyElts.resize(nFields);
+ _keyEltsInc.resize(nFields);
+ }
+ else {
+ _hitEnd = true;
+ }
}
checkEnd();
@@ -141,7 +146,6 @@ namespace mongo {
// _indexCursor->next() if we're EOF.
if (!isEOF()) {
_indexCursor->next();
- ++_specificStats.keysExamined;
checkEnd();
}
}
@@ -189,7 +193,7 @@ namespace mongo {
return false;
}
- return _indexCursor->isEOF() || _hitEnd;
+ return _hitEnd || _indexCursor->isEOF();
}
void IndexScan::prepareToYield() {
@@ -261,6 +265,10 @@ namespace mongo {
_hitEnd = true;
_commonStats.isEOF = true;
}
+
+ if (!isEOF() && _params.bounds.isSimpleRange) {
+ ++_specificStats.keysExamined;
+ }
}
else {
verify(NULL != _btreeCursor);
@@ -268,6 +276,8 @@ namespace mongo {
// Use _checker to see how things are.
for (;;) {
+ //cout << "current index key is " << _indexCursor->getKey().toString() << endl;
+ //cout << "keysExamined is " << _specificStats.keysExamined << endl;
IndexBoundsChecker::KeyState keyState;
keyState = _checker->checkKey(_indexCursor->getKey(),
&_keyEltsToUse,
@@ -275,13 +285,19 @@ namespace mongo {
&_keyElts,
&_keyEltsInc);
- if (IndexBoundsChecker::VALID == keyState) {
- break;
- }
if (IndexBoundsChecker::DONE == keyState) {
_hitEnd = true;
break;
}
+
+ // This seems weird but it's the old definition of nscanned.
+ ++_specificStats.keysExamined;
+
+ if (IndexBoundsChecker::VALID == keyState) {
+ break;
+ }
+
+ // cout << "skipping...\n";
verify(IndexBoundsChecker::MUST_ADVANCE == keyState);
_btreeCursor->skip(_indexCursor->getKey(), _keyEltsToUse, _movePastKeyElts,
_keyElts, _keyEltsInc);
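
After the index scan changes above, keysExamined is no longer bumped once per next() call. It is bumped for every key the cursor positions on and checks against the bounds, including keys that only force a skip, and it is not bumped for the key that proves the scan is done; that matches the old nscanned definition. An illustrative counting loop, with hypothetical cursor and checker interfaces standing in for the real IndexScan machinery:

    #include <cstdint>

    // Mirrors IndexBoundsChecker::KeyState: VALID keys are returned, MUST_ADVANCE
    // keys are skipped past, DONE means the scan has run off the end of the bounds.
    enum class KeyState { VALID, MUST_ADVANCE, DONE };

    template <typename Cursor, typename Checker>
    uint64_t scanCountingKeysExamined(Cursor& cursor, Checker& checker) {
        uint64_t keysExamined = 0;
        while (!cursor.isEOF()) {
            KeyState state = checker.checkKey(cursor.getKey());
            if (state == KeyState::DONE) {
                break;                        // the terminating key is not counted
            }
            ++keysExamined;                   // counted for VALID and MUST_ADVANCE alike
            if (state == KeyState::VALID) {
                cursor.next();                // in bounds: hand the key up, then advance
            } else {
                cursor.skipPastCurrentKey();  // hypothetical helper standing in for skip()
            }
        }
        return keysExamined;
    }
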
diff --git a/src/mongo/db/exec/plan_stats.h b/src/mongo/db/exec/plan_stats.h
index ebccd9125be..37f8c2d572e 100644
--- a/src/mongo/db/exec/plan_stats.h
+++ b/src/mongo/db/exec/plan_stats.h
@@ -108,6 +108,13 @@ namespace mongo {
virtual ~SpecificStats() { }
};
+ struct CollectionScanStats : public SpecificStats {
+ CollectionScanStats() : docsTested(0) { }
+
+ // How many documents did we check against our filter?
+ uint64_t docsTested;
+ };
+
struct AndHashStats : public SpecificStats {
AndHashStats() : flaggedButPassed(0),
flaggedInProgress(0) { }
diff --git a/src/mongo/db/query/canonical_query.cpp b/src/mongo/db/query/canonical_query.cpp
index 623e37de4b6..06095070ac5 100644
--- a/src/mongo/db/query/canonical_query.cpp
+++ b/src/mongo/db/query/canonical_query.cpp
@@ -83,14 +83,17 @@ namespace mongo {
return Status::OK();
}
- void normalizeTree(MatchExpression* root) {
+ /**
+ * Returns the normalized version of the subtree rooted at 'root'.
+ */
+ MatchExpression* normalizeTree(MatchExpression* root) {
// root->isLogical() is true now. We care about AND and OR. Negations currently scare us.
if (MatchExpression::AND == root->matchType() || MatchExpression::OR == root->matchType()) {
// We could have AND of AND of AND. Make sure we clean up our children before merging
// them.
// UNITTEST 11738048
- for (size_t i = 0; i < root->numChildren(); ++i) {
- normalizeTree(root->getChild(i));
+ for (size_t i = 0; i < root->getChildVector()->size(); ++i) {
+ (*root->getChildVector())[i] = normalizeTree(root->getChild(i));
}
// If any of our children are of the same logical operator that we are, we remove the
@@ -119,7 +122,17 @@ namespace mongo {
root->getChildVector()->insert(root->getChildVector()->end(),
absorbedChildren.begin(),
absorbedChildren.end());
+
+ // AND of 1 thing is the thing, OR of 1 thing is the thing.
+ if (1 == root->numChildren()) {
+ MatchExpression* ret = root->getChild(0);
+ root->getChildVector()->clear();
+ delete root;
+ return ret;
+ }
}
+
+ return root;
}
// TODO: Should this really live in the parsing? Or elsewhere?
@@ -202,7 +215,7 @@ namespace mongo {
if (!swme.isOK()) { return swme.getStatus(); }
MatchExpression* root = swme.getValue();
- normalizeTree(root);
+ root = normalizeTree(root);
Status validStatus = isValid(root);
if (!validStatus.isOK()) {
return validStatus;
diff --git a/src/mongo/db/query/explain_plan.cpp b/src/mongo/db/query/explain_plan.cpp
index 42cc3dcecd6..087abb93586 100644
--- a/src/mongo/db/query/explain_plan.cpp
+++ b/src/mongo/db/query/explain_plan.cpp
@@ -81,12 +81,23 @@ namespace mongo {
// number of keys that cursor retrieved, and into the stage's stats 'advanced' for
// nscannedObjects', which would be the number of keys that survived the IXSCAN
// filter. Those keys would have been FETCH-ed, if a fetch is present.
- //
- // TODO: This is kind of a lie for STAGE_GEO_NEAR_2DSPHERE.
- if (leaf->stageType == STAGE_COLLSCAN || leaf->stageType == STAGE_GEO_NEAR_2DSPHERE) {
+ if (leaf->stageType == STAGE_COLLSCAN) {
+ CollectionScanStats* csStats = static_cast<CollectionScanStats*>(leaf->specific.get());
res->setCursor("BasicCursor");
- res->setNScanned(leaf->common.advanced);
- res->setNScannedObjects(leaf->common.advanced);
+ res->setNScanned(csStats->docsTested);
+ res->setNScannedObjects(csStats->docsTested);
+
+ if (fullDetails) {
+ res->setIsMultiKey(false);
+ res->setIndexOnly(false);
+ }
+ }
+ else if (leaf->stageType == STAGE_GEO_NEAR_2DSPHERE) {
+ // TODO: This is kind of a lie for STAGE_GEO_NEAR_2DSPHERE.
+ res->setCursor("S2NearCursor");
+ // The first work() is an init. Every subsequent work examines a document.
+ res->setNScanned(leaf->common.works);
+ res->setNScannedObjects(leaf->common.works);
if (fullDetails) {
res->setIsMultiKey(false);
diff --git a/src/mongo/db/query/index_bounds.cpp b/src/mongo/db/query/index_bounds.cpp
index 6177e7d5b61..b1f1d8eb911 100644
--- a/src/mongo/db/query/index_bounds.cpp
+++ b/src/mongo/db/query/index_bounds.cpp
@@ -201,15 +201,20 @@ namespace mongo {
}
}
- void IndexBoundsChecker::getStartKey(vector<const BSONElement*>* valueOut,
- vector<bool>* inclusiveOut) {
+ bool IndexBoundsChecker::getStartKey(vector<const BSONElement*>* valueOut,
+ vector<bool>* inclusiveOut) {
verify(valueOut->size() == _bounds->fields.size());
verify(inclusiveOut->size() == _bounds->fields.size());
for (size_t i = 0; i < _bounds->fields.size(); ++i) {
+ if (0 == _bounds->fields[i].intervals.size()) {
+ return false;
+ }
(*valueOut)[i] = &_bounds->fields[i].intervals[0].start;
(*inclusiveOut)[i] = _bounds->fields[i].intervals[0].startInclusive;
}
+
+ return true;
}
bool IndexBoundsChecker::findLeftmostProblem(const vector<BSONElement>& keyValues,
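
The getStartKey() change gives the bounds checker a way to report unsatisfiable bounds: if any field's interval list is empty there is no start key, and the caller (the index scan above) marks itself as having hit the end instead of seeking. A small sketch of the new contract, with simplified types in place of BSONElement and OrderedIntervalList:

    #include <vector>

    struct IntervalSketch { int start; bool startInclusive; };
    struct FieldIntervals { std::vector<IntervalSketch> intervals; };

    bool getStartKeySketch(const std::vector<FieldIntervals>& fields,
                           std::vector<int>* keyOut,
                           std::vector<bool>* inclusiveOut) {
        keyOut->clear();
        inclusiveOut->clear();
        for (const FieldIntervals& f : fields) {
            if (f.intervals.empty()) {
                return false;  // no valid start key; caller treats the scan as EOF
            }
            keyOut->push_back(f.intervals[0].start);
            inclusiveOut->push_back(f.intervals[0].startInclusive);
        }
        return true;
    }
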
diff --git a/src/mongo/db/query/index_bounds.h b/src/mongo/db/query/index_bounds.h
index 81aa7b008d2..2b0ba90cee6 100644
--- a/src/mongo/db/query/index_bounds.h
+++ b/src/mongo/db/query/index_bounds.h
@@ -107,8 +107,10 @@ namespace mongo {
/**
* Get the key that we should start the scan with.
+ *
+ * Returns true if there is a valid start key. Returns false otherwise.
*/
- void getStartKey(vector<const BSONElement*>* valueOut, vector<bool>* inclusiveOut);
+ bool getStartKey(vector<const BSONElement*>* valueOut, vector<bool>* inclusiveOut);
/**
* The states of a key from an index scan. See checkKey below.
diff --git a/src/mongo/db/query/index_bounds_builder.cpp b/src/mongo/db/query/index_bounds_builder.cpp
index 2e0ac5cd6eb..c7a041cfa10 100644
--- a/src/mongo/db/query/index_bounds_builder.cpp
+++ b/src/mongo/db/query/index_bounds_builder.cpp
@@ -40,6 +40,109 @@
namespace mongo {
+ string IndexBoundsBuilder::simpleRegex(const char* regex, const char* flags, bool* exact) {
+ string r = "";
+ *exact = false;
+
+ bool multilineOK;
+ if ( regex[0] == '\\' && regex[1] == 'A') {
+ multilineOK = true;
+ regex += 2;
+ }
+ else if (regex[0] == '^') {
+ multilineOK = false;
+ regex += 1;
+ }
+ else {
+ return r;
+ }
+
+ bool extended = false;
+ while (*flags) {
+ switch (*(flags++)) {
+ case 'm': // multiline
+ if (multilineOK)
+ continue;
+ else
+ return r;
+ case 'x': // extended
+ extended = true;
+ break;
+ default:
+ return r; // cant use index
+ }
+ }
+
+ stringstream ss;
+
+ while(*regex) {
+ char c = *(regex++);
+ if ( c == '*' || c == '?' ) {
+ // These are the only two symbols that make the last char optional
+ r = ss.str();
+ r = r.substr( 0 , r.size() - 1 );
+ return r; //breaking here fails with /^a?/
+ }
+ else if (c == '|') {
+ // whole match so far is optional. Nothing we can do here.
+ return string();
+ }
+ else if (c == '\\') {
+ c = *(regex++);
+ if (c == 'Q'){
+ // \Q...\E quotes everything inside
+ while (*regex) {
+ c = (*regex++);
+ if (c == '\\' && (*regex == 'E')){
+ regex++; //skip the 'E'
+ break; // go back to start of outer loop
+ }
+ else {
+ ss << c; // character should match itself
+ }
+ }
+ }
+ else if ((c >= 'A' && c <= 'Z') ||
+ (c >= 'a' && c <= 'z') ||
+ (c >= '0' && c <= '0') ||
+ (c == '\0')) {
+ // don't know what to do with these
+ r = ss.str();
+ break;
+ }
+ else {
+ // slash followed by non-alphanumeric represents the following char
+ ss << c;
+ }
+ }
+ else if (strchr("^$.[()+{", c)) {
+ // list of "metacharacters" from man pcrepattern
+ r = ss.str();
+ break;
+ }
+ else if (extended && c == '#') {
+ // comment
+ r = ss.str();
+ break;
+ }
+ else if (extended && isspace(c)) {
+ continue;
+ }
+ else {
+ // self-matching char
+ ss << c;
+ }
+ }
+
+ if ( r.empty() && *regex == 0 ) {
+ r = ss.str();
+ *exact = !r.empty();
+ }
+
+ return r;
+ }
+
+
// static
void IndexBoundsBuilder::allValuesForField(const BSONElement& elt, OrderedIntervalList* out) {
// ARGH, BSONValue would make this shorter.
@@ -66,13 +169,42 @@ namespace mongo {
return makeRangeInterval(bob.obj(), true, true);
}
+ bool IntervalComparison(const Interval& lhs, const Interval& rhs) {
+ int wo = lhs.start.woCompare(rhs.start, false);
+ if (0 != wo) {
+ return wo < 0;
+ }
+ // The start elements are equal. Put the interval whose start is inclusive to the left.
+ if (lhs.startInclusive) {
+ return true;
+ }
+ if (rhs.startInclusive) {
+ return false;
+ }
+ // If neither interval's start is inclusive and they begin at the same point, the order
+ // doesn't matter.
+ return true;
+ }
+
+ // static
+ void IndexBoundsBuilder::translateAndIntersect(const MatchExpression* expr, const BSONElement& elt,
+ OrderedIntervalList* oilOut, bool* exactOut) {
+ OrderedIntervalList arg;
+ translate(expr, elt, &arg, exactOut);
+ // translate outputs arg in sorted order. intersectize assumes that its arguments are sorted.
+ intersectize(arg, oilOut);
+ }
+
+ // static
+ void IndexBoundsBuilder::translateAndUnion(const MatchExpression* expr, const BSONElement& elt,
+ OrderedIntervalList* oilOut, bool* exactOut) {
+ translate(expr, elt, oilOut, exactOut);
+ unionize(oilOut);
+ }
+
// static
void IndexBoundsBuilder::translate(const MatchExpression* expr, const BSONElement& elt,
OrderedIntervalList* oilOut, bool* exactOut) {
- int direction = (elt.numberInt() >= 0) ? 1 : -1;
-
- Interval interval;
- bool exact = false;
oilOut->name = elt.fieldName();
bool isHashed = false;
@@ -86,41 +218,8 @@ namespace mongo {
}
if (MatchExpression::EQ == expr->matchType()) {
- const EqualityMatchExpression* node =
- static_cast<const EqualityMatchExpression*>(expr);
-
- // We have to copy the data out of the parse tree and stuff it into the index
- // bounds. BSONValue will be useful here.
- BSONObj dataObj;
-
- if (isHashed) {
- dataObj = ExpressionMapping::hash(node->getData());
- }
- else {
- dataObj = objFromElement(node->getData());
- }
-
- // UNITTEST 11738048
- if (Array == dataObj.firstElement().type()) {
- // XXX: build better bounds
- warning() << "building lazy bounds for " << expr->toString() << endl;
- interval = allValues();
- exact = false;
- }
- else {
- verify(dataObj.isOwned());
- interval = makePointInterval(dataObj);
- // XXX: it's exact if the index isn't sparse
- if (dataObj.firstElement().isNull()) {
- exact = false;
- }
- else if (isHashed) {
- exact = false;
- }
- else {
- exact = true;
- }
- }
+ const EqualityMatchExpression* node = static_cast<const EqualityMatchExpression*>(expr);
+ translateEquality(node->getData(), isHashed, oilOut, exactOut);
}
else if (MatchExpression::LTE == expr->matchType()) {
const LTEMatchExpression* node = static_cast<const LTEMatchExpression*>(expr);
@@ -130,9 +229,9 @@ namespace mongo {
bob.append(dataElt);
BSONObj dataObj = bob.obj();
verify(dataObj.isOwned());
- interval = makeRangeInterval(dataObj, true, true);
+ oilOut->intervals.push_back(makeRangeInterval(dataObj, true, true));
// XXX: only exact if not (null or array)
- exact = true;
+ *exactOut = true;
}
else if (MatchExpression::LT == expr->matchType()) {
const LTMatchExpression* node = static_cast<const LTMatchExpression*>(expr);
@@ -142,9 +241,9 @@ namespace mongo {
bob.append(dataElt);
BSONObj dataObj = bob.obj();
verify(dataObj.isOwned());
- interval = makeRangeInterval(dataObj, true, false);
+ oilOut->intervals.push_back(makeRangeInterval(dataObj, true, false));
// XXX: only exact if not (null or array)
- exact = true;
+ *exactOut = true;
}
else if (MatchExpression::GT == expr->matchType()) {
const GTMatchExpression* node = static_cast<const GTMatchExpression*>(expr);
@@ -154,9 +253,9 @@ namespace mongo {
bob.appendMaxForType("", dataElt.type());
BSONObj dataObj = bob.obj();
verify(dataObj.isOwned());
- interval = makeRangeInterval(dataObj, false, true);
+ oilOut->intervals.push_back(makeRangeInterval(dataObj, false, true));
// XXX: only exact if not (null or array)
- exact = true;
+ *exactOut = true;
}
else if (MatchExpression::GTE == expr->matchType()) {
const GTEMatchExpression* node = static_cast<const GTEMatchExpression*>(expr);
@@ -167,14 +266,14 @@ namespace mongo {
bob.appendMaxForType("", dataElt.type());
BSONObj dataObj = bob.obj();
verify(dataObj.isOwned());
- interval = makeRangeInterval(dataObj, true, true);
+
+ oilOut->intervals.push_back(makeRangeInterval(dataObj, true, true));
// XXX: only exact if not (null or array)
- exact = true;
+ *exactOut = true;
}
else if (MatchExpression::REGEX == expr->matchType()) {
- warning() << "building lazy bounds for " << expr->toString() << endl;
- interval = allValues();
- exact = false;
+ const RegexMatchExpression* rme = static_cast<const RegexMatchExpression*>(expr);
+ translateRegex(rme, oilOut, exactOut);
}
else if (MatchExpression::MOD == expr->matchType()) {
BSONObjBuilder bob;
@@ -182,13 +281,8 @@ namespace mongo {
bob.appendMaxForType("", NumberDouble);
BSONObj dataObj = bob.obj();
verify(dataObj.isOwned());
- interval = makeRangeInterval(dataObj, true, true);
- exact = false;
- }
- else if (MatchExpression::MATCH_IN == expr->matchType()) {
- warning() << "building lazy bounds for " << expr->toString() << endl;
- interval = allValues();
- exact = false;
+ oilOut->intervals.push_back(makeRangeInterval(dataObj, true, true));
+ *exactOut = false;
}
else if (MatchExpression::TYPE_OPERATOR == expr->matchType()) {
const TypeMatchExpression* tme = static_cast<const TypeMatchExpression*>(expr);
@@ -197,13 +291,40 @@ namespace mongo {
bob.appendMaxForType("", tme->getData());
BSONObj dataObj = bob.obj();
verify(dataObj.isOwned());
- interval = makeRangeInterval(dataObj, true, true);
- exact = false;
+ oilOut->intervals.push_back(makeRangeInterval(dataObj, true, true));
+ *exactOut = false;
}
else if (MatchExpression::MATCH_IN == expr->matchType()) {
- warning() << "building lazy bounds for " << expr->toString() << endl;
- interval = allValues();
- exact = false;
+ const InMatchExpression* ime = static_cast<const InMatchExpression*>(expr);
+ const ArrayFilterEntries& afr = ime->getData();
+
+ *exactOut = true;
+
+ // Create our various intervals.
+
+ bool thisBoundExact = false;
+ for (BSONElementSet::iterator it = afr.equalities().begin();
+ it != afr.equalities().end(); ++it) {
+
+ translateEquality(*it, isHashed, oilOut, &thisBoundExact);
+ if (!thisBoundExact) {
+ *exactOut = false;
+ }
+ }
+
+ for (size_t i = 0; i < afr.numRegexes(); ++i) {
+ translateRegex(afr.regex(i), oilOut, &thisBoundExact);
+ if (!thisBoundExact) {
+ *exactOut = false;
+ }
+ }
+
+ // XXX: what happens here?
+ if (afr.hasNull()) { }
+ // XXX: what happens here as well?
+ if (afr.hasEmptyArray()) { }
+
+ unionize(oilOut);
}
else if (MatchExpression::GEO == expr->matchType()) {
const GeoMatchExpression* gme = static_cast<const GeoMatchExpression*>(expr);
@@ -217,21 +338,12 @@ namespace mongo {
const S2Region& region = gme->getGeoQuery().getRegion();
ExpressionMapping::cover2dsphere(region, oilOut);
*exactOut = false;
- // XXX: restructure this method
- return;
}
else {
warning() << "Planner error, trying to build bounds for expr "
<< expr->toString() << endl;
verify(0);
}
-
- if (-1 == direction) {
- reverseInterval(&interval);
- }
-
- oilOut->intervals.push_back(interval);
- *exactOut = exact;
}
// static
@@ -250,6 +362,116 @@ namespace mongo {
}
// static
+ void IndexBoundsBuilder::intersectize(const OrderedIntervalList& arg,
+ OrderedIntervalList* oilOut) {
+ verify(arg.name == oilOut->name);
+
+ size_t argidx = 0;
+ const vector<Interval>& argiv = arg.intervals;
+
+ size_t ividx = 0;
+ vector<Interval>& iv = oilOut->intervals;
+
+ vector<Interval> result;
+
+ while (argidx < argiv.size() && ividx < iv.size()) {
+ Interval::IntervalComparison cmp = argiv[argidx].compare(iv[ividx]);
+ cout << "comparing " << argiv[argidx].toString()
+ << " with " << iv[ividx].toString()
+ << " cmp is " << Interval::cmpstr(cmp) << endl;
+
+ verify(Interval::INTERVAL_UNKNOWN != cmp);
+
+ if (cmp == Interval::INTERVAL_PRECEDES
+ || cmp == Interval::INTERVAL_PRECEDES_COULD_UNION) {
+ // argiv is before iv. move argiv forward.
+ ++argidx;
+ }
+ else if (cmp == Interval::INTERVAL_SUCCEEDS) {
+ // iv is before argiv. move iv forward.
+ ++ividx;
+ }
+ else {
+ // argiv[argidx] (cmpresults) iv[ividx]
+ Interval newInt = argiv[argidx];
+ newInt.intersect(iv[ividx], cmp);
+ result.push_back(newInt);
+
+ if (Interval::INTERVAL_EQUALS == cmp) {
+ ++argidx;
+ ++ividx;
+ }
+ else if (Interval::INTERVAL_WITHIN == cmp) {
+ ++argidx;
+ }
+ else if (Interval::INTERVAL_CONTAINS == cmp) {
+ ++ividx;
+ }
+ else if (Interval::INTERVAL_OVERLAPS_BEFORE == cmp) {
+ ++argidx;
+ }
+ else if (Interval::INTERVAL_OVERLAPS_AFTER == cmp) {
+ ++ividx;
+ }
+ else {
+ verify(0);
+ }
+ }
+ }
+ // XXX swap
+ oilOut->intervals = result;
+ }
+
+ // static
+ void IndexBoundsBuilder::unionize(OrderedIntervalList* oilOut) {
+ // Step 1: sort.
+ std::sort(oilOut->intervals.begin(), oilOut->intervals.end(), IntervalComparison);
+
+ // Step 2: Walk through and merge.
+ vector<Interval>& iv = oilOut->intervals;
+ size_t i = 0;
+ while (i < iv.size() - 1) {
+ // Compare i with i + 1.
+ Interval::IntervalComparison cmp = iv[i].compare(iv[i + 1]);
+ // cout << "comparing " << iv[i].toString() << " with " << iv[i+1].toString()
+ // << " cmp is " << Interval::cmpstr(cmp) << endl;
+
+ // This means our sort didn't work.
+ verify(Interval::INTERVAL_SUCCEEDS != cmp);
+
+ // Intervals are correctly ordered.
+ if (Interval::INTERVAL_PRECEDES == cmp) {
+ // We can move to the next pair.
+ ++i;
+ }
+ else if (Interval::INTERVAL_EQUALS == cmp || Interval::INTERVAL_WITHIN == cmp) {
+ // Interval 'i' is equal to i+1, or is contained within i+1.
+ // Remove interval i and don't move to the next value of 'i'.
+ iv.erase(iv.begin() + i);
+ }
+ else if (Interval::INTERVAL_CONTAINS == cmp) {
+ // Interval 'i' contains i+1, remove i+1 and don't move to the next value of 'i'.
+ iv.erase(iv.begin() + i + 1);
+ }
+ else if (Interval::INTERVAL_OVERLAPS_BEFORE == cmp
+ || Interval::INTERVAL_PRECEDES_COULD_UNION == cmp) {
+ // We want to merge intervals i and i+1.
+ // Interval 'i' starts before interval 'i+1'.
+ BSONObjBuilder bob;
+ bob.append(iv[i].start);
+ bob.append(iv[i + 1].end);
+ BSONObj data = bob.obj();
+ bool startInclusive = iv[i].startInclusive;
+ bool endInclusive = iv[i + 1].endInclusive;
+ iv.erase(iv.begin() + i);
+ // iv[i] is now the former iv[i + 1]
+ iv[i] = makeRangeInterval(data, startInclusive, endInclusive);
+ // Don't increment 'i'.
+ }
+ }
+ }
+
+ // static
Interval IndexBoundsBuilder::makeRangeInterval(const string& start, const string& end,
bool startInclusive, bool endInclusive) {
BSONObjBuilder bob;
@@ -292,4 +514,65 @@ namespace mongo {
ival->endInclusive = tmpInc;
}
+ // static
+ void IndexBoundsBuilder::translateRegex(const RegexMatchExpression* rme,
+ OrderedIntervalList* oilOut, bool* exact) {
+
+ const string start = simpleRegex(rme->getString().c_str(), rme->getFlags().c_str(), exact);
+
+ cout << "regex bounds start is " << start << endl;
+ // Note that 'exact' is set by simpleRegex above.
+ if (!start.empty()) {
+ string end = start;
+ end[end.size() - 1]++;
+ oilOut->intervals.push_back(makeRangeInterval(start, end, true, false));
+ }
+ else {
+ BSONObjBuilder bob;
+ bob.appendMinForType("", String);
+ bob.appendMaxForType("", String);
+ BSONObj dataObj = bob.obj();
+ verify(dataObj.isOwned());
+ oilOut->intervals.push_back(makeRangeInterval(dataObj, true, false));
+ }
+
+ // Regexes are after strings.
+ BSONObjBuilder bob;
+ bob.appendRegex("", rme->getString(), rme->getFlags());
+ oilOut->intervals.push_back(makePointInterval(bob.obj()));
+ }
+
+ // static
+ void IndexBoundsBuilder::translateEquality(const BSONElement& data, bool isHashed,
+ OrderedIntervalList* oil, bool* exact) {
+ // We have to copy the data out of the parse tree and stuff it into the index
+ // bounds. BSONValue will be useful here.
+ BSONObj dataObj;
+
+ if (isHashed) {
+ dataObj = ExpressionMapping::hash(data);
+ }
+ else {
+ dataObj = objFromElement(data);
+ }
+
+ // UNITTEST 11738048
+ if (Array == dataObj.firstElement().type()) {
+ // XXX: bad
+ oil->intervals.push_back(allValues());
+ *exact = false;
+ }
+ else {
+ verify(dataObj.isOwned());
+ oil->intervals.push_back(makePointInterval(dataObj));
+ // XXX: it's exact if the index isn't sparse?
+ if (dataObj.firstElement().isNull() || isHashed) {
+ *exact = false;
+ }
+ else {
+ *exact = true;
+ }
+ }
+ }
+
} // namespace mongo
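
The index bounds builder now treats bounds as sets of intervals: translate() appends to the OrderedIntervalList, $in and regex predicates can produce several intervals, and unionize()/intersectize() merge or intersect sorted interval lists. The heart of unionize() is an ordinary sorted-interval merge; a stand-alone sketch on closed integer intervals (the real code walks BSON-valued Intervals and classifies each adjacent pair with Interval::compare()):

    #include <algorithm>
    #include <utility>
    #include <vector>

    // Closed integer intervals [first, second], both endpoints included.
    using Iv = std::pair<int, int>;

    void unionizeSketch(std::vector<Iv>* iv) {
        // Step 1: sort by start, as unionize() does with std::sort.
        std::sort(iv->begin(), iv->end());
        // Step 2: walk adjacent pairs, merging whenever they overlap or touch.
        size_t i = 0;
        while (iv->size() > 1 && i < iv->size() - 1) {
            Iv& cur = (*iv)[i];
            const Iv& next = (*iv)[i + 1];
            if (next.first > cur.second) {
                ++i;  // strictly precedes: both intervals survive
            } else {
                cur.second = std::max(cur.second, next.second);  // absorb the neighbour
                iv->erase(iv->begin() + i + 1);                  // and stay at 'i'
            }
        }
    }

    // Example: {[5,7], [1,3], [2,4], [8,9]} unionizes to {[1,4], [5,7], [8,9]}.
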
diff --git a/src/mongo/db/query/index_bounds_builder.h b/src/mongo/db/query/index_bounds_builder.h
index 7c96cab676a..5a0921a213f 100644
--- a/src/mongo/db/query/index_bounds_builder.h
+++ b/src/mongo/db/query/index_bounds_builder.h
@@ -58,6 +58,20 @@ namespace mongo {
static void translate(const MatchExpression* expr, const BSONElement& elt,
OrderedIntervalList* oilOut, bool* exactOut);
+ /**
+ * Creates bounds for 'expr' (indexed according to 'elt'). Intersects those bounds
+ * with the bounds in oilOut, which is an in/out parameter.
+ */
+ static void translateAndIntersect(const MatchExpression* expr, const BSONElement& elt,
+ OrderedIntervalList* oilOut, bool* exactOut);
+
+ /**
+ * Creates bounds for 'expr' (indexed according to 'elt'). Unions those bounds
+ * with the bounds in oilOut, which is an in/out parameter.
+ */
+ static void translateAndUnion(const MatchExpression* expr, const BSONElement& elt,
+ OrderedIntervalList* oilOut, bool* exactOut);
+
private:
friend class ExpressionMapping;
@@ -91,7 +105,27 @@ namespace mongo {
*/
static void reverseInterval(Interval* ival);
+ /**
+ * Copied almost verbatim from db/queryutil.cpp.
+ *
+ * returns a string that when used as a matcher, would match a super set of regex()
+ *
+ * returns "" for complex regular expressions
+ *
+ * used to optimize queries in some simple regex cases that start with '^'
+ */
+ static string simpleRegex(const char* regex, const char* flags, bool* exact);
+
static Interval allValues();
+
+ static void translateRegex(const RegexMatchExpression* rme, OrderedIntervalList* oil,
+ bool* exact);
+
+ static void translateEquality(const BSONElement& data, bool isHashed,
+ OrderedIntervalList* oil, bool* exact);
+
+ static void unionize(OrderedIntervalList* oilOut);
+ static void intersectize(const OrderedIntervalList& arg, OrderedIntervalList* oilOut);
};
} // namespace mongo
diff --git a/src/mongo/db/query/index_entry.h b/src/mongo/db/query/index_entry.h
index 59c3aa166e3..bdfdd1c7fd9 100644
--- a/src/mongo/db/query/index_entry.h
+++ b/src/mongo/db/query/index_entry.h
@@ -28,7 +28,8 @@
#pragma once
-#include <vector>
+#include <sstream>
+#include <string>
#include "mongo/db/jsobj.h"
@@ -38,7 +39,8 @@ namespace mongo {
* This name sucks, but every name involving 'index' is used somewhere.
*/
struct IndexEntry {
- IndexEntry(BSONObj kp, bool mk, bool sp) : keyPattern(kp), multikey(mk), sparse(sp) { }
+ IndexEntry(const BSONObj& kp, bool mk, bool sp)
+ : keyPattern(kp), multikey(mk), sparse(sp) { }
IndexEntry(const IndexEntry& other) {
keyPattern = other.keyPattern;
@@ -51,6 +53,18 @@ namespace mongo {
bool multikey;
bool sparse;
+
+ std::string toString() const {
+ stringstream ss;
+ ss << keyPattern.toString();
+ if (multikey) {
+ ss << " multikey";
+ }
+ if (sparse) {
+ ss << " sparse";
+ }
+ return ss.str();
+ }
};
} // namespace mongo
diff --git a/src/mongo/db/query/index_tag.h b/src/mongo/db/query/index_tag.h
index 2173adce9ff..afc4c4a11e5 100644
--- a/src/mongo/db/query/index_tag.h
+++ b/src/mongo/db/query/index_tag.h
@@ -57,6 +57,12 @@ namespace mongo {
std::vector<size_t> first;
std::vector<size_t> notFirst;
+ // We don't know the full path from a node unless we keep notes as we traverse from the
+ // root. We do this once and store it.
+ // TODO: Do a FieldRef / StringData pass.
+ // TODO: We might want this inside of the MatchExpression.
+ string path;
+
virtual void debugString(StringBuilder* builder) const {
*builder << "First: ";
for (size_t i = 0; i < first.size(); ++i) {
@@ -66,6 +72,7 @@ namespace mongo {
for (size_t i = 0; i < notFirst.size(); ++i) {
*builder << notFirst[i] << " ";
}
+ *builder << "full path: " << path;
}
virtual MatchExpression::TagData* clone() const {
diff --git a/src/mongo/db/query/interval.cpp b/src/mongo/db/query/interval.cpp
index 9bf0cc51a8f..6c80632c4a5 100644
--- a/src/mongo/db/query/interval.cpp
+++ b/src/mongo/db/query/interval.cpp
@@ -99,13 +99,9 @@ namespace mongo {
} // unnamed namespace
- Interval:: Interval()
- : _intervalData(BSONObj())
- , start(BSONElement())
- , startInclusive(false)
- , end(BSONElement())
- , endInclusive(false) {
- }
+ Interval::Interval()
+ : _intervalData(BSONObj()), start(BSONElement()), startInclusive(false), end(BSONElement()),
+ endInclusive(false) { }
Interval::Interval(BSONObj base, bool si, bool ei) {
init(base, si, ei);
@@ -128,11 +124,11 @@ namespace mongo {
// TODO: shortcut number of comparisons
Interval::IntervalComparison Interval::compare(const Interval& other) const {
-
//
// Intersect cases
//
+ // TODO: rewrite this to be member functions so semantics are clearer.
if (intersects(*this, other)) {
if (exact(*this, other)) {
return INTERVAL_EQUALS;
@@ -144,7 +140,7 @@ namespace mongo {
return INTERVAL_CONTAINS;
}
if (precedes(*this, other)) {
- return INTERVAL_OVERLAPS_BEFORE;
+ return INTERVAL_OVERLAPS_BEFORE;
}
return INTERVAL_OVERLAPS_AFTER;
}
@@ -154,9 +150,13 @@ namespace mongo {
//
if (precedes(*this, other)) {
+ if (0 == end.woCompare(other.start, false)) {
+ return INTERVAL_PRECEDES_COULD_UNION;
+ }
return INTERVAL_PRECEDES;
}
- return INTERVAL_SUCCEDS;
+
+ return INTERVAL_SUCCEEDS;
}
void Interval::intersect(const Interval& other, IntervalComparison cmp) {
@@ -190,7 +190,7 @@ namespace mongo {
break;
case INTERVAL_PRECEDES:
- case INTERVAL_SUCCEDS:
+ case INTERVAL_SUCCEEDS:
*this = Interval();
break;
@@ -218,7 +218,7 @@ namespace mongo {
break;
case INTERVAL_OVERLAPS_AFTER:
- case INTERVAL_SUCCEDS:
+ case INTERVAL_SUCCEEDS:
builder.append(other.start);
builder.append(end);
init(builder.obj(), other.startInclusive, endInclusive);
@@ -236,4 +236,55 @@ namespace mongo {
}
}
+ // static
+ string Interval::cmpstr(IntervalComparison c) {
+ if (c == INTERVAL_EQUALS) {
+ return "INTERVAL_EQUALS";
+ }
+
+ // 'this' contains the other interval.
+ if (c == INTERVAL_CONTAINS) {
+ return "INTERVAL_CONTAINS";
+ }
+
+ // 'this' is contained by the other interval.
+ if (c == INTERVAL_WITHIN) {
+ return "INTERVAL_WITHIN";
+ }
+
+ // The two intervals intersect and 'this' is before the other interval.
+ if (c == INTERVAL_OVERLAPS_BEFORE) {
+ return "INTERVAL_OVERLAPS_BEFORE";
+ }
+
+ // The two intervals intersect and 'this' is after the other interval.
+ if (c == INTERVAL_OVERLAPS_AFTER) {
+ return "INTERVAL_OVERLAPS_AFTER";
+ }
+
+ // There is no intersection.
+ if (c == INTERVAL_PRECEDES) {
+ return "INTERVAL_PRECEDES";
+ }
+
+ if (c == INTERVAL_PRECEDES_COULD_UNION) {
+ return "INTERVAL_PRECEDES_COULD_UNION";
+ }
+
+ if (c == INTERVAL_SUCCEEDS) {
+ return "INTERVAL_SUCCEEDS";
+ }
+
+ if (c == INTERVAL_UNKNOWN) {
+ return "INTERVAL_UNKNOWN";
+ }
+
+ return "NO IDEA DUDE";
+ }
+
+ void Interval::reverse() {
+ std::swap(start, end);
+ std::swap(startInclusive, endInclusive);
+ }
+
} // namespace mongo
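
The interval changes fix the INTERVAL_SUCCEDS spelling and add INTERVAL_PRECEDES_COULD_UNION for two disjoint intervals that share an endpoint, e.g. [a,b) followed by [b,c], which unionize() can still merge into a single interval. A toy model of how the disjoint cases are classified, with ints standing in for BSONElement comparisons:

    #include <cassert>

    enum class Cmp { PRECEDES, PRECEDES_COULD_UNION, SUCCEEDS };

    struct Iv { int start; bool startInc; int end; bool endInc; };

    // Assumes 'a' and 'b' do not intersect; the intersecting outcomes are handled
    // earlier in the real Interval::compare().
    Cmp compareDisjointSketch(const Iv& a, const Iv& b) {
        if (a.end < b.start || (a.end == b.start && !(a.endInc && b.startInc))) {
            return (a.end == b.start) ? Cmp::PRECEDES_COULD_UNION : Cmp::PRECEDES;
        }
        return Cmp::SUCCEEDS;
    }

    int main() {
        Iv ab{10, true, 20, false};  // [10, 20)
        Iv bc{20, true, 30, true};   // [20, 30]
        assert(compareDisjointSketch(ab, bc) == Cmp::PRECEDES_COULD_UNION);

        Iv far{40, true, 50, true};  // [40, 50]
        assert(compareDisjointSketch(ab, far) == Cmp::PRECEDES);
        return 0;
    }
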
diff --git a/src/mongo/db/query/interval.h b/src/mongo/db/query/interval.h
index 26dff5b2bcd..b5a2e33ed7e 100644
--- a/src/mongo/db/query/interval.h
+++ b/src/mongo/db/query/interval.h
@@ -76,24 +76,54 @@ namespace mongo {
/** Returns true if an empty-constructed interval hasn't been init()-ialized yet */
bool isEmpty() const;
+ /**
+ * Swap start and end points of interval.
+ */
+ void reverse();
+
/** Returns how 'this' compares to 'other' */
enum IntervalComparison {
+ //
// There is some intersection.
+ //
+
+ // The two intervals are *exactly* equal.
INTERVAL_EQUALS,
+
+ // 'this' contains the other interval.
INTERVAL_CONTAINS,
+
+ // 'this' is contained by the other interval.
INTERVAL_WITHIN,
+
+ // The two intervals intersect and 'this' is before the other interval.
INTERVAL_OVERLAPS_BEFORE,
+
+ // The two intervals intersect and 'this' is after the other interval.
INTERVAL_OVERLAPS_AFTER,
+ //
// There is no intersection.
+ //
+
INTERVAL_PRECEDES,
- INTERVAL_SUCCEDS,
+
+ // This happens if we have [a,b) [b,c]
+ INTERVAL_PRECEDES_COULD_UNION,
+
+ INTERVAL_SUCCEEDS,
INTERVAL_UNKNOWN
};
+
IntervalComparison compare(const Interval& other) const;
/**
+ * toString for IntervalComparison
+ */
+ static string cmpstr(IntervalComparison c);
+
+ /**
* Updates 'this' with the intersection of 'this' and 'other'. If 'this' and 'other'
* have been compare()d before, that result can be optionally passed in 'cmp'
*/
diff --git a/src/mongo/db/query/interval_test.cpp b/src/mongo/db/query/interval_test.cpp
index 9118b327d0f..55bb164d87e 100644
--- a/src/mongo/db/query/interval_test.cpp
+++ b/src/mongo/db/query/interval_test.cpp
@@ -136,19 +136,19 @@ namespace {
TEST(Comparison, Succeds) {
Interval a(BSON("" << 10 << "" << 20), true, true);
- ASSERT_NOT_EQUALS(a.compare(a), Interval::INTERVAL_SUCCEDS);
+ ASSERT_NOT_EQUALS(a.compare(a), Interval::INTERVAL_SUCCEEDS);
Interval b(BSON("" << 20 << "" << 30), true, true);
- ASSERT_NOT_EQUALS(b.compare(a), Interval::INTERVAL_SUCCEDS);
+ ASSERT_NOT_EQUALS(b.compare(a), Interval::INTERVAL_SUCCEEDS);
Interval c(BSON("" << 20 << "" << 30), false, true);
- ASSERT_EQUALS(c.compare(a), Interval::INTERVAL_SUCCEDS);
+ ASSERT_EQUALS(c.compare(a), Interval::INTERVAL_SUCCEEDS);
Interval d(BSON("" << 21 << "" << 30), true, true);
- ASSERT_EQUALS(d.compare(a), Interval::INTERVAL_SUCCEDS);
+ ASSERT_EQUALS(d.compare(a), Interval::INTERVAL_SUCCEEDS);
Interval e(BSON("" << 15 << "" << 30), true, true);
- ASSERT_NOT_EQUALS(e.compare(a), Interval::INTERVAL_SUCCEDS);
+ ASSERT_NOT_EQUALS(e.compare(a), Interval::INTERVAL_SUCCEEDS);
}
//
diff --git a/src/mongo/db/query/plan_enumerator.cpp b/src/mongo/db/query/plan_enumerator.cpp
index 62626eec11b..3165c2f31db 100644
--- a/src/mongo/db/query/plan_enumerator.cpp
+++ b/src/mongo/db/query/plan_enumerator.cpp
@@ -27,7 +27,7 @@ namespace mongo {
: _root(root), _indices(indices) { }
PlanEnumerator::~PlanEnumerator() {
- for (map<size_t, NodeSolution*>::iterator it = _memo.begin(); it != _memo.end(); ++it) {
+ for (unordered_map<NodeID, NodeAssignment*>::iterator it = _memo.begin(); it != _memo.end(); ++it) {
delete it->second;
}
}
@@ -47,6 +47,7 @@ namespace mongo {
// cout << "root post-memo: " << _root->toString() << endl;
cout << "memo dump:\n";
+ verify(_inOrderCount == _memo.size());
for (size_t i = 0; i < _inOrderCount; ++i) {
cout << "Node #" << i << ": " << _memo[i]->toString() << endl;
}
@@ -54,17 +55,16 @@ namespace mongo {
if (!_done) {
// Tag with our first solution.
tagMemo(_nodeToId[_root]);
- checkCompound("", _root);
}
return Status::OK();
}
- bool PlanEnumerator::isCompound(size_t idx) {
+ bool PlanEnumerator::isCompound(IndexID idx) {
return (*_indices)[idx].keyPattern.nFields() > 1;
}
- string PlanEnumerator::NodeSolution::toString() const {
+ string PlanEnumerator::NodeAssignment::toString() const {
if (NULL != pred) {
stringstream ss;
ss << "predicate, first indices: [";
@@ -82,149 +82,36 @@ namespace mongo {
ss << "], pred: " << pred->expr->toString();
return ss.str();
}
- else if (NULL != andSolution) {
+ else if (NULL != newAnd) {
stringstream ss;
ss << "ONE OF: [";
- for (size_t i = 0; i < andSolution->subnodes.size(); ++i) {
- const vector<size_t>& sn = andSolution->subnodes[i];
- ss << "[";
- for (size_t j = 0; j < sn.size(); ++j) {
- ss << sn[j];
- if (j < sn.size() - 1)
- ss << ", ";
- }
- ss << "]";
- if (i < andSolution->subnodes.size() - 1)
+ for (size_t i = 0; i < newAnd->subnodes.size(); ++i) {
+ ss << "[" << newAnd->subnodes[i] << "]";
+ if (i < newAnd->subnodes.size() - 1) {
ss << ", ";
+ }
+ }
+ for (size_t i = 0; i < newAnd->predChoices.size(); ++i) {
+ const OneIndexAssignment& oie = newAnd->predChoices[i];
+ ss << "IDX#" << oie.index << " for preds: ";
+ for (size_t j = 0; j < oie.preds.size(); ++j) {
+ ss << oie.preds[j]->toString() << ", ";
+ }
}
- ss << "]";
return ss.str();
}
else {
- verify(NULL != orSolution);
+ verify(NULL != orAssignment);
stringstream ss;
ss << "ALL OF: [";
- for (size_t i = 0; i < orSolution->subnodes.size(); ++i) {
- ss << " " << orSolution->subnodes[i];
+ for (size_t i = 0; i < orAssignment->subnodes.size(); ++i) {
+ ss << " " << orAssignment->subnodes[i];
}
ss << "]";
return ss.str();
}
}
- /**
- * This is very expensive if the involved indices/predicates are numerous but
- * I suspect that's rare. TODO: revisit on perf pass.
- *
- * TODO: We can save time by not bothering to do this if the index is multiKey.
- * TODO: Perhaps this should be done by the planner??
- */
- void PlanEnumerator::checkCompound(string prefix, MatchExpression* node) {
- if (MatchExpression::AND == node->matchType()) {
- // Step 1: Find all compound indices.
- vector<MatchExpression*> assignedCompound;
- vector<MatchExpression*> unassigned;
-
- for (size_t i = 0; i < node->numChildren(); ++i) {
- MatchExpression* child = node->getChild(i);
- if (Indexability::nodeCanUseIndexOnOwnField(child)) {
- verify(NULL != _memo[_nodeToId[child]]);
- verify(NULL != _memo[_nodeToId[child]]->pred);
- if (NULL == child->getTag()) {
- // Not assigned an index.
- unassigned.push_back(child);
- }
- else {
- IndexTag* childTag = static_cast<IndexTag*>(child->getTag());
- if (isCompound(childTag->index)) {
- assignedCompound.push_back(child);
- }
- }
- }
- }
-
- for (size_t i = 0; i < assignedCompound.size(); ++i) {
- cout << "assigned compound: " << assignedCompound[i]->toString();
- }
- for (size_t i = 0; i < unassigned.size(); ++i) {
- cout << "unassigned : " << unassigned[i]->toString();
- }
-
- // Step 2: Iterate over the other fields of the compound indices
- // TODO: This could be optimized a lot.
- for (size_t i = 0; i < assignedCompound.size(); ++i) {
- IndexTag* childTag = static_cast<IndexTag*>(assignedCompound[i]->getTag());
-
- // XXX: If we assign a compound index and it's on a multikey index, the planner may
- // not be able to use the multikey for it, and then it may create a new index scan,
- // and that new scan will be bogus. For now don't assign, should fix planner to be
- // more resilient. once we figure out fully in what cases array operators can use
- // compound indices, change this to deal with that.
- //
- // That is, we can only safely assign compound indices if the planner uses them as
- // compound indices whenever we assign them.
- if ((*_indices)[i].multikey) { continue; }
-
- const BSONObj& kp = (*_indices)[childTag->index].keyPattern;
- BSONObjIterator it(kp);
- it.next();
-
- size_t posInIdx = 0;
- // we know isCompound is true so this should be true.
- verify(it.more());
- while (it.more()) {
- BSONElement kpElt = it.next();
- ++posInIdx;
- bool assignedField = false;
- // Trying to pick an unassigned M.E.
- for (size_t j = 0; j < unassigned.size(); ++j) {
- // TODO: is this really required? This seems really like it's just a
- // reiteration of the tagging process.
- if (prefix + unassigned[j]->path().toString() != kpElt.fieldName()) {
- // We need to find a predicate over kpElt.fieldName().
- continue;
- }
- // Another compound index was assigned.
- if (NULL != unassigned[j]->getTag()) {
- continue;
- }
- // Index no. childTag->index, the compound index, must be
- // a member of the notFirst
- NodeSolution* soln = _memo[_nodeToId[unassigned[j]]];
- verify(NULL != soln);
- verify(NULL != soln->pred);
- verify(unassigned[j] == soln->pred->expr);
- if (std::find(soln->pred->notFirst.begin(), soln->pred->notFirst.end(), childTag->index) != soln->pred->notFirst.end()) {
- cout << "compound-ing " << kp.toString() << " with node " << unassigned[j]->toString() << endl;
- assignedField = true;
- cout << "setting pos to " << posInIdx << endl;
- unassigned[j]->setTag(new IndexTag(childTag->index, posInIdx));
- // We've picked something for this (index, field) tuple. Don't pick anything else.
- break;
- }
- }
-
- // We must assign fields in compound indices contiguously.
- if (!assignedField) {
- cout << "Failed to assign to compound field " << kpElt.toString() << endl;
- break;
- }
- }
- }
- }
-
- if (Indexability::arrayUsesIndexOnChildren(node)) {
- if (!node->path().empty()) {
- prefix += node->path().toString() + ".";
- }
- }
-
- // Don't think the traversal order here matters.
- for (size_t i = 0; i < node->numChildren(); ++i) {
- checkCompound(prefix, node->getChild(i));
- }
- }
-
bool PlanEnumerator::getNext(MatchExpression** tree) {
if (_done) { return false; }
*tree = _root->shallowClone();
@@ -236,44 +123,23 @@ namespace mongo {
sortUsingTags(*tree);
_root->resetTag();
+ // TODO: enumerate >1 plan
_done = true;
return true;
}
bool PlanEnumerator::prepMemo(MatchExpression* node) {
- if (Indexability::arrayUsesIndexOnChildren(node)) {
- // TODO: Fold into logical->AND branch below?
- AndSolution* andSolution = new AndSolution();
- for (size_t i = 0; i < node->numChildren(); ++i) {
- if (prepMemo(node->getChild(i))) {
- vector<size_t> option;
- option.push_back(_nodeToId[node->getChild(i)]);
- andSolution->subnodes.push_back(option);
- }
- }
-
+ if (Indexability::nodeCanUseIndexOnOwnField(node)) {
+ // TODO: This is done for everything, maybe have NodeAssignment* newMemo(node)?
size_t myID = _inOrderCount++;
_nodeToId[node] = myID;
- NodeSolution* soln = new NodeSolution();
+ NodeAssignment* soln = new NodeAssignment();
_memo[_nodeToId[node]] = soln;
_curEnum[myID] = 0;
- // Takes ownership.
- soln->andSolution.reset(andSolution);
- return andSolution->subnodes.size() > 0;
- }
- else if (Indexability::nodeCanUseIndexOnOwnField(node)) {
- // TODO: This is done for everything, maybe have NodeSolution* newMemo(node)?
- size_t myID = _inOrderCount++;
- _nodeToId[node] = myID;
- NodeSolution* soln = new NodeSolution();
- _memo[_nodeToId[node]] = soln;
-
- _curEnum[myID] = 0;
-
- // Fill out the NodeSolution.
- soln->pred.reset(new PredicateSolution());
+ // Fill out the NodeAssignment.
+ soln->pred.reset(new PredicateAssignment());
if (NULL != node->getTag()) {
RelevantTag* rt = static_cast<RelevantTag*>(node->getTag());
soln->pred->first.swap(rt->first);
@@ -284,89 +150,150 @@ namespace mongo {
// be indexed when there are 'first' indices.
return soln->pred->first.size() > 0;
}
- else if (node->isLogical()) {
- if (MatchExpression::OR == node->matchType()) {
- // For an OR to be indexed all its children must be indexed.
- bool indexed = true;
- for (size_t i = 0; i < node->numChildren(); ++i) {
- if (!prepMemo(node->getChild(i))) {
- indexed = false;
- }
+ else if (MatchExpression::OR == node->matchType()) {
+ // For an OR to be indexed all its children must be indexed.
+ bool indexed = true;
+ for (size_t i = 0; i < node->numChildren(); ++i) {
+ if (!prepMemo(node->getChild(i))) {
+ indexed = false;
}
+ }
- size_t myID = _inOrderCount++;
- _nodeToId[node] = myID;
- NodeSolution* soln = new NodeSolution();
- _memo[_nodeToId[node]] = soln;
+ size_t myID = _inOrderCount++;
+ _nodeToId[node] = myID;
+ NodeAssignment* soln = new NodeAssignment();
+ _memo[_nodeToId[node]] = soln;
+
+ OrAssignment* orAssignment = new OrAssignment();
+ for (size_t i = 0; i < node->numChildren(); ++i) {
+ orAssignment->subnodes.push_back(_nodeToId[node->getChild(i)]);
+ }
+ soln->orAssignment.reset(orAssignment);
+ return indexed;
+ }
+ else if (MatchExpression::AND == node->matchType() || Indexability::arrayUsesIndexOnChildren(node)) {
+ // map from idx id to children that have a pred over it.
+ unordered_map<IndexID, vector<MatchExpression*> > idxToFirst;
+ unordered_map<IndexID, vector<MatchExpression*> > idxToNotFirst;
+
+ NewAndAssignment* newAndAssignment = new NewAndAssignment();
+ for (size_t i = 0; i < node->numChildren(); ++i) {
+ MatchExpression* child = node->getChild(i);
- OrSolution* orSolution = new OrSolution();
- for (size_t i = 0; i < node->numChildren(); ++i) {
- orSolution->subnodes.push_back(_nodeToId[node->getChild(i)]);
+ if (Indexability::nodeCanUseIndexOnOwnField(child)) {
+ RelevantTag* rt = static_cast<RelevantTag*>(child->getTag());
+ for (size_t j = 0; j < rt->first.size(); ++j) {
+ idxToFirst[rt->first[j]].push_back(child);
+ }
+ for (size_t j = 0; j < rt->notFirst.size(); ++j) {
+ idxToNotFirst[rt->notFirst[j]].push_back(child);
+ }
+ }
+ else {
+ if (prepMemo(child)) {
+ size_t childID = _nodeToId[child];
+ newAndAssignment->subnodes.push_back(childID);
+ }
}
- soln->orSolution.reset(orSolution);
- return indexed;
}
- else {
- // To be exhaustive, we would compute all solutions of size 1, 2, ...,
- // node->numChildren(). Each of these solutions would get a place in the
- // memo structure.
- // If there is a geoNear, we put it at the start of our options to ensure that, even
- // if we enumerate one plan, we will index it.
+ // At this point we know how many indices the AND's predicate children are over.
+ newAndAssignment->predChoices.resize(idxToFirst.size());
+
+ // This iterates through the predChoices.
+ size_t predChoicesIdx = 0;
+
+ // For each FIRST, we assign nodes to it.
+ for (unordered_map<IndexID, vector<MatchExpression*> >::iterator it = idxToFirst.begin(); it != idxToFirst.end(); ++it) {
+ OneIndexAssignment* assign = &newAndAssignment->predChoices[predChoicesIdx];
+ ++predChoicesIdx;
+
+ // Fill out the OneIndexAssignment with the preds that are over the first field.
+ assign->index = it->first;
+ // We can swap because we're never touching idxToFirst again after this loop over it.
+ assign->preds.swap(it->second);
+ assign->positions.resize(assign->preds.size(), 0);
+
+ //
+ // Compound analysis here and below.
//
- // XXX: This is a crappy substitute for something smarter, namely always including
- // geoNear in every possible selection inside of an AND. Revisit when we enum
- // more than one plan.
- size_t geoNearChild = IndexTag::kNoIndex;
-
- // For efficiency concerns, we don't explore any more than the size-1 members
- // of the power set. That is, we will only use one index at a time.
- AndSolution* andSolution = new AndSolution();
- for (size_t i = 0; i < node->numChildren(); ++i) {
- // If AND requires an index it can only piggyback on the children that have
- // indices.
- if (prepMemo(node->getChild(i))) {
- vector<size_t> option;
- size_t childID = _nodeToId[node->getChild(i)];
- option.push_back(childID);
- andSolution->subnodes.push_back(option);
-
- // Fill out geoNearChild, possibly.
- // TODO: Actually rank enumeration aside from "always pick GEO_NEAR".
- if (NULL != _memo[childID]) {
- NodeSolution* ns = _memo[childID];
- if (NULL != ns->pred) {
- verify(NULL != ns->pred->expr);
- if (MatchExpression::GEO_NEAR == ns->pred->expr->matchType()) {
- geoNearChild = andSolution->subnodes.size() - 1;
- }
- }
+
+ // Don't compound on multikey indices. (XXX: not whole story...)
+ if ((*_indices)[it->first].multikey) { continue; }
+
+ // Grab the expressions that are notFirst for the index whose assignments we're filling out.
+ unordered_map<size_t, vector<MatchExpression*> >::const_iterator compoundIt = idxToNotFirst.find(it->first);
+ if (compoundIt == idxToNotFirst.end()) { continue; }
+ const vector<MatchExpression*>& tryCompound = compoundIt->second;
+
+ // Walk over the rest of the key pattern trying to find predicates we can compound with.
+ BSONObjIterator kpIt((*_indices)[it->first].keyPattern);
+ // Skip the first elt as it's already assigned.
+ kpIt.next();
+ size_t posInIdx = 0;
+ while (kpIt.more()) {
+ BSONElement keyElt = kpIt.next();
+ ++posInIdx;
+ bool fieldAssigned = false;
+ for (size_t j = 0; j < tryCompound.size(); ++j) {
+ MatchExpression* maybe = tryCompound[j];
+ // Sigh we grab the full path from the relevant tag.
+ RelevantTag* rt = static_cast<RelevantTag*>(maybe->getTag());
+ if (keyElt.fieldName() == rt->path) {
+ assign->preds.push_back(maybe);
+ assign->positions.push_back(posInIdx);
+ fieldAssigned = true;
}
}
+ // If we have (a,b,c) and we can't assign something to 'b' don't try
+ // to assign something to 'c'.
+ if (!fieldAssigned) { break; }
}
+ }
- if (IndexTag::kNoIndex != geoNearChild && (0 != geoNearChild)) {
- andSolution->subnodes[0].swap(andSolution->subnodes[geoNearChild]);
+ // Enumeration detail: We start enumerating with the first predChoice. To make sure that
+ // we always output the geoNear index, put it first.
+ //
+ // TODO: We can compute this more "on the fly" but it's clearer to see what's going on when
+ // we do this as a separate step.
+ for (size_t i = 0; i < newAndAssignment->predChoices.size(); ++i) {
+ const OneIndexAssignment& oie = newAndAssignment->predChoices[i];
+ bool foundGeoNear = false;
+
+ for (size_t j = 0; j < oie.preds.size(); ++j) {
+ MatchExpression* expr = oie.preds[j];
+ if (MatchExpression::GEO_NEAR == expr->matchType()) {
+ // If the GEO_NEAR idx is already at position 0 we'll choose it.
+ // TODO: We have to be smarter when we enumerate >1 plan.
+ if (0 != i) {
+ // Move the GEO_NEAR supplying index to the first choice spot.
+ std::swap(newAndAssignment->predChoices[0], newAndAssignment->predChoices[i]);
+ }
+ foundGeoNear = true;
+ break;
+ }
}
+ if (foundGeoNear) { break; }
+ }
- size_t myID = _inOrderCount++;
- _nodeToId[node] = myID;
- NodeSolution* soln = new NodeSolution();
- _memo[_nodeToId[node]] = soln;
-
- verify(MatchExpression::AND == node->matchType());
- _curEnum[myID] = 0;
+ // Normal node allocation stuff.
+ size_t myID = _inOrderCount++;
+ _nodeToId[node] = myID;
+ NodeAssignment* soln = new NodeAssignment();
+ _memo[_nodeToId[node]] = soln;
+ _curEnum[myID] = 0;
- // Takes ownership.
- soln->andSolution.reset(andSolution);
- return andSolution->subnodes.size() > 0;
- }
+ // Takes ownership.
+ soln->newAnd.reset(newAndAssignment);
+ return newAndAssignment->subnodes.size() > 0 || newAndAssignment->predChoices.size() > 0;
}
+
+ // Don't know what the node is at this point.
return false;
}
void PlanEnumerator::tagMemo(size_t id) {
- NodeSolution* soln = _memo[id];
+ NodeAssignment* soln = _memo[id];
verify(NULL != soln);
if (NULL != soln->pred) {
@@ -380,23 +307,33 @@ namespace mongo {
soln->pred->expr->setTag(new IndexTag(soln->pred->first[_curEnum[id]]));
}
}
- else if (NULL != soln->orSolution) {
- for (size_t i = 0; i < soln->orSolution->subnodes.size(); ++i) {
- tagMemo(soln->orSolution->subnodes[i]);
+ else if (NULL != soln->orAssignment) {
+ for (size_t i = 0; i < soln->orAssignment->subnodes.size(); ++i) {
+ tagMemo(soln->orAssignment->subnodes[i]);
}
// TODO: Who checks to make sure that we tag all nodes of an OR? We should
// know this early.
}
- else {
- verify(NULL != soln->andSolution);
- verify(_curEnum[id] < soln->andSolution->subnodes.size());
- vector<size_t> &cur = soln->andSolution->subnodes[_curEnum[id]];
-
- for (size_t i = 0; i < cur.size(); ++i) {
- // Tag the child.
- tagMemo(cur[i]);
+ else if (NULL != soln->newAnd) {
+ size_t curEnum = _curEnum[id];
+ if (curEnum < soln->newAnd->predChoices.size()) {
+ const OneIndexAssignment assign = soln->newAnd->predChoices[curEnum];
+ for (size_t i = 0; i < assign.preds.size(); ++i) {
+ MatchExpression* pred = assign.preds[i];
+ verify(NULL == pred->getTag());
+ pred->setTag(new IndexTag(assign.index, assign.positions[i]));
+ }
+ }
+ else {
+ curEnum -= soln->newAnd->predChoices.size();
+ if (curEnum < soln->newAnd->subnodes.size()) {
+ tagMemo(soln->newAnd->subnodes[curEnum]);
+ }
}
}
+ else {
+ verify(0);
+ }
}
bool PlanEnumerator::nextMemo(size_t id) {
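
The rewritten AND handling above replaces checkCompound(): indexable children are bucketed by which indices list them as 'first' (usable on the leading key-pattern field) versus 'notFirst', one OneIndexAssignment is built per usable index, and each assignment is then extended along the key pattern one field at a time, stopping at the first field with no predicate so that compound assignments stay contiguous. The real code additionally skips compounding on multikey indices and moves a GEO_NEAR-bearing choice to the front. A simplified sketch of the grouping and compounding, with illustrative types rather than the real PlanEnumerator structures:

    #include <cstddef>
    #include <string>
    #include <unordered_map>
    #include <vector>

    using IndexID = std::size_t;

    // Stand-in for a tagged predicate (RelevantTag carries this info in the real code).
    struct PredSketch {
        std::string path;               // field the predicate is over
        std::vector<IndexID> first;     // indices that can serve it on the leading field
        std::vector<IndexID> notFirst;  // indices that can serve it on a later field
    };

    struct OneIndexAssignmentSketch {
        IndexID index;
        std::vector<const PredSketch*> preds;
        std::vector<std::size_t> positions;  // key-pattern position of each pred
    };

    std::vector<OneIndexAssignmentSketch> enumerateAndChoices(
            const std::vector<PredSketch>& preds,
            const std::vector<std::vector<std::string>>& keyPatterns) {  // field names, per index
        std::unordered_map<IndexID, std::vector<const PredSketch*>> idxToFirst, idxToNotFirst;
        for (const PredSketch& p : preds) {
            for (IndexID i : p.first)    idxToFirst[i].push_back(&p);
            for (IndexID i : p.notFirst) idxToNotFirst[i].push_back(&p);
        }

        std::vector<OneIndexAssignmentSketch> choices;
        for (const auto& entry : idxToFirst) {
            OneIndexAssignmentSketch assign;
            assign.index = entry.first;
            assign.preds = entry.second;                      // all usable on the leading field
            assign.positions.assign(assign.preds.size(), 0);

            // Compounding: walk the rest of the key pattern and attach every notFirst
            // predicate over that field; stop at the first field with no predicate.
            const std::vector<std::string>& kp = keyPatterns[assign.index];
            auto notFirstIt = idxToNotFirst.find(assign.index);
            for (std::size_t pos = 1; notFirstIt != idxToNotFirst.end() && pos < kp.size(); ++pos) {
                bool fieldAssigned = false;
                for (const PredSketch* cand : notFirstIt->second) {
                    if (cand->path == kp[pos]) {
                        assign.preds.push_back(cand);
                        assign.positions.push_back(pos);
                        fieldAssigned = true;
                    }
                }
                if (!fieldAssigned) { break; }
            }
            choices.push_back(assign);
        }
        return choices;
    }
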
diff --git a/src/mongo/db/query/plan_enumerator.h b/src/mongo/db/query/plan_enumerator.h
index 33b9072ad45..1e1669ab234 100644
--- a/src/mongo/db/query/plan_enumerator.h
+++ b/src/mongo/db/query/plan_enumerator.h
@@ -53,7 +53,7 @@ namespace mongo {
* Outputs a possible plan. Leaves in the plan are tagged with an index to use.
* Returns true if a plan was outputted, false if no more plans will be outputted.
*
- * 'tree' is set to point to the query tree. A QuerySolution is built from this tree.
+ * 'tree' is set to point to the query tree. A QueryAssignment is built from this tree.
* Caller owns the pointer. Note that 'tree' itself points into data owned by the
* provided CanonicalQuery.
*
@@ -65,18 +65,20 @@ namespace mongo {
private:
//
- // Data used by all enumeration strategies
+ // Memoization strategy
//
- // Match expression we're planning for. Not owned by us.
- MatchExpression* _root;
- // Indices we're allowed to enumerate with.
- const vector<IndexEntry>* _indices;
+ // Everything is really a size_t but it's far more readable to impose a type via typedef.
- //
- // Memoization strategy
- //
+ // An ID we use to index into _memo. An entry in _memo is a NodeAssignment.
+ typedef size_t NodeID;
+
+ // An index in _indices.
+ typedef size_t IndexID;
+
+ // The position of a field in a possibly compound index.
+ typedef size_t IndexPosition;
/**
* Traverses the match expression and generates the memo structure from it.
@@ -87,7 +89,7 @@ namespace mongo {
/**
* Returns true if index #idx is compound, false otherwise.
*/
- bool isCompound(size_t idx);
+ bool isCompound(IndexID idx);
/**
* When we assign indices to nodes, we only assign indices to predicates that are 'first'
@@ -102,7 +104,7 @@ namespace mongo {
* Traverses the memo structure and annotates the tree with IndexTags for the chosen
* indices.
*/
- void tagMemo(size_t id);
+ void tagMemo(NodeID id);
/**
* Move to the next enumeration state. Enumeration state is stored in curEnum.
@@ -116,29 +118,38 @@ namespace mongo {
*
* XXX implement.
*/
- bool nextMemo(size_t id);
+ bool nextMemo(NodeID id);
- struct PredicateSolution {
- vector<size_t> first;
- vector<size_t> notFirst;
+ struct PredicateAssignment {
+ vector<IndexID> first;
+ vector<IndexID> notFirst;
// Not owned here.
MatchExpression* expr;
};
- struct AndSolution {
- // Must use one of the elements of subnodes.
- vector<vector<size_t> > subnodes;
+ struct OrAssignment {
+ // Must use all of subnodes.
+ vector<NodeID> subnodes;
};
- struct OrSolution {
- // Must use all of subnodes.
- vector<size_t> subnodes;
+ // This is used by NewAndAssignment, not an actual assignment.
+ struct OneIndexAssignment {
+ // 'preds[i]' uses index 'index' at position 'positions[i]'.
+ vector<MatchExpression*> preds;
+ vector<IndexPosition> positions;
+ IndexID index;
};
- struct NodeSolution {
- scoped_ptr<PredicateSolution> pred;
- scoped_ptr<AndSolution> andSolution;
- scoped_ptr<OrSolution> orSolution;
+ struct NewAndAssignment {
+ // TODO: We really want to consider the power set of the union of the choices here.
+ vector<OneIndexAssignment> predChoices;
+ vector<NodeID> subnodes;
+ };
+
+ struct NodeAssignment {
+ scoped_ptr<PredicateAssignment> pred;
+ scoped_ptr<OrAssignment> orAssignment;
+ scoped_ptr<NewAndAssignment> newAnd;
string toString() const;
};
@@ -147,22 +158,30 @@ namespace mongo {
// Used to label nodes in the order in which we visit in a post-order traversal.
size_t _inOrderCount;
- // Map from node to its order/ID.
- map<MatchExpression*, size_t> _nodeToId;
+ // Map from expression to its NodeID.
+ unordered_map<MatchExpression*, NodeID> _nodeToId;
- // Map from order/ID to a memoized solution.
- map<size_t, NodeSolution*> _memo;
+ // Map from NodeID to its precomputed solution info.
+ unordered_map<NodeID, NodeAssignment*> _memo;
// Enumeration
- // ANDs count through clauses, PREDs count through indices.
- // Index is order/ID.
- // Value is whatever counter that node needs.
- map<size_t, size_t> _curEnum;
+ // Map from NodeID to a counter that the NodeID uses to enumerate its states.
+ unordered_map<NodeID, size_t> _curEnum;
// If true, there are no further enumeration states, and getNext should return false.
// We could be _done immediately after init if we're unable to output an indexed plan.
bool _done;
+
+ //
+ // Data used by all enumeration strategies
+ //
+
+ // Match expression we're planning for. Not owned by us.
+ MatchExpression* _root;
+
+ // Indices we're allowed to enumerate with.
+ const vector<IndexEntry>* _indices;
};
} // namespace mongo
diff --git a/src/mongo/db/query/query_planner.cpp b/src/mongo/db/query/query_planner.cpp
index 5d3f8f70b9e..13d6d98d5f1 100644
--- a/src/mongo/db/query/query_planner.cpp
+++ b/src/mongo/db/query/query_planner.cpp
@@ -169,6 +169,7 @@ namespace mongo {
verify(NULL == node->getTag());
RelevantTag* rt = new RelevantTag();
node->setTag(rt);
+ rt->path = fullPath;
// TODO: This is slow, with all the string compares.
for (size_t i = 0; i < indices.size(); ++i) {
@@ -231,10 +232,10 @@ namespace mongo {
}
// static
- QuerySolutionNode* QueryPlanner::makeLeafNode(const BSONObj& indexKeyPattern,
+ QuerySolutionNode* QueryPlanner::makeLeafNode(const IndexEntry& index,
MatchExpression* expr,
bool* exact) {
- cout << "making leaf node for " << expr->toString() << endl;
+ // cout << "making leaf node for " << expr->toString() << endl;
// We're guaranteed that all GEO_NEARs are first. This slightly violates the "sort index
// predicates by their position in the compound index" rule but GEO_NEAR isn't an ixscan.
// This saves our bacon when we have {foo: 1, bar: "2dsphere"} and the predicate on bar is a
@@ -244,7 +245,7 @@ namespace mongo {
// This should gracefully deal with the case where we have a pred over foo but no geo clause
// over bar. In that case there is no GEO_NEAR to appear first and it's treated like a
// straight ixscan.
- BSONElement elt = indexKeyPattern.firstElement();
+ BSONElement elt = index.keyPattern.firstElement();
bool indexIs2D = (String == elt.type() && "2d" == elt.String());
if (MatchExpression::GEO_NEAR == expr->matchType()) {
@@ -253,7 +254,7 @@ namespace mongo {
GeoNearMatchExpression* nearme = static_cast<GeoNearMatchExpression*>(expr);
if (indexIs2D) {
GeoNear2DNode* ret = new GeoNear2DNode();
- ret->indexKeyPattern = indexKeyPattern;
+ ret->indexKeyPattern = index.keyPattern;
ret->seek = nearme->getRawObj();
return ret;
}
@@ -261,14 +262,14 @@ namespace mongo {
// Note that even if we're starting a GeoNear node, we may never get a
// predicate for $near. So, in that case, the builder "collapses" it into
// an ixscan.
- cout << "Making geonear 2dblahblah kp " << indexKeyPattern.toString() << endl;
+ // cout << "Making geonear 2dblahblah kp " << index.toString() << endl;
GeoNear2DSphereNode* ret = new GeoNear2DSphereNode();
- ret->indexKeyPattern = indexKeyPattern;
+ ret->indexKeyPattern = index.keyPattern;
ret->nq = nearme->getData();
- ret->baseBounds.fields.resize(indexKeyPattern.nFields());
+ ret->baseBounds.fields.resize(index.keyPattern.nFields());
stringstream ss;
ret->appendToString(&ss, 0);
- cout << "geonear 2dsphere out " << ss.str() << endl;
+ // cout << "geonear 2dsphere out " << ss.str() << endl;
return ret;
}
}
@@ -279,18 +280,19 @@ namespace mongo {
GeoMatchExpression* nearme = static_cast<GeoMatchExpression*>(expr);
verify(indexIs2D);
Geo2DNode* ret = new Geo2DNode();
- ret->indexKeyPattern = indexKeyPattern;
+ ret->indexKeyPattern = index.keyPattern;
ret->seek = nearme->getRawObj();
return ret;
}
else {
- cout << "making ixscan for " << expr->toString() << endl;
+ // cout << "making ixscan for " << expr->toString() << endl;
// Note that indexKeyPattern.firstElement().fieldName() may not equal expr->path()
// because expr might be inside an array operator that provides a path prefix.
IndexScanNode* isn = new IndexScanNode();
- isn->indexKeyPattern = indexKeyPattern;
- isn->bounds.fields.resize(indexKeyPattern.nFields());
+ isn->indexKeyPattern = index.keyPattern;
+ isn->indexIsMultiKey = index.multikey;
+ isn->bounds.fields.resize(index.keyPattern.nFields());
// XXX XXX: move this if inside of the bounds builder.
if (MatchExpression::ELEM_MATCH_VALUE == expr->matchType()) {
@@ -299,23 +301,24 @@ namespace mongo {
// TODO: We could/should merge the bounds (possibly subject to various multikey
// etc. restrictions). For now we don't bother.
- IndexBoundsBuilder::translate(expr->getChild(0), indexKeyPattern.firstElement(),
+ IndexBoundsBuilder::translate(expr->getChild(0), index.keyPattern.firstElement(),
&isn->bounds.fields[0], exact);
// TODO: I think this is valid but double check.
*exact = false;
}
else {
- IndexBoundsBuilder::translate(expr, indexKeyPattern.firstElement(),
+ IndexBoundsBuilder::translate(expr, index.keyPattern.firstElement(),
&isn->bounds.fields[0], exact);
}
- cout << "bounds are " << isn->bounds.toString() << " exact " << *exact << endl;
+ // cout << "bounds are " << isn->bounds.toString() << " exact " << *exact << endl;
return isn;
}
}
- void QueryPlanner::mergeWithLeafNode(MatchExpression* expr, const BSONObj& keyPattern,
- size_t pos, bool* exactOut, QuerySolutionNode* node) {
+ void QueryPlanner::mergeWithLeafNode(MatchExpression* expr, const IndexEntry& index,
+ size_t pos, bool* exactOut, QuerySolutionNode* node,
+ MatchExpression::MatchType mergeType) {
const StageType type = node->getType();
if (STAGE_GEO_2D == type || STAGE_GEO_NEAR_2D == type) {
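The IndexBoundsBuilder::translate calls above turn a single predicate into intervals over one index field. A minimal sketch of the idea for a numeric $gt predicate, using a simplified interval type rather than the real builder:

    // A simplified interval type; the real builder handles all BSON types,
    // inclusiveness per endpoint, and many operators beyond $gt.
    #include <iostream>
    #include <limits>

    struct SimpleInterval {
        double lo;
        double hi;
        bool loInclusive;
        bool hiInclusive;
    };

    // {field: {$gt: v}} scans numeric keys strictly greater than v.
    SimpleInterval translateGt(double v) {
        SimpleInterval out;
        out.lo = v;
        out.loInclusive = false;                           // exclude v itself
        out.hi = std::numeric_limits<double>::infinity();
        out.hiInclusive = true;                            // closed at +inf in this sketch
        return out;
    }

    int main() {
        SimpleInterval iv = translateGt(5);
        std::cout << (iv.loInclusive ? "[" : "(") << iv.lo << ", " << iv.hi
                  << (iv.hiInclusive ? "]" : ")") << std::endl;  // (5, inf]
        return 0;
    }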
@@ -343,7 +346,7 @@ namespace mongo {
// Get the ixtag->pos-th element of the index key pattern.
// TODO: cache this instead/with ixtag->pos?
- BSONObjIterator it(keyPattern);
+ BSONObjIterator it(index.keyPattern);
BSONElement keyElt = it.next();
for (size_t i = 0; i < pos; ++i) {
verify(it.more());
@@ -359,13 +362,56 @@ namespace mongo {
//<< indices[currentIndexNumber].keyPattern.toString() << endl;
verify(boundsToFillOut->fields.size() > pos);
- verify(boundsToFillOut->fields[pos].name.empty());
- IndexBoundsBuilder::translate(expr, keyElt, &boundsToFillOut->fields[pos], exactOut);
+
+ OrderedIntervalList* oil = &boundsToFillOut->fields[pos];
+
+ if (boundsToFillOut->fields[pos].name.empty()) {
+ IndexBoundsBuilder::translate(expr, keyElt, oil, exactOut);
+ }
+ else {
+ if (MatchExpression::AND == mergeType) {
+ IndexBoundsBuilder::translateAndIntersect(expr, keyElt, oil, exactOut);
+ }
+ else {
+ verify(MatchExpression::OR == mergeType);
+ IndexBoundsBuilder::translateAndUnion(expr, keyElt, oil, exactOut);
+ }
+ }
+ }
+ }
+
+ /**
+ * Assumes each OIL in bounds is increasing.
+     * Reverses any OIL whose corresponding key pattern element is descending ('-1').
+ */
+ void alignBounds(IndexBounds* bounds, const BSONObj& kp) {
+ BSONObjIterator it(kp);
+ size_t oilIdx = 0;
+ while (it.more()) {
+ BSONElement elt = it.next();
+ int direction = (elt.numberInt() >= 0) ? 1 : -1;
+ if (-1 == direction) {
+ vector<Interval>& iv = bounds->fields[oilIdx].intervals;
+ // Step 1: reverse the list.
+ std::reverse(iv.begin(), iv.end());
+ // Step 2: reverse each interval.
+ for (size_t i = 0; i < iv.size(); ++i) {
+ // cout << "reversing " << iv[i].toString() << endl;
+ iv[i].reverse();
+ }
+ }
+ ++oilIdx;
+ }
+
+ // XXX: some day we'll maybe go backward to pull a sort out
+ if (!bounds->isValidFor(kp, 1)) {
+ cout << "INVALID BOUNDS: " << bounds->toString() << endl;
+ verify(0);
}
}
// static
- void QueryPlanner::finishLeafNode(QuerySolutionNode* node, const BSONObj& indexKeyPattern) {
+ void QueryPlanner::finishLeafNode(QuerySolutionNode* node, const IndexEntry& index) {
const StageType type = node->getType();
if (STAGE_GEO_2D == type || STAGE_GEO_NEAR_2D == type) {
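The translateAndIntersect/translateAndUnion dispatch above merges two sets of bounds that land on the same key pattern position: AND intersects the interval lists, OR unions them. A minimal sketch of that interval arithmetic, using closed integer intervals as stand-ins for the real Interval/OrderedIntervalList types:

    // Closed integer intervals stand in for the real Interval and
    // OrderedIntervalList types; this shows the arithmetic, not the
    // IndexBoundsBuilder implementation.
    #include <algorithm>
    #include <iostream>
    #include <utility>

    typedef std::pair<int, int> Ival;  // [first, second], assumes first <= second

    // Intersection of two intervals; returns false if they are disjoint.
    bool intersect(const Ival& a, const Ival& b, Ival* out) {
        int lo = std::max(a.first, b.first);
        int hi = std::min(a.second, b.second);
        if (lo > hi) {
            return false;
        }
        *out = Ival(lo, hi);
        return true;
    }

    int main() {
        // Two predicates on the same field: x in [3, 9] and x in [7, 20].
        Ival a(3, 9), b(7, 20), both;
        if (intersect(a, b, &both)) {
            // AND keeps only the overlap: [7, 9].
            std::cout << "AND -> [" << both.first << ", " << both.second << "]\n";
        }
        // OR keeps everything; since these two overlap, the union collapses
        // to a single interval [3, 20].
        Ival either(std::min(a.first, b.first), std::max(a.second, b.second));
        std::cout << "OR  -> [" << either.first << ", " << either.second << "]\n";
        return 0;
    }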
@@ -399,10 +445,13 @@ namespace mongo {
}
// All fields are filled out with bounds, nothing to do.
- if (firstEmptyField == bounds->fields.size()) { return; }
+ if (firstEmptyField == bounds->fields.size()) {
+ alignBounds(bounds, index.keyPattern);
+ return;
+ }
// Skip ahead to the firstEmptyField-th element, where we begin filling in bounds.
- BSONObjIterator it(indexKeyPattern);
+ BSONObjIterator it(index.keyPattern);
for (size_t i = 0; i < firstEmptyField; ++i) {
verify(it.more());
it.next();
@@ -421,6 +470,10 @@ namespace mongo {
// Make sure that the length of the key is the length of the bounds we started.
verify(firstEmptyField == bounds->fields.size());
+
+ // We create bounds assuming a forward direction but can easily reverse bounds to align
+ // according to our desired direction.
+ alignBounds(bounds, index.keyPattern);
}
}
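alignBounds reverses bounds that were built in ascending order so they match a descending key component. A small stand-alone sketch of the two reversal steps, with integer pairs standing in for the real Interval class:

    // Integer pairs stand in for the real Interval class; the two steps
    // mirror alignBounds above: reverse the list, then reverse each interval.
    #include <algorithm>
    #include <cstddef>
    #include <iostream>
    #include <utility>
    #include <vector>

    int main() {
        // Bounds built for an ascending scan over one field: [3, 5] then [7, 9].
        std::vector<std::pair<int, int> > oil;
        oil.push_back(std::make_pair(3, 5));
        oil.push_back(std::make_pair(7, 9));

        // Step 1: reverse the list of intervals.
        std::reverse(oil.begin(), oil.end());
        // Step 2: reverse the endpoints of each interval.
        for (std::size_t i = 0; i < oil.size(); ++i) {
            std::swap(oil[i].first, oil[i].second);
        }

        // Result: [9, 7] then [5, 3] -- the order a descending ({field: -1})
        // component stores and visits its keys.
        for (std::size_t i = 0; i < oil.size(); ++i) {
            std::cout << "[" << oil[i].first << ", " << oil[i].second << "]\n";
        }
        return 0;
    }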
@@ -430,11 +483,6 @@ namespace mongo {
const vector<IndexEntry>& indices,
vector<QuerySolutionNode*>* out) {
- bool isAnd = MatchExpression::AND == root->matchType();
- if (!isAnd) {
- verify(MatchExpression::OR == root->matchType());
- }
-
auto_ptr<QuerySolutionNode> currentScan;
size_t currentIndexNumber = IndexTag::kNoIndex;
size_t curChild = 0;
@@ -492,24 +540,7 @@ namespace mongo {
// could be NULL, in which case we don't output it. The rest of the logic is identical.
//
// If the child uses the same index as the current index scan, we may be able to merge
- // the bounds for the two scans. See the comments below for canMergeBounds and
- // shouldMergeBounds.
-
- // Is it remotely possible to merge the bounds? We care about isAnd because bounds
- // merging is currently totally unimplemented for ORs.
- //
- // TODO: When more than one child of an AND clause can have an index, we'll have to fix
- // the logic below to only merge when the merge is really filling out an additional
- // field of a compound index.
- //
- // TODO: Implement union of OILs to allow merging bounds for OR.
- // TODO: Implement intersection of OILs to allow merging of bounds for AND.
-
- bool canMergeBounds = (NULL != currentScan.get())
- && (currentIndexNumber == ixtag->index)
- && isAnd;
-
- // Is it semantically correct to merge bounds?
+ // the bounds for the two scans.
//
// Guiding principle: must the values we're testing come from the same array in the
// document? If so, we can combine bounds (via intersection or compounding). If not,
@@ -552,18 +583,30 @@ namespace mongo {
// If those values are both present in the same array, the index key for the
// aforementioned index will be {"":3, "":4}
// Therefore we can intersect bounds.
- //
- // XXX: There is more to it than this. See index13.js.
- bool shouldMergeBounds = (currentIndexNumber != IndexTag::kNoIndex)
- && !indices[currentIndexNumber].multikey;
+            // TODO: we should also merge when we're inside an array operator, but only once
+            // the cases in index13.js are figured out.
+            if (NULL != currentScan.get() && (currentIndexNumber == ixtag->index)
+                && !indices[currentIndexNumber].multikey) {
+ // The child uses the same index we're currently building a scan for. Merge
+ // the bounds and filters.
+ verify(currentIndexNumber == ixtag->index);
- // XXX: commented out until cases in index13.js are resolved.
- // bool shouldMergeBounds = !indices[currentIndexNumber].multikey || inArrayOperator;
+ bool exact = false;
+ mergeWithLeafNode(child, indices[currentIndexNumber], ixtag->pos, &exact,
+ currentScan.get(), root->matchType());
- if (!canMergeBounds || !shouldMergeBounds) {
+ if (exact) {
+ root->getChildVector()->erase(root->getChildVector()->begin()
+ + curChild);
+ delete child;
+ }
+ else {
+ // We keep curChild in the AND for affixing later.
+ ++curChild;
+ }
+ }
+ else {
if (NULL != currentScan.get()) {
- finishLeafNode(currentScan.get(), indices[currentIndexNumber].keyPattern);
+ finishLeafNode(currentScan.get(), indices[currentIndexNumber]);
out->push_back(currentScan.release());
}
else {
@@ -573,7 +616,7 @@ namespace mongo {
currentIndexNumber = ixtag->index;
bool exact = false;
- currentScan.reset(makeLeafNode(indices[currentIndexNumber].keyPattern,
+ currentScan.reset(makeLeafNode(indices[currentIndexNumber],
child, &exact));
if (exact && !inArrayOperator) {
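Bounds are only merged when the index is not multikey because, on a multikey index, the matching values may come from different array elements, and then no single index key satisfies the intersected bounds. A hand-modeled illustration (the key shapes are an assumption for this sketch, not output of the real key generator):

    // Hand-modeled index keys for a document {a: [{b: 3}, {c: 4}]} under a
    // compound index on {"a.b": 1, "a.c": 1}: one key per array element, with
    // kMissing standing in for a missing/null component.
    #include <cstddef>
    #include <iostream>
    #include <utility>
    #include <vector>

    int main() {
        const int kMissing = -1;
        std::vector<std::pair<int, int> > keys;
        keys.push_back(std::make_pair(3, kMissing));  // from array element {b: 3}
        keys.push_back(std::make_pair(kMissing, 4));  // from array element {c: 4}

        // Intersected compound bounds for {"a.b": 3, "a.c": 4} would require a
        // single key carrying both values; no such key exists for this document,
        // even though the document matches the query.
        bool anyKeyHasBoth = false;
        for (std::size_t i = 0; i < keys.size(); ++i) {
            if (3 == keys[i].first && 4 == keys[i].second) {
                anyKeyHasBoth = true;
            }
        }
        std::cout << (anyKeyHasBoth ? "intersection would find the doc"
                                    : "intersection would miss the doc") << std::endl;
        return 0;
    }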
@@ -590,30 +633,11 @@ namespace mongo {
++curChild;
}
}
- else {
- // The child uses the same index we're currently building a scan for. Merge
- // the bounds and filters.
- verify(currentIndexNumber == ixtag->index);
-
- bool exact = false;
- mergeWithLeafNode(child, indices[currentIndexNumber].keyPattern, ixtag->pos, &exact,
- currentScan.get());
-
- if (exact) {
- root->getChildVector()->erase(root->getChildVector()->begin()
- + curChild);
- delete child;
- }
- else {
- // We keep curChild in the AND for affixing later.
- ++curChild;
- }
- }
}
// Output the scan we're done with, if it exists.
if (NULL != currentScan.get()) {
- finishLeafNode(currentScan.get(), indices[currentIndexNumber].keyPattern);
+ finishLeafNode(currentScan.get(), indices[currentIndexNumber]);
out->push_back(currentScan.release());
}
@@ -703,7 +727,6 @@ namespace mongo {
autoRoot.reset(root);
}
- size_t expectedNumberScans = root->numChildren();
vector<QuerySolutionNode*> ixscanNodes;
if (!processIndexScans(root, inArrayOperator, indices, &ixscanNodes)) {
return NULL;
@@ -712,9 +735,7 @@ namespace mongo {
// Unlike an AND, an OR cannot have filters hanging off of it. We stop processing
// when any of our children lack index tags. If a node lacks an index tag it cannot
// be answered via an index.
- if (ixscanNodes.size() != expectedNumberScans
- || (!inArrayOperator && 0 != root->numChildren())) {
-
+ if (!inArrayOperator && 0 != root->numChildren()) {
warning() << "planner OR error, non-indexed child of OR.";
// We won't enumerate an OR without indices for each child, so this isn't an issue, even
// if we have an AND with an OR child -- we won't get here unless the OR is fully
@@ -800,13 +821,13 @@ namespace mongo {
IndexTag* tag = static_cast<IndexTag*>(root->getTag());
bool exact = false;
- QuerySolutionNode* soln = makeLeafNode(indices[tag->index].keyPattern, root,
+ QuerySolutionNode* soln = makeLeafNode(indices[tag->index], root,
&exact);
verify(NULL != soln);
stringstream ss;
soln->appendToString(&ss, 0);
- cout << "about to finish leaf node, soln " << ss.str() << endl;
- finishLeafNode(soln, indices[tag->index].keyPattern);
+ // cout << "about to finish leaf node, soln " << ss.str() << endl;
+ finishLeafNode(soln, indices[tag->index]);
if (inArrayOperator) {
return soln;
@@ -936,8 +957,10 @@ namespace mongo {
// Project the results.
if (NULL != query.getProj()) {
+ cout << "PROJECTION: fetched status: " << solnRoot->fetched() << endl;
+ cout << "PROJECTION: Current plan is " << solnRoot->toString() << endl;
if (query.getProj()->requiresDocument()) {
- cout << "projection claims to require doc adding fetch.\n";
+                cout << "PROJECTION: claims to require the whole doc; adding fetch.\n";
// If the projection requires the entire document, somebody must fetch.
if (!solnRoot->fetched()) {
FetchNode* fetch = new FetchNode();
@@ -946,11 +969,13 @@ namespace mongo {
}
}
else {
+ cout << "PROJECTION: requires fields\n";
const vector<string>& fields = query.getProj()->requiredFields();
bool covered = true;
for (size_t i = 0; i < fields.size(); ++i) {
if (!solnRoot->hasField(fields[i])) {
- cout << "not covered cuz doesn't have field " << fields[i] << endl;
+                        cout << "PROJECTION: not covered because it lacks field "
+ << fields[i] << endl;
covered = false;
break;
}
@@ -1080,6 +1105,7 @@ namespace mongo {
// Figure out what fields we care about.
unordered_set<string> fields;
getFields(query.root(), "", &fields);
+
/*
for (unordered_set<string>::const_iterator it = fields.begin(); it != fields.end(); ++it) {
cout << "field " << *it << endl;
@@ -1104,19 +1130,20 @@ namespace mongo {
}
}
+ size_t hintIndexNumber = numeric_limits<size_t>::max();
+
if (!hintIndex.isEmpty()) {
- bool hintValid = false;
for (size_t i = 0; i < indices.size(); ++i) {
if (0 == indices[i].keyPattern.woCompare(hintIndex)) {
relevantIndices.clear();
relevantIndices.push_back(indices[i]);
cout << "hint specified, restricting indices to " << hintIndex.toString()
<< endl;
- hintValid = true;
+ hintIndexNumber = i;
break;
}
}
- if (!hintValid) {
+ if (hintIndexNumber == numeric_limits<size_t>::max()) {
warning() << "Hinted index " << hintIndex.toString()
<< " does not exist, ignoring.";
}
@@ -1140,11 +1167,9 @@ namespace mongo {
// If we have any relevant indices, we try to create indexed plans.
if (0 < relevantIndices.size()) {
- /*
for (size_t i = 0; i < relevantIndices.size(); ++i) {
cout << "relevant idx " << i << " is " << relevantIndices[i].toString() << endl;
}
- */
// The enumerator spits out trees tagged with IndexTag(s).
PlanEnumerator isp(query.root(), &relevantIndices);
@@ -1173,9 +1198,12 @@ namespace mongo {
// scan the entire index to provide results and output that as our plan. This is the
// desired behavior when an index is hinted that is not relevant to the query.
if (!hintIndex.isEmpty() && (0 == out->size())) {
+ QuerySolutionNode* solnRoot = NULL;
+
// Build an ixscan over the id index, use it, and return it.
IndexScanNode* isn = new IndexScanNode();
isn->indexKeyPattern = hintIndex;
+ isn->indexIsMultiKey = indices[hintIndexNumber].multikey;
isn->bounds.fields.resize(hintIndex.nFields());
// TODO: can we use simple bounds with this compound idx?
@@ -1186,13 +1214,22 @@ namespace mongo {
++field;
}
- // TODO: We may not need to do the fetch if the predicates in root are covered. But
- // for now it's safe (though *maybe* slower).
- FetchNode* fetch = new FetchNode();
- fetch->filter.reset(query.root()->shallowClone());
- fetch->child.reset(isn);
+ MatchExpression* filter = query.root()->shallowClone();
+ // If it's find({}) remove the no-op root.
+ if (MatchExpression::AND == filter->matchType() && (0 == filter->numChildren())) {
+ delete filter;
+ solnRoot = isn;
+ }
+ else {
+ // TODO: We may not need to do the fetch if the predicates in root are covered. But
+ // for now it's safe (though *maybe* slower).
+ FetchNode* fetch = new FetchNode();
+ fetch->filter.reset(filter);
+ fetch->child.reset(isn);
+ solnRoot = fetch;
+ }
- QuerySolution* soln = analyzeDataAccess(query, fetch);
+ QuerySolution* soln = analyzeDataAccess(query, solnRoot);
verify(NULL != soln);
out->push_back(soln);
@@ -1223,6 +1260,7 @@ namespace mongo {
if (0 == kp.woCompare(query.getParsed().getSort())) {
IndexScanNode* isn = new IndexScanNode();
isn->indexKeyPattern = kp;
+ isn->indexIsMultiKey = indices[i].multikey;
isn->bounds.fields.resize(kp.nFields());
// TODO: can we use simple bounds if compound?
diff --git a/src/mongo/db/query/query_planner.h b/src/mongo/db/query/query_planner.h
index 573b5c64767..3bb7409c511 100644
--- a/src/mongo/db/query/query_planner.h
+++ b/src/mongo/db/query/query_planner.h
@@ -189,15 +189,16 @@ namespace mongo {
*
* If the node is a geo node, XXX.
*/
- static QuerySolutionNode* makeLeafNode(const BSONObj& indexKeyPattern,
+ static QuerySolutionNode* makeLeafNode(const IndexEntry& index,
MatchExpression* expr,
bool* exact);
/**
* Merge the predicate 'expr' with the leaf node 'node'.
*/
- static void mergeWithLeafNode(MatchExpression* expr, const BSONObj& keyPattern,
- size_t pos, bool* exactOut, QuerySolutionNode* node);
+ static void mergeWithLeafNode(MatchExpression* expr, const IndexEntry& index,
+ size_t pos, bool* exactOut, QuerySolutionNode* node,
+ MatchExpression::MatchType mergeType);
/**
* If index scan, fill in any bounds that are missing in 'node' with the "all values for
@@ -205,7 +206,7 @@ namespace mongo {
*
* If geo, XXX.
*/
- static void finishLeafNode(QuerySolutionNode* node, const BSONObj& indexKeyPattern);
+ static void finishLeafNode(QuerySolutionNode* node, const IndexEntry& index);
//
// Analysis of Data Access
diff --git a/src/mongo/db/query/query_planner_test.cpp b/src/mongo/db/query/query_planner_test.cpp
index 30e21c7dd92..e053c99c259 100644
--- a/src/mongo/db/query/query_planner_test.cpp
+++ b/src/mongo/db/query/query_planner_test.cpp
@@ -622,6 +622,7 @@ namespace {
}
TEST_F(SingleIndexTest, Basic2DSphereGeoNearReverseCompound) {
+ setIndex(BSON("x" << 1));
setIndex(BSON("x" << 1 << "a" << "2dsphere"));
runQuery(fromjson("{x:1, a: {$nearSphere: [0,0], $maxDistance: 0.31 }}"));
dumpSolutions();
diff --git a/src/mongo/db/query/query_solution.cpp b/src/mongo/db/query/query_solution.cpp
index 446f87a5a67..fd9e3332750 100644
--- a/src/mongo/db/query/query_solution.cpp
+++ b/src/mongo/db/query/query_solution.cpp
@@ -309,7 +309,8 @@ namespace mongo {
// IndexScanNode
//
- IndexScanNode::IndexScanNode() : filter(NULL), limit(0), direction(1) { }
+ IndexScanNode::IndexScanNode()
+ : indexIsMultiKey(false), filter(NULL), limit(0), direction(1) { }
void IndexScanNode::appendToString(stringstream* ss, int indent) const {
addIndent(ss, indent);
@@ -333,6 +334,10 @@ namespace mongo {
}
bool IndexScanNode::hasField(const string& field) const {
+ // There is no covering in a multikey index because you don't know whether or not the field
+ // in the key was extracted from an array in the original document.
+ if (indexIsMultiKey) { return false; }
+
BSONObjIterator it(indexKeyPattern);
while (it.more()) {
if (field == it.next().fieldName()) {
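The multikey check above exists because an index that holds one key per array element cannot reconstruct the original array, so it cannot cover a projection on that field. A small illustration with plain containers standing in for the document and its keys:

    // Plain containers stand in for the stored document and its index keys.
    #include <cstddef>
    #include <iostream>
    #include <vector>

    int main() {
        // Document field a: [1, 2] and a multikey index on {a: 1}.
        std::vector<int> storedA;
        storedA.push_back(1);
        storedA.push_back(2);

        // The index holds one key per array element, each knowing only a scalar.
        std::vector<int> indexKeysForA(storedA);

        // "Covering" the projection {a: 1} from a single key would return 1 or 2,
        // never [1, 2], so the planner must fetch the document instead.
        std::cout << "one key sees: " << indexKeysForA[0]
                  << ", but the field has " << storedA.size() << " elements" << std::endl;
        return 0;
    }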
diff --git a/src/mongo/db/query/query_solution.h b/src/mongo/db/query/query_solution.h
index 948ac02e658..c8a0e933247 100644
--- a/src/mongo/db/query/query_solution.h
+++ b/src/mongo/db/query/query_solution.h
@@ -49,6 +49,12 @@ namespace mongo {
*/
virtual StageType getType() const = 0;
+ string toString() const {
+ stringstream ss;
+ appendToString(&ss, 0);
+ return ss.str();
+ }
+
/**
* Internal function called by toString()
*
@@ -75,9 +81,6 @@ namespace mongo {
* Usage: If an index-only plan has all the fields we're interested in, we don't
* have to fetch to show results with those fields.
*
- * XXX TODO: Cover issues resulting from covered and multikey. Multikey prohibits
- * covering, but allows sort (double check!)
- *
* TODO: 'field' is probably more appropriate as a FieldRef or string.
*/
virtual bool hasField(const string& field) const = 0;
@@ -297,6 +300,8 @@ namespace mongo {
BSONObj indexKeyPattern;
+ bool indexIsMultiKey;
+
scoped_ptr<MatchExpression> filter;
// Only set for 2d.