| author | Hari Khalsa <hkhalsa@10gen.com> | 2013-10-05 13:04:39 -0400 |
|---|---|---|
| committer | Hari Khalsa <hkhalsa@10gen.com> | 2013-10-07 15:07:16 -0400 |
| commit | a5c4104bb28f2870602a8c7a5e4a4b506dbc99a1 (patch) | |
| tree | 3617caf0663936d4b109f3ff6ba590f307078f41 /src/mongo | |
| parent | b7660950c4888052f2c68a02f6667c114a72dfbd (diff) | |
| download | mongo-a5c4104bb28f2870602a8c7a5e4a4b506dbc99a1.tar.gz | |
SERVER-10471 merge index bounds, enumerate based on indices, bug fixes
Diffstat (limited to 'src/mongo')
22 files changed, 967 insertions, 484 deletions
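The core of the "merge index bounds" part of this commit is the new union/intersection logic that `IndexBoundsBuilder::unionize()` and `intersectize()` add in the diff below: intervals are sorted by start point and then adjacent entries are merged or intersected. As a rough, self-contained illustration of the union step only — using plain numeric intervals, not MongoDB's BSON-backed `Interval` class — the merge looks roughly like this:

```cpp
#include <algorithm>
#include <iostream>
#include <vector>

// Hypothetical stand-in for the BSON-backed Interval used in the diff below.
struct Iv {
    double start, end;
    bool startInc, endInc;
};

// Union overlapping/touching intervals in place. Like the diff's unionize(), we sort
// by start point first and then merge neighbors.
void unionize(std::vector<Iv>* iv) {
    std::sort(iv->begin(), iv->end(),
              [](const Iv& a, const Iv& b) { return a.start < b.start; });
    size_t i = 0;
    while (i + 1 < iv->size()) {
        Iv& cur = (*iv)[i];
        Iv& next = (*iv)[i + 1];
        // Disjoint only if there is a real gap, or the shared endpoint is excluded on both sides.
        bool disjoint = cur.end < next.start ||
                        (cur.end == next.start && !cur.endInc && !next.startInc);
        if (disjoint) {
            ++i;  // nothing to merge, move to the next pair
        }
        else {
            // Extend 'cur' to cover 'next', drop 'next', and stay at 'i' since the
            // enlarged interval may swallow further neighbors.
            if (next.end > cur.end || (next.end == cur.end && next.endInc)) {
                cur.end = next.end;
                cur.endInc = next.endInc;
            }
            iv->erase(iv->begin() + i + 1);
        }
    }
}

int main() {
    std::vector<Iv> iv = {{5, 7, true, true}, {1, 3, true, false}, {3, 4, true, true}};
    unionize(&iv);
    for (const Iv& x : iv)
        std::cout << (x.startInc ? '[' : '(') << x.start << ", " << x.end
                  << (x.endInc ? ']' : ')') << "\n";  // prints [1, 4] then [5, 7]
}
```

Here [1, 3) and [3, 4] touch at 3, which is the `INTERVAL_PRECEDES_COULD_UNION` case the commit adds to `Interval::compare()`, so they merge into [1, 4]; [5, 7] stays separate.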
diff --git a/src/mongo/db/exec/collection_scan.cpp b/src/mongo/db/exec/collection_scan.cpp index f381582c271..904187c6c9a 100644 --- a/src/mongo/db/exec/collection_scan.cpp +++ b/src/mongo/db/exec/collection_scan.cpp @@ -90,6 +90,8 @@ namespace mongo { member->obj = member->loc.obj(); member->state = WorkingSetMember::LOC_AND_UNOWNED_OBJ; + ++_specificStats.docsTested; + if (Filter::passes(member, _filter)) { *out = id; ++_commonStats.advanced; @@ -134,6 +136,7 @@ namespace mongo { PlanStageStats* CollectionScan::getStats() { _commonStats.isEOF = isEOF(); auto_ptr<PlanStageStats> ret(new PlanStageStats(_commonStats, STAGE_COLLSCAN)); + ret->specific.reset(new CollectionScanStats(_specificStats)); return ret.release(); } diff --git a/src/mongo/db/exec/collection_scan.h b/src/mongo/db/exec/collection_scan.h index 004b73bd309..b9075a3ad97 100644 --- a/src/mongo/db/exec/collection_scan.h +++ b/src/mongo/db/exec/collection_scan.h @@ -74,6 +74,7 @@ namespace mongo { // Stats CommonStats _commonStats; + CollectionScanStats _specificStats; }; } // namespace mongo diff --git a/src/mongo/db/exec/index_scan.cpp b/src/mongo/db/exec/index_scan.cpp index e9b2e2eafbd..d3ee4586069 100644 --- a/src/mongo/db/exec/index_scan.cpp +++ b/src/mongo/db/exec/index_scan.cpp @@ -109,9 +109,11 @@ namespace mongo { _hitEnd = true; return PlanStage::FAILURE; } + if (!isEOF()) { + _specificStats.keysExamined = 1; + } } else { - // XXX: must be actually a Btree // "Fast" Btree-specific navigation. _btreeCursor = static_cast<BtreeIndexCursor*>(_indexCursor.get()); _checker.reset(new IndexBoundsChecker(&_params.bounds, @@ -123,11 +125,14 @@ namespace mongo { vector<bool> inc; key.resize(nFields); inc.resize(nFields); - _checker->getStartKey(&key, &inc); - _btreeCursor->seek(key, inc); - - _keyElts.resize(nFields); - _keyEltsInc.resize(nFields); + if (_checker->getStartKey(&key, &inc)) { + _btreeCursor->seek(key, inc); + _keyElts.resize(nFields); + _keyEltsInc.resize(nFields); + } + else { + _hitEnd = true; + } } checkEnd(); @@ -141,7 +146,6 @@ namespace mongo { // _indexCursor->next() if we're EOF. if (!isEOF()) { _indexCursor->next(); - ++_specificStats.keysExamined; checkEnd(); } } @@ -189,7 +193,7 @@ namespace mongo { return false; } - return _indexCursor->isEOF() || _hitEnd; + return _hitEnd || _indexCursor->isEOF(); } void IndexScan::prepareToYield() { @@ -261,6 +265,10 @@ namespace mongo { _hitEnd = true; _commonStats.isEOF = true; } + + if (!isEOF() && _params.bounds.isSimpleRange) { + ++_specificStats.keysExamined; + } } else { verify(NULL != _btreeCursor); @@ -268,6 +276,8 @@ namespace mongo { // Use _checker to see how things are. for (;;) { + //cout << "current index key is " << _indexCursor->getKey().toString() << endl; + //cout << "keysExamined is " << _specificStats.keysExamined << endl; IndexBoundsChecker::KeyState keyState; keyState = _checker->checkKey(_indexCursor->getKey(), &_keyEltsToUse, @@ -275,13 +285,19 @@ namespace mongo { &_keyElts, &_keyEltsInc); - if (IndexBoundsChecker::VALID == keyState) { - break; - } if (IndexBoundsChecker::DONE == keyState) { _hitEnd = true; break; } + + // This seems weird but it's the old definition of nscanned. 
+ ++_specificStats.keysExamined; + + if (IndexBoundsChecker::VALID == keyState) { + break; + } + + // cout << "skipping...\n"; verify(IndexBoundsChecker::MUST_ADVANCE == keyState); _btreeCursor->skip(_indexCursor->getKey(), _keyEltsToUse, _movePastKeyElts, _keyElts, _keyEltsInc); diff --git a/src/mongo/db/exec/plan_stats.h b/src/mongo/db/exec/plan_stats.h index ebccd9125be..37f8c2d572e 100644 --- a/src/mongo/db/exec/plan_stats.h +++ b/src/mongo/db/exec/plan_stats.h @@ -108,6 +108,13 @@ namespace mongo { virtual ~SpecificStats() { } }; + struct CollectionScanStats : public SpecificStats { + CollectionScanStats() : docsTested(0) { } + + // How many documents did we check against our filter? + uint64_t docsTested; + }; + struct AndHashStats : public SpecificStats { AndHashStats() : flaggedButPassed(0), flaggedInProgress(0) { } diff --git a/src/mongo/db/query/canonical_query.cpp b/src/mongo/db/query/canonical_query.cpp index 623e37de4b6..06095070ac5 100644 --- a/src/mongo/db/query/canonical_query.cpp +++ b/src/mongo/db/query/canonical_query.cpp @@ -83,14 +83,17 @@ namespace mongo { return Status::OK(); } - void normalizeTree(MatchExpression* root) { + /** + * Returns the normalized version of the subtree rooted at 'root'. + */ + MatchExpression* normalizeTree(MatchExpression* root) { // root->isLogical() is true now. We care about AND and OR. Negations currently scare us. if (MatchExpression::AND == root->matchType() || MatchExpression::OR == root->matchType()) { // We could have AND of AND of AND. Make sure we clean up our children before merging // them. // UNITTEST 11738048 - for (size_t i = 0; i < root->numChildren(); ++i) { - normalizeTree(root->getChild(i)); + for (size_t i = 0; i < root->getChildVector()->size(); ++i) { + (*root->getChildVector())[i] = normalizeTree(root->getChild(i)); } // If any of our children are of the same logical operator that we are, we remove the @@ -119,7 +122,17 @@ namespace mongo { root->getChildVector()->insert(root->getChildVector()->end(), absorbedChildren.begin(), absorbedChildren.end()); + + // AND of 1 thing is the thing, OR of 1 thing is the thing. + if (1 == root->numChildren()) { + MatchExpression* ret = root->getChild(0); + root->getChildVector()->clear(); + delete root; + return ret; + } } + + return root; } // TODO: Should this really live in the parsing? Or elsewhere? @@ -202,7 +215,7 @@ namespace mongo { if (!swme.isOK()) { return swme.getStatus(); } MatchExpression* root = swme.getValue(); - normalizeTree(root); + root = normalizeTree(root); Status validStatus = isValid(root); if (!validStatus.isOK()) { return validStatus; diff --git a/src/mongo/db/query/explain_plan.cpp b/src/mongo/db/query/explain_plan.cpp index 42cc3dcecd6..087abb93586 100644 --- a/src/mongo/db/query/explain_plan.cpp +++ b/src/mongo/db/query/explain_plan.cpp @@ -81,12 +81,23 @@ namespace mongo { // number of keys that cursor retrieved, and into the stage's stats 'advanced' for // nscannedObjects', which would be the number of keys that survived the IXSCAN // filter. Those keys would have been FETCH-ed, if a fetch is present. - // - // TODO: This is kind of a lie for STAGE_GEO_NEAR_2DSPHERE. 
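The `canonical_query.cpp` change above rewrites `normalizeTree()` to return the (possibly different) root so that an AND/OR with a single child can be collapsed into that child after nested nodes of the same type are absorbed. A minimal standalone sketch of the same idea, using a hypothetical `Node` type rather than MongoDB's `MatchExpression`:

```cpp
#include <memory>
#include <string>
#include <vector>

// Hypothetical expression node, not MongoDB's MatchExpression.
struct Node {
    std::string op;                              // "AND", "OR", or a leaf predicate name
    std::vector<std::unique_ptr<Node>> children;
};

std::unique_ptr<Node> makeNode(const std::string& op) {
    auto n = std::make_unique<Node>();
    n->op = op;
    return n;
}

// Returns the normalized subtree rooted at 'root' (which may be a different node).
std::unique_ptr<Node> normalizeTree(std::unique_ptr<Node> root) {
    if (root->op == "AND" || root->op == "OR") {
        // Normalize children first, then absorb grandchildren with the same operator,
        // e.g. AND(AND(a, b), c) becomes AND(a, b, c).
        std::vector<std::unique_ptr<Node>> flat;
        for (auto& child : root->children) {
            auto norm = normalizeTree(std::move(child));
            if (norm->op == root->op) {
                for (auto& gc : norm->children) flat.push_back(std::move(gc));
            }
            else {
                flat.push_back(std::move(norm));
            }
        }
        root->children = std::move(flat);
        // AND of one thing is the thing; likewise for OR.
        if (root->children.size() == 1) return std::move(root->children[0]);
    }
    return root;
}

int main() {
    // AND(AND(p, q)) flattens to AND(p, q); an AND of one child collapses to the child.
    auto inner = makeNode("AND");
    inner->children.push_back(makeNode("p"));
    inner->children.push_back(makeNode("q"));
    auto root = makeNode("AND");
    root->children.push_back(std::move(inner));
    root = normalizeTree(std::move(root));  // root is now AND(p, q)
}
```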
- if (leaf->stageType == STAGE_COLLSCAN || leaf->stageType == STAGE_GEO_NEAR_2DSPHERE) { + if (leaf->stageType == STAGE_COLLSCAN) { + CollectionScanStats* csStats = static_cast<CollectionScanStats*>(leaf->specific.get()); res->setCursor("BasicCursor"); - res->setNScanned(leaf->common.advanced); - res->setNScannedObjects(leaf->common.advanced); + res->setNScanned(csStats->docsTested); + res->setNScannedObjects(csStats->docsTested); + + if (fullDetails) { + res->setIsMultiKey(false); + res->setIndexOnly(false); + } + } + else if (leaf->stageType == STAGE_GEO_NEAR_2DSPHERE) { + // TODO: This is kind of a lie for STAGE_GEO_NEAR_2DSPHERE. + res->setCursor("S2NearCursor"); + // The first work() is an init. Every subsequent work examines a document. + res->setNScanned(leaf->common.works); + res->setNScannedObjects(leaf->common.works); if (fullDetails) { res->setIsMultiKey(false); diff --git a/src/mongo/db/query/index_bounds.cpp b/src/mongo/db/query/index_bounds.cpp index 6177e7d5b61..b1f1d8eb911 100644 --- a/src/mongo/db/query/index_bounds.cpp +++ b/src/mongo/db/query/index_bounds.cpp @@ -201,15 +201,20 @@ namespace mongo { } } - void IndexBoundsChecker::getStartKey(vector<const BSONElement*>* valueOut, - vector<bool>* inclusiveOut) { + bool IndexBoundsChecker::getStartKey(vector<const BSONElement*>* valueOut, + vector<bool>* inclusiveOut) { verify(valueOut->size() == _bounds->fields.size()); verify(inclusiveOut->size() == _bounds->fields.size()); for (size_t i = 0; i < _bounds->fields.size(); ++i) { + if (0 == _bounds->fields[i].intervals.size()) { + return false; + } (*valueOut)[i] = &_bounds->fields[i].intervals[0].start; (*inclusiveOut)[i] = _bounds->fields[i].intervals[0].startInclusive; } + + return true; } bool IndexBoundsChecker::findLeftmostProblem(const vector<BSONElement>& keyValues, diff --git a/src/mongo/db/query/index_bounds.h b/src/mongo/db/query/index_bounds.h index 81aa7b008d2..2b0ba90cee6 100644 --- a/src/mongo/db/query/index_bounds.h +++ b/src/mongo/db/query/index_bounds.h @@ -107,8 +107,10 @@ namespace mongo { /** * Get the key that we should with. + * + * Returns true if there is a valid start key. Returns false otherwise. */ - void getStartKey(vector<const BSONElement*>* valueOut, vector<bool>* inclusiveOut); + bool getStartKey(vector<const BSONElement*>* valueOut, vector<bool>* inclusiveOut); /** * The states of a key from an index scan. See checkKey below. diff --git a/src/mongo/db/query/index_bounds_builder.cpp b/src/mongo/db/query/index_bounds_builder.cpp index 2e0ac5cd6eb..c7a041cfa10 100644 --- a/src/mongo/db/query/index_bounds_builder.cpp +++ b/src/mongo/db/query/index_bounds_builder.cpp @@ -40,6 +40,109 @@ namespace mongo { + string IndexBoundsBuilder::simpleRegex(const char* regex, const char* flags, bool* exact) { + string r = ""; + *exact = false; + + bool multilineOK; + if ( regex[0] == '\\' && regex[1] == 'A') { + multilineOK = true; + regex += 2; + } + else if (regex[0] == '^') { + multilineOK = false; + regex += 1; + } + else { + return r; + } + + bool extended = false; + while (*flags) { + switch (*(flags++)) { + case 'm': // multiline + if (multilineOK) + continue; + else + return r; + case 'x': // extended + extended = true; + break; + default: + return r; // cant use index + } + } + + stringstream ss; + + while(*regex) { + char c = *(regex++); + if ( c == '*' || c == '?' 
) { + // These are the only two symbols that make the last char optional + r = ss.str(); + r = r.substr( 0 , r.size() - 1 ); + return r; //breaking here fails with /^a?/ + } + else if (c == '|') { + // whole match so far is optional. Nothing we can do here. + return string(); + } + else if (c == '\\') { + c = *(regex++); + if (c == 'Q'){ + // \Q...\E quotes everything inside + while (*regex) { + c = (*regex++); + if (c == '\\' && (*regex == 'E')){ + regex++; //skip the 'E' + break; // go back to start of outer loop + } + else { + ss << c; // character should match itself + } + } + } + else if ((c >= 'A' && c <= 'Z') || + (c >= 'a' && c <= 'z') || + (c >= '0' && c <= '0') || + (c == '\0')) { + // don't know what to do with these + r = ss.str(); + break; + } + else { + // slash followed by non-alphanumeric represents the following char + ss << c; + } + } + else if (strchr("^$.[()+{", c)) { + // list of "metacharacters" from man pcrepattern + r = ss.str(); + break; + } + else if (extended && c == '#') { + // comment + r = ss.str(); + break; + } + else if (extended && isspace(c)) { + continue; + } + else { + // self-matching char + ss << c; + } + } + + if ( r.empty() && *regex == 0 ) { + r = ss.str(); + *exact = !r.empty(); + } + + return r; + } + + // static void IndexBoundsBuilder::allValuesForField(const BSONElement& elt, OrderedIntervalList* out) { // ARGH, BSONValue would make this shorter. @@ -66,13 +169,42 @@ namespace mongo { return makeRangeInterval(bob.obj(), true, true); } + bool IntervalComparison(const Interval& lhs, const Interval& rhs) { + int wo = lhs.start.woCompare(rhs.start, false); + if (0 != wo) { + return wo < 0; + } + // The start and end are equal. Put the bound that's inclusive to the left. + if (lhs.startInclusive) { + return true; + } + if (rhs.startInclusive) { + return false; + } + // If neither start nor end is inclusive but they begin at the same point who cares which is + // first. + return true; + } + + // static + void IndexBoundsBuilder::translateAndIntersect(const MatchExpression* expr, const BSONElement& elt, + OrderedIntervalList* oilOut, bool* exactOut) { + OrderedIntervalList arg; + translate(expr, elt, &arg, exactOut); + // translate outputs arg in sorted order. intersectize assumes that its arguments are sorted. + intersectize(arg, oilOut); + } + + // static + void IndexBoundsBuilder::translateAndUnion(const MatchExpression* expr, const BSONElement& elt, + OrderedIntervalList* oilOut, bool* exactOut) { + translate(expr, elt, oilOut, exactOut); + unionize(oilOut); + } + // static void IndexBoundsBuilder::translate(const MatchExpression* expr, const BSONElement& elt, OrderedIntervalList* oilOut, bool* exactOut) { - int direction = (elt.numberInt() >= 0) ? 1 : -1; - - Interval interval; - bool exact = false; oilOut->name = elt.fieldName(); bool isHashed = false; @@ -86,41 +218,8 @@ namespace mongo { } if (MatchExpression::EQ == expr->matchType()) { - const EqualityMatchExpression* node = - static_cast<const EqualityMatchExpression*>(expr); - - // We have to copy the data out of the parse tree and stuff it into the index - // bounds. BSONValue will be useful here. 
- BSONObj dataObj; - - if (isHashed) { - dataObj = ExpressionMapping::hash(node->getData()); - } - else { - dataObj = objFromElement(node->getData()); - } - - // UNITTEST 11738048 - if (Array == dataObj.firstElement().type()) { - // XXX: build better bounds - warning() << "building lazy bounds for " << expr->toString() << endl; - interval = allValues(); - exact = false; - } - else { - verify(dataObj.isOwned()); - interval = makePointInterval(dataObj); - // XXX: it's exact if the index isn't sparse - if (dataObj.firstElement().isNull()) { - exact = false; - } - else if (isHashed) { - exact = false; - } - else { - exact = true; - } - } + const EqualityMatchExpression* node = static_cast<const EqualityMatchExpression*>(expr); + translateEquality(node->getData(), isHashed, oilOut, exactOut); } else if (MatchExpression::LTE == expr->matchType()) { const LTEMatchExpression* node = static_cast<const LTEMatchExpression*>(expr); @@ -130,9 +229,9 @@ namespace mongo { bob.append(dataElt); BSONObj dataObj = bob.obj(); verify(dataObj.isOwned()); - interval = makeRangeInterval(dataObj, true, true); + oilOut->intervals.push_back(makeRangeInterval(dataObj, true, true)); // XXX: only exact if not (null or array) - exact = true; + *exactOut = true; } else if (MatchExpression::LT == expr->matchType()) { const LTMatchExpression* node = static_cast<const LTMatchExpression*>(expr); @@ -142,9 +241,9 @@ namespace mongo { bob.append(dataElt); BSONObj dataObj = bob.obj(); verify(dataObj.isOwned()); - interval = makeRangeInterval(dataObj, true, false); + oilOut->intervals.push_back(makeRangeInterval(dataObj, true, false)); // XXX: only exact if not (null or array) - exact = true; + *exactOut = true; } else if (MatchExpression::GT == expr->matchType()) { const GTMatchExpression* node = static_cast<const GTMatchExpression*>(expr); @@ -154,9 +253,9 @@ namespace mongo { bob.appendMaxForType("", dataElt.type()); BSONObj dataObj = bob.obj(); verify(dataObj.isOwned()); - interval = makeRangeInterval(dataObj, false, true); + oilOut->intervals.push_back(makeRangeInterval(dataObj, false, true)); // XXX: only exact if not (null or array) - exact = true; + *exactOut = true; } else if (MatchExpression::GTE == expr->matchType()) { const GTEMatchExpression* node = static_cast<const GTEMatchExpression*>(expr); @@ -167,14 +266,14 @@ namespace mongo { bob.appendMaxForType("", dataElt.type()); BSONObj dataObj = bob.obj(); verify(dataObj.isOwned()); - interval = makeRangeInterval(dataObj, true, true); + + oilOut->intervals.push_back(makeRangeInterval(dataObj, true, true)); // XXX: only exact if not (null or array) - exact = true; + *exactOut = true; } else if (MatchExpression::REGEX == expr->matchType()) { - warning() << "building lazy bounds for " << expr->toString() << endl; - interval = allValues(); - exact = false; + const RegexMatchExpression* rme = static_cast<const RegexMatchExpression*>(expr); + translateRegex(rme, oilOut, exactOut); } else if (MatchExpression::MOD == expr->matchType()) { BSONObjBuilder bob; @@ -182,13 +281,8 @@ namespace mongo { bob.appendMaxForType("", NumberDouble); BSONObj dataObj = bob.obj(); verify(dataObj.isOwned()); - interval = makeRangeInterval(dataObj, true, true); - exact = false; - } - else if (MatchExpression::MATCH_IN == expr->matchType()) { - warning() << "building lazy bounds for " << expr->toString() << endl; - interval = allValues(); - exact = false; + oilOut->intervals.push_back(makeRangeInterval(dataObj, true, true)); + *exactOut = false; } else if (MatchExpression::TYPE_OPERATOR == 
expr->matchType()) { const TypeMatchExpression* tme = static_cast<const TypeMatchExpression*>(expr); @@ -197,13 +291,40 @@ namespace mongo { bob.appendMaxForType("", tme->getData()); BSONObj dataObj = bob.obj(); verify(dataObj.isOwned()); - interval = makeRangeInterval(dataObj, true, true); - exact = false; + oilOut->intervals.push_back(makeRangeInterval(dataObj, true, true)); + *exactOut = false; } else if (MatchExpression::MATCH_IN == expr->matchType()) { - warning() << "building lazy bounds for " << expr->toString() << endl; - interval = allValues(); - exact = false; + const InMatchExpression* ime = static_cast<const InMatchExpression*>(expr); + const ArrayFilterEntries& afr = ime->getData(); + + *exactOut = true; + + // Create our various intervals. + + bool thisBoundExact = false; + for (BSONElementSet::iterator it = afr.equalities().begin(); + it != afr.equalities().end(); ++it) { + + translateEquality(*it, isHashed, oilOut, &thisBoundExact); + if (!thisBoundExact) { + *exactOut = false; + } + } + + for (size_t i = 0; i < afr.numRegexes(); ++i) { + translateRegex(afr.regex(i), oilOut, &thisBoundExact); + if (!thisBoundExact) { + *exactOut = false; + } + } + + // XXX: what happens here? + if (afr.hasNull()) { } + // XXX: what happens here as well? + if (afr.hasEmptyArray()) { } + + unionize(oilOut); } else if (MatchExpression::GEO == expr->matchType()) { const GeoMatchExpression* gme = static_cast<const GeoMatchExpression*>(expr); @@ -217,21 +338,12 @@ namespace mongo { const S2Region& region = gme->getGeoQuery().getRegion(); ExpressionMapping::cover2dsphere(region, oilOut); *exactOut = false; - // XXX: restructure this method - return; } else { warning() << "Planner error, trying to build bounds for expr " << expr->toString() << endl; verify(0); } - - if (-1 == direction) { - reverseInterval(&interval); - } - - oilOut->intervals.push_back(interval); - *exactOut = exact; } // static @@ -250,6 +362,116 @@ namespace mongo { } // static + void IndexBoundsBuilder::intersectize(const OrderedIntervalList& arg, + OrderedIntervalList* oilOut) { + verify(arg.name == oilOut->name); + + size_t argidx = 0; + const vector<Interval>& argiv = arg.intervals; + + size_t ividx = 0; + vector<Interval>& iv = oilOut->intervals; + + vector<Interval> result; + + while (argidx < argiv.size() && ividx < iv.size()) { + Interval::IntervalComparison cmp = argiv[argidx].compare(iv[ividx]); + cout << "comparing " << argiv[argidx].toString() + << " with " << iv[ividx].toString() + << " cmp is " << Interval::cmpstr(cmp) << endl; + + verify(Interval::INTERVAL_UNKNOWN != cmp); + + if (cmp == Interval::INTERVAL_PRECEDES + || cmp == Interval::INTERVAL_PRECEDES_COULD_UNION) { + // argiv is before iv. move argiv forward. + ++argidx; + } + else if (cmp == Interval::INTERVAL_SUCCEEDS) { + // iv is before argiv. move iv forward. + ++ividx; + } + else { + // argiv[argidx] (cmpresults) iv[ividx] + Interval newInt = argiv[argidx]; + newInt.intersect(iv[ividx], cmp); + result.push_back(newInt); + + if (Interval::INTERVAL_EQUALS == cmp) { + ++argidx; + ++ividx; + } + else if (Interval::INTERVAL_WITHIN == cmp) { + ++argidx; + } + else if (Interval::INTERVAL_CONTAINS == cmp) { + ++ividx; + } + else if (Interval::INTERVAL_OVERLAPS_BEFORE == cmp) { + ++argidx; + } + else if (Interval::INTERVAL_OVERLAPS_AFTER == cmp) { + ++ividx; + } + else { + verify(0); + } + } + } + // XXX swap + oilOut->intervals = result; + } + + // static + void IndexBoundsBuilder::unionize(OrderedIntervalList* oilOut) { + // Step 1: sort. 
+ std::sort(oilOut->intervals.begin(), oilOut->intervals.end(), IntervalComparison); + + // Step 2: Walk through and merge. + vector<Interval>& iv = oilOut->intervals; + size_t i = 0; + while (i < iv.size() - 1) { + // Compare i with i + 1. + Interval::IntervalComparison cmp = iv[i].compare(iv[i + 1]); + // cout << "comparing " << iv[i].toString() << " with " << iv[i+1].toString() + // << " cmp is " << Interval::cmpstr(cmp) << endl; + + // This means our sort didn't work. + verify(Interval::INTERVAL_SUCCEEDS != cmp); + + // Intervals are correctly ordered. + if (Interval::INTERVAL_PRECEDES == cmp) { + // We can move to the next pair. + ++i; + } + else if (Interval::INTERVAL_EQUALS == cmp || Interval::INTERVAL_WITHIN == cmp) { + // Interval 'i' is equal to i+1, or is contained within i+1. + // Remove interval i and don't move to the next value of 'i'. + iv.erase(iv.begin() + i); + } + else if (Interval::INTERVAL_CONTAINS == cmp) { + // Interval 'i' contains i+1, remove i+1 and don't move to the next value of 'i'. + iv.erase(iv.begin() + i + 1); + } + else if (Interval::INTERVAL_OVERLAPS_BEFORE == cmp + || Interval::INTERVAL_PRECEDES_COULD_UNION == cmp) { + // We want to merge intervals i and i+1. + // Interval 'i' starts before interval 'i+1'. + BSONObjBuilder bob; + bob.append(iv[i].start); + bob.append(iv[i + 1].end); + BSONObj data = bob.obj(); + bool startInclusive = iv[i].startInclusive; + bool endInclusive = iv[i + i].endInclusive; + iv.erase(iv.begin() + i); + // iv[i] is now the former iv[i + 1] + iv[i] = makeRangeInterval(data, startInclusive, endInclusive); + // Don't increment 'i'. + } + } + } + + // static Interval IndexBoundsBuilder::makeRangeInterval(const string& start, const string& end, bool startInclusive, bool endInclusive) { BSONObjBuilder bob; @@ -292,4 +514,65 @@ namespace mongo { ival->endInclusive = tmpInc; } + // static + void IndexBoundsBuilder::translateRegex(const RegexMatchExpression* rme, + OrderedIntervalList* oilOut, bool* exact) { + + const string start = simpleRegex(rme->getString().c_str(), rme->getFlags().c_str(), exact); + + cout << "regex bounds start is " << start << endl; + // Note that 'exact' is set by simpleRegex above. + if (!start.empty()) { + string end = start; + end[end.size() - 1]++; + oilOut->intervals.push_back(makeRangeInterval(start, end, true, false)); + } + else { + BSONObjBuilder bob; + bob.appendMinForType("", String); + bob.appendMaxForType("", String); + BSONObj dataObj = bob.obj(); + verify(dataObj.isOwned()); + oilOut->intervals.push_back(makeRangeInterval(dataObj, true, false)); + } + + // Regexes are after strings. + BSONObjBuilder bob; + bob.appendRegex("", rme->getString(), rme->getFlags()); + oilOut->intervals.push_back(makePointInterval(bob.obj())); + } + + // static + void IndexBoundsBuilder::translateEquality(const BSONElement& data, bool isHashed, + OrderedIntervalList* oil, bool* exact) { + // We have to copy the data out of the parse tree and stuff it into the index + // bounds. BSONValue will be useful here. + BSONObj dataObj; + + if (isHashed) { + dataObj = ExpressionMapping::hash(data); + } + else { + dataObj = objFromElement(data); + } + + // UNITTEST 11738048 + if (Array == dataObj.firstElement().type()) { + // XXX: bad + oil->intervals.push_back(allValues()); + *exact = false; + } + else { + verify(dataObj.isOwned()); + oil->intervals.push_back(makePointInterval(dataObj)); + // XXX: it's exact if the index isn't sparse? 
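The `translateRegex()` code above converts a rooted regex such as `/^abc/` into index bounds: a half-open range built from the extracted literal prefix (with its last character bumped), plus a point interval for the regex value itself. Illustrating just the prefix-to-range step with a hypothetical helper (not the MongoDB function):

```cpp
#include <cassert>
#include <string>
#include <utility>

// Given the literal prefix extracted from a ^-anchored regex (e.g. "abc" from /^abc/),
// return the half-open string range [prefix, prefixWithLastCharBumped). Every string
// that starts with the prefix sorts inside this range, so an index scan can use it.
// The prefix is assumed non-empty, matching the !start.empty() check in the diff.
std::pair<std::string, std::string> prefixRange(std::string prefix) {
    assert(!prefix.empty());
    std::string end = prefix;
    end[end.size() - 1]++;  // "abc" -> "abd", mirroring end[end.size() - 1]++ above
    return {std::move(prefix), std::move(end)};
}

int main() {
    auto r = prefixRange("abc");
    assert(r.first == "abc" && r.second == "abd");
    assert(std::string("abcde") >= r.first && std::string("abcde") < r.second);
}
```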
+ if (dataObj.firstElement().isNull() || isHashed) { + *exact = false; + } + else { + *exact = true; + } + } + } + } // namespace mongo diff --git a/src/mongo/db/query/index_bounds_builder.h b/src/mongo/db/query/index_bounds_builder.h index 7c96cab676a..5a0921a213f 100644 --- a/src/mongo/db/query/index_bounds_builder.h +++ b/src/mongo/db/query/index_bounds_builder.h @@ -58,6 +58,20 @@ namespace mongo { static void translate(const MatchExpression* expr, const BSONElement& elt, OrderedIntervalList* oilOut, bool* exactOut); + /** + * Creates bounds for 'expr' (indexed according to 'elt'). Intersects those bounds + * with the bounds in oilOut, which is an in/out parameter. + */ + static void translateAndIntersect(const MatchExpression* expr, const BSONElement& elt, + OrderedIntervalList* oilOut, bool* exactOut); + + /** + * Creates bounds for 'expr' (indexed according to 'elt'). Unions those bounds + * with the bounds in oilOut, which is an in/out parameter. + */ + static void translateAndUnion(const MatchExpression* expr, const BSONElement& elt, + OrderedIntervalList* oilOut, bool* exactOut); + private: friend class ExpressionMapping; @@ -91,7 +105,27 @@ namespace mongo { */ static void reverseInterval(Interval* ival); + /** + * Copied almost verbatim from db/queryutil.cpp. + * + * returns a string that when used as a matcher, would match a super set of regex() + * + * returns "" for complex regular expressions + * + * used to optimize queries in some simple regex cases that start with '^' + */ + static string simpleRegex(const char* regex, const char* flags, bool* exact); + static Interval allValues(); + + static void translateRegex(const RegexMatchExpression* rme, OrderedIntervalList* oil, + bool* exact); + + static void translateEquality(const BSONElement& data, bool isHashed, + OrderedIntervalList* oil, bool* exact); + + static void unionize(OrderedIntervalList* oilOut); + static void intersectize(const OrderedIntervalList& arg, OrderedIntervalList* oilOut); }; } // namespace mongo diff --git a/src/mongo/db/query/index_entry.h b/src/mongo/db/query/index_entry.h index 59c3aa166e3..bdfdd1c7fd9 100644 --- a/src/mongo/db/query/index_entry.h +++ b/src/mongo/db/query/index_entry.h @@ -28,7 +28,8 @@ #pragma once -#include <vector> +#include <sstream> +#include <string> #include "mongo/db/jsobj.h" @@ -38,7 +39,8 @@ namespace mongo { * This name sucks, but every name involving 'index' is used somewhere. */ struct IndexEntry { - IndexEntry(BSONObj kp, bool mk, bool sp) : keyPattern(kp), multikey(mk), sparse(sp) { } + IndexEntry(const BSONObj& kp, bool mk, bool sp) + : keyPattern(kp), multikey(mk), sparse(sp) { } IndexEntry(const IndexEntry& other) { keyPattern = other.keyPattern; @@ -51,6 +53,18 @@ namespace mongo { bool multikey; bool sparse; + + std::string toString() const { + stringstream ss; + ss << keyPattern.toString(); + if (multikey) { + ss << " multikey"; + } + if (sparse) { + ss << " sparse"; + } + return ss.str(); + } }; } // namespace mongo diff --git a/src/mongo/db/query/index_tag.h b/src/mongo/db/query/index_tag.h index 2173adce9ff..afc4c4a11e5 100644 --- a/src/mongo/db/query/index_tag.h +++ b/src/mongo/db/query/index_tag.h @@ -57,6 +57,12 @@ namespace mongo { std::vector<size_t> first; std::vector<size_t> notFirst; + // We don't know the full path from a node unless we keep notes as we traverse from the + // root. We do this once and store it. + // TODO: Do a FieldRef / StringData pass. + // TODO: We might want this inside of the MatchExpression. 
+ string path; + virtual void debugString(StringBuilder* builder) const { *builder << "First: "; for (size_t i = 0; i < first.size(); ++i) { @@ -66,6 +72,7 @@ namespace mongo { for (size_t i = 0; i < notFirst.size(); ++i) { *builder << notFirst[i] << " "; } + *builder << "full path: " << path; } virtual MatchExpression::TagData* clone() const { diff --git a/src/mongo/db/query/interval.cpp b/src/mongo/db/query/interval.cpp index 9bf0cc51a8f..6c80632c4a5 100644 --- a/src/mongo/db/query/interval.cpp +++ b/src/mongo/db/query/interval.cpp @@ -99,13 +99,9 @@ namespace mongo { } // unnamed namespace - Interval:: Interval() - : _intervalData(BSONObj()) - , start(BSONElement()) - , startInclusive(false) - , end(BSONElement()) - , endInclusive(false) { - } + Interval::Interval() + : _intervalData(BSONObj()), start(BSONElement()), startInclusive(false), end(BSONElement()), + endInclusive(false) { } Interval::Interval(BSONObj base, bool si, bool ei) { init(base, si, ei); @@ -128,11 +124,11 @@ namespace mongo { // TODO: shortcut number of comparisons Interval::IntervalComparison Interval::compare(const Interval& other) const { - // // Intersect cases // + // TODO: rewrite this to be member functions so semantics are clearer. if (intersects(*this, other)) { if (exact(*this, other)) { return INTERVAL_EQUALS; @@ -144,7 +140,7 @@ namespace mongo { return INTERVAL_CONTAINS; } if (precedes(*this, other)) { - return INTERVAL_OVERLAPS_BEFORE; + return INTERVAL_OVERLAPS_BEFORE; } return INTERVAL_OVERLAPS_AFTER; } @@ -154,9 +150,13 @@ namespace mongo { // if (precedes(*this, other)) { + if (0 == end.woCompare(other.start, false)) { + return INTERVAL_PRECEDES_COULD_UNION; + } return INTERVAL_PRECEDES; } - return INTERVAL_SUCCEDS; + + return INTERVAL_SUCCEEDS; } void Interval::intersect(const Interval& other, IntervalComparison cmp) { @@ -190,7 +190,7 @@ namespace mongo { break; case INTERVAL_PRECEDES: - case INTERVAL_SUCCEDS: + case INTERVAL_SUCCEEDS: *this = Interval(); break; @@ -218,7 +218,7 @@ namespace mongo { break; case INTERVAL_OVERLAPS_AFTER: - case INTERVAL_SUCCEDS: + case INTERVAL_SUCCEEDS: builder.append(other.start); builder.append(end); init(builder.obj(), other.startInclusive, endInclusive); @@ -236,4 +236,55 @@ namespace mongo { } } + // static + string Interval::cmpstr(IntervalComparison c) { + if (c == INTERVAL_EQUALS) { + return "INTERVAL_EQUALS"; + } + + // 'this' contains the other interval. + if (c == INTERVAL_CONTAINS) { + return "INTERVAL_CONTAINS"; + } + + // 'this' is contained by the other interval. + if (c == INTERVAL_WITHIN) { + return "INTERVAL_WITHIN"; + } + + // The two intervals intersect and 'this' is before the other interval. + if (c == INTERVAL_OVERLAPS_BEFORE) { + return "INTERVAL_OVERLAPS_BEFORE"; + } + + // The two intervals intersect and 'this is after the other interval. + if (c == INTERVAL_OVERLAPS_AFTER) { + return "INTERVAL_OVERLAPS_AFTER"; + } + + // There is no intersection. 
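The `Interval::compare()` change above distinguishes two disjoint cases: when the first interval's end equals the other's start the result is the new `INTERVAL_PRECEDES_COULD_UNION` (the unionize step can merge them), otherwise plain `INTERVAL_PRECEDES`. A toy numeric check of that distinction (not the real BSON comparison):

```cpp
#include <cassert>

enum Cmp { PRECEDES, PRECEDES_COULD_UNION };

// Classify two disjoint intervals by whether they share an endpoint.
Cmp compareDisjoint(double firstEnd, double secondStart) {
    return (firstEnd == secondStart) ? PRECEDES_COULD_UNION : PRECEDES;
}

int main() {
    assert(compareDisjoint(5, 5) == PRECEDES_COULD_UNION);  // [3,5) vs [5,8] -> union is [3,8]
    assert(compareDisjoint(5, 6) == PRECEDES);              // [3,5) vs [6,8] -> genuine gap
}
```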
+ if (c == INTERVAL_PRECEDES) { + return "INTERVAL_PRECEDES"; + } + + if (c == INTERVAL_PRECEDES_COULD_UNION) { + return "INTERVAL_PRECEDES_COULD_UNION"; + } + + if (c == INTERVAL_SUCCEEDS) { + return "INTERVAL_SUCCEEDS"; + } + + if (c == INTERVAL_UNKNOWN) { + return "INTERVAL_UNKNOWN"; + } + + return "NO IDEA DUDE"; + } + + void Interval::reverse() { + std::swap(start, end); + std::swap(startInclusive, endInclusive); + } + } // namespace mongo diff --git a/src/mongo/db/query/interval.h b/src/mongo/db/query/interval.h index 26dff5b2bcd..b5a2e33ed7e 100644 --- a/src/mongo/db/query/interval.h +++ b/src/mongo/db/query/interval.h @@ -76,24 +76,54 @@ namespace mongo { /** Returns true if an empty-constructed interval hasn't been init()-ialized yet */ bool isEmpty() const; + /** + * Swap start and end points of interval. + */ + void reverse(); + /** Returns how 'this' compares to 'other' */ enum IntervalComparison { + // // There is some intersection. + // + + // The two intervals are *exactly* equal. INTERVAL_EQUALS, + + // 'this' contains the other interval. INTERVAL_CONTAINS, + + // 'this' is contained by the other interval. INTERVAL_WITHIN, + + // The two intervals intersect and 'this' is before the other interval. INTERVAL_OVERLAPS_BEFORE, + + // The two intervals intersect and 'this is after the other interval. INTERVAL_OVERLAPS_AFTER, + // // There is no intersection. + // + INTERVAL_PRECEDES, - INTERVAL_SUCCEDS, + + // This happens if we have [a,b) [b,c] + INTERVAL_PRECEDES_COULD_UNION, + + INTERVAL_SUCCEEDS, INTERVAL_UNKNOWN }; + IntervalComparison compare(const Interval& other) const; /** + * toString for IntervalComparison + */ + static string cmpstr(IntervalComparison c); + + /** * Updates 'this' with the intersection of 'this' and 'other'. If 'this' and 'other' * have been compare()d before, that result can be optionally passed in 'cmp' */ diff --git a/src/mongo/db/query/interval_test.cpp b/src/mongo/db/query/interval_test.cpp index 9118b327d0f..55bb164d87e 100644 --- a/src/mongo/db/query/interval_test.cpp +++ b/src/mongo/db/query/interval_test.cpp @@ -136,19 +136,19 @@ namespace { TEST(Comparison, Succeds) { Interval a(BSON("" << 10 << "" << 20), true, true); - ASSERT_NOT_EQUALS(a.compare(a), Interval::INTERVAL_SUCCEDS); + ASSERT_NOT_EQUALS(a.compare(a), Interval::INTERVAL_SUCCEEDS); Interval b(BSON("" << 20 << "" << 30), true, true); - ASSERT_NOT_EQUALS(b.compare(a), Interval::INTERVAL_SUCCEDS); + ASSERT_NOT_EQUALS(b.compare(a), Interval::INTERVAL_SUCCEEDS); Interval c(BSON("" << 20 << "" << 30), false, true); - ASSERT_EQUALS(c.compare(a), Interval::INTERVAL_SUCCEDS); + ASSERT_EQUALS(c.compare(a), Interval::INTERVAL_SUCCEEDS); Interval d(BSON("" << 21 << "" << 30), true, true); - ASSERT_EQUALS(d.compare(a), Interval::INTERVAL_SUCCEDS); + ASSERT_EQUALS(d.compare(a), Interval::INTERVAL_SUCCEEDS); Interval e(BSON("" << 15 << "" << 30), true, true); - ASSERT_NOT_EQUALS(e.compare(a), Interval::INTERVAL_SUCCEDS); + ASSERT_NOT_EQUALS(e.compare(a), Interval::INTERVAL_SUCCEEDS); } // diff --git a/src/mongo/db/query/plan_enumerator.cpp b/src/mongo/db/query/plan_enumerator.cpp index 62626eec11b..3165c2f31db 100644 --- a/src/mongo/db/query/plan_enumerator.cpp +++ b/src/mongo/db/query/plan_enumerator.cpp @@ -27,7 +27,7 @@ namespace mongo { : _root(root), _indices(indices) { } PlanEnumerator::~PlanEnumerator() { - for (map<size_t, NodeSolution*>::iterator it = _memo.begin(); it != _memo.end(); ++it) { + for (unordered_map<NodeID, NodeAssignment*>::iterator it = _memo.begin(); it != _memo.end(); 
++it) { delete it->second; } } @@ -47,6 +47,7 @@ namespace mongo { // cout << "root post-memo: " << _root->toString() << endl; cout << "memo dump:\n"; + verify(_inOrderCount == _memo.size()); for (size_t i = 0; i < _inOrderCount; ++i) { cout << "Node #" << i << ": " << _memo[i]->toString() << endl; } @@ -54,17 +55,16 @@ namespace mongo { if (!_done) { // Tag with our first solution. tagMemo(_nodeToId[_root]); - checkCompound("", _root); } return Status::OK(); } - bool PlanEnumerator::isCompound(size_t idx) { + bool PlanEnumerator::isCompound(IndexID idx) { return (*_indices)[idx].keyPattern.nFields() > 1; } - string PlanEnumerator::NodeSolution::toString() const { + string PlanEnumerator::NodeAssignment::toString() const { if (NULL != pred) { stringstream ss; ss << "predicate, first indices: ["; @@ -82,149 +82,36 @@ namespace mongo { ss << "], pred: " << pred->expr->toString(); return ss.str(); } - else if (NULL != andSolution) { + else if (NULL != newAnd) { stringstream ss; ss << "ONE OF: ["; - for (size_t i = 0; i < andSolution->subnodes.size(); ++i) { - const vector<size_t>& sn = andSolution->subnodes[i]; - ss << "["; - for (size_t j = 0; j < sn.size(); ++j) { - ss << sn[j]; - if (j < sn.size() - 1) - ss << ", "; - } - ss << "]"; - if (i < andSolution->subnodes.size() - 1) + for (size_t i = 0; i < newAnd->subnodes.size(); ++i) { + ss << "[" << newAnd->subnodes[i] << "]"; + if (i < newAnd->subnodes.size() - 1) { ss << ", "; + } + } + for (size_t i = 0; i < newAnd->predChoices.size(); ++i) { + const OneIndexAssignment& oie = newAnd->predChoices[i]; + ss << "IDX#" << oie.index << " for preds: "; + for (size_t j = 0; j < oie.preds.size(); ++j) { + ss << oie.preds[j]->toString() << ", "; + } } - ss << "]"; return ss.str(); } else { - verify(NULL != orSolution); + verify(NULL != orAssignment); stringstream ss; ss << "ALL OF: ["; - for (size_t i = 0; i < orSolution->subnodes.size(); ++i) { - ss << " " << orSolution->subnodes[i]; + for (size_t i = 0; i < orAssignment->subnodes.size(); ++i) { + ss << " " << orAssignment->subnodes[i]; } ss << "]"; return ss.str(); } } - /** - * This is very expensive if the involved indices/predicates are numerous but - * I suspect that's rare. TODO: revisit on perf pass. - * - * TODO: We can save time by not bothering to do this if the index is multiKey. - * TODO: Perhaps this should be done by the planner?? - */ - void PlanEnumerator::checkCompound(string prefix, MatchExpression* node) { - if (MatchExpression::AND == node->matchType()) { - // Step 1: Find all compound indices. - vector<MatchExpression*> assignedCompound; - vector<MatchExpression*> unassigned; - - for (size_t i = 0; i < node->numChildren(); ++i) { - MatchExpression* child = node->getChild(i); - if (Indexability::nodeCanUseIndexOnOwnField(child)) { - verify(NULL != _memo[_nodeToId[child]]); - verify(NULL != _memo[_nodeToId[child]]->pred); - if (NULL == child->getTag()) { - // Not assigned an index. - unassigned.push_back(child); - } - else { - IndexTag* childTag = static_cast<IndexTag*>(child->getTag()); - if (isCompound(childTag->index)) { - assignedCompound.push_back(child); - } - } - } - } - - for (size_t i = 0; i < assignedCompound.size(); ++i) { - cout << "assigned compound: " << assignedCompound[i]->toString(); - } - for (size_t i = 0; i < unassigned.size(); ++i) { - cout << "unassigned : " << unassigned[i]->toString(); - } - - // Step 2: Iterate over the other fields of the compound indices - // TODO: This could be optimized a lot. 
- for (size_t i = 0; i < assignedCompound.size(); ++i) { - IndexTag* childTag = static_cast<IndexTag*>(assignedCompound[i]->getTag()); - - // XXX: If we assign a compound index and it's on a multikey index, the planner may - // not be able to use the multikey for it, and then it may create a new index scan, - // and that new scan will be bogus. For now don't assign, should fix planner to be - // more resilient. once we figure out fully in what cases array operators can use - // compound indices, change this to deal with that. - // - // That is, we can only safely assign compound indices if the planner uses them as - // compound indices whenever we assign them. - if ((*_indices)[i].multikey) { continue; } - - const BSONObj& kp = (*_indices)[childTag->index].keyPattern; - BSONObjIterator it(kp); - it.next(); - - size_t posInIdx = 0; - // we know isCompound is true so this should be true. - verify(it.more()); - while (it.more()) { - BSONElement kpElt = it.next(); - ++posInIdx; - bool assignedField = false; - // Trying to pick an unassigned M.E. - for (size_t j = 0; j < unassigned.size(); ++j) { - // TODO: is this really required? This seems really like it's just a - // reiteration of the tagging process. - if (prefix + unassigned[j]->path().toString() != kpElt.fieldName()) { - // We need to find a predicate over kpElt.fieldName(). - continue; - } - // Another compound index was assigned. - if (NULL != unassigned[j]->getTag()) { - continue; - } - // Index no. childTag->index, the compound index, must be - // a member of the notFirst - NodeSolution* soln = _memo[_nodeToId[unassigned[j]]]; - verify(NULL != soln); - verify(NULL != soln->pred); - verify(unassigned[j] == soln->pred->expr); - if (std::find(soln->pred->notFirst.begin(), soln->pred->notFirst.end(), childTag->index) != soln->pred->notFirst.end()) { - cout << "compound-ing " << kp.toString() << " with node " << unassigned[j]->toString() << endl; - assignedField = true; - cout << "setting pos to " << posInIdx << endl; - unassigned[j]->setTag(new IndexTag(childTag->index, posInIdx)); - // We've picked something for this (index, field) tuple. Don't pick anything else. - break; - } - } - - // We must assign fields in compound indices contiguously. - if (!assignedField) { - cout << "Failed to assign to compound field " << kpElt.toString() << endl; - break; - } - } - } - } - - if (Indexability::arrayUsesIndexOnChildren(node)) { - if (!node->path().empty()) { - prefix += node->path().toString() + "."; - } - } - - // Don't think the traversal order here matters. - for (size_t i = 0; i < node->numChildren(); ++i) { - checkCompound(prefix, node->getChild(i)); - } - } - bool PlanEnumerator::getNext(MatchExpression** tree) { if (_done) { return false; } *tree = _root->shallowClone(); @@ -236,44 +123,23 @@ namespace mongo { sortUsingTags(*tree); _root->resetTag(); + // TODO: enumerate >1 plan _done = true; return true; } bool PlanEnumerator::prepMemo(MatchExpression* node) { - if (Indexability::arrayUsesIndexOnChildren(node)) { - // TODO: Fold into logical->AND branch below? - AndSolution* andSolution = new AndSolution(); - for (size_t i = 0; i < node->numChildren(); ++i) { - if (prepMemo(node->getChild(i))) { - vector<size_t> option; - option.push_back(_nodeToId[node->getChild(i)]); - andSolution->subnodes.push_back(option); - } - } - + if (Indexability::nodeCanUseIndexOnOwnField(node)) { + // TODO: This is done for everything, maybe have NodeAssignment* newMemo(node)? 
size_t myID = _inOrderCount++; _nodeToId[node] = myID; - NodeSolution* soln = new NodeSolution(); + NodeAssignment* soln = new NodeAssignment(); _memo[_nodeToId[node]] = soln; _curEnum[myID] = 0; - // Takes ownership. - soln->andSolution.reset(andSolution); - return andSolution->subnodes.size() > 0; - } - else if (Indexability::nodeCanUseIndexOnOwnField(node)) { - // TODO: This is done for everything, maybe have NodeSolution* newMemo(node)? - size_t myID = _inOrderCount++; - _nodeToId[node] = myID; - NodeSolution* soln = new NodeSolution(); - _memo[_nodeToId[node]] = soln; - - _curEnum[myID] = 0; - - // Fill out the NodeSolution. - soln->pred.reset(new PredicateSolution()); + // Fill out the NodeAssignment. + soln->pred.reset(new PredicateAssignment()); if (NULL != node->getTag()) { RelevantTag* rt = static_cast<RelevantTag*>(node->getTag()); soln->pred->first.swap(rt->first); @@ -284,89 +150,150 @@ namespace mongo { // be indexed when there are 'first' indices. return soln->pred->first.size() > 0; } - else if (node->isLogical()) { - if (MatchExpression::OR == node->matchType()) { - // For an OR to be indexed all its children must be indexed. - bool indexed = true; - for (size_t i = 0; i < node->numChildren(); ++i) { - if (!prepMemo(node->getChild(i))) { - indexed = false; - } + else if (MatchExpression::OR == node->matchType()) { + // For an OR to be indexed all its children must be indexed. + bool indexed = true; + for (size_t i = 0; i < node->numChildren(); ++i) { + if (!prepMemo(node->getChild(i))) { + indexed = false; } + } - size_t myID = _inOrderCount++; - _nodeToId[node] = myID; - NodeSolution* soln = new NodeSolution(); - _memo[_nodeToId[node]] = soln; + size_t myID = _inOrderCount++; + _nodeToId[node] = myID; + NodeAssignment* soln = new NodeAssignment(); + _memo[_nodeToId[node]] = soln; + + OrAssignment* orAssignment = new OrAssignment(); + for (size_t i = 0; i < node->numChildren(); ++i) { + orAssignment->subnodes.push_back(_nodeToId[node->getChild(i)]); + } + soln->orAssignment.reset(orAssignment); + return indexed; + } + else if (MatchExpression::AND == node->matchType() || Indexability::arrayUsesIndexOnChildren(node)) { + // map from idx id to children that have a pred over it. + unordered_map<IndexID, vector<MatchExpression*> > idxToFirst; + unordered_map<IndexID, vector<MatchExpression*> > idxToNotFirst; + + NewAndAssignment* newAndAssignment = new NewAndAssignment(); + for (size_t i = 0; i < node->numChildren(); ++i) { + MatchExpression* child = node->getChild(i); - OrSolution* orSolution = new OrSolution(); - for (size_t i = 0; i < node->numChildren(); ++i) { - orSolution->subnodes.push_back(_nodeToId[node->getChild(i)]); + if (Indexability::nodeCanUseIndexOnOwnField(child)) { + RelevantTag* rt = static_cast<RelevantTag*>(child->getTag()); + for (size_t j = 0; j < rt->first.size(); ++j) { + idxToFirst[rt->first[j]].push_back(child); + } + for (size_t j = 0 ; j< rt->notFirst.size(); ++j) { + idxToNotFirst[rt->notFirst[j]].push_back(child); + } + } + else { + if (prepMemo(child)) { + size_t childID = _nodeToId[child]; + newAndAssignment->subnodes.push_back(childID); + } } - soln->orSolution.reset(orSolution); - return indexed; } - else { - // To be exhaustive, we would compute all solutions of size 1, 2, ..., - // node->numChildren(). Each of these solutions would get a place in the - // memo structure. - // If there is a geoNear, we put it at the start of our options to ensure that, even - // if we enumerate one plan, we will index it. 
+ // At this point we know how many indices the AND's predicate children are over. + newAndAssignment->predChoices.resize(idxToFirst.size()); + + // This iterates through the predChoices. + size_t predChoicesIdx = 0; + + // For each FIRST, we assign nodes to it. + for (unordered_map<IndexID, vector<MatchExpression*> >::iterator it = idxToFirst.begin(); it != idxToFirst.end(); ++it) { + OneIndexAssignment* assign = &newAndAssignment->predChoices[predChoicesIdx]; + ++predChoicesIdx; + + // Fill out the OneIndexAssignment with the preds that are over the first field. + assign->index = it->first; + // We can swap because we're never touching idxToFirst again after this loop over it. + assign->preds.swap(it->second); + assign->positions.resize(assign->preds.size(), 0); + + // + // Compound analysis here and below. // - // XXX: This is a crappy substitute for something smarter, namely always including - // geoNear in every possible selection inside of an AND. Revisit when we enum - // more than one plan. - size_t geoNearChild = IndexTag::kNoIndex; - - // For efficiency concerns, we don't explore any more than the size-1 members - // of the power set. That is, we will only use one index at a time. - AndSolution* andSolution = new AndSolution(); - for (size_t i = 0; i < node->numChildren(); ++i) { - // If AND requires an index it can only piggyback on the children that have - // indices. - if (prepMemo(node->getChild(i))) { - vector<size_t> option; - size_t childID = _nodeToId[node->getChild(i)]; - option.push_back(childID); - andSolution->subnodes.push_back(option); - - // Fill out geoNearChild, possibly. - // TODO: Actually rank enumeration aside from "always pick GEO_NEAR". - if (NULL != _memo[childID]) { - NodeSolution* ns = _memo[childID]; - if (NULL != ns->pred) { - verify(NULL != ns->pred->expr); - if (MatchExpression::GEO_NEAR == ns->pred->expr->matchType()) { - geoNearChild = andSolution->subnodes.size() - 1; - } - } + + // Don't compound on multikey indices. (XXX: not whole story...) + if ((*_indices)[it->first].multikey) { continue; } + + // Grab the expressions that are notFirst for the index whose assignments we're filling out. + unordered_map<size_t, vector<MatchExpression*> >::const_iterator compoundIt = idxToNotFirst.find(it->first); + if (compoundIt == idxToNotFirst.end()) { continue; } + const vector<MatchExpression*>& tryCompound = compoundIt->second; + + // Walk over the key pattern trying to find + BSONObjIterator kpIt((*_indices)[it->first].keyPattern); + // Skip the first elt as it's already assigned. + kpIt.next(); + size_t posInIdx = 0; + while (kpIt.more()) { + BSONElement keyElt = kpIt.next(); + ++posInIdx; + bool fieldAssigned = false; + for (size_t j = 0; j < tryCompound.size(); ++j) { + MatchExpression* maybe = tryCompound[j]; + // Sigh we grab the full path from the relevant tag. + RelevantTag* rt = static_cast<RelevantTag*>(maybe->getTag()); + if (keyElt.fieldName() == rt->path) { + assign->preds.push_back(maybe); + assign->positions.push_back(posInIdx); + fieldAssigned = true; } } + // If we have (a,b,c) and we can't assign something to 'b' don't try + // to assign something to 'c'. + if (!fieldAssigned) { break; } } + } - if (IndexTag::kNoIndex != geoNearChild && (0 != geoNearChild)) { - andSolution->subnodes[0].swap(andSolution->subnodes[geoNearChild]); + // Enumeration detail: We start enumerating with the first predChoice. To make sure that + // we always output the geoNear index, put it first. 
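The compound-assignment loop above enforces contiguity: after assigning a predicate to the first field of a compound index, further predicates may only be attached for a contiguous prefix of the key pattern ("If we have (a,b,c) and we can't assign something to 'b' don't try to assign something to 'c'"). A toy version of that rule, with made-up predicate strings:

```cpp
#include <iostream>
#include <map>
#include <string>
#include <vector>

int main() {
    // Hypothetical compound index (a, b, c) and predicates over a, b and d.
    std::vector<std::string> keyPattern = {"a", "b", "c"};
    std::map<std::string, std::string> preds = {{"a", "a > 5"}, {"b", "b == 3"}, {"d", "d < 9"}};

    for (size_t pos = 0; pos < keyPattern.size(); ++pos) {
        auto it = preds.find(keyPattern[pos]);
        if (it == preds.end()) {
            // No predicate for this field, so later fields of the index can't be used either.
            break;
        }
        std::cout << "assign '" << it->second << "' at position " << pos << "\n";
    }
    // Prints assignments for 'a' (position 0) and 'b' (position 1); 'd' is never assigned.
}
```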
+ // + // TODO: We can compute this more "on the fly" but it's clearer to see what's going on when + // we do this as a separate step. + for (size_t i = 0; i < newAndAssignment->predChoices.size(); ++i) { + const OneIndexAssignment& oie = newAndAssignment->predChoices[i]; + bool foundGeoNear = false; + + for (size_t j = 0; j < oie.preds.size(); ++j) { + MatchExpression* expr = oie.preds[j]; + if (MatchExpression::GEO_NEAR == expr->matchType()) { + // If the GEO_NEAR idx is already at position 0 we'll choose it. + // TODO: We have to be smarter when we enumerate >1 plan. + if (0 != i) { + // Move the GEO_NEAR supplying index to the first choice spot. + std::swap(newAndAssignment->predChoices[0], newAndAssignment->predChoices[i]); + } + foundGeoNear = true; + break; + } } + if (foundGeoNear) { break; } + } - size_t myID = _inOrderCount++; - _nodeToId[node] = myID; - NodeSolution* soln = new NodeSolution(); - _memo[_nodeToId[node]] = soln; - - verify(MatchExpression::AND == node->matchType()); - _curEnum[myID] = 0; + // Normal node allocation stuff. + size_t myID = _inOrderCount++; + _nodeToId[node] = myID; + NodeAssignment* soln = new NodeAssignment(); + _memo[_nodeToId[node]] = soln; + _curEnum[myID] = 0; - // Takes ownership. - soln->andSolution.reset(andSolution); - return andSolution->subnodes.size() > 0; - } + // Takes ownership. + soln->newAnd.reset(newAndAssignment); + return newAndAssignment->subnodes.size() > 0 || newAndAssignment->predChoices.size() > 0; } + + // Don't know what the node is at this point. return false; } void PlanEnumerator::tagMemo(size_t id) { - NodeSolution* soln = _memo[id]; + NodeAssignment* soln = _memo[id]; verify(NULL != soln); if (NULL != soln->pred) { @@ -380,23 +307,33 @@ namespace mongo { soln->pred->expr->setTag(new IndexTag(soln->pred->first[_curEnum[id]])); } } - else if (NULL != soln->orSolution) { - for (size_t i = 0; i < soln->orSolution->subnodes.size(); ++i) { - tagMemo(soln->orSolution->subnodes[i]); + else if (NULL != soln->orAssignment) { + for (size_t i = 0; i < soln->orAssignment->subnodes.size(); ++i) { + tagMemo(soln->orAssignment->subnodes[i]); } // TODO: Who checks to make sure that we tag all nodes of an OR? We should // know this early. } - else { - verify(NULL != soln->andSolution); - verify(_curEnum[id] < soln->andSolution->subnodes.size()); - vector<size_t> &cur = soln->andSolution->subnodes[_curEnum[id]]; - - for (size_t i = 0; i < cur.size(); ++i) { - // Tag the child. - tagMemo(cur[i]); + else if (NULL != soln->newAnd) { + size_t curEnum = _curEnum[id]; + if (curEnum < soln->newAnd->predChoices.size()) { + const OneIndexAssignment assign = soln->newAnd->predChoices[curEnum]; + for (size_t i = 0; i < assign.preds.size(); ++i) { + MatchExpression* pred = assign.preds[i]; + verify(NULL == pred->getTag()); + pred->setTag(new IndexTag(assign.index, assign.positions[i])); + } + } + else { + curEnum -= soln->newAnd->predChoices.size(); + if (curEnum < soln->newAnd->subnodes.size()) { + tagMemo(soln->newAnd->subnodes[curEnum]); + } } } + else { + verify(0); + } } bool PlanEnumerator::nextMemo(size_t id) { diff --git a/src/mongo/db/query/plan_enumerator.h b/src/mongo/db/query/plan_enumerator.h index 33b9072ad45..1e1669ab234 100644 --- a/src/mongo/db/query/plan_enumerator.h +++ b/src/mongo/db/query/plan_enumerator.h @@ -53,7 +53,7 @@ namespace mongo { * Outputs a possible plan. Leaves in the plan are tagged with an index to use. * Returns true if a plan was outputted, false if no more plans will be outputted. 
* - * 'tree' is set to point to the query tree. A QuerySolution is built from this tree. + * 'tree' is set to point to the query tree. A QueryAssignment is built from this tree. * Caller owns the pointer. Note that 'tree' itself points into data owned by the * provided CanonicalQuery. * @@ -65,18 +65,20 @@ namespace mongo { private: // - // Data used by all enumeration strategies + // Memoization strategy // - // Match expression we're planning for. Not owned by us. - MatchExpression* _root; - // Indices we're allowed to enumerate with. - const vector<IndexEntry>* _indices; + // Everything is really a size_t but it's far more readable to impose a type via typedef. - // - // Memoization strategy - // + // An ID we use to index into _memo. An entry in _memo is a NodeAssignment. + typedef size_t NodeID; + + // An index in _indices. + typedef size_t IndexID; + + // The position of a field in a possibly compound index. + typedef size_t IndexPosition; /** * Traverses the match expression and generates the memo structure from it. @@ -87,7 +89,7 @@ namespace mongo { /** * Returns true if index #idx is compound, false otherwise. */ - bool isCompound(size_t idx); + bool isCompound(IndexID idx); /** * When we assign indices to nodes, we only assign indices to predicates that are 'first' @@ -102,7 +104,7 @@ namespace mongo { * Traverses the memo structure and annotates the tree with IndexTags for the chosen * indices. */ - void tagMemo(size_t id); + void tagMemo(NodeID id); /** * Move to the next enumeration state. Enumeration state is stored in curEnum. @@ -116,29 +118,38 @@ namespace mongo { * * XXX implement. */ - bool nextMemo(size_t id); + bool nextMemo(NodeID id); - struct PredicateSolution { - vector<size_t> first; - vector<size_t> notFirst; + struct PredicateAssignment { + vector<IndexID> first; + vector<IndexID> notFirst; // Not owned here. MatchExpression* expr; }; - struct AndSolution { - // Must use one of the elements of subnodes. - vector<vector<size_t> > subnodes; + struct OrAssignment { + // Must use all of subnodes. + vector<NodeID> subnodes; }; - struct OrSolution { - // Must use all of subnodes. - vector<size_t> subnodes; + // This is used by NewAndAssignment, not an actual assignment. + struct OneIndexAssignment { + // 'preds[i]' is uses index 'index' at position 'positions[i]' + vector<MatchExpression*> preds; + vector<IndexPosition> positions; + IndexID index; }; - struct NodeSolution { - scoped_ptr<PredicateSolution> pred; - scoped_ptr<AndSolution> andSolution; - scoped_ptr<OrSolution> orSolution; + struct NewAndAssignment { + // TODO: We really want to consider the power set of the union of the choices here. + vector<OneIndexAssignment> predChoices; + vector<NodeID> subnodes; + }; + + struct NodeAssignment { + scoped_ptr<PredicateAssignment> pred; + scoped_ptr<OrAssignment> orAssignment; + scoped_ptr<NewAndAssignment> newAnd; string toString() const; }; @@ -147,22 +158,30 @@ namespace mongo { // Used to label nodes in the order in which we visit in a post-order traversal. size_t _inOrderCount; - // Map from node to its order/ID. - map<MatchExpression*, size_t> _nodeToId; + // Map from expression to its NodeID. + unordered_map<MatchExpression*, NodeID> _nodeToId; - // Map from order/ID to a memoized solution. - map<size_t, NodeSolution*> _memo; + // Map from NodeID to its precomputed solution info. + unordered_map<NodeID, NodeAssignment*> _memo; // Enumeration - // ANDs count through clauses, PREDs count through indices. - // Index is order/ID. 
- // Value is whatever counter that node needs. - map<size_t, size_t> _curEnum; + // Map from NodeID to a counter that the NodeID uses to enumerate its states. + unordered_map<NodeID, size_t> _curEnum; // If true, there are no further enumeration states, and getNext should return false. // We could be _done immediately after init if we're unable to output an indexed plan. bool _done; + + // + // Data used by all enumeration strategies + // + + // Match expression we're planning for. Not owned by us. + MatchExpression* _root; + + // Indices we're allowed to enumerate with. + const vector<IndexEntry>* _indices; }; } // namespace mongo diff --git a/src/mongo/db/query/query_planner.cpp b/src/mongo/db/query/query_planner.cpp index 5d3f8f70b9e..13d6d98d5f1 100644 --- a/src/mongo/db/query/query_planner.cpp +++ b/src/mongo/db/query/query_planner.cpp @@ -169,6 +169,7 @@ namespace mongo { verify(NULL == node->getTag()); RelevantTag* rt = new RelevantTag(); node->setTag(rt); + rt->path = fullPath; // TODO: This is slow, with all the string compares. for (size_t i = 0; i < indices.size(); ++i) { @@ -231,10 +232,10 @@ namespace mongo { } // static - QuerySolutionNode* QueryPlanner::makeLeafNode(const BSONObj& indexKeyPattern, + QuerySolutionNode* QueryPlanner::makeLeafNode(const IndexEntry& index, MatchExpression* expr, bool* exact) { - cout << "making leaf node for " << expr->toString() << endl; + // cout << "making leaf node for " << expr->toString() << endl; // We're guaranteed that all GEO_NEARs are first. This slightly violates the "sort index // predicates by their position in the compound index" rule but GEO_NEAR isn't an ixscan. // This saves our bacon when we have {foo: 1, bar: "2dsphere"} and the predicate on bar is a @@ -244,7 +245,7 @@ namespace mongo { // This should gracefully deal with the case where we have a pred over foo but no geo clause // over bar. In that case there is no GEO_NEAR to appear first and it's treated like a // straight ixscan. - BSONElement elt = indexKeyPattern.firstElement(); + BSONElement elt = index.keyPattern.firstElement(); bool indexIs2D = (String == elt.type() && "2d" == elt.String()); if (MatchExpression::GEO_NEAR == expr->matchType()) { @@ -253,7 +254,7 @@ namespace mongo { GeoNearMatchExpression* nearme = static_cast<GeoNearMatchExpression*>(expr); if (indexIs2D) { GeoNear2DNode* ret = new GeoNear2DNode(); - ret->indexKeyPattern = indexKeyPattern; + ret->indexKeyPattern = index.keyPattern; ret->seek = nearme->getRawObj(); return ret; } @@ -261,14 +262,14 @@ namespace mongo { // Note that even if we're starting a GeoNear node, we may never get a // predicate for $near. So, in that case, the builder "collapses" it into // an ixscan. 
- cout << "Making geonear 2dblahblah kp " << indexKeyPattern.toString() << endl; + // cout << "Making geonear 2dblahblah kp " << index.toString() << endl; GeoNear2DSphereNode* ret = new GeoNear2DSphereNode(); - ret->indexKeyPattern = indexKeyPattern; + ret->indexKeyPattern = index.keyPattern; ret->nq = nearme->getData(); - ret->baseBounds.fields.resize(indexKeyPattern.nFields()); + ret->baseBounds.fields.resize(index.keyPattern.nFields()); stringstream ss; ret->appendToString(&ss, 0); - cout << "geonear 2dsphere out " << ss.str() << endl; + // cout << "geonear 2dsphere out " << ss.str() << endl; return ret; } } @@ -279,18 +280,19 @@ namespace mongo { GeoMatchExpression* nearme = static_cast<GeoMatchExpression*>(expr); verify(indexIs2D); Geo2DNode* ret = new Geo2DNode(); - ret->indexKeyPattern = indexKeyPattern; + ret->indexKeyPattern = index.keyPattern; ret->seek = nearme->getRawObj(); return ret; } else { - cout << "making ixscan for " << expr->toString() << endl; + // cout << "making ixscan for " << expr->toString() << endl; // Note that indexKeyPattern.firstElement().fieldName() may not equal expr->path() // because expr might be inside an array operator that provides a path prefix. IndexScanNode* isn = new IndexScanNode(); - isn->indexKeyPattern = indexKeyPattern; - isn->bounds.fields.resize(indexKeyPattern.nFields()); + isn->indexKeyPattern = index.keyPattern; + isn->indexIsMultiKey = index.multikey; + isn->bounds.fields.resize(index.keyPattern.nFields()); // XXX XXX: move this if inside of the bounds builder. if (MatchExpression::ELEM_MATCH_VALUE == expr->matchType()) { @@ -299,23 +301,24 @@ namespace mongo { // TODO: We could/should merge the bounds (possibly subject to various multikey // etc. restrictions). For now we don't bother. - IndexBoundsBuilder::translate(expr->getChild(0), indexKeyPattern.firstElement(), + IndexBoundsBuilder::translate(expr->getChild(0), index.keyPattern.firstElement(), &isn->bounds.fields[0], exact); // TODO: I think this is valid but double check. *exact = false; } else { - IndexBoundsBuilder::translate(expr, indexKeyPattern.firstElement(), + IndexBoundsBuilder::translate(expr, index.keyPattern.firstElement(), &isn->bounds.fields[0], exact); } - cout << "bounds are " << isn->bounds.toString() << " exact " << *exact << endl; + // cout << "bounds are " << isn->bounds.toString() << " exact " << *exact << endl; return isn; } } - void QueryPlanner::mergeWithLeafNode(MatchExpression* expr, const BSONObj& keyPattern, - size_t pos, bool* exactOut, QuerySolutionNode* node) { + void QueryPlanner::mergeWithLeafNode(MatchExpression* expr, const IndexEntry& index, + size_t pos, bool* exactOut, QuerySolutionNode* node, + MatchExpression::MatchType mergeType) { const StageType type = node->getType(); if (STAGE_GEO_2D == type || STAGE_GEO_NEAR_2D == type) { @@ -343,7 +346,7 @@ namespace mongo { // Get the ixtag->pos-th element of the index key pattern. // TODO: cache this instead/with ixtag->pos? 
- BSONObjIterator it(keyPattern); + BSONObjIterator it(index.keyPattern); BSONElement keyElt = it.next(); for (size_t i = 0; i < pos; ++i) { verify(it.more()); @@ -359,13 +362,56 @@ namespace mongo { //<< indices[currentIndexNumber].keyPattern.toString() << endl; verify(boundsToFillOut->fields.size() > pos); - verify(boundsToFillOut->fields[pos].name.empty()); - IndexBoundsBuilder::translate(expr, keyElt, &boundsToFillOut->fields[pos], exactOut); + + OrderedIntervalList* oil = &boundsToFillOut->fields[pos]; + + if (boundsToFillOut->fields[pos].name.empty()) { + IndexBoundsBuilder::translate(expr, keyElt, oil, exactOut); + } + else { + if (MatchExpression::AND == mergeType) { + IndexBoundsBuilder::translateAndIntersect(expr, keyElt, oil, exactOut); + } + else { + verify(MatchExpression::OR == mergeType); + IndexBoundsBuilder::translateAndUnion(expr, keyElt, oil, exactOut); + } + } + } + } + + /** + * Assumes each OIL in bounds is increasing. + * Reverses any OILs that are indexed according to '-1'. + */ + void alignBounds(IndexBounds* bounds, const BSONObj& kp) { + BSONObjIterator it(kp); + size_t oilIdx = 0; + while (it.more()) { + BSONElement elt = it.next(); + int direction = (elt.numberInt() >= 0) ? 1 : -1; + if (-1 == direction) { + vector<Interval>& iv = bounds->fields[oilIdx].intervals; + // Step 1: reverse the list. + std::reverse(iv.begin(), iv.end()); + // Step 2: reverse each interval. + for (size_t i = 0; i < iv.size(); ++i) { + // cout << "reversing " << iv[i].toString() << endl; + iv[i].reverse(); + } + } + ++oilIdx; + } + + // XXX: some day we'll maybe go backward to pull a sort out + if (!bounds->isValidFor(kp, 1)) { + cout << "INVALID BOUNDS: " << bounds->toString() << endl; + verify(0); } } // static - void QueryPlanner::finishLeafNode(QuerySolutionNode* node, const BSONObj& indexKeyPattern) { + void QueryPlanner::finishLeafNode(QuerySolutionNode* node, const IndexEntry& index) { const StageType type = node->getType(); if (STAGE_GEO_2D == type || STAGE_GEO_NEAR_2D == type) { @@ -399,10 +445,13 @@ namespace mongo { } // All fields are filled out with bounds, nothing to do. - if (firstEmptyField == bounds->fields.size()) { return; } + if (firstEmptyField == bounds->fields.size()) { + alignBounds(bounds, index.keyPattern); + return; + } // Skip ahead to the firstEmptyField-th element, where we begin filling in bounds. - BSONObjIterator it(indexKeyPattern); + BSONObjIterator it(index.keyPattern); for (size_t i = 0; i < firstEmptyField; ++i) { verify(it.more()); it.next(); @@ -421,6 +470,10 @@ namespace mongo { // Make sure that the length of the key is the length of the bounds we started. verify(firstEmptyField == bounds->fields.size()); + + // We create bounds assuming a forward direction but can easily reverse bounds to align + // according to our desired direction. + alignBounds(bounds, index.keyPattern); } } @@ -430,11 +483,6 @@ namespace mongo { const vector<IndexEntry>& indices, vector<QuerySolutionNode*>* out) { - bool isAnd = MatchExpression::AND == root->matchType(); - if (!isAnd) { - verify(MatchExpression::OR == root->matchType()); - } - auto_ptr<QuerySolutionNode> currentScan; size_t currentIndexNumber = IndexTag::kNoIndex; size_t curChild = 0; @@ -492,24 +540,7 @@ namespace mongo { // could be NULL, in which case we don't output it. The rest of the logic is identical. // // If the child uses the same index as the current index scan, we may be able to merge - // the bounds for the two scans. See the comments below for canMergeBounds and - // shouldMergeBounds. 
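A minimal sketch of the two-step reversal that alignBounds (added above) performs for a key part indexed with -1: reverse the interval list, then flip each interval's endpoints, so the bounds are ordered in the direction the scan will actually run. SimpleInterval is a stand-in, not the real mongo::Interval.

    #include <algorithm>
    #include <iostream>
    #include <vector>

    struct SimpleInterval {
        int start;
        int end;
        void reverse() { std::swap(start, end); }
    };

    int main() {
        // Bounds built assuming an ascending traversal: [1,2], [5,7], [9,10]
        std::vector<SimpleInterval> oil;
        SimpleInterval a = {1, 2}, b = {5, 7}, c = {9, 10};
        oil.push_back(a); oil.push_back(b); oil.push_back(c);

        // Key part is descending: step 1, reverse the list; step 2, reverse
        // each interval.
        std::reverse(oil.begin(), oil.end());
        for (size_t i = 0; i < oil.size(); ++i) {
            oil[i].reverse();
        }

        // Now ordered for a descending scan: [10,9] [7,5] [2,1]
        for (size_t i = 0; i < oil.size(); ++i) {
            std::cout << "[" << oil[i].start << ", " << oil[i].end << "] ";
        }
        std::cout << std::endl;
        return 0;
    }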
- - // Is it remotely possible to merge the bounds? We care about isAnd because bounds - // merging is currently totally unimplemented for ORs. - // - // TODO: When more than one child of an AND clause can have an index, we'll have to fix - // the logic below to only merge when the merge is really filling out an additional - // field of a compound index. - // - // TODO: Implement union of OILs to allow merging bounds for OR. - // TODO: Implement intersection of OILs to allow merging of bounds for AND. - - bool canMergeBounds = (NULL != currentScan.get()) - && (currentIndexNumber == ixtag->index) - && isAnd; - - // Is it semantically correct to merge bounds? + // the bounds for the two scans. // // Guiding principle: must the values we're testing come from the same array in the // document? If so, we can combine bounds (via intersection or compounding). If not, @@ -552,18 +583,30 @@ namespace mongo { // If those values are both present in the same array, the index key for the // aforementioned index will be {"":3, "":4} // Therefore we can intersect bounds. - // - // XXX: There is more to it than this. See index13.js. - bool shouldMergeBounds = (currentIndexNumber != IndexTag::kNoIndex) - && !indices[currentIndexNumber].multikey; + // TODO: we should also merge if we're in an array operator, but only when we figure out index13.js. + if (NULL != currentScan.get() && (currentIndexNumber == ixtag->index) && !indices[currentIndexNumber].multikey) { + // The child uses the same index we're currently building a scan for. Merge + // the bounds and filters. + verify(currentIndexNumber == ixtag->index); - // XXX: commented out until cases in index13.js are resolved. - // bool shouldMergeBounds = !indices[currentIndexNumber].multikey || inArrayOperator; + bool exact = false; + mergeWithLeafNode(child, indices[currentIndexNumber], ixtag->pos, &exact, + currentScan.get(), root->matchType()); - if (!canMergeBounds || !shouldMergeBounds) { + if (exact) { + root->getChildVector()->erase(root->getChildVector()->begin() + + curChild); + delete child; + } + else { + // We keep curChild in the AND for affixing later. + ++curChild; + } + } + else { if (NULL != currentScan.get()) { - finishLeafNode(currentScan.get(), indices[currentIndexNumber].keyPattern); + finishLeafNode(currentScan.get(), indices[currentIndexNumber]); out->push_back(currentScan.release()); } else { @@ -573,7 +616,7 @@ namespace mongo { currentIndexNumber = ixtag->index; bool exact = false; - currentScan.reset(makeLeafNode(indices[currentIndexNumber].keyPattern, + currentScan.reset(makeLeafNode(indices[currentIndexNumber], child, &exact)); if (exact && !inArrayOperator) { @@ -590,30 +633,11 @@ namespace mongo { ++curChild; } } - else { - // The child uses the same index we're currently building a scan for. Merge - // the bounds and filters. - verify(currentIndexNumber == ixtag->index); - - bool exact = false; - mergeWithLeafNode(child, indices[currentIndexNumber].keyPattern, ixtag->pos, &exact, - currentScan.get()); - - if (exact) { - root->getChildVector()->erase(root->getChildVector()->begin() - + curChild); - delete child; - } - else { - // We keep curChild in the AND for affixing later. - ++curChild; - } - } } // Output the scan we're done with, if it exists. 
if (NULL != currentScan.get()) { - finishLeafNode(currentScan.get(), indices[currentIndexNumber].keyPattern); + finishLeafNode(currentScan.get(), indices[currentIndexNumber]); out->push_back(currentScan.release()); } @@ -703,7 +727,6 @@ namespace mongo { autoRoot.reset(root); } - size_t expectedNumberScans = root->numChildren(); vector<QuerySolutionNode*> ixscanNodes; if (!processIndexScans(root, inArrayOperator, indices, &ixscanNodes)) { return NULL; @@ -712,9 +735,7 @@ namespace mongo { // Unlike an AND, an OR cannot have filters hanging off of it. We stop processing // when any of our children lack index tags. If a node lacks an index tag it cannot // be answered via an index. - if (ixscanNodes.size() != expectedNumberScans - || (!inArrayOperator && 0 != root->numChildren())) { - + if (!inArrayOperator && 0 != root->numChildren()) { warning() << "planner OR error, non-indexed child of OR."; // We won't enumerate an OR without indices for each child, so this isn't an issue, even // if we have an AND with an OR child -- we won't get here unless the OR is fully @@ -800,13 +821,13 @@ namespace mongo { IndexTag* tag = static_cast<IndexTag*>(root->getTag()); bool exact = false; - QuerySolutionNode* soln = makeLeafNode(indices[tag->index].keyPattern, root, + QuerySolutionNode* soln = makeLeafNode(indices[tag->index], root, &exact); verify(NULL != soln); stringstream ss; soln->appendToString(&ss, 0); - cout << "about to finish leaf node, soln " << ss.str() << endl; - finishLeafNode(soln, indices[tag->index].keyPattern); + // cout << "about to finish leaf node, soln " << ss.str() << endl; + finishLeafNode(soln, indices[tag->index]); if (inArrayOperator) { return soln; @@ -936,8 +957,10 @@ namespace mongo { // Project the results. if (NULL != query.getProj()) { + cout << "PROJECTION: fetched status: " << solnRoot->fetched() << endl; + cout << "PROJECTION: Current plan is " << solnRoot->toString() << endl; if (query.getProj()->requiresDocument()) { - cout << "projection claims to require doc adding fetch.\n"; + cout << "PROJECTION: claims to require doc adding fetch.\n"; // If the projection requires the entire document, somebody must fetch. if (!solnRoot->fetched()) { FetchNode* fetch = new FetchNode(); @@ -946,11 +969,13 @@ namespace mongo { } } else { + cout << "PROJECTION: requires fields\n"; const vector<string>& fields = query.getProj()->requiredFields(); bool covered = true; for (size_t i = 0; i < fields.size(); ++i) { if (!solnRoot->hasField(fields[i])) { - cout << "not covered cuz doesn't have field " << fields[i] << endl; + cout << "PROJECTION: not covered cuz doesn't have field " + << fields[i] << endl; covered = false; break; } @@ -1080,6 +1105,7 @@ namespace mongo { // Figure out what fields we care about. 
unordered_set<string> fields; getFields(query.root(), "", &fields); + /* for (unordered_set<string>::const_iterator it = fields.begin(); it != fields.end(); ++it) { cout << "field " << *it << endl; @@ -1104,19 +1130,20 @@ namespace mongo { } } + size_t hintIndexNumber = numeric_limits<size_t>::max(); + if (!hintIndex.isEmpty()) { - bool hintValid = false; for (size_t i = 0; i < indices.size(); ++i) { if (0 == indices[i].keyPattern.woCompare(hintIndex)) { relevantIndices.clear(); relevantIndices.push_back(indices[i]); cout << "hint specified, restricting indices to " << hintIndex.toString() << endl; - hintValid = true; + hintIndexNumber = i; break; } } - if (!hintValid) { + if (hintIndexNumber == numeric_limits<size_t>::max()) { warning() << "Hinted index " << hintIndex.toString() << " does not exist, ignoring."; } @@ -1140,11 +1167,9 @@ namespace mongo { // If we have any relevant indices, we try to create indexed plans. if (0 < relevantIndices.size()) { - /* for (size_t i = 0; i < relevantIndices.size(); ++i) { cout << "relevant idx " << i << " is " << relevantIndices[i].toString() << endl; } - */ // The enumerator spits out trees tagged with IndexTag(s). PlanEnumerator isp(query.root(), &relevantIndices); @@ -1173,9 +1198,12 @@ namespace mongo { // scan the entire index to provide results and output that as our plan. This is the // desired behavior when an index is hinted that is not relevant to the query. if (!hintIndex.isEmpty() && (0 == out->size())) { + QuerySolutionNode* solnRoot = NULL; + // Build an ixscan over the id index, use it, and return it. IndexScanNode* isn = new IndexScanNode(); isn->indexKeyPattern = hintIndex; + isn->indexIsMultiKey = indices[hintIndexNumber].multikey; isn->bounds.fields.resize(hintIndex.nFields()); // TODO: can we use simple bounds with this compound idx? @@ -1186,13 +1214,22 @@ namespace mongo { ++field; } - // TODO: We may not need to do the fetch if the predicates in root are covered. But - // for now it's safe (though *maybe* slower). - FetchNode* fetch = new FetchNode(); - fetch->filter.reset(query.root()->shallowClone()); - fetch->child.reset(isn); + MatchExpression* filter = query.root()->shallowClone(); + // If it's find({}) remove the no-op root. + if (MatchExpression::AND == filter->matchType() && (0 == filter->numChildren())) { + delete filter; + solnRoot = isn; + } + else { + // TODO: We may not need to do the fetch if the predicates in root are covered. But + // for now it's safe (though *maybe* slower). + FetchNode* fetch = new FetchNode(); + fetch->filter.reset(filter); + fetch->child.reset(isn); + solnRoot = fetch; + } - QuerySolution* soln = analyzeDataAccess(query, fetch); + QuerySolution* soln = analyzeDataAccess(query, solnRoot); verify(NULL != soln); out->push_back(soln); @@ -1223,6 +1260,7 @@ namespace mongo { if (0 == kp.woCompare(query.getParsed().getSort())) { IndexScanNode* isn = new IndexScanNode(); isn->indexKeyPattern = kp; + isn->indexIsMultiKey = indices[i].multikey; isn->bounds.fields.resize(kp.nFields()); // TODO: can we use simple bounds if compound? diff --git a/src/mongo/db/query/query_planner.h b/src/mongo/db/query/query_planner.h index 573b5c64767..3bb7409c511 100644 --- a/src/mongo/db/query/query_planner.h +++ b/src/mongo/db/query/query_planner.h @@ -189,15 +189,16 @@ namespace mongo { * * If the node is a geo node, XXX. 
*/ - static QuerySolutionNode* makeLeafNode(const BSONObj& indexKeyPattern, + static QuerySolutionNode* makeLeafNode(const IndexEntry& index, MatchExpression* expr, bool* exact); /** * Merge the predicate 'expr' with the leaf node 'node'. */ - static void mergeWithLeafNode(MatchExpression* expr, const BSONObj& keyPattern, - size_t pos, bool* exactOut, QuerySolutionNode* node); + static void mergeWithLeafNode(MatchExpression* expr, const IndexEntry& index, + size_t pos, bool* exactOut, QuerySolutionNode* node, + MatchExpression::MatchType mergeType); /** * If index scan, fill in any bounds that are missing in 'node' with the "all values for @@ -205,7 +206,7 @@ namespace mongo { * * If geo, XXX. */ - static void finishLeafNode(QuerySolutionNode* node, const BSONObj& indexKeyPattern); + static void finishLeafNode(QuerySolutionNode* node, const IndexEntry& index); // // Analysis of Data Access diff --git a/src/mongo/db/query/query_planner_test.cpp b/src/mongo/db/query/query_planner_test.cpp index 30e21c7dd92..e053c99c259 100644 --- a/src/mongo/db/query/query_planner_test.cpp +++ b/src/mongo/db/query/query_planner_test.cpp @@ -622,6 +622,7 @@ namespace { } TEST_F(SingleIndexTest, Basic2DSphereGeoNearReverseCompound) { + setIndex(BSON("x" << 1)); setIndex(BSON("x" << 1 << "a" << "2dsphere")); runQuery(fromjson("{x:1, a: {$nearSphere: [0,0], $maxDistance: 0.31 }}")); dumpSolutions(); diff --git a/src/mongo/db/query/query_solution.cpp b/src/mongo/db/query/query_solution.cpp index 446f87a5a67..fd9e3332750 100644 --- a/src/mongo/db/query/query_solution.cpp +++ b/src/mongo/db/query/query_solution.cpp @@ -309,7 +309,8 @@ namespace mongo { // IndexScanNode // - IndexScanNode::IndexScanNode() : filter(NULL), limit(0), direction(1) { } + IndexScanNode::IndexScanNode() + : indexIsMultiKey(false), filter(NULL), limit(0), direction(1) { } void IndexScanNode::appendToString(stringstream* ss, int indent) const { addIndent(ss, indent); @@ -333,6 +334,10 @@ namespace mongo { } bool IndexScanNode::hasField(const string& field) const { + // There is no covering in a multikey index because you don't know whether or not the field + // in the key was extracted from an array in the original document. + if (indexIsMultiKey) { return false; } + BSONObjIterator it(indexKeyPattern); while (it.more()) { if (field == it.next().fieldName()) { diff --git a/src/mongo/db/query/query_solution.h b/src/mongo/db/query/query_solution.h index 948ac02e658..c8a0e933247 100644 --- a/src/mongo/db/query/query_solution.h +++ b/src/mongo/db/query/query_solution.h @@ -49,6 +49,12 @@ namespace mongo { */ virtual StageType getType() const = 0; + string toString() const { + stringstream ss; + appendToString(&ss, 0); + return ss.str(); + } + /** * Internal function called by toString() * @@ -75,9 +81,6 @@ namespace mongo { * Usage: If an index-only plan has all the fields we're interested in, we don't * have to fetch to show results with those fields. * - * XXX TODO: Cover issues resulting from covered and multikey. Multikey prohibits - * covering, but allows sort (double check!) - * * TODO: 'field' is probably more appropriate as a FieldRef or string. */ virtual bool hasField(const string& field) const = 0; @@ -297,6 +300,8 @@ namespace mongo { BSONObj indexKeyPattern; + bool indexIsMultiKey; + scoped_ptr<MatchExpression> filter; // Only set for 2d. |
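A toy illustration of the covering rule added to IndexScanNode::hasField above: for {x: [1, 2]} a multikey index {x: 1} stores two separate keys, 1 and 2, and no single key can reproduce the array value, so a multikey scan must report that it does not provide the field and the plan has to fetch the document. ToyIndexScan is a hypothetical simplification, not the real IndexScanNode.

    #include <iostream>
    #include <string>
    #include <vector>

    struct ToyIndexScan {
        std::vector<std::string> keyFields;
        bool multikey;

        bool hasField(const std::string& field) const {
            if (multikey) { return false; }   // can't rebuild arrays from index keys
            for (size_t i = 0; i < keyFields.size(); ++i) {
                if (keyFields[i] == field) { return true; }
            }
            return false;
        }
    };

    int main() {
        ToyIndexScan scan;
        scan.keyFields.push_back("x");

        scan.multikey = false;
        std::cout << "covers x (non-multikey): " << scan.hasField("x") << std::endl;  // 1

        scan.multikey = true;
        std::cout << "covers x (multikey):     " << scan.hasField("x") << std::endl;  // 0
        return 0;
    }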