diff options
author | David Hatch <david.hatch@mongodb.com> | 2016-06-09 10:11:23 -0400 |
---|---|---|
committer | David Hatch <david.hatch@mongodb.com> | 2016-06-17 18:09:03 -0400 |
commit | 0f6df2c8668432e46738f554d4c0c061d2d0ded2 (patch) | |
tree | a0b8b60c0e7eec6ee39318ced2e748e3d95280e6 /src/mongo/db/query/query_solution.cpp | |
parent | 907182f4672ab2dcea1e16da5366518a4e44fa8d (diff) | |
download | mongo-0f6df2c8668432e46738f554d4c0c061d2d0ded2.tar.gz |
SERVER-24279 Properly add sort when index does not respect query collation.
Diffstat (limited to 'src/mongo/db/query/query_solution.cpp')
-rw-r--r-- | src/mongo/db/query/query_solution.cpp | 255 |
1 files changed, 195 insertions, 60 deletions
diff --git a/src/mongo/db/query/query_solution.cpp b/src/mongo/db/query/query_solution.cpp index 1b53fd1daf6..05883f9cea6 100644 --- a/src/mongo/db/query/query_solution.cpp +++ b/src/mongo/db/query/query_solution.cpp @@ -26,16 +26,133 @@ * it in the license file. */ +#include <vector> + #include "mongo/db/query/query_solution.h" +#include "mongo/bson/bsontypes.h" #include "mongo/db/index_names.h" #include "mongo/db/matcher/expression_geo.h" +#include "mongo/db/query/index_bounds_builder.h" #include "mongo/db/query/planner_analysis.h" #include "mongo/db/query/query_planner_common.h" namespace mongo { -using std::set; +namespace { + +// This function returns true if the BSONElement el can contain a string. That is, el.type() is in +// String, Array, or Object. +inline bool elementCanContainString(const BSONElement& el) { + BSONType type = el.type(); + return type == BSONType::String || type == BSONType::Array || type == BSONType::Object; +} + +// Create an ordred interval list which represents the bounds for all BSON elements of type String, +// Object, or Array. +OrderedIntervalList buildStringBoundsOil(const std::string& keyName) { + OrderedIntervalList ret; + ret.name = keyName; + + BSONObjBuilder strBob; + strBob.appendMinForType("", BSONType::String); + strBob.appendMaxForType("", BSONType::String); + ret.intervals.push_back(IndexBoundsBuilder::makeRangeInterval(strBob.obj(), true, false)); + + BSONObjBuilder objBob; + objBob.appendMinForType("", BSONType::Object); + objBob.appendMaxForType("", BSONType::Object); + ret.intervals.push_back(IndexBoundsBuilder::makeRangeInterval(objBob.obj(), true, false)); + + BSONObjBuilder arrBob; + arrBob.appendMinForType("", BSONType::Array); + arrBob.appendMaxForType("", BSONType::Array); + ret.intervals.push_back(IndexBoundsBuilder::makeRangeInterval(arrBob.obj(), true, false)); + + return ret; +} + +bool rangeCanContainString(const BSONElement& startKey, + const BSONElement& endKey, + bool endKeyInclusive) { + OrderedIntervalList stringBoundsOil = buildStringBoundsOil(""); + OrderedIntervalList rangeOil; + BSONObjBuilder bob; + bob.appendAs(startKey, ""); + bob.appendAs(endKey, ""); + rangeOil.intervals.push_back( + IndexBoundsBuilder::makeRangeInterval(bob.obj(), true, endKeyInclusive)); + + IndexBoundsBuilder::intersectize(rangeOil, &stringBoundsOil); + return !stringBoundsOil.intervals.empty(); +} + +// Helper function for IndexScanNode::computeProperties for adding additional sort orders made +// possible by point bounds on some fields of the sort pattern. +void addEqualityFieldSorts(const BSONObj& sortPattern, + const std::set<string>& equalityFields, + BSONObjSet* sortsOut) { + invariant(sortsOut); + // TODO: Each field in equalityFields could be dropped from the sort order since it is + // a point interval. The full set of sort orders is as follows: + // For each sort in sortsOut: + // For each drop in powerset(equalityFields): + // Remove fields in 'drop' from 'sort' and add resulting sort to output. + // + // Since this involves a powerset, we don't generate the full set of possibilities. + // Instead, we generate sort orders by removing possible contiguous prefixes of equality + // predicates. For example, if the key pattern is {a: 1, b: 1, c: 1, d: 1, e: 1} + // and and there are equality predicates on 'a', 'b', and 'c', then here we add the sort + // orders {b: 1, c: 1, d: 1, e: 1} and {c: 1, d: 1, e: 1}. (We also end up adding + // {d: 1, e: 1} and {d: 1}, but this is done later on.) + BSONObjIterator it(sortPattern); + BSONObjBuilder suffixBob; + while (it.more()) { + BSONElement elt = it.next(); + // TODO: string slowness. fix when bounds are stringdata not string. + if (equalityFields.end() == equalityFields.find(string(elt.fieldName()))) { + suffixBob.append(elt); + // This field isn't a point interval, can't drop. + break; + } + + // We add the sort obtained by dropping 'elt' and all preceding elements from the index + // key pattern. + BSONObjIterator droppedPrefixIt = it; + if (!droppedPrefixIt.more()) { + // Do not insert an empty sort order. + break; + } + BSONObjBuilder droppedPrefixBob; + while (droppedPrefixIt.more()) { + droppedPrefixBob.append(droppedPrefixIt.next()); + } + sortsOut->insert(droppedPrefixBob.obj()); + } + + while (it.more()) { + suffixBob.append(it.next()); + } + + // We've found the suffix following the contiguous prefix of equality fields. + // Ex. For index {a: 1, b: 1, c: 1, d: 1} and query {a: 3, b: 5}, this suffix + // of the key pattern is {c: 1, d: 1}. + // + // Now we have to add all prefixes of this suffix as possible sort orders. + // Ex. Continuing the example from above, we have to include sort orders + // {c: 1} and {c: 1, d: 1}. + BSONObj filterPointsObj = suffixBob.obj(); + for (int i = 0; i < filterPointsObj.nFields(); ++i) { + // Make obj out of fields [0,i] + BSONObjIterator it(filterPointsObj); + BSONObjBuilder prefixBob; + for (int j = 0; j <= i; ++j) { + prefixBob.append(it.next()); + } + sortsOut->insert(prefixBob.obj()); + } +} +} string QuerySolutionNode::toString() const { mongoutils::str::stream ss; @@ -393,7 +510,12 @@ QuerySolutionNode* FetchNode::clone() const { // IndexScanNode::IndexScanNode() - : indexIsMultiKey(false), direction(1), maxScan(0), addKeyMetadata(false) {} + : indexIsMultiKey(false), + direction(1), + maxScan(0), + addKeyMetadata(false), + indexCollator(nullptr), + queryCollator(nullptr) {} void IndexScanNode::appendToString(mongoutils::str::stream* ss, int indent) const { addIndent(ss, indent); @@ -458,6 +580,56 @@ bool IndexScanNode::sortedByDiskLoc() const { return true; } +// static +std::set<StringData> IndexScanNode::getFieldsWithStringBounds(const IndexBounds& bounds, + const BSONObj& indexKeyPattern) { + BSONObjIterator keyPatternIterator = indexKeyPattern.begin(); + + if (bounds.isSimpleRange) { + // With a simple range, the only cases we can say for sure do not contain strings + // are those with point bounds. + BSONObjIterator startKeyIterator = bounds.startKey.begin(); + BSONObjIterator endKeyIterator = bounds.endKey.begin(); + while (keyPatternIterator.more() && startKeyIterator.more() && endKeyIterator.more()) { + BSONElement startKey = startKeyIterator.next(); + BSONElement endKey = endKeyIterator.next(); + if (startKey != endKey || elementCanContainString(startKey)) { + if (!rangeCanContainString( + startKey, endKey, (startKeyIterator.more() || bounds.endKeyInclusive))) { + // If the first non-point range cannot contain strings, we don't need to + // add it to the return set. + keyPatternIterator.next(); + } + + // Any remaining keys could have strings. + std::set<StringData> ret; + while (keyPatternIterator.more()) { + ret.insert(keyPatternIterator.next().fieldNameStringData()); + } + return ret; + } + + keyPatternIterator.next(); + } + + return {}; + } + + std::set<StringData> ret; + invariant(bounds.fields.size() == static_cast<size_t>(indexKeyPattern.nFields())); + for (const auto& oil : bounds.fields) { + invariant(keyPatternIterator.more()); + BSONElement el = keyPatternIterator.next(); + OrderedIntervalList intersection = buildStringBoundsOil(el.fieldName()); + IndexBoundsBuilder::intersectize(oil, &intersection); + if (!intersection.intervals.empty()) { + ret.insert(el.fieldNameStringData()); + } + } + + return ret; +} + void IndexScanNode::computeProperties() { _sorts.clear(); @@ -487,7 +659,7 @@ void IndexScanNode::computeProperties() { // See if there are any fields with equalities for bounds. We can drop these // from any sort orders created. - set<string> equalityFields; + std::set<string> equalityFields; if (!bounds.isSimpleRange) { // Figure out how many fields are point intervals. for (size_t i = 0; i < bounds.fields.size(); ++i) { @@ -513,67 +685,28 @@ void IndexScanNode::computeProperties() { } } - if (equalityFields.empty()) { - return; + if (!equalityFields.empty()) { + addEqualityFieldSorts(sortPattern, equalityFields, &_sorts); } - // TODO: Each field in equalityFields could be dropped from the sort order since it is - // a point interval. The full set of sort orders is as follows: - // For each sort in _sorts: - // For each drop in powerset(equalityFields): - // Remove fields in 'drop' from 'sort' and add resulting sort to output. - // - // Since this involves a powerset, we don't generate the full set of possibilities. - // Instead, we generate sort orders by removing possible contiguous prefixes of equality - // predicates. For example, if the key pattern is {a: 1, b: 1, c: 1, d: 1, e: 1} - // and and there are equality predicates on 'a', 'b', and 'c', then here we add the sort - // orders {b: 1, c: 1, d: 1, e: 1} and {c: 1, d: 1, e: 1}. (We also end up adding - // {d: 1, e: 1} and {d: 1}, but this is done later on.) - BSONObjIterator it(sortPattern); - BSONObjBuilder suffixBob; - while (it.more()) { - BSONElement elt = it.next(); - // TODO: string slowness. fix when bounds are stringdata not string. - if (equalityFields.end() == equalityFields.find(string(elt.fieldName()))) { - suffixBob.append(elt); - // This field isn't a point interval, can't drop. - break; - } - - // We add the sort obtained by dropping 'elt' and all preceding elements from the index - // key pattern. - BSONObjIterator droppedPrefixIt = it; - if (!droppedPrefixIt.more()) { - // Do not insert an empty sort order. - break; - } - BSONObjBuilder droppedPrefixBob; - while (droppedPrefixIt.more()) { - droppedPrefixBob.append(droppedPrefixIt.next()); - } - _sorts.insert(droppedPrefixBob.obj()); - } - - while (it.more()) { - suffixBob.append(it.next()); - } + if (!CollatorInterface::collatorsMatch(queryCollator, indexCollator)) { + // Prune sorts containing fields that don't match the collation. + std::set<StringData> collatedFields = getFieldsWithStringBounds(bounds, indexKeyPattern); + auto sortsIt = _sorts.begin(); + while (sortsIt != _sorts.end()) { + bool matched = false; + for (const BSONElement& el : *sortsIt) { + if (collatedFields.find(el.fieldNameStringData()) != collatedFields.end()) { + sortsIt = _sorts.erase(sortsIt); + matched = true; + break; + } + } - // We've found the suffix following the contiguous prefix of equality fields. - // Ex. For index {a: 1, b: 1, c: 1, d: 1} and query {a: 3, b: 5}, this suffix - // of the key pattern is {c: 1, d: 1}. - // - // Now we have to add all prefixes of this suffix as possible sort orders. - // Ex. Continuing the example from above, we have to include sort orders - // {c: 1} and {c: 1, d: 1}. - BSONObj filterPointsObj = suffixBob.obj(); - for (int i = 0; i < filterPointsObj.nFields(); ++i) { - // Make obj out of fields [0,i] - BSONObjIterator it(filterPointsObj); - BSONObjBuilder prefixBob; - for (int j = 0; j <= i; ++j) { - prefixBob.append(it.next()); + if (!matched) { + sortsIt++; + } } - _sorts.insert(prefixBob.obj()); } } @@ -588,6 +721,8 @@ QuerySolutionNode* IndexScanNode::clone() const { copy->maxScan = this->maxScan; copy->addKeyMetadata = this->addKeyMetadata; copy->bounds = this->bounds; + copy->indexCollator = this->indexCollator; + copy->queryCollator = this->queryCollator; return copy; } |