diff options
author | David Storch <david.storch@10gen.com> | 2017-06-22 17:49:08 -0400 |
---|---|---|
committer | David Storch <david.storch@10gen.com> | 2017-07-21 14:29:43 -0400 |
commit | 0b27b9e6c1196c53ec6bf68793ad7b57cb33cd30 (patch) | |
tree | 5e794f47b1060ab3f073f63feba96f786758cab9 /src/mongo | |
parent | 55753837a0445a4ec1d04df18ce821a86d4240d5 (diff) | |
download | mongo-0b27b9e6c1196c53ec6bf68793ad7b57cb33cd30.tar.gz |
SERVER-19402 Change agg array sort semantics to match find.
Diffstat (limited to 'src/mongo')
20 files changed, 1043 insertions, 392 deletions
diff --git a/src/mongo/db/exec/sort_key_generator.cpp b/src/mongo/db/exec/sort_key_generator.cpp index 8f145dffaf0..9e1ff5c2abc 100644 --- a/src/mongo/db/exec/sort_key_generator.cpp +++ b/src/mongo/db/exec/sort_key_generator.cpp @@ -47,153 +47,6 @@ namespace mongo { -// -// SortKeyGenerator -// - -SortKeyGenerator::SortKeyGenerator(OperationContext* opCtx, - const BSONObj& sortSpec, - const CollatorInterface* collator) - : _collator(collator), _rawSortSpec(sortSpec) { - // 'sortSpec' can be a mix of $meta and index key expressions. We pick it apart so that - // we only generate Btree keys for the index key expressions. - - // The Btree key fields go in here. We pass this fake index key pattern to the Btree - // key generator below as part of generating sort keys for the docs. - BSONObjBuilder btreeBob; - - for (auto&& elt : sortSpec) { - if (elt.isNumber()) { - btreeBob.append(elt); - } else { - // If this field of the sort pattern is non-numeric, we expect it to be a text-score - // meta sort. This is validated upstream. - invariant(QueryRequest::isTextScoreMeta(elt)); - _sortHasMeta = true; - } - } - - // The fake index key pattern used to generate Btree keys. - _btreeObj = btreeBob.obj(); - - // If we're just sorting by meta, don't bother with all the key stuff. - if (_btreeObj.isEmpty()) { - return; - } - - // We'll need to treat arrays as if we were to create an index over them. that is, - // we may need to unnest the first level and consider each array element to decide - // the sort order. - std::vector<const char*> fieldNames; - std::vector<BSONElement> fixed; - for (auto&& patternElt : _btreeObj) { - fieldNames.push_back(patternElt.fieldName()); - fixed.push_back(BSONElement()); - } - - constexpr bool isSparse = false; - _keyGen = stdx::make_unique<BtreeKeyGeneratorV1>(fieldNames, fixed, isSparse, _collator); -} - -Status SortKeyGenerator::getSortKey(const WorkingSetMember& member, BSONObj* objOut) const { - StatusWith<BSONObj> sortKey = BSONObj(); - - if (member.hasObj()) { - sortKey = getSortKeyFromObject(member); - } else { - sortKey = getSortKeyFromIndexKey(member); - } - if (!sortKey.isOK()) { - return sortKey.getStatus(); - } - - if (!_sortHasMeta) { - *objOut = sortKey.getValue(); - return Status::OK(); - } - - BSONObjBuilder mergedKeyBob; - - // Merge metadata into the key. - BSONObjIterator it(_rawSortSpec); - BSONObjIterator sortKeyIt(sortKey.getValue()); - while (it.more()) { - BSONElement elt = it.next(); - if (elt.isNumber()) { - // Merge btree key elt. - mergedKeyBob.append(sortKeyIt.next()); - } else if (QueryRequest::isTextScoreMeta(elt)) { - // Add text score metadata - double score = 0.0; - if (member.hasComputed(WSM_COMPUTED_TEXT_SCORE)) { - const TextScoreComputedData* scoreData = static_cast<const TextScoreComputedData*>( - member.getComputed(WSM_COMPUTED_TEXT_SCORE)); - score = scoreData->getScore(); - } - mergedKeyBob.append("$metaTextScore", score); - } - } - - *objOut = mergedKeyBob.obj(); - return Status::OK(); -} - -StatusWith<BSONObj> SortKeyGenerator::getSortKeyFromIndexKey(const WorkingSetMember& member) const { - invariant(member.getState() == WorkingSetMember::RID_AND_IDX); - invariant(!_sortHasMeta); - - BSONObjBuilder sortKeyObj; - for (BSONElement specElt : _rawSortSpec) { - invariant(specElt.isNumber()); - BSONElement sortKeyElt; - invariant(member.getFieldDotted(specElt.fieldName(), &sortKeyElt)); - sortKeyObj.appendAs(sortKeyElt, ""); - } - - return sortKeyObj.obj(); -} - -StatusWith<BSONObj> SortKeyGenerator::getSortKeyFromObject(const WorkingSetMember& member) const { - // Not sorting by anything in the key, just bail out early. - if (_btreeObj.isEmpty()) { - return BSONObj(); - } - - // We will sort '_data' in the same order an index over '_pattern' would have. This is - // tricky. Consider the sort pattern {a:1} and the document {a:[1, 10]}. We have - // potentially two keys we could use to sort on. Here we extract these keys. - const StringData::ComparatorInterface* stringComparator = nullptr; - BSONObjComparator patternCmp( - _btreeObj, BSONObjComparator::FieldNamesMode::kConsider, stringComparator); - BSONObjSet keys = patternCmp.makeBSONObjSet(); - - try { - // There's no need to compute the prefixes of the indexed fields that cause the index to be - // multikey when getting the index keys for sorting. - MultikeyPaths* multikeyPaths = nullptr; - _keyGen->getKeys(member.obj.value(), &keys, multikeyPaths); - } catch (const UserException& e) { - // Probably a parallel array. - if (ErrorCodes::CannotIndexParallelArrays == e.getCode()) { - return Status(ErrorCodes::BadValue, "cannot sort with keys that are parallel arrays"); - } else { - return e.toStatus(); - } - } catch (...) { - return Status(ErrorCodes::InternalError, "unknown error during sort key generation"); - } - - // Key generator isn't sparse so we should at least get an all-null key. - invariant(!keys.empty()); - - // The sort key is the first index key, ordered according to the pattern '_btreeObj'. - return *keys.begin(); -} - -// -// SortKeyGeneratorStage -// - const char* SortKeyGeneratorStage::kStageType = "SORT_KEY_GENERATOR"; SortKeyGeneratorStage::SortKeyGeneratorStage(OperationContext* opCtx, @@ -211,7 +64,7 @@ bool SortKeyGeneratorStage::isEOF() { PlanStage::StageState SortKeyGeneratorStage::doWork(WorkingSetID* out) { if (!_sortKeyGen) { - _sortKeyGen = stdx::make_unique<SortKeyGenerator>(getOpCtx(), _sortSpec, _collator); + _sortKeyGen = stdx::make_unique<SortKeyGenerator>(_sortSpec, _collator); return PlanStage::NEED_TIME; } @@ -219,15 +72,26 @@ PlanStage::StageState SortKeyGeneratorStage::doWork(WorkingSetID* out) { if (stageState == PlanStage::ADVANCED) { WorkingSetMember* member = _ws->get(*out); - BSONObj sortKey; - Status sortKeyStatus = _sortKeyGen->getSortKey(*member, &sortKey); - if (!sortKeyStatus.isOK()) { - *out = WorkingSetCommon::allocateStatusMember(_ws, sortKeyStatus); + StatusWith<BSONObj> sortKey = BSONObj(); + if (member->hasObj()) { + SortKeyGenerator::Metadata metadata; + if (_sortKeyGen->sortHasMeta() && member->hasComputed(WSM_COMPUTED_TEXT_SCORE)) { + auto scoreData = static_cast<const TextScoreComputedData*>( + member->getComputed(WSM_COMPUTED_TEXT_SCORE)); + metadata.textScore = scoreData->getScore(); + } + sortKey = _sortKeyGen->getSortKey(member->obj.value(), &metadata); + } else { + sortKey = getSortKeyFromIndexKey(*member); + } + + if (!sortKey.isOK()) { + *out = WorkingSetCommon::allocateStatusMember(_ws, sortKey.getStatus()); return PlanStage::FAILURE; } // Add the sort key to the WSM as computed data. - member->addComputed(new SortKeyComputedData(sortKey)); + member->addComputed(new SortKeyComputedData(sortKey.getValue())); return PlanStage::ADVANCED; } @@ -250,4 +114,20 @@ const SpecificStats* SortKeyGeneratorStage::getSpecificStats() const { return nullptr; } +StatusWith<BSONObj> SortKeyGeneratorStage::getSortKeyFromIndexKey( + const WorkingSetMember& member) const { + invariant(member.getState() == WorkingSetMember::RID_AND_IDX); + invariant(!_sortKeyGen->sortHasMeta()); + + BSONObjBuilder sortKeyObj; + for (BSONElement specElt : _sortSpec) { + invariant(specElt.isNumber()); + BSONElement sortKeyElt; + invariant(member.getFieldDotted(specElt.fieldName(), &sortKeyElt)); + sortKeyObj.appendAs(sortKeyElt, ""); + } + + return sortKeyObj.obj(); +} + } // namespace mongo diff --git a/src/mongo/db/exec/sort_key_generator.h b/src/mongo/db/exec/sort_key_generator.h index 986ad1b7633..79814418388 100644 --- a/src/mongo/db/exec/sort_key_generator.h +++ b/src/mongo/db/exec/sort_key_generator.h @@ -32,7 +32,7 @@ #include "mongo/bson/bsonobj.h" #include "mongo/db/exec/plan_stage.h" -#include "mongo/db/index/btree_key_generator.h" +#include "mongo/db/index/sort_key_generator.h" #include "mongo/db/query/index_bounds.h" #include "mongo/db/query/stage_types.h" @@ -43,49 +43,6 @@ class Collection; class WorkingSetMember; /** - * Maps a WSM value to a BSONObj key that can then be sorted via BSONObjCmp. - */ -class SortKeyGenerator { -public: - /** - * 'sortSpec' is the BSONObj in the .sort(...) clause. - * - * 'opCtx' must point to a valid OperationContext, but 'opCtx' does not need to outlive the - * constructed SortKeyGenerator. - */ - SortKeyGenerator(OperationContext* opCtx, - const BSONObj& sortSpec, - const CollatorInterface* collator); - - /** - * Returns the key used to sort 'member'. If the member is in LOC_AND_IDX state, it must not - * contain a $meta textScore in its sort spec, and this function will use the index key data - * stored in 'member' to extract the sort key. Otherwise, if the member is in LOC_AND_OBJ or - * OWNED_OBJ state, this function will use the object data stored in 'member' to extract the - * sort key. - */ - Status getSortKey(const WorkingSetMember& member, BSONObj* objOut) const; - -private: - StatusWith<BSONObj> getSortKeyFromIndexKey(const WorkingSetMember& member) const; - StatusWith<BSONObj> getSortKeyFromObject(const WorkingSetMember& member) const; - - const CollatorInterface* _collator = nullptr; - - // The raw object in .sort() - BSONObj _rawSortSpec; - - // The sort pattern with any non-Btree sort pulled out. - BSONObj _btreeObj; - - // If we're not sorting with a $meta value we can short-cut some work. - bool _sortHasMeta = false; - - // Helper to extract sorting keys from documents. - std::unique_ptr<BtreeKeyGenerator> _keyGen; -}; - -/** * Passes results from the child through after adding the sort key for each result as * WorkingSetMember computed data. */ @@ -99,8 +56,6 @@ public: bool isEOF() final; - StageState doWork(WorkingSetID* out) final; - StageType stageType() const final { return STAGE_SORT_KEY_GENERATOR; } @@ -111,7 +66,12 @@ public: static const char* kStageType; +protected: + StageState doWork(WorkingSetID* out) final; + private: + StatusWith<BSONObj> getSortKeyFromIndexKey(const WorkingSetMember& member) const; + WorkingSet* const _ws; // The raw sort pattern as expressed by the user. diff --git a/src/mongo/db/index/SConscript b/src/mongo/db/index/SConscript index 63c69e1763d..b66905be7ba 100644 --- a/src/mongo/db/index/SConscript +++ b/src/mongo/db/index/SConscript @@ -19,10 +19,9 @@ env.Library( source=[ 'btree_key_generator.cpp', 'expression_keys_private.cpp', + 'sort_key_generator.cpp', ], LIBDEPS=[ - 'expression_params', - 'index_descriptor', '$BUILD_DIR/mongo/base', '$BUILD_DIR/mongo/db/bson/dotted_path_support', '$BUILD_DIR/mongo/db/fts/base', @@ -31,6 +30,8 @@ env.Library( '$BUILD_DIR/mongo/db/mongohasher', '$BUILD_DIR/mongo/db/query/collation/collator_interface', '$BUILD_DIR/third_party/s2/s2', + 'expression_params', + 'index_descriptor', ], ) @@ -70,6 +71,7 @@ env.CppUnitTest( 'btree_key_generator_test.cpp', 'hash_key_generator_test.cpp', 's2_key_generator_test.cpp', + 'sort_key_generator_test.cpp', ], LIBDEPS=[ 'key_generator', diff --git a/src/mongo/db/index/sort_key_generator.cpp b/src/mongo/db/index/sort_key_generator.cpp new file mode 100644 index 00000000000..33f40c6ecfb --- /dev/null +++ b/src/mongo/db/index/sort_key_generator.cpp @@ -0,0 +1,170 @@ +/** + * Copyright (C) 2017 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects + * for all of the code used other than as permitted herein. If you modify + * file(s) with this exception, you may extend this exception to your + * version of the file(s), but you are not obligated to do so. If you do not + * wish to do so, delete this exception statement from your version. If you + * delete this exception statement from all source files in the program, + * then also delete it in the license file. + */ + +#include "mongo/platform/basic.h" + +#include "mongo/db/index/sort_key_generator.h" + +#include "mongo/bson/bsonobj_comparator.h" + +namespace mongo { + +SortKeyGenerator::SortKeyGenerator(const BSONObj& sortSpec, const CollatorInterface* collator) + : _collator(collator) { + BSONObjBuilder btreeBob; + + for (auto&& elt : sortSpec) { + if (elt.isNumber()) { + btreeBob.append(elt); + _patternPartTypes.push_back(SortPatternPartType::kFieldPath); + } else { + // If this field of the sort pattern is non-numeric, we expect it to be a text-score + // meta sort. + invariant(elt.type() == BSONType::Object); + invariant(elt.embeddedObject().nFields() == 1); + auto metaElem = elt.embeddedObject().firstElement(); + invariant(metaElem.fieldNameStringData() == "$meta"_sd); + if (metaElem.valueStringData() == "textScore"_sd) { + _patternPartTypes.push_back(SortPatternPartType::kMetaTextScore); + } else { + invariant(metaElem.valueStringData() == "randVal"_sd); + _patternPartTypes.push_back(SortPatternPartType::kMetaRandVal); + } + _sortHasMeta = true; + } + } + + // The fake index key pattern used to generate Btree keys. + _sortSpecWithoutMeta = btreeBob.obj(); + + // If we're just sorting by meta, don't bother with all the key stuff. + if (_sortSpecWithoutMeta.isEmpty()) { + return; + } + + // We'll need to treat arrays as if we were to create an index over them. that is, we may need + // to unnest the first level and consider each array element to decide the sort order. In order + // to do this, we make a BtreeKeyGenerator. + std::vector<const char*> fieldNames; + std::vector<BSONElement> fixed; + for (auto&& patternElt : _sortSpecWithoutMeta) { + fieldNames.push_back(patternElt.fieldName()); + fixed.push_back(BSONElement()); + } + + constexpr bool isSparse = false; + _indexKeyGen = stdx::make_unique<BtreeKeyGeneratorV1>(fieldNames, fixed, isSparse, _collator); +} + +StatusWith<BSONObj> SortKeyGenerator::getSortKey(const BSONObj& obj, + const Metadata* metadata) const { + if (_sortHasMeta) { + invariant(metadata); + } + + auto indexKey = getIndexKey(obj); + if (!indexKey.isOK()) { + return indexKey; + } + + if (!_sortHasMeta) { + // We don't have to worry about $meta sort, so the index key becomes the sort key. + return indexKey; + } + + BSONObjBuilder mergedKeyBob; + + // Merge metadata into the key. + BSONObjIterator sortKeyIt(indexKey.getValue()); + for (auto type : _patternPartTypes) { + switch (type) { + case SortPatternPartType::kFieldPath: { + invariant(sortKeyIt.more()); + mergedKeyBob.append(sortKeyIt.next()); + continue; + } + case SortPatternPartType::kMetaTextScore: { + mergedKeyBob.append("", metadata->textScore); + continue; + } + case SortPatternPartType::kMetaRandVal: { + mergedKeyBob.append("", metadata->randVal); + continue; + } + default: { MONGO_UNREACHABLE; } + } + } + + // We should have added a key component for each part of the index key pattern. + invariant(!sortKeyIt.more()); + + return mergedKeyBob.obj(); +} + +StatusWith<BSONObj> SortKeyGenerator::getIndexKey(const BSONObj& obj) const { + // Not sorting by anything in the key, just bail out early. + if (_sortSpecWithoutMeta.isEmpty()) { + return BSONObj(); + } + + // We will sort 'obj' in the same order an index over '_sortSpecWithoutMeta' would have. This is + // tricky. Consider the sort pattern {a:1} and the document {a: [1, 10]}. We have potentially + // two keys we could use to sort on. Here we extract these keys. + // + // The keys themselves will incorporate the collation, with strings translated to their + // corresponding collation keys. Therefore, we use the simple string comparator when comparing + // the keys themselves. + const StringData::ComparatorInterface* stringComparator = nullptr; + BSONObjComparator patternCmp( + _sortSpecWithoutMeta, BSONObjComparator::FieldNamesMode::kConsider, stringComparator); + BSONObjSet keys = patternCmp.makeBSONObjSet(); + + try { + // There's no need to compute the prefixes of the indexed fields that cause the index to be + // multikey when getting the index keys for sorting. + MultikeyPaths* multikeyPaths = nullptr; + _indexKeyGen->getKeys(obj, &keys, multikeyPaths); + } catch (const UserException& e) { + // Probably a parallel array. + if (ErrorCodes::CannotIndexParallelArrays == e.getCode()) { + return Status(ErrorCodes::BadValue, "cannot sort with keys that are parallel arrays"); + } else { + return e.toStatus(); + } + } catch (...) { + return Status(ErrorCodes::InternalError, "unknown error during sort key generation"); + } + + // Key generator isn't sparse so we should at least get an all-null key. + invariant(!keys.empty()); + + // The sort key is the first index key, ordered according to the pattern '_sortSpecWithoutMeta'. + return *keys.begin(); +} + +} // namespace mongo diff --git a/src/mongo/db/index/sort_key_generator.h b/src/mongo/db/index/sort_key_generator.h new file mode 100644 index 00000000000..addd5c779b9 --- /dev/null +++ b/src/mongo/db/index/sort_key_generator.h @@ -0,0 +1,101 @@ +/** + * Copyright (C) 2017 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects + * for all of the code used other than as permitted herein. If you modify + * file(s) with this exception, you may extend this exception to your + * version of the file(s), but you are not obligated to do so. If you do not + * wish to do so, delete this exception statement from your version. If you + * delete this exception statement from all source files in the program, + * then also delete it in the license file. + */ + +#pragma once + +#include "mongo/bson/bsonobj.h" +#include "mongo/db/index/btree_key_generator.h" +#include "mongo/db/operation_context.h" +#include "mongo/db/query/collation/collator_interface.h" + +namespace mongo { + +class SortKeyGenerator { +public: + /** + * Metadata about a document which is needed to produce keys for $meta sort. The client of the + * SortKeyGenerator must provide this metadata in order to correctly obtain sort keys for sort + * patterns with $meta. + */ + struct Metadata { + double textScore = 0.0; + double randVal = 0.0; + }; + + /** + * Constructs a sort key generator which will generate keys for sort pattern 'sortSpec'. The + * keys will incorporate the collation given by 'collator', and thus when actually compared to + * one another should use the simple collation. + */ + SortKeyGenerator(const BSONObj& sortSpec, const CollatorInterface* collator); + + /** + * Returns the key which should be used to sort 'obj', or a non-OK status if no key could be + * generated. + * + * The caller must supply the appropriate 'metadata' in the case that the sort pattern includes + * a $meta sort (i.e. if sortHasMeta() is true). These values are filled in at the corresponding + * positions in the sort key. + */ + StatusWith<BSONObj> getSortKey(const BSONObj& obj, const Metadata*) const; + + /** + * Returns true if the sort pattern for this sort key generator includes a $meta sort. + */ + bool sortHasMeta() const { + return _sortHasMeta; + } + +private: + // Describes whether a component of the sort pattern is a field path (e.g. sort by "a.b"), or + // else describes the type of $meta sort. + enum class SortPatternPartType { + kFieldPath, + kMetaTextScore, + kMetaRandVal, + }; + + StatusWith<BSONObj> getIndexKey(const BSONObj& obj) const; + + const CollatorInterface* _collator = nullptr; + + // The sort pattern with any $meta sort components stripped out, since the underlying index key + // generator does not understand $meta sort. + BSONObj _sortSpecWithoutMeta; + + // For each element of the raw sort spec, describes whether the element is sorting by a field + // path or by a particular meta-sort. + std::vector<SortPatternPartType> _patternPartTypes; + + // If we're not sorting with a $meta value we can short-cut some work. + bool _sortHasMeta = false; + + std::unique_ptr<BtreeKeyGenerator> _indexKeyGen; +}; + +} // namespace mongo diff --git a/src/mongo/db/index/sort_key_generator_test.cpp b/src/mongo/db/index/sort_key_generator_test.cpp new file mode 100644 index 00000000000..854bb912f6e --- /dev/null +++ b/src/mongo/db/index/sort_key_generator_test.cpp @@ -0,0 +1,222 @@ +/** + * Copyright (C) 2017 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects + * for all of the code used other than as permitted herein. If you modify + * file(s) with this exception, you may extend this exception to your + * version of the file(s), but you are not obligated to do so. If you do not + * wish to do so, delete this exception statement from your version. If you + * delete this exception statement from all source files in the program, + * then also delete it in the license file. + */ + +#include "mongo/platform/basic.h" + +#include "mongo/db/index/sort_key_generator.h" + +#include "mongo/db/query/collation/collator_interface_mock.h" +#include "mongo/stdx/memory.h" +#include "mongo/unittest/death_test.h" +#include "mongo/unittest/unittest.h" + +namespace mongo { +namespace { + +TEST(SortKeyGeneratorTest, ExtractNumberKeyForNonCompoundSortNonNested) { + auto sortKeyGen = stdx::make_unique<SortKeyGenerator>(BSON("a" << 1), nullptr); + auto sortKey = sortKeyGen->getSortKey(fromjson("{_id: 0, a: 5}"), nullptr); + ASSERT_OK(sortKey.getStatus()); + ASSERT_BSONOBJ_EQ(sortKey.getValue(), BSON("" << 5)); +} + +TEST(SortKeyGeneratorTest, ExtractNumberKeyFromDocWithSeveralFields) { + auto sortKeyGen = stdx::make_unique<SortKeyGenerator>(BSON("a" << 1), nullptr); + auto sortKey = sortKeyGen->getSortKey(fromjson("{_id: 0, z: 10, a: 6, b: 16}"), nullptr); + ASSERT_OK(sortKey.getStatus()); + ASSERT_BSONOBJ_EQ(sortKey.getValue(), BSON("" << 6)); +} + +TEST(SortKeyGeneratorTest, ExtractStringKeyNonCompoundNonNested) { + auto sortKeyGen = stdx::make_unique<SortKeyGenerator>(BSON("a" << 1), nullptr); + auto sortKey = + sortKeyGen->getSortKey(fromjson("{_id: 0, z: 'thing1', a: 'thing2', b: 16}"), nullptr); + ASSERT_OK(sortKey.getStatus()); + ASSERT_BSONOBJ_EQ(sortKey.getValue(), + BSON("" + << "thing2")); +} + +TEST(SortKeyGeneratorTest, CompoundSortPattern) { + auto sortKeyGen = stdx::make_unique<SortKeyGenerator>(BSON("a" << 1 << "b" << 1), nullptr); + auto sortKey = + sortKeyGen->getSortKey(fromjson("{_id: 0, z: 'thing1', a: 99, c: {a: 4}, b: 16}"), nullptr); + ASSERT_OK(sortKey.getStatus()); + ASSERT_BSONOBJ_EQ(sortKey.getValue(), BSON("" << 99 << "" << 16)); +} + +TEST(SortKeyGeneratorTest, CompoundSortPatternWithDottedPath) { + auto sortKeyGen = stdx::make_unique<SortKeyGenerator>(BSON("c.a" << 1 << "b" << 1), nullptr); + auto sortKey = + sortKeyGen->getSortKey(fromjson("{_id: 0, z: 'thing1', a: 99, c: {a: 4}, b: 16}"), nullptr); + ASSERT_OK(sortKey.getStatus()); + ASSERT_BSONOBJ_EQ(sortKey.getValue(), BSON("" << 4 << "" << 16)); +} + +TEST(SortKeyGeneratorTest, CompoundPatternLeadingFieldIsArray) { + auto sortKeyGen = stdx::make_unique<SortKeyGenerator>(BSON("c" << 1 << "b" << 1), nullptr); + auto sortKey = sortKeyGen->getSortKey( + fromjson("{_id: 0, z: 'thing1', a: 99, c: [2, 4, 1], b: 16}"), nullptr); + ASSERT_OK(sortKey.getStatus()); + ASSERT_BSONOBJ_EQ(sortKey.getValue(), BSON("" << 1 << "" << 16)); +} + +TEST(SortKeyGeneratorTest, ExtractStringSortKeyWithCollatorUsesComparisonKey) { + CollatorInterfaceMock collator(CollatorInterfaceMock::MockType::kReverseString); + auto sortKeyGen = stdx::make_unique<SortKeyGenerator>(BSON("a" << 1), &collator); + auto sortKey = + sortKeyGen->getSortKey(fromjson("{_id: 0, z: 'thing1', a: 'thing2', b: 16}"), nullptr); + ASSERT_OK(sortKey.getStatus()); + ASSERT_BSONOBJ_EQ(sortKey.getValue(), + BSON("" + << "2gniht")); +} + +TEST(SortKeyGeneratorTest, CollatorHasNoEffectWhenExtractingNonStringSortKey) { + CollatorInterfaceMock collator(CollatorInterfaceMock::MockType::kReverseString); + auto sortKeyGen = stdx::make_unique<SortKeyGenerator>(BSON("a" << 1), &collator); + auto sortKey = sortKeyGen->getSortKey(fromjson("{_id: 0, z: 10, a: 6, b: 16}"), nullptr); + ASSERT_OK(sortKey.getStatus()); + ASSERT_BSONOBJ_EQ(sortKey.getValue(), BSON("" << 6)); +} + +TEST(SortKeyGeneratorTest, SortKeyGenerationForArraysChoosesCorrectKey) { + auto sortKeyGen = stdx::make_unique<SortKeyGenerator>(BSON("a" << -1), nullptr); + auto sortKey = sortKeyGen->getSortKey(fromjson("{_id: 0, a: [1, 2, 3, 4]}"), nullptr); + ASSERT_OK(sortKey.getStatus()); + ASSERT_BSONOBJ_EQ(sortKey.getValue(), BSON("" << 4)); +} + +TEST(SortKeyGeneratorTest, EnsureSortKeyGenerationForArraysRespectsCollation) { + CollatorInterfaceMock collator(CollatorInterfaceMock::MockType::kReverseString); + auto sortKeyGen = stdx::make_unique<SortKeyGenerator>(BSON("a" << 1), &collator); + auto sortKey = + sortKeyGen->getSortKey(fromjson("{_id: 0, a: ['aaz', 'zza', 'yya', 'zzb']}"), nullptr); + ASSERT_OK(sortKey.getStatus()); + ASSERT_BSONOBJ_EQ(sortKey.getValue(), + BSON("" + << "ayy")); +} + +TEST(SortKeyGeneratorTest, SortKeyGenerationForArraysRespectsCompoundOrdering) { + auto sortKeyGen = stdx::make_unique<SortKeyGenerator>(BSON("a.b" << 1 << "a.c" << -1), nullptr); + auto sortKey = sortKeyGen->getSortKey( + fromjson("{_id: 0, a: [{b: 1, c: 0}, {b: 0, c: 3}, {b: 0, c: 1}]}"), nullptr); + ASSERT_OK(sortKey.getStatus()); + ASSERT_BSONOBJ_EQ(sortKey.getValue(), BSON("" << 0 << "" << 3)); +} + +DEATH_TEST(SortKeyGeneratorTest, + SortPatternComponentWithStringIsFatal, + "Invariant failure elt.type() == BSONType::Object") { + stdx::make_unique<SortKeyGenerator>(BSON("a" + << "foo"), + nullptr); +} + +DEATH_TEST(SortKeyGeneratorTest, + SortPatternComponentWhoseObjectHasMultipleKeysIsFatal, + "Invariant failure elt.embeddedObject().nFields() == 1") { + stdx::make_unique<SortKeyGenerator>(BSON("a" << BSON("$meta" + << "textScore" + << "extra" + << 1)), + nullptr); +} + +DEATH_TEST(SortKeyGeneratorTest, + SortPatternComponentWithNonMetaObjectSortIsFatal, + "Invariant failure metaElem.fieldNameStringData() == \"$meta\"_sd") { + stdx::make_unique<SortKeyGenerator>(BSON("a" << BSON("$unknown" + << "textScore")), + nullptr); +} + +DEATH_TEST(SortKeyGeneratorTest, + SortPatternComponentWithUnknownMetaKeywordIsFatal, + "Invariant failure metaElem.valueStringData() == \"randVal\"_sd") { + stdx::make_unique<SortKeyGenerator>(BSON("a" << BSON("$meta" + << "unknown")), + nullptr); +} + +DEATH_TEST(SortKeyGeneratorTest, + NoMetadataWhenPatternHasMetaTextScoreIsFatal, + "Invariant failure metadata") { + auto sortKeyGen = stdx::make_unique<SortKeyGenerator>(BSON("a" << BSON("$meta" + << "textScore")), + nullptr); + uassertStatusOK(sortKeyGen->getSortKey(BSONObj{}, nullptr).getStatus()); +} + +DEATH_TEST(SortKeyGeneratorTest, + NoMetadataWhenPatternHasMetaRandValIsFatal, + "Invariant failure metadata") { + auto sortKeyGen = stdx::make_unique<SortKeyGenerator>(BSON("a" << BSON("$meta" + << "randVal")), + nullptr); + uassertStatusOK(sortKeyGen->getSortKey(BSONObj{}, nullptr).getStatus()); +} + +TEST(SortKeyGeneratorTest, CanGenerateKeysForTextScoreMetaSort) { + auto sortKeyGen = stdx::make_unique<SortKeyGenerator>(BSON("a" << BSON("$meta" + << "textScore")), + nullptr); + SortKeyGenerator::Metadata metadata; + metadata.textScore = 1.5; + auto sortKey = sortKeyGen->getSortKey(BSONObj{}, &metadata); + ASSERT_OK(sortKey.getStatus()); + ASSERT_BSONOBJ_EQ(sortKey.getValue(), BSON("" << 1.5)); +} + +TEST(SortKeyGeneratorTest, CanGenerateKeysForRandValMetaSort) { + auto sortKeyGen = stdx::make_unique<SortKeyGenerator>(BSON("a" << BSON("$meta" + << "randVal")), + nullptr); + SortKeyGenerator::Metadata metadata; + metadata.randVal = 0.3; + auto sortKey = sortKeyGen->getSortKey(BSONObj{}, &metadata); + ASSERT_OK(sortKey.getStatus()); + ASSERT_BSONOBJ_EQ(sortKey.getValue(), BSON("" << 0.3)); +} + +TEST(SortKeyGeneratorTest, CanGenerateKeysForCompoundMetaSort) { + BSONObj pattern = fromjson( + "{a: 1, b: {$meta: 'randVal'}, c: {$meta: 'textScore'}, d: -1, e: {$meta: 'textScore'}}"); + auto sortKeyGen = stdx::make_unique<SortKeyGenerator>(pattern, nullptr); + SortKeyGenerator::Metadata metadata; + metadata.randVal = 0.3; + metadata.textScore = 1.5; + auto sortKey = sortKeyGen->getSortKey(BSON("a" << 4 << "d" << 5), &metadata); + ASSERT_OK(sortKey.getStatus()); + ASSERT_BSONOBJ_EQ(sortKey.getValue(), + BSON("" << 4 << "" << 0.3 << "" << 1.5 << "" << 5 << "" << 1.5)); +} + +} // namespace +} // namespace mongo diff --git a/src/mongo/db/pipeline/SConscript b/src/mongo/db/pipeline/SConscript index 20ca397173f..85c78e5a0c9 100644 --- a/src/mongo/db/pipeline/SConscript +++ b/src/mongo/db/pipeline/SConscript @@ -238,6 +238,7 @@ docSourceEnv.Library( 'document_source_geo_near.cpp', 'document_source_group.cpp', 'document_source_index_stats.cpp', + 'document_source_internal_inhibit_optimization.cpp', 'document_source_limit.cpp', 'document_source_match.cpp', 'document_source_merge_cursors.cpp', @@ -256,22 +257,23 @@ docSourceEnv.Library( 'lite_parsed_document_source.cpp', ], LIBDEPS=[ - 'accumulator', - 'granularity_rounder', - 'dependencies', - 'document_value', - 'expression', - 'parsed_aggregation_projection', '$BUILD_DIR/mongo/client/clientdriver', '$BUILD_DIR/mongo/db/bson/dotted_path_support', - '$BUILD_DIR/mongo/db/matcher/expressions', + '$BUILD_DIR/mongo/db/index/key_generator', '$BUILD_DIR/mongo/db/matcher/expression_algo', + '$BUILD_DIR/mongo/db/matcher/expressions', '$BUILD_DIR/mongo/db/service_context', '$BUILD_DIR/mongo/db/stats/top', '$BUILD_DIR/mongo/db/storage/encryption_hooks', '$BUILD_DIR/mongo/db/storage/storage_options', '$BUILD_DIR/mongo/s/is_mongos', '$BUILD_DIR/third_party/shim_snappy', + 'accumulator', + 'dependencies', + 'document_value', + 'expression', + 'granularity_rounder', + 'parsed_aggregation_projection', ], ) diff --git a/src/mongo/db/pipeline/document_path_support.cpp b/src/mongo/db/pipeline/document_path_support.cpp index 38dbcce6ac2..a2b1b11378d 100644 --- a/src/mongo/db/pipeline/document_path_support.cpp +++ b/src/mongo/db/pipeline/document_path_support.cpp @@ -119,5 +119,37 @@ void visitAllValuesAtPath(const Document& doc, visitAllValuesAtPathHelper(doc, path, 0, callback); } +StatusWith<Value> extractElementAlongNonArrayPath(const Document& doc, const FieldPath& path) { + invariant(path.getPathLength() > 0); + Value curValue = doc.getField(path.getFieldName(0)); + if (curValue.getType() == BSONType::Array) { + return {ErrorCodes::InternalError, "array along path"}; + } + + for (size_t i = 1; i < path.getPathLength(); i++) { + curValue = curValue[path.getFieldName(i)]; + if (curValue.getType() == BSONType::Array) { + return {ErrorCodes::InternalError, "array along path"}; + } + } + + return curValue; +} + +BSONObj documentToBsonWithPaths(const Document& input, const std::set<std::string>& paths) { + BSONObjBuilder outputBuilder; + for (auto&& path : paths) { + // getNestedField does not handle dotted paths correctly, so instead of retrieving the + // entire path, we just extract the first element of the path. + const auto prefix = FieldPath::extractFirstFieldFromDottedPath(path); + if (!outputBuilder.hasField(prefix)) { + // Avoid adding the same prefix twice. + input.getField(prefix).addToBsonObj(&outputBuilder, prefix); + } + } + + return outputBuilder.obj(); +} + } // namespace document_path_support } // namespace mongo diff --git a/src/mongo/db/pipeline/document_path_support.h b/src/mongo/db/pipeline/document_path_support.h index 94e22f604c7..8882bf816dd 100644 --- a/src/mongo/db/pipeline/document_path_support.h +++ b/src/mongo/db/pipeline/document_path_support.h @@ -51,5 +51,18 @@ void visitAllValuesAtPath(const Document& doc, const FieldPath& path, stdx::function<void(const Value&)> callback); +/** + * Returns the element at 'path' in 'doc', or a missing Value if the path does not fully exist. + * + * Returns ErrorCodes::InternalError if an array is encountered along the path or at the end of the + * path. + */ +StatusWith<Value> extractElementAlongNonArrayPath(const Document& doc, const FieldPath& path); + +/** + * Extracts 'paths' from the input document and returns a BSON object containing only those paths. + */ +BSONObj documentToBsonWithPaths(const Document&, const std::set<std::string>& paths); + } // namespace document_path_support } // namespace mongo diff --git a/src/mongo/db/pipeline/document_path_support_test.cpp b/src/mongo/db/pipeline/document_path_support_test.cpp index cb3a2e1b3b5..eb4fd089407 100644 --- a/src/mongo/db/pipeline/document_path_support_test.cpp +++ b/src/mongo/db/pipeline/document_path_support_test.cpp @@ -281,6 +281,101 @@ TEST(VisitAllValuesAtPathTest, DoesNotAddMissingValueWithinArrayToResults) { ASSERT_EQ(values.count(Value(2)), 1UL); } +TEST(ExtractElementAlongNonArrayPathTest, ReturnsMissingIfPathDoesNotExist) { + Document doc{{"a", 1}, {"b", 2}}; + auto result = extractElementAlongNonArrayPath(doc, FieldPath{"c.d"}); + ASSERT_OK(result.getStatus()); + ASSERT_VALUE_EQ(result.getValue(), Value{}); +} + +TEST(ExtractElementAlongNonArrayPathTest, ReturnsMissingIfPathPartiallyExists) { + Document doc{fromjson("{a: {b: {c: 1}}}")}; + auto result = extractElementAlongNonArrayPath(doc, FieldPath{"a.b.c.d"}); + ASSERT_OK(result.getStatus()); + ASSERT_VALUE_EQ(result.getValue(), Value{}); +} + +TEST(ExtractElementAlongNonArrayPathTest, ReturnsValueIfPathExists) { + Document doc{fromjson("{a: {b: {c: {d: {e: 1}}}}}")}; + auto result = extractElementAlongNonArrayPath(doc, FieldPath{"a.b.c.d"}); + ASSERT_OK(result.getStatus()); + ASSERT_VALUE_EQ(result.getValue(), Value{BSON("e" << 1)}); +} + +TEST(ExtractElementAlongNonArrayPathTest, FailsIfPathTerminatesAtEmptyArray) { + Document doc{fromjson("{a: {b: {c: {d: []}}}}}")}; + auto result = extractElementAlongNonArrayPath(doc, FieldPath{"a.b.c.d"}); + ASSERT_EQ(result.getStatus(), ErrorCodes::InternalError); +} + +TEST(ExtractElementAlongNonArrayPathTest, FailsIfPathTerminatesAtNonEmptyArray) { + Document doc{fromjson("{a: {b: {c: {d: [1, 2, 3]}}}}}")}; + auto result = extractElementAlongNonArrayPath(doc, FieldPath{"a.b.c.d"}); + ASSERT_EQ(result.getStatus(), ErrorCodes::InternalError); +} + +TEST(ExtractElementAlongNonArrayPathTest, FailsIfPathContainsArray) { + Document doc{fromjson("{a: {b: [{c: {d: 1}}]}}")}; + auto result = extractElementAlongNonArrayPath(doc, FieldPath{"a.b.c.d"}); + ASSERT_EQ(result.getStatus(), ErrorCodes::InternalError); +} + +TEST(ExtractElementAlongNonArrayPathTest, FailsIfFirstPathComponentIsArray) { + Document doc{fromjson("{a: [1, 2, {b: 1}]}")}; + auto result = extractElementAlongNonArrayPath(doc, FieldPath{"a.b"}); + ASSERT_EQ(result.getStatus(), ErrorCodes::InternalError); +} + +TEST(DocumentToBsonWithPathsTest, ShouldExtractTopLevelFieldIfDottedFieldNeeded) { + Document input(fromjson("{a: 1, b: {c: 1, d: 1}}")); + BSONObj expected = fromjson("{b: {c: 1, d: 1}}"); + ASSERT_BSONOBJ_EQ(expected, document_path_support::documentToBsonWithPaths(input, {"b.c"})); +} + +TEST(DocumentToBsonWithPathsTest, ShouldExtractEntireArray) { + Document input(fromjson("{a: [1, 2, 3], b: 1}")); + BSONObj expected = fromjson("{a: [1, 2, 3]}"); + ASSERT_BSONOBJ_EQ(expected, document_path_support::documentToBsonWithPaths(input, {"a"})); +} + +TEST(DocumentToBsonWithPathsTest, ShouldExtractEntireArrayWithNumericPathComponent) { + Document input(fromjson("{a: [1, 2, 3], b: 1}")); + BSONObj expected = fromjson("{a: [1, 2, 3]}"); + ASSERT_BSONOBJ_EQ(expected, document_path_support::documentToBsonWithPaths(input, {"a.0"})); +} + +TEST(DocumentToBsonWithPathsTest, ShouldExtractEntireObjectWithNumericPathComponent) { + Document input(fromjson("{a: {'0': 2, c: 3}, b: 1}")); + BSONObj expected = fromjson("{a: {'0': 2, c: 3}}"); + ASSERT_BSONOBJ_EQ(expected, document_path_support::documentToBsonWithPaths(input, {"a.0"})); +} + +TEST(DocumentToBsonWithPathsTest, ShouldOnlyAddPrefixedFieldOnceIfTwoDottedSubfields) { + Document input(fromjson("{a: 1, b: {c: 1, f: {d: {e: 1}}}}")); + BSONObj expected = fromjson("{b: {c: 1, f: {d: {e: 1}}}}"); + ASSERT_BSONOBJ_EQ(expected, + document_path_support::documentToBsonWithPaths(input, {"b.f", "b.f.d.e"})); +} + +TEST(DocumentToBsonWithPathsTest, MissingFieldShouldNotAppearInResult) { + Document input(fromjson("{a: 1}")); + BSONObj expected; + ASSERT_BSONOBJ_EQ(expected, document_path_support::documentToBsonWithPaths(input, {"b", "c"})); +} + +TEST(DocumentToBsonWithPathsTest, ShouldSerializeNothingIfNothingIsNeeded) { + Document input(fromjson("{a: 1, b: {c: 1}}")); + BSONObj expected; + ASSERT_BSONOBJ_EQ( + expected, document_path_support::documentToBsonWithPaths(input, std::set<std::string>{})); +} + +TEST(DocumentToBsonWithPathsTest, ShouldExtractEntireArrayFromPrefixOfDottedField) { + Document input(fromjson("{a: [{b: 1}, {b: 2}], c: 1}")); + BSONObj expected = fromjson("{a: [{b: 1}, {b: 2}]}"); + ASSERT_BSONOBJ_EQ(expected, document_path_support::documentToBsonWithPaths(input, {"a.b"})); +} + } // namespace } // namespace document_path_support } // namespace mongo diff --git a/src/mongo/db/pipeline/document_source_internal_inhibit_optimization.cpp b/src/mongo/db/pipeline/document_source_internal_inhibit_optimization.cpp new file mode 100644 index 00000000000..9fc0ef985ab --- /dev/null +++ b/src/mongo/db/pipeline/document_source_internal_inhibit_optimization.cpp @@ -0,0 +1,67 @@ +/** + * Copyright (C) 2017 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects + * for all of the code used other than as permitted herein. If you modify + * file(s) with this exception, you may extend this exception to your + * version of the file(s), but you are not obligated to do so. If you do not + * wish to do so, delete this exception statement from your version. If you + * delete this exception statement from all source files in the program, + * then also delete it in the license file. + */ + +#include "mongo/platform/basic.h" + +#include "mongo/db/pipeline/document_source_internal_inhibit_optimization.h" + +namespace mongo { + +REGISTER_DOCUMENT_SOURCE(_internalInhibitOptimization, + LiteParsedDocumentSourceDefault::parse, + DocumentSourceInternalInhibitOptimization::createFromBson); + +constexpr StringData DocumentSourceInternalInhibitOptimization::kStageName; + +boost::intrusive_ptr<DocumentSource> DocumentSourceInternalInhibitOptimization::createFromBson( + BSONElement elem, const boost::intrusive_ptr<ExpressionContext>& expCtx) { + uassert(ErrorCodes::TypeMismatch, + str::stream() << "$_internalInhibitOptimization must take a nested object but found: " + << elem, + elem.type() == BSONType::Object); + + auto specObj = elem.embeddedObject(); + uassert(ErrorCodes::FailedToParse, + str::stream() << "$_internalInhibitOptimization must take an empty object but found: " + << specObj, + specObj.isEmpty()); + + return new DocumentSourceInternalInhibitOptimization(expCtx); +} + +DocumentSource::GetNextResult DocumentSourceInternalInhibitOptimization::getNext() { + pExpCtx->checkForInterrupt(); + return pSource->getNext(); +} + +Value DocumentSourceInternalInhibitOptimization::serialize( + boost::optional<ExplainOptions::Verbosity> explain) const { + return Value(Document{{getSourceName(), Value{Document{}}}}); +} + +} // namesace mongo diff --git a/src/mongo/db/pipeline/document_source_internal_inhibit_optimization.h b/src/mongo/db/pipeline/document_source_internal_inhibit_optimization.h new file mode 100644 index 00000000000..a750443a5b5 --- /dev/null +++ b/src/mongo/db/pipeline/document_source_internal_inhibit_optimization.h @@ -0,0 +1,61 @@ +/** + * Copyright (C) 2017 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects + * for all of the code used other than as permitted herein. If you modify + * file(s) with this exception, you may extend this exception to your + * version of the file(s), but you are not obligated to do so. If you do not + * wish to do so, delete this exception statement from your version. If you + * delete this exception statement from all source files in the program, + * then also delete it in the license file. + */ + +#pragma once + +#include "mongo/db/pipeline/document_source.h" + +namespace mongo { + +/** + * An internal stage available for testing. Acts as a simple passthrough of intermediate results + * from the source stage. Does not participate in optimizations such as swapping, coalescing, or + * pushdown into the query system, so this stage can be useful in tests to ensure that an + * unoptimized code path is being exercised. + */ +class DocumentSourceInternalInhibitOptimization final : public DocumentSource { +public: + static constexpr StringData kStageName = "$_internalInhibitOptimization"_sd; + + static boost::intrusive_ptr<DocumentSource> createFromBson( + BSONElement, const boost::intrusive_ptr<ExpressionContext>&); + + DocumentSourceInternalInhibitOptimization(const boost::intrusive_ptr<ExpressionContext>& expCtx) + : DocumentSource(expCtx) {} + + const char* getSourceName() const final { + return kStageName.rawData(); + } + + GetNextResult getNext() final; + +private: + Value serialize(boost::optional<ExplainOptions::Verbosity> explain = boost::none) const final; +}; + +} // namesace mongo diff --git a/src/mongo/db/pipeline/document_source_match.cpp b/src/mongo/db/pipeline/document_source_match.cpp index e34d9f2d1a5..7a58d33f7b4 100644 --- a/src/mongo/db/pipeline/document_source_match.cpp +++ b/src/mongo/db/pipeline/document_source_match.cpp @@ -37,6 +37,7 @@ #include "mongo/db/matcher/expression_parser.h" #include "mongo/db/matcher/extensions_callback_noop.h" #include "mongo/db/pipeline/document.h" +#include "mongo/db/pipeline/document_path_support.h" #include "mongo/db/pipeline/expression.h" #include "mongo/db/pipeline/lite_parsed_document_source.h" #include "mongo/stdx/memory.h" @@ -78,7 +79,8 @@ DocumentSource::GetNextResult DocumentSourceMatch::getNext() { // only serialize the fields we need to do the match. BSONObj toMatch = _dependencies.needWholeDocument ? nextInput.getDocument().toBson() - : getObjectForMatch(nextInput.getDocument(), _dependencies.fields); + : document_path_support::documentToBsonWithPaths(nextInput.getDocument(), + _dependencies.fields); if (_expression->matchesBSON(toMatch)) { return nextInput; @@ -120,22 +122,6 @@ Pipeline::SourceContainer::iterator DocumentSourceMatch::doOptimizeAt( return std::next(itr); } -BSONObj DocumentSourceMatch::getObjectForMatch(const Document& input, - const std::set<std::string>& fields) { - BSONObjBuilder matchObject; - for (auto&& field : fields) { - // getNestedField does not handle dotted paths correctly, so instead of retrieving the - // entire path, we just extract the first element of the path. - const auto prefix = FieldPath::extractFirstFieldFromDottedPath(field); - if (!matchObject.hasField(prefix)) { - // Avoid adding the same prefix twice. - input.getField(prefix).addToBsonObj(&matchObject, prefix); - } - } - - return matchObject.obj(); -} - namespace { // This block contains the functions that make up the implementation of // DocumentSourceMatch::redactSafePortion(). They will only be called after diff --git a/src/mongo/db/pipeline/document_source_match.h b/src/mongo/db/pipeline/document_source_match.h index b6c41cd5161..0b5800515aa 100644 --- a/src/mongo/db/pipeline/document_source_match.h +++ b/src/mongo/db/pipeline/document_source_match.h @@ -125,11 +125,6 @@ public: splitSourceBy(const std::set<std::string>& fields, const StringMap<std::string>& renames); /** - * Given a document 'input', extract 'fields' and produce a BSONObj with those values. - */ - static BSONObj getObjectForMatch(const Document& input, const std::set<std::string>& fields); - - /** * Returns a new DocumentSourceMatch with a MatchExpression that, if executed on the * sub-document at 'path', is equivalent to 'expression'. * diff --git a/src/mongo/db/pipeline/document_source_match_test.cpp b/src/mongo/db/pipeline/document_source_match_test.cpp index 36914f34093..9f73219cfa2 100644 --- a/src/mongo/db/pipeline/document_source_match_test.cpp +++ b/src/mongo/db/pipeline/document_source_match_test.cpp @@ -446,42 +446,5 @@ TEST_F(DocumentSourceMatchTest, ShouldCorrectlyEvaluateElemMatchPredicate) { ASSERT_TRUE(match->getNext().isEOF()); } -TEST(ObjectForMatch, ShouldExtractTopLevelFieldIfDottedFieldNeeded) { - Document input(fromjson("{a: 1, b: {c: 1, d: 1}}")); - BSONObj expected = fromjson("{b: {c: 1, d: 1}}"); - ASSERT_BSONOBJ_EQ(expected, DocumentSourceMatch::getObjectForMatch(input, {"b.c"})); -} - -TEST(ObjectForMatch, ShouldExtractEntireArray) { - Document input(fromjson("{a: [1, 2, 3], b: 1}")); - BSONObj expected = fromjson("{a: [1, 2, 3]}"); - ASSERT_BSONOBJ_EQ(expected, DocumentSourceMatch::getObjectForMatch(input, {"a"})); -} - -TEST(ObjectForMatch, ShouldOnlyAddPrefixedFieldOnceIfTwoDottedSubfields) { - Document input(fromjson("{a: 1, b: {c: 1, f: {d: {e: 1}}}}")); - BSONObj expected = fromjson("{b: {c: 1, f: {d: {e: 1}}}}"); - ASSERT_BSONOBJ_EQ(expected, DocumentSourceMatch::getObjectForMatch(input, {"b.f", "b.f.d.e"})); -} - -TEST(ObjectForMatch, MissingFieldShouldNotAppearInResult) { - Document input(fromjson("{a: 1}")); - BSONObj expected; - ASSERT_BSONOBJ_EQ(expected, DocumentSourceMatch::getObjectForMatch(input, {"b", "c"})); -} - -TEST(ObjectForMatch, ShouldSerializeNothingIfNothingIsNeeded) { - Document input(fromjson("{a: 1, b: {c: 1}}")); - BSONObj expected; - ASSERT_BSONOBJ_EQ(expected, - DocumentSourceMatch::getObjectForMatch(input, std::set<std::string>{})); -} - -TEST(ObjectForMatch, ShouldExtractEntireArrayFromPrefixOfDottedField) { - Document input(fromjson("{a: [{b: 1}, {b: 2}], c: 1}")); - BSONObj expected = fromjson("{a: [{b: 1}, {b: 2}]}"); - ASSERT_BSONOBJ_EQ(expected, DocumentSourceMatch::getObjectForMatch(input, {"a.b"})); -} - } // namespace } // namespace mongo diff --git a/src/mongo/db/pipeline/document_source_sort.cpp b/src/mongo/db/pipeline/document_source_sort.cpp index b0cb3e989e9..4d78d08ccaa 100644 --- a/src/mongo/db/pipeline/document_source_sort.cpp +++ b/src/mongo/db/pipeline/document_source_sort.cpp @@ -32,6 +32,7 @@ #include "mongo/db/jsobj.h" #include "mongo/db/pipeline/document.h" +#include "mongo/db/pipeline/document_path_support.h" #include "mongo/db/pipeline/document_source_merge_cursors.h" #include "mongo/db/pipeline/expression.h" #include "mongo/db/pipeline/expression_context.h" @@ -108,29 +109,18 @@ long long DocumentSourceSort::getLimit() const { return limitSrc ? limitSrc->getLimit() : -1; } -void DocumentSourceSort::addKey(StringData fieldPath, bool ascending) { - VariablesParseState vps = pExpCtx->variablesParseState; - vSortKey.push_back(ExpressionFieldPath::parse(pExpCtx, "$$ROOT." + fieldPath.toString(), vps)); - vAscending.push_back(ascending); -} - Document DocumentSourceSort::serializeSortKey(bool explain) const { MutableDocument keyObj; - // add the key fields - const size_t n = vSortKey.size(); + const size_t n = _sortPattern.size(); for (size_t i = 0; i < n; ++i) { - if (ExpressionFieldPath* efp = dynamic_cast<ExpressionFieldPath*>(vSortKey[i].get())) { - // ExpressionFieldPath gets special syntax that includes direction - const FieldPath& withVariable = efp->getFieldPath(); - verify(withVariable.getPathLength() > 1); - verify(withVariable.getFieldName(0) == "ROOT"); - const string fieldPath = withVariable.tail().fullPath(); - - // append a named integer based on the sort order - keyObj.setField(fieldPath, Value(vAscending[i] ? 1 : -1)); + if (_sortPattern[i].fieldPath) { + // Append a named integer based on whether the sort is ascending/descending. + keyObj.setField(_sortPattern[i].fieldPath->fullPath(), + Value(_sortPattern[i].isAscending ? 1 : -1)); } else { - // other expressions use a made-up field name - keyObj[string(str::stream() << "$computed" << i)] = vSortKey[i]->serialize(explain); + // For sorts that are not simply on a field path, use a made-up field name. + keyObj[string(str::stream() << "$computed" << i)] = + _sortPattern[i].expression->serialize(explain); } } return keyObj.freeze(); @@ -152,8 +142,12 @@ Pipeline::SourceContainer::iterator DocumentSourceSort::doOptimizeAt( } DocumentSource::GetDepsReturn DocumentSourceSort::getDependencies(DepsTracker* deps) const { - for (size_t i = 0; i < vSortKey.size(); ++i) { - vSortKey[i]->addDependencies(deps); + for (auto&& keyPart : _sortPattern) { + if (keyPart.expression) { + keyPart.expression->addDependencies(deps); + } else { + deps->fields.insert(keyPart.fieldPath->fullPath()); + } } return SEE_NEXT; @@ -173,7 +167,7 @@ intrusive_ptr<DocumentSourceSort> DocumentSourceSort::create( uint64_t maxMemoryUsageBytes) { intrusive_ptr<DocumentSourceSort> pSort(new DocumentSourceSort(pExpCtx)); pSort->_maxMemoryUsageBytes = maxMemoryUsageBytes; - pSort->_sort = sortOrder.getOwned(); + pSort->_rawSort = sortOrder.getOwned(); for (auto&& keyField : sortOrder) { auto fieldName = keyField.fieldNameStringData(); @@ -184,6 +178,8 @@ intrusive_ptr<DocumentSourceSort> DocumentSourceSort::create( continue; } + SortPatternPart patternPart; + if (keyField.type() == Object) { BSONObj metaDoc = keyField.Obj(); // this restriction is due to needing to figure out sort direction @@ -191,12 +187,18 @@ intrusive_ptr<DocumentSourceSort> DocumentSourceSort::create( "$meta is the only expression supported by $sort right now", metaDoc.firstElement().fieldNameStringData() == "$meta"); + uassert(ErrorCodes::FailedToParse, + "Cannot have additional keys in a $meta sort specification", + metaDoc.nFields() == 1); + VariablesParseState vps = pExpCtx->variablesParseState; - pSort->vSortKey.push_back(ExpressionMeta::parse(pExpCtx, metaDoc.firstElement(), vps)); + patternPart.expression = ExpressionMeta::parse(pExpCtx, metaDoc.firstElement(), vps); // If sorting by textScore, sort highest scores first. If sorting by randVal, order // doesn't matter, so just always use descending. - pSort->vAscending.push_back(false); + patternPart.isAscending = false; + + pSort->_sortPattern.push_back(std::move(patternPart)); continue; } @@ -210,10 +212,17 @@ intrusive_ptr<DocumentSourceSort> DocumentSourceSort::create( "$sort key ordering must be 1 (for ascending) or -1 (for descending)", ((sortOrder == 1) || (sortOrder == -1))); - pSort->addKey(fieldName, (sortOrder > 0)); + patternPart.fieldPath = FieldPath{fieldName}; + patternPart.isAscending = (sortOrder > 0); + pSort->_paths.insert(patternPart.fieldPath->fullPath()); + pSort->_sortPattern.push_back(std::move(patternPart)); } - uassert(15976, "$sort stage must have at least one sort key", !pSort->vSortKey.empty()); + uassert(15976, "$sort stage must have at least one sort key", !pSort->_sortPattern.empty()); + + const bool isExplain = false; + pSort->_sortKeyGen = + SortKeyGenerator{pSort->serializeSortKey(isExplain).toBson(), pExpCtx->getCollator()}; if (limit > 0) { pSort->setLimitSrc(DocumentSourceLimit::create(pExpCtx, limit)); @@ -224,7 +233,7 @@ intrusive_ptr<DocumentSourceSort> DocumentSourceSort::create( SortOptions DocumentSourceSort::makeSortOptions() const { /* make sure we've got a sort key */ - verify(vSortKey.size()); + verify(_sortPattern.size()); SortOptions opts; if (limitSrc) @@ -305,17 +314,68 @@ void DocumentSourceSort::populateFromCursors(const vector<DBClientCursor*>& curs _populated = true; } -Value DocumentSourceSort::extractKey(const Document& d) const { - if (vSortKey.size() == 1) { - return vSortKey[0]->evaluate(d); +StatusWith<Value> DocumentSourceSort::extractKeyPart(const Document& doc, + const SortPatternPart& patternPart) const { + if (patternPart.fieldPath) { + invariant(!patternPart.expression); + return document_path_support::extractElementAlongNonArrayPath(doc, *patternPart.fieldPath); + } else { + invariant(patternPart.expression); + return patternPart.expression->evaluate(doc); + } +} + +StatusWith<Value> DocumentSourceSort::extractKeyFast(const Document& doc) const { + if (_sortPattern.size() == 1u) { + return extractKeyPart(doc, _sortPattern[0]); + } + + vector<Value> keys; + keys.reserve(_sortPattern.size()); + for (auto&& keyPart : _sortPattern) { + auto extractedKey = extractKeyPart(doc, keyPart); + if (!extractedKey.isOK()) { + // We can't use the fast path, so bail out. + return extractedKey; + } + + keys.push_back(std::move(extractedKey.getValue())); + } + return Value{std::move(keys)}; +} + +Value DocumentSourceSort::extractKeyWithArray(const Document& doc) const { + SortKeyGenerator::Metadata metadata; + if (doc.hasTextScore()) { + metadata.textScore = doc.getTextScore(); } + if (doc.hasRandMetaField()) { + metadata.randVal = doc.getRandMetaField(); + } + + // Convert the Document to a BSONObj, but only do the conversion for the paths we actually need. + // Then run the result through the SortKeyGenerator to obtain the final sort key. + auto bsonDoc = document_path_support::documentToBsonWithPaths(doc, _paths); + auto bsonKey = uassertStatusOK(_sortKeyGen->getSortKey(std::move(bsonDoc), &metadata)); + // Convert BSON sort key, which is an object with empty field names, into the corresponding + // array of keys representation as a Value. BSONObj {'': 1, '': [2, 3]} becomes Value [1, [2, + // 3]]. vector<Value> keys; - keys.reserve(vSortKey.size()); - for (size_t i = 0; i < vSortKey.size(); i++) { - keys.push_back(vSortKey[i]->evaluate(d)); + keys.reserve(_sortPattern.size()); + for (auto&& elt : bsonKey) { + keys.push_back(Value{elt}); + } + return Value{std::move(keys)}; +} + +Value DocumentSourceSort::extractKey(const Document& doc) const { + auto key = extractKeyFast(doc); + if (key.isOK()) { + return key.getValue(); } - return Value(std::move(keys)); + + return extractKeyWithArray(doc); } int DocumentSourceSort::compare(const Value& lhs, const Value& rhs) const { @@ -326,9 +386,9 @@ int DocumentSourceSort::compare(const Value& lhs, const Value& rhs) const { However, the tricky part is what to do is none of the sort keys are present. In this case, consider the document less. */ - const size_t n = vSortKey.size(); + const size_t n = _sortPattern.size(); if (n == 1) { // simple fast case - if (vAscending[0]) + if (_sortPattern[0].isAscending) return pExpCtx->getValueComparator().compare(lhs, rhs); else return -pExpCtx->getValueComparator().compare(lhs, rhs); @@ -339,7 +399,7 @@ int DocumentSourceSort::compare(const Value& lhs, const Value& rhs) const { int cmp = pExpCtx->getValueComparator().compare(lhs[i], rhs[i]); if (cmp) { /* if necessary, adjust the return value by the key ordering */ - if (!vAscending[i]) + if (!_sortPattern[i].isAscending) cmp = -cmp; return cmp; @@ -361,11 +421,10 @@ intrusive_ptr<DocumentSource> DocumentSourceSort::getShardSource() { intrusive_ptr<DocumentSource> DocumentSourceSort::getMergeSource() { verify(!_mergingPresorted); intrusive_ptr<DocumentSourceSort> other = new DocumentSourceSort(pExpCtx); - other->vAscending = vAscending; - other->vSortKey = vSortKey; + other->_sortPattern = _sortPattern; other->limitSrc = limitSrc; other->_mergingPresorted = true; - other->_sort = _sort; + other->_rawSort = _rawSort; return other; } } diff --git a/src/mongo/db/pipeline/document_source_sort.h b/src/mongo/db/pipeline/document_source_sort.h index b9ece4be37a..cb1c709ecde 100644 --- a/src/mongo/db/pipeline/document_source_sort.h +++ b/src/mongo/db/pipeline/document_source_sort.h @@ -28,14 +28,14 @@ #pragma once +#include "mongo/db/index/sort_key_generator.h" #include "mongo/db/pipeline/document_source.h" #include "mongo/db/pipeline/document_source_limit.h" +#include "mongo/db/pipeline/expression.h" #include "mongo/db/sorter/sorter.h" namespace mongo { -class Expression; - class DocumentSourceSort final : public DocumentSource, public SplittableDocumentSource { public: static const uint64_t kMaxMemoryUsageBytes = 100 * 1024 * 1024; @@ -59,7 +59,7 @@ public: } BSONObjSet getOutputSorts() final { - return allPrefixes(_sort); + return allPrefixes(_rawSort); } GetDepsReturn getDependencies(DepsTracker* deps) const final; @@ -126,6 +126,34 @@ protected: void doDispose() final; private: + // This is used to merge pre-sorted results from a DocumentSourceMergeCursors. + class IteratorFromCursor; + + using MySorter = Sorter<Value, Document>; + + // For MySorter. + class Comparator { + public: + explicit Comparator(const DocumentSourceSort& source) : _source(source) {} + int operator()(const MySorter::Data& lhs, const MySorter::Data& rhs) const { + return _source.compare(lhs.first, rhs.first); + } + + private: + const DocumentSourceSort& _source; + }; + + // Represents one of the components in a compound sort pattern. Each component is either the + // field path by which we are sorting, or an Expression which can be used to retrieve the sort + // value in the case of a $meta-sort (but not both). + struct SortPatternPart { + bool isAscending = true; + boost::optional<FieldPath> fieldPath; + boost::intrusive_ptr<Expression> expression; + }; + + using SortPattern = std::vector<SortPatternPart>; + explicit DocumentSourceSort(const boost::intrusive_ptr<ExpressionContext>& pExpCtx); Value serialize(boost::optional<ExplainOptions::Verbosity> explain = boost::none) const final { @@ -133,11 +161,6 @@ private: } /** - * Helper to add a sort key to this stage. - */ - void addKey(StringData fieldPath, bool ascending); - - /** * Before returning anything, we have to consume all input and sort it. This method consumes all * input and prepares the sorted stream '_output'. * @@ -146,28 +169,35 @@ private: * GetNextResult encountered, which may be either kEOF or kPauseExecution. */ GetNextResult populate(); - bool _populated = false; - - BSONObj _sort; SortOptions makeSortOptions() const; - // This is used to merge pre-sorted results from a DocumentSourceMergeCursors. - class IteratorFromCursor; + /** + * Returns the sort key for 'doc' based on the SortPattern. Attempts to generate the key using a + * fast path that does not handle arrays. If an array is encountered, falls back on + * extractKeyWithArray(). + */ + Value extractKey(const Document& doc) const; + + /** + * Returns the sort key for 'doc' based on the SortPattern, or ErrorCodes::InternalError if an + * array is encountered during sort key generation. + */ + StatusWith<Value> extractKeyFast(const Document& doc) const; - /* these two parallel each other */ - typedef std::vector<boost::intrusive_ptr<Expression>> SortKey; - SortKey vSortKey; - std::vector<char> vAscending; // used like std::vector<bool> but without specialization + /** + * Extracts the sort key component described by 'keyPart' from 'doc' and returns it. Returns + * ErrorCodes::Internal error if the path for 'keyPart' contains an array in 'doc'. + */ + StatusWith<Value> extractKeyPart(const Document& doc, const SortPatternPart& keyPart) const; - /// Extracts the fields in vSortKey from the Document; - Value extractKey(const Document& d) const; + /** + * Returns the sort key for 'doc' based on the SortPattern. + */ + Value extractKeyWithArray(const Document& doc) const; - /// Compare two Values according to the specified sort key. int compare(const Value& lhs, const Value& rhs) const; - typedef Sorter<Value, Document> MySorter; - /** * Absorbs 'limit', enabling a top-k sort. It is safe to call this multiple times, it will keep * the smallest limit. @@ -178,17 +208,16 @@ private: } } - // For MySorter - class Comparator { - public: - explicit Comparator(const DocumentSourceSort& source) : _source(source) {} - int operator()(const MySorter::Data& lhs, const MySorter::Data& rhs) const { - return _source.compare(lhs.first, rhs.first); - } + bool _populated = false; - private: - const DocumentSourceSort& _source; - }; + BSONObj _rawSort; + + boost::optional<SortKeyGenerator> _sortKeyGen; + + SortPattern _sortPattern; + + // The set of paths on which we're sorting. + std::set<std::string> _paths; boost::intrusive_ptr<DocumentSourceLimit> limitSrc; diff --git a/src/mongo/db/pipeline/document_source_sort_test.cpp b/src/mongo/db/pipeline/document_source_sort_test.cpp index 6c273d9f984..92b0f4dd07c 100644 --- a/src/mongo/db/pipeline/document_source_sort_test.cpp +++ b/src/mongo/db/pipeline/document_source_sort_test.cpp @@ -348,9 +348,9 @@ TEST_F(DocumentSourceSortExecutionTest, MissingObjectWithinArray) { /** Compare nested values from within an array. */ TEST_F(DocumentSourceSortExecutionTest, ExtractArrayValues) { checkResults({Document{{"_id", 0}, {"a", DOC_ARRAY(DOC("b" << 1) << DOC("b" << 2))}}, - Document{{"_id", 1}, {"a", DOC_ARRAY(DOC("b" << 1) << DOC("b" << 1))}}}, + Document{{"_id", 1}, {"a", DOC_ARRAY(DOC("b" << 1) << DOC("b" << 0))}}}, BSON("a.b" << 1), - "[{_id:1,a:[{b:1},{b:1}]},{_id:0,a:[{b:1},{b:2}]}]"); + "[{_id:1,a:[{b:1},{b:0}]},{_id:0,a:[{b:1},{b:2}]}]"); } TEST_F(DocumentSourceSortExecutionTest, ShouldPauseWhenAskedTo) { diff --git a/src/mongo/dbtests/SConscript b/src/mongo/dbtests/SConscript index 34a511adeba..74383934018 100644 --- a/src/mongo/dbtests/SConscript +++ b/src/mongo/dbtests/SConscript @@ -99,6 +99,7 @@ dbtest = env.Program( 'query_stage_merge_sort.cpp', 'query_stage_near.cpp', 'query_stage_sort.cpp', + 'query_stage_sort_key_generator.cpp', 'query_stage_subplan.cpp', 'query_stage_tests.cpp', 'query_stage_update.cpp', @@ -108,7 +109,6 @@ dbtest = env.Program( 'repltests.cpp', 'rollbacktests.cpp', 'socktests.cpp', - 'sort_key_generator_test.cpp', 'threadedtests.cpp', 'updatetests.cpp', 'validate_tests.cpp', diff --git a/src/mongo/dbtests/sort_key_generator_test.cpp b/src/mongo/dbtests/query_stage_sort_key_generator.cpp index c7baa9c664a..b488e9e7e35 100644 --- a/src/mongo/dbtests/sort_key_generator_test.cpp +++ b/src/mongo/dbtests/query_stage_sort_key_generator.cpp @@ -28,7 +28,9 @@ #include "mongo/platform/basic.h" +#include "mongo/db/exec/queued_data_stage.h" #include "mongo/db/exec/sort_key_generator.h" +#include "mongo/db/exec/working_set_computed_data.h" #include "mongo/db/json.h" #include "mongo/db/query/collation/collator_interface_mock.h" #include "mongo/db/query/query_test_service_context.h" @@ -39,42 +41,53 @@ namespace mongo { namespace { +BSONObj extractKeyFromKeyGenStage(SortKeyGeneratorStage* sortKeyGen, WorkingSet* workingSet) { + WorkingSetID wsid; + PlanStage::StageState state = PlanStage::NEED_TIME; + while (state == PlanStage::NEED_TIME) { + state = sortKeyGen->work(&wsid); + } + + ASSERT_EQ(state, PlanStage::ADVANCED); + auto wsm = workingSet->get(wsid); + auto sortKeyComputedData = + static_cast<const SortKeyComputedData*>(wsm->getComputed(WSM_SORT_KEY)); + return sortKeyComputedData->getSortKey(); +} + /** - * Test function to verify that the SortKeyGenerator can generate a sortKey from a fetched document. - * - * sortSpec - The JSON representation of the sort spec BSONObj. - * doc - The JSON representation of the BSON document. - * queryObj - The JSON representation of the query predicate. Used when generating the sort key from - * an array. - * collator - The collation for the sort that we are are generating keys for. + * Given a JSON string 'sortSpec' representing a sort pattern, returns the corresponding sort key + * from 'doc', a JSON string representation of a user document. Does so using the SORT_KEY_GENERATOR + * stage. * - * Returns the BSON representation of the sort key, to be checked against the expected sort key. + * The 'collator' is used to specify the string comparison semantics that should be used when + * generating the sort key. */ BSONObj extractSortKey(const char* sortSpec, const char* doc, const CollatorInterface* collator) { QueryTestServiceContext serviceContext; auto opCtx = serviceContext.makeOperationContext(); - WorkingSetMember wsm; - wsm.obj = Snapshotted<BSONObj>(SnapshotId(), fromjson(doc)); - wsm.transitionToOwnedObj(); + WorkingSet workingSet; - BSONObj sortKey; - auto sortKeyGen = - stdx::make_unique<SortKeyGenerator>(opCtx.get(), fromjson(sortSpec), collator); - ASSERT_OK(sortKeyGen->getSortKey(wsm, &sortKey)); + auto mockStage = stdx::make_unique<QueuedDataStage>(opCtx.get(), &workingSet); + auto wsid = workingSet.allocate(); + auto wsm = workingSet.get(wsid); + wsm->obj = Snapshotted<BSONObj>(SnapshotId(), fromjson(doc)); + wsm->transitionToOwnedObj(); + mockStage->pushBack(wsid); - return sortKey; + BSONObj sortPattern = fromjson(sortSpec); + SortKeyGeneratorStage sortKeyGen{ + opCtx.get(), mockStage.release(), &workingSet, std::move(sortPattern), collator}; + return extractKeyFromKeyGenStage(&sortKeyGen, &workingSet); } /** - * Test function to verify that the SortKeyGenerator can generate a sortKey while using only index - * data (that is, the document will not be fetched). For SERVER-20117. + * Given a JSON string 'sortSpec' representing a sort pattern, returns the corresponding sort key + * from the index key 'ikd'. Does so using the SORT_KEY_GENERATOR stage. * - * sortSpec - The JSON representation of the sort spec BSONObj. - * ikd - The data stored in the index. - * collator - The collation for the sort that we are are generating keys for. - * - * Returns the BSON representation of the sort key, to be checked against the expected sort key. + * The 'collator' is used to specify the string comparison semantics that should be used when + * generating the sort key. */ BSONObj extractSortKeyCovered(const char* sortSpec, const IndexKeyDatum& ikd, @@ -82,33 +95,34 @@ BSONObj extractSortKeyCovered(const char* sortSpec, QueryTestServiceContext serviceContext; auto opCtx = serviceContext.makeOperationContext(); - WorkingSet ws; - WorkingSetID wsid = ws.allocate(); - WorkingSetMember* wsm = ws.get(wsid); - wsm->keyData.push_back(ikd); - ws.transitionToRecordIdAndIdx(wsid); + WorkingSet workingSet; - BSONObj sortKey; - auto sortKeyGen = - stdx::make_unique<SortKeyGenerator>(opCtx.get(), fromjson(sortSpec), collator); - ASSERT_OK(sortKeyGen->getSortKey(*wsm, &sortKey)); + auto mockStage = stdx::make_unique<QueuedDataStage>(opCtx.get(), &workingSet); + auto wsid = workingSet.allocate(); + auto wsm = workingSet.get(wsid); + wsm->keyData.push_back(ikd); + workingSet.transitionToRecordIdAndIdx(wsid); + mockStage->pushBack(wsid); - return sortKey; + BSONObj sortPattern = fromjson(sortSpec); + SortKeyGeneratorStage sortKeyGen{ + opCtx.get(), mockStage.release(), &workingSet, std::move(sortPattern), collator}; + return extractKeyFromKeyGenStage(&sortKeyGen, &workingSet); } -TEST(SortKeyGeneratorTest, SortKeyNormal) { +TEST(SortKeyGeneratorStageTest, SortKeyNormal) { BSONObj actualOut = extractSortKey("{a: 1}", "{_id: 0, a: 5}", nullptr); BSONObj expectedOut = BSON("" << 5); ASSERT_BSONOBJ_EQ(actualOut, expectedOut); } -TEST(SortKeyGeneratorTest, SortKeyNormal2) { +TEST(SortKeyGeneratorStageTest, SortKeyNormal2) { BSONObj actualOut = extractSortKey("{a: 1}", "{_id: 0, z: 10, a: 6, b: 16}", nullptr); BSONObj expectedOut = BSON("" << 6); ASSERT_BSONOBJ_EQ(actualOut, expectedOut); } -TEST(SortKeyGeneratorTest, SortKeyString) { +TEST(SortKeyGeneratorStageTest, SortKeyString) { BSONObj actualOut = extractSortKey("{a: 1}", "{_id: 0, z: 'thing1', a: 'thing2', b: 16}", nullptr); BSONObj expectedOut = BSON("" @@ -116,28 +130,28 @@ TEST(SortKeyGeneratorTest, SortKeyString) { ASSERT_BSONOBJ_EQ(actualOut, expectedOut); } -TEST(SortKeyGeneratorTest, SortKeyCompound) { +TEST(SortKeyGeneratorStageTest, SortKeyCompound) { BSONObj actualOut = extractSortKey("{a: 1, b: 1}", "{_id: 0, z: 'thing1', a: 99, c: {a: 4}, b: 16}", nullptr); BSONObj expectedOut = BSON("" << 99 << "" << 16); ASSERT_BSONOBJ_EQ(actualOut, expectedOut); } -TEST(SortKeyGeneratorTest, SortKeyEmbedded) { +TEST(SortKeyGeneratorStageTest, SortKeyEmbedded) { BSONObj actualOut = extractSortKey( "{'c.a': 1, b: 1}", "{_id: 0, z: 'thing1', a: 99, c: {a: 4}, b: 16}", nullptr); BSONObj expectedOut = BSON("" << 4 << "" << 16); ASSERT_BSONOBJ_EQ(actualOut, expectedOut); } -TEST(SortKeyGeneratorTest, SortKeyArray) { +TEST(SortKeyGeneratorStageTest, SortKeyArray) { BSONObj actualOut = extractSortKey( "{'c': 1, b: 1}", "{_id: 0, z: 'thing1', a: 99, c: [2, 4, 1], b: 16}", nullptr); BSONObj expectedOut = BSON("" << 1 << "" << 16); ASSERT_BSONOBJ_EQ(actualOut, expectedOut); } -TEST(SortKeyGeneratorTest, SortKeyCoveredNormal) { +TEST(SortKeyGeneratorStageTest, SortKeyCoveredNormal) { CollatorInterface* collator = nullptr; BSONObj actualOut = extractSortKeyCovered( "{a: 1}", IndexKeyDatum(BSON("a" << 1), BSON("" << 5), nullptr), collator); @@ -145,7 +159,7 @@ TEST(SortKeyGeneratorTest, SortKeyCoveredNormal) { ASSERT_BSONOBJ_EQ(actualOut, expectedOut); } -TEST(SortKeyGeneratorTest, SortKeyCoveredEmbedded) { +TEST(SortKeyGeneratorStageTest, SortKeyCoveredEmbedded) { CollatorInterface* collator = nullptr; BSONObj actualOut = extractSortKeyCovered( "{'a.c': 1}", @@ -155,7 +169,7 @@ TEST(SortKeyGeneratorTest, SortKeyCoveredEmbedded) { ASSERT_BSONOBJ_EQ(actualOut, expectedOut); } -TEST(SortKeyGeneratorTest, SortKeyCoveredCompound) { +TEST(SortKeyGeneratorStageTest, SortKeyCoveredCompound) { CollatorInterface* collator = nullptr; BSONObj actualOut = extractSortKeyCovered( "{a: 1, c: 1}", @@ -165,7 +179,7 @@ TEST(SortKeyGeneratorTest, SortKeyCoveredCompound) { ASSERT_BSONOBJ_EQ(actualOut, expectedOut); } -TEST(SortKeyGeneratorTest, SortKeyCoveredCompound2) { +TEST(SortKeyGeneratorStageTest, SortKeyCoveredCompound2) { CollatorInterface* collator = nullptr; BSONObj actualOut = extractSortKeyCovered("{a: 1, b: 1}", IndexKeyDatum(BSON("a" << 1 << "b" << 1 << "c" << 1), @@ -176,7 +190,7 @@ TEST(SortKeyGeneratorTest, SortKeyCoveredCompound2) { ASSERT_BSONOBJ_EQ(actualOut, expectedOut); } -TEST(SortKeyGeneratorTest, SortKeyCoveredCompound3) { +TEST(SortKeyGeneratorStageTest, SortKeyCoveredCompound3) { CollatorInterface* collator = nullptr; BSONObj actualOut = extractSortKeyCovered("{b: 1, c: 1}", @@ -188,7 +202,7 @@ TEST(SortKeyGeneratorTest, SortKeyCoveredCompound3) { ASSERT_BSONOBJ_EQ(actualOut, expectedOut); } -TEST(SortKeyGeneratorTest, ExtractStringSortKeyWithCollatorUsesComparisonKey) { +TEST(SortKeyGeneratorStageTest, ExtractStringSortKeyWithCollatorUsesComparisonKey) { CollatorInterfaceMock collator(CollatorInterfaceMock::MockType::kReverseString); BSONObj actualOut = extractSortKey("{a: 1}", "{_id: 0, z: 'thing1', a: 'thing2', b: 16}", &collator); @@ -197,14 +211,14 @@ TEST(SortKeyGeneratorTest, ExtractStringSortKeyWithCollatorUsesComparisonKey) { ASSERT_BSONOBJ_EQ(actualOut, expectedOut); } -TEST(SortKeyGeneratorTest, CollatorHasNoEffectWhenExtractingNonStringSortKey) { +TEST(SortKeyGeneratorStageTest, CollatorHasNoEffectWhenExtractingNonStringSortKey) { CollatorInterfaceMock collator(CollatorInterfaceMock::MockType::kReverseString); BSONObj actualOut = extractSortKey("{a: 1}", "{_id: 0, z: 10, a: 6, b: 16}", &collator); BSONObj expectedOut = BSON("" << 6); ASSERT_BSONOBJ_EQ(actualOut, expectedOut); } -TEST(SortKeyGeneratorTest, CollatorHasNoAffectWhenExtractingCoveredSortKey) { +TEST(SortKeyGeneratorStageTest, CollatorHasNoAffectWhenExtractingCoveredSortKey) { CollatorInterfaceMock collator(CollatorInterfaceMock::MockType::kReverseString); BSONObj actualOut = extractSortKeyCovered("{b: 1}", IndexKeyDatum(BSON("a" << 1 << "b" << 1), @@ -217,13 +231,13 @@ TEST(SortKeyGeneratorTest, CollatorHasNoAffectWhenExtractingCoveredSortKey) { ASSERT_BSONOBJ_EQ(actualOut, expectedOut); } -TEST(SortKeyGeneratorTest, SortKeyGenerationForArraysChoosesCorrectKey) { +TEST(SortKeyGeneratorStageTest, SortKeyGenerationForArraysChoosesCorrectKey) { BSONObj actualOut = extractSortKey("{a: -1}", "{_id: 0, a: [1, 2, 3, 4]}", nullptr); BSONObj expectedOut = BSON("" << 4); ASSERT_BSONOBJ_EQ(actualOut, expectedOut); } -TEST(SortKeyGeneratorTest, EnsureSortKeyGenerationForArraysRespectsCollation) { +TEST(SortKeyGeneratorStageTest, EnsureSortKeyGenerationForArraysRespectsCollation) { CollatorInterfaceMock collator(CollatorInterfaceMock::MockType::kReverseString); BSONObj actualOut = extractSortKey("{a: 1}", "{_id: 0, a: ['aaz', 'zza', 'yya', 'zzb']}", &collator); @@ -232,7 +246,7 @@ TEST(SortKeyGeneratorTest, EnsureSortKeyGenerationForArraysRespectsCollation) { ASSERT_BSONOBJ_EQ(actualOut, expectedOut); } -TEST(SortKeyGeneratorTest, SortKeyGenerationForArraysRespectsCompoundOrdering) { +TEST(SortKeyGeneratorStageTest, SortKeyGenerationForArraysRespectsCompoundOrdering) { BSONObj actualOut = extractSortKey("{'a.b': 1, 'a.c': -1}", "{_id: 0, a: [{b: 1, c: 0}, {b: 0, c: 3}, {b: 0, c: 1}]}", nullptr); |