From ef408a20e6fd71ffc97f3602dd65ef5b03de6a45 Mon Sep 17 00:00:00 2001 From: Tess Avitabile Date: Tue, 1 Aug 2017 15:32:03 -0400 Subject: SERVER-30189 Reduce calls to allocator for large $in expressions (cherry picked from commit 50a1afafc816097ed57804ff7033dffd85dbe160) --- src/mongo/bson/bson_comparator_interface_base.h | 11 ++++ src/mongo/bson/bsonelement_comparator_interface.h | 13 +++++ src/mongo/bson/bsonobj.cpp | 1 + src/mongo/bson/bsonobj.h | 4 -- src/mongo/db/bson/dotted_path_support.cpp | 2 +- src/mongo/db/bson/dotted_path_support.h | 3 +- src/mongo/db/index/expression_keys_private.cpp | 5 +- src/mongo/db/matcher/expression_leaf.cpp | 36 +++++++------ src/mongo/db/matcher/expression_leaf.h | 22 +++++--- src/mongo/db/matcher/expression_leaf_test.cpp | 66 ++++++++++++++--------- src/mongo/db/matcher/expression_parser.cpp | 7 ++- src/mongo/db/query/canonical_query_test.cpp | 3 +- 12 files changed, 110 insertions(+), 63 deletions(-) diff --git a/src/mongo/bson/bson_comparator_interface_base.h b/src/mongo/bson/bson_comparator_interface_base.h index f70bec7b872..29d80916f72 100644 --- a/src/mongo/bson/bson_comparator_interface_base.h +++ b/src/mongo/bson/bson_comparator_interface_base.h @@ -28,9 +28,11 @@ #pragma once +#include #include #include #include +#include #include "mongo/base/disallow_copying.h" #include "mongo/base/string_data_comparator_interface.h" @@ -51,6 +53,9 @@ class BSONComparatorInterfaceBase { MONGO_DISALLOW_COPYING(BSONComparatorInterfaceBase); public: + BSONComparatorInterfaceBase(BSONComparatorInterfaceBase&& other) = default; + BSONComparatorInterfaceBase& operator=(BSONComparatorInterfaceBase&& other) = default; + /** * A deferred comparison between two objects of type T, which can be converted into a boolean * via the evaluate() method. @@ -122,6 +127,8 @@ public: using Set = std::set; + using FlatSet = boost::container::flat_set; + using UnorderedSet = stdx::unordered_set; template @@ -207,6 +214,10 @@ protected: return Set(init, LessThan(this)); } + FlatSet makeFlatSet(const std::vector& elements) const { + return FlatSet(elements.begin(), elements.end(), LessThan(this)); + } + UnorderedSet makeUnorderedSet(std::initializer_list init = {}) const { return UnorderedSet(init, 0, Hasher(this), EqualTo(this)); } diff --git a/src/mongo/bson/bsonelement_comparator_interface.h b/src/mongo/bson/bsonelement_comparator_interface.h index c6223a2d352..d69397a68db 100644 --- a/src/mongo/bson/bsonelement_comparator_interface.h +++ b/src/mongo/bson/bsonelement_comparator_interface.h @@ -33,6 +33,9 @@ namespace mongo { +typedef std::set BSONElementSet; +typedef std::multiset BSONElementMultiSet; + /** * A BSONElement::ComparatorInterface is an abstract class for comparing BSONElement objects. Usage * for comparing two BSON elements, 'lhs' and 'rhs', where 'comparator' is an instance of a class @@ -60,6 +63,14 @@ public: return makeSet(init); } + /** + * Constructs a BSONEltFlatSet whose equivalence classes are given by this comparator. This + * comparator must outlive the returned set. + */ + FlatSet makeBSONEltFlatSet(const std::vector& elements) const { + return makeFlatSet(elements); + } + /** * Constructs a BSONEltUnorderedSet whose equivalence classes are given by this * comparator. This comparator must outlive the returned set. @@ -91,6 +102,8 @@ public: using BSONEltSet = BSONComparatorInterfaceBase::Set; +using BSONEltFlatSet = BSONComparatorInterfaceBase::FlatSet; + using BSONEltUnorderedSet = BSONComparatorInterfaceBase::UnorderedSet; template diff --git a/src/mongo/bson/bsonobj.cpp b/src/mongo/bson/bsonobj.cpp index d9af2c706a9..bc0045f5681 100644 --- a/src/mongo/bson/bsonobj.cpp +++ b/src/mongo/bson/bsonobj.cpp @@ -33,6 +33,7 @@ #include "mongo/base/data_range.h" #include "mongo/bson/bson_validate.h" +#include "mongo/bson/bsonelement_comparator_interface.h" #include "mongo/db/json.h" #include "mongo/util/allocator.h" #include "mongo/util/hex.h" diff --git a/src/mongo/bson/bsonobj.h b/src/mongo/bson/bsonobj.h index 22ab6ff182f..0036fea6518 100644 --- a/src/mongo/bson/bsonobj.h +++ b/src/mongo/bson/bsonobj.h @@ -42,7 +42,6 @@ #include "mongo/base/string_data_comparator_interface.h" #include "mongo/bson/bson_comparator_interface_base.h" #include "mongo/bson/bsonelement.h" -#include "mongo/bson/bsonelement_comparator_interface.h" #include "mongo/bson/bsontypes.h" #include "mongo/bson/oid.h" #include "mongo/bson/timestamp.h" @@ -53,9 +52,6 @@ namespace mongo { -typedef std::set BSONElementSet; -typedef std::multiset BSONElementMSet; - /** C++ representation of a "BSON" object -- that is, an extended JSON-style object in a binary representation. diff --git a/src/mongo/db/bson/dotted_path_support.cpp b/src/mongo/db/bson/dotted_path_support.cpp index 1a9fda88501..31a92a5c54c 100644 --- a/src/mongo/db/bson/dotted_path_support.cpp +++ b/src/mongo/db/bson/dotted_path_support.cpp @@ -176,7 +176,7 @@ void extractAllElementsAlongPath(const BSONObj& obj, void extractAllElementsAlongPath(const BSONObj& obj, StringData path, - BSONElementMSet& elements, + BSONElementMultiSet& elements, bool expandArrayOnTrailingField, std::set* arrayComponents) { const size_t initialDepth = 0; diff --git a/src/mongo/db/bson/dotted_path_support.h b/src/mongo/db/bson/dotted_path_support.h index c9c6676c75a..0a3d072c56a 100644 --- a/src/mongo/db/bson/dotted_path_support.h +++ b/src/mongo/db/bson/dotted_path_support.h @@ -31,6 +31,7 @@ #include #include +#include "mongo/bson/bsonelement_comparator_interface.h" #include "mongo/bson/bsonobj.h" namespace mongo { @@ -104,7 +105,7 @@ void extractAllElementsAlongPath(const BSONObj& obj, void extractAllElementsAlongPath(const BSONObj& obj, StringData path, - BSONElementMSet& elements, + BSONElementMultiSet& elements, bool expandArrayOnTrailingField = true, std::set* arrayComponents = nullptr); diff --git a/src/mongo/db/index/expression_keys_private.cpp b/src/mongo/db/index/expression_keys_private.cpp index d58c5ce1dc1..9fe3365f393 100644 --- a/src/mongo/db/index/expression_keys_private.cpp +++ b/src/mongo/db/index/expression_keys_private.cpp @@ -32,6 +32,7 @@ #include +#include "mongo/bson/bsonelement_comparator_interface.h" #include "mongo/bson/simple_bsonobj_comparator.h" #include "mongo/db/bson/dotted_path_support.h" #include "mongo/db/field_ref.h" @@ -247,7 +248,7 @@ void ExpressionKeysPrivate::get2DKeys(const BSONObj& obj, const TwoDIndexingParams& params, BSONObjSet* keys, std::vector* locs) { - BSONElementMSet bSet; + BSONElementMultiSet bSet; // Get all the nested location fields, but don't return individual elements from // the last array, if it exists. @@ -256,7 +257,7 @@ void ExpressionKeysPrivate::get2DKeys(const BSONObj& obj, if (bSet.empty()) return; - for (BSONElementMSet::iterator setI = bSet.begin(); setI != bSet.end(); ++setI) { + for (BSONElementMultiSet::iterator setI = bSet.begin(); setI != bSet.end(); ++setI) { BSONElement geo = *setI; if (geo.eoo() || !geo.isABSONObj()) diff --git a/src/mongo/db/matcher/expression_leaf.cpp b/src/mongo/db/matcher/expression_leaf.cpp index d562bff141e..33356317816 100644 --- a/src/mongo/db/matcher/expression_leaf.cpp +++ b/src/mongo/db/matcher/expression_leaf.cpp @@ -656,29 +656,31 @@ bool InMatchExpression::equivalent(const MatchExpression* other) const { void InMatchExpression::_doSetCollator(const CollatorInterface* collator) { _collator = collator; + _eltCmp = BSONElementComparator(BSONElementComparator::FieldNamesMode::kIgnore, _collator); // We need to re-compute '_equalitySet', since our set comparator has changed. - BSONElementSet equalitiesWithNewComparator( - _originalEqualityVector.begin(), _originalEqualityVector.end(), collator); - _equalitySet = std::move(equalitiesWithNewComparator); + _equalitySet = _eltCmp.makeBSONEltFlatSet(_originalEqualityVector); } -Status InMatchExpression::addEquality(const BSONElement& elt) { - if (elt.type() == BSONType::RegEx) { - return Status(ErrorCodes::BadValue, "InMatchExpression equality cannot be a regex"); - } - if (elt.type() == BSONType::Undefined) { - return Status(ErrorCodes::BadValue, "InMatchExpression equality cannot be undefined"); - } +Status InMatchExpression::setEqualities(std::vector equalities) { + for (auto&& equality : equalities) { + if (equality.type() == BSONType::RegEx) { + return Status(ErrorCodes::BadValue, "InMatchExpression equality cannot be a regex"); + } + if (equality.type() == BSONType::Undefined) { + return Status(ErrorCodes::BadValue, "InMatchExpression equality cannot be undefined"); + } - if (elt.type() == BSONType::jstNULL) { - _hasNull = true; - } - if (elt.type() == BSONType::Array && elt.Obj().isEmpty()) { - _hasEmptyArray = true; + if (equality.type() == BSONType::jstNULL) { + _hasNull = true; + } else if (equality.type() == BSONType::Array && equality.Obj().isEmpty()) { + _hasEmptyArray = true; + } } - _equalitySet.insert(elt); - _originalEqualityVector.push_back(elt); + _originalEqualityVector = std::move(equalities); + + _equalitySet = _eltCmp.makeBSONEltFlatSet(_originalEqualityVector); + return Status::OK(); } diff --git a/src/mongo/db/matcher/expression_leaf.h b/src/mongo/db/matcher/expression_leaf.h index 128d3070737..5761f2ca2a2 100644 --- a/src/mongo/db/matcher/expression_leaf.h +++ b/src/mongo/db/matcher/expression_leaf.h @@ -30,9 +30,11 @@ #pragma once +#include "mongo/bson/bsonelement_comparator.h" #include "mongo/bson/bsonmisc.h" #include "mongo/bson/bsonobj.h" #include "mongo/db/matcher/expression.h" +#include "mongo/db/query/collation/collator_interface.h" #include "mongo/stdx/memory.h" #include "mongo/stdx/unordered_map.h" @@ -330,7 +332,10 @@ public: */ class InMatchExpression : public LeafMatchExpression { public: - InMatchExpression() : LeafMatchExpression(MATCH_IN) {} + InMatchExpression() + : LeafMatchExpression(MATCH_IN), + _eltCmp(BSONElementComparator::FieldNamesMode::kIgnore, _collator), + _equalitySet(_eltCmp.makeBSONEltFlatSet(_originalEqualityVector)) {} Status init(StringData path); @@ -349,11 +354,11 @@ public: */ virtual void _doSetCollator(const CollatorInterface* collator); - Status addEquality(const BSONElement& elt); + Status setEqualities(std::vector equalities); Status addRegex(std::unique_ptr expr); - const BSONElementSet& getEqualities() const { + const BSONEltFlatSet& getEqualities() const { return _equalitySet; } @@ -380,17 +385,20 @@ private: // Whether or not '_equalities' has an empty array element in it. bool _hasEmptyArray = false; - // Collator used to compare elements. By default, simple binary comparison will be used. + // Collator used to construct '_eltCmp'; const CollatorInterface* _collator = nullptr; - // Set of equality elements associated with this expression. '_collator' is used as a comparator - // for this set. - BSONElementSet _equalitySet; + // Comparator used to compare elements. By default, simple binary comparison will be used. + BSONElementComparator _eltCmp; // Original container of equality elements, including duplicates. Needed for re-computing // '_equalitySet' in case '_collator' changes after elements have been added. std::vector _originalEqualityVector; + // Set of equality elements associated with this expression. '_eltCmp' is used as a comparator + // for this set. + BSONEltFlatSet _equalitySet; + // Container of regex elements this object owns. std::vector> _regexes; }; diff --git a/src/mongo/db/matcher/expression_leaf_test.cpp b/src/mongo/db/matcher/expression_leaf_test.cpp index 293ed59c85c..1c3f7795a95 100644 --- a/src/mongo/db/matcher/expression_leaf_test.cpp +++ b/src/mongo/db/matcher/expression_leaf_test.cpp @@ -1501,7 +1501,8 @@ TEST(InMatchExpression, MatchesElementSingle) { BSONObj match = BSON("a" << 1); BSONObj notMatch = BSON("a" << 2); InMatchExpression in; - in.addEquality(operand.firstElement()); + std::vector equalities{operand.firstElement()}; + ASSERT_OK(in.setEqualities(std::move(equalities))); ASSERT(in.matchesSingleElement(match["a"])); ASSERT(!in.matchesSingleElement(notMatch["a"])); } @@ -1519,10 +1520,8 @@ TEST(InMatchExpression, MatchesEmpty) { TEST(InMatchExpression, MatchesElementMultiple) { BSONObj operand = BSON_ARRAY(1 << "r" << true << 1); InMatchExpression in; - in.addEquality(operand[0]); - in.addEquality(operand[1]); - in.addEquality(operand[2]); - in.addEquality(operand[3]); + std::vector equalities{operand[0], operand[1], operand[2], operand[3]}; + ASSERT_OK(in.setEqualities(std::move(equalities))); BSONObj matchFirst = BSON("a" << 1); BSONObj matchSecond = BSON("a" @@ -1540,7 +1539,8 @@ TEST(InMatchExpression, MatchesScalar) { BSONObj operand = BSON_ARRAY(5); InMatchExpression in; in.init("a"); - in.addEquality(operand.firstElement()); + std::vector equalities{operand.firstElement()}; + ASSERT_OK(in.setEqualities(std::move(equalities))); ASSERT(in.matchesBSON(BSON("a" << 5.0), NULL)); ASSERT(!in.matchesBSON(BSON("a" << 4), NULL)); @@ -1550,7 +1550,8 @@ TEST(InMatchExpression, MatchesArrayValue) { BSONObj operand = BSON_ARRAY(5); InMatchExpression in; in.init("a"); - in.addEquality(operand.firstElement()); + std::vector equalities{operand.firstElement()}; + ASSERT_OK(in.setEqualities(std::move(equalities))); ASSERT(in.matchesBSON(BSON("a" << BSON_ARRAY(5.0 << 6)), NULL)); ASSERT(!in.matchesBSON(BSON("a" << BSON_ARRAY(6 << 7)), NULL)); @@ -1562,7 +1563,8 @@ TEST(InMatchExpression, MatchesNull) { InMatchExpression in; in.init("a"); - in.addEquality(operand.firstElement()); + std::vector equalities{operand.firstElement()}; + ASSERT_OK(in.setEqualities(std::move(equalities))); ASSERT(in.matchesBSON(BSONObj(), NULL)); ASSERT(in.matchesBSON(BSON("a" << BSONNULL), NULL)); @@ -1576,15 +1578,16 @@ TEST(InMatchExpression, MatchesUndefined) { InMatchExpression in; in.init("a"); - Status s = in.addEquality(operand.firstElement()); - ASSERT_NOT_OK(s); + std::vector equalities{operand.firstElement()}; + ASSERT_NOT_OK(in.setEqualities(std::move(equalities))); } TEST(InMatchExpression, MatchesMinKey) { BSONObj operand = BSON_ARRAY(MinKey); InMatchExpression in; in.init("a"); - in.addEquality(operand.firstElement()); + std::vector equalities{operand.firstElement()}; + ASSERT_OK(in.setEqualities(std::move(equalities))); ASSERT(in.matchesBSON(BSON("a" << MinKey), NULL)); ASSERT(!in.matchesBSON(BSON("a" << MaxKey), NULL)); @@ -1595,7 +1598,8 @@ TEST(InMatchExpression, MatchesMaxKey) { BSONObj operand = BSON_ARRAY(MaxKey); InMatchExpression in; in.init("a"); - in.addEquality(operand.firstElement()); + std::vector equalities{operand.firstElement()}; + ASSERT_OK(in.setEqualities(std::move(equalities))); ASSERT(in.matchesBSON(BSON("a" << MaxKey), NULL)); ASSERT(!in.matchesBSON(BSON("a" << MinKey), NULL)); @@ -1606,9 +1610,8 @@ TEST(InMatchExpression, MatchesFullArray) { BSONObj operand = BSON_ARRAY(BSON_ARRAY(1 << 2) << 4 << 5); InMatchExpression in; in.init("a"); - in.addEquality(operand[0]); - in.addEquality(operand[1]); - in.addEquality(operand[2]); + std::vector equalities{operand[0], operand[1], operand[2]}; + ASSERT_OK(in.setEqualities(std::move(equalities))); ASSERT(in.matchesBSON(BSON("a" << BSON_ARRAY(1 << 2)), NULL)); ASSERT(!in.matchesBSON(BSON("a" << BSON_ARRAY(1 << 2 << 3)), NULL)); @@ -1620,8 +1623,8 @@ TEST(InMatchExpression, ElemMatchKey) { BSONObj operand = BSON_ARRAY(5 << 2); InMatchExpression in; in.init("a"); - in.addEquality(operand[0]); - in.addEquality(operand[1]); + std::vector equalities{operand[0], operand[1]}; + ASSERT_OK(in.setEqualities(std::move(equalities))); MatchDetails details; details.requestElemMatchKey(); @@ -1639,7 +1642,8 @@ TEST(InMatchExpression, InMatchExpressionsWithDifferentNumbersOfElementsAreUnequ << "string"); InMatchExpression eq1; InMatchExpression eq2; - eq1.addEquality(obj.firstElement()); + std::vector equalities{obj.firstElement()}; + ASSERT_OK(eq1.setEqualities(std::move(equalities))); ASSERT(!eq1.equivalent(&eq2)); } @@ -1675,8 +1679,12 @@ TEST(InMatchExpression, InMatchExpressionsWithCollationEquivalentElementsAreEqua InMatchExpression eq2; eq2.setCollator(&collator2); - eq1.addEquality(obj1.firstElement()); - eq2.addEquality(obj2.firstElement()); + std::vector equalities1{obj1.firstElement()}; + ASSERT_OK(eq1.setEqualities(std::move(equalities1))); + + std::vector equalities2{obj2.firstElement()}; + ASSERT_OK(eq2.setEqualities(std::move(equalities2))); + ASSERT(eq1.equivalent(&eq2)); } @@ -1692,8 +1700,12 @@ TEST(InMatchExpression, InMatchExpressionsWithCollationNonEquivalentElementsAreU InMatchExpression eq2; eq2.setCollator(&collator2); - eq1.addEquality(obj1.firstElement()); - eq2.addEquality(obj2.firstElement()); + std::vector equalities1{obj1.firstElement()}; + ASSERT_OK(eq1.setEqualities(std::move(equalities1))); + + std::vector equalities2{obj2.firstElement()}; + ASSERT_OK(eq2.setEqualities(std::move(equalities2))); + ASSERT(!eq1.equivalent(&eq2)); } @@ -1702,7 +1714,8 @@ TEST(InMatchExpression, StringMatchingWithNullCollatorUsesBinaryComparison) { BSONObj notMatch = BSON("a" << "string2"); InMatchExpression in; - in.addEquality(operand.firstElement()); + std::vector equalities{operand.firstElement()}; + ASSERT_OK(in.setEqualities(std::move(equalities))); ASSERT(!in.matchesSingleElement(notMatch["a"])); } @@ -1713,7 +1726,8 @@ TEST(InMatchExpression, StringMatchingRespectsCollation) { CollatorInterfaceMock collator(CollatorInterfaceMock::MockType::kAlwaysEqual); InMatchExpression in; in.setCollator(&collator); - in.addEquality(operand.firstElement()); + std::vector equalities{operand.firstElement()}; + ASSERT_OK(in.setEqualities(std::move(equalities))); ASSERT(in.matchesSingleElement(match["a"])); } @@ -1726,8 +1740,8 @@ TEST(InMatchExpression, ChangingCollationAfterAddingEqualitiesPreservesEqualitie CollatorInterfaceMock collatorReverseString(CollatorInterfaceMock::MockType::kReverseString); InMatchExpression in; in.setCollator(&collatorAlwaysEqual); - in.addEquality(obj1.firstElement()); - in.addEquality(obj2.firstElement()); + std::vector equalities{obj1.firstElement(), obj2.firstElement()}; + ASSERT_OK(in.setEqualities(std::move(equalities))); ASSERT(in.getEqualities().size() == 1); in.setCollator(&collatorReverseString); ASSERT(in.getEqualities().size() == 2); diff --git a/src/mongo/db/matcher/expression_parser.cpp b/src/mongo/db/matcher/expression_parser.cpp index 2d1399edcf9..9bfc2d75f3e 100644 --- a/src/mongo/db/matcher/expression_parser.cpp +++ b/src/mongo/db/matcher/expression_parser.cpp @@ -588,6 +588,7 @@ Status MatchExpressionParser::_parseInExpression(InMatchExpression* inExpression const BSONObj& theArray, const CollatorInterface* collator) { inExpression->setCollator(collator); + std::vector equalities; BSONObjIterator i(theArray); while (i.more()) { BSONElement e = i.next(); @@ -606,12 +607,10 @@ Status MatchExpressionParser::_parseInExpression(InMatchExpression* inExpression if (!s.isOK()) return s; } else { - Status s = inExpression->addEquality(e); - if (!s.isOK()) - return s; + equalities.push_back(e); } } - return Status::OK(); + return inExpression->setEqualities(std::move(equalities)); } StatusWithMatchExpression MatchExpressionParser::_parseType(const char* name, diff --git a/src/mongo/db/query/canonical_query_test.cpp b/src/mongo/db/query/canonical_query_test.cpp index c7a1f2a05a2..31ba2737ee9 100644 --- a/src/mongo/db/query/canonical_query_test.cpp +++ b/src/mongo/db/query/canonical_query_test.cpp @@ -548,7 +548,8 @@ TEST(CanonicalQueryTest, NormalizeWithInPreservesCollator) { BSONObj obj = fromjson("{'': 'string'}"); auto inMatchExpression = stdx::make_unique(); inMatchExpression->setCollator(&collator); - inMatchExpression->addEquality(obj.firstElement()); + std::vector equalities{obj.firstElement()}; + ASSERT_OK(inMatchExpression->setEqualities(std::move(equalities))); unique_ptr matchExpression( CanonicalQuery::normalizeTree(inMatchExpression.release())); ASSERT(matchExpression->matchType() == MatchExpression::MatchType::EQ); -- cgit v1.2.1