Diffstat (limited to 'src/mongo/db/pipeline')
45 files changed, 11350 insertions, 11250 deletions
diff --git a/src/mongo/db/pipeline/accumulator.h b/src/mongo/db/pipeline/accumulator.h index 7fa6cd1a8a9..fcb4fe922e1 100644 --- a/src/mongo/db/pipeline/accumulator.h +++ b/src/mongo/db/pipeline/accumulator.h @@ -38,182 +38,182 @@ #include "mongo/db/pipeline/value.h" namespace mongo { - class Accumulator : public RefCountable { - public: - Accumulator() = default; - - /** Process input and update internal state. - * merging should be true when processing outputs from getValue(true). - */ - void process(const Value& input, bool merging) { - processInternal(input, merging); - } - - /** Marks the end of the evaluate() phase and return accumulated result. - * toBeMerged should be true when the outputs will be merged by process(). - */ - virtual Value getValue(bool toBeMerged) const = 0; - - /// The name of the op as used in a serialization of the pipeline. - virtual const char* getOpName() const = 0; - - int memUsageForSorter() const { - dassert(_memUsageBytes != 0); // This would mean subclass didn't set it - return _memUsageBytes; - } - - /// Reset this accumulator to a fresh state ready to receive input. - virtual void reset() = 0; - - protected: - /// Update subclass's internal state based on input - virtual void processInternal(const Value& input, bool merging) = 0; - - /// subclasses are expected to update this as necessary - int _memUsageBytes = 0; - }; +class Accumulator : public RefCountable { +public: + Accumulator() = default; + /** Process input and update internal state. + * merging should be true when processing outputs from getValue(true). + */ + void process(const Value& input, bool merging) { + processInternal(input, merging); + } - class AccumulatorAddToSet final : public Accumulator { - public: - AccumulatorAddToSet(); + /** Marks the end of the evaluate() phase and return accumulated result. + * toBeMerged should be true when the outputs will be merged by process(). + */ + virtual Value getValue(bool toBeMerged) const = 0; - void processInternal(const Value& input, bool merging) final; - Value getValue(bool toBeMerged) const final; - const char* getOpName() const final; - void reset() final; + /// The name of the op as used in a serialization of the pipeline. + virtual const char* getOpName() const = 0; - static boost::intrusive_ptr<Accumulator> create(); + int memUsageForSorter() const { + dassert(_memUsageBytes != 0); // This would mean subclass didn't set it + return _memUsageBytes; + } - private: - typedef boost::unordered_set<Value, Value::Hash> SetType; - SetType set; - }; + /// Reset this accumulator to a fresh state ready to receive input. 
+ virtual void reset() = 0; +protected: + /// Update subclass's internal state based on input + virtual void processInternal(const Value& input, bool merging) = 0; - class AccumulatorFirst final : public Accumulator { - public: - AccumulatorFirst(); + /// subclasses are expected to update this as necessary + int _memUsageBytes = 0; +}; - void processInternal(const Value& input, bool merging) final; - Value getValue(bool toBeMerged) const final; - const char* getOpName() const final; - void reset() final; - static boost::intrusive_ptr<Accumulator> create(); +class AccumulatorAddToSet final : public Accumulator { +public: + AccumulatorAddToSet(); - private: - bool _haveFirst; - Value _first; - }; + void processInternal(const Value& input, bool merging) final; + Value getValue(bool toBeMerged) const final; + const char* getOpName() const final; + void reset() final; + static boost::intrusive_ptr<Accumulator> create(); - class AccumulatorLast final : public Accumulator { - public: - AccumulatorLast(); +private: + typedef boost::unordered_set<Value, Value::Hash> SetType; + SetType set; +}; - void processInternal(const Value& input, bool merging) final; - Value getValue(bool toBeMerged) const final; - const char* getOpName() const final; - void reset() final; - static boost::intrusive_ptr<Accumulator> create(); +class AccumulatorFirst final : public Accumulator { +public: + AccumulatorFirst(); - private: - Value _last; - }; + void processInternal(const Value& input, bool merging) final; + Value getValue(bool toBeMerged) const final; + const char* getOpName() const final; + void reset() final; + static boost::intrusive_ptr<Accumulator> create(); - class AccumulatorSum final : public Accumulator { - public: - AccumulatorSum(); +private: + bool _haveFirst; + Value _first; +}; - void processInternal(const Value& input, bool merging) final; - Value getValue(bool toBeMerged) const final; - const char* getOpName() const final; - void reset() final; - static boost::intrusive_ptr<Accumulator> create(); +class AccumulatorLast final : public Accumulator { +public: + AccumulatorLast(); + + void processInternal(const Value& input, bool merging) final; + Value getValue(bool toBeMerged) const final; + const char* getOpName() const final; + void reset() final; + + static boost::intrusive_ptr<Accumulator> create(); + +private: + Value _last; +}; - private: - BSONType totalType; - long long longTotal; - double doubleTotal; - }; +class AccumulatorSum final : public Accumulator { +public: + AccumulatorSum(); - class AccumulatorMinMax final : public Accumulator { - public: - enum Sense : int { - MIN = 1, - MAX = -1, // Used to "scale" comparison. - }; + void processInternal(const Value& input, bool merging) final; + Value getValue(bool toBeMerged) const final; + const char* getOpName() const final; + void reset() final; - explicit AccumulatorMinMax(Sense sense); + static boost::intrusive_ptr<Accumulator> create(); - void processInternal(const Value& input, bool merging) final; - Value getValue(bool toBeMerged) const final; - const char* getOpName() const final; - void reset() final; +private: + BSONType totalType; + long long longTotal; + double doubleTotal; +}; - static boost::intrusive_ptr<Accumulator> createMin(); - static boost::intrusive_ptr<Accumulator> createMax(); - private: - Value _val; - const Sense _sense; +class AccumulatorMinMax final : public Accumulator { +public: + enum Sense : int { + MIN = 1, + MAX = -1, // Used to "scale" comparison. 
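+ // (Illustrative note: processInternal() computes
+ //     cmp = Value::compare(_val, input) * _sense,
+ // so with MAX's -1 the product is positive exactly when input orders
+ // above _val, letting the single "cmp > 0 => replace" branch serve
+ // both $min and $max.)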
}; + explicit AccumulatorMinMax(Sense sense); - class AccumulatorPush final : public Accumulator { - public: - AccumulatorPush(); + void processInternal(const Value& input, bool merging) final; + Value getValue(bool toBeMerged) const final; + const char* getOpName() const final; + void reset() final; - void processInternal(const Value& input, bool merging) final; - Value getValue(bool toBeMerged) const final; - const char* getOpName() const final; - void reset() final; + static boost::intrusive_ptr<Accumulator> createMin(); + static boost::intrusive_ptr<Accumulator> createMax(); - static boost::intrusive_ptr<Accumulator> create(); +private: + Value _val; + const Sense _sense; +}; - private: - std::vector<Value> vpValue; - }; +class AccumulatorPush final : public Accumulator { +public: + AccumulatorPush(); - class AccumulatorAvg final : public Accumulator { - public: - AccumulatorAvg(); + void processInternal(const Value& input, bool merging) final; + Value getValue(bool toBeMerged) const final; + const char* getOpName() const final; + void reset() final; - void processInternal(const Value& input, bool merging) final; - Value getValue(bool toBeMerged) const final; - const char* getOpName() const final; - void reset() final; + static boost::intrusive_ptr<Accumulator> create(); - static boost::intrusive_ptr<Accumulator> create(); +private: + std::vector<Value> vpValue; +}; - private: - double _total; - long long _count; - }; +class AccumulatorAvg final : public Accumulator { +public: + AccumulatorAvg(); - class AccumulatorStdDev final : public Accumulator { - public: - explicit AccumulatorStdDev(bool isSamp); + void processInternal(const Value& input, bool merging) final; + Value getValue(bool toBeMerged) const final; + const char* getOpName() const final; + void reset() final; - void processInternal(const Value& input, bool merging) final; - Value getValue(bool toBeMerged) const final; - const char* getOpName() const final; - void reset() final; + static boost::intrusive_ptr<Accumulator> create(); - static boost::intrusive_ptr<Accumulator> createSamp(); - static boost::intrusive_ptr<Accumulator> createPop(); +private: + double _total; + long long _count; +}; - private: - const bool _isSamp; - long long _count; - double _mean; - double _m2; // Running sum of squares of delta from mean. Named to match algorithm. - }; + +class AccumulatorStdDev final : public Accumulator { +public: + explicit AccumulatorStdDev(bool isSamp); + + void processInternal(const Value& input, bool merging) final; + Value getValue(bool toBeMerged) const final; + const char* getOpName() const final; + void reset() final; + + static boost::intrusive_ptr<Accumulator> createSamp(); + static boost::intrusive_ptr<Accumulator> createPop(); + +private: + const bool _isSamp; + long long _count; + double _mean; + double _m2; // Running sum of squares of delta from mean. Named to match algorithm. 
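+ // (For reference: getValue(), defined in accumulator_std_dev.cpp, derives
+ // the result from these members; _m2 / _count is the population variance,
+ // _m2 / (_count - 1) the sample variance, and the square root of
+ // whichever applies is the reported standard deviation.)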
+}; } diff --git a/src/mongo/db/pipeline/accumulator_add_to_set.cpp b/src/mongo/db/pipeline/accumulator_add_to_set.cpp index bd151964ffa..bf2bc183afd 100644 --- a/src/mongo/db/pipeline/accumulator_add_to_set.cpp +++ b/src/mongo/db/pipeline/accumulator_add_to_set.cpp @@ -34,53 +34,52 @@ namespace mongo { - using boost::intrusive_ptr; - using std::vector; +using boost::intrusive_ptr; +using std::vector; - void AccumulatorAddToSet::processInternal(const Value& input, bool merging) { - if (!merging) { - if (!input.missing()) { - bool inserted = set.insert(input).second; - if (inserted) { - _memUsageBytes += input.getApproximateSize(); - } +void AccumulatorAddToSet::processInternal(const Value& input, bool merging) { + if (!merging) { + if (!input.missing()) { + bool inserted = set.insert(input).second; + if (inserted) { + _memUsageBytes += input.getApproximateSize(); } } - else { - // If we're merging, we need to take apart the arrays we - // receive and put their elements into the array we are collecting. - // If we didn't, then we'd get an array of arrays, with one array - // from each merge source. - verify(input.getType() == Array); - - const vector<Value>& array = input.getArray(); - for (size_t i=0; i < array.size(); i++) { - bool inserted = set.insert(array[i]).second; - if (inserted) { - _memUsageBytes += array[i].getApproximateSize(); - } + } else { + // If we're merging, we need to take apart the arrays we + // receive and put their elements into the array we are collecting. + // If we didn't, then we'd get an array of arrays, with one array + // from each merge source. + verify(input.getType() == Array); + + const vector<Value>& array = input.getArray(); + for (size_t i = 0; i < array.size(); i++) { + bool inserted = set.insert(array[i]).second; + if (inserted) { + _memUsageBytes += array[i].getApproximateSize(); } } } +} - Value AccumulatorAddToSet::getValue(bool toBeMerged) const { - return Value(vector<Value>(set.begin(), set.end())); - } +Value AccumulatorAddToSet::getValue(bool toBeMerged) const { + return Value(vector<Value>(set.begin(), set.end())); +} - AccumulatorAddToSet::AccumulatorAddToSet() { - _memUsageBytes = sizeof(*this); - } +AccumulatorAddToSet::AccumulatorAddToSet() { + _memUsageBytes = sizeof(*this); +} - void AccumulatorAddToSet::reset() { - SetType().swap(set); - _memUsageBytes = sizeof(*this); - } +void AccumulatorAddToSet::reset() { + SetType().swap(set); + _memUsageBytes = sizeof(*this); +} - intrusive_ptr<Accumulator> AccumulatorAddToSet::create() { - return new AccumulatorAddToSet(); - } +intrusive_ptr<Accumulator> AccumulatorAddToSet::create() { + return new AccumulatorAddToSet(); +} - const char *AccumulatorAddToSet::getOpName() const { - return "$addToSet"; - } +const char* AccumulatorAddToSet::getOpName() const { + return "$addToSet"; +} } diff --git a/src/mongo/db/pipeline/accumulator_avg.cpp b/src/mongo/db/pipeline/accumulator_avg.cpp index 6378dac13e8..ad027e7709d 100644 --- a/src/mongo/db/pipeline/accumulator_avg.cpp +++ b/src/mongo/db/pipeline/accumulator_avg.cpp @@ -35,62 +35,56 @@ namespace mongo { - using boost::intrusive_ptr; +using boost::intrusive_ptr; namespace { - const char subTotalName[] = "subTotal"; - const char countName[] = "count"; +const char subTotalName[] = "subTotal"; +const char countName[] = "count"; } - void AccumulatorAvg::processInternal(const Value& input, bool merging) { - if (!merging) { - // non numeric types have no impact on average - if (!input.numeric()) - return; +void AccumulatorAvg::processInternal(const Value& 
input, bool merging) { + if (!merging) { + // non numeric types have no impact on average + if (!input.numeric()) + return; - _total += input.getDouble(); - _count += 1; - } - else { - // We expect an object that contains both a subtotal and a count. - // This is what getValue(true) produced below. - verify(input.getType() == Object); - _total += input[subTotalName].getDouble(); - _count += input[countName].getLong(); - } + _total += input.getDouble(); + _count += 1; + } else { + // We expect an object that contains both a subtotal and a count. + // This is what getValue(true) produced below. + verify(input.getType() == Object); + _total += input[subTotalName].getDouble(); + _count += input[countName].getLong(); } +} - intrusive_ptr<Accumulator> AccumulatorAvg::create() { - return new AccumulatorAvg(); - } +intrusive_ptr<Accumulator> AccumulatorAvg::create() { + return new AccumulatorAvg(); +} - Value AccumulatorAvg::getValue(bool toBeMerged) const { - if (!toBeMerged) { - if (_count == 0) - return Value(0.0); +Value AccumulatorAvg::getValue(bool toBeMerged) const { + if (!toBeMerged) { + if (_count == 0) + return Value(0.0); - return Value(_total / static_cast<double>(_count)); - } - else { - return Value(DOC(subTotalName << _total - << countName << _count)); - } + return Value(_total / static_cast<double>(_count)); + } else { + return Value(DOC(subTotalName << _total << countName << _count)); } +} - AccumulatorAvg::AccumulatorAvg() - : _total(0) - , _count(0) - { - // This is a fixed size Accumulator so we never need to update this - _memUsageBytes = sizeof(*this); - } +AccumulatorAvg::AccumulatorAvg() : _total(0), _count(0) { + // This is a fixed size Accumulator so we never need to update this + _memUsageBytes = sizeof(*this); +} - void AccumulatorAvg::reset() { - _total = 0; - _count = 0; - } +void AccumulatorAvg::reset() { + _total = 0; + _count = 0; +} - const char *AccumulatorAvg::getOpName() const { - return "$avg"; - } +const char* AccumulatorAvg::getOpName() const { + return "$avg"; +} } diff --git a/src/mongo/db/pipeline/accumulator_first.cpp b/src/mongo/db/pipeline/accumulator_first.cpp index ed49fe163f1..9425199793f 100644 --- a/src/mongo/db/pipeline/accumulator_first.cpp +++ b/src/mongo/db/pipeline/accumulator_first.cpp @@ -33,40 +33,38 @@ namespace mongo { - using boost::intrusive_ptr; +using boost::intrusive_ptr; - void AccumulatorFirst::processInternal(const Value& input, bool merging) { - /* only remember the first value seen */ - if (!_haveFirst) { - // can't use pValue.missing() since we want the first value even if missing - _haveFirst = true; - _first = input; - _memUsageBytes = sizeof(*this) + input.getApproximateSize() - sizeof(Value); - } +void AccumulatorFirst::processInternal(const Value& input, bool merging) { + /* only remember the first value seen */ + if (!_haveFirst) { + // can't use pValue.missing() since we want the first value even if missing + _haveFirst = true; + _first = input; + _memUsageBytes = sizeof(*this) + input.getApproximateSize() - sizeof(Value); } +} - Value AccumulatorFirst::getValue(bool toBeMerged) const { - return _first; - } +Value AccumulatorFirst::getValue(bool toBeMerged) const { + return _first; +} - AccumulatorFirst::AccumulatorFirst() - : _haveFirst(false) - { - _memUsageBytes = sizeof(*this); - } +AccumulatorFirst::AccumulatorFirst() : _haveFirst(false) { + _memUsageBytes = sizeof(*this); +} - void AccumulatorFirst::reset() { - _haveFirst = false; - _first = Value(); - _memUsageBytes = sizeof(*this); - } +void 
AccumulatorFirst::reset() { + _haveFirst = false; + _first = Value(); + _memUsageBytes = sizeof(*this); +} - intrusive_ptr<Accumulator> AccumulatorFirst::create() { - return new AccumulatorFirst(); - } +intrusive_ptr<Accumulator> AccumulatorFirst::create() { + return new AccumulatorFirst(); +} - const char *AccumulatorFirst::getOpName() const { - return "$first"; - } +const char* AccumulatorFirst::getOpName() const { + return "$first"; +} } diff --git a/src/mongo/db/pipeline/accumulator_last.cpp b/src/mongo/db/pipeline/accumulator_last.cpp index 4b24cf828b7..c9f4b487d1c 100644 --- a/src/mongo/db/pipeline/accumulator_last.cpp +++ b/src/mongo/db/pipeline/accumulator_last.cpp @@ -33,32 +33,32 @@ namespace mongo { - using boost::intrusive_ptr; +using boost::intrusive_ptr; - void AccumulatorLast::processInternal(const Value& input, bool merging) { - /* always remember the last value seen */ - _last = input; - _memUsageBytes = sizeof(*this) + _last.getApproximateSize() - sizeof(Value); - } +void AccumulatorLast::processInternal(const Value& input, bool merging) { + /* always remember the last value seen */ + _last = input; + _memUsageBytes = sizeof(*this) + _last.getApproximateSize() - sizeof(Value); +} - Value AccumulatorLast::getValue(bool toBeMerged) const { - return _last; - } +Value AccumulatorLast::getValue(bool toBeMerged) const { + return _last; +} - AccumulatorLast::AccumulatorLast() { - _memUsageBytes = sizeof(*this); - } +AccumulatorLast::AccumulatorLast() { + _memUsageBytes = sizeof(*this); +} - void AccumulatorLast::reset() { - _memUsageBytes = sizeof(*this); - _last = Value(); - } +void AccumulatorLast::reset() { + _memUsageBytes = sizeof(*this); + _last = Value(); +} - intrusive_ptr<Accumulator> AccumulatorLast::create() { - return new AccumulatorLast(); - } +intrusive_ptr<Accumulator> AccumulatorLast::create() { + return new AccumulatorLast(); +} - const char *AccumulatorLast::getOpName() const { - return "$last"; - } +const char* AccumulatorLast::getOpName() const { + return "$last"; +} } diff --git a/src/mongo/db/pipeline/accumulator_min_max.cpp b/src/mongo/db/pipeline/accumulator_min_max.cpp index 8f7d857de76..da4f280f797 100644 --- a/src/mongo/db/pipeline/accumulator_min_max.cpp +++ b/src/mongo/db/pipeline/accumulator_min_max.cpp @@ -33,45 +33,44 @@ namespace mongo { - using boost::intrusive_ptr; +using boost::intrusive_ptr; - void AccumulatorMinMax::processInternal(const Value& input, bool merging) { - // nullish values should have no impact on result - if (!input.nullish()) { - /* compare with the current value; swap if appropriate */ - int cmp = Value::compare(_val, input) * _sense; - if (cmp > 0 || _val.missing()) { // missing is lower than all other values - _val = input; - _memUsageBytes = sizeof(*this) + input.getApproximateSize() - sizeof(Value); - } +void AccumulatorMinMax::processInternal(const Value& input, bool merging) { + // nullish values should have no impact on result + if (!input.nullish()) { + /* compare with the current value; swap if appropriate */ + int cmp = Value::compare(_val, input) * _sense; + if (cmp > 0 || _val.missing()) { // missing is lower than all other values + _val = input; + _memUsageBytes = sizeof(*this) + input.getApproximateSize() - sizeof(Value); } } +} - Value AccumulatorMinMax::getValue(bool toBeMerged) const { - return _val; - } +Value AccumulatorMinMax::getValue(bool toBeMerged) const { + return _val; +} - AccumulatorMinMax::AccumulatorMinMax(Sense sense) : - _sense(sense) { - _memUsageBytes = sizeof(*this); - } 
+AccumulatorMinMax::AccumulatorMinMax(Sense sense) : _sense(sense) { + _memUsageBytes = sizeof(*this); +} - void AccumulatorMinMax::reset() { - _val = Value(); - _memUsageBytes = sizeof(*this); - } +void AccumulatorMinMax::reset() { + _val = Value(); + _memUsageBytes = sizeof(*this); +} - intrusive_ptr<Accumulator> AccumulatorMinMax::createMin() { - return new AccumulatorMinMax(Sense::MIN); - } +intrusive_ptr<Accumulator> AccumulatorMinMax::createMin() { + return new AccumulatorMinMax(Sense::MIN); +} - intrusive_ptr<Accumulator> AccumulatorMinMax::createMax() { - return new AccumulatorMinMax(Sense::MAX); - } +intrusive_ptr<Accumulator> AccumulatorMinMax::createMax() { + return new AccumulatorMinMax(Sense::MAX); +} - const char *AccumulatorMinMax::getOpName() const { - if (_sense == 1) - return "$min"; - return "$max"; - } +const char* AccumulatorMinMax::getOpName() const { + if (_sense == 1) + return "$min"; + return "$max"; +} } diff --git a/src/mongo/db/pipeline/accumulator_push.cpp b/src/mongo/db/pipeline/accumulator_push.cpp index e7a2b6b5514..b19ec08f71d 100644 --- a/src/mongo/db/pipeline/accumulator_push.cpp +++ b/src/mongo/db/pipeline/accumulator_push.cpp @@ -34,50 +34,49 @@ namespace mongo { - using boost::intrusive_ptr; - using std::vector; +using boost::intrusive_ptr; +using std::vector; - void AccumulatorPush::processInternal(const Value& input, bool merging) { - if (!merging) { - if (!input.missing()) { - vpValue.push_back(input); - _memUsageBytes += input.getApproximateSize(); - } +void AccumulatorPush::processInternal(const Value& input, bool merging) { + if (!merging) { + if (!input.missing()) { + vpValue.push_back(input); + _memUsageBytes += input.getApproximateSize(); } - else { - // If we're merging, we need to take apart the arrays we - // receive and put their elements into the array we are collecting. - // If we didn't, then we'd get an array of arrays, with one array - // from each merge source. - verify(input.getType() == Array); - - const vector<Value>& vec = input.getArray(); - vpValue.insert(vpValue.end(), vec.begin(), vec.end()); + } else { + // If we're merging, we need to take apart the arrays we + // receive and put their elements into the array we are collecting. + // If we didn't, then we'd get an array of arrays, with one array + // from each merge source. 
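+ // (Illustrative example: if one merge source contributed [1, 2] and
+ // another [3], appending their elements yields [1, 2, 3] rather than
+ // the nested [[1, 2], [3]].)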
+ verify(input.getType() == Array); - for (size_t i=0; i < vec.size(); i++) { - _memUsageBytes += vec[i].getApproximateSize(); - } + const vector<Value>& vec = input.getArray(); + vpValue.insert(vpValue.end(), vec.begin(), vec.end()); + + for (size_t i = 0; i < vec.size(); i++) { + _memUsageBytes += vec[i].getApproximateSize(); } } +} - Value AccumulatorPush::getValue(bool toBeMerged) const { - return Value(vpValue); - } +Value AccumulatorPush::getValue(bool toBeMerged) const { + return Value(vpValue); +} - AccumulatorPush::AccumulatorPush() { - _memUsageBytes = sizeof(*this); - } +AccumulatorPush::AccumulatorPush() { + _memUsageBytes = sizeof(*this); +} - void AccumulatorPush::reset() { - vector<Value>().swap(vpValue); - _memUsageBytes = sizeof(*this); - } +void AccumulatorPush::reset() { + vector<Value>().swap(vpValue); + _memUsageBytes = sizeof(*this); +} - intrusive_ptr<Accumulator> AccumulatorPush::create() { - return new AccumulatorPush(); - } +intrusive_ptr<Accumulator> AccumulatorPush::create() { + return new AccumulatorPush(); +} - const char *AccumulatorPush::getOpName() const { - return "$push"; - } +const char* AccumulatorPush::getOpName() const { + return "$push"; +} } diff --git a/src/mongo/db/pipeline/accumulator_std_dev.cpp b/src/mongo/db/pipeline/accumulator_std_dev.cpp index 76957c3d112..b51a21bbe6f 100644 --- a/src/mongo/db/pipeline/accumulator_std_dev.cpp +++ b/src/mongo/db/pipeline/accumulator_std_dev.cpp @@ -34,84 +34,75 @@ #include "mongo/db/pipeline/value.h" namespace mongo { - using boost::intrusive_ptr; - - void AccumulatorStdDev::processInternal(const Value& input, bool merging) { - if (!merging) { - // non numeric types have no impact on standard deviation - if (!input.numeric()) - return; - - const double val = input.getDouble(); - - // This is an implementation of the following algorithm: - // http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Online_algorithm - _count += 1; - const double delta = val - _mean; - _mean += delta / _count; - _m2 += delta * (val - _mean); - } - else { - // This is what getValue(true) produced below. - verify(input.getType() == Object); - const double m2 = input["m2"].getDouble(); - const double mean = input["mean"].getDouble(); - const long long count = input["count"].getLong(); - - if (count == 0) - return; // This partition had no data to contribute. - - // This is an implementation of the following algorithm: - // http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Parallel_algorithm - const double delta = mean - _mean; - const long long newCount = count + _count; - - _mean = ((_count * _mean) + (count * mean)) / newCount; - _m2 += m2 + (delta * delta * (double(_count) * count / newCount)); - _count = newCount; - } +using boost::intrusive_ptr; + +void AccumulatorStdDev::processInternal(const Value& input, bool merging) { + if (!merging) { + // non numeric types have no impact on standard deviation + if (!input.numeric()) + return; + + const double val = input.getDouble(); + + // This is an implementation of the following algorithm: + // http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Online_algorithm + _count += 1; + const double delta = val - _mean; + _mean += delta / _count; + _m2 += delta * (val - _mean); + } else { + // This is what getValue(true) produced below. 
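+ // (Sketch of the combination below, writing n for _count, m for _mean
+ // and M2 for _m2, with primes for the incoming partial result:
+ //     n''  = n + n'
+ //     m''  = (n * m + n' * m') / n''
+ //     M2'' = M2 + M2' + delta^2 * n * n' / n''   with delta = m' - m.)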
+ verify(input.getType() == Object); + const double m2 = input["m2"].getDouble(); + const double mean = input["mean"].getDouble(); + const long long count = input["count"].getLong(); + + if (count == 0) + return; // This partition had no data to contribute. + + // This is an implementation of the following algorithm: + // http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Parallel_algorithm + const double delta = mean - _mean; + const long long newCount = count + _count; + + _mean = ((_count * _mean) + (count * mean)) / newCount; + _m2 += m2 + (delta * delta * (double(_count) * count / newCount)); + _count = newCount; } +} - Value AccumulatorStdDev::getValue(bool toBeMerged) const { - if (!toBeMerged) { - const long long adjustedCount = (_isSamp ? _count - 1 : _count); - if (adjustedCount <= 0) - return Value(BSONNULL); // standard deviation not well defined in this case - - return Value(sqrt(_m2 / adjustedCount)); - } - else { - return Value(DOC("m2" << _m2 - << "mean" << _mean - << "count" << _count)); - } - } +Value AccumulatorStdDev::getValue(bool toBeMerged) const { + if (!toBeMerged) { + const long long adjustedCount = (_isSamp ? _count - 1 : _count); + if (adjustedCount <= 0) + return Value(BSONNULL); // standard deviation not well defined in this case - intrusive_ptr<Accumulator> AccumulatorStdDev::createSamp() { - return new AccumulatorStdDev(true); + return Value(sqrt(_m2 / adjustedCount)); + } else { + return Value(DOC("m2" << _m2 << "mean" << _mean << "count" << _count)); } +} - intrusive_ptr<Accumulator> AccumulatorStdDev::createPop() { - return new AccumulatorStdDev(false); - } +intrusive_ptr<Accumulator> AccumulatorStdDev::createSamp() { + return new AccumulatorStdDev(true); +} - AccumulatorStdDev::AccumulatorStdDev(bool isSamp) - : _isSamp(isSamp) - , _count(0) - , _mean(0) - , _m2(0) - { - // This is a fixed size Accumulator so we never need to update this - _memUsageBytes = sizeof(*this); - } +intrusive_ptr<Accumulator> AccumulatorStdDev::createPop() { + return new AccumulatorStdDev(false); +} - void AccumulatorStdDev::reset() { - _count = 0; - _mean = 0; - _m2 = 0; - } +AccumulatorStdDev::AccumulatorStdDev(bool isSamp) : _isSamp(isSamp), _count(0), _mean(0), _m2(0) { + // This is a fixed size Accumulator so we never need to update this + _memUsageBytes = sizeof(*this); +} - const char *AccumulatorStdDev::getOpName() const { - return (_isSamp ? "$stdDevSamp" : "$stdDevPop"); - } +void AccumulatorStdDev::reset() { + _count = 0; + _mean = 0; + _m2 = 0; +} + +const char* AccumulatorStdDev::getOpName() const { + return (_isSamp ? 
"$stdDevSamp" : "$stdDevPop"); +} } diff --git a/src/mongo/db/pipeline/accumulator_sum.cpp b/src/mongo/db/pipeline/accumulator_sum.cpp index 9076a324dfa..4da24904078 100644 --- a/src/mongo/db/pipeline/accumulator_sum.cpp +++ b/src/mongo/db/pipeline/accumulator_sum.cpp @@ -33,66 +33,57 @@ namespace mongo { - using boost::intrusive_ptr; +using boost::intrusive_ptr; - void AccumulatorSum::processInternal(const Value& input, bool merging) { - // do nothing with non numeric types - if (!input.numeric()) - return; +void AccumulatorSum::processInternal(const Value& input, bool merging) { + // do nothing with non numeric types + if (!input.numeric()) + return; - // upgrade to the widest type required to hold the result - totalType = Value::getWidestNumeric(totalType, input.getType()); + // upgrade to the widest type required to hold the result + totalType = Value::getWidestNumeric(totalType, input.getType()); - if (totalType == NumberInt || totalType == NumberLong) { - long long v = input.coerceToLong(); - longTotal += v; - doubleTotal += v; - } - else if (totalType == NumberDouble) { - double v = input.coerceToDouble(); - doubleTotal += v; - } - else { - // non numerics should have returned above so we should never get here - verify(false); - } + if (totalType == NumberInt || totalType == NumberLong) { + long long v = input.coerceToLong(); + longTotal += v; + doubleTotal += v; + } else if (totalType == NumberDouble) { + double v = input.coerceToDouble(); + doubleTotal += v; + } else { + // non numerics should have returned above so we should never get here + verify(false); } +} - intrusive_ptr<Accumulator> AccumulatorSum::create() { - return new AccumulatorSum(); - } +intrusive_ptr<Accumulator> AccumulatorSum::create() { + return new AccumulatorSum(); +} - Value AccumulatorSum::getValue(bool toBeMerged) const { - if (totalType == NumberLong) { - return Value(longTotal); - } - else if (totalType == NumberDouble) { - return Value(doubleTotal); - } - else if (totalType == NumberInt) { - return Value::createIntOrLong(longTotal); - } - else { - massert(16000, "$sum resulted in a non-numeric type", false); - } +Value AccumulatorSum::getValue(bool toBeMerged) const { + if (totalType == NumberLong) { + return Value(longTotal); + } else if (totalType == NumberDouble) { + return Value(doubleTotal); + } else if (totalType == NumberInt) { + return Value::createIntOrLong(longTotal); + } else { + massert(16000, "$sum resulted in a non-numeric type", false); } +} - AccumulatorSum::AccumulatorSum() - : totalType(NumberInt) - , longTotal(0) - , doubleTotal(0) - { - // This is a fixed size Accumulator so we never need to update this - _memUsageBytes = sizeof(*this); - } +AccumulatorSum::AccumulatorSum() : totalType(NumberInt), longTotal(0), doubleTotal(0) { + // This is a fixed size Accumulator so we never need to update this + _memUsageBytes = sizeof(*this); +} - void AccumulatorSum::reset() { - totalType = NumberInt; - longTotal = 0; - doubleTotal = 0; - } +void AccumulatorSum::reset() { + totalType = NumberInt; + longTotal = 0; + doubleTotal = 0; +} - const char *AccumulatorSum::getOpName() const { - return "$sum"; - } +const char* AccumulatorSum::getOpName() const { + return "$sum"; +} } diff --git a/src/mongo/db/pipeline/dependencies.cpp b/src/mongo/db/pipeline/dependencies.cpp index 128d46bd703..30ced50e06a 100644 --- a/src/mongo/db/pipeline/dependencies.cpp +++ b/src/mongo/db/pipeline/dependencies.cpp @@ -35,146 +35,148 @@ namespace mongo { - using std::set; - using std::string; - using std::vector; +using 
std::set; +using std::string; +using std::vector; - namespace str = mongoutils::str; +namespace str = mongoutils::str; - BSONObj DepsTracker::toProjection() const { - BSONObjBuilder bb; +BSONObj DepsTracker::toProjection() const { + BSONObjBuilder bb; - if (needTextScore) - bb.append(Document::metaFieldTextScore, BSON("$meta" << "textScore")); + if (needTextScore) + bb.append(Document::metaFieldTextScore, + BSON("$meta" + << "textScore")); - if (needWholeDocument) - return bb.obj(); + if (needWholeDocument) + return bb.obj(); - if (fields.empty()) { - // Projection language lacks good a way to say no fields needed. This fakes it. - bb.append("_id", 0); - bb.append("$noFieldsNeeded", 1); - return bb.obj(); - } + if (fields.empty()) { + // Projection language lacks good a way to say no fields needed. This fakes it. + bb.append("_id", 0); + bb.append("$noFieldsNeeded", 1); + return bb.obj(); + } - bool needId = false; - string last; - for (set<string>::const_iterator it(fields.begin()), end(fields.end()); it!=end; ++it) { - if (str::startsWith(*it, "_id") && (it->size() == 3 || (*it)[3] == '.')) { - // _id and subfields are handled specially due in part to SERVER-7502 - needId = true; - continue; - } - - if (!last.empty() && str::startsWith(*it, last)) { - // we are including a parent of *it so we don't need to include this field - // explicitly. In fact, due to SERVER-6527 if we included this field, the parent - // wouldn't be fully included. This logic relies on on set iterators going in - // lexicographic order so that a string is always directly before of all fields it - // prefixes. - continue; - } - - last = *it + '.'; - bb.append(*it, 1); + bool needId = false; + string last; + for (set<string>::const_iterator it(fields.begin()), end(fields.end()); it != end; ++it) { + if (str::startsWith(*it, "_id") && (it->size() == 3 || (*it)[3] == '.')) { + // _id and subfields are handled specially due in part to SERVER-7502 + needId = true; + continue; } - if (needId) // we are explicit either way - bb.append("_id", 1); - else - bb.append("_id", 0); + if (!last.empty() && str::startsWith(*it, last)) { + // we are including a parent of *it so we don't need to include this field + // explicitly. In fact, due to SERVER-6527 if we included this field, the parent + // wouldn't be fully included. This logic relies on on set iterators going in + // lexicographic order so that a string is always directly before of all fields it + // prefixes. + continue; + } - return bb.obj(); + last = *it + '.'; + bb.append(*it, 1); } - // ParsedDeps::_fields is a simple recursive look-up table. For each field: - // If the value has type==Bool, the whole field is needed - // If the value has type==Object, the fields in the subobject are needed - // All other fields should be missing which means not needed - boost::optional<ParsedDeps> DepsTracker::toParsedDeps() const { - MutableDocument md; + if (needId) // we are explicit either way + bb.append("_id", 1); + else + bb.append("_id", 0); - if (needWholeDocument || needTextScore) { - // can't use ParsedDeps in this case - return boost::none; - } + return bb.obj(); +} - string last; - for (set<string>::const_iterator it(fields.begin()), end(fields.end()); it!=end; ++it) { - if (!last.empty() && str::startsWith(*it, last)) { - // we are including a parent of *it so we don't need to include this field - // explicitly. In fact, if we included this field, the parent wouldn't be fully - // included. 
This logic relies on on set iterators going in lexicographic order so - // that a string is always directly before of all fields it prefixes. - continue; - } - last = *it + '.'; - md.setNestedField(*it, Value(true)); - } +// ParsedDeps::_fields is a simple recursive look-up table. For each field: +// If the value has type==Bool, the whole field is needed +// If the value has type==Object, the fields in the subobject are needed +// All other fields should be missing which means not needed +boost::optional<ParsedDeps> DepsTracker::toParsedDeps() const { + MutableDocument md; + + if (needWholeDocument || needTextScore) { + // can't use ParsedDeps in this case + return boost::none; + } - return ParsedDeps(md.freeze()); + string last; + for (set<string>::const_iterator it(fields.begin()), end(fields.end()); it != end; ++it) { + if (!last.empty() && str::startsWith(*it, last)) { + // we are including a parent of *it so we don't need to include this field + // explicitly. In fact, if we included this field, the parent wouldn't be fully + // included. This logic relies on on set iterators going in lexicographic order so + // that a string is always directly before of all fields it prefixes. + continue; + } + last = *it + '.'; + md.setNestedField(*it, Value(true)); } + return ParsedDeps(md.freeze()); +} + namespace { - // Mutually recursive with arrayHelper - Document documentHelper(const BSONObj& bson, const Document& neededFields); - - // Handles array-typed values for ParsedDeps::extractFields - Value arrayHelper(const BSONObj& bson, const Document& neededFields) { - BSONObjIterator it(bson); - - vector<Value> values; - while (it.more()) { - BSONElement bsonElement(it.next()); - if (bsonElement.type() == Object) { - Document sub = documentHelper(bsonElement.embeddedObject(), neededFields); - values.push_back(Value(sub)); - } - - if (bsonElement.type() == Array) { - values.push_back(arrayHelper(bsonElement.embeddedObject(), neededFields)); - } +// Mutually recursive with arrayHelper +Document documentHelper(const BSONObj& bson, const Document& neededFields); + +// Handles array-typed values for ParsedDeps::extractFields +Value arrayHelper(const BSONObj& bson, const Document& neededFields) { + BSONObjIterator it(bson); + + vector<Value> values; + while (it.more()) { + BSONElement bsonElement(it.next()); + if (bsonElement.type() == Object) { + Document sub = documentHelper(bsonElement.embeddedObject(), neededFields); + values.push_back(Value(sub)); } - return Value(std::move(values)); + if (bsonElement.type() == Array) { + values.push_back(arrayHelper(bsonElement.embeddedObject(), neededFields)); + } } - // Handles object-typed values including the top-level for ParsedDeps::extractFields - Document documentHelper(const BSONObj& bson, const Document& neededFields) { - MutableDocument md(neededFields.size()); + return Value(std::move(values)); +} - BSONObjIterator it(bson); - while (it.more()) { - BSONElement bsonElement (it.next()); - StringData fieldName = bsonElement.fieldNameStringData(); - Value isNeeded = neededFields[fieldName]; +// Handles object-typed values including the top-level for ParsedDeps::extractFields +Document documentHelper(const BSONObj& bson, const Document& neededFields) { + MutableDocument md(neededFields.size()); - if (isNeeded.missing()) - continue; + BSONObjIterator it(bson); + while (it.more()) { + BSONElement bsonElement(it.next()); + StringData fieldName = bsonElement.fieldNameStringData(); + Value isNeeded = neededFields[fieldName]; - if (isNeeded.getType() == Bool) { 
- md.addField(fieldName, Value(bsonElement)); - continue; - } + if (isNeeded.missing()) + continue; - dassert(isNeeded.getType() == Object); + if (isNeeded.getType() == Bool) { + md.addField(fieldName, Value(bsonElement)); + continue; + } - if (bsonElement.type() == Object) { - Document sub = documentHelper(bsonElement.embeddedObject(), isNeeded.getDocument()); - md.addField(fieldName, Value(sub)); - } + dassert(isNeeded.getType() == Object); - if (bsonElement.type() == Array) { - md.addField(fieldName, arrayHelper(bsonElement.embeddedObject(), - isNeeded.getDocument())); - } + if (bsonElement.type() == Object) { + Document sub = documentHelper(bsonElement.embeddedObject(), isNeeded.getDocument()); + md.addField(fieldName, Value(sub)); } - return md.freeze(); + if (bsonElement.type() == Array) { + md.addField(fieldName, + arrayHelper(bsonElement.embeddedObject(), isNeeded.getDocument())); + } } -} // namespace - Document ParsedDeps::extractFields(const BSONObj& input) const { - return documentHelper(input, _fields); - } + return md.freeze(); +} +} // namespace + +Document ParsedDeps::extractFields(const BSONObj& input) const { + return documentHelper(input, _fields); +} } diff --git a/src/mongo/db/pipeline/dependencies.h b/src/mongo/db/pipeline/dependencies.h index 47f8f46c432..b39ff1fcfd3 100644 --- a/src/mongo/db/pipeline/dependencies.h +++ b/src/mongo/db/pipeline/dependencies.h @@ -35,43 +35,38 @@ #include "mongo/db/pipeline/document.h" namespace mongo { - class ParsedDeps; +class ParsedDeps; + +/** + * This struct allows components in an agg pipeline to report what they need from their input. + */ +struct DepsTracker { + DepsTracker() : needWholeDocument(false), needTextScore(false) {} /** - * This struct allows components in an agg pipeline to report what they need from their input. + * Returns a projection object covering the dependencies tracked by this class. */ - struct DepsTracker { - DepsTracker() - : needWholeDocument(false) - , needTextScore(false) - {} - - /** - * Returns a projection object covering the dependencies tracked by this class. - */ - BSONObj toProjection() const; + BSONObj toProjection() const; - boost::optional<ParsedDeps> toParsedDeps() const; + boost::optional<ParsedDeps> toParsedDeps() const; - std::set<std::string> fields; // names of needed fields in dotted notation - bool needWholeDocument; // if true, ignore fields and assume the whole document is needed - bool needTextScore; - }; + std::set<std::string> fields; // names of needed fields in dotted notation + bool needWholeDocument; // if true, ignore fields and assume the whole document is needed + bool needTextScore; +}; - /** - * This class is designed to quickly extract the needed fields from a BSONObj into a Document. - * It should only be created by a call to DepsTracker::ParsedDeps - */ - class ParsedDeps { - public: - Document extractFields(const BSONObj& input) const; +/** + * This class is designed to quickly extract the needed fields from a BSONObj into a Document. 
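+ * (For orientation: DepsTracker::toParsedDeps() above builds the nested field
+ * table this class stores, and extractFields() walks it against a BSONObj via
+ * the documentHelper()/arrayHelper() pair in dependencies.cpp.)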
+ * It should only be created by a call to DepsTracker::ParsedDeps + */ +class ParsedDeps { +public: + Document extractFields(const BSONObj& input) const; - private: - friend struct DepsTracker; // so it can call constructor - explicit ParsedDeps(const Document& fields) - : _fields(fields) - {} +private: + friend struct DepsTracker; // so it can call constructor + explicit ParsedDeps(const Document& fields) : _fields(fields) {} - Document _fields; - }; + Document _fields; +}; } diff --git a/src/mongo/db/pipeline/document.cpp b/src/mongo/db/pipeline/document.cpp index 741834d39cf..49efa640277 100644 --- a/src/mongo/db/pipeline/document.cpp +++ b/src/mongo/db/pipeline/document.cpp @@ -37,425 +37,412 @@ #include "mongo/util/mongoutils/str.h" namespace mongo { - using namespace mongoutils; - using boost::intrusive_ptr; - using std::string; - using std::vector; - - Position DocumentStorage::findField(StringData requested) const { - int reqSize = requested.size(); // get size calculation out of the way if needed - - if (_numFields >= HASH_TAB_MIN) { // hash lookup - const unsigned bucket = bucketForKey(requested); - - Position pos = _hashTab[bucket]; - while (pos.found()) { - const ValueElement& elem = getField(pos); - if (elem.nameLen == reqSize - && memcmp(requested.rawData(), elem._name, reqSize) == 0) { - return pos; - } - - // possible collision - pos = elem.nextCollision; +using namespace mongoutils; +using boost::intrusive_ptr; +using std::string; +using std::vector; + +Position DocumentStorage::findField(StringData requested) const { + int reqSize = requested.size(); // get size calculation out of the way if needed + + if (_numFields >= HASH_TAB_MIN) { // hash lookup + const unsigned bucket = bucketForKey(requested); + + Position pos = _hashTab[bucket]; + while (pos.found()) { + const ValueElement& elem = getField(pos); + if (elem.nameLen == reqSize && memcmp(requested.rawData(), elem._name, reqSize) == 0) { + return pos; } + + // possible collision + pos = elem.nextCollision; } - else { // linear scan - for (DocumentStorageIterator it = iteratorAll(); !it.atEnd(); it.advance()) { - if (it->nameLen == reqSize - && memcmp(requested.rawData(), it->_name, reqSize) == 0) { - return it.position(); - } + } else { // linear scan + for (DocumentStorageIterator it = iteratorAll(); !it.atEnd(); it.advance()) { + if (it->nameLen == reqSize && memcmp(requested.rawData(), it->_name, reqSize) == 0) { + return it.position(); } } - - // if we got here, there's no such field - return Position(); } - Value& DocumentStorage::appendField(StringData name) { - Position pos = getNextPosition(); - const int nameSize = name.size(); - - // these are the same for everyone - const Position nextCollision; - const Value value; - - // Make room for new field (and padding at end for alignment) - const unsigned newUsed = ValueElement::align(_usedBytes + sizeof(ValueElement) + nameSize); - if (_buffer + newUsed > _bufferEnd) - alloc(newUsed); - _usedBytes = newUsed; - - // Append structure of a ValueElement - char* dest = _buffer + pos.index; // must be after alloc since it changes _buffer -#define append(x) memcpy(dest, &(x), sizeof(x)); dest += sizeof(x) - append(value); - append(nextCollision); - append(nameSize); - name.copyTo( dest, true ); - // Padding for alignment handled above -#undef append + // if we got here, there's no such field + return Position(); +} - // Make sure next field starts where we expect it - fassert(16486, getField(pos).next()->ptr() == _buffer + _usedBytes); +Value& 
DocumentStorage::appendField(StringData name) { + Position pos = getNextPosition(); + const int nameSize = name.size(); + + // these are the same for everyone + const Position nextCollision; + const Value value; + + // Make room for new field (and padding at end for alignment) + const unsigned newUsed = ValueElement::align(_usedBytes + sizeof(ValueElement) + nameSize); + if (_buffer + newUsed > _bufferEnd) + alloc(newUsed); + _usedBytes = newUsed; + + // Append structure of a ValueElement + char* dest = _buffer + pos.index; // must be after alloc since it changes _buffer +#define append(x) \ + memcpy(dest, &(x), sizeof(x)); \ + dest += sizeof(x) + append(value); + append(nextCollision); + append(nameSize); + name.copyTo(dest, true); +// Padding for alignment handled above +#undef append - _numFields++; + // Make sure next field starts where we expect it + fassert(16486, getField(pos).next()->ptr() == _buffer + _usedBytes); - if (_numFields > HASH_TAB_MIN) { - addFieldToHashTable(pos); - } - else if (_numFields == HASH_TAB_MIN) { - // adds all fields to hash table (including the one we just added) - rehash(); - } + _numFields++; - return getField(pos).val; + if (_numFields > HASH_TAB_MIN) { + addFieldToHashTable(pos); + } else if (_numFields == HASH_TAB_MIN) { + // adds all fields to hash table (including the one we just added) + rehash(); } - // Call after adding field to _fields and increasing _numFields - void DocumentStorage::addFieldToHashTable(Position pos) { - ValueElement& elem = getField(pos); - elem.nextCollision = Position(); + return getField(pos).val; +} - const unsigned bucket = bucketForKey(elem.nameSD()); +// Call after adding field to _fields and increasing _numFields +void DocumentStorage::addFieldToHashTable(Position pos) { + ValueElement& elem = getField(pos); + elem.nextCollision = Position(); - Position* posPtr = &_hashTab[bucket]; - while (posPtr->found()) { - // collision: walk links and add new to end - posPtr = &getField(*posPtr).nextCollision; - } - *posPtr = Position(pos.index); + const unsigned bucket = bucketForKey(elem.nameSD()); + + Position* posPtr = &_hashTab[bucket]; + while (posPtr->found()) { + // collision: walk links and add new to end + posPtr = &getField(*posPtr).nextCollision; } + *posPtr = Position(pos.index); +} - void DocumentStorage::alloc(unsigned newSize) { - const bool firstAlloc = !_buffer; - const bool doingRehash = needRehash(); - const size_t oldCapacity = _bufferEnd - _buffer; - - // make new bucket count big enough - while (needRehash() || hashTabBuckets() < HASH_TAB_INIT_SIZE) - _hashTabMask = hashTabBuckets()*2 - 1; - - // only allocate power-of-two sized space > 128 bytes - size_t capacity = 128; - while (capacity < newSize + hashTabBytes()) - capacity *= 2; - - uassert(16490, "Tried to make oversized document", - capacity <= size_t(BufferMaxSize)); - - std::unique_ptr<char[]> oldBuf(_buffer); - _buffer = new char[capacity]; - _bufferEnd = _buffer + capacity - hashTabBytes(); - - if (!firstAlloc) { - // This just copies the elements - memcpy(_buffer, oldBuf.get(), _usedBytes); - - if (_numFields >= HASH_TAB_MIN) { - // if we were hashing, deal with the hash table - if (doingRehash) { - rehash(); - } - else { - // no rehash needed so just slide table down to new position - memcpy(_hashTab, oldBuf.get() + oldCapacity, hashTabBytes()); - } +void DocumentStorage::alloc(unsigned newSize) { + const bool firstAlloc = !_buffer; + const bool doingRehash = needRehash(); + const size_t oldCapacity = _bufferEnd - _buffer; + + // make new bucket 
count big enough + while (needRehash() || hashTabBuckets() < HASH_TAB_INIT_SIZE) + _hashTabMask = hashTabBuckets() * 2 - 1; + + // only allocate power-of-two sized space > 128 bytes + size_t capacity = 128; + while (capacity < newSize + hashTabBytes()) + capacity *= 2; + + uassert(16490, "Tried to make oversized document", capacity <= size_t(BufferMaxSize)); + + std::unique_ptr<char[]> oldBuf(_buffer); + _buffer = new char[capacity]; + _bufferEnd = _buffer + capacity - hashTabBytes(); + + if (!firstAlloc) { + // This just copies the elements + memcpy(_buffer, oldBuf.get(), _usedBytes); + + if (_numFields >= HASH_TAB_MIN) { + // if we were hashing, deal with the hash table + if (doingRehash) { + rehash(); + } else { + // no rehash needed so just slide table down to new position + memcpy(_hashTab, oldBuf.get() + oldCapacity, hashTabBytes()); } } } +} - void DocumentStorage::reserveFields(size_t expectedFields) { - fassert(16487, !_buffer); +void DocumentStorage::reserveFields(size_t expectedFields) { + fassert(16487, !_buffer); - unsigned buckets = HASH_TAB_INIT_SIZE; - while (buckets < expectedFields) - buckets *= 2; - _hashTabMask = buckets - 1; + unsigned buckets = HASH_TAB_INIT_SIZE; + while (buckets < expectedFields) + buckets *= 2; + _hashTabMask = buckets - 1; - // Using expectedFields+1 to allow space for long field names - const size_t newSize = (expectedFields+1) * ValueElement::align(sizeof(ValueElement)); + // Using expectedFields+1 to allow space for long field names + const size_t newSize = (expectedFields + 1) * ValueElement::align(sizeof(ValueElement)); - uassert(16491, "Tried to make oversized document", - newSize <= size_t(BufferMaxSize)); + uassert(16491, "Tried to make oversized document", newSize <= size_t(BufferMaxSize)); + + _buffer = new char[newSize + hashTabBytes()]; + _bufferEnd = _buffer + newSize; +} - _buffer = new char[newSize + hashTabBytes()]; - _bufferEnd = _buffer + newSize; +intrusive_ptr<DocumentStorage> DocumentStorage::clone() const { + intrusive_ptr<DocumentStorage> out(new DocumentStorage()); + + // Make a copy of the buffer. + // It is very important that the positions of each field are the same after cloning. + const size_t bufferBytes = (_bufferEnd + hashTabBytes()) - _buffer; + out->_buffer = new char[bufferBytes]; + out->_bufferEnd = out->_buffer + (_bufferEnd - _buffer); + memcpy(out->_buffer, _buffer, bufferBytes); + + // Copy remaining fields + out->_usedBytes = _usedBytes; + out->_numFields = _numFields; + out->_hashTabMask = _hashTabMask; + out->_hasTextScore = _hasTextScore; + out->_textScore = _textScore; + + // Tell values that they have been memcpyed (updates ref counts) + for (DocumentStorageIterator it = out->iteratorAll(); !it.atEnd(); it.advance()) { + it->val.memcpyed(); } - intrusive_ptr<DocumentStorage> DocumentStorage::clone() const { - intrusive_ptr<DocumentStorage> out (new DocumentStorage()); - - // Make a copy of the buffer. - // It is very important that the positions of each field are the same after cloning. 
- const size_t bufferBytes = (_bufferEnd + hashTabBytes()) - _buffer; - out->_buffer = new char[bufferBytes]; - out->_bufferEnd = out->_buffer + (_bufferEnd - _buffer); - memcpy(out->_buffer, _buffer, bufferBytes); - - // Copy remaining fields - out->_usedBytes = _usedBytes; - out->_numFields = _numFields; - out->_hashTabMask = _hashTabMask; - out->_hasTextScore = _hasTextScore; - out->_textScore = _textScore; - - // Tell values that they have been memcpyed (updates ref counts) - for (DocumentStorageIterator it = out->iteratorAll(); !it.atEnd(); it.advance()) { - it->val.memcpyed(); - } + return out; +} - return out; +DocumentStorage::~DocumentStorage() { + std::unique_ptr<char[]> deleteBufferAtScopeEnd(_buffer); + + for (DocumentStorageIterator it = iteratorAll(); !it.atEnd(); it.advance()) { + it->val.~Value(); // explicit destructor call } +} - DocumentStorage::~DocumentStorage() { - std::unique_ptr<char[]> deleteBufferAtScopeEnd (_buffer); +Document::Document(const BSONObj& bson) { + MutableDocument md(bson.nFields()); - for (DocumentStorageIterator it = iteratorAll(); !it.atEnd(); it.advance()) { - it->val.~Value(); // explicit destructor call - } + BSONObjIterator it(bson); + while (it.more()) { + BSONElement bsonElement(it.next()); + md.addField(bsonElement.fieldNameStringData(), Value(bsonElement)); } - Document::Document(const BSONObj& bson) { - MutableDocument md(bson.nFields()); - - BSONObjIterator it(bson); - while(it.more()) { - BSONElement bsonElement(it.next()); - md.addField(bsonElement.fieldNameStringData(), Value(bsonElement)); - } + *this = md.freeze(); +} - *this = md.freeze(); - } +BSONObjBuilder& operator<<(BSONObjBuilderValueStream& builder, const Document& doc) { + BSONObjBuilder subobj(builder.subobjStart()); + doc.toBson(&subobj); + subobj.doneFast(); + return builder.builder(); +} - BSONObjBuilder& operator << (BSONObjBuilderValueStream& builder, const Document& doc) { - BSONObjBuilder subobj(builder.subobjStart()); - doc.toBson(&subobj); - subobj.doneFast(); - return builder.builder(); +void Document::toBson(BSONObjBuilder* pBuilder) const { + for (DocumentStorageIterator it = storage().iterator(); !it.atEnd(); it.advance()) { + *pBuilder << it->nameSD() << it->val; } +} - void Document::toBson(BSONObjBuilder* pBuilder) const { - for (DocumentStorageIterator it = storage().iterator(); !it.atEnd(); it.advance()) { - *pBuilder << it->nameSD() << it->val; - } - } +BSONObj Document::toBson() const { + BSONObjBuilder bb; + toBson(&bb); + return bb.obj(); +} - BSONObj Document::toBson() const { - BSONObjBuilder bb; - toBson(&bb); - return bb.obj(); - } +const StringData Document::metaFieldTextScore("$textScore", StringData::LiteralTag()); - const StringData Document::metaFieldTextScore("$textScore", StringData::LiteralTag()); +BSONObj Document::toBsonWithMetaData() const { + BSONObjBuilder bb; + toBson(&bb); + if (hasTextScore()) + bb.append(metaFieldTextScore, getTextScore()); + return bb.obj(); +} - BSONObj Document::toBsonWithMetaData() const { - BSONObjBuilder bb; - toBson(&bb); - if (hasTextScore()) - bb.append(metaFieldTextScore, getTextScore()); - return bb.obj(); - } +Document Document::fromBsonWithMetaData(const BSONObj& bson) { + MutableDocument md; - Document Document::fromBsonWithMetaData(const BSONObj& bson) { - MutableDocument md; - - BSONObjIterator it(bson); - while(it.more()) { - BSONElement elem(it.next()); - if (elem.fieldName()[0] == '$') { - if (elem.fieldNameStringData() == metaFieldTextScore) { - md.setTextScore(elem.Double()); - continue; - } + 
BSONObjIterator it(bson); + while (it.more()) { + BSONElement elem(it.next()); + if (elem.fieldName()[0] == '$') { + if (elem.fieldNameStringData() == metaFieldTextScore) { + md.setTextScore(elem.Double()); + continue; } - - // Note: this will not parse out metadata in embedded documents. - md.addField(elem.fieldNameStringData(), Value(elem)); } - return md.freeze(); + // Note: this will not parse out metadata in embedded documents. + md.addField(elem.fieldNameStringData(), Value(elem)); } - MutableDocument::MutableDocument(size_t expectedFields) - : _storageHolder(NULL) - , _storage(_storageHolder) - { - if (expectedFields) { - storage().reserveFields(expectedFields); - } - } + return md.freeze(); +} - MutableValue MutableDocument::getNestedFieldHelper(const FieldPath& dottedField, - size_t level) { - if (level == dottedField.getPathLength()-1) { - return getField(dottedField.getFieldName(level)); - } - else { - MutableDocument nested (getField(dottedField.getFieldName(level))); - return nested.getNestedFieldHelper(dottedField, level+1); - } +MutableDocument::MutableDocument(size_t expectedFields) + : _storageHolder(NULL), _storage(_storageHolder) { + if (expectedFields) { + storage().reserveFields(expectedFields); } +} - MutableValue MutableDocument::getNestedField(const FieldPath& dottedField) { - fassert(16601, dottedField.getPathLength()); - return getNestedFieldHelper(dottedField, 0); +MutableValue MutableDocument::getNestedFieldHelper(const FieldPath& dottedField, size_t level) { + if (level == dottedField.getPathLength() - 1) { + return getField(dottedField.getFieldName(level)); + } else { + MutableDocument nested(getField(dottedField.getFieldName(level))); + return nested.getNestedFieldHelper(dottedField, level + 1); } +} - MutableValue MutableDocument::getNestedFieldHelper(const vector<Position>& positions, - size_t level) { - if (level == positions.size()-1) { - return getField(positions[level]); - } - else { - MutableDocument nested (getField(positions[level])); - return nested.getNestedFieldHelper(positions, level+1); - } - } +MutableValue MutableDocument::getNestedField(const FieldPath& dottedField) { + fassert(16601, dottedField.getPathLength()); + return getNestedFieldHelper(dottedField, 0); +} - MutableValue MutableDocument::getNestedField(const vector<Position>& positions) { - fassert(16488, !positions.empty()); - return getNestedFieldHelper(positions, 0); +MutableValue MutableDocument::getNestedFieldHelper(const vector<Position>& positions, + size_t level) { + if (level == positions.size() - 1) { + return getField(positions[level]); + } else { + MutableDocument nested(getField(positions[level])); + return nested.getNestedFieldHelper(positions, level + 1); } +} - static Value getNestedFieldHelper(const Document& doc, - const FieldPath& fieldNames, - vector<Position>* positions, - size_t level) { +MutableValue MutableDocument::getNestedField(const vector<Position>& positions) { + fassert(16488, !positions.empty()); + return getNestedFieldHelper(positions, 0); +} - const string& fieldName = fieldNames.getFieldName(level); - const Position pos = doc.positionOf(fieldName); +static Value getNestedFieldHelper(const Document& doc, + const FieldPath& fieldNames, + vector<Position>* positions, + size_t level) { + const string& fieldName = fieldNames.getFieldName(level); + const Position pos = doc.positionOf(fieldName); - if (!pos.found()) - return Value(); + if (!pos.found()) + return Value(); - if (positions) - positions->push_back(pos); + if (positions) + 
positions->push_back(pos); - if (level == fieldNames.getPathLength()-1) - return doc.getField(pos); + if (level == fieldNames.getPathLength() - 1) + return doc.getField(pos); - Value val = doc.getField(pos); - if (val.getType() != Object) - return Value(); + Value val = doc.getField(pos); + if (val.getType() != Object) + return Value(); - return getNestedFieldHelper(val.getDocument(), fieldNames, positions, level+1); - } - - const Value Document::getNestedField(const FieldPath& fieldNames, - vector<Position>* positions) const { - fassert(16489, fieldNames.getPathLength()); - return getNestedFieldHelper(*this, fieldNames, positions, 0); - } + return getNestedFieldHelper(val.getDocument(), fieldNames, positions, level + 1); +} - size_t Document::getApproximateSize() const { - if (!_storage) - return 0; // we've allocated no memory +const Value Document::getNestedField(const FieldPath& fieldNames, + vector<Position>* positions) const { + fassert(16489, fieldNames.getPathLength()); + return getNestedFieldHelper(*this, fieldNames, positions, 0); +} - size_t size = sizeof(DocumentStorage); - size += storage().allocatedBytes(); +size_t Document::getApproximateSize() const { + if (!_storage) + return 0; // we've allocated no memory - for (DocumentStorageIterator it = storage().iterator(); !it.atEnd(); it.advance()) { - size += it->val.getApproximateSize(); - size -= sizeof(Value); // already accounted for above - } + size_t size = sizeof(DocumentStorage); + size += storage().allocatedBytes(); - return size; + for (DocumentStorageIterator it = storage().iterator(); !it.atEnd(); it.advance()) { + size += it->val.getApproximateSize(); + size -= sizeof(Value); // already accounted for above } - void Document::hash_combine(size_t &seed) const { - for (DocumentStorageIterator it = storage().iterator(); !it.atEnd(); it.advance()) { - StringData name = it->nameSD(); - boost::hash_range(seed, name.rawData(), name.rawData() + name.size()); - it->val.hash_combine(seed); - } + return size; +} + +void Document::hash_combine(size_t& seed) const { + for (DocumentStorageIterator it = storage().iterator(); !it.atEnd(); it.advance()) { + StringData name = it->nameSD(); + boost::hash_range(seed, name.rawData(), name.rawData() + name.size()); + it->val.hash_combine(seed); } +} - int Document::compare(const Document& rL, const Document& rR) { - DocumentStorageIterator lIt = rL.storage().iterator(); - DocumentStorageIterator rIt = rR.storage().iterator(); +int Document::compare(const Document& rL, const Document& rR) { + DocumentStorageIterator lIt = rL.storage().iterator(); + DocumentStorageIterator rIt = rR.storage().iterator(); - while (true) { - if (lIt.atEnd()) { - if (rIt.atEnd()) - return 0; // documents are the same length + while (true) { + if (lIt.atEnd()) { + if (rIt.atEnd()) + return 0; // documents are the same length - return -1; // left document is shorter - } + return -1; // left document is shorter + } - if (rIt.atEnd()) - return 1; // right document is shorter + if (rIt.atEnd()) + return 1; // right document is shorter - const ValueElement& rField = rIt.get(); - const ValueElement& lField = lIt.get(); + const ValueElement& rField = rIt.get(); + const ValueElement& lField = lIt.get(); - // For compatibility with BSONObj::woCompare() consider the canonical type of values - // before considerting their names. - const int rCType = canonicalizeBSONType(rField.val.getType()); - const int lCType = canonicalizeBSONType(lField.val.getType()); - if (lCType != rCType) - return lCType < rCType ? 
-1 : 1;
+        // For compatibility with BSONObj::woCompare() consider the canonical type of values
+        // before considering their names.
+        const int rCType = canonicalizeBSONType(rField.val.getType());
+        const int lCType = canonicalizeBSONType(lField.val.getType());
+        if (lCType != rCType)
+            return lCType < rCType ? -1 : 1;
 
-            const int nameCmp = lField.nameSD().compare(rField.nameSD());
-            if (nameCmp)
-                return nameCmp; // field names are unequal
+        const int nameCmp = lField.nameSD().compare(rField.nameSD());
+        if (nameCmp)
+            return nameCmp; // field names are unequal
 
-            const int valueCmp = Value::compare(lField.val, rField.val);
-            if (valueCmp)
-                return valueCmp; // fields are unequal
+        const int valueCmp = Value::compare(lField.val, rField.val);
+        if (valueCmp)
+            return valueCmp; // fields are unequal
 
-            rIt.advance();
-            lIt.advance();
-        }
+        rIt.advance();
+        lIt.advance();
     }
+}
 
-    string Document::toString() const {
-        if (empty())
-            return "{}";
-
-        StringBuilder out;
-        const char* prefix = "{";
+string Document::toString() const {
+    if (empty())
+        return "{}";
 
-        for (DocumentStorageIterator it = storage().iterator(); !it.atEnd(); it.advance()) {
-            out << prefix << it->nameSD() << ": " << it->val.toString();
-            prefix = ", ";
-        }
-        out << '}';
+    StringBuilder out;
+    const char* prefix = "{";
 
-        return out.str();
+    for (DocumentStorageIterator it = storage().iterator(); !it.atEnd(); it.advance()) {
+        out << prefix << it->nameSD() << ": " << it->val.toString();
+        prefix = ", ";
     }
+    out << '}';
 
-    void Document::serializeForSorter(BufBuilder& buf) const {
-        const int numElems = size();
-        buf.appendNum(numElems);
+    return out.str();
+}
 
-        for (DocumentStorageIterator it = storage().iterator(); !it.atEnd(); it.advance()) {
-            buf.appendStr(it->nameSD(), /*NUL byte*/ true);
-            it->val.serializeForSorter(buf);
-        }
+void Document::serializeForSorter(BufBuilder& buf) const {
+    const int numElems = size();
+    buf.appendNum(numElems);
 
-        if (hasTextScore()) {
-            buf.appendNum(char(1));
-            buf.appendNum(getTextScore());
-        }
-        else {
-            buf.appendNum(char(0));
-        }
+    for (DocumentStorageIterator it = storage().iterator(); !it.atEnd(); it.advance()) {
+        buf.appendStr(it->nameSD(), /*NUL byte*/ true);
+        it->val.serializeForSorter(buf);
     }
 
-    Document Document::deserializeForSorter(BufReader& buf, const SorterDeserializeSettings&) {
-        const int numElems = buf.read<int>();
-        MutableDocument doc(numElems);
-        for (int i = 0; i < numElems; i++) {
-            StringData name = buf.readCStr();
-            doc.addField(name, Value::deserializeForSorter(buf,
-                                                           Value::SorterDeserializeSettings()));
-        }
-
-        if (buf.read<char>()) // hasTextScore
-            doc.setTextScore(buf.read<double>());
+    if (hasTextScore()) {
+        buf.appendNum(char(1));
+        buf.appendNum(getTextScore());
+    } else {
+        buf.appendNum(char(0));
+    }
+}
 
-        return doc.freeze();
+Document Document::deserializeForSorter(BufReader& buf, const SorterDeserializeSettings&) {
+    const int numElems = buf.read<int>();
+    MutableDocument doc(numElems);
+    for (int i = 0; i < numElems; i++) {
+        StringData name = buf.readCStr();
+        doc.addField(name, Value::deserializeForSorter(buf, Value::SorterDeserializeSettings()));
     }
+
+    if (buf.read<char>()) // hasTextScore
+        doc.setTextScore(buf.read<double>());
+
+    return doc.freeze();
+}
}
diff --git a/src/mongo/db/pipeline/document.h b/src/mongo/db/pipeline/document.h
index 491b9c050d3..5010f69b5fa 100644
--- a/src/mongo/db/pipeline/document.h
+++ b/src/mongo/db/pipeline/document.h
@@ -36,529 +36,580 @@
 #include "mongo/bson/util/builder.h"
 
 namespace mongo {
-    class 
BSONObj; - class FieldIterator; - class FieldPath; - class Value; - class MutableDocument; +class BSONObj; +class FieldIterator; +class FieldPath; +class Value; +class MutableDocument; - /** An internal class that represents the position of a field in a document. +/** An internal class that represents the position of a field in a document. + * + * This is a low-level class that you usually don't need to worry about. + * + * The main use of this class for clients is to allow refetching or + * setting a field without looking it up again. It has a default + * constructor that represents a field not being in a document. It also + * has a method 'bool found()' that tells you if a field was found. + * + * For more details see document_internal.h + */ +class Position; + +/** A Document is similar to a BSONObj but with a different in-memory representation. + * + * A Document can be treated as a const std::map<std::string, const Value> that is + * very cheap to copy and is Assignable. Therefore, it is acceptable to + * pass and return by Value. Note that the data in a Document is + * immutable, but you can replace a Document instance with assignment. + * + * See Also: Value class in Value.h + */ +class Document { +public: + /// Empty Document (does no allocation) + Document() {} + + /// Create a new Document deep-converted from the given BSONObj. + explicit Document(const BSONObj& bson); + + void swap(Document& rhs) { + _storage.swap(rhs._storage); + } + + /// Look up a field by key name. Returns Value() if no such field. O(1) + const Value operator[](StringData key) const { + return getField(key); + } + const Value getField(StringData key) const { + return storage().getField(key); + } + + /// Look up a field by Position. See positionOf and getNestedField. + const Value operator[](Position pos) const { + return getField(pos); + } + const Value getField(Position pos) const { + return storage().getField(pos).val; + } + + /** Similar to BSONObj::getFieldDotted, but using FieldPath rather than a dotted string. + * If you pass a non-NULL positions vector, you get back a path suitable + * to pass to MutableDocument::setNestedField. + * + * TODO a version that doesn't use FieldPath + */ + const Value getNestedField(const FieldPath& fieldNames, + std::vector<Position>* positions = NULL) const; + + /// Number of fields in this document. O(n) + size_t size() const { + return storage().size(); + } + + /// True if this document has no fields. + bool empty() const { + return !_storage || storage().iterator().atEnd(); + } + + /// Create a new FieldIterator that can be used to examine the Document's fields in order. + FieldIterator fieldIterator() const; + + /// Convenience type for dealing with fields. Used by FieldIterator. + typedef std::pair<StringData, Value> FieldPair; + + /** Get the approximate storage size of the document and sub-values in bytes. + * Note: Some memory may be shared with other Documents or between fields within + * a single Document so this can overestimate usage. + */ + size_t getApproximateSize() const; + + /** Compare two documents. * - * This is a low-level class that you usually don't need to worry about. + * BSON document field order is significant, so this just goes through + * the fields in order. The comparison is done in roughly the same way + * as strings are compared, but comparing one field at a time instead + * of one character at a time. * - * The main use of this class for clients is to allow refetching or - * setting a field without looking it up again. 
It has a default - * constructor that represents a field not being in a document. It also - * has a method 'bool found()' that tells you if a field was found. + * Note: This does not consider metadata when comparing documents. * - * For more details see document_internal.h + * @returns an integer less than zero, zero, or an integer greater than + * zero, depending on whether lhs < rhs, lhs == rhs, or lhs > rhs + * Warning: may return values other than -1, 0, or 1 */ - class Position; + static int compare(const Document& lhs, const Document& rhs); - /** A Document is similar to a BSONObj but with a different in-memory representation. - * - * A Document can be treated as a const std::map<std::string, const Value> that is - * very cheap to copy and is Assignable. Therefore, it is acceptable to - * pass and return by Value. Note that the data in a Document is - * immutable, but you can replace a Document instance with assignment. + std::string toString() const; + + friend std::ostream& operator<<(std::ostream& out, const Document& doc) { + return out << doc.toString(); + } + + /** Calculate a hash value. * - * See Also: Value class in Value.h + * Meant to be used to create composite hashes suitable for + * hashed container classes such as unordered_map. */ - class Document { - public: + void hash_combine(size_t& seed) const; - /// Empty Document (does no allocation) - Document() {} - - /// Create a new Document deep-converted from the given BSONObj. - explicit Document(const BSONObj& bson); - - void swap(Document& rhs) { _storage.swap(rhs._storage); } - - /// Look up a field by key name. Returns Value() if no such field. O(1) - const Value operator[] (StringData key) const { return getField(key); } - const Value getField(StringData key) const { return storage().getField(key); } - - /// Look up a field by Position. See positionOf and getNestedField. - const Value operator[] (Position pos) const { return getField(pos); } - const Value getField(Position pos) const { return storage().getField(pos).val; } - - /** Similar to BSONObj::getFieldDotted, but using FieldPath rather than a dotted string. - * If you pass a non-NULL positions vector, you get back a path suitable - * to pass to MutableDocument::setNestedField. - * - * TODO a version that doesn't use FieldPath - */ - const Value getNestedField(const FieldPath& fieldNames, - std::vector<Position>* positions=NULL) const; - - /// Number of fields in this document. O(n) - size_t size() const { return storage().size(); } - - /// True if this document has no fields. - bool empty() const { return !_storage || storage().iterator().atEnd(); } - - /// Create a new FieldIterator that can be used to examine the Document's fields in order. - FieldIterator fieldIterator() const; - - /// Convenience type for dealing with fields. Used by FieldIterator. - typedef std::pair<StringData, Value> FieldPair; - - /** Get the approximate storage size of the document and sub-values in bytes. - * Note: Some memory may be shared with other Documents or between fields within - * a single Document so this can overestimate usage. - */ - size_t getApproximateSize() const; - - /** Compare two documents. - * - * BSON document field order is significant, so this just goes through - * the fields in order. The comparison is done in roughly the same way - * as strings are compared, but comparing one field at a time instead - * of one character at a time. - * - * Note: This does not consider metadata when comparing documents. 
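As a quick illustration of these ordering rules, here is a minimal sketch against this header (not code from the changeset; it assumes the DOC macro defined further down and mongo's invariant() from util/assert_util.h):

    #include "mongo/db/pipeline/document.h"
    #include "mongo/util/assert_util.h"

    namespace mongo {
    void compareDemo() {
        const Document a = DOC("x" << 1 << "y" << 2);
        const Document b = DOC("y" << 2 << "x" << 1);
        invariant(Document::compare(a, b) != 0);  // same pairs, different order

        const Document shorter = DOC("x" << 1);
        invariant(Document::compare(shorter, a) < 0);  // equal prefix, fewer fields
    }
    }  // namespace mongo
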
- * - * @returns an integer less than zero, zero, or an integer greater than - * zero, depending on whether lhs < rhs, lhs == rhs, or lhs > rhs - * Warning: may return values other than -1, 0, or 1 - */ - static int compare(const Document& lhs, const Document& rhs); - - std::string toString() const; - - friend - std::ostream& operator << (std::ostream& out, const Document& doc) { return out << doc.toString(); } - - /** Calculate a hash value. - * - * Meant to be used to create composite hashes suitable for - * hashed container classes such as unordered_map. - */ - void hash_combine(size_t &seed) const; - - /** - * Add this document to the BSONObj under construction with the given BSONObjBuilder. - * Does not include metadata. - */ - void toBson(BSONObjBuilder *pBsonObjBuilder) const; - BSONObj toBson() const; - - /** - * Like toBson, but includes metadata at the top-level. - * Output is parseable by fromBsonWithMetaData - */ - BSONObj toBsonWithMetaData() const; - - /** - * Like Document(BSONObj) but treats top-level fields with special names as metadata. - * Special field names are available as static constants on this class with names starting - * with metaField. - */ - static Document fromBsonWithMetaData(const BSONObj& bson); - - // Support BSONObjBuilder and BSONArrayBuilder "stream" API - friend BSONObjBuilder& operator << (BSONObjBuilderValueStream& builder, const Document& d); - - /** Return the abstract Position of a field, suitable to pass to operator[] or getField(). - * This can potentially save time if you need to refer to a field multiple times. - */ - Position positionOf(StringData fieldName) const { return storage().findField(fieldName); } - - /** Clone a document. - * - * This should only be called by MutableDocument and tests - * - * The new document shares all the fields' values with the original. - * This is not a deep copy. Only the fields on the top-level document - * are cloned. - */ - Document clone() const { return Document(storage().clone().get()); } - - static const StringData metaFieldTextScore; // "$textScore" - bool hasTextScore() const { return storage().hasTextScore(); } - double getTextScore() const { return storage().getTextScore(); } - - /// members for Sorter - struct SorterDeserializeSettings {}; // unused - void serializeForSorter(BufBuilder& buf) const; - static Document deserializeForSorter(BufReader& buf, const SorterDeserializeSettings&); - int memUsageForSorter() const { return getApproximateSize(); } - Document getOwned() const { return *this; } - - /// only for testing - const void* getPtr() const { return _storage.get(); } - - private: - friend class FieldIterator; - friend class ValueStorage; - friend class MutableDocument; - friend class MutableValue; - - explicit Document(const DocumentStorage* ptr) : _storage(ptr) {}; - - const DocumentStorage& storage() const { - return (_storage ? *_storage : DocumentStorage::emptyDoc()); - } - boost::intrusive_ptr<const DocumentStorage> _storage; - }; + /** + * Add this document to the BSONObj under construction with the given BSONObjBuilder. + * Does not include metadata. + */ + void toBson(BSONObjBuilder* pBsonObjBuilder) const; + BSONObj toBson() const; + + /** + * Like toBson, but includes metadata at the top-level. + * Output is parseable by fromBsonWithMetaData + */ + BSONObj toBsonWithMetaData() const; + + /** + * Like Document(BSONObj) but treats top-level fields with special names as metadata. 
+ * Special field names are available as static constants on this class with names starting + * with metaField. + */ + static Document fromBsonWithMetaData(const BSONObj& bson); + + // Support BSONObjBuilder and BSONArrayBuilder "stream" API + friend BSONObjBuilder& operator<<(BSONObjBuilderValueStream& builder, const Document& d); - inline bool operator== (const Document& l, const Document& r) { - return Document::compare(l, r) == 0; + /** Return the abstract Position of a field, suitable to pass to operator[] or getField(). + * This can potentially save time if you need to refer to a field multiple times. + */ + Position positionOf(StringData fieldName) const { + return storage().findField(fieldName); } - inline bool operator!= (const Document& l, const Document& r) { - return Document::compare(l, r) != 0; + + /** Clone a document. + * + * This should only be called by MutableDocument and tests + * + * The new document shares all the fields' values with the original. + * This is not a deep copy. Only the fields on the top-level document + * are cloned. + */ + Document clone() const { + return Document(storage().clone().get()); } - inline bool operator< (const Document& l, const Document& r) { - return Document::compare(l, r) < 0; + + static const StringData metaFieldTextScore; // "$textScore" + bool hasTextScore() const { + return storage().hasTextScore(); } - inline bool operator<= (const Document& l, const Document& r) { - return Document::compare(l, r) <= 0; + double getTextScore() const { + return storage().getTextScore(); } - inline bool operator> (const Document& l, const Document& r) { - return Document::compare(l, r) > 0; + + /// members for Sorter + struct SorterDeserializeSettings {}; // unused + void serializeForSorter(BufBuilder& buf) const; + static Document deserializeForSorter(BufReader& buf, const SorterDeserializeSettings&); + int memUsageForSorter() const { + return getApproximateSize(); } - inline bool operator>= (const Document& l, const Document& r) { - return Document::compare(l, r) >= 0; + Document getOwned() const { + return *this; } + /// only for testing + const void* getPtr() const { + return _storage.get(); + } - /** This class is returned by MutableDocument to allow you to modify its values. - * You are not allowed to hold variables of this type (enforced by the type system). - */ - class MutableValue { - public: - void operator= (const Value& v) { _val = v; } - - /** These are designed to allow things like mutDoc["a"]["b"]["c"] = Value(10); - * It is safe to use even on nonexistent fields. - */ - MutableValue operator[] (StringData key) { return getField(key); } - MutableValue operator[] (Position pos) { return getField(pos); } - - MutableValue getField(StringData key); - MutableValue getField(Position pos); - - private: - friend class MutableDocument; - - /// can only be constructed or copied by self and friends - MutableValue(const MutableValue& other): _val(other._val) {} - explicit MutableValue(Value& val): _val(val) {} - - /// Used by MutableDocument(MutableValue) - const RefCountable*& getDocPtr() { - if (_val.getType() != Object || _val._storage.genericRCPtr == NULL) { - // If the current value isn't an object we replace it with a Object-typed Value. - // Note that we can't just use Document() here because that is a NULL pointer and - // Value doesn't refcount NULL pointers. This led to a memory leak (SERVER-10554) - // because MutableDocument::newStorage() would set a non-NULL pointer into the Value - // without setting the refCounter bit. 
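To make the metadata flow concrete, a hedged sketch of the round trip (not part of the patch; it uses only declarations from this header):

    #include "mongo/db/pipeline/document.h"
    #include "mongo/util/assert_util.h"

    namespace mongo {
    void metaRoundTripDemo() {
        MutableDocument md;
        md.addField("a", Value(1));
        md.setTextScore(5.0);
        const Document scored = md.freeze();

        // toBsonWithMetaData() emits {a: 1, $textScore: 5.0};
        // a plain toBson() would drop the score.
        const BSONObj withMeta = scored.toBsonWithMetaData();

        // fromBsonWithMetaData() restores $textScore as metadata,
        // not as an ordinary field.
        const Document restored = Document::fromBsonWithMetaData(withMeta);
        invariant(restored.hasTextScore() && restored.getTextScore() == 5.0);
        invariant(!restored["a"].missing());
    }
    }  // namespace mongo
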
While allocating a DocumentStorage here could - // result in an allocation where none is needed, in practice this is only called - // when we are about to add a field to the sub-document so this just changes where - // the allocation is done. - _val = Value(Document(new DocumentStorage())); - } - - return _val._storage.genericRCPtr; - } +private: + friend class FieldIterator; + friend class ValueStorage; + friend class MutableDocument; + friend class MutableValue; - MutableValue& operator= (const MutableValue&); // not assignable with another MutableValue + explicit Document(const DocumentStorage* ptr) : _storage(ptr){}; - Value& _val; - }; + const DocumentStorage& storage() const { + return (_storage ? *_storage : DocumentStorage::emptyDoc()); + } + boost::intrusive_ptr<const DocumentStorage> _storage; +}; - /** MutableDocument is a Document builder that supports both adding and updating fields. - * - * This class fills a similar role to BSONObjBuilder, but allows you to - * change existing fields and more easily write to sub-Documents. - * - * To preserve the immutability of Documents, MutableDocument will - * shallow-clone its storage on write (COW) if it is shared with any other - * Documents. +inline bool operator==(const Document& l, const Document& r) { + return Document::compare(l, r) == 0; +} +inline bool operator!=(const Document& l, const Document& r) { + return Document::compare(l, r) != 0; +} +inline bool operator<(const Document& l, const Document& r) { + return Document::compare(l, r) < 0; +} +inline bool operator<=(const Document& l, const Document& r) { + return Document::compare(l, r) <= 0; +} +inline bool operator>(const Document& l, const Document& r) { + return Document::compare(l, r) > 0; +} +inline bool operator>=(const Document& l, const Document& r) { + return Document::compare(l, r) >= 0; +} + + +/** This class is returned by MutableDocument to allow you to modify its values. + * You are not allowed to hold variables of this type (enforced by the type system). + */ +class MutableValue { +public: + void operator=(const Value& v) { + _val = v; + } + + /** These are designed to allow things like mutDoc["a"]["b"]["c"] = Value(10); + * It is safe to use even on nonexistent fields. */ - class MutableDocument { - MONGO_DISALLOW_COPYING(MutableDocument); - public: + MutableValue operator[](StringData key) { + return getField(key); + } + MutableValue operator[](Position pos) { + return getField(pos); + } - /** Create a new empty Document. - * - * @param expectedFields a hint at what the number of fields will be, if known. - * this can be used to increase memory allocation efficiency. There is - * no impact on correctness if this field over or under estimates. - * - * TODO: find some way to convey field-name sizes to make even more efficient - */ - MutableDocument() :_storageHolder(NULL), _storage(_storageHolder) {} - explicit MutableDocument(size_t expectedFields); - - /// No copy yet. Copy-on-write. 
See storage()
-        explicit MutableDocument(const Document& d) : _storageHolder(NULL)
-                                                    , _storage(_storageHolder) {
-            reset(d);
+    MutableValue getField(StringData key);
+    MutableValue getField(Position pos);
+
+private:
+    friend class MutableDocument;
+
+    /// can only be constructed or copied by self and friends
+    MutableValue(const MutableValue& other) : _val(other._val) {}
+    explicit MutableValue(Value& val) : _val(val) {}
+
+    /// Used by MutableDocument(MutableValue)
+    const RefCountable*& getDocPtr() {
+        if (_val.getType() != Object || _val._storage.genericRCPtr == NULL) {
+            // If the current value isn't an object we replace it with an Object-typed Value.
+            // Note that we can't just use Document() here because that is a NULL pointer and
+            // Value doesn't refcount NULL pointers. This led to a memory leak (SERVER-10554)
+            // because MutableDocument::newStorage() would set a non-NULL pointer into the Value
+            // without setting the refCounter bit. While allocating a DocumentStorage here could
+            // result in an allocation where none is needed, in practice this is only called
+            // when we are about to add a field to the sub-document so this just changes where
+            // the allocation is done.
+            _val = Value(Document(new DocumentStorage()));
        }
 
-        ~MutableDocument() {
-            if (_storageHolder)
-                intrusive_ptr_release(_storageHolder);
-        }
+        return _val._storage.genericRCPtr;
+    }
 
-        /** Replace the current base Document with the argument
-         *
-         *  All Positions from the passed in Document are valid and refer to the
-         *  same field in this MutableDocument.
-         */
-        void reset(const Document& d=Document()) { reset(d._storage.get()); }
-
-        /** Add the given field to the Document.
-         *
-         *  BSON documents' fields are ordered; the new Field will be
-         *  appended to the current list of fields.
-         *
-         *  Unlike getField/setField, addField does not look for a field with the
-         *  same name and therefore cannot be used to update fields.
-         *
-         *  It is an error to add a field that has the same name as another field.
-         *
-         *  TODO: This is currently allowed but getField only gets first field.
-         *        Decide what level of support is needed for duplicate fields.
-         *        If duplicates are not allowed, consider removing this method.
-         */
-        void addField(StringData fieldName, const Value& val) {
-            storage().appendField(fieldName) = val;
-        }
+    MutableValue& operator=(const MutableValue&); // not assignable with another MutableValue
 
-        /** Update field by key. If there is no field with that key, add one.
-         *
-         *  If the new value is missing(), the field is logically removed.
-         */
-        MutableValue operator[] (StringData key) { return getField(key); }
-        void setField(StringData key, const Value& val) { getField(key) = val; }
-        MutableValue getField(StringData key) {
-            return MutableValue(storage().getField(key));
-        }
+    Value& _val;
+};
 
-        /// Update field by Position. Must already be a valid Position.
-        MutableValue operator[] (Position pos) { return getField(pos); }
-        void setField(Position pos, const Value& val) { getField(pos) = val; }
-        MutableValue getField(Position pos) {
-            return MutableValue(storage().getField(pos).val);
-        }
+/** MutableDocument is a Document builder that supports both adding and updating fields.
+ *
+ *  This class fills a similar role to BSONObjBuilder, but allows you to
+ *  change existing fields and more easily write to sub-Documents.
+ *
+ *  To preserve the immutability of Documents, MutableDocument will
+ *  shallow-clone its storage on write (COW) if it is shared with any other
+ *  Documents. 
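A small sketch of the chained-subscript write path from the MutableValue comments above, together with a nested read-back (not from the patch; the FieldPath("a.b.c") dotted-string constructor is assumed from field_path.h):

    #include "mongo/db/pipeline/document.h"
    #include "mongo/db/pipeline/field_path.h"
    #include "mongo/util/assert_util.h"

    namespace mongo {
    void chainedWriteDemo() {
        MutableDocument mutDoc;
        mutDoc["a"]["b"]["c"] = Value(10);  // builds {a: {b: {c: 10}}} from nothing
        const Document doc = mutDoc.freeze();

        std::vector<Position> path;
        invariant(doc.getNestedField(FieldPath("a.b.c"), &path).getInt() == 10);
        // 'path' can now be handed to MutableDocument::setNestedField to update
        // the same leaf without re-parsing the dotted string.
    }
    }  // namespace mongo
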
+ */ +class MutableDocument { + MONGO_DISALLOW_COPYING(MutableDocument); - /// Logically remove a field. Note that memory usage does not decrease. - void remove(StringData key) { getField(key) = Value(); } - - /** Gets/Sets a nested field given a path. - * - * All fields along path are created as empty Documents if they don't exist - * or are any other type. - */ - MutableValue getNestedField(const FieldPath& dottedField); - void setNestedField(const FieldPath& dottedField, const Value& val) { - getNestedField(dottedField) = val; - } +public: + /** Create a new empty Document. + * + * @param expectedFields a hint at what the number of fields will be, if known. + * this can be used to increase memory allocation efficiency. There is + * no impact on correctness if this field over or under estimates. + * + * TODO: find some way to convey field-name sizes to make even more efficient + */ + MutableDocument() : _storageHolder(NULL), _storage(_storageHolder) {} + explicit MutableDocument(size_t expectedFields); - /// Takes positions vector from Document::getNestedField. All fields in path must exist. - MutableValue getNestedField(const std::vector<Position>& positions); - void setNestedField(const std::vector<Position>& positions, const Value& val) { - getNestedField(positions) = val; - } + /// No copy yet. Copy-on-write. See storage() + explicit MutableDocument(const Document& d) : _storageHolder(NULL), _storage(_storageHolder) { + reset(d); + } - /** - * Copies all metadata from source if it has any. - * Note: does not clear metadata from this. - */ - void copyMetaDataFrom(const Document& source) { - storage().copyMetaDataFrom(source.storage()); - } + ~MutableDocument() { + if (_storageHolder) + intrusive_ptr_release(_storageHolder); + } - void setTextScore(double score) { storage().setTextScore(score); } - - /** Convert to a read-only document and release reference. - * - * Call this to indicate that you are done with this Document and will - * not be making further changes from this MutableDocument. - * - * TODO: there are some optimizations that may make sense at freeze time. - */ - Document freeze() { - // This essentially moves _storage into a new Document by way of temp. - Document ret; - boost::intrusive_ptr<const DocumentStorage> temp (storagePtr(), /*inc_ref_count=*/false); - temp.swap(ret._storage); - _storage = NULL; - return ret; - } + /** Replace the current base Document with the argument + * + * All Positions from the passed in Document are valid and refer to the + * same field in this MutableDocument. + */ + void reset(const Document& d = Document()) { + reset(d._storage.get()); + } - /// Used to simplify the common pattern of creating a value of the document. - Value freezeToValue() { - return Value(freeze()); - } + /** Add the given field to the Document. + * + * BSON documents' fields are ordered; the new Field will be + * appended to the current list of fields. + * + * Unlike getField/setField, addField does not look for a field with the + * same name and therefore cannot be used to update fields. + * + * It is an error to add a field that has the same name as another field. + * + * TODO: This is currently allowed but getField only gets first field. + * Decide what level of support is needed for duplicate fields. + * If duplicates are not allowed, consider removing this method. + */ + void addField(StringData fieldName, const Value& val) { + storage().appendField(fieldName) = val; + } - /** Borrow a readable reference to this Document. 
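For orientation, a minimal builder sketch using the constructors and mutators declared above (illustrative only; the field names are made up):

    #include "mongo/db/pipeline/document.h"

    namespace mongo {
    Document buildDemo() {
        MutableDocument md(2);         // field-count hint; affects allocation only
        md.addField("a", Value(1));    // append; never searches for an existing "a"
        md.setField("b", Value(2.5));  // looks "b" up first and adds it when absent
        md["b"] = Value(3.0);          // update in place through MutableValue
        return md.freeze();            // md must not be modified after this
    }
    }  // namespace mongo
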
- * - * Note that unlike freeze(), this indicates intention to continue - * modifying this document. The returned Document will not observe - * future changes to this MutableDocument. - */ - Document peek() { - return Document(storagePtr()); - } + /** Update field by key. If there is no field with that key, add one. + * + * If the new value is missing(), the field is logically removed. + */ + MutableValue operator[](StringData key) { + return getField(key); + } + void setField(StringData key, const Value& val) { + getField(key) = val; + } + MutableValue getField(StringData key) { + return MutableValue(storage().getField(key)); + } - private: - friend class MutableValue; // for access to next constructor - explicit MutableDocument(MutableValue mv) - : _storageHolder(NULL) - , _storage(mv.getDocPtr()) - {} - - void reset(const DocumentStorage* ds) { - if (_storage) intrusive_ptr_release(_storage); - _storage = ds; - if (_storage) intrusive_ptr_add_ref(_storage); - } + /// Update field by Position. Must already be a valid Position. + MutableValue operator[](Position pos) { + return getField(pos); + } + void setField(Position pos, const Value& val) { + getField(pos) = val; + } + MutableValue getField(Position pos) { + return MutableValue(storage().getField(pos).val); + } - // This is split into 3 functions to speed up the fast-path - DocumentStorage& storage() { - if (MONGO_unlikely( !_storage )) - return newStorage(); + /// Logically remove a field. Note that memory usage does not decrease. + void remove(StringData key) { + getField(key) = Value(); + } - if (MONGO_unlikely( _storage->isShared() )) - return clonedStorage(); + /** Gets/Sets a nested field given a path. + * + * All fields along path are created as empty Documents if they don't exist + * or are any other type. + */ + MutableValue getNestedField(const FieldPath& dottedField); + void setNestedField(const FieldPath& dottedField, const Value& val) { + getNestedField(dottedField) = val; + } - // This function exists to ensure this is safe - return const_cast<DocumentStorage&>(*storagePtr()); - } - DocumentStorage& newStorage() { - reset(new DocumentStorage); - return const_cast<DocumentStorage&>(*storagePtr()); - } - DocumentStorage& clonedStorage() { - reset(storagePtr()->clone().get()); - return const_cast<DocumentStorage&>(*storagePtr()); - } + /// Takes positions vector from Document::getNestedField. All fields in path must exist. + MutableValue getNestedField(const std::vector<Position>& positions); + void setNestedField(const std::vector<Position>& positions, const Value& val) { + getNestedField(positions) = val; + } - // recursive helpers for same-named public methods - MutableValue getNestedFieldHelper(const FieldPath& dottedField, size_t level); - MutableValue getNestedFieldHelper(const std::vector<Position>& positions, size_t level); + /** + * Copies all metadata from source if it has any. + * Note: does not clear metadata from this. + */ + void copyMetaDataFrom(const Document& source) { + storage().copyMetaDataFrom(source.storage()); + } - // this should only be called by storage methods and peek/freeze - const DocumentStorage* storagePtr() const { - dassert(!_storage || typeid(*_storage) == typeid(const DocumentStorage)); - return static_cast<const DocumentStorage*>(_storage); - } + void setTextScore(double score) { + storage().setTextScore(score); + } - // These are both const to prevent modifications bypassing storage() method. - // They always point to NULL or an object with dynamic type DocumentStorage. 
- const RefCountable* _storageHolder; // Only used in constructors and destructor - const RefCountable*& _storage; // references either above member or genericRCPtr in a Value - }; + /** Convert to a read-only document and release reference. + * + * Call this to indicate that you are done with this Document and will + * not be making further changes from this MutableDocument. + * + * TODO: there are some optimizations that may make sense at freeze time. + */ + Document freeze() { + // This essentially moves _storage into a new Document by way of temp. + Document ret; + boost::intrusive_ptr<const DocumentStorage> temp(storagePtr(), /*inc_ref_count=*/false); + temp.swap(ret._storage); + _storage = NULL; + return ret; + } - /// This is the public iterator over a document - class FieldIterator { - public: - explicit FieldIterator(const Document& doc) - : _doc(doc) - , _it(_doc.storage().iterator()) - {} + /// Used to simplify the common pattern of creating a value of the document. + Value freezeToValue() { + return Value(freeze()); + } - /// Ask if there are more fields to return. - bool more() const { return !_it.atEnd(); } + /** Borrow a readable reference to this Document. + * + * Note that unlike freeze(), this indicates intention to continue + * modifying this document. The returned Document will not observe + * future changes to this MutableDocument. + */ + Document peek() { + return Document(storagePtr()); + } - /// Get next item and advance iterator - Document::FieldPair next() { - verify(more()); +private: + friend class MutableValue; // for access to next constructor + explicit MutableDocument(MutableValue mv) : _storageHolder(NULL), _storage(mv.getDocPtr()) {} - Document::FieldPair fp (_it->nameSD(), _it->val); - _it.advance(); - return fp; - } + void reset(const DocumentStorage* ds) { + if (_storage) + intrusive_ptr_release(_storage); + _storage = ds; + if (_storage) + intrusive_ptr_add_ref(_storage); + } - private: - // We'll hang on to the original document to ensure we keep its storage alive - Document _doc; - DocumentStorageIterator _it; - }; + // This is split into 3 functions to speed up the fast-path + DocumentStorage& storage() { + if (MONGO_unlikely(!_storage)) + return newStorage(); - /// Macro to create Document literals. Syntax is the same as the BSON("name" << 123) macro. -#define DOC(fields) ((DocumentStream() << fields).done()) + if (MONGO_unlikely(_storage->isShared())) + return clonedStorage(); - /** Macro to create Array-typed Value literals. - * Syntax is the same as the BSON_ARRAY(123 << "foo") macro. - */ -#define DOC_ARRAY(fields) ((ValueArrayStream() << fields).done()) + // This function exists to ensure this is safe + return const_cast<DocumentStorage&>(*storagePtr()); + } + DocumentStorage& newStorage() { + reset(new DocumentStorage); + return const_cast<DocumentStorage&>(*storagePtr()); + } + DocumentStorage& clonedStorage() { + reset(storagePtr()->clone().get()); + return const_cast<DocumentStorage&>(*storagePtr()); + } + // recursive helpers for same-named public methods + MutableValue getNestedFieldHelper(const FieldPath& dottedField, size_t level); + MutableValue getNestedFieldHelper(const std::vector<Position>& positions, size_t level); - // These classes are only for the implementation of the DOC and DOC_ARRAY macros. - // They should not be used for any other reason. - class DocumentStream { - // The stream alternates between DocumentStream taking a fieldname - // and ValueStream taking a Value. 
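The freeze()/peek() contract implemented here can be sketched as follows (an illustration, not code from the changeset; the copy-on-write behavior is what keeps the borrowed view stable):

    #include "mongo/db/pipeline/document.h"
    #include "mongo/util/assert_util.h"

    namespace mongo {
    void freezePeekDemo() {
        MutableDocument md;
        md.addField("n", Value(1));

        const Document before = md.peek();  // borrowed view of the current state
        md["n"] = Value(2);                 // 'before' does not observe this write

        const Value result = md.freezeToValue();  // md releases its storage here
        invariant(before["n"].getInt() == 1);
        invariant(result.getDocument()["n"].getInt() == 2);
    }
    }  // namespace mongo
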
- class ValueStream { - public: - ValueStream(DocumentStream& builder) :builder(builder) {} + // this should only be called by storage methods and peek/freeze + const DocumentStorage* storagePtr() const { + dassert(!_storage || typeid(*_storage) == typeid(const DocumentStorage)); + return static_cast<const DocumentStorage*>(_storage); + } - DocumentStream& operator << (const Value& val) { - builder._md[name] = val; - return builder; - } + // These are both const to prevent modifications bypassing storage() method. + // They always point to NULL or an object with dynamic type DocumentStorage. + const RefCountable* _storageHolder; // Only used in constructors and destructor + const RefCountable*& _storage; // references either above member or genericRCPtr in a Value +}; - /// support anything directly supported by a value constructor - template <typename T> - DocumentStream& operator << (const T& val) { - return *this << Value(val); - } +/// This is the public iterator over a document +class FieldIterator { +public: + explicit FieldIterator(const Document& doc) : _doc(doc), _it(_doc.storage().iterator()) {} - StringData name; - DocumentStream& builder; - }; + /// Ask if there are more fields to return. + bool more() const { + return !_it.atEnd(); + } - public: - DocumentStream() :_stream(*this) {} + /// Get next item and advance iterator + Document::FieldPair next() { + verify(more()); - ValueStream& operator << (StringData name) { - _stream.name = name; - return _stream; - } + Document::FieldPair fp(_it->nameSD(), _it->val); + _it.advance(); + return fp; + } - Document done() { return _md.freeze(); } +private: + // We'll hang on to the original document to ensure we keep its storage alive + Document _doc; + DocumentStorageIterator _it; +}; + +/// Macro to create Document literals. Syntax is the same as the BSON("name" << 123) macro. +#define DOC(fields) ((DocumentStream() << fields).done()) + +/** Macro to create Array-typed Value literals. + * Syntax is the same as the BSON_ARRAY(123 << "foo") macro. + */ +#define DOC_ARRAY(fields) ((ValueArrayStream() << fields).done()) - private: - ValueStream _stream; - MutableDocument _md; - }; - class ValueArrayStream { +// These classes are only for the implementation of the DOC and DOC_ARRAY macros. +// They should not be used for any other reason. +class DocumentStream { + // The stream alternates between DocumentStream taking a fieldname + // and ValueStream taking a Value. 
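Usage of the DOC and DOC_ARRAY macros defined above, as a short sketch (the invariant() check is illustrative):

    #include "mongo/db/pipeline/document.h"
    #include "mongo/util/assert_util.h"

    namespace mongo {
    void docMacroDemo() {
        // Mirrors BSON("a" << 1 << "arr" << BSON_ARRAY(1 << 2 << 3)), but
        // produces a Document and an Array-typed Value instead of a BSONObj.
        const Document d = DOC("a" << 1 << "arr" << DOC_ARRAY(1 << 2 << 3));
        invariant(d["arr"].getArray().size() == 3);
    }
    }  // namespace mongo
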
+ class ValueStream { public: - ValueArrayStream& operator << (const Value& val) { - _array.push_back(val); - return *this; + ValueStream(DocumentStream& builder) : builder(builder) {} + + DocumentStream& operator<<(const Value& val) { + builder._md[name] = val; + return builder; } /// support anything directly supported by a value constructor template <typename T> - ValueArrayStream& operator << (const T& val) { + DocumentStream& operator<<(const T& val) { return *this << Value(val); } - Value done() { return Value(std::move(_array)); } - - private: - std::vector<Value> _array; + StringData name; + DocumentStream& builder; }; - inline void swap(mongo::Document& lhs, mongo::Document& rhs) { lhs.swap(rhs); } +public: + DocumentStream() : _stream(*this) {} -/* ======================= INLINED IMPLEMENTATIONS ========================== */ + ValueStream& operator<<(StringData name) { + _stream.name = name; + return _stream; + } - inline FieldIterator Document::fieldIterator() const { - return FieldIterator(*this); + Document done() { + return _md.freeze(); } - inline MutableValue MutableValue::getField(Position pos) { - return MutableDocument(*this).getField(pos); +private: + ValueStream _stream; + MutableDocument _md; +}; + +class ValueArrayStream { +public: + ValueArrayStream& operator<<(const Value& val) { + _array.push_back(val); + return *this; } - inline MutableValue MutableValue::getField(StringData key) { - return MutableDocument(*this).getField(key); + + /// support anything directly supported by a value constructor + template <typename T> + ValueArrayStream& operator<<(const T& val) { + return *this << Value(val); + } + + Value done() { + return Value(std::move(_array)); } + +private: + std::vector<Value> _array; +}; + +inline void swap(mongo::Document& lhs, mongo::Document& rhs) { + lhs.swap(rhs); +} + +/* ======================= INLINED IMPLEMENTATIONS ========================== */ + +inline FieldIterator Document::fieldIterator() const { + return FieldIterator(*this); +} + +inline MutableValue MutableValue::getField(Position pos) { + return MutableDocument(*this).getField(pos); +} +inline MutableValue MutableValue::getField(StringData key) { + return MutableDocument(*this).getField(key); +} } diff --git a/src/mongo/db/pipeline/document_internal.h b/src/mongo/db/pipeline/document_internal.h index 93188ffb6f3..fa5988611b5 100644 --- a/src/mongo/db/pipeline/document_internal.h +++ b/src/mongo/db/pipeline/document_internal.h @@ -36,300 +36,332 @@ #include "mongo/db/pipeline/value.h" namespace mongo { - /** Helper class to make the position in a document abstract - * Warning: This is NOT guaranteed to be the ordered position. - * eg. the first field may not be at Position(0) - */ - class Position { - public: - // This represents "not found" similar to std::string::npos - Position() :index(static_cast<unsigned>(-1)) {} - bool found() const { return index != Position().index; } - - bool operator == (Position rhs) const { return this->index == rhs.index; } - bool operator != (Position rhs) const { return !(*this == rhs); } - - // For debugging and ASSERT_EQUALS in tests. - template <typename OStream> - friend OStream& operator<<(OStream& stream, Position p) { return stream << p.index; } - - private: - explicit Position(size_t i) :index(i) {} - unsigned index; - friend class DocumentStorage; - friend class DocumentStorageIterator; - }; +/** Helper class to make the position in a document abstract + * Warning: This is NOT guaranteed to be the ordered position. + * eg. 
the first field may not be at Position(0) + */ +class Position { +public: + // This represents "not found" similar to std::string::npos + Position() : index(static_cast<unsigned>(-1)) {} + bool found() const { + return index != Position().index; + } + + bool operator==(Position rhs) const { + return this->index == rhs.index; + } + bool operator!=(Position rhs) const { + return !(*this == rhs); + } + + // For debugging and ASSERT_EQUALS in tests. + template <typename OStream> + friend OStream& operator<<(OStream& stream, Position p) { + return stream << p.index; + } + +private: + explicit Position(size_t i) : index(i) {} + unsigned index; + friend class DocumentStorage; + friend class DocumentStorageIterator; +}; #pragma pack(1) - /** This is how values are stored in the DocumentStorage buffer - * Internal class. Consumers shouldn't care about this. - */ - class ValueElement { - MONGO_DISALLOW_COPYING(ValueElement); - public: - Value val; - Position nextCollision; // Position of next field with same hashBucket - const int nameLen; // doesn't include '\0' - const char _name[1]; // pointer to start of name (use nameSD instead) - - ValueElement* next() { - return align(plusBytes(sizeof(ValueElement) + nameLen)); - } - - const ValueElement* next() const { - return align(plusBytes(sizeof(ValueElement) + nameLen)); - } - - StringData nameSD() const { return StringData(_name, nameLen); } - - - // helpers for doing pointer arithmetic with this class - // Note: These don't dereference 'this' so they are safe to use with NULL - char* ptr() { return reinterpret_cast<char*>(this); } - const char* ptr() const { return reinterpret_cast<const char*>(this); } - const ValueElement* plusBytes(size_t bytes) const { - return reinterpret_cast<const ValueElement*>(ptr() + bytes); - } - ValueElement* plusBytes(size_t bytes) { - return reinterpret_cast<ValueElement*>(ptr() + bytes); - } - - // Round number or pointer up to N-byte boundary. No change if already aligned. - template <typename T> - static T align(T size) { - const intmax_t ALIGNMENT = 8; // must be power of 2 and <= 16 (malloc alignment) - // Can't use c++ cast because of conversion between intmax_t and both ints and pointers - return (T)(((intmax_t)(size) + (ALIGNMENT-1)) & ~(ALIGNMENT-1)); - } - - private: - ValueElement(); // this class should never be constructed - ~ValueElement(); // or destructed - }; - // Real size is sizeof(ValueElement) + nameLen +/** This is how values are stored in the DocumentStorage buffer + * Internal class. Consumers shouldn't care about this. 
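A brief sketch of the "not found" sentinel described above, using the public positionOf()/getField(Position) pair from document.h (the field name is hypothetical):

    #include "mongo/db/pipeline/document.h"
    #include "mongo/util/assert_util.h"

    namespace mongo {
    void positionDemo(const Document& doc) {
        const Position missing;  // default-constructed: found() is false
        invariant(!missing.found());

        const Position pos = doc.positionOf("answer");
        if (pos.found()) {
            Value v = doc.getField(pos);  // refetch without a second name lookup
        }
    }
    }  // namespace mongo
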
+ */ +class ValueElement { + MONGO_DISALLOW_COPYING(ValueElement); + +public: + Value val; + Position nextCollision; // Position of next field with same hashBucket + const int nameLen; // doesn't include '\0' + const char _name[1]; // pointer to start of name (use nameSD instead) + + ValueElement* next() { + return align(plusBytes(sizeof(ValueElement) + nameLen)); + } + + const ValueElement* next() const { + return align(plusBytes(sizeof(ValueElement) + nameLen)); + } + + StringData nameSD() const { + return StringData(_name, nameLen); + } + + + // helpers for doing pointer arithmetic with this class + // Note: These don't dereference 'this' so they are safe to use with NULL + char* ptr() { + return reinterpret_cast<char*>(this); + } + const char* ptr() const { + return reinterpret_cast<const char*>(this); + } + const ValueElement* plusBytes(size_t bytes) const { + return reinterpret_cast<const ValueElement*>(ptr() + bytes); + } + ValueElement* plusBytes(size_t bytes) { + return reinterpret_cast<ValueElement*>(ptr() + bytes); + } + + // Round number or pointer up to N-byte boundary. No change if already aligned. + template <typename T> + static T align(T size) { + const intmax_t ALIGNMENT = 8; // must be power of 2 and <= 16 (malloc alignment) + // Can't use c++ cast because of conversion between intmax_t and both ints and pointers + return (T)(((intmax_t)(size) + (ALIGNMENT - 1)) & ~(ALIGNMENT - 1)); + } + +private: + ValueElement(); // this class should never be constructed + ~ValueElement(); // or destructed +}; +// Real size is sizeof(ValueElement) + nameLen #pragma pack() - BOOST_STATIC_ASSERT(sizeof(ValueElement) == (sizeof(Value) + - sizeof(Position) + - sizeof(int) + - 1)); - - // This is an internal class for Document. See FieldIterator for the public version. - class DocumentStorageIterator { - public: - // DocumentStorage::iterator() and iteratorAll() are easier to use - DocumentStorageIterator(const ValueElement* first, - const ValueElement* end, - bool includeMissing) - : _first(first) - , _it(first) - , _end(end) - , _includeMissing(includeMissing) { - if (!_includeMissing) - skipMissing(); - } - - bool atEnd() const { return _it == _end; } - - const ValueElement& get() const { return *_it; } - - Position position() const { return Position(_it->ptr() - _first->ptr()); } - - void advance() { +BOOST_STATIC_ASSERT(sizeof(ValueElement) == (sizeof(Value) + sizeof(Position) + sizeof(int) + 1)); + +// This is an internal class for Document. See FieldIterator for the public version. 
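The 8-byte rounding performed by ValueElement::align() above, restated as a self-contained sketch that compiles and runs on its own:

    #include <cassert>
    #include <cstdint>

    template <typename T>
    static T align8(T size) {
        const intmax_t kAlignment = 8;  // must be a power of 2
        return (T)(((intmax_t)(size) + (kAlignment - 1)) & ~(kAlignment - 1));
    }

    int main() {
        assert(align8(1) == 8);    // rounds up to the next boundary
        assert(align8(8) == 8);    // already aligned: unchanged
        assert(align8(17) == 24);
        return 0;
    }
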
+class DocumentStorageIterator { +public: + // DocumentStorage::iterator() and iteratorAll() are easier to use + DocumentStorageIterator(const ValueElement* first, const ValueElement* end, bool includeMissing) + : _first(first), _it(first), _end(end), _includeMissing(includeMissing) { + if (!_includeMissing) + skipMissing(); + } + + bool atEnd() const { + return _it == _end; + } + + const ValueElement& get() const { + return *_it; + } + + Position position() const { + return Position(_it->ptr() - _first->ptr()); + } + + void advance() { + advanceOne(); + if (!_includeMissing) + skipMissing(); + } + + const ValueElement* operator->() { + return _it; + } + const ValueElement& operator*() { + return *_it; + } + +private: + void advanceOne() { + _it = _it->next(); + } + + void skipMissing() { + while (!atEnd() && _it->val.missing()) { advanceOne(); - if (!_includeMissing) - skipMissing(); - } - - const ValueElement* operator-> () { return _it; } - const ValueElement& operator* () { return *_it; } - - private: - void advanceOne() { - _it = _it->next(); - } - - void skipMissing() { - while (!atEnd() && _it->val.missing()) { - advanceOne(); - } - } - - const ValueElement* _first; - const ValueElement* _it; - const ValueElement* _end; - bool _includeMissing; - }; - - /// Storage class used by both Document and MutableDocument - class DocumentStorage : public RefCountable { - public: - // Note: default constructor should zero-init to support emptyDoc() - DocumentStorage() : _buffer(NULL) - , _bufferEnd(NULL) - , _usedBytes(0) - , _numFields(0) - , _hashTabMask(0) - , _hasTextScore(false) - , _textScore(0) - {} - ~DocumentStorage(); - - static const DocumentStorage& emptyDoc() { - static const char emptyBytes[sizeof(DocumentStorage)] = {0}; - return *reinterpret_cast<const DocumentStorage*>(emptyBytes); - } - - size_t size() const { - // can't use _numFields because it includes removed Fields - size_t count = 0; - for (DocumentStorageIterator it = iterator(); !it.atEnd(); it.advance()) - count++; - return count; - } - - /// Returns the position of the next field to be inserted - Position getNextPosition() const { return Position(_usedBytes); } - - /// Returns the position of the named field (may be missing) or Position() - Position findField(StringData name) const; - - // Document uses these - const ValueElement& getField(Position pos) const { - verify(pos.found()); - return *(_firstElement->plusBytes(pos.index)); - } - Value getField(StringData name) const { - Position pos = findField(name); - if (!pos.found()) - return Value(); - return getField(pos).val; - } - - // MutableDocument uses these - ValueElement& getField(Position pos) { - verify(pos.found()); - return *(_firstElement->plusBytes(pos.index)); - } - Value& getField(StringData name) { - Position pos = findField(name); - if (!pos.found()) - return appendField(name); // TODO: find a way to avoid hashing name twice - return getField(pos).val; - } - - /// Adds a new field with missing Value at the end of the document - Value& appendField(StringData name); - - /** Preallocates space for fields. Use this to attempt to prevent buffer growth. - * This is only valid to call before anything is added to the document. 
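Tying the header's MutableDocument::remove() together with the missing-value skipping above: a removed field becomes a missing Value, so the skipping iterator (and therefore size() and FieldIterator) no longer reports it. A sketch, not code from the patch:

    #include "mongo/db/pipeline/document.h"
    #include "mongo/util/assert_util.h"

    namespace mongo {
    void removeDemo() {
        MutableDocument md;
        md.addField("keep", Value(1));
        md.addField("drop", Value(2));
        md.remove("drop");  // logically removed; buffer memory is not reclaimed

        const Document doc = md.freeze();
        invariant(doc.size() == 1);  // size() walks the missing-skipping iterator
        FieldIterator it(doc);
        invariant(it.more() && it.next().first == StringData("keep"));
    }
    }  // namespace mongo
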
- */ - void reserveFields(size_t expectedFields); - - /// This skips missing values - DocumentStorageIterator iterator() const { - return DocumentStorageIterator(_firstElement, end(), false); } + } + + const ValueElement* _first; + const ValueElement* _it; + const ValueElement* _end; + bool _includeMissing; +}; + +/// Storage class used by both Document and MutableDocument +class DocumentStorage : public RefCountable { +public: + // Note: default constructor should zero-init to support emptyDoc() + DocumentStorage() + : _buffer(NULL), + _bufferEnd(NULL), + _usedBytes(0), + _numFields(0), + _hashTabMask(0), + _hasTextScore(false), + _textScore(0) {} + ~DocumentStorage(); + + static const DocumentStorage& emptyDoc() { + static const char emptyBytes[sizeof(DocumentStorage)] = {0}; + return *reinterpret_cast<const DocumentStorage*>(emptyBytes); + } + + size_t size() const { + // can't use _numFields because it includes removed Fields + size_t count = 0; + for (DocumentStorageIterator it = iterator(); !it.atEnd(); it.advance()) + count++; + return count; + } + + /// Returns the position of the next field to be inserted + Position getNextPosition() const { + return Position(_usedBytes); + } + + /// Returns the position of the named field (may be missing) or Position() + Position findField(StringData name) const; + + // Document uses these + const ValueElement& getField(Position pos) const { + verify(pos.found()); + return *(_firstElement->plusBytes(pos.index)); + } + Value getField(StringData name) const { + Position pos = findField(name); + if (!pos.found()) + return Value(); + return getField(pos).val; + } + + // MutableDocument uses these + ValueElement& getField(Position pos) { + verify(pos.found()); + return *(_firstElement->plusBytes(pos.index)); + } + Value& getField(StringData name) { + Position pos = findField(name); + if (!pos.found()) + return appendField(name); // TODO: find a way to avoid hashing name twice + return getField(pos).val; + } + + /// Adds a new field with missing Value at the end of the document + Value& appendField(StringData name); + + /** Preallocates space for fields. Use this to attempt to prevent buffer growth. + * This is only valid to call before anything is added to the document. + */ + void reserveFields(size_t expectedFields); - /// This includes missing values - DocumentStorageIterator iteratorAll() const { - return DocumentStorageIterator(_firstElement, end(), true); - } + /// This skips missing values + DocumentStorageIterator iterator() const { + return DocumentStorageIterator(_firstElement, end(), false); + } - /// Shallow copy of this. Caller owns memory. - boost::intrusive_ptr<DocumentStorage> clone() const; + /// This includes missing values + DocumentStorageIterator iteratorAll() const { + return DocumentStorageIterator(_firstElement, end(), true); + } - size_t allocatedBytes() const { - return !_buffer ? 0 : (_bufferEnd - _buffer + hashTabBytes()); - } + /// Shallow copy of this. Caller owns memory. + boost::intrusive_ptr<DocumentStorage> clone() const; - /** - * Copies all metadata from source if it has any. - * Note: does not clear metadata from this. - */ - void copyMetaDataFrom(const DocumentStorage& source) { - if (source.hasTextScore()) { - setTextScore(source.getTextScore()); - } - } + size_t allocatedBytes() const { + return !_buffer ? 
0 : (_bufferEnd - _buffer + hashTabBytes()); + } - bool hasTextScore() const { return _hasTextScore; } - double getTextScore() const { return _textScore; } - void setTextScore(double score) { - _hasTextScore = true; - _textScore = score; + /** + * Copies all metadata from source if it has any. + * Note: does not clear metadata from this. + */ + void copyMetaDataFrom(const DocumentStorage& source) { + if (source.hasTextScore()) { + setTextScore(source.getTextScore()); } + } + + bool hasTextScore() const { + return _hasTextScore; + } + double getTextScore() const { + return _textScore; + } + void setTextScore(double score) { + _hasTextScore = true; + _textScore = score; + } + +private: + /// Same as lastElement->next() or firstElement() if empty. + const ValueElement* end() const { + return _firstElement->plusBytes(_usedBytes); + } + + /// Allocates space in _buffer. Copies existing data if there is any. + void alloc(unsigned newSize); + + /// Call after adding field to _buffer and increasing _numFields + void addFieldToHashTable(Position pos); + + // assumes _hashTabMask is (power of two) - 1 + unsigned hashTabBuckets() const { + return _hashTabMask + 1; + } + unsigned hashTabBytes() const { + return hashTabBuckets() * sizeof(Position); + } + + /// rehash on buffer growth if load-factor > .5 (attempt to keep lf < 1 when full) + bool needRehash() const { + return _numFields * 2 > hashTabBuckets(); + } + + /// Initialize empty hash table + void hashTabInit() { + memset(_hashTab, -1, hashTabBytes()); + } + + static unsigned hashKey(StringData name) { + // TODO consider FNV-1a once we have a better benchmark corpus + unsigned out; + MurmurHash3_x86_32(name.rawData(), name.size(), 0, &out); + return out; + } + + unsigned bucketForKey(StringData name) const { + return hashKey(name) & _hashTabMask; + } + + /// Adds all fields to the hash table + void rehash() { + hashTabInit(); + for (DocumentStorageIterator it = iteratorAll(); !it.atEnd(); it.advance()) + addFieldToHashTable(it.position()); + } + + enum { + HASH_TAB_INIT_SIZE = 8, // must be power of 2 + HASH_TAB_MIN = 4, // don't hash fields for docs smaller than this + // set to 1 to always hash + }; - private: - - /// Same as lastElement->next() or firstElement() if empty. - const ValueElement* end() const { return _firstElement->plusBytes(_usedBytes); } - - /// Allocates space in _buffer. Copies existing data if there is any. - void alloc(unsigned newSize); - - /// Call after adding field to _buffer and increasing _numFields - void addFieldToHashTable(Position pos); - - // assumes _hashTabMask is (power of two) - 1 - unsigned hashTabBuckets() const { return _hashTabMask + 1; } - unsigned hashTabBytes() const { return hashTabBuckets() * sizeof(Position); } - - /// rehash on buffer growth if load-factor > .5 (attempt to keep lf < 1 when full) - bool needRehash() const { return _numFields*2 > hashTabBuckets(); } - - /// Initialize empty hash table - void hashTabInit() { memset(_hashTab, -1, hashTabBytes()); } - - static unsigned hashKey(StringData name) { - // TODO consider FNV-1a once we have a better benchmark corpus - unsigned out; - MurmurHash3_x86_32(name.rawData(), name.size(), 0, &out); - return out; - } + // _buffer layout: + // ------------------------------------------------------------------------------- + // | ValueElement1 Name1 | ValueElement2 Name2 | ... FREE SPACE ... 
| Hash Table | + // ------------------------------------------------------------------------------- + // ^ _buffer and _firstElement point here ^ + // _bufferEnd and _hashTab point here ^ + // + // + // When the buffer grows, the hash table moves to the new end. + union { + char* _buffer; + ValueElement* _firstElement; + }; - unsigned bucketForKey(StringData name) const { - return hashKey(name) & _hashTabMask; - } + union { + // pointer to "end" of _buffer element space and start of hash table (same position) + char* _bufferEnd; + Position* _hashTab; // table lazily initialized once _numFields == HASH_TAB_MIN + }; - /// Adds all fields to the hash table - void rehash() { - hashTabInit(); - for (DocumentStorageIterator it = iteratorAll(); !it.atEnd(); it.advance()) - addFieldToHashTable(it.position()); - } + unsigned _usedBytes; // position where next field would start + unsigned _numFields; // this includes removed fields + unsigned _hashTabMask; // equal to hashTabBuckets()-1 but used more often - enum { - HASH_TAB_INIT_SIZE = 8, // must be power of 2 - HASH_TAB_MIN = 4, // don't hash fields for docs smaller than this - // set to 1 to always hash - }; - - // _buffer layout: - // ------------------------------------------------------------------------------- - // | ValueElement1 Name1 | ValueElement2 Name2 | ... FREE SPACE ... | Hash Table | - // ------------------------------------------------------------------------------- - // ^ _buffer and _firstElement point here ^ - // _bufferEnd and _hashTab point here ^ - // - // - // When the buffer grows, the hash table moves to the new end. - union { - char* _buffer; - ValueElement* _firstElement; - }; - - union { - // pointer to "end" of _buffer element space and start of hash table (same position) - char* _bufferEnd; - Position* _hashTab; // table lazily initialized once _numFields == HASH_TAB_MIN - }; - - unsigned _usedBytes; // position where next field would start - unsigned _numFields; // this includes removed fields - unsigned _hashTabMask; // equal to hashTabBuckets()-1 but used more often - - bool _hasTextScore; // When adding more metadata fields, this should become a bitvector - double _textScore; - // When adding a field, make sure to update clone() method - }; + bool _hasTextScore; // When adding more metadata fields, this should become a bitvector + double _textScore; + // When adding a field, make sure to update clone() method +}; } diff --git a/src/mongo/db/pipeline/document_source.cpp b/src/mongo/db/pipeline/document_source.cpp index baa3e486784..57d12a7c85c 100644 --- a/src/mongo/db/pipeline/document_source.cpp +++ b/src/mongo/db/pipeline/document_source.cpp @@ -34,42 +34,40 @@ namespace mongo { - using boost::intrusive_ptr; - using std::vector; +using boost::intrusive_ptr; +using std::vector; - DocumentSource::DocumentSource(const intrusive_ptr<ExpressionContext> &pCtx) - : pSource(NULL) - , pExpCtx(pCtx) - {} +DocumentSource::DocumentSource(const intrusive_ptr<ExpressionContext>& pCtx) + : pSource(NULL), pExpCtx(pCtx) {} - const char *DocumentSource::getSourceName() const { - static const char unknown[] = "[UNKNOWN]"; - return unknown; - } +const char* DocumentSource::getSourceName() const { + static const char unknown[] = "[UNKNOWN]"; + return unknown; +} - void DocumentSource::setSource(DocumentSource *pTheSource) { - verify(!pSource); - pSource = pTheSource; - } +void DocumentSource::setSource(DocumentSource* pTheSource) { + verify(!pSource); + pSource = pTheSource; +} - bool DocumentSource::coalesce(const 
intrusive_ptr<DocumentSource> &pNextSource) { - return false; - } +bool DocumentSource::coalesce(const intrusive_ptr<DocumentSource>& pNextSource) { + return false; +} - intrusive_ptr<DocumentSource> DocumentSource::optimize() { - return this; - } +intrusive_ptr<DocumentSource> DocumentSource::optimize() { + return this; +} - void DocumentSource::dispose() { - if ( pSource ) { - pSource->dispose(); - } +void DocumentSource::dispose() { + if (pSource) { + pSource->dispose(); } +} - void DocumentSource::serializeToArray(vector<Value>& array, bool explain) const { - Value entry = serialize(explain); - if (!entry.missing()) { - array.push_back(entry); - } +void DocumentSource::serializeToArray(vector<Value>& array, bool explain) const { + Value entry = serialize(explain); + if (!entry.missing()) { + array.push_back(entry); } } +} diff --git a/src/mongo/db/pipeline/document_source.h b/src/mongo/db/pipeline/document_source.h index 51f5ae7c3b3..4902f8b4a40 100644 --- a/src/mongo/db/pipeline/document_source.h +++ b/src/mongo/db/pipeline/document_source.h @@ -51,1062 +51,1082 @@ namespace mongo { - class Accumulator; - class Document; - class Expression; - class ExpressionFieldPath; - class ExpressionObject; - class DocumentSourceLimit; - class PlanExecutor; - - class DocumentSource : public IntrusiveCounterUnsigned { - public: - virtual ~DocumentSource() {} - - /** Returns the next Document if there is one or boost::none if at EOF. - * Subclasses must call pExpCtx->checkForInterupt(). - */ - virtual boost::optional<Document> getNext() = 0; - - /** - * Inform the source that it is no longer needed and may release its resources. After - * dispose() is called the source must still be able to handle iteration requests, but may - * become eof(). - * NOTE: For proper mutex yielding, dispose() must be called on any DocumentSource that will - * not be advanced until eof(), see SERVER-6123. - */ - virtual void dispose(); - - /** - Get the source's name. +class Accumulator; +class Document; +class Expression; +class ExpressionFieldPath; +class ExpressionObject; +class DocumentSourceLimit; +class PlanExecutor; + +class DocumentSource : public IntrusiveCounterUnsigned { +public: + virtual ~DocumentSource() {} + + /** Returns the next Document if there is one or boost::none if at EOF. + * Subclasses must call pExpCtx->checkForInterupt(). + */ + virtual boost::optional<Document> getNext() = 0; - @returns the std::string name of the source as a constant string; - this is static, and there's no need to worry about adopting it - */ - virtual const char *getSourceName() const; + /** + * Inform the source that it is no longer needed and may release its resources. After + * dispose() is called the source must still be able to handle iteration requests, but may + * become eof(). + * NOTE: For proper mutex yielding, dispose() must be called on any DocumentSource that will + * not be advanced until eof(), see SERVER-6123. + */ + virtual void dispose(); - /** - Set the underlying source this source should use to get Documents - from. + /** + Get the source's name. - It is an error to set the source more than once. This is to - prevent changing sources once the original source has been started; - this could break the state maintained by the DocumentSource. 
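The getNext()/dispose() contract documented above is the whole execution model: callers pull Documents until boost::none, then release resources. A minimal consumer sketch under that contract (the drainSource() helper is hypothetical, not part of this header):

// Hypothetical sketch: drain a DocumentSource per the getNext()/dispose() contract above.
inline void drainSource(const boost::intrusive_ptr<DocumentSource>& source) {
    while (boost::optional<Document> doc = source->getNext()) {
        // ... process *doc here ...
    }
    // Stages not advanced all the way to EOF must still be disposed (see SERVER-6123).
    source->dispose();
}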
+ @returns the std::string name of the source as a constant string;
+ this is static, and there's no need to worry about adopting it
+ */
+ virtual const char* getSourceName() const;

- This pointer is not reference counted because that has led to
- some circular references. As a result, this doesn't keep
- sources alive, and is only intended to be used temporarily for
- the lifetime of a Pipeline::run().

+ /**
+ Set the underlying source this source should use to get Documents
+ from.

- @param pSource the underlying source to use
- */
- virtual void setSource(DocumentSource *pSource);

+ It is an error to set the source more than once. This is to
+ prevent changing sources once the original source has been started;
+ this could break the state maintained by the DocumentSource.

- /**
- Attempt to coalesce this DocumentSource with its successor in the
- document processing pipeline. If successful, the successor
- DocumentSource should be removed from the pipeline and discarded.

+ This pointer is not reference counted because that has led to
+ some circular references. As a result, this doesn't keep
+ sources alive, and is only intended to be used temporarily for
+ the lifetime of a Pipeline::run().

- If successful, this operation can be applied repeatedly, in an
- attempt to coalesce several sources together.

+ @param pSource the underlying source to use
+ */
+ virtual void setSource(DocumentSource* pSource);

- The default implementation is to do nothing, and return false.

+ /**
+ Attempt to coalesce this DocumentSource with its successor in the
+ document processing pipeline. If successful, the successor
+ DocumentSource should be removed from the pipeline and discarded.

- @param pNextSource the next source in the document processing chain.
- @returns whether the attempt to coalesce was successful;
- if the attempt was not successful, nothing has been changed
- */
- virtual bool coalesce(const boost::intrusive_ptr<DocumentSource> &pNextSource);

+ If successful, this operation can be applied repeatedly, in an
+ attempt to coalesce several sources together.

- /**
- * Returns an optimized DocumentSource that is semantically equivalent to this one, or
- * nullptr if this stage is a no-op. Implementations are allowed to modify themselves
- * in-place and return a pointer to themselves. For best results, first coalesce compatible
- * sources using coalesce().
- *
- * This is intended for any operations that include expressions, and provides a hook for
- * those to optimize those operations.
- *
- * The default implementation is to do nothing and return yourself.
- */
- virtual boost::intrusive_ptr<DocumentSource> optimize();

+ The default implementation is to do nothing, and return false.

- enum GetDepsReturn {
- NOT_SUPPORTED = 0x0, // The full object and all metadata may be required
- SEE_NEXT = 0x1, // Later stages could need either fields or metadata
- EXHAUSTIVE_FIELDS = 0x2, // Later stages won't need more fields from input
- EXHAUSTIVE_META = 0x4, // Later stages won't need more metadata from input
- EXHAUSTIVE_ALL = EXHAUSTIVE_FIELDS | EXHAUSTIVE_META, // Later stages won't need either
- };

+ @param pNextSource the next source in the document processing chain.
+ @returns whether the attempt to coalesce was successful;
+ if the attempt was not successful, nothing has been changed
+ */
+ virtual bool coalesce(const boost::intrusive_ptr<DocumentSource>& pNextSource);

- /**
- * Get the dependencies this operation needs to do its job.
- */
- virtual GetDepsReturn getDependencies(DepsTracker* deps) const {
- return NOT_SUPPORTED;
- }

+ /**
+ * Returns an optimized DocumentSource that is semantically equivalent to this one, or
+ * nullptr if this stage is a no-op. Implementations are allowed to modify themselves
+ * in-place and return a pointer to themselves. For best results, first coalesce compatible
+ * sources using coalesce().
+ *
+ * This is intended for any operations that include expressions, and provides a hook for
+ * those to optimize those operations.
+ *
+ * The default implementation is to do nothing and return yourself.
+ */
+ virtual boost::intrusive_ptr<DocumentSource> optimize();
+
+ enum GetDepsReturn {
+ NOT_SUPPORTED = 0x0, // The full object and all metadata may be required
+ SEE_NEXT = 0x1, // Later stages could need either fields or metadata
+ EXHAUSTIVE_FIELDS = 0x2, // Later stages won't need more fields from input
+ EXHAUSTIVE_META = 0x4, // Later stages won't need more metadata from input
+ EXHAUSTIVE_ALL = EXHAUSTIVE_FIELDS | EXHAUSTIVE_META, // Later stages won't need either
+ };

- /**
- * In the default case, serializes the DocumentSource and adds it to the std::vector<Value>.
- *
- * A subclass may choose to override this, rather than serialize,
- * if it should output multiple stages (eg, $sort sometimes also outputs a $limit).
- */

+ /**
+ * Get the dependencies this operation needs to do its job.
+ */
+ virtual GetDepsReturn getDependencies(DepsTracker* deps) const {
+ return NOT_SUPPORTED;
+ }

- virtual void serializeToArray(std::vector<Value>& array, bool explain = false) const;

+ /**
+ * In the default case, serializes the DocumentSource and adds it to the std::vector<Value>.
+ *
+ * A subclass may choose to override this, rather than serialize,
+ * if it should output multiple stages (eg, $sort sometimes also outputs a $limit).
+ */

- /// Returns true if this source doesn't require an input source (most DocumentSources do).
- virtual bool isValidInitialSource() const { return false; }

+ virtual void serializeToArray(std::vector<Value>& array, bool explain = false) const;

- protected:
- /**
- Base constructor.
- */
- DocumentSource(const boost::intrusive_ptr<ExpressionContext> &pExpCtx);

+ /// Returns true if this source doesn't require an input source (most DocumentSources do).
+ virtual bool isValidInitialSource() const {
+ return false;
+ }

- /*
- Most DocumentSources have an underlying source they get their data
- from. This is a convenience for them.

+protected:
+ /**
+ Base constructor.
+ */
+ DocumentSource(const boost::intrusive_ptr<ExpressionContext>& pExpCtx);

- The default implementation of setSource() sets this; if you don't
- need a source, override that to verify(). The default is to
- verify() if this has already been set.
- */
- DocumentSource *pSource;

+ /*
+ Most DocumentSources have an underlying source they get their data
+ from. This is a convenience for them.

- boost::intrusive_ptr<ExpressionContext> pExpCtx;

+ The default implementation of setSource() sets this; if you don't
+ need a source, override that to verify(). The default is to
+ verify() if this has already been set.
+ */
+ DocumentSource* pSource;

- private:
- /**
- * Create a Value that represents the document source.
- *
- * This is used by the default implementation of serializeToArray() to add this object
- * to a pipeline being serialized. Returning a missing() Value results in no entry
- * being added to the array for this stage (DocumentSource).
- */ - virtual Value serialize(bool explain = false) const = 0; - }; + boost::intrusive_ptr<ExpressionContext> pExpCtx; - /** This class marks DocumentSources that should be split between the merger and the shards. - * See Pipeline::Optimizations::Sharded::findSplitPoint() for details. +private: + /** + * Create a Value that represents the document source. + * + * This is used by the default implementation of serializeToArray() to add this object + * to a pipeline being serialized. Returning a missing() Value results in no entry + * being added to the array for this stage (DocumentSource). */ - class SplittableDocumentSource { - public: - /** returns a source to be run on the shards. - * if NULL, don't run on shards - */ - virtual boost::intrusive_ptr<DocumentSource> getShardSource() = 0; - - /** returns a source that combines results from shards. - * if NULL, don't run on merger - */ - virtual boost::intrusive_ptr<DocumentSource> getMergeSource() = 0; - protected: - // It is invalid to delete through a SplittableDocumentSource-typed pointer. - virtual ~SplittableDocumentSource() {} - }; + virtual Value serialize(bool explain = false) const = 0; +}; - - /** This class marks DocumentSources which need mongod-specific functionality. - * It causes a MongodInterface to be injected when in a mongod and prevents mongos from - * merging pipelines containing this stage. +/** This class marks DocumentSources that should be split between the merger and the shards. + * See Pipeline::Optimizations::Sharded::findSplitPoint() for details. + */ +class SplittableDocumentSource { +public: + /** returns a source to be run on the shards. + * if NULL, don't run on shards */ - class DocumentSourceNeedsMongod { - public: - // Wraps mongod-specific functions to allow linking into mongos. - class MongodInterface { - public: - virtual ~MongodInterface() {}; - - /** - * Always returns a DBDirectClient. - * Callers must not cache the returned pointer outside the scope of a single function. - */ - virtual DBClientBase* directClient() = 0; - - // Note that in some rare cases this could return a false negative but will never return - // a false positive. This method will be fixed in the future once it becomes possible to - // avoid false negatives. - virtual bool isSharded(const NamespaceString& ns) = 0; - - virtual bool isCapped(const NamespaceString& ns) = 0; - - /** - * Inserts 'objs' into 'ns' and returns the "detailed" last error object. - */ - virtual BSONObj insert(const NamespaceString& ns, const std::vector<BSONObj>& objs) = 0; - - // Add new methods as needed. - }; - - void injectMongodInterface(std::shared_ptr<MongodInterface> mongod) { - _mongod = mongod; - } + virtual boost::intrusive_ptr<DocumentSource> getShardSource() = 0; - protected: - // It is invalid to delete through a DocumentSourceNeedsMongod-typed pointer. - virtual ~DocumentSourceNeedsMongod() {} + /** returns a source that combines results from shards. + * if NULL, don't run on merger + */ + virtual boost::intrusive_ptr<DocumentSource> getMergeSource() = 0; - // Gives subclasses access to a MongodInterface implementation - std::shared_ptr<MongodInterface> _mongod; - }; +protected: + // It is invalid to delete through a SplittableDocumentSource-typed pointer. + virtual ~SplittableDocumentSource() {} +}; - class DocumentSourceBsonArray : - public DocumentSource { +/** This class marks DocumentSources which need mongod-specific functionality. 
+ * It causes a MongodInterface to be injected when in a mongod and prevents mongos from + * merging pipelines containing this stage. + */ +class DocumentSourceNeedsMongod { +public: + // Wraps mongod-specific functions to allow linking into mongos. + class MongodInterface { public: - // virtuals from DocumentSource - virtual boost::optional<Document> getNext(); - virtual Value serialize(bool explain = false) const; - virtual void setSource(DocumentSource *pSource); - virtual bool isValidInitialSource() const { return true; } + virtual ~MongodInterface(){}; /** - Create a document source based on a BSON array. - - This is usually put at the beginning of a chain of document sources - in order to fetch data from the database. - - CAUTION: the BSON is not read until the source is used. Any - elements that appear after these documents must not be read until - this source is exhausted. - - @param array the BSON array to treat as a document source - @param pExpCtx the expression context for the pipeline - @returns the newly created document source - */ - static boost::intrusive_ptr<DocumentSourceBsonArray> create( - const BSONObj& array, - const boost::intrusive_ptr<ExpressionContext> &pExpCtx); - - private: - DocumentSourceBsonArray( - const BSONObj& embeddedArray, - const boost::intrusive_ptr<ExpressionContext> &pExpCtx); + * Always returns a DBDirectClient. + * Callers must not cache the returned pointer outside the scope of a single function. + */ + virtual DBClientBase* directClient() = 0; - BSONObj embeddedObject; - BSONObjIterator arrayIterator; - }; + // Note that in some rare cases this could return a false negative but will never return + // a false positive. This method will be fixed in the future once it becomes possible to + // avoid false negatives. + virtual bool isSharded(const NamespaceString& ns) = 0; + virtual bool isCapped(const NamespaceString& ns) = 0; - class DocumentSourceCommandShards : - public DocumentSource { - public: - // virtuals from DocumentSource - virtual boost::optional<Document> getNext(); - virtual Value serialize(bool explain = false) const; - virtual void setSource(DocumentSource *pSource); - virtual bool isValidInitialSource() const { return true; } - - /* convenient shorthand for a commonly used type */ - typedef std::vector<Strategy::CommandResult> ShardOutput; - - /** Returns the result arrays from shards using the 2.4 protocol. - * Call this instead of getNext() if you want access to the raw streams. - * This method should only be called at most once. + /** + * Inserts 'objs' into 'ns' and returns the "detailed" last error object. */ - std::vector<BSONArray> getArrays(); + virtual BSONObj insert(const NamespaceString& ns, const std::vector<BSONObj>& objs) = 0; - /** - Create a DocumentSource that wraps the output of many shards + // Add new methods as needed. + }; - @param shardOutput output from the individual shards - @param pExpCtx the expression context for the pipeline - @returns the newly created DocumentSource - */ - static boost::intrusive_ptr<DocumentSourceCommandShards> create( - const ShardOutput& shardOutput, - const boost::intrusive_ptr<ExpressionContext>& pExpCtx); + void injectMongodInterface(std::shared_ptr<MongodInterface> mongod) { + _mongod = mongod; + } - private: - DocumentSourceCommandShards(const ShardOutput& shardOutput, - const boost::intrusive_ptr<ExpressionContext>& pExpCtx); +protected: + // It is invalid to delete through a DocumentSourceNeedsMongod-typed pointer. 
+ virtual ~DocumentSourceNeedsMongod() {} - /** - Advance to the next document, setting pCurrent appropriately. + // Gives subclasses access to a MongodInterface implementation + std::shared_ptr<MongodInterface> _mongod; +}; - Adjusts pCurrent, pBsonSource, and iterator, as needed. On exit, - pCurrent is the Document to return, or NULL. If NULL, this - indicates there is nothing more to return. - */ - void getNextDocument(); - - bool unstarted; - bool hasCurrent; - bool newSource; // set to true for the first item of a new source - boost::intrusive_ptr<DocumentSourceBsonArray> pBsonSource; - Document pCurrent; - ShardOutput::const_iterator iterator; - ShardOutput::const_iterator listEnd; - }; +class DocumentSourceBsonArray : public DocumentSource { +public: + // virtuals from DocumentSource + virtual boost::optional<Document> getNext(); + virtual Value serialize(bool explain = false) const; + virtual void setSource(DocumentSource* pSource); + virtual bool isValidInitialSource() const { + return true; + } /** - * Constructs and returns Documents from the BSONObj objects produced by a supplied - * PlanExecutor. - * - * An object of this type may only be used by one thread, see SERVER-6123. + Create a document source based on a BSON array. + + This is usually put at the beginning of a chain of document sources + in order to fetch data from the database. + + CAUTION: the BSON is not read until the source is used. Any + elements that appear after these documents must not be read until + this source is exhausted. + + @param array the BSON array to treat as a document source + @param pExpCtx the expression context for the pipeline + @returns the newly created document source + */ + static boost::intrusive_ptr<DocumentSourceBsonArray> create( + const BSONObj& array, const boost::intrusive_ptr<ExpressionContext>& pExpCtx); + +private: + DocumentSourceBsonArray(const BSONObj& embeddedArray, + const boost::intrusive_ptr<ExpressionContext>& pExpCtx); + + BSONObj embeddedObject; + BSONObjIterator arrayIterator; +}; + + +class DocumentSourceCommandShards : public DocumentSource { +public: + // virtuals from DocumentSource + virtual boost::optional<Document> getNext(); + virtual Value serialize(bool explain = false) const; + virtual void setSource(DocumentSource* pSource); + virtual bool isValidInitialSource() const { + return true; + } + + /* convenient shorthand for a commonly used type */ + typedef std::vector<Strategy::CommandResult> ShardOutput; + + /** Returns the result arrays from shards using the 2.4 protocol. + * Call this instead of getNext() if you want access to the raw streams. + * This method should only be called at most once. */ - class DocumentSourceCursor : - public DocumentSource { - public: - // virtuals from DocumentSource - virtual ~DocumentSourceCursor(); - virtual boost::optional<Document> getNext(); - virtual const char *getSourceName() const; - virtual Value serialize(bool explain = false) const; - virtual void setSource(DocumentSource *pSource); - virtual bool coalesce(const boost::intrusive_ptr<DocumentSource>& nextSource); - virtual bool isValidInitialSource() const { return true; } - virtual void dispose(); + std::vector<BSONArray> getArrays(); - /** - * Create a document source based on a passed-in PlanExecutor. - * - * This is usually put at the beginning of a chain of document sources - * in order to fetch data from the database. 
- */ - static boost::intrusive_ptr<DocumentSourceCursor> create( - const std::string& ns, - const std::shared_ptr<PlanExecutor>& exec, - const boost::intrusive_ptr<ExpressionContext> &pExpCtx); + /** + Create a DocumentSource that wraps the output of many shards - /* - Record the query that was specified for the cursor this wraps, if - any. + @param shardOutput output from the individual shards + @param pExpCtx the expression context for the pipeline + @returns the newly created DocumentSource + */ + static boost::intrusive_ptr<DocumentSourceCommandShards> create( + const ShardOutput& shardOutput, const boost::intrusive_ptr<ExpressionContext>& pExpCtx); - This should be captured after any optimizations are applied to - the pipeline so that it reflects what is really used. +private: + DocumentSourceCommandShards(const ShardOutput& shardOutput, + const boost::intrusive_ptr<ExpressionContext>& pExpCtx); - This gets used for explain output. + /** + Advance to the next document, setting pCurrent appropriately. - @param pBsonObj the query to record - */ - void setQuery(const BSONObj& query) { _query = query; } + Adjusts pCurrent, pBsonSource, and iterator, as needed. On exit, + pCurrent is the Document to return, or NULL. If NULL, this + indicates there is nothing more to return. + */ + void getNextDocument(); - /* - Record the sort that was specified for the cursor this wraps, if - any. + bool unstarted; + bool hasCurrent; + bool newSource; // set to true for the first item of a new source + boost::intrusive_ptr<DocumentSourceBsonArray> pBsonSource; + Document pCurrent; + ShardOutput::const_iterator iterator; + ShardOutput::const_iterator listEnd; +}; - This should be captured after any optimizations are applied to - the pipeline so that it reflects what is really used. - This gets used for explain output. +/** + * Constructs and returns Documents from the BSONObj objects produced by a supplied + * PlanExecutor. + * + * An object of this type may only be used by one thread, see SERVER-6123. + */ +class DocumentSourceCursor : public DocumentSource { +public: + // virtuals from DocumentSource + virtual ~DocumentSourceCursor(); + virtual boost::optional<Document> getNext(); + virtual const char* getSourceName() const; + virtual Value serialize(bool explain = false) const; + virtual void setSource(DocumentSource* pSource); + virtual bool coalesce(const boost::intrusive_ptr<DocumentSource>& nextSource); + virtual bool isValidInitialSource() const { + return true; + } + virtual void dispose(); - @param pBsonObj the sort to record - */ - void setSort(const BSONObj& sort) { _sort = sort; } + /** + * Create a document source based on a passed-in PlanExecutor. + * + * This is usually put at the beginning of a chain of document sources + * in order to fetch data from the database. + */ + static boost::intrusive_ptr<DocumentSourceCursor> create( + const std::string& ns, + const std::shared_ptr<PlanExecutor>& exec, + const boost::intrusive_ptr<ExpressionContext>& pExpCtx); - /** - * Informs this object of projection and dependency information. - * - * @param projection A projection specification describing the fields needed by the rest of - * the pipeline. - * @param deps The output of DepsTracker::toParsedDeps - */ - void setProjection(const BSONObj& projection, const boost::optional<ParsedDeps>& deps); + /* + Record the query that was specified for the cursor this wraps, if + any. 
- /// returns -1 for no limit
- long long getLimit() const;

+ This should be captured after any optimizations are applied to
+ the pipeline so that it reflects what is really used.

- private:
- DocumentSourceCursor(
- const std::string& ns,
- const std::shared_ptr<PlanExecutor>& exec,
- const boost::intrusive_ptr<ExpressionContext> &pExpCtx);

+ This gets used for explain output.

- void loadBatch();

+ @param pBsonObj the query to record
+ */
+ void setQuery(const BSONObj& query) {
+ _query = query;
+ }

- std::deque<Document> _currentBatch;

+ /*
+ Record the sort that was specified for the cursor this wraps, if
+ any.

- // BSONObj members must outlive _projection and cursor.
- BSONObj _query;
- BSONObj _sort;
- BSONObj _projection;
- boost::optional<ParsedDeps> _dependencies;
- boost::intrusive_ptr<DocumentSourceLimit> _limit;
- long long _docsAddedToBatches; // for _limit enforcement

+ This should be captured after any optimizations are applied to
+ the pipeline so that it reflects what is really used.

- const std::string _ns;
- std::shared_ptr<PlanExecutor> _exec; // PipelineProxyStage holds a weak_ptr to this.
- };

+ This gets used for explain output.

+ @param pBsonObj the sort to record
+ */
+ void setSort(const BSONObj& sort) {
+ _sort = sort;
+ }

- class DocumentSourceGroup : public DocumentSource
- , public SplittableDocumentSource {
- public:
- // virtuals from DocumentSource
- virtual boost::optional<Document> getNext();
- virtual const char *getSourceName() const;
- virtual boost::intrusive_ptr<DocumentSource> optimize();
- virtual GetDepsReturn getDependencies(DepsTracker* deps) const;
- virtual void dispose();
- virtual Value serialize(bool explain = false) const;

+ /**
+ * Informs this object of projection and dependency information.
+ *
+ * @param projection A projection specification describing the fields needed by the rest of
+ * the pipeline.
+ * @param deps The output of DepsTracker::toParsedDeps
+ */
+ void setProjection(const BSONObj& projection, const boost::optional<ParsedDeps>& deps);

- static boost::intrusive_ptr<DocumentSourceGroup> create(
- const boost::intrusive_ptr<ExpressionContext> &pExpCtx);

+ /// returns -1 for no limit
+ long long getLimit() const;

- /**
- Add an accumulator.

+private:
+ DocumentSourceCursor(const std::string& ns,
+ const std::shared_ptr<PlanExecutor>& exec,
+ const boost::intrusive_ptr<ExpressionContext>& pExpCtx);

- Accumulators become fields in the Documents that result from
- grouping. Each unique group document must have its own
- accumulator; the accumulator factory is used to create that.

+ void loadBatch();

- @param fieldName the name the accumulator result will have in the
- result documents
- @param pAccumulatorFactory used to create the accumulator for the
- group field
- */
- void addAccumulator(const std::string& fieldName,
- boost::intrusive_ptr<Accumulator> (*pAccumulatorFactory)(),
- const boost::intrusive_ptr<Expression> &pExpression);

+ std::deque<Document> _currentBatch;

- /// Tell this source if it is doing a merge from shards. Defaults to false.
- void setDoingMerge(bool doingMerge) { _doingMerge = doingMerge; }

+ // BSONObj members must outlive _projection and cursor.
+ BSONObj _query;
+ BSONObj _sort;
+ BSONObj _projection;
+ boost::optional<ParsedDeps> _dependencies;
+ boost::intrusive_ptr<DocumentSourceLimit> _limit;
+ long long _docsAddedToBatches; // for _limit enforcement

- /**
- Create a grouping DocumentSource from BSON.

+ const std::string _ns;
+ std::shared_ptr<PlanExecutor> _exec; // PipelineProxyStage holds a weak_ptr to this.
+};

- This is a convenience method that uses the above, and operates on
- a BSONElement that has been determined to be an Object with an
- element named $group.

- @param pBsonElement the BSONElement that defines the group
- @param pExpCtx the expression context
- @returns the grouping DocumentSource
- */
- static boost::intrusive_ptr<DocumentSource> createFromBson(
- BSONElement elem,
- const boost::intrusive_ptr<ExpressionContext> &pExpCtx);

+class DocumentSourceGroup : public DocumentSource, public SplittableDocumentSource {
+public:
+ // virtuals from DocumentSource
+ virtual boost::optional<Document> getNext();
+ virtual const char* getSourceName() const;
+ virtual boost::intrusive_ptr<DocumentSource> optimize();
+ virtual GetDepsReturn getDependencies(DepsTracker* deps) const;
+ virtual void dispose();
+ virtual Value serialize(bool explain = false) const;

- // Virtuals for SplittableDocumentSource
- virtual boost::intrusive_ptr<DocumentSource> getShardSource();
- virtual boost::intrusive_ptr<DocumentSource> getMergeSource();

+ static boost::intrusive_ptr<DocumentSourceGroup> create(
+ const boost::intrusive_ptr<ExpressionContext>& pExpCtx);

- static const char groupName[];
-
- private:
- DocumentSourceGroup(const boost::intrusive_ptr<ExpressionContext> &pExpCtx);
-
- /// Spill groups map to disk and returns an iterator to the file.
- std::shared_ptr<Sorter<Value, Value>::Iterator> spill();
-
- // Only used by spill. Would be function-local if that were legal in C++03.
- class SpillSTLComparator;

+ /**
+ Add an accumulator.

- /*
- Before returning anything, this source must fetch everything from
- the underlying source and group it. populate() is used to do that
- on the first call to any method on this source. The populated
- boolean indicates that this has been done.
- */
- void populate();
- bool populated;

+ Accumulators become fields in the Documents that result from
+ grouping. Each unique group document must have its own
+ accumulator; the accumulator factory is used to create that.

- /**
- * Parses the raw id expression into _idExpressions and possibly _idFieldNames.
- */
- void parseIdExpression(BSONElement groupField, const VariablesParseState& vps);

+ @param fieldName the name the accumulator result will have in the
+ result documents
+ @param pAccumulatorFactory used to create the accumulator for the
+ group field
+ */
+ void addAccumulator(const std::string& fieldName,
+ boost::intrusive_ptr<Accumulator>(*pAccumulatorFactory)(),
+ const boost::intrusive_ptr<Expression>& pExpression);

- /**
- * Computes the internal representation of the group key.
- */
- Value computeId(Variables* vars);

+ /// Tell this source if it is doing a merge from shards. Defaults to false.
+ void setDoingMerge(bool doingMerge) {
+ _doingMerge = doingMerge;
+ }

- /**
- * Converts the internal representation of the group key to the _id shape specified by the
- * user.
- */
- Value expandId(const Value& val);
-
-
- typedef std::vector<boost::intrusive_ptr<Accumulator> > Accumulators;
- typedef boost::unordered_map<Value, Accumulators, Value::Hash> GroupsMap;
- GroupsMap groups;
-
- /*
- The field names for the result documents and the accumulator
- factories for the result documents. The Expressions are the
- common expressions used by each instance of each accumulator
- in order to find the right-hand side of what gets added to the
- accumulator. Note that each of those is the same for each group,
- so we can share them across all groups by adding them to the
- accumulators after we use the factories to make a new set of
- accumulators for each new group.
-
- These three vectors parallel each other.
- */
- std::vector<std::string> vFieldName;
- std::vector<boost::intrusive_ptr<Accumulator> (*)()> vpAccumulatorFactory;
- std::vector<boost::intrusive_ptr<Expression> > vpExpression;
-
-
- Document makeDocument(const Value& id, const Accumulators& accums, bool mergeableOutput);
-
- bool _doingMerge;
- bool _spilled;
- const bool _extSortAllowed;
- const int _maxMemoryUsageBytes;
- std::unique_ptr<Variables> _variables;
- std::vector<std::string> _idFieldNames; // used when id is a document
- std::vector<boost::intrusive_ptr<Expression> > _idExpressions;
-
- // only used when !_spilled
- GroupsMap::iterator groupsIterator;
-
- // only used when _spilled
- std::unique_ptr<Sorter<Value, Value>::Iterator> _sorterIterator;
- std::pair<Value, Value> _firstPartOfNextGroup;
- Value _currentId;
- Accumulators _currentAccumulators;
- };

+ /**
+ Create a grouping DocumentSource from BSON.

+ This is a convenience method that uses the above, and operates on
+ a BSONElement that has been determined to be an Object with an
+ element named $group.

- class DocumentSourceMatch : public DocumentSource {
- public:
- // virtuals from DocumentSource
- virtual boost::optional<Document> getNext();
- virtual const char *getSourceName() const;
- virtual bool coalesce(const boost::intrusive_ptr<DocumentSource>& nextSource);
- virtual Value serialize(bool explain = false) const;
- virtual boost::intrusive_ptr<DocumentSource> optimize();
- virtual void setSource(DocumentSource* Source);

+ @param pBsonElement the BSONElement that defines the group
+ @param pExpCtx the expression context
+ @returns the grouping DocumentSource
+ */
+ static boost::intrusive_ptr<DocumentSource> createFromBson(
+ BSONElement elem, const boost::intrusive_ptr<ExpressionContext>& pExpCtx);

- /**
- Create a filter.

+ // Virtuals for SplittableDocumentSource
+ virtual boost::intrusive_ptr<DocumentSource> getShardSource();
+ virtual boost::intrusive_ptr<DocumentSource> getMergeSource();

- @param pBsonElement the raw BSON specification for the filter
- @returns the filter
- */
- static boost::intrusive_ptr<DocumentSource> createFromBson(
- BSONElement elem,
- const boost::intrusive_ptr<ExpressionContext> &pCtx);
-
- /// Returns the query in Matcher syntax.
- BSONObj getQuery() const;
-
- static const char matchName[];
-
- /** Returns the portion of the match that can safely be promoted to before a $redact.
- * If this returns an empty BSONObj, no part of this match may safely be promoted.
- *
- * To be safe to promote, removing a field from a document to be matched must not cause
- * that document to be accepted when it would otherwise be rejected. As an example,
- * {name: {$ne: "bob smith"}} accepts documents without a name field, which means that
- * running this filter before a redact that would remove the name field would leak
- * information. On the other hand, {age: {$gt:5}} is ok because it doesn't accept documents
- * that have had their age field removed.
- */ - BSONObj redactSafePortion() const; + static const char groupName[]; - static bool isTextQuery(const BSONObj& query); - bool isTextQuery() const { return _isTextQuery; } +private: + DocumentSourceGroup(const boost::intrusive_ptr<ExpressionContext>& pExpCtx); - private: - DocumentSourceMatch(const BSONObj &query, - const boost::intrusive_ptr<ExpressionContext> &pExpCtx); + /// Spill groups map to disk and returns an iterator to the file. + std::shared_ptr<Sorter<Value, Value>::Iterator> spill(); - std::unique_ptr<Matcher> matcher; - bool _isTextQuery; - }; + // Only used by spill. Would be function-local if that were legal in C++03. + class SpillSTLComparator; - class DocumentSourceMergeCursors : - public DocumentSource { - public: - typedef std::vector<std::pair<ConnectionString, CursorId> > CursorIds; + /* + Before returning anything, this source must fetch everything from + the underlying source and group it. populate() is used to do that + on the first call to any method on this source. The populated + boolean indicates that this has been done. + */ + void populate(); + bool populated; - // virtuals from DocumentSource - boost::optional<Document> getNext(); - virtual void setSource(DocumentSource *pSource); - virtual const char *getSourceName() const; - virtual void dispose(); - virtual Value serialize(bool explain = false) const; - virtual bool isValidInitialSource() const { return true; } + /** + * Parses the raw id expression into _idExpressions and possibly _idFieldNames. + */ + void parseIdExpression(BSONElement groupField, const VariablesParseState& vps); - static boost::intrusive_ptr<DocumentSource> createFromBson( - BSONElement elem, - const boost::intrusive_ptr<ExpressionContext> &pExpCtx); + /** + * Computes the internal representation of the group key. + */ + Value computeId(Variables* vars); - static boost::intrusive_ptr<DocumentSource> create( - const CursorIds& cursorIds, - const boost::intrusive_ptr<ExpressionContext> &pExpCtx); + /** + * Converts the internal representation of the group key to the _id shape specified by the + * user. + */ + Value expandId(const Value& val); + + + typedef std::vector<boost::intrusive_ptr<Accumulator>> Accumulators; + typedef boost::unordered_map<Value, Accumulators, Value::Hash> GroupsMap; + GroupsMap groups; + + /* + The field names for the result documents and the accumulator + factories for the result documents. The Expressions are the + common expressions used by each instance of each accumulator + in order to find the right-hand side of what gets added to the + accumulator. Note that each of those is the same for each group, + so we can share them across all groups by adding them to the + accumulators after we use the factories to make a new set of + accumulators for each new group. + + These three vectors parallel each other. 
+ */ + std::vector<std::string> vFieldName; + std::vector<boost::intrusive_ptr<Accumulator>(*)()> vpAccumulatorFactory; + std::vector<boost::intrusive_ptr<Expression>> vpExpression; + + + Document makeDocument(const Value& id, const Accumulators& accums, bool mergeableOutput); + + bool _doingMerge; + bool _spilled; + const bool _extSortAllowed; + const int _maxMemoryUsageBytes; + std::unique_ptr<Variables> _variables; + std::vector<std::string> _idFieldNames; // used when id is a document + std::vector<boost::intrusive_ptr<Expression>> _idExpressions; + + // only used when !_spilled + GroupsMap::iterator groupsIterator; + + // only used when _spilled + std::unique_ptr<Sorter<Value, Value>::Iterator> _sorterIterator; + std::pair<Value, Value> _firstPartOfNextGroup; + Value _currentId; + Accumulators _currentAccumulators; +}; + + +class DocumentSourceMatch : public DocumentSource { +public: + // virtuals from DocumentSource + virtual boost::optional<Document> getNext(); + virtual const char* getSourceName() const; + virtual bool coalesce(const boost::intrusive_ptr<DocumentSource>& nextSource); + virtual Value serialize(bool explain = false) const; + virtual boost::intrusive_ptr<DocumentSource> optimize(); + virtual void setSource(DocumentSource* Source); - static const char name[]; + /** + Create a filter. - /** Returns non-owning pointers to cursors managed by this stage. - * Call this instead of getNext() if you want access to the raw streams. - * This method should only be called at most once. - */ - std::vector<DBClientCursor*> getCursors(); + @param pBsonElement the raw BSON specification for the filter + @returns the filter + */ + static boost::intrusive_ptr<DocumentSource> createFromBson( + BSONElement elem, const boost::intrusive_ptr<ExpressionContext>& pCtx); - /** - * Returns the next object from the cursor, throwing an appropriate exception if the cursor - * reported an error. This is a better form of DBClientCursor::nextSafe. - */ - static Document nextSafeFrom(DBClientCursor* cursor); + /// Returns the query in Matcher syntax. + BSONObj getQuery() const; - private: + static const char matchName[]; - struct CursorAndConnection { - CursorAndConnection(ConnectionString host, NamespaceString ns, CursorId id); - ScopedDbConnection connection; - DBClientCursor cursor; - }; + /** Returns the portion of the match that can safely be promoted to before a $redact. + * If this returns an empty BSONObj, no part of this match may safely be promoted. + * + * To be safe to promote, removing a field from a document to be matched must not cause + * that document to be accepted when it would otherwise be rejected. As an example, + * {name: {$ne: "bob smith"}} accepts documents without a name field, which means that + * running this filter before a redact that would remove the name field would leak + * information. On the other hand, {age: {$gt:5}} is ok because it doesn't accept documents + * that have had their age field removed. 
+ */ + BSONObj redactSafePortion() const; + + static bool isTextQuery(const BSONObj& query); + bool isTextQuery() const { + return _isTextQuery; + } + +private: + DocumentSourceMatch(const BSONObj& query, + const boost::intrusive_ptr<ExpressionContext>& pExpCtx); + + std::unique_ptr<Matcher> matcher; + bool _isTextQuery; +}; + +class DocumentSourceMergeCursors : public DocumentSource { +public: + typedef std::vector<std::pair<ConnectionString, CursorId>> CursorIds; + + // virtuals from DocumentSource + boost::optional<Document> getNext(); + virtual void setSource(DocumentSource* pSource); + virtual const char* getSourceName() const; + virtual void dispose(); + virtual Value serialize(bool explain = false) const; + virtual bool isValidInitialSource() const { + return true; + } + + static boost::intrusive_ptr<DocumentSource> createFromBson( + BSONElement elem, const boost::intrusive_ptr<ExpressionContext>& pExpCtx); + + static boost::intrusive_ptr<DocumentSource> create( + const CursorIds& cursorIds, const boost::intrusive_ptr<ExpressionContext>& pExpCtx); + + static const char name[]; + + /** Returns non-owning pointers to cursors managed by this stage. + * Call this instead of getNext() if you want access to the raw streams. + * This method should only be called at most once. + */ + std::vector<DBClientCursor*> getCursors(); - // using list to enable removing arbitrary elements - typedef std::list<std::shared_ptr<CursorAndConnection> > Cursors; + /** + * Returns the next object from the cursor, throwing an appropriate exception if the cursor + * reported an error. This is a better form of DBClientCursor::nextSafe. + */ + static Document nextSafeFrom(DBClientCursor* cursor); - DocumentSourceMergeCursors( - const CursorIds& cursorIds, - const boost::intrusive_ptr<ExpressionContext> &pExpCtx); +private: + struct CursorAndConnection { + CursorAndConnection(ConnectionString host, NamespaceString ns, CursorId id); + ScopedDbConnection connection; + DBClientCursor cursor; + }; - // Converts _cursorIds into active _cursors. - void start(); + // using list to enable removing arbitrary elements + typedef std::list<std::shared_ptr<CursorAndConnection>> Cursors; - // This is the description of cursors to merge. - const CursorIds _cursorIds; + DocumentSourceMergeCursors(const CursorIds& cursorIds, + const boost::intrusive_ptr<ExpressionContext>& pExpCtx); - // These are the actual cursors we are merging. Created lazily. - Cursors _cursors; - Cursors::iterator _currentCursor; + // Converts _cursorIds into active _cursors. + void start(); - bool _unstarted; - }; + // This is the description of cursors to merge. + const CursorIds _cursorIds; - class DocumentSourceOut : public DocumentSource - , public SplittableDocumentSource - , public DocumentSourceNeedsMongod { - public: - // virtuals from DocumentSource - virtual ~DocumentSourceOut(); - virtual boost::optional<Document> getNext(); - virtual const char *getSourceName() const; - virtual Value serialize(bool explain = false) const; - virtual GetDepsReturn getDependencies(DepsTracker* deps) const; + // These are the actual cursors we are merging. Created lazily. 
+ Cursors _cursors;
+ Cursors::iterator _currentCursor;

- bool _unstarted;
- };
+ bool _unstarted;
+};

- class DocumentSourceOut : public DocumentSource
- , public SplittableDocumentSource
- , public DocumentSourceNeedsMongod {
- public:
- // virtuals from DocumentSource
- virtual ~DocumentSourceOut();
- virtual boost::optional<Document> getNext();
- virtual const char *getSourceName() const;
- virtual Value serialize(bool explain = false) const;
- virtual GetDepsReturn getDependencies(DepsTracker* deps) const;
+class DocumentSourceOut : public DocumentSource,
+ public SplittableDocumentSource,
+ public DocumentSourceNeedsMongod {
+public:
+ // virtuals from DocumentSource
+ virtual ~DocumentSourceOut();
+ virtual boost::optional<Document> getNext();
+ virtual const char* getSourceName() const;
+ virtual Value serialize(bool explain = false) const;
+ virtual GetDepsReturn getDependencies(DepsTracker* deps) const;

- // Virtuals for SplittableDocumentSource
- virtual boost::intrusive_ptr<DocumentSource> getShardSource() { return NULL; }
- virtual boost::intrusive_ptr<DocumentSource> getMergeSource() { return this; }
+ // Virtuals for SplittableDocumentSource
+ virtual boost::intrusive_ptr<DocumentSource> getShardSource() {
+ return NULL;
+ }
+ virtual boost::intrusive_ptr<DocumentSource> getMergeSource() {
+ return this;
+ }

- const NamespaceString& getOutputNs() const { return _outputNs; }
+ const NamespaceString& getOutputNs() const {
+ return _outputNs;
+ }

- /**
- Create a document source for output and pass-through.
+ /**
+ Create a document source for output and pass-through.

- This can be put anywhere in a pipeline and will store content as
- well as pass it on.
+ This can be put anywhere in a pipeline and will store content as
+ well as pass it on.

- @param pBsonElement the raw BSON specification for the source
- @param pExpCtx the expression context for the pipeline
- @returns the newly created document source
- */
- static boost::intrusive_ptr<DocumentSource> createFromBson(
- BSONElement elem,
- const boost::intrusive_ptr<ExpressionContext> &pExpCtx);
+ @param pBsonElement the raw BSON specification for the source
+ @param pExpCtx the expression context for the pipeline
+ @returns the newly created document source
+ */
+ static boost::intrusive_ptr<DocumentSource> createFromBson(
+ BSONElement elem, const boost::intrusive_ptr<ExpressionContext>& pExpCtx);

- static const char outName[];
+ static const char outName[];

- private:
- DocumentSourceOut(const NamespaceString& outputNs,
- const boost::intrusive_ptr<ExpressionContext> &pExpCtx);
+private:
+ DocumentSourceOut(const NamespaceString& outputNs,
+ const boost::intrusive_ptr<ExpressionContext>& pExpCtx);

- // Sets _tempNs and prepares it to receive data.
- void prepTempCollection();
+ // Sets _tempNs and prepares it to receive data.
+ void prepTempCollection();

- void spill(const std::vector<BSONObj>& toInsert);
+ void spill(const std::vector<BSONObj>& toInsert);

- bool _done;
+ bool _done;

- NamespaceString _tempNs; // output goes here as it is being processed.
- const NamespaceString _outputNs; // output will go here after all data is processed.
- };
+ NamespaceString _tempNs; // output goes here as it is being processed.
+ const NamespaceString _outputNs; // output will go here after all data is processed.
+};

- class DocumentSourceProject : public DocumentSource {
- public:
- // virtuals from DocumentSource
- virtual boost::optional<Document> getNext();
- virtual const char *getSourceName() const;
- virtual boost::intrusive_ptr<DocumentSource> optimize();
- virtual Value serialize(bool explain = false) const;

- virtual GetDepsReturn getDependencies(DepsTracker* deps) const;

- /**
- Create a new projection DocumentSource from BSON.
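The SplittableDocumentSource overrides above pin $out to the merging side of a sharded pipeline: the shard half is NULL and the merge half is the stage itself. A hypothetical sketch of the dispatch a pipeline splitter could perform (splitStage() and the two vectors are illustrative, not this header's API):

// Hypothetical sketch: route a splittable stage to the shard and/or merge pipelines.
inline void splitStage(SplittableDocumentSource* stage,
                       std::vector<boost::intrusive_ptr<DocumentSource>>* shardPipe,
                       std::vector<boost::intrusive_ptr<DocumentSource>>* mergePipe) {
    if (boost::intrusive_ptr<DocumentSource> shardPart = stage->getShardSource())
        shardPipe->push_back(shardPart);  // NULL for $out: nothing runs on the shards
    if (boost::intrusive_ptr<DocumentSource> mergePart = stage->getMergeSource())
        mergePipe->push_back(mergePart);  // $out returns itself: it runs on the merger
}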
+class DocumentSourceProject : public DocumentSource { +public: + // virtuals from DocumentSource + virtual boost::optional<Document> getNext(); + virtual const char* getSourceName() const; + virtual boost::intrusive_ptr<DocumentSource> optimize(); + virtual Value serialize(bool explain = false) const; - This is a convenience for directly handling BSON, and relies on the - above methods. + virtual GetDepsReturn getDependencies(DepsTracker* deps) const; - @param pBsonElement the BSONElement with an object named $project - @param pExpCtx the expression context for the pipeline - @returns the created projection - */ - static boost::intrusive_ptr<DocumentSource> createFromBson( - BSONElement elem, - const boost::intrusive_ptr<ExpressionContext> &pExpCtx); + /** + Create a new projection DocumentSource from BSON. - static const char projectName[]; + This is a convenience for directly handling BSON, and relies on the + above methods. - /** projection as specified by the user */ - BSONObj getRaw() const { return _raw; } + @param pBsonElement the BSONElement with an object named $project + @param pExpCtx the expression context for the pipeline + @returns the created projection + */ + static boost::intrusive_ptr<DocumentSource> createFromBson( + BSONElement elem, const boost::intrusive_ptr<ExpressionContext>& pExpCtx); - private: - DocumentSourceProject(const boost::intrusive_ptr<ExpressionContext>& pExpCtx, - const boost::intrusive_ptr<ExpressionObject>& exprObj); + static const char projectName[]; - // configuration state - std::unique_ptr<Variables> _variables; - boost::intrusive_ptr<ExpressionObject> pEO; - BSONObj _raw; - }; + /** projection as specified by the user */ + BSONObj getRaw() const { + return _raw; + } - class DocumentSourceRedact : - public DocumentSource { - public: - virtual boost::optional<Document> getNext(); - virtual const char* getSourceName() const; - virtual boost::intrusive_ptr<DocumentSource> optimize(); +private: + DocumentSourceProject(const boost::intrusive_ptr<ExpressionContext>& pExpCtx, + const boost::intrusive_ptr<ExpressionObject>& exprObj); - static const char redactName[]; + // configuration state + std::unique_ptr<Variables> _variables; + boost::intrusive_ptr<ExpressionObject> pEO; + BSONObj _raw; +}; - static boost::intrusive_ptr<DocumentSource> createFromBson( - BSONElement elem, - const boost::intrusive_ptr<ExpressionContext>& expCtx); +class DocumentSourceRedact : public DocumentSource { +public: + virtual boost::optional<Document> getNext(); + virtual const char* getSourceName() const; + virtual boost::intrusive_ptr<DocumentSource> optimize(); - virtual Value serialize(bool explain = false) const; + static const char redactName[]; - private: - DocumentSourceRedact(const boost::intrusive_ptr<ExpressionContext>& expCtx, - const boost::intrusive_ptr<Expression>& previsit); + static boost::intrusive_ptr<DocumentSource> createFromBson( + BSONElement elem, const boost::intrusive_ptr<ExpressionContext>& expCtx); - // These both work over _variables - boost::optional<Document> redactObject(); // redacts CURRENT - Value redactValue(const Value& in); + virtual Value serialize(bool explain = false) const; - Variables::Id _currentId; - std::unique_ptr<Variables> _variables; - boost::intrusive_ptr<Expression> _expression; - }; +private: + DocumentSourceRedact(const boost::intrusive_ptr<ExpressionContext>& expCtx, + const boost::intrusive_ptr<Expression>& previsit); - class DocumentSourceSort : public DocumentSource - , public SplittableDocumentSource { - public: - // 
virtuals from DocumentSource - virtual boost::optional<Document> getNext(); - virtual const char *getSourceName() const; - virtual void serializeToArray(std::vector<Value>& array, bool explain = false) const; - virtual bool coalesce(const boost::intrusive_ptr<DocumentSource> &pNextSource); - virtual void dispose(); + // These both work over _variables + boost::optional<Document> redactObject(); // redacts CURRENT + Value redactValue(const Value& in); - virtual GetDepsReturn getDependencies(DepsTracker* deps) const; + Variables::Id _currentId; + std::unique_ptr<Variables> _variables; + boost::intrusive_ptr<Expression> _expression; +}; - virtual boost::intrusive_ptr<DocumentSource> getShardSource(); - virtual boost::intrusive_ptr<DocumentSource> getMergeSource(); +class DocumentSourceSort : public DocumentSource, public SplittableDocumentSource { +public: + // virtuals from DocumentSource + virtual boost::optional<Document> getNext(); + virtual const char* getSourceName() const; + virtual void serializeToArray(std::vector<Value>& array, bool explain = false) const; + virtual bool coalesce(const boost::intrusive_ptr<DocumentSource>& pNextSource); + virtual void dispose(); - /** - Add sort key field. + virtual GetDepsReturn getDependencies(DepsTracker* deps) const; - Adds a sort key field to the key being built up. A concatenated - key is built up by calling this repeatedly. + virtual boost::intrusive_ptr<DocumentSource> getShardSource(); + virtual boost::intrusive_ptr<DocumentSource> getMergeSource(); - @param fieldPath the field path to the key component - @param ascending if true, use the key for an ascending sort, - otherwise, use it for descending - */ - void addKey(const std::string &fieldPath, bool ascending); + /** + Add sort key field. - /// Write out a Document whose contents are the sort key. - Document serializeSortKey(bool explain) const; + Adds a sort key field to the key being built up. A concatenated + key is built up by calling this repeatedly. - /** - Create a sorting DocumentSource from BSON. + @param fieldPath the field path to the key component + @param ascending if true, use the key for an ascending sort, + otherwise, use it for descending + */ + void addKey(const std::string& fieldPath, bool ascending); - This is a convenience method that uses the above, and operates on - a BSONElement that has been deteremined to be an Object with an - element named $group. + /// Write out a Document whose contents are the sort key. + Document serializeSortKey(bool explain) const; - @param pBsonElement the BSONELement that defines the group - @param pExpCtx the expression context for the pipeline - @returns the grouping DocumentSource - */ - static boost::intrusive_ptr<DocumentSource> createFromBson( - BSONElement elem, - const boost::intrusive_ptr<ExpressionContext> &pExpCtx); + /** + Create a sorting DocumentSource from BSON. - /// Create a DocumentSourceSort with a given sort and (optional) limit - static boost::intrusive_ptr<DocumentSourceSort> create( - const boost::intrusive_ptr<ExpressionContext> &pExpCtx, - BSONObj sortOrder, - long long limit=-1); + This is a convenience method that uses the above, and operates on + a BSONElement that has been deteremined to be an Object with an + element named $group. 
- /// returns -1 for no limit - long long getLimit() const; + @param pBsonElement the BSONElement that defines the sort + @param pExpCtx the expression context for the pipeline + @returns the sorting DocumentSource + */ + static boost::intrusive_ptr<DocumentSource> createFromBson( + BSONElement elem, const boost::intrusive_ptr<ExpressionContext>& pExpCtx); - boost::intrusive_ptr<DocumentSourceLimit> getLimitSrc() const { return limitSrc; } + /// Create a DocumentSourceSort with a given sort and (optional) limit + static boost::intrusive_ptr<DocumentSourceSort> create( + const boost::intrusive_ptr<ExpressionContext>& pExpCtx, + BSONObj sortOrder, + long long limit = -1); - static const char sortName[]; + /// returns -1 for no limit + long long getLimit() const; - private: - DocumentSourceSort(const boost::intrusive_ptr<ExpressionContext> &pExpCtx); + boost::intrusive_ptr<DocumentSourceLimit> getLimitSrc() const { + return limitSrc; + } - virtual Value serialize(bool explain = false) const { - verify(false); // should call addToBsonArray instead - } + static const char sortName[]; - /* - Before returning anything, this source must fetch everything from - the underlying source and group it. populate() is used to do that - on the first call to any method on this source. The populated - boolean indicates that this has been done. - */ - void populate(); - bool populated; - - SortOptions makeSortOptions() const; - - // These are used to merge pre-sorted results from a DocumentSourceMergeCursors or a - // DocumentSourceCommandShards depending on whether we have finished upgrading to 2.6 or - // not. - class IteratorFromCursor; - class IteratorFromBsonArray; - void populateFromCursors(const std::vector<DBClientCursor*>& cursors); - void populateFromBsonArrays(const std::vector<BSONArray>& arrays); - - /* these two parallel each other */ - typedef std::vector<boost::intrusive_ptr<Expression> > SortKey; - SortKey vSortKey; - std::vector<char> vAscending; // used like std::vector<bool> but without specialization - - /// Extracts the fields in vSortKey from the Document; - Value extractKey(const Document& d) const; - - /// Compare two Values according to the specified sort key. - int compare(const Value& lhs, const Value& rhs) const; - - typedef Sorter<Value, Document> MySorter; - - // For MySorter - class Comparator { - public: - explicit Comparator(const DocumentSourceSort& source): _source(source) {} - int operator()(const MySorter::Data& lhs, const MySorter::Data& rhs) const { - return _source.compare(lhs.first, rhs.first); - } - private: - const DocumentSourceSort& _source; - }; - - boost::intrusive_ptr<DocumentSourceLimit> limitSrc; - - bool _done; - bool _mergingPresorted; - std::unique_ptr<MySorter::Iterator> _output; - }; +private: + DocumentSourceSort(const boost::intrusive_ptr<ExpressionContext>& pExpCtx); - class DocumentSourceLimit : public DocumentSource - , public SplittableDocumentSource { - public: - // virtuals from DocumentSource - virtual boost::optional<Document> getNext(); - virtual const char *getSourceName() const; - virtual bool coalesce(const boost::intrusive_ptr<DocumentSource> &pNextSource); - virtual Value serialize(bool explain = false) const; - - virtual GetDepsReturn getDependencies(DepsTracker* deps) const { - return SEE_NEXT; // This doesn't affect needed fields - } + virtual Value serialize(bool explain = false) const { + verify(false); // should call serializeToArray instead + } - /** - Create a new limiting DocumentSource.
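// A brief usage sketch for the DocumentSourceSort factory above: `pExpCtx` is
// assumed to be an ExpressionContext already in scope, and the field names and
// the limit are illustrative only.
boost::intrusive_ptr<DocumentSourceSort> sortStage =
    DocumentSourceSort::create(pExpCtx, BSON("age" << -1 << "name" << 1), 10);
// The BSONObj spec plays the same role as calling addKey("age", false) and then
// addKey("name", true); the optional limit lets the sorter retain only the top 10.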
+ /* + Before returning anything, this source must fetch everything from + the underlying source and group it. populate() is used to do that + on the first call to any method on this source. The populated + boolean indicates that this has been done. + */ + void populate(); + bool populated; - @param pExpCtx the expression context for the pipeline - @returns the DocumentSource - */ - static boost::intrusive_ptr<DocumentSourceLimit> create( - const boost::intrusive_ptr<ExpressionContext> &pExpCtx, - long long limit); + SortOptions makeSortOptions() const; - // Virtuals for SplittableDocumentSource - // Need to run on rounter. Running on shard as well is an optimization. - virtual boost::intrusive_ptr<DocumentSource> getShardSource() { return this; } - virtual boost::intrusive_ptr<DocumentSource> getMergeSource() { return this; } + // These are used to merge pre-sorted results from a DocumentSourceMergeCursors or a + // DocumentSourceCommandShards depending on whether we have finished upgrading to 2.6 or + // not. + class IteratorFromCursor; + class IteratorFromBsonArray; + void populateFromCursors(const std::vector<DBClientCursor*>& cursors); + void populateFromBsonArrays(const std::vector<BSONArray>& arrays); - long long getLimit() const { return limit; } - void setLimit(long long newLimit) { limit = newLimit; } + /* these two parallel each other */ + typedef std::vector<boost::intrusive_ptr<Expression>> SortKey; + SortKey vSortKey; + std::vector<char> vAscending; // used like std::vector<bool> but without specialization - /** - Create a limiting DocumentSource from BSON. + /// Extracts the fields in vSortKey from the Document; + Value extractKey(const Document& d) const; - This is a convenience method that uses the above, and operates on - a BSONElement that has been deteremined to be an Object with an - element named $limit. + /// Compare two Values according to the specified sort key. + int compare(const Value& lhs, const Value& rhs) const; - @param pBsonElement the BSONELement that defines the limit - @param pExpCtx the expression context - @returns the grouping DocumentSource - */ - static boost::intrusive_ptr<DocumentSource> createFromBson( - BSONElement elem, - const boost::intrusive_ptr<ExpressionContext> &pExpCtx); + typedef Sorter<Value, Document> MySorter; - static const char limitName[]; + // For MySorter + class Comparator { + public: + explicit Comparator(const DocumentSourceSort& source) : _source(source) {} + int operator()(const MySorter::Data& lhs, const MySorter::Data& rhs) const { + return _source.compare(lhs.first, rhs.first); + } private: - DocumentSourceLimit(const boost::intrusive_ptr<ExpressionContext> &pExpCtx, - long long limit); - - long long limit; - long long count; + const DocumentSourceSort& _source; }; - class DocumentSourceSkip : public DocumentSource - , public SplittableDocumentSource { - public: - // virtuals from DocumentSource - virtual boost::optional<Document> getNext(); - virtual const char *getSourceName() const; - virtual bool coalesce(const boost::intrusive_ptr<DocumentSource> &pNextSource); - virtual Value serialize(bool explain = false) const; - virtual boost::intrusive_ptr<DocumentSource> optimize(); - - virtual GetDepsReturn getDependencies(DepsTracker* deps) const { - return SEE_NEXT; // This doesn't affect needed fields - } - - /** - Create a new skipping DocumentSource. 
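// Sketch of the blocking behavior documented for populate() above: the first
// getNext() on a $sort drains and sorts its whole input, and later calls stream
// from the sorted buffer. `sortStage` is assumed to be wired to an upstream source.
boost::optional<Document> first = sortStage->getNext();   // triggers populate()
boost::optional<Document> second = sortStage->getNext();  // served from sorted output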
+ boost::intrusive_ptr<DocumentSourceLimit> limitSrc; - @param pExpCtx the expression context - @returns the DocumentSource - */ - static boost::intrusive_ptr<DocumentSourceSkip> create( - const boost::intrusive_ptr<ExpressionContext> &pExpCtx); + bool _done; + bool _mergingPresorted; + std::unique_ptr<MySorter::Iterator> _output; +}; - // Virtuals for SplittableDocumentSource - // Need to run on rounter. Can't run on shards. - virtual boost::intrusive_ptr<DocumentSource> getShardSource() { return NULL; } - virtual boost::intrusive_ptr<DocumentSource> getMergeSource() { return this; } +class DocumentSourceLimit : public DocumentSource, public SplittableDocumentSource { +public: + // virtuals from DocumentSource + virtual boost::optional<Document> getNext(); + virtual const char* getSourceName() const; + virtual bool coalesce(const boost::intrusive_ptr<DocumentSource>& pNextSource); + virtual Value serialize(bool explain = false) const; - long long getSkip() const { return _skip; } - void setSkip(long long newSkip) { _skip = newSkip; } + virtual GetDepsReturn getDependencies(DepsTracker* deps) const { + return SEE_NEXT; // This doesn't affect needed fields + } - /** - Create a skipping DocumentSource from BSON. + /** + Create a new limiting DocumentSource. - This is a convenience method that uses the above, and operates on - a BSONElement that has been deteremined to be an Object with an - element named $skip. + @param pExpCtx the expression context for the pipeline + @returns the DocumentSource + */ + static boost::intrusive_ptr<DocumentSourceLimit> create( + const boost::intrusive_ptr<ExpressionContext>& pExpCtx, long long limit); + + // Virtuals for SplittableDocumentSource + // Need to run on router. Running on shard as well is an optimization. + virtual boost::intrusive_ptr<DocumentSource> getShardSource() { + return this; + } + virtual boost::intrusive_ptr<DocumentSource> getMergeSource() { + return this; + } + + long long getLimit() const { + return limit; + } + void setLimit(long long newLimit) { + limit = newLimit; + } - @param pBsonElement the BSONELement that defines the skip - @param pExpCtx the expression context - @returns the grouping DocumentSource - */ - static boost::intrusive_ptr<DocumentSource> createFromBson( - BSONElement elem, - const boost::intrusive_ptr<ExpressionContext> &pExpCtx); + /** + Create a limiting DocumentSource from BSON. - static const char skipName[]; + This is a convenience method that uses the above, and operates on + a BSONElement that has been determined to be an Object with an + element named $limit.
- private: - DocumentSourceSkip(const boost::intrusive_ptr<ExpressionContext> &pExpCtx); + @param pBsonElement the BSONElement that defines the limit + @param pExpCtx the expression context + @returns the limiting DocumentSource + */ + static boost::intrusive_ptr<DocumentSource> createFromBson( + BSONElement elem, const boost::intrusive_ptr<ExpressionContext>& pExpCtx); - long long _skip; - bool _needToSkip; - }; + static const char limitName[]; +private: + DocumentSourceLimit(const boost::intrusive_ptr<ExpressionContext>& pExpCtx, long long limit); - class DocumentSourceUnwind : - public DocumentSource { - public: - // virtuals from DocumentSource - virtual boost::optional<Document> getNext(); - virtual const char *getSourceName() const; - virtual Value serialize(bool explain = false) const; + long long limit; + long long count; +}; - virtual GetDepsReturn getDependencies(DepsTracker* deps) const; +class DocumentSourceSkip : public DocumentSource, public SplittableDocumentSource { +public: + // virtuals from DocumentSource + virtual boost::optional<Document> getNext(); + virtual const char* getSourceName() const; + virtual bool coalesce(const boost::intrusive_ptr<DocumentSource>& pNextSource); + virtual Value serialize(bool explain = false) const; + virtual boost::intrusive_ptr<DocumentSource> optimize(); - /** - Create a new projection DocumentSource from BSON. + virtual GetDepsReturn getDependencies(DepsTracker* deps) const { + return SEE_NEXT; // This doesn't affect needed fields + } - This is a convenience for directly handling BSON, and relies on the - above methods. + /** + Create a new skipping DocumentSource. - @param pBsonElement the BSONElement with an object named $project - @param pExpCtx the expression context for the pipeline - @returns the created projection - */ - static boost::intrusive_ptr<DocumentSource> createFromBson( - BSONElement elem, - const boost::intrusive_ptr<ExpressionContext> &pExpCtx); + @param pExpCtx the expression context + @returns the DocumentSource + */ + static boost::intrusive_ptr<DocumentSourceSkip> create( + const boost::intrusive_ptr<ExpressionContext>& pExpCtx); + + // Virtuals for SplittableDocumentSource + // Need to run on router. Can't run on shards. + virtual boost::intrusive_ptr<DocumentSource> getShardSource() { + return NULL; + } + virtual boost::intrusive_ptr<DocumentSource> getMergeSource() { + return this; + } + + long long getSkip() const { + return _skip; + } + void setSkip(long long newSkip) { + _skip = newSkip; + } - static const char unwindName[]; + /** + Create a skipping DocumentSource from BSON. - private: - DocumentSourceUnwind(const boost::intrusive_ptr<ExpressionContext> &pExpCtx); + This is a convenience method that uses the above, and operates on + a BSONElement that has been determined to be an Object with an + element named $skip. - /** Specify the field to unwind. */ - void unwindPath(const FieldPath &fieldPath); + @param pBsonElement the BSONElement that defines the skip + @param pExpCtx the expression context + @returns the skipping DocumentSource + */ + static boost::intrusive_ptr<DocumentSource> createFromBson( + BSONElement elem, const boost::intrusive_ptr<ExpressionContext>& pExpCtx); - // Configuration state. - std::unique_ptr<FieldPath> _unwindPath; + static const char skipName[]; - // Iteration state.
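// A sketch contrasting the two SplittableDocumentSource behaviors above
// (`pExpCtx` assumed in scope; constants illustrative): $limit runs on both
// halves of a split pipeline, while $skip may only run after the merge.
boost::intrusive_ptr<DocumentSourceLimit> limitStage =
    DocumentSourceLimit::create(pExpCtx, 5);
invariant(limitStage->getShardSource() == limitStage);  // shards may pre-limit
invariant(limitStage->getMergeSource() == limitStage);  // router applies the final limit

boost::intrusive_ptr<DocumentSourceSkip> skipStage = DocumentSourceSkip::create(pExpCtx);
skipStage->setSkip(5);
invariant(!skipStage->getShardSource());              // no shard can skip safely
invariant(skipStage->getMergeSource() == skipStage);  // only the merged stream is skipped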
- class Unwinder; - std::unique_ptr<Unwinder> _unwinder; - }; +private: + DocumentSourceSkip(const boost::intrusive_ptr<ExpressionContext>& pExpCtx); - class DocumentSourceGeoNear : public DocumentSource - , public SplittableDocumentSource - , public DocumentSourceNeedsMongod { - public: - // virtuals from DocumentSource - virtual boost::optional<Document> getNext(); - virtual const char *getSourceName() const; - virtual void setSource(DocumentSource *pSource); - virtual bool coalesce(const boost::intrusive_ptr<DocumentSource> &pNextSource); - virtual bool isValidInitialSource() const { return true; } - virtual Value serialize(bool explain = false) const; + long long _skip; + bool _needToSkip; +}; - // Virtuals for SplittableDocumentSource - virtual boost::intrusive_ptr<DocumentSource> getShardSource(); - virtual boost::intrusive_ptr<DocumentSource> getMergeSource(); - static boost::intrusive_ptr<DocumentSource> createFromBson( - BSONElement elem, - const boost::intrusive_ptr<ExpressionContext> &pCtx); +class DocumentSourceUnwind : public DocumentSource { +public: + // virtuals from DocumentSource + virtual boost::optional<Document> getNext(); + virtual const char* getSourceName() const; + virtual Value serialize(bool explain = false) const; - static char geoNearName[]; + virtual GetDepsReturn getDependencies(DepsTracker* deps) const; - long long getLimit() { return limit; } + /** + Create a new unwinding DocumentSource from BSON. - // this should only be used for testing - static boost::intrusive_ptr<DocumentSourceGeoNear> create( - const boost::intrusive_ptr<ExpressionContext> &pCtx); + This is a convenience for directly handling BSON, and relies on the + above methods. - private: - DocumentSourceGeoNear(const boost::intrusive_ptr<ExpressionContext> &pExpCtx); - - void parseOptions(BSONObj options); - BSONObj buildGeoNearCmd() const; - void runCommand(); - - // These fields describe the command to run. - // coords and distanceField are required, rest are optional - BSONObj coords; // "near" option, but near is a reserved keyword on windows - bool coordsIsArray; - std::unique_ptr<FieldPath> distanceField; // Using unique_ptr because FieldPath can't be empty - long long limit; - double maxDistance; - double minDistance; - BSONObj query; - bool spherical; - double distanceMultiplier; - std::unique_ptr<FieldPath> includeLocs; - - // these fields are used while processing the results - BSONObj cmdOutput; - std::unique_ptr<BSONObjIterator> resultsIterator; // iterator over cmdOutput["results"] - }; + @param pBsonElement the BSONElement with an object named $unwind + @param pExpCtx the expression context for the pipeline + @returns the created unwind stage + */ + static boost::intrusive_ptr<DocumentSource> createFromBson( + BSONElement elem, const boost::intrusive_ptr<ExpressionContext>& pExpCtx); + + static const char unwindName[]; + +private: + DocumentSourceUnwind(const boost::intrusive_ptr<ExpressionContext>& pExpCtx); + + /** Specify the field to unwind. */ + void unwindPath(const FieldPath& fieldPath); + + // Configuration state. + std::unique_ptr<FieldPath> _unwindPath; + + // Iteration state.
+ class Unwinder; + std::unique_ptr<Unwinder> _unwinder; +}; + +class DocumentSourceGeoNear : public DocumentSource, + public SplittableDocumentSource, + public DocumentSourceNeedsMongod { +public: + // virtuals from DocumentSource + virtual boost::optional<Document> getNext(); + virtual const char* getSourceName() const; + virtual void setSource(DocumentSource* pSource); + virtual bool coalesce(const boost::intrusive_ptr<DocumentSource>& pNextSource); + virtual bool isValidInitialSource() const { + return true; + } + virtual Value serialize(bool explain = false) const; + + // Virtuals for SplittableDocumentSource + virtual boost::intrusive_ptr<DocumentSource> getShardSource(); + virtual boost::intrusive_ptr<DocumentSource> getMergeSource(); + + static boost::intrusive_ptr<DocumentSource> createFromBson( + BSONElement elem, const boost::intrusive_ptr<ExpressionContext>& pCtx); + + static char geoNearName[]; + + long long getLimit() { + return limit; + } + + // this should only be used for testing + static boost::intrusive_ptr<DocumentSourceGeoNear> create( + const boost::intrusive_ptr<ExpressionContext>& pCtx); + +private: + DocumentSourceGeoNear(const boost::intrusive_ptr<ExpressionContext>& pExpCtx); + + void parseOptions(BSONObj options); + BSONObj buildGeoNearCmd() const; + void runCommand(); + + // These fields describe the command to run. + // coords and distanceField are required, rest are optional + BSONObj coords; // "near" option, but near is a reserved keyword on windows + bool coordsIsArray; + std::unique_ptr<FieldPath> distanceField; // Using unique_ptr because FieldPath can't be empty + long long limit; + double maxDistance; + double minDistance; + BSONObj query; + bool spherical; + double distanceMultiplier; + std::unique_ptr<FieldPath> includeLocs; + + // these fields are used while processing the results + BSONObj cmdOutput; + std::unique_ptr<BSONObjIterator> resultsIterator; // iterator over cmdOutput["results"] +}; } diff --git a/src/mongo/db/pipeline/document_source_bson_array.cpp b/src/mongo/db/pipeline/document_source_bson_array.cpp index ca3e56e65d1..aa05258e954 100644 --- a/src/mongo/db/pipeline/document_source_bson_array.cpp +++ b/src/mongo/db/pipeline/document_source_bson_array.cpp @@ -34,41 +34,35 @@ namespace mongo { - using boost::intrusive_ptr; +using boost::intrusive_ptr; - boost::optional<Document> DocumentSourceBsonArray::getNext() { - pExpCtx->checkForInterrupt(); +boost::optional<Document> DocumentSourceBsonArray::getNext() { + pExpCtx->checkForInterrupt(); - if (!arrayIterator.more()) - return boost::none; + if (!arrayIterator.more()) + return boost::none; - return Document(arrayIterator.next().Obj()); - } - - void DocumentSourceBsonArray::setSource(DocumentSource *pSource) { - /* this doesn't take a source */ - verify(false); - } + return Document(arrayIterator.next().Obj()); +} - DocumentSourceBsonArray::DocumentSourceBsonArray( - const BSONObj& array, - const intrusive_ptr<ExpressionContext> &pExpCtx) - : DocumentSource(pExpCtx) - , embeddedObject(array) - , arrayIterator(embeddedObject) - {} +void DocumentSourceBsonArray::setSource(DocumentSource* pSource) { + /* this doesn't take a source */ + verify(false); +} - intrusive_ptr<DocumentSourceBsonArray> DocumentSourceBsonArray::create( - const BSONObj& array, - const intrusive_ptr<ExpressionContext> &pExpCtx) { +DocumentSourceBsonArray::DocumentSourceBsonArray(const BSONObj& array, + const intrusive_ptr<ExpressionContext>& pExpCtx) + : DocumentSource(pExpCtx), embeddedObject(array), 
arrayIterator(embeddedObject) {} - return new DocumentSourceBsonArray(array, pExpCtx); - } +intrusive_ptr<DocumentSourceBsonArray> DocumentSourceBsonArray::create( + const BSONObj& array, const intrusive_ptr<ExpressionContext>& pExpCtx) { + return new DocumentSourceBsonArray(array, pExpCtx); +} - Value DocumentSourceBsonArray::serialize(bool explain) const { - if (explain) { - return Value(DOC("bsonArray" << Document())); - } - return Value(); +Value DocumentSourceBsonArray::serialize(bool explain) const { + if (explain) { + return Value(DOC("bsonArray" << Document())); } + return Value(); +} } diff --git a/src/mongo/db/pipeline/document_source_command_shards.cpp b/src/mongo/db/pipeline/document_source_command_shards.cpp index 5e32f19881c..548c68d4f64 100644 --- a/src/mongo/db/pipeline/document_source_command_shards.cpp +++ b/src/mongo/db/pipeline/document_source_command_shards.cpp @@ -33,97 +33,93 @@ namespace mongo { - using boost::intrusive_ptr; - using std::vector; +using boost::intrusive_ptr; +using std::vector; - void DocumentSourceCommandShards::setSource(DocumentSource *pSource) { - /* this doesn't take a source */ - verify(false); - } +void DocumentSourceCommandShards::setSource(DocumentSource* pSource) { + /* this doesn't take a source */ + verify(false); +} - Value DocumentSourceCommandShards::serialize(bool explain) const { - // this has no BSON equivalent - verify(false); - } +Value DocumentSourceCommandShards::serialize(bool explain) const { + // this has no BSON equivalent + verify(false); +} - DocumentSourceCommandShards::DocumentSourceCommandShards( - const ShardOutput& shardOutput, - const intrusive_ptr<ExpressionContext> &pExpCtx): - DocumentSource(pExpCtx), - unstarted(true), - hasCurrent(false), - newSource(false), - pBsonSource(), - pCurrent(), - iterator(shardOutput.begin()), - listEnd(shardOutput.end()) - {} - - intrusive_ptr<DocumentSourceCommandShards> - DocumentSourceCommandShards::create( - const ShardOutput& shardOutput, - const intrusive_ptr<ExpressionContext> &pExpCtx) { - intrusive_ptr<DocumentSourceCommandShards> pSource( - new DocumentSourceCommandShards(shardOutput, pExpCtx)); - return pSource; - } +DocumentSourceCommandShards::DocumentSourceCommandShards( + const ShardOutput& shardOutput, const intrusive_ptr<ExpressionContext>& pExpCtx) + : DocumentSource(pExpCtx), + unstarted(true), + hasCurrent(false), + newSource(false), + pBsonSource(), + pCurrent(), + iterator(shardOutput.begin()), + listEnd(shardOutput.end()) {} + +intrusive_ptr<DocumentSourceCommandShards> DocumentSourceCommandShards::create( + const ShardOutput& shardOutput, const intrusive_ptr<ExpressionContext>& pExpCtx) { + intrusive_ptr<DocumentSourceCommandShards> pSource( + new DocumentSourceCommandShards(shardOutput, pExpCtx)); + return pSource; +} namespace { - BSONArray extractResultsArray(const Strategy::CommandResult& result) { - /* grab the next command result */ - BSONObj resultObj = result.result; - - uassert(16390, str::stream() << "sharded pipeline failed on shard " << - result.shardTargetId << ": " << - resultObj.toString(), - resultObj["ok"].trueValue()); - - /* grab the result array out of the shard server's response */ - BSONElement resultArray = resultObj["result"]; - massert(16391, str::stream() << "no result array? 
shard:" << - result.shardTargetId << ": " << - resultObj.toString(), - resultArray.type() == Array); - - return BSONArray(resultArray.Obj()); - } +BSONArray extractResultsArray(const Strategy::CommandResult& result) { + /* grab the next command result */ + BSONObj resultObj = result.result; + + uassert(16390, + str::stream() << "sharded pipeline failed on shard " << result.shardTargetId << ": " + << resultObj.toString(), + resultObj["ok"].trueValue()); + + /* grab the result array out of the shard server's response */ + BSONElement resultArray = resultObj["result"]; + massert(16391, + str::stream() << "no result array? shard:" << result.shardTargetId << ": " + << resultObj.toString(), + resultArray.type() == Array); + + return BSONArray(resultArray.Obj()); +} } - vector<BSONArray> DocumentSourceCommandShards::getArrays() { - vector<BSONArray> out; - for (; iterator != listEnd; ++iterator) { - out.push_back(extractResultsArray(*iterator)); - } - return out; +vector<BSONArray> DocumentSourceCommandShards::getArrays() { + vector<BSONArray> out; + for (; iterator != listEnd; ++iterator) { + out.push_back(extractResultsArray(*iterator)); } + return out; +} - boost::optional<Document> DocumentSourceCommandShards::getNext() { - pExpCtx->checkForInterrupt(); - - while(true) { - if (!pBsonSource.get()) { - /* if there aren't any more futures, we're done */ - if (iterator == listEnd) - return boost::none; +boost::optional<Document> DocumentSourceCommandShards::getNext() { + pExpCtx->checkForInterrupt(); - BSONArray resultArray = extractResultsArray(*iterator); + while (true) { + if (!pBsonSource.get()) { + /* if there aren't any more futures, we're done */ + if (iterator == listEnd) + return boost::none; - // done with error checking, don't need the shard name anymore - ++iterator; + BSONArray resultArray = extractResultsArray(*iterator); - if (resultArray.isEmpty()){ - // this shard had no results, on to the next one - continue; - } + // done with error checking, don't need the shard name anymore + ++iterator; - pBsonSource = DocumentSourceBsonArray::create(resultArray, pExpCtx); + if (resultArray.isEmpty()) { + // this shard had no results, on to the next one + continue; } - if (boost::optional<Document> out = pBsonSource->getNext()) - return out; - - // Source exhausted. Try next. - pBsonSource.reset(); + pBsonSource = DocumentSourceBsonArray::create(resultArray, pExpCtx); } + + if (boost::optional<Document> out = pBsonSource->getNext()) + return out; + + // Source exhausted. Try next. 
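// For reference, a sketch of the shard reply consumed by extractResultsArray()
// above (values illustrative): { ok: 1, result: [ { _id: ..., ... }, ... ] }.
// A falsy "ok" trips uassert 16390 and a missing or non-array "result" trips
// massert 16391; each non-empty "result" array is wrapped in a
// DocumentSourceBsonArray and drained before advancing to the next shard.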
+ pBsonSource.reset(); } } +} diff --git a/src/mongo/db/pipeline/document_source_cursor.cpp b/src/mongo/db/pipeline/document_source_cursor.cpp index d862663363d..702852f53b2 100644 --- a/src/mongo/db/pipeline/document_source_cursor.cpp +++ b/src/mongo/db/pipeline/document_source_cursor.cpp @@ -43,179 +43,175 @@ namespace mongo { - using boost::intrusive_ptr; - using std::shared_ptr; - using std::string; +using boost::intrusive_ptr; +using std::shared_ptr; +using std::string; - DocumentSourceCursor::~DocumentSourceCursor() { - dispose(); - } - - const char *DocumentSourceCursor::getSourceName() const { - return "$cursor"; - } +DocumentSourceCursor::~DocumentSourceCursor() { + dispose(); +} - boost::optional<Document> DocumentSourceCursor::getNext() { - pExpCtx->checkForInterrupt(); +const char* DocumentSourceCursor::getSourceName() const { + return "$cursor"; +} - if (_currentBatch.empty()) { - loadBatch(); +boost::optional<Document> DocumentSourceCursor::getNext() { + pExpCtx->checkForInterrupt(); - if (_currentBatch.empty()) // exhausted the cursor - return boost::none; - } + if (_currentBatch.empty()) { + loadBatch(); - Document out = _currentBatch.front(); - _currentBatch.pop_front(); - return out; + if (_currentBatch.empty()) // exhausted the cursor + return boost::none; } - void DocumentSourceCursor::dispose() { - // Can't call in to PlanExecutor or ClientCursor registries from this function since it - // will be called when an agg cursor is killed which would cause a deadlock. - _exec.reset(); - _currentBatch.clear(); - } - - void DocumentSourceCursor::loadBatch() { - if (!_exec) { - dispose(); - return; - } + Document out = _currentBatch.front(); + _currentBatch.pop_front(); + return out; +} - // We have already validated the sharding version when we constructed the PlanExecutor - // so we shouldn't check it again. - const NamespaceString nss(_ns); - AutoGetCollectionForRead autoColl(pExpCtx->opCtx, nss); +void DocumentSourceCursor::dispose() { + // Can't call in to PlanExecutor or ClientCursor registries from this function since it + // will be called when an agg cursor is killed which would cause a deadlock. + _exec.reset(); + _currentBatch.clear(); +} - _exec->restoreState(pExpCtx->opCtx); +void DocumentSourceCursor::loadBatch() { + if (!_exec) { + dispose(); + return; + } - int memUsageBytes = 0; - BSONObj obj; - PlanExecutor::ExecState state; - while ((state = _exec->getNext(&obj, NULL)) == PlanExecutor::ADVANCED) { - if (_dependencies) { - _currentBatch.push_back(_dependencies->extractFields(obj)); - } - else { - _currentBatch.push_back(Document::fromBsonWithMetaData(obj)); - } + // We have already validated the sharding version when we constructed the PlanExecutor + // so we shouldn't check it again. 
+ const NamespaceString nss(_ns); + AutoGetCollectionForRead autoColl(pExpCtx->opCtx, nss); + + _exec->restoreState(pExpCtx->opCtx); + + int memUsageBytes = 0; + BSONObj obj; + PlanExecutor::ExecState state; + while ((state = _exec->getNext(&obj, NULL)) == PlanExecutor::ADVANCED) { + if (_dependencies) { + _currentBatch.push_back(_dependencies->extractFields(obj)); + } else { + _currentBatch.push_back(Document::fromBsonWithMetaData(obj)); + } - if (_limit) { - if (++_docsAddedToBatches == _limit->getLimit()) { - break; - } - verify(_docsAddedToBatches < _limit->getLimit()); + if (_limit) { + if (++_docsAddedToBatches == _limit->getLimit()) { + break; } + verify(_docsAddedToBatches < _limit->getLimit()); + } - memUsageBytes += _currentBatch.back().getApproximateSize(); + memUsageBytes += _currentBatch.back().getApproximateSize(); - if (memUsageBytes > MaxBytesToReturnToClientAtOnce) { - // End this batch and prepare PlanExecutor for yielding. - _exec->saveState(); - return; - } + if (memUsageBytes > MaxBytesToReturnToClientAtOnce) { + // End this batch and prepare PlanExecutor for yielding. + _exec->saveState(); + return; } + } - // If we got here, there won't be any more documents, so destroy the executor. Can't use - // dispose since we want to keep the _currentBatch. - _exec.reset(); - - uassert(16028, str::stream() << "collection or index disappeared when cursor yielded: " - << WorkingSetCommon::toStatusString(obj), - state != PlanExecutor::DEAD); + // If we got here, there won't be any more documents, so destroy the executor. Can't use + // dispose since we want to keep the _currentBatch. + _exec.reset(); - uassert(17285, str::stream() << "cursor encountered an error: " - << WorkingSetCommon::toStatusString(obj), - state != PlanExecutor::FAILURE); + uassert(16028, + str::stream() << "collection or index disappeared when cursor yielded: " + << WorkingSetCommon::toStatusString(obj), + state != PlanExecutor::DEAD); - massert(17286, str::stream() << "Unexpected return from PlanExecutor::getNext: " << state, - state == PlanExecutor::IS_EOF || state == PlanExecutor::ADVANCED); - } + uassert( + 17285, + str::stream() << "cursor encountered an error: " << WorkingSetCommon::toStatusString(obj), + state != PlanExecutor::FAILURE); - void DocumentSourceCursor::setSource(DocumentSource *pSource) { - /* this doesn't take a source */ - verify(false); - } + massert(17286, + str::stream() << "Unexpected return from PlanExecutor::getNext: " << state, + state == PlanExecutor::IS_EOF || state == PlanExecutor::ADVANCED); +} - long long DocumentSourceCursor::getLimit() const { - return _limit ? _limit->getLimit() : -1; - } +void DocumentSourceCursor::setSource(DocumentSource* pSource) { + /* this doesn't take a source */ + verify(false); +} - bool DocumentSourceCursor::coalesce(const intrusive_ptr<DocumentSource>& nextSource) { - // Note: Currently we assume the $limit is logically after any $sort or - // $match. If we ever pull in $match or $sort using this method, we - // will need to keep track of the order of the sub-stages. +long long DocumentSourceCursor::getLimit() const { + return _limit ? _limit->getLimit() : -1; +} - if (!_limit) { - _limit = dynamic_cast<DocumentSourceLimit*>(nextSource.get()); - return _limit.get(); // false if next is not a $limit - } - else { - return _limit->coalesce(nextSource); - } +bool DocumentSourceCursor::coalesce(const intrusive_ptr<DocumentSource>& nextSource) { + // Note: Currently we assume the $limit is logically after any $sort or + // $match. 
If we ever pull in $match or $sort using this method, we + // will need to keep track of the order of the sub-stages. - return false; + if (!_limit) { + _limit = dynamic_cast<DocumentSourceLimit*>(nextSource.get()); + return _limit.get(); // false if next is not a $limit + } else { + return _limit->coalesce(nextSource); } - Value DocumentSourceCursor::serialize(bool explain) const { - // we never parse a documentSourceCursor, so we only serialize for explain - if (!explain) - return Value(); + return false; +} - // Get planner-level explain info from the underlying PlanExecutor. - BSONObjBuilder explainBuilder; - { - const NamespaceString nss(_ns); - AutoGetCollectionForRead autoColl(pExpCtx->opCtx, nss); +Value DocumentSourceCursor::serialize(bool explain) const { + // we never parse a documentSourceCursor, so we only serialize for explain + if (!explain) + return Value(); - massert(17392, "No _exec. Were we disposed before explained?", _exec); + // Get planner-level explain info from the underlying PlanExecutor. + BSONObjBuilder explainBuilder; + { + const NamespaceString nss(_ns); + AutoGetCollectionForRead autoColl(pExpCtx->opCtx, nss); - _exec->restoreState(pExpCtx->opCtx); - Explain::explainStages(_exec.get(), ExplainCommon::QUERY_PLANNER, &explainBuilder); - _exec->saveState(); - } + massert(17392, "No _exec. Were we disposed before explained?", _exec); - MutableDocument out; - out["query"] = Value(_query); + _exec->restoreState(pExpCtx->opCtx); + Explain::explainStages(_exec.get(), ExplainCommon::QUERY_PLANNER, &explainBuilder); + _exec->saveState(); + } - if (!_sort.isEmpty()) - out["sort"] = Value(_sort); + MutableDocument out; + out["query"] = Value(_query); - if (_limit) - out["limit"] = Value(_limit->getLimit()); + if (!_sort.isEmpty()) + out["sort"] = Value(_sort); - if (!_projection.isEmpty()) - out["fields"] = Value(_projection); + if (_limit) + out["limit"] = Value(_limit->getLimit()); - // Add explain results from the query system into the agg explain output. - BSONObj explainObj = explainBuilder.obj(); - invariant(explainObj.hasField("queryPlanner")); - out["queryPlanner"] = Value(explainObj["queryPlanner"]); + if (!_projection.isEmpty()) + out["fields"] = Value(_projection); - return Value(DOC(getSourceName() << out.freezeToValue())); - } + // Add explain results from the query system into the agg explain output. 
+ BSONObj explainObj = explainBuilder.obj(); + invariant(explainObj.hasField("queryPlanner")); + out["queryPlanner"] = Value(explainObj["queryPlanner"]); - DocumentSourceCursor::DocumentSourceCursor(const string& ns, - const std::shared_ptr<PlanExecutor>& exec, - const intrusive_ptr<ExpressionContext> &pCtx) - : DocumentSource(pCtx) - , _docsAddedToBatches(0) - , _ns(ns) - , _exec(exec) - {} - - intrusive_ptr<DocumentSourceCursor> DocumentSourceCursor::create( - const string& ns, - const std::shared_ptr<PlanExecutor>& exec, - const intrusive_ptr<ExpressionContext> &pExpCtx) { - return new DocumentSourceCursor(ns, exec, pExpCtx); - } + return Value(DOC(getSourceName() << out.freezeToValue())); +} - void DocumentSourceCursor::setProjection( - const BSONObj& projection, - const boost::optional<ParsedDeps>& deps) { - _projection = projection; - _dependencies = deps; - } +DocumentSourceCursor::DocumentSourceCursor(const string& ns, + const std::shared_ptr<PlanExecutor>& exec, + const intrusive_ptr<ExpressionContext>& pCtx) + : DocumentSource(pCtx), _docsAddedToBatches(0), _ns(ns), _exec(exec) {} + +intrusive_ptr<DocumentSourceCursor> DocumentSourceCursor::create( + const string& ns, + const std::shared_ptr<PlanExecutor>& exec, + const intrusive_ptr<ExpressionContext>& pExpCtx) { + return new DocumentSourceCursor(ns, exec, pExpCtx); +} + +void DocumentSourceCursor::setProjection(const BSONObj& projection, + const boost::optional<ParsedDeps>& deps) { + _projection = projection; + _dependencies = deps; +} } diff --git a/src/mongo/db/pipeline/document_source_geo_near.cpp b/src/mongo/db/pipeline/document_source_geo_near.cpp index 20ba3e2dadc..c66702480fc 100644 --- a/src/mongo/db/pipeline/document_source_geo_near.cpp +++ b/src/mongo/db/pipeline/document_source_geo_near.cpp @@ -36,197 +36,195 @@ namespace mongo { - using boost::intrusive_ptr; - using std::min; +using boost::intrusive_ptr; +using std::min; - char DocumentSourceGeoNear::geoNearName[] = "$geoNear"; - const char *DocumentSourceGeoNear::getSourceName() const { return geoNearName; } - - boost::optional<Document> DocumentSourceGeoNear::getNext() { - pExpCtx->checkForInterrupt(); +char DocumentSourceGeoNear::geoNearName[] = "$geoNear"; +const char* DocumentSourceGeoNear::getSourceName() const { + return geoNearName; +} - if (!resultsIterator) - runCommand(); +boost::optional<Document> DocumentSourceGeoNear::getNext() { + pExpCtx->checkForInterrupt(); - if (!resultsIterator->more()) - return boost::none; + if (!resultsIterator) + runCommand(); - // each result from the geoNear command is wrapped in a wrapper object with "obj", - // "dis" and maybe "loc" fields. We want to take the object from "obj" and inject the - // other fields into it. - Document result (resultsIterator->next().embeddedObject()); - MutableDocument output (result["obj"].getDocument()); - output.setNestedField(*distanceField, result["dis"]); - if (includeLocs) - output.setNestedField(*includeLocs, result["loc"]); + if (!resultsIterator->more()) + return boost::none; - return output.freeze(); - } + // each result from the geoNear command is wrapped in a wrapper object with "obj", + // "dis" and maybe "loc" fields. We want to take the object from "obj" and inject the + // other fields into it. 
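// For example, with distanceField "dist.calculated" and includeLocs
// "dist.location" (a sketch; values illustrative):
//   in:  { dis: 0.002, loc: { type: "Point", ... }, obj: { _id: 1, name: "a" } }
//   out: { _id: 1, name: "a",
//          dist: { calculated: 0.002, location: { type: "Point", ... } } }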
+ Document result(resultsIterator->next().embeddedObject()); + MutableDocument output(result["obj"].getDocument()); + output.setNestedField(*distanceField, result["dis"]); + if (includeLocs) + output.setNestedField(*includeLocs, result["loc"]); - void DocumentSourceGeoNear::setSource(DocumentSource*) { - uasserted(16602, "$geoNear is only allowed as the first pipeline stage"); - } + return output.freeze(); +} - bool DocumentSourceGeoNear::coalesce(const intrusive_ptr<DocumentSource> &pNextSource) { - DocumentSourceLimit* limitSrc = dynamic_cast<DocumentSourceLimit*>(pNextSource.get()); - if (limitSrc) { - limit = min(limit, limitSrc->getLimit()); - return true; - } +void DocumentSourceGeoNear::setSource(DocumentSource*) { + uasserted(16602, "$geoNear is only allowed as the first pipeline stage"); +} - return false; +bool DocumentSourceGeoNear::coalesce(const intrusive_ptr<DocumentSource>& pNextSource) { + DocumentSourceLimit* limitSrc = dynamic_cast<DocumentSourceLimit*>(pNextSource.get()); + if (limitSrc) { + limit = min(limit, limitSrc->getLimit()); + return true; } - // This command is sent as-is to the shards. - // On router this becomes a sort by distance (nearest-first) with limit. - intrusive_ptr<DocumentSource> DocumentSourceGeoNear::getShardSource() { return this; } - intrusive_ptr<DocumentSource> DocumentSourceGeoNear::getMergeSource() { - return DocumentSourceSort::create(pExpCtx, - BSON(distanceField->getPath(false) << 1), - limit); - } + return false; +} - Value DocumentSourceGeoNear::serialize(bool explain) const { - MutableDocument result; +// This command is sent as-is to the shards. +// On router this becomes a sort by distance (nearest-first) with limit. +intrusive_ptr<DocumentSource> DocumentSourceGeoNear::getShardSource() { + return this; +} +intrusive_ptr<DocumentSource> DocumentSourceGeoNear::getMergeSource() { + return DocumentSourceSort::create(pExpCtx, BSON(distanceField->getPath(false) << 1), limit); +} - if (coordsIsArray) { - result.setField("near", Value(BSONArray(coords))); - } - else { - result.setField("near", Value(coords)); - } +Value DocumentSourceGeoNear::serialize(bool explain) const { + MutableDocument result; - // not in buildGeoNearCmd - result.setField("distanceField", Value(distanceField->getPath(false))); + if (coordsIsArray) { + result.setField("near", Value(BSONArray(coords))); + } else { + result.setField("near", Value(coords)); + } - result.setField("limit", Value(limit)); + // not in buildGeoNearCmd + result.setField("distanceField", Value(distanceField->getPath(false))); - if (maxDistance > 0) - result.setField("maxDistance", Value(maxDistance)); + result.setField("limit", Value(limit)); - if (minDistance > 0) - result.setField("minDistance", Value(minDistance)); + if (maxDistance > 0) + result.setField("maxDistance", Value(maxDistance)); - result.setField("query", Value(query)); - result.setField("spherical", Value(spherical)); - result.setField("distanceMultiplier", Value(distanceMultiplier)); + if (minDistance > 0) + result.setField("minDistance", Value(minDistance)); - if (includeLocs) - result.setField("includeLocs", Value(includeLocs->getPath(false))); + result.setField("query", Value(query)); + result.setField("spherical", Value(spherical)); + result.setField("distanceMultiplier", Value(distanceMultiplier)); - return Value(DOC(getSourceName() << result.freeze())); - } + if (includeLocs) + result.setField("includeLocs", Value(includeLocs->getPath(false))); - BSONObj DocumentSourceGeoNear::buildGeoNearCmd() const { - // this is very 
similar to sourceToBson, but slightly different. - // differences will be noted. + return Value(DOC(getSourceName() << result.freeze())); +} - BSONObjBuilder geoNear; // not building a subField +BSONObj DocumentSourceGeoNear::buildGeoNearCmd() const { + // this is very similar to sourceToBson, but slightly different. + // differences will be noted. - geoNear.append("geoNear", pExpCtx->ns.coll()); // not in toBson + BSONObjBuilder geoNear; // not building a subField - if (coordsIsArray) { - geoNear.appendArray("near", coords); - } - else { - geoNear.append("near", coords); - } + geoNear.append("geoNear", pExpCtx->ns.coll()); // not in toBson - geoNear.append("num", limit); // called limit in toBson + if (coordsIsArray) { + geoNear.appendArray("near", coords); + } else { + geoNear.append("near", coords); + } - if (maxDistance > 0) - geoNear.append("maxDistance", maxDistance); + geoNear.append("num", limit); // called limit in toBson - if (minDistance > 0) - geoNear.append("minDistance", minDistance); + if (maxDistance > 0) + geoNear.append("maxDistance", maxDistance); - geoNear.append("query", query); - geoNear.append("spherical", spherical); - geoNear.append("distanceMultiplier", distanceMultiplier); + if (minDistance > 0) + geoNear.append("minDistance", minDistance); - if (includeLocs) - geoNear.append("includeLocs", true); // String in toBson + geoNear.append("query", query); + geoNear.append("spherical", spherical); + geoNear.append("distanceMultiplier", distanceMultiplier); - return geoNear.obj(); - } + if (includeLocs) + geoNear.append("includeLocs", true); // String in toBson - void DocumentSourceGeoNear::runCommand() { - massert(16603, "Already ran geoNearCommand", - !resultsIterator); + return geoNear.obj(); +} - bool ok = _mongod->directClient()->runCommand(pExpCtx->ns.db().toString(), - buildGeoNearCmd(), - cmdOutput); - uassert(16604, "geoNear command failed: " + cmdOutput.toString(), - ok); +void DocumentSourceGeoNear::runCommand() { + massert(16603, "Already ran geoNearCommand", !resultsIterator); - resultsIterator.reset(new BSONObjIterator(cmdOutput["results"].embeddedObject())); - } + bool ok = _mongod->directClient()->runCommand( + pExpCtx->ns.db().toString(), buildGeoNearCmd(), cmdOutput); + uassert(16604, "geoNear command failed: " + cmdOutput.toString(), ok); - intrusive_ptr<DocumentSourceGeoNear> DocumentSourceGeoNear::create( - const intrusive_ptr<ExpressionContext> &pCtx) { - return new DocumentSourceGeoNear(pCtx); - } + resultsIterator.reset(new BSONObjIterator(cmdOutput["results"].embeddedObject())); +} - intrusive_ptr<DocumentSource> DocumentSourceGeoNear::createFromBson( - BSONElement elem, - const intrusive_ptr<ExpressionContext> &pCtx) { - intrusive_ptr<DocumentSourceGeoNear> out = new DocumentSourceGeoNear(pCtx); - out->parseOptions(elem.embeddedObjectUserCheck()); - return out; - } +intrusive_ptr<DocumentSourceGeoNear> DocumentSourceGeoNear::create( + const intrusive_ptr<ExpressionContext>& pCtx) { + return new DocumentSourceGeoNear(pCtx); +} - void DocumentSourceGeoNear::parseOptions(BSONObj options) { - // near and distanceField are required +intrusive_ptr<DocumentSource> DocumentSourceGeoNear::createFromBson( + BSONElement elem, const intrusive_ptr<ExpressionContext>& pCtx) { + intrusive_ptr<DocumentSourceGeoNear> out = new DocumentSourceGeoNear(pCtx); + out->parseOptions(elem.embeddedObjectUserCheck()); + return out; +} - uassert(16605, "$geoNear requires a 'near' option as an Array", - options["near"].isABSONObj()); // Array or Object (Object is 
deprecated) - coordsIsArray = options["near"].type() == Array; - coords = options["near"].embeddedObject().getOwned(); +void DocumentSourceGeoNear::parseOptions(BSONObj options) { + // near and distanceField are required - uassert(16606, "$geoNear requires a 'distanceField' option as a String", - options["distanceField"].type() == String); - distanceField.reset(new FieldPath(options["distanceField"].str())); + uassert(16605, + "$geoNear requires a 'near' option as an Array", + options["near"].isABSONObj()); // Array or Object (Object is deprecated) + coordsIsArray = options["near"].type() == Array; + coords = options["near"].embeddedObject().getOwned(); - // remaining fields are optional + uassert(16606, + "$geoNear requires a 'distanceField' option as a String", + options["distanceField"].type() == String); + distanceField.reset(new FieldPath(options["distanceField"].str())); - // num and limit are synonyms - if (options["limit"].isNumber()) - limit = options["limit"].numberLong(); - if (options["num"].isNumber()) - limit = options["num"].numberLong(); + // remaining fields are optional - if (options["maxDistance"].isNumber()) - maxDistance = options["maxDistance"].numberDouble(); + // num and limit are synonyms + if (options["limit"].isNumber()) + limit = options["limit"].numberLong(); + if (options["num"].isNumber()) + limit = options["num"].numberLong(); - if (options["minDistance"].isNumber()) - minDistance = options["minDistance"].numberDouble(); + if (options["maxDistance"].isNumber()) + maxDistance = options["maxDistance"].numberDouble(); - if (options["query"].type() == Object) - query = options["query"].embeddedObject().getOwned(); + if (options["minDistance"].isNumber()) + minDistance = options["minDistance"].numberDouble(); - spherical = options["spherical"].trueValue(); + if (options["query"].type() == Object) + query = options["query"].embeddedObject().getOwned(); - if (options["distanceMultiplier"].isNumber()) - distanceMultiplier = options["distanceMultiplier"].numberDouble(); + spherical = options["spherical"].trueValue(); - if (options.hasField("includeLocs")) { - uassert(16607, "$geoNear requires that 'includeLocs' option is a String", - options["includeLocs"].type() == String); - includeLocs.reset(new FieldPath(options["includeLocs"].str())); - } + if (options["distanceMultiplier"].isNumber()) + distanceMultiplier = options["distanceMultiplier"].numberDouble(); - if (options.hasField("uniqueDocs")) - warning() << "ignoring deprecated uniqueDocs option in $geoNear aggregation stage"; + if (options.hasField("includeLocs")) { + uassert(16607, + "$geoNear requires that 'includeLocs' option is a String", + options["includeLocs"].type() == String); + includeLocs.reset(new FieldPath(options["includeLocs"].str())); } - DocumentSourceGeoNear::DocumentSourceGeoNear(const intrusive_ptr<ExpressionContext> &pExpCtx) - : DocumentSource(pExpCtx) - , coordsIsArray(false) - , limit(100) - , maxDistance(-1.0) - , minDistance(-1.0) - , spherical(false) - , distanceMultiplier(1.0) - {} + if (options.hasField("uniqueDocs")) + warning() << "ignoring deprecated uniqueDocs option in $geoNear aggregation stage"; +} + +DocumentSourceGeoNear::DocumentSourceGeoNear(const intrusive_ptr<ExpressionContext>& pExpCtx) + : DocumentSource(pExpCtx), + coordsIsArray(false), + limit(100), + maxDistance(-1.0), + minDistance(-1.0), + spherical(false), + distanceMultiplier(1.0) {} } diff --git a/src/mongo/db/pipeline/document_source_group.cpp b/src/mongo/db/pipeline/document_source_group.cpp index 
aba16b65e17..487f0809ed0 100644 --- a/src/mongo/db/pipeline/document_source_group.cpp +++ b/src/mongo/db/pipeline/document_source_group.cpp @@ -39,596 +39,580 @@ namespace mongo { - using boost::intrusive_ptr; - using std::shared_ptr; - using std::pair; - using std::vector; +using boost::intrusive_ptr; +using std::shared_ptr; +using std::pair; +using std::vector; - const char DocumentSourceGroup::groupName[] = "$group"; +const char DocumentSourceGroup::groupName[] = "$group"; - const char *DocumentSourceGroup::getSourceName() const { - return groupName; - } +const char* DocumentSourceGroup::getSourceName() const { + return groupName; +} - boost::optional<Document> DocumentSourceGroup::getNext() { - pExpCtx->checkForInterrupt(); +boost::optional<Document> DocumentSourceGroup::getNext() { + pExpCtx->checkForInterrupt(); - if (!populated) - populate(); + if (!populated) + populate(); - if (_spilled) { - if (!_sorterIterator) - return boost::none; + if (_spilled) { + if (!_sorterIterator) + return boost::none; - const size_t numAccumulators = vpAccumulatorFactory.size(); - for (size_t i=0; i < numAccumulators; i++) { - _currentAccumulators[i]->reset(); // prep accumulators for a new group - } + const size_t numAccumulators = vpAccumulatorFactory.size(); + for (size_t i = 0; i < numAccumulators; i++) { + _currentAccumulators[i]->reset(); // prep accumulators for a new group + } - _currentId = _firstPartOfNextGroup.first; - while (_currentId == _firstPartOfNextGroup.first) { - // Inside of this loop, _firstPartOfNextGroup is the current data being processed. - // At loop exit, it is the first value to be processed in the next group. + _currentId = _firstPartOfNextGroup.first; + while (_currentId == _firstPartOfNextGroup.first) { + // Inside of this loop, _firstPartOfNextGroup is the current data being processed. + // At loop exit, it is the first value to be processed in the next group. 
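// Sketch of the spilled records consumed by this merge loop: each record is a
// (groupId, states) pair whose second member mirrors the switch below
// (values illustrative):
//   0 accumulators: the paired Value is unused
//   1 accumulator:  the lone serialized state, e.g. (Value(id), Value(42))
//   N accumulators: an array with one state per accumulator, in
//                   vpAccumulatorFactory order, e.g. (Value(id), Value([42, "x"]))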
- switch (numAccumulators) { // mirrors switch in spill() - case 0: // no Accumulators so no Values + switch (numAccumulators) { // mirrors switch in spill() + case 0: // no Accumulators so no Values break; - case 1: // single accumulators serialize as a single Value + case 1: // single accumulators serialize as a single Value _currentAccumulators[0]->process(_firstPartOfNextGroup.second, /*merging=*/true); break; - default: { // multiple accumulators serialize as an array + default: { // multiple accumulators serialize as an array const vector<Value>& accumulatorStates = _firstPartOfNextGroup.second.getArray(); - for (size_t i=0; i < numAccumulators; i++) { + for (size_t i = 0; i < numAccumulators; i++) { _currentAccumulators[i]->process(accumulatorStates[i], /*merging=*/true); } break; } - } - - if (!_sorterIterator->more()) { - dispose(); - break; - } - - _firstPartOfNextGroup = _sorterIterator->next(); } - return makeDocument(_currentId, _currentAccumulators, pExpCtx->inShard); - - } else { - if (groups.empty()) - return boost::none; - - Document out = makeDocument(groupsIterator->first, - groupsIterator->second, - pExpCtx->inShard); - - if (++groupsIterator == groups.end()) + if (!_sorterIterator->more()) { dispose(); + break; + } - return out; + _firstPartOfNextGroup = _sorterIterator->next(); } - } - - void DocumentSourceGroup::dispose() { - // free our resources - GroupsMap().swap(groups); - _sorterIterator.reset(); - // make us look done - groupsIterator = groups.end(); + return makeDocument(_currentId, _currentAccumulators, pExpCtx->inShard); - // free our source's resources - pSource->dispose(); - } + } else { + if (groups.empty()) + return boost::none; - intrusive_ptr<DocumentSource> DocumentSourceGroup::optimize() { - // TODO if all _idExpressions are ExpressionConstants after optimization, then we know there - // will only be one group. We should take advantage of that to avoid going through the hash - // table. - for (size_t i = 0; i < _idExpressions.size(); i++) { - _idExpressions[i] = _idExpressions[i]->optimize(); - } + Document out = + makeDocument(groupsIterator->first, groupsIterator->second, pExpCtx->inShard); - for (size_t i = 0; i < vFieldName.size(); i++) { - vpExpression[i] = vpExpression[i]->optimize(); - } + if (++groupsIterator == groups.end()) + dispose(); - return this; + return out; } +} - Value DocumentSourceGroup::serialize(bool explain) const { - MutableDocument insides; +void DocumentSourceGroup::dispose() { + // free our resources + GroupsMap().swap(groups); + _sorterIterator.reset(); - // add the _id - if (_idFieldNames.empty()) { - invariant(_idExpressions.size() == 1); - insides["_id"] = _idExpressions[0]->serialize(explain); - } - else { - // decomposed document case - invariant(_idExpressions.size() == _idFieldNames.size()); - MutableDocument md; - for (size_t i = 0; i < _idExpressions.size(); i++) { - md[_idFieldNames[i]] = _idExpressions[i]->serialize(explain); - } - insides["_id"] = md.freezeToValue(); - } + // make us look done + groupsIterator = groups.end(); - // add the remaining fields - const size_t n = vFieldName.size(); - for(size_t i = 0; i < n; ++i) { - intrusive_ptr<Accumulator> accum = vpAccumulatorFactory[i](); - insides[vFieldName[i]] = - Value(DOC(accum->getOpName() << vpExpression[i]->serialize(explain))); - } - - if (_doingMerge) { - // This makes the output unparsable (with error) on pre 2.6 shards, but it will never - // be sent to old shards when this flag is true since they can't do a merge anyway. 
+ // free our source's resources + pSource->dispose(); +} - insides["$doingMerge"] = Value(true); - } +intrusive_ptr<DocumentSource> DocumentSourceGroup::optimize() { + // TODO if all _idExpressions are ExpressionConstants after optimization, then we know there + // will only be one group. We should take advantage of that to avoid going through the hash + // table. + for (size_t i = 0; i < _idExpressions.size(); i++) { + _idExpressions[i] = _idExpressions[i]->optimize(); + } - return Value(DOC(getSourceName() << insides.freeze())); + for (size_t i = 0; i < vFieldName.size(); i++) { + vpExpression[i] = vpExpression[i]->optimize(); } - DocumentSource::GetDepsReturn DocumentSourceGroup::getDependencies(DepsTracker* deps) const { - // add the _id + return this; +} + +Value DocumentSourceGroup::serialize(bool explain) const { + MutableDocument insides; + + // add the _id + if (_idFieldNames.empty()) { + invariant(_idExpressions.size() == 1); + insides["_id"] = _idExpressions[0]->serialize(explain); + } else { + // decomposed document case + invariant(_idExpressions.size() == _idFieldNames.size()); + MutableDocument md; for (size_t i = 0; i < _idExpressions.size(); i++) { - _idExpressions[i]->addDependencies(deps); + md[_idFieldNames[i]] = _idExpressions[i]->serialize(explain); } + insides["_id"] = md.freezeToValue(); + } - // add the rest - const size_t n = vFieldName.size(); - for(size_t i = 0; i < n; ++i) { - vpExpression[i]->addDependencies(deps); - } + // add the remaining fields + const size_t n = vFieldName.size(); + for (size_t i = 0; i < n; ++i) { + intrusive_ptr<Accumulator> accum = vpAccumulatorFactory[i](); + insides[vFieldName[i]] = + Value(DOC(accum->getOpName() << vpExpression[i]->serialize(explain))); + } + + if (_doingMerge) { + // This makes the output unparsable (with error) on pre 2.6 shards, but it will never + // be sent to old shards when this flag is true since they can't do a merge anyway. 
- return EXHAUSTIVE_ALL; + insides["$doingMerge"] = Value(true); } - intrusive_ptr<DocumentSourceGroup> DocumentSourceGroup::create( - const intrusive_ptr<ExpressionContext> &pExpCtx) { - intrusive_ptr<DocumentSourceGroup> pSource( - new DocumentSourceGroup(pExpCtx)); - return pSource; + return Value(DOC(getSourceName() << insides.freeze())); +} + +DocumentSource::GetDepsReturn DocumentSourceGroup::getDependencies(DepsTracker* deps) const { + // add the _id + for (size_t i = 0; i < _idExpressions.size(); i++) { + _idExpressions[i]->addDependencies(deps); } - DocumentSourceGroup::DocumentSourceGroup(const intrusive_ptr<ExpressionContext>& pExpCtx) - : DocumentSource(pExpCtx) - , populated(false) - , _doingMerge(false) - , _spilled(false) - , _extSortAllowed(pExpCtx->extSortAllowed && !pExpCtx->inRouter) - , _maxMemoryUsageBytes(100*1024*1024) - {} - - void DocumentSourceGroup::addAccumulator( - const std::string& fieldName, - intrusive_ptr<Accumulator> (*pAccumulatorFactory)(), - const intrusive_ptr<Expression> &pExpression) { - vFieldName.push_back(fieldName); - vpAccumulatorFactory.push_back(pAccumulatorFactory); - vpExpression.push_back(pExpression); + // add the rest + const size_t n = vFieldName.size(); + for (size_t i = 0; i < n; ++i) { + vpExpression[i]->addDependencies(deps); } + return EXHAUSTIVE_ALL; +} - struct GroupOpDesc { - const char* name; - intrusive_ptr<Accumulator> (*factory)(); - }; +intrusive_ptr<DocumentSourceGroup> DocumentSourceGroup::create( + const intrusive_ptr<ExpressionContext>& pExpCtx) { + intrusive_ptr<DocumentSourceGroup> pSource(new DocumentSourceGroup(pExpCtx)); + return pSource; +} - static int GroupOpDescCmp(const void *pL, const void *pR) { - return strcmp(((const GroupOpDesc *)pL)->name, - ((const GroupOpDesc *)pR)->name); - } +DocumentSourceGroup::DocumentSourceGroup(const intrusive_ptr<ExpressionContext>& pExpCtx) + : DocumentSource(pExpCtx), + populated(false), + _doingMerge(false), + _spilled(false), + _extSortAllowed(pExpCtx->extSortAllowed && !pExpCtx->inRouter), + _maxMemoryUsageBytes(100 * 1024 * 1024) {} + +void DocumentSourceGroup::addAccumulator(const std::string& fieldName, + intrusive_ptr<Accumulator>(*pAccumulatorFactory)(), + const intrusive_ptr<Expression>& pExpression) { + vFieldName.push_back(fieldName); + vpAccumulatorFactory.push_back(pAccumulatorFactory); + vpExpression.push_back(pExpression); +} - /* - Keep these sorted alphabetically so we can bsearch() them using - GroupOpDescCmp() above. 
- */ - static const GroupOpDesc GroupOpTable[] = { - {"$addToSet", AccumulatorAddToSet::create}, - {"$avg", AccumulatorAvg::create}, - {"$first", AccumulatorFirst::create}, - {"$last", AccumulatorLast::create}, - {"$max", AccumulatorMinMax::createMax}, - {"$min", AccumulatorMinMax::createMin}, - {"$push", AccumulatorPush::create}, - {"$stdDevPop", AccumulatorStdDev::createPop}, - {"$stdDevSamp", AccumulatorStdDev::createSamp}, - {"$sum", AccumulatorSum::create}, - }; - - static const size_t NGroupOp = sizeof(GroupOpTable)/sizeof(GroupOpTable[0]); - - intrusive_ptr<DocumentSource> DocumentSourceGroup::createFromBson( - BSONElement elem, - const intrusive_ptr<ExpressionContext> &pExpCtx) { - uassert(15947, "a group's fields must be specified in an object", - elem.type() == Object); - - intrusive_ptr<DocumentSourceGroup> pGroup( - DocumentSourceGroup::create(pExpCtx)); - - BSONObj groupObj(elem.Obj()); - BSONObjIterator groupIterator(groupObj); - VariablesIdGenerator idGenerator; - VariablesParseState vps(&idGenerator); - while(groupIterator.more()) { - BSONElement groupField(groupIterator.next()); - const char *pFieldName = groupField.fieldName(); - - if (str::equals(pFieldName, "_id")) { - uassert(15948, "a group's _id may only be specified once", - pGroup->_idExpressions.empty()); - pGroup->parseIdExpression(groupField, vps); - invariant(!pGroup->_idExpressions.empty()); - } - else if (str::equals(pFieldName, "$doingMerge")) { - massert(17030, "$doingMerge should be true if present", - groupField.Bool()); - pGroup->setDoingMerge(true); - } - else { - /* - Treat as a projection field with the additional ability to - add aggregation operators. - */ - uassert(16414, str::stream() << - "the group aggregate field name '" << pFieldName << - "' cannot be used because $group's field names cannot contain '.'", - !str::contains(pFieldName, '.') ); - - uassert(15950, str::stream() << - "the group aggregate field name '" << - pFieldName << "' cannot be an operator name", - pFieldName[0] != '$'); - - uassert(15951, str::stream() << - "the group aggregate field '" << pFieldName << - "' must be defined as an expression inside an object", - groupField.type() == Object); - - BSONObj subField(groupField.Obj()); - BSONObjIterator subIterator(subField); - size_t subCount = 0; - for(; subIterator.more(); ++subCount) { - BSONElement subElement(subIterator.next()); - - /* look for the specified operator */ - GroupOpDesc key; - key.name = subElement.fieldName(); - const GroupOpDesc *pOp = - (const GroupOpDesc *)bsearch( - &key, GroupOpTable, NGroupOp, sizeof(GroupOpDesc), - GroupOpDescCmp); - - uassert(15952, str::stream() << "unknown group operator '" << key.name << "'", - pOp); - - intrusive_ptr<Expression> pGroupExpr; - - BSONType elementType = subElement.type(); - if (elementType == Object) { - Expression::ObjectCtx oCtx(Expression::ObjectCtx::DOCUMENT_OK); - pGroupExpr = Expression::parseObject(subElement.Obj(), &oCtx, vps); - } - else if (elementType == Array) { - uasserted(15953, str::stream() - << "aggregating group operators are unary (" << key.name << ")"); - } - else { /* assume its an atomic single operand */ - pGroupExpr = Expression::parseOperand(subElement, vps); - } +struct GroupOpDesc { + const char* name; + intrusive_ptr<Accumulator>(*factory)(); +}; - pGroup->addAccumulator(pFieldName, pOp->factory, pGroupExpr); +static int GroupOpDescCmp(const void* pL, const void* pR) { + return strcmp(((const GroupOpDesc*)pL)->name, ((const GroupOpDesc*)pR)->name); +} + +/* + Keep these sorted alphabetically 
so we can bsearch() them using
+ GroupOpDescCmp() above.
+*/
+static const GroupOpDesc GroupOpTable[] = {
+    {"$addToSet", AccumulatorAddToSet::create},
+    {"$avg", AccumulatorAvg::create},
+    {"$first", AccumulatorFirst::create},
+    {"$last", AccumulatorLast::create},
+    {"$max", AccumulatorMinMax::createMax},
+    {"$min", AccumulatorMinMax::createMin},
+    {"$push", AccumulatorPush::create},
+    {"$stdDevPop", AccumulatorStdDev::createPop},
+    {"$stdDevSamp", AccumulatorStdDev::createSamp},
+    {"$sum", AccumulatorSum::create},
+};
+
+static const size_t NGroupOp = sizeof(GroupOpTable) / sizeof(GroupOpTable[0]);
+
+intrusive_ptr<DocumentSource> DocumentSourceGroup::createFromBson(
+    BSONElement elem, const intrusive_ptr<ExpressionContext>& pExpCtx) {
+    uassert(15947, "a group's fields must be specified in an object", elem.type() == Object);
+
+    intrusive_ptr<DocumentSourceGroup> pGroup(DocumentSourceGroup::create(pExpCtx));
+
+    BSONObj groupObj(elem.Obj());
+    BSONObjIterator groupIterator(groupObj);
+    VariablesIdGenerator idGenerator;
+    VariablesParseState vps(&idGenerator);
+    while (groupIterator.more()) {
+        BSONElement groupField(groupIterator.next());
+        const char* pFieldName = groupField.fieldName();
+
+        if (str::equals(pFieldName, "_id")) {
+            uassert(
+                15948, "a group's _id may only be specified once", pGroup->_idExpressions.empty());
+            pGroup->parseIdExpression(groupField, vps);
+            invariant(!pGroup->_idExpressions.empty());
+        } else if (str::equals(pFieldName, "$doingMerge")) {
+            massert(17030, "$doingMerge should be true if present", groupField.Bool());
+
+            pGroup->setDoingMerge(true);
+        } else {
+            /*
+              Treat as a projection field with the additional ability to
+              add aggregation operators.
+            */
+            uassert(
+                16414,
+                str::stream() << "the group aggregate field name '" << pFieldName
+                              << "' cannot be used because $group's field names cannot contain '.'",
+                !str::contains(pFieldName, '.'));
+
+            uassert(15950,
+                    str::stream() << "the group aggregate field name '" << pFieldName
+                                  << "' cannot be an operator name",
+                    pFieldName[0] != '$');
+
+            uassert(15951,
+                    str::stream() << "the group aggregate field '" << pFieldName
+                                  << "' must be defined as an expression inside an object",
+                    groupField.type() == Object);
+
+            BSONObj subField(groupField.Obj());
+            BSONObjIterator subIterator(subField);
+            size_t subCount = 0;
+            for (; subIterator.more(); ++subCount) {
+                BSONElement subElement(subIterator.next());
+
+                /* look for the specified operator */
+                GroupOpDesc key;
+                key.name = subElement.fieldName();
+                const GroupOpDesc* pOp = (const GroupOpDesc*)bsearch(
+                    &key, GroupOpTable, NGroupOp, sizeof(GroupOpDesc), GroupOpDescCmp);
+
+                uassert(15952, str::stream() << "unknown group operator '" << key.name << "'", pOp);
+
+                intrusive_ptr<Expression> pGroupExpr;
+
+                BSONType elementType = subElement.type();
+                if (elementType == Object) {
+                    Expression::ObjectCtx oCtx(Expression::ObjectCtx::DOCUMENT_OK);
+                    pGroupExpr = Expression::parseObject(subElement.Obj(), &oCtx, vps);
+                } else if (elementType == Array) {
+                    uasserted(15953,
+                              str::stream() << "aggregating group operators are unary (" << key.name
+                                            << ")");
+                } else { /* assume it's an atomic single operand */
+                    pGroupExpr = Expression::parseOperand(subElement, vps);
                }
-                uassert(15954, str::stream() <<
-                        "the computed aggregate '" <<
-                        pFieldName << "' must specify exactly one operator",
-                        subCount == 1);
+                pGroup->addAccumulator(pFieldName, pOp->factory, pGroupExpr);
            }
+
+            uassert(15954,
+                    str::stream() << "the computed aggregate '" << pFieldName
+                                  << "' must 
specify exactly one operator", + subCount == 1); } + } - uassert(15955, "a group specification must include an _id", - !pGroup->_idExpressions.empty()); + uassert(15955, "a group specification must include an _id", !pGroup->_idExpressions.empty()); - pGroup->_variables.reset(new Variables(idGenerator.getIdCount())); + pGroup->_variables.reset(new Variables(idGenerator.getIdCount())); - return pGroup; - } + return pGroup; +} - namespace { - class SorterComparator { - public: - typedef pair<Value, Value> Data; - int operator() (const Data& lhs, const Data& rhs) const { - return Value::compare(lhs.first, rhs.first); - } - }; +namespace { +class SorterComparator { +public: + typedef pair<Value, Value> Data; + int operator()(const Data& lhs, const Data& rhs) const { + return Value::compare(lhs.first, rhs.first); } +}; +} - void DocumentSourceGroup::populate() { - const size_t numAccumulators = vpAccumulatorFactory.size(); - dassert(numAccumulators == vpExpression.size()); - - // pushed to on spill() - vector<shared_ptr<Sorter<Value, Value>::Iterator> > sortedFiles; - int memoryUsageBytes = 0; - - // This loop consumes all input from pSource and buckets it based on pIdExpression. - while (boost::optional<Document> input = pSource->getNext()) { - if (memoryUsageBytes > _maxMemoryUsageBytes) { - uassert(16945, "Exceeded memory limit for $group, but didn't allow external sort." - " Pass allowDiskUse:true to opt in.", - _extSortAllowed); - sortedFiles.push_back(spill()); - memoryUsageBytes = 0; - } +void DocumentSourceGroup::populate() { + const size_t numAccumulators = vpAccumulatorFactory.size(); + dassert(numAccumulators == vpExpression.size()); + + // pushed to on spill() + vector<shared_ptr<Sorter<Value, Value>::Iterator>> sortedFiles; + int memoryUsageBytes = 0; + + // This loop consumes all input from pSource and buckets it based on pIdExpression. + while (boost::optional<Document> input = pSource->getNext()) { + if (memoryUsageBytes > _maxMemoryUsageBytes) { + uassert(16945, + "Exceeded memory limit for $group, but didn't allow external sort." + " Pass allowDiskUse:true to opt in.", + _extSortAllowed); + sortedFiles.push_back(spill()); + memoryUsageBytes = 0; + } - _variables->setRoot(*input); + _variables->setRoot(*input); - /* get the _id value */ - Value id = computeId(_variables.get()); + /* get the _id value */ + Value id = computeId(_variables.get()); - /* treat missing values the same as NULL SERVER-4674 */ - if (id.missing()) - id = Value(BSONNULL); + /* treat missing values the same as NULL SERVER-4674 */ + if (id.missing()) + id = Value(BSONNULL); - /* - Look for the _id value in the map; if it's not there, add a - new entry with a blank accumulator. - */ - const size_t oldSize = groups.size(); - vector<intrusive_ptr<Accumulator> >& group = groups[id]; - const bool inserted = groups.size() != oldSize; + /* + Look for the _id value in the map; if it's not there, add a + new entry with a blank accumulator. + */ + const size_t oldSize = groups.size(); + vector<intrusive_ptr<Accumulator>>& group = groups[id]; + const bool inserted = groups.size() != oldSize; - if (inserted) { - memoryUsageBytes += id.getApproximateSize(); + if (inserted) { + memoryUsageBytes += id.getApproximateSize(); - // Add the accumulators - group.reserve(numAccumulators); - for (size_t i = 0; i < numAccumulators; i++) { - group.push_back(vpAccumulatorFactory[i]()); - } - } else { - for (size_t i = 0; i < numAccumulators; i++) { - // subtract old mem usage. New usage added back after processing. 
- memoryUsageBytes -= group[i]->memUsageForSorter(); - } + // Add the accumulators + group.reserve(numAccumulators); + for (size_t i = 0; i < numAccumulators; i++) { + group.push_back(vpAccumulatorFactory[i]()); } - - /* tickle all the accumulators for the group we found */ - dassert(numAccumulators == group.size()); + } else { for (size_t i = 0; i < numAccumulators; i++) { - group[i]->process(vpExpression[i]->evaluate(_variables.get()), _doingMerge); - memoryUsageBytes += group[i]->memUsageForSorter(); + // subtract old mem usage. New usage added back after processing. + memoryUsageBytes -= group[i]->memUsageForSorter(); } + } - // We are done with the ROOT document so release it. - _variables->clearRoot(); - - DEV { - // In debug mode, spill every time we have a duplicate id to stress merge logic. - if (!inserted // is a dup - && !pExpCtx->inRouter // can't spill to disk in router - && !_extSortAllowed // don't change behavior when testing external sort - && sortedFiles.size() < 20 // don't open too many FDs - ) { - sortedFiles.push_back(spill()); - } - } + /* tickle all the accumulators for the group we found */ + dassert(numAccumulators == group.size()); + for (size_t i = 0; i < numAccumulators; i++) { + group[i]->process(vpExpression[i]->evaluate(_variables.get()), _doingMerge); + memoryUsageBytes += group[i]->memUsageForSorter(); } - // These blocks do any final steps necessary to prepare to output results. - if (!sortedFiles.empty()) { - _spilled = true; - if (!groups.empty()) { + // We are done with the ROOT document so release it. + _variables->clearRoot(); + + DEV { + // In debug mode, spill every time we have a duplicate id to stress merge logic. + if (!inserted // is a dup + && + !pExpCtx->inRouter // can't spill to disk in router + && + !_extSortAllowed // don't change behavior when testing external sort + && + sortedFiles.size() < 20 // don't open too many FDs + ) { sortedFiles.push_back(spill()); } + } + } - // We won't be using groups again so free its memory. - GroupsMap().swap(groups); + // These blocks do any final steps necessary to prepare to output results. + if (!sortedFiles.empty()) { + _spilled = true; + if (!groups.empty()) { + sortedFiles.push_back(spill()); + } - _sorterIterator.reset( - Sorter<Value,Value>::Iterator::merge( - sortedFiles, SortOptions(), SorterComparator())); + // We won't be using groups again so free its memory. + GroupsMap().swap(groups); - // prepare current to accumulate data - _currentAccumulators.reserve(numAccumulators); - for (size_t i = 0; i < numAccumulators; i++) { - _currentAccumulators.push_back(vpAccumulatorFactory[i]()); - } + _sorterIterator.reset( + Sorter<Value, Value>::Iterator::merge(sortedFiles, SortOptions(), SorterComparator())); - verify(_sorterIterator->more()); // we put data in, we should get something out. - _firstPartOfNextGroup = _sorterIterator->next(); - } else { - // start the group iterator - groupsIterator = groups.begin(); + // prepare current to accumulate data + _currentAccumulators.reserve(numAccumulators); + for (size_t i = 0; i < numAccumulators; i++) { + _currentAccumulators.push_back(vpAccumulatorFactory[i]()); } - populated = true; + verify(_sorterIterator->more()); // we put data in, we should get something out. 
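+        // Sketch of what the merged stream yields from here on, following the
+        // record shapes written by spill() below (the key "k" and the partial
+        // states are hypothetical): each next() returns a (groupKey, state) pair,
+        //     (Value("k"), Value())          // zero accumulators: distinct-like
+        //     (Value("k"), partialState)     // one accumulator: a bare Value
+        //     (Value("k"), Value([p0, p1]))  // several: array of partial states
+        // and runs of equal keys are folded back together while iterating.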
+ _firstPartOfNextGroup = _sorterIterator->next(); + } else { + // start the group iterator + groupsIterator = groups.begin(); } - class DocumentSourceGroup::SpillSTLComparator { - public: - bool operator() (const GroupsMap::value_type* lhs, const GroupsMap::value_type* rhs) const { - return Value::compare(lhs->first, rhs->first) < 0; - } - }; + populated = true; +} - shared_ptr<Sorter<Value, Value>::Iterator> DocumentSourceGroup::spill() { - vector<const GroupsMap::value_type*> ptrs; // using pointers to speed sorting - ptrs.reserve(groups.size()); - for (GroupsMap::const_iterator it=groups.begin(), end=groups.end(); it != end; ++it) { - ptrs.push_back(&*it); - } +class DocumentSourceGroup::SpillSTLComparator { +public: + bool operator()(const GroupsMap::value_type* lhs, const GroupsMap::value_type* rhs) const { + return Value::compare(lhs->first, rhs->first) < 0; + } +}; + +shared_ptr<Sorter<Value, Value>::Iterator> DocumentSourceGroup::spill() { + vector<const GroupsMap::value_type*> ptrs; // using pointers to speed sorting + ptrs.reserve(groups.size()); + for (GroupsMap::const_iterator it = groups.begin(), end = groups.end(); it != end; ++it) { + ptrs.push_back(&*it); + } - stable_sort(ptrs.begin(), ptrs.end(), SpillSTLComparator()); + stable_sort(ptrs.begin(), ptrs.end(), SpillSTLComparator()); - SortedFileWriter<Value, Value> writer(SortOptions().TempDir(pExpCtx->tempDir)); - switch (vpAccumulatorFactory.size()) { // same as ptrs[i]->second.size() for all i. - case 0: // no values, essentially a distinct - for (size_t i=0; i < ptrs.size(); i++) { + SortedFileWriter<Value, Value> writer(SortOptions().TempDir(pExpCtx->tempDir)); + switch (vpAccumulatorFactory.size()) { // same as ptrs[i]->second.size() for all i. + case 0: // no values, essentially a distinct + for (size_t i = 0; i < ptrs.size(); i++) { writer.addAlreadySorted(ptrs[i]->first, Value()); } break; - case 1: // just one value, use optimized serialization as single Value - for (size_t i=0; i < ptrs.size(); i++) { + case 1: // just one value, use optimized serialization as single Value + for (size_t i = 0; i < ptrs.size(); i++) { writer.addAlreadySorted(ptrs[i]->first, ptrs[i]->second[0]->getValue(/*toBeMerged=*/true)); } break; - default: // multiple values, serialize as array-typed Value - for (size_t i=0; i < ptrs.size(); i++) { + default: // multiple values, serialize as array-typed Value + for (size_t i = 0; i < ptrs.size(); i++) { vector<Value> accums; - for (size_t j=0; j < ptrs[i]->second.size(); j++) { + for (size_t j = 0; j < ptrs[i]->second.size(); j++) { accums.push_back(ptrs[i]->second[j]->getValue(/*toBeMerged=*/true)); } writer.addAlreadySorted(ptrs[i]->first, Value(std::move(accums))); } break; - } + } - groups.clear(); + groups.clear(); - return shared_ptr<Sorter<Value, Value>::Iterator>(writer.done()); - } + return shared_ptr<Sorter<Value, Value>::Iterator>(writer.done()); +} - void DocumentSourceGroup::parseIdExpression(BSONElement groupField, - const VariablesParseState& vps) { - if (groupField.type() == Object && !groupField.Obj().isEmpty()) { - // {_id: {}} is treated as grouping on a constant, not an expression +void DocumentSourceGroup::parseIdExpression(BSONElement groupField, + const VariablesParseState& vps) { + if (groupField.type() == Object && !groupField.Obj().isEmpty()) { + // {_id: {}} is treated as grouping on a constant, not an expression - const BSONObj idKeyObj = groupField.Obj(); - if (idKeyObj.firstElementFieldName()[0] == '$') { - // grouping on a $op expression - Expression::ObjectCtx 
oCtx(0); - _idExpressions.push_back(Expression::parseObject(idKeyObj, &oCtx, vps)); - } - else { - // grouping on an "artificial" object. Rather than create the object for each input - // in populate(), instead group on the output of the raw expressions. The artificial - // object will be created at the end in makeDocument() while outputting results. - BSONForEach(field, idKeyObj) { - uassert(17390, "$group does not support inclusion-style expressions", - !field.isNumber() && field.type() != Bool); - - _idFieldNames.push_back(field.fieldName()); - _idExpressions.push_back(Expression::parseOperand(field, vps)); - } + const BSONObj idKeyObj = groupField.Obj(); + if (idKeyObj.firstElementFieldName()[0] == '$') { + // grouping on a $op expression + Expression::ObjectCtx oCtx(0); + _idExpressions.push_back(Expression::parseObject(idKeyObj, &oCtx, vps)); + } else { + // grouping on an "artificial" object. Rather than create the object for each input + // in populate(), instead group on the output of the raw expressions. The artificial + // object will be created at the end in makeDocument() while outputting results. + BSONForEach(field, idKeyObj) { + uassert(17390, + "$group does not support inclusion-style expressions", + !field.isNumber() && field.type() != Bool); + + _idFieldNames.push_back(field.fieldName()); + _idExpressions.push_back(Expression::parseOperand(field, vps)); } } - else if (groupField.type() == String && groupField.valuestr()[0] == '$') { - // grouping on a field path. - _idExpressions.push_back(ExpressionFieldPath::parse(groupField.str(), vps)); - } - else { - // constant id - single group - _idExpressions.push_back(ExpressionConstant::create(Value(groupField))); - } + } else if (groupField.type() == String && groupField.valuestr()[0] == '$') { + // grouping on a field path. 
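+        // For example (hypothetical spec): {$group: {_id: "$a.b", ...}} parses
+        // the string once into an ExpressionFieldPath that is then evaluated
+        // against each input document to produce the group key.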
+ _idExpressions.push_back(ExpressionFieldPath::parse(groupField.str(), vps)); + } else { + // constant id - single group + _idExpressions.push_back(ExpressionConstant::create(Value(groupField))); } +} - Value DocumentSourceGroup::computeId(Variables* vars) { - // If only one expression return result directly - if (_idExpressions.size() == 1) - return _idExpressions[0]->evaluate(vars); +Value DocumentSourceGroup::computeId(Variables* vars) { + // If only one expression return result directly + if (_idExpressions.size() == 1) + return _idExpressions[0]->evaluate(vars); - // Multiple expressions get results wrapped in a vector - vector<Value> vals; - vals.reserve(_idExpressions.size()); - for (size_t i = 0; i < _idExpressions.size(); i++) { - vals.push_back(_idExpressions[i]->evaluate(vars)); - } - return Value(std::move(vals)); + // Multiple expressions get results wrapped in a vector + vector<Value> vals; + vals.reserve(_idExpressions.size()); + for (size_t i = 0; i < _idExpressions.size(); i++) { + vals.push_back(_idExpressions[i]->evaluate(vars)); } + return Value(std::move(vals)); +} - Value DocumentSourceGroup::expandId(const Value& val) { - // _id doesn't get wrapped in a document - if (_idFieldNames.empty()) - return val; - - // _id is a single-field document containing val - if (_idFieldNames.size() == 1) - return Value(DOC(_idFieldNames[0] << val)); - - // _id is a multi-field document containing the elements of val - const vector<Value>& vals = val.getArray(); - invariant(_idFieldNames.size() == vals.size()); - MutableDocument md(vals.size()); - for (size_t i = 0; i < vals.size(); i++) { - md[_idFieldNames[i]] = vals[i]; - } - return md.freezeToValue(); +Value DocumentSourceGroup::expandId(const Value& val) { + // _id doesn't get wrapped in a document + if (_idFieldNames.empty()) + return val; + + // _id is a single-field document containing val + if (_idFieldNames.size() == 1) + return Value(DOC(_idFieldNames[0] << val)); + + // _id is a multi-field document containing the elements of val + const vector<Value>& vals = val.getArray(); + invariant(_idFieldNames.size() == vals.size()); + MutableDocument md(vals.size()); + for (size_t i = 0; i < vals.size(); i++) { + md[_idFieldNames[i]] = vals[i]; } + return md.freezeToValue(); +} - Document DocumentSourceGroup::makeDocument(const Value& id, - const Accumulators& accums, - bool mergeableOutput) { - const size_t n = vFieldName.size(); - MutableDocument out (1 + n); - - /* add the _id field */ - out.addField("_id", expandId(id)); - - /* add the rest of the fields */ - for(size_t i = 0; i < n; ++i) { - Value val = accums[i]->getValue(mergeableOutput); - if (val.missing()) { - // we return null in this case so return objects are predictable - out.addField(vFieldName[i], Value(BSONNULL)); - } - else { - out.addField(vFieldName[i], val); - } +Document DocumentSourceGroup::makeDocument(const Value& id, + const Accumulators& accums, + bool mergeableOutput) { + const size_t n = vFieldName.size(); + MutableDocument out(1 + n); + + /* add the _id field */ + out.addField("_id", expandId(id)); + + /* add the rest of the fields */ + for (size_t i = 0; i < n; ++i) { + Value val = accums[i]->getValue(mergeableOutput); + if (val.missing()) { + // we return null in this case so return objects are predictable + out.addField(vFieldName[i], Value(BSONNULL)); + } else { + out.addField(vFieldName[i], val); } - - return out.freeze(); } - intrusive_ptr<DocumentSource> DocumentSourceGroup::getShardSource() { - return this; // No modifications necessary when 
on shard - } - - intrusive_ptr<DocumentSource> DocumentSourceGroup::getMergeSource() { - intrusive_ptr<DocumentSourceGroup> pMerger(DocumentSourceGroup::create(pExpCtx)); - pMerger->setDoingMerge(true); + return out.freeze(); +} - VariablesIdGenerator idGenerator; - VariablesParseState vps(&idGenerator); - /* the merger will use the same grouping key */ - pMerger->_idExpressions.push_back(ExpressionFieldPath::parse("$$ROOT._id", vps)); +intrusive_ptr<DocumentSource> DocumentSourceGroup::getShardSource() { + return this; // No modifications necessary when on shard +} - const size_t n = vFieldName.size(); - for(size_t i = 0; i < n; ++i) { - /* - The merger's output field names will be the same, as will the - accumulator factories. However, for some accumulators, the - expression to be accumulated will be different. The original - accumulator may be collecting an expression based on a field - expression or constant. Here, we accumulate the output of the - same name from the prior group. - */ - pMerger->addAccumulator( - vFieldName[i], vpAccumulatorFactory[i], - ExpressionFieldPath::parse("$$ROOT." + vFieldName[i], vps)); - } +intrusive_ptr<DocumentSource> DocumentSourceGroup::getMergeSource() { + intrusive_ptr<DocumentSourceGroup> pMerger(DocumentSourceGroup::create(pExpCtx)); + pMerger->setDoingMerge(true); + + VariablesIdGenerator idGenerator; + VariablesParseState vps(&idGenerator); + /* the merger will use the same grouping key */ + pMerger->_idExpressions.push_back(ExpressionFieldPath::parse("$$ROOT._id", vps)); + + const size_t n = vFieldName.size(); + for (size_t i = 0; i < n; ++i) { + /* + The merger's output field names will be the same, as will the + accumulator factories. However, for some accumulators, the + expression to be accumulated will be different. The original + accumulator may be collecting an expression based on a field + expression or constant. Here, we accumulate the output of the + same name from the prior group. + */ + pMerger->addAccumulator(vFieldName[i], + vpAccumulatorFactory[i], + ExpressionFieldPath::parse("$$ROOT." 
+ vFieldName[i], vps)); + } - pMerger->_variables.reset(new Variables(idGenerator.getIdCount())); + pMerger->_variables.reset(new Variables(idGenerator.getIdCount())); - return pMerger; - } + return pMerger; +} } #include "mongo/db/sorter/sorter.cpp" diff --git a/src/mongo/db/pipeline/document_source_limit.cpp b/src/mongo/db/pipeline/document_source_limit.cpp index 1789b689eda..9729c6bae8f 100644 --- a/src/mongo/db/pipeline/document_source_limit.cpp +++ b/src/mongo/db/pipeline/document_source_limit.cpp @@ -37,66 +37,57 @@ namespace mongo { - using boost::intrusive_ptr; +using boost::intrusive_ptr; - const char DocumentSourceLimit::limitName[] = "$limit"; +const char DocumentSourceLimit::limitName[] = "$limit"; - DocumentSourceLimit::DocumentSourceLimit(const intrusive_ptr<ExpressionContext> &pExpCtx, - long long limit) - : DocumentSource(pExpCtx) - , limit(limit) - , count(0) - {} +DocumentSourceLimit::DocumentSourceLimit(const intrusive_ptr<ExpressionContext>& pExpCtx, + long long limit) + : DocumentSource(pExpCtx), limit(limit), count(0) {} - const char *DocumentSourceLimit::getSourceName() const { - return limitName; - } - - bool DocumentSourceLimit::coalesce( - const intrusive_ptr<DocumentSource> &pNextSource) { - DocumentSourceLimit *pLimit = - dynamic_cast<DocumentSourceLimit *>(pNextSource.get()); +const char* DocumentSourceLimit::getSourceName() const { + return limitName; +} - /* if it's not another $limit, we can't coalesce */ - if (!pLimit) - return false; +bool DocumentSourceLimit::coalesce(const intrusive_ptr<DocumentSource>& pNextSource) { + DocumentSourceLimit* pLimit = dynamic_cast<DocumentSourceLimit*>(pNextSource.get()); - /* we need to limit by the minimum of the two limits */ - if (pLimit->limit < limit) - limit = pLimit->limit; - return true; - } + /* if it's not another $limit, we can't coalesce */ + if (!pLimit) + return false; - boost::optional<Document> DocumentSourceLimit::getNext() { - pExpCtx->checkForInterrupt(); + /* we need to limit by the minimum of the two limits */ + if (pLimit->limit < limit) + limit = pLimit->limit; + return true; +} - if (++count > limit) { - pSource->dispose(); - return boost::none; - } +boost::optional<Document> DocumentSourceLimit::getNext() { + pExpCtx->checkForInterrupt(); - return pSource->getNext(); + if (++count > limit) { + pSource->dispose(); + return boost::none; } - Value DocumentSourceLimit::serialize(bool explain) const { - return Value(DOC(getSourceName() << limit)); - } + return pSource->getNext(); +} - intrusive_ptr<DocumentSourceLimit> DocumentSourceLimit::create( - const intrusive_ptr<ExpressionContext> &pExpCtx, - long long limit) { - uassert(15958, "the limit must be positive", - limit > 0); - return new DocumentSourceLimit(pExpCtx, limit); - } +Value DocumentSourceLimit::serialize(bool explain) const { + return Value(DOC(getSourceName() << limit)); +} + +intrusive_ptr<DocumentSourceLimit> DocumentSourceLimit::create( + const intrusive_ptr<ExpressionContext>& pExpCtx, long long limit) { + uassert(15958, "the limit must be positive", limit > 0); + return new DocumentSourceLimit(pExpCtx, limit); +} - intrusive_ptr<DocumentSource> DocumentSourceLimit::createFromBson( - BSONElement elem, - const intrusive_ptr<ExpressionContext> &pExpCtx) { - uassert(15957, "the limit must be specified as a number", - elem.isNumber()); +intrusive_ptr<DocumentSource> DocumentSourceLimit::createFromBson( + BSONElement elem, const intrusive_ptr<ExpressionContext>& pExpCtx) { + uassert(15957, "the limit must be specified as a number", 
elem.isNumber()); - long long limit = elem.numberLong(); - return DocumentSourceLimit::create(pExpCtx, limit); - } + long long limit = elem.numberLong(); + return DocumentSourceLimit::create(pExpCtx, limit); +} } diff --git a/src/mongo/db/pipeline/document_source_match.cpp b/src/mongo/db/pipeline/document_source_match.cpp index b6c6e005446..50ef5e95eb5 100644 --- a/src/mongo/db/pipeline/document_source_match.cpp +++ b/src/mongo/db/pipeline/document_source_match.cpp @@ -39,121 +39,123 @@ namespace mongo { - using boost::intrusive_ptr; - using std::string; - using std::vector; +using boost::intrusive_ptr; +using std::string; +using std::vector; - const char DocumentSourceMatch::matchName[] = "$match"; +const char DocumentSourceMatch::matchName[] = "$match"; - const char *DocumentSourceMatch::getSourceName() const { - return matchName; - } - - Value DocumentSourceMatch::serialize(bool explain) const { - return Value(DOC(getSourceName() << Document(getQuery()))); - } +const char* DocumentSourceMatch::getSourceName() const { + return matchName; +} - intrusive_ptr<DocumentSource> DocumentSourceMatch::optimize() { - return getQuery().isEmpty() ? nullptr : this; - } +Value DocumentSourceMatch::serialize(bool explain) const { + return Value(DOC(getSourceName() << Document(getQuery()))); +} - boost::optional<Document> DocumentSourceMatch::getNext() { - pExpCtx->checkForInterrupt(); +intrusive_ptr<DocumentSource> DocumentSourceMatch::optimize() { + return getQuery().isEmpty() ? nullptr : this; +} - // The user facing error should have been generated earlier. - massert(17309, "Should never call getNext on a $match stage with $text clause", - !_isTextQuery); +boost::optional<Document> DocumentSourceMatch::getNext() { + pExpCtx->checkForInterrupt(); - while (boost::optional<Document> next = pSource->getNext()) { - // The matcher only takes BSON documents, so we have to make one. - if (matcher->matches(next->toBson())) - return next; - } + // The user facing error should have been generated earlier. + massert(17309, "Should never call getNext on a $match stage with $text clause", !_isTextQuery); - // Nothing matched - return boost::none; + while (boost::optional<Document> next = pSource->getNext()) { + // The matcher only takes BSON documents, so we have to make one. + if (matcher->matches(next->toBson())) + return next; } - bool DocumentSourceMatch::coalesce(const intrusive_ptr<DocumentSource>& nextSource) { - DocumentSourceMatch* otherMatch = dynamic_cast<DocumentSourceMatch*>(nextSource.get()); - if (!otherMatch) - return false; + // Nothing matched + return boost::none; +} - if (otherMatch->_isTextQuery) { - // Non-initial text queries are disallowed (enforced by setSource below). This prevents - // "hiding" a non-initial text query by combining it with another match. - return false; +bool DocumentSourceMatch::coalesce(const intrusive_ptr<DocumentSource>& nextSource) { + DocumentSourceMatch* otherMatch = dynamic_cast<DocumentSourceMatch*>(nextSource.get()); + if (!otherMatch) + return false; - // The rest of this block is for once we support non-initial text queries. + if (otherMatch->_isTextQuery) { + // Non-initial text queries are disallowed (enforced by setSource below). This prevents + // "hiding" a non-initial text query by combining it with another match. + return false; - if (_isTextQuery) { - // The score should only come from the last $match. We can't combine since then this - // match's score would impact otherMatch's. 
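+            // Illustration of the combination performed further down (queries are
+            // hypothetical): folding a following {$match: {b: 2}} into this
+            // {$match: {a: 1}} leaves a single stage whose filter is
+            //     {$and: [{a: 1}, {b: 2}]}
+            // so both predicates still apply exactly once.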
- return false; - } + // The rest of this block is for once we support non-initial text queries. - _isTextQuery = true; + if (_isTextQuery) { + // The score should only come from the last $match. We can't combine since then this + // match's score would impact otherMatch's. + return false; } - // Replace our matcher with the $and of ours and theirs. - matcher.reset(new Matcher(BSON("$and" << BSON_ARRAY(getQuery() - << otherMatch->getQuery())), - MatchExpressionParser::WhereCallback())); - - return true; + _isTextQuery = true; } + // Replace our matcher with the $and of ours and theirs. + matcher.reset(new Matcher(BSON("$and" << BSON_ARRAY(getQuery() << otherMatch->getQuery())), + MatchExpressionParser::WhereCallback())); + + return true; +} + namespace { - // This block contains the functions that make up the implementation of - // DocumentSourceMatch::redactSafePortion(). They will only be called after - // the Match expression has been successfully parsed so they can assume that - // input is well formed. +// This block contains the functions that make up the implementation of +// DocumentSourceMatch::redactSafePortion(). They will only be called after +// the Match expression has been successfully parsed so they can assume that +// input is well formed. - bool isAllDigits(StringData str) { - if (str.empty()) - return false; +bool isAllDigits(StringData str) { + if (str.empty()) + return false; - for (size_t i=0; i < str.size(); i++) { - if (!isdigit(str[i])) - return false; - } - return true; + for (size_t i = 0; i < str.size(); i++) { + if (!isdigit(str[i])) + return false; } + return true; +} - bool isFieldnameRedactSafe(StringData fieldName) { - // Can't have numeric elements in the dotted path since redacting elements from an array - // would change the indexes. +bool isFieldnameRedactSafe(StringData fieldName) { + // Can't have numeric elements in the dotted path since redacting elements from an array + // would change the indexes. - const size_t dotPos = fieldName.find('.'); - if (dotPos == string::npos) - return !isAllDigits(fieldName); + const size_t dotPos = fieldName.find('.'); + if (dotPos == string::npos) + return !isAllDigits(fieldName); - const StringData part = fieldName.substr(0, dotPos); - const StringData rest = fieldName.substr(dotPos + 1); - return !isAllDigits(part) && isFieldnameRedactSafe(rest); - } + const StringData part = fieldName.substr(0, dotPos); + const StringData rest = fieldName.substr(dotPos + 1); + return !isAllDigits(part) && isFieldnameRedactSafe(rest); +} - bool isTypeRedactSafeInComparison(BSONType type) { - if (type == Array) return false; - if (type == Object) return false; - if (type == jstNULL) return false; - if (type == Undefined) return false; // Currently a Matcher parse error. +bool isTypeRedactSafeInComparison(BSONType type) { + if (type == Array) + return false; + if (type == Object) + return false; + if (type == jstNULL) + return false; + if (type == Undefined) + return false; // Currently a Matcher parse error. - return true; - } + return true; +} - Document redactSafePortionTopLevel(BSONObj query); // mutually recursive with next function +Document redactSafePortionTopLevel(BSONObj query); // mutually recursive with next function - // Returns the redact-safe portion of an "inner" match expression. This is the layer like - // {$gt: 5} which does not include the field name. Returns an empty document if none of the - // expression can safely be promoted in front of a $redact. 
-    Document redactSafePortionDollarOps(BSONObj expr) {
-        MutableDocument output;
-        BSONForEach(field, expr) {
-            if (field.fieldName()[0] != '$')
-                continue;
+// Returns the redact-safe portion of an "inner" match expression. This is the layer like
+// {$gt: 5} which does not include the field name. Returns an empty document if none of the
+// expression can safely be promoted in front of a $redact.
+Document redactSafePortionDollarOps(BSONObj expr) {
+    MutableDocument output;
+    BSONForEach(field, expr) {
+        if (field.fieldName()[0] != '$')
+            continue;

-            switch(BSONObj::MatchType(field.getGtLtOp(BSONObj::Equality))) {
+        switch (BSONObj::MatchType(field.getGtLtOp(BSONObj::Equality))) {
            // These are always ok
            case BSONObj::opTYPE:
            case BSONObj::opREGEX:
@@ -218,7 +220,7 @@ namespace {
            }

            // These are never allowed
-            case BSONObj::Equality: // This actually means unknown
+            case BSONObj::Equality:  // This actually means unknown
            case BSONObj::opMAX_DISTANCE:
            case BSONObj::opNEAR:
            case BSONObj::NE:
@@ -228,55 +230,57 @@ namespace {
            case BSONObj::opWITHIN:
            case BSONObj::opGEO_INTERSECTS:
                continue;
-            }
        }
-        return output.freeze();
    }
+    return output.freeze();
+}

-    // Returns the redact-safe portion of an "outer" match expression. This is the layer like
-    // {fieldName: {...}} which does include the field name. Returns an empty document if none of
-    // the expression can safely be promoted in front of a $redact.
-    Document redactSafePortionTopLevel(BSONObj query) {
-        MutableDocument output;
-        BSONForEach(field, query) {
-            if (field.fieldName()[0] == '$') {
-                if (str::equals(field.fieldName(), "$or")) {
-                    // $or must be all-or-nothing (line $in). Can't include subset of elements.
-                    vector<Value> okClauses;
-                    BSONForEach(elem, field.Obj()) {
-                        Document clause = redactSafePortionTopLevel(elem.Obj());
-                        if (clause.empty()) {
-                            okClauses.clear();
-                            break;
-                        }
-                        okClauses.push_back(Value(clause));
-                    }
-
-                    if (!okClauses.empty())
-                        output["$or"] = Value(std::move(okClauses));
-                }
-                else if (str::equals(field.fieldName(), "$and")) {
-                    // $and can include subset of elements (like $all).
-                    vector<Value> okClauses;
-                    BSONForEach(elem, field.Obj()) {
-                        Document clause = redactSafePortionTopLevel(elem.Obj());
-                        if (!clause.empty())
-                            okClauses.push_back(Value(clause));
+// Returns the redact-safe portion of an "outer" match expression. This is the layer like
+// {fieldName: {...}} which does include the field name. Returns an empty document if none of
+// the expression can safely be promoted in front of a $redact.
+Document redactSafePortionTopLevel(BSONObj query) {
+    MutableDocument output;
+    BSONForEach(field, query) {
+        if (field.fieldName()[0] == '$') {
+            if (str::equals(field.fieldName(), "$or")) {
+                // $or must be all-or-nothing (like $in). Can't include subset of elements.
+                vector<Value> okClauses;
+                BSONForEach(elem, field.Obj()) {
+                    Document clause = redactSafePortionTopLevel(elem.Obj());
+                    if (clause.empty()) {
+                        okClauses.clear();
+                        break;
                    }
+                    okClauses.push_back(Value(clause));
                }

+                if (!okClauses.empty())
+                    output["$or"] = Value(std::move(okClauses));
+            } else if (str::equals(field.fieldName(), "$and")) {
+                // $and can include subset of elements (like $all). 
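+                // Worked example under these rules (query is hypothetical, and
+                // assumes simple comparisons such as a numeric $gt are kept by
+                // the cases in the elided hunk above): for
+                //     {$and: [{a: {$gt: 5}}, {b: {$ne: 1}}]}
+                // only {$and: [{a: {$gt: 5}}]} is promoted ahead of the $redact,
+                // while the same clauses under $or would promote nothing, since
+                // $or cannot drop a branch.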
+ vector<Value> okClauses; + BSONForEach(elem, field.Obj()) { + Document clause = redactSafePortionTopLevel(elem.Obj()); + if (!clause.empty()) + okClauses.push_back(Value(clause)); + } + if (!okClauses.empty()) + output["$and"] = Value(std::move(okClauses)); } - if (!isFieldnameRedactSafe(field.fieldNameStringData())) - continue; + continue; + } + + if (!isFieldnameRedactSafe(field.fieldNameStringData())) + continue; - switch (field.type()) { - case Array: continue; // exact matches on arrays are never allowed - case jstNULL: continue; // can't look for missing fields - case Undefined: continue; // Currently a Matcher parse error. + switch (field.type()) { + case Array: + continue; // exact matches on arrays are never allowed + case jstNULL: + continue; // can't look for missing fields + case Undefined: + continue; // Currently a Matcher parse error. case Object: { Document sub = redactSafePortionDollarOps(field.Obj()); @@ -290,69 +294,68 @@ namespace { default: output[field.fieldNameStringData()] = Value(field); break; - } } - return output.freeze(); } + return output.freeze(); +} } - BSONObj DocumentSourceMatch::redactSafePortion() const { - return redactSafePortionTopLevel(getQuery()).toBson(); - } +BSONObj DocumentSourceMatch::redactSafePortion() const { + return redactSafePortionTopLevel(getQuery()).toBson(); +} - void DocumentSourceMatch::setSource(DocumentSource* source) { - uassert(17313, "$match with $text is only allowed as the first pipeline stage", - !_isTextQuery); +void DocumentSourceMatch::setSource(DocumentSource* source) { + uassert(17313, "$match with $text is only allowed as the first pipeline stage", !_isTextQuery); - DocumentSource::setSource(source); - } + DocumentSource::setSource(source); +} - bool DocumentSourceMatch::isTextQuery(const BSONObj& query) { - BSONForEach(e, query) { - const StringData fieldName = e.fieldNameStringData(); - if (fieldName == StringData("$text", StringData::LiteralTag())) - return true; +bool DocumentSourceMatch::isTextQuery(const BSONObj& query) { + BSONForEach(e, query) { + const StringData fieldName = e.fieldNameStringData(); + if (fieldName == StringData("$text", StringData::LiteralTag())) + return true; - if (e.isABSONObj() && isTextQuery(e.Obj())) - return true; - } - return false; + if (e.isABSONObj() && isTextQuery(e.Obj())) + return true; } + return false; +} - static void uassertNoDisallowedClauses(BSONObj query) { - BSONForEach(e, query) { - // can't use the Matcher API because this would segfault the constructor - uassert(16395, "$where is not allowed inside of a $match aggregation expression", - ! str::equals(e.fieldName(), "$where")); - // geo breaks if it is not the first portion of the pipeline - uassert(16424, "$near is not allowed inside of a $match aggregation expression", - ! str::equals(e.fieldName(), "$near")); - uassert(16426, "$nearSphere is not allowed inside of a $match aggregation expression", - ! 
str::equals(e.fieldName(), "$nearSphere")); - if (e.isABSONObj()) - uassertNoDisallowedClauses(e.Obj()); - } +static void uassertNoDisallowedClauses(BSONObj query) { + BSONForEach(e, query) { + // can't use the Matcher API because this would segfault the constructor + uassert(16395, + "$where is not allowed inside of a $match aggregation expression", + !str::equals(e.fieldName(), "$where")); + // geo breaks if it is not the first portion of the pipeline + uassert(16424, + "$near is not allowed inside of a $match aggregation expression", + !str::equals(e.fieldName(), "$near")); + uassert(16426, + "$nearSphere is not allowed inside of a $match aggregation expression", + !str::equals(e.fieldName(), "$nearSphere")); + if (e.isABSONObj()) + uassertNoDisallowedClauses(e.Obj()); } +} - intrusive_ptr<DocumentSource> DocumentSourceMatch::createFromBson( - BSONElement elem, - const intrusive_ptr<ExpressionContext> &pExpCtx) { - uassert(15959, "the match filter must be an expression in an object", - elem.type() == Object); +intrusive_ptr<DocumentSource> DocumentSourceMatch::createFromBson( + BSONElement elem, const intrusive_ptr<ExpressionContext>& pExpCtx) { + uassert(15959, "the match filter must be an expression in an object", elem.type() == Object); - uassertNoDisallowedClauses(elem.Obj()); + uassertNoDisallowedClauses(elem.Obj()); - return new DocumentSourceMatch(elem.Obj(), pExpCtx); - } + return new DocumentSourceMatch(elem.Obj(), pExpCtx); +} - BSONObj DocumentSourceMatch::getQuery() const { - return *(matcher->getQuery()); - } +BSONObj DocumentSourceMatch::getQuery() const { + return *(matcher->getQuery()); +} - DocumentSourceMatch::DocumentSourceMatch(const BSONObj &query, - const intrusive_ptr<ExpressionContext> &pExpCtx) - : DocumentSource(pExpCtx), - matcher(new Matcher(query.getOwned(), MatchExpressionParser::WhereCallback())), - _isTextQuery(isTextQuery(query)) - {} +DocumentSourceMatch::DocumentSourceMatch(const BSONObj& query, + const intrusive_ptr<ExpressionContext>& pExpCtx) + : DocumentSource(pExpCtx), + matcher(new Matcher(query.getOwned(), MatchExpressionParser::WhereCallback())), + _isTextQuery(isTextQuery(query)) {} } diff --git a/src/mongo/db/pipeline/document_source_merge_cursors.cpp b/src/mongo/db/pipeline/document_source_merge_cursors.cpp index afe924d6a2f..d1e618f35bf 100644 --- a/src/mongo/db/pipeline/document_source_merge_cursors.cpp +++ b/src/mongo/db/pipeline/document_source_merge_cursors.cpp @@ -33,146 +33,137 @@ namespace mongo { - using boost::intrusive_ptr; - using std::make_pair; - using std::string; - using std::vector; +using boost::intrusive_ptr; +using std::make_pair; +using std::string; +using std::vector; - const char DocumentSourceMergeCursors::name[] = "$mergeCursors"; +const char DocumentSourceMergeCursors::name[] = "$mergeCursors"; - const char* DocumentSourceMergeCursors::getSourceName() const { - return name; - } +const char* DocumentSourceMergeCursors::getSourceName() const { + return name; +} - void DocumentSourceMergeCursors::setSource(DocumentSource *pSource) { - /* this doesn't take a source */ - verify(false); - } +void DocumentSourceMergeCursors::setSource(DocumentSource* pSource) { + /* this doesn't take a source */ + verify(false); +} - DocumentSourceMergeCursors::DocumentSourceMergeCursors( - const CursorIds& cursorIds, - const intrusive_ptr<ExpressionContext> &pExpCtx) - : DocumentSource(pExpCtx) - , _cursorIds(cursorIds) - , _unstarted(true) - {} - - intrusive_ptr<DocumentSource> DocumentSourceMergeCursors::create( - const CursorIds& 
cursorIds, - const intrusive_ptr<ExpressionContext> &pExpCtx) { - return new DocumentSourceMergeCursors(cursorIds, pExpCtx); - } +DocumentSourceMergeCursors::DocumentSourceMergeCursors( + const CursorIds& cursorIds, const intrusive_ptr<ExpressionContext>& pExpCtx) + : DocumentSource(pExpCtx), _cursorIds(cursorIds), _unstarted(true) {} - intrusive_ptr<DocumentSource> DocumentSourceMergeCursors::createFromBson( - BSONElement elem, - const intrusive_ptr<ExpressionContext>& pExpCtx) { +intrusive_ptr<DocumentSource> DocumentSourceMergeCursors::create( + const CursorIds& cursorIds, const intrusive_ptr<ExpressionContext>& pExpCtx) { + return new DocumentSourceMergeCursors(cursorIds, pExpCtx); +} - massert(17026, string("Expected an Array, but got a ") + typeName(elem.type()), - elem.type() == Array); +intrusive_ptr<DocumentSource> DocumentSourceMergeCursors::createFromBson( + BSONElement elem, const intrusive_ptr<ExpressionContext>& pExpCtx) { + massert(17026, + string("Expected an Array, but got a ") + typeName(elem.type()), + elem.type() == Array); + + CursorIds cursorIds; + BSONObj array = elem.embeddedObject(); + BSONForEach(cursor, array) { + massert(17027, + string("Expected an Object, but got a ") + typeName(cursor.type()), + cursor.type() == Object); + + cursorIds.push_back( + make_pair(ConnectionString(HostAndPort(cursor["host"].String())), cursor["id"].Long())); + } - CursorIds cursorIds; - BSONObj array = elem.embeddedObject(); - BSONForEach(cursor, array) { - massert(17027, string("Expected an Object, but got a ") + typeName(cursor.type()), - cursor.type() == Object); + return new DocumentSourceMergeCursors(cursorIds, pExpCtx); +} - cursorIds.push_back(make_pair(ConnectionString(HostAndPort(cursor["host"].String())), - cursor["id"].Long())); - } - - return new DocumentSourceMergeCursors(cursorIds, pExpCtx); +Value DocumentSourceMergeCursors::serialize(bool explain) const { + vector<Value> cursors; + for (size_t i = 0; i < _cursorIds.size(); i++) { + cursors.push_back(Value( + DOC("host" << Value(_cursorIds[i].first.toString()) << "id" << _cursorIds[i].second))); } + return Value(DOC(getSourceName() << Value(cursors))); +} - Value DocumentSourceMergeCursors::serialize(bool explain) const { - vector<Value> cursors; - for (size_t i = 0; i < _cursorIds.size(); i++) { - cursors.push_back(Value(DOC("host" << Value(_cursorIds[i].first.toString()) - << "id" << _cursorIds[i].second))); - } - return Value(DOC(getSourceName() << Value(cursors))); +DocumentSourceMergeCursors::CursorAndConnection::CursorAndConnection(ConnectionString host, + NamespaceString ns, + CursorId id) + : connection(host), cursor(connection.get(), ns, id, 0, 0) {} + +vector<DBClientCursor*> DocumentSourceMergeCursors::getCursors() { + verify(_unstarted); + start(); + vector<DBClientCursor*> out; + for (Cursors::const_iterator it = _cursors.begin(); it != _cursors.end(); ++it) { + out.push_back(&((*it)->cursor)); } - DocumentSourceMergeCursors::CursorAndConnection::CursorAndConnection( - ConnectionString host, - NamespaceString ns, - CursorId id) - : connection(host) - , cursor(connection.get(), ns, id, 0, 0) - {} + return out; +} - vector<DBClientCursor*> DocumentSourceMergeCursors::getCursors() { - verify(_unstarted); - start(); - vector<DBClientCursor*> out; - for (Cursors::const_iterator it = _cursors.begin(); it !=_cursors.end(); ++it) { - out.push_back(&((*it)->cursor)); - } +void DocumentSourceMergeCursors::start() { + _unstarted = false; - return out; + // open each cursor and send message asking for a batch + for 
(CursorIds::const_iterator it = _cursorIds.begin(); it != _cursorIds.end(); ++it) { + _cursors.push_back( + std::make_shared<CursorAndConnection>(it->first, pExpCtx->ns, it->second)); + verify(_cursors.back()->connection->lazySupported()); + _cursors.back()->cursor.initLazy(); // shouldn't block } - void DocumentSourceMergeCursors::start() { - _unstarted = false; - - // open each cursor and send message asking for a batch - for (CursorIds::const_iterator it = _cursorIds.begin(); it !=_cursorIds.end(); ++it) { - _cursors.push_back(std::make_shared<CursorAndConnection>( - it->first, pExpCtx->ns, it->second)); - verify(_cursors.back()->connection->lazySupported()); - _cursors.back()->cursor.initLazy(); // shouldn't block - } - - // wait for all cursors to return a batch - // TODO need a way to keep cursors alive if some take longer than 10 minutes. - for (Cursors::const_iterator it = _cursors.begin(); it !=_cursors.end(); ++it) { - bool retry = false; - bool ok = (*it)->cursor.initLazyFinish(retry); // blocks here for first batch - - uassert(17028, - "error reading response from " + _cursors.back()->connection->toString(), - ok); - verify(!retry); - } + // wait for all cursors to return a batch + // TODO need a way to keep cursors alive if some take longer than 10 minutes. + for (Cursors::const_iterator it = _cursors.begin(); it != _cursors.end(); ++it) { + bool retry = false; + bool ok = (*it)->cursor.initLazyFinish(retry); // blocks here for first batch - _currentCursor = _cursors.begin(); + uassert( + 17028, "error reading response from " + _cursors.back()->connection->toString(), ok); + verify(!retry); } - Document DocumentSourceMergeCursors::nextSafeFrom(DBClientCursor* cursor) { - const BSONObj next = cursor->next(); - if (next.hasField("$err")) { - const int code = next.hasField("code") ? next["code"].numberInt() : 17029; - uasserted(code, str::stream() << "Received error in response from " - << cursor->originalHost() - << ": " << next); - } - return Document::fromBsonWithMetaData(next); + _currentCursor = _cursors.begin(); +} + +Document DocumentSourceMergeCursors::nextSafeFrom(DBClientCursor* cursor) { + const BSONObj next = cursor->next(); + if (next.hasField("$err")) { + const int code = next.hasField("code") ? 
next["code"].numberInt() : 17029; + uasserted(code, + str::stream() << "Received error in response from " << cursor->originalHost() + << ": " << next); } + return Document::fromBsonWithMetaData(next); +} - boost::optional<Document> DocumentSourceMergeCursors::getNext() { - if (_unstarted) - start(); +boost::optional<Document> DocumentSourceMergeCursors::getNext() { + if (_unstarted) + start(); - // purge eof cursors and release their connections - while (!_cursors.empty() && !(*_currentCursor)->cursor.more()) { - (*_currentCursor)->connection.done(); - _cursors.erase(_currentCursor); - _currentCursor = _cursors.begin(); - } + // purge eof cursors and release their connections + while (!_cursors.empty() && !(*_currentCursor)->cursor.more()) { + (*_currentCursor)->connection.done(); + _cursors.erase(_currentCursor); + _currentCursor = _cursors.begin(); + } - if (_cursors.empty()) - return boost::none; + if (_cursors.empty()) + return boost::none; - const Document next = nextSafeFrom(&((*_currentCursor)->cursor)); + const Document next = nextSafeFrom(&((*_currentCursor)->cursor)); - // advance _currentCursor, wrapping if needed - if (++_currentCursor == _cursors.end()) - _currentCursor = _cursors.begin(); + // advance _currentCursor, wrapping if needed + if (++_currentCursor == _cursors.end()) + _currentCursor = _cursors.begin(); - return next; - } + return next; +} - void DocumentSourceMergeCursors::dispose() { - _cursors.clear(); - _currentCursor = _cursors.end(); - } +void DocumentSourceMergeCursors::dispose() { + _cursors.clear(); + _currentCursor = _cursors.end(); +} } diff --git a/src/mongo/db/pipeline/document_source_out.cpp b/src/mongo/db/pipeline/document_source_out.cpp index bdb8d1d7055..858df389be8 100644 --- a/src/mongo/db/pipeline/document_source_out.cpp +++ b/src/mongo/db/pipeline/document_source_out.cpp @@ -32,171 +32,166 @@ namespace mongo { - using boost::intrusive_ptr; - using std::vector; - - const char DocumentSourceOut::outName[] = "$out"; - - DocumentSourceOut::~DocumentSourceOut() { - DESTRUCTOR_GUARD( - // Make sure we drop the temp collection if anything goes wrong. Errors are ignored - // here because nothing can be done about them. Additionally, if this fails and the - // collection is left behind, it will be cleaned up next time the server is started. - if (_mongod && _tempNs.size()) - _mongod->directClient()->dropCollection(_tempNs.ns()); - ) - } +using boost::intrusive_ptr; +using std::vector; - const char *DocumentSourceOut::getSourceName() const { - return outName; - } +const char DocumentSourceOut::outName[] = "$out"; - static AtomicUInt32 aggOutCounter; - void DocumentSourceOut::prepTempCollection() { - verify(_mongod); - verify(_tempNs.size() == 0); - - DBClientBase* conn = _mongod->directClient(); - - // Fail early by checking before we do any work. - uassert(17017, str::stream() << "namespace '" << _outputNs.ns() - << "' is sharded so it can't be used for $out'", - !_mongod->isSharded(_outputNs)); - - // cannot $out to capped collection - uassert(17152, str::stream() << "namespace '" << _outputNs.ns() - << "' is capped so it can't be used for $out", - !_mongod->isCapped(_outputNs)); - - _tempNs = NamespaceString(StringData(str::stream() << _outputNs.db() - << ".tmp.agg_out." - << aggOutCounter.addAndFetch(1) - )); - - // Create output collection, copying options from existing collection if any. - { - const auto infos = conn->getCollectionInfos(_outputNs.db().toString(), - BSON("name" << _outputNs.coll())); - const auto options = infos.empty() ? 
BSONObj()
-                                             : infos.front().getObjectField("options");
-
-            BSONObjBuilder cmd;
-            cmd << "create" << _tempNs.coll();
-            cmd << "temp" << true;
-            cmd.appendElementsUnique(options);
-
-            BSONObj info;
-            bool ok = conn->runCommand(_outputNs.db().toString(), cmd.done(), info);
-            uassert(16994, str::stream() << "failed to create temporary $out collection '"
-                                         << _tempNs.ns() << "': " << info.toString(),
-                    ok);
-        }
+DocumentSourceOut::~DocumentSourceOut() {
+    DESTRUCTOR_GUARD(
+        // Make sure we drop the temp collection if anything goes wrong. Errors are ignored
+        // here because nothing can be done about them. Additionally, if this fails and the
+        // collection is left behind, it will be cleaned up next time the server is started.
+        if (_mongod && _tempNs.size()) _mongod->directClient()->dropCollection(_tempNs.ns());)
+}

-        // copy indexes on _outputNs to _tempNs
-        const std::list<BSONObj> indexes = conn->getIndexSpecs(_outputNs);
-        for (std::list<BSONObj>::const_iterator it = indexes.begin(); it != indexes.end(); ++it) {
-            MutableDocument index((Document(*it)));
-            index.remove("_id"); // indexes shouldn't have _ids but some existing ones do
-            index["ns"] = Value(_tempNs.ns());
-
-            BSONObj indexBson = index.freeze().toBson();
-            conn->insert(_tempNs.getSystemIndexesCollection(), indexBson);
-            BSONObj err = conn->getLastErrorDetailed();
-            uassert(16995, str::stream() << "copying index for $out failed."
-                                         << " index: " << indexBson
-                                         << " error: " << err,
-                    DBClientWithCommands::getLastErrorString(err).empty());
-        }
-    }
+const char* DocumentSourceOut::getSourceName() const {
+    return outName;
+}

-    void DocumentSourceOut::spill(const vector<BSONObj>& toInsert) {
-        BSONObj err = _mongod->insert(_tempNs, toInsert);
-        uassert(16996, str::stream() << "insert for $out failed: " << err,
-                DBClientWithCommands::getLastErrorString(err).empty());
-    }
+static AtomicUInt32 aggOutCounter;
+void DocumentSourceOut::prepTempCollection() {
+    verify(_mongod);
+    verify(_tempNs.size() == 0);

-    boost::optional<Document> DocumentSourceOut::getNext() {
-        pExpCtx->checkForInterrupt();
-
-        // make sure we only write out once
-        if (_done)
-            return boost::none;
-        _done = true;
-
-        verify(_mongod);
-        DBClientBase* conn = _mongod->directClient();
-
-        prepTempCollection();
-        verify(_tempNs.size() != 0);
-
-        vector<BSONObj> bufferedObjects;
-        int bufferedBytes = 0;
-        while (boost::optional<Document> next = pSource->getNext()) {
-            BSONObj toInsert = next->toBson();
-            bufferedBytes += toInsert.objsize();
-            if (!bufferedObjects.empty() && bufferedBytes > BSONObjMaxUserSize) {
-                spill(bufferedObjects);
-                bufferedObjects.clear();
-                bufferedBytes = toInsert.objsize();
-            }
-            bufferedObjects.push_back(toInsert);
-        }
+    DBClientBase* conn = _mongod->directClient();

-        if (!bufferedObjects.empty())
-            spill(bufferedObjects);
+    // Fail early by checking before we do any work.
+    uassert(17017,
+            str::stream() << "namespace '" << _outputNs.ns()
+                          << "' is sharded so it can't be used for $out",
+            !_mongod->isSharded(_outputNs));
+
+    // cannot $out to capped collection
+    uassert(17152,
+            str::stream() << "namespace '" << _outputNs.ns()
+                          << "' is capped so it can't be used for $out",
+            !_mongod->isCapped(_outputNs));
+
+    _tempNs = NamespaceString(StringData(str::stream() << _outputNs.db() << ".tmp.agg_out."
+                                                       << aggOutCounter.addAndFetch(1)));
-    // Checking again to make sure we didn't become sharded while running. 
- uassert(17018, str::stream() << "namespace '" << _outputNs.ns() - << "' became sharded so it can't be used for $out'", - !_mongod->isSharded(_outputNs)); + // Create output collection, copying options from existing collection if any. + { + const auto infos = + conn->getCollectionInfos(_outputNs.db().toString(), BSON("name" << _outputNs.coll())); + const auto options = infos.empty() ? BSONObj() : infos.front().getObjectField("options"); + + BSONObjBuilder cmd; + cmd << "create" << _tempNs.coll(); + cmd << "temp" << true; + cmd.appendElementsUnique(options); - BSONObj rename = BSON("renameCollection" << _tempNs.ns() - << "to" << _outputNs.ns() - << "dropTarget" << true - ); BSONObj info; - bool ok = conn->runCommand("admin", rename, info); - uassert(16997, str::stream() << "renameCollection for $out failed: " << info, + bool ok = conn->runCommand(_outputNs.db().toString(), cmd.done(), info); + uassert(16994, + str::stream() << "failed to create temporary $out collection '" << _tempNs.ns() + << "': " << info.toString(), ok); + } + + // copy indexes on _outputNs to _tempNs + const std::list<BSONObj> indexes = conn->getIndexSpecs(_outputNs); + for (std::list<BSONObj>::const_iterator it = indexes.begin(); it != indexes.end(); ++it) { + MutableDocument index((Document(*it))); + index.remove("_id"); // indexes shouldn't have _ids but some existing ones do + index["ns"] = Value(_tempNs.ns()); + + BSONObj indexBson = index.freeze().toBson(); + conn->insert(_tempNs.getSystemIndexesCollection(), indexBson); + BSONObj err = conn->getLastErrorDetailed(); + uassert(16995, + str::stream() << "copying index for $out failed." + << " index: " << indexBson << " error: " << err, + DBClientWithCommands::getLastErrorString(err).empty()); + } +} - // We don't need to drop the temp collection in our destructor if the rename succeeded. - _tempNs = NamespaceString(""); +void DocumentSourceOut::spill(const vector<BSONObj>& toInsert) { + BSONObj err = _mongod->insert(_tempNs, toInsert); + uassert(16996, + str::stream() << "insert for $out failed: " << err, + DBClientWithCommands::getLastErrorString(err).empty()); +} + +boost::optional<Document> DocumentSourceOut::getNext() { + pExpCtx->checkForInterrupt(); - // This "DocumentSource" doesn't produce output documents. This can change in the future - // if we support using $out in "tee" mode. + // make sure we only write out once + if (_done) return boost::none; - } + _done = true; - DocumentSourceOut::DocumentSourceOut(const NamespaceString& outputNs, - const intrusive_ptr<ExpressionContext>& pExpCtx) - : DocumentSource(pExpCtx) - , _done(false) - , _tempNs("") // filled in by prepTempCollection - , _outputNs(outputNs) - {} - - intrusive_ptr<DocumentSource> DocumentSourceOut::createFromBson( - BSONElement elem, - const intrusive_ptr<ExpressionContext> &pExpCtx) { - uassert(16990, str::stream() << "$out only supports a string argument, not " - << typeName(elem.type()), - elem.type() == String); - - NamespaceString outputNs(pExpCtx->ns.db().toString() + '.' 
+ elem.str());
- uassert(17385, "Can't $out to special collection: " + elem.str(),
- !outputNs.isSpecial());
- return new DocumentSourceOut(outputNs, pExpCtx);
- }
+ verify(_mongod);
+ DBClientBase* conn = _mongod->directClient();

- Value DocumentSourceOut::serialize(bool explain) const {
- massert(17000, "$out shouldn't have different db than input",
- _outputNs.db() == pExpCtx->ns.db());
+ prepTempCollection();
+ verify(_tempNs.size() != 0);

- return Value(DOC(getSourceName() << _outputNs.coll()));
+ vector<BSONObj> bufferedObjects;
+ int bufferedBytes = 0;
+ while (boost::optional<Document> next = pSource->getNext()) {
+ BSONObj toInsert = next->toBson();
+ bufferedBytes += toInsert.objsize();
+ if (!bufferedObjects.empty() && bufferedBytes > BSONObjMaxUserSize) {
+ spill(bufferedObjects);
+ bufferedObjects.clear();
+ bufferedBytes = toInsert.objsize();
+ }
+ bufferedObjects.push_back(toInsert);
}

- DocumentSource::GetDepsReturn DocumentSourceOut::getDependencies(DepsTracker* deps) const {
- deps->needWholeDocument = true;
- return EXHAUSTIVE_ALL;
- }
+ if (!bufferedObjects.empty())
+ spill(bufferedObjects);
+
+ // Checking again to make sure we didn't become sharded while running.
+ uassert(17018,
+ str::stream() << "namespace '" << _outputNs.ns()
+ << "' became sharded so it can't be used for $out",
+ !_mongod->isSharded(_outputNs));
+
+ BSONObj rename =
+ BSON("renameCollection" << _tempNs.ns() << "to" << _outputNs.ns() << "dropTarget" << true);
+ BSONObj info;
+ bool ok = conn->runCommand("admin", rename, info);
+ uassert(16997, str::stream() << "renameCollection for $out failed: " << info, ok);
+
+ // We don't need to drop the temp collection in our destructor if the rename succeeded.
+ _tempNs = NamespaceString("");
+
+ // This "DocumentSource" doesn't produce output documents. This can change in the future
+ // if we support using $out in "tee" mode.
+ return boost::none;
+}
+
+DocumentSourceOut::DocumentSourceOut(const NamespaceString& outputNs,
+ const intrusive_ptr<ExpressionContext>& pExpCtx)
+ : DocumentSource(pExpCtx),
+ _done(false),
+ _tempNs(""), // filled in by prepTempCollection
+ _outputNs(outputNs) {}
+
+intrusive_ptr<DocumentSource> DocumentSourceOut::createFromBson(
+ BSONElement elem, const intrusive_ptr<ExpressionContext>& pExpCtx) {
+ uassert(16990,
+ str::stream() << "$out only supports a string argument, not " << typeName(elem.type()),
+ elem.type() == String);
+
+ NamespaceString outputNs(pExpCtx->ns.db().toString() + '.'
+ elem.str()); + uassert(17385, "Can't $out to special collection: " + elem.str(), !outputNs.isSpecial()); + return new DocumentSourceOut(outputNs, pExpCtx); +} + +Value DocumentSourceOut::serialize(bool explain) const { + massert( + 17000, "$out shouldn't have different db than input", _outputNs.db() == pExpCtx->ns.db()); + + return Value(DOC(getSourceName() << _outputNs.coll())); +} + +DocumentSource::GetDepsReturn DocumentSourceOut::getDependencies(DepsTracker* deps) const { + deps->needWholeDocument = true; + return EXHAUSTIVE_ALL; +} } diff --git a/src/mongo/db/pipeline/document_source_project.cpp b/src/mongo/db/pipeline/document_source_project.cpp index c652b177fa4..c0bc9aa68a2 100644 --- a/src/mongo/db/pipeline/document_source_project.cpp +++ b/src/mongo/db/pipeline/document_source_project.cpp @@ -38,91 +38,85 @@ namespace mongo { - using boost::intrusive_ptr; - using std::string; - using std::vector; - - const char DocumentSourceProject::projectName[] = "$project"; - - DocumentSourceProject::DocumentSourceProject(const intrusive_ptr<ExpressionContext>& pExpCtx, - const intrusive_ptr<ExpressionObject>& exprObj) - : DocumentSource(pExpCtx) - , pEO(exprObj) - { } - - const char *DocumentSourceProject::getSourceName() const { - return projectName; - } - - boost::optional<Document> DocumentSourceProject::getNext() { - pExpCtx->checkForInterrupt(); - - boost::optional<Document> input = pSource->getNext(); - if (!input) - return boost::none; - - /* create the result document */ - const size_t sizeHint = pEO->getSizeHint(); - MutableDocument out (sizeHint); - out.copyMetaDataFrom(*input); - - /* - Use the ExpressionObject to create the base result. - - If we're excluding fields at the top level, leave out the _id if - it is found, because we took care of it above. 
- */ - _variables->setRoot(*input); - pEO->addToDocument(out, *input, _variables.get()); - _variables->clearRoot(); - - return out.freeze(); - } - - intrusive_ptr<DocumentSource> DocumentSourceProject::optimize() { - intrusive_ptr<Expression> pE(pEO->optimize()); - pEO = boost::dynamic_pointer_cast<ExpressionObject>(pE); - return this; - } - - Value DocumentSourceProject::serialize(bool explain) const { - return Value(DOC(getSourceName() << pEO->serialize(explain))); - } - - intrusive_ptr<DocumentSource> DocumentSourceProject::createFromBson( - BSONElement elem, - const intrusive_ptr<ExpressionContext> &pExpCtx) { - - /* validate */ - uassert(15969, str::stream() << projectName << - " specification must be an object", - elem.type() == Object); - - Expression::ObjectCtx objectCtx( - Expression::ObjectCtx::DOCUMENT_OK - | Expression::ObjectCtx::TOP_LEVEL - | Expression::ObjectCtx::INCLUSION_OK - ); - - VariablesIdGenerator idGenerator; - VariablesParseState vps(&idGenerator); - intrusive_ptr<Expression> parsed = Expression::parseObject(elem.Obj(), &objectCtx, vps); - ExpressionObject* exprObj = dynamic_cast<ExpressionObject*>(parsed.get()); - massert(16402, "parseObject() returned wrong type of Expression", exprObj); - uassert(16403, "$projection requires at least one output field", exprObj->getFieldCount()); - - intrusive_ptr<DocumentSourceProject> pProject(new DocumentSourceProject(pExpCtx, exprObj)); - pProject->_variables.reset(new Variables(idGenerator.getIdCount())); - - BSONObj projectObj = elem.Obj(); - pProject->_raw = projectObj.getOwned(); - - return pProject; - } - - DocumentSource::GetDepsReturn DocumentSourceProject::getDependencies(DepsTracker* deps) const { - vector<string> path; // empty == top-level - pEO->addDependencies(deps, &path); - return EXHAUSTIVE_FIELDS; - } +using boost::intrusive_ptr; +using std::string; +using std::vector; + +const char DocumentSourceProject::projectName[] = "$project"; + +DocumentSourceProject::DocumentSourceProject(const intrusive_ptr<ExpressionContext>& pExpCtx, + const intrusive_ptr<ExpressionObject>& exprObj) + : DocumentSource(pExpCtx), pEO(exprObj) {} + +const char* DocumentSourceProject::getSourceName() const { + return projectName; +} + +boost::optional<Document> DocumentSourceProject::getNext() { + pExpCtx->checkForInterrupt(); + + boost::optional<Document> input = pSource->getNext(); + if (!input) + return boost::none; + + /* create the result document */ + const size_t sizeHint = pEO->getSizeHint(); + MutableDocument out(sizeHint); + out.copyMetaDataFrom(*input); + + /* + Use the ExpressionObject to create the base result. + + If we're excluding fields at the top level, leave out the _id if + it is found, because we took care of it above. 
+ */ + _variables->setRoot(*input); + pEO->addToDocument(out, *input, _variables.get()); + _variables->clearRoot(); + + return out.freeze(); +} + +intrusive_ptr<DocumentSource> DocumentSourceProject::optimize() { + intrusive_ptr<Expression> pE(pEO->optimize()); + pEO = boost::dynamic_pointer_cast<ExpressionObject>(pE); + return this; +} + +Value DocumentSourceProject::serialize(bool explain) const { + return Value(DOC(getSourceName() << pEO->serialize(explain))); +} + +intrusive_ptr<DocumentSource> DocumentSourceProject::createFromBson( + BSONElement elem, const intrusive_ptr<ExpressionContext>& pExpCtx) { + /* validate */ + uassert(15969, + str::stream() << projectName << " specification must be an object", + elem.type() == Object); + + Expression::ObjectCtx objectCtx(Expression::ObjectCtx::DOCUMENT_OK | + Expression::ObjectCtx::TOP_LEVEL | + Expression::ObjectCtx::INCLUSION_OK); + + VariablesIdGenerator idGenerator; + VariablesParseState vps(&idGenerator); + intrusive_ptr<Expression> parsed = Expression::parseObject(elem.Obj(), &objectCtx, vps); + ExpressionObject* exprObj = dynamic_cast<ExpressionObject*>(parsed.get()); + massert(16402, "parseObject() returned wrong type of Expression", exprObj); + uassert(16403, "$projection requires at least one output field", exprObj->getFieldCount()); + + intrusive_ptr<DocumentSourceProject> pProject(new DocumentSourceProject(pExpCtx, exprObj)); + pProject->_variables.reset(new Variables(idGenerator.getIdCount())); + + BSONObj projectObj = elem.Obj(); + pProject->_raw = projectObj.getOwned(); + + return pProject; +} + +DocumentSource::GetDepsReturn DocumentSourceProject::getDependencies(DepsTracker* deps) const { + vector<string> path; // empty == top-level + pEO->addDependencies(deps, &path); + return EXHAUSTIVE_FIELDS; +} } diff --git a/src/mongo/db/pipeline/document_source_redact.cpp b/src/mongo/db/pipeline/document_source_redact.cpp index 98916a0afe0..860dd3a8f73 100644 --- a/src/mongo/db/pipeline/document_source_redact.cpp +++ b/src/mongo/db/pipeline/document_source_redact.cpp @@ -39,135 +39,124 @@ namespace mongo { - using boost::intrusive_ptr; - using std::vector; +using boost::intrusive_ptr; +using std::vector; - const char DocumentSourceRedact::redactName[] = "$redact"; +const char DocumentSourceRedact::redactName[] = "$redact"; - DocumentSourceRedact::DocumentSourceRedact(const intrusive_ptr<ExpressionContext>& expCtx, - const intrusive_ptr<Expression>& expression) - : DocumentSource(expCtx) - , _expression(expression) - { } +DocumentSourceRedact::DocumentSourceRedact(const intrusive_ptr<ExpressionContext>& expCtx, + const intrusive_ptr<Expression>& expression) + : DocumentSource(expCtx), _expression(expression) {} - const char *DocumentSourceRedact::getSourceName() const { - return redactName; - } +const char* DocumentSourceRedact::getSourceName() const { + return redactName; +} - static const Value descendVal = Value("descend"); - static const Value pruneVal = Value("prune"); - static const Value keepVal = Value("keep"); +static const Value descendVal = Value("descend"); +static const Value pruneVal = Value("prune"); +static const Value keepVal = Value("keep"); - boost::optional<Document> DocumentSourceRedact::getNext() { - while (boost::optional<Document> in = pSource->getNext()) { - _variables->setRoot(*in); - _variables->setValue(_currentId, Value(*in)); - if (boost::optional<Document> result = redactObject()) { - return result; - } +boost::optional<Document> DocumentSourceRedact::getNext() { + while (boost::optional<Document> in = 
pSource->getNext()) { + _variables->setRoot(*in); + _variables->setValue(_currentId, Value(*in)); + if (boost::optional<Document> result = redactObject()) { + return result; } - - return boost::none; } - Value DocumentSourceRedact::redactValue(const Value& in) { - const BSONType valueType = in.getType(); - if (valueType == Object) { - _variables->setValue(_currentId, in); - const boost::optional<Document> result = redactObject(); - if (result) { - return Value(*result); - } - else { - return Value(); - } + return boost::none; +} + +Value DocumentSourceRedact::redactValue(const Value& in) { + const BSONType valueType = in.getType(); + if (valueType == Object) { + _variables->setValue(_currentId, in); + const boost::optional<Document> result = redactObject(); + if (result) { + return Value(*result); + } else { + return Value(); } - else if (valueType == Array) { - // TODO dont copy if possible - vector<Value> newArr; - const vector<Value>& arr = in.getArray(); - for (size_t i = 0; i < arr.size(); i++) { - if (arr[i].getType() == Object || arr[i].getType() == Array) { - const Value toAdd = redactValue(arr[i]) ; - if (!toAdd.missing()) { - newArr.push_back(toAdd); - } - } - else { - newArr.push_back(arr[i]); + } else if (valueType == Array) { + // TODO dont copy if possible + vector<Value> newArr; + const vector<Value>& arr = in.getArray(); + for (size_t i = 0; i < arr.size(); i++) { + if (arr[i].getType() == Object || arr[i].getType() == Array) { + const Value toAdd = redactValue(arr[i]); + if (!toAdd.missing()) { + newArr.push_back(toAdd); } + } else { + newArr.push_back(arr[i]); } - return Value(std::move(newArr)); - } - else { - return in; } + return Value(std::move(newArr)); + } else { + return in; } +} - boost::optional<Document> DocumentSourceRedact::redactObject() { - const Value expressionResult = _expression->evaluate(_variables.get()); - - if (expressionResult == keepVal) { - return _variables->getDocument(_currentId); - } - else if (expressionResult == pruneVal) { - return boost::optional<Document>(); - } - else if (expressionResult == descendVal) { - const Document in = _variables->getDocument(_currentId); - MutableDocument out; - out.copyMetaDataFrom(in); - FieldIterator fields(in); - while (fields.more()) { - const Document::FieldPair field(fields.next()); - - // This changes CURRENT so don't read from _variables after this - const Value val = redactValue(field.second); - if (!val.missing()) { - out.addField(field.first, val); - } +boost::optional<Document> DocumentSourceRedact::redactObject() { + const Value expressionResult = _expression->evaluate(_variables.get()); + + if (expressionResult == keepVal) { + return _variables->getDocument(_currentId); + } else if (expressionResult == pruneVal) { + return boost::optional<Document>(); + } else if (expressionResult == descendVal) { + const Document in = _variables->getDocument(_currentId); + MutableDocument out; + out.copyMetaDataFrom(in); + FieldIterator fields(in); + while (fields.more()) { + const Document::FieldPair field(fields.next()); + + // This changes CURRENT so don't read from _variables after this + const Value val = redactValue(field.second); + if (!val.missing()) { + out.addField(field.first, val); } - return out.freeze(); - } - else { - uasserted(17053, str::stream() << "$redact's expression should not return anything " - << "aside from the variables $$KEEP, $$DESCEND, and " - << "$$PRUNE, but returned " - << expressionResult.toString()); } + return out.freeze(); + } else { + uasserted(17053, + str::stream() << 
"$redact's expression should not return anything " + << "aside from the variables $$KEEP, $$DESCEND, and " + << "$$PRUNE, but returned " << expressionResult.toString()); } +} - intrusive_ptr<DocumentSource> DocumentSourceRedact::optimize() { - _expression = _expression->optimize(); - return this; - } +intrusive_ptr<DocumentSource> DocumentSourceRedact::optimize() { + _expression = _expression->optimize(); + return this; +} - Value DocumentSourceRedact::serialize(bool explain) const { - return Value(DOC(getSourceName() << _expression.get()->serialize(explain))); - } +Value DocumentSourceRedact::serialize(bool explain) const { + return Value(DOC(getSourceName() << _expression.get()->serialize(explain))); +} - intrusive_ptr<DocumentSource> DocumentSourceRedact::createFromBson( - BSONElement elem, - const intrusive_ptr<ExpressionContext>& expCtx) { - - VariablesIdGenerator idGenerator; - VariablesParseState vps(&idGenerator); - Variables::Id currentId = vps.defineVariable("CURRENT"); // will differ from ROOT - Variables::Id decendId = vps.defineVariable("DESCEND"); - Variables::Id pruneId = vps.defineVariable("PRUNE"); - Variables::Id keepId = vps.defineVariable("KEEP"); - intrusive_ptr<Expression> expression = Expression::parseOperand(elem, vps); - intrusive_ptr<DocumentSourceRedact> source = new DocumentSourceRedact(expCtx, expression); - - // TODO figure out how much of this belongs in constructor and how much here. - // Set up variables. Never need to reset DESCEND, PRUNE, or KEEP. - source->_currentId = currentId; - source->_variables.reset(new Variables(idGenerator.getIdCount())); - source->_variables->setValue(decendId, descendVal); - source->_variables->setValue(pruneId, pruneVal); - source->_variables->setValue(keepId, keepVal); - - - return source; - } +intrusive_ptr<DocumentSource> DocumentSourceRedact::createFromBson( + BSONElement elem, const intrusive_ptr<ExpressionContext>& expCtx) { + VariablesIdGenerator idGenerator; + VariablesParseState vps(&idGenerator); + Variables::Id currentId = vps.defineVariable("CURRENT"); // will differ from ROOT + Variables::Id decendId = vps.defineVariable("DESCEND"); + Variables::Id pruneId = vps.defineVariable("PRUNE"); + Variables::Id keepId = vps.defineVariable("KEEP"); + intrusive_ptr<Expression> expression = Expression::parseOperand(elem, vps); + intrusive_ptr<DocumentSourceRedact> source = new DocumentSourceRedact(expCtx, expression); + + // TODO figure out how much of this belongs in constructor and how much here. + // Set up variables. Never need to reset DESCEND, PRUNE, or KEEP. 
+ source->_currentId = currentId; + source->_variables.reset(new Variables(idGenerator.getIdCount())); + source->_variables->setValue(decendId, descendVal); + source->_variables->setValue(pruneId, pruneVal); + source->_variables->setValue(keepId, keepVal); + + + return source; +} } diff --git a/src/mongo/db/pipeline/document_source_skip.cpp b/src/mongo/db/pipeline/document_source_skip.cpp index 4899565fe99..3a1430b3b98 100644 --- a/src/mongo/db/pipeline/document_source_skip.cpp +++ b/src/mongo/db/pipeline/document_source_skip.cpp @@ -37,78 +37,72 @@ namespace mongo { - using boost::intrusive_ptr; +using boost::intrusive_ptr; - const char DocumentSourceSkip::skipName[] = "$skip"; +const char DocumentSourceSkip::skipName[] = "$skip"; - DocumentSourceSkip::DocumentSourceSkip(const intrusive_ptr<ExpressionContext> &pExpCtx): - DocumentSource(pExpCtx), - _skip(0), - _needToSkip(true) { - } +DocumentSourceSkip::DocumentSourceSkip(const intrusive_ptr<ExpressionContext>& pExpCtx) + : DocumentSource(pExpCtx), _skip(0), _needToSkip(true) {} - const char *DocumentSourceSkip::getSourceName() const { - return skipName; - } +const char* DocumentSourceSkip::getSourceName() const { + return skipName; +} - bool DocumentSourceSkip::coalesce( - const intrusive_ptr<DocumentSource> &pNextSource) { - DocumentSourceSkip *pSkip = - dynamic_cast<DocumentSourceSkip *>(pNextSource.get()); +bool DocumentSourceSkip::coalesce(const intrusive_ptr<DocumentSource>& pNextSource) { + DocumentSourceSkip* pSkip = dynamic_cast<DocumentSourceSkip*>(pNextSource.get()); - /* if it's not another $skip, we can't coalesce */ - if (!pSkip) - return false; + /* if it's not another $skip, we can't coalesce */ + if (!pSkip) + return false; - /* we need to skip over the sum of the two consecutive $skips */ - _skip += pSkip->_skip; - return true; - } + /* we need to skip over the sum of the two consecutive $skips */ + _skip += pSkip->_skip; + return true; +} - boost::optional<Document> DocumentSourceSkip::getNext() { - pExpCtx->checkForInterrupt(); +boost::optional<Document> DocumentSourceSkip::getNext() { + pExpCtx->checkForInterrupt(); - if (_needToSkip) { - _needToSkip = false; - for (long long i=0; i < _skip; i++) { - if (!pSource->getNext()) - return boost::none; - } + if (_needToSkip) { + _needToSkip = false; + for (long long i = 0; i < _skip; i++) { + if (!pSource->getNext()) + return boost::none; } - - return pSource->getNext(); } - Value DocumentSourceSkip::serialize(bool explain) const { - return Value(DOC(getSourceName() << _skip)); - } + return pSource->getNext(); +} - intrusive_ptr<DocumentSource> DocumentSourceSkip::optimize() { - return _skip == 0 ? nullptr : this; - } +Value DocumentSourceSkip::serialize(bool explain) const { + return Value(DOC(getSourceName() << _skip)); +} - intrusive_ptr<DocumentSourceSkip> DocumentSourceSkip::create( - const intrusive_ptr<ExpressionContext> &pExpCtx) { - intrusive_ptr<DocumentSourceSkip> pSource( - new DocumentSourceSkip(pExpCtx)); - return pSource; - } +intrusive_ptr<DocumentSource> DocumentSourceSkip::optimize() { + return _skip == 0 ? 
nullptr : this; +} - intrusive_ptr<DocumentSource> DocumentSourceSkip::createFromBson( - BSONElement elem, - const intrusive_ptr<ExpressionContext> &pExpCtx) { - uassert(15972, str::stream() << DocumentSourceSkip::skipName << - ": the value to skip must be a number", - elem.isNumber()); +intrusive_ptr<DocumentSourceSkip> DocumentSourceSkip::create( + const intrusive_ptr<ExpressionContext>& pExpCtx) { + intrusive_ptr<DocumentSourceSkip> pSource(new DocumentSourceSkip(pExpCtx)); + return pSource; +} - intrusive_ptr<DocumentSourceSkip> pSkip( - DocumentSourceSkip::create(pExpCtx)); +intrusive_ptr<DocumentSource> DocumentSourceSkip::createFromBson( + BSONElement elem, const intrusive_ptr<ExpressionContext>& pExpCtx) { + uassert(15972, + str::stream() << DocumentSourceSkip::skipName + << ": the value to skip must be a number", + elem.isNumber()); - pSkip->_skip = elem.numberLong(); - uassert(15956, str::stream() << DocumentSourceSkip::skipName << - ": the number to skip cannot be negative", - pSkip->_skip >= 0); + intrusive_ptr<DocumentSourceSkip> pSkip(DocumentSourceSkip::create(pExpCtx)); - return pSkip; - } + pSkip->_skip = elem.numberLong(); + uassert(15956, + str::stream() << DocumentSourceSkip::skipName + << ": the number to skip cannot be negative", + pSkip->_skip >= 0); + + return pSkip; +} } diff --git a/src/mongo/db/pipeline/document_source_sort.cpp b/src/mongo/db/pipeline/document_source_sort.cpp index f4e57d5c8ae..1b7396b8513 100644 --- a/src/mongo/db/pipeline/document_source_sort.cpp +++ b/src/mongo/db/pipeline/document_source_sort.cpp @@ -39,329 +39,324 @@ namespace mongo { - using boost::intrusive_ptr; - using std::unique_ptr; - using std::make_pair; - using std::string; - using std::vector; +using boost::intrusive_ptr; +using std::unique_ptr; +using std::make_pair; +using std::string; +using std::vector; - const char DocumentSourceSort::sortName[] = "$sort"; +const char DocumentSourceSort::sortName[] = "$sort"; - const char *DocumentSourceSort::getSourceName() const { - return sortName; - } +const char* DocumentSourceSort::getSourceName() const { + return sortName; +} - boost::optional<Document> DocumentSourceSort::getNext() { - pExpCtx->checkForInterrupt(); +boost::optional<Document> DocumentSourceSort::getNext() { + pExpCtx->checkForInterrupt(); - if (!populated) - populate(); + if (!populated) + populate(); - if (!_output || !_output->more()) - return boost::none; + if (!_output || !_output->more()) + return boost::none; - return _output->next().second; - } + return _output->next().second; +} - void DocumentSourceSort::serializeToArray(vector<Value>& array, bool explain) const { - if (explain) { // always one Value for combined $sort + $limit - array.push_back(Value(DOC(getSourceName() << - DOC("sortKey" << serializeSortKey(explain) - << "mergePresorted" << (_mergingPresorted ? Value(true) : Value()) - << "limit" << (limitSrc ? Value(limitSrc->getLimit()) : Value()))))); - } - else { // one Value for $sort and maybe a Value for $limit - MutableDocument inner (serializeSortKey(explain)); - if (_mergingPresorted) - inner["$mergePresorted"] = Value(true); - array.push_back(Value(DOC(getSourceName() << inner.freeze()))); - - if (limitSrc) { - limitSrc->serializeToArray(array); - } +void DocumentSourceSort::serializeToArray(vector<Value>& array, bool explain) const { + if (explain) { // always one Value for combined $sort + $limit + array.push_back( + Value(DOC(getSourceName() + << DOC("sortKey" << serializeSortKey(explain) << "mergePresorted" + << (_mergingPresorted ? 
Value(true) : Value()) << "limit" + << (limitSrc ? Value(limitSrc->getLimit()) : Value()))))); + } else { // one Value for $sort and maybe a Value for $limit + MutableDocument inner(serializeSortKey(explain)); + if (_mergingPresorted) + inner["$mergePresorted"] = Value(true); + array.push_back(Value(DOC(getSourceName() << inner.freeze()))); + + if (limitSrc) { + limitSrc->serializeToArray(array); } } +} - void DocumentSourceSort::dispose() { - _output.reset(); - pSource->dispose(); - } +void DocumentSourceSort::dispose() { + _output.reset(); + pSource->dispose(); +} - DocumentSourceSort::DocumentSourceSort(const intrusive_ptr<ExpressionContext> &pExpCtx) - : DocumentSource(pExpCtx) - , populated(false) - , _mergingPresorted(false) - {} +DocumentSourceSort::DocumentSourceSort(const intrusive_ptr<ExpressionContext>& pExpCtx) + : DocumentSource(pExpCtx), populated(false), _mergingPresorted(false) {} - long long DocumentSourceSort::getLimit() const { - return limitSrc ? limitSrc->getLimit() : -1; - } +long long DocumentSourceSort::getLimit() const { + return limitSrc ? limitSrc->getLimit() : -1; +} - bool DocumentSourceSort::coalesce(const intrusive_ptr<DocumentSource> &pNextSource) { - if (!limitSrc) { - limitSrc = dynamic_cast<DocumentSourceLimit*>(pNextSource.get()); - return limitSrc.get(); // false if next is not a $limit - } - else { - return limitSrc->coalesce(pNextSource); - } +bool DocumentSourceSort::coalesce(const intrusive_ptr<DocumentSource>& pNextSource) { + if (!limitSrc) { + limitSrc = dynamic_cast<DocumentSourceLimit*>(pNextSource.get()); + return limitSrc.get(); // false if next is not a $limit + } else { + return limitSrc->coalesce(pNextSource); } +} - void DocumentSourceSort::addKey(const string& fieldPath, bool ascending) { - VariablesIdGenerator idGenerator; - VariablesParseState vps(&idGenerator); - vSortKey.push_back(ExpressionFieldPath::parse("$$ROOT." + fieldPath, vps)); - vAscending.push_back(ascending); - } +void DocumentSourceSort::addKey(const string& fieldPath, bool ascending) { + VariablesIdGenerator idGenerator; + VariablesParseState vps(&idGenerator); + vSortKey.push_back(ExpressionFieldPath::parse("$$ROOT." + fieldPath, vps)); + vAscending.push_back(ascending); +} - Document DocumentSourceSort::serializeSortKey(bool explain) const { - MutableDocument keyObj; - // add the key fields - const size_t n = vSortKey.size(); - for(size_t i = 0; i < n; ++i) { - if (ExpressionFieldPath* efp = dynamic_cast<ExpressionFieldPath*>(vSortKey[i].get())) { - // ExpressionFieldPath gets special syntax that includes direction - const FieldPath& withVariable = efp->getFieldPath(); - verify(withVariable.getPathLength() > 1); - verify(withVariable.getFieldName(0) == "ROOT"); - const string fieldPath = withVariable.tail().getPath(false); - - // append a named integer based on the sort order - keyObj.setField(fieldPath, Value(vAscending[i] ? 
1 : -1)); - } - else { - // other expressions use a made-up field name - keyObj[string(str::stream() << "$computed" << i)] = vSortKey[i]->serialize(explain); - } +Document DocumentSourceSort::serializeSortKey(bool explain) const { + MutableDocument keyObj; + // add the key fields + const size_t n = vSortKey.size(); + for (size_t i = 0; i < n; ++i) { + if (ExpressionFieldPath* efp = dynamic_cast<ExpressionFieldPath*>(vSortKey[i].get())) { + // ExpressionFieldPath gets special syntax that includes direction + const FieldPath& withVariable = efp->getFieldPath(); + verify(withVariable.getPathLength() > 1); + verify(withVariable.getFieldName(0) == "ROOT"); + const string fieldPath = withVariable.tail().getPath(false); + + // append a named integer based on the sort order + keyObj.setField(fieldPath, Value(vAscending[i] ? 1 : -1)); + } else { + // other expressions use a made-up field name + keyObj[string(str::stream() << "$computed" << i)] = vSortKey[i]->serialize(explain); } - return keyObj.freeze(); } + return keyObj.freeze(); +} - DocumentSource::GetDepsReturn DocumentSourceSort::getDependencies(DepsTracker* deps) const { - for(size_t i = 0; i < vSortKey.size(); ++i) { - vSortKey[i]->addDependencies(deps); - } - - return SEE_NEXT; +DocumentSource::GetDepsReturn DocumentSourceSort::getDependencies(DepsTracker* deps) const { + for (size_t i = 0; i < vSortKey.size(); ++i) { + vSortKey[i]->addDependencies(deps); } + return SEE_NEXT; +} - intrusive_ptr<DocumentSource> DocumentSourceSort::createFromBson( - BSONElement elem, - const intrusive_ptr<ExpressionContext> &pExpCtx) { - uassert(15973, str::stream() << " the " << - sortName << " key specification must be an object", - elem.type() == Object); - return create(pExpCtx, elem.embeddedObject()); - } +intrusive_ptr<DocumentSource> DocumentSourceSort::createFromBson( + BSONElement elem, const intrusive_ptr<ExpressionContext>& pExpCtx) { + uassert(15973, + str::stream() << " the " << sortName << " key specification must be an object", + elem.type() == Object); - intrusive_ptr<DocumentSourceSort> DocumentSourceSort::create( - const intrusive_ptr<ExpressionContext> &pExpCtx, - BSONObj sortOrder, - long long limit) { + return create(pExpCtx, elem.embeddedObject()); +} - intrusive_ptr<DocumentSourceSort> pSort = new DocumentSourceSort(pExpCtx); +intrusive_ptr<DocumentSourceSort> DocumentSourceSort::create( + const intrusive_ptr<ExpressionContext>& pExpCtx, BSONObj sortOrder, long long limit) { + intrusive_ptr<DocumentSourceSort> pSort = new DocumentSourceSort(pExpCtx); - /* check for then iterate over the sort object */ - BSONForEach(keyField, sortOrder) { - const char* fieldName = keyField.fieldName(); + /* check for then iterate over the sort object */ + BSONForEach(keyField, sortOrder) { + const char* fieldName = keyField.fieldName(); - if (str::equals(fieldName, "$mergePresorted")) { - verify(keyField.Bool()); - pSort->_mergingPresorted = true; - continue; - } + if (str::equals(fieldName, "$mergePresorted")) { + verify(keyField.Bool()); + pSort->_mergingPresorted = true; + continue; + } - if (keyField.type() == Object) { - // this restriction is due to needing to figure out sort direction - uassert(17312, - "the only expression supported by $sort right now is {$meta: 'textScore'}", - keyField.Obj() == BSON("$meta" << "textScore")); + if (keyField.type() == Object) { + // this restriction is due to needing to figure out sort direction + uassert(17312, + "the only expression supported by $sort right now is {$meta: 'textScore'}", + keyField.Obj() == 
BSON("$meta" + << "textScore")); - pSort->vSortKey.push_back(new ExpressionMeta()); - pSort->vAscending.push_back(false); // best scoring documents first - continue; - } - - uassert(15974, - "$sort key ordering must be specified using a number or {$meta: 'textScore'}", - keyField.isNumber()); + pSort->vSortKey.push_back(new ExpressionMeta()); + pSort->vAscending.push_back(false); // best scoring documents first + continue; + } - int sortOrder = keyField.numberInt(); + uassert(15974, + "$sort key ordering must be specified using a number or {$meta: 'textScore'}", + keyField.isNumber()); - uassert(15975, "$sort key ordering must be 1 (for ascending) or -1 (for descending)", - ((sortOrder == 1) || (sortOrder == -1))); + int sortOrder = keyField.numberInt(); - pSort->addKey(fieldName, (sortOrder > 0)); - } + uassert(15975, + "$sort key ordering must be 1 (for ascending) or -1 (for descending)", + ((sortOrder == 1) || (sortOrder == -1))); - uassert(15976, str::stream() << sortName << " must have at least one sort key", - !pSort->vSortKey.empty()); + pSort->addKey(fieldName, (sortOrder > 0)); + } - if (limit > 0) { - bool coalesced = pSort->coalesce(DocumentSourceLimit::create(pExpCtx, limit)); - verify(coalesced); // should always coalesce - verify(pSort->getLimit() == limit); - } + uassert(15976, + str::stream() << sortName << " must have at least one sort key", + !pSort->vSortKey.empty()); - return pSort; + if (limit > 0) { + bool coalesced = pSort->coalesce(DocumentSourceLimit::create(pExpCtx, limit)); + verify(coalesced); // should always coalesce + verify(pSort->getLimit() == limit); } - SortOptions DocumentSourceSort::makeSortOptions() const { - /* make sure we've got a sort key */ - verify(vSortKey.size()); + return pSort; +} - SortOptions opts; - if (limitSrc) - opts.limit = limitSrc->getLimit(); +SortOptions DocumentSourceSort::makeSortOptions() const { + /* make sure we've got a sort key */ + verify(vSortKey.size()); - opts.maxMemoryUsageBytes = 100*1024*1024; - if (pExpCtx->extSortAllowed && !pExpCtx->inRouter) { - opts.extSortAllowed = true; - opts.tempDir = pExpCtx->tempDir; - } + SortOptions opts; + if (limitSrc) + opts.limit = limitSrc->getLimit(); - return opts; + opts.maxMemoryUsageBytes = 100 * 1024 * 1024; + if (pExpCtx->extSortAllowed && !pExpCtx->inRouter) { + opts.extSortAllowed = true; + opts.tempDir = pExpCtx->tempDir; } - void DocumentSourceSort::populate() { - if (_mergingPresorted) { - typedef DocumentSourceMergeCursors DSCursors; - typedef DocumentSourceCommandShards DSCommands; - if (DSCursors* castedSource = dynamic_cast<DSCursors*>(pSource)) { - populateFromCursors(castedSource->getCursors()); - } else if (DSCommands* castedSource = dynamic_cast<DSCommands*>(pSource)) { - populateFromBsonArrays(castedSource->getArrays()); - } else { - msgasserted(17196, "can only mergePresorted from MergeCursors and CommandShards"); - } + return opts; +} + +void DocumentSourceSort::populate() { + if (_mergingPresorted) { + typedef DocumentSourceMergeCursors DSCursors; + typedef DocumentSourceCommandShards DSCommands; + if (DSCursors* castedSource = dynamic_cast<DSCursors*>(pSource)) { + populateFromCursors(castedSource->getCursors()); + } else if (DSCommands* castedSource = dynamic_cast<DSCommands*>(pSource)) { + populateFromBsonArrays(castedSource->getArrays()); } else { - unique_ptr<MySorter> sorter (MySorter::make(makeSortOptions(), Comparator(*this))); - while (boost::optional<Document> next = pSource->getNext()) { - sorter->add(extractKey(*next), *next); - } - 
_output.reset(sorter->done()); + msgasserted(17196, "can only mergePresorted from MergeCursors and CommandShards"); } - populated = true; + } else { + unique_ptr<MySorter> sorter(MySorter::make(makeSortOptions(), Comparator(*this))); + while (boost::optional<Document> next = pSource->getNext()) { + sorter->add(extractKey(*next), *next); + } + _output.reset(sorter->done()); } + populated = true; +} - class DocumentSourceSort::IteratorFromCursor : public MySorter::Iterator { - public: - IteratorFromCursor(DocumentSourceSort* sorter, DBClientCursor* cursor) - : _sorter(sorter) - , _cursor(cursor) - {} - - bool more() { return _cursor->more(); } - Data next() { - const Document doc = DocumentSourceMergeCursors::nextSafeFrom(_cursor); - return make_pair(_sorter->extractKey(doc), doc); - } - private: - DocumentSourceSort* _sorter; - DBClientCursor* _cursor; - }; - - void DocumentSourceSort::populateFromCursors(const vector<DBClientCursor*>& cursors) { - vector<std::shared_ptr<MySorter::Iterator> > iterators; - for (size_t i = 0; i < cursors.size(); i++) { - iterators.push_back(std::make_shared<IteratorFromCursor>(this, cursors[i])); - } +class DocumentSourceSort::IteratorFromCursor : public MySorter::Iterator { +public: + IteratorFromCursor(DocumentSourceSort* sorter, DBClientCursor* cursor) + : _sorter(sorter), _cursor(cursor) {} - _output.reset(MySorter::Iterator::merge(iterators, makeSortOptions(), Comparator(*this))); + bool more() { + return _cursor->more(); + } + Data next() { + const Document doc = DocumentSourceMergeCursors::nextSafeFrom(_cursor); + return make_pair(_sorter->extractKey(doc), doc); } - class DocumentSourceSort::IteratorFromBsonArray : public MySorter::Iterator { - public: - IteratorFromBsonArray(DocumentSourceSort* sorter, const BSONArray& array) - : _sorter(sorter) - , _iterator(array) - {} - - bool more() { return _iterator.more(); } - Data next() { - Document doc(_iterator.next().Obj()); - return make_pair(_sorter->extractKey(doc), doc); - } - private: - DocumentSourceSort* _sorter; - BSONObjIterator _iterator; - }; - - void DocumentSourceSort::populateFromBsonArrays(const vector<BSONArray>& arrays) { - vector<std::shared_ptr<MySorter::Iterator> > iterators; - for (size_t i = 0; i < arrays.size(); i++) { - iterators.push_back(std::make_shared<IteratorFromBsonArray>(this, arrays[i])); - } +private: + DocumentSourceSort* _sorter; + DBClientCursor* _cursor; +}; - _output.reset(MySorter::Iterator::merge(iterators, makeSortOptions(), Comparator(*this))); +void DocumentSourceSort::populateFromCursors(const vector<DBClientCursor*>& cursors) { + vector<std::shared_ptr<MySorter::Iterator>> iterators; + for (size_t i = 0; i < cursors.size(); i++) { + iterators.push_back(std::make_shared<IteratorFromCursor>(this, cursors[i])); } - Value DocumentSourceSort::extractKey(const Document& d) const { - Variables vars(0, d); - if (vSortKey.size() == 1) { - return vSortKey[0]->evaluate(&vars); - } + _output.reset(MySorter::Iterator::merge(iterators, makeSortOptions(), Comparator(*this))); +} - vector<Value> keys; - keys.reserve(vSortKey.size()); - for (size_t i=0; i < vSortKey.size(); i++) { - keys.push_back(vSortKey[i]->evaluate(&vars)); - } - return Value(std::move(keys)); +class DocumentSourceSort::IteratorFromBsonArray : public MySorter::Iterator { +public: + IteratorFromBsonArray(DocumentSourceSort* sorter, const BSONArray& array) + : _sorter(sorter), _iterator(array) {} + + bool more() { + return _iterator.more(); + } + Data next() { + Document doc(_iterator.next().Obj()); + return 
make_pair(_sorter->extractKey(doc), doc); } - int DocumentSourceSort::compare(const Value& lhs, const Value& rhs) const { - - /* - populate() already checked that there is a non-empty sort key, - so we shouldn't have to worry about that here. - - However, the tricky part is what to do is none of the sort keys are - present. In this case, consider the document less. - */ - const size_t n = vSortKey.size(); - if (n == 1) { // simple fast case - if (vAscending[0]) - return Value::compare(lhs, rhs); - else - return -Value::compare(lhs, rhs); - } +private: + DocumentSourceSort* _sorter; + BSONObjIterator _iterator; +}; - // compound sort - for (size_t i = 0; i < n; i++) { - int cmp = Value::compare(lhs[i], rhs[i]); - if (cmp) { - /* if necessary, adjust the return value by the key ordering */ - if (!vAscending[i]) - cmp = -cmp; +void DocumentSourceSort::populateFromBsonArrays(const vector<BSONArray>& arrays) { + vector<std::shared_ptr<MySorter::Iterator>> iterators; + for (size_t i = 0; i < arrays.size(); i++) { + iterators.push_back(std::make_shared<IteratorFromBsonArray>(this, arrays[i])); + } - return cmp; - } - } + _output.reset(MySorter::Iterator::merge(iterators, makeSortOptions(), Comparator(*this))); +} - /* - If we got here, everything matched (or didn't exist), so we'll - consider the documents equal for purposes of this sort. - */ - return 0; +Value DocumentSourceSort::extractKey(const Document& d) const { + Variables vars(0, d); + if (vSortKey.size() == 1) { + return vSortKey[0]->evaluate(&vars); } - intrusive_ptr<DocumentSource> DocumentSourceSort::getShardSource() { - verify(!_mergingPresorted); - return this; + vector<Value> keys; + keys.reserve(vSortKey.size()); + for (size_t i = 0; i < vSortKey.size(); i++) { + keys.push_back(vSortKey[i]->evaluate(&vars)); } + return Value(std::move(keys)); +} + +int DocumentSourceSort::compare(const Value& lhs, const Value& rhs) const { + /* + populate() already checked that there is a non-empty sort key, + so we shouldn't have to worry about that here. + + However, the tricky part is what to do is none of the sort keys are + present. In this case, consider the document less. + */ + const size_t n = vSortKey.size(); + if (n == 1) { // simple fast case + if (vAscending[0]) + return Value::compare(lhs, rhs); + else + return -Value::compare(lhs, rhs); + } + + // compound sort + for (size_t i = 0; i < n; i++) { + int cmp = Value::compare(lhs[i], rhs[i]); + if (cmp) { + /* if necessary, adjust the return value by the key ordering */ + if (!vAscending[i]) + cmp = -cmp; - intrusive_ptr<DocumentSource> DocumentSourceSort::getMergeSource() { - verify(!_mergingPresorted); - intrusive_ptr<DocumentSourceSort> other = new DocumentSourceSort(pExpCtx); - other->vAscending = vAscending; - other->vSortKey = vSortKey; - other->limitSrc = limitSrc; - other->_mergingPresorted = true; - return other; + return cmp; + } } + + /* + If we got here, everything matched (or didn't exist), so we'll + consider the documents equal for purposes of this sort. 
+ */ + return 0; +} + +intrusive_ptr<DocumentSource> DocumentSourceSort::getShardSource() { + verify(!_mergingPresorted); + return this; +} + +intrusive_ptr<DocumentSource> DocumentSourceSort::getMergeSource() { + verify(!_mergingPresorted); + intrusive_ptr<DocumentSourceSort> other = new DocumentSourceSort(pExpCtx); + other->vAscending = vAscending; + other->vSortKey = vSortKey; + other->limitSrc = limitSrc; + other->_mergingPresorted = true; + return other; +} } #include "mongo/db/sorter/sorter.cpp" diff --git a/src/mongo/db/pipeline/document_source_unwind.cpp b/src/mongo/db/pipeline/document_source_unwind.cpp index 2caf7f95c64..8ec126b967c 100644 --- a/src/mongo/db/pipeline/document_source_unwind.cpp +++ b/src/mongo/db/pipeline/document_source_unwind.cpp @@ -36,150 +36,141 @@ namespace mongo { - using boost::intrusive_ptr; - using std::string; - using std::vector; - - /** Helper class to unwind array from a single document. */ - class DocumentSourceUnwind::Unwinder { - public: - /** @param unwindPath is the field path to the array to unwind. */ - Unwinder(const FieldPath& unwindPath); - /** Reset the unwinder to unwind a new document. */ - void resetDocument(const Document& document); - - /** - * @return the next document unwound from the document provided to resetDocument(), using - * the current value in the array located at the provided unwindPath. - * - * Returns boost::none if the array is exhausted. - */ - boost::optional<Document> getNext(); - - private: - // Path to the array to unwind. - const FieldPath _unwindPath; - - Value _inputArray; - MutableDocument _output; - - // Document indexes of the field path components. - vector<Position> _unwindPathFieldIndexes; - // Index into the _inputArray to return next. - size_t _index; - }; - - DocumentSourceUnwind::Unwinder::Unwinder(const FieldPath& unwindPath): - _unwindPath(unwindPath) { +using boost::intrusive_ptr; +using std::string; +using std::vector; + +/** Helper class to unwind array from a single document. */ +class DocumentSourceUnwind::Unwinder { +public: + /** @param unwindPath is the field path to the array to unwind. */ + Unwinder(const FieldPath& unwindPath); + /** Reset the unwinder to unwind a new document. */ + void resetDocument(const Document& document); + + /** + * @return the next document unwound from the document provided to resetDocument(), using + * the current value in the array located at the provided unwindPath. + * + * Returns boost::none if the array is exhausted. + */ + boost::optional<Document> getNext(); + +private: + // Path to the array to unwind. + const FieldPath _unwindPath; + + Value _inputArray; + MutableDocument _output; + + // Document indexes of the field path components. + vector<Position> _unwindPathFieldIndexes; + // Index into the _inputArray to return next. + size_t _index; +}; + +DocumentSourceUnwind::Unwinder::Unwinder(const FieldPath& unwindPath) : _unwindPath(unwindPath) {} + +void DocumentSourceUnwind::Unwinder::resetDocument(const Document& document) { + // Reset document specific attributes. + _inputArray = Value(); + _output.reset(document); + _unwindPathFieldIndexes.clear(); + _index = 0; + + Value pathValue = document.getNestedField(_unwindPath, &_unwindPathFieldIndexes); + if (pathValue.nullish()) { + // The path does not exist or is null. + return; } - void DocumentSourceUnwind::Unwinder::resetDocument(const Document& document) { - - // Reset document specific attributes. 
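The compound-key comparison that concludes in the hunk above is easy to restate in isolation: compare key components left to right, flip the sign for a descending component, and treat all-equal (or all-missing) components as a tie. A self-contained sketch, with ints standing in for Value and a plain three-way comparison in place of Value::compare():

    #include <cstddef>
    #include <vector>

    // Direction-aware lexicographic compare, mirroring DocumentSourceSort::compare().
    int compareKeys(const std::vector<int>& lhs,
                    const std::vector<int>& rhs,
                    const std::vector<bool>& ascending) {
        for (std::size_t i = 0; i < lhs.size(); i++) {
            int cmp = (lhs[i] < rhs[i]) ? -1 : (lhs[i] > rhs[i]) ? 1 : 0;
            if (cmp)
                return ascending[i] ? cmp : -cmp;  // adjust by the key ordering
        }
        return 0;  // equal for the purposes of this sort
    }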
- _inputArray = Value(); - _output.reset(document); - _unwindPathFieldIndexes.clear(); - _index = 0; + _inputArray = pathValue; +} - Value pathValue = document.getNestedField(_unwindPath, &_unwindPathFieldIndexes); - if (pathValue.nullish()) { - // The path does not exist or is null. - return; - } +boost::optional<Document> DocumentSourceUnwind::Unwinder::getNext() { + if (_inputArray.missing()) + return boost::none; - _inputArray = pathValue; - } - - boost::optional<Document> DocumentSourceUnwind::Unwinder::getNext() { - if (_inputArray.missing()) - return boost::none; + // If needed, this will automatically clone all the documents along the + // field path so that the end values are not shared across documents + // that have come out of this pipeline operator. This is a partial deep + // clone. Because the value at the end will be replaced, everything + // along the path leading to that will be replaced in order not to share + // that change with any other clones (or the original). - // If needed, this will automatically clone all the documents along the - // field path so that the end values are not shared across documents - // that have come out of this pipeline operator. This is a partial deep - // clone. Because the value at the end will be replaced, everything - // along the path leading to that will be replaced in order not to share - // that change with any other clones (or the original). - - if (_inputArray.getType() == Array) { - if (_index == _inputArray.getArrayLength()) - return boost::none; - _output.setNestedField(_unwindPathFieldIndexes, _inputArray[_index]); - } - else if (_index > 0) { + if (_inputArray.getType() == Array) { + if (_index == _inputArray.getArrayLength()) return boost::none; - } - else { - //_output.setNestedField(_unwindPathFieldIndexes, _inputArray); - } - _index++; - return _output.peek(); + _output.setNestedField(_unwindPathFieldIndexes, _inputArray[_index]); + } else if (_index > 0) { + return boost::none; + } else { + //_output.setNestedField(_unwindPathFieldIndexes, _inputArray); } + _index++; + return _output.peek(); +} - const char DocumentSourceUnwind::unwindName[] = "$unwind"; +const char DocumentSourceUnwind::unwindName[] = "$unwind"; - DocumentSourceUnwind::DocumentSourceUnwind( - const intrusive_ptr<ExpressionContext> &pExpCtx): - DocumentSource(pExpCtx) { - } +DocumentSourceUnwind::DocumentSourceUnwind(const intrusive_ptr<ExpressionContext>& pExpCtx) + : DocumentSource(pExpCtx) {} - const char *DocumentSourceUnwind::getSourceName() const { - return unwindName; - } - - boost::optional<Document> DocumentSourceUnwind::getNext() { - pExpCtx->checkForInterrupt(); +const char* DocumentSourceUnwind::getSourceName() const { + return unwindName; +} - boost::optional<Document> out = _unwinder->getNext(); - while (!out) { - // No more elements in array currently being unwound. This will loop if the input - // document is missing the unwind field or has an empty array. - boost::optional<Document> input = pSource->getNext(); - if (!input) - return boost::none; // input exhausted +boost::optional<Document> DocumentSourceUnwind::getNext() { + pExpCtx->checkForInterrupt(); - // Try to extract an output document from the new input document. - _unwinder->resetDocument(*input); - out = _unwinder->getNext(); - } + boost::optional<Document> out = _unwinder->getNext(); + while (!out) { + // No more elements in array currently being unwound. This will loop if the input + // document is missing the unwind field or has an empty array. 
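Worth spelling out why getNext() needs a loop rather than a single pull: one upstream document can unwind to zero outputs, so the stage may consume several inputs before producing anything. A stand-alone model of the same pull-through behaviour, with vectors of ints standing in for the documents' array field (an empty vector models a missing or empty array) and std::optional in place of the boost::optional used in the diff:

    #include <cstddef>
    #include <optional>
    #include <utility>
    #include <vector>

    class Flattener {
    public:
        explicit Flattener(std::vector<std::vector<int>> inputs) : _inputs(std::move(inputs)) {}

        std::optional<int> getNext() {
            // Skip past inputs that unwind to nothing, as $unwind does for
            // documents missing the field or holding an empty array.
            while (_doc < _inputs.size() && _index >= _inputs[_doc].size()) {
                ++_doc;
                _index = 0;
            }
            if (_doc == _inputs.size())
                return std::nullopt;  // upstream exhausted
            return _inputs[_doc][_index++];
        }

    private:
        std::vector<std::vector<int>> _inputs;
        std::size_t _doc = 0;
        std::size_t _index = 0;
    };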
+ boost::optional<Document> input = pSource->getNext();
+ if (!input)
+ return boost::none; // input exhausted
- return out;
+ // Try to extract an output document from the new input document.
+ _unwinder->resetDocument(*input);
+ out = _unwinder->getNext();
}

- Value DocumentSourceUnwind::serialize(bool explain) const {
- verify(_unwindPath);
- return Value(DOC(getSourceName() << _unwindPath->getPath(true)));
- }
+ return out;
+}

- DocumentSource::GetDepsReturn DocumentSourceUnwind::getDependencies(DepsTracker* deps) const {
- deps->fields.insert(_unwindPath->getPath(false));
- return SEE_NEXT;
- }
+Value DocumentSourceUnwind::serialize(bool explain) const {
+ verify(_unwindPath);
+ return Value(DOC(getSourceName() << _unwindPath->getPath(true)));
+}

- void DocumentSourceUnwind::unwindPath(const FieldPath &fieldPath) {
- // Can't set more than one unwind path.
- uassert(15979, str::stream() << unwindName << "can't unwind more than one path",
- !_unwindPath);
- // Record the unwind path.
- _unwindPath.reset(new FieldPath(fieldPath));
- _unwinder.reset(new Unwinder(fieldPath));
- }
+DocumentSource::GetDepsReturn DocumentSourceUnwind::getDependencies(DepsTracker* deps) const {
+ deps->fields.insert(_unwindPath->getPath(false));
+ return SEE_NEXT;
+}

- intrusive_ptr<DocumentSource> DocumentSourceUnwind::createFromBson(
- BSONElement elem,
- const intrusive_ptr<ExpressionContext> &pExpCtx) {
- /*
- The value of $unwind should just be a field path.
- */
- uassert(15981, str::stream() << "the " << unwindName <<
- " field path must be specified as a string",
- elem.type() == String);
-
- string prefixedPathString(elem.str());
- string pathString(Expression::removeFieldPrefix(prefixedPathString));
- intrusive_ptr<DocumentSourceUnwind> pUnwind(new DocumentSourceUnwind(pExpCtx));
- pUnwind->unwindPath(FieldPath(pathString));
-
- return pUnwind;
- }
+void DocumentSourceUnwind::unwindPath(const FieldPath& fieldPath) {
+ // Can't set more than one unwind path.
+ uassert(15979, str::stream() << unwindName << ": can't unwind more than one path", !_unwindPath);
+ // Record the unwind path.
+ _unwindPath.reset(new FieldPath(fieldPath));
+ _unwinder.reset(new Unwinder(fieldPath));
+}
+
+intrusive_ptr<DocumentSource> DocumentSourceUnwind::createFromBson(
+ BSONElement elem, const intrusive_ptr<ExpressionContext>& pExpCtx) {
+ /*
+ The value of $unwind should just be a field path.
+ */ + uassert(15981, + str::stream() << "the " << unwindName << " field path must be specified as a string", + elem.type() == String); + + string prefixedPathString(elem.str()); + string pathString(Expression::removeFieldPrefix(prefixedPathString)); + intrusive_ptr<DocumentSourceUnwind> pUnwind(new DocumentSourceUnwind(pExpCtx)); + pUnwind->unwindPath(FieldPath(pathString)); + + return pUnwind; +} } diff --git a/src/mongo/db/pipeline/document_value_test.cpp b/src/mongo/db/pipeline/document_value_test.cpp index 0afffb5b639..544ac629a9b 100644 --- a/src/mongo/db/pipeline/document_value_test.cpp +++ b/src/mongo/db/pipeline/document_value_test.cpp @@ -38,1480 +38,1618 @@ namespace DocumentTests { - using std::endl; - using std::numeric_limits; - using std::string; - using std::vector; +using std::endl; +using std::numeric_limits; +using std::string; +using std::vector; + +mongo::Document::FieldPair getNthField(mongo::Document doc, size_t index) { + mongo::FieldIterator it(doc); + while (index--) // advance index times + it.next(); + return it.next(); +} + +namespace Document { + +using mongo::Document; + +BSONObj toBson(const Document& document) { + return document.toBson(); +} + +Document fromBson(BSONObj obj) { + return Document(obj); +} + +void assertRoundTrips(const Document& document1) { + BSONObj obj1 = toBson(document1); + Document document2 = fromBson(obj1); + BSONObj obj2 = toBson(document2); + ASSERT_EQUALS(obj1, obj2); + ASSERT_EQUALS(document1, document2); +} + +/** Create a Document. */ +class Create { +public: + void run() { + Document document; + ASSERT_EQUALS(0U, document.size()); + assertRoundTrips(document); + } +}; + +/** Create a Document from a BSONObj. */ +class CreateFromBsonObj { +public: + void run() { + Document document = fromBson(BSONObj()); + ASSERT_EQUALS(0U, document.size()); + document = fromBson(BSON("a" << 1 << "b" + << "q")); + ASSERT_EQUALS(2U, document.size()); + ASSERT_EQUALS("a", getNthField(document, 0).first.toString()); + ASSERT_EQUALS(1, getNthField(document, 0).second.getInt()); + ASSERT_EQUALS("b", getNthField(document, 1).first.toString()); + ASSERT_EQUALS("q", getNthField(document, 1).second.getString()); + assertRoundTrips(document); + } +}; + +/** Add Document fields. */ +class AddField { +public: + void run() { + MutableDocument md; + md.addField("foo", Value(1)); + ASSERT_EQUALS(1U, md.peek().size()); + ASSERT_EQUALS(1, md.peek()["foo"].getInt()); + md.addField("bar", Value(99)); + ASSERT_EQUALS(2U, md.peek().size()); + ASSERT_EQUALS(99, md.peek()["bar"].getInt()); + // No assertion is triggered by a duplicate field name. + md.addField("a", Value(5)); + + Document final = md.freeze(); + ASSERT_EQUALS(3U, final.size()); + assertRoundTrips(final); + } +}; + +/** Get Document values. */ +class GetValue { +public: + void run() { + Document document = fromBson(BSON("a" << 1 << "b" << 2.2)); + ASSERT_EQUALS(1, document["a"].getInt()); + ASSERT_EQUALS(1, document["a"].getInt()); + ASSERT_EQUALS(2.2, document["b"].getDouble()); + ASSERT_EQUALS(2.2, document["b"].getDouble()); + // Missing field. + ASSERT(document["c"].missing()); + ASSERT(document["c"].missing()); + assertRoundTrips(document); + } +}; + +/** Get Document fields. */ +class SetField { +public: + void run() { + Document original = fromBson(BSON("a" << 1 << "b" << 2.2 << "c" << 99)); + + // Initial positions. 
Used at end of function to make sure nothing moved + const Position apos = original.positionOf("a"); + const Position bpos = original.positionOf("c"); + const Position cpos = original.positionOf("c"); + + MutableDocument md(original); + + // Set the first field. + md.setField("a", Value("foo")); + ASSERT_EQUALS(3U, md.peek().size()); + ASSERT_EQUALS("foo", md.peek()["a"].getString()); + ASSERT_EQUALS("foo", getNthField(md.peek(), 0).second.getString()); + assertRoundTrips(md.peek()); + // Set the second field. + md["b"] = Value("bar"); + ASSERT_EQUALS(3U, md.peek().size()); + ASSERT_EQUALS("bar", md.peek()["b"].getString()); + ASSERT_EQUALS("bar", getNthField(md.peek(), 1).second.getString()); + assertRoundTrips(md.peek()); + + // Remove the second field. + md.setField("b", Value()); + PRINT(md.peek().toString()); + ASSERT_EQUALS(2U, md.peek().size()); + ASSERT(md.peek()["b"].missing()); + ASSERT_EQUALS("a", getNthField(md.peek(), 0).first.toString()); + ASSERT_EQUALS("c", getNthField(md.peek(), 1).first.toString()); + ASSERT_EQUALS(99, md.peek()["c"].getInt()); + assertRoundTrips(md.peek()); + + // Remove the first field. + md["a"] = Value(); + ASSERT_EQUALS(1U, md.peek().size()); + ASSERT(md.peek()["a"].missing()); + ASSERT_EQUALS("c", getNthField(md.peek(), 0).first.toString()); + ASSERT_EQUALS(99, md.peek()["c"].getInt()); + assertRoundTrips(md.peek()); + + // Remove the final field. Verify document is empty. + md.remove("c"); + ASSERT(md.peek().empty()); + ASSERT_EQUALS(0U, md.peek().size()); + ASSERT_EQUALS(md.peek(), Document()); + ASSERT(!FieldIterator(md.peek()).more()); + ASSERT(md.peek()["c"].missing()); + assertRoundTrips(md.peek()); + + // Set a nested field using [] + md["x"]["y"]["z"] = Value("nested"); + ASSERT_EQUALS(md.peek()["x"]["y"]["z"], Value("nested")); + + // Set a nested field using setNestedField + FieldPath xxyyzz = string("xx.yy.zz"); + md.setNestedField(xxyyzz, Value("nested")); + ASSERT_EQUALS(md.peek().getNestedField(xxyyzz), Value("nested")); + + // Set a nested fields through an existing empty document + md["xxx"] = Value(Document()); + md["xxx"]["yyy"] = Value(Document()); + FieldPath xxxyyyzzz = string("xxx.yyy.zzz"); + md.setNestedField(xxxyyyzzz, Value("nested")); + ASSERT_EQUALS(md.peek().getNestedField(xxxyyyzzz), Value("nested")); + + // Make sure nothing moved + ASSERT_EQUALS(apos, md.peek().positionOf("a")); + ASSERT_EQUALS(bpos, md.peek().positionOf("c")); + ASSERT_EQUALS(cpos, md.peek().positionOf("c")); + ASSERT_EQUALS(Position(), md.peek().positionOf("d")); + } +}; + +/** Document comparator. 
*/ +class Compare { +public: + void run() { + assertComparison(0, BSONObj(), BSONObj()); + assertComparison(0, BSON("a" << 1), BSON("a" << 1)); + assertComparison(-1, BSONObj(), BSON("a" << 1)); + assertComparison(-1, BSON("a" << 1), BSON("c" << 1)); + assertComparison(0, BSON("a" << 1 << "r" << 2), BSON("a" << 1 << "r" << 2)); + assertComparison(-1, BSON("a" << 1), BSON("a" << 1 << "r" << 2)); + assertComparison(0, BSON("a" << 2), BSON("a" << 2)); + assertComparison(-1, BSON("a" << 1), BSON("a" << 2)); + assertComparison(-1, BSON("a" << 1 << "b" << 1), BSON("a" << 1 << "b" << 2)); + // numbers sort before strings + assertComparison(-1, + BSON("a" << 1), + BSON("a" + << "foo")); + // numbers sort before strings, even if keys compare otherwise + assertComparison(-1, + BSON("b" << 1), + BSON("a" + << "foo")); + // null before number, even if keys compare otherwise + assertComparison(-1, BSON("z" << BSONNULL), BSON("a" << 1)); + } - mongo::Document::FieldPair getNthField(mongo::Document doc, size_t index) { - mongo::FieldIterator it (doc); - while (index--) // advance index times - it.next(); - return it.next(); +public: + int cmp(const BSONObj& a, const BSONObj& b) { + int result = Document::compare(fromBson(a), fromBson(b)); + return // sign + result < 0 ? -1 : result > 0 ? 1 : 0; + } + void assertComparison(int expectedResult, const BSONObj& a, const BSONObj& b) { + ASSERT_EQUALS(expectedResult, cmp(a, b)); + ASSERT_EQUALS(-expectedResult, cmp(b, a)); + if (expectedResult == 0) { + ASSERT_EQUALS(hash(a), hash(b)); + } + } + size_t hash(const BSONObj& obj) { + size_t seed = 0x106e1e1; + Document(obj).hash_combine(seed); + return seed; } +}; - namespace Document { +/** Shallow copy clone of a single field Document. */ +class Clone { +public: + void run() { + const Document document = fromBson(BSON("a" << BSON("b" << 1))); + MutableDocument cloneOnDemand(document); - using mongo::Document; + // Check equality. + ASSERT_EQUALS(document, cloneOnDemand.peek()); + // Check pointer equality of sub document. + ASSERT_EQUALS(document["a"].getDocument().getPtr(), + cloneOnDemand.peek()["a"].getDocument().getPtr()); - BSONObj toBson( const Document& document ) { - return document.toBson(); - } - Document fromBson( BSONObj obj ) { - return Document(obj); - } + // Change field in clone and ensure the original document's field is unchanged. + cloneOnDemand.setField(StringData("a"), Value(2)); + ASSERT_EQUALS(Value(1), document.getNestedField(FieldPath("a.b"))); - void assertRoundTrips( const Document& document1 ) { - BSONObj obj1 = toBson( document1 ); - Document document2 = fromBson( obj1 ); - BSONObj obj2 = toBson( document2 ); - ASSERT_EQUALS( obj1, obj2 ); - ASSERT_EQUALS( document1, document2 ); - } - /** Create a Document. */ - class Create { - public: - void run() { - Document document; - ASSERT_EQUALS( 0U, document.size() ); - assertRoundTrips( document ); - } - }; - - /** Create a Document from a BSONObj. */ - class CreateFromBsonObj { - public: - void run() { - Document document = fromBson( BSONObj() ); - ASSERT_EQUALS( 0U, document.size() ); - document = fromBson( BSON( "a" << 1 << "b" << "q" ) ); - ASSERT_EQUALS( 2U, document.size() ); - ASSERT_EQUALS( "a", getNthField(document, 0).first.toString() ); - ASSERT_EQUALS( 1, getNthField(document, 0).second.getInt() ); - ASSERT_EQUALS( "b", getNthField(document, 1).first.toString() ); - ASSERT_EQUALS( "q", getNthField(document, 1).second.getString() ); - assertRoundTrips( document ); - } - }; - - /** Add Document fields. 
*/ - class AddField { - public: - void run() { - MutableDocument md; - md.addField( "foo", Value( 1 ) ); - ASSERT_EQUALS( 1U, md.peek().size() ); - ASSERT_EQUALS( 1, md.peek()["foo"].getInt() ); - md.addField( "bar", Value( 99 ) ); - ASSERT_EQUALS( 2U, md.peek().size() ); - ASSERT_EQUALS( 99, md.peek()["bar"].getInt() ); - // No assertion is triggered by a duplicate field name. - md.addField( "a", Value( 5 ) ); - - Document final = md.freeze(); - ASSERT_EQUALS( 3U, final.size() ); - assertRoundTrips( final ); - } - }; - - /** Get Document values. */ - class GetValue { - public: - void run() { - Document document = fromBson( BSON( "a" << 1 << "b" << 2.2 ) ); - ASSERT_EQUALS( 1, document["a"].getInt() ); - ASSERT_EQUALS( 1, document["a"].getInt() ); - ASSERT_EQUALS( 2.2, document["b"].getDouble() ); - ASSERT_EQUALS( 2.2, document["b"].getDouble() ); - // Missing field. - ASSERT( document["c"].missing() ); - ASSERT( document["c"].missing() ); - assertRoundTrips( document ); - } - }; - - /** Get Document fields. */ - class SetField { - public: - void run() { - Document original = fromBson(BSON("a" << 1 << "b" << 2.2 << "c" << 99)); - - // Initial positions. Used at end of function to make sure nothing moved - const Position apos = original.positionOf("a"); - const Position bpos = original.positionOf("c"); - const Position cpos = original.positionOf("c"); - - MutableDocument md (original); - - // Set the first field. - md.setField( "a" , Value( "foo" ) ); - ASSERT_EQUALS( 3U, md.peek().size() ); - ASSERT_EQUALS( "foo", md.peek()["a"].getString() ); - ASSERT_EQUALS( "foo", getNthField(md.peek(), 0).second.getString() ); - assertRoundTrips( md.peek() ); - // Set the second field. - md["b"] = Value("bar"); - ASSERT_EQUALS( 3U, md.peek().size() ); - ASSERT_EQUALS( "bar", md.peek()["b"].getString() ); - ASSERT_EQUALS( "bar", getNthField(md.peek(), 1).second.getString() ); - assertRoundTrips( md.peek() ); - - // Remove the second field. - md.setField("b", Value()); - PRINT(md.peek().toString()); - ASSERT_EQUALS( 2U, md.peek().size() ); - ASSERT( md.peek()["b"].missing() ); - ASSERT_EQUALS( "a", getNthField(md.peek(), 0 ).first.toString() ); - ASSERT_EQUALS( "c", getNthField(md.peek(), 1 ).first.toString() ); - ASSERT_EQUALS( 99, md.peek()["c"].getInt() ); - assertRoundTrips( md.peek() ); - - // Remove the first field. - md["a"] = Value(); - ASSERT_EQUALS( 1U, md.peek().size() ); - ASSERT( md.peek()["a"].missing() ); - ASSERT_EQUALS( "c", getNthField(md.peek(), 0 ).first.toString() ); - ASSERT_EQUALS( 99, md.peek()["c"].getInt() ); - assertRoundTrips( md.peek() ); - - // Remove the final field. Verify document is empty. 
- md.remove("c"); - ASSERT( md.peek().empty() ); - ASSERT_EQUALS( 0U, md.peek().size() ); - ASSERT_EQUALS( md.peek(), Document() ); - ASSERT( !FieldIterator(md.peek()).more() ); - ASSERT( md.peek()["c"].missing() ); - assertRoundTrips( md.peek() ); - - // Set a nested field using [] - md["x"]["y"]["z"] = Value("nested"); - ASSERT_EQUALS(md.peek()["x"]["y"]["z"], Value("nested")); - - // Set a nested field using setNestedField - FieldPath xxyyzz = string("xx.yy.zz"); - md.setNestedField(xxyyzz, Value("nested")); - ASSERT_EQUALS(md.peek().getNestedField(xxyyzz), Value("nested") ); - - // Set a nested fields through an existing empty document - md["xxx"] = Value(Document()); - md["xxx"]["yyy"] = Value(Document()); - FieldPath xxxyyyzzz = string("xxx.yyy.zzz"); - md.setNestedField(xxxyyyzzz, Value("nested")); - ASSERT_EQUALS(md.peek().getNestedField(xxxyyyzzz), Value("nested") ); - - // Make sure nothing moved - ASSERT_EQUALS(apos, md.peek().positionOf("a")); - ASSERT_EQUALS(bpos, md.peek().positionOf("c")); - ASSERT_EQUALS(cpos, md.peek().positionOf("c")); - ASSERT_EQUALS(Position(), md.peek().positionOf("d")); - } - }; - - /** Document comparator. */ - class Compare { - public: - void run() { - assertComparison( 0, BSONObj(), BSONObj() ); - assertComparison( 0, BSON( "a" << 1 ), BSON( "a" << 1 ) ); - assertComparison( -1, BSONObj(), BSON( "a" << 1 ) ); - assertComparison( -1, BSON( "a" << 1 ), BSON( "c" << 1 ) ); - assertComparison( 0, BSON( "a" << 1 << "r" << 2 ), BSON( "a" << 1 << "r" << 2 ) ); - assertComparison( -1, BSON( "a" << 1 ), BSON( "a" << 1 << "r" << 2 ) ); - assertComparison( 0, BSON( "a" << 2 ), BSON( "a" << 2 ) ); - assertComparison( -1, BSON( "a" << 1 ), BSON( "a" << 2 ) ); - assertComparison( -1, BSON( "a" << 1 << "b" << 1 ), BSON( "a" << 1 << "b" << 2 ) ); - // numbers sort before strings - assertComparison( -1, BSON( "a" << 1 ), BSON( "a" << "foo" ) ); - // numbers sort before strings, even if keys compare otherwise - assertComparison( -1, BSON( "b" << 1 ), BSON( "a" << "foo" ) ); - // null before number, even if keys compare otherwise - assertComparison( -1, BSON( "z" << BSONNULL ), BSON( "a" << 1 ) ); - } - public: - int cmp( const BSONObj& a, const BSONObj& b ) { - int result = Document::compare( fromBson( a ), fromBson( b ) ); - return // sign - result < 0 ? -1 : - result > 0 ? 1 : - 0; - } - void assertComparison( int expectedResult, const BSONObj& a, const BSONObj& b ) { - ASSERT_EQUALS( expectedResult, cmp( a, b ) ); - ASSERT_EQUALS( -expectedResult, cmp( b, a ) ); - if ( expectedResult == 0 ) { - ASSERT_EQUALS( hash( a ), hash( b ) ); - } - } - size_t hash( const BSONObj& obj ) { - size_t seed = 0x106e1e1; - Document(obj).hash_combine(seed); - return seed; - } - }; - - /** Shallow copy clone of a single field Document. */ - class Clone { - public: - void run() { - const Document document = fromBson( BSON( "a" << BSON( "b" << 1 ) ) ); - MutableDocument cloneOnDemand (document); - - // Check equality. - ASSERT_EQUALS(document, cloneOnDemand.peek()); - // Check pointer equality of sub document. - ASSERT_EQUALS( document["a"].getDocument().getPtr(), - cloneOnDemand.peek()["a"].getDocument().getPtr() ); - - - // Change field in clone and ensure the original document's field is unchanged. - cloneOnDemand.setField( StringData("a"), Value(2) ); - ASSERT_EQUALS( Value(1), document.getNestedField(FieldPath("a.b")) ); - - - // setNestedField and ensure the original document is unchanged. 
- - cloneOnDemand.reset(document); - vector<Position> path; - ASSERT_EQUALS( Value(1), document.getNestedField(FieldPath("a.b"), &path) ); - - cloneOnDemand.setNestedField(path, Value(2)); - - ASSERT_EQUALS( Value(1), document.getNestedField(FieldPath("a.b")) ); - ASSERT_EQUALS( Value(2), cloneOnDemand.peek().getNestedField(FieldPath("a.b")) ); - ASSERT_EQUALS( DOC( "a" << DOC( "b" << 1 ) ), document ); - ASSERT_EQUALS( DOC( "a" << DOC( "b" << 2 ) ), cloneOnDemand.freeze() ); - } - }; - - /** Shallow copy clone of a multi field Document. */ - class CloneMultipleFields { - public: - void run() { - Document document = - fromBson( fromjson( "{a:1,b:['ra',4],c:{z:1},d:'lal'}" ) ); - Document clonedDocument = document.clone(); - ASSERT_EQUALS(document, clonedDocument); - } - }; - - /** FieldIterator for an empty Document. */ - class FieldIteratorEmpty { - public: - void run() { - FieldIterator iterator ( (Document()) ); - ASSERT( !iterator.more() ); - } - }; - - /** FieldIterator for a single field Document. */ - class FieldIteratorSingle { - public: - void run() { - FieldIterator iterator (fromBson( BSON( "a" << 1 ) )); - ASSERT( iterator.more() ); - Document::FieldPair field = iterator.next(); - ASSERT_EQUALS( "a", field.first.toString() ); - ASSERT_EQUALS( 1, field.second.getInt() ); - ASSERT( !iterator.more() ); - } - }; - - /** FieldIterator for a multiple field Document. */ - class FieldIteratorMultiple { - public: - void run() { - FieldIterator iterator (fromBson( BSON( "a" << 1 << "b" << 5.6 << "c" << "z" ))); - ASSERT( iterator.more() ); - Document::FieldPair field = iterator.next(); - ASSERT_EQUALS( "a", field.first.toString() ); - ASSERT_EQUALS( 1, field.second.getInt() ); - ASSERT( iterator.more() ); - - Document::FieldPair field2 = iterator.next(); - ASSERT_EQUALS( "b", field2.first.toString() ); - ASSERT_EQUALS( 5.6, field2.second.getDouble() ); - ASSERT( iterator.more() ); - - Document::FieldPair field3 = iterator.next(); - ASSERT_EQUALS( "c", field3.first.toString() ); - ASSERT_EQUALS( "z", field3.second.getString() ); - ASSERT( !iterator.more() ); - } - }; - - class AllTypesDoc { - public: - void run() { - // These are listed in order of BSONType with some duplicates - append("minkey", MINKEY); - // EOO not valid in middle of BSONObj - append("double", 1.0); - append("c-string", "string\0after NUL"); // after NULL is ignored - append("c++", StringData("string\0after NUL", StringData::LiteralTag()).toString()); - append("StringData", StringData("string\0after NUL", StringData::LiteralTag())); - append("emptyObj", BSONObj()); - append("filledObj", BSON("a" << 1)); - append("emptyArray", BSON("" << BSONArray()).firstElement()); - append("filledArray", BSON("" << BSON_ARRAY(1 << "a")).firstElement()); - append("binData", BSONBinData("a\0b", 3, BinDataGeneral)); - append("binDataCustom", BSONBinData("a\0b", 3, bdtCustom)); - append("binDataUUID", BSONBinData("123456789\0abcdef", 16, bdtUUID)); - append("undefined", BSONUndefined); - append("oid", OID()); - append("true", true); - append("false", false); - append("date", jsTime()); - append("null", BSONNULL); - append("regex", BSONRegEx(".*")); - append("regexFlags", BSONRegEx(".*", "i")); - append("regexEmpty", BSONRegEx("", "")); - append("dbref", BSONDBRef("foo", OID())); - append("code", BSONCode("function() {}")); - append("codeNul", BSONCode(StringData("var nul = '\0'", StringData::LiteralTag()))); - append("symbol", BSONSymbol("foo")); - append("symbolNul", BSONSymbol(StringData("f\0o", StringData::LiteralTag()))); - 
append("codeWScope", BSONCodeWScope("asdf", BSONObj())); - append("codeWScopeWScope", BSONCodeWScope("asdf", BSON("one" << 1))); - append("int", 1); - append("timestamp", Timestamp()); - append("long", 1LL); - append("very long", 1LL << 40); - append("maxkey", MAXKEY); - - const BSONArray arr = arrBuilder.arr(); - - // can't use append any more since arrBuilder is done - objBuilder << "mega array" << arr; - docBuilder["mega array"] = mongo::Value(values); - - const BSONObj obj = objBuilder.obj(); - const Document doc = docBuilder.freeze(); - - const BSONObj obj2 = toBson(doc); - const Document doc2 = fromBson(obj); - - // logical equality - ASSERT_EQUALS(obj, obj2); - ASSERT_EQUALS(doc, doc2); - - // binary equality - ASSERT_EQUALS(obj.objsize(), obj2.objsize()); - ASSERT_EQUALS(memcmp(obj.objdata(), obj2.objdata(), obj.objsize()), 0); - - // ensure sorter serialization round-trips correctly - BufBuilder bb; - doc.serializeForSorter(bb); - BufReader reader(bb.buf(), bb.len()); - const Document doc3 = Document::deserializeForSorter( - reader, Document::SorterDeserializeSettings()); - BSONObj obj3 = toBson(doc3); - ASSERT_EQUALS(obj.objsize(), obj3.objsize()); - ASSERT_EQUALS(memcmp(obj.objdata(), obj3.objdata(), obj.objsize()), 0); - } - - template <typename T> - void append(const char* name, const T& thing) { - objBuilder << name << thing; - arrBuilder << thing; - docBuilder[name] = mongo::Value(thing); - values.push_back(mongo::Value(thing)); - } - - vector<mongo::Value> values; - MutableDocument docBuilder; - BSONObjBuilder objBuilder; - BSONArrayBuilder arrBuilder; - }; - } // namespace Document - - namespace Value { - - using mongo::Value; - - BSONObj toBson( const Value& value ) { - if (value.missing()) - return BSONObj(); // EOO - - BSONObjBuilder bob; - value.addToBsonObj( &bob, "" ); - return bob.obj(); - } + // setNestedField and ensure the original document is unchanged. - Value fromBson( const BSONObj& obj ) { - BSONElement element = obj.firstElement(); - return Value( element ); - } + cloneOnDemand.reset(document); + vector<Position> path; + ASSERT_EQUALS(Value(1), document.getNestedField(FieldPath("a.b"), &path)); - void assertRoundTrips( const Value& value1 ) { - BSONObj obj1 = toBson( value1 ); - Value value2 = fromBson( obj1 ); - BSONObj obj2 = toBson( value2 ); - ASSERT_EQUALS( obj1, obj2 ); - ASSERT_EQUALS(value1, value2); - ASSERT_EQUALS(value1.getType(), value2.getType()); - } + cloneOnDemand.setNestedField(path, Value(2)); - class BSONArrayTest { - public: - void run() { - ASSERT_EQUALS(Value(BSON_ARRAY(1 << 2 << 3)), DOC_ARRAY(1 << 2 << 3)); - ASSERT_EQUALS(Value(BSONArray()), Value(vector<Value>())); - } - }; - - /** Int type. */ - class Int { - public: - void run() { - Value value = Value( 5 ); - ASSERT_EQUALS( 5, value.getInt() ); - ASSERT_EQUALS( 5, value.getLong() ); - ASSERT_EQUALS( 5, value.getDouble() ); - ASSERT_EQUALS( NumberInt, value.getType() ); - assertRoundTrips( value ); - } - }; - - /** Long type. */ - class Long { - public: - void run() { - Value value = Value( 99LL ); - ASSERT_EQUALS( 99, value.getLong() ); - ASSERT_EQUALS( 99, value.getDouble() ); - ASSERT_EQUALS( NumberLong, value.getType() ); - assertRoundTrips( value ); - } - }; - - /** Double type. */ - class Double { - public: - void run() { - Value value = Value( 5.5 ); - ASSERT_EQUALS( 5.5, value.getDouble() ); - ASSERT_EQUALS( NumberDouble, value.getType() ); - assertRoundTrips( value ); - } - }; - - /** String type. 
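 *
 * A note on the helpers driving these Value cases: toBson() wraps a
 * Value in a single BSON field with an empty name and returns an empty
 * object (bare EOO) for a missing Value, fromBson() reads the first
 * element back, and assertRoundTrips() insists the trip preserves the
 * exact type as well as equality. The round trip, as a minimal sketch:
 *
 *   Value value2 = fromBson(toBson(value1));
 *   ASSERT_EQUALS(value1, value2);
 *   ASSERT_EQUALS(value1.getType(), value2.getType());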
*/ - class String { - public: - void run() { - Value value = Value( "foo" ); - ASSERT_EQUALS( "foo", value.getString() ); - ASSERT_EQUALS( mongo::String, value.getType() ); - assertRoundTrips( value ); - } - }; - - /** String with a null character. */ - class StringWithNull { - public: - void run() { - string withNull( "a\0b", 3 ); - BSONObj objWithNull = BSON( "" << withNull ); - ASSERT_EQUALS( withNull, objWithNull[ "" ].str() ); - Value value = fromBson( objWithNull ); - ASSERT_EQUALS( withNull, value.getString() ); - assertRoundTrips( value ); - } - }; - - /** Date type. */ - class Date { - public: - void run() { - Value value = Value(Date_t::fromMillisSinceEpoch(999)); - ASSERT_EQUALS( 999, value.getDate() ); - ASSERT_EQUALS( mongo::Date, value.getType() ); - assertRoundTrips( value ); - } - }; - - /** Timestamp type. */ - class JSTimestamp { - public: - void run() { - Value value = Value( Timestamp( 777 ) ); - ASSERT( Timestamp( 777 ) == value.getTimestamp() ); - ASSERT_EQUALS( mongo::bsonTimestamp, value.getType() ); - assertRoundTrips( value ); - } - }; - - /** Document with no fields. */ - class EmptyDocument { - public: - void run() { - mongo::Document document = mongo::Document(); - Value value = Value( document ); - ASSERT_EQUALS( document.getPtr(), value.getDocument().getPtr() ); - ASSERT_EQUALS( Object, value.getType() ); - assertRoundTrips( value ); - } - }; - - /** Document type. */ - class Document { - public: - void run() { - mongo::MutableDocument md; - md.addField( "a", Value( 5 ) ); - md.addField( "apple", Value( "rrr" ) ); - md.addField( "banana", Value( -.3 ) ); - mongo::Document document = md.freeze(); - - Value value = Value( document ); - // Check document pointers are equal. - ASSERT_EQUALS( document.getPtr(), value.getDocument().getPtr() ); - // Check document contents. - ASSERT_EQUALS( 5, document["a"].getInt() ); - ASSERT_EQUALS( "rrr", document["apple"].getString() ); - ASSERT_EQUALS( -.3, document["banana"].getDouble() ); - ASSERT_EQUALS( Object, value.getType() ); - assertRoundTrips( value ); - } - }; - - /** Array with no elements. */ - class EmptyArray { - public: - void run() { - vector<Value> array; - Value value (array); - const vector<Value>& array2 = value.getArray(); - - ASSERT( array2.empty() ); - ASSERT_EQUALS( Array, value.getType() ); - ASSERT_EQUALS( 0U, value.getArrayLength() ); - assertRoundTrips( value ); - } - }; - - /** Array type. */ - class Array { - public: - void run() { - vector<Value> array; - array.push_back( Value( 5 ) ); - array.push_back( Value( "lala" ) ); - array.push_back( Value( 3.14 ) ); - Value value = Value( array ); - const vector<Value>& array2 = value.getArray(); - - ASSERT( !array2.empty() ); - ASSERT_EQUALS( array2.size(), 3U); - ASSERT_EQUALS( 5, array2[0].getInt() ); - ASSERT_EQUALS( "lala", array2[1].getString() ); - ASSERT_EQUALS( 3.14, array2[2].getDouble() ); - ASSERT_EQUALS( mongo::Array, value.getType() ); - ASSERT_EQUALS( 3U, value.getArrayLength() ); - assertRoundTrips( value ); - } - }; - - /** Oid type. */ - class Oid { - public: - void run() { - Value value = - fromBson( BSON( "" << OID( "abcdefabcdefabcdefabcdef" ) ) ); - ASSERT_EQUALS( OID( "abcdefabcdefabcdefabcdef" ), value.getOid() ); - ASSERT_EQUALS( jstOID, value.getType() ); - assertRoundTrips( value ); - } - }; - - /** Bool type. 
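 *
 * The EmptyDocument and Document cases above rely on Value wrapping a
 * Document without copying it, which is why they can assert pointer
 * equality on the underlying storage. A minimal sketch:
 *
 *   mongo::Document document;
 *   Value value = Value(document);
 *   ASSERT_EQUALS(document.getPtr(), value.getDocument().getPtr());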
*/ - class Bool { - public: - void run() { - Value value = fromBson( BSON( "" << true ) ); - ASSERT_EQUALS( true, value.getBool() ); - ASSERT_EQUALS( mongo::Bool, value.getType() ); - assertRoundTrips( value ); - } - }; - - /** Regex type. */ - class Regex { - public: - void run() { - Value value = fromBson( fromjson( "{'':/abc/}" ) ); - ASSERT_EQUALS( string("abc"), value.getRegex() ); - ASSERT_EQUALS( RegEx, value.getType() ); - assertRoundTrips( value ); - } - }; - - /** Symbol type (currently unsupported). */ - class Symbol { - public: - void run() { - Value value (BSONSymbol("FOOBAR")); - ASSERT_EQUALS( "FOOBAR", value.getSymbol() ); - ASSERT_EQUALS( mongo::Symbol, value.getType() ); - assertRoundTrips( value ); - } - }; - - /** Undefined type. */ - class Undefined { - public: - void run() { - Value value = Value(BSONUndefined); - ASSERT_EQUALS( mongo::Undefined, value.getType() ); - assertRoundTrips( value ); - } - }; - - /** Null type. */ - class Null { - public: - void run() { - Value value = Value(BSONNULL); - ASSERT_EQUALS( jstNULL, value.getType() ); - assertRoundTrips( value ); - } - }; - - /** True value. */ - class True { - public: - void run() { - Value value = Value(true); - ASSERT_EQUALS( true, value.getBool() ); - ASSERT_EQUALS( mongo::Bool, value.getType() ); - assertRoundTrips( value ); - } - }; - - /** False value. */ - class False { - public: - void run() { - Value value = Value(false); - ASSERT_EQUALS( false, value.getBool() ); - ASSERT_EQUALS( mongo::Bool, value.getType() ); - assertRoundTrips( value ); - } - }; - - /** -1 value. */ - class MinusOne { - public: - void run() { - Value value = Value(-1); - ASSERT_EQUALS( -1, value.getInt() ); - ASSERT_EQUALS( NumberInt, value.getType() ); - assertRoundTrips( value ); - } - }; - - /** 0 value. */ - class Zero { - public: - void run() { - Value value = Value(0); - ASSERT_EQUALS( 0, value.getInt() ); - ASSERT_EQUALS( NumberInt, value.getType() ); - assertRoundTrips( value ); - } - }; - - /** 1 value. */ - class One { - public: - void run() { - Value value = Value(1); - ASSERT_EQUALS( 1, value.getInt() ); - ASSERT_EQUALS( NumberInt, value.getType() ); - assertRoundTrips( value ); - } - }; - - namespace Coerce { - - class ToBoolBase { - public: - virtual ~ToBoolBase() { - } - void run() { - ASSERT_EQUALS( expected(), value().coerceToBool() ); - } - protected: - virtual Value value() = 0; - virtual bool expected() = 0; - }; - - class ToBoolTrue : public ToBoolBase { - bool expected() { return true; } - }; - - class ToBoolFalse : public ToBoolBase { - bool expected() { return false; } - }; - - /** Coerce 0 to bool. */ - class ZeroIntToBool : public ToBoolFalse { - Value value() { return Value( 0 ); } - }; - - /** Coerce -1 to bool. */ - class NonZeroIntToBool : public ToBoolTrue { - Value value() { return Value( -1 ); } - }; - - /** Coerce 0LL to bool. */ - class ZeroLongToBool : public ToBoolFalse { - Value value() { return Value( 0LL ); } - }; - - /** Coerce 5LL to bool. */ - class NonZeroLongToBool : public ToBoolTrue { - Value value() { return Value( 5LL ); } - }; - - /** Coerce 0.0 to bool. */ - class ZeroDoubleToBool : public ToBoolFalse { - Value value() { return Value( 0 ); } - }; - - /** Coerce -1.3 to bool. */ - class NonZeroDoubleToBool : public ToBoolTrue { - Value value() { return Value( -1.3 ); } - }; - - /** Coerce "" to bool. */ - class StringToBool : public ToBoolTrue { - Value value() { return Value( "" ); } - }; - - /** Coerce {} to bool. 
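 *
 * Taken together, the ToBool fixtures encode one rule: only numeric
 * zero, false, null and undefined coerce to false; everything else is
 * truthy, including the empty string checked above and the empty
 * object and array that follow. (Note that ZeroDoubleToBool constructs
 * Value(0), an int, so the 0.0 case is not actually exercised as
 * written.) A minimal sketch of the rule:
 *
 *   ASSERT(!Value(0).coerceToBool());
 *   ASSERT(!Value(BSONNULL).coerceToBool());
 *   ASSERT(Value("").coerceToBool());  // the empty string is truthy
 *   ASSERT(Value(vector<Value>()).coerceToBool());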
*/ - class ObjectToBool : public ToBoolTrue { - Value value() { - return Value( mongo::Document() ); - } - }; - - /** Coerce [] to bool. */ - class ArrayToBool : public ToBoolTrue { - Value value() { - return Value( vector<Value>() ); - } - }; - - /** Coerce Date(0) to bool. */ - class DateToBool : public ToBoolTrue { - Value value() { return Value(Date_t{}); } - }; - - /** Coerce js literal regex to bool. */ - class RegexToBool : public ToBoolTrue { - Value value() { return fromBson( fromjson( "{''://}" ) ); } - }; - - /** Coerce true to bool. */ - class TrueToBool : public ToBoolTrue { - Value value() { return fromBson( BSON( "" << true ) ); } - }; - - /** Coerce false to bool. */ - class FalseToBool : public ToBoolFalse { - Value value() { return fromBson( BSON( "" << false ) ); } - }; - - /** Coerce null to bool. */ - class NullToBool : public ToBoolFalse { - Value value() { return Value(BSONNULL); } - }; - - /** Coerce undefined to bool. */ - class UndefinedToBool : public ToBoolFalse { - Value value() { return Value(BSONUndefined); } - }; - - class ToIntBase { - public: - virtual ~ToIntBase() { - } - void run() { - if (asserts()) - ASSERT_THROWS( value().coerceToInt(), UserException ); - else - ASSERT_EQUALS( expected(), value().coerceToInt() ); - } - protected: - virtual Value value() = 0; - virtual int expected() { return 0; } - virtual bool asserts() { return false; } - }; - - /** Coerce -5 to int. */ - class IntToInt : public ToIntBase { - Value value() { return Value( -5 ); } - int expected() { return -5; } - }; - - /** Coerce long to int. */ - class LongToInt : public ToIntBase { - Value value() { return Value( 0xff00000007LL ); } - int expected() { return 7; } - }; - - /** Coerce 9.8 to int. */ - class DoubleToInt : public ToIntBase { - Value value() { return Value( 9.8 ); } - int expected() { return 9; } - }; - - /** Coerce null to int. */ - class NullToInt : public ToIntBase { - Value value() { return Value(BSONNULL); } - bool asserts() { return true; } - }; - - /** Coerce undefined to int. */ - class UndefinedToInt : public ToIntBase { - Value value() { return Value(BSONUndefined); } - bool asserts() { return true; } - }; - - /** Coerce "" to int unsupported. */ - class StringToInt { - public: - void run() { - ASSERT_THROWS( Value( "" ).coerceToInt(), UserException ); - } - }; - - class ToLongBase { - public: - virtual ~ToLongBase() { - } - void run() { - if (asserts()) - ASSERT_THROWS( value().coerceToLong(), UserException ); - else - ASSERT_EQUALS( expected(), value().coerceToLong() ); - } - protected: - virtual Value value() = 0; - virtual long long expected() { return 0; } - virtual bool asserts() { return false; } - }; - - /** Coerce -5 to long. */ - class IntToLong : public ToLongBase { - Value value() { return Value( -5 ); } - long long expected() { return -5; } - }; - - /** Coerce long to long. */ - class LongToLong : public ToLongBase { - Value value() { return Value( 0xff00000007LL ); } - long long expected() { return 0xff00000007LL; } - }; - - /** Coerce 9.8 to long. */ - class DoubleToLong : public ToLongBase { - Value value() { return Value( 9.8 ); } - long long expected() { return 9; } - }; - - /** Coerce null to long. */ - class NullToLong : public ToLongBase { - Value value() { return Value(BSONNULL); } - bool asserts() { return true; } - }; - - /** Coerce undefined to long. */ - class UndefinedToLong : public ToLongBase { - Value value() { return Value(BSONUndefined); } - bool asserts() { return true; } - }; - - /** Coerce string to long unsupported. 
*/ - class StringToLong { - public: - void run() { - ASSERT_THROWS( Value( "" ).coerceToLong(), UserException ); - } - }; - - class ToDoubleBase { - public: - virtual ~ToDoubleBase() { - } - void run() { - if (asserts()) - ASSERT_THROWS( value().coerceToDouble(), UserException ); - else - ASSERT_EQUALS( expected(), value().coerceToDouble() ); - } - protected: - virtual Value value() = 0; - virtual double expected() { return 0; } - virtual bool asserts() { return false; } - }; - - /** Coerce -5 to double. */ - class IntToDouble : public ToDoubleBase { - Value value() { return Value( -5 ); } - double expected() { return -5; } - }; - - /** Coerce long to double. */ - class LongToDouble : public ToDoubleBase { - Value value() { - // A long that cannot be exactly represented as a double. - return Value( static_cast<double>( 0x8fffffffffffffffLL ) ); - } - double expected() { return static_cast<double>( 0x8fffffffffffffffLL ); } - }; - - /** Coerce double to double. */ - class DoubleToDouble : public ToDoubleBase { - Value value() { return Value( 9.8 ); } - double expected() { return 9.8; } - }; - - /** Coerce null to double. */ - class NullToDouble : public ToDoubleBase { - Value value() { return Value(BSONNULL); } - bool asserts() { return true; } - }; - - /** Coerce undefined to double. */ - class UndefinedToDouble : public ToDoubleBase { - Value value() { return Value(BSONUndefined); } - bool asserts() { return true; } - }; - - /** Coerce string to double unsupported. */ - class StringToDouble { - public: - void run() { - ASSERT_THROWS( Value( "" ).coerceToDouble(), UserException ); - } - }; - - class ToDateBase { - public: - virtual ~ToDateBase() { - } - void run() { - ASSERT_EQUALS( expected(), value().coerceToDate() ); - } - protected: - virtual Value value() = 0; - virtual long long expected() = 0; - }; - - /** Coerce date to date. */ - class DateToDate : public ToDateBase { - Value value() { return Value(Date_t::fromMillisSinceEpoch(888)); } - long long expected() { return 888; } - }; - - /** - * Convert timestamp to date. This extracts the time portion of the timestamp, which - * is different from BSON behavior of interpreting all bytes as a date. - */ - class TimestampToDate : public ToDateBase { - Value value() { - return Value( Timestamp( 777, 666 ) ); - } - long long expected() { return 777 * 1000; } - }; - - /** Coerce string to date unsupported. */ - class StringToDate { - public: - void run() { - ASSERT_THROWS( Value( "" ).coerceToDate(), UserException ); - } - }; - - class ToStringBase { - public: - virtual ~ToStringBase() { - } - void run() { - ASSERT_EQUALS( expected(), value().coerceToString() ); - } - protected: - virtual Value value() = 0; - virtual string expected() { return ""; } - }; - - /** Coerce -0.2 to string. */ - class DoubleToString : public ToStringBase { - Value value() { return Value( -0.2 ); } - string expected() { return "-0.2"; } - }; - - /** Coerce -4 to string. */ - class IntToString : public ToStringBase { - Value value() { return Value( -4 ); } - string expected() { return "-4"; } - }; - - /** Coerce 10000LL to string. */ - class LongToString : public ToStringBase { - Value value() { return Value( 10000LL ); } - string expected() { return "10000"; } - }; - - /** Coerce string to string. */ - class StringToString : public ToStringBase { - Value value() { return Value( "fO_o" ); } - string expected() { return "fO_o"; } - }; - - /** Coerce timestamp to string. 
*/ - class TimestampToString : public ToStringBase { - Value value() { - return Value( Timestamp( 1, 2 ) ); - } - string expected() { return Timestamp( 1, 2 ).toStringPretty(); } - }; - - /** Coerce date to string. */ - class DateToString : public ToStringBase { - Value value() { return Value(Date_t::fromMillisSinceEpoch(1234567890LL*1000)); } - string expected() { return "2009-02-13T23:31:30"; } // from js - }; - - /** Coerce null to string. */ - class NullToString : public ToStringBase { - Value value() { return Value(BSONNULL); } - }; - - /** Coerce undefined to string. */ - class UndefinedToString : public ToStringBase { - Value value() { return Value(BSONUndefined); } - }; - - /** Coerce document to string unsupported. */ - class DocumentToString { - public: - void run() { - ASSERT_THROWS( Value - ( mongo::Document() ).coerceToString(), - UserException ); - } - }; - - /** Coerce timestamp to timestamp. */ - class TimestampToTimestamp { - public: - void run() { - Value value = Value( Timestamp( 1010 ) ); - ASSERT( Timestamp( 1010 ) == value.coerceToTimestamp() ); - } - }; - - /** Coerce date to timestamp unsupported. */ - class DateToTimestamp { - public: - void run() { - ASSERT_THROWS( Value(Date_t::fromMillisSinceEpoch(1010)).coerceToTimestamp(), - UserException ); - } - }; - - } // namespace Coerce - - /** Get the "widest" of two numeric types. */ - class GetWidestNumeric { - public: - void run() { - using mongo::Undefined; - - // Numeric types. - assertWidest( NumberInt, NumberInt, NumberInt ); - assertWidest( NumberLong, NumberInt, NumberLong ); - assertWidest( NumberDouble, NumberInt, NumberDouble ); - assertWidest( NumberLong, NumberLong, NumberLong ); - assertWidest( NumberDouble, NumberLong, NumberDouble ); - assertWidest( NumberDouble, NumberDouble, NumberDouble ); - - // Missing value and numeric types (result Undefined). - assertWidest( Undefined, NumberInt, Undefined ); - assertWidest( Undefined, NumberInt, Undefined ); - assertWidest( Undefined, NumberLong, jstNULL ); - assertWidest( Undefined, NumberLong, Undefined ); - assertWidest( Undefined, NumberDouble, jstNULL ); - assertWidest( Undefined, NumberDouble, Undefined ); - - // Missing value types (result Undefined). - assertWidest( Undefined, jstNULL, jstNULL ); - assertWidest( Undefined, jstNULL, Undefined ); - assertWidest( Undefined, Undefined, Undefined ); - - // Other types (result Undefined). - assertWidest( Undefined, NumberInt, mongo::Bool ); - assertWidest( Undefined, mongo::String, NumberDouble ); - } - private: - void assertWidest( BSONType expectedWidest, BSONType a, BSONType b ) { - ASSERT_EQUALS( expectedWidest, Value::getWidestNumeric( a, b ) ); - ASSERT_EQUALS( expectedWidest, Value::getWidestNumeric( b, a ) ); - } - }; - - /** Add a Value to a BSONObj. */ - class AddToBsonObj { - public: - void run() { - BSONObjBuilder bob; - Value( 4.4 ).addToBsonObj( &bob, "a" ); - Value( 22 ).addToBsonObj( &bob, "b" ); - Value( "astring" ).addToBsonObj( &bob, "c" ); - ASSERT_EQUALS( BSON( "a" << 4.4 << "b" << 22 << "c" << "astring" ), bob.obj() ); - } - }; - - /** Add a Value to a BSONArray. */ - class AddToBsonArray { - public: - void run() { - BSONArrayBuilder bab; - Value( 4.4 ).addToBsonArray( &bab ); - Value( 22 ).addToBsonArray( &bab ); - Value( "astring" ).addToBsonArray( &bab ); - ASSERT_EQUALS( BSON_ARRAY( 4.4 << 22 << "astring" ), bab.arr() ); - } - }; - - /** Value comparator. 
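 *
 * The Value comparator below mirrors the Document cases above and adds
 * the numeric edge cases: ints, longs and doubles compare by value
 * (5, 5LL and 5.0 are all equal), NaN compares equal to NaN and sorts
 * before every number, and mixed-type pairs order by the canonical
 * BSON type chain (MinKey, then missing and undefined, then null,
 * numbers, strings, and so on up to MaxKey) regardless of contents.
 * A minimal sketch:
 *
 *   ASSERT(Value::compare(Value(5), Value(5LL)) == 0);
 *   ASSERT(Value::compare(Value(1), Value("string")) < 0);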
*/ - class Compare { - public: - void run() { - BSONObjBuilder undefinedBuilder; - undefinedBuilder.appendUndefined( "" ); - BSONObj undefined = undefinedBuilder.obj(); - - // Undefined / null. - assertComparison( 0, undefined, undefined ); - assertComparison( -1, undefined, BSON( "" << BSONNULL ) ); - assertComparison( 0, BSON( "" << BSONNULL ), BSON( "" << BSONNULL ) ); - - // Undefined / null with other types. - assertComparison( -1, undefined, BSON( "" << 1 ) ); - assertComparison( -1, undefined, BSON( "" << "bar" ) ); - assertComparison( -1, BSON( "" << BSONNULL ), BSON( "" << -1 ) ); - assertComparison( -1, BSON( "" << BSONNULL ), BSON( "" << "bar" ) ); - - // Numeric types. - assertComparison( 0, 5, 5LL ); - assertComparison( 0, -2, -2.0 ); - assertComparison( 0, 90LL, 90.0 ); - assertComparison( -1, 5, 6LL ); - assertComparison( -1, -2, 2.1 ); - assertComparison( 1, 90LL, 89.999 ); - assertComparison( -1, 90, 90.1 ); - assertComparison( 0, numeric_limits<double>::quiet_NaN(), - numeric_limits<double>::signaling_NaN() ); - assertComparison( -1, numeric_limits<double>::quiet_NaN(), 5 ); - - // strings compare between numbers and objects - assertComparison( 1, "abc", 90 ); - assertComparison( -1, "abc", BSON( "a" << "b" ) ); - - // String comparison. - assertComparison( -1, "", "a" ); - assertComparison( 0, "a", "a" ); - assertComparison( -1, "a", "b" ); - assertComparison( -1, "aa", "b" ); - assertComparison( 1, "bb", "b" ); - assertComparison( 1, "bb", "b" ); - assertComparison( 1, "b-", "b" ); - assertComparison( -1, "b-", "ba" ); - // With a null character. - assertComparison( 1, string( "a\0", 2 ), "a" ); - - // Object. - assertComparison( 0, fromjson( "{'':{}}" ), fromjson( "{'':{}}" ) ); - assertComparison( 0, fromjson( "{'':{x:1}}" ), fromjson( "{'':{x:1}}" ) ); - assertComparison( -1, fromjson( "{'':{}}" ), fromjson( "{'':{x:1}}" ) ); - assertComparison( -1, fromjson( "{'':{'z': 1}}" ), fromjson( "{'':{'a': 'a'}}") ); - - // Array. - assertComparison( 0, fromjson( "{'':[]}" ), fromjson( "{'':[]}" ) ); - assertComparison( -1, fromjson( "{'':[0]}" ), fromjson( "{'':[1]}" ) ); - assertComparison( -1, fromjson( "{'':[0,0]}" ), fromjson( "{'':[1]}" ) ); - assertComparison( -1, fromjson( "{'':[0]}" ), fromjson( "{'':[0,0]}" ) ); - assertComparison( -1, fromjson( "{'':[0]}" ), fromjson( "{'':['']}" ) ); - - // OID. - assertComparison( 0, OID( "abcdefabcdefabcdefabcdef" ), - OID( "abcdefabcdefabcdefabcdef" ) ); - assertComparison( 1, OID( "abcdefabcdefabcdefabcdef" ), - OID( "010101010101010101010101" ) ); - - // Bool. - assertComparison( 0, true, true ); - assertComparison( 0, false, false ); - assertComparison( 1, true, false ); - - // Date. - assertComparison( 0, - Date_t::fromMillisSinceEpoch( 555 ), - Date_t::fromMillisSinceEpoch( 555 ) ); - assertComparison( 1, - Date_t::fromMillisSinceEpoch( 555 ), - Date_t::fromMillisSinceEpoch( 554 ) ); - // Negative date. - assertComparison( 1, - Date_t::fromMillisSinceEpoch( 0 ), - Date_t::fromMillisSinceEpoch( -1 ) ); - - // Regex. - assertComparison( 0, fromjson( "{'':/a/}" ), fromjson( "{'':/a/}" ) ); - assertComparison( -1, fromjson( "{'':/a/}" ), fromjson( "{'':/a/i}" ) ); - assertComparison( -1, fromjson( "{'':/a/}" ), fromjson( "{'':/aa/}" ) ); - - // Timestamp. - assertComparison( 0, Timestamp( 1234 ), Timestamp( 1234 ) ); - assertComparison( -1, Timestamp( 4 ), Timestamp( 1234 ) ); - - // Cross-type comparisons. Listed in order of canonical types. 
- assertComparison(-1, Value(mongo::MINKEY), Value()); - assertComparison(0, Value(), Value()); - assertComparison(0, Value(), Value(BSONUndefined)); - assertComparison(-1, Value(BSONUndefined), Value(BSONNULL)); - assertComparison(-1, Value(BSONNULL), Value(1)); - assertComparison(0, Value(1), Value(1LL)); - assertComparison(0, Value(1), Value(1.0)); - assertComparison(-1, Value(1), Value("string")); - assertComparison(0, Value("string"), Value(BSONSymbol("string"))); - assertComparison(-1, Value("string"), Value(mongo::Document())); - assertComparison(-1, Value(mongo::Document()), Value(vector<Value>())); - assertComparison(-1, Value(vector<Value>()), Value(BSONBinData("", 0, MD5Type))); - assertComparison(-1, Value(BSONBinData("", 0, MD5Type)), Value(mongo::OID())); - assertComparison(-1, Value(mongo::OID()), Value(false)); - assertComparison(-1, Value(false), Value(Date_t())); - assertComparison(-1, Value(Date_t()), Value(Timestamp())); - assertComparison(-1, Value(Timestamp()), Value(BSONRegEx(""))); - assertComparison(-1, Value(BSONRegEx("")), Value(BSONDBRef("", mongo::OID()))); - assertComparison(-1, Value(BSONDBRef("", mongo::OID())), Value(BSONCode(""))); - assertComparison(-1, Value(BSONCode("")), Value(BSONCodeWScope("", BSONObj()))); - assertComparison(-1, Value(BSONCodeWScope("", BSONObj())), Value(mongo::MAXKEY)); - } - private: - template<class T,class U> - void assertComparison( int expectedResult, const T& a, const U& b ) { - assertComparison( expectedResult, BSON( "" << a ), BSON( "" << b ) ); - } - void assertComparison( int expectedResult, const Timestamp& a, const Timestamp& b ) { - BSONObjBuilder first; - first.append( "", a ); - BSONObjBuilder second; - second.append( "", b ); - assertComparison( expectedResult, first.obj(), second.obj() ); - } - int sign(int cmp) { - if (cmp == 0) return 0; - else if (cmp < 0) return -1; - else return 1; - } - int cmp( const Value& a, const Value& b ) { - return sign(Value::compare(a, b)); - } - void assertComparison( int expectedResult, const BSONObj& a, const BSONObj& b ) { - assertComparison(expectedResult, fromBson(a), fromBson(b)); - } - void assertComparison(int expectedResult, const Value& a, const Value& b) { - mongo::unittest::log() << - "testing " << a.toString() << " and " << b.toString() << endl; - // reflexivity - ASSERT_EQUALS(0, cmp(a, a)); - ASSERT_EQUALS(0, cmp(b, b)); - - // symmetry - ASSERT_EQUALS( expectedResult, cmp( a, b ) ); - ASSERT_EQUALS( -expectedResult, cmp( b, a ) ); - - if ( expectedResult == 0 ) { - // equal values must hash equally. - ASSERT_EQUALS( hash( a ), hash( b ) ); - } - else { - // unequal values must hash unequally. 
- // (not true in general but we should error if it fails in any of these cases) - ASSERT_NOT_EQUALS( hash( a ), hash( b ) ); - } - - // same as BSON - ASSERT_EQUALS(expectedResult, sign(toBson(a).firstElement().woCompare( - toBson(b).firstElement()))); - } - size_t hash(const Value& v) { - size_t seed = 0xf00ba6; - v.hash_combine( seed ); - return seed; - } - }; - - class SubFields { - public: - void run() { - const Value val = fromBson(fromjson( - "{'': {a: [{x:1, b:[1, {y:1, c:1234, z:1}, 1]}]}}")); - // ^ this outer object is removed by fromBson - - ASSERT(val.getType() == mongo::Object); - - ASSERT(val[999].missing()); - ASSERT(val["missing"].missing()); - ASSERT(val["a"].getType() == mongo::Array); - - ASSERT(val["a"][999].missing()); - ASSERT(val["a"]["missing"].missing()); - ASSERT(val["a"][0].getType() == mongo::Object); - - ASSERT(val["a"][0][999].missing()); - ASSERT(val["a"][0]["missing"].missing()); - ASSERT(val["a"][0]["b"].getType() == mongo::Array); - - ASSERT(val["a"][0]["b"][999].missing()); - ASSERT(val["a"][0]["b"]["missing"].missing()); - ASSERT(val["a"][0]["b"][1].getType() == mongo::Object); - - ASSERT(val["a"][0]["b"][1][999].missing()); - ASSERT(val["a"][0]["b"][1]["missing"].missing()); - ASSERT(val["a"][0]["b"][1]["c"].getType() == mongo::NumberInt); - ASSERT_EQUALS(val["a"][0]["b"][1]["c"].getInt(), 1234); - } - }; - - - class SerializationOfMissingForSorter { - // Can't be tested in AllTypesDoc since missing values are omitted when adding to BSON. - public: - void run() { - const Value missing; - const Value arrayOfMissing = Value(vector<Value>(10)); - - BufBuilder bb; - missing.serializeForSorter(bb); - arrayOfMissing.serializeForSorter(bb); - - BufReader reader(bb.buf(), bb.len()); - ASSERT_EQUALS( - missing, - Value::deserializeForSorter(reader, Value::SorterDeserializeSettings())); - ASSERT_EQUALS( - arrayOfMissing, - Value::deserializeForSorter(reader, Value::SorterDeserializeSettings())); - } - }; - } // namespace Value - - class All : public Suite { - public: - All() : Suite( "document" ) { - } - void setupTests() { - add<Document::Create>(); - add<Document::CreateFromBsonObj>(); - add<Document::AddField>(); - add<Document::GetValue>(); - add<Document::SetField>(); - add<Document::Compare>(); - add<Document::Clone>(); - add<Document::CloneMultipleFields>(); - add<Document::FieldIteratorEmpty>(); - add<Document::FieldIteratorSingle>(); - add<Document::FieldIteratorMultiple>(); - add<Document::AllTypesDoc>(); - - add<Value::BSONArrayTest>(); - add<Value::Int>(); - add<Value::Long>(); - add<Value::Double>(); - add<Value::String>(); - add<Value::StringWithNull>(); - add<Value::Date>(); - add<Value::JSTimestamp>(); - add<Value::EmptyDocument>(); - add<Value::EmptyArray>(); - add<Value::Array>(); - add<Value::Oid>(); - add<Value::Bool>(); - add<Value::Regex>(); - add<Value::Symbol>(); - add<Value::Undefined>(); - add<Value::Null>(); - add<Value::True>(); - add<Value::False>(); - add<Value::MinusOne>(); - add<Value::Zero>(); - add<Value::One>(); - - add<Value::Coerce::ZeroIntToBool>(); - add<Value::Coerce::NonZeroIntToBool>(); - add<Value::Coerce::ZeroLongToBool>(); - add<Value::Coerce::NonZeroLongToBool>(); - add<Value::Coerce::ZeroDoubleToBool>(); - add<Value::Coerce::NonZeroDoubleToBool>(); - add<Value::Coerce::StringToBool>(); - add<Value::Coerce::ObjectToBool>(); - add<Value::Coerce::ArrayToBool>(); - add<Value::Coerce::DateToBool>(); - add<Value::Coerce::RegexToBool>(); - add<Value::Coerce::TrueToBool>(); - add<Value::Coerce::FalseToBool>(); - 
add<Value::Coerce::NullToBool>(); - add<Value::Coerce::UndefinedToBool>(); - add<Value::Coerce::IntToInt>(); - add<Value::Coerce::LongToInt>(); - add<Value::Coerce::DoubleToInt>(); - add<Value::Coerce::NullToInt>(); - add<Value::Coerce::UndefinedToInt>(); - add<Value::Coerce::StringToInt>(); - add<Value::Coerce::IntToLong>(); - add<Value::Coerce::LongToLong>(); - add<Value::Coerce::DoubleToLong>(); - add<Value::Coerce::NullToLong>(); - add<Value::Coerce::UndefinedToLong>(); - add<Value::Coerce::StringToLong>(); - add<Value::Coerce::IntToDouble>(); - add<Value::Coerce::LongToDouble>(); - add<Value::Coerce::DoubleToDouble>(); - add<Value::Coerce::NullToDouble>(); - add<Value::Coerce::UndefinedToDouble>(); - add<Value::Coerce::StringToDouble>(); - add<Value::Coerce::DateToDate>(); - add<Value::Coerce::TimestampToDate>(); - add<Value::Coerce::StringToDate>(); - add<Value::Coerce::DoubleToString>(); - add<Value::Coerce::IntToString>(); - add<Value::Coerce::LongToString>(); - add<Value::Coerce::StringToString>(); - add<Value::Coerce::TimestampToString>(); - add<Value::Coerce::DateToString>(); - add<Value::Coerce::NullToString>(); - add<Value::Coerce::UndefinedToString>(); - add<Value::Coerce::DocumentToString>(); - add<Value::Coerce::TimestampToTimestamp>(); - add<Value::Coerce::DateToTimestamp>(); - - add<Value::GetWidestNumeric>(); - add<Value::AddToBsonObj>(); - add<Value::AddToBsonArray>(); - add<Value::Compare>(); - add<Value::SubFields>(); - add<Value::SerializationOfMissingForSorter>(); + ASSERT_EQUALS(Value(1), document.getNestedField(FieldPath("a.b"))); + ASSERT_EQUALS(Value(2), cloneOnDemand.peek().getNestedField(FieldPath("a.b"))); + ASSERT_EQUALS(DOC("a" << DOC("b" << 1)), document); + ASSERT_EQUALS(DOC("a" << DOC("b" << 2)), cloneOnDemand.freeze()); + } +}; + +/** Shallow copy clone of a multi field Document. */ +class CloneMultipleFields { +public: + void run() { + Document document = fromBson(fromjson("{a:1,b:['ra',4],c:{z:1},d:'lal'}")); + Document clonedDocument = document.clone(); + ASSERT_EQUALS(document, clonedDocument); + } +}; + +/** FieldIterator for an empty Document. */ +class FieldIteratorEmpty { +public: + void run() { + FieldIterator iterator((Document())); + ASSERT(!iterator.more()); + } +}; + +/** FieldIterator for a single field Document. */ +class FieldIteratorSingle { +public: + void run() { + FieldIterator iterator(fromBson(BSON("a" << 1))); + ASSERT(iterator.more()); + Document::FieldPair field = iterator.next(); + ASSERT_EQUALS("a", field.first.toString()); + ASSERT_EQUALS(1, field.second.getInt()); + ASSERT(!iterator.more()); + } +}; + +/** FieldIterator for a multiple field Document. 
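 *
 * FieldIterator visits a Document's fields in insertion order: more()
 * reports whether a field remains and next() yields it as a
 * Document::FieldPair of name and Value. The idiomatic loop these
 * iterator tests exercise piecewise, as a minimal sketch:
 *
 *   FieldIterator it(doc);
 *   while (it.more()) {
 *       Document::FieldPair field = it.next();
 *       // field.first names the field; field.second holds its Value.
 *   }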
*/ +class FieldIteratorMultiple { +public: + void run() { + FieldIterator iterator(fromBson(BSON("a" << 1 << "b" << 5.6 << "c" + << "z"))); + ASSERT(iterator.more()); + Document::FieldPair field = iterator.next(); + ASSERT_EQUALS("a", field.first.toString()); + ASSERT_EQUALS(1, field.second.getInt()); + ASSERT(iterator.more()); + + Document::FieldPair field2 = iterator.next(); + ASSERT_EQUALS("b", field2.first.toString()); + ASSERT_EQUALS(5.6, field2.second.getDouble()); + ASSERT(iterator.more()); + + Document::FieldPair field3 = iterator.next(); + ASSERT_EQUALS("c", field3.first.toString()); + ASSERT_EQUALS("z", field3.second.getString()); + ASSERT(!iterator.more()); + } +}; + +class AllTypesDoc { +public: + void run() { + // These are listed in order of BSONType with some duplicates + append("minkey", MINKEY); + // EOO not valid in middle of BSONObj + append("double", 1.0); + append("c-string", "string\0after NUL"); // after NULL is ignored + append("c++", StringData("string\0after NUL", StringData::LiteralTag()).toString()); + append("StringData", StringData("string\0after NUL", StringData::LiteralTag())); + append("emptyObj", BSONObj()); + append("filledObj", BSON("a" << 1)); + append("emptyArray", BSON("" << BSONArray()).firstElement()); + append("filledArray", BSON("" << BSON_ARRAY(1 << "a")).firstElement()); + append("binData", BSONBinData("a\0b", 3, BinDataGeneral)); + append("binDataCustom", BSONBinData("a\0b", 3, bdtCustom)); + append("binDataUUID", BSONBinData("123456789\0abcdef", 16, bdtUUID)); + append("undefined", BSONUndefined); + append("oid", OID()); + append("true", true); + append("false", false); + append("date", jsTime()); + append("null", BSONNULL); + append("regex", BSONRegEx(".*")); + append("regexFlags", BSONRegEx(".*", "i")); + append("regexEmpty", BSONRegEx("", "")); + append("dbref", BSONDBRef("foo", OID())); + append("code", BSONCode("function() {}")); + append("codeNul", BSONCode(StringData("var nul = '\0'", StringData::LiteralTag()))); + append("symbol", BSONSymbol("foo")); + append("symbolNul", BSONSymbol(StringData("f\0o", StringData::LiteralTag()))); + append("codeWScope", BSONCodeWScope("asdf", BSONObj())); + append("codeWScopeWScope", BSONCodeWScope("asdf", BSON("one" << 1))); + append("int", 1); + append("timestamp", Timestamp()); + append("long", 1LL); + append("very long", 1LL << 40); + append("maxkey", MAXKEY); + + const BSONArray arr = arrBuilder.arr(); + + // can't use append any more since arrBuilder is done + objBuilder << "mega array" << arr; + docBuilder["mega array"] = mongo::Value(values); + + const BSONObj obj = objBuilder.obj(); + const Document doc = docBuilder.freeze(); + + const BSONObj obj2 = toBson(doc); + const Document doc2 = fromBson(obj); + + // logical equality + ASSERT_EQUALS(obj, obj2); + ASSERT_EQUALS(doc, doc2); + + // binary equality + ASSERT_EQUALS(obj.objsize(), obj2.objsize()); + ASSERT_EQUALS(memcmp(obj.objdata(), obj2.objdata(), obj.objsize()), 0); + + // ensure sorter serialization round-trips correctly + BufBuilder bb; + doc.serializeForSorter(bb); + BufReader reader(bb.buf(), bb.len()); + const Document doc3 = + Document::deserializeForSorter(reader, Document::SorterDeserializeSettings()); + BSONObj obj3 = toBson(doc3); + ASSERT_EQUALS(obj.objsize(), obj3.objsize()); + ASSERT_EQUALS(memcmp(obj.objdata(), obj3.objdata(), obj.objsize()), 0); + } + + template <typename T> + void append(const char* name, const T& thing) { + objBuilder << name << thing; + arrBuilder << thing; + docBuilder[name] = mongo::Value(thing); + 
values.push_back(mongo::Value(thing)); + } + + vector<mongo::Value> values; + MutableDocument docBuilder; + BSONObjBuilder objBuilder; + BSONArrayBuilder arrBuilder; +}; +} // namespace Document + +namespace Value { + +using mongo::Value; + +BSONObj toBson(const Value& value) { + if (value.missing()) + return BSONObj(); // EOO + + BSONObjBuilder bob; + value.addToBsonObj(&bob, ""); + return bob.obj(); +} + +Value fromBson(const BSONObj& obj) { + BSONElement element = obj.firstElement(); + return Value(element); +} + +void assertRoundTrips(const Value& value1) { + BSONObj obj1 = toBson(value1); + Value value2 = fromBson(obj1); + BSONObj obj2 = toBson(value2); + ASSERT_EQUALS(obj1, obj2); + ASSERT_EQUALS(value1, value2); + ASSERT_EQUALS(value1.getType(), value2.getType()); +} + +class BSONArrayTest { +public: + void run() { + ASSERT_EQUALS(Value(BSON_ARRAY(1 << 2 << 3)), DOC_ARRAY(1 << 2 << 3)); + ASSERT_EQUALS(Value(BSONArray()), Value(vector<Value>())); + } +}; + +/** Int type. */ +class Int { +public: + void run() { + Value value = Value(5); + ASSERT_EQUALS(5, value.getInt()); + ASSERT_EQUALS(5, value.getLong()); + ASSERT_EQUALS(5, value.getDouble()); + ASSERT_EQUALS(NumberInt, value.getType()); + assertRoundTrips(value); + } +}; + +/** Long type. */ +class Long { +public: + void run() { + Value value = Value(99LL); + ASSERT_EQUALS(99, value.getLong()); + ASSERT_EQUALS(99, value.getDouble()); + ASSERT_EQUALS(NumberLong, value.getType()); + assertRoundTrips(value); + } +}; + +/** Double type. */ +class Double { +public: + void run() { + Value value = Value(5.5); + ASSERT_EQUALS(5.5, value.getDouble()); + ASSERT_EQUALS(NumberDouble, value.getType()); + assertRoundTrips(value); + } +}; + +/** String type. */ +class String { +public: + void run() { + Value value = Value("foo"); + ASSERT_EQUALS("foo", value.getString()); + ASSERT_EQUALS(mongo::String, value.getType()); + assertRoundTrips(value); + } +}; + +/** String with a null character. */ +class StringWithNull { +public: + void run() { + string withNull("a\0b", 3); + BSONObj objWithNull = BSON("" << withNull); + ASSERT_EQUALS(withNull, objWithNull[""].str()); + Value value = fromBson(objWithNull); + ASSERT_EQUALS(withNull, value.getString()); + assertRoundTrips(value); + } +}; + +/** Date type. */ +class Date { +public: + void run() { + Value value = Value(Date_t::fromMillisSinceEpoch(999)); + ASSERT_EQUALS(999, value.getDate()); + ASSERT_EQUALS(mongo::Date, value.getType()); + assertRoundTrips(value); + } +}; + +/** Timestamp type. */ +class JSTimestamp { +public: + void run() { + Value value = Value(Timestamp(777)); + ASSERT(Timestamp(777) == value.getTimestamp()); + ASSERT_EQUALS(mongo::bsonTimestamp, value.getType()); + assertRoundTrips(value); + } +}; + +/** Document with no fields. */ +class EmptyDocument { +public: + void run() { + mongo::Document document = mongo::Document(); + Value value = Value(document); + ASSERT_EQUALS(document.getPtr(), value.getDocument().getPtr()); + ASSERT_EQUALS(Object, value.getType()); + assertRoundTrips(value); + } +}; + +/** Document type. */ +class Document { +public: + void run() { + mongo::MutableDocument md; + md.addField("a", Value(5)); + md.addField("apple", Value("rrr")); + md.addField("banana", Value(-.3)); + mongo::Document document = md.freeze(); + + Value value = Value(document); + // Check document pointers are equal. + ASSERT_EQUALS(document.getPtr(), value.getDocument().getPtr()); + // Check document contents. 
+ ASSERT_EQUALS(5, document["a"].getInt()); + ASSERT_EQUALS("rrr", document["apple"].getString()); + ASSERT_EQUALS(-.3, document["banana"].getDouble()); + ASSERT_EQUALS(Object, value.getType()); + assertRoundTrips(value); + } +}; + +/** Array with no elements. */ +class EmptyArray { +public: + void run() { + vector<Value> array; + Value value(array); + const vector<Value>& array2 = value.getArray(); + + ASSERT(array2.empty()); + ASSERT_EQUALS(Array, value.getType()); + ASSERT_EQUALS(0U, value.getArrayLength()); + assertRoundTrips(value); + } +}; + +/** Array type. */ +class Array { +public: + void run() { + vector<Value> array; + array.push_back(Value(5)); + array.push_back(Value("lala")); + array.push_back(Value(3.14)); + Value value = Value(array); + const vector<Value>& array2 = value.getArray(); + + ASSERT(!array2.empty()); + ASSERT_EQUALS(array2.size(), 3U); + ASSERT_EQUALS(5, array2[0].getInt()); + ASSERT_EQUALS("lala", array2[1].getString()); + ASSERT_EQUALS(3.14, array2[2].getDouble()); + ASSERT_EQUALS(mongo::Array, value.getType()); + ASSERT_EQUALS(3U, value.getArrayLength()); + assertRoundTrips(value); + } +}; + +/** Oid type. */ +class Oid { +public: + void run() { + Value value = fromBson(BSON("" << OID("abcdefabcdefabcdefabcdef"))); + ASSERT_EQUALS(OID("abcdefabcdefabcdefabcdef"), value.getOid()); + ASSERT_EQUALS(jstOID, value.getType()); + assertRoundTrips(value); + } +}; + +/** Bool type. */ +class Bool { +public: + void run() { + Value value = fromBson(BSON("" << true)); + ASSERT_EQUALS(true, value.getBool()); + ASSERT_EQUALS(mongo::Bool, value.getType()); + assertRoundTrips(value); + } +}; + +/** Regex type. */ +class Regex { +public: + void run() { + Value value = fromBson(fromjson("{'':/abc/}")); + ASSERT_EQUALS(string("abc"), value.getRegex()); + ASSERT_EQUALS(RegEx, value.getType()); + assertRoundTrips(value); + } +}; + +/** Symbol type (currently unsupported). */ +class Symbol { +public: + void run() { + Value value(BSONSymbol("FOOBAR")); + ASSERT_EQUALS("FOOBAR", value.getSymbol()); + ASSERT_EQUALS(mongo::Symbol, value.getType()); + assertRoundTrips(value); + } +}; + +/** Undefined type. */ +class Undefined { +public: + void run() { + Value value = Value(BSONUndefined); + ASSERT_EQUALS(mongo::Undefined, value.getType()); + assertRoundTrips(value); + } +}; + +/** Null type. */ +class Null { +public: + void run() { + Value value = Value(BSONNULL); + ASSERT_EQUALS(jstNULL, value.getType()); + assertRoundTrips(value); + } +}; + +/** True value. */ +class True { +public: + void run() { + Value value = Value(true); + ASSERT_EQUALS(true, value.getBool()); + ASSERT_EQUALS(mongo::Bool, value.getType()); + assertRoundTrips(value); + } +}; + +/** False value. */ +class False { +public: + void run() { + Value value = Value(false); + ASSERT_EQUALS(false, value.getBool()); + ASSERT_EQUALS(mongo::Bool, value.getType()); + assertRoundTrips(value); + } +}; + +/** -1 value. */ +class MinusOne { +public: + void run() { + Value value = Value(-1); + ASSERT_EQUALS(-1, value.getInt()); + ASSERT_EQUALS(NumberInt, value.getType()); + assertRoundTrips(value); + } +}; + +/** 0 value. */ +class Zero { +public: + void run() { + Value value = Value(0); + ASSERT_EQUALS(0, value.getInt()); + ASSERT_EQUALS(NumberInt, value.getType()); + assertRoundTrips(value); + } +}; + +/** 1 value. 
*/ +class One { +public: + void run() { + Value value = Value(1); + ASSERT_EQUALS(1, value.getInt()); + ASSERT_EQUALS(NumberInt, value.getType()); + assertRoundTrips(value); + } +}; + +namespace Coerce { + +class ToBoolBase { +public: + virtual ~ToBoolBase() {} + void run() { + ASSERT_EQUALS(expected(), value().coerceToBool()); + } + +protected: + virtual Value value() = 0; + virtual bool expected() = 0; +}; + +class ToBoolTrue : public ToBoolBase { + bool expected() { + return true; + } +}; + +class ToBoolFalse : public ToBoolBase { + bool expected() { + return false; + } +}; + +/** Coerce 0 to bool. */ +class ZeroIntToBool : public ToBoolFalse { + Value value() { + return Value(0); + } +}; + +/** Coerce -1 to bool. */ +class NonZeroIntToBool : public ToBoolTrue { + Value value() { + return Value(-1); + } +}; + +/** Coerce 0LL to bool. */ +class ZeroLongToBool : public ToBoolFalse { + Value value() { + return Value(0LL); + } +}; + +/** Coerce 5LL to bool. */ +class NonZeroLongToBool : public ToBoolTrue { + Value value() { + return Value(5LL); + } +}; + +/** Coerce 0.0 to bool. */ +class ZeroDoubleToBool : public ToBoolFalse { + Value value() { + return Value(0); + } +}; + +/** Coerce -1.3 to bool. */ +class NonZeroDoubleToBool : public ToBoolTrue { + Value value() { + return Value(-1.3); + } +}; + +/** Coerce "" to bool. */ +class StringToBool : public ToBoolTrue { + Value value() { + return Value(""); + } +}; + +/** Coerce {} to bool. */ +class ObjectToBool : public ToBoolTrue { + Value value() { + return Value(mongo::Document()); + } +}; + +/** Coerce [] to bool. */ +class ArrayToBool : public ToBoolTrue { + Value value() { + return Value(vector<Value>()); + } +}; + +/** Coerce Date(0) to bool. */ +class DateToBool : public ToBoolTrue { + Value value() { + return Value(Date_t{}); + } +}; + +/** Coerce js literal regex to bool. */ +class RegexToBool : public ToBoolTrue { + Value value() { + return fromBson(fromjson("{''://}")); + } +}; + +/** Coerce true to bool. */ +class TrueToBool : public ToBoolTrue { + Value value() { + return fromBson(BSON("" << true)); + } +}; + +/** Coerce false to bool. */ +class FalseToBool : public ToBoolFalse { + Value value() { + return fromBson(BSON("" << false)); + } +}; + +/** Coerce null to bool. */ +class NullToBool : public ToBoolFalse { + Value value() { + return Value(BSONNULL); + } +}; + +/** Coerce undefined to bool. */ +class UndefinedToBool : public ToBoolFalse { + Value value() { + return Value(BSONUndefined); + } +}; + +class ToIntBase { +public: + virtual ~ToIntBase() {} + void run() { + if (asserts()) + ASSERT_THROWS(value().coerceToInt(), UserException); + else + ASSERT_EQUALS(expected(), value().coerceToInt()); + } + +protected: + virtual Value value() = 0; + virtual int expected() { + return 0; + } + virtual bool asserts() { + return false; + } +}; + +/** Coerce -5 to int. */ +class IntToInt : public ToIntBase { + Value value() { + return Value(-5); + } + int expected() { + return -5; + } +}; + +/** Coerce long to int. */ +class LongToInt : public ToIntBase { + Value value() { + return Value(0xff00000007LL); + } + int expected() { + return 7; + } +}; + +/** Coerce 9.8 to int. */ +class DoubleToInt : public ToIntBase { + Value value() { + return Value(9.8); + } + int expected() { + return 9; + } +}; + +/** Coerce null to int. */ +class NullToInt : public ToIntBase { + Value value() { + return Value(BSONNULL); + } + bool asserts() { + return true; + } +}; + +/** Coerce undefined to int. 
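 *
 * The integer coercions above are deliberately lossy: coerceToInt() on
 * a long keeps only the low 32 bits (0xff00000007LL becomes 7) and
 * truncates a double toward zero (9.8 becomes 9), while null above and
 * undefined below throw a UserException rather than yielding a
 * default. A minimal sketch:
 *
 *   ASSERT_EQUALS(7, Value(0xff00000007LL).coerceToInt());
 *   ASSERT_EQUALS(9, Value(9.8).coerceToInt());
 *   ASSERT_THROWS(Value(BSONNULL).coerceToInt(), UserException);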
*/ +class UndefinedToInt : public ToIntBase { + Value value() { + return Value(BSONUndefined); + } + bool asserts() { + return true; + } +}; + +/** Coerce "" to int unsupported. */ +class StringToInt { +public: + void run() { + ASSERT_THROWS(Value("").coerceToInt(), UserException); + } +}; + +class ToLongBase { +public: + virtual ~ToLongBase() {} + void run() { + if (asserts()) + ASSERT_THROWS(value().coerceToLong(), UserException); + else + ASSERT_EQUALS(expected(), value().coerceToLong()); + } + +protected: + virtual Value value() = 0; + virtual long long expected() { + return 0; + } + virtual bool asserts() { + return false; + } +}; + +/** Coerce -5 to long. */ +class IntToLong : public ToLongBase { + Value value() { + return Value(-5); + } + long long expected() { + return -5; + } +}; + +/** Coerce long to long. */ +class LongToLong : public ToLongBase { + Value value() { + return Value(0xff00000007LL); + } + long long expected() { + return 0xff00000007LL; + } +}; + +/** Coerce 9.8 to long. */ +class DoubleToLong : public ToLongBase { + Value value() { + return Value(9.8); + } + long long expected() { + return 9; + } +}; + +/** Coerce null to long. */ +class NullToLong : public ToLongBase { + Value value() { + return Value(BSONNULL); + } + bool asserts() { + return true; + } +}; + +/** Coerce undefined to long. */ +class UndefinedToLong : public ToLongBase { + Value value() { + return Value(BSONUndefined); + } + bool asserts() { + return true; + } +}; + +/** Coerce string to long unsupported. */ +class StringToLong { +public: + void run() { + ASSERT_THROWS(Value("").coerceToLong(), UserException); + } +}; + +class ToDoubleBase { +public: + virtual ~ToDoubleBase() {} + void run() { + if (asserts()) + ASSERT_THROWS(value().coerceToDouble(), UserException); + else + ASSERT_EQUALS(expected(), value().coerceToDouble()); + } + +protected: + virtual Value value() = 0; + virtual double expected() { + return 0; + } + virtual bool asserts() { + return false; + } +}; + +/** Coerce -5 to double. */ +class IntToDouble : public ToDoubleBase { + Value value() { + return Value(-5); + } + double expected() { + return -5; + } +}; + +/** Coerce long to double. */ +class LongToDouble : public ToDoubleBase { + Value value() { + // A long that cannot be exactly represented as a double. + return Value(static_cast<double>(0x8fffffffffffffffLL)); + } + double expected() { + return static_cast<double>(0x8fffffffffffffffLL); + } +}; + +/** Coerce double to double. */ +class DoubleToDouble : public ToDoubleBase { + Value value() { + return Value(9.8); + } + double expected() { + return 9.8; + } +}; + +/** Coerce null to double. */ +class NullToDouble : public ToDoubleBase { + Value value() { + return Value(BSONNULL); + } + bool asserts() { + return true; + } +}; + +/** Coerce undefined to double. */ +class UndefinedToDouble : public ToDoubleBase { + Value value() { + return Value(BSONUndefined); + } + bool asserts() { + return true; + } +}; + +/** Coerce string to double unsupported. */ +class StringToDouble { +public: + void run() { + ASSERT_THROWS(Value("").coerceToDouble(), UserException); + } +}; + +class ToDateBase { +public: + virtual ~ToDateBase() {} + void run() { + ASSERT_EQUALS(expected(), value().coerceToDate()); + } + +protected: + virtual Value value() = 0; + virtual long long expected() = 0; +}; + +/** Coerce date to date. 
*/ +class DateToDate : public ToDateBase { + Value value() { + return Value(Date_t::fromMillisSinceEpoch(888)); + } + long long expected() { + return 888; + } +}; + +/** + * Convert timestamp to date. This extracts the time portion of the timestamp, which + * is different from BSON behavior of interpreting all bytes as a date. + */ +class TimestampToDate : public ToDateBase { + Value value() { + return Value(Timestamp(777, 666)); + } + long long expected() { + return 777 * 1000; + } +}; + +/** Coerce string to date unsupported. */ +class StringToDate { +public: + void run() { + ASSERT_THROWS(Value("").coerceToDate(), UserException); + } +}; + +class ToStringBase { +public: + virtual ~ToStringBase() {} + void run() { + ASSERT_EQUALS(expected(), value().coerceToString()); + } + +protected: + virtual Value value() = 0; + virtual string expected() { + return ""; + } +}; + +/** Coerce -0.2 to string. */ +class DoubleToString : public ToStringBase { + Value value() { + return Value(-0.2); + } + string expected() { + return "-0.2"; + } +}; + +/** Coerce -4 to string. */ +class IntToString : public ToStringBase { + Value value() { + return Value(-4); + } + string expected() { + return "-4"; + } +}; + +/** Coerce 10000LL to string. */ +class LongToString : public ToStringBase { + Value value() { + return Value(10000LL); + } + string expected() { + return "10000"; + } +}; + +/** Coerce string to string. */ +class StringToString : public ToStringBase { + Value value() { + return Value("fO_o"); + } + string expected() { + return "fO_o"; + } +}; + +/** Coerce timestamp to string. */ +class TimestampToString : public ToStringBase { + Value value() { + return Value(Timestamp(1, 2)); + } + string expected() { + return Timestamp(1, 2).toStringPretty(); + } +}; + +/** Coerce date to string. */ +class DateToString : public ToStringBase { + Value value() { + return Value(Date_t::fromMillisSinceEpoch(1234567890LL * 1000)); + } + string expected() { + return "2009-02-13T23:31:30"; + } // from js +}; + +/** Coerce null to string. */ +class NullToString : public ToStringBase { + Value value() { + return Value(BSONNULL); + } +}; + +/** Coerce undefined to string. */ +class UndefinedToString : public ToStringBase { + Value value() { + return Value(BSONUndefined); + } +}; + +/** Coerce document to string unsupported. */ +class DocumentToString { +public: + void run() { + ASSERT_THROWS(Value(mongo::Document()).coerceToString(), UserException); + } +}; + +/** Coerce timestamp to timestamp. */ +class TimestampToTimestamp { +public: + void run() { + Value value = Value(Timestamp(1010)); + ASSERT(Timestamp(1010) == value.coerceToTimestamp()); + } +}; + +/** Coerce date to timestamp unsupported. */ +class DateToTimestamp { +public: + void run() { + ASSERT_THROWS(Value(Date_t::fromMillisSinceEpoch(1010)).coerceToTimestamp(), UserException); + } +}; + +} // namespace Coerce + +/** Get the "widest" of two numeric types. */ +class GetWidestNumeric { +public: + void run() { + using mongo::Undefined; + + // Numeric types. + assertWidest(NumberInt, NumberInt, NumberInt); + assertWidest(NumberLong, NumberInt, NumberLong); + assertWidest(NumberDouble, NumberInt, NumberDouble); + assertWidest(NumberLong, NumberLong, NumberLong); + assertWidest(NumberDouble, NumberLong, NumberDouble); + assertWidest(NumberDouble, NumberDouble, NumberDouble); + + // Missing value and numeric types (result Undefined). 
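+        // (assertWidest() below checks each pair in both argument orders,
+        // so the symmetry of Value::getWidestNumeric() is covered as well.)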
+        assertWidest(Undefined, NumberInt, jstNULL);
+        assertWidest(Undefined, NumberInt, Undefined);
+        assertWidest(Undefined, NumberLong, jstNULL);
+        assertWidest(Undefined, NumberLong, Undefined);
+        assertWidest(Undefined, NumberDouble, jstNULL);
+        assertWidest(Undefined, NumberDouble, Undefined);
+
+        // Missing value types (result Undefined).
+        assertWidest(Undefined, jstNULL, jstNULL);
+        assertWidest(Undefined, jstNULL, Undefined);
+        assertWidest(Undefined, Undefined, Undefined);
+
+        // Other types (result Undefined).
+        assertWidest(Undefined, NumberInt, mongo::Bool);
+        assertWidest(Undefined, mongo::String, NumberDouble);
+    }
+
+private:
+    void assertWidest(BSONType expectedWidest, BSONType a, BSONType b) {
+        ASSERT_EQUALS(expectedWidest, Value::getWidestNumeric(a, b));
+        ASSERT_EQUALS(expectedWidest, Value::getWidestNumeric(b, a));
+    }
+};
+
+/** Add a Value to a BSONObj. */
+class AddToBsonObj {
+public:
+    void run() {
+        BSONObjBuilder bob;
+        Value(4.4).addToBsonObj(&bob, "a");
+        Value(22).addToBsonObj(&bob, "b");
+        Value("astring").addToBsonObj(&bob, "c");
+        ASSERT_EQUALS(BSON("a" << 4.4 << "b" << 22 << "c"
+                               << "astring"),
+                      bob.obj());
+    }
+};
+
+/** Add a Value to a BSONArray. */
+class AddToBsonArray {
+public:
+    void run() {
+        BSONArrayBuilder bab;
+        Value(4.4).addToBsonArray(&bab);
+        Value(22).addToBsonArray(&bab);
+        Value("astring").addToBsonArray(&bab);
+        ASSERT_EQUALS(BSON_ARRAY(4.4 << 22 << "astring"), bab.arr());
+    }
+};
+
+/** Value comparator. */
+class Compare {
+public:
+    void run() {
+        BSONObjBuilder undefinedBuilder;
+        undefinedBuilder.appendUndefined("");
+        BSONObj undefined = undefinedBuilder.obj();
+
+        // Undefined / null.
+        assertComparison(0, undefined, undefined);
+        assertComparison(-1, undefined, BSON("" << BSONNULL));
+        assertComparison(0, BSON("" << BSONNULL), BSON("" << BSONNULL));
+
+        // Undefined / null with other types.
+        assertComparison(-1, undefined, BSON("" << 1));
+        assertComparison(-1,
+                         undefined,
+                         BSON(""
+                              << "bar"));
+        assertComparison(-1, BSON("" << BSONNULL), BSON("" << -1));
+        assertComparison(-1,
+                         BSON("" << BSONNULL),
+                         BSON(""
+                              << "bar"));
+
+        // Numeric types.
+        assertComparison(0, 5, 5LL);
+        assertComparison(0, -2, -2.0);
+        assertComparison(0, 90LL, 90.0);
+        assertComparison(-1, 5, 6LL);
+        assertComparison(-1, -2, 2.1);
+        assertComparison(1, 90LL, 89.999);
+        assertComparison(-1, 90, 90.1);
+        assertComparison(
+            0, numeric_limits<double>::quiet_NaN(), numeric_limits<double>::signaling_NaN());
+        assertComparison(-1, numeric_limits<double>::quiet_NaN(), 5);
+
+        // Strings compare between numbers and objects.
+        assertComparison(1, "abc", 90);
+        assertComparison(-1,
+                         "abc",
+                         BSON("a"
+                              << "b"));
+
+        // String comparison.
+        assertComparison(-1, "", "a");
+        assertComparison(0, "a", "a");
+        assertComparison(-1, "a", "b");
+        assertComparison(-1, "aa", "b");
+        assertComparison(1, "bb", "b");
+        assertComparison(1, "b-", "b");
+        assertComparison(-1, "b-", "ba");
+        // With a null character.
+        assertComparison(1, string("a\0", 2), "a");
+
+        // Object.
+        assertComparison(0, fromjson("{'':{}}"), fromjson("{'':{}}"));
+        assertComparison(0, fromjson("{'':{x:1}}"), fromjson("{'':{x:1}}"));
+        assertComparison(-1, fromjson("{'':{}}"), fromjson("{'':{x:1}}"));
+        assertComparison(-1, fromjson("{'':{'z': 1}}"), fromjson("{'':{'a': 'a'}}"));
+
+        // Array.
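+        // Arrays compare element by element: the first unequal element
+        // decides, a strict prefix sorts first, and mixed-type elements
+        // fall back to the canonical type order (numbers before strings),
+        // as the cases below exercise.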
+ assertComparison(0, fromjson("{'':[]}"), fromjson("{'':[]}")); + assertComparison(-1, fromjson("{'':[0]}"), fromjson("{'':[1]}")); + assertComparison(-1, fromjson("{'':[0,0]}"), fromjson("{'':[1]}")); + assertComparison(-1, fromjson("{'':[0]}"), fromjson("{'':[0,0]}")); + assertComparison(-1, fromjson("{'':[0]}"), fromjson("{'':['']}")); + + // OID. + assertComparison(0, OID("abcdefabcdefabcdefabcdef"), OID("abcdefabcdefabcdefabcdef")); + assertComparison(1, OID("abcdefabcdefabcdefabcdef"), OID("010101010101010101010101")); + + // Bool. + assertComparison(0, true, true); + assertComparison(0, false, false); + assertComparison(1, true, false); + + // Date. + assertComparison(0, Date_t::fromMillisSinceEpoch(555), Date_t::fromMillisSinceEpoch(555)); + assertComparison(1, Date_t::fromMillisSinceEpoch(555), Date_t::fromMillisSinceEpoch(554)); + // Negative date. + assertComparison(1, Date_t::fromMillisSinceEpoch(0), Date_t::fromMillisSinceEpoch(-1)); + + // Regex. + assertComparison(0, fromjson("{'':/a/}"), fromjson("{'':/a/}")); + assertComparison(-1, fromjson("{'':/a/}"), fromjson("{'':/a/i}")); + assertComparison(-1, fromjson("{'':/a/}"), fromjson("{'':/aa/}")); + + // Timestamp. + assertComparison(0, Timestamp(1234), Timestamp(1234)); + assertComparison(-1, Timestamp(4), Timestamp(1234)); + + // Cross-type comparisons. Listed in order of canonical types. + assertComparison(-1, Value(mongo::MINKEY), Value()); + assertComparison(0, Value(), Value()); + assertComparison(0, Value(), Value(BSONUndefined)); + assertComparison(-1, Value(BSONUndefined), Value(BSONNULL)); + assertComparison(-1, Value(BSONNULL), Value(1)); + assertComparison(0, Value(1), Value(1LL)); + assertComparison(0, Value(1), Value(1.0)); + assertComparison(-1, Value(1), Value("string")); + assertComparison(0, Value("string"), Value(BSONSymbol("string"))); + assertComparison(-1, Value("string"), Value(mongo::Document())); + assertComparison(-1, Value(mongo::Document()), Value(vector<Value>())); + assertComparison(-1, Value(vector<Value>()), Value(BSONBinData("", 0, MD5Type))); + assertComparison(-1, Value(BSONBinData("", 0, MD5Type)), Value(mongo::OID())); + assertComparison(-1, Value(mongo::OID()), Value(false)); + assertComparison(-1, Value(false), Value(Date_t())); + assertComparison(-1, Value(Date_t()), Value(Timestamp())); + assertComparison(-1, Value(Timestamp()), Value(BSONRegEx(""))); + assertComparison(-1, Value(BSONRegEx("")), Value(BSONDBRef("", mongo::OID()))); + assertComparison(-1, Value(BSONDBRef("", mongo::OID())), Value(BSONCode(""))); + assertComparison(-1, Value(BSONCode("")), Value(BSONCodeWScope("", BSONObj()))); + assertComparison(-1, Value(BSONCodeWScope("", BSONObj())), Value(mongo::MAXKEY)); + } + +private: + template <class T, class U> + void assertComparison(int expectedResult, const T& a, const U& b) { + assertComparison(expectedResult, BSON("" << a), BSON("" << b)); + } + void assertComparison(int expectedResult, const Timestamp& a, const Timestamp& b) { + BSONObjBuilder first; + first.append("", a); + BSONObjBuilder second; + second.append("", b); + assertComparison(expectedResult, first.obj(), second.obj()); + } + int sign(int cmp) { + if (cmp == 0) + return 0; + else if (cmp < 0) + return -1; + else + return 1; + } + int cmp(const Value& a, const Value& b) { + return sign(Value::compare(a, b)); + } + void assertComparison(int expectedResult, const BSONObj& a, const BSONObj& b) { + assertComparison(expectedResult, fromBson(a), fromBson(b)); + } + void assertComparison(int expectedResult, const 
Value& a, const Value& b) { + mongo::unittest::log() << "testing " << a.toString() << " and " << b.toString() << endl; + // reflexivity + ASSERT_EQUALS(0, cmp(a, a)); + ASSERT_EQUALS(0, cmp(b, b)); + + // symmetry + ASSERT_EQUALS(expectedResult, cmp(a, b)); + ASSERT_EQUALS(-expectedResult, cmp(b, a)); + + if (expectedResult == 0) { + // equal values must hash equally. + ASSERT_EQUALS(hash(a), hash(b)); + } else { + // unequal values must hash unequally. + // (not true in general but we should error if it fails in any of these cases) + ASSERT_NOT_EQUALS(hash(a), hash(b)); } - }; - SuiteInstance<All> myall; + // same as BSON + ASSERT_EQUALS(expectedResult, + sign(toBson(a).firstElement().woCompare(toBson(b).firstElement()))); + } + size_t hash(const Value& v) { + size_t seed = 0xf00ba6; + v.hash_combine(seed); + return seed; + } +}; + +class SubFields { +public: + void run() { + const Value val = fromBson(fromjson("{'': {a: [{x:1, b:[1, {y:1, c:1234, z:1}, 1]}]}}")); + // ^ this outer object is removed by fromBson + + ASSERT(val.getType() == mongo::Object); + + ASSERT(val[999].missing()); + ASSERT(val["missing"].missing()); + ASSERT(val["a"].getType() == mongo::Array); + + ASSERT(val["a"][999].missing()); + ASSERT(val["a"]["missing"].missing()); + ASSERT(val["a"][0].getType() == mongo::Object); + + ASSERT(val["a"][0][999].missing()); + ASSERT(val["a"][0]["missing"].missing()); + ASSERT(val["a"][0]["b"].getType() == mongo::Array); + + ASSERT(val["a"][0]["b"][999].missing()); + ASSERT(val["a"][0]["b"]["missing"].missing()); + ASSERT(val["a"][0]["b"][1].getType() == mongo::Object); + + ASSERT(val["a"][0]["b"][1][999].missing()); + ASSERT(val["a"][0]["b"][1]["missing"].missing()); + ASSERT(val["a"][0]["b"][1]["c"].getType() == mongo::NumberInt); + ASSERT_EQUALS(val["a"][0]["b"][1]["c"].getInt(), 1234); + } +}; + + +class SerializationOfMissingForSorter { + // Can't be tested in AllTypesDoc since missing values are omitted when adding to BSON. 
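+    // Instead, this test round-trips a missing Value (and an array holding
+    // missing values) through serializeForSorter()/deserializeForSorter()
+    // and compares the results.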
+public: + void run() { + const Value missing; + const Value arrayOfMissing = Value(vector<Value>(10)); + + BufBuilder bb; + missing.serializeForSorter(bb); + arrayOfMissing.serializeForSorter(bb); + + BufReader reader(bb.buf(), bb.len()); + ASSERT_EQUALS(missing, + Value::deserializeForSorter(reader, Value::SorterDeserializeSettings())); + ASSERT_EQUALS(arrayOfMissing, + Value::deserializeForSorter(reader, Value::SorterDeserializeSettings())); + } +}; +} // namespace Value + +class All : public Suite { +public: + All() : Suite("document") {} + void setupTests() { + add<Document::Create>(); + add<Document::CreateFromBsonObj>(); + add<Document::AddField>(); + add<Document::GetValue>(); + add<Document::SetField>(); + add<Document::Compare>(); + add<Document::Clone>(); + add<Document::CloneMultipleFields>(); + add<Document::FieldIteratorEmpty>(); + add<Document::FieldIteratorSingle>(); + add<Document::FieldIteratorMultiple>(); + add<Document::AllTypesDoc>(); + + add<Value::BSONArrayTest>(); + add<Value::Int>(); + add<Value::Long>(); + add<Value::Double>(); + add<Value::String>(); + add<Value::StringWithNull>(); + add<Value::Date>(); + add<Value::JSTimestamp>(); + add<Value::EmptyDocument>(); + add<Value::EmptyArray>(); + add<Value::Array>(); + add<Value::Oid>(); + add<Value::Bool>(); + add<Value::Regex>(); + add<Value::Symbol>(); + add<Value::Undefined>(); + add<Value::Null>(); + add<Value::True>(); + add<Value::False>(); + add<Value::MinusOne>(); + add<Value::Zero>(); + add<Value::One>(); + + add<Value::Coerce::ZeroIntToBool>(); + add<Value::Coerce::NonZeroIntToBool>(); + add<Value::Coerce::ZeroLongToBool>(); + add<Value::Coerce::NonZeroLongToBool>(); + add<Value::Coerce::ZeroDoubleToBool>(); + add<Value::Coerce::NonZeroDoubleToBool>(); + add<Value::Coerce::StringToBool>(); + add<Value::Coerce::ObjectToBool>(); + add<Value::Coerce::ArrayToBool>(); + add<Value::Coerce::DateToBool>(); + add<Value::Coerce::RegexToBool>(); + add<Value::Coerce::TrueToBool>(); + add<Value::Coerce::FalseToBool>(); + add<Value::Coerce::NullToBool>(); + add<Value::Coerce::UndefinedToBool>(); + add<Value::Coerce::IntToInt>(); + add<Value::Coerce::LongToInt>(); + add<Value::Coerce::DoubleToInt>(); + add<Value::Coerce::NullToInt>(); + add<Value::Coerce::UndefinedToInt>(); + add<Value::Coerce::StringToInt>(); + add<Value::Coerce::IntToLong>(); + add<Value::Coerce::LongToLong>(); + add<Value::Coerce::DoubleToLong>(); + add<Value::Coerce::NullToLong>(); + add<Value::Coerce::UndefinedToLong>(); + add<Value::Coerce::StringToLong>(); + add<Value::Coerce::IntToDouble>(); + add<Value::Coerce::LongToDouble>(); + add<Value::Coerce::DoubleToDouble>(); + add<Value::Coerce::NullToDouble>(); + add<Value::Coerce::UndefinedToDouble>(); + add<Value::Coerce::StringToDouble>(); + add<Value::Coerce::DateToDate>(); + add<Value::Coerce::TimestampToDate>(); + add<Value::Coerce::StringToDate>(); + add<Value::Coerce::DoubleToString>(); + add<Value::Coerce::IntToString>(); + add<Value::Coerce::LongToString>(); + add<Value::Coerce::StringToString>(); + add<Value::Coerce::TimestampToString>(); + add<Value::Coerce::DateToString>(); + add<Value::Coerce::NullToString>(); + add<Value::Coerce::UndefinedToString>(); + add<Value::Coerce::DocumentToString>(); + add<Value::Coerce::TimestampToTimestamp>(); + add<Value::Coerce::DateToTimestamp>(); + + add<Value::GetWidestNumeric>(); + add<Value::AddToBsonObj>(); + add<Value::AddToBsonArray>(); + add<Value::Compare>(); + add<Value::SubFields>(); + add<Value::SerializationOfMissingForSorter>(); + } +}; + 
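+// Instantiating SuiteInstance below is what registers the "document" suite
+// with the unittest framework; each add<T>() call above contributes one
+// T::run() test case.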
+SuiteInstance<All> myall; -} // namespace DocumentTests +} // namespace DocumentTests diff --git a/src/mongo/db/pipeline/expression.cpp b/src/mongo/db/pipeline/expression.cpp index 34cbda702a3..8db17f3152a 100644 --- a/src/mongo/db/pipeline/expression.cpp +++ b/src/mongo/db/pipeline/expression.cpp @@ -31,7 +31,7 @@ #include "mongo/db/pipeline/expression.h" #include <boost/algorithm/string.hpp> -#include <boost/preprocessor/cat.hpp> // like the ## operator but works with __LINE__ +#include <boost/preprocessor/cat.hpp> // like the ## operator but works with __LINE__ #include <cstdio> #include "mongo/base/init.h" @@ -44,268 +44,266 @@ #include "mongo/util/mongoutils/str.h" namespace mongo { - using namespace mongoutils; +using namespace mongoutils; - using boost::intrusive_ptr; - using std::set; - using std::string; - using std::vector; +using boost::intrusive_ptr; +using std::set; +using std::string; +using std::vector; - /// Helper function to easily wrap constants with $const. - static Value serializeConstant(Value val) { - return Value(DOC("$const" << val)); - } - - void Variables::uassertValidNameForUserWrite(StringData varName) { - // System variables users allowed to write to (currently just one) - if (varName == "CURRENT") { - return; - } - - uassert(16866, "empty variable names are not allowed", - !varName.empty()); - - const bool firstCharIsValid = (varName[0] >= 'a' && varName[0] <= 'z') - || (varName[0] & '\x80') // non-ascii - ; - - uassert(16867, str::stream() << - "'" << varName << "' starts with an invalid character for a user variable name", - firstCharIsValid); - - for (size_t i = 1; i < varName.size(); i++) { - const bool charIsValid = (varName[i] >= 'a' && varName[i] <= 'z') - || (varName[i] >= 'A' && varName[i] <= 'Z') - || (varName[i] >= '0' && varName[i] <= '9') - || (varName[i] == '_') - || (varName[i] & '\x80') // non-ascii - ; +/// Helper function to easily wrap constants with $const. 
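+/// For example, serializeConstant(Value(5)) produces the Value {$const: 5},
+/// which reparses as the literal 5 rather than as an operator expression.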
+static Value serializeConstant(Value val) { + return Value(DOC("$const" << val)); +} - uassert(16868, str::stream() << "'" << varName << "' contains an invalid character " - << "for a variable name: '" << varName[i] << "'", - charIsValid); - } +void Variables::uassertValidNameForUserWrite(StringData varName) { + // System variables users allowed to write to (currently just one) + if (varName == "CURRENT") { + return; } - void Variables::uassertValidNameForUserRead(StringData varName) { - uassert(16869, "empty variable names are not allowed", - !varName.empty()); + uassert(16866, "empty variable names are not allowed", !varName.empty()); - const bool firstCharIsValid = (varName[0] >= 'a' && varName[0] <= 'z') - || (varName[0] >= 'A' && varName[0] <= 'Z') - || (varName[0] & '\x80') // non-ascii - ; + const bool firstCharIsValid = + (varName[0] >= 'a' && varName[0] <= 'z') || (varName[0] & '\x80') // non-ascii + ; - uassert(16870, str::stream() << - "'" << varName << "' starts with an invalid character for a variable name", - firstCharIsValid); + uassert(16867, + str::stream() << "'" << varName + << "' starts with an invalid character for a user variable name", + firstCharIsValid); - for (size_t i = 1; i < varName.size(); i++) { - const bool charIsValid = (varName[i] >= 'a' && varName[i] <= 'z') - || (varName[i] >= 'A' && varName[i] <= 'Z') - || (varName[i] >= '0' && varName[i] <= '9') - || (varName[i] == '_') - || (varName[i] & '\x80') // non-ascii - ; + for (size_t i = 1; i < varName.size(); i++) { + const bool charIsValid = (varName[i] >= 'a' && varName[i] <= 'z') || + (varName[i] >= 'A' && varName[i] <= 'Z') || (varName[i] >= '0' && varName[i] <= '9') || + (varName[i] == '_') || (varName[i] & '\x80') // non-ascii + ; - uassert(16871, str::stream() << "'" << varName << "' contains an invalid character " - << "for a variable name: '" << varName[i] << "'", - charIsValid); - } - } - - void Variables::setValue(Id id, const Value& value) { - massert(17199, "can't use Variables::setValue to set ROOT", - id != ROOT_ID); - - verify(id < _numVars); - _rest[id] = value; + uassert(16868, + str::stream() << "'" << varName << "' contains an invalid character " + << "for a variable name: '" << varName[i] << "'", + charIsValid); } +} - Value Variables::getValue(Id id) const { - if (id == ROOT_ID) - return Value(_root); +void Variables::uassertValidNameForUserRead(StringData varName) { + uassert(16869, "empty variable names are not allowed", !varName.empty()); - verify(id < _numVars); - return _rest[id]; - } + const bool firstCharIsValid = (varName[0] >= 'a' && varName[0] <= 'z') || + (varName[0] >= 'A' && varName[0] <= 'Z') || (varName[0] & '\x80') // non-ascii + ; - Document Variables::getDocument(Id id) const { - if (id == ROOT_ID) - return _root; + uassert(16870, + str::stream() << "'" << varName + << "' starts with an invalid character for a variable name", + firstCharIsValid); - verify(id < _numVars); - const Value var = _rest[id]; - if (var.getType() == Object) - return var.getDocument(); + for (size_t i = 1; i < varName.size(); i++) { + const bool charIsValid = (varName[i] >= 'a' && varName[i] <= 'z') || + (varName[i] >= 'A' && varName[i] <= 'Z') || (varName[i] >= '0' && varName[i] <= '9') || + (varName[i] == '_') || (varName[i] & '\x80') // non-ascii + ; - return Document(); + uassert(16871, + str::stream() << "'" << varName << "' contains an invalid character " + << "for a variable name: '" << varName[i] << "'", + charIsValid); } +} - Variables::Id VariablesParseState::defineVariable(StringData 
name) { - // caller should have validated before hand by using Variables::uassertValidNameForUserWrite - massert(17275, "Can't redefine ROOT", - name != "ROOT"); +void Variables::setValue(Id id, const Value& value) { + massert(17199, "can't use Variables::setValue to set ROOT", id != ROOT_ID); - Variables::Id id = _idGenerator->generateId(); - _variables[name] = id; - return id; - } - - Variables::Id VariablesParseState::getVariable(StringData name) const { - StringMap<Variables::Id>::const_iterator it = _variables.find(name); - if (it != _variables.end()) - return it->second; + verify(id < _numVars); + _rest[id] = value; +} - uassert(17276, str::stream() << "Use of undefined variable: " << name, - name == "ROOT" || name == "CURRENT"); +Value Variables::getValue(Id id) const { + if (id == ROOT_ID) + return Value(_root); - return Variables::ROOT_ID; - } + verify(id < _numVars); + return _rest[id]; +} - /* --------------------------- Expression ------------------------------ */ +Document Variables::getDocument(Id id) const { + if (id == ROOT_ID) + return _root; - Expression::ObjectCtx::ObjectCtx(int theOptions) - : options(theOptions) - {} + verify(id < _numVars); + const Value var = _rest[id]; + if (var.getType() == Object) + return var.getDocument(); - bool Expression::ObjectCtx::documentOk() const { - return ((options & DOCUMENT_OK) != 0); - } + return Document(); +} - bool Expression::ObjectCtx::topLevel() const { - return ((options & TOP_LEVEL) != 0); - } +Variables::Id VariablesParseState::defineVariable(StringData name) { + // caller should have validated before hand by using Variables::uassertValidNameForUserWrite + massert(17275, "Can't redefine ROOT", name != "ROOT"); - bool Expression::ObjectCtx::inclusionOk() const { - return ((options & INCLUSION_OK) != 0); - } + Variables::Id id = _idGenerator->generateId(); + _variables[name] = id; + return id; +} - string Expression::removeFieldPrefix(const string &prefixedField) { - uassert(16419, str::stream()<<"field path must not contain embedded null characters" << prefixedField.find("\0") << "," , - prefixedField.find('\0') == string::npos); +Variables::Id VariablesParseState::getVariable(StringData name) const { + StringMap<Variables::Id>::const_iterator it = _variables.find(name); + if (it != _variables.end()) + return it->second; - const char* pPrefixedField = prefixedField.c_str(); - uassert(15982, str::stream() << - "field path references must be prefixed with a '$' ('" << - prefixedField << "'", pPrefixedField[0] == '$'); + uassert(17276, + str::stream() << "Use of undefined variable: " << name, + name == "ROOT" || name == "CURRENT"); - return string(pPrefixedField + 1); - } + return Variables::ROOT_ID; +} - intrusive_ptr<Expression> Expression::parseObject( - BSONObj obj, - ObjectCtx* pCtx, - const VariablesParseState& vps) { - /* - An object expression can take any of the following forms: +/* --------------------------- Expression ------------------------------ */ - f0: {f1: ..., f2: ..., f3: ...} - f0: {$operator:[operand1, operand2, ...]} - */ +Expression::ObjectCtx::ObjectCtx(int theOptions) : options(theOptions) {} - intrusive_ptr<Expression> pExpression; // the result - intrusive_ptr<ExpressionObject> pExpressionObject; // alt result - enum { UNKNOWN, NOTOPERATOR, OPERATOR } kind = UNKNOWN; +bool Expression::ObjectCtx::documentOk() const { + return ((options & DOCUMENT_OK) != 0); +} - if (obj.isEmpty()) - return ExpressionObject::create(); - BSONObjIterator iter(obj); +bool Expression::ObjectCtx::topLevel() const { + return 
((options & TOP_LEVEL) != 0); +} - for(size_t fieldCount = 0; iter.more(); ++fieldCount) { - BSONElement fieldElement(iter.next()); - const char* pFieldName = fieldElement.fieldName(); +bool Expression::ObjectCtx::inclusionOk() const { + return ((options & INCLUSION_OK) != 0); +} - if (pFieldName[0] == '$') { - uassert(15983, str::stream() << - "the operator must be the only field in a pipeline object (at '" - << pFieldName << "'", - fieldCount == 0); +string Expression::removeFieldPrefix(const string& prefixedField) { + uassert(16419, + str::stream() << "field path must not contain embedded null characters" + << prefixedField.find("\0") << ",", + prefixedField.find('\0') == string::npos); - uassert(16404, "$expressions are not allowed at the top-level of $project", - !pCtx->topLevel()); + const char* pPrefixedField = prefixedField.c_str(); + uassert(15982, + str::stream() << "field path references must be prefixed with a '$' ('" << prefixedField + << "'", + pPrefixedField[0] == '$'); - /* we've determined this "object" is an operator expression */ - kind = OPERATOR; + return string(pPrefixedField + 1); +} - pExpression = parseExpression(fieldElement, vps); +intrusive_ptr<Expression> Expression::parseObject(BSONObj obj, + ObjectCtx* pCtx, + const VariablesParseState& vps) { + /* + An object expression can take any of the following forms: + + f0: {f1: ..., f2: ..., f3: ...} + f0: {$operator:[operand1, operand2, ...]} + */ + + intrusive_ptr<Expression> pExpression; // the result + intrusive_ptr<ExpressionObject> pExpressionObject; // alt result + enum { UNKNOWN, NOTOPERATOR, OPERATOR } kind = UNKNOWN; + + if (obj.isEmpty()) + return ExpressionObject::create(); + BSONObjIterator iter(obj); + + for (size_t fieldCount = 0; iter.more(); ++fieldCount) { + BSONElement fieldElement(iter.next()); + const char* pFieldName = fieldElement.fieldName(); + + if (pFieldName[0] == '$') { + uassert( + 15983, + str::stream() << "the operator must be the only field in a pipeline object (at '" + << pFieldName << "'", + fieldCount == 0); + + uassert(16404, + "$expressions are not allowed at the top-level of $project", + !pCtx->topLevel()); + + /* we've determined this "object" is an operator expression */ + kind = OPERATOR; + + pExpression = parseExpression(fieldElement, vps); + } else { + uassert(15990, + str::stream() << "this object is already an operator expression, and can't be " + "used as a document expression (at '" << pFieldName << "')", + kind != OPERATOR); + + uassert(16405, + "dotted field names are only allowed at the top level", + pCtx->topLevel() || !str::contains(pFieldName, '.')); + + /* if it's our first time, create the document expression */ + if (!pExpression.get()) { + verify(pCtx->documentOk()); + // CW TODO error: document not allowed in this context + + pExpressionObject = + pCtx->topLevel() ? ExpressionObject::createRoot() : ExpressionObject::create(); + pExpression = pExpressionObject; + + /* this "object" is not an operator expression */ + kind = NOTOPERATOR; } - else { - uassert(15990, str::stream() << "this object is already an operator expression, and can't be used as a document expression (at '" << - pFieldName << "')", - kind != OPERATOR); - uassert(16405, "dotted field names are only allowed at the top level", - pCtx->topLevel() || !str::contains(pFieldName, '.')); + BSONType fieldType = fieldElement.type(); + string fieldName(pFieldName); + switch (fieldType) { + case Object: { + /* it's a nested document */ + ObjectCtx oCtx((pCtx->documentOk() ? 
ObjectCtx::DOCUMENT_OK : 0) | + (pCtx->inclusionOk() ? ObjectCtx::INCLUSION_OK : 0)); - /* if it's our first time, create the document expression */ - if (!pExpression.get()) { - verify(pCtx->documentOk()); - // CW TODO error: document not allowed in this context - - pExpressionObject = pCtx->topLevel() ? ExpressionObject::createRoot() - : ExpressionObject::create(); - pExpression = pExpressionObject; - - /* this "object" is not an operator expression */ - kind = NOTOPERATOR; + pExpressionObject->addField(fieldName, + parseObject(fieldElement.Obj(), &oCtx, vps)); + break; } - - BSONType fieldType = fieldElement.type(); - string fieldName(pFieldName); - switch (fieldType){ - case Object: { - /* it's a nested document */ - ObjectCtx oCtx( - (pCtx->documentOk() ? ObjectCtx::DOCUMENT_OK : 0) - | (pCtx->inclusionOk() ? ObjectCtx::INCLUSION_OK : 0)); - - pExpressionObject->addField(fieldName, - parseObject(fieldElement.Obj(), &oCtx, vps)); - break; - } - case String: { - /* it's a renamed field */ - // CW TODO could also be a constant - pExpressionObject->addField(fieldName, - ExpressionFieldPath::parse(fieldElement.str(), - vps)); - break; - } - case Bool: - case NumberDouble: - case NumberLong: - case NumberInt: { - /* it's an inclusion specification */ - if (fieldElement.trueValue()) { - uassert(16420, "field inclusion is not allowed inside of $expressions", - pCtx->inclusionOk()); - pExpressionObject->includePath(fieldName); - } - else { - uassert(16406, - "The top-level _id field is the only field currently supported for exclusion", - pCtx->topLevel() && fieldName == "_id"); - pExpressionObject->excludeId(true); - } - break; + case String: { + /* it's a renamed field */ + // CW TODO could also be a constant + pExpressionObject->addField( + fieldName, ExpressionFieldPath::parse(fieldElement.str(), vps)); + break; + } + case Bool: + case NumberDouble: + case NumberLong: + case NumberInt: { + /* it's an inclusion specification */ + if (fieldElement.trueValue()) { + uassert(16420, + "field inclusion is not allowed inside of $expressions", + pCtx->inclusionOk()); + pExpressionObject->includePath(fieldName); + } else { + uassert(16406, + "The top-level _id field is the only field currently supported for " + "exclusion", + pCtx->topLevel() && fieldName == "_id"); + pExpressionObject->excludeId(true); } - default: - uassert(15992, str::stream() << - "disallowed field type " << typeName(fieldType) << - " in object expression (at '" << - fieldName << "')", false); + break; } + default: + uassert(15992, + str::stream() << "disallowed field type " << typeName(fieldType) + << " in object expression (at '" << fieldName << "')", + false); } } - - return pExpression; } + return pExpression; +} + namespace { - typedef stdx::function<intrusive_ptr<Expression>(BSONElement, const VariablesParseState&)> - ExpressionParser; - StringMap<ExpressionParser> expressionParserMap; +typedef stdx::function<intrusive_ptr<Expression>(BSONElement, const VariablesParseState&)> + ExpressionParser; +StringMap<ExpressionParser> expressionParserMap; } /** Registers an ExpressionParser so it can be called from parseExpression and friends. 
@@ -313,2440 +311,2355 @@ namespace { * As an example, if your expression looks like {"$foo": [1,2,3]} you would add this line: * REGISTER_EXPRESSION("$foo", ExpressionFoo::parse); */ -#define REGISTER_EXPRESSION(key, parserFunc) \ - MONGO_INITIALIZER(BOOST_PP_CAT(addToExpressionParserMap, __LINE__))(InitializerContext*) { \ - /* prevent duplicate expressions */ \ - StringMap<ExpressionParser>::const_iterator op = expressionParserMap.find(key); \ - massert(17064, str::stream() << "Duplicate expression (" << key << ") detected at " \ - << __FILE__ << ":" << __LINE__, \ - op == expressionParserMap.end()); \ - /* register expression */ \ - expressionParserMap[key] = (parserFunc); \ - return Status::OK(); \ - } - - intrusive_ptr<Expression> Expression::parseExpression( - BSONElement exprElement, - const VariablesParseState& vps) { - - /* look for the specified operator */ - const char* opName = exprElement.fieldName(); - StringMap<ExpressionParser>::const_iterator op = expressionParserMap.find(opName); - uassert(15999, str::stream() << "invalid operator '" << opName << "'", - op != expressionParserMap.end()); - - /* make the expression node */ - return op->second(exprElement, vps); - } - - Expression::ExpressionVector ExpressionNary::parseArguments( - BSONElement exprElement, - const VariablesParseState& vps) { - - ExpressionVector out; - if (exprElement.type() == Array) { - BSONForEach(elem, exprElement.Obj()) { - out.push_back(Expression::parseOperand(elem, vps)); - } - } - else { // assume it's an atomic operand - out.push_back(Expression::parseOperand(exprElement, vps)); - } +#define REGISTER_EXPRESSION(key, parserFunc) \ + MONGO_INITIALIZER(BOOST_PP_CAT(addToExpressionParserMap, __LINE__))(InitializerContext*) { \ + /* prevent duplicate expressions */ \ + StringMap<ExpressionParser>::const_iterator op = expressionParserMap.find(key); \ + massert(17064, \ + str::stream() << "Duplicate expression (" << key << ") detected at " << __FILE__ \ + << ":" << __LINE__, \ + op == expressionParserMap.end()); \ + /* register expression */ \ + expressionParserMap[key] = (parserFunc); \ + return Status::OK(); \ + } + +intrusive_ptr<Expression> Expression::parseExpression(BSONElement exprElement, + const VariablesParseState& vps) { + /* look for the specified operator */ + const char* opName = exprElement.fieldName(); + StringMap<ExpressionParser>::const_iterator op = expressionParserMap.find(opName); + uassert(15999, + str::stream() << "invalid operator '" << opName << "'", + op != expressionParserMap.end()); + + /* make the expression node */ + return op->second(exprElement, vps); +} - return out; +Expression::ExpressionVector ExpressionNary::parseArguments(BSONElement exprElement, + const VariablesParseState& vps) { + ExpressionVector out; + if (exprElement.type() == Array) { + BSONForEach(elem, exprElement.Obj()) { + out.push_back(Expression::parseOperand(elem, vps)); + } + } else { // assume it's an atomic operand + out.push_back(Expression::parseOperand(exprElement, vps)); } - intrusive_ptr<Expression> Expression::parseOperand( - BSONElement exprElement, - const VariablesParseState& vps) { + return out; +} - BSONType type = exprElement.type(); +intrusive_ptr<Expression> Expression::parseOperand(BSONElement exprElement, + const VariablesParseState& vps) { + BSONType type = exprElement.type(); - if (type == String && exprElement.valuestr()[0] == '$') { - /* if we got here, this is a field path expression */ - return ExpressionFieldPath::parse(exprElement.str(), vps); - } - else if (type == Object) { 
- ObjectCtx oCtx(ObjectCtx::DOCUMENT_OK); - return Expression::parseObject(exprElement.Obj(), &oCtx, vps); - } - else { - return ExpressionConstant::parse(exprElement, vps); - } + if (type == String && exprElement.valuestr()[0] == '$') { + /* if we got here, this is a field path expression */ + return ExpressionFieldPath::parse(exprElement.str(), vps); + } else if (type == Object) { + ObjectCtx oCtx(ObjectCtx::DOCUMENT_OK); + return Expression::parseObject(exprElement.Obj(), &oCtx, vps); + } else { + return ExpressionConstant::parse(exprElement, vps); } +} - /* ----------------------- ExpressionAbs ---------------------------- */ +/* ----------------------- ExpressionAbs ---------------------------- */ - Value ExpressionAbs::evaluateInternal(Variables* vars) const { - Value val = vpOperand[0]->evaluateInternal(vars); +Value ExpressionAbs::evaluateInternal(Variables* vars) const { + Value val = vpOperand[0]->evaluateInternal(vars); - if (val.numeric()) { - BSONType type = val.getType(); - if (type == NumberDouble) { - return Value(std::abs(val.getDouble())); - } - else { - long long num = val.getLong(); - uassert(28680, "can't take $abs of long long min", - num != std::numeric_limits<long long>::min()); - long long absVal = std::abs(num); - return type == NumberLong ? Value(absVal) : Value::createIntOrLong(absVal); - } - } - else if (val.nullish()) { - return Value(BSONNULL); - } - else { - uasserted(28681, str::stream() << "$abs only supports numeric types, not " - << typeName(val.getType())); + if (val.numeric()) { + BSONType type = val.getType(); + if (type == NumberDouble) { + return Value(std::abs(val.getDouble())); + } else { + long long num = val.getLong(); + uassert(28680, + "can't take $abs of long long min", + num != std::numeric_limits<long long>::min()); + long long absVal = std::abs(num); + return type == NumberLong ? Value(absVal) : Value::createIntOrLong(absVal); } + } else if (val.nullish()) { + return Value(BSONNULL); + } else { + uasserted(28681, + str::stream() << "$abs only supports numeric types, not " + << typeName(val.getType())); } +} - REGISTER_EXPRESSION("$abs", ExpressionAbs::parse); - const char* ExpressionAbs::getOpName() const { - return "$abs"; - } +REGISTER_EXPRESSION("$abs", ExpressionAbs::parse); +const char* ExpressionAbs::getOpName() const { + return "$abs"; +} - /* ------------------------- ExpressionAdd ----------------------------- */ +/* ------------------------- ExpressionAdd ----------------------------- */ - Value ExpressionAdd::evaluateInternal(Variables* vars) const { +Value ExpressionAdd::evaluateInternal(Variables* vars) const { + /* + We'll try to return the narrowest possible result value. To do that + without creating intermediate Values, do the arithmetic for double + and integral types in parallel, tracking the current narrowest + type. + */ + double doubleTotal = 0; + long long longTotal = 0; + BSONType totalType = NumberInt; + bool haveDate = false; - /* - We'll try to return the narrowest possible result value. To do that - without creating intermediate Values, do the arithmetic for double - and integral types in parallel, tracking the current narrowest - type. 
- */ - double doubleTotal = 0; - long long longTotal = 0; - BSONType totalType = NumberInt; - bool haveDate = false; - - const size_t n = vpOperand.size(); - for (size_t i = 0; i < n; ++i) { - Value val = vpOperand[i]->evaluateInternal(vars); - - if (val.numeric()) { - totalType = Value::getWidestNumeric(totalType, val.getType()); - - doubleTotal += val.coerceToDouble(); - longTotal += val.coerceToLong(); - } - else if (val.getType() == Date) { - uassert(16612, "only one Date allowed in an $add expression", - !haveDate); - haveDate = true; + const size_t n = vpOperand.size(); + for (size_t i = 0; i < n; ++i) { + Value val = vpOperand[i]->evaluateInternal(vars); - // We don't manipulate totalType here. + if (val.numeric()) { + totalType = Value::getWidestNumeric(totalType, val.getType()); - longTotal += val.getDate(); - doubleTotal += val.getDate(); - } - else if (val.nullish()) { - return Value(BSONNULL); - } - else { - uasserted(16554, str::stream() << "$add only supports numeric or date types, not " - << typeName(val.getType())); - } - } + doubleTotal += val.coerceToDouble(); + longTotal += val.coerceToLong(); + } else if (val.getType() == Date) { + uassert(16612, "only one Date allowed in an $add expression", !haveDate); + haveDate = true; - if (haveDate) { - if (totalType == NumberDouble) - longTotal = static_cast<long long>(doubleTotal); - return Value(Date_t::fromMillisSinceEpoch(longTotal)); - } - else if (totalType == NumberLong) { - return Value(longTotal); - } - else if (totalType == NumberDouble) { - return Value(doubleTotal); - } - else if (totalType == NumberInt) { - return Value::createIntOrLong(longTotal); - } - else { - massert(16417, "$add resulted in a non-numeric type", false); - } - } + // We don't manipulate totalType here. - REGISTER_EXPRESSION("$add", ExpressionAdd::parse); - const char* ExpressionAdd::getOpName() const { - return "$add"; + longTotal += val.getDate(); + doubleTotal += val.getDate(); + } else if (val.nullish()) { + return Value(BSONNULL); + } else { + uasserted(16554, + str::stream() << "$add only supports numeric or date types, not " + << typeName(val.getType())); + } + } + + if (haveDate) { + if (totalType == NumberDouble) + longTotal = static_cast<long long>(doubleTotal); + return Value(Date_t::fromMillisSinceEpoch(longTotal)); + } else if (totalType == NumberLong) { + return Value(longTotal); + } else if (totalType == NumberDouble) { + return Value(doubleTotal); + } else if (totalType == NumberInt) { + return Value::createIntOrLong(longTotal); + } else { + massert(16417, "$add resulted in a non-numeric type", false); } +} - /* ------------------------- ExpressionAllElementsTrue -------------------------- */ +REGISTER_EXPRESSION("$add", ExpressionAdd::parse); +const char* ExpressionAdd::getOpName() const { + return "$add"; +} - Value ExpressionAllElementsTrue::evaluateInternal(Variables* vars) const { - const Value arr = vpOperand[0]->evaluateInternal(vars); - uassert(17040, str::stream() << getOpName() << "'s argument must be an array, but is " - << typeName(arr.getType()), - arr.getType() == Array); - const vector<Value>& array = arr.getArray(); - for (vector<Value>::const_iterator it = array.begin(); it != array.end(); ++it) { - if (!it->coerceToBool()) { - return Value(false); - } - } - return Value(true); - } +/* ------------------------- ExpressionAllElementsTrue -------------------------- */ - REGISTER_EXPRESSION("$allElementsTrue", ExpressionAllElementsTrue::parse); - const char* ExpressionAllElementsTrue::getOpName() const { - return 
"$allElementsTrue"; +Value ExpressionAllElementsTrue::evaluateInternal(Variables* vars) const { + const Value arr = vpOperand[0]->evaluateInternal(vars); + uassert(17040, + str::stream() << getOpName() << "'s argument must be an array, but is " + << typeName(arr.getType()), + arr.getType() == Array); + const vector<Value>& array = arr.getArray(); + for (vector<Value>::const_iterator it = array.begin(); it != array.end(); ++it) { + if (!it->coerceToBool()) { + return Value(false); + } } + return Value(true); +} - /* ------------------------- ExpressionAnd ----------------------------- */ - - intrusive_ptr<Expression> ExpressionAnd::optimize() { - /* optimize the conjunction as much as possible */ - intrusive_ptr<Expression> pE(ExpressionNary::optimize()); +REGISTER_EXPRESSION("$allElementsTrue", ExpressionAllElementsTrue::parse); +const char* ExpressionAllElementsTrue::getOpName() const { + return "$allElementsTrue"; +} - /* if the result isn't a conjunction, we can't do anything */ - ExpressionAnd *pAnd = dynamic_cast<ExpressionAnd *>(pE.get()); - if (!pAnd) - return pE; +/* ------------------------- ExpressionAnd ----------------------------- */ - /* - Check the last argument on the result; if it's not constant (as - promised by ExpressionNary::optimize(),) then there's nothing - we can do. - */ - const size_t n = pAnd->vpOperand.size(); - // ExpressionNary::optimize() generates an ExpressionConstant for {$and:[]}. - verify(n > 0); - intrusive_ptr<Expression> pLast(pAnd->vpOperand[n - 1]); - const ExpressionConstant *pConst = - dynamic_cast<ExpressionConstant *>(pLast.get()); - if (!pConst) - return pE; +intrusive_ptr<Expression> ExpressionAnd::optimize() { + /* optimize the conjunction as much as possible */ + intrusive_ptr<Expression> pE(ExpressionNary::optimize()); - /* - Evaluate and coerce the last argument to a boolean. If it's false, - then we can replace this entire expression. - */ - bool last = pConst->getValue().coerceToBool(); - if (!last) { - intrusive_ptr<ExpressionConstant> pFinal( - ExpressionConstant::create(Value(false))); - return pFinal; - } + /* if the result isn't a conjunction, we can't do anything */ + ExpressionAnd* pAnd = dynamic_cast<ExpressionAnd*>(pE.get()); + if (!pAnd) + return pE; - /* - If we got here, the final operand was true, so we don't need it - anymore. If there was only one other operand, we don't need the - conjunction either. Note we still need to keep the promise that - the result will be a boolean. - */ - if (n == 2) { - intrusive_ptr<Expression> pFinal( - ExpressionCoerceToBool::create(pAnd->vpOperand[0])); - return pFinal; - } + /* + Check the last argument on the result; if it's not constant (as + promised by ExpressionNary::optimize(),) then there's nothing + we can do. + */ + const size_t n = pAnd->vpOperand.size(); + // ExpressionNary::optimize() generates an ExpressionConstant for {$and:[]}. + verify(n > 0); + intrusive_ptr<Expression> pLast(pAnd->vpOperand[n - 1]); + const ExpressionConstant* pConst = dynamic_cast<ExpressionConstant*>(pLast.get()); + if (!pConst) + return pE; - /* - Remove the final "true" value, and return the new expression. + /* + Evaluate and coerce the last argument to a boolean. If it's false, + then we can replace this entire expression. 
+ */ + bool last = pConst->getValue().coerceToBool(); + if (!last) { + intrusive_ptr<ExpressionConstant> pFinal(ExpressionConstant::create(Value(false))); + return pFinal; + } - CW TODO: - Note that because of any implicit conversions, we may need to - apply an implicit boolean conversion. - */ - pAnd->vpOperand.resize(n - 1); - return pE; + /* + If we got here, the final operand was true, so we don't need it + anymore. If there was only one other operand, we don't need the + conjunction either. Note we still need to keep the promise that + the result will be a boolean. + */ + if (n == 2) { + intrusive_ptr<Expression> pFinal(ExpressionCoerceToBool::create(pAnd->vpOperand[0])); + return pFinal; } - Value ExpressionAnd::evaluateInternal(Variables* vars) const { - const size_t n = vpOperand.size(); - for(size_t i = 0; i < n; ++i) { - Value pValue(vpOperand[i]->evaluateInternal(vars)); - if (!pValue.coerceToBool()) - return Value(false); - } + /* + Remove the final "true" value, and return the new expression. - return Value(true); - } + CW TODO: + Note that because of any implicit conversions, we may need to + apply an implicit boolean conversion. + */ + pAnd->vpOperand.resize(n - 1); + return pE; +} - REGISTER_EXPRESSION("$and", ExpressionAnd::parse); - const char* ExpressionAnd::getOpName() const { - return "$and"; +Value ExpressionAnd::evaluateInternal(Variables* vars) const { + const size_t n = vpOperand.size(); + for (size_t i = 0; i < n; ++i) { + Value pValue(vpOperand[i]->evaluateInternal(vars)); + if (!pValue.coerceToBool()) + return Value(false); } - /* ------------------------- ExpressionAnyElementTrue -------------------------- */ + return Value(true); +} - Value ExpressionAnyElementTrue::evaluateInternal(Variables* vars) const { - const Value arr = vpOperand[0]->evaluateInternal(vars); - uassert(17041, str::stream() << getOpName() << "'s argument must be an array, but is " - << typeName(arr.getType()), - arr.getType() == Array); - const vector<Value>& array = arr.getArray(); - for (vector<Value>::const_iterator it = array.begin(); it != array.end(); ++it) { - if (it->coerceToBool()) { - return Value(true); - } +REGISTER_EXPRESSION("$and", ExpressionAnd::parse); +const char* ExpressionAnd::getOpName() const { + return "$and"; +} + +/* ------------------------- ExpressionAnyElementTrue -------------------------- */ + +Value ExpressionAnyElementTrue::evaluateInternal(Variables* vars) const { + const Value arr = vpOperand[0]->evaluateInternal(vars); + uassert(17041, + str::stream() << getOpName() << "'s argument must be an array, but is " + << typeName(arr.getType()), + arr.getType() == Array); + const vector<Value>& array = arr.getArray(); + for (vector<Value>::const_iterator it = array.begin(); it != array.end(); ++it) { + if (it->coerceToBool()) { + return Value(true); } - return Value(false); } + return Value(false); +} - REGISTER_EXPRESSION("$anyElementTrue", ExpressionAnyElementTrue::parse); - const char* ExpressionAnyElementTrue::getOpName() const { - return "$anyElementTrue"; - } +REGISTER_EXPRESSION("$anyElementTrue", ExpressionAnyElementTrue::parse); +const char* ExpressionAnyElementTrue::getOpName() const { + return "$anyElementTrue"; +} - /* ------------------------- ExpressionArrayElemAt -------------------------- */ +/* ------------------------- ExpressionArrayElemAt -------------------------- */ + +Value ExpressionArrayElemAt::evaluateInternal(Variables* vars) const { + const Value array = vpOperand[0]->evaluateInternal(vars); + const Value indexArg = 
vpOperand[1]->evaluateInternal(vars); + + if (array.nullish() || indexArg.nullish()) { + return Value(BSONNULL); + } + + uassert(28689, + str::stream() << getOpName() << "'s first argument must be an array, but is " + << typeName(array.getType()), + array.getType() == Array); + uassert(28690, + str::stream() << getOpName() << "'s second argument must be a numeric value," + << " but is " << typeName(indexArg.getType()), + indexArg.numeric()); + uassert(28691, + str::stream() << getOpName() << "'s second argument must be representable as" + << " a 32-bit integer: " << indexArg.coerceToDouble(), + indexArg.integral()); + + long long i = indexArg.coerceToLong(); + if (i < 0 && static_cast<size_t>(std::abs(i)) > array.getArrayLength()) { + // Positive indices that are too large are handled automatically by Value. + return Value(); + } else if (i < 0) { + // Index from the back of the array. + i = array.getArrayLength() + i; + } + const size_t index = static_cast<size_t>(i); + return array[index]; +} - Value ExpressionArrayElemAt::evaluateInternal(Variables* vars) const { - const Value array = vpOperand[0]->evaluateInternal(vars); - const Value indexArg = vpOperand[1]->evaluateInternal(vars); +REGISTER_EXPRESSION("$arrayElemAt", ExpressionArrayElemAt::parse); +const char* ExpressionArrayElemAt::getOpName() const { + return "$arrayElemAt"; +} - if (array.nullish() || indexArg.nullish()) { - return Value(BSONNULL); - } +/* -------------------- ExpressionCoerceToBool ------------------------- */ - uassert(28689, str::stream() << getOpName() << "'s first argument must be an array, but is " - << typeName(array.getType()), - array.getType() == Array); - uassert(28690, str::stream() << getOpName() << "'s second argument must be a numeric value," - << " but is " << typeName(indexArg.getType()), - indexArg.numeric()); - uassert(28691, str::stream() << getOpName() << "'s second argument must be representable as" - << " a 32-bit integer: " << indexArg.coerceToDouble(), - indexArg.integral()); - - long long i = indexArg.coerceToLong(); - if (i < 0 && static_cast<size_t>(std::abs(i)) > array.getArrayLength()) { - // Positive indices that are too large are handled automatically by Value. - return Value(); - } - else if (i < 0) { - // Index from the back of the array. - i = array.getArrayLength() + i; - } - const size_t index = static_cast<size_t>(i); - return array[index]; - } +intrusive_ptr<ExpressionCoerceToBool> ExpressionCoerceToBool::create( + const intrusive_ptr<Expression>& pExpression) { + intrusive_ptr<ExpressionCoerceToBool> pNew(new ExpressionCoerceToBool(pExpression)); + return pNew; +} - REGISTER_EXPRESSION("$arrayElemAt", ExpressionArrayElemAt::parse); - const char* ExpressionArrayElemAt::getOpName() const { - return "$arrayElemAt"; - } +ExpressionCoerceToBool::ExpressionCoerceToBool(const intrusive_ptr<Expression>& pTheExpression) + : Expression(), pExpression(pTheExpression) {} - /* -------------------- ExpressionCoerceToBool ------------------------- */ +intrusive_ptr<Expression> ExpressionCoerceToBool::optimize() { + /* optimize the operand */ + pExpression = pExpression->optimize(); - intrusive_ptr<ExpressionCoerceToBool> ExpressionCoerceToBool::create( - const intrusive_ptr<Expression> &pExpression) { - intrusive_ptr<ExpressionCoerceToBool> pNew( - new ExpressionCoerceToBool(pExpression)); - return pNew; - } + /* if the operand already produces a boolean, then we don't need this */ + /* LATER - Expression to support a "typeof" query? 
*/ + Expression* pE = pExpression.get(); + if (dynamic_cast<ExpressionAnd*>(pE) || dynamic_cast<ExpressionOr*>(pE) || + dynamic_cast<ExpressionNot*>(pE) || dynamic_cast<ExpressionCoerceToBool*>(pE)) + return pExpression; - ExpressionCoerceToBool::ExpressionCoerceToBool( - const intrusive_ptr<Expression> &pTheExpression): - Expression(), - pExpression(pTheExpression) { - } + return intrusive_ptr<Expression>(this); +} - intrusive_ptr<Expression> ExpressionCoerceToBool::optimize() { - /* optimize the operand */ - pExpression = pExpression->optimize(); +void ExpressionCoerceToBool::addDependencies(DepsTracker* deps, vector<string>* path) const { + pExpression->addDependencies(deps); +} - /* if the operand already produces a boolean, then we don't need this */ - /* LATER - Expression to support a "typeof" query? */ - Expression *pE = pExpression.get(); - if (dynamic_cast<ExpressionAnd *>(pE) || - dynamic_cast<ExpressionOr *>(pE) || - dynamic_cast<ExpressionNot *>(pE) || - dynamic_cast<ExpressionCoerceToBool *>(pE)) - return pExpression; +Value ExpressionCoerceToBool::evaluateInternal(Variables* vars) const { + Value pResult(pExpression->evaluateInternal(vars)); + bool b = pResult.coerceToBool(); + if (b) + return Value(true); + return Value(false); +} - return intrusive_ptr<Expression>(this); - } +Value ExpressionCoerceToBool::serialize(bool explain) const { + // When not explaining, serialize to an $and expression. When parsed, the $and expression + // will be optimized back into a ExpressionCoerceToBool. + const char* name = explain ? "$coerceToBool" : "$and"; + return Value(DOC(name << DOC_ARRAY(pExpression->serialize(explain)))); +} - void ExpressionCoerceToBool::addDependencies(DepsTracker* deps, vector<string>* path) const { - pExpression->addDependencies(deps); - } +/* ----------------------- ExpressionCompare --------------------------- */ + +REGISTER_EXPRESSION("$cmp", + stdx::bind(ExpressionCompare::parse, + stdx::placeholders::_1, + stdx::placeholders::_2, + ExpressionCompare::CMP)); +REGISTER_EXPRESSION("$eq", + stdx::bind(ExpressionCompare::parse, + stdx::placeholders::_1, + stdx::placeholders::_2, + ExpressionCompare::EQ)); +REGISTER_EXPRESSION("$gt", + stdx::bind(ExpressionCompare::parse, + stdx::placeholders::_1, + stdx::placeholders::_2, + ExpressionCompare::GT)); +REGISTER_EXPRESSION("$gte", + stdx::bind(ExpressionCompare::parse, + stdx::placeholders::_1, + stdx::placeholders::_2, + ExpressionCompare::GTE)); +REGISTER_EXPRESSION("$lt", + stdx::bind(ExpressionCompare::parse, + stdx::placeholders::_1, + stdx::placeholders::_2, + ExpressionCompare::LT)); +REGISTER_EXPRESSION("$lte", + stdx::bind(ExpressionCompare::parse, + stdx::placeholders::_1, + stdx::placeholders::_2, + ExpressionCompare::LTE)); +REGISTER_EXPRESSION("$ne", + stdx::bind(ExpressionCompare::parse, + stdx::placeholders::_1, + stdx::placeholders::_2, + ExpressionCompare::NE)); +intrusive_ptr<Expression> ExpressionCompare::parse(BSONElement bsonExpr, + const VariablesParseState& vps, + CmpOp op) { + intrusive_ptr<ExpressionCompare> expr = new ExpressionCompare(op); + ExpressionVector args = parseArguments(bsonExpr, vps); + expr->validateArguments(args); + expr->vpOperand = args; + return expr; +} - Value ExpressionCoerceToBool::evaluateInternal(Variables* vars) const { - Value pResult(pExpression->evaluateInternal(vars)); - bool b = pResult.coerceToBool(); - if (b) - return Value(true); - return Value(false); - } - - Value ExpressionCoerceToBool::serialize(bool explain) const { - // When not explaining, serialize 
to an $and expression. When parsed, the $and expression - // will be optimized back into a ExpressionCoerceToBool. - const char* name = explain ? "$coerceToBool" : "$and"; - return Value(DOC(name << DOC_ARRAY(pExpression->serialize(explain)))); - } - - /* ----------------------- ExpressionCompare --------------------------- */ - - REGISTER_EXPRESSION("$cmp", - stdx::bind(ExpressionCompare::parse, stdx::placeholders::_1, stdx::placeholders::_2, ExpressionCompare::CMP)); - REGISTER_EXPRESSION("$eq", - stdx::bind(ExpressionCompare::parse, stdx::placeholders::_1, stdx::placeholders::_2, ExpressionCompare::EQ)); - REGISTER_EXPRESSION("$gt", - stdx::bind(ExpressionCompare::parse, stdx::placeholders::_1, stdx::placeholders::_2, ExpressionCompare::GT)); - REGISTER_EXPRESSION("$gte", - stdx::bind(ExpressionCompare::parse, stdx::placeholders::_1, stdx::placeholders::_2, ExpressionCompare::GTE)); - REGISTER_EXPRESSION("$lt", - stdx::bind(ExpressionCompare::parse, stdx::placeholders::_1, stdx::placeholders::_2, ExpressionCompare::LT)); - REGISTER_EXPRESSION("$lte", - stdx::bind(ExpressionCompare::parse, stdx::placeholders::_1, stdx::placeholders::_2, ExpressionCompare::LTE)); - REGISTER_EXPRESSION("$ne", - stdx::bind(ExpressionCompare::parse, stdx::placeholders::_1, stdx::placeholders::_2, ExpressionCompare::NE)); - intrusive_ptr<Expression> ExpressionCompare::parse( - BSONElement bsonExpr, - const VariablesParseState& vps, - CmpOp op) { - - intrusive_ptr<ExpressionCompare> expr = new ExpressionCompare(op); - ExpressionVector args = parseArguments(bsonExpr, vps); - expr->validateArguments(args); - expr->vpOperand = args; - return expr; - } - - ExpressionCompare::ExpressionCompare(CmpOp theCmpOp) - : cmpOp(theCmpOp) - {} +ExpressionCompare::ExpressionCompare(CmpOp theCmpOp) : cmpOp(theCmpOp) {} namespace { - // Lookup table for truth value returns - struct CmpLookup { - const bool truthValue[3]; // truth value for -1, 0, 1 - const ExpressionCompare::CmpOp reverse; // reverse(b,a) returns the same as op(a,b) - const char name[5]; // string name with trailing '\0' - }; - static const CmpLookup cmpLookup[7] = { - /* -1 0 1 reverse name */ - /* EQ */ { { false, true, false }, ExpressionCompare::EQ, "$eq" }, - /* NE */ { { true, false, true }, ExpressionCompare::NE, "$ne" }, - /* GT */ { { false, false, true }, ExpressionCompare::LT, "$gt" }, - /* GTE */ { { false, true, true }, ExpressionCompare::LTE, "$gte" }, - /* LT */ { { true, false, false }, ExpressionCompare::GT, "$lt" }, - /* LTE */ { { true, true, false }, ExpressionCompare::GTE, "$lte" }, - - // CMP is special. Only name is used. - /* CMP */ { { false, false, false }, ExpressionCompare::CMP, "$cmp" }, - }; -} - - Value ExpressionCompare::evaluateInternal(Variables* vars) const { - Value pLeft(vpOperand[0]->evaluateInternal(vars)); - Value pRight(vpOperand[1]->evaluateInternal(vars)); - - int cmp = Value::compare(pLeft, pRight); - - // Make cmp one of 1, 0, or -1. 
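The cmpLookup table above collapses every comparison operator into a single array lookup: the result of Value::compare is first clamped to -1, 0, or 1, and truthValue[cmp + 1] then answers the operator. A minimal standalone sketch of the same technique, using plain ints in place of Value (illustrative only, not MongoDB code):

    #include <iostream>

    enum CmpOp { EQ, NE, GT, GTE, LT, LTE };
    struct CmpLookup { bool truthValue[3]; };  // indexed by cmp + 1
    static const CmpLookup lookup[6] = {
        {{false, true, false}},  // EQ
        {{true, false, true}},   // NE
        {{false, false, true}},  // GT
        {{false, true, true}},   // GTE
        {{true, false, false}},  // LT
        {{true, true, false}},   // LTE
    };

    static bool apply(CmpOp op, int lhs, int rhs) {
        int cmp = (lhs > rhs) - (lhs < rhs);  // already one of -1, 0, 1
        return lookup[op].truthValue[cmp + 1];
    }

    int main() {
        std::cout << apply(GTE, 3, 3) << apply(LT, 5, 2) << '\n';  // prints 10
    }

Per the table's own comments, the reverse member records which operator gives the same answer with swapped operands, and for $cmp only the name field is meaningful.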
- if (cmp == 0) { - // leave as 0 - } else if (cmp < 0) { - cmp = -1; - } else if (cmp > 0) { - cmp = 1; - } +// Lookup table for truth value returns +struct CmpLookup { + const bool truthValue[3]; // truth value for -1, 0, 1 + const ExpressionCompare::CmpOp reverse; // reverse(b,a) returns the same as op(a,b) + const char name[5]; // string name with trailing '\0' +}; +static const CmpLookup cmpLookup[7] = { + /* -1 0 1 reverse name */ + /* EQ */ {{false, true, false}, ExpressionCompare::EQ, "$eq"}, + /* NE */ {{true, false, true}, ExpressionCompare::NE, "$ne"}, + /* GT */ {{false, false, true}, ExpressionCompare::LT, "$gt"}, + /* GTE */ {{false, true, true}, ExpressionCompare::LTE, "$gte"}, + /* LT */ {{true, false, false}, ExpressionCompare::GT, "$lt"}, + /* LTE */ {{true, true, false}, ExpressionCompare::GTE, "$lte"}, + + // CMP is special. Only name is used. + /* CMP */ {{false, false, false}, ExpressionCompare::CMP, "$cmp"}, +}; +} - if (cmpOp == CMP) - return Value(cmp); +Value ExpressionCompare::evaluateInternal(Variables* vars) const { + Value pLeft(vpOperand[0]->evaluateInternal(vars)); + Value pRight(vpOperand[1]->evaluateInternal(vars)); - bool returnValue = cmpLookup[cmpOp].truthValue[cmp + 1]; - return Value(returnValue); - } + int cmp = Value::compare(pLeft, pRight); - const char* ExpressionCompare::getOpName() const { - return cmpLookup[cmpOp].name; + // Make cmp one of 1, 0, or -1. + if (cmp == 0) { + // leave as 0 + } else if (cmp < 0) { + cmp = -1; + } else if (cmp > 0) { + cmp = 1; } - /* ------------------------- ExpressionConcat ----------------------------- */ + if (cmpOp == CMP) + return Value(cmp); - Value ExpressionConcat::evaluateInternal(Variables* vars) const { - const size_t n = vpOperand.size(); + bool returnValue = cmpLookup[cmpOp].truthValue[cmp + 1]; + return Value(returnValue); +} - StringBuilder result; - for (size_t i = 0; i < n; ++i) { - Value val = vpOperand[i]->evaluateInternal(vars); - if (val.nullish()) - return Value(BSONNULL); +const char* ExpressionCompare::getOpName() const { + return cmpLookup[cmpOp].name; +} - uassert(16702, str::stream() << "$concat only supports strings, not " - << typeName(val.getType()), - val.getType() == String); +/* ------------------------- ExpressionConcat ----------------------------- */ - result << val.coerceToString(); - } +Value ExpressionConcat::evaluateInternal(Variables* vars) const { + const size_t n = vpOperand.size(); - return Value(result.str()); - } + StringBuilder result; + for (size_t i = 0; i < n; ++i) { + Value val = vpOperand[i]->evaluateInternal(vars); + if (val.nullish()) + return Value(BSONNULL); + + uassert(16702, + str::stream() << "$concat only supports strings, not " << typeName(val.getType()), + val.getType() == String); - REGISTER_EXPRESSION("$concat", ExpressionConcat::parse); - const char* ExpressionConcat::getOpName() const { - return "$concat"; + result << val.coerceToString(); } - /* ------------------------- ExpressionConcatArrays ----------------------------- */ + return Value(result.str()); +} - Value ExpressionConcatArrays::evaluateInternal(Variables* vars) const { - const size_t n = vpOperand.size(); - vector<Value> values; +REGISTER_EXPRESSION("$concat", ExpressionConcat::parse); +const char* ExpressionConcat::getOpName() const { + return "$concat"; +} - for (size_t i = 0; i < n; ++i) { - Value val = vpOperand[i]->evaluateInternal(vars); - if (val.nullish()) { - return Value(BSONNULL); - } +/* ------------------------- ExpressionConcatArrays ----------------------------- */ - 
uassert(28664, str::stream() << "$concatArrays only supports arrays, not " - << typeName(val.getType()), - val.getType() == Array); +Value ExpressionConcatArrays::evaluateInternal(Variables* vars) const { + const size_t n = vpOperand.size(); + vector<Value> values; - const auto& subValues = val.getArray(); - values.insert(values.end(), subValues.begin(), subValues.end()); + for (size_t i = 0; i < n; ++i) { + Value val = vpOperand[i]->evaluateInternal(vars); + if (val.nullish()) { + return Value(BSONNULL); } - return Value(std::move(values)); - } - - REGISTER_EXPRESSION("$concatArrays", ExpressionConcatArrays::parse); - const char* ExpressionConcatArrays::getOpName() const { - return "$concatArrays"; - } - /* ----------------------- ExpressionCond ------------------------------ */ + uassert(28664, + str::stream() << "$concatArrays only supports arrays, not " + << typeName(val.getType()), + val.getType() == Array); - Value ExpressionCond::evaluateInternal(Variables* vars) const { - Value pCond(vpOperand[0]->evaluateInternal(vars)); - int idx = pCond.coerceToBool() ? 1 : 2; - return vpOperand[idx]->evaluateInternal(vars); + const auto& subValues = val.getArray(); + values.insert(values.end(), subValues.begin(), subValues.end()); } + return Value(std::move(values)); +} - intrusive_ptr<Expression> ExpressionCond::parse( - BSONElement expr, - const VariablesParseState& vps) { +REGISTER_EXPRESSION("$concatArrays", ExpressionConcatArrays::parse); +const char* ExpressionConcatArrays::getOpName() const { + return "$concatArrays"; +} - if (expr.type() != Object) { - return Base::parse(expr, vps); - } - verify(str::equals(expr.fieldName(), "$cond")); - - intrusive_ptr<ExpressionCond> ret = new ExpressionCond(); - ret->vpOperand.resize(3); - - const BSONObj args = expr.embeddedObject(); - BSONForEach(arg, args) { - if (str::equals(arg.fieldName(), "if")) { - ret->vpOperand[0] = parseOperand(arg, vps); - } else if (str::equals(arg.fieldName(), "then")) { - ret->vpOperand[1] = parseOperand(arg, vps); - } else if (str::equals(arg.fieldName(), "else")) { - ret->vpOperand[2] = parseOperand(arg, vps); - } else { - uasserted(17083, str::stream() - << "Unrecognized parameter to $cond: " << arg.fieldName()); - } - } +/* ----------------------- ExpressionCond ------------------------------ */ - uassert(17080, "Missing 'if' parameter to $cond", - ret->vpOperand[0]); - uassert(17081, "Missing 'then' parameter to $cond", - ret->vpOperand[1]); - uassert(17082, "Missing 'else' parameter to $cond", - ret->vpOperand[2]); +Value ExpressionCond::evaluateInternal(Variables* vars) const { + Value pCond(vpOperand[0]->evaluateInternal(vars)); + int idx = pCond.coerceToBool() ? 
1 : 2; + return vpOperand[idx]->evaluateInternal(vars); +} - return ret; +intrusive_ptr<Expression> ExpressionCond::parse(BSONElement expr, const VariablesParseState& vps) { + if (expr.type() != Object) { + return Base::parse(expr, vps); } + verify(str::equals(expr.fieldName(), "$cond")); + + intrusive_ptr<ExpressionCond> ret = new ExpressionCond(); + ret->vpOperand.resize(3); - REGISTER_EXPRESSION("$cond", ExpressionCond::parse); - const char* ExpressionCond::getOpName() const { - return "$cond"; + const BSONObj args = expr.embeddedObject(); + BSONForEach(arg, args) { + if (str::equals(arg.fieldName(), "if")) { + ret->vpOperand[0] = parseOperand(arg, vps); + } else if (str::equals(arg.fieldName(), "then")) { + ret->vpOperand[1] = parseOperand(arg, vps); + } else if (str::equals(arg.fieldName(), "else")) { + ret->vpOperand[2] = parseOperand(arg, vps); + } else { + uasserted(17083, + str::stream() << "Unrecognized parameter to $cond: " << arg.fieldName()); + } } - /* ---------------------- ExpressionConstant --------------------------- */ + uassert(17080, "Missing 'if' parameter to $cond", ret->vpOperand[0]); + uassert(17081, "Missing 'then' parameter to $cond", ret->vpOperand[1]); + uassert(17082, "Missing 'else' parameter to $cond", ret->vpOperand[2]); - intrusive_ptr<Expression> ExpressionConstant::parse( - BSONElement exprElement, - const VariablesParseState& vps) { - return new ExpressionConstant(Value(exprElement)); - } + return ret; +} +REGISTER_EXPRESSION("$cond", ExpressionCond::parse); +const char* ExpressionCond::getOpName() const { + return "$cond"; +} - intrusive_ptr<ExpressionConstant> ExpressionConstant::create(const Value& pValue) { - intrusive_ptr<ExpressionConstant> pEC(new ExpressionConstant(pValue)); - return pEC; - } +/* ---------------------- ExpressionConstant --------------------------- */ - ExpressionConstant::ExpressionConstant(const Value& pTheValue): pValue(pTheValue) {} +intrusive_ptr<Expression> ExpressionConstant::parse(BSONElement exprElement, + const VariablesParseState& vps) { + return new ExpressionConstant(Value(exprElement)); +} - intrusive_ptr<Expression> ExpressionConstant::optimize() { - /* nothing to do */ - return intrusive_ptr<Expression>(this); - } +intrusive_ptr<ExpressionConstant> ExpressionConstant::create(const Value& pValue) { + intrusive_ptr<ExpressionConstant> pEC(new ExpressionConstant(pValue)); + return pEC; +} - void ExpressionConstant::addDependencies(DepsTracker* deps, vector<string>* path) const { - /* nothing to do */ - } +ExpressionConstant::ExpressionConstant(const Value& pTheValue) : pValue(pTheValue) {} - Value ExpressionConstant::evaluateInternal(Variables* vars) const { - return pValue; - } - Value ExpressionConstant::serialize(bool explain) const { - return serializeConstant(pValue); - } +intrusive_ptr<Expression> ExpressionConstant::optimize() { + /* nothing to do */ + return intrusive_ptr<Expression>(this); +} - REGISTER_EXPRESSION("$const", ExpressionConstant::parse); - REGISTER_EXPRESSION("$literal", ExpressionConstant::parse); // alias - const char* ExpressionConstant::getOpName() const { - return "$const"; - } +void ExpressionConstant::addDependencies(DepsTracker* deps, vector<string>* path) const { + /* nothing to do */ +} + +Value ExpressionConstant::evaluateInternal(Variables* vars) const { + return pValue; +} + +Value ExpressionConstant::serialize(bool explain) const { + return serializeConstant(pValue); +} - /* ---------------------- ExpressionDateToString ----------------------- */ +REGISTER_EXPRESSION("$const", 
ExpressionConstant::parse); +REGISTER_EXPRESSION("$literal", ExpressionConstant::parse); // alias +const char* ExpressionConstant::getOpName() const { + return "$const"; +} - REGISTER_EXPRESSION("$dateToString", ExpressionDateToString::parse); - intrusive_ptr<Expression> ExpressionDateToString::parse( - BSONElement expr, - const VariablesParseState& vps) { +/* ---------------------- ExpressionDateToString ----------------------- */ - verify(str::equals(expr.fieldName(), "$dateToString")); +REGISTER_EXPRESSION("$dateToString", ExpressionDateToString::parse); +intrusive_ptr<Expression> ExpressionDateToString::parse(BSONElement expr, + const VariablesParseState& vps) { + verify(str::equals(expr.fieldName(), "$dateToString")); - uassert(18629, "$dateToString only supports an object as its argument", - expr.type() == Object); + uassert(18629, "$dateToString only supports an object as its argument", expr.type() == Object); - BSONElement formatElem; - BSONElement dateElem; - const BSONObj args = expr.embeddedObject(); - BSONForEach(arg, args) { - if (str::equals(arg.fieldName(), "format")) { - formatElem = arg; - } else if (str::equals(arg.fieldName(), "date")) { - dateElem = arg; - } else { - uasserted(18534, str::stream() << "Unrecognized argument to $dateToString: " - << arg.fieldName()); - } + BSONElement formatElem; + BSONElement dateElem; + const BSONObj args = expr.embeddedObject(); + BSONForEach(arg, args) { + if (str::equals(arg.fieldName(), "format")) { + formatElem = arg; + } else if (str::equals(arg.fieldName(), "date")) { + dateElem = arg; + } else { + uasserted(18534, + str::stream() + << "Unrecognized argument to $dateToString: " << arg.fieldName()); } + } - uassert(18627, "Missing 'format' parameter to $dateToString", - !formatElem.eoo()); - uassert(18628, "Missing 'date' parameter to $dateToString", - !dateElem.eoo()); - - uassert(18533, "The 'format' parameter to $dateToString must be a string literal", - formatElem.type() == String); + uassert(18627, "Missing 'format' parameter to $dateToString", !formatElem.eoo()); + uassert(18628, "Missing 'date' parameter to $dateToString", !dateElem.eoo()); - const string format = formatElem.str(); + uassert(18533, + "The 'format' parameter to $dateToString must be a string literal", + formatElem.type() == String); - validateFormat(format); + const string format = formatElem.str(); - return new ExpressionDateToString(format, parseOperand(dateElem, vps)); - } + validateFormat(format); - ExpressionDateToString::ExpressionDateToString(const string& format, - intrusive_ptr<Expression> date) - : _format(format) - , _date(date) - {} + return new ExpressionDateToString(format, parseOperand(dateElem, vps)); +} - intrusive_ptr<Expression> ExpressionDateToString::optimize() { - _date = _date->optimize(); - return this; - } +ExpressionDateToString::ExpressionDateToString(const string& format, intrusive_ptr<Expression> date) + : _format(format), _date(date) {} - Value ExpressionDateToString::serialize(bool explain) const { - return Value(DOC("$dateToString" << DOC("format" << _format - << "date" << _date->serialize(explain) - ))); - } +intrusive_ptr<Expression> ExpressionDateToString::optimize() { + _date = _date->optimize(); + return this; +} - Value ExpressionDateToString::evaluateInternal(Variables* vars) const { - const Value date = _date->evaluateInternal(vars); +Value ExpressionDateToString::serialize(bool explain) const { + return Value( + DOC("$dateToString" << DOC("format" << _format << "date" << _date->serialize(explain)))); +} - if 
(date.nullish()) { - return Value(BSONNULL); - } +Value ExpressionDateToString::evaluateInternal(Variables* vars) const { + const Value date = _date->evaluateInternal(vars); - return Value(formatDate(_format, date.coerceToTm(), date.coerceToDate())); + if (date.nullish()) { + return Value(BSONNULL); } - // verifies that any '%' is followed by a valid format character, and that - // the format string ends with an even number of '%' symbols - void ExpressionDateToString::validateFormat(const std::string& format) { - for (string::const_iterator it = format.begin(); it != format.end(); ++it) { - if (*it != '%') { - continue; - } + return Value(formatDate(_format, date.coerceToTm(), date.coerceToDate())); +} + +// verifies that any '%' is followed by a valid format character, and that +// the format string ends with an even number of '%' symbols +void ExpressionDateToString::validateFormat(const std::string& format) { + for (string::const_iterator it = format.begin(); it != format.end(); ++it) { + if (*it != '%') { + continue; + } - ++it; // next character must be format modifier - uassert(18535, "Unmatched '%' at end of $dateToString format string", - it != format.end()); + ++it; // next character must be format modifier + uassert(18535, "Unmatched '%' at end of $dateToString format string", it != format.end()); - switch (*it) { + switch (*it) { // all of these fall through intentionally - case '%': case 'Y': case 'm': - case 'd': case 'H': case 'M': - case 'S': case 'L': case 'j': - case 'w': case 'U': + case '%': + case 'Y': + case 'm': + case 'd': + case 'H': + case 'M': + case 'S': + case 'L': + case 'j': + case 'w': + case 'U': break; default: - uasserted(18536, str::stream() << "Invalid format character '%" - << *it - << "' in $dateToString format string"); - } + uasserted(18536, + str::stream() << "Invalid format character '%" << *it + << "' in $dateToString format string"); } } +} - string ExpressionDateToString::formatDate(const string& format, - const tm& tm, - const long long date) { - StringBuilder formatted; - for (string::const_iterator it = format.begin(); it != format.end(); ++it) { - if (*it != '%') { - formatted << *it; - continue; - } +string ExpressionDateToString::formatDate(const string& format, + const tm& tm, + const long long date) { + StringBuilder formatted; + for (string::const_iterator it = format.begin(); it != format.end(); ++it) { + if (*it != '%') { + formatted << *it; + continue; + } - ++it; // next character is format modifier - invariant(it != format.end()); // checked in validateFormat + ++it; // next character is format modifier + invariant(it != format.end()); // checked in validateFormat - switch (*it) { - case '%': // Escaped literal % + switch (*it) { + case '%': // Escaped literal % formatted << '%'; break; - case 'Y': // Year - { - const int year = ExpressionYear::extract(tm); - uassert(18537, str::stream() << "$dateToString is only defined on year 0-9999," - << " tried to use year " - << year, - (year >= 0) && (year <= 9999)); - insertPadded(formatted, year, 4); - break; - } - case 'm': // Month + case 'Y': // Year + { + const int year = ExpressionYear::extract(tm); + uassert(18537, + str::stream() << "$dateToString is only defined on year 0-9999," + << " tried to use year " << year, + (year >= 0) && (year <= 9999)); + insertPadded(formatted, year, 4); + break; + } + case 'm': // Month insertPadded(formatted, ExpressionMonth::extract(tm), 2); break; - case 'd': // Day of month + case 'd': // Day of month insertPadded(formatted, 
ExpressionDayOfMonth::extract(tm), 2); break; - case 'H': // Hour + case 'H': // Hour insertPadded(formatted, ExpressionHour::extract(tm), 2); break; - case 'M': // Minute + case 'M': // Minute insertPadded(formatted, ExpressionMinute::extract(tm), 2); break; - case 'S': // Second + case 'S': // Second insertPadded(formatted, ExpressionSecond::extract(tm), 2); break; - case 'L': // Millisecond + case 'L': // Millisecond insertPadded(formatted, ExpressionMillisecond::extract(date), 3); break; - case 'j': // Day of year + case 'j': // Day of year insertPadded(formatted, ExpressionDayOfYear::extract(tm), 3); break; - case 'w': // Day of week + case 'w': // Day of week insertPadded(formatted, ExpressionDayOfWeek::extract(tm), 1); break; - case 'U': // Week + case 'U': // Week insertPadded(formatted, ExpressionWeek::extract(tm), 2); break; default: // Should never happen as format is pre-validated invariant(false); - } } - return formatted.str(); - } - - // Only works with 1 <= spaces <= 4 and 0 <= number <= 9999. - // If spaces is less than the digit count of number we simply insert the number - // without padding. - void ExpressionDateToString::insertPadded(StringBuilder& sb, int number, int width) { - invariant(width >= 1); - invariant(width <= 4); - invariant(number >= 0); - invariant(number <= 9999); - - int digits = 1; - - if (number >= 1000) { - digits = 4; - } else if (number >= 100) { - digits = 3; - } else if (number >= 10) { - digits = 2; - } - - if (width > digits) { - sb.write("0000", width - digits); - } - sb << number; } + return formatted.str(); +} - void ExpressionDateToString::addDependencies(DepsTracker* deps, vector<string> *path) const { - _date->addDependencies(deps); - } +// Only works with 1 <= spaces <= 4 and 0 <= number <= 9999. +// If spaces is less than the digit count of number we simply insert the number +// without padding. 
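formatDate above is a hand-rolled strftime: it scans for '%', dispatches on the modifier (already checked by validateFormat), and zero-pads each numeric part to a fixed width via insertPadded, defined next. A compact sketch of the same scan-and-pad shape (names mirror the real helpers, but the code is a simplified illustration):

    #include <iostream>
    #include <string>

    // pad `number` with leading zeros to `width` digits (assumes 0-9999, width 1-4)
    static void insertPadded(std::string& out, int number, int width) {
        int digits = number >= 1000 ? 4 : number >= 100 ? 3 : number >= 10 ? 2 : 1;
        if (width > digits)
            out.append("0000", width - digits);
        out += std::to_string(number);
    }

    static std::string formatDate(const std::string& format, int year, int month, int day) {
        std::string out;
        for (auto it = format.begin(); it != format.end(); ++it) {
            if (*it != '%') { out += *it; continue; }
            ++it;
            if (it == format.end())
                break;  // the real code rejects a trailing '%' in validateFormat
            switch (*it) {
                case '%': out += '%'; break;
                case 'Y': insertPadded(out, year, 4); break;
                case 'm': insertPadded(out, month, 2); break;
                case 'd': insertPadded(out, day, 2); break;
            }
        }
        return out;
    }

    int main() {
        std::cout << formatDate("%Y-%m-%d", 2015, 6, 9) << '\n';  // 2015-06-09
    }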
+void ExpressionDateToString::insertPadded(StringBuilder& sb, int number, int width) { + invariant(width >= 1); + invariant(width <= 4); + invariant(number >= 0); + invariant(number <= 9999); - /* ---------------------- ExpressionDayOfMonth ------------------------- */ + int digits = 1; - Value ExpressionDayOfMonth::evaluateInternal(Variables* vars) const { - Value pDate(vpOperand[0]->evaluateInternal(vars)); - return Value(extract(pDate.coerceToTm())); + if (number >= 1000) { + digits = 4; + } else if (number >= 100) { + digits = 3; + } else if (number >= 10) { + digits = 2; } - REGISTER_EXPRESSION("$dayOfMonth", ExpressionDayOfMonth::parse); - const char* ExpressionDayOfMonth::getOpName() const { - return "$dayOfMonth"; + if (width > digits) { + sb.write("0000", width - digits); } + sb << number; +} - /* ------------------------- ExpressionDayOfWeek ----------------------------- */ +void ExpressionDateToString::addDependencies(DepsTracker* deps, vector<string>* path) const { + _date->addDependencies(deps); +} - Value ExpressionDayOfWeek::evaluateInternal(Variables* vars) const { - Value pDate(vpOperand[0]->evaluateInternal(vars)); - return Value(extract(pDate.coerceToTm())); - } +/* ---------------------- ExpressionDayOfMonth ------------------------- */ - REGISTER_EXPRESSION("$dayOfWeek", ExpressionDayOfWeek::parse); - const char* ExpressionDayOfWeek::getOpName() const { - return "$dayOfWeek"; - } +Value ExpressionDayOfMonth::evaluateInternal(Variables* vars) const { + Value pDate(vpOperand[0]->evaluateInternal(vars)); + return Value(extract(pDate.coerceToTm())); +} - /* ------------------------- ExpressionDayOfYear ----------------------------- */ +REGISTER_EXPRESSION("$dayOfMonth", ExpressionDayOfMonth::parse); +const char* ExpressionDayOfMonth::getOpName() const { + return "$dayOfMonth"; +} - Value ExpressionDayOfYear::evaluateInternal(Variables* vars) const { - Value pDate(vpOperand[0]->evaluateInternal(vars)); - return Value(extract(pDate.coerceToTm())); - } +/* ------------------------- ExpressionDayOfWeek ----------------------------- */ - REGISTER_EXPRESSION("$dayOfYear", ExpressionDayOfYear::parse); - const char* ExpressionDayOfYear::getOpName() const { - return "$dayOfYear"; - } +Value ExpressionDayOfWeek::evaluateInternal(Variables* vars) const { + Value pDate(vpOperand[0]->evaluateInternal(vars)); + return Value(extract(pDate.coerceToTm())); +} - /* ----------------------- ExpressionDivide ---------------------------- */ +REGISTER_EXPRESSION("$dayOfWeek", ExpressionDayOfWeek::parse); +const char* ExpressionDayOfWeek::getOpName() const { + return "$dayOfWeek"; +} - Value ExpressionDivide::evaluateInternal(Variables* vars) const { - Value lhs = vpOperand[0]->evaluateInternal(vars); - Value rhs = vpOperand[1]->evaluateInternal(vars); +/* ------------------------- ExpressionDayOfYear ----------------------------- */ - if (lhs.numeric() && rhs.numeric()) { - double numer = lhs.coerceToDouble(); - double denom = rhs.coerceToDouble(); - uassert(16608, "can't $divide by zero", - denom != 0); +Value ExpressionDayOfYear::evaluateInternal(Variables* vars) const { + Value pDate(vpOperand[0]->evaluateInternal(vars)); + return Value(extract(pDate.coerceToTm())); +} - return Value(numer / denom); - } - else if (lhs.nullish() || rhs.nullish()) { - return Value(BSONNULL); - } - else { - uasserted(16609, str::stream() << "$divide only supports numeric types, not " - << typeName(lhs.getType()) - << " and " - << typeName(rhs.getType())); - } - } +REGISTER_EXPRESSION("$dayOfYear", 
ExpressionDayOfYear::parse); +const char* ExpressionDayOfYear::getOpName() const { + return "$dayOfYear"; +} - REGISTER_EXPRESSION("$divide", ExpressionDivide::parse); - const char* ExpressionDivide::getOpName() const { - return "$divide"; - } +/* ----------------------- ExpressionDivide ---------------------------- */ - /* ---------------------- ExpressionObject --------------------------- */ +Value ExpressionDivide::evaluateInternal(Variables* vars) const { + Value lhs = vpOperand[0]->evaluateInternal(vars); + Value rhs = vpOperand[1]->evaluateInternal(vars); - intrusive_ptr<ExpressionObject> ExpressionObject::create() { - return new ExpressionObject(false); - } + if (lhs.numeric() && rhs.numeric()) { + double numer = lhs.coerceToDouble(); + double denom = rhs.coerceToDouble(); + uassert(16608, "can't $divide by zero", denom != 0); - intrusive_ptr<ExpressionObject> ExpressionObject::createRoot() { - return new ExpressionObject(true); + return Value(numer / denom); + } else if (lhs.nullish() || rhs.nullish()) { + return Value(BSONNULL); + } else { + uasserted(16609, + str::stream() << "$divide only supports numeric types, not " + << typeName(lhs.getType()) << " and " << typeName(rhs.getType())); } +} - ExpressionObject::ExpressionObject(bool atRoot) - : _excludeId(false) - , _atRoot(atRoot) - {} +REGISTER_EXPRESSION("$divide", ExpressionDivide::parse); +const char* ExpressionDivide::getOpName() const { + return "$divide"; +} - intrusive_ptr<Expression> ExpressionObject::optimize() { - for (FieldMap::iterator it(_expressions.begin()); it!=_expressions.end(); ++it) { - if (it->second) - it->second = it->second->optimize(); - } +/* ---------------------- ExpressionObject --------------------------- */ + +intrusive_ptr<ExpressionObject> ExpressionObject::create() { + return new ExpressionObject(false); +} - return intrusive_ptr<Expression>(this); +intrusive_ptr<ExpressionObject> ExpressionObject::createRoot() { + return new ExpressionObject(true); +} + +ExpressionObject::ExpressionObject(bool atRoot) : _excludeId(false), _atRoot(atRoot) {} + +intrusive_ptr<Expression> ExpressionObject::optimize() { + for (FieldMap::iterator it(_expressions.begin()); it != _expressions.end(); ++it) { + if (it->second) + it->second = it->second->optimize(); } - bool ExpressionObject::isSimple() { - for (FieldMap::iterator it(_expressions.begin()); it!=_expressions.end(); ++it) { - if (it->second && !it->second->isSimple()) - return false; - } - return true; + return intrusive_ptr<Expression>(this); +} + +bool ExpressionObject::isSimple() { + for (FieldMap::iterator it(_expressions.begin()); it != _expressions.end(); ++it) { + if (it->second && !it->second->isSimple()) + return false; } + return true; +} - void ExpressionObject::addDependencies(DepsTracker* deps, vector<string>* path) const { - string pathStr; - if (path) { - if (path->empty()) { - // we are in the top level of a projection so _id is implicit - if (!_excludeId) - deps->fields.insert("_id"); - } - else { - FieldPath f (*path); - pathStr = f.getPath(false); - pathStr += '.'; - } - } - else { - verify(!_excludeId); +void ExpressionObject::addDependencies(DepsTracker* deps, vector<string>* path) const { + string pathStr; + if (path) { + if (path->empty()) { + // we are in the top level of a projection so _id is implicit + if (!_excludeId) + deps->fields.insert("_id"); + } else { + FieldPath f(*path); + pathStr = f.getPath(false); + pathStr += '.'; } + } else { + verify(!_excludeId); + } - for (FieldMap::const_iterator it(_expressions.begin()); 
it!=_expressions.end(); ++it) {
-            if (it->second) {
-                if (path) path->push_back(it->first);
-                it->second->addDependencies(deps, path);
-                if (path) path->pop_back();
-            }
-            else { // inclusion
-                uassert(16407, "inclusion not supported in objects nested in $expressions",
-                        path);
+    for (FieldMap::const_iterator it(_expressions.begin()); it != _expressions.end(); ++it) {
+        if (it->second) {
+            if (path)
+                path->push_back(it->first);
+            it->second->addDependencies(deps, path);
+            if (path)
+                path->pop_back();
+        } else {  // inclusion
+            uassert(16407, "inclusion not supported in objects nested in $expressions", path);

-                deps->fields.insert(pathStr + it->first);
-            }
+            deps->fields.insert(pathStr + it->first);
        }
    }
+}

-    void ExpressionObject::addToDocument(
-        MutableDocument& out,
-        const Document& currentDoc,
-        Variables* vars
-        ) const
-    {
-        FieldMap::const_iterator end = _expressions.end();
+void ExpressionObject::addToDocument(MutableDocument& out,
+                                     const Document& currentDoc,
+                                     Variables* vars) const {
+    FieldMap::const_iterator end = _expressions.end();

-        // This is used to mark fields we've done so that we can add the ones we haven't
-        set<string> doneFields;
+    // This is used to mark fields we've done so that we can add the ones we haven't
+    set<string> doneFields;

-        FieldIterator fields(currentDoc);
-        while(fields.more()) {
-            Document::FieldPair field (fields.next());
+    FieldIterator fields(currentDoc);
+    while (fields.more()) {
+        Document::FieldPair field(fields.next());

-            // TODO don't make a new string here
-            const string fieldName = field.first.toString();
-            FieldMap::const_iterator exprIter = _expressions.find(fieldName);
+        // TODO don't make a new string here
+        const string fieldName = field.first.toString();
+        FieldMap::const_iterator exprIter = _expressions.find(fieldName);

-            // This field is not supposed to be in the output (unless it is _id)
-            if (exprIter == end) {
-                if (!_excludeId && _atRoot && field.first == "_id") {
-                    // _id from the root doc is always included (until exclusion is supported)
-                    // not updating doneFields since "_id" isn't in _expressions
-                    out.addField(field.first, field.second);
-                }
-                continue;
+        // This field is not supposed to be in the output (unless it is _id)
+        if (exprIter == end) {
+            if (!_excludeId && _atRoot && field.first == "_id") {
+                // _id from the root doc is always included (until exclusion is supported)
+                // not updating doneFields since "_id" isn't in _expressions
+                out.addField(field.first, field.second);
            }
+            continue;
+        }

-            // make sure we don't add this field again
-            doneFields.insert(exprIter->first);
+        // make sure we don't add this field again
+        doneFields.insert(exprIter->first);

-            Expression* expr = exprIter->second.get();
+        Expression* expr = exprIter->second.get();

-            if (!expr) {
-                // This means pull the matching field from the input document
-                out.addField(field.first, field.second);
-                continue;
-            }
+        if (!expr) {
+            // This means pull the matching field from the input document
+            out.addField(field.first, field.second);
+            continue;
+        }

-            ExpressionObject* exprObj = dynamic_cast<ExpressionObject*>(expr);
-            BSONType valueType = field.second.getType();
-            if ((valueType != Object && valueType != Array) || !exprObj ) {
-                // This expression replace the whole field
+        ExpressionObject* exprObj = dynamic_cast<ExpressionObject*>(expr);
+        BSONType valueType = field.second.getType();
+        if ((valueType != Object && valueType != Array) || !exprObj) {
+            // This expression replaces the whole field

-                Value pValue(expr->evaluateInternal(vars));
+            Value
pValue(expr->evaluateInternal(vars)); - // don't add field if nothing was found in the subobject - if (exprObj && pValue.getDocument().empty()) - continue; + // don't add field if nothing was found in the subobject + if (exprObj && pValue.getDocument().empty()) + continue; - /* - Don't add non-existent values (note: different from NULL or Undefined); - this is consistent with existing selection syntax which doesn't - force the appearance of non-existent fields. - */ - if (!pValue.missing()) - out.addField(field.first, pValue); + /* + Don't add non-existent values (note: different from NULL or Undefined); + this is consistent with existing selection syntax which doesn't + force the appearance of non-existent fields. + */ + if (!pValue.missing()) + out.addField(field.first, pValue); - continue; - } + continue; + } + /* + Check on the type of the input value. If it's an + object, just walk down into that recursively, and + add it to the result. + */ + if (valueType == Object) { + MutableDocument sub(exprObj->getSizeHint()); + exprObj->addToDocument(sub, field.second.getDocument(), vars); + out.addField(field.first, sub.freezeToValue()); + } else if (valueType == Array) { /* - Check on the type of the input value. If it's an - object, just walk down into that recursively, and - add it to the result. + If it's an array, we have to do the same thing, + but to each array element. Then, add the array + of results to the current document. */ - if (valueType == Object) { - MutableDocument sub (exprObj->getSizeHint()); - exprObj->addToDocument(sub, field.second.getDocument(), vars); - out.addField(field.first, sub.freezeToValue()); - } - else if (valueType == Array) { - /* - If it's an array, we have to do the same thing, - but to each array element. Then, add the array - of results to the current document. - */ - vector<Value> result; - const vector<Value>& input = field.second.getArray(); - for (size_t i=0; i < input.size(); i++) { - // can't look for a subfield in a non-object value. - if (input[i].getType() != Object) - continue; - - MutableDocument doc (exprObj->getSizeHint()); - exprObj->addToDocument(doc, input[i].getDocument(), vars); - result.push_back(doc.freezeToValue()); - } + vector<Value> result; + const vector<Value>& input = field.second.getArray(); + for (size_t i = 0; i < input.size(); i++) { + // can't look for a subfield in a non-object value. 
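The Array branch being rebuilt here follows projection semantics: a nested projection applied to an array field is mapped over the elements, and non-object elements are silently dropped. A toy version with std::variant standing in for the dynamically typed Value (illustrative, not the real API):

    #include <iostream>
    #include <map>
    #include <string>
    #include <variant>
    #include <vector>

    using Doc = std::map<std::string, int>;
    using Val = std::variant<int, Doc>;  // scalar or object element

    // Apply a one-field sub-projection to each element; non-objects are skipped,
    // matching the "can't look for a subfield in a non-object value" rule.
    static std::vector<Doc> projectArray(const std::vector<Val>& input, const std::string& field) {
        std::vector<Doc> result;
        for (const Val& elem : input) {
            const Doc* doc = std::get_if<Doc>(&elem);
            if (!doc)
                continue;
            Doc sub;
            auto it = doc->find(field);
            if (it != doc->end())
                sub[field] = it->second;
            result.push_back(sub);
        }
        return result;
    }

    int main() {
        std::vector<Val> arr{Doc{{"x", 1}, {"y", 2}}, 42, Doc{{"y", 3}}};
        std::cout << projectArray(arr, "y").size() << '\n';  // 2: the scalar 42 is dropped
    }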
+            if (input[i].getType() != Object)
+                continue;

-                out.addField(field.first, Value(std::move(result)));
+            MutableDocument doc(exprObj->getSizeHint());
+            exprObj->addToDocument(doc, input[i].getDocument(), vars);
+            result.push_back(doc.freezeToValue());
            }
-            else {
-                verify( false );
-            }
-        }
+            out.addField(field.first, Value(std::move(result)));
+        } else {
+            verify(false);
+        }
+    }

-        if (doneFields.size() == _expressions.size())
-            return;
+    if (doneFields.size() == _expressions.size())
+        return;

-        /* add any remaining fields we haven't already taken care of */
-        for (vector<string>::const_iterator i(_order.begin()); i!=_order.end(); ++i) {
-            FieldMap::const_iterator it = _expressions.find(*i);
-            string fieldName(it->first);
+    /* add any remaining fields we haven't already taken care of */
+    for (vector<string>::const_iterator i(_order.begin()); i != _order.end(); ++i) {
+        FieldMap::const_iterator it = _expressions.find(*i);
+        string fieldName(it->first);

-            /* if we've already dealt with this field, above, do nothing */
-            if (doneFields.count(fieldName))
-                continue;
+        /* if we've already dealt with this field, above, do nothing */
+        if (doneFields.count(fieldName))
+            continue;

-            // this is a missing inclusion field
-            if (!it->second)
-                continue;
+        // this is a missing inclusion field
+        if (!it->second)
+            continue;

-            Value pValue(it->second->evaluateInternal(vars));
+        Value pValue(it->second->evaluateInternal(vars));

-            /*
-              Don't add non-existent values (note: different from NULL or Undefined);
-              this is consistent with existing selection syntax which doesn't
-              force the appearnance of non-existent fields.
-            */
-            if (pValue.missing())
-                continue;
+        /*
+          Don't add non-existent values (note: different from NULL or Undefined);
+          this is consistent with existing selection syntax which doesn't
+          force the appearance of non-existent fields.
+        */
+        if (pValue.missing())
+            continue;

-            // don't add field if nothing was found in the subobject
-            if (dynamic_cast<ExpressionObject*>(it->second.get())
-                && pValue.getDocument().empty())
-                continue;
+        // don't add field if nothing was found in the subobject
+        if (dynamic_cast<ExpressionObject*>(it->second.get()) && pValue.getDocument().empty())
+            continue;

-            out.addField(fieldName, pValue);
-        }
-    }
-    size_t ExpressionObject::getSizeHint() const {
-        // Note: this can overestimate, but that is better than underestimating
-        return _expressions.size() + (_excludeId ? 0 : 1);
+        out.addField(fieldName, pValue);
    }
+}

-    Document ExpressionObject::evaluateDocument(Variables* vars) const {
-        /* create and populate the result */
-        MutableDocument out (getSizeHint());
+size_t ExpressionObject::getSizeHint() const {
+    // Note: this can overestimate, but that is better than underestimating
+    return _expressions.size() + (_excludeId ? 0 : 1);
+}

-        addToDocument(out,
-                      Document(), // No inclusion field matching.
-                      vars);
-        return out.freeze();
-    }
+Document ExpressionObject::evaluateDocument(Variables* vars) const {
+    /* create and populate the result */
+    MutableDocument out(getSizeHint());
+
+    addToDocument(out,
+                  Document(),  // No inclusion field matching.
+ vars); + return out.freeze(); +} - void ExpressionObject::addField(const FieldPath &fieldPath, - const intrusive_ptr<Expression> &pExpression) { - const string fieldPart = fieldPath.getFieldName(0); - const bool haveExpr = _expressions.count(fieldPart); +Value ExpressionObject::evaluateInternal(Variables* vars) const { + return Value(evaluateDocument(vars)); +} - intrusive_ptr<Expression>& expr = _expressions[fieldPart]; // inserts if !haveExpr - intrusive_ptr<ExpressionObject> subObj = dynamic_cast<ExpressionObject*>(expr.get()); +void ExpressionObject::addField(const FieldPath& fieldPath, + const intrusive_ptr<Expression>& pExpression) { + const string fieldPart = fieldPath.getFieldName(0); + const bool haveExpr = _expressions.count(fieldPart); - if (!haveExpr) { - _order.push_back(fieldPart); - } - else { // we already have an expression or inclusion for this field - if (fieldPath.getPathLength() == 1) { - // This expression is for right here - - ExpressionObject* newSubObj = dynamic_cast<ExpressionObject*>(pExpression.get()); - uassert(16400, str::stream() - << "can't add an expression for field " << fieldPart - << " because there is already an expression for that field" - << " or one of its sub-fields.", - subObj && newSubObj); // we can merge them - - // Copy everything from the newSubObj to the existing subObj - // This is for cases like { $project:{ 'b.c':1, b:{ a:1 } } } - for (vector<string>::const_iterator it (newSubObj->_order.begin()); - it != newSubObj->_order.end(); - ++it) { - // asserts if any fields are dupes - subObj->addField(*it, newSubObj->_expressions[*it]); - } - return; - } - else { - // This expression is for a subfield - uassert(16401, str::stream() - << "can't add an expression for a subfield of " << fieldPart - << " because there is already an expression that applies to" - << " the whole field", - subObj); - } - } + intrusive_ptr<Expression>& expr = _expressions[fieldPart]; // inserts if !haveExpr + intrusive_ptr<ExpressionObject> subObj = dynamic_cast<ExpressionObject*>(expr.get()); + if (!haveExpr) { + _order.push_back(fieldPart); + } else { // we already have an expression or inclusion for this field if (fieldPath.getPathLength() == 1) { - verify(!haveExpr); // haveExpr case handled above. - expr = pExpression; + // This expression is for right here + + ExpressionObject* newSubObj = dynamic_cast<ExpressionObject*>(pExpression.get()); + uassert(16400, + str::stream() << "can't add an expression for field " << fieldPart + << " because there is already an expression for that field" + << " or one of its sub-fields.", + subObj && newSubObj); // we can merge them + + // Copy everything from the newSubObj to the existing subObj + // This is for cases like { $project:{ 'b.c':1, b:{ a:1 } } } + for (vector<string>::const_iterator it(newSubObj->_order.begin()); + it != newSubObj->_order.end(); + ++it) { + // asserts if any fields are dupes + subObj->addField(*it, newSubObj->_expressions[*it]); + } return; + } else { + // This expression is for a subfield + uassert(16401, + str::stream() << "can't add an expression for a subfield of " << fieldPart + << " because there is already an expression that applies to" + << " the whole field", + subObj); } - - if (!haveExpr) - expr = subObj = ExpressionObject::create(); - - subObj->addField(fieldPath.tail(), pExpression); } - void ExpressionObject::includePath(const string &theFieldPath) { - addField(theFieldPath, NULL); + if (fieldPath.getPathLength() == 1) { + verify(!haveExpr); // haveExpr case handled above. 
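addField above is what lets { $project: { 'b.c': 1, b: { a: 1 } } } coexist: the first path component selects or creates a sub-object, and the tail recurses into it. A toy version of that recursion (the real method also distinguishes inclusions from computed fields and uasserts on conflicts):

    #include <iostream>
    #include <map>
    #include <memory>
    #include <string>

    struct Node {
        std::map<std::string, std::unique_ptr<Node>> children;
    };

    static void addField(Node& node, const std::string& path) {
        size_t dot = path.find('.');
        std::string head = dot == std::string::npos ? path : path.substr(0, dot);
        std::unique_ptr<Node>& child = node.children[head];  // inserts if absent
        if (!child)
            child = std::make_unique<Node>();
        if (dot != std::string::npos)
            addField(*child, path.substr(dot + 1));  // recurse on the tail
    }

    int main() {
        Node root;
        addField(root, "b.c");
        addField(root, "b.a");  // merges under the existing "b" node
        std::cout << root.children.size() << ' '
                  << root.children["b"]->children.size() << '\n';  // 1 2
    }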
+ expr = pExpression; + return; } - Value ExpressionObject::serialize(bool explain) const { - MutableDocument valBuilder; - if (_excludeId) - valBuilder["_id"] = Value(false); + if (!haveExpr) + expr = subObj = ExpressionObject::create(); - for (vector<string>::const_iterator it(_order.begin()); it!=_order.end(); ++it) { - string fieldName = *it; - verify(_expressions.find(fieldName) != _expressions.end()); - intrusive_ptr<Expression> expr = _expressions.find(fieldName)->second; + subObj->addField(fieldPath.tail(), pExpression); +} - if (!expr) { - // this is inclusion, not an expression - valBuilder[fieldName] = Value(true); - } - else { - valBuilder[fieldName] = expr->serialize(explain); - } - } - return valBuilder.freezeToValue(); - } +void ExpressionObject::includePath(const string& theFieldPath) { + addField(theFieldPath, NULL); +} - /* --------------------- ExpressionFieldPath --------------------------- */ +Value ExpressionObject::serialize(bool explain) const { + MutableDocument valBuilder; + if (_excludeId) + valBuilder["_id"] = Value(false); - // this is the old deprecated version only used by tests not using variables - intrusive_ptr<ExpressionFieldPath> ExpressionFieldPath::create(const string& fieldPath) { - return new ExpressionFieldPath("CURRENT." + fieldPath, Variables::ROOT_ID); - } + for (vector<string>::const_iterator it(_order.begin()); it != _order.end(); ++it) { + string fieldName = *it; + verify(_expressions.find(fieldName) != _expressions.end()); + intrusive_ptr<Expression> expr = _expressions.find(fieldName)->second; - // this is the new version that supports every syntax - intrusive_ptr<ExpressionFieldPath> ExpressionFieldPath::parse( - const string& raw, - const VariablesParseState& vps) { + if (!expr) { + // this is inclusion, not an expression + valBuilder[fieldName] = Value(true); + } else { + valBuilder[fieldName] = expr->serialize(explain); + } + } + return valBuilder.freezeToValue(); +} - uassert(16873, str::stream() << "FieldPath '" << raw << "' doesn't start with $", - raw.c_str()[0] == '$'); // c_str()[0] is always a valid reference. +/* --------------------- ExpressionFieldPath --------------------------- */ - uassert(16872, str::stream() << "'$' by itself is not a valid FieldPath", - raw.size() >= 2); // need at least "$" and either "$" or a field name +// this is the old deprecated version only used by tests not using variables +intrusive_ptr<ExpressionFieldPath> ExpressionFieldPath::create(const string& fieldPath) { + return new ExpressionFieldPath("CURRENT." + fieldPath, Variables::ROOT_ID); +} - if (raw[1] == '$') { - const StringData rawSD = raw; - const StringData fieldPath = rawSD.substr(2); // strip off $$ - const StringData varName = fieldPath.substr(0, fieldPath.find('.')); - Variables::uassertValidNameForUserRead(varName); - return new ExpressionFieldPath(fieldPath.toString(), vps.getVariable(varName)); - } - else { - return new ExpressionFieldPath("CURRENT." + raw.substr(1), // strip the "$" prefix - vps.getVariable("CURRENT")); - } +// this is the new version that supports every syntax +intrusive_ptr<ExpressionFieldPath> ExpressionFieldPath::parse(const string& raw, + const VariablesParseState& vps) { + uassert(16873, + str::stream() << "FieldPath '" << raw << "' doesn't start with $", + raw.c_str()[0] == '$'); // c_str()[0] is always a valid reference. 
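parse(), continued just below, normalizes both syntaxes onto one representation: "$a.b" becomes the path CURRENT.a.b bound to the CURRENT variable, while "$$var.b" strips the "$$" and binds to the named variable. A string-level sketch of that normalization (parseFieldPath here is a hypothetical helper, not the real signature):

    #include <cassert>
    #include <string>

    struct Parsed {
        std::string varName;    // variable the path is evaluated against
        std::string fieldPath;  // remaining dotted path
    };

    static Parsed parseFieldPath(const std::string& raw) {
        assert(raw.size() >= 2 && raw[0] == '$');  // mirrors uasserts 16873/16872
        if (raw[1] == '$') {                       // "$$var.b" form
            std::string fieldPath = raw.substr(2);
            return {fieldPath.substr(0, fieldPath.find('.')), fieldPath};
        }
        return {"CURRENT", "CURRENT." + raw.substr(1)};  // "$a.b" form
    }

    int main() {
        assert(parseFieldPath("$a.b").fieldPath == "CURRENT.a.b");
        assert(parseFieldPath("$$user.name").varName == "user");
    }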
+
+    uassert(16872,
+            str::stream() << "'$' by itself is not a valid FieldPath",
+            raw.size() >= 2);  // need at least "$" and either "$" or a field name
+
+    if (raw[1] == '$') {
+        const StringData rawSD = raw;
+        const StringData fieldPath = rawSD.substr(2);  // strip off $$
+        const StringData varName = fieldPath.substr(0, fieldPath.find('.'));
+        Variables::uassertValidNameForUserRead(varName);
+        return new ExpressionFieldPath(fieldPath.toString(), vps.getVariable(varName));
+    } else {
+        return new ExpressionFieldPath("CURRENT." + raw.substr(1),  // strip the "$" prefix
+                                       vps.getVariable("CURRENT"));
    }
+}

-    ExpressionFieldPath::ExpressionFieldPath(const string& theFieldPath, Variables::Id variable)
-        : _fieldPath(theFieldPath)
-        , _variable(variable)
-    {}
+ExpressionFieldPath::ExpressionFieldPath(const string& theFieldPath, Variables::Id variable)
+    : _fieldPath(theFieldPath), _variable(variable) {}

-    intrusive_ptr<Expression> ExpressionFieldPath::optimize() {
-        /* nothing can be done for these */
-        return intrusive_ptr<Expression>(this);
-    }
+intrusive_ptr<Expression> ExpressionFieldPath::optimize() {
+    /* nothing can be done for these */
+    return intrusive_ptr<Expression>(this);
+}

-    void ExpressionFieldPath::addDependencies(DepsTracker* deps, vector<string>* path) const {
-        if (_variable == Variables::ROOT_ID) { // includes CURRENT when it is equivalent to ROOT.
-            if (_fieldPath.getPathLength() == 1) {
-                deps->needWholeDocument = true; // need full doc if just "$$ROOT"
-            } else {
-                deps->fields.insert(_fieldPath.tail().getPath(false));
-            }
+void ExpressionFieldPath::addDependencies(DepsTracker* deps, vector<string>* path) const {
+    if (_variable == Variables::ROOT_ID) {  // includes CURRENT when it is equivalent to ROOT.
+        if (_fieldPath.getPathLength() == 1) {
+            deps->needWholeDocument = true;  // need full doc if just "$$ROOT"
+        } else {
+            deps->fields.insert(_fieldPath.tail().getPath(false));
        }
    }
+}

-    Value ExpressionFieldPath::evaluatePathArray(size_t index, const Value& input) const {
-        dassert(input.getType() == Array);
-
-        // Check for remaining path in each element of array
-        vector<Value> result;
-        const vector<Value>& array = input.getArray();
-        for (size_t i=0; i < array.size(); i++) {
-            if (array[i].getType() != Object)
-                continue;
+Value ExpressionFieldPath::evaluatePathArray(size_t index, const Value& input) const {
+    dassert(input.getType() == Array);

-            const Value nested = evaluatePath(index, array[i].getDocument());
-            if (!nested.missing())
-                result.push_back(nested);
-        }
+    // Check for remaining path in each element of array
+    vector<Value> result;
+    const vector<Value>& array = input.getArray();
+    for (size_t i = 0; i < array.size(); i++) {
+        if (array[i].getType() != Object)
+            continue;

-        return Value(std::move(result));
+        const Value nested = evaluatePath(index, array[i].getDocument());
+        if (!nested.missing())
+            result.push_back(nested);
    }
-    Value ExpressionFieldPath::evaluatePath(size_t index, const Document& input) const {
-        // Note this function is very hot so it is important that is is well optimized.
-        // In particular, all return paths should support RVO.
-        /* if we've hit the end of the path, stop */
-        if (index == _fieldPath.getPathLength() - 1)
-            return input[_fieldPath.getFieldName(index)];
+    return Value(std::move(result));
+}
+Value ExpressionFieldPath::evaluatePath(size_t index, const Document& input) const {
+    // Note this function is very hot so it is important that it is well optimized.
+    // In particular, all return paths should support RVO.
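evaluatePath, whose body follows, walks one field per recursion level: an Object recurses, an Array fans out through evaluatePathArray, and anything else yields the missing Value. A stand-in sketch of the document recursion (arrays omitted for brevity; nullptr plays the role of the missing Value):

    #include <iostream>
    #include <map>
    #include <memory>
    #include <string>
    #include <vector>

    struct Doc {
        std::map<std::string, int> leaves;
        std::map<std::string, std::shared_ptr<Doc>> subdocs;
    };

    static const int* evaluatePath(const std::vector<std::string>& path,
                                   size_t index,
                                   const Doc& input) {
        if (index == path.size() - 1) {  // hit the end of the path: stop
            auto it = input.leaves.find(path[index]);
            return it == input.leaves.end() ? nullptr : &it->second;
        }
        auto it = input.subdocs.find(path[index]);  // try to dive deeper
        return it == input.subdocs.end() ? nullptr : evaluatePath(path, index + 1, *it->second);
    }

    int main() {
        Doc root;
        root.subdocs["a"] = std::make_shared<Doc>();
        root.subdocs["a"]->leaves["b"] = 7;
        const int* hit = evaluatePath({"a", "b"}, 0, root);
        std::cout << (hit ? *hit : -1) << '\n';  // 7
    }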
+ + /* if we've hit the end of the path, stop */ + if (index == _fieldPath.getPathLength() - 1) + return input[_fieldPath.getFieldName(index)]; - // Try to dive deeper - const Value val = input[_fieldPath.getFieldName(index)]; - switch (val.getType()) { + // Try to dive deeper + const Value val = input[_fieldPath.getFieldName(index)]; + switch (val.getType()) { case Object: - return evaluatePath(index+1, val.getDocument()); + return evaluatePath(index + 1, val.getDocument()); case Array: - return evaluatePathArray(index+1, val); + return evaluatePathArray(index + 1, val); default: return Value(); - } } +} - Value ExpressionFieldPath::evaluateInternal(Variables* vars) const { - if (_fieldPath.getPathLength() == 1) // get the whole variable - return vars->getValue(_variable); +Value ExpressionFieldPath::evaluateInternal(Variables* vars) const { + if (_fieldPath.getPathLength() == 1) // get the whole variable + return vars->getValue(_variable); - if (_variable == Variables::ROOT_ID) { - // ROOT is always a document so use optimized code path - return evaluatePath(1, vars->getRoot()); - } - - Value var = vars->getValue(_variable); - switch (var.getType()) { - case Object: return evaluatePath(1, var.getDocument()); - case Array: return evaluatePathArray(1, var); - default: return Value(); - } + if (_variable == Variables::ROOT_ID) { + // ROOT is always a document so use optimized code path + return evaluatePath(1, vars->getRoot()); } - Value ExpressionFieldPath::serialize(bool explain) const { - if (_fieldPath.getFieldName(0) == "CURRENT" && _fieldPath.getPathLength() > 1) { - // use short form for "$$CURRENT.foo" but not just "$$CURRENT" - return Value("$" + _fieldPath.tail().getPath(false)); - } - else { - return Value("$$" + _fieldPath.getPath(false)); - } + Value var = vars->getValue(_variable); + switch (var.getType()) { + case Object: + return evaluatePath(1, var.getDocument()); + case Array: + return evaluatePathArray(1, var); + default: + return Value(); } +} - /* ------------------------- ExpressionFilter ----------------------------- */ +Value ExpressionFieldPath::serialize(bool explain) const { + if (_fieldPath.getFieldName(0) == "CURRENT" && _fieldPath.getPathLength() > 1) { + // use short form for "$$CURRENT.foo" but not just "$$CURRENT" + return Value("$" + _fieldPath.tail().getPath(false)); + } else { + return Value("$$" + _fieldPath.getPath(false)); + } +} - REGISTER_EXPRESSION("$filter", ExpressionFilter::parse); - intrusive_ptr<Expression> ExpressionFilter::parse(BSONElement expr, - const VariablesParseState& vpsIn) { +/* ------------------------- ExpressionFilter ----------------------------- */ - verify(str::equals(expr.fieldName(), "$filter")); +REGISTER_EXPRESSION("$filter", ExpressionFilter::parse); +intrusive_ptr<Expression> ExpressionFilter::parse(BSONElement expr, + const VariablesParseState& vpsIn) { + verify(str::equals(expr.fieldName(), "$filter")); - uassert(28646, "$filter only supports an object as its argument", - expr.type() == Object); + uassert(28646, "$filter only supports an object as its argument", expr.type() == Object); - // "cond" must be parsed after "as" regardless of BSON order. 
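The ordering comment above is the whole scoping trick: "as" introduces a variable into a copied parse state, so only "cond" can see it, while "input" is parsed against the outer scope. A toy model of that copy-and-define step (Scope is an illustrative stand-in, not VariablesParseState's API):

    #include <cassert>
    #include <map>
    #include <string>

    struct Scope {
        std::map<std::string, int> vars;  // variable name -> id
        bool has(const std::string& name) const { return vars.count(name) != 0; }
    };

    int main() {
        Scope outer;                 // what "input" is parsed against
        Scope sub = outer;           // copy, as VariablesParseState vpsSub(vpsIn) does
        sub.vars["item"] = 1;        // defineVariable("item")
        assert(sub.has("item"));     // "cond" may reference $$item
        assert(!outer.has("item"));  // "input" may not
    }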
- BSONElement inputElem; - BSONElement asElem; - BSONElement condElem; - for (auto elem : expr.Obj()) { - if (str::equals(elem.fieldName(), "input")) { - inputElem = elem; - } else if (str::equals(elem.fieldName(), "as")) { - asElem = elem; - } else if (str::equals(elem.fieldName(), "cond")) { - condElem = elem; - } else { - uasserted(28647, str::stream() - << "Unrecognized parameter to $filter: " << elem.fieldName()); - } + // "cond" must be parsed after "as" regardless of BSON order. + BSONElement inputElem; + BSONElement asElem; + BSONElement condElem; + for (auto elem : expr.Obj()) { + if (str::equals(elem.fieldName(), "input")) { + inputElem = elem; + } else if (str::equals(elem.fieldName(), "as")) { + asElem = elem; + } else if (str::equals(elem.fieldName(), "cond")) { + condElem = elem; + } else { + uasserted(28647, + str::stream() << "Unrecognized parameter to $filter: " << elem.fieldName()); } + } - uassert(28648, "Missing 'input' parameter to $filter", - !inputElem.eoo()); - uassert(28649, "Missing 'as' parameter to $filter", - !asElem.eoo()); - uassert(28650, "Missing 'cond' parameter to $filter", - !condElem.eoo()); + uassert(28648, "Missing 'input' parameter to $filter", !inputElem.eoo()); + uassert(28649, "Missing 'as' parameter to $filter", !asElem.eoo()); + uassert(28650, "Missing 'cond' parameter to $filter", !condElem.eoo()); - // Parse "input", only has outer variables. - intrusive_ptr<Expression> input = parseOperand(inputElem, vpsIn); + // Parse "input", only has outer variables. + intrusive_ptr<Expression> input = parseOperand(inputElem, vpsIn); - // Parse "as". - VariablesParseState vpsSub(vpsIn); // vpsSub gets our variable, vpsIn doesn't. - string varName = asElem.str(); - Variables::uassertValidNameForUserWrite(varName); - Variables::Id varId = vpsSub.defineVariable(varName); + // Parse "as". + VariablesParseState vpsSub(vpsIn); // vpsSub gets our variable, vpsIn doesn't. + string varName = asElem.str(); + Variables::uassertValidNameForUserWrite(varName); + Variables::Id varId = vpsSub.defineVariable(varName); - // Parse "cond", has access to "as" variable. - intrusive_ptr<Expression> cond = parseOperand(condElem, vpsSub); - - return new ExpressionFilter(std::move(varName), varId, std::move(input), std::move(cond)); - } + // Parse "cond", has access to "as" variable. + intrusive_ptr<Expression> cond = parseOperand(condElem, vpsSub); - ExpressionFilter::ExpressionFilter(string varName, - Variables::Id varId, - intrusive_ptr<Expression> input, - intrusive_ptr<Expression> filter) - : _varName(std::move(varName)) - , _varId(varId) - , _input(std::move(input)) - , _filter(std::move(filter)) - {} + return new ExpressionFilter(std::move(varName), varId, std::move(input), std::move(cond)); +} - intrusive_ptr<Expression> ExpressionFilter::optimize() { - // TODO handle when _input is constant. - _input = _input->optimize(); - _filter = _filter->optimize(); - return this; - } +ExpressionFilter::ExpressionFilter(string varName, + Variables::Id varId, + intrusive_ptr<Expression> input, + intrusive_ptr<Expression> filter) + : _varName(std::move(varName)), + _varId(varId), + _input(std::move(input)), + _filter(std::move(filter)) {} + +intrusive_ptr<Expression> ExpressionFilter::optimize() { + // TODO handle when _input is constant. 
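evaluateInternal, shown below, is a plain bind-test-keep loop: each element is written into the variable slot reserved for the "as" name, the predicate is evaluated, and truthy elements are kept. A sketch of the same loop over ints (slot and the lambda are illustrative stand-ins for Variables and the cond expression):

    #include <functional>
    #include <iostream>
    #include <vector>

    int main() {
        std::vector<int> input{1, 5, 2, 8};
        int slot = 0;  // plays the part of vars->setValue(_varId, elem)
        std::function<bool()> cond = [&] { return slot >= 3; };  // cond: $$item >= 3
        std::vector<int> output;
        for (int elem : input) {
            slot = elem;  // bind the "as" variable to this element
            if (cond())
                output.push_back(elem);
        }
        for (int v : output)
            std::cout << v << ' ';  // 5 8
    }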
+ _input = _input->optimize(); + _filter = _filter->optimize(); + return this; +} - Value ExpressionFilter::serialize(bool explain) const { - return Value(DOC("$filter" << DOC("input" << _input->serialize(explain) - << "as" << _varName - << "cond" << _filter->serialize(explain) - ))); - } +Value ExpressionFilter::serialize(bool explain) const { + return Value(DOC("$filter" << DOC("input" << _input->serialize(explain) << "as" << _varName + << "cond" << _filter->serialize(explain)))); +} - Value ExpressionFilter::evaluateInternal(Variables* vars) const { - // We are guaranteed at parse time that this isn't using our _varId. - const Value inputVal = _input->evaluateInternal(vars); - if (inputVal.nullish()) - return Value(BSONNULL); +Value ExpressionFilter::evaluateInternal(Variables* vars) const { + // We are guaranteed at parse time that this isn't using our _varId. + const Value inputVal = _input->evaluateInternal(vars); + if (inputVal.nullish()) + return Value(BSONNULL); - uassert(28651, str::stream() << "input to $filter must be an Array not " - << typeName(inputVal.getType()), - inputVal.getType() == Array); + uassert(28651, + str::stream() << "input to $filter must be an Array not " + << typeName(inputVal.getType()), + inputVal.getType() == Array); - const vector<Value>& input = inputVal.getArray(); + const vector<Value>& input = inputVal.getArray(); - if (input.empty()) - return inputVal; + if (input.empty()) + return inputVal; - vector<Value> output; - for (const auto& elem : input) { - vars->setValue(_varId, elem); + vector<Value> output; + for (const auto& elem : input) { + vars->setValue(_varId, elem); - if (_filter->evaluateInternal(vars).coerceToBool()) { - output.push_back(std::move(elem)); - } + if (_filter->evaluateInternal(vars).coerceToBool()) { + output.push_back(std::move(elem)); } - - return Value(std::move(output)); } - void ExpressionFilter::addDependencies(DepsTracker* deps, vector<string>* path) const { - _input->addDependencies(deps); - _filter->addDependencies(deps); - } + return Value(std::move(output)); +} - /* ------------------------- ExpressionLet ----------------------------- */ +void ExpressionFilter::addDependencies(DepsTracker* deps, vector<string>* path) const { + _input->addDependencies(deps); + _filter->addDependencies(deps); +} - REGISTER_EXPRESSION("$let", ExpressionLet::parse); - intrusive_ptr<Expression> ExpressionLet::parse( - BSONElement expr, - const VariablesParseState& vpsIn) { +/* ------------------------- ExpressionLet ----------------------------- */ - verify(str::equals(expr.fieldName(), "$let")); +REGISTER_EXPRESSION("$let", ExpressionLet::parse); +intrusive_ptr<Expression> ExpressionLet::parse(BSONElement expr, const VariablesParseState& vpsIn) { + verify(str::equals(expr.fieldName(), "$let")); - uassert(16874, "$let only supports an object as its argument", - expr.type() == Object); - const BSONObj args = expr.embeddedObject(); + uassert(16874, "$let only supports an object as its argument", expr.type() == Object); + const BSONObj args = expr.embeddedObject(); - // varsElem must be parsed before inElem regardless of BSON order. - BSONElement varsElem; - BSONElement inElem; - BSONForEach(arg, args) { - if (str::equals(arg.fieldName(), "vars")) { - varsElem = arg; - } else if (str::equals(arg.fieldName(), "in")) { - inElem = arg; - } else { - uasserted(16875, str::stream() - << "Unrecognized parameter to $let: " << arg.fieldName()); - } + // varsElem must be parsed before inElem regardless of BSON order. 
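The same parse-before-use rule drives $let's evaluation: every "vars" initializer runs first, each result lands in its variable slot, and only then is "in" evaluated against those slots. A toy model (the slot map and lambdas are illustrative stand-ins for Variables and parsed expressions):

    #include <cassert>
    #include <functional>
    #include <map>

    int main() {
        std::map<int, double> slots;  // Variables::Id -> value
        std::map<int, std::function<double()>> vars{
            {1, [] { return 2.0; }},   // vars: { low: 2 }
            {2, [] { return 10.0; }},  //       { high: 10 }
        };
        std::function<double()> in = [&] { return slots[2] - slots[1]; };  // in: $$high - $$low
        for (auto& v : vars)
            slots[v.first] = v.second();  // materialize every binding first
        assert(in() == 8.0);
    }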
+ BSONElement varsElem; + BSONElement inElem; + BSONForEach(arg, args) { + if (str::equals(arg.fieldName(), "vars")) { + varsElem = arg; + } else if (str::equals(arg.fieldName(), "in")) { + inElem = arg; + } else { + uasserted(16875, + str::stream() << "Unrecognized parameter to $let: " << arg.fieldName()); } + } - uassert(16876, "Missing 'vars' parameter to $let", - !varsElem.eoo()); - uassert(16877, "Missing 'in' parameter to $let", - !inElem.eoo()); - - // parse "vars" - VariablesParseState vpsSub(vpsIn); // vpsSub gets our vars, vpsIn doesn't. - VariableMap vars; - BSONForEach(varElem, varsElem.embeddedObjectUserCheck()) { - const string varName = varElem.fieldName(); - Variables::uassertValidNameForUserWrite(varName); - Variables::Id id = vpsSub.defineVariable(varName); - - vars[id] = NameAndExpression(varName, - parseOperand(varElem, vpsIn)); // only has outer vars - } + uassert(16876, "Missing 'vars' parameter to $let", !varsElem.eoo()); + uassert(16877, "Missing 'in' parameter to $let", !inElem.eoo()); - // parse "in" - intrusive_ptr<Expression> subExpression = parseOperand(inElem, vpsSub); // has our vars + // parse "vars" + VariablesParseState vpsSub(vpsIn); // vpsSub gets our vars, vpsIn doesn't. + VariableMap vars; + BSONForEach(varElem, varsElem.embeddedObjectUserCheck()) { + const string varName = varElem.fieldName(); + Variables::uassertValidNameForUserWrite(varName); + Variables::Id id = vpsSub.defineVariable(varName); - return new ExpressionLet(vars, subExpression); + vars[id] = NameAndExpression(varName, parseOperand(varElem, vpsIn)); // only has outer vars } - ExpressionLet::ExpressionLet(const VariableMap& vars, intrusive_ptr<Expression> subExpression) - : _variables(vars) - , _subExpression(subExpression) - {} + // parse "in" + intrusive_ptr<Expression> subExpression = parseOperand(inElem, vpsSub); // has our vars - intrusive_ptr<Expression> ExpressionLet::optimize() { - if (_variables.empty()) { - // we aren't binding any variables so just return the subexpression - return _subExpression->optimize(); - } + return new ExpressionLet(vars, subExpression); +} - for (VariableMap::iterator it=_variables.begin(), end=_variables.end(); it != end; ++it) { - it->second.expression = it->second.expression->optimize(); - } +ExpressionLet::ExpressionLet(const VariableMap& vars, intrusive_ptr<Expression> subExpression) + : _variables(vars), _subExpression(subExpression) {} - // TODO be smarter with constant "variables" - _subExpression = _subExpression->optimize(); +intrusive_ptr<Expression> ExpressionLet::optimize() { + if (_variables.empty()) { + // we aren't binding any variables so just return the subexpression + return _subExpression->optimize(); + } - return this; + for (VariableMap::iterator it = _variables.begin(), end = _variables.end(); it != end; ++it) { + it->second.expression = it->second.expression->optimize(); } - Value ExpressionLet::serialize(bool explain) const { - MutableDocument vars; - for (VariableMap::const_iterator it=_variables.begin(), end=_variables.end(); - it != end; ++it) { - vars[it->second.name] = it->second.expression->serialize(explain); - } + // TODO be smarter with constant "variables" + _subExpression = _subExpression->optimize(); + + return this; +} - return Value(DOC("$let" << DOC("vars" << vars.freeze() - << "in" << _subExpression->serialize(explain)) - )); +Value ExpressionLet::serialize(bool explain) const { + MutableDocument vars; + for (VariableMap::const_iterator it = _variables.begin(), end = _variables.end(); it != end; + ++it) { + 
vars[it->second.name] = it->second.expression->serialize(explain); } - Value ExpressionLet::evaluateInternal(Variables* vars) const { - for (VariableMap::const_iterator it=_variables.begin(), end=_variables.end(); - it != end; ++it) { - // It is guaranteed at parse-time that these expressions don't use the variable ids we - // are setting - vars->setValue(it->first, - it->second.expression->evaluateInternal(vars)); - } + return Value( + DOC("$let" << DOC("vars" << vars.freeze() << "in" << _subExpression->serialize(explain)))); +} - return _subExpression->evaluateInternal(vars); +Value ExpressionLet::evaluateInternal(Variables* vars) const { + for (VariableMap::const_iterator it = _variables.begin(), end = _variables.end(); it != end; + ++it) { + // It is guaranteed at parse-time that these expressions don't use the variable ids we + // are setting + vars->setValue(it->first, it->second.expression->evaluateInternal(vars)); } - void ExpressionLet::addDependencies(DepsTracker* deps, vector<string>* path) const { - for (VariableMap::const_iterator it=_variables.begin(), end=_variables.end(); - it != end; ++it) { - it->second.expression->addDependencies(deps); - } + return _subExpression->evaluateInternal(vars); +} - // TODO be smarter when CURRENT is a bound variable - _subExpression->addDependencies(deps); +void ExpressionLet::addDependencies(DepsTracker* deps, vector<string>* path) const { + for (VariableMap::const_iterator it = _variables.begin(), end = _variables.end(); it != end; + ++it) { + it->second.expression->addDependencies(deps); } + // TODO be smarter when CURRENT is a bound variable + _subExpression->addDependencies(deps); +} - /* ------------------------- ExpressionMap ----------------------------- */ - REGISTER_EXPRESSION("$map", ExpressionMap::parse); - intrusive_ptr<Expression> ExpressionMap::parse( - BSONElement expr, - const VariablesParseState& vpsIn) { +/* ------------------------- ExpressionMap ----------------------------- */ - verify(str::equals(expr.fieldName(), "$map")); +REGISTER_EXPRESSION("$map", ExpressionMap::parse); +intrusive_ptr<Expression> ExpressionMap::parse(BSONElement expr, const VariablesParseState& vpsIn) { + verify(str::equals(expr.fieldName(), "$map")); - uassert(16878, "$map only supports an object as its argument", - expr.type() == Object); + uassert(16878, "$map only supports an object as its argument", expr.type() == Object); - // "in" must be parsed after "as" regardless of BSON order - BSONElement inputElem; - BSONElement asElem; - BSONElement inElem; - const BSONObj args = expr.embeddedObject(); - BSONForEach(arg, args) { - if (str::equals(arg.fieldName(), "input")) { - inputElem = arg; - } else if (str::equals(arg.fieldName(), "as")) { - asElem = arg; - } else if (str::equals(arg.fieldName(), "in")) { - inElem = arg; - } else { - uasserted(16879, str::stream() - << "Unrecognized parameter to $map: " << arg.fieldName()); - } + // "in" must be parsed after "as" regardless of BSON order + BSONElement inputElem; + BSONElement asElem; + BSONElement inElem; + const BSONObj args = expr.embeddedObject(); + BSONForEach(arg, args) { + if (str::equals(arg.fieldName(), "input")) { + inputElem = arg; + } else if (str::equals(arg.fieldName(), "as")) { + asElem = arg; + } else if (str::equals(arg.fieldName(), "in")) { + inElem = arg; + } else { + uasserted(16879, + str::stream() << "Unrecognized parameter to $map: " << arg.fieldName()); } + } - uassert(16880, "Missing 'input' parameter to $map", - !inputElem.eoo()); - uassert(16881, "Missing 'as' parameter 
to $map", - !asElem.eoo()); - uassert(16882, "Missing 'in' parameter to $map", - !inElem.eoo()); - - // parse "input" - intrusive_ptr<Expression> input = parseOperand(inputElem, vpsIn); // only has outer vars - - // parse "as" - VariablesParseState vpsSub(vpsIn); // vpsSub gets our vars, vpsIn doesn't. - string varName = asElem.str(); - Variables::uassertValidNameForUserWrite(varName); - Variables::Id varId = vpsSub.defineVariable(varName); - - // parse "in" - intrusive_ptr<Expression> in = parseOperand(inElem, vpsSub); // has access to map variable + uassert(16880, "Missing 'input' parameter to $map", !inputElem.eoo()); + uassert(16881, "Missing 'as' parameter to $map", !asElem.eoo()); + uassert(16882, "Missing 'in' parameter to $map", !inElem.eoo()); - return new ExpressionMap(varName, varId, input, in); - } + // parse "input" + intrusive_ptr<Expression> input = parseOperand(inputElem, vpsIn); // only has outer vars - ExpressionMap::ExpressionMap(const string& varName, - Variables::Id varId, - intrusive_ptr<Expression> input, - intrusive_ptr<Expression> each) - : _varName(varName) - , _varId(varId) - , _input(input) - , _each(each) - {} + // parse "as" + VariablesParseState vpsSub(vpsIn); // vpsSub gets our vars, vpsIn doesn't. + string varName = asElem.str(); + Variables::uassertValidNameForUserWrite(varName); + Variables::Id varId = vpsSub.defineVariable(varName); - intrusive_ptr<Expression> ExpressionMap::optimize() { - // TODO handle when _input is constant - _input = _input->optimize(); - _each = _each->optimize(); - return this; - } + // parse "in" + intrusive_ptr<Expression> in = parseOperand(inElem, vpsSub); // has access to map variable - Value ExpressionMap::serialize(bool explain) const { - return Value(DOC("$map" << DOC("input" << _input->serialize(explain) - << "as" << _varName - << "in" << _each->serialize(explain) - ))); - } + return new ExpressionMap(varName, varId, input, in); +} - Value ExpressionMap::evaluateInternal(Variables* vars) const { - // guaranteed at parse time that this isn't using our _varId - const Value inputVal = _input->evaluateInternal(vars); - if (inputVal.nullish()) - return Value(BSONNULL); +ExpressionMap::ExpressionMap(const string& varName, + Variables::Id varId, + intrusive_ptr<Expression> input, + intrusive_ptr<Expression> each) + : _varName(varName), _varId(varId), _input(input), _each(each) {} + +intrusive_ptr<Expression> ExpressionMap::optimize() { + // TODO handle when _input is constant + _input = _input->optimize(); + _each = _each->optimize(); + return this; +} - uassert(16883, str::stream() << "input to $map must be an Array not " - << typeName(inputVal.getType()), - inputVal.getType() == Array); +Value ExpressionMap::serialize(bool explain) const { + return Value(DOC("$map" << DOC("input" << _input->serialize(explain) << "as" << _varName << "in" + << _each->serialize(explain)))); +} - const vector<Value>& input = inputVal.getArray(); +Value ExpressionMap::evaluateInternal(Variables* vars) const { + // guaranteed at parse time that this isn't using our _varId + const Value inputVal = _input->evaluateInternal(vars); + if (inputVal.nullish()) + return Value(BSONNULL); - if (input.empty()) - return inputVal; + uassert(16883, + str::stream() << "input to $map must be an Array not " << typeName(inputVal.getType()), + inputVal.getType() == Array); - vector<Value> output; - output.reserve(input.size()); - for (size_t i=0; i < input.size(); i++) { - vars->setValue(_varId, input[i]); + const vector<Value>& input = inputVal.getArray(); - Value 
toInsert = _each->evaluateInternal(vars); - if (toInsert.missing()) - toInsert = Value(BSONNULL); // can't insert missing values into array + if (input.empty()) + return inputVal; - output.push_back(toInsert); - } + vector<Value> output; + output.reserve(input.size()); + for (size_t i = 0; i < input.size(); i++) { + vars->setValue(_varId, input[i]); - return Value(std::move(output)); - } + Value toInsert = _each->evaluateInternal(vars); + if (toInsert.missing()) + toInsert = Value(BSONNULL); // can't insert missing values into array - void ExpressionMap::addDependencies(DepsTracker* deps, vector<string>* path) const { - _input->addDependencies(deps); - _each->addDependencies(deps); + output.push_back(toInsert); } - /* ------------------------- ExpressionMeta ----------------------------- */ + return Value(std::move(output)); +} - REGISTER_EXPRESSION("$meta", ExpressionMeta::parse); - intrusive_ptr<Expression> ExpressionMeta::parse( - BSONElement expr, - const VariablesParseState& vpsIn) { +void ExpressionMap::addDependencies(DepsTracker* deps, vector<string>* path) const { + _input->addDependencies(deps); + _each->addDependencies(deps); +} - uassert(17307, "$meta only supports String arguments", - expr.type() == String); - uassert(17308, "Unsupported argument to $meta: " + expr.String(), - expr.String() == "textScore"); +/* ------------------------- ExpressionMeta ----------------------------- */ - return new ExpressionMeta(); - } +REGISTER_EXPRESSION("$meta", ExpressionMeta::parse); +intrusive_ptr<Expression> ExpressionMeta::parse(BSONElement expr, + const VariablesParseState& vpsIn) { + uassert(17307, "$meta only supports String arguments", expr.type() == String); + uassert(17308, "Unsupported argument to $meta: " + expr.String(), expr.String() == "textScore"); - Value ExpressionMeta::serialize(bool explain) const { - return Value(DOC("$meta" << "textScore")); - } + return new ExpressionMeta(); +} - Value ExpressionMeta::evaluateInternal(Variables* vars) const { - const Document& root = vars->getRoot(); - return root.hasTextScore() - ? Value(root.getTextScore()) - : Value(); - } +Value ExpressionMeta::serialize(bool explain) const { + return Value(DOC("$meta" + << "textScore")); +} - void ExpressionMeta::addDependencies(DepsTracker* deps, vector<string>* path) const { - deps->needTextScore = true; - } +Value ExpressionMeta::evaluateInternal(Variables* vars) const { + const Document& root = vars->getRoot(); + return root.hasTextScore() ? Value(root.getTextScore()) : Value(); +} - /* ------------------------- ExpressionMillisecond ----------------------------- */ +void ExpressionMeta::addDependencies(DepsTracker* deps, vector<string>* path) const { + deps->needTextScore = true; +} - Value ExpressionMillisecond::evaluateInternal(Variables* vars) const { - Value date(vpOperand[0]->evaluateInternal(vars)); - return Value(extract(date.coerceToDate())); - } +/* ------------------------- ExpressionMillisecond ----------------------------- */ - int ExpressionMillisecond::extract(const long long date) { - const int ms = date % 1000LL; - // adding 1000 since dates before 1970 would have negative ms - return ms >= 0 ? 
ms : 1000 + ms; - } +Value ExpressionMillisecond::evaluateInternal(Variables* vars) const { + Value date(vpOperand[0]->evaluateInternal(vars)); + return Value(extract(date.coerceToDate())); +} - REGISTER_EXPRESSION("$millisecond", ExpressionMillisecond::parse); - const char* ExpressionMillisecond::getOpName() const { - return "$millisecond"; - } +int ExpressionMillisecond::extract(const long long date) { + const int ms = date % 1000LL; + // adding 1000 since dates before 1970 would have negative ms + return ms >= 0 ? ms : 1000 + ms; +} - /* ------------------------- ExpressionMinute -------------------------- */ +REGISTER_EXPRESSION("$millisecond", ExpressionMillisecond::parse); +const char* ExpressionMillisecond::getOpName() const { + return "$millisecond"; +} - Value ExpressionMinute::evaluateInternal(Variables* vars) const { - Value pDate(vpOperand[0]->evaluateInternal(vars)); - return Value(extract(pDate.coerceToTm())); - } +/* ------------------------- ExpressionMinute -------------------------- */ - REGISTER_EXPRESSION("$minute", ExpressionMinute::parse); - const char* ExpressionMinute::getOpName() const { - return "$minute"; - } +Value ExpressionMinute::evaluateInternal(Variables* vars) const { + Value pDate(vpOperand[0]->evaluateInternal(vars)); + return Value(extract(pDate.coerceToTm())); +} - /* ----------------------- ExpressionMod ---------------------------- */ +REGISTER_EXPRESSION("$minute", ExpressionMinute::parse); +const char* ExpressionMinute::getOpName() const { + return "$minute"; +} - Value ExpressionMod::evaluateInternal(Variables* vars) const { - Value lhs = vpOperand[0]->evaluateInternal(vars); - Value rhs = vpOperand[1]->evaluateInternal(vars); +/* ----------------------- ExpressionMod ---------------------------- */ - BSONType leftType = lhs.getType(); - BSONType rightType = rhs.getType(); +Value ExpressionMod::evaluateInternal(Variables* vars) const { + Value lhs = vpOperand[0]->evaluateInternal(vars); + Value rhs = vpOperand[1]->evaluateInternal(vars); - if (lhs.numeric() && rhs.numeric()) { - // ensure we aren't modding by 0 - double right = rhs.coerceToDouble(); + BSONType leftType = lhs.getType(); + BSONType rightType = rhs.getType(); - uassert(16610, "can't $mod by 0", - right != 0); + if (lhs.numeric() && rhs.numeric()) { + // ensure we aren't modding by 0 + double right = rhs.coerceToDouble(); - if (leftType == NumberDouble || (rightType == NumberDouble && !rhs.integral())) { - // Need to do fmod. Integer-valued double case is handled below. + uassert(16610, "can't $mod by 0", right != 0); - double left = lhs.coerceToDouble(); - return Value(fmod(left, right)); - } - else if (leftType == NumberLong || rightType == NumberLong) { - // if either is long, return long - long long left = lhs.coerceToLong(); - long long rightLong = rhs.coerceToLong(); - return Value(left % rightLong); - } + if (leftType == NumberDouble || (rightType == NumberDouble && !rhs.integral())) { + // Need to do fmod. Integer-valued double case is handled below. 
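+            // For illustration, fmod keeps the dividend's sign, as in C++:
+            //     {$mod: [7.5, 2]}  -> fmod(7.5, 2.0)  ==  1.5
+            //     {$mod: [-7.5, 2]} -> fmod(-7.5, 2.0) == -1.5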
- // lastly they must both be ints, return int - int left = lhs.coerceToInt(); - int rightInt = rhs.coerceToInt(); - return Value(left % rightInt); - } - else if (lhs.nullish() || rhs.nullish()) { - return Value(BSONNULL); - } - else { - uasserted(16611, str::stream() << "$mod only supports numeric types, not " - << typeName(lhs.getType()) - << " and " - << typeName(rhs.getType())); + double left = lhs.coerceToDouble(); + return Value(fmod(left, right)); + } else if (leftType == NumberLong || rightType == NumberLong) { + // if either is long, return long + long long left = lhs.coerceToLong(); + long long rightLong = rhs.coerceToLong(); + return Value(left % rightLong); } - } - REGISTER_EXPRESSION("$mod", ExpressionMod::parse); - const char* ExpressionMod::getOpName() const { - return "$mod"; + // lastly they must both be ints, return int + int left = lhs.coerceToInt(); + int rightInt = rhs.coerceToInt(); + return Value(left % rightInt); + } else if (lhs.nullish() || rhs.nullish()) { + return Value(BSONNULL); + } else { + uasserted(16611, + str::stream() << "$mod only supports numeric types, not " + << typeName(lhs.getType()) << " and " << typeName(rhs.getType())); } +} - /* ------------------------ ExpressionMonth ----------------------------- */ +REGISTER_EXPRESSION("$mod", ExpressionMod::parse); +const char* ExpressionMod::getOpName() const { + return "$mod"; +} - Value ExpressionMonth::evaluateInternal(Variables* vars) const { - Value pDate(vpOperand[0]->evaluateInternal(vars)); - return Value(extract(pDate.coerceToTm())); - } +/* ------------------------ ExpressionMonth ----------------------------- */ - REGISTER_EXPRESSION("$month", ExpressionMonth::parse); - const char* ExpressionMonth::getOpName() const { - return "$month"; - } +Value ExpressionMonth::evaluateInternal(Variables* vars) const { + Value pDate(vpOperand[0]->evaluateInternal(vars)); + return Value(extract(pDate.coerceToTm())); +} - /* ------------------------- ExpressionMultiply ----------------------------- */ +REGISTER_EXPRESSION("$month", ExpressionMonth::parse); +const char* ExpressionMonth::getOpName() const { + return "$month"; +} - Value ExpressionMultiply::evaluateInternal(Variables* vars) const { - /* - We'll try to return the narrowest possible result value. To do that - without creating intermediate Values, do the arithmetic for double - and integral types in parallel, tracking the current narrowest - type. - */ - double doubleProduct = 1; - long long longProduct = 1; - BSONType productType = NumberInt; - - const size_t n = vpOperand.size(); - for(size_t i = 0; i < n; ++i) { - Value val = vpOperand[i]->evaluateInternal(vars); - - if (val.numeric()) { - productType = Value::getWidestNumeric(productType, val.getType()); - - doubleProduct *= val.coerceToDouble(); - longProduct *= val.coerceToLong(); - } - else if (val.nullish()) { - return Value(BSONNULL); - } - else { - uasserted(16555, str::stream() << "$multiply only supports numeric types, not " - << typeName(val.getType())); - } - } +/* ------------------------- ExpressionMultiply ----------------------------- */ - if (productType == NumberDouble) - return Value(doubleProduct); - else if (productType == NumberLong) - return Value(longProduct); - else if (productType == NumberInt) - return Value::createIntOrLong(longProduct); - else - massert(16418, "$multiply resulted in a non-numeric type", false); - } +Value ExpressionMultiply::evaluateInternal(Variables* vars) const { + /* + We'll try to return the narrowest possible result value. 
To do that + without creating intermediate Values, do the arithmetic for double + and integral types in parallel, tracking the current narrowest + type. + */ + double doubleProduct = 1; + long long longProduct = 1; + BSONType productType = NumberInt; - REGISTER_EXPRESSION("$multiply", ExpressionMultiply::parse); - const char* ExpressionMultiply::getOpName() const { - return "$multiply"; - } + const size_t n = vpOperand.size(); + for (size_t i = 0; i < n; ++i) { + Value val = vpOperand[i]->evaluateInternal(vars); - /* ------------------------- ExpressionHour ----------------------------- */ + if (val.numeric()) { + productType = Value::getWidestNumeric(productType, val.getType()); - Value ExpressionHour::evaluateInternal(Variables* vars) const { - Value pDate(vpOperand[0]->evaluateInternal(vars)); - return Value(extract(pDate.coerceToTm())); + doubleProduct *= val.coerceToDouble(); + longProduct *= val.coerceToLong(); + } else if (val.nullish()) { + return Value(BSONNULL); + } else { + uasserted(16555, + str::stream() << "$multiply only supports numeric types, not " + << typeName(val.getType())); + } } - REGISTER_EXPRESSION("$hour", ExpressionHour::parse); - const char* ExpressionHour::getOpName() const { - return "$hour"; - } + if (productType == NumberDouble) + return Value(doubleProduct); + else if (productType == NumberLong) + return Value(longProduct); + else if (productType == NumberInt) + return Value::createIntOrLong(longProduct); + else + massert(16418, "$multiply resulted in a non-numeric type", false); +} - /* ----------------------- ExpressionIfNull ---------------------------- */ +REGISTER_EXPRESSION("$multiply", ExpressionMultiply::parse); +const char* ExpressionMultiply::getOpName() const { + return "$multiply"; +} - Value ExpressionIfNull::evaluateInternal(Variables* vars) const { - Value pLeft(vpOperand[0]->evaluateInternal(vars)); - if (!pLeft.nullish()) - return pLeft; +/* ------------------------- ExpressionHour ----------------------------- */ - Value pRight(vpOperand[1]->evaluateInternal(vars)); - return pRight; - } +Value ExpressionHour::evaluateInternal(Variables* vars) const { + Value pDate(vpOperand[0]->evaluateInternal(vars)); + return Value(extract(pDate.coerceToTm())); +} - REGISTER_EXPRESSION("$ifNull", ExpressionIfNull::parse); - const char* ExpressionIfNull::getOpName() const { - return "$ifNull"; - } +REGISTER_EXPRESSION("$hour", ExpressionHour::parse); +const char* ExpressionHour::getOpName() const { + return "$hour"; +} - /* ------------------------ ExpressionNary ----------------------------- */ +/* ----------------------- ExpressionIfNull ---------------------------- */ - intrusive_ptr<Expression> ExpressionNary::optimize() { - const size_t n = vpOperand.size(); +Value ExpressionIfNull::evaluateInternal(Variables* vars) const { + Value pLeft(vpOperand[0]->evaluateInternal(vars)); + if (!pLeft.nullish()) + return pLeft; - // optimize sub-expressions and count constants - unsigned constCount = 0; - for(size_t i = 0; i < n; ++i) { - intrusive_ptr<Expression> optimized = vpOperand[i]->optimize(); + Value pRight(vpOperand[1]->evaluateInternal(vars)); + return pRight; +} - // substitute the optimized expression - vpOperand[i] = optimized; +REGISTER_EXPRESSION("$ifNull", ExpressionIfNull::parse); +const char* ExpressionIfNull::getOpName() const { + return "$ifNull"; +} - // check to see if the result was a constant - if (dynamic_cast<ExpressionConstant*>(optimized.get())) { - constCount++; - } - } +/* ------------------------ ExpressionNary 
----------------------------- */ - // If all the operands are constant, we can replace this expression with a constant. Using - // an empty Variables since it will never be accessed. - if (constCount == n) { - Variables emptyVars; - Value pResult(evaluateInternal(&emptyVars)); - intrusive_ptr<Expression> pReplacement( - ExpressionConstant::create(pResult)); - return pReplacement; - } +intrusive_ptr<Expression> ExpressionNary::optimize() { + const size_t n = vpOperand.size(); - // Remaining optimizations are only for associative and commutative expressions. - if (!isAssociativeAndCommutative()) - return this; - - // Process vpOperand to split it into constant and nonconstant vectors. - // This can leave vpOperand in an invalid state that is cleaned up after the loop. - ExpressionVector constExprs; - ExpressionVector nonConstExprs; - for(size_t i = 0; i < vpOperand.size(); ++i) { // NOTE: vpOperand grows in loop - intrusive_ptr<Expression> expr = vpOperand[i]; - if (dynamic_cast<ExpressionConstant*>(expr.get())) { - constExprs.push_back(expr); - } - else { - // If the child operand is the same type as this, then we can - // extract its operands and inline them here because we know - // this is commutative and associative. We detect sameness of - // the child operator by checking for equality of the opNames - ExpressionNary* nary = dynamic_cast<ExpressionNary*>(expr.get()); - if (!nary || !str::equals(nary->getOpName(), getOpName())) { - nonConstExprs.push_back(expr); - } - else { - // same expression, so flatten by adding to vpOperand which - // will be processed later in this loop. - vpOperand.insert(vpOperand.end(), - nary->vpOperand.begin(), - nary->vpOperand.end()); - } - } - } + // optimize sub-expressions and count constants + unsigned constCount = 0; + for (size_t i = 0; i < n; ++i) { + intrusive_ptr<Expression> optimized = vpOperand[i]->optimize(); - // collapse all constant expressions (if any) - Value constValue; - if (!constExprs.empty()) { - vpOperand = constExprs; - Variables emptyVars; - constValue = evaluateInternal(&emptyVars); - } + // substitute the optimized expression + vpOperand[i] = optimized; - // now set the final expression list with constant (if any) at the end - vpOperand = nonConstExprs; - if (!constExprs.empty()) { - vpOperand.push_back(ExpressionConstant::create(constValue)); + // check to see if the result was a constant + if (dynamic_cast<ExpressionConstant*>(optimized.get())) { + constCount++; } + } - return this; + // If all the operands are constant, we can replace this expression with a constant. Using + // an empty Variables since it will never be accessed. + if (constCount == n) { + Variables emptyVars; + Value pResult(evaluateInternal(&emptyVars)); + intrusive_ptr<Expression> pReplacement(ExpressionConstant::create(pResult)); + return pReplacement; } - void ExpressionNary::addDependencies(DepsTracker* deps, vector<string>* path) const { - for(ExpressionVector::const_iterator i(vpOperand.begin()); - i != vpOperand.end(); ++i) { - (*i)->addDependencies(deps); + // Remaining optimizations are only for associative and commutative expressions. + if (!isAssociativeAndCommutative()) + return this; + + // Process vpOperand to split it into constant and nonconstant vectors. + // This can leave vpOperand in an invalid state that is cleaned up after the loop. 
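+    // For illustration, with an associative and commutative op such as $add:
+    //     {$add: [1, "$a", {$add: [2, "$b"]}, 3]}
+    // has the inner $add flattened into this operand list, and the constants 1, 3, 2
+    // are then collapsed into one trailing constant, leaving roughly
+    //     {$add: ["$a", "$b", 6]}.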
+ ExpressionVector constExprs; + ExpressionVector nonConstExprs; + for (size_t i = 0; i < vpOperand.size(); ++i) { // NOTE: vpOperand grows in loop + intrusive_ptr<Expression> expr = vpOperand[i]; + if (dynamic_cast<ExpressionConstant*>(expr.get())) { + constExprs.push_back(expr); + } else { + // If the child operand is the same type as this, then we can + // extract its operands and inline them here because we know + // this is commutative and associative. We detect sameness of + // the child operator by checking for equality of the opNames + ExpressionNary* nary = dynamic_cast<ExpressionNary*>(expr.get()); + if (!nary || !str::equals(nary->getOpName(), getOpName())) { + nonConstExprs.push_back(expr); + } else { + // same expression, so flatten by adding to vpOperand which + // will be processed later in this loop. + vpOperand.insert(vpOperand.end(), nary->vpOperand.begin(), nary->vpOperand.end()); + } } } - void ExpressionNary::addOperand(const intrusive_ptr<Expression>& pExpression) { - vpOperand.push_back(pExpression); + // collapse all constant expressions (if any) + Value constValue; + if (!constExprs.empty()) { + vpOperand = constExprs; + Variables emptyVars; + constValue = evaluateInternal(&emptyVars); } - Value ExpressionNary::serialize(bool explain) const { - const size_t nOperand = vpOperand.size(); - vector<Value> array; - /* build up the array */ - for(size_t i = 0; i < nOperand; i++) - array.push_back(vpOperand[i]->serialize(explain)); + // now set the final expression list with constant (if any) at the end + vpOperand = nonConstExprs; + if (!constExprs.empty()) { + vpOperand.push_back(ExpressionConstant::create(constValue)); + } - return Value(DOC(getOpName() << array)); + return this; +} + +void ExpressionNary::addDependencies(DepsTracker* deps, vector<string>* path) const { + for (ExpressionVector::const_iterator i(vpOperand.begin()); i != vpOperand.end(); ++i) { + (*i)->addDependencies(deps); } +} - /* ------------------------- ExpressionNot ----------------------------- */ +void ExpressionNary::addOperand(const intrusive_ptr<Expression>& pExpression) { + vpOperand.push_back(pExpression); +} - Value ExpressionNot::evaluateInternal(Variables* vars) const { - Value pOp(vpOperand[0]->evaluateInternal(vars)); +Value ExpressionNary::serialize(bool explain) const { + const size_t nOperand = vpOperand.size(); + vector<Value> array; + /* build up the array */ + for (size_t i = 0; i < nOperand; i++) + array.push_back(vpOperand[i]->serialize(explain)); - bool b = pOp.coerceToBool(); - return Value(!b); - } + return Value(DOC(getOpName() << array)); +} - REGISTER_EXPRESSION("$not", ExpressionNot::parse); - const char* ExpressionNot::getOpName() const { - return "$not"; - } +/* ------------------------- ExpressionNot ----------------------------- */ - /* -------------------------- ExpressionOr ----------------------------- */ +Value ExpressionNot::evaluateInternal(Variables* vars) const { + Value pOp(vpOperand[0]->evaluateInternal(vars)); - Value ExpressionOr::evaluateInternal(Variables* vars) const { - const size_t n = vpOperand.size(); - for(size_t i = 0; i < n; ++i) { - Value pValue(vpOperand[i]->evaluateInternal(vars)); - if (pValue.coerceToBool()) - return Value(true); - } + bool b = pOp.coerceToBool(); + return Value(!b); +} - return Value(false); - } +REGISTER_EXPRESSION("$not", ExpressionNot::parse); +const char* ExpressionNot::getOpName() const { + return "$not"; +} - intrusive_ptr<Expression> ExpressionOr::optimize() { - /* optimize the disjunction as much as possible */ - 
intrusive_ptr<Expression> pE(ExpressionNary::optimize()); +/* -------------------------- ExpressionOr ----------------------------- */ - /* if the result isn't a disjunction, we can't do anything */ - ExpressionOr *pOr = dynamic_cast<ExpressionOr *>(pE.get()); - if (!pOr) - return pE; +Value ExpressionOr::evaluateInternal(Variables* vars) const { + const size_t n = vpOperand.size(); + for (size_t i = 0; i < n; ++i) { + Value pValue(vpOperand[i]->evaluateInternal(vars)); + if (pValue.coerceToBool()) + return Value(true); + } - /* - Check the last argument on the result; if it's not constant (as - promised by ExpressionNary::optimize(),) then there's nothing - we can do. - */ - const size_t n = pOr->vpOperand.size(); - // ExpressionNary::optimize() generates an ExpressionConstant for {$or:[]}. - verify(n > 0); - intrusive_ptr<Expression> pLast(pOr->vpOperand[n - 1]); - const ExpressionConstant *pConst = - dynamic_cast<ExpressionConstant *>(pLast.get()); - if (!pConst) - return pE; + return Value(false); +} - /* - Evaluate and coerce the last argument to a boolean. If it's true, - then we can replace this entire expression. - */ - bool last = pConst->getValue().coerceToBool(); - if (last) { - intrusive_ptr<ExpressionConstant> pFinal( - ExpressionConstant::create(Value(true))); - return pFinal; - } +intrusive_ptr<Expression> ExpressionOr::optimize() { + /* optimize the disjunction as much as possible */ + intrusive_ptr<Expression> pE(ExpressionNary::optimize()); - /* - If we got here, the final operand was false, so we don't need it - anymore. If there was only one other operand, we don't need the - conjunction either. Note we still need to keep the promise that - the result will be a boolean. - */ - if (n == 2) { - intrusive_ptr<Expression> pFinal( - ExpressionCoerceToBool::create(pOr->vpOperand[0])); - return pFinal; - } + /* if the result isn't a disjunction, we can't do anything */ + ExpressionOr* pOr = dynamic_cast<ExpressionOr*>(pE.get()); + if (!pOr) + return pE; - /* - Remove the final "false" value, and return the new expression. - */ - pOr->vpOperand.resize(n - 1); + /* + Check the last argument on the result; if it's not constant (as + promised by ExpressionNary::optimize(),) then there's nothing + we can do. + */ + const size_t n = pOr->vpOperand.size(); + // ExpressionNary::optimize() generates an ExpressionConstant for {$or:[]}. + verify(n > 0); + intrusive_ptr<Expression> pLast(pOr->vpOperand[n - 1]); + const ExpressionConstant* pConst = dynamic_cast<ExpressionConstant*>(pLast.get()); + if (!pConst) return pE; - } - REGISTER_EXPRESSION("$or", ExpressionOr::parse); - const char* ExpressionOr::getOpName() const { - return "$or"; + /* + Evaluate and coerce the last argument to a boolean. If it's true, + then we can replace this entire expression. + */ + bool last = pConst->getValue().coerceToBool(); + if (last) { + intrusive_ptr<ExpressionConstant> pFinal(ExpressionConstant::create(Value(true))); + return pFinal; } - /* ------------------------- ExpressionSecond ----------------------------- */ - - Value ExpressionSecond::evaluateInternal(Variables* vars) const { - Value pDate(vpOperand[0]->evaluateInternal(vars)); - return Value(extract(pDate.coerceToTm())); + /* + If we got here, the final operand was false, so we don't need it + anymore. If there was only one other operand, we don't need the + conjunction either. Note we still need to keep the promise that + the result will be a boolean. 
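+
+      For illustration: {$or: ["$a", "$b", false]} drops the false and becomes
+      {$or: ["$a", "$b"]}, while {$or: ["$a", false]} becomes a coerce-to-bool
+      of "$a"; a constant true operand was already folded to true above.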
+ */ + if (n == 2) { + intrusive_ptr<Expression> pFinal(ExpressionCoerceToBool::create(pOr->vpOperand[0])); + return pFinal; } - REGISTER_EXPRESSION("$second", ExpressionSecond::parse); - const char* ExpressionSecond::getOpName() const { - return "$second"; - } + /* + Remove the final "false" value, and return the new expression. + */ + pOr->vpOperand.resize(n - 1); + return pE; +} - namespace { - ValueSet arrayToSet(const Value& val) { - const vector<Value>& array = val.getArray(); - return ValueSet(array.begin(), array.end()); - } - } +REGISTER_EXPRESSION("$or", ExpressionOr::parse); +const char* ExpressionOr::getOpName() const { + return "$or"; +} - /* ----------------------- ExpressionSetDifference ---------------------------- */ +/* ------------------------- ExpressionSecond ----------------------------- */ - Value ExpressionSetDifference::evaluateInternal(Variables* vars) const { - const Value lhs = vpOperand[0]->evaluateInternal(vars); - const Value rhs = vpOperand[1]->evaluateInternal(vars); +Value ExpressionSecond::evaluateInternal(Variables* vars) const { + Value pDate(vpOperand[0]->evaluateInternal(vars)); + return Value(extract(pDate.coerceToTm())); +} - if (lhs.nullish() || rhs.nullish()) { - return Value(BSONNULL); - } +REGISTER_EXPRESSION("$second", ExpressionSecond::parse); +const char* ExpressionSecond::getOpName() const { + return "$second"; +} - uassert(17048, str::stream() << "both operands of $setDifference must be arrays. First " - << "argument is of type: " << typeName(lhs.getType()), - lhs.getType() == Array); - uassert(17049, str::stream() << "both operands of $setDifference must be arrays. Second " - << "argument is of type: " << typeName(rhs.getType()), - rhs.getType() == Array); +namespace { +ValueSet arrayToSet(const Value& val) { + const vector<Value>& array = val.getArray(); + return ValueSet(array.begin(), array.end()); +} +} - ValueSet rhsSet = arrayToSet(rhs); - const vector<Value>& lhsArray = lhs.getArray(); - vector<Value> returnVec; +/* ----------------------- ExpressionSetDifference ---------------------------- */ - for (vector<Value>::const_iterator it = lhsArray.begin(); it != lhsArray.end(); ++it) { - // rhsSet serves the dual role of filtering out elements that were originally present - // in RHS and of eleminating duplicates from LHS - if (rhsSet.insert(*it).second) { - returnVec.push_back(*it); - } - } - return Value(std::move(returnVec)); - } +Value ExpressionSetDifference::evaluateInternal(Variables* vars) const { + const Value lhs = vpOperand[0]->evaluateInternal(vars); + const Value rhs = vpOperand[1]->evaluateInternal(vars); - REGISTER_EXPRESSION("$setDifference", ExpressionSetDifference::parse); - const char* ExpressionSetDifference::getOpName() const { - return "$setDifference"; + if (lhs.nullish() || rhs.nullish()) { + return Value(BSONNULL); } - /* ----------------------- ExpressionSetEquals ---------------------------- */ + uassert(17048, + str::stream() << "both operands of $setDifference must be arrays. First " + << "argument is of type: " << typeName(lhs.getType()), + lhs.getType() == Array); + uassert(17049, + str::stream() << "both operands of $setDifference must be arrays. 
Second " + << "argument is of type: " << typeName(rhs.getType()), + rhs.getType() == Array); - void ExpressionSetEquals::validateArguments(const ExpressionVector& args) const { - uassert(17045, str::stream() << "$setEquals needs at least two arguments had: " - << args.size(), - args.size() >= 2); + ValueSet rhsSet = arrayToSet(rhs); + const vector<Value>& lhsArray = lhs.getArray(); + vector<Value> returnVec; + + for (vector<Value>::const_iterator it = lhsArray.begin(); it != lhsArray.end(); ++it) { + // rhsSet serves the dual role of filtering out elements that were originally present + // in RHS and of eleminating duplicates from LHS + if (rhsSet.insert(*it).second) { + returnVec.push_back(*it); + } } + return Value(std::move(returnVec)); +} - Value ExpressionSetEquals::evaluateInternal(Variables* vars) const { - const size_t n = vpOperand.size(); - std::set<Value> lhs; +REGISTER_EXPRESSION("$setDifference", ExpressionSetDifference::parse); +const char* ExpressionSetDifference::getOpName() const { + return "$setDifference"; +} - for (size_t i = 0; i < n; i++) { - const Value nextEntry = vpOperand[i]->evaluateInternal(vars); - uassert(17044, str::stream() << "All operands of $setEquals must be arrays. One " - << "argument is of type: " - << typeName(nextEntry.getType()), - nextEntry.getType() == Array); +/* ----------------------- ExpressionSetEquals ---------------------------- */ - if (i == 0) { - lhs.insert(nextEntry.getArray().begin(), nextEntry.getArray().end()); - } - else { - const std::set<Value> rhs(nextEntry.getArray().begin(), nextEntry.getArray().end()); - if (lhs != rhs) { - return Value(false); - } +void ExpressionSetEquals::validateArguments(const ExpressionVector& args) const { + uassert(17045, + str::stream() << "$setEquals needs at least two arguments had: " << args.size(), + args.size() >= 2); +} + +Value ExpressionSetEquals::evaluateInternal(Variables* vars) const { + const size_t n = vpOperand.size(); + std::set<Value> lhs; + + for (size_t i = 0; i < n; i++) { + const Value nextEntry = vpOperand[i]->evaluateInternal(vars); + uassert(17044, + str::stream() << "All operands of $setEquals must be arrays. One " + << "argument is of type: " << typeName(nextEntry.getType()), + nextEntry.getType() == Array); + + if (i == 0) { + lhs.insert(nextEntry.getArray().begin(), nextEntry.getArray().end()); + } else { + const std::set<Value> rhs(nextEntry.getArray().begin(), nextEntry.getArray().end()); + if (lhs != rhs) { + return Value(false); } } - return Value(true); } + return Value(true); +} - REGISTER_EXPRESSION("$setEquals", ExpressionSetEquals::parse); - const char* ExpressionSetEquals::getOpName() const { - return "$setEquals"; - } +REGISTER_EXPRESSION("$setEquals", ExpressionSetEquals::parse); +const char* ExpressionSetEquals::getOpName() const { + return "$setEquals"; +} - /* ----------------------- ExpressionSetIntersection ---------------------------- */ +/* ----------------------- ExpressionSetIntersection ---------------------------- */ - Value ExpressionSetIntersection::evaluateInternal(Variables* vars) const { - const size_t n = vpOperand.size(); - ValueSet currentIntersection; - for (size_t i = 0; i < n; i++) { - const Value nextEntry = vpOperand[i]->evaluateInternal(vars); - if (nextEntry.nullish()) { - return Value(BSONNULL); - } - uassert(17047, str::stream() << "All operands of $setIntersection must be arrays. 
One " - << "argument is of type: " - << typeName(nextEntry.getType()), - nextEntry.getType() == Array); - - if (i == 0) { - currentIntersection.insert(nextEntry.getArray().begin(), - nextEntry.getArray().end()); +Value ExpressionSetIntersection::evaluateInternal(Variables* vars) const { + const size_t n = vpOperand.size(); + ValueSet currentIntersection; + for (size_t i = 0; i < n; i++) { + const Value nextEntry = vpOperand[i]->evaluateInternal(vars); + if (nextEntry.nullish()) { + return Value(BSONNULL); + } + uassert(17047, + str::stream() << "All operands of $setIntersection must be arrays. One " + << "argument is of type: " << typeName(nextEntry.getType()), + nextEntry.getType() == Array); + + if (i == 0) { + currentIntersection.insert(nextEntry.getArray().begin(), nextEntry.getArray().end()); + } else { + ValueSet nextSet = arrayToSet(nextEntry); + if (currentIntersection.size() > nextSet.size()) { + // to iterate over whichever is the smaller set + nextSet.swap(currentIntersection); } - else { - ValueSet nextSet = arrayToSet(nextEntry); - if (currentIntersection.size() > nextSet.size()) { - // to iterate over whichever is the smaller set - nextSet.swap(currentIntersection); - } - ValueSet::iterator it = currentIntersection.begin(); - while (it != currentIntersection.end()) { - if (!nextSet.count(*it)) { - ValueSet::iterator del = it; - ++it; - currentIntersection.erase(del); - } - else { - ++it; - } + ValueSet::iterator it = currentIntersection.begin(); + while (it != currentIntersection.end()) { + if (!nextSet.count(*it)) { + ValueSet::iterator del = it; + ++it; + currentIntersection.erase(del); + } else { + ++it; } } - if (currentIntersection.empty()) { - break; - } } - return Value(vector<Value>(currentIntersection.begin(), - currentIntersection.end())); + if (currentIntersection.empty()) { + break; + } } + return Value(vector<Value>(currentIntersection.begin(), currentIntersection.end())); +} - REGISTER_EXPRESSION("$setIntersection", ExpressionSetIntersection::parse); - const char* ExpressionSetIntersection::getOpName() const { - return "$setIntersection"; - } +REGISTER_EXPRESSION("$setIntersection", ExpressionSetIntersection::parse); +const char* ExpressionSetIntersection::getOpName() const { + return "$setIntersection"; +} - /* ----------------------- ExpressionSetIsSubset ---------------------------- */ +/* ----------------------- ExpressionSetIsSubset ---------------------------- */ namespace { - Value setIsSubsetHelper(const vector<Value>& lhs, const ValueSet& rhs) { - // do not shortcircuit when lhs.size() > rhs.size() - // because lhs can have redundant entries - for (vector<Value>::const_iterator it = lhs.begin(); it != lhs.end(); ++it) { - if (!rhs.count(*it)) { - return Value(false); - } +Value setIsSubsetHelper(const vector<Value>& lhs, const ValueSet& rhs) { + // do not shortcircuit when lhs.size() > rhs.size() + // because lhs can have redundant entries + for (vector<Value>::const_iterator it = lhs.begin(); it != lhs.end(); ++it) { + if (!rhs.count(*it)) { + return Value(false); } - return Value(true); } + return Value(true); +} } - Value ExpressionSetIsSubset::evaluateInternal(Variables* vars) const { - const Value lhs = vpOperand[0]->evaluateInternal(vars); - const Value rhs = vpOperand[1]->evaluateInternal(vars); - - uassert(17046, str::stream() << "both operands of $setIsSubset must be arrays. First " - << "argument is of type: " << typeName(lhs.getType()), - lhs.getType() == Array); - uassert(17042, str::stream() << "both operands of $setIsSubset must be arrays. 
Second " - << "argument is of type: " << typeName(rhs.getType()), - rhs.getType() == Array); +Value ExpressionSetIsSubset::evaluateInternal(Variables* vars) const { + const Value lhs = vpOperand[0]->evaluateInternal(vars); + const Value rhs = vpOperand[1]->evaluateInternal(vars); - return setIsSubsetHelper(lhs.getArray(), arrayToSet(rhs)); - } + uassert(17046, + str::stream() << "both operands of $setIsSubset must be arrays. First " + << "argument is of type: " << typeName(lhs.getType()), + lhs.getType() == Array); + uassert(17042, + str::stream() << "both operands of $setIsSubset must be arrays. Second " + << "argument is of type: " << typeName(rhs.getType()), + rhs.getType() == Array); - /** - * This class handles the case where the RHS set is constant. - * - * Since it is constant we can construct the hashset once which makes the runtime performance - * effectively constant with respect to the size of RHS. Large, constant RHS is expected to be a - * major use case for $redact and this has been verified to improve performance significantly. - */ - class ExpressionSetIsSubset::Optimized : public ExpressionSetIsSubset { - public: - Optimized(const ValueSet& cachedRhsSet, const ExpressionVector& operands) - : _cachedRhsSet(cachedRhsSet) - { - vpOperand = operands; - } + return setIsSubsetHelper(lhs.getArray(), arrayToSet(rhs)); +} - virtual Value evaluateInternal(Variables* vars) const { - const Value lhs = vpOperand[0]->evaluateInternal(vars); +/** + * This class handles the case where the RHS set is constant. + * + * Since it is constant we can construct the hashset once which makes the runtime performance + * effectively constant with respect to the size of RHS. Large, constant RHS is expected to be a + * major use case for $redact and this has been verified to improve performance significantly. + */ +class ExpressionSetIsSubset::Optimized : public ExpressionSetIsSubset { +public: + Optimized(const ValueSet& cachedRhsSet, const ExpressionVector& operands) + : _cachedRhsSet(cachedRhsSet) { + vpOperand = operands; + } - uassert(17310, str::stream() << "both operands of $setIsSubset must be arrays. First " - << "argument is of type: " << typeName(lhs.getType()), - lhs.getType() == Array); + virtual Value evaluateInternal(Variables* vars) const { + const Value lhs = vpOperand[0]->evaluateInternal(vars); - return setIsSubsetHelper(lhs.getArray(), _cachedRhsSet); - } + uassert(17310, + str::stream() << "both operands of $setIsSubset must be arrays. First " + << "argument is of type: " << typeName(lhs.getType()), + lhs.getType() == Array); - private: - const ValueSet _cachedRhsSet; - }; + return setIsSubsetHelper(lhs.getArray(), _cachedRhsSet); + } - intrusive_ptr<Expression> ExpressionSetIsSubset::optimize() { - // perfore basic optimizations - intrusive_ptr<Expression> optimized = ExpressionNary::optimize(); +private: + const ValueSet _cachedRhsSet; +}; - // if ExpressionNary::optimize() created a new value, return it directly - if (optimized.get() != this) - return optimized; +intrusive_ptr<Expression> ExpressionSetIsSubset::optimize() { + // perfore basic optimizations + intrusive_ptr<Expression> optimized = ExpressionNary::optimize(); - if (ExpressionConstant* ec = dynamic_cast<ExpressionConstant*>(vpOperand[1].get())) { - const Value rhs = ec->getValue(); - uassert(17311, str::stream() << "both operands of $setIsSubset must be arrays. 
Second " - << "argument is of type: " << typeName(rhs.getType()), - rhs.getType() == Array); + // if ExpressionNary::optimize() created a new value, return it directly + if (optimized.get() != this) + return optimized; - return new Optimized(arrayToSet(rhs), vpOperand); - } + if (ExpressionConstant* ec = dynamic_cast<ExpressionConstant*>(vpOperand[1].get())) { + const Value rhs = ec->getValue(); + uassert(17311, + str::stream() << "both operands of $setIsSubset must be arrays. Second " + << "argument is of type: " << typeName(rhs.getType()), + rhs.getType() == Array); - return optimized; + return new Optimized(arrayToSet(rhs), vpOperand); } - REGISTER_EXPRESSION("$setIsSubset", ExpressionSetIsSubset::parse); - const char* ExpressionSetIsSubset::getOpName() const { - return "$setIsSubset"; - } + return optimized; +} - /* ----------------------- ExpressionSetUnion ---------------------------- */ +REGISTER_EXPRESSION("$setIsSubset", ExpressionSetIsSubset::parse); +const char* ExpressionSetIsSubset::getOpName() const { + return "$setIsSubset"; +} - Value ExpressionSetUnion::evaluateInternal(Variables* vars) const { - ValueSet unionedSet; - const size_t n = vpOperand.size(); - for (size_t i = 0; i < n; i++) { - const Value newEntries = vpOperand[i]->evaluateInternal(vars); - if (newEntries.nullish()) { - return Value(BSONNULL); - } - uassert(17043, str::stream() << "All operands of $setUnion must be arrays. One argument" - << " is of type: " << typeName(newEntries.getType()), - newEntries.getType() == Array); +/* ----------------------- ExpressionSetUnion ---------------------------- */ - unionedSet.insert(newEntries.getArray().begin(), newEntries.getArray().end()); +Value ExpressionSetUnion::evaluateInternal(Variables* vars) const { + ValueSet unionedSet; + const size_t n = vpOperand.size(); + for (size_t i = 0; i < n; i++) { + const Value newEntries = vpOperand[i]->evaluateInternal(vars); + if (newEntries.nullish()) { + return Value(BSONNULL); } - return Value(vector<Value>(unionedSet.begin(), unionedSet.end())); - } + uassert(17043, + str::stream() << "All operands of $setUnion must be arrays. 
One argument" + << " is of type: " << typeName(newEntries.getType()), + newEntries.getType() == Array); - REGISTER_EXPRESSION("$setUnion", ExpressionSetUnion::parse); - const char* ExpressionSetUnion::getOpName() const { - return "$setUnion"; + unionedSet.insert(newEntries.getArray().begin(), newEntries.getArray().end()); } + return Value(vector<Value>(unionedSet.begin(), unionedSet.end())); +} - /* ----------------------- ExpressionIsArray ---------------------------- */ - - Value ExpressionIsArray::evaluateInternal(Variables* vars) const { - Value argument = vpOperand[0]->evaluateInternal(vars); - return Value(argument.getType() == Array); - } - - REGISTER_EXPRESSION("$isArray", ExpressionIsArray::parse); - const char* ExpressionIsArray::getOpName() const { - return "$isArray"; - } - - /* ----------------------- ExpressionSize ---------------------------- */ - - Value ExpressionSize::evaluateInternal(Variables* vars) const { - Value array = vpOperand[0]->evaluateInternal(vars); - - uassert(17124, str::stream() << "The argument to $size must be an Array, but was of type: " - << typeName(array.getType()), - array.getType() == Array); - return Value::createIntOrLong(array.getArray().size()); - } +REGISTER_EXPRESSION("$setUnion", ExpressionSetUnion::parse); +const char* ExpressionSetUnion::getOpName() const { + return "$setUnion"; +} - REGISTER_EXPRESSION("$size", ExpressionSize::parse); - const char* ExpressionSize::getOpName() const { - return "$size"; - } +/* ----------------------- ExpressionIsArray ---------------------------- */ - /* ----------------------- ExpressionStrcasecmp ---------------------------- */ +Value ExpressionIsArray::evaluateInternal(Variables* vars) const { + Value argument = vpOperand[0]->evaluateInternal(vars); + return Value(argument.getType() == Array); +} - Value ExpressionStrcasecmp::evaluateInternal(Variables* vars) const { - Value pString1(vpOperand[0]->evaluateInternal(vars)); - Value pString2(vpOperand[1]->evaluateInternal(vars)); +REGISTER_EXPRESSION("$isArray", ExpressionIsArray::parse); +const char* ExpressionIsArray::getOpName() const { + return "$isArray"; +} - /* boost::iequals returns a bool not an int so strings must actually be allocated */ - string str1 = boost::to_upper_copy( pString1.coerceToString() ); - string str2 = boost::to_upper_copy( pString2.coerceToString() ); - int result = str1.compare(str2); +/* ----------------------- ExpressionSize ---------------------------- */ - if (result == 0) - return Value(0); - else if (result > 0) - return Value(1); - else - return Value(-1); - } +Value ExpressionSize::evaluateInternal(Variables* vars) const { + Value array = vpOperand[0]->evaluateInternal(vars); - REGISTER_EXPRESSION("$strcasecmp", ExpressionStrcasecmp::parse); - const char* ExpressionStrcasecmp::getOpName() const { - return "$strcasecmp"; - } + uassert(17124, + str::stream() << "The argument to $size must be an Array, but was of type: " + << typeName(array.getType()), + array.getType() == Array); + return Value::createIntOrLong(array.getArray().size()); +} - /* ----------------------- ExpressionSubstr ---------------------------- */ +REGISTER_EXPRESSION("$size", ExpressionSize::parse); +const char* ExpressionSize::getOpName() const { + return "$size"; +} - Value ExpressionSubstr::evaluateInternal(Variables* vars) const { - Value pString(vpOperand[0]->evaluateInternal(vars)); - Value pLower(vpOperand[1]->evaluateInternal(vars)); - Value pLength(vpOperand[2]->evaluateInternal(vars)); +/* ----------------------- ExpressionStrcasecmp 
---------------------------- */ - string str = pString.coerceToString(); - uassert(16034, str::stream() << getOpName() << - ": starting index must be a numeric type (is BSON type " << - typeName(pLower.getType()) << ")", - (pLower.getType() == NumberInt - || pLower.getType() == NumberLong - || pLower.getType() == NumberDouble)); - uassert(16035, str::stream() << getOpName() << - ": length must be a numeric type (is BSON type " << - typeName(pLength.getType() )<< ")", - (pLength.getType() == NumberInt - || pLength.getType() == NumberLong - || pLength.getType() == NumberDouble)); +Value ExpressionStrcasecmp::evaluateInternal(Variables* vars) const { + Value pString1(vpOperand[0]->evaluateInternal(vars)); + Value pString2(vpOperand[1]->evaluateInternal(vars)); - string::size_type lower = static_cast< string::size_type >( pLower.coerceToLong() ); - string::size_type length = static_cast< string::size_type >( pLength.coerceToLong() ); + /* boost::iequals returns a bool not an int so strings must actually be allocated */ + string str1 = boost::to_upper_copy(pString1.coerceToString()); + string str2 = boost::to_upper_copy(pString2.coerceToString()); + int result = str1.compare(str2); - auto isContinuationByte = [](char c){ return ((c & 0xc0) == 0x80); }; + if (result == 0) + return Value(0); + else if (result > 0) + return Value(1); + else + return Value(-1); +} - uassert(28656, str::stream() << getOpName() << - ": Invalid range, starting index is a UTF-8 continuation byte.", - (lower >= str.length() || !isContinuationByte(str[lower]))); +REGISTER_EXPRESSION("$strcasecmp", ExpressionStrcasecmp::parse); +const char* ExpressionStrcasecmp::getOpName() const { + return "$strcasecmp"; +} - // Check the byte after the last character we'd return. If it is a continuation byte, that - // means we're in the middle of a UTF-8 character. - uassert(28657, str::stream() << getOpName() << - ": Invalid range, ending index is in the middle of a UTF-8 character.", - (lower + length >= str.length() || !isContinuationByte(str[lower + length]))); +/* ----------------------- ExpressionSubstr ---------------------------- */ + +Value ExpressionSubstr::evaluateInternal(Variables* vars) const { + Value pString(vpOperand[0]->evaluateInternal(vars)); + Value pLower(vpOperand[1]->evaluateInternal(vars)); + Value pLength(vpOperand[2]->evaluateInternal(vars)); + + string str = pString.coerceToString(); + uassert(16034, + str::stream() << getOpName() + << ": starting index must be a numeric type (is BSON type " + << typeName(pLower.getType()) << ")", + (pLower.getType() == NumberInt || pLower.getType() == NumberLong || + pLower.getType() == NumberDouble)); + uassert(16035, + str::stream() << getOpName() << ": length must be a numeric type (is BSON type " + << typeName(pLength.getType()) << ")", + (pLength.getType() == NumberInt || pLength.getType() == NumberLong || + pLength.getType() == NumberDouble)); + + string::size_type lower = static_cast<string::size_type>(pLower.coerceToLong()); + string::size_type length = static_cast<string::size_type>(pLength.coerceToLong()); + + auto isContinuationByte = [](char c) { return ((c & 0xc0) == 0x80); }; + + uassert(28656, + str::stream() << getOpName() + << ": Invalid range, starting index is a UTF-8 continuation byte.", + (lower >= str.length() || !isContinuationByte(str[lower]))); + + // Check the byte after the last character we'd return. If it is a continuation byte, that + // means we're in the middle of a UTF-8 character. 
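+    // For illustration: in "né" the é is the byte pair 0xc3 0xa9. A starting index of 2
+    // lands on the continuation byte 0xa9 and fails the check above; a start of 1 with
+    // length 1 would cut the character in half and fails the check below.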
+    uassert(
+        28657,
+        str::stream() << getOpName()
+                      << ": Invalid range, ending index is in the middle of a UTF-8 character.",
+        (lower + length >= str.length() || !isContinuationByte(str[lower + length])));
+
+    if (lower >= str.length()) {
+        // If lower > str.length() then string::substr() will throw out_of_range, so return an
+        // empty string if lower is not a valid string index.
+        return Value("");
+    }
+    return Value(str.substr(lower, length));
+}

-        if ( lower >= str.length() ) {
-            // If lower > str.length() then string::substr() will throw out_of_range, so return an
-            // empty string if lower is not a valid string index.
-            return Value("");
-        }
-        return Value(str.substr(lower, length));
-    }

+REGISTER_EXPRESSION("$substr", ExpressionSubstr::parse);
+const char* ExpressionSubstr::getOpName() const {
+    return "$substr";
+}

-    REGISTER_EXPRESSION("$substr", ExpressionSubstr::parse);
-    const char* ExpressionSubstr::getOpName() const {
-        return "$substr";

+/* ----------------------- ExpressionSubtract ---------------------------- */
+
+Value ExpressionSubtract::evaluateInternal(Variables* vars) const {
+    Value lhs = vpOperand[0]->evaluateInternal(vars);
+    Value rhs = vpOperand[1]->evaluateInternal(vars);
+
+    BSONType diffType = Value::getWidestNumeric(rhs.getType(), lhs.getType());
+
+    if (diffType == NumberDouble) {
+        double right = rhs.coerceToDouble();
+        double left = lhs.coerceToDouble();
+        return Value(left - right);
+    } else if (diffType == NumberLong) {
+        long long right = rhs.coerceToLong();
+        long long left = lhs.coerceToLong();
+        return Value(left - right);
+    } else if (diffType == NumberInt) {
+        long long right = rhs.coerceToLong();
+        long long left = lhs.coerceToLong();
+        return Value::createIntOrLong(left - right);
+    } else if (lhs.nullish() || rhs.nullish()) {
+        return Value(BSONNULL);
+    } else if (lhs.getType() == Date) {
+        if (rhs.getType() == Date) {
+            long long timeDelta = lhs.getDate() - rhs.getDate();
+            return Value(timeDelta);
+        } else if (rhs.numeric()) {
+            long long millisSinceEpoch = lhs.getDate() - rhs.coerceToLong();
+            return Value(Date_t::fromMillisSinceEpoch(millisSinceEpoch));
+        } else {
+            uasserted(16613,
+                      str::stream() << "can't $subtract a " << typeName(rhs.getType())
+                                    << " from a Date");
+        }
+    } else {
+        uasserted(16556,
+                  str::stream() << "can't $subtract a " << typeName(rhs.getType()) << " from a "
+                                << typeName(lhs.getType()));
+    }
+}

-    /* ----------------------- ExpressionSubtract ---------------------------- */
-
-    Value ExpressionSubtract::evaluateInternal(Variables* vars) const {
-        Value lhs = vpOperand[0]->evaluateInternal(vars);
-        Value rhs = vpOperand[1]->evaluateInternal(vars);

+REGISTER_EXPRESSION("$subtract", ExpressionSubtract::parse);
+const char* ExpressionSubtract::getOpName() const {
+    return "$subtract";
+}

-        BSONType diffType = Value::getWidestNumeric(rhs.getType(), lhs.getType());

+/* ------------------------- ExpressionToLower ----------------------------- */

-        if (diffType == NumberDouble) {
-            double right = rhs.coerceToDouble();
-            double left = lhs.coerceToDouble();
-            return Value(left - right);
-        }
-        else if (diffType == NumberLong) {
-            long long right = rhs.coerceToLong();
-            long long left = lhs.coerceToLong();
-            return Value(left - right);
-        }
-        else if (diffType == NumberInt) {
-            long long right = rhs.coerceToLong();
-            long long left = lhs.coerceToLong();
-            return Value::createIntOrLong(left - right);
-        }
-        else if (lhs.nullish() || rhs.nullish()) {
-            return Value(BSONNULL);
-        }
-        else if (lhs.getType() == Date) {
-            if (rhs.getType() 
== Date) { - long long timeDelta = lhs.getDate() - rhs.getDate(); - return Value(timeDelta); - } - else if (rhs.numeric()) { - long long millisSinceEpoch = lhs.getDate() - rhs.coerceToLong(); - return Value(Date_t::fromMillisSinceEpoch(millisSinceEpoch)); - } - else { - uasserted(16613, str::stream() << "cant $subtract a " - << typeName(rhs.getType()) - << " from a Date"); - } - } - else { - uasserted(16556, str::stream() << "cant $subtract a" - << typeName(rhs.getType()) - << " from a " - << typeName(lhs.getType())); - } - } +Value ExpressionToLower::evaluateInternal(Variables* vars) const { + Value pString(vpOperand[0]->evaluateInternal(vars)); + string str = pString.coerceToString(); + boost::to_lower(str); + return Value(str); +} - REGISTER_EXPRESSION("$subtract", ExpressionSubtract::parse); - const char* ExpressionSubtract::getOpName() const { - return "$subtract"; - } +REGISTER_EXPRESSION("$toLower", ExpressionToLower::parse); +const char* ExpressionToLower::getOpName() const { + return "$toLower"; +} - /* ------------------------- ExpressionToLower ----------------------------- */ +/* ------------------------- ExpressionToUpper -------------------------- */ - Value ExpressionToLower::evaluateInternal(Variables* vars) const { - Value pString(vpOperand[0]->evaluateInternal(vars)); - string str = pString.coerceToString(); - boost::to_lower(str); - return Value(str); - } +Value ExpressionToUpper::evaluateInternal(Variables* vars) const { + Value pString(vpOperand[0]->evaluateInternal(vars)); + string str(pString.coerceToString()); + boost::to_upper(str); + return Value(str); +} - REGISTER_EXPRESSION("$toLower", ExpressionToLower::parse); - const char* ExpressionToLower::getOpName() const { - return "$toLower"; - } +REGISTER_EXPRESSION("$toUpper", ExpressionToUpper::parse); +const char* ExpressionToUpper::getOpName() const { + return "$toUpper"; +} - /* ------------------------- ExpressionToUpper -------------------------- */ +/* ------------------------- ExpressionWeek ----------------------------- */ - Value ExpressionToUpper::evaluateInternal(Variables* vars) const { - Value pString(vpOperand[0]->evaluateInternal(vars)); - string str(pString.coerceToString()); - boost::to_upper(str); - return Value(str); - } +Value ExpressionWeek::evaluateInternal(Variables* vars) const { + Value pDate(vpOperand[0]->evaluateInternal(vars)); + return Value(extract(pDate.coerceToTm())); +} - REGISTER_EXPRESSION("$toUpper", ExpressionToUpper::parse); - const char* ExpressionToUpper::getOpName() const { - return "$toUpper"; - } +int ExpressionWeek::extract(const tm& tm) { + int dayOfWeek = tm.tm_wday; + int dayOfYear = tm.tm_yday; + int prevSundayDayOfYear = dayOfYear - dayOfWeek; // may be negative + int nextSundayDayOfYear = prevSundayDayOfYear + 7; // must be positive - /* ------------------------- ExpressionWeek ----------------------------- */ + // Return the zero based index of the week of the next sunday, equal to the one based index + // of the week of the previous sunday, which is to be returned. + int nextSundayWeek = nextSundayDayOfYear / 7; - Value ExpressionWeek::evaluateInternal(Variables* vars) const { - Value pDate(vpOperand[0]->evaluateInternal(vars)); - return Value(extract(pDate.coerceToTm())); + // Verify that the week calculation is consistent with strftime "%U". 
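    // Worked example (illustrative aside, not part of the patch): 2015-01-01
    // was a Thursday, so tm_yday == 0 and tm_wday == 4. Then
    // prevSundayDayOfYear == -4, nextSundayDayOfYear == 3, and
    // nextSundayWeek == 3 / 7 == 0, matching "%U", which counts every day
    // before the year's first Sunday as week 0.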
+ DEV { + char buf[3]; + verify(strftime(buf, 3, "%U", &tm)); + verify(int(str::toUnsigned(buf)) == nextSundayWeek); } - int ExpressionWeek::extract(const tm& tm) { - int dayOfWeek = tm.tm_wday; - int dayOfYear = tm.tm_yday; - int prevSundayDayOfYear = dayOfYear - dayOfWeek; // may be negative - int nextSundayDayOfYear = prevSundayDayOfYear + 7; // must be positive - - // Return the zero based index of the week of the next sunday, equal to the one based index - // of the week of the previous sunday, which is to be returned. - int nextSundayWeek = nextSundayDayOfYear / 7; - - // Verify that the week calculation is consistent with strftime "%U". - DEV{ - char buf[3]; - verify(strftime(buf,3,"%U",&tm)); - verify(int(str::toUnsigned(buf))==nextSundayWeek); - } - - return nextSundayWeek; - } + return nextSundayWeek; +} - REGISTER_EXPRESSION("$week", ExpressionWeek::parse); - const char* ExpressionWeek::getOpName() const { - return "$week"; - } +REGISTER_EXPRESSION("$week", ExpressionWeek::parse); +const char* ExpressionWeek::getOpName() const { + return "$week"; +} - /* ------------------------- ExpressionYear ----------------------------- */ +/* ------------------------- ExpressionYear ----------------------------- */ - Value ExpressionYear::evaluateInternal(Variables* vars) const { - Value pDate(vpOperand[0]->evaluateInternal(vars)); - return Value(extract(pDate.coerceToTm())); - } +Value ExpressionYear::evaluateInternal(Variables* vars) const { + Value pDate(vpOperand[0]->evaluateInternal(vars)); + return Value(extract(pDate.coerceToTm())); +} - REGISTER_EXPRESSION("$year", ExpressionYear::parse); - const char* ExpressionYear::getOpName() const { - return "$year"; - } +REGISTER_EXPRESSION("$year", ExpressionYear::parse); +const char* ExpressionYear::getOpName() const { + return "$year"; +} } diff --git a/src/mongo/db/pipeline/expression.h b/src/mongo/db/pipeline/expression.h index 258e0d6c034..bec2013e2ff 100644 --- a/src/mongo/db/pipeline/expression.h +++ b/src/mongo/db/pipeline/expression.h @@ -42,1032 +42,1047 @@ namespace mongo { - class BSONArrayBuilder; - class BSONElement; - class BSONObjBuilder; - class DocumentSource; - - // TODO: Look into merging with ExpressionContext and possibly ObjectCtx. - /// The state used as input and working space for Expressions. - class Variables { - MONGO_DISALLOW_COPYING(Variables); - public: - /** - * Each unique variable is assigned a unique id of this type - */ - typedef size_t Id; - - // This is only for expressions that use no variables (even ROOT). - Variables() :_numVars(0) {} - - explicit Variables(size_t numVars, const Document& root = Document()) - : _root(root) - , _rest(numVars == 0 ? NULL : new Value[numVars]) - , _numVars(numVars) - {} +class BSONArrayBuilder; +class BSONElement; +class BSONObjBuilder; +class DocumentSource; - static void uassertValidNameForUserWrite(StringData varName); - static void uassertValidNameForUserRead(StringData varName); +// TODO: Look into merging with ExpressionContext and possibly ObjectCtx. +/// The state used as input and working space for Expressions. 
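/// Usage sketch (illustrative, not part of the patch; idGen, xId, inputDoc and
/// expr are invented names): size the instance from a VariablesIdGenerator,
/// seed ROOT with the input document, then hand it to Expression::evaluate():
///
///     Variables vars(idGen.getIdCount(), inputDoc);  // idGen: a VariablesIdGenerator
///     vars.setValue(xId, Value(5));                  // xId from defineVariable("x")
///     Value out = expr->evaluate(&vars);             // expr: a parsed Expression tree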
+class Variables { + MONGO_DISALLOW_COPYING(Variables); - static const Id ROOT_ID = Id(-1); +public: + /** + * Each unique variable is assigned a unique id of this type + */ + typedef size_t Id; - /** - * Use this instead of setValue for setting ROOT - */ - void setRoot(const Document& root) { _root = root; } - void clearRoot() { _root = Document(); } - const Document& getRoot() const { return _root; } + // This is only for expressions that use no variables (even ROOT). + Variables() : _numVars(0) {} - void setValue(Id id, const Value& value); - Value getValue(Id id) const; + explicit Variables(size_t numVars, const Document& root = Document()) + : _root(root), _rest(numVars == 0 ? NULL : new Value[numVars]), _numVars(numVars) {} - /** - * returns Document() for non-document values. - */ - Document getDocument(Id id) const; + static void uassertValidNameForUserWrite(StringData varName); + static void uassertValidNameForUserRead(StringData varName); - private: - Document _root; - const std::unique_ptr<Value[]> _rest; - const size_t _numVars; - }; + static const Id ROOT_ID = Id(-1); /** - * Generates Variables::Ids and keeps track of the number of Ids handed out. + * Use this instead of setValue for setting ROOT */ - class VariablesIdGenerator { - public: - VariablesIdGenerator() : _nextId(0) {} - - Variables::Id generateId() { return _nextId++; } - - /** - * Returns the number of Ids handed out by this Generator. - * Return value is intended to be passed to Variables constructor. - */ - Variables::Id getIdCount() const { return _nextId; } + void setRoot(const Document& root) { + _root = root; + } + void clearRoot() { + _root = Document(); + } + const Document& getRoot() const { + return _root; + } - private: - Variables::Id _nextId; - }; + void setValue(Id id, const Value& value); + Value getValue(Id id) const; /** - * This class represents the Variables that are defined in an Expression tree. - * - * All copies from a given instance share enough information to ensure unique Ids are assigned - * and to propagate back to the original instance enough information to correctly construct a - * Variables instance. + * returns Document() for non-document values. */ - class VariablesParseState { - public: - explicit VariablesParseState(VariablesIdGenerator* idGenerator) - : _idGenerator(idGenerator) - {} + Document getDocument(Id id) const; - /** - * Assigns a named variable a unique Id. This differs from all other variables, even - * others with the same name. - * - * The special variables ROOT and CURRENT are always implicitly defined with CURRENT - * equivalent to ROOT. If CURRENT is explicitly defined by a call to this function, it - * breaks that equivalence. - * - * NOTE: Name validation is responsibility of caller. - */ - Variables::Id defineVariable(StringData name); - - /** - * Returns the current Id for a variable. uasserts if the variable isn't defined. - */ - Variables::Id getVariable(StringData name) const; +private: + Document _root; + const std::unique_ptr<Value[]> _rest; + const size_t _numVars; +}; - private: - StringMap<Variables::Id> _variables; - VariablesIdGenerator* _idGenerator; - }; +/** + * Generates Variables::Ids and keeps track of the number of Ids handed out. + */ +class VariablesIdGenerator { +public: + VariablesIdGenerator() : _nextId(0) {} - class Expression : - public IntrusiveCounterUnsigned { - public: - virtual ~Expression() {}; + Variables::Id generateId() { + return _nextId++; + } - /* - Optimize the Expression. 
+ /** + * Returns the number of Ids handed out by this Generator. + * Return value is intended to be passed to Variables constructor. + */ + Variables::Id getIdCount() const { + return _nextId; + } - This provides an opportunity to do constant folding, or to - collapse nested operators that have the same precedence, such as - $add, $and, or $or. +private: + Variables::Id _nextId; +}; - The Expression should be replaced with the return value, which may - or may not be the same object. In the case of constant folding, - a computed expression may be replaced by a constant. +/** + * This class represents the Variables that are defined in an Expression tree. + * + * All copies from a given instance share enough information to ensure unique Ids are assigned + * and to propagate back to the original instance enough information to correctly construct a + * Variables instance. + */ +class VariablesParseState { +public: + explicit VariablesParseState(VariablesIdGenerator* idGenerator) : _idGenerator(idGenerator) {} - @returns the optimized Expression - */ - virtual boost::intrusive_ptr<Expression> optimize() { return this; } + /** + * Assigns a named variable a unique Id. This differs from all other variables, even + * others with the same name. + * + * The special variables ROOT and CURRENT are always implicitly defined with CURRENT + * equivalent to ROOT. If CURRENT is explicitly defined by a call to this function, it + * breaks that equivalence. + * + * NOTE: Name validation is responsibility of caller. + */ + Variables::Id defineVariable(StringData name); - /** - * Add this expression's field dependencies to the set - * - * Expressions are trees, so this is often recursive. - * - * @param deps Fully qualified paths to depended-on fields are added to this set. - * Empty std::string means need full document. - * @param path path to self if all ancestors are ExpressionObjects. - * Top-level ExpressionObject gets pointer to empty vector. - * If any other Expression is an ancestor, or in other cases - * where {a:1} inclusion objects aren't allowed, they get - * NULL. - */ - virtual void addDependencies(DepsTracker* deps, std::vector<std::string>* path=NULL) const = 0; + /** + * Returns the current Id for a variable. uasserts if the variable isn't defined. + */ + Variables::Id getVariable(StringData name) const; - /** simple expressions are just inclusion exclusion as supported by ExpressionObject */ - virtual bool isSimple() { return false; } +private: + StringMap<Variables::Id> _variables; + VariablesIdGenerator* _idGenerator; +}; +class Expression : public IntrusiveCounterUnsigned { +public: + virtual ~Expression(){}; - /** - * Serialize the Expression tree recursively. - * If explain is false, returns a Value parsable by parseOperand(). - */ - virtual Value serialize(bool explain) const = 0; + /* + Optimize the Expression. - /// Evaluate expression with specified inputs and return result. (only used by tests) - Value evaluate(const Document& root) const { - Variables vars(0, root); - return evaluate(&vars); - } + This provides an opportunity to do constant folding, or to + collapse nested operators that have the same precedence, such as + $add, $and, or $or. - /** - * Evaluate expression with specified inputs and return result. - * - * While vars is non-const, if properly constructed, subexpressions modifications to it - * should not effect outer expressions due to unique variable Ids. 
- */ - Value evaluate(Variables* vars) const { return evaluateInternal(vars); } + The Expression should be replaced with the return value, which may + or may not be the same object. In the case of constant folding, + a computed expression may be replaced by a constant. - /* - Utility class for parseObject() below. + @returns the optimized Expression + */ + virtual boost::intrusive_ptr<Expression> optimize() { + return this; + } - DOCUMENT_OK indicates that it is OK to use a Document in the current - context. - */ - class ObjectCtx { - public: - ObjectCtx(int options); - static const int DOCUMENT_OK = 0x0001; - static const int TOP_LEVEL = 0x0002; - static const int INCLUSION_OK = 0x0004; - - bool documentOk() const; - bool topLevel() const; - bool inclusionOk() const; - - private: - int options; - }; - - // - // Diagram of relationship between parse functions when parsing a $op: - // - // { someFieldOrArrayIndex: { $op: [ARGS] } } - // ^ parseExpression on inner $op BSONElement - // ^ parseObject on BSONObject - // ^ parseOperand on outer BSONElement wrapping the $op Object - // + /** + * Add this expression's field dependencies to the set + * + * Expressions are trees, so this is often recursive. + * + * @param deps Fully qualified paths to depended-on fields are added to this set. + * Empty std::string means need full document. + * @param path path to self if all ancestors are ExpressionObjects. + * Top-level ExpressionObject gets pointer to empty vector. + * If any other Expression is an ancestor, or in other cases + * where {a:1} inclusion objects aren't allowed, they get + * NULL. + */ + virtual void addDependencies(DepsTracker* deps, + std::vector<std::string>* path = NULL) const = 0; - /** - * Parses a BSON Object that could represent a functional expression or a Document - * expression. - */ - static boost::intrusive_ptr<Expression> parseObject( - BSONObj obj, - ObjectCtx *pCtx, - const VariablesParseState& vps); + /** simple expressions are just inclusion exclusion as supported by ExpressionObject */ + virtual bool isSimple() { + return false; + } - /** - * Parses a BSONElement which has already been determined to be functional expression. - * - * exprElement should be the only element inside the expression object. That is the - * field name should be the $op for the expression. - */ - static boost::intrusive_ptr<Expression> parseExpression( - BSONElement exprElement, - const VariablesParseState& vps); + /** + * Serialize the Expression tree recursively. + * If explain is false, returns a Value parsable by parseOperand(). + */ + virtual Value serialize(bool explain) const = 0; - /** - * Parses a BSONElement which is an operand in an Expression. - * - * This is the most generic parser and can parse ExpressionFieldPath, a literal, or a $op. - * If it is a $op, exprElement should be the outer element whose value is an Object - * containing the $op. - */ - static boost::intrusive_ptr<Expression> parseOperand( - BSONElement exprElement, - const VariablesParseState& vps); + /// Evaluate expression with specified inputs and return result. (only used by tests) + Value evaluate(const Document& root) const { + Variables vars(0, root); + return evaluate(&vars); + } - /* - Produce a field path std::string with the field prefix removed. + /** + * Evaluate expression with specified inputs and return result. + * + * While vars is non-const, if properly constructed, subexpressions modifications to it + * should not effect outer expressions due to unique variable Ids. 
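     * (Illustrative note, not part of the patch: each $let or $map binding
     * receives a fresh Variables::Id from VariablesParseState::defineVariable(),
     * so an inner variable named "x" writes to a different slot than any outer
     * "x", and the outer value survives the call.)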
+ */ + Value evaluate(Variables* vars) const { + return evaluateInternal(vars); + } - Throws an error if the field prefix is not present. + /* + Utility class for parseObject() below. - @param prefixedField the prefixed field - @returns the field path with the prefix removed - */ - static std::string removeFieldPrefix(const std::string &prefixedField); + DOCUMENT_OK indicates that it is OK to use a Document in the current + context. + */ + class ObjectCtx { + public: + ObjectCtx(int options); + static const int DOCUMENT_OK = 0x0001; + static const int TOP_LEVEL = 0x0002; + static const int INCLUSION_OK = 0x0004; - /** Evaluate the subclass Expression using the given Variables as context and return result. - * - * Should only be called by subclasses, but can't be protected because they need to call - * this function on each other. - */ - virtual Value evaluateInternal(Variables* vars) const = 0; + bool documentOk() const; + bool topLevel() const; + bool inclusionOk() const; - protected: - typedef std::vector<boost::intrusive_ptr<Expression> > ExpressionVector; + private: + int options; }; + // + // Diagram of relationship between parse functions when parsing a $op: + // + // { someFieldOrArrayIndex: { $op: [ARGS] } } + // ^ parseExpression on inner $op BSONElement + // ^ parseObject on BSONObject + // ^ parseOperand on outer BSONElement wrapping the $op Object + // - /// Inherit from ExpressionVariadic or ExpressionFixedArity instead of directly from this class. - class ExpressionNary : - public Expression { - public: - // virtuals from Expression - virtual boost::intrusive_ptr<Expression> optimize(); - virtual Value serialize(bool explain) const; - virtual void addDependencies(DepsTracker* deps, std::vector<std::string>* path=NULL) const; - - /* - Add an operand to the n-ary expression. - - @param pExpression the expression to add - */ - virtual void addOperand(const boost::intrusive_ptr<Expression> &pExpression); - - // TODO split this into two functions - virtual bool isAssociativeAndCommutative() const { return false; } + /** + * Parses a BSON Object that could represent a functional expression or a Document + * expression. + */ + static boost::intrusive_ptr<Expression> parseObject(BSONObj obj, + ObjectCtx* pCtx, + const VariablesParseState& vps); - /* - Get the name of the operator. + /** + * Parses a BSONElement which has already been determined to be functional expression. + * + * exprElement should be the only element inside the expression object. That is the + * field name should be the $op for the expression. + */ + static boost::intrusive_ptr<Expression> parseExpression(BSONElement exprElement, + const VariablesParseState& vps); - @returns the name of the operator; this std::string belongs to the class - implementation, and should not be deleted - and should not - */ - virtual const char* getOpName() const = 0; - /// Allow subclasses the opportunity to validate arguments at parse time. - virtual void validateArguments(const ExpressionVector& args) const {} + /** + * Parses a BSONElement which is an operand in an Expression. + * + * This is the most generic parser and can parse ExpressionFieldPath, a literal, or a $op. + * If it is a $op, exprElement should be the outer element whose value is an Object + * containing the $op. 
+ */ + static boost::intrusive_ptr<Expression> parseOperand(BSONElement exprElement, + const VariablesParseState& vps); - static ExpressionVector parseArguments( - BSONElement bsonExpr, - const VariablesParseState& vps); + /* + Produce a field path std::string with the field prefix removed. - protected: - ExpressionNary() {} + Throws an error if the field prefix is not present. - ExpressionVector vpOperand; - }; + @param prefixedField the prefixed field + @returns the field path with the prefix removed + */ + static std::string removeFieldPrefix(const std::string& prefixedField); - /// Inherit from ExpressionVariadic or ExpressionFixedArity instead of directly from this class. - template <typename SubClass> - class ExpressionNaryBase : public ExpressionNary { - public: - static boost::intrusive_ptr<Expression> parse(BSONElement bsonExpr, - const VariablesParseState& vps) { - boost::intrusive_ptr<ExpressionNaryBase> expr = new SubClass(); - ExpressionVector args = parseArguments(bsonExpr, vps); - expr->validateArguments(args); - expr->vpOperand = args; - return expr; - } - }; + /** Evaluate the subclass Expression using the given Variables as context and return result. + * + * Should only be called by subclasses, but can't be protected because they need to call + * this function on each other. + */ + virtual Value evaluateInternal(Variables* vars) const = 0; - /// Inherit from this class if your expression takes a variable number of arguments. - template <typename SubClass> - class ExpressionVariadic : public ExpressionNaryBase<SubClass> { - }; +protected: + typedef std::vector<boost::intrusive_ptr<Expression>> ExpressionVector; +}; - /// Inherit from this class if your expression takes a fixed number of arguments. - template <typename SubClass, int NArgs> - class ExpressionFixedArity : public ExpressionNaryBase<SubClass> { - public: - virtual void validateArguments(const Expression::ExpressionVector& args) const { - uassert(16020, mongoutils::str::stream() - << "Expression " << this->getOpName() << " takes exactly " << NArgs - << " arguments. " << args.size() << " were passed in.", - args.size() == NArgs); - - } - }; +/// Inherit from ExpressionVariadic or ExpressionFixedArity instead of directly from this class. +class ExpressionNary : public Expression { +public: + // virtuals from Expression + virtual boost::intrusive_ptr<Expression> optimize(); + virtual Value serialize(bool explain) const; + virtual void addDependencies(DepsTracker* deps, std::vector<std::string>* path = NULL) const; - class ExpressionAbs final : public ExpressionFixedArity<ExpressionAbs, 1> { - Value evaluateInternal(Variables* vars) const final; - const char* getOpName() const final; - }; + /* + Add an operand to the n-ary expression. + @param pExpression the expression to add + */ + virtual void addOperand(const boost::intrusive_ptr<Expression>& pExpression); - class ExpressionAdd : public ExpressionVariadic<ExpressionAdd> { - public: - // virtuals from Expression - virtual Value evaluateInternal(Variables* vars) const; - virtual const char* getOpName() const; - virtual bool isAssociativeAndCommutative() const { return true; } - }; + // TODO split this into two functions + virtual bool isAssociativeAndCommutative() const { + return false; + } + /* + Get the name of the operator. 
+
+      @returns the name of the operator; this std::string belongs to the class
+        implementation, and should not be deleted
+    */
+    virtual const char* getOpName() const = 0;
+
+    /// Allow subclasses the opportunity to validate arguments at parse time.
+    virtual void validateArguments(const ExpressionVector& args) const {}
+
+    static ExpressionVector parseArguments(BSONElement bsonExpr, const VariablesParseState& vps);
+
+protected:
+    ExpressionNary() {}
+
+    ExpressionVector vpOperand;
+};
+
+/// Inherit from ExpressionVariadic or ExpressionFixedArity instead of directly from this class.
+template <typename SubClass>
+class ExpressionNaryBase : public ExpressionNary {
+public:
+    static boost::intrusive_ptr<Expression> parse(BSONElement bsonExpr,
+                                                  const VariablesParseState& vps) {
+        boost::intrusive_ptr<ExpressionNaryBase> expr = new SubClass();
+        ExpressionVector args = parseArguments(bsonExpr, vps);
+        expr->validateArguments(args);
+        expr->vpOperand = args;
+        return expr;
+    }
+};
+
+/// Inherit from this class if your expression takes a variable number of arguments.
+template <typename SubClass>
+class ExpressionVariadic : public ExpressionNaryBase<SubClass> {};
+
+/// Inherit from this class if your expression takes a fixed number of arguments.
+template <typename SubClass, int NArgs>
+class ExpressionFixedArity : public ExpressionNaryBase<SubClass> {
+public:
+    virtual void validateArguments(const Expression::ExpressionVector& args) const {
+        uassert(16020,
+                mongoutils::str::stream() << "Expression " << this->getOpName() << " takes exactly "
+                                          << NArgs << " arguments. " << args.size()
+                                          << " were passed in.",
+                args.size() == NArgs);
+    }
+};

-    class ExpressionAbs final : public ExpressionFixedArity<ExpressionAbs, 1> {
-        Value evaluateInternal(Variables* vars) const final;
-        const char* getOpName() const final;
-    };

+class ExpressionAbs final : public ExpressionFixedArity<ExpressionAbs, 1> {
+    Value evaluateInternal(Variables* vars) const final;
+    const char* getOpName() const final;
+};

-    class ExpressionAdd : public ExpressionVariadic<ExpressionAdd> {
-    public:
-        // virtuals from Expression
-        virtual Value evaluateInternal(Variables* vars) const;
-        virtual const char* getOpName() const;
-        virtual bool isAssociativeAndCommutative() const { return true; }
-    };

+class ExpressionAdd : public ExpressionVariadic<ExpressionAdd> {
+public:
+    // virtuals from Expression
+    virtual Value evaluateInternal(Variables* vars) const;
+    virtual const char* getOpName() const;
+    virtual bool isAssociativeAndCommutative() const {
+        return true;
+    }
+};

-    class ExpressionAllElementsTrue : public ExpressionFixedArity<ExpressionAllElementsTrue, 1> {
-    public:
-        // virtuals from ExpressionNary
-        virtual Value evaluateInternal(Variables* vars) const;
-        virtual const char* getOpName() const;
-    };

+class ExpressionAllElementsTrue : public ExpressionFixedArity<ExpressionAllElementsTrue, 1> {
+public:
+    // virtuals from ExpressionNary
+    virtual Value evaluateInternal(Variables* vars) const;
+    virtual const char* getOpName() const;
+};

-    class ExpressionAnd : public ExpressionVariadic<ExpressionAnd> {
-    public:
-        // virtuals from Expression
-        virtual boost::intrusive_ptr<Expression> optimize();
-        virtual Value evaluateInternal(Variables* vars) const;
-        virtual const char* getOpName() const;
-        virtual bool isAssociativeAndCommutative() const { return true; }
-    };

+class ExpressionAnd : public ExpressionVariadic<ExpressionAnd> {
+public:
+    // virtuals from Expression
+    virtual boost::intrusive_ptr<Expression> 
optimize(); + virtual Value evaluateInternal(Variables* vars) const; + virtual const char* getOpName() const; + virtual bool isAssociativeAndCommutative() const { + return true; + } +}; - class ExpressionCoerceToBool : public Expression { - public: - // virtuals from ExpressionNary - virtual boost::intrusive_ptr<Expression> optimize(); - virtual void addDependencies(DepsTracker* deps, std::vector<std::string>* path=NULL) const; - virtual Value evaluateInternal(Variables* vars) const; - virtual Value serialize(bool explain) const; - static boost::intrusive_ptr<ExpressionCoerceToBool> create( - const boost::intrusive_ptr<Expression> &pExpression); +class ExpressionAnyElementTrue : public ExpressionFixedArity<ExpressionAnyElementTrue, 1> { +public: + // virtuals from ExpressionNary + virtual Value evaluateInternal(Variables* vars) const; + virtual const char* getOpName() const; +}; - private: - ExpressionCoerceToBool(const boost::intrusive_ptr<Expression> &pExpression); +class ExpressionArrayElemAt final : public ExpressionFixedArity<ExpressionArrayElemAt, 2> { +public: + Value evaluateInternal(Variables* vars) const final; + const char* getOpName() const final; +}; - boost::intrusive_ptr<Expression> pExpression; - }; +class ExpressionCoerceToBool : public Expression { +public: + // virtuals from ExpressionNary + virtual boost::intrusive_ptr<Expression> optimize(); + virtual void addDependencies(DepsTracker* deps, std::vector<std::string>* path = NULL) const; + virtual Value evaluateInternal(Variables* vars) const; + virtual Value serialize(bool explain) const; - class ExpressionCompare : public ExpressionFixedArity<ExpressionCompare, 2> { - public: + static boost::intrusive_ptr<ExpressionCoerceToBool> create( + const boost::intrusive_ptr<Expression>& pExpression); - /** Enumeration of comparison operators. Any changes to these values require adjustment of - * the lookup table in the implementation. - */ - enum CmpOp { - EQ = 0, // return true for a == b, false otherwise - NE = 1, // return true for a != b, false otherwise - GT = 2, // return true for a > b, false otherwise - GTE = 3, // return true for a >= b, false otherwise - LT = 4, // return true for a < b, false otherwise - LTE = 5, // return true for a <= b, false otherwise - CMP = 6, // return -1, 0, 1 for a < b, a == b, a > b - }; - - // virtuals from ExpressionNary - virtual Value evaluateInternal(Variables* vars) const; - virtual const char* getOpName() const; - - static boost::intrusive_ptr<Expression> parse( - BSONElement bsonExpr, - const VariablesParseState& vps, - CmpOp cmpOp); - - ExpressionCompare(CmpOp cmpOp); - private: - CmpOp cmpOp; - }; +private: + ExpressionCoerceToBool(const boost::intrusive_ptr<Expression>& pExpression); + boost::intrusive_ptr<Expression> pExpression; +}; - class ExpressionConcat : public ExpressionVariadic<ExpressionConcat> { - public: - // virtuals from ExpressionNary - virtual Value evaluateInternal(Variables* vars) const; - virtual const char* getOpName() const; - }; - - class ExpressionConcatArrays final : public ExpressionVariadic<ExpressionConcatArrays> { - public: - Value evaluateInternal(Variables* vars) const final; - const char* getOpName() const final; +class ExpressionCompare : public ExpressionFixedArity<ExpressionCompare, 2> { +public: + /** Enumeration of comparison operators. Any changes to these values require adjustment of + * the lookup table in the implementation. 
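     * (Illustrative note, not part of the patch: parse() below receives the
     * CmpOp matching the operator name, so {$gt: ["$a", "$b"]} builds an
     * ExpressionCompare(GT) returning a bool, while {$cmp: ["$a", "$b"]}
     * builds ExpressionCompare(CMP) returning -1, 0, or 1.)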
+ */ + enum CmpOp { + EQ = 0, // return true for a == b, false otherwise + NE = 1, // return true for a != b, false otherwise + GT = 2, // return true for a > b, false otherwise + GTE = 3, // return true for a >= b, false otherwise + LT = 4, // return true for a < b, false otherwise + LTE = 5, // return true for a <= b, false otherwise + CMP = 6, // return -1, 0, 1 for a < b, a == b, a > b }; + // virtuals from ExpressionNary + virtual Value evaluateInternal(Variables* vars) const; + virtual const char* getOpName() const; - class ExpressionCond : public ExpressionFixedArity<ExpressionCond, 3> { - typedef ExpressionFixedArity<ExpressionCond, 3> Base; - public: - // virtuals from ExpressionNary - virtual Value evaluateInternal(Variables* vars) const; - virtual const char* getOpName() const; + static boost::intrusive_ptr<Expression> parse(BSONElement bsonExpr, + const VariablesParseState& vps, + CmpOp cmpOp); - static boost::intrusive_ptr<Expression> parse( - BSONElement expr, - const VariablesParseState& vps); - }; + ExpressionCompare(CmpOp cmpOp); +private: + CmpOp cmpOp; +}; - class ExpressionConstant : public Expression { - public: - // virtuals from Expression - virtual boost::intrusive_ptr<Expression> optimize(); - virtual void addDependencies(DepsTracker* deps, std::vector<std::string>* path=NULL) const; - virtual Value evaluateInternal(Variables* vars) const; - virtual const char* getOpName() const; - virtual Value serialize(bool explain) const; - - static boost::intrusive_ptr<ExpressionConstant> create(const Value& pValue); - static boost::intrusive_ptr<Expression> parse( - BSONElement bsonExpr, - const VariablesParseState& vps); - - /* - Get the constant value represented by this Expression. - - @returns the value - */ - Value getValue() const; - private: - ExpressionConstant(const Value& pValue); +class ExpressionConcat : public ExpressionVariadic<ExpressionConcat> { +public: + // virtuals from ExpressionNary + virtual Value evaluateInternal(Variables* vars) const; + virtual const char* getOpName() const; +}; - Value pValue; - }; - class ExpressionDateToString : public Expression { - public: - // virtuals from Expression - virtual boost::intrusive_ptr<Expression> optimize(); - virtual Value serialize(bool explain) const; - virtual Value evaluateInternal(Variables* vars) const; - virtual void addDependencies(DepsTracker* deps, std::vector<std::string>* path=NULL) const; +class ExpressionConcatArrays final : public ExpressionVariadic<ExpressionConcatArrays> { +public: + Value evaluateInternal(Variables* vars) const final; + const char* getOpName() const final; +}; - static boost::intrusive_ptr<Expression> parse( - BSONElement expr, - const VariablesParseState& vps); - private: - ExpressionDateToString(const std::string& format, // the format string - boost::intrusive_ptr<Expression> date); // the date to format +class ExpressionCond : public ExpressionFixedArity<ExpressionCond, 3> { + typedef ExpressionFixedArity<ExpressionCond, 3> Base; - // Will uassert on invalid data - static void validateFormat(const std::string& format); +public: + // virtuals from ExpressionNary + virtual Value evaluateInternal(Variables* vars) const; + virtual const char* getOpName() const; - // Need raw date as tm doesn't have millisecond resolution. - // Format must be valid. 
- static std::string formatDate(const std::string& format, - const tm& tm, - const long long date); + static boost::intrusive_ptr<Expression> parse(BSONElement expr, const VariablesParseState& vps); +}; - static void insertPadded(StringBuilder& sb, int number, int spaces); - const std::string _format; - boost::intrusive_ptr<Expression> _date; - }; +class ExpressionConstant : public Expression { +public: + // virtuals from Expression + virtual boost::intrusive_ptr<Expression> optimize(); + virtual void addDependencies(DepsTracker* deps, std::vector<std::string>* path = NULL) const; + virtual Value evaluateInternal(Variables* vars) const; + virtual const char* getOpName() const; + virtual Value serialize(bool explain) const; - class ExpressionDayOfMonth : public ExpressionFixedArity<ExpressionDayOfMonth, 1> { - public: - // virtuals from ExpressionNary - virtual Value evaluateInternal(Variables* vars) const; - virtual const char* getOpName() const; + static boost::intrusive_ptr<ExpressionConstant> create(const Value& pValue); + static boost::intrusive_ptr<Expression> parse(BSONElement bsonExpr, + const VariablesParseState& vps); - static inline int extract(const tm& tm) { return tm.tm_mday; } - }; + /* + Get the constant value represented by this Expression. + @returns the value + */ + Value getValue() const; - class ExpressionDayOfWeek : public ExpressionFixedArity<ExpressionDayOfWeek, 1> { - public: - // virtuals from ExpressionNary - virtual Value evaluateInternal(Variables* vars) const; - virtual const char* getOpName() const; +private: + ExpressionConstant(const Value& pValue); - // MySQL uses 1-7, tm uses 0-6 - static inline int extract(const tm& tm) { return tm.tm_wday + 1; } - }; + Value pValue; +}; +class ExpressionDateToString : public Expression { +public: + // virtuals from Expression + virtual boost::intrusive_ptr<Expression> optimize(); + virtual Value serialize(bool explain) const; + virtual Value evaluateInternal(Variables* vars) const; + virtual void addDependencies(DepsTracker* deps, std::vector<std::string>* path = NULL) const; - class ExpressionDayOfYear : public ExpressionFixedArity<ExpressionDayOfYear, 1> { - public: - // virtuals from ExpressionNary - virtual Value evaluateInternal(Variables* vars) const; - virtual const char* getOpName() const; + static boost::intrusive_ptr<Expression> parse(BSONElement expr, const VariablesParseState& vps); - // MySQL uses 1-366, tm uses 0-365 - static inline int extract(const tm& tm) { return tm.tm_yday + 1; } - }; +private: + ExpressionDateToString(const std::string& format, // the format string + boost::intrusive_ptr<Expression> date); // the date to format + // Will uassert on invalid data + static void validateFormat(const std::string& format); - class ExpressionDivide : public ExpressionFixedArity<ExpressionDivide, 2> { - public: - // virtuals from ExpressionNary - virtual Value evaluateInternal(Variables* vars) const; - virtual const char* getOpName() const; - }; + // Need raw date as tm doesn't have millisecond resolution. + // Format must be valid. 
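    // Illustrative note (not part of the patch; assumes the documented "%L"
    // milliseconds specifier): with format "%H:%M:%S.%L" and
    // date == 1420070400123 (2015-01-01T00:00:00.123Z), the tm supplies
    // 00:00:00 and the raw value supplies 1420070400123 % 1000 == 123,
    // yielding "00:00:00.123".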
+ static std::string formatDate(const std::string& format, const tm& tm, const long long date); + static void insertPadded(StringBuilder& sb, int number, int spaces); - class ExpressionFieldPath : public Expression { - public: - // virtuals from Expression - virtual boost::intrusive_ptr<Expression> optimize(); - virtual void addDependencies(DepsTracker* deps, std::vector<std::string>* path=NULL) const; - virtual Value evaluateInternal(Variables* vars) const; - virtual Value serialize(bool explain) const; + const std::string _format; + boost::intrusive_ptr<Expression> _date; +}; - /* - Create a field path expression using old semantics (rooted off of CURRENT). +class ExpressionDayOfMonth : public ExpressionFixedArity<ExpressionDayOfMonth, 1> { +public: + // virtuals from ExpressionNary + virtual Value evaluateInternal(Variables* vars) const; + virtual const char* getOpName() const; - // NOTE: this method is deprecated and only used by tests - // TODO remove this method in favor of parse() + static inline int extract(const tm& tm) { + return tm.tm_mday; + } +}; - Evaluation will extract the value associated with the given field - path from the source document. - @param fieldPath the field path string, without any leading document - indicator - @returns the newly created field path expression - */ - static boost::intrusive_ptr<ExpressionFieldPath> create(const std::string& fieldPath); +class ExpressionDayOfWeek : public ExpressionFixedArity<ExpressionDayOfWeek, 1> { +public: + // virtuals from ExpressionNary + virtual Value evaluateInternal(Variables* vars) const; + virtual const char* getOpName() const; - /// Like create(), but works with the raw std::string from the user with the "$" prefixes. - static boost::intrusive_ptr<ExpressionFieldPath> parse( - const std::string& raw, - const VariablesParseState& vps); + // MySQL uses 1-7, tm uses 0-6 + static inline int extract(const tm& tm) { + return tm.tm_wday + 1; + } +}; - const FieldPath& getFieldPath() const { return _fieldPath; } - private: - ExpressionFieldPath(const std::string& fieldPath, Variables::Id variable); +class ExpressionDayOfYear : public ExpressionFixedArity<ExpressionDayOfYear, 1> { +public: + // virtuals from ExpressionNary + virtual Value evaluateInternal(Variables* vars) const; + virtual const char* getOpName() const; - /* - Internal implementation of evaluateInternal(), used recursively. + // MySQL uses 1-366, tm uses 0-365 + static inline int extract(const tm& tm) { + return tm.tm_yday + 1; + } +}; - The internal implementation doesn't just use a loop because of - the possibility that we need to skip over an array. If the path - is "a.b.c", and a is an array, then we fan out from there, and - traverse "b.c" for each element of a:[...]. This requires that - a be an array of objects in order to navigate more deeply. 
- @param index current path field index to extract - @param input current document traversed to (not the top-level one) - @returns the field found; could be an array - */ - Value evaluatePath(size_t index, const Document& input) const; +class ExpressionDivide : public ExpressionFixedArity<ExpressionDivide, 2> { +public: + // virtuals from ExpressionNary + virtual Value evaluateInternal(Variables* vars) const; + virtual const char* getOpName() const; +}; - // Helper for evaluatePath to handle Array case - Value evaluatePathArray(size_t index, const Value& input) const; - const FieldPath _fieldPath; - const Variables::Id _variable; - }; +class ExpressionFieldPath : public Expression { +public: + // virtuals from Expression + virtual boost::intrusive_ptr<Expression> optimize(); + virtual void addDependencies(DepsTracker* deps, std::vector<std::string>* path = NULL) const; + virtual Value evaluateInternal(Variables* vars) const; + virtual Value serialize(bool explain) const; + /* + Create a field path expression using old semantics (rooted off of CURRENT). - class ExpressionFilter final : public Expression { - public: - // virtuals from Expression - boost::intrusive_ptr<Expression> optimize() final; - Value serialize(bool explain) const final; - Value evaluateInternal(Variables* vars) const final; - void addDependencies(DepsTracker* deps, - std::vector<std::string>* path=NULL) const final; + // NOTE: this method is deprecated and only used by tests + // TODO remove this method in favor of parse() - static boost::intrusive_ptr<Expression> parse( - BSONElement expr, - const VariablesParseState& vps); + Evaluation will extract the value associated with the given field + path from the source document. - private: - ExpressionFilter(std::string varName, - Variables::Id varId, - boost::intrusive_ptr<Expression> input, - boost::intrusive_ptr<Expression> filter); - - // The name of the variable to set to each element in the array. - std::string _varName; - // The id of the variable to set. - Variables::Id _varId; - // The array to iterate over. - boost::intrusive_ptr<Expression> _input; - // The expression determining whether each element should be present in the result array. - boost::intrusive_ptr<Expression> _filter; - }; - - - class ExpressionHour : public ExpressionFixedArity<ExpressionHour, 1> { - public: - // virtuals from ExpressionNary - virtual Value evaluateInternal(Variables* vars) const; - virtual const char* getOpName() const; - - static inline int extract(const tm& tm) { return tm.tm_hour; } - }; + @param fieldPath the field path string, without any leading document + indicator + @returns the newly created field path expression + */ + static boost::intrusive_ptr<ExpressionFieldPath> create(const std::string& fieldPath); + /// Like create(), but works with the raw std::string from the user with the "$" prefixes. 
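    /// (Illustration, not part of the patch: parse("$a.b", vps) resolves field
    /// "a.b" of CURRENT; parse("$$ROOT.a", vps) and parse("$$myVar", vps) go
    /// through the variables known to vps. create("a.b") is the deprecated
    /// test-only spelling noted above.)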
+ static boost::intrusive_ptr<ExpressionFieldPath> parse(const std::string& raw, + const VariablesParseState& vps); - class ExpressionIfNull : public ExpressionFixedArity<ExpressionIfNull, 2> { - public: - // virtuals from ExpressionNary - virtual Value evaluateInternal(Variables* vars) const; - virtual const char* getOpName() const; - }; + const FieldPath& getFieldPath() const { + return _fieldPath; + } +private: + ExpressionFieldPath(const std::string& fieldPath, Variables::Id variable); - class ExpressionLet : public Expression { - public: - // virtuals from Expression - virtual boost::intrusive_ptr<Expression> optimize(); - virtual Value serialize(bool explain) const; - virtual Value evaluateInternal(Variables* vars) const; - virtual void addDependencies(DepsTracker* deps, std::vector<std::string>* path=NULL) const; + /* + Internal implementation of evaluateInternal(), used recursively. - static boost::intrusive_ptr<Expression> parse( - BSONElement expr, - const VariablesParseState& vps); + The internal implementation doesn't just use a loop because of + the possibility that we need to skip over an array. If the path + is "a.b.c", and a is an array, then we fan out from there, and + traverse "b.c" for each element of a:[...]. This requires that + a be an array of objects in order to navigate more deeply. - struct NameAndExpression { - NameAndExpression() {} - NameAndExpression(std::string name, boost::intrusive_ptr<Expression> expression) - : name(name) - , expression(expression) - {} + @param index current path field index to extract + @param input current document traversed to (not the top-level one) + @returns the field found; could be an array + */ + Value evaluatePath(size_t index, const Document& input) const; + + // Helper for evaluatePath to handle Array case + Value evaluatePathArray(size_t index, const Value& input) const; + + const FieldPath _fieldPath; + const Variables::Id _variable; +}; + + +class ExpressionFilter final : public Expression { +public: + // virtuals from Expression + boost::intrusive_ptr<Expression> optimize() final; + Value serialize(bool explain) const final; + Value evaluateInternal(Variables* vars) const final; + void addDependencies(DepsTracker* deps, std::vector<std::string>* path = NULL) const final; + + static boost::intrusive_ptr<Expression> parse(BSONElement expr, const VariablesParseState& vps); + +private: + ExpressionFilter(std::string varName, + Variables::Id varId, + boost::intrusive_ptr<Expression> input, + boost::intrusive_ptr<Expression> filter); + + // The name of the variable to set to each element in the array. + std::string _varName; + // The id of the variable to set. + Variables::Id _varId; + // The array to iterate over. + boost::intrusive_ptr<Expression> _input; + // The expression determining whether each element should be present in the result array. 
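    // (Illustration, not part of the patch: for
    //     {$filter: {input: "$items", as: "item", cond: {$gte: ["$$item.qty", 100]}}}
    // _varName is "item", _input yields the "$items" array, and _filter is the
    // cond expression, re-evaluated with "$$item" bound to each element.)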
+ boost::intrusive_ptr<Expression> _filter; +}; + + +class ExpressionHour : public ExpressionFixedArity<ExpressionHour, 1> { +public: + // virtuals from ExpressionNary + virtual Value evaluateInternal(Variables* vars) const; + virtual const char* getOpName() const; + + static inline int extract(const tm& tm) { + return tm.tm_hour; + } +}; - std::string name; - boost::intrusive_ptr<Expression> expression; - }; - typedef std::map<Variables::Id, NameAndExpression> VariableMap; +class ExpressionIfNull : public ExpressionFixedArity<ExpressionIfNull, 2> { +public: + // virtuals from ExpressionNary + virtual Value evaluateInternal(Variables* vars) const; + virtual const char* getOpName() const; +}; - private: - ExpressionLet(const VariableMap& vars, - boost::intrusive_ptr<Expression> subExpression); - VariableMap _variables; - boost::intrusive_ptr<Expression> _subExpression; - }; +class ExpressionLet : public Expression { +public: + // virtuals from Expression + virtual boost::intrusive_ptr<Expression> optimize(); + virtual Value serialize(bool explain) const; + virtual Value evaluateInternal(Variables* vars) const; + virtual void addDependencies(DepsTracker* deps, std::vector<std::string>* path = NULL) const; - class ExpressionMap : public Expression { - public: - // virtuals from Expression - virtual boost::intrusive_ptr<Expression> optimize(); - virtual Value serialize(bool explain) const; - virtual Value evaluateInternal(Variables* vars) const; - virtual void addDependencies(DepsTracker* deps, std::vector<std::string>* path=NULL) const; + static boost::intrusive_ptr<Expression> parse(BSONElement expr, const VariablesParseState& vps); - static boost::intrusive_ptr<Expression> parse( - BSONElement expr, - const VariablesParseState& vps); + struct NameAndExpression { + NameAndExpression() {} + NameAndExpression(std::string name, boost::intrusive_ptr<Expression> expression) + : name(name), expression(expression) {} - private: - ExpressionMap(const std::string& varName, // name of variable to set - Variables::Id varId, // id of variable to set - boost::intrusive_ptr<Expression> input, // yields array to iterate - boost::intrusive_ptr<Expression> each); // yields results to be added to output array - - std::string _varName; - Variables::Id _varId; - boost::intrusive_ptr<Expression> _input; - boost::intrusive_ptr<Expression> _each; + std::string name; + boost::intrusive_ptr<Expression> expression; }; - class ExpressionMeta : public Expression { - public: - // virtuals from Expression - virtual Value serialize(bool explain) const; - virtual Value evaluateInternal(Variables* vars) const; - virtual void addDependencies(DepsTracker* deps, std::vector<std::string>* path=NULL) const; - - static boost::intrusive_ptr<Expression> parse( - BSONElement expr, - const VariablesParseState& vps); - }; + typedef std::map<Variables::Id, NameAndExpression> VariableMap; + +private: + ExpressionLet(const VariableMap& vars, boost::intrusive_ptr<Expression> subExpression); + + VariableMap _variables; + boost::intrusive_ptr<Expression> _subExpression; +}; + +class ExpressionMap : public Expression { +public: + // virtuals from Expression + virtual boost::intrusive_ptr<Expression> optimize(); + virtual Value serialize(bool explain) const; + virtual Value evaluateInternal(Variables* vars) const; + virtual void addDependencies(DepsTracker* deps, std::vector<std::string>* path = NULL) const; + + static boost::intrusive_ptr<Expression> parse(BSONElement expr, const VariablesParseState& vps); + +private: + ExpressionMap( + 
const std::string& varName, // name of variable to set + Variables::Id varId, // id of variable to set + boost::intrusive_ptr<Expression> input, // yields array to iterate + boost::intrusive_ptr<Expression> each); // yields results to be added to output array + + std::string _varName; + Variables::Id _varId; + boost::intrusive_ptr<Expression> _input; + boost::intrusive_ptr<Expression> _each; +}; + +class ExpressionMeta : public Expression { +public: + // virtuals from Expression + virtual Value serialize(bool explain) const; + virtual Value evaluateInternal(Variables* vars) const; + virtual void addDependencies(DepsTracker* deps, std::vector<std::string>* path = NULL) const; + + static boost::intrusive_ptr<Expression> parse(BSONElement expr, const VariablesParseState& vps); +}; + +class ExpressionMillisecond : public ExpressionFixedArity<ExpressionMillisecond, 1> { +public: + // virtuals from ExpressionNary + virtual Value evaluateInternal(Variables* vars) const; + virtual const char* getOpName() const; + + static int extract(const long long date); +}; + + +class ExpressionMinute : public ExpressionFixedArity<ExpressionMinute, 1> { +public: + // virtuals from ExpressionNary + virtual Value evaluateInternal(Variables* vars) const; + virtual const char* getOpName() const; + + static int extract(const tm& tm) { + return tm.tm_min; + } +}; - class ExpressionMillisecond : public ExpressionFixedArity<ExpressionMillisecond, 1> { - public: - // virtuals from ExpressionNary - virtual Value evaluateInternal(Variables* vars) const; - virtual const char* getOpName() const; - static int extract(const long long date); - }; +class ExpressionMod : public ExpressionFixedArity<ExpressionMod, 2> { +public: + // virtuals from ExpressionNary + virtual Value evaluateInternal(Variables* vars) const; + virtual const char* getOpName() const; +}; - class ExpressionMinute : public ExpressionFixedArity<ExpressionMinute, 1> { - public: - // virtuals from ExpressionNary - virtual Value evaluateInternal(Variables* vars) const; - virtual const char* getOpName() const; +class ExpressionMultiply : public ExpressionVariadic<ExpressionMultiply> { +public: + // virtuals from Expression + virtual Value evaluateInternal(Variables* vars) const; + virtual const char* getOpName() const; + virtual bool isAssociativeAndCommutative() const { + return true; + } +}; - static int extract(const tm& tm) { return tm.tm_min; } - }; +class ExpressionMonth : public ExpressionFixedArity<ExpressionMonth, 1> { +public: + // virtuals from ExpressionNary + virtual Value evaluateInternal(Variables* vars) const; + virtual const char* getOpName() const; - class ExpressionMod : public ExpressionFixedArity<ExpressionMod, 2> { - public: - // virtuals from ExpressionNary - virtual Value evaluateInternal(Variables* vars) const; - virtual const char* getOpName() const; - }; - + // MySQL uses 1-12, tm uses 0-11 + static inline int extract(const tm& tm) { + return tm.tm_mon + 1; + } +}; - class ExpressionMultiply : public ExpressionVariadic<ExpressionMultiply> { - public: - // virtuals from Expression - virtual Value evaluateInternal(Variables* vars) const; - virtual const char* getOpName() const; - virtual bool isAssociativeAndCommutative() const { return true; } - }; +class ExpressionNot : public ExpressionFixedArity<ExpressionNot, 1> { +public: + // virtuals from ExpressionNary + virtual Value evaluateInternal(Variables* vars) const; + virtual const char* getOpName() const; +}; - class ExpressionMonth : public ExpressionFixedArity<ExpressionMonth, 1> { - 
public: - // virtuals from ExpressionNary - virtual Value evaluateInternal(Variables* vars) const; - virtual const char* getOpName() const; - // MySQL uses 1-12, tm uses 0-11 - static inline int extract(const tm& tm) { return tm.tm_mon + 1; } - }; +class ExpressionObject : public Expression { +public: + // virtuals from Expression + virtual boost::intrusive_ptr<Expression> optimize(); + virtual bool isSimple(); + virtual void addDependencies(DepsTracker* deps, std::vector<std::string>* path = NULL) const; + /** Only evaluates non inclusion expressions. For inclusions, use addToDocument(). */ + virtual Value evaluateInternal(Variables* vars) const; + virtual Value serialize(bool explain) const; + /// like evaluate(), but return a Document instead of a Value-wrapped Document. + Document evaluateDocument(Variables* vars) const; - class ExpressionNot : public ExpressionFixedArity<ExpressionNot, 1> { - public: - // virtuals from ExpressionNary - virtual Value evaluateInternal(Variables* vars) const; - virtual const char* getOpName() const; - }; + /** Evaluates with inclusions and adds results to passed in Mutable document + * + * @param output the MutableDocument to add the evaluated expressions to + * @param currentDoc the input Document for this level (for inclusions) + * @param vars the variables for use in subexpressions + */ + void addToDocument(MutableDocument& ouput, const Document& currentDoc, Variables* vars) const; + // estimated number of fields that will be output + size_t getSizeHint() const; - class ExpressionObject : public Expression { - public: - // virtuals from Expression - virtual boost::intrusive_ptr<Expression> optimize(); - virtual bool isSimple(); - virtual void addDependencies(DepsTracker* deps, std::vector<std::string>* path=NULL) const; - /** Only evaluates non inclusion expressions. For inclusions, use addToDocument(). */ - virtual Value evaluateInternal(Variables* vars) const; - virtual Value serialize(bool explain) const; - - /// like evaluate(), but return a Document instead of a Value-wrapped Document. - Document evaluateDocument(Variables* vars) const; - - /** Evaluates with inclusions and adds results to passed in Mutable document - * - * @param output the MutableDocument to add the evaluated expressions to - * @param currentDoc the input Document for this level (for inclusions) - * @param vars the variables for use in subexpressions - */ - void addToDocument(MutableDocument& ouput, - const Document& currentDoc, - Variables* vars - ) const; + /** Create an empty expression. + * Until fields are added, this will evaluate to an empty document. + */ + static boost::intrusive_ptr<ExpressionObject> create(); - // estimated number of fields that will be output - size_t getSizeHint() const; + /// Like create but uses special handling of _id for root object of $project. + static boost::intrusive_ptr<ExpressionObject> createRoot(); - /** Create an empty expression. - * Until fields are added, this will evaluate to an empty document. - */ - static boost::intrusive_ptr<ExpressionObject> create(); + /* + Add a field to the document expression. - /// Like create but uses special handling of _id for root object of $project. 
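    /// (Illustration, not part of the patch: the root object of
    /// {$project: {a: 1}} comes from createRoot(), which keeps _id unless the
    /// spec says {_id: 0}; that case is flagged through excludeId(true).)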
-        static boost::intrusive_ptr<ExpressionObject> createRoot();
+      @param fieldPath the path the evaluated expression will have in the
+      result Document
+      @param pExpression the expression to evaluate to obtain this field's
+      Value in the result Document
+    */
+    void addField(const FieldPath& fieldPath, const boost::intrusive_ptr<Expression>& pExpression);
-        /*
-          Add a field to the document expression.
+    /*
+      Add a field path to the set of those to be included.
-          @param fieldPath the path the evaluated expression will have in the
-          result Document
-          @param pExpression the expression to evaluate obtain this field's
-          Value in the result Document
-        */
-        void addField(const FieldPath &fieldPath,
-                      const boost::intrusive_ptr<Expression> &pExpression);
+      Note that including a nested field implies including everything on
+      the path leading down to it.
-        /*
-          Add a field path to the set of those to be included.
+      @param fieldPath the name of the field to be included
+    */
+    void includePath(const std::string& fieldPath);
-          Note that including a nested field implies including everything on
-          the path leading down to it.
+    /*
+      Get a count of the added fields.
-          @param fieldPath the name of the field to be included
-        */
-        void includePath(const std::string &fieldPath);
+      @returns how many fields have been added
+    */
+    size_t getFieldCount() const;
-        /*
-          Get a count of the added fields.
+    /*
+      Specialized BSON conversion that allows for writing out a
+      $project specification. This creates a standalone object, which must
+      be added to a containing object with a name
-          @returns how many fields have been added
-        */
-        size_t getFieldCount() const;
+      @param pBuilder where to write the object to
+      @param requireExpression see Expression::addToBsonObj
+    */
+    void documentToBson(BSONObjBuilder* pBuilder, bool requireExpression) const;
-        /*
-          Specialized BSON conversion that allows for writing out a
-          $project specification. This creates a standalone object, which must
-          be added to a containing object with a name
+    /*
+      Visitor abstraction used by emitPaths(). Each path is recorded by
+      calling path().
+    */
+    class PathSink {
+    public:
+        virtual ~PathSink(){};
-          @param pBuilder where to write the object to
-          @param requireExpression see Expression::addToBsonObj
-        */
-        void documentToBson(BSONObjBuilder *pBuilder,
-                            bool requireExpression) const;
+        /**
+          Record a path.
-        /*
-          Visitor abstraction used by emitPaths(). Each path is recorded by
-          calling path().
+          @param path the dotted path string
+          @param include if true, the path is included; if false, the path
+          is excluded
+        */
-        class PathSink {
-        public:
-            virtual ~PathSink() {};
-
-            /**
-              Record a path.
+        virtual void path(const std::string& path, bool include) = 0;
+    };
-              @param path the dotted path string
-              @param include if true, the path is included; if false, the path
-              is excluded
-            */
-            virtual void path(const std::string &path, bool include) = 0;
-        };
+    void excludeId(bool b) {
+        _excludeId = b;
+    }
-        void excludeId(bool b) { _excludeId = b; }
+private:
+    ExpressionObject(bool atRoot);
-    private:
-        ExpressionObject(bool atRoot);
+    // Mapping from fieldname to the Expression that generates its value.
+    // NULL expression means inclusion from source document.
+    typedef std::map<std::string, boost::intrusive_ptr<Expression>> FieldMap;
+    FieldMap _expressions;
-        // Mapping from fieldname to the Expression that generates its value.
-        // NULL expression means inclusion from source document.
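
// A minimal usage sketch of the ExpressionObject interface above, assuming the
// declared create()/includePath()/addField()/addToDocument() entry points. The
// function name, the field names "a" and "b", the 'expr' argument, and the
// header path are illustrative assumptions, not code from this change.
#include "mongo/db/pipeline/expression.h"  // assumed location of ExpressionObject

namespace expression_object_example {

mongo::Document projectOne(const mongo::Document& currentDoc,
                           mongo::Variables* vars,
                           const boost::intrusive_ptr<mongo::Expression>& expr) {
    using namespace mongo;
    boost::intrusive_ptr<ExpressionObject> obj = ExpressionObject::createRoot();
    obj->includePath("a");                      // inclusion: "a" is copied from the input
    obj->addField(FieldPath("b"), expr);        // computed: "b" is evaluated per document
    MutableDocument out(obj->getSizeHint());    // reserve the estimated output field count
    obj->addToDocument(out, currentDoc, vars);  // evaluates inclusions and computed fields
    return out.freeze();
}

}  // namespace expression_object_example
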
- typedef std::map<std::string, boost::intrusive_ptr<Expression> > FieldMap; - FieldMap _expressions; + // this is used to maintain order for generated fields not in the source document + std::vector<std::string> _order; - // this is used to maintain order for generated fields not in the source document - std::vector<std::string> _order; + bool _excludeId; + bool _atRoot; +}; - bool _excludeId; - bool _atRoot; - }; +class ExpressionOr : public ExpressionVariadic<ExpressionOr> { +public: + // virtuals from Expression + virtual boost::intrusive_ptr<Expression> optimize(); + virtual Value evaluateInternal(Variables* vars) const; + virtual const char* getOpName() const; + virtual bool isAssociativeAndCommutative() const { + return true; + } +}; - class ExpressionOr : public ExpressionVariadic<ExpressionOr> { - public: - // virtuals from Expression - virtual boost::intrusive_ptr<Expression> optimize(); - virtual Value evaluateInternal(Variables* vars) const; - virtual const char* getOpName() const; - virtual bool isAssociativeAndCommutative() const { return true; } - }; +class ExpressionSecond : public ExpressionFixedArity<ExpressionSecond, 1> { +public: + // virtuals from ExpressionNary + virtual Value evaluateInternal(Variables* vars) const; + virtual const char* getOpName() const; - class ExpressionSecond : public ExpressionFixedArity<ExpressionSecond, 1> { - public: - // virtuals from ExpressionNary - virtual Value evaluateInternal(Variables* vars) const; - virtual const char* getOpName() const; + static inline int extract(const tm& tm) { + return tm.tm_sec; + } +}; - static inline int extract(const tm& tm) { return tm.tm_sec; } - }; +class ExpressionSetDifference : public ExpressionFixedArity<ExpressionSetDifference, 2> { +public: + // virtuals from ExpressionNary + virtual Value evaluateInternal(Variables* vars) const; + virtual const char* getOpName() const; +}; - class ExpressionSetDifference : public ExpressionFixedArity<ExpressionSetDifference, 2> { - public: - // virtuals from ExpressionNary - virtual Value evaluateInternal(Variables* vars) const; - virtual const char* getOpName() const; - }; +class ExpressionSetEquals : public ExpressionVariadic<ExpressionSetEquals> { +public: + // virtuals from ExpressionNary + virtual Value evaluateInternal(Variables* vars) const; + virtual const char* getOpName() const; + virtual void validateArguments(const ExpressionVector& args) const; +}; - class ExpressionSetEquals : public ExpressionVariadic<ExpressionSetEquals> { - public: - // virtuals from ExpressionNary - virtual Value evaluateInternal(Variables* vars) const; - virtual const char* getOpName() const; - virtual void validateArguments(const ExpressionVector& args) const; - }; +class ExpressionSetIntersection : public ExpressionVariadic<ExpressionSetIntersection> { +public: + // virtuals from ExpressionNary + virtual Value evaluateInternal(Variables* vars) const; + virtual const char* getOpName() const; + virtual bool isAssociativeAndCommutative() const { + return true; + } +}; - class ExpressionSetIntersection : public ExpressionVariadic<ExpressionSetIntersection> { - public: - // virtuals from ExpressionNary - virtual Value evaluateInternal(Variables* vars) const; - virtual const char* getOpName() const; - virtual bool isAssociativeAndCommutative() const { return true; } - }; +class ExpressionSetIsSubset : public ExpressionFixedArity<ExpressionSetIsSubset, 2> { +public: + // virtuals from ExpressionNary + virtual boost::intrusive_ptr<Expression> optimize(); + virtual Value 
evaluateInternal(Variables* vars) const; + virtual const char* getOpName() const; - class ExpressionSetIsSubset : public ExpressionFixedArity<ExpressionSetIsSubset, 2> { - public: - // virtuals from ExpressionNary - virtual boost::intrusive_ptr<Expression> optimize(); - virtual Value evaluateInternal(Variables* vars) const; - virtual const char* getOpName() const; - private: - class Optimized; - }; +private: + class Optimized; +}; - class ExpressionSetUnion : public ExpressionVariadic<ExpressionSetUnion> { - public: - // virtuals from ExpressionNary - // virtual intrusive_ptr<Expression> optimize(); - virtual Value evaluateInternal(Variables* vars) const; - virtual const char* getOpName() const; - virtual bool isAssociativeAndCommutative() const { return true; } - }; +class ExpressionSetUnion : public ExpressionVariadic<ExpressionSetUnion> { +public: + // virtuals from ExpressionNary + // virtual intrusive_ptr<Expression> optimize(); + virtual Value evaluateInternal(Variables* vars) const; + virtual const char* getOpName() const; + virtual bool isAssociativeAndCommutative() const { + return true; + } +}; - class ExpressionIsArray : public ExpressionFixedArity<ExpressionIsArray, 1> { - public: - // virtuals from ExpressionNary - virtual Value evaluateInternal(Variables* vars) const; - virtual const char* getOpName() const; - }; +class ExpressionIsArray : public ExpressionFixedArity<ExpressionIsArray, 1> { +public: + // virtuals from ExpressionNary + virtual Value evaluateInternal(Variables* vars) const; + virtual const char* getOpName() const; +}; - class ExpressionSize : public ExpressionFixedArity<ExpressionSize, 1> { - public: - // virtuals from ExpressionNary - virtual Value evaluateInternal(Variables* vars) const; - virtual const char* getOpName() const; - }; +class ExpressionSize : public ExpressionFixedArity<ExpressionSize, 1> { +public: + // virtuals from ExpressionNary + virtual Value evaluateInternal(Variables* vars) const; + virtual const char* getOpName() const; +}; - class ExpressionStrcasecmp : public ExpressionFixedArity<ExpressionStrcasecmp, 2> { - public: - // virtuals from ExpressionNary - virtual Value evaluateInternal(Variables* vars) const; - virtual const char* getOpName() const; - }; +class ExpressionStrcasecmp : public ExpressionFixedArity<ExpressionStrcasecmp, 2> { +public: + // virtuals from ExpressionNary + virtual Value evaluateInternal(Variables* vars) const; + virtual const char* getOpName() const; +}; - class ExpressionSubstr : public ExpressionFixedArity<ExpressionSubstr, 3> { - public: - // virtuals from ExpressionNary - virtual Value evaluateInternal(Variables* vars) const; - virtual const char* getOpName() const; - }; +class ExpressionSubstr : public ExpressionFixedArity<ExpressionSubstr, 3> { +public: + // virtuals from ExpressionNary + virtual Value evaluateInternal(Variables* vars) const; + virtual const char* getOpName() const; +}; - class ExpressionSubtract : public ExpressionFixedArity<ExpressionSubtract, 2> { - public: - // virtuals from ExpressionNary - virtual Value evaluateInternal(Variables* vars) const; - virtual const char* getOpName() const; - }; +class ExpressionSubtract : public ExpressionFixedArity<ExpressionSubtract, 2> { +public: + // virtuals from ExpressionNary + virtual Value evaluateInternal(Variables* vars) const; + virtual const char* getOpName() const; +}; - class ExpressionToLower : public ExpressionFixedArity<ExpressionToLower, 1> { - public: - // virtuals from ExpressionNary - virtual Value evaluateInternal(Variables* vars) 
const;
-        virtual const char* getOpName() const;
-    };
+class ExpressionToLower : public ExpressionFixedArity<ExpressionToLower, 1> {
+public:
+    // virtuals from ExpressionNary
+    virtual Value evaluateInternal(Variables* vars) const;
+    virtual const char* getOpName() const;
+};
-    class ExpressionToUpper : public ExpressionFixedArity<ExpressionToUpper, 1> {
-    public:
-        // virtuals from ExpressionNary
-        virtual Value evaluateInternal(Variables* vars) const;
-        virtual const char* getOpName() const;
-    };
+class ExpressionToUpper : public ExpressionFixedArity<ExpressionToUpper, 1> {
+public:
+    // virtuals from ExpressionNary
+    virtual Value evaluateInternal(Variables* vars) const;
+    virtual const char* getOpName() const;
+};
-    class ExpressionWeek : public ExpressionFixedArity<ExpressionWeek, 1> {
-    public:
-        // virtuals from ExpressionNary
-        virtual Value evaluateInternal(Variables* vars) const;
-        virtual const char* getOpName() const;
+class ExpressionWeek : public ExpressionFixedArity<ExpressionWeek, 1> {
+public:
+    // virtuals from ExpressionNary
+    virtual Value evaluateInternal(Variables* vars) const;
+    virtual const char* getOpName() const;
-        static int extract(const tm& tm);
-    };
+    static int extract(const tm& tm);
+};
-    class ExpressionYear : public ExpressionFixedArity<ExpressionYear, 1> {
-    public:
-        // virtuals from ExpressionNary
-        virtual Value evaluateInternal(Variables* vars) const;
-        virtual const char* getOpName() const;
+class ExpressionYear : public ExpressionFixedArity<ExpressionYear, 1> {
+public:
+    // virtuals from ExpressionNary
+    virtual Value evaluateInternal(Variables* vars) const;
+    virtual const char* getOpName() const;
-        // tm_year is years since 1990
-        static int extract(const tm& tm) { return tm.tm_year + 1900; }
-    };
+    // tm_year is years since 1900
+    static int extract(const tm& tm) {
+        return tm.tm_year + 1900;
+    }
+};
}
@@ -1075,11 +1090,11 @@ namespace mongo {
namespace mongo {
-    inline Value ExpressionConstant::getValue() const {
-        return pValue;
-    }
+inline Value ExpressionConstant::getValue() const {
+    return pValue;
+}
-    inline size_t ExpressionObject::getFieldCount() const {
-        return _expressions.size();
-    }
+inline size_t ExpressionObject::getFieldCount() const {
+    return _expressions.size();
+}
}
diff --git a/src/mongo/db/pipeline/expression_context.h b/src/mongo/db/pipeline/expression_context.h
index 96cce9e4b4f..d4e18dcc582 100644
--- a/src/mongo/db/pipeline/expression_context.h
+++ b/src/mongo/db/pipeline/expression_context.h
@@ -36,34 +36,31 @@
namespace mongo {
-    struct ExpressionContext : public IntrusiveCounterUnsigned {
-    public:
-        ExpressionContext(OperationContext* opCtx, const NamespaceString& ns)
-            : ns(ns)
-            , opCtx(opCtx)
-        {}
+struct ExpressionContext : public IntrusiveCounterUnsigned {
+public:
+    ExpressionContext(OperationContext* opCtx, const NamespaceString& ns) : ns(ns), opCtx(opCtx) {}
-    /** Used by a pipeline to check for interrupts so that killOp() works.
+ * @throws if the operation has been interrupted + */ + void checkForInterrupt() { + if (opCtx && --interruptCounter == 0) { // XXX SERVER-13931 for opCtx check + // The checkForInterrupt could be expensive, at least in relative terms. + opCtx->checkForInterrupt(); + interruptCounter = kInterruptCheckPeriod; } + } - bool inShard = false; - bool inRouter = false; - bool extSortAllowed = false; - bool bypassDocumentValidation = false; + bool inShard = false; + bool inRouter = false; + bool extSortAllowed = false; + bool bypassDocumentValidation = false; - NamespaceString ns; - std::string tempDir; // Defaults to empty to prevent external sorting in mongos. + NamespaceString ns; + std::string tempDir; // Defaults to empty to prevent external sorting in mongos. - OperationContext* opCtx; - static const int kInterruptCheckPeriod = 128; - int interruptCounter = kInterruptCheckPeriod; // when 0, check interruptStatus - }; + OperationContext* opCtx; + static const int kInterruptCheckPeriod = 128; + int interruptCounter = kInterruptCheckPeriod; // when 0, check interruptStatus +}; } diff --git a/src/mongo/db/pipeline/field_path.cpp b/src/mongo/db/pipeline/field_path.cpp index 49115178a02..c2c934de41a 100644 --- a/src/mongo/db/pipeline/field_path.cpp +++ b/src/mongo/db/pipeline/field_path.cpp @@ -34,87 +34,85 @@ namespace mongo { - using std::ostream; - using std::string; - using std::stringstream; - using std::vector; +using std::ostream; +using std::string; +using std::stringstream; +using std::vector; - using namespace mongoutils; +using namespace mongoutils; - const char FieldPath::prefix[] = "$"; +const char FieldPath::prefix[] = "$"; - FieldPath::FieldPath(const vector<string>& fieldPath) { - massert(16409, "FieldPath cannot be constructed from an empty vector.", !fieldPath.empty()); - vFieldName.reserve(fieldPath.size()); - for(vector<string>::const_iterator i = fieldPath.begin(); i != fieldPath.end(); ++i) { - pushFieldName(*i); - } - verify(getPathLength() > 0); +FieldPath::FieldPath(const vector<string>& fieldPath) { + massert(16409, "FieldPath cannot be constructed from an empty vector.", !fieldPath.empty()); + vFieldName.reserve(fieldPath.size()); + for (vector<string>::const_iterator i = fieldPath.begin(); i != fieldPath.end(); ++i) { + pushFieldName(*i); } + verify(getPathLength() > 0); +} - FieldPath::FieldPath(const string& fieldPath) { - /* - The field path could be using dot notation. - Break the field path up by peeling off successive pieces. - */ - size_t startpos = 0; - while(true) { - /* find the next dot */ - const size_t dotpos = fieldPath.find('.', startpos); - - /* if there are no more dots, use the remainder of the string */ - if (dotpos == fieldPath.npos) { - string lastFieldName = fieldPath.substr(startpos, dotpos); - pushFieldName(lastFieldName); - break; - } - - /* use the string up to the dot */ - const size_t length = dotpos - startpos; - string nextFieldName = fieldPath.substr(startpos, length); - pushFieldName(nextFieldName); - - /* next time, search starting one spot after that */ - startpos = dotpos + 1; +FieldPath::FieldPath(const string& fieldPath) { + /* + The field path could be using dot notation. + Break the field path up by peeling off successive pieces. 
+ */ + size_t startpos = 0; + while (true) { + /* find the next dot */ + const size_t dotpos = fieldPath.find('.', startpos); + + /* if there are no more dots, use the remainder of the string */ + if (dotpos == fieldPath.npos) { + string lastFieldName = fieldPath.substr(startpos, dotpos); + pushFieldName(lastFieldName); + break; } - verify(getPathLength() > 0); - } - string FieldPath::getPath(bool fieldPrefix) const { - stringstream ss; - writePath(ss, fieldPrefix); - return ss.str(); + /* use the string up to the dot */ + const size_t length = dotpos - startpos; + string nextFieldName = fieldPath.substr(startpos, length); + pushFieldName(nextFieldName); + + /* next time, search starting one spot after that */ + startpos = dotpos + 1; } + verify(getPathLength() > 0); +} - void FieldPath::writePath(ostream &outStream, bool fieldPrefix) const { - if (fieldPrefix) - outStream << prefix; +string FieldPath::getPath(bool fieldPrefix) const { + stringstream ss; + writePath(ss, fieldPrefix); + return ss.str(); +} - const size_t n = vFieldName.size(); +void FieldPath::writePath(ostream& outStream, bool fieldPrefix) const { + if (fieldPrefix) + outStream << prefix; - verify(n > 0); - outStream << vFieldName[0]; - for(size_t i = 1; i < n; ++i) - outStream << '.' << vFieldName[i]; - } + const size_t n = vFieldName.size(); - FieldPath FieldPath::tail() const { - vector<string> allButFirst(vFieldName.begin()+1, vFieldName.end()); - return FieldPath(allButFirst); - } + verify(n > 0); + outStream << vFieldName[0]; + for (size_t i = 1; i < n; ++i) + outStream << '.' << vFieldName[i]; +} - void FieldPath::uassertValidFieldName(const string& fieldName) { - uassert(15998, "FieldPath field names may not be empty strings.", fieldName.length() > 0); - uassert(16410, "FieldPath field names may not start with '$'.", fieldName[0] != '$'); - uassert(16411, "FieldPath field names may not contain '\0'.", - fieldName.find('\0') == string::npos); - uassert(16412, "FieldPath field names may not contain '.'.", - !str::contains(fieldName, '.')); - } +FieldPath FieldPath::tail() const { + vector<string> allButFirst(vFieldName.begin() + 1, vFieldName.end()); + return FieldPath(allButFirst); +} - void FieldPath::pushFieldName(const string& fieldName) { - uassertValidFieldName(fieldName); - vFieldName.push_back(fieldName); - } +void FieldPath::uassertValidFieldName(const string& fieldName) { + uassert(15998, "FieldPath field names may not be empty strings.", fieldName.length() > 0); + uassert(16410, "FieldPath field names may not start with '$'.", fieldName[0] != '$'); + uassert( + 16411, "FieldPath field names may not contain '\0'.", fieldName.find('\0') == string::npos); + uassert(16412, "FieldPath field names may not contain '.'.", !str::contains(fieldName, '.')); +} +void FieldPath::pushFieldName(const string& fieldName) { + uassertValidFieldName(fieldName); + vFieldName.push_back(fieldName); +} } diff --git a/src/mongo/db/pipeline/field_path.h b/src/mongo/db/pipeline/field_path.h index f400ca421ce..84dca905fdb 100644 --- a/src/mongo/db/pipeline/field_path.h +++ b/src/mongo/db/pipeline/field_path.h @@ -33,77 +33,76 @@ namespace mongo { - class FieldPath { - public: - - /** - * Constructor. - * - * @param fieldPath the dotted field path std::string or non empty pre-split vector. - * The constructed object will have getPathLength() > 0. - * Uassert if any component field names do not pass validation. 
- */ - FieldPath(const std::string& fieldPath); - FieldPath(const std::vector<std::string>& fieldPath); - - /** - Get the number of path elements in the field path. - - @returns the number of path elements - */ - size_t getPathLength() const; - - /** - Get a particular path element from the path. - - @param i the zero based index of the path element. - @returns the path element - */ - const std::string& getFieldName(size_t i) const; - - /** - Get the full path. - - @param fieldPrefix whether or not to include the field prefix - @returns the complete field path - */ - std::string getPath(bool fieldPrefix) const; - - /** - Write the full path. - - @param outStream where to write the path to - @param fieldPrefix whether or not to include the field prefix - */ - void writePath(std::ostream &outStream, bool fieldPrefix) const; - - /** - Get the prefix string. - - @returns the prefix string - */ - static const char *getPrefix(); - - static const char prefix[]; - - /** - * A FieldPath like this but missing the first element (useful for recursion). - * Precondition getPathLength() > 1. - */ - FieldPath tail() const; - - private: - /** Uassert if a field name does not pass validation. */ - static void uassertValidFieldName(const std::string& fieldName); - - /** - * Push a new field name to the back of the vector of names comprising the field path. - * Uassert if 'fieldName' does not pass validation. - */ - void pushFieldName(const std::string& fieldName); - - std::vector<std::string> vFieldName; - }; +class FieldPath { +public: + /** + * Constructor. + * + * @param fieldPath the dotted field path std::string or non empty pre-split vector. + * The constructed object will have getPathLength() > 0. + * Uassert if any component field names do not pass validation. + */ + FieldPath(const std::string& fieldPath); + FieldPath(const std::vector<std::string>& fieldPath); + + /** + Get the number of path elements in the field path. + + @returns the number of path elements + */ + size_t getPathLength() const; + + /** + Get a particular path element from the path. + + @param i the zero based index of the path element. + @returns the path element + */ + const std::string& getFieldName(size_t i) const; + + /** + Get the full path. + + @param fieldPrefix whether or not to include the field prefix + @returns the complete field path + */ + std::string getPath(bool fieldPrefix) const; + + /** + Write the full path. + + @param outStream where to write the path to + @param fieldPrefix whether or not to include the field prefix + */ + void writePath(std::ostream& outStream, bool fieldPrefix) const; + + /** + Get the prefix string. + + @returns the prefix string + */ + static const char* getPrefix(); + + static const char prefix[]; + + /** + * A FieldPath like this but missing the first element (useful for recursion). + * Precondition getPathLength() > 1. + */ + FieldPath tail() const; + +private: + /** Uassert if a field name does not pass validation. */ + static void uassertValidFieldName(const std::string& fieldName); + + /** + * Push a new field name to the back of the vector of names comprising the field path. + * Uassert if 'fieldName' does not pass validation. 
+ */ + void pushFieldName(const std::string& fieldName); + + std::vector<std::string> vFieldName; +}; } @@ -111,18 +110,16 @@ namespace mongo { namespace mongo { - inline size_t FieldPath::getPathLength() const { - return vFieldName.size(); - } - - inline const std::string& FieldPath::getFieldName(size_t i) const { - dassert(i < getPathLength()); - return vFieldName[i]; - } - - inline const char *FieldPath::getPrefix() { - return prefix; - } +inline size_t FieldPath::getPathLength() const { + return vFieldName.size(); +} +inline const std::string& FieldPath::getFieldName(size_t i) const { + dassert(i < getPathLength()); + return vFieldName[i]; } +inline const char* FieldPath::getPrefix() { + return prefix; +} +} diff --git a/src/mongo/db/pipeline/field_path_test.cpp b/src/mongo/db/pipeline/field_path_test.cpp index 052d2321248..63c0216a76d 100644 --- a/src/mongo/db/pipeline/field_path_test.cpp +++ b/src/mongo/db/pipeline/field_path_test.cpp @@ -34,220 +34,219 @@ #include "mongo/dbtests/dbtests.h" namespace mongo { - using std::string; - using std::vector; - - /** FieldPath constructed from empty string. */ - class Empty { - public: - void run() { - ASSERT_THROWS( FieldPath path( "" ), UserException ); - } - }; - - /** FieldPath constructed from empty vector. */ - class EmptyVector { - public: - void run() { - vector<string> vec; - ASSERT_THROWS( FieldPath path( vec ), MsgAssertionException ); - } - }; - - /** FieldPath constructed from a simple string (without dots). */ - class Simple { - public: - void run() { - FieldPath path( "foo" ); - ASSERT_EQUALS( 1U, path.getPathLength() ); - ASSERT_EQUALS( "foo", path.getFieldName( 0 ) ); - ASSERT_EQUALS( "foo", path.getPath( false ) ); - ASSERT_EQUALS( "$foo", path.getPath( true ) ); - } - }; - - /** FieldPath constructed from a single element vector. */ - class SimpleVector { - public: - void run() { - vector<string> vec( 1, "foo" ); - FieldPath path( vec ); - ASSERT_EQUALS( 1U, path.getPathLength() ); - ASSERT_EQUALS( "foo", path.getFieldName( 0 ) ); - ASSERT_EQUALS( "foo", path.getPath( false ) ); - } - }; - - /** FieldPath consisting of a '$' character. */ - class DollarSign { - public: - void run() { - ASSERT_THROWS( FieldPath path( "$" ), UserException ); - } - }; - - /** FieldPath with a '$' prefix. */ - class DollarSignPrefix { - public: - void run() { - ASSERT_THROWS( FieldPath path( "$a" ), UserException ); - } - }; - - /** FieldPath constructed from a string with one dot. */ - class Dotted { - public: - void run() { - FieldPath path( "foo.bar" ); - ASSERT_EQUALS( 2U, path.getPathLength() ); - ASSERT_EQUALS( "foo", path.getFieldName( 0 ) ); - ASSERT_EQUALS( "bar", path.getFieldName( 1 ) ); - ASSERT_EQUALS( "foo.bar", path.getPath( false ) ); - ASSERT_EQUALS( "$foo.bar", path.getPath( true ) ); - } - }; - - /** FieldPath constructed from a single element vector containing a dot. */ - class VectorWithDot { - public: - void run() { - vector<string> vec( 1, "fo.o" ); - ASSERT_THROWS( FieldPath path( vec ), UserException ); - } - }; - - /** FieldPath constructed from a two element vector. */ - class TwoFieldVector { - public: - void run() { - vector<string> vec; - vec.push_back( "foo" ); - vec.push_back( "bar" ); - FieldPath path( vec ); - ASSERT_EQUALS( 2U, path.getPathLength() ); - ASSERT_EQUALS( "foo.bar", path.getPath( false ) ); - } - }; - - /** FieldPath with a '$' prefix in the second field. 
*/ - class DollarSignPrefixSecondField { - public: - void run() { - ASSERT_THROWS( FieldPath path( "a.$b" ), UserException ); - } - }; - - /** FieldPath constructed from a string with two dots. */ - class TwoDotted { - public: - void run() { - FieldPath path( "foo.bar.baz" ); - ASSERT_EQUALS( 3U, path.getPathLength() ); - ASSERT_EQUALS( "foo", path.getFieldName( 0 ) ); - ASSERT_EQUALS( "bar", path.getFieldName( 1 ) ); - ASSERT_EQUALS( "baz", path.getFieldName( 2 ) ); - ASSERT_EQUALS( "foo.bar.baz", path.getPath( false ) ); - } - }; - - /** FieldPath constructed from a string ending in a dot. */ - class TerminalDot { - public: - void run() { - ASSERT_THROWS( FieldPath path( "foo." ), UserException ); - } - }; - - /** FieldPath constructed from a string beginning with a dot. */ - class PrefixDot { - public: - void run() { - ASSERT_THROWS( FieldPath path( ".foo" ), UserException ); - } - }; - - /** FieldPath constructed from a string with adjacent dots. */ - class AdjacentDots { - public: - void run() { - ASSERT_THROWS( FieldPath path( "foo..bar" ), UserException ); - } - }; - - /** FieldPath constructed from a string with one letter between two dots. */ - class LetterBetweenDots { - public: - void run() { - FieldPath path( "foo.a.bar" ); - ASSERT_EQUALS( 3U, path.getPathLength() ); - ASSERT_EQUALS( "foo.a.bar", path.getPath( false ) ); - } - }; - - /** FieldPath containing a null character. */ - class NullCharacter { - public: - void run() { - ASSERT_THROWS( FieldPath path( string( "foo.b\0r", 7 ) ), UserException ); - } - }; - - /** FieldPath constructed with a vector containing a null character. */ - class VectorNullCharacter { - public: - void run() { - vector<string> vec; - vec.push_back( "foo" ); - vec.push_back( string( "b\0r", 3 ) ); - ASSERT_THROWS( FieldPath path( vec ), UserException ); - } - }; - - /** Tail of a FieldPath. */ - class Tail { - public: - void run() { - FieldPath path = FieldPath( "foo.bar" ).tail(); - ASSERT_EQUALS( 1U, path.getPathLength() ); - ASSERT_EQUALS( "bar", path.getPath( false ) ); - } - }; - - /** Tail of a FieldPath with three fields. */ - class TailThreeFields { - public: - void run() { - FieldPath path = FieldPath( "foo.bar.baz" ).tail(); - ASSERT_EQUALS( 2U, path.getPathLength() ); - ASSERT_EQUALS( "bar.baz", path.getPath( false ) ); - } - }; - - class All : public Suite { - public: - All() : Suite( "field_path" ) { - } - void setupTests() { - add<Empty>(); - add<EmptyVector>(); - add<Simple>(); - add<SimpleVector>(); - add<DollarSign>(); - add<DollarSignPrefix>(); - add<Dotted>(); - add<VectorWithDot>(); - add<TwoFieldVector>(); - add<DollarSignPrefixSecondField>(); - add<TwoDotted>(); - add<TerminalDot>(); - add<PrefixDot>(); - add<AdjacentDots>(); - add<LetterBetweenDots>(); - add<NullCharacter>(); - add<VectorNullCharacter>(); - add<Tail>(); - add<TailThreeFields>(); - } - }; - SuiteInstance<All> myall; -} // namespace mongo +using std::string; +using std::vector; + +/** FieldPath constructed from empty string. */ +class Empty { +public: + void run() { + ASSERT_THROWS(FieldPath path(""), UserException); + } +}; + +/** FieldPath constructed from empty vector. */ +class EmptyVector { +public: + void run() { + vector<string> vec; + ASSERT_THROWS(FieldPath path(vec), MsgAssertionException); + } +}; + +/** FieldPath constructed from a simple string (without dots). 
*/ +class Simple { +public: + void run() { + FieldPath path("foo"); + ASSERT_EQUALS(1U, path.getPathLength()); + ASSERT_EQUALS("foo", path.getFieldName(0)); + ASSERT_EQUALS("foo", path.getPath(false)); + ASSERT_EQUALS("$foo", path.getPath(true)); + } +}; + +/** FieldPath constructed from a single element vector. */ +class SimpleVector { +public: + void run() { + vector<string> vec(1, "foo"); + FieldPath path(vec); + ASSERT_EQUALS(1U, path.getPathLength()); + ASSERT_EQUALS("foo", path.getFieldName(0)); + ASSERT_EQUALS("foo", path.getPath(false)); + } +}; + +/** FieldPath consisting of a '$' character. */ +class DollarSign { +public: + void run() { + ASSERT_THROWS(FieldPath path("$"), UserException); + } +}; + +/** FieldPath with a '$' prefix. */ +class DollarSignPrefix { +public: + void run() { + ASSERT_THROWS(FieldPath path("$a"), UserException); + } +}; + +/** FieldPath constructed from a string with one dot. */ +class Dotted { +public: + void run() { + FieldPath path("foo.bar"); + ASSERT_EQUALS(2U, path.getPathLength()); + ASSERT_EQUALS("foo", path.getFieldName(0)); + ASSERT_EQUALS("bar", path.getFieldName(1)); + ASSERT_EQUALS("foo.bar", path.getPath(false)); + ASSERT_EQUALS("$foo.bar", path.getPath(true)); + } +}; + +/** FieldPath constructed from a single element vector containing a dot. */ +class VectorWithDot { +public: + void run() { + vector<string> vec(1, "fo.o"); + ASSERT_THROWS(FieldPath path(vec), UserException); + } +}; + +/** FieldPath constructed from a two element vector. */ +class TwoFieldVector { +public: + void run() { + vector<string> vec; + vec.push_back("foo"); + vec.push_back("bar"); + FieldPath path(vec); + ASSERT_EQUALS(2U, path.getPathLength()); + ASSERT_EQUALS("foo.bar", path.getPath(false)); + } +}; + +/** FieldPath with a '$' prefix in the second field. */ +class DollarSignPrefixSecondField { +public: + void run() { + ASSERT_THROWS(FieldPath path("a.$b"), UserException); + } +}; + +/** FieldPath constructed from a string with two dots. */ +class TwoDotted { +public: + void run() { + FieldPath path("foo.bar.baz"); + ASSERT_EQUALS(3U, path.getPathLength()); + ASSERT_EQUALS("foo", path.getFieldName(0)); + ASSERT_EQUALS("bar", path.getFieldName(1)); + ASSERT_EQUALS("baz", path.getFieldName(2)); + ASSERT_EQUALS("foo.bar.baz", path.getPath(false)); + } +}; + +/** FieldPath constructed from a string ending in a dot. */ +class TerminalDot { +public: + void run() { + ASSERT_THROWS(FieldPath path("foo."), UserException); + } +}; + +/** FieldPath constructed from a string beginning with a dot. */ +class PrefixDot { +public: + void run() { + ASSERT_THROWS(FieldPath path(".foo"), UserException); + } +}; + +/** FieldPath constructed from a string with adjacent dots. */ +class AdjacentDots { +public: + void run() { + ASSERT_THROWS(FieldPath path("foo..bar"), UserException); + } +}; + +/** FieldPath constructed from a string with one letter between two dots. */ +class LetterBetweenDots { +public: + void run() { + FieldPath path("foo.a.bar"); + ASSERT_EQUALS(3U, path.getPathLength()); + ASSERT_EQUALS("foo.a.bar", path.getPath(false)); + } +}; + +/** FieldPath containing a null character. */ +class NullCharacter { +public: + void run() { + ASSERT_THROWS(FieldPath path(string("foo.b\0r", 7)), UserException); + } +}; + +/** FieldPath constructed with a vector containing a null character. 
*/ +class VectorNullCharacter { +public: + void run() { + vector<string> vec; + vec.push_back("foo"); + vec.push_back(string("b\0r", 3)); + ASSERT_THROWS(FieldPath path(vec), UserException); + } +}; + +/** Tail of a FieldPath. */ +class Tail { +public: + void run() { + FieldPath path = FieldPath("foo.bar").tail(); + ASSERT_EQUALS(1U, path.getPathLength()); + ASSERT_EQUALS("bar", path.getPath(false)); + } +}; + +/** Tail of a FieldPath with three fields. */ +class TailThreeFields { +public: + void run() { + FieldPath path = FieldPath("foo.bar.baz").tail(); + ASSERT_EQUALS(2U, path.getPathLength()); + ASSERT_EQUALS("bar.baz", path.getPath(false)); + } +}; + +class All : public Suite { +public: + All() : Suite("field_path") {} + void setupTests() { + add<Empty>(); + add<EmptyVector>(); + add<Simple>(); + add<SimpleVector>(); + add<DollarSign>(); + add<DollarSignPrefix>(); + add<Dotted>(); + add<VectorWithDot>(); + add<TwoFieldVector>(); + add<DollarSignPrefixSecondField>(); + add<TwoDotted>(); + add<TerminalDot>(); + add<PrefixDot>(); + add<AdjacentDots>(); + add<LetterBetweenDots>(); + add<NullCharacter>(); + add<VectorNullCharacter>(); + add<Tail>(); + add<TailThreeFields>(); + } +}; +SuiteInstance<All> myall; +} // namespace mongo diff --git a/src/mongo/db/pipeline/pipeline.cpp b/src/mongo/db/pipeline/pipeline.cpp index bba6bf9615f..9e9427190f1 100644 --- a/src/mongo/db/pipeline/pipeline.cpp +++ b/src/mongo/db/pipeline/pipeline.cpp @@ -46,597 +46,572 @@ namespace mongo { - using boost::intrusive_ptr; - using std::endl; - using std::ostringstream; - using std::string; - using std::vector; - - const char Pipeline::commandName[] = "aggregate"; - const char Pipeline::pipelineName[] = "pipeline"; - const char Pipeline::explainName[] = "explain"; - const char Pipeline::fromRouterName[] = "fromRouter"; - const char Pipeline::serverPipelineName[] = "serverPipeline"; - const char Pipeline::mongosPipelineName[] = "mongosPipeline"; - - Pipeline::Pipeline(const intrusive_ptr<ExpressionContext> &pTheCtx): - explain(false), - pCtx(pTheCtx) { - } - - - /* this structure is used to make a lookup table of operators */ - struct StageDesc { - const char *pName; - intrusive_ptr<DocumentSource> (*pFactory)( - BSONElement, const intrusive_ptr<ExpressionContext> &); - }; - - /* this table must be in alphabetical order by name for bsearch() */ - static const StageDesc stageDesc[] = { - {DocumentSourceGeoNear::geoNearName, - DocumentSourceGeoNear::createFromBson}, - {DocumentSourceGroup::groupName, - DocumentSourceGroup::createFromBson}, - {DocumentSourceLimit::limitName, - DocumentSourceLimit::createFromBson}, - {DocumentSourceMatch::matchName, - DocumentSourceMatch::createFromBson}, - {DocumentSourceMergeCursors::name, - DocumentSourceMergeCursors::createFromBson}, - {DocumentSourceOut::outName, - DocumentSourceOut::createFromBson}, - {DocumentSourceProject::projectName, - DocumentSourceProject::createFromBson}, - {DocumentSourceRedact::redactName, - DocumentSourceRedact::createFromBson}, - {DocumentSourceSkip::skipName, - DocumentSourceSkip::createFromBson}, - {DocumentSourceSort::sortName, - DocumentSourceSort::createFromBson}, - {DocumentSourceUnwind::unwindName, - DocumentSourceUnwind::createFromBson}, - }; - static const size_t nStageDesc = sizeof(stageDesc) / sizeof(StageDesc); - - static int stageDescCmp(const void *pL, const void *pR) { - return strcmp(((const StageDesc *)pL)->pName, - ((const StageDesc *)pR)->pName); - } - - intrusive_ptr<Pipeline> Pipeline::parseCommand(string& errmsg, - const BSONObj& 
cmdObj, - const intrusive_ptr<ExpressionContext>& pCtx) { - intrusive_ptr<Pipeline> pPipeline(new Pipeline(pCtx)); - vector<BSONElement> pipeline; - - /* gather the specification for the aggregation */ - for(BSONObj::iterator cmdIterator = cmdObj.begin(); - cmdIterator.more(); ) { - BSONElement cmdElement(cmdIterator.next()); - const char *pFieldName = cmdElement.fieldName(); - - // ignore top-level fields prefixed with $. They are for the command processor, not us. - if (pFieldName[0] == '$') { - continue; - } - - // maxTimeMS is also for the command processor. - if (pFieldName == LiteParsedQuery::cmdOptionMaxTimeMS) { - continue; - } - - // ignore cursor options since they are handled externally. - if (str::equals(pFieldName, "cursor")) { - continue; - } +using boost::intrusive_ptr; +using std::endl; +using std::ostringstream; +using std::string; +using std::vector; + +const char Pipeline::commandName[] = "aggregate"; +const char Pipeline::pipelineName[] = "pipeline"; +const char Pipeline::explainName[] = "explain"; +const char Pipeline::fromRouterName[] = "fromRouter"; +const char Pipeline::serverPipelineName[] = "serverPipeline"; +const char Pipeline::mongosPipelineName[] = "mongosPipeline"; + +Pipeline::Pipeline(const intrusive_ptr<ExpressionContext>& pTheCtx) + : explain(false), pCtx(pTheCtx) {} + + +/* this structure is used to make a lookup table of operators */ +struct StageDesc { + const char* pName; + intrusive_ptr<DocumentSource>(*pFactory)(BSONElement, const intrusive_ptr<ExpressionContext>&); +}; + +/* this table must be in alphabetical order by name for bsearch() */ +static const StageDesc stageDesc[] = { + {DocumentSourceGeoNear::geoNearName, DocumentSourceGeoNear::createFromBson}, + {DocumentSourceGroup::groupName, DocumentSourceGroup::createFromBson}, + {DocumentSourceLimit::limitName, DocumentSourceLimit::createFromBson}, + {DocumentSourceMatch::matchName, DocumentSourceMatch::createFromBson}, + {DocumentSourceMergeCursors::name, DocumentSourceMergeCursors::createFromBson}, + {DocumentSourceOut::outName, DocumentSourceOut::createFromBson}, + {DocumentSourceProject::projectName, DocumentSourceProject::createFromBson}, + {DocumentSourceRedact::redactName, DocumentSourceRedact::createFromBson}, + {DocumentSourceSkip::skipName, DocumentSourceSkip::createFromBson}, + {DocumentSourceSort::sortName, DocumentSourceSort::createFromBson}, + {DocumentSourceUnwind::unwindName, DocumentSourceUnwind::createFromBson}, +}; +static const size_t nStageDesc = sizeof(stageDesc) / sizeof(StageDesc); + +static int stageDescCmp(const void* pL, const void* pR) { + return strcmp(((const StageDesc*)pL)->pName, ((const StageDesc*)pR)->pName); +} + +intrusive_ptr<Pipeline> Pipeline::parseCommand(string& errmsg, + const BSONObj& cmdObj, + const intrusive_ptr<ExpressionContext>& pCtx) { + intrusive_ptr<Pipeline> pPipeline(new Pipeline(pCtx)); + vector<BSONElement> pipeline; + + /* gather the specification for the aggregation */ + for (BSONObj::iterator cmdIterator = cmdObj.begin(); cmdIterator.more();) { + BSONElement cmdElement(cmdIterator.next()); + const char* pFieldName = cmdElement.fieldName(); + + // ignore top-level fields prefixed with $. They are for the command processor, not us. + if (pFieldName[0] == '$') { + continue; + } - /* look for the aggregation command */ - if (!strcmp(pFieldName, commandName)) { - continue; - } + // maxTimeMS is also for the command processor. 
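
// For orientation, a sketch of the command shape that the field-gathering loop
// above accepts, written with the BSON builder macros. The collection name,
// filter, limit, and the header path are illustrative assumptions, not values
// taken from this change.
#include "mongo/db/jsobj.h"  // assumed header for the BSON()/BSON_ARRAY() macros

static const mongo::BSONObj kExampleAggregateCmd =
    BSON("aggregate" << "coll"                                     // target collection
                     << "pipeline" << BSON_ARRAY(BSON("$match" << BSON("x" << 1))
                                                 << BSON("$limit" << 5))  // the stage array
                     << "allowDiskUse" << true);                   // parsed into pCtx->extSortAllowed
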
+ if (pFieldName == LiteParsedQuery::cmdOptionMaxTimeMS) { + continue; + } - /* check for the collection name */ - if (!strcmp(pFieldName, pipelineName)) { - pipeline = cmdElement.Array(); - continue; - } + // ignore cursor options since they are handled externally. + if (str::equals(pFieldName, "cursor")) { + continue; + } - /* check for explain option */ - if (!strcmp(pFieldName, explainName)) { - pPipeline->explain = cmdElement.Bool(); - continue; - } + /* look for the aggregation command */ + if (!strcmp(pFieldName, commandName)) { + continue; + } - /* if the request came from the router, we're in a shard */ - if (!strcmp(pFieldName, fromRouterName)) { - pCtx->inShard = cmdElement.Bool(); - continue; - } + /* check for the collection name */ + if (!strcmp(pFieldName, pipelineName)) { + pipeline = cmdElement.Array(); + continue; + } - if (str::equals(pFieldName, "allowDiskUse")) { - uassert(16949, - str::stream() << "allowDiskUse must be a bool, not a " - << typeName(cmdElement.type()), - cmdElement.type() == Bool); - pCtx->extSortAllowed = cmdElement.Bool(); - continue; - } + /* check for explain option */ + if (!strcmp(pFieldName, explainName)) { + pPipeline->explain = cmdElement.Bool(); + continue; + } - if (pFieldName == bypassDocumentValidationCommandOption()) { - pCtx->bypassDocumentValidation = cmdElement.trueValue(); - continue; - } + /* if the request came from the router, we're in a shard */ + if (!strcmp(pFieldName, fromRouterName)) { + pCtx->inShard = cmdElement.Bool(); + continue; + } - /* we didn't recognize a field in the command */ - ostringstream sb; - sb << "unrecognized field '" << cmdElement.fieldName() << "'"; - errmsg = sb.str(); - return intrusive_ptr<Pipeline>(); + if (str::equals(pFieldName, "allowDiskUse")) { + uassert(16949, + str::stream() << "allowDiskUse must be a bool, not a " + << typeName(cmdElement.type()), + cmdElement.type() == Bool); + pCtx->extSortAllowed = cmdElement.Bool(); + continue; } - /* - If we get here, we've harvested the fields we expect for a pipeline. - - Set up the specified document source pipeline. - */ - SourceContainer& sources = pPipeline->sources; // shorthand - - /* iterate over the steps in the pipeline */ - const size_t nSteps = pipeline.size(); - for(size_t iStep = 0; iStep < nSteps; ++iStep) { - /* pull out the pipeline element as an object */ - BSONElement pipeElement(pipeline[iStep]); - uassert(15942, str::stream() << "pipeline element " << - iStep << " is not an object", - pipeElement.type() == Object); - BSONObj bsonObj(pipeElement.Obj()); - - // Parse a pipeline stage from 'bsonObj'. - uassert(16435, "A pipeline stage specification object must contain exactly one field.", - bsonObj.nFields() == 1); - BSONElement stageSpec = bsonObj.firstElement(); - const char* stageName = stageSpec.fieldName(); - - // Create a DocumentSource pipeline stage from 'stageSpec'. 
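
// Standalone sketch (not code from this change) of the sorted-table lookup the
// stage parser performs just below: because the stage table is kept in
// alphabetical order, bsearch() resolves a stage name in O(log n). The entry
// type and names here are illustrative stand-ins for StageDesc and its factory
// function pointers.
#include <cstdlib>
#include <cstring>

struct StageEntry {
    const char* name;
    int tag;  // stands in for the DocumentSource factory pointer
};

static const StageEntry kStages[] = {
    {"$group", 0}, {"$limit", 1}, {"$match", 2}, {"$sort", 3},  // must stay sorted by name
};

static int stageEntryCmp(const void* l, const void* r) {
    return std::strcmp(static_cast<const StageEntry*>(l)->name,
                       static_cast<const StageEntry*>(r)->name);
}

static const StageEntry* lookupStage(const char* name) {
    StageEntry key = {name, -1};
    return static_cast<const StageEntry*>(std::bsearch(
        &key, kStages, sizeof(kStages) / sizeof(kStages[0]), sizeof(StageEntry), stageEntryCmp));
}
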
- StageDesc key; - key.pName = stageName; - const StageDesc* pDesc = (const StageDesc*) - bsearch(&key, stageDesc, nStageDesc, sizeof(StageDesc), - stageDescCmp); - - uassert(16436, - str::stream() << "Unrecognized pipeline stage name: '" << stageName << "'", - pDesc); - intrusive_ptr<DocumentSource> stage = pDesc->pFactory(stageSpec, pCtx); - verify(stage); - sources.push_back(stage); - - // TODO find a good general way to check stages that must be first syntactically - - if (dynamic_cast<DocumentSourceOut*>(stage.get())) { - uassert(16991, "$out can only be the final stage in the pipeline", - iStep == nSteps - 1); - } + if (pFieldName == bypassDocumentValidationCommandOption()) { + pCtx->bypassDocumentValidation = cmdElement.trueValue(); + continue; } - // The order in which optimizations are applied can have significant impact on the - // efficiency of the final pipeline. Be Careful! - Optimizations::Local::moveMatchBeforeSort(pPipeline.get()); - Optimizations::Local::moveSkipAndLimitBeforeProject(pPipeline.get()); - Optimizations::Local::moveLimitBeforeSkip(pPipeline.get()); - Optimizations::Local::coalesceAdjacent(pPipeline.get()); - Optimizations::Local::optimizeEachDocumentSource(pPipeline.get()); - Optimizations::Local::duplicateMatchBeforeInitalRedact(pPipeline.get()); + /* we didn't recognize a field in the command */ + ostringstream sb; + sb << "unrecognized field '" << cmdElement.fieldName() << "'"; + errmsg = sb.str(); + return intrusive_ptr<Pipeline>(); + } - return pPipeline; + /* + If we get here, we've harvested the fields we expect for a pipeline. + + Set up the specified document source pipeline. + */ + SourceContainer& sources = pPipeline->sources; // shorthand + + /* iterate over the steps in the pipeline */ + const size_t nSteps = pipeline.size(); + for (size_t iStep = 0; iStep < nSteps; ++iStep) { + /* pull out the pipeline element as an object */ + BSONElement pipeElement(pipeline[iStep]); + uassert(15942, + str::stream() << "pipeline element " << iStep << " is not an object", + pipeElement.type() == Object); + BSONObj bsonObj(pipeElement.Obj()); + + // Parse a pipeline stage from 'bsonObj'. + uassert(16435, + "A pipeline stage specification object must contain exactly one field.", + bsonObj.nFields() == 1); + BSONElement stageSpec = bsonObj.firstElement(); + const char* stageName = stageSpec.fieldName(); + + // Create a DocumentSource pipeline stage from 'stageSpec'. + StageDesc key; + key.pName = stageName; + const StageDesc* pDesc = + (const StageDesc*)bsearch(&key, stageDesc, nStageDesc, sizeof(StageDesc), stageDescCmp); + + uassert(16436, + str::stream() << "Unrecognized pipeline stage name: '" << stageName << "'", + pDesc); + intrusive_ptr<DocumentSource> stage = pDesc->pFactory(stageSpec, pCtx); + verify(stage); + sources.push_back(stage); + + // TODO find a good general way to check stages that must be first syntactically + + if (dynamic_cast<DocumentSourceOut*>(stage.get())) { + uassert(16991, "$out can only be the final stage in the pipeline", iStep == nSteps - 1); + } } - void Pipeline::Optimizations::Local::moveMatchBeforeSort(Pipeline* pipeline) { - // TODO Keep moving matches across multiple sorts as moveLimitBeforeSkip does below. - // TODO Check sort for limit. Not an issue currently due to order optimizations are applied, - // but should be fixed. 
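
// Standalone sketch (not code from this change) of the adjacent-swap pass that
// moveMatchBeforeSort() performs on both sides of this hunk: a $match directly
// after a $sort selects the same documents either way, so running it first
// lets the sort see fewer documents. The real pass also skips $text matches,
// which must stay where the text score is produced. Stage names here stand in
// for DocumentSource pointers.
#include <string>
#include <utility>
#include <vector>

static void swapMatchBeforeSort(std::vector<std::string>& stages) {
    for (size_t i = 1; i < stages.size(); ++i) {
        if (stages[i] == "$match" && stages[i - 1] == "$sort") {
            std::swap(stages[i - 1], stages[i]);  // the $match now runs first
        }
    }
}
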
-        SourceContainer& sources = pipeline->sources;
-        for (size_t srcn = sources.size(), srci = 1; srci < srcn; ++srci) {
-            intrusive_ptr<DocumentSource> &pSource = sources[srci];
-            DocumentSourceMatch* match = dynamic_cast<DocumentSourceMatch *>(pSource.get());
-            if (match && !match->isTextQuery()) {
-                intrusive_ptr<DocumentSource> &pPrevious = sources[srci - 1];
-                if (dynamic_cast<DocumentSourceSort *>(pPrevious.get())) {
-                    /* swap this item with the previous */
-                    intrusive_ptr<DocumentSource> pTemp(pPrevious);
-                    pPrevious = pSource;
-                    pSource = pTemp;
-                }
+    // The order in which optimizations are applied can have significant impact on the
+    // efficiency of the final pipeline. Be Careful!
+    Optimizations::Local::moveMatchBeforeSort(pPipeline.get());
+    Optimizations::Local::moveSkipAndLimitBeforeProject(pPipeline.get());
+    Optimizations::Local::moveLimitBeforeSkip(pPipeline.get());
+    Optimizations::Local::coalesceAdjacent(pPipeline.get());
+    Optimizations::Local::optimizeEachDocumentSource(pPipeline.get());
+    Optimizations::Local::duplicateMatchBeforeInitalRedact(pPipeline.get());
+
+    return pPipeline;
+}
+
+void Pipeline::Optimizations::Local::moveMatchBeforeSort(Pipeline* pipeline) {
+    // TODO Keep moving matches across multiple sorts as moveLimitBeforeSkip does below.
+    // TODO Check sort for limit. Not an issue currently due to the order in which optimizations
+    // are applied, but should be fixed.
+    SourceContainer& sources = pipeline->sources;
+    for (size_t srcn = sources.size(), srci = 1; srci < srcn; ++srci) {
+        intrusive_ptr<DocumentSource>& pSource = sources[srci];
+        DocumentSourceMatch* match = dynamic_cast<DocumentSourceMatch*>(pSource.get());
+        if (match && !match->isTextQuery()) {
+            intrusive_ptr<DocumentSource>& pPrevious = sources[srci - 1];
+            if (dynamic_cast<DocumentSourceSort*>(pPrevious.get())) {
+                /* swap this item with the previous */
+                intrusive_ptr<DocumentSource> pTemp(pPrevious);
+                pPrevious = pSource;
+                pSource = pTemp;
            }
        }
    }
-
-    void Pipeline::Optimizations::Local::moveSkipAndLimitBeforeProject(Pipeline* pipeline) {
-        SourceContainer& sources = pipeline->sources;
-        if (sources.empty()) return;
-
-        for (int i = sources.size() - 1; i >= 1 /* not looking at 0 */; i--) {
-            // This optimization only applies when a $project comes before a $skip or $limit.
-            auto project = dynamic_cast<DocumentSourceProject*>(sources[i-1].get());
-            if (!project) continue;
-
-            auto skip = dynamic_cast<DocumentSourceSkip*>(sources[i].get());
-            auto limit = dynamic_cast<DocumentSourceLimit*>(sources[i].get());
-            if (!(skip || limit)) continue;
-
-            swap(sources[i], sources[i-1]);
-
-            // Start at back again. This is needed to handle cases with more than 1 $skip or
-            // $limit (S means skip, L means limit, P means project)
+}
+
+void Pipeline::Optimizations::Local::moveSkipAndLimitBeforeProject(Pipeline* pipeline) {
+    SourceContainer& sources = pipeline->sources;
+    if (sources.empty())
+        return;
+
+    for (int i = sources.size() - 1; i >= 1 /* not looking at 0 */; i--) {
+        // This optimization only applies when a $project comes before a $skip or $limit.
+        auto project = dynamic_cast<DocumentSourceProject*>(sources[i - 1].get());
+        if (!project)
+            continue;
+
+        auto skip = dynamic_cast<DocumentSourceSkip*>(sources[i].get());
+        auto limit = dynamic_cast<DocumentSourceLimit*>(sources[i].get());
+        if (!(skip || limit))
+            continue;
+
+        swap(sources[i], sources[i - 1]);
+
+        // Start at back again.
This is needed to handle cases with more than 1 $skip or + // $limit (S means skip, L means limit, P means project) + // + // These would work without second pass (assuming back to front ordering) + // PS -> SP + // PL -> LP + // PPL -> LPP + // PPS -> SPP + // + // The following cases need a second pass to handle the second skip or limit + // PLL -> LLP + // PPLL -> LLPP + // PLPL -> LLPP + i = sources.size(); // decremented before next pass + } +} + +void Pipeline::Optimizations::Local::moveLimitBeforeSkip(Pipeline* pipeline) { + SourceContainer& sources = pipeline->sources; + if (sources.empty()) + return; + + for (int i = sources.size() - 1; i >= 1 /* not looking at 0 */; i--) { + DocumentSourceLimit* limit = dynamic_cast<DocumentSourceLimit*>(sources[i].get()); + DocumentSourceSkip* skip = dynamic_cast<DocumentSourceSkip*>(sources[i - 1].get()); + if (limit && skip) { + // Increase limit by skip since the skipped docs now pass through the $limit + limit->setLimit(limit->getLimit() + skip->getSkip()); + swap(sources[i], sources[i - 1]); + + // Start at back again. This is needed to handle cases with more than 1 $limit + // (S means skip, L means limit) // - // These would work without second pass (assuming back to front ordering) - // PS -> SP - // PL -> LP - // PPL -> LPP - // PPS -> SPP + // These two would work without second pass (assuming back to front ordering) + // SL -> LS + // SSL -> LSS // - // The following cases need a second pass to handle the second skip or limit - // PLL -> LLP - // PPLL -> LLPP - // PLPL -> LLPP - i = sources.size(); // decremented before next pass + // The following cases need a second pass to handle the second limit + // SLL -> LLS + // SSLL -> LLSS + // SLSL -> LLSS + i = sources.size(); // decremented before next pass } } - - void Pipeline::Optimizations::Local::moveLimitBeforeSkip(Pipeline* pipeline) { - SourceContainer& sources = pipeline->sources; - if (sources.empty()) - return; - - for(int i = sources.size() - 1; i >= 1 /* not looking at 0 */; i--) { - DocumentSourceLimit* limit = - dynamic_cast<DocumentSourceLimit*>(sources[i].get()); - DocumentSourceSkip* skip = - dynamic_cast<DocumentSourceSkip*>(sources[i-1].get()); - if (limit && skip) { - // Increase limit by skip since the skipped docs now pass through the $limit - limit->setLimit(limit->getLimit() + skip->getSkip()); - swap(sources[i], sources[i-1]); - - // Start at back again. This is needed to handle cases with more than 1 $limit - // (S means skip, L means limit) - // - // These two would work without second pass (assuming back to front ordering) - // SL -> LS - // SSL -> LSS - // - // The following cases need a second pass to handle the second limit - // SLL -> LLS - // SSLL -> LLSS - // SLSL -> LLSS - i = sources.size(); // decremented before next pass - } - } +} + +void Pipeline::Optimizations::Local::coalesceAdjacent(Pipeline* pipeline) { + SourceContainer& sources = pipeline->sources; + if (sources.empty()) + return; + + // move all sources to a temporary list + SourceContainer tempSources; + sources.swap(tempSources); + + // move the first one to the final list + sources.push_back(tempSources[0]); + + // run through the sources, coalescing them or keeping them + for (size_t tempn = tempSources.size(), tempi = 1; tempi < tempn; ++tempi) { + // If we can't coalesce the source with the last, then move it + // to the final list, and make it the new last. 
(If we succeeded, + // then we're still on the same last, and there's no need to move + // or do anything with the source -- the destruction of tempSources + // will take care of the rest.) + intrusive_ptr<DocumentSource>& pLastSource = sources.back(); + intrusive_ptr<DocumentSource>& pTemp = tempSources[tempi]; + verify(pTemp && pLastSource); + if (!pLastSource->coalesce(pTemp)) + sources.push_back(pTemp); } - - void Pipeline::Optimizations::Local::coalesceAdjacent(Pipeline* pipeline) { - SourceContainer& sources = pipeline->sources; - if (sources.empty()) - return; - - // move all sources to a temporary list - SourceContainer tempSources; - sources.swap(tempSources); - - // move the first one to the final list - sources.push_back(tempSources[0]); - - // run through the sources, coalescing them or keeping them - for (size_t tempn = tempSources.size(), tempi = 1; tempi < tempn; ++tempi) { - // If we can't coalesce the source with the last, then move it - // to the final list, and make it the new last. (If we succeeded, - // then we're still on the same last, and there's no need to move - // or do anything with the source -- the destruction of tempSources - // will take care of the rest.) - intrusive_ptr<DocumentSource> &pLastSource = sources.back(); - intrusive_ptr<DocumentSource> &pTemp = tempSources[tempi]; - verify(pTemp && pLastSource); - if (!pLastSource->coalesce(pTemp)) - sources.push_back(pTemp); +} + +void Pipeline::Optimizations::Local::optimizeEachDocumentSource(Pipeline* pipeline) { + SourceContainer& sources = pipeline->sources; + SourceContainer newSources; + for (SourceContainer::iterator it(sources.begin()); it != sources.end(); ++it) { + if (auto out = (*it)->optimize()) { + newSources.push_back(std::move(out)); } } - - void Pipeline::Optimizations::Local::optimizeEachDocumentSource(Pipeline* pipeline) { - SourceContainer& sources = pipeline->sources; - SourceContainer newSources; - for (SourceContainer::iterator it(sources.begin()); it != sources.end(); ++it) { - if (auto out = (*it)->optimize()) { - newSources.push_back(std::move(out)); + pipeline->sources = std::move(newSources); +} + +void Pipeline::Optimizations::Local::duplicateMatchBeforeInitalRedact(Pipeline* pipeline) { + SourceContainer& sources = pipeline->sources; + if (sources.size() >= 2 && dynamic_cast<DocumentSourceRedact*>(sources[0].get())) { + if (DocumentSourceMatch* match = dynamic_cast<DocumentSourceMatch*>(sources[1].get())) { + const BSONObj redactSafePortion = match->redactSafePortion(); + if (!redactSafePortion.isEmpty()) { + sources.push_front(DocumentSourceMatch::createFromBson( + BSON("$match" << redactSafePortion).firstElement(), pipeline->pCtx)); } } - pipeline->sources = std::move(newSources); } - - void Pipeline::Optimizations::Local::duplicateMatchBeforeInitalRedact(Pipeline* pipeline) { - SourceContainer& sources = pipeline->sources; - if (sources.size() >= 2 && dynamic_cast<DocumentSourceRedact*>(sources[0].get())) { - if (DocumentSourceMatch* match = dynamic_cast<DocumentSourceMatch*>(sources[1].get())) { - const BSONObj redactSafePortion = match->redactSafePortion(); - if (!redactSafePortion.isEmpty()) { - sources.push_front( - DocumentSourceMatch::createFromBson( - BSON("$match" << redactSafePortion).firstElement(), - pipeline->pCtx)); - } +} + +void Pipeline::addRequiredPrivileges(Command* commandTemplate, + const string& db, + BSONObj cmdObj, + vector<Privilege>* out) { + ResourcePattern inputResource(commandTemplate->parseResourcePattern(db, cmdObj)); + uassert(17138, + 
mongoutils::str::stream() << "Invalid input resource, " << inputResource.toString(), + inputResource.isExactNamespacePattern()); + + out->push_back(Privilege(inputResource, ActionType::find)); + + BSONObj pipeline = cmdObj.getObjectField("pipeline"); + BSONForEach(stageElem, pipeline) { + BSONObj stage = stageElem.embeddedObjectUserCheck(); + if (str::equals(stage.firstElementFieldName(), "$out")) { + NamespaceString outputNs(db, stage.firstElement().str()); + uassert(17139, + mongoutils::str::stream() << "Invalid $out target namespace, " << outputNs.ns(), + outputNs.isValid()); + + ActionSet actions; + actions.addAction(ActionType::remove); + actions.addAction(ActionType::insert); + if (shouldBypassDocumentValidationForCommand(cmdObj)) { + actions.addAction(ActionType::bypassDocumentValidation); } + + out->push_back(Privilege(ResourcePattern::forExactNamespace(outputNs), actions)); } } - - void Pipeline::addRequiredPrivileges(Command* commandTemplate, - const string& db, - BSONObj cmdObj, - vector<Privilege>* out) { - ResourcePattern inputResource(commandTemplate->parseResourcePattern(db, cmdObj)); - uassert(17138, - mongoutils::str::stream() << "Invalid input resource, " << inputResource.toString(), - inputResource.isExactNamespacePattern()); - - out->push_back(Privilege(inputResource, ActionType::find)); - - BSONObj pipeline = cmdObj.getObjectField("pipeline"); - BSONForEach(stageElem, pipeline) { - BSONObj stage = stageElem.embeddedObjectUserCheck(); - if (str::equals(stage.firstElementFieldName(), "$out")) { - NamespaceString outputNs(db, stage.firstElement().str()); - uassert(17139, - mongoutils::str::stream() << "Invalid $out target namespace, " << - outputNs.ns(), - outputNs.isValid()); - - ActionSet actions; - actions.addAction(ActionType::remove); - actions.addAction(ActionType::insert); - if (shouldBypassDocumentValidationForCommand(cmdObj)) { - actions.addAction(ActionType::bypassDocumentValidation); - } - - out->push_back(Privilege(ResourcePattern::forExactNamespace(outputNs), actions)); - } +} + +intrusive_ptr<Pipeline> Pipeline::splitForSharded() { + // Create and initialize the shard spec we'll return. We start with an empty pipeline on the + // shards and all work being done in the merger. Optimizations can move operations between + // the pipelines to be more efficient. + intrusive_ptr<Pipeline> shardPipeline(new Pipeline(pCtx)); + shardPipeline->explain = explain; + + // The order in which optimizations are applied can have significant impact on the + // efficiency of the final pipeline. Be Careful! 
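// A sketch of the intended outcome (illustrative; assumes $group implements
// SplittableDocumentSource, which is defined elsewhere):
//     before: merger = [{$match: ...}, {$group: ...}, {$sort: ...}]
//     after:  shards = [{$match: ...}, $group's shard half]
//             merger = [$group's merge half, {$sort: ...}]
// findSplitPoint() walks the merge pipeline front to back and splits at the
// first splittable stage it finds.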
+ Optimizations::Sharded::findSplitPoint(shardPipeline.get(), this); + Optimizations::Sharded::moveFinalUnwindFromShardsToMerger(shardPipeline.get(), this); + Optimizations::Sharded::limitFieldsSentFromShardsToMerger(shardPipeline.get(), this); + + return shardPipeline; +} + +void Pipeline::Optimizations::Sharded::findSplitPoint(Pipeline* shardPipe, Pipeline* mergePipe) { + while (!mergePipe->sources.empty()) { + intrusive_ptr<DocumentSource> current = mergePipe->sources.front(); + mergePipe->sources.pop_front(); + + // Check if this source is splittable + SplittableDocumentSource* splittable = + dynamic_cast<SplittableDocumentSource*>(current.get()); + + if (!splittable) { + // move the source from the merger sources to the shard sources + shardPipe->sources.push_back(current); + } else { + // split this source into Merge and Shard sources + intrusive_ptr<DocumentSource> shardSource = splittable->getShardSource(); + intrusive_ptr<DocumentSource> mergeSource = splittable->getMergeSource(); + if (shardSource) + shardPipe->sources.push_back(shardSource); + if (mergeSource) + mergePipe->sources.push_front(mergeSource); + + break; } } +} + +void Pipeline::Optimizations::Sharded::moveFinalUnwindFromShardsToMerger(Pipeline* shardPipe, + Pipeline* mergePipe) { + while (!shardPipe->sources.empty() && + dynamic_cast<DocumentSourceUnwind*>(shardPipe->sources.back().get())) { + mergePipe->sources.push_front(shardPipe->sources.back()); + shardPipe->sources.pop_back(); + } +} + +void Pipeline::Optimizations::Sharded::limitFieldsSentFromShardsToMerger(Pipeline* shardPipe, + Pipeline* mergePipe) { + DepsTracker mergeDeps = mergePipe->getDependencies(shardPipe->getInitialQuery()); + if (mergeDeps.needWholeDocument) + return; // the merge needs all fields, so nothing we can do. + + // Empty project is "special" so if no fields are needed, we just ask for _id instead. + if (mergeDeps.fields.empty()) + mergeDeps.fields.insert("_id"); + + // Remove metadata from dependencies since it automatically flows through projection and we + // don't want to project it in to the document. + mergeDeps.needTextScore = false; + + // HEURISTIC: only apply optimization if none of the shard stages have an exhaustive list of + // field dependencies. While this may not be 100% ideal in all cases, it is simple and + // avoids the worst cases by ensuring that: + // 1) Optimization IS applied when the shards wouldn't have known their exhaustive list of + // dependencies. This situation can happen when a $sort is before the first $project or + // $group. Without the optimization, the shards would have to reify and transmit full + // objects even though only a subset of fields are needed. + // 2) Optimization IS NOT applied immediately following a $project or $group since it would + // add an unnecessary project (and therefore a deep-copy). + for (size_t i = 0; i < shardPipe->sources.size(); i++) { + DepsTracker dt; // ignored + if (shardPipe->sources[i]->getDependencies(&dt) & DocumentSource::EXHAUSTIVE_FIELDS) + return; + } - intrusive_ptr<Pipeline> Pipeline::splitForSharded() { - // Create and initialize the shard spec we'll return. We start with an empty pipeline on the - // shards and all work being done in the merger. Optimizations can move operations between - // the pipelines to be more efficient. - intrusive_ptr<Pipeline> shardPipeline(new Pipeline(pCtx)); - shardPipeline->explain = explain; - - // The order in which optimizations are applied can have significant impact on the - // efficiency of the final pipeline. 
Be Careful! - Optimizations::Sharded::findSplitPoint(shardPipeline.get(), this); - Optimizations::Sharded::moveFinalUnwindFromShardsToMerger(shardPipeline.get(), this); - Optimizations::Sharded::limitFieldsSentFromShardsToMerger(shardPipeline.get(), this); + // if we get here, add the project. + shardPipe->sources.push_back(DocumentSourceProject::createFromBson( + BSON("$project" << mergeDeps.toProjection()).firstElement(), shardPipe->pCtx)); +} - return shardPipeline; - } +BSONObj Pipeline::getInitialQuery() const { + if (sources.empty()) + return BSONObj(); - void Pipeline::Optimizations::Sharded::findSplitPoint(Pipeline* shardPipe, - Pipeline* mergePipe) { - while (!mergePipe->sources.empty()) { - intrusive_ptr<DocumentSource> current = mergePipe->sources.front(); - mergePipe->sources.pop_front(); + /* look for an initial $match */ + DocumentSourceMatch* match = dynamic_cast<DocumentSourceMatch*>(sources.front().get()); + if (!match) + return BSONObj(); - // Check if this source is splittable - SplittableDocumentSource* splittable = - dynamic_cast<SplittableDocumentSource*>(current.get()); + return match->getQuery(); +} - if (!splittable){ - // move the source from the merger sources to the shard sources - shardPipe->sources.push_back(current); - } - else { - // split this source into Merge and Shard sources - intrusive_ptr<DocumentSource> shardSource = splittable->getShardSource(); - intrusive_ptr<DocumentSource> mergeSource = splittable->getMergeSource(); - if (shardSource) shardPipe->sources.push_back(shardSource); - if (mergeSource) mergePipe->sources.push_front(mergeSource); - - break; - } - } +bool Pipeline::hasOutStage() const { + if (sources.empty()) { + return false; } - void Pipeline::Optimizations::Sharded::moveFinalUnwindFromShardsToMerger(Pipeline* shardPipe, - Pipeline* mergePipe) { - while (!shardPipe->sources.empty() - && dynamic_cast<DocumentSourceUnwind*>(shardPipe->sources.back().get())) { - mergePipe->sources.push_front(shardPipe->sources.back()); - shardPipe->sources.pop_back(); - } + // The $out stage must be the last one in the pipeline, so check if the last stage is $out. + return dynamic_cast<DocumentSourceOut*>(sources.back().get()); +} + +Document Pipeline::serialize() const { + MutableDocument serialized; + // create an array out of the pipeline operations + vector<Value> array; + for (SourceContainer::const_iterator iter(sources.begin()), listEnd(sources.end()); + iter != listEnd; + ++iter) { + intrusive_ptr<DocumentSource> pSource(*iter); + pSource->serializeToArray(array); } - void Pipeline::Optimizations::Sharded::limitFieldsSentFromShardsToMerger(Pipeline* shardPipe, - Pipeline* mergePipe) { - DepsTracker mergeDeps = mergePipe->getDependencies(shardPipe->getInitialQuery()); - if (mergeDeps.needWholeDocument) - return; // the merge needs all fields, so nothing we can do. - - // Empty project is "special" so if no fields are needed, we just ask for _id instead. - if (mergeDeps.fields.empty()) - mergeDeps.fields.insert("_id"); - - // Remove metadata from dependencies since it automatically flows through projection and we - // don't want to project it in to the document. - mergeDeps.needTextScore = false; - - // HEURISTIC: only apply optimization if none of the shard stages have an exhaustive list of - // field dependencies. While this may not be 100% ideal in all cases, it is simple and - // avoids the worst cases by ensuring that: - // 1) Optimization IS applied when the shards wouldn't have known their exhaustive list of - // dependencies. 
This situation can happen when a $sort is before the first $project or - // $group. Without the optimization, the shards would have to reify and transmit full - // objects even though only a subset of fields are needed. - // 2) Optimization IS NOT applied immediately following a $project or $group since it would - // add an unnecessary project (and therefore a deep-copy). - for (size_t i = 0; i < shardPipe->sources.size(); i++) { - DepsTracker dt; // ignored - if (shardPipe->sources[i]->getDependencies(&dt) & DocumentSource::EXHAUSTIVE_FIELDS) - return; - } + // add the top-level items to the command + serialized.setField(commandName, Value(pCtx->ns.coll())); + serialized.setField(pipelineName, Value(array)); - // if we get here, add the project. - shardPipe->sources.push_back( - DocumentSourceProject::createFromBson( - BSON("$project" << mergeDeps.toProjection()).firstElement(), - shardPipe->pCtx)); + if (explain) { + serialized.setField(explainName, Value(explain)); } - BSONObj Pipeline::getInitialQuery() const { - if (sources.empty()) - return BSONObj(); - - /* look for an initial $match */ - DocumentSourceMatch* match = dynamic_cast<DocumentSourceMatch*>(sources.front().get()); - if (!match) - return BSONObj(); - - return match->getQuery(); + if (pCtx->extSortAllowed) { + serialized.setField("allowDiskUse", Value(true)); } - bool Pipeline::hasOutStage() const { - if (sources.empty()) { - return false; - } - - // The $out stage must be the last one in the pipeline, so check if the last stage is $out. - return dynamic_cast<DocumentSourceOut*>(sources.back().get()); + if (pCtx->bypassDocumentValidation) { + serialized.setField(bypassDocumentValidationCommandOption(), Value(true)); } - Document Pipeline::serialize() const { - MutableDocument serialized; - // create an array out of the pipeline operations - vector<Value> array; - for(SourceContainer::const_iterator iter(sources.begin()), - listEnd(sources.end()); - iter != listEnd; - ++iter) { - intrusive_ptr<DocumentSource> pSource(*iter); - pSource->serializeToArray(array); - } - - // add the top-level items to the command - serialized.setField(commandName, Value(pCtx->ns.coll())); - serialized.setField(pipelineName, Value(array)); - - if (explain) { - serialized.setField(explainName, Value(explain)); - } - - if (pCtx->extSortAllowed) { - serialized.setField("allowDiskUse", Value(true)); - } + return serialized.freeze(); +} - if (pCtx->bypassDocumentValidation) { - serialized.setField(bypassDocumentValidationCommandOption(), Value(true)); - } +void Pipeline::stitch() { + massert(16600, "should not have an empty pipeline", !sources.empty()); - return serialized.freeze(); + /* chain together the sources we found */ + DocumentSource* prevSource = sources.front().get(); + for (SourceContainer::iterator iter(sources.begin() + 1), listEnd(sources.end()); + iter != listEnd; + ++iter) { + intrusive_ptr<DocumentSource> pTemp(*iter); + pTemp->setSource(prevSource); + prevSource = pTemp.get(); } - - void Pipeline::stitch() { - massert(16600, "should not have an empty pipeline", - !sources.empty()); - - /* chain together the sources we found */ - DocumentSource* prevSource = sources.front().get(); - for(SourceContainer::iterator iter(sources.begin() + 1), - listEnd(sources.end()); - iter != listEnd; - ++iter) { - intrusive_ptr<DocumentSource> pTemp(*iter); - pTemp->setSource(prevSource); - prevSource = pTemp.get(); - } +} + +void Pipeline::run(BSONObjBuilder& result) { + // should not get here in the explain case + verify(!explain); + + // the array 
in which the aggregation results reside + // cant use subArrayStart() due to error handling + BSONArrayBuilder resultArray; + DocumentSource* finalSource = sources.back().get(); + while (boost::optional<Document> next = finalSource->getNext()) { + // add the document to the result set + BSONObjBuilder documentBuilder(resultArray.subobjStart()); + next->toBson(&documentBuilder); + documentBuilder.doneFast(); + // object will be too large, assert. the extra 1KB is for headers + uassert(16389, + str::stream() << "aggregation result exceeds maximum document size (" + << BSONObjMaxUserSize / (1024 * 1024) << "MB)", + resultArray.len() < BSONObjMaxUserSize - 1024); } - void Pipeline::run(BSONObjBuilder& result) { - // should not get here in the explain case - verify(!explain); - - // the array in which the aggregation results reside - // cant use subArrayStart() due to error handling - BSONArrayBuilder resultArray; - DocumentSource* finalSource = sources.back().get(); - while (boost::optional<Document> next = finalSource->getNext()) { - // add the document to the result set - BSONObjBuilder documentBuilder (resultArray.subobjStart()); - next->toBson(&documentBuilder); - documentBuilder.doneFast(); - // object will be too large, assert. the extra 1KB is for headers - uassert(16389, - str::stream() << "aggregation result exceeds maximum document size (" - << BSONObjMaxUserSize / (1024 * 1024) << "MB)", - resultArray.len() < BSONObjMaxUserSize - 1024); - } + resultArray.done(); + result.appendArray("result", resultArray.arr()); +} - resultArray.done(); - result.appendArray("result", resultArray.arr()); +vector<Value> Pipeline::writeExplainOps() const { + vector<Value> array; + for (SourceContainer::const_iterator it = sources.begin(); it != sources.end(); ++it) { + (*it)->serializeToArray(array, /*explain=*/true); } - - vector<Value> Pipeline::writeExplainOps() const { - vector<Value> array; - for(SourceContainer::const_iterator it = sources.begin(); it != sources.end(); ++it) { - (*it)->serializeToArray(array, /*explain=*/true); + return array; +} + +void Pipeline::addInitialSource(intrusive_ptr<DocumentSource> source) { + sources.push_front(source); +} + +DepsTracker Pipeline::getDependencies(const BSONObj& initialQuery) const { + DepsTracker deps; + bool knowAllFields = false; + bool knowAllMeta = false; + for (size_t i = 0; i < sources.size() && !(knowAllFields && knowAllMeta); i++) { + DepsTracker localDeps; + DocumentSource::GetDepsReturn status = sources[i]->getDependencies(&localDeps); + + if (status == DocumentSource::NOT_SUPPORTED) { + // Assume this stage needs everything. We may still know something about our + // dependencies if an earlier stage returned either EXHAUSTIVE_FIELDS or + // EXHAUSTIVE_META. + break; } - return array; - } - void Pipeline::addInitialSource(intrusive_ptr<DocumentSource> source) { - sources.push_front(source); - } - - DepsTracker Pipeline::getDependencies(const BSONObj& initialQuery) const { - DepsTracker deps; - bool knowAllFields = false; - bool knowAllMeta = false; - for (size_t i=0; i < sources.size() && !(knowAllFields && knowAllMeta); i++) { - DepsTracker localDeps; - DocumentSource::GetDepsReturn status = sources[i]->getDependencies(&localDeps); - - if (status == DocumentSource::NOT_SUPPORTED) { - // Assume this stage needs everything. We may still know something about our - // dependencies if an earlier stage returned either EXHAUSTIVE_FIELDS or - // EXHAUSTIVE_META. 
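// Worked example (illustrative, assuming $project reports EXHAUSTIVE_FIELDS
// as the shard heuristic elsewhere in this file expects): for
//     [{$sort: {a: 1}}, {$project: {b: 1}}]
// the $sort contributes "a" without being exhaustive, then the $project
// contributes "b" and sets knowAllFields, so deps.fields picks up both
// fields and needWholeDocument stays false.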
- break; - } - - if (!knowAllFields) { - deps.fields.insert(localDeps.fields.begin(), localDeps.fields.end()); - if (localDeps.needWholeDocument) - deps.needWholeDocument = true; - knowAllFields = status & DocumentSource::EXHAUSTIVE_FIELDS; - } - - if (!knowAllMeta) { - if (localDeps.needTextScore) - deps.needTextScore = true; - - knowAllMeta = status & DocumentSource::EXHAUSTIVE_META; - } + if (!knowAllFields) { + deps.fields.insert(localDeps.fields.begin(), localDeps.fields.end()); + if (localDeps.needWholeDocument) + deps.needWholeDocument = true; + knowAllFields = status & DocumentSource::EXHAUSTIVE_FIELDS; } - if (!knowAllFields) - deps.needWholeDocument = true; // don't know all fields we need - - // NOTE This code assumes that textScore can only be generated by the initial query. - if (DocumentSourceMatch::isTextQuery(initialQuery)) { - // If doing a text query, assume we need the score if we can't prove we don't. - if (!knowAllMeta) + if (!knowAllMeta) { + if (localDeps.needTextScore) deps.needTextScore = true; + + knowAllMeta = status & DocumentSource::EXHAUSTIVE_META; } - else { - // If we aren't doing a text query, then we don't need to ask for the textScore since we - // know it will be missing anyway. - deps.needTextScore = false; - } + } - return deps; + if (!knowAllFields) + deps.needWholeDocument = true; // don't know all fields we need + + // NOTE This code assumes that textScore can only be generated by the initial query. + if (DocumentSourceMatch::isTextQuery(initialQuery)) { + // If doing a text query, assume we need the score if we can't prove we don't. + if (!knowAllMeta) + deps.needTextScore = true; + } else { + // If we aren't doing a text query, then we don't need to ask for the textScore since we + // know it will be missing anyway. + deps.needTextScore = false; } -} // namespace mongo + + return deps; +} +} // namespace mongo diff --git a/src/mongo/db/pipeline/pipeline.h b/src/mongo/db/pipeline/pipeline.h index c6274fbf17e..0eb9988d6af 100644 --- a/src/mongo/db/pipeline/pipeline.h +++ b/src/mongo/db/pipeline/pipeline.h @@ -37,149 +37,155 @@ #include "mongo/util/timer.h" namespace mongo { - class BSONObj; - class BSONObjBuilder; - class Command; - struct DepsTracker; - class DocumentSource; - struct ExpressionContext; - class Privilege; - - /** mongodb "commands" (sent via db.$cmd.findOne(...)) - subclass to make a command. define a singleton object for it. - */ - class Pipeline : - public IntrusiveCounterUnsigned { +class BSONObj; +class BSONObjBuilder; +class Command; +struct DepsTracker; +class DocumentSource; +struct ExpressionContext; +class Privilege; + +/** mongodb "commands" (sent via db.$cmd.findOne(...)) + subclass to make a command. define a singleton object for it. + */ +class Pipeline : public IntrusiveCounterUnsigned { +public: + /** + * Create a pipeline from the command. 
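 * A typical command object has the shape (illustrative; the exact field
 * names come from commandName/pipelineName and serialize() in pipeline.cpp):
 *     {aggregate: "<collection>", pipeline: [<stages>], allowDiskUse: <bool>}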
+ * + * @param errmsg where to write errors, if there are any + * @param cmdObj the command object sent from the client + * @returns the pipeline, if created, otherwise a NULL reference + */ + static boost::intrusive_ptr<Pipeline> parseCommand( + std::string& errmsg, + const BSONObj& cmdObj, + const boost::intrusive_ptr<ExpressionContext>& pCtx); + + /// Helper to implement Command::addRequiredPrivileges + static void addRequiredPrivileges(Command* commandTemplate, + const std::string& dbname, + BSONObj cmdObj, + std::vector<Privilege>* out); + + const boost::intrusive_ptr<ExpressionContext>& getContext() const { + return pCtx; + } + + /** + Split the current Pipeline into a Pipeline for each shard, and + a Pipeline that combines the results within mongos. + + This permanently alters this pipeline for the merging operation. + + @returns the Spec for the pipeline command that should be sent + to the shards + */ + boost::intrusive_ptr<Pipeline> splitForSharded(); + + /** If the pipeline starts with a $match, return its BSON predicate. + * Returns empty BSON if the first stage isn't $match. + */ + BSONObj getInitialQuery() const; + + /** + * Returns true if the pipeline contains a $out stage, and false otherwise. + */ + bool hasOutStage() const; + + /** + Write the Pipeline as a BSONObj command. This should be the + inverse of parseCommand(). + + This is only intended to be used by the shard command obtained + from splitForSharded(). Some pipeline operations in the merge + process do not have equivalent command forms, and using this on + the mongos Pipeline will cause assertions. + + @param the builder to write the command to + */ + Document serialize() const; + + /** Stitch together the source pointers (by calling setSource) for each source in sources. + * Must be called after optimize and addInitialSource but before trying to get results. + */ + void stitch(); + + /** + Run the Pipeline on the given source. + + @param result builder to write the result to + */ + void run(BSONObjBuilder& result); + + bool isExplain() const { + return explain; + } + + /// The initial source is special since it varies between mongos and mongod. + void addInitialSource(boost::intrusive_ptr<DocumentSource> source); + + /// The source that represents the output. Returns a non-owning pointer. + DocumentSource* output() { + invariant(!sources.empty()); + return sources.back().get(); + } + + /** + * Write the pipeline's operators to a std::vector<Value>, with the + * explain flag true (for DocumentSource::serializeToArray()). + */ + std::vector<Value> writeExplainOps() const; + + /** + * Returns the dependencies needed by this pipeline. + * + * initialQuery is used as a fallback for metadata dependency detection. The assumption is + * that any metadata produced by the query is needed unless we can prove it isn't. + */ + DepsTracker getDependencies(const BSONObj& initialQuery) const; + + /** + The aggregation command name. + */ + static const char commandName[]; + + /* + PipelineD is a "sister" class that has additional functionality + for the Pipeline. It exists because of linkage requirements. + Pipeline needs to function in mongod and mongos. PipelineD + contains extra functionality required in mongod, and which can't + appear in mongos because the required symbols are unavailable + for linking there. Consider PipelineD to be an extension of this + class for mongod only. + */ + friend class PipelineD; + +private: + class Optimizations { public: - /** - * Create a pipeline from the command. 
- * - * @param errmsg where to write errors, if there are any - * @param cmdObj the command object sent from the client - * @returns the pipeline, if created, otherwise a NULL reference - */ - static boost::intrusive_ptr<Pipeline> parseCommand( - std::string& errmsg, - const BSONObj& cmdObj, - const boost::intrusive_ptr<ExpressionContext>& pCtx); - - /// Helper to implement Command::addRequiredPrivileges - static void addRequiredPrivileges(Command* commandTemplate, - const std::string& dbname, - BSONObj cmdObj, - std::vector<Privilege>* out); - - const boost::intrusive_ptr<ExpressionContext>& getContext() const { return pCtx; } - - /** - Split the current Pipeline into a Pipeline for each shard, and - a Pipeline that combines the results within mongos. - - This permanently alters this pipeline for the merging operation. - - @returns the Spec for the pipeline command that should be sent - to the shards - */ - boost::intrusive_ptr<Pipeline> splitForSharded(); - - /** If the pipeline starts with a $match, return its BSON predicate. - * Returns empty BSON if the first stage isn't $match. - */ - BSONObj getInitialQuery() const; - - /** - * Returns true if the pipeline contains a $out stage, and false otherwise. - */ - bool hasOutStage() const; - - /** - Write the Pipeline as a BSONObj command. This should be the - inverse of parseCommand(). - - This is only intended to be used by the shard command obtained - from splitForSharded(). Some pipeline operations in the merge - process do not have equivalent command forms, and using this on - the mongos Pipeline will cause assertions. - - @param the builder to write the command to - */ - Document serialize() const; - - /** Stitch together the source pointers (by calling setSource) for each source in sources. - * Must be called after optimize and addInitialSource but before trying to get results. - */ - void stitch(); - - /** - Run the Pipeline on the given source. - - @param result builder to write the result to - */ - void run(BSONObjBuilder& result); - - bool isExplain() const { return explain; } - - /// The initial source is special since it varies between mongos and mongod. - void addInitialSource(boost::intrusive_ptr<DocumentSource> source); - - /// The source that represents the output. Returns a non-owning pointer. - DocumentSource* output() { invariant( !sources.empty() ); return sources.back().get(); } - - /** - * Write the pipeline's operators to a std::vector<Value>, with the - * explain flag true (for DocumentSource::serializeToArray()). - */ - std::vector<Value> writeExplainOps() const; - - /** - * Returns the dependencies needed by this pipeline. - * - * initialQuery is used as a fallback for metadata dependency detection. The assumption is - * that any metadata produced by the query is needed unless we can prove it isn't. - */ - DepsTracker getDependencies(const BSONObj& initialQuery) const; - - /** - The aggregation command name. - */ - static const char commandName[]; - - /* - PipelineD is a "sister" class that has additional functionality - for the Pipeline. It exists because of linkage requirements. - Pipeline needs to function in mongod and mongos. PipelineD - contains extra functionality required in mongod, and which can't - appear in mongos because the required symbols are unavailable - for linking there. Consider PipelineD to be an extension of this - class for mongod only. - */ - friend class PipelineD; - - private: - class Optimizations { - public: - // These contain static functions that optimize pipelines in various ways. 
- // They are classes rather than namespaces so that they can be friends of Pipeline. - // Classes are defined in pipeline_optimizations.h. - class Local; - class Sharded; - }; - - friend class Optimizations::Local; - friend class Optimizations::Sharded; - - static const char pipelineName[]; - static const char explainName[]; - static const char fromRouterName[]; - static const char serverPipelineName[]; - static const char mongosPipelineName[]; - - Pipeline(const boost::intrusive_ptr<ExpressionContext> &pCtx); - - typedef std::deque<boost::intrusive_ptr<DocumentSource> > SourceContainer; - SourceContainer sources; - bool explain; - - boost::intrusive_ptr<ExpressionContext> pCtx; + // These contain static functions that optimize pipelines in various ways. + // They are classes rather than namespaces so that they can be friends of Pipeline. + // Classes are defined in pipeline_optimizations.h. + class Local; + class Sharded; }; -} // namespace mongo + + friend class Optimizations::Local; + friend class Optimizations::Sharded; + + static const char pipelineName[]; + static const char explainName[]; + static const char fromRouterName[]; + static const char serverPipelineName[]; + static const char mongosPipelineName[]; + + Pipeline(const boost::intrusive_ptr<ExpressionContext>& pCtx); + + typedef std::deque<boost::intrusive_ptr<DocumentSource>> SourceContainer; + SourceContainer sources; + bool explain; + + boost::intrusive_ptr<ExpressionContext> pCtx; +}; +} // namespace mongo diff --git a/src/mongo/db/pipeline/pipeline_d.cpp b/src/mongo/db/pipeline/pipeline_d.cpp index b6ddfdd7e12..6dbcfe4c812 100644 --- a/src/mongo/db/pipeline/pipeline_d.cpp +++ b/src/mongo/db/pipeline/pipeline_d.cpp @@ -45,219 +45,197 @@ namespace mongo { - using boost::intrusive_ptr; - using std::shared_ptr; - using std::string; +using boost::intrusive_ptr; +using std::shared_ptr; +using std::string; namespace { - class MongodImplementation final : public DocumentSourceNeedsMongod::MongodInterface { - public: - MongodImplementation(const intrusive_ptr<ExpressionContext>& ctx) - : _ctx(ctx) - , _client(ctx->opCtx) - {} - - DBClientBase* directClient() final { - // opCtx may have changed since our last call - invariant(_ctx->opCtx); - _client.setOpCtx(_ctx->opCtx); - return &_client; - } +class MongodImplementation final : public DocumentSourceNeedsMongod::MongodInterface { +public: + MongodImplementation(const intrusive_ptr<ExpressionContext>& ctx) + : _ctx(ctx), _client(ctx->opCtx) {} + + DBClientBase* directClient() final { + // opCtx may have changed since our last call + invariant(_ctx->opCtx); + _client.setOpCtx(_ctx->opCtx); + return &_client; + } - bool isSharded(const NamespaceString& ns) final { - const ChunkVersion unsharded(0, 0, OID()); - return !(shardingState.getVersion(ns.ns()).isWriteCompatibleWith(unsharded)); - } + bool isSharded(const NamespaceString& ns) final { + const ChunkVersion unsharded(0, 0, OID()); + return !(shardingState.getVersion(ns.ns()).isWriteCompatibleWith(unsharded)); + } - bool isCapped(const NamespaceString& ns) final { - AutoGetCollectionForRead ctx(_ctx->opCtx, ns.ns()); - Collection* collection = ctx.getCollection(); - return collection && collection->isCapped(); - } + bool isCapped(const NamespaceString& ns) final { + AutoGetCollectionForRead ctx(_ctx->opCtx, ns.ns()); + Collection* collection = ctx.getCollection(); + return collection && collection->isCapped(); + } - BSONObj insert(const NamespaceString& ns, const std::vector<BSONObj>& objs) final { - 
boost::optional<DisableDocumentValidation> maybeDisableValidation; - if (_ctx->bypassDocumentValidation) - maybeDisableValidation.emplace(_ctx->opCtx); + BSONObj insert(const NamespaceString& ns, const std::vector<BSONObj>& objs) final { + boost::optional<DisableDocumentValidation> maybeDisableValidation; + if (_ctx->bypassDocumentValidation) + maybeDisableValidation.emplace(_ctx->opCtx); - _client.insert(ns.ns(), objs); - return _client.getLastErrorDetailed(); - } + _client.insert(ns.ns(), objs); + return _client.getLastErrorDetailed(); + } - private: - intrusive_ptr<ExpressionContext> _ctx; - DBDirectClient _client; - }; +private: + intrusive_ptr<ExpressionContext> _ctx; + DBDirectClient _client; +}; } - shared_ptr<PlanExecutor> PipelineD::prepareCursorSource( - OperationContext* txn, - Collection* collection, - const intrusive_ptr<Pipeline>& pPipeline, - const intrusive_ptr<ExpressionContext>& pExpCtx) { - // get the full "namespace" name - const string& fullName = pExpCtx->ns.ns(); - - // We will be modifying the source vector as we go - Pipeline::SourceContainer& sources = pPipeline->sources; - - // Inject a MongodImplementation to sources that need them. - for (size_t i = 0; i < sources.size(); i++) { - DocumentSourceNeedsMongod* needsMongod = - dynamic_cast<DocumentSourceNeedsMongod*>(sources[i].get()); - if (needsMongod) { - needsMongod->injectMongodInterface( - std::make_shared<MongodImplementation>(pExpCtx)); - } +shared_ptr<PlanExecutor> PipelineD::prepareCursorSource( + OperationContext* txn, + Collection* collection, + const intrusive_ptr<Pipeline>& pPipeline, + const intrusive_ptr<ExpressionContext>& pExpCtx) { + // get the full "namespace" name + const string& fullName = pExpCtx->ns.ns(); + + // We will be modifying the source vector as we go + Pipeline::SourceContainer& sources = pPipeline->sources; + + // Inject a MongodImplementation to sources that need them. + for (size_t i = 0; i < sources.size(); i++) { + DocumentSourceNeedsMongod* needsMongod = + dynamic_cast<DocumentSourceNeedsMongod*>(sources[i].get()); + if (needsMongod) { + needsMongod->injectMongodInterface(std::make_shared<MongodImplementation>(pExpCtx)); } + } - if (!sources.empty() && sources.front()->isValidInitialSource()) { - if (dynamic_cast<DocumentSourceMergeCursors*>(sources.front().get())) { - // Enable the hooks for setting up authentication on the subsequent internal - // connections we are going to create. This would normally have been done - // when SetShardVersion was called, but since SetShardVersion is never called - // on secondaries, this is needed. - ShardedConnectionInfo::addHook(); - } - return std::shared_ptr<PlanExecutor>(); // don't need a cursor + if (!sources.empty() && sources.front()->isValidInitialSource()) { + if (dynamic_cast<DocumentSourceMergeCursors*>(sources.front().get())) { + // Enable the hooks for setting up authentication on the subsequent internal + // connections we are going to create. This would normally have been done + // when SetShardVersion was called, but since SetShardVersion is never called + // on secondaries, this is needed. + ShardedConnectionInfo::addHook(); } + return std::shared_ptr<PlanExecutor>(); // don't need a cursor + } - // Look for an initial match. This works whether we got an initial query or not. - // If not, it results in a "{}" query, which will be what we want in that case. 
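// Illustrative effect of the fold (a sketch, not part of this changeset): for
//     [{$match: {x: {$gt: 3}}}, {$group: ...}]
// queryObj becomes {x: {$gt: 3}}, the $match stage is popped below, and the
// predicate is evaluated by the PlanExecutor (possibly via an index) rather
// than per-document inside the pipeline.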
- const BSONObj queryObj = pPipeline->getInitialQuery(); - if (!queryObj.isEmpty()) { - // This will get built in to the Cursor we'll create, so - // remove the match from the pipeline - sources.pop_front(); - } - - // Find the set of fields in the source documents depended on by this pipeline. - const DepsTracker deps = pPipeline->getDependencies(queryObj); - - // Passing query an empty projection since it is faster to use ParsedDeps::extractFields(). - // This will need to change to support covering indexes (SERVER-12015). There is an - // exception for textScore since that can only be retrieved by a query projection. - const BSONObj projectionForQuery = deps.needTextScore ? deps.toProjection() : BSONObj(); - - /* - Look for an initial sort; we'll try to add this to the - Cursor we create. If we're successful in doing that (further down), - we'll remove the $sort from the pipeline, because the documents - will already come sorted in the specified order as a result of the - index scan. - */ - intrusive_ptr<DocumentSourceSort> sortStage; - BSONObj sortObj; - if (!sources.empty()) { - sortStage = dynamic_cast<DocumentSourceSort*>(sources.front().get()); - if (sortStage) { - // build the sort key - sortObj = sortStage->serializeSortKey(/*explain*/false).toBson(); - } - } - - // Create the PlanExecutor. - // - // If we try to create a PlanExecutor that includes both the match and the - // sort, and the two are incompatible wrt the available indexes, then - // we don't get a PlanExecutor back. - // - // So we try to use both first. If that fails, try again, without the - // sort. - // - // If we don't have a sort, jump straight to just creating a PlanExecutor. - // without the sort. - // - // If we are able to incorporate the sort into the PlanExecutor, remove it - // from the head of the pipeline. - // - // LATER - we should be able to find this out before we create the - // cursor. Either way, we can then apply other optimizations there - // are tickets for, such as SERVER-4507. - const size_t runnerOptions = QueryPlannerParams::DEFAULT - | QueryPlannerParams::INCLUDE_SHARD_FILTER - | QueryPlannerParams::NO_BLOCKING_SORT - ; - std::shared_ptr<PlanExecutor> exec; - bool sortInRunner = false; - - const WhereCallbackReal whereCallback(pExpCtx->opCtx, pExpCtx->ns.db()); + // Look for an initial match. This works whether we got an initial query or not. + // If not, it results in a "{}" query, which will be what we want in that case. + const BSONObj queryObj = pPipeline->getInitialQuery(); + if (!queryObj.isEmpty()) { + // This will get built in to the Cursor we'll create, so + // remove the match from the pipeline + sources.pop_front(); + } + // Find the set of fields in the source documents depended on by this pipeline. + const DepsTracker deps = pPipeline->getDependencies(queryObj); + + // Passing query an empty projection since it is faster to use ParsedDeps::extractFields(). + // This will need to change to support covering indexes (SERVER-12015). There is an + // exception for textScore since that can only be retrieved by a query projection. + const BSONObj projectionForQuery = deps.needTextScore ? deps.toProjection() : BSONObj(); + + /* + Look for an initial sort; we'll try to add this to the + Cursor we create. If we're successful in doing that (further down), + we'll remove the $sort from the pipeline, because the documents + will already come sorted in the specified order as a result of the + index scan. 
+ */ + intrusive_ptr<DocumentSourceSort> sortStage; + BSONObj sortObj; + if (!sources.empty()) { + sortStage = dynamic_cast<DocumentSourceSort*>(sources.front().get()); if (sortStage) { - CanonicalQuery* cq; - Status status = - CanonicalQuery::canonicalize(pExpCtx->ns, - queryObj, - sortObj, - projectionForQuery, - &cq, - whereCallback); - - PlanExecutor* rawExec; - if (status.isOK() && getExecutor(txn, - collection, - cq, - PlanExecutor::YIELD_AUTO, - &rawExec, - runnerOptions).isOK()) { - // success: The PlanExecutor will handle sorting for us using an index. - exec.reset(rawExec); - sortInRunner = true; - - sources.pop_front(); - if (sortStage->getLimitSrc()) { - // need to reinsert coalesced $limit after removing $sort - sources.push_front(sortStage->getLimitSrc()); - } - } + // build the sort key + sortObj = sortStage->serializeSortKey(/*explain*/ false).toBson(); } + } - if (!exec.get()) { - const BSONObj noSort; - CanonicalQuery* cq; - uassertStatusOK( - CanonicalQuery::canonicalize(pExpCtx->ns, - queryObj, - noSort, - projectionForQuery, - &cq, - whereCallback)); - - PlanExecutor* rawExec; - uassertStatusOK(getExecutor(txn, - collection, - cq, - PlanExecutor::YIELD_AUTO, - &rawExec, - runnerOptions)); + // Create the PlanExecutor. + // + // If we try to create a PlanExecutor that includes both the match and the + // sort, and the two are incompatible wrt the available indexes, then + // we don't get a PlanExecutor back. + // + // So we try to use both first. If that fails, try again, without the + // sort. + // + // If we don't have a sort, jump straight to just creating a PlanExecutor. + // without the sort. + // + // If we are able to incorporate the sort into the PlanExecutor, remove it + // from the head of the pipeline. + // + // LATER - we should be able to find this out before we create the + // cursor. Either way, we can then apply other optimizations there + // are tickets for, such as SERVER-4507. + const size_t runnerOptions = QueryPlannerParams::DEFAULT | + QueryPlannerParams::INCLUDE_SHARD_FILTER | QueryPlannerParams::NO_BLOCKING_SORT; + std::shared_ptr<PlanExecutor> exec; + bool sortInRunner = false; + + const WhereCallbackReal whereCallback(pExpCtx->opCtx, pExpCtx->ns.db()); + + if (sortStage) { + CanonicalQuery* cq; + Status status = CanonicalQuery::canonicalize( + pExpCtx->ns, queryObj, sortObj, projectionForQuery, &cq, whereCallback); + + PlanExecutor* rawExec; + if (status.isOK() && + getExecutor(txn, collection, cq, PlanExecutor::YIELD_AUTO, &rawExec, runnerOptions) + .isOK()) { + // success: The PlanExecutor will handle sorting for us using an index. exec.reset(rawExec); + sortInRunner = true; + + sources.pop_front(); + if (sortStage->getLimitSrc()) { + // need to reinsert coalesced $limit after removing $sort + sources.push_front(sortStage->getLimitSrc()); + } } + } + if (!exec.get()) { + const BSONObj noSort; + CanonicalQuery* cq; + uassertStatusOK(CanonicalQuery::canonicalize( + pExpCtx->ns, queryObj, noSort, projectionForQuery, &cq, whereCallback)); - // DocumentSourceCursor expects a yielding PlanExecutor that has had its state saved. We - // deregister the PlanExecutor so that it can be registered with ClientCursor. - exec->deregisterExec(); - exec->saveState(); + PlanExecutor* rawExec; + uassertStatusOK( + getExecutor(txn, collection, cq, PlanExecutor::YIELD_AUTO, &rawExec, runnerOptions)); + exec.reset(rawExec); + } - // Put the PlanExecutor into a DocumentSourceCursor and add it to the front of the pipeline. 
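// Recap of the flow above (sketch): exec now answers queryObj and, when
// sortInRunner is true, also provides sortObj's ordering; it has been
// deregistered and its state saved so the DocumentSourceCursor created
// below can hand it off to ClientCursor for lifecycle management.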
- intrusive_ptr<DocumentSourceCursor> pSource = - DocumentSourceCursor::create(fullName, exec, pExpCtx); - // Note the query, sort, and projection for explain. - pSource->setQuery(queryObj); - if (sortInRunner) - pSource->setSort(sortObj); + // DocumentSourceCursor expects a yielding PlanExecutor that has had its state saved. We + // deregister the PlanExecutor so that it can be registered with ClientCursor. + exec->deregisterExec(); + exec->saveState(); - pSource->setProjection(deps.toProjection(), deps.toParsedDeps()); + // Put the PlanExecutor into a DocumentSourceCursor and add it to the front of the pipeline. + intrusive_ptr<DocumentSourceCursor> pSource = + DocumentSourceCursor::create(fullName, exec, pExpCtx); - while (!sources.empty() && pSource->coalesce(sources.front())) { - sources.pop_front(); - } + // Note the query, sort, and projection for explain. + pSource->setQuery(queryObj); + if (sortInRunner) + pSource->setSort(sortObj); - pPipeline->addInitialSource(pSource); + pSource->setProjection(deps.toProjection(), deps.toParsedDeps()); - return exec; + while (!sources.empty() && pSource->coalesce(sources.front())) { + sources.pop_front(); } -} // namespace mongo + pPipeline->addInitialSource(pSource); + + return exec; +} + +} // namespace mongo diff --git a/src/mongo/db/pipeline/pipeline_d.h b/src/mongo/db/pipeline/pipeline_d.h index 3a818c3ddcd..8866e6bdfe6 100644 --- a/src/mongo/db/pipeline/pipeline_d.h +++ b/src/mongo/db/pipeline/pipeline_d.h @@ -32,54 +32,53 @@ #include <memory> namespace mongo { - class Collection; - class DocumentSourceCursor; - struct ExpressionContext; - class OperationContext; - class Pipeline; - class PlanExecutor; +class Collection; +class DocumentSourceCursor; +struct ExpressionContext; +class OperationContext; +class Pipeline; +class PlanExecutor; - /* - PipelineD is an extension of the Pipeline class, but with additional - material that references symbols that are not available in mongos, - where the remainder of the Pipeline class also functions. PipelineD - is a friend of Pipeline so that it can have equal access to Pipeline's - members. +/* + PipelineD is an extension of the Pipeline class, but with additional + material that references symbols that are not available in mongos, + where the remainder of the Pipeline class also functions. PipelineD + is a friend of Pipeline so that it can have equal access to Pipeline's + members. - See the friend declaration in Pipeline. + See the friend declaration in Pipeline. + */ +class PipelineD { +public: + /** + * Create a Cursor wrapped in a DocumentSourceCursor, which is suitable + * to be the first source for a pipeline to begin with. This source + * will feed the execution of the pipeline. + * + * This method looks for early pipeline stages that can be folded into + * the underlying cursor, and when a cursor can absorb those, they + * are removed from the head of the pipeline. For example, an + * early match can be removed and replaced with a Cursor that will + * do an index scan. + * + * The cursor is added to the front of the pipeline's sources. + * + * Must have a AutoGetCollectionForRead before entering. + * + * If the returned PlanExecutor is non-null, you are responsible for ensuring + * it receives appropriate invalidate and kill messages. 
+ * + * @param pPipeline the logical "this" for this operation + * @param pExpCtx the expression context for this pipeline */ - class PipelineD { - public: - - /** - * Create a Cursor wrapped in a DocumentSourceCursor, which is suitable - * to be the first source for a pipeline to begin with. This source - * will feed the execution of the pipeline. - * - * This method looks for early pipeline stages that can be folded into - * the underlying cursor, and when a cursor can absorb those, they - * are removed from the head of the pipeline. For example, an - * early match can be removed and replaced with a Cursor that will - * do an index scan. - * - * The cursor is added to the front of the pipeline's sources. - * - * Must have a AutoGetCollectionForRead before entering. - * - * If the returned PlanExecutor is non-null, you are responsible for ensuring - * it receives appropriate invalidate and kill messages. - * - * @param pPipeline the logical "this" for this operation - * @param pExpCtx the expression context for this pipeline - */ - static std::shared_ptr<PlanExecutor> prepareCursorSource( - OperationContext* txn, - Collection* collection, - const boost::intrusive_ptr<Pipeline> &pPipeline, - const boost::intrusive_ptr<ExpressionContext> &pExpCtx); + static std::shared_ptr<PlanExecutor> prepareCursorSource( + OperationContext* txn, + Collection* collection, + const boost::intrusive_ptr<Pipeline>& pPipeline, + const boost::intrusive_ptr<ExpressionContext>& pExpCtx); - private: - PipelineD(); // does not exist: prevent instantiation - }; +private: + PipelineD(); // does not exist: prevent instantiation +}; -} // namespace mongo +} // namespace mongo diff --git a/src/mongo/db/pipeline/pipeline_optimizations.h b/src/mongo/db/pipeline/pipeline_optimizations.h index ac4b9e8b697..68763c2ac5c 100644 --- a/src/mongo/db/pipeline/pipeline_optimizations.h +++ b/src/mongo/db/pipeline/pipeline_optimizations.h @@ -36,97 +36,97 @@ #include "mongo/db/pipeline/pipeline.h" namespace mongo { +/** + * This class holds optimizations applied to a single Pipeline. + * + * Each function has the same signature and takes a Pipeline as an in/out parameter. + */ +class Pipeline::Optimizations::Local { +public: /** - * This class holds optimizations applied to a single Pipeline. + * Moves matches before any adjacent sort phases. * - * Each function has the same signature and takes a Pipeline as an in/out parameter. + * This means we sort fewer items. Neither sorts, nor matches (excluding $text) + * change the documents in the stream, so this transformation shouldn't affect + * the result. */ - class Pipeline::Optimizations::Local { - public: - /** - * Moves matches before any adjacent sort phases. - * - * This means we sort fewer items. Neither sorts, nor matches (excluding $text) - * change the documents in the stream, so this transformation shouldn't affect - * the result. - */ - static void moveMatchBeforeSort(Pipeline* pipeline); + static void moveMatchBeforeSort(Pipeline* pipeline); - /** - * Moves skip and limit before any adjacent project phases. - * - * While this is performance-neutral on its own, it enables other optimizations - * such as combining sort and limit. - */ - static void moveSkipAndLimitBeforeProject(Pipeline* pipeline); + /** + * Moves skip and limit before any adjacent project phases. + * + * While this is performance-neutral on its own, it enables other optimizations + * such as combining sort and limit. 
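 * Illustrative rewrite (P = $project, S = $skip, L = $limit), matching the
 * case table in pipeline.cpp:
 *     [P, S, L]  ->  [S, L, P]
 * $project emits exactly one output document per input, so moving $skip and
 * $limit in front of it cannot change the result.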
+ */ + static void moveSkipAndLimitBeforeProject(Pipeline* pipeline); - /** - * Moves limits before any adjacent skip phases. - * - * This is more optimal for sharding since currently, we can only split - * the pipeline at a single source and it is better to limit the results - * coming from each shard. This also enables other optimizations like - * coalescing the limit into a sort. - */ - static void moveLimitBeforeSkip(Pipeline* pipeline); + /** + * Moves limits before any adjacent skip phases. + * + * This is more optimal for sharding since currently, we can only split + * the pipeline at a single source and it is better to limit the results + * coming from each shard. This also enables other optimizations like + * coalescing the limit into a sort. + */ + static void moveLimitBeforeSkip(Pipeline* pipeline); - /** - * Runs through the DocumentSources, and give each one the opportunity - * to coalesce with its successor. If successful, remove the successor. - * - * This should generally be run after optimizations that reorder stages - * to be most effective. - * - * NOTE: uses the DocumentSource::coalesce() method - */ - static void coalesceAdjacent(Pipeline* pipeline); + /** + * Runs through the DocumentSources, and give each one the opportunity + * to coalesce with its successor. If successful, remove the successor. + * + * This should generally be run after optimizations that reorder stages + * to be most effective. + * + * NOTE: uses the DocumentSource::coalesce() method + */ + static void coalesceAdjacent(Pipeline* pipeline); - /** - * Gives each DocumentSource the opportunity to optimize itself. - * - * NOTE: uses the DocumentSource::optimize() method - */ - static void optimizeEachDocumentSource(Pipeline* pipeline); + /** + * Gives each DocumentSource the opportunity to optimize itself. + * + * NOTE: uses the DocumentSource::optimize() method + */ + static void optimizeEachDocumentSource(Pipeline* pipeline); - /** - * Optimizes [$redact, $match] to [$match, $redact, $match] if possible. - * - * This gives us the ability to use indexes and reduce the number of - * BSONObjs converted to Documents. - */ - static void duplicateMatchBeforeInitalRedact(Pipeline* pipeline); - }; + /** + * Optimizes [$redact, $match] to [$match, $redact, $match] if possible. + * + * This gives us the ability to use indexes and reduce the number of + * BSONObjs converted to Documents. + */ + static void duplicateMatchBeforeInitalRedact(Pipeline* pipeline); +}; +/** + * This class holds optimizations applied to a shard Pipeline and a merger Pipeline. + * + * Each function has the same signature and takes two Pipelines, both as an in/out parameters. + */ +class Pipeline::Optimizations::Sharded { +public: /** - * This class holds optimizations applied to a shard Pipeline and a merger Pipeline. + * Moves everything before a splittable stage to the shards. If there + * are no splittable stages, moves everything to the shards. + * + * It is not safe to call this optimization multiple times. * - * Each function has the same signature and takes two Pipelines, both as an in/out parameters. + * NOTE: looks for SplittableDocumentSources and uses that API */ - class Pipeline::Optimizations::Sharded { - public: - /** - * Moves everything before a splittable stage to the shards. If there - * are no splittable stages, moves everything to the shards. - * - * It is not safe to call this optimization multiple times. 
- * - * NOTE: looks for SplittableDocumentSources and uses that API - */ - static void findSplitPoint(Pipeline* shardPipe, Pipeline* mergePipe); + static void findSplitPoint(Pipeline* shardPipe, Pipeline* mergePipe); - /** - * If the final stage on shards is to unwind an array, move that stage to the merger. This - * cuts down on network traffic and allows us to take advantage of reduced copying in - * unwind. - */ - static void moveFinalUnwindFromShardsToMerger(Pipeline* shardPipe, Pipeline* mergePipe); + /** + * If the final stage on shards is to unwind an array, move that stage to the merger. This + * cuts down on network traffic and allows us to take advantage of reduced copying in + * unwind. + */ + static void moveFinalUnwindFromShardsToMerger(Pipeline* shardPipe, Pipeline* mergePipe); - /** - * Adds a stage to the end of shardPipe explicitly requesting all fields that mergePipe - * needs. This is only done if it heuristically determines that it is needed. This - * optimization can reduce the amount of network traffic and can also enable the shards to - * convert less source BSON into Documents. - */ - static void limitFieldsSentFromShardsToMerger(Pipeline* shardPipe, Pipeline* mergePipe); - }; -} // namespace mongo + /** + * Adds a stage to the end of shardPipe explicitly requesting all fields that mergePipe + * needs. This is only done if it heuristically determines that it is needed. This + * optimization can reduce the amount of network traffic and can also enable the shards to + * convert less source BSON into Documents. + */ + static void limitFieldsSentFromShardsToMerger(Pipeline* shardPipe, Pipeline* mergePipe); +}; +} // namespace mongo diff --git a/src/mongo/db/pipeline/value.cpp b/src/mongo/db/pipeline/value.cpp index 0d55a309002..4afc5ccf684 100644 --- a/src/mongo/db/pipeline/value.cpp +++ b/src/mongo/db/pipeline/value.cpp @@ -41,17 +41,17 @@ #include "mongo/util/mongoutils/str.h" namespace mongo { - using namespace mongoutils; - using boost::intrusive_ptr; - using std::min; - using std::numeric_limits; - using std::ostream; - using std::string; - using std::stringstream; - using std::vector; - - void ValueStorage::verifyRefCountingIfShould() const { - switch (type) { +using namespace mongoutils; +using boost::intrusive_ptr; +using std::min; +using std::numeric_limits; +using std::ostream; +using std::string; +using std::stringstream; +using std::vector; + +void ValueStorage::verifyRefCountingIfShould() const { + switch (type) { case MinKey: case MaxKey: case jstOID: @@ -76,8 +76,8 @@ namespace mongo { verify(refCounter == !shortStr); break; - case BinData: // TODO this should probably support short-string optimization - case Array: // TODO this should probably support empty-is-NULL optimization + case BinData: // TODO this should probably support short-string optimization + case Array: // TODO this should probably support empty-is-NULL optimization case DBRef: case CodeWScope: // the above types always reference external data. @@ -89,61 +89,60 @@ namespace mongo { // Objects either hold a NULL ptr or should be ref-counting verify(refCounter == bool(genericRCPtr)); break; - } } +} - void ValueStorage::putString(StringData s) { - // Note: this also stores data portion of BinData - const size_t sizeNoNUL = s.size(); - if (sizeNoNUL <= sizeof(shortStrStorage)) { - shortStr = true; - shortStrSize = s.size(); - s.copyTo(shortStrStorage, false); // no NUL - - // All memory is zeroed before this is called. 
- // Note this may be past end of shortStrStorage and into nulTerminator - dassert(shortStrStorage[sizeNoNUL] == '\0'); - } - else { - putRefCountable(RCString::create(s)); - } +void ValueStorage::putString(StringData s) { + // Note: this also stores data portion of BinData + const size_t sizeNoNUL = s.size(); + if (sizeNoNUL <= sizeof(shortStrStorage)) { + shortStr = true; + shortStrSize = s.size(); + s.copyTo(shortStrStorage, false); // no NUL + + // All memory is zeroed before this is called. + // Note this may be past end of shortStrStorage and into nulTerminator + dassert(shortStrStorage[sizeNoNUL] == '\0'); + } else { + putRefCountable(RCString::create(s)); } +} - void ValueStorage::putDocument(const Document& d) { - putRefCountable(d._storage); - } +void ValueStorage::putDocument(const Document& d) { + putRefCountable(d._storage); +} - void ValueStorage::putVector(const RCVector* vec) { - fassert(16485, vec); - putRefCountable(vec); - } +void ValueStorage::putVector(const RCVector* vec) { + fassert(16485, vec); + putRefCountable(vec); +} - void ValueStorage::putRegEx(const BSONRegEx& re) { - const size_t patternLen = re.pattern.size(); - const size_t flagsLen = re.flags.size(); - const size_t totalLen = patternLen + 1/*middle NUL*/ + flagsLen; +void ValueStorage::putRegEx(const BSONRegEx& re) { + const size_t patternLen = re.pattern.size(); + const size_t flagsLen = re.flags.size(); + const size_t totalLen = patternLen + 1 /*middle NUL*/ + flagsLen; - // Need to copy since putString doesn't support scatter-gather. - std::unique_ptr<char[]> buf (new char[totalLen]); - re.pattern.copyTo(buf.get(), true); - re.flags.copyTo(buf.get() + patternLen + 1, false); // no NUL - putString(StringData(buf.get(), totalLen)); - } + // Need to copy since putString doesn't support scatter-gather. 
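// Worked example of the packed layout (illustrative):
//     BSONRegEx("^ab", "i")  ->  totalLen = 3 + 1 + 1 = 5
// stored as '^' 'a' 'b' '\0' 'i'; the embedded NUL is what later separates
// the pattern from the flags.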
+ std::unique_ptr<char[]> buf(new char[totalLen]); + re.pattern.copyTo(buf.get(), true); + re.flags.copyTo(buf.get() + patternLen + 1, false); // no NUL + putString(StringData(buf.get(), totalLen)); +} - Document ValueStorage::getDocument() const { - if (!genericRCPtr) - return Document(); +Document ValueStorage::getDocument() const { + if (!genericRCPtr) + return Document(); - dassert(typeid(*genericRCPtr) == typeid(const DocumentStorage)); - const DocumentStorage* documentPtr = static_cast<const DocumentStorage*>(genericRCPtr); - return Document(documentPtr); - } + dassert(typeid(*genericRCPtr) == typeid(const DocumentStorage)); + const DocumentStorage* documentPtr = static_cast<const DocumentStorage*>(genericRCPtr); + return Document(documentPtr); +} - // not in header because document is fwd declared - Value::Value(const BSONObj& obj) : _storage(Object, Document(obj)) {} +// not in header because document is fwd declared +Value::Value(const BSONObj& obj) : _storage(Object, Document(obj)) {} - Value::Value(const BSONElement& elem) : _storage(elem.type()) { - switch(elem.type()) { +Value::Value(const BSONElement& elem) : _storage(elem.type()) { + switch (elem.type()) { // These are all type-only, no data case EOO: case MinKey: @@ -168,7 +167,7 @@ namespace mongo { } case Array: { - intrusive_ptr<RCVector> vec (new RCVector); + intrusive_ptr<RCVector> vec(new RCVector); BSONForEach(sub, elem.embeddedObject()) { vec->vec.push_back(Value(sub)); } @@ -207,7 +206,7 @@ namespace mongo { break; case CodeWScope: { - StringData code (elem.codeWScopeCode(), elem.codeWScopeCodeLen()-1); + StringData code(elem.codeWScopeCode(), elem.codeWScopeCodeLen() - 1); _storage.putCodeWScope(BSONCodeWScope(code, elem.codeWScopeObject())); break; } @@ -222,83 +221,100 @@ namespace mongo { case DBRef: _storage.putDBRef(BSONDBRef(elem.dbrefNS(), elem.dbrefOID())); break; - } } +} - Value::Value(const BSONArray& arr) : _storage(Array) { - intrusive_ptr<RCVector> vec (new RCVector); - BSONForEach(sub, arr) { - vec->vec.push_back(Value(sub)); - } - _storage.putVector(vec.get()); +Value::Value(const BSONArray& arr) : _storage(Array) { + intrusive_ptr<RCVector> vec(new RCVector); + BSONForEach(sub, arr) { + vec->vec.push_back(Value(sub)); } + _storage.putVector(vec.get()); +} - Value Value::createIntOrLong(long long longValue) { - int intValue = longValue; - if (intValue != longValue) { - // it is too large to be an int and should remain a long - return Value(longValue); - } - - // should be an int since all arguments were int and it fits - return Value(intValue); +Value Value::createIntOrLong(long long longValue) { + int intValue = longValue; + if (intValue != longValue) { + // it is too large to be an int and should remain a long + return Value(longValue); } - double Value::getDouble() const { - BSONType type = getType(); - if (type == NumberInt) - return _storage.intValue; - if (type == NumberLong) - return static_cast< double >( _storage.longValue ); + // should be an int since all arguments were int and it fits + return Value(intValue); +} - verify(type == NumberDouble); - return _storage.doubleValue; - } +double Value::getDouble() const { + BSONType type = getType(); + if (type == NumberInt) + return _storage.intValue; + if (type == NumberLong) + return static_cast<double>(_storage.longValue); - Document Value::getDocument() const { - verify(getType() == Object); - return _storage.getDocument(); - } + verify(type == NumberDouble); + return _storage.doubleValue; +} + +Document Value::getDocument() const { + 
verify(getType() == Object); + return _storage.getDocument(); +} - Value Value::operator[] (size_t index) const { - if (getType() != Array || index >= getArrayLength()) - return Value(); +Value Value::operator[](size_t index) const { + if (getType() != Array || index >= getArrayLength()) + return Value(); - return getArray()[index]; - } + return getArray()[index]; +} - Value Value::operator[] (StringData name) const { - if (getType() != Object) - return Value(); +Value Value::operator[](StringData name) const { + if (getType() != Object) + return Value(); - return getDocument()[name]; - } + return getDocument()[name]; +} - BSONObjBuilder& operator << (BSONObjBuilderValueStream& builder, const Value& val) { - switch(val.getType()) { - case EOO: return builder.builder(); // nothing appended - case MinKey: return builder << MINKEY; - case MaxKey: return builder << MAXKEY; - case jstNULL: return builder << BSONNULL; - case Undefined: return builder << BSONUndefined; - case jstOID: return builder << val.getOid(); - case NumberInt: return builder << val.getInt(); - case NumberLong: return builder << val.getLong(); - case NumberDouble: return builder << val.getDouble(); - case String: return builder << val.getStringData(); - case Bool: return builder << val.getBool(); - case Date: return builder << Date_t::fromMillisSinceEpoch(val.getDate()); - case bsonTimestamp: return builder << val.getTimestamp(); - case Object: return builder << val.getDocument(); - case Symbol: return builder << BSONSymbol(val.getStringData()); - case Code: return builder << BSONCode(val.getStringData()); - case RegEx: return builder << BSONRegEx(val.getRegex(), val.getRegexFlags()); +BSONObjBuilder& operator<<(BSONObjBuilderValueStream& builder, const Value& val) { + switch (val.getType()) { + case EOO: + return builder.builder(); // nothing appended + case MinKey: + return builder << MINKEY; + case MaxKey: + return builder << MAXKEY; + case jstNULL: + return builder << BSONNULL; + case Undefined: + return builder << BSONUndefined; + case jstOID: + return builder << val.getOid(); + case NumberInt: + return builder << val.getInt(); + case NumberLong: + return builder << val.getLong(); + case NumberDouble: + return builder << val.getDouble(); + case String: + return builder << val.getStringData(); + case Bool: + return builder << val.getBool(); + case Date: + return builder << Date_t::fromMillisSinceEpoch(val.getDate()); + case bsonTimestamp: + return builder << val.getTimestamp(); + case Object: + return builder << val.getDocument(); + case Symbol: + return builder << BSONSymbol(val.getStringData()); + case Code: + return builder << BSONCode(val.getStringData()); + case RegEx: + return builder << BSONRegEx(val.getRegex(), val.getRegexFlags()); case DBRef: return builder << BSONDBRef(val._storage.getDBRef()->ns, val._storage.getDBRef()->oid); case BinData: - return builder << BSONBinData(val.getStringData().rawData(), // looking for void* + return builder << BSONBinData(val.getStringData().rawData(), // looking for void* val.getStringData().size(), val._storage.binDataType()); @@ -310,29 +326,29 @@ namespace mongo { const vector<Value>& array = val.getArray(); const size_t n = array.size(); BSONArrayBuilder arrayBuilder(builder.subarrayStart()); - for(size_t i = 0; i < n; i++) { + for (size_t i = 0; i < n; i++) { array[i].addToBsonArray(&arrayBuilder); } arrayBuilder.doneFast(); return builder.builder(); } - } - verify(false); } + verify(false); +} - void Value::addToBsonObj(BSONObjBuilder* pBuilder, StringData fieldName) 
const { - *pBuilder << fieldName << *this; - } +void Value::addToBsonObj(BSONObjBuilder* pBuilder, StringData fieldName) const { + *pBuilder << fieldName << *this; +} - void Value::addToBsonArray(BSONArrayBuilder* pBuilder) const { - if (!missing()) { // don't want to increment builder's counter - *pBuilder << *this; - } +void Value::addToBsonArray(BSONArrayBuilder* pBuilder) const { + if (!missing()) { // don't want to increment builder's counter + *pBuilder << *this; } +} - bool Value::coerceToBool() const { - // TODO Unify the implementation with BSONElement::trueValue(). - switch(getType()) { +bool Value::coerceToBool() const { + // TODO Unify the implementation with BSONElement::trueValue(). + switch (getType()) { case CodeWScope: case MinKey: case DBRef: @@ -354,16 +370,20 @@ namespace mongo { case Undefined: return false; - case Bool: return _storage.boolValue; - case NumberInt: return _storage.intValue; - case NumberLong: return _storage.longValue; - case NumberDouble: return _storage.doubleValue; - } - verify(false); + case Bool: + return _storage.boolValue; + case NumberInt: + return _storage.intValue; + case NumberLong: + return _storage.longValue; + case NumberDouble: + return _storage.doubleValue; } + verify(false); +} - int Value::coerceToInt() const { - switch(getType()) { +int Value::coerceToInt() const { + switch (getType()) { case NumberInt: return _storage.intValue; @@ -374,15 +394,15 @@ namespace mongo { return static_cast<int>(_storage.doubleValue); default: - uassert(16003, str::stream() << - "can't convert from BSON type " << typeName(getType()) << - " to int", + uassert(16003, + str::stream() << "can't convert from BSON type " << typeName(getType()) + << " to int", false); - } // switch(getType()) - } + } // switch(getType()) +} - long long Value::coerceToLong() const { - switch(getType()) { +long long Value::coerceToLong() const { + switch (getType()) { case NumberLong: return _storage.longValue; @@ -393,15 +413,15 @@ namespace mongo { return static_cast<long long>(_storage.doubleValue); default: - uassert(16004, str::stream() << - "can't convert from BSON type " << typeName(getType()) << - " to long", + uassert(16004, + str::stream() << "can't convert from BSON type " << typeName(getType()) + << " to long", false); - } // switch(getType()) - } + } // switch(getType()) +} - double Value::coerceToDouble() const { - switch(getType()) { +double Value::coerceToDouble() const { + switch (getType()) { case NumberDouble: return _storage.doubleValue; @@ -412,15 +432,15 @@ namespace mongo { return static_cast<double>(_storage.longValue); default: - uassert(16005, str::stream() << - "can't convert from BSON type " << typeName(getType()) << - " to double", + uassert(16005, + str::stream() << "can't convert from BSON type " << typeName(getType()) + << " to double", false); - } // switch(getType()) - } + } // switch(getType()) +} - long long Value::coerceToDate() const { - switch(getType()) { +long long Value::coerceToDate() const { + switch (getType()) { case Date: return getDate(); @@ -428,65 +448,66 @@ namespace mongo { return getTimestamp().getSecs() * 1000LL; default: - uassert(16006, str::stream() << - "can't convert from BSON type " << typeName(getType()) << " to Date", + uassert(16006, + str::stream() << "can't convert from BSON type " << typeName(getType()) + << " to Date", false); - } // switch(getType()) - } + } // switch(getType()) +} - time_t Value::coerceToTimeT() const { - long long millis = coerceToDate(); - if (millis < 0) { - // We want the division below to 
truncate toward -inf rather than 0 - // eg Dec 31, 1969 23:59:58.001 should be -2 seconds rather than -1 - // This is needed to get the correct values from coerceToTM - if ( -1999 / 1000 != -2) { // this is implementation defined - millis -= 1000-1; - } +time_t Value::coerceToTimeT() const { + long long millis = coerceToDate(); + if (millis < 0) { + // We want the division below to truncate toward -inf rather than 0 + // eg Dec 31, 1969 23:59:58.001 should be -2 seconds rather than -1 + // This is needed to get the correct values from coerceToTM + if (-1999 / 1000 != -2) { // this is implementation defined + millis -= 1000 - 1; } - const long long seconds = millis / 1000; + } + const long long seconds = millis / 1000; - uassert(16421, "Can't handle date values outside of time_t range", - seconds >= std::numeric_limits<time_t>::min() && - seconds <= std::numeric_limits<time_t>::max()); + uassert(16421, + "Can't handle date values outside of time_t range", + seconds >= std::numeric_limits<time_t>::min() && + seconds <= std::numeric_limits<time_t>::max()); - return static_cast<time_t>(seconds); - } - tm Value::coerceToTm() const { - // See implementation in Date_t. - // Can't reuse that here because it doesn't support times before 1970 - time_t dtime = coerceToTimeT(); - tm out; - -#if defined(_WIN32) // Both the argument order and the return values differ - bool itWorked = gmtime_s(&out, &dtime) == 0; + return static_cast<time_t>(seconds); +} +tm Value::coerceToTm() const { + // See implementation in Date_t. + // Can't reuse that here because it doesn't support times before 1970 + time_t dtime = coerceToTimeT(); + tm out; + +#if defined(_WIN32) // Both the argument order and the return values differ + bool itWorked = gmtime_s(&out, &dtime) == 0; #else - bool itWorked = gmtime_r(&dtime, &out) != NULL; + bool itWorked = gmtime_r(&dtime, &out) != NULL; #endif - if (!itWorked) { - if (dtime < 0) { - // Windows docs say it doesn't support these, but empirically it seems to work - uasserted(16422, "gmtime failed - your system doesn't support dates before 1970"); - } - else { - uasserted(16423, str::stream() << "gmtime failed to convert time_t of " << dtime); - } + if (!itWorked) { + if (dtime < 0) { + // Windows docs say it doesn't support these, but empirically it seems to work + uasserted(16422, "gmtime failed - your system doesn't support dates before 1970"); + } else { + uasserted(16423, str::stream() << "gmtime failed to convert time_t of " << dtime); } - - return out; } - static string tmToISODateString(const tm& time) { - char buf[128]; - size_t len = strftime(buf, 128, "%Y-%m-%dT%H:%M:%S", &time); - verify(len > 0); - verify(len < 128); - return buf; - } + return out; +} + +static string tmToISODateString(const tm& time) { + char buf[128]; + size_t len = strftime(buf, 128, "%Y-%m-%dT%H:%M:%S", &time); + verify(len > 0); + verify(len < 128); + return buf; +} - string Value::coerceToString() const { - switch(getType()) { +string Value::coerceToString() const { + switch (getType()) { case NumberDouble: return str::stream() << _storage.doubleValue; @@ -513,57 +534,53 @@ namespace mongo { return ""; default: - uassert(16007, str::stream() << - "can't convert from BSON type " << typeName(getType()) << - " to String", + uassert(16007, + str::stream() << "can't convert from BSON type " << typeName(getType()) + << " to String", false); - } // switch(getType()) - } + } // switch(getType()) +} - Timestamp Value::coerceToTimestamp() const { - switch(getType()) { +Timestamp Value::coerceToTimestamp() 
const { + switch (getType()) { case bsonTimestamp: return getTimestamp(); default: - uassert(16378, str::stream() << - "can't convert from BSON type " << typeName(getType()) << - " to timestamp", + uassert(16378, + str::stream() << "can't convert from BSON type " << typeName(getType()) + << " to timestamp", false); - } // switch(getType()) - } + } // switch(getType()) +} - // Helper function for Value::compare. - // Better than l-r for cases where difference > MAX_INT - template <typename T> - inline static int cmp(const T& left, const T& right) { - if (left < right) { - return -1; - } - else if (left == right) { - return 0; - } - else { - dassert(left > right); - return 1; - } +// Helper function for Value::compare. +// Better than l-r for cases where difference > MAX_INT +template <typename T> +inline static int cmp(const T& left, const T& right) { + if (left < right) { + return -1; + } else if (left == right) { + return 0; + } else { + dassert(left > right); + return 1; } +} - int Value::compare(const Value& rL, const Value& rR) { - // Note, this function needs to behave identically to BSON's compareElementValues(). - // Additionally, any changes here must be replicated in hash_combine(). - BSONType lType = rL.getType(); - BSONType rType = rR.getType(); +int Value::compare(const Value& rL, const Value& rR) { + // Note, this function needs to behave identically to BSON's compareElementValues(). + // Additionally, any changes here must be replicated in hash_combine(). + BSONType lType = rL.getType(); + BSONType rType = rR.getType(); - int ret = lType == rType - ? 0 // fast-path common case - : cmp(canonicalizeBSONType(lType), - canonicalizeBSONType(rType)); + int ret = lType == rType ? 0 // fast-path common case + : cmp(canonicalizeBSONType(lType), canonicalizeBSONType(rType)); - if (ret) - return ret; + if (ret) + return ret; - switch(lType) { + switch (lType) { // Order of types is the same as in compareElementValues() to make it easier to verify // These are valueless types @@ -577,10 +594,10 @@ namespace mongo { case Bool: return rL.getBool() - rR.getBool(); - case bsonTimestamp: // unsigned + case bsonTimestamp: // unsigned return cmp(rL._storage.timestampValue, rR._storage.timestampValue); - case Date: // signed + case Date: // signed return cmp(rL._storage.dateValue, rR._storage.dateValue); // Numbers should compare by equivalence even if different types @@ -588,32 +605,40 @@ namespace mongo { // All types can precisely represent all NumberInts, so it is safe to simply convert to // whatever rhs's type is. 
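            // e.g. comparing Value(1) with Value(1.5) goes through
            // compareDoubles(1, 1.5) and yields -1: the int side is widened
            // rather than the double side truncated.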
switch (rType) { - case NumberInt: return compareInts(rL._storage.intValue, rR._storage.intValue); - case NumberLong: return compareLongs(rL._storage.intValue, rR._storage.longValue); - case NumberDouble: return compareDoubles(rL._storage.intValue, rR._storage.doubleValue); - default: invariant(false); + case NumberInt: + return compareInts(rL._storage.intValue, rR._storage.intValue); + case NumberLong: + return compareLongs(rL._storage.intValue, rR._storage.longValue); + case NumberDouble: + return compareDoubles(rL._storage.intValue, rR._storage.doubleValue); + default: + invariant(false); } } case NumberLong: { switch (rType) { - case NumberLong: return compareLongs(rL._storage.longValue, rR._storage.longValue); - case NumberInt: return compareLongs(rL._storage.longValue, rR._storage.intValue); - case NumberDouble: return compareLongToDouble(rL._storage.longValue, - rR._storage.doubleValue); - default: invariant(false); + case NumberLong: + return compareLongs(rL._storage.longValue, rR._storage.longValue); + case NumberInt: + return compareLongs(rL._storage.longValue, rR._storage.intValue); + case NumberDouble: + return compareLongToDouble(rL._storage.longValue, rR._storage.doubleValue); + default: + invariant(false); } } case NumberDouble: { switch (rType) { - case NumberDouble: return compareDoubles(rL._storage.doubleValue, - rR._storage.doubleValue); - case NumberInt: return compareDoubles(rL._storage.doubleValue, - rR._storage.intValue); - case NumberLong: return compareDoubleToLong(rL._storage.doubleValue, - rR._storage.longValue); - default: invariant(false); + case NumberDouble: + return compareDoubles(rL._storage.doubleValue, rR._storage.doubleValue); + case NumberInt: + return compareDoubles(rL._storage.doubleValue, rR._storage.intValue); + case NumberLong: + return compareDoubleToLong(rL._storage.doubleValue, rR._storage.longValue); + default: + invariant(false); } } @@ -633,14 +658,14 @@ namespace mongo { const vector<Value>& rArr = rR.getArray(); const size_t elems = std::min(lArr.size(), rArr.size()); - for (size_t i = 0; i < elems; i++ ) { + for (size_t i = 0; i < elems; i++) { // compare the two corresponding elements ret = Value::compare(lArr[i], rArr[i]); if (ret) - return ret; // values are unequal + return ret; // values are unequal } - // if we get here we are either equal or one is prefix of the other + // if we get here we are either equal or one is prefix of the other return cmp(lArr.size(), rArr.size()); } @@ -667,7 +692,7 @@ namespace mongo { return rL.getStringData().compare(rR.getStringData()); } - case RegEx: // same as String in this impl but keeping order same as compareElementValues + case RegEx: // same as String in this impl but keeping order same as compareElementValues return rL.getStringData().compare(rR.getStringData()); case CodeWScope: { @@ -680,16 +705,16 @@ namespace mongo { return l->scope.woCompare(r->scope); } - } - verify(false); } + verify(false); +} - void Value::hash_combine(size_t &seed) const { - BSONType type = getType(); +void Value::hash_combine(size_t& seed) const { + BSONType type = getType(); - boost::hash_combine(seed, canonicalizeBSONType(type)); + boost::hash_combine(seed, canonicalizeBSONType(type)); - switch (type) { + switch (type) { // Order of types is the same as in Value::compare() and compareElementValues(). 
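        // Note (illustrative): Value(1), Value(1LL), and Value(1.0) compare
        // equal, so the numeric cases below hash one common representation
        // (see the getDouble() call there) to keep hashing consistent with
        // equality for ValueSet and other hashed containers.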
// These are valueless types @@ -720,8 +745,7 @@ namespace mongo { const double dbl = getDouble(); if (std::isnan(dbl)) { boost::hash_combine(seed, numeric_limits<double>::quiet_NaN()); - } - else { + } else { boost::hash_combine(seed, dbl); } break; @@ -745,7 +769,7 @@ namespace mongo { case Array: { const vector<Value>& vec = getArray(); - for (size_t i=0; i < vec.size(); i++) + for (size_t i = 0; i < vec.size(); i++) vec[i].hash_combine(seed); break; } @@ -775,12 +799,12 @@ namespace mongo { boost::hash_combine(seed, BSONObj::Hasher()(cws->scope)); break; } - } } +} - BSONType Value::getWidestNumeric(BSONType lType, BSONType rType) { - if (lType == NumberDouble) { - switch(rType) { +BSONType Value::getWidestNumeric(BSONType lType, BSONType rType) { + if (lType == NumberDouble) { + switch (rType) { case NumberDouble: case NumberLong: case NumberInt: @@ -788,10 +812,9 @@ namespace mongo { default: break; - } } - else if (lType == NumberLong) { - switch(rType) { + } else if (lType == NumberLong) { + switch (rType) { case NumberDouble: return NumberDouble; @@ -801,10 +824,9 @@ namespace mongo { default: break; - } } - else if (lType == NumberInt) { - switch(rType) { + } else if (lType == NumberInt) { + switch (rType) { case NumberDouble: return NumberDouble; @@ -816,38 +838,38 @@ namespace mongo { default: break; - } } - - // Reachable, but callers must subsequently err out in this case. - return Undefined; } - bool Value::integral() const { - switch (getType()) { + // Reachable, but callers must subsequently err out in this case. + return Undefined; +} + +bool Value::integral() const { + switch (getType()) { case NumberInt: return true; case NumberLong: - return (_storage.longValue <= numeric_limits<int>::max() - && _storage.longValue >= numeric_limits<int>::min()); + return (_storage.longValue <= numeric_limits<int>::max() && + _storage.longValue >= numeric_limits<int>::min()); case NumberDouble: - return (_storage.doubleValue <= numeric_limits<int>::max() - && _storage.doubleValue >= numeric_limits<int>::min() - && _storage.doubleValue == static_cast<int>(_storage.doubleValue)); + return (_storage.doubleValue <= numeric_limits<int>::max() && + _storage.doubleValue >= numeric_limits<int>::min() && + _storage.doubleValue == static_cast<int>(_storage.doubleValue)); default: return false; - } } +} - size_t Value::getApproximateSize() const { - switch(getType()) { +size_t Value::getApproximateSize() const { + switch (getType()) { case Code: case RegEx: case Symbol: case BinData: case String: return sizeof(Value) + (_storage.shortStr - ? 0 // string stored inline, so no extra mem usage + ? 
0 // string stored inline, so no extra mem usage : sizeof(RCString) + _storage.getString().size()); case Object: @@ -857,15 +879,15 @@ namespace mongo { size_t size = sizeof(Value); size += sizeof(RCVector); const size_t n = getArray().size(); - for(size_t i = 0; i < n; ++i) { + for (size_t i = 0; i < n; ++i) { size += getArray()[i].getApproximateSize(); } return size; } case CodeWScope: - return sizeof(Value) + sizeof(RCCodeWScope) + _storage.getCodeWScope()->code.size() - + _storage.getCodeWScope()->scope.objsize(); + return sizeof(Value) + sizeof(RCCodeWScope) + _storage.getCodeWScope()->code.size() + + _storage.getCodeWScope()->scope.objsize(); case DBRef: return sizeof(Value) + sizeof(RCDBRef) + _storage.getDBRef()->ns.size(); @@ -884,40 +906,57 @@ namespace mongo { case jstNULL: case Undefined: return sizeof(Value); - } - verify(false); } + verify(false); +} - string Value::toString() const { - // TODO use StringBuilder when operator << is ready - stringstream out; - out << *this; - return out.str(); - } +string Value::toString() const { + // TODO use StringBuilder when operator << is ready + stringstream out; + out << *this; + return out.str(); +} - ostream& operator << (ostream& out, const Value& val) { - switch(val.getType()) { - case EOO: return out << "MISSING"; - case MinKey: return out << "MinKey"; - case MaxKey: return out << "MaxKey"; - case jstOID: return out << val.getOid(); - case String: return out << '"' << val.getString() << '"'; - case RegEx: return out << '/' << val.getRegex() << '/' << val.getRegexFlags(); - case Symbol: return out << "Symbol(\"" << val.getSymbol() << "\")"; - case Code: return out << "Code(\"" << val.getCode() << "\")"; - case Bool: return out << (val.getBool() ? "true" : "false"); - case NumberDouble: return out << val.getDouble(); - case NumberLong: return out << val.getLong(); - case NumberInt: return out << val.getInt(); - case jstNULL: return out << "null"; - case Undefined: return out << "undefined"; - case Date: return out << tmToISODateString(val.coerceToTm()); - case bsonTimestamp: return out << val.getTimestamp().toString(); - case Object: return out << val.getDocument().toString(); +ostream& operator<<(ostream& out, const Value& val) { + switch (val.getType()) { + case EOO: + return out << "MISSING"; + case MinKey: + return out << "MinKey"; + case MaxKey: + return out << "MaxKey"; + case jstOID: + return out << val.getOid(); + case String: + return out << '"' << val.getString() << '"'; + case RegEx: + return out << '/' << val.getRegex() << '/' << val.getRegexFlags(); + case Symbol: + return out << "Symbol(\"" << val.getSymbol() << "\")"; + case Code: + return out << "Code(\"" << val.getCode() << "\")"; + case Bool: + return out << (val.getBool() ? 
"true" : "false"); + case NumberDouble: + return out << val.getDouble(); + case NumberLong: + return out << val.getLong(); + case NumberInt: + return out << val.getInt(); + case jstNULL: + return out << "null"; + case Undefined: + return out << "undefined"; + case Date: + return out << tmToISODateString(val.coerceToTm()); + case bsonTimestamp: + return out << val.getTimestamp().toString(); + case Object: + return out << val.getDocument().toString(); case Array: { out << "["; const size_t n = val.getArray().size(); - for(size_t i = 0; i < n; i++) { + for (size_t i = 0; i < n; i++) { if (i) out << ", "; out << val.getArray()[i]; @@ -928,26 +967,25 @@ namespace mongo { case CodeWScope: return out << "CodeWScope(\"" << val._storage.getCodeWScope()->code << "\", " - << val._storage.getCodeWScope()->scope << ')'; + << val._storage.getCodeWScope()->scope << ')'; - case BinData: + case BinData: return out << "BinData(" << val._storage.binDataType() << ", \"" - << toHex(val._storage.getString().rawData() - ,val._storage.getString().size()) - << "\")"; + << toHex(val._storage.getString().rawData(), val._storage.getString().size()) + << "\")"; case DBRef: return out << "DBRef(\"" << val._storage.getDBRef()->ns << "\", " - << val._storage.getDBRef()->oid << ')'; - } - - // Not in default case to trigger better warning if a case is missing - verify(false); + << val._storage.getDBRef()->oid << ')'; } - void Value::serializeForSorter(BufBuilder& buf) const { - buf.appendChar(getType()); - switch(getType()) { + // Not in default case to trigger better warning if a case is missing + verify(false); +} + +void Value::serializeForSorter(BufBuilder& buf) const { + buf.appendChar(getType()); + switch (getType()) { // type-only types case EOO: case MinKey: @@ -957,13 +995,27 @@ namespace mongo { break; // simple types - case jstOID: buf.appendStruct(_storage.oid); break; - case NumberInt: buf.appendNum(_storage.intValue); break; - case NumberLong: buf.appendNum(_storage.longValue); break; - case NumberDouble: buf.appendNum(_storage.doubleValue); break; - case Bool: buf.appendChar(_storage.boolValue); break; - case Date: buf.appendNum(_storage.dateValue); break; - case bsonTimestamp: buf.appendStruct(getTimestamp()); break; + case jstOID: + buf.appendStruct(_storage.oid); + break; + case NumberInt: + buf.appendNum(_storage.intValue); + break; + case NumberLong: + buf.appendNum(_storage.longValue); + break; + case NumberDouble: + buf.appendNum(_storage.doubleValue); + break; + case Bool: + buf.appendChar(_storage.boolValue); + break; + case Date: + buf.appendNum(_storage.dateValue); + break; + case bsonTimestamp: + buf.appendStruct(getTimestamp()); + break; // types that are like strings case String: @@ -1003,7 +1055,7 @@ namespace mongo { buf.appendStr(cws->code, /*NUL byte*/ false); cws->scope.serializeForSorter(buf); break; - } + } case Array: { const vector<Value>& array = getArray(); @@ -1013,12 +1065,12 @@ namespace mongo { array[i].serializeForSorter(buf); break; } - } } +} - Value Value::deserializeForSorter(BufReader& buf, const SorterDeserializeSettings& settings) { - const BSONType type = BSONType(buf.read<signed char>()); // need sign extension for MinKey - switch(type) { +Value Value::deserializeForSorter(BufReader& buf, const SorterDeserializeSettings& settings) { + const BSONType type = BSONType(buf.read<signed char>()); // need sign extension for MinKey + switch (type) { // type-only types case EOO: case MinKey: @@ -1028,13 +1080,20 @@ namespace mongo { return Value(ValueStorage(type)); // simple 
types
- case jstOID: return Value(OID::from(buf.skip(OID::kOIDSize)));
- case NumberInt: return Value(buf.read<int>());
- case NumberLong: return Value(buf.read<long long>());
- case NumberDouble: return Value(buf.read<double>());
- case Bool: return Value(bool(buf.read<char>()));
- case Date: return Value(Date_t::fromMillisSinceEpoch(buf.read<long long>()));
- case bsonTimestamp: return Value(buf.read<Timestamp>());
+ case jstOID:
+ return Value(OID::from(buf.skip(OID::kOIDSize)));
+ case NumberInt:
+ return Value(buf.read<int>());
+ case NumberLong:
+ return Value(buf.read<long long>());
+ case NumberDouble:
+ return Value(buf.read<double>());
+ case Bool:
+ return Value(bool(buf.read<char>()));
+ case Date:
+ return Value(Date_t::fromMillisSinceEpoch(buf.read<long long>()));
+ case bsonTimestamp:
+ return Value(buf.read<Timestamp>());

 // types that are like strings
 case String:
@@ -1059,8 +1118,8 @@
 }

 case Object:
- return Value(Document::deserializeForSorter(buf,
- Document::SorterDeserializeSettings()));
+ return Value(
+ Document::deserializeForSorter(buf, Document::SorterDeserializeSettings()));

 case DBRef: {
 OID oid = OID::from(buf.skip(OID::kOIDSize));
@@ -1073,7 +1132,7 @@
 const char* str = static_cast<const char*>(buf.skip(size));
 BSONObj bson = BSONObj::deserializeForSorter(buf, BSONObj::SorterDeserializeSettings());
 return Value(BSONCodeWScope(StringData(str, size), bson));
- }
+ }

 case Array: {
 const int numElems = buf.read<int>();
@@ -1083,7 +1142,7 @@
 array.push_back(deserializeForSorter(buf, settings));
 return Value(std::move(array));
 }
- }
- verify(false);
 }
+ verify(false);
+}
}
diff --git a/src/mongo/db/pipeline/value.h b/src/mongo/db/pipeline/value.h
index 41201f64873..0852407a84a 100644
--- a/src/mongo/db/pipeline/value.h
+++ b/src/mongo/db/pipeline/value.h
@@ -33,328 +33,338 @@
 #include "mongo/platform/unordered_set.h"

 namespace mongo {
- class BSONElement;
+class BSONElement;

- /** A variant type that can hold any type of data representable in BSON
+/** A variant type that can hold any type of data representable in BSON
+ *
+ * Small values are stored inline, but some values, such as large strings,
+ * are heap allocated. It has smart pointer capabilities built-in so it is
+ * safe and recommended to pass these around and return them by value.
+ *
+ * Values are immutable, but can be assigned. This means that once you have
+ * a Value, you can be assured that none of the data in that Value will
+ * change. However, if you have a non-const Value you can replace it with
+ * operator=. These rules are the same as BSONObj, and similar to
+ * shared_ptr<const Object> with stronger guarantees of constness. This is
+ * also the same as Java's String type.
+ *
+ * Thread-safety: A single Value instance can be safely shared between
+ * threads as long as there are no writers while other threads are
+ * accessing the object. Any number of threads can read from a Value
+ * concurrently. There are no restrictions on how threads access Value
+ * instances exclusively owned by them, even if they reference the same
+ * storage as Value in other threads.
+ */
+class Value {
+public:
+ /** Construct a Value
 *
- * Small values are stored inline, but some values, such as large strings,
- * are heap allocated. It has smart pointer capabilities built-in so it is
- * safe and recommended to pass these around and return them by value.
+ * All types not listed will be rejected rather than converted (see private for why)
 *
- * Values are immutable, but can be assigned. This means that once you have
- * a Value, you can be assured that none of the data in that Value will
- * change. However if you have a non-const Value you replace it with
- * operator=. These rules are the same as BSONObj, and similar to
- * shared_ptr<const Object> with stronger guarantees of constness. This is
- * also the same as Java's std::string type.
+ * Note: Currently these are all explicit conversions.
+ * I'm not sure if we want implicit or not.
+ * //TODO decide
+ */
+
+ Value() : _storage() {} // "Missing" value
+ explicit Value(bool value) : _storage(Bool, value) {}
+ explicit Value(int value) : _storage(NumberInt, value) {}
+ explicit Value(long long value) : _storage(NumberLong, value) {}
+ explicit Value(double value) : _storage(NumberDouble, value) {}
+ explicit Value(const Timestamp& value) : _storage(bsonTimestamp, value) {}
+ explicit Value(const OID& value) : _storage(jstOID, value) {}
+ explicit Value(StringData value) : _storage(String, value) {}
+ explicit Value(const std::string& value) : _storage(String, StringData(value)) {}
+ explicit Value(const char* value) : _storage(String, StringData(value)) {}
+ explicit Value(const Document& doc) : _storage(Object, doc) {}
+ explicit Value(const BSONObj& obj);
+ explicit Value(const BSONArray& arr);
+ explicit Value(std::vector<Value> vec) : _storage(Array, new RCVector(std::move(vec))) {}
+ explicit Value(const BSONBinData& bd) : _storage(BinData, bd) {}
+ explicit Value(const BSONRegEx& re) : _storage(RegEx, re) {}
+ explicit Value(const BSONCodeWScope& cws) : _storage(CodeWScope, cws) {}
+ explicit Value(const BSONDBRef& dbref) : _storage(DBRef, dbref) {}
+ explicit Value(const BSONSymbol& sym) : _storage(Symbol, sym.symbol) {}
+ explicit Value(const BSONCode& code) : _storage(Code, code.code) {}
+ explicit Value(const NullLabeler&) : _storage(jstNULL) {} // BSONNull
+ explicit Value(const UndefinedLabeler&) : _storage(Undefined) {} // BSONUndefined
+ explicit Value(const MinKeyLabeler&) : _storage(MinKey) {} // MINKEY
+ explicit Value(const MaxKeyLabeler&) : _storage(MaxKey) {} // MAXKEY
+ explicit Value(const Date_t& date) : _storage(Date, date.toMillisSinceEpoch()) {}
+
+ // TODO: add an unsafe version that can share storage with the BSONElement
+ /// Deep-convert from BSONElement to Value
+ explicit Value(const BSONElement& elem);
+
+ /** Construct a long or integer-valued Value.
 *
- * Thread-safety: A single Value instance can be safely shared between
- * threads as long as there are no writers while other threads are
- * accessing the object. Any number of threads can read from a Value
- * concurrently. There are no restrictions on how threads access Value
- * instances exclusively owned by them, even if they reference the same
- * storage as Value in other threads.
+ * Used when performing arithmetic operations with int where the
+ * result may be too large and may need to be stored as a long. The Value
+ * will be an int if the value fits, otherwise it will be a long.
+ */
+ static Value createIntOrLong(long long value);
+
+ /** A "missing" value indicates the lack of a Value.
+ * This is similar to undefined/null but should not appear in output to BSON.
+ * Missing Values are returned by Document when accessing non-existent fields.
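+ * Illustrative usage (someDoc and arrayBuilder are hypothetical):
+ *     Value v = someDoc["noSuchField"];  // v.missing() is true
+ *     v.addToBsonArray(&arrayBuilder);   // appends nothing for a missing Value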
*/ - class Value { - public: - /** Construct a Value - * - * All types not listed will be rejected rather than converted (see private for why) - * - * Note: Currently these are all explicit conversions. - * I'm not sure if we want implicit or not. - * //TODO decide - */ - - Value(): _storage() {} // "Missing" value - explicit Value(bool value) : _storage(Bool, value) {} - explicit Value(int value) : _storage(NumberInt, value) {} - explicit Value(long long value) : _storage(NumberLong, value) {} - explicit Value(double value) : _storage(NumberDouble, value) {} - explicit Value(const Timestamp& value) : _storage(bsonTimestamp, value) {} - explicit Value(const OID& value) : _storage(jstOID, value) {} - explicit Value(StringData value) : _storage(String, value) {} - explicit Value(const std::string& value) : _storage(String, StringData(value)) {} - explicit Value(const char* value) : _storage(String, StringData(value)) {} - explicit Value(const Document& doc) : _storage(Object, doc) {} - explicit Value(const BSONObj& obj); - explicit Value(const BSONArray& arr); - explicit Value(std::vector<Value> vec) : _storage(Array, new RCVector(std::move(vec))) {} - explicit Value(const BSONBinData& bd) : _storage(BinData, bd) {} - explicit Value(const BSONRegEx& re) : _storage(RegEx, re) {} - explicit Value(const BSONCodeWScope& cws) : _storage(CodeWScope, cws) {} - explicit Value(const BSONDBRef& dbref) : _storage(DBRef, dbref) {} - explicit Value(const BSONSymbol& sym) : _storage(Symbol, sym.symbol) {} - explicit Value(const BSONCode& code) : _storage(Code, code.code) {} - explicit Value(const NullLabeler&) : _storage(jstNULL) {} // BSONNull - explicit Value(const UndefinedLabeler&) : _storage(Undefined) {} // BSONUndefined - explicit Value(const MinKeyLabeler&) : _storage(MinKey) {} // MINKEY - explicit Value(const MaxKeyLabeler&) : _storage(MaxKey) {} // MAXKEY - explicit Value(const Date_t& date) : _storage(Date, date.toMillisSinceEpoch()) {} - - // TODO: add an unsafe version that can share storage with the BSONElement - /// Deep-convert from BSONElement to Value - explicit Value(const BSONElement& elem); - - /** Construct a long or integer-valued Value. - * - * Used when preforming arithmetic operations with int where the - * result may be too large and need to be stored as long. The Value - * will be an int if value fits, otherwise it will be a long. - */ - static Value createIntOrLong(long long value); - - /** A "missing" value indicates the lack of a Value. - * This is similar to undefined/null but should not appear in output to BSON. - * Missing Values are returned by Document when accessing non-existent fields. - */ - bool missing() const { return _storage.type == EOO; } - - /// true if missing() or type is jstNULL or Undefined - bool nullish() const { - return missing() - || _storage.type == jstNULL - || _storage.type == Undefined; - } + bool missing() const { + return _storage.type == EOO; + } - /// true if type represents a number - bool numeric() const { - return _storage.type == NumberDouble - || _storage.type == NumberLong - || _storage.type == NumberInt; - } + /// true if missing() or type is jstNULL or Undefined + bool nullish() const { + return missing() || _storage.type == jstNULL || _storage.type == Undefined; + } - /** - * Returns true if this value is a numeric type that can be represented as a 32-bit integer, - * and false otherwise. - */ - bool integral() const; - - /// Get the BSON type of the field. 
- BSONType getType() const { return _storage.bsonType(); } - - /** Exact type getters. - * Asserts if the requested value type is not exactly correct. - * See coerceTo methods below for a more type-flexible alternative. - */ - double getDouble() const; - std::string getString() const; - Document getDocument() const; - OID getOid() const; - bool getBool() const; - long long getDate() const; // in milliseconds - Timestamp getTimestamp() const; - const char* getRegex() const; - const char* getRegexFlags() const; - std::string getSymbol() const; - std::string getCode() const; - int getInt() const; - long long getLong() const; - const std::vector<Value>& getArray() const { return _storage.getArray(); } - size_t getArrayLength() const; - - /// Access an element of a subarray. Returns Value() if missing or getType() != Array - Value operator[] (size_t index) const; - - /// Access a field of a subdocument. Returns Value() if missing or getType() != Object - Value operator[] (StringData name) const; - - /// Add this value to the BSON object under construction. - void addToBsonObj(BSONObjBuilder* pBuilder, StringData fieldName) const; - - /// Add this field to the BSON array under construction. - void addToBsonArray(BSONArrayBuilder* pBuilder) const; - - // Support BSONObjBuilder and BSONArrayBuilder "stream" API - friend BSONObjBuilder& operator << (BSONObjBuilderValueStream& builder, const Value& val); - - /** Coerce a value to a bool using BSONElement::trueValue() rules. - */ - bool coerceToBool() const; - - /** Coercion operators to extract values with fuzzy type logic. - * - * These currently assert if called on an unconvertible type. - * TODO: decided how to handle unsupported types. - */ - std::string coerceToString() const; - int coerceToInt() const; - long long coerceToLong() const; - double coerceToDouble() const; - Timestamp coerceToTimestamp() const; - long long coerceToDate() const; - time_t coerceToTimeT() const; - tm coerceToTm() const; // broken-out time struct (see man gmtime) - - - /** Compare two Values. - * @returns an integer less than zero, zero, or an integer greater than - * zero, depending on whether lhs < rhs, lhs == rhs, or lhs > rhs - * Warning: may return values other than -1, 0, or 1 - */ - static int compare(const Value& lhs, const Value& rhs); - - friend - bool operator==(const Value& v1, const Value& v2) { - if (v1._storage.identical(v2._storage)) { - // Simple case - return true; - } - return (Value::compare(v1, v2) == 0); - } - - friend bool operator!=(const Value& v1, const Value& v2) { - return !(v1 == v2); - } + /// true if type represents a number + bool numeric() const { + return _storage.type == NumberDouble || _storage.type == NumberLong || + _storage.type == NumberInt; + } - friend bool operator<(const Value& lhs, const Value& rhs) { - return (Value::compare(lhs, rhs) < 0); - } + /** + * Returns true if this value is a numeric type that can be represented as a 32-bit integer, + * and false otherwise. + */ + bool integral() const; - /// This is for debugging, logging, etc. See getString() for how to extract a string. - std::string toString() const; - friend std::ostream& operator << (std::ostream& out, const Value& v); + /// Get the BSON type of the field. + BSONType getType() const { + return _storage.bsonType(); + } - void swap(Value& rhs) { - _storage.swap(rhs._storage); - } + /** Exact type getters. + * Asserts if the requested value type is not exactly correct. + * See coerceTo methods below for a more type-flexible alternative. 
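+ * Illustrative: for Value v(42), v.getInt() returns 42, while v.getString()
+ * fails its verify() check; prefer the coerceTo methods when the type may vary.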
+ */ + double getDouble() const; + std::string getString() const; + Document getDocument() const; + OID getOid() const; + bool getBool() const; + long long getDate() const; // in milliseconds + Timestamp getTimestamp() const; + const char* getRegex() const; + const char* getRegexFlags() const; + std::string getSymbol() const; + std::string getCode() const; + int getInt() const; + long long getLong() const; + const std::vector<Value>& getArray() const { + return _storage.getArray(); + } + size_t getArrayLength() const; - /** Figure out what the widest of two numeric types is. - * - * Widest can be thought of as "most capable," or "able to hold the - * largest or most precise value." The progression is Int, Long, Double. - */ - static BSONType getWidestNumeric(BSONType lType, BSONType rType); - - /// Get the approximate memory size of the value, in bytes. Includes sizeof(Value) - size_t getApproximateSize() const; - - /** Calculate a hash value. - * - * Meant to be used to create composite hashes suitable for - * hashed container classes such as unordered_map<>. - */ - void hash_combine(size_t& seed) const; - - /// struct Hash is defined to enable the use of Values as keys in unordered_map. - struct Hash : std::unary_function<const Value&, size_t> { - size_t operator()(const Value& rV) const; - }; - - /// Call this after memcpying to update ref counts if needed - void memcpyed() const { _storage.memcpyed(); } - - /// members for Sorter - struct SorterDeserializeSettings {}; // unused - void serializeForSorter(BufBuilder& buf) const; - static Value deserializeForSorter(BufReader& buf, const SorterDeserializeSettings&); - int memUsageForSorter() const { return getApproximateSize(); } - Value getOwned() const { return *this; } - - private: - /** This is a "honeypot" to prevent unexpected implicit conversions to the accepted argument - * types. bool is especially bad since without this it will accept any pointer. - * - * Template argument name was chosen to make produced error easier to read. - */ - template <typename InvalidArgumentType> - explicit Value(const InvalidArgumentType& invalidArgument); - - explicit Value(const ValueStorage& storage) :_storage(storage) {} - - // does no type checking - StringData getStringData() const; // May contain embedded NUL bytes - - ValueStorage _storage; - friend class MutableValue; // gets and sets _storage.genericRCPtr - }; - BOOST_STATIC_ASSERT(sizeof(Value) == 16); + /// Access an element of a subarray. Returns Value() if missing or getType() != Array + Value operator[](size_t index) const; - typedef unordered_set<Value, Value::Hash> ValueSet; -} + /// Access a field of a subdocument. Returns Value() if missing or getType() != Object + Value operator[](StringData name) const; -namespace std { - // This is used by std::sort and others - template <> - inline void swap(mongo::Value& lhs, mongo::Value& rhs) { lhs.swap(rhs); } -} + /// Add this value to the BSON object under construction. + void addToBsonObj(BSONObjBuilder* pBuilder, StringData fieldName) const; -/* ======================= INLINED IMPLEMENTATIONS ========================== */ + /// Add this field to the BSON array under construction. 
+ void addToBsonArray(BSONArrayBuilder* pBuilder) const;
-namespace mongo {
+ // Support BSONObjBuilder and BSONArrayBuilder "stream" API
+ friend BSONObjBuilder& operator<<(BSONObjBuilderValueStream& builder, const Value& val);
- inline size_t Value::getArrayLength() const {
- verify(getType() == Array);
- return getArray().size();
- }
+ /** Coerce a value to a bool using BSONElement::trueValue() rules.
+ */
+ bool coerceToBool() const;
- inline size_t Value::Hash::operator()(const Value& v) const {
- size_t seed = 0xf0afbeef;
- v.hash_combine(seed);
- return seed;
- }
+ /** Coercion operators to extract values with fuzzy type logic.
+ *
+ * These currently assert if called on an unconvertible type.
+ * TODO: decide how to handle unsupported types.
+ */
+ std::string coerceToString() const;
+ int coerceToInt() const;
+ long long coerceToLong() const;
+ double coerceToDouble() const;
+ Timestamp coerceToTimestamp() const;
+ long long coerceToDate() const;
+ time_t coerceToTimeT() const;
+ tm coerceToTm() const; // broken-out time struct (see man gmtime)
+
+
+ /** Compare two Values.
+ * @returns an integer less than zero, zero, or an integer greater than
+ * zero, depending on whether lhs < rhs, lhs == rhs, or lhs > rhs
+ * Warning: may return values other than -1, 0, or 1
+ */
+ static int compare(const Value& lhs, const Value& rhs);
- inline StringData Value::getStringData() const {
- return _storage.getString();
+ friend bool operator==(const Value& v1, const Value& v2) {
+ if (v1._storage.identical(v2._storage)) {
+ // Simple case
+ return true;
+ }
+ return (Value::compare(v1, v2) == 0);
 }
- inline std::string Value::getString() const {
- verify(getType() == String);
- return _storage.getString().toString();
+ friend bool operator!=(const Value& v1, const Value& v2) {
+ return !(v1 == v2);
 }
- inline OID Value::getOid() const {
- verify(getType() == jstOID);
- return OID(_storage.oid);
+ friend bool operator<(const Value& lhs, const Value& rhs) {
+ return (Value::compare(lhs, rhs) < 0);
 }
- inline bool Value::getBool() const {
- verify(getType() == Bool);
- return _storage.boolValue;
- }
+ /// This is for debugging, logging, etc. See getString() for how to extract a string.
+ std::string toString() const;
+ friend std::ostream& operator<<(std::ostream& out, const Value& v);
- inline long long Value::getDate() const {
- verify(getType() == Date);
- return _storage.dateValue;
+ void swap(Value& rhs) {
+ _storage.swap(rhs._storage);
 }
- inline Timestamp Value::getTimestamp() const {
- verify(getType() == bsonTimestamp);
- return Timestamp(_storage.timestampValue);
- }
+ /** Figure out what the widest of two numeric types is.
+ *
+ * Widest can be thought of as "most capable," or "able to hold the
+ * largest or most precise value." The progression is Int, Long, Double.
+ */
+ static BSONType getWidestNumeric(BSONType lType, BSONType rType);
- inline const char* Value::getRegex() const {
- verify(getType() == RegEx);
- return _storage.getString().rawData(); // this is known to be NUL terminated
- }
- inline const char* Value::getRegexFlags() const {
- verify(getType() == RegEx);
- const char* pattern = _storage.getString().rawData(); // this is known to be NUL terminated
- const char* flags = pattern + strlen(pattern) + 1; // first byte after pattern's NUL
- dassert(flags + strlen(flags) == pattern + _storage.getString().size());
- return flags;
+ /// Get the approximate memory size of the value, in bytes.
Includes sizeof(Value) + size_t getApproximateSize() const; + + /** Calculate a hash value. + * + * Meant to be used to create composite hashes suitable for + * hashed container classes such as unordered_map<>. + */ + void hash_combine(size_t& seed) const; + + /// struct Hash is defined to enable the use of Values as keys in unordered_map. + struct Hash : std::unary_function<const Value&, size_t> { + size_t operator()(const Value& rV) const; + }; + + /// Call this after memcpying to update ref counts if needed + void memcpyed() const { + _storage.memcpyed(); } - inline std::string Value::getSymbol() const { - verify(getType() == Symbol); - return _storage.getString().toString(); + /// members for Sorter + struct SorterDeserializeSettings {}; // unused + void serializeForSorter(BufBuilder& buf) const; + static Value deserializeForSorter(BufReader& buf, const SorterDeserializeSettings&); + int memUsageForSorter() const { + return getApproximateSize(); } - inline std::string Value::getCode() const { - verify(getType() == Code); - return _storage.getString().toString(); + Value getOwned() const { + return *this; } - inline int Value::getInt() const { - verify(getType() == NumberInt); - return _storage.intValue; - } +private: + /** This is a "honeypot" to prevent unexpected implicit conversions to the accepted argument + * types. bool is especially bad since without this it will accept any pointer. + * + * Template argument name was chosen to make produced error easier to read. + */ + template <typename InvalidArgumentType> + explicit Value(const InvalidArgumentType& invalidArgument); - inline long long Value::getLong() const { - BSONType type = getType(); - if (type == NumberInt) - return _storage.intValue; + explicit Value(const ValueStorage& storage) : _storage(storage) {} - verify(type == NumberLong); - return _storage.longValue; - } + // does no type checking + StringData getStringData() const; // May contain embedded NUL bytes + + ValueStorage _storage; + friend class MutableValue; // gets and sets _storage.genericRCPtr +}; +BOOST_STATIC_ASSERT(sizeof(Value) == 16); + +typedef unordered_set<Value, Value::Hash> ValueSet; +} + +namespace std { +// This is used by std::sort and others +template <> +inline void swap(mongo::Value& lhs, mongo::Value& rhs) { + lhs.swap(rhs); +} +} + +/* ======================= INLINED IMPLEMENTATIONS ========================== */ + +namespace mongo { + +inline size_t Value::getArrayLength() const { + verify(getType() == Array); + return getArray().size(); +} + +inline size_t Value::Hash::operator()(const Value& v) const { + size_t seed = 0xf0afbeef; + v.hash_combine(seed); + return seed; +} + +inline StringData Value::getStringData() const { + return _storage.getString(); +} + +inline std::string Value::getString() const { + verify(getType() == String); + return _storage.getString().toString(); +} + +inline OID Value::getOid() const { + verify(getType() == jstOID); + return OID(_storage.oid); +} + +inline bool Value::getBool() const { + verify(getType() == Bool); + return _storage.boolValue; +} + +inline long long Value::getDate() const { + verify(getType() == Date); + return _storage.dateValue; +} + +inline Timestamp Value::getTimestamp() const { + verify(getType() == bsonTimestamp); + return Timestamp(_storage.timestampValue); +} + +inline const char* Value::getRegex() const { + verify(getType() == RegEx); + return _storage.getString().rawData(); // this is known to be NUL terminated +} +inline const char* Value::getRegexFlags() const { + verify(getType() == 
RegEx); + const char* pattern = _storage.getString().rawData(); // this is known to be NUL terminated + const char* flags = pattern + strlen(pattern) + 1; // first byte after pattern's NUL + dassert(flags + strlen(flags) == pattern + _storage.getString().size()); + return flags; +} + +inline std::string Value::getSymbol() const { + verify(getType() == Symbol); + return _storage.getString().toString(); +} +inline std::string Value::getCode() const { + verify(getType() == Code); + return _storage.getString().toString(); +} + +inline int Value::getInt() const { + verify(getType() == NumberInt); + return _storage.intValue; +} + +inline long long Value::getLong() const { + BSONType type = getType(); + if (type == NumberInt) + return _storage.intValue; + + verify(type == NumberLong); + return _storage.longValue; +} }; diff --git a/src/mongo/db/pipeline/value_internal.h b/src/mongo/db/pipeline/value_internal.h index 6cb31c8f635..b5b9f5c77ea 100644 --- a/src/mongo/db/pipeline/value_internal.h +++ b/src/mongo/db/pipeline/value_internal.h @@ -41,226 +41,275 @@ namespace mongo { - class Document; - class DocumentStorage; - class Value; - - //TODO: a MutableVector, similar to MutableDocument - /// A heap-allocated reference-counted std::vector - class RCVector : public RefCountable { - public: - RCVector() {} - RCVector(std::vector<Value> v) :vec(std::move(v)) {} - std::vector<Value> vec; - }; - - class RCCodeWScope : public RefCountable { - public: - RCCodeWScope(const std::string& str, BSONObj obj) :code(str), scope(obj.getOwned()) {} - const std::string code; - const BSONObj scope; // Not worth converting to Document for now - }; - - class RCDBRef : public RefCountable { - public: - RCDBRef(const std::string& str, const OID& o) :ns(str), oid(o) {} - const std::string ns; - const OID oid; - }; +class Document; +class DocumentStorage; +class Value; + +// TODO: a MutableVector, similar to MutableDocument +/// A heap-allocated reference-counted std::vector +class RCVector : public RefCountable { +public: + RCVector() {} + RCVector(std::vector<Value> v) : vec(std::move(v)) {} + std::vector<Value> vec; +}; + +class RCCodeWScope : public RefCountable { +public: + RCCodeWScope(const std::string& str, BSONObj obj) : code(str), scope(obj.getOwned()) {} + const std::string code; + const BSONObj scope; // Not worth converting to Document for now +}; + +class RCDBRef : public RefCountable { +public: + RCDBRef(const std::string& str, const OID& o) : ns(str), oid(o) {} + const std::string ns; + const OID oid; +}; #pragma pack(1) - class ValueStorage { - public: - // Note: it is important the memory is zeroed out (by calling zero()) at the start of every - // constructor. Much code relies on every byte being predictably initialized to zero. 
- - // This is a "missing" Value - ValueStorage() { zero(); type = EOO; } - - explicit ValueStorage(BSONType t) { zero(); type = t; } - ValueStorage(BSONType t, int i) { zero(); type = t; intValue = i; } - ValueStorage(BSONType t, long long l) { zero(); type = t; longValue = l; } - ValueStorage(BSONType t, double d) { zero(); type = t; doubleValue = d; } - ValueStorage(BSONType t, Timestamp r) { zero(); type = t; timestampValue = r.asULL(); } - ValueStorage(BSONType t, bool b) { zero(); type = t; boolValue = b; } - ValueStorage(BSONType t, const Document& d) { zero(); type = t; putDocument(d); } - ValueStorage(BSONType t, const RCVector* a) { zero(); type = t; putVector(a); } - ValueStorage(BSONType t, StringData s) { zero(); type = t; putString(s); } - ValueStorage(BSONType t, const BSONBinData& bd) { zero(); type = t; putBinData(bd); } - ValueStorage(BSONType t, const BSONRegEx& re) { zero(); type = t; putRegEx(re); } - ValueStorage(BSONType t, const BSONCodeWScope& cs) { zero(); type = t; putCodeWScope(cs); } - ValueStorage(BSONType t, const BSONDBRef& dbref) { zero(); type = t; putDBRef(dbref); } - - ValueStorage(BSONType t, const OID& o) { - zero(); - type = t; - memcpy(&oid, o.view().view(), OID::kOIDSize); - } - - ValueStorage(const ValueStorage& rhs) { - memcpy(this, &rhs, sizeof(*this)); - memcpyed(); - } - - ~ValueStorage() { - DEV verifyRefCountingIfShould(); - if (refCounter) - intrusive_ptr_release(genericRCPtr); - DEV memset(this, 0xee, sizeof(*this)); - } - - ValueStorage& operator= (ValueStorage rhsCopy) { - this->swap(rhsCopy); - return *this; - } - - void swap(ValueStorage& rhs) { - // Don't need to update ref-counts because they will be the same in the end - char temp[sizeof(ValueStorage)]; - memcpy(temp, this, sizeof(*this)); - memcpy(this, &rhs, sizeof(*this)); - memcpy(&rhs, temp, sizeof(*this)); +class ValueStorage { +public: + // Note: it is important the memory is zeroed out (by calling zero()) at the start of every + // constructor. Much code relies on every byte being predictably initialized to zero. 
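+    // Illustrative consequence of the zeroing: two storages built the same way,
+    // e.g. ValueStorage(NumberInt, 1), match byte-for-byte (padding included),
+    // so identical() can compare raw words and swap()/copies can use memcpy.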
+ + // This is a "missing" Value + ValueStorage() { + zero(); + type = EOO; + } + + explicit ValueStorage(BSONType t) { + zero(); + type = t; + } + ValueStorage(BSONType t, int i) { + zero(); + type = t; + intValue = i; + } + ValueStorage(BSONType t, long long l) { + zero(); + type = t; + longValue = l; + } + ValueStorage(BSONType t, double d) { + zero(); + type = t; + doubleValue = d; + } + ValueStorage(BSONType t, Timestamp r) { + zero(); + type = t; + timestampValue = r.asULL(); + } + ValueStorage(BSONType t, bool b) { + zero(); + type = t; + boolValue = b; + } + ValueStorage(BSONType t, const Document& d) { + zero(); + type = t; + putDocument(d); + } + ValueStorage(BSONType t, const RCVector* a) { + zero(); + type = t; + putVector(a); + } + ValueStorage(BSONType t, StringData s) { + zero(); + type = t; + putString(s); + } + ValueStorage(BSONType t, const BSONBinData& bd) { + zero(); + type = t; + putBinData(bd); + } + ValueStorage(BSONType t, const BSONRegEx& re) { + zero(); + type = t; + putRegEx(re); + } + ValueStorage(BSONType t, const BSONCodeWScope& cs) { + zero(); + type = t; + putCodeWScope(cs); + } + ValueStorage(BSONType t, const BSONDBRef& dbref) { + zero(); + type = t; + putDBRef(dbref); + } + + ValueStorage(BSONType t, const OID& o) { + zero(); + type = t; + memcpy(&oid, o.view().view(), OID::kOIDSize); + } + + ValueStorage(const ValueStorage& rhs) { + memcpy(this, &rhs, sizeof(*this)); + memcpyed(); + } + + ~ValueStorage() { + DEV verifyRefCountingIfShould(); + if (refCounter) + intrusive_ptr_release(genericRCPtr); + DEV memset(this, 0xee, sizeof(*this)); + } + + ValueStorage& operator=(ValueStorage rhsCopy) { + this->swap(rhsCopy); + return *this; + } + + void swap(ValueStorage& rhs) { + // Don't need to update ref-counts because they will be the same in the end + char temp[sizeof(ValueStorage)]; + memcpy(temp, this, sizeof(*this)); + memcpy(this, &rhs, sizeof(*this)); + memcpy(&rhs, temp, sizeof(*this)); + } + + /// Call this after memcpying to update ref counts if needed + void memcpyed() const { + DEV verifyRefCountingIfShould(); + if (refCounter) + intrusive_ptr_add_ref(genericRCPtr); + } + + /// These are only to be called during Value construction on an empty Value + void putString(StringData s); + void putVector(const RCVector* v); + void putDocument(const Document& d); + void putRegEx(const BSONRegEx& re); + void putBinData(const BSONBinData& bd) { + putRefCountable(RCString::create(StringData(static_cast<const char*>(bd.data), bd.length))); + binSubType = bd.type; + } + + void putDBRef(const BSONDBRef& dbref) { + putRefCountable(new RCDBRef(dbref.ns.toString(), dbref.oid)); + } + + void putCodeWScope(const BSONCodeWScope& cws) { + putRefCountable(new RCCodeWScope(cws.code.toString(), cws.scope)); + } + + void putRefCountable(boost::intrusive_ptr<const RefCountable> ptr) { + genericRCPtr = ptr.get(); + + if (genericRCPtr) { + intrusive_ptr_add_ref(genericRCPtr); + refCounter = true; } - - /// Call this after memcpying to update ref counts if needed - void memcpyed() const { - DEV verifyRefCountingIfShould(); - if (refCounter) - intrusive_ptr_add_ref(genericRCPtr); - } - - /// These are only to be called during Value construction on an empty Value - void putString(StringData s); - void putVector(const RCVector* v); - void putDocument(const Document& d); - void putRegEx(const BSONRegEx& re); - void putBinData(const BSONBinData& bd) { - putRefCountable( - RCString::create( - StringData(static_cast<const char*>(bd.data), bd.length))); - binSubType = bd.type; - } - - 
+
+    StringData getString() const {
+        if (shortStr) {
+            return StringData(shortStrStorage, shortStrSize);
+        } else {
+            dassert(typeid(*genericRCPtr) == typeid(const RCString));
+            const RCString* stringPtr = static_cast<const RCString*>(genericRCPtr);
+            return StringData(stringPtr->c_str(), stringPtr->size());
+        }
+    }
+
+    const std::vector<Value>& getArray() const {
+        dassert(typeid(*genericRCPtr) == typeid(const RCVector));
+        const RCVector* arrayPtr = static_cast<const RCVector*>(genericRCPtr);
+        return arrayPtr->vec;
+    }
+
+    boost::intrusive_ptr<const RCCodeWScope> getCodeWScope() const {
+        dassert(typeid(*genericRCPtr) == typeid(const RCCodeWScope));
+        return static_cast<const RCCodeWScope*>(genericRCPtr);
+    }
+
+    boost::intrusive_ptr<const RCDBRef> getDBRef() const {
+        dassert(typeid(*genericRCPtr) == typeid(const RCDBRef));
+        return static_cast<const RCDBRef*>(genericRCPtr);
+    }
+
+    // Document is incomplete here so this can't be inline
+    Document getDocument() const;
+
+    BSONType bsonType() const {
+        return BSONType(type);
+    }
+
+    BinDataType binDataType() const {
+        dassert(type == BinData);
+        return BinDataType(binSubType);
+    }
+
+    void zero() {
+        memset(this, 0, sizeof(*this));
+    }
+
+    // Byte-for-byte identical
+    bool identical(const ValueStorage& other) const {
+        return (i64[0] == other.i64[0] && i64[1] == other.i64[1]);
+    }
+
+    void verifyRefCountingIfShould() const;
-        StringData getString() const {
-            if (shortStr) {
-                return StringData(shortStrStorage, shortStrSize);
-            }
-            else {
-                dassert(typeid(*genericRCPtr) == typeid(const RCString));
-                const RCString* stringPtr = static_cast<const RCString*>(genericRCPtr);
-                return StringData(stringPtr->c_str(), stringPtr->size());
-            }
-        }
-
-        const std::vector<Value>& getArray() const {
-            dassert(typeid(*genericRCPtr) == typeid(const RCVector));
-            const RCVector* arrayPtr = static_cast<const RCVector*>(genericRCPtr);
-            return arrayPtr->vec;
-        }
-
-        boost::intrusive_ptr<const RCCodeWScope> getCodeWScope() const {
-            dassert(typeid(*genericRCPtr) == typeid(const RCCodeWScope));
-            return static_cast<const RCCodeWScope*>(genericRCPtr);
-        }
-
-        boost::intrusive_ptr<const RCDBRef> getDBRef() const {
-            dassert(typeid(*genericRCPtr) == typeid(const RCDBRef));
-            return static_cast<const RCDBRef*>(genericRCPtr);
-        }
-
-        // Document is incomplete here so this can't be inline
-        Document getDocument() const;
-
-        BSONType bsonType() const {
-            return BSONType(type);
-        }
-
-        BinDataType binDataType() const {
-            dassert(type == BinData);
-            return BinDataType(binSubType);
-        }
-
-        void zero() {
-            memset(this, 0, sizeof(*this));
-        }
-
-        // Byte-for-byte identical
-        bool identical(const ValueStorage& other) const {
-            return (i64[0] == other.i64[0]
-                 && i64[1] == other.i64[1]);
-        }
-
-        void verifyRefCountingIfShould() const;
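getString() above is a small-string optimization: the shortStr bit selects between bytes stored inline in the object itself and a heap-allocated, ref-counted RCString. A sketch of the same dispatch with invented names; the real inline buffer is 16 total bytes minus 3 of header minus 1 NUL, i.e. 12 usable bytes, which kInlineCap mirrors here.

    #include <cstddef>
    #include <cstring>
    #include <string>
    #include <utility>

    // Illustrative small-string storage: strings that fit in the inline
    // buffer set a shortStr-style flag; longer ones go to the heap.
    class SmallString {
    public:
        static const std::size_t kInlineCap = 12;

        explicit SmallString(const std::string& s) {
            if (s.size() <= kInlineCap) {
                shortStr_ = true;
                size_ = static_cast<unsigned char>(s.size());
                std::memcpy(inline_, s.data(), s.size());
            } else {
                shortStr_ = false;
                size_ = 0;
                heap_ = new std::string(s);
            }
        }

        ~SmallString() {
            if (!shortStr_)
                delete heap_;
        }

        // The same dispatch getString() performs on the shortStr bit.
        std::pair<const char*, std::size_t> view() const {
            if (shortStr_)
                return std::make_pair(inline_, std::size_t(size_));
            return std::make_pair(heap_->data(), heap_->size());
        }

    private:
        SmallString(const SmallString&);             // not copyable in this
        SmallString& operator=(const SmallString&);  // simplified sketch

        bool shortStr_;
        unsigned char size_;
        union {
            char inline_[kInlineCap];
            const std::string* heap_;
        };
    };

Short strings therefore cost no allocation and no ref-count traffic at all, which matters when Values are created and destroyed per document in a pipeline.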
-
-        // This data is public because this should only be used by Value which would be a friend
-        union {
-            struct {
-                // byte 1
-                signed char type;
-
-                // byte 2
-                struct {
-                    bool refCounter : 1; // true if we need to refCount
-                    bool shortStr : 1; // true if we are using short strings
-                    // reservedFlags: 6;
-                };
-
-                // bytes 3-16;
-                union {
-                    unsigned char oid[12];
-
-                    struct {
-                        char shortStrSize; // TODO Consider moving into flags union (4 bits)
-                        char shortStrStorage[16/*total bytes*/ - 3/*offset*/ - 1/*NUL byte*/];
-                        union {
-                            char nulTerminator;
-                        };
-                    };
-
-                    struct {
-                        union {
-                            unsigned char binSubType;
-                            char pad[6];
-                            char stringCache[6]; // TODO copy first few bytes of strings in here
-                        };
-                        union { // 8 bytes long and 8-byte aligned
-                            // There should be no pointers to non-const data
-                            const RefCountable* genericRCPtr;
-
-                            double doubleValue;
-                            bool boolValue;
-                            int intValue;
-                            long long longValue;
-                            unsigned long long timestampValue;
-                            long long dateValue;
-                        };
-                    };
-                };
-            };
-
-            // covers the whole ValueStorage
-            long long i64[2];
-        };
-    };
-    BOOST_STATIC_ASSERT(sizeof(ValueStorage) == 16);
+
+    // This data is public because this should only be used by Value which would be a friend
+    union {
+        struct {
+            // byte 1
+            signed char type;
+
+            // byte 2
+            struct {
+                bool refCounter : 1;  // true if we need to refCount
+                bool shortStr : 1;    // true if we are using short strings
+                // reservedFlags: 6;
+            };
+
+            // bytes 3-16;
+            union {
+                unsigned char oid[12];
+
+                struct {
+                    char shortStrSize;  // TODO Consider moving into flags union (4 bits)
+                    char shortStrStorage[16 /*total bytes*/ - 3 /*offset*/ - 1 /*NUL byte*/];
+                    union {
+                        char nulTerminator;
+                    };
+                };
+
+                struct {
+                    union {
+                        unsigned char binSubType;
+                        char pad[6];
+                        char stringCache[6];  // TODO copy first few bytes of strings in here
+                    };
+                    union {  // 8 bytes long and 8-byte aligned
+                        // There should be no pointers to non-const data
+                        const RefCountable* genericRCPtr;
+
+                        double doubleValue;
+                        bool boolValue;
+                        int intValue;
+                        long long longValue;
+                        unsigned long long timestampValue;
+                        long long dateValue;
+                    };
+                };
+            };
+        };
+
+        // covers the whole ValueStorage
+        long long i64[2];
+    };
+};
+BOOST_STATIC_ASSERT(sizeof(ValueStorage) == 16);
 #pragma pack()
 }
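The #pragma pack(1) wrapper around this header and the BOOST_STATIC_ASSERT pin the layout: one tag byte, one flags byte, six bytes of sub-type/cache, and an 8-byte payload, shadowed by i64[2] for whole-object comparison, 16 bytes total. The same guard in a standalone sketch using the C++11 static_assert; PackedValue is illustrative, not the real class.

    #pragma pack(1)
    // Illustrative 16-byte layout in the same shape as ValueStorage: a tag
    // byte, a flags byte, six bytes of sub-type/cache, an 8-byte payload,
    // all shadowed by two 64-bit words for whole-object comparison.
    struct PackedValue {
        union {
            struct {
                signed char type;      // byte 1
                unsigned char flags;   // byte 2
                unsigned char pad[6];  // bytes 3-8
                union {                // bytes 9-16
                    double doubleValue;
                    long long longValue;
                    const void* ptrValue;
                };
            };
            long long i64[2];
        };
    };
    #pragma pack()

    // Fails the build if any member or packing change breaks the layout.
    static_assert(sizeof(PackedValue) == 16, "PackedValue must stay 16 bytes");

Keeping the object at exactly two words is what makes the memcpy copy, the raw swap, and the i64-based identical() check cheap and correct.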