diff options
author | Mathias Stearn <mathias@10gen.com> | 2013-09-17 18:44:35 -0400 |
---|---|---|
committer | Mathias Stearn <mathias@10gen.com> | 2015-06-09 16:33:21 -0400 |
commit | ac0ff84d3fd4c127af8cbcea560d3793a3c3f131 (patch) | |
tree | df75f10bd8a8da3db7b074b15dcec38935ff6992 /src | |
parent | a5fae81a6bd8c25cd2508c9b303ac1a1c9c18011 (diff) | |
download | mongo-ac0ff84d3fd4c127af8cbcea560d3793a3c3f131.tar.gz |
SERVER-5044 Standard Deviation accumulator operators
Diffstat (limited to 'src')
-rw-r--r-- | src/mongo/db/SConscript | 1 | ||||
-rw-r--r-- | src/mongo/db/pipeline/accumulator.h | 20 | ||||
-rw-r--r-- | src/mongo/db/pipeline/accumulator_std_dev.cpp | 117 | ||||
-rw-r--r-- | src/mongo/db/pipeline/document_source_group.cpp | 2 |
4 files changed, 140 insertions, 0 deletions
diff --git a/src/mongo/db/SConscript b/src/mongo/db/SConscript index 5d809eba476..4bc47d99f8a 100644 --- a/src/mongo/db/SConscript +++ b/src/mongo/db/SConscript @@ -436,6 +436,7 @@ coredbEnv.Library( "pipeline/accumulator_last.cpp", "pipeline/accumulator_min_max.cpp", "pipeline/accumulator_push.cpp", + "pipeline/accumulator_std_dev.cpp", "pipeline/accumulator_sum.cpp", "pipeline/dependencies.cpp", "pipeline/document_source.cpp", diff --git a/src/mongo/db/pipeline/accumulator.h b/src/mongo/db/pipeline/accumulator.h index f7ef68e07a7..0e1c6db431b 100644 --- a/src/mongo/db/pipeline/accumulator.h +++ b/src/mongo/db/pipeline/accumulator.h @@ -188,4 +188,24 @@ namespace mongo { double _total; long long _count; }; + + + class AccumulatorStdDev : public Accumulator { + public: + virtual void processInternal(const Value& input, bool merging); + virtual Value getValue(bool toBeMerged) const; + virtual const char* getOpName() const; + virtual void reset(); + + static boost::intrusive_ptr<Accumulator> createSamp(); + static boost::intrusive_ptr<Accumulator> createPop(); + + private: + explicit AccumulatorStdDev(bool isSamp); + + const bool _isSamp; + long long _count; + double _mean; + double _m2; // Running sum of squares of delta from mean. Named to match algorithm. + }; } diff --git a/src/mongo/db/pipeline/accumulator_std_dev.cpp b/src/mongo/db/pipeline/accumulator_std_dev.cpp new file mode 100644 index 00000000000..76957c3d112 --- /dev/null +++ b/src/mongo/db/pipeline/accumulator_std_dev.cpp @@ -0,0 +1,117 @@ +/** + * Copyright (c) 2011 10gen Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/platform/basic.h" + +#include "mongo/db/pipeline/accumulator.h" +#include "mongo/db/pipeline/document.h" +#include "mongo/db/pipeline/expression_context.h" +#include "mongo/db/pipeline/value.h" + +namespace mongo { + using boost::intrusive_ptr; + + void AccumulatorStdDev::processInternal(const Value& input, bool merging) { + if (!merging) { + // non numeric types have no impact on standard deviation + if (!input.numeric()) + return; + + const double val = input.getDouble(); + + // This is an implementation of the following algorithm: + // http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Online_algorithm + _count += 1; + const double delta = val - _mean; + _mean += delta / _count; + _m2 += delta * (val - _mean); + } + else { + // This is what getValue(true) produced below. + verify(input.getType() == Object); + const double m2 = input["m2"].getDouble(); + const double mean = input["mean"].getDouble(); + const long long count = input["count"].getLong(); + + if (count == 0) + return; // This partition had no data to contribute. + + // This is an implementation of the following algorithm: + // http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Parallel_algorithm + const double delta = mean - _mean; + const long long newCount = count + _count; + + _mean = ((_count * _mean) + (count * mean)) / newCount; + _m2 += m2 + (delta * delta * (double(_count) * count / newCount)); + _count = newCount; + } + } + + Value AccumulatorStdDev::getValue(bool toBeMerged) const { + if (!toBeMerged) { + const long long adjustedCount = (_isSamp ? _count - 1 : _count); + if (adjustedCount <= 0) + return Value(BSONNULL); // standard deviation not well defined in this case + + return Value(sqrt(_m2 / adjustedCount)); + } + else { + return Value(DOC("m2" << _m2 + << "mean" << _mean + << "count" << _count)); + } + } + + intrusive_ptr<Accumulator> AccumulatorStdDev::createSamp() { + return new AccumulatorStdDev(true); + } + + intrusive_ptr<Accumulator> AccumulatorStdDev::createPop() { + return new AccumulatorStdDev(false); + } + + AccumulatorStdDev::AccumulatorStdDev(bool isSamp) + : _isSamp(isSamp) + , _count(0) + , _mean(0) + , _m2(0) + { + // This is a fixed size Accumulator so we never need to update this + _memUsageBytes = sizeof(*this); + } + + void AccumulatorStdDev::reset() { + _count = 0; + _mean = 0; + _m2 = 0; + } + + const char *AccumulatorStdDev::getOpName() const { + return (_isSamp ? "$stdDevSamp" : "$stdDevPop"); + } +} diff --git a/src/mongo/db/pipeline/document_source_group.cpp b/src/mongo/db/pipeline/document_source_group.cpp index 5003211e932..8305225a467 100644 --- a/src/mongo/db/pipeline/document_source_group.cpp +++ b/src/mongo/db/pipeline/document_source_group.cpp @@ -241,6 +241,8 @@ namespace mongo { {"$max", AccumulatorMinMax::createMax}, {"$min", AccumulatorMinMax::createMin}, {"$push", AccumulatorPush::create}, + {"$stdDevPop", AccumulatorStdDev::createPop}, + {"$stdDevSamp", AccumulatorStdDev::createSamp}, {"$sum", AccumulatorSum::create}, }; |