summaryrefslogtreecommitdiff
path: root/src/mongo/db/query/stats/array_histogram.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/mongo/db/query/stats/array_histogram.cpp')
-rw-r--r--src/mongo/db/query/stats/array_histogram.cpp209
1 files changed, 209 insertions, 0 deletions
diff --git a/src/mongo/db/query/stats/array_histogram.cpp b/src/mongo/db/query/stats/array_histogram.cpp
new file mode 100644
index 00000000000..ccf11bf02d2
--- /dev/null
+++ b/src/mongo/db/query/stats/array_histogram.cpp
@@ -0,0 +1,209 @@
+/**
+ * Copyright (C) 2022-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include "mongo/db/query/stats/array_histogram.h"
+#include "mongo/db/query/stats/value_utils.h"
+
+namespace mongo::stats {
+namespace {
+TypeCounts mapStatsTypeCountToTypeCounts(std::vector<TypeTag> tc) {
+ TypeCounts out;
+ for (const auto& t : tc) {
+ out.emplace(deserialize(t.getTypeName().toString()), t.getCount());
+ }
+ return out;
+}
+} // namespace
+
+ArrayHistogram::ArrayHistogram() : ArrayHistogram(ScalarHistogram(), {}) {}
+
+ArrayHistogram::ArrayHistogram(Statistics stats)
+ : ArrayHistogram(stats.getScalarHistogram(),
+ mapStatsTypeCountToTypeCounts(stats.getTypeCount()),
+ stats.getTrueCount(),
+ stats.getFalseCount()) {
+ // TODO SERVER-71513: initialize non-scalar histogram fields.
+}
+
+ArrayHistogram::ArrayHistogram(ScalarHistogram scalar,
+ TypeCounts typeCounts,
+ ScalarHistogram arrayUnique,
+ ScalarHistogram arrayMin,
+ ScalarHistogram arrayMax,
+ TypeCounts arrayTypeCounts,
+ double emptyArrayCount,
+ double trueCount,
+ double falseCount)
+ : _scalar(std::move(scalar)),
+ _typeCounts(std::move(typeCounts)),
+ _emptyArrayCount(emptyArrayCount),
+ _trueCount(trueCount),
+ _falseCount(falseCount),
+ _arrayUnique(std::move(arrayUnique)),
+ _arrayMin(std::move(arrayMin)),
+ _arrayMax(std::move(arrayMax)),
+ _arrayTypeCounts(std::move(arrayTypeCounts)) {
+ invariant(isArray());
+}
+
+ArrayHistogram::ArrayHistogram(ScalarHistogram scalar,
+ TypeCounts typeCounts,
+ double trueCount,
+ double falseCount)
+ : _scalar(std::move(scalar)),
+ _typeCounts(std::move(typeCounts)),
+ _emptyArrayCount(0.0),
+ _trueCount(trueCount),
+ _falseCount(falseCount),
+ _arrayUnique(boost::none),
+ _arrayMin(boost::none),
+ _arrayMax(boost::none),
+ _arrayTypeCounts(boost::none) {
+ invariant(!isArray());
+}
+
+bool ArrayHistogram::isArray() const {
+ return _arrayUnique && _arrayMin && _arrayMax && _arrayTypeCounts;
+}
+
+std::string typeCountsToString(const TypeCounts& typeCounts) {
+ std::ostringstream os;
+ os << "{";
+ bool first = true;
+ for (auto [tag, count] : typeCounts) {
+ if (!first)
+ os << ", ";
+ os << tag << ": " << count;
+ first = false;
+ }
+ os << "}";
+ return os.str();
+}
+
+std::string ArrayHistogram::toString() const {
+ std::ostringstream os;
+ os << "{\n";
+ os << " scalar: " << _scalar.toString();
+ os << ",\n typeCounts: " << typeCountsToString(_typeCounts);
+ if (isArray()) {
+ os << ",\n arrayUnique: " << _arrayUnique->toString();
+ os << ",\n arrayMin: " << _arrayMin->toString();
+ os << ",\n arrayMax: " << _arrayMax->toString();
+ os << ",\n arrayTypeCounts: " << typeCountsToString(*_arrayTypeCounts);
+ }
+ os << "\n}\n";
+ return os.str();
+}
+
+const ScalarHistogram& ArrayHistogram::getScalar() const {
+ return _scalar;
+}
+
+const ScalarHistogram& ArrayHistogram::getArrayUnique() const {
+ invariant(isArray());
+ return *_arrayUnique;
+}
+
+const ScalarHistogram& ArrayHistogram::getArrayMin() const {
+ invariant(isArray());
+ return *_arrayMin;
+}
+
+const ScalarHistogram& ArrayHistogram::getArrayMax() const {
+ invariant(isArray());
+ return *_arrayMax;
+}
+
+const TypeCounts& ArrayHistogram::getTypeCounts() const {
+ return _typeCounts;
+}
+
+const TypeCounts& ArrayHistogram::getArrayTypeCounts() const {
+ invariant(isArray());
+ return *_arrayTypeCounts;
+}
+
+double ArrayHistogram::getArrayCount() const {
+ if (isArray()) {
+ auto findArray = _typeCounts.find(sbe::value::TypeTags::Array);
+ uassert(6979504,
+ "Histogram with array data must have a total array count.",
+ findArray != _typeCounts.end());
+ double arrayCount = findArray->second;
+ uassert(6979503, "Histogram with array data must have at least one array.", arrayCount > 0);
+ return arrayCount;
+ }
+ return 0;
+}
+
+BSONObj ArrayHistogram::serialize() const {
+ BSONObjBuilder histogramBuilder;
+
+ // Serialize boolean type counters.
+ histogramBuilder.append("trueCount", getTrueCount());
+ histogramBuilder.append("falseCount", getFalseCount());
+
+ // Serialize empty array counts.
+ histogramBuilder.appendNumber("emptyArrayCount", getEmptyArrayCount());
+
+ // Serialize type counts.
+ BSONArrayBuilder typeCountBuilder(histogramBuilder.subarrayStart("typeCount"));
+ const auto& typeCounts = getTypeCounts();
+ for (const auto& [sbeType, count] : typeCounts) {
+ auto typeCount = BSON("typeName" << stats::serialize(sbeType) << "count" << count);
+ typeCountBuilder.append(typeCount);
+ }
+ typeCountBuilder.doneFast();
+
+ // Serialize scalar histogram.
+ histogramBuilder.append("scalarHistogram", getScalar().serialize());
+
+ // TODO SERVER-71513: serialize array histograms.
+
+ histogramBuilder.doneFast();
+ return histogramBuilder.obj();
+}
+
+BSONObj makeStatistics(double documents, const ArrayHistogram& arrayHistogram) {
+ BSONObjBuilder builder;
+ builder.appendNumber("documents", documents);
+ builder.appendElements(arrayHistogram.serialize());
+ builder.doneFast();
+ return builder.obj();
+}
+
+BSONObj makeStatsPath(StringData path, double documents, const ArrayHistogram& arrayHistogram) {
+ BSONObjBuilder builder;
+ builder.append("_id", path);
+ builder.append("statistics", makeStatistics(documents, arrayHistogram));
+ builder.doneFast();
+ return builder.obj();
+}
+
+} // namespace mongo::stats