summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMisha Tyulenev <misha.tyulenev@mongodb.com>2022-09-01 02:48:00 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2022-09-01 03:18:53 +0000
commitbd8a8d4d880577302c777ff961f359b03435126a (patch)
tree64d42e2ea5a05cfa4aa31c7ba5eb6fe003ba9709
parent0d37d07a97b4a91e9d501f1c8a53b005a6f7dff1 (diff)
downloadmongo-bd8a8d4d880577302c777ff961f359b03435126a.tar.gz
SERVER-68745 Implement histogram parsing from BSON
-rw-r--r--src/mongo/db/query/ce/SConscript28
-rw-r--r--src/mongo/db/query/ce/stats.idl109
-rw-r--r--src/mongo/db/query/ce/stats_cache_loader_impl.cpp2
-rw-r--r--src/mongo/db/query/ce/stats_cache_loader_test.cpp4
-rw-r--r--src/mongo/db/query/ce/stats_path_test.cpp113
-rw-r--r--src/mongo/db/query/ce/stats_serialization_utils.cpp102
-rw-r--r--src/mongo/db/query/ce/stats_serialization_utils.h66
7 files changed, 421 insertions, 3 deletions
diff --git a/src/mongo/db/query/ce/SConscript b/src/mongo/db/query/ce/SConscript
index cba29f65e55..edb7b808623 100644
--- a/src/mongo/db/query/ce/SConscript
+++ b/src/mongo/db/query/ce/SConscript
@@ -38,6 +38,20 @@ env.Library(
],
)
+# Library built from stats.idl and the BSON serialization helpers in
+# stats_serialization_utils.cpp.
+env.Library(
+ target="stats_serialization",
+ source=[
+ 'stats.idl',
+ 'stats_serialization_utils.cpp',
+ ],
+ LIBDEPS=[
+ '$BUILD_DIR/mongo/base',
+ '$BUILD_DIR/mongo/db/exec/sbe/query_sbe_values',
+ '$BUILD_DIR/mongo/db/server_base',
+ '$BUILD_DIR/mongo/db/service_context',
+ ],
+)
+
env.CppUnitTest(
target="ce_histogram_test",
source=[
@@ -94,6 +108,7 @@ env.CppUnitTest(
'$BUILD_DIR/mongo/util/fail_point',
'$BUILD_DIR/mongo/util/pcre_wrapper',
'query_ce',
+ 'stats_serialization',
],
)
env.CppUnitTest(
@@ -109,3 +124,16 @@ env.CppUnitTest(
'query_ce',
],
)
+# Unit test target for stats_path_test.cpp; links against the stats_serialization library.
+env.CppUnitTest(
+ target="stats_path_test",
+ source=[
+ "stats_path_test.cpp",
+ ],
+ LIBDEPS=[
+ '$BUILD_DIR/mongo/base',
+ '$BUILD_DIR/mongo/db/service_context',
+ 'ce_test_utils',
+ 'query_ce',
+ 'stats_serialization',
+ ],
+)
diff --git a/src/mongo/db/query/ce/stats.idl b/src/mongo/db/query/ce/stats.idl
new file mode 100644
index 00000000000..96b123a333b
--- /dev/null
+++ b/src/mongo/db/query/ce/stats.idl
@@ -0,0 +1,109 @@
+# Copyright (C) 2022-present MongoDB, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the Server Side Public License, version 1,
+# as published by MongoDB, Inc.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# Server Side Public License for more details.
+#
+# You should have received a copy of the Server Side Public License
+# along with this program. If not, see
+# <http://www.mongodb.com/licensing/server-side-public-license>.
+#
+# As a special exception, the copyright holders give permission to link the
+# code of portions of this program with the OpenSSL library under certain
+# conditions as described in each individual source file and distribute
+# linked combinations including the program with the OpenSSL library. You
+# must comply with the Server Side Public License in all respects for
+# all of the code used other than as permitted herein. If you modify file(s)
+# with this exception, you may extend this exception to your version of the
+# file(s), but you are not obligated to do so. If you do not wish to do so,
+# delete this exception statement from your version. If you delete this
+# exception statement from all source files in the program, then also delete
+# it in the license file.
+#
+global:
+ cpp_namespace: "mongo"
+
+imports:
+ - "mongo/db/basic_types.idl"
+
+structs:
+ StatsBucket:
+ description: "Histogram bucket"
+ fields:
+ upperBoundary:
+ type: IDLAnyType
+ boundaryCount:
+ type: long
+ rangeCount:
+ type: long
+ rangeDistincts:
+ type: long
+ cumulativeCount:
+ type: long
+ cumulativeDistincts:
+ type: long
+
+ BoolCount:
+ description: "Tracks count of booleans separately from histogram buckets and other type
+ counts"
+ fields:
+ trueCount:
+ type: long
+ falseCount:
+ type: long
+
+ TypeTag:
+ description: "SBE types and their corresponding frequencies in the histogram"
+ fields:
+ typeName:
+ type: string
+ count:
+ type: long
+
+ StatsArrayHistogram:
+ description: "Array Histogram"
+ fields:
+ minHistogram:
+ type: array<StatsBucket>
+ maxHistogram:
+ type: array<StatsBucket>
+ uniqueHistogram:
+ type: array<StatsBucket>
+ boolCount:
+ type: BoolCount
+ typeCount:
+ type: array<TypeTag>
+
+ StatsPath:
+ description: "Serialized representation of data statistics for a key path"
+ fields:
+ path:
+ type: string
+ documents:
+ type: long
+ documentsSampled:
+ type: long
+ optional: true
+ samplingRate:
+ type: double
+ optional: true
+ samplesRequested:
+ type: long
+ optional: true
+ boolCount:
+ type: BoolCount
+ optional: true
+ typeCount:
+ type: array<TypeTag>
+ optional: true
+ scalarHistogram:
+ type: array<StatsBucket>
+ optional: true
+ arrayStatistics:
+ type: StatsArrayHistogram
+ optional: true
diff --git a/src/mongo/db/query/ce/stats_cache_loader_impl.cpp b/src/mongo/db/query/ce/stats_cache_loader_impl.cpp
index 3253787d0cf..9e30d67de62 100644
--- a/src/mongo/db/query/ce/stats_cache_loader_impl.cpp
+++ b/src/mongo/db/query/ce/stats_cache_loader_impl.cpp
@@ -66,7 +66,7 @@ SemiFuture<CollectionStatistics> StatsCacheLoaderImpl::getStats(OperationContext
histograms.push_back(std::move(document));
}
- // TODO: SERVER-68745, parse histograms BSONs.
+ // TODO: SERVER-69238, parse histograms BSONs.
CollectionStatistics stats{0};
return makeReadyFutureWith([this, stats] { return stats; }).semi();
} catch (const DBException& ex) {
diff --git a/src/mongo/db/query/ce/stats_cache_loader_test.cpp b/src/mongo/db/query/ce/stats_cache_loader_test.cpp
index 7b3a7b2d63a..b6407fcad10 100644
--- a/src/mongo/db/query/ce/stats_cache_loader_test.cpp
+++ b/src/mongo/db/query/ce/stats_cache_loader_test.cpp
@@ -67,14 +67,14 @@ TEST_F(StatsCacheLoaderTest, VerifyStatsLoad) {
const CollectionPtr& coll = autoColl.getCollection();
{
WriteUnitOfWork wuow(operationContext());
- // TODO: SERVER-68745, insert histogram.
+ // TODO: SERVER-69238, insert histogram.
BSONObj doc = BSON("_id" << 1);
ASSERT_OK(collection_internal::insertDocument(
operationContext(), coll, InsertStatement(doc), nullptr));
wuow.commit();
}
auto newStats = _statsCacheLoader.getStats(operationContext(), nss).get();
- // TODO: SERVER-68745, verify histogram.
+ // TODO: SERVER-69238, verify histogram.
}
} // namespace
diff --git a/src/mongo/db/query/ce/stats_path_test.cpp b/src/mongo/db/query/ce/stats_path_test.cpp
new file mode 100644
index 00000000000..6b66e06589d
--- /dev/null
+++ b/src/mongo/db/query/ce/stats_path_test.cpp
@@ -0,0 +1,113 @@
+/**
+ * Copyright (C) 2022-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include "mongo/platform/basic.h"
+
+#include "mongo/bson/bsonobjbuilder.h"
+#include "mongo/db/exec/sbe/values/bson.h"
+#include "mongo/db/exec/sbe/values/value.h"
+#include "mongo/db/query/ce/stats_gen.h"
+#include "mongo/db/query/ce/stats_serialization_utils.h"
+#include "mongo/unittest/unittest.h"
+#include "mongo/util/assert_util.h"
+
+namespace mongo {
+namespace {
+
+// Parser context passed to the IDL-generated parse() calls in the tests below.
+IDLParserContext ctx("StatsPath");
+
+
+/**
+ * Validate round trip conversion for numeric SBE values using NumberDouble datatype.
+ */
+TEST(StatsPath, BasicValidStatsBucketDouble) {
+    // An SBE Value is an integral slot, so the double has to be bit-cast into it. Handing
+    // double{1} directly to the pair constructor would convert it to the integer 1, and the
+    // serialized upper boundary would then not be the double 1.0.
+    auto value = stats_serialization_utils::TypeValuePair(sbe::value::TypeTags::NumberDouble,
+                                                          sbe::value::bitcastFrom<double>(1.0));
+    auto serializedBucket = stats_serialization_utils::makeStatsBucket(value, 1, 2, 3, 4, 5);
+    auto parsedBucket = StatsBucket::parse(ctx, serializedBucket);
+
+    // Verify the upperBoundary field conversion.
+    ASSERT(sbe::bson::convertFrom<1>(parsedBucket.getUpperBoundary().getElement()) == value);
+
+    // Round trip: the parsed struct must serialize back to the BSON it was parsed from.
+    ASSERT_BSONOBJ_EQ(serializedBucket, parsedBucket.toBSON());
+}
+
+/**
+ * Validate round trip conversion for heap SBE values using StringBig datatype.
+ */
+TEST(StatsPath, BasicValidStatsBucketString) {
+    auto aString = "abcdef"_sd;
+    auto value = sbe::value::makeBigString(aString);
+    // The big string is owned by this test. The guard releases it on every exit path, so it is
+    // not leaked if one of the assertions below throws.
+    sbe::value::ValueGuard valueGuard(value.first, value.second);
+
+    auto serializedBucket = stats_serialization_utils::makeStatsBucket(value, 1, 2, 3, 4, 5);
+    auto parsedBucket = StatsBucket::parse(ctx, serializedBucket);
+
+    // Verify the upperBoundary field conversion.
+    auto rtValue = sbe::bson::convertFrom<1>(parsedBucket.getUpperBoundary().getElement());
+    ASSERT(sbe::value::getStringView(rtValue.first, rtValue.second) == aString);
+
+    // Round trip: the parsed struct must serialize back to the BSON it was parsed from.
+    ASSERT_BSONOBJ_EQ(serializedBucket, parsedBucket.toBSON());
+}
+
+/**
+ * Validate round trip conversion for StatsPath datatype.
+ *
+ * NOTE(review): the array histogram argument is passed as boost::none, so that field is not
+ * exercised by this test.
+ */
+TEST(StatsPath, BasicValidStatsPath) {
+    std::list<BSONObj> buckets;
+    for (long long i = 1; i <= 3; i++) {
+        // Bit-cast the double into the SBE value slot; a plain double argument would be
+        // converted to an integer by the pair constructor.
+        auto typeValue = stats_serialization_utils::TypeValuePair(
+            sbe::value::TypeTags::NumberDouble, sbe::value::bitcastFrom<double>(i + 1.0));
+        buckets.push_back(stats_serialization_utils::makeStatsBucket(typeValue, i, i, i, i, i));
+    }
+
+    stats_serialization_utils::TypeCount types;
+    for (long long i = 1; i <= 3; i++) {
+        // Construct the element in place so the count keeps TypeCount's long long width.
+        types.emplace_back("type" + std::to_string(i), i);
+    }
+
+    auto serializedPath = stats_serialization_utils::makeStatsPath(
+        "somePath", 100, 10, 0.1, 10, std::make_pair(4LL, 6LL), types, buckets, boost::none);
+
+    // Round trip: the parsed struct must serialize back to the BSON it was parsed from.
+    auto parsedPath = StatsPath::parse(ctx, serializedPath);
+    ASSERT_BSONOBJ_EQ(serializedPath, parsedPath.toBSON());
+}
+
+} // namespace
+} // namespace mongo
diff --git a/src/mongo/db/query/ce/stats_serialization_utils.cpp b/src/mongo/db/query/ce/stats_serialization_utils.cpp
new file mode 100644
index 00000000000..c1e7d4dcef0
--- /dev/null
+++ b/src/mongo/db/query/ce/stats_serialization_utils.cpp
@@ -0,0 +1,102 @@
+/**
+ * Copyright (C) 2022-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+
+#include "mongo/db/query/ce/stats_serialization_utils.h"
+
+#include "mongo/bson/bsonobj.h"
+#include "mongo/bson/bsonobjbuilder.h"
+#include "mongo/db/exec/sbe/values/bson.h"
+
+namespace mongo::stats_serialization_utils {
+
+/**
+ * Serializes one histogram bucket into an owned BSONObj: "upperBoundary" first, followed by the
+ * five counters in the order they are passed in.
+ */
+BSONObj makeStatsBucket(TypeValuePair typeValue,
+                        long long boundaryCount,
+                        long long rangeCount,
+                        long long rangeDistincts,
+                        long long cumulativeCount,
+                        long long cumulativeDistincts) {
+    const auto [boundaryTag, boundaryVal] = typeValue;
+
+    // Field name / value pairs for the counters, listed in output order.
+    const std::pair<StringData, long long> counters[] = {
+        {"boundaryCount"_sd, boundaryCount},
+        {"rangeCount"_sd, rangeCount},
+        {"rangeDistincts"_sd, rangeDistincts},
+        {"cumulativeCount"_sd, cumulativeCount},
+        {"cumulativeDistincts"_sd, cumulativeDistincts},
+    };
+
+    BSONObjBuilder builder;
+    // The bound is an SBE (tag, value) pair; the SBE helper appends it as a BSON element.
+    sbe::bson::appendValueToBsonObj(builder, "upperBoundary", boundaryTag, boundaryVal);
+    for (const auto& [fieldName, count] : counters) {
+        builder.append(fieldName, count);
+    }
+    return builder.obj();
+}
+
+/**
+ * Builds an owned BSONObj laid out like the StatsPath IDL struct.
+ *
+ * "path" and "documents" are always written. Every boost::optional argument is written only when
+ * it holds a value, under the IDL field name:
+ *   - documentsSampled, samplingRate, samplesRequested: scalar fields;
+ *   - boolCount: sub-object {trueCount, falseCount} taken from (first, second);
+ *   - typeCount: array of {typeName, count} objects, one per list element;
+ *   - scalarHistogram: array holding the given bucket objects;
+ *   - arrayHistogram: embedded as the "arrayStatistics" object.
+ */
+BSONObj makeStatsPath(StringData path,
+                      long long documents,
+                      boost::optional<long long> documentsSampled,
+                      boost::optional<double> samplingRate,
+                      boost::optional<long long> samplesRequested,
+                      boost::optional<std::pair<long long, long long>> boolCount,
+                      boost::optional<TypeCount> typeCount,
+                      boost::optional<std::list<BSONObj>> scalarHistogram,
+                      boost::optional<BSONObj> arrayHistogram) {
+    BSONObjBuilder statsBuilder;
+    statsBuilder.append("path", path);
+    statsBuilder.append("documents", documents);
+    if (documentsSampled) {
+        statsBuilder.append("documentsSampled", *documentsSampled);
+    }
+    if (samplingRate) {
+        statsBuilder.append("samplingRate", *samplingRate);
+    }
+    if (samplesRequested) {
+        statsBuilder.append("samplesRequested", *samplesRequested);
+    }
+    if (boolCount) {
+        BSONObjBuilder boolCountBuilder(statsBuilder.subobjStart("boolCount"));
+        boolCountBuilder.append("trueCount", boolCount->first);
+        boolCountBuilder.append("falseCount", boolCount->second);
+        boolCountBuilder.done();
+    }
+    if (typeCount) {
+        BSONArrayBuilder typeCountBuilder(statsBuilder.subarrayStart("typeCount"));
+        for (const auto& typeElem : *typeCount) {
+            typeCountBuilder.append(
+                BSON("typeName" << typeElem.first << "count" << typeElem.second));
+        }
+        typeCountBuilder.done();
+    }
+    if (scalarHistogram) {
+        BSONArrayBuilder histArrBuilder(statsBuilder.subarrayStart("scalarHistogram"));
+        histArrBuilder.append(*scalarHistogram);
+        histArrBuilder.done();
+    }
+    if (arrayHistogram) {
+        // The StatsPath IDL struct names this field "arrayStatistics"; writing it under any
+        // other key would produce an object that does not match the struct.
+        statsBuilder.append("arrayStatistics", *arrayHistogram);
+    }
+    return statsBuilder.obj();
+}
+
+} // namespace mongo::stats_serialization_utils
diff --git a/src/mongo/db/query/ce/stats_serialization_utils.h b/src/mongo/db/query/ce/stats_serialization_utils.h
new file mode 100644
index 00000000000..84a952e71f2
--- /dev/null
+++ b/src/mongo/db/query/ce/stats_serialization_utils.h
@@ -0,0 +1,66 @@
+/**
+ * Copyright (C) 2022-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#pragma once
+
+#include <list>
+#include <string>
+
+#include "mongo/base/string_data.h"
+#include "mongo/bson/bsonobj.h"
+#include "mongo/db/exec/sbe/values/value.h"
+
+namespace mongo::stats_serialization_utils {
+
+// List of (type name, count) pairs; makeStatsPath() writes each pair as a {typeName, count}
+// element of the "typeCount" array.
+using TypeCount = std::list<std::pair<std::string, long long>>;
+// An SBE type tag paired with an SBE value.
+using TypeValuePair = std::pair<sbe::value::TypeTags, sbe::value::Value>;
+
+/**
+ * Returns owned BSON Object representing data matching mongo::StatsBucket IDL.
+ *
+ * 'typeValue' is written as the "upperBoundary" field; each remaining argument is written, in
+ * order, to the field that shares its name.
+ */
+BSONObj makeStatsBucket(TypeValuePair typeValue,
+ long long boundaryCount,
+ long long rangeCount,
+ long long rangeDistincts,
+ long long cumulativeCount,
+ long long cumulativeDistincts);
+/**
+ * Returns owned BSON Object representing data matching mongo::StatsPath IDL.
+ *
+ * 'path' and 'documents' are always written. Each boost::optional argument is written only when
+ * it is set; passing boost::none leaves the corresponding field out of the result.
+ */
+BSONObj makeStatsPath(StringData path,
+ long long documents,
+ boost::optional<long long> documentsSampled,
+ boost::optional<double> samplingRate,
+ boost::optional<long long> samplesRequested,
+ boost::optional<std::pair<long long, long long>> boolCount,
+ boost::optional<TypeCount> typeCount,
+ boost::optional<std::list<BSONObj>> scalarHistogram,
+ boost::optional<BSONObj> arrayHistogram);
+
+} // namespace mongo::stats_serialization_utils