diff options
author | Misha Tyulenev <misha.tyulenev@mongodb.com> | 2022-09-01 02:48:00 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-09-01 03:18:53 +0000 |
commit | bd8a8d4d880577302c777ff961f359b03435126a (patch) | |
tree | 64d42e2ea5a05cfa4aa31c7ba5eb6fe003ba9709 /src/mongo/db | |
parent | 0d37d07a97b4a91e9d501f1c8a53b005a6f7dff1 (diff) | |
download | mongo-bd8a8d4d880577302c777ff961f359b03435126a.tar.gz |
SERVER-68745 Implement histogram parsing from BSON
Diffstat (limited to 'src/mongo/db')
-rw-r--r-- | src/mongo/db/query/ce/SConscript | 28 | ||||
-rw-r--r-- | src/mongo/db/query/ce/stats.idl | 109 | ||||
-rw-r--r-- | src/mongo/db/query/ce/stats_cache_loader_impl.cpp | 2 | ||||
-rw-r--r-- | src/mongo/db/query/ce/stats_cache_loader_test.cpp | 4 | ||||
-rw-r--r-- | src/mongo/db/query/ce/stats_path_test.cpp | 113 | ||||
-rw-r--r-- | src/mongo/db/query/ce/stats_serialization_utils.cpp | 102 | ||||
-rw-r--r-- | src/mongo/db/query/ce/stats_serialization_utils.h | 66 |
7 files changed, 421 insertions, 3 deletions
diff --git a/src/mongo/db/query/ce/SConscript b/src/mongo/db/query/ce/SConscript index cba29f65e55..edb7b808623 100644 --- a/src/mongo/db/query/ce/SConscript +++ b/src/mongo/db/query/ce/SConscript @@ -38,6 +38,20 @@ env.Library( ], ) +env.Library( + target="stats_serialization", + source=[ + 'stats.idl', + 'stats_serialization_utils.cpp', + ], + LIBDEPS=[ + '$BUILD_DIR/mongo/base', + '$BUILD_DIR/mongo/db/exec/sbe/query_sbe_values', + '$BUILD_DIR/mongo/db/server_base', + '$BUILD_DIR/mongo/db/service_context', + ], +) + env.CppUnitTest( target="ce_histogram_test", source=[ @@ -94,6 +108,7 @@ env.CppUnitTest( '$BUILD_DIR/mongo/util/fail_point', '$BUILD_DIR/mongo/util/pcre_wrapper', 'query_ce', + 'stats_serialization', ], ) env.CppUnitTest( @@ -109,3 +124,16 @@ env.CppUnitTest( 'query_ce', ], ) +env.CppUnitTest( + target="stats_path_test", + source=[ + "stats_path_test.cpp", + ], + LIBDEPS=[ + '$BUILD_DIR/mongo/base', + '$BUILD_DIR/mongo/db/service_context', + 'ce_test_utils', + 'query_ce', + 'stats_serialization', + ], +) diff --git a/src/mongo/db/query/ce/stats.idl b/src/mongo/db/query/ce/stats.idl new file mode 100644 index 00000000000..96b123a333b --- /dev/null +++ b/src/mongo/db/query/ce/stats.idl @@ -0,0 +1,109 @@ +# Copyright (C) 2022-present MongoDB, Inc. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the Server Side Public License, version 1, +# as published by MongoDB, Inc. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# Server Side Public License for more details. +# +# You should have received a copy of the Server Side Public License +# along with this program. If not, see +# <http://www.mongodb.com/licensing/server-side-public-license>. 
+# +# As a special exception, the copyright holders give permission to link the +# code of portions of this program with the OpenSSL library under certain +# conditions as described in each individual source file and distribute +# linked combinations including the program with the OpenSSL library. You +# must comply with the Server Side Public License in all respects for +# all of the code used other than as permitted herein. If you modify file(s) +# with this exception, you may extend this exception to your version of the +# file(s), but you are not obligated to do so. If you do not wish to do so, +# delete this exception statement from your version. If you delete this +# exception statement from all source files in the program, then also delete +# it in the license file. +# +global: + cpp_namespace: "mongo" + +imports: + - "mongo/db/basic_types.idl" + +structs: + StatsBucket: + description: "Histogram bucket" + fields: + upperBoundary: + type: IDLAnyType + boundaryCount: + type: long + rangeCount: + type: long + rangeDistincts: + type: long + cumulativeCount: + type: long + cumulativeDistincts: + type: long + + BoolCount: + description: "Tracks count of booleans separately from histogram buckets and other type +counts" + fields: + trueCount: + type: long + falseCount: + type: long + + TypeTag: + description: "SBE types and their corresponding frequencies in the histogram" + fields: + typeName: + type: string + count: + type: long + + StatsArrayHistogram: + description: "Array Histogram" + fields: + minHistogram: + type: array<StatsBucket> + maxHistogram: + type: array<StatsBucket> + uniqueHistogram: + type: array<StatsBucket> + boolCount: + type: BoolCount + typeCount: + type: array<TypeTag> + + StatsPath: + description: "Serialized representation of data statistics for a key path" + fields: + path: + type: string + documents: + type: long + documentsSampled: + type: long + optional: true + samplingRate: + type: double + optional: true + samplesRequested: + type: 
long + optional: true + boolCount: + type: BoolCount + optional: true + typeCount: + type: array<TypeTag> + optional: true + scalarHistogram: + type: array<StatsBucket> + optional: true + arrayStatistics: + type: StatsArrayHistogram + optional: true diff --git a/src/mongo/db/query/ce/stats_cache_loader_impl.cpp b/src/mongo/db/query/ce/stats_cache_loader_impl.cpp index 3253787d0cf..9e30d67de62 100644 --- a/src/mongo/db/query/ce/stats_cache_loader_impl.cpp +++ b/src/mongo/db/query/ce/stats_cache_loader_impl.cpp @@ -66,7 +66,7 @@ SemiFuture<CollectionStatistics> StatsCacheLoaderImpl::getStats(OperationContext histograms.push_back(std::move(document)); } - // TODO: SERVER-68745, parse histograms BSONs. + // TODO: SERVER-69238, parse histograms BSONs. CollectionStatistics stats{0}; return makeReadyFutureWith([this, stats] { return stats; }).semi(); } catch (const DBException& ex) { diff --git a/src/mongo/db/query/ce/stats_cache_loader_test.cpp b/src/mongo/db/query/ce/stats_cache_loader_test.cpp index 7b3a7b2d63a..b6407fcad10 100644 --- a/src/mongo/db/query/ce/stats_cache_loader_test.cpp +++ b/src/mongo/db/query/ce/stats_cache_loader_test.cpp @@ -67,14 +67,14 @@ TEST_F(StatsCacheLoaderTest, VerifyStatsLoad) { const CollectionPtr& coll = autoColl.getCollection(); { WriteUnitOfWork wuow(operationContext()); - // TODO: SERVER-68745, insert histogram. + // TODO: SERVER-69238, insert histogram. BSONObj doc = BSON("_id" << 1); ASSERT_OK(collection_internal::insertDocument( operationContext(), coll, InsertStatement(doc), nullptr)); wuow.commit(); } auto newStats = _statsCacheLoader.getStats(operationContext(), nss).get(); - // TODO: SERVER-68745, verify histogram. + // TODO: SERVER-69238, verify histogram. 
} } // namespace diff --git a/src/mongo/db/query/ce/stats_path_test.cpp b/src/mongo/db/query/ce/stats_path_test.cpp new file mode 100644 index 00000000000..6b66e06589d --- /dev/null +++ b/src/mongo/db/query/ce/stats_path_test.cpp @@ -0,0 +1,113 @@ +/** + * Copyright (C) 2022-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. 
+ */ + +#include "mongo/platform/basic.h" + +#include "mongo/bson/bsonobjbuilder.h" +#include "mongo/db/exec/sbe/values/bson.h" +#include "mongo/db/exec/sbe/values/value.h" +#include "mongo/db/query/ce/stats_gen.h" +#include "mongo/db/query/ce/stats_serialization_utils.h" +#include "mongo/unittest/unittest.h" +#include "mongo/util/assert_util.h" + +namespace mongo { +namespace { + +IDLParserContext ctx("StatsPath"); + + +/** + * Validate round trip conversion for numeric SBE values using NumberDouble datatype. + */ +TEST(StatsPath, BasicValidStatsBucketDouble) { + + auto value = + stats_serialization_utils::TypeValuePair(sbe::value::TypeTags::NumberDouble, double{1}); + auto serializedBucket = stats_serialization_utils::makeStatsBucket(value, 1, 2, 3, 4, 5); + auto parsedBucket = StatsBucket::parse(ctx, serializedBucket); + + // Verify the upperBoundary field conversion. + ASSERT(sbe::bson::convertFrom<1>(parsedBucket.getUpperBoundary().getElement()) == value); + + // roundtrip + auto bucketToBSON = parsedBucket.toBSON(); + ASSERT_BSONOBJ_EQ(serializedBucket, bucketToBSON); +} + +/** + * Validate round trip conversion for heap SBE values using StringBig datatype. + */ +TEST(StatsPath, BasicValidStatsBucketString) { + auto aString = "abcdef"_sd; + auto value = sbe::value::makeBigString(aString); + auto serializedBucket = stats_serialization_utils::makeStatsBucket(value, 1, 2, 3, 4, 5); + auto parsedBucket = StatsBucket::parse(ctx, serializedBucket); + + // Verify the upperBoundary field conversion. + auto rtValue = sbe::bson::convertFrom<1>(parsedBucket.getUpperBoundary().getElement()); + ASSERT(sbe::value::getStringView(rtValue.first, rtValue.second) == aString); + + // roundtrip + auto bucketToBSON = parsedBucket.toBSON(); + ASSERT_BSONOBJ_EQ(serializedBucket, bucketToBSON); + sbe::value::releaseValueDeep(value.first, value.second); +} + +/** + * Validate round trip conversion for StatsPath datatype. 
+ */ +TEST(StatsPath, BasicValidStatsPath) { + + std::list<BSONObj> buckets; + for (long long i = 1; i <= 3; i++) { + auto typeValue = stats_serialization_utils::TypeValuePair( + sbe::value::TypeTags::NumberDouble, double{i + 1.0}); + + auto bucket = stats_serialization_utils::makeStatsBucket(typeValue, i, i, i, i, i); + buckets.push_back(bucket); + } + stats_serialization_utils::TypeCount types; + for (long long i = 1; i <= 3; i++) { + std::stringstream typeName; + typeName << "type" << i; + auto typeElem = std::pair<std::string, long>(typeName.str(), i); + types.push_back(typeElem); + } + auto serializedPath = stats_serialization_utils::makeStatsPath( + "somePath", 100, 10, 0.1, 10, std::make_pair(4LL, 6LL), types, buckets, boost::none); + + auto parsedPath = StatsPath::parse(ctx, serializedPath); + auto pathToBSON = parsedPath.toBSON(); + + ASSERT_BSONOBJ_EQ(serializedPath, pathToBSON); +} + +} // namespace +} // namespace mongo diff --git a/src/mongo/db/query/ce/stats_serialization_utils.cpp b/src/mongo/db/query/ce/stats_serialization_utils.cpp new file mode 100644 index 00000000000..c1e7d4dcef0 --- /dev/null +++ b/src/mongo/db/query/ce/stats_serialization_utils.cpp @@ -0,0 +1,102 @@ +/** + * Copyright (C) 2022-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. 
+ * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + + +#include "mongo/db/query/ce/stats_serialization_utils.h" + +#include "mongo/bson/bsonobj.h" +#include "mongo/bson/bsonobjbuilder.h" +#include "mongo/db/exec/sbe/values/bson.h" + +namespace mongo::stats_serialization_utils { + +BSONObj makeStatsBucket(TypeValuePair typeValue, + long long boundaryCount, + long long rangeCount, + long long rangeDistincts, + long long cumulativeCount, + long long cumulativeDistincts) { + BSONObjBuilder bucketBuilder; + sbe::bson::appendValueToBsonObj( + bucketBuilder, "upperBoundary", typeValue.first, typeValue.second); + bucketBuilder.append("boundaryCount", boundaryCount); + bucketBuilder.append("rangeCount", rangeCount); + bucketBuilder.append("rangeDistincts", rangeDistincts); + bucketBuilder.append("cumulativeCount", cumulativeCount); + bucketBuilder.append("cumulativeDistincts", cumulativeDistincts); + return bucketBuilder.obj(); +} + +BSONObj makeStatsPath(StringData path, + long long documents, + boost::optional<long long> documentsSampled, + boost::optional<double> samplingRate, + boost::optional<long long> samplesRequested, + boost::optional<std::pair<long long, long long>> boolCount, + boost::optional<TypeCount> typeCount, + 
boost::optional<std::list<BSONObj>> scalarHistogram, + boost::optional<BSONObj> arrayHistogram) { + BSONObjBuilder statsBuilder; + statsBuilder.append("path", path); + statsBuilder.append("documents", documents); + if (documentsSampled) { + statsBuilder.append("documentsSampled", *documentsSampled); + } + if (samplingRate) { + statsBuilder.append("samplingRate", *samplingRate); + } + if (samplesRequested) { + statsBuilder.append("samplesRequested", *samplesRequested); + } + if (boolCount) { + BSONObjBuilder boolCountBuilder = statsBuilder.subobjStart("boolCount"); + boolCountBuilder.append("trueCount", boolCount->first); + boolCountBuilder.append("falseCount", boolCount->second); + boolCountBuilder.done(); + } + if (typeCount) { + BSONArrayBuilder typeCountBuilder = statsBuilder.subarrayStart("typeCount"); + for (const auto& typeElem : *typeCount) { + auto typeElemObj = BSON("typeName" << typeElem.first << "count" << typeElem.second); + typeCountBuilder.append(typeElemObj); + } + typeCountBuilder.done(); + } + if (scalarHistogram) { + BSONArrayBuilder histArrBuilder = statsBuilder.subarrayStart("scalarHistogram"); + histArrBuilder.append(*scalarHistogram); + histArrBuilder.done(); + } + if (arrayHistogram) { // Key must match the 'arrayStatistics' field of the StatsPath IDL struct. + statsBuilder.append("arrayStatistics", *arrayHistogram); + } + return statsBuilder.obj(); +} + +} // namespace mongo::stats_serialization_utils diff --git a/src/mongo/db/query/ce/stats_serialization_utils.h b/src/mongo/db/query/ce/stats_serialization_utils.h new file mode 100644 index 00000000000..84a952e71f2 --- /dev/null +++ b/src/mongo/db/query/ce/stats_serialization_utils.h @@ -0,0 +1,66 @@ +/** + * Copyright (C) 2022-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#pragma once + +#include <list> +#include <string> + +#include "mongo/base/string_data.h" +#include "mongo/bson/bsonobj.h" +#include "mongo/db/exec/sbe/values/value.h" + +namespace mongo::stats_serialization_utils { + +using TypeCount = std::list<std::pair<std::string, long long>>; +using TypeValuePair = std::pair<sbe::value::TypeTags, sbe::value::Value>; + +/** + * Returns owned BSON Object representing data matching mongo::StatsBucket IDL. + */ +BSONObj makeStatsBucket(TypeValuePair typeValue, + long long boundaryCount, + long long rangeCount, + long long rangeDistincts, + long long cumulativeCount, + long long cumulativeDistincts); +/** + * Returns owned BSON Object representing data matching mongo::StatsPath IDL. 
+ */ +BSONObj makeStatsPath(StringData path, + long long documents, + boost::optional<long long> documentsSampled, + boost::optional<double> samplingRate, + boost::optional<long long> samplesRequested, + boost::optional<std::pair<long long, long long>> boolCount, + boost::optional<TypeCount> typeCount, + boost::optional<std::list<BSONObj>> scalarHistogram, + boost::optional<BSONObj> arrayHistogram); + +} // namespace mongo::stats_serialization_utils |