diff options
7 files changed, 434 insertions, 74 deletions
diff --git a/src/mongo/db/exec/geo_near.cpp b/src/mongo/db/exec/geo_near.cpp index 5a6d6beb9c8..0ce4f9ab856 100644 --- a/src/mongo/db/exec/geo_near.cpp +++ b/src/mongo/db/exec/geo_near.cpp @@ -57,8 +57,6 @@ namespace mongo { using std::abs; using std::unique_ptr; -namespace dps = ::mongo::dotted_path_support; - // // Shared GeoNear search functionality // @@ -67,77 +65,6 @@ static const double kCircOfEarthInMeters = 2 * M_PI * kRadiusOfEarthInMeters; static const double kMaxEarthDistanceInMeters = kCircOfEarthInMeters / 2; static const double kMetersPerDegreeAtEquator = kCircOfEarthInMeters / 360; -namespace { - -/** - * Structure that holds BSON addresses (BSONElements) and the corresponding geometry parsed - * at those locations. - * Used to separate the parsing of geometries from a BSONObj (which must stay in scope) from - * the computation over those geometries. - * TODO: Merge with 2D/2DSphere key extraction? - */ -struct StoredGeometry { - static StoredGeometry* parseFrom(const BSONElement& element) { - if (!element.isABSONObj()) - return nullptr; - - unique_ptr<StoredGeometry> stored(new StoredGeometry); - - // GeoNear stage can only be run with an existing index - // Therefore, it is always safe to skip geometry validation - if (!stored->geometry.parseFromStorage(element, true).isOK()) - return nullptr; - stored->element = element; - return stored.release(); - } - - BSONElement element; - GeometryContainer geometry; -}; -} // namespace - -/** - * Find and parse all geometry elements on the appropriate field path from the document. 
- */ -static void extractGeometries(const BSONObj& doc, - const string& path, - std::vector<std::unique_ptr<StoredGeometry>>* geometries) { - BSONElementSet geomElements; - // NOTE: Annoyingly, we cannot just expand arrays b/c single 2d points are arrays, we need - // to manually expand all results to check if they are geometries - dps::extractAllElementsAlongPath(doc, path, geomElements, false /* expand arrays */); - - for (BSONElementSet::iterator it = geomElements.begin(); it != geomElements.end(); ++it) { - const BSONElement& el = *it; - unique_ptr<StoredGeometry> stored(StoredGeometry::parseFrom(el)); - - if (stored.get()) { - // Valid geometry element - geometries->push_back(std::move(stored)); - } else if (el.type() == Array) { - // Many geometries may be in an array - BSONObjIterator arrIt(el.Obj()); - while (arrIt.more()) { - const BSONElement nextEl = arrIt.next(); - stored.reset(StoredGeometry::parseFrom(nextEl)); - - if (stored.get()) { - // Valid geometry element - geometries->push_back(std::move(stored)); - } else { - LOGV2_WARNING(23760, - "geoNear stage read non-geometry element in array", - "nextElement"_attr = redact(nextEl), - "element"_attr = redact(el)); - } - } - } else { - LOGV2_WARNING( - 23761, "geoNear stage read non-geometry element", "element"_attr = redact(el)); - } - } -} - static double computeGeoNearDistance(const GeoNearParams& nearParams, WorkingSetMember* member) { // // Generic GeoNear distance computation @@ -152,7 +79,8 @@ static double computeGeoNearDistance(const GeoNearParams& nearParams, WorkingSet // Extract all the geometries out of this document for the near query std::vector<std::unique_ptr<StoredGeometry>> geometries; - extractGeometries(member->doc.value().toBson(), nearParams.nearQuery->field, &geometries); + StoredGeometry::extractGeometries( + member->doc.value().toBson(), nearParams.nearQuery->field, &geometries, true); // Compute the minimum distance of all the geometries in the document double minDistance = -1; 
diff --git a/src/mongo/db/geo/geometry_container.cpp b/src/mongo/db/geo/geometry_container.cpp index 8eaa488df90..e4f18636b14 100644 --- a/src/mongo/db/geo/geometry_container.cpp +++ b/src/mongo/db/geo/geometry_container.cpp @@ -27,10 +27,13 @@ * it in the license file. */ +#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kQuery + #include "mongo/db/geo/geometry_container.h" #include "mongo/db/geo/geoconstants.h" #include "mongo/db/geo/geoparser.h" +#include "mongo/logv2/log.h" #include "mongo/util/str.h" #include "mongo/util/transitional_tools_do_not_use/vector_spooling.h" @@ -1331,4 +1334,62 @@ const CapWithCRS* GeometryContainer::getCapGeometryHack() const { return _cap.get(); } +StoredGeometry* StoredGeometry::parseFrom(const BSONElement& element, bool skipValidation) { + if (!element.isABSONObj()) + return nullptr; + + auto stored = std::make_unique<StoredGeometry>(); + + // Validation is the caller's choice: the GeoNear stage always runs over an existing index and + // can safely skip it, while other callers may pass skipValidation = false. + if (!stored->geometry.parseFromStorage(element, skipValidation).isOK()) + return nullptr; + stored->element = element; + return stored.release(); +} + +/** + * Find and parse all geometry elements on the appropriate field path from the document. 
+ * Non-geometry arrays are searched one level deep; other non-geometries are logged and skipped. + */ +void StoredGeometry::extractGeometries(const BSONObj& doc, + const std::string& path, + std::vector<std::unique_ptr<StoredGeometry>>* geometries, + bool skipValidation) { + BSONElementSet geomElements; + // NOTE: Annoyingly, we cannot just expand arrays b/c single 2d points are arrays, we need + // to manually expand all results to check if they are geometries + ::mongo::dotted_path_support::extractAllElementsAlongPath( + doc, path, geomElements, false /* expand arrays */); + + for (const BSONElement& el : geomElements) { + std::unique_ptr<StoredGeometry> stored(StoredGeometry::parseFrom(el, skipValidation)); + + if (stored.get()) { + // Valid geometry element + geometries->push_back(std::move(stored)); + } else if (el.type() == Array) { + // Many geometries may be in an array + BSONObjIterator arrIt(el.Obj()); + while (arrIt.more()) { + const BSONElement nextEl = arrIt.next(); + stored.reset(StoredGeometry::parseFrom(nextEl, skipValidation)); + + if (stored.get()) { + // Valid geometry element + geometries->push_back(std::move(stored)); + } else { + LOGV2_WARNING(23760, + "geoNear stage read non-geometry element in array", + "nextElement"_attr = redact(nextEl), + "element"_attr = redact(el)); + } + } + } else { + LOGV2_WARNING( + 23761, "geoNear stage read non-geometry element", "element"_attr = redact(el)); + } + } +} + } // namespace mongo diff --git a/src/mongo/db/geo/geometry_container.h b/src/mongo/db/geo/geometry_container.h index 74b32491532..aa5bc3cf81e 100644 --- a/src/mongo/db/geo/geometry_container.h +++ b/src/mongo/db/geo/geometry_container.h @@ -31,6 +31,7 @@ #include <string> +#include "mongo/db/bson/dotted_path_support.h" #include "mongo/db/geo/shapes.h" #include "third_party/s2/s2regionunion.h" @@ -174,4 +175,24 @@ private: BSONElement _geoElm; }; +/** + * Structure that holds BSON addresses (BSONElements) and the corresponding geometry parsed + * at those locations. 
+ * Used to separate the parsing of geometries from a BSONObj (which must stay in scope) from + * the computation over those geometries. + * TODO: Merge with 2D/2DSphere key extraction? + */ +class StoredGeometry { +public: + static StoredGeometry* parseFrom(const BSONElement& element, bool skipValidation); + + static void extractGeometries(const BSONObj& doc, + const std::string& path, + std::vector<std::unique_ptr<StoredGeometry>>* geometries, + bool skipValidation); + + BSONElement element; + GeometryContainer geometry; +}; + } // namespace mongo diff --git a/src/mongo/db/pipeline/SConscript b/src/mongo/db/pipeline/SConscript index 3e5430216ab..03dd31b2279 100644 --- a/src/mongo/db/pipeline/SConscript +++ b/src/mongo/db/pipeline/SConscript @@ -249,6 +249,7 @@ pipelineEnv.Library( 'document_source_graph_lookup.cpp', 'document_source_group.cpp', 'document_source_index_stats.cpp', + 'document_source_internal_compute_geo_near_distance.cpp', 'document_source_internal_inhibit_optimization.cpp', 'document_source_internal_shard_filter.cpp', 'document_source_internal_split_pipeline.cpp', @@ -455,6 +456,7 @@ env.CppUnitTest( 'document_source_sort_by_count_test.cpp', 'document_source_sort_test.cpp', 'document_source_union_with_test.cpp', + 'document_source_internal_compute_geo_near_distance_test.cpp', 'document_source_internal_unpack_bucket_test/extract_or_build_project_to_internalize_test.cpp', 'document_source_internal_unpack_bucket_test/create_predicates_on_bucket_level_field_test.cpp', 'document_source_internal_unpack_bucket_test/extract_project_for_pushdown_test.cpp', diff --git a/src/mongo/db/pipeline/document_source_internal_compute_geo_near_distance.cpp b/src/mongo/db/pipeline/document_source_internal_compute_geo_near_distance.cpp new file mode 100644 index 00000000000..7ef8f05b318 --- /dev/null +++ b/src/mongo/db/pipeline/document_source_internal_compute_geo_near_distance.cpp @@ -0,0 +1,126 @@ +/** + * Copyright (C) 2021-present MongoDB, Inc. 
+ * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. 
+ */ + +#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kQuery + +#include "mongo/platform/basic.h" + +#include "mongo/db/exec/document_value/document.h" +#include "mongo/db/geo/geoparser.h" +#include "mongo/db/pipeline/document_source_internal_compute_geo_near_distance.h" + +namespace mongo { + +REGISTER_DOCUMENT_SOURCE(_internalComputeGeoNearDistance, + LiteParsedDocumentSourceDefault::parse, + DocumentSourceInternalGeoNearDistance::createFromBson, + AllowedWithApiStrict::kInternal); + +boost::intrusive_ptr<DocumentSource> DocumentSourceInternalGeoNearDistance::createFromBson( + BSONElement elem, const boost::intrusive_ptr<ExpressionContext>& pExpCtx) { + auto obj = elem.embeddedObjectUserCheck(); + tassert(5874500, + str::stream() << DocumentSourceInternalGeoNearDistance::kKeyFieldName + << " field is required and must be a string", + obj.hasField(DocumentSourceInternalGeoNearDistance::kKeyFieldName) && + obj[DocumentSourceInternalGeoNearDistance::kKeyFieldName].type() == + BSONType::String); + tassert(5874501, + str::stream() << DocumentSourceInternalGeoNearDistance::kNearFieldName + << " field is required and must be an object or array", + obj.hasField(DocumentSourceInternalGeoNearDistance::kNearFieldName) && + obj[DocumentSourceInternalGeoNearDistance::kNearFieldName].isABSONObj()); + + auto nearElm = obj[DocumentSourceInternalGeoNearDistance::kNearFieldName]; + auto centroid = std::make_unique<PointWithCRS>(); + uassertStatusOK(GeoParser::parseQueryPoint(nearElm, centroid.get())); + + boost::intrusive_ptr<DocumentSourceInternalGeoNearDistance> out = + new DocumentSourceInternalGeoNearDistance( + pExpCtx, + obj[DocumentSourceInternalGeoNearDistance::kKeyFieldName].String(), + std::move(centroid), + nearElm.embeddedObject().getOwned()); + + return out; +} + +DocumentSourceInternalGeoNearDistance::DocumentSourceInternalGeoNearDistance( + const boost::intrusive_ptr<ExpressionContext>& pExpCtx, + const std::string& key, + 
std::unique_ptr<PointWithCRS> centroid, + const BSONObj& coords) + : DocumentSource(kStageName, pExpCtx), + _key(key), + _centroid(std::move(centroid)), + _coords(coords) {} + +DocumentSource::GetNextResult DocumentSourceInternalGeoNearDistance::doGetNext() { + auto next = pSource->getNext(); + + if (next.isAdvanced()) { + // Keep the BSON alive for the whole computation: StoredGeometry holds BSONElements that + // point into it. Then extract all the geometries on '_key' for the near query. + const BSONObj docBson = next.getDocument().toBson(); + std::vector<std::unique_ptr<StoredGeometry>> geometries; + StoredGeometry::extractGeometries(docBson, _key, &geometries, false); + + // Compute the minimum distance over all usable geometries; stays -1 if there are none + double minDistance = -1; + for (const auto& stored : geometries) { + if (!stored->geometry.supportsProject(_centroid->crs)) + continue; + stored->geometry.projectInto(_centroid->crs); + + const double nextDistance = stored->geometry.minDistance(*_centroid); + + if (minDistance < 0 || nextDistance < minDistance) { + minDistance = nextDistance; + } + } + + MutableDocument doc(next.releaseDocument()); + doc.metadata().setGeoNearDistance(minDistance); + + return doc.freeze(); + } + + return next; +} + +Value DocumentSourceInternalGeoNearDistance::serialize( + boost::optional<ExplainOptions::Verbosity> explain) const { + MutableDocument out; + out.setField(DocumentSourceInternalGeoNearDistance::kNearFieldName, Value(_coords)); + out.setField(DocumentSourceInternalGeoNearDistance::kKeyFieldName, Value(_key)); + + return Value(DOC(getSourceName() << out.freeze())); +} + +} // namespace mongo diff --git a/src/mongo/db/pipeline/document_source_internal_compute_geo_near_distance.h b/src/mongo/db/pipeline/document_source_internal_compute_geo_near_distance.h new file mode 100644 index 00000000000..fe0eafdc7ee --- /dev/null +++ b/src/mongo/db/pipeline/document_source_internal_compute_geo_near_distance.h @@ -0,0 +1,85 @@ +/** + * Copyright (C) 2021-present MongoDB, Inc. 
+ * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#pragma once + +#include "mongo/db/geo/geometry_container.h" +#include "mongo/db/pipeline/document_source.h" + +namespace mongo { + +/** + * This is an internal stage that computes the distance between the given centroid and the + * geometry at path '_key' of the input Document, and stores it as geoNearDistance metadata. 
+ */ +class DocumentSourceInternalGeoNearDistance final : public DocumentSource { +public: + static constexpr StringData kStageName = "$_internalComputeGeoNearDistance"_sd; + static constexpr StringData kNearFieldName = "near"_sd; + static constexpr StringData kKeyFieldName = "key"_sd; + + static boost::intrusive_ptr<DocumentSource> createFromBson( + BSONElement elem, const boost::intrusive_ptr<ExpressionContext>& pExpCtx); + + DocumentSourceInternalGeoNearDistance(const boost::intrusive_ptr<ExpressionContext>& pExpCtx, + const std::string& key, + std::unique_ptr<PointWithCRS> centroid, + const BSONObj& coords); + + const char* getSourceName() const override { + return kStageName.rawData(); + } + + StageConstraints constraints(Pipeline::SplitState pipeState) const override { + return StageConstraints(StreamType::kStreaming, + PositionRequirement::kNone, + HostTypeRequirement::kNone, + DiskUseRequirement::kNoDiskUse, + FacetRequirement::kAllowed, + TransactionRequirement::kAllowed, + LookupRequirement::kAllowed, + UnionRequirement::kAllowed); + } + + boost::optional<DistributedPlanLogic> distributedPlanLogic() override { + return boost::none; + } + +private: + Value serialize( + boost::optional<ExplainOptions::Verbosity> explain = boost::none) const override; + + GetNextResult doGetNext() override; + + std::string _key; // path of the geometry field in input documents + std::unique_ptr<PointWithCRS> _centroid; // query point parsed from the "near" option + BSONObj _coords; // "near" option as given; echoed by serialize() +}; + +} // namespace mongo diff --git a/src/mongo/db/pipeline/document_source_internal_compute_geo_near_distance_test.cpp b/src/mongo/db/pipeline/document_source_internal_compute_geo_near_distance_test.cpp new file mode 100644 index 00000000000..10bc7f7023d --- /dev/null +++ b/src/mongo/db/pipeline/document_source_internal_compute_geo_near_distance_test.cpp @@ -0,0 +1,137 @@ +/** + * Copyright (C) 2021-present MongoDB, Inc. 
+ * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. 
+ */ + +#include "mongo/platform/basic.h" + +#include "mongo/bson/bsonmisc.h" +#include "mongo/bson/bsonobj.h" +#include "mongo/bson/bsonobjbuilder.h" +#include "mongo/db/pipeline/aggregation_context_fixture.h" +#include "mongo/db/pipeline/document_source_internal_compute_geo_near_distance.h" +#include "mongo/db/pipeline/document_source_mock.h" +#include "mongo/unittest/unittest.h" + +namespace mongo { +namespace { + +using DocumentSourceInternalGeoNearDistanceTest = AggregationContextFixture; + +TEST_F(DocumentSourceInternalGeoNearDistanceTest, DistanceBetweenOverlappingPoints) { + BSONObj computeGeoSpec = fromjson(R"( + { $_internalComputeGeoNearDistance: { + near: { + type: "Point", + coordinates: [1, 1] + }, + key: "loc" + }})"); + auto geoDist = DocumentSourceInternalGeoNearDistance::createFromBson( + computeGeoSpec.firstElement(), getExpCtx()); + + auto mock = + DocumentSourceMock::createForTest(DOC("loc" << DOC("type" + << "Point"_sd + << "coordinates" << DOC_ARRAY(1 << 1))), + getExpCtx()); + + geoDist->setSource(mock.get()); + auto next = geoDist->getNext(); + ASSERT_TRUE(next.isAdvanced()); + auto doc = next.getDocument(); + ASSERT_EQUALS(doc.metadata().getGeoNearDistance(), 0); +} + +TEST_F(DocumentSourceInternalGeoNearDistanceTest, SphericalDistanceBetweenTwoPoints) { + BSONObj computeGeoSpec = fromjson(R"( + { $_internalComputeGeoNearDistance: { + near: { + type: "Point", + coordinates: [0, 1] + }, + key: "loc" + }})"); + auto geoDist = DocumentSourceInternalGeoNearDistance::createFromBson( + computeGeoSpec.firstElement(), getExpCtx()); + + auto mock = + DocumentSourceMock::createForTest(DOC("loc" << DOC("type" + << "Point"_sd + << "coordinates" << DOC_ARRAY(0 << 0))), + getExpCtx()); + + geoDist->setSource(mock.get()); + auto next = geoDist->getNext(); + ASSERT_TRUE(next.isAdvanced()); + auto doc = next.getDocument(); + const int metersPerLatDegree = 111319; // Each degree of latitude is approximately 111km. 
+ ASSERT_APPROX_EQUAL(doc.metadata().getGeoNearDistance(), metersPerLatDegree, 300); +} + +TEST_F(DocumentSourceInternalGeoNearDistanceTest, DistanceBetweenTwoLegacyPoints) { + BSONObj computeGeoSpec = fromjson(R"( + { $_internalComputeGeoNearDistance: { + near: [1, 1], + key: "loc" + }})"); + auto geoDist = DocumentSourceInternalGeoNearDistance::createFromBson( + computeGeoSpec.firstElement(), getExpCtx()); + + auto mock = DocumentSourceMock::createForTest(DOC("loc" << DOC_ARRAY(0 << 0)), getExpCtx()); + + geoDist->setSource(mock.get()); + auto next = geoDist->getNext(); + ASSERT_TRUE(next.isAdvanced()); + auto doc = next.getDocument(); + ASSERT_APPROX_EQUAL(doc.metadata().getGeoNearDistance(), 1.41421, 0.01); +} + +TEST_F(DocumentSourceInternalGeoNearDistanceTest, DistanceBetweenTwoMixedPointsSphereAndFlat) { + BSONObj computeGeoSpec = fromjson(R"( + { $_internalComputeGeoNearDistance: { + near: { + type: "Point", + coordinates: [0, 1] + }, + key: "loc" + }})"); + auto geoDist = DocumentSourceInternalGeoNearDistance::createFromBson( + computeGeoSpec.firstElement(), getExpCtx()); + + auto mock = DocumentSourceMock::createForTest(DOC("loc" << DOC_ARRAY(0 << 0)), getExpCtx()); + + geoDist->setSource(mock.get()); + auto next = geoDist->getNext(); + ASSERT_TRUE(next.isAdvanced()); + auto doc = next.getDocument(); + const int metersPerLatDegree = 111319; // Each degree of latitude is approximately 111km. + ASSERT_APPROX_EQUAL(doc.metadata().getGeoNearDistance(), metersPerLatDegree, 300); +} + +} // namespace +} // namespace mongo |