diff options
author | Daniel Gómez Ferro <daniel.gomezferro@mongodb.com> | 2022-02-07 15:29:29 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-02-07 18:50:45 +0000 |
commit | 7a2d86c376488cc756c0325e6edaf3406a86ec5d (patch) | |
tree | 660a74392777be30a2d629bde1951b21c3a61883 /src/mongo/db | |
parent | 59e19dc9bff797fc1cff45d56942e5e61da36f1a (diff) | |
download | mongo-7a2d86c376488cc756c0325e6edaf3406a86ec5d.tar.gz |
SERVER-61939 Tighter bounds for clustered collection scans
Diffstat (limited to 'src/mongo/db')
21 files changed, 233 insertions, 118 deletions
diff --git a/src/mongo/db/dbhelpers.cpp b/src/mongo/db/dbhelpers.cpp index e36026fca0a..1933a6907ef 100644 --- a/src/mongo/db/dbhelpers.cpp +++ b/src/mongo/db/dbhelpers.cpp @@ -46,6 +46,7 @@ #include "mongo/db/ops/update.h" #include "mongo/db/ops/update_request.h" #include "mongo/db/query/get_executor.h" +#include "mongo/db/query/index_bounds_builder.h" #include "mongo/db/query/internal_plans.h" #include "mongo/db/query/query_planner.h" #include "mongo/db/record_id_helpers.h" @@ -174,7 +175,7 @@ bool Helpers::findById(OperationContext* opCtx, if (collection->isClustered()) { Snapshotted<BSONObj> doc; if (collection->findDoc(opCtx, - RecordId(record_id_helpers::keyForElem( + record_id_helpers::keyForObj(IndexBoundsBuilder::objFromElement( query["_id"], collection->getDefaultCollator())), &doc)) { result = std::move(doc.value()); @@ -200,8 +201,8 @@ RecordId Helpers::findById(OperationContext* opCtx, if (!desc && clustered_util::isClusteredOnId(collection->getClusteredInfo())) { // There is no explicit IndexDescriptor for _id on a collection clustered by _id. However, // the RecordId can be constructed directly from the input. - return RecordId( - record_id_helpers::keyForElem(idquery["_id"], collection->getDefaultCollator())); + return record_id_helpers::keyForObj( + IndexBoundsBuilder::objFromElement(idquery["_id"], collection->getDefaultCollator())); } uassert(13430, "no _id index", desc); diff --git a/src/mongo/db/exec/collection_scan.cpp b/src/mongo/db/exec/collection_scan.cpp index cb8e776eaec..09ea30bbf15 100644 --- a/src/mongo/db/exec/collection_scan.cpp +++ b/src/mongo/db/exec/collection_scan.cpp @@ -42,6 +42,7 @@ #include "mongo/db/exec/scoped_timer.h" #include "mongo/db/exec/working_set.h" #include "mongo/db/exec/working_set_common.h" +#include "mongo/db/record_id_helpers.h" #include "mongo/db/repl/optime.h" #include "mongo/logv2/log.h" #include "mongo/util/fail_point.h" @@ -202,13 +203,13 @@ PlanStage::StageState CollectionScan::doWork(WorkingSetID* out) { if (_lastSeenId.isNull() && _params.direction == CollectionScanParams::FORWARD && _params.minRecord) { // Seek to the approximate start location. - record = _cursor->seekNear(*_params.minRecord); + record = _cursor->seekNear(_params.minRecord->recordId()); } if (_lastSeenId.isNull() && _params.direction == CollectionScanParams::BACKWARD && _params.maxRecord) { // Seek to the approximate start location (at the end). - record = _cursor->seekNear(*_params.maxRecord); + record = _cursor->seekNear(_params.maxRecord->recordId()); } if (!record) { @@ -304,7 +305,7 @@ bool pastEndOfRange(const CollectionScanParams& params, const WorkingSetMember& return false; } - auto endRecord = *params.maxRecord; + auto endRecord = params.maxRecord->recordId(); return member.recordId > endRecord || (member.recordId == endRecord && !shouldIncludeEndRecord(params)); } else { @@ -312,7 +313,7 @@ bool pastEndOfRange(const CollectionScanParams& params, const WorkingSetMember& if (!params.minRecord) { return false; } - auto endRecord = *params.minRecord; + auto endRecord = params.minRecord->recordId(); return member.recordId < endRecord || (member.recordId == endRecord && !shouldIncludeEndRecord(params)); @@ -326,7 +327,7 @@ bool beforeStartOfRange(const CollectionScanParams& params, const WorkingSetMemb return false; } - auto startRecord = *params.minRecord; + auto startRecord = params.minRecord->recordId(); return member.recordId < startRecord || (member.recordId == startRecord && !shouldIncludeStartRecord(params)); } else { @@ -334,7 +335,7 @@ bool beforeStartOfRange(const CollectionScanParams& params, const WorkingSetMemb if (!params.maxRecord) { return false; } - auto startRecord = *params.maxRecord; + auto startRecord = params.maxRecord->recordId(); return member.recordId > startRecord || (member.recordId == startRecord && !shouldIncludeStartRecord(params)); } diff --git a/src/mongo/db/exec/collection_scan_common.h b/src/mongo/db/exec/collection_scan_common.h index b1ece351bdc..ba5559a4491 100644 --- a/src/mongo/db/exec/collection_scan_common.h +++ b/src/mongo/db/exec/collection_scan_common.h @@ -30,6 +30,7 @@ #pragma once #include "mongo/bson/timestamp.h" +#include "mongo/db/query/record_id_bound.h" #include "mongo/db/record_id.h" namespace mongo { @@ -54,7 +55,7 @@ struct CollectionScanParams { // be used for scans on clustered collections and forward oplog scans. If exclusive // bounds are required, a MatchExpression must be passed to the CollectionScan stage. This field // cannot be used in conjunction with 'resumeAfterRecordId' - boost::optional<RecordId> minRecord; + boost::optional<RecordIdBound> minRecord; // If present, this parameter sets the start point of a reverse scan or the end point of a // forward scan. A forward scan will stop and return EOF on the first document with a RecordId @@ -63,7 +64,7 @@ struct CollectionScanParams { // only be used for scans on clustered collections and forward oplog scans. If exclusive // bounds are required, a MatchExpression must be passed to the CollectionScan stage. This field // cannot be used in conjunction with 'resumeAfterRecordId'. - boost::optional<RecordId> maxRecord; + boost::optional<RecordIdBound> maxRecord; // If true, the collection scan will return a token that can be used to resume the scan. bool requestResumeToken = false; diff --git a/src/mongo/db/exec/plan_stats.h b/src/mongo/db/exec/plan_stats.h index 055505371b9..36014955c0d 100644 --- a/src/mongo/db/exec/plan_stats.h +++ b/src/mongo/db/exec/plan_stats.h @@ -38,6 +38,7 @@ #include "mongo/db/index/multikey_paths.h" #include "mongo/db/jsobj.h" #include "mongo/db/query/plan_summary_stats.h" +#include "mongo/db/query/record_id_bound.h" #include "mongo/db/query/stage_types.h" #include "mongo/db/record_id.h" #include "mongo/util/container_size_helper.h" @@ -297,10 +298,10 @@ struct CollectionScanStats : public SpecificStats { bool tailable{false}; // The start location of a forward scan and end location for a reverse scan. - boost::optional<RecordId> minRecord; + boost::optional<RecordIdBound> minRecord; // The end location of a reverse scan and start location for a forward scan. - boost::optional<RecordId> maxRecord; + boost::optional<RecordIdBound> maxRecord; }; struct CountStats : public SpecificStats { diff --git a/src/mongo/db/pipeline/change_stream_expired_pre_image_remover.cpp b/src/mongo/db/pipeline/change_stream_expired_pre_image_remover.cpp index 8e8c1193904..614992ea99b 100644 --- a/src/mongo/db/pipeline/change_stream_expired_pre_image_remover.cpp +++ b/src/mongo/db/pipeline/change_stream_expired_pre_image_remover.cpp @@ -49,7 +49,7 @@ namespace mongo { namespace { RecordId toRecordId(ChangeStreamPreImageId id) { return record_id_helpers::keyForElem( - BSON(ChangeStreamPreImage::kIdFieldName << id.toBSON()).firstElement(), nullptr); + BSON(ChangeStreamPreImage::kIdFieldName << id.toBSON()).firstElement()); } /** @@ -153,10 +153,10 @@ public: while (true) { // Fetch the first pre-image from the next collection, that has pre-images enabled. auto planExecutor = _previousCollectionUUID - ? createCollectionScan( + ? createCollectionScan(RecordIdBound( toRecordId(ChangeStreamPreImageId(*_previousCollectionUUID, Timestamp::max(), - std::numeric_limits<int64_t>::max()))) + std::numeric_limits<int64_t>::max())))) : createCollectionScan(boost::none); auto preImageAttributes = getNextPreImageAttributes(planExecutor); @@ -185,10 +185,10 @@ public: // '_earliestOplogEntryTimestamp', as the pre-images with smaller or equal // timestamp are guaranteed to be expired. Timestamp lastExpiredPreimageTs(_earliestOplogEntryTimestamp.asULL() - 1); - auto planExecutor = createCollectionScan( + auto planExecutor = createCollectionScan(RecordIdBound( toRecordId(ChangeStreamPreImageId(currentCollectionUUID, lastExpiredPreimageTs, - std::numeric_limits<int64_t>::max()))); + std::numeric_limits<int64_t>::max())))); // Iterate over all the expired pre-images in the collection in order to find // the max RecordId. @@ -233,9 +233,10 @@ public: preImageAttributes.operationTime <= expirationTime; } + // Set up the new collection scan to start from the 'minKey'. std::unique_ptr<PlanExecutor, PlanExecutor::Deleter> createCollectionScan( - boost::optional<RecordId> minKey) const { + boost::optional<RecordIdBound> minKey) const { return InternalPlanner::collectionScan(_opCtx, _preImagesCollPtr, PlanYieldPolicy::YieldPolicy::INTERRUPT_ONLY, @@ -315,6 +316,7 @@ void deleteExpiredChangeStreamPreImages(Client* client) { // TODO SERVER-58693: pass expiration duration parameter to the iterator. ChangeStreamExpiredPreImageIterator expiredPreImages( opCtx.get(), &preImagesColl, currentEarliestOplogEntryTs); + for (const auto& collectionRange : expiredPreImages) { writeConflictRetry(opCtx.get(), "ChangeStreamExpiredPreImagesRemover", @@ -329,8 +331,8 @@ void deleteExpiredChangeStreamPreImages(Client* client) { std::move(params), PlanYieldPolicy::YieldPolicy::YIELD_AUTO, InternalPlanner::Direction::FORWARD, - collectionRange.first, - collectionRange.second); + RecordIdBound(collectionRange.first), + RecordIdBound(collectionRange.second)); numberOfRemovals += exec->executeDelete(); }); } diff --git a/src/mongo/db/pipeline/document_source_check_resume_token_test.cpp b/src/mongo/db/pipeline/document_source_check_resume_token_test.cpp index 67f8007fe2d..7c025a92766 100644 --- a/src/mongo/db/pipeline/document_source_check_resume_token_test.cpp +++ b/src/mongo/db/pipeline/document_source_check_resume_token_test.cpp @@ -170,7 +170,7 @@ public: _filter = MatchExpressionParser::parseAndNormalize(_filterExpr, pExpCtx); _params.assertTsHasNotFallenOffOplog = Timestamp(0); _params.shouldTrackLatestOplogTimestamp = true; - _params.minRecord = RecordId(0); + _params.minRecord = RecordIdBound(RecordId(0)); _params.tailable = true; } @@ -178,7 +178,7 @@ public: invariant(!_collScan); _filterExpr = BSON("ns" << kTestNs << "ts" << BSON("$gte" << resumeToken.clusterTime)); _filter = MatchExpressionParser::parseAndNormalize(_filterExpr, pExpCtx); - _params.minRecord = RecordId(resumeToken.clusterTime.asLL()); + _params.minRecord = RecordIdBound(RecordId(resumeToken.clusterTime.asLL())); _params.assertTsHasNotFallenOffOplog = resumeToken.clusterTime; } diff --git a/src/mongo/db/query/internal_plans.cpp b/src/mongo/db/query/internal_plans.cpp index 0f45d3b4649..425d2857eb5 100644 --- a/src/mongo/db/query/internal_plans.cpp +++ b/src/mongo/db/query/internal_plans.cpp @@ -79,12 +79,12 @@ CollectionScanParams convertIndexScanParamsToCollScanParams( clustered_util::matchesClusterKey(keyPattern, collection->getClusteredInfo())); invariant(collection->getDefaultCollator() == nullptr); - boost::optional<RecordId> startRecord, endRecord; + boost::optional<RecordIdBound> startRecord, endRecord; if (!startKey.isEmpty()) { - startRecord = RecordId(record_id_helpers::keyForElem(startKey.firstElement(), nullptr)); + startRecord = RecordIdBound(record_id_helpers::keyForElem(startKey.firstElement())); } if (!endKey.isEmpty()) { - endRecord = RecordId(record_id_helpers::keyForElem(endKey.firstElement(), nullptr)); + endRecord = RecordIdBound(record_id_helpers::keyForElem(endKey.firstElement())); } // For a forward scan, the startKey is the minRecord. For a backward scan, it is the maxRecord. @@ -93,7 +93,7 @@ CollectionScanParams convertIndexScanParamsToCollScanParams( if (minRecord && maxRecord) { // Regardless of direction, the minRecord should always be less than the maxRecord - dassert(minRecord < maxRecord, + dassert(minRecord->recordId() < maxRecord->recordId(), str::stream() << "Expected the minRecord " << minRecord << " to be less than the maxRecord " << maxRecord << " on a bounded collection scan. Original startKey and endKey for " @@ -105,6 +105,7 @@ CollectionScanParams convertIndexScanParamsToCollScanParams( CollectionScanParams params; params.minRecord = minRecord; params.maxRecord = maxRecord; + if (InternalPlanner::FORWARD == direction) { params.direction = CollectionScanParams::FORWARD; } else { @@ -120,8 +121,8 @@ CollectionScanParams createCollectionScanParams( const CollectionPtr* coll, InternalPlanner::Direction direction, boost::optional<RecordId> resumeAfterRecordId, - boost::optional<RecordId> minRecord, - boost::optional<RecordId> maxRecord) { + boost::optional<RecordIdBound> minRecord, + boost::optional<RecordIdBound> maxRecord) { const auto& collection = *coll; invariant(collection); @@ -146,8 +147,8 @@ std::unique_ptr<PlanExecutor, PlanExecutor::Deleter> InternalPlanner::collection PlanYieldPolicy::YieldPolicy yieldPolicy, const Direction direction, boost::optional<RecordId> resumeAfterRecordId, - boost::optional<RecordId> minRecord, - boost::optional<RecordId> maxRecord) { + boost::optional<RecordIdBound> minRecord, + boost::optional<RecordIdBound> maxRecord) { const auto& collection = *coll; invariant(collection); @@ -205,8 +206,8 @@ std::unique_ptr<PlanExecutor, PlanExecutor::Deleter> InternalPlanner::deleteWith std::unique_ptr<DeleteStageParams> params, PlanYieldPolicy::YieldPolicy yieldPolicy, Direction direction, - boost::optional<RecordId> minRecord, - boost::optional<RecordId> maxRecord) { + boost::optional<RecordIdBound> minRecord, + boost::optional<RecordIdBound> maxRecord) { const auto& collection = *coll; invariant(collection); auto ws = std::make_unique<WorkingSet>(); diff --git a/src/mongo/db/query/internal_plans.h b/src/mongo/db/query/internal_plans.h index 387fb8f9d80..781b336526f 100644 --- a/src/mongo/db/query/internal_plans.h +++ b/src/mongo/db/query/internal_plans.h @@ -78,8 +78,8 @@ public: PlanYieldPolicy::YieldPolicy yieldPolicy, Direction direction = FORWARD, boost::optional<RecordId> resumeAfterRecordId = boost::none, - boost::optional<RecordId> minRecord = boost::none, - boost::optional<RecordId> maxRecord = boost::none); + boost::optional<RecordIdBound> minRecord = boost::none, + boost::optional<RecordIdBound> maxRecord = boost::none); static std::unique_ptr<PlanExecutor, PlanExecutor::Deleter> collectionScan( OperationContext* opCtx, @@ -96,8 +96,8 @@ public: std::unique_ptr<DeleteStageParams> deleteStageParams, PlanYieldPolicy::YieldPolicy yieldPolicy, Direction direction = FORWARD, - boost::optional<RecordId> minRecord = boost::none, - boost::optional<RecordId> maxRecord = boost::none); + boost::optional<RecordIdBound> minRecord = boost::none, + boost::optional<RecordIdBound> maxRecord = boost::none); /** * Returns an index scan. Caller owns returned pointer. diff --git a/src/mongo/db/query/plan_explainer_impl.cpp b/src/mongo/db/query/plan_explainer_impl.cpp index 04cd97980e9..1e34cf73ebe 100644 --- a/src/mongo/db/query/plan_explainer_impl.cpp +++ b/src/mongo/db/query/plan_explainer_impl.cpp @@ -257,10 +257,10 @@ void statsToBSON(const PlanStageStats& stats, CollectionScanStats* spec = static_cast<CollectionScanStats*>(stats.specific.get()); bob->append("direction", spec->direction > 0 ? "forward" : "backward"); if (spec->minRecord) { - record_id_helpers::appendToBSONAs(*spec->minRecord, bob, "minRecord"); + spec->minRecord->appendToBSONAs(bob, "minRecord"); } if (spec->maxRecord) { - record_id_helpers::appendToBSONAs(*spec->maxRecord, bob, "maxRecord"); + spec->maxRecord->appendToBSONAs(bob, "maxRecord"); } if (verbosity >= ExplainOptions::Verbosity::kExecStats) { bob->appendNumber("docsExamined", static_cast<long long>(spec->docsTested)); diff --git a/src/mongo/db/query/plan_explainer_sbe.cpp b/src/mongo/db/query/plan_explainer_sbe.cpp index 35c9a2830ac..676994720a7 100644 --- a/src/mongo/db/query/plan_explainer_sbe.cpp +++ b/src/mongo/db/query/plan_explainer_sbe.cpp @@ -73,10 +73,10 @@ void statsToBSON(const QuerySolutionNode* node, auto csn = static_cast<const CollectionScanNode*>(node); bob->append("direction", csn->direction > 0 ? "forward" : "backward"); if (csn->minRecord) { - record_id_helpers::appendToBSONAs(*csn->minRecord, bob, "minRecord"); + csn->minRecord->appendToBSONAs(bob, "minRecord"); } if (csn->maxRecord) { - record_id_helpers::appendToBSONAs(*csn->maxRecord, bob, "maxRecord"); + csn->maxRecord->appendToBSONAs(bob, "maxRecord"); } break; } diff --git a/src/mongo/db/query/planner_access.cpp b/src/mongo/db/query/planner_access.cpp index 99073842737..3c6a8fdc0c0 100644 --- a/src/mongo/db/query/planner_access.cpp +++ b/src/mongo/db/query/planner_access.cpp @@ -228,6 +228,20 @@ bool affectedByCollator(const BSONElement& element) { } } +void setMinRecord(CollectionScanNode* collScan, const BSONObj& min) { + const auto newMinRecord = record_id_helpers::keyForObj(min); + if (!collScan->minRecord || newMinRecord > collScan->minRecord->recordId()) { + collScan->minRecord = RecordIdBound(newMinRecord, min); + } +} + +void setMaxRecord(CollectionScanNode* collScan, const BSONObj& max) { + const auto newMaxRecord = record_id_helpers::keyForObj(max); + if (!collScan->maxRecord || newMaxRecord < collScan->maxRecord->recordId()) { + collScan->maxRecord = RecordIdBound(newMaxRecord, max); + } +} + // Returns whether element is not affected by collators or query and collection collators are // compatible. bool compatibleCollator(const QueryPlannerParams& params, @@ -267,30 +281,12 @@ void handleRIDRangeMinMax(const CanonicalQuery& query, // Assumes clustered collection scans are only supported with the forward direction. collScan->boundInclusion = CollectionScanParams::ScanBoundInclusion::kIncludeStartRecordOnly; - newMaxRecord = record_id_helpers::keyForElem(maxObj.firstElement(), collator); + setMaxRecord(collScan, IndexBoundsBuilder::objFromElement(maxObj.firstElement(), collator)); } if (!minObj.isEmpty() && compatibleCollator(params, collator, minObj.firstElement())) { // The min() is inclusive as are bounded collection scans by default. - newMinRecord = record_id_helpers::keyForElem(minObj.firstElement(), collator); - } - - if (!collScan->minRecord) { - collScan->minRecord = newMinRecord; - } else if (newMinRecord) { - if (*newMinRecord > *collScan->minRecord) { - // The newMinRecord is more restrictive than the existing minRecord. - collScan->minRecord = newMinRecord; - } - } - - if (!collScan->maxRecord) { - collScan->maxRecord = newMaxRecord; - } else if (newMaxRecord) { - if (*newMaxRecord < *collScan->maxRecord) { - // The newMaxRecord is more restrictive than the existing maxRecord. - collScan->maxRecord = newMaxRecord; - } + setMinRecord(collScan, IndexBoundsBuilder::objFromElement(minObj.firstElement(), collator)); } } @@ -329,24 +325,31 @@ void handleRIDRangeScan(const MatchExpression* conjunct, } const auto& element = match->getData(); + + // Set coarse min/max bounds based on type in case we can't set tight bounds. + BSONObjBuilder minb; + minb.appendMinForType("", element.type()); + setMinRecord(collScan, minb.obj()); + + BSONObjBuilder maxb; + maxb.appendMaxForType("", element.type()); + setMaxRecord(collScan, maxb.obj()); + bool compatible = compatibleCollator(params, collator, element); if (!compatible) { return; // Collator affects probe and it's not compatible with collection's collator. } - auto& maxRecord = collScan->maxRecord; - auto& minRecord = collScan->minRecord; + const auto collated = IndexBoundsBuilder::objFromElement(element, collator); if (dynamic_cast<const EqualityMatchExpression*>(match)) { - minRecord = record_id_helpers::keyForElem(element, collator); - maxRecord = minRecord; - } else if (!maxRecord && - (dynamic_cast<const LTMatchExpression*>(match) || - dynamic_cast<const LTEMatchExpression*>(match))) { - maxRecord = record_id_helpers::keyForElem(element, collator); - } else if (!minRecord && - (dynamic_cast<const GTMatchExpression*>(match) || - dynamic_cast<const GTEMatchExpression*>(match))) { - minRecord = record_id_helpers::keyForElem(element, collator); + setMinRecord(collScan, collated); + setMaxRecord(collScan, collated); + } else if (dynamic_cast<const LTMatchExpression*>(match) || + dynamic_cast<const LTEMatchExpression*>(match)) { + setMaxRecord(collScan, collated); + } else if (dynamic_cast<const GTMatchExpression*>(match) || + dynamic_cast<const GTEMatchExpression*>(match)) { + setMinRecord(collScan, collated); } } @@ -398,7 +401,7 @@ std::unique_ptr<QuerySolutionNode> QueryPlannerAccess::makeCollectionScan( if (minTs) { StatusWith<RecordId> goal = record_id_helpers::keyForOptime(*minTs); if (goal.isOK()) { - csn->minRecord = goal.getValue(); + csn->minRecord = RecordIdBound(goal.getValue()); } if (assertMinTsHasNotFallenOffOplog) { @@ -408,7 +411,7 @@ std::unique_ptr<QuerySolutionNode> QueryPlannerAccess::makeCollectionScan( if (maxTs) { StatusWith<RecordId> goal = record_id_helpers::keyForOptime(*maxTs); if (goal.isOK()) { - csn->maxRecord = goal.getValue(); + csn->maxRecord = RecordIdBound(goal.getValue()); } } } diff --git a/src/mongo/db/query/query_solution.h b/src/mongo/db/query/query_solution.h index e91bb18f7ab..d63704a3afa 100644 --- a/src/mongo/db/query/query_solution.h +++ b/src/mongo/db/query/query_solution.h @@ -40,6 +40,7 @@ #include "mongo/db/query/classic_plan_cache.h" #include "mongo/db/query/index_bounds.h" #include "mongo/db/query/plan_enumerator_explain_info.h" +#include "mongo/db/query/record_id_bound.h" #include "mongo/db/query/stage_types.h" #include "mongo/util/id_generator.h" @@ -444,11 +445,11 @@ struct CollectionScanNode : public QuerySolutionNodeWithSortSet { // If present, this parameter sets the start point of a forward scan or the end point of a // reverse scan. - boost::optional<RecordId> minRecord; + boost::optional<RecordIdBound> minRecord; // If present, this parameter sets the start point of a reverse scan or the end point of a // forward scan. - boost::optional<RecordId> maxRecord; + boost::optional<RecordIdBound> maxRecord; // If true, the collection scan will return a token that can be used to resume the scan. bool requestResumeToken = false; diff --git a/src/mongo/db/query/record_id_bound.h b/src/mongo/db/query/record_id_bound.h new file mode 100644 index 00000000000..99400ae938d --- /dev/null +++ b/src/mongo/db/query/record_id_bound.h @@ -0,0 +1,97 @@ +/** + * Copyright (C) 2022-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#pragma once + +#include <boost/optional.hpp> +#include <fmt/format.h> +#include <ostream> + +#include "mongo/bson/bsonobjbuilder.h" +#include "mongo/bson/util/builder.h" +#include "mongo/db/record_id.h" +#include "mongo/db/record_id_helpers.h" +#include "mongo/db/storage/key_string.h" + +namespace mongo { + +/** + * A RecordId bound for a collection scan, with an optional BSON representation for pretty printing. + */ +class RecordIdBound { +public: + RecordIdBound() = default; + + explicit RecordIdBound(RecordId&& recordId, boost::optional<BSONObj> bson = boost::none) + : _recordId(recordId), _bson(bson) {} + + explicit RecordIdBound(const RecordId& recordId, boost::optional<BSONObj> bson = boost::none) + : _recordId(recordId), _bson(bson) {} + + RecordId recordId() const { + return _recordId; + } + + /** + * Appends a BSON respresentation of the bound to a BSONObjBuilder. If one is not explicitily + * provided it reconstructs it from the RecordId. + */ + void appendToBSONAs(BSONObjBuilder* builder, StringData fieldName) const { + if (_bson) { + builder->appendAs(_bson->firstElement(), fieldName); + } else { + record_id_helpers::appendToBSONAs(_recordId, builder, fieldName); + } + } + + std::string toString() const { + return _recordId.toString(); + } + + /** + * Compares the underlying RecordIds. + */ + int compare(const RecordIdBound& rhs) const { + return _recordId.compare(rhs._recordId); + } + +private: + RecordId _recordId; + boost::optional<BSONObj> _bson; +}; + +inline StringBuilder& operator<<(StringBuilder& stream, const RecordIdBound& id) { + return stream << "RecordIdBound(" << id.toString() << ')'; +} + +inline std::ostream& operator<<(std::ostream& stream, const RecordIdBound& id) { + return stream << "RecordIdBound(" << id.toString() << ')'; +} + +} // namespace mongo diff --git a/src/mongo/db/query/sbe_stage_builder_coll_scan.cpp b/src/mongo/db/query/sbe_stage_builder_coll_scan.cpp index d2102cea044..9ec9bb1ace3 100644 --- a/src/mongo/db/query/sbe_stage_builder_coll_scan.cpp +++ b/src/mongo/db/query/sbe_stage_builder_coll_scan.cpp @@ -45,6 +45,7 @@ #include "mongo/db/query/sbe_stage_builder.h" #include "mongo/db/query/sbe_stage_builder_filter.h" #include "mongo/db/query/util/make_data_structure.h" +#include "mongo/db/record_id_helpers.h" #include "mongo/logv2/log.h" #include "mongo/util/str.h" @@ -272,7 +273,7 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> generateOptimizedOplo return {state.slotId(), makeConstant(tag, val)}; } else if (csn->minRecord) { auto cursor = collection->getRecordStore()->getCursor(state.opCtx); - auto startRec = cursor->seekNear(*csn->minRecord); + auto startRec = cursor->seekNear(csn->minRecord->recordId()); if (startRec) { LOGV2_DEBUG(205841, 3, "Using direct oplog seek"); auto [tag, val] = sbe::value::makeCopyRecordId(startRec->id); @@ -437,7 +438,8 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> generateOptimizedOplo std::move(stage), makeBinaryOp(sbe::EPrimBinary::lessEq, makeVariable(*tsSlot), - makeConstant(sbe::value::TypeTags::Timestamp, csn->maxRecord->getLong())), + makeConstant(sbe::value::TypeTags::Timestamp, + csn->maxRecord->recordId().getLong())), csn->nodeId()); } diff --git a/src/mongo/db/record_id.h b/src/mongo/db/record_id.h index 66a58a37a04..58931fed9f9 100644 --- a/src/mongo/db/record_id.h +++ b/src/mongo/db/record_id.h @@ -85,8 +85,8 @@ public: } /** - * Construct a RecordId that holds a small binary string. The raw value for RecordStore storage - * may be retrieved using getStr(). + * Construct a RecordId that holds a binary string. The raw value for RecordStore storage may be + * retrieved using getStr(). */ explicit RecordId(const char* str, int32_t size) { invariant(size > 0, "key size must be greater than 0"); @@ -209,8 +209,8 @@ public: } /** - * Compares two RecordIds. Requires that both RecordIds are of the same format, unless one or - * both are null. Null always compares less than every other RecordId format. + * Compares two RecordIds. Requires that both RecordIds are of the same "type" (long or string). + * Null is always comparable and is less than every other RecordId format. */ int compare(const RecordId& rhs) const { switch (_format) { diff --git a/src/mongo/db/record_id_helpers.cpp b/src/mongo/db/record_id_helpers.cpp index 35925a75019..02bad4bad4e 100644 --- a/src/mongo/db/record_id_helpers.cpp +++ b/src/mongo/db/record_id_helpers.cpp @@ -51,20 +51,21 @@ StatusWith<RecordId> keyForOptime(const Timestamp& opTime) { // don't sort differently when put in a RecordId. It also avoids issues with Null/Invalid // RecordIds if (opTime.getSecs() > uint32_t(std::numeric_limits<int32_t>::max())) - return StatusWith<RecordId>(ErrorCodes::BadValue, "ts secs too high"); + return {ErrorCodes::BadValue, "ts secs too high"}; if (opTime.getInc() > uint32_t(std::numeric_limits<int32_t>::max())) - return StatusWith<RecordId>(ErrorCodes::BadValue, "ts inc too high"); + return {ErrorCodes::BadValue, "ts inc too high"}; - const RecordId out = RecordId(opTime.getSecs(), opTime.getInc()); + const auto out = RecordId(opTime.getSecs(), opTime.getInc()); if (out <= RecordId::minLong()) - return StatusWith<RecordId>(ErrorCodes::BadValue, "ts too low"); + return {ErrorCodes::BadValue, "ts too low"}; if (out >= RecordId::maxLong()) - return StatusWith<RecordId>(ErrorCodes::BadValue, "ts too high"); + return {ErrorCodes::BadValue, "ts too high"}; - return StatusWith<RecordId>(out); + return out; } + /** * data and len must be the arguments from RecordStore::insert() on an oplog collection. */ @@ -77,9 +78,9 @@ StatusWith<RecordId> extractKeyOptime(const char* data, int len) { const BSONObj obj(data); const BSONElement elem = obj["ts"]; if (elem.eoo()) - return StatusWith<RecordId>(ErrorCodes::BadValue, "no ts field"); + return {ErrorCodes::BadValue, "no ts field"}; if (elem.type() != bsonTimestamp) - return StatusWith<RecordId>(ErrorCodes::BadValue, "ts must be a Timestamp"); + return {ErrorCodes::BadValue, "ts must be a Timestamp"}; return keyForOptime(elem.timestamp()); } @@ -96,25 +97,28 @@ StatusWith<RecordId> keyForDoc(const BSONObj& doc, str::stream() << "Document " << redact(doc) << " is missing the '" << clusterKeyField << "' field"}; } + if (collator) { + BSONObjBuilder out; + CollationIndexKey::collationAwareIndexKeyAppend(keyElement, collator, &out); + return keyForElem(out.done().firstElement()); + } - return keyForElem(keyElement, collator); + return keyForElem(keyElement); } -RecordId keyForElem(const BSONElement& elem, const CollatorInterface* collator) { +RecordId keyForElem(const BSONElement& elem) { // Intentionally discard the TypeBits since the type information will be stored in the cluster // key of the original document. The consequence of this behavior is that cluster key values // that compare similarly, but are of different types may not be used concurrently. KeyString::Builder keyBuilder(KeyString::Version::kLatestVersion); - if (collator) { - BSONObjBuilder out; - CollationIndexKey::collationAwareIndexKeyAppend(elem, collator, &out); - keyBuilder.appendBSONElement(out.done().firstElement()); - } else { - keyBuilder.appendBSONElement(elem); - } + keyBuilder.appendBSONElement(elem); return RecordId(keyBuilder.getBuffer(), keyBuilder.getSize()); } +RecordId keyForObj(const BSONObj& obj) { + return keyForElem(obj.firstElement()); +} + RecordId keyForOID(OID oid) { KeyString::Builder keyBuilder(KeyString::Version::kLatestVersion); keyBuilder.appendOID(oid); diff --git a/src/mongo/db/record_id_helpers.h b/src/mongo/db/record_id_helpers.h index 4627d134f9b..378466df45a 100644 --- a/src/mongo/db/record_id_helpers.h +++ b/src/mongo/db/record_id_helpers.h @@ -39,6 +39,7 @@ namespace mongo { class Timestamp; class RecordId; + namespace record_id_helpers { /** @@ -53,7 +54,8 @@ StatusWith<RecordId> keyForOptime(const Timestamp& opTime); StatusWith<RecordId> keyForDoc(const BSONObj& doc, const ClusteredIndexSpec& indexSpec, const CollatorInterface* collator); -RecordId keyForElem(const BSONElement& elem, const CollatorInterface* collator); +RecordId keyForElem(const BSONElement& elem); +RecordId keyForObj(const BSONObj& obj); RecordId keyForOID(OID oid); RecordId keyForDate(Date_t date); diff --git a/src/mongo/db/repl/SConscript b/src/mongo/db/repl/SConscript index 20c70ef15ec..9c22495f891 100644 --- a/src/mongo/db/repl/SConscript +++ b/src/mongo/db/repl/SConscript @@ -22,6 +22,9 @@ env.Library( source=[ 'tenant_migration_decoration.cpp', ], + LIBDEPS_PRIVATE=[ + '$BUILD_DIR/mongo/base', + ] ) env.Library( diff --git a/src/mongo/db/repl/dbcheck.cpp b/src/mongo/db/repl/dbcheck.cpp index c24c170e988..6fb94689544 100644 --- a/src/mongo/db/repl/dbcheck.cpp +++ b/src/mongo/db/repl/dbcheck.cpp @@ -247,14 +247,14 @@ DbCheckHasher::DbCheckHasher(OperationContext* opCtx, InternalPlanner::IXSCAN_FETCH); } else { CollectionScanParams params; - params.minRecord = uassertStatusOK( + params.minRecord = RecordIdBound(uassertStatusOK( record_id_helpers::keyForDoc(start.obj(), collection->getClusteredInfo()->getIndexSpec(), - collection->getDefaultCollator())); - params.maxRecord = uassertStatusOK( + collection->getDefaultCollator()))); + params.maxRecord = RecordIdBound(uassertStatusOK( record_id_helpers::keyForDoc(end.obj(), collection->getClusteredInfo()->getIndexSpec(), - collection->getDefaultCollator())); + collection->getDefaultCollator()))); params.boundInclusion = CollectionScanParams::ScanBoundInclusion::kIncludeEndRecordOnly; _exec = InternalPlanner::collectionScan( opCtx, &collection, params, PlanYieldPolicy::YieldPolicy::NO_YIELD); diff --git a/src/mongo/db/repl/storage_interface_impl.cpp b/src/mongo/db/repl/storage_interface_impl.cpp index 8e27ac5a4ca..cc2b0c251c1 100644 --- a/src/mongo/db/repl/storage_interface_impl.cpp +++ b/src/mongo/db/repl/storage_interface_impl.cpp @@ -734,16 +734,12 @@ StatusWith<std::vector<BSONObj>> _findOrDeleteDocuments( "bounded collection scans only support forward scans"); } - auto collator = collection->getDefaultCollator(); - boost::optional<RecordId> minRecord, maxRecord; + boost::optional<RecordIdBound> minRecord, maxRecord; if (!startKey.isEmpty()) { - minRecord = - RecordId(record_id_helpers::keyForElem(startKey.firstElement(), collator)); + minRecord = RecordIdBound(record_id_helpers::keyForObj(startKey)); } - if (!endKey.isEmpty()) { - maxRecord = - RecordId(record_id_helpers::keyForElem(endKey.firstElement(), collator)); + maxRecord = RecordIdBound(record_id_helpers::keyForObj(endKey)); } planExecutor = isFind diff --git a/src/mongo/db/ttl.cpp b/src/mongo/db/ttl.cpp index 78c77202d75..e02c1f8fc06 100644 --- a/src/mongo/db/ttl.cpp +++ b/src/mongo/db/ttl.cpp @@ -446,24 +446,24 @@ private: * delete entries of type 'ObjectId'. All other collections must only delete entries of type * 'Date'. */ - RecordId makeCollScanEndBound(const CollectionPtr& collection, Date_t expirationDate) { + RecordIdBound makeCollScanEndBound(const CollectionPtr& collection, Date_t expirationDate) { if (collection->getTimeseriesOptions()) { auto endOID = OID(); endOID.init(expirationDate, true /* max */); - return record_id_helpers::keyForOID(endOID); + return RecordIdBound(record_id_helpers::keyForOID(endOID)); } - return record_id_helpers::keyForDate(expirationDate); + return RecordIdBound(record_id_helpers::keyForDate(expirationDate)); } - RecordId makeCollScanStartBound(const CollectionPtr& collection, const Date_t startDate) { + RecordIdBound makeCollScanStartBound(const CollectionPtr& collection, const Date_t startDate) { if (collection->getTimeseriesOptions()) { auto startOID = OID(); startOID.init(startDate, false /* max */); - return record_id_helpers::keyForOID(startOID); + return RecordIdBound(record_id_helpers::keyForOID(startOID)); } - return record_id_helpers::keyForDate(startDate); + return RecordIdBound(record_id_helpers::keyForDate(startDate)); } /* |