diff options
author | Arun Banala <arun.banala@mongodb.com> | 2019-03-21 13:41:14 +0000 |
---|---|---|
committer | Arun Banala <arun.banala@mongodb.com> | 2019-05-22 17:24:12 +0100 |
commit | 713a1575150594222866b085c14bae4f13fe93b5 (patch) | |
tree | 00d734dac9e343d3007abb8ad0d8dbd5e8a4d666 /src/mongo | |
parent | c04ac24382c947fdc24821bd40dac0e2dedd0483 (diff) | |
download | mongo-713a1575150594222866b085c14bae4f13fe93b5.tar.gz |
SERVER-39158 Change updates to prefer target by the query
Diffstat (limited to 'src/mongo')
-rw-r--r-- | src/mongo/db/exec/update_stage.cpp | 71 | ||||
-rw-r--r-- | src/mongo/db/exec/update_stage.h | 6 | ||||
-rw-r--r-- | src/mongo/s/write_ops/chunk_manager_targeter.cpp | 187 | ||||
-rw-r--r-- | src/mongo/s/write_ops/chunk_manager_targeter.h | 19 |
4 files changed, 124 insertions, 159 deletions
diff --git a/src/mongo/db/exec/update_stage.cpp b/src/mongo/db/exec/update_stage.cpp index 0b421f54d78..da467a3b407 100644 --- a/src/mongo/db/exec/update_stage.cpp +++ b/src/mongo/db/exec/update_stage.cpp @@ -608,10 +608,17 @@ void UpdateStage::doInsert() { auto newShardKey = shardKeyPattern.extractShardKeyFromDoc(newObj); if (!metadata->keyBelongsToMe(newShardKey)) { - assertNewDocHasValidShardKeyOptions(metadata); + // An attempt to upsert a document with a shard key value that belongs on another + // shard must either be a retryable write or inside a transaction. + uassert(ErrorCodes::IllegalOperation, + "The upsert document could not be inserted onto the shard targeted by the " + "query, since its shard key belongs on a different shard. Cross-shard " + "upserts are only allowed when running in a transaction or with " + "retryWrites: true.", + getOpCtx()->getTxnNumber()); uasserted( WouldChangeOwningShardInfo(request->getQuery(), newObj, true /* upsert */), - str::stream() << "The document we are inserting belongs on a different shard"); + "The document we are inserting belongs on a different shard"); } } } @@ -931,47 +938,55 @@ PlanStage::StageState UpdateStage::prepareToRetryWSM(WorkingSetID idToRetry, Wor return NEED_YIELD; } -void UpdateStage::assertNewDocHasValidShardKeyOptions(const ScopedCollectionMetadata& metadata) { - // Assert that the updated doc has all shard key fields and none are arrays or array - // descendants. +bool UpdateStage::checkUpdateChangesShardKeyFields(ScopedCollectionMetadata metadata, + const Snapshotted<BSONObj>& oldObj) { + auto newObj = _doc.getObject(); + const ShardKeyPattern shardKeyPattern(metadata->getKeyPattern()); + auto oldShardKey = shardKeyPattern.extractShardKeyFromDoc(oldObj.value()); + auto newShardKey = shardKeyPattern.extractShardKeyFromDoc(newObj); + + // If the shard key fields remain unchanged by this update or if this document is an orphan and + // so does not belong to this shard, we can skip the rest of the checks. + if ((newShardKey.woCompare(oldShardKey) == 0) || !metadata->keyBelongsToMe(oldShardKey)) { + return false; + } + FieldRefSet shardKeyPaths; const auto& shardKeyPathsVector = metadata->getKeyPatternFields(); shardKeyPaths.fillFrom(transitional_tools_do_not_use::unspool_vector(shardKeyPathsVector)); + + // Assert that the updated doc has all shard key fields and none are arrays or array + // descendants. assertRequiredPathsPresent(_doc, shardKeyPaths); - // TODO SERVER-39158: Add validation that the query has all shard key fields + // We do not allow modifying shard key value without specifying the full shard key in the query. + // If the query is a simple equality match on _id, then '_params.canonicalQuery' will be null. + // But if we are here, we already know that the shard key is not _id, since we have an assertion + // earlier for requests that try to modify the immutable _id field. So it is safe to uassert if + // '_params.canonicalQuery' is null OR if the query does not include equality matches on all + // shard key fields. + pathsupport::EqualityMatches equalities; + uassert(31025, + "Shard key update is not allowed without specifying the full shard key in the query", + _params.canonicalQuery && + pathsupport::extractFullEqualityMatches( + *(_params.canonicalQuery->root()), shardKeyPaths, &equalities) + .isOK() && + equalities.size() == shardKeyPathsVector.size()); // We do not allow updates to the shard key when 'multi' is true. uassert(ErrorCodes::InvalidOptions, - str::stream() << "Multi-update operations are not allowed when updating the" - << " shard key field.", + "Multi-update operations are not allowed when updating the shard key field.", !_params.request->isMulti()); // If this node is a replica set primary node, an attempted update to the shard key value must // either be a retryable write or inside a transaction. - // // If this node is a replica set secondary node, we can skip validation. uassert(ErrorCodes::IllegalOperation, - str::stream() << "Must run update to shard key field " - << " in a multi-statement transaction or" - " with retryWrites: true.", + "Must run update to shard key field in a multi-statement transaction or with " + "retryWrites: true.", getOpCtx()->getTxnNumber() || !getOpCtx()->writesAreReplicated()); -} - -bool UpdateStage::checkUpdateChangesShardKeyFields(ScopedCollectionMetadata metadata, - const Snapshotted<BSONObj>& oldObj) { - auto newObj = _doc.getObject(); - const ShardKeyPattern shardKeyPattern(metadata->getKeyPattern()); - auto oldShardKey = shardKeyPattern.extractShardKeyFromDoc(oldObj.value()); - auto newShardKey = shardKeyPattern.extractShardKeyFromDoc(newObj); - - // If the shard key fields remain unchanged by this update or if this document is an orphan and - // so does not belong to this shard, we can skip the rest of the checks. - if ((newShardKey.woCompare(oldShardKey) == 0) || !metadata->keyBelongsToMe(oldShardKey)) { - return false; - } - assertNewDocHasValidShardKeyOptions(metadata); if (!metadata->keyBelongsToMe(newShardKey)) { if (MONGO_FAIL_POINT(hangBeforeThrowWouldChangeOwningShard)) { log() << "Hit hangBeforeThrowWouldChangeOwningShard failpoint"; @@ -980,7 +995,7 @@ bool UpdateStage::checkUpdateChangesShardKeyFields(ScopedCollectionMetadata meta } uasserted(WouldChangeOwningShardInfo(oldObj.value(), newObj, false /* upsert */), - str::stream() << "This update would cause the doc to change owning shards"); + "This update would cause the doc to change owning shards"); } // We passed all checks, so we will return that this update changes the shard key field, and diff --git a/src/mongo/db/exec/update_stage.h b/src/mongo/db/exec/update_stage.h index 455f8917398..363f63d808a 100644 --- a/src/mongo/db/exec/update_stage.h +++ b/src/mongo/db/exec/update_stage.h @@ -199,12 +199,6 @@ private: StageState prepareToRetryWSM(WorkingSetID idToRetry, WorkingSetID* out); /** - * Called when update modifies shard key fields. Checks that the updated doc has all required - * shard key fields and throws if it does not. - */ - void assertNewDocHasValidShardKeyOptions(const ScopedCollectionMetadata& metadata); - - /** * Checks that the updated doc has all required shard key fields and throws if it does not. * * Also checks if the updated doc still belongs to this node and throws if it does not. If the diff --git a/src/mongo/s/write_ops/chunk_manager_targeter.cpp b/src/mongo/s/write_ops/chunk_manager_targeter.cpp index 770fd0c0795..25fd831460b 100644 --- a/src/mongo/s/write_ops/chunk_manager_targeter.cpp +++ b/src/mongo/s/write_ops/chunk_manager_targeter.cpp @@ -148,6 +148,12 @@ StatusWith<BSONObj> getUpdateExpr(OperationContext* opCtx, } else if (auto idElt = idFromQuery.getValue()[kIdFieldName]) { updateExpr = updateExpr.addField(idElt); } + // Confirm that the finalized replacement shard key is valid. + auto skStatus = + ShardKeyPattern::checkShardKeySize(shardKeyPattern.extractShardKeyFromDoc(updateExpr)); + if (!skStatus.isOK()) { + return skStatus; + } return updateExpr; } @@ -406,23 +412,14 @@ StatusWith<ShardEndpoint> ChunkManagerTargeter::targetInsert(OperationContext* o StatusWith<std::vector<ShardEndpoint>> ChunkManagerTargeter::targetUpdate( OperationContext* opCtx, const write_ops::UpdateOpEntry& updateDoc) const { // - // Update targeting may use either the query or the update. This is to support save-style - // updates, of the form: - // - // coll.update({ _id : xxx }, { _id : xxx, shardKey : 1, foo : bar }, { upsert : true }) - // - // Because drivers do not know the shard key, they can't pull the shard key automatically - // into the query doc, and to correctly support upsert we must target a single shard. - // - // The rule is simple - If the update is replacement style (no '$set' or pipeline), we target - // using the update. If the update is not replacement style, we target using the query. Because - // mongoD will automatically propagate '_id' from an existing document, and will extract it from - // an exact-match in the query in the case of an upsert, we augment the replacement doc with the + // The rule is simple; we always attempt to target using the query first. If the update is + // replacement-style and the query targets more than one shard, we fall back to targeting using + // the replacement document, since it must always contain a full shard key. Upserts should have + // full shard key in the query and we always use query for targeting. Because mongoD will + // automatically propagate '_id' from an existing document, and will extract it from an + // exact-match in the query in the case of an upsert, we augment the replacement doc with the // query's '_id' for targeting purposes, if it exists. // - // Once we have determined the correct component to target on, we attempt to extract an exact - // shard key from it. If one is present, we target using it. - // const auto updateType = getUpdateExprType(updateDoc); if (!updateType.isOK()) { return updateType.getStatus(); @@ -449,39 +446,61 @@ StatusWith<std::vector<ShardEndpoint>> ChunkManagerTargeter::targetUpdate( return updateExpr.getStatus(); } - // We first attempt to target by exact match on the shard key. - const auto swTarget = _targetUpdateByShardKey( - opCtx, updateType.getValue(), query, collation, updateExpr.getValue(), isUpsert); - - // Return the status if we were successful in targeting by shard key. If this is an upsert, then - // we return the status regardless of whether or not we succeeded, since an upsert must always - // target an exact shard key or fail. If this is *not* an upsert and we were unable to target an - // exact shard key, then we proceed to try other means of targeting the update. - if (isUpsert || swTarget.isOK()) { - return swTarget; - } - - // If we could not target by shard key, attempt to route the update by query or replacement doc. - auto shardEndPoints = (updateType.getValue() == UpdateType::kOpStyle - ? _targetQuery(opCtx, query, collation) - : _targetDoc(opCtx, updateExpr.getValue(), collation)); - - // Single (non-multi) updates must target a single shard, or an exact _id. - if (!updateDoc.getMulti() && shardEndPoints.isOK() && shardEndPoints.getValue().size() > 1) { - if (!isExactIdQuery(opCtx, getNS(), query, collation, _routingInfo->cm().get())) { - return {ErrorCodes::InvalidOptions, - str::stream() << "A {multi:false} update on a sharded collection must either " - "contain an exact match on _id (and have the collection " - "default collation) or must target a single shard (and have " - "the simple collation), but this update targeted " - << shardEndPoints.getValue().size() - << " shards. Update request: " - << updateDoc.toBSON() - << ", shard key pattern: " - << shardKeyPattern.toString()}; + // Utility function to target an update by shard key, and to handle any potential error results. + const auto targetByShardKey = [&collation, this]( + StatusWith<BSONObj> shardKey, StringData msg) -> StatusWith<std::vector<ShardEndpoint>> { + if (!shardKey.isOK()) { + return shardKey.getStatus().withContext(msg); + } + if (shardKey.getValue().isEmpty()) { + return {ErrorCodes::ShardKeyNotFound, + str::stream() << msg << " :: could not extract exact shard key"}; } + auto swEndPoint = _targetShardKey(shardKey.getValue(), collation, 0); + if (!swEndPoint.isOK()) { + return swEndPoint.getStatus().withContext(msg); + } + return {{swEndPoint.getValue()}}; + }; + + // If this is an upsert, then the query must contain an exact match on the shard key. If we were + // to target based on the replacement doc, it could result in an insertion even if a document + // matching the query exists on another shard. + if (isUpsert) { + return targetByShardKey(shardKeyPattern.extractShardKeyFromQuery(opCtx, query), + "Failed to target upsert by query"); + } + + // We first try to target based on the update's query. It is always valid to forward any update + // or upsert to a single shard, so return immediately if we are able to target a single shard. + auto shardEndPoints = _targetQuery(opCtx, query, collation); + if (!shardEndPoints.isOK() || shardEndPoints.getValue().size() == 1) { + return shardEndPoints; + } + + // Replacement-style updates must always target a single shard. If we were unable to do so using + // the query, we attempt to extract the shard key from the replacement and target based on it. + if (updateType == UpdateType::kReplacement) { + return targetByShardKey(shardKeyPattern.extractShardKeyFromDoc(updateExpr.getValue()), + "Failed to target update by replacement document"); + } + + // If we are here then this is an op-style update and we were not able to target a single shard. + // Non-multi updates must target a single shard or an exact _id. + if (!updateDoc.getMulti() && + !isExactIdQuery(opCtx, getNS(), query, collation, _routingInfo->cm().get())) { + return {ErrorCodes::InvalidOptions, + str::stream() << "A {multi:false} update on a sharded collection must either " + "contain an exact match on _id or must target a single shard but " + "this update targeted _id (and have the collection default " + "collation) or must target a single shard (and have the simple " + "collation), but this update targeted " + << shardEndPoints.getValue().size() + << " shards. Update request: " + << updateDoc.toBSON() + << ", shard key pattern: " + << shardKeyPattern.toString()}; } - return shardEndPoints; } @@ -512,10 +531,9 @@ StatusWith<std::vector<ShardEndpoint>> ChunkManagerTargeter::targetDelete( // Target the shard key or delete query if (!shardKey.isEmpty()) { - try { - return std::vector<ShardEndpoint>{_targetShardKey(shardKey, collation, 0)}; - } catch (const DBException&) { - // This delete is potentially not constrained to a single shard + auto swEndpoint = _targetShardKey(shardKey, collation, 0); + if (swEndpoint.isOK()) { + return {{swEndpoint.getValue()}}; } } @@ -555,58 +573,6 @@ StatusWith<std::vector<ShardEndpoint>> ChunkManagerTargeter::targetDelete( return _targetQuery(opCtx, deleteDoc.getQ(), collation); } -StatusWith<std::vector<ShardEndpoint>> ChunkManagerTargeter::_targetUpdateByShardKey( - OperationContext* opCtx, - const UpdateType updateType, - const BSONObj query, - const BSONObj collation, - const BSONObj updateExpr, - const bool isUpsert) const { - // This method should only ever be called on a sharded collection with a valid updateType. - invariant(updateType != UpdateType::kUnknown); - invariant(_routingInfo->cm()); - - const auto& shardKeyPattern = _routingInfo->cm()->getShardKeyPattern(); - - // Attempt to extract the shard key from the query (for an op-style update) or the update - // expression document (for a replacement-style update). - const auto shardKey = - (updateType == UpdateType::kOpStyle ? shardKeyPattern.extractShardKeyFromQuery(opCtx, query) - : shardKeyPattern.extractShardKeyFromDoc(updateExpr)); - if (!shardKey.isOK()) { - return shardKey.getStatus(); - } - - // Attempt to dispatch the update by routing on the extracted shard key. - if (!shardKey.getValue().isEmpty()) { - // Verify that the shard key does not exceed the maximum permitted size. - const auto shardKeySizeStatus = ShardKeyPattern::checkShardKeySize(shardKey.getValue()); - if (!shardKeySizeStatus.isOK()) { - return shardKeySizeStatus; - } - try { - const long long estimatedDataSize = query.objsize() + updateExpr.objsize(); - return std::vector<ShardEndpoint>{ - _targetShardKey(shardKey.getValue(), collation, estimatedDataSize)}; - } catch (const DBException& ex) { - // The update is potentially not constrained to a single shard. If this is an upsert, - // then we do not return the error here; we provide a more descriptive message below. - if (!isUpsert) - return ex.toStatus(); - } - } - return { - ErrorCodes::ShardKeyNotFound, - str::stream() - << (isUpsert - ? "Sharded upserts must contain the shard key and have the simple collation. " - : "Could not extract an exact shard key value. ") - << "Update request: " - << BSON("q" << query << "u" << updateExpr) - << ", shard key pattern: " - << shardKeyPattern.toString()}; -} - StatusWith<std::vector<ShardEndpoint>> ChunkManagerTargeter::_targetDoc( OperationContext* opCtx, const BSONObj& doc, const BSONObj& collation) const { // NOTE: This is weird and fragile, but it's the way our language works right now - documents @@ -643,12 +609,16 @@ StatusWith<std::vector<ShardEndpoint>> ChunkManagerTargeter::_targetQuery( return endpoints; } -ShardEndpoint ChunkManagerTargeter::_targetShardKey(const BSONObj& shardKey, - const BSONObj& collation, - long long estDataSize) const { - const auto chunk = _routingInfo->cm()->findIntersectingChunk(shardKey, collation); - - return {chunk.getShardId(), _routingInfo->cm()->getVersion(chunk.getShardId())}; +StatusWith<ShardEndpoint> ChunkManagerTargeter::_targetShardKey(const BSONObj& shardKey, + const BSONObj& collation, + long long estDataSize) const { + try { + auto chunk = _routingInfo->cm()->findIntersectingChunk(shardKey, collation); + return {{chunk.getShardId(), _routingInfo->cm()->getVersion(chunk.getShardId())}}; + } catch (const DBException& ex) { + return ex.toStatus(); + } + MONGO_UNREACHABLE; } StatusWith<std::vector<ShardEndpoint>> ChunkManagerTargeter::targetCollection() const { @@ -671,7 +641,6 @@ StatusWith<std::vector<ShardEndpoint>> ChunkManagerTargeter::targetCollection() : ChunkVersion::UNSHARDED(); endpoints.emplace_back(std::move(shardId), version); } - return endpoints; } diff --git a/src/mongo/s/write_ops/chunk_manager_targeter.h b/src/mongo/s/write_ops/chunk_manager_targeter.h index 624e3c5c670..2ace173e625 100644 --- a/src/mongo/s/write_ops/chunk_manager_targeter.h +++ b/src/mongo/s/write_ops/chunk_manager_targeter.h @@ -117,19 +117,6 @@ private: Status _refreshNow(OperationContext* opCtx); /** - * Attempts to route an update operation by extracting an exact shard key from the given query - * and/or update expression. Should only be called on sharded collections, and with a valid - * updateType. Returns not-OK if the user's query is invalid, if the extracted shard key exceeds - * the maximum allowed length, or if the update could not be targeted by exact shard key. - */ - StatusWith<std::vector<ShardEndpoint>> _targetUpdateByShardKey(OperationContext* opCtx, - const UpdateType updateType, - const BSONObj query, - const BSONObj collation, - const BSONObj updateExpr, - const bool isUpsert) const; - - /** * Returns a vector of ShardEndpoints where a document might need to be placed. * * Returns !OK with message if replacement could not be targeted @@ -159,9 +146,9 @@ private: * * If 'collation' is empty, we use the collection default collation for targeting. */ - ShardEndpoint _targetShardKey(const BSONObj& shardKey, - const BSONObj& collation, - long long estDataSize) const; + StatusWith<ShardEndpoint> _targetShardKey(const BSONObj& shardKey, + const BSONObj& collation, + long long estDataSize) const; // Full namespace of the collection for this targeter const NamespaceString _nss; |