author     Arun Banala <arun.banala@mongodb.com>    2019-03-21 13:41:14 +0000
committer  Arun Banala <arun.banala@mongodb.com>    2019-05-22 17:24:12 +0100
commit     713a1575150594222866b085c14bae4f13fe93b5 (patch)
tree       00d734dac9e343d3007abb8ad0d8dbd5e8a4d666 /src/mongo
parent     c04ac24382c947fdc24821bd40dac0e2dedd0483 (diff)
download   mongo-713a1575150594222866b085c14bae4f13fe93b5.tar.gz
SERVER-39158 Change updates to prefer targeting by the query
Diffstat (limited to 'src/mongo')
-rw-r--r--  src/mongo/db/exec/update_stage.cpp                 71
-rw-r--r--  src/mongo/db/exec/update_stage.h                    6
-rw-r--r--  src/mongo/s/write_ops/chunk_manager_targeter.cpp  187
-rw-r--r--  src/mongo/s/write_ops/chunk_manager_targeter.h     19
4 files changed, 124 insertions, 159 deletions
diff --git a/src/mongo/db/exec/update_stage.cpp b/src/mongo/db/exec/update_stage.cpp
index 0b421f54d78..da467a3b407 100644
--- a/src/mongo/db/exec/update_stage.cpp
+++ b/src/mongo/db/exec/update_stage.cpp
@@ -608,10 +608,17 @@ void UpdateStage::doInsert() {
auto newShardKey = shardKeyPattern.extractShardKeyFromDoc(newObj);
if (!metadata->keyBelongsToMe(newShardKey)) {
- assertNewDocHasValidShardKeyOptions(metadata);
+ // An attempt to upsert a document with a shard key value that belongs on another
+ // shard must either be a retryable write or inside a transaction.
+ uassert(ErrorCodes::IllegalOperation,
+ "The upsert document could not be inserted onto the shard targeted by the "
+ "query, since its shard key belongs on a different shard. Cross-shard "
+ "upserts are only allowed when running in a transaction or with "
+ "retryWrites: true.",
+ getOpCtx()->getTxnNumber());
uasserted(
WouldChangeOwningShardInfo(request->getQuery(), newObj, true /* upsert */),
- str::stream() << "The document we are inserting belongs on a different shard");
+ "The document we are inserting belongs on a different shard");
}
}
}
@@ -931,47 +938,55 @@ PlanStage::StageState UpdateStage::prepareToRetryWSM(WorkingSetID idToRetry, Wor
return NEED_YIELD;
}
-void UpdateStage::assertNewDocHasValidShardKeyOptions(const ScopedCollectionMetadata& metadata) {
- // Assert that the updated doc has all shard key fields and none are arrays or array
- // descendants.
+bool UpdateStage::checkUpdateChangesShardKeyFields(ScopedCollectionMetadata metadata,
+ const Snapshotted<BSONObj>& oldObj) {
+ auto newObj = _doc.getObject();
+ const ShardKeyPattern shardKeyPattern(metadata->getKeyPattern());
+ auto oldShardKey = shardKeyPattern.extractShardKeyFromDoc(oldObj.value());
+ auto newShardKey = shardKeyPattern.extractShardKeyFromDoc(newObj);
+
+ // If the shard key fields remain unchanged by this update or if this document is an orphan and
+ // so does not belong to this shard, we can skip the rest of the checks.
+ if ((newShardKey.woCompare(oldShardKey) == 0) || !metadata->keyBelongsToMe(oldShardKey)) {
+ return false;
+ }
+
FieldRefSet shardKeyPaths;
const auto& shardKeyPathsVector = metadata->getKeyPatternFields();
shardKeyPaths.fillFrom(transitional_tools_do_not_use::unspool_vector(shardKeyPathsVector));
+
+ // Assert that the updated doc has all shard key fields and none are arrays or array
+ // descendants.
assertRequiredPathsPresent(_doc, shardKeyPaths);
- // TODO SERVER-39158: Add validation that the query has all shard key fields
+ // We do not allow modifying the shard key value without specifying the full shard key in the query.
+ // If the query is a simple equality match on _id, then '_params.canonicalQuery' will be null.
+ // But if we are here, we already know that the shard key is not _id, since we have an assertion
+ // earlier for requests that try to modify the immutable _id field. So it is safe to uassert if
+ // '_params.canonicalQuery' is null OR if the query does not include equality matches on all
+ // shard key fields.
+ pathsupport::EqualityMatches equalities;
+ uassert(31025,
+ "Shard key update is not allowed without specifying the full shard key in the query",
+ _params.canonicalQuery &&
+ pathsupport::extractFullEqualityMatches(
+ *(_params.canonicalQuery->root()), shardKeyPaths, &equalities)
+ .isOK() &&
+ equalities.size() == shardKeyPathsVector.size());
// We do not allow updates to the shard key when 'multi' is true.
uassert(ErrorCodes::InvalidOptions,
- str::stream() << "Multi-update operations are not allowed when updating the"
- << " shard key field.",
+ "Multi-update operations are not allowed when updating the shard key field.",
!_params.request->isMulti());
// If this node is a replica set primary node, an attempted update to the shard key value must
// either be a retryable write or inside a transaction.
- //
// If this node is a replica set secondary node, we can skip validation.
uassert(ErrorCodes::IllegalOperation,
- str::stream() << "Must run update to shard key field "
- << " in a multi-statement transaction or"
- " with retryWrites: true.",
+ "Must run update to shard key field in a multi-statement transaction or with "
+ "retryWrites: true.",
getOpCtx()->getTxnNumber() || !getOpCtx()->writesAreReplicated());
-}
-
-bool UpdateStage::checkUpdateChangesShardKeyFields(ScopedCollectionMetadata metadata,
- const Snapshotted<BSONObj>& oldObj) {
- auto newObj = _doc.getObject();
- const ShardKeyPattern shardKeyPattern(metadata->getKeyPattern());
- auto oldShardKey = shardKeyPattern.extractShardKeyFromDoc(oldObj.value());
- auto newShardKey = shardKeyPattern.extractShardKeyFromDoc(newObj);
-
- // If the shard key fields remain unchanged by this update or if this document is an orphan and
- // so does not belong to this shard, we can skip the rest of the checks.
- if ((newShardKey.woCompare(oldShardKey) == 0) || !metadata->keyBelongsToMe(oldShardKey)) {
- return false;
- }
- assertNewDocHasValidShardKeyOptions(metadata);
if (!metadata->keyBelongsToMe(newShardKey)) {
if (MONGO_FAIL_POINT(hangBeforeThrowWouldChangeOwningShard)) {
log() << "Hit hangBeforeThrowWouldChangeOwningShard failpoint";
@@ -980,7 +995,7 @@ bool UpdateStage::checkUpdateChangesShardKeyFields(ScopedCollectionMetadata meta
}
uasserted(WouldChangeOwningShardInfo(oldObj.value(), newObj, false /* upsert */),
- str::stream() << "This update would cause the doc to change owning shards");
+ "This update would cause the doc to change owning shards");
}
// We passed all checks, so we will return that this update changes the shard key field, and
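The uassert(31025) above only permits a shard key update when the query carries an equality match on every shard key field. Below is a minimal, self-contained sketch of that rule (not part of the commit), using plain standard-library containers in place of MongoDB's CanonicalQuery, FieldRefSet and pathsupport::extractFullEqualityMatches; the shard key field names in main are hypothetical, chosen only for illustration.

#include <map>
#include <set>
#include <string>

// Query represented as field -> equality value; non-equality predicates are
// simply absent from this simplified representation.
using EqualityMatches = std::map<std::string, std::string>;

// True only if every shard key field appears as an exact equality in the query,
// mirroring the extractFullEqualityMatches() + size comparison above.
bool queryCoversFullShardKey(const EqualityMatches& queryEqualities,
                             const std::set<std::string>& shardKeyFields) {
    for (const auto& field : shardKeyFields) {
        if (queryEqualities.find(field) == queryEqualities.end()) {
            return false;
        }
    }
    return true;
}

int main() {
    const std::set<std::string> shardKey{"region", "customerId"};
    // {region: "EU"} alone cannot pin the document, so a shard key update with
    // this query would be rejected (error code 31025 above).
    const EqualityMatches partial{{"region", "EU"}};
    // {region: "EU", customerId: "42"} names the full shard key and is allowed.
    const EqualityMatches full{{"region", "EU"}, {"customerId", "42"}};
    return queryCoversFullShardKey(full, shardKey) &&
                   !queryCoversFullShardKey(partial, shardKey)
               ? 0
               : 1;
}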
diff --git a/src/mongo/db/exec/update_stage.h b/src/mongo/db/exec/update_stage.h
index 455f8917398..363f63d808a 100644
--- a/src/mongo/db/exec/update_stage.h
+++ b/src/mongo/db/exec/update_stage.h
@@ -199,12 +199,6 @@ private:
StageState prepareToRetryWSM(WorkingSetID idToRetry, WorkingSetID* out);
/**
- * Called when update modifies shard key fields. Checks that the updated doc has all required
- * shard key fields and throws if it does not.
- */
- void assertNewDocHasValidShardKeyOptions(const ScopedCollectionMetadata& metadata);
-
- /**
* Checks that the updated doc has all required shard key fields and throws if it does not.
*
* Also checks if the updated doc still belongs to this node and throws if it does not. If the
diff --git a/src/mongo/s/write_ops/chunk_manager_targeter.cpp b/src/mongo/s/write_ops/chunk_manager_targeter.cpp
index 770fd0c0795..25fd831460b 100644
--- a/src/mongo/s/write_ops/chunk_manager_targeter.cpp
+++ b/src/mongo/s/write_ops/chunk_manager_targeter.cpp
@@ -148,6 +148,12 @@ StatusWith<BSONObj> getUpdateExpr(OperationContext* opCtx,
} else if (auto idElt = idFromQuery.getValue()[kIdFieldName]) {
updateExpr = updateExpr.addField(idElt);
}
+ // Confirm that the finalized replacement shard key is valid.
+ auto skStatus =
+ ShardKeyPattern::checkShardKeySize(shardKeyPattern.extractShardKeyFromDoc(updateExpr));
+ if (!skStatus.isOK()) {
+ return skStatus;
+ }
return updateExpr;
}
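The checkShardKeySize() call added above rejects a finalized replacement document whose shard key is too large. The following sketch (not part of the commit) shows the shape of that guard; the 512-byte limit is an assumed placeholder for illustration and is not taken from this diff.

#include <cstddef>
#include <optional>
#include <string>

// Assumed placeholder limit, for illustration only; the real limit lives in
// ShardKeyPattern and is not shown in this diff.
constexpr std::size_t kMaxShardKeyBytes = 512;

// Returns an error message when the serialized shard key is too large,
// otherwise std::nullopt (playing the role of an OK Status).
std::optional<std::string> checkShardKeySizeSketch(const std::string& serializedShardKey) {
    if (serializedShardKey.size() > kMaxShardKeyBytes) {
        return "shard key of " + std::to_string(serializedShardKey.size()) +
               " bytes exceeds the " + std::to_string(kMaxShardKeyBytes) + " byte limit";
    }
    return std::nullopt;
}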
@@ -406,23 +412,14 @@ StatusWith<ShardEndpoint> ChunkManagerTargeter::targetInsert(OperationContext* o
StatusWith<std::vector<ShardEndpoint>> ChunkManagerTargeter::targetUpdate(
OperationContext* opCtx, const write_ops::UpdateOpEntry& updateDoc) const {
//
- // Update targeting may use either the query or the update. This is to support save-style
- // updates, of the form:
- //
- // coll.update({ _id : xxx }, { _id : xxx, shardKey : 1, foo : bar }, { upsert : true })
- //
- // Because drivers do not know the shard key, they can't pull the shard key automatically
- // into the query doc, and to correctly support upsert we must target a single shard.
- //
- // The rule is simple - If the update is replacement style (no '$set' or pipeline), we target
- // using the update. If the update is not replacement style, we target using the query. Because
- // mongoD will automatically propagate '_id' from an existing document, and will extract it from
- // an exact-match in the query in the case of an upsert, we augment the replacement doc with the
+ // The rule is simple: we always attempt to target using the query first. If the update is
+ // replacement-style and the query targets more than one shard, we fall back to targeting using
+ // the replacement document, since it must always contain the full shard key. Upserts must
+ // include the full shard key in the query and are always targeted by the query. Because mongoD
+ // will automatically propagate '_id' from an existing document, and will extract it from an
+ // exact match in the query in the case of an upsert, we augment the replacement doc with the
+ // query's '_id' for targeting purposes, if it exists.
//
- // Once we have determined the correct component to target on, we attempt to extract an exact
- // shard key from it. If one is present, we target using it.
- //
const auto updateType = getUpdateExprType(updateDoc);
if (!updateType.isOK()) {
return updateType.getStatus();
@@ -449,39 +446,61 @@ StatusWith<std::vector<ShardEndpoint>> ChunkManagerTargeter::targetUpdate(
return updateExpr.getStatus();
}
- // We first attempt to target by exact match on the shard key.
- const auto swTarget = _targetUpdateByShardKey(
- opCtx, updateType.getValue(), query, collation, updateExpr.getValue(), isUpsert);
-
- // Return the status if we were successful in targeting by shard key. If this is an upsert, then
- // we return the status regardless of whether or not we succeeded, since an upsert must always
- // target an exact shard key or fail. If this is *not* an upsert and we were unable to target an
- // exact shard key, then we proceed to try other means of targeting the update.
- if (isUpsert || swTarget.isOK()) {
- return swTarget;
- }
-
- // If we could not target by shard key, attempt to route the update by query or replacement doc.
- auto shardEndPoints = (updateType.getValue() == UpdateType::kOpStyle
- ? _targetQuery(opCtx, query, collation)
- : _targetDoc(opCtx, updateExpr.getValue(), collation));
-
- // Single (non-multi) updates must target a single shard, or an exact _id.
- if (!updateDoc.getMulti() && shardEndPoints.isOK() && shardEndPoints.getValue().size() > 1) {
- if (!isExactIdQuery(opCtx, getNS(), query, collation, _routingInfo->cm().get())) {
- return {ErrorCodes::InvalidOptions,
- str::stream() << "A {multi:false} update on a sharded collection must either "
- "contain an exact match on _id (and have the collection "
- "default collation) or must target a single shard (and have "
- "the simple collation), but this update targeted "
- << shardEndPoints.getValue().size()
- << " shards. Update request: "
- << updateDoc.toBSON()
- << ", shard key pattern: "
- << shardKeyPattern.toString()};
+ // Utility function to target an update by shard key, and to handle any potential error results.
+ const auto targetByShardKey = [&collation, this](
+ StatusWith<BSONObj> shardKey, StringData msg) -> StatusWith<std::vector<ShardEndpoint>> {
+ if (!shardKey.isOK()) {
+ return shardKey.getStatus().withContext(msg);
+ }
+ if (shardKey.getValue().isEmpty()) {
+ return {ErrorCodes::ShardKeyNotFound,
+ str::stream() << msg << " :: could not extract exact shard key"};
}
+ auto swEndPoint = _targetShardKey(shardKey.getValue(), collation, 0);
+ if (!swEndPoint.isOK()) {
+ return swEndPoint.getStatus().withContext(msg);
+ }
+ return {{swEndPoint.getValue()}};
+ };
+
+ // If this is an upsert, then the query must contain an exact match on the shard key. If we were
+ // to target based on the replacement doc, it could result in an insertion even if a document
+ // matching the query exists on another shard.
+ if (isUpsert) {
+ return targetByShardKey(shardKeyPattern.extractShardKeyFromQuery(opCtx, query),
+ "Failed to target upsert by query");
+ }
+
+ // We first try to target based on the update's query. It is always valid to forward any update
+ // or upsert to a single shard, so return immediately if we are able to target a single shard.
+ auto shardEndPoints = _targetQuery(opCtx, query, collation);
+ if (!shardEndPoints.isOK() || shardEndPoints.getValue().size() == 1) {
+ return shardEndPoints;
+ }
+
+ // Replacement-style updates must always target a single shard. If we were unable to do so using
+ // the query, we attempt to extract the shard key from the replacement and target based on it.
+ if (updateType == UpdateType::kReplacement) {
+ return targetByShardKey(shardKeyPattern.extractShardKeyFromDoc(updateExpr.getValue()),
+ "Failed to target update by replacement document");
+ }
+
+ // If we are here then this is an op-style update and we were not able to target a single shard.
+ // Non-multi updates must target a single shard or an exact _id.
+ if (!updateDoc.getMulti() &&
+ !isExactIdQuery(opCtx, getNS(), query, collation, _routingInfo->cm().get())) {
+ return {ErrorCodes::InvalidOptions,
+ str::stream() << "A {multi:false} update on a sharded collection must either "
+ "contain an exact match on _id (and have the collection default "
+ "collation) or must target a single shard (and have the simple "
+ "collation), but this update targeted "
+ << shardEndPoints.getValue().size()
+ << " shards. Update request: "
+ << updateDoc.toBSON()
+ << ", shard key pattern: "
+ << shardKeyPattern.toString()};
}
-
return shardEndPoints;
}
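To summarize the rewritten targetUpdate() above, here is a simplified, self-contained sketch (not part of the commit) of the new targeting precedence. The enum, the boolean inputs and the function are hypothetical stand-ins; the real code derives these answers from the query, the shard key pattern and the routing table.

#include <stdexcept>

// Where the update ends up being routed in this sketch.
enum class Target { byQueryShardKey, singleShardByQuery, byReplacementDoc, broadcastByQuery };

Target chooseUpdateTarget(bool isUpsert,
                          bool isReplacementStyle,
                          bool queryHasFullShardKey,
                          bool replacementHasFullShardKey,
                          int shardsHitByQuery) {
    // 1. Upserts must be targetable by an exact shard key in the query.
    if (isUpsert) {
        if (!queryHasFullShardKey)
            throw std::runtime_error("Failed to target upsert by query");
        return Target::byQueryShardKey;
    }
    // 2. Every other update is targeted by the query first; a single-shard
    //    result is always acceptable.
    if (shardsHitByQuery == 1)
        return Target::singleShardByQuery;
    // 3. Replacement-style updates that still hit several shards fall back to
    //    the replacement document, which must carry the full shard key.
    if (isReplacementStyle) {
        if (!replacementHasFullShardKey)
            throw std::runtime_error("Failed to target update by replacement document");
        return Target::byReplacementDoc;
    }
    // 4. Op-style updates may hit several shards, but only when multi:true or
    //    the query is an exact _id match (enforced by the caller in the real code).
    return Target::broadcastByQuery;
}

int main() {
    // A non-upsert replacement update whose query spans two shards is now
    // targeted by the replacement document.
    return chooseUpdateTarget(false, true, false, true, 2) == Target::byReplacementDoc ? 0 : 1;
}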
@@ -512,10 +531,9 @@ StatusWith<std::vector<ShardEndpoint>> ChunkManagerTargeter::targetDelete(
// Target the shard key or delete query
if (!shardKey.isEmpty()) {
- try {
- return std::vector<ShardEndpoint>{_targetShardKey(shardKey, collation, 0)};
- } catch (const DBException&) {
- // This delete is potentially not constrained to a single shard
+ auto swEndpoint = _targetShardKey(shardKey, collation, 0);
+ if (swEndpoint.isOK()) {
+ return {{swEndpoint.getValue()}};
}
}
@@ -555,58 +573,6 @@ StatusWith<std::vector<ShardEndpoint>> ChunkManagerTargeter::targetDelete(
return _targetQuery(opCtx, deleteDoc.getQ(), collation);
}
-StatusWith<std::vector<ShardEndpoint>> ChunkManagerTargeter::_targetUpdateByShardKey(
- OperationContext* opCtx,
- const UpdateType updateType,
- const BSONObj query,
- const BSONObj collation,
- const BSONObj updateExpr,
- const bool isUpsert) const {
- // This method should only ever be called on a sharded collection with a valid updateType.
- invariant(updateType != UpdateType::kUnknown);
- invariant(_routingInfo->cm());
-
- const auto& shardKeyPattern = _routingInfo->cm()->getShardKeyPattern();
-
- // Attempt to extract the shard key from the query (for an op-style update) or the update
- // expression document (for a replacement-style update).
- const auto shardKey =
- (updateType == UpdateType::kOpStyle ? shardKeyPattern.extractShardKeyFromQuery(opCtx, query)
- : shardKeyPattern.extractShardKeyFromDoc(updateExpr));
- if (!shardKey.isOK()) {
- return shardKey.getStatus();
- }
-
- // Attempt to dispatch the update by routing on the extracted shard key.
- if (!shardKey.getValue().isEmpty()) {
- // Verify that the shard key does not exceed the maximum permitted size.
- const auto shardKeySizeStatus = ShardKeyPattern::checkShardKeySize(shardKey.getValue());
- if (!shardKeySizeStatus.isOK()) {
- return shardKeySizeStatus;
- }
- try {
- const long long estimatedDataSize = query.objsize() + updateExpr.objsize();
- return std::vector<ShardEndpoint>{
- _targetShardKey(shardKey.getValue(), collation, estimatedDataSize)};
- } catch (const DBException& ex) {
- // The update is potentially not constrained to a single shard. If this is an upsert,
- // then we do not return the error here; we provide a more descriptive message below.
- if (!isUpsert)
- return ex.toStatus();
- }
- }
- return {
- ErrorCodes::ShardKeyNotFound,
- str::stream()
- << (isUpsert
- ? "Sharded upserts must contain the shard key and have the simple collation. "
- : "Could not extract an exact shard key value. ")
- << "Update request: "
- << BSON("q" << query << "u" << updateExpr)
- << ", shard key pattern: "
- << shardKeyPattern.toString()};
-}
-
StatusWith<std::vector<ShardEndpoint>> ChunkManagerTargeter::_targetDoc(
OperationContext* opCtx, const BSONObj& doc, const BSONObj& collation) const {
// NOTE: This is weird and fragile, but it's the way our language works right now - documents
@@ -643,12 +609,16 @@ StatusWith<std::vector<ShardEndpoint>> ChunkManagerTargeter::_targetQuery(
return endpoints;
}
-ShardEndpoint ChunkManagerTargeter::_targetShardKey(const BSONObj& shardKey,
- const BSONObj& collation,
- long long estDataSize) const {
- const auto chunk = _routingInfo->cm()->findIntersectingChunk(shardKey, collation);
-
- return {chunk.getShardId(), _routingInfo->cm()->getVersion(chunk.getShardId())};
+StatusWith<ShardEndpoint> ChunkManagerTargeter::_targetShardKey(const BSONObj& shardKey,
+ const BSONObj& collation,
+ long long estDataSize) const {
+ try {
+ auto chunk = _routingInfo->cm()->findIntersectingChunk(shardKey, collation);
+ return {{chunk.getShardId(), _routingInfo->cm()->getVersion(chunk.getShardId())}};
+ } catch (const DBException& ex) {
+ return ex.toStatus();
+ }
+ MONGO_UNREACHABLE;
}
StatusWith<std::vector<ShardEndpoint>> ChunkManagerTargeter::targetCollection() const {
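The _targetShardKey() change above converts a throwing lookup into a StatusWith result so that callers can branch on failure instead of wrapping every call in try/catch. Here is a small stand-alone sketch of that pattern (not part of the commit), with made-up types in place of StatusWith, DBException and ChunkManager::findIntersectingChunk().

#include <iostream>
#include <optional>
#include <stdexcept>
#include <string>

// Made-up result type standing in for StatusWith<ShardEndpoint>.
struct StatusWithShard {
    std::optional<std::string> shard;  // set on success
    std::string error;                 // set on failure
    bool isOK() const { return shard.has_value(); }
};

// Hypothetical stand-in for ChunkManager::findIntersectingChunk(), which throws
// when the shard key and collation cannot be resolved to a single chunk.
std::string findIntersectingChunkOrThrow(const std::string& shardKey) {
    if (shardKey.empty())
        throw std::runtime_error("could not find a single chunk for the shard key");
    return "shard-for-" + shardKey;
}

StatusWithShard targetShardKeySketch(const std::string& shardKey) {
    try {
        return {findIntersectingChunkOrThrow(shardKey), ""};
    } catch (const std::exception& ex) {
        // Mirrors 'return ex.toStatus()' above: the exception is captured and
        // surfaced as a non-OK result rather than propagated to the caller.
        return {std::nullopt, ex.what()};
    }
}

int main() {
    std::cout << targetShardKeySketch("x:1").isOK() << "\n";  // 1: targeted a shard
    std::cout << targetShardKeySketch("").error << "\n";      // prints the captured error
    return 0;
}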
@@ -671,7 +641,6 @@ StatusWith<std::vector<ShardEndpoint>> ChunkManagerTargeter::targetCollection()
: ChunkVersion::UNSHARDED();
endpoints.emplace_back(std::move(shardId), version);
}
-
return endpoints;
}
diff --git a/src/mongo/s/write_ops/chunk_manager_targeter.h b/src/mongo/s/write_ops/chunk_manager_targeter.h
index 624e3c5c670..2ace173e625 100644
--- a/src/mongo/s/write_ops/chunk_manager_targeter.h
+++ b/src/mongo/s/write_ops/chunk_manager_targeter.h
@@ -117,19 +117,6 @@ private:
Status _refreshNow(OperationContext* opCtx);
/**
- * Attempts to route an update operation by extracting an exact shard key from the given query
- * and/or update expression. Should only be called on sharded collections, and with a valid
- * updateType. Returns not-OK if the user's query is invalid, if the extracted shard key exceeds
- * the maximum allowed length, or if the update could not be targeted by exact shard key.
- */
- StatusWith<std::vector<ShardEndpoint>> _targetUpdateByShardKey(OperationContext* opCtx,
- const UpdateType updateType,
- const BSONObj query,
- const BSONObj collation,
- const BSONObj updateExpr,
- const bool isUpsert) const;
-
- /**
* Returns a vector of ShardEndpoints where a document might need to be placed.
*
* Returns !OK with message if replacement could not be targeted
@@ -159,9 +146,9 @@ private:
*
* If 'collation' is empty, we use the collection default collation for targeting.
*/
- ShardEndpoint _targetShardKey(const BSONObj& shardKey,
- const BSONObj& collation,
- long long estDataSize) const;
+ StatusWith<ShardEndpoint> _targetShardKey(const BSONObj& shardKey,
+ const BSONObj& collation,
+ long long estDataSize) const;
// Full namespace of the collection for this targeter
const NamespaceString _nss;