summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMihai Andrei <mihai.andrei@10gen.com>2022-11-22 21:00:16 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2022-11-22 22:10:14 +0000
commita5fecd06a02b363ba02cdde14556fb000786ce2f (patch)
treecb6000fbe88ef44e475d285cdc859bd96dec4bf0
parent5631df7cc62b6804e619315b27fff39db309d88b (diff)
downloadmongo-a5fecd06a02b363ba02cdde14556fb000786ce2f.tar.gz
SERVER-71456 Update and delete size estimation logic should account for 'sampleId' field
-rw-r--r--src/mongo/db/commands/write_commands.cpp18
-rw-r--r--src/mongo/db/ops/write_ops.cpp71
-rw-r--r--src/mongo/db/ops/write_ops.h18
-rw-r--r--src/mongo/db/ops/write_ops_exec_test.cpp67
-rw-r--r--src/mongo/db/pipeline/process_interface/common_process_interface.h3
-rw-r--r--src/mongo/s/write_ops/batch_write_op.cpp36
6 files changed, 177 insertions, 36 deletions
diff --git a/src/mongo/db/commands/write_commands.cpp b/src/mongo/db/commands/write_commands.cpp
index 5a034bc922a..1bc460c4436 100644
--- a/src/mongo/db/commands/write_commands.cpp
+++ b/src/mongo/db/commands/write_commands.cpp
@@ -1671,6 +1671,15 @@ public:
source = OperationSource::kTimeseriesUpdate;
}
+ // On debug builds, verify that the estimated sizes of the updates are at least as large
+ // as the actual, serialized sizes. This ensures that the logic that estimates the size
+ // of updates for batch writes is correct.
+ if constexpr (kDebugBuild) {
+ for (auto&& updateOp : request().getUpdates()) {
+ invariant(write_ops::verifySizeEstimate(updateOp));
+ }
+ }
+
long long nModified = 0;
// Tracks the upserted information. The memory of this variable gets moved in the
@@ -1873,6 +1882,15 @@ public:
source = OperationSource::kTimeseriesDelete;
}
+ // On debug builds, verify that the estimated sizes of the deletes are at least as large
+ // as the actual, serialized sizes. This ensures that the logic that estimates the size
+ // of deletes for batch writes is correct.
+ if constexpr (kDebugBuild) {
+ for (auto&& deleteOp : request().getDeletes()) {
+ invariant(write_ops::verifySizeEstimate(deleteOp));
+ }
+ }
+
auto reply = write_ops_exec::performDeletes(opCtx, request(), source);
populateReply(opCtx,
!request().getWriteCommandRequestBase().getOrdered(),
diff --git a/src/mongo/db/ops/write_ops.cpp b/src/mongo/db/ops/write_ops.cpp
index 8e19bfd3117..135486149ef 100644
--- a/src/mongo/db/ops/write_ops.cpp
+++ b/src/mongo/db/ops/write_ops.cpp
@@ -53,6 +53,15 @@ using write_ops::WriteCommandRequestBase;
namespace {
+// This constant accounts for the null terminator in each field name and the BSONType byte for
+// each element.
+static constexpr int kPerElementOverhead = 2;
+
+// This constant tracks the overhead for serializing UUIDs. It includes 1 byte for the
+// 'BinDataType', 4 bytes for serializing the integer size of the UUID, and finally, 16 bytes
+// for the UUID itself.
+static const int kUUIDSize = 21;
+
template <class T>
void checkOpCountForCommand(const T& op, size_t numOps) {
uassert(ErrorCodes::InvalidLength,
@@ -142,7 +151,8 @@ int getUpdateSizeEstimate(const BSONObj& q,
const bool includeUpsertSupplied,
const boost::optional<mongo::BSONObj>& collation,
const boost::optional<std::vector<mongo::BSONObj>>& arrayFilters,
- const mongo::BSONObj& hint) {
+ const mongo::BSONObj& hint,
+ const boost::optional<UUID>& sampleId) {
using UpdateOpEntry = write_ops::UpdateOpEntry;
// This constant accounts for the null terminator in each field name and the BSONType byte for
@@ -190,14 +200,71 @@ int getUpdateSizeEstimate(const BSONObj& q,
})();
}
- // Add the size of 'hint' field if present.
+ // Add the size of the 'hint' field, if present.
if (!hint.isEmpty()) {
estSize += UpdateOpEntry::kHintFieldName.size() + hint.objsize() + kPerElementOverhead;
}
+ // Add the size of the 'sampleId' field, if present.
+ if (sampleId) {
+ estSize += UpdateOpEntry::kSampleIdFieldName.size() + kUUIDSize + kPerElementOverhead;
+ }
+
+ return estSize;
+}
+
+int getDeleteSizeEstimate(const BSONObj& q,
+ const boost::optional<mongo::BSONObj>& collation,
+ const mongo::BSONObj& hint,
+ const boost::optional<UUID>& sampleId) {
+ using DeleteOpEntry = write_ops::DeleteOpEntry;
+
+ static const int kIntSize = 4;
+ int estSize = static_cast<int>(BSONObj::kMinBSONLength);
+
+ // Add the size of the 'q' field.
+ estSize += DeleteOpEntry::kQFieldName.size() + q.objsize() + kPerElementOverhead;
+
+ // Add the size of the 'collation' field, if present.
+ if (collation) {
+ estSize +=
+ DeleteOpEntry::kCollationFieldName.size() + collation->objsize() + kPerElementOverhead;
+ }
+
+ // Add the size of the 'limit' field.
+ estSize += DeleteOpEntry::kMultiFieldName.size() + kIntSize + kPerElementOverhead;
+
+ // Add the size of the 'hint' field, if present.
+ if (!hint.isEmpty()) {
+ estSize += DeleteOpEntry::kHintFieldName.size() + hint.objsize() + kPerElementOverhead;
+ }
+
+ // Add the size of the 'sampleId' field, if present.
+ if (sampleId) {
+ estSize += DeleteOpEntry::kSampleIdFieldName.size() + kUUIDSize + kPerElementOverhead;
+ }
+
return estSize;
}
+bool verifySizeEstimate(const write_ops::UpdateOpEntry& update) {
+ return write_ops::getUpdateSizeEstimate(update.getQ(),
+ update.getU(),
+ update.getC(),
+ update.getUpsertSupplied().has_value(),
+ update.getCollation(),
+ update.getArrayFilters(),
+ update.getHint(),
+ update.getSampleId()) >= update.toBSON().objsize();
+}
+
+bool verifySizeEstimate(const write_ops::DeleteOpEntry& deleteOp) {
+ return write_ops::getDeleteSizeEstimate(deleteOp.getQ(),
+ deleteOp.getCollation(),
+ deleteOp.getHint(),
+ deleteOp.getSampleId()) >= deleteOp.toBSON().objsize();
+}
+
bool isClassicalUpdateReplacement(const BSONObj& update) {
// An empty update object will be treated as replacement as firstElementFieldName() returns "".
return update.firstElementFieldName()[0] != '$';
diff --git a/src/mongo/db/ops/write_ops.h b/src/mongo/db/ops/write_ops.h
index a1201412a33..e2e546cf3df 100644
--- a/src/mongo/db/ops/write_ops.h
+++ b/src/mongo/db/ops/write_ops.h
@@ -104,8 +104,8 @@ const std::vector<BSONObj>& arrayFiltersOf(const T& opEntry) {
}
/**
- * Utility which estimates the size in bytes of an update statement with the given parameters, when
- * serialized in the format used for the update command.
+ * Set of utilities which estimate the size, in bytes, of an update/delete statement with the given
+ * parameters, when serialized in the format used for the update/delete commands.
*/
int getUpdateSizeEstimate(const BSONObj& q,
const write_ops::UpdateModification& u,
@@ -113,7 +113,19 @@ int getUpdateSizeEstimate(const BSONObj& q,
bool includeUpsertSupplied,
const boost::optional<mongo::BSONObj>& collation,
const boost::optional<std::vector<mongo::BSONObj>>& arrayFilters,
- const mongo::BSONObj& hint);
+ const mongo::BSONObj& hint,
+ const boost::optional<UUID>& sampleId);
+int getDeleteSizeEstimate(const BSONObj& q,
+ const boost::optional<mongo::BSONObj>& collation,
+ const mongo::BSONObj& hint,
+ const boost::optional<UUID>& sampleId);
+
+/**
+ * Set of utilities which return true if the estimated write size is greater than or equal to the
+ * actual write size, false otherwise.
+ */
+bool verifySizeEstimate(const write_ops::UpdateOpEntry& update);
+bool verifySizeEstimate(const write_ops::DeleteOpEntry& deleteOp);
/**
* If the response from a write command contains any write errors, it will throw the first one. All
diff --git a/src/mongo/db/ops/write_ops_exec_test.cpp b/src/mongo/db/ops/write_ops_exec_test.cpp
index 9c4caac330d..7b534cf09a0 100644
--- a/src/mongo/db/ops/write_ops_exec_test.cpp
+++ b/src/mongo/db/ops/write_ops_exec_test.cpp
@@ -44,6 +44,73 @@ protected:
using CatalogTestFixture::setUp;
};
+TEST_F(WriteOpsExecTest, TestUpdateSizeEstimationLogic) {
+ // Basic test case.
+ OID id = OID::createFromString("629e1e680958e279dc29a989"_sd);
+ BSONObj updateStmt = fromjson("{$set: {a: 5}}");
+ write_ops::UpdateModification mod(std::move(updateStmt));
+ write_ops::UpdateOpEntry updateOpEntry(BSON("_id" << id), std::move(mod));
+ ASSERT(write_ops::verifySizeEstimate(updateOpEntry));
+
+ // Add 'let' constants.
+ BSONObj constants = fromjson("{constOne: 'foo'}");
+ updateOpEntry.setC(constants);
+ ASSERT(write_ops::verifySizeEstimate(updateOpEntry));
+
+ // Add 'upsertSupplied'.
+ updateOpEntry.setUpsertSupplied(OptionalBool(false));
+ ASSERT(write_ops::verifySizeEstimate(updateOpEntry));
+
+ // Set 'upsertSupplied' to true.
+ updateOpEntry.setUpsertSupplied(OptionalBool(true));
+ ASSERT(write_ops::verifySizeEstimate(updateOpEntry));
+
+ // Set 'upsertSupplied' to boost::none.
+ updateOpEntry.setUpsertSupplied(OptionalBool(boost::none));
+ ASSERT(write_ops::verifySizeEstimate(updateOpEntry));
+
+ // Add a collation.
+ BSONObj collation = fromjson("{locale: 'simple'}");
+ updateOpEntry.setCollation(collation);
+ ASSERT(write_ops::verifySizeEstimate(updateOpEntry));
+
+ // Add a hint.
+ BSONObj hint = fromjson("{_id: 1}");
+ updateOpEntry.setHint(hint);
+ ASSERT(write_ops::verifySizeEstimate(updateOpEntry));
+
+ // Add arrayFilters.
+ auto arrayFilter = std::vector<BSONObj>{fromjson("{'x.a': {$gt: 85}}")};
+ updateOpEntry.setArrayFilters(arrayFilter);
+ ASSERT(write_ops::verifySizeEstimate(updateOpEntry));
+
+ // Add a sampleId.
+ updateOpEntry.setSampleId(UUID::gen());
+ ASSERT(write_ops::verifySizeEstimate(updateOpEntry));
+}
+
+TEST_F(WriteOpsExecTest, TestDeleteSizeEstimationLogic) {
+ // Basic test case.
+ OID id = OID::createFromString("629e1e680958e279dc29a989"_sd);
+ write_ops::DeleteOpEntry deleteOpEntry(BSON("_id" << id), false /* multi */);
+ ASSERT(write_ops::verifySizeEstimate(deleteOpEntry));
+
+ // Add a collation.
+ BSONObj collation = fromjson("{locale: 'simple'}");
+ deleteOpEntry.setCollation(collation);
+ ASSERT(write_ops::verifySizeEstimate(deleteOpEntry));
+
+ // Add a hint.
+ BSONObj hint = fromjson("{_id: 1}");
+ deleteOpEntry.setHint(hint);
+ ASSERT(write_ops::verifySizeEstimate(deleteOpEntry));
+
+ // Add a sampleId.
+ deleteOpEntry.setSampleId(UUID::gen());
+ ASSERT(write_ops::verifySizeEstimate(deleteOpEntry));
+}
+
+
TEST_F(WriteOpsExecTest, PerformAtomicTimeseriesWritesWithTransform) {
NamespaceString ns{"db_write_ops_exec_test", "ts"};
auto opCtx = operationContext();
diff --git a/src/mongo/db/pipeline/process_interface/common_process_interface.h b/src/mongo/db/pipeline/process_interface/common_process_interface.h
index ea18e45b50a..80795171e26 100644
--- a/src/mongo/db/pipeline/process_interface/common_process_interface.h
+++ b/src/mongo/db/pipeline/process_interface/common_process_interface.h
@@ -82,7 +82,8 @@ public:
type != UpsertType::kNone /* includeUpsertSupplied */,
boost::none /* collation */,
boost::none /* arrayFilters */,
- BSONObj() /* hint*/) +
+ BSONObj() /* hint*/,
+ boost::none /* sampleId */) +
write_ops::kWriteCommandBSONArrayPerElementOverheadBytes;
}
};
diff --git a/src/mongo/s/write_ops/batch_write_op.cpp b/src/mongo/s/write_ops/batch_write_op.cpp
index f42b1d7f894..b89b6f9d487 100644
--- a/src/mongo/s/write_ops/batch_write_op.cpp
+++ b/src/mongo/s/write_ops/batch_write_op.cpp
@@ -55,14 +55,6 @@ struct WriteErrorComp {
}
};
-// MAGIC NUMBERS
-//
-// Before serializing updates/deletes, we don't know how big their fields would be, but we break
-// batches before serializing.
-//
-// TODO: Revisit when we revisit command limits in general
-const int kEstDeleteOverheadBytes = (BSONObjMaxInternalSize - BSONObjMaxUserSize) / 100;
-
/**
* Returns a new write concern that has the copy of every field from the original
* document but with a w set to 1. This is intended for upgrading { w: 0 } write
@@ -167,35 +159,19 @@ int getWriteSizeBytes(const WriteOp& writeOp) {
update.getUpsertSupplied().has_value(),
update.getCollation(),
update.getArrayFilters(),
- update.getHint());
+ update.getHint(),
+ update.getSampleId());
// When running a debug build, verify that estSize is at least the BSON serialization size.
dassert(estSize >= update.toBSON().objsize());
return estSize;
} else if (batchType == BatchedCommandRequest::BatchType_Delete) {
- // Note: Be conservative here - it's okay if we send slightly too many batches.
- auto estSize = static_cast<int>(BSONObj::kMinBSONLength);
- static const auto intSize = 4;
-
- // Add the size of the 'collation' field, if present.
- estSize += !item.getDelete().getCollation() ? 0
- : (DeleteOpEntry::kCollationFieldName.size() +
- item.getDelete().getCollation()->objsize());
-
- // Add the size of the 'limit' field.
- estSize += DeleteOpEntry::kMultiFieldName.size() + intSize;
-
- // Add the size of 'hint' field if present.
- if (auto hint = item.getDelete().getHint(); !hint.isEmpty()) {
- estSize += DeleteOpEntry::kHintFieldName.size() + hint.objsize();
- }
-
- // Add the size of the 'q' field, plus the constant deleteOp overhead size.
- estSize += kEstDeleteOverheadBytes +
- (DeleteOpEntry::kQFieldName.size() + item.getDelete().getQ().objsize());
+ const auto& deleteOp = item.getDelete();
+ auto estSize = write_ops::getDeleteSizeEstimate(
+ deleteOp.getQ(), deleteOp.getCollation(), deleteOp.getHint(), deleteOp.getSampleId());
// When running a debug build, verify that estSize is at least the BSON serialization size.
- dassert(estSize >= item.getDelete().toBSON().objsize());
+ dassert(estSize >= deleteOp.toBSON().objsize());
return estSize;
}