From 35a35f737db703ec3b35e33f94e2bc5862453bd1 Mon Sep 17 00:00:00 2001
From: Mark Benvenuto
Date: Wed, 1 Mar 2023 22:47:48 -0500
Subject: SERVER-74150 Create batch interface for reading tags for QE V2

---
 src/mongo/crypto/fle_crypto.cpp     | 72 +++++++++++++++++++++++++++++++++
 src/mongo/crypto/fle_crypto.h       | 24 +++++++++++
 src/mongo/crypto/fle_crypto_types.h | 32 +++++++++++++++
 src/mongo/crypto/fle_tags.cpp       | 81 +++++++++++++++++++++++++++++++------
 4 files changed, 196 insertions(+), 13 deletions(-)

(limited to 'src/mongo/crypto')

Review notes (not part of the original commit; text here is ignored when the patch is applied):
  * readTags(): totalTagCount is now uint64_t. It sums FLEEdgeCountInfo::count (uint64_t); the
    previous uint32_t accumulator could wrap before verifyTagsWillFit() while generateTags()
    still produced the full 64-bit number of tags.
  * ESCCollection::getTags() / readTags(): the per-set vectors are moved into the outer vector
    instead of being copied; neither local is read afterwards.
  * Fixed the "if we they will fit" comment typo.
  * Line breaks, indentation and template argument lists were lost in the web rendering this text
    was taken from and have been reconstructed; verify them against the repository before
    applying. The blob ids on the "index" lines are those of the original commit.

diff --git a/src/mongo/crypto/fle_crypto.cpp b/src/mongo/crypto/fle_crypto.cpp
index eee03185f4b..f35633f9974 100644
--- a/src/mongo/crypto/fle_crypto.cpp
+++ b/src/mongo/crypto/fle_crypto.cpp
@@ -2235,6 +2235,56 @@ BSONObj runStateMachineForDecryption(mongocrypt_ctx_t* ctx, FLEKeyVault* keyVaul
     return result;
 }
 
+FLEEdgeCountInfo getEdgeCountInfo(const FLEStateCollectionReader& reader,
+                                  ConstDataRange tag,
+                                  FLETagQueryInterface::TagQueryType type,
+                                  const boost::optional<PrfBlock>& edc) {
+
+    uint64_t count;
+
+    auto escToken = EDCServerPayloadInfo::getESCToken(tag);
+
+    auto tagToken = FLETwiceDerivedTokenGenerator::generateESCTwiceDerivedTagToken(escToken);
+    auto valueToken = FLETwiceDerivedTokenGenerator::generateESCTwiceDerivedValueToken(escToken);
+
+    auto positions = ESCCollection::emuBinaryV2(reader, tagToken, valueToken);
+
+    if (positions.cpos.has_value()) {
+        // Either no ESC documents exist yet (cpos == 0), OR new non-anchors
+        // have been inserted since the last compact/cleanup (cpos > 0).
+        count = positions.cpos.value() + 1;
+    } else {
+        // No new non-anchors since the last compact/cleanup.
+        // There must be at least one anchor.
+        uassert(7291902,
+                "An ESC anchor document is expected but none is found",
+                !positions.apos.has_value() || positions.apos.value() > 0);
+
+        PrfBlock anchorId;
+        if (!positions.apos.has_value()) {
+            anchorId = ESCCollection::generateNullAnchorId(tagToken);
+        } else {
+            anchorId = ESCCollection::generateAnchorId(tagToken, positions.apos.value());
+        }
+
+        BSONObj anchorDoc = reader.getById(anchorId);
+        uassert(7291903, "ESC anchor document not found", !anchorDoc.isEmpty());
+
+        auto escAnchor =
+            uassertStatusOK(ESCCollection::decryptAnchorDocument(valueToken, anchorDoc));
+        count = escAnchor.count + 1;
+    }
+
+
+    if (type == FLETagQueryInterface::TagQueryType::kQuery) {
+        count -= 1;
+    }
+
+    return FLEEdgeCountInfo(count, tagToken, edc.map([](const PrfBlock& prf) {
+        return FLETokenFromCDR<FLETokenType::EDCDerivedFromDataTokenAndContentionFactorToken>(prf);
+    }));
+}
+
 }  // namespace
 
 std::vector<std::string> getMinCover(const FLE2RangeFindSpec& spec, uint8_t sparsity) {
@@ -3084,6 +3134,28 @@ boost::optional<uint64_t> ESCCollection::binaryHops(const FLEStateCollectionRead
     return binarySearchCommon(reader, rho, lambda, i, idGenerator, tracker);
 }
 
+std::vector<std::vector<FLEEdgeCountInfo>> ESCCollection::getTags(
+    const FLEStateCollectionReader& reader,
+    const std::vector<std::vector<FLEEdgePrfBlock>>& tokensSets,
+    FLETagQueryInterface::TagQueryType type) {
+
+    std::vector<std::vector<FLEEdgeCountInfo>> countInfoSets;
+    countInfoSets.reserve(tokensSets.size());
+
+    for (const auto& tokens : tokensSets) {
+        std::vector<FLEEdgeCountInfo> countInfos;
+        countInfos.reserve(tokens.size());
+
+        for (const auto& token : tokens) {
+            countInfos.push_back(getEdgeCountInfo(reader, token.esc, type, token.edc));
+        }
+
+        countInfoSets.emplace_back(std::move(countInfos));
+    }
+
+    return countInfoSets;
+}
+
 PrfBlock ECCCollection::generateId(ECCTwiceDerivedTagToken tagToken,
                                    boost::optional<uint64_t> index) {
     if (index.has_value()) {
diff --git a/src/mongo/crypto/fle_crypto.h b/src/mongo/crypto/fle_crypto.h
index 9e73fe28d5e..fd0d5a25cfd 100644
--- a/src/mongo/crypto/fle_crypto.h
+++ b/src/mongo/crypto/fle_crypto.h
@@ -331,6 +331,8 @@ struct ESCDocument {
  */
 class FLETagQueryInterface {
 public:
+    enum class TagQueryType { kInsert, kQuery };
+
     virtual ~FLETagQueryInterface();
 
     /**
@@ -347,6 +349,17 @@ public:
      * Throws if the collection is not found.
      */
     virtual uint64_t countDocuments(const NamespaceString& nss) = 0;
+
+    /**
+     * Get the set of counts from ESC for a set of tags. Returns counts for these fields suitable
+     * either for query or insert based on the type parameter.
+     *
+     * Returns a vector of zeros if the collection does not exist.
+     */
+    virtual std::vector<std::vector<FLEEdgeCountInfo>> getTags(
+        const NamespaceString& nss,
+        const std::vector<std::vector<FLEEdgePrfBlock>>& tokensSets,
+        TagQueryType type) = 0;
 };
 
 
@@ -513,6 +526,17 @@ public:
         const ESCTwiceDerivedValueToken& valueToken,
         boost::optional<uint64_t> x,
         FLEStatusSection::EmuBinaryTracker& tracker);
+
+    /**
+     * Get the set of counts from ESC for a set of tags. Returns counts for these fields suitable
+     * either for query or insert based on the type parameter.
+     *
+     * Returns a vector of zeros if the collection does not exist.
+     */
+    static std::vector<std::vector<FLEEdgeCountInfo>> getTags(
+        const FLEStateCollectionReader& reader,
+        const std::vector<std::vector<FLEEdgePrfBlock>>& tokensSets,
+        FLETagQueryInterface::TagQueryType type);
 };
 
 
diff --git a/src/mongo/crypto/fle_crypto_types.h b/src/mongo/crypto/fle_crypto_types.h
index ee536bf3833..59469b8e809 100644
--- a/src/mongo/crypto/fle_crypto_types.h
+++ b/src/mongo/crypto/fle_crypto_types.h
@@ -238,4 +238,36 @@ using ServerCountAndContentionFactorEncryptionToken =
     FLEToken<FLETokenType::ServerCountAndContentionFactorEncryptionToken>;
 using ServerZerosEncryptionToken = FLEToken<FLETokenType::ServerZerosEncryptionToken>;
 
+
+/**
+ * A pair of a (ESCDerivedFromDataTokenAndContentionFactorToken, optional
+ * EDCDerivedFromDataTokenAndContentionFactorToken) that will be used to lookup a count for the ESC
+ * token from ESC. The EDC token is simply passed through to the response for query tag generation.
+ * The inclusion of EDC simplifies the code that processes the response.
+ */
+struct FLEEdgePrfBlock {
+    PrfBlock esc;                   // ESCDerivedFromDataTokenAndContentionFactorToken
+    boost::optional<PrfBlock> edc;  // EDCDerivedFromDataTokenAndContentionFactorToken
+};
+
+/**
+ * The information retrieved from ESC for a given ESC token. Count may reflect a count suitable for
+ * insert or query.
+ */
+struct FLEEdgeCountInfo {
+    FLEEdgeCountInfo(uint64_t c, ESCTwiceDerivedTagToken t) : count(c), tagToken(t) {}
+
+    FLEEdgeCountInfo(uint64_t c,
+                     ESCTwiceDerivedTagToken t,
+                     boost::optional<EDCDerivedFromDataTokenAndContentionFactorToken> edcParam)
+        : count(c), tagToken(t), edc(edcParam) {}
+
+    // May reflect a value suitable for insert or query.
+    uint64_t count;
+
+    ESCTwiceDerivedTagToken tagToken;
+
+    boost::optional<EDCDerivedFromDataTokenAndContentionFactorToken> edc;
+};
+
 }  // namespace mongo
diff --git a/src/mongo/crypto/fle_tags.cpp b/src/mongo/crypto/fle_tags.cpp
index af0f550724d..f09cc23fb54 100644
--- a/src/mongo/crypto/fle_tags.cpp
+++ b/src/mongo/crypto/fle_tags.cpp
@@ -67,6 +67,17 @@ void verifyTagsWillFit(size_t tagCount, size_t memoryLimit) {
             sizeArrayElementsMemory(tagCount) <= memoryLimit);
 }
 
+void generateTags(uint64_t numInserts,
+                  EDCDerivedFromDataTokenAndContentionFactorToken edcTok,
+                  std::vector<PrfBlock>& binaryTags) {
+
+    auto edcTag = TwiceDerived::generateEDCTwiceDerivedToken(edcTok);
+
+    for (uint64_t i = 1; i <= numInserts; i++) {
+        binaryTags.emplace_back(EDCServerCollection::generateTag(edcTag, i));
+    }
+}
+
 }  // namespace
 
 size_t sizeArrayElementsMemory(size_t tagCount) {
@@ -262,32 +273,76 @@ std::vector<PrfBlock> readTags(FLETagQueryInterface* queryImpl,
                                EDCDerivedFromDataToken d,
                                boost::optional<int64_t> cm) {
 
-    auto makeCollectionReader = [](FLETagQueryInterface* queryImpl, const NamespaceString& nss) {
-        auto docCount = queryImpl->countDocuments(nss);
-        return TxnCollectionReader(docCount, queryImpl, nss);
-    };
-
-    // Construct FLE rewriter from the transaction client and encryptionInformation.
-    auto esc = makeCollectionReader(queryImpl, nssEsc);
-    auto ecc = makeCollectionReader(queryImpl, nssEcc);
-
     // The output of readTags will be used as the argument to a $in expression, so make sure we
     // don't exceed the configured memory limit.
-    auto limit = static_cast<size_t>(internalQueryFLERewriteMemoryLimit.load());
+    auto memoryLimit = static_cast<size_t>(internalQueryFLERewriteMemoryLimit.load());
     auto contentionMax = cm.value_or(0);
     std::vector<PrfBlock> binaryTags;
 
     // TODO: SERVER-73303 remove when v2 is enabled by default
     if (!gFeatureFlagFLE2ProtocolVersion2.isEnabled(serverGlobalParams.featureCompatibility)) {
+
+        auto makeCollectionReader = [](FLETagQueryInterface* queryImpl,
+                                       const NamespaceString& nss) {
+            auto docCount = queryImpl->countDocuments(nss);
+            return TxnCollectionReader(docCount, queryImpl, nss);
+        };
+
+        // Construct FLE rewriter from the transaction client and encryptionInformation.
+        auto esc = makeCollectionReader(queryImpl, nssEsc);
+        auto ecc = makeCollectionReader(queryImpl, nssEcc);
+
         for (auto i = 0; i <= contentionMax; i++) {
-            binaryTags = readTagsWithContention(esc, ecc, s, c, d, i, limit, std::move(binaryTags));
+            binaryTags =
+                readTagsWithContention(esc, ecc, s, c, d, i, memoryLimit, std::move(binaryTags));
         }
+
         return binaryTags;
     }
 
-    for (auto i = 0; i <= contentionMax; i++) {
-        binaryTags = readTagsWithContentionV2(esc, s, d, i, limit, std::move(binaryTags));
+    std::vector<FLEEdgePrfBlock> blocks;
+    blocks.reserve(contentionMax + 1);
+
+    for (auto cf = 0; cf <= contentionMax; cf++) {
+        auto escToken =
+            DerivedToken::generateESCDerivedFromDataTokenAndContentionFactorToken(s, cf);
+        auto edcToken =
+            DerivedToken::generateEDCDerivedFromDataTokenAndContentionFactorToken(d, cf);
+
+        FLEEdgePrfBlock edgeSet{escToken.data, edcToken.data};
+
+        blocks.push_back(edgeSet);
+    }
+
+    std::vector<std::vector<FLEEdgePrfBlock>> blockSets;
+    blockSets.push_back(std::move(blocks));
+
+    auto countInfoSets =
+        queryImpl->getTags(nssEsc, blockSets, FLETagQueryInterface::TagQueryType::kQuery);
+
+
+    // Count how many tags we will need and check once if they will fit
+    //
+    uint64_t totalTagCount = 0;
+
+    for (const auto& countInfoSet : countInfoSets) {
+        for (const auto& countInfo : countInfoSet) {
+            totalTagCount += countInfo.count;
+        }
     }
+
+    verifyTagsWillFit(totalTagCount, memoryLimit);
+
+    binaryTags.reserve(totalTagCount);
+
+    for (const auto& countInfoSet : countInfoSets) {
+        for (const auto& countInfo : countInfoSet) {
+
+            uassert(7415001, "Missing EDC value", countInfo.edc.has_value());
+            generateTags(countInfo.count, countInfo.edc.value(), binaryTags);
+        }
+    }
+
     return binaryTags;
 }
 }  // namespace mongo::fle
-- 
cgit v1.2.1