diff options
Diffstat (limited to 'src/mongo/db/commands')
-rw-r--r-- | src/mongo/db/commands/SConscript | 10 | ||||
-rw-r--r-- | src/mongo/db/commands/fle2_compact.cpp | 377 | ||||
-rw-r--r-- | src/mongo/db/commands/fle2_compact.h | 29 | ||||
-rw-r--r-- | src/mongo/db/commands/fle2_compact.idl | 24 | ||||
-rw-r--r-- | src/mongo/db/commands/fle2_compact_cmd.cpp | 45 | ||||
-rw-r--r-- | src/mongo/db/commands/fle_compact_test.cpp | 568 |
6 files changed, 1042 insertions, 11 deletions
diff --git a/src/mongo/db/commands/SConscript b/src/mongo/db/commands/SConscript index db92db44bac..d28bd5d286b 100644 --- a/src/mongo/db/commands/SConscript +++ b/src/mongo/db/commands/SConscript @@ -107,10 +107,12 @@ env.Library( LIBDEPS_PRIVATE=[ '$BUILD_DIR/mongo/bson/mutable/mutable_bson', '$BUILD_DIR/mongo/crypto/encrypted_field_config', + '$BUILD_DIR/mongo/crypto/fle_crypto', '$BUILD_DIR/mongo/db/auth/auth', '$BUILD_DIR/mongo/db/auth/authprivilege', '$BUILD_DIR/mongo/db/commands', '$BUILD_DIR/mongo/db/common', + '$BUILD_DIR/mongo/db/fle_crud', '$BUILD_DIR/mongo/db/kill_sessions', '$BUILD_DIR/mongo/db/logical_session_cache', '$BUILD_DIR/mongo/db/logical_session_cache_impl', @@ -560,6 +562,7 @@ env.Library( LIBDEPS_PRIVATE=[ '$BUILD_DIR/mongo/base', '$BUILD_DIR/mongo/client/clientdriver_minimal', + '$BUILD_DIR/mongo/crypto/fle_crypto', '$BUILD_DIR/mongo/db/auth/address_restriction', '$BUILD_DIR/mongo/db/auth/auth', '$BUILD_DIR/mongo/db/auth/auth_options', @@ -581,6 +584,7 @@ env.Library( '$BUILD_DIR/mongo/db/dbhelpers', '$BUILD_DIR/mongo/db/exec/sbe_cmd', '$BUILD_DIR/mongo/db/exec/stagedebug_cmd', + '$BUILD_DIR/mongo/db/fle_crud_mongod', '$BUILD_DIR/mongo/db/index_builds_coordinator_interface', '$BUILD_DIR/mongo/db/multitenancy', '$BUILD_DIR/mongo/db/pipeline/pipeline', @@ -604,6 +608,7 @@ env.Library( '$BUILD_DIR/mongo/idl/idl_parser', '$BUILD_DIR/mongo/util/net/ssl_manager', 'core', + 'create_command', 'kill_common', 'map_reduce_agg', 'mongod_fcv', @@ -811,6 +816,7 @@ env.CppUnitTest( target="db_commands_test", source=[ "index_filter_commands_test.cpp", + "fle_compact_test.cpp", "list_collections_filter_test.cpp", "mr_test.cpp" if get_option('js-engine') != 'none' else [], "parse_log_component_settings_test.cpp", @@ -818,10 +824,14 @@ env.CppUnitTest( "set_cluster_parameter_invocation_test.cpp", ], LIBDEPS=[ + "$BUILD_DIR/mongo/crypto/encrypted_field_config", + "$BUILD_DIR/mongo/crypto/fle_crypto", "$BUILD_DIR/mongo/db/auth/authmocks", "$BUILD_DIR/mongo/db/catalog/collection", "$BUILD_DIR/mongo/db/commands/list_collections_filter", "$BUILD_DIR/mongo/db/dbdirectclient", + "$BUILD_DIR/mongo/db/fle_crud", + "$BUILD_DIR/mongo/db/fle_mocks", "$BUILD_DIR/mongo/db/multitenancy", "$BUILD_DIR/mongo/db/op_observer", "$BUILD_DIR/mongo/db/query/query_planner", diff --git a/src/mongo/db/commands/fle2_compact.cpp b/src/mongo/db/commands/fle2_compact.cpp index 24256c6bc54..ebc964066cb 100644 --- a/src/mongo/db/commands/fle2_compact.cpp +++ b/src/mongo/db/commands/fle2_compact.cpp @@ -35,6 +35,236 @@ #include "mongo/db/catalog/collection_catalog.h" namespace mongo { +namespace { +/** + * Wrapper class around the IDL stats types that enables easier + * addition to the statistics counters. + * Wrapped object must outlive this object. + */ +template <typename IDLType> +class CompactStatsCounter { +public: + CompactStatsCounter(IDLType* wrappedType) : _stats(wrappedType) {} + void addReads(std::int64_t n) { + _stats->setRead(_stats->getRead() + n); + } + void addDeletes(std::int64_t n) { + _stats->setDeleted(_stats->getDeleted() + n); + } + void addInserts(std::int64_t n) { + _stats->setInserted(_stats->getInserted() + n); + } + void addUpdates(std::int64_t n) { + _stats->setUpdated(_stats->getUpdated() + n); + } + +private: + IDLType* _stats; +}; + +/** + * ECOCStats specializations of these functions are no-ops + * since ECOCStats does not have insert and update counters + */ +template <> +void CompactStatsCounter<ECOCStats>::addInserts(std::int64_t n) {} +template <> +void CompactStatsCounter<ECOCStats>::addUpdates(std::int64_t n) {} + +/** + * Implementation of FLEStateCollectionReader for txn_api::TransactionClient + */ +template <typename StatsType> +class TxnCollectionReader : public FLEStateCollectionReader { +public: + TxnCollectionReader(FLEQueryInterface* queryImpl, const NamespaceString& nss, StatsType* stats) + : _queryImpl(queryImpl), _nss(nss), _stats(stats) {} + + uint64_t getDocumentCount() const override { + return _queryImpl->countDocuments(_nss); + } + + BSONObj getById(PrfBlock block) const override { + auto doc = BSON("v" << BSONBinData(block.data(), block.size(), BinDataGeneral)); + BSONElement element = doc.firstElement(); + auto result = _queryImpl->getById(_nss, element); + _stats.addReads(1); + return result; + } + +private: + FLEQueryInterface* _queryImpl; + const NamespaceString& _nss; + mutable CompactStatsCounter<StatsType> _stats; +}; + +/** + * Deletes an entry at the given position from FLECollection, using + * the TagToken to generate the _id value for the delete query. + */ +template <typename FLECollection, typename TagToken> +void deleteDocumentByPos(FLEQueryInterface* queryImpl, + const NamespaceString& nss, + boost::optional<uint64_t> pos, + const TagToken& tagToken, + ECStats* stats) { + CompactStatsCounter<ECStats> statsCtr(stats); + + write_ops::DeleteOpEntry deleteEntry; + auto block = FLECollection::generateId(tagToken, pos); + deleteEntry.setMulti(false); + deleteEntry.setQ(BSON("_id" << BSONBinData(block.data(), block.size(), BinDataGeneral))); + write_ops::DeleteCommandRequest deleteRequest(nss, {std::move(deleteEntry)}); + auto [deleteReply, deletedDoc] = + queryImpl->deleteWithPreimage(nss, EncryptionInformation(BSONObj()), deleteRequest); + + if (deletedDoc.isEmpty()) { + // nothing was deleted + return; + } + checkWriteErrors(deleteReply); + statsCtr.addDeletes(1); +} + +/** + * Inserts or updates a null document in FLECollection. + * The newNullDoc must contain the _id of the null document to update. + */ +void upsertNullDocument(FLEQueryInterface* queryImpl, + bool hasNullDoc, + BSONObj newNullDoc, + const NamespaceString& nss, + ECStats* stats) { + CompactStatsCounter<ECStats> statsCtr(stats); + if (hasNullDoc) { + // update the null doc with a replacement modification + write_ops::UpdateOpEntry updateEntry; + updateEntry.setMulti("false"); + updateEntry.setUpsert("false"); + updateEntry.setQ(newNullDoc.getField("_id").wrap()); + updateEntry.setU(mongo::write_ops::UpdateModification( + newNullDoc, write_ops::UpdateModification::ClassicTag(), true)); + write_ops::UpdateCommandRequest updateRequest(nss, {std::move(updateEntry)}); + auto [reply, originalDoc] = + queryImpl->updateWithPreimage(nss, EncryptionInformation(BSONObj()), updateRequest); + checkWriteErrors(reply); + if (!originalDoc.isEmpty()) { + statsCtr.addUpdates(1); + } + } else { + // insert the null doc; translate duplicate key error to a FLE contention error + auto reply = uassertStatusOK(queryImpl->insertDocument(nss, newNullDoc, true)); + checkWriteErrors(reply); + statsCtr.addInserts(1); + } +} + +/** + * Deletes a document at the specified position from the ESC + */ +void deleteESCDocument(FLEQueryInterface* queryImpl, + const NamespaceString& nss, + boost::optional<uint64_t> pos, + const ESCTwiceDerivedTagToken& tagToken, + ECStats* escStats) { + deleteDocumentByPos<ESCCollection, ESCTwiceDerivedTagToken>( + queryImpl, nss, pos, tagToken, escStats); +} + +/** + * Deletes a document at the specified position from the ECC + */ +void deleteECCDocument(FLEQueryInterface* queryImpl, + const NamespaceString& nss, + boost::optional<uint64_t> pos, + const ECCTwiceDerivedTagToken& tagToken, + ECStats* eccStats) { + deleteDocumentByPos<ECCCollection, ECCTwiceDerivedTagToken>( + queryImpl, nss, pos, tagToken, eccStats); +} + +struct ESCPreCompactState { + uint64_t count{0}; + uint64_t ipos{0}; + uint64_t pos{0}; +}; + +/** + * Finds the upper and lower bound positions, and the current counter + * value from the ESC collection for the given twice-derived tokens, + * and inserts the compaction placeholder document. + */ +ESCPreCompactState prepareESCForCompaction(FLEQueryInterface* queryImpl, + const NamespaceString& nssEsc, + const ESCTwiceDerivedTagToken& tagToken, + const ESCTwiceDerivedValueToken& valueToken, + ECStats* escStats) { + CompactStatsCounter<ECStats> stats(escStats); + + TxnCollectionReader reader(queryImpl, nssEsc, escStats); + + // get the upper bound index 'pos' using binary search + // get the lower bound index 'ipos' from the null doc, if it exists, otherwise 1 + ESCPreCompactState state; + + auto alpha = ESCCollection::emuBinary(reader, tagToken, valueToken); + if (alpha.has_value() && alpha.value() == 0) { + // no null doc & no entries yet for this field/value pair so nothing to compact. + // this can happen if a previous compact command deleted all ESC entries for this + // field/value pair, but failed before the renamed ECOC collection could be dropped. + // skip inserting the compaction placeholder. + return state; + } else if (!alpha.has_value()) { + // only the null doc exists + auto block = ESCCollection::generateId(tagToken, boost::none); + auto r_esc = reader.getById(block); + uassert(6346802, "ESC null document not found", !r_esc.isEmpty()); + + auto nullDoc = uassertStatusOK(ESCCollection::decryptNullDocument(valueToken, r_esc)); + + // +2 to skip over index of placeholder doc from previous compaction + state.pos = nullDoc.position + 2; + state.ipos = state.pos; + state.count = nullDoc.count; + } else { + // one or more entries exist for this field/value pair + auto block = ESCCollection::generateId(tagToken, alpha); + auto r_esc = reader.getById(block); + uassert(6346803, "ESC document not found", !r_esc.isEmpty()); + + auto escDoc = uassertStatusOK(ESCCollection::decryptDocument(valueToken, r_esc)); + + state.pos = alpha.value() + 1; + state.count = escDoc.count; + + // null doc may or may not yet exist + block = ESCCollection::generateId(tagToken, boost::none); + r_esc = reader.getById(block); + if (r_esc.isEmpty()) { + state.ipos = 1; + } else { + auto nullDoc = uassertStatusOK(ESCCollection::decryptNullDocument(valueToken, r_esc)); + state.ipos = nullDoc.position + 2; + } + } + + uassert(6346804, "Invalid position range for ESC compact", state.ipos <= state.pos); + uassert(6346805, "Invalid counter value for ESC compact", state.count > 0); + + // Insert a placeholder at the next ESC position; this is deleted later in compact. + // This serves to trigger a write conflict if another write transaction is + // committed before the current compact transaction commits + auto placeholder = ESCCollection::generateCompactionPlaceholderDocument( + tagToken, valueToken, state.pos, state.count); + auto insertReply = uassertStatusOK(queryImpl->insertDocument(nssEsc, placeholder, true)); + checkWriteErrors(insertReply); + stats.addInserts(1); + + return state; +} + +} // namespace + StatusWith<EncryptedStateCollectionsNamespaces> EncryptedStateCollectionsNamespaces::createFromDataCollection(const Collection& edc) { @@ -54,6 +284,7 @@ EncryptedStateCollectionsNamespaces::createFromDataCollection(const Collection& return StringData(); }; + namespaces.edcNss = edc.ns(); namespaces.escNss = NamespaceString(db, cfg.getEscCollection().value_or_eval([&f]() { return f("state"_sd); })); namespaces.eccNss = @@ -73,4 +304,150 @@ EncryptedStateCollectionsNamespaces::createFromDataCollection(const Collection& return namespaces; } +/** + * Parses the compaction tokens from the compact request, and + * for each one, retrieves the unique entries in the ECOC collection + * that have been encrypted with that token. All entries are returned + * in a set in their decrypted form. + */ +stdx::unordered_set<ECOCCompactionDocument> getUniqueCompactionDocuments( + FLEQueryInterface* queryImpl, + const CompactStructuredEncryptionData& request, + const NamespaceString& ecocNss, + ECOCStats* ecocStats) { + + CompactStatsCounter<ECOCStats> stats(ecocStats); + + // Initialize a set 'C' and for each compaction token, find all entries + // in ECOC with matching field name. Decrypt entries and add to set 'C'. + stdx::unordered_set<ECOCCompactionDocument> c; + auto compactionTokens = CompactionHelpers::parseCompactionTokens(request.getCompactionTokens()); + + for (auto& compactionToken : compactionTokens) { + auto docs = queryImpl->findDocuments( + ecocNss, BSON(EcocDocument::kFieldNameFieldName << compactionToken.fieldPathName)); + stats.addReads(docs.size()); + + for (auto& doc : docs) { + auto ecocDoc = ECOCCollection::parseAndDecrypt(doc, compactionToken.token); + c.insert(std::move(ecocDoc)); + } + } + return c; +} + +void compactOneFieldValuePair(FLEQueryInterface* queryImpl, + const ECOCCompactionDocument& ecocDoc, + const EncryptedStateCollectionsNamespaces& namespaces, + ECStats* escStats, + ECStats* eccStats) { + // PART 1 + // prepare the ESC, and get back the highest counter value before the placeholder + // document, ipos, and pos + auto escTagToken = FLETwiceDerivedTokenGenerator::generateESCTwiceDerivedTagToken(ecocDoc.esc); + auto escValueToken = + FLETwiceDerivedTokenGenerator::generateESCTwiceDerivedValueToken(ecocDoc.esc); + auto escState = + prepareESCForCompaction(queryImpl, namespaces.escNss, escTagToken, escValueToken, escStats); + + // PART 2 + // prepare the ECC, and get back the merged set 'g_prime', whether (g_prime != g), + // ipos_prime, and pos_prime + // TODO: SERVER-63469 + + // PART 3 + // A. compact the ECC + // TODO: SERVER-63469 + bool allEntriesDeleted = false; + + // B. compact the ESC + if (escState.count != 0) { + bool hasNullDoc = (escState.ipos > 1); + + // Delete ESC entries between ipos and pos, inclusive. + // The compaction placeholder is at index pos, so it will be deleted as well. + for (auto k = escState.ipos; k <= escState.pos; k++) { + deleteESCDocument(queryImpl, namespaces.escNss, k, escTagToken, escStats); + } + + if (!allEntriesDeleted) { + // update or insert the ESC null doc + auto newNullDoc = ESCCollection::generateNullDocument( + escTagToken, escValueToken, escState.pos - 1, escState.count); + upsertNullDocument(queryImpl, hasNullDoc, newNullDoc, namespaces.escNss, escStats); + } else { + // delete the ESC null doc + if (hasNullDoc) { + deleteESCDocument(queryImpl, namespaces.escNss, boost::none, escTagToken, escStats); + } + } + } +} + +CompactStats processFLECompact(OperationContext* opCtx, + const CompactStructuredEncryptionData& request, + GetTxnCallback getTxn, + const EncryptedStateCollectionsNamespaces& namespaces) { + ECOCStats ecocStats; + ECStats escStats, eccStats; + stdx::unordered_set<ECOCCompactionDocument> c; + + // Read the ECOC documents in a transaction + { + std::shared_ptr<txn_api::TransactionWithRetries> trun = getTxn(opCtx); + + // The function that handles the transaction may outlive this function so we need to use + // shared_ptrs + auto argsBlock = std::tie(c, request, namespaces, ecocStats); + auto sharedBlock = std::make_shared<decltype(argsBlock)>(argsBlock); + + auto swResult = runInTxnWithRetry( + opCtx, + trun, + [sharedBlock](const txn_api::TransactionClient& txnClient, ExecutorPtr txnExec) { + FLEQueryInterfaceImpl queryImpl(txnClient); + + auto [c2, request2, namespaces2, ecocStats2] = *sharedBlock.get(); + + c2 = getUniqueCompactionDocuments( + &queryImpl, request2, namespaces2.ecocRenameNss, &ecocStats2); + + return SemiFuture<void>::makeReady(); + }); + + uassertStatusOK(swResult); + uassertStatusOK(swResult.getValue().getEffectiveStatus()); + } + + // Each entry in 'C' represents a unique field/value pair. For each field/value pair, + // compact the ESC & ECC entries for that field/value pair in one transaction. + for (auto& ecocDoc : c) { + // start a new transaction + std::shared_ptr<txn_api::TransactionWithRetries> trun = getTxn(opCtx); + + // The function that handles the transaction may outlive this function so we need to use + // shared_ptrs + auto argsBlock = std::tie(ecocDoc, namespaces, escStats, eccStats); + auto sharedBlock = std::make_shared<decltype(argsBlock)>(argsBlock); + + auto swResult = runInTxnWithRetry( + opCtx, + trun, + [sharedBlock](const txn_api::TransactionClient& txnClient, ExecutorPtr txnExec) { + FLEQueryInterfaceImpl queryImpl(txnClient); + + auto [ecocDoc2, namespaces2, escStats2, eccStats2] = *sharedBlock.get(); + + compactOneFieldValuePair(&queryImpl, ecocDoc2, namespaces2, &escStats2, &eccStats2); + + return SemiFuture<void>::makeReady(); + }); + + uassertStatusOK(swResult); + uassertStatusOK(swResult.getValue().getEffectiveStatus()); + } + + return CompactStats(ecocStats, eccStats, escStats); +} + } // namespace mongo diff --git a/src/mongo/db/commands/fle2_compact.h b/src/mongo/db/commands/fle2_compact.h index b994e22a5a2..cf316e1be72 100644 --- a/src/mongo/db/commands/fle2_compact.h +++ b/src/mongo/db/commands/fle2_compact.h @@ -32,6 +32,7 @@ #include "mongo/base/status_with.h" #include "mongo/db/catalog/collection.h" #include "mongo/db/commands/fle2_compact_gen.h" +#include "mongo/db/fle_crud.h" namespace mongo { @@ -40,10 +41,38 @@ struct EncryptedStateCollectionsNamespaces { static StatusWith<EncryptedStateCollectionsNamespaces> createFromDataCollection( const Collection& edc); + NamespaceString edcNss; NamespaceString escNss; NamespaceString eccNss; NamespaceString ecocNss; NamespaceString ecocRenameNss; }; +CompactStats processFLECompact(OperationContext* opCtx, + const CompactStructuredEncryptionData& request, + GetTxnCallback getTxn, + const EncryptedStateCollectionsNamespaces& namespaces); + +/** + * Get all unique documents in the ECOC collection in their decrypted form. + * + * Used by unit tests. + */ +stdx::unordered_set<ECOCCompactionDocument> getUniqueCompactionDocuments( + FLEQueryInterface* queryImpl, + const CompactStructuredEncryptionData& request, + const NamespaceString& ecocNss, + ECOCStats* ecocStats); + +/** + * Performs compaction of the ESC and ECC entries for the encrypted field/value pair + * whose tokens are in the provided ECOC compaction document. + * + * Used by unit tests. + */ +void compactOneFieldValuePair(FLEQueryInterface* queryImpl, + const ECOCCompactionDocument& ecocDoc, + const EncryptedStateCollectionsNamespaces& namespaces, + ECStats* escStats, + ECStats* eccStats); } // namespace mongo diff --git a/src/mongo/db/commands/fle2_compact.idl b/src/mongo/db/commands/fle2_compact.idl index 6693ce7aee4..9adcc209503 100644 --- a/src/mongo/db/commands/fle2_compact.idl +++ b/src/mongo/db/commands/fle2_compact.idl @@ -36,16 +36,28 @@ structs: ECOCStats: description: "Stats about records in ECOC compact touched" fields: - read: exactInt64 - deleted: exactInt64 + read: + type: exactInt64 + default: 0 + deleted: + type: exactInt64 + default: 0 ECStats: description: "Stats about records in ECC & ESC compact touched" fields: - read: exactInt64 - inserted: exactInt64 - updated: exactInt64 - deleted: exactInt64 + read: + type: exactInt64 + default: 0 + inserted: + type: exactInt64 + default: 0 + updated: + type: exactInt64 + default: 0 + deleted: + type: exactInt64 + default: 0 CompactStats: description: "Stats about records in ECC, ECOC, and ESC compact touched" diff --git a/src/mongo/db/commands/fle2_compact_cmd.cpp b/src/mongo/db/commands/fle2_compact_cmd.cpp index ba3d8a7ed22..0cd2dabf61b 100644 --- a/src/mongo/db/commands/fle2_compact_cmd.cpp +++ b/src/mongo/db/commands/fle2_compact_cmd.cpp @@ -36,9 +36,12 @@ #include "mongo/crypto/encryption_fields_gen.h" #include "mongo/db/auth/authorization_session.h" #include "mongo/db/catalog/collection_catalog.h" +#include "mongo/db/catalog/create_collection.h" +#include "mongo/db/catalog/drop_collection.h" #include "mongo/db/catalog/rename_collection.h" #include "mongo/db/catalog_raii.h" #include "mongo/db/commands.h" +#include "mongo/db/commands/create_gen.h" #include "mongo/logv2/log.h" namespace mongo { @@ -46,9 +49,7 @@ namespace { CompactStats compactEncryptedCompactionCollection(OperationContext* opCtx, const CompactStructuredEncryptionData& request) { - mongo::ECOCStats ecocStats(0, 0); - mongo::ECStats eccStats(0, 0, 0, 0); - mongo::ECStats escStats(0, 0, 0, 0); + const auto& edcNss = request.getNamespace(); LOGV2(6319900, "Compacting the encrypted compaction collection", "namespace"_attr = edcNss); @@ -73,12 +74,21 @@ CompactStats compactEncryptedCompactionCollection(OperationContext* opCtx, uassert(6319903, "Feature flag FLE2 is not enabled", gFeatureFlagFLE2.isEnabledAndIgnoreFCV()); + uassert(6346807, + "Target namespace is not an encrypted collection", + edc->getCollectionOptions().encryptedFieldConfig); + + // Validate the request contains a compaction token for each encrypted field + const auto& efc = edc->getCollectionOptions().encryptedFieldConfig.value(); + CompactionHelpers::validateCompactionTokens(efc, request.getCompactionTokens()); + auto namespaces = uassertStatusOK(EncryptedStateCollectionsNamespaces::createFromDataCollection(*edc.get())); - + // Step 1: rename the ECOC collection if it exists auto ecoc = catalog->lookupCollectionByNamespace(opCtx, namespaces.ecocNss); auto ecocRename = catalog->lookupCollectionByNamespace(opCtx, namespaces.ecocRenameNss); + bool renamed = false; if (ecoc && !ecocRename) { LOGV2(6319901, @@ -89,12 +99,37 @@ CompactStats compactEncryptedCompactionCollection(OperationContext* opCtx, validateAndRunRenameCollection( opCtx, namespaces.ecocNss, namespaces.ecocRenameNss, renameOpts); ecoc.reset(); + renamed = true; } + // Step 2: create the ECOC collection if renamed or did not exist before the rename + if (!ecoc) { + uassertStatusOK( + createCollection(opCtx, namespaces.ecocNss, CreateCommand(namespaces.ecocNss))); + } + if (!ecocRename && !renamed) { + // no pre-existing renamed ECOC collection and the rename did not occur, + // so there is nothing to compact + return CompactStats(ECOCStats(), ECStats(), ECStats()); + } + + // Step 3: for each encrypted field in compactionTokens, get distinct set of entries 'C' + // from ECOC, and for each entry in 'C', compact ESC and ECC. + CompactStats stats = + processFLECompact(opCtx, request, &getTransactionWithRetriesForMongoD, namespaces); + + // Step 4: drop the renamed ECOC collection + DropReply dropReply; + uassertStatusOK( + dropCollection(opCtx, + namespaces.ecocRenameNss, + &dropReply, + DropCollectionSystemCollectionMode::kDisallowSystemCollectionDrops)); + LOGV2(6319902, "Done compacting the encrypted compaction collection", "namespace"_attr = request.getNamespace()); - return CompactStats(ecocStats, eccStats, escStats); + return stats; } class CompactStructuredEncryptionDataCmd final diff --git a/src/mongo/db/commands/fle_compact_test.cpp b/src/mongo/db/commands/fle_compact_test.cpp new file mode 100644 index 00000000000..d7360028744 --- /dev/null +++ b/src/mongo/db/commands/fle_compact_test.cpp @@ -0,0 +1,568 @@ +/** + * Copyright (C) 2022-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/platform/basic.h" + +#include <map> +#include <third_party/murmurhash3/MurmurHash3.h> + +#include "mongo/crypto/fle_crypto.h" +#include "mongo/db/commands/fle2_compact.h" +#include "mongo/db/fle_crud.h" +#include "mongo/db/fle_query_interface_mock.h" +#include "mongo/db/repl/replication_coordinator_mock.h" +#include "mongo/db/repl/storage_interface.h" +#include "mongo/db/repl/storage_interface_impl.h" +#include "mongo/db/service_context.h" +#include "mongo/db/service_context_d_test_fixture.h" +#include "mongo/unittest/unittest.h" +#include "mongo/util/assert_util.h" + +namespace mongo { +namespace { + +constexpr auto kUserKeyId = "ABCDEFAB-1234-9876-1234-123456789012"_sd; +static UUID userKeyId = uassertStatusOK(UUID::parse(kUserKeyId.toString())); + +const FLEUserKey& getUserKey() { + static std::string userVec = hexblob::decode( + "cbebdf05fe16099fef502a6d045c1cbb77d29d2fe19f51aec5079a81008305d8868358845d2e3ab38e4fa9cbffcd651a0fc07201d7c9ed9ca3279bfa7cd673ec37b362a0aaa92f95062405a999afd49e4b1f7f818f766c49715407011ac37fa9"_sd); + static FLEUserKey userKey(KeyMaterial(userVec.begin(), userVec.end())); + return userKey; +} + +class TestKeyVault : public FLEKeyVault { +public: + TestKeyVault() : _random(123456) {} + + KeyMaterial getKey(const UUID& uuid) override; + + uint64_t getCount() const { + return _dynamicKeys.size(); + } + +private: + PseudoRandom _random; + stdx::unordered_map<UUID, KeyMaterial, UUID::Hash> _dynamicKeys; +}; + +KeyMaterial TestKeyVault::getKey(const UUID& uuid) { + if (uuid == userKeyId) { + return getUserKey().data; + } else { + if (_dynamicKeys.find(uuid) != _dynamicKeys.end()) { + return _dynamicKeys[uuid]; + } + + std::vector<uint8_t> materialVector(96); + _random.fill(&materialVector[0], materialVector.size()); + KeyMaterial material(materialVector.begin(), materialVector.end()); + _dynamicKeys.insert({uuid, material}); + return material; + } +} + +UUID fieldNameToUUID(StringData field) { + std::array<uint8_t, UUID::kNumBytes> buf; + + MurmurHash3_x86_128(field.rawData(), field.size(), 123456, buf.data()); + return UUID::fromCDR(buf); +} + +class FleCompactTest : public ServiceContextMongoDTest { +public: + struct ESCTestTokens { + ESCDerivedFromDataTokenAndContentionFactorToken contentionDerived; + ESCTwiceDerivedTagToken twiceDerivedTag; + ESCTwiceDerivedValueToken twiceDerivedValue; + }; + struct ECCTestTokens { + ECCDerivedFromDataTokenAndContentionFactorToken contentionDerived; + ECCTwiceDerivedTagToken twiceDerivedTag; + ECCTwiceDerivedValueToken twiceDerivedValue; + }; + struct InsertionState { + uint64_t count{0}; + uint64_t pos{0}; + uint64_t toInsertCount{0}; + std::vector<std::pair<uint64_t, uint64_t>> toDeleteRanges; + std::map<uint64_t, int> insertedIds; + std::string value; + }; + +protected: + void setUp(); + void tearDown(); + + void createCollection(const NamespaceString& ns); + + void assertDocumentCounts(uint64_t edc, uint64_t esc, uint64_t ecc, uint64_t ecoc); + + void assertESCDocument(BSONObj obj, bool exists, uint64_t index, uint64_t count); + void assertESCNullDocument(BSONObj obj, bool exists, uint64_t position, uint64_t count); + + void assertECCNullDocument(BSONObj obj, bool exists, uint64_t position); + + ESCTestTokens getTestESCTokens(BSONObj obj); + ECCTestTokens getTestECCTokens(BSONObj obj); + + ECOCCompactionDocument generateTestECOCDocument(BSONObj obj); + + EncryptedFieldConfig generateEncryptedFieldConfig( + const std::set<std::string>& encryptedFieldNames); + + CompactStructuredEncryptionData generateCompactCommand( + const std::set<std::string>& encryptedFieldNames); + + std::vector<char> generatePlaceholder(UUID keyId, BSONElement value); + + void doSingleInsert(int id, BSONObj encryptedFieldsObj); + void doSingleDelete(int id, BSONObj encryptedFieldsObj); + + void insertFieldValues(StringData fieldName, std::map<std::string, InsertionState>& values); + void deleteFieldValues(StringData fieldName, std::map<std::string, InsertionState>& values); + +protected: + ServiceContext::UniqueOperationContext _opCtx; + + repl::StorageInterface* _storage{nullptr}; + + std::unique_ptr<FLEQueryInterfaceMock> _queryImpl; + + std::unique_ptr<repl::UnreplicatedWritesBlock> _uwb; + + TestKeyVault _keyVault; + + EncryptedStateCollectionsNamespaces _namespaces; +}; + +void FleCompactTest::setUp() { + ServiceContextMongoDTest::setUp(); + auto service = getServiceContext(); + + repl::ReplicationCoordinator::set(service, + std::make_unique<repl::ReplicationCoordinatorMock>(service)); + + _opCtx = cc().makeOperationContext(); + _uwb = std::make_unique<repl::UnreplicatedWritesBlock>(_opCtx.get()); + + repl::StorageInterface::set(service, std::make_unique<repl::StorageInterfaceImpl>()); + _storage = repl::StorageInterface::get(service); + + _queryImpl = std::make_unique<FLEQueryInterfaceMock>(_opCtx.get(), _storage); + + _namespaces.edcNss = NamespaceString("test.edc"); + _namespaces.escNss = NamespaceString("test.esc"); + _namespaces.eccNss = NamespaceString("test.ecc"); + _namespaces.ecocNss = NamespaceString("test.ecoc"); + _namespaces.ecocRenameNss = NamespaceString("test.ecoc.compact"); + + createCollection(_namespaces.edcNss); + createCollection(_namespaces.escNss); + createCollection(_namespaces.eccNss); + createCollection(_namespaces.ecocNss); +} + +void FleCompactTest::tearDown() { + _uwb.reset(nullptr); + _opCtx.reset(nullptr); + ServiceContextMongoDTest::tearDown(); +} + +void FleCompactTest::createCollection(const NamespaceString& ns) { + CollectionOptions collectionOptions; + collectionOptions.uuid = UUID::gen(); + auto statusCC = _storage->createCollection( + _opCtx.get(), NamespaceString(ns.db(), ns.coll()), collectionOptions); + ASSERT_OK(statusCC); +} + +void FleCompactTest::assertDocumentCounts(uint64_t edc, uint64_t esc, uint64_t ecc, uint64_t ecoc) { + ASSERT_EQ(_queryImpl->countDocuments(_namespaces.edcNss), edc); + ASSERT_EQ(_queryImpl->countDocuments(_namespaces.escNss), esc); + ASSERT_EQ(_queryImpl->countDocuments(_namespaces.eccNss), ecc); + ASSERT_EQ(_queryImpl->countDocuments(_namespaces.ecocNss), ecoc); +} + +void FleCompactTest::assertESCDocument(BSONObj obj, bool exists, uint64_t index, uint64_t count) { + auto tokens = getTestESCTokens(obj); + auto doc = _queryImpl->getById(_namespaces.escNss, + ESCCollection::generateId(tokens.twiceDerivedTag, index)); + + ASSERT_EQ(doc.isEmpty(), !exists); + + if (exists) { + auto escDoc = + uassertStatusOK(ESCCollection::decryptDocument(tokens.twiceDerivedValue, doc)); + ASSERT_FALSE(escDoc.compactionPlaceholder); + ASSERT_EQ(escDoc.position, 0); + ASSERT_EQ(escDoc.count, count); + } +} + +void FleCompactTest::assertESCNullDocument(BSONObj obj, bool exists, uint64_t pos, uint64_t count) { + auto tokens = getTestESCTokens(obj); + auto doc = _queryImpl->getById(_namespaces.escNss, + ESCCollection::generateId(tokens.twiceDerivedTag, boost::none)); + + ASSERT_EQ(doc.isEmpty(), !exists); + + if (exists) { + auto nullDoc = + uassertStatusOK(ESCCollection::decryptNullDocument(tokens.twiceDerivedValue, doc)); + ASSERT_EQ(nullDoc.position, pos); + ASSERT_EQ(nullDoc.count, count); + } +} + +void FleCompactTest::assertECCNullDocument(BSONObj obj, bool exists, uint64_t pos) { + auto tokens = getTestECCTokens(obj); + auto doc = _queryImpl->getById(_namespaces.eccNss, + ECCCollection::generateId(tokens.twiceDerivedTag, boost::none)); + + ASSERT_EQ(doc.isEmpty(), !exists); + + if (exists) { + auto nullDoc = + uassertStatusOK(ECCCollection::decryptNullDocument(tokens.twiceDerivedValue, doc)); + ASSERT_EQ(nullDoc.position, pos); + } +} + +FleCompactTest::ESCTestTokens FleCompactTest::getTestESCTokens(BSONObj obj) { + auto element = obj.firstElement(); + auto indexKeyId = fieldNameToUUID(element.fieldNameStringData()); + auto c1token = FLELevel1TokenGenerator::generateCollectionsLevel1Token( + _keyVault.getIndexKeyById(indexKeyId).key); + auto escToken = FLECollectionTokenGenerator::generateESCToken(c1token); + auto eltCdr = ConstDataRange(element.value(), element.value() + element.valuesize()); + auto escDataToken = + FLEDerivedFromDataTokenGenerator::generateESCDerivedFromDataToken(escToken, eltCdr); + + FleCompactTest::ESCTestTokens tokens; + tokens.contentionDerived = FLEDerivedFromDataTokenAndContentionFactorTokenGenerator:: + generateESCDerivedFromDataTokenAndContentionFactorToken(escDataToken, 0); + tokens.twiceDerivedValue = + FLETwiceDerivedTokenGenerator::generateESCTwiceDerivedValueToken(tokens.contentionDerived); + tokens.twiceDerivedTag = + FLETwiceDerivedTokenGenerator::generateESCTwiceDerivedTagToken(tokens.contentionDerived); + return tokens; +} + +FleCompactTest::ECCTestTokens FleCompactTest::getTestECCTokens(BSONObj obj) { + auto element = obj.firstElement(); + auto indexKeyId = fieldNameToUUID(element.fieldNameStringData()); + auto c1token = FLELevel1TokenGenerator::generateCollectionsLevel1Token( + _keyVault.getIndexKeyById(indexKeyId).key); + auto eccToken = FLECollectionTokenGenerator::generateECCToken(c1token); + auto eltCdr = ConstDataRange(element.value(), element.value() + element.valuesize()); + auto eccDataToken = + FLEDerivedFromDataTokenGenerator::generateECCDerivedFromDataToken(eccToken, eltCdr); + + FleCompactTest::ECCTestTokens tokens; + tokens.contentionDerived = FLEDerivedFromDataTokenAndContentionFactorTokenGenerator:: + generateECCDerivedFromDataTokenAndContentionFactorToken(eccDataToken, 0); + tokens.twiceDerivedValue = + FLETwiceDerivedTokenGenerator::generateECCTwiceDerivedValueToken(tokens.contentionDerived); + tokens.twiceDerivedTag = + FLETwiceDerivedTokenGenerator::generateECCTwiceDerivedTagToken(tokens.contentionDerived); + return tokens; +} + +ECOCCompactionDocument FleCompactTest::generateTestECOCDocument(BSONObj obj) { + ECOCCompactionDocument doc; + doc.fieldName = obj.firstElementFieldName(); + doc.esc = getTestESCTokens(obj).contentionDerived; + doc.ecc = getTestECCTokens(obj).contentionDerived; + return doc; +} + +EncryptedFieldConfig FleCompactTest::generateEncryptedFieldConfig( + const std::set<std::string>& encryptedFieldNames) { + BSONObjBuilder builder; + EncryptedFieldConfig efc; + std::vector<EncryptedField> encryptedFields; + + for (const auto& field : encryptedFieldNames) { + EncryptedField ef; + ef.setKeyId(fieldNameToUUID(field)); + ef.setPath(field); + ef.setBsonType("string"); + QueryTypeConfig q(QueryTypeEnum::Equality); + auto x = ef.getQueries(); + x = std::move(q); + ef.setQueries(x); + encryptedFields.push_back(std::move(ef)); + } + + efc.setEscCollection(_namespaces.escNss.coll()); + efc.setEccCollection(_namespaces.eccNss.coll()); + efc.setEcocCollection(_namespaces.ecocNss.coll()); + efc.setFields(std::move(encryptedFields)); + return efc; +} + +CompactStructuredEncryptionData FleCompactTest::generateCompactCommand( + const std::set<std::string>& encryptedFieldNames) { + CompactStructuredEncryptionData cmd(_namespaces.edcNss); + auto efc = generateEncryptedFieldConfig(encryptedFieldNames); + auto compactionTokens = FLEClientCrypto::generateCompactionTokens(efc, &_keyVault); + cmd.setCompactionTokens(compactionTokens); + return cmd; +} + +std::vector<char> FleCompactTest::generatePlaceholder(UUID keyId, BSONElement value) { + FLE2EncryptionPlaceholder ep; + + ep.setAlgorithm(mongo::Fle2AlgorithmInt::kEquality); + ep.setUserKeyId(userKeyId); + ep.setIndexKeyId(keyId); + ep.setValue(value); + ep.setType(mongo::Fle2PlaceholderType::kInsert); + ep.setMaxContentionCounter(0); + + BSONObj obj = ep.toBSON(); + + std::vector<char> v; + v.resize(obj.objsize() + 1); + v[0] = static_cast<uint8_t>(EncryptedBinDataType::kFLE2Placeholder); + std::copy(obj.objdata(), obj.objdata() + obj.objsize(), v.begin() + 1); + return v; +} + +void FleCompactTest::doSingleInsert(int id, BSONObj encryptedFieldsObj) { + BSONObjBuilder builder; + builder.append("_id", id); + builder.append("plainText", "sample"); + + for (auto&& elt : encryptedFieldsObj) { + UUID uuid = fieldNameToUUID(elt.fieldNameStringData()); + auto buf = generatePlaceholder(uuid, elt); + builder.appendBinData( + elt.fieldNameStringData(), buf.size(), BinDataType::Encrypt, buf.data()); + } + + auto clientDoc = builder.obj(); + + auto result = FLEClientCrypto::transformPlaceholders(clientDoc, &_keyVault); + + auto serverPayload = EDCServerCollection::getEncryptedFieldInfo(result); + + auto efc = + generateEncryptedFieldConfig(encryptedFieldsObj.getFieldNames<std::set<std::string>>()); + + uassertStatusOK( + processInsert(_queryImpl.get(), _namespaces.edcNss, serverPayload, efc, result)); +} + +void FleCompactTest::doSingleDelete(int id, BSONObj encryptedFieldsObj) { + auto efc = + generateEncryptedFieldConfig(encryptedFieldsObj.getFieldNames<std::set<std::string>>()); + + auto doc = EncryptionInformationHelpers::encryptionInformationSerializeForDelete( + _namespaces.edcNss, efc, &_keyVault); + + auto ei = EncryptionInformation::parse(IDLParserErrorContext("test"), doc); + + write_ops::DeleteOpEntry entry; + entry.setQ(BSON("_id" << id)); + entry.setMulti(false); + + write_ops::DeleteCommandRequest deleteRequest(_namespaces.edcNss); + deleteRequest.setDeletes({entry}); + deleteRequest.getWriteCommandRequestBase().setEncryptionInformation(ei); + + std::unique_ptr<CollatorInterface> collator; + auto expCtx = make_intrusive<ExpressionContext>(_opCtx.get(), + std::move(collator), + deleteRequest.getNamespace(), + deleteRequest.getLegacyRuntimeConstants(), + deleteRequest.getLet()); + + processDelete(_queryImpl.get(), expCtx, deleteRequest); +} + +void FleCompactTest::insertFieldValues(StringData field, + std::map<std::string, InsertionState>& values) { + static int insertId = 1; + + for (auto& [value, state] : values) { + for (; state.toInsertCount > 0; state.toInsertCount--) { + BSONObjBuilder builder; + builder.append(field, value); + doSingleInsert(insertId, builder.obj()); + ++state.pos; + ++state.count; + state.insertedIds[state.count] = insertId; + ++insertId; + } + } +} + +void FleCompactTest::deleteFieldValues(StringData field, + std::map<std::string, InsertionState>& values) { + for (auto& [value, state] : values) { + BSONObjBuilder builder; + builder.append(field, value); + auto entry = builder.obj(); + + for (auto& range : state.toDeleteRanges) { + for (auto ctr = range.first; ctr <= range.second; ctr++) { + if (state.insertedIds.find(ctr) == state.insertedIds.end()) { + continue; + } + doSingleDelete(state.insertedIds[ctr], entry); + state.insertedIds.erase(ctr); + } + } + } +} + +TEST_F(FleCompactTest, GetUniqueECOCDocsFromEmptyECOC) { + ECOCStats stats; + std::set<std::string> fieldSet = {"first", "ssn"}; + auto cmd = generateCompactCommand(fieldSet); + auto docs = getUniqueCompactionDocuments(_queryImpl.get(), cmd, _namespaces.ecocNss, &stats); + ASSERT(docs.empty()); +} + +TEST_F(FleCompactTest, GetUniqueECOCDocsMultipleFieldsWithManyDuplicateValues) { + ECOCStats stats; + std::set<std::string> fieldSet = {"first", "ssn", "city", "state", "zip"}; + int numInserted = 0; + int uniqueValuesPerField = 10; + stdx::unordered_set<ECOCCompactionDocument> expected; + + for (auto& field : fieldSet) { + std::map<std::string, InsertionState> values; + for (int i = 1; i <= uniqueValuesPerField; i++) { + auto val = "value_" + std::to_string(i); + expected.insert(generateTestECOCDocument(BSON(field << val))); + values[val].toInsertCount = i; + numInserted += i; + } + insertFieldValues(field, values); + } + + assertDocumentCounts(numInserted, numInserted, 0, numInserted); + + auto cmd = generateCompactCommand(fieldSet); + auto docs = getUniqueCompactionDocuments(_queryImpl.get(), cmd, _namespaces.ecocNss, &stats); + + ASSERT(docs == expected); +} + +TEST_F(FleCompactTest, CompactValueWithNonExistentESCAndECCEntries) { + ECStats escStats, eccStats; + auto testPair = BSON("first" + << "brian"); + auto ecocDoc = generateTestECOCDocument(testPair); + + assertDocumentCounts(0, 0, 0, 0); + + compactOneFieldValuePair(_queryImpl.get(), ecocDoc, _namespaces, &escStats, &eccStats); + + // nothing should have changed in the state collections + assertDocumentCounts(0, 0, 0, 0); +} + +TEST_F(FleCompactTest, CompactValueWithESCEntriesExcludingNullDoc) { + ECStats escStats, eccStats; + std::map<std::string, InsertionState> values; + constexpr auto key = "first"_sd; + const std::string val1 = "roger"; + const std::string val2 = "roderick"; + + values[val1].toInsertCount = 15; + values[val2].toInsertCount = 1; + insertFieldValues(key, values); + assertDocumentCounts(16, 16, 0, 16); + + // compact the value with multiple entries + auto testPair = BSON(key << val1); + auto ecocDoc = generateTestECOCDocument(testPair); + + compactOneFieldValuePair(_queryImpl.get(), ecocDoc, _namespaces, &escStats, &eccStats); + assertDocumentCounts(16, 2, 0, 16); + assertESCNullDocument(testPair, true, 15, 15); + + // compact the value with just a single entry + testPair = BSON(key << val2); + ecocDoc = generateTestECOCDocument(testPair); + + compactOneFieldValuePair(_queryImpl.get(), ecocDoc, _namespaces, &escStats, &eccStats); + assertDocumentCounts(16, 2, 0, 16); + assertESCNullDocument(testPair, true, 1, 1); +} + +TEST_F(FleCompactTest, CompactValueWithESCEntriesIncludingNullDoc) { + ECStats escStats, eccStats; + std::map<std::string, InsertionState> values; + constexpr auto key = "first"_sd; + const std::string val = "ruben"; + auto testPair = BSON(key << val); + auto ecocDoc = generateTestECOCDocument(testPair); + + values[val].toInsertCount = 34; + insertFieldValues(key, values); + assertDocumentCounts(34, 34, 0, 34); + + // compact once to get the null doc + compactOneFieldValuePair(_queryImpl.get(), ecocDoc, _namespaces, &escStats, &eccStats); + assertDocumentCounts(34, 1, 0, 34); + assertESCNullDocument(testPair, true, 34, 34); + + // insert more values following the null doc + values[val].toInsertCount = 16; + insertFieldValues(key, values); + assertDocumentCounts(50, 17, 0, 50); + + // compact again, but now with a null doc + compactOneFieldValuePair(_queryImpl.get(), ecocDoc, _namespaces, &escStats, &eccStats); + assertDocumentCounts(50, 1, 0, 50); + // null doc has pos value of 51 instead of 50 to account for the placeholder document + assertESCNullDocument(testPair, true, 51, 50); + + // compact again, but now just the null doc + compactOneFieldValuePair(_queryImpl.get(), ecocDoc, _namespaces, &escStats, &eccStats); + assertDocumentCounts(50, 1, 0, 50); + // null doc's pos value bumped to 52 for the placeholder doc + assertESCNullDocument(testPair, true, 52, 50); + + // insert one more, and compact + values[val].toInsertCount = 1; + insertFieldValues(key, values); + assertDocumentCounts(51, 2, 0, 51); + compactOneFieldValuePair(_queryImpl.get(), ecocDoc, _namespaces, &escStats, &eccStats); + assertDocumentCounts(51, 1, 0, 51); + assertESCNullDocument(testPair, true, 54, 51); +} + +} // namespace +} // namespace mongo |