summaryrefslogtreecommitdiff
path: root/src/mongo/db/commands/fle2_cleanup_cmd.cpp
blob: de3cbe51e90cf0959ea968b9db1c77fada970c03 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
/**
 *    Copyright (C) 2022-present MongoDB, Inc.
 *
 *    This program is free software: you can redistribute it and/or modify
 *    it under the terms of the Server Side Public License, version 1,
 *    as published by MongoDB, Inc.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    Server Side Public License for more details.
 *
 *    You should have received a copy of the Server Side Public License
 *    along with this program. If not, see
 *    <http://www.mongodb.com/licensing/server-side-public-license>.
 *
 *    As a special exception, the copyright holders give permission to link the
 *    code of portions of this program with the OpenSSL library under certain
 *    conditions as described in each individual source file and distribute
 *    linked combinations including the program with the OpenSSL library. You
 *    must comply with the Server Side Public License in all respects for
 *    all of the code used other than as permitted herein. If you modify file(s)
 *    with this exception, you may extend this exception to your version of the
 *    file(s), but you are not obligated to do so. If you do not wish to do so,
 *    delete this exception statement from your version. If you delete this
 *    exception statement from all source files in the program, then also delete
 *    it in the license file.
 */


#include "mongo/platform/basic.h"

#include "mongo/crypto/fle_options_gen.h"
#include "mongo/db/auth/authorization_session.h"
#include "mongo/db/catalog/create_collection.h"
#include "mongo/db/catalog/drop_collection.h"
#include "mongo/db/catalog/rename_collection.h"
#include "mongo/db/catalog_raii.h"
#include "mongo/db/commands.h"
#include "mongo/db/commands/create_gen.h"
#include "mongo/db/commands/feature_compatibility_version.h"
#include "mongo/db/commands/fle2_compact.h"
#include "mongo/db/s/sharding_state.h"
#include "mongo/logv2/log.h"

#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kStorage

namespace mongo {

namespace {

/**
 * Creates the QE state collection 'nss' as a collection clustered on {_id: 1}.
 *
 * A NamespaceExists failure is tolerated (and logged at debug level 1); any other
 * failure from createCollection() is rethrown through uassertStatusOK().
 */
void createQEClusteredStateCollection(OperationContext* opCtx, const NamespaceString& nss) {
    CreateCommand cmd(nss);
    cmd.setClusteredIndex(stdx::variant<bool, mongo::ClusteredIndexSpec>(
        mongo::ClusteredIndexSpec(BSON("_id" << 1), true)));

    const auto createStatus = createCollection(opCtx, cmd);
    if (createStatus.isOK()) {
        return;
    }

    // Only "already exists" is an acceptable failure; everything else is fatal to the caller.
    if (createStatus != ErrorCodes::NamespaceExists) {
        uassertStatusOK(createStatus);
    }
    LOGV2_DEBUG(
        7618801, 1, "Create collection failed because namespace already exists", logAttrs(nss));
}

/**
 * Drops the QE state collection 'nss', disallowing drops of system collections.
 *
 * Any non-OK status returned by dropCollection() is rethrown through uassertStatusOK().
 * On success the drop reply is logged at debug level 1.
 */
void dropQEStateCollection(OperationContext* opCtx, const NamespaceString& nss) {
    DropReply reply;
    const auto dropStatus =
        dropCollection(opCtx,
                       nss,
                       &reply,
                       DropCollectionSystemCollectionMode::kDisallowSystemCollectionDrops);
    uassertStatusOK(dropStatus);
    LOGV2_DEBUG(7618802, 1, "QE state collection drop finished", "reply"_attr = reply);
}

/**
 * QE cleanup is similar to QE compact in that it also performs "compaction" of the
 * ESC collection by removing stale ESC non-anchors. Unlike compact, cleanup also removes
 * stale ESC anchors. It also differs from compact in that instead of inserting "anchors"
 * to the ESC, cleanup only inserts or updates "null" anchors.
 *
 * At a high level, the cleanup algorithm works as follows:
 * 1. The _ids of random ESC non-anchors are first read into an in-memory set 'P'.
 * 2. (*) A temporary 'esc.deletes' collection is created. This collection will contain
 *    the _ids of anchor documents that cleanup will remove towards the end of the operation.
 * 3. The ECOC is renamed to a temporary namespace (hereby referred to as 'ecoc.compact').
 * 4. Unique entries from 'ecoc.compact' are decoded into an in-memory set of tokens: 'C'.
 * 5. For each token in 'C', the following is performed:
 *    a. Start a transaction
 *    b. Run EmuBinary to collect the latest anchor and non-anchor positions for the current token.
 *    c. (*) Insert (or update an existing) null anchor which encodes the latest positions.
 *    d. (*) If there are anchors corresponding to the current token, insert their _ids
 *       into 'esc.deletes'. These anchors are now stale and are marked for deletion.
 *    e. Commit transaction
 * 6. Delete every document in the ESC whose _id can be found in 'P'
 * 7. (*) Delete every document in the ESC whose _id can be found in 'esc.deletes'
 * 8. (*) Drop 'esc.deletes'
 * 9. Drop 'ecoc.compact'
 *
 * Steps marked with (*) are unique to the cleanup operation.
 *
 * Leftovers from an earlier run are handled before the steps above: an existing
 * 'esc.deletes' is applied to the ESC and dropped, and an existing 'ecoc.compact' is
 * processed as-is instead of renaming the ECOC again (in which case 'P' stays empty).
 *
 * Fails with a uassert if the feature flag is disabled, if sharding state is enabled on
 * this node, if the database or data collection does not exist, if the namespace is a view,
 * or if 'ecoc.compact' / 'esc.deletes' is missing right before the tokens are processed.
 *
 * Returns the accumulated CleanupStats. When neither the ECOC nor 'ecoc.compact' exists,
 * the function returns early with the stats gathered so far and does not update the
 * FLEStatusSection.
 */
CleanupStats cleanupEncryptedCollection(OperationContext* opCtx,
                                        const CleanupStructuredEncryptionData& request) {
    // NOTE(review): presumably set because the request carries cleanup tokens that must not
    // end up in diagnostics — confirm.
    CurOp::get(opCtx)->debug().shouldOmitDiagnosticInformation = true;

    uassert(7618803,
            str::stream() << "Feature flag `FLE2CleanupCommand` must be enabled to run "
                          << CleanupStructuredEncryptionData::kCommandName,
            gFeatureFlagFLE2CleanupCommand.isEnabled(serverGlobalParams.featureCompatibility));

    // Reject the command when sharding state is enabled on this node.
    uassert(7618804,
            str::stream() << CleanupStructuredEncryptionData::kCommandName
                          << " must be run through mongos in a sharded cluster",
            !ShardingState::get(opCtx)->enabled());

    // Since this command holds an IX lock on the DB and the global lock throughout
    // the lifetime of this operation, setFCV should not be allowed to abort the transaction
    // performing the cleanup. Otherwise, on retry, the transaction may attempt to
    // acquire the global lock in IX mode, while setFCV is already waiting to acquire it
    // in S mode, causing a deadlock.
    FixedFCVRegion fixedFcv(opCtx);

    const auto& edcNss = request.getNamespace();

    // 'autoDb' and 'edcLock' live until the end of the function.
    AutoGetDb autoDb(opCtx, edcNss.dbName(), MODE_IX);
    uassert(ErrorCodes::NamespaceNotFound,
            str::stream() << "Database '" << edcNss.dbName().toStringForErrorMsg()
                          << "' does not exist",
            autoDb.getDb());
    Lock::CollectionLock edcLock(opCtx, edcNss, MODE_IS);

    // Validate the request and acquire the relevant namespaces
    EncryptedStateCollectionsNamespaces namespaces;
    {
        auto catalog = CollectionCatalog::get(opCtx);

        // Check the data collection exists and is not a view
        auto edc = catalog->lookupCollectionByNamespace(opCtx, edcNss);
        if (!edc) {
            // Distinguish "is a view" from "does not exist" so the caller gets a precise error.
            uassert(ErrorCodes::CommandNotSupportedOnView,
                    "Cannot cleanup structured encryption data on a view",
                    !catalog->lookupView(opCtx, edcNss));
            uasserted(ErrorCodes::NamespaceNotFound,
                      str::stream()
                          << "Collection '" << edcNss.toStringForErrorMsg() << "' does not exist");
        }

        validateCleanupRequest(request, *edc);

        // Derive the state collection namespaces (ESC, ECOC, 'ecoc.compact', 'esc.deletes',
        // 'ecoc.lock') from the data collection.
        namespaces =
            uassertStatusOK(EncryptedStateCollectionsNamespaces::createFromDataCollection(*edc));
    }

    // Acquire exclusive lock on the associated 'ecoc.lock' namespace to serialize calls
    // to cleanup and compact on the same EDC namespace.
    Lock::CollectionLock compactionLock(opCtx, namespaces.ecocLockNss, MODE_X);

    LOGV2(7618805, "Cleaning up the encrypted compaction collection", logAttrs(edcNss));

    CleanupStats stats({}, {});
    // In-memory set 'P' of ESC non-anchor _ids; stays empty when resuming from a stale
    // 'ecoc.compact' (see below).
    FLECompactESCDeleteSet escDeleteSet;
    // Read from the 'fleCompactionOptions' cluster parameter; passed to both ESC delete
    // helpers below.
    auto tagsPerDelete =
        ServerParameterSet::getClusterParameterSet()
            ->get<ClusterParameterWithStorage<FLECompactionOptions>>("fleCompactionOptions")
            ->getValue(boost::none)
            .getMaxESCEntriesPerCompactionDelete();

    // If 'esc.deletes' exists, clean up the matching anchors in ESC and drop 'esc.deletes'
    {
        AutoGetCollection escDeletes(opCtx, namespaces.escDeletesNss, MODE_IS);
        if (escDeletes) {
            LOGV2(7618806,
                  "Cleaning up ESC deletes collection from a prior cleanup operation",
                  logAttrs(namespaces.escDeletesNss));
            cleanupESCAnchors(
                opCtx, namespaces.escNss, namespaces.escDeletesNss, tagsPerDelete, &stats.getEsc());
        }
    }
    // The drop is issued unconditionally, after the AutoGetCollection above has gone out of
    // scope.
    dropQEStateCollection(opCtx, namespaces.escDeletesNss);

    // createEcoc: a fresh ECOC must be created after this scope.
    // renameEcoc: the current ECOC must be renamed to 'ecoc.compact' after this scope.
    bool createEcoc = false;
    bool renameEcoc = false;
    {
        AutoGetCollection ecoc(opCtx, namespaces.ecocNss, MODE_IS);
        AutoGetCollection ecocCompact(opCtx, namespaces.ecocRenameNss, MODE_IS);

        // Early exit if there's no ECOC
        if (!ecoc && !ecocCompact) {
            LOGV2(7618807,
                  "Skipping cleanup as there is no ECOC collection to compact",
                  "ecocNss"_attr = namespaces.ecocNss,
                  "ecocCompactNss"_attr = namespaces.ecocRenameNss);
            return stats;
        }

        // If only the stale 'ecoc.compact' exists, the ECOC itself is missing and has to be
        // recreated.
        createEcoc = !ecoc;

        // Set up the temporary 'ecoc.compact' collection
        if (ecoc && !ecocCompact) {
            // Load the random set of ESC non-anchor entries to be deleted post-cleanup
            auto memoryLimit =
                ServerParameterSet::getClusterParameterSet()
                    ->get<ClusterParameterWithStorage<FLECompactionOptions>>("fleCompactionOptions")
                    ->getValue(boost::none)
                    .getMaxCompactionSize();
            escDeleteSet =
                readRandomESCNonAnchorIds(opCtx, namespaces.escNss, memoryLimit, &stats.getEsc());
            // The ECOC is about to be renamed away, so a new one must be created afterwards.
            renameEcoc = createEcoc = true;
        } else /* ecocCompact exists */ {
            LOGV2(7618808,
                  "Resuming compaction from a stale ECOC collection",
                  logAttrs(namespaces.ecocRenameNss));
        }
    }

    if (renameEcoc) {
        LOGV2(7618809,
              "Renaming the encrypted compaction collection",
              "ecocNss"_attr = namespaces.ecocNss,
              "ecocRenameNss"_attr = namespaces.ecocRenameNss);
        RenameCollectionOptions renameOpts;
        validateAndRunRenameCollection(
            opCtx, namespaces.ecocNss, namespaces.ecocRenameNss, renameOpts);
    }

    if (createEcoc) {
        createQEClusteredStateCollection(opCtx, namespaces.ecocNss);
    }

    // Create the temporary 'esc.deletes' clustered collection
    createQEClusteredStateCollection(opCtx, namespaces.escDeletesNss);

    {
        AutoGetCollection ecocCompact(opCtx, namespaces.ecocRenameNss, MODE_IS);
        AutoGetCollection escDeletes(opCtx, namespaces.escDeletesNss, MODE_IS);

        // Both temporary collections were set up above; verify they are still present
        // before processing.
        uassert(ErrorCodes::NamespaceNotFound,
                str::stream() << "Renamed encrypted compaction collection "
                              << namespaces.ecocRenameNss.toStringForErrorMsg()
                              << " no longer exists prior to cleanup",
                ecocCompact.getCollection());
        uassert(ErrorCodes::NamespaceNotFound,
                str::stream() << "ESC deletes collection "
                              << namespaces.escDeletesNss.toStringForErrorMsg()
                              << " no longer exists prior to cleanup",
                escDeletes.getCollection());

        // Clean up entries for each encrypted field in compactionTokens
        processFLECleanup(opCtx,
                          request,
                          &getTransactionWithRetriesForMongoD,
                          namespaces,
                          &stats.getEsc(),
                          &stats.getEcoc());

        // Delete the entries in 'P' (the non-anchor _ids held in 'escDeleteSet') from the ESC
        cleanupESCNonAnchors(
            opCtx, namespaces.escNss, escDeleteSet, tagsPerDelete, &stats.getEsc());

        // Delete the entries in esc.deletes collection from the ESC
        cleanupESCAnchors(
            opCtx, namespaces.escNss, namespaces.escDeletesNss, tagsPerDelete, &stats.getEsc());
    }

    // Drop the 'esc.deletes' collection
    dropQEStateCollection(opCtx, namespaces.escDeletesNss);

    // Drop the 'ecoc.compact' collection
    dropQEStateCollection(opCtx, namespaces.ecocRenameNss);

    LOGV2(7618810,
          "Done cleaning up the encrypted compaction collection",
          logAttrs(request.getNamespace()));

    // Publish the stats of this run to the FLE status section before returning them.
    FLEStatusSection::get().updateCleanupStats(stats);
    return stats;
}

/**
 * Command object for CleanupStructuredEncryptionData.
 *
 * The command is never allowed on secondaries, is not restricted to the admin database,
 * and reports the cleanup tokens field as sensitive. Each invocation delegates to
 * cleanupEncryptedCollection() and wraps the resulting stats in the reply.
 */
class CleanupStructuredEncryptionDataCmd final
    : public TypedCommand<CleanupStructuredEncryptionDataCmd> {
public:
    using Request = CleanupStructuredEncryptionData;
    using Reply = CleanupStructuredEncryptionData::Reply;
    using TC = TypedCommand<CleanupStructuredEncryptionDataCmd>;

    typename TC::AllowedOnSecondary secondaryAllowed(ServiceContext*) const final {
        return TC::AllowedOnSecondary::kNever;
    }

    bool adminOnly() const final {
        return false;
    }

    // Fields whose values must be treated as sensitive.
    std::set<StringData> sensitiveFieldNames() const final {
        std::set<StringData> sensitive;
        sensitive.insert(CleanupStructuredEncryptionData::kCleanupTokensFieldName);
        return sensitive;
    }

    class Invocation final : public TC::InvocationBase {
    public:
        using TC::InvocationBase::InvocationBase;
        using TC::InvocationBase::request;

        // Runs the cleanup for the requested namespace and returns its stats as the reply.
        Reply typedRun(OperationContext* opCtx) {
            auto cleanupStats = cleanupEncryptedCollection(opCtx, request());
            return Reply(std::move(cleanupStats));
        }

    private:
        bool supportsWriteConcern() const final {
            return false;
        }

        // Requires the cleanupStructuredEncryptionData action on the exact target namespace.
        void doCheckAuthorization(OperationContext* opCtx) const final {
            auto* authzSession = AuthorizationSession::get(opCtx->getClient());
            const auto targetResource =
                ResourcePattern::forExactNamespace(request().getNamespace());
            const bool isAuthorized = authzSession->isAuthorizedForActionsOnResource(
                targetResource, ActionType::cleanupStructuredEncryptionData);
            uassert(ErrorCodes::Unauthorized,
                    "Not authorized to cleanup structured encryption data",
                    isAuthorized);
        }

        NamespaceString ns() const final {
            const auto& cmd = request();
            return cmd.getNamespace();
        }
    };
} cleanupStructuredEncryptionDataCmd;


}  // namespace

}  // namespace mongo