diff options
author | William Schultz <william.schultz@mongodb.com> | 2017-04-25 16:10:05 -0400 |
---|---|---|
committer | William Schultz <william.schultz@mongodb.com> | 2017-04-25 17:46:51 -0400 |
commit | a956f48537415b3ba74d2b8c2bd1332b6f603dca (patch) | |
tree | 8e1e24d4dc6582dceed2e9e284901c1177126f0e /src/mongo | |
parent | e2196c8ee508a99c0d9472f061414da2f79c1e50 (diff) | |
download | mongo-a956f48537415b3ba74d2b8c2bd1332b6f603dca.tar.gz |
SERVER-28205 SERVER-28740 Make collMod oplog entries reversible
Diffstat (limited to 'src/mongo')
-rw-r--r-- | src/mongo/db/SConscript | 11 | ||||
-rw-r--r-- | src/mongo/db/catalog/coll_mod.cpp | 143 | ||||
-rw-r--r-- | src/mongo/db/op_observer.h | 41 | ||||
-rw-r--r-- | src/mongo/db/op_observer_impl.cpp | 54 | ||||
-rw-r--r-- | src/mongo/db/op_observer_impl.h | 4 | ||||
-rw-r--r-- | src/mongo/db/op_observer_impl_test.cpp | 180 | ||||
-rw-r--r-- | src/mongo/db/op_observer_noop.cpp | 4 | ||||
-rw-r--r-- | src/mongo/db/op_observer_noop.h | 4 |
8 files changed, 382 insertions, 59 deletions
diff --git a/src/mongo/db/SConscript b/src/mongo/db/SConscript index cdf8c5e6464..b9917f42b14 100644 --- a/src/mongo/db/SConscript +++ b/src/mongo/db/SConscript @@ -115,6 +115,17 @@ env.CppUnitTest( ], ) +env.CppUnitTest( + target= 'op_observer_impl_test', + source= 'op_observer_impl_test.cpp', + LIBDEPS=[ + 'common', + 'op_observer_d', + 'service_context_d_test_fixture', + '$BUILD_DIR/mongo/db/repl/replmocks', + ], +) + env.Library( target="dbmessage", source=[ diff --git a/src/mongo/db/catalog/coll_mod.cpp b/src/mongo/db/catalog/coll_mod.cpp index 10ee1e450d1..c1feeff855e 100644 --- a/src/mongo/db/catalog/coll_mod.cpp +++ b/src/mongo/db/catalog/coll_mod.cpp @@ -216,6 +216,47 @@ StatusWith<CollModRequest> parseCollModRequest(OperationContext* opCtx, return {std::move(cmr)}; } +/** + * Set a collection option flag for 'UsePowerOf2Sizes' or 'NoPadding'. Appends both the new and + * old flag setting to the given 'result' builder. + */ +void setCollectionOptionFlag(OperationContext* opCtx, + Collection* coll, + BSONElement& collOptionElement, + BSONObjBuilder* result) { + const StringData flagName = collOptionElement.fieldNameStringData(); + + int flag; + + if (flagName == "usePowerOf2Sizes") { + flag = CollectionOptions::Flag_UsePowerOf2Sizes; + } else if (flagName == "noPadding") { + flag = CollectionOptions::Flag_NoPadding; + } else { + flag = 0; + } + + CollectionCatalogEntry* cce = coll->getCatalogEntry(); + + const int oldFlags = cce->getCollectionOptions(opCtx).flags; + const bool oldSetting = oldFlags & flag; + const bool newSetting = collOptionElement.trueValue(); + + result->appendBool(flagName.toString() + "_old", oldSetting); + result->appendBool(flagName.toString() + "_new", newSetting); + + const int newFlags = newSetting ? (oldFlags | flag) // set flag + : (oldFlags & ~flag); // clear flag + + // NOTE we do this unconditionally to ensure that we note that the user has + // explicitly set flags, even if they are just setting the default. + cce->updateFlags(opCtx, newFlags); + + const CollectionOptions newOptions = cce->getCollectionOptions(opCtx); + invariant(newOptions.flags == newFlags); + invariant(newOptions.flagsSet); +} + Status collMod(OperationContext* opCtx, const NamespaceString& nss, const BSONObj& cmdObj, @@ -265,6 +306,8 @@ Status collMod(OperationContext* opCtx, WriteUnitOfWork wunit(opCtx); + // Handle collMod on a view and return early. The View Catalog handles the creation of oplog + // entries for modifications on a view. if (view) { if (!cmr.viewPipeLine.eoo()) view->setPipeline(cmr.viewPipeLine); @@ -283,73 +326,69 @@ Status collMod(OperationContext* opCtx, if (!errorStatus.isOK()) { return errorStatus; } - } else { - if (!cmr.indexExpireAfterSeconds.eoo()) { - BSONElement& newExpireSecs = cmr.indexExpireAfterSeconds; - BSONElement oldExpireSecs = cmr.idx->infoObj().getField("expireAfterSeconds"); - if (SimpleBSONElementComparator::kInstance.evaluate(oldExpireSecs != newExpireSecs)) { - result->appendAs(oldExpireSecs, "expireAfterSeconds_old"); - // Change the value of "expireAfterSeconds" on disk. - coll->getCatalogEntry()->updateTTLSetting( - opCtx, cmr.idx->indexName(), newExpireSecs.safeNumberLong()); - // Notify the index catalog that the definition of this index changed. - cmr.idx = coll->getIndexCatalog()->refreshEntry(opCtx, cmr.idx); - result->appendAs(newExpireSecs, "expireAfterSeconds_new"); - } - } + wunit.commit(); + return Status::OK(); + } - if (!cmr.collValidator.eoo()) - coll->setValidator(opCtx, cmr.collValidator.Obj()); + // In order to facilitate the replication rollback process, which makes a best effort attempt to + // "undo" a set of oplog operations, we store a snapshot of the old collection options to + // provide to the OpObserver. TTL index updates aren't a part of collection options so we + // save the relevant TTL index data in a separate object. - if (!cmr.collValidationAction.empty()) - coll->setValidationAction(opCtx, cmr.collValidationAction); + CollectionOptions oldCollOptions = coll->getCatalogEntry()->getCollectionOptions(opCtx); + boost::optional<TTLCollModInfo> ttlInfo; - if (!cmr.collValidationLevel.empty()) - coll->setValidationLevel(opCtx, cmr.collValidationLevel); + // Handle collMod operation type appropriately. - auto setCollectionOption = [&](BSONElement& COElement) { - typedef CollectionOptions CO; - const StringData name = COElement.fieldNameStringData(); + // TTLIndex + if (!cmr.indexExpireAfterSeconds.eoo()) { + BSONElement& newExpireSecs = cmr.indexExpireAfterSeconds; + BSONElement oldExpireSecs = cmr.idx->infoObj().getField("expireAfterSeconds"); - int flag = (name == "usePowerOf2Sizes") - ? CO::Flag_UsePowerOf2Sizes - : (name == "noPadding") ? CO::Flag_NoPadding : 0; + if (SimpleBSONElementComparator::kInstance.evaluate(oldExpireSecs != newExpireSecs)) { + result->appendAs(oldExpireSecs, "expireAfterSeconds_old"); - CollectionCatalogEntry* cce = coll->getCatalogEntry(); + // Change the value of "expireAfterSeconds" on disk. + coll->getCatalogEntry()->updateTTLSetting( + opCtx, cmr.idx->indexName(), newExpireSecs.safeNumberLong()); - const int oldFlags = cce->getCollectionOptions(opCtx).flags; - const bool oldSetting = oldFlags & flag; - const bool newSetting = COElement.trueValue(); + // Notify the index catalog that the definition of this index changed. + cmr.idx = coll->getIndexCatalog()->refreshEntry(opCtx, cmr.idx); + result->appendAs(newExpireSecs, "expireAfterSeconds_new"); + } - result->appendBool(name.toString() + "_old", oldSetting); - result->appendBool(name.toString() + "_new", newSetting); + // Save previous TTL index expiration. + ttlInfo = TTLCollModInfo{Seconds(newExpireSecs.safeNumberLong()), + Seconds(oldExpireSecs.safeNumberLong()), + cmr.idx->indexName()}; + } - const int newFlags = newSetting ? (oldFlags | flag) // set flag - : (oldFlags & ~flag); // clear flag + // Validator + if (!cmr.collValidator.eoo()) + coll->setValidator(opCtx, cmr.collValidator.Obj()); - // NOTE we do this unconditionally to ensure that we note that the user has - // explicitly set flags, even if they are just setting the default. - cce->updateFlags(opCtx, newFlags); + // ValidationAction + if (!cmr.collValidationAction.empty()) + coll->setValidationAction(opCtx, cmr.collValidationAction); - const CollectionOptions newOptions = cce->getCollectionOptions(opCtx); - invariant(newOptions.flags == newFlags); - invariant(newOptions.flagsSet); - }; + // ValidationLevel + if (!cmr.collValidationLevel.empty()) + coll->setValidationLevel(opCtx, cmr.collValidationLevel); - if (!cmr.usePowerOf2Sizes.eoo()) { - setCollectionOption(cmr.usePowerOf2Sizes); - } + // UsePowerof2Sizes + if (!cmr.usePowerOf2Sizes.eoo()) + setCollectionOptionFlag(opCtx, coll, cmr.usePowerOf2Sizes, result); - if (!cmr.noPadding.eoo()) { - setCollectionOption(cmr.noPadding); - } + // NoPadding + if (!cmr.noPadding.eoo()) + setCollectionOptionFlag(opCtx, coll, cmr.noPadding, result); - // Only observe non-view collMods, as view operations are observed as operations on the - // system.views collection. - getGlobalServiceContext()->getOpObserver()->onCollMod( - opCtx, nss, coll->uuid(opCtx), oplogEntryBuilder.obj()); - } + + // Only observe non-view collMods, as view operations are observed as operations on the + // system.views collection. + getGlobalServiceContext()->getOpObserver()->onCollMod( + opCtx, nss, coll->uuid(opCtx), oplogEntryBuilder.obj(), oldCollOptions, ttlInfo); wunit.commit(); diff --git a/src/mongo/db/op_observer.h b/src/mongo/db/op_observer.h index 23cd5cce4a6..5a3e089f261 100644 --- a/src/mongo/db/op_observer.h +++ b/src/mongo/db/op_observer.h @@ -62,6 +62,12 @@ struct OplogUpdateEntryArgs { bool fromMigrate; }; +struct TTLCollModInfo { + Seconds expireAfterSeconds; + Seconds oldExpireAfterSeconds; + std::string indexName; +}; + class OpObserver { MONGO_DISALLOW_COPYING(OpObserver); @@ -103,10 +109,43 @@ public: const NamespaceString& collectionName, const CollectionOptions& options, const BSONObj& idIndex) = 0; + /** + * This function logs an oplog entry when a 'collMod' command on a collection is executed. + * Since 'collMod' commands can take a variety of different formats, the 'o' field of the + * oplog entry is populated with the 'collMod' command object. For TTL index updates, we + * transform key pattern index specifications into index name specifications, for uniformity. + * All other collMod fields are added to the 'o' object without modifications. + * + * To facilitate the rollback process, 'oldCollOptions' contains the previous state of all + * collection options i.e. the state prior to completion of the current collMod command. + * 'ttlInfo' contains the index name and previous expiration time of a TTL index. The old + * collection options will be stored in the 'o2.collectionOptions_old' field, and the old TTL + * expiration value in the 'o2.expireAfterSeconds_old' field. + * + * Oplog Entry Example ('o' and 'o2' fields shown): + * + * { + * ... + * o: { + * collMod: "test", + * validationLevel: "off", + * index: {name: "indexName_1", expireAfterSeconds: 600} + * } + * o2: { + * collectionOptions_old: { + * validationLevel: "strict", + * }, + * expireAfterSeconds_old: 300 + * } + * } + * + */ virtual void onCollMod(OperationContext* opCtx, const NamespaceString& nss, OptionalCollectionUUID uuid, - const BSONObj& collModCmd) = 0; + const BSONObj& collModCmd, + const CollectionOptions& oldCollOptions, + boost::optional<TTLCollModInfo> ttlInfo) = 0; virtual void onDropDatabase(OperationContext* opCtx, const std::string& dbName) = 0; virtual void onDropCollection(OperationContext* opCtx, const NamespaceString& collectionName, diff --git a/src/mongo/db/op_observer_impl.cpp b/src/mongo/db/op_observer_impl.cpp index 11be66004c5..2f0cf6eef00 100644 --- a/src/mongo/db/op_observer_impl.cpp +++ b/src/mongo/db/op_observer_impl.cpp @@ -219,18 +219,66 @@ void OpObserverImpl::onCreateCollection(OperationContext* opCtx, logOpForDbHash(opCtx, dbName); } +namespace { +/** + * Given a raw collMod command object and associated collection metadata, create and return the + * object for the 'o' field of a collMod oplog entry. For TTL index updates, we make sure the oplog + * entry always stores the index name, instead of a key pattern. + */ +BSONObj makeCollModCmdObj(const BSONObj& collModCmd, + const CollectionOptions& oldCollOptions, + boost::optional<TTLCollModInfo> ttlInfo) { + BSONObjBuilder cmdObjBuilder; + std::string ttlIndexFieldName = "index"; + + // Add all fields from the original collMod command. + for (auto elem : collModCmd) { + // We normalize all TTL collMod oplog entry objects to use the index name, even if the + // command used an index key pattern. + if (elem.fieldNameStringData() == ttlIndexFieldName && ttlInfo) { + BSONObjBuilder ttlIndexObjBuilder; + ttlIndexObjBuilder.append("name", ttlInfo->indexName); + ttlIndexObjBuilder.append("expireAfterSeconds", + durationCount<Seconds>(ttlInfo->expireAfterSeconds)); + + cmdObjBuilder.append(ttlIndexFieldName, ttlIndexObjBuilder.obj()); + } else { + cmdObjBuilder.append(elem); + } + } + + return cmdObjBuilder.obj(); +} +} + void OpObserverImpl::onCollMod(OperationContext* opCtx, const NamespaceString& nss, OptionalCollectionUUID uuid, - const BSONObj& collModCmd) { + const BSONObj& collModCmd, + const CollectionOptions& oldCollOptions, + boost::optional<TTLCollModInfo> ttlInfo) { + const NamespaceString cmdNss = nss.getCommandNS(); + // Create the 'o' field object. + BSONObj cmdObj = makeCollModCmdObj(collModCmd, oldCollOptions, ttlInfo); + + // Create the 'o2' field object. We save the old collection metadata and TTL expiration. + BSONObjBuilder o2Builder; + o2Builder.append("collectionOptions_old", oldCollOptions.toBSON()); + if (ttlInfo) { + auto oldExpireAfterSeconds = durationCount<Seconds>(ttlInfo->oldExpireAfterSeconds); + o2Builder.append("expireAfterSeconds_old", oldExpireAfterSeconds); + } + + const BSONObj o2Obj = o2Builder.obj(); + if (!nss.isSystemDotProfile()) { // do not replicate system.profile modifications - repl::logOp(opCtx, "c", cmdNss, uuid, collModCmd, nullptr, false); + repl::logOp(opCtx, "c", cmdNss, uuid, cmdObj, &o2Obj, false); } - getGlobalAuthorizationManager()->logOp(opCtx, "c", cmdNss, collModCmd, nullptr); + getGlobalAuthorizationManager()->logOp(opCtx, "c", cmdNss, cmdObj, nullptr); logOpForDbHash(opCtx, cmdNss); } diff --git a/src/mongo/db/op_observer_impl.h b/src/mongo/db/op_observer_impl.h index 49f6f1cc879..aaff2de5f64 100644 --- a/src/mongo/db/op_observer_impl.h +++ b/src/mongo/db/op_observer_impl.h @@ -67,7 +67,9 @@ public: void onCollMod(OperationContext* opCtx, const NamespaceString& nss, OptionalCollectionUUID uuid, - const BSONObj& idIndex) override; + const BSONObj& collModCmd, + const CollectionOptions& oldCollOptions, + boost::optional<TTLCollModInfo> ttlInfo) override; void onDropDatabase(OperationContext* opCtx, const std::string& dbName) override; void onDropCollection(OperationContext* opCtx, const NamespaceString& collectionName, diff --git a/src/mongo/db/op_observer_impl_test.cpp b/src/mongo/db/op_observer_impl_test.cpp new file mode 100644 index 00000000000..4a5638d5294 --- /dev/null +++ b/src/mongo/db/op_observer_impl_test.cpp @@ -0,0 +1,180 @@ +/** +* Copyright (C) 2017 MongoDB Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +* +* As a special exception, the copyright holders give permission to link the +* code of portions of this program with the OpenSSL library under certain +* conditions as described in each individual source file and distribute +* linked combinations including the program with the OpenSSL library. You +* must comply with the GNU Affero General Public License in all respects for +* all of the code used other than as permitted herein. If you modify file(s) +* with this exception, you may extend this exception to your version of the +* file(s), but you are not obligated to do so. If you do not wish to do so, +* delete this exception statement from your version. If you delete this +* exception statement from all source files in the program, then also delete +* it in the license file. +*/ + + +#include "mongo/db/op_observer_impl.h" +#include "mongo/db/client.h" +#include "mongo/db/db_raii.h" +#include "mongo/db/field_parser.h" +#include "mongo/db/repl/oplog.h" +#include "mongo/db/repl/oplog_interface_local.h" +#include "mongo/db/repl/replication_coordinator_mock.h" +#include "mongo/db/service_context_d_test_fixture.h" + + +namespace mongo { +namespace { + +class OpObserverTest : public ServiceContextMongoDTest { + +public: + void setUp() { + + // Set up mongod. + ServiceContextMongoDTest::setUp(); + repl::ReplSettings replSettings; + replSettings.setOplogSizeBytes(5 * 1024 * 1024); + replSettings.setReplSetString("repl"); + + auto service = getServiceContext(); + auto opCtx = cc().makeOperationContext(); + + // Set up ReplicationCoordinator and create oplog. + repl::ReplicationCoordinator::set( + service, stdx::make_unique<repl::ReplicationCoordinatorMock>(service, replSettings)); + repl::setOplogCollectionName(); + repl::createOplog(opCtx.get()); + + // Ensure that we are primary. + auto replCoord = repl::ReplicationCoordinator::get(opCtx.get()); + ASSERT_TRUE(replCoord->setFollowerMode(repl::MemberState::RS_PRIMARY)); + } + + // Assert that oplog only has a single entry and return that oplog entry. + BSONObj getSingleOplogEntry(OperationContext* opCtx) { + repl::OplogInterfaceLocal oplogInterface(opCtx, repl::rsOplogName); + auto oplogIter = oplogInterface.makeIterator(); + auto opEntry = unittest::assertGet(oplogIter->next()); + ASSERT_EQUALS(ErrorCodes::CollectionIsEmpty, oplogIter->next().getStatus()); + return opEntry.first; + } +}; + +TEST_F(OpObserverTest, CollModWithCollectionOptionsAndTTLInfo) { + OpObserverImpl opObserver; + auto opCtx = cc().makeOperationContext(); + auto uuid = CollectionUUID::gen(); + + // Create 'collMod' command. + NamespaceString nss("test.coll"); + BSONObj collModCmd = BSON("collMod" << nss.coll() << "validationLevel" + << "off" + << "validationAction" + << "warn" + // We verify that 'onCollMod' ignores this field. + << "index" + << "indexData"); + + CollectionOptions oldCollOpts; + oldCollOpts.validationLevel = "strict"; + oldCollOpts.validationAction = "error"; + oldCollOpts.flags = 2; + oldCollOpts.flagsSet = true; + + TTLCollModInfo ttlInfo; + ttlInfo.expireAfterSeconds = Seconds(10); + ttlInfo.oldExpireAfterSeconds = Seconds(5); + ttlInfo.indexName = "name_of_index"; + + // Write to the oplog. + { + AutoGetDb autoDb(opCtx.get(), nss.db(), MODE_X); + WriteUnitOfWork wunit(opCtx.get()); + opObserver.onCollMod(opCtx.get(), nss, uuid, collModCmd, oldCollOpts, ttlInfo); + wunit.commit(); + } + + auto oplogEntry = getSingleOplogEntry(opCtx.get()); + + // Ensure that collMod fields were properly added to the oplog entry. + auto o = oplogEntry.getObjectField("o"); + auto oExpected = + BSON("collMod" << nss.coll() << "validationLevel" + << "off" + << "validationAction" + << "warn" + << "index" + << BSON("name" << ttlInfo.indexName << "expireAfterSeconds" + << durationCount<Seconds>(ttlInfo.expireAfterSeconds))); + ASSERT_BSONOBJ_EQ(oExpected, o); + + // Ensure that the old collection metadata was saved. + auto o2 = oplogEntry.getObjectField("o2"); + auto o2Expected = + BSON("collectionOptions_old" << BSON("flags" << oldCollOpts.flags << "validationLevel" + << oldCollOpts.validationLevel + << "validationAction" + << oldCollOpts.validationAction) + << "expireAfterSeconds_old" + << durationCount<Seconds>(ttlInfo.oldExpireAfterSeconds)); + + ASSERT_BSONOBJ_EQ(o2Expected, o2); +} + +TEST_F(OpObserverTest, CollModWithOnlyCollectionOptions) { + OpObserverImpl opObserver; + auto opCtx = cc().makeOperationContext(); + auto uuid = CollectionUUID::gen(); + + // Create 'collMod' command. + NamespaceString nss("test.coll"); + BSONObj collModCmd = BSON("collMod" << nss.coll() << "validationLevel" + << "off" + << "validationAction" + << "warn"); + + CollectionOptions oldCollOpts; + oldCollOpts.validationLevel = "strict"; + oldCollOpts.validationAction = "error"; + + // Write to the oplog. + { + AutoGetDb autoDb(opCtx.get(), nss.db(), MODE_X); + WriteUnitOfWork wunit(opCtx.get()); + opObserver.onCollMod(opCtx.get(), nss, uuid, collModCmd, oldCollOpts, boost::none); + wunit.commit(); + } + + auto oplogEntry = getSingleOplogEntry(opCtx.get()); + + // Ensure that collMod fields were properly added to oplog entry. + auto o = oplogEntry.getObjectField("o"); + auto oExpected = collModCmd; + ASSERT_BSONOBJ_EQ(oExpected, o); + + // Ensure that the old collection metadata was saved and that TTL info is not present. + auto o2 = oplogEntry.getObjectField("o2"); + auto o2Expected = + BSON("collectionOptions_old" + << BSON("validationLevel" << oldCollOpts.validationLevel << "validationAction" + << oldCollOpts.validationAction)); + ASSERT_BSONOBJ_EQ(o2Expected, o2); +} + +} // namespace +} // namespace mongo diff --git a/src/mongo/db/op_observer_noop.cpp b/src/mongo/db/op_observer_noop.cpp index f7c2ac000bc..34234940816 100644 --- a/src/mongo/db/op_observer_noop.cpp +++ b/src/mongo/db/op_observer_noop.cpp @@ -66,7 +66,9 @@ void OpObserverNoop::onCreateCollection(OperationContext*, void OpObserverNoop::onCollMod(OperationContext*, const NamespaceString&, OptionalCollectionUUID, - const BSONObj&) {} + const BSONObj&, + const CollectionOptions& oldCollOptions, + boost::optional<TTLCollModInfo> ttlInfo) {} void OpObserverNoop::onDropDatabase(OperationContext*, const std::string&) {} diff --git a/src/mongo/db/op_observer_noop.h b/src/mongo/db/op_observer_noop.h index b96861f2ed0..1701a875f3a 100644 --- a/src/mongo/db/op_observer_noop.h +++ b/src/mongo/db/op_observer_noop.h @@ -67,7 +67,9 @@ public: void onCollMod(OperationContext* opCtx, const NamespaceString& nss, OptionalCollectionUUID uuid, - const BSONObj& collModCmd) override; + const BSONObj& collModCmd, + const CollectionOptions& oldCollOptions, + boost::optional<TTLCollModInfo> ttlInfo) override; void onDropDatabase(OperationContext* opCtx, const std::string& dbName) override; void onDropCollection(OperationContext* opCtx, const NamespaceString& collectionName, |