diff options
4 files changed, 109 insertions, 47 deletions
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp index 1fd9045d955..b1bdbd6b1ad 100644 --- a/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp +++ b/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp @@ -847,6 +847,14 @@ Status WiredTigerKVEngine::_salvageIfNeeded(const char* uri) { WT_SESSION* session = sessionWrapper.getSession(); int rc = (session->verify)(session, uri, nullptr); + // WT may return EBUSY if the database contains dirty data. If we checkpoint and retry the + // operation it will attempt to clean up the dirty elements during checkpointing, thus allowing + // the operation to succeed if it was the only reason to fail. + if (rc == EBUSY) { + _checkpoint(session); + rc = (session->verify)(session, uri, nullptr); + } + if (rc == 0) { LOGV2(22327, "Verify succeeded. Not salvaging.", "uri"_attr = uri); return Status::OK(); @@ -861,7 +869,13 @@ Status WiredTigerKVEngine::_salvageIfNeeded(const char* uri) { } LOGV2(22328, "Verify failed. Running a salvage operation.", "uri"_attr = uri); - auto status = wtRCToStatus(session->salvage(session, uri, nullptr), session, "Salvage failed:"); + rc = session->salvage(session, uri, nullptr); + // Same reasoning for handling EBUSY errors as above. + if (rc == EBUSY) { + _checkpoint(session); + rc = session->salvage(session, uri, nullptr); + } + auto status = wtRCToStatus(rc, session, "Salvage failed:"); if (status.isOK()) { return {ErrorCodes::DataModifiedByRepair, str::stream() << "Salvaged data for " << uri}; } @@ -913,6 +927,13 @@ Status WiredTigerKVEngine::_rebuildIdent(WT_SESSION* session, const char* uri) { } int rc = session->drop(session, uri, nullptr); + // WT may return EBUSY if the database contains dirty data. If we checkpoint and retry the + // operation it will attempt to clean up the dirty elements during checkpointing, thus allowing + // the operation to succeed if it was the only reason to fail. + if (rc == EBUSY) { + _checkpoint(session); + rc = session->drop(session, uri, nullptr); + } if (rc != 0) { auto status = wtRCToStatus(rc, session); LOGV2_ERROR(22358, @@ -1714,16 +1735,35 @@ std::unique_ptr<RecordStore> WiredTigerKVEngine::makeTemporaryRecordStore(Operat void WiredTigerKVEngine::alterIdentMetadata(OperationContext* opCtx, StringData ident, const IndexDescriptor* desc) { - WiredTigerSession session(_conn); - std::string uri = _uri(ident); - // Make the alter call to update metadata without taking exclusive lock to avoid conflicts with // concurrent operations. std::string alterString = WiredTigerIndex::generateAppMetadataString(*desc) + "exclusive_refreshed=false,"; - invariantWTOK( - session.getSession()->alter(session.getSession(), uri.c_str(), alterString.c_str()), - session.getSession()); + std::string uri = _uri(ident); + auto status = alterMetadata(uri, alterString); + invariantStatusOK(status); +} + +Status WiredTigerKVEngine::alterMetadata(StringData uri, StringData config) { + // Use a dedicated session in an alter operation to avoid transaction issues. + WiredTigerSession session(_conn); + auto sessionPtr = session.getSession(); + + auto uriNullTerminated = uri.toString(); + auto configNullTerminated = config.toString(); + + auto ret = + sessionPtr->alter(sessionPtr, uriNullTerminated.c_str(), configNullTerminated.c_str()); + // WT may return EBUSY if the database contains dirty data. If we checkpoint and retry the + // operation it will attempt to clean up the dirty elements during checkpointing, thus allowing + // the operation to succeed if it was the only reason to fail. + if (ret == EBUSY) { + _checkpoint(sessionPtr); + ret = + sessionPtr->alter(sessionPtr, uriNullTerminated.c_str(), configNullTerminated.c_str()); + } + + return wtRCToStatus(ret, sessionPtr); } Status WiredTigerKVEngine::dropIdent(RecoveryUnit* ru, @@ -1891,7 +1931,11 @@ bool WiredTigerKVEngine::supportsDirectoryPerDB() const { return true; } -void WiredTigerKVEngine::checkpoint() { +void WiredTigerKVEngine::_checkpoint(WT_SESSION* session) { + // TODO: SERVER-64507: Investigate whether we can smartly rely on one checkpointer if two or + // more threads checkpoint at the same time. + stdx::lock_guard lk(_checkpointMutex); + const Timestamp stableTimestamp = getStableTimestamp(); const Timestamp initialDataTimestamp = getInitialDataTimestamp(); @@ -1923,9 +1967,7 @@ void WiredTigerKVEngine::checkpoint() { // Third, stableTimestamp >= initialDataTimestamp: Take stable checkpoint. Steady state // case. if (initialDataTimestamp.asULL() <= 1) { - UniqueWiredTigerSession session = _sessionCache->getSession(); - WT_SESSION* s = session->getSession(); - invariantWTOK(s->checkpoint(s, "use_timestamp=false"), s); + invariantWTOK(session->checkpoint(session, "use_timestamp=false"), session); LOGV2_FOR_RECOVERY(5576602, 2, "Completed unstable checkpoint.", @@ -1946,9 +1988,7 @@ void WiredTigerKVEngine::checkpoint() { "stableTimestamp"_attr = stableTimestamp, "oplogNeededForRollback"_attr = toString(oplogNeededForRollback)); - UniqueWiredTigerSession session = _sessionCache->getSession(); - WT_SESSION* s = session->getSession(); - invariantWTOK(s->checkpoint(s, "use_timestamp=true"), s); + invariantWTOK(session->checkpoint(session, "use_timestamp=true"), session); if (oplogNeededForRollback.isOK()) { // Now that the checkpoint is durable, publish the oplog needed to recover from it. @@ -1962,6 +2002,12 @@ void WiredTigerKVEngine::checkpoint() { } } +void WiredTigerKVEngine::checkpoint() { + UniqueWiredTigerSession session = _sessionCache->getSession(); + WT_SESSION* s = session->getSession(); + return _checkpoint(s); +} + bool WiredTigerKVEngine::hasIdent(OperationContext* opCtx, StringData ident) const { return _hasUri(WiredTigerRecoveryUnit::get(opCtx)->getSession()->getSession(), _uri(ident)); } diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.h b/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.h index b8462730957..a3ec8770a11 100644 --- a/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.h +++ b/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.h @@ -185,6 +185,8 @@ public: StringData ident, const IndexDescriptor* desc) override; + Status alterMetadata(StringData uri, StringData config); + void flushAllFiles(OperationContext* opCtx, bool callerHoldsReadLock) override; Status beginBackup(OperationContext* opCtx) override; @@ -384,6 +386,8 @@ private: StorageEngine::DropIdentCallback callback; }; + void _checkpoint(WT_SESSION* session); + /** * Opens a connection on the WiredTiger database 'path' with the configuration 'wtOpenConfig'. * Only returns when successful. Intializes both '_conn' and '_fileVersion'. @@ -511,5 +515,10 @@ private: // Pins the oplog so that OplogStones will not truncate oplog history equal or newer to this // timestamp. AtomicWord<std::uint64_t> _pinnedOplogTimestamp; + + // Limits the actions of concurrent checkpoint callers as we update some internal data during a + // checkpoint. WT has a mutex of its own to only have one checkpoint active at all times so this + // is only to protect our internal updates. + Mutex _checkpointMutex = MONGO_MAKE_LATCH("WiredTigerKVEngine::_checkpointMutex"); }; } // namespace mongo diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_util.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_util.cpp index 30fc736b576..4d655dc4da4 100644 --- a/src/mongo/db/storage/wiredtiger/wiredtiger_util.cpp +++ b/src/mongo/db/storage/wiredtiger/wiredtiger_util.cpp @@ -126,23 +126,27 @@ void removeTableChecksFile() { } } -void setTableWriteTimestampAssertion(WT_SESSION* session, const std::string& uri, bool on) { +void setTableWriteTimestampAssertion(WiredTigerSessionCache* sessionCache, + const std::string& uri, + bool on) { const std::string setting = on ? "assert=(write_timestamp=on)" : "assert=(write_timestamp=off)"; LOGV2_DEBUG(6003700, 1, "Changing table write timestamp assertion settings", "uri"_attr = uri, "writeTimestampAssertionOn"_attr = on); - int ret = session->alter(session, uri.c_str(), setting.c_str()); - if (ret) { - LOGV2_FATAL(6003701, - "Failed to update write timestamp assertion setting", - "uri"_attr = uri, - "writeTimestampAssertionOn"_attr = on, - "error"_attr = ret, - "metadata"_attr = - redact(WiredTigerUtil::getMetadataCreate(session, uri).getValue()), - "message"_attr = session->strerror(session, ret)); + auto status = sessionCache->getKVEngine()->alterMetadata(uri, setting); + if (!status.isOK()) { + auto sessionPtr = sessionCache->getSession(); + LOGV2_FATAL( + 6003701, + "Failed to update write timestamp assertion setting", + "uri"_attr = uri, + "writeTimestampAssertionOn"_attr = on, + "error"_attr = status.code(), + "metadata"_attr = + redact(WiredTigerUtil::getMetadataCreate(sessionPtr->getSession(), uri).getValue()), + "message"_attr = status.reason()); } } @@ -861,18 +865,12 @@ Status WiredTigerUtil::setTableLogging(OperationContext* opCtx, const std::strin WiredTigerSessionCache* sessionCache = WiredTigerRecoveryUnit::get(opCtx)->getSessionCache(); sessionCache->closeAllCursors(uri); - // Use a dedicated session for alter operations to avoid transaction issues. - WiredTigerSession session(sessionCache->conn()); - return setTableLogging(session.getSession(), uri, on); -} - -Status WiredTigerUtil::setTableLogging(WT_SESSION* session, const std::string& uri, bool on) { invariant(!storageGlobalParams.readOnly); stdx::lock_guard<Latch> lk(_tableLoggingInfoMutex); // Update the table logging settings regardless if we're no longer starting up the process. if (!_tableLoggingInfo.isInitializing) { - return _setTableLogging(session, uri, on); + return _setTableLogging(sessionCache, uri, on); } // During the start up process, the table logging settings are checked for each table to verify @@ -924,12 +922,12 @@ Status WiredTigerUtil::setTableLogging(WT_SESSION* session, const std::string& u _tableLoggingInfo.hasPreviouslyIncompleteTableChecks); } - return _setTableLogging(session, uri, on); + return _setTableLogging(sessionCache, uri, on); } if (!_tableLoggingInfo.isFirstTable) { if (_tableLoggingInfo.changeTableLogging) { - return _setTableLogging(session, uri, on); + return _setTableLogging(sessionCache, uri, on); } // The table logging settings do not need to be modified. @@ -946,7 +944,7 @@ Status WiredTigerUtil::setTableLogging(WT_SESSION* session, const std::string& u // Check if the first tables logging settings need to be modified. const std::string setting = on ? "log=(enabled=true)" : "log=(enabled=false)"; - const std::string existingMetadata = getMetadataCreate(session, uri).getValue(); + const std::string existingMetadata = getMetadataCreate(opCtx, uri).getValue(); if (existingMetadata.find(setting) != std::string::npos) { // The table is running with the expected logging settings. LOGV2(4366408, @@ -964,7 +962,7 @@ Status WiredTigerUtil::setTableLogging(WT_SESSION* session, const std::string& u "Modifying the table logging settings for all existing WiredTiger tables", "loggingEnabled"_attr = on); - Status status = _setTableLogging(session, uri, on); + Status status = _setTableLogging(sessionCache, uri, on); if (MONGO_unlikely(crashAfterUpdatingFirstTableLoggingSettings.shouldFail())) { LOGV2_FATAL_NOTRACE( @@ -973,7 +971,11 @@ Status WiredTigerUtil::setTableLogging(WT_SESSION* session, const std::string& u return status; } -Status WiredTigerUtil::_setTableLogging(WT_SESSION* session, const std::string& uri, bool on) { +Status WiredTigerUtil::_setTableLogging(WiredTigerSessionCache* sessionCache, + const std::string& uri, + bool on) { + auto engine = sessionCache->getKVEngine(); + const std::string setting = on ? "log=(enabled=true)" : "log=(enabled=false)"; // This method does some "weak" parsing to see if the table is in the expected logging @@ -983,7 +985,11 @@ Status WiredTigerUtil::_setTableLogging(WT_SESSION* session, const std::string& // // If the settings need to be changed (only expected at startup), the alter table call must // succeed. - std::string existingMetadata = getMetadataCreate(session, uri).getValue(); + std::string existingMetadata; + { + auto session = sessionCache->getSession(); + existingMetadata = getMetadataCreate(session->getSession(), uri).getValue(); + } if (existingMetadata.find("log=(enabled=true)") != std::string::npos && existingMetadata.find("log=(enabled=false)") != std::string::npos) { // Sanity check against a table having multiple logging specifications. @@ -999,24 +1005,24 @@ Status WiredTigerUtil::_setTableLogging(WT_SESSION* session, const std::string& LOGV2_DEBUG( 22432, 1, "Changing table logging settings", "uri"_attr = uri, "loggingEnabled"_attr = on); - int ret = session->alter(session, uri.c_str(), setting.c_str()); - if (ret) { + auto status = engine->alterMetadata(uri, setting); + if (!status.isOK()) { LOGV2_FATAL(50756, "Failed to update log setting", "uri"_attr = uri, "loggingEnabled"_attr = on, - "error"_attr = ret, + "error"_attr = status.code(), "metadata"_attr = redact(existingMetadata), - "message"_attr = session->strerror(session, ret)); + "message"_attr = status.reason()); } // The write timestamp assertion setting only needs to be changed at startup. It will be turned // on when logging is disabled, and off when logging is enabled. if (TestingProctor::instance().isEnabled()) { - setTableWriteTimestampAssertion(session, uri, !on); + setTableWriteTimestampAssertion(sessionCache, uri, !on); } else { // Disables the assertion when the testing proctor is off. - setTableWriteTimestampAssertion(session, uri, false /* on */); + setTableWriteTimestampAssertion(sessionCache, uri, false /* on */); } return Status::OK(); diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_util.h b/src/mongo/db/storage/wiredtiger/wiredtiger_util.h index 4f873a47345..eb2dd66c863 100644 --- a/src/mongo/db/storage/wiredtiger/wiredtiger_util.h +++ b/src/mongo/db/storage/wiredtiger/wiredtiger_util.h @@ -46,6 +46,7 @@ class OperationContext; class WiredTigerConfigParser; class WiredTigerKVEngine; class WiredTigerSession; +class WiredTigerSessionCache; Status wtRCToStatus_slow(int retCode, WT_SESSION* session, StringData prefix); @@ -305,8 +306,6 @@ public: static Status setTableLogging(OperationContext* opCtx, const std::string& uri, bool on); - static Status setTableLogging(WT_SESSION* session, const std::string& uri, bool on); - /** * Generates a WiredTiger connection configuration given the LOGV2 WiredTiger components * verbosity levels. @@ -328,7 +327,9 @@ private: template <typename T> static T _castStatisticsValue(uint64_t statisticsValue, T maximumResultType); - static Status _setTableLogging(WT_SESSION* session, const std::string& uri, bool on); + static Status _setTableLogging(WiredTigerSessionCache* sessionCache, + const std::string& uri, + bool on); // Used to keep track of the table logging setting modifications during start up. The mutex must // be held prior to accessing any of the member variables in the struct. |