summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Jordi Olivares Provencio <jordi.olivares-provencio@mongodb.com> 2022-03-16 09:42:53 +0000
committer Evergreen Agent <no-reply@evergreen.mongodb.com> 2022-03-16 10:09:37 +0000
commit dfe7541231e794887be35eff95482285f6e21f35 (patch)
tree 6bc6720674587f643ffb1d4bb6e358746d9c1da5
parent 465ab57cc70ff86170e78196b8e001ff8c74ef57 (diff)
download mongo-dfe7541231e794887be35eff95482285f6e21f35.tar.gz
SERVER-64026 Update WT operations that require exclusive access to a dhandle
-rw-r--r-- src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp 74
-rw-r--r-- src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.h 9
-rw-r--r-- src/mongo/db/storage/wiredtiger/wiredtiger_util.cpp 66
-rw-r--r-- src/mongo/db/storage/wiredtiger/wiredtiger_util.h 7
4 files changed, 109 insertions, 47 deletions
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp
index 1fd9045d955..b1bdbd6b1ad 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp
@@ -847,6 +847,14 @@ Status WiredTigerKVEngine::_salvageIfNeeded(const char* uri) {
WT_SESSION* session = sessionWrapper.getSession();
int rc = (session->verify)(session, uri, nullptr);
+ // WT may return EBUSY if the database contains dirty data. If we checkpoint and retry the
+ // operation it will attempt to clean up the dirty elements during checkpointing, thus allowing
+ // the operation to succeed if it was the only reason to fail.
+ if (rc == EBUSY) {
+ _checkpoint(session);
+ rc = (session->verify)(session, uri, nullptr);
+ }
+
if (rc == 0) {
LOGV2(22327, "Verify succeeded. Not salvaging.", "uri"_attr = uri);
return Status::OK();
@@ -861,7 +869,13 @@ Status WiredTigerKVEngine::_salvageIfNeeded(const char* uri) {
}
LOGV2(22328, "Verify failed. Running a salvage operation.", "uri"_attr = uri);
- auto status = wtRCToStatus(session->salvage(session, uri, nullptr), session, "Salvage failed:");
+ rc = session->salvage(session, uri, nullptr);
+ // Same reasoning for handling EBUSY errors as above.
+ if (rc == EBUSY) {
+ _checkpoint(session);
+ rc = session->salvage(session, uri, nullptr);
+ }
+ auto status = wtRCToStatus(rc, session, "Salvage failed:");
if (status.isOK()) {
return {ErrorCodes::DataModifiedByRepair, str::stream() << "Salvaged data for " << uri};
}
@@ -913,6 +927,13 @@ Status WiredTigerKVEngine::_rebuildIdent(WT_SESSION* session, const char* uri) {
}
int rc = session->drop(session, uri, nullptr);
+ // WT may return EBUSY if the database contains dirty data. If we checkpoint and retry the
+ // operation it will attempt to clean up the dirty elements during checkpointing, thus allowing
+ // the operation to succeed if it was the only reason to fail.
+ if (rc == EBUSY) {
+ _checkpoint(session);
+ rc = session->drop(session, uri, nullptr);
+ }
if (rc != 0) {
auto status = wtRCToStatus(rc, session);
LOGV2_ERROR(22358,
@@ -1714,16 +1735,35 @@ std::unique_ptr<RecordStore> WiredTigerKVEngine::makeTemporaryRecordStore(Operat
void WiredTigerKVEngine::alterIdentMetadata(OperationContext* opCtx,
StringData ident,
const IndexDescriptor* desc) {
- WiredTigerSession session(_conn);
- std::string uri = _uri(ident);
-
// Make the alter call to update metadata without taking exclusive lock to avoid conflicts with
// concurrent operations.
std::string alterString =
WiredTigerIndex::generateAppMetadataString(*desc) + "exclusive_refreshed=false,";
- invariantWTOK(
- session.getSession()->alter(session.getSession(), uri.c_str(), alterString.c_str()),
- session.getSession());
+ std::string uri = _uri(ident);
+ auto status = alterMetadata(uri, alterString);
+ invariantStatusOK(status);
+}
+
+Status WiredTigerKVEngine::alterMetadata(StringData uri, StringData config) {
+ // Use a dedicated session in an alter operation to avoid transaction issues.
+ WiredTigerSession session(_conn);
+ auto sessionPtr = session.getSession();
+
+ auto uriNullTerminated = uri.toString();
+ auto configNullTerminated = config.toString();
+
+ auto ret =
+ sessionPtr->alter(sessionPtr, uriNullTerminated.c_str(), configNullTerminated.c_str());
+ // WT may return EBUSY if the database contains dirty data. If we checkpoint and retry the
+ // operation it will attempt to clean up the dirty elements during checkpointing, thus allowing
+ // the operation to succeed if it was the only reason to fail.
+ if (ret == EBUSY) {
+ _checkpoint(sessionPtr);
+ ret =
+ sessionPtr->alter(sessionPtr, uriNullTerminated.c_str(), configNullTerminated.c_str());
+ }
+
+ return wtRCToStatus(ret, sessionPtr);
}
Status WiredTigerKVEngine::dropIdent(RecoveryUnit* ru,
@@ -1891,7 +1931,11 @@ bool WiredTigerKVEngine::supportsDirectoryPerDB() const {
return true;
}
-void WiredTigerKVEngine::checkpoint() {
+void WiredTigerKVEngine::_checkpoint(WT_SESSION* session) {
+ // TODO: SERVER-64507: Investigate whether we can smartly rely on one checkpointer if two or
+ // more threads checkpoint at the same time.
+ stdx::lock_guard lk(_checkpointMutex);
+
const Timestamp stableTimestamp = getStableTimestamp();
const Timestamp initialDataTimestamp = getInitialDataTimestamp();
@@ -1923,9 +1967,7 @@ void WiredTigerKVEngine::checkpoint() {
// Third, stableTimestamp >= initialDataTimestamp: Take stable checkpoint. Steady state
// case.
if (initialDataTimestamp.asULL() <= 1) {
- UniqueWiredTigerSession session = _sessionCache->getSession();
- WT_SESSION* s = session->getSession();
- invariantWTOK(s->checkpoint(s, "use_timestamp=false"), s);
+ invariantWTOK(session->checkpoint(session, "use_timestamp=false"), session);
LOGV2_FOR_RECOVERY(5576602,
2,
"Completed unstable checkpoint.",
@@ -1946,9 +1988,7 @@ void WiredTigerKVEngine::checkpoint() {
"stableTimestamp"_attr = stableTimestamp,
"oplogNeededForRollback"_attr = toString(oplogNeededForRollback));
- UniqueWiredTigerSession session = _sessionCache->getSession();
- WT_SESSION* s = session->getSession();
- invariantWTOK(s->checkpoint(s, "use_timestamp=true"), s);
+ invariantWTOK(session->checkpoint(session, "use_timestamp=true"), session);
if (oplogNeededForRollback.isOK()) {
// Now that the checkpoint is durable, publish the oplog needed to recover from it.
@@ -1962,6 +2002,12 @@ void WiredTigerKVEngine::checkpoint() {
}
}
+void WiredTigerKVEngine::checkpoint() {
+ UniqueWiredTigerSession session = _sessionCache->getSession();
+ WT_SESSION* s = session->getSession();
+ return _checkpoint(s);
+}
+
bool WiredTigerKVEngine::hasIdent(OperationContext* opCtx, StringData ident) const {
return _hasUri(WiredTigerRecoveryUnit::get(opCtx)->getSession()->getSession(), _uri(ident));
}
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.h b/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.h
index b8462730957..a3ec8770a11 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.h
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.h
@@ -185,6 +185,8 @@ public:
StringData ident,
const IndexDescriptor* desc) override;
+ Status alterMetadata(StringData uri, StringData config);
+
void flushAllFiles(OperationContext* opCtx, bool callerHoldsReadLock) override;
Status beginBackup(OperationContext* opCtx) override;
@@ -384,6 +386,8 @@ private:
StorageEngine::DropIdentCallback callback;
};
+ void _checkpoint(WT_SESSION* session);
+
/**
* Opens a connection on the WiredTiger database 'path' with the configuration 'wtOpenConfig'.
* Only returns when successful. Intializes both '_conn' and '_fileVersion'.
@@ -511,5 +515,10 @@ private:
// Pins the oplog so that OplogStones will not truncate oplog history equal or newer to this
// timestamp.
AtomicWord<std::uint64_t> _pinnedOplogTimestamp;
+
+ // Limits the actions of concurrent checkpoint callers as we update some internal data during a
+ // checkpoint. WT has a mutex of its own to only have one checkpoint active at all times so this
+ // is only to protect our internal updates.
+ Mutex _checkpointMutex = MONGO_MAKE_LATCH("WiredTigerKVEngine::_checkpointMutex");
};
} // namespace mongo
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_util.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_util.cpp
index 30fc736b576..4d655dc4da4 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_util.cpp
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_util.cpp
@@ -126,23 +126,27 @@ void removeTableChecksFile() {
}
}
-void setTableWriteTimestampAssertion(WT_SESSION* session, const std::string& uri, bool on) {
+void setTableWriteTimestampAssertion(WiredTigerSessionCache* sessionCache,
+ const std::string& uri,
+ bool on) {
const std::string setting = on ? "assert=(write_timestamp=on)" : "assert=(write_timestamp=off)";
LOGV2_DEBUG(6003700,
1,
"Changing table write timestamp assertion settings",
"uri"_attr = uri,
"writeTimestampAssertionOn"_attr = on);
- int ret = session->alter(session, uri.c_str(), setting.c_str());
- if (ret) {
- LOGV2_FATAL(6003701,
- "Failed to update write timestamp assertion setting",
- "uri"_attr = uri,
- "writeTimestampAssertionOn"_attr = on,
- "error"_attr = ret,
- "metadata"_attr =
- redact(WiredTigerUtil::getMetadataCreate(session, uri).getValue()),
- "message"_attr = session->strerror(session, ret));
+ auto status = sessionCache->getKVEngine()->alterMetadata(uri, setting);
+ if (!status.isOK()) {
+ auto sessionPtr = sessionCache->getSession();
+ LOGV2_FATAL(
+ 6003701,
+ "Failed to update write timestamp assertion setting",
+ "uri"_attr = uri,
+ "writeTimestampAssertionOn"_attr = on,
+ "error"_attr = status.code(),
+ "metadata"_attr =
+ redact(WiredTigerUtil::getMetadataCreate(sessionPtr->getSession(), uri).getValue()),
+ "message"_attr = status.reason());
}
}
@@ -861,18 +865,12 @@ Status WiredTigerUtil::setTableLogging(OperationContext* opCtx, const std::strin
WiredTigerSessionCache* sessionCache = WiredTigerRecoveryUnit::get(opCtx)->getSessionCache();
sessionCache->closeAllCursors(uri);
- // Use a dedicated session for alter operations to avoid transaction issues.
- WiredTigerSession session(sessionCache->conn());
- return setTableLogging(session.getSession(), uri, on);
-}
-
-Status WiredTigerUtil::setTableLogging(WT_SESSION* session, const std::string& uri, bool on) {
invariant(!storageGlobalParams.readOnly);
stdx::lock_guard<Latch> lk(_tableLoggingInfoMutex);
// Update the table logging settings regardless if we're no longer starting up the process.
if (!_tableLoggingInfo.isInitializing) {
- return _setTableLogging(session, uri, on);
+ return _setTableLogging(sessionCache, uri, on);
}
// During the start up process, the table logging settings are checked for each table to verify
@@ -924,12 +922,12 @@ Status WiredTigerUtil::setTableLogging(WT_SESSION* session, const std::string& u
_tableLoggingInfo.hasPreviouslyIncompleteTableChecks);
}
- return _setTableLogging(session, uri, on);
+ return _setTableLogging(sessionCache, uri, on);
}
if (!_tableLoggingInfo.isFirstTable) {
if (_tableLoggingInfo.changeTableLogging) {
- return _setTableLogging(session, uri, on);
+ return _setTableLogging(sessionCache, uri, on);
}
// The table logging settings do not need to be modified.
@@ -946,7 +944,7 @@ Status WiredTigerUtil::setTableLogging(WT_SESSION* session, const std::string& u
// Check if the first tables logging settings need to be modified.
const std::string setting = on ? "log=(enabled=true)" : "log=(enabled=false)";
- const std::string existingMetadata = getMetadataCreate(session, uri).getValue();
+ const std::string existingMetadata = getMetadataCreate(opCtx, uri).getValue();
if (existingMetadata.find(setting) != std::string::npos) {
// The table is running with the expected logging settings.
LOGV2(4366408,
@@ -964,7 +962,7 @@ Status WiredTigerUtil::setTableLogging(WT_SESSION* session, const std::string& u
"Modifying the table logging settings for all existing WiredTiger tables",
"loggingEnabled"_attr = on);
- Status status = _setTableLogging(session, uri, on);
+ Status status = _setTableLogging(sessionCache, uri, on);
if (MONGO_unlikely(crashAfterUpdatingFirstTableLoggingSettings.shouldFail())) {
LOGV2_FATAL_NOTRACE(
@@ -973,7 +971,11 @@ Status WiredTigerUtil::setTableLogging(WT_SESSION* session, const std::string& u
return status;
}
-Status WiredTigerUtil::_setTableLogging(WT_SESSION* session, const std::string& uri, bool on) {
+Status WiredTigerUtil::_setTableLogging(WiredTigerSessionCache* sessionCache,
+ const std::string& uri,
+ bool on) {
+ auto engine = sessionCache->getKVEngine();
+
const std::string setting = on ? "log=(enabled=true)" : "log=(enabled=false)";
// This method does some "weak" parsing to see if the table is in the expected logging
@@ -983,7 +985,11 @@ Status WiredTigerUtil::_setTableLogging(WT_SESSION* session, const std::string&
//
// If the settings need to be changed (only expected at startup), the alter table call must
// succeed.
- std::string existingMetadata = getMetadataCreate(session, uri).getValue();
+ std::string existingMetadata;
+ {
+ auto session = sessionCache->getSession();
+ existingMetadata = getMetadataCreate(session->getSession(), uri).getValue();
+ }
if (existingMetadata.find("log=(enabled=true)") != std::string::npos &&
existingMetadata.find("log=(enabled=false)") != std::string::npos) {
// Sanity check against a table having multiple logging specifications.
@@ -999,24 +1005,24 @@ Status WiredTigerUtil::_setTableLogging(WT_SESSION* session, const std::string&
LOGV2_DEBUG(
22432, 1, "Changing table logging settings", "uri"_attr = uri, "loggingEnabled"_attr = on);
- int ret = session->alter(session, uri.c_str(), setting.c_str());
- if (ret) {
+ auto status = engine->alterMetadata(uri, setting);
+ if (!status.isOK()) {
LOGV2_FATAL(50756,
"Failed to update log setting",
"uri"_attr = uri,
"loggingEnabled"_attr = on,
- "error"_attr = ret,
+ "error"_attr = status.code(),
"metadata"_attr = redact(existingMetadata),
- "message"_attr = session->strerror(session, ret));
+ "message"_attr = status.reason());
}
// The write timestamp assertion setting only needs to be changed at startup. It will be turned
// on when logging is disabled, and off when logging is enabled.
if (TestingProctor::instance().isEnabled()) {
- setTableWriteTimestampAssertion(session, uri, !on);
+ setTableWriteTimestampAssertion(sessionCache, uri, !on);
} else {
// Disables the assertion when the testing proctor is off.
- setTableWriteTimestampAssertion(session, uri, false /* on */);
+ setTableWriteTimestampAssertion(sessionCache, uri, false /* on */);
}
return Status::OK();
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_util.h b/src/mongo/db/storage/wiredtiger/wiredtiger_util.h
index 4f873a47345..eb2dd66c863 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_util.h
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_util.h
@@ -46,6 +46,7 @@ class OperationContext;
class WiredTigerConfigParser;
class WiredTigerKVEngine;
class WiredTigerSession;
+class WiredTigerSessionCache;
Status wtRCToStatus_slow(int retCode, WT_SESSION* session, StringData prefix);
@@ -305,8 +306,6 @@ public:
static Status setTableLogging(OperationContext* opCtx, const std::string& uri, bool on);
- static Status setTableLogging(WT_SESSION* session, const std::string& uri, bool on);
-
/**
* Generates a WiredTiger connection configuration given the LOGV2 WiredTiger components
* verbosity levels.
@@ -328,7 +327,9 @@ private:
template <typename T>
static T _castStatisticsValue(uint64_t statisticsValue, T maximumResultType);
- static Status _setTableLogging(WT_SESSION* session, const std::string& uri, bool on);
+ static Status _setTableLogging(WiredTigerSessionCache* sessionCache,
+ const std::string& uri,
+ bool on);
// Used to keep track of the table logging setting modifications during start up. The mutex must
// be held prior to accessing any of the member variables in the struct.