diff options
Diffstat (limited to 'src/mongo/s/catalog_cache.h')
-rw-r--r-- | src/mongo/s/catalog_cache.h | 352 |
1 files changed, 75 insertions, 277 deletions
diff --git a/src/mongo/s/catalog_cache.h b/src/mongo/s/catalog_cache.h index a957189183a..796b9e10136 100644 --- a/src/mongo/s/catalog_cache.h +++ b/src/mongo/s/catalog_cache.h @@ -45,8 +45,6 @@ namespace mongo { class BSONObjBuilder; -class CachedDatabaseInfo; -class OperationContext; static constexpr int kMaxNumStaleVersionRetries = 10; @@ -64,21 +62,21 @@ extern const OperationContext::Decoration<bool> operationShouldBlockBehindCatalo * in fact is impossible to compare two different DatabaseVersion that have different UUIDs. * * This class wrap a DatabaseVersion object to make it always comparable by timestamping it with a - * node-local sequence number (_dbVersionLocalSequence). + * node-local sequence number (_uuidDisambiguatingSequenceNum). * * This class class should go away once a cluster-wide comparable DatabaseVersion will be * implemented. */ class ComparableDatabaseVersion { public: - /* - * Create a ComparableDatabaseVersion that wraps the given DatabaseVersion. - * Each object created through this method will have a local sequence number grater then the + /** + * Creates a ComparableDatabaseVersion that wraps the given DatabaseVersion. + * Each object created through this method will have a local sequence number greater than the * previously created ones. */ static ComparableDatabaseVersion makeComparableDatabaseVersion(const DatabaseVersion& version); - /* + /** * Empty constructor needed by the ReadThroughCache. * * Instances created through this constructor will be always less then the ones created through @@ -86,39 +84,28 @@ public: */ ComparableDatabaseVersion() = default; - const DatabaseVersion& getVersion() const; - - uint64_t getLocalSequenceNum() const; - - BSONObj toBSON() const; + const DatabaseVersion& getVersion() const { + return *_dbVersion; + } std::string toString() const; - // Rerturns true if the two versions have the same UUID bool sameUuid(const ComparableDatabaseVersion& other) const { - return _dbVersion.getUuid() == other._dbVersion.getUuid(); + return _dbVersion->getUuid() == other._dbVersion->getUuid(); } - bool operator==(const ComparableDatabaseVersion& other) const { - return sameUuid(other) && (_dbVersion.getLastMod() == other._dbVersion.getLastMod()); - } + bool operator==(const ComparableDatabaseVersion& other) const; bool operator!=(const ComparableDatabaseVersion& other) const { return !(*this == other); } - /* - * In the case the two compared instances have different UUIDs the most recently created one - * will be grater, otherwise the comparision will be driven by the lastMod field of the - * underlying DatabaseVersion. + /** + * In case the two compared instances have different UUIDs, the most recently created one will + * be greater, otherwise the comparison will be driven by the lastMod field of the underlying + * DatabaseVersion. */ - bool operator<(const ComparableDatabaseVersion& other) const { - if (sameUuid(other)) { - return _dbVersion.getLastMod() < other._dbVersion.getLastMod(); - } else { - return _localSequenceNum < other._localSequenceNum; - } - } + bool operator<(const ComparableDatabaseVersion& other) const; bool operator>(const ComparableDatabaseVersion& other) const { return other < *this; @@ -133,92 +120,18 @@ public: } private: - static AtomicWord<uint64_t> _localSequenceNumSource; + static AtomicWord<uint64_t> _uuidDisambiguatingSequenceNumSource; + + ComparableDatabaseVersion(const DatabaseVersion& version, + uint64_t uuidDisambiguatingSequenceNum) + : _dbVersion(version), _uuidDisambiguatingSequenceNum(uuidDisambiguatingSequenceNum) {} - ComparableDatabaseVersion(const DatabaseVersion& version, uint64_t localSequenceNum) - : _dbVersion(version), _localSequenceNum(localSequenceNum) {} + boost::optional<DatabaseVersion> _dbVersion; - DatabaseVersion _dbVersion; // Locally incremented sequence number that allows to compare two database versions with // different UUIDs. Each new comparableDatabaseVersion will have a greater sequence number then // the ones created before. - uint64_t _localSequenceNum{0}; -}; - -/** - * Constructed to be used exclusively by the CatalogCache as a vector clock (Time) to drive - * CollectionCache's lookups. - * - * The ChunkVersion class contains an non comparable epoch, which makes impossible to compare two - * ChunkVersions when their epochs's differ. - * - * This class wraps a ChunkVersion object with a node-local sequence number (_localSequenceNum) that - * allows the comparision. - * - * This class should go away once a cluster-wide comparable ChunkVersion is implemented. - */ -class ComparableChunkVersion { -public: - static ComparableChunkVersion makeComparableChunkVersion(const ChunkVersion& version); - - ComparableChunkVersion() = default; - - const ChunkVersion& getVersion() const; - - uint64_t getLocalSequenceNum() const; - - BSONObj toBSON() const; - - std::string toString() const; - - bool sameEpoch(const ComparableChunkVersion& other) const { - return _chunkVersion.epoch() == other._chunkVersion.epoch(); - } - - bool operator==(const ComparableChunkVersion& other) const { - return sameEpoch(other) && - (_chunkVersion.majorVersion() == other._chunkVersion.majorVersion() && - _chunkVersion.minorVersion() == other._chunkVersion.minorVersion()); - } - - bool operator!=(const ComparableChunkVersion& other) const { - return !(*this == other); - } - - bool operator<(const ComparableChunkVersion& other) const { - if (sameEpoch(other)) { - return _chunkVersion.majorVersion() < other._chunkVersion.majorVersion() || - (_chunkVersion.majorVersion() == other._chunkVersion.majorVersion() && - _chunkVersion.minorVersion() < other._chunkVersion.minorVersion()); - } else { - return _localSequenceNum < other._localSequenceNum; - } - } - - bool operator>(const ComparableChunkVersion& other) const { - return other < *this; - } - - bool operator<=(const ComparableChunkVersion& other) const { - return !(*this > other); - } - - bool operator>=(const ComparableChunkVersion& other) const { - return !(*this < other); - } - -private: - static AtomicWord<uint64_t> _localSequenceNumSource; - - ComparableChunkVersion(const ChunkVersion& version, uint64_t localSequenceNum) - : _chunkVersion(version), _localSequenceNum(localSequenceNum) {} - - ChunkVersion _chunkVersion; - - // Locally incremented sequence number that allows to compare two colection versions with - // different epochs. Each new comparableChunkVersion will have a greater sequence number than - // the ones created before. - uint64_t _localSequenceNum{0}; + uint64_t _uuidDisambiguatingSequenceNum{0}; }; /** @@ -298,21 +211,9 @@ public: /** * Same as getCollectionRoutingInfo above, but in addition causes the namespace to be refreshed. - * - * When forceRefreshFromThisThread is false, it's possible for this call to - * join an ongoing refresh from another thread forceRefreshFromThisThread. - * forceRefreshFromThisThread checks whether it joined another thread and - * then forces it to try again, which is necessary in cases where calls to - * getCollectionRoutingInfoWithRefresh must be causally consistent - * - * TODO: Remove this parameter in favor of using collection creation time + - * collection version to decide when a refresh is necessary and provide - * proper causal consistency */ - StatusWith<ChunkManager> getCollectionRoutingInfoWithRefresh( - OperationContext* opCtx, - const NamespaceString& nss, - bool forceRefreshFromThisThread = false); + StatusWith<ChunkManager> getCollectionRoutingInfoWithRefresh(OperationContext* opCtx, + const NamespaceString& nss); /** * Same as getCollectionRoutingInfoWithRefresh above, but in addition returns a @@ -333,11 +234,6 @@ public: const boost::optional<DatabaseVersion>& wantedVersion); /** - * Gets whether this operation should block behind a catalog cache refresh. - */ - static bool getOperationShouldBlockBehindCatalogCacheRefresh(OperationContext* opCtx); - - /** * Sets whether this operation should block behind a catalog cache refresh. */ static void setOperationShouldBlockBehindCatalogCacheRefresh(OperationContext* opCtx, @@ -349,18 +245,9 @@ public: * requests to block on an upcoming catalog cache refresh. */ void invalidateShardOrEntireCollectionEntryForShardedCollection( - OperationContext* opCtx, const NamespaceString& nss, - boost::optional<ChunkVersion> wantedVersion, - const ChunkVersion& receivedVersion, - ShardId shardId); - - /** - * Non-blocking method that marks the current collection entry for the namespace as needing - * refresh due to an epoch change. Will cause all further targetting attempts for this - * namespace to block on a catalog cache refresh. - */ - void onEpochChange(const NamespaceString& nss); + const boost::optional<ChunkVersion>& wantedVersion, + const ShardId& shardId); /** * Throws a StaleConfigException if this catalog cache does not have an entry for the given @@ -370,16 +257,8 @@ public: * version to throw a StaleConfigException. */ void checkEpochOrThrow(const NamespaceString& nss, - ChunkVersion targetCollectionVersion, - const ShardId& shardId) const; - - /** - * Non-blocking method, which invalidates the shard for the routing table for the specified - * namespace. If that shard is targetted in the future, getCollectionRoutingInfo will wait on a - * refresh. - */ - void invalidateShardForShardedCollection(const NamespaceString& nss, - const ShardId& staleShardId); + const ChunkVersion& targetCollectionVersion, + const ShardId& shardId); /** * Non-blocking method, which invalidates all namespaces which contain data on the specified @@ -388,12 +267,6 @@ public: void invalidateEntriesThatReferenceShard(const ShardId& shardId); /** - * Non-blocking method, which removes the entire specified collection from the cache (resulting - * in full refresh on subsequent access) - */ - void purgeCollection(const NamespaceString& nss); - - /** * Non-blocking method, which removes the entire specified database (including its collections) * from the cache. */ @@ -416,35 +289,17 @@ public: */ void checkAndRecordOperationBlockedByRefresh(OperationContext* opCtx, mongo::LogicalOp opType); + /** + * Non-blocking method that marks the current collection entry for the namespace as needing + * refresh. Will cause all further targetting attempts to block on a catalog cache refresh, + * even if they do not require causal consistency. + */ + void invalidateCollectionEntry_LINEARIZABLE(const NamespaceString& nss); + private: // Make the cache entries friends so they can access the private classes below friend class CachedDatabaseInfo; - /** - * Cache entry describing a collection. - */ - struct CollectionRoutingInfoEntry { - CollectionRoutingInfoEntry() = default; - // Disable copy (and move) semantics - CollectionRoutingInfoEntry(const CollectionRoutingInfoEntry&) = delete; - CollectionRoutingInfoEntry& operator=(const CollectionRoutingInfoEntry&) = delete; - - // Specifies whether this cache entry needs a refresh (in which case routingInfo should not - // be relied on) or it doesn't, in which case there should be a non-null routingInfo. - bool needsRefresh{true}; - - // Specifies whether the namespace has had an epoch change, which indicates that every - // shard should block on an upcoming refresh. - bool epochHasChanged{true}; - - // Contains a notification to be waited on for the refresh to complete (only available if - // needsRefresh is true) - std::shared_ptr<Notification<Status>> refreshCompletionNotification; - - // Contains the cached routing information (only available if needsRefresh is false) - std::shared_ptr<RoutingTableHistory> routingInfo; - }; - class DatabaseCache : public ReadThroughCache<std::string, DatabaseType, ComparableDatabaseVersion> { public: @@ -461,88 +316,54 @@ private: Mutex _mutex = MONGO_MAKE_LATCH("DatabaseCache::_mutex"); }; - /** - * Non-blocking call which schedules an asynchronous refresh for the specified namespace. The - * namespace must be in the 'needRefresh' state. - */ - void _scheduleCollectionRefresh(WithLock, - ServiceContext* service, - std::shared_ptr<CollectionRoutingInfoEntry> collEntry, - NamespaceString const& nss, - int refreshAttempt); + class CollectionCache : public RoutingTableHistoryCache { + public: + CollectionCache(ServiceContext* service, + ThreadPoolInterface& threadPool, + CatalogCacheLoader& catalogCacheLoader); - /** - * Marks a collection entry as needing refresh. Will create the collection entry if one does - * not exist. Also marks the epoch as changed, which will cause all further targetting requests - * against this namespace to block upon a catalog cache refresh. - */ - void _createOrGetCollectionEntryAndMarkEpochStale(const NamespaceString& nss); + void reportStats(BSONObjBuilder* builder) const; - /** - * Marks a collection entry as needing refresh. Will create the collection entry if one does - * not exist. Will mark the given shard ID as stale, which will cause all further targetting - * requests for the given shard for this namespace to block upon a catalog cache refresh. - */ - void _createOrGetCollectionEntryAndMarkShardStale(const NamespaceString& nss, - const ShardId& shardId); + private: + LookupResult _lookupCollection(OperationContext* opCtx, + const NamespaceString& nss, + const ValueHandle& collectionHistory, + const ComparableChunkVersion& previousChunkVersion); - /** - * Marks a collection entry as needing refresh. Will create the collection entry if one does - * not exist. - */ - void _createOrGetCollectionEntryAndMarkAsNeedsRefresh(const NamespaceString& nss); + CatalogCacheLoader& _catalogCacheLoader; + Mutex _mutex = MONGO_MAKE_LATCH("CollectionCache::_mutex"); - /** - * Retrieves the collection entry for the given namespace, creating the entry if one does not - * already exist. - */ - std::shared_ptr<CollectionRoutingInfoEntry> _createOrGetCollectionEntry( - WithLock wl, const NamespaceString& nss); + struct Stats { + // Tracks how many incremental refreshes are waiting to complete currently + AtomicWord<long long> numActiveIncrementalRefreshes{0}; - /** - * Used as a flag to indicate whether or not this thread performed its own - * refresh for certain helper functions - * - * kPerformedRefresh is used only when the calling thread performed the - * refresh *itself* - * - * kDidNotPerformRefresh is used either when there was an error or when - * this thread joined an ongoing refresh - */ - enum class RefreshAction { - kPerformedRefresh, - kDidNotPerformRefresh, - }; + // Cumulative, always-increasing counter of how many incremental refreshes have been + // kicked off + AtomicWord<long long> countIncrementalRefreshesStarted{0}; - /** - * Return type for helper functions performing refreshes so that they can - * indicate both status and whether or not this thread performed its own - * refresh - */ - struct RefreshResult { - // Status containing result of refresh - StatusWith<ChunkManager> statusWithInfo; - RefreshAction actionTaken; - }; + // Tracks how many full refreshes are waiting to complete currently + AtomicWord<long long> numActiveFullRefreshes{0}; - /** - * Retrieves the collection routing info for this namespace after blocking on a catalog cache - * refresh. - */ - CatalogCache::RefreshResult _getCollectionRoutingInfoWithForcedRefresh( - OperationContext* opctx, const NamespaceString& nss); + // Cumulative, always-increasing counter of how many full refreshes have been kicked off + AtomicWord<long long> countFullRefreshesStarted{0}; - /** - * Helper function used when we need the refresh action taken (e.g. when we - * want to force refresh) - */ - CatalogCache::RefreshResult _getCollectionRoutingInfo(OperationContext* opCtx, - const NamespaceString& nss); + // Cumulative, always-increasing counter of how many full or incremental refreshes + // failed for whatever reason + AtomicWord<long long> countFailedRefreshes{0}; - CatalogCache::RefreshResult _getCollectionRoutingInfoAt( - OperationContext* opCtx, - const NamespaceString& nss, - boost::optional<Timestamp> atClusterTime); + /** + * Reports the accumulated statistics for serverStatus. + */ + void report(BSONObjBuilder* builder) const; + + } _stats; + + void _updateRefreshesStats(const bool isIncremental, const bool add); + }; + + StatusWith<ChunkManager> _getCollectionRoutingInfoAt(OperationContext* opCtx, + const NamespaceString& nss, + boost::optional<Timestamp> atClusterTime); // Interface from which chunks will be retrieved CatalogCacheLoader& _cacheLoader; @@ -557,23 +378,6 @@ private: // combined AtomicWord<long long> totalRefreshWaitTimeMicros{0}; - // Tracks how many incremental refreshes are waiting to complete currently - AtomicWord<long long> numActiveIncrementalRefreshes{0}; - - // Cumulative, always-increasing counter of how many incremental refreshes have been kicked - // off - AtomicWord<long long> countIncrementalRefreshesStarted{0}; - - // Tracks how many full refreshes are waiting to complete currently - AtomicWord<long long> numActiveFullRefreshes{0}; - - // Cumulative, always-increasing counter of how many full refreshes have been kicked off - AtomicWord<long long> countFullRefreshesStarted{0}; - - // Cumulative, always-increasing counter of how many full or incremental refreshes failed - // for whatever reason - AtomicWord<long long> countFailedRefreshes{0}; - // Cumulative, always-increasing counter of how many operations have been blocked by a // catalog cache refresh. Broken down by operation type to match the operations tracked // by the OpCounters class. @@ -595,15 +399,9 @@ private: std::shared_ptr<ThreadPool> _executor; - DatabaseCache _databaseCache; - // Mutex to serialize access to the collection cache - mutable Mutex _mutex = MONGO_MAKE_LATCH("CatalogCache::_mutex"); - // Map from full collection name to the routing info for that collection, grouped by database - using CollectionInfoMap = StringMap<std::shared_ptr<CollectionRoutingInfoEntry>>; - using CollectionsByDbMap = StringMap<CollectionInfoMap>; - CollectionsByDbMap _collectionsByDb; + CollectionCache _collectionCache; }; } // namespace mongo |