diff options
author | Kevin Albertson <kevin.albertson@10gen.com> | 2016-07-19 09:32:39 -0400 |
---|---|---|
committer | Kevin Albertson <kevin.albertson@10gen.com> | 2016-08-03 09:58:22 -0400 |
commit | 30a6aeb8ec685903818051866095f773c53a842a (patch) | |
tree | 61fe009ba948c17f9ce7445ca0a96ba83b10b20e /src/mongo/db/views | |
parent | bae81366b312990a173427737db9d74a9a5ca021 (diff) | |
download | mongo-30a6aeb8ec685903818051866095f773c53a842a.tar.gz |
SERVER-24768 Add view cycle and depth checking
Diffstat (limited to 'src/mongo/db/views')
-rw-r--r-- | src/mongo/db/views/SConscript | 1 | ||||
-rw-r--r-- | src/mongo/db/views/view_catalog.cpp | 102 | ||||
-rw-r--r-- | src/mongo/db/views/view_catalog.h | 18 | ||||
-rw-r--r-- | src/mongo/db/views/view_graph.cpp | 221 | ||||
-rw-r--r-- | src/mongo/db/views/view_graph.h | 133 |
5 files changed, 451 insertions, 24 deletions
diff --git a/src/mongo/db/views/SConscript b/src/mongo/db/views/SConscript index ee25270984c..a82c893ee0d 100644 --- a/src/mongo/db/views/SConscript +++ b/src/mongo/db/views/SConscript @@ -21,6 +21,7 @@ env.Library( source=[ 'view.cpp', 'view_catalog.cpp', + 'view_graph.cpp', 'resolved_view.cpp', ], LIBDEPS=[ diff --git a/src/mongo/db/views/view_catalog.cpp b/src/mongo/db/views/view_catalog.cpp index fa9f73cc209..33cc9737ad9 100644 --- a/src/mongo/db/views/view_catalog.cpp +++ b/src/mongo/db/views/view_catalog.cpp @@ -38,6 +38,10 @@ #include "mongo/base/string_data.h" #include "mongo/db/namespace_string.h" #include "mongo/db/operation_context.h" +#include "mongo/db/pipeline/aggregation_request.h" +#include "mongo/db/pipeline/document_source.h" +#include "mongo/db/pipeline/expression_context.h" +#include "mongo/db/pipeline/pipeline.h" #include "mongo/db/server_parameters.h" #include "mongo/db/storage/recovery_unit.h" #include "mongo/db/views/view.h" @@ -51,8 +55,6 @@ namespace mongo { ExportedServerParameter<bool, ServerParameterType::kStartupOnly> enableViewsParameter( ServerParameterSet::getGlobal(), "enableViews", &enableViews); -const std::uint32_t ViewCatalog::kMaxViewDepth = 20; - Status ViewCatalog::reloadIfNeeded(OperationContext* txn) { stdx::lock_guard<stdx::mutex> lk(_mutex); return _reloadIfNeeded_inlock(txn); @@ -77,24 +79,83 @@ Status ViewCatalog::_reloadIfNeeded_inlock(OperationContext* txn) { return status; } -void ViewCatalog::_createOrUpdateView_inlock(OperationContext* txn, - const NamespaceString& viewName, - const NamespaceString& viewOn, - const BSONArray& pipeline) { +Status ViewCatalog::_createOrUpdateView_inlock(OperationContext* txn, + const NamespaceString& viewName, + const NamespaceString& viewOn, + const BSONArray& pipeline) { invariant(_valid.load()); BSONObj viewDef = BSON("_id" << viewName.ns() << "viewOn" << viewOn.coll() << "pipeline" << pipeline); - _durable->upsert(txn, viewName, viewDef); BSONObj ownedPipeline = pipeline.getOwned(); - _viewMap[viewName.ns()] = std::make_shared<ViewDefinition>( + auto view = std::make_shared<ViewDefinition>( viewName.db(), viewName.coll(), viewOn.coll(), ownedPipeline); - txn->recoveryUnit()->onRollback([this, viewName]() { this->_viewMap.erase(viewName.ns()); }); + + // Check that the resulting dependency graph is acyclic and within the maximum depth. + Status graphStatus = _upsertIntoGraph(txn, *(view.get())); + if (!graphStatus.isOK()) { + return graphStatus; + } + + _durable->upsert(txn, viewName, viewDef); + _viewMap[viewName.ns()] = view; + txn->recoveryUnit()->onRollback([this, viewName]() { + this->_viewMap.erase(viewName.ns()); + this->_viewGraphNeedsRefresh = true; + }); // We may get invalidated, but we're exclusively locked, so the change must be ours. txn->recoveryUnit()->onCommit([this]() { this->_valid.store(true); }); + return Status::OK(); } +Status ViewCatalog::_upsertIntoGraph(OperationContext* txn, const ViewDefinition& viewDef) { + + // Performs the insert into the graph. + auto doInsert = [this, &txn](const ViewDefinition& viewDef, bool needsValidation) -> Status { + // Parse the pipeline for this view to get the namespaces it references. + AggregationRequest request(viewDef.viewOn(), viewDef.pipeline()); + boost::intrusive_ptr<ExpressionContext> expCtx = new ExpressionContext(txn, request); + auto pipelineStatus = Pipeline::parse(viewDef.pipeline(), expCtx); + if (!pipelineStatus.isOK()) { + uassert(40255, + str::stream() << "Invalid pipeline for existing view " << viewDef.name().ns() + << "; " + << pipelineStatus.getStatus().reason(), + !needsValidation); + return pipelineStatus.getStatus(); + } + + std::vector<NamespaceString> refs = pipelineStatus.getValue()->getInvolvedCollections(); + refs.push_back(viewDef.viewOn()); + + if (needsValidation) { + return _viewGraph.insertAndValidate(viewDef.name(), refs); + } else { + _viewGraph.insertWithoutValidating(viewDef.name(), refs); + return Status::OK(); + } + }; + + if (_viewGraphNeedsRefresh) { + _viewGraph.clear(); + for (auto&& iter : _viewMap) { + auto status = doInsert(*(iter.second.get()), false); + // If we cannot fully refresh the graph, we will keep '_viewGraphNeedsRefresh' true. + if (!status.isOK()) { + return status; + } + } + // Only if the inserts completed without error will we no longer need a refresh. + _viewGraphNeedsRefresh = false; + } + + // Remove the view definition first in case this is an update. If it is not in the graph, it + // is simply a no-op. + _viewGraph.remove(viewDef.name()); + + return doInsert(viewDef, true); +} Status ViewCatalog::createView(OperationContext* txn, const NamespaceString& viewName, @@ -116,10 +177,7 @@ Status ViewCatalog::createView(OperationContext* txn, return Status(ErrorCodes::InvalidNamespace, str::stream() << "invalid name for 'viewOn': " << viewOn.coll()); - // TODO(SERVER-24768): Need to ensure view is correct and doesn't introduce a cycle. - - _createOrUpdateView_inlock(txn, viewName, viewOn, pipeline); - return Status::OK(); + return _createOrUpdateView_inlock(txn, viewName, viewOn, pipeline); } Status ViewCatalog::modifyView(OperationContext* txn, @@ -132,7 +190,8 @@ Status ViewCatalog::modifyView(OperationContext* txn, return Status(ErrorCodes::BadValue, "View must be created on a view or collection in the same database"); - if (!_lookup_inlock(txn, StringData(viewName.ns()))) + ViewDefinition* viewPtr = _lookup_inlock(txn, viewName.ns()); + if (!viewPtr) return Status(ErrorCodes::NamespaceNotFound, str::stream() << "cannot modify missing view " << viewName.ns()); @@ -140,8 +199,11 @@ Status ViewCatalog::modifyView(OperationContext* txn, return Status(ErrorCodes::InvalidNamespace, str::stream() << "invalid name for 'viewOn': " << viewOn.coll()); - _createOrUpdateView_inlock(txn, viewName, viewOn, pipeline); - return Status::OK(); + ViewDefinition savedDefinition = *viewPtr; + txn->recoveryUnit()->onRollback([this, txn, viewName, savedDefinition]() { + this->_viewMap[viewName.ns()] = std::make_shared<ViewDefinition>(savedDefinition); + }); + return _createOrUpdateView_inlock(txn, viewName, viewOn, pipeline); } Status ViewCatalog::dropView(OperationContext* txn, const NamespaceString& viewName) { @@ -158,8 +220,10 @@ Status ViewCatalog::dropView(OperationContext* txn, const NamespaceString& viewN invariant(_valid.load()); _durable->remove(txn, viewName); + _viewGraph.remove(savedDefinition.name()); _viewMap.erase(viewName.ns()); - txn->recoveryUnit()->onRollback([this, viewName, savedDefinition]() { + txn->recoveryUnit()->onRollback([this, txn, viewName, savedDefinition]() { + this->_viewGraphNeedsRefresh = true; this->_viewMap[viewName.ns()] = std::make_shared<ViewDefinition>(savedDefinition); }); @@ -188,7 +252,7 @@ StatusWith<ResolvedView> ViewCatalog::resolveView(OperationContext* txn, const NamespaceString* resolvedNss = &nss; std::vector<BSONObj> resolvedPipeline; - for (std::uint32_t i = 0; i < ViewCatalog::kMaxViewDepth; i++) { + for (int i = 0; i < ViewGraph::kMaxViewDepth; i++) { ViewDefinition* view = _lookup_inlock(txn, resolvedNss->ns()); if (!view) return StatusWith<ResolvedView>({*resolvedNss, resolvedPipeline}); @@ -207,6 +271,6 @@ StatusWith<ResolvedView> ViewCatalog::resolveView(OperationContext* txn, return {ErrorCodes::ViewDepthLimitExceeded, str::stream() << "View depth too deep or view cycle detected; maximum depth is " - << kMaxViewDepth}; + << ViewGraph::kMaxViewDepth}; } } // namespace mongo diff --git a/src/mongo/db/views/view_catalog.h b/src/mongo/db/views/view_catalog.h index e96278c8ce9..8dadf38c1c1 100644 --- a/src/mongo/db/views/view_catalog.h +++ b/src/mongo/db/views/view_catalog.h @@ -41,6 +41,7 @@ #include "mongo/db/views/durable_view_catalog.h" #include "mongo/db/views/resolved_view.h" #include "mongo/db/views/view.h" +#include "mongo/db/views/view_graph.h" #include "mongo/stdx/mutex.h" #include "mongo/util/string_map.h" @@ -57,7 +58,6 @@ class ViewCatalog { public: // TODO(SERVER-23700): Make this a unique_ptr once StringMap supports move-only types. using ViewMap = StringMap<std::shared_ptr<ViewDefinition>>; - static const std::uint32_t kMaxViewDepth; explicit ViewCatalog(DurableViewCatalog* durable) : _durable(durable) {} @@ -133,13 +133,19 @@ public: */ void invalidate() { _valid.store(false); + _viewGraphNeedsRefresh = true; } private: - void _createOrUpdateView_inlock(OperationContext* txn, - const NamespaceString& viewName, - const NamespaceString& viewOn, - const BSONArray& pipeline); + Status _createOrUpdateView_inlock(OperationContext* txn, + const NamespaceString& viewName, + const NamespaceString& viewOn, + const BSONArray& pipeline); + /** + * Parses the view definition pipeline, attempts to upsert into the view graph, and refreshes + * the graph if necessary. Returns an error status if the resulting graph would be invalid. + */ + Status _upsertIntoGraph(OperationContext* txn, const ViewDefinition& viewDef); ViewDefinition* _lookup_inlock(OperationContext* txn, StringData ns); Status _reloadIfNeeded_inlock(OperationContext* txn); @@ -148,5 +154,7 @@ private: ViewMap _viewMap; DurableViewCatalog* _durable; AtomicBool _valid; + ViewGraph _viewGraph; + bool _viewGraphNeedsRefresh = true; // Defers initializing the graph until the first insert. }; } // namespace mongo diff --git a/src/mongo/db/views/view_graph.cpp b/src/mongo/db/views/view_graph.cpp new file mode 100644 index 00000000000..27a19a9fbcd --- /dev/null +++ b/src/mongo/db/views/view_graph.cpp @@ -0,0 +1,221 @@ +/** +* Copyright (C) 2016 MongoDB Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +* +* As a special exception, the copyright holders give permission to link the +* code of portions of this program with the OpenSSL library under certain +* conditions as described in each individual source file and distribute +* linked combinations including the program with the OpenSSL library. You +* must comply with the GNU Affero General Public License in all respects for +* all of the code used other than as permitted herein. If you modify file(s) +* with this exception, you may extend this exception to your version of the +* file(s), but you are not obligated to do so. If you do not wish to do so, +* delete this exception statement from your version. If you delete this +* exception statement from all source files in the program, then also delete +* it in the license file. +*/ + +#include "mongo/platform/basic.h" + +#include "mongo/db/views/view_graph.h" + +#include "mongo/db/views/view.h" + +namespace mongo { + +const int ViewGraph::kMaxViewDepth = 20; +uint64_t ViewGraph::_idCounter = 0; + +void ViewGraph::clear() { + _graph.clear(); + _namespaceIds.clear(); +} + +Status ViewGraph::insertAndValidate(const NamespaceString& viewNss, + const std::vector<NamespaceString>& refs) { + insertWithoutValidating(viewNss, refs); + + // Perform validation on this newly inserted view. Note, if the graph was put in an invalid + // state through unvalidated inserts (e.g. if the user manually edits system.views) + // this may not necessarily be detected. We only check for errors introduced by this view. + uint64_t nodeId = _getNodeId(viewNss); + + auto viewDepthLimitExceeded = [this, &viewNss]() -> Status { + this->remove(viewNss); + return Status(ErrorCodes::ViewDepthLimitExceeded, + str::stream() << "View depth limit exceeded; maximum depth is " + << kMaxViewDepth); + }; + + // Check for cycles and get the height of the children. + HeightMap heightMap; + std::vector<uint64_t> cycleVertices; + ErrorCodes::Error childRes = + _getChildrenHeightAndCheckCycle(nodeId, nodeId, 0, &heightMap, &cycleVertices); + if (childRes == ErrorCodes::ViewDepthLimitExceeded) { + return viewDepthLimitExceeded(); + } else if (childRes == ErrorCodes::GraphContainsCycle) { + // Make the error message with the namespaces of the cycle and remove the node. + str::stream ss; + ss << "View cycle detected: "; + for (auto cycleIter = cycleVertices.rbegin(); cycleIter != cycleVertices.rend(); + cycleIter++) { + ss << _graph[*cycleIter].ns << " => "; + } + ss << viewNss.ns(); + remove(viewNss); + return Status(ErrorCodes::GraphContainsCycle, ss); + } + + // Subtract one since the child height includes the non-view leaf node(s). + int childrenHeight = heightMap[nodeId].height - 1; + + // Get the height of the parents to obtain the diameter through this node. + heightMap.clear(); + ErrorCodes::Error parentRes = _getParentsHeight(nodeId, 0, &heightMap); + if (parentRes == ErrorCodes::ViewDepthLimitExceeded) { + return viewDepthLimitExceeded(); + } + + // Check the combined heights of the children and parents. + int parentsHeight = heightMap[nodeId].height; + // Subtract one since the parent and children heights include the current node. + int diameter = parentsHeight + childrenHeight - 1; + + if (diameter > kMaxViewDepth) { + return viewDepthLimitExceeded(); + } + + return Status::OK(); +} + +void ViewGraph::insertWithoutValidating(const NamespaceString& viewNss, + const std::vector<NamespaceString>& refs) { + uint64_t nodeId = _getNodeId(viewNss); + // Note, the parent pointers of this node are set when the parents are inserted. + // This sets the name and children pointers of the node for this view, as well as the parent + // pointers for its children. + Node* node = &(_graph[nodeId]); + invariant(node->children.empty()); + node->ns = viewNss.ns(); + + for (const NamespaceString& childNss : refs) { + uint64_t childId = _getNodeId(childNss); + node->children.insert(childId); + _graph[childId].parents.insert(nodeId); + } +} + +void ViewGraph::remove(const NamespaceString& viewNss) { + uint64_t nodeId = _getNodeId(viewNss); + Node* node = &(_graph[nodeId]); + + // Remove self-reference pointers if they exist. + node->children.erase(nodeId); + node->parents.erase(nodeId); + + // Remove child->parent pointers. + for (uint64_t childId : node->children) { + Node* childNode = &(_graph[childId]); + childNode->parents.erase(nodeId); + // If the child has no remaining references or children, remove it. + if (childNode->parents.size() == 0 && childNode->children.size() == 0) { + _graph.erase(childId); + _namespaceIds.erase(childNode->ns); + } + } + + // Remove all child pointers since this view no longer references anything. + node->children.clear(); + + // Only remove node if there are no remaining references to this node. + if (node->parents.size() == 0) { + _graph.erase(nodeId); + _namespaceIds.erase(node->ns); + } +} + +ErrorCodes::Error ViewGraph::_getParentsHeight(uint64_t currentId, + int currentDepth, + HeightMap* heightMap) { + const Node& currentNode = _graph[currentId]; + int maxHeightOfParents = 0; + + // Return early if we've already exceeded the maximum depth. This will also be triggered if + // we're traversing a cycle introduced through unvalidated inserts. + if (currentDepth > kMaxViewDepth) { + return ErrorCodes::ViewDepthLimitExceeded; + } + + for (uint64_t parentId : currentNode.parents) { + if (!(*heightMap)[parentId].checked) { + auto res = _getParentsHeight(parentId, currentDepth + 1, heightMap); + if (res != ErrorCodes::OK) { + return res; + } + } + maxHeightOfParents = std::max(maxHeightOfParents, (*heightMap)[parentId].height); + } + + (*heightMap)[currentId].checked = true; + (*heightMap)[currentId].height = maxHeightOfParents + 1; + + return ErrorCodes::OK; +} + +ErrorCodes::Error ViewGraph::_getChildrenHeightAndCheckCycle(uint64_t startingId, + uint64_t currentId, + int currentDepth, + HeightMap* heightMap, + std::vector<uint64_t>* cycleIds) { + // Check children of current node. + const Node& currentNode = _graph[currentId]; + if (currentDepth > 0 && currentId == startingId) { + return ErrorCodes::GraphContainsCycle; + } + + // Return early if we've already exceeded the maximum depth. This will also be triggered if + // we're traversing a cycle introduced through unvalidated inserts. + if (currentDepth > kMaxViewDepth) { + return ErrorCodes::ViewDepthLimitExceeded; + } + + int maxHeightOfChildren = 0; + for (uint64_t childId : currentNode.children) { + if (!(*heightMap)[childId].checked) { + auto res = _getChildrenHeightAndCheckCycle( + startingId, childId, currentDepth + 1, heightMap, cycleIds); + if (res == ErrorCodes::GraphContainsCycle) { + cycleIds->push_back(currentId); + return res; + } else if (res != ErrorCodes::OK) { + return res; + } + } + maxHeightOfChildren = std::max(maxHeightOfChildren, (*heightMap)[childId].height); + } + + (*heightMap)[currentId].checked = true; + (*heightMap)[currentId].height = maxHeightOfChildren + 1; + return ErrorCodes::OK; +} + +uint64_t ViewGraph::_getNodeId(const NamespaceString& nss) { + if (_namespaceIds.find(nss.ns()) == _namespaceIds.end()) { + _namespaceIds[nss.ns()] = _idCounter++; + } + return _namespaceIds[nss.ns()]; +} + +} // namespace mongo diff --git a/src/mongo/db/views/view_graph.h b/src/mongo/db/views/view_graph.h new file mode 100644 index 00000000000..20214444d2c --- /dev/null +++ b/src/mongo/db/views/view_graph.h @@ -0,0 +1,133 @@ +/** +* Copyright (C) 2016 MongoDB Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +* +* As a special exception, the copyright holders give permission to link the +* code of portions of this program with the OpenSSL library under certain +* conditions as described in each individual source file and distribute +* linked combinations including the program with the OpenSSL library. You +* must comply with the GNU Affero General Public License in all respects for +* all of the code used other than as permitted herein. If you modify file(s) +* with this exception, you may extend this exception to your version of the +* file(s), but you are not obligated to do so. If you do not wish to do so, +* delete this exception statement from your version. If you delete this +* exception statement from all source files in the program, then also delete +* it in the license file. +*/ +#pragma once + +#include <unordered_map> +#include <unordered_set> +#include <vector> + +#include "mongo/base/status.h" +#include "mongo/db/namespace_string.h" +#include "mongo/util/string_map.h" + +namespace mongo { +class ViewDefinition; + +/** + * Validates that the graph of view dependencies is acyclic and within the allowed depth. + * Each node is represented by an integer id, and stores integer ids for its parents and children + * in the graph. + * + * This is owned and managed by the ViewCatalog. + */ +class ViewGraph { + MONGO_DISALLOW_COPYING(ViewGraph); + +public: + static const int kMaxViewDepth; + + ViewGraph() = default; + + /** + * Called when a view is added to the catalog. 'refs' are a list of namespaces that the view + * represented by 'viewNss' references in its viewOn or pipeline. Checks if this view introduces + * a cycle or max diameter. If an error is detected, it will not insert. + */ + Status insertAndValidate(const NamespaceString& viewNss, + const std::vector<NamespaceString>& refs); + + /** + * Called when view definitions are being reloaded from the catalog (e.g. on restart of mongod). + * Does the same as insertAndValidate except does not check for cycles or max diameter. + */ + void insertWithoutValidating(const NamespaceString& viewNss, + const std::vector<NamespaceString>& refs); + + /** + * Called when a view is removed from the catalog. If the view does not exist in the graph it is + * a no-op. The view may also have a cycle or max diameter through it. + */ + void remove(const NamespaceString& viewNss); + + /** + * Deletes the view graph and all references to namespaces. + */ + void clear(); + +private: + // A graph node represents a namespace. The parent-child relation is defined as a parent + // references the child either through viewOn or in $lookup/$graphLookup/$facet in its pipeline. + // E.g. the command {create: "a", viewOn: "b", pipeline: [{$lookup: {from: "c"}}]} + // means the node for "a" is a parent of nodes for "b" and "c" since it references them. + struct Node { + // Note, a view may refer to the same child more than once, but we only need to know the + // set of children and parents, since we do not need to traverse duplicates. + std::unordered_set<uint64_t> parents; + std::unordered_set<uint64_t> children; + std::string ns; + }; + + // Bookkeeping for graph traversals. + struct NodeHeight { + bool checked = false; + int height = 0; + }; + + using HeightMap = std::unordered_map<uint64_t, NodeHeight>; + + /** + * Recursively traverses parents of this node and computes their heights. Returns an error + * if the maximum depth is exceeded. + */ + ErrorCodes::Error _getParentsHeight(uint64_t currentId, int currentDepth, HeightMap* heightMap); + + /** + * Recursively traverses children of the starting node and computes their heights. Returns an + * error if the maximum depth is exceeded or a cycle is detected through the starting node. + */ + ErrorCodes::Error _getChildrenHeightAndCheckCycle(uint64_t startingId, + uint64_t currentId, + int currentDepth, + HeightMap* heightMap, + std::vector<uint64_t>* cycleIds); + + /** + * Gets the id for this namespace, and creates an id if it doesn't exist. + */ + uint64_t _getNodeId(const NamespaceString& ns); + + // Maps namespaces to an internal node id. A mapping exists for every namespace referenced, + // i.e. existing views, collections, and non-existing namespaces. + StringMap<uint64_t> _namespaceIds; + + // Maps node ids to nodes. There is a 1-1 correspondance with _namespaceIds, hence the lifetime + // of a node is the same as the lifetime as its corresponding node id. + std::unordered_map<uint64_t, Node> _graph; + static uint64_t _idCounter; +}; +} // namespace mongo |