diff options
author | Geert Bosch <geert@mongodb.com> | 2019-01-10 17:57:22 -0500 |
---|---|---|
committer | Geert Bosch <geert@mongodb.com> | 2019-01-14 15:51:20 -0500 |
commit | 71b46ab799f870d1ddbf2d6d6e133b39724c945c (patch) | |
tree | 14bc3fad0c8bf08937da05bd5c81ae968c6a3b0f | |
parent | 8587570a32118e43f8327f77ce6180da679d3539 (diff) | |
download | mongo-71b46ab799f870d1ddbf2d6d6e133b39724c945c.tar.gz |
SERVER-38939 Be more memory efficient for leaf nodes in biggie
-rw-r--r-- | src/mongo/db/storage/biggie/store.h | 138 | ||||
-rw-r--r-- | src/mongo/db/storage/biggie/store_test.cpp | 98 |
2 files changed, 126 insertions, 110 deletions
diff --git a/src/mongo/db/storage/biggie/store.h b/src/mongo/db/storage/biggie/store.h index 2c36bb4f14e..26588e66a9f 100644 --- a/src/mongo/db/storage/biggie/store.h +++ b/src/mongo/db/storage/biggie/store.h @@ -404,7 +404,7 @@ public: if (node->_children[i] != nullptr) { // If there is a sub-tree found, it must have data, therefore it's necessary // to traverse to the right most node. - _current = node->_children[i].get(); + _current = node->child(i).get(); _traverseRightSubtree(); return; } @@ -560,7 +560,7 @@ public: size_t depth = prev->_depth + prev->_trieKey.size(); while (depth < key.size()) { uint8_t c = static_cast<uint8_t>(charKey[depth]); - node = prev->_children[c].get(); + node = prev->child(c).get(); if (node == nullptr) { return false; } @@ -623,7 +623,7 @@ public: // 'parent' may only have one child, in which case we need to evaluate whether or not // this node is redundant. - _compressOnlyChild(parent); + parent->compressIfSingleChild(); return true; } @@ -689,7 +689,7 @@ public: if (idx != UINT8_MAX) context.push_back(std::make_pair(node, idx + 1)); - if (!node->_children[idx]) + if (idx >= node->_children.size() || !node->_children[idx]) break; node = node->_children[idx].get(); @@ -735,7 +735,7 @@ public: std::tie(node, idx) = context.back(); context.pop_back(); - for (auto iter = idx + node->_children.begin(); iter != node->_children.end(); ++iter) { + for (auto iter = idx + node->_children.begin(); iter < node->_children.end(); ++iter) { if (!(*iter)) continue; @@ -779,7 +779,7 @@ public: return std::distance(iter1, iter2); } - std::string to_string_for_test() { + std::string toString() { return _walkTree(_root.get(), 0); } @@ -810,6 +810,40 @@ private: virtual ~Node() = default; + + /** + * Compresses a child node into its parent if necessary. This is required when an erase + * results in a node with no value and only one child. + */ + void compressIfSingleChild() { + // Don't compress if this node has an actual value associated with it or is the root. + if (_data || _trieKey.empty()) { + return; + } + + // Determine if this node has only one child. + std::shared_ptr<Node> onlyChild = nullptr; + + for (size_t i = 0; i < _children.size(); ++i) { + if (_children[i] != nullptr) { + if (onlyChild != nullptr) { + return; + } + onlyChild = _children[i]; + } + } + + // Append the child's key onto the parent. + for (char item : onlyChild->_trieKey) { + _trieKey.push_back(item); + } + + if (onlyChild->_data) { + _data.emplace(onlyChild->_data->first, onlyChild->_data->second); + } + _children = onlyChild->_children; + } + friend void swap(Node& first, Node& second) { std::swap(first.trieKey, second.trieKey); std::swap(first.depth, second.depth); @@ -830,11 +864,25 @@ private: return true; } + std::shared_ptr<Node> child(uint8_t nr) const { + return nr < _children.size() ? _children[nr] : nullptr; + } + + std::shared_ptr<Node>& child(uint8_t nr) { + while (_children.size() <= nr) + _children.emplace_back(nullptr); + return _children[nr]; + } + + const std::vector<std::shared_ptr<Node>>& children() const { + return _children; + } + protected: unsigned int _depth = 0; std::vector<uint8_t> _trieKey; boost::optional<value_type> _data; - std::array<std::shared_ptr<Node>, 256> _children; + std::vector<std::shared_ptr<Node>> _children; }; /** @@ -911,8 +959,9 @@ private: } ret.push_back('\n'); - for (auto child : node->_children) { + for (auto& child : node->_children) { if (child != nullptr) { + ret.append(std::to_string(child.use_count())); ret.append(_walkTree(child.get(), depth + 1)); } } @@ -939,7 +988,7 @@ private: depth = _root->_depth + _root->_trieKey.size(); uint8_t childFirstChar = static_cast<uint8_t>(charKey[depth]); - auto node = _root->_children[childFirstChar]; + auto node = _root->child(childFirstChar); while (node != nullptr) { @@ -956,7 +1005,7 @@ private: depth = node->_depth + node->_trieKey.size(); childFirstChar = static_cast<uint8_t>(charKey[depth]); - node = node->_children[childFirstChar]; + node = node->child(childFirstChar); } return nullptr; @@ -1013,7 +1062,7 @@ private: if (node.use_count() - 1 > 1) { // Copy node on a modifying operation when it isn't owned uniquely. node = std::make_shared<Node>(*node); - prev->_children[childFirstChar] = node; + prev->child(childFirstChar) = node; } // 'node' is uniquely owned at this point, so we are free to modify it. @@ -1046,7 +1095,7 @@ private: // Change the current node's trieKey and make a child of the new node. newKey = _makeKey(node->_trieKey, mismatchIdx, node->_trieKey.size() - mismatchIdx); - newNode->_children[newKey.front()] = node; + newNode->child(newKey.front()) = node; node->_trieKey = newKey; node->_depth = newNode->_depth + newNode->_trieKey.size(); @@ -1063,7 +1112,7 @@ private: // Update an internal node. if (!value) { node->_data = boost::none; - _compressOnlyChild(node.get()); + node->compressIfSingleChild(); } else { _root->_count++; _root->_dataSize += value->second.size(); @@ -1078,7 +1127,7 @@ private: childFirstChar = static_cast<uint8_t>(charKey[depth]); prev = node.get(); - node = node->_children[childFirstChar]; + node = node->child(childFirstChar); } // Add a completely new child to a node. The new key at this depth does not @@ -1126,7 +1175,7 @@ private: if (value) { newNode->_data.emplace(value->first, value->second); } - node->_children[key.front()] = newNode; + node->child(key.front()) = newNode; return newNode.get(); } @@ -1147,7 +1196,7 @@ private: while (depth < key.size()) { uint8_t c = static_cast<uint8_t>(charKey[depth]); - node = node->_children[c].get(); + node = node->child(c).get(); context.push_back(node); depth = node->_depth + node->_trieKey.size(); } @@ -1172,39 +1221,6 @@ private: } /** - * Compresses a child node into its parent if necessary. This is required when an erase results - * in a node with no value and only one child. - */ - void _compressOnlyChild(Node* node) { - // Don't compress if this node has an actual value associated with it or is the root. - if (node->_data || node->_trieKey.empty()) { - return; - } - - // Determine if this node has only one child. - std::shared_ptr<Node> onlyChild = nullptr; - - for (size_t i = 0; i < node->_children.size(); ++i) { - if (node->_children[i] != nullptr) { - if (onlyChild != nullptr) { - return; - } - onlyChild = node->_children[i]; - } - } - - // Append the child's key onto the parent. - for (char item : onlyChild->_trieKey) { - node->_trieKey.push_back(item); - } - - if (onlyChild->_data) { - node->_data.emplace(onlyChild->_data->first, onlyChild->_data->second); - } - node->_children = onlyChild->_children; - } - - /** * Rebuilds the context by replacing stale raw pointers with the new pointers. The pointers * can become stale when running an operation that copies the node on modification, like * insert or erase. @@ -1214,7 +1230,7 @@ private: context[0] = replaceNode; for (size_t node = 1; node < context.size(); node++) { - replaceNode = replaceNode->_children[trieKeyIndex[node - 1]].get(); + replaceNode = replaceNode->child(trieKeyIndex[node - 1]).get(); context[node] = replaceNode; } } @@ -1239,13 +1255,13 @@ private: for (size_t idx = 1; idx < context.size(); idx++) { node = context[idx]; - if (prev->_children[node->_trieKey.front()].use_count() > 1) { + if (prev->child(node->_trieKey.front()).use_count() > 1) { std::shared_ptr<Node> nodeCopy = std::make_shared<Node>(*node); - prev->_children[nodeCopy->_trieKey.front()] = nodeCopy; + prev->child(nodeCopy->_trieKey.front()) = nodeCopy; context[idx] = nodeCopy.get(); prev = nodeCopy.get(); } else { - prev = prev->_children[node->_trieKey.front()].get(); + prev = prev->child(node->_trieKey.front()).get(); } } @@ -1360,9 +1376,9 @@ private: // Since _makeBranchUnique may make changes to the pointer addresses in recursive calls. current = context.back(); - Node* node = current->_children[key].get(); - Node* baseNode = base->_children[key].get(); - Node* otherNode = other->_children[key].get(); + Node* node = current->child(key).get(); + Node* baseNode = base->child(key).get(); + Node* otherNode = other->child(key).get(); if (!node && !baseNode && !otherNode) continue; @@ -1380,7 +1396,7 @@ private: // modifications that go on in _makeBranchUnique. _rebuildContext(context, trieKeyIndex); - current->_children[key] = other->_children[key]; + current->child(key) = other->child(key); } else if (!otherNode || (baseNode && baseNode != otherNode)) { // Either the master tree and working tree remove the same branch, or the master // tree updated the branch while the working tree removed the branch, resulting @@ -1393,12 +1409,12 @@ private: current = _makeBranchUnique(context); _rebuildContext(context, trieKeyIndex); - current->_children[key] = nullptr; + current->child(key) = nullptr; } else if (baseNode && otherNode && baseNode == node) { // If base and current point to the same node, then master changed. current = _makeBranchUnique(context); _rebuildContext(context, trieKeyIndex); - current->_children[key] = other->_children[key]; + current->child(key) = other->child(key); } } else if (baseNode && otherNode && baseNode != otherNode) { // If all three are unique and leaf nodes, then it is a merge conflict. @@ -1438,10 +1454,10 @@ private: Node* _begin(Node* root) const noexcept { Node* node = root; while (!node->_data) { - if (node->_children.empty()) + if (node->children().empty()) return nullptr; - for (auto child : node->_children) { + for (auto child : node->children()) { if (child != nullptr) { node = child.get(); break; diff --git a/src/mongo/db/storage/biggie/store_test.cpp b/src/mongo/db/storage/biggie/store_test.cpp index 0528fccfcc1..484457c70d9 100644 --- a/src/mongo/db/storage/biggie/store_test.cpp +++ b/src/mongo/db/storage/biggie/store_test.cpp @@ -1975,79 +1975,79 @@ TEST_F(RadixStoreTest, PathCompressionTest) { value_type value7 = std::make_pair("foodie", "7"); thisStore.insert(value_type(value1)); - ASSERT_EQ(thisStore.to_string_for_test(), "\n food*\n"); + ASSERT_EQ(thisStore.toString(), "\n1 food*\n"); // Add a key that is a prefix of a key already in the tree thisStore.insert(value_type(value2)); - ASSERT_EQ(thisStore.to_string_for_test(), - "\n foo*" - "\n d*\n"); + ASSERT_EQ(thisStore.toString(), + "\n1 foo*" + "\n1 d*\n"); // Add a key with no prefix already in the tree thisStore.insert(value_type(value3)); - ASSERT_EQ(thisStore.to_string_for_test(), - "\n bar*" - "\n foo*" - "\n d*\n"); + ASSERT_EQ(thisStore.toString(), + "\n1 bar*" + "\n1 foo*" + "\n1 d*\n"); // Add a key that shares a prefix with a key in the tree thisStore.insert(value_type(value4)); - ASSERT_EQ(thisStore.to_string_for_test(), - "\n ba" - "\n r*" - "\n tter*" - "\n foo*" - "\n d*\n"); + ASSERT_EQ(thisStore.toString(), + "\n1 ba" + "\n1 r*" + "\n1 tter*" + "\n1 foo*" + "\n1 d*\n"); // Add another level to the tree thisStore.insert(value_type(value5)); - ASSERT_EQ(thisStore.to_string_for_test(), - "\n ba" - "\n r*" - "\n tt" - "\n er*" - "\n y*" - "\n foo*" - "\n d*\n"); + ASSERT_EQ(thisStore.toString(), + "\n1 ba" + "\n1 r*" + "\n1 tt" + "\n1 er*" + "\n1 y*" + "\n1 foo*" + "\n1 d*\n"); // Erase a key that causes the path to be compressed thisStore.erase(value2.first); - ASSERT_EQ(thisStore.to_string_for_test(), - "\n ba" - "\n r*" - "\n tt" - "\n er*" - "\n y*" - "\n food*\n"); + ASSERT_EQ(thisStore.toString(), + "\n1 ba" + "\n1 r*" + "\n1 tt" + "\n1 er*" + "\n1 y*" + "\n1 food*\n"); // Erase a key that causes the path to be compressed thisStore.erase(value3.first); - ASSERT_EQ(thisStore.to_string_for_test(), - "\n batt" - "\n er*" - "\n y*" - "\n food*\n"); + ASSERT_EQ(thisStore.toString(), + "\n1 batt" + "\n1 er*" + "\n1 y*" + "\n1 food*\n"); // Add a key that causes a node with children to be split thisStore.insert(value_type(value6)); - ASSERT_EQ(thisStore.to_string_for_test(), - "\n bat" - "\n s*" - "\n t" - "\n er*" - "\n y*" - "\n food*\n"); + ASSERT_EQ(thisStore.toString(), + "\n1 bat" + "\n1 s*" + "\n1 t" + "\n1 er*" + "\n1 y*" + "\n1 food*\n"); // Add a key that has a prefix already in the tree with a value thisStore.insert(value_type(value7)); - ASSERT_EQ(thisStore.to_string_for_test(), - "\n bat" - "\n s*" - "\n t" - "\n er*" - "\n y*" - "\n food*" - "\n ie*\n"); + ASSERT_EQ(thisStore.toString(), + "\n1 bat" + "\n1 s*" + "\n1 t" + "\n1 er*" + "\n1 y*" + "\n1 food*" + "\n1 ie*\n"); } TEST_F(RadixStoreTest, MergeOneTest) { |