diff options
author | Ruoxin Xu <ruoxin.xu@mongodb.com> | 2020-08-31 10:57:29 +0100 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2020-09-22 13:33:33 +0000 |
commit | 13096e75c224d4ab7231a8788005c41fee8d7991 (patch) | |
tree | c158e1d498f9d00536629ee11e8123e14322b16c /src/mongo | |
parent | 2e51850ef5456cfaa7f43fe271c78bcc5f399104 (diff) | |
download | mongo-13096e75c224d4ab7231a8788005c41fee8d7991.tar.gz |
SERVER-49817 Optimize document diffing code
Diffstat (limited to 'src/mongo')
-rw-r--r-- | src/mongo/bson/bsonobj.h | 19 | ||||
-rw-r--r-- | src/mongo/db/update/document_diff_calculator.cpp | 69 |
2 files changed, 64 insertions, 24 deletions
diff --git a/src/mongo/bson/bsonobj.h b/src/mongo/bson/bsonobj.h index 12374110747..ecdf8ef7b24 100644 --- a/src/mongo/bson/bsonobj.h +++ b/src/mongo/bson/bsonobj.h @@ -763,6 +763,25 @@ public: _theend = end - 1; } + /* + * Advance '_pos' by currentElement.size(). The element passed in must be equivalent to the + * current element '_pos' is at. + */ + void advance(const BSONElement& currentElement) { + dassert(BSONElement(_pos).size() == currentElement.size()); + _pos += currentElement.size(); + } + + /** + * Return true if the current element is equal to 'otherElement'. + * Do *not* use with moreWithEOO() as the function will return false if the current element and + * 'otherElement' are EOO. + */ + bool currentElementBinaryEqual(const BSONElement& otherElement) { + auto sz = otherElement.size(); + return sz <= (_theend - _pos) && memcmp(otherElement.rawdata(), _pos, sz) == 0; + } + /** @return true if more elements exist to be enumerated. */ bool more() const { return _pos < _theend; diff --git a/src/mongo/db/update/document_diff_calculator.cpp b/src/mongo/db/update/document_diff_calculator.cpp index dcc6309d7c0..60c6de8f4e8 100644 --- a/src/mongo/db/update/document_diff_calculator.cpp +++ b/src/mongo/db/update/document_diff_calculator.cpp @@ -48,27 +48,37 @@ std::unique_ptr<diff_tree::ArrayNode> computeArrayDiff(const BSONObj& pre, const auto postItr = BSONObjIterator(post); const size_t postObjSize = static_cast<size_t>(post.objsize()); size_t nFieldsInPostArray = 0; - for (; preItr.more() && postItr.more(); ++preItr, ++postItr, ++nFieldsInPostArray) { + while (preItr.more() && postItr.more()) { // Bailout if the generated diff so far is larger than the 'post' object. if (postObjSize < diffNode->getObjSize()) { return nullptr; } - if (!(*preItr).binaryEqual(*postItr)) { + + auto preVal = *preItr; + auto postVal = *postItr; + + if (!preVal.binaryEqual(postVal)) { // If both are arrays or objects, then recursively compute the diff of the respective // array or object. - if ((*preItr).type() == (*postItr).type() && - ((*preItr).type() == BSONType::Object || (*preItr).type() == BSONType::Array)) { - calculateSubDiffHelper(*preItr, *postItr, nFieldsInPostArray, diffNode.get()); + if (preVal.type() == postVal.type() && + (preVal.type() == BSONType::Object || preVal.type() == BSONType::Array)) { + calculateSubDiffHelper(preVal, postVal, nFieldsInPostArray, diffNode.get()); } else { - diffNode->addUpdate(nFieldsInPostArray, *postItr); + diffNode->addUpdate(nFieldsInPostArray, postVal); } } + + preItr.advance(preVal); + postItr.advance(postVal); + ++nFieldsInPostArray; } // When we reach here, only one of postItr or preItr can have more fields. If postItr has more // fields, we need to add all the remaining fields. - for (; postItr.more(); ++postItr, ++nFieldsInPostArray) { - diffNode->addUpdate(nFieldsInPostArray, *postItr); + for (; postItr.more(); ++nFieldsInPostArray) { + auto postVal = *postItr; + diffNode->addUpdate(nFieldsInPostArray, postVal); + postItr.advance(postVal); } // If preItr has more fields, we can ignore the remaining fields, since we only need to do a @@ -88,18 +98,25 @@ std::unique_ptr<diff_tree::DocumentSubDiffNode> computeDocDiff(const BSONObj& pr const size_t postObjSize = static_cast<size_t>(post.objsize()); std::set<StringData> deletes; while (preItr.more() && postItr.more()) { - auto preVal = *preItr; - auto postVal = *postItr; - // Bailout if the generated diff so far is larger than the 'post' object. if (postObjSize < diffNode->getObjSize()) { return nullptr; } + + auto preVal = *preItr; + // Fast path for case where they're equal. + if (postItr.currentElementBinaryEqual(preVal)) { + // Here the current element of preItr and postItr are equal. So it's safe to advance + // 'postItr' using 'preVal'. This way we can save the cost of constructing 'postVal'. + preItr.advance(preVal); + postItr.advance(preVal); + continue; + } + + auto postVal = *postItr; if (preVal.fieldNameStringData() == postVal.fieldNameStringData()) { - if (preVal.binaryEqual(postVal)) { - // They're identical. Move on. - } else if (preVal.type() == postVal.type() && - (preVal.type() == BSONType::Object || preVal.type() == BSONType::Array)) { + if (preVal.type() == postVal.type() && + (preVal.type() == BSONType::Object || preVal.type() == BSONType::Array)) { // Both are either arrays or objects, recursively compute the diff of the respective // array or object. calculateSubDiffHelper( @@ -108,30 +125,34 @@ std::unique_ptr<diff_tree::DocumentSubDiffNode> computeDocDiff(const BSONObj& pr // Any other case, just replace with the 'postVal'. diffNode->addUpdate((*preItr).fieldNameStringData(), postVal); } - preItr.next(); - postItr.next(); + preItr.advance(preVal); + postItr.advance(postVal); } else { // If the 'preVal' field name does not exist in the 'post' object then, just remove it. // If it present, we do nothing for now, since the field gets inserted later. deletes.insert(preVal.fieldNameStringData()); - preItr.next(); + preItr.advance(preVal); } } // When we reach here, only one of postItr or preItr can have more fields. Record remaining // fields in preItr as removals. - for (; preItr.more(); preItr.next()) { + while (preItr.more()) { // Note that we don't need to record these into the 'deletes' set because there are no more // fields in the post image. invariant(!postItr.more()); - diffNode->addDelete((*preItr).fieldNameStringData()); + auto next = (*preItr); + diffNode->addDelete(next.fieldNameStringData()); + preItr.advance(next); } // Record remaining fields in postItr as creates. - for (; postItr.more(); postItr.next()) { - auto fieldName = (*postItr).fieldNameStringData(); - diffNode->addInsert(fieldName, *postItr); - deletes.erase(fieldName); + while (postItr.more()) { + auto next = (*postItr); + + diffNode->addInsert(next.fieldNameStringData(), next); + deletes.erase(next.fieldNameStringData()); + postItr.advance(next); } for (auto&& deleteField : deletes) { diffNode->addDelete(deleteField); |