diff options
author | Justin Seyster <justin.seyster@mongodb.com> | 2017-07-06 12:26:13 -0400 |
---|---|---|
committer | Justin Seyster <justin.seyster@mongodb.com> | 2017-07-06 12:26:13 -0400 |
commit | 3ea2d70f0260b1ec6ec8c60d42d6a6669a802ef2 (patch) | |
tree | 74726f49c4e14ce5cce1b69c56b2157c3f033bc9 /src | |
parent | 72e31a462ab80abfdfc36fb76443ec48fc3f65c9 (diff) | |
download | mongo-3ea2d70f0260b1ec6ec8c60d42d6a6669a802ef2.tar.gz |
SERVER-29762 Look up arrays by index during update.
This patch moves getPositionalPathSpecification() from its location in
document_path_support.h to stringutils.h, where it is renamed to
parseUnsignedBase10Integer(). This function should now be what all
code uses that needs to use a path component to look up an element in
an array.
There are two problems with trying to look up an array element by
string, which this patch addresses (for one particular case). The
first is that it will fail if the path component has leading zeros
(e.g. "a.01.b").
The second problem is that mutable BSON does not maintain the
invariant that array elements have their index as their field name
throughout the entire lifetime of the mutable document. Array lookups
by string fail during the times that this invariant does not hold
(notably, after a mutable array has been extended in length).
Diffstat (limited to 'src')
-rw-r--r-- | src/mongo/db/pipeline/document_path_support.cpp | 23 | ||||
-rw-r--r-- | src/mongo/db/update/path_support.cpp | 27 | ||||
-rw-r--r-- | src/mongo/db/update/path_support.h | 6 | ||||
-rw-r--r-- | src/mongo/db/update/pop_node.cpp | 4 | ||||
-rw-r--r-- | src/mongo/db/update/update_object_node.cpp | 33 | ||||
-rw-r--r-- | src/mongo/db/update/update_object_node_test.cpp | 149 | ||||
-rw-r--r-- | src/mongo/util/stringutils.cpp | 16 | ||||
-rw-r--r-- | src/mongo/util/stringutils.h | 8 | ||||
-rw-r--r-- | src/mongo/util/stringutils_test.cpp | 82 |
9 files changed, 290 insertions, 58 deletions
diff --git a/src/mongo/db/pipeline/document_path_support.cpp b/src/mongo/db/pipeline/document_path_support.cpp index 914763e696b..38dbcce6ac2 100644 --- a/src/mongo/db/pipeline/document_path_support.cpp +++ b/src/mongo/db/pipeline/document_path_support.cpp @@ -28,8 +28,6 @@ #include "mongo/platform/basic.h" -#include <boost/optional.hpp> -#include <cctype> #include <vector> #include "mongo/db/pipeline/document_path_support.h" @@ -39,6 +37,7 @@ #include "mongo/db/pipeline/document.h" #include "mongo/db/pipeline/field_path.h" #include "mongo/db/pipeline/value.h" +#include "mongo/util/stringutils.h" namespace mongo { namespace document_path_support { @@ -46,24 +45,6 @@ namespace document_path_support { namespace { /** - * Returns the array index that should be used if 'fieldName' represents a positional path - * specification (as '0' does in 'a.0'), or boost::none if 'fieldName' does not represent a - * positional path specification. - */ -boost::optional<size_t> getPositionalPathSpecification(StringData fieldName) { - // Do not accept positions like '-4' or '+4' - if (!std::isdigit(fieldName[0])) { - return boost::none; - } - unsigned int index; - auto status = parseNumberFromStringWithBase<unsigned int>(fieldName, 10, &index); - if (status.isOK()) { - return static_cast<size_t>(index); - } - return boost::none; -} - -/** * If 'value' is an array, invokes 'callback' once on each element of 'value'. Otherwise, if 'value' * is not missing, invokes 'callback' on 'value' itself. */ @@ -99,7 +80,7 @@ void visitAllValuesAtPathHelper(Document doc, // positional specifications, if applicable. For example, it will consume "0" and "1" from the // path "a.0.1.b" if the value at "a" is an array with arrays inside it. while (fieldPathIndex < path.getPathLength() && nextValue.isArray()) { - if (auto index = getPositionalPathSpecification(path.getFieldName(fieldPathIndex))) { + if (auto index = parseUnsignedBase10Integer(path.getFieldName(fieldPathIndex))) { nextValue = nextValue[*index]; ++fieldPathIndex; } else { diff --git a/src/mongo/db/update/path_support.cpp b/src/mongo/db/update/path_support.cpp index 73a7b5528db..4f33006444b 100644 --- a/src/mongo/db/update/path_support.cpp +++ b/src/mongo/db/update/path_support.cpp @@ -34,6 +34,7 @@ #include "mongo/bson/mutable/element.h" #include "mongo/util/assert_util.h" #include "mongo/util/mongoutils/str.h" +#include "mongo/util/stringutils.h" namespace mongo { namespace pathsupport { @@ -69,19 +70,6 @@ Status maybePadTo(mutablebson::Element* elemArray, size_t sizeRequired) { } // unnamed namespace -bool isNumericPathComponent(StringData str, size_t* num) { - size_t res = 0; - for (size_t i = 0; i < str.size(); ++i) { - if (str[i] < '0' || str[i] > '9') { - return false; - } else { - res = res * 10 + (str[i] - '0'); - } - } - *num = res; - return true; -} - Status findLongestPrefix(const FieldRef& prefix, mutablebson::Element root, size_t* idxFound, @@ -97,7 +85,7 @@ Status findLongestPrefix(const FieldRef& prefix, mutablebson::Element curr = root; mutablebson::Element prev = root; size_t i = 0; - size_t numericPart = 0; + boost::optional<size_t> numericPart; bool viable = true; for (; i < prefixSize; i++) { // If prefix wants to reach 'curr' by applying a non-numeric index to an array @@ -111,10 +99,11 @@ Status findLongestPrefix(const FieldRef& prefix, break; case Array: - if (!isNumericPathComponent(prefixPart, &numericPart)) { + numericPart = parseUnsignedBase10Integer(prefixPart); + if (!numericPart) { viable = false; } else { - curr = prev[numericPart]; + curr = prev[*numericPart]; } break; @@ -184,8 +173,8 @@ StatusWith<mutablebson::Element> createPathAt(const FieldRef& prefix, size_t i = idxFound; bool inArray = false; if (elemFound.getType() == mongo::Array) { - size_t newIdx = 0; - if (!isNumericPathComponent(prefix.getPart(idxFound), &newIdx)) { + boost::optional<size_t> newIdx = parseUnsignedBase10Integer(prefix.getPart(idxFound)); + if (!newIdx) { return Status(ErrorCodes::PathNotViable, str::stream() << "Cannot create field '" << prefix.getPart(idxFound) << "' in element {" @@ -193,7 +182,7 @@ StatusWith<mutablebson::Element> createPathAt(const FieldRef& prefix, << "}"); } - status = maybePadTo(&elemFound, newIdx); + status = maybePadTo(&elemFound, *newIdx); if (!status.isOK()) { return status; } diff --git a/src/mongo/db/update/path_support.h b/src/mongo/db/update/path_support.h index a27be4b93f6..4f6511be72b 100644 --- a/src/mongo/db/update/path_support.h +++ b/src/mongo/db/update/path_support.h @@ -183,12 +183,6 @@ BSONElement findParentEqualityElement(const EqualityMatches& equalities, */ Status addEqualitiesToDoc(const EqualityMatches& equalities, mutablebson::Document* doc); -/** - * Returns true if the path component represents an array index, and returns that array index in - * 'num'. Otherwise, returns false. - */ -bool isNumericPathComponent(StringData pathComponent, size_t* num); - } // namespace pathsupport } // namespace mongo diff --git a/src/mongo/db/update/pop_node.cpp b/src/mongo/db/update/pop_node.cpp index 5ec86bdc14d..a5a94ba3005 100644 --- a/src/mongo/db/update/pop_node.cpp +++ b/src/mongo/db/update/pop_node.cpp @@ -32,6 +32,7 @@ #include "mongo/db/matcher/expression_parser.h" #include "mongo/db/update/path_support.h" +#include "mongo/util/stringutils.h" namespace mongo { @@ -82,9 +83,8 @@ void PopNode::apply(mutablebson::Element element, return; } - size_t arrayIndex; if (element.getType() == BSONType::Array && - pathsupport::isNumericPathComponent(pathToCreate->getPart(0), &arrayIndex)) { + parseUnsignedBase10Integer(pathToCreate->getPart(0))) { *noop = true; return; } diff --git a/src/mongo/db/update/update_object_node.cpp b/src/mongo/db/update/update_object_node.cpp index f73ac0a5fdb..fa274085ea8 100644 --- a/src/mongo/db/update/update_object_node.cpp +++ b/src/mongo/db/update/update_object_node.cpp @@ -35,6 +35,7 @@ #include "mongo/db/update/update_array_node.h" #include "mongo/db/update/update_leaf_node.h" #include "mongo/stdx/memory.h" +#include "mongo/util/stringutils.h" namespace mongo { @@ -98,14 +99,38 @@ void applyChild(const UpdateNode& child, bool* indexesAffected, bool* noop) { - auto childElement = *element; auto pathTakenSizeBefore = pathTaken->numParts(); - // If 'field' exists in 'element', append 'field' to the end of 'pathTaken' and advance - // 'childElement'. Otherwise, append 'field' to the end of 'pathToCreate'. - if (pathToCreate->empty() && (childElement = (*element)[field]).ok()) { + // A non-ok value for childElement will indicate that we need to append 'field' to the + // 'pathToCreate' FieldRef. + auto childElement = element->getDocument().end(); + invariant(!childElement.ok()); + if (!pathToCreate->empty()) { + // We're already traversing a path with elements that don't exist yet, so we will definitely + // need to append. + } else if (element->getType() == BSONType::Object) { + childElement = element->findFirstChildNamed(field); + } else if (element->getType() == BSONType::Array) { + boost::optional<size_t> indexFromField = parseUnsignedBase10Integer(field); + if (indexFromField) { + childElement = element->findNthChild(*indexFromField); + } else { + // We're trying to traverse an array element, but the path specifies a name instead of + // an index. We append the name to 'pathToCreate' for now, even though we know we won't + // be able to create it. If the update eventually needs to create the path, + // pathsupport::createPathAt() will provide a sensible PathNotViable UserError. + } + } + + if (childElement.ok()) { + // The path we've traversed so far already exists in our document, and 'childElement' + // represents the Element indicated by the 'field' name or index, which we indicate by + // updating the 'pathTaken' FieldRef. pathTaken->appendPart(field); } else { + // We are traversing path components that do not exist in our document. Any update modifier + // that creates new path components (i.e., any PathCreatingNode update nodes) will need to + // create this component, so we append it to the 'pathToCreate' FieldRef. childElement = *element; pathToCreate->appendPart(field); } diff --git a/src/mongo/db/update/update_object_node_test.cpp b/src/mongo/db/update/update_object_node_test.cpp index 61607e2292a..e6bfdeb65ed 100644 --- a/src/mongo/db/update/update_object_node_test.cpp +++ b/src/mongo/db/update/update_object_node_test.cpp @@ -2810,6 +2810,155 @@ TEST(UpdateObjectNodeTest, ApplyDoNotUseStoredMergedPositional) { logDoc2.getObject()); } +/** + * The leading zero case is interesting, because if we try to look up an array element by the index + * string, a leading zero will cause the lookup to fail. That is, even if 'element' is an array, + * element["02"] will not find the element with subscript 2. + */ +TEST(UpdateObjectNodeTest, ApplyToArrayByIndexWithLeadingZero) { + auto setUpdate = fromjson("{$set: {'a.02': 2}}"); + const CollatorInterface* collator = nullptr; + std::map<StringData, std::unique_ptr<ArrayFilter>> arrayFilters; + std::set<std::string> foundIdentifiers; + UpdateObjectNode root; + ASSERT_OK(UpdateObjectNode::parseAndMerge(&root, + modifiertable::ModifierType::MOD_SET, + setUpdate["$set"]["a.02"], + collator, + arrayFilters, + foundIdentifiers)); + + Document doc(fromjson("{a: [0, 0, 0, 0, 0]}")); + FieldRef pathToCreate(""); + FieldRef pathTaken(""); + StringData matchedField; + auto fromReplication = false; + auto validateForStorage = true; + FieldRefSet immutablePaths; + UpdateIndexData indexData; + indexData.addPath("a"); + Document logDoc; + LogBuilder logBuilder(logDoc.root()); + auto indexesAffected = false; + auto noop = false; + root.apply(doc.root(), + &pathToCreate, + &pathTaken, + matchedField, + fromReplication, + validateForStorage, + immutablePaths, + &indexData, + &logBuilder, + &indexesAffected, + &noop); + ASSERT_TRUE(indexesAffected); + ASSERT_FALSE(noop); + ASSERT_BSONOBJ_EQ(fromjson("{a: [0, 0, 2, 0, 0]}"), doc.getObject()); + ASSERT_TRUE(doc.isInPlaceModeEnabled()); + ASSERT_BSONOBJ_EQ(fromjson("{$set: {'a.02': 2}}"), logDoc.getObject()); +} + +/** + * This test mimics a failure we saw in SERVER-29762. The failure occurred when the 'a.10' update + (which was applied first) padded the empty array to have 10 elements, but the new padding + elements did not have field names to match their array indexes. As a result, the 'a.2' update + failed. + */ +TEST(UpdateObjectNodeTest, ApplyMultipleArrayUpdates) { + auto setUpdate = fromjson("{$set: {'a.2': 2, 'a.10': 10}}"); + const CollatorInterface* collator = nullptr; + std::map<StringData, std::unique_ptr<ArrayFilter>> arrayFilters; + std::set<std::string> foundIdentifiers; + UpdateObjectNode root; + ASSERT_OK(UpdateObjectNode::parseAndMerge(&root, + modifiertable::ModifierType::MOD_SET, + setUpdate["$set"]["a.2"], + collator, + arrayFilters, + foundIdentifiers)); + ASSERT_OK(UpdateObjectNode::parseAndMerge(&root, + modifiertable::ModifierType::MOD_SET, + setUpdate["$set"]["a.10"], + collator, + arrayFilters, + foundIdentifiers)); + + Document doc(fromjson("{a: []}")); + FieldRef pathToCreate(""); + FieldRef pathTaken(""); + StringData matchedField; + auto fromReplication = false; + auto validateForStorage = true; + FieldRefSet immutablePaths; + UpdateIndexData indexData; + indexData.addPath("a"); + Document logDoc; + LogBuilder logBuilder(logDoc.root()); + auto indexesAffected = false; + auto noop = false; + root.apply(doc.root(), + &pathToCreate, + &pathTaken, + matchedField, + fromReplication, + validateForStorage, + immutablePaths, + &indexData, + &logBuilder, + &indexesAffected, + &noop); + ASSERT_TRUE(indexesAffected); + ASSERT_FALSE(noop); + ASSERT_BSONOBJ_EQ( + fromjson("{a: [null, null, 2, null, null, null, null, null, null, null, 10]}"), + doc.getObject()); + ASSERT_FALSE(doc.isInPlaceModeEnabled()); + ASSERT_BSONOBJ_EQ(fromjson("{$set: {'a.10': 10, 'a.2': 2}}"), logDoc.getObject()); +} + +TEST(UpdateObjectNodeTest, ApplyUpdateToNonViablePathInArray) { + auto setUpdate = fromjson("{$set: {'a.b': 3}}"); + const CollatorInterface* collator = nullptr; + std::map<StringData, std::unique_ptr<ArrayFilter>> arrayFilters; + std::set<std::string> foundIdentifiers; + UpdateObjectNode root; + ASSERT_OK(UpdateObjectNode::parseAndMerge(&root, + modifiertable::ModifierType::MOD_SET, + setUpdate["$set"]["a.b"], + collator, + arrayFilters, + foundIdentifiers)); + + Document doc(fromjson("{a: [{b: 1}, {b: 2}]}")); + FieldRef pathToCreate(""); + FieldRef pathTaken(""); + StringData matchedField; + auto fromReplication = false; + auto validateForStorage = true; + FieldRefSet immutablePaths; + UpdateIndexData indexData; + indexData.addPath("a"); + Document logDoc; + LogBuilder logBuilder(logDoc.root()); + auto indexesAffected = false; + auto noop = false; + ASSERT_THROWS_CODE_AND_WHAT(root.apply(doc.root(), + &pathToCreate, + &pathTaken, + matchedField, + fromReplication, + validateForStorage, + immutablePaths, + &indexData, + &logBuilder, + &indexesAffected, + &noop), + UserException, + ErrorCodes::PathNotViable, + "Cannot create field 'b' in element {a: [ { b: 1 }, { b: 2 } ]}"); +} + TEST(UpdateObjectNodeTest, SetAndPopModifiersWithCommonPrefixApplySuccessfully) { auto update = fromjson("{$set: {'a.b': 5}, $pop: {'a.c': -1}}"); const CollatorInterface* collator = nullptr; diff --git a/src/mongo/util/stringutils.cpp b/src/mongo/util/stringutils.cpp index 3f695fc9e4b..f9bf74eb778 100644 --- a/src/mongo/util/stringutils.cpp +++ b/src/mongo/util/stringutils.cpp @@ -29,8 +29,11 @@ #include "mongo/platform/basic.h" +#include <cctype> + #include "mongo/util/stringutils.h" +#include "mongo/base/parse_number.h" #include "mongo/util/hex.h" namespace mongo { @@ -224,4 +227,17 @@ std::string escape(StringData sd, bool escape_slash) { return ret.str(); } +boost::optional<size_t> parseUnsignedBase10Integer(StringData fieldName) { + // Do not accept positions like '-4' or '+4' + if (!std::isdigit(fieldName[0])) { + return boost::none; + } + unsigned int index; + auto status = parseNumberFromStringWithBase<unsigned int>(fieldName, 10, &index); + if (status.isOK()) { + return static_cast<size_t>(index); + } + return boost::none; +} + } // namespace mongo diff --git a/src/mongo/util/stringutils.h b/src/mongo/util/stringutils.h index 5f4be4f9163..13e982e22e5 100644 --- a/src/mongo/util/stringutils.h +++ b/src/mongo/util/stringutils.h @@ -31,6 +31,7 @@ #include <ctype.h> +#include <boost/optional.hpp> #include <memory> #include <string> #include <vector> @@ -105,4 +106,11 @@ int versionCmp(const StringData rhs, const StringData lhs); */ std::string escape(StringData s, bool escape_slash = false); +/** + * Converts 'integer' from a base-10 string to a size_t value or returns boost::none if 'integer' + * is not a valid base-10 string. A valid string is not allowed to have anything but decimal + * numerals, not even a +/- prefix or leading/trailing whitespace. + */ +boost::optional<size_t> parseUnsignedBase10Integer(StringData integer); + } // namespace mongo diff --git a/src/mongo/util/stringutils_test.cpp b/src/mongo/util/stringutils_test.cpp index 302fc5c3de3..80bbfc5db10 100644 --- a/src/mongo/util/stringutils_test.cpp +++ b/src/mongo/util/stringutils_test.cpp @@ -36,7 +36,7 @@ namespace mongo { using std::string; -TEST(Comparison, Basic) { +TEST(StringUtilsTest, Basic) { // // Basic version comparison tests with different version string types // @@ -58,7 +58,7 @@ TEST(Comparison, Basic) { ASSERT(versionCmp("1.2.3-pre", "1.2.3") < 0); } -TEST(LexNumCmp, Simple1) { +TEST(StringUtilsTest, Simple1) { ASSERT_EQUALS(0, LexNumCmp::cmp("a.b.c", "a.b.c", false)); } @@ -69,7 +69,7 @@ void assertCmp(int expected, StringData s1, StringData s2, bool lexOnly = false) ASSERT_EQUALS(expected < 0, cmp(s1, s2)); } -TEST(LexNumCmp, Simple2) { +TEST(StringUtilsTest, Simple2) { ASSERT(!isdigit((char)255)); assertCmp(0, "a", "a"); @@ -163,14 +163,14 @@ TEST(LexNumCmp, Simple2) { assertCmp(0, "ac.t", "ac.t"); } -TEST(LexNumCmp, LexOnly) { +TEST(StringUtilsTest, LexOnly) { assertCmp(-1, "0", "00", true); assertCmp(1, "1", "01", true); assertCmp(-1, "1", "11", true); assertCmp(1, "2", "11", true); } -TEST(LexNumCmp, Substring1) { +TEST(StringUtilsTest, Substring1) { assertCmp(0, "1234", "1234", false); assertCmp(0, StringData("1234"), StringData("1234"), false); assertCmp(0, StringData("1234", 4), StringData("1234", 4), false); @@ -180,7 +180,7 @@ TEST(LexNumCmp, Substring1) { assertCmp(0, StringData("0001", 3), StringData("0000", 3), false); } -TEST(IntegerToHex, VariousConversions) { +TEST(StringUtilsTest, VariousConversions) { ASSERT_EQUALS(std::string("0"), integerToHex(0)); ASSERT_EQUALS(std::string("1"), integerToHex(1)); ASSERT_EQUALS(std::string("1337"), integerToHex(0x1337)); @@ -193,4 +193,74 @@ TEST(IntegerToHex, VariousConversions) { ASSERT_EQUALS(std::string("8000000000000000"), integerToHex(std::numeric_limits<long long>::min())); } + +TEST(StringUtilsTest, CanParseZero) { + boost::optional<size_t> result = parseUnsignedBase10Integer("0"); + ASSERT(result && *result == 0); +} + +TEST(StringUtilsTest, CanParseDoubleZero) { + boost::optional<size_t> result = parseUnsignedBase10Integer("00"); + ASSERT(result && *result == 0); +} + +TEST(StringUtilsTest, PositivePrefixFailsToParse) { + boost::optional<size_t> result = parseUnsignedBase10Integer("+0"); + ASSERT(!result); +} + +TEST(StringUtilsTest, NegativePrefixFailsToParse) { + boost::optional<size_t> result = parseUnsignedBase10Integer("-0"); + ASSERT(!result); +} + +TEST(StringUtilsTest, CanParseIntValue) { + boost::optional<size_t> result = parseUnsignedBase10Integer("10"); + ASSERT(result && *result == 10); +} + +TEST(StringUtilsTest, CanParseIntValueWithLeadingZeros) { + boost::optional<size_t> result = parseUnsignedBase10Integer("0010"); + ASSERT(result && *result == 10); +} + +TEST(StringUtilsTest, TrailingLetterFailsToParse) { + boost::optional<size_t> result = parseUnsignedBase10Integer("5a"); + ASSERT(!result); +} + +TEST(StringUtilsTest, LeadingLetterFailsToParse) { + boost::optional<size_t> result = parseUnsignedBase10Integer("a5"); + ASSERT(!result); +} + +TEST(StringUtilsTest, LetterWithinNumberFailsToParse) { + boost::optional<size_t> result = parseUnsignedBase10Integer("5a5"); + ASSERT(!result); +} + +TEST(StringUtilsTest, HexStringFailsToParse) { + boost::optional<size_t> result = parseUnsignedBase10Integer("0xfeed"); + ASSERT(!result); +} + +TEST(StringUtilsTest, BinaryStringFailsToParse) { + boost::optional<size_t> result = parseUnsignedBase10Integer("0b11010010"); + ASSERT(!result); +} + +TEST(StringUtilsTest, LeadingWhitespaceFailsToParse) { + boost::optional<size_t> result = parseUnsignedBase10Integer(" 10"); + ASSERT(!result); +} + +TEST(StringUtilsTest, TrailingWhitespaceFailsToParse) { + boost::optional<size_t> result = parseUnsignedBase10Integer("10 "); + ASSERT(!result); +} + +TEST(StringUtilsTest, WhitespaceWithinNumberFailsToParse) { + boost::optional<size_t> result = parseUnsignedBase10Integer(" 10"); + ASSERT(!result); } +} // namespace mongo |