SERVER-37058: Update with numeric field names inside an array can cause validation to fail

author: Nick Zolnierz <nicholas.zolnierz@mongodb.com> 2018-09-21 14:35:16 -0400
committer: Nick Zolnierz <nicholas.zolnierz@mongodb.com> 2018-10-04 09:53:18 -0400
commit: 41820e4b371e389e8cbe965dc9aff14f657a9040 (patch)
tree: 511ef7e3e15a067f0f0a2dd93495b0f3b18e6050 /src/mongo/db/update_index_data.cpp
parent: 1e026f75dc41cca4c7293e42b5b49cb9e46d0ea3 (diff)
download: mongo-41820e4b371e389e8cbe965dc9aff14f657a9040.tar.gz
1 files changed, 33 insertions, 86 deletions
diff --git a/src/mongo/db/update_index_data.cpp b/src/mongo/db/update_index_data.cpp
index 2b1144d40a3..e3bd3c6d808 100644
--- a/src/mongo/db/update_index_data.cpp
+++ b/src/mongo/db/update_index_data.cpp
@@ -30,21 +30,13 @@
 
 #include "mongo/db/update_index_data.h"
 #include "mongo/bson/util/builder.h"
-#include "mongo/db/field_ref.h"
 
 namespace mongo {
 
-using std::string;
-
 UpdateIndexData::UpdateIndexData() : _allPathsIndexed(false) {}
 
-void UpdateIndexData::addPath(StringData path) {
-    string s;
-    if (getCanonicalIndexField(path, &s)) {
-        _canonicalPaths.insert(s);
-    } else {
-        _canonicalPaths.insert(path.toString());
-    }
+void UpdateIndexData::addPath(const FieldRef& path) {
+    _canonicalPaths.insert(getCanonicalIndexField(path));
 }
 
 void UpdateIndexData::addPathComponent(StringData pathComponent) {
@@ -61,33 +53,21 @@ void UpdateIndexData::clear() {
     _allPathsIndexed = false;
 }
 
-bool UpdateIndexData::mightBeIndexed(StringData path) const {
+bool UpdateIndexData::mightBeIndexed(const FieldRef& path) const {
     if (_allPathsIndexed) {
         return true;
     }
 
-    StringData use = path;
-    string x;
-    if (getCanonicalIndexField(path, &x))
-        use = StringData(x);
-
-    for (std::set<string>::const_iterator i = _canonicalPaths.begin(); i != _canonicalPaths.end();
-         ++i) {
-        StringData idx(*i);
-
-        if (_startsWith(use, idx))
-            return true;
+    FieldRef canonicalPath = getCanonicalIndexField(path);
 
-        if (_startsWith(idx, use))
+    for (const auto& idx : _canonicalPaths) {
+        if (_startsWith(canonicalPath, idx) || _startsWith(idx, canonicalPath))
             return true;
     }
 
-    FieldRef pathFieldRef(path);
-    for (std::set<string>::const_iterator i = _pathComponents.begin(); i != _pathComponents.end();
-         ++i) {
-        const string& pathComponent = *i;
-        for (size_t partIdx = 0; partIdx < pathFieldRef.numParts(); ++partIdx) {
-            if (pathComponent == pathFieldRef.getPart(partIdx)) {
+    for (const auto& pathComponent : _pathComponents) {
+        for (size_t partIdx = 0; partIdx < path.numParts(); ++partIdx) {
+            if (pathComponent == path.getPart(partIdx)) {
                 return true;
             }
         }
@@ -96,74 +76,41 @@ bool UpdateIndexData::mightBeIndexed(StringData path) const {
     return false;
 }
 
-bool UpdateIndexData::_startsWith(StringData a, StringData b) const {
-    if (!a.startsWith(b))
-        return false;
-
-    // make sure there is a dot or EOL right after
-
-    if (a.size() == b.size())
-        return true;
-
-    return a[b.size()] == '.';
+bool UpdateIndexData::_startsWith(const FieldRef& a, const FieldRef& b) const {
+    return (a == b) || (b.isPrefixOf(a));
 }
 
-bool getCanonicalIndexField(StringData fullName, string* out) {
-    // check if fieldName contains ".$" or ".###" substrings (#=digit) and skip them
-    // however do not skip the first field even if it meets these criteria
-
-    if (fullName.find('.') == string::npos)
-        return false;
+FieldRef UpdateIndexData::getCanonicalIndexField(const FieldRef& path) {
+    if (path.numParts() <= 1)
+        return path;
 
-    bool modified = false;
+    // The first part of the path must always be a valid field name, since it's not possible to
+    // store a top-level array or '$' field name in a document.
+    FieldRef buf(path.getPart(0));
+    for (size_t i = 1; i < path.numParts(); ++i) {
+        auto pathComponent = path.getPart(i);
 
-    StringBuilder buf;
-    for (size_t i = 0; i < fullName.size(); i++) {
-        char c = fullName[i];
-
-        if (c != '.') {
-            buf << c;
+        if (pathComponent == "$"_sd) {
             continue;
         }
 
-        if (i + 1 == fullName.size()) {
-            // ends with '.'
-            buf << c;
-            continue;
-        }
-
-        // check for ".$", skip if present
-        if (fullName[i + 1] == '$') {
-            // only do this if its not something like $a
-            if (i + 2 >= fullName.size() || fullName[i + 2] == '.') {
-                i++;
-                modified = true;
-                continue;
-            }
-        }
-
-        // check for ".###" for any number of digits (no letters)
-        if (isdigit(fullName[i + 1])) {
-            size_t j = i;
-            // skip digits
-            while (j + 1 < fullName.size() && isdigit(fullName[j + 1]))
-                j++;
-
-            if (j + 1 == fullName.size() || fullName[j + 1] == '.') {
-                // only digits found, skip forward
-                i = j;
-                modified = true;
-                continue;
+        if (FieldRef::isNumericPathComponentLenient(pathComponent)) {
+            // Peek ahead to see if the next component is also all digits. This implies that the
+            // update is attempting to create a numeric field name which would violate the
+            // "ambiguous field name in array" constraint for multi-key indexes. Break early in this
+            // case and conservatively return that this path affects the prefix of the consecutive
+            // numerical path components. For instance, an input such as 'a.0.1.b.c' would return
+            // the canonical index path of 'a'.
+            if ((i + 1) < path.numParts() &&
+                FieldRef::isNumericPathComponentLenient(path.getPart(i + 1))) {
+                break;
             }
+            continue;
         }
 
-        buf << c;
+        buf.appendPart(pathComponent);
     }
 
-    if (!modified)
-        return false;
-
-    *out = buf.str();
-    return true;
+    return buf;
 }
 }
author	Nick Zolnierz <nicholas.zolnierz@mongodb.com>	2018-09-21 14:35:16 -0400
committer	Nick Zolnierz <nicholas.zolnierz@mongodb.com>	2018-10-04 09:53:18 -0400
commit	41820e4b371e389e8cbe965dc9aff14f657a9040 (patch)
tree	511ef7e3e15a067f0f0a2dd93495b0f3b18e6050 /src/mongo/db/update_index_data.cpp
parent	1e026f75dc41cca4c7293e42b5b49cb9e46d0ea3 (diff)
download	mongo-41820e4b371e389e8cbe965dc9aff14f657a9040.tar.gz