path: unwind regular expressions in Windows

This is the second part of removing ReDoS vulnerabilities from v4.x. The regular expression `splitTailRe` exposed a ReDoS vulnerability. It was only used in the Windows implementations of several path utilities (`dirname`, `basename`, `extname`, and `parse`). In v6.x a change landed that unwound this regular expression and, in turn, patched the vulnerability. This commit copies the unwound implementation currently found on v8.x. It is completely self-contained. I attempted to keep all warnings and deprecations the same as in the v4.x implementation, but may have missed something buried in the large unwound functions. Refs: https://github.com/nodejs/node/commit/b212be08f6
author: Myles Borins <mylesborins@google.com> 2018-02-22 14:12:06 -0500
committer: Myles Borins <mylesborins@google.com> 2018-02-22 17:47:13 -0500
commit: bf00665af68c8aaf43112a244535bd1094d68f16 (patch)
tree: d7eaa55587001a306a55a7adfdc217f19a3b9512
parent: 4196fcf23ea81e7c3a4301604dd730427a0623af (diff)
download: node-new-bf00665af68c8aaf43112a244535bd1094d68f16.tar.gz
1 files changed, 396 insertions, 44 deletions
diff --git a/lib/path.js b/lib/path.js
index a100bde79a..c3b5271bfa 100644
--- a/lib/path.js
+++ b/lib/path.js
@@ -65,26 +65,8 @@ function trimArray(arr) {
 const splitDeviceRe =
     /^([a-zA-Z]:|[\\\/]{2}[^\\\/]+[\\\/]+[^\\\/]+)?([\\\/])?([\s\S]*?)$/;
 
-// Regex to split the tail part of the above into [*, dir, basename, ext]
-const splitTailRe =
-    /^([\s\S]*?)((?:\.{1,2}|[^\\\/]+?|)(\.[^.\/\\]*|))(?:[\\\/]*)$/;
-
 var win32 = {};
 
-// Function to split a filename into [root, dir, basename, ext]
-function win32SplitPath(filename) {
-  // Separate device+slash from tail
-  const result = splitDeviceRe.exec(filename);
-  const device = (result[1] || '') + (result[2] || '');
-  const tail = result[3];
-  // Split the tail into dir, basename and extension
-  const result2 = splitTailRe.exec(tail);
-  const dir = result2[1];
-  const basename = result2[2];
-  const ext = result2[3];
-  return [device, dir, basename, ext];
-}
-
 function win32StatPath(path) {
   const result = splitDeviceRe.exec(path);
   const device = result[1] || '';
@@ -318,21 +300,104 @@ win32._makeLong = function(path) {
 
 
 win32.dirname = function(path) {
-  const result = win32SplitPath(path);
-  const root = result[0];
-  var dir = result[1];
-
-  if (!root && !dir) {
-    // No dirname whatsoever
+  const len = path.length;
+  if (len === 0)
     return '.';
+  var rootEnd = -1;
+  var end = -1;
+  var matchedSlash = true;
+  var offset = 0;
+  var code = path.charCodeAt(0);
+
+  // Try to match a root
+  if (len > 1) {
+    if (code === 47/*/*/ || code === 92/*\*/) {
+      // Possible UNC root
+
+      rootEnd = offset = 1;
+
+      code = path.charCodeAt(1);
+      if (code === 47/*/*/ || code === 92/*\*/) {
+        // Matched double path separator at beginning
+        var j = 2;
+        var last = j;
+        // Match 1 or more non-path separators
+        for (; j < len; ++j) {
+          code = path.charCodeAt(j);
+          if (code === 47/*/*/ || code === 92/*\*/)
+            break;
+        }
+        if (j < len && j !== last) {
+          // Matched!
+          last = j;
+          // Match 1 or more path separators
+          for (; j < len; ++j) {
+            code = path.charCodeAt(j);
+            if (code !== 47/*/*/ && code !== 92/*\*/)
+              break;
+          }
+          if (j < len && j !== last) {
+            // Matched!
+            last = j;
+            // Match 1 or more non-path separators
+            for (; j < len; ++j) {
+              code = path.charCodeAt(j);
+              if (code === 47/*/*/ || code === 92/*\*/)
+                break;
+            }
+            if (j === len) {
+              // We matched a UNC root only
+              return path;
+            }
+            if (j !== last) {
+              // We matched a UNC root with leftovers
+
+              // Offset by 1 to include the separator after the UNC root to
+              // treat it as a "normal root" on top of a (UNC) root
+              rootEnd = offset = j + 1;
+            }
+          }
+        }
+      }
+    } else if ((code >= 65/*A*/ && code <= 90/*Z*/) ||
+               (code >= 97/*a*/ && code <= 122/*z*/)) {
+      // Possible device root
+
+      if (path.charCodeAt(1) === 58/*:*/) {
+        rootEnd = offset = 2;
+        if (len > 2) {
+          code = path.charCodeAt(2);
+          if (code === 47/*/*/ || code === 92/*\*/)
+            rootEnd = offset = 3;
+        }
+      }
+    }
+  } else if (code === 47/*/*/ || code === 92/*\*/) {
+    // `path` contains just a path separator, exit early to avoid
+    // unnecessary work
+    return path;
   }
 
-  if (dir) {
-    // It has a dirname, strip trailing slash
-    dir = dir.substr(0, dir.length - 1);
+  for (var i = len - 1; i >= offset; --i) {
+    code = path.charCodeAt(i);
+    if (code === 47/*/*/ || code === 92/*\*/) {
+      if (!matchedSlash) {
+        end = i;
+        break;
+      }
+    } else {
+      // We saw the first non-path separator
+      matchedSlash = false;
+    }
   }
 
-  return root + dir;
+  if (end === -1) {
+    if (rootEnd === -1)
+      return '.';
+    else
+      end = rootEnd;
+  }
+  return path.slice(0, end);
 };
 
 
@@ -340,17 +405,155 @@ win32.basename = function(path, ext) {
   if (ext !== undefined && typeof ext !== 'string')
     throw new TypeError('ext must be a string');
 
-  var f = win32SplitPath(path)[2];
-  // TODO: make this comparison case-insensitive on windows?
-  if (ext && f.substr(-1 * ext.length) === ext) {
-    f = f.substr(0, f.length - ext.length);
+  var start = 0;
+  var end = -1;
+  var matchedSlash = true;
+  var i;
+
+  // Check for a drive letter prefix so as not to mistake the following
+  // path separator as an extra separator at the end of the path that can be
+  // disregarded
+  if (path.length >= 2) {
+    const drive = path.charCodeAt(0);
+    if ((drive >= 65/*A*/ && drive <= 90/*Z*/) ||
+        (drive >= 97/*a*/ && drive <= 122/*z*/)) {
+      if (path.charCodeAt(1) === 58/*:*/)
+        start = 2;
+    }
+  }
+
+  if (ext !== undefined && ext.length > 0 && ext.length <= path.length) {
+    if (ext.length === path.length && ext === path)
+      return '';
+    var extIdx = ext.length - 1;
+    var firstNonSlashEnd = -1;
+    for (i = path.length - 1; i >= start; --i) {
+      const code = path.charCodeAt(i);
+      if (code === 47/*/*/ || code === 92/*\*/) {
+        // If we reached a path separator that was not part of a set of path
+        // separators at the end of the string, stop now
+        if (!matchedSlash) {
+          start = i + 1;
+          break;
+        }
+      } else {
+        if (firstNonSlashEnd === -1) {
+          // We saw the first non-path separator, remember this index in case
+          // we need it if the extension ends up not matching
+          matchedSlash = false;
+          firstNonSlashEnd = i + 1;
+        }
+        if (extIdx >= 0) {
+          // Try to match the explicit extension
+          if (code === ext.charCodeAt(extIdx)) {
+            if (--extIdx === -1) {
+              // We matched the extension, so mark this as the end of our path
+              // component
+              end = i;
+            }
+          } else {
+            // Extension does not match, so our result is the entire path
+            // component
+            extIdx = -1;
+            end = firstNonSlashEnd;
+          }
+        }
+      }
+    }
+
+    if (start === end)
+      end = firstNonSlashEnd;
+    else if (end === -1)
+      end = path.length;
+    return path.slice(start, end);
+  } else {
+    for (i = path.length - 1; i >= start; --i) {
+      const code = path.charCodeAt(i);
+      if (code === 47/*/*/ || code === 92/*\*/) {
+        // If we reached a path separator that was not part of a set of path
+        // separators at the end of the string, stop now
+        if (!matchedSlash) {
+          start = i + 1;
+          break;
+        }
+      } else if (end === -1) {
+        // We saw the first non-path separator, mark this as the end of our
+        // path component
+        matchedSlash = false;
+        end = i + 1;
+      }
+    }
+
+    if (end === -1)
+      return '';
+    return path.slice(start, end);
   }
-  return f;
 };
 
 
 win32.extname = function(path) {
-  return win32SplitPath(path)[3];
+  var start = 0;
+  var startDot = -1;
+  var startPart = 0;
+  var end = -1;
+  var matchedSlash = true;
+  // Track the state of characters (if any) we see before our first dot and
+  // after any path separator we find
+  var preDotState = 0;
+
+  // Check for a drive letter prefix so as not to mistake the following
+  // path separator as an extra separator at the end of the path that can be
+  // disregarded
+  if (path.length >= 2) {
+    const code = path.charCodeAt(0);
+    if (path.charCodeAt(1) === 58/*:*/ &&
+        ((code >= 65/*A*/ && code <= 90/*Z*/) ||
+         (code >= 97/*a*/ && code <= 122/*z*/))) {
+      start = startPart = 2;
+    }
+  }
+
+  for (var i = path.length - 1; i >= start; --i) {
+    const code = path.charCodeAt(i);
+    if (code === 47/*/*/ || code === 92/*\*/) {
+      // If we reached a path separator that was not part of a set of path
+      // separators at the end of the string, stop now
+      if (!matchedSlash) {
+        startPart = i + 1;
+        break;
+      }
+      continue;
+    }
+    if (end === -1) {
+      // We saw the first non-path separator, mark this as the end of our
+      // extension
+      matchedSlash = false;
+      end = i + 1;
+    }
+    if (code === 46/*.*/) {
+      // If this is our first dot, mark it as the start of our extension
+      if (startDot === -1)
+        startDot = i;
+      else if (preDotState !== 1)
+        preDotState = 1;
+    } else if (startDot !== -1) {
+      // We saw a non-dot and non-path separator before our dot, so we should
+      // have a good chance at having a non-empty extension
+      preDotState = -1;
+    }
+  }
+
+  if (startDot === -1 ||
+      end === -1 ||
+      // We saw a non-dot character immediately before the dot
+      preDotState === 0 ||
+      // The (right-most) trimmed path component is exactly '..'
+      (preDotState === 1 &&
+       startDot === end - 1 &&
+       startDot === startPart + 1)) {
+    return '';
+  }
+  return path.slice(startDot, end);
 };
 
 
@@ -382,17 +585,166 @@ win32.format = function(pathObject) {
 };
 
 
-win32.parse = function(pathString) {
-  assertPath(pathString);
+win32.parse = function(path) {
+  assertPath(path);
 
-  var allParts = win32SplitPath(pathString);
-  return {
-    root: allParts[0],
-    dir: allParts[0] + allParts[1].slice(0, -1),
-    base: allParts[2],
-    ext: allParts[3],
-    name: allParts[2].slice(0, allParts[2].length - allParts[3].length)
-  };
+  var ret = { root: '', dir: '', base: '', ext: '', name: '' };
+  if (path.length === 0)
+    return ret;
+
+  var len = path.length;
+  var rootEnd = 0;
+  var code = path.charCodeAt(0);
+
+  // Try to match a root
+  if (len > 1) {
+    if (code === 47/*/*/ || code === 92/*\*/) {
+      // Possible UNC root
+
+      code = path.charCodeAt(1);
+      rootEnd = 1;
+      if (code === 47/*/*/ || code === 92/*\*/) {
+        // Matched double path separator at beginning
+        var j = 2;
+        var last = j;
+        // Match 1 or more non-path separators
+        for (; j < len; ++j) {
+          code = path.charCodeAt(j);
+          if (code === 47/*/*/ || code === 92/*\*/)
+            break;
+        }
+        if (j < len && j !== last) {
+          // Matched!
+          last = j;
+          // Match 1 or more path separators
+          for (; j < len; ++j) {
+            code = path.charCodeAt(j);
+            if (code !== 47/*/*/ && code !== 92/*\*/)
+              break;
+          }
+          if (j < len && j !== last) {
+            // Matched!
+            last = j;
+            // Match 1 or more non-path separators
+            for (; j < len; ++j) {
+              code = path.charCodeAt(j);
+              if (code === 47/*/*/ || code === 92/*\*/)
+                break;
+            }
+            if (j === len) {
+              // We matched a UNC root only
+
+              rootEnd = j;
+            } else if (j !== last) {
+              // We matched a UNC root with leftovers
+
+              rootEnd = j + 1;
+            }
+          }
+        }
+      }
+    } else if ((code >= 65/*A*/ && code <= 90/*Z*/) ||
+               (code >= 97/*a*/ && code <= 122/*z*/)) {
+      // Possible device root
+
+      if (path.charCodeAt(1) === 58/*:*/) {
+        rootEnd = 2;
+        if (len > 2) {
+          code = path.charCodeAt(2);
+          if (code === 47/*/*/ || code === 92/*\*/) {
+            if (len === 3) {
+              // `path` contains just a drive root, exit early to avoid
+              // unnecessary work
+              ret.root = ret.dir = path;
+              return ret;
+            }
+            rootEnd = 3;
+          }
+        } else {
+          // `path` contains just a drive root, exit early to avoid
+          // unnecessary work
+          ret.root = ret.dir = path;
+          return ret;
+        }
+      }
+    }
+  } else if (code === 47/*/*/ || code === 92/*\*/) {
+    // `path` contains just a path separator, exit early to avoid
+    // unnecessary work
+    ret.root = ret.dir = path;
+    return ret;
+  }
+
+  if (rootEnd > 0)
+    ret.root = path.slice(0, rootEnd);
+
+  var startDot = -1;
+  var startPart = rootEnd;
+  var end = -1;
+  var matchedSlash = true;
+  var i = path.length - 1;
+
+  // Track the state of characters (if any) we see before our first dot and
+  // after any path separator we find
+  var preDotState = 0;
+
+  // Get non-dir info
+  for (; i >= rootEnd; --i) {
+    code = path.charCodeAt(i);
+    if (code === 47/*/*/ || code === 92/*\*/) {
+      // If we reached a path separator that was not part of a set of path
+      // separators at the end of the string, stop now
+      if (!matchedSlash) {
+        startPart = i + 1;
+        break;
+      }
+      continue;
+    }
+    if (end === -1) {
+      // We saw the first non-path separator, mark this as the end of our
+      // extension
+      matchedSlash = false;
+      end = i + 1;
+    }
+    if (code === 46/*.*/) {
+      // If this is our first dot, mark it as the start of our extension
+      if (startDot === -1)
+        startDot = i;
+      else if (preDotState !== 1)
+        preDotState = 1;
+    } else if (startDot !== -1) {
+      // We saw a non-dot and non-path separator before our dot, so we should
+      // have a good chance at having a non-empty extension
+      preDotState = -1;
+    }
+  }
+
+  if (startDot === -1 ||
+      end === -1 ||
+      // We saw a non-dot character immediately before the dot
+      preDotState === 0 ||
+      // The (right-most) trimmed path component is exactly '..'
+      (preDotState === 1 &&
+       startDot === end - 1 &&
+       startDot === startPart + 1)) {
+    if (end !== -1) {
+      ret.base = ret.name = path.slice(startPart, end);
+    }
+  } else {
+    ret.name = path.slice(startPart, startDot);
+    ret.base = path.slice(startPart, end);
+    ret.ext = path.slice(startDot, end);
+  }
+
+  // If the directory is the root, use the entire root as the `dir` including
+  // the trailing slash if any (`C:\abc` -> `C:\`). Otherwise, strip out the
+  // trailing slash (`C:\abc\def` -> `C:\abc`).
+  if (startPart > 0 && startPart !== rootEnd)
+    ret.dir = path.slice(0, startPart - 1);
+  else
+    ret.dir = ret.root;
+
+  return ret;
 };
author	Myles Borins <mylesborins@google.com>	2018-02-22 14:12:06 -0500
committer	Myles Borins <mylesborins@google.com>	2018-02-22 17:47:13 -0500
commit	bf00665af68c8aaf43112a244535bd1094d68f16 (patch)
tree	d7eaa55587001a306a55a7adfdc217f19a3b9512
parent	4196fcf23ea81e7c3a4301604dd730427a0623af (diff)
download	node-new-bf00665af68c8aaf43112a244535bd1094d68f16.tar.gz