summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Myles Borins <mylesborins@google.com> 2018-02-22 14:12:06 -0500
committer: Myles Borins <mylesborins@google.com> 2018-02-22 17:47:13 -0500
commit: bf00665af68c8aaf43112a244535bd1094d68f16 (patch)
tree: d7eaa55587001a306a55a7adfdc217f19a3b9512
parent: 4196fcf23ea81e7c3a4301604dd730427a0623af (diff)
download: node-new-bf00665af68c8aaf43112a244535bd1094d68f16.tar.gz
path: unwind regular expressions in Windows
This is the second part of removing ReDoS vulnerabilities from v4.x. The regular expression `splitTailRe` exposed a ReDoS vulnerability. It was only utilized in the Windows implementation of a number of the path utilities. In v6.x a change landed that unwound this regular expression, and in turn patched the vulnerability. This commit copies the unwound implementation currently found on v8.x. It is completely self-contained. I attempted to keep all warnings and deprecations the same as the v4.x implementation, but may have missed something buried in the large unwound functions. Refs: https://github.com/nodejs/node/commit/b212be08f6
-rw-r--r-- lib/path.js 440
1 files changed, 396 insertions, 44 deletions
diff --git a/lib/path.js b/lib/path.js
index a100bde79a..c3b5271bfa 100644
--- a/lib/path.js
+++ b/lib/path.js
@@ -65,26 +65,8 @@ function trimArray(arr) {
const splitDeviceRe =
/^([a-zA-Z]:|[\\\/]{2}[^\\\/]+[\\\/]+[^\\\/]+)?([\\\/])?([\s\S]*?)$/;
-// Regex to split the tail part of the above into [*, dir, basename, ext]
-const splitTailRe =
- /^([\s\S]*?)((?:\.{1,2}|[^\\\/]+?|)(\.[^.\/\\]*|))(?:[\\\/]*)$/;
-
var win32 = {};
-// Function to split a filename into [root, dir, basename, ext]
-function win32SplitPath(filename) {
- // Separate device+slash from tail
- const result = splitDeviceRe.exec(filename);
- const device = (result[1] || '') + (result[2] || '');
- const tail = result[3];
- // Split the tail into dir, basename and extension
- const result2 = splitTailRe.exec(tail);
- const dir = result2[1];
- const basename = result2[2];
- const ext = result2[3];
- return [device, dir, basename, ext];
-}
-
function win32StatPath(path) {
const result = splitDeviceRe.exec(path);
const device = result[1] || '';
@@ -318,21 +300,104 @@ win32._makeLong = function(path) {
win32.dirname = function(path) {
- const result = win32SplitPath(path);
- const root = result[0];
- var dir = result[1];
-
- if (!root && !dir) {
- // No dirname whatsoever
+ const len = path.length;
+ if (len === 0)
return '.';
+ var rootEnd = -1;
+ var end = -1;
+ var matchedSlash = true;
+ var offset = 0;
+ var code = path.charCodeAt(0);
+
+ // Try to match a root
+ if (len > 1) {
+ if (code === 47/*/*/ || code === 92/*\*/) {
+ // Possible UNC root
+
+ rootEnd = offset = 1;
+
+ code = path.charCodeAt(1);
+ if (code === 47/*/*/ || code === 92/*\*/) {
+ // Matched double path separator at beginning
+ var j = 2;
+ var last = j;
+ // Match 1 or more non-path separators
+ for (; j < len; ++j) {
+ code = path.charCodeAt(j);
+ if (code === 47/*/*/ || code === 92/*\*/)
+ break;
+ }
+ if (j < len && j !== last) {
+ // Matched!
+ last = j;
+ // Match 1 or more path separators
+ for (; j < len; ++j) {
+ code = path.charCodeAt(j);
+ if (code !== 47/*/*/ && code !== 92/*\*/)
+ break;
+ }
+ if (j < len && j !== last) {
+ // Matched!
+ last = j;
+ // Match 1 or more non-path separators
+ for (; j < len; ++j) {
+ code = path.charCodeAt(j);
+ if (code === 47/*/*/ || code === 92/*\*/)
+ break;
+ }
+ if (j === len) {
+ // We matched a UNC root only
+ return path;
+ }
+ if (j !== last) {
+ // We matched a UNC root with leftovers
+
+ // Offset by 1 to include the separator after the UNC root to
+ // treat it as a "normal root" on top of a (UNC) root
+ rootEnd = offset = j + 1;
+ }
+ }
+ }
+ }
+ } else if ((code >= 65/*A*/ && code <= 90/*Z*/) ||
+ (code >= 97/*a*/ && code <= 122/*z*/)) {
+ // Possible device root
+
+ if (path.charCodeAt(1) === 58/*:*/) {
+ rootEnd = offset = 2;
+ if (len > 2) {
+ code = path.charCodeAt(2);
+ if (code === 47/*/*/ || code === 92/*\*/)
+ rootEnd = offset = 3;
+ }
+ }
+ }
+ } else if (code === 47/*/*/ || code === 92/*\*/) {
+ // `path` contains just a path separator, exit early to avoid
+ // unnecessary work
+ return path;
}
- if (dir) {
- // It has a dirname, strip trailing slash
- dir = dir.substr(0, dir.length - 1);
+ for (var i = len - 1; i >= offset; --i) {
+ code = path.charCodeAt(i);
+ if (code === 47/*/*/ || code === 92/*\*/) {
+ if (!matchedSlash) {
+ end = i;
+ break;
+ }
+ } else {
+ // We saw the first non-path separator
+ matchedSlash = false;
+ }
}
- return root + dir;
+ if (end === -1) {
+ if (rootEnd === -1)
+ return '.';
+ else
+ end = rootEnd;
+ }
+ return path.slice(0, end);
};
@@ -340,17 +405,155 @@ win32.basename = function(path, ext) {
if (ext !== undefined && typeof ext !== 'string')
throw new TypeError('ext must be a string');
- var f = win32SplitPath(path)[2];
- // TODO: make this comparison case-insensitive on windows?
- if (ext && f.substr(-1 * ext.length) === ext) {
- f = f.substr(0, f.length - ext.length);
+ var start = 0;
+ var end = -1;
+ var matchedSlash = true;
+ var i;
+
+ // Check for a drive letter prefix so as not to mistake the following
+ // path separator as an extra separator at the end of the path that can be
+ // disregarded
+ if (path.length >= 2) {
+ const drive = path.charCodeAt(0);
+ if ((drive >= 65/*A*/ && drive <= 90/*Z*/) ||
+ (drive >= 97/*a*/ && drive <= 122/*z*/)) {
+ if (path.charCodeAt(1) === 58/*:*/)
+ start = 2;
+ }
+ }
+
+ if (ext !== undefined && ext.length > 0 && ext.length <= path.length) {
+ if (ext.length === path.length && ext === path)
+ return '';
+ var extIdx = ext.length - 1;
+ var firstNonSlashEnd = -1;
+ for (i = path.length - 1; i >= start; --i) {
+ const code = path.charCodeAt(i);
+ if (code === 47/*/*/ || code === 92/*\*/) {
+ // If we reached a path separator that was not part of a set of path
+ // separators at the end of the string, stop now
+ if (!matchedSlash) {
+ start = i + 1;
+ break;
+ }
+ } else {
+ if (firstNonSlashEnd === -1) {
+ // We saw the first non-path separator, remember this index in case
+ // we need it if the extension ends up not matching
+ matchedSlash = false;
+ firstNonSlashEnd = i + 1;
+ }
+ if (extIdx >= 0) {
+ // Try to match the explicit extension
+ if (code === ext.charCodeAt(extIdx)) {
+ if (--extIdx === -1) {
+ // We matched the extension, so mark this as the end of our path
+ // component
+ end = i;
+ }
+ } else {
+ // Extension does not match, so our result is the entire path
+ // component
+ extIdx = -1;
+ end = firstNonSlashEnd;
+ }
+ }
+ }
+ }
+
+ if (start === end)
+ end = firstNonSlashEnd;
+ else if (end === -1)
+ end = path.length;
+ return path.slice(start, end);
+ } else {
+ for (i = path.length - 1; i >= start; --i) {
+ const code = path.charCodeAt(i);
+ if (code === 47/*/*/ || code === 92/*\*/) {
+ // If we reached a path separator that was not part of a set of path
+ // separators at the end of the string, stop now
+ if (!matchedSlash) {
+ start = i + 1;
+ break;
+ }
+ } else if (end === -1) {
+ // We saw the first non-path separator, mark this as the end of our
+ // path component
+ matchedSlash = false;
+ end = i + 1;
+ }
+ }
+
+ if (end === -1)
+ return '';
+ return path.slice(start, end);
}
- return f;
};
win32.extname = function(path) {
- return win32SplitPath(path)[3];
+ var start = 0;
+ var startDot = -1;
+ var startPart = 0;
+ var end = -1;
+ var matchedSlash = true;
+ // Track the state of characters (if any) we see before our first dot and
+ // after any path separator we find
+ var preDotState = 0;
+
+ // Check for a drive letter prefix so as not to mistake the following
+ // path separator as an extra separator at the end of the path that can be
+ // disregarded
+ if (path.length >= 2) {
+ const code = path.charCodeAt(0);
+ if (path.charCodeAt(1) === 58/*:*/ &&
+ ((code >= 65/*A*/ && code <= 90/*Z*/) ||
+ (code >= 97/*a*/ && code <= 122/*z*/))) {
+ start = startPart = 2;
+ }
+ }
+
+ for (var i = path.length - 1; i >= start; --i) {
+ const code = path.charCodeAt(i);
+ if (code === 47/*/*/ || code === 92/*\*/) {
+ // If we reached a path separator that was not part of a set of path
+ // separators at the end of the string, stop now
+ if (!matchedSlash) {
+ startPart = i + 1;
+ break;
+ }
+ continue;
+ }
+ if (end === -1) {
+ // We saw the first non-path separator, mark this as the end of our
+ // extension
+ matchedSlash = false;
+ end = i + 1;
+ }
+ if (code === 46/*.*/) {
+ // If this is our first dot, mark it as the start of our extension
+ if (startDot === -1)
+ startDot = i;
+ else if (preDotState !== 1)
+ preDotState = 1;
+ } else if (startDot !== -1) {
+ // We saw a non-dot and non-path separator before our dot, so we should
+ // have a good chance at having a non-empty extension
+ preDotState = -1;
+ }
+ }
+
+ if (startDot === -1 ||
+ end === -1 ||
+ // We saw a non-dot character immediately before the dot
+ preDotState === 0 ||
+ // The (right-most) trimmed path component is exactly '..'
+ (preDotState === 1 &&
+ startDot === end - 1 &&
+ startDot === startPart + 1)) {
+ return '';
+ }
+ return path.slice(startDot, end);
};
@@ -382,17 +585,166 @@ win32.format = function(pathObject) {
};
-win32.parse = function(pathString) {
- assertPath(pathString);
+win32.parse = function(path) {
+ assertPath(path);
- var allParts = win32SplitPath(pathString);
- return {
- root: allParts[0],
- dir: allParts[0] + allParts[1].slice(0, -1),
- base: allParts[2],
- ext: allParts[3],
- name: allParts[2].slice(0, allParts[2].length - allParts[3].length)
- };
+ var ret = { root: '', dir: '', base: '', ext: '', name: '' };
+ if (path.length === 0)
+ return ret;
+
+ var len = path.length;
+ var rootEnd = 0;
+ var code = path.charCodeAt(0);
+
+ // Try to match a root
+ if (len > 1) {
+ if (code === 47/*/*/ || code === 92/*\*/) {
+ // Possible UNC root
+
+ code = path.charCodeAt(1);
+ rootEnd = 1;
+ if (code === 47/*/*/ || code === 92/*\*/) {
+ // Matched double path separator at beginning
+ var j = 2;
+ var last = j;
+ // Match 1 or more non-path separators
+ for (; j < len; ++j) {
+ code = path.charCodeAt(j);
+ if (code === 47/*/*/ || code === 92/*\*/)
+ break;
+ }
+ if (j < len && j !== last) {
+ // Matched!
+ last = j;
+ // Match 1 or more path separators
+ for (; j < len; ++j) {
+ code = path.charCodeAt(j);
+ if (code !== 47/*/*/ && code !== 92/*\*/)
+ break;
+ }
+ if (j < len && j !== last) {
+ // Matched!
+ last = j;
+ // Match 1 or more non-path separators
+ for (; j < len; ++j) {
+ code = path.charCodeAt(j);
+ if (code === 47/*/*/ || code === 92/*\*/)
+ break;
+ }
+ if (j === len) {
+ // We matched a UNC root only
+
+ rootEnd = j;
+ } else if (j !== last) {
+ // We matched a UNC root with leftovers
+
+ rootEnd = j + 1;
+ }
+ }
+ }
+ }
+ } else if ((code >= 65/*A*/ && code <= 90/*Z*/) ||
+ (code >= 97/*a*/ && code <= 122/*z*/)) {
+ // Possible device root
+
+ if (path.charCodeAt(1) === 58/*:*/) {
+ rootEnd = 2;
+ if (len > 2) {
+ code = path.charCodeAt(2);
+ if (code === 47/*/*/ || code === 92/*\*/) {
+ if (len === 3) {
+ // `path` contains just a drive root, exit early to avoid
+ // unnecessary work
+ ret.root = ret.dir = path;
+ return ret;
+ }
+ rootEnd = 3;
+ }
+ } else {
+ // `path` contains just a drive root, exit early to avoid
+ // unnecessary work
+ ret.root = ret.dir = path;
+ return ret;
+ }
+ }
+ }
+ } else if (code === 47/*/*/ || code === 92/*\*/) {
+ // `path` contains just a path separator, exit early to avoid
+ // unnecessary work
+ ret.root = ret.dir = path;
+ return ret;
+ }
+
+ if (rootEnd > 0)
+ ret.root = path.slice(0, rootEnd);
+
+ var startDot = -1;
+ var startPart = rootEnd;
+ var end = -1;
+ var matchedSlash = true;
+ var i = path.length - 1;
+
+ // Track the state of characters (if any) we see before our first dot and
+ // after any path separator we find
+ var preDotState = 0;
+
+ // Get non-dir info
+ for (; i >= rootEnd; --i) {
+ code = path.charCodeAt(i);
+ if (code === 47/*/*/ || code === 92/*\*/) {
+ // If we reached a path separator that was not part of a set of path
+ // separators at the end of the string, stop now
+ if (!matchedSlash) {
+ startPart = i + 1;
+ break;
+ }
+ continue;
+ }
+ if (end === -1) {
+ // We saw the first non-path separator, mark this as the end of our
+ // extension
+ matchedSlash = false;
+ end = i + 1;
+ }
+ if (code === 46/*.*/) {
+ // If this is our first dot, mark it as the start of our extension
+ if (startDot === -1)
+ startDot = i;
+ else if (preDotState !== 1)
+ preDotState = 1;
+ } else if (startDot !== -1) {
+ // We saw a non-dot and non-path separator before our dot, so we should
+ // have a good chance at having a non-empty extension
+ preDotState = -1;
+ }
+ }
+
+ if (startDot === -1 ||
+ end === -1 ||
+ // We saw a non-dot character immediately before the dot
+ preDotState === 0 ||
+ // The (right-most) trimmed path component is exactly '..'
+ (preDotState === 1 &&
+ startDot === end - 1 &&
+ startDot === startPart + 1)) {
+ if (end !== -1) {
+ ret.base = ret.name = path.slice(startPart, end);
+ }
+ } else {
+ ret.name = path.slice(startPart, startDot);
+ ret.base = path.slice(startPart, end);
+ ret.ext = path.slice(startDot, end);
+ }
+
+ // If the directory is the root, use the entire root as the `dir` including
+ // the trailing slash if any (`C:\abc` -> `C:\`). Otherwise, strip out the
+ // trailing slash (`C:\abc\def` -> `C:\abc`).
+ if (startPart > 0 && startPart !== rootEnd)
+ ret.dir = path.slice(0, startPart - 1);
+ else
+ ret.dir = ret.root;
+
+ return ret;
};