summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTimothy Gu <timothygu99@gmail.com>2017-05-04 23:43:47 -0700
committerTimothy Gu <timothygu99@gmail.com>2017-05-19 23:19:57 -0700
commit841bb4c61f07a5c2bf818a3b985ea0f88257210e (patch)
treea60f5452529fbfe24f1d0be0616c3ff369eb9987
parent525497596a51ef2e6653b930ca525046d27c9fd5 (diff)
downloadnode-new-841bb4c61f07a5c2bf818a3b985ea0f88257210e.tar.gz
url: fix C0 control and whitespace handling
PR-URL: https://github.com/nodejs/node/pull/12846 Fixes: https://github.com/nodejs/node/issues/12825 Refs: https://github.com/w3c/web-platform-tests/pull/5792 Reviewed-By: James M Snell <jasnell@gmail.com> Reviewed-By: Joyee Cheung <joyeec9h3@gmail.com>
-rw-r--r--src/node_url.cc59
-rw-r--r--src/node_url.h17
-rw-r--r--test/fixtures/url-tests.js32
3 files changed, 81 insertions, 27 deletions
diff --git a/src/node_url.cc b/src/node_url.cc
index 2bdc080953..703ff4ffd6 100644
--- a/src/node_url.cc
+++ b/src/node_url.cc
@@ -133,6 +133,9 @@ enum url_error_cb_args {
// https://infra.spec.whatwg.org/#ascii-tab-or-newline
CHAR_TEST(8, IsASCIITabOrNewline, (ch == '\t' || ch == '\n' || ch == '\r'))
+// https://infra.spec.whatwg.org/#c0-control-or-space
+CHAR_TEST(8, IsC0ControlOrSpace, (ch >= '\0' && ch <= ' '))
+
// https://infra.spec.whatwg.org/#ascii-digit
CHAR_TEST(8, IsASCIIDigit, (ch >= '0' && ch <= '9'))
@@ -1134,15 +1137,45 @@ static inline void ShortenUrlPath(struct url_data* url) {
}
void URL::Parse(const char* input,
- const size_t len,
+ size_t len,
enum url_parse_state state_override,
struct url_data* url,
+ bool has_url,
const struct url_data* base,
bool has_base) {
+ const char* p = input;
+ const char* end = input + len;
+
+ if (!has_url) {
+ for (const char* ptr = p; ptr < end; ptr++) {
+ if (IsC0ControlOrSpace(*ptr))
+ p++;
+ else
+ break;
+ }
+ for (const char* ptr = end - 1; ptr >= p; ptr--) {
+ if (IsC0ControlOrSpace(*ptr))
+ end--;
+ else
+ break;
+ }
+ len = end - p;
+ }
+
+ std::string whitespace_stripped;
+ whitespace_stripped.reserve(len);
+ for (const char* ptr = p; ptr < end; ptr++)
+ if (!IsASCIITabOrNewline(*ptr))
+ whitespace_stripped += *ptr;
+
+ input = whitespace_stripped.c_str();
+ len = whitespace_stripped.size();
+ p = input;
+ end = input + len;
+
bool atflag = false;
bool sbflag = false;
bool uflag = false;
- int wskip = 0;
std::string buffer;
url->scheme.reserve(len);
@@ -1159,9 +1192,6 @@ void URL::Parse(const char* input,
enum url_parse_state state = has_state_override ? state_override :
kSchemeStart;
- const char* p = input;
- const char* end = input + len;
-
if (state < kSchemeStart || state > kFragment) {
url->flags |= URL_FLAGS_INVALID_PARSE_STATE;
return;
@@ -1171,18 +1201,6 @@ void URL::Parse(const char* input,
const char ch = p < end ? p[0] : kEOL;
const size_t remaining = end == p ? 0 : (end - p - 1);
- if (IsASCIITabOrNewline(ch)) {
- if (state == kAuthority) {
- // It's necessary to keep track of how much whitespace
- // is being ignored when in kAuthority state because of
- // how the buffer is managed. TODO: See if there's a better
- // way
- wskip++;
- }
- p++;
- continue;
- }
-
bool special = (url->flags & URL_FLAGS_SPECIAL);
bool cannot_be_base;
const bool special_back_slash = (special && ch == '\\');
@@ -1500,7 +1518,7 @@ void URL::Parse(const char* input,
url->flags |= URL_FLAGS_FAILED;
return;
}
- p -= buffer.size() + 1 + wskip;
+ p -= buffer.size() + 1;
buffer.clear();
state = kHost;
} else {
@@ -1892,16 +1910,17 @@ static void Parse(Environment* env,
HandleScope handle_scope(isolate);
Context::Scope context_scope(context);
+ const bool has_context = context_obj->IsObject();
const bool has_base = base_obj->IsObject();
struct url_data base;
struct url_data url;
- if (context_obj->IsObject())
+ if (has_context)
HarvestContext(env, &url, context_obj.As<Object>());
if (has_base)
HarvestBase(env, &base, base_obj.As<Object>());
- URL::Parse(input, len, state_override, &url, &base, has_base);
+ URL::Parse(input, len, state_override, &url, has_context, &base, has_base);
if ((url.flags & URL_FLAGS_INVALID_PARSE_STATE) ||
((state_override != kUnknownState) &&
(url.flags & URL_FLAGS_TERMINATED)))
diff --git a/src/node_url.h b/src/node_url.h
index 49bfb264e8..72ac366ec1 100644
--- a/src/node_url.h
+++ b/src/node_url.h
@@ -81,30 +81,35 @@ struct url_data {
class URL {
public:
static void Parse(const char* input,
- const size_t len,
+ size_t len,
enum url_parse_state state_override,
struct url_data* url,
+ bool has_url,
const struct url_data* base,
bool has_base);
URL(const char* input, const size_t len) {
- Parse(input, len, kUnknownState, &context_, nullptr, false);
+ Parse(input, len, kUnknownState, &context_, false, nullptr, false);
}
URL(const char* input, const size_t len, const URL* base) {
if (base != nullptr)
- Parse(input, len, kUnknownState, &context_, &(base->context_), true);
+ Parse(input, len, kUnknownState,
+ &context_, false,
+ &(base->context_), true);
else
- Parse(input, len, kUnknownState, &context_, nullptr, false);
+ Parse(input, len, kUnknownState, &context_, false, nullptr, false);
}
URL(const char* input, const size_t len,
const char* base, const size_t baselen) {
if (base != nullptr && baselen > 0) {
URL _base(base, baselen);
- Parse(input, len, kUnknownState, &context_, &(_base.context_), true);
+ Parse(input, len, kUnknownState,
+ &context_, false,
+ &(_base.context_), true);
} else {
- Parse(input, len, kUnknownState, &context_, nullptr, false);
+ Parse(input, len, kUnknownState, &context_, false, nullptr, false);
}
}
diff --git a/test/fixtures/url-tests.js b/test/fixtures/url-tests.js
index befe325f93..f75ea12990 100644
--- a/test/fixtures/url-tests.js
+++ b/test/fixtures/url-tests.js
@@ -1,7 +1,7 @@
'use strict';
/* WPT Refs:
- https://github.com/w3c/web-platform-tests/blob/28541bb/url/urltestdata.json
+ https://github.com/w3c/web-platform-tests/blob/0f26c418a5/url/urltestdata.json
License: http://www.w3.org/Consortium/Legal/2008/04-testsuite-copyright.html
*/
module.exports =
@@ -3566,6 +3566,22 @@ module.exports =
"search": "",
"hash": ""
},
+ "Leading and trailing C0 control or space",
+ {
+ "input": "\u0000\u001b\u0004\u0012 http://example.com/\u001f \u000d ",
+ "base": "about:blank",
+ "href": "http://example.com/",
+ "origin": "http://example.com",
+ "protocol": "http:",
+ "username": "",
+ "password": "",
+ "host": "example.com",
+ "hostname": "example.com",
+ "port": "",
+ "pathname": "/",
+ "search": "",
+ "hash": ""
+ },
"Ideographic full stop (full-width period for Chinese, etc.) should be treated as a dot. U+3002 is mapped to U+002E (dot)",
{
"input": "http://www.foo怂bar.com",
@@ -5488,6 +5504,20 @@ module.exports =
"hash": ""
},
{
+ "input": "C|\n/",
+ "base": "file://host/dir/file",
+ "href": "file:///C:/",
+ "protocol": "file:",
+ "username": "",
+ "password": "",
+ "host": "",
+ "hostname": "",
+ "port": "",
+ "pathname": "/C:/",
+ "search": "",
+ "hash": ""
+ },
+ {
"input": "C|\\",
"base": "file://host/dir/file",
"href": "file:///C:/",