diff options
author | Timothy Gu <timothygu99@gmail.com> | 2017-05-04 23:43:47 -0700 |
---|---|---|
committer | Timothy Gu <timothygu99@gmail.com> | 2017-05-19 23:19:57 -0700 |
commit | 841bb4c61f07a5c2bf818a3b985ea0f88257210e (patch) | |
tree | a60f5452529fbfe24f1d0be0616c3ff369eb9987 /src | |
parent | 525497596a51ef2e6653b930ca525046d27c9fd5 (diff) | |
download | node-new-841bb4c61f07a5c2bf818a3b985ea0f88257210e.tar.gz |
url: fix C0 control and whitespace handling
PR-URL: https://github.com/nodejs/node/pull/12846
Fixes: https://github.com/nodejs/node/issues/12825
Refs: https://github.com/w3c/web-platform-tests/pull/5792
Reviewed-By: James M Snell <jasnell@gmail.com>
Reviewed-By: Joyee Cheung <joyeec9h3@gmail.com>
Diffstat (limited to 'src')
-rw-r--r-- | src/node_url.cc | 59 | ||||
-rw-r--r-- | src/node_url.h | 17 |
2 files changed, 50 insertions, 26 deletions
diff --git a/src/node_url.cc b/src/node_url.cc index 2bdc080953..703ff4ffd6 100644 --- a/src/node_url.cc +++ b/src/node_url.cc @@ -133,6 +133,9 @@ enum url_error_cb_args { // https://infra.spec.whatwg.org/#ascii-tab-or-newline CHAR_TEST(8, IsASCIITabOrNewline, (ch == '\t' || ch == '\n' || ch == '\r')) +// https://infra.spec.whatwg.org/#c0-control-or-space +CHAR_TEST(8, IsC0ControlOrSpace, (ch >= '\0' && ch <= ' ')) + // https://infra.spec.whatwg.org/#ascii-digit CHAR_TEST(8, IsASCIIDigit, (ch >= '0' && ch <= '9')) @@ -1134,15 +1137,45 @@ static inline void ShortenUrlPath(struct url_data* url) { } void URL::Parse(const char* input, - const size_t len, + size_t len, enum url_parse_state state_override, struct url_data* url, + bool has_url, const struct url_data* base, bool has_base) { + const char* p = input; + const char* end = input + len; + + if (!has_url) { + for (const char* ptr = p; ptr < end; ptr++) { + if (IsC0ControlOrSpace(*ptr)) + p++; + else + break; + } + for (const char* ptr = end - 1; ptr >= p; ptr--) { + if (IsC0ControlOrSpace(*ptr)) + end--; + else + break; + } + len = end - p; + } + + std::string whitespace_stripped; + whitespace_stripped.reserve(len); + for (const char* ptr = p; ptr < end; ptr++) + if (!IsASCIITabOrNewline(*ptr)) + whitespace_stripped += *ptr; + + input = whitespace_stripped.c_str(); + len = whitespace_stripped.size(); + p = input; + end = input + len; + bool atflag = false; bool sbflag = false; bool uflag = false; - int wskip = 0; std::string buffer; url->scheme.reserve(len); @@ -1159,9 +1192,6 @@ void URL::Parse(const char* input, enum url_parse_state state = has_state_override ? state_override : kSchemeStart; - const char* p = input; - const char* end = input + len; - if (state < kSchemeStart || state > kFragment) { url->flags |= URL_FLAGS_INVALID_PARSE_STATE; return; @@ -1171,18 +1201,6 @@ void URL::Parse(const char* input, const char ch = p < end ? p[0] : kEOL; const size_t remaining = end == p ? 0 : (end - p - 1); - if (IsASCIITabOrNewline(ch)) { - if (state == kAuthority) { - // It's necessary to keep track of how much whitespace - // is being ignored when in kAuthority state because of - // how the buffer is managed. TODO: See if there's a better - // way - wskip++; - } - p++; - continue; - } - bool special = (url->flags & URL_FLAGS_SPECIAL); bool cannot_be_base; const bool special_back_slash = (special && ch == '\\'); @@ -1500,7 +1518,7 @@ void URL::Parse(const char* input, url->flags |= URL_FLAGS_FAILED; return; } - p -= buffer.size() + 1 + wskip; + p -= buffer.size() + 1; buffer.clear(); state = kHost; } else { @@ -1892,16 +1910,17 @@ static void Parse(Environment* env, HandleScope handle_scope(isolate); Context::Scope context_scope(context); + const bool has_context = context_obj->IsObject(); const bool has_base = base_obj->IsObject(); struct url_data base; struct url_data url; - if (context_obj->IsObject()) + if (has_context) HarvestContext(env, &url, context_obj.As<Object>()); if (has_base) HarvestBase(env, &base, base_obj.As<Object>()); - URL::Parse(input, len, state_override, &url, &base, has_base); + URL::Parse(input, len, state_override, &url, has_context, &base, has_base); if ((url.flags & URL_FLAGS_INVALID_PARSE_STATE) || ((state_override != kUnknownState) && (url.flags & URL_FLAGS_TERMINATED))) diff --git a/src/node_url.h b/src/node_url.h index 49bfb264e8..72ac366ec1 100644 --- a/src/node_url.h +++ b/src/node_url.h @@ -81,30 +81,35 @@ struct url_data { class URL { public: static void Parse(const char* input, - const size_t len, + size_t len, enum url_parse_state state_override, struct url_data* url, + bool has_url, const struct url_data* base, bool has_base); URL(const char* input, const size_t len) { - Parse(input, len, kUnknownState, &context_, nullptr, false); + Parse(input, len, kUnknownState, &context_, false, nullptr, false); } URL(const char* input, const size_t len, const URL* base) { if (base != nullptr) - Parse(input, len, kUnknownState, &context_, &(base->context_), true); + Parse(input, len, kUnknownState, + &context_, false, + &(base->context_), true); else - Parse(input, len, kUnknownState, &context_, nullptr, false); + Parse(input, len, kUnknownState, &context_, false, nullptr, false); } URL(const char* input, const size_t len, const char* base, const size_t baselen) { if (base != nullptr && baselen > 0) { URL _base(base, baselen); - Parse(input, len, kUnknownState, &context_, &(_base.context_), true); + Parse(input, len, kUnknownState, + &context_, false, + &(_base.context_), true); } else { - Parse(input, len, kUnknownState, &context_, nullptr, false); + Parse(input, len, kUnknownState, &context_, false, nullptr, false); } } |