summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAnna Henningsen <anna@addaleax.net>2017-12-05 14:23:38 +0100
committerMyles Borins <mylesborins@google.com>2018-02-12 19:28:37 -0500
commitf306d3eb7a7af954352f76c62c7c15427c89d05d (patch)
treebd3f491654f9adf719bd445888f908bb14923cc8
parent1976c7c7a5f8b38ade0b39937604114f6055d6d0 (diff)
downloadnode-new-f306d3eb7a7af954352f76c62c7c15427c89d05d.tar.gz
src: make url host a proper C++ class
- Gives `URLHost` a proper destructor that clears memory depending on the type of the host (This fixes a memory leak) - Hide the host type enums and class layout as implementation details - Make the `Parse` methods members of `URLHost` - Turn `WriteHost` into a `ToString()` method on the `URLHost` class - Verify that at the beginning of a parse attempt, the type is set to “failed” - Remove a lot of `goto`s from the source code 🐢🚀 Backport-PR-URL: https://github.com/nodejs/node/pull/18324 PR-URL: https://github.com/nodejs/node/pull/17470 Fixes: https://github.com/nodejs/node/issues/17448 Reviewed-By: Timothy Gu <timothygu99@gmail.com>
-rw-r--r--src/node_url.cc269
1 files changed, 147 insertions, 122 deletions
diff --git a/src/node_url.cc b/src/node_url.cc
index acd8b86044..aa2709a5f8 100644
--- a/src/node_url.cc
+++ b/src/node_url.cc
@@ -55,27 +55,71 @@ const char kEOL = -1;
const char16_t kUnicodeReplacementCharacter = 0xFFFD;
// https://url.spec.whatwg.org/#concept-host
-union url_host_value {
- std::string domain;
- uint32_t ipv4;
- uint16_t ipv6[8];
- std::string opaque;
- ~url_host_value() {}
-};
+class URLHost {
+ public:
+ ~URLHost();
+
+ void ParseIPv4Host(const char* input, size_t length, bool* is_ipv4);
+ void ParseIPv6Host(const char* input, size_t length);
+ void ParseOpaqueHost(const char* input, size_t length);
+ void ParseHost(const char* input,
+ size_t length,
+ bool is_special,
+ bool unicode = false);
+
+ inline bool ParsingFailed() const { return type_ == HostType::H_FAILED; }
+ std::string ToString() const;
+
+ private:
+ enum class HostType {
+ H_FAILED,
+ H_DOMAIN,
+ H_IPV4,
+ H_IPV6,
+ H_OPAQUE,
+ };
-enum url_host_type {
- HOST_TYPE_FAILED = -1,
- HOST_TYPE_DOMAIN = 0,
- HOST_TYPE_IPV4 = 1,
- HOST_TYPE_IPV6 = 2,
- HOST_TYPE_OPAQUE = 3,
-};
+ union Value {
+ std::string domain;
+ uint32_t ipv4;
+ uint16_t ipv6[8];
+ std::string opaque;
+
+ ~Value() {}
+ Value() : ipv4(0) {}
+ };
-struct url_host {
- url_host_value value;
- enum url_host_type type;
+ Value value_;
+ HostType type_ = HostType::H_FAILED;
+
+ // Setting the string members of the union with = is brittle because
+ // it relies on them being initialized to a state that requires no
+ // destruction of old data.
+ // For a long time, that worked well enough because ParseIPv6Host() happens
+ // to zero-fill `value_`, but that really is relying on standard library
+ // internals too much.
+ // These helpers are the easiest solution but we might want to consider
+ // just not forcing strings into an union.
+ inline void SetOpaque(std::string&& string) {
+ type_ = HostType::H_OPAQUE;
+ new(&value_.opaque) std::string(std::move(string));
+ }
+
+ inline void SetDomain(std::string&& string) {
+ type_ = HostType::H_DOMAIN;
+ new(&value_.domain) std::string(std::move(string));
+ }
};
+URLHost::~URLHost() {
+ using string = std::string;
+ switch (type_) {
+ case HostType::H_DOMAIN: value_.domain.~string(); break;
+ case HostType::H_OPAQUE: value_.opaque.~string(); break;
+ default: break;
+ }
+}
+
#define ARGS(XX) \
XX(ARG_FLAGS) \
XX(ARG_PROTOCOL) \
@@ -601,11 +645,11 @@ inline bool ToASCII(const std::string& input, std::string* output) {
}
#endif
-url_host_type ParseIPv6Host(url_host* host, const char* input, size_t length) {
- url_host_type type = HOST_TYPE_FAILED;
+void URLHost::ParseIPv6Host(const char* input, size_t length) {
+ CHECK_EQ(type_, HostType::H_FAILED);
for (unsigned n = 0; n < 8; n++)
- host->value.ipv6[n] = 0;
- uint16_t* piece_pointer = &host->value.ipv6[0];
+ value_.ipv6[n] = 0;
+ uint16_t* piece_pointer = &value_.ipv6[0];
uint16_t* last_piece = piece_pointer + 8;
uint16_t* compress_pointer = nullptr;
const char* pointer = input;
@@ -614,7 +658,7 @@ url_host_type ParseIPv6Host(url_host* host, const char* input, size_t length) {
char ch = pointer < end ? pointer[0] : kEOL;
if (ch == ':') {
if (length < 2 || pointer[1] != ':')
- goto end;
+ return;
pointer += 2;
ch = pointer < end ? pointer[0] : kEOL;
piece_pointer++;
@@ -622,10 +666,10 @@ url_host_type ParseIPv6Host(url_host* host, const char* input, size_t length) {
}
while (ch != kEOL) {
if (piece_pointer > last_piece)
- goto end;
+ return;
if (ch == ':') {
if (compress_pointer != nullptr)
- goto end;
+ return;
pointer++;
ch = pointer < end ? pointer[0] : kEOL;
piece_pointer++;
@@ -643,11 +687,11 @@ url_host_type ParseIPv6Host(url_host* host, const char* input, size_t length) {
switch (ch) {
case '.':
if (len == 0)
- goto end;
+ return;
pointer -= len;
ch = pointer < end ? pointer[0] : kEOL;
if (piece_pointer > last_piece - 2)
- goto end;
+ return;
numbers_seen = 0;
while (ch != kEOL) {
value = 0xffffffff;
@@ -656,22 +700,22 @@ url_host_type ParseIPv6Host(url_host* host, const char* input, size_t length) {
pointer++;
ch = pointer < end ? pointer[0] : kEOL;
} else {
- goto end;
+ return;
}
}
if (!IsASCIIDigit(ch))
- goto end;
+ return;
while (IsASCIIDigit(ch)) {
unsigned number = ch - '0';
if (value == 0xffffffff) {
value = number;
} else if (value == 0) {
- goto end;
+ return;
} else {
value = value * 10 + number;
}
if (value > 255)
- goto end;
+ return;
pointer++;
ch = pointer < end ? pointer[0] : kEOL;
}
@@ -681,18 +725,18 @@ url_host_type ParseIPv6Host(url_host* host, const char* input, size_t length) {
piece_pointer++;
}
if (numbers_seen != 4)
- goto end;
+ return;
continue;
case ':':
pointer++;
ch = pointer < end ? pointer[0] : kEOL;
if (ch == kEOL)
- goto end;
+ return;
break;
case kEOL:
break;
default:
- goto end;
+ return;
}
*piece_pointer = value;
piece_pointer++;
@@ -701,7 +745,7 @@ url_host_type ParseIPv6Host(url_host* host, const char* input, size_t length) {
if (compress_pointer != nullptr) {
swaps = piece_pointer - compress_pointer;
piece_pointer = last_piece - 1;
- while (piece_pointer != &host->value.ipv6[0] && swaps > 0) {
+ while (piece_pointer != &value_.ipv6[0] && swaps > 0) {
uint16_t temp = *piece_pointer;
uint16_t* swap_piece = compress_pointer + swaps - 1;
*piece_pointer = *swap_piece;
@@ -711,12 +755,9 @@ url_host_type ParseIPv6Host(url_host* host, const char* input, size_t length) {
}
} else if (compress_pointer == nullptr &&
piece_pointer != last_piece) {
- goto end;
+ return;
}
- type = HOST_TYPE_IPV6;
- end:
- host->type = type;
- return type;
+ type_ = HostType::H_IPV6;
}
inline int64_t ParseNumber(const char* start, const char* end) {
@@ -754,8 +795,9 @@ inline int64_t ParseNumber(const char* start, const char* end) {
return strtoll(start, NULL, R);
}
-url_host_type ParseIPv4Host(url_host* host, const char* input, size_t length) {
- url_host_type type = HOST_TYPE_DOMAIN;
+void URLHost::ParseIPv4Host(const char* input, size_t length, bool* is_ipv4) {
+ CHECK_EQ(type_, HostType::H_FAILED);
+ *is_ipv4 = false;
const char* pointer = input;
const char* mark = input;
const char* end = pointer + length;
@@ -764,19 +806,19 @@ url_host_type ParseIPv4Host(url_host* host, const char* input, size_t length) {
uint64_t numbers[4];
int tooBigNumbers = 0;
if (length == 0)
- goto end;
+ return;
while (pointer <= end) {
const char ch = pointer < end ? pointer[0] : kEOL;
const int remaining = end - pointer - 1;
if (ch == '.' || ch == kEOL) {
if (++parts > 4)
- goto end;
+ return;
if (pointer == mark)
- goto end;
+ return;
int64_t n = ParseNumber(mark, pointer);
if (n < 0)
- goto end;
+ return;
if (n > 255) {
tooBigNumbers++;
@@ -789,6 +831,7 @@ url_host_type ParseIPv4Host(url_host* host, const char* input, size_t length) {
pointer++;
}
CHECK_GT(parts, 0);
+ *is_ipv4 = true;
// If any but the last item in numbers is greater than 255, return failure.
// If the last item in numbers is greater than or equal to
@@ -796,97 +839,81 @@ url_host_type ParseIPv4Host(url_host* host, const char* input, size_t length) {
if (tooBigNumbers > 1 ||
(tooBigNumbers == 1 && numbers[parts - 1] <= 255) ||
numbers[parts - 1] >= pow(256, static_cast<double>(5 - parts))) {
- type = HOST_TYPE_FAILED;
- goto end;
+ return;
}
- type = HOST_TYPE_IPV4;
+ type_ = HostType::H_IPV4;
val = numbers[parts - 1];
for (int n = 0; n < parts - 1; n++) {
double b = 3 - n;
val += numbers[n] * pow(256, b);
}
- host->value.ipv4 = val;
- end:
- host->type = type;
- return type;
+ value_.ipv4 = val;
}
-url_host_type ParseOpaqueHost(url_host* host,
- const char* input,
- size_t length) {
- url_host_type type = HOST_TYPE_OPAQUE;
+void URLHost::ParseOpaqueHost(const char* input, size_t length) {
+ CHECK_EQ(type_, HostType::H_FAILED);
std::string output;
output.reserve(length * 3);
for (size_t i = 0; i < length; i++) {
const char ch = input[i];
if (ch != '%' && IsForbiddenHostCodePoint(ch)) {
- type = HOST_TYPE_FAILED;
- goto end;
+ return;
} else {
AppendOrEscape(&output, ch, C0_CONTROL_ENCODE_SET);
}
}
- host->value.opaque = output;
- end:
- host->type = type;
- return type;
+ SetOpaque(std::move(output));
}
-url_host_type ParseHost(url_host* host,
- const char* input,
+void URLHost::ParseHost(const char* input,
size_t length,
bool is_special,
- bool unicode = false) {
- url_host_type type = HOST_TYPE_FAILED;
+ bool unicode) {
+ CHECK_EQ(type_, HostType::H_FAILED);
const char* pointer = input;
- std::string decoded;
if (length == 0)
- goto end;
+ return;
if (pointer[0] == '[') {
if (pointer[length - 1] != ']')
- goto end;
- return ParseIPv6Host(host, ++pointer, length - 2);
+ return;
+ return ParseIPv6Host(++pointer, length - 2);
}
if (!is_special)
- return ParseOpaqueHost(host, input, length);
+ return ParseOpaqueHost(input, length);
// First, we have to percent decode
- decoded = PercentDecode(input, length);
+ std::string decoded = PercentDecode(input, length);
// Then we have to punycode toASCII
if (!ToASCII(decoded, &decoded))
- goto end;
+ return;
// If any of the following characters are still present, we have to fail
for (size_t n = 0; n < decoded.size(); n++) {
const char ch = decoded[n];
if (IsForbiddenHostCodePoint(ch)) {
- goto end;
+ return;
}
}
// Check to see if it's an IPv4 IP address
- type = ParseIPv4Host(host, decoded.c_str(), decoded.length());
- if (type == HOST_TYPE_IPV4 || type == HOST_TYPE_FAILED)
- goto end;
+ bool is_ipv4;
+ ParseIPv4Host(decoded.c_str(), decoded.length(), &is_ipv4);
+ if (is_ipv4)
+ return;
// If the unicode flag is set, run the result through punycode ToUnicode
if (unicode && !ToUnicode(decoded, &decoded))
- goto end;
+ return;
// It's not an IPv4 or IPv6 address, it must be a domain
- type = HOST_TYPE_DOMAIN;
- host->value.domain = decoded;
-
- end:
- host->type = type;
- return type;
+ SetDomain(std::move(decoded));
}
// Locates the longest sequence of 0 segments in an IPv6 address
@@ -920,59 +947,59 @@ inline T* FindLongestZeroSequence(T* values, size_t len) {
return result;
}
-url_host_type WriteHost(const url_host* host, std::string* dest) {
- dest->clear();
- switch (host->type) {
- case HOST_TYPE_DOMAIN:
- *dest = host->value.domain;
+std::string URLHost::ToString() const {
+ std::string dest;
+ switch (type_) {
+ case HostType::H_DOMAIN:
+ return value_.domain;
+ break;
+ case HostType::H_OPAQUE:
+ return value_.opaque;
break;
- case HOST_TYPE_IPV4: {
- dest->reserve(15);
- uint32_t value = host->value.ipv4;
+ case HostType::H_IPV4: {
+ dest.reserve(15);
+ uint32_t value = value_.ipv4;
for (int n = 0; n < 4; n++) {
char buf[4];
snprintf(buf, sizeof(buf), "%d", value % 256);
- dest->insert(0, buf);
+ dest.insert(0, buf);
if (n < 3)
- dest->insert(0, 1, '.');
+ dest.insert(0, 1, '.');
value /= 256;
}
break;
}
- case HOST_TYPE_IPV6: {
- dest->reserve(41);
- *dest+= '[';
- const uint16_t* start = &host->value.ipv6[0];
+ case HostType::H_IPV6: {
+ dest.reserve(41);
+ dest += '[';
+ const uint16_t* start = &value_.ipv6[0];
const uint16_t* compress_pointer =
FindLongestZeroSequence(start, 8);
bool ignore0 = false;
for (int n = 0; n <= 7; n++) {
- const uint16_t* piece = &host->value.ipv6[n];
+ const uint16_t* piece = &value_.ipv6[n];
if (ignore0 && *piece == 0)
continue;
else if (ignore0)
ignore0 = false;
if (compress_pointer == piece) {
- *dest += n == 0 ? "::" : ":";
+ dest += n == 0 ? "::" : ":";
ignore0 = true;
continue;
}
char buf[5];
snprintf(buf, sizeof(buf), "%x", *piece);
- *dest += buf;
+ dest += buf;
if (n < 7)
- *dest += ':';
+ dest += ':';
}
- *dest += ']';
+ dest += ']';
break;
}
- case HOST_TYPE_OPAQUE:
- *dest = host->value.opaque;
- break;
- case HOST_TYPE_FAILED:
+ case HostType::H_FAILED:
break;
}
- return host->type;
+ return dest;
}
bool ParseHost(const std::string& input,
@@ -983,11 +1010,11 @@ bool ParseHost(const std::string& input,
output->clear();
return true;
}
- url_host host{{""}, HOST_TYPE_DOMAIN};
- ParseHost(&host, input.c_str(), input.length(), is_special, unicode);
- if (host.type == HOST_TYPE_FAILED)
+ URLHost host;
+ host.ParseHost(input.c_str(), input.length(), is_special, unicode);
+ if (host.ParsingFailed())
return false;
- WriteHost(&host, output);
+ *output = host.ToString();
return true;
}
@@ -2043,15 +2070,14 @@ static void DomainToASCII(const FunctionCallbackInfo<Value>& args) {
CHECK(args[0]->IsString());
Utf8Value value(env->isolate(), args[0]);
- url_host host{{""}, HOST_TYPE_DOMAIN};
+ URLHost host;
// Assuming the host is used for a special scheme.
- ParseHost(&host, *value, value.length(), true);
- if (host.type == HOST_TYPE_FAILED) {
+ host.ParseHost(*value, value.length(), true);
+ if (host.ParsingFailed()) {
args.GetReturnValue().Set(FIXED_ONE_BYTE_STRING(env->isolate(), ""));
return;
}
- std::string out;
- WriteHost(&host, &out);
+ std::string out = host.ToString();
args.GetReturnValue().Set(
String::NewFromUtf8(env->isolate(),
out.c_str(),
@@ -2064,15 +2090,14 @@ static void DomainToUnicode(const FunctionCallbackInfo<Value>& args) {
CHECK(args[0]->IsString());
Utf8Value value(env->isolate(), args[0]);
- url_host host{{""}, HOST_TYPE_DOMAIN};
+ URLHost host;
// Assuming the host is used for a special scheme.
- ParseHost(&host, *value, value.length(), true, true);
- if (host.type == HOST_TYPE_FAILED) {
+ host.ParseHost(*value, value.length(), true, true);
+ if (host.ParsingFailed()) {
args.GetReturnValue().Set(FIXED_ONE_BYTE_STRING(env->isolate(), ""));
return;
}
- std::string out;
- WriteHost(&host, &out);
+ std::string out = host.ToString();
args.GetReturnValue().Set(
String::NewFromUtf8(env->isolate(),
out.c_str(),