summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- benchmark/url/new-url-parse.js 57
-rw-r--r-- lib/internal/url.js 629
-rw-r--r-- lib/url.js 70
-rw-r--r-- node.gyp 2
-rw-r--r-- src/node_i18n.cc 12
-rw-r--r-- src/node_i18n.h 7
-rw-r--r-- src/node_url.cc 1406
-rw-r--r-- src/node_url.h 538
-rw-r--r-- test/fixtures/url-setter-tests.json 1134
-rw-r--r-- test/parallel/test-whatwg-url-parsing.js 122
-rw-r--r-- test/parallel/test-whatwg-url-searchparams.js 36
-rw-r--r-- test/parallel/test-whatwg-url-setters.js 24
12 files changed, 3965 insertions, 72 deletions
diff --git a/benchmark/url/new-url-parse.js b/benchmark/url/new-url-parse.js
new file mode 100644
index 0000000000..ef60e81847
--- /dev/null
+++ b/benchmark/url/new-url-parse.js
@@ -0,0 +1,57 @@
+'use strict';
+const common = require('../common.js');
+const url = require('url');
+const v8 = require('v8');
+
+const bench = common.createBenchmark(main, {
+ type: 'one two three four five'.split(' '),
+ method: ['old', 'new'],
+ n: [25e4]
+});
+
+/**
+ * Times `n` calls of the legacy `url.parse()` on `input`.
+ *
+ * @param {number} n Number of iterations reported to the benchmark.
+ * @param {string} input URL string handed to `url.parse()`.
+ */
+function useOld(n, input) {
+  // Call url.parse() once and ask V8 to optimize it before the timed
+  // section, so the optimizer does not kick in halfway through the loop.
+  url.parse(input);
+  v8.setFlagsFromString('--allow_natives_syntax');
+  eval('%OptimizeFunctionOnNextCall(url.parse)');
+
+  var remaining = n;
+  bench.start();
+  while (remaining-- > 0)
+    url.parse(input);
+  bench.end(n);
+}
+
+/**
+ * Times `n` constructions of `url.URL` from `input`.
+ *
+ * @param {number} n Number of iterations reported to the benchmark.
+ * @param {string} input URL string handed to the `url.URL` constructor.
+ */
+function useNew(n, input) {
+  var remaining = n;
+  bench.start();
+  while (remaining-- > 0)
+    new url.URL(input);
+  bench.end(n);
+}
+
+/**
+ * Benchmark entry point. Looks up the input URL for `conf.type`, then runs
+ * the implementation named by `conf.method` for `conf.n` iterations.
+ *
+ * @param {Object} conf Benchmark configuration (`type`, `method`, `n`).
+ * @throws {Error} When `conf.method` is neither 'old' nor 'new'.
+ */
+function main(conf) {
+  const type = conf.type;
+  const n = conf.n | 0;
+  const method = conf.method;
+
+  const inputs = {
+    one: 'http://nodejs.org/docs/latest/api/url.html#url_url_format_urlobj',
+    two: 'http://blog.nodejs.org/',
+    three: 'https://encrypted.google.com/search?q=url&q=site:npmjs.org&hl=en',
+    four: 'javascript:alert("node is awesome");',
+    five: 'https://user:pass@example.com/',
+  };
+  // Unknown types fall back to the empty string.
+  const input = inputs[type] || '';
+
+  if (method === 'old') {
+    useOld(n, input);
+  } else if (method === 'new') {
+    useNew(n, input);
+  } else {
+    throw new Error('Unknown method');
+  }
+}
diff --git a/lib/internal/url.js b/lib/internal/url.js
new file mode 100644
index 0000000000..79b9e1cb0f
--- /dev/null
+++ b/lib/internal/url.js
@@ -0,0 +1,629 @@
+'use strict';
+
+/**
+ * Selects the punycode implementation: the `icu` binding when it can be
+ * loaded, otherwise the `punycode` module.
+ */
+function getPunycode() {
+  var impl;
+  try {
+    impl = process.binding('icu');
+  } catch (unused) {
+    // Loading the binding threw; use the JavaScript module instead.
+    impl = require('punycode');
+  }
+  return impl;
+}
+const punycode = getPunycode();
+const binding = process.binding('url');
+const context = Symbol('context');
+const cannotBeBase = Symbol('cannot-be-base');
+const special = Symbol('special');
+const searchParams = Symbol('query');
+const querystring = require('querystring');
+
+const kScheme = Symbol('scheme');
+const kHost = Symbol('host');
+const kPort = Symbol('port');
+const kDomain = Symbol('domain');
+
+// Constructor for the per-URL storage objects. Its prototype is created with
+// Object.create(null), so instances inherit nothing from Object.prototype.
+function StorageObject() {}
+StorageObject.prototype = Object.create(null);
+
+/**
+ * Origin that carries no scheme/host/port information. It serializes to the
+ * string 'null' and acts as its own effective domain.
+ */
+class OpaqueOrigin {
+  /** The origin object itself. */
+  get effectiveDomain() {
+    return this;
+  }
+
+  /** Always the literal string 'null'. */
+  toString() {
+    return 'null';
+  }
+}
+
+/**
+ * Origin described by a (scheme, host, port, domain) tuple. The four values
+ * are stored under module-private symbols and exposed through read-only
+ * getters.
+ */
+class TupleOrigin {
+  constructor(scheme, host, port, domain) {
+    Object.assign(this, {
+      [kScheme]: scheme,
+      [kHost]: host,
+      [kPort]: port,
+      [kDomain]: domain
+    });
+  }
+
+  get scheme() {
+    return this[kScheme];
+  }
+
+  get host() {
+    return this[kHost];
+  }
+
+  get port() {
+    return this[kPort];
+  }
+
+  get domain() {
+    return this[kDomain];
+  }
+
+  /** The domain when it is truthy, otherwise the host. */
+  get effectiveDomain() {
+    const domain = this[kDomain];
+    return domain ? domain : this[kHost];
+  }
+
+  /**
+   * Serializes the origin as `scheme://host[:port]`.
+   *
+   * @param {boolean} [unicode=false] When true the host is passed through
+   *   URL.domainToUnicode() first.
+   * @returns {string}
+   */
+  toString(unicode = false) {
+    const prefix = this.scheme + '://';
+    const host = unicode ? URL.domainToUnicode(this.host) : this.host;
+    const port = this.port;
+    // The port is appended only when one is actually present.
+    const suffix = (port === undefined || port === null) ? '' : `:${port}`;
+    return prefix + host + suffix;
+  }
+}
+
+/**
+ * URL object whose parsing is delegated to the native `url` binding.
+ *
+ * The parsed components live in a storage object under the `context` symbol
+ * (flags, scheme, username, password, host, port, path, query, fragment).
+ * Every setter re-runs the native parser in a specific start state and copies
+ * the relevant component back; a setter silently leaves the URL unchanged
+ * when the parser reports URL_FLAGS_FAILED.
+ */
+class URL {
+  /**
+   * @param {*} input Value to parse; stringified and trimmed first.
+   * @param {*} [base] Base URL. Anything that is not already a URL instance
+   *   is stringified and parsed into one.
+   * @throws {TypeError} 'Invalid URL' when the parser reports failure.
+   */
+  constructor(input, base) {
+    if (base !== undefined && !(base instanceof URL))
+      base = new URL(String(base));
+    input = String(input);
+    const base_context = base ? base[context] : undefined;
+    this[context] = new StorageObject();
+    binding.parse(input.trim(), -1, base_context, undefined,
+                  (flags, protocol, username, password,
+                   host, port, path, query, fragment) => {
+                    if (flags & binding.URL_FLAGS_FAILED)
+                      throw new TypeError('Invalid URL');
+                    this[context].flags = flags;
+                    this[context].scheme = protocol;
+                    this[context].username = username;
+                    this[context].password = password;
+                    this[context].port = port;
+                    this[context].path = path;
+                    this[context].query = query;
+                    this[context].fragment = fragment;
+                    this[context].host = host;
+                    this[searchParams] = new URLSearchParams(this);
+                  });
+  }
+
+  /** Unicode serialization of the origin computed by URL.originFor(). */
+  get origin() {
+    return URL.originFor(this).toString(true);
+  }
+
+  /** True when the URL_FLAGS_SPECIAL bit is set. */
+  get [special]() {
+    return (this[context].flags & binding.URL_FLAGS_SPECIAL) !== 0;
+  }
+
+  /** True when the URL_FLAGS_CANNOT_BE_BASE bit is set. */
+  get [cannotBeBase]() {
+    return (this[context].flags & binding.URL_FLAGS_CANNOT_BE_BASE) !== 0;
+  }
+
+  get protocol() {
+    return this[context].scheme;
+  }
+
+  get searchParams() {
+    return this[searchParams];
+  }
+
+  set protocol(scheme) {
+    scheme = String(scheme);
+    if (scheme.length === 0)
+      return;
+    binding.parse(scheme,
+                  binding.kSchemeStart,
+                  null,
+                  this[context],
+                  (flags, protocol, username, password,
+                   host, port, path, query, fragment) => {
+                    if (flags & binding.URL_FLAGS_FAILED)
+                      return;
+                    const newIsSpecial =
+                      (flags & binding.URL_FLAGS_SPECIAL) !== 0;
+                    // A URL may not switch between special and non-special
+                    // schemes. (The original three-clause test reduces to
+                    // this single comparison.)
+                    if (this[special] !== newIsSpecial)
+                      return;
+                    if (newIsSpecial) {
+                      this[context].flags |= binding.URL_FLAGS_SPECIAL;
+                    } else {
+                      this[context].flags &= ~binding.URL_FLAGS_SPECIAL;
+                    }
+                    if (protocol) {
+                      this[context].scheme = protocol;
+                      this[context].flags |= binding.URL_FLAGS_HAS_SCHEME;
+                    } else {
+                      this[context].flags &= ~binding.URL_FLAGS_HAS_SCHEME;
+                    }
+                  });
+  }
+
+  get username() {
+    return this[context].username || '';
+  }
+
+  set username(username) {
+    username = String(username);
+    // Credentials cannot be set on a URL without a host.
+    if (!this.hostname)
+      return;
+    if (!username) {
+      this[context].username = null;
+      this[context].flags &= ~binding.URL_FLAGS_HAS_USERNAME;
+      return;
+    }
+    this[context].username = binding.encodeAuth(username);
+    this[context].flags |= binding.URL_FLAGS_HAS_USERNAME;
+  }
+
+  get password() {
+    return this[context].password || '';
+  }
+
+  set password(password) {
+    password = String(password);
+    // Credentials cannot be set on a URL without a host.
+    if (!this.hostname)
+      return;
+    if (!password) {
+      this[context].password = null;
+      this[context].flags &= ~binding.URL_FLAGS_HAS_PASSWORD;
+      return;
+    }
+    this[context].password = binding.encodeAuth(password);
+    this[context].flags |= binding.URL_FLAGS_HAS_PASSWORD;
+  }
+
+  /** Host followed by `:port` when a port is stored. */
+  get host() {
+    var ret = this[context].host || '';
+    if (this[context].port !== undefined)
+      ret += `:${this[context].port}`;
+    return ret;
+  }
+
+  set host(host) {
+    host = String(host);
+    if (this[cannotBeBase] ||
+        (this[special] && host.length === 0)) {
+      // Cannot set the host if cannot-be-base is set or
+      // scheme is special and host length is zero
+      return;
+    }
+    if (!host) {
+      this[context].host = null;
+      this[context].flags &= ~binding.URL_FLAGS_HAS_HOST;
+      return;
+    }
+    binding.parse(host, binding.kHost, null, this[context],
+                  (flags, protocol, username, password,
+                   host, port, path, query, fragment) => {
+                    if (flags & binding.URL_FLAGS_FAILED)
+                      return;
+                    if (host) {
+                      this[context].host = host;
+                      this[context].flags |= binding.URL_FLAGS_HAS_HOST;
+                    } else {
+                      this[context].flags &= ~binding.URL_FLAGS_HAS_HOST;
+                    }
+                    // The kHost state may also yield a port.
+                    if (port !== undefined)
+                      this[context].port = port;
+                  });
+  }
+
+  get hostname() {
+    return this[context].host || '';
+  }
+
+  set hostname(host) {
+    host = String(host);
+    if (this[cannotBeBase] ||
+        (this[special] && host.length === 0)) {
+      // Cannot set the host if cannot-be-base is set or
+      // scheme is special and host length is zero
+      return;
+    }
+    if (!host) {
+      this[context].host = null;
+      this[context].flags &= ~binding.URL_FLAGS_HAS_HOST;
+      return;
+    }
+    binding.parse(host,
+                  binding.kHostname,
+                  null,
+                  this[context],
+                  (flags, protocol, username, password,
+                   host, port, path, query, fragment) => {
+                    if (flags & binding.URL_FLAGS_FAILED)
+                      return;
+                    if (host) {
+                      this[context].host = host;
+                      this[context].flags |= binding.URL_FLAGS_HAS_HOST;
+                    } else {
+                      this[context].flags &= ~binding.URL_FLAGS_HAS_HOST;
+                    }
+                  });
+  }
+
+  /** Stored port as a string, or '' when no port is stored. */
+  get port() {
+    const port = this[context].port;
+    return port === undefined ? '' : String(port);
+  }
+
+  set port(port) {
+    if (!this[context].host || this[cannotBeBase] || this.protocol === 'file:')
+      return;
+    port = String(port);
+    if (port === '') {
+      // Currently, if port number is empty, left unchanged.
+      // TODO(jasnell): This might be changing in the spec
+      return;
+    }
+    binding.parse(port, binding.kPort, null, this[context],
+                  (flags, protocol, username, password,
+                   host, port, path, query, fragment) => {
+                    if (flags & binding.URL_FLAGS_FAILED)
+                      return;
+                    this[context].port = port;
+                  });
+  }
+
+  /**
+   * For cannot-be-base URLs the first path entry is returned as is;
+   * otherwise the path segments are joined with '/' behind a leading '/'.
+   */
+  get pathname() {
+    if (this[cannotBeBase])
+      return this[context].path[0];
+    return this[context].path !== undefined ?
+      `/${this[context].path.join('/')}` : '';
+  }
+
+  set pathname(path) {
+    if (this[cannotBeBase])
+      return;
+    path = String(path);
+    binding.parse(path,
+                  binding.kPathStart,
+                  null,
+                  this[context],
+                  (flags, protocol, username, password,
+                   host, port, path, query, fragment) => {
+                    if (flags & binding.URL_FLAGS_FAILED)
+                      return;
+                    if (path) {
+                      this[context].path = path;
+                      this[context].flags |= binding.URL_FLAGS_HAS_PATH;
+                    } else {
+                      this[context].flags &= ~binding.URL_FLAGS_HAS_PATH;
+                    }
+                  });
+  }
+
+  get search() {
+    return !this[context].query ? '' : `?${this[context].query}`;
+  }
+
+  set search(search) {
+    update(this, search);
+    // Re-parse from the stored query rather than from `this.search`: the
+    // getter prepends '?', which would end up inside the first key.
+    this[searchParams][searchParams] =
+      querystring.parse(this[context].query || '');
+  }
+
+  get hash() {
+    return !this[context].fragment ? '' : `#${this[context].fragment}`;
+  }
+
+  set hash(hash) {
+    hash = String(hash);
+    if (this.protocol === 'javascript:')
+      return;
+    if (!hash) {
+      this[context].fragment = null;
+      this[context].flags &= ~binding.URL_FLAGS_HAS_FRAGMENT;
+      return;
+    }
+    if (hash[0] === '#') hash = hash.slice(1);
+    this[context].fragment = '';
+    binding.parse(hash,
+                  binding.kFragment,
+                  null,
+                  this[context],
+                  (flags, protocol, username, password,
+                   host, port, path, query, fragment) => {
+                    if (flags & binding.URL_FLAGS_FAILED)
+                      return;
+                    if (fragment) {
+                      this[context].fragment = fragment;
+                      this[context].flags |= binding.URL_FLAGS_HAS_FRAGMENT;
+                    } else {
+                      this[context].flags &= ~binding.URL_FLAGS_HAS_FRAGMENT;
+                    }
+                  });
+  }
+
+  get href() {
+    return this.toString();
+  }
+
+  /**
+   * Serializes the URL.
+   *
+   * @param {Object} [options]
+   * @param {boolean} [options.fragment=true] Include the fragment.
+   * @param {boolean} [options.unicode=false] Run the hostname through
+   *   punycode.toUnicode().
+   * @returns {string}
+   */
+  toString(options) {
+    options = options || {};
+    const fragment =
+      options.fragment !== undefined ?
+        !!options.fragment : true;
+    const unicode = !!options.unicode;
+    const ctx = this[context];
+    // Start from '' so that a missing scheme can never produce the text
+    // "undefined" in the output.
+    var ret = this.protocol || '';
+    if (ctx.host !== undefined) {
+      ret += '//';
+      const has_username = typeof ctx.username === 'string';
+      const has_password = typeof ctx.password === 'string';
+      if (has_username || has_password) {
+        if (has_username)
+          ret += ctx.username;
+        if (has_password)
+          ret += `:${ctx.password}`;
+        ret += '@';
+      }
+      if (unicode) {
+        ret += punycode.toUnicode(this.hostname);
+        // Test the stored port: the `port` getter never returns undefined,
+        // so checking it would append ':' even without a port.
+        if (ctx.port !== undefined)
+          ret += `:${ctx.port}`;
+      } else {
+        ret += this.host;
+      }
+    } else if (ctx.scheme === 'file:') {
+      ret += '//';
+    }
+    if (this.pathname)
+      ret += this.pathname;
+    if (typeof ctx.query === 'string')
+      ret += `?${ctx.query}`;
+    if (fragment && typeof ctx.fragment === 'string')
+      ret += `#${ctx.fragment}`;
+    return ret;
+  }
+
+  /**
+   * Multi-line description of the URL. The password is masked unless
+   * `opts.showHidden` is set, in which case the cannot-be-base and special
+   * flags are listed as well.
+   *
+   * @param {number} depth Unused.
+   * @param {Object} [opts] Only `showHidden` is read.
+   * @returns {string}
+   */
+  inspect(depth, opts) {
+    // Tolerate direct calls that pass no options object.
+    opts = opts || {};
+    var ret = 'URL {\n';
+    ret += ` href: ${this.href}\n`;
+    if (this[context].scheme !== undefined)
+      ret += ` protocol: ${this.protocol}\n`;
+    if (this[context].username !== undefined)
+      ret += ` username: ${this.username}\n`;
+    if (this[context].password !== undefined) {
+      const pwd = opts.showHidden ? this[context].password : '--------';
+      ret += ` password: ${pwd}\n`;
+    }
+    if (this[context].host !== undefined)
+      ret += ` hostname: ${this.hostname}\n`;
+    if (this[context].port !== undefined)
+      ret += ` port: ${this.port}\n`;
+    if (this[context].path !== undefined)
+      ret += ` pathname: ${this.pathname}\n`;
+    if (this[context].query !== undefined)
+      ret += ` search: ${this.search}\n`;
+    if (this[context].fragment !== undefined)
+      ret += ` hash: ${this.hash}\n`;
+    if (opts.showHidden) {
+      ret += ` cannot-be-base: ${this[cannotBeBase]}\n`;
+      ret += ` special: ${this[special]}\n`;
+    }
+    ret += '}';
+    return ret;
+  }
+}
+
+// Lookup table mapping every byte value 0-255 to its upper-case
+// percent-encoded form ('%00' ... '%FF').
+var hexTable = new Array(256);
+for (var i = 0; i < 256; ++i)
+ hexTable[i] = '%' + ((i < 16 ? '0' : '') + i.toString(16)).toUpperCase();
+// Percent-encodes `str` for use as a URL auth component. Characters in the
+// allow-list below are copied through unchanged; everything else is emitted
+// as the percent-encoded bytes of its UTF-8 form. When nothing had to be
+// escaped at position 0 onwards (lastPos stays 0) the input string itself is
+// returned.
+function encodeAuth(str) {
+ // faster encodeURIComponent alternative for encoding auth uri components
+ var out = '';
+ // Index just past the last escaped character; str.slice(lastPos, i) is the
+ // pending run of characters that can be copied verbatim.
+ var lastPos = 0;
+ for (var i = 0; i < str.length; ++i) {
+ var c = str.charCodeAt(i);
+
+ // These characters do not need escaping:
+ // ! - . _ ~
+ // ' ( ) * :
+ // digits
+ // alpha (uppercase)
+ // alpha (lowercase)
+ if (c === 0x21 || c === 0x2D || c === 0x2E || c === 0x5F || c === 0x7E ||
+ (c >= 0x27 && c <= 0x2A) ||
+ (c >= 0x30 && c <= 0x3A) ||
+ (c >= 0x41 && c <= 0x5A) ||
+ (c >= 0x61 && c <= 0x7A)) {
+ continue;
+ }
+
+ // Flush the verbatim run that precedes this character.
+ if (i - lastPos > 0)
+ out += str.slice(lastPos, i);
+
+ lastPos = i + 1;
+
+ // Other ASCII characters
+ if (c < 0x80) {
+ out += hexTable[c];
+ continue;
+ }
+
+ // Multi-byte characters ...
+ if (c < 0x800) {
+ out += hexTable[0xC0 | (c >> 6)] + hexTable[0x80 | (c & 0x3F)];
+ continue;
+ }
+ if (c < 0xD800 || c >= 0xE000) {
+ out += hexTable[0xE0 | (c >> 12)] +
+ hexTable[0x80 | ((c >> 6) & 0x3F)] +
+ hexTable[0x80 | (c & 0x3F)];
+ continue;
+ }
+ // Surrogate pair
+ // The next code unit is consumed as the second half without checking
+ // that it is a low surrogate; at end of input 0 is used instead.
+ ++i;
+ var c2;
+ if (i < str.length)
+ c2 = str.charCodeAt(i) & 0x3FF;
+ else
+ c2 = 0;
+ c = 0x10000 + (((c & 0x3FF) << 10) | c2);
+ out += hexTable[0xF0 | (c >> 18)] +
+ hexTable[0x80 | ((c >> 12) & 0x3F)] +
+ hexTable[0x80 | ((c >> 6) & 0x3F)] +
+ hexTable[0x80 | (c & 0x3F)];
+ }
+ if (lastPos === 0)
+ return str;
+ // Append whatever verbatim run is left after the last escaped character.
+ if (lastPos < str.length)
+ return out + str.slice(lastPos);
+ return out;
+}
+
+// Replaces the query component stored in url[context] with `search`.
+// - `search` is stringified; an empty string stores null and clears the
+//   URL_FLAGS_HAS_QUERY bit.
+// - A single leading '?' is dropped before parsing.
+// - The remainder is run through binding.parse() in the kQuery state.
+function update(url, search) {
+ search = String(search);
+ if (!search) {
+ url[context].query = null;
+ url[context].flags &= ~binding.URL_FLAGS_HAS_QUERY;
+ return;
+ }
+ if (search[0] === '?') search = search.slice(1);
+ // Reset to '' first; this is the value that remains if the parser reports
+ // failure or returns an empty query.
+ url[context].query = '';
+ binding.parse(search,
+ binding.kQuery,
+ null,
+ url[context],
+ (flags, protocol, username, password,
+ host, port, path, query, fragment) => {
+ if (flags & binding.URL_FLAGS_FAILED)
+ return;
+ if (query) {
+ url[context].query = query;
+ url[context].flags |= binding.URL_FLAGS_HAS_QUERY;
+ } else {
+ url[context].flags &= ~binding.URL_FLAGS_HAS_QUERY;
+ }
+ });
+}
+
+/**
+ * View over the query string of a URL.
+ *
+ * The parameters are kept as the object returned by querystring.parse()
+ * (name -> string, or name -> array of strings for repeated names). Every
+ * mutating method writes the re-serialized query back to the owning URL
+ * through update().
+ */
+class URLSearchParams {
+  /**
+   * @param {URL} url The URL whose query this object reflects.
+   */
+  constructor(url) {
+    this[context] = url;
+    // The parsed query is stored on the URL's context as `query` (there is
+    // no `search` field there), so that is what has to be parsed.
+    this[searchParams] = querystring.parse(url[context].query || '');
+  }
+
+  /**
+   * Adds a value for `name`, keeping any values that are already present.
+   */
+  append(name, value) {
+    const obj = this[searchParams];
+    name = String(name);
+    value = String(value);
+    const existing = obj[name];
+    // Compare against undefined: an existing empty-string value is falsy
+    // but is still a value that must be kept.
+    if (existing === undefined) {
+      obj[name] = value;
+    } else if (Array.isArray(existing)) {
+      existing.push(value);
+    } else {
+      obj[name] = [existing, value];
+    }
+    update(this[context], querystring.stringify(obj));
+  }
+
+  /** Removes every value stored for `name`. */
+  delete(name) {
+    const obj = this[searchParams];
+    name = String(name);
+    delete obj[name];
+    update(this[context], querystring.stringify(obj));
+  }
+
+  /** Replaces all values of `name` with the single `value`. */
+  set(name, value) {
+    const obj = this[searchParams];
+    name = String(name);
+    value = String(value);
+    obj[name] = value;
+    update(this[context], querystring.stringify(obj));
+  }
+
+  /**
+   * @returns {string|undefined} The first value stored for `name`, or
+   *   undefined when there is none.
+   */
+  get(name) {
+    const obj = this[searchParams];
+    name = String(name);
+    const value = obj[name];
+    return Array.isArray(value) ? value[0] : value;
+  }
+
+  /** @returns {string[]} Every value stored for `name` (possibly empty). */
+  getAll(name) {
+    const obj = this[searchParams];
+    name = String(name);
+    const value = obj[name];
+    if (value === undefined)
+      return [];
+    return Array.isArray(value) ? value : [value];
+  }
+
+  /** @returns {boolean} Whether `name` is a key of the parsed query. */
+  has(name) {
+    const obj = this[searchParams];
+    name = String(name);
+    return name in obj;
+  }
+
+  /** Yields one [name, value] pair per stored value. */
+  *[Symbol.iterator]() {
+    const obj = this[searchParams];
+    for (const name in obj) {
+      const value = obj[name];
+      if (Array.isArray(value)) {
+        for (const item of value)
+          yield [name, item];
+      } else {
+        yield [name, value];
+      }
+    }
+  }
+
+  /** Serializes the parameters with querystring.stringify(). */
+  toString() {
+    return querystring.stringify(this[searchParams]);
+  }
+}
+
+// Computes the origin object for `url` (anything that is not a URL instance
+// is parsed into one first).
+// - 'blob:' URLs: the first path entry is parsed as a URL and that URL's
+//   `origin` string is returned; if that fails, or there is no path, an
+//   OpaqueOrigin is returned.
+// - The listed network schemes produce a TupleOrigin built from the scheme
+//   (without the trailing ':'), host and port.
+// - Everything else produces an OpaqueOrigin.
+URL.originFor = function(url) {
+ if (!(url instanceof URL))
+ url = new URL(url);
+ var origin;
+ const protocol = url.protocol;
+ switch (protocol) {
+ case 'blob:':
+ if (url[context].path && url[context].path.length > 0) {
+ try {
+ return (new URL(url[context].path[0])).origin;
+ } catch (err) {
+ // fall through... do nothing
+ }
+ }
+ origin = new OpaqueOrigin();
+ break;
+ case 'ftp:':
+ case 'gopher:':
+ case 'http:':
+ case 'https:':
+ case 'ws:':
+ case 'wss:':
+ // NOTE(review): every other label ends in ':' and so do the protocol
+ // values compared elsewhere in this file ('file:', 'javascript:'), so
+ // 'file' presumably never matches and file URLs take the default
+ // branch. Confirm whether 'file:' or removing the label is intended.
+ case 'file':
+ origin = new TupleOrigin(protocol.slice(0, -1),
+ url[context].host,
+ url[context].port,
+ null);
+ break;
+ default:
+ origin = new OpaqueOrigin();
+ }
+ return origin;
+};
+
+// Stringifies `domain` and forwards it to binding.domainToASCII().
+URL.domainToASCII = function(domain) {
+ return binding.domainToASCII(String(domain));
+};
+// Stringifies `domain` and forwards it to binding.domainToUnicode().
+URL.domainToUnicode = function(domain) {
+ return binding.domainToUnicode(String(domain));
+};
+
+exports.URL = URL;
+exports.encodeAuth = encodeAuth;
diff --git a/lib/url.js b/lib/url.js
index d935726872..201ebfedcc 100644
--- a/lib/url.js
+++ b/lib/url.js
@@ -10,10 +10,14 @@ function importPunycode() {
const { toASCII } = importPunycode();
+const internalUrl = require('internal/url');
+const encodeAuth = internalUrl.encodeAuth;
exports.parse = urlParse;
exports.resolve = urlResolve;
exports.resolveObject = urlResolveObject;
exports.format = urlFormat;
+exports.URL = internalUrl.URL;
+
exports.Url = Url;
@@ -942,69 +946,3 @@ function spliceOne(list, index) {
list[i] = list[k];
list.pop();
}
-
-var hexTable = new Array(256);
-for (var i = 0; i < 256; ++i)
- hexTable[i] = '%' + ((i < 16 ? '0' : '') + i.toString(16)).toUpperCase();
-function encodeAuth(str) {
- // faster encodeURIComponent alternative for encoding auth uri components
- var out = '';
- var lastPos = 0;
- for (var i = 0; i < str.length; ++i) {
- var c = str.charCodeAt(i);
-
- // These characters do not need escaping:
- // ! - . _ ~
- // ' ( ) * :
- // digits
- // alpha (uppercase)
- // alpha (lowercase)
- if (c === 0x21 || c === 0x2D || c === 0x2E || c === 0x5F || c === 0x7E ||
- (c >= 0x27 && c <= 0x2A) ||
- (c >= 0x30 && c <= 0x3A) ||
- (c >= 0x41 && c <= 0x5A) ||
- (c >= 0x61 && c <= 0x7A)) {
- continue;
- }
-
- if (i - lastPos > 0)
- out += str.slice(lastPos, i);
-
- lastPos = i + 1;
-
- // Other ASCII characters
- if (c < 0x80) {
- out += hexTable[c];
- continue;
- }
-
- // Multi-byte characters ...
- if (c < 0x800) {
- out += hexTable[0xC0 | (c >> 6)] + hexTable[0x80 | (c & 0x3F)];
- continue;
- }
- if (c < 0xD800 || c >= 0xE000) {
- out += hexTable[0xE0 | (c >> 12)] +
- hexTable[0x80 | ((c >> 6) & 0x3F)] +
- hexTable[0x80 | (c & 0x3F)];
- continue;
- }
- // Surrogate pair
- ++i;
- var c2;
- if (i < str.length)
- c2 = str.charCodeAt(i) & 0x3FF;
- else
- c2 = 0;
- c = 0x10000 + (((c & 0x3FF) << 10) | c2);
- out += hexTable[0xF0 | (c >> 18)] +
- hexTable[0x80 | ((c >> 12) & 0x3F)] +
- hexTable[0x80 | ((c >> 6) & 0x3F)] +
- hexTable[0x80 | (c & 0x3F)];
- }
- if (lastPos === 0)
- return str;
- if (lastPos < str.length)
- return out + str.slice(lastPos);
- return out;
-}
diff --git a/node.gyp b/node.gyp
index a998d82926..fa98547e09 100644
--- a/node.gyp
+++ b/node.gyp
@@ -89,6 +89,7 @@
'lib/internal/readline.js',
'lib/internal/repl.js',
'lib/internal/socket_list.js',
+ 'lib/internal/url.js',
'lib/internal/util.js',
'lib/internal/v8_prof_polyfill.js',
'lib/internal/v8_prof_processor.js',
@@ -158,6 +159,7 @@
'src/node_main.cc',
'src/node_os.cc',
'src/node_revert.cc',
+ 'src/node_url.cc',
'src/node_util.cc',
'src/node_v8.cc',
'src/node_stat_watcher.cc',
diff --git a/src/node_i18n.cc b/src/node_i18n.cc
index 0f3b9b76e6..f89ae40a55 100644
--- a/src/node_i18n.cc
+++ b/src/node_i18n.cc
@@ -79,9 +79,9 @@ bool InitializeICUDirectory(const char* icu_data_path) {
}
}
-static int32_t ToUnicode(MaybeStackBuffer<char>* buf,
- const char* input,
- size_t length) {
+int32_t ToUnicode(MaybeStackBuffer<char>* buf,
+ const char* input,
+ size_t length) {
UErrorCode status = U_ZERO_ERROR;
uint32_t options = UIDNA_DEFAULT;
options |= UIDNA_NONTRANSITIONAL_TO_UNICODE;
@@ -113,9 +113,9 @@ static int32_t ToUnicode(MaybeStackBuffer<char>* buf,
return len;
}
-static int32_t ToASCII(MaybeStackBuffer<char>* buf,
- const char* input,
- size_t length) {
+int32_t ToASCII(MaybeStackBuffer<char>* buf,
+ const char* input,
+ size_t length) {
UErrorCode status = U_ZERO_ERROR;
uint32_t options = UIDNA_DEFAULT;
options |= UIDNA_NONTRANSITIONAL_TO_ASCII;
diff --git a/src/node_i18n.h b/src/node_i18n.h
index 31ad18fa47..21a579526d 100644
--- a/src/node_i18n.h
+++ b/src/node_i18n.h
@@ -15,6 +15,13 @@ namespace i18n {
bool InitializeICUDirectory(const char* icu_data_path);
+int32_t ToASCII(MaybeStackBuffer<char>* buf,
+ const char* input,
+ size_t length);
+int32_t ToUnicode(MaybeStackBuffer<char>* buf,
+ const char* input,
+ size_t length);
+
} // namespace i18n
} // namespace node
diff --git a/src/node_url.cc b/src/node_url.cc
new file mode 100644
index 0000000000..f5b1a143f1
--- /dev/null
+++ b/src/node_url.cc
@@ -0,0 +1,1406 @@
+#include "node_url.h"
+#include "node.h"
+#include "node_internals.h"
+#include "env.h"
+#include "env-inl.h"
+#include "util.h"
+#include "util-inl.h"
+#include "v8.h"
+#include "base-object.h"
+#include "base-object-inl.h"
+#include "node_i18n.h"
+
+#include <string>
+#include <vector>
+#include <stdio.h>
+#include <cmath>
+
+#if defined(NODE_HAVE_I18N_SUPPORT)
+#include <unicode/utf8.h>
+#include <unicode/utf.h>
+#endif
+
+namespace node {
+
+using v8::Array;
+using v8::Context;
+using v8::Function;
+using v8::FunctionCallbackInfo;
+using v8::HandleScope;
+using v8::Integer;
+using v8::Isolate;
+using v8::Local;
+using v8::Null;
+using v8::Object;
+using v8::String;
+using v8::Undefined;
+using v8::Value;
+
+#define GET(env, obj, name) \
+ obj->Get(env->context(), \
+ OneByteString(env->isolate(), name)).ToLocalChecked()
+
+#define GET_AND_SET(env, obj, name, data, flag) \
+ { \
+ Local<Value> val = GET(env, obj, #name); \
+ if (val->IsString()) { \
+ Utf8Value value(env->isolate(), val.As<String>()); \
+ data->name = *value; \
+ data->flags |= flag; \
+ } \
+ }
+
+#define CANNOT_BE_BASE() url.flags |= URL_FLAGS_CANNOT_BE_BASE;
+#define INVALID_PARSE_STATE() url.flags |= URL_FLAGS_INVALID_PARSE_STATE;
+#define SPECIAL() \
+ { \
+ url.flags |= URL_FLAGS_SPECIAL; \
+ special = true; \
+ }
+#define TERMINATE() \
+ { \
+ url.flags |= URL_FLAGS_TERMINATED; \
+ goto done; \
+ }
+#define FAILED() \
+ { \
+ url.flags |= URL_FLAGS_FAILED; \
+ goto done; \
+ }
+
+#define CHECK_FLAG(flags, name) (flags & URL_FLAGS_##name) /* NOLINT */
+
+#define IS_CANNOT_BE_BASE(flags) CHECK_FLAG(flags, CANNOT_BE_BASE)
+#define IS_FAILED(flags) CHECK_FLAG(flags, FAILED)
+
+#define DOES_HAVE_SCHEME(url) CHECK_FLAG(url.flags, HAS_SCHEME)
+#define DOES_HAVE_USERNAME(url) CHECK_FLAG(url.flags, HAS_USERNAME)
+#define DOES_HAVE_PASSWORD(url) CHECK_FLAG(url.flags, HAS_PASSWORD)
+#define DOES_HAVE_HOST(url) CHECK_FLAG(url.flags, HAS_HOST)
+#define DOES_HAVE_PATH(url) CHECK_FLAG(url.flags, HAS_PATH)
+#define DOES_HAVE_QUERY(url) CHECK_FLAG(url.flags, HAS_QUERY)
+#define DOES_HAVE_FRAGMENT(url) CHECK_FLAG(url.flags, HAS_FRAGMENT)
+
+#define SET_HAVE_SCHEME() url.flags |= URL_FLAGS_HAS_SCHEME;
+#define SET_HAVE_USERNAME() url.flags |= URL_FLAGS_HAS_USERNAME;
+#define SET_HAVE_PASSWORD() url.flags |= URL_FLAGS_HAS_PASSWORD;
+#define SET_HAVE_HOST() url.flags |= URL_FLAGS_HAS_HOST;
+#define SET_HAVE_PATH() url.flags |= URL_FLAGS_HAS_PATH;
+#define SET_HAVE_QUERY() url.flags |= URL_FLAGS_HAS_QUERY;
+#define SET_HAVE_FRAGMENT() url.flags |= URL_FLAGS_HAS_FRAGMENT;
+
+#define UTF8STRING(isolate, str) \
+ String::NewFromUtf8(isolate, str.c_str(), v8::NewStringType::kNormal) \
+ .ToLocalChecked()
+
+namespace url {
+
+#if defined(NODE_HAVE_I18N_SUPPORT)
+ // Runs *input through i18n::ToUnicode() and stores the result in *output.
+ // Returns 0 on success and -1 when the conversion reports an error, in
+ // which case *output is left untouched.
+ static int ToUnicode(std::string* input, std::string* output) {
+   MaybeStackBuffer<char> converted;
+   const int32_t rc =
+       i18n::ToUnicode(&converted, input->c_str(), input->length());
+   if (rc < 0)
+     return -1;
+   output->assign(*converted, converted.length());
+   return 0;
+ }
+
+ // Runs *input through i18n::ToASCII() and stores the result in *output.
+ // Returns 0 on success and -1 when the conversion reports an error, in
+ // which case *output is left untouched.
+ static int ToASCII(std::string* input, std::string* output) {
+   MaybeStackBuffer<char> converted;
+   const int32_t rc =
+       i18n::ToASCII(&converted, input->c_str(), input->length());
+   if (rc < 0)
+     return -1;
+   output->assign(*converted, converted.length());
+   return 0;
+ }
+
+ // Unfortunately there's not really a better way to do this.
+ // Iterate through each encoded codepoint and verify that
+ // it is a valid unicode codepoint.
+ //
+ // Returns 0 when every code point in *input is a Unicode character and -1
+ // otherwise. The input comes straight from percent-decoding and may be
+ // ill-formed, so the bounds-checked U8_NEXT is used: it never reads past
+ // `len` and yields a negative code point for a malformed sequence, which
+ // U_IS_UNICODE_CHAR rejects.
+ static int IsValidUTF8(std::string* input) {
+   const uint8_t* p = reinterpret_cast<const uint8_t*>(input->c_str());
+   const int32_t len = static_cast<int32_t>(input->length());
+   for (int32_t i = 0; i < len;) {
+     UChar32 c;
+     U8_NEXT(p, i, len, c);
+     if (!U_IS_UNICODE_CHAR(c))
+       return -1;
+   }
+   return 0;
+ }
+#else
+ // Intentional non-ops if ICU is not present.
+ // Copies *input to *output unchanged and reports success (0). `input` and
+ // `output` may point to the same string.
+ static int ToUnicode(std::string* input, std::string* output) {
+   *output = *input;
+   return 0;
+ }
+
+ // No-op used when ICU is not present: copies *input to *output unchanged
+ // and reports success (0). `input` and `output` may point to the same
+ // string.
+ static int ToASCII(std::string* input, std::string* output) {
+   *output = *input;
+   return 0;
+ }
+
+ // Variant used when ICU is not present: performs no validation and always
+ // reports the input as valid (0).
+ static int IsValidUTF8(std::string* input) {
+ return 0;
+ }
+#endif
+
+ // Parses the text between the brackets of an IPv6 host literal.
+ //
+ // host:   receives the eight 16-bit pieces in host->value.ipv6 and the
+ //         resulting type in host->type.
+ // input:  first character after '['.
+ // length: number of characters up to, but excluding, ']'.
+ //
+ // Returns HOST_TYPE_IPV6 on success and HOST_TYPE_FAILED otherwise. A
+ // single "::" compression and a trailing dotted-decimal IPv4 part are
+ // accepted.
+ static url_host_type ParseIPv6Host(url_host* host,
+                                    const char* input,
+                                    size_t length) {
+   url_host_type type = HOST_TYPE_FAILED;
+   for (unsigned n = 0; n < 8; n++)
+     host->value.ipv6[n] = 0;
+   uint16_t* piece_pointer = &host->value.ipv6[0];
+   // One past the last piece; never dereferenced.
+   uint16_t* last_piece = piece_pointer + 8;
+   uint16_t* compress_pointer = nullptr;
+   const char* pointer = input;
+   const char* end = pointer + length;
+   unsigned value, len, swaps, dots;
+   char ch = pointer < end ? pointer[0] : kEOL;
+   if (ch == ':') {
+     // A leading ':' is only valid as the start of "::".
+     if (length < 2 || pointer[1] != ':')
+       goto end;
+     pointer += 2;
+     ch = pointer < end ? pointer[0] : kEOL;
+     piece_pointer++;
+     compress_pointer = piece_pointer;
+   }
+   while (ch != kEOL) {
+     // All eight pieces are already filled but input remains. This has to
+     // be >= (not >): otherwise a ninth piece would be written one element
+     // past the end of the ipv6 array below.
+     if (piece_pointer >= last_piece)
+       goto end;
+     if (ch == ':') {
+       // Only one "::" compression is allowed.
+       if (compress_pointer != nullptr)
+         goto end;
+       pointer++;
+       ch = pointer < end ? pointer[0] : kEOL;
+       piece_pointer++;
+       compress_pointer = piece_pointer;
+       continue;
+     }
+     // Read up to four hex digits of the next piece.
+     value = 0;
+     len = 0;
+     while (len < 4 && ASCII_HEX_DIGIT(ch)) {
+       value = value * 0x10 + hex2bin(ch);
+       pointer++;
+       ch = pointer < end ? pointer[0] : kEOL;
+       len++;
+     }
+     switch (ch) {
+       case '.':
+         // The digits just read belong to an embedded IPv4 address; rewind
+         // and parse them as decimal instead.
+         if (len == 0)
+           goto end;
+         pointer -= len;
+         ch = pointer < end ? pointer[0] : kEOL;
+         // The IPv4 part occupies the last two pieces.
+         if (piece_pointer > last_piece - 2)
+           goto end;
+         dots = 0;
+         while (ch != kEOL) {
+           // 0xffffffff marks "no digit seen yet" for this number.
+           value = 0xffffffff;
+           if (!ASCII_DIGIT(ch))
+             goto end;
+           while (ASCII_DIGIT(ch)) {
+             unsigned number = ch - '0';
+             if (value == 0xffffffff) {
+               value = number;
+             } else if (value == 0) {
+               // Leading zeros are not allowed.
+               goto end;
+             } else {
+               value = value * 10 + number;
+             }
+             if (value > 255)
+               goto end;
+             pointer++;
+             ch = pointer < end ? pointer[0] : kEOL;
+           }
+           if (dots < 3 && ch != '.')
+             goto end;
+           // Two decimal numbers are packed into each 16-bit piece.
+           *piece_pointer = *piece_pointer * 0x100 + value;
+           if (dots & 0x1)
+             piece_pointer++;
+           if (ch != kEOL) {
+             pointer++;
+             ch = pointer < end ? pointer[0] : kEOL;
+           }
+           if (dots == 3 && ch != kEOL)
+             goto end;
+           dots++;
+         }
+         continue;
+       case ':':
+         pointer++;
+         ch = pointer < end ? pointer[0] : kEOL;
+         // A single trailing ':' is invalid.
+         if (ch == kEOL)
+           goto end;
+         break;
+       case kEOL:
+         break;
+       default:
+         goto end;
+     }
+     *piece_pointer = value;
+     piece_pointer++;
+   }
+
+   if (compress_pointer != nullptr) {
+     // Move the pieces that followed "::" to the end of the array, leaving
+     // the zeros of the compressed run in the middle.
+     swaps = piece_pointer - compress_pointer;
+     piece_pointer = last_piece - 1;
+     while (piece_pointer != &host->value.ipv6[0] && swaps > 0) {
+       uint16_t temp = *piece_pointer;
+       uint16_t* swap_piece = compress_pointer + swaps - 1;
+       *piece_pointer = *swap_piece;
+       *swap_piece = temp;
+       piece_pointer--;
+       swaps--;
+     }
+   } else if (compress_pointer == nullptr &&
+              piece_pointer != last_piece) {
+     // Without compression exactly eight pieces are required.
+     goto end;
+   }
+   type = HOST_TYPE_IPV6;
+  end:
+   host->type = type;
+   return type;
+ }
+
+ // Parses the characters in [start, end) as one IPv4 number.
+ // The radix is 16 after a "0x"/"0X" prefix, 8 after a leading '0' that is
+ // followed by more characters, and 10 otherwise.
+ // Returns 0 for an empty range (also when only the hex prefix is present),
+ // -1 when a character is not a digit of the selected radix, and otherwise
+ // the strtol() result converted to int.
+ static inline int ParseNumber(const char* start, const char* end) {
+ unsigned R = 10;
+ if (end - start >= 2 && start[0] == '0' && (start[1] | 0x20) == 'x') {
+ start += 2;
+ R = 16;
+ }
+ if (end - start == 0) {
+ return 0;
+ } else if (R == 10 && end - start > 1 && start[0] == '0') {
+ start++;
+ R = 8;
+ }
+ const char* p = start;
+
+ // Validate every character up front so that -1 can be returned for
+ // anything that is not purely numeric.
+ while (p < end) {
+ const char ch = p[0];
+ switch (R) {
+ case 8:
+ if (ch < '0' || ch > '7')
+ return -1;
+ break;
+ case 10:
+ if (!ASCII_DIGIT(ch))
+ return -1;
+ break;
+ case 16:
+ if (!ASCII_HEX_DIGIT(ch))
+ return -1;
+ break;
+ }
+ p++;
+ }
+ // NOTE(review): strtol() is not bounded by `end`; this presumably relies
+ // on the character at `end` ('.' or the string terminator in the caller)
+ // not being a digit of radix R. Values that do not fit in int are
+ // narrowed by the return conversion - confirm this is acceptable.
+ return strtol(start, NULL, R);
+ }
+
+ // Tries to interpret `input` (of `length` characters) as an IPv4 address
+ // made of up to four '.'-separated numbers, each parsed by ParseNumber().
+ //
+ // Returns, and stores in host->type:
+ //   HOST_TYPE_IPV4   - parsed; the 32-bit value is in host->value.ipv4.
+ //   HOST_TYPE_DOMAIN - the input does not look like an IPv4 address
+ //                      (empty, a non-numeric part, an empty part, or more
+ //                      than four parts); host->value is left untouched.
+ //   HOST_TYPE_FAILED - a part is numeric but larger than 255.
+ static url_host_type ParseIPv4Host(url_host* host,
+                                    const char* input,
+                                    size_t length) {
+   url_host_type type = HOST_TYPE_DOMAIN;
+   const char* pointer = input;
+   // Start of the part currently being scanned.
+   const char* mark = input;
+   const char* end = pointer + length;
+   int parts = 0;
+   uint32_t val = 0;
+   unsigned numbers[4];
+   if (length == 0)
+     goto end;
+
+   while (pointer <= end) {
+     const char ch = pointer < end ? pointer[0] : kEOL;
+     const int remaining = end - pointer - 1;
+     if (ch == '.' || ch == kEOL) {
+       // More than four parts, or an empty part, means this is not an IPv4
+       // address. Report it as a domain instead of falling through to the
+       // IPv4 path, which would read numbers[] out of bounds (parts == 5)
+       // or read a slot that was never written.
+       if (++parts > 4 || pointer - mark == 0)
+         goto end;
+       int n = ParseNumber(mark, pointer);
+       if (n < 0) {
+         type = HOST_TYPE_DOMAIN;
+         goto end;
+       }
+       // NOTE(review): a ten character part is accepted without the 255
+       // limit and ends parsing; presumably meant for a full 32-bit
+       // address written as one number - confirm.
+       if (pointer - mark == 10) {
+         numbers[parts - 1] = n;
+         break;
+       }
+       if (n > 255) {
+         type = HOST_TYPE_FAILED;
+         goto end;
+       }
+       numbers[parts - 1] = n;
+       mark = pointer + 1;
+       // A single trailing '.' is tolerated.
+       if (ch == '.' && remaining == 0)
+         break;
+     }
+     pointer++;
+   }
+
+   type = HOST_TYPE_IPV4;
+   if (parts > 0) {
+     // The last number fills the low-order bytes; every earlier number is
+     // placed in its own byte counted from the most significant one.
+     val = numbers[parts - 1];
+     for (int n = 0; n < parts - 1; n++) {
+       double b = 3-n;
+       val += numbers[n] * pow(256, b);
+     }
+   }
+
+   host->value.ipv4 = val;
+  end:
+   host->type = type;
+   return type;
+ }
+
+ // Parses a host string into `host`.
+ //
+ // host:    receives the parsed value and type.
+ // input:   host text of `length` characters.
+ // unicode: when true, a domain result is passed through ToUnicode().
+ //
+ // Returns the resulting type, which is also stored in host->type:
+ // HOST_TYPE_IPV6 for a bracketed literal, HOST_TYPE_IPV4, HOST_TYPE_DOMAIN,
+ // or HOST_TYPE_FAILED (empty input, bad percent-encoding, invalid UTF-8,
+ // failed IDNA conversion, or a forbidden character).
+ static url_host_type ParseHost(url_host* host,
+ const char* input,
+ size_t length,
+ bool unicode = false) {
+ url_host_type type = HOST_TYPE_FAILED;
+ const char* pointer = input;
+ std::string decoded;
+
+ if (length == 0)
+ goto end;
+
+ // A leading '[' must be matched by a trailing ']'; what is in between is
+ // handed to the IPv6 parser.
+ if (pointer[0] == '[') {
+ if (pointer[length - 1] != ']')
+ goto end;
+ return ParseIPv6Host(host, ++pointer, length - 2);
+ }
+
+ // First, we have to percent decode
+ if (PercentDecode(input, length, &decoded) < 0)
+ goto end;
+
+ // If there are any invalid UTF8 byte sequences, we have to fail.
+ // Unfortunately this means iterating through the string and checking
+ // each decoded codepoint.
+ if (IsValidUTF8(&decoded) < 0)
+ goto end;
+
+ // Then we have to punycode toASCII
+ if (ToASCII(&decoded, &decoded) < 0)
+ goto end;
+
+ // If any of the following characters are still present, we have to fail
+ for (size_t n = 0; n < decoded.size(); n++) {
+ const char ch = decoded[n];
+ if (ch == 0x00 || ch == 0x09 || ch == 0x0a || ch == 0x0d ||
+ ch == 0x20 || ch == '#' || ch == '%' || ch == '/' ||
+ ch == '?' || ch == '@' || ch == '[' || ch == '\\' ||
+ ch == ']') {
+ goto end;
+ }
+ }
+
+ // Check to see if it's an IPv4 IP address
+ type = ParseIPv4Host(host, decoded.c_str(), decoded.length());
+ if (type == HOST_TYPE_IPV4 || type == HOST_TYPE_FAILED)
+ goto end;
+
+ // If the unicode flag is set, run the result through punycode ToUnicode
+ // (at this point `type` is HOST_TYPE_DOMAIN, so a ToUnicode failure is
+ // reported with that type rather than HOST_TYPE_FAILED)
+ if (unicode && ToUnicode(&decoded, &decoded) < 0)
+ goto end;
+
+ // It's not an IPv4 or IPv6 address, it must be a domain
+ type = HOST_TYPE_DOMAIN;
+ host->value.domain = decoded;
+
+ end:
+ host->type = type;
+ return type;
+ }
+
+ // Finds where the "::" compression should go when an IPv6 address is
+ // serialized: returns a pointer to the first element of the longest run of
+ // zero pieces in values[0..len), or nullptr when no run is longer than one
+ // piece. Of several runs with the same length the earliest one wins.
+ static inline uint16_t* FindLongestZeroSequence(uint16_t* values,
+                                                 size_t len) {
+   uint16_t* best = nullptr;
+   uint16_t* run_start = nullptr;
+   unsigned run_length = 0;
+   // Starting at 1 means a run has to span at least two pieces to count.
+   unsigned best_length = 1;
+
+   for (size_t idx = 0; idx < len; idx++) {
+     if (values[idx] == 0) {
+       if (run_start == nullptr)
+         run_start = values + idx;
+       run_length++;
+       continue;
+     }
+     // A non-zero piece closes the current run.
+     if (run_length > best_length) {
+       best_length = run_length;
+       best = run_start;
+     }
+     run_length = 0;
+     run_start = nullptr;
+   }
+   // A run may extend to the very end of the array.
+   if (run_length > best_length)
+     best = run_start;
+   return best;
+ }
+
+ // Serializes a parsed host into |dest| (which is cleared first) and returns
+ // the host's type:
+ //   - domain: the stored domain string is copied as-is
+ //   - IPv4:   dotted decimal, most significant octet first
+ //   - IPv6:   '[' + lowercase hex pieces + ']', with the run selected by
+ //             FindLongestZeroSequence() collapsed to "::"
+ //   - failed: |dest| is left empty
+ static url_host_type WriteHost(url_host* host, std::string* dest) {
+   dest->clear();
+   switch (host->type) {
+     case HOST_TYPE_DOMAIN:
+       *dest = host->value.domain;
+       break;
+     case HOST_TYPE_IPV4: {
+       dest->reserve(15);
+       uint32_t value = host->value.ipv4;
+       // Octets are peeled off least-significant first and prepended, so the
+       // finished string reads most-significant first.
+       for (int n = 0; n < 4; n++) {
+         char buf[4];
+         // The operand is unsigned, so format it with %u rather than %d.
+         snprintf(buf, sizeof(buf), "%u", static_cast<unsigned>(value % 256));
+         dest->insert(0, buf);
+         if (n < 3)
+           dest->insert(0, 1, '.');
+         value /= 256;
+       }
+       break;
+     }
+     case HOST_TYPE_IPV6: {
+       dest->reserve(41);
+       *dest += '[';
+       uint16_t* start = &host->value.ipv6[0];
+       uint16_t* compress_pointer = FindLongestZeroSequence(start, 8);
+       for (int n = 0; n <= 7; n++) {
+         if (compress_pointer == start + n) {
+           // The previous piece (if any) already appended one ':'.
+           *dest += n == 0 ? "::" : ":";
+           // Skip the zero run. The bounds check must come first: when the
+           // run extends to the last piece, n reaches 8 and start[8] must
+           // not be read.
+           while (n < 8 && start[n] == 0)
+             n++;
+           if (n == 8)
+             break;
+         }
+         char buf[5];
+         snprintf(buf, sizeof(buf), "%x", static_cast<unsigned>(start[n]));
+         *dest += buf;
+         if (n < 7)
+           *dest += ':';
+       }
+       *dest += ']';
+       break;
+     }
+     case HOST_TYPE_FAILED:
+       break;
+   }
+   return host->type;
+ }
+
+ // Convenience wrapper around the url_host-based parser: parses |input| as a
+ // host and writes its serialized form to |output|. Empty input is accepted
+ // without touching |output|. Returns 0 on success, -1 if the host is invalid.
+ static int ParseHost(std::string* input,
+                      std::string* output,
+                      bool unicode = false) {
+   if (input->empty())
+     return 0;
+
+   url_host parsed{{""}, HOST_TYPE_DOMAIN};
+   ParseHost(&parsed, input->c_str(), input->length(), unicode);
+   if (parsed.type != HOST_TYPE_FAILED) {
+     WriteHost(&parsed, output);
+     return 0;
+   }
+   return -1;
+ }
+
+ // Appends every string element of the JS array |ary| to |vec| as UTF-8.
+ // Elements that are not strings are skipped.
+ static inline void Copy(Isolate* isolate,
+                         Local<Array> ary,
+                         std::vector<std::string>* vec) {
+   const int32_t count = ary->Length();
+   if (count == 0)
+     return;  // empty array: leave |vec| untouched
+
+   vec->reserve(count);
+   for (int32_t i = 0; i < count; i++) {
+     Local<Value> element = ary->Get(i);
+     if (!element->IsString())
+       continue;
+     Utf8Value utf8(isolate, element.As<String>());
+     vec->push_back(std::string(*utf8, utf8.length()));
+   }
+ }
+
+ // Builds a new JS array containing each element of |vec| converted to a
+ // string through UTF8STRING. |vec| is only read, so it is taken by const
+ // reference instead of by value to avoid copying the whole vector (and every
+ // string in it) on each call.
+ static inline Local<Array> Copy(Isolate* isolate,
+                                 const std::vector<std::string>& vec) {
+   Local<Array> ary = Array::New(isolate, vec.size());
+   for (size_t n = 0; n < vec.size(); n++)
+     ary->Set(n, UTF8STRING(isolate, vec[n]));
+   return ary;
+ }
+
+ // Populates |base| from the JS object |base_obj| describing the base URL.
+ // Reads, in this order: "flags", the six string components handled by
+ // GET_AND_SET, "port", and "path".
+ // NOTE(review): GET and GET_AND_SET are macros defined outside this view;
+ // presumably GET_AND_SET copies the named string property into |base| and
+ // sets the given flag when the property is present - confirm against the
+ // macro definition.
+ static inline void HarvestBase(Environment* env,
+ struct url_data* base,
+ Local<Object> base_obj) {
+ // Take the flags wholesale, but only when the property is an int32.
+ Local<Value> flags = GET(env, base_obj, "flags");
+ if (flags->IsInt32())
+ base->flags = flags->Int32Value();
+
+ GET_AND_SET(env, base_obj, scheme, base, URL_FLAGS_HAS_SCHEME);
+ GET_AND_SET(env, base_obj, username, base, URL_FLAGS_HAS_USERNAME);
+ GET_AND_SET(env, base_obj, password, base, URL_FLAGS_HAS_PASSWORD);
+ GET_AND_SET(env, base_obj, host, base, URL_FLAGS_HAS_HOST);
+ GET_AND_SET(env, base_obj, query, base, URL_FLAGS_HAS_QUERY);
+ GET_AND_SET(env, base_obj, fragment, base, URL_FLAGS_HAS_FRAGMENT);
+ // The port is left at its existing value unless the property is an int32.
+ Local<Value> port = GET(env, base_obj, "port");
+ if (port->IsInt32())
+ base->port = port->Int32Value();
+ // The path is an array of segments; mark it present and copy the strings.
+ Local<Value> path = GET(env, base_obj, "path");
+ if (path->IsArray()) {
+ base->flags |= URL_FLAGS_HAS_PATH;
+ Copy(env->isolate(), path.As<Array>(), &(base->path));
+ }
+ }
+
+ // Seeds |context| from the JS object |context_obj|. Only the "special" and
+ // "cannot be base" flag bits are carried over from its "flags" property,
+ // together with the "scheme" string and the "port" integer; any property of
+ // an unexpected type is ignored.
+ static inline void HarvestContext(Environment* env,
+                                   struct url_data* context,
+                                   Local<Object> context_obj) {
+   Local<Value> flags_val = GET(env, context_obj, "flags");
+   if (flags_val->IsInt32()) {
+     const int32_t source_flags = flags_val->Int32Value();
+     // Special first, then cannot-be-base, as two independent checks.
+     if (source_flags & URL_FLAGS_SPECIAL)
+       context->flags |= URL_FLAGS_SPECIAL;
+     if (source_flags & URL_FLAGS_CANNOT_BE_BASE)
+       context->flags |= URL_FLAGS_CANNOT_BE_BASE;
+   }
+
+   Local<Value> scheme_val = GET(env, context_obj, "scheme");
+   if (scheme_val->IsString()) {
+     Utf8Value utf8(env->isolate(), scheme_val);
+     context->scheme.assign(*utf8, utf8.length());
+   }
+
+   Local<Value> port_val = GET(env, context_obj, "port");
+   if (port_val->IsInt32())
+     context->port = port_val->Int32Value();
+ }
+
+ // Reports whether |str| is a single-dot path segment: a literal "." or its
+ // percent-encoded spelling "%2e" / "%2E".
+ static inline bool IsSingleDotSegment(std::string str) {
+   const size_t size = str.size();
+   if (size == 1)
+     return str[0] == '.';
+   if (size != 3)
+     return false;
+   return str[0] == '%' && str[1] == '2' && TO_LOWER(str[2]) == 'e';
+ }
+
+ // Reports whether |str| is a double-dot path segment. Each of the two dots
+ // may independently be a literal '.' or percent-encoded as "%2e" / "%2E",
+ // which yields segments of length 2, 4 or 6:
+ //   "..", ".%2e", "%2e.", "%2e%2e" (hex digit 'e' in either case)
+ static inline bool IsDoubleDotSegment(std::string str) {
+   // True when the three characters starting at |pos| spell an encoded dot.
+   const auto encoded_dot_at = [&str](size_t pos) {
+     return str[pos] == '%' &&
+            str[pos + 1] == '2' &&
+            TO_LOWER(str[pos + 2]) == 'e';
+   };
+
+   const size_t size = str.size();
+   if (size == 2)
+     return str[0] == '.' && str[1] == '.';
+   if (size == 4) {
+     return (str[0] == '.' && encoded_dot_at(1)) ||
+            (encoded_dot_at(0) && str[3] == '.');
+   }
+   if (size == 6)
+     return encoded_dot_at(0) && encoded_dot_at(3);
+   return false;
+ }
+
+ // Runs the URL parsing state machine over |input| (|len| bytes) and reports
+ // the outcome by calling |cb| on |recv| with a nine-slot argument array that
+ // is indexed by the ARG_* constants. The flags slot is always filled in; the
+ // component slots are filled only when the parse did not fail and the
+ // corresponding component is present, and are otherwise left undefined.
+ //
+ //   override     kUnknownState for a full parse starting at kSchemeStart;
+ //                any other state starts the machine there ("state override"
+ //                mode, which also changes several termination rules below).
+ //   base_obj     when it is an object, harvested into |base| and used for
+ //                relative resolution.
+ //   context_obj  when it is an object, harvested to seed |url| before
+ //                parsing starts.
+ //
+ // NOTE(review): the p[1] / p[2] lookaheads below can read input[len]; this
+ // assumes the caller passes a NUL-terminated buffer - confirm.
+ // NOTE(review): SET_HAVE_*, SPECIAL, CANNOT_BE_BASE, TERMINATE, FAILED and
+ // INVALID_PARSE_STATE are macros defined outside this view; several of them
+ // are used as complete statements without a trailing semicolon.
+ static void Parse(Environment* env,
+                   Local<Value> recv,
+                   const char* input,
+                   const size_t len,
+                   enum url_parse_state override,
+                   Local<Object> base_obj,
+                   Local<Object> context_obj,
+                   Local<Function> cb) {
+   Isolate* isolate = env->isolate();
+   Local<Context> context = env->context();
+   HandleScope handle_scope(isolate);
+   Context::Scope context_scope(context);
+
+   const bool has_base = base_obj->IsObject();
+   bool atflag = false;        // an '@' has been seen in the authority
+   bool sbflag = false;        // currently inside '[' ... ']' in the host
+   bool uflag = false;         // the userinfo ':' separator has been seen
+   bool base_is_file = false;
+   int wskip = 0;              // tabs/newlines skipped while in kAuthority
+
+   struct url_data base;
+   struct url_data url;
+   if (context_obj->IsObject())
+     HarvestContext(env, &url, context_obj);
+   if (has_base)
+     HarvestBase(env, &base, base_obj);
+
+   std::string buffer;
+   url.scheme.reserve(len);
+   url.username.reserve(len);
+   url.password.reserve(len);
+   url.host.reserve(len);
+   url.path.reserve(len);
+   url.query.reserve(len);
+   url.fragment.reserve(len);
+   buffer.reserve(len);
+
+   // Set the initial parse state.
+   const bool state_override = override != kUnknownState;
+   enum url_parse_state state = state_override ? override : kSchemeStart;
+
+   const char* p = input;
+   const char* end = input + len;
+
+   if (state < kSchemeStart || state > kFragment) {
+     INVALID_PARSE_STATE();
+     goto done;
+   }
+
+   // The loop runs one step past the last byte so that every state gets a
+   // chance to handle the end-of-input sentinel kEOL.
+   while (p <= end) {
+     const char ch = p < end ? p[0] : kEOL;
+
+     if (TAB_AND_NEWLINE(ch)) {
+       if (state == kAuthority) {
+         // It's necessary to keep track of how much whitespace
+         // is being ignored when in kAuthority state because of
+         // how the buffer is managed. TODO: See if there's a better
+         // way
+         wskip++;
+       }
+       p++;
+       continue;
+     }
+
+     bool special = url.flags & URL_FLAGS_SPECIAL;
+     const bool special_back_slash = (special && ch == '\\');
+     // A `continue` inside the switch re-runs the new state on the same
+     // character; a `break` falls through to the p++ at the bottom.
+     switch (state) {
+       case kSchemeStart:
+         if (ASCII_ALPHA(ch)) {
+           buffer += TO_LOWER(ch);
+           state = kScheme;
+         } else if (!state_override) {
+           state = kNoScheme;
+           continue;
+         } else {
+           TERMINATE()
+         }
+         break;
+       case kScheme:
+         if (SCHEME_CHAR(ch)) {
+           buffer += TO_LOWER(ch);
+           p++;
+           continue;
+         } else if (ch == ':' || (state_override && ch == kEOL)) {
+           buffer += ':';
+           if (buffer.size() > 0) {
+             SET_HAVE_SCHEME()
+             url.scheme = buffer;
+           }
+           if (IsSpecial(url.scheme)) {
+             SPECIAL()
+           } else {
+             url.flags &= ~URL_FLAGS_SPECIAL;
+           }
+           if (state_override)
+             goto done;
+           buffer.clear();
+           if (url.scheme == "file:") {
+             state = kFile;
+           } else if (special &&
+                      has_base &&
+                      DOES_HAVE_SCHEME(base) &&
+                      url.scheme == base.scheme) {
+             state = kSpecialRelativeOrAuthority;
+           } else if (special) {
+             state = kSpecialAuthoritySlashes;
+           } else if (p[1] == '/') {
+             state = kPathOrAuthority;
+             p++;
+           } else {
+             CANNOT_BE_BASE()
+             SET_HAVE_PATH()
+             url.path.push_back("");
+             state = kCannotBeBase;
+           }
+         } else if (!state_override) {
+           // Not a scheme after all: restart from the beginning of the input.
+           buffer.clear();
+           state = kNoScheme;
+           p = input;
+           continue;
+         } else {
+           TERMINATE()
+         }
+         break;
+       case kNoScheme:
+         if (!has_base || (IS_CANNOT_BE_BASE(base.flags) && ch != '#')) {
+           FAILED()
+         } else if (IS_CANNOT_BE_BASE(base.flags) && ch == '#') {
+           SET_HAVE_SCHEME()
+           url.scheme = base.scheme;
+           if (IsSpecial(url.scheme)) {
+             SPECIAL()
+           } else {
+             url.flags &= ~URL_FLAGS_SPECIAL;
+           }
+           if (DOES_HAVE_PATH(base)) {
+             SET_HAVE_PATH()
+             url.path = base.path;
+           }
+           if (DOES_HAVE_QUERY(base)) {
+             SET_HAVE_QUERY()
+             url.query = base.query;
+           }
+           if (DOES_HAVE_FRAGMENT(base)) {
+             SET_HAVE_FRAGMENT()
+             url.fragment = base.fragment;
+           }
+           CANNOT_BE_BASE()
+           state = kFragment;
+         } else if (has_base &&
+                    DOES_HAVE_SCHEME(base) &&
+                    base.scheme != "file:") {
+           state = kRelative;
+           continue;
+         } else {
+           SET_HAVE_SCHEME()
+           url.scheme = "file:";
+           SPECIAL()
+           state = kFile;
+           continue;
+         }
+         break;
+       case kSpecialRelativeOrAuthority:
+         if (ch == '/' && p[1] == '/') {
+           state = kSpecialAuthorityIgnoreSlashes;
+           p++;
+         } else {
+           state = kRelative;
+           continue;
+         }
+         break;
+       case kPathOrAuthority:
+         if (ch == '/') {
+           state = kAuthority;
+         } else {
+           state = kPath;
+           continue;
+         }
+         break;
+       case kRelative:
+         // Inherit the scheme from the base, then decide how much more of
+         // the base to inherit from the current character.
+         SET_HAVE_SCHEME()
+         url.scheme = base.scheme;
+         if (IsSpecial(url.scheme)) {
+           SPECIAL()
+         } else {
+           url.flags &= ~URL_FLAGS_SPECIAL;
+         }
+         switch (ch) {
+           case kEOL:
+             if (DOES_HAVE_USERNAME(base)) {
+               SET_HAVE_USERNAME()
+               url.username = base.username;
+             }
+             if (DOES_HAVE_PASSWORD(base)) {
+               SET_HAVE_PASSWORD()
+               url.password = base.password;
+             }
+             if (DOES_HAVE_HOST(base)) {
+               SET_HAVE_HOST()
+               url.host = base.host;
+             }
+             if (DOES_HAVE_QUERY(base)) {
+               SET_HAVE_QUERY()
+               url.query = base.query;
+             }
+             if (DOES_HAVE_PATH(base)) {
+               SET_HAVE_PATH()
+               url.path = base.path;
+             }
+             url.port = base.port;
+             break;
+           case '/':
+             state = kRelativeSlash;
+             break;
+           case '?':
+             if (DOES_HAVE_USERNAME(base)) {
+               SET_HAVE_USERNAME()
+               url.username = base.username;
+             }
+             if (DOES_HAVE_PASSWORD(base)) {
+               SET_HAVE_PASSWORD()
+               url.password = base.password;
+             }
+             if (DOES_HAVE_HOST(base)) {
+               SET_HAVE_HOST()
+               url.host = base.host;
+             }
+             if (DOES_HAVE_PATH(base)) {
+               SET_HAVE_PATH()
+               url.path = base.path;
+             }
+             url.port = base.port;
+             state = kQuery;
+             break;
+           case '#':
+             if (DOES_HAVE_USERNAME(base)) {
+               SET_HAVE_USERNAME()
+               url.username = base.username;
+             }
+             if (DOES_HAVE_PASSWORD(base)) {
+               SET_HAVE_PASSWORD()
+               url.password = base.password;
+             }
+             if (DOES_HAVE_HOST(base)) {
+               SET_HAVE_HOST()
+               url.host = base.host;
+             }
+             if (DOES_HAVE_QUERY(base)) {
+               SET_HAVE_QUERY()
+               url.query = base.query;
+             }
+             if (DOES_HAVE_PATH(base)) {
+               SET_HAVE_PATH()
+               url.path = base.path;
+             }
+             url.port = base.port;
+             state = kFragment;
+             break;
+           default:
+             if (special_back_slash) {
+               state = kRelativeSlash;
+             } else {
+               if (DOES_HAVE_USERNAME(base)) {
+                 SET_HAVE_USERNAME()
+                 url.username = base.username;
+               }
+               if (DOES_HAVE_PASSWORD(base)) {
+                 SET_HAVE_PASSWORD()
+                 url.password = base.password;
+               }
+               if (DOES_HAVE_HOST(base)) {
+                 SET_HAVE_HOST()
+                 url.host = base.host;
+               }
+               if (DOES_HAVE_PATH(base)) {
+                 SET_HAVE_PATH()
+                 url.path = base.path;
+                 // Drop the base's last segment; the input replaces it.
+                 if (!url.path.empty())
+                   url.path.pop_back();
+               }
+               url.port = base.port;
+               state = kPath;
+               continue;
+             }
+         }
+         break;
+       case kRelativeSlash:
+         if (ch == '/' || special_back_slash) {
+           state = kSpecialAuthorityIgnoreSlashes;
+         } else {
+           if (DOES_HAVE_USERNAME(base)) {
+             SET_HAVE_USERNAME()
+             url.username = base.username;
+           }
+           if (DOES_HAVE_PASSWORD(base)) {
+             SET_HAVE_PASSWORD()
+             url.password = base.password;
+           }
+           if (DOES_HAVE_HOST(base)) {
+             SET_HAVE_HOST()
+             url.host = base.host;
+           }
+           url.port = base.port;
+           state = kPath;
+           continue;
+         }
+         break;
+       case kSpecialAuthoritySlashes:
+         state = kSpecialAuthorityIgnoreSlashes;
+         if (ch == '/' && p[1] == '/') {
+           p++;
+         } else {
+           continue;
+         }
+         break;
+       case kSpecialAuthorityIgnoreSlashes:
+         if (ch != '/' && ch != '\\') {
+           state = kAuthority;
+           continue;
+         }
+         break;
+       case kAuthority:
+         if (ch == '@') {
+           // Everything buffered so far is userinfo. A second '@' means the
+           // previous one was part of the userinfo, so it gets encoded.
+           if (atflag) {
+             buffer.reserve(buffer.size() + 3);
+             buffer.insert(0, "%40");
+           }
+           atflag = true;
+           const size_t blen = buffer.size();
+           if (blen > 0 && buffer[0] != ':') {
+             SET_HAVE_USERNAME()
+           }
+           for (size_t n = 0; n < blen; n++) {
+             const char bch = buffer[n];
+             if (bch == ':') {
+               SET_HAVE_PASSWORD()
+               // Only the first ':' is the separator; later ones are data.
+               if (!uflag) {
+                 uflag = true;
+                 continue;
+               }
+             }
+             if (uflag) {
+               AppendOrEscape(&url.password, bch, UserinfoEncodeSet);
+             } else {
+               AppendOrEscape(&url.username, bch, UserinfoEncodeSet);
+             }
+           }
+           buffer.clear();
+         } else if (ch == kEOL ||
+                    ch == '/' ||
+                    ch == '?' ||
+                    ch == '#' ||
+                    special_back_slash) {
+           // Rewind to the start of the buffered text (including any skipped
+           // whitespace) and re-read it as the host.
+           p -= buffer.size() + 1 + wskip;
+           buffer.clear();
+           state = kHost;
+         } else {
+           buffer += ch;
+         }
+         break;
+       case kHost:
+       case kHostname:
+         if (ch == ':' && !sbflag) {
+           if (special && buffer.size() == 0)
+             FAILED()
+           SET_HAVE_HOST()
+           if (ParseHost(&buffer, &url.host) < 0)
+             FAILED()
+           buffer.clear();
+           state = kPort;
+           if (override == kHostname)
+             TERMINATE()
+         } else if (ch == kEOL ||
+                    ch == '/' ||
+                    ch == '?' ||
+                    ch == '#' ||
+                    special_back_slash) {
+           p--;
+           if (special && buffer.size() == 0)
+             FAILED()
+           SET_HAVE_HOST()
+           if (ParseHost(&buffer, &url.host) < 0)
+             FAILED()
+           buffer.clear();
+           state = kPathStart;
+           if (state_override)
+             TERMINATE()
+         } else {
+           // Track brackets so that ':' inside an IPv6 literal is not taken
+           // for the port separator.
+           if (ch == '[')
+             sbflag = true;
+           if (ch == ']')
+             sbflag = false;
+           buffer += TO_LOWER(ch);
+         }
+         break;
+       case kPort:
+         if (ASCII_DIGIT(ch)) {
+           buffer += ch;
+         } else if (state_override ||
+                    ch == kEOL ||
+                    ch == '/' ||
+                    ch == '?' ||
+                    ch == '#' ||
+                    special_back_slash) {
+           if (buffer.size() > 0) {
+             // Stop accumulating as soon as the value is out of range: an
+             // unbounded loop would overflow the int on a long digit run
+             // and could wrap back into the valid port range.
+             int port = 0;
+             for (size_t i = 0; port <= 0xffff && i < buffer.size(); i++)
+               port = port * 10 + buffer[i] - '0';
+             if (port >= 0 && port <= 0xffff) {
+               url.port = NormalizePort(url.scheme, port);
+             } else if (!state_override) {
+               FAILED()
+             }
+             buffer.clear();
+           }
+           state = kPathStart;
+           continue;
+         } else {
+           FAILED();
+         }
+         break;
+       case kFile:
+         base_is_file = (
+             has_base &&
+             DOES_HAVE_SCHEME(base) &&
+             base.scheme == "file:");
+         switch (ch) {
+           case kEOL:
+             if (base_is_file) {
+               if (DOES_HAVE_HOST(base)) {
+                 SET_HAVE_HOST()
+                 url.host = base.host;
+               }
+               if (DOES_HAVE_PATH(base)) {
+                 SET_HAVE_PATH()
+                 url.path = base.path;
+               }
+               if (DOES_HAVE_QUERY(base)) {
+                 SET_HAVE_QUERY()
+                 url.query = base.query;
+               }
+             }
+             break;
+           case '\\':
+           case '/':
+             state = kFileSlash;
+             break;
+           case '?':
+             if (base_is_file) {
+               if (DOES_HAVE_HOST(base)) {
+                 SET_HAVE_HOST()
+                 url.host = base.host;
+               }
+               if (DOES_HAVE_PATH(base)) {
+                 SET_HAVE_PATH()
+                 url.path = base.path;
+               }
+               SET_HAVE_QUERY()
+               state = kQuery;
+             }
+             break;
+           case '#':
+             if (base_is_file) {
+               if (DOES_HAVE_HOST(base)) {
+                 SET_HAVE_HOST()
+                 url.host = base.host;
+               }
+               if (DOES_HAVE_PATH(base)) {
+                 SET_HAVE_PATH()
+                 url.path = base.path;
+               }
+               if (DOES_HAVE_QUERY(base)) {
+                 SET_HAVE_QUERY()
+                 url.query = base.query;
+               }
+               state = kFragment;
+             }
+             break;
+           default:
+             // Inherit from a file: base unless the input starts with a
+             // Windows drive letter followed by a path/query/fragment
+             // delimiter.
+             if (base_is_file &&
+                 (!WINDOWS_DRIVE_LETTER(ch, p[1]) ||
+                  end - p == 1 ||
+                  (p[2] != '/' &&
+                   p[2] != '\\' &&
+                   p[2] != '?' &&
+                   p[2] != '#'))) {
+               if (DOES_HAVE_HOST(base)) {
+                 SET_HAVE_HOST()
+                 url.host = base.host;
+               }
+               if (DOES_HAVE_PATH(base)) {
+                 SET_HAVE_PATH()
+                 url.path = base.path;
+               }
+               if (!url.path.empty())
+                 url.path.pop_back();
+             }
+             state = kPath;
+             continue;
+         }
+         break;
+       case kFileSlash:
+         if (ch == '/' || ch == '\\') {
+           state = kFileHost;
+         } else {
+           // Keep the base's drive letter as the first path segment.
+           if (has_base &&
+               DOES_HAVE_SCHEME(base) &&
+               base.scheme == "file:" &&
+               DOES_HAVE_PATH(base) &&
+               base.path.size() > 0 &&
+               NORMALIZED_WINDOWS_DRIVE_LETTER(base.path[0])) {
+             SET_HAVE_PATH()
+             url.path.push_back(base.path[0]);
+           }
+           state = kPath;
+           continue;
+         }
+         break;
+       case kFileHost:
+         if (ch == kEOL ||
+             ch == '/' ||
+             ch == '\\' ||
+             ch == '?' ||
+             ch == '#') {
+           if (buffer.size() == 2 &&
+               WINDOWS_DRIVE_LETTER(buffer[0], buffer[1])) {
+             // The buffered text is a drive letter, not a host; it is left
+             // in the buffer for kPath to consume.
+             state = kPath;
+           } else if (buffer.size() == 0) {
+             state = kPathStart;
+           } else {
+             if (buffer != "localhost") {
+               SET_HAVE_HOST()
+               if (ParseHost(&buffer, &url.host) < 0)
+                 FAILED()
+             }
+             buffer.clear();
+             state = kPathStart;
+           }
+           continue;
+         } else {
+           buffer += ch;
+         }
+         break;
+       case kPathStart:
+         state = kPath;
+         if (ch != '/' && !special_back_slash)
+           continue;
+         break;
+       case kPath:
+         if (ch == kEOL ||
+             ch == '/' ||
+             special_back_slash ||
+             (!state_override && (ch == '?' || ch == '#'))) {
+           // End of a segment: resolve "." and ".." or append the segment.
+           if (IsDoubleDotSegment(buffer)) {
+             if (!url.path.empty())
+               url.path.pop_back();
+             if (ch != '/' && !special_back_slash) {
+               SET_HAVE_PATH()
+               url.path.push_back("");
+             }
+           } else if (IsSingleDotSegment(buffer)) {
+             if (ch != '/' && !special_back_slash) {
+               SET_HAVE_PATH();
+               url.path.push_back("");
+             }
+           } else {
+             if (DOES_HAVE_SCHEME(url) &&
+                 url.scheme == "file:" &&
+                 url.path.empty() &&
+                 buffer.size() == 2 &&
+                 WINDOWS_DRIVE_LETTER(buffer[0], buffer[1])) {
+               // A leading drive letter in a file: URL drops the host and
+               // is normalized to the "X:" form.
+               url.flags &= ~URL_FLAGS_HAS_HOST;
+               buffer[1] = ':';
+             }
+             SET_HAVE_PATH()
+             std::string segment(buffer.c_str(), buffer.size());
+             url.path.push_back(segment);
+           }
+           buffer.clear();
+           if (ch == '?') {
+             SET_HAVE_QUERY()
+             state = kQuery;
+           } else if (ch == '#') {
+             state = kFragment;
+           }
+         } else {
+           // "%2e" / "%2E" is decoded to '.' right away.
+           if (ch == '%' && p[1] == '2' && TO_LOWER(p[2]) == 'e') {
+             buffer += '.';
+             p += 2;
+           } else {
+             AppendOrEscape(&buffer, ch, DefaultEncodeSet);
+           }
+         }
+         break;
+       case kCannotBeBase:
+         switch (ch) {
+           case '?':
+             state = kQuery;
+             break;
+           case '#':
+             state = kFragment;
+             break;
+           default:
+             // The whole opaque path is accumulated in a single segment.
+             if (url.path.size() == 0)
+               url.path.push_back("");
+             if (url.path.size() > 0 && ch != kEOL)
+               AppendOrEscape(&url.path[0], ch, SimpleEncodeSet);
+         }
+         break;
+       case kQuery:
+         if (ch == kEOL || (!state_override && ch == '#')) {
+           SET_HAVE_QUERY()
+           url.query = buffer;
+           buffer.clear();
+           if (ch == '#')
+             state = kFragment;
+         } else {
+           AppendOrEscape(&buffer, ch, QueryEncodeSet);
+         }
+         break;
+       case kFragment:
+         switch (ch) {
+           case kEOL:
+             SET_HAVE_FRAGMENT()
+             url.fragment = buffer;
+             break;
+           case 0:
+             // NUL bytes are dropped from the fragment.
+             break;
+           default:
+             buffer += ch;
+         }
+         break;
+       default:
+         INVALID_PARSE_STATE()
+         goto done;
+     }
+
+     p++;
+   }
+
+ done:
+
+   // Define the return value placeholders
+   const Local<Value> undef = Undefined(isolate);
+   Local<Value> argv[9] = {
+     undef,
+     undef,
+     undef,
+     undef,
+     undef,
+     undef,
+     undef,
+     undef,
+     undef,
+   };
+
+   // The flags are always reported; the components only on success.
+   argv[ARG_FLAGS] = Integer::NewFromUnsigned(isolate, url.flags);
+   if (!IS_FAILED(url.flags)) {
+     if (DOES_HAVE_SCHEME(url))
+       argv[ARG_PROTOCOL] = OneByteString(isolate, url.scheme.c_str());
+     if (DOES_HAVE_USERNAME(url))
+       argv[ARG_USERNAME] = UTF8STRING(isolate, url.username);
+     if (DOES_HAVE_PASSWORD(url))
+       argv[ARG_PASSWORD] = UTF8STRING(isolate, url.password);
+     if (DOES_HAVE_HOST(url))
+       argv[ARG_HOST] = UTF8STRING(isolate, url.host);
+     if (DOES_HAVE_QUERY(url))
+       argv[ARG_QUERY] = UTF8STRING(isolate, url.query);
+     if (DOES_HAVE_FRAGMENT(url))
+       argv[ARG_FRAGMENT] = UTF8STRING(isolate, url.fragment);
+     if (url.port > -1)
+       argv[ARG_PORT] = Integer::New(isolate, url.port);
+     if (DOES_HAVE_PATH(url))
+       argv[ARG_PATH] = Copy(isolate, url.path);
+   }
+
+   cb->Call(context, recv, 9, argv);
+ }
+
+ // JS binding entry point for the parser. Expected arguments:
+ //   args[0]  input string
+ //   args[1]  optional numeric parse state to start from (state override)
+ //   args[2]  base: object, null or undefined
+ //   args[3]  context: object, null or undefined
+ //   args[4]  callback that receives the parse result
+ static void Parse(const FunctionCallbackInfo<Value>& args) {
+   Environment* env = Environment::GetCurrent(args);
+   CHECK_GE(args.Length(), 5);
+   CHECK(args[0]->IsString());
+   CHECK(args[2]->IsUndefined() ||
+         args[2]->IsNull() ||
+         args[2]->IsObject());
+   CHECK(args[3]->IsUndefined() ||
+         args[3]->IsNull() ||
+         args[3]->IsObject());
+   CHECK(args[4]->IsFunction());
+
+   Utf8Value input(env->isolate(), args[0]);
+
+   // Anything other than a number means "no override".
+   const enum url_parse_state override =
+       args[1]->IsNumber()
+           ? static_cast<enum url_parse_state>(args[1]->Uint32Value())
+           : kUnknownState;
+
+   Local<Object> base_obj = args[2].As<Object>();
+   Local<Object> context_obj = args[3].As<Object>();
+   Local<Function> callback = args[4].As<Function>();
+   Parse(env, args.This(),
+         *input, input.length(),
+         override,
+         base_obj,
+         context_obj,
+         callback);
+ }
+
+ // JS binding: returns args[0] (a string) with every UTF-8 byte passed
+ // through AppendOrEscape using the userinfo encode set.
+ static void EncodeAuthSet(const FunctionCallbackInfo<Value>& args) {
+   Environment* env = Environment::GetCurrent(args);
+   CHECK_GE(args.Length(), 1);
+   CHECK(args[0]->IsString());
+   Utf8Value value(env->isolate(), args[0]);
+
+   std::string encoded;
+   const size_t byte_count = value.length();
+   encoded.reserve(byte_count);
+   const char* cursor = *value;
+   const char* const stop = cursor + byte_count;
+   for (; cursor != stop; ++cursor)
+     AppendOrEscape(&encoded, *cursor, UserinfoEncodeSet);
+
+   Local<String> result =
+       String::NewFromUtf8(env->isolate(),
+                           encoded.c_str(),
+                           v8::NewStringType::kNormal).ToLocalChecked();
+   args.GetReturnValue().Set(result);
+ }
+
+ // JS binding: parses args[0] as a host and returns its serialized form, or
+ // the empty string when the host fails to parse. ParseHost is called
+ // without the unicode argument here.
+ static void DomainToASCII(const FunctionCallbackInfo<Value>& args) {
+   Environment* env = Environment::GetCurrent(args);
+   CHECK_GE(args.Length(), 1);
+   CHECK(args[0]->IsString());
+   Utf8Value value(env->isolate(), args[0]);
+
+   url_host host{{""}, HOST_TYPE_DOMAIN};
+   ParseHost(&host, *value, value.length());
+   if (host.type != HOST_TYPE_FAILED) {
+     std::string serialized;
+     WriteHost(&host, &serialized);
+     Local<String> result =
+         String::NewFromUtf8(env->isolate(),
+                             serialized.c_str(),
+                             v8::NewStringType::kNormal).ToLocalChecked();
+     args.GetReturnValue().Set(result);
+   } else {
+     args.GetReturnValue().Set(FIXED_ONE_BYTE_STRING(env->isolate(), ""));
+   }
+ }
+
+ // JS binding: parses args[0] as a host with the unicode flag enabled and
+ // returns its serialized form, or the empty string when the host fails to
+ // parse.
+ static void DomainToUnicode(const FunctionCallbackInfo<Value>& args) {
+   Environment* env = Environment::GetCurrent(args);
+   CHECK_GE(args.Length(), 1);
+   CHECK(args[0]->IsString());
+   Utf8Value value(env->isolate(), args[0]);
+
+   url_host host{{""}, HOST_TYPE_DOMAIN};
+   ParseHost(&host, *value, value.length(), true);
+   if (host.type != HOST_TYPE_FAILED) {
+     std::string serialized;
+     WriteHost(&host, &serialized);
+     Local<String> result =
+         String::NewFromUtf8(env->isolate(),
+                             serialized.c_str(),
+                             v8::NewStringType::kNormal).ToLocalChecked();
+     args.GetReturnValue().Set(result);
+   } else {
+     args.GetReturnValue().Set(FIXED_ONE_BYTE_STRING(env->isolate(), ""));
+   }
+ }
+
+ // Module initializer for the url binding: attaches the four native
+ // functions to |target| and then defines the FLAGS, ARGS and PARSESTATES
+ // entries on it as constants. |unused| and |priv| are not read.
+ static void Init(Local<Object> target,
+ Local<Value> unused,
+ Local<Context> context,
+ void* priv) {
+ Environment* env = Environment::GetCurrent(context);
+ env->SetMethod(target, "parse", Parse);
+ env->SetMethod(target, "encodeAuth", EncodeAuthSet);
+ env->SetMethod(target, "domainToASCII", DomainToASCII);
+ env->SetMethod(target, "domainToUnicode", DomainToUnicode);
+
+ // FLAGS entries carry two fields (name, value); only the name is needed
+ // to define the constant.
+#define XX(name, _) NODE_DEFINE_CONSTANT(target, name);
+ FLAGS(XX)
+#undef XX
+
+ // ARGS and PARSESTATES entries carry a single field, the name.
+#define XX(name) NODE_DEFINE_CONSTANT(target, name);
+ ARGS(XX)
+ PARSESTATES(XX)
+#undef XX
+ }
+} // namespace url
+} // namespace node
+
+NODE_MODULE_CONTEXT_AWARE_BUILTIN(url, node::url::Init)
diff --git a/src/node_url.h b/src/node_url.h
new file mode 100644
index 0000000000..198c29938b
--- /dev/null
+++ b/src/node_url.h
@@ -0,0 +1,538 @@
+#ifndef SRC_NODE_URL_H_
+#define SRC_NODE_URL_H_
+
+#if defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS
+
+#include "node.h"
+#include <string>
+
+namespace node {
+namespace url {
+
+// Character-classification helpers.
+//
+// Every macro parameter is wrapped in parentheses so that an expression
+// argument (for example `c | 0x20` or `cond ? a : b`) is evaluated as a unit
+// instead of being re-associated by operator precedence after expansion.
+// Most of these macros still evaluate their argument more than once, so do
+// not pass expressions with side effects.
+
+// Tests bit `i` of the byte-array bitmap `a` (bit i lives in byte i / 8 at
+// bit position i % 8). Evaluates to 0 or 1.
+#define BIT_AT(a, i) \
+  (!!((unsigned int) (a)[(unsigned int) (i) >> 3] & \
+      (1 << ((unsigned int) (i) & 7))))
+// U+0009 TAB, U+000A LF or U+000D CR.
+#define TAB_AND_NEWLINE(ch) \
+  ((ch) == 0x09 || (ch) == 0x0a || (ch) == 0x0d)
+// '0'..'9'.
+#define ASCII_DIGIT(ch) \
+  ((ch) >= 0x30 && (ch) <= 0x39)
+// '0'..'9', 'A'..'F' or 'a'..'f'.
+#define ASCII_HEX_DIGIT(ch) \
+  (ASCII_DIGIT(ch) || ((ch) >= 0x41 && (ch) <= 0x46) || \
+   ((ch) >= 0x61 && (ch) <= 0x66))
+// 'A'..'Z' or 'a'..'z'.
+#define ASCII_ALPHA(ch) \
+  (((ch) >= 0x41 && (ch) <= 0x5a) || ((ch) >= 0x61 && (ch) <= 0x7a))
+#define ASCII_ALPHANUMERIC(ch) \
+  (ASCII_DIGIT(ch) || ASCII_ALPHA(ch))
+// Lower-cases ASCII letters by setting bit 0x20; other values pass through.
+#define TO_LOWER(ch) \
+  (ASCII_ALPHA(ch) ? ((ch) | 0x20) : (ch))
+// Characters accepted inside a scheme: alphanumerics, '+', '-' and '.'.
+#define SCHEME_CHAR(ch) \
+  (ASCII_ALPHANUMERIC(ch) || (ch) == '+' || (ch) == '-' || (ch) == '.')
+// A letter followed by ':' or '|'.
+#define WINDOWS_DRIVE_LETTER(ch, next) \
+  (ASCII_ALPHA(ch) && ((next) == ':' || (next) == '|'))
+// A two-character string made of a letter followed by ':'.
+#define NORMALIZED_WINDOWS_DRIVE_LETTER(str) \
+  ((str).length() == 2 && \
+   ASCII_ALPHA((str)[0]) && \
+   (str)[1] == ':')
+
+// Percent-encoding lookup table: hex[b] is the three-character string "%XX"
+// for byte value b, with XX written in upper-case hexadecimal. Indexed by an
+// unsigned char in AppendOrEscape().
+static const char* hex[256] = {
+ "%00", "%01", "%02", "%03", "%04", "%05", "%06", "%07",
+ "%08", "%09", "%0A", "%0B", "%0C", "%0D", "%0E", "%0F",
+ "%10", "%11", "%12", "%13", "%14", "%15", "%16", "%17",
+ "%18", "%19", "%1A", "%1B", "%1C", "%1D", "%1E", "%1F",
+ "%20", "%21", "%22", "%23", "%24", "%25", "%26", "%27",
+ "%28", "%29", "%2A", "%2B", "%2C", "%2D", "%2E", "%2F",
+ "%30", "%31", "%32", "%33", "%34", "%35", "%36", "%37",
+ "%38", "%39", "%3A", "%3B", "%3C", "%3D", "%3E", "%3F",
+ "%40", "%41", "%42", "%43", "%44", "%45", "%46", "%47",
+ "%48", "%49", "%4A", "%4B", "%4C", "%4D", "%4E", "%4F",
+ "%50", "%51", "%52", "%53", "%54", "%55", "%56", "%57",
+ "%58", "%59", "%5A", "%5B", "%5C", "%5D", "%5E", "%5F",
+ "%60", "%61", "%62", "%63", "%64", "%65", "%66", "%67",
+ "%68", "%69", "%6A", "%6B", "%6C", "%6D", "%6E", "%6F",
+ "%70", "%71", "%72", "%73", "%74", "%75", "%76", "%77",
+ "%78", "%79", "%7A", "%7B", "%7C", "%7D", "%7E", "%7F",
+ "%80", "%81", "%82", "%83", "%84", "%85", "%86", "%87",
+ "%88", "%89", "%8A", "%8B", "%8C", "%8D", "%8E", "%8F",
+ "%90", "%91", "%92", "%93", "%94", "%95", "%96", "%97",
+ "%98", "%99", "%9A", "%9B", "%9C", "%9D", "%9E", "%9F",
+ "%A0", "%A1", "%A2", "%A3", "%A4", "%A5", "%A6", "%A7",
+ "%A8", "%A9", "%AA", "%AB", "%AC", "%AD", "%AE", "%AF",
+ "%B0", "%B1", "%B2", "%B3", "%B4", "%B5", "%B6", "%B7",
+ "%B8", "%B9", "%BA", "%BB", "%BC", "%BD", "%BE", "%BF",
+ "%C0", "%C1", "%C2", "%C3", "%C4", "%C5", "%C6", "%C7",
+ "%C8", "%C9", "%CA", "%CB", "%CC", "%CD", "%CE", "%CF",
+ "%D0", "%D1", "%D2", "%D3", "%D4", "%D5", "%D6", "%D7",
+ "%D8", "%D9", "%DA", "%DB", "%DC", "%DD", "%DE", "%DF",
+ "%E0", "%E1", "%E2", "%E3", "%E4", "%E5", "%E6", "%E7",
+ "%E8", "%E9", "%EA", "%EB", "%EC", "%ED", "%EE", "%EF",
+ "%F0", "%F1", "%F2", "%F3", "%F4", "%F5", "%F6", "%F7",
+ "%F8", "%F9", "%FA", "%FB", "%FC", "%FD", "%FE", "%FF"
+};
+
+// 256-bit membership bitmap, one bit per byte value: byte value b is in the
+// set when bit (b & 7) of element [b >> 3] is 1. Each element therefore
+// covers eight consecutive byte values, least significant bit first.
+// Members: 00-1F, 7F and 80-FF.
+static const uint8_t SIMPLE_ENCODE_SET[32] = {
+  0xFF, 0xFF, 0xFF, 0xFF,  // 00-1F: all set
+  0x00, 0x00, 0x00, 0x00,  // 20-3F
+  0x00, 0x00, 0x00, 0x00,  // 40-5F
+  0x00, 0x00, 0x00,        // 60-77
+  0x80,                    // 78-7F: 7F
+  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,  // 80-BF: all set
+  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF   // C0-FF: all set
+};
+
+// 256-bit membership bitmap, one bit per byte value: byte value b is in the
+// set when bit (b & 7) of element [b >> 3] is 1. Each element therefore
+// covers eight consecutive byte values, least significant bit first.
+// Members: 00-1F, space, '"', '#', '<', '>', '?', '`', '{', '}', 7F, 80-FF.
+static const uint8_t DEFAULT_ENCODE_SET[32] = {
+  0xFF, 0xFF, 0xFF, 0xFF,  // 00-1F: all set
+  0x0D,                    // 20-27: 20 (space), 22 ("), 23 (#)
+  0x00, 0x00,              // 28-37
+  0xD0,                    // 38-3F: 3C (<), 3E (>), 3F (?)
+  0x00, 0x00, 0x00, 0x00,  // 40-5F
+  0x01,                    // 60-67: 60 (`)
+  0x00, 0x00,              // 68-77
+  0xA8,                    // 78-7F: 7B ({), 7D (}), 7F
+  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,  // 80-BF: all set
+  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF   // C0-FF: all set
+};
+
+// 256-bit membership bitmap, one bit per byte value: byte value b is in the
+// set when bit (b & 7) of element [b >> 3] is 1. Each element therefore
+// covers eight consecutive byte values, least significant bit first.
+// Members: 00-1F, space, '"', '#', '/', ':' through '@', '[' through '^',
+// '`', '{', '|', '}', 7F, 80-FF.
+static const uint8_t USERINFO_ENCODE_SET[32] = {
+  0xFF, 0xFF, 0xFF, 0xFF,  // 00-1F: all set
+  0x0D,                    // 20-27: 20 (space), 22 ("), 23 (#)
+  0x80,                    // 28-2F: 2F (/)
+  0x00,                    // 30-37
+  0xFC,                    // 38-3F: 3A (:) 3B (;) 3C (<) 3D (=) 3E (>) 3F (?)
+  0x01,                    // 40-47: 40 (@)
+  0x00, 0x00,              // 48-57
+  0x78,                    // 58-5F: 5B ([), 5C (backslash), 5D (]), 5E (^)
+  0x01,                    // 60-67: 60 (`)
+  0x00, 0x00,              // 68-77
+  0xB8,                    // 78-7F: 7B ({), 7C (|), 7D (}), 7F
+  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,  // 80-BF: all set
+  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF   // C0-FF: all set
+};
+
+// 256-bit membership bitmap, one bit per byte value: byte value b is in the
+// set when bit (b & 7) of element [b >> 3] is 1. Each element therefore
+// covers eight consecutive byte values, least significant bit first.
+// Members: 00-1F, space, '"', '#', '<', '>', 7F, 80-FF.
+static const uint8_t QUERY_ENCODE_SET[32] = {
+  0xFF, 0xFF, 0xFF, 0xFF,  // 00-1F: all set
+  0x0D,                    // 20-27: 20 (space), 22 ("), 23 (#)
+  0x00, 0x00,              // 28-37
+  0x50,                    // 38-3F: 3C (<), 3E (>)
+  0x00, 0x00, 0x00, 0x00,  // 40-5F
+  0x00, 0x00, 0x00,        // 60-77
+  0x80,                    // 78-7F: 7F
+  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,  // 80-BF: all set
+  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF   // C0-FF: all set
+};
+
+// Predicate type: returns true when ch has to be percent-encoded.
+typedef bool (*must_escape_cb)(const unsigned char ch);
+
+// Appends one byte to *str. When test(ch) is true the three-character
+// "%XX" entry of the hex table is appended; otherwise the raw byte is.
+static inline void AppendOrEscape(std::string* str,
+                                  const unsigned char ch,
+                                  must_escape_cb test) {
+  if (!test(ch)) {
+    str->push_back(ch);
+    return;
+  }
+  str->append(hex[ch]);
+}
+
+// True when the bit for byte value ch is set in SIMPLE_ENCODE_SET.
+static inline bool SimpleEncodeSet(const unsigned char ch) {
+  const unsigned int group = SIMPLE_ENCODE_SET[ch >> 3];
+  const unsigned int mask = 1u << (ch & 7);
+  return (group & mask) != 0;
+}
+
+// True when the bit for byte value ch is set in DEFAULT_ENCODE_SET.
+static inline bool DefaultEncodeSet(const unsigned char ch) {
+  const unsigned int group = DEFAULT_ENCODE_SET[ch >> 3];
+  const unsigned int mask = 1u << (ch & 7);
+  return (group & mask) != 0;
+}
+
+// True when the bit for byte value ch is set in USERINFO_ENCODE_SET.
+static inline bool UserinfoEncodeSet(const unsigned char ch) {
+  const unsigned int group = USERINFO_ENCODE_SET[ch >> 3];
+  const unsigned int mask = 1u << (ch & 7);
+  return (group & mask) != 0;
+}
+
+// True when the bit for byte value ch is set in QUERY_ENCODE_SET.
+static inline bool QueryEncodeSet(const unsigned char ch) {
+  const unsigned int group = QUERY_ENCODE_SET[ch >> 3];
+  const unsigned int mask = 1u << (ch & 7);
+  return (group & mask) != 0;
+}
+
+// Converts one hexadecimal digit character to its value 0..15. Both letter
+// cases are accepted. Any other character yields static_cast<unsigned>(-1).
+static inline unsigned hex2bin(const char ch) {
+  const bool is_decimal = ch >= '0' && ch <= '9';
+  const bool is_upper = ch >= 'A' && ch <= 'F';
+  const bool is_lower = ch >= 'a' && ch <= 'f';
+
+  unsigned value = static_cast<unsigned>(-1);
+  if (is_decimal) {
+    value = ch - '0';
+  } else if (is_upper) {
+    value = 10 + (ch - 'A');
+  } else if (is_lower) {
+    value = 10 + (ch - 'a');
+  }
+  return value;
+}
+
+// Percent-decodes the `len` bytes starting at `input` and appends the result
+// to *dest.
+//
+// A '%' followed by two hexadecimal digits that both lie inside the input is
+// replaced by the byte they encode. Every other byte, including a '%' that
+// is not followed by two hex digits, is copied through unchanged.
+//
+// Only input[0 .. len) is ever read, so the buffer does not need to be
+// NUL-terminated. Always returns 0.
+static inline int PercentDecode(const char* input,
+                                size_t len,
+                                std::string* dest) {
+  if (len == 0)
+    return 0;
+  dest->reserve(len);
+  const char* pointer = input;
+  const char* end = input + len;
+  while (pointer < end) {
+    const char ch = pointer[0];
+    // Bytes that follow the current one and still belong to the input. An
+    // escape needs two of them; checking this first keeps the pointer[1] and
+    // pointer[2] reads below inside the buffer.
+    const size_t remaining = end - pointer - 1;
+    if (ch != '%' || remaining < 2 ||
+        !ASCII_HEX_DIGIT(pointer[1]) ||
+        !ASCII_HEX_DIGIT(pointer[2])) {
+      *dest += ch;
+      pointer++;
+      continue;
+    }
+    const unsigned a = hex2bin(pointer[1]);
+    const unsigned b = hex2bin(pointer[2]);
+    *dest += static_cast<char>(a * 16 + b);
+    pointer += 3;
+  }
+  return 0;
+}
+
+// X-macro list of the special schemes. Each entry is
+// XX(scheme including its trailing ':', port number). IsSpecial() compares
+// against the scheme strings; NormalizePort() returns -1 when both the
+// scheme and the port of an entry match. "file:" is listed with port -1.
+#define SPECIALS(XX) \
+ XX("ftp:", 21) \
+ XX("file:", -1) \
+ XX("gopher:", 70) \
+ XX("http:", 80) \
+ XX("https:", 443) \
+ XX("ws:", 80) \
+ XX("wss:", 443)
+
+// X-macro list of parser state names. It is expanded into the enumerators of
+// enum url_parse_state, so the order of the entries determines their numeric
+// values.
+#define PARSESTATES(XX) \
+ XX(kSchemeStart) \
+ XX(kScheme) \
+ XX(kNoScheme) \
+ XX(kSpecialRelativeOrAuthority) \
+ XX(kPathOrAuthority) \
+ XX(kRelative) \
+ XX(kRelativeSlash) \
+ XX(kSpecialAuthoritySlashes) \
+ XX(kSpecialAuthorityIgnoreSlashes) \
+ XX(kAuthority) \
+ XX(kHost) \
+ XX(kHostname) \
+ XX(kPort) \
+ XX(kFile) \
+ XX(kFileSlash) \
+ XX(kFileHost) \
+ XX(kPathStart) \
+ XX(kPath) \
+ XX(kCannotBeBase) \
+ XX(kQuery) \
+ XX(kFragment)
+
+// X-macro list of XX(name, value) pairs, expanded into the enumerators of
+// enum url_flags. Apart from URL_FLAGS_NONE (0) every value is a distinct
+// single bit, so the flags can be combined with bitwise OR.
+#define FLAGS(XX) \
+ XX(URL_FLAGS_NONE, 0) \
+ XX(URL_FLAGS_FAILED, 0x01) \
+ XX(URL_FLAGS_CANNOT_BE_BASE, 0x02) \
+ XX(URL_FLAGS_INVALID_PARSE_STATE, 0x04) \
+ XX(URL_FLAGS_TERMINATED, 0x08) \
+ XX(URL_FLAGS_SPECIAL, 0x10) \
+ XX(URL_FLAGS_HAS_SCHEME, 0x20) \
+ XX(URL_FLAGS_HAS_USERNAME, 0x40) \
+ XX(URL_FLAGS_HAS_PASSWORD, 0x80) \
+ XX(URL_FLAGS_HAS_HOST, 0x100) \
+ XX(URL_FLAGS_HAS_PATH, 0x200) \
+ XX(URL_FLAGS_HAS_QUERY, 0x400) \
+ XX(URL_FLAGS_HAS_FRAGMENT, 0x800)
+
+// X-macro list of names, expanded into the enumerators of enum url_cb_args
+// (numbered from 0 in list order).
+// NOTE(review): presumably these are the positional indices of the values
+// handed to the JavaScript callback - confirm against node_url.cc.
+#define ARGS(XX) \
+ XX(ARG_FLAGS) \
+ XX(ARG_PROTOCOL) \
+ XX(ARG_USERNAME) \
+ XX(ARG_PASSWORD) \
+ XX(ARG_HOST) \
+ XX(ARG_PORT) \
+ XX(ARG_PATH) \
+ XX(ARG_QUERY) \
+ XX(ARG_FRAGMENT)
+
+// Sentinel character value.
+// NOTE(review): stored in a plain char, so it compares equal to -1 only on
+// targets where char is signed; presumably marks end of input for the
+// parser - confirm against node_url.cc.
+static const char kEOL = -1;
+
+// Parser states. kUnknownState is -1; the enumerators generated from
+// PARSESTATES follow it and therefore count up from 0 in list order.
+//
+// The declaration deliberately ends without a declarator: writing
+// `} url_parse_state;` would define a namespace-scope variable in every
+// translation unit that includes this header and would hide the type name
+// behind that variable.
+enum url_parse_state {
+  kUnknownState = -1,
+#define XX(name) name,
+  PARSESTATES(XX)
+#undef XX
+};
+
+// Bit flags generated from FLAGS; each enumerator takes the value given in
+// the list.
+//
+// The declaration deliberately ends without a declarator: writing
+// `} url_flags;` would define a namespace-scope variable in every
+// translation unit that includes this header and would hide the type name
+// behind that variable.
+enum url_flags {
+#define XX(name, val) name = val,
+  FLAGS(XX)
+#undef XX
+};
+
+// Enumerators generated from ARGS, numbered from 0 in list order.
+//
+// The declaration deliberately ends without a declarator: writing
+// `} url_cb_args;` would define a namespace-scope variable in every
+// translation unit that includes this header and would hide the type name
+// behind that variable.
+enum url_cb_args {
+#define XX(name) name,
+  ARGS(XX)
+#undef XX
+};
+
+// Returns true when `scheme` (written with its trailing ':') equals one of
+// the scheme strings listed in SPECIALS.
+// The argument is taken by const reference so that a lookup does not copy
+// the string.
+static inline bool IsSpecial(const std::string& scheme) {
+#define XX(name, _) if (scheme == name) return true;
+  SPECIALS(XX);
+#undef XX
+  return false;
+}
+
+// Returns -1 when (scheme, p) matches an entry of SPECIALS, i.e. when p is
+// the port listed for that scheme; otherwise returns p unchanged. `scheme`
+// is written with its trailing ':'.
+// The argument is taken by const reference so that a lookup does not copy
+// the string.
+static inline int NormalizePort(const std::string& scheme, int p) {
+#define XX(name, port) if (scheme == name && p == port) return -1;
+  SPECIALS(XX);
+#undef XX
+  return p;
+}
+
+// Plain record holding the components of a URL.
+// NOTE(review): std::vector is used below but this header includes only
+// "node.h" and <string>; presumably <vector> arrives transitively - confirm.
+struct url_data {
+ // Starts as URL_FLAGS_NONE. NOTE(review): presumably a bitwise OR of
+ // url_flags values - confirm against the parser.
+ int32_t flags = URL_FLAGS_NONE;
+ // Starts as -1. NOTE(review): -1 presumably means "no port", matching the
+ // value NormalizePort() returns for a scheme's listed port - confirm.
+ int port = -1;
+ std::string scheme;
+ std::string username;
+ std::string password;
+ std::string host;
+ std::string query;
+ std::string fragment;
+ // NOTE(review): presumably one element per path segment - confirm.
+ std::vector<std::string> path;
+};
+
+// Overlapping storage for a host value: a domain string, a 32-bit IPv4
+// value, or eight 16-bit IPv6 pieces. The union itself does not record which
+// member is in use.
+union url_host_value {
+ std::string domain;
+ uint32_t ipv4;
+ uint16_t ipv6[8];
+ // Empty body: nothing is destroyed here, so std::string's destructor is
+ // never run on `domain` by this union.
+ // NOTE(review): any heap storage held by `domain` looks like it is leaked
+ // unless the owner destroys it explicitly - confirm.
+ ~url_host_value() {}
+};
+
+// Kind of host held by a url_host. HOST_TYPE_FAILED (-1) is the value the
+// DomainToASCII/DomainToUnicode bindings test for after calling ParseHost().
+enum url_host_type {
+ HOST_TYPE_FAILED = -1,
+ HOST_TYPE_DOMAIN = 0,
+ HOST_TYPE_IPV4 = 1,
+ HOST_TYPE_IPV6 = 2
+};
+
+// A host value together with its kind. Declared as an aggregate with `value`
+// first, which is what lets callers write url_host{{""}, HOST_TYPE_DOMAIN}.
+// NOTE(review): `type` presumably identifies the active member of `value` -
+// confirm against ParseHost()/WriteHost().
+struct url_host {
+ url_host_value value;
+ enum url_host_type type;
+};
+} // namespace url
+
+} // namespace node
+
+#endif // defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS
+
+#endif // SRC_NODE_URL_H_
diff --git a/test/fixtures/url-setter-tests.json b/test/fixtures/url-setter-tests.json
new file mode 100644
index 0000000000..10d6895828
--- /dev/null
+++ b/test/fixtures/url-setter-tests.json
@@ -0,0 +1,1134 @@
+{
+ "comment": [
+ "## Tests for setters of https://url.spec.whatwg.org/#urlutils-members",
+ "",
+ "This file contains a JSON object.",
+ "Other than 'comment', each key is an attribute of the `URL` interface",
+ "defined in WHATWG’s URL Standard.",
+ "The values are arrays of test case objects for that attribute.",
+ "",
+ "To run a test case for the attribute `attr`:",
+ "",
+ "* Create a new `URL` object with the value for the 'href' key",
+ " as the constructor's single parameter. (Without a base URL.)",
+ " This must not throw.",
+ "* Set the attribute `attr` to (invoke its setter with)",
+ " the value for the 'new_value' key.",
+ "* The value for the 'expected' key is another object.",
+ " For each `key` / `value` pair of that object,",
+ " get the attribute `key` (invoke its getter).",
+ " The returned string must be equal to `value`.",
+ "",
+ "Note: the 'href' setter is already covered by urltestdata.json.",
+ "Source: https://github.com/w3c/web-platform-tests/tree/master/url"
+ ],
+ "protocol": [
+ {
+ "comment": "The empty string is not a valid scheme. Setter leaves the URL unchanged.",
+ "href": "a://example.net",
+ "new_value": "",
+ "expected": {
+ "href": "a://example.net/",
+ "protocol": "a:"
+ }
+ },
+ {
+ "href": "a://example.net",
+ "new_value": "b",
+ "expected": {
+ "href": "b://example.net/",
+ "protocol": "b:"
+ }
+ },
+ {
+ "comment": "Upper-case ASCII is lower-cased",
+ "href": "a://example.net",
+ "new_value": "B",
+ "expected": {
+ "href": "b://example.net/",
+ "protocol": "b:"
+ }
+ },
+ {
+ "comment": "Non-ASCII is rejected",
+ "href": "a://example.net",
+ "new_value": "é",
+ "expected": {
+ "href": "a://example.net/",
+ "protocol": "a:"
+ }
+ },
+ {
+ "comment": "No leading digit",
+ "href": "a://example.net",
+ "new_value": "0b",
+ "expected": {
+ "href": "a://example.net/",
+ "protocol": "a:"
+ }
+ },
+ {
+ "comment": "No leading punctuation",
+ "href": "a://example.net",
+ "new_value": "+b",
+ "expected": {
+ "href": "a://example.net/",
+ "protocol": "a:"
+ }
+ },
+ {
+ "href": "a://example.net",
+ "new_value": "bC0+-.",
+ "expected": {
+ "href": "bc0+-.://example.net/",
+ "protocol": "bc0+-.:"
+ }
+ },
+ {
+ "comment": "Only some punctuation is acceptable",
+ "href": "a://example.net",
+ "new_value": "b,c",
+ "expected": {
+ "href": "a://example.net/",
+ "protocol": "a:"
+ }
+ },
+ {
+ "comment": "Non-ASCII is rejected",
+ "href": "a://example.net",
+ "new_value": "bé",
+ "expected": {
+ "href": "a://example.net/",
+ "protocol": "a:"
+ }
+ },
+ {
+ "comment": "Can’t switch from special scheme to non-special. Note: this may change, see https://github.com/whatwg/url/issues/104",
+ "href": "http://example.net",
+ "new_value": "b",
+ "expected": {
+ "href": "http://example.net/",
+ "protocol": "http:"
+ }
+ },
+ {
+ "comment": "Cannot-be-a-base URL doesn’t have a host, but URL in a special scheme must.",
+ "href": "mailto:me@example.net",
+ "new_value": "http",
+ "expected": {
+ "href": "mailto:me@example.net",
+ "protocol": "mailto:"
+ }
+ },
+ {
+ "comment": "Can’t switch from non-special scheme to special. Note: this may change, see https://github.com/whatwg/url/issues/104",
+ "href": "ssh://me@example.net",
+ "new_value": "http",
+ "expected": {
+ "href": "ssh://me@example.net/",
+ "protocol": "ssh:"
+ }
+ },
+ {
+ "comment": "Stuff after the first ':' is ignored",
+ "href": "http://example.net",
+ "new_value": "https:foo : bar",
+ "expected": {
+ "href": "https://example.net/",
+ "protocol": "https:"
+ }
+ },
+ {
+ "comment": "Stuff after the first ':' is ignored",
+ "href": "data:text/html,<p>Test",
+ "new_value": "view-source+data:foo : bar",
+ "expected": {
+ "href": "view-source+data:text/html,<p>Test",
+ "protocol": "view-source+data:"
+ }
+ }
+ ],
+ "username": [
+ {
+ "comment": "No host means no username",
+ "href": "file:///home/you/index.html",
+ "new_value": "me",
+ "expected": {
+ "href": "file:///home/you/index.html",
+ "username": ""
+ }
+ },
+ {
+ "comment": "No host means no username",
+ "href": "unix:/run/foo.socket",
+ "new_value": "me",
+ "expected": {
+ "href": "unix:/run/foo.socket",
+ "username": ""
+ }
+ },
+ {
+ "comment": "Cannot-be-a-base means no username",
+ "href": "mailto:you@example.net",
+ "new_value": "me",
+ "expected": {
+ "href": "mailto:you@example.net",
+ "username": ""
+ }
+ },
+ {
+ "href": "http://example.net",
+ "new_value": "me",
+ "expected": {
+ "href": "http://me@example.net/",
+ "username": "me"
+ }
+ },
+ {
+ "href": "http://:secret@example.net",
+ "new_value": "me",
+ "expected": {
+ "href": "http://me:secret@example.net/",
+ "username": "me"
+ }
+ },
+ {
+ "href": "http://me@example.net",
+ "new_value": "",
+ "expected": {
+ "href": "http://example.net/",
+ "username": ""
+ }
+ },
+ {
+ "href": "http://me:secret@example.net",
+ "new_value": "",
+ "expected": {
+ "href": "http://:secret@example.net/",
+ "username": ""
+ }
+ },
+ {
+ "comment": "UTF-8 percent encoding with the userinfo encode set.",
+ "href": "http://example.net",
+ "new_value": "\u0000\u0001\t\n\r\u001f !\"#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~\u007f\u0080\u0081Éé",
+ "expected": {
+ "href": "http://%00%01%09%0A%0D%1F%20!%22%23$%&'()*+,-.%2F09%3A%3B%3C%3D%3E%3F%40AZ%5B%5C%5D%5E_%60az%7B%7C%7D~%7F%C2%80%C2%81%C3%89%C3%A9@example.net/",
+ "username": "%00%01%09%0A%0D%1F%20!%22%23$%&'()*+,-.%2F09%3A%3B%3C%3D%3E%3F%40AZ%5B%5C%5D%5E_%60az%7B%7C%7D~%7F%C2%80%C2%81%C3%89%C3%A9"
+ }
+ },
+ {
+ "comment": "Bytes already percent-encoded are left as-is.",
+ "href": "http://example.net",
+ "new_value": "%c3%89té",
+ "expected": {
+ "href": "http://%c3%89t%C3%A9@example.net/",
+ "username": "%c3%89t%C3%A9"
+ }
+ }
+ ],
+ "password": [
+ {
+ "comment": "No host means no password",
+ "href": "file:///home/me/index.html",
+ "new_value": "secret",
+ "expected": {
+ "href": "file:///home/me/index.html",
+ "password": ""
+ }
+ },
+ {
+ "comment": "No host means no password",
+ "href": "unix:/run/foo.socket",
+ "new_value": "secret",
+ "expected": {
+ "href": "unix:/run/foo.socket",
+ "password": ""
+ }
+ },
+ {
+ "comment": "Cannot-be-a-base means no password",
+ "href": "mailto:me@example.net",
+ "new_value": "secret",
+ "expected": {
+ "href": "mailto:me@example.net",
+ "password": ""
+ }
+ },
+ {
+ "href": "http://example.net",
+ "new_value": "secret",
+ "expected": {
+ "href": "http://:secret@example.net/",
+ "password": "secret"
+ }
+ },
+ {
+ "href": "http://me@example.net",
+ "new_value": "secret",
+ "expected": {
+ "href": "http://me:secret@example.net/",
+ "password": "secret"
+ }
+ },
+ {
+ "href": "http://:secret@example.net",
+ "new_value": "",
+ "expected": {
+ "href": "http://example.net/",
+ "password": ""
+ }
+ },
+ {
+ "href": "http://me:secret@example.net",
+ "new_value": "",
+ "expected": {
+ "href": "http://me@example.net/",
+ "password": ""
+ }
+ },
+ {
+ "comment": "UTF-8 percent encoding with the userinfo encode set.",
+ "href": "http://example.net",
+ "new_value": "\u0000\u0001\t\n\r\u001f !\"#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~\u007f\u0080\u0081Éé",
+ "expected": {
+ "href": "http://:%00%01%09%0A%0D%1F%20!%22%23$%&'()*+,-.%2F09%3A%3B%3C%3D%3E%3F%40AZ%5B%5C%5D%5E_%60az%7B%7C%7D~%7F%C2%80%C2%81%C3%89%C3%A9@example.net/",
+ "password": "%00%01%09%0A%0D%1F%20!%22%23$%&'()*+,-.%2F09%3A%3B%3C%3D%3E%3F%40AZ%5B%5C%5D%5E_%60az%7B%7C%7D~%7F%C2%80%C2%81%C3%89%C3%A9"
+ }
+ },
+ {
+ "comment": "Bytes already percent-encoded are left as-is.",
+ "href": "http://example.net",
+ "new_value": "%c3%89té",
+ "expected": {
+ "href": "http://:%c3%89t%C3%A9@example.net/",
+ "password": "%c3%89t%C3%A9"
+ }
+ }
+ ],
+ "host": [
+ {
+ "comment": "Cannot-be-a-base means no host",
+ "href": "mailto:me@example.net",
+ "new_value": "example.com",
+ "expected": {
+ "href": "mailto:me@example.net",
+ "host": ""
+ }
+ },
+ {
+ "comment": "Cannot-be-a-base means no host",
+ "href": "data:text/plain,Stuff",
+ "new_value": "example.net",
+ "expected": {
+ "href": "data:text/plain,Stuff",
+ "host": ""
+ }
+ },
+ {
+ "href": "http://example.net",
+ "new_value": "example.com:8080",
+ "expected": {
+ "href": "http://example.com:8080/",
+ "host": "example.com:8080",
+ "hostname": "example.com",
+ "port": "8080"
+ }
+ },
+ {
+ "comment": "Port number is unchanged if not specified in the new value",
+ "href": "http://example.net:8080",
+ "new_value": "example.com",
+ "expected": {
+ "href": "http://example.com:8080/",
+ "host": "example.com:8080",
+ "hostname": "example.com",
+ "port": "8080"
+ }
+ },
+ {
+ "comment": "Port number is unchanged if empty in the new value. Note: this may change, see https://github.com/whatwg/url/pull/113",
+ "href": "http://example.net:8080",
+ "new_value": "example.com:",
+ "expected": {
+ "href": "http://example.com:8080/",
+ "host": "example.com:8080",
+ "hostname": "example.com",
+ "port": "8080"
+ }
+ },
+ {
+
+ "comment": "The empty host is not valid for special schemes",
+ "href": "http://example.net",
+ "new_value": "",
+ "expected": {
+ "href": "http://example.net/",
+ "host": "example.net"
+ }
+ },
+ {
+ "comment": "The empty host is OK for non-special schemes",
+ "href": "view-source+http://example.net/foo",
+ "new_value": "",
+ "expected": {
+ "href": "view-source+http:///foo",
+ "host": ""
+ }
+ },
+ {
+ "comment": "Path-only URLs can gain a host",
+ "href": "a:/foo",
+ "new_value": "example.net",
+ "expected": {
+ "href": "a://example.net/foo",
+ "host": "example.net"
+ }
+ },
+ {
+ "comment": "IPv4 address syntax is normalized",
+ "href": "http://example.net",
+ "new_value": "0x7F000001:8080",
+ "expected": {
+ "href": "http://127.0.0.1:8080/",
+ "host": "127.0.0.1:8080",
+ "hostname": "127.0.0.1",
+ "port": "8080"
+ }
+ },
+ {
+ "comment": "IPv6 address syntax is normalized",
+ "href": "http://example.net",
+ "new_value": "[::0:01]:2",
+ "expected": {
+ "href": "http://[::1]:2/",
+ "host": "[::1]:2",
+ "hostname": "[::1]",
+ "port": "2"
+ }
+ },
+ {
+ "comment": "Default port number is removed",
+ "href": "http://example.net",
+ "new_value": "example.com:80",
+ "expected": {
+ "href": "http://example.com/",
+ "host": "example.com",
+ "hostname": "example.com",
+ "port": ""
+ }
+ },
+ {
+ "comment": "Default port number is removed",
+ "href": "https://example.net",
+ "new_value": "example.com:443",
+ "expected": {
+ "href": "https://example.com/",
+ "host": "example.com",
+ "hostname": "example.com",
+ "port": ""
+ }
+ },
+ {
+ "comment": "Default port number is only removed for the relevant scheme",
+ "href": "https://example.net",
+ "new_value": "example.com:80",
+ "expected": {
+ "href": "https://example.com:80/",
+ "host": "example.com:80",
+ "hostname": "example.com",
+ "port": "80"
+ }
+ },
+ {
+ "comment": "Stuff after a / delimiter is ignored",
+ "href": "http://example.net/path",
+ "new_value": "example.com/stuff",
+ "expected": {
+ "href": "http://example.com/path",
+ "host": "example.com",
+ "hostname": "example.com",
+ "port": ""
+ }
+ },
+ {
+ "comment": "Stuff after a / delimiter is ignored",
+ "href": "http://example.net/path",
+ "new_value": "example.com:8080/stuff",
+ "expected": {
+ "href": "http://example.com:8080/path",
+ "host": "example.com:8080",
+ "hostname": "example.com",
+ "port": "8080"
+ }
+ },
+ {
+ "comment": "Stuff after a ? delimiter is ignored",
+ "href": "http://example.net/path",
+ "new_value": "example.com?stuff",
+ "expected": {
+ "href": "http://example.com/path",
+ "host": "example.com",
+ "hostname": "example.com",
+ "port": ""
+ }
+ },
+ {
+ "comment": "Stuff after a ? delimiter is ignored",
+ "href": "http://example.net/path",
+ "new_value": "example.com:8080?stuff",
+ "expected": {
+ "href": "http://example.com:8080/path",
+ "host": "example.com:8080",
+ "hostname": "example.com",
+ "port": "8080"
+ }
+ },
+ {
+ "comment": "Stuff after a # delimiter is ignored",
+ "href": "http://example.net/path",
+ "new_value": "example.com#stuff",
+ "expected": {
+ "href": "http://example.com/path",
+ "host": "example.com",
+ "hostname": "example.com",
+ "port": ""
+ }
+ },
+ {
+ "comment": "Stuff after a # delimiter is ignored",
+ "href": "http://example.net/path",
+ "new_value": "example.com:8080#stuff",
+ "expected": {
+ "href": "http://example.com:8080/path",
+ "host": "example.com:8080",
+ "hostname": "example.com",
+ "port": "8080"
+ }
+ },
+ {
+ "comment": "Stuff after a \\ delimiter is ignored for special schemes",
+ "href": "http://example.net/path",
+ "new_value": "example.com\\stuff",
+ "expected": {
+ "href": "http://example.com/path",
+ "host": "example.com",
+ "hostname": "example.com",
+ "port": ""
+ }
+ },
+ {
+ "comment": "Stuff after a \\ delimiter is ignored for special schemes",
+ "href": "http://example.net/path",
+ "new_value": "example.com:8080\\stuff",
+ "expected": {
+ "href": "http://example.com:8080/path",
+ "host": "example.com:8080",
+ "hostname": "example.com",
+ "port": "8080"
+ }
+ },
+ {
+ "comment": "\\ is not a delimiter for non-special schemes, and it’s invalid in a domain",
+ "href": "view-source+http://example.net/path",
+ "new_value": "example.com\\stuff",
+ "expected": {
+ "href": "view-source+http://example.net/path",
+ "host": "example.net",
+ "hostname": "example.net",
+ "port": ""
+ }
+ },
+ {
+ "comment": "Anything other than ASCII digit stops the port parser in a setter but is not an error",
+ "href": "view-source+http://example.net/path",
+ "new_value": "example.com:8080stuff2",
+ "expected": {
+ "href": "view-source+http://example.com:8080/path",
+ "host": "example.com:8080",
+ "hostname": "example.com",
+ "port": "8080"
+ }
+ },
+ {
+ "comment": "Anything other than ASCII digit stops the port parser in a setter but is not an error",
+ "href": "http://example.net/path",
+ "new_value": "example.com:8080stuff2",
+ "expected": {
+ "href": "http://example.com:8080/path",
+ "host": "example.com:8080",
+ "hostname": "example.com",
+ "port": "8080"
+ }
+ },
+ {
+ "comment": "Anything other than ASCII digit stops the port parser in a setter but is not an error",
+ "href": "http://example.net/path",
+ "new_value": "example.com:8080+2",
+ "expected": {
+ "href": "http://example.com:8080/path",
+ "host": "example.com:8080",
+ "hostname": "example.com",
+ "port": "8080"
+ }
+ },
+ {
+ "comment": "Port numbers are 16 bit integers",
+ "href": "http://example.net/path",
+ "new_value": "example.com:65535",
+ "expected": {
+ "href": "http://example.com:65535/path",
+ "host": "example.com:65535",
+ "hostname": "example.com",
+ "port": "65535"
+ }
+ },
+ {
+ "comment": "Port numbers are 16 bit integers, overflowing is an error. Hostname is still set, though.",
+ "href": "http://example.net/path",
+ "new_value": "example.com:65536",
+ "expected": {
+ "href": "http://example.com/path",
+ "host": "example.com",
+ "hostname": "example.com",
+ "port": ""
+ }
+ }
+ ],
+ "hostname": [
+ {
+ "comment": "Cannot-be-a-base means no host",
+ "href": "mailto:me@example.net",
+ "new_value": "example.com",
+ "expected": {
+ "href": "mailto:me@example.net",
+ "host": ""
+ }
+ },
+ {
+ "comment": "Cannot-be-a-base means no host",
+ "href": "data:text/plain,Stuff",
+ "new_value": "example.net",
+ "expected": {
+ "href": "data:text/plain,Stuff",
+ "host": ""
+ }
+ },
+ {
+ "href": "http://example.net:8080",
+ "new_value": "example.com",
+ "expected": {
+ "href": "http://example.com:8080/",
+ "host": "example.com:8080",
+ "hostname": "example.com",
+ "port": "8080"
+ }
+ },
+ {
+ "comment": "The empty host is not valid for special schemes",
+ "href": "http://example.net",
+ "new_value": "",
+ "expected": {
+ "href": "http://example.net/",
+ "host": "example.net"
+ }
+ },
+ {
+ "comment": "The empty host is OK for non-special schemes",
+ "href": "view-source+http://example.net/foo",
+ "new_value": "",
+ "expected": {
+ "href": "view-source+http:///foo",
+ "host": ""
+ }
+ },
+ {
+ "comment": "Path-only URLs can gain a host",
+ "href": "a:/foo",
+ "new_value": "example.net",
+ "expected": {
+ "href": "a://example.net/foo",
+ "host": "example.net"
+ }
+ },
+ {
+ "comment": "IPv4 address syntax is normalized",
+ "href": "http://example.net:8080",
+ "new_value": "0x7F000001",
+ "expected": {
+ "href": "http://127.0.0.1:8080/",
+ "host": "127.0.0.1:8080",
+ "hostname": "127.0.0.1",
+ "port": "8080"
+ }
+ },
+ {
+ "comment": "IPv6 address syntax is normalized",
+ "href": "http://example.net",
+ "new_value": "[::0:01]",
+ "expected": {
+ "href": "http://[::1]/",
+ "host": "[::1]",
+ "hostname": "[::1]",
+ "port": ""
+ }
+ },
+ {
+ "comment": "Stuff after a : delimiter is ignored",
+ "href": "http://example.net/path",
+ "new_value": "example.com:8080",
+ "expected": {
+ "href": "http://example.com/path",
+ "host": "example.com",
+ "hostname": "example.com",
+ "port": ""
+ }
+ },
+ {
+ "comment": "Stuff after a : delimiter is ignored",
+ "href": "http://example.net:8080/path",
+ "new_value": "example.com:",
+ "expected": {
+ "href": "http://example.com:8080/path",
+ "host": "example.com:8080",
+ "hostname": "example.com",
+ "port": "8080"
+ }
+ },
+ {
+ "comment": "Stuff after a / delimiter is ignored",
+ "href": "http://example.net/path",
+ "new_value": "example.com/stuff",
+ "expected": {
+ "href": "http://example.com/path",
+ "host": "example.com",
+ "hostname": "example.com",
+ "port": ""
+ }
+ },
+ {
+ "comment": "Stuff after a ? delimiter is ignored",
+ "href": "http://example.net/path",
+ "new_value": "example.com?stuff",
+ "expected": {
+ "href": "http://example.com/path",
+ "host": "example.com",
+ "hostname": "example.com",
+ "port": ""
+ }
+ },
+ {
+ "comment": "Stuff after a # delimiter is ignored",
+ "href": "http://example.net/path",
+ "new_value": "example.com#stuff",
+ "expected": {
+ "href": "http://example.com/path",
+ "host": "example.com",
+ "hostname": "example.com",
+ "port": ""
+ }
+ },
+ {
+ "comment": "Stuff after a \\ delimiter is ignored for special schemes",
+ "href": "http://example.net/path",
+ "new_value": "example.com\\stuff",
+ "expected": {
+ "href": "http://example.com/path",
+ "host": "example.com",
+ "hostname": "example.com",
+ "port": ""
+ }
+ },
+ {
+ "comment": "\\ is not a delimiter for non-special schemes, and it’s invalid in a domain",
+ "href": "view-source+http://example.net/path",
+ "new_value": "example.com\\stuff",
+ "expected": {
+ "href": "view-source+http://example.net/path",
+ "host": "example.net",
+ "hostname": "example.net",
+ "port": ""
+ }
+ }
+ ],
+ "port": [
+ {
+ "href": "http://example.net",
+ "new_value": "8080",
+ "expected": {
+ "href": "http://example.net:8080/",
+ "host": "example.net:8080",
+ "hostname": "example.net",
+ "port": "8080"
+ }
+ },
+ {
+ "comment": "Port number is unchanged if empty in the new value. Note: this may change, see https://github.com/whatwg/url/pull/113",
+ "href": "http://example.net:8080",
+ "new_value": "",
+ "expected": {
+ "href": "http://example.net:8080/",
+ "host": "example.net:8080",
+ "hostname": "example.net",
+ "port": "8080"
+ }
+ },
+ {
+ "comment": "Default port number is removed",
+ "href": "http://example.net:8080",
+ "new_value": "80",
+ "expected": {
+ "href": "http://example.net/",
+ "host": "example.net",
+ "hostname": "example.net",
+ "port": ""
+ }
+ },
+ {
+ "comment": "Default port number is removed",
+ "href": "https://example.net:4433",
+ "new_value": "443",
+ "expected": {
+ "href": "https://example.net/",
+ "host": "example.net",
+ "hostname": "example.net",
+ "port": ""
+ }
+ },
+ {
+ "comment": "Default port number is only removed for the relevant scheme",
+ "href": "https://example.net",
+ "new_value": "80",
+ "expected": {
+ "href": "https://example.net:80/",
+ "host": "example.net:80",
+ "hostname": "example.net",
+ "port": "80"
+ }
+ },
+ {
+ "comment": "Stuff after a / delimiter is ignored",
+ "href": "http://example.net/path",
+ "new_value": "8080/stuff",
+ "expected": {
+ "href": "http://example.net:8080/path",
+ "host": "example.net:8080",
+ "hostname": "example.net",
+ "port": "8080"
+ }
+ },
+ {
+ "comment": "Stuff after a ? delimiter is ignored",
+ "href": "http://example.net/path",
+ "new_value": "8080?stuff",
+ "expected": {
+ "href": "http://example.net:8080/path",
+ "host": "example.net:8080",
+ "hostname": "example.net",
+ "port": "8080"
+ }
+ },
+ {
+ "comment": "Stuff after a # delimiter is ignored",
+ "href": "http://example.net/path",
+ "new_value": "8080#stuff",
+ "expected": {
+ "href": "http://example.net:8080/path",
+ "host": "example.net:8080",
+ "hostname": "example.net",
+ "port": "8080"
+ }
+ },
+ {
+ "comment": "Stuff after a \\ delimiter is ignored for special schemes",
+ "href": "http://example.net/path",
+ "new_value": "8080\\stuff",
+ "expected": {
+ "href": "http://example.net:8080/path",
+ "host": "example.net:8080",
+ "hostname": "example.net",
+ "port": "8080"
+ }
+ },
+ {
+ "comment": "Anything other than ASCII digit stops the port parser in a setter but is not an error",
+ "href": "view-source+http://example.net/path",
+ "new_value": "8080stuff2",
+ "expected": {
+ "href": "view-source+http://example.net:8080/path",
+ "host": "example.net:8080",
+ "hostname": "example.net",
+ "port": "8080"
+ }
+ },
+ {
+ "comment": "Anything other than ASCII digit stops the port parser in a setter but is not an error",
+ "href": "http://example.net/path",
+ "new_value": "8080stuff2",
+ "expected": {
+ "href": "http://example.net:8080/path",
+ "host": "example.net:8080",
+ "hostname": "example.net",
+ "port": "8080"
+ }
+ },
+ {
+ "comment": "Anything other than ASCII digit stops the port parser in a setter but is not an error",
+ "href": "http://example.net/path",
+ "new_value": "8080+2",
+ "expected": {
+ "href": "http://example.net:8080/path",
+ "host": "example.net:8080",
+ "hostname": "example.net",
+ "port": "8080"
+ }
+ },
+ {
+ "comment": "Port numbers are 16 bit integers",
+ "href": "http://example.net/path",
+ "new_value": "65535",
+ "expected": {
+ "href": "http://example.net:65535/path",
+ "host": "example.net:65535",
+ "hostname": "example.net",
+ "port": "65535"
+ }
+ },
+ {
+ "comment": "Port numbers are 16 bit integers, overflowing is an error",
+ "href": "http://example.net:8080/path",
+ "new_value": "65536",
+ "expected": {
+ "href": "http://example.net:8080/path",
+ "host": "example.net:8080",
+ "hostname": "example.net",
+ "port": "8080"
+ }
+ }
+ ],
+ "pathname": [
+ {
+ "comment": "Cannot-be-a-base URLs don’t have a path",
+ "href": "mailto:me@example.net",
+ "new_value": "/foo",
+ "expected": {
+ "href": "mailto:me@example.net",
+ "pathname": "me@example.net"
+ }
+ },
+ {
+ "href": "unix:/run/foo.socket?timeout=10",
+ "new_value": "/var/log/../run/bar.socket",
+ "expected": {
+ "href": "unix:/var/run/bar.socket?timeout=10",
+ "pathname": "/var/run/bar.socket"
+ }
+ },
+ {
+ "href": "https://example.net#nav",
+ "new_value": "home",
+ "expected": {
+ "href": "https://example.net/home#nav",
+ "pathname": "/home"
+ }
+ },
+ {
+ "href": "https://example.net#nav",
+ "new_value": "../home",
+ "expected": {
+ "href": "https://example.net/home#nav",
+ "pathname": "/home"
+ }
+ },
+ {
+ "comment": "\\ is a segment delimiter for 'special' URLs",
+ "href": "http://example.net/home?lang=fr#nav",
+ "new_value": "\\a\\%2E\\b\\%2e.\\c",
+ "expected": {
+ "href": "http://example.net/a/c?lang=fr#nav",
+ "pathname": "/a/c"
+ }
+ },
+ {
+ "comment": "\\ is *not* a segment delimiter for non-'special' URLs",
+ "href": "view-source+http://example.net/home?lang=fr#nav",
+ "new_value": "\\a\\%2E\\b\\%2e.\\c",
+ "expected": {
+ "href": "view-source+http://example.net/\\a\\.\\b\\..\\c?lang=fr#nav",
+ "pathname": "/\\a\\.\\b\\..\\c"
+ }
+ },
+ {
+ "comment": "UTF-8 percent encoding with the default encode set. Tabs and newlines are removed.",
+ "href": "a:/",
+ "new_value": "\u0000\u0001\t\n\r\u001f !\"#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~\u007f\u0080\u0081Éé",
+ "expected": {
+ "href": "a:/%00%01%1F%20!%22%23$%&'()*+,-./09:;%3C=%3E%3F@AZ[\\]^_%60az%7B|%7D~%7F%C2%80%C2%81%C3%89%C3%A9",
+ "pathname": "/%00%01%1F%20!%22%23$%&'()*+,-./09:;%3C=%3E%3F@AZ[\\]^_%60az%7B|%7D~%7F%C2%80%C2%81%C3%89%C3%A9"
+ }
+ },
+ {
+ "comment": "Bytes already percent-encoded are left as-is, except %2E.",
+ "href": "http://example.net",
+ "new_value": "%2e%2E%c3%89té",
+ "expected": {
+ "href": "http://example.net/..%c3%89t%C3%A9",
+ "pathname": "/..%c3%89t%C3%A9"
+ }
+ }
+ ],
+ "search": [
+ {
+ "href": "https://example.net#nav",
+ "new_value": "lang=fr",
+ "expected": {
+ "href": "https://example.net/?lang=fr#nav",
+ "search": "?lang=fr"
+ }
+ },
+ {
+ "href": "https://example.net?lang=en-US#nav",
+ "new_value": "lang=fr",
+ "expected": {
+ "href": "https://example.net/?lang=fr#nav",
+ "search": "?lang=fr"
+ }
+ },
+ {
+ "href": "https://example.net?lang=en-US#nav",
+ "new_value": "?lang=fr",
+ "expected": {
+ "href": "https://example.net/?lang=fr#nav",
+ "search": "?lang=fr"
+ }
+ },
+ {
+ "href": "https://example.net?lang=en-US#nav",
+ "new_value": "??lang=fr",
+ "expected": {
+ "href": "https://example.net/??lang=fr#nav",
+ "search": "??lang=fr"
+ }
+ },
+ {
+"skip": "we do not pass this, but we do match Chrome's behavior",
+ "href": "https://example.net?lang=en-US#nav",
+ "new_value": "?",
+ "expected": {
+ "href": "https://example.net/?#nav",
+ "search": ""
+ }
+ },
+ {
+ "href": "https://example.net?lang=en-US#nav",
+ "new_value": "",
+ "expected": {
+ "href": "https://example.net/#nav",
+ "search": ""
+ }
+ },
+ {
+ "href": "https://example.net?lang=en-US",
+ "new_value": "",
+ "expected": {
+ "href": "https://example.net/",
+ "search": ""
+ }
+ },
+ {
+ "href": "https://example.net",
+ "new_value": "",
+ "expected": {
+ "href": "https://example.net/",
+ "search": ""
+ }
+ },
+ {
+ "comment": "UTF-8 percent encoding with the query encode set. Tabs and newlines are removed.",
+ "href": "a:/",
+ "new_value": "\u0000\u0001\t\n\r\u001f !\"#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~\u007f\u0080\u0081Éé",
+ "expected": {
+ "href": "a:/?%00%01%1F%20!%22%23$%&'()*+,-./09:;%3C=%3E?@AZ[\\]^_`az{|}~%7F%C2%80%C2%81%C3%89%C3%A9",
+ "search": "?%00%01%1F%20!%22%23$%&'()*+,-./09:;%3C=%3E?@AZ[\\]^_`az{|}~%7F%C2%80%C2%81%C3%89%C3%A9"
+ }
+ },
+ {
+ "comment": "Bytes already percent-encoded are left as-is",
+ "href": "http://example.net",
+ "new_value": "%c3%89té",
+ "expected": {
+ "href": "http://example.net/?%c3%89t%C3%A9",
+ "search": "?%c3%89t%C3%A9"
+ }
+ }
+ ],
+ "hash": [
+ {
+ "href": "https://example.net",
+ "new_value": "main",
+ "expected": {
+ "href": "https://example.net/#main",
+ "hash": "#main"
+ }
+ },
+ {
+ "href": "https://example.net#nav",
+ "new_value": "main",
+ "expected": {
+ "href": "https://example.net/#main",
+ "hash": "#main"
+ }
+ },
+ {
+ "href": "https://example.net?lang=en-US",
+ "new_value": "##nav",
+ "expected": {
+ "href": "https://example.net/?lang=en-US##nav",
+ "hash": "##nav"
+ }
+ },
+ {
+ "href": "https://example.net?lang=en-US#nav",
+ "new_value": "#main",
+ "expected": {
+ "href": "https://example.net/?lang=en-US#main",
+ "hash": "#main"
+ }
+ },
+ {
+"skip": "we do not pass this, but we do match Chrome's behavior",
+ "href": "https://example.net?lang=en-US#nav",
+ "new_value": "#",
+ "expected": {
+ "href": "https://example.net/?lang=en-US#",
+ "hash": ""
+ }
+ },
+ {
+ "href": "https://example.net?lang=en-US#nav",
+ "new_value": "",
+ "expected": {
+ "href": "https://example.net/?lang=en-US",
+ "hash": ""
+ }
+ },
+ {
+ "comment": "No percent-encoding at all (!); nuls, tabs, and newlines are removed",
+ "href": "a:/",
+ "new_value": "\u0000\u0001\t\n\r\u001f !\"#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~\u007f\u0080\u0081Éé",
+ "expected": {
+ "href": "a:/#\u0001\u001f !\"#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~\u007f\u0080\u0081Éé",
+ "hash": "#\u0001\u001f !\"#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~\u007f\u0080\u0081Éé"
+ }
+ },
+ {
+ "comment": "Bytes already percent-encoded are left as-is",
+ "href": "http://example.net",
+ "new_value": "%c3%89té",
+ "expected": {
+ "href": "http://example.net/#%c3%89té",
+ "hash": "#%c3%89té"
+ }
+ }
+ ]
+}
diff --git a/test/parallel/test-whatwg-url-parsing.js b/test/parallel/test-whatwg-url-parsing.js
new file mode 100644
index 0000000000..568bc82175
--- /dev/null
+++ b/test/parallel/test-whatwg-url-parsing.js
@@ -0,0 +1,122 @@
+'use strict';
+
+const common = require('../common');
+const URL = require('url').URL;
+const path = require('path');
+const assert = require('assert');
+const tests = require(path.join(common.fixturesDir, 'url-tests.json'));
+
+for (const test of tests) {
+ if (typeof test === 'string')
+ continue;
+
+ if (test.failure) {
+ assert.throws(() => new URL(test.input, test.base), /Invalid URL/);
+ } else {
+ assert.doesNotThrow(() => {
+ const url = new URL(test.input, test.base);
+ assert.strictEqual(url.href, test.href);
+ });
+ }
+}
+
+const additional_tests = [
+ {
+ 'url': 'tftp://foobar.com/someconfig;mode=netascii',
+ 'protocol': 'tftp:',
+ 'hostname': 'foobar.com',
+ 'pathname': '/someconfig;mode=netascii'
+ },
+ {
+ 'url': 'telnet://user:pass@foobar.com:23/',
+ 'protocol': 'telnet:',
+ 'username': 'user',
+ 'password': 'pass',
+ 'hostname': 'foobar.com',
+ 'port': '23',
+ 'pathname': '/'
+ },
+ {
+ 'url': 'ut2004://10.10.10.10:7777/Index.ut2',
+ 'protocol': 'ut2004:',
+ 'hostname': '10.10.10.10',
+ 'port': '7777',
+ 'pathname': '/Index.ut2'
+ },
+ {
+ 'url': 'redis://foo:bar@somehost:6379/0?baz=bam&qux=baz',
+ 'protocol': 'redis:',
+ 'username': 'foo',
+ 'password': 'bar',
+ 'hostname': 'somehost',
+ 'port': '6379',
+ 'pathname': '/0',
+ 'search': '?baz=bam&qux=baz'
+ },
+ {
+ 'url': 'rsync://foo@host:911/sup',
+ 'protocol': 'rsync:',
+ 'username': 'foo',
+ 'hostname': 'host',
+ 'port': '911',
+ 'pathname': '/sup'
+ },
+ {
+ 'url': 'git://github.com/foo/bar.git',
+ 'protocol': 'git:',
+ 'hostname': 'github.com',
+ 'pathname': '/foo/bar.git'
+ },
+ {
+ 'url': 'irc://myserver.com:6999/channel?passwd',
+ 'protocol': 'irc:',
+ 'hostname': 'myserver.com',
+ 'port': '6999',
+ 'pathname': '/channel',
+ 'search': '?passwd'
+ },
+ {
+ 'url': 'dns://fw.example.org:9999/foo.bar.org?type=TXT',
+ 'protocol': 'dns:',
+ 'hostname': 'fw.example.org',
+ 'port': '9999',
+ 'pathname': '/foo.bar.org',
+ 'search': '?type=TXT'
+ },
+ {
+ 'url': 'ldap://localhost:389/ou=People,o=JNDITutorial',
+ 'protocol': 'ldap:',
+ 'hostname': 'localhost',
+ 'port': '389',
+ 'pathname': '/ou=People,o=JNDITutorial'
+ },
+ {
+ 'url': 'git+https://github.com/foo/bar',
+ 'protocol': 'git+https:',
+ 'hostname': 'github.com',
+ 'pathname': '/foo/bar'
+ },
+ {
+ 'url': 'urn:ietf:rfc:2648',
+ 'protocol': 'urn:',
+ 'pathname': 'ietf:rfc:2648'
+ },
+ {
+ 'url': 'tag:joe@example.org,2001:foo/bar',
+ 'protocol': 'tag:',
+ 'pathname': 'joe@example.org,2001:foo/bar'
+ }
+];
+
+additional_tests.forEach((test) => {
+ const u = new URL(test.url);
+ if (test.protocol) assert.strictEqual(test.protocol, u.protocol);
+ if (test.username) assert.strictEqual(test.username, u.username);
+ if (test.password) assert.strictEqual(test.password, u.password);
+ if (test.hostname) assert.strictEqual(test.hostname, u.hostname);
+ if (test.host) assert.strictEqual(test.host, u.host);
+ if (test.port !== undefined) assert.strictEqual(test.port, u.port);
+ if (test.pathname) assert.strictEqual(test.pathname, u.pathname);
+ if (test.search) assert.strictEqual(test.search, u.search);
+ if (test.hash) assert.strictEqual(test.hash, u.hash);
+});
diff --git a/test/parallel/test-whatwg-url-searchparams.js b/test/parallel/test-whatwg-url-searchparams.js
new file mode 100644
index 0000000000..99e2e6a748
--- /dev/null
+++ b/test/parallel/test-whatwg-url-searchparams.js
@@ -0,0 +1,36 @@
+'use strict';
+
+require('../common');
+const assert = require('assert');
+const URL = require('url').URL;
+
+// Expected query string after appending every entry of `values` under 'a'.
+const serialized = 'a=a&a=1&a=true&a=undefined&a=null&a=%5Bobject%20Object%5D';
+const values = ['a', 1, true, undefined, null, {}];
+const m = new URL('http://example.org');
+const sp = m.searchParams;
+assert(sp);
+assert.strictEqual(sp.toString(), '');
+assert.strictEqual(m.search, '');
+
+// set() replaces: only the last value ({} as a string) is left.
+assert(!sp.has('a'));
+values.forEach((i) => sp.set('a', i));
+assert(sp.has('a'));
+assert.strictEqual(sp.get('a'), '[object Object]');
+sp.delete('a');
+assert(!sp.has('a'));
+// append() accumulates: all six values are kept and get() returns the first.
+values.forEach((i) => sp.append('a', i));
+assert(sp.has('a'));
+assert.strictEqual(sp.getAll('a').length, 6);
+assert.strictEqual(sp.get('a'), 'a');
+assert.strictEqual(sp.toString(), serialized);
+assert.strictEqual(m.search, `?${serialized}`);
+
+let n = 0;
+for (const [key, val] of sp) {
+  assert.strictEqual(key, 'a');
+  assert.strictEqual(val, String(values[n++]));
+}
+assert.strictEqual(n, values.length);  // the loop must not pass vacuously
diff --git a/test/parallel/test-whatwg-url-setters.js b/test/parallel/test-whatwg-url-setters.js
new file mode 100644
index 0000000000..4c29ef098c
--- /dev/null
+++ b/test/parallel/test-whatwg-url-setters.js
@@ -0,0 +1,24 @@
+'use strict';
+
+const common = require('../common');
+const path = require('path');
+const URL = require('url').URL;
+const assert = require('assert');
+const attrs = require(path.join(common.fixturesDir, 'url-setter-tests.json'));
+
+for (const attr in attrs) {
+ if (attr === 'comment')
+ continue;
+ const tests = attrs[attr];
+ var n = 0;
+ for (const test of tests) {
+ if (test.skip) continue;
+ n++;
+ const url = new URL(test.href);
+ url[attr] = test.new_value;
+ for (const test_attr in test.expected) {
+ assert.equal(test.expected[test_attr], url[test_attr],
+ `${n} ${attr} ${test_attr} ${test.href} ${test.comment}`);
+ }
+ }
+}