summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/node_i18n.cc54
-rw-r--r--src/node_i18n.h18
-rw-r--r--test/fixtures/url-idna.js432
-rw-r--r--test/fixtures/url-toascii.js156
-rw-r--r--test/parallel/test-icu-punycode.js30
-rw-r--r--test/parallel/test-whatwg-url-domainto.js21
-rw-r--r--test/parallel/test-whatwg-url-toascii.js85
7 files changed, 543 insertions, 253 deletions
diff --git a/src/node_i18n.cc b/src/node_i18n.cc
index dc50f9995a..f35bf26855 100644
--- a/src/node_i18n.cc
+++ b/src/node_i18n.cc
@@ -450,6 +450,9 @@ int32_t ToUnicode(MaybeStackBuffer<char>* buf,
&info,
&status);
+ // Do not check info.errors like we do with ToASCII since ToUnicode always
+ // returns a string, despite any possible errors that may have occurred.
+
if (status == U_BUFFER_OVERFLOW_ERROR) {
status = U_ZERO_ERROR;
buf->AllocateSufficientStorage(len);
@@ -477,9 +480,18 @@ int32_t ToUnicode(MaybeStackBuffer<char>* buf,
int32_t ToASCII(MaybeStackBuffer<char>* buf,
const char* input,
size_t length,
- bool lenient) {
+ enum idna_mode mode) {
UErrorCode status = U_ZERO_ERROR;
- uint32_t options = UIDNA_NONTRANSITIONAL_TO_ASCII | UIDNA_CHECK_BIDI;
+ uint32_t options = // CheckHyphens = false; handled later
+ UIDNA_CHECK_BIDI | // CheckBidi = true
+ UIDNA_CHECK_CONTEXTJ | // CheckJoiners = true
+ UIDNA_NONTRANSITIONAL_TO_ASCII; // Nontransitional_Processing
+ if (mode == IDNA_STRICT) {
+ options |= UIDNA_USE_STD3_RULES; // UseSTD3ASCIIRules = beStrict
+ // VerifyDnsLength = beStrict;
+ // handled later
+ }
+
UIDNA* uidna = uidna_openUTS46(options, &status);
if (U_FAILURE(status))
return -1;
@@ -501,21 +513,17 @@ int32_t ToASCII(MaybeStackBuffer<char>* buf,
&status);
}
- // The WHATWG URL "domain to ASCII" algorithm explicitly sets the
- // VerifyDnsLength flag to false, which disables the domain name length
- // verification step in ToASCII (as specified by UTS #46). Unfortunately,
- // ICU4C's IDNA module does not support disabling this flag through `options`,
- // so just filter out the errors that may be caused by the verification step
- // afterwards.
- info.errors &= ~UIDNA_ERROR_EMPTY_LABEL;
- info.errors &= ~UIDNA_ERROR_LABEL_TOO_LONG;
- info.errors &= ~UIDNA_ERROR_DOMAIN_NAME_TOO_LONG;
-
- // These error conditions are mandated unconditionally by UTS #46 version
- // 9.0.0 (rev. 17), but were found to be incompatible with actual domain
- // names in the wild. As such, in the current UTS #46 draft (rev. 18) these
- // checks are made optional depending on the CheckHyphens flag, which will be
- // disabled in WHATWG URL's "domain to ASCII" algorithm soon.
+ // In UTS #46 which specifies ToASCII, certain error conditions are
+ // configurable through options, and the WHATWG URL Standard promptly elects
+ // to disable some of them to accommodate real-world use cases.
+ // Unfortunately, ICU4C's IDNA module does not support disabling some of
+ // these checks through `options` above, and thus continues reporting
+ // unnecessary errors. To work around this, we filter the unwanted error
+ // bits out of info.errors afterwards, before deciding whether to return
+ // an error from this function.
+
+ // CheckHyphens = false
+ // (Specified in the current UTS #46 draft rev. 18.)
// Refs:
// - https://github.com/whatwg/url/issues/53
// - https://github.com/whatwg/url/pull/309
@@ -526,7 +534,14 @@ int32_t ToASCII(MaybeStackBuffer<char>* buf,
info.errors &= ~UIDNA_ERROR_LEADING_HYPHEN;
info.errors &= ~UIDNA_ERROR_TRAILING_HYPHEN;
- if (U_FAILURE(status) || (!lenient && info.errors != 0)) {
+ if (mode != IDNA_STRICT) {
+ // VerifyDnsLength = beStrict
+ info.errors &= ~UIDNA_ERROR_EMPTY_LABEL;
+ info.errors &= ~UIDNA_ERROR_LABEL_TOO_LONG;
+ info.errors &= ~UIDNA_ERROR_DOMAIN_NAME_TOO_LONG;
+ }
+
+ if (U_FAILURE(status) || (mode != IDNA_LENIENT && info.errors != 0)) {
len = -1;
buf->SetLength(0);
} else {
@@ -564,9 +579,10 @@ static void ToASCII(const FunctionCallbackInfo<Value>& args) {
Utf8Value val(env->isolate(), args[0]);
// optional arg
bool lenient = args[1]->BooleanValue(env->context()).FromJust();
+ enum idna_mode mode = lenient ? IDNA_LENIENT : IDNA_DEFAULT;
MaybeStackBuffer<char> buf;
- int32_t len = ToASCII(&buf, *val, val.length(), lenient);
+ int32_t len = ToASCII(&buf, *val, val.length(), mode);
if (len < 0) {
return env->ThrowError("Cannot convert name to ASCII");
diff --git a/src/node_i18n.h b/src/node_i18n.h
index cc1f3e6ea5..adf9feb414 100644
--- a/src/node_i18n.h
+++ b/src/node_i18n.h
@@ -37,10 +37,26 @@ namespace i18n {
bool InitializeICUDirectory(const std::string& path);
+enum idna_mode {
+ // Default mode for maximum compatibility.
+ IDNA_DEFAULT,
+ // Ignore all errors in IDNA conversion, if possible.
+ IDNA_LENIENT,
+ // Enforce STD3 rules (UseSTD3ASCIIRules) and DNS length restrictions
+ // (VerifyDnsLength). Corresponds to the `beStrict` flag in the "domain to
+ // ASCII" algorithm.
+ IDNA_STRICT
+};
+
+// Implements the WHATWG URL Standard "domain to ASCII" algorithm.
+// https://url.spec.whatwg.org/#concept-domain-to-ascii
int32_t ToASCII(MaybeStackBuffer<char>* buf,
const char* input,
size_t length,
- bool lenient = false);
+ enum idna_mode mode = IDNA_DEFAULT);
+
+// Implements the WHATWG URL Standard "domain to Unicode" algorithm.
+// https://url.spec.whatwg.org/#concept-domain-to-unicode
int32_t ToUnicode(MaybeStackBuffer<char>* buf,
const char* input,
size_t length);
diff --git a/test/fixtures/url-idna.js b/test/fixtures/url-idna.js
index cbfe702e93..4b8f5a48cc 100644
--- a/test/fixtures/url-idna.js
+++ b/test/fixtures/url-idna.js
@@ -1,223 +1,215 @@
'use strict';
// Credit for list: http://www.i18nguy.com/markup/idna-examples.html
-module.exports = {
- valid: [
- { ascii: 'xn--mgbaal8b0b9b2b.icom.museum',
- unicode: 'افغانستا.icom.museum'
- },
- {
- ascii: 'xn--lgbbat1ad8j.icom.museum',
- unicode: 'الجزائر.icom.museum'
- },
- {
- ascii: 'xn--sterreich-z7a.icom.museum',
- unicode: 'österreich.icom.museum'
- },
- {
- ascii: 'xn--54b6eqazv8bc7e.icom.museum',
- unicode: 'বাংলাদেশ.icom.museum'
- },
- {
- ascii: 'xn--80abmy0agn7e.icom.museum',
- unicode: 'беларусь.icom.museum'
- },
- {
- ascii: 'xn--belgi-rsa.icom.museum',
- unicode: 'belgië.icom.museum'
- },
- {
- ascii: 'xn--80abgvm6a7d2b.icom.museum',
- unicode: 'българия.icom.museum'
- },
- {
- ascii: 'xn--mgbfqim.icom.museum',
- unicode: 'تشادر.icom.museum'
- },
- {
- ascii: 'xn--fiqs8s.icom.museum',
- unicode: '中国.icom.museum'
- },
- {
- ascii: 'xn--mgbu4chg.icom.museum',
- unicode: 'القمر.icom.museum'
- },
- {
- ascii: 'xn--vxakcego.icom.museum',
- unicode: 'κυπρος.icom.museum'
- },
- {
- ascii: 'xn--eskrepublika-ebb62d.icom.museum',
- unicode: 'českárepublika.icom.museum'
- },
- {
- ascii: 'xn--wgbh1c.icom.museum',
- unicode: 'مصر.icom.museum'
- },
- {
- ascii: 'xn--hxakic4aa.icom.museum',
- unicode: 'ελλάδα.icom.museum'
- },
- {
- ascii: 'xn--magyarorszg-t7a.icom.museum',
- unicode: 'magyarország.icom.museum'
- },
- {
- ascii: 'xn--sland-ysa.icom.museum',
- unicode: 'ísland.icom.museum'
- },
- {
- ascii: 'xn--h2brj9c.icom.museum',
- unicode: 'भारत.icom.museum'
- },
- {
- ascii: 'xn--mgba3a4fra.icom.museum',
- unicode: 'ايران.icom.museum'
- },
- {
- ascii: 'xn--ire-9la.icom.museum',
- unicode: 'éire.icom.museum'
- },
- {
- ascii: 'xn--4dbklr2c8d.xn--4dbrk0ce.museum',
- unicode: 'איקו״ם.ישראל.museum'
- },
- {
- ascii: 'xn--wgv71a.icom.museum',
- unicode: '日本.icom.museum'
- },
- {
- ascii: 'xn--igbhzh7gpa.icom.museum',
- unicode: 'الأردن.icom.museum'
- },
- {
- ascii: 'xn--80aaa0a6awh12ed.icom.museum',
- unicode: 'қазақстан.icom.museum'
- },
- {
- ascii: 'xn--3e0b707e.icom.museum',
- unicode: '한국.icom.museum'
- },
- {
- ascii: 'xn--80afmksoji0fc.icom.museum',
- unicode: 'кыргызстан.icom.museum'
- },
- {
- ascii: 'xn--q7ce6a.icom.museum',
- unicode: 'ລາວ.icom.museum'
- },
- {
- ascii: 'xn--mgbb7fjb.icom.museum',
- unicode: 'لبنان.icom.museum'
- },
- {
- ascii: 'xn--80aaldqjmmi6x.icom.museum',
- unicode: 'македонија.icom.museum'
- },
- {
- ascii: 'xn--mgbah1a3hjkrd.icom.museum',
- unicode: 'موريتانيا.icom.museum'
- },
- {
- ascii: 'xn--mxico-bsa.icom.museum',
- unicode: 'méxico.icom.museum'
- },
- {
- ascii: 'xn--c1aqabffc0aq.icom.museum',
- unicode: 'монголулс.icom.museum'
- },
- {
- ascii: 'xn--mgbc0a9azcg.icom.museum',
- unicode: 'المغرب.icom.museum'
- },
- {
- ascii: 'xn--l2bey1c2b.icom.museum',
- unicode: 'नेपाल.icom.museum'
- },
- {
- ascii: 'xn--mgb9awbf.icom.museum',
- unicode: 'عمان.icom.museum'
- },
- {
- ascii: 'xn--wgbl6a.icom.museum',
- unicode: 'قطر.icom.museum'
- },
- {
- ascii: 'xn--romnia-yta.icom.museum',
- unicode: 'românia.icom.museum'
- },
- {
- ascii: 'xn--h1alffa9f.xn--h1aegh.museum',
- unicode: 'россия.иком.museum'
- },
- {
- ascii: 'xn--80aaabm1ab4blmeec9e7n.xn--h1aegh.museum',
- unicode: 'србијаицрнагора.иком.museum'
- },
- {
- ascii: 'xn--xkc2al3hye2a.icom.museum',
- unicode: 'இலங்கை.icom.museum'
- },
- {
- ascii: 'xn--espaa-rta.icom.museum',
- unicode: 'españa.icom.museum'
- },
- {
- ascii: 'xn--o3cw4h.icom.museum',
- unicode: 'ไทย.icom.museum'
- },
- {
- ascii: 'xn--pgbs0dh.icom.museum',
- unicode: 'تونس.icom.museum'
- },
- {
- ascii: 'xn--trkiye-3ya.icom.museum',
- unicode: 'türkiye.icom.museum'
- },
- {
- ascii: 'xn--80aaxgrpt.icom.museum',
- unicode: 'украина.icom.museum'
- },
- {
- ascii: 'xn--vitnam-jk8b.icom.museum',
- unicode: 'việtnam.icom.museum'
- },
- // long label
- {
- ascii: `${'a'.repeat(64)}.com`,
- unicode: `${'a'.repeat(64)}.com`,
- },
- // long URL
- {
- ascii: `${`${'a'.repeat(64)}.`.repeat(4)}com`,
- unicode: `${`${'a'.repeat(64)}.`.repeat(4)}com`
- },
- // URLs with hyphen
- {
- ascii: 'r4---sn-a5mlrn7s.gevideo.com',
- unicode: 'r4---sn-a5mlrn7s.gevideo.com'
- },
- {
- ascii: '-sn-a5mlrn7s.gevideo.com',
- unicode: '-sn-a5mlrn7s.gevideo.com'
- },
- {
- ascii: 'sn-a5mlrn7s-.gevideo.com',
- unicode: 'sn-a5mlrn7s-.gevideo.com'
- },
- {
- ascii: '-sn-a5mlrn7s-.gevideo.com',
- unicode: '-sn-a5mlrn7s-.gevideo.com'
- },
- {
- ascii: '-sn--a5mlrn7s-.gevideo.com',
- unicode: '-sn--a5mlrn7s-.gevideo.com'
- }
- ],
- invalid: [
- // invalid character
- '\ufffd.com',
- // invalid bi-directional character
- 'تشادرlatin.icom.museum'
- ]
-}
+module.exports = [
+ { ascii: 'xn--mgbaal8b0b9b2b.icom.museum',
+ unicode: 'افغانستا.icom.museum'
+ },
+ {
+ ascii: 'xn--lgbbat1ad8j.icom.museum',
+ unicode: 'الجزائر.icom.museum'
+ },
+ {
+ ascii: 'xn--sterreich-z7a.icom.museum',
+ unicode: 'österreich.icom.museum'
+ },
+ {
+ ascii: 'xn--54b6eqazv8bc7e.icom.museum',
+ unicode: 'বাংলাদেশ.icom.museum'
+ },
+ {
+ ascii: 'xn--80abmy0agn7e.icom.museum',
+ unicode: 'беларусь.icom.museum'
+ },
+ {
+ ascii: 'xn--belgi-rsa.icom.museum',
+ unicode: 'belgië.icom.museum'
+ },
+ {
+ ascii: 'xn--80abgvm6a7d2b.icom.museum',
+ unicode: 'българия.icom.museum'
+ },
+ {
+ ascii: 'xn--mgbfqim.icom.museum',
+ unicode: 'تشادر.icom.museum'
+ },
+ {
+ ascii: 'xn--fiqs8s.icom.museum',
+ unicode: '中国.icom.museum'
+ },
+ {
+ ascii: 'xn--mgbu4chg.icom.museum',
+ unicode: 'القمر.icom.museum'
+ },
+ {
+ ascii: 'xn--vxakcego.icom.museum',
+ unicode: 'κυπρος.icom.museum'
+ },
+ {
+ ascii: 'xn--eskrepublika-ebb62d.icom.museum',
+ unicode: 'českárepublika.icom.museum'
+ },
+ {
+ ascii: 'xn--wgbh1c.icom.museum',
+ unicode: 'مصر.icom.museum'
+ },
+ {
+ ascii: 'xn--hxakic4aa.icom.museum',
+ unicode: 'ελλάδα.icom.museum'
+ },
+ {
+ ascii: 'xn--magyarorszg-t7a.icom.museum',
+ unicode: 'magyarország.icom.museum'
+ },
+ {
+ ascii: 'xn--sland-ysa.icom.museum',
+ unicode: 'ísland.icom.museum'
+ },
+ {
+ ascii: 'xn--h2brj9c.icom.museum',
+ unicode: 'भारत.icom.museum'
+ },
+ {
+ ascii: 'xn--mgba3a4fra.icom.museum',
+ unicode: 'ايران.icom.museum'
+ },
+ {
+ ascii: 'xn--ire-9la.icom.museum',
+ unicode: 'éire.icom.museum'
+ },
+ {
+ ascii: 'xn--4dbklr2c8d.xn--4dbrk0ce.museum',
+ unicode: 'איקו״ם.ישראל.museum'
+ },
+ {
+ ascii: 'xn--wgv71a.icom.museum',
+ unicode: '日本.icom.museum'
+ },
+ {
+ ascii: 'xn--igbhzh7gpa.icom.museum',
+ unicode: 'الأردن.icom.museum'
+ },
+ {
+ ascii: 'xn--80aaa0a6awh12ed.icom.museum',
+ unicode: 'қазақстан.icom.museum'
+ },
+ {
+ ascii: 'xn--3e0b707e.icom.museum',
+ unicode: '한국.icom.museum'
+ },
+ {
+ ascii: 'xn--80afmksoji0fc.icom.museum',
+ unicode: 'кыргызстан.icom.museum'
+ },
+ {
+ ascii: 'xn--q7ce6a.icom.museum',
+ unicode: 'ລາວ.icom.museum'
+ },
+ {
+ ascii: 'xn--mgbb7fjb.icom.museum',
+ unicode: 'لبنان.icom.museum'
+ },
+ {
+ ascii: 'xn--80aaldqjmmi6x.icom.museum',
+ unicode: 'македонија.icom.museum'
+ },
+ {
+ ascii: 'xn--mgbah1a3hjkrd.icom.museum',
+ unicode: 'موريتانيا.icom.museum'
+ },
+ {
+ ascii: 'xn--mxico-bsa.icom.museum',
+ unicode: 'méxico.icom.museum'
+ },
+ {
+ ascii: 'xn--c1aqabffc0aq.icom.museum',
+ unicode: 'монголулс.icom.museum'
+ },
+ {
+ ascii: 'xn--mgbc0a9azcg.icom.museum',
+ unicode: 'المغرب.icom.museum'
+ },
+ {
+ ascii: 'xn--l2bey1c2b.icom.museum',
+ unicode: 'नेपाल.icom.museum'
+ },
+ {
+ ascii: 'xn--mgb9awbf.icom.museum',
+ unicode: 'عمان.icom.museum'
+ },
+ {
+ ascii: 'xn--wgbl6a.icom.museum',
+ unicode: 'قطر.icom.museum'
+ },
+ {
+ ascii: 'xn--romnia-yta.icom.museum',
+ unicode: 'românia.icom.museum'
+ },
+ {
+ ascii: 'xn--h1alffa9f.xn--h1aegh.museum',
+ unicode: 'россия.иком.museum'
+ },
+ {
+ ascii: 'xn--80aaabm1ab4blmeec9e7n.xn--h1aegh.museum',
+ unicode: 'србијаицрнагора.иком.museum'
+ },
+ {
+ ascii: 'xn--xkc2al3hye2a.icom.museum',
+ unicode: 'இலங்கை.icom.museum'
+ },
+ {
+ ascii: 'xn--espaa-rta.icom.museum',
+ unicode: 'españa.icom.museum'
+ },
+ {
+ ascii: 'xn--o3cw4h.icom.museum',
+ unicode: 'ไทย.icom.museum'
+ },
+ {
+ ascii: 'xn--pgbs0dh.icom.museum',
+ unicode: 'تونس.icom.museum'
+ },
+ {
+ ascii: 'xn--trkiye-3ya.icom.museum',
+ unicode: 'türkiye.icom.museum'
+ },
+ {
+ ascii: 'xn--80aaxgrpt.icom.museum',
+ unicode: 'украина.icom.museum'
+ },
+ {
+ ascii: 'xn--vitnam-jk8b.icom.museum',
+ unicode: 'việtnam.icom.museum'
+ },
+ // long label
+ {
+ ascii: `${'a'.repeat(64)}.com`,
+ unicode: `${'a'.repeat(64)}.com`,
+ },
+ // long URL
+ {
+ ascii: `${`${'a'.repeat(64)}.`.repeat(4)}com`,
+ unicode: `${`${'a'.repeat(64)}.`.repeat(4)}com`
+ },
+ // URLs with hyphen
+ {
+ ascii: 'r4---sn-a5mlrn7s.gevideo.com',
+ unicode: 'r4---sn-a5mlrn7s.gevideo.com'
+ },
+ {
+ ascii: '-sn-a5mlrn7s.gevideo.com',
+ unicode: '-sn-a5mlrn7s.gevideo.com'
+ },
+ {
+ ascii: 'sn-a5mlrn7s-.gevideo.com',
+ unicode: 'sn-a5mlrn7s-.gevideo.com'
+ },
+ {
+ ascii: '-sn-a5mlrn7s-.gevideo.com',
+ unicode: '-sn-a5mlrn7s-.gevideo.com'
+ },
+ {
+ ascii: '-sn--a5mlrn7s-.gevideo.com',
+ unicode: '-sn--a5mlrn7s-.gevideo.com'
+ }
+];
diff --git a/test/fixtures/url-toascii.js b/test/fixtures/url-toascii.js
new file mode 100644
index 0000000000..ea5e0f22ba
--- /dev/null
+++ b/test/fixtures/url-toascii.js
@@ -0,0 +1,156 @@
+'use strict';
+
+/* WPT Refs:
+ https://github.com/w3c/web-platform-tests/blob/4839a0a804/url/toascii.json
+ License: http://www.w3.org/Consortium/Legal/2008/04-testsuite-copyright.html
+*/
+module.exports =
+[
+ "This resource is focused on highlighting issues with UTS #46 ToASCII",
+ {
+ "comment": "Label with hyphens in 3rd and 4th position",
+ "input": "aa--",
+ "output": "aa--"
+ },
+ {
+ "input": "a†--",
+ "output": "xn--a---kp0a"
+ },
+ {
+ "input": "ab--c",
+ "output": "ab--c"
+ },
+ {
+ "comment": "Label with leading hyphen",
+ "input": "-x",
+ "output": "-x"
+ },
+ {
+ "input": "-†",
+ "output": "xn----xhn"
+ },
+ {
+ "input": "-x.xn--nxa",
+ "output": "-x.xn--nxa"
+ },
+ {
+ "input": "-x.β",
+ "output": "-x.xn--nxa"
+ },
+ {
+ "comment": "Label with trailing hyphen",
+ "input": "x-.xn--nxa",
+ "output": "x-.xn--nxa"
+ },
+ {
+ "input": "x-.β",
+ "output": "x-.xn--nxa"
+ },
+ {
+ "comment": "Empty labels",
+ "input": "x..xn--nxa",
+ "output": "x..xn--nxa"
+ },
+ {
+ "input": "x..β",
+ "output": "x..xn--nxa"
+ },
+ {
+ "comment": "Invalid Punycode",
+ "input": "xn--a",
+ "output": null
+ },
+ {
+ "input": "xn--a.xn--nxa",
+ "output": null
+ },
+ {
+ "input": "xn--a.β",
+ "output": null
+ },
+ {
+ "comment": "Valid Punycode",
+ "input": "xn--nxa.xn--nxa",
+ "output": "xn--nxa.xn--nxa"
+ },
+ {
+ "comment": "Mixed",
+ "input": "xn--nxa.β",
+ "output": "xn--nxa.xn--nxa"
+ },
+ {
+ "input": "ab--c.xn--nxa",
+ "output": "ab--c.xn--nxa"
+ },
+ {
+ "input": "ab--c.β",
+ "output": "ab--c.xn--nxa"
+ },
+ {
+ "comment": "CheckJoiners is true",
+ "input": "\u200D.example",
+ "output": null
+ },
+ {
+ "input": "xn--1ug.example",
+ "output": null
+ },
+ {
+ "comment": "CheckBidi is true",
+ "input": "يa",
+ "output": null
+ },
+ {
+ "input": "xn--a-yoc",
+ "output": null
+ },
+ {
+ "comment": "processing_option is Nontransitional_Processing",
+ "input": "ශ්‍රී",
+ "output": "xn--10cl1a0b660p"
+ },
+ {
+ "input": "نامه‌ای",
+ "output": "xn--mgba3gch31f060k"
+ },
+ {
+ "comment": "U+FFFD",
+ "input": "\uFFFD.com",
+ "output": null
+ },
+ {
+ "comment": "U+FFFD character encoded in Punycode",
+ "input": "xn--zn7c.com",
+ "output": null
+ },
+ {
+ "comment": "Label longer than 63 code points",
+ "input": "x01234567890123456789012345678901234567890123456789012345678901x",
+ "output": "x01234567890123456789012345678901234567890123456789012345678901x"
+ },
+ {
+ "input": "x01234567890123456789012345678901234567890123456789012345678901†",
+ "output": "xn--x01234567890123456789012345678901234567890123456789012345678901-6963b"
+ },
+ {
+ "input": "x01234567890123456789012345678901234567890123456789012345678901x.xn--nxa",
+ "output": "x01234567890123456789012345678901234567890123456789012345678901x.xn--nxa"
+ },
+ {
+ "input": "x01234567890123456789012345678901234567890123456789012345678901x.β",
+ "output": "x01234567890123456789012345678901234567890123456789012345678901x.xn--nxa"
+ },
+ {
+ "comment": "Domain excluding TLD longer than 253 code points",
+ "input": "01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.0123456789012345678901234567890123456789012345678.x",
+ "output": "01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.0123456789012345678901234567890123456789012345678.x"
+ },
+ {
+ "input": "01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.0123456789012345678901234567890123456789012345678.xn--nxa",
+ "output": "01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.0123456789012345678901234567890123456789012345678.xn--nxa"
+ },
+ {
+ "input": "01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.0123456789012345678901234567890123456789012345678.β",
+ "output": "01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.0123456789012345678901234567890123456789012345678.xn--nxa"
+ }
+]
diff --git a/test/parallel/test-icu-punycode.js b/test/parallel/test-icu-punycode.js
index ba2014bdc8..13db2dd7ba 100644
--- a/test/parallel/test-icu-punycode.js
+++ b/test/parallel/test-icu-punycode.js
@@ -10,9 +10,10 @@ const icu = process.binding('icu');
const assert = require('assert');
const tests = require('../fixtures/url-idna.js');
+const wptToASCIITests = require('../fixtures/url-toascii.js');
{
- for (const [i, { ascii, unicode }] of tests.valid.entries()) {
+ for (const [i, { ascii, unicode }] of tests.entries()) {
assert.strictEqual(ascii, icu.toASCII(unicode), `toASCII(${i + 1})`);
assert.strictEqual(unicode, icu.toUnicode(ascii), `toUnicode(${i + 1})`);
assert.strictEqual(ascii, icu.toASCII(icu.toUnicode(ascii)),
@@ -23,13 +24,24 @@ const tests = require('../fixtures/url-idna.js');
}
{
- for (const [i, url] of tests.invalid.entries()) {
- assert.throws(() => icu.toASCII(url),
- /^Error: Cannot convert name to ASCII$/,
- `ToASCII invalid case ${i + 1}`);
- assert.doesNotThrow(() => icu.toASCII(url, true),
- `ToASCII invalid case ${i + 1} in lenient mode`);
- assert.doesNotThrow(() => icu.toUnicode(url),
- `ToUnicode invalid case ${i + 1}`);
+ for (const [i, test] of wptToASCIITests.entries()) {
+ if (typeof test === 'string')
+ continue; // skip comments
+ const { comment, input, output } = test;
+ let caseComment = `case ${i + 1}`;
+ if (comment)
+ caseComment += ` (${comment})`;
+ if (output === null) {
+ assert.throws(() => icu.toASCII(input),
+ /^Error: Cannot convert name to ASCII$/,
+ `ToASCII ${caseComment}`);
+ assert.doesNotThrow(() => icu.toASCII(input, true),
+ `ToASCII ${caseComment} in lenient mode`);
+ } else {
+ assert.strictEqual(icu.toASCII(input), output, `ToASCII ${caseComment}`);
+ assert.strictEqual(icu.toASCII(input, true), output,
+ `ToASCII ${caseComment} in lenient mode`);
+ }
+ assert.doesNotThrow(() => icu.toUnicode(input), `ToUnicode ${caseComment}`);
}
}
diff --git a/test/parallel/test-whatwg-url-domainto.js b/test/parallel/test-whatwg-url-domainto.js
index 90d9ee4a8c..b399f24136 100644
--- a/test/parallel/test-whatwg-url-domainto.js
+++ b/test/parallel/test-whatwg-url-domainto.js
@@ -11,6 +11,7 @@ const { domainToASCII, domainToUnicode } = require('url');
// Tests below are not from WPT.
const tests = require('../fixtures/url-idna.js');
+const wptToASCIITests = require('../fixtures/url-toascii.js');
{
const expectedError = common.expectsError(
@@ -22,7 +23,7 @@ const tests = require('../fixtures/url-idna.js');
}
{
- for (const [i, { ascii, unicode }] of tests.valid.entries()) {
+ for (const [i, { ascii, unicode }] of tests.entries()) {
assert.strictEqual(ascii, domainToASCII(unicode),
`domainToASCII(${i + 1})`);
assert.strictEqual(unicode, domainToUnicode(ascii),
@@ -35,8 +36,20 @@ const tests = require('../fixtures/url-idna.js');
}
{
- for (const [i, url] of tests.invalid.entries()) {
- assert.strictEqual(domainToASCII(url), '', `Invalid case ${i + 1}`);
- assert.strictEqual(domainToUnicode(url), '', `Invalid case ${i + 1}`);
+ for (const [i, test] of wptToASCIITests.entries()) {
+ if (typeof test === 'string')
+ continue; // skip comments
+ const { comment, input, output } = test;
+ let caseComment = `Case ${i + 1}`;
+ if (comment)
+ caseComment += ` (${comment})`;
+ if (output === null) {
+ assert.strictEqual(domainToASCII(input), '', caseComment);
+ assert.strictEqual(domainToUnicode(input), '', caseComment);
+ } else {
+ assert.strictEqual(domainToASCII(input), output, caseComment);
+ const roundtripped = domainToASCII(domainToUnicode(input));
+ assert.strictEqual(roundtripped, output, caseComment);
+ }
}
}
diff --git a/test/parallel/test-whatwg-url-toascii.js b/test/parallel/test-whatwg-url-toascii.js
new file mode 100644
index 0000000000..bd986c96a4
--- /dev/null
+++ b/test/parallel/test-whatwg-url-toascii.js
@@ -0,0 +1,85 @@
+'use strict';
+const common = require('../common');
+const path = require('path');
+const { URL } = require('url');
+const { test, assert_equals, assert_throws } = require('../common/wpt');
+
+if (!common.hasIntl) {
+ // A handful of the tests fail when ICU is not included.
+ common.skip('missing Intl');
+ return;
+}
+
+const request = {
+ response: require(path.join(common.fixturesDir, 'url-toascii'))
+};
+
+/* eslint-disable */
+/* WPT Refs:
+ https://github.com/w3c/web-platform-tests/blob/4839a0a804/url/toascii.window.js
+ License: http://www.w3.org/Consortium/Legal/2008/04-testsuite-copyright.html
+*/
+// async_test(t => {
+// const request = new XMLHttpRequest()
+// request.open("GET", "toascii.json")
+// request.send()
+// request.responseType = "json"
+// request.onload = t.step_func_done(() => {
+ runTests(request.response)
+// })
+// }, "Loading data…")
+
+function makeURL(type, input) {
+ input = "https://" + input + "/x"
+ if(type === "url") {
+ return new URL(input)
+ } else {
+ const url = document.createElement(type)
+ url.href = input
+ return url
+ }
+}
+
+function runTests(tests) {
+ for(var i = 0, l = tests.length; i < l; i++) {
+ let hostTest = tests[i]
+ if (typeof hostTest === "string") {
+ continue // skip comments
+ }
+ const typeName = { "url": "URL", "a": "<a>", "area": "<area>" }
+ // ;["url", "a", "area"].forEach((type) => {
+ ;["url"].forEach((type) => {
+ test(() => {
+ if(hostTest.output !== null) {
+ const url = makeURL("url", hostTest.input)
+ assert_equals(url.host, hostTest.output)
+ assert_equals(url.hostname, hostTest.output)
+ assert_equals(url.pathname, "/x")
+ assert_equals(url.href, "https://" + hostTest.output + "/x")
+ } else {
+ if(type === "url") {
+ assert_throws(new TypeError, () => makeURL("url", hostTest.input))
+ } else {
+ const url = makeURL(type, hostTest.input)
+ assert_equals(url.host, "")
+ assert_equals(url.hostname, "")
+ assert_equals(url.pathname, "")
+ assert_equals(url.href, "https://" + hostTest.input + "/x")
+ }
+ }
+ }, hostTest.input + " (using " + typeName[type] + ")")
+ ;["host", "hostname"].forEach((val) => {
+ test(() => {
+ const url = makeURL(type, "x")
+ url[val] = hostTest.input
+ if(hostTest.output !== null) {
+ assert_equals(url[val], hostTest.output)
+ } else {
+ assert_equals(url[val], "x")
+ }
+ }, hostTest.input + " (using " + typeName[type] + "." + val + ")")
+ })
+ })
+ }
+}
+/* eslint-enable */