summary refs log tree commit diff
diff options
context:
space:
mode:
author James M Snell <jasnell@gmail.com> 2016-06-21 14:03:05 -0700
committer Jeremiah Senkpiel <fishrock123@rocketmail.com> 2016-07-05 22:36:51 +0200
commit d0e24923a69326b23ac14cc631724fc537521f9e (patch)
tree d1d251811a1c2aaec20446acff679a65eb2fe2d3
parent 12b199369d08cd4c09120411a173dbfba48521f8 (diff)
download node-new-d0e24923a69326b23ac14cc631724fc537521f9e.tar.gz
net: use icu's punycode implementation
ICU has a punycode implementation built in. Use it instead of the JavaScript implementation because it's much faster.

PR-URL: https://github.com/nodejs/node/pull/7355
Reviewed-By: Trevor Norris <trev.norris@gmail.com>
Reviewed-By: Ben Noordhuis <info@bnoordhuis.nl>
-rw-r--r-- benchmark/net/punycode.js 75
-rw-r--r-- lib/url.js 12
-rw-r--r-- src/node_i18n.cc 132
-rw-r--r-- test/parallel/test-icu-punycode.js 72
-rw-r--r-- tools/icu/icu-generic.gyp 9
-rw-r--r-- tools/icu/icu_small.json 3
6 files changed, 291 insertions, 12 deletions
diff --git a/benchmark/net/punycode.js b/benchmark/net/punycode.js
new file mode 100644
index 0000000000..f4d22557ac
--- /dev/null
+++ b/benchmark/net/punycode.js
@@ -0,0 +1,75 @@
+'use strict';
+
+const common = require('../common.js');
+const icu = process.binding('icu');
+const punycode = require('punycode');
+
+const bench = common.createBenchmark(main, {
+ method: ['punycode', 'icu'],
+ n: [1024],
+ val: [
+ 'افغانستا.icom.museum',
+ 'الجزائر.icom.museum',
+ 'österreich.icom.museum',
+ 'বাংলাদেশ.icom.museum',
+ 'беларусь.icom.museum',
+ 'belgië.icom.museum',
+ 'българия.icom.museum',
+ 'تشادر.icom.museum',
+ '中国.icom.museum',
+ 'القمر.icom.museum',
+ 'κυπρος.icom.museum',
+ 'českárepublika.icom.museum',
+ 'مصر.icom.museum',
+ 'ελλάδα.icom.museum',
+ 'magyarország.icom.museum',
+ 'ísland.icom.museum',
+ 'भारत.icom.museum',
+ 'ايران.icom.museum',
+ 'éire.icom.museum',
+ 'איקו״ם.ישראל.museum',
+ '日本.icom.museum',
+ 'الأردن.icom.museum'
+ ]
+});
+
+function usingPunycode(val) {
+ punycode.toUnicode(punycode.toASCII(val));
+}
+
+function usingICU(val) {
+ icu.toUnicode(icu.toASCII(val));
+}
+
+function runPunycode(n, val) {
+ common.v8ForceOptimization(usingPunycode, val);
+ var i = 0;
+ bench.start();
+ for (; i < n; i++)
+ usingPunycode(val);
+ bench.end(n);
+}
+
+function runICU(n, val) {
+ common.v8ForceOptimization(usingICU, val);
+ var i = 0;
+ bench.start();
+ for (; i < n; i++)
+ usingICU(val);
+ bench.end(n);
+}
+
+function main(conf) {
+ const n = +conf.n;
+ const val = conf.val;
+ switch (conf.method) {
+ case 'punycode':
+ runPunycode(n, val);
+ break;
+ case 'icu':
+ runICU(n, val);
+ break;
+ default:
+ throw new Error('Unexpected method');
+ }
+}
diff --git a/lib/url.js b/lib/url.js
index c4d6ed2e33..4a2a879bf3 100644
--- a/lib/url.js
+++ b/lib/url.js
@@ -1,6 +1,14 @@
'use strict';
-const punycode = require('punycode');
+function importPunycode() {
+ try {
+ return process.binding('icu');
+ } catch (e) {
+ return require('punycode');
+ }
+}
+
+const { toASCII } = importPunycode();
exports.parse = urlParse;
exports.resolve = urlResolve;
@@ -309,7 +317,7 @@ Url.prototype.parse = function(url, parseQueryString, slashesDenoteHost) {
// It only converts parts of the domain name that
// have non-ASCII characters, i.e. it doesn't matter if
// you call it with a domain that already is ASCII-only.
- this.hostname = punycode.toASCII(this.hostname);
+ this.hostname = toASCII(this.hostname);
}
var p = this.port ? ':' + this.port : '';
diff --git a/src/node_i18n.cc b/src/node_i18n.cc
index 3e5b3a9129..0f3b9b76e6 100644
--- a/src/node_i18n.cc
+++ b/src/node_i18n.cc
@@ -23,8 +23,16 @@
#if defined(NODE_HAVE_I18N_SUPPORT)
+#include "node.h"
+#include "env.h"
+#include "env-inl.h"
+#include "util.h"
+#include "util-inl.h"
+#include "v8.h"
+
#include <unicode/putil.h>
#include <unicode/udata.h>
+#include <unicode/uidna.h>
#ifdef NODE_HAVE_SMALL_ICU
/* if this is defined, we have a 'secondary' entry point.
@@ -43,6 +51,13 @@ extern "C" const char U_DATA_API SMALL_ICUDATA_ENTRY_POINT[];
namespace node {
+using v8::Context;
+using v8::FunctionCallbackInfo;
+using v8::Local;
+using v8::Object;
+using v8::String;
+using v8::Value;
+
bool flag_icu_data_dir = false;
namespace i18n {
@@ -64,7 +79,124 @@ bool InitializeICUDirectory(const char* icu_data_path) {
}
}
+static int32_t ToUnicode(MaybeStackBuffer<char>* buf,
+ const char* input,
+ size_t length) {
+ UErrorCode status = U_ZERO_ERROR;
+ uint32_t options = UIDNA_DEFAULT;
+ options |= UIDNA_NONTRANSITIONAL_TO_UNICODE;
+ UIDNA* uidna = uidna_openUTS46(options, &status);
+ if (U_FAILURE(status))
+ return -1;
+ UIDNAInfo info = UIDNA_INFO_INITIALIZER;
+
+ int32_t len = uidna_nameToUnicodeUTF8(uidna,
+ input, length,
+ **buf, buf->length(),
+ &info,
+ &status);
+
+ if (status == U_BUFFER_OVERFLOW_ERROR) {
+ status = U_ZERO_ERROR;
+ buf->AllocateSufficientStorage(len);
+ len = uidna_nameToUnicodeUTF8(uidna,
+ input, length,
+ **buf, buf->length(),
+ &info,
+ &status);
+ }
+
+ if (U_FAILURE(status))
+ len = -1;
+
+ uidna_close(uidna);
+ return len;
+}
+
+static int32_t ToASCII(MaybeStackBuffer<char>* buf,
+ const char* input,
+ size_t length) {
+ UErrorCode status = U_ZERO_ERROR;
+ uint32_t options = UIDNA_DEFAULT;
+ options |= UIDNA_NONTRANSITIONAL_TO_ASCII;
+ UIDNA* uidna = uidna_openUTS46(options, &status);
+ if (U_FAILURE(status))
+ return -1;
+ UIDNAInfo info = UIDNA_INFO_INITIALIZER;
+
+ int32_t len = uidna_nameToASCII_UTF8(uidna,
+ input, length,
+ **buf, buf->length(),
+ &info,
+ &status);
+
+ if (status == U_BUFFER_OVERFLOW_ERROR) {
+ status = U_ZERO_ERROR;
+ buf->AllocateSufficientStorage(len);
+ len = uidna_nameToASCII_UTF8(uidna,
+ input, length,
+ **buf, buf->length(),
+ &info,
+ &status);
+ }
+
+ if (U_FAILURE(status))
+ len = -1;
+
+ uidna_close(uidna);
+ return len;
+}
+
+static void ToUnicode(const FunctionCallbackInfo<Value>& args) {
+ Environment* env = Environment::GetCurrent(args);
+ CHECK_GE(args.Length(), 1);
+ CHECK(args[0]->IsString());
+ Utf8Value val(env->isolate(), args[0]);
+ MaybeStackBuffer<char> buf;
+ int32_t len = ToUnicode(&buf, *val, val.length());
+
+ if (len < 0) {
+ return env->ThrowError("Cannot convert name to Unicode");
+ }
+
+ args.GetReturnValue().Set(
+ String::NewFromUtf8(env->isolate(),
+ *buf,
+ v8::NewStringType::kNormal,
+ len).ToLocalChecked());
+}
+
+static void ToASCII(const FunctionCallbackInfo<Value>& args) {
+ Environment* env = Environment::GetCurrent(args);
+ CHECK_GE(args.Length(), 1);
+ CHECK(args[0]->IsString());
+ Utf8Value val(env->isolate(), args[0]);
+ MaybeStackBuffer<char> buf;
+ int32_t len = ToASCII(&buf, *val, val.length());
+
+ if (len < 0) {
+ return env->ThrowError("Cannot convert name to ASCII");
+ }
+
+ args.GetReturnValue().Set(
+ String::NewFromUtf8(env->isolate(),
+ *buf,
+ v8::NewStringType::kNormal,
+ len).ToLocalChecked());
+}
+
+void Init(Local<Object> target,
+ Local<Value> unused,
+ Local<Context> context,
+ void* priv) {
+ Environment* env = Environment::GetCurrent(context);
+ env->SetMethod(target, "toUnicode", ToUnicode);
+ env->SetMethod(target, "toASCII", ToASCII);
+}
+
} // namespace i18n
} // namespace node
+NODE_MODULE_CONTEXT_AWARE_BUILTIN(icu, node::i18n::Init)
+
#endif // NODE_HAVE_I18N_SUPPORT
diff --git a/test/parallel/test-icu-punycode.js b/test/parallel/test-icu-punycode.js
new file mode 100644
index 0000000000..d9b36e7df7
--- /dev/null
+++ b/test/parallel/test-icu-punycode.js
@@ -0,0 +1,72 @@
+'use strict';
+
+const common = require('../common');
+const icu = getPunycode();
+const assert = require('assert');
+
+function getPunycode() {
+ try {
+ return process.binding('icu');
+ } catch (err) {
+ return undefined;
+ }
+}
+
+if (!icu) {
+ common.skip('icu punycode tests because ICU is not present.');
+ return;
+}
+
+// Credit for list: http://www.i18nguy.com/markup/idna-examples.html
+const tests = [
+ 'افغانستا.icom.museum',
+ 'الجزائر.icom.museum',
+ 'österreich.icom.museum',
+ 'বাংলাদেশ.icom.museum',
+ 'беларусь.icom.museum',
+ 'belgië.icom.museum',
+ 'българия.icom.museum',
+ 'تشادر.icom.museum',
+ '中国.icom.museum',
+ 'القمر.icom.museum',
+ 'κυπρος.icom.museum',
+ 'českárepublika.icom.museum',
+ 'مصر.icom.museum',
+ 'ελλάδα.icom.museum',
+ 'magyarország.icom.museum',
+ 'ísland.icom.museum',
+ 'भारत.icom.museum',
+ 'ايران.icom.museum',
+ 'éire.icom.museum',
+ 'איקו״ם.ישראל.museum',
+ '日本.icom.museum',
+ 'الأردن.icom.museum',
+ 'қазақстан.icom.museum',
+ '한국.icom.museum',
+ 'кыргызстан.icom.museum',
+ 'ລາວ.icom.museum',
+ 'لبنان.icom.museum',
+ 'македонија.icom.museum',
+ 'موريتانيا.icom.museum',
+ 'méxico.icom.museum',
+ 'монголулс.icom.museum',
+ 'المغرب.icom.museum',
+ 'नेपाल.icom.museum',
+ 'عمان.icom.museum',
+ 'قطر.icom.museum',
+ 'românia.icom.museum',
+ 'россия.иком.museum',
+ 'србијаицрнагора.иком.museum',
+ 'இலங்கை.icom.museum',
+ 'españa.icom.museum',
+ 'ไทย.icom.museum',
+ 'تونس.icom.museum',
+ 'türkiye.icom.museum',
+ 'украина.icom.museum',
+ 'việtnam.icom.museum'
+];
+
+// Testing the roundtrip
+tests.forEach((i) => {
+ assert.strictEqual(i, icu.toUnicode(icu.toASCII(i)));
+});
diff --git a/tools/icu/icu-generic.gyp b/tools/icu/icu-generic.gyp
index a61b294141..9d466ac392 100644
--- a/tools/icu/icu-generic.gyp
+++ b/tools/icu/icu-generic.gyp
@@ -37,8 +37,7 @@
'defines': [
# ICU cannot swap the initial data without this.
# http://bugs.icu-project.org/trac/ticket/11046
- 'UCONFIG_NO_LEGACY_CONVERSION=1',
- 'UCONFIG_NO_IDNA=1',
+ 'UCONFIG_NO_LEGACY_CONVERSION=1'
],
}],
],
@@ -428,9 +427,6 @@
#'<(icu_path)/source/common/ubidi_props_data.h',
# and the callers
'<(icu_path)/source/common/ushape.cpp',
- '<(icu_path)/source/common/usprep.cpp',
- '<(icu_path)/source/common/uts46.cpp',
- '<(icu_path)/source/common/uidna.cpp',
]}],
[ 'icu_ver_major == 57', { 'sources!': [
# work around http://bugs.icu-project.org/trac/ticket/12451
@@ -447,9 +443,6 @@
#'<(icu_path)/source/common/ubidi_props_data.h',
# and the callers
'<(icu_path)/source/common/ushape.cpp',
- '<(icu_path)/source/common/usprep.cpp',
- '<(icu_path)/source/common/uts46.cpp',
- '<(icu_path)/source/common/uidna.cpp',
]}],
[ 'OS == "solaris"', { 'defines': [
'_XOPEN_SOURCE_EXTENDED=0',
diff --git a/tools/icu/icu_small.json b/tools/icu/icu_small.json
index e434794e91..de26e2cbb1 100644
--- a/tools/icu/icu_small.json
+++ b/tools/icu/icu_small.json
@@ -24,7 +24,7 @@
"region": "none",
"zone": "locales",
"converters": "none",
- "stringprep": "none",
+ "stringprep": "locales",
"translit": "none",
"brkfiles": "none",
"brkdict": "none",
@@ -34,7 +34,6 @@
"remove": [
"cnvalias.icu",
"postalCodeData.res",
- "uts46.nrm",
"genderList.res",
"brkitr/root.res",
"unames.icu"