summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Anna Henningsen <anna@addaleax.net> 2020-04-25 02:31:09 +0200
committer: Ruben Bridgewater <ruben@bridgewater.de> 2020-04-28 13:58:28 +0200
commit: c239cc650cb0681f67b8cc47fcd507c4192647e7 (patch)
tree: 0412c373decb256e1672f2e3783a800db76c66c0
parent: 0f4d513873ecfdec4834398cdbe5493696814d39 (diff)
download: node-new-c239cc650cb0681f67b8cc47fcd507c4192647e7.tar.gz
util,readline: NFC-normalize strings before getStringWidth
The assumption here is that decomposed characters render like their composed character equivalents, and that working with the former comes with a risk of over-estimating string widths given that we compute them on a per-code-point basis. The regression test added here (한글 in its decomposed NFD form vs. its composed NFC form) is an example of that happening.

PR-URL: https://github.com/nodejs/node/pull/33052
Reviewed-By: Gus Caplan <me@gus.host>
Reviewed-By: Michaël Zasso <targos@protonmail.com>
Reviewed-By: Anto Aravinth <anto.aravinth.cse@gmail.com>
Reviewed-By: Colin Ihrig <cjihrig@gmail.com>
Reviewed-By: James M Snell <jasnell@gmail.com>
-rw-r--r-- lib/internal/util/inspect.js 15
-rw-r--r-- test/parallel/test-icu-stringwidth.js 9
2 files changed, 19 insertions, 5 deletions
diff --git a/lib/internal/util/inspect.js b/lib/internal/util/inspect.js
index 862aed58b3..10242f2b5e 100644
--- a/lib/internal/util/inspect.js
+++ b/lib/internal/util/inspect.js
@@ -1917,6 +1917,13 @@ function formatWithOptionsInternal(inspectOptions, ...args) {
return str;
}
+function prepareStringForGetStringWidth(str, removeControlChars) {
+ str = str.normalize('NFC');
+ if (removeControlChars)
+ str = stripVTControlCharacters(str);
+ return str;
+}
+
if (internalBinding('config').hasIntl) {
const icu = internalBinding('icu');
// icu.getStringWidth(string, ambiguousAsFullWidth, expandEmojiSequence)
@@ -1926,8 +1933,8 @@ if (internalBinding('config').hasIntl) {
// the receiving end supports.
getStringWidth = function getStringWidth(str, removeControlChars = true) {
let width = 0;
- if (removeControlChars)
- str = stripVTControlCharacters(str);
+
+ str = prepareStringForGetStringWidth(str, removeControlChars);
for (let i = 0; i < str.length; i++) {
// Try to avoid calling into C++ by first handling the ASCII portion of
// the string. If it is fully ASCII, we skip the C++ part.
@@ -1947,9 +1954,7 @@ if (internalBinding('config').hasIntl) {
getStringWidth = function getStringWidth(str, removeControlChars = true) {
let width = 0;
- if (removeControlChars)
- str = stripVTControlCharacters(str);
-
+ str = prepareStringForGetStringWidth(str, removeControlChars);
for (const char of str) {
const code = char.codePointAt(0);
if (isFullWidthCodePoint(code)) {
diff --git a/test/parallel/test-icu-stringwidth.js b/test/parallel/test-icu-stringwidth.js
index 66d75c4cbe..4e8389961d 100644
--- a/test/parallel/test-icu-stringwidth.js
+++ b/test/parallel/test-icu-stringwidth.js
@@ -87,3 +87,12 @@ for (let i = 0; i < 256; i++) {
assert.strictEqual(getStringWidth(char), 1);
}
}
+
+{
+ const a = '한글'.normalize('NFD'); // 한글
+ const b = '한글'.normalize('NFC'); // 한글
+ assert.strictEqual(a.length, 6);
+ assert.strictEqual(b.length, 2);
+ assert.strictEqual(getStringWidth(a), 4);
+ assert.strictEqual(getStringWidth(b), 4);
+}