diff options
author | Anna Henningsen <anna@addaleax.net> | 2020-04-25 02:31:09 +0200 |
---|---|---|
committer | Ruben Bridgewater <ruben@bridgewater.de> | 2020-04-28 13:58:28 +0200 |
commit | c239cc650cb0681f67b8cc47fcd507c4192647e7 (patch) | |
tree | 0412c373decb256e1672f2e3783a800db76c66c0 | |
parent | 0f4d513873ecfdec4834398cdbe5493696814d39 (diff) | |
download | node-new-c239cc650cb0681f67b8cc47fcd507c4192647e7.tar.gz |
util,readline: NFC-normalize strings before getStringWidth
The assumption here is that decomposed characters render like their
composed character equivalents, and that working with the former
comes with a risk of over-estimating string widths given that
we compute them on a per-code-point basis. The regression test
added here (decomposed/NFD 한글 vs. composed/NFC 한글) is an example of that happening.
PR-URL: https://github.com/nodejs/node/pull/33052
Reviewed-By: Gus Caplan <me@gus.host>
Reviewed-By: Michaël Zasso <targos@protonmail.com>
Reviewed-By: Anto Aravinth <anto.aravinth.cse@gmail.com>
Reviewed-By: Colin Ihrig <cjihrig@gmail.com>
Reviewed-By: James M Snell <jasnell@gmail.com>
-rw-r--r-- | lib/internal/util/inspect.js | 15 | ||||
-rw-r--r-- | test/parallel/test-icu-stringwidth.js | 9 |
2 files changed, 19 insertions, 5 deletions
diff --git a/lib/internal/util/inspect.js b/lib/internal/util/inspect.js index 862aed58b3..10242f2b5e 100644 --- a/lib/internal/util/inspect.js +++ b/lib/internal/util/inspect.js @@ -1917,6 +1917,13 @@ function formatWithOptionsInternal(inspectOptions, ...args) { return str; } +function prepareStringForGetStringWidth(str, removeControlChars) { + str = str.normalize('NFC'); + if (removeControlChars) + str = stripVTControlCharacters(str); + return str; +} + if (internalBinding('config').hasIntl) { const icu = internalBinding('icu'); // icu.getStringWidth(string, ambiguousAsFullWidth, expandEmojiSequence) @@ -1926,8 +1933,8 @@ if (internalBinding('config').hasIntl) { // the receiving end supports. getStringWidth = function getStringWidth(str, removeControlChars = true) { let width = 0; - if (removeControlChars) - str = stripVTControlCharacters(str); + + str = prepareStringForGetStringWidth(str, removeControlChars); for (let i = 0; i < str.length; i++) { // Try to avoid calling into C++ by first handling the ASCII portion of // the string. If it is fully ASCII, we skip the C++ part. 
@@ -1947,9 +1954,7 @@ if (internalBinding('config').hasIntl) { getStringWidth = function getStringWidth(str, removeControlChars = true) { let width = 0; - if (removeControlChars) - str = stripVTControlCharacters(str); - + str = prepareStringForGetStringWidth(str, removeControlChars); for (const char of str) { const code = char.codePointAt(0); if (isFullWidthCodePoint(code)) { diff --git a/test/parallel/test-icu-stringwidth.js b/test/parallel/test-icu-stringwidth.js index 66d75c4cbe..4e8389961d 100644 --- a/test/parallel/test-icu-stringwidth.js +++ b/test/parallel/test-icu-stringwidth.js @@ -87,3 +87,12 @@ for (let i = 0; i < 256; i++) { assert.strictEqual(getStringWidth(char), 1); } } + +{ + const a = '한글'.normalize('NFD'); // 한글 + const b = '한글'.normalize('NFC'); // 한글 + assert.strictEqual(a.length, 6); + assert.strictEqual(b.length, 2); + assert.strictEqual(getStringWidth(a), 4); + assert.strictEqual(getStringWidth(b), 4); +} |