diff options
author | Bruno Haible <bruno@clisp.org> | 2021-12-29 22:34:24 +0100 |
---|---|---|
committer | Bruno Haible <bruno@clisp.org> | 2021-12-29 22:40:35 +0100 |
commit | f66fe3153797b7d1c61741d0f4561afe623cad91 (patch) | |
tree | ee915d122e86e0b5ba5ba25a6367b33d9645ebe6 /lib | |
parent | 5209414e95ed830b8b687ae71be8911ef0969e3e (diff) | |
download | gnulib-f66fe3153797b7d1c61741d0f4561afe623cad91.tar.gz |
uniwidth: Update to Unicode 10.0.0.
* lib/uniwidth/width.c (uc_width): Assign width 2 to the characters
0x231A..0x231B, 0x23E9..0x23EC, 0x23F0, 0x23F3, 0x25FD..0x25FE,
0x2614..0x2615, 0x2648..0x2653, 0x267F, 0x2693, 0x26A1, 0x26AA..0x26AB,
0x26BD..0x26BE, 0x26C4..0x26C5, 0x26CE, 0x26D4, 0x26EA, 0x26F2..0x26F3,
0x26F5, 0x26FA, 0x26FD, 0x2705, 0x270A..0x270B, 0x2728, 0x274C, 0x274E,
0x2753..0x2755, 0x2757, 0x2795..0x2797, 0x27B0, 0x27BF, 0x2B1B..0x2B1C,
0x2B50, 0x2B55, 0xA960..0xA97C, 0x16FE0..0x16FE1, 0x17000..0x187EC,
0x18800..0x18AF2, 0x1B000..0x1B11F, 0x1B170..0x1B2FB, 0x1F004, 0x1F0CF,
0x1F18E, 0x1F191..0x1F19A, 0x1F200..0x1F320, 0x1F32D..0x1F335,
0x1F337..0x1F37C, 0x1F37E..0x1F393, 0x1F3A0..0x1F3CA, 0x1F3CF..0x1F3D3,
0x1F3E0..0x1F3F0, 0x1F3F4, 0x1F3F8..0x1F43E, 0x1F440, 0x1F442..0x1F4FC,
0x1F4FF..0x1F53D, 0x1F54B..0x1F54E, 0x1F550..0x1F567, 0x1F57A,
0x1F595..0x1F596, 0x1F5A4, 0x1F5FB..0x1F64F, 0x1F680..0x1F6C5, 0x1F6CC,
0x1F6D0..0x1F6D2, 0x1F6EB..0x1F6EC, 0x1F6F4..0x1F6F8, 0x1F910..0x1F9E6.
Assign ambiguous width to the characters 0x3248..0x324F.
* tests/uniwidth/test-uc_width2.sh: Expect these changes.
Diffstat (limited to 'lib')
-rw-r--r-- | lib/uniwidth/width.c | 87 |
1 files changed, 83 insertions, 4 deletions
diff --git a/lib/uniwidth/width.c b/lib/uniwidth/width.c
index cc3dd6eb88..4db700ea1a 100644
--- a/lib/uniwidth/width.c
+++ b/lib/uniwidth/width.c
@@ -450,20 +450,99 @@ uc_width (ucs4_t uc, const char *encoding)
         }
     }
   /* Test for double-width character.
-   * Generated from "grep '^[^;]\{4,5\};[WF]' EastAsianWidth.txt"
-   * and "grep '^[^;]\{4,5\};[^WF]' EastAsianWidth.txt"
+   * Generated from "grep '^[^;]\+;[WF]' EastAsianWidth.txt"
+   * and "grep '^[^;]\+;[^WF]' EastAsianWidth.txt"
    */
   if (uc >= 0x1100
       && ((uc < 0x1160) /* Hangul Jamo */
-          || (uc >= 0x2329 && uc < 0x232b) /* Angle Brackets */
+          || ((uc >> 9) == 0x11
+              && ((uc >= 0x231a && uc < 0x231c) /* Watch, Hourglass */
+                  || (uc >= 0x2329 && uc < 0x232b) /* Angle Brackets */
+                  || (uc >= 0x23e9 && uc < 0x23ed) /* Black double triangles */
+                  || uc == 0x23f0 /* Alarm clock */
+                  || uc == 0x23f3)) /* Hourglass */
+          || ((uc >> 9) == 0x12
+              && (uc >= 0x25fd && uc < 0x25ff)) /* Medium small squares */
+          || ((uc >> 9) == 0x13 /* Miscellaneous symbols, dingbats */
+              && ((uc >= 0x2614 && uc < 0x2616)
+                  || (uc >= 0x2648 && uc < 0x2654)
+                  || uc == 0x267f
+                  || uc == 0x2693
+                  || uc == 0x26a1
+                  || (uc >= 0x26aa && uc < 0x26ac)
+                  || (uc >= 0x26bd && uc < 0x26bf)
+                  || (uc >= 0x26c4 && uc < 0x26c6)
+                  || uc == 0x26ce
+                  || uc == 0x26d4
+                  || uc == 0x26ea
+                  || (uc >= 0x26f2 && uc < 0x26f4)
+                  || uc == 0x26f5
+                  || uc == 0x26fa
+                  || uc == 0x26fd
+                  || uc == 0x2705
+                  || (uc >= 0x270a && uc < 0x270c)
+                  || uc == 0x2728
+                  || uc == 0x274c
+                  || uc == 0x274e
+                  || (uc >= 0x2753 && uc < 0x2756)
+                  || uc == 0x2757
+                  || (uc >= 0x2795 && uc < 0x2798)
+                  || uc == 0x27b0
+                  || uc == 0x27bf))
+          || ((uc >> 9) == 0x15
+              && ((uc >= 0x2b1b && uc < 0x2b1d) /* Large squares */
+                  || uc == 0x2b50
+                  || uc == 0x2b55))
           || (uc >= 0x2e80 && uc < 0xa4d0 /* CJK ... Yi */
-              && !(uc == 0x303f) && !(uc >= 0x4dc0 && uc < 0x4e00))
+              && !(uc == 0x303f)
+              && !(uc >= 0x3248 && uc < 0x3250)
+              && !(uc >= 0x4dc0 && uc < 0x4e00))
+          || (uc >= 0xa960 && uc < 0xa97d) /* Hangul Jamo Extended-A */
           || (uc >= 0xac00 && uc < 0xd7a4) /* Hangul Syllables */
           || (uc >= 0xf900 && uc < 0xfb00) /* CJK Compatibility Ideographs */
           || (uc >= 0xfe10 && uc < 0xfe20) /* Presentation Forms for Vertical */
           || (uc >= 0xfe30 && uc < 0xfe70) /* CJK Compatibility Forms */
           || (uc >= 0xff00 && uc < 0xff61) /* Fullwidth Forms */
           || (uc >= 0xffe0 && uc < 0xffe7) /* Fullwidth Signs */
+          || (uc >= 0x16fe0 && uc < 0x16fe2) /* Tangut mark, Nushu mark */
+          || (uc >= 0x17000 && uc < 0x187ed) /* Tangut */
+          || (uc >= 0x18800 && uc < 0x18af3) /* Tangut components */
+          || (uc >= 0x1b000 && uc < 0x1b120) /* Kana supplement, Kana Extended-A */
+          || (uc >= 0x1b170 && uc < 0x1b2fc) /* Nushu */
+          || ((uc >> 9) == 0xf8
+              && (uc == 0x1f004
+                  || uc == 0x1f0cf
+                  || uc == 0x1f18e
+                  || (uc >= 0x1f191 && uc < 0x1f19b)))
+          || ((uc >> 9) == 0xf9 /* Miscellaneous symbols and pictographs */
+              && ((uc >= 0x1f200 && uc < 0x1f321)
+                  || (uc >= 0x1f32d && uc < 0x1f336)
+                  || (uc >= 0x1f337 && uc < 0x1f37d)
+                  || (uc >= 0x1f37e && uc < 0x1f394)
+                  || (uc >= 0x1f3a0 && uc < 0x1f3cb)
+                  || (uc >= 0x1f3cf && uc < 0x1f3d4)
+                  || (uc >= 0x1f3e0 && uc < 0x1f3f1)
+                  || uc == 0x1f3f4
+                  || uc >= 0x1f3f8))
+          || ((uc >> 9) == 0xfa
+              && (uc < 0x1f43f
+                  || uc == 0x1f440
+                  || (uc >= 0x1f442 && uc < 0x1f4fd)
+                  || (uc >= 0x1f4ff && uc < 0x1f53e)
+                  || (uc >= 0x1f54b && uc < 0x1f54f)
+                  || (uc >= 0x1f550 && uc < 0x1f568)
+                  || uc == 0x1f57a
+                  || (uc >= 0x1f595 && uc < 0x1f597)
+                  || uc == 0x1f5a4
+                  || uc >= 0x1f5fb))
+          || ((uc >> 9) == 0xfb
+              && (uc < 0x1f650
+                  || (uc >= 0x1f680 && uc < 0x1f6c6)
+                  || uc == 0x1f6cc
+                  || (uc >= 0x1f6d0 && uc < 0x1f6d3)
+                  || (uc >= 0x1f6eb && uc < 0x1f6ed)
+                  || (uc >= 0x1f6f4 && uc < 0x1f6f9)))
+          || (uc >= 0x1f910 && uc < 0x1f9e7)
           || (uc >= 0x20000 && uc <= 0x2ffff) /* Supplementary Ideographic Plane */
           || (uc >= 0x30000 && uc <= 0x3ffff) /* Tertiary Ideographic Plane */
      ) )