From 183761ac24544b355aaf362e62d05fa1c184baf8 Mon Sep 17 00:00:00 2001
From: Pierre Le Marre
Date: Sat, 13 May 2023 17:26:24 +0200
Subject: Do not interpret nor emit invalid Unicode encoding forms

Surrogates are invalid in both UTF-32 and UTF-8.
See https://www.unicode.org/versions/Unicode15.0.0/ch03.pdf#G28875
and https://www.unicode.org/versions/Unicode15.0.0/ch03.pdf#G31703
---
 test/keysym.c | 4 ++++
 test/utf8.c   | 2 ++
 2 files changed, 6 insertions(+)

(limited to 'test')

diff --git a/test/keysym.c b/test/keysym.c
index 38f967d..a4dba0c 100644
--- a/test/keysym.c
+++ b/test/keysym.c
@@ -222,6 +222,8 @@ main(void)
 
     assert(test_utf8(0x10005d0, "א"));
     assert(test_utf8(0x110ffff, "\xf4\x8f\xbf\xbf"));
+    assert(test_utf8(0x0100d800, NULL) == 0); // Unicode surrogates
+    assert(test_utf8(0x0100dfff, NULL) == 0); // Unicode surrogates
     assert(test_utf8(0x1110000, NULL) == 0);
 
     assert(test_utf32_to_keysym('y', XKB_KEY_y));
@@ -255,6 +257,8 @@ main(void)
     assert(test_utf32_to_keysym(0x20ac, XKB_KEY_EuroSign));
 
     // Unicode non-characters
+    assert(test_utf32_to_keysym(0xd800, XKB_KEY_NoSymbol)); // Unicode surrogates
+    assert(test_utf32_to_keysym(0xdfff, XKB_KEY_NoSymbol)); // Unicode surrogates
     assert(test_utf32_to_keysym(0xfdd0, XKB_KEY_NoSymbol));
     assert(test_utf32_to_keysym(0xfdef, XKB_KEY_NoSymbol));
     assert(test_utf32_to_keysym(0xfffe, XKB_KEY_NoSymbol));
diff --git a/test/utf8.c b/test/utf8.c
index 214e356..aa3c0d5 100644
--- a/test/utf8.c
+++ b/test/utf8.c
@@ -170,6 +170,8 @@ test_utf32_to_utf8(void)
     check_utf32_to_utf8(0x40, 2, "\x40");
     check_utf32_to_utf8(0xA1, 3, "\xc2\xa1");
     check_utf32_to_utf8(0x2701, 4, "\xe2\x9c\x81");
+    check_utf32_to_utf8(0xd800, 0, ""); // Unicode surrogates
+    check_utf32_to_utf8(0xdfff, 0, ""); // Unicode surrogates
     check_utf32_to_utf8(0x1f004, 5, "\xf0\x9f\x80\x84");
     check_utf32_to_utf8(0x110000, 0, "");
     check_utf32_to_utf8(0xffffffff, 0, "");
-- 
cgit v1.2.1