diff options
author | Ran Benita <ran234@gmail.com> | 2018-06-23 22:00:19 +0300 |
---|---|---|
committer | Ran Benita <ran234@gmail.com> | 2018-06-23 22:53:42 +0300 |
commit | 5cee660f692b67fdb2ce677fc516382bf0e93af1 (patch) | |
tree | c5fa7cfb94875e14da00dff1d68207fd9c2044dd | |
parent | b63196e91e7d78e88a9012f3f44152eeef5358cf (diff) | |
download | xorg-lib-libxkbcommon-5cee660f692b67fdb2ce677fc516382bf0e93af1.tar.gz |
keysym-utf: reject out-of-range Unicode codepoints in xkb_keysym_to_utf{8,32}
UTF-8 used to be defined for codepoints above 0x10FFFF, but nowadays
0x10FFFF is the maximum, and a codepoint is encoded in at most 4 bytes, not 6.
Fixes: https://github.com/xkbcommon/libxkbcommon/issues/58
Fixes: https://github.com/xkbcommon/libxkbcommon/issues/59
Reported-by: @andrecbarros
Signed-off-by: Ran Benita <ran234@gmail.com>
-rw-r--r-- | src/keysym-utf.c | 12 | ||||
-rw-r--r-- | src/utf8.c | 10 | ||||
-rw-r--r-- | test/keysym.c | 5 | ||||
-rw-r--r-- | test/utf8.c | 26 |
4 files changed, 43 insertions, 10 deletions
diff --git a/src/keysym-utf.c b/src/keysym-utf.c index 1ccfc0e..c0e76f5 100644 --- a/src/keysym-utf.c +++ b/src/keysym-utf.c @@ -881,9 +881,15 @@ xkb_keysym_to_utf32(xkb_keysym_t keysym) keysym == XKB_KEY_KP_Enter || keysym == XKB_KEY_KP_Equal) return keysym & 0x7f; - /* also check for directly encoded 24-bit UCS characters */ - if ((keysym & 0xff000000) == 0x01000000) - return keysym & 0x00ffffff; + /* also check for directly encoded Unicode codepoints */ + /* + * In theory, this is supposed to start from 0x100100, such that the ASCII + * range, which is already covered by 0x00-0xff, can't be encoded in two + * ways. However, changing this after a couple of decades probably won't + * go well, so it stays as it is. + */ + if (0x01000000 <= keysym && keysym <= 0x0110ffff) + return keysym - 0x01000000; /* search main table */ return bin_search(keysymtab, ARRAY_SIZE(keysymtab) - 1, keysym); @@ -49,17 +49,13 @@ utf32_to_utf8(uint32_t unichar, char *buffer) length = 3; head = 0xe0; } - else if (unichar <= 0x1fffff) { + else if (unichar <= 0x10ffff) { length = 4; head = 0xf0; } - else if (unichar <= 0x3ffffff) { - length = 5; - head = 0xf8; - } else { - length = 6; - head = 0xfc; + buffer[0] = '\0'; + return 0; } for (count = length - 1, shift = 0; count > 0; count--, shift += 6) diff --git a/test/keysym.c b/test/keysym.c index 4414523..e5347dd 100644 --- a/test/keysym.c +++ b/test/keysym.c @@ -79,6 +79,7 @@ test_utf8(xkb_keysym_t keysym, const char *expected) fprintf(stderr, "Received keysym %#x -> %s (%u bytes)\n\n", keysym, s, (unsigned) strlen(s)); + assert(expected != NULL); return streq(s, expected); } @@ -175,6 +176,10 @@ main(void) assert(test_utf8(XKB_KEY_KP_Multiply, "*")); assert(test_utf8(XKB_KEY_KP_Subtract, "-")); + assert(test_utf8(0x10005d0, "א")); + assert(test_utf8(0x110ffff, "\xf4\x8f\xbf\xbf")); + assert(test_utf8(0x1110000, NULL) == 0); + assert(xkb_keysym_is_lower(XKB_KEY_a)); assert(xkb_keysym_is_lower(XKB_KEY_Greek_lambda)); 
assert(xkb_keysym_is_lower(xkb_keysym_from_name("U03b1", 0))); /* GREEK SMALL LETTER ALPHA */ diff --git a/test/utf8.c b/test/utf8.c index 60673c1..1d1c073 100644 --- a/test/utf8.c +++ b/test/utf8.c @@ -25,8 +25,10 @@ #include <inttypes.h> #include <stdbool.h> #include <stddef.h> +#include <string.h> #include "utf8.h" +#include "utils.h" #define VALID(lit) assert(is_valid_utf8(lit, sizeof(lit)-1)) #define INVALID(lit) assert(!is_valid_utf8(lit, sizeof(lit)-1)) @@ -148,10 +150,34 @@ test_is_valid_utf8(void) /* INVALID("\xEF\xBF\xBF"); */ } +static void +check_utf32_to_utf8(uint32_t unichar, int expected_length, const char *expected) { + char buffer[7]; + int length; + + length = utf32_to_utf8(unichar, buffer); + + assert(length == expected_length); + assert(streq(buffer, expected)); +} + +static void +test_utf32_to_utf8(void) +{ + check_utf32_to_utf8(0x0, 2, ""); + check_utf32_to_utf8(0x40, 2, "\x40"); + check_utf32_to_utf8(0xA1, 3, "\xc2\xa1"); + check_utf32_to_utf8(0x2701, 4, "\xe2\x9c\x81"); + check_utf32_to_utf8(0x1f004, 5, "\xf0\x9f\x80\x84"); + check_utf32_to_utf8(0x110000, 0, ""); + check_utf32_to_utf8(0xffffffff, 0, ""); +} + int main(void) { test_is_valid_utf8(); + test_utf32_to_utf8(); return 0; } |