summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Ran Benita <ran234@gmail.com> 2018-06-23 22:00:19 +0300
committer Ran Benita <ran234@gmail.com> 2018-06-23 22:53:42 +0300
commit 5cee660f692b67fdb2ce677fc516382bf0e93af1 (patch)
tree c5fa7cfb94875e14da00dff1d68207fd9c2044dd
parent b63196e91e7d78e88a9012f3f44152eeef5358cf (diff)
download xorg-lib-libxkbcommon-5cee660f692b67fdb2ce677fc516382bf0e93af1.tar.gz
keysym-utf: reject out-of-range Unicode codepoints in xkb_keysym_to_utf{8,32}
It used to be that UTF-8 was defined for inputs > 0x10FFFF, but nowadays that is the maximum, and a codepoint is encoded in up to 4 bytes, not 6.
Fixes: https://github.com/xkbcommon/libxkbcommon/issues/58
Fixes: https://github.com/xkbcommon/libxkbcommon/issues/59
Reported-by: @andrecbarros
Signed-off-by: Ran Benita <ran234@gmail.com>
-rw-r--r-- src/keysym-utf.c 12
-rw-r--r-- src/utf8.c 10
-rw-r--r-- test/keysym.c 5
-rw-r--r-- test/utf8.c 26
4 files changed, 43 insertions, 10 deletions
diff --git a/src/keysym-utf.c b/src/keysym-utf.c
index 1ccfc0e..c0e76f5 100644
--- a/src/keysym-utf.c
+++ b/src/keysym-utf.c
@@ -881,9 +881,15 @@ xkb_keysym_to_utf32(xkb_keysym_t keysym)
keysym == XKB_KEY_KP_Enter || keysym == XKB_KEY_KP_Equal)
return keysym & 0x7f;
- /* also check for directly encoded 24-bit UCS characters */
- if ((keysym & 0xff000000) == 0x01000000)
- return keysym & 0x00ffffff;
+ /* also check for directly encoded Unicode codepoints */
+ /*
+ * In theory, this is supposed to start from 0x01000100, such that the Latin-1
+ * range, which is already covered by 0x00-0xff, can't be encoded in two
+ * ways. However, changing this after a couple of decades probably won't
+ * go well, so it stays as it is.
+ */
+ if (0x01000000 <= keysym && keysym <= 0x0110ffff)
+ return keysym - 0x01000000;
/* search main table */
return bin_search(keysymtab, ARRAY_SIZE(keysymtab) - 1, keysym);
diff --git a/src/utf8.c b/src/utf8.c
index a7fa82e..a76b001 100644
--- a/src/utf8.c
+++ b/src/utf8.c
@@ -49,17 +49,13 @@ utf32_to_utf8(uint32_t unichar, char *buffer)
length = 3;
head = 0xe0;
}
- else if (unichar <= 0x1fffff) {
+ else if (unichar <= 0x10ffff) {
length = 4;
head = 0xf0;
}
- else if (unichar <= 0x3ffffff) {
- length = 5;
- head = 0xf8;
- }
else {
- length = 6;
- head = 0xfc;
+ buffer[0] = '\0';
+ return 0;
}
for (count = length - 1, shift = 0; count > 0; count--, shift += 6)
diff --git a/test/keysym.c b/test/keysym.c
index 4414523..e5347dd 100644
--- a/test/keysym.c
+++ b/test/keysym.c
@@ -79,6 +79,7 @@ test_utf8(xkb_keysym_t keysym, const char *expected)
fprintf(stderr, "Received keysym %#x -> %s (%u bytes)\n\n", keysym, s,
(unsigned) strlen(s));
+ assert(expected != NULL);
return streq(s, expected);
}
@@ -175,6 +176,10 @@ main(void)
assert(test_utf8(XKB_KEY_KP_Multiply, "*"));
assert(test_utf8(XKB_KEY_KP_Subtract, "-"));
+ assert(test_utf8(0x10005d0, "א"));
+ assert(test_utf8(0x110ffff, "\xf4\x8f\xbf\xbf"));
+ assert(test_utf8(0x1110000, NULL) == 0);
+
assert(xkb_keysym_is_lower(XKB_KEY_a));
assert(xkb_keysym_is_lower(XKB_KEY_Greek_lambda));
assert(xkb_keysym_is_lower(xkb_keysym_from_name("U03b1", 0))); /* GREEK SMALL LETTER ALPHA */
diff --git a/test/utf8.c b/test/utf8.c
index 60673c1..1d1c073 100644
--- a/test/utf8.c
+++ b/test/utf8.c
@@ -25,8 +25,10 @@
#include <inttypes.h>
#include <stdbool.h>
#include <stddef.h>
+#include <string.h>
#include "utf8.h"
+#include "utils.h"
#define VALID(lit) assert(is_valid_utf8(lit, sizeof(lit)-1))
#define INVALID(lit) assert(!is_valid_utf8(lit, sizeof(lit)-1))
@@ -148,10 +150,34 @@ test_is_valid_utf8(void)
/* INVALID("\xEF\xBF\xBF"); */
}
+static void
+check_utf32_to_utf8(uint32_t unichar, int expected_length, const char *expected) {
+ char buffer[7];
+ int length;
+
+ length = utf32_to_utf8(unichar, buffer);
+
+ assert(length == expected_length);
+ assert(streq(buffer, expected));
+}
+
+static void
+test_utf32_to_utf8(void)
+{
+ check_utf32_to_utf8(0x0, 2, "");
+ check_utf32_to_utf8(0x40, 2, "\x40");
+ check_utf32_to_utf8(0xA1, 3, "\xc2\xa1");
+ check_utf32_to_utf8(0x2701, 4, "\xe2\x9c\x81");
+ check_utf32_to_utf8(0x1f004, 5, "\xf0\x9f\x80\x84");
+ check_utf32_to_utf8(0x110000, 0, "");
+ check_utf32_to_utf8(0xffffffff, 0, "");
+}
+
int
main(void)
{
test_is_valid_utf8();
+ test_utf32_to_utf8();
return 0;
}