summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Todd Carson <toc@daybefore.net> 2023-03-23 21:52:40 -1000
committer: Philip Withnall <pwithnall@endlessos.org> 2023-04-14 16:52:16 +0100
commit: c82f9adb19e2a1d8d56ab810b7b5468982b9b751 (patch)
tree: 8ed475575ba76453bcc1ae66ae73948a5bc15622
parent: 7f4726d151a616720bfd4e4f6a2c3a2d471f8b33 (diff)
download: glib-c82f9adb19e2a1d8d56ab810b7b5468982b9b751.tar.gz
Test g_utf8_normalize() handling of invalid UTF-8 inputs
Add three classes of test case for which g_utf8_normalize() should safely return NULL:
- Strings ending with a truncated multibyte character which would extend past the NUL terminator
- Strings ending with a multibyte character which extends past the length limit provided by the max_len argument
- Strings containing an invalid multibyte character in any position
-rw-r--r-- glib/tests/unicode-normalize.c 38
1 files changed, 38 insertions, 0 deletions
diff --git a/glib/tests/unicode-normalize.c b/glib/tests/unicode-normalize.c
index 451c03f34..191e5bb98 100644
--- a/glib/tests/unicode-normalize.c
+++ b/glib/tests/unicode-normalize.c
@@ -146,12 +146,50 @@ test_unicode_normalize (void)
g_string_free (buffer, TRUE);
}
+static void
+test_unicode_normalize_invalid (void)
+{
+ /* g_utf8_normalize() should return NULL for all of these invalid inputs; every vector below is normalized with G_NORMALIZE_ALL and the result is asserted to be NULL */
+ const struct
+ {
+ gssize max_len; /* passed as the length argument of g_utf8_normalize(); -1 is used for the vectors that rely on the string's NUL terminator instead of a byte limit */
+ const gchar *str; /* raw bytes of the malformed input */
+ } test_vectors[] = {
+ /* input ending with truncated multibyte encoding: vectors come in pairs, first cut short by the NUL terminator (max_len -1), then by a max_len that stops before the last byte present in the literal; lead bytes 0xC0, 0xE0, 0xF0 and 0xF8 announce 2-, 3-, 4- and 5-byte sequences */
+ { -1, "\xC0" },
+ { 1, "\xC0\x80" },
+ { -1, "\xE0\x80" },
+ { 2, "\xE0\x80\x80" },
+ { -1, "\xF0\x80\x80" },
+ { 3, "\xF0\x80\x80\x80" },
+ { -1, "\xF8\x80\x80\x80" },
+ { 4, "\xF8\x80\x80\x80\x80" },
+ { 3, "\x20\xE2\x84\xAA" }, /* a space, then the 3-byte sequence E2 84 AA cut off by max_len after its second byte */
+ { -1, "\x20\xE2\x00\xAA" }, /* the embedded NUL ends the string right after the 0xE2 lead byte */
+ { -1, "\xC0\x80\xE0\x80" }, /* a 2-byte sequence followed by a 3-byte lead byte with only one continuation byte before the NUL */
+ { 4, "\xC0\x80\xE0\x80\x80" }, /* same prefix; the final continuation byte is present but lies beyond max_len */
+ /* input containing invalid multibyte encoding (the malformed bytes are in the middle of the string, not at its end) */
+ { -1, "\xED\x85\x9C\xED\x15\x9C\xED\x85\x9C" }, /* the middle sequence has 0x15 where a continuation byte (0x80-0xBF) is expected */
+ };
+ gsize i;
+
+ for (i = 0; i < G_N_ELEMENTS (test_vectors); i++)
+ {
+ g_test_message ("Invalid UTF-8 vector %" G_GSIZE_FORMAT, i); /* log the index of the vector about to be checked */
+ g_assert_null (g_utf8_normalize (test_vectors[i].str,
+ test_vectors[i].max_len,
+ G_NORMALIZE_ALL));
+ }
+}
+
int
main (int argc, char **argv)
{
g_test_init (&argc, &argv, NULL); /* hand the command-line arguments to the GLib test framework */
g_test_add_func ("/unicode/normalize", test_unicode_normalize);
+ g_test_add_func ("/unicode/normalize-invalid", /* register the invalid-input checks under their own test path */
+ test_unicode_normalize_invalid);
return g_test_run (); /* the process exit status is whatever g_test_run() returns */
}