diff options
author | Emmanuele Bassi <ebassi@gmail.com> | 2023-04-13 21:35:51 +0000 |
---|---|---|
committer | Emmanuele Bassi <ebassi@gmail.com> | 2023-04-13 21:35:51 +0000 |
commit | 2af45f9c20dc4e08b11b12c53344e43473e1d96e (patch) | |
tree | 466dc06d2105854adca92957b0f756f8cc30e8c3 | |
parent | 18ae2a3d4e6dd273697b7b32e710ffd21d723696 (diff) | |
parent | 6b39af34aa7e3f4f8a6a7225eb719c9d7778f9ea (diff) | |
download | glib-2af45f9c20dc4e08b11b12c53344e43473e1d96e.tar.gz |
Merge branch 'libicu-tests' into 'main'
unicode: add tests for g_utf8_normalize() and empty strings
See merge request GNOME/glib!3326
-rw-r--r-- | glib/gunicode.h | 2 | ||||
-rw-r--r-- | glib/tests/unicode.c | 52 |
2 files changed, 53 insertions, 1 deletion
```diff
diff --git a/glib/gunicode.h b/glib/gunicode.h
index 63117ea28..38e29b84f 100644
--- a/glib/gunicode.h
+++ b/glib/gunicode.h
@@ -210,7 +210,7 @@ typedef enum
  * Since new unicode versions may add new types here, applications should be ready
  * to handle unknown values. They may be regarded as %G_UNICODE_BREAK_UNKNOWN.
  *
- * See [Unicode Line Breaking Algorithm](http://www.unicode.org/unicode/reports/tr14/).
+ * See [Unicode Line Breaking Algorithm](https://www.unicode.org/reports/tr14/).
  */
 typedef enum
 {
diff --git a/glib/tests/unicode.c b/glib/tests/unicode.c
index d4955c6d1..5d66977c2 100644
--- a/glib/tests/unicode.c
+++ b/glib/tests/unicode.c
@@ -459,6 +459,10 @@ test_strup (void)
   /* Tricky, comparing two unicode strings with an ASCII function */
   g_assert_cmpstr (str_up, ==, "AAZZ09X;\003E\357\274\241\357\274\241");
   g_free (str_up);
+
+  str_up = g_utf8_strup ("", 0);
+  g_assert_cmpstr (str_up, ==, "");
+  g_free (str_up);
 }
 
 /* Test that g_utf8_strdown() returns the correct value for various
@@ -484,6 +488,10 @@ test_strdown (void)
   /* Tricky, comparing two unicode strings with an ASCII function */
   g_assert_cmpstr (str_down, ==, "aazz09x;\003\007\357\275\201\357\275\201");
   g_free (str_down);
+
+  str_down = g_utf8_strdown ("", 0);
+  g_assert_cmpstr (str_down, ==, "");
+  g_free (str_down);
 }
 
 /* Test that g_utf8_strup() and g_utf8_strdown() return the correct
@@ -576,6 +584,10 @@ test_casefold (void)
   /* Tricky, comparing two unicode strings with an ASCII function */
   g_assert_cmpstr (str_casefold, ==, "aazz09x;\357\275\201\357\275\201");
   g_free (str_casefold);
+
+  str_casefold = g_utf8_casefold ("", 0);
+  g_assert_cmpstr (str_casefold, ==, "");
+  g_free (str_casefold);
 }
 
 static void
@@ -1889,6 +1901,45 @@ test_iso15924 (void)
 #undef PACK
 }
 
+static void
+test_normalize (void)
+{
+  guint i;
+  typedef struct
+  {
+    const gchar *str;
+    const gchar *nfd;
+    const gchar *nfc;
+    const gchar *nfkd;
+    const gchar *nfkc;
+  } Test;
+  Test tests[] = {
+    { "Äffin", "A\u0308ffin", "Äffin", "A\u0308ffin", "Äffin" },
+    { "Ä\uFB03n", "A\u0308\uFB03n", "Ä\uFB03n", "A\u0308ffin", "Äffin" },
+    { "Henry IV", "Henry IV", "Henry IV", "Henry IV", "Henry IV" },
+    { "Henry \u2163", "Henry \u2163", "Henry \u2163", "Henry IV", "Henry IV" },
+    { "non-utf\x88", NULL, NULL, NULL, NULL },
+    { "", "", "", "", "" },
+  };
+
+#define TEST(str, mode, expected) \
+  { \
+    gchar *normalized = g_utf8_normalize (str, -1, mode); \
+    g_assert_cmpstr (normalized, ==, expected); \
+    g_free (normalized); \
+  }
+
+  for (i = 0; i < G_N_ELEMENTS (tests); i++)
+    {
+      TEST (tests[i].str, G_NORMALIZE_NFD, tests[i].nfd);
+      TEST (tests[i].str, G_NORMALIZE_NFC, tests[i].nfc);
+      TEST (tests[i].str, G_NORMALIZE_NFKD, tests[i].nfkd);
+      TEST (tests[i].str, G_NORMALIZE_NFKC, tests[i].nfkc);
+    }
+
+#undef TEST
+}
+
 int
 main (int argc,
       char *argv[])
@@ -1933,6 +1984,7 @@ main (int argc,
   g_test_add_func ("/unicode/xdigit", test_xdigit);
   g_test_add_func ("/unicode/xdigit-value", test_xdigit_value);
   g_test_add_func ("/unicode/zero-width", test_zerowidth);
+  g_test_add_func ("/unicode/normalize", test_normalize);
 
   return g_test_run();
 }
```