summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Emmanuele Bassi <ebassi@gmail.com> 2023-04-13 21:35:51 +0000
committer Emmanuele Bassi <ebassi@gmail.com> 2023-04-13 21:35:51 +0000
commit 2af45f9c20dc4e08b11b12c53344e43473e1d96e (patch)
tree 466dc06d2105854adca92957b0f756f8cc30e8c3
parent 18ae2a3d4e6dd273697b7b32e710ffd21d723696 (diff)
parent 6b39af34aa7e3f4f8a6a7225eb719c9d7778f9ea (diff)
download glib-2af45f9c20dc4e08b11b12c53344e43473e1d96e.tar.gz
Merge branch 'libicu-tests' into 'main'
unicode: add tests for g_utf8_normalize() and empty strings

See merge request GNOME/glib!3326
-rw-r--r-- glib/gunicode.h 2
-rw-r--r-- glib/tests/unicode.c 52
2 files changed, 53 insertions, 1 deletions
diff --git a/glib/gunicode.h b/glib/gunicode.h
index 63117ea28..38e29b84f 100644
--- a/glib/gunicode.h
+++ b/glib/gunicode.h
@@ -210,7 +210,7 @@ typedef enum
* Since new unicode versions may add new types here, applications should be ready
* to handle unknown values. They may be regarded as %G_UNICODE_BREAK_UNKNOWN.
*
- * See [Unicode Line Breaking Algorithm](http://www.unicode.org/unicode/reports/tr14/).
+ * See [Unicode Line Breaking Algorithm](https://www.unicode.org/reports/tr14/).
*/
typedef enum
{
diff --git a/glib/tests/unicode.c b/glib/tests/unicode.c
index d4955c6d1..5d66977c2 100644
--- a/glib/tests/unicode.c
+++ b/glib/tests/unicode.c
@@ -459,6 +459,10 @@ test_strup (void)
/* Tricky, comparing two unicode strings with an ASCII function */
g_assert_cmpstr (str_up, ==, "AAZZ09X;\003E\357\274\241\357\274\241");
g_free (str_up);
+
+ str_up = g_utf8_strup ("", 0);
+ g_assert_cmpstr (str_up, ==, "");
+ g_free (str_up);
}
/* Test that g_utf8_strdown() returns the correct value for various
@@ -484,6 +488,10 @@ test_strdown (void)
/* Tricky, comparing two unicode strings with an ASCII function */
g_assert_cmpstr (str_down, ==, "aazz09x;\003\007\357\275\201\357\275\201");
g_free (str_down);
+
+ str_down = g_utf8_strdown ("", 0);
+ g_assert_cmpstr (str_down, ==, "");
+ g_free (str_down);
}
/* Test that g_utf8_strup() and g_utf8_strdown() return the correct
@@ -576,6 +584,10 @@ test_casefold (void)
/* Tricky, comparing two unicode strings with an ASCII function */
g_assert_cmpstr (str_casefold, ==, "aazz09x;\357\275\201\357\275\201");
g_free (str_casefold);
+
+ str_casefold = g_utf8_casefold ("", 0);
+ g_assert_cmpstr (str_casefold, ==, "");
+ g_free (str_casefold);
}
static void
@@ -1889,6 +1901,45 @@ test_iso15924 (void)
#undef PACK
}
+static void /* Test: run g_utf8_normalize() in NFD, NFC, NFKD and NFKC modes over a fixed table of inputs. */
+test_normalize (void)
+{
+ guint i; /* index of the current row in tests[] */
+ typedef struct /* one table row: an input plus the expected output for each mode */
+ {
+ const gchar *str; /* input handed to g_utf8_normalize() */
+ const gchar *nfd; /* expected result for G_NORMALIZE_NFD */
+ const gchar *nfc; /* expected result for G_NORMALIZE_NFC */
+ const gchar *nfkd; /* expected result for G_NORMALIZE_NFKD */
+ const gchar *nfkc; /* expected result for G_NORMALIZE_NFKC */
+ } Test;
+ Test tests[] = {
+ { "Äffin", "A\u0308ffin", "Äffin", "A\u0308ffin", "Äffin" }, /* NFD and NFKD expect A followed by U+0308 */
+ { "Ä\uFB03n", "A\u0308\uFB03n", "Ä\uFB03n", "A\u0308ffin", "Äffin" }, /* U+FB03 kept by NFD/NFC, expected as "ffi" for NFKD/NFKC */
+ { "Henry IV", "Henry IV", "Henry IV", "Henry IV", "Henry IV" }, /* expected unchanged in every mode */
+ { "Henry \u2163", "Henry \u2163", "Henry \u2163", "Henry IV", "Henry IV" }, /* U+2163 kept by NFD/NFC, expected as "IV" for NFKD/NFKC */
+ { "non-utf\x88", NULL, NULL, NULL, NULL }, /* input ends with the byte 0x88; NULL is expected in every mode */
+ { "", "", "", "", "" }, /* empty input is expected to yield an empty string */
+ };
+ /* TEST: normalize str (length argument -1) in the given mode, compare with expected, then free the result. */
+#define TEST(str, mode, expected) \
+ { \
+ gchar *normalized = g_utf8_normalize (str, -1, mode); \
+ g_assert_cmpstr (normalized, ==, expected); \
+ g_free (normalized); \
+ }
+ /* Check each row against all four normalization modes. */
+ for (i = 0; i < G_N_ELEMENTS (tests); i++)
+ {
+ TEST (tests[i].str, G_NORMALIZE_NFD, tests[i].nfd);
+ TEST (tests[i].str, G_NORMALIZE_NFC, tests[i].nfc);
+ TEST (tests[i].str, G_NORMALIZE_NFKD, tests[i].nfkd);
+ TEST (tests[i].str, G_NORMALIZE_NFKC, tests[i].nfkc);
+ }
+
+#undef TEST /* TEST is only meant for use inside this function */
+}
+
int
main (int argc,
char *argv[])
@@ -1933,6 +1984,7 @@ main (int argc,
g_test_add_func ("/unicode/xdigit", test_xdigit);
g_test_add_func ("/unicode/xdigit-value", test_xdigit_value);
g_test_add_func ("/unicode/zero-width", test_zerowidth);
+ g_test_add_func ("/unicode/normalize", test_normalize);
return g_test_run();
}