summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Sebastian Dröge <slomo@coaxion.net> 2021-02-10 16:35:26 +0000
committer: Sebastian Dröge <slomo@coaxion.net> 2021-02-10 16:35:26 +0000
commit: 6528760468a744b15d9319b50aef3e868a1c3d30 (patch)
tree: e6a73629aef246afb7191dda39af705cdd12ab70
parent: 3c5339266d2c0ff3ee0bb8904246dc240c281c77 (diff)
parent: e008301cf8cb66871cfa05a61ee46fef57781e55 (diff)
download: glib-6528760468a744b15d9319b50aef3e868a1c3d30.tar.gz
Merge branch 'kjellahl/turkish-strdown' into 'master'
guniprop: Fix g_utf8_strdown() for Turkish locale. See merge request GNOME/glib!1930.
-rw-r--r-- glib/guniprop.c 16
-rw-r--r-- glib/tests/unicode.c 42
2 files changed, 52 insertions, 6 deletions
diff --git a/glib/guniprop.c b/glib/guniprop.c
index 619b39908..2903367d4 100644
--- a/glib/guniprop.c
+++ b/glib/guniprop.c
@@ -987,14 +987,18 @@ real_tolower (const gchar *str,
last = p;
p = g_utf8_next_char (p);
- if (locale_type == LOCALE_TURKIC && (c == 'I' ||
+ if (locale_type == LOCALE_TURKIC && (c == 'I' || c == 0x130 ||
c == G_UNICHAR_FULLWIDTH_I))
- {
- if (g_utf8_get_char (p) == 0x0307)
+ {
+ gboolean combining_dot = (c == 'I' || c == G_UNICHAR_FULLWIDTH_I) &&
+ g_utf8_get_char (p) == 0x0307;
+ if (combining_dot || c == 0x130)
{
- /* I + COMBINING DOT ABOVE => i (U+0069) */
- len += g_unichar_to_utf8 (0x0069, out_buffer ? out_buffer + len : NULL);
- p = g_utf8_next_char (p);
+ /* I + COMBINING DOT ABOVE => i (U+0069)
+ * LATIN CAPITAL LETTER I WITH DOT ABOVE => i (U+0069) */
+ len += g_unichar_to_utf8 (0x0069, out_buffer ? out_buffer + len : NULL);
+ if (combining_dot)
+ p = g_utf8_next_char (p);
}
else
{
diff --git a/glib/tests/unicode.c b/glib/tests/unicode.c
index fa8bd1fa1..bf1ad52ab 100644
--- a/glib/tests/unicode.c
+++ b/glib/tests/unicode.c
@@ -464,6 +464,47 @@ test_strdown (void)
g_free (str_down);
}
+/* Test that g_utf8_strup() and g_utf8_strdown() return the correct
+ * value for Turkish 'i' with and without dot above.
+ *
+ * The test switches LC_ALL to tr_TR while it runs and is skipped when
+ * that locale cannot be selected. */
+static void
+test_turkish_strupdown (void)
+{
+  char *str_up = NULL;
+  char *str_down = NULL;
+  char *oldlocale = NULL;
+  const char *str = "iII"
+                    "\xcc\x87"  /* COMBINING DOT ABOVE (U+307) */
+                    "\xc4\xb1"  /* LATIN SMALL LETTER DOTLESS I (U+131) */
+                    "\xc4\xb0"; /* LATIN CAPITAL LETTER I WITH DOT ABOVE (U+130) */
+
+  /* Remember the current locale *before* switching. On success,
+   * setlocale() with a non-NULL locale returns the name of the newly
+   * selected locale, not the previous one, so the previous value has to
+   * be queried separately. It is copied because later setlocale() calls
+   * may overwrite the returned string. */
+  oldlocale = g_strdup (setlocale (LC_ALL, NULL));
+
+  if (setlocale (LC_ALL, "tr_TR") == NULL)
+    {
+      /* The switch failed, so there is nothing to restore. */
+      g_free (oldlocale);
+      g_test_skip ("locale tr_TR not available");
+      return;
+    }
+
+  str_up = g_utf8_strup (str, strlen (str));
+  str_down = g_utf8_strdown (str, strlen (str));
+  /* i => LATIN CAPITAL LETTER I WITH DOT ABOVE,
+   * I => I,
+   * I + COMBINING DOT ABOVE => I + COMBINING DOT ABOVE,
+   * LATIN SMALL LETTER DOTLESS I => I,
+   * LATIN CAPITAL LETTER I WITH DOT ABOVE => LATIN CAPITAL LETTER I WITH DOT ABOVE */
+  g_assert_cmpstr (str_up, ==, "\xc4\xb0II\xcc\x87I\xc4\xb0");
+  /* i => i,
+   * I => LATIN SMALL LETTER DOTLESS I,
+   * I + COMBINING DOT ABOVE => i,
+   * LATIN SMALL LETTER DOTLESS I => LATIN SMALL LETTER DOTLESS I,
+   * LATIN CAPITAL LETTER I WITH DOT ABOVE => i */
+  g_assert_cmpstr (str_down, ==, "i\xc4\xb1i\xc4\xb1i");
+  g_free (str_up);
+  g_free (str_down);
+
+  /* Put back the locale that was active before this test. */
+  setlocale (LC_ALL, oldlocale);
+  g_free (oldlocale);
+}
+
/* Test that g_utf8_casefold() returns the correct value for various
* ASCII and Unicode alphabetic, numeric, and other, codepoints. */
static void
@@ -1644,6 +1685,7 @@ main (int argc,
g_test_add_func ("/unicode/space", test_space);
g_test_add_func ("/unicode/strdown", test_strdown);
g_test_add_func ("/unicode/strup", test_strup);
+ g_test_add_func ("/unicode/turkish-strupdown", test_turkish_strupdown);
g_test_add_func ("/unicode/title", test_title);
g_test_add_func ("/unicode/upper", test_upper);
g_test_add_func ("/unicode/validate", test_unichar_validate);