Merge branch 'kjellahl/turkish-strdown' into 'master'

guniprop: Fix g_utf8_strdown() for Turkish locale. See merge request GNOME/glib!1930
author: Sebastian Dröge <slomo@coaxion.net> 2021-02-10 16:35:26 +0000
committer: Sebastian Dröge <slomo@coaxion.net> 2021-02-10 16:35:26 +0000
commit: 6528760468a744b15d9319b50aef3e868a1c3d30 (patch)
tree: e6a73629aef246afb7191dda39af705cdd12ab70
parent: 3c5339266d2c0ff3ee0bb8904246dc240c281c77 (diff)
parent: e008301cf8cb66871cfa05a61ee46fef57781e55 (diff)
download: glib-6528760468a744b15d9319b50aef3e868a1c3d30.tar.gz
2 files changed, 52 insertions, 6 deletions
diff --git a/glib/guniprop.c b/glib/guniprop.c
index 619b39908..2903367d4 100644
--- a/glib/guniprop.c
+++ b/glib/guniprop.c
@@ -987,14 +987,18 @@ real_tolower (const gchar *str,
       last = p;
       p = g_utf8_next_char (p);
 
-      if (locale_type == LOCALE_TURKIC && (c == 'I' ||
+      if (locale_type == LOCALE_TURKIC && (c == 'I' || c == 0x130 ||
                                            c == G_UNICHAR_FULLWIDTH_I))
-	{
-          if (g_utf8_get_char (p) == 0x0307)
+        {
+          gboolean combining_dot = (c == 'I' || c == G_UNICHAR_FULLWIDTH_I) &&
+                                   g_utf8_get_char (p) == 0x0307;
+          if (combining_dot || c == 0x130)
             {
-              /* I + COMBINING DOT ABOVE => i (U+0069) */
-              len += g_unichar_to_utf8 (0x0069, out_buffer ? out_buffer + len : NULL); 
-              p = g_utf8_next_char (p);
+              /* I + COMBINING DOT ABOVE => i (U+0069)
+               * LATIN CAPITAL LETTER I WITH DOT ABOVE => i (U+0069) */
+              len += g_unichar_to_utf8 (0x0069, out_buffer ? out_buffer + len : NULL);
+              if (combining_dot)
+                p = g_utf8_next_char (p);
             }
           else
             {
diff --git a/glib/tests/unicode.c b/glib/tests/unicode.c
index fa8bd1fa1..bf1ad52ab 100644
--- a/glib/tests/unicode.c
+++ b/glib/tests/unicode.c
@@ -464,6 +464,47 @@ test_strdown (void)
   g_free (str_down);
 }
 
+/* Test that g_utf8_strup() and g_utf8_strdown() return the correct
+ * value for Turkish 'i' with and without dot above. */
+static void
+test_turkish_strupdown (void)
+{
+  char *str_up = NULL;
+  char *str_down = NULL;
+  const char *str = "iII"
+                    "\xcc\x87"  /* COMBINING DOT ABOVE (U+307) */
+                    "\xc4\xb1"  /* LATIN SMALL LETTER DOTLESS I (U+131) */
+                    "\xc4\xb0"; /* LATIN CAPITAL LETTER I WITH DOT ABOVE (U+130) */
+  char *oldlocale = g_strdup (setlocale (LC_ALL, NULL)); /* query only */
+
+  if (setlocale (LC_ALL, "tr_TR") == NULL)
+    {
+      g_free (oldlocale);
+      g_test_skip ("locale tr_TR not available");
+      return;
+    }
+
+  str_up = g_utf8_strup (str, strlen (str));
+  str_down = g_utf8_strdown (str, strlen (str));
+  /* i => LATIN CAPITAL LETTER I WITH DOT ABOVE,
+   * I => I,
+   * I + COMBINING DOT ABOVE => I + COMBINING DOT ABOVE,
+   * LATIN SMALL LETTER DOTLESS I => I,
+   * LATIN CAPITAL LETTER I WITH DOT ABOVE => LATIN CAPITAL LETTER I WITH DOT ABOVE */
+  g_assert_cmpstr (str_up, ==, "\xc4\xb0II\xcc\x87I\xc4\xb0");
+  /* i => i,
+   * I => LATIN SMALL LETTER DOTLESS I,
+   * I + COMBINING DOT ABOVE => i,
+   * LATIN SMALL LETTER DOTLESS I => LATIN SMALL LETTER DOTLESS I,
+   * LATIN CAPITAL LETTER I WITH DOT ABOVE => i */
+  g_assert_cmpstr (str_down, ==, "i\xc4\xb1i\xc4\xb1i");
+  g_free (str_up);
+  g_free (str_down);
+  /* oldlocale was read before switching to tr_TR, so this restores it. */
+  setlocale (LC_ALL, oldlocale);
+  g_free (oldlocale);
+}
+
 /* Test that g_utf8_casefold() returns the correct value for various
  * ASCII and Unicode alphabetic, numeric, and other, codepoints. */
 static void
@@ -1644,6 +1685,7 @@ main (int   argc,
   g_test_add_func ("/unicode/space", test_space);
   g_test_add_func ("/unicode/strdown", test_strdown);
   g_test_add_func ("/unicode/strup", test_strup);
+  g_test_add_func ("/unicode/turkish-strupdown", test_turkish_strupdown);
   g_test_add_func ("/unicode/title", test_title);
   g_test_add_func ("/unicode/upper", test_upper);
   g_test_add_func ("/unicode/validate", test_unichar_validate);
author	Sebastian Dröge <slomo@coaxion.net>	2021-02-10 16:35:26 +0000
committer	Sebastian Dröge <slomo@coaxion.net>	2021-02-10 16:35:26 +0000
commit	6528760468a744b15d9319b50aef3e868a1c3d30 (patch)
tree	e6a73629aef246afb7191dda39af705cdd12ab70
parent	3c5339266d2c0ff3ee0bb8904246dc240c281c77 (diff)
parent	e008301cf8cb66871cfa05a61ee46fef57781e55 (diff)
download	glib-6528760468a744b15d9319b50aef3e868a1c3d30.tar.gz