Merge branch 'middle-dot' into 'main'

Support for Catalan middle dot in word start and end. See merge request GNOME/pango!485
author: Matthias Clasen <mclasen@redhat.com> 2021-10-27 23:29:13 +0000
committer: Matthias Clasen <mclasen@redhat.com> 2021-10-27 23:29:13 +0000
commit: 64180fcba8d534c4aa831ce906ddb7cd137ec6d8 (patch)
tree: 1457fbad71e6e9763fdef29d01f0774348b85c8c
parent: 12810d50a10ee378c253c3f8c2efe1516cd60ebc (diff)
parent: 89f69b369b9b551a75cbabe0af5eb3931afae272 (diff)
download: pango-64180fcba8d534c4aa831ce906ddb7cd137ec6d8.tar.gz
4 files changed, 77 insertions, 0 deletions
diff --git a/pango/break-latin.c b/pango/break-latin.c
new file mode 100644
index 00000000..608bff94
--- /dev/null
+++ b/pango/break-latin.c
@@ -0,0 +1,61 @@
+/* Pango
+ * break-latin.c:
+ *
+ * Copyright (C) 2021 Jordi Mas i Hernàndez <jmas@softcatala.org>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ */
+
+#include "config.h"
+
+#include "pango-break.h"
+#include "pango-impl-utils.h"
+
+/* Catalan tailoring: a middle dot (U+00B7) between two 'l' of either case
+ * does not split the word. Applied only when the language tag starts "ca-".
+ * len is the byte length of text; attrs is indexed by character.
+ */
+static void
+break_latin (const char          *text,
+	    int                  len,
+	    const PangoAnalysis *analysis,
+	    PangoLogAttr        *attrs,
+	    int                  attrs_len G_GNUC_UNUSED)
+{
+    const char *end = text + len;
+    const char *p, *next;
+    gunichar wc, prev_wc = 0;
+    int i = 0;
+
+    /* Without a language there is nothing to tailor. */
+    if (!analysis || !analysis->language ||
+        g_ascii_strncasecmp (pango_language_to_string (analysis->language), "ca-", 3) != 0)
+        return;
+
+    /* Advance by pointer (len counts bytes); i counts characters. */
+    for (p = text; p < end; p = next, i++, prev_wc = wc)
+    {
+        wc = g_utf8_get_char (p);
+        next = g_utf8_next_char (p);
+        /* NOTE(review): for a trailing dot this peeks at text[len] — confirm callers allow it. */
+        if (wc == 0x00b7 && g_unichar_tolower (prev_wc) == 'l' &&
+            g_unichar_tolower (g_utf8_get_char (next)) == 'l')
+        {
+            attrs[i].is_word_end = FALSE;
+            attrs[i + 1].is_word_start = FALSE;
+        }
+    }
+}
+
diff --git a/pango/break.c b/pango/break.c
index 917d6031..3a3e0b23 100644
--- a/pango/break.c
+++ b/pango/break.c
@@ -1664,6 +1664,7 @@ default_break (const char    *text,
 #include "break-arabic.c"
 #include "break-indic.c"
 #include "break-thai.c"
+#include "break-latin.c"
 
 static gboolean
 break_script (const char          *item_text,
@@ -1694,6 +1695,11 @@ break_script (const char          *item_text,
     case PANGO_SCRIPT_THAI:
       break_thai (item_text, item_length, analysis, attrs, attrs_len);
       break;
+
+    case PANGO_SCRIPT_LATIN:
+      break_latin (item_text, item_length, analysis, attrs, attrs_len);
+      break;
+
     default:
       return FALSE;
     }
diff --git a/tests/breaks/seventeen.break b/tests/breaks/seventeen.break
new file mode 100644
index 00000000..82a207fb
--- /dev/null
+++ b/tests/breaks/seventeen.break
@@ -0,0 +1,3 @@
+<span lang='ca-es'>És una prova d'instal·lació</span>
+<span lang='ca-fr'>És una prova d'instal·lació</span>
+<span lang='en-US'>És una prova d'instal·lació</span>
diff --git a/tests/breaks/seventeen.expected b/tests/breaks/seventeen.expected
new file mode 100644
index 00000000..8f5f2749
--- /dev/null
+++ b/tests/breaks/seventeen.expected
@@ -0,0 +1,7 @@
+Text:         ⁦É⁩ ⁦s⁩  [ ]  ⁦u⁩ ⁦n⁩ ⁦a⁩  [ ]  ⁦p⁩ ⁦r⁩ ⁦o⁩ ⁦v⁩ ⁦a⁩  [ ]  ⁦d⁩ ⁦'⁩ ⁦i⁩ ⁦n⁩ ⁦s⁩ ⁦t⁩ ⁦a⁩ ⁦l⁩ ⁦·⁩ ⁦l⁩ ⁦a⁩ ⁦c⁩ ⁦i⁩ ⁦ó⁩  [0x0a]  ⁦É⁩ ⁦s⁩  [ ]  ⁦u⁩ ⁦n⁩ ⁦a⁩  [ ]  ⁦p⁩ ⁦r⁩ ⁦o⁩ ⁦v⁩ ⁦a⁩  [ ]  ⁦d⁩ ⁦'⁩ ⁦i⁩ ⁦n⁩ ⁦s⁩ ⁦t⁩ ⁦a⁩ ⁦l⁩ ⁦·⁩ ⁦l⁩ ⁦a⁩ ⁦c⁩ ⁦i⁩ ⁦ó⁩  [0x0a]  ⁦É⁩ ⁦s⁩  [ ]  ⁦u⁩ ⁦n⁩ ⁦a⁩  [ ]  ⁦p⁩ ⁦r⁩ ⁦o⁩ ⁦v⁩ ⁦a⁩  [ ]  ⁦d⁩ ⁦'⁩ ⁦i⁩ ⁦n⁩ ⁦s⁩ ⁦t⁩ ⁦a⁩ ⁦l⁩ ⁦·⁩ ⁦l⁩ ⁦a⁩ ⁦c⁩ ⁦i⁩ ⁦ó⁩  [0x0a]  
+Breaks:     c  c c    lc c c c    lc c c c c c    lc c c c c c c c c c c c c c c       Lc c c    lc c c c    lc c c c c c    lc c c c c c c c c c c c c c c       Lc c c    lc c c c    lc c c c c c    lc c c c c c c c c c c c c c c       Lc
+Whitespace:      x           x               x                                 w            x           x               x                                 w            x           x               x                                 w       w 
+Sentences:  bs                                                                 e       bs                                                                 e       bs                                                                 e       b 
+Words:      bs   be   bs     be   bs         be   bs e s                       be      bs   be   bs     be   bs         be   bs e s                       be      bs   be   bs     be   bs         be   bs e s           e s         be      b 
+Graphemes:  b  b b    b  b b b    b  b b b b b    b  b b b b b b b b b b b b b b       b  b b    b  b b b    b  b b b b b    b  b b b b b b b b b b b b b b       b  b b    b  b b b    b  b b b b b    b  b b b b b b b b b b b b b b       b 
+Hyphens:       i         i i         i i i i           i i i i i i   i i i i i            i         i i         i i i i           i i i i i i   i i i i i            i         i i         i i i i           i i i i i i   i i i i i
author	Matthias Clasen <mclasen@redhat.com>	2021-10-27 23:29:13 +0000
committer	Matthias Clasen <mclasen@redhat.com>	2021-10-27 23:29:13 +0000
commit	64180fcba8d534c4aa831ce906ddb7cd137ec6d8 (patch)
tree	1457fbad71e6e9763fdef29d01f0774348b85c8c
parent	12810d50a10ee378c253c3f8c2efe1516cd60ebc (diff)
parent	89f69b369b9b551a75cbabe0af5eb3931afae272 (diff)
download	pango-64180fcba8d534c4aa831ce906ddb7cd137ec6d8.tar.gz