summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Khaled Hosny <khaled@aliftype.com> 2023-04-02 15:47:33 +0200
committer Khaled Hosny <khaled@aliftype.com> 2023-04-02 15:47:33 +0200
commit d26a0dcf3071ee428b3b4c436a0d156d3efcfe0d (patch)
tree 9d487fa248d87f4ff6e637e8b6513f06fa2e4361
parent 818b549bafbaef9323ceedffb3930113ae008af7 (diff)
download pango-script-itemize.tar.gz
itemize: Improve script itemization [script-itemize]
Merge marks and other cluster extenders with the previous script.

Logic copied from:
https://searchfox.org/mozilla-central/rev/dbc0cd5615f9cd7337d3e05b7c3925af5f35c0ee/gfx/thebes/gfxScriptItemizer.cpp#113-122

Handling of script extensions is still missing, as GLib does not seem to have an API to retrieve them.

Fixes https://gitlab.gnome.org/GNOME/pango/-/issues/739
-rw-r--r-- pango/pango-script.c 23
-rw-r--r-- tests/itemize/three.expected 9
-rw-r--r-- tests/itemize/three.items 1
3 files changed, 30 insertions, 3 deletions
diff --git a/pango/pango-script.c b/pango/pango-script.c
index 71c519fd..fa9ab21d 100644
--- a/pango/pango-script.c
+++ b/pango/pango-script.c
@@ -286,12 +286,29 @@ get_pair_index (gunichar ch)
return -1;
}
+static gboolean
+is_cluster_extender (gunichar ch)
+{ /* TRUE for mark characters, ZWNJ/ZWJ and the modifier ranges listed below; SAME_SCRIPT treats such characters as matching the current script. */
+ GUnicodeType type = g_unichar_type (ch);
+ return (type >= G_UNICODE_SPACING_MARK && type <= G_UNICODE_NON_SPACING_MARK) || /* mark categories; NOTE(review): range test assumes these GUnicodeType values are contiguous - confirm against GLib */
+ (ch >= 0x200C && ch <= 0x200D) || /* ZWNJ (U+200C), ZWJ (U+200D) */
+ (ch >= 0xFF9E && ch <= 0xFF9F) || /* katakana sound marks */
+ (ch >= 0x1F3FB && ch <= 0x1F3FF) || /* fitzpatrick skin tone modifiers */
+ (ch >= 0xE0020 && ch <= 0xE007F); /* emoji (flag) tag characters */
+}
+
/* duplicated in pango-language.c */
#define REAL_SCRIPT(script) \
((script) > PANGO_SCRIPT_INHERITED && (script) != PANGO_SCRIPT_UNKNOWN)
-#define SAME_SCRIPT(script1, script2) \
- (!REAL_SCRIPT (script1) || !REAL_SCRIPT (script2) || (script1) == (script2))
+#define IS_CLUSTER_EXTENDER(ch) /* boolean: TRUE when ch is a cluster extender; delegates to is_cluster_extender() */ \
+ (is_cluster_extender (ch))
+
+/* TODO: Use Unicode ScriptExtensions.
+ * Two scripts count as the same when either one is not a real script, when
+ * they are equal, or when ch is a cluster extender. is_cluster_extender() is
+ * tested last, so it is only called when the cheaper comparisons fail.
+ * script1 and script2 are expanded more than once; pass side-effect-free
+ * expressions. */
+#define SAME_SCRIPT(script1, script2, ch) \
+ (!REAL_SCRIPT (script1) || !REAL_SCRIPT (script2) || \
+ (script1) == (script2) || \
+ is_cluster_extender (ch))
#define IS_OPEN(pair_index) (!((pair_index) & 1)) /* 1 when pair_index is even (low bit clear), else 0 */
@@ -372,7 +389,7 @@ pango_script_iter_next (PangoScriptIter *iter)
}
}
- if (SAME_SCRIPT (iter->script_code, sc))
+ if (SAME_SCRIPT (iter->script_code, sc, ch))
{
if (!REAL_SCRIPT (iter->script_code) && REAL_SCRIPT (sc))
{
diff --git a/tests/itemize/three.expected b/tests/itemize/three.expected
new file mode 100644
index 00000000..eb45fbd3
--- /dev/null
+++ b/tests/itemize/three.expected
@@ -0,0 +1,9 @@
+<span font="Cantarell 11" fallback="false">عַرַبִ</span>
+
+Items: عַرַبִ
+Chars: 6(0)
+Font: Cantarell 11
+Script: arabic
+Lang: ar
+Bidi: 1
+Attrs: 0 12 fallback false
diff --git a/tests/itemize/three.items b/tests/itemize/three.items
new file mode 100644
index 00000000..394767e5
--- /dev/null
+++ b/tests/itemize/three.items
@@ -0,0 +1 @@
+<span font="Cantarell 11" fallback="false">عַرַبִ</span>