From d26a0dcf3071ee428b3b4c436a0d156d3efcfe0d Mon Sep 17 00:00:00 2001
From: Khaled Hosny
Date: Sun, 2 Apr 2023 15:47:33 +0200
Subject: itemize: Improve script itemization

Merge marks, and other cluster extenders with the previous script. Logic
copied from:
https://searchfox.org/mozilla-central/rev/dbc0cd5615f9cd7337d3e05b7c3925af5f35c0ee/gfx/thebes/gfxScriptItemizer.cpp#113-122

Handling of script extensions is still missing as GLib does not seem to
have an API to retrieve it.

Fixes https://gitlab.gnome.org/GNOME/pango/-/issues/739
---
 pango/pango-script.c         | 28 +++++++++++++++++++++++++---
 tests/itemize/three.expected |  9 +++++++++
 tests/itemize/three.items    |  1 +
 3 files changed, 35 insertions(+), 3 deletions(-)
 create mode 100644 tests/itemize/three.expected
 create mode 100644 tests/itemize/three.items

diff --git a/pango/pango-script.c b/pango/pango-script.c
index 71c519fd..fa9ab21d 100644
--- a/pango/pango-script.c
+++ b/pango/pango-script.c
@@ -286,12 +286,34 @@ get_pair_index (gunichar ch)
   return -1;
 }
 
+/* Returns TRUE if ch extends the preceding cluster, in which case
+ * SAME_SCRIPT keeps it in the current script run regardless of the
+ * script reported for ch itself.
+ */
+static gboolean
+is_cluster_extender (gunichar ch)
+{
+  GUnicodeType type = g_unichar_type (ch);
+
+  /* The range test covers the spacing, enclosing and non-spacing mark
+   * categories; it relies on GUnicodeType declaring them contiguously.
+   */
+  return (type >= G_UNICODE_SPACING_MARK && type <= G_UNICODE_NON_SPACING_MARK) ||
+         (ch >= 0x200C && ch <= 0x200D) || /* ZWNJ, ZWJ */
+         (ch >= 0xFF9E && ch <= 0xFF9F) || /* katakana sound marks */
+         (ch >= 0x1F3FB && ch <= 0x1F3FF) || /* fitzpatrick skin tone modifiers */
+         (ch >= 0xE0020 && ch <= 0xE007F); /* emoji (flag) tag characters */
+}
+
 /* duplicated in pango-language.c */
 #define REAL_SCRIPT(script) \
   ((script) > PANGO_SCRIPT_INHERITED && (script) != PANGO_SCRIPT_UNKNOWN)
 
-#define SAME_SCRIPT(script1, script2) \
-  (!REAL_SCRIPT (script1) || !REAL_SCRIPT (script2) || (script1) == (script2))
+/* TODO: Use Unicode ScriptExtensions */
+#define SAME_SCRIPT(script1, script2, ch) \
+  (!REAL_SCRIPT (script1) || !REAL_SCRIPT (script2) || \
+   (script1) == (script2) || \
+   is_cluster_extender (ch))
 
 #define IS_OPEN(pair_index) (((pair_index) & 1) == 0)
 
@@ -372,7 +394,7 @@ pango_script_iter_next (PangoScriptIter *iter)
             }
         }
 
-      if (SAME_SCRIPT (iter->script_code, sc))
+      if (SAME_SCRIPT (iter->script_code, sc, ch))
         {
           if (!REAL_SCRIPT (iter->script_code) && REAL_SCRIPT (sc))
             {
diff --git a/tests/itemize/three.expected b/tests/itemize/three.expected
new file mode 100644
index 00000000..eb45fbd3
--- /dev/null
+++ b/tests/itemize/three.expected
@@ -0,0 +1,9 @@
+عַرַبִ
+
+Items: عַرַبִ
+Chars: 6(0)
+Font: Cantarell 11
+Script: arabic
+Lang: ar
+Bidi: 1
+Attrs: 0 12 fallback false
diff --git a/tests/itemize/three.items b/tests/itemize/three.items
new file mode 100644
index 00000000..394767e5
--- /dev/null
+++ b/tests/itemize/three.items
@@ -0,0 +1 @@
+عַرַبִ
-- 
cgit v1.2.1