diff options
author | Khaled Hosny <khaled@aliftype.com> | 2023-04-02 15:47:33 +0200 |
---|---|---|
committer | Khaled Hosny <khaled@aliftype.com> | 2023-04-02 15:47:33 +0200 |
commit | d26a0dcf3071ee428b3b4c436a0d156d3efcfe0d (patch) | |
tree | 9d487fa248d87f4ff6e637e8b6513f06fa2e4361 | |
parent | 818b549bafbaef9323ceedffb3930113ae008af7 (diff) | |
download | pango-script-itemize.tar.gz |
itemize: Improve script itemization (branch: script-itemize)
Merge marks and other cluster extenders with the previous script. The logic
is copied from:
https://searchfox.org/mozilla-central/rev/dbc0cd5615f9cd7337d3e05b7c3925af5f35c0ee/gfx/thebes/gfxScriptItemizer.cpp#113-122
Handling of script extensions is still missing, as GLib does not seem to
have an API to retrieve them.
Fixes https://gitlab.gnome.org/GNOME/pango/-/issues/739
-rw-r--r-- | pango/pango-script.c | 23 | ||||
-rw-r--r-- | tests/itemize/three.expected | 9 | ||||
-rw-r--r-- | tests/itemize/three.items | 1 |
3 files changed, 30 insertions, 3 deletions
diff --git a/pango/pango-script.c b/pango/pango-script.c
index 71c519fd..fa9ab21d 100644
--- a/pango/pango-script.c
+++ b/pango/pango-script.c
@@ -286,12 +286,29 @@ get_pair_index (gunichar ch)
   return -1;
 }

+static gboolean
+is_cluster_extender (gunichar ch)
+{
+  GUnicodeType type = g_unichar_type (ch);
+  return (type >= G_UNICODE_SPACING_MARK && type <= G_UNICODE_NON_SPACING_MARK) ||
+         (ch >= 0x200C && ch <= 0x200D) || /* ZWJ, ZWNJ */
+         (ch >= 0xFF9E && ch <= 0xFF9F) || /* katakana sound marks */
+         (ch >= 0x1F3FB && ch <= 0x1F3FF) || /* fitzpatrick skin tone modifiers */
+         (ch >= 0xE0020 && ch <= 0xE007F); /* emoji (flag) tag characters */
+}
+
 /* duplicated in pango-language.c */
 #define REAL_SCRIPT(script) \
   ((script) > PANGO_SCRIPT_INHERITED && (script) != PANGO_SCRIPT_UNKNOWN)

-#define SAME_SCRIPT(script1, script2) \
-  (!REAL_SCRIPT (script1) || !REAL_SCRIPT (script2) || (script1) == (script2))
+#define IS_CLUSTER_EXTENDER(ch) \
+  g_unichar_type (ch)
+
+/* TODO: Use Unicode ScriptExtensions */
+#define SAME_SCRIPT(script1, script2, ch) \
+  (!REAL_SCRIPT (script1) || !REAL_SCRIPT (script2) || \
+   (script1) == (script2) || \
+   is_cluster_extender (ch))

 #define IS_OPEN(pair_index) (((pair_index) & 1) == 0)

@@ -372,7 +389,7 @@ pango_script_iter_next (PangoScriptIter *iter)
             }
         }

-      if (SAME_SCRIPT (iter->script_code, sc))
+      if (SAME_SCRIPT (iter->script_code, sc, ch))
         {
           if (!REAL_SCRIPT (iter->script_code) && REAL_SCRIPT (sc))
             {
diff --git a/tests/itemize/three.expected b/tests/itemize/three.expected
new file mode 100644
index 00000000..eb45fbd3
--- /dev/null
+++ b/tests/itemize/three.expected
@@ -0,0 +1,9 @@
+<span font="Cantarell 11" fallback="false">عַرַبִ</span>
+
+Items: عַرַبִ
+Chars: 6(0)
+Font: Cantarell 11
+Script: arabic
+Lang: ar
+Bidi: 1
+Attrs: 0 12 fallback false
diff --git a/tests/itemize/three.items b/tests/itemize/three.items
new file mode 100644
index 00000000..394767e5
--- /dev/null
+++ b/tests/itemize/three.items
@@ -0,0 +1 @@
+<span font="Cantarell 11" fallback="false">عַرַبִ</span>