/* Pango2 * itemize.c: Turning text into items * * Copyright (C) 2000, 2006 Red Hat Software * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Library General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Library General Public License for more details. * * You should have received a copy of the GNU Library General Public * License along with this library; if not, write to the * Free Software Foundation, Inc., 59 Temple Place - Suite 330, * Boston, MA 02111-1307, USA. */ #include "config.h" #include #include #include "pango-context-private.h" #include "pango-impl-utils.h" #include "pango-font-private.h" #include "pango-fontset.h" #include "pango-fontmap-private.h" #include "pango-script-private.h" #include "pango-emoji-private.h" #include "pango-attr-iterator-private.h" #include "pango-attr-private.h" #include "pango-item-private.h" #include "pango-bidi-private.h" #include /* {{{ Font cache */ /* * We cache the results of character,fontset => font in a hash table */ typedef struct { GHashTable *hash; } FontCache; typedef struct { Pango2Font *font; int position; /* position of the font in the fontset */ } FontElement; static void font_cache_destroy (FontCache *cache) { g_hash_table_destroy (cache->hash); g_slice_free (FontCache, cache); } static void font_element_destroy (FontElement *element) { if (element->font) g_object_unref (element->font); g_slice_free (FontElement, element); } static FontCache * get_font_cache (Pango2Fontset *fontset) { FontCache *cache; static GQuark cache_quark = 0; /* MT-safe */ if (G_UNLIKELY (!cache_quark)) cache_quark = g_quark_from_static_string ("pango-font-cache"); retry: cache = 
g_object_get_qdata (G_OBJECT (fontset), cache_quark);
  if (G_UNLIKELY (!cache))
    {
      /* Nothing attached to this fontset yet: build a cache and try to
       * install it as qdata.
       */
      cache = g_slice_new (FontCache);
      cache->hash = g_hash_table_new_full (g_direct_hash, NULL,
                                           NULL, (GDestroyNotify)font_element_destroy);
      /* If the replace fails, throw our cache away and look the qdata
       * up again.
       */
      if (!g_object_replace_qdata (G_OBJECT (fontset), cache_quark, NULL,
                                   cache, (GDestroyNotify)font_cache_destroy,
                                   NULL))
        {
          font_cache_destroy (cache);
          goto retry;
        }
    }

  return cache;
}

/* Looks up the cached entry for wc.
 *
 * On a hit, stores the cached font (which may be NULL) and its fontset
 * position in *font and *position and returns TRUE. No reference is added
 * to the returned font. On a miss, returns FALSE and leaves the out
 * parameters untouched.
 */
static gboolean
font_cache_get (FontCache   *cache,
                gunichar     wc,
                Pango2Font **font,
                int         *position)
{
  FontElement *element;

  element = g_hash_table_lookup (cache->hash, GUINT_TO_POINTER (wc));
  if (element)
    {
      *font = element->font;
      *position = element->position;

      return TRUE;
    }
  else
    return FALSE;
}

/* Records font (may be NULL) and its fontset position as the result for wc.
 * The cache takes its own reference on font.
 */
static void
font_cache_insert (FontCache  *cache,
                   gunichar    wc,
                   Pango2Font *font,
                   int         position)
{
  FontElement *element = g_slice_new (FontElement);
  element->font = font ? g_object_ref (font) : NULL;
  element->position = position;

  g_hash_table_insert (cache->hash, GUINT_TO_POINTER (wc), element);
}

/* }}} */
/* {{{ Width Iter */

typedef struct _Pango2WidthIter Pango2WidthIter;

/* Iterator over runs of text; [start, end) is the current run and
 * upright is the result of width_iter_is_upright() for its first character.
 */
struct _Pango2WidthIter
{
  const char *text_start;
  const char *text_end;
  const char *start;
  const char *end;
  gboolean    upright;
};

/* Returns TRUE if ch falls inside one of the ranges of the table below. */
static gboolean
width_iter_is_upright (gunichar ch)
{
  /* https://www.unicode.org/Public/11.0.0/ucd/VerticalOrientation.txt
   * VO=U or Tu table generated by tools/gen-vertical-orientation-U-table.py.
   *
   * FIXME: In the future, if GLib supports VerticalOrientation, please use it.
*/ static const gunichar upright[][2] = { {0x00A7, 0x00A7}, {0x00A9, 0x00A9}, {0x00AE, 0x00AE}, {0x00B1, 0x00B1}, {0x00BC, 0x00BE}, {0x00D7, 0x00D7}, {0x00F7, 0x00F7}, {0x02EA, 0x02EB}, {0x1100, 0x11FF}, {0x1401, 0x167F}, {0x18B0, 0x18FF}, {0x2016, 0x2016}, {0x2020, 0x2021}, {0x2030, 0x2031}, {0x203B, 0x203C}, {0x2042, 0x2042}, {0x2047, 0x2049}, {0x2051, 0x2051}, {0x2065, 0x2065}, {0x20DD, 0x20E0}, {0x20E2, 0x20E4}, {0x2100, 0x2101}, {0x2103, 0x2109}, {0x210F, 0x210F}, {0x2113, 0x2114}, {0x2116, 0x2117}, {0x211E, 0x2123}, {0x2125, 0x2125}, {0x2127, 0x2127}, {0x2129, 0x2129}, {0x212E, 0x212E}, {0x2135, 0x213F}, {0x2145, 0x214A}, {0x214C, 0x214D}, {0x214F, 0x2189}, {0x218C, 0x218F}, {0x221E, 0x221E}, {0x2234, 0x2235}, {0x2300, 0x2307}, {0x230C, 0x231F}, {0x2324, 0x2328}, {0x232B, 0x232B}, {0x237D, 0x239A}, {0x23BE, 0x23CD}, {0x23CF, 0x23CF}, {0x23D1, 0x23DB}, {0x23E2, 0x2422}, {0x2424, 0x24FF}, {0x25A0, 0x2619}, {0x2620, 0x2767}, {0x2776, 0x2793}, {0x2B12, 0x2B2F}, {0x2B50, 0x2B59}, {0x2BB8, 0x2BD1}, {0x2BD3, 0x2BEB}, {0x2BF0, 0x2BFF}, {0x2E80, 0x3007}, {0x3012, 0x3013}, {0x3020, 0x302F}, {0x3031, 0x309F}, {0x30A1, 0x30FB}, {0x30FD, 0xA4CF}, {0xA960, 0xA97F}, {0xAC00, 0xD7FF}, {0xE000, 0xFAFF}, {0xFE10, 0xFE1F}, {0xFE30, 0xFE48}, {0xFE50, 0xFE57}, {0xFE5F, 0xFE62}, {0xFE67, 0xFE6F}, {0xFF01, 0xFF07}, {0xFF0A, 0xFF0C}, {0xFF0E, 0xFF19}, {0xFF1F, 0xFF3A}, {0xFF3C, 0xFF3C}, {0xFF3E, 0xFF3E}, {0xFF40, 0xFF5A}, {0xFFE0, 0xFFE2}, {0xFFE4, 0xFFE7}, {0xFFF0, 0xFFF8}, {0xFFFC, 0xFFFD}, {0x10980, 0x1099F}, {0x11580, 0x115FF}, {0x11A00, 0x11AAF}, {0x13000, 0x1342F}, {0x14400, 0x1467F}, {0x16FE0, 0x18AFF}, {0x1B000, 0x1B12F}, {0x1B170, 0x1B2FF}, {0x1D000, 0x1D1FF}, {0x1D2E0, 0x1D37F}, {0x1D800, 0x1DAAF}, {0x1F000, 0x1F7FF}, {0x1F900, 0x1FA6F}, {0x20000, 0x2FFFD}, {0x30000, 0x3FFFD}, {0xF0000, 0xFFFFD}, {0x100000, 0x10FFFD} }; static const int max = sizeof(upright) / sizeof(upright[0]); int st = 0; int ed = max; if (ch < upright[0][0]) return FALSE; while (st <= ed) { int mid = 
(st + ed) / 2;

      if (upright[mid][0] <= ch && ch <= upright[mid][1])
        return TRUE;
      else
        if (upright[mid][0] <= ch)
          st = mid + 1;   /* ch lies above this range */
        else
          ed = mid - 1;   /* ch lies below this range */
    }

  return FALSE;
}

/* Advances the iterator to the next run.
 *
 * The new run starts where the previous one ended. Its upright flag is
 * taken from its first character, and it extends until a character with a
 * different width_iter_is_upright() result is found. Zero width joiners,
 * the character following a joiner, variation selectors U+FE0E/U+FE0F,
 * tag characters and emoji modifiers are absorbed into the current run
 * without being checked.
 */
static void
width_iter_next (Pango2WidthIter *iter)
{
  gboolean met_joiner = FALSE;
  iter->start = iter->end;

  if (iter->end < iter->text_end)
    {
      gunichar ch = g_utf8_get_char (iter->end);
      iter->upright = width_iter_is_upright (ch);
    }

  while (iter->end < iter->text_end)
    {
      gunichar ch = g_utf8_get_char (iter->end);

      /* for zero width joiner */
      if (ch == 0x200D)
        {
          iter->end = g_utf8_next_char (iter->end);
          met_joiner = TRUE;
          continue;
        }

      /* ignore the upright check if met joiner */
      if (met_joiner)
        {
          iter->end = g_utf8_next_char (iter->end);
          met_joiner = FALSE;
          continue;
        }

      /* for variation selector, tag and emoji modifier. */
      if (G_UNLIKELY (ch == 0xFE0EU || ch == 0xFE0FU ||
                      (ch >= 0xE0020 && ch <= 0xE007F) ||
                      (ch >= 0x1F3FB && ch <= 0x1F3FF)))
        {
          iter->end = g_utf8_next_char (iter->end);
          continue;
        }

      if (width_iter_is_upright (ch) != iter->upright)
        break;

      iter->end = g_utf8_next_char (iter->end);
    }
}

/* Sets the iterator up over length bytes of text and positions it on the
 * first run.
 */
static void
width_iter_init (Pango2WidthIter *iter,
                 const char      *text,
                 int              length)
{
  iter->text_start = text;
  iter->text_end = text + length;
  iter->start = iter->end = text;

  width_iter_next (iter);
}

/* Nothing to release; the iterator owns no resources. */
static void
width_iter_fini (Pango2WidthIter *iter)
{
}

/* }}} */
/* {{{ Itemization */

typedef struct _ItemizeState ItemizeState;

/* Bits recording which properties changed at the start of the current run. */
typedef enum {
  EMBEDDING_CHANGED    = 1 << 0,
  SCRIPT_CHANGED       = 1 << 1,
  LANG_CHANGED         = 1 << 2,
  FONT_CHANGED         = 1 << 3,
  DERIVED_LANG_CHANGED = 1 << 4,
  WIDTH_CHANGED        = 1 << 5,
  EMOJI_CHANGED        = 1 << 6,
} ChangedFlags;

struct _ItemizeState
{
  Pango2Context *context;
  const char *text;       /* base pointer; item offsets are relative to it */
  const char *end;        /* end of the range being itemized */

  const char *run_start;  /* current run is [run_start, run_end) */
  const char *run_end;

  GList *result;          /* items built so far, most recent first */
  Pango2Item *item;       /* item currently being extended, or NULL */

  guint8 *embedding_levels;
  int embedding_end_offset;
  const char *embedding_end;
  guint8 embedding;

  Pango2Gravity gravity;
  Pango2GravityHint gravity_hint;
  Pango2Gravity resolved_gravity;
  Pango2Gravity font_desc_gravity;
  gboolean
centered_baseline;

  Pango2AttrIterator *attr_iter;
  gboolean free_attr_iter;    /* TRUE if itemize_state_finish() destroys attr_iter */
  const char *attr_end;
  Pango2FontDescription *font_desc;
  Pango2FontDescription *emoji_font_desc;
  Pango2Language *lang;
  GSList *extra_attrs;
  gboolean copy_extra_attrs;  /* TRUE once extra_attrs has been handed to an item */

  ChangedFlags changed;

  Pango2ScriptIter script_iter;
  const char *script_end;
  GUnicodeScript script;

  Pango2WidthIter width_iter;
  Pango2EmojiIter emoji_iter;
  Pango2EmojiPresentation preferred;

  Pango2Language *derived_lang;

  Pango2Fontset *current_fonts;
  FontCache *cache;
  Pango2Font *base_font;
  gboolean enable_fallback;

  const char *first_space; /* first of a sequence of spaces we've seen */
  int font_position; /* position of the current font in the fontset */
};

/* Loads the embedding level at the current offset and advances
 * embedding_end / embedding_end_offset past every following character
 * that has the same level. Flags EMBEDDING_CHANGED.
 */
static void
update_embedding_end (ItemizeState *state)
{
  state->embedding = state->embedding_levels[state->embedding_end_offset];
  while (state->embedding_end < state->end &&
         state->embedding_levels[state->embedding_end_offset] == state->embedding)
    {
      /* levels are indexed per character, the text pointer moves per UTF-8 sequence */
      state->embedding_end_offset++;
      state->embedding_end = g_utf8_next_char (state->embedding_end);
    }

  state->changed |= EMBEDDING_CHANGED;
}

/* Returns the first attribute of the given type in attr_list,
 * or NULL if there is none.
 */
static Pango2Attribute *
find_attribute (GSList        *attr_list,
                Pango2AttrType type)
{
  GSList *node;

  for (node = attr_list; node; node = node->next)
    if (((Pango2Attribute *) node->data)->type == type)
      return (Pango2Attribute *) node->data;

  return NULL;
}

/* Refreshes all state derived from the attribute iterator's current
 * position: the end of the attribute run, the font description, language,
 * extra attributes, fallback/gravity/emoji settings, and the matching
 * change flags.
 */
static void
update_attr_iterator (ItemizeState *state)
{
  Pango2Language *old_lang;
  Pango2Attribute *attr;
  int end_index;
  Pango2EmojiPresentation old_preferred;

  /* Clamp the end of the attribute run to the end of the itemized range. */
  pango2_attr_iterator_range (state->attr_iter, NULL, &end_index);
  if (end_index < state->end - state->text)
    state->attr_end = state->text + end_index;
  else
    state->attr_end = state->end;

  /* The emoji description is derived from font_desc, so drop it too. */
  if (state->emoji_font_desc)
    {
      pango2_font_description_free (state->emoji_font_desc);
      state->emoji_font_desc = NULL;
    }

  old_lang = state->lang;
  if (state->font_desc)
    pango2_font_description_free (state->font_desc);
  /* Start from the context's description; the iterator's font is applied on top. */
  state->font_desc = pango2_font_description_copy_static (state->context->font_desc);
pango2_attr_iterator_get_font (state->attr_iter, state->font_desc,
                               &state->lang, &state->extra_attrs);
  /* Remember an explicitly set gravity; AUTO means "not set in the description". */
  if (pango2_font_description_get_set_fields (state->font_desc) & PANGO2_FONT_MASK_GRAVITY)
    state->font_desc_gravity = pango2_font_description_get_gravity (state->font_desc);
  else
    state->font_desc_gravity = PANGO2_GRAVITY_AUTO;

  /* Fresh list: the first item created may take it without copying. */
  state->copy_extra_attrs = FALSE;

  if (!state->lang)
    state->lang = state->context->language;

  /* Fallback is on unless a fallback attribute with value 0 is present. */
  attr = find_attribute (state->extra_attrs, PANGO2_ATTR_FALLBACK);
  state->enable_fallback = (attr == NULL || attr->int_value);

  attr = find_attribute (state->extra_attrs, PANGO2_ATTR_GRAVITY);
  state->gravity = attr == NULL ? PANGO2_GRAVITY_AUTO : attr->int_value;

  attr = find_attribute (state->extra_attrs, PANGO2_ATTR_GRAVITY_HINT);
  state->gravity_hint = attr == NULL ? state->context->gravity_hint : (Pango2GravityHint)attr->int_value;

  old_preferred = state->preferred;
  attr = find_attribute (state->extra_attrs, PANGO2_ATTR_EMOJI_PRESENTATION);
  state->preferred = attr ? attr->int_value : state->context->presentation;

  /* The font is always treated as changed; language and emoji preference
   * only when their value differs from before.
   */
  state->changed |= FONT_CHANGED;
  if (state->lang != old_lang)
    state->changed |= LANG_CHANGED;
  if (state->preferred != old_preferred)
    state->changed |= EMOJI_CHANGED;
}

/* Sets run_end to the earliest of the embedding, attribute, script,
 * width and emoji boundaries.
 */
static void
update_end (ItemizeState *state)
{
  state->run_end = state->embedding_end;
  if (state->attr_end < state->run_end)
    state->run_end = state->attr_end;
  if (state->script_end < state->run_end)
    state->run_end = state->script_end;
  if (state->width_iter.end < state->run_end)
    state->run_end = state->width_iter.end;
  if (state->emoji_iter.end < state->run_end)
    state->run_end = state->emoji_iter.end;
}

/* Prepares state for itemizing length bytes of text starting at
 * start_index.
 *
 * Attributes come from cached_iter if given, otherwise from an iterator
 * created for attrs (destroyed again in itemize_state_finish()), otherwise
 * there are none. desc is only consulted when there is no attribute
 * iterator.
 */
static void
itemize_state_init (ItemizeState                *state,
                    Pango2Context               *context,
                    const char                  *text,
                    Pango2Direction              base_dir,
                    int                          start_index,
                    int                          length,
                    Pango2AttrList              *attrs,
                    Pango2AttrIterator          *cached_iter,
                    const Pango2FontDescription *desc)
{
  state->context = context;
  state->text = text;
  state->end = text + start_index + length;

  state->result = NULL;
  state->item = NULL;

state->run_start = text + start_index;
  /* Everything counts as changed for the very first run. */
  state->changed = EMBEDDING_CHANGED | SCRIPT_CHANGED | LANG_CHANGED |
                   FONT_CHANGED | WIDTH_CHANGED | EMOJI_CHANGED;

  /* First, apply the bidirectional algorithm to break
   * the text into directional runs.
   */
  state->embedding_levels = pango2_log2vis_get_embedding_levels (text + start_index,
                                                                 length,
                                                                 &base_dir);

  state->embedding_end_offset = 0;
  state->embedding_end = text + start_index;
  update_embedding_end (state);

  state->gravity = PANGO2_GRAVITY_AUTO;
  state->centered_baseline = PANGO2_GRAVITY_IS_VERTICAL (state->context->resolved_gravity);
  state->gravity_hint = state->context->gravity_hint;
  state->resolved_gravity = PANGO2_GRAVITY_AUTO;
  state->preferred = context->presentation;

  /* Initialize the attribute iterator */
  if (cached_iter)
    {
      /* Borrowed from the caller: never destroyed here. */
      state->attr_iter = cached_iter;
      state->free_attr_iter = FALSE;
    }
  else if (attrs)
    {
      state->attr_iter = pango2_attr_list_get_iterator (attrs);
      state->free_attr_iter = TRUE;
    }
  else
    {
      state->attr_iter = NULL;
      state->free_attr_iter = FALSE;
    }

  state->emoji_font_desc = NULL;
  if (state->attr_iter)
    {
      /* update_attr_iterator() frees font_desc and compares against lang,
       * so both must be initialized before calling it.
       */
      state->font_desc = NULL;
      state->lang = NULL;

      pango2_attr_iterator_advance (state->attr_iter, start_index);
      update_attr_iterator (state);
    }
  else
    {
      /* No attributes: a single attribute run covering the whole range. */
      state->font_desc = pango2_font_description_copy_static (desc ?
desc : state->context->font_desc);
      state->lang = state->context->language;
      state->extra_attrs = NULL;
      state->copy_extra_attrs = FALSE;

      state->attr_end = state->end;
      state->enable_fallback = TRUE;
    }

  /* Initialize the script iterator */
  _pango2_script_iter_init (&state->script_iter, text + start_index, length);
  pango2_script_iter_get_range (&state->script_iter, NULL,
                                &state->script_end, &state->script);

  width_iter_init (&state->width_iter, text + start_index, length);
  pango2_emoji_iter_init (&state->emoji_iter, text + start_index, length);

  /* Width runs only split the text for vertical gravity; for emoji
   * presentation the width run is stretched to cover the emoji run.
   */
  if (!PANGO2_GRAVITY_IS_VERTICAL (state->context->resolved_gravity))
    state->width_iter.end = state->end;
  else if (pango2_emoji_iter_get (&state->emoji_iter, state->preferred) == EMOJI_PRESENTATION_EMOJI)
    state->width_iter.end = MAX (state->width_iter.end, state->emoji_iter.end);

  update_end (state);

  if (pango2_font_description_get_set_fields (state->font_desc) & PANGO2_FONT_MASK_GRAVITY)
    state->font_desc_gravity = pango2_font_description_get_gravity (state->font_desc);
  else
    state->font_desc_gravity = PANGO2_GRAVITY_AUTO;

  /* Font lookup state is created lazily in itemize_state_update_for_new_run(). */
  state->derived_lang = NULL;
  state->current_fonts = NULL;
  state->cache = NULL;
  state->base_font = NULL;
  state->first_space = NULL;
  state->font_position = 0xffff;
}

/* Moves on to the next run.
 *
 * Every iterator whose boundary coincides with the end of the previous
 * run is advanced and the corresponding change flag is set. Returns FALSE
 * once the previous run reached the end of the text.
 */
static gboolean
itemize_state_next (ItemizeState *state)
{
  if (state->run_end == state->end)
    return FALSE;

  state->changed = 0;

  state->run_start = state->run_end;

  if (state->run_end == state->embedding_end)
    {
      update_embedding_end (state);
    }

  if (state->run_end == state->attr_end)
    {
      pango2_attr_iterator_next (state->attr_iter);
      update_attr_iterator (state);
    }

  if (state->run_end == state->script_end)
    {
      pango2_script_iter_next (&state->script_iter);
      pango2_script_iter_get_range (&state->script_iter, NULL,
                                    &state->script_end, &state->script);
      state->changed |= SCRIPT_CHANGED;
    }

  if (state->run_end == state->emoji_iter.end)
    {
      pango2_emoji_iter_next (&state->emoji_iter);
      state->changed |= EMOJI_CHANGED;

      /* Keep the width run from splitting an emoji-presentation run. */
      if (pango2_emoji_iter_get (&state->emoji_iter,
state->preferred) == EMOJI_PRESENTATION_EMOJI)
        state->width_iter.end = MAX (state->width_iter.end, state->emoji_iter.end);
    }

  if (state->run_end == state->width_iter.end)
    {
      width_iter_next (&state->width_iter);
      state->changed |= WIDTH_CHANGED;
    }

  update_end (state);

  return TRUE;
}

/* Returns a new list holding a copy of every attribute in attr_slist,
 * in the same order.
 */
static GSList *
copy_attr_slist (GSList *attr_slist)
{
  GSList *new_list = NULL;
  GSList *l;

  for (l = attr_slist; l; l = l->next)
    new_list = g_slist_prepend (new_list, pango2_attribute_copy (l->data));

  return g_slist_reverse (new_list);
}

/* Walks the result list from its head and gives font (with a new
 * reference each) to every item that has none, stopping at the first item
 * that already has a font. With a NULL font the walk changes nothing.
 */
static void
itemize_state_fill_font (ItemizeState *state,
                         Pango2Font   *font)
{
  GList *l;

  for (l = state->result; l; l = l->next)
    {
      Pango2Item *item = l->data;
      if (item->analysis.font)
        break;
      if (font)
        item->analysis.font = g_object_ref (font);
    }
}

/* Adds the character at pos to the current item, or starts a new item
 * when there is none, the font differs, or force_break is set.
 *
 * font may be NULL (used for space-like characters), in which case the
 * character adopts the current item's font if it has one.
 */
static void
itemize_state_add_character (ItemizeState *state,
                             Pango2Font   *font,
                             int           font_position,
                             gboolean      force_break,
                             const char   *pos,
                             gboolean      is_space)
{
  /* Value before this character is taken into account. */
  const char *first_space = state->first_space;
  int n_spaces = 0;

  /* Track where the current sequence of spaces began. */
  if (is_space)
    {
      if (state->first_space == NULL)
        state->first_space = pos;
    }
  else
    state->first_space = NULL;

  if (state->item)
    {
      if (!state->item->analysis.font && font)
        {
          /* First real font seen: apply it to the pending font-less items. */
          itemize_state_fill_font (state, font);
          state->font_position = font_position;
        }
      else if (state->item->analysis.font && !font)
        {
          /* Font-less character: inherit the current item's font. */
          font = state->item->analysis.font;
          font_position = state->font_position;
        }

      if (!force_break &&
          state->item->analysis.font == font)
        {
          state->item->num_chars++;
          return;
        }

      /* Font is changing, we are about to end the current item.
       * If it ended in a sequence of spaces (but wasn't only spaces),
       * check if we should move those spaces to the new item (since
       * the font is less "fallback").
*
       * See https://gitlab.gnome.org/GNOME/pango/-/issues/249
       */
      if (state->text + state->item->offset < first_space &&
          font_position < state->font_position)
        {
          /* Hand the trailing spaces over: shorten the old item and start
           * the new one at the first space.
           */
          n_spaces = g_utf8_strlen (first_space, pos - first_space);
          state->item->num_chars -= n_spaces;
          pos = first_space;
        }

      /* Close the current item at pos. */
      state->item->length = (pos - state->text) - state->item->offset;
    }

  state->item = pango2_item_new ();
  state->item->offset = pos - state->text;
  state->item->length = 0;  /* filled in when the item is closed */
  state->item->num_chars = n_spaces + 1;

  if (font)
    g_object_ref (font);
  state->item->analysis.font = font;
  state->font_position = font_position;

  state->item->analysis.level = state->embedding;
  state->item->analysis.gravity = state->resolved_gravity;

  /* The level vs. gravity dance:
   * - If gravity is SOUTH, leave level untouched.
   * - If gravity is NORTH, step level one up, to
   *   not get mirrored upside-down text.
   * - If gravity is EAST, step up to an even level, as
   *   it's a clockwise-rotated layout, so the rotated
   *   top is unrotated left.
   * - If gravity is WEST, step up to an odd level, as
   *   it's a counter-clockwise-rotated layout, so the rotated
   *   top is unrotated right.
   *
   * A similar dance is performed in pango-layout.c:
   * line_set_resolved_dir(). Keep in synch.
   */
  switch (state->item->analysis.gravity)
    {
      case PANGO2_GRAVITY_SOUTH:
      default:
        break;
      case PANGO2_GRAVITY_NORTH:
        state->item->analysis.level++;
        break;
      case PANGO2_GRAVITY_EAST:
        /* round up to the next even level */
        state->item->analysis.level += 1;
        state->item->analysis.level &= ~1;
        break;
      case PANGO2_GRAVITY_WEST:
        /* round up to the next odd level */
        state->item->analysis.level |= 1;
        break;
    }

  state->item->analysis.flags |= state->centered_baseline ?
PANGO2_ANALYSIS_FLAG_CENTERED_BASELINE : 0;

  state->item->analysis.script = state->script;
  state->item->analysis.language = state->derived_lang;

  /* The first item of an attribute run takes the list itself; later
   * items get their own copy.
   */
  if (state->copy_extra_attrs)
    {
      state->item->analysis.extra_attrs = copy_attr_slist (state->extra_attrs);
    }
  else
    {
      state->item->analysis.extra_attrs = state->extra_attrs;
      state->copy_extra_attrs = TRUE;
    }

  state->result = g_list_prepend (state->result, state->item);
}

/* Closure for get_font_foreach(): the font to look for and the number of
 * fonts visited before it.
 */
typedef struct {
  Pango2Font *font;
  int position;
} GetFontInfo;

/* Fontset iteration callback: returns TRUE when info->font is reached,
 * otherwise increments info->position and returns FALSE.
 */
static gboolean
get_font_foreach (Pango2Fontset *fontset,
                  Pango2Font    *font,
                  gpointer       data)
{
  GetFontInfo *info = data;

  if (font == info->font)
    return TRUE;

  info->position++;

  return FALSE;
}

/* Returns the font loaded from the font map for state->font_desc,
 * loading it on first use and keeping it in state->base_font.
 */
static Pango2Font *
get_base_font (ItemizeState *state)
{
  if (!state->base_font)
    state->base_font = pango2_font_map_load_font (state->context->font_map,
                                                  state->context,
                                                  state->font_desc);

  return state->base_font;
}

/* Picks the font for wc and its position in the current fontset.
 *
 * With fallback enabled the per-fontset cache is consulted first and
 * updated afterwards. If fallback is disabled or the fontset has no font
 * for wc, the base font is used with position 0. *font carries no extra
 * reference for the caller. As written, this always returns TRUE.
 */
static gboolean
get_font (ItemizeState  *state,
          gunichar       wc,
          Pango2Font   **font,
          int           *position)
{
  GetFontInfo info;

  /* We'd need a separate cache when fallback is disabled, but since lookup
   * with fallback disabled is faster anyways, we just skip caching
   */
  if (state->enable_fallback && font_cache_get (state->cache, wc, font, position))
    return TRUE;

  info.font = NULL;
  info.position = 0;

  if (state->enable_fallback)
    {
      info.font = pango2_fontset_get_font (state->current_fonts, wc);
      /* NOTE(review): the pointer is used after this unref; presumably the
       * fontset keeps the font alive - confirm.
       */
      if (info.font)
        g_object_unref (info.font);
      pango2_fontset_foreach (state->current_fonts, get_font_foreach, &info);
    }

  if (!info.font)
    info.font = get_base_font (state);

  *font = info.font;
  *position = info.position;

  /* skip caching if fallback disabled (see above) */
  if (state->enable_fallback)
    font_cache_insert (state->cache, wc, *font, *position);

  return TRUE;
}

/* Returns lang if it includes script; otherwise a sample language for
 * script, or "xx" if there is none.
 */
static Pango2Language *
compute_derived_language (Pango2Language *lang,
                          GUnicodeScript  script)
{
  Pango2Language *derived_lang;

  /* Make sure the language tag is consistent with the derived
   * script.
There is no point in marking up a section of
   * Arabic text with the "en" language tag.
   */
  if (lang && pango2_language_includes_script (lang, script))
    derived_lang = lang;
  else
    {
      derived_lang = pango2_script_get_sample_language (script);
      /* If we don't find a sample language for the script, we
       * use a language tag that shouldn't actually be used
       * anywhere. This keeps fontconfig (for the Pango2Fc*
       * backend) from using the language tag to affect the
       * sort order. I don't have a reference for 'xx' being
       * safe here, though Keith Packard claims it is.
       */
      if (!derived_lang)
        derived_lang = pango2_language_from_string ("xx");
    }

  return derived_lang;
}

/* Brings gravity, derived language, fontset, font cache and base font in
 * line with the change flags of the run that is about to be processed.
 */
static void
itemize_state_update_for_new_run (ItemizeState *state)
{
  /* This block should be moved to update_attr_iterator, but I'm too lazy to
   * do it right now
   */
  if (state->changed & (FONT_CHANGED | SCRIPT_CHANGED | WIDTH_CHANGED))
    {
      /* Font-desc gravity overrides everything */
      if (state->font_desc_gravity != PANGO2_GRAVITY_AUTO)
        {
          state->resolved_gravity = state->font_desc_gravity;
        }
      else
        {
          Pango2Gravity gravity = state->gravity;
          Pango2GravityHint gravity_hint = state->gravity_hint;

          /* No gravity attribute: fall back to the context's gravity. */
          if (G_LIKELY (gravity == PANGO2_GRAVITY_AUTO))
            gravity = state->context->resolved_gravity;

          state->resolved_gravity = pango2_gravity_get_for_script_and_width (state->script,
                                                                             state->width_iter.upright,
                                                                             gravity,
                                                                             gravity_hint);
        }

      /* Store the resolved gravity in the description used for font loading. */
      if (state->font_desc_gravity != state->resolved_gravity)
        {
          pango2_font_description_set_gravity (state->font_desc, state->resolved_gravity);
          state->changed |= FONT_CHANGED;
        }
    }

  if (state->changed & (SCRIPT_CHANGED | LANG_CHANGED))
    {
      Pango2Language *old_derived_lang = state->derived_lang;
      state->derived_lang = compute_derived_language (state->lang, state->script);
      if (old_derived_lang != state->derived_lang)
        state->changed |= DERIVED_LANG_CHANGED;
    }

  /* A change of emoji run forces the fontset to be reloaded below. */
  if (state->changed & (EMOJI_CHANGED))
    {
      state->changed |= FONT_CHANGED;
    }

  /* Drop the current fontset (and its cache pointer) when font or
   * derived language changed.
   */
  if (state->changed & (FONT_CHANGED | DERIVED_LANG_CHANGED) &&
      state->current_fonts)
    {
      g_object_unref
(state->current_fonts);
      state->current_fonts = NULL;
      state->cache = NULL;
    }

  if (!state->current_fonts)
    {
      Pango2FontDescription *font_desc = state->font_desc;
      Pango2Language *lang = state->derived_lang;
      EmojiPresentation presentation;

      presentation = pango2_emoji_iter_get (&state->emoji_iter, state->preferred);

      if (presentation == EMOJI_PRESENTATION_EMOJI)
        {
          /* Emoji presentation: load from the "emoji" family, using a
           * lazily created copy of the current description.
           */
          if (!state->emoji_font_desc)
            {
              state->emoji_font_desc = pango2_font_description_copy_static (state->font_desc);
              pango2_font_description_set_family_static (state->emoji_font_desc, "emoji");
            }
          font_desc = state->emoji_font_desc;
          lang = pango2_language_from_string ("und-zsye");
        }
      else if (presentation == EMOJI_PRESENTATION_TEXT)
        {
          /* Text presentation keeps the regular description but still
           * uses the "und-zsye" language tag.
           */
          font_desc = state->font_desc;
          lang = pango2_language_from_string ("und-zsye");
        }

      state->current_fonts = pango2_font_map_load_fontset (state->context->font_map,
                                                           state->context,
                                                           font_desc,
                                                           lang);
      state->cache = get_font_cache (state->current_fonts);
    }

  /* The base font depends on font_desc, so it is reloaded on demand. */
  if ((state->changed & FONT_CHANGED) && state->base_font)
    {
      g_object_unref (state->base_font);
      state->base_font = NULL;
    }
}

/* We don't want space characters to affect font selection; in general,
 * it's always wrong to select a font just to render a space.
 *
 * We assume that all fonts have the ASCII space, and for other space
 * characters if they don't, HarfBuzz will compatibility-decompose them
 * to ASCII space...
 * See bugs #355987 and #701652.
 *
 * We don't want to change fonts just for variation selectors.
 * See bug #781123.
 *
 * We don't want to change fonts for default ignorables such as Cf chars.
 * Note that Cf chars in the Arabic block are visible and need to have
 * a font, so we exclude them.
 *
 * Finally, don't change fonts for line or paragraph separators.
 *
 * Note that we want spaces to use the 'better' font, comparing
 * the font that is used before and after the space. This is handled
 * in itemize_state_add_character().
*/
static gboolean
consider_as_space (gunichar wc)
{
  GUnicodeType type = g_unichar_type (wc);
  return type == G_UNICODE_CONTROL ||
         (type == G_UNICODE_FORMAT && !((wc >= 0x600 && wc <= 0x06ff) || wc == 0x70f || wc == 0x8e2)) ||
         type == G_UNICODE_SURROGATE ||
         type == G_UNICODE_LINE_SEPARATOR ||
         type == G_UNICODE_PARAGRAPH_SEPARATOR ||
         (type == G_UNICODE_SPACE_SEPARATOR && wc != 0x1680u /* OGHAM SPACE MARK */) ||
         (wc >= 0xfe00u && wc <= 0xfe0fu) ||     /* variation selectors */
         (wc >= 0xe0100u && wc <= 0xe01efu);     /* variation selectors supplement */
}

/* Turns the current run [run_start, run_end) into items.
 *
 * Each character is given a font (none for space-like characters) and
 * handed to itemize_state_add_character(). Tab, CR, LF, U+2028 and U+2029
 * are placed in items of their own, except that "\r\n" stays together.
 * Items still without a font at the end of the run get the font chosen
 * for ' '.
 */
static void
itemize_state_process_run (ItemizeState *state)
{
  const char *p;
  gboolean last_was_forced_break = FALSE;
  gboolean is_space;
  gunichar prev_wc = 0;

  /* Only one character has type G_UNICODE_LINE_SEPARATOR in Unicode 4.0;
   * update this if that changes.
   */
#define LINE_SEPARATOR 0x2028

  itemize_state_update_for_new_run (state);

  /* We should never get an empty run */
  g_assert (state->run_end != state->run_start);

  for (p = state->run_start;
       p < state->run_end;
       p = g_utf8_next_char (p))
    {
      gunichar wc = g_utf8_get_char (p);
      gboolean is_forced_break = wc == '\t' ||
                                 wc == '\r' ||
                                 wc == '\n' ||
                                 wc == 0x2028 ||
                                 wc == 0x2029;
      Pango2Font *font;
      int font_position;

      if (consider_as_space (wc))
        {
          /* No font of its own; 0xffff is the same "no position yet"
           * value itemize_state_init() uses.
           */
          font = NULL;
          font_position = 0xffff;
          is_space = TRUE;
        }
      else
        {
          get_font (state, wc, &font, &font_position);
          is_space = FALSE;
        }

      /* Don't break between \r and \n */
      if (prev_wc == '\r' && wc == '\n')
        state->item->num_chars++;
      else
        /* Break both before and after a forced-break character. */
        itemize_state_add_character (state, font, font_position,
                                     is_forced_break || last_was_forced_break,
                                     p,
                                     is_space);

      last_was_forced_break = is_forced_break;
      prev_wc = wc;
    }

  /* Finish the final item from the current segment */
  state->item->length = (p - state->text) - state->item->offset;
  if (!state->item->analysis.font)
    {
      Pango2Font *font;
      int position;

      /* The run consisted only of font-less characters: use the font for ' '. */
      if (G_UNLIKELY (!get_font (state, ' ', &font, &position)))
        {
          /* If no font was found, warn once per fontmap/script pair */
          Pango2FontMap *fontmap = state->context->font_map;
          char *script_tag = g_strdup_printf
("g-unicode-script-%d", state->script); if (!g_object_get_data (G_OBJECT (fontmap), script_tag)) { g_warning ("failed to choose a font, expect ugly output. script='%d'", state->script); g_object_set_data_full (G_OBJECT (fontmap), script_tag, GINT_TO_POINTER (1), NULL); } g_free (script_tag); font = NULL; } itemize_state_fill_font (state, font); } state->item = NULL; } static void itemize_state_finish (ItemizeState *state) { g_free (state->embedding_levels); if (state->free_attr_iter) pango2_attr_iterator_destroy (state->attr_iter); _pango2_script_iter_fini (&state->script_iter); pango2_font_description_free (state->font_desc); pango2_font_description_free (state->emoji_font_desc); width_iter_fini (&state->width_iter); pango2_emoji_iter_fini (&state->emoji_iter); if (state->current_fonts) g_object_unref (state->current_fonts); if (state->base_font) g_object_unref (state->base_font); } /* }}} */ /* {{{ Post-processing */ /* {{{ Handling font scale */ typedef struct { Pango2Attribute *attr; double scale; } ScaleItem; static gboolean collect_font_scale (Pango2Context *context, GList **stack, Pango2Item *item, Pango2Item *prev, double *scale, gboolean *is_small_caps) { gboolean retval = FALSE; GList *l; for (GSList *l = item->analysis.extra_attrs; l; l = l->next) { Pango2Attribute *attr = l->data; if (attr->type == PANGO2_ATTR_FONT_SCALE) { if (attr->start_index == item->offset) { ScaleItem *entry; int y_scale; hb_position_t y_size; hb_position_t cap_height; hb_position_t x_height; entry = g_new (ScaleItem, 1); entry->attr = attr; *stack = g_list_prepend (*stack, entry); switch (attr->int_value) { case PANGO2_FONT_SCALE_NONE: break; case PANGO2_FONT_SCALE_SUPERSCRIPT: if (prev && hb_ot_metrics_get_position (pango2_font_get_hb_font (prev->analysis.font), HB_OT_METRICS_TAG_SUPERSCRIPT_EM_Y_SIZE, &y_size)) { hb_font_get_scale (pango2_font_get_hb_font (prev->analysis.font), NULL, &y_scale); entry->scale = y_size / (double) y_scale; } else { entry->scale = 1 / 1.2; } break; 
case PANGO2_FONT_SCALE_SUBSCRIPT: if (prev && hb_ot_metrics_get_position (pango2_font_get_hb_font (prev->analysis.font), HB_OT_METRICS_TAG_SUBSCRIPT_EM_Y_SIZE, &y_size)) { hb_font_get_scale (pango2_font_get_hb_font (prev->analysis.font), NULL, &y_scale); entry->scale = y_size / (double) y_scale; } else { entry->scale = 1 / 1.2; } break; case PANGO2_FONT_SCALE_SMALL_CAPS: if (hb_ot_metrics_get_position (pango2_font_get_hb_font (item->analysis.font), HB_OT_METRICS_TAG_CAP_HEIGHT, &cap_height) && hb_ot_metrics_get_position (pango2_font_get_hb_font (item->analysis.font), HB_OT_METRICS_TAG_X_HEIGHT, &x_height)) { entry->scale = x_height / (double) cap_height; } else { entry->scale = 0.8; } break; default: g_assert_not_reached (); } } } } *scale = 1.0; *is_small_caps = TRUE; for (l = *stack; l; l = l->next) { ScaleItem *entry = l->data; *scale *= entry->scale; if (entry->attr->int_value != PANGO2_FONT_SCALE_SMALL_CAPS) *is_small_caps = FALSE; retval = TRUE; } l = *stack; while (l) { ScaleItem *entry = l->data; GList *next = l->next; if (entry->attr->end_index == item->offset + item->length) { *stack = g_list_delete_link (*stack, l); g_free (entry); } l = next; } return retval; } static void apply_scale_to_item (Pango2Context *context, Pango2Item *item, double scale, gboolean is_small_caps) { Pango2FontDescription *desc; double size; if (!item->analysis.font) return; if (is_small_caps) pango2_analysis_set_size_font (&item->analysis, item->analysis.font); desc = pango2_font_describe (item->analysis.font); size = scale * pango2_font_description_get_size (desc); if (pango2_font_description_get_size_is_absolute (desc)) pango2_font_description_set_absolute_size (desc, size); else pango2_font_description_set_size (desc, size); g_object_unref (item->analysis.font); item->analysis.font = pango2_font_map_load_font (context->font_map, context, desc); pango2_font_description_free (desc); } static void apply_font_scale (Pango2Context *context, GList *items) { Pango2Item *prev = NULL; 
GList *stack = NULL;  /* ScaleItem entries for the scales currently in effect */

  for (GList *l = items; l; l = l->next)
    {
      Pango2Item *item = l->data;
      double scale;
      gboolean is_small_caps;

      if (collect_font_scale (context, &stack, item, prev, &scale, &is_small_caps))
        apply_scale_to_item (context, item, scale, is_small_caps);

      prev = item;
    }

  /* Every scale attribute should have ended with some item. */
  if (stack != NULL)
    {
      g_warning ("Leftover font scales");
      g_list_free_full (stack, g_free);
    }
}

/* }}} */
/* {{{ Handling Casing variants */

/* Returns TRUE if the GSUB table of item's font has all n_features
 * features for the script and language selected from item's analysis.
 *
 * NOTE(review): item->analysis.font is used without a NULL check;
 * presumably callers only pass items that have a font - confirm.
 */
static gboolean
all_features_supported (Pango2Item *item,
                        hb_tag_t   *features,
                        guint       n_features)
{
  hb_font_t *font = pango2_font_get_hb_font (item->analysis.font);
  hb_face_t *face = hb_font_get_face (font);
  hb_script_t script;
  hb_language_t language;
  guint script_count = HB_OT_MAX_TAGS_PER_SCRIPT;
  hb_tag_t script_tags[HB_OT_MAX_TAGS_PER_SCRIPT];
  hb_tag_t chosen_script;
  guint language_count = HB_OT_MAX_TAGS_PER_LANGUAGE;
  hb_tag_t language_tags[HB_OT_MAX_TAGS_PER_LANGUAGE];
  guint script_index, language_index;
  guint index;

  /* Convert the item's script and language to OpenType tags... */
  script = g_unicode_script_to_iso15924 (item->analysis.script);
  language = hb_language_from_string (pango2_language_to_string (item->analysis.language), -1);

  hb_ot_tags_from_script_and_language (script, language,
                                       &script_count, script_tags,
                                       &language_count, language_tags);
  /* ...and select the matching script and language system in GSUB. */
  hb_ot_layout_table_select_script (face, HB_OT_TAG_GSUB,
                                    script_count, script_tags,
                                    &script_index, &chosen_script);
  hb_ot_layout_script_select_language (face, HB_OT_TAG_GSUB, script_index,
                                       language_count, language_tags,
                                       &language_index);

  for (int i = 0; i < n_features; i++)
    {
      if (!hb_ot_layout_language_find_feature (face, HB_OT_TAG_GSUB,
                                               script_index, language_index,
                                               features[i],
                                               &index))
        return FALSE;
    }

  return TRUE;
}

/* Returns TRUE if item's font can render variant natively, i.e. the
 * variant needs no OpenType feature or all the features it needs are
 * available.
 */
static gboolean
variant_supported (Pango2Item    *item,
                   Pango2Variant  variant)
{
  hb_tag_t features[2];
  guint num_features = 0;

  switch (variant)
    {
    case PANGO2_VARIANT_NORMAL:
    case PANGO2_VARIANT_TITLE_CAPS:
      return TRUE;
    case PANGO2_VARIANT_SMALL_CAPS:
      features[num_features++] = HB_TAG ('s', 'm', 'c', 'p');
      break;
    case PANGO2_VARIANT_ALL_SMALL_CAPS:

features[num_features++] = HB_TAG ('s', 'm', 'c', 'p');
      features[num_features++] = HB_TAG ('c', '2', 's', 'c');
      break;
    case PANGO2_VARIANT_PETITE_CAPS:
      features[num_features++] = HB_TAG ('p', 'c', 'a', 'p');
      break;
    case PANGO2_VARIANT_ALL_PETITE_CAPS:
      features[num_features++] = HB_TAG ('p', 'c', 'a', 'p');
      features[num_features++] = HB_TAG ('c', '2', 'p', 'c');
      break;
    case PANGO2_VARIANT_UNICASE:
      features[num_features++] = HB_TAG ('u', 'n', 'i', 'c');
      break;
    default:
      g_assert_not_reached ();
    }

  return all_features_supported (item, features, num_features);
}

/* Returns the variant from the description of item's font, or
 * PANGO2_VARIANT_NORMAL if the item has no font.
 */
static Pango2Variant
get_font_variant (Pango2Item *item)
{
  Pango2FontDescription *desc;
  Pango2Variant variant = PANGO2_VARIANT_NORMAL;

  if (item->analysis.font)
    {
      desc = pango2_font_describe (item->analysis.font);
      variant = pango2_font_description_get_variant (desc);
      pango2_font_description_free (desc);
    }

  return variant;
}

/* Returns the value of the last text-transform attribute among the
 * analysis' extra attributes, or PANGO2_TEXT_TRANSFORM_NONE if there is
 * none.
 */
static Pango2TextTransform
find_text_transform (const Pango2Analysis *analysis)
{
  GSList *l;
  Pango2TextTransform transform = PANGO2_TEXT_TRANSFORM_NONE;

  for (l = analysis->extra_attrs; l; l = l->next)
    {
      Pango2Attribute *attr = l->data;

      /* no early exit: a later attribute overrides an earlier one */
      if (attr->type == PANGO2_ATTR_TEXT_TRANSFORM)
        transform = (Pango2TextTransform) attr->int_value;
    }

  return transform;
}

/* Split list_item into upper- and lowercase runs, and
 * add font scale and text transform attributes to make
 * them appear according to variant. The log_attrs are
 * needed for taking text transforms into account when
 * determining the case of characters in the run.
*/ static void split_item_for_variant (const char *text, Pango2LogAttr *log_attrs, Pango2Variant variant, GList *list_item) { Pango2Item *item = list_item->data; const char *start, *end; const char *p, *p0; gunichar wc; Pango2TextTransform transform = PANGO2_TEXT_TRANSFORM_NONE; Pango2FontScale lowercase_scale = PANGO2_FONT_SCALE_NONE; Pango2FontScale uppercase_scale = PANGO2_FONT_SCALE_NONE; Pango2TextTransform item_transform; gboolean is_word_start; int offset; switch (variant) { case PANGO2_VARIANT_ALL_SMALL_CAPS: case PANGO2_VARIANT_ALL_PETITE_CAPS: uppercase_scale = PANGO2_FONT_SCALE_SMALL_CAPS; G_GNUC_FALLTHROUGH; case PANGO2_VARIANT_SMALL_CAPS: case PANGO2_VARIANT_PETITE_CAPS: transform = PANGO2_TEXT_TRANSFORM_UPPERCASE; lowercase_scale = PANGO2_FONT_SCALE_SMALL_CAPS; break; case PANGO2_VARIANT_UNICASE: uppercase_scale = PANGO2_FONT_SCALE_SMALL_CAPS; break; case PANGO2_VARIANT_NORMAL: case PANGO2_VARIANT_TITLE_CAPS: default: g_assert_not_reached (); } item_transform = find_text_transform (&item->analysis); start = text + item->offset; end = start + item->length; offset = item->char_offset; p = start; while (p < end) { p0 = p; wc = g_utf8_get_char (p); is_word_start = log_attrs && log_attrs[offset].is_word_start; while (p < end && (item_transform == PANGO2_TEXT_TRANSFORM_LOWERCASE || consider_as_space (wc) || (g_unichar_islower (wc) && !(item_transform == PANGO2_TEXT_TRANSFORM_UPPERCASE || (item_transform == PANGO2_TEXT_TRANSFORM_CAPITALIZE && is_word_start))))) { p = g_utf8_next_char (p); wc = g_utf8_get_char (p); offset++; is_word_start = log_attrs && log_attrs[offset].is_word_start; } if (p0 < p) { Pango2Item *new_item; Pango2Attribute *attr; /* p0 .. 
p is a lowercase segment */ if (p < end) { new_item = pango2_item_split (item, p - p0, g_utf8_strlen (p0, p - p0)); list_item->data = new_item; list_item = g_list_insert_before (list_item, list_item->next, item); list_item = list_item->next; } else { new_item = item; } if (transform != PANGO2_TEXT_TRANSFORM_NONE) { attr = pango2_attr_text_transform_new (transform); attr->start_index = new_item->offset; attr->end_index = new_item->offset + new_item->length; new_item->analysis.extra_attrs = g_slist_append (new_item->analysis.extra_attrs, attr); } if (lowercase_scale != PANGO2_FONT_SCALE_NONE) { attr = pango2_attr_font_scale_new (lowercase_scale); attr->start_index = new_item->offset; attr->end_index = new_item->offset + new_item->length; new_item->analysis.extra_attrs = g_slist_append (new_item->analysis.extra_attrs, attr); } } p0 = p; wc = g_utf8_get_char (p); is_word_start = log_attrs && log_attrs[offset].is_word_start; while (p < end && (item_transform == PANGO2_TEXT_TRANSFORM_UPPERCASE || consider_as_space (wc) || !(item_transform == PANGO2_TEXT_TRANSFORM_LOWERCASE || g_unichar_islower (wc)) || (item_transform == PANGO2_TEXT_TRANSFORM_CAPITALIZE && is_word_start))) { p = g_utf8_next_char (p); wc = g_utf8_get_char (p); offset++; is_word_start = log_attrs && log_attrs[offset].is_word_start; } if (p0 < p) { Pango2Item *new_item; Pango2Attribute *attr; /* p0 .. 
p is a uppercase segment */ if (p < end) { new_item = pango2_item_split (item, p - p0, g_utf8_strlen (p0, p - p0)); list_item->data = new_item; list_item = g_list_insert_before (list_item, list_item->next, item); list_item = list_item->next; } else { new_item = item; } if (uppercase_scale != PANGO2_FONT_SCALE_NONE) { attr = pango2_attr_font_scale_new (uppercase_scale); attr->start_index = new_item->offset; attr->end_index = new_item->offset + new_item->length; new_item->analysis.extra_attrs = g_slist_append (new_item->analysis.extra_attrs, attr); } } } } static void handle_variants_for_item (const char *text, Pango2LogAttr *log_attrs, GList *l) { Pango2Item *item = l->data; Pango2Variant variant; variant = get_font_variant (item); if (!variant_supported (item, variant)) split_item_for_variant (text, log_attrs, variant, l); } static void handle_variants (const char *text, Pango2LogAttr *log_attrs, GList *items) { GList *next; for (GList *l = items; l; l = next) { next = l->next; handle_variants_for_item (text, log_attrs, l); } } /* }}} */ static GList * reorder_items (Pango2Context *context, GList *items) { int char_offset = 0; items = g_list_reverse (items); /* Also cmpute the char offset for each item here */ for (GList *l = items; l; l = l->next) { Pango2Item *item = l->data; item->char_offset = char_offset; char_offset += item->num_chars; } return items; } static GList * post_process_items (Pango2Context *context, const char *text, Pango2LogAttr *log_attrs, GList *items) { handle_variants (text, log_attrs, items); apply_font_scale (context, items); return items; } /* }}} */ /* {{{ Private API */ /* Like pango2_itemize, but takes a font description. * In contrast to pango2_itemize, this function does * not call pango2_itemize_post_process_items, so you need to do that * separately, after applying attributes that affect segmentation and * computing the log attrs. 
*/ GList * pango2_itemize_with_font (Pango2Context *context, Pango2Direction base_dir, const char *text, int start_index, int length, Pango2AttrList *attrs, Pango2AttrIterator *cached_iter, const Pango2FontDescription *desc) { ItemizeState state; g_return_val_if_fail (context->font_map != NULL, NULL); if (length == 0 || g_utf8_get_char (text + start_index) == '\0') return NULL; itemize_state_init (&state, context, text, base_dir, start_index, length, attrs, cached_iter, desc); do itemize_state_process_run (&state); while (itemize_state_next (&state)); itemize_state_finish (&state); return reorder_items (context, state.result); } /* Apply post-processing steps that may require log attrs. */ GList * pango2_itemize_post_process_items (Pango2Context *context, const char *text, Pango2LogAttr *log_attrs, GList *items) { return post_process_items (context, text, log_attrs, items); } /* }}} */ /* {{{ Public API */ /** * pango2_itemize: * @context: a structure holding information that affects * the itemization process. * @base_dir: base direction to use for bidirectional processing * @text: the text to itemize. * @start_index: first byte in @text to process * @length: the number of bytes (not characters) to process * after @start_index. This must be >= 0. * @attrs: the set of attributes that apply to @text. * * Breaks a piece of text into segments with consistent directional * level and font. * * Each byte of @text will be contained in exactly one of the items in the * returned list; the generated list of items will be in logical order (the * start offsets of the items are ascending). * * The base direction is used when computing bidirectional levels. * [func@itemize] gets the base direction from the `Pango2Context` * (see [method@Pango2.Context.set_base_dir]). * * Return value: (transfer full) (element-type Pango2.Item): a `GList` of * [struct@Pango2.Item] structures. 
The items should be freed using * [method@Pango2.Item.free] probably in combination with [func@GLib.List.free_full]. */ GList * pango2_itemize (Pango2Context *context, Pango2Direction base_dir, const char *text, int start_index, int length, Pango2AttrList *attrs) { GList *items; g_return_val_if_fail (context != NULL, NULL); g_return_val_if_fail (start_index >= 0, NULL); g_return_val_if_fail (length >= 0, NULL); g_return_val_if_fail (length == 0 || text != NULL, NULL); items = pango2_itemize_with_font (context, base_dir, text, start_index, length, attrs, NULL, NULL); return pango2_itemize_post_process_items (context, text, NULL, items); } /* }}} */ /* vim:set foldmethod=marker expandtab: */