/* Pango
 * itemize.c: Turning text into items
 *
 * Copyright (C) 2000, 2006 Red Hat Software
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public
 * License along with this library; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

#include "config.h"
/* NOTE(review): the names of the two system headers were lost in the copy
 * this was reviewed from; <string.h> and <stdlib.h> are assumed - confirm.
 */
#include <string.h>
#include <stdlib.h>

#include "pango-context-private.h"
#include "pango-impl-utils.h"

#include "pango-font-private.h"
#include "pango-fontset-private.h"
#include "pango-fontmap-private.h"
#include "pango-script-private.h"
#include "pango-emoji-private.h"
#include "pango-attributes-private.h"
#include "pango-item-private.h"

/* {{{ Font cache */

/*
 * We cache the results of character,fontset => font in a hash table
 */

/* Per-fontset cache; keys are code points, values are FontElement. */
typedef struct
{
  GHashTable *hash;
} FontCache;

/* One cache entry: the font chosen for a character (may be NULL). */
typedef struct
{
  PangoFont *font;
  int position; /* position of the font in the fontset */
} FontElement;

/* Destroys the hash table (and thereby its elements) and frees @cache. */
static void
font_cache_destroy (FontCache *cache)
{
  g_hash_table_destroy (cache->hash);
  g_slice_free (FontCache, cache);
}

/* Drops the reference held on the cached font, if any, and frees @element. */
static void
font_element_destroy (FontElement *element)
{
  if (element->font)
    g_object_unref (element->font);
  g_slice_free (FontElement, element);
}

/* Returns the FontCache stored as qdata on @fontset, creating and attaching
 * one if there is none yet.
 *
 * The new cache is installed with g_object_replace_qdata() expecting the
 * old value to be NULL; if that fails, the freshly built cache is destroyed
 * and the lookup is retried.
 */
static FontCache *
get_font_cache (PangoFontset *fontset)
{
  FontCache *cache;

  static GQuark cache_quark = 0; /* MT-safe */
  if (G_UNLIKELY (!cache_quark))
    cache_quark = g_quark_from_static_string ("pango-font-cache");

retry:
  cache = g_object_get_qdata (G_OBJECT (fontset), cache_quark);
  if (G_UNLIKELY (!cache))
    {
      cache = g_slice_new (FontCache);
      cache->hash = g_hash_table_new_full (g_direct_hash, NULL,
                                           NULL, (GDestroyNotify)font_element_destroy);
      if (!g_object_replace_qdata (G_OBJECT (fontset), cache_quark, NULL,
                                   cache, (GDestroyNotify)font_cache_destroy,
                                   NULL))
        {
          font_cache_destroy (cache);
          goto retry;
        }
    }

  return cache;
}

/* Looks up @wc in @cache.
 *
 * On a hit, stores the cached font (no new reference is taken) and its
 * position in *@font and *@position and returns TRUE. On a miss, returns
 * FALSE and leaves the outputs untouched.
 */
static gboolean
font_cache_get (FontCache  *cache,
                gunichar    wc,
                PangoFont **font,
                int        *position)
{
  FontElement *element;

  element = g_hash_table_lookup (cache->hash, GUINT_TO_POINTER (wc));
  if (element)
    {
      *font = element->font;
      *position = element->position;

      return TRUE;
    }
  else
    return FALSE;
}

/* Records @font (which may be NULL) and @position as the result for @wc.
 * The cache takes its own reference on a non-NULL @font.
 */
static void
font_cache_insert (FontCache *cache,
                   gunichar   wc,
                   PangoFont *font,
                   int        position)
{
  FontElement *element = g_slice_new (FontElement);
  element->font = font ? g_object_ref (font) : NULL;
  element->position = position;

  g_hash_table_insert (cache->hash, GUINT_TO_POINTER (wc), element);
}

/* }}} */
/* {{{ Width Iter */

typedef struct _PangoWidthIter PangoWidthIter;

/* Iterates over runs of text whose characters share the same
 * "upright" classification (see width_iter_is_upright()).
 */
struct _PangoWidthIter
{
  const gchar *text_start;
  const gchar *text_end;
  const gchar *start;
  const gchar *end;
  gboolean     upright;
};

/* Returns TRUE if @ch falls in one of the ranges of the table below.
 *
 * The table holds inclusive [first, last] ranges in ascending order and
 * is searched with a binary search.
 */
static gboolean
width_iter_is_upright (gunichar ch)
{
  /* https://www.unicode.org/Public/11.0.0/ucd/VerticalOrientation.txt
   * VO=U or Tu table generated by tools/gen-vertical-orientation-U-table.py.
   *
   * FIXME: In the future, If GLib supports VerticalOrientation, please use it.
   */
  static const gunichar upright[][2] = {
    {0x00A7, 0x00A7}, {0x00A9, 0x00A9}, {0x00AE, 0x00AE}, {0x00B1, 0x00B1},
    {0x00BC, 0x00BE}, {0x00D7, 0x00D7}, {0x00F7, 0x00F7}, {0x02EA, 0x02EB},
    {0x1100, 0x11FF}, {0x1401, 0x167F}, {0x18B0, 0x18FF}, {0x2016, 0x2016},
    {0x2020, 0x2021}, {0x2030, 0x2031}, {0x203B, 0x203C}, {0x2042, 0x2042},
    {0x2047, 0x2049}, {0x2051, 0x2051}, {0x2065, 0x2065}, {0x20DD, 0x20E0},
    {0x20E2, 0x20E4}, {0x2100, 0x2101}, {0x2103, 0x2109}, {0x210F, 0x210F},
    {0x2113, 0x2114}, {0x2116, 0x2117}, {0x211E, 0x2123}, {0x2125, 0x2125},
    {0x2127, 0x2127}, {0x2129, 0x2129}, {0x212E, 0x212E}, {0x2135, 0x213F},
    {0x2145, 0x214A}, {0x214C, 0x214D}, {0x214F, 0x2189}, {0x218C, 0x218F},
    {0x221E, 0x221E}, {0x2234, 0x2235}, {0x2300, 0x2307}, {0x230C, 0x231F},
    {0x2324, 0x2328}, {0x232B, 0x232B}, {0x237D, 0x239A}, {0x23BE, 0x23CD},
    {0x23CF, 0x23CF}, {0x23D1, 0x23DB}, {0x23E2, 0x2422}, {0x2424, 0x24FF},
    {0x25A0, 0x2619}, {0x2620, 0x2767}, {0x2776, 0x2793}, {0x2B12, 0x2B2F},
    {0x2B50, 0x2B59}, {0x2BB8, 0x2BD1}, {0x2BD3, 0x2BEB}, {0x2BF0, 0x2BFF},
    {0x2E80, 0x3007}, {0x3012, 0x3013}, {0x3020, 0x302F}, {0x3031, 0x309F},
    {0x30A1, 0x30FB}, {0x30FD, 0xA4CF}, {0xA960, 0xA97F}, {0xAC00, 0xD7FF},
    {0xE000, 0xFAFF}, {0xFE10, 0xFE1F}, {0xFE30, 0xFE48}, {0xFE50, 0xFE57},
    {0xFE5F, 0xFE62}, {0xFE67, 0xFE6F}, {0xFF01, 0xFF07}, {0xFF0A, 0xFF0C},
    {0xFF0E, 0xFF19}, {0xFF1F, 0xFF3A}, {0xFF3C, 0xFF3C}, {0xFF3E, 0xFF3E},
    {0xFF40, 0xFF5A}, {0xFFE0, 0xFFE2}, {0xFFE4, 0xFFE7}, {0xFFF0, 0xFFF8},
    {0xFFFC, 0xFFFD}, {0x10980, 0x1099F}, {0x11580, 0x115FF}, {0x11A00, 0x11AAF},
    {0x13000, 0x1342F}, {0x14400, 0x1467F}, {0x16FE0, 0x18AFF}, {0x1B000, 0x1B12F},
    {0x1B170, 0x1B2FF}, {0x1D000, 0x1D1FF}, {0x1D2E0, 0x1D37F}, {0x1D800, 0x1DAAF},
    {0x1F000, 0x1F7FF}, {0x1F900, 0x1FA6F}, {0x20000, 0x2FFFD}, {0x30000, 0x3FFFD},
    {0xF0000, 0xFFFFD}, {0x100000, 0x10FFFD}
  };
  static const int max = sizeof(upright) / sizeof(upright[0]);
  int st = 0;
  /* Inclusive upper bound of the search: the last valid index, not the
   * element count. Starting at 'max' would let 'mid' reach 'max' for any
   * ch above the final range and read one element past the table.
   */
  int ed = max - 1;

  if (ch < upright[0][0])
    return FALSE;

  while (st <= ed)
    {
      int mid = (st + ed) / 2;
      if (upright[mid][0] <= ch && ch <= upright[mid][1])
        return TRUE;
      else
        if (upright[mid][0] <= ch)
          st = mid + 1;
        else
          ed = mid - 1;
    }

  return FALSE;
}

/* Advances @iter to the next run.
 *
 * The new run starts where the previous one ended; its 'upright' flag is
 * taken from the first character. The run is extended until a character
 * with a different classification is found or the text ends. Zero width
 * joiners, the character right after a joiner, variation selectors
 * U+FE0E/U+FE0F, tag characters and emoji modifiers never end a run.
 */
static void
width_iter_next (PangoWidthIter *iter)
{
  gboolean met_joiner = FALSE;
  iter->start = iter->end;

  if (iter->end < iter->text_end)
    {
      gunichar ch = g_utf8_get_char (iter->end);
      iter->upright = width_iter_is_upright (ch);
    }

  while (iter->end < iter->text_end)
    {
      gunichar ch = g_utf8_get_char (iter->end);

      /* for zero width joiner */
      if (ch == 0x200D)
        {
          iter->end = g_utf8_next_char (iter->end);
          met_joiner = TRUE;
          continue;
        }

      /* ignore the upright check if met joiner */
      if (met_joiner)
        {
          iter->end = g_utf8_next_char (iter->end);
          met_joiner = FALSE;
          continue;
        }

      /* for variation selector, tag and emoji modifier. */
      if (G_UNLIKELY (ch == 0xFE0EU || ch == 0xFE0FU ||
                      (ch >= 0xE0020 && ch <= 0xE007F) ||
                      (ch >= 0x1F3FB && ch <= 0x1F3FF)))
        {
          iter->end = g_utf8_next_char (iter->end);
          continue;
        }

      if (width_iter_is_upright (ch) != iter->upright)
        break;
      iter->end = g_utf8_next_char (iter->end);
    }
}

/* Sets @iter up for the @length bytes starting at @text and positions it
 * on the first run.
 */
static void
width_iter_init (PangoWidthIter *iter,
                 const char     *text,
                 int             length)
{
  iter->text_start = text;
  iter->text_end = text + length;
  iter->start = iter->end = text;

  width_iter_next (iter);
}

/* Nothing to release; kept for symmetry with width_iter_init(). */
static void
width_iter_fini (PangoWidthIter *iter)
{
}

/* }}} */
/* {{{ Itemization */

typedef struct _ItemizeState ItemizeState;

/* Bits recorded in ItemizeState.changed describing what changed at the
 * start of the current run.
 */
typedef enum {
  EMBEDDING_CHANGED    = 1 << 0,
  SCRIPT_CHANGED       = 1 << 1,
  LANG_CHANGED         = 1 << 2,
  FONT_CHANGED         = 1 << 3,
  DERIVED_LANG_CHANGED = 1 << 4,
  WIDTH_CHANGED        = 1 << 5,
  EMOJI_CHANGED        = 1 << 6,
} ChangedFlags;

/* Working state of one itemization pass. */
struct _ItemizeState
{
  PangoContext *context;
  const char *text;
  const char *end;

  const char *run_start;
  const char *run_end;

  GList *result;
  PangoItem *item;

  guint8 *embedding_levels;
  int embedding_end_offset;
  const char *embedding_end;
  guint8 embedding;

  PangoGravity gravity;
  PangoGravityHint gravity_hint;
  PangoGravity resolved_gravity;
  PangoGravity font_desc_gravity;
  gboolean centered_baseline;

  PangoAttrIterator *attr_iter;
  gboolean free_attr_iter;
  const char *attr_end;
  PangoFontDescription *font_desc;
  PangoFontDescription *emoji_font_desc;
  PangoLanguage *lang;
  GSList *extra_attrs;
  gboolean copy_extra_attrs;

  ChangedFlags changed;

  PangoScriptIter script_iter;
  const char *script_end;
  PangoScript script;

  PangoWidthIter width_iter;
  PangoEmojiIter emoji_iter;

  PangoLanguage *derived_lang;

  PangoFontset *current_fonts;
  FontCache *cache;
  PangoFont *base_font;
  gboolean enable_fallback;

  const char *first_space; /* first of a sequence of spaces we've seen */
  int font_position; /* position of the current font in the fontset */
};

/* Reads the embedding level at the current offset and advances
 * embedding_end / embedding_end_offset past every following character
 * with the same level. Flags EMBEDDING_CHANGED.
 */
static void
update_embedding_end (ItemizeState *state)
{
  state->embedding = state->embedding_levels[state->embedding_end_offset];
  while (state->embedding_end < state->end &&
         state->embedding_levels[state->embedding_end_offset] == state->embedding)
    {
      state->embedding_end_offset++;
      state->embedding_end = g_utf8_next_char (state->embedding_end);
    }

  state->changed |= EMBEDDING_CHANGED;
}

/* Returns the first attribute of @type in @attr_list, or NULL if none. */
static PangoAttribute *
find_attribute (GSList        *attr_list,
                PangoAttrType  type)
{
  GSList *node;

  for (node = attr_list; node; node = node->next)
    if (((PangoAttribute *) node->data)->klass->type == type)
      return (PangoAttribute *) node->data;

  return NULL;
}

/* Refreshes everything derived from the attribute iterator's current
 * range: attr_end (clamped to the end of the text), font_desc (rebuilt
 * from the context's font description plus the attributes), lang,
 * extra_attrs, font_desc_gravity, and the fallback, gravity and gravity
 * hint settings. Always flags FONT_CHANGED, and LANG_CHANGED if the
 * language differs from the previous one.
 */
static void
update_attr_iterator (ItemizeState *state)
{
  PangoLanguage *old_lang;
  PangoAttribute *attr;
  int end_index;

  pango_attr_iterator_range (state->attr_iter, NULL, &end_index);
  if (end_index < state->end - state->text)
    state->attr_end = state->text + end_index;
  else
    state->attr_end = state->end;

  /* The emoji description is derived from font_desc; drop it so that it
   * gets rebuilt on demand.
   */
  if (state->emoji_font_desc)
    {
      pango_font_description_free (state->emoji_font_desc);
      state->emoji_font_desc = NULL;
    }

  old_lang = state->lang;
  if (state->font_desc)
    pango_font_description_free (state->font_desc);
  state->font_desc = pango_font_description_copy_static (state->context->font_desc);
  pango_attr_iterator_get_font (state->attr_iter, state->font_desc,
                                &state->lang, &state->extra_attrs);
  if (pango_font_description_get_set_fields (state->font_desc) & PANGO_FONT_MASK_GRAVITY)
    state->font_desc_gravity = pango_font_description_get_gravity (state->font_desc);
  else
    state->font_desc_gravity = PANGO_GRAVITY_AUTO;

  /* The first item created hands over the fresh list itself; later items
   * get copies (see itemize_state_add_character()).
   */
  state->copy_extra_attrs = FALSE;

  if (!state->lang)
    state->lang = state->context->language;

  attr = find_attribute (state->extra_attrs, PANGO_ATTR_FALLBACK);
  state->enable_fallback = (attr == NULL || ((PangoAttrInt *)attr)->value);

  attr = find_attribute (state->extra_attrs, PANGO_ATTR_GRAVITY);
  state->gravity = attr == NULL ? PANGO_GRAVITY_AUTO : ((PangoAttrInt *)attr)->value;

  attr = find_attribute (state->extra_attrs, PANGO_ATTR_GRAVITY_HINT);
  state->gravity_hint = attr == NULL ? state->context->gravity_hint : (PangoGravityHint)((PangoAttrInt *)attr)->value;

  state->changed |= FONT_CHANGED;
  if (state->lang != old_lang)
    state->changed |= LANG_CHANGED;
}

/* Sets run_end to the nearest of the embedding, attribute, script, width
 * and emoji boundaries.
 */
static void
update_end (ItemizeState *state)
{
  state->run_end = state->embedding_end;
  if (state->attr_end < state->run_end)
    state->run_end = state->attr_end;
  if (state->script_end < state->run_end)
    state->run_end = state->script_end;
  if (state->width_iter.end < state->run_end)
    state->run_end = state->width_iter.end;
  if (state->emoji_iter.end < state->run_end)
    state->run_end = state->emoji_iter.end;
}

/* Prepares @state for itemizing @length bytes of @text starting at
 * @start_index.
 *
 * @cached_iter, if given, is used as the attribute iterator and is not
 * freed by itemize_state_finish(); otherwise an iterator is created from
 * @attrs if that is given. @desc is only consulted when there is no
 * attribute iterator at all; in that case it replaces the context's font
 * description if non-NULL.
 */
static void
itemize_state_init (ItemizeState               *state,
                    PangoContext               *context,
                    const char                 *text,
                    PangoDirection              base_dir,
                    int                         start_index,
                    int                         length,
                    PangoAttrList              *attrs,
                    PangoAttrIterator          *cached_iter,
                    const PangoFontDescription *desc)
{
  state->context = context;
  state->text = text;
  state->end = text + start_index + length;

  state->result = NULL;
  state->item = NULL;

  state->run_start = text + start_index;
  state->changed = EMBEDDING_CHANGED | SCRIPT_CHANGED | LANG_CHANGED |
                   FONT_CHANGED | WIDTH_CHANGED | EMOJI_CHANGED;

  /* First, apply the bidirectional algorithm to break
   * the text into directional runs.
   */
  state->embedding_levels = pango_log2vis_get_embedding_levels (text + start_index, length, &base_dir);

  state->embedding_end_offset = 0;
  state->embedding_end = text + start_index;
  update_embedding_end (state);

  state->gravity = PANGO_GRAVITY_AUTO;
  state->centered_baseline = PANGO_GRAVITY_IS_VERTICAL (state->context->resolved_gravity);
  state->gravity_hint = state->context->gravity_hint;
  state->resolved_gravity = PANGO_GRAVITY_AUTO;

  /* Initialize the attribute iterator
   */
  if (cached_iter)
    {
      state->attr_iter = cached_iter;
      state->free_attr_iter = FALSE;
    }
  else if (attrs)
    {
      state->attr_iter = pango_attr_list_get_iterator (attrs);
      state->free_attr_iter = TRUE;
    }
  else
    {
      state->attr_iter = NULL;
      state->free_attr_iter = FALSE;
    }

  state->emoji_font_desc = NULL;
  if (state->attr_iter)
    {
      state->font_desc = NULL;
      state->lang = NULL;

      pango_attr_iterator_advance (state->attr_iter, start_index);
      update_attr_iterator (state);
    }
  else
    {
      state->font_desc = pango_font_description_copy_static (desc ? desc : state->context->font_desc);
      state->lang = state->context->language;
      state->extra_attrs = NULL;
      state->copy_extra_attrs = FALSE;

      state->attr_end = state->end;
      state->enable_fallback = TRUE;
    }

  /* Initialize the script iterator
   */
  _pango_script_iter_init (&state->script_iter, text + start_index, length);
  pango_script_iter_get_range (&state->script_iter, NULL,
                               &state->script_end, &state->script);

  width_iter_init (&state->width_iter, text + start_index, length);
  _pango_emoji_iter_init (&state->emoji_iter, text + start_index, length);

  /* With non-vertical gravity the width iterator never splits runs;
   * otherwise an emoji run is not split by a width boundary inside it.
   */
  if (!PANGO_GRAVITY_IS_VERTICAL (state->context->resolved_gravity))
    state->width_iter.end = state->end;
  else if (state->emoji_iter.is_emoji)
    state->width_iter.end = MAX (state->width_iter.end, state->emoji_iter.end);

  update_end (state);

  if (pango_font_description_get_set_fields (state->font_desc) & PANGO_FONT_MASK_GRAVITY)
    state->font_desc_gravity = pango_font_description_get_gravity (state->font_desc);
  else
    state->font_desc_gravity = PANGO_GRAVITY_AUTO;

  state->derived_lang = NULL;
  state->current_fonts = NULL;
  state->cache = NULL;
  state->base_font = NULL;
  state->first_space = NULL;
  state->font_position = 0xffff;
}

/* Moves @state on to the next run.
 *
 * Every iterator whose boundary coincides with the end of the run just
 * processed is advanced and the matching bit is set in state->changed.
 * Returns FALSE if the end of the text has been reached, TRUE otherwise.
 */
static gboolean
itemize_state_next (ItemizeState *state)
{
  if (state->run_end == state->end)
    return FALSE;

  state->changed = 0;

  state->run_start = state->run_end;

  if (state->run_end == state->embedding_end)
    {
      update_embedding_end (state);
    }

  if (state->run_end == state->attr_end)
    {
      pango_attr_iterator_next (state->attr_iter);
      update_attr_iterator (state);
    }

  if (state->run_end == state->script_end)
    {
      pango_script_iter_next (&state->script_iter);
      pango_script_iter_get_range (&state->script_iter, NULL,
                                   &state->script_end, &state->script);
      state->changed |= SCRIPT_CHANGED;
    }

  if (state->run_end == state->emoji_iter.end)
    {
      _pango_emoji_iter_next (&state->emoji_iter);
      state->changed |= EMOJI_CHANGED;

      if (state->emoji_iter.is_emoji)
        state->width_iter.end = MAX (state->width_iter.end, state->emoji_iter.end);
    }

  if (state->run_end == state->width_iter.end)
    {
      width_iter_next (&state->width_iter);
      state->changed |= WIDTH_CHANGED;
    }

  update_end (state);

  return TRUE;
}

/* Returns a new list holding copies of the attributes in @attr_slist,
 * in the same order.
 */
static GSList *
copy_attr_slist (GSList *attr_slist)
{
  GSList *new_list = NULL;
  GSList *l;

  for (l = attr_slist; l; l = l->next)
    new_list = g_slist_prepend (new_list, pango_attribute_copy (l->data));

  return g_slist_reverse (new_list);
}

/* Walks the result list from its head (the most recently added item) and
 * assigns @font to every item that has no font yet, stopping at the first
 * item that already has one. A NULL @font leaves the items untouched.
 */
static void
itemize_state_fill_font (ItemizeState *state,
                         PangoFont    *font)
{
  GList *l;

  for (l = state->result; l; l = l->next)
    {
      PangoItem *item = l->data;
      if (item->analysis.font)
        break;
      if (font)
        item->analysis.font = g_object_ref (font);
    }
}

/* Adds the character at @pos to the current item, or starts a new item.
 *
 * @font may be NULL for characters that must not influence font selection;
 * such a character inherits the current item's font, if it has one. A new
 * item is started when there is no current item, when @force_break is set,
 * or when the font differs from the current item's font.
 */
static void
itemize_state_add_character (ItemizeState *state,
                             PangoFont    *font,
                             int           font_position,
                             gboolean      force_break,
                             const char   *pos,
                             gboolean      is_space)
{
  /* Start of the space sequence seen before this character, if any */
  const char *first_space = state->first_space;
  int n_spaces = 0;

  if (is_space)
    {
      if (state->first_space == NULL)
        state->first_space = pos;
    }
  else
    state->first_space = NULL;

  if (state->item)
    {
      if (!state->item->analysis.font && font)
        {
          itemize_state_fill_font (state, font);
          state->font_position = font_position;
        }
      else if (state->item->analysis.font && !font)
        {
          font = state->item->analysis.font;
          font_position = state->font_position;
        }

      if (!force_break &&
          state->item->analysis.font == font)
        {
          state->item->num_chars++;
          return;
        }

      /* Font is changing, we are about to end the current item.
       * If it ended in a sequence of spaces (but wasn't only spaces),
       * check if we should move those spaces to the new item (since
       * the font is less "fallback").
       *
       * See https://gitlab.gnome.org/GNOME/pango/-/issues/249
       */
      if (state->text + state->item->offset < first_space &&
          font_position < state->font_position)
        {
          n_spaces = g_utf8_strlen (first_space, pos - first_space);
          state->item->num_chars -= n_spaces;
          pos = first_space;
        }

      state->item->length = (pos - state->text) - state->item->offset;
    }

  state->item = pango_item_new ();
  state->item->offset = pos - state->text;
  state->item->length = 0;
  state->item->num_chars = n_spaces + 1;

  if (font)
    g_object_ref (font);
  state->item->analysis.font = font;
  state->font_position = font_position;

  state->item->analysis.level = state->embedding;
  state->item->analysis.gravity = state->resolved_gravity;

  /* The level vs. gravity dance:
   *   - If gravity is SOUTH, leave level untouched.
   *   - If gravity is NORTH, step level one up, to
   *     not get mirrored upside-down text.
   *   - If gravity is EAST, step up to an even level, as
   *     it's a clockwise-rotated layout, so the rotated
   *     top is unrotated left.
   *   - If gravity is WEST, step up to an odd level, as
   *     it's a counter-clockwise-rotated layout, so the rotated
   *     top is unrotated right.
   *
   * A similar dance is performed in pango-layout.c:
   * line_set_resolved_dir().  Keep in synch.
   */
  switch (state->item->analysis.gravity)
    {
      case PANGO_GRAVITY_SOUTH:
      default:
        break;
      case PANGO_GRAVITY_NORTH:
        state->item->analysis.level++;
        break;
      case PANGO_GRAVITY_EAST:
        state->item->analysis.level += 1;
        state->item->analysis.level &= ~1;
        break;
      case PANGO_GRAVITY_WEST:
        state->item->analysis.level |= 1;
        break;
    }

  state->item->analysis.flags |= state->centered_baseline ? PANGO_ANALYSIS_FLAG_CENTERED_BASELINE : 0;

  state->item->analysis.script = state->script;
  state->item->analysis.language = state->derived_lang;

  /* The first item created for an attribute range takes the list itself,
   * every later one gets its own copy.
   */
  if (state->copy_extra_attrs)
    {
      state->item->analysis.extra_attrs = copy_attr_slist (state->extra_attrs);
    }
  else
    {
      state->item->analysis.extra_attrs = state->extra_attrs;
      state->copy_extra_attrs = TRUE;
    }

  state->result = g_list_prepend (state->result, state->item);
}

/* In/out data for get_font_foreach(). */
typedef struct {
  PangoLanguage *lang;
  gunichar wc;
  PangoFont *font;
  int position;
} GetFontInfo;

/* Font selection callback.
 *
 * Stores @font in the GetFontInfo passed as @data and returns TRUE if the
 * font has the character info->wc, or if @fontset is NULL (the
 * fallback-disabled case, where the single font is accepted as is).
 * Otherwise bumps info->position and returns FALSE. A NULL @font is
 * skipped without counting it.
 */
static gboolean
get_font_foreach (PangoFontset *fontset,
                  PangoFont    *font,
                  gpointer      data)
{
  GetFontInfo *info = data;

  if (G_UNLIKELY (!font))
    return FALSE;

  if (pango_font_has_char (font, info->wc))
    {
      info->font = font;
      return TRUE;
    }

  if (!fontset)
    {
      info->font = font;
      return TRUE;
    }

  info->position++;

  return FALSE;
}

/* Returns the font loaded for state->font_desc, loading it on first use
 * and keeping it in state->base_font afterwards.
 */
static PangoFont *
get_base_font (ItemizeState *state)
{
  if (!state->base_font)
    state->base_font = pango_font_map_load_font (state->context->font_map,
                                                 state->context,
                                                 state->font_desc);
  return state->base_font;
}

/* Picks the font to use for @wc and stores it, together with its position
 * in the fontset, in *@font and *@position. *@font may end up NULL.
 *
 * With fallback enabled the per-fontset cache is consulted first and
 * updated afterwards. This function always returns TRUE.
 */
static gboolean
get_font (ItemizeState  *state,
          gunichar       wc,
          PangoFont    **font,
          int           *position)
{
  GetFontInfo info;

  /* We'd need a separate cache when fallback is disabled, but since lookup
   * with fallback disabled is faster anyways, we just skip caching */
  if (state->enable_fallback && font_cache_get (state->cache, wc, font, position))
    return TRUE;

  info.lang = state->derived_lang;
  info.wc = wc;
  info.font = NULL;
  info.position = 0;

  if (state->enable_fallback)
    pango_fontset_foreach (state->current_fonts, get_font_foreach, &info);
  else
    get_font_foreach (NULL, get_base_font (state), &info);

  *font = info.font;
  *position = info.position;

  /* skip caching if fallback disabled (see above) */
  if (state->enable_fallback)
    font_cache_insert (state->cache, wc, *font, *position);

  return TRUE;
}

/* Returns @lang if it is set and includes @script; otherwise the sample
 * language for @script, or the placeholder language "xx" if there is none.
 */
static PangoLanguage *
compute_derived_language (PangoLanguage *lang,
                          PangoScript    script)
{
  PangoLanguage *derived_lang;

  /* Make sure the language tag is consistent with the derived
   * script. There is no point in marking up a section of
   * Arabic text with the "en" language tag.
   */
  if (lang && pango_language_includes_script (lang, script))
    derived_lang = lang;
  else
    {
      derived_lang = pango_script_get_sample_language (script);
      /* If we don't find a sample language for the script, we
       * use a language tag that shouldn't actually be used
       * anywhere. This keeps fontconfig (for the PangoFc*
       * backend) from using the language tag to affect the
       * sort order. I don't have a reference for 'xx' being
       * safe here, though Keith Packard claims it is.
       */
      if (!derived_lang)
        derived_lang = pango_language_from_string ("xx");
    }

  return derived_lang;
}

/* Brings the derived state (resolved gravity, derived language, fontset
 * and its cache, base font) up to date for the run about to be processed,
 * based on the bits set in state->changed.
 */
static void
itemize_state_update_for_new_run (ItemizeState *state)
{
  /* This block should be moved to update_attr_iterator, but I'm too lazy to
   * do it right now */
  if (state->changed & (FONT_CHANGED | SCRIPT_CHANGED | WIDTH_CHANGED))
    {
      /* Font-desc gravity overrides everything */
      if (state->font_desc_gravity != PANGO_GRAVITY_AUTO)
        {
          state->resolved_gravity = state->font_desc_gravity;
        }
      else
        {
          PangoGravity gravity = state->gravity;
          PangoGravityHint gravity_hint = state->gravity_hint;

          if (G_LIKELY (gravity == PANGO_GRAVITY_AUTO))
            gravity = state->context->resolved_gravity;

          state->resolved_gravity = pango_gravity_get_for_script_and_width (state->script,
                                                                            state->width_iter.upright,
                                                                            gravity,
                                                                            gravity_hint);
        }

      if (state->font_desc_gravity != state->resolved_gravity)
        {
          pango_font_description_set_gravity (state->font_desc, state->resolved_gravity);
          state->changed |= FONT_CHANGED;
        }
    }

  if (state->changed & (SCRIPT_CHANGED | LANG_CHANGED))
    {
      PangoLanguage *old_derived_lang = state->derived_lang;
      state->derived_lang = compute_derived_language (state->lang, state->script);
      if (old_derived_lang != state->derived_lang)
        state->changed |= DERIVED_LANG_CHANGED;
    }

  /* Entering or leaving an emoji run switches between font_desc and
   * emoji_font_desc below, so treat it as a font change.
   */
  if (state->changed & (EMOJI_CHANGED))
    {
      state->changed |= FONT_CHANGED;
    }

  if (state->changed & (FONT_CHANGED | DERIVED_LANG_CHANGED) &&
      state->current_fonts)
    {
      g_object_unref (state->current_fonts);
      state->current_fonts = NULL;
      state->cache = NULL;
    }

  if (!state->current_fonts)
    {
      gboolean is_emoji = state->emoji_iter.is_emoji;
      if (is_emoji && !state->emoji_font_desc)
        {
          state->emoji_font_desc = pango_font_description_copy_static (state->font_desc);
          pango_font_description_set_family_static (state->emoji_font_desc, "emoji");
        }
      state->current_fonts = pango_font_map_load_fontset (state->context->font_map,
                                                          state->context,
                                                          is_emoji ? state->emoji_font_desc : state->font_desc,
                                                          state->derived_lang);
      state->cache = get_font_cache (state->current_fonts);
    }

  if ((state->changed & FONT_CHANGED) && state->base_font)
    {
      g_object_unref (state->base_font);
      state->base_font = NULL;
    }
}

/* Turns the current run [run_start, run_end) into one or more items,
 * choosing a font for each character and breaking the run wherever the
 * font changes or a forced break (tab, line separator) occurs.
 */
static void
itemize_state_process_run (ItemizeState *state)
{
  const char *p;
  gboolean last_was_forced_break = FALSE;
  gboolean is_space;

  /* Only one character has type G_UNICODE_LINE_SEPARATOR in Unicode 4.0;
   * update this if that changes.
   */
#define LINE_SEPARATOR 0x2028

  itemize_state_update_for_new_run (state);

  /* We should never get an empty run */
  g_assert (state->run_end != state->run_start);

  for (p = state->run_start;
       p < state->run_end;
       p = g_utf8_next_char (p))
    {
      gunichar wc = g_utf8_get_char (p);
      gboolean is_forced_break = (wc == '\t' || wc == LINE_SEPARATOR);
      PangoFont *font;
      int font_position;
      GUnicodeType type;

      /* We don't want space characters to affect font selection; in general,
       * it's always wrong to select a font just to render a space.
       * We assume that all fonts have the ASCII space, and for other space
       * characters if they don't, HarfBuzz will compatibility-decompose them
       * to ASCII space...
       * See bugs #355987 and #701652.
       *
       * We don't want to change fonts just for variation selectors.
       * See bug #781123.
       *
       * Finally, don't change fonts for line or paragraph separators.
       *
       * Note that we want spaces to use the 'better' font, comparing
       * the font that is used before and after the space. This is handled
       * in itemize_state_add_character().
       */
      type = g_unichar_type (wc);
      if (G_UNLIKELY (type == G_UNICODE_CONTROL ||
                      type == G_UNICODE_FORMAT ||
                      type == G_UNICODE_SURROGATE ||
                      type == G_UNICODE_LINE_SEPARATOR ||
                      type == G_UNICODE_PARAGRAPH_SEPARATOR ||
                      (type == G_UNICODE_SPACE_SEPARATOR && wc != 0x1680u /* OGHAM SPACE MARK */) ||
                      (wc >= 0xfe00u && wc <= 0xfe0fu) ||
                      (wc >= 0xe0100u && wc <= 0xe01efu)))
        {
          font = NULL;
          font_position = 0xffff;
          is_space = TRUE;
        }
      else
        {
          get_font (state, wc, &font, &font_position);
          is_space = FALSE;
        }

      itemize_state_add_character (state, font, font_position,
                                   is_forced_break || last_was_forced_break,
                                   p,
                                   is_space);
      last_was_forced_break = is_forced_break;
    }

  /* Finish the final item from the current segment */
  state->item->length = (p - state->text) - state->item->offset;
  if (!state->item->analysis.font)
    {
      PangoFont *font;
      int position;

      /* NOTE(review): get_font() returns TRUE on every path in this file,
       * so the warning branch below looks unreachable as written - confirm
       * whether a NULL font was meant to trigger it.
       */
      if (G_UNLIKELY (!get_font (state, ' ', &font, &position)))
        {
          /* If no font was found, warn once per fontmap/script pair */
          PangoFontMap *fontmap = state->context->font_map;
          char *script_tag = g_strdup_printf ("g-unicode-script-%d", state->script);

          if (!g_object_get_data (G_OBJECT (fontmap), script_tag))
            {
              g_warning ("failed to choose a font, expect ugly output. script='%d'",
                         state->script);

              g_object_set_data_full (G_OBJECT (fontmap), script_tag,
                                      GINT_TO_POINTER (1), NULL);
            }

          g_free (script_tag);

          font = NULL;
        }
      itemize_state_fill_font (state, font);
    }
  state->item = NULL;
}

/* Releases the resources held by @state. The result list is not touched;
 * the attribute iterator is destroyed only if it was created by
 * itemize_state_init().
 */
static void
itemize_state_finish (ItemizeState *state)
{
  g_free (state->embedding_levels);
  if (state->free_attr_iter)
    pango_attr_iterator_destroy (state->attr_iter);
  _pango_script_iter_fini (&state->script_iter);
  pango_font_description_free (state->font_desc);
  pango_font_description_free (state->emoji_font_desc);
  width_iter_fini (&state->width_iter);
  _pango_emoji_iter_fini (&state->emoji_iter);

  if (state->current_fonts)
    g_object_unref (state->current_fonts);
  if (state->base_font)
    g_object_unref (state->base_font);
}

/* }}} */
/* {{{ Public API */

/* Like pango_itemize_with_base_dir, but takes a font description.
 *
 * Returns NULL if @length is 0 or the text at @start_index is empty.
 * Otherwise returns the items in logical order, each with its character
 * offset filled in.
 */
GList *
pango_itemize_with_font (PangoContext               *context,
                         PangoDirection              base_dir,
                         const char                 *text,
                         int                         start_index,
                         int                         length,
                         PangoAttrList              *attrs,
                         PangoAttrIterator          *cached_iter,
                         const PangoFontDescription *desc)
{
  ItemizeState state;
  GList *items;
  int char_offset;

  if (length == 0 || g_utf8_get_char (text + start_index) == '\0')
    return NULL;

  itemize_state_init (&state, context, text, base_dir, start_index, length,
                      attrs, cached_iter, desc);

  do
    itemize_state_process_run (&state);
  while (itemize_state_next (&state));

  itemize_state_finish (&state);

  /* Items were prepended as they were created */
  items = g_list_reverse (state.result);

  /* Compute the char offset for each item */
  char_offset = 0;
  for (GList *l = items; l; l = l->next)
    {
      PangoItemPrivate *item = l->data;
      item->char_offset = char_offset;
      char_offset += item->num_chars;
    }

  return items;
}

/**
 * pango_itemize_with_base_dir:
 * @context: a structure holding information that affects
 *   the itemization process.
 * @base_dir: base direction to use for bidirectional processing
 * @text: the text to itemize.
 * @start_index: first byte in @text to process
 * @length: the number of bytes (not characters) to process
 *   after @start_index. This must be >= 0.
 * @attrs: the set of attributes that apply to @text.
 * @cached_iter: (nullable): Cached attribute iterator
 *
 * Like `pango_itemize()`, but with an explicitly specified base direction.
 *
 * The base direction is used when computing bidirectional levels.
 * (see [method@Pango.Context.set_base_dir]). [func@itemize] gets the
 * base direction from the `PangoContext`.
 *
 * Return value: (transfer full) (element-type Pango.Item): a `GList` of
 *   [struct@Pango.Item] structures. The items should be freed using
 *   [method@Pango.Item.free] probably in combination with g_list_free_full().
 *
 * Since: 1.4
 */
GList *
pango_itemize_with_base_dir (PangoContext      *context,
                             PangoDirection     base_dir,
                             const char        *text,
                             int                start_index,
                             int                length,
                             PangoAttrList     *attrs,
                             PangoAttrIterator *cached_iter)
{
  g_return_val_if_fail (context != NULL, NULL);
  g_return_val_if_fail (start_index >= 0, NULL);
  g_return_val_if_fail (length >= 0, NULL);
  g_return_val_if_fail (length == 0 || text != NULL, NULL);

  return pango_itemize_with_font (context, base_dir,
                                  text, start_index, length,
                                  attrs, cached_iter, NULL);
}

/**
 * pango_itemize:
 * @context: a structure holding information that affects
 *   the itemization process.
 * @text: the text to itemize. Must be valid UTF-8
 * @start_index: first byte in @text to process
 * @length: the number of bytes (not characters) to process
 *   after @start_index. This must be >= 0.
 * @attrs: the set of attributes that apply to @text.
 * @cached_iter: (nullable): Cached attribute iterator
 *
 * Breaks a piece of text into segments with consistent directional
 * level and font.
 *
 * Each byte of @text will be contained in exactly one of the items in the
 * returned list; the generated list of items will be in logical order (the
 * start offsets of the items are ascending).
 *
 * @cached_iter should be an iterator over @attrs currently positioned
 * at a range before or containing @start_index; @cached_iter will be
 * advanced to the range covering the position just after
 * @start_index + @length. (i.e. if itemizing in a loop, just keep passing
 * in the same @cached_iter).
 *
 * Return value: (transfer full) (element-type Pango.Item): a `GList` of
 *   [struct@Pango.Item] structures. The items should be freed using
 *   [method@Pango.Item.free] probably in combination with g_list_free_full().
 */
GList *
pango_itemize (PangoContext      *context,
               const char        *text,
               int                start_index,
               int                length,
               PangoAttrList     *attrs,
               PangoAttrIterator *cached_iter)
{
  g_return_val_if_fail (context != NULL, NULL);
  g_return_val_if_fail (start_index >= 0, NULL);
  g_return_val_if_fail (length >= 0, NULL);
  g_return_val_if_fail (length == 0 || text != NULL, NULL);

  return pango_itemize_with_font (context, context->base_dir,
                                  text, start_index, length,
                                  attrs, cached_iter, NULL);
}

/* }}} */

/* vim:set foldmethod=marker expandtab: */