diff options
author | Matthias Clasen <mclasen@redhat.com> | 2021-08-20 11:17:26 -0400 |
---|---|---|
committer | Matthias Clasen <mclasen@redhat.com> | 2021-08-20 12:18:28 -0400 |
commit | 2573ad950a7a6cfb5c5e6da466adbdaeb356857a (patch) | |
tree | d47e628432e3f128c1627d4ad79a1dd2bf80fb9a | |
parent | d1d3be80056ae846589389d46544a072e337020a (diff) | |
download | pango-2573ad950a7a6cfb5c5e6da466adbdaeb356857a.tar.gz |
Some code reorg
Split the itemization code into its own file,
and move things around a bit.
-rw-r--r-- | pango/itemize.c | 1151 | ||||
-rw-r--r-- | pango/meson.build | 1 | ||||
-rw-r--r-- | pango/pango-attributes-private.h | 2 | ||||
-rw-r--r-- | pango/pango-attributes.c | 22 | ||||
-rw-r--r-- | pango/pango-context-private.h | 63 | ||||
-rw-r--r-- | pango/pango-context.c | 1153 | ||||
-rw-r--r-- | pango/pango-item.h | 3 |
7 files changed, 1244 insertions, 1151 deletions
diff --git a/pango/itemize.c b/pango/itemize.c new file mode 100644 index 00000000..27a9865d --- /dev/null +++ b/pango/itemize.c @@ -0,0 +1,1151 @@ +/* Pango + * itemize.c: Turning text into items + * + * Copyright (C) 2000, 2006 Red Hat Software + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. + */ + +#include "config.h" +#include <string.h> +#include <stdlib.h> + +#include "pango-context-private.h" +#include "pango-impl-utils.h" + +#include "pango-font-private.h" +#include "pango-fontset-private.h" +#include "pango-fontmap-private.h" +#include "pango-script-private.h" +#include "pango-emoji-private.h" +#include "pango-attributes-private.h" + + +/* {{{ Font cache */ + +/* + * We cache the results of character,fontset => font in a hash table + */ + +typedef struct { + GHashTable *hash; +} FontCache; + +typedef struct { + PangoFont *font; + int position; /* position of the font in the fontset */ +} FontElement; + +static void +font_cache_destroy (FontCache *cache) +{ + g_hash_table_destroy (cache->hash); + g_slice_free (FontCache, cache); +} + +static void +font_element_destroy (FontElement *element) +{ + if (element->font) + g_object_unref (element->font); + g_slice_free (FontElement, element); +} + +static FontCache * +get_font_cache (PangoFontset *fontset) +{ + FontCache *cache; + + static GQuark cache_quark = 0; /* MT-safe */ + if (G_UNLIKELY (!cache_quark)) + cache_quark = g_quark_from_static_string ("pango-font-cache"); + +retry: + cache = g_object_get_qdata (G_OBJECT (fontset), cache_quark); + if (G_UNLIKELY (!cache)) + { + cache = g_slice_new (FontCache); + cache->hash = g_hash_table_new_full (g_direct_hash, NULL, + NULL, (GDestroyNotify)font_element_destroy); + if (!g_object_replace_qdata (G_OBJECT (fontset), cache_quark, NULL, + cache, (GDestroyNotify)font_cache_destroy, + NULL)) + { + font_cache_destroy (cache); + goto retry; + } + } + + return cache; +} + +static gboolean +font_cache_get (FontCache *cache, + gunichar wc, + PangoFont **font, + int *position) +{ + FontElement *element; + + element = g_hash_table_lookup (cache->hash, GUINT_TO_POINTER (wc)); + if (element) + { + *font = element->font; + *position = element->position; + return TRUE; + } + else + return FALSE; +} + +static void +font_cache_insert (FontCache *cache, + gunichar wc, + PangoFont *font, + int position) +{ + FontElement *element = g_slice_new (FontElement); + element->font = font ? g_object_ref (font) : NULL; + element->position = position; + + g_hash_table_insert (cache->hash, GUINT_TO_POINTER (wc), element); +} + +/* }}} */ +/* {{{ Width Iter */ + +typedef struct _PangoWidthIter PangoWidthIter; + +struct _PangoWidthIter +{ + const gchar *text_start; + const gchar *text_end; + const gchar *start; + const gchar *end; + gboolean upright; +}; + +static gboolean +width_iter_is_upright (gunichar ch) +{ + /* https://www.unicode.org/Public/11.0.0/ucd/VerticalOrientation.txt + * VO=U or Tu table generated by tools/gen-vertical-orientation-U-table.py. + * + * FIXME: In the future, If GLib supports VerticalOrientation, please use it. + */ + static const gunichar upright[][2] = { + {0x00A7, 0x00A7}, {0x00A9, 0x00A9}, {0x00AE, 0x00AE}, {0x00B1, 0x00B1}, + {0x00BC, 0x00BE}, {0x00D7, 0x00D7}, {0x00F7, 0x00F7}, {0x02EA, 0x02EB}, + {0x1100, 0x11FF}, {0x1401, 0x167F}, {0x18B0, 0x18FF}, {0x2016, 0x2016}, + {0x2020, 0x2021}, {0x2030, 0x2031}, {0x203B, 0x203C}, {0x2042, 0x2042}, + {0x2047, 0x2049}, {0x2051, 0x2051}, {0x2065, 0x2065}, {0x20DD, 0x20E0}, + {0x20E2, 0x20E4}, {0x2100, 0x2101}, {0x2103, 0x2109}, {0x210F, 0x210F}, + {0x2113, 0x2114}, {0x2116, 0x2117}, {0x211E, 0x2123}, {0x2125, 0x2125}, + {0x2127, 0x2127}, {0x2129, 0x2129}, {0x212E, 0x212E}, {0x2135, 0x213F}, + {0x2145, 0x214A}, {0x214C, 0x214D}, {0x214F, 0x2189}, {0x218C, 0x218F}, + {0x221E, 0x221E}, {0x2234, 0x2235}, {0x2300, 0x2307}, {0x230C, 0x231F}, + {0x2324, 0x2328}, {0x232B, 0x232B}, {0x237D, 0x239A}, {0x23BE, 0x23CD}, + {0x23CF, 0x23CF}, {0x23D1, 0x23DB}, {0x23E2, 0x2422}, {0x2424, 0x24FF}, + {0x25A0, 0x2619}, {0x2620, 0x2767}, {0x2776, 0x2793}, {0x2B12, 0x2B2F}, + {0x2B50, 0x2B59}, {0x2BB8, 0x2BD1}, {0x2BD3, 0x2BEB}, {0x2BF0, 0x2BFF}, + {0x2E80, 0x3007}, {0x3012, 0x3013}, {0x3020, 0x302F}, {0x3031, 0x309F}, + {0x30A1, 0x30FB}, {0x30FD, 0xA4CF}, {0xA960, 0xA97F}, {0xAC00, 0xD7FF}, + {0xE000, 0xFAFF}, {0xFE10, 0xFE1F}, {0xFE30, 0xFE48}, {0xFE50, 0xFE57}, + {0xFE5F, 0xFE62}, {0xFE67, 0xFE6F}, {0xFF01, 0xFF07}, {0xFF0A, 0xFF0C}, + {0xFF0E, 0xFF19}, {0xFF1F, 0xFF3A}, {0xFF3C, 0xFF3C}, {0xFF3E, 0xFF3E}, + {0xFF40, 0xFF5A}, {0xFFE0, 0xFFE2}, {0xFFE4, 0xFFE7}, {0xFFF0, 0xFFF8}, + {0xFFFC, 0xFFFD}, {0x10980, 0x1099F}, {0x11580, 0x115FF}, {0x11A00, 0x11AAF}, + {0x13000, 0x1342F}, {0x14400, 0x1467F}, {0x16FE0, 0x18AFF}, {0x1B000, 0x1B12F}, + {0x1B170, 0x1B2FF}, {0x1D000, 0x1D1FF}, {0x1D2E0, 0x1D37F}, {0x1D800, 0x1DAAF}, + {0x1F000, 0x1F7FF}, {0x1F900, 0x1FA6F}, {0x20000, 0x2FFFD}, {0x30000, 0x3FFFD}, + {0xF0000, 0xFFFFD}, {0x100000, 0x10FFFD} + }; + static const int max = sizeof(upright) / sizeof(upright[0]); + int st = 0; + int ed = max; + + if (ch < upright[0][0]) + return FALSE; + + while (st <= ed) + { + int mid = (st + ed) / 2; + if (upright[mid][0] <= ch && ch <= upright[mid][1]) + return TRUE; + else + if (upright[mid][0] <= ch) + st = mid + 1; + else + ed = mid - 1; + } + + return FALSE; +} + +static void +width_iter_next (PangoWidthIter *iter) +{ + gboolean met_joiner = FALSE; + iter->start = iter->end; + + if (iter->end < iter->text_end) + { + gunichar ch = g_utf8_get_char (iter->end); + iter->upright = width_iter_is_upright (ch); + } + + while (iter->end < iter->text_end) + { + gunichar ch = g_utf8_get_char (iter->end); + + /* for zero width joiner */ + if (ch == 0x200D) + { + iter->end = g_utf8_next_char (iter->end); + met_joiner = TRUE; + continue; + } + + /* ignore the upright check if met joiner */ + if (met_joiner) + { + iter->end = g_utf8_next_char (iter->end); + met_joiner = FALSE; + continue; + } + + /* for variation selector, tag and emoji modifier. */ + if (G_UNLIKELY (ch == 0xFE0EU || ch == 0xFE0FU || + (ch >= 0xE0020 && ch <= 0xE007F) || + (ch >= 0x1F3FB && ch <= 0x1F3FF))) + { + iter->end = g_utf8_next_char (iter->end); + continue; + } + + if (width_iter_is_upright (ch) != iter->upright) + break; + + iter->end = g_utf8_next_char (iter->end); + } +} + +static void +width_iter_init (PangoWidthIter *iter, + const char *text, + int length) +{ + iter->text_start = text; + iter->text_end = text + length; + iter->start = iter->end = text; + + width_iter_next (iter); +} + +static void +width_iter_fini (PangoWidthIter *iter) +{ +} + +/* }}} */ +/* {{{ Itemization */ + +typedef struct _ItemizeState ItemizeState; + + +typedef enum { + EMBEDDING_CHANGED = 1 << 0, + SCRIPT_CHANGED = 1 << 1, + LANG_CHANGED = 1 << 2, + FONT_CHANGED = 1 << 3, + DERIVED_LANG_CHANGED = 1 << 4, + WIDTH_CHANGED = 1 << 5, + EMOJI_CHANGED = 1 << 6, +} ChangedFlags; + + +struct _ItemizeState +{ + PangoContext *context; + const char *text; + const char *end; + + const char *run_start; + const char *run_end; + + GList *result; + PangoItem *item; + + guint8 *embedding_levels; + int embedding_end_offset; + const char *embedding_end; + guint8 embedding; + + PangoGravity gravity; + PangoGravityHint gravity_hint; + PangoGravity resolved_gravity; + PangoGravity font_desc_gravity; + gboolean centered_baseline; + + PangoAttrIterator *attr_iter; + gboolean free_attr_iter; + const char *attr_end; + PangoFontDescription *font_desc; + PangoFontDescription *emoji_font_desc; + PangoLanguage *lang; + GSList *extra_attrs; + gboolean copy_extra_attrs; + + ChangedFlags changed; + + PangoScriptIter script_iter; + const char *script_end; + PangoScript script; + + PangoWidthIter width_iter; + PangoEmojiIter emoji_iter; + + PangoLanguage *derived_lang; + + PangoFontset *current_fonts; + FontCache *cache; + PangoFont *base_font; + gboolean enable_fallback; + + const char *first_space; /* first of a sequence of spaces we've seen */ + int font_position; /* position of the current font in the fontset */ +}; + +static void +update_embedding_end (ItemizeState *state) +{ + state->embedding = state->embedding_levels[state->embedding_end_offset]; + while (state->embedding_end < state->end && + state->embedding_levels[state->embedding_end_offset] == state->embedding) + { + state->embedding_end_offset++; + state->embedding_end = g_utf8_next_char (state->embedding_end); + } + + state->changed |= EMBEDDING_CHANGED; +} + +static PangoAttribute * +find_attribute (GSList *attr_list, + PangoAttrType type) +{ + GSList *node; + + for (node = attr_list; node; node = node->next) + if (((PangoAttribute *) node->data)->klass->type == type) + return (PangoAttribute *) node->data; + + return NULL; +} + +static void +update_attr_iterator (ItemizeState *state) +{ + PangoLanguage *old_lang; + PangoAttribute *attr; + int end_index; + + pango_attr_iterator_range (state->attr_iter, NULL, &end_index); + if (end_index < state->end - state->text) + state->attr_end = state->text + end_index; + else + state->attr_end = state->end; + + if (state->emoji_font_desc) + { + pango_font_description_free (state->emoji_font_desc); + state->emoji_font_desc = NULL; + } + + old_lang = state->lang; + if (state->font_desc) + pango_font_description_free (state->font_desc); + state->font_desc = pango_font_description_copy_static (state->context->font_desc); + pango_attr_iterator_get_font (state->attr_iter, state->font_desc, + &state->lang, &state->extra_attrs); + if (pango_font_description_get_set_fields (state->font_desc) & PANGO_FONT_MASK_GRAVITY) + state->font_desc_gravity = pango_font_description_get_gravity (state->font_desc); + else + state->font_desc_gravity = PANGO_GRAVITY_AUTO; + + state->copy_extra_attrs = FALSE; + + if (!state->lang) + state->lang = state->context->language; + + attr = find_attribute (state->extra_attrs, PANGO_ATTR_FALLBACK); + state->enable_fallback = (attr == NULL || ((PangoAttrInt *)attr)->value); + + attr = find_attribute (state->extra_attrs, PANGO_ATTR_GRAVITY); + state->gravity = attr == NULL ? PANGO_GRAVITY_AUTO : ((PangoAttrInt *)attr)->value; + + attr = find_attribute (state->extra_attrs, PANGO_ATTR_GRAVITY_HINT); + state->gravity_hint = attr == NULL ? state->context->gravity_hint : (PangoGravityHint)((PangoAttrInt *)attr)->value; + + state->changed |= FONT_CHANGED; + if (state->lang != old_lang) + state->changed |= LANG_CHANGED; +} + +static void +update_end (ItemizeState *state) +{ + state->run_end = state->embedding_end; + if (state->attr_end < state->run_end) + state->run_end = state->attr_end; + if (state->script_end < state->run_end) + state->run_end = state->script_end; + if (state->width_iter.end < state->run_end) + state->run_end = state->width_iter.end; + if (state->emoji_iter.end < state->run_end) + state->run_end = state->emoji_iter.end; +} + + +static void +itemize_state_init (ItemizeState *state, + PangoContext *context, + const char *text, + PangoDirection base_dir, + int start_index, + int length, + PangoAttrList *attrs, + PangoAttrIterator *cached_iter, + const PangoFontDescription *desc) +{ + state->context = context; + state->text = text; + state->end = text + start_index + length; + + state->result = NULL; + state->item = NULL; + + state->run_start = text + start_index; + state->changed = EMBEDDING_CHANGED | SCRIPT_CHANGED | LANG_CHANGED | + FONT_CHANGED | WIDTH_CHANGED | EMOJI_CHANGED; + + /* First, apply the bidirectional algorithm to break + * the text into directional runs. + */ + state->embedding_levels = pango_log2vis_get_embedding_levels (text + start_index, length, &base_dir); + + state->embedding_end_offset = 0; + state->embedding_end = text + start_index; + update_embedding_end (state); + + state->gravity = PANGO_GRAVITY_AUTO; + state->centered_baseline = PANGO_GRAVITY_IS_VERTICAL (state->context->resolved_gravity); + state->gravity_hint = state->context->gravity_hint; + state->resolved_gravity = PANGO_GRAVITY_AUTO; + + /* Initialize the attribute iterator + */ + if (cached_iter) + { + state->attr_iter = cached_iter; + state->free_attr_iter = FALSE; + } + else if (attrs) + { + state->attr_iter = pango_attr_list_get_iterator (attrs); + state->free_attr_iter = TRUE; + } + else + { + state->attr_iter = NULL; + state->free_attr_iter = FALSE; + } + + state->emoji_font_desc = NULL; + if (state->attr_iter) + { + state->font_desc = NULL; + state->lang = NULL; + + pango_attr_iterator_advance (state->attr_iter, start_index); + update_attr_iterator (state); + } + else + { + state->font_desc = pango_font_description_copy_static (desc ? desc : state->context->font_desc); + state->lang = state->context->language; + state->extra_attrs = NULL; + state->copy_extra_attrs = FALSE; + + state->attr_end = state->end; + state->enable_fallback = TRUE; + } + + /* Initialize the script iterator + */ + _pango_script_iter_init (&state->script_iter, text + start_index, length); + pango_script_iter_get_range (&state->script_iter, NULL, + &state->script_end, &state->script); + + width_iter_init (&state->width_iter, text + start_index, length); + _pango_emoji_iter_init (&state->emoji_iter, text + start_index, length); + + if (!PANGO_GRAVITY_IS_VERTICAL (state->context->resolved_gravity)) + state->width_iter.end = state->end; + else + if (state->emoji_iter.is_emoji) + state->width_iter.end = MAX (state->width_iter.end, state->emoji_iter.end); + + update_end (state); + + if (pango_font_description_get_set_fields (state->font_desc) & PANGO_FONT_MASK_GRAVITY) + state->font_desc_gravity = pango_font_description_get_gravity (state->font_desc); + else + state->font_desc_gravity = PANGO_GRAVITY_AUTO; + + state->derived_lang = NULL; + state->current_fonts = NULL; + state->cache = NULL; + state->base_font = NULL; + state->first_space = NULL; + state->font_position = 0xffff; +} + +static gboolean +itemize_state_next (ItemizeState *state) +{ + if (state->run_end == state->end) + return FALSE; + + state->changed = 0; + + state->run_start = state->run_end; + + if (state->run_end == state->embedding_end) + { + update_embedding_end (state); + } + + if (state->run_end == state->attr_end) + { + pango_attr_iterator_next (state->attr_iter); + update_attr_iterator (state); + } + + if (state->run_end == state->script_end) + { + pango_script_iter_next (&state->script_iter); + pango_script_iter_get_range (&state->script_iter, NULL, + &state->script_end, &state->script); + state->changed |= SCRIPT_CHANGED; + } + if (state->run_end == state->emoji_iter.end) + { + _pango_emoji_iter_next (&state->emoji_iter); + state->changed |= EMOJI_CHANGED; + + if (state->emoji_iter.is_emoji) + state->width_iter.end = MAX (state->width_iter.end, state->emoji_iter.end); + } + if (state->run_end == state->width_iter.end) + { + width_iter_next (&state->width_iter); + state->changed |= WIDTH_CHANGED; + } + + update_end (state); + + return TRUE; +} + +static GSList * +copy_attr_slist (GSList *attr_slist) +{ + GSList *new_list = NULL; + GSList *l; + + for (l = attr_slist; l; l = l->next) + new_list = g_slist_prepend (new_list, pango_attribute_copy (l->data)); + + return g_slist_reverse (new_list); +} + +static void +itemize_state_fill_font (ItemizeState *state, + PangoFont *font) +{ + GList *l; + + for (l = state->result; l; l = l->next) + { + PangoItem *item = l->data; + if (item->analysis.font) + break; + if (font) + item->analysis.font = g_object_ref (font); + } +} + +static void +itemize_state_add_character (ItemizeState *state, + PangoFont *font, + int font_position, + gboolean force_break, + const char *pos, + gboolean is_space) +{ + const char *first_space = state->first_space; + int n_spaces = 0; + + if (is_space) + { + if (state->first_space == NULL) + state->first_space = pos; + } + else + state->first_space = NULL; + + if (state->item) + { + if (!state->item->analysis.font && font) + { + itemize_state_fill_font (state, font); + state->font_position = font_position; + } + else if (state->item->analysis.font && !font) + { + font = state->item->analysis.font; + font_position = state->font_position; + } + + if (!force_break && + state->item->analysis.font == font) + { + state->item->num_chars++; + return; + } + + /* Font is changing, we are about to end the current item. + * If it ended in a sequence of spaces (but wasn't only spaces), + * check if we should move those spaces to the new item (since + * the font is less "fallback". + * + * See https://gitlab.gnome.org/GNOME/pango/-/issues/249 + */ + if (state->text + state->item->offset < first_space && + font_position < state->font_position) + { + n_spaces = g_utf8_strlen (first_space, pos - first_space); + state->item->num_chars -= n_spaces; + pos = first_space; + } + + state->item->length = (pos - state->text) - state->item->offset; + } + + state->item = pango_item_new (); + state->item->offset = pos - state->text; + state->item->length = 0; + state->item->num_chars = n_spaces + 1; + + if (font) + g_object_ref (font); + state->item->analysis.font = font; + state->font_position = font_position; + + state->item->analysis.level = state->embedding; + state->item->analysis.gravity = state->resolved_gravity; + + /* The level vs. gravity dance: + * - If gravity is SOUTH, leave level untouched. + * - If gravity is NORTH, step level one up, to + * not get mirrored upside-down text. + * - If gravity is EAST, step up to an even level, as + * it's a clockwise-rotated layout, so the rotated + * top is unrotated left. + * - If gravity is WEST, step up to an odd level, as + * it's a counter-clockwise-rotated layout, so the rotated + * top is unrotated right. + * + * A similar dance is performed in pango-layout.c: + * line_set_resolved_dir(). Keep in synch. + */ + switch (state->item->analysis.gravity) + { + case PANGO_GRAVITY_SOUTH: + default: + break; + case PANGO_GRAVITY_NORTH: + state->item->analysis.level++; + break; + case PANGO_GRAVITY_EAST: + state->item->analysis.level += 1; + state->item->analysis.level &= ~1; + break; + case PANGO_GRAVITY_WEST: + state->item->analysis.level |= 1; + break; + } + + state->item->analysis.flags = state->centered_baseline ? PANGO_ANALYSIS_FLAG_CENTERED_BASELINE : 0; + + state->item->analysis.script = state->script; + state->item->analysis.language = state->derived_lang; + + if (state->copy_extra_attrs) + { + state->item->analysis.extra_attrs = copy_attr_slist (state->extra_attrs); + } + else + { + state->item->analysis.extra_attrs = state->extra_attrs; + state->copy_extra_attrs = TRUE; + } + + state->result = g_list_prepend (state->result, state->item); +} + +typedef struct { + PangoLanguage *lang; + gunichar wc; + PangoFont *font; + int position; +} GetFontInfo; + +static gboolean +get_font_foreach (PangoFontset *fontset, + PangoFont *font, + gpointer data) +{ + GetFontInfo *info = data; + + if (G_UNLIKELY (!font)) + return FALSE; + + if (pango_font_has_char (font, info->wc)) + { + info->font = font; + return TRUE; + } + + if (!fontset) + { + info->font = font; + return TRUE; + } + + info->position++; + + return FALSE; +} + +static PangoFont * +get_base_font (ItemizeState *state) +{ + if (!state->base_font) + state->base_font = pango_font_map_load_font (state->context->font_map, + state->context, + state->font_desc); + return state->base_font; +} + +static gboolean +get_font (ItemizeState *state, + gunichar wc, + PangoFont **font, + int *position) +{ + GetFontInfo info; + + /* We'd need a separate cache when fallback is disabled, but since lookup + * with fallback disabled is faster anyways, we just skip caching */ + if (state->enable_fallback && font_cache_get (state->cache, wc, font, position)) + return TRUE; + + info.lang = state->derived_lang; + info.wc = wc; + info.font = NULL; + info.position = 0; + + if (state->enable_fallback) + pango_fontset_foreach (state->current_fonts, get_font_foreach, &info); + else + get_font_foreach (NULL, get_base_font (state), &info); + + *font = info.font; + *position = info.position; + + /* skip caching if fallback disabled (see above) */ + if (state->enable_fallback) + font_cache_insert (state->cache, wc, *font, *position); + + return TRUE; +} + +static PangoLanguage * +compute_derived_language (PangoLanguage *lang, + PangoScript script) +{ + PangoLanguage *derived_lang; + + /* Make sure the language tag is consistent with the derived + * script. There is no point in marking up a section of + * Arabic text with the "en" language tag. + */ + if (lang && pango_language_includes_script (lang, script)) + derived_lang = lang; + else + { + derived_lang = pango_script_get_sample_language (script); + /* If we don't find a sample language for the script, we + * use a language tag that shouldn't actually be used + * anywhere. This keeps fontconfig (for the PangoFc* + * backend) from using the language tag to affect the + * sort order. I don't have a reference for 'xx' being + * safe here, though Keith Packard claims it is. + */ + if (!derived_lang) + derived_lang = pango_language_from_string ("xx"); + } + + return derived_lang; +} + +static void +itemize_state_update_for_new_run (ItemizeState *state) +{ + /* This block should be moved to update_attr_iterator, but I'm too lazy to + * do it right now */ + if (state->changed & (FONT_CHANGED | SCRIPT_CHANGED | WIDTH_CHANGED)) + { + /* Font-desc gravity overrides everything */ + if (state->font_desc_gravity != PANGO_GRAVITY_AUTO) + { + state->resolved_gravity = state->font_desc_gravity; + } + else + { + PangoGravity gravity = state->gravity; + PangoGravityHint gravity_hint = state->gravity_hint; + + if (G_LIKELY (gravity == PANGO_GRAVITY_AUTO)) + gravity = state->context->resolved_gravity; + + state->resolved_gravity = pango_gravity_get_for_script_and_width (state->script, + state->width_iter.upright, + gravity, + gravity_hint); + } + + if (state->font_desc_gravity != state->resolved_gravity) + { + pango_font_description_set_gravity (state->font_desc, state->resolved_gravity); + state->changed |= FONT_CHANGED; + } + } + + if (state->changed & (SCRIPT_CHANGED | LANG_CHANGED)) + { + PangoLanguage *old_derived_lang = state->derived_lang; + state->derived_lang = compute_derived_language (state->lang, state->script); + if (old_derived_lang != state->derived_lang) + state->changed |= DERIVED_LANG_CHANGED; + } + + if (state->changed & (EMOJI_CHANGED)) + { + state->changed |= FONT_CHANGED; + } + + if (state->changed & (FONT_CHANGED | DERIVED_LANG_CHANGED) && + state->current_fonts) + { + g_object_unref (state->current_fonts); + state->current_fonts = NULL; + state->cache = NULL; + } + + if (!state->current_fonts) + { + gboolean is_emoji = state->emoji_iter.is_emoji; + if (is_emoji && !state->emoji_font_desc) + { + state->emoji_font_desc = pango_font_description_copy_static (state->font_desc); + pango_font_description_set_family_static (state->emoji_font_desc, "emoji"); + } + state->current_fonts = pango_font_map_load_fontset (state->context->font_map, + state->context, + is_emoji ? state->emoji_font_desc : state->font_desc, + state->derived_lang); + state->cache = get_font_cache (state->current_fonts); + } + + if ((state->changed & FONT_CHANGED) && state->base_font) + { + g_object_unref (state->base_font); + state->base_font = NULL; + } +} + +static void +itemize_state_process_run (ItemizeState *state) +{ + const char *p; + gboolean last_was_forced_break = FALSE; + gboolean is_space; + + /* Only one character has type G_UNICODE_LINE_SEPARATOR in Unicode 4.0; + * update this if that changes. */ +#define LINE_SEPARATOR 0x2028 + + itemize_state_update_for_new_run (state); + + /* We should never get an empty run */ + g_assert (state->run_end != state->run_start); + + for (p = state->run_start; + p < state->run_end; + p = g_utf8_next_char (p)) + { + gunichar wc = g_utf8_get_char (p); + gboolean is_forced_break = (wc == '\t' || wc == LINE_SEPARATOR); + PangoFont *font; + int font_position; + GUnicodeType type; + + /* We don't want space characters to affect font selection; in general, + * it's always wrong to select a font just to render a space. + * We assume that all fonts have the ASCII space, and for other space + * characters if they don't, HarfBuzz will compatibility-decompose them + * to ASCII space... + * See bugs #355987 and #701652. + * + * We don't want to change fonts just for variation selectors. + * See bug #781123. + * + * Finally, don't change fonts for line or paragraph separators. + * + * Note that we want spaces to use the 'better' font, comparing + * the font that is used before and after the space. This is handled + * in itemize_state_add_character(). + */ + type = g_unichar_type (wc); + if (G_UNLIKELY (type == G_UNICODE_CONTROL || + type == G_UNICODE_FORMAT || + type == G_UNICODE_SURROGATE || + type == G_UNICODE_LINE_SEPARATOR || + type == G_UNICODE_PARAGRAPH_SEPARATOR || + (type == G_UNICODE_SPACE_SEPARATOR && wc != 0x1680u /* OGHAM SPACE MARK */) || + (wc >= 0xfe00u && wc <= 0xfe0fu) || + (wc >= 0xe0100u && wc <= 0xe01efu))) + { + font = NULL; + font_position = 0xffff; + is_space = TRUE; + } + else + { + get_font (state, wc, &font, &font_position); + is_space = FALSE; + } + + itemize_state_add_character (state, font, font_position, + is_forced_break || last_was_forced_break, + p, + is_space); + + last_was_forced_break = is_forced_break; + } + + /* Finish the final item from the current segment */ + state->item->length = (p - state->text) - state->item->offset; + if (!state->item->analysis.font) + { + PangoFont *font; + int position; + + if (G_UNLIKELY (!get_font (state, ' ', &font, &position))) + { + /* If no font was found, warn once per fontmap/script pair */ + PangoFontMap *fontmap = state->context->font_map; + char *script_tag = g_strdup_printf ("g-unicode-script-%d", state->script); + + if (!g_object_get_data (G_OBJECT (fontmap), script_tag)) + { + g_warning ("failed to choose a font, expect ugly output. script='%d'", + state->script); + + g_object_set_data_full (G_OBJECT (fontmap), script_tag, + GINT_TO_POINTER (1), NULL); + } + + g_free (script_tag); + + font = NULL; + } + itemize_state_fill_font (state, font); + } + state->item = NULL; +} + +static void +itemize_state_finish (ItemizeState *state) +{ + g_free (state->embedding_levels); + if (state->free_attr_iter) + pango_attr_iterator_destroy (state->attr_iter); + _pango_script_iter_fini (&state->script_iter); + pango_font_description_free (state->font_desc); + pango_font_description_free (state->emoji_font_desc); + width_iter_fini (&state->width_iter); + _pango_emoji_iter_fini (&state->emoji_iter); + + if (state->current_fonts) + g_object_unref (state->current_fonts); + if (state->base_font) + g_object_unref (state->base_font); +} +/* }}} */ +/* {{{ Public API */ + +/* Like pango_itemize, but takes a font description */ +GList * +pango_itemize_with_font (PangoContext *context, + const char *text, + int start_index, + int length, + const PangoFontDescription *desc) +{ + ItemizeState state; + + if (length == 0) + return NULL; + + itemize_state_init (&state, context, text, context->base_dir, start_index, length, + NULL, NULL, desc); + + do + itemize_state_process_run (&state); + while (itemize_state_next (&state)); + + itemize_state_finish (&state); + + return g_list_reverse (state.result); +} + +/** + * pango_itemize_with_base_dir: + * @context: a structure holding information that affects + * the itemization process. + * @base_dir: base direction to use for bidirectional processing + * @text: the text to itemize. + * @start_index: first byte in @text to process + * @length: the number of bytes (not characters) to process + * after @start_index. This must be >= 0. + * @attrs: the set of attributes that apply to @text. + * @cached_iter: (nullable): Cached attribute iterator + * + * Like `pango_itemize()`, but with an explicitly specified base direction. + * + * The base direction is used when computing bidirectional levels. + * (see [method@Pango.Context.set_base_dir]). [func@itemize] gets the + * base direction from the `PangoContext`. + * + * Return value: (transfer full) (element-type Pango.Item): a `GList` of + * [struct@Pango.Item] structures. The items should be freed using + * [method@Pango.Item.free] probably in combination with g_list_free_full(). + * + * Since: 1.4 + */ +GList * +pango_itemize_with_base_dir (PangoContext *context, + PangoDirection base_dir, + const char *text, + int start_index, + int length, + PangoAttrList *attrs, + PangoAttrIterator *cached_iter) +{ + ItemizeState state; + + g_return_val_if_fail (context != NULL, NULL); + g_return_val_if_fail (start_index >= 0, NULL); + g_return_val_if_fail (length >= 0, NULL); + g_return_val_if_fail (length == 0 || text != NULL, NULL); + + if (length == 0 || g_utf8_get_char (text + start_index) == '\0') + return NULL; + + itemize_state_init (&state, context, text, base_dir, start_index, length, + attrs, cached_iter, NULL); + + do + itemize_state_process_run (&state); + while (itemize_state_next (&state)); + + itemize_state_finish (&state); + + return g_list_reverse (state.result); +} + +/** + * pango_itemize: + * @context: a structure holding information that affects + * the itemization process. + * @text: the text to itemize. Must be valid UTF-8 + * @start_index: first byte in @text to process + * @length: the number of bytes (not characters) to process + * after @start_index. This must be >= 0. + * @attrs: the set of attributes that apply to @text. + * @cached_iter: (nullable): Cached attribute iterator + * + * Breaks a piece of text into segments with consistent directional + * level and font. + * + * Each byte of @text will be contained in exactly one of the items in the + * returned list; the generated list of items will be in logical order (the + * start offsets of the items are ascending). + * + * @cached_iter should be an iterator over @attrs currently positioned + * at a range before or containing @start_index; @cached_iter will be + * advanced to the range covering the position just after + * @start_index + @length. (i.e. if itemizing in a loop, just keep passing + * in the same @cached_iter). + * + * Return value: (transfer full) (element-type Pango.Item): a `GList` of + * [struct@Pango.Item] structures. The items should be freed using + * [method@Pango.Item.free] probably in combination with g_list_free_full(). + */ +GList * +pango_itemize (PangoContext *context, + const char *text, + int start_index, + int length, + PangoAttrList *attrs, + PangoAttrIterator *cached_iter) +{ + g_return_val_if_fail (context != NULL, NULL); + g_return_val_if_fail (start_index >= 0, NULL); + g_return_val_if_fail (length >= 0, NULL); + g_return_val_if_fail (length == 0 || text != NULL, NULL); + + return pango_itemize_with_base_dir (context, context->base_dir, + text, start_index, length, attrs, cached_iter); +} + +/* }}} */ + +/* vim:set foldmethod=marker expandtab: */ diff --git a/pango/meson.build b/pango/meson.build index 9e1d8d93..b97809a1 100644 --- a/pango/meson.build +++ b/pango/meson.build @@ -3,6 +3,7 @@ pango_sources = [ 'ellipsize.c', 'fonts.c', 'glyphstring.c', + 'itemize.c', 'modules.c', 'pango-attributes.c', 'pango-bidi-type.c', diff --git a/pango/pango-attributes-private.h b/pango/pango-attributes-private.h index f7384fd0..4c427695 100644 --- a/pango/pango-attributes-private.h +++ b/pango/pango-attributes-private.h @@ -46,6 +46,8 @@ void _pango_attr_list_get_iterator (PangoAttrList *list, PangoAttrIterator *iterator); void _pango_attr_iterator_destroy (PangoAttrIterator *iterator); +gboolean pango_attr_iterator_advance (PangoAttrIterator *iterator, + int index); #endif diff --git a/pango/pango-attributes.c b/pango/pango-attributes.c index fdb37f56..4a141792 100644 --- a/pango/pango-attributes.c +++ b/pango/pango-attributes.c @@ -2549,6 +2549,28 @@ pango_attr_iterator_get_attrs (PangoAttrIterator *iterator) return attrs; } +gboolean +pango_attr_iterator_advance (PangoAttrIterator *iterator, + int index) +{ + int start_range, end_range; + + pango_attr_iterator_range (iterator, &start_range, &end_range); + + while (index >= end_range) + { + if (!pango_attr_iterator_next (iterator)) + return FALSE; + pango_attr_iterator_range (iterator, &start_range, &end_range); + } + + if (start_range > index) + g_warning ("pango_attr_iterator_advance(): iterator had already " + "moved beyond the index"); + + return TRUE; +} + /** * pango_attribute_as_int: diff --git a/pango/pango-context-private.h b/pango/pango-context-private.h new file mode 100644 index 00000000..240c07d4 --- /dev/null +++ b/pango/pango-context-private.h @@ -0,0 +1,63 @@ +/* Pango + * pango-context-private.h: Internal structures of PangoContext + * + * Copyright (C) 2004 Red Hat Software + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. + */ + +#ifndef __PANGO_CONTEXT_PRIVATE_H__ +#define __PANGO_CONTEXT_PRIVATE_H__ + +#include <pango/pango-context.h> + +G_BEGIN_DECLS + +struct _PangoContext +{ + GObject parent_instance; + guint serial; + guint fontmap_serial; + + PangoLanguage *set_language; + PangoLanguage *language; + PangoDirection base_dir; + PangoGravity base_gravity; + PangoGravity resolved_gravity; + PangoGravityHint gravity_hint; + + PangoFontDescription *font_desc; + + PangoMatrix *matrix; + + PangoFontMap *font_map; + + PangoFontMetrics *metrics; + + gboolean round_glyph_positions; +}; + +GList * pango_itemize_with_font (PangoContext *context, + const char *text, + int start_index, + int length, + const PangoFontDescription *desc); + + +G_END_DECLS + +#endif /* __PANGO_CONTEXT_PRIVATE_H__ */ + diff --git a/pango/pango-context.c b/pango/pango-context.c index 2cb3304c..d76b0ae2 100644 --- a/pango/pango-context.c +++ b/pango/pango-context.c @@ -24,6 +24,7 @@ #include <stdlib.h> #include "pango-context.h" +#include "pango-context-private.h" #include "pango-impl-utils.h" #include "pango-font-private.h" @@ -44,29 +45,6 @@ * * To obtain a `PangoContext`, use [method@Pango.FontMap.create_context]. */ -struct _PangoContext -{ - GObject parent_instance; - guint serial; - guint fontmap_serial; - - PangoLanguage *set_language; - PangoLanguage *language; - PangoDirection base_dir; - PangoGravity base_gravity; - PangoGravity resolved_gravity; - PangoGravityHint gravity_hint; - - PangoFontDescription *font_desc; - - PangoMatrix *matrix; - - PangoFontMap *font_map; - - PangoFontMetrics *metrics; - - gboolean round_glyph_positions; -}; struct _PangoContextClass { @@ -595,1133 +573,6 @@ pango_context_get_gravity_hint (PangoContext *context) return context->gravity_hint; } -/**********************************************************************/ - -static gboolean -advance_attr_iterator_to (PangoAttrIterator *iterator, - int start_index) -{ - int start_range, end_range; - - pango_attr_iterator_range (iterator, &start_range, &end_range); - - while (start_index >= end_range) - { - if (!pango_attr_iterator_next (iterator)) - return FALSE; - pango_attr_iterator_range (iterator, &start_range, &end_range); - } - - if (start_range > start_index) - g_warning ("In pango_itemize(), the cached iterator passed in " - "had already moved beyond the start_index"); - - return TRUE; -} - -/*************************************************************************** - * We cache the results of character,fontset => font in a hash table - ***************************************************************************/ - -typedef struct { - GHashTable *hash; -} FontCache; - -typedef struct { - PangoFont *font; - int position; /* position of the font in the fontset */ -} FontElement; - -static void -font_cache_destroy (FontCache *cache) -{ - g_hash_table_destroy (cache->hash); - g_slice_free (FontCache, cache); -} - -static void -font_element_destroy (FontElement *element) -{ - if (element->font) - g_object_unref (element->font); - g_slice_free (FontElement, element); -} - -static FontCache * -get_font_cache (PangoFontset *fontset) -{ - FontCache *cache; - - static GQuark cache_quark = 0; /* MT-safe */ - if (G_UNLIKELY (!cache_quark)) - cache_quark = g_quark_from_static_string ("pango-font-cache"); - -retry: - cache = g_object_get_qdata (G_OBJECT (fontset), cache_quark); - if (G_UNLIKELY (!cache)) - { - cache = g_slice_new (FontCache); - cache->hash = g_hash_table_new_full (g_direct_hash, NULL, - NULL, (GDestroyNotify)font_element_destroy); - if (!g_object_replace_qdata (G_OBJECT (fontset), cache_quark, NULL, - cache, (GDestroyNotify)font_cache_destroy, - NULL)) - { - font_cache_destroy (cache); - goto retry; - } - } - - return cache; -} - -static gboolean -font_cache_get (FontCache *cache, - gunichar wc, - PangoFont **font, - int *position) -{ - FontElement *element; - - element = g_hash_table_lookup (cache->hash, GUINT_TO_POINTER (wc)); - if (element) - { - *font = element->font; - *position = element->position; - return TRUE; - } - else - return FALSE; -} - -static void -font_cache_insert (FontCache *cache, - gunichar wc, - PangoFont *font, - int position) -{ - FontElement *element = g_slice_new (FontElement); - element->font = font ? g_object_ref (font) : NULL; - element->position = position; - - g_hash_table_insert (cache->hash, GUINT_TO_POINTER (wc), element); -} - -/**********************************************************************/ - -typedef enum { - EMBEDDING_CHANGED = 1 << 0, - SCRIPT_CHANGED = 1 << 1, - LANG_CHANGED = 1 << 2, - FONT_CHANGED = 1 << 3, - DERIVED_LANG_CHANGED = 1 << 4, - WIDTH_CHANGED = 1 << 5, - EMOJI_CHANGED = 1 << 6, -} ChangedFlags; - - - -typedef struct _PangoWidthIter PangoWidthIter; - -struct _PangoWidthIter -{ - const gchar *text_start; - const gchar *text_end; - const gchar *start; - const gchar *end; - gboolean upright; -}; - -typedef struct _ItemizeState ItemizeState; - - - -struct _ItemizeState -{ - PangoContext *context; - const char *text; - const char *end; - - const char *run_start; - const char *run_end; - - GList *result; - PangoItem *item; - - guint8 *embedding_levels; - int embedding_end_offset; - const char *embedding_end; - guint8 embedding; - - PangoGravity gravity; - PangoGravityHint gravity_hint; - PangoGravity resolved_gravity; - PangoGravity font_desc_gravity; - gboolean centered_baseline; - - PangoAttrIterator *attr_iter; - gboolean free_attr_iter; - const char *attr_end; - PangoFontDescription *font_desc; - PangoFontDescription *emoji_font_desc; - PangoLanguage *lang; - GSList *extra_attrs; - gboolean copy_extra_attrs; - - ChangedFlags changed; - - PangoScriptIter script_iter; - const char *script_end; - PangoScript script; - - PangoWidthIter width_iter; - PangoEmojiIter emoji_iter; - - PangoLanguage *derived_lang; - - PangoFontset *current_fonts; - FontCache *cache; - PangoFont *base_font; - gboolean enable_fallback; - - const char *first_space; /* first of a sequence of spaces we've seen */ - int font_position; /* position of the current font in the fontset */ -}; - -static void -update_embedding_end (ItemizeState *state) -{ - state->embedding = state->embedding_levels[state->embedding_end_offset]; - while (state->embedding_end < state->end && - state->embedding_levels[state->embedding_end_offset] == state->embedding) - { - state->embedding_end_offset++; - state->embedding_end = g_utf8_next_char (state->embedding_end); - } - - state->changed |= EMBEDDING_CHANGED; -} - -static PangoAttribute * -find_attribute (GSList *attr_list, - PangoAttrType type) -{ - GSList *node; - - for (node = attr_list; node; node = node->next) - if (((PangoAttribute *) node->data)->klass->type == type) - return (PangoAttribute *) node->data; - - return NULL; -} - -static void -update_attr_iterator (ItemizeState *state) -{ - PangoLanguage *old_lang; - PangoAttribute *attr; - int end_index; - - pango_attr_iterator_range (state->attr_iter, NULL, &end_index); - if (end_index < state->end - state->text) - state->attr_end = state->text + end_index; - else - state->attr_end = state->end; - - if (state->emoji_font_desc) - { - pango_font_description_free (state->emoji_font_desc); - state->emoji_font_desc = NULL; - } - - old_lang = state->lang; - if (state->font_desc) - pango_font_description_free (state->font_desc); - state->font_desc = pango_font_description_copy_static (state->context->font_desc); - pango_attr_iterator_get_font (state->attr_iter, state->font_desc, - &state->lang, &state->extra_attrs); - if (pango_font_description_get_set_fields (state->font_desc) & PANGO_FONT_MASK_GRAVITY) - state->font_desc_gravity = pango_font_description_get_gravity (state->font_desc); - else - state->font_desc_gravity = PANGO_GRAVITY_AUTO; - - state->copy_extra_attrs = FALSE; - - if (!state->lang) - state->lang = state->context->language; - - attr = find_attribute (state->extra_attrs, PANGO_ATTR_FALLBACK); - state->enable_fallback = (attr == NULL || ((PangoAttrInt *)attr)->value); - - attr = find_attribute (state->extra_attrs, PANGO_ATTR_GRAVITY); - state->gravity = attr == NULL ? PANGO_GRAVITY_AUTO : ((PangoAttrInt *)attr)->value; - - attr = find_attribute (state->extra_attrs, PANGO_ATTR_GRAVITY_HINT); - state->gravity_hint = attr == NULL ? state->context->gravity_hint : (PangoGravityHint)((PangoAttrInt *)attr)->value; - - state->changed |= FONT_CHANGED; - if (state->lang != old_lang) - state->changed |= LANG_CHANGED; -} - -static void -update_end (ItemizeState *state) -{ - state->run_end = state->embedding_end; - if (state->attr_end < state->run_end) - state->run_end = state->attr_end; - if (state->script_end < state->run_end) - state->run_end = state->script_end; - if (state->width_iter.end < state->run_end) - state->run_end = state->width_iter.end; - if (state->emoji_iter.end < state->run_end) - state->run_end = state->emoji_iter.end; -} - -static gboolean -width_iter_is_upright (gunichar ch) -{ - /* https://www.unicode.org/Public/11.0.0/ucd/VerticalOrientation.txt - * VO=U or Tu table generated by tools/gen-vertical-orientation-U-table.py. - * - * FIXME: In the future, If GLib supports VerticalOrientation, please use it. - */ - static const gunichar upright[][2] = { - {0x00A7, 0x00A7}, {0x00A9, 0x00A9}, {0x00AE, 0x00AE}, {0x00B1, 0x00B1}, - {0x00BC, 0x00BE}, {0x00D7, 0x00D7}, {0x00F7, 0x00F7}, {0x02EA, 0x02EB}, - {0x1100, 0x11FF}, {0x1401, 0x167F}, {0x18B0, 0x18FF}, {0x2016, 0x2016}, - {0x2020, 0x2021}, {0x2030, 0x2031}, {0x203B, 0x203C}, {0x2042, 0x2042}, - {0x2047, 0x2049}, {0x2051, 0x2051}, {0x2065, 0x2065}, {0x20DD, 0x20E0}, - {0x20E2, 0x20E4}, {0x2100, 0x2101}, {0x2103, 0x2109}, {0x210F, 0x210F}, - {0x2113, 0x2114}, {0x2116, 0x2117}, {0x211E, 0x2123}, {0x2125, 0x2125}, - {0x2127, 0x2127}, {0x2129, 0x2129}, {0x212E, 0x212E}, {0x2135, 0x213F}, - {0x2145, 0x214A}, {0x214C, 0x214D}, {0x214F, 0x2189}, {0x218C, 0x218F}, - {0x221E, 0x221E}, {0x2234, 0x2235}, {0x2300, 0x2307}, {0x230C, 0x231F}, - {0x2324, 0x2328}, {0x232B, 0x232B}, {0x237D, 0x239A}, {0x23BE, 0x23CD}, - {0x23CF, 0x23CF}, {0x23D1, 0x23DB}, {0x23E2, 0x2422}, {0x2424, 0x24FF}, - {0x25A0, 0x2619}, {0x2620, 0x2767}, {0x2776, 0x2793}, {0x2B12, 0x2B2F}, - {0x2B50, 0x2B59}, {0x2BB8, 0x2BD1}, {0x2BD3, 0x2BEB}, {0x2BF0, 0x2BFF}, - {0x2E80, 0x3007}, {0x3012, 0x3013}, {0x3020, 0x302F}, {0x3031, 0x309F}, - {0x30A1, 0x30FB}, {0x30FD, 0xA4CF}, {0xA960, 0xA97F}, {0xAC00, 0xD7FF}, - {0xE000, 0xFAFF}, {0xFE10, 0xFE1F}, {0xFE30, 0xFE48}, {0xFE50, 0xFE57}, - {0xFE5F, 0xFE62}, {0xFE67, 0xFE6F}, {0xFF01, 0xFF07}, {0xFF0A, 0xFF0C}, - {0xFF0E, 0xFF19}, {0xFF1F, 0xFF3A}, {0xFF3C, 0xFF3C}, {0xFF3E, 0xFF3E}, - {0xFF40, 0xFF5A}, {0xFFE0, 0xFFE2}, {0xFFE4, 0xFFE7}, {0xFFF0, 0xFFF8}, - {0xFFFC, 0xFFFD}, {0x10980, 0x1099F}, {0x11580, 0x115FF}, {0x11A00, 0x11AAF}, - {0x13000, 0x1342F}, {0x14400, 0x1467F}, {0x16FE0, 0x18AFF}, {0x1B000, 0x1B12F}, - {0x1B170, 0x1B2FF}, {0x1D000, 0x1D1FF}, {0x1D2E0, 0x1D37F}, {0x1D800, 0x1DAAF}, - {0x1F000, 0x1F7FF}, {0x1F900, 0x1FA6F}, {0x20000, 0x2FFFD}, {0x30000, 0x3FFFD}, - {0xF0000, 0xFFFFD}, {0x100000, 0x10FFFD} - }; - static const int max = sizeof(upright) / sizeof(upright[0]); - int st = 0; - int ed = max; - - if (ch < upright[0][0]) - return FALSE; - - while (st <= ed) - { - int mid = (st + ed) / 2; - if (upright[mid][0] <= ch && ch <= upright[mid][1]) - return TRUE; - else - if (upright[mid][0] <= ch) - st = mid + 1; - else - ed = mid - 1; - } - - return FALSE; -} - -static void -width_iter_next (PangoWidthIter *iter) -{ - gboolean met_joiner = FALSE; - iter->start = iter->end; - - if (iter->end < iter->text_end) - { - gunichar ch = g_utf8_get_char (iter->end); - iter->upright = width_iter_is_upright (ch); - } - - while (iter->end < iter->text_end) - { - gunichar ch = g_utf8_get_char (iter->end); - - /* for zero width joiner */ - if (ch == 0x200D) - { - iter->end = g_utf8_next_char (iter->end); - met_joiner = TRUE; - continue; - } - - /* ignore the upright check if met joiner */ - if (met_joiner) - { - iter->end = g_utf8_next_char (iter->end); - met_joiner = FALSE; - continue; - } - - /* for variation selector, tag and emoji modifier. */ - if (G_UNLIKELY (ch == 0xFE0EU || ch == 0xFE0FU || - (ch >= 0xE0020 && ch <= 0xE007F) || - (ch >= 0x1F3FB && ch <= 0x1F3FF))) - { - iter->end = g_utf8_next_char (iter->end); - continue; - } - - if (width_iter_is_upright (ch) != iter->upright) - break; - - iter->end = g_utf8_next_char (iter->end); - } -} - -static void -width_iter_init (PangoWidthIter *iter, - const char *text, - int length) -{ - iter->text_start = text; - iter->text_end = text + length; - iter->start = iter->end = text; - - width_iter_next (iter); -} - -static void -width_iter_fini (PangoWidthIter* iter) -{ -} - -static void -itemize_state_init (ItemizeState *state, - PangoContext *context, - const char *text, - PangoDirection base_dir, - int start_index, - int length, - PangoAttrList *attrs, - PangoAttrIterator *cached_iter, - const PangoFontDescription *desc) -{ - state->context = context; - state->text = text; - state->end = text + start_index + length; - - state->result = NULL; - state->item = NULL; - - state->run_start = text + start_index; - state->changed = EMBEDDING_CHANGED | SCRIPT_CHANGED | LANG_CHANGED | - FONT_CHANGED | WIDTH_CHANGED | EMOJI_CHANGED; - - /* First, apply the bidirectional algorithm to break - * the text into directional runs. - */ - state->embedding_levels = pango_log2vis_get_embedding_levels (text + start_index, length, &base_dir); - - state->embedding_end_offset = 0; - state->embedding_end = text + start_index; - update_embedding_end (state); - - state->gravity = PANGO_GRAVITY_AUTO; - state->centered_baseline = PANGO_GRAVITY_IS_VERTICAL (state->context->resolved_gravity); - state->gravity_hint = state->context->gravity_hint; - state->resolved_gravity = PANGO_GRAVITY_AUTO; - - /* Initialize the attribute iterator - */ - if (cached_iter) - { - state->attr_iter = cached_iter; - state->free_attr_iter = FALSE; - } - else if (attrs) - { - state->attr_iter = pango_attr_list_get_iterator (attrs); - state->free_attr_iter = TRUE; - } - else - { - state->attr_iter = NULL; - state->free_attr_iter = FALSE; - } - - state->emoji_font_desc = NULL; - if (state->attr_iter) - { - state->font_desc = NULL; - state->lang = NULL; - - advance_attr_iterator_to (state->attr_iter, start_index); - update_attr_iterator (state); - } - else - { - state->font_desc = pango_font_description_copy_static (desc ? desc : state->context->font_desc); - state->lang = state->context->language; - state->extra_attrs = NULL; - state->copy_extra_attrs = FALSE; - - state->attr_end = state->end; - state->enable_fallback = TRUE; - } - - /* Initialize the script iterator - */ - _pango_script_iter_init (&state->script_iter, text + start_index, length); - pango_script_iter_get_range (&state->script_iter, NULL, - &state->script_end, &state->script); - - width_iter_init (&state->width_iter, text + start_index, length); - _pango_emoji_iter_init (&state->emoji_iter, text + start_index, length); - - if (!PANGO_GRAVITY_IS_VERTICAL (state->context->resolved_gravity)) - state->width_iter.end = state->end; - else - if (state->emoji_iter.is_emoji) - state->width_iter.end = MAX (state->width_iter.end, state->emoji_iter.end); - - update_end (state); - - if (pango_font_description_get_set_fields (state->font_desc) & PANGO_FONT_MASK_GRAVITY) - state->font_desc_gravity = pango_font_description_get_gravity (state->font_desc); - else - state->font_desc_gravity = PANGO_GRAVITY_AUTO; - - state->derived_lang = NULL; - state->current_fonts = NULL; - state->cache = NULL; - state->base_font = NULL; - state->first_space = NULL; - state->font_position = 0xffff; -} - -static gboolean -itemize_state_next (ItemizeState *state) -{ - if (state->run_end == state->end) - return FALSE; - - state->changed = 0; - - state->run_start = state->run_end; - - if (state->run_end == state->embedding_end) - { - update_embedding_end (state); - } - - if (state->run_end == state->attr_end) - { - pango_attr_iterator_next (state->attr_iter); - update_attr_iterator (state); - } - - if (state->run_end == state->script_end) - { - pango_script_iter_next (&state->script_iter); - pango_script_iter_get_range (&state->script_iter, NULL, - &state->script_end, &state->script); - state->changed |= SCRIPT_CHANGED; - } - if (state->run_end == state->emoji_iter.end) - { - _pango_emoji_iter_next (&state->emoji_iter); - state->changed |= EMOJI_CHANGED; - - if (state->emoji_iter.is_emoji) - state->width_iter.end = MAX (state->width_iter.end, state->emoji_iter.end); - } - if (state->run_end == state->width_iter.end) - { - width_iter_next (&state->width_iter); - state->changed |= WIDTH_CHANGED; - } - - update_end (state); - - return TRUE; -} - -static GSList * -copy_attr_slist (GSList *attr_slist) -{ - GSList *new_list = NULL; - GSList *l; - - for (l = attr_slist; l; l = l->next) - new_list = g_slist_prepend (new_list, pango_attribute_copy (l->data)); - - return g_slist_reverse (new_list); -} - -static void -itemize_state_fill_font (ItemizeState *state, - PangoFont *font) -{ - GList *l; - - for (l = state->result; l; l = l->next) - { - PangoItem *item = l->data; - if (item->analysis.font) - break; - if (font) - item->analysis.font = g_object_ref (font); - } -} - -static void -itemize_state_add_character (ItemizeState *state, - PangoFont *font, - int font_position, - gboolean force_break, - const char *pos, - gboolean is_space) -{ - const char *first_space = state->first_space; - int n_spaces = 0; - - if (is_space) - { - if (state->first_space == NULL) - state->first_space = pos; - } - else - state->first_space = NULL; - - if (state->item) - { - if (!state->item->analysis.font && font) - { - itemize_state_fill_font (state, font); - state->font_position = font_position; - } - else if (state->item->analysis.font && !font) - { - font = state->item->analysis.font; - font_position = state->font_position; - } - - if (!force_break && - state->item->analysis.font == font) - { - state->item->num_chars++; - return; - } - - /* Font is changing, we are about to end the current item. - * If it ended in a sequence of spaces (but wasn't only spaces), - * check if we should move those spaces to the new item (since - * the font is less "fallback". - * - * See https://gitlab.gnome.org/GNOME/pango/-/issues/249 - */ - if (state->text + state->item->offset < first_space && - font_position < state->font_position) - { - n_spaces = g_utf8_strlen (first_space, pos - first_space); - state->item->num_chars -= n_spaces; - pos = first_space; - } - - state->item->length = (pos - state->text) - state->item->offset; - } - - state->item = pango_item_new (); - state->item->offset = pos - state->text; - state->item->length = 0; - state->item->num_chars = n_spaces + 1; - - if (font) - g_object_ref (font); - state->item->analysis.font = font; - state->font_position = font_position; - - state->item->analysis.level = state->embedding; - state->item->analysis.gravity = state->resolved_gravity; - - /* The level vs. gravity dance: - * - If gravity is SOUTH, leave level untouched. - * - If gravity is NORTH, step level one up, to - * not get mirrored upside-down text. - * - If gravity is EAST, step up to an even level, as - * it's a clockwise-rotated layout, so the rotated - * top is unrotated left. - * - If gravity is WEST, step up to an odd level, as - * it's a counter-clockwise-rotated layout, so the rotated - * top is unrotated right. - * - * A similar dance is performed in pango-layout.c: - * line_set_resolved_dir(). Keep in synch. - */ - switch (state->item->analysis.gravity) - { - case PANGO_GRAVITY_SOUTH: - default: - break; - case PANGO_GRAVITY_NORTH: - state->item->analysis.level++; - break; - case PANGO_GRAVITY_EAST: - state->item->analysis.level += 1; - state->item->analysis.level &= ~1; - break; - case PANGO_GRAVITY_WEST: - state->item->analysis.level |= 1; - break; - } - - state->item->analysis.flags = state->centered_baseline ? PANGO_ANALYSIS_FLAG_CENTERED_BASELINE : 0; - - state->item->analysis.script = state->script; - state->item->analysis.language = state->derived_lang; - - if (state->copy_extra_attrs) - { - state->item->analysis.extra_attrs = copy_attr_slist (state->extra_attrs); - } - else - { - state->item->analysis.extra_attrs = state->extra_attrs; - state->copy_extra_attrs = TRUE; - } - - state->result = g_list_prepend (state->result, state->item); -} - -typedef struct { - PangoLanguage *lang; - gunichar wc; - PangoFont *font; - int position; -} GetFontInfo; - -static gboolean -get_font_foreach (PangoFontset *fontset, - PangoFont *font, - gpointer data) -{ - GetFontInfo *info = data; - - if (G_UNLIKELY (!font)) - return FALSE; - - if (pango_font_has_char (font, info->wc)) - { - info->font = font; - return TRUE; - } - - if (!fontset) - { - info->font = font; - return TRUE; - } - - info->position++; - - return FALSE; -} - -static PangoFont * -get_base_font (ItemizeState *state) -{ - if (!state->base_font) - state->base_font = pango_font_map_load_font (state->context->font_map, - state->context, - state->font_desc); - return state->base_font; -} - -static gboolean -get_font (ItemizeState *state, - gunichar wc, - PangoFont **font, - int *position) -{ - GetFontInfo info; - - /* We'd need a separate cache when fallback is disabled, but since lookup - * with fallback disabled is faster anyways, we just skip caching */ - if (state->enable_fallback && font_cache_get (state->cache, wc, font, position)) - return TRUE; - - info.lang = state->derived_lang; - info.wc = wc; - info.font = NULL; - info.position = 0; - - if (state->enable_fallback) - pango_fontset_foreach (state->current_fonts, get_font_foreach, &info); - else - get_font_foreach (NULL, get_base_font (state), &info); - - *font = info.font; - *position = info.position; - - /* skip caching if fallback disabled (see above) */ - if (state->enable_fallback) - font_cache_insert (state->cache, wc, *font, *position); - - return TRUE; -} - -static PangoLanguage * -compute_derived_language (PangoLanguage *lang, - PangoScript script) -{ - PangoLanguage *derived_lang; - - /* Make sure the language tag is consistent with the derived - * script. There is no point in marking up a section of - * Arabic text with the "en" language tag. - */ - if (lang && pango_language_includes_script (lang, script)) - derived_lang = lang; - else - { - derived_lang = pango_script_get_sample_language (script); - /* If we don't find a sample language for the script, we - * use a language tag that shouldn't actually be used - * anywhere. This keeps fontconfig (for the PangoFc* - * backend) from using the language tag to affect the - * sort order. I don't have a reference for 'xx' being - * safe here, though Keith Packard claims it is. - */ - if (!derived_lang) - derived_lang = pango_language_from_string ("xx"); - } - - return derived_lang; -} - -static void -itemize_state_update_for_new_run (ItemizeState *state) -{ - /* This block should be moved to update_attr_iterator, but I'm too lazy to - * do it right now */ - if (state->changed & (FONT_CHANGED | SCRIPT_CHANGED | WIDTH_CHANGED)) - { - /* Font-desc gravity overrides everything */ - if (state->font_desc_gravity != PANGO_GRAVITY_AUTO) - { - state->resolved_gravity = state->font_desc_gravity; - } - else - { - PangoGravity gravity = state->gravity; - PangoGravityHint gravity_hint = state->gravity_hint; - - if (G_LIKELY (gravity == PANGO_GRAVITY_AUTO)) - gravity = state->context->resolved_gravity; - - state->resolved_gravity = pango_gravity_get_for_script_and_width (state->script, - state->width_iter.upright, - gravity, - gravity_hint); - } - - if (state->font_desc_gravity != state->resolved_gravity) - { - pango_font_description_set_gravity (state->font_desc, state->resolved_gravity); - state->changed |= FONT_CHANGED; - } - } - - if (state->changed & (SCRIPT_CHANGED | LANG_CHANGED)) - { - PangoLanguage *old_derived_lang = state->derived_lang; - state->derived_lang = compute_derived_language (state->lang, state->script); - if (old_derived_lang != state->derived_lang) - state->changed |= DERIVED_LANG_CHANGED; - } - - if (state->changed & (EMOJI_CHANGED)) - { - state->changed |= FONT_CHANGED; - } - - if (state->changed & (FONT_CHANGED | DERIVED_LANG_CHANGED) && - state->current_fonts) - { - g_object_unref (state->current_fonts); - state->current_fonts = NULL; - state->cache = NULL; - } - - if (!state->current_fonts) - { - gboolean is_emoji = state->emoji_iter.is_emoji; - if (is_emoji && !state->emoji_font_desc) - { - state->emoji_font_desc = pango_font_description_copy_static (state->font_desc); - pango_font_description_set_family_static (state->emoji_font_desc, "emoji"); - } - state->current_fonts = pango_font_map_load_fontset (state->context->font_map, - state->context, - is_emoji ? state->emoji_font_desc : state->font_desc, - state->derived_lang); - state->cache = get_font_cache (state->current_fonts); - } - - if ((state->changed & FONT_CHANGED) && state->base_font) - { - g_object_unref (state->base_font); - state->base_font = NULL; - } -} - -static void -itemize_state_process_run (ItemizeState *state) -{ - const char *p; - gboolean last_was_forced_break = FALSE; - gboolean is_space; - - /* Only one character has type G_UNICODE_LINE_SEPARATOR in Unicode 4.0; - * update this if that changes. */ -#define LINE_SEPARATOR 0x2028 - - itemize_state_update_for_new_run (state); - - /* We should never get an empty run */ - g_assert (state->run_end != state->run_start); - - for (p = state->run_start; - p < state->run_end; - p = g_utf8_next_char (p)) - { - gunichar wc = g_utf8_get_char (p); - gboolean is_forced_break = (wc == '\t' || wc == LINE_SEPARATOR); - PangoFont *font; - int font_position; - GUnicodeType type; - - /* We don't want space characters to affect font selection; in general, - * it's always wrong to select a font just to render a space. - * We assume that all fonts have the ASCII space, and for other space - * characters if they don't, HarfBuzz will compatibility-decompose them - * to ASCII space... - * See bugs #355987 and #701652. - * - * We don't want to change fonts just for variation selectors. - * See bug #781123. - * - * Finally, don't change fonts for line or paragraph separators. - * - * Note that we want spaces to use the 'better' font, comparing - * the font that is used before and after the space. This is handled - * in itemize_state_add_character(). - */ - type = g_unichar_type (wc); - if (G_UNLIKELY (type == G_UNICODE_CONTROL || - type == G_UNICODE_FORMAT || - type == G_UNICODE_SURROGATE || - type == G_UNICODE_LINE_SEPARATOR || - type == G_UNICODE_PARAGRAPH_SEPARATOR || - (type == G_UNICODE_SPACE_SEPARATOR && wc != 0x1680u /* OGHAM SPACE MARK */) || - (wc >= 0xfe00u && wc <= 0xfe0fu) || - (wc >= 0xe0100u && wc <= 0xe01efu))) - { - font = NULL; - font_position = 0xffff; - is_space = TRUE; - } - else - { - get_font (state, wc, &font, &font_position); - is_space = FALSE; - } - - itemize_state_add_character (state, font, font_position, - is_forced_break || last_was_forced_break, - p, - is_space); - - last_was_forced_break = is_forced_break; - } - - /* Finish the final item from the current segment */ - state->item->length = (p - state->text) - state->item->offset; - if (!state->item->analysis.font) - { - PangoFont *font; - int position; - - if (G_UNLIKELY (!get_font (state, ' ', &font, &position))) - { - /* If no font was found, warn once per fontmap/script pair */ - PangoFontMap *fontmap = state->context->font_map; - char *script_tag = g_strdup_printf ("g-unicode-script-%d", state->script); - - if (!g_object_get_data (G_OBJECT (fontmap), script_tag)) - { - g_warning ("failed to choose a font, expect ugly output. script='%d'", - state->script); - - g_object_set_data_full (G_OBJECT (fontmap), script_tag, - GINT_TO_POINTER (1), NULL); - } - - g_free (script_tag); - - font = NULL; - } - itemize_state_fill_font (state, font); - } - state->item = NULL; -} - -static void -itemize_state_finish (ItemizeState *state) -{ - g_free (state->embedding_levels); - if (state->free_attr_iter) - pango_attr_iterator_destroy (state->attr_iter); - _pango_script_iter_fini (&state->script_iter); - pango_font_description_free (state->font_desc); - pango_font_description_free (state->emoji_font_desc); - width_iter_fini (&state->width_iter); - _pango_emoji_iter_fini (&state->emoji_iter); - - if (state->current_fonts) - g_object_unref (state->current_fonts); - if (state->base_font) - g_object_unref (state->base_font); -} - -/** - * pango_itemize_with_base_dir: - * @context: a structure holding information that affects - * the itemization process. - * @base_dir: base direction to use for bidirectional processing - * @text: the text to itemize. - * @start_index: first byte in @text to process - * @length: the number of bytes (not characters) to process - * after @start_index. This must be >= 0. - * @attrs: the set of attributes that apply to @text. - * @cached_iter: (nullable): Cached attribute iterator - * - * Like `pango_itemize()`, but with an explicitly specified base direction. - * - * The base direction is used when computing bidirectional levels. - * (see [method@Pango.Context.set_base_dir]). [func@itemize] gets the - * base direction from the `PangoContext`. - * - * Return value: (transfer full) (element-type Pango.Item): a `GList` of - * [struct@Pango.Item] structures. The items should be freed using - * [method@Pango.Item.free] probably in combination with g_list_free_full(). - * - * Since: 1.4 - */ -GList * -pango_itemize_with_base_dir (PangoContext *context, - PangoDirection base_dir, - const char *text, - int start_index, - int length, - PangoAttrList *attrs, - PangoAttrIterator *cached_iter) -{ - ItemizeState state; - - g_return_val_if_fail (context != NULL, NULL); - g_return_val_if_fail (start_index >= 0, NULL); - g_return_val_if_fail (length >= 0, NULL); - g_return_val_if_fail (length == 0 || text != NULL, NULL); - - if (length == 0 || g_utf8_get_char (text + start_index) == '\0') - return NULL; - - itemize_state_init (&state, context, text, base_dir, start_index, length, - attrs, cached_iter, NULL); - - do - itemize_state_process_run (&state); - while (itemize_state_next (&state)); - - itemize_state_finish (&state); - - return g_list_reverse (state.result); -} - -static GList * -itemize_with_font (PangoContext *context, - const char *text, - int start_index, - int length, - const PangoFontDescription *desc) -{ - ItemizeState state; - - if (length == 0) - return NULL; - - itemize_state_init (&state, context, text, context->base_dir, start_index, length, - NULL, NULL, desc); - - do - itemize_state_process_run (&state); - while (itemize_state_next (&state)); - - itemize_state_finish (&state); - - return g_list_reverse (state.result); -} - -/** - * pango_itemize: - * @context: a structure holding information that affects - * the itemization process. - * @text: the text to itemize. Must be valid UTF-8 - * @start_index: first byte in @text to process - * @length: the number of bytes (not characters) to process - * after @start_index. This must be >= 0. - * @attrs: the set of attributes that apply to @text. - * @cached_iter: (nullable): Cached attribute iterator - * - * Breaks a piece of text into segments with consistent directional - * level and font. - * - * Each byte of @text will be contained in exactly one of the items in the - * returned list; the generated list of items will be in logical order (the - * start offsets of the items are ascending). - * - * @cached_iter should be an iterator over @attrs currently positioned - * at a range before or containing @start_index; @cached_iter will be - * advanced to the range covering the position just after - * @start_index + @length. (i.e. if itemizing in a loop, just keep passing - * in the same @cached_iter). - * - * Return value: (transfer full) (element-type Pango.Item): a `GList` of - * [struct@Pango.Item] structures. The items should be freed using - * [method@Pango.Item.free] probably in combination with g_list_free_full(). - */ -GList * -pango_itemize (PangoContext *context, - const char *text, - int start_index, - int length, - PangoAttrList *attrs, - PangoAttrIterator *cached_iter) -{ - g_return_val_if_fail (context != NULL, NULL); - g_return_val_if_fail (start_index >= 0, NULL); - g_return_val_if_fail (length >= 0, NULL); - g_return_val_if_fail (length == 0 || text != NULL, NULL); - - return pango_itemize_with_base_dir (context, context->base_dir, - text, start_index, length, attrs, cached_iter); -} static gboolean get_first_metrics_foreach (PangoFontset *fontset, @@ -1861,7 +712,7 @@ pango_context_get_metrics (PangoContext *context, sample_str = pango_language_get_sample_string (language); text_len = strlen (sample_str); - items = itemize_with_font (context, sample_str, 0, text_len, desc); + items = pango_itemize_with_font (context, sample_str, 0, text_len, desc); update_metrics_from_items (metrics, language, sample_str, text_len, items); diff --git a/pango/pango-item.h b/pango/pango-item.h index 7f9dc7ab..83ea99bb 100644 --- a/pango/pango-item.h +++ b/pango/pango-item.h @@ -131,10 +131,13 @@ PANGO_AVAILABLE_IN_ALL PangoItem * pango_item_split (PangoItem *orig, int split_index, int split_offset); + PANGO_AVAILABLE_IN_1_44 void pango_item_apply_attrs (PangoItem *item, PangoAttrIterator *iter); +/* Itemization */ + PANGO_AVAILABLE_IN_ALL GList * pango_itemize (PangoContext *context, const char *text, |