summaryrefslogtreecommitdiff
path: root/pango/itemize.c
diff options
context:
space:
mode:
author Matthias Clasen <mclasen@redhat.com> 2021-08-20 11:17:26 -0400
committer Matthias Clasen <mclasen@redhat.com> 2021-08-20 12:18:28 -0400
commit 2573ad950a7a6cfb5c5e6da466adbdaeb356857a (patch)
tree d47e628432e3f128c1627d4ad79a1dd2bf80fb9a /pango/itemize.c
parent d1d3be80056ae846589389d46544a072e337020a (diff)
download pango-2573ad950a7a6cfb5c5e6da466adbdaeb356857a.tar.gz
Some code reorg
Split the itemization code into its own file, and move things around a bit.
Diffstat (limited to 'pango/itemize.c')
-rw-r--r-- pango/itemize.c 1151
1 files changed, 1151 insertions, 0 deletions
diff --git a/pango/itemize.c b/pango/itemize.c
new file mode 100644
index 00000000..27a9865d
--- /dev/null
+++ b/pango/itemize.c
@@ -0,0 +1,1151 @@
+/* Pango
+ * itemize.c: Turning text into items
+ *
+ * Copyright (C) 2000, 2006 Red Hat Software
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include "config.h"
+#include <string.h>
+#include <stdlib.h>
+
+#include "pango-context-private.h"
+#include "pango-impl-utils.h"
+
+#include "pango-font-private.h"
+#include "pango-fontset-private.h"
+#include "pango-fontmap-private.h"
+#include "pango-script-private.h"
+#include "pango-emoji-private.h"
+#include "pango-attributes-private.h"
+
+
+/* {{{ Font cache */
+
+/*
+ * We cache the results of character,fontset => font in a hash table
+ */
+
+typedef struct {
+ GHashTable *hash; /* keyed by GUINT_TO_POINTER (character); values are FontElement, freed by the table */
+} FontCache;
+
+typedef struct {
+ PangoFont *font; /* reference held by the element (dropped in font_element_destroy), or NULL */
+ int position; /* position of the font in the fontset */
+} FontElement;
+
+/* Destroy notifier for the FontCache attached to a fontset: releases the
+ * hash table first, then the cache structure itself.
+ */
+static void
+font_cache_destroy (FontCache *cache)
+{
+  GHashTable *table = cache->hash;
+
+  g_hash_table_destroy (table);
+  g_slice_free (FontCache, cache);
+}
+
+/* Value destroy notifier of the cache hash table: drops the element's
+ * font reference (if it holds one) and frees the element.
+ */
+static void
+font_element_destroy (FontElement *element)
+{
+  PangoFont *font = element->font;
+
+  if (font != NULL)
+    g_object_unref (font);
+
+  g_slice_free (FontElement, element);
+}
+
+/* Returns the FontCache stored as qdata on @fontset, creating and
+ * attaching one if the fontset has none yet.
+ *
+ * A freshly built cache is installed with g_object_replace_qdata(),
+ * expecting the old value to still be NULL; if the replace fails, the
+ * fresh cache is discarded and the lookup is repeated.
+ */
+static FontCache *
+get_font_cache (PangoFontset *fontset)
+{
+  static GQuark cache_quark = 0; /* MT-safe */
+  GObject *object = G_OBJECT (fontset);
+
+  if (G_UNLIKELY (cache_quark == 0))
+    cache_quark = g_quark_from_static_string ("pango-font-cache");
+
+  for (;;)
+    {
+      FontCache *fresh;
+      FontCache *existing = g_object_get_qdata (object, cache_quark);
+
+      if (G_LIKELY (existing != NULL))
+        return existing;
+
+      fresh = g_slice_new (FontCache);
+      fresh->hash = g_hash_table_new_full (g_direct_hash, NULL,
+                                           NULL, (GDestroyNotify)font_element_destroy);
+
+      if (g_object_replace_qdata (object, cache_quark, NULL,
+                                  fresh, (GDestroyNotify)font_cache_destroy,
+                                  NULL))
+        return fresh;
+
+      /* The replace did not take effect: throw ours away and look again. */
+      font_cache_destroy (fresh);
+    }
+}
+
+/* Looks up the cached result for character @wc.
+ *
+ * On a hit, stores the cached font (which may be NULL) in @font and its
+ * fontset position in @position and returns TRUE. On a miss, leaves both
+ * outputs untouched and returns FALSE. No reference is added to the font.
+ */
+static gboolean
+font_cache_get (FontCache  *cache,
+                gunichar    wc,
+                PangoFont **font,
+                int        *position)
+{
+  FontElement *hit = g_hash_table_lookup (cache->hash, GUINT_TO_POINTER (wc));
+
+  if (hit == NULL)
+    return FALSE;
+
+  *font = hit->font;
+  *position = hit->position;
+
+  return TRUE;
+}
+
+/* Records @font (may be NULL) and its @position in the fontset as the
+ * cached result for character @wc. The cache takes its own reference on
+ * a non-NULL font.
+ */
+static void
+font_cache_insert (FontCache *cache,
+                   gunichar   wc,
+                   PangoFont *font,
+                   int        position)
+{
+  FontElement *entry = g_slice_new (FontElement);
+
+  if (font != NULL)
+    entry->font = g_object_ref (font);
+  else
+    entry->font = NULL;
+
+  entry->position = position;
+
+  g_hash_table_insert (cache->hash, GUINT_TO_POINTER (wc), entry);
+}
+
+/* }}} */
+/* {{{ Width Iter */
+
+typedef struct _PangoWidthIter PangoWidthIter;
+
+struct _PangoWidthIter
+{
+ const gchar *text_start; /* start of the text handed to width_iter_init() */
+ const gchar *text_end; /* text_start + length; iteration never goes past this */
+ const gchar *start; /* start of the current run */
+ const gchar *end; /* end of the current run (exclusive) */
+ gboolean upright; /* width_iter_is_upright() result for the first character of the current run */
+};
+
+/* Reports whether @ch falls inside one of the ranges of the table below
+ * (characters with Vertical_Orientation U or Tu).
+ *
+ * Returns TRUE if @ch is covered by a range, FALSE otherwise. Values
+ * above the last range (for example U+10FFFE, U+10FFFF, or out-of-range
+ * values decoded from malformed UTF-8) yield FALSE.
+ */
+static gboolean
+width_iter_is_upright (gunichar ch)
+{
+  /* https://www.unicode.org/Public/11.0.0/ucd/VerticalOrientation.txt
+   * VO=U or Tu table generated by tools/gen-vertical-orientation-U-table.py.
+   *
+   * FIXME: In the future, If GLib supports VerticalOrientation, please use it.
+   */
+  static const gunichar upright[][2] = {
+    {0x00A7, 0x00A7}, {0x00A9, 0x00A9}, {0x00AE, 0x00AE}, {0x00B1, 0x00B1},
+    {0x00BC, 0x00BE}, {0x00D7, 0x00D7}, {0x00F7, 0x00F7}, {0x02EA, 0x02EB},
+    {0x1100, 0x11FF}, {0x1401, 0x167F}, {0x18B0, 0x18FF}, {0x2016, 0x2016},
+    {0x2020, 0x2021}, {0x2030, 0x2031}, {0x203B, 0x203C}, {0x2042, 0x2042},
+    {0x2047, 0x2049}, {0x2051, 0x2051}, {0x2065, 0x2065}, {0x20DD, 0x20E0},
+    {0x20E2, 0x20E4}, {0x2100, 0x2101}, {0x2103, 0x2109}, {0x210F, 0x210F},
+    {0x2113, 0x2114}, {0x2116, 0x2117}, {0x211E, 0x2123}, {0x2125, 0x2125},
+    {0x2127, 0x2127}, {0x2129, 0x2129}, {0x212E, 0x212E}, {0x2135, 0x213F},
+    {0x2145, 0x214A}, {0x214C, 0x214D}, {0x214F, 0x2189}, {0x218C, 0x218F},
+    {0x221E, 0x221E}, {0x2234, 0x2235}, {0x2300, 0x2307}, {0x230C, 0x231F},
+    {0x2324, 0x2328}, {0x232B, 0x232B}, {0x237D, 0x239A}, {0x23BE, 0x23CD},
+    {0x23CF, 0x23CF}, {0x23D1, 0x23DB}, {0x23E2, 0x2422}, {0x2424, 0x24FF},
+    {0x25A0, 0x2619}, {0x2620, 0x2767}, {0x2776, 0x2793}, {0x2B12, 0x2B2F},
+    {0x2B50, 0x2B59}, {0x2BB8, 0x2BD1}, {0x2BD3, 0x2BEB}, {0x2BF0, 0x2BFF},
+    {0x2E80, 0x3007}, {0x3012, 0x3013}, {0x3020, 0x302F}, {0x3031, 0x309F},
+    {0x30A1, 0x30FB}, {0x30FD, 0xA4CF}, {0xA960, 0xA97F}, {0xAC00, 0xD7FF},
+    {0xE000, 0xFAFF}, {0xFE10, 0xFE1F}, {0xFE30, 0xFE48}, {0xFE50, 0xFE57},
+    {0xFE5F, 0xFE62}, {0xFE67, 0xFE6F}, {0xFF01, 0xFF07}, {0xFF0A, 0xFF0C},
+    {0xFF0E, 0xFF19}, {0xFF1F, 0xFF3A}, {0xFF3C, 0xFF3C}, {0xFF3E, 0xFF3E},
+    {0xFF40, 0xFF5A}, {0xFFE0, 0xFFE2}, {0xFFE4, 0xFFE7}, {0xFFF0, 0xFFF8},
+    {0xFFFC, 0xFFFD}, {0x10980, 0x1099F}, {0x11580, 0x115FF}, {0x11A00, 0x11AAF},
+    {0x13000, 0x1342F}, {0x14400, 0x1467F}, {0x16FE0, 0x18AFF}, {0x1B000, 0x1B12F},
+    {0x1B170, 0x1B2FF}, {0x1D000, 0x1D1FF}, {0x1D2E0, 0x1D37F}, {0x1D800, 0x1DAAF},
+    {0x1F000, 0x1F7FF}, {0x1F900, 0x1FA6F}, {0x20000, 0x2FFFD}, {0x30000, 0x3FFFD},
+    {0xF0000, 0xFFFFD}, {0x100000, 0x10FFFD}
+  };
+  static const int max = sizeof(upright) / sizeof(upright[0]);
+  int st = 0;
+  /* Inclusive upper bound: must be the last valid index. Starting at
+   * 'max' would let the search probe upright[max], one element past the
+   * end of the table, for any ch above the final range.
+   */
+  int ed = max - 1;
+
+  if (ch < upright[0][0])
+    return FALSE;
+
+  /* Binary search over the sorted, non-overlapping ranges. */
+  while (st <= ed)
+    {
+      int mid = st + (ed - st) / 2;
+
+      if (upright[mid][0] <= ch && ch <= upright[mid][1])
+        return TRUE;
+      else
+        if (upright[mid][0] <= ch)
+          st = mid + 1;
+        else
+          ed = mid - 1;
+    }
+
+  return FALSE;
+}
+
+/* Advances @iter to the next run: the new run starts where the previous
+ * one ended and extends while characters have the same
+ * width_iter_is_upright() result as the run's first character.
+ *
+ * Zero width joiners and the character following one, variation
+ * selectors U+FE0E/U+FE0F, tag characters and emoji modifiers never end
+ * a run. If the iterator is already at the end of the text, the run is
+ * empty and 'upright' is left unchanged.
+ */
+static void
+width_iter_next (PangoWidthIter *iter)
+{
+  const gchar *limit = iter->text_end;
+  const gchar *p = iter->end;
+  gboolean after_joiner = FALSE;
+
+  iter->start = p;
+
+  if (p < limit)
+    iter->upright = width_iter_is_upright (g_utf8_get_char (p));
+
+  for (; p < limit; p = g_utf8_next_char (p))
+    {
+      gunichar ch = g_utf8_get_char (p);
+
+      /* for zero width joiner */
+      if (ch == 0x200D)
+        {
+          after_joiner = TRUE;
+          continue;
+        }
+
+      /* the character right after a joiner is taken without checking it */
+      if (after_joiner)
+        {
+          after_joiner = FALSE;
+          continue;
+        }
+
+      /* for variation selector, tag and emoji modifier. */
+      if (G_UNLIKELY (ch == 0xFE0EU || ch == 0xFE0FU ||
+                      (ch >= 0xE0020 && ch <= 0xE007F) ||
+                      (ch >= 0x1F3FB && ch <= 0x1F3FF)))
+        continue;
+
+      if (width_iter_is_upright (ch) != iter->upright)
+        break;
+    }
+
+  iter->end = p;
+}
+
+/* Sets @iter up over the @length bytes starting at @text and positions
+ * it on the first run.
+ */
+static void
+width_iter_init (PangoWidthIter *iter,
+                 const char     *text,
+                 int             length)
+{
+  iter->text_start = text;
+  iter->text_end = text + length;
+
+  /* Empty run at the very beginning; width_iter_next() grows it. */
+  iter->end = text;
+  iter->start = text;
+
+  width_iter_next (iter);
+}
+
+/* Counterpart of width_iter_init(). Does nothing. */
+static void
+width_iter_fini (PangoWidthIter *iter)
+{
+  /* Intentionally empty. */
+}
+
+/* }}} */
+/* {{{ Itemization */
+
+typedef struct _ItemizeState ItemizeState;
+
+
+typedef enum {
+ EMBEDDING_CHANGED = 1 << 0, /* set by update_embedding_end() */
+ SCRIPT_CHANGED = 1 << 1, /* the script iterator moved to a new range */
+ LANG_CHANGED = 1 << 2, /* the language from the attributes differs from the previous one */
+ FONT_CHANGED = 1 << 3, /* set on attribute, resolved-gravity or emoji changes; forces a fontset reload */
+ DERIVED_LANG_CHANGED = 1 << 4, /* compute_derived_language() gave a different result */
+ WIDTH_CHANGED = 1 << 5, /* the width iterator moved to a new run */
+ EMOJI_CHANGED = 1 << 6, /* the emoji iterator moved to a new run */
+} ChangedFlags;
+
+
+struct _ItemizeState
+{
+ PangoContext *context; /* supplies the font map and the default font description, language and gravity */
+ const char *text; /* base pointer; item offsets are byte offsets from here */
+ const char *end; /* end of the range being itemized */
+
+ const char *run_start; /* start of the run currently being processed */
+ const char *run_end; /* end of that run, computed by update_end() */
+
+ GList *result; /* items created so far, newest first */
+ PangoItem *item; /* item currently being extended; NULL between runs */
+
+ guint8 *embedding_levels; /* from pango_log2vis_get_embedding_levels(); freed in itemize_state_finish() */
+ int embedding_end_offset; /* index into embedding_levels, advanced together with embedding_end */
+ const char *embedding_end; /* where the current embedding level ends */
+ guint8 embedding; /* embedding level copied to new items */
+
+ PangoGravity gravity; /* value of the gravity attribute, PANGO_GRAVITY_AUTO if none */
+ PangoGravityHint gravity_hint; /* value of the gravity-hint attribute, else the context's hint */
+ PangoGravity resolved_gravity; /* gravity assigned to newly created items */
+ PangoGravity font_desc_gravity; /* gravity found in font_desc when it was built, else PANGO_GRAVITY_AUTO */
+ gboolean centered_baseline; /* TRUE if the context's resolved gravity is vertical */
+
+ PangoAttrIterator *attr_iter; /* NULL when neither an attribute list nor a cached iterator was given */
+ gboolean free_attr_iter; /* TRUE if attr_iter was created here and is destroyed in itemize_state_finish() */
+ const char *attr_end; /* end of the current attribute range, clamped to 'end' */
+ PangoFontDescription *font_desc; /* font description for the current attribute range */
+ PangoFontDescription *emoji_font_desc; /* copy of font_desc with family "emoji"; created on demand */
+ PangoLanguage *lang; /* language from the attributes, else the context's language */
+ GSList *extra_attrs; /* attributes returned by pango_attr_iterator_get_font() */
+ gboolean copy_extra_attrs; /* TRUE once extra_attrs was given to an item; later items get copies */
+
+ ChangedFlags changed; /* what changed at the start of the current run */
+
+ PangoScriptIter script_iter;
+ const char *script_end; /* end of the current script range */
+ PangoScript script; /* script of the current range */
+
+ PangoWidthIter width_iter;
+ PangoEmojiIter emoji_iter;
+
+ PangoLanguage *derived_lang; /* result of compute_derived_language (lang, script) */
+
+ PangoFontset *current_fonts; /* fontset for the current run; reference held by the state */
+ FontCache *cache; /* cache attached to current_fonts */
+ PangoFont *base_font; /* loaded on demand by get_base_font(); used when fallback is disabled */
+ gboolean enable_fallback; /* FALSE only if a fallback attribute with value 0 applies */
+
+ const char *first_space; /* first of a sequence of spaces we've seen */
+ int font_position; /* position of the current font in the fontset */
+};
+
+/* Reads the embedding level at the current position and moves
+ * embedding_end / embedding_end_offset forward, one character at a time,
+ * to the first character with a different level or to the end of the
+ * text. Always flags EMBEDDING_CHANGED.
+ */
+static void
+update_embedding_end (ItemizeState *state)
+{
+  const guint8 *levels = state->embedding_levels;
+  const char *p = state->embedding_end;
+  int offset = state->embedding_end_offset;
+  guint8 level = levels[offset];
+
+  while (p < state->end && levels[offset] == level)
+    {
+      offset++;
+      p = g_utf8_next_char (p);
+    }
+
+  state->embedding = level;
+  state->embedding_end_offset = offset;
+  state->embedding_end = p;
+
+  state->changed |= EMBEDDING_CHANGED;
+}
+
+/* Returns the first attribute in @attr_list whose class type is @type,
+ * or NULL if the list holds none. The attribute is not copied.
+ */
+static PangoAttribute *
+find_attribute (GSList        *attr_list,
+                PangoAttrType  type)
+{
+  GSList *l = attr_list;
+
+  while (l != NULL)
+    {
+      PangoAttribute *attr = (PangoAttribute *) l->data;
+
+      if (attr->klass->type == type)
+        return attr;
+
+      l = l->next;
+    }
+
+  return NULL;
+}
+
+/* Refreshes everything derived from the attribute iterator's current
+ * range: attr_end, font_desc (a fresh copy of the context's description
+ * with the attributes applied), lang, extra_attrs, fallback and gravity
+ * settings. Any existing emoji_font_desc is discarded.
+ *
+ * Always flags FONT_CHANGED, and LANG_CHANGED when the language differs
+ * from the previous one.
+ */
+static void
+update_attr_iterator (ItemizeState *state)
+{
+  PangoLanguage *previous_lang = state->lang;
+  PangoAttribute *attr;
+  int range_end;
+
+  pango_attr_iterator_range (state->attr_iter, NULL, &range_end);
+  state->attr_end = (range_end < state->end - state->text)
+                    ? state->text + range_end
+                    : state->end;
+
+  if (state->emoji_font_desc != NULL)
+    {
+      pango_font_description_free (state->emoji_font_desc);
+      state->emoji_font_desc = NULL;
+    }
+
+  if (state->font_desc != NULL)
+    pango_font_description_free (state->font_desc);
+  state->font_desc = pango_font_description_copy_static (state->context->font_desc);
+  pango_attr_iterator_get_font (state->attr_iter, state->font_desc,
+                                &state->lang, &state->extra_attrs);
+
+  state->font_desc_gravity =
+    (pango_font_description_get_set_fields (state->font_desc) & PANGO_FONT_MASK_GRAVITY)
+    ? pango_font_description_get_gravity (state->font_desc)
+    : PANGO_GRAVITY_AUTO;
+
+  /* extra_attrs is a fresh list: the first item may take it as is. */
+  state->copy_extra_attrs = FALSE;
+
+  if (state->lang == NULL)
+    state->lang = state->context->language;
+
+  attr = find_attribute (state->extra_attrs, PANGO_ATTR_FALLBACK);
+  if (attr == NULL)
+    state->enable_fallback = TRUE;
+  else
+    state->enable_fallback = ((PangoAttrInt *)attr)->value != 0;
+
+  attr = find_attribute (state->extra_attrs, PANGO_ATTR_GRAVITY);
+  if (attr == NULL)
+    state->gravity = PANGO_GRAVITY_AUTO;
+  else
+    state->gravity = ((PangoAttrInt *)attr)->value;
+
+  attr = find_attribute (state->extra_attrs, PANGO_ATTR_GRAVITY_HINT);
+  if (attr == NULL)
+    state->gravity_hint = state->context->gravity_hint;
+  else
+    state->gravity_hint = (PangoGravityHint)((PangoAttrInt *)attr)->value;
+
+  state->changed |= FONT_CHANGED;
+  if (previous_lang != state->lang)
+    state->changed |= LANG_CHANGED;
+}
+
+/* Sets run_end to the nearest of the five tracked boundaries:
+ * embedding, attribute, script, width and emoji.
+ */
+static void
+update_end (ItemizeState *state)
+{
+  const char *nearest = state->embedding_end;
+
+  if (state->attr_end < nearest)
+    nearest = state->attr_end;
+  if (state->script_end < nearest)
+    nearest = state->script_end;
+  if (state->width_iter.end < nearest)
+    nearest = state->width_iter.end;
+  if (state->emoji_iter.end < nearest)
+    nearest = state->emoji_iter.end;
+
+  state->run_end = nearest;
+}
+
+
+/* Prepares @state for itemizing the @length bytes of @text that start at
+ * byte @start_index, and computes the first run.
+ *
+ * The attribute iterator is @cached_iter if given, else a new iterator
+ * over @attrs if given, else none. @desc is only consulted when there is
+ * no attribute iterator; with NULL @desc the context's font description
+ * is used.
+ */
+static void
+itemize_state_init (ItemizeState               *state,
+                    PangoContext               *context,
+                    const char                 *text,
+                    PangoDirection              base_dir,
+                    int                         start_index,
+                    int                         length,
+                    PangoAttrList              *attrs,
+                    PangoAttrIterator          *cached_iter,
+                    const PangoFontDescription *desc)
+{
+  const char *range_start = text + start_index;
+  gboolean vertical = PANGO_GRAVITY_IS_VERTICAL (context->resolved_gravity);
+
+  state->context = context;
+  state->text = text;
+  state->end = range_start + length;
+
+  state->result = NULL;
+  state->item = NULL;
+
+  state->run_start = range_start;
+  state->changed = EMBEDDING_CHANGED | SCRIPT_CHANGED | LANG_CHANGED |
+                   FONT_CHANGED | WIDTH_CHANGED | EMOJI_CHANGED;
+
+  /* Bidirectional algorithm first: it splits the text into
+   * directional runs.
+   */
+  state->embedding_levels = pango_log2vis_get_embedding_levels (range_start, length, &base_dir);
+
+  state->embedding_end_offset = 0;
+  state->embedding_end = range_start;
+  update_embedding_end (state);
+
+  state->gravity = PANGO_GRAVITY_AUTO;
+  state->centered_baseline = vertical;
+  state->gravity_hint = context->gravity_hint;
+  state->resolved_gravity = PANGO_GRAVITY_AUTO;
+
+  /* Attribute iterator: a cached one wins over building a new one. */
+  state->attr_iter = cached_iter;
+  state->free_attr_iter = FALSE;
+  if (cached_iter == NULL && attrs != NULL)
+    {
+      state->attr_iter = pango_attr_list_get_iterator (attrs);
+      state->free_attr_iter = TRUE;
+    }
+
+  state->emoji_font_desc = NULL;
+  if (state->attr_iter == NULL)
+    {
+      state->font_desc = pango_font_description_copy_static (desc ? desc : context->font_desc);
+      state->lang = context->language;
+      state->extra_attrs = NULL;
+      state->copy_extra_attrs = FALSE;
+
+      state->attr_end = state->end;
+      state->enable_fallback = TRUE;
+    }
+  else
+    {
+      state->font_desc = NULL;
+      state->lang = NULL;
+
+      pango_attr_iterator_advance (state->attr_iter, start_index);
+      update_attr_iterator (state);
+    }
+
+  /* Script iterator */
+  _pango_script_iter_init (&state->script_iter, range_start, length);
+  pango_script_iter_get_range (&state->script_iter, NULL,
+                               &state->script_end, &state->script);
+
+  width_iter_init (&state->width_iter, range_start, length);
+  _pango_emoji_iter_init (&state->emoji_iter, range_start, length);
+
+  if (vertical)
+    {
+      /* An emoji run is not cut by a width boundary inside it. */
+      if (state->emoji_iter.is_emoji)
+        state->width_iter.end = MAX (state->width_iter.end, state->emoji_iter.end);
+    }
+  else
+    {
+      /* Width runs are not used as boundaries for non-vertical gravity. */
+      state->width_iter.end = state->end;
+    }
+
+  update_end (state);
+
+  state->font_desc_gravity =
+    (pango_font_description_get_set_fields (state->font_desc) & PANGO_FONT_MASK_GRAVITY)
+    ? pango_font_description_get_gravity (state->font_desc)
+    : PANGO_GRAVITY_AUTO;
+
+  state->derived_lang = NULL;
+  state->current_fonts = NULL;
+  state->cache = NULL;
+  state->base_font = NULL;
+  state->first_space = NULL;
+  state->font_position = 0xffff;
+}
+
+/* Moves @state to the next run. Every iterator whose boundary coincides
+ * with the end of the previous run is advanced and the matching change
+ * flag is set; run_end is then recomputed.
+ *
+ * Returns FALSE, leaving the state untouched, when the previous run
+ * already reached the end of the text; TRUE otherwise.
+ */
+static gboolean
+itemize_state_next (ItemizeState *state)
+{
+  const char *boundary = state->run_end;
+
+  if (boundary == state->end)
+    return FALSE;
+
+  state->changed = 0;
+  state->run_start = boundary;
+
+  if (boundary == state->embedding_end)
+    update_embedding_end (state);
+
+  if (boundary == state->attr_end)
+    {
+      pango_attr_iterator_next (state->attr_iter);
+      update_attr_iterator (state);
+    }
+
+  if (boundary == state->script_end)
+    {
+      pango_script_iter_next (&state->script_iter);
+      pango_script_iter_get_range (&state->script_iter, NULL,
+                                   &state->script_end, &state->script);
+      state->changed |= SCRIPT_CHANGED;
+    }
+
+  /* The emoji iterator goes before the width iterator: an emoji run may
+   * push the width boundary forward, which the check below must see.
+   */
+  if (boundary == state->emoji_iter.end)
+    {
+      _pango_emoji_iter_next (&state->emoji_iter);
+      state->changed |= EMOJI_CHANGED;
+
+      if (state->emoji_iter.is_emoji)
+        state->width_iter.end = MAX (state->width_iter.end, state->emoji_iter.end);
+    }
+
+  if (boundary == state->width_iter.end)
+    {
+      width_iter_next (&state->width_iter);
+      state->changed |= WIDTH_CHANGED;
+    }
+
+  update_end (state);
+
+  return TRUE;
+}
+
+/* Returns a new list holding a pango_attribute_copy() of every attribute
+ * in @attr_slist, in the same order.
+ */
+static GSList *
+copy_attr_slist (GSList *attr_slist)
+{
+  GSList *copies = NULL;
+  GSList *node = attr_slist;
+
+  /* Collect in reverse, then flip once at the end. */
+  while (node != NULL)
+    {
+      copies = g_slist_prepend (copies, pango_attribute_copy (node->data));
+      node = node->next;
+    }
+
+  return g_slist_reverse (copies);
+}
+
+/* Gives @font (with a new reference each) to the most recently created
+ * items that have no font yet, stopping at the first item that already
+ * has one. Does nothing when @font is NULL.
+ */
+static void
+itemize_state_fill_font (ItemizeState *state,
+                         PangoFont    *font)
+{
+  GList *node;
+
+  if (font == NULL)
+    return;
+
+  for (node = state->result; node != NULL; node = node->next)
+    {
+      PangoItem *item = node->data;
+
+      if (item->analysis.font != NULL)
+        return;
+
+      item->analysis.font = g_object_ref (font);
+    }
+}
+
+/* Accounts for the character at @pos: either extends the current item
+ * or closes it and starts a new one.
+ *
+ * @font is the font chosen for the character, or NULL when the character
+ * should not influence font selection; @font_position is that font's
+ * position in the fontset. @force_break starts a new item even when the
+ * font is unchanged. @is_space marks characters tracked as part of a
+ * space sequence (see first_space).
+ */
+static void
+itemize_state_add_character (ItemizeState *state,
+                             PangoFont    *font,
+                             int           font_position,
+                             gboolean      force_break,
+                             const char   *pos,
+                             gboolean      is_space)
+{
+  /* Start of the space sequence preceding this character, captured
+   * before the bookkeeping below updates it. */
+  const char *spaces_start = state->first_space;
+  PangoItem *item = state->item;
+  int moved_spaces = 0;
+
+  if (!is_space)
+    state->first_space = NULL;
+  else if (state->first_space == NULL)
+    state->first_space = pos;
+
+  if (item != NULL)
+    {
+      PangoFont *item_font = item->analysis.font;
+
+      if (item_font == NULL && font != NULL)
+        {
+          /* First real font in this stretch: hand it to the font-less items. */
+          itemize_state_fill_font (state, font);
+          state->font_position = font_position;
+        }
+      else if (item_font != NULL && font == NULL)
+        {
+          /* Font-less character: it inherits the current item's font. */
+          font = item_font;
+          font_position = state->font_position;
+        }
+
+      if (!force_break && item->analysis.font == font)
+        {
+          item->num_chars++;
+          return;
+        }
+
+      /* Font is changing, we are about to end the current item.
+       * If it ended in a sequence of spaces (but wasn't only spaces),
+       * check if we should move those spaces to the new item (since
+       * the font is less "fallback").
+       *
+       * See https://gitlab.gnome.org/GNOME/pango/-/issues/249
+       */
+      if (state->text + item->offset < spaces_start &&
+          font_position < state->font_position)
+        {
+          moved_spaces = g_utf8_strlen (spaces_start, pos - spaces_start);
+          item->num_chars -= moved_spaces;
+          pos = spaces_start;
+        }
+
+      item->length = (pos - state->text) - item->offset;
+    }
+
+  item = pango_item_new ();
+  state->item = item;
+  item->offset = pos - state->text;
+  item->length = 0;
+  item->num_chars = moved_spaces + 1;
+
+  if (font != NULL)
+    g_object_ref (font);
+  item->analysis.font = font;
+  state->font_position = font_position;
+
+  item->analysis.level = state->embedding;
+  item->analysis.gravity = state->resolved_gravity;
+
+  /* The level vs. gravity dance:
+   *  - If gravity is SOUTH, leave level untouched.
+   *  - If gravity is NORTH, step level one up, to
+   *    not get mirrored upside-down text.
+   *  - If gravity is EAST, step up to an even level, as
+   *    it's a clockwise-rotated layout, so the rotated
+   *    top is unrotated left.
+   *  - If gravity is WEST, step up to an odd level, as
+   *    it's a counter-clockwise-rotated layout, so the rotated
+   *    top is unrotated right.
+   *
+   * A similar dance is performed in pango-layout.c:
+   * line_set_resolved_dir().  Keep in synch.
+   */
+  switch (item->analysis.gravity)
+    {
+      case PANGO_GRAVITY_NORTH:
+        item->analysis.level++;
+        break;
+      case PANGO_GRAVITY_EAST:
+        item->analysis.level += 1;
+        item->analysis.level &= ~1;
+        break;
+      case PANGO_GRAVITY_WEST:
+        item->analysis.level |= 1;
+        break;
+      case PANGO_GRAVITY_SOUTH:
+      default:
+        break;
+    }
+
+  item->analysis.flags = state->centered_baseline ? PANGO_ANALYSIS_FLAG_CENTERED_BASELINE : 0;
+
+  item->analysis.script = state->script;
+  item->analysis.language = state->derived_lang;
+
+  /* The first item after an attribute update takes the extra attributes
+   * list itself; every later item gets its own copy. */
+  if (!state->copy_extra_attrs)
+    {
+      item->analysis.extra_attrs = state->extra_attrs;
+      state->copy_extra_attrs = TRUE;
+    }
+  else
+    {
+      item->analysis.extra_attrs = copy_attr_slist (state->extra_attrs);
+    }
+
+  state->result = g_list_prepend (state->result, item);
+}
+
+typedef struct {
+ PangoLanguage *lang; /* set by get_font() from derived_lang; not read by get_font_foreach() */
+ gunichar wc; /* character a font is wanted for */
+ PangoFont *font; /* out: chosen font, stays NULL if none was chosen; no reference added */
+ int position; /* out: number of fonts skipped before the search stopped */
+} GetFontInfo;
+
+/* pango_fontset_foreach() callback, also called directly by get_font()
+ * with a NULL @fontset. @data is a GetFontInfo.
+ *
+ * Picks @font when it covers info->wc, or unconditionally when @fontset
+ * is NULL. Returns TRUE when a font was picked (which stops the
+ * iteration); otherwise bumps info->position (except for a NULL @font)
+ * and returns FALSE.
+ */
+static gboolean
+get_font_foreach (PangoFontset *fontset,
+                  PangoFont    *font,
+                  gpointer      data)
+{
+  GetFontInfo *info = data;
+
+  if (G_UNLIKELY (font == NULL))
+    return FALSE;
+
+  if (pango_font_has_char (font, info->wc) || fontset == NULL)
+    {
+      info->font = font;
+      return TRUE;
+    }
+
+  info->position++;
+
+  return FALSE;
+}
+
+/* Returns the font loaded from the context's font map for the current
+ * font description, loading it on first use and remembering it in
+ * state->base_font.
+ */
+static PangoFont *
+get_base_font (ItemizeState *state)
+{
+  if (state->base_font != NULL)
+    return state->base_font;
+
+  state->base_font = pango_font_map_load_font (state->context->font_map,
+                                               state->context,
+                                               state->font_desc);
+
+  return state->base_font;
+}
+
+/* Chooses a font for character @wc and stores it in @font (possibly
+ * NULL, no reference added) together with its fontset position in
+ * @position.
+ *
+ * With fallback enabled the fontset is searched and the result goes
+ * through the per-fontset cache; with fallback disabled the base font is
+ * used and nothing is cached.
+ *
+ * Always returns TRUE.
+ */
+static gboolean
+get_font (ItemizeState  *state,
+          gunichar       wc,
+          PangoFont    **font,
+          int           *position)
+{
+  gboolean use_fallback = state->enable_fallback;
+  GetFontInfo info;
+
+  /* We'd need a separate cache when fallback is disabled, but since lookup
+   * with fallback disabled is faster anyways, we just skip caching */
+  if (use_fallback && font_cache_get (state->cache, wc, font, position))
+    return TRUE;
+
+  info.lang = state->derived_lang;
+  info.wc = wc;
+  info.font = NULL;
+  info.position = 0;
+
+  if (!use_fallback)
+    get_font_foreach (NULL, get_base_font (state), &info);
+  else
+    pango_fontset_foreach (state->current_fonts, get_font_foreach, &info);
+
+  *font = info.font;
+  *position = info.position;
+
+  /* skip caching if fallback disabled (see above) */
+  if (use_fallback)
+    font_cache_insert (state->cache, wc, info.font, info.position);
+
+  return TRUE;
+}
+
+/* Returns the language to use for text in @script: @lang itself when it
+ * is set and includes the script, else the script's sample language,
+ * else the placeholder language "xx".
+ */
+static PangoLanguage *
+compute_derived_language (PangoLanguage *lang,
+                          PangoScript    script)
+{
+  PangoLanguage *sample;
+
+  /* Keep the language tag consistent with the derived script. There is
+   * no point in marking up a section of Arabic text with the "en"
+   * language tag.
+   */
+  if (lang != NULL && pango_language_includes_script (lang, script))
+    return lang;
+
+  sample = pango_script_get_sample_language (script);
+  if (sample != NULL)
+    return sample;
+
+  /* If we don't find a sample language for the script, we
+   * use a language tag that shouldn't actually be used
+   * anywhere. This keeps fontconfig (for the PangoFc*
+   * backend) from using the language tag to affect the
+   * sort order. I don't have a reference for 'xx' being
+   * safe here, though Keith Packard claims it is.
+   */
+  return pango_language_from_string ("xx");
+}
+
+/* Brings the derived per-run state in line with state->changed:
+ * resolved gravity, derived language, the current fontset with its
+ * cache, and the base font.
+ */
+static void
+itemize_state_update_for_new_run (ItemizeState *state)
+{
+  /* The gravity part arguably belongs in update_attr_iterator(). */
+  if (state->changed & (FONT_CHANGED | SCRIPT_CHANGED | WIDTH_CHANGED))
+    {
+      /* Font-desc gravity overrides everything */
+      PangoGravity resolved = state->font_desc_gravity;
+
+      if (resolved == PANGO_GRAVITY_AUTO)
+        {
+          PangoGravity base = state->gravity;
+
+          if (G_LIKELY (base == PANGO_GRAVITY_AUTO))
+            base = state->context->resolved_gravity;
+
+          resolved = pango_gravity_get_for_script_and_width (state->script,
+                                                             state->width_iter.upright,
+                                                             base,
+                                                             state->gravity_hint);
+        }
+
+      state->resolved_gravity = resolved;
+
+      if (state->font_desc_gravity != resolved)
+        {
+          pango_font_description_set_gravity (state->font_desc, resolved);
+          state->changed |= FONT_CHANGED;
+        }
+    }
+
+  if (state->changed & (SCRIPT_CHANGED | LANG_CHANGED))
+    {
+      PangoLanguage *derived = compute_derived_language (state->lang, state->script);
+
+      if (derived != state->derived_lang)
+        state->changed |= DERIVED_LANG_CHANGED;
+
+      state->derived_lang = derived;
+    }
+
+  /* An emoji change is treated as a font change. */
+  if (state->changed & EMOJI_CHANGED)
+    state->changed |= FONT_CHANGED;
+
+  if (state->current_fonts != NULL &&
+      (state->changed & (FONT_CHANGED | DERIVED_LANG_CHANGED)))
+    {
+      g_object_unref (state->current_fonts);
+      state->current_fonts = NULL;
+      state->cache = NULL;
+    }
+
+  if (state->current_fonts == NULL)
+    {
+      PangoFontDescription *desc = state->font_desc;
+
+      if (state->emoji_iter.is_emoji)
+        {
+          if (state->emoji_font_desc == NULL)
+            {
+              state->emoji_font_desc = pango_font_description_copy_static (state->font_desc);
+              pango_font_description_set_family_static (state->emoji_font_desc, "emoji");
+            }
+
+          desc = state->emoji_font_desc;
+        }
+
+      state->current_fonts = pango_font_map_load_fontset (state->context->font_map,
+                                                          state->context,
+                                                          desc,
+                                                          state->derived_lang);
+      state->cache = get_font_cache (state->current_fonts);
+    }
+
+  if (state->base_font != NULL && (state->changed & FONT_CHANGED))
+    {
+      g_object_unref (state->base_font);
+      state->base_font = NULL;
+    }
+}
+
+/* Itemizes the current run [run_start, run_end): picks a font for each
+ * character, feeds it to itemize_state_add_character(), then closes the
+ * last item of the run. Items that still have no font at that point get
+ * the font chosen for an ASCII space.
+ */
+static void
+itemize_state_process_run (ItemizeState *state)
+{
+  const char *p;
+  gboolean prev_forced_break = FALSE;
+
+  /* Only one character has type G_UNICODE_LINE_SEPARATOR in Unicode 4.0;
+   * update this if that changes. */
+#define LINE_SEPARATOR 0x2028
+
+  itemize_state_update_for_new_run (state);
+
+  /* We should never get an empty run */
+  g_assert (state->run_end != state->run_start);
+
+  for (p = state->run_start;
+       p < state->run_end;
+       p = g_utf8_next_char (p))
+    {
+      gunichar wc = g_utf8_get_char (p);
+      GUnicodeType type = g_unichar_type (wc);
+      gboolean forced_break = (wc == '\t' || wc == LINE_SEPARATOR);
+      gboolean fontless;
+      PangoFont *font = NULL;
+      int font_position = 0xffff;
+
+      /* We don't want space characters to affect font selection; in general,
+       * it's always wrong to select a font just to render a space.
+       * We assume that all fonts have the ASCII space, and for other space
+       * characters if they don't, HarfBuzz will compatibility-decompose them
+       * to ASCII space...
+       * See bugs #355987 and #701652.
+       *
+       * We don't want to change fonts just for variation selectors.
+       * See bug #781123.
+       *
+       * Finally, don't change fonts for line or paragraph separators.
+       *
+       * Note that we want spaces to use the 'better' font, comparing
+       * the font that is used before and after the space. This is handled
+       * in itemize_state_add_character().
+       */
+      fontless = (type == G_UNICODE_CONTROL ||
+                  type == G_UNICODE_FORMAT ||
+                  type == G_UNICODE_SURROGATE ||
+                  type == G_UNICODE_LINE_SEPARATOR ||
+                  type == G_UNICODE_PARAGRAPH_SEPARATOR ||
+                  (type == G_UNICODE_SPACE_SEPARATOR && wc != 0x1680u /* OGHAM SPACE MARK */) ||
+                  (wc >= 0xfe00u && wc <= 0xfe0fu) ||
+                  (wc >= 0xe0100u && wc <= 0xe01efu));
+
+      if (G_LIKELY (!fontless))
+        get_font (state, wc, &font, &font_position);
+
+      itemize_state_add_character (state, font, font_position,
+                                   forced_break || prev_forced_break,
+                                   p,
+                                   fontless);
+
+      prev_forced_break = forced_break;
+    }
+
+  /* Finish the final item from the current segment */
+  state->item->length = (p - state->text) - state->item->offset;
+  if (state->item->analysis.font == NULL)
+    {
+      PangoFont *font;
+      int position;
+
+      /* NOTE(review): get_font() in this file always returns TRUE, so
+       * this warning branch looks unreachable as written. */
+      if (G_UNLIKELY (!get_font (state, ' ', &font, &position)))
+        {
+          /* If no font was found, warn once per fontmap/script pair */
+          PangoFontMap *fontmap = state->context->font_map;
+          char *script_tag = g_strdup_printf ("g-unicode-script-%d", state->script);
+
+          if (!g_object_get_data (G_OBJECT (fontmap), script_tag))
+            {
+              g_warning ("failed to choose a font, expect ugly output. script='%d'",
+                         state->script);
+
+              g_object_set_data_full (G_OBJECT (fontmap), script_tag,
+                                      GINT_TO_POINTER (1), NULL);
+            }
+
+          g_free (script_tag);
+
+          font = NULL;
+        }
+
+      itemize_state_fill_font (state, font);
+    }
+
+  state->item = NULL;
+}
+
+/* Releases everything @state owns apart from the result list: the
+ * embedding levels, the attribute iterator (only if it was created
+ * here), the sub-iterators, both font descriptions, the current fontset
+ * and the base font.
+ */
+static void
+itemize_state_finish (ItemizeState *state)
+{
+  g_free (state->embedding_levels);
+
+  if (state->free_attr_iter)
+    pango_attr_iterator_destroy (state->attr_iter);
+
+  _pango_script_iter_fini (&state->script_iter);
+
+  pango_font_description_free (state->font_desc);
+  pango_font_description_free (state->emoji_font_desc);
+
+  width_iter_fini (&state->width_iter);
+  _pango_emoji_iter_fini (&state->emoji_iter);
+
+  if (state->current_fonts != NULL)
+    g_object_unref (state->current_fonts);
+
+  if (state->base_font != NULL)
+    g_object_unref (state->base_font);
+}
+/* }}} */
+/* {{{ Public API */
+
+/* Like pango_itemize, but takes a font description */
+/* Like pango_itemize(), but without attributes: the font comes from
+ * @desc (or from the context's font description when @desc is NULL) and
+ * the base direction from the context.
+ *
+ * Returns NULL when @length is 0, otherwise the list of items in
+ * logical order.
+ */
+GList *
+pango_itemize_with_font (PangoContext               *context,
+                         const char                 *text,
+                         int                         start_index,
+                         int                         length,
+                         const PangoFontDescription *desc)
+{
+  ItemizeState state;
+
+  if (length == 0)
+    return NULL;
+
+  itemize_state_init (&state, context, text, context->base_dir, start_index, length,
+                      NULL, NULL, desc);
+
+  /* The first run is set up by init; keep going until none is left. */
+  for (;;)
+    {
+      itemize_state_process_run (&state);
+
+      if (!itemize_state_next (&state))
+        break;
+    }
+
+  itemize_state_finish (&state);
+
+  /* Items were prepended while building; flip into logical order. */
+  return g_list_reverse (state.result);
+}
+
+/**
+ * pango_itemize_with_base_dir:
+ * @context: a structure holding information that affects
+ *   the itemization process.
+ * @base_dir: base direction to use for bidirectional processing
+ * @text: the text to itemize.
+ * @start_index: first byte in @text to process
+ * @length: the number of bytes (not characters) to process
+ *   after @start_index. This must be >= 0.
+ * @attrs: the set of attributes that apply to @text.
+ * @cached_iter: (nullable): Cached attribute iterator
+ *
+ * Like `pango_itemize()`, but with an explicitly specified base direction.
+ *
+ * The base direction is used when computing bidirectional levels.
+ * (see [method@Pango.Context.set_base_dir]). [func@itemize] gets the
+ * base direction from the `PangoContext`.
+ *
+ * %NULL is returned when @length is 0 or when the text at @start_index
+ * begins with a nul character.
+ *
+ * Return value: (transfer full) (element-type Pango.Item): a `GList` of
+ *   [struct@Pango.Item] structures. The items should be freed using
+ *   [method@Pango.Item.free] probably in combination with g_list_free_full().
+ *
+ * Since: 1.4
+ */
+GList *
+pango_itemize_with_base_dir (PangoContext      *context,
+                             PangoDirection     base_dir,
+                             const char        *text,
+                             int                start_index,
+                             int                length,
+                             PangoAttrList     *attrs,
+                             PangoAttrIterator *cached_iter)
+{
+  ItemizeState state;
+
+  g_return_val_if_fail (context != NULL, NULL);
+  g_return_val_if_fail (start_index >= 0, NULL);
+  g_return_val_if_fail (length >= 0, NULL);
+  g_return_val_if_fail (length == 0 || text != NULL, NULL);
+
+  /* Nothing to itemize. */
+  if (length == 0 || g_utf8_get_char (text + start_index) == '\0')
+    return NULL;
+
+  itemize_state_init (&state, context, text, base_dir, start_index, length,
+                      attrs, cached_iter, NULL);
+
+  /* The first run is set up by init; keep going until none is left. */
+  for (;;)
+    {
+      itemize_state_process_run (&state);
+
+      if (!itemize_state_next (&state))
+        break;
+    }
+
+  itemize_state_finish (&state);
+
+  /* Items were prepended while building; flip into logical order. */
+  return g_list_reverse (state.result);
+}
+
+/**
+ * pango_itemize:
+ * @context: a structure holding information that affects
+ *   the itemization process.
+ * @text: the text to itemize. Must be valid UTF-8
+ * @start_index: first byte in @text to process
+ * @length: the number of bytes (not characters) to process
+ *   after @start_index. This must be >= 0.
+ * @attrs: the set of attributes that apply to @text.
+ * @cached_iter: (nullable): Cached attribute iterator
+ *
+ * Breaks a piece of text into segments with consistent directional
+ * level and font.
+ *
+ * Each byte of @text will be contained in exactly one of the items in the
+ * returned list; the generated list of items will be in logical order (the
+ * start offsets of the items are ascending).
+ *
+ * @cached_iter should be an iterator over @attrs currently positioned
+ * at a range before or containing @start_index; @cached_iter will be
+ * advanced to the range covering the position just after
+ * @start_index + @length. (i.e. if itemizing in a loop, just keep passing
+ * in the same @cached_iter).
+ *
+ * The base direction is taken from @context.
+ *
+ * Return value: (transfer full) (element-type Pango.Item): a `GList` of
+ *   [struct@Pango.Item] structures. The items should be freed using
+ *   [method@Pango.Item.free] probably in combination with g_list_free_full().
+ */
+GList *
+pango_itemize (PangoContext      *context,
+               const char        *text,
+               int                start_index,
+               int                length,
+               PangoAttrList     *attrs,
+               PangoAttrIterator *cached_iter)
+{
+  GList *items;
+
+  g_return_val_if_fail (context != NULL, NULL);
+  g_return_val_if_fail (start_index >= 0, NULL);
+  g_return_val_if_fail (length >= 0, NULL);
+  g_return_val_if_fail (length == 0 || text != NULL, NULL);
+
+  items = pango_itemize_with_base_dir (context, context->base_dir,
+                                       text, start_index, length,
+                                       attrs, cached_iter);
+
+  return items;
+}
+
+/* }}} */
+
+/* vim:set foldmethod=marker expandtab: */