From 94e5f18e80aeb95b14217ed7f7efa790eb48c3c9 Mon Sep 17 00:00:00 2001 From: Matthias Clasen Date: Sat, 28 Aug 2021 10:23:52 -0400 Subject: tests: Only print known analysis flags We're going to steal one bit for private purposes. --- tests/test-layout.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/test-layout.c b/tests/test-layout.c index f142228b..0139c138 100644 --- a/tests/test-layout.c +++ b/tests/test-layout.c @@ -127,6 +127,10 @@ dump_lines (PangoLayout *layout, GString *string) pango_layout_iter_free (iter); } +#define ANALYSIS_FLAGS (PANGO_ANALYSIS_FLAG_CENTERED_BASELINE | \ + PANGO_ANALYSIS_FLAG_IS_ELLIPSIS | \ + PANGO_ANALYSIS_FLAG_NEED_HYPHEN) + static void dump_runs (PangoLayout *layout, GString *string) { @@ -160,7 +164,7 @@ dump_runs (PangoLayout *layout, GString *string) g_string_append_printf (string, "i=%d, index=%d, chars=%d, level=%d, gravity=%s, flags=%d, font=%s, script=%s, language=%s, '%s'\n", i, index, item->num_chars, item->analysis.level, gravity_name (item->analysis.gravity), - item->analysis.flags, + item->analysis.flags & ANALYSIS_FLAGS, opt_show_font ? font : "OMITTED", /* for some reason, this fails on build.gnome.org, so leave it out */ script_name (item->analysis.script), pango_language_to_string (item->analysis.language), -- cgit v1.2.1 From 1fcd5ae9a9dfd3a7c5ccacd11ffd54a3ad93e643 Mon Sep 17 00:00:00 2001 From: Matthias Clasen Date: Sat, 28 Aug 2021 11:12:35 -0400 Subject: Cosmetic Improve the itemization code to have all variants take the same code paths, so we can do fixups in one place. 
--- pango/itemize.c | 38 ++++++++++++++++---------------------- pango/pango-context-private.h | 3 +++ pango/pango-context.c | 5 ++++- 3 files changed, 23 insertions(+), 23 deletions(-) diff --git a/pango/itemize.c b/pango/itemize.c index 27a9865d..bd699bce 100644 --- a/pango/itemize.c +++ b/pango/itemize.c @@ -700,7 +700,7 @@ itemize_state_add_character (ItemizeState *state, break; } - state->item->analysis.flags = state->centered_baseline ? PANGO_ANALYSIS_FLAG_CENTERED_BASELINE : 0; + state->item->analysis.flags |= state->centered_baseline ? PANGO_ANALYSIS_FLAG_CENTERED_BASELINE : 0; state->item->analysis.script = state->script; state->item->analysis.language = state->derived_lang; @@ -1021,21 +1021,24 @@ itemize_state_finish (ItemizeState *state) /* }}} */ /* {{{ Public API */ -/* Like pango_itemize, but takes a font description */ +/* Like pango_itemize_with_base_dir, but takes a font description */ GList * pango_itemize_with_font (PangoContext *context, + PangoDirection base_dir, const char *text, int start_index, int length, + PangoAttrList *attrs, + PangoAttrIterator *cached_iter, const PangoFontDescription *desc) { ItemizeState state; - if (length == 0) + if (length == 0 || g_utf8_get_char (text + start_index) == '\0') return NULL; - itemize_state_init (&state, context, text, context->base_dir, start_index, length, - NULL, NULL, desc); + itemize_state_init (&state, context, text, base_dir, start_index, length, + attrs, cached_iter, desc); do itemize_state_process_run (&state); @@ -1079,26 +1082,15 @@ pango_itemize_with_base_dir (PangoContext *context, PangoAttrList *attrs, PangoAttrIterator *cached_iter) { - ItemizeState state; - g_return_val_if_fail (context != NULL, NULL); g_return_val_if_fail (start_index >= 0, NULL); g_return_val_if_fail (length >= 0, NULL); g_return_val_if_fail (length == 0 || text != NULL, NULL); - if (length == 0 || g_utf8_get_char (text + start_index) == '\0') - return NULL; - - itemize_state_init (&state, context, text, base_dir, 
start_index, length, - attrs, cached_iter, NULL); - - do - itemize_state_process_run (&state); - while (itemize_state_next (&state)); - - itemize_state_finish (&state); - - return g_list_reverse (state.result); + return pango_itemize_with_font (context, base_dir, + text, start_index, length, + attrs, cached_iter, + NULL); } /** @@ -1142,8 +1134,10 @@ pango_itemize (PangoContext *context, g_return_val_if_fail (length >= 0, NULL); g_return_val_if_fail (length == 0 || text != NULL, NULL); - return pango_itemize_with_base_dir (context, context->base_dir, - text, start_index, length, attrs, cached_iter); + return pango_itemize_with_font (context, context->base_dir, + text, start_index, length, + attrs, cached_iter, + NULL); } /* }}} */ diff --git a/pango/pango-context-private.h b/pango/pango-context-private.h index 240c07d4..d65406e1 100644 --- a/pango/pango-context-private.h +++ b/pango/pango-context-private.h @@ -51,9 +51,12 @@ struct _PangoContext }; GList * pango_itemize_with_font (PangoContext *context, + PangoDirection base_dir, const char *text, int start_index, int length, + PangoAttrList *attrs, + PangoAttrIterator *cached_iter, const PangoFontDescription *desc); diff --git a/pango/pango-context.c b/pango/pango-context.c index d76b0ae2..2301138f 100644 --- a/pango/pango-context.c +++ b/pango/pango-context.c @@ -712,7 +712,10 @@ pango_context_get_metrics (PangoContext *context, sample_str = pango_language_get_sample_string (language); text_len = strlen (sample_str); - items = pango_itemize_with_font (context, sample_str, 0, text_len, desc); + items = pango_itemize_with_font (context, context->base_dir, + sample_str, 0, text_len, + NULL, NULL, + desc); update_metrics_from_items (metrics, language, sample_str, text_len, items); -- cgit v1.2.1 From a03bf5bc6b07ba6e2442c02d6777978c5cecbd9a Mon Sep 17 00:00:00 2001 From: Matthias Clasen Date: Sat, 28 Aug 2021 09:47:07 -0400 Subject: item: Add a char offset Add a char_offset field to PangoItem, compute it as part of 
itemization and update it when splitting items. Keeping this number around cuts down on the amount of list and utf8 walking we need to do later. We have to do some extra shenanigans to preserve abi in the face of pango's open-coded structs, so we introduce a PangoItemPrivate type that is used internally. On 64bit, PangoItem has a 4 byte hole, so we can keep the size of PangoItemPrivate the same. No such luck on 32bit. --- pango/itemize.c | 16 +++++++++- pango/pango-item-private.h | 73 ++++++++++++++++++++++++++++++++++++++++++++++ pango/pango-item.c | 19 ++++++++---- pango/pango-item.h | 7 +++-- 4 files changed, 106 insertions(+), 9 deletions(-) create mode 100644 pango/pango-item-private.h diff --git a/pango/itemize.c b/pango/itemize.c index bd699bce..11bc2513 100644 --- a/pango/itemize.c +++ b/pango/itemize.c @@ -32,6 +32,7 @@ #include "pango-script-private.h" #include "pango-emoji-private.h" #include "pango-attributes-private.h" +#include "pango-item-private.h" /* {{{ Font cache */ @@ -1033,6 +1034,8 @@ pango_itemize_with_font (PangoContext *context, const PangoFontDescription *desc) { ItemizeState state; + GList *items; + int char_offset; if (length == 0 || g_utf8_get_char (text + start_index) == '\0') return NULL; @@ -1046,7 +1049,18 @@ pango_itemize_with_font (PangoContext *context, itemize_state_finish (&state); - return g_list_reverse (state.result); + items = g_list_reverse (state.result); + + /* Compute the char offset for each item */ + char_offset = 0; + for (GList *l = items; l; l = l->next) + { + PangoItemPrivate *item = l->data; + item->char_offset = char_offset; + char_offset += item->num_chars; + } + + return items; } /** diff --git a/pango/pango-item-private.h b/pango/pango-item-private.h new file mode 100644 index 00000000..8bb7e1cd --- /dev/null +++ b/pango/pango-item-private.h @@ -0,0 +1,73 @@ +/* Pango + * + * Copyright (C) 2021 Matthias Clasen + * + * This library is free software; you can redistribute it and/or + * modify it under the terms
of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. + */ + +#ifndef __PANGO_ITEM_PRIVATE_H__ +#define __PANGO_ITEM_PRIVATE_H__ + +#include + +G_BEGIN_DECLS + +/** + * We have to do some extra work for adding the char_offset field + * to PangoItem to preserve ABI in the face of pango's open-coded + * structs. + * + * Internally, pango uses the PangoItemPrivate type, and we use + * a bit in the PangoAnalysis flags to indicate whether we are + * dealing with a PangoItemPrivate struct or not. 
+ */ + +#define PANGO_ANALYSIS_FLAG_HAS_CHAR_OFFSET (1 << 7) + +typedef struct _PangoItemPrivate PangoItemPrivate; + +#ifdef __x86_64__ + +struct _PangoItemPrivate +{ + int offset; + int length; + int num_chars; + int char_offset; + PangoAnalysis analysis; +}; + +#else + +struct _PangoItemPrivate +{ + int offset; + int length; + int num_chars; + PangoAnalysis analysis; + int char_offset; +} + +#endif + +G_STATIC_ASSERT (offsetof (PangoItem, offset) == offsetof (PangoItemPrivate, offset)); +G_STATIC_ASSERT (offsetof (PangoItem, length) == offsetof (PangoItemPrivate, length)); +G_STATIC_ASSERT (offsetof (PangoItem, num_chars) == offsetof (PangoItemPrivate, num_chars)); +G_STATIC_ASSERT (offsetof (PangoItem, analysis) == offsetof (PangoItemPrivate, analysis)); + +G_END_DECLS + +#endif /* __PANGO_ITEM_PRIVATE_H__ */ diff --git a/pango/pango-item.c b/pango/pango-item.c index ce38e6d2..484d5f1f 100644 --- a/pango/pango-item.c +++ b/pango/pango-item.c @@ -21,7 +21,7 @@ #include "config.h" #include "pango-attributes.h" -#include "pango-item.h" +#include "pango-item-private.h" #include "pango-impl-utils.h" /** @@ -35,9 +35,11 @@ PangoItem * pango_item_new (void) { - PangoItem *result = g_slice_new0 (PangoItem); + PangoItemPrivate *result = g_slice_new0 (PangoItemPrivate); - return result; + result->analysis.flags |= PANGO_ANALYSIS_FLAG_HAS_CHAR_OFFSET; + + return (PangoItem *)result; } /** @@ -57,11 +59,13 @@ pango_item_copy (PangoItem *item) if (item == NULL) return NULL; - result = g_slice_new (PangoItem); + result = pango_item_new (); result->offset = item->offset; result->length = item->length; result->num_chars = item->num_chars; + if (item->analysis.flags & PANGO_ANALYSIS_FLAG_HAS_CHAR_OFFSET) + ((PangoItemPrivate *)result)->char_offset = ((PangoItemPrivate *)item)->char_offset; result->analysis = item->analysis; if (result->analysis.font) @@ -101,7 +105,10 @@ pango_item_free (PangoItem *item) if (item->analysis.font) g_object_unref (item->analysis.font); - 
g_slice_free (PangoItem, item); + if (item->analysis.flags & PANGO_ANALYSIS_FLAG_HAS_CHAR_OFFSET) + g_slice_free (PangoItemPrivate, (PangoItemPrivate *)item); + else + g_slice_free (PangoItem, item); } G_DEFINE_BOXED_TYPE (PangoItem, pango_item, @@ -151,6 +158,8 @@ pango_item_split (PangoItem *orig, orig->offset += split_index; orig->length -= split_index; orig->num_chars -= split_offset; + if (orig->analysis.flags & PANGO_ANALYSIS_FLAG_HAS_CHAR_OFFSET) + ((PangoItemPrivate *)orig)->char_offset += split_offset; return new_item; } diff --git a/pango/pango-item.h b/pango/pango-item.h index 9e0596f2..8122be19 100644 --- a/pango/pango-item.h +++ b/pango/pango-item.h @@ -100,6 +100,7 @@ struct _PangoAnalysis * @offset: byte offset of the start of this item in text. * @length: length of this item in bytes. * @num_chars: number of Unicode characters in the item. + * @char_offset: character offset of the start of this item in text. Since 1.50 * @analysis: analysis results for the item. * * The `PangoItem` structure stores information about a segment of text. @@ -109,9 +110,9 @@ struct _PangoAnalysis */ struct _PangoItem { - gint offset; - gint length; - gint num_chars; + int offset; + int length; + int num_chars; PangoAnalysis analysis; }; -- cgit v1.2.1 From 98891da9fa7475f71f4f86146fac703634a4600d Mon Sep 17 00:00:00 2001 From: Matthias Clasen Date: Sat, 28 Aug 2021 11:21:49 -0400 Subject: test-itemize: Improve generation It is very annoying that gtest will dump its random seed into the output as soon as one calls g_test_init(). This pollutes our generated output. Work around that by deferring the g_test_init() call until after we've dealt with generating output. 
--- tests/test-itemize.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test-itemize.c b/tests/test-itemize.c index 105b453d..921a1fdc 100644 --- a/tests/test-itemize.c +++ b/tests/test-itemize.c @@ -313,13 +313,11 @@ main (int argc, char *argv[]) const gchar *name; gchar *path; - g_test_init (&argc, &argv, NULL); - context = pango_font_map_create_context (pango_cairo_font_map_get_default ()); pango_context_set_language (context, pango_language_from_string ("en-us")); /* allow to easily generate expected output for new test cases */ - if (argc > 1) + if (argc > 1 && argv[1][0] != '-') { GString *string; @@ -330,6 +328,8 @@ main (int argc, char *argv[]) return 0; } + g_test_init (&argc, &argv, NULL); + path = g_test_build_filename (G_TEST_DIST, "itemize", NULL); dir = g_dir_open (path, 0, &error); g_free (path); -- cgit v1.2.1 From 3f8f21293fe6ffa8709b0ff206beda78f1719eae Mon Sep 17 00:00:00 2001 From: Matthias Clasen Date: Sat, 28 Aug 2021 11:13:37 -0400 Subject: test-itemize: Print more information Print out the items num_chars and char_offset, so we can check that the char_offset implementation works. Update affected test outputs. --- tests/itemize/one.expected | 1 + tests/itemize/two.expected | 1 + tests/test-itemize.c | 18 +++++++++++++++++- 3 files changed, 19 insertions(+), 1 deletion(-) diff --git a/tests/itemize/one.expected b/tests/itemize/one.expected index deaa544d..aacf60d1 100644 --- a/tests/itemize/one.expected +++ b/tests/itemize/one.expected @@ -1,6 +1,7 @@ a b
c Items: a b |[0x2028] |c +Chars: 3(0) |1(3) |1(4) Font: Cantarell 11 |Cantarell 11 |Cantarell 11 Script: latin |latin |latin Lang: en-us |en-us |en-us diff --git a/tests/itemize/two.expected b/tests/itemize/two.expected index ebe9757c..06518c88 100644 --- a/tests/itemize/two.expected +++ b/tests/itemize/two.expected @@ -1,6 +1,7 @@ one two two three Items: one tw |o | two th |r |ee +Chars: 6(0) |1(6) |7(7) |1(14) |2(15) Font: Cantarell 11 |Cantarell 11 |Cantarell 11 |Cantarell Bold 11|Cantarell 11 Script: latin |latin |latin |latin |latin Lang: en-us |en-us |en-us |en-us |en-us diff --git a/tests/test-itemize.c b/tests/test-itemize.c index 921a1fdc..00af2256 100644 --- a/tests/test-itemize.c +++ b/tests/test-itemize.c @@ -31,6 +31,8 @@ #include #include "test-common.h" +#include "pango/pango-item-private.h" + static PangoContext *context; @@ -104,13 +106,22 @@ apply_attributes_to_items (GList *items, pango_attr_iterator_destroy (iter); } +static int +get_item_char_offset (PangoItem *item) +{ + if (item->analysis.flags & PANGO_ANALYSIS_FLAG_HAS_CHAR_OFFSET) + return ((PangoItemPrivate *)item)->char_offset; + + return -1; +} + static void test_file (const gchar *filename, GString *string) { gchar *contents; gsize length; GError *error = NULL; - GString *s1, *s2, *s3, *s4, *s5, *s6; + GString *s1, *s2, *s3, *s4, *s5, *s6, *s7; char *test; char *text; PangoAttrList *attrs; @@ -136,6 +147,7 @@ test_file (const gchar *filename, GString *string) s4 = g_string_new ("Lang: "); s5 = g_string_new ("Bidi: "); s6 = g_string_new ("Attrs: "); + s7 = g_string_new ("Chars: "); length = strlen (text); if (text[length - 1] == '\n') @@ -168,6 +180,7 @@ test_file (const gchar *filename, GString *string) g_string_append_printf (s4, "%s%s", sep, pango_language_to_string (item->analysis.language)); g_string_append_printf (s5, "%s%d", sep, item->analysis.level); g_string_append_printf (s6, "%s", sep); + g_string_append_printf (s7, "%s%d(%d)", sep, item->num_chars, get_item_char_offset 
(item)); for (a = item->analysis.extra_attrs; a; a = a->next) { PangoAttribute *attr = a->data; @@ -189,10 +202,12 @@ test_file (const gchar *filename, GString *string) g_string_append_printf (s4, "%*s", (int)(m - s4->len), ""); g_string_append_printf (s5, "%*s", (int)(m - s5->len), ""); g_string_append_printf (s6, "%*s", (int)(m - s6->len), ""); + g_string_append_printf (s7, "%*s", (int)(m - s7->len), ""); } g_string_append_printf (string, "%s\n", test); g_string_append_printf (string, "%s\n", s1->str); + g_string_append_printf (string, "%s\n", s7->str); g_string_append_printf (string, "%s\n", s2->str); g_string_append_printf (string, "%s\n", s3->str); g_string_append_printf (string, "%s\n", s4->str); @@ -205,6 +220,7 @@ test_file (const gchar *filename, GString *string) g_string_free (s4, TRUE); g_string_free (s5, TRUE); g_string_free (s6, TRUE); + g_string_free (s7, TRUE); g_list_free_full (items, (GDestroyNotify)pango_item_free); pango_attr_list_unref (attrs); -- cgit v1.2.1 From 7c166d34620268212c2c5e0b0ca404dd2d6fc387 Mon Sep 17 00:00:00 2001 From: Matthias Clasen Date: Sat, 28 Aug 2021 11:28:45 -0400 Subject: layout: Use char_offset in a few places There is more we can do here, but this is a start. --- pango/pango-layout.c | 32 +++++++++----------------------- 1 file changed, 9 insertions(+), 23 deletions(-) diff --git a/pango/pango-layout.c b/pango/pango-layout.c index ac396ba0..c721b41b 100644 --- a/pango/pango-layout.c +++ b/pango/pango-layout.c @@ -76,7 +76,7 @@ #include "config.h" #include "pango-glyph.h" /* For pango_shape() */ #include "pango-break.h" -#include "pango-item.h" +#include "pango-item-private.h" #include "pango-engine.h" #include "pango-impl-utils.h" #include "pango-glyph-item.h" @@ -5932,18 +5932,11 @@ justify_clusters (PangoLayoutLine *line, dir = run->item->analysis.level % 2 == 0 ? +1 : -1; - /* We need character offset of the start of the run. We don't have this. - * Compute by counting from the beginning of the line. 
The naming is - * confusing. Note that: - * - * run->item->offset is byte offset of start of run in layout->text. - * state->line_start_index is byte offset of start of line in layout->text. - * state->line_start_offset is character offset of start of line in layout->text. + /* Note: we simply assert here, since our items are all internally + * created. If that ever changes, we need to add a fallback here. */ - g_assert (run->item->offset >= state->line_start_index); - offset = state->line_start_offset - + pango_utf8_strlen (text + state->line_start_index, - run->item->offset - state->line_start_index); + g_assert (run->item->analysis.flags & PANGO_ANALYSIS_FLAG_HAS_CHAR_OFFSET); + offset = ((PangoItemPrivate *)run->item)->char_offset; for (have_cluster = dir > 0 ? pango_glyph_item_iter_init_start (&cluster_iter, run, text) : @@ -6077,18 +6070,11 @@ justify_words (PangoLayoutLine *line, gboolean have_cluster; int offset; - /* We need character offset of the start of the run. We don't have this. - * Compute by counting from the beginning of the line. The naming is - * confusing. Note that: - * - * run->item->offset is byte offset of start of run in layout->text. - * state->line_start_index is byte offset of start of line in layout->text. - * state->line_start_offset is character offset of start of line in layout->text. + /* Note: we simply assert here, since our items are all internally + * created. If that ever changes, we need to add a fallback here. */ - g_assert (run->item->offset >= state->line_start_index); - offset = state->line_start_offset - + pango_utf8_strlen (text + state->line_start_index, - run->item->offset - state->line_start_index); + g_assert (run->item->analysis.flags & PANGO_ANALYSIS_FLAG_HAS_CHAR_OFFSET); + offset = ((PangoItemPrivate *)run->item)->char_offset; for (have_cluster = pango_glyph_item_iter_init_start (&cluster_iter, run, text); have_cluster; -- cgit v1.2.1