diff options
author | Matthias Clasen <mclasen@redhat.com> | 2021-08-24 00:52:53 +0000 |
---|---|---|
committer | Matthias Clasen <mclasen@redhat.com> | 2021-08-24 00:52:53 +0000 |
commit | 4740e552b3c8ca005beea88001a82ea6bb266076 (patch) | |
tree | 25a7c9825d322932b58038db4aac44ecf010dbc5 | |
parent | 8cae1c0762fa35cbe41d35a34d8e048965d287ac (diff) | |
parent | 1349e9a424dc5425dd087b382d6042f5cba3b661 (diff) | |
download | pango-4740e552b3c8ca005beea88001a82ea6bb266076.tar.gz |
Merge branch 'log-attr-things' into 'main'
break-thai: Fix up word break handling
See merge request GNOME/pango!434
-rw-r--r-- | docs/pango_rendering.md | 9 | ||||
-rw-r--r-- | pango/break-thai.c | 28 | ||||
-rw-r--r-- | pango/break.c | 144 | ||||
-rw-r--r-- | pango/pango-break.h | 14 | ||||
-rw-r--r-- | pango/pango-layout.c | 22 | ||||
-rw-r--r-- | tests/breaks/four.expected | 12 |
6 files changed, 153 insertions, 76 deletions
diff --git a/docs/pango_rendering.md b/docs/pango_rendering.md index 2745c0c0..4bec5176 100644 --- a/docs/pango_rendering.md +++ b/docs/pango_rendering.md @@ -15,18 +15,19 @@ various stages of this pipeline and the APIs that implement them. Itemization : breaks a piece of text into segments with consistent direction and shaping - properies. Among other things, this determines which font to use for each + properties. Among other things, this determines which font to use for each character. Use [func@Pango.itemize] or [func@Pango.itemize_with_base_dir] to itemize text. Shaping : converts characters into glyphs. Use [func@Pango.shape], - [func@Pango.shape_full] or [func@Pango.shape_with_flags] to shape text. + [func@Pango.shape_full] or [func@Pango.shape_item] to shape text. Line Breaking : determines where line breaks should be inserted into a sequence of glyphs. - The function [func@Pango.break] determines possible line breaks. The actual - line breaking is done by [class@Pango.Layout]. + The functions [func@Pango.default_break], [func@Pango.tailor_break] and + [func@Pango.attr_break] determine possible line breaks. The actual line + breaking is done by [class@Pango.Layout]. Justification : adjusts inter-word spacing to form lines of even length. This is done by diff --git a/pango/break-thai.c b/pango/break-thai.c index 871c0869..02a18cc5 100644 --- a/pango/break-thai.c +++ b/pango/break-thai.c @@ -92,15 +92,27 @@ break_thai (const char *text, G_UNLOCK (thai_brk); for (cnt = 0; cnt < len; cnt++) - if (attrs[brk_pnts[cnt]].is_char_break) { - /* Only allow additional line breaks if line-breaking is NOT - * prohibited. (The alternative would be to set is_char_break to - * TRUE as well. NOT setting it will break invariants that any - * line break opportunity is also a char break opportunity. */ - attrs[brk_pnts[cnt]].is_line_break = TRUE; - attrs[brk_pnts[cnt]].is_word_start = TRUE; - attrs[brk_pnts[cnt]].is_word_end = TRUE; + if (!attrs[brk_pnts[cnt]].is_line_break) + { + /* Insert line breaks where there wasn't one. + * Satisfy invariants by marking it as char break too. + */ + attrs[brk_pnts[cnt]].is_char_break = TRUE; + attrs[brk_pnts[cnt]].is_line_break = TRUE; + } + if (!(attrs[brk_pnts[cnt]].is_word_start || + attrs[brk_pnts[cnt]].is_word_end)) + { + /* If we find a break in the middle of a sequence + * of characters, end and start a word. We must + * be careful only to do that if default_break + * did not already find a word start or end, + * otherwise we mess up the sequence. + */ + attrs[brk_pnts[cnt]].is_word_start = TRUE; + attrs[brk_pnts[cnt]].is_word_end = TRUE; + } } if (brk_pnts != brk_stack) diff --git a/pango/break.c b/pango/break.c index b2586da2..c55d5f22 100644 --- a/pango/break.c +++ b/pango/break.c @@ -139,29 +139,12 @@ typedef enum WordNumbers } WordType; - -/** - * pango_default_break: - * @text: text to break. Must be valid UTF-8 - * @length: length of text in bytes (may be -1 if @text is nul-terminated) - * @analysis: (nullable): a `PangoAnalysis` structure for the @text - * @attrs: logical attributes to fill in - * @attrs_len: size of the array passed as @attrs - * - * This is the default break algorithm. - * - * It applies Unicode rules without language-specific - * tailoring, therefore the @analyis argument is unused - * and can be %NULL. - * - * See [func@Pango.tailor_break] for language-specific breaks. - */ -void -pango_default_break (const gchar *text, - gint length, - PangoAnalysis *analysis G_GNUC_UNUSED, - PangoLogAttr *attrs, - int attrs_len G_GNUC_UNUSED) +static void +default_break (const char *text, + int length, + PangoAnalysis *analysis G_GNUC_UNUSED, + PangoLogAttr *attrs, + int attrs_len G_GNUC_UNUSED) { /* The rationale for all this is in section 5.15 of the Unicode 3.0 book, * the line breaking stuff is also in TR14 on unicode.org @@ -1639,7 +1622,7 @@ break_script (const char *item_text, } /* }}} */ -/* {{{ Attribute-based tailoring */ +/* {{{ Attribute-based customization */ static gboolean break_attrs (const char *text, @@ -1707,11 +1690,11 @@ break_attrs (const char *text, static gboolean tailor_break (const char *text, - int length, - PangoAnalysis *analysis, + int length, + PangoAnalysis *analysis, int item_offset, - PangoLogAttr *attrs, - int attrs_len) + PangoLogAttr *attrs, + int attrs_len) { gboolean res; @@ -1732,6 +1715,34 @@ tailor_break (const char *text, /* {{{ Public API */ /** + * pango_default_break: + * @text: text to break. Must be valid UTF-8 + * @length: length of text in bytes (may be -1 if @text is nul-terminated) + * @analysis: (nullable): a `PangoAnalysis` structure for the @text + * @attrs: logical attributes to fill in + * @attrs_len: size of the array passed as @attrs + * + * This is the default break algorithm. + * + * It applies Unicode rules without language-specific + * tailoring, therefore the @analyis argument is unused + * and can be %NULL. + * + * See [func@Pango.tailor_break] for language-specific breaks. + * + * See [func@Pango.attr_break] for attribute-based customization. + */ +void +pango_default_break (const char *text, + int length, + PangoAnalysis *analysis G_GNUC_UNUSED, + PangoLogAttr *attrs, + int attrs_len G_GNUC_UNUSED) +{ + default_break (text, length, analysis, attrs, attrs_len); +} + +/** * pango_break: * @text: the text to process. Must be valid UTF-8 * @length: length of @text in bytes (may be -1 if @text is nul-terminated) @@ -1745,11 +1756,11 @@ tailor_break (const char *text, * For most purposes you may want to use * [func@Pango.get_log_attrs]. * - * Deprecated: 1.44: Use [func@Pango.default_break] and - * [func@Pango.tailor_break] + * Deprecated: 1.44: Use [func@Pango.default_break], + * [func@Pango.tailor_break] and func@Pango.attr_break]. */ void -pango_break (const gchar *text, +pango_break (const char *text, gint length, PangoAnalysis *analysis, PangoLogAttr *attrs, @@ -1758,7 +1769,7 @@ pango_break (const gchar *text, g_return_if_fail (analysis != NULL); g_return_if_fail (attrs != NULL); - pango_default_break (text, length, analysis, attrs, attrs_len); + default_break (text, length, analysis, attrs, attrs_len); tailor_break (text, length, analysis, -1, attrs, attrs_len); } @@ -1769,12 +1780,11 @@ pango_break (const gchar *text, * @analysis: `PangoAnalysis` for @text * @offset: Byte offset of @text from the beginning of the * paragraph, or -1 to ignore attributes from @analysis - * @log_attrs: (array length=log_attrs_len): array with one `PangoLogAttr` + * @attrs: (array length=attrs_len): array with one `PangoLogAttr` * per character in @text, plus one extra, to be filled in - * @log_attrs_len: length of @log_attrs array + * @attrs_len: length of @attrs array * - * Apply language-specific tailoring to the breaks - * in @log_attrs. + * Apply language-specific tailoring to the breaks in @attrs. * * The line breaks are assumed to have been produced * by [func@Pango.default_break]. @@ -1782,6 +1792,10 @@ pango_break (const gchar *text, * If @offset is not -1, it is used to apply attributes * from @analysis that are relevant to line breaking. * + * Note that it is better to pass -1 for @offset and + * use [func@Pango.attr_break] to apply attributes to + * the whole paragraph. + * * Since: 1.44 */ void @@ -1789,13 +1803,13 @@ pango_tailor_break (const char *text, int length, PangoAnalysis *analysis, int offset, - PangoLogAttr *log_attrs, - int log_attrs_len) + PangoLogAttr *attrs, + int attrs_len) { - PangoLogAttr *start = log_attrs; + PangoLogAttr *start = attrs; PangoLogAttr attr_before = *start; - if (tailor_break (text, length, analysis, offset, log_attrs, log_attrs_len)) + if (tailor_break (text, length, analysis, offset, attrs, attrs_len)) { /* if tailored, we enforce some of the attrs from before * tailoring at the boundary @@ -1810,18 +1824,50 @@ pango_tailor_break (const char *text, } /** + * pango_attr_break: + * @text: text to break. Must be valid UTF-8 + * @length: length of text in bytes (may be -1 if @text is nul-terminated) + * @attr_list: `PangoAttrList` to apply + * @offset: Byte offset of @text from the beginning of the paragraph + * @attrs: (array length=attrs_len): array with one `PangoLogAttr` + * per character in @text, plus one extra, to be filled in + * @attrs_len: length of @attrs array + * + * Apply customization from attributes to the breaks in @attrs. + * + * The line breaks are assumed to have been produced + * by [func@Pango.default_break] and [func@Pango.tailor_break]. + * + * Since: 1.50 + */ +void +pango_attr_break (const char *text, + int length, + PangoAttrList *attr_list, + int offset, + PangoLogAttr *attrs, + int attrs_len) +{ + GSList *attributes; + + attributes = pango_attr_list_get_attributes (attr_list); + break_attrs (text, length, attributes, offset, attrs, attrs_len); + g_slist_free_full (attributes, (GDestroyNotify)pango_attribute_destroy); +} + +/** * pango_get_log_attrs: * @text: text to process. Must be valid UTF-8 * @length: length in bytes of @text * @level: embedding level, or -1 if unknown * @language: language tag - * @log_attrs: (array length=attrs_len): array with one `PangoLogAttr` + * @attrs: (array length=attrs_len): array with one `PangoLogAttr` * per character in @text, plus one extra, to be filled in - * @attrs_len: length of @log_attrs array + * @attrs_len: length of @attrs array * * Computes a `PangoLogAttr` for each character in @text. * - * The @log_attrs array must have one `PangoLogAttr` for + * The @attrs array must have one `PangoLogAttr` for * each position in @text; if @text contains N characters, * it has N+1 positions, including the last position at the * end of the text. @text should be an entire paragraph; @@ -1834,7 +1880,7 @@ pango_get_log_attrs (const char *text, int length, int level, PangoLanguage *language, - PangoLogAttr *log_attrs, + PangoLogAttr *attrs, int attrs_len) { int chars_broken; @@ -1842,12 +1888,12 @@ pango_get_log_attrs (const char *text, PangoScriptIter iter; g_return_if_fail (length == 0 || text != NULL); - g_return_if_fail (log_attrs != NULL); + g_return_if_fail (attrs != NULL); analysis.level = level; analysis.language = language; - pango_default_break (text, length, &analysis, log_attrs, attrs_len); + pango_default_break (text, length, &analysis, attrs, attrs_len); chars_broken = 0; @@ -1867,7 +1913,7 @@ pango_get_log_attrs (const char *text, run_end - run_start, &analysis, -1, - log_attrs + chars_broken, + attrs + chars_broken, chars_in_range + 1); chars_broken += chars_in_range; @@ -1881,4 +1927,6 @@ pango_get_log_attrs (const char *text, attrs_len); } -/* }}} */ + /* }}} */ + +/* vim:set foldmethod=marker expandtab: */ diff --git a/pango/pango-break.h b/pango/pango-break.h index 81526e9a..a8e6c5b9 100644 --- a/pango/pango-break.h +++ b/pango/pango-break.h @@ -105,7 +105,7 @@ void pango_get_log_attrs (const char *text, int length, int level, PangoLanguage *language, - PangoLogAttr *log_attrs, + PangoLogAttr *attrs, int attrs_len); PANGO_AVAILABLE_IN_ALL @@ -120,8 +120,16 @@ void pango_tailor_break (const char *text, int length, PangoAnalysis *analysis, int offset, - PangoLogAttr *log_attrs, - int log_attrs_len); + PangoLogAttr *attrs, + int attrs_len); + +PANGO_AVAILABLE_IN_1_50 +void pango_attr_break (const char *text, + int length, + PangoAttrList *attr_list, + int offset, + PangoLogAttr *attrs, + int attrs_len); G_END_DECLS diff --git a/pango/pango-layout.c b/pango/pango-layout.c index f78d7daf..b6e0c217 100644 --- a/pango/pango-layout.c +++ b/pango/pango-layout.c @@ -4214,12 +4214,13 @@ process_line (PangoLayout *layout, } static void -get_items_log_attrs (const char *text, - int start, - int length, - GList *items, - PangoLogAttr *log_attrs, - int log_attrs_len) +get_items_log_attrs (const char *text, + int start, + int length, + GList *items, + PangoAttrList *attrs, + PangoLogAttr *log_attrs, + int log_attrs_len) { int offset = 0; GList *l; @@ -4235,12 +4236,18 @@ get_items_log_attrs (const char *text, pango_tailor_break (text + item->offset, item->length, &item->analysis, - item->offset, + -1, log_attrs + offset, item->num_chars + 1); offset += item->num_chars; } + + if (attrs && items) + { + PangoItem *item = items->data; + pango_attr_break (text + start, length, attrs, item->offset, log_attrs, log_attrs_len); + } } static PangoAttrList * @@ -4517,6 +4524,7 @@ pango_layout_check_lines (PangoLayout *layout) start - layout->text, delimiter_index + delim_len, state.items, + shape_attrs, layout->log_attrs + start_offset, layout->n_chars + 1 - start_offset); diff --git a/tests/breaks/four.expected b/tests/breaks/four.expected index ce58e10b..2f29d778 100644 --- a/tests/breaks/four.expected +++ b/tests/breaks/four.expected @@ -1,6 +1,6 @@ -Text: ภ า ษ า ไ ท ย [ ] ห รื อ [ ] ภ า ษ า ไ ท ย ก ล า ง [ ] เ ป็ น ภ า ษ า ร า ช ก า ร แ ล ะ ภ า ษ า ป ร ะ จ ำ ช า ติ ข อ ง ป ร ะ เ ท ศ ไ ท ย [ ] ภ า ษ า ไ ท ย เ ป็ น ภ า ษ า ใ น ก ลุ่ ม ภ า ษ า ไ ท ซึ่ ง เ ป็ น ก ลุ่ ม ย่ อ ย ข อ ง ต ร ะ กู ล ภ า ษ า ข ร้ า [ ] ไ ท [ ] สั น นิ ษ ฐ า น ว่ า [ ] ภ า ษ า ใ น ต ร ะ กู ล นี้ มี ถิ่ น ก ำ เ นิ ด จ า ก ท า ง ต อ น ใ ต้ ข อ ง ป ร ะ เ ท ศ จี น [ ] แ ล ะ นั ก ภ า ษ า ศ า ส ต ร์ บ า ง ส่ ว น เ ส น อ ว่ า [ ] ภ า ษ า ไ ท ย น่ า จ ะ มี ค ว า ม เ ชื่ อ ม โ ย ง กั บ ต ร ะ กู ล ภ า ษ า อ อ ส โ ต ร [ ] เ อ เ ชี ย ติ ก [ ] ต ร ะ กู ล ภ า ษ า อ อ ส โ ต ร นี เ ซี ย น [ ] แ ล ะ ต ร ะ กู ล ภ า ษ า จี น [ ] ทิ เ บ ต [0x0a] -Breaks: c c c c lc c c c lc c c c lc c c c lc c c lc c c c c lc c c lc c c c lc c c c c c lc c c lc c c c lc c c c c lc c c lc c c lc c c c c c lc c c c lc c c c lc c c lc c c lc c c c lc c lc c c lc c c c lc c lc c lc c c lc c c lc c c lc c c lc c c c c lc c c c lc c c c lc c c lc c c c c c c lc c c lc c c c lc c lc c c c c lc lc lc c lc c c c c lc c c lc c c lc c c lc c lc c c lc c c c c c lc c c lc c c lc c lc c c c c c c c c lc c c lc c c lc c c c lc c c lc c c c lc c c lc c lc c lc lc c c c lc c c c lc c c lc c lc c c c c lc c c c lc c lc c c c c lc c c c c lc c c lc c c c c lc c c c lc c lc c c c c c c c c c lc c c lc c c c c lc c c c lc c c lc c c c c c -Whitespace: x x x x x x x x x x x x x w w -Sentences: bs e b -Words: bs b b b bse b b be bse b b be bse b b b bse b b bse b b b be bse b b bse b b b bse b b b b b bse b b bse b b b bse b b b b bse b b bse b b bse b b b b b bse b b be bse b b b bse b b bse b b bse b b b bse b bse b b bse b b b bse b bse b bse b b bse b b bse b b bse b b bse b b b b bse b b b bse b b be bse b be bse b b b b b b bse b be bse b b b bse b bse b b b b bse bse bse b bse b b b b bse b b bse b b bse b b bse b bse b b bse b b b b b bse b be bse b b bse b bse b b b b b b b b bse b b bse b b bse b b b bse b be bse b b b bse b b bse b bse b bse bse b b b bse b b b bse b b bse b bse b b b b bse b b b bse b bse b b b be bse b b b b bse b be bse b b b b bse b b b bse b bse b b b b b b b b be bse b b bse b b b b bse b b b bse b be bse b b b be b -Graphemes: b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b +Text: ภ า ษ า ไ ท ย [ ] ห รื อ [ ] ภ า ษ า ไ ท ย ก ล า ง [ ] เ ป็ น ภ า ษ า ร า ช ก า ร แ ล ะ ภ า ษ า ป ร ะ จ ำ ช า ติ ข อ ง ป ร ะ เ ท ศ ไ ท ย [ ] ภ า ษ า ไ ท ย เ ป็ น ภ า ษ า ใ น ก ลุ่ ม ภ า ษ า ไ ท ซึ่ ง เ ป็ น ก ลุ่ ม ย่ อ ย ข อ ง ต ร ะ กู ล ภ า ษ า ข ร้ า [ ] ไ ท [ ] สั น นิ ษ ฐ า น ว่ า [ ] ภ า ษ า ใ น ต ร ะ กู ล นี้ มี ถิ่ น ก ำ เ นิ ด จ า ก ท า ง ต อ น ใ ต้ ข อ ง ป ร ะ เ ท ศ จี น [ ] แ ล ะ นั ก ภ า ษ า ศ า ส ต ร์ บ า ง ส่ ว น เ ส น อ ว่ า [ ] ภ า ษ า ไ ท ย น่ า จ ะ มี ค ว า ม เ ชื่ อ ม โ ย ง กั บ ต ร ะ กู ล ภ า ษ า อ อ ส โ ต ร [ ] เ อ เ ชี ย ติ ก [ ] ต ร ะ กู ล ภ า ษ า อ อ ส โ ต ร นี เ ซี ย น [ ] แ ล ะ ต ร ะ กู ล ภ า ษ า จี น [ ] ทิ เ บ ต [0x0a] +Breaks: c c c c lc c c c lc c c c lc c c c lc c c lc c c c c lc c c lc c c c lc c c c c c lc c c lc c c c lc c c c c lc c c lc c c lc c c c c c lc c c c lc c c c lc c c lc c c lc c c c lc c lc c c lc c c c lc c lc c lc c c lc c c lc c c lc c c lc c c c c lc c c c lc c c c lc c c lc c c c c c c lc c c lc c c c lc c lc c c c c lc lc lc c lc c c c c lc c c lc c c lc c c lc c lc c c lc c c c c c lc c c lc c c lc c lc c c c c c c c c lc c c lc c c lc c c c lc c c lc c c c lc c c lc c lc c lc lc c c c lc c c c lc c c lc c lc c c c c lc c c c lc c lc c c c c lc c c c c lc c c lc c c c c lc c c c lc c lc c c c c c c c c c lc c c lc c c c c lc c c c lc c c lc c c c c c +Whitespace: x x x x x x x x x x x x x w w +Sentences: bs e b +Words: bs b b b bse b b be bs b b be bs b b b bse b b bse b b b be bs b b bse b b b bse b b b b b bse b b bse b b b bse b b b b bse b b bse b b bse b b b b b bse b b be bs b b b bse b b bse b b bse b b b bse b bse b b bse b b b bse b bse b bse b b bse b b bse b b bse b b bse b b b b bse b b b bse b b be bs b be bs b b b b b b bse b be bs b b b bse b bse b b b b bse bse bse b bse b b b b bse b b bse b b bse b b bse b bse b b bse b b b b b bse b be bs b b bse b bse b b b b b b b b bse b b bse b b bse b b b bse b be bs b b b bse b b bse b bse b bse bse b b b bse b b b bse b b bse b bse b b b b bse b b b bse b bse b b b be bs b b b b bse b be bs b b b b bse b b b bse b bse b b b b b b b b be bs b b bse b b b b bse b b b bse b be bs b b b be b +Graphemes: b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b |