From aa606673d3ae4dae54cef5d7d2de031b8f0b73a1 Mon Sep 17 00:00:00 2001 From: Matthias Clasen Date: Sun, 9 May 2021 23:26:32 -0400 Subject: break: Fix an inconsistency with sentence breaks When we retroactively remove a sentence boundary because of rule SB8, we were not cleaning up the sentence_start/end markers that have already been derived from it. This can be seen in URLs like http://www.unicode.org/reports/tr29, where we don't have any sentence boundaries, but we leave a stray sentence_start/end at the first / after ".org". --- pango/break.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/pango/break.c b/pango/break.c index 6b8e5c01..c34d0512 100644 --- a/pango/break.c +++ b/pango/break.c @@ -306,7 +306,6 @@ pango_default_break (const gchar *text, /* Emoji extended pictographics */ gboolean is_Extended_Pictographic; - wc = next_wc; break_type = next_break_type; @@ -932,7 +931,20 @@ pango_default_break (const gchar *text, prev_prev_SB_type == SB_ATerm_Close_Sp) && IS_OTHER_TERM(prev_SB_type) && SB_type == SB_Lower) - attrs[prev_SB_i].is_sentence_boundary = FALSE; + { + attrs[prev_SB_i].is_sentence_boundary = FALSE; + attrs[prev_SB_i].is_sentence_start = FALSE; + attrs[prev_SB_i].is_sentence_end = FALSE; + last_sentence_start = -1; + for (int j = prev_SB_i - 1; j >= 0; j--) + { + if (attrs[j].is_sentence_boundary) + { + last_sentence_start = j; + break; + } + } + } else if ((prev_SB_type == SB_ATerm || prev_SB_type == SB_ATerm_Close_Sp || prev_SB_type == SB_STerm || @@ -1537,8 +1549,9 @@ pango_default_break (const gchar *text, /* meets space character, move sentence start */ if (last_sentence_start != -1 && last_sentence_start == i - 1 && - attrs[i - 1].is_white) + attrs[i - 1].is_white) { last_sentence_start++; + } } -- cgit v1.2.1