summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Matthias Clasen <mclasen@redhat.com> 2021-05-09 23:26:32 -0400
committer: Matthias Clasen <mclasen@redhat.com> 2021-05-09 23:29:54 -0400
commit: aa606673d3ae4dae54cef5d7d2de031b8f0b73a1 (patch)
tree: f9e60723a2fded72bb5ef652d4e439cee80c32c7
parent: b3ebe097a3042737165ba497dc5b08f235df8167 (diff)
download: pango-fix-sentence-breaks.tar.gz
break: Fix an inconsistency with sentence breaks (branch: fix-sentence-breaks)
When we retroactively remove a sentence boundary because of rule SB8, we were not cleaning up the sentence_start/end markers that had already been derived from it. This can be seen in URLs like http://www.unicode.org/reports/tr29, where we don't have any sentence boundaries, but we leave a stray sentence_start/end at the first / after ".org".
-rw-r--r-- pango/break.c 19
1 files changed, 16 insertions, 3 deletions
diff --git a/pango/break.c b/pango/break.c
index 6b8e5c01..c34d0512 100644
--- a/pango/break.c
+++ b/pango/break.c
@@ -306,7 +306,6 @@ pango_default_break (const gchar *text,
/* Emoji extended pictographics */
gboolean is_Extended_Pictographic;
-
wc = next_wc;
break_type = next_break_type;
@@ -932,7 +931,20 @@ pango_default_break (const gchar *text,
prev_prev_SB_type == SB_ATerm_Close_Sp) &&
IS_OTHER_TERM(prev_SB_type) &&
SB_type == SB_Lower)
- attrs[prev_SB_i].is_sentence_boundary = FALSE;
+ {
+ attrs[prev_SB_i].is_sentence_boundary = FALSE;
+ attrs[prev_SB_i].is_sentence_start = FALSE;
+ attrs[prev_SB_i].is_sentence_end = FALSE;
+ last_sentence_start = -1;
+ for (int j = prev_SB_i - 1; j >= 0; j--)
+ {
+ if (attrs[j].is_sentence_boundary)
+ {
+ last_sentence_start = j;
+ break;
+ }
+ }
+ }
else if ((prev_SB_type == SB_ATerm ||
prev_SB_type == SB_ATerm_Close_Sp ||
prev_SB_type == SB_STerm ||
@@ -1537,8 +1549,9 @@ pango_default_break (const gchar *text,
/* meets space character, move sentence start */
if (last_sentence_start != -1 &&
last_sentence_start == i - 1 &&
- attrs[i - 1].is_white)
+ attrs[i - 1].is_white) {
last_sentence_start++;
+ }
}