summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Matthias Clasen <mclasen@redhat.com> 2021-08-26 12:09:26 +0000
committer Matthias Clasen <mclasen@redhat.com> 2021-08-26 12:09:26 +0000
commit bcf0fa27d6fd4177e02bac9bb417c3f695332676 (patch)
tree e0b6c010fdf07af518dede6952ee928183b14f50
parent 6a33b14848ca7f1a33d601222b010a90eed5c048 (diff)
parent 6688340072f4e93936260fe7110500e52a8ffcdb (diff)
download pango-bcf0fa27d6fd4177e02bac9bb417c3f695332676.tar.gz
Merge branch 'more-log-attr-things' into 'main'
Fixes and validation for log attrs.
See merge request GNOME/pango!442
-rw-r--r-- pango/break-indic.c 11
-rw-r--r-- pango/break.c 54
-rw-r--r-- tests/breaks/eight.break 1
-rw-r--r-- tests/breaks/eight.expected 7
-rw-r--r-- tests/breaks/eleven.expected 14
-rw-r--r-- tests/breaks/fifteen.expected 14
-rw-r--r-- tests/breaks/five.expected 14
-rw-r--r-- tests/breaks/four.expected 14
-rw-r--r-- tests/breaks/fourteen.expected 14
-rw-r--r-- tests/breaks/nine.expected 14
-rw-r--r-- tests/breaks/one.expected 14
-rw-r--r-- tests/breaks/seven.break 2
-rw-r--r-- tests/breaks/seven.expected 7
-rw-r--r-- tests/breaks/six.break 1
-rw-r--r-- tests/breaks/six.expected 7
-rw-r--r-- tests/breaks/sixteen.expected 14
-rw-r--r-- tests/breaks/ten.expected 14
-rw-r--r-- tests/breaks/thirteen.expected 14
-rw-r--r-- tests/breaks/three.expected 14
-rw-r--r-- tests/breaks/twelve.expected 14
-rw-r--r-- tests/breaks/two.expected 14
-rw-r--r-- tests/meson.build 2
-rw-r--r-- tests/test-break.c 6
-rw-r--r-- tests/validate-log-attrs.c 528
-rw-r--r-- tests/validate-log-attrs.h 52
25 files changed, 751 insertions, 109 deletions
diff --git a/pango/break-indic.c b/pango/break-indic.c
index bb44d64c..64f300a9 100644
--- a/pango/break-indic.c
+++ b/pango/break-indic.c
@@ -94,10 +94,13 @@
static void
not_cursor_position (PangoLogAttr *attr)
{
- attr->is_cursor_position = FALSE;
- attr->is_char_break = FALSE;
- attr->is_line_break = FALSE;
- attr->is_mandatory_break = FALSE;
+ if (!attr->is_mandatory_break)
+ {
+ attr->is_cursor_position = FALSE;
+ attr->is_char_break = FALSE;
+ attr->is_line_break = FALSE;
+ attr->is_mandatory_break = FALSE;
+ }
}
static void
diff --git a/pango/break.c b/pango/break.c
index 864ac339..0cb6666d 100644
--- a/pango/break.c
+++ b/pango/break.c
@@ -939,11 +939,11 @@ default_break (const char *text,
SB_type == SB_Lower)
{
attrs[prev_SB_i].is_sentence_boundary = FALSE;
- attrs[prev_SB_i].is_sentence_start = FALSE;
attrs[prev_SB_i].is_sentence_end = FALSE;
last_sentence_start = -1;
for (int j = prev_SB_i - 1; j >= 0; j--)
{
+ attrs[j].is_sentence_end = FALSE;
if (attrs[j].is_sentence_boundary)
{
last_sentence_start = j;
@@ -1541,7 +1541,7 @@ default_break (const char *text,
/* meets sentence end, mark both sentence start and end */
if (last_sentence_start != -1 && is_sentence_boundary) {
- if (last_non_space != -1) {
+ if (last_non_space >= last_sentence_start) {
attrs[last_sentence_start].is_sentence_start = TRUE;
attrs[last_non_space].is_sentence_end = TRUE;
}
@@ -1642,14 +1642,15 @@ default_break (const char *text,
i--;
- attrs[i].is_cursor_position = TRUE; /* Rule GB2 */
attrs[0].is_cursor_position = TRUE; /* Rule GB1 */
+ attrs[i].is_cursor_position = TRUE; /* Rule GB2 */
- attrs[i].is_word_boundary = TRUE; /* Rule WB2 */
attrs[0].is_word_boundary = TRUE; /* Rule WB1 */
+ attrs[i].is_word_boundary = TRUE; /* Rule WB2 */
- attrs[i].is_line_break = TRUE; /* Rule LB3 */
attrs[0].is_line_break = FALSE; /* Rule LB2 */
+ attrs[i].is_line_break = TRUE; /* Rule LB3 */
+ attrs[i].is_mandatory_break = TRUE; /* Rule LB3 */
}
/* }}} */
@@ -1866,11 +1867,14 @@ handle_words (const char *text,
if (start >= offset)
{
gboolean in_word = FALSE;
- for (pos = start_pos - 1; pos >= 0; pos--)
+ for (pos = start_pos; pos >= 0; pos--)
{
if (log_attrs[pos].is_word_end)
- break;
- if (log_attrs[pos].is_word_start)
+ {
+ in_word = pos == start_pos;
+ break;
+ }
+ if (pos < start_pos && log_attrs[pos].is_word_start)
{
in_word = TRUE;
break;
@@ -1881,7 +1885,8 @@ handle_words (const char *text,
log_attrs[start_pos].is_word_boundary = TRUE;
/* Allow line breaks before words */
- log_attrs[start_pos].is_line_break = TRUE;
+ if (start_pos > 0)
+ log_attrs[start_pos].is_line_break = TRUE;
tailored = TRUE;
}
@@ -1889,11 +1894,14 @@ handle_words (const char *text,
if (end < offset + length)
{
gboolean in_word = FALSE;
- for (pos = end_pos + 1; pos < log_attrs_len; pos++)
+ for (pos = end_pos; pos < log_attrs_len; pos++)
{
if (log_attrs[pos].is_word_start)
- break;
- if (log_attrs[pos].is_word_end)
+ {
+ in_word = pos == end_pos;
+ break;
+ }
+ if (pos > end_pos && log_attrs[pos].is_word_end)
{
in_word = TRUE;
break;
@@ -2161,7 +2169,13 @@ pango_default_break (const char *text,
PangoLogAttr *attrs,
int attrs_len G_GNUC_UNUSED)
{
+ PangoLogAttr before = *attrs;
+
default_break (text, length, analysis, attrs, attrs_len);
+
+ attrs->is_line_break |= before.is_line_break;
+ attrs->is_mandatory_break |= before.is_mandatory_break;
+ attrs->is_cursor_position |= before.is_cursor_position;
}
/**
@@ -2270,10 +2284,24 @@ pango_attr_break (const char *text,
PangoLogAttr *attrs,
int attrs_len)
{
+ PangoLogAttr *start = attrs;
+ PangoLogAttr attr_before = *start;
GSList *attributes;
attributes = pango_attr_list_get_attributes (attr_list);
- break_attrs (text, length, attributes, offset, attrs, attrs_len);
+ if (break_attrs (text, length, attributes, offset, attrs, attrs_len))
+ {
+ /* if tailored, we enforce some of the attrs from before
+ * tailoring at the boundary
+ */
+
+ start->backspace_deletes_character = attr_before.backspace_deletes_character;
+
+ start->is_line_break |= attr_before.is_line_break;
+ start->is_mandatory_break |= attr_before.is_mandatory_break;
+ start->is_cursor_position |= attr_before.is_cursor_position;
+ }
+
g_slist_free_full (attributes, (GDestroyNotify)pango_attribute_destroy);
}
diff --git a/tests/breaks/eight.break b/tests/breaks/eight.break
new file mode 100644
index 00000000..5af6b6e2
--- /dev/null
+++ b/tests/breaks/eight.break
@@ -0,0 +1 @@
+200 24.10 99% alpha10beta 100-10 greek-roman
diff --git a/tests/breaks/eight.expected b/tests/breaks/eight.expected
new file mode 100644
index 00000000..39794d22
--- /dev/null
+++ b/tests/breaks/eight.expected
@@ -0,0 +1,7 @@
+Text: ⁦2⁩ ⁦0⁩ ⁦0⁩ [ ] ⁦2⁩ ⁦4⁩ ⁦.⁩ ⁦1⁩ ⁦0⁩ [ ] ⁦9⁩ ⁦9⁩ ⁦%⁩ [ ] ⁦a⁩ ⁦l⁩ ⁦p⁩ ⁦h⁩ ⁦a⁩ ⁦1⁩ ⁦0⁩ ⁦b⁩ ⁦e⁩ ⁦t⁩ ⁦a⁩ [ ] ⁦1⁩ ⁦0⁩ ⁦0⁩ ⁦-⁩ ⁦1⁩ ⁦0⁩ [ ] ⁦g⁩ ⁦r⁩ ⁦e⁩ ⁦e⁩ ⁦k⁩ ⁦-⁩ ⁦r⁩ ⁦o⁩ ⁦m⁩ ⁦a⁩ ⁦n⁩ [0x0a]
+Breaks: c c c c lc c c c c c lc c c c lc c c c c c c c c c c c lc c c c c c c lc c c c c c lc c c c c c Lc
+Whitespace: x x x x x w w
+Sentences: bs e b
+Words: bs be bs e s be bs be b bs be bs be bs be bs be bs be b
+Graphemes: b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b
+Hyphens: i i i i i i i i i i i i i i i i
diff --git a/tests/breaks/eleven.expected b/tests/breaks/eleven.expected
index f5b26708..8df89869 100644
--- a/tests/breaks/eleven.expected
+++ b/tests/breaks/eleven.expected
@@ -1,7 +1,7 @@
-Text: ⁦❤⁩ ⁦️⁩ ⁦︎⁩ ⁦︎⁩ ⁦👨⁩ [0x200d]⁦🦰⁩ ⁦👨⁩⁦🏿⁩ [0x200d]⁦🦱⁩ ⁦0⁩ ⁦️⁩ ⁦⃣⁩ ⁦🏴⁩[0xe0075][0xe0073][0xe0063][0xe0061][0xe007f] ⁦🇩⁩⁦🇪⁩ ⁦️⁩ [0x0a]
-Breaks: c lc lc lc lc lc c c
-Whitespace: w w
-Sentences: bs e b
-Words: b b b bs be b b b
-Graphemes: b b b b b b b b
-Hyphens: i i i i i i i i
+Text: ⁦❤⁩ ⁦️⁩ ⁦︎⁩ ⁦︎⁩ ⁦👨⁩ [0x200d]⁦🦰⁩ ⁦👨⁩⁦🏿⁩ [0x200d]⁦🦱⁩ ⁦0⁩ ⁦️⁩ ⁦⃣⁩ ⁦🏴⁩[0xe0075][0xe0073][0xe0063][0xe0061][0xe007f] ⁦🇩⁩⁦🇪⁩ ⁦️⁩ [0x0a]
+Breaks: c lc lc lc lc lc c lc
+Whitespace: w w
+Sentences: bs e b
+Words: b b b bs be b b b
+Graphemes: b b b b b b b b
+Hyphens: i i i i i i i i
diff --git a/tests/breaks/fifteen.expected b/tests/breaks/fifteen.expected
index 3f5e4655..93b37c39 100644
--- a/tests/breaks/fifteen.expected
+++ b/tests/breaks/fifteen.expected
@@ -1,7 +1,7 @@
-Text: ⁦o⁩ ⁦n⁩ ⁦e⁩ [ ] ⁦t⁩ ⁦w⁩ ⁦o⁩ ⁦-⁩ ⁦t⁩ ⁦h⁩ ⁦r⁩ ⁦e⁩ ⁦e⁩ [ ] ⁦f⁩ ⁦o⁩ [0xad] ⁦u⁩ ⁦r⁩ [0x0a]
-Breaks: c c c c lc c c c lc c c c c c lc c c lc c c c
-Whitespace: x x w w
-Sentences: bs e b
-Words: bs be bs be bs be bs be b
-Graphemes: b b b b b b b b b b b b b b b b b b b b b
-Hyphens: i i i i i i i i i
+Text: ⁦o⁩ ⁦n⁩ ⁦e⁩ [ ] ⁦t⁩ ⁦w⁩ ⁦o⁩ ⁦-⁩ ⁦t⁩ ⁦h⁩ ⁦r⁩ ⁦e⁩ ⁦e⁩ [ ] ⁦f⁩ ⁦o⁩ [0xad] ⁦u⁩ ⁦r⁩ [0x0a]
+Breaks: c c c c lc c c c lc c c c c c lc c c lc c c lc
+Whitespace: x x w w
+Sentences: bs e b
+Words: bs be bs be bs be bs be b
+Graphemes: b b b b b b b b b b b b b b b b b b b b b
+Hyphens: i i i i i i i i i
diff --git a/tests/breaks/five.expected b/tests/breaks/five.expected
index 17ce3a58..bfdcabaa 100644
--- a/tests/breaks/five.expected
+++ b/tests/breaks/five.expected
@@ -1,7 +1,7 @@
-Text: ⁦a⁩ ⁦b⁩ [0x2028] ⁦c⁩ ⁦d⁩ [0x0a]
-Breaks: lc c c Lc c c c
-Whitespace: w w w
-Sentences: bs e bs e b
-Words: bs be b
-Graphemes: b b b b b b b
-Hyphens: i i
+Text: ⁦a⁩ ⁦b⁩ [0x2028] ⁦c⁩ ⁦d⁩ [0x0a]
+Breaks: c c c Lc c c Lc
+Whitespace: w w w
+Sentences: bs e bs e b
+Words: bs be b
+Graphemes: b b b b b b b
+Hyphens: i i
diff --git a/tests/breaks/four.expected b/tests/breaks/four.expected
index e94af24a..8476ccba 100644
--- a/tests/breaks/four.expected
+++ b/tests/breaks/four.expected
@@ -1,7 +1,7 @@
-Text: ⁦ภ⁩ ⁦า⁩ ⁦ษ⁩ ⁦า⁩ ⁦ไ⁩ ⁦ท⁩ ⁦ย⁩ [ ] ⁦ห⁩ ⁦ร⁩ ⁦ื⁩ ⁦อ⁩ [ ] ⁦ภ⁩ ⁦า⁩ ⁦ษ⁩ ⁦า⁩ ⁦ไ⁩ ⁦ท⁩ ⁦ย⁩ ⁦ก⁩ ⁦ล⁩ ⁦า⁩ ⁦ง⁩ [ ] ⁦เ⁩ ⁦ป⁩ ⁦็⁩ ⁦น⁩ ⁦ภ⁩ ⁦า⁩ ⁦ษ⁩ ⁦า⁩ ⁦ร⁩ ⁦า⁩ ⁦ช⁩ ⁦ก⁩ ⁦า⁩ ⁦ร⁩ ⁦แ⁩ ⁦ล⁩ ⁦ะ⁩ ⁦ภ⁩ ⁦า⁩ ⁦ษ⁩ ⁦า⁩ ⁦ป⁩ ⁦ร⁩ ⁦ะ⁩ ⁦จ⁩ ⁦ำ⁩ ⁦ช⁩ ⁦า⁩ ⁦ต⁩ ⁦ิ⁩ ⁦ข⁩ ⁦อ⁩ ⁦ง⁩ ⁦ป⁩ ⁦ร⁩ ⁦ะ⁩ ⁦เ⁩ ⁦ท⁩ ⁦ศ⁩ ⁦ไ⁩ ⁦ท⁩ ⁦ย⁩ [ ] ⁦ภ⁩ ⁦า⁩ ⁦ษ⁩ ⁦า⁩ ⁦ไ⁩ ⁦ท⁩ ⁦ย⁩ ⁦เ⁩ ⁦ป⁩ ⁦็⁩ ⁦น⁩ ⁦ภ⁩ ⁦า⁩ ⁦ษ⁩ ⁦า⁩ ⁦ใ⁩ ⁦น⁩ ⁦ก⁩ ⁦ล⁩ ⁦ุ⁩ ⁦่⁩ ⁦ม⁩ ⁦ภ⁩ ⁦า⁩ ⁦ษ⁩ ⁦า⁩ ⁦ไ⁩ ⁦ท⁩ ⁦ซ⁩ ⁦ึ⁩ ⁦่⁩ ⁦ง⁩ ⁦เ⁩ ⁦ป⁩ ⁦็⁩ ⁦น⁩ ⁦ก⁩ ⁦ล⁩ ⁦ุ⁩ ⁦่⁩ ⁦ม⁩ ⁦ย⁩ ⁦่⁩ ⁦อ⁩ ⁦ย⁩ ⁦ข⁩ ⁦อ⁩ ⁦ง⁩ ⁦ต⁩ ⁦ร⁩ ⁦ะ⁩ ⁦ก⁩ ⁦ู⁩ ⁦ล⁩ ⁦ภ⁩ ⁦า⁩ ⁦ษ⁩ ⁦า⁩ ⁦ข⁩ ⁦ร⁩ ⁦้⁩ ⁦า⁩ [ ] ⁦ไ⁩ ⁦ท⁩ [ ] ⁦ส⁩ ⁦ั⁩ ⁦น⁩ ⁦น⁩ ⁦ิ⁩ ⁦ษ⁩ ⁦ฐ⁩ ⁦า⁩ ⁦น⁩ ⁦ว⁩ ⁦่⁩ ⁦า⁩ [ ] ⁦ภ⁩ ⁦า⁩ ⁦ษ⁩ ⁦า⁩ ⁦ใ⁩ ⁦น⁩ ⁦ต⁩ ⁦ร⁩ ⁦ะ⁩ ⁦ก⁩ ⁦ู⁩ ⁦ล⁩ ⁦น⁩ ⁦ี⁩ ⁦้⁩ ⁦ม⁩ ⁦ี⁩ ⁦ถ⁩ ⁦ิ⁩ ⁦่⁩ ⁦น⁩ ⁦ก⁩ ⁦ำ⁩ ⁦เ⁩ ⁦น⁩ ⁦ิ⁩ ⁦ด⁩ ⁦จ⁩ ⁦า⁩ ⁦ก⁩ ⁦ท⁩ ⁦า⁩ ⁦ง⁩ ⁦ต⁩ ⁦อ⁩ ⁦น⁩ ⁦ใ⁩ ⁦ต⁩ ⁦้⁩ ⁦ข⁩ ⁦อ⁩ ⁦ง⁩ ⁦ป⁩ ⁦ร⁩ ⁦ะ⁩ ⁦เ⁩ ⁦ท⁩ ⁦ศ⁩ ⁦จ⁩ ⁦ี⁩ ⁦น⁩ [ ] ⁦แ⁩ ⁦ล⁩ ⁦ะ⁩ ⁦น⁩ ⁦ั⁩ ⁦ก⁩ ⁦ภ⁩ ⁦า⁩ ⁦ษ⁩ ⁦า⁩ ⁦ศ⁩ ⁦า⁩ ⁦ส⁩ ⁦ต⁩ ⁦ร⁩ ⁦์⁩ ⁦บ⁩ ⁦า⁩ ⁦ง⁩ ⁦ส⁩ ⁦่⁩ ⁦ว⁩ ⁦น⁩ ⁦เ⁩ ⁦ส⁩ ⁦น⁩ ⁦อ⁩ ⁦ว⁩ ⁦่⁩ ⁦า⁩ [ ] ⁦ภ⁩ ⁦า⁩ ⁦ษ⁩ ⁦า⁩ ⁦ไ⁩ ⁦ท⁩ ⁦ย⁩ ⁦น⁩ ⁦่⁩ ⁦า⁩ ⁦จ⁩ ⁦ะ⁩ ⁦ม⁩ ⁦ี⁩ ⁦ค⁩ ⁦ว⁩ ⁦า⁩ ⁦ม⁩ ⁦เ⁩ ⁦ช⁩ ⁦ื⁩ ⁦่⁩ ⁦อ⁩ ⁦ม⁩ ⁦โ⁩ ⁦ย⁩ ⁦ง⁩ ⁦ก⁩ ⁦ั⁩ ⁦บ⁩ ⁦ต⁩ ⁦ร⁩ ⁦ะ⁩ ⁦ก⁩ ⁦ู⁩ ⁦ล⁩ ⁦ภ⁩ ⁦า⁩ ⁦ษ⁩ ⁦า⁩ ⁦อ⁩ ⁦อ⁩ ⁦ส⁩ ⁦โ⁩ ⁦ต⁩ ⁦ร⁩ [ ] ⁦เ⁩ ⁦อ⁩ ⁦เ⁩ ⁦ช⁩ ⁦ี⁩ ⁦ย⁩ ⁦ต⁩ ⁦ิ⁩ ⁦ก⁩ [ ] ⁦ต⁩ ⁦ร⁩ ⁦ะ⁩ ⁦ก⁩ ⁦ู⁩ ⁦ล⁩ ⁦ภ⁩ ⁦า⁩ ⁦ษ⁩ ⁦า⁩ ⁦อ⁩ ⁦อ⁩ ⁦ส⁩ ⁦โ⁩ ⁦ต⁩ ⁦ร⁩ ⁦น⁩ ⁦ี⁩ ⁦เ⁩ ⁦ซ⁩ ⁦ี⁩ ⁦ย⁩ ⁦น⁩ [ ] ⁦แ⁩ ⁦ล⁩ ⁦ะ⁩ ⁦ต⁩ ⁦ร⁩ ⁦ะ⁩ ⁦ก⁩ ⁦ู⁩ ⁦ล⁩ ⁦ภ⁩ ⁦า⁩ ⁦ษ⁩ ⁦า⁩ ⁦จ⁩ ⁦ี⁩ ⁦น⁩ [ ] ⁦ท⁩ ⁦ิ⁩ ⁦เ⁩ ⁦บ⁩ ⁦ต⁩ [0x0a]
-Breaks: c c c c lc c c c lc c c c lc c c c lc c c lc c c c c lc c c lc c c c lc c c c c c lc c c lc c c c lc c c c c lc c c lc c c lc c c c c c lc c c c lc c c c lc c c lc c c lc c c c lc c lc c c lc c c c lc c lc c lc c c lc c c lc c c lc c c lc c c c c lc c c c lc c c c lc c c lc c c c c c c lc c c lc c c c lc c lc c c c c lc lc lc c lc c c c c lc c c lc c c lc c c lc c lc c c lc c c c c c lc c c lc c c lc c lc c c c c c c c c lc c c lc c c lc c c c lc c c lc c c c lc c c lc c lc c lc lc c c c lc c c c lc c c lc c lc c c c c lc c c c lc c lc c c c c lc c c c c lc c c lc c c c c lc c c c lc c lc c c c c c c c c c lc c c lc c c c c lc c c c lc c c lc c c c c c
-Whitespace: x x x x x x x x x x x x x w w
-Sentences: bs e b
-Words: bs b b b bse b b be bs b b be bs b b b bse b b bse b b b be bs b b bse b b b bse b b b b b bse b b bse b b b bse b b b b bse b b bse b b bse b b b b b bse b b be bs b b b bse b b bse b b bse b b b bse b bse b b bse b b b bse b bse b bse b b bse b b bse b b bse b b bse b b b b bse b b b bse b b be bs b be bs b b b b b b bse b be bs b b b bse b bse b b b b bse bse bse b bse b b b b bse b b bse b b bse b b bse b bse b b bse b b b b b bse b be bs b b bse b bse b b b b b b b b bse b b bse b b bse b b b bse b be bs b b b bse b b bse b bse b bse bse b b b bse b b b bse b b bse b bse b b b b bse b b b bse b bse b b b be bs b b b b bse b be bs b b b b bse b b b bse b bse b b b b b b b b be bs b b bse b b b b bse b b b bse b be bs b b b be b
-Graphemes: b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b
-Hyphens: i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i
+Text: ⁦ภ⁩ ⁦า⁩ ⁦ษ⁩ ⁦า⁩ ⁦ไ⁩ ⁦ท⁩ ⁦ย⁩ [ ] ⁦ห⁩ ⁦ร⁩ ⁦ื⁩ ⁦อ⁩ [ ] ⁦ภ⁩ ⁦า⁩ ⁦ษ⁩ ⁦า⁩ ⁦ไ⁩ ⁦ท⁩ ⁦ย⁩ ⁦ก⁩ ⁦ล⁩ ⁦า⁩ ⁦ง⁩ [ ] ⁦เ⁩ ⁦ป⁩ ⁦็⁩ ⁦น⁩ ⁦ภ⁩ ⁦า⁩ ⁦ษ⁩ ⁦า⁩ ⁦ร⁩ ⁦า⁩ ⁦ช⁩ ⁦ก⁩ ⁦า⁩ ⁦ร⁩ ⁦แ⁩ ⁦ล⁩ ⁦ะ⁩ ⁦ภ⁩ ⁦า⁩ ⁦ษ⁩ ⁦า⁩ ⁦ป⁩ ⁦ร⁩ ⁦ะ⁩ ⁦จ⁩ ⁦ำ⁩ ⁦ช⁩ ⁦า⁩ ⁦ต⁩ ⁦ิ⁩ ⁦ข⁩ ⁦อ⁩ ⁦ง⁩ ⁦ป⁩ ⁦ร⁩ ⁦ะ⁩ ⁦เ⁩ ⁦ท⁩ ⁦ศ⁩ ⁦ไ⁩ ⁦ท⁩ ⁦ย⁩ [ ] ⁦ภ⁩ ⁦า⁩ ⁦ษ⁩ ⁦า⁩ ⁦ไ⁩ ⁦ท⁩ ⁦ย⁩ ⁦เ⁩ ⁦ป⁩ ⁦็⁩ ⁦น⁩ ⁦ภ⁩ ⁦า⁩ ⁦ษ⁩ ⁦า⁩ ⁦ใ⁩ ⁦น⁩ ⁦ก⁩ ⁦ล⁩ ⁦ุ⁩ ⁦่⁩ ⁦ม⁩ ⁦ภ⁩ ⁦า⁩ ⁦ษ⁩ ⁦า⁩ ⁦ไ⁩ ⁦ท⁩ ⁦ซ⁩ ⁦ึ⁩ ⁦่⁩ ⁦ง⁩ ⁦เ⁩ ⁦ป⁩ ⁦็⁩ ⁦น⁩ ⁦ก⁩ ⁦ล⁩ ⁦ุ⁩ ⁦่⁩ ⁦ม⁩ ⁦ย⁩ ⁦่⁩ ⁦อ⁩ ⁦ย⁩ ⁦ข⁩ ⁦อ⁩ ⁦ง⁩ ⁦ต⁩ ⁦ร⁩ ⁦ะ⁩ ⁦ก⁩ ⁦ู⁩ ⁦ล⁩ ⁦ภ⁩ ⁦า⁩ ⁦ษ⁩ ⁦า⁩ ⁦ข⁩ ⁦ร⁩ ⁦้⁩ ⁦า⁩ [ ] ⁦ไ⁩ ⁦ท⁩ [ ] ⁦ส⁩ ⁦ั⁩ ⁦น⁩ ⁦น⁩ ⁦ิ⁩ ⁦ษ⁩ ⁦ฐ⁩ ⁦า⁩ ⁦น⁩ ⁦ว⁩ ⁦่⁩ ⁦า⁩ [ ] ⁦ภ⁩ ⁦า⁩ ⁦ษ⁩ ⁦า⁩ ⁦ใ⁩ ⁦น⁩ ⁦ต⁩ ⁦ร⁩ ⁦ะ⁩ ⁦ก⁩ ⁦ู⁩ ⁦ล⁩ ⁦น⁩ ⁦ี⁩ ⁦้⁩ ⁦ม⁩ ⁦ี⁩ ⁦ถ⁩ ⁦ิ⁩ ⁦่⁩ ⁦น⁩ ⁦ก⁩ ⁦ำ⁩ ⁦เ⁩ ⁦น⁩ ⁦ิ⁩ ⁦ด⁩ ⁦จ⁩ ⁦า⁩ ⁦ก⁩ ⁦ท⁩ ⁦า⁩ ⁦ง⁩ ⁦ต⁩ ⁦อ⁩ ⁦น⁩ ⁦ใ⁩ ⁦ต⁩ ⁦้⁩ ⁦ข⁩ ⁦อ⁩ ⁦ง⁩ ⁦ป⁩ ⁦ร⁩ ⁦ะ⁩ ⁦เ⁩ ⁦ท⁩ ⁦ศ⁩ ⁦จ⁩ ⁦ี⁩ ⁦น⁩ [ ] ⁦แ⁩ ⁦ล⁩ ⁦ะ⁩ ⁦น⁩ ⁦ั⁩ ⁦ก⁩ ⁦ภ⁩ ⁦า⁩ ⁦ษ⁩ ⁦า⁩ ⁦ศ⁩ ⁦า⁩ ⁦ส⁩ ⁦ต⁩ ⁦ร⁩ ⁦์⁩ ⁦บ⁩ ⁦า⁩ ⁦ง⁩ ⁦ส⁩ ⁦่⁩ ⁦ว⁩ ⁦น⁩ ⁦เ⁩ ⁦ส⁩ ⁦น⁩ ⁦อ⁩ ⁦ว⁩ ⁦่⁩ ⁦า⁩ [ ] ⁦ภ⁩ ⁦า⁩ ⁦ษ⁩ ⁦า⁩ ⁦ไ⁩ ⁦ท⁩ ⁦ย⁩ ⁦น⁩ ⁦่⁩ ⁦า⁩ ⁦จ⁩ ⁦ะ⁩ ⁦ม⁩ ⁦ี⁩ ⁦ค⁩ ⁦ว⁩ ⁦า⁩ ⁦ม⁩ ⁦เ⁩ ⁦ช⁩ ⁦ื⁩ ⁦่⁩ ⁦อ⁩ ⁦ม⁩ ⁦โ⁩ ⁦ย⁩ ⁦ง⁩ ⁦ก⁩ ⁦ั⁩ ⁦บ⁩ ⁦ต⁩ ⁦ร⁩ ⁦ะ⁩ ⁦ก⁩ ⁦ู⁩ ⁦ล⁩ ⁦ภ⁩ ⁦า⁩ ⁦ษ⁩ ⁦า⁩ ⁦อ⁩ ⁦อ⁩ ⁦ส⁩ ⁦โ⁩ ⁦ต⁩ ⁦ร⁩ [ ] ⁦เ⁩ ⁦อ⁩ ⁦เ⁩ ⁦ช⁩ ⁦ี⁩ ⁦ย⁩ ⁦ต⁩ ⁦ิ⁩ ⁦ก⁩ [ ] ⁦ต⁩ ⁦ร⁩ ⁦ะ⁩ ⁦ก⁩ ⁦ู⁩ ⁦ล⁩ ⁦ภ⁩ ⁦า⁩ ⁦ษ⁩ ⁦า⁩ ⁦อ⁩ ⁦อ⁩ ⁦ส⁩ ⁦โ⁩ ⁦ต⁩ ⁦ร⁩ ⁦น⁩ ⁦ี⁩ ⁦เ⁩ ⁦ซ⁩ ⁦ี⁩ ⁦ย⁩ ⁦น⁩ [ ] ⁦แ⁩ ⁦ล⁩ ⁦ะ⁩ ⁦ต⁩ ⁦ร⁩ ⁦ะ⁩ ⁦ก⁩ ⁦ู⁩ ⁦ล⁩ ⁦ภ⁩ ⁦า⁩ ⁦ษ⁩ ⁦า⁩ ⁦จ⁩ ⁦ี⁩ ⁦น⁩ [ ] ⁦ท⁩ ⁦ิ⁩ ⁦เ⁩ ⁦บ⁩ ⁦ต⁩ [0x0a]
+Breaks: c c c c lc c c c lc c c c lc c c c lc c c lc c c c c lc c c lc c c c lc c c c c c lc c c lc c c c lc c c c c lc c c lc c c lc c c c c c lc c c c lc c c c lc c c lc c c lc c c c lc c lc c c lc c c c lc c lc c lc c c lc c c lc c c lc c c lc c c c c lc c c c lc c c c lc c c lc c c c c c c lc c c lc c c c lc c lc c c c c lc lc lc c lc c c c c lc c c lc c c lc c c lc c lc c c lc c c c c c lc c c lc c c lc c lc c c c c c c c c lc c c lc c c lc c c c lc c c lc c c c lc c c lc c lc c lc lc c c c lc c c c lc c c lc c lc c c c c lc c c c lc c lc c c c c lc c c c c lc c c lc c c c c lc c c c lc c lc c c c c c c c c c lc c c lc c c c c lc c c c lc c c lc c c c c lc
+Whitespace: x x x x x x x x x x x x x w w
+Sentences: bs e b
+Words: bs b b b bse b b be bs b b be bs b b b bse b b bse b b b be bs b b bse b b b bse b b b b b bse b b bse b b b bse b b b b bse b b bse b b bse b b b b b bse b b be bs b b b bse b b bse b b bse b b b bse b bse b b bse b b b bse b bse b bse b b bse b b bse b b bse b b bse b b b b bse b b b bse b b be bs b be bs b b b b b b bse b be bs b b b bse b bse b b b b bse bse bse b bse b b b b bse b b bse b b bse b b bse b bse b b bse b b b b b bse b be bs b b bse b bse b b b b b b b b bse b b bse b b bse b b b bse b be bs b b b bse b b bse b bse b bse bse b b b bse b b b bse b b bse b bse b b b b bse b b b bse b bse b b b be bs b b b b bse b be bs b b b b bse b b b bse b bse b b b b b b b b be bs b b bse b b b b bse b b b bse b be bs b b b be b
+Graphemes: b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b
+Hyphens: i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i
diff --git a/tests/breaks/fourteen.expected b/tests/breaks/fourteen.expected
index c8a22617..db779ed0 100644
--- a/tests/breaks/fourteen.expected
+++ b/tests/breaks/fourteen.expected
@@ -1,7 +1,7 @@
-Text: ⁦a⁩ ⁦b⁩ ⁦c⁩ [ ] ⁦α⁩ ⁦β⁩ ⁦γ⁩ [0x0a]
-Breaks: c l l c c
-Whitespace: x w w
-Sentences: bs e b
-Words: bs bse bse be b
-Graphemes: b b b b b b b b b
-Hyphens: i i i i
+Text: ⁦a⁩ ⁦b⁩ ⁦c⁩ [ ] ⁦α⁩ ⁦β⁩ ⁦γ⁩ [0x0a]
+Breaks: c l l c lc
+Whitespace: x w w
+Sentences: bs e b
+Words: bs bse bse be b
+Graphemes: b b b b b b b b b
+Hyphens: i i i i
diff --git a/tests/breaks/nine.expected b/tests/breaks/nine.expected
index 818a78aa..96da8230 100644
--- a/tests/breaks/nine.expected
+++ b/tests/breaks/nine.expected
@@ -1,7 +1,7 @@
-Text: ⁦म⁩ ⁦ी⁩ [ ] ⁦क⁩ ⁦ा⁩ ⁦च⁩ [ ] ⁦ख⁩ ⁦ा⁩ ⁦ऊ⁩ [ ] ⁦श⁩ ⁦क⁩ ⁦त⁩ ⁦ो⁩ ⁦,⁩ [ ] ⁦म⁩ ⁦ल⁩ ⁦ा⁩ [ ] ⁦त⁩ ⁦े⁩ [ ] ⁦द⁩ ⁦ु⁩ ⁦ख⁩ ⁦त⁩ [ ] ⁦न⁩ ⁦ा⁩ ⁦ह⁩ [0x0a] ⁦म⁩ ⁦ै⁩ ⁦ं⁩ [ ] ⁦क⁩ ⁦ा⁩ ⁦ँ⁩ ⁦च⁩ [ ] ⁦ख⁩ ⁦ा⁩ [ ] ⁦स⁩ ⁦क⁩ ⁦त⁩ ⁦ा⁩ [ ] ⁦ह⁩ ⁦ू⁩ ⁦ँ⁩ ⁦,⁩ [ ] ⁦म⁩ ⁦ु⁩ ⁦झ⁩ ⁦े⁩ [ ] ⁦उ⁩ ⁦स⁩ [ ] ⁦स⁩ ⁦े⁩ [ ] ⁦क⁩ ⁦ो⁩ ⁦ई⁩ [ ] ⁦प⁩ ⁦ी⁩ ⁦ड⁩ ⁦ा⁩ [ ] ⁦न⁩ ⁦ह⁩ ⁦ी⁩ ⁦ं⁩ [ ] ⁦ह⁩ ⁦ो⁩ ⁦त⁩ [0x0a] ⁦ந⁩ ⁦ா⁩ ⁦ன⁩ ⁦்⁩ [ ] ⁦க⁩ ⁦ண⁩ ⁦்⁩ ⁦ண⁩ ⁦ா⁩ ⁦ட⁩ ⁦ி⁩ [ ] ⁦ச⁩ ⁦ா⁩ ⁦ப⁩ ⁦்⁩ ⁦ப⁩ ⁦ி⁩ ⁦ட⁩ ⁦ு⁩ ⁦வ⁩ ⁦ே⁩ ⁦ன⁩ ⁦்⁩ ⁦,⁩ [ ] ⁦அ⁩ ⁦த⁩ ⁦ன⁩ ⁦ா⁩ ⁦ல⁩ ⁦்⁩ [ ] ⁦எ⁩ ⁦ன⁩ ⁦க⁩ ⁦்⁩ ⁦க⁩ ⁦ு⁩ [ ] ⁦ஒ⁩ ⁦ர⁩ ⁦ு⁩ [ ] ⁦க⁩ ⁦ே⁩ ⁦ட⁩ ⁦ு⁩ ⁦ம⁩ ⁦்⁩ [ ] ⁦வ⁩ ⁦ர⁩ ⁦ா⁩ ⁦த⁩ [0x0a] ⁦ﻢ⁩ ⁦ﯾ⁩ ⁦ں⁩ [ ] ⁦ﮎ⁩ ⁦ﺎ⁩ ⁦ﻨ⁩ ⁦ﭼ⁩ [ ] ⁦ﮎ⁩ ⁦ھ⁩ ⁦ﺍ⁩ [ ] ⁦ﺲ⁩ ⁦ﮑ⁩ ⁦ﺗ⁩ ⁦ﺍ⁩ [ ] ⁦ہ⁩ ⁦ﻭ⁩ ⁦ں⁩ [ ] ⁦ﺍ⁩ ⁦ﻭ⁩ ⁦ﺭ⁩ [ ] ⁦ﻢ⁩ ⁦ﺟ⁩ ⁦ھ⁩ ⁦ے⁩ [ ] ⁦ﺖ⁩ ⁦ﮑ⁩ ⁦ﻠ⁩ ⁦ﯿ⁩ ⁦ﻓ⁩ [ ] ⁦ﻥ⁩ ⁦ہ⁩ ⁦ﯼ⁩ ⁦ں⁩ [ ] ⁦ہ⁩ ⁦ﻮ⁩ ⁦ﺘ⁩ ⁦ﯾ⁩ [ ] [0x0a] ⁦ﺰ⁩ ⁦ﻫ⁩ [ ] ⁦ﺶ⁩ ⁦ﻴ⁩ ⁦ﺸ⁩ ⁦ﻫ⁩ [ ] ⁦ﺥ⁩ ⁦ﻭ⁩ ⁦ړ⁩ ⁦ﻝ⁩ ⁦ې⁩ [ ] ⁦ﺶ⁩ ⁦ﻣ⁩ ⁦،⁩ [ ] ⁦ﻪ⁩ ⁦ﻐ⁩ ⁦ﻫ⁩ [ ] ⁦ﻡ⁩ ⁦ﺍ⁩ [ ] ⁦ﻦ⁩ ⁦ﻫ⁩ [ ] ⁦ﺥ⁩ ⁦ﻭ⁩ ⁦ږ⁩ ⁦ﻮ⁩ ⁦ﻳ⁩ [0x0a]
-Breaks: c c lc c c lc c c lc c c c c lc c c lc c lc c c c lc c c c c lc c c lc c lc c c c lc c c lc c c lc c c lc c lc c c lc c c lc c c lc c c c c c lc c c c c lc c c c c c c c lc c c c c lc c c c c lc c c lc c c c lc c c c c c c c lc c c c c lc c c c lc c c c c lc c c c lc c c c lc c c c c lc c c c c c lc c c c c lc c c c c c c c c lc c c c c lc c c c c c lc c c c lc c c c lc c c lc c c lc c c c c c c
-Whitespace: x x x x x x x w x x x x x x x x x x x w x x x x x x x w x x x x x x x x x x w x x x x x x x w w
-Sentences: bs e bs e bs e bs e bs e b
-Words: bs be bs be bs be bs be b bs be bs be bs be bs be bs be bs be bs be bs be bs be b bs be bs be bs be bs be bs be bs be bs be bs be bs be bs be b bs be bs be bs be bs be bs be bs be bs be bs be bs be bs be bs be bs be bs be bs be bs be b bs be bs be bs be bs be b bs be bs be bs be bs be b
-Graphemes: b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b
-Hyphens: i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i
+Text: ⁦म⁩ ⁦ी⁩ [ ] ⁦क⁩ ⁦ा⁩ ⁦च⁩ [ ] ⁦ख⁩ ⁦ा⁩ ⁦ऊ⁩ [ ] ⁦श⁩ ⁦क⁩ ⁦त⁩ ⁦ो⁩ ⁦,⁩ [ ] ⁦म⁩ ⁦ल⁩ ⁦ा⁩ [ ] ⁦त⁩ ⁦े⁩ [ ] ⁦द⁩ ⁦ु⁩ ⁦ख⁩ ⁦त⁩ [ ] ⁦न⁩ ⁦ा⁩ ⁦ह⁩ [0x0a] ⁦म⁩ ⁦ै⁩ ⁦ं⁩ [ ] ⁦क⁩ ⁦ा⁩ ⁦ँ⁩ ⁦च⁩ [ ] ⁦ख⁩ ⁦ा⁩ [ ] ⁦स⁩ ⁦क⁩ ⁦त⁩ ⁦ा⁩ [ ] ⁦ह⁩ ⁦ू⁩ ⁦ँ⁩ ⁦,⁩ [ ] ⁦म⁩ ⁦ु⁩ ⁦झ⁩ ⁦े⁩ [ ] ⁦उ⁩ ⁦स⁩ [ ] ⁦स⁩ ⁦े⁩ [ ] ⁦क⁩ ⁦ो⁩ ⁦ई⁩ [ ] ⁦प⁩ ⁦ी⁩ ⁦ड⁩ ⁦ा⁩ [ ] ⁦न⁩ ⁦ह⁩ ⁦ी⁩ ⁦ं⁩ [ ] ⁦ह⁩ ⁦ो⁩ ⁦त⁩ [0x0a] ⁦ந⁩ ⁦ா⁩ ⁦ன⁩ ⁦்⁩ [ ] ⁦க⁩ ⁦ண⁩ ⁦்⁩ ⁦ண⁩ ⁦ா⁩ ⁦ட⁩ ⁦ி⁩ [ ] ⁦ச⁩ ⁦ா⁩ ⁦ப⁩ ⁦்⁩ ⁦ப⁩ ⁦ி⁩ ⁦ட⁩ ⁦ு⁩ ⁦வ⁩ ⁦ே⁩ ⁦ன⁩ ⁦்⁩ ⁦,⁩ [ ] ⁦அ⁩ ⁦த⁩ ⁦ன⁩ ⁦ா⁩ ⁦ல⁩ ⁦்⁩ [ ] ⁦எ⁩ ⁦ன⁩ ⁦க⁩ ⁦்⁩ ⁦க⁩ ⁦ு⁩ [ ] ⁦ஒ⁩ ⁦ர⁩ ⁦ு⁩ [ ] ⁦க⁩ ⁦ே⁩ ⁦ட⁩ ⁦ு⁩ ⁦ம⁩ ⁦்⁩ [ ] ⁦வ⁩ ⁦ர⁩ ⁦ா⁩ ⁦த⁩ [0x0a] ⁦ﻢ⁩ ⁦ﯾ⁩ ⁦ں⁩ [ ] ⁦ﮎ⁩ ⁦ﺎ⁩ ⁦ﻨ⁩ ⁦ﭼ⁩ [ ] ⁦ﮎ⁩ ⁦ھ⁩ ⁦ﺍ⁩ [ ] ⁦ﺲ⁩ ⁦ﮑ⁩ ⁦ﺗ⁩ ⁦ﺍ⁩ [ ] ⁦ہ⁩ ⁦ﻭ⁩ ⁦ں⁩ [ ] ⁦ﺍ⁩ ⁦ﻭ⁩ ⁦ﺭ⁩ [ ] ⁦ﻢ⁩ ⁦ﺟ⁩ ⁦ھ⁩ ⁦ے⁩ [ ] ⁦ﺖ⁩ ⁦ﮑ⁩ ⁦ﻠ⁩ ⁦ﯿ⁩ ⁦ﻓ⁩ [ ] ⁦ﻥ⁩ ⁦ہ⁩ ⁦ﯼ⁩ ⁦ں⁩ [ ] ⁦ہ⁩ ⁦ﻮ⁩ ⁦ﺘ⁩ ⁦ﯾ⁩ [ ] [0x0a] ⁦ﺰ⁩ ⁦ﻫ⁩ [ ] ⁦ﺶ⁩ ⁦ﻴ⁩ ⁦ﺸ⁩ ⁦ﻫ⁩ [ ] ⁦ﺥ⁩ ⁦ﻭ⁩ ⁦ړ⁩ ⁦ﻝ⁩ ⁦ې⁩ [ ] ⁦ﺶ⁩ ⁦ﻣ⁩ ⁦،⁩ [ ] ⁦ﻪ⁩ ⁦ﻐ⁩ ⁦ﻫ⁩ [ ] ⁦ﻡ⁩ ⁦ﺍ⁩ [ ] ⁦ﻦ⁩ ⁦ﻫ⁩ [ ] ⁦ﺥ⁩ ⁦ﻭ⁩ ⁦ږ⁩ ⁦ﻮ⁩ ⁦ﻳ⁩ [0x0a]
+Breaks: c c lc c c lc c c lc c c c c lc c c lc c lc c c c lc c c Lc c lc c c lc c lc c c c lc c c lc c c lc c c lc c lc c c lc c c lc c c lc c c Lc c c lc c c c c lc c c c c c c c lc c c c c lc c c c c lc c c lc c c c lc c c c Lc c c c lc c c c c lc c c c lc c c c c lc c c c lc c c c lc c c c c lc c c c c c lc c c c c lc c c c c c Lc c c lc c c c c lc c c c c c lc c c c lc c c c lc c c lc c c lc c c c c c Lc
+Whitespace: x x x x x x x w x x x x x x x x x x x w x x x x x x x w x x x x x x x x x x w x x x x x x x w w
+Sentences: bs e bs e bs e bs e bs e b
+Words: bs be bs be bs be bs be b bs be bs be bs be bs be bs be bs be bs be bs be bs be b bs be bs be bs be bs be bs be bs be bs be bs be bs be bs be b bs be bs be bs be bs be bs be bs be bs be bs be bs be bs be bs be bs be bs be bs be bs be b bs be bs be bs be bs be b bs be bs be bs be bs be b
+Graphemes: b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b
+Hyphens: i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i
diff --git a/tests/breaks/one.expected b/tests/breaks/one.expected
index cc257131..44fee3ef 100644
--- a/tests/breaks/one.expected
+++ b/tests/breaks/one.expected
@@ -1,7 +1,7 @@
-Text: ⁦a⁩ ⁦b⁩ ⁦c⁩ ⁦/⁩ ⁦d⁩ ⁦e⁩ ⁦f⁩ [ ] ⁦g⁩ ⁦h⁩ ⁦i⁩ [0xad] ⁦j⁩ ⁦k⁩ ⁦l⁩ ⁦.⁩ [ ] ⁦B⁩ ⁦l⁩ ⁦a⁩ [0x0a]
-Breaks: c c c c lc c c c lc c c c lc c c c c lc c c c c
-Whitespace: x x w w
-Sentences: bs e bs e b
-Words: bs be bs be bs be b bs be b
-Graphemes: b b b b b b b b b b b b b b b b b b b b b b
-Hyphens: i i i i i i i i i i i
+Text: ⁦a⁩ ⁦b⁩ ⁦c⁩ ⁦/⁩ ⁦d⁩ ⁦e⁩ ⁦f⁩ [ ] ⁦g⁩ ⁦h⁩ ⁦i⁩ [0xad] ⁦j⁩ ⁦k⁩ ⁦l⁩ ⁦.⁩ [ ] ⁦B⁩ ⁦l⁩ ⁦a⁩ [0x0a]
+Breaks: c c c c lc c c c lc c c c lc c c c c lc c c c lc
+Whitespace: x x w w
+Sentences: bs e bs e b
+Words: bs be bs be bs be b bs be b
+Graphemes: b b b b b b b b b b b b b b b b b b b b b b
+Hyphens: i i i i i i i i i i i
diff --git a/tests/breaks/seven.break b/tests/breaks/seven.break
new file mode 100644
index 00000000..167a17b7
--- /dev/null
+++ b/tests/breaks/seven.break
@@ -0,0 +1,2 @@
+bla
+bla
diff --git a/tests/breaks/seven.expected b/tests/breaks/seven.expected
new file mode 100644
index 00000000..9062fb31
--- /dev/null
+++ b/tests/breaks/seven.expected
@@ -0,0 +1,7 @@
+Text: ⁦b⁩ ⁦l⁩ ⁦a⁩ [0x0a] ⁦b⁩ ⁦l⁩ ⁦a⁩ [0x0a]
+Breaks: c c c c Lc c c c Lc
+Whitespace: w w w
+Sentences: bs e bs e b
+Words: bs be bs be b
+Graphemes: b b b b b b b b b
+Hyphens: i i i i
diff --git a/tests/breaks/six.break b/tests/breaks/six.break
new file mode 100644
index 00000000..2161574d
--- /dev/null
+++ b/tests/breaks/six.break
@@ -0,0 +1 @@
+a file:///<span segment="word">ho/</span><span segment="word">bo-mo/</span>Bla-txt file
diff --git a/tests/breaks/six.expected b/tests/breaks/six.expected
new file mode 100644
index 00000000..85e882a3
--- /dev/null
+++ b/tests/breaks/six.expected
@@ -0,0 +1,7 @@
+Text: ⁦a⁩ [ ] ⁦f⁩ ⁦i⁩ ⁦l⁩ ⁦e⁩ ⁦:⁩ ⁦/⁩ ⁦/⁩ ⁦/⁩ ⁦h⁩ ⁦o⁩ ⁦/⁩ ⁦b⁩ ⁦o⁩ ⁦-⁩ ⁦m⁩ ⁦o⁩ ⁦/⁩ ⁦B⁩ ⁦l⁩ ⁦a⁩ ⁦-⁩ ⁦t⁩ ⁦x⁩ ⁦t⁩ [ ] ⁦f⁩ ⁦i⁩ ⁦l⁩ ⁦e⁩ [0x0a]
+Breaks: c c lc c c c c c c c lc c c lc c c c c c lc c c c lc c c c lc c c c c Lc
+Whitespace: x x w w
+Sentences: bs e b
+Words: bs be bs be b b b bs bse bse be bs be bs be b
+Graphemes: b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b
+Hyphens: i i i i i i i i i i i i i
diff --git a/tests/breaks/sixteen.expected b/tests/breaks/sixteen.expected
index 75f20b9f..221358e7 100644
--- a/tests/breaks/sixteen.expected
+++ b/tests/breaks/sixteen.expected
@@ -1,7 +1,7 @@
-Text: ⁦h⁩ ⁦y⁩ ⁦‧⁩ ⁦p⁩ ⁦h⁩ ⁦e⁩ ⁦n⁩ ⁦|⁩ ⁦a⁩ ⁦t⁩ ⁦i⁩ ⁦o⁩ ⁦n⁩ [ ] ⁦o⁩ ⁦v⁩ ⁦e⁩ ⁦r⁩ [0xad] ⁦l⁩ ⁦o⁩ ⁦a⁩ ⁦d⁩ [0x0a]
-Breaks: c c c lc c c c c lc c c c c c lc c c c c lc c c c c c
-Whitespace: x w w
-Sentences: bs e b
-Words: bs e s be bs be bs be b
-Graphemes: b b b b b b b b b b b b b b b b b b b b b b b b b
-Hyphens: i ri i i i ri i i i i i i i i i i i
+Text: ⁦h⁩ ⁦y⁩ ⁦‧⁩ ⁦p⁩ ⁦h⁩ ⁦e⁩ ⁦n⁩ ⁦|⁩ ⁦a⁩ ⁦t⁩ ⁦i⁩ ⁦o⁩ ⁦n⁩ [ ] ⁦o⁩ ⁦v⁩ ⁦e⁩ ⁦r⁩ [0xad] ⁦l⁩ ⁦o⁩ ⁦a⁩ ⁦d⁩ [0x0a]
+Breaks: c c c lc c c c c lc c c c c c lc c c c c lc c c c c lc
+Whitespace: x w w
+Sentences: bs e b
+Words: bs e s be bs be bs be b
+Graphemes: b b b b b b b b b b b b b b b b b b b b b b b b b
+Hyphens: i ri i i i ri i i i i i i i i i i i
diff --git a/tests/breaks/ten.expected b/tests/breaks/ten.expected
index c9f91dc8..aa768c1b 100644
--- a/tests/breaks/ten.expected
+++ b/tests/breaks/ten.expected
@@ -1,7 +1,7 @@
-Text: ⁦i⁩ ⁦ක⁩ ⁦්⁩ [0x200d] ⁦ක⁩ [ ] ⁦a⁩ [0x200c] ⁦a⁩ ⁦்⁩ [0x0a]
-Breaks: c c c lc c c c
-Whitespace: x w w
-Sentences: bs e b
-Words: bs be bs be b
-Graphemes: b b b b b b b
-Hyphens: i i i i i i i
+Text: ⁦i⁩ ⁦ක⁩ ⁦්⁩ [0x200d] ⁦ක⁩ [ ] ⁦a⁩ [0x200c] ⁦a⁩ ⁦்⁩ [0x0a]
+Breaks: c c c lc c c lc
+Whitespace: x w w
+Sentences: bs e b
+Words: bs be bs be b
+Graphemes: b b b b b b b
+Hyphens: i i i i i i i
diff --git a/tests/breaks/thirteen.expected b/tests/breaks/thirteen.expected
index 70742cff..e6b1787d 100644
--- a/tests/breaks/thirteen.expected
+++ b/tests/breaks/thirteen.expected
@@ -1,7 +1,7 @@
-Text: ⁦a⁩ [ ] ⁦a⁩ ⁦b⁩ [0x200b] ⁦s⁩ ⁦p⁩ [0x200b] [ ] [ ] ⁦d⁩ ⁦e⁩ [0xad] ⁦f⁩ ⁦g⁩ [ ] ⁦b⁩ [0x0a]
-Breaks: c c lc c c lc c c c c lc c c lc c c lc c c
-Whitespace: x x x x w w
-Sentences: bs e b
-Words: bs be bs be bs be b
-Graphemes: b b b b b b b b b b b b b b b b b b b
-Hyphens: i i i i i i
+Text: ⁦a⁩ [ ] ⁦a⁩ ⁦b⁩ [0x200b] ⁦s⁩ ⁦p⁩ [0x200b] [ ] [ ] ⁦d⁩ ⁦e⁩ [0xad] ⁦f⁩ ⁦g⁩ [ ] ⁦b⁩ [0x0a]
+Breaks: c c lc c c lc c c c c lc c c lc c c lc c lc
+Whitespace: x x x x w w
+Sentences: bs e b
+Words: bs be bs be bs be b
+Graphemes: b b b b b b b b b b b b b b b b b b b
+Hyphens: i i i i i i
diff --git a/tests/breaks/three.expected b/tests/breaks/three.expected
index c2c89158..3962cbc5 100644
--- a/tests/breaks/three.expected
+++ b/tests/breaks/three.expected
@@ -1,7 +1,7 @@
-Text: ⁦o⁩ ⁦n⁩ ⁦e⁩ [ ] ⁦t⁩ ⁦w⁩ ⁦o⁩ [0x2028] ⁦r⁩ ⁦e⁩ ⁦d⁩ [ ] ⁦b⁩ ⁦l⁩ ⁦u⁩ ⁦e⁩ [0x200d] ⁦g⁩ ⁦r⁩ ⁦e⁩ ⁦e⁩ ⁦n⁩ [0x0a]
-Breaks: c c c c lc c c c Lc c c c lc c c c c c c c c c c
-Whitespace: x w x w w
-Sentences: bs e bs e b
-Words: bs be bs be bs be bs be b
-Graphemes: b b b b b b b b b b b b b b b b b b b b b b b
-Hyphens: i i i i i i i i i i i i i i i
+Text: ⁦o⁩ ⁦n⁩ ⁦e⁩ [ ] ⁦t⁩ ⁦w⁩ ⁦o⁩ [0x2028] ⁦r⁩ ⁦e⁩ ⁦d⁩ [ ] ⁦b⁩ ⁦l⁩ ⁦u⁩ ⁦e⁩ [0x200d] ⁦g⁩ ⁦r⁩ ⁦e⁩ ⁦e⁩ ⁦n⁩ [0x0a]
+Breaks: c c c c lc c c c Lc c c c lc c c c c c c c c c lc
+Whitespace: x w x w w
+Sentences: bs e bs e b
+Words: bs be bs be bs be bs be b
+Graphemes: b b b b b b b b b b b b b b b b b b b b b b b
+Hyphens: i i i i i i i i i i i i i i i
diff --git a/tests/breaks/twelve.expected b/tests/breaks/twelve.expected
index 63e14e64..cfb8e625 100644
--- a/tests/breaks/twelve.expected
+++ b/tests/breaks/twelve.expected
@@ -1,7 +1,7 @@
-Text: ⁦t⁩ ⁦h⁩ ⁦e⁩ [ ] ⁦f⁩ ⁦i⁩ ⁦l⁩ ⁦e⁩ [ ] ⁦/⁩ ⁦p⁩ ⁦a⁩ ⁦t⁩ ⁦h⁩ ⁦/⁩ ⁦t⁩ ⁦o⁩ ⁦/⁩ ⁦m⁩ ⁦y⁩ ⁦/⁩ ⁦h⁩ ⁦o⁩ ⁦m⁩ ⁦e⁩ [ ] ⁦i⁩ ⁦s⁩ [ ] ⁦c⁩ ⁦u⁩ ⁦r⁩ ⁦s⁩ ⁦e⁩ ⁦d⁩ ⁦.⁩ [0x0a]
-Breaks: c c c c lc c c c c lc c c c c c lc c c lc c c lc c c c c lc c c lc c c c c c c c c
-Whitespace: x x x x w w
-Sentences: bs e b
-Words: bs be bs be bs bse bs bse be bs be bs be b b
-Graphemes: b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b
-Hyphens: i i i i i i i i i i i i i i i i i i i
+Text: ⁦t⁩ ⁦h⁩ ⁦e⁩ [ ] ⁦f⁩ ⁦i⁩ ⁦l⁩ ⁦e⁩ [ ] ⁦/⁩ ⁦p⁩ ⁦a⁩ ⁦t⁩ ⁦h⁩ ⁦/⁩ ⁦t⁩ ⁦o⁩ ⁦/⁩ ⁦m⁩ ⁦y⁩ ⁦/⁩ ⁦h⁩ ⁦o⁩ ⁦m⁩ ⁦e⁩ [ ] ⁦i⁩ ⁦s⁩ [ ] ⁦c⁩ ⁦u⁩ ⁦r⁩ ⁦s⁩ ⁦e⁩ ⁦d⁩ ⁦.⁩ [0x0a]
+Breaks: c c c c lc c c c c lc c c c c c lc c c lc c c lc c c c c lc c c lc c c c c c c c Lc
+Whitespace: x x x x w w
+Sentences: bs e b
+Words: bs be bs be bs bse bse bse be bs be bs be b b
+Graphemes: b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b
+Hyphens: i i i i i i i i i i i i i i i i i i i
diff --git a/tests/breaks/two.expected b/tests/breaks/two.expected
index 0280c52f..2921d224 100644
--- a/tests/breaks/two.expected
+++ b/tests/breaks/two.expected
@@ -1,7 +1,7 @@
-Text: ⁦g⁩ ⁦o⁩ ⁦r⁩ ⁦i⁩ ⁦l⁩ ⁦·⁩ ⁦l⁩ ⁦e⁩ ⁦s⁩ [0x0a]
-Breaks: c c c c c c c c c c c
-Whitespace: w w
-Sentences: bs e b
-Words: bs e s be b
-Graphemes: b b b b b b b b b b b
-Hyphens: i i i i i i i
+Text: ⁦g⁩ ⁦o⁩ ⁦r⁩ ⁦i⁩ ⁦l⁩ ⁦·⁩ ⁦l⁩ ⁦e⁩ ⁦s⁩ [0x0a]
+Breaks: c c c c c c c c c c lc
+Whitespace: w w
+Sentences: bs e b
+Words: bs e s be b
+Graphemes: b b b b b b b b b b b
+Hyphens: i i i i i i i
diff --git a/tests/meson.build b/tests/meson.build
index 4776b2c9..90bb94ba 100644
--- a/tests/meson.build
+++ b/tests/meson.build
@@ -52,7 +52,7 @@ if cairo_dep.found()
[ 'testmisc', [ 'testmisc.c' ], [ libpangocairo_dep, libpangoft2_dep, glib_dep, harfbuzz_dep ] ],
[ 'cxx-test', [ 'cxx-test.cpp' ], [ libpangocairo_dep, gobject_dep, harfbuzz_dep ] ],
[ 'test-harfbuzz', [ 'test-harfbuzz.c' ], [ libpangocairo_dep, gobject_dep, harfbuzz_dep ] ],
- [ 'test-break', [ 'test-break.c', 'test-common.c' ], [libpangocairo_dep, glib_dep, harfbuzz_dep ] ]
+ [ 'test-break', [ 'test-break.c', 'test-common.c', 'validate-log-attrs.c' ], [libpangocairo_dep, glib_dep, harfbuzz_dep ] ]
]
if pango_cairo_backends.contains('png')
diff --git a/tests/test-break.c b/tests/test-break.c
index f7fcf6ec..fecca168 100644
--- a/tests/test-break.c
+++ b/tests/test-break.c
@@ -30,6 +30,7 @@
#include "config.h"
#include <pango/pangocairo.h>
#include "test-common.h"
+#include "validate-log-attrs.h"
static PangoContext *context;
@@ -95,6 +96,11 @@ test_file (const gchar *filename, GString *string)
g_assert_cmpint (len, ==, len2);
g_assert_true (memcmp (attrs, attrs2, sizeof (PangoLogAttr) * len) == 0);
+ if (!pango_validate_log_attrs (text, length, attrs, len, &error))
+ {
+ g_warning ("%s: Log attrs invalid: %s", filename, error->message);
+ g_assert_not_reached ();
+ }
layout2 = pango_layout_copy (layout);
attrs2 = pango_layout_get_log_attrs_readonly (layout2, &len2);
diff --git a/tests/validate-log-attrs.c b/tests/validate-log-attrs.c
new file mode 100644
index 00000000..6cbdd6f9
--- /dev/null
+++ b/tests/validate-log-attrs.c
@@ -0,0 +1,528 @@
+/* Pango
+ *
+ * Copyright (C) 1999 Red Hat Software
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include "config.h"
+
+#include "validate-log-attrs.h"
+#include "pango.h"
+#include <string.h>
+
+/* {{{ Validation */
+
+G_DEFINE_QUARK(pango-validate-error-quark, pango_validate_error)
+/* Per-character callback invoked by log_attr_foreach().
+ *
+ * pos is the character index; wc/prev_wc/next_wc are the current,
+ * previous and following characters (prev_wc is 0 for the first
+ * character, next_wc is 0 once the end of the text is reached), and
+ * type/prev_type/next_type are the matching GUnicodeType values
+ * (prev_type is (GUnicodeType) -1 for the first character).
+ * attr/prev_attr/next_attr point at attrs[pos], attrs[pos - 1]
+ * (NULL at pos 0) and attrs[pos + 1]. after_zws is state owned by the
+ * iterator that the callback may read and update between calls.
+ *
+ * Returning FALSE stops the iteration; the callback is expected to
+ * have set error in that case.
+ */
+typedef gboolean (* CharForeachFunc) (int pos,
+ gunichar wc,
+ gunichar prev_wc,
+ gunichar next_wc,
+ GUnicodeType type,
+ GUnicodeType prev_type,
+ GUnicodeType next_type,
+ const PangoLogAttr *attr,
+ const PangoLogAttr *prev_attr,
+ const PangoLogAttr *next_attr,
+ gboolean *after_zws,
+ GError **error);
+
+/* Walk @text one character at a time and call @func for each one,
+ * handing it the current, previous and next character together with
+ * the matching entries of @attrs.
+ *
+ * @text:      UTF-8 text to iterate over
+ * @length:    length of @text in bytes, or negative if @text is
+ *             NUL-terminated (same convention as g_utf8_strlen())
+ * @attrs:     log attrs for @text
+ * @attrs_len: number of entries in @attrs
+ * @func:      callback to run for every character
+ * @error:     passed through to @func
+ *
+ * Iteration stops at the end of the text, at an embedded NUL, or as
+ * soon as @func returns FALSE.
+ *
+ * Returns: FALSE if @func returned FALSE for some character,
+ *   TRUE otherwise (including for empty text)
+ */
+static gboolean
+log_attr_foreach (const char          *text,
+                  int                  length,
+                  const PangoLogAttr  *attrs,
+                  int                  attrs_len,
+                  CharForeachFunc      func,
+                  GError             **error)
+{
+  const gchar *next = text;
+  const gchar *end;
+  gint i = 0;
+  gunichar prev_wc = 0;
+  gunichar next_wc;
+  GUnicodeType prev_type = (GUnicodeType) -1;
+  GUnicodeType next_type;
+  gboolean after_zws = FALSE;
+
+  /* A negative length means "NUL-terminated"; without this, end would
+   * point before text and only the first character would be visited.
+   */
+  if (length < 0)
+    length = (int) strlen (text);
+
+  end = text + length;
+
+  if (next == end)
+    return TRUE;
+
+  next_wc = g_utf8_get_char (next);
+  next_type = g_unichar_type (next_wc);
+
+  /* func is handed &attrs[i + 1], so never step past the array even
+   * if the caller passed an attrs_len that does not match the text.
+   */
+  while (next_wc != 0 && i + 1 < attrs_len)
+    {
+      GUnicodeType type;
+      gunichar wc;
+
+      wc = next_wc;
+      type = next_type;
+
+      next = g_utf8_next_char (next);
+
+      /* Look ahead one character; 0 marks the end of the text */
+      if (next >= end)
+        next_wc = 0;
+      else
+        next_wc = g_utf8_get_char (next);
+
+      if (next_wc)
+        next_type = g_unichar_type (next_wc);
+
+      if (!func (i,
+                 wc, prev_wc, next_wc,
+                 type, prev_type, next_type,
+                 &attrs[i],
+                 i != 0 ? &attrs[i - 1] : NULL,
+                 &attrs[i + 1],
+                 &after_zws,
+                 error))
+        return FALSE;
+
+      prev_type = type;
+      prev_wc = wc;
+      i++;
+    }
+
+  return TRUE;
+}
+
+/* CharForeachFunc that checks the line-break flags at one character
+ * against a subset of the UAX #14 line breaking rules (LB2 - LB8a,
+ * LB11, LB12), plus the internal rule that a mandatory break must
+ * also be flagged as a line break.
+ *
+ * @attr describes the position before @wc, @prev_attr the position
+ * before @prev_wc (NULL for the first character) and @next_attr the
+ * position after @wc. @after_zws is updated on every call and records
+ * whether the previous character is a ZWS, or a space in a run of
+ * spaces that follows a ZWS. @type, @prev_type and @next_type are not
+ * used here.
+ *
+ * Returns: TRUE if no rule is violated; otherwise FALSE with @error
+ *   set to a PANGO_VALIDATE_ERROR_BREAK error naming the rule
+ */
+static gboolean
+check_line_char (int                  pos,
+                 gunichar             wc,
+                 gunichar             prev_wc,
+                 gunichar             next_wc,
+                 GUnicodeType         type,
+                 GUnicodeType         prev_type,
+                 GUnicodeType         next_type,
+                 const PangoLogAttr  *attr,
+                 const PangoLogAttr  *prev_attr,
+                 const PangoLogAttr  *next_attr,
+                 gboolean            *after_zws,
+                 GError             **error)
+{
+  GUnicodeBreakType break_type;
+  GUnicodeBreakType prev_break_type;
+
+  break_type = g_unichar_break_type (wc);
+
+  if (prev_wc)
+    prev_break_type = g_unichar_break_type (prev_wc);
+  else
+    prev_break_type = G_UNICODE_BREAK_UNKNOWN;
+
+  /* Track "ZWS SP*" runs for LB8 */
+  if (prev_break_type == G_UNICODE_BREAK_ZERO_WIDTH_SPACE ||
+      (prev_break_type == G_UNICODE_BREAK_SPACE && *after_zws))
+    *after_zws = TRUE;
+  else
+    *after_zws = FALSE;
+
+  if (wc == '\n' && prev_wc == '\r')
+    {
+      if (attr->is_line_break)
+        {
+          g_set_error (error,
+                       PANGO_VALIDATE_ERROR, PANGO_VALIDATE_ERROR_BREAK,
+                       "char %#x %d: Do not break between \\r and \\n (LB5)", wc, pos);
+          return FALSE;
+        }
+    }
+
+  if (prev_wc == 0 && wc != 0)
+    {
+      if (attr->is_line_break)
+        {
+          g_set_error (error,
+                       PANGO_VALIDATE_ERROR, PANGO_VALIDATE_ERROR_BREAK,
+                       "char %#x %d: Do not break before first char (LB2)", wc, pos);
+          return FALSE;
+        }
+    }
+
+  if (next_wc == 0)
+    {
+      if (!next_attr->is_line_break)
+        {
+          g_set_error (error,
+                       PANGO_VALIDATE_ERROR, PANGO_VALIDATE_ERROR_BREAK,
+                       "char %#x %d: Always break after the last char (LB3)", wc, pos);
+          return FALSE;
+        }
+    }
+
+  if (prev_break_type == G_UNICODE_BREAK_MANDATORY)
+    {
+      if (!attr->is_mandatory_break)
+        {
+          g_set_error (error,
+                       PANGO_VALIDATE_ERROR, PANGO_VALIDATE_ERROR_BREAK,
+                       "char %#x %d: Always break after hard line breaks (LB4)", wc, pos);
+          return FALSE;
+        }
+    }
+
+  if (prev_break_type == G_UNICODE_BREAK_CARRIAGE_RETURN ||
+      prev_break_type == G_UNICODE_BREAK_LINE_FEED ||
+      prev_break_type == G_UNICODE_BREAK_NEXT_LINE)
+    {
+      if (!attr->is_mandatory_break)
+        {
+          g_set_error (error,
+                       PANGO_VALIDATE_ERROR, PANGO_VALIDATE_ERROR_BREAK,
+                       "char %#x %d: Always break after CR, LF and NL (LB5)", wc, pos);
+          return FALSE;
+        }
+    }
+
+  if (break_type == G_UNICODE_BREAK_MANDATORY ||
+      break_type == G_UNICODE_BREAK_CARRIAGE_RETURN ||
+      break_type == G_UNICODE_BREAK_LINE_FEED ||
+      break_type == G_UNICODE_BREAK_NEXT_LINE)
+    {
+      if (attr->is_line_break)
+        {
+          g_set_error (error,
+                       PANGO_VALIDATE_ERROR, PANGO_VALIDATE_ERROR_BREAK,
+                       "char %#x %d: Do not break before hard line breaks (LB6)", wc, pos);
+          return FALSE;
+        }
+    }
+
+  if (break_type == G_UNICODE_BREAK_SPACE ||
+      break_type == G_UNICODE_BREAK_ZERO_WIDTH_SPACE)
+    {
+      if (attr->is_line_break && prev_attr != NULL &&
+          !attr->is_mandatory_break &&
+          !(next_wc && g_unichar_break_type (next_wc) == G_UNICODE_BREAK_COMBINING_MARK))
+        {
+          g_set_error (error,
+                       PANGO_VALIDATE_ERROR, PANGO_VALIDATE_ERROR_BREAK,
+                       "char %#x %d: Can't break before a space unless mandatory precedes or combining mark follows (LB7)", wc, pos);
+          return FALSE;
+        }
+    }
+
+  if (break_type != G_UNICODE_BREAK_ZERO_WIDTH_SPACE &&
+      break_type != G_UNICODE_BREAK_SPACE &&
+      *after_zws)
+    {
+      if (!attr->is_line_break)
+        {
+          g_set_error (error,
+                       PANGO_VALIDATE_ERROR, PANGO_VALIDATE_ERROR_BREAK,
+                       "char %#x %d: Break before a char following ZWS, even if spaces intervene (LB8)", wc, pos);
+          return FALSE;
+        }
+    }
+
+  /* NOTE(review): this tests the break *before* a ZWJ, while the
+   * message talks about the break after it - confirm which one is
+   * intended before changing the condition.
+   */
+  if (break_type == G_UNICODE_BREAK_ZERO_WIDTH_JOINER)
+    {
+      if (attr->is_line_break)
+        {
+          g_set_error (error,
+                       PANGO_VALIDATE_ERROR, PANGO_VALIDATE_ERROR_BREAK,
+                       "char %#x %d: Do not break after ZWJ (LB8a)", wc, pos);
+          return FALSE;
+        }
+    }
+
+  /* TODO: check LB9 */
+
+  if (prev_break_type == G_UNICODE_BREAK_WORD_JOINER ||
+      break_type == G_UNICODE_BREAK_WORD_JOINER)
+    {
+      if (attr->is_line_break)
+        {
+          g_set_error (error,
+                       PANGO_VALIDATE_ERROR, PANGO_VALIDATE_ERROR_BREAK,
+                       "char %#x %d: Do not break before or after WJ (LB11)", wc, pos);
+          return FALSE;
+        }
+    }
+
+  if (prev_break_type == G_UNICODE_BREAK_NON_BREAKING_GLUE)
+    {
+      /* Only an actual break after GL is an error; merely having a
+       * character follow a GL character is fine.
+       */
+      if (attr->is_line_break)
+        {
+          g_set_error (error,
+                       PANGO_VALIDATE_ERROR, PANGO_VALIDATE_ERROR_BREAK,
+                       "char %#x %d: Do not break after GL (LB12)", wc, pos);
+          return FALSE;
+        }
+    }
+
+  /* internal consistency */
+
+  if (attr->is_mandatory_break && !attr->is_line_break)
+    {
+      g_set_error (error,
+                   PANGO_VALIDATE_ERROR, PANGO_VALIDATE_ERROR_BREAK,
+                   "char %#x %d: Mandatory breaks must also be marked as regular breaks", wc, pos);
+      return FALSE;
+    }
+
+  return TRUE;
+}
+
+/* Validate the line-break flags in @attrs by running
+ * check_line_char() over every character of @text.
+ *
+ * Returns: TRUE if every character passed, FALSE (with @error set by
+ *   the callback) otherwise
+ */
+static gboolean
+check_line_invariants (const char          *text,
+                       int                  length,
+                       const PangoLogAttr  *attrs,
+                       int                  attrs_len,
+                       GError             **error)
+{
+  gboolean valid;
+
+  valid = log_attr_foreach (text, length, attrs, attrs_len, check_line_char, error);
+
+  return valid;
+}
+
+/* Placeholder for grapheme checks: nothing is validated yet, so every
+ * input is accepted and @error is never set.
+ */
+static gboolean
+check_grapheme_invariants (const char          *text,
+                           int                  length,
+                           const PangoLogAttr  *attrs,
+                           int                  attrs_len,
+                           GError             **error)
+{
+  const gboolean valid = TRUE;
+
+  return valid;
+}
+
+/* Validate the word flags in @attrs.
+ *
+ * Two things are checked for every entry:
+ *  - word starts and word ends alternate (an entry that carries both
+ *    flags leaves the current state unchanged);
+ *  - every word boundary is also a cursor position, i.e. words do not
+ *    end inside a grapheme.
+ *
+ * @text and @length are not used.
+ *
+ * Returns: TRUE if all entries are consistent; otherwise FALSE with
+ *   @error set to a PANGO_VALIDATE_ERROR_WORD error
+ */
+static gboolean
+check_word_invariants (const char          *text,
+                       int                  length,
+                       const PangoLogAttr  *attrs,
+                       int                  attrs_len,
+                       GError             **error)
+{
+  enum {
+    AFTER_START,
+    AFTER_END
+  } state = AFTER_END;
+
+  for (int i = 0; i < attrs_len; i++)
+    {
+      /* Check that word starts and ends are alternating */
+      switch (state)
+        {
+        case AFTER_END:
+          if (attrs[i].is_word_start)
+            {
+              if (attrs[i].is_word_end)
+                state = AFTER_END;
+              else
+                state = AFTER_START;
+              break;
+            }
+          if (attrs[i].is_word_end)
+            {
+              g_set_error (error,
+                           PANGO_VALIDATE_ERROR, PANGO_VALIDATE_ERROR_WORD,
+                           "char %d: Unexpected word end", i);
+              return FALSE;
+            }
+          break;
+
+        case AFTER_START:
+          if (attrs[i].is_word_end)
+            {
+              if (attrs[i].is_word_start)
+                state = AFTER_START;
+              else
+                state = AFTER_END;
+              break;
+            }
+          if (attrs[i].is_word_start)
+            {
+              g_set_error (error,
+                           PANGO_VALIDATE_ERROR, PANGO_VALIDATE_ERROR_WORD,
+                           "char %d: Unexpected word start", i);
+              return FALSE;
+            }
+          break;
+
+        default:
+          g_assert_not_reached ();
+        }
+
+      /* Check that words don't end in the middle of graphemes.
+       * This is a word-level failure, so report it with the WORD code.
+       */
+      if (attrs[i].is_word_boundary && !attrs[i].is_cursor_position)
+        {
+          g_set_error (error,
+                       PANGO_VALIDATE_ERROR, PANGO_VALIDATE_ERROR_WORD,
+                       "char %d: Word ends inside a grapheme", i);
+          return FALSE;
+        }
+    }
+
+  return TRUE;
+}
+
+/* Validate the sentence flags in @attrs: sentence starts and sentence
+ * ends have to alternate. An entry that carries both flags leaves the
+ * current state unchanged.
+ *
+ * @text and @length are not used.
+ *
+ * Returns: TRUE if the flags alternate; otherwise FALSE with @error
+ *   set to a PANGO_VALIDATE_ERROR_SENTENCE error
+ */
+static gboolean
+check_sentence_invariants (const char          *text,
+                           int                  length,
+                           const PangoLogAttr  *attrs,
+                           int                  attrs_len,
+                           GError             **error)
+{
+  /* TRUE after a sentence start that has not been closed by an end */
+  gboolean in_sentence = FALSE;
+
+  for (int i = 0; i < attrs_len; i++)
+    {
+      const gboolean starts = attrs[i].is_sentence_start;
+      const gboolean ends = attrs[i].is_sentence_end;
+
+      if (!in_sentence)
+        {
+          if (starts)
+            {
+              /* A start that is also an end closes itself again */
+              in_sentence = !ends;
+            }
+          else if (ends)
+            {
+              g_set_error (error,
+                           PANGO_VALIDATE_ERROR, PANGO_VALIDATE_ERROR_SENTENCE,
+                           "char %d: Unexpected sentence end", i);
+              return FALSE;
+            }
+        }
+      else
+        {
+          if (ends)
+            {
+              /* An end that is also a start opens the next sentence */
+              in_sentence = starts;
+            }
+          else if (starts)
+            {
+              g_set_error (error,
+                           PANGO_VALIDATE_ERROR, PANGO_VALIDATE_ERROR_SENTENCE,
+                           "char %d: Unexpected sentence start", i);
+              return FALSE;
+            }
+        }
+    }
+
+  return TRUE;
+}
+
+/* Validate the whitespace flags in @log_attrs: any entry flagged as
+ * an expandable space must also be flagged as white.
+ *
+ * @text and @length are not used.
+ *
+ * Returns: TRUE if all entries are consistent; otherwise FALSE with
+ *   @error set to a PANGO_VALIDATE_ERROR_SPACE error
+ */
+static gboolean
+check_space_invariants (const char          *text,
+                        int                  length,
+                        const PangoLogAttr  *log_attrs,
+                        int                  attrs_len,
+                        GError             **error)
+{
+  int i = 0;
+
+  while (i < attrs_len)
+    {
+      const PangoLogAttr *attr = &log_attrs[i];
+
+      if (attr->is_expandable_space && !attr->is_white)
+        {
+          g_set_error (error,
+                       PANGO_VALIDATE_ERROR, PANGO_VALIDATE_ERROR_SPACE,
+                       "char %d: Expandable space must be space", i);
+          return FALSE;
+        }
+
+      i++;
+    }
+
+  return TRUE;
+}
+
+/* }}} */
+/* {{{ Public API */
+
+/*
+ * pango_validate_log_attrs:
+ * @text: text to which @log_attrs belong
+ * @length: length of @text in bytes
+ * @log_attrs: `PangoLogAttr` array to validate
+ * @attrs_len: length of @log_attrs
+ * @error: return location for an error
+ *
+ * Apply sanity checks to @log_attrs.
+ *
+ * This function checks some conditions that Pango
+ * relies on. It is not guaranteed to be an exhaustive
+ * validity test. Currently, it checks that
+ *
+ * - @attrs_len is one more than the number of characters in @text
+ * - The line break flags obey the rules tested by check_line_char(),
+ *   e.g. there's no break before the first char, lines aren't
+ *   broken between \\r and \\n, and mandatory breaks are line breaks
+ * - Word starts and ends alternate
+ * - Word boundaries are cursor positions
+ * - Sentence starts and ends alternate
+ * - Expandable spaces are spaces
+ *
+ * Grapheme flags are not checked yet. The checks run in the order
+ * listed and stop at the first failure.
+ *
+ * Returns: %TRUE if @log_attrs are valid
+ */
+gboolean
+pango_validate_log_attrs (const char          *text,
+                          int                  length,
+                          const PangoLogAttr  *log_attrs,
+                          int                  attrs_len,
+                          GError             **error)
+{
+  /* One attr per character, plus one for the position after the end */
+  const int expected_len = (int) g_utf8_strlen (text, length) + 1;
+
+  if (attrs_len != expected_len)
+    {
+      g_set_error_literal (error,
+                           PANGO_VALIDATE_ERROR, PANGO_VALIDATE_ERROR_FAILED,
+                           "Array has wrong length");
+      return FALSE;
+    }
+
+  /* && short-circuits, so later checks are skipped after a failure */
+  return check_line_invariants (text, length, log_attrs, attrs_len, error) &&
+         check_grapheme_invariants (text, length, log_attrs, attrs_len, error) &&
+         check_word_invariants (text, length, log_attrs, attrs_len, error) &&
+         check_sentence_invariants (text, length, log_attrs, attrs_len, error) &&
+         check_space_invariants (text, length, log_attrs, attrs_len, error);
+}
+
+ /* }}} */
+
+/* vim:set foldmethod=marker expandtab: */
diff --git a/tests/validate-log-attrs.h b/tests/validate-log-attrs.h
new file mode 100644
index 00000000..22b9ae63
--- /dev/null
+++ b/tests/validate-log-attrs.h
@@ -0,0 +1,52 @@
+/* Pango
+ *
+ * Copyright (C) 1999 Red Hat Software
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#ifndef __VALIDATE_LOG_ATTRS_H__
+#define __VALIDATE_LOG_ATTRS_H__
+
+#include <glib.h>
+
+G_BEGIN_DECLS
+
+#include <pango/pango-item.h>
+
+#define PANGO_VALIDATE_ERROR (pango_validate_error_quark ())
+/* Error codes reported in the PANGO_VALIDATE_ERROR domain by
+ * pango_validate_log_attrs().
+ */
+typedef enum
+{
+ PANGO_VALIDATE_ERROR_FAILED,   /* general failure, e.g. the attr array has the wrong length */
+ PANGO_VALIDATE_ERROR_BREAK,    /* a line-break flag is invalid */
+ PANGO_VALIDATE_ERROR_GRAPHEME, /* presumably for grapheme flags; no check uses it yet */
+ PANGO_VALIDATE_ERROR_WORD,     /* word start/end flags are inconsistent */
+ PANGO_VALIDATE_ERROR_SENTENCE, /* sentence start/end flags are inconsistent */
+ PANGO_VALIDATE_ERROR_SPACE     /* an expandable space is not flagged as white */
+} PangoValidateError;
+/* Error domain quark; normally used through PANGO_VALIDATE_ERROR. */
+GQuark pango_validate_error_quark (void);
+/* Sanity-check log_attrs (attrs_len entries) against text (length
+ * bytes). Returns TRUE if they are valid; otherwise returns FALSE and
+ * sets error to a PANGO_VALIDATE_ERROR describing the first problem.
+ */
+gboolean pango_validate_log_attrs (const char *text,
+ int length,
+ const PangoLogAttr *log_attrs,
+ int attrs_len,
+ GError **error);
+
+G_END_DECLS
+
+#endif /* __VALIDATE_LOG_ATTRS_H__ */