summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMatthias Clasen <mclasen@redhat.com>2021-08-24 00:52:53 +0000
committerMatthias Clasen <mclasen@redhat.com>2021-08-24 00:52:53 +0000
commit4740e552b3c8ca005beea88001a82ea6bb266076 (patch)
tree25a7c9825d322932b58038db4aac44ecf010dbc5
parent8cae1c0762fa35cbe41d35a34d8e048965d287ac (diff)
parent1349e9a424dc5425dd087b382d6042f5cba3b661 (diff)
downloadpango-4740e552b3c8ca005beea88001a82ea6bb266076.tar.gz
Merge branch 'log-attr-things' into 'main'
break-thai: Fix up word break handling See merge request GNOME/pango!434
-rw-r--r--docs/pango_rendering.md9
-rw-r--r--pango/break-thai.c28
-rw-r--r--pango/break.c144
-rw-r--r--pango/pango-break.h14
-rw-r--r--pango/pango-layout.c22
-rw-r--r--tests/breaks/four.expected12
6 files changed, 153 insertions, 76 deletions
diff --git a/docs/pango_rendering.md b/docs/pango_rendering.md
index 2745c0c0..4bec5176 100644
--- a/docs/pango_rendering.md
+++ b/docs/pango_rendering.md
@@ -15,18 +15,19 @@ various stages of this pipeline and the APIs that implement them.
Itemization
: breaks a piece of text into segments with consistent direction and shaping
- properies. Among other things, this determines which font to use for each
+ properties. Among other things, this determines which font to use for each
character. Use [func@Pango.itemize] or [func@Pango.itemize_with_base_dir]
to itemize text.
Shaping
: converts characters into glyphs. Use [func@Pango.shape],
- [func@Pango.shape_full] or [func@Pango.shape_with_flags] to shape text.
+ [func@Pango.shape_full] or [func@Pango.shape_item] to shape text.
Line Breaking
: determines where line breaks should be inserted into a sequence of glyphs.
- The function [func@Pango.break] determines possible line breaks. The actual
- line breaking is done by [class@Pango.Layout].
+ The functions [func@Pango.default_break], [func@Pango.tailor_break] and
+ [func@Pango.attr_break] determine possible line breaks. The actual line
+ breaking is done by [class@Pango.Layout].
Justification
: adjusts inter-word spacing to form lines of even length. This is done by
diff --git a/pango/break-thai.c b/pango/break-thai.c
index 871c0869..02a18cc5 100644
--- a/pango/break-thai.c
+++ b/pango/break-thai.c
@@ -92,15 +92,27 @@ break_thai (const char *text,
G_UNLOCK (thai_brk);
for (cnt = 0; cnt < len; cnt++)
- if (attrs[brk_pnts[cnt]].is_char_break)
{
- /* Only allow additional line breaks if line-breaking is NOT
- * prohibited. (The alternative would be to set is_char_break to
- * TRUE as well. NOT setting it will break invariants that any
- * line break opportunity is also a char break opportunity. */
- attrs[brk_pnts[cnt]].is_line_break = TRUE;
- attrs[brk_pnts[cnt]].is_word_start = TRUE;
- attrs[brk_pnts[cnt]].is_word_end = TRUE;
+ if (!attrs[brk_pnts[cnt]].is_line_break)
+ {
+ /* Insert line breaks where there wasn't one.
+ * Satisfy invariants by marking it as char break too.
+ */
+ attrs[brk_pnts[cnt]].is_char_break = TRUE;
+ attrs[brk_pnts[cnt]].is_line_break = TRUE;
+ }
+ if (!(attrs[brk_pnts[cnt]].is_word_start ||
+ attrs[brk_pnts[cnt]].is_word_end))
+ {
+ /* If we find a break in the middle of a sequence
+ * of characters, end and start a word. We must
+ * be careful only to do that if default_break
+ * did not already find a word start or end,
+ * otherwise we mess up the sequence.
+ */
+ attrs[brk_pnts[cnt]].is_word_start = TRUE;
+ attrs[brk_pnts[cnt]].is_word_end = TRUE;
+ }
}
if (brk_pnts != brk_stack)
diff --git a/pango/break.c b/pango/break.c
index b2586da2..c55d5f22 100644
--- a/pango/break.c
+++ b/pango/break.c
@@ -139,29 +139,12 @@ typedef enum
WordNumbers
} WordType;
-
-/**
- * pango_default_break:
- * @text: text to break. Must be valid UTF-8
- * @length: length of text in bytes (may be -1 if @text is nul-terminated)
- * @analysis: (nullable): a `PangoAnalysis` structure for the @text
- * @attrs: logical attributes to fill in
- * @attrs_len: size of the array passed as @attrs
- *
- * This is the default break algorithm.
- *
- * It applies Unicode rules without language-specific
- * tailoring, therefore the @analyis argument is unused
- * and can be %NULL.
- *
- * See [func@Pango.tailor_break] for language-specific breaks.
- */
-void
-pango_default_break (const gchar *text,
- gint length,
- PangoAnalysis *analysis G_GNUC_UNUSED,
- PangoLogAttr *attrs,
- int attrs_len G_GNUC_UNUSED)
+static void
+default_break (const char *text,
+ int length,
+ PangoAnalysis *analysis G_GNUC_UNUSED,
+ PangoLogAttr *attrs,
+ int attrs_len G_GNUC_UNUSED)
{
/* The rationale for all this is in section 5.15 of the Unicode 3.0 book,
* the line breaking stuff is also in TR14 on unicode.org
@@ -1639,7 +1622,7 @@ break_script (const char *item_text,
}
/* }}} */
-/* {{{ Attribute-based tailoring */
+/* {{{ Attribute-based customization */
static gboolean
break_attrs (const char *text,
@@ -1707,11 +1690,11 @@ break_attrs (const char *text,
static gboolean
tailor_break (const char *text,
- int length,
- PangoAnalysis *analysis,
+ int length,
+ PangoAnalysis *analysis,
int item_offset,
- PangoLogAttr *attrs,
- int attrs_len)
+ PangoLogAttr *attrs,
+ int attrs_len)
{
gboolean res;
@@ -1732,6 +1715,34 @@ tailor_break (const char *text,
/* {{{ Public API */
/**
+ * pango_default_break:
+ * @text: text to break. Must be valid UTF-8
+ * @length: length of text in bytes (may be -1 if @text is nul-terminated)
+ * @analysis: (nullable): a `PangoAnalysis` structure for the @text
+ * @attrs: logical attributes to fill in
+ * @attrs_len: size of the array passed as @attrs
+ *
+ * This is the default break algorithm.
+ *
+ * It applies Unicode rules without language-specific
+ * tailoring, therefore the @analysis argument is unused
+ * and can be %NULL.
+ *
+ * See [func@Pango.tailor_break] for language-specific breaks.
+ *
+ * See [func@Pango.attr_break] for attribute-based customization.
+ */
+void
+pango_default_break (const char *text,
+ int length,
+ PangoAnalysis *analysis G_GNUC_UNUSED,
+ PangoLogAttr *attrs,
+ int attrs_len G_GNUC_UNUSED)
+{
+ default_break (text, length, analysis, attrs, attrs_len);
+}
+
+/**
* pango_break:
* @text: the text to process. Must be valid UTF-8
* @length: length of @text in bytes (may be -1 if @text is nul-terminated)
@@ -1745,11 +1756,11 @@ tailor_break (const char *text,
* For most purposes you may want to use
* [func@Pango.get_log_attrs].
*
- * Deprecated: 1.44: Use [func@Pango.default_break] and
- * [func@Pango.tailor_break]
+ * Deprecated: 1.44: Use [func@Pango.default_break],
+ * [func@Pango.tailor_break] and [func@Pango.attr_break].
*/
void
-pango_break (const gchar *text,
+pango_break (const char *text,
gint length,
PangoAnalysis *analysis,
PangoLogAttr *attrs,
@@ -1758,7 +1769,7 @@ pango_break (const gchar *text,
g_return_if_fail (analysis != NULL);
g_return_if_fail (attrs != NULL);
- pango_default_break (text, length, analysis, attrs, attrs_len);
+ default_break (text, length, analysis, attrs, attrs_len);
tailor_break (text, length, analysis, -1, attrs, attrs_len);
}
@@ -1769,12 +1780,11 @@ pango_break (const gchar *text,
* @analysis: `PangoAnalysis` for @text
* @offset: Byte offset of @text from the beginning of the
* paragraph, or -1 to ignore attributes from @analysis
- * @log_attrs: (array length=log_attrs_len): array with one `PangoLogAttr`
+ * @attrs: (array length=attrs_len): array with one `PangoLogAttr`
* per character in @text, plus one extra, to be filled in
- * @log_attrs_len: length of @log_attrs array
+ * @attrs_len: length of @attrs array
*
- * Apply language-specific tailoring to the breaks
- * in @log_attrs.
+ * Apply language-specific tailoring to the breaks in @attrs.
*
* The line breaks are assumed to have been produced
* by [func@Pango.default_break].
@@ -1782,6 +1792,10 @@ pango_break (const gchar *text,
* If @offset is not -1, it is used to apply attributes
* from @analysis that are relevant to line breaking.
*
+ * Note that it is better to pass -1 for @offset and
+ * use [func@Pango.attr_break] to apply attributes to
+ * the whole paragraph.
+ *
* Since: 1.44
*/
void
@@ -1789,13 +1803,13 @@ pango_tailor_break (const char *text,
int length,
PangoAnalysis *analysis,
int offset,
- PangoLogAttr *log_attrs,
- int log_attrs_len)
+ PangoLogAttr *attrs,
+ int attrs_len)
{
- PangoLogAttr *start = log_attrs;
+ PangoLogAttr *start = attrs;
PangoLogAttr attr_before = *start;
- if (tailor_break (text, length, analysis, offset, log_attrs, log_attrs_len))
+ if (tailor_break (text, length, analysis, offset, attrs, attrs_len))
{
/* if tailored, we enforce some of the attrs from before
* tailoring at the boundary
@@ -1810,18 +1824,50 @@ pango_tailor_break (const char *text,
}
/**
+ * pango_attr_break:
+ * @text: text to break. Must be valid UTF-8
+ * @length: length of text in bytes (may be -1 if @text is nul-terminated)
+ * @attr_list: `PangoAttrList` to apply
+ * @offset: Byte offset of @text from the beginning of the paragraph
+ * @attrs: (array length=attrs_len): array with one `PangoLogAttr`
+ * per character in @text, plus one extra, to be filled in
+ * @attrs_len: length of @attrs array
+ *
+ * Apply customization from attributes to the breaks in @attrs.
+ *
+ * The line breaks are assumed to have been produced
+ * by [func@Pango.default_break] and [func@Pango.tailor_break].
+ *
+ * Since: 1.50
+ */
+void
+pango_attr_break (const char *text,
+ int length,
+ PangoAttrList *attr_list,
+ int offset,
+ PangoLogAttr *attrs,
+ int attrs_len)
+{
+ GSList *attributes;
+
+ attributes = pango_attr_list_get_attributes (attr_list);
+ break_attrs (text, length, attributes, offset, attrs, attrs_len);
+ g_slist_free_full (attributes, (GDestroyNotify)pango_attribute_destroy);
+}
+
+/**
* pango_get_log_attrs:
* @text: text to process. Must be valid UTF-8
* @length: length in bytes of @text
* @level: embedding level, or -1 if unknown
* @language: language tag
- * @log_attrs: (array length=attrs_len): array with one `PangoLogAttr`
+ * @attrs: (array length=attrs_len): array with one `PangoLogAttr`
* per character in @text, plus one extra, to be filled in
- * @attrs_len: length of @log_attrs array
+ * @attrs_len: length of @attrs array
*
* Computes a `PangoLogAttr` for each character in @text.
*
- * The @log_attrs array must have one `PangoLogAttr` for
+ * The @attrs array must have one `PangoLogAttr` for
* each position in @text; if @text contains N characters,
* it has N+1 positions, including the last position at the
* end of the text. @text should be an entire paragraph;
@@ -1834,7 +1880,7 @@ pango_get_log_attrs (const char *text,
int length,
int level,
PangoLanguage *language,
- PangoLogAttr *log_attrs,
+ PangoLogAttr *attrs,
int attrs_len)
{
int chars_broken;
@@ -1842,12 +1888,12 @@ pango_get_log_attrs (const char *text,
PangoScriptIter iter;
g_return_if_fail (length == 0 || text != NULL);
- g_return_if_fail (log_attrs != NULL);
+ g_return_if_fail (attrs != NULL);
analysis.level = level;
analysis.language = language;
- pango_default_break (text, length, &analysis, log_attrs, attrs_len);
+ pango_default_break (text, length, &analysis, attrs, attrs_len);
chars_broken = 0;
@@ -1867,7 +1913,7 @@ pango_get_log_attrs (const char *text,
run_end - run_start,
&analysis,
-1,
- log_attrs + chars_broken,
+ attrs + chars_broken,
chars_in_range + 1);
chars_broken += chars_in_range;
@@ -1881,4 +1927,6 @@ pango_get_log_attrs (const char *text,
attrs_len);
}
-/* }}} */
+ /* }}} */
+
+/* vim:set foldmethod=marker expandtab: */
diff --git a/pango/pango-break.h b/pango/pango-break.h
index 81526e9a..a8e6c5b9 100644
--- a/pango/pango-break.h
+++ b/pango/pango-break.h
@@ -105,7 +105,7 @@ void pango_get_log_attrs (const char *text,
int length,
int level,
PangoLanguage *language,
- PangoLogAttr *log_attrs,
+ PangoLogAttr *attrs,
int attrs_len);
PANGO_AVAILABLE_IN_ALL
@@ -120,8 +120,16 @@ void pango_tailor_break (const char *text,
int length,
PangoAnalysis *analysis,
int offset,
- PangoLogAttr *log_attrs,
- int log_attrs_len);
+ PangoLogAttr *attrs,
+ int attrs_len);
+
+PANGO_AVAILABLE_IN_1_50
+void pango_attr_break (const char *text,
+ int length,
+ PangoAttrList *attr_list,
+ int offset,
+ PangoLogAttr *attrs,
+ int attrs_len);
G_END_DECLS
diff --git a/pango/pango-layout.c b/pango/pango-layout.c
index f78d7daf..b6e0c217 100644
--- a/pango/pango-layout.c
+++ b/pango/pango-layout.c
@@ -4214,12 +4214,13 @@ process_line (PangoLayout *layout,
}
static void
-get_items_log_attrs (const char *text,
- int start,
- int length,
- GList *items,
- PangoLogAttr *log_attrs,
- int log_attrs_len)
+get_items_log_attrs (const char *text,
+ int start,
+ int length,
+ GList *items,
+ PangoAttrList *attrs,
+ PangoLogAttr *log_attrs,
+ int log_attrs_len)
{
int offset = 0;
GList *l;
@@ -4235,12 +4236,18 @@ get_items_log_attrs (const char *text,
pango_tailor_break (text + item->offset,
item->length,
&item->analysis,
- item->offset,
+ -1,
log_attrs + offset,
item->num_chars + 1);
offset += item->num_chars;
}
+
+ if (attrs && items)
+ {
+ PangoItem *item = items->data;
+ pango_attr_break (text + start, length, attrs, item->offset, log_attrs, log_attrs_len);
+ }
}
static PangoAttrList *
@@ -4517,6 +4524,7 @@ pango_layout_check_lines (PangoLayout *layout)
start - layout->text,
delimiter_index + delim_len,
state.items,
+ shape_attrs,
layout->log_attrs + start_offset,
layout->n_chars + 1 - start_offset);
diff --git a/tests/breaks/four.expected b/tests/breaks/four.expected
index ce58e10b..2f29d778 100644
--- a/tests/breaks/four.expected
+++ b/tests/breaks/four.expected
@@ -1,6 +1,6 @@
-Text: ⁦ภ⁩ ⁦า⁩ ⁦ษ⁩ ⁦า⁩ ⁦ไ⁩ ⁦ท⁩ ⁦ย⁩ [ ] ⁦ห⁩ ⁦ร⁩⁦ื⁩ ⁦อ⁩ [ ] ⁦ภ⁩ ⁦า⁩ ⁦ษ⁩ ⁦า⁩ ⁦ไ⁩ ⁦ท⁩ ⁦ย⁩ ⁦ก⁩ ⁦ล⁩ ⁦า⁩ ⁦ง⁩ [ ] ⁦เ⁩ ⁦ป⁩⁦็⁩ ⁦น⁩ ⁦ภ⁩ ⁦า⁩ ⁦ษ⁩ ⁦า⁩ ⁦ร⁩ ⁦า⁩ ⁦ช⁩ ⁦ก⁩ ⁦า⁩ ⁦ร⁩ ⁦แ⁩ ⁦ล⁩ ⁦ะ⁩ ⁦ภ⁩ ⁦า⁩ ⁦ษ⁩ ⁦า⁩ ⁦ป⁩ ⁦ร⁩ ⁦ะ⁩ ⁦จ⁩ ⁦ำ⁩ ⁦ช⁩ ⁦า⁩ ⁦ต⁩⁦ิ⁩ ⁦ข⁩ ⁦อ⁩ ⁦ง⁩ ⁦ป⁩ ⁦ร⁩ ⁦ะ⁩ ⁦เ⁩ ⁦ท⁩ ⁦ศ⁩ ⁦ไ⁩ ⁦ท⁩ ⁦ย⁩ [ ] ⁦ภ⁩ ⁦า⁩ ⁦ษ⁩ ⁦า⁩ ⁦ไ⁩ ⁦ท⁩ ⁦ย⁩ ⁦เ⁩ ⁦ป⁩⁦็⁩ ⁦น⁩ ⁦ภ⁩ ⁦า⁩ ⁦ษ⁩ ⁦า⁩ ⁦ใ⁩ ⁦น⁩ ⁦ก⁩ ⁦ล⁩⁦ุ⁩⁦่⁩ ⁦ม⁩ ⁦ภ⁩ ⁦า⁩ ⁦ษ⁩ ⁦า⁩ ⁦ไ⁩ ⁦ท⁩ ⁦ซ⁩⁦ึ⁩⁦่⁩ ⁦ง⁩ ⁦เ⁩ ⁦ป⁩⁦็⁩ ⁦น⁩ ⁦ก⁩ ⁦ล⁩⁦ุ⁩⁦่⁩ ⁦ม⁩ ⁦ย⁩⁦่⁩ ⁦อ⁩ ⁦ย⁩ ⁦ข⁩ ⁦อ⁩ ⁦ง⁩ ⁦ต⁩ ⁦ร⁩ ⁦ะ⁩ ⁦ก⁩⁦ู⁩ ⁦ล⁩ ⁦ภ⁩ ⁦า⁩ ⁦ษ⁩ ⁦า⁩ ⁦ข⁩ ⁦ร⁩⁦้⁩ ⁦า⁩ [ ] ⁦ไ⁩ ⁦ท⁩ [ ] ⁦ส⁩⁦ั⁩ ⁦น⁩ ⁦น⁩⁦ิ⁩ ⁦ษ⁩ ⁦ฐ⁩ ⁦า⁩ ⁦น⁩ ⁦ว⁩⁦่⁩ ⁦า⁩ [ ] ⁦ภ⁩ ⁦า⁩ ⁦ษ⁩ ⁦า⁩ ⁦ใ⁩ ⁦น⁩ ⁦ต⁩ ⁦ร⁩ ⁦ะ⁩ ⁦ก⁩⁦ู⁩ ⁦ล⁩ ⁦น⁩⁦ี⁩⁦้⁩ ⁦ม⁩⁦ี⁩ ⁦ถ⁩⁦ิ⁩⁦่⁩ ⁦น⁩ ⁦ก⁩ ⁦ำ⁩ ⁦เ⁩ ⁦น⁩⁦ิ⁩ ⁦ด⁩ ⁦จ⁩ ⁦า⁩ ⁦ก⁩ ⁦ท⁩ ⁦า⁩ ⁦ง⁩ ⁦ต⁩ ⁦อ⁩ ⁦น⁩ ⁦ใ⁩ ⁦ต⁩⁦้⁩ ⁦ข⁩ ⁦อ⁩ ⁦ง⁩ ⁦ป⁩ ⁦ร⁩ ⁦ะ⁩ ⁦เ⁩ ⁦ท⁩ ⁦ศ⁩ ⁦จ⁩⁦ี⁩ ⁦น⁩ [ ] ⁦แ⁩ ⁦ล⁩ ⁦ะ⁩ ⁦น⁩⁦ั⁩ ⁦ก⁩ ⁦ภ⁩ ⁦า⁩ ⁦ษ⁩ ⁦า⁩ ⁦ศ⁩ ⁦า⁩ ⁦ส⁩ ⁦ต⁩ ⁦ร⁩⁦์⁩ ⁦บ⁩ ⁦า⁩ ⁦ง⁩ ⁦ส⁩⁦่⁩ ⁦ว⁩ ⁦น⁩ ⁦เ⁩ ⁦ส⁩ ⁦น⁩ ⁦อ⁩ ⁦ว⁩⁦่⁩ ⁦า⁩ [ ] ⁦ภ⁩ ⁦า⁩ ⁦ษ⁩ ⁦า⁩ ⁦ไ⁩ ⁦ท⁩ ⁦ย⁩ ⁦น⁩⁦่⁩ ⁦า⁩ ⁦จ⁩ ⁦ะ⁩ ⁦ม⁩⁦ี⁩ ⁦ค⁩ ⁦ว⁩ ⁦า⁩ ⁦ม⁩ ⁦เ⁩ ⁦ช⁩⁦ื⁩⁦่⁩ ⁦อ⁩ ⁦ม⁩ ⁦โ⁩ ⁦ย⁩ ⁦ง⁩ ⁦ก⁩⁦ั⁩ ⁦บ⁩ ⁦ต⁩ ⁦ร⁩ ⁦ะ⁩ ⁦ก⁩⁦ู⁩ ⁦ล⁩ ⁦ภ⁩ ⁦า⁩ ⁦ษ⁩ ⁦า⁩ ⁦อ⁩ ⁦อ⁩ ⁦ส⁩ ⁦โ⁩ ⁦ต⁩ ⁦ร⁩ [ ] ⁦เ⁩ ⁦อ⁩ ⁦เ⁩ ⁦ช⁩⁦ี⁩ ⁦ย⁩ ⁦ต⁩⁦ิ⁩ ⁦ก⁩ [ ] ⁦ต⁩ ⁦ร⁩ ⁦ะ⁩ ⁦ก⁩⁦ู⁩ ⁦ล⁩ ⁦ภ⁩ ⁦า⁩ ⁦ษ⁩ ⁦า⁩ ⁦อ⁩ ⁦อ⁩ ⁦ส⁩ ⁦โ⁩ ⁦ต⁩ ⁦ร⁩ ⁦น⁩⁦ี⁩ ⁦เ⁩ ⁦ซ⁩⁦ี⁩ ⁦ย⁩ ⁦น⁩ [ ] ⁦แ⁩ ⁦ล⁩ ⁦ะ⁩ ⁦ต⁩ ⁦ร⁩ ⁦ะ⁩ ⁦ก⁩⁦ู⁩ ⁦ล⁩ ⁦ภ⁩ ⁦า⁩ ⁦ษ⁩ ⁦า⁩ ⁦จ⁩⁦ี⁩ ⁦น⁩ [ ] ⁦ท⁩⁦ิ⁩ ⁦เ⁩ ⁦บ⁩ ⁦ต⁩ [0x0a]
-Breaks: c c c c lc c c c lc c c c lc c c c lc c c lc c c c c lc c c lc c c c lc c c c c c lc c c lc c c c lc c c c c lc c c lc c c lc c c c c c lc c c c lc c c c lc c c lc c c lc c c c lc c lc c c lc c c c lc c lc c lc c c lc c c lc c c lc c c lc c c c c lc c c c lc c c c lc c c lc c c c c c c lc c c lc c c c lc c lc c c c c lc lc lc c lc c c c c lc c c lc c c lc c c lc c lc c c lc c c c c c lc c c lc c c lc c lc c c c c c c c c lc c c lc c c lc c c c lc c c lc c c c lc c c lc c lc c lc lc c c c lc c c c lc c c lc c lc c c c c lc c c c lc c lc c c c c lc c c c c lc c c lc c c c c lc c c c lc c lc c c c c c c c c c lc c c lc c c c c lc c c c lc c c lc c c c c c
-Whitespace: x x x x x x x x x x x x x w w
-Sentences: bs e b
-Words: bs b b b bse b b be bse b b be bse b b b bse b b bse b b b be bse b b bse b b b bse b b b b b bse b b bse b b b bse b b b b bse b b bse b b bse b b b b b bse b b be bse b b b bse b b bse b b bse b b b bse b bse b b bse b b b bse b bse b bse b b bse b b bse b b bse b b bse b b b b bse b b b bse b b be bse b be bse b b b b b b bse b be bse b b b bse b bse b b b b bse bse bse b bse b b b b bse b b bse b b bse b b bse b bse b b bse b b b b b bse b be bse b b bse b bse b b b b b b b b bse b b bse b b bse b b b bse b be bse b b b bse b b bse b bse b bse bse b b b bse b b b bse b b bse b bse b b b b bse b b b bse b bse b b b be bse b b b b bse b be bse b b b b bse b b b bse b bse b b b b b b b b be bse b b bse b b b b bse b b b bse b be bse b b b be b
-Graphemes: b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b
+Text: ⁦ภ⁩ ⁦า⁩ ⁦ษ⁩ ⁦า⁩ ⁦ไ⁩ ⁦ท⁩ ⁦ย⁩ [ ] ⁦ห⁩ ⁦ร⁩⁦ื⁩ ⁦อ⁩ [ ] ⁦ภ⁩ ⁦า⁩ ⁦ษ⁩ ⁦า⁩ ⁦ไ⁩ ⁦ท⁩ ⁦ย⁩ ⁦ก⁩ ⁦ล⁩ ⁦า⁩ ⁦ง⁩ [ ] ⁦เ⁩ ⁦ป⁩⁦็⁩ ⁦น⁩ ⁦ภ⁩ ⁦า⁩ ⁦ษ⁩ ⁦า⁩ ⁦ร⁩ ⁦า⁩ ⁦ช⁩ ⁦ก⁩ ⁦า⁩ ⁦ร⁩ ⁦แ⁩ ⁦ล⁩ ⁦ะ⁩ ⁦ภ⁩ ⁦า⁩ ⁦ษ⁩ ⁦า⁩ ⁦ป⁩ ⁦ร⁩ ⁦ะ⁩ ⁦จ⁩ ⁦ำ⁩ ⁦ช⁩ ⁦า⁩ ⁦ต⁩⁦ิ⁩ ⁦ข⁩ ⁦อ⁩ ⁦ง⁩ ⁦ป⁩ ⁦ร⁩ ⁦ะ⁩ ⁦เ⁩ ⁦ท⁩ ⁦ศ⁩ ⁦ไ⁩ ⁦ท⁩ ⁦ย⁩ [ ] ⁦ภ⁩ ⁦า⁩ ⁦ษ⁩ ⁦า⁩ ⁦ไ⁩ ⁦ท⁩ ⁦ย⁩ ⁦เ⁩ ⁦ป⁩⁦็⁩ ⁦น⁩ ⁦ภ⁩ ⁦า⁩ ⁦ษ⁩ ⁦า⁩ ⁦ใ⁩ ⁦น⁩ ⁦ก⁩ ⁦ล⁩⁦ุ⁩⁦่⁩ ⁦ม⁩ ⁦ภ⁩ ⁦า⁩ ⁦ษ⁩ ⁦า⁩ ⁦ไ⁩ ⁦ท⁩ ⁦ซ⁩⁦ึ⁩⁦่⁩ ⁦ง⁩ ⁦เ⁩ ⁦ป⁩⁦็⁩ ⁦น⁩ ⁦ก⁩ ⁦ล⁩⁦ุ⁩⁦่⁩ ⁦ม⁩ ⁦ย⁩⁦่⁩ ⁦อ⁩ ⁦ย⁩ ⁦ข⁩ ⁦อ⁩ ⁦ง⁩ ⁦ต⁩ ⁦ร⁩ ⁦ะ⁩ ⁦ก⁩⁦ู⁩ ⁦ล⁩ ⁦ภ⁩ ⁦า⁩ ⁦ษ⁩ ⁦า⁩ ⁦ข⁩ ⁦ร⁩⁦้⁩ ⁦า⁩ [ ] ⁦ไ⁩ ⁦ท⁩ [ ] ⁦ส⁩⁦ั⁩ ⁦น⁩ ⁦น⁩⁦ิ⁩ ⁦ษ⁩ ⁦ฐ⁩ ⁦า⁩ ⁦น⁩ ⁦ว⁩⁦่⁩ ⁦า⁩ [ ] ⁦ภ⁩ ⁦า⁩ ⁦ษ⁩ ⁦า⁩ ⁦ใ⁩ ⁦น⁩ ⁦ต⁩ ⁦ร⁩ ⁦ะ⁩ ⁦ก⁩⁦ู⁩ ⁦ล⁩ ⁦น⁩⁦ี⁩⁦้⁩ ⁦ม⁩⁦ี⁩ ⁦ถ⁩⁦ิ⁩⁦่⁩ ⁦น⁩ ⁦ก⁩ ⁦ำ⁩ ⁦เ⁩ ⁦น⁩⁦ิ⁩ ⁦ด⁩ ⁦จ⁩ ⁦า⁩ ⁦ก⁩ ⁦ท⁩ ⁦า⁩ ⁦ง⁩ ⁦ต⁩ ⁦อ⁩ ⁦น⁩ ⁦ใ⁩ ⁦ต⁩⁦้⁩ ⁦ข⁩ ⁦อ⁩ ⁦ง⁩ ⁦ป⁩ ⁦ร⁩ ⁦ะ⁩ ⁦เ⁩ ⁦ท⁩ ⁦ศ⁩ ⁦จ⁩⁦ี⁩ ⁦น⁩ [ ] ⁦แ⁩ ⁦ล⁩ ⁦ะ⁩ ⁦น⁩⁦ั⁩ ⁦ก⁩ ⁦ภ⁩ ⁦า⁩ ⁦ษ⁩ ⁦า⁩ ⁦ศ⁩ ⁦า⁩ ⁦ส⁩ ⁦ต⁩ ⁦ร⁩⁦์⁩ ⁦บ⁩ ⁦า⁩ ⁦ง⁩ ⁦ส⁩⁦่⁩ ⁦ว⁩ ⁦น⁩ ⁦เ⁩ ⁦ส⁩ ⁦น⁩ ⁦อ⁩ ⁦ว⁩⁦่⁩ ⁦า⁩ [ ] ⁦ภ⁩ ⁦า⁩ ⁦ษ⁩ ⁦า⁩ ⁦ไ⁩ ⁦ท⁩ ⁦ย⁩ ⁦น⁩⁦่⁩ ⁦า⁩ ⁦จ⁩ ⁦ะ⁩ ⁦ม⁩⁦ี⁩ ⁦ค⁩ ⁦ว⁩ ⁦า⁩ ⁦ม⁩ ⁦เ⁩ ⁦ช⁩⁦ื⁩⁦่⁩ ⁦อ⁩ ⁦ม⁩ ⁦โ⁩ ⁦ย⁩ ⁦ง⁩ ⁦ก⁩⁦ั⁩ ⁦บ⁩ ⁦ต⁩ ⁦ร⁩ ⁦ะ⁩ ⁦ก⁩⁦ู⁩ ⁦ล⁩ ⁦ภ⁩ ⁦า⁩ ⁦ษ⁩ ⁦า⁩ ⁦อ⁩ ⁦อ⁩ ⁦ส⁩ ⁦โ⁩ ⁦ต⁩ ⁦ร⁩ [ ] ⁦เ⁩ ⁦อ⁩ ⁦เ⁩ ⁦ช⁩⁦ี⁩ ⁦ย⁩ ⁦ต⁩⁦ิ⁩ ⁦ก⁩ [ ] ⁦ต⁩ ⁦ร⁩ ⁦ะ⁩ ⁦ก⁩⁦ู⁩ ⁦ล⁩ ⁦ภ⁩ ⁦า⁩ ⁦ษ⁩ ⁦า⁩ ⁦อ⁩ ⁦อ⁩ ⁦ส⁩ ⁦โ⁩ ⁦ต⁩ ⁦ร⁩ ⁦น⁩⁦ี⁩ ⁦เ⁩ ⁦ซ⁩⁦ี⁩ ⁦ย⁩ ⁦น⁩ [ ] ⁦แ⁩ ⁦ล⁩ ⁦ะ⁩ ⁦ต⁩ ⁦ร⁩ ⁦ะ⁩ ⁦ก⁩⁦ู⁩ ⁦ล⁩ ⁦ภ⁩ ⁦า⁩ ⁦ษ⁩ ⁦า⁩ ⁦จ⁩⁦ี⁩ ⁦น⁩ [ ] ⁦ท⁩⁦ิ⁩ ⁦เ⁩ ⁦บ⁩ ⁦ต⁩ [0x0a]
+Breaks: c c c c lc c c c lc c c c lc c c c lc c c lc c c c c lc c c lc c c c lc c c c c c lc c c lc c c c lc c c c c lc c c lc c c lc c c c c c lc c c c lc c c c lc c c lc c c lc c c c lc c lc c c lc c c c lc c lc c lc c c lc c c lc c c lc c c lc c c c c lc c c c lc c c c lc c c lc c c c c c c lc c c lc c c c lc c lc c c c c lc lc lc c lc c c c c lc c c lc c c lc c c lc c lc c c lc c c c c c lc c c lc c c lc c lc c c c c c c c c lc c c lc c c lc c c c lc c c lc c c c lc c c lc c lc c lc lc c c c lc c c c lc c c lc c lc c c c c lc c c c lc c lc c c c c lc c c c c lc c c lc c c c c lc c c c lc c lc c c c c c c c c c lc c c lc c c c c lc c c c lc c c lc c c c c c
+Whitespace: x x x x x x x x x x x x x w w
+Sentences: bs e b
+Words: bs b b b bse b b be bs b b be bs b b b bse b b bse b b b be bs b b bse b b b bse b b b b b bse b b bse b b b bse b b b b bse b b bse b b bse b b b b b bse b b be bs b b b bse b b bse b b bse b b b bse b bse b b bse b b b bse b bse b bse b b bse b b bse b b bse b b bse b b b b bse b b b bse b b be bs b be bs b b b b b b bse b be bs b b b bse b bse b b b b bse bse bse b bse b b b b bse b b bse b b bse b b bse b bse b b bse b b b b b bse b be bs b b bse b bse b b b b b b b b bse b b bse b b bse b b b bse b be bs b b b bse b b bse b bse b bse bse b b b bse b b b bse b b bse b bse b b b b bse b b b bse b bse b b b be bs b b b b bse b be bs b b b b bse b b b bse b bse b b b b b b b b be bs b b bse b b b b bse b b b bse b be bs b b b be b
+Graphemes: b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b