diff options
author | Matthias Clasen <mclasen@redhat.com> | 2021-08-20 12:07:53 -0400 |
---|---|---|
committer | Matthias Clasen <mclasen@redhat.com> | 2021-08-20 12:21:18 -0400 |
commit | 2b097607b7f2b7ab468dcc47f7b58898a1e03b9f (patch) | |
tree | 2afe35be4b50b751313473e2bbb06964931a3c7d /pango/pango-utils.c | |
parent | ba5c0b74ed537e8d11491709373a92d69b0aa1f0 (diff) | |
download | pango-2b097607b7f2b7ab468dcc47f7b58898a1e03b9f.tar.gz |
Cosmetics: Move a function around
pango_find_paragraph_boundary has nothing to do
with breaks, so move it to pango-utils.h
Diffstat (limited to 'pango/pango-utils.c')
-rw-r--r-- | pango/pango-utils.c | 102 |
1 files changed, 102 insertions, 0 deletions
/**
 * pango_find_paragraph_boundary:
 * @text: UTF-8 text
 * @length: length of @text in bytes, or -1 if nul-terminated
 * @paragraph_delimiter_index: (out): return location for index of
 *   delimiter
 * @next_paragraph_start: (out): return location for start of next
 *   paragraph
 *
 * Locates a paragraph boundary in @text.
 *
 * A boundary is caused by delimiter characters, such as
 * a newline, carriage return, carriage return-newline pair,
 * or Unicode paragraph separator character.
 *
 * The byte index of the first delimiter is returned in
 * @paragraph_delimiter_index. The byte index of the start
 * of the next paragraph (the index just past that single delimiter,
 * where a carriage return-newline pair counts as one delimiter) is
 * stored in @next_paragraph_start. Only one delimiter is consumed:
 * for "a\n\nb" the next paragraph starts at index 2, not 3.
 *
 * If no delimiters are found, both @paragraph_delimiter_index
 * and @next_paragraph_start are filled with the length of @text
 * (an index one off the end). If the delimiter is the last thing
 * in @text, @next_paragraph_start is likewise the length of @text.
 *
 * Either return location may be %NULL, in which case that value
 * is not stored.
 */
void
pango_find_paragraph_boundary (const char *text,
                               int         length,
                               int        *paragraph_delimiter_index,
                               int        *next_paragraph_start)
{
  /* Only one character has type G_UNICODE_PARAGRAPH_SEPARATOR in
   * Unicode 5.0; update the following code if that changes.
   */
#define PARAGRAPH_SEPARATOR_STRING "\xE2\x80\xA9"

  const size_t separator_len = sizeof (PARAGRAPH_SEPARATOR_STRING) - 1;
  const char *p;
  const char *end;
  const char *delimiter = NULL;
  const char *start = NULL;

  if (length < 0)
    length = (int) strlen (text);

  end = text + length;

  /* Results for the "no delimiter found" case; overwritten below
   * once a delimiter has been located.
   */
  if (paragraph_delimiter_index)
    *paragraph_delimiter_index = length;

  if (next_paragraph_start)
    *next_paragraph_start = length;

  /* Walk the text one UTF-8 character at a time and stop at the
   * first delimiter, recording where it begins and where it ends.
   */
  for (p = text; p < end; p = g_utf8_next_char (p))
    {
      if (*p == '\n' || *p == '\r')
        {
          delimiter = p;
          start = p + 1;

          /* don't break between \r and \n */
          if (*p == '\r' && start < end && *start == '\n')
            start++;

          break;
        }

      /* Compare against the separator only when all of its bytes lie
       * inside [text, end): with an explicit @length the buffer need
       * not be nul-terminated, so an unbounded comparison could read
       * past the end or match a separator that straddles it.
       */
      if ((size_t) (end - p) >= separator_len &&
          memcmp (p, PARAGRAPH_SEPARATOR_STRING, separator_len) == 0)
        {
          delimiter = p;
          start = p + separator_len;
          break;
        }
    }

  if (delimiter == NULL)
    return;

  if (paragraph_delimiter_index)
    *paragraph_delimiter_index = (int) (delimiter - text);

  /* start never exceeds end, so this is at most the text length */
  if (next_paragraph_start)
    *next_paragraph_start = (int) (start - text);
}