summaryrefslogtreecommitdiff
path: root/pango/pango-utils.c
diff options
context:
space:
mode:
Diffstat (limited to 'pango/pango-utils.c')
-rw-r--r--pango/pango-utils.c102
1 files changed, 102 insertions, 0 deletions
diff --git a/pango/pango-utils.c b/pango/pango-utils.c
index 894935e7..412f417a 100644
--- a/pango/pango-utils.c
+++ b/pango/pango-utils.c
@@ -1154,3 +1154,105 @@ _pango_shape_get_extents (gint n_chars,
}
}
+/**
+ * pango_find_paragraph_boundary:
+ * @text: UTF-8 text
+ * @length: length of @text in bytes, or -1 if nul-terminated
+ * @paragraph_delimiter_index: (out): return location for index of
+ *   delimiter
+ * @next_paragraph_start: (out): return location for start of next
+ *   paragraph
+ *
+ * Locates a paragraph boundary in @text.
+ *
+ * A boundary is caused by delimiter characters, such as
+ * a newline, carriage return, carriage return-newline pair,
+ * or Unicode paragraph separator character.
+ *
+ * The byte index of the first delimiter found is returned in
+ * @paragraph_delimiter_index. The byte index of the start
+ * of the next paragraph (the index just past that delimiter)
+ * is stored in @next_paragraph_start.
+ *
+ * If no delimiters are found, both @paragraph_delimiter_index
+ * and @next_paragraph_start are filled with the length of @text
+ * (an index one off the end).
+ *
+ * Either return location may be %NULL. A delimiter is only recognized
+ * when it lies entirely within the bytes of @text covered by @length.
+ */
+void
+pango_find_paragraph_boundary (const char *text,
+                               int         length,
+                               int        *paragraph_delimiter_index,
+                               int        *next_paragraph_start)
+{
+  const char *p = text;
+  const char *end;
+  const char *start = NULL;
+  const char *delimiter = NULL;
+
+  /* Only one character has type G_UNICODE_PARAGRAPH_SEPARATOR in
+   * Unicode 5.0; update the following code if that changes.
+   */
+#define PARAGRAPH_SEPARATOR_STRING "\xE2\x80\xA9"
+  const size_t sep_len = sizeof (PARAGRAPH_SEPARATOR_STRING) - 1;
+
+  /* prev_sep is the first byte of the previous separator, or 0 if
+   * the previous character was not a separator.  Since the valid
+   * separators are \r, \n, and PARAGRAPH_SEPARATOR, the first byte
+   * is enough to identify it.
+   */
+  char prev_sep = 0;
+
+  if (length < 0)
+    length = strlen (text);
+
+  end = text + length;
+
+  /* Report "no delimiter found" unless the scan below says otherwise. */
+  if (paragraph_delimiter_index)
+    *paragraph_delimiter_index = length;
+
+  if (next_paragraph_start)
+    *next_paragraph_start = length;
+
+  while (p < end)
+    {
+      /* The previous character ended the paragraph, unless it was a
+       * \r directly followed by \n: don't break inside a \r\n pair.
+       */
+      if (prev_sep == '\n' ||
+          prev_sep == PARAGRAPH_SEPARATOR_STRING[0] ||
+          (prev_sep == '\r' && *p != '\n'))
+        {
+          g_assert (delimiter);
+          start = p;
+          break;
+        }
+
+      /* Compare against PARAGRAPH_SEPARATOR only when all of its bytes
+       * lie before end: @text need not be nul-terminated when @length
+       * is given, so an unbounded compare could read past the buffer.
+       */
+      if (*p == '\n' ||
+          *p == '\r' ||
+          ((size_t) (end - p) >= sep_len &&
+           memcmp (p, PARAGRAPH_SEPARATOR_STRING, sep_len) == 0))
+        {
+          if (delimiter == NULL)
+            delimiter = p;
+          prev_sep = *p;
+        }
+      else
+        prev_sep = 0;
+
+      p = g_utf8_next_char (p);
+    }
+
+  if (delimiter && paragraph_delimiter_index)
+    *paragraph_delimiter_index = delimiter - text;
+
+  if (start && next_paragraph_start)
+    *next_paragraph_start = start - text;
+}