summary refs log tree commit diff
diff options
context:
space:
mode:
author Devin Torres <devin@devintorr.es> 2014-10-04 16:56:22 -0500
committer Devin Torres <devin@devintorr.es> 2014-10-04 16:56:22 -0500
commit 4e1b16cfab1f99191f5dd9b9498df4162fd44ff9 (patch)
tree 934a4450a299a80bf459b6496ccd3ce8c75fc9a4
parent 278702f09780c25f87faaca82e9b310f27e43d4f (diff)
parent 23a8c1933b686f595e5124bf92c4f1ab0b01da6e (diff)
download rust-hoedown-4e1b16cfab1f99191f5dd9b9498df4162fd44ff9.tar.gz
Merge pull request #130 from MarkLodato/utf8-tab-expansion
Make tab expansion UTF-8 aware.
-rw-r--r-- src/document.c 11
1 file changed, 10 insertions, 1 deletion
diff --git a/src/document.c b/src/document.c
index 3ab7c22..f2ce139 100644
--- a/src/document.c
+++ b/src/document.c
@@ -2671,13 +2671,22 @@ is_ref(const uint8_t *data, size_t beg, size_t end, size_t *last, struct link_re
static void expand_tabs(hoedown_buffer *ob, const uint8_t *line, size_t size)
{
+ /* This code makes two assumptions:
+ * - Input is valid UTF-8. (Any byte with top two bits 10 is skipped,
+ * whether or not it is a valid UTF-8 continuation byte.)
+ * - Input contains no combining characters. (Combining characters
+ * should be skipped but are not.)
+ */
size_t i = 0, tab = 0;
while (i < size) {
size_t org = i;
while (i < size && line[i] != '\t') {
- i++; tab++;
+ i++;
+ /* ignore UTF-8 continuation bytes */
+ if ((line[i] & 0xc0) != 0x80)
+ tab++;
}
if (i > org)