diff options
author | Devin Torres <devin@devintorr.es> | 2014-10-04 16:56:22 -0500 |
---|---|---|
committer | Devin Torres <devin@devintorr.es> | 2014-10-04 16:56:22 -0500 |
commit | 4e1b16cfab1f99191f5dd9b9498df4162fd44ff9 (patch) | |
tree | 934a4450a299a80bf459b6496ccd3ce8c75fc9a4 | |
parent | 278702f09780c25f87faaca82e9b310f27e43d4f (diff) | |
parent | 23a8c1933b686f595e5124bf92c4f1ab0b01da6e (diff) | |
download | rust-hoedown-4e1b16cfab1f99191f5dd9b9498df4162fd44ff9.tar.gz |
Merge pull request #130 from MarkLodato/utf8-tab-expansion
Make tab expansion UTF-8 aware.
-rw-r--r-- | src/document.c | 11 |
1 file changed, 10 insertions, 1 deletion
diff --git a/src/document.c b/src/document.c index 3ab7c22..f2ce139 100644 --- a/src/document.c +++ b/src/document.c @@ -2671,13 +2671,22 @@ is_ref(const uint8_t *data, size_t beg, size_t end, size_t *last, struct link_re static void expand_tabs(hoedown_buffer *ob, const uint8_t *line, size_t size) { + /* This code makes two assumptions: + * - Input is valid UTF-8. (Any byte with top two bits 10 is skipped, + * whether or not it is a valid UTF-8 continuation byte.) + * - Input contains no combining characters. (Combining characters + * should be skipped but are not.) + */ size_t i = 0, tab = 0; while (i < size) { size_t org = i; while (i < size && line[i] != '\t') { - i++; tab++; + i++; + /* ignore UTF-8 continuation bytes */ + if ((line[i] & 0xc0) != 0x80) + tab++; } if (i > org) |