diff options
author | Xavier Mendez <jmendeth@gmail.com> | 2014-08-26 16:45:58 +0200 |
---|---|---|
committer | Xavier Mendez <jmendeth@gmail.com> | 2014-08-26 16:45:58 +0200 |
commit | c6238419ffb1fa542f37dfe67916d525069dc53c (patch) | |
tree | 3e6e7e8da3a730c3d4a6d74b43cee925e35b65d5 | |
parent | 6590101008066f69ad36f6ea5cd47e4da16baaff (diff) | |
parent | 62908524c52f2b52c5d35d32390fc97b7b632c32 (diff) | |
download | rust-hoedown-c6238419ffb1fa542f37dfe67916d525069dc53c.tar.gz |
Merge pull request #110 from jmendeth/moar-bugfixes
Moar bugfixes
-rw-r--r-- | bin/hoedown.c | 2 | ||||
-rw-r--r-- | src/document.c | 265 |
2 files changed, 123 insertions, 144 deletions
diff --git a/bin/hoedown.c b/bin/hoedown.c index fdad045..1440c55 100644 --- a/bin/hoedown.c +++ b/bin/hoedown.c @@ -65,7 +65,7 @@ static struct extension_info extensions_info[] = { {HOEDOWN_EXT_QUOTE, "quote", "Render \"quotes\" as <q>quotes</q>."}, {HOEDOWN_EXT_SUPERSCRIPT, "superscript", "Parse super^script."}, - {HOEDOWN_EXT_LAX_SPACING, "lax-spacing", "Allow HTML blocks on the same line as text."}, + {HOEDOWN_EXT_LAX_SPACING, "lax-spacing", "Don't require a blank line between some blocks."}, {HOEDOWN_EXT_NO_INTRA_EMPHASIS, "disable-intra-emphasis", "Disable emphasis_between_words."}, {HOEDOWN_EXT_SPACE_HEADERS, "space-headers", "Require a space after '#' in headers."}, diff --git a/src/document.c b/src/document.c index 8ae8eb9..ba0e2f5 100644 --- a/src/document.c +++ b/src/document.c @@ -328,6 +328,28 @@ _isspace(int c) return c == ' ' || c == '\n'; } +/* + * Replace all spacing characters in data with spaces. As a special + * case, this collapses a newline with the previous space, if possible. 
+ */ +static inline void +replace_spacing(hoedown_buffer *ob, const uint8_t *data, size_t size) +{ + size_t i = 0, mark; + hoedown_buffer_grow(ob, size); + while (1) { + mark = i; + while (i < size && data[i] != '\n') i++; + hoedown_buffer_put(ob, data + mark, i - mark); + + if (i >= size) break; + + if (!(i > 0 && data[i-1] == ' ')) + hoedown_buffer_putc(ob, ' '); + i++; + } +} + /**************************** * INLINE PARSING FUNCTIONS * ****************************/ @@ -534,7 +556,7 @@ find_emph_char(uint8_t *data, size_t size, uint8_t c) } i++; - while (i < size && (data[i] == ' ' || data[i] == '\n')) + while (i < size && _isspace(data[i])) i++; if (i >= size) @@ -979,51 +1001,35 @@ char_autolink_url(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size return link_len; } -/* char_link • '[': parsing a link or an image */ +/* char_link • '[': parsing a link, a footnote or an image */ static size_t char_link(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size) { - int is_img = (offset && data[-1] == '!'), level; + int is_img = (offset && data[-1] == '!' 
&& !is_escaped(data - offset, offset - 1)); + int is_footnote = (doc->ext_flags & HOEDOWN_EXT_FOOTNOTES && data[1] == '^'); size_t i = 1, txt_e, link_b = 0, link_e = 0, title_b = 0, title_e = 0; - hoedown_buffer *content = 0; - hoedown_buffer *link = 0; - hoedown_buffer *title = 0; - hoedown_buffer *u_link = 0; + hoedown_buffer *content = NULL; + hoedown_buffer *link = NULL; + hoedown_buffer *title = NULL; + hoedown_buffer *u_link = NULL; size_t org_work_size = doc->work_bufs[BUFFER_SPAN].size; - int text_has_nl = 0, ret = 0; - int in_title = 0, qtype = 0; + int ret = 0, in_title = 0, qtype = 0; /* checking whether the correct renderer exists */ - if ((is_img && !doc->md.image) || (!is_img && !doc->md.link)) + if ((is_footnote && !doc->md.footnote_ref) || (is_img && !doc->md.image) + || (!is_img && !is_footnote && !doc->md.link)) goto cleanup; /* looking for the matching closing bracket */ - for (level = 1; i < size; i++) { - if (data[i] == '\n') - text_has_nl = 1; - - else if (is_escaped(data, i)) - continue; - - else if (data[i] == '[') - level++; - - else if (data[i] == ']') { - level--; - if (level <= 0) - break; - } - } - - if (i >= size) - goto cleanup; - + i += find_emph_char(data + i, size - i, ']'); txt_e = i; - i++; + + if (i < size && data[i] == ']') i++; + else goto cleanup; /* footnote link */ - if (doc->ext_flags & HOEDOWN_EXT_FOOTNOTES && data[1] == '^') { - hoedown_buffer id = { 0, 0, 0, 0, NULL, NULL, NULL }; + if (is_footnote) { + hoedown_buffer id = { NULL, 0, 0, 0, NULL, NULL, NULL }; struct footnote_ref *fr; if (txt_e < 3) @@ -1137,7 +1143,7 @@ char_link(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offse /* reference style link */ else if (i < size && data[i] == '[') { - hoedown_buffer id = { 0, 0, 0, 0, NULL, NULL, NULL }; + hoedown_buffer *id = newbuf(doc, BUFFER_SPAN); struct link_ref *lr; /* looking for the id */ @@ -1148,30 +1154,12 @@ char_link(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offse 
link_e = i; /* finding the link_ref */ - if (link_b == link_e) { - if (text_has_nl) { - hoedown_buffer *b = newbuf(doc, BUFFER_SPAN); - size_t j; - - for (j = 1; j < txt_e; j++) { - if (data[j] != '\n') - hoedown_buffer_putc(b, data[j]); - else if (data[j - 1] != ' ') - hoedown_buffer_putc(b, ' '); - } - - id.data = b->data; - id.size = b->size; - } else { - id.data = data + 1; - id.size = txt_e - 1; - } - } else { - id.data = data + link_b; - id.size = link_e - link_b; - } + if (link_b == link_e) + replace_spacing(id, data + 1, txt_e - 1); + else + hoedown_buffer_put(id, data + link_b, link_e - link_b); - lr = find_link_ref(doc->refs, id.data, id.size); + lr = find_link_ref(doc->refs, id->data, id->size); if (!lr) goto cleanup; @@ -1183,30 +1171,14 @@ char_link(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offse /* shortcut reference style link */ else { - hoedown_buffer id = { 0, 0, 0, 0, NULL, NULL, NULL }; + hoedown_buffer *id = newbuf(doc, BUFFER_SPAN); struct link_ref *lr; /* crafting the id */ - if (text_has_nl) { - hoedown_buffer *b = newbuf(doc, BUFFER_SPAN); - size_t j; - - for (j = 1; j < txt_e; j++) { - if (data[j] != '\n') - hoedown_buffer_putc(b, data[j]); - else if (data[j - 1] != ' ') - hoedown_buffer_putc(b, ' '); - } - - id.data = b->data; - id.size = b->size; - } else { - id.data = data + 1; - id.size = txt_e - 1; - } + replace_spacing(id, data + 1, txt_e - 1); /* finding the link_ref */ - lr = find_link_ref(doc->refs, id.data, id.size); + lr = find_link_ref(doc->refs, id->data, id->size); if (!lr) goto cleanup; @@ -1218,7 +1190,7 @@ char_link(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offse i = txt_e + 1; } - /* building content: img alt is escaped, link content is parsed */ + /* building content: img alt is kept, only link content is parsed */ if (txt_e > 1) { content = newbuf(doc, BUFFER_SPAN); if (is_img) { @@ -1392,6 +1364,11 @@ parse_codefence(uint8_t *data, size_t size, hoedown_buffer *lang, size_t 
*width, lang->data = data + lang_start; lang->size = i - lang_start; + /* Avoid parsing a codespan as a fence */ + i = lang_start + 2; + while (i < size && !(data[i] == *chr && data[i-1] == *chr && data[i-2] == *chr)) i++; + if (i < size) return 0; + return w; } @@ -1832,21 +1809,21 @@ parse_listitem(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t has_next_oli = prefix_oli(data + beg + i, end - beg - i); } - /* checking for ul/ol switch */ - if (in_empty && ( - ((*flags & HOEDOWN_LIST_ORDERED) && has_next_uli) || - (!(*flags & HOEDOWN_LIST_ORDERED) && has_next_oli))){ - *flags |= HOEDOWN_LI_END; - break; /* the following item must have same list type */ - } - /* checking for a new item */ if ((has_next_uli && !is_hrule(data + beg + i, end - beg - i)) || has_next_oli) { if (in_empty) has_inside_empty = 1; - if (pre <= orgpre) /* the following item must have */ - break; /* the same (or less) indentation */ + /* the following item must have the same (or less) indentation */ + if (pre <= orgpre) { + /* if the following item has different list type, we end this list */ + if (in_empty && ( + ((*flags & HOEDOWN_LIST_ORDERED) && has_next_uli) || + (!(*flags & HOEDOWN_LIST_ORDERED) && has_next_oli))) + *flags |= HOEDOWN_LI_END; + + break; + } if (!sublist) sublist = work->size; @@ -1858,13 +1835,13 @@ parse_listitem(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t *flags |= HOEDOWN_LI_END; break; } - else if (in_empty) { + + if (in_empty) { hoedown_buffer_putc(work, '\n'); has_inside_empty = 1; + in_empty = 0; } - in_empty = 0; - /* adding the line without prefix into the working buffer */ hoedown_buffer_put(work, data + beg + i, end - beg - i); beg = end; @@ -1999,80 +1976,83 @@ parse_footnote_list(hoedown_buffer *ob, hoedown_document *doc, struct footnote_l popbuf(doc, BUFFER_BLOCK); } -/* htmlblock_end • checking end of HTML block : </tag>[ \t]*\n[ \t*]\n */ -/* returns the length on match, 0 otherwise */ -static size_t 
-htmlblock_end_tag( +/* htmlblock_is_end • check for end of HTML block : </tag>( *)\n */ +/* returns tag length on match, 0 otherwise */ +/* assumes data starts with "<" */ +static inline size_t +htmlblock_is_end( const char *tag, size_t tag_len, hoedown_document *doc, uint8_t *data, size_t size) { - size_t i, w; + size_t i = tag_len + 3, w; - /* checking if tag is a match */ - if (tag_len + 3 >= size || + /* try to match the end tag */ + /* note: we're not considering tags like "</tag >" which are still valid */ + if (i > size || + data[1] != '/' || strncasecmp((char *)data + 2, tag, tag_len) != 0 || data[tag_len + 2] != '>') return 0; - /* checking white lines */ - i = tag_len + 3; - w = 0; - if (i < size && (w = is_empty(data + i, size - i)) == 0) - return 0; /* non-blank after tag */ - i += w; - w = 0; - - if (i < size) - w = is_empty(data + i, size - i); + /* rest of the line must be empty */ + if ((w = is_empty(data + i, size - i)) == 0 && i < size) + return 0; return i + w; } +/* htmlblock_find_end • try to find HTML block ending tag */ +/* returns the length on match, 0 otherwise */ static size_t -htmlblock_end(const char *curtag, +htmlblock_find_end( + const char *tag, + size_t tag_len, hoedown_document *doc, uint8_t *data, - size_t size, - int start_of_line) + size_t size) { - size_t tag_size = strlen(curtag); - size_t i = 1, end_tag; - int block_lines = 0; - - while (i < size) { - i++; - while (i < size && !(data[i - 1] == '<' && data[i] == '/')) { - if (data[i] == '\n') - block_lines++; + size_t i = 0, w; - i++; - } + while (1) { + while (i < size && data[i] != '<') i++; + if (i >= size) return 0; - /* If we are only looking for unindented tags, skip the tag - * if it doesn't follow a newline. 
- * - * The only exception to this is if the tag is still on the - * initial line; in that case it still counts as a closing - * tag - */ - if (start_of_line && block_lines > 0 && data[i - 2] != '\n') - continue; + w = htmlblock_is_end(tag, tag_len, doc, data + i, size - i); + if (w) return i + w; + i++; + } +} - if (i + 2 + tag_size >= size) - break; +/* htmlblock_find_end_strict • try to find end of HTML block in strict mode */ +/* (it must be an unindented line, and have a blank line afterwards) */ +/* returns the length on match, 0 otherwise */ +static size_t +htmlblock_find_end_strict( + const char *tag, + size_t tag_len, + hoedown_document *doc, + uint8_t *data, + size_t size) +{ + size_t i = 0, mark; - end_tag = htmlblock_end_tag(curtag, tag_size, doc, data + i - 1, size - i + 1); - if (end_tag) - return i + end_tag - 1; + while (1) { + mark = i; + while (i < size && data[i] != '\n') i++; + if (i < size) i++; + if (i == mark) return 0; + + if (data[mark] == ' ' && mark > 0) continue; + mark += htmlblock_find_end(tag, tag_len, doc, data + mark, i - mark); + if (mark == i && (is_empty(data + i, size - i) || i >= size)) break; } - return 0; + return i; } - /* parse_htmlblock • parsing of inline HTML block */ static size_t parse_htmlblock(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size, int do_render) @@ -2137,15 +2117,14 @@ parse_htmlblock(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t return 0; } - /* looking for an unindented matching closing tag */ - /* followed by a blank line */ - tag_end = htmlblock_end(curtag, doc, data, size, 1); + /* looking for a matching closing tag in strict mode */ + size_t tag_len = strlen(curtag); + tag_end = htmlblock_find_end_strict(curtag, tag_len, doc, data, size); /* if not found, trying a second pass looking for indented match */ /* but not if tag is "ins" or "del" (following original Markdown.pl) */ - if (!tag_end && strcmp(curtag, "ins") != 0 && strcmp(curtag, "del") != 0) { - 
tag_end = htmlblock_end(curtag, doc, data, size, 0); - } + if (!tag_end && strcmp(curtag, "ins") != 0 && strcmp(curtag, "del") != 0) + tag_end = htmlblock_find_end(curtag, tag_len, doc, data, size); if (!tag_end) return 0; |