summary refs log tree commit diff
diff options
context:
space:
mode:
author Xavier Mendez <jmendeth@gmail.com> 2014-08-26 16:45:58 +0200
committer Xavier Mendez <jmendeth@gmail.com> 2014-08-26 16:45:58 +0200
commit c6238419ffb1fa542f37dfe67916d525069dc53c (patch)
tree 3e6e7e8da3a730c3d4a6d74b43cee925e35b65d5
parent 6590101008066f69ad36f6ea5cd47e4da16baaff (diff)
parent 62908524c52f2b52c5d35d32390fc97b7b632c32 (diff)
download rust-hoedown-c6238419ffb1fa542f37dfe67916d525069dc53c.tar.gz
Merge pull request #110 from jmendeth/moar-bugfixes
Moar bugfixes
-rw-r--r-- bin/hoedown.c 2
-rw-r--r-- src/document.c 265
2 files changed, 123 insertions, 144 deletions
diff --git a/bin/hoedown.c b/bin/hoedown.c
index fdad045..1440c55 100644
--- a/bin/hoedown.c
+++ b/bin/hoedown.c
@@ -65,7 +65,7 @@ static struct extension_info extensions_info[] = {
{HOEDOWN_EXT_QUOTE, "quote", "Render \"quotes\" as <q>quotes</q>."},
{HOEDOWN_EXT_SUPERSCRIPT, "superscript", "Parse super^script."},
- {HOEDOWN_EXT_LAX_SPACING, "lax-spacing", "Allow HTML blocks on the same line as text."},
+ {HOEDOWN_EXT_LAX_SPACING, "lax-spacing", "Don't require a blank line between some blocks."},
{HOEDOWN_EXT_NO_INTRA_EMPHASIS, "disable-intra-emphasis", "Disable emphasis_between_words."},
{HOEDOWN_EXT_SPACE_HEADERS, "space-headers", "Require a space after '#' in headers."},
diff --git a/src/document.c b/src/document.c
index 8ae8eb9..ba0e2f5 100644
--- a/src/document.c
+++ b/src/document.c
@@ -328,6 +328,28 @@ _isspace(int c)
return c == ' ' || c == '\n';
}
+/*
+ * Append data to ob, replacing each newline with a single space. As a
+ * special case, a newline directly preceded by a space is simply dropped.
+ */
+static inline void
+replace_spacing(hoedown_buffer *ob, const uint8_t *data, size_t size)
+{
+ size_t i = 0, mark;
+ hoedown_buffer_grow(ob, size);
+ while (1) {
+ mark = i;
+ while (i < size && data[i] != '\n') i++;
+ hoedown_buffer_put(ob, data + mark, i - mark);
+
+ if (i >= size) break;
+
+ if (!(i > 0 && data[i-1] == ' '))
+ hoedown_buffer_putc(ob, ' ');
+ i++;
+ }
+}
+
/****************************
* INLINE PARSING FUNCTIONS *
****************************/
@@ -534,7 +556,7 @@ find_emph_char(uint8_t *data, size_t size, uint8_t c)
}
i++;
- while (i < size && (data[i] == ' ' || data[i] == '\n'))
+ while (i < size && _isspace(data[i]))
i++;
if (i >= size)
@@ -979,51 +1001,35 @@ char_autolink_url(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size
return link_len;
}
-/* char_link • '[': parsing a link or an image */
+/* char_link • '[': parsing a link, a footnote or an image */
static size_t
char_link(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size)
{
- int is_img = (offset && data[-1] == '!'), level;
+ int is_img = (offset && data[-1] == '!' && !is_escaped(data - offset, offset - 1));
+ int is_footnote = (doc->ext_flags & HOEDOWN_EXT_FOOTNOTES && data[1] == '^');
size_t i = 1, txt_e, link_b = 0, link_e = 0, title_b = 0, title_e = 0;
- hoedown_buffer *content = 0;
- hoedown_buffer *link = 0;
- hoedown_buffer *title = 0;
- hoedown_buffer *u_link = 0;
+ hoedown_buffer *content = NULL;
+ hoedown_buffer *link = NULL;
+ hoedown_buffer *title = NULL;
+ hoedown_buffer *u_link = NULL;
size_t org_work_size = doc->work_bufs[BUFFER_SPAN].size;
- int text_has_nl = 0, ret = 0;
- int in_title = 0, qtype = 0;
+ int ret = 0, in_title = 0, qtype = 0;
/* checking whether the correct renderer exists */
- if ((is_img && !doc->md.image) || (!is_img && !doc->md.link))
+ if ((is_footnote && !doc->md.footnote_ref) || (is_img && !doc->md.image)
+ || (!is_img && !is_footnote && !doc->md.link))
goto cleanup;
/* looking for the matching closing bracket */
- for (level = 1; i < size; i++) {
- if (data[i] == '\n')
- text_has_nl = 1;
-
- else if (is_escaped(data, i))
- continue;
-
- else if (data[i] == '[')
- level++;
-
- else if (data[i] == ']') {
- level--;
- if (level <= 0)
- break;
- }
- }
-
- if (i >= size)
- goto cleanup;
-
+ i += find_emph_char(data + i, size - i, ']');
txt_e = i;
- i++;
+
+ if (i < size && data[i] == ']') i++;
+ else goto cleanup;
/* footnote link */
- if (doc->ext_flags & HOEDOWN_EXT_FOOTNOTES && data[1] == '^') {
- hoedown_buffer id = { 0, 0, 0, 0, NULL, NULL, NULL };
+ if (is_footnote) {
+ hoedown_buffer id = { NULL, 0, 0, 0, NULL, NULL, NULL };
struct footnote_ref *fr;
if (txt_e < 3)
@@ -1137,7 +1143,7 @@ char_link(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offse
/* reference style link */
else if (i < size && data[i] == '[') {
- hoedown_buffer id = { 0, 0, 0, 0, NULL, NULL, NULL };
+ hoedown_buffer *id = newbuf(doc, BUFFER_SPAN);
struct link_ref *lr;
/* looking for the id */
@@ -1148,30 +1154,12 @@ char_link(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offse
link_e = i;
/* finding the link_ref */
- if (link_b == link_e) {
- if (text_has_nl) {
- hoedown_buffer *b = newbuf(doc, BUFFER_SPAN);
- size_t j;
-
- for (j = 1; j < txt_e; j++) {
- if (data[j] != '\n')
- hoedown_buffer_putc(b, data[j]);
- else if (data[j - 1] != ' ')
- hoedown_buffer_putc(b, ' ');
- }
-
- id.data = b->data;
- id.size = b->size;
- } else {
- id.data = data + 1;
- id.size = txt_e - 1;
- }
- } else {
- id.data = data + link_b;
- id.size = link_e - link_b;
- }
+ if (link_b == link_e)
+ replace_spacing(id, data + 1, txt_e - 1);
+ else
+ hoedown_buffer_put(id, data + link_b, link_e - link_b);
- lr = find_link_ref(doc->refs, id.data, id.size);
+ lr = find_link_ref(doc->refs, id->data, id->size);
if (!lr)
goto cleanup;
@@ -1183,30 +1171,14 @@ char_link(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offse
/* shortcut reference style link */
else {
- hoedown_buffer id = { 0, 0, 0, 0, NULL, NULL, NULL };
+ hoedown_buffer *id = newbuf(doc, BUFFER_SPAN);
struct link_ref *lr;
/* crafting the id */
- if (text_has_nl) {
- hoedown_buffer *b = newbuf(doc, BUFFER_SPAN);
- size_t j;
-
- for (j = 1; j < txt_e; j++) {
- if (data[j] != '\n')
- hoedown_buffer_putc(b, data[j]);
- else if (data[j - 1] != ' ')
- hoedown_buffer_putc(b, ' ');
- }
-
- id.data = b->data;
- id.size = b->size;
- } else {
- id.data = data + 1;
- id.size = txt_e - 1;
- }
+ replace_spacing(id, data + 1, txt_e - 1);
/* finding the link_ref */
- lr = find_link_ref(doc->refs, id.data, id.size);
+ lr = find_link_ref(doc->refs, id->data, id->size);
if (!lr)
goto cleanup;
@@ -1218,7 +1190,7 @@ char_link(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offse
i = txt_e + 1;
}
- /* building content: img alt is escaped, link content is parsed */
+ /* building content: img alt is kept, only link content is parsed */
if (txt_e > 1) {
content = newbuf(doc, BUFFER_SPAN);
if (is_img) {
@@ -1392,6 +1364,11 @@ parse_codefence(uint8_t *data, size_t size, hoedown_buffer *lang, size_t *width,
lang->data = data + lang_start;
lang->size = i - lang_start;
+ /* Avoid parsing a codespan as a fence */
+ i = lang_start + 2;
+ while (i < size && !(data[i] == *chr && data[i-1] == *chr && data[i-2] == *chr)) i++;
+ if (i < size) return 0;
+
return w;
}
@@ -1832,21 +1809,21 @@ parse_listitem(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t
has_next_oli = prefix_oli(data + beg + i, end - beg - i);
}
- /* checking for ul/ol switch */
- if (in_empty && (
- ((*flags & HOEDOWN_LIST_ORDERED) && has_next_uli) ||
- (!(*flags & HOEDOWN_LIST_ORDERED) && has_next_oli))){
- *flags |= HOEDOWN_LI_END;
- break; /* the following item must have same list type */
- }
-
/* checking for a new item */
if ((has_next_uli && !is_hrule(data + beg + i, end - beg - i)) || has_next_oli) {
if (in_empty)
has_inside_empty = 1;
- if (pre <= orgpre) /* the following item must have */
- break; /* the same (or less) indentation */
+ /* the following item must have the same (or less) indentation */
+ if (pre <= orgpre) {
+ /* if the following item has different list type, we end this list */
+ if (in_empty && (
+ ((*flags & HOEDOWN_LIST_ORDERED) && has_next_uli) ||
+ (!(*flags & HOEDOWN_LIST_ORDERED) && has_next_oli)))
+ *flags |= HOEDOWN_LI_END;
+
+ break;
+ }
if (!sublist)
sublist = work->size;
@@ -1858,13 +1835,13 @@ parse_listitem(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t
*flags |= HOEDOWN_LI_END;
break;
}
- else if (in_empty) {
+
+ if (in_empty) {
hoedown_buffer_putc(work, '\n');
has_inside_empty = 1;
+ in_empty = 0;
}
- in_empty = 0;
-
/* adding the line without prefix into the working buffer */
hoedown_buffer_put(work, data + beg + i, end - beg - i);
beg = end;
@@ -1999,80 +1976,83 @@ parse_footnote_list(hoedown_buffer *ob, hoedown_document *doc, struct footnote_l
popbuf(doc, BUFFER_BLOCK);
}
-/* htmlblock_end • checking end of HTML block : </tag>[ \t]*\n[ \t*]\n */
-/* returns the length on match, 0 otherwise */
-static size_t
-htmlblock_end_tag(
+/* htmlblock_is_end • check for end of HTML block : </tag>( *)\n */
+/* returns tag length on match, 0 otherwise */
+/* assumes data starts with "<" */
+static inline size_t
+htmlblock_is_end(
const char *tag,
size_t tag_len,
hoedown_document *doc,
uint8_t *data,
size_t size)
{
- size_t i, w;
+ size_t i = tag_len + 3, w;
- /* checking if tag is a match */
- if (tag_len + 3 >= size ||
+ /* try to match the end tag */
+ /* note: we're not considering tags like "</tag >" which are still valid */
+ if (i > size ||
+ data[1] != '/' ||
strncasecmp((char *)data + 2, tag, tag_len) != 0 ||
data[tag_len + 2] != '>')
return 0;
- /* checking white lines */
- i = tag_len + 3;
- w = 0;
- if (i < size && (w = is_empty(data + i, size - i)) == 0)
- return 0; /* non-blank after tag */
- i += w;
- w = 0;
-
- if (i < size)
- w = is_empty(data + i, size - i);
+ /* rest of the line must be empty */
+ if ((w = is_empty(data + i, size - i)) == 0 && i < size)
+ return 0;
return i + w;
}
+/* htmlblock_find_end • try to find HTML block ending tag */
+/* returns the length on match, 0 otherwise */
static size_t
-htmlblock_end(const char *curtag,
+htmlblock_find_end(
+ const char *tag,
+ size_t tag_len,
hoedown_document *doc,
uint8_t *data,
- size_t size,
- int start_of_line)
+ size_t size)
{
- size_t tag_size = strlen(curtag);
- size_t i = 1, end_tag;
- int block_lines = 0;
-
- while (i < size) {
- i++;
- while (i < size && !(data[i - 1] == '<' && data[i] == '/')) {
- if (data[i] == '\n')
- block_lines++;
+ size_t i = 0, w;
- i++;
- }
+ while (1) {
+ while (i < size && data[i] != '<') i++;
+ if (i >= size) return 0;
- /* If we are only looking for unindented tags, skip the tag
- * if it doesn't follow a newline.
- *
- * The only exception to this is if the tag is still on the
- * initial line; in that case it still counts as a closing
- * tag
- */
- if (start_of_line && block_lines > 0 && data[i - 2] != '\n')
- continue;
+ w = htmlblock_is_end(tag, tag_len, doc, data + i, size - i);
+ if (w) return i + w;
+ i++;
+ }
+}
- if (i + 2 + tag_size >= size)
- break;
+/* htmlblock_find_end_strict • try to find end of HTML block in strict mode */
+/* (it must be an unindented line, and have a blank line afterwards) */
+/* returns the length on match, 0 otherwise */
+static size_t
+htmlblock_find_end_strict(
+ const char *tag,
+ size_t tag_len,
+ hoedown_document *doc,
+ uint8_t *data,
+ size_t size)
+{
+ size_t i = 0, mark;
- end_tag = htmlblock_end_tag(curtag, tag_size, doc, data + i - 1, size - i + 1);
- if (end_tag)
- return i + end_tag - 1;
+ while (1) {
+ mark = i;
+ while (i < size && data[i] != '\n') i++;
+ if (i < size) i++;
+ if (i == mark) return 0;
+
+ if (data[mark] == ' ' && mark > 0) continue;
+ mark += htmlblock_find_end(tag, tag_len, doc, data + mark, i - mark);
+ if (mark == i && (is_empty(data + i, size - i) || i >= size)) break;
}
- return 0;
+ return i;
}
-
/* parse_htmlblock • parsing of inline HTML block */
static size_t
parse_htmlblock(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size, int do_render)
@@ -2137,15 +2117,14 @@ parse_htmlblock(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t
return 0;
}
- /* looking for an unindented matching closing tag */
- /* followed by a blank line */
- tag_end = htmlblock_end(curtag, doc, data, size, 1);
+ /* looking for a matching closing tag in strict mode */
+ size_t tag_len = strlen(curtag);
+ tag_end = htmlblock_find_end_strict(curtag, tag_len, doc, data, size);
/* if not found, trying a second pass looking for indented match */
/* but not if tag is "ins" or "del" (following original Markdown.pl) */
- if (!tag_end && strcmp(curtag, "ins") != 0 && strcmp(curtag, "del") != 0) {
- tag_end = htmlblock_end(curtag, doc, data, size, 0);
- }
+ if (!tag_end && strcmp(curtag, "ins") != 0 && strcmp(curtag, "del") != 0)
+ tag_end = htmlblock_find_end(curtag, tag_len, doc, data, size);
if (!tag_end)
return 0;