summary | refs | log | tree | commit | diff
path: root/src/document.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/document.c')
-rw-r--r-- src/document.c 398
1 files changed, 214 insertions, 184 deletions
diff --git a/src/document.c b/src/document.c
index acf2572..ee0102f 100644
--- a/src/document.c
+++ b/src/document.c
@@ -1,5 +1,3 @@
-/* document.c - generic markdown parser */
-
#include "document.h"
#include <assert.h>
@@ -9,7 +7,9 @@
#include "stack.h"
-#ifdef _MSC_VER
+#ifndef _MSC_VER
+#include <strings.h>
+#else
#define strncasecmp _strnicmp
#endif
@@ -88,7 +88,7 @@ enum markdown_char_t {
MD_CHAR_LINK,
MD_CHAR_LANGLE,
MD_CHAR_ESCAPE,
- MD_CHAR_ENTITITY,
+ MD_CHAR_ENTITY,
MD_CHAR_AUTOLINK_URL,
MD_CHAR_AUTOLINK_EMAIL,
MD_CHAR_AUTOLINK_WWW,
@@ -114,16 +114,16 @@ static char_trigger markdown_char_ptrs[] = {
&char_math
};
-/* render • structure containing state for a parser instance */
struct hoedown_document {
hoedown_renderer md;
+ hoedown_renderer_data data;
struct link_ref *refs[REF_TABLE_SIZE];
struct footnote_list footnotes_found;
struct footnote_list footnotes_used;
uint8_t active_char[256];
hoedown_stack work_bufs[2];
- unsigned int ext_flags;
+ hoedown_extensions ext_flags;
size_t max_nesting;
int in_link_body;
};
@@ -132,7 +132,7 @@ struct hoedown_document {
* HELPER FUNCTIONS *
***************************/
-static inline hoedown_buffer *
+static hoedown_buffer *
newbuf(hoedown_document *doc, int type)
{
static const size_t buf_size[2] = {256, 64};
@@ -151,7 +151,7 @@ newbuf(hoedown_document *doc, int type)
return work;
}
-static inline void
+static void
popbuf(hoedown_document *doc, int type)
{
doc->work_bufs[type].size--;
@@ -194,10 +194,7 @@ add_link_ref(
struct link_ref **references,
const uint8_t *name, size_t name_size)
{
- struct link_ref *ref = calloc(1, sizeof(struct link_ref));
-
- if (!ref)
- return NULL;
+ struct link_ref *ref = hoedown_calloc(1, sizeof(struct link_ref));
ref->id = hash_link_ref(name, name_size);
ref->next = references[ref->id % REF_TABLE_SIZE];
@@ -246,9 +243,7 @@ free_link_refs(struct link_ref **references)
static struct footnote_ref *
create_footnote_ref(struct footnote_list *list, const uint8_t *name, size_t name_size)
{
- struct footnote_ref *ref = calloc(1, sizeof(struct footnote_ref));
- if (!ref)
- return NULL;
+ struct footnote_ref *ref = hoedown_calloc(1, sizeof(struct footnote_ref));
ref->id = hash_link_ref(name, name_size);
@@ -258,7 +253,7 @@ create_footnote_ref(struct footnote_list *list, const uint8_t *name, size_t name
static int
add_footnote_ref(struct footnote_list *list, struct footnote_ref *ref)
{
- struct footnote_item *item = calloc(1, sizeof(struct footnote_item));
+ struct footnote_item *item = hoedown_calloc(1, sizeof(struct footnote_item));
if (!item)
return 0;
item->ref = ref;
@@ -325,14 +320,14 @@ free_footnote_list(struct footnote_list *list, int free_refs)
* should instead extract an Unicode codepoint from
* this character and check for space properties.
*/
-static inline int
+static int
_isspace(int c)
{
return c == ' ' || c == '\n';
}
/* is_empty_all: verify that all the data is spacing */
-static inline int
+static int
is_empty_all(const uint8_t *data, size_t size)
{
size_t i = 0;
@@ -344,7 +339,7 @@ is_empty_all(const uint8_t *data, size_t size)
* Replace all spacing characters in data with spaces. As a special
* case, this collapses a newline with the previous space, if possible.
*/
-static inline void
+static void
replace_spacing(hoedown_buffer *ob, const uint8_t *data, size_t size)
{
size_t i = 0, mark;
@@ -400,7 +395,7 @@ is_mail_autolink(uint8_t *data, size_t size)
/* tag_length • returns the length of the given tag, or 0 is it's not valid */
static size_t
-tag_length(uint8_t *data, size_t size, enum hoedown_autolink *autolink)
+tag_length(uint8_t *data, size_t size, hoedown_autolink_type *autolink)
{
size_t i, j;
@@ -454,7 +449,7 @@ tag_length(uint8_t *data, size_t size, enum hoedown_autolink *autolink)
*autolink = HOEDOWN_AUTOLINK_NONE;
}
- /* looking for sometinhg looking like a tag end */
+ /* looking for something looking like a tag end */
while (i < size && data[i] != '>') i++;
if (i >= size) return 0;
return i + 1;
@@ -464,9 +459,9 @@ tag_length(uint8_t *data, size_t size, enum hoedown_autolink *autolink)
static void
parse_inline(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size)
{
- size_t i = 0, end = 0;
- uint8_t action = 0;
+ size_t i = 0, end = 0, consumed = 0;
hoedown_buffer work = { 0, 0, 0, 0, NULL, NULL, NULL };
+ uint8_t *active_char = doc->active_char;
if (doc->work_bufs[BUFFER_SPAN].size +
doc->work_bufs[BUFFER_BLOCK].size > doc->max_nesting)
@@ -474,14 +469,13 @@ parse_inline(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t si
while (i < size) {
/* copying inactive chars into the output */
- while (end < size && (action = doc->active_char[data[end]]) == 0) {
+ while (end < size && active_char[data[end]] == 0)
end++;
- }
if (doc->md.normal_text) {
work.data = data + i;
work.size = end - i;
- doc->md.normal_text(ob, &work, doc->md.opaque);
+ doc->md.normal_text(ob, &work, &doc->data);
}
else
hoedown_buffer_put(ob, data + i, end - i);
@@ -489,12 +483,13 @@ parse_inline(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t si
if (end >= size) break;
i = end;
- end = markdown_char_ptrs[(int)action](ob, doc, data + i, i, size - i);
+ end = markdown_char_ptrs[ (int)active_char[data[end]] ](ob, doc, data + i, i - consumed, size - i);
if (!end) /* no action from the callback */
end = i + 1;
else {
i += end;
end = i;
+ consumed = i;
}
}
}
@@ -554,7 +549,7 @@ find_emph_char(uint8_t *data, size_t size, uint8_t c)
}
/* not a well-formed codespan; use found matching emph char */
- if (i >= size) return tmp_i;
+ if (bt < span_nb && i >= size) return tmp_i;
}
/* skipping a link */
else if (data[i] == '[') {
@@ -633,9 +628,9 @@ parse_emph1(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t siz
parse_inline(work, doc, data, i);
if (doc->ext_flags & HOEDOWN_EXT_UNDERLINE && c == '_')
- r = doc->md.underline(ob, work, doc->md.opaque);
+ r = doc->md.underline(ob, work, &doc->data);
else
- r = doc->md.emphasis(ob, work, doc->md.opaque);
+ r = doc->md.emphasis(ob, work, &doc->data);
popbuf(doc, BUFFER_SPAN);
return r ? i + 1 : 0;
@@ -663,11 +658,11 @@ parse_emph2(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t siz
parse_inline(work, doc, data, i);
if (c == '~')
- r = doc->md.strikethrough(ob, work, doc->md.opaque);
+ r = doc->md.strikethrough(ob, work, &doc->data);
else if (c == '=')
- r = doc->md.highlight(ob, work, doc->md.opaque);
+ r = doc->md.highlight(ob, work, &doc->data);
else
- r = doc->md.double_emphasis(ob, work, doc->md.opaque);
+ r = doc->md.double_emphasis(ob, work, &doc->data);
popbuf(doc, BUFFER_SPAN);
return r ? i + 2 : 0;
@@ -699,7 +694,7 @@ parse_emph3(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t siz
hoedown_buffer *work = newbuf(doc, BUFFER_SPAN);
parse_inline(work, doc, data, i);
- r = doc->md.triple_emphasis(ob, work, doc->md.opaque);
+ r = doc->md.triple_emphasis(ob, work, &doc->data);
popbuf(doc, BUFFER_SPAN);
return r ? i + 3 : 0;
@@ -723,36 +718,41 @@ parse_emph3(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t siz
static size_t
parse_math(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size, const char *end, size_t delimsz, int displaymode)
{
+ hoedown_buffer text = { NULL, 0, 0, 0, NULL, NULL, NULL };
size_t i = delimsz;
- if (!doc->md.math) return 0;
+
+ if (!doc->md.math)
+ return 0;
/* find ending delimiter */
while (1) {
- while (i < size && data[i] != (uint8_t)end[0]) i++;
- if (i >= size) return 0;
+ while (i < size && data[i] != (uint8_t)end[0])
+ i++;
+
+ if (i >= size)
+ return 0;
if (!is_escaped(data, i) && !(i + delimsz > size)
&& memcmp(data + i, end, delimsz) == 0)
break;
+
i++;
}
/* prepare buffers */
- hoedown_buffer text = { data + delimsz, i - delimsz, 0, 0, NULL, NULL, NULL };
-
- /* enforce spacing around the span */
- i += delimsz;
- if (offset && !_isspace(data[-1])) return 0;
- if (i < size && !_isspace(data[i])) return 0;
+ text.data = data + delimsz;
+ text.size = i - delimsz;
/* if this is a $$ and MATH_EXPLICIT is not active,
- * guess wether displaymode should be enabled from the context */
+ * guess whether displaymode should be enabled from the context */
+ i += delimsz;
if (delimsz == 2 && !(doc->ext_flags & HOEDOWN_EXT_MATH_EXPLICIT))
displaymode = is_empty_all(data - offset, offset) && is_empty_all(data + i, size - i);
/* call callback */
- if (doc->md.math(ob, &text, displaymode, doc->md.opaque))
+ if (doc->md.math(ob, &text, displaymode, &doc->data))
return i;
+
return 0;
}
@@ -806,7 +806,7 @@ char_linebreak(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t
while (ob->size && ob->data[ob->size - 1] == ' ')
ob->size--;
- return doc->md.linebreak(ob, doc->md.opaque) ? 1 : 0;
+ return doc->md.linebreak(ob, &doc->data) ? 1 : 0;
}
@@ -814,6 +814,7 @@ char_linebreak(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t
static size_t
char_codespan(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size)
{
+ hoedown_buffer work = { NULL, 0, 0, 0, NULL, NULL, NULL };
size_t end, nb = 0, i, f_begin, f_end;
/* counting the number of backticks in the delimiter */
@@ -841,11 +842,13 @@ char_codespan(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t o
/* real code span */
if (f_begin < f_end) {
- hoedown_buffer work = { data + f_begin, f_end - f_begin, 0, 0, NULL, NULL, NULL };
- if (!doc->md.codespan(ob, &work, doc->md.opaque))
+ work.data = data + f_begin;
+ work.size = f_end - f_begin;
+
+ if (!doc->md.codespan(ob, &work, &doc->data))
end = 0;
} else {
- if (!doc->md.codespan(ob, 0, doc->md.opaque))
+ if (!doc->md.codespan(ob, 0, &doc->data))
end = 0;
}
@@ -887,11 +890,11 @@ char_quote(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offs
hoedown_buffer *work = newbuf(doc, BUFFER_SPAN);
parse_inline(work, doc, data + f_begin, f_end - f_begin);
- if (!doc->md.quote(ob, work, doc->md.opaque))
+ if (!doc->md.quote(ob, work, &doc->data))
end = 0;
popbuf(doc, BUFFER_SPAN);
} else {
- if (!doc->md.quote(ob, 0, doc->md.opaque))
+ if (!doc->md.quote(ob, 0, &doc->data))
end = 0;
}
@@ -921,7 +924,7 @@ char_escape(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t off
if (doc->md.normal_text) {
work.data = data + 1;
work.size = 1;
- doc->md.normal_text(ob, &work, doc->md.opaque);
+ doc->md.normal_text(ob, &work, &doc->data);
}
else hoedown_buffer_putc(ob, data[1]);
} else if (size == 1) {
@@ -953,7 +956,7 @@ char_entity(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t off
if (doc->md.entity) {
work.data = data;
work.size = end;
- doc->md.entity(ob, &work, doc->md.opaque);
+ doc->md.entity(ob, &work, &doc->data);
}
else hoedown_buffer_put(ob, data, end);
@@ -964,22 +967,25 @@ char_entity(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t off
static size_t
char_langle_tag(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size)
{
- enum hoedown_autolink altype = HOEDOWN_AUTOLINK_NONE;
+ hoedown_buffer work = { NULL, 0, 0, 0, NULL, NULL, NULL };
+ hoedown_autolink_type altype = HOEDOWN_AUTOLINK_NONE;
size_t end = tag_length(data, size, &altype);
- hoedown_buffer work = { data, end, 0, 0, NULL, NULL, NULL };
int ret = 0;
+ work.data = data;
+ work.size = end;
+
if (end > 2) {
if (doc->md.autolink && altype != HOEDOWN_AUTOLINK_NONE) {
hoedown_buffer *u_link = newbuf(doc, BUFFER_SPAN);
work.data = data + 1;
work.size = end - 2;
unscape_text(u_link, &work);
- ret = doc->md.autolink(ob, u_link, altype, doc->md.opaque);
+ ret = doc->md.autolink(ob, u_link, altype, &doc->data);
popbuf(doc, BUFFER_SPAN);
}
- else if (doc->md.raw_html_tag)
- ret = doc->md.raw_html_tag(ob, &work, doc->md.opaque);
+ else if (doc->md.raw_html)
+ ret = doc->md.raw_html(ob, &work, &doc->data);
}
if (!ret) return 0;
@@ -1002,14 +1008,18 @@ char_autolink_www(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size
HOEDOWN_BUFPUTSL(link_url, "http://");
hoedown_buffer_put(link_url, link->data, link->size);
- ob->size -= rewind;
+ if (ob->size > rewind)
+ ob->size -= rewind;
+ else
+ ob->size = 0;
+
if (doc->md.normal_text) {
link_text = newbuf(doc, BUFFER_SPAN);
- doc->md.normal_text(link_text, link, doc->md.opaque);
- doc->md.link(ob, link_url, NULL, link_text, doc->md.opaque);
+ doc->md.normal_text(link_text, link, &doc->data);
+ doc->md.link(ob, link_text, link_url, NULL, &doc->data);
popbuf(doc, BUFFER_SPAN);
} else {
- doc->md.link(ob, link_url, NULL, link, doc->md.opaque);
+ doc->md.link(ob, link, link_url, NULL, &doc->data);
}
popbuf(doc, BUFFER_SPAN);
}
@@ -1030,8 +1040,12 @@ char_autolink_email(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, si
link = newbuf(doc, BUFFER_SPAN);
if ((link_len = hoedown_autolink__email(&rewind, link, data, offset, size, 0)) > 0) {
- ob->size -= rewind;
- doc->md.autolink(ob, link, HOEDOWN_AUTOLINK_EMAIL, doc->md.opaque);
+ if (ob->size > rewind)
+ ob->size -= rewind;
+ else
+ ob->size = 0;
+
+ doc->md.autolink(ob, link, HOEDOWN_AUTOLINK_EMAIL, &doc->data);
}
popbuf(doc, BUFFER_SPAN);
@@ -1050,8 +1064,12 @@ char_autolink_url(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size
link = newbuf(doc, BUFFER_SPAN);
if ((link_len = hoedown_autolink__url(&rewind, link, data, offset, size, 0)) > 0) {
- ob->size -= rewind;
- doc->md.autolink(ob, link, HOEDOWN_AUTOLINK_NORMAL, doc->md.opaque);
+ if (ob->size > rewind)
+ ob->size -= rewind;
+ else
+ ob->size = 0;
+
+ doc->md.autolink(ob, link, HOEDOWN_AUTOLINK_NORMAL, &doc->data);
}
popbuf(doc, BUFFER_SPAN);
@@ -1106,7 +1124,7 @@ char_link(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offse
/* render */
if (doc->md.footnote_ref)
- ret = doc->md.footnote_ref(ob, fr->num, doc->md.opaque);
+ ret = doc->md.footnote_ref(ob, fr->num, &doc->data);
}
goto cleanup;
@@ -1181,8 +1199,10 @@ char_link(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offse
link_e--;
/* remove optional angle brackets around the link */
- if (data[link_b] == '<') link_b++;
- if (data[link_e - 1] == '>') link_e--;
+ if (data[link_b] == '<' && data[link_e - 1] == '>') {
+ link_b++;
+ link_e--;
+ }
/* building escaped link and title */
if (link_e > link_b) {
@@ -1271,9 +1291,9 @@ char_link(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offse
if (ob->size && ob->data[ob->size - 1] == '!')
ob->size -= 1;
- ret = doc->md.image(ob, u_link, title, content, doc->md.opaque);
+ ret = doc->md.image(ob, u_link, title, content, &doc->data);
} else {
- ret = doc->md.link(ob, u_link, title, content, doc->md.opaque);
+ ret = doc->md.link(ob, content, u_link, title, &doc->data);
}
/* cleanup */
@@ -1312,7 +1332,7 @@ char_superscript(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_
sup = newbuf(doc, BUFFER_SPAN);
parse_inline(sup, doc, data + sup_start, sup_len - sup_start);
- doc->md.superscript(ob, sup, doc->md.opaque);
+ doc->md.superscript(ob, sup, &doc->data);
popbuf(doc, BUFFER_SPAN);
return (sup_start == 2) ? sup_len + 1 : sup_len;
@@ -1616,7 +1636,7 @@ parse_blockquote(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_
parse_block(out, doc, work_data, work_size);
if (doc->md.blockquote)
- doc->md.blockquote(ob, out, doc->md.opaque);
+ doc->md.blockquote(ob, out, &doc->data);
popbuf(doc, BUFFER_BLOCK);
return end;
}
@@ -1628,9 +1648,11 @@ parse_htmlblock(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t
static size_t
parse_paragraph(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size)
{
+ hoedown_buffer work = { NULL, 0, 0, 0, NULL, NULL, NULL };
size_t i = 0, end = 0;
int level = 0;
- hoedown_buffer work = { data, 0, 0, 0, NULL, NULL, NULL };
+
+ work.data = data;
while (i < size) {
for (end = i + 1; end < size && data[end - 1] != '\n'; end++) /* empty */;
@@ -1648,37 +1670,6 @@ parse_paragraph(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t
break;
}
- /*
- * Early termination of a paragraph with the same logic
- * as Markdown 1.0.0. If this logic is applied, the
- * Markdown 1.0.3 test suite won't pass cleanly
- *
- * :: If the first character in a new line is not a letter,
- * let's check to see if there's some kind of block starting
- * here
- */
- if ((doc->ext_flags & HOEDOWN_EXT_LAX_SPACING) && !isalnum(data[i])) {
- if (prefix_oli(data + i, size - i) ||
- prefix_uli(data + i, size - i)) {
- end = i;
- break;
- }
-
- /* see if an html block starts here */
- if (data[i] == '<' && doc->md.blockhtml &&
- parse_htmlblock(ob, doc, data + i, size - i, 0)) {
- end = i;
- break;
- }
-
- /* see if a code fence starts here */
- if ((doc->ext_flags & HOEDOWN_EXT_FENCED_CODE) != 0 &&
- is_codefence(data + i, size - i, NULL, NULL)) {
- end = i;
- break;
- }
- }
-
i = end;
}
@@ -1690,7 +1681,7 @@ parse_paragraph(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t
hoedown_buffer *tmp = newbuf(doc, BUFFER_BLOCK);
parse_inline(tmp, doc, work.data, work.size);
if (doc->md.paragraph)
- doc->md.paragraph(ob, tmp, doc->md.opaque);
+ doc->md.paragraph(ob, tmp, &doc->data);
popbuf(doc, BUFFER_BLOCK);
} else {
hoedown_buffer *header_work;
@@ -1712,7 +1703,7 @@ parse_paragraph(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t
parse_inline(tmp, doc, work.data, work.size);
if (doc->md.paragraph)
- doc->md.paragraph(ob, tmp, doc->md.opaque);
+ doc->md.paragraph(ob, tmp, &doc->data);
popbuf(doc, BUFFER_BLOCK);
work.data += beg;
@@ -1725,7 +1716,7 @@ parse_paragraph(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t
parse_inline(header_work, doc, work.data, work.size);
if (doc->md.header)
- doc->md.header(ob, header_work, (int)level, doc->md.opaque);
+ doc->md.header(ob, header_work, (int)level, &doc->data);
popbuf(doc, BUFFER_SPAN);
}
@@ -1737,23 +1728,27 @@ parse_paragraph(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t
static size_t
parse_fencedcode(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size)
{
+ hoedown_buffer text = { 0, 0, 0, 0, NULL, NULL, NULL };
+ hoedown_buffer lang = { 0, 0, 0, 0, NULL, NULL, NULL };
size_t i = 0, text_start, line_start;
size_t w, w2;
size_t width, width2;
uint8_t chr, chr2;
- hoedown_buffer text = { 0, 0, 0, 0, NULL, NULL, NULL };
- hoedown_buffer lang = { 0, 0, 0, 0, NULL, NULL, NULL };
- // parse codefence line
- while (i < size && data[i] != '\n') i++;
+ /* parse codefence line */
+ while (i < size && data[i] != '\n')
+ i++;
+
w = parse_codefence(data, i, &lang, &width, &chr);
- if (!w) return 0;
+ if (!w)
+ return 0;
- // search for end
+ /* search for end */
i++;
text_start = i;
while ((line_start = i) < size) {
- while (i < size && data[i] != '\n') i++;
+ while (i < size && data[i] != '\n')
+ i++;
w2 = is_codefence(data + line_start, i - line_start, &width2, &chr2);
if (w == w2 && width == width2 && chr == chr2 &&
@@ -1762,11 +1757,12 @@ parse_fencedcode(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_
i++;
}
+
text.data = data + text_start;
text.size = line_start - text_start;
if (doc->md.blockcode)
- doc->md.blockcode(ob, text.size ? &text : NULL, lang.size ? &lang : NULL, doc->md.opaque);
+ doc->md.blockcode(ob, text.size ? &text : NULL, lang.size ? &lang : NULL, &doc->data);
return i;
}
@@ -1806,7 +1802,7 @@ parse_blockcode(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t
hoedown_buffer_putc(work, '\n');
if (doc->md.blockcode)
- doc->md.blockcode(ob, work, NULL, doc->md.opaque);
+ doc->md.blockcode(ob, work, NULL, &doc->data);
popbuf(doc, BUFFER_BLOCK);
return beg;
@@ -1815,7 +1811,7 @@ parse_blockcode(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t
/* parse_listitem • parsing of a single list item */
/* assuming initial prefix is already removed */
static size_t
-parse_listitem(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size, int *flags)
+parse_listitem(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size, hoedown_list_flags *flags)
{
hoedown_buffer *work = 0, *inter = 0;
size_t beg = 0, end, pre, sublist = 0, orgpre = 0, i;
@@ -1942,7 +1938,7 @@ parse_listitem(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t
/* render of li itself */
if (doc->md.listitem)
- doc->md.listitem(ob, inter, *flags, doc->md.opaque);
+ doc->md.listitem(ob, inter, *flags, &doc->data);
popbuf(doc, BUFFER_SPAN);
popbuf(doc, BUFFER_SPAN);
@@ -1952,7 +1948,7 @@ parse_listitem(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t
/* parse_list • parsing ordered or unordered list block */
static size_t
-parse_list(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size, int flags)
+parse_list(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size, hoedown_list_flags flags)
{
hoedown_buffer *work = 0;
size_t i = 0, j;
@@ -1968,7 +1964,7 @@ parse_list(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size
}
if (doc->md.list)
- doc->md.list(ob, work, flags, doc->md.opaque);
+ doc->md.list(ob, work, flags, &doc->data);
popbuf(doc, BUFFER_BLOCK);
return i;
}
@@ -2000,7 +1996,7 @@ parse_atxheader(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t
parse_inline(work, doc, data + i, end - i);
if (doc->md.header)
- doc->md.header(ob, work, (int)level, doc->md.opaque);
+ doc->md.header(ob, work, (int)level, &doc->data);
popbuf(doc, BUFFER_SPAN);
}
@@ -2018,7 +2014,7 @@ parse_footnote_def(hoedown_buffer *ob, hoedown_document *doc, unsigned int num,
parse_block(work, doc, data, size);
if (doc->md.footnote_def)
- doc->md.footnote_def(ob, work, num, doc->md.opaque);
+ doc->md.footnote_def(ob, work, num, &doc->data);
popbuf(doc, BUFFER_SPAN);
}
@@ -2043,14 +2039,14 @@ parse_footnote_list(hoedown_buffer *ob, hoedown_document *doc, struct footnote_l
}
if (doc->md.footnotes)
- doc->md.footnotes(ob, work, doc->md.opaque);
+ doc->md.footnotes(ob, work, &doc->data);
popbuf(doc, BUFFER_BLOCK);
}
/* htmlblock_is_end • check for end of HTML block : </tag>( *)\n */
/* returns tag length on match, 0 otherwise */
/* assumes data starts with "<" */
-static inline size_t
+static size_t
htmlblock_is_end(
const char *tag,
size_t tag_len,
@@ -2115,7 +2111,7 @@ htmlblock_find_end_strict(
while (i < size && data[i] != '\n') i++;
if (i < size) i++;
if (i == mark) return 0;
-
+
if (data[mark] == ' ' && mark > 0) continue;
mark += htmlblock_find_end(tag, tag_len, doc, data + mark, i - mark);
if (mark == i && (is_empty(data + i, size - i) || i >= size)) break;
@@ -2128,9 +2124,11 @@ htmlblock_find_end_strict(
static size_t
parse_htmlblock(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size, int do_render)
{
- size_t i, j = 0, tag_end;
+ hoedown_buffer work = { NULL, 0, 0, 0, NULL, NULL, NULL };
+ size_t i, j = 0, tag_len, tag_end;
const char *curtag = NULL;
- hoedown_buffer work = { data, 0, 0, 0, NULL, NULL, NULL };
+
+ work.data = data;
/* identification of the opening tag */
if (size < 2 || data[0] != '<')
@@ -2161,7 +2159,7 @@ parse_htmlblock(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t
if (j) {
work.size = i + j;
if (do_render && doc->md.blockhtml)
- doc->md.blockhtml(ob, &work, doc->md.opaque);
+ doc->md.blockhtml(ob, &work, &doc->data);
return work.size;
}
}
@@ -2178,7 +2176,7 @@ parse_htmlblock(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t
if (j) {
work.size = i + j;
if (do_render && doc->md.blockhtml)
- doc->md.blockhtml(ob, &work, doc->md.opaque);
+ doc->md.blockhtml(ob, &work, &doc->data);
return work.size;
}
}
@@ -2189,7 +2187,7 @@ parse_htmlblock(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t
}
/* looking for a matching closing tag in strict mode */
- size_t tag_len = strlen(curtag);
+ tag_len = strlen(curtag);
tag_end = htmlblock_find_end_strict(curtag, tag_len, doc, data, size);
/* if not found, trying a second pass looking for indented match */
@@ -2203,7 +2201,7 @@ parse_htmlblock(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t
/* the end of the block has been found */
work.size = tag_end;
if (do_render && doc->md.blockhtml)
- doc->md.blockhtml(ob, &work, doc->md.opaque);
+ doc->md.blockhtml(ob, &work, &doc->data);
return tag_end;
}
@@ -2215,10 +2213,10 @@ parse_table_row(
uint8_t *data,
size_t size,
size_t columns,
- int *col_data,
- int header_flag)
+ hoedown_table_flags *col_data,
+ hoedown_table_flags header_flag)
{
- size_t i = 0, col;
+ size_t i = 0, col, len;
hoedown_buffer *row_work = 0;
if (!doc->md.table_cell || !doc->md.table_row)
@@ -2240,8 +2238,16 @@ parse_table_row(
cell_start = i;
- size_t len = find_emph_char(data + i, size - i, '|');
- i += len ? len : size - i;
+ len = find_emph_char(data + i, size - i, '|');
+
+ /* Two possibilities for len == 0:
+ 1) No more pipe char found in the current line.
+ 2) The next pipe is right after the current one, i.e. empty cell.
+ For case 1, we skip to the end of line; for case 2 we just continue.
+ */
+ if (len == 0 && i < size && data[i] != '|')
+ len = size - i;
+ i += len;
cell_end = i - 1;
@@ -2249,7 +2255,7 @@ parse_table_row(
cell_end--;
parse_inline(cell_work, doc, data + cell_start, 1 + cell_end - cell_start);
- doc->md.table_cell(row_work, cell_work, col_data[col] | header_flag, doc->md.opaque);
+ doc->md.table_cell(row_work, cell_work, col_data[col] | header_flag, &doc->data);
popbuf(doc, BUFFER_SPAN);
i++;
@@ -2257,10 +2263,10 @@ parse_table_row(
for (; col < columns; ++col) {
hoedown_buffer empty_cell = { 0, 0, 0, 0, NULL, NULL, NULL };
- doc->md.table_cell(row_work, &empty_cell, col_data[col] | header_flag, doc->md.opaque);
+ doc->md.table_cell(row_work, &empty_cell, col_data[col] | header_flag, &doc->data);
}
- doc->md.table_row(ob, row_work, doc->md.opaque);
+ doc->md.table_row(ob, row_work, &doc->data);
popbuf(doc, BUFFER_SPAN);
}
@@ -2272,7 +2278,7 @@ parse_table_header(
uint8_t *data,
size_t size,
size_t *columns,
- int **column_data)
+ hoedown_table_flags **column_data)
{
int pipes;
size_t i = 0, col, header_end, under_end;
@@ -2300,7 +2306,7 @@ parse_table_header(
return 0;
*columns = pipes + 1;
- *column_data = calloc(*columns, sizeof(int));
+ *column_data = hoedown_calloc(*columns, sizeof(hoedown_table_flags));
/* Parse the header underline */
i++;
@@ -2366,12 +2372,14 @@ parse_table(
{
size_t i;
+ hoedown_buffer *work = 0;
hoedown_buffer *header_work = 0;
hoedown_buffer *body_work = 0;
size_t columns;
- int *col_data = NULL;
+ hoedown_table_flags *col_data = NULL;
+ work = newbuf(doc, BUFFER_BLOCK);
header_work = newbuf(doc, BUFFER_SPAN);
body_work = newbuf(doc, BUFFER_BLOCK);
@@ -2405,13 +2413,20 @@ parse_table(
i++;
}
+ if (doc->md.table_header)
+ doc->md.table_header(work, header_work, &doc->data);
+
+ if (doc->md.table_body)
+ doc->md.table_body(work, body_work, &doc->data);
+
if (doc->md.table)
- doc->md.table(ob, header_work, body_work, doc->md.opaque);
+ doc->md.table(ob, work, &doc->data);
}
free(col_data);
popbuf(doc, BUFFER_SPAN);
popbuf(doc, BUFFER_BLOCK);
+ popbuf(doc, BUFFER_BLOCK);
return i;
}
@@ -2443,7 +2458,7 @@ parse_block(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t siz
else if (is_hrule(txt_data, end)) {
if (doc->md.hrule)
- doc->md.hrule(ob, doc->md.opaque);
+ doc->md.hrule(ob, &doc->data);
while (beg < size && data[beg] != '\n')
beg++;
@@ -2523,7 +2538,7 @@ is_footnote(const uint8_t *data, size_t beg, size_t end, size_t *last, struct fo
start = i;
- /* process lines similiar to a list item */
+ /* process lines similar to a list item */
while (i < end) {
while (i < end && data[i] != '\n' && data[i] != '\r') i++;
@@ -2560,7 +2575,7 @@ is_footnote(const uint8_t *data, size_t beg, size_t end, size_t *last, struct fo
hoedown_buffer_put(contents, data + start + ind, i - start - ind);
/* add carriage return */
if (i < end) {
- hoedown_buffer_put(contents, "\n", 1);
+ hoedown_buffer_putc(contents, '\n');
if (i < end && (data[i] == '\n' || data[i] == '\r')) {
i++;
if (i < end && data[i] == '\n' && data[i - 1] == '\r') i++;
@@ -2703,13 +2718,22 @@ is_ref(const uint8_t *data, size_t beg, size_t end, size_t *last, struct link_re
static void expand_tabs(hoedown_buffer *ob, const uint8_t *line, size_t size)
{
+ /* This code makes two assumptions:
+ * - Input is valid UTF-8. (Any byte with top two bits 10 is skipped,
+ * whether or not it is a valid UTF-8 continuation byte.)
+ * - Input contains no combining characters. (Combining characters
+ * should be skipped but are not.)
+ */
size_t i = 0, tab = 0;
while (i < size) {
size_t org = i;
while (i < size && line[i] != '\t') {
- i++; tab++;
+ /* ignore UTF-8 continuation bytes */
+ if ((line[i] & 0xc0) != 0x80)
+ tab++;
+ i++;
}
if (i > org)
@@ -2733,24 +2757,27 @@ static void expand_tabs(hoedown_buffer *ob, const uint8_t *line, size_t size)
hoedown_document *
hoedown_document_new(
const hoedown_renderer *renderer,
- unsigned int extensions,
+ hoedown_extensions extensions,
size_t max_nesting)
{
hoedown_document *doc = NULL;
assert(max_nesting > 0 && renderer);
- doc = malloc(sizeof(hoedown_document));
- if (!doc)
- return NULL;
-
+ doc = hoedown_malloc(sizeof(hoedown_document));
memcpy(&doc->md, renderer, sizeof(hoedown_renderer));
- hoedown_stack_new(&doc->work_bufs[BUFFER_BLOCK], 4);
- hoedown_stack_new(&doc->work_bufs[BUFFER_SPAN], 8);
+ doc->data.opaque = renderer->opaque;
+
+ hoedown_stack_init(&doc->work_bufs[BUFFER_BLOCK], 4);
+ hoedown_stack_init(&doc->work_bufs[BUFFER_SPAN], 8);
memset(doc->active_char, 0x0, 256);
+ if (extensions & HOEDOWN_EXT_UNDERLINE && doc->md.underline) {
+ doc->active_char['_'] = MD_CHAR_EMPHASIS;
+ }
+
if (doc->md.emphasis || doc->md.double_emphasis || doc->md.triple_emphasis) {
doc->active_char['*'] = MD_CHAR_EMPHASIS;
doc->active_char['_'] = MD_CHAR_EMPHASIS;
@@ -2766,12 +2793,12 @@ hoedown_document_new(
if (doc->md.linebreak)
doc->active_char['\n'] = MD_CHAR_LINEBREAK;
- if (doc->md.image || doc->md.link)
+ if (doc->md.image || doc->md.link || doc->md.footnotes || doc->md.footnote_ref)
doc->active_char['['] = MD_CHAR_LINK;
doc->active_char['<'] = MD_CHAR_LANGLE;
doc->active_char['\\'] = MD_CHAR_ESCAPE;
- doc->active_char['&'] = MD_CHAR_ENTITITY;
+ doc->active_char['&'] = MD_CHAR_ENTITY;
if (extensions & HOEDOWN_EXT_AUTOLINK) {
doc->active_char[':'] = MD_CHAR_AUTOLINK_URL;
@@ -2797,7 +2824,7 @@ hoedown_document_new(
}
void
-hoedown_document_render(hoedown_document *doc, hoedown_buffer *ob, const uint8_t *document, size_t doc_size)
+hoedown_document_render(hoedown_document *doc, hoedown_buffer *ob, const uint8_t *data, size_t size)
{
static const uint8_t UTF8_BOM[] = {0xEF, 0xBB, 0xBF};
@@ -2807,11 +2834,9 @@ hoedown_document_render(hoedown_document *doc, hoedown_buffer *ob, const uint8_t
int footnotes_enabled;
text = hoedown_buffer_new(64);
- if (!text)
- return;
/* Preallocate enough space for our buffer to avoid expanding while copying */
- hoedown_buffer_grow(text, doc_size);
+ hoedown_buffer_grow(text, size);
/* reset the references table */
memset(&doc->refs, 0x0, REF_TABLE_SIZE * sizeof(void *));
@@ -2829,26 +2854,26 @@ hoedown_document_render(hoedown_document *doc, hoedown_buffer *ob, const uint8_t
/* Skip a possible UTF-8 BOM, even though the Unicode standard
* discourages having these in UTF-8 documents */
- if (doc_size >= 3 && memcmp(document, UTF8_BOM, 3) == 0)
+ if (size >= 3 && memcmp(data, UTF8_BOM, 3) == 0)
beg += 3;
- while (beg < doc_size) /* iterating over lines */
- if (footnotes_enabled && is_footnote(document, beg, doc_size, &end, &doc->footnotes_found))
+ while (beg < size) /* iterating over lines */
+ if (footnotes_enabled && is_footnote(data, beg, size, &end, &doc->footnotes_found))
beg = end;
- else if (is_ref(document, beg, doc_size, &end, doc->refs))
+ else if (is_ref(data, beg, size, &end, doc->refs))
beg = end;
else { /* skipping to the next line */
end = beg;
- while (end < doc_size && document[end] != '\n' && document[end] != '\r')
+ while (end < size && data[end] != '\n' && data[end] != '\r')
end++;
/* adding the line body if present */
if (end > beg)
- expand_tabs(text, document + beg, end - beg);
+ expand_tabs(text, data + beg, end - beg);
- while (end < doc_size && (document[end] == '\n' || document[end] == '\r')) {
+ while (end < size && (data[end] == '\n' || data[end] == '\r')) {
/* add one \n per newline */
- if (document[end] == '\n' || (end + 1 < doc_size && document[end + 1] != '\n'))
+ if (data[end] == '\n' || (end + 1 < size && data[end + 1] != '\n'))
hoedown_buffer_putc(text, '\n');
end++;
}
@@ -2861,7 +2886,7 @@ hoedown_document_render(hoedown_document *doc, hoedown_buffer *ob, const uint8_t
/* second pass: actual rendering */
if (doc->md.doc_header)
- doc->md.doc_header(ob, doc->md.opaque);
+ doc->md.doc_header(ob, 0, &doc->data);
if (text->size) {
/* adding a final newline if not already present */
@@ -2876,7 +2901,7 @@ hoedown_document_render(hoedown_document *doc, hoedown_buffer *ob, const uint8_t
parse_footnote_list(ob, doc, &doc->footnotes_used);
if (doc->md.doc_footer)
- doc->md.doc_footer(ob, doc->md.opaque);
+ doc->md.doc_footer(ob, 0, &doc->data);
/* clean-up */
hoedown_buffer_free(text);
@@ -2891,40 +2916,45 @@ hoedown_document_render(hoedown_document *doc, hoedown_buffer *ob, const uint8_t
}
void
-hoedown_document_render_inline(hoedown_document *doc, hoedown_buffer *ob, const uint8_t *document, size_t doc_size)
+hoedown_document_render_inline(hoedown_document *doc, hoedown_buffer *ob, const uint8_t *data, size_t size)
{
size_t i = 0, mark;
hoedown_buffer *text = hoedown_buffer_new(64);
- if (!text)
- return;
/* reset the references table */
memset(&doc->refs, 0x0, REF_TABLE_SIZE * sizeof(void *));
- /* first pass: convert all spacing to spaces */
- hoedown_buffer_grow(text, doc_size);
+ /* first pass: expand tabs and process newlines */
+ hoedown_buffer_grow(text, size);
while (1) {
mark = i;
- while (i < doc_size && document[i] != '\n' && document[i] != '\r')
+ while (i < size && data[i] != '\n' && data[i] != '\r')
i++;
- expand_tabs(text, document + mark, i - mark);
+ expand_tabs(text, data + mark, i - mark);
- if (i >= doc_size)
+ if (i >= size)
break;
- while (i < doc_size && (document[i] == '\n' || document[i] == '\r')) {
+ while (i < size && (data[i] == '\n' || data[i] == '\r')) {
/* add one \n per newline */
- if (document[i] == '\n' || (i + 1 < doc_size && document[i + 1] != '\n'))
+ if (data[i] == '\n' || (i + 1 < size && data[i + 1] != '\n'))
hoedown_buffer_putc(text, '\n');
i++;
}
}
/* second pass: actual rendering */
- hoedown_buffer_grow(ob, doc_size + (doc_size >> 1));
+ hoedown_buffer_grow(ob, text->size + (text->size >> 1));
+
+ if (doc->md.doc_header)
+ doc->md.doc_header(ob, 1, &doc->data);
+
parse_inline(ob, doc, text->data, text->size);
+ if (doc->md.doc_footer)
+ doc->md.doc_footer(ob, 1, &doc->data);
+
/* clean-up */
hoedown_buffer_free(text);
@@ -2943,8 +2973,8 @@ hoedown_document_free(hoedown_document *doc)
for (i = 0; i < (size_t)doc->work_bufs[BUFFER_BLOCK].asize; ++i)
hoedown_buffer_free(doc->work_bufs[BUFFER_BLOCK].item[i]);
- hoedown_stack_free(&doc->work_bufs[BUFFER_SPAN]);
- hoedown_stack_free(&doc->work_bufs[BUFFER_BLOCK]);
+ hoedown_stack_uninit(&doc->work_bufs[BUFFER_SPAN]);
+ hoedown_stack_uninit(&doc->work_bufs[BUFFER_BLOCK]);
free(doc);
}