diff options
author | Brian Anderson <andersrb@gmail.com> | 2015-09-21 12:41:18 -0700 |
---|---|---|
committer | Brian Anderson <andersrb@gmail.com> | 2015-09-21 12:41:18 -0700 |
commit | 49c64b3cb5999cad0118fa5cb0cc60f5fcc70493 (patch) | |
tree | dd136dcea4700b26582bcd950ccacf1f19c2dd86 /src/escape.c | |
parent | 794d91b37020242446c53713c7ce0c09b33b5f0c (diff) | |
parent | ca4609d56a77378ef9cf6dd3caff3aec006ecce9 (diff) | |
download | rust-hoedown-49c64b3cb5999cad0118fa5cb0cc60f5fcc70493.tar.gz |
Upgrade hoedown to 3.0.5.
Diffstat (limited to 'src/escape.c')
-rw-r--r-- | src/escape.c | 143 |
1 files changed, 70 insertions, 73 deletions
diff --git a/src/escape.c b/src/escape.c index 749eb75..122c6ec 100644 --- a/src/escape.c +++ b/src/escape.c @@ -4,6 +4,11 @@ #include <stdio.h> #include <string.h> + +#define likely(x) __builtin_expect((x),1) +#define unlikely(x) __builtin_expect((x),0) + + /* * The following characters will not be escaped: * @@ -15,9 +20,9 @@ * - The characters which are *not* safe to be in * an URL because they are RESERVED characters. * - * We asume (lazily) that any RESERVED char that + * We assume (lazily) that any RESERVED char that * appears inside an URL is actually meant to - * have its native function (i.e. as an URL + * have its native function (i.e. as an URL * component/separator) and hence needs no escaping. * * There are two exceptions: the chacters & (amp) @@ -30,58 +35,55 @@ * */ static const uint8_t HREF_SAFE[UINT8_MAX+1] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, + 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; void -hoedown_escape_href(hoedown_buffer *ob, const uint8_t *src, size_t size) +hoedown_escape_href(hoedown_buffer *ob, const uint8_t *data, size_t size) { static const char hex_chars[] = "0123456789ABCDEF"; - size_t i = 0, org; + size_t i = 0, mark; char hex_str[3]; hex_str[0] = '%'; while (i < size) { - org = i; - while (i < size && HREF_SAFE[src[i]] != 0) - i++; + mark = i; + while (i < size && HREF_SAFE[data[i]]) i++; - if (i > org) { - if (org == 0) { - if (i >= size) { - hoedown_buffer_put(ob, src, size); - return; - } - - } + /* Optimization for cases where there's nothing to escape */ + if (mark == 0 && i >= size) { + hoedown_buffer_put(ob, data, size); + return; + } - hoedown_buffer_put(ob, src + org, i - org); + if (likely(i > mark)) { + hoedown_buffer_put(ob, data + mark, i - mark); } /* escaping */ if (i >= size) break; - switch (src[i]) { + switch (data[i]) { /* amp appears all the time in URLs, but needs * HTML-entity escaping to be inside an href */ - case '&': + case '&': HOEDOWN_BUFPUTSL(ob, "&amp;"); break; @@ -91,7 +93,7 @@ hoedown_escape_href(hoedown_buffer *ob, const uint8_t *src, size_t size) case '\'': HOEDOWN_BUFPUTSL(ob, "&#x27;"); break; - + /* the space can be escaped to %20 or a plus * sign. we're going with the generic escape * for now. 
the plus thing is more commonly seen @@ -104,15 +106,16 @@ hoedown_escape_href(hoedown_buffer *ob, const uint8_t *src, size_t size) /* every other character goes with a %XX escaping */ default: - hex_str[1] = hex_chars[(src[i] >> 4) & 0xF]; - hex_str[2] = hex_chars[src[i] & 0xF]; - hoedown_buffer_put(ob, hex_str, 3); + hex_str[1] = hex_chars[(data[i] >> 4) & 0xF]; + hex_str[2] = hex_chars[data[i] & 0xF]; + hoedown_buffer_put(ob, (uint8_t *)hex_str, 3); } i++; } } + /** * According to the OWASP rules: * @@ -125,21 +128,21 @@ hoedown_escape_href(hoedown_buffer *ob, const uint8_t *src, size_t size) * */ static const uint8_t HTML_ESCAPE_TABLE[UINT8_MAX+1] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 1, 0, 0, 0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 4, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 6, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 4, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 6, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; @@ -154,36 +157,30 @@ static const char *HTML_ESCAPES[] = { }; void -hoedown_escape_html(hoedown_buffer *ob, const uint8_t *src, size_t size, int secure) +hoedown_escape_html(hoedown_buffer *ob, const uint8_t *data, size_t size, int secure) { - size_t i = 0, org, esc = 0; - - while (i < size) { - org = i; - while (i < size && (esc = HTML_ESCAPE_TABLE[src[i]]) == 0) - i++; - - if (i > org) { - if (org == 0) { - if (i >= size) { - hoedown_buffer_put(ob, src, size); - return; - } + size_t i = 0, mark; - } + while (1) { + mark = i; + while (i < size && HTML_ESCAPE_TABLE[data[i]] == 0) i++; - hoedown_buffer_put(ob, src + org, i - org); + /* Optimization for cases where there's nothing to escape */ + if (mark == 0 && i >= size) { + hoedown_buffer_put(ob, data, size); + return; } - /* escaping */ - if (i >= size) - break; + if (likely(i > mark)) + hoedown_buffer_put(ob, data + mark, i - mark); + + if (i >= size) break; /* The forward slash is only escaped in secure mode */ - if (src[i] == '/' && !secure) { + if (!secure && data[i] == '/') { hoedown_buffer_putc(ob, '/'); } else { - hoedown_buffer_puts(ob, HTML_ESCAPES[esc]); + hoedown_buffer_puts(ob, HTML_ESCAPES[HTML_ESCAPE_TABLE[data[i]]]); } i++; |