summaryrefslogtreecommitdiff
path: root/src/escape.c
diff options
context:
space:
mode:
authorBrian Anderson <andersrb@gmail.com>2015-09-21 12:41:18 -0700
committerBrian Anderson <andersrb@gmail.com>2015-09-21 12:41:18 -0700
commit49c64b3cb5999cad0118fa5cb0cc60f5fcc70493 (patch)
treedd136dcea4700b26582bcd950ccacf1f19c2dd86 /src/escape.c
parent794d91b37020242446c53713c7ce0c09b33b5f0c (diff)
parentca4609d56a77378ef9cf6dd3caff3aec006ecce9 (diff)
downloadrust-hoedown-49c64b3cb5999cad0118fa5cb0cc60f5fcc70493.tar.gz
Merge pull request #4 from hoedown/masterHEADmaster
Upgrade hoedown to 3.0.5.
Diffstat (limited to 'src/escape.c')
-rw-r--r--src/escape.c143
1 files changed, 70 insertions, 73 deletions
diff --git a/src/escape.c b/src/escape.c
index 749eb75..122c6ec 100644
--- a/src/escape.c
+++ b/src/escape.c
@@ -4,6 +4,11 @@
#include <stdio.h>
#include <string.h>
+
+#define likely(x) __builtin_expect((x),1)
+#define unlikely(x) __builtin_expect((x),0)
+
+
/*
* The following characters will not be escaped:
*
@@ -15,9 +20,9 @@
* - The characters which are *not* safe to be in
* an URL because they are RESERVED characters.
*
- * We asume (lazily) that any RESERVED char that
+ * We assume (lazily) that any RESERVED char that
* appears inside an URL is actually meant to
- * have its native function (i.e. as an URL
+ * have its native function (i.e. as an URL
* component/separator) and hence needs no escaping.
*
* There are two exceptions: the chacters & (amp)
@@ -30,58 +35,55 @@
*
*/
static const uint8_t HREF_SAFE[UINT8_MAX+1] = {
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
- 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
+ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
void
-hoedown_escape_href(hoedown_buffer *ob, const uint8_t *src, size_t size)
+hoedown_escape_href(hoedown_buffer *ob, const uint8_t *data, size_t size)
{
static const char hex_chars[] = "0123456789ABCDEF";
- size_t i = 0, org;
+ size_t i = 0, mark;
char hex_str[3];
hex_str[0] = '%';
while (i < size) {
- org = i;
- while (i < size && HREF_SAFE[src[i]] != 0)
- i++;
+ mark = i;
+ while (i < size && HREF_SAFE[data[i]]) i++;
- if (i > org) {
- if (org == 0) {
- if (i >= size) {
- hoedown_buffer_put(ob, src, size);
- return;
- }
-
- }
+ /* Optimization for cases where there's nothing to escape */
+ if (mark == 0 && i >= size) {
+ hoedown_buffer_put(ob, data, size);
+ return;
+ }
- hoedown_buffer_put(ob, src + org, i - org);
+ if (likely(i > mark)) {
+ hoedown_buffer_put(ob, data + mark, i - mark);
}
/* escaping */
if (i >= size)
break;
- switch (src[i]) {
+ switch (data[i]) {
/* amp appears all the time in URLs, but needs
* HTML-entity escaping to be inside an href */
- case '&':
+ case '&':
HOEDOWN_BUFPUTSL(ob, "&amp;");
break;
@@ -91,7 +93,7 @@ hoedown_escape_href(hoedown_buffer *ob, const uint8_t *src, size_t size)
case '\'':
HOEDOWN_BUFPUTSL(ob, "&#x27;");
break;
-
+
/* the space can be escaped to %20 or a plus
* sign. we're going with the generic escape
* for now. the plus thing is more commonly seen
@@ -104,15 +106,16 @@ hoedown_escape_href(hoedown_buffer *ob, const uint8_t *src, size_t size)
/* every other character goes with a %XX escaping */
default:
- hex_str[1] = hex_chars[(src[i] >> 4) & 0xF];
- hex_str[2] = hex_chars[src[i] & 0xF];
- hoedown_buffer_put(ob, hex_str, 3);
+ hex_str[1] = hex_chars[(data[i] >> 4) & 0xF];
+ hex_str[2] = hex_chars[data[i] & 0xF];
+ hoedown_buffer_put(ob, (uint8_t *)hex_str, 3);
}
i++;
}
}
+
/**
* According to the OWASP rules:
*
@@ -125,21 +128,21 @@ hoedown_escape_href(hoedown_buffer *ob, const uint8_t *src, size_t size)
*
*/
static const uint8_t HTML_ESCAPE_TABLE[UINT8_MAX+1] = {
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 1, 0, 0, 0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 4,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 6, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 1, 0, 0, 0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 4,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 6, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
@@ -154,36 +157,30 @@ static const char *HTML_ESCAPES[] = {
};
void
-hoedown_escape_html(hoedown_buffer *ob, const uint8_t *src, size_t size, int secure)
+hoedown_escape_html(hoedown_buffer *ob, const uint8_t *data, size_t size, int secure)
{
- size_t i = 0, org, esc = 0;
-
- while (i < size) {
- org = i;
- while (i < size && (esc = HTML_ESCAPE_TABLE[src[i]]) == 0)
- i++;
-
- if (i > org) {
- if (org == 0) {
- if (i >= size) {
- hoedown_buffer_put(ob, src, size);
- return;
- }
+ size_t i = 0, mark;
- }
+ while (1) {
+ mark = i;
+ while (i < size && HTML_ESCAPE_TABLE[data[i]] == 0) i++;
- hoedown_buffer_put(ob, src + org, i - org);
+ /* Optimization for cases where there's nothing to escape */
+ if (mark == 0 && i >= size) {
+ hoedown_buffer_put(ob, data, size);
+ return;
}
- /* escaping */
- if (i >= size)
- break;
+ if (likely(i > mark))
+ hoedown_buffer_put(ob, data + mark, i - mark);
+
+ if (i >= size) break;
/* The forward slash is only escaped in secure mode */
- if (src[i] == '/' && !secure) {
+ if (!secure && data[i] == '/') {
hoedown_buffer_putc(ob, '/');
} else {
- hoedown_buffer_puts(ob, HTML_ESCAPES[esc]);
+ hoedown_buffer_puts(ob, HTML_ESCAPES[HTML_ESCAPE_TABLE[data[i]]]);
}
i++;