diff options
Diffstat (limited to 'ext/standard/html.c')
-rw-r--r-- | ext/standard/html.c | 68 |
1 file changed, 46 insertions, 22 deletions
diff --git a/ext/standard/html.c b/ext/standard/html.c
index 8845a8aa8c..7932194cb8 100644
--- a/ext/standard/html.c
+++ b/ext/standard/html.c
@@ -483,11 +483,26 @@ struct basic_entities_dec {
 	} \
 	mbseq[mbpos++] = (mbchar); }
 
+/* skip one byte and return */
+#define MB_FAILURE(pos) do { \
+	*newpos = pos + 1; \
+	*status = FAILURE; \
+	return 0; \
+} while (0)
+
 #define CHECK_LEN(pos, chars_need) \
-	if((str_len - (pos)) < chars_need) { \
-		*newpos = pos; \
-		*status = FAILURE; \
-		return 0; \
+	if (chars_need < 1) { \
+		if((str_len - (pos)) < chars_need) { \
+			*newpos = pos; \
+			*status = FAILURE; \
+			return 0; \
+		} \
+	} else { \
+		if((str_len - (pos)) < chars_need) { \
+			*newpos = pos + 1; \
+			*status = FAILURE; \
+			return 0; \
+		} \
 	}
 
 /* {{{ get_next_char
@@ -526,10 +541,12 @@ inline static unsigned int get_next_char(enum entity_charset charset,
 					this_char = c;
 					pos++;
 				} else if (c < 0xc0) {
-					*status = FAILURE;
-					return 0;
+					MB_FAILURE(pos);
 				} else if (c < 0xe0) {
 					CHECK_LEN(pos, 2);
+					if (str[pos + 1] < 0x80 || str[pos + 1] > 0xbf) {
+						MB_FAILURE(pos);
+					}
 					this_char = ((c & 0x1f) << 6) | (str[pos + 1] & 0x3f);
 					if (this_char < 0x80) {
 						*status = FAILURE;
@@ -540,10 +557,15 @@ inline static unsigned int get_next_char(enum entity_charset charset,
 					pos += 2;
 				} else if (c < 0xf0) {
 					CHECK_LEN(pos, 3);
+					if (str[pos + 1] < 0x80 || str[pos + 1] > 0xbf) {
+						MB_FAILURE(pos);
+					}
+					if (str[pos + 2] < 0x80 || str[pos + 2] > 0xbf) {
+						MB_FAILURE(pos);
+					}
 					this_char = ((c & 0x0f) << 12) | ((str[pos + 1] & 0x3f) << 6) | (str[pos + 2] & 0x3f);
 					if (this_char < 0x800) {
-						*status = FAILURE;
-						return 0;
+						MB_FAILURE(pos);
 					}
 					MB_WRITE((unsigned char)c);
 					MB_WRITE((unsigned char)str[pos + 1]);
@@ -551,10 +573,18 @@ inline static unsigned int get_next_char(enum entity_charset charset,
 					pos += 3;
 				} else if (c < 0xf8) {
 					CHECK_LEN(pos, 4);
+					if (str[pos + 1] < 0x80 || str[pos + 1] > 0xbf) {
+						MB_FAILURE(pos);
+					}
+					if (str[pos + 2] < 0x80 || str[pos + 2] > 0xbf) {
+						MB_FAILURE(pos);
+					}
+					if (str[pos + 3] < 0x80 || str[pos + 3] > 0xbf) {
+						MB_FAILURE(pos);
+					}
 					this_char = ((c & 0x07) << 18) | ((str[pos + 1] & 0x3f) << 12) | ((str[pos + 2] & 0x3f) << 6) | (str[pos + 3] & 0x3f);
 					if (this_char < 0x10000) {
-						*status = FAILURE;
-						return 0;
+						MB_FAILURE(pos);
 					}
 					MB_WRITE((unsigned char)c);
 					MB_WRITE((unsigned char)str[pos + 1]);
@@ -562,8 +592,7 @@ inline static unsigned int get_next_char(enum entity_charset charset,
 					MB_WRITE((unsigned char)str[pos + 3]);
 					pos += 4;
 				} else {
-					*status = FAILURE;
-					return 0;
+					MB_FAILURE(pos);
 				}
 			}
 			break;
@@ -585,8 +614,7 @@ inline static unsigned int get_next_char(enum entity_charset charset,
 						MB_WRITE(next_char);
 						this_char = (this_char << 8) | next_char;
 					} else {
-						*status = FAILURE;
-						return 0;
+						MB_FAILURE(pos);
 					}
 				} else {
 					MB_WRITE(this_char);
@@ -611,8 +639,7 @@ inline static unsigned int get_next_char(enum entity_charset charset,
 						MB_WRITE(next_char);
 						this_char = (this_char << 8) | next_char;
 					} else {
-						*status = FAILURE;
-						return 0;
+						MB_FAILURE(pos);
 					}
 				} else {
 					MB_WRITE(this_char);
@@ -634,8 +661,7 @@ inline static unsigned int get_next_char(enum entity_charset charset,
 						MB_WRITE(next_char);
 						this_char = (this_char << 8) | next_char;
 					} else {
-						*status = FAILURE;
-						return 0;
+						MB_FAILURE(pos);
 					}
 				} else if (this_char == 0x8e) {
 					/* peek at the next char */
@@ -647,8 +673,7 @@ inline static unsigned int get_next_char(enum entity_charset charset,
 						MB_WRITE(next_char);
 						this_char = (this_char << 8) | next_char;
 					} else {
-						*status = FAILURE;
-						return 0;
+						MB_FAILURE(pos);
 					}
 				} else if (this_char == 0x8f) {
 					/* peek at the next two char */
@@ -665,8 +690,7 @@ inline static unsigned int get_next_char(enum entity_charset charset,
 						MB_WRITE(next2_char);
 						this_char = (this_char << 16) | (next_char << 8) | next_char;
 					} else {
-						*status = FAILURE;
-						return 0;
+						MB_FAILURE(pos);
 					}
 				} else {
 					MB_WRITE(this_char);