diff options
author | Stanislav Malyshev <stas@php.net> | 2007-10-03 05:05:08 +0000 |
---|---|---|
committer | Stanislav Malyshev <stas@php.net> | 2007-10-03 05:05:08 +0000 |
commit | 6e1dfff1ed584d8f3d593ac910fe36f3aab4bf1c (patch) | |
tree | a972baf1150fc1327a59c64127dfebc765064e3e /ext/standard/html.c | |
parent | b380ff014a84a5cfc5a342058452b35552cc04e6 (diff) | |
download | php-git-6e1dfff1ed584d8f3d593ac910fe36f3aab4bf1c.tar.gz |
MFB do not accept partial multibyte sequences in html* functions
Diffstat (limited to 'ext/standard/html.c')
-rw-r--r-- | ext/standard/html.c | 54 |
1 files changed, 44 insertions, 10 deletions
diff --git a/ext/standard/html.c b/ext/standard/html.c index 5b102f3d82..0160cef571 100644 --- a/ext/standard/html.c +++ b/ext/standard/html.c @@ -484,18 +484,29 @@ struct basic_entities_dec { } \ mbseq[mbpos++] = (mbchar); } +#define CHECK_LEN(pos, chars_need) \ + if((str_len - (pos)) < chars_need) { \ + *status = FAILURE; \ + return 0; \ + } + /* {{{ get_next_char */ inline static unsigned short get_next_char(enum entity_charset charset, unsigned char * str, + int str_len, int * newpos, unsigned char * mbseq, - int * mbseqlen) + int * mbseqlen, + int *status) { int pos = *newpos; int mbpos = 0; int mbspace = *mbseqlen; unsigned short this_char = str[pos++]; + unsigned char next_char; + + *status = SUCCESS; if (mbspace <= 0) { *mbseqlen = 0; @@ -555,6 +566,7 @@ inline static unsigned short get_next_char(enum entity_charset charset, break; default: /* invalid */ + *status = FAILURE; more = 0; } } @@ -562,21 +574,27 @@ inline static unsigned short get_next_char(enum entity_charset charset, else if (this_char < 0xe0) { stat = 0x10; /* 2 byte */ utf = (this_char & 0x1f) << 6; + CHECK_LEN(pos, 1); } else if (this_char < 0xf0) { stat = 0x20; /* 3 byte */ utf = (this_char & 0xf) << 12; + CHECK_LEN(pos, 2); } else if (this_char < 0xf8) { stat = 0x30; /* 4 byte */ utf = (this_char & 0x7) << 18; + CHECK_LEN(pos, 3); } else if (this_char < 0xfc) { stat = 0x40; /* 5 byte */ utf = (this_char & 0x3) << 24; + CHECK_LEN(pos, 4); } else if (this_char < 0xfe) { stat = 0x50; /* 6 byte */ utf = (this_char & 0x1) << 30; + CHECK_LEN(pos, 5); } else { /* invalid; bail */ more = 0; + *status = FAILURE; break; } @@ -594,7 +612,8 @@ inline static unsigned short get_next_char(enum entity_charset charset, /* check if this is the first of a 2-byte sequence */ if (this_char >= 0xa1 && this_char <= 0xfe) { /* peek at the next char */ - unsigned char next_char = str[pos]; + CHECK_LEN(pos, 1); + next_char = str[pos]; if ((next_char >= 0x40 && next_char <= 0x7e) || (next_char >= 0xa1 && next_char <= 0xfe)) { /* yes, this a wide char */ @@ -614,7 +633,8 @@ inline static unsigned short get_next_char(enum entity_charset charset, (this_char >= 0xe0 && this_char <= 0xef) ) { /* peek at the next char */ - unsigned char next_char = str[pos]; + CHECK_LEN(pos, 1); + next_char = str[pos]; if ((next_char >= 0x40 && next_char <= 0x7e) || (next_char >= 0x80 && next_char <= 0xfc)) { @@ -633,7 +653,8 @@ inline static unsigned short get_next_char(enum entity_charset charset, /* check if this is the first of a multi-byte sequence */ if (this_char >= 0xa1 && this_char <= 0xfe) { /* peek at the next char */ - unsigned char next_char = str[pos]; + CHECK_LEN(pos, 1); + next_char = str[pos]; if (next_char >= 0xa1 && next_char <= 0xfe) { /* yes, this a jis kanji char */ this_char <<= 8; @@ -644,7 +665,8 @@ inline static unsigned short get_next_char(enum entity_charset charset, } else if (this_char == 0x8e) { /* peek at the next char */ - unsigned char next_char = str[pos]; + CHECK_LEN(pos, 1); + next_char = str[pos]; if (next_char >= 0xa1 && next_char <= 0xdf) { /* JIS X 0201 kana */ this_char <<= 8; @@ -655,8 +677,10 @@ inline static unsigned short get_next_char(enum entity_charset charset, } else if (this_char == 0x8f) { /* peek at the next two char */ - unsigned char next_char = str[pos]; - unsigned char next2_char = str[pos+1]; + unsigned char next2_char; + CHECK_LEN(pos, 2); + next_char = str[pos]; + next2_char = str[pos+1]; if ((next_char >= 0xa1 && next_char <= 0xfe) && (next2_char >= 0xa1 && next2_char <= 0xfe)) { /* JIS X 0212 hojo-kanji */ @@ -1098,13 +1122,23 @@ PHPAPI char *php_escape_html_entities_ex(unsigned char *old, int oldlen, int *ne maxlen = 128; replaced = emalloc (maxlen); len = 0; - + //Sleep(10*1000); i = 0; while (i < oldlen) { unsigned char mbsequence[16]; /* allow up to 15 characters in a multibyte sequence */ int mbseqlen = sizeof(mbsequence); - unsigned short this_char = get_next_char(charset, old, &i, mbsequence, &mbseqlen); - + int status = SUCCESS; + unsigned short this_char = get_next_char(charset, old, oldlen, &i, mbsequence, &mbseqlen, &status); + + if(status == FAILURE) { + /* invalid MB sequence */ + efree(replaced); + if(!PG(display_errors)) { + php_error_docref(NULL TSRMLS_CC, E_WARNING, "Invalid multibyte sequence in argument"); + } + *newlen = 0; + return STR_EMPTY_ALLOC(); + } matches_map = 0; if (len + 16 > maxlen) |