diff options
author | Moriyoshi Koizumi <moriyoshi@php.net> | 2003-10-02 06:57:21 +0000 |
---|---|---|
committer | Moriyoshi Koizumi <moriyoshi@php.net> | 2003-10-02 06:57:21 +0000 |
commit | 647408978e77c1d9bc0b8ea1f00a4973ea7d84ee (patch) | |
tree | 4d2b4bee3196567522952ac4e8b89ecf3a2cccbf | |
parent | ffb57ad555a68b31e8b701c97022f25cb0d2f7b9 (diff) | |
download | php-git-647408978e77c1d9bc0b8ea1f00a4973ea7d84ee.tar.gz |
Fixed bug #25707 (html_entity_decode over-decodes &amp;lt;)
-rw-r--r-- | ext/standard/html.c | 90 | ||||
-rw-r--r-- | ext/standard/tests/strings/bug25707.phpt | 12 |
2 files changed, 64 insertions, 38 deletions
diff --git a/ext/standard/html.c b/ext/standard/html.c index 595563caf6..942d862c41 100644 --- a/ext/standard/html.c +++ b/ext/standard/html.c @@ -417,7 +417,6 @@ static const struct { int entitylen; int flags; } basic_entities[] = { - { '&', "&amp;", 5, 0 }, { '"', "&quot;", 6, ENT_HTML_QUOTE_DOUBLE }, { '\'', "&#039;", 6, ENT_HTML_QUOTE_SINGLE }, { '\'', "&#39;", 5, ENT_HTML_QUOTE_SINGLE }, @@ -917,54 +916,69 @@ PHPAPI char *php_unescape_html_entities(unsigned char *old, int oldlen, int *new ret = replaced; } - /* replace numeric entities */ + /* replace numeric entities & "&amp;" */ lim = ret + retlen; - for (p = ret, q = ret; p < lim; p++) { + for (p = ret, q = ret; p < lim;) { int code; - if (p < lim - 1 && p[0] == '&' && p[1] == '#') { - code = strtol(p + 2, &next, 10); - if (next != NULL && *next == ';') { - switch (charset) { - case cs_utf_8: - q += php_utf32_utf8(q, code); - break; + if (p[0] == '&') { + if (p + 2 < lim) { + if (p[1] == '#') { + code = strtol(p + 2, &next, 10); + if (next != NULL && *next == ';') { + switch (charset) { + case cs_utf_8: + q += php_utf32_utf8(q, code); + break; - case cs_8859_1: - case cs_8859_5: - case cs_8859_15: - if (0xa0 <= code && code <= 0xff) { - *(q++) = code; - } - break; + case cs_8859_1: + case cs_8859_5: + case cs_8859_15: + if (0xa0 <= code && code <= 0xff) { + *(q++) = code; + } + break; - case cs_cp1252: - case cs_cp1251: - case cs_cp866: - if (0x80 <= code && code <= 0xff) { - *(q++) = code; - } - break; + case cs_cp1252: + case cs_cp1251: + case cs_cp866: + if (0x80 <= code && code <= 0xff) { + *(q++) = code; + } + break; - case cs_big5: - case cs_gb2312: - case cs_big5hkscs: - case cs_sjis: - case cs_eucjp: - if (code <= 0x7f) { - *(q++) = code; - } - break; + case cs_big5: + case cs_gb2312: + case cs_big5hkscs: + case cs_sjis: + case cs_eucjp: + if (code <= 0x7f) { + *(q++) = code; + } + break; - default: - break; + default: + break; + } + p = next + 1; + } else { + *(q++) = *(p++); + *(q++) = *(p++); + } + } else if (p + 4 < 
lim && + p[1] == 'a' && p[2] == 'm' &&p[3] == 'p' && + p[4] == ';') { + *(q++) = '&'; + p += 5; + } else { + *(q++) = *(p++); + *(q++) = *(p++); } - p = next; } else { - *(q++) = *p; + *(q++) = *(p++); } } else { - *(q++) = *p; + *(q++) = *(p++); } } *q = '\0'; diff --git a/ext/standard/tests/strings/bug25707.phpt b/ext/standard/tests/strings/bug25707.phpt new file mode 100644 index 0000000000..4954fb1ae5 --- /dev/null +++ b/ext/standard/tests/strings/bug25707.phpt @@ -0,0 +1,12 @@ +--TEST-- +Bug #25707 (html_entity_decode over-decodes &amp;lt;) +--FILE-- +<?php +var_dump(html_entity_decode("&amp;lt;", ENT_COMPAT, 'ISO-8859-1')); +var_dump(html_entity_decode("&amp;#38;", ENT_COMPAT, 'ISO-8859-1')); +var_dump(html_entity_decode("&amp;#38;lt;", ENT_COMPAT, 'ISO-8859-1')); +?> +--EXPECT-- +string(4) "&lt;" +string(5) "&#38;" +string(8) "&#38;lt;" |