summary refs log tree commit diff
diff options
context:
space:
mode:
author Moriyoshi Koizumi <moriyoshi@php.net> 2003-10-02 06:57:21 +0000
committer Moriyoshi Koizumi <moriyoshi@php.net> 2003-10-02 06:57:21 +0000
commit 647408978e77c1d9bc0b8ea1f00a4973ea7d84ee (patch)
tree 4d2b4bee3196567522952ac4e8b89ecf3a2cccbf
parent ffb57ad555a68b31e8b701c97022f25cb0d2f7b9 (diff)
download php-git-647408978e77c1d9bc0b8ea1f00a4973ea7d84ee.tar.gz
Fixed bug #25707 (html_entity_decode over-decodes &amp;lt;)
-rw-r--r-- ext/standard/html.c 90
-rw-r--r-- ext/standard/tests/strings/bug25707.phpt 12
2 files changed, 64 insertions, 38 deletions
diff --git a/ext/standard/html.c b/ext/standard/html.c
index 595563caf6..942d862c41 100644
--- a/ext/standard/html.c
+++ b/ext/standard/html.c
@@ -417,7 +417,6 @@ static const struct {
int entitylen;
int flags;
} basic_entities[] = {
- { '&', "&amp;", 5, 0 },
{ '"', "&quot;", 6, ENT_HTML_QUOTE_DOUBLE },
{ '\'', "&#039;", 6, ENT_HTML_QUOTE_SINGLE },
{ '\'', "&#39;", 5, ENT_HTML_QUOTE_SINGLE },
@@ -917,54 +916,69 @@ PHPAPI char *php_unescape_html_entities(unsigned char *old, int oldlen, int *new
ret = replaced;
}
- /* replace numeric entities */
+ /* replace numeric entities & "&amp;" */
lim = ret + retlen;
- for (p = ret, q = ret; p < lim; p++) {
+ for (p = ret, q = ret; p < lim;) {
int code;
- if (p < lim - 1 && p[0] == '&' && p[1] == '#') {
- code = strtol(p + 2, &next, 10);
- if (next != NULL && *next == ';') {
- switch (charset) {
- case cs_utf_8:
- q += php_utf32_utf8(q, code);
- break;
+ if (p[0] == '&') {
+ if (p + 2 < lim) {
+ if (p[1] == '#') {
+ code = strtol(p + 2, &next, 10);
+ if (next != NULL && *next == ';') {
+ switch (charset) {
+ case cs_utf_8:
+ q += php_utf32_utf8(q, code);
+ break;
- case cs_8859_1:
- case cs_8859_5:
- case cs_8859_15:
- if (0xa0 <= code && code <= 0xff) {
- *(q++) = code;
- }
- break;
+ case cs_8859_1:
+ case cs_8859_5:
+ case cs_8859_15:
+ if (0xa0 <= code && code <= 0xff) {
+ *(q++) = code;
+ }
+ break;
- case cs_cp1252:
- case cs_cp1251:
- case cs_cp866:
- if (0x80 <= code && code <= 0xff) {
- *(q++) = code;
- }
- break;
+ case cs_cp1252:
+ case cs_cp1251:
+ case cs_cp866:
+ if (0x80 <= code && code <= 0xff) {
+ *(q++) = code;
+ }
+ break;
- case cs_big5:
- case cs_gb2312:
- case cs_big5hkscs:
- case cs_sjis:
- case cs_eucjp:
- if (code <= 0x7f) {
- *(q++) = code;
- }
- break;
+ case cs_big5:
+ case cs_gb2312:
+ case cs_big5hkscs:
+ case cs_sjis:
+ case cs_eucjp:
+ if (code <= 0x7f) {
+ *(q++) = code;
+ }
+ break;
- default:
- break;
+ default:
+ break;
+ }
+ p = next + 1;
+ } else {
+ *(q++) = *(p++);
+ *(q++) = *(p++);
+ }
+ } else if (p + 4 < lim &&
+ p[1] == 'a' && p[2] == 'm' &&p[3] == 'p' &&
+ p[4] == ';') {
+ *(q++) = '&';
+ p += 5;
+ } else {
+ *(q++) = *(p++);
+ *(q++) = *(p++);
}
- p = next;
} else {
- *(q++) = *p;
+ *(q++) = *(p++);
}
} else {
- *(q++) = *p;
+ *(q++) = *(p++);
}
}
*q = '\0';
diff --git a/ext/standard/tests/strings/bug25707.phpt b/ext/standard/tests/strings/bug25707.phpt
new file mode 100644
index 0000000000..4954fb1ae5
--- /dev/null
+++ b/ext/standard/tests/strings/bug25707.phpt
@@ -0,0 +1,12 @@
+--TEST--
+Bug #25707 (html_entity_decode over-decodes &amp;lt;)
+--FILE--
+<?php
+var_dump(html_entity_decode("&amp;lt;", ENT_COMPAT, 'ISO-8859-1'));
+var_dump(html_entity_decode("&amp;#38;", ENT_COMPAT, 'ISO-8859-1'));
+var_dump(html_entity_decode("&amp;#38;lt;", ENT_COMPAT, 'ISO-8859-1'));
+?>
+--EXPECT--
+string(4) "&lt;"
+string(5) "&#38;"
+string(8) "&#38;lt;"