summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--ext/mbstring/config.m42
-rw-r--r--ext/mbstring/html_entities.c291
-rw-r--r--ext/mbstring/mbfilter.c235
-rw-r--r--ext/mbstring/mbfilter.h12
-rw-r--r--ext/mbstring/tests/020.phpt33
5 files changed, 572 insertions, 1 deletions
diff --git a/ext/mbstring/config.m4 b/ext/mbstring/config.m4
index 60cfb3396d..a0656c11f3 100644
--- a/ext/mbstring/config.m4
+++ b/ext/mbstring/config.m4
@@ -30,7 +30,7 @@ if test "$PHP_MBSTRING" != "no"; then
AC_DEFINE(HAVE_MBSTR_KR,1,[whether to have korean support])
AC_DEFINE(HAVE_MBSTR_RU,1,[whether to have russian support])
fi
- PHP_NEW_EXTENSION(mbstring, mbfilter_ja.c mbfilter_cn.c mbfilter_tw.c mbfilter_kr.c mbfilter_ru.c mbfilter.c mbstring.c mbregex.c php_mbregex.c, $ext_shared)
+ PHP_NEW_EXTENSION(mbstring, mbfilter_ja.c mbfilter_cn.c mbfilter_tw.c mbfilter_kr.c mbfilter_ru.c mbfilter.c mbstring.c mbregex.c php_mbregex.c html_entities.c, $ext_shared)
else
PHP_MBSTR_ENC_TRANS=no
fi
diff --git a/ext/mbstring/html_entities.c b/ext/mbstring/html_entities.c
new file mode 100644
index 0000000000..482c001bfd
--- /dev/null
+++ b/ext/mbstring/html_entities.c
@@ -0,0 +1,291 @@
+/*
+ +----------------------------------------------------------------------+
+ | PHP Version 4 |
+ +----------------------------------------------------------------------+
+ | Copyright (c) 2001 The PHP Group |
+ +----------------------------------------------------------------------+
+ | This source file is subject to version 2.02 of the PHP license, |
+ | that is bundled with this package in the file LICENSE, and is |
+ | available at through the world-wide-web at |
+ | http://www.php.net/license/2_02.txt. |
+ | If you did not receive a copy of the PHP license and are unable to |
+ | obtain it through the world-wide-web, please send a note to |
+ | license@php.net so we can mail you a copy immediately. |
+ +----------------------------------------------------------------------+
+ | Author: Marcus Boerger <helly@php.net> |
+ +----------------------------------------------------------------------+
+ */
+
+/* $Id$ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "php.h"
+#include "php_globals.h"
+
+#include <stdlib.h>
+#include "mbfilter.h"
+
+/*
+ * Named HTML character entities and their Unicode code points.
+ *
+ * The entries are kept in ascending order of code point, and the list is
+ * terminated by an entry whose name is NULL and whose code is -1.
+ *
+ * The four markup-significant entities below are deliberately left out so
+ * that they are never converted automatically:
+ *   {"quot", 34}, {"amp", 38}, {"lt", 60}, {"gt", 62}
+ */
+const mbfl_html_entity mbfl_html_entity_list[] = {
+ {"nbsp", 160},
+ {"iexcl", 161},
+ {"cent", 162},
+ {"pound", 163},
+ {"curren", 164},
+ {"yen", 165},
+ {"brvbar", 166},
+ {"sect", 167},
+ {"uml", 168},
+ {"copy", 169},
+ {"ordf", 170},
+ {"laquo", 171},
+ {"not", 172},
+ {"shy", 173},
+ {"reg", 174},
+ {"macr", 175},
+ {"deg", 176},
+ {"plusmn", 177},
+ {"sup2", 178},
+ {"sup3", 179},
+ {"acute", 180},
+ {"micro", 181},
+ {"para", 182},
+ {"middot", 183},
+ {"cedil", 184},
+ {"sup1", 185},
+ {"ordm", 186},
+ {"raquo", 187},
+ {"frac14", 188},
+ {"frac12", 189},
+ {"frac34", 190},
+ {"iquest", 191},
+ {"Agrave", 192},
+ {"Aacute", 193},
+ {"Acirc", 194},
+ {"Atilde", 195},
+ {"Auml", 196},
+ {"Aring", 197},
+ {"AElig", 198},
+ {"Ccedil", 199},
+ {"Egrave", 200},
+ {"Eacute", 201},
+ {"Ecirc", 202},
+ {"Euml", 203},
+ {"Igrave", 204},
+ {"Iacute", 205},
+ {"Icirc", 206},
+ {"Iuml", 207},
+ {"ETH", 208},
+ {"Ntilde", 209},
+ {"Ograve", 210},
+ {"Oacute", 211},
+ {"Ocirc", 212},
+ {"Otilde", 213},
+ {"Ouml", 214},
+ {"times", 215},
+ {"Oslash", 216},
+ {"Ugrave", 217},
+ {"Uacute", 218},
+ {"Ucirc", 219},
+ {"Uuml", 220},
+ {"Yacute", 221},
+ {"THORN", 222},
+ {"szlig", 223},
+ {"agrave", 224},
+ {"aacute", 225},
+ {"acirc", 226},
+ {"atilde", 227},
+ {"auml", 228},
+ {"aring", 229},
+ {"aelig", 230},
+ {"ccedil", 231},
+ {"egrave", 232},
+ {"eacute", 233},
+ {"ecirc", 234},
+ {"euml", 235},
+ {"igrave", 236},
+ {"iacute", 237},
+ {"icirc", 238},
+ {"iuml", 239},
+ {"eth", 240},
+ {"ntilde", 241},
+ {"ograve", 242},
+ {"oacute", 243},
+ {"ocirc", 244},
+ {"otilde", 245},
+ {"ouml", 246},
+ {"divide", 247},
+ {"oslash", 248},
+ {"ugrave", 249},
+ {"uacute", 250},
+ {"ucirc", 251},
+ {"uuml", 252},
+ {"yacute", 253},
+ {"thorn", 254},
+ {"yuml", 255},
+ {"OElig", 338},
+ {"oelig", 339},
+ {"Scaron", 352},
+ {"scaron", 353},
+ {"Yuml", 376},
+ {"fnof", 402},
+ {"circ", 710},
+ {"tilde", 732},
+ {"Alpha", 913},
+ {"Beta", 914},
+ {"Gamma", 915},
+ {"Delta", 916},
+ {"Epsilon", 917},
+ {"Zeta", 918},
+ {"Eta", 919},
+ {"Theta", 920},
+ {"Iota", 921},
+ {"Kappa", 922},
+ {"Lambda", 923},
+ {"Mu", 924},
+ {"Nu", 925},
+ {"Xi", 926},
+ {"Omicron", 927},
+ {"Pi", 928},
+ {"Rho", 929},
+ {"Sigma", 931},
+ {"Tau", 932},
+ {"Upsilon", 933},
+ {"Phi", 934},
+ {"Chi", 935},
+ {"Psi", 936},
+ {"Omega", 937},
+ {"alpha", 945},
+ {"beta", 946},
+ {"gamma", 947},
+ {"delta", 948},
+ {"epsilon", 949},
+ {"zeta", 950},
+ {"eta", 951},
+ {"theta", 952},
+ {"iota", 953},
+ {"kappa", 954},
+ {"lambda", 955},
+ {"mu", 956},
+ {"nu", 957},
+ {"xi", 958},
+ {"omicron", 959},
+ {"pi", 960},
+ {"rho", 961},
+ {"sigmaf", 962},
+ {"sigma", 963},
+ {"tau", 964},
+ {"upsilon", 965},
+ {"phi", 966},
+ {"chi", 967},
+ {"psi", 968},
+ {"omega", 969},
+ {"thetasym", 977},
+ {"upsih", 978},
+ {"piv", 982},
+ {"ensp", 8194},
+ {"emsp", 8195},
+ {"thinsp", 8201},
+ {"zwnj", 8204},
+ {"zwj", 8205},
+ {"lrm", 8206},
+ {"rlm", 8207},
+ {"ndash", 8211},
+ {"mdash", 8212},
+ {"lsquo", 8216},
+ {"rsquo", 8217},
+ {"sbquo", 8218},
+ {"ldquo", 8220},
+ {"rdquo", 8221},
+ {"bdquo", 8222},
+ {"dagger", 8224},
+ {"Dagger", 8225},
+ {"bull", 8226},
+ {"hellip", 8230},
+ {"permil", 8240},
+ {"prime", 8242},
+ {"Prime", 8243},
+ {"lsaquo", 8249},
+ {"rsaquo", 8250},
+ {"oline", 8254},
+ {"frasl", 8260},
+ {"euro", 8364},
+ {"image", 8465},
+ {"weierp", 8472},
+ {"real", 8476},
+ {"trade", 8482},
+ {"alefsym", 8501},
+ {"larr", 8592},
+ {"uarr", 8593},
+ {"rarr", 8594},
+ {"darr", 8595},
+ {"harr", 8596},
+ {"crarr", 8629},
+ {"lArr", 8656},
+ {"uArr", 8657},
+ {"rArr", 8658},
+ {"dArr", 8659},
+ {"hArr", 8660},
+ {"forall", 8704},
+ {"part", 8706},
+ {"exist", 8707},
+ {"empty", 8709},
+ {"nabla", 8711},
+ {"isin", 8712},
+ {"notin", 8713},
+ {"ni", 8715},
+ {"prod", 8719},
+ {"sum", 8721},
+ {"minus", 8722},
+ {"lowast", 8727},
+ {"radic", 8730},
+ {"prop", 8733},
+ {"infin", 8734},
+ {"ang", 8736},
+ {"and", 8743},
+ {"or", 8744},
+ {"cap", 8745},
+ {"cup", 8746},
+ {"int", 8747},
+ {"there4", 8756},
+ {"sim", 8764},
+ {"cong", 8773},
+ {"asymp", 8776},
+ {"ne", 8800},
+ {"equiv", 8801},
+ {"le", 8804},
+ {"ge", 8805},
+ {"sub", 8834},
+ {"sup", 8835},
+ {"nsub", 8836},
+ {"sube", 8838},
+ {"supe", 8839},
+ {"oplus", 8853},
+ {"otimes", 8855},
+ {"perp", 8869},
+ {"sdot", 8901},
+ {"lceil", 8968},
+ {"rceil", 8969},
+ {"lfloor", 8970},
+ {"rfloor", 8971},
+ {"lang", 9001},
+ {"rang", 9002},
+ {"loz", 9674},
+ {"spades", 9824},
+ {"clubs", 9827},
+ {"hearts", 9829},
+ {"diams", 9830},
+ {NULL, -1} /* mark end of table */
+};
+
+/*
+ * Local variables:
+ * tab-width: 4
+ * c-basic-offset: 4
+ * End:
+ */
diff --git a/ext/mbstring/mbfilter.c b/ext/mbstring/mbfilter.c
index 041e4e03b9..87e645e835 100644
--- a/ext/mbstring/mbfilter.c
+++ b/ext/mbstring/mbfilter.c
@@ -397,6 +397,25 @@ static const unsigned char mblen_table_uhc[] = { /* 0x81-0xFE */
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
};
+/*
+ * Length table for the HTML-ENTITIES pseudo encoding, one entry per value
+ * 0x00-0xFF.  Entries 0x00-0x7F are 1; entries 0x80-0xFF are 6, which is
+ * only valid for numeric entities (presumably the length of a three-digit
+ * reference such as "&#128;").
+ */
+static const unsigned char mblen_table_html[] = { /* 0x00 - 0xFF */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6
+};
+
/* encoding structure */
static const char *mbfl_encoding_pass_aliases[] = {"none", NULL};
@@ -483,6 +502,17 @@ static const mbfl_encoding mbfl_encoding_uuencode = {
MBFL_ENCTYPE_SBCS
};
+/* Alternative names for the HTML-ENTITIES pseudo encoding (NULL terminated). */
+static const char *mbfl_encoding_html_ent_aliases[] = {"HTML", "html", NULL};
+
+/*
+ * Descriptor of the HTML-ENTITIES pseudo encoding.  It is registered in
+ * mbfl_encoding_ptr_list under the id mbfl_no_encoding_html_ent.
+ */
+static const mbfl_encoding mbfl_encoding_html_ent = {
+ mbfl_no_encoding_html_ent,
+ "HTML-ENTITIES",
+ "html",
+ (const char *(*)[])&mbfl_encoding_html_ent_aliases,
+ NULL, /* mblen_table_html is not used here: instead of a table, calculate the length based on the entities actually used */
+ MBFL_ENCTYPE_HTML_ENT
+};
+
static const char *mbfl_encoding_qprint_aliases[] = {"qprint", NULL};
static const mbfl_encoding mbfl_encoding_qprint = {
@@ -1025,6 +1055,7 @@ static const mbfl_encoding *mbfl_encoding_ptr_list[] = {
&mbfl_encoding_byte4le,
&mbfl_encoding_base64,
&mbfl_encoding_uuencode,
+ &mbfl_encoding_html_ent,
&mbfl_encoding_qprint,
&mbfl_encoding_7bit,
&mbfl_encoding_8bit,
@@ -1115,6 +1146,13 @@ static int mbfl_filt_conv_base64dec(int c, mbfl_convert_filter *filter TSRMLS_DC
static int mbfl_filt_conv_base64dec_flush(mbfl_convert_filter *filter TSRMLS_DC);
static int mbfl_filt_conv_uudec(int c, mbfl_convert_filter *filter TSRMLS_DC);
+/* HTML-ENTITIES converters; referenced by vtbl_wchar_html and vtbl_html_wchar */
+static void mbfl_filt_conv_html_dec_ctor(mbfl_convert_filter *filter TSRMLS_DC);
+static void mbfl_filt_conv_html_dec_dtor(mbfl_convert_filter *filter TSRMLS_DC);
+static int mbfl_filt_conv_html_enc(int c, mbfl_convert_filter *filter TSRMLS_DC);
+static int mbfl_filt_conv_html_enc_flush(mbfl_convert_filter *filter TSRMLS_DC);
+static int mbfl_filt_conv_html_dec(int c, mbfl_convert_filter *filter TSRMLS_DC);
+static int mbfl_filt_conv_html_dec_flush(mbfl_convert_filter *filter TSRMLS_DC);
+
static int mbfl_filt_conv_qprintenc(int c, mbfl_convert_filter *filter TSRMLS_DC);
static int mbfl_filt_conv_qprintenc_flush(mbfl_convert_filter *filter TSRMLS_DC);
static int mbfl_filt_conv_qprintdec(int c, mbfl_convert_filter *filter TSRMLS_DC);
@@ -1307,6 +1345,22 @@ static const struct mbfl_convert_vtbl vtbl_uuencode_8bit = {
mbfl_filt_conv_uudec,
mbfl_filt_conv_common_flush };
+/*
+ * Conversion wchar => HTML-ENTITIES.  Uses the common constructor and
+ * destructor; characters go through mbfl_filt_conv_html_enc and the stream
+ * is finished by mbfl_filt_conv_html_enc_flush.
+ */
+static const struct mbfl_convert_vtbl vtbl_wchar_html = {
+ mbfl_no_encoding_wchar,
+ mbfl_no_encoding_html_ent,
+ mbfl_filt_conv_common_ctor,
+ mbfl_filt_conv_common_dtor,
+ mbfl_filt_conv_html_enc,
+ mbfl_filt_conv_html_enc_flush };
+
+/*
+ * Conversion HTML-ENTITIES => wchar.  Needs its own constructor and
+ * destructor because the decoder keeps a heap-allocated scratch buffer for
+ * the entity that is currently being collected.
+ */
+static const struct mbfl_convert_vtbl vtbl_html_wchar = {
+ mbfl_no_encoding_html_ent,
+ mbfl_no_encoding_wchar,
+ mbfl_filt_conv_html_dec_ctor,
+ mbfl_filt_conv_html_dec_dtor,
+ mbfl_filt_conv_html_dec,
+ mbfl_filt_conv_html_dec_flush };
+
static const struct mbfl_convert_vtbl vtbl_8bit_qprint = {
mbfl_no_encoding_8bit,
mbfl_no_encoding_qprint,
@@ -2185,6 +2239,8 @@ static const struct mbfl_convert_vtbl *mbfl_convert_filter_list[] = {
&vtbl_8bit_b64,
&vtbl_b64_8bit,
&vtbl_uuencode_8bit,
+ &vtbl_wchar_html,
+ &vtbl_html_wchar,
&vtbl_8bit_qprint,
&vtbl_qprint_8bit,
&vtbl_8bit_7bit,
@@ -3468,6 +3524,185 @@ mbfl_filt_conv_base64dec_flush(mbfl_convert_filter *filter TSRMLS_DC)
}
/*
+ * any => HTML
+ */
+/*
+ * Encode one character as HTML and pass the result to the output function.
+ *
+ * Characters below 256 whose mblen_table_html entry is 1 are copied through
+ * unchanged.  Everything else is written as "&name;" when the code is found
+ * in mbfl_html_entity_list, or as the decimal reference "&#NNN;" otherwise.
+ *
+ * c       character code to encode
+ * filter  conversion filter supplying output_function and data
+ *
+ * Returns c on success; CK() returns early when an output call fails.
+ */
+static int
+mbfl_filt_conv_html_enc(int c, mbfl_convert_filter *filter TSRMLS_DC)
+{
+	int digits[10];	/* decimal digits, least significant first; enough for 32 bit */
+	int ndigits = 0;
+	unsigned int value;
+	const char *name = NULL;
+	const mbfl_html_entity *entity;
+
+	/* the lower bound keeps a negative code from indexing before the table */
+	if (c >= 0 && c < 256 && mblen_table_html[c] == 1) {
+		CK((*filter->output_function)(c, filter->data TSRMLS_CC));
+		return c;
+	}
+
+	/* scan the whole list so the lookup does not depend on the table order */
+	for (entity = mbfl_html_entity_list; entity->name != NULL; entity++) {
+		if (entity->code == c) {
+			name = entity->name;
+			break;
+		}
+	}
+
+	CK((*filter->output_function)('&', filter->data TSRMLS_CC));
+	if (name != NULL) {
+		while (*name) {
+			CK((*filter->output_function)((int)*name++, filter->data TSRMLS_CC));
+		}
+	} else {
+		CK((*filter->output_function)('#', filter->data TSRMLS_CC));
+		/*
+		 * Work on a copy with its own digit counter: the table index must
+		 * not be reused here (it would write past the digit buffer), and c
+		 * has to survive for the return value.
+		 */
+		value = (unsigned int)c;
+		do {
+			digits[ndigits++] = '0' + (int)(value % 10);
+			value /= 10;
+		} while (value);
+		while (ndigits > 0) {
+			CK((*filter->output_function)(digits[--ndigits], filter->data TSRMLS_CC));
+		}
+	}
+	CK((*filter->output_function)(';', filter->data TSRMLS_CC));
+
+	return c;
+}
+
+/*
+ * Finish an HTML encoding run.  The encoder keeps nothing pending, so this
+ * only clears the filter's cache and status fields.  Always returns 0.
+ */
+static int
+mbfl_filt_conv_html_enc_flush(mbfl_convert_filter *filter TSRMLS_DC)
+{
+	filter->cache = 0;
+	filter->status = 0;
+
+	return 0;
+}
+
+/*
+ * HTML => any
+ */
+/* size in bytes of the scratch buffer in which the decoder collects one entity */
+#define html_enc_buffer_size 16
+/* characters the decoder accepts between '&' and ';' */
+static const char html_entity_chars[] = "#0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
+
+/*
+ * Set up the HTML decoder: allocate the entity scratch buffer of
+ * html_enc_buffer_size bytes and start in the "outside an entity" state.
+ *
+ * NOTE(review): the buffer pointer is stored in the int field filter->cache,
+ * which loses bits where pointers are wider than int, and the result of
+ * mbfl_malloc() is not checked here.
+ */
+static void
+mbfl_filt_conv_html_dec_ctor(mbfl_convert_filter *filter TSRMLS_DC)
+{
+	void *scratch = mbfl_malloc(html_enc_buffer_size);
+
+	filter->cache = (int)scratch;
+	filter->status = 0;
+}
+
+/*
+ * Tear down the HTML decoder: release the scratch buffer kept in
+ * filter->cache (if there is one) and clear cache and status.
+ */
+static void
+mbfl_filt_conv_html_dec_dtor(mbfl_convert_filter *filter TSRMLS_DC)
+{
+	if (filter->cache != 0) {
+		mbfl_free((void*)filter->cache);
+	}
+	filter->cache = 0;
+	filter->status = 0;
+}
+
+/*
+ * Decode HTML one input character at a time.
+ *
+ * filter->status is 0 outside an entity; otherwise it is the number of
+ * characters (starting with '&') collected so far in the scratch buffer that
+ * filter->cache points to.  On ';' the collected text is decoded:
+ *   - "&#NNN;" and "&#xHHH;" are numeric references (at most 0x10FFFF),
+ *   - anything else is looked up by name in mbfl_html_entity_list.
+ * Text that cannot be decoded raises an E_WARNING and is passed on verbatim.
+ *
+ * c       next input character
+ * filter  conversion filter holding the state and the output function
+ *
+ * Returns c on success; CK() returns early when an output call fails.
+ */
+static int
+mbfl_filt_conv_html_dec(int c, mbfl_convert_filter *filter TSRMLS_DC)
+{
+	char *buffer = (char*)filter->cache;
+	const mbfl_html_entity *entity;
+	int pos, base, digit;
+	int ent = 0, decoded = 0;
+
+	if (buffer == NULL) {
+		/* no scratch buffer (allocation failed): pass the input through */
+		CK((*filter->output_function)(c, filter->data TSRMLS_CC));
+		return c;
+	}
+
+	if (!filter->status) {
+		if (c == '&') {
+			filter->status = 1;
+			buffer[0] = '&';
+		} else {
+			CK((*filter->output_function)(c, filter->data TSRMLS_CC));
+		}
+		return c;
+	}
+
+	if (c == ';') {
+		buffer[filter->status] = 0;
+		if (buffer[1] == '#') {
+			/* numeric entity, decimal or (with an 'x' prefix) hexadecimal */
+			pos = 2;
+			base = 10;
+			if (buffer[pos] == 'x' || buffer[pos] == 'X') {
+				base = 16;
+				pos++;
+			}
+			decoded = (pos < filter->status);	/* at least one digit is required */
+			while (decoded && pos < filter->status) {
+				digit = buffer[pos++];
+				if (digit >= '0' && digit <= '9') {
+					digit = digit - '0';
+				} else if (base == 16 && digit >= 'a' && digit <= 'f') {
+					digit = digit - 'a' + 10;
+				} else if (base == 16 && digit >= 'A' && digit <= 'F') {
+					digit = digit - 'A' + 10;
+				} else {
+					digit = -1;
+				}
+				/* reject non-digits and stop before the value can overflow */
+				if (digit < 0 || ent > (0x10ffff - digit) / base) {
+					decoded = 0;
+				} else {
+					ent = ent * base + digit;
+				}
+			}
+		} else {
+			/* named entity */
+			for (entity = mbfl_html_entity_list; entity->name; entity++) {
+				if (!strcmp(buffer+1, entity->name)) {
+					ent = entity->code;
+					decoded = 1;
+					break;
+				}
+			}
+		}
+
+		if (decoded) {
+			CK((*filter->output_function)(ent, filter->data TSRMLS_CC));
+			filter->status = 0;
+		} else {
+			/* failure: hand the text on unchanged, including the ';' */
+			buffer[filter->status++] = ';';
+			buffer[filter->status] = 0;
+			php_error(E_WARNING, "%s() mbstring cannot decode '%s'", get_active_function_name(TSRMLS_C), buffer);
+			CK(mbfl_filt_conv_html_dec_flush(filter TSRMLS_CC));
+		}
+		return c;
+	}
+
+	/* add character and check */
+	buffer[filter->status++] = (char)c;
+	/*
+	 * strchr() would also match the terminating NUL and, after conversion to
+	 * char, values above 255, so the range is checked first.
+	 */
+	if (c <= 0 || c > 0x7f || !strchr(html_entity_chars, c)
+		|| filter->status+1 == html_enc_buffer_size
+		|| (c == '#' && filter->status > 2)) {
+		/* illegal character or end of buffer */
+		if (c == '&') {
+			filter->status--;
+		}
+		buffer[filter->status] = 0;
+		php_error(E_WARNING, "%s() mbstring cannot decode '%s'", get_active_function_name(TSRMLS_C), buffer);
+		if (c != '&') {
+			/* c is emitted directly below so that it is not narrowed to char */
+			filter->status--;
+		}
+		CK(mbfl_filt_conv_html_dec_flush(filter TSRMLS_CC));
+		if (c == '&') {
+			/* the '&' may start a new entity */
+			filter->status = 1;
+			buffer[0] = '&';
+		} else {
+			CK((*filter->output_function)(c, filter->data TSRMLS_CC));
+		}
+	}
+	return c;
+}
+
+/*
+ * Pass the characters of a not yet completed entity on to the output
+ * function unchanged and return to the "outside an entity" state.  The
+ * scratch buffer itself stays allocated; it is released by the destructor.
+ *
+ * Returns 0 on success; CK() returns early when an output call fails.
+ */
+static int
+mbfl_filt_conv_html_dec_flush(mbfl_convert_filter *filter TSRMLS_DC)
+{
+	/* unsigned char: bytes above 0x7F must not be sign-extended on output */
+	const unsigned char *pending = (const unsigned char *)filter->cache;
+	int remaining = filter->status;
+
+	/* reset first so that a failing output call cannot leave stale state */
+	filter->status = 0;
+	if (pending == NULL) {
+		return 0;
+	}
+	/* flush fragments */
+	while (remaining-- > 0) {
+		CK((*filter->output_function)((int)*pending++, filter->data TSRMLS_CC));
+	}
+	/* filter->cache is of course NOT cleared here */
+	return 0;
+}
+
+/*
* any => Quoted-Printable
*/
#define MBFL_QPRINT_STS_MIME_HEADER 0x1000000
diff --git a/ext/mbstring/mbfilter.h b/ext/mbstring/mbfilter.h
index 3e3c9dd11c..563a11ea89 100644
--- a/ext/mbstring/mbfilter.h
+++ b/ext/mbstring/mbfilter.h
@@ -128,6 +128,7 @@ enum mbfl_no_encoding {
mbfl_no_encoding_byte4le,
mbfl_no_encoding_base64,
mbfl_no_encoding_uuencode,
+ mbfl_no_encoding_html_ent,
mbfl_no_encoding_qprint,
mbfl_no_encoding_7bit,
mbfl_no_encoding_8bit,
@@ -222,6 +223,7 @@ typedef struct _mbfl_encoding {
#define MBFL_ENCTYPE_MWC4BE 0x00000400
#define MBFL_ENCTYPE_MWC4LE 0x00000800
#define MBFL_ENCTYPE_SHFTCODE 0x00001000
+#define MBFL_ENCTYPE_HTML_ENT 0x00002000
/* wchar plane, special charactor */
#define MBFL_WCSPLANE_MASK 0xffff
@@ -574,4 +576,14 @@ mbfl_html_numeric_entity(mbfl_string *string, mbfl_string *result, int *convmap,
mbfl_string *
mbfl_ja_jp_hantozen(mbfl_string *string, mbfl_string *result, int mode TSRMLS_DC);
+/*
+ * HTML Entity table
+ *
+ * Each entry maps an entity name (without '&' and ';') to its character
+ * code.  The table ends with an entry whose name is NULL.
+ */
+typedef struct _mbfl_html_entity {
+	const char *name;	/* points at a string literal, never written through */
+	int code;
+} mbfl_html_entity;
+
+extern const mbfl_html_entity mbfl_html_entity_list[];
+
#endif /* MBFL_MBFILTER_H */
diff --git a/ext/mbstring/tests/020.phpt b/ext/mbstring/tests/020.phpt
new file mode 100644
index 0000000000..cc13d9e3ff
--- /dev/null
+++ b/ext/mbstring/tests/020.phpt
@@ -0,0 +1,33 @@
+--TEST--
+HTML input/output
+--SKIPIF--
+<?php
+ ini_set('include_path','.');
+ include('skipif.inc');
+?>
+--INI--
+arg_separator.input="x"
+error_reporting=0
+mbstring.http_input=HTML-ENTITIES
+mbstring.internal_encoding=UTF8
+mbstring.http_output=HTML-ENTITIES
+mbstring.encoding_translation=On
+--FILE--
+<?php
+// enable output encoding through output handler
+ob_start("mb_output_handler");
+// The numeric references &#64;&#65;&#66; must be decoded on input; they are
+// not re-encoded on output.
+// If you see &#64;&#65;&#66; on output this means input decoding failed.
+// If you do not see &auml;... on output this means output encoding failed.
+// Using UTF-8 internally allows to encode/decode ALL characters.
+// &#128;... stay as they are since their character codes are above 127
+// and they do not have a named entity representation.
+?>
+<?php echo mb_http_input('l').'>'.mb_internal_encoding().'>'.mb_http_output();?>
+
+<?php mb_parse_str("test=&&;&&#64;&#65;&#66;&#128;&#129;&#130;&auml;&ouml;&uuml;&euro;&lang;&rang;");
+echo "test='$test'";
+?>
+--EXPECT--
+HTML-ENTITIES>UTF-8>HTML-ENTITIES
+test='&&;&@AB&#128;&#129;&#130;&auml;&ouml;&uuml;&euro;&lang;&rang;' \ No newline at end of file