summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rwxr-xr-xUPGRADING3
-rw-r--r--ext/standard/html.c18
-rw-r--r--ext/standard/html_tables.h2
-rw-r--r--ext/standard/html_tables/html_table_gen.php2
-rw-r--r--ext/standard/tests/strings/htmlentities05.phpt9
-rw-r--r--ext/standard/tests/strings/htmlentities06.phpt7
-rw-r--r--ext/standard/tests/strings/htmlentities07.phpt8
-rw-r--r--ext/standard/tests/strings/htmlentities08.phpt9
-rw-r--r--ext/standard/tests/strings/htmlentities09.phpt9
-rw-r--r--ext/standard/tests/strings/htmlentities16.phpt6
10 files changed, 28 insertions, 45 deletions
diff --git a/UPGRADING b/UPGRADING
index 9c5d18affb..53a7c8a489 100755
--- a/UPGRADING
+++ b/UPGRADING
@@ -148,6 +148,9 @@ UPGRADE NOTES - PHP X.Y
behavior follows the recommendations of Unicode Technical Report #36.
- htmlspecialchars_decode/html_entity_decode now decode &apos; if the document
type is ENT_XML1, ENT_XHTML, or ENT_HTML5.
+- Charset detection with $charset == '' no longer turns to mbstring's
+ internal encoding defined through mb_internal_encoding(). Only the encoding
+ defined through the ini setting mbstring.internal_encoding is considered.
- number_format() no longer truncates multibyte decimal points and thousand
separators to the first byte.
- The third parameter ($matches) to preg_match_all() is now optional. If
diff --git a/ext/standard/html.c b/ext/standard/html.c
index 510d2f565e..1d989f818e 100644
--- a/ext/standard/html.c
+++ b/ext/standard/html.c
@@ -367,6 +367,7 @@ static enum entity_charset determine_charset(char *charset_hint TSRMLS_DC)
int i;
enum entity_charset charset = cs_utf_8;
int len = 0;
+ const zend_encoding *zenc;
/* Default is now UTF-8 */
if (charset_hint == NULL)
@@ -376,9 +377,20 @@ static enum entity_charset determine_charset(char *charset_hint TSRMLS_DC)
goto det_charset;
}
- charset_hint = (char*)zend_multibyte_get_internal_encoding(TSRMLS_C);
- if (charset_hint != NULL && (len=strlen(charset_hint)) != 0) {
- goto det_charset;
+ zenc = zend_multibyte_get_internal_encoding(TSRMLS_C);
+ if (zenc != NULL) {
+ charset_hint = zend_multibyte_get_encoding_name(zenc);
+ if (charset_hint != NULL && (len=strlen(charset_hint)) != 0) {
+ if ((len == 4) /* strlen of the pseudo-encodings none|auto|pass */ &&
+ (!memcmp("pass", charset_hint, 4) ||
+ !memcmp("auto", charset_hint, 4) ||
+ !memcmp("none", charset_hint, 4))) {
+ charset_hint = NULL; /* not a usable charset name; do not jump to det_charset */
+ len = 0;
+ } else {
+ goto det_charset;
+ }
+ }
+ }
charset_hint = SG(default_charset);
diff --git a/ext/standard/html_tables.h b/ext/standard/html_tables.h
index c6a75bef8d..278b6db428 100644
--- a/ext/standard/html_tables.h
+++ b/ext/standard/html_tables.h
@@ -33,7 +33,7 @@ enum entity_charset { cs_utf_8, cs_8859_1, cs_cp1252, cs_8859_15, cs_cp1251,
cs_gb2312, cs_big5hkscs, cs_sjis, cs_eucjp,
cs_numelems /* used to count the number of charsets */
};
-#define CHARSET_UNICODE_COMPAT(cs) ((cs) <= cs_utf_8)
+#define CHARSET_UNICODE_COMPAT(cs) ((cs) <= cs_8859_1)
#define CHARSET_SINGLE_BYTE(cs) ((cs) > cs_utf_8 && (cs) < cs_big5)
#define CHARSET_PARTIAL_SUPPORT(cs) ((cs) >= cs_big5)
diff --git a/ext/standard/html_tables/html_table_gen.php b/ext/standard/html_tables/html_table_gen.php
index dcd21b370b..05997f0375 100644
--- a/ext/standard/html_tables/html_table_gen.php
+++ b/ext/standard/html_tables/html_table_gen.php
@@ -56,7 +56,7 @@ enum entity_charset { cs_utf_8, cs_8859_1, cs_cp1252, cs_8859_15, cs_cp1251,
cs_gb2312, cs_big5hkscs, cs_sjis, cs_eucjp,
cs_numelems /* used to count the number of charsets */
};
-#define CHARSET_UNICODE_COMPAT(cs) ((cs) <= cs_utf_8)
+#define CHARSET_UNICODE_COMPAT(cs) ((cs) <= cs_8859_1)
#define CHARSET_SINGLE_BYTE(cs) ((cs) > cs_utf_8 && (cs) < cs_big5)
#define CHARSET_PARTIAL_SUPPORT(cs) ((cs) >= cs_big5)
diff --git a/ext/standard/tests/strings/htmlentities05.phpt b/ext/standard/tests/strings/htmlentities05.phpt
index 779cf289b0..7f8adb6b99 100644
--- a/ext/standard/tests/strings/htmlentities05.phpt
+++ b/ext/standard/tests/strings/htmlentities05.phpt
@@ -2,19 +2,12 @@
htmlentities() test 5 (mbstring / cp1252)
--INI--
output_handler=
+mbstring.internal_encoding=cp1252
--SKIPIF--
<?php
extension_loaded("mbstring") or die("skip mbstring not available\n");
- mb_internal_encoding('cp1252');
- $php_errormsg = NULL;
- @htmlentities("\x82\x86\x99\x9f", ENT_QUOTES, '');
- if ($php_errormsg) {
- die("skip cp1252 chracter set is not supported on this platform.\n");
- }
-?>
--FILE--
<?php
- mb_internal_encoding('cp1252');
print mb_internal_encoding()."\n";
var_dump(htmlentities("\x82\x86\x99\x9f", ENT_QUOTES, ''));
var_dump(htmlentities("\x80\xa2\xa3\xa4\xa5", ENT_QUOTES, ''));
diff --git a/ext/standard/tests/strings/htmlentities06.phpt b/ext/standard/tests/strings/htmlentities06.phpt
index 44d1466da9..4cc68d233f 100644
--- a/ext/standard/tests/strings/htmlentities06.phpt
+++ b/ext/standard/tests/strings/htmlentities06.phpt
@@ -2,15 +2,10 @@
htmlentities() test 6 (mbstring / ISO-8859-15)
--INI--
output_handler=
+mbstring.internal_encoding=ISO-8859-15
--SKIPIF--
<?php
extension_loaded("mbstring") or die("skip mbstring not available\n");
- @mb_internal_encoding('ISO-8859-15');
- @htmlentities("\xbc\xbd\xbe", ENT_QUOTES, '');
- if (@$php_errormsg) {
- die("skip ISO-8859-15 chracter set is not supported on this platform.\n");
- }
-?>
--FILE--
<?php
mb_internal_encoding('ISO-8859-15');
diff --git a/ext/standard/tests/strings/htmlentities07.phpt b/ext/standard/tests/strings/htmlentities07.phpt
index efd06f08ad..144734c12e 100644
--- a/ext/standard/tests/strings/htmlentities07.phpt
+++ b/ext/standard/tests/strings/htmlentities07.phpt
@@ -2,16 +2,10 @@
htmlentities() test 7 (mbstring / ISO-8859-1)
--INI--
output_handler=
+mbstring.internal_encoding=ISO-8859-1
--SKIPIF--
<?php
extension_loaded("mbstring") or die("skip mbstring not available\n");
- mb_internal_encoding('ISO-8859-1');
- $php_errormsg = NULL;
- @htmlentities("\xe4\xf6\xfc", ENT_QUOTES, '');
- if ($php_errormsg) {
- die("skip ISO-8859-1 chracter set is not supported on this platform.\n");
- }
-?>
--FILE--
<?php
mb_internal_encoding('ISO-8859-1');
diff --git a/ext/standard/tests/strings/htmlentities08.phpt b/ext/standard/tests/strings/htmlentities08.phpt
index 0f8f912f27..1f6dc90fcb 100644
--- a/ext/standard/tests/strings/htmlentities08.phpt
+++ b/ext/standard/tests/strings/htmlentities08.phpt
@@ -2,16 +2,11 @@
htmlentities() test 8 (mbstring / EUC-JP)
--INI--
output_handler=
+error_reporting=~E_STRICT
+mbstring.internal_encoding=EUC-JP
--SKIPIF--
<?php
extension_loaded("mbstring") or die("skip mbstring not available\n");
- mb_internal_encoding('EUC-JP');
- $php_errormsg = NULL;
- @htmlentities("\xa1\xa2\xa1\xa3\xa1\xa4", ENT_QUOTES, '');
- if ($php_errormsg) {
- die("skip EUC-JP chracter set is not supported on this platform.\n");
- }
-?>
--FILE--
<?php
mb_internal_encoding('EUC-JP');
diff --git a/ext/standard/tests/strings/htmlentities09.phpt b/ext/standard/tests/strings/htmlentities09.phpt
index 9127a71d06..c80a77d40f 100644
--- a/ext/standard/tests/strings/htmlentities09.phpt
+++ b/ext/standard/tests/strings/htmlentities09.phpt
@@ -2,16 +2,11 @@
htmlentities() test 9 (mbstring / Shift_JIS)
--INI--
output_handler=
+error_reporting=~E_STRICT
+mbstring.internal_encoding=Shift_JIS
--SKIPIF--
<?php
extension_loaded("mbstring") or die("skip mbstring not available\n");
- mb_internal_encoding('Shift_JIS');
- $php_errormsg = NULL;
- @htmlentities("\x81\x41\x81\x42\x81\x43", ENT_QUOTES, '');
- if ($php_errormsg) {
- die("skip Shift_JIS chracter set is not supported on this platform.\n");
- }
-?>
--FILE--
<?php
mb_internal_encoding('Shift_JIS');
diff --git a/ext/standard/tests/strings/htmlentities16.phpt b/ext/standard/tests/strings/htmlentities16.phpt
index c49584e90f..ed54670520 100644
--- a/ext/standard/tests/strings/htmlentities16.phpt
+++ b/ext/standard/tests/strings/htmlentities16.phpt
@@ -2,17 +2,13 @@
htmlentities() test 16 (mbstring / cp1251)
--INI--
output_handler=
+mbstring.internal_encoding=cp1251
--SKIPIF--
<?php
extension_loaded("mbstring") or die("skip mbstring not available\n");
- if (!@mb_internal_encoding('cp1251') ||
- @htmlentities("\x88\xa9\xd2\xcf\xd3\xcb\xcf\xdb\xce\xd9\xca", ENT_QUOTES, '') == '') {
- die("skip cp1251 character set is not available in this build.\n");
- }
?>
--FILE--
<?php
-mb_internal_encoding('cp1251');
$str = "\x88\xa9\xf0\xee\xf1\xea\xee\xf8\xed\xfb\xe9";
var_dump(bin2hex($str), bin2hex(htmlentities($str, ENT_QUOTES, '')));
var_dump(htmlentities($str, ENT_QUOTES | ENT_HTML5, ''));