summaryrefslogtreecommitdiff
path: root/ext
diff options
context:
space:
mode:
author: Moriyoshi Koizumi <moriyoshi@php.net> 2002-10-22 20:11:48 +0000
committer: Moriyoshi Koizumi <moriyoshi@php.net> 2002-10-22 20:11:48 +0000
commit: 68a5d1bca3e66dac6459c7d2741c1d72561c8d27 (patch)
tree: ad4ecc8fc64e4463708a5f894bf14ac0110cf9a5 /ext
parent: d6e639a02fee2c5d00a19fd6f8f0c42a810b7be3 (diff)
download: php-git-68a5d1bca3e66dac6459c7d2741c1d72561c8d27.tar.gz
Improved determine_charset() to use mbstring.internal_encoding when the last
param is an empty string. (If the param is omitted, it falls back to iso-8859-1 as the default charset for the sake of backwards compatibility.)
Diffstat (limited to 'ext')
-rw-r--r--ext/standard/html.c35
1 files changed, 35 insertions, 0 deletions
diff --git a/ext/standard/html.c b/ext/standard/html.c
index a939084ba1..3d329f3e3a 100644
--- a/ext/standard/html.c
+++ b/ext/standard/html.c
@@ -31,6 +31,11 @@
#include <langinfo.h>
#endif
+#if HAVE_MBSTRING
+# include "ext/mbstring/mbstring.h"
+ZEND_EXTERN_MODULE_GLOBALS(mbstring)
+#endif
+
enum entity_charset { cs_terminator, cs_8859_1, cs_cp1252,
cs_8859_15, cs_utf_8, cs_big5, cs_gb2312,
cs_big5hkscs, cs_sjis, cs_eucjp};
@@ -525,6 +530,36 @@ static enum entity_charset determine_charset(char *charset_hint TSRMLS_DC)
return cs_8859_1;
if (strlen(charset_hint) == 0) {
+#if HAVE_MBSTRING
+ /* XXX: Ugly things. Why don't we look for a more sophisticated way? */
+ switch (MBSTRG(internal_encoding)) {
+ case mbfl_no_encoding_utf8:
+ return cs_utf_8;
+
+ case mbfl_no_encoding_euc_jp:
+ case mbfl_no_encoding_eucjp_win:
+ return cs_eucjp;
+
+ case mbfl_no_encoding_sjis:
+ case mbfl_no_encoding_sjis_win:
+ case mbfl_no_encoding_sjis_mac:
+ return cs_sjis;
+
+ case mbfl_no_encoding_cp1252:
+ return cs_cp1252;
+
+ case mbfl_no_encoding_8859_15:
+ return cs_8859_15;
+
+ case mbfl_no_encoding_big5:
+ return cs_big5;
+
+ case mbfl_no_encoding_euc_cn:
+ case mbfl_no_encoding_hz:
+ case mbfl_no_encoding_cp936:
+ return cs_gb2312;
+ }
+#endif
/* try to detect the charset for the locale */
#if HAVE_NL_LANGINFO && HAVE_LOCALE_H && defined(CODESET)
charset_hint = nl_langinfo(CODESET);