summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Moriyoshi Koizumi <moriyoshi@php.net> 2008-07-31 17:37:12 +0000
committer Moriyoshi Koizumi <moriyoshi@php.net> 2008-07-31 17:37:12 +0000
commit 2d08b5aa865c4d1ee407359e2050ec865f90990e (patch)
tree 9a005822221a3890b1b8ae8d50cec6b4f83d5ff6
parent 00788aa163fe287cad466ccf46d3d453a664e142 (diff)
download php-git-2d08b5aa865c4d1ee407359e2050ec865f90990e.tar.gz
- MFH: Fixed bug #44617 (wrong HTML entity output when substitute_character=entity)
-rw-r--r-- ext/mbstring/libmbfl/mbfl/mbfl_convert.c | 60
-rw-r--r-- ext/mbstring/tests/mb_substitute_character.phpt | 56
2 files changed, 60 insertions, 56 deletions
diff --git a/ext/mbstring/libmbfl/mbfl/mbfl_convert.c b/ext/mbstring/libmbfl/mbfl/mbfl_convert.c
index c7ec3d3a87..63003d0f0a 100644
--- a/ext/mbstring/libmbfl/mbfl/mbfl_convert.c
+++ b/ext/mbstring/libmbfl/mbfl/mbfl_convert.c
@@ -363,22 +363,6 @@ int mbfl_convert_filter_strcat(mbfl_convert_filter *filter, const unsigned char
return 0;
}
-#if 0
-static int
-mbfl_convert_filter_strncat(mbfl_convert_filter *filter, const unsigned char *p,
- int n)
-{
- while (n > 0) {
- if ((*filter->filter_function)(*p++, filter) < 0) {
- return -1;
- }
- n--;
- }
-
- return n;
-}
-#endif
-
/* illegal character output function for conv-filter */
int
mbfl_filt_conv_illegal_output(int c, mbfl_convert_filter *filter)
@@ -393,14 +377,9 @@ mbfl_filt_conv_illegal_output(int c, mbfl_convert_filter *filter)
ret = (*filter->filter_function)(filter->illegal_substchar, filter);
break;
case MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG:
- case MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY:
if (c >= 0) {
if (c < MBFL_WCSGROUP_UCS4MAX) { /* unicode */
- if (mode_backup == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
- ret = mbfl_convert_filter_strcat(filter, (const unsigned char *)"U+");
- } else { /* entity */
- ret = mbfl_convert_filter_strcat(filter, (const unsigned char *)"&#");
- }
+ ret = mbfl_convert_filter_strcat(filter, (const unsigned char *)"U+");
} else {
if (c < MBFL_WCSGROUP_WCHARMAX) {
m = c & ~MBFL_WCSPLANE_MASK;
@@ -444,9 +423,38 @@ mbfl_filt_conv_illegal_output(int c, mbfl_convert_filter *filter)
if (m == 0 && ret >= 0) {
ret = (*filter->filter_function)(mbfl_hexchar_table[0], filter);
}
- if (mode_backup == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) {
- ret = mbfl_convert_filter_strcat(filter, (const unsigned char *)";");
+ }
+ }
+ break;
+ case MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY:
+ if (c >= 0) {
+ if (c < MBFL_WCSGROUP_UCS4MAX) { /* unicode */
+ ret = mbfl_convert_filter_strcat(filter, (const unsigned char *)"&#x");
+ if (ret < 0)
+ break;
+
+ m = 0;
+ r = 28;
+ while (r >= 0) {
+ n = (c >> r) & 0xf;
+ if (n || m) {
+ m = 1;
+ ret = (*filter->filter_function)(mbfl_hexchar_table[n], filter);
+ if (ret < 0) {
+ break;
+ }
+ }
+ r -= 4;
+ }
+ if (ret < 0) {
+ break;
+ }
+ if (m == 0) {
+ ret = (*filter->filter_function)(mbfl_hexchar_table[0], filter);
}
+ ret = mbfl_convert_filter_strcat(filter, (const unsigned char *)";");
+ } else {
+ ret = (*filter->filter_function)(filter->illegal_substchar, filter);
}
}
break;
@@ -468,8 +476,8 @@ const struct mbfl_convert_vtbl * mbfl_convert_filter_get_vtbl(enum mbfl_no_encod
to == mbfl_no_encoding_7bit) {
from = mbfl_no_encoding_8bit;
} else if (from == mbfl_no_encoding_base64 ||
- from == mbfl_no_encoding_qprint ||
- from == mbfl_no_encoding_uuencode) {
+ from == mbfl_no_encoding_qprint ||
+ from == mbfl_no_encoding_uuencode) {
to = mbfl_no_encoding_8bit;
}
diff --git a/ext/mbstring/tests/mb_substitute_character.phpt b/ext/mbstring/tests/mb_substitute_character.phpt
index 2adfddb10d..db1198822f 100644
--- a/ext/mbstring/tests/mb_substitute_character.phpt
+++ b/ext/mbstring/tests/mb_substitute_character.phpt
@@ -10,40 +10,36 @@ include_once('common.inc');
// Note: It does not return TRUE/FALSE for setting char
-// Use Unicode val
-$r = mb_substitute_character(0x3013);
-//$r = mb_substitute_character('U+3013');
-($r === TRUE) ? print "OK_UTF\n" : print("NG_UTF: ".gettype($r)." $r\n");
-print mb_substitute_character() . "\n";
+var_dump(mb_substitute_character(0x3044));
+var_dump(mb_substitute_character());
+var_dump(bin2hex(mb_convert_encoding("\xe2\x99\xa0\xe3\x81\x82", "CP932", "UTF-8")));
+var_dump(mb_substitute_character('long'));
+var_dump(mb_substitute_character());
+var_dump(bin2hex(mb_convert_encoding("\xe2\x99\xa0\xe3\x81\x82", "CP932", "UTF-8")));
-// Use "long"
-$r = mb_substitute_character('long');
-($r === TRUE) ? print "OK_LONG\n" : print("NG_LONG: ".gettype($r)." $r\n");
-print mb_substitute_character() . "\n";
+var_dump(mb_substitute_character('none'));
+var_dump(mb_substitute_character());
+var_dump(bin2hex(mb_convert_encoding("\xe2\x99\xa0\xe3\x81\x82", "CP932", "UTF-8")));
+var_dump(mb_substitute_character('entity'));
+var_dump(mb_substitute_character());
+var_dump(bin2hex(mb_convert_encoding("\xe2\x99\xa0\xe3\x81\x82", "CP932", "UTF-8")));
-// Use "none"
-$r = mb_substitute_character('none');
-($r === TRUE) ? print "OK_NONE\n" : print("NG_NONE: ".gettype($r)." $r\n");
-print mb_substitute_character() . "\n";
-
-
-// Set invalid string. Should fail.
-print "== INVALID PARAMETER ==\n";
-$r = mb_substitute_character('BAD_NAME');
-($r === FALSE) ? print "OK_BAD_NAME\n" : print("NG_BAD_NAME: ".gettype($r)." $r\n");
-
+var_dump(mb_substitute_character('BAD_NAME'));
?>
-
--EXPECT--
-OK_UTF
-12307
-OK_LONG
-long
-OK_NONE
-none
-== INVALID PARAMETER ==
+bool(true)
+int(12356)
+string(8) "82a282a0"
+bool(true)
+string(4) "long"
+string(16) "552b3236363082a0"
+bool(true)
+string(4) "none"
+string(4) "82a0"
+bool(true)
+string(6) "entity"
+string(20) "262378323636303b82a0"
ERR: Warning
-OK_BAD_NAME
-
+bool(false)