summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Moriyoshi Koizumi <moriyoshi@php.net> 2009-02-24 15:09:43 +0000
committer Moriyoshi Koizumi <moriyoshi@php.net> 2009-02-24 15:09:43 +0000
commit 7db52b84b78644f2182b6938ed2dc001c868d581 (patch)
tree 90430062f7211257872fd19cc767a5659544f0a4
parent c5903bd6ab73466d63dabba09da1f26c02419961 (diff)
download php-git-7db52b84b78644f2182b6938ed2dc001c868d581.tar.gz
- Revert the patch then.
-rw-r--r-- ext/mbstring/libmbfl/filters/mbfilter_utf32.c 16
-rw-r--r-- ext/mbstring/libmbfl/filters/mbfilter_utf8.c 9
-rw-r--r-- ext/mbstring/libmbfl/mbfl/mbfl_consts.h 1
-rw-r--r-- ext/mbstring/tests/illformed_utf_sequences.phpt 148
4 files changed, 7 insertions, 167 deletions
diff --git a/ext/mbstring/libmbfl/filters/mbfilter_utf32.c b/ext/mbstring/libmbfl/filters/mbfilter_utf32.c
index 56d6dd4c97..4b0e9b9e0f 100644
--- a/ext/mbstring/libmbfl/filters/mbfilter_utf32.c
+++ b/ext/mbstring/libmbfl/filters/mbfilter_utf32.c
@@ -171,9 +171,7 @@ int mbfl_filt_conv_utf32_wchar(int c, mbfl_convert_filter *filter)
CK((*filter->output_function)(0xfeff, filter->data));
} else {
filter->status &= ~0xff;
- if (n < MBFL_WCSPLANE_UTF32MAX && (n < 0xd800 || n > 0xdfff)) {
- CK((*filter->output_function)(n, filter->data));
- }
+ CK((*filter->output_function)(n, filter->data));
}
break;
}
@@ -203,9 +201,7 @@ int mbfl_filt_conv_utf32be_wchar(int c, mbfl_convert_filter *filter)
} else {
filter->status = 0;
n = (c & 0xff) | filter->cache;
- if (n < MBFL_WCSPLANE_UTF32MAX && (n < 0xd800 || n > 0xdfff)) {
- CK((*filter->output_function)(n, filter->data));
- }
+ CK((*filter->output_function)(n, filter->data));
}
return c;
}
@@ -215,7 +211,7 @@ int mbfl_filt_conv_utf32be_wchar(int c, mbfl_convert_filter *filter)
*/
int mbfl_filt_conv_wchar_utf32be(int c, mbfl_convert_filter *filter)
{
- if (c >= 0 && c < MBFL_WCSPLANE_UTF32MAX) {
+ if (c >= 0 && c < MBFL_WCSGROUP_UCS4MAX) {
CK((*filter->output_function)((c >> 24) & 0xff, filter->data));
CK((*filter->output_function)((c >> 16) & 0xff, filter->data));
CK((*filter->output_function)((c >> 8) & 0xff, filter->data));
@@ -251,9 +247,7 @@ int mbfl_filt_conv_utf32le_wchar(int c, mbfl_convert_filter *filter)
} else {
filter->status = 0;
n = ((c & 0xff) << 24) | filter->cache;
- if (n < MBFL_WCSPLANE_UTF32MAX && (n < 0xd800 || n > 0xdfff)) {
- CK((*filter->output_function)(n, filter->data));
- }
+ CK((*filter->output_function)(n, filter->data));
}
return c;
}
@@ -263,7 +257,7 @@ int mbfl_filt_conv_utf32le_wchar(int c, mbfl_convert_filter *filter)
*/
int mbfl_filt_conv_wchar_utf32le(int c, mbfl_convert_filter *filter)
{
- if (c >= 0 && c < MBFL_WCSPLANE_UTF32MAX) {
+ if (c >= 0 && c < MBFL_WCSGROUP_UCS4MAX) {
CK((*filter->output_function)(c & 0xff, filter->data));
CK((*filter->output_function)((c >> 8) & 0xff, filter->data));
CK((*filter->output_function)((c >> 16) & 0xff, filter->data));
diff --git a/ext/mbstring/libmbfl/filters/mbfilter_utf8.c b/ext/mbstring/libmbfl/filters/mbfilter_utf8.c
index 20ff983e11..8b95897eac 100644
--- a/ext/mbstring/libmbfl/filters/mbfilter_utf8.c
+++ b/ext/mbstring/libmbfl/filters/mbfilter_utf8.c
@@ -106,8 +106,7 @@ int mbfl_filt_conv_utf8_wchar(int c, mbfl_convert_filter *filter)
}
filter->status = 0;
} else if (c < 0xc0) {
- int status = filter->status & 0xff;
- switch (status) {
+ switch (filter->status & 0xff) {
case 0x10: /* 2byte code 2nd char */
case 0x21: /* 3byte code 3rd char */
case 0x32: /* 4byte code 4th char */
@@ -115,11 +114,7 @@ int mbfl_filt_conv_utf8_wchar(int c, mbfl_convert_filter *filter)
case 0x54: /* 6byte code 6th char */
filter->status = 0;
s = filter->cache | (c & 0x3f);
- if ((status == 0x10 && s >= 0x80) ||
- (status == 0x21 && s >= 0x800 && (s < 0xd800 || s > 0xdfff)) ||
- (status == 0x32 && s >= 0x10000) ||
- (status == 0x43 && s >= 0x200000) ||
- (status == 0x54 && s >= 0x4000000 && s < MBFL_WCSGROUP_UCS4MAX)) {
+ if (s >= 0x80) {
CK((*filter->output_function)(s, filter->data));
}
break;
diff --git a/ext/mbstring/libmbfl/mbfl/mbfl_consts.h b/ext/mbstring/libmbfl/mbfl/mbfl_consts.h
index cf4eaff1db..f500766b49 100644
--- a/ext/mbstring/libmbfl/mbfl/mbfl_consts.h
+++ b/ext/mbstring/libmbfl/mbfl/mbfl_consts.h
@@ -47,7 +47,6 @@
/* wchar plane, special charactor */
#define MBFL_WCSPLANE_MASK 0xffff
#define MBFL_WCSPLANE_UCS2MAX 0x00010000
-#define MBFL_WCSPLANE_UTF32MAX 0x00110000
#define MBFL_WCSPLANE_SUPMIN 0x00010000
#define MBFL_WCSPLANE_SUPMAX 0x00200000
#define MBFL_WCSPLANE_JIS0208 0x70e10000 /* JIS HEX : 2121h - 7E7Eh */
diff --git a/ext/mbstring/tests/illformed_utf_sequences.phpt b/ext/mbstring/tests/illformed_utf_sequences.phpt
deleted file mode 100644
index a462cd0745..0000000000
--- a/ext/mbstring/tests/illformed_utf_sequences.phpt
+++ /dev/null
@@ -1,148 +0,0 @@
---TEST--
-Unicode standard conformance test (ill-formed UTF sequences.)
---SKIPIF--
-<?php extension_loaded('mbstring') or die('skip mbstring not available'); ?>
---FILE--
-<?php
-echo "UTF-8 redundancy\n";
-var_dump(bin2hex(mb_convert_encoding("\x31\x32\x33", "UCS-4BE", "UTF-8")));
-var_dump(bin2hex(mb_convert_encoding("\x41\x42\x43", "UCS-4BE", "UTF-8")));
-var_dump(bin2hex(mb_convert_encoding("\xc0\xb1\xc0\xb2\xc0\xb3", "UCS-4BE", "UTF-8")));
-var_dump(bin2hex(mb_convert_encoding("\xc1\x81\xc1\x82\xc1\x83", "UCS-4BE", "UTF-8")));
-var_dump(bin2hex(mb_convert_encoding("\xe0\x80\xb1\xe0\x80\xb2\xe0\x80\xb3", "UCS-4BE", "UTF-8")));
-var_dump(bin2hex(mb_convert_encoding("\xe0\x81\x81\xe0\x81\x82\xe0\x81\x83", "UCS-4BE", "UTF-8")));
-var_dump(bin2hex(mb_convert_encoding("\xf0\x80\x80\xb1\xf0\x80\x80\xb2\xf0\x80\x80\xb3", "UCS-4BE", "UTF-8")));
-var_dump(bin2hex(mb_convert_encoding("\xf0\x80\x81\x81\xf0\x80\x81\x82\xf0\x81\x83", "UCS-4BE", "UTF-8")));
-var_dump(bin2hex(mb_convert_encoding("\xf8\x80\x80\x80\xb1\xf8\x80\x80\x80\xb2\xf8\x80\x80\x80\xb3", "UCS-4BE", "UTF-8")));
-var_dump(bin2hex(mb_convert_encoding("\xf8\x80\x80\x81\x81\xf8\x80\x80\x81\x82\xf8\x80\x80\x81\x83", "UCS-4BE", "UTF-8")));
-var_dump(bin2hex(mb_convert_encoding("\xfc\x80\x80\x80\x80\xb1\xfc\x80\x80\x80\x80\xb2\xfc\x80\x80\x80\x80\xb3", "UCS-4BE", "UTF-8")));
-var_dump(bin2hex(mb_convert_encoding("\xfc\x80\x80\x80\x81\x81\xfc\x80\x80\x80\x81\x82\xfc\x80\x80\x80\x81\x83", "UCS-4BE", "UTF-8")));
-
-var_dump(bin2hex(mb_convert_encoding("\xc2\xa2\xc2\xa3\xc2\xa5", "UCS-4BE", "UTF-8")));
-var_dump(bin2hex(mb_convert_encoding("\xe0\x82\xa2\xe0\x82\xa3\xe0\x82\xa5", "UCS-4BE", "UTF-8")));
-var_dump(bin2hex(mb_convert_encoding("\xf0\x80\x82\xa2\xf0\x80\x82\xa3\xf0\x80\x82\xa5", "UCS-4BE", "UTF-8")));
-var_dump(bin2hex(mb_convert_encoding("\xf8\x80\x80\x82\xa2\xf8\x80\x80\x82\xa3\xf8\x80\x80\x82\xa5", "UCS-4BE", "UTF-8")));
-var_dump(bin2hex(mb_convert_encoding("\xfc\x80\x80\x80\x82\xa2\xfc\x80\x80\x80\x82\xa3\xfc\x80\x80\x80\x82\xa5", "UCS-4BE", "UTF-8")));
-
-var_dump(bin2hex(mb_convert_encoding("\xc1\xbf", "UCS-4BE", "UTF-8")));
-var_dump(bin2hex(mb_convert_encoding("\xc2\x80", "UCS-4BE", "UTF-8")));
-var_dump(bin2hex(mb_convert_encoding("\xdf\xbf", "UCS-4BE", "UTF-8")));
-var_dump(bin2hex(mb_convert_encoding("\xe0\x9f\xff", "UCS-4BE", "UTF-8")));
-var_dump(bin2hex(mb_convert_encoding("\xe0\xa0\x80", "UCS-4BE", "UTF-8")));
-var_dump(bin2hex(mb_convert_encoding("\xef\xbf\xbf", "UCS-4BE", "UTF-8")));
-var_dump(bin2hex(mb_convert_encoding("\xf0\x8f\xbf\xbf", "UCS-4BE", "UTF-8")));
-var_dump(bin2hex(mb_convert_encoding("\xf0\x90\x80\x80", "UCS-4BE", "UTF-8")));
-var_dump(bin2hex(mb_convert_encoding("\xf7\xbf\xbf\xbf", "UCS-4BE", "UTF-8")));
-var_dump(bin2hex(mb_convert_encoding("\xf8\x87\xbf\xbf\xbf", "UCS-4BE", "UTF-8")));
-var_dump(bin2hex(mb_convert_encoding("\xf8\x88\x80\x80\x80", "UCS-4BE", "UTF-8")));
-var_dump(bin2hex(mb_convert_encoding("\xfb\xbf\xbf\xbf\xbf", "UCS-4BE", "UTF-8")));
-var_dump(bin2hex(mb_convert_encoding("\xfc\x83\xbf\xbf\xbf\xbf", "UCS-4BE", "UTF-8")));
-var_dump(bin2hex(mb_convert_encoding("\xfc\x84\x80\x80\x80\x80", "UCS-4BE", "UTF-8")));
-var_dump(bin2hex(mb_convert_encoding("\xfd\xaf\xbf\xbf\xbf\xbf", "UCS-4BE", "UTF-8")));
-var_dump(bin2hex(mb_convert_encoding("\xfd\xbf\xbf\xbf\xbf\xbf", "UCS-4BE", "UTF-8")));
-
-echo "UTF-8 and surrogates area\n";
-$out = '';
-for ($i = 0xd7ff; $i <= 0xe000; ++$i) {
- $out .= mb_convert_encoding(pack('C3', 0xe0 | ($i >> 12), 0x80 | ($i >> 6) & 0x3f, 0x80 | $i & 0x3f), "UCS-4BE", "UTF-8");
-}
-var_dump(bin2hex($out));
-
-echo "UTF-32 code range\n";
-var_dump(bin2hex(mb_convert_encoding("\x00\x11\x00\x00", "UCS-4BE", "UTF-32BE")));
-var_dump(bin2hex(mb_convert_encoding("\x00\x10\xff\xff", "UCS-4BE", "UTF-32BE")));
-var_dump(bin2hex(mb_convert_encoding("\x00\x00\x11\x00", "UCS-4BE", "UTF-32LE")));
-var_dump(bin2hex(mb_convert_encoding("\xff\xff\x10\x00", "UCS-4BE", "UTF-32LE")));
-var_dump(bin2hex(mb_convert_encoding("\x00\x11\x00\x00", "UCS-4BE", "UTF-32")));
-var_dump(bin2hex(mb_convert_encoding("\x00\x10\xff\xff", "UCS-4BE", "UTF-32")));
-var_dump(bin2hex(mb_convert_encoding("\x00\x00\xfe\xff\x00\x11\x00\x00", "UCS-4BE", "UTF-32")));
-var_dump(bin2hex(mb_convert_encoding("\x00\x00\xfe\xff\x00\x10\xff\xff", "UCS-4BE", "UTF-32")));
-var_dump(bin2hex(mb_convert_encoding("\xff\xfe\x00\x00\x00\x00\x11\x00", "UCS-4BE", "UTF-32")));
-var_dump(bin2hex(mb_convert_encoding("\xff\xfe\x00\x00\xff\xff\x10\x00", "UCS-4BE", "UTF-32")));
-
-echo "UTF-32 and surrogates area\n";
-$out = '';
-for ($i = 0xd7ff; $i <= 0xe000; ++$i) {
- $out .= mb_convert_encoding(pack('C4', $i >> 24, ($i >> 16) & 0xff, ($i >> 8) & 0xff, $i & 0xff), "UCS-4BE", "UTF-32BE");
-}
-var_dump(bin2hex($out));
-
-$out = '';
-for ($i = 0xd7ff; $i <= 0xe000; ++$i) {
- $out .= mb_convert_encoding(pack('C4', $i & 0xff, ($i >> 8) & 0xff, ($i >> 16) & 0xff, ($i >> 24) & 0xff), "UCS-4BE", "UTF-32LE");
-}
-var_dump(bin2hex($out));
-
-$out = '';
-for ($i = 0xd7ff; $i <= 0xe000; ++$i) {
- $out .= mb_convert_encoding(pack('C4', $i >> 24, ($i >> 16) & 0xff, ($i >> 8) & 0xff, $i & 0xff), "UCS-4BE", "UTF-32");
-}
-var_dump(bin2hex($out));
-
-$out = '';
-for ($i = 0xd7ff; $i <= 0xe000; ++$i) {
- $out .= mb_convert_encoding("\x00\x00\xfe\xff". pack('C4', $i >> 24, ($i >> 16) & 0xff, ($i >> 8) & 0xff, $i & 0xff), "UCS-4BE", "UTF-32");
-}
-var_dump(bin2hex(str_replace("\x00\x00\xfe\xff", "", $out)));
-
-
-$out = '';
-for ($i = 0xd7ff; $i <= 0xe000; ++$i) {
- $out .= mb_convert_encoding("\xff\xfe\x00\x00". pack('C4', $i & 0xff, ($i >> 8) & 0xff, ($i >> 16) & 0xff, ($i >> 24) & 0xff), "UCS-4BE", "UTF-32");
-}
-var_dump(bin2hex(str_replace("\x00\x00\xfe\xff", "", $out)));
-?>
---EXPECT--
-UTF-8 redundancy
-string(24) "000000310000003200000033"
-string(24) "000000410000004200000043"
-string(0) ""
-string(0) ""
-string(0) ""
-string(0) ""
-string(0) ""
-string(0) ""
-string(0) ""
-string(0) ""
-string(0) ""
-string(0) ""
-string(24) "000000a2000000a3000000a5"
-string(0) ""
-string(0) ""
-string(0) ""
-string(0) ""
-string(0) ""
-string(8) "00000080"
-string(8) "000007ff"
-string(0) ""
-string(8) "00000800"
-string(8) "0000ffff"
-string(0) ""
-string(8) "00010000"
-string(8) "001fffff"
-string(0) ""
-string(8) "00200000"
-string(8) "03ffffff"
-string(0) ""
-string(8) "04000000"
-string(8) "6fffffff"
-string(0) ""
-UTF-8 and surrogates area
-string(16) "0000d7ff0000e000"
-UTF-32 code range
-string(0) ""
-string(8) "0010ffff"
-string(0) ""
-string(8) "0010ffff"
-string(0) ""
-string(8) "0010ffff"
-string(8) "0000feff"
-string(16) "0000feff0010ffff"
-string(8) "0000feff"
-string(16) "0000feff0010ffff"
-UTF-32 and surrogates area
-string(16) "0000d7ff0000e000"
-string(16) "0000d7ff0000e000"
-string(16) "0000d7ff0000e000"
-string(16) "0000d7ff0000e000"
-string(16) "0000d7ff0000e000"