summaryrefslogtreecommitdiff
path: root/ext/mbstring
diff options
context:
space:
mode:
author Alex Dowad <alexinbeijing@gmail.com> 2020-11-14 21:15:11 +0200
committer Alex Dowad <alexinbeijing@gmail.com> 2020-11-25 20:51:45 +0200
commit e4ee97911132c6ad4dee372369472316a33b4eee (patch)
tree 0a220096aaf7b90475d553ceb084b9fbcf279752 /ext/mbstring
parent 315d48b4340f79731882b7f87422801a065475b8 (diff)
download php-git-e4ee97911132c6ad4dee372369472316a33b4eee.tar.gz
0x5C is not a Yen sign in CP932 (or CP51932)
When Microsoft created CP932 (their version of Shift-JIS), they explicitly used bytes 0x00-0x7F to represent ASCII characters rather than JIS X 0201 characters. So when converting Unicode to CP932, it is not correct to convert U+00A5 to CP932 0x5C. Fortunately, CP932 does have a multi-byte FULLWIDTH YEN SIGN character which we can use instead. CP51932 uses the same extended character set as CP932; while CP932 is Microsoft's extended version of Shift-JIS, CP51932 is their extended version of EUC-JP. So the same reasoning applies to CP51932.
Diffstat (limited to 'ext/mbstring')
-rw-r--r-- ext/mbstring/libmbfl/filters/mbfilter_cp51932.c 4
-rw-r--r-- ext/mbstring/libmbfl/filters/mbfilter_cp932.c 4
-rw-r--r-- ext/mbstring/tests/cp51932_encoding.phpt 3
-rw-r--r-- ext/mbstring/tests/cp932_encoding.phpt 2
4 files changed, 9 insertions, 4 deletions
diff --git a/ext/mbstring/libmbfl/filters/mbfilter_cp51932.c b/ext/mbstring/libmbfl/filters/mbfilter_cp51932.c
index d9fecc9d4d..aa52d05481 100644
--- a/ext/mbstring/libmbfl/filters/mbfilter_cp51932.c
+++ b/ext/mbstring/libmbfl/filters/mbfilter_cp51932.c
@@ -214,8 +214,8 @@ mbfl_filt_conv_wchar_cp51932(int c, mbfl_convert_filter *filter)
}
if (s1 >= 0x8080) s1 = -1; /* we don't support JIS X0213 */
if (s1 <= 0) {
- if (c == 0xa5) { /* YEN SIGN */
- s1 = 0x005c; /* YEN SIGN */
+ if (c == 0xa5) { /* YEN SIGN */
+ s1 = 0x216F; /* FULLWIDTH YEN SIGN */
} else if (c == 0x203e) { /* OVER LINE */
s1 = 0x007e; /* FULLWIDTH MACRON */
} else if (c == 0xff3c) { /* FULLWIDTH REVERSE SOLIDUS */
diff --git a/ext/mbstring/libmbfl/filters/mbfilter_cp932.c b/ext/mbstring/libmbfl/filters/mbfilter_cp932.c
index 6246600de8..ec192faa2b 100644
--- a/ext/mbstring/libmbfl/filters/mbfilter_cp932.c
+++ b/ext/mbstring/libmbfl/filters/mbfilter_cp932.c
@@ -251,8 +251,8 @@ mbfl_filt_conv_wchar_cp932(int c, mbfl_convert_filter *filter)
s2 = 1;
}
if (s1 <= 0) {
- if (c == 0xa5) { /* YEN SIGN */
- s1 = 0x005c; /* YEN SIGN */
+ if (c == 0xa5) { /* YEN SIGN */
+ s1 = 0x216F; /* FULLWIDTH YEN SIGN */
} else if (c == 0x203e) { /* OVER LINE */
s1 = 0x007e; /* FULLWIDTH MACRON */
} else if (c == 0xff3c) { /* FULLWIDTH REVERSE SOLIDUS */
diff --git a/ext/mbstring/tests/cp51932_encoding.phpt b/ext/mbstring/tests/cp51932_encoding.phpt
index 8dbbeb85a0..2fc25fd3c8 100644
--- a/ext/mbstring/tests/cp51932_encoding.phpt
+++ b/ext/mbstring/tests/cp51932_encoding.phpt
@@ -84,6 +84,9 @@ unset($fromUnicode["\x30\x94"]); // Don't map hiragana vu to katakana vu
for ($i = 0; $i <= 0x7F; $i++)
$validChars[chr($i)] = "\x00" . chr($i);
+/* U+00A5 is YEN SIGN; convert to FULLWIDTH YEN SIGN */
+$fromUnicode["\x00\xA5"] = "\xA1\xEF";
+
testAllValidChars($validChars, 'CP51932', 'UTF-16BE', false);
testAllValidChars($fromUnicode, 'UTF-16BE', 'CP51932', false);
echo "CP51932 verification and conversion works on all valid characters\n";
diff --git a/ext/mbstring/tests/cp932_encoding.phpt b/ext/mbstring/tests/cp932_encoding.phpt
index ddcf8a449b..ec9e76f1f6 100644
--- a/ext/mbstring/tests/cp932_encoding.phpt
+++ b/ext/mbstring/tests/cp932_encoding.phpt
@@ -30,6 +30,8 @@ for ($i = 0xF0; $i <= 0xF9; $i++) {
$fromUnicode["\x00\xA2"] = "\x81\x91";
/* U+00A3 is POUND SIGN; convert to FULLWIDTH POUND SIGN */
$fromUnicode["\x00\xA3"] = "\x81\x92";
+/* U+00A5 is YEN SIGN; convert to FULLWIDTH YEN SIGN */
+$fromUnicode["\x00\xA5"] = "\x81\x8F";
/* We map the JIS X 0208 FULLWIDTH TILDE to U+FF5E (FULLWIDTH TILDE)
* But when converting Unicode to CP932, we also accept U+301C (WAVE DASH) */