summaryrefslogtreecommitdiff
path: root/ext/mbstring
diff options
context:
space:
mode:
author: Alex Dowad <alexinbeijing@gmail.com> 2020-10-07 22:12:27 +0200
committer: Alex Dowad <alexinbeijing@gmail.com> 2020-11-25 19:52:19 +0200
commit: 8ae0473324ab7f26ee9401252b3133aa4e0fe169 (patch)
tree: 318c3352b4d36d13560a8da25512b49c3486950b /ext/mbstring
parent: 2759874a4250e45f25a1807cfaed7ab5bc07ae14 (diff)
download: php-git-8ae0473324ab7f26ee9401252b3133aa4e0fe169.tar.gz
Don't pass invalid JIS X 0208 characters through
Many Japanese encodings, such as JIS7/8, Shift JIS, ISO-2022-JP, and EUC-JP, encode characters from the JIS X 0208 character set. JIS X 0208 is based on the concept of a 94x94 table, with numbered rows and columns. However, more than a thousand of the cells in that table are empty; JIS X 0208 does not actually use all 94x94=8,836 possible kuten codes. mbstring had a dubious feature whereby, if a Japanese string contained one of these 'unmapped' kuten codes, and it was being converted to another Japanese encoding which was also based on JIS X 0208, the non-existent character would be silently passed through, and the unmapped kuten code would be re-encoded using the normal encoding method of the target text encoding. Again, this _only_ happened when converting text containing such an unmapped kuten code to another Japanese encoding. If one tried converting it to Unicode, mbstring would treat that as an error. If somebody, somewhere, made their own private extension to JIS X 0208, and used the regular Japanese encodings like Shift JIS and EUC-JP to encode this private character set, then this feature might conceivably be useful. But how likely is that? If someone is using Shift JIS, EUC-JP, ISO-2022-JP, etc. to encode a funky version of JIS X 0208 with extra characters added, then that should be treated as a separate text encoding. The code which flags such characters with MBFL_WCSPLANE_JIS0208 is retained solely for error reporting in `mbfl_filt_conv_illegal_output`.
Diffstat (limited to 'ext/mbstring')
-rw-r--r-- ext/mbstring/libmbfl/filters/mbfilter_cp5022x.c 4
-rw-r--r-- ext/mbstring/libmbfl/filters/mbfilter_cp51932.c 8
-rw-r--r-- ext/mbstring/libmbfl/filters/mbfilter_cp932.c 2
-rw-r--r-- ext/mbstring/libmbfl/filters/mbfilter_euc_jp_win.c 5
-rw-r--r-- ext/mbstring/libmbfl/filters/mbfilter_iso2022_jp_ms.c 2
-rw-r--r-- ext/mbstring/libmbfl/filters/mbfilter_iso2022jp_mobile.c 2
-rw-r--r-- ext/mbstring/libmbfl/filters/mbfilter_jis.c 4
-rw-r--r-- ext/mbstring/libmbfl/filters/mbfilter_sjis_mac.c 2
-rw-r--r-- ext/mbstring/libmbfl/filters/mbfilter_sjis_mobile.c 2
-rw-r--r-- ext/mbstring/libmbfl/filters/mbfilter_sjis_open.c 2
10 files changed, 2 insertions, 31 deletions
diff --git a/ext/mbstring/libmbfl/filters/mbfilter_cp5022x.c b/ext/mbstring/libmbfl/filters/mbfilter_cp5022x.c
index a4d1724e8f..b630d3fcb0 100644
--- a/ext/mbstring/libmbfl/filters/mbfilter_cp5022x.c
+++ b/ext/mbstring/libmbfl/filters/mbfilter_cp5022x.c
@@ -409,9 +409,7 @@ mbfl_filt_conv_wchar_jis_ms(int c, mbfl_convert_filter *filter)
/* do some transliteration */
if (s <= 0) {
c1 = c & ~MBFL_WCSPLANE_MASK;
- if (c1 == MBFL_WCSPLANE_JIS0208) {
- s = c & MBFL_WCSPLANE_MASK;
- } else if (c1 == MBFL_WCSPLANE_JIS0212) {
+ if (c1 == MBFL_WCSPLANE_JIS0212) {
s = c & MBFL_WCSPLANE_MASK;
s |= 0x8080;
} else if (c == 0xa5) { /* YEN SIGN */
diff --git a/ext/mbstring/libmbfl/filters/mbfilter_cp51932.c b/ext/mbstring/libmbfl/filters/mbfilter_cp51932.c
index cb95469408..25944a10dd 100644
--- a/ext/mbstring/libmbfl/filters/mbfilter_cp51932.c
+++ b/ext/mbstring/libmbfl/filters/mbfilter_cp51932.c
@@ -216,14 +216,6 @@ mbfl_filt_conv_wchar_cp51932(int c, mbfl_convert_filter *filter)
if (s1 >= ((85 + 0x20) << 8)) { /* 85ku - 120ku */
s1 = -1;
}
- } else if (c1 == MBFL_WCSPLANE_JIS0208) {
- s1 = c & MBFL_WCSPLANE_MASK;
- if ((s1 >= ((85 + 0x20) << 8) && /* 85ku - 94ku */
- s1 <= ((88 + 0x20) << 8)) ||/* IBM extension */
- (s1 >= ((93 + 0x20) << 8) && /* 89ku - 92ku */
- s1 <= ((94 + 0x20) << 8))) {
- s1 = -1;
- }
} else if (c == 0xa5) { /* YEN SIGN */
s1 = 0x005c; /* YEN SIGN */
} else if (c == 0x203e) { /* OVER LINE */
diff --git a/ext/mbstring/libmbfl/filters/mbfilter_cp932.c b/ext/mbstring/libmbfl/filters/mbfilter_cp932.c
index c0fe18e038..64ccd57361 100644
--- a/ext/mbstring/libmbfl/filters/mbfilter_cp932.c
+++ b/ext/mbstring/libmbfl/filters/mbfilter_cp932.c
@@ -255,8 +255,6 @@ mbfl_filt_conv_wchar_cp932(int c, mbfl_convert_filter *filter)
if (c1 == MBFL_WCSPLANE_WINCP932) {
s1 = c & MBFL_WCSPLANE_MASK;
s2 = 1;
- } else if (c1 == MBFL_WCSPLANE_JIS0208) {
- s1 = c & MBFL_WCSPLANE_MASK;
} else if (c1 == MBFL_WCSPLANE_JIS0212) {
s1 = c & MBFL_WCSPLANE_MASK;
s1 |= 0x8080;
diff --git a/ext/mbstring/libmbfl/filters/mbfilter_euc_jp_win.c b/ext/mbstring/libmbfl/filters/mbfilter_euc_jp_win.c
index b93fc9101a..df51be0183 100644
--- a/ext/mbstring/libmbfl/filters/mbfilter_euc_jp_win.c
+++ b/ext/mbstring/libmbfl/filters/mbfilter_euc_jp_win.c
@@ -279,11 +279,6 @@ mbfl_filt_conv_wchar_eucjpwin(int c, mbfl_convert_filter *filter)
if (s1 >= ((85 + 0x20) << 8)) { /* 85ku - 120ku */
s1 = -1;
}
- } else if (c1 == MBFL_WCSPLANE_JIS0208) {
- s1 = c & MBFL_WCSPLANE_MASK;
- if (s1 >= ((85 + 0x20) << 8)) { /* 85ku - 94ku */
- s1 = -1;
- }
} else if (c1 == MBFL_WCSPLANE_JIS0212) {
s1 = c & MBFL_WCSPLANE_MASK;
if (s1 >= ((83 + 0x20) << 8)) { /* 83ku - 94ku */
diff --git a/ext/mbstring/libmbfl/filters/mbfilter_iso2022_jp_ms.c b/ext/mbstring/libmbfl/filters/mbfilter_iso2022_jp_ms.c
index 950365045c..af54a1c605 100644
--- a/ext/mbstring/libmbfl/filters/mbfilter_iso2022_jp_ms.c
+++ b/ext/mbstring/libmbfl/filters/mbfilter_iso2022_jp_ms.c
@@ -306,8 +306,6 @@ mbfl_filt_conv_wchar_2022jpms(int c, mbfl_convert_filter *filter)
if (c1 == MBFL_WCSPLANE_WINCP932) {
s1 = c & MBFL_WCSPLANE_MASK;
s2 = 1;
- } else if (c1 == MBFL_WCSPLANE_JIS0208) {
- s1 = c & MBFL_WCSPLANE_MASK;
} else if (c1 == MBFL_WCSPLANE_JIS0212) {
s1 = c & MBFL_WCSPLANE_MASK;
s1 |= 0x8080;
diff --git a/ext/mbstring/libmbfl/filters/mbfilter_iso2022jp_mobile.c b/ext/mbstring/libmbfl/filters/mbfilter_iso2022jp_mobile.c
index e6300675e8..1b4857dfe2 100644
--- a/ext/mbstring/libmbfl/filters/mbfilter_iso2022jp_mobile.c
+++ b/ext/mbstring/libmbfl/filters/mbfilter_iso2022jp_mobile.c
@@ -334,8 +334,6 @@ mbfl_filt_conv_wchar_2022jp_mobile(int c, mbfl_convert_filter *filter)
if (c1 == MBFL_WCSPLANE_WINCP932) {
s1 = c & MBFL_WCSPLANE_MASK;
s2 = 1;
- } else if (c1 == MBFL_WCSPLANE_JIS0208) {
- s1 = c & MBFL_WCSPLANE_MASK;
} else if (c1 == MBFL_WCSPLANE_JIS0212) {
s1 = c & MBFL_WCSPLANE_MASK;
s1 |= 0x8080;
diff --git a/ext/mbstring/libmbfl/filters/mbfilter_jis.c b/ext/mbstring/libmbfl/filters/mbfilter_jis.c
index ba43872d83..8ba46a5b9a 100644
--- a/ext/mbstring/libmbfl/filters/mbfilter_jis.c
+++ b/ext/mbstring/libmbfl/filters/mbfilter_jis.c
@@ -288,9 +288,7 @@ mbfl_filt_conv_wchar_jis(int c, mbfl_convert_filter *filter)
}
if (s <= 0) {
c1 = c & ~MBFL_WCSPLANE_MASK;
- if (c1 == MBFL_WCSPLANE_JIS0208) {
- s = c & MBFL_WCSPLANE_MASK;
- } else if (c1 == MBFL_WCSPLANE_JIS0212) {
+ if (c1 == MBFL_WCSPLANE_JIS0212) {
s = c & MBFL_WCSPLANE_MASK;
s |= 0x8080;
} else if (c == 0xa5) { /* YEN SIGN */
diff --git a/ext/mbstring/libmbfl/filters/mbfilter_sjis_mac.c b/ext/mbstring/libmbfl/filters/mbfilter_sjis_mac.c
index 45b87a8f98..6361195606 100644
--- a/ext/mbstring/libmbfl/filters/mbfilter_sjis_mac.c
+++ b/ext/mbstring/libmbfl/filters/mbfilter_sjis_mac.c
@@ -420,8 +420,6 @@ mbfl_filt_conv_wchar_sjis_mac(int c, mbfl_convert_filter *filter)
if (c1 == MBFL_WCSPLANE_WINCP932) {
s1 = c & MBFL_WCSPLANE_MASK;
s2 = 1;
- } else if (c1 == MBFL_WCSPLANE_JIS0208) {
- s1 = c & MBFL_WCSPLANE_MASK;
} else if (c1 == MBFL_WCSPLANE_JIS0212) {
s1 = c & MBFL_WCSPLANE_MASK;
s1 |= 0x8080;
diff --git a/ext/mbstring/libmbfl/filters/mbfilter_sjis_mobile.c b/ext/mbstring/libmbfl/filters/mbfilter_sjis_mobile.c
index 255a457c58..10ac7d9ce4 100644
--- a/ext/mbstring/libmbfl/filters/mbfilter_sjis_mobile.c
+++ b/ext/mbstring/libmbfl/filters/mbfilter_sjis_mobile.c
@@ -794,8 +794,6 @@ mbfl_filt_conv_wchar_sjis_mobile(int c, mbfl_convert_filter *filter)
if (c1 == MBFL_WCSPLANE_WINCP932) {
s1 = c & MBFL_WCSPLANE_MASK;
s2 = 1;
- } else if (c1 == MBFL_WCSPLANE_JIS0208) {
- s1 = c & MBFL_WCSPLANE_MASK;
} else if (c1 == MBFL_WCSPLANE_JIS0212) {
s1 = c & MBFL_WCSPLANE_MASK;
s1 |= 0x8080;
diff --git a/ext/mbstring/libmbfl/filters/mbfilter_sjis_open.c b/ext/mbstring/libmbfl/filters/mbfilter_sjis_open.c
index d37f01568e..ea19e6b105 100644
--- a/ext/mbstring/libmbfl/filters/mbfilter_sjis_open.c
+++ b/ext/mbstring/libmbfl/filters/mbfilter_sjis_open.c
@@ -245,8 +245,6 @@ mbfl_filt_conv_wchar_sjis_open(int c, mbfl_convert_filter *filter)
if (c1 == MBFL_WCSPLANE_WINCP932) {
s1 = c & MBFL_WCSPLANE_MASK;
s2 = 1;
- } else if (c1 == MBFL_WCSPLANE_JIS0208) {
- s1 = c & MBFL_WCSPLANE_MASK;
} else if (c1 == MBFL_WCSPLANE_JIS0212) {
s1 = c & MBFL_WCSPLANE_MASK;
s1 |= 0x8080;