diff options
author | Alex Dowad <alexinbeijing@gmail.com> | 2020-10-07 22:12:27 +0200 |
---|---|---|
committer | Alex Dowad <alexinbeijing@gmail.com> | 2020-11-25 19:52:19 +0200 |
commit | 8ae0473324ab7f26ee9401252b3133aa4e0fe169 (patch) | |
tree | 318c3352b4d36d13560a8da25512b49c3486950b /ext/mbstring | |
parent | 2759874a4250e45f25a1807cfaed7ab5bc07ae14 (diff) | |
download | php-git-8ae0473324ab7f26ee9401252b3133aa4e0fe169.tar.gz |
Don't pass invalid JIS X 0208 characters through
Many Japanese encodings, such as JIS7/8, Shift JIS, ISO-2022-JP, EUC-JP, and
so on, encode characters from the JIS X 0208 character set. JIS X 0208 is based
on the concept of a 94x94 table, with numbered rows and columns. However,
more than a thousand of the cells in that table are empty; JIS X 0208 does not
actually use all 94x94=8,836 possible kuten codes.
mbstring had a dubious feature whereby, if a Japanese string contained one of
these 'unmapped' kuten codes, and it was being converted to another Japanese
encoding which was also based on JIS X 0208, the non-existent character would
be silently passed through, and the unmapped kuten code would be re-encoded
using the normal encoding method of the target text encoding.
Again, this _only_ happened if converting the text with the funky kuten code
to a Japanese encoding. If one tried converting it to Unicode, mbstring would
treat that as an error.
If somebody, somewhere, made their own private extension to JIS X 0208, and
used the regular Japanese encodings like Shift JIS and EUC-JP to encode this
private character set, then this feature might conceivably be useful. But how
likely is that? If someone is using Shift JIS, EUC-JP, ISO-2022-JP, etc. to
encode a funky version of JIS X 0208 with extra characters added, then that
should be treated as a separate text encoding.
The code which flags such characters with MBFL_WCSPLANE_JIS0208 is retained
solely for error reporting in `mbfl_filt_conv_illegal_output`.
Diffstat (limited to 'ext/mbstring')
10 files changed, 2 insertions, 31 deletions
diff --git a/ext/mbstring/libmbfl/filters/mbfilter_cp5022x.c b/ext/mbstring/libmbfl/filters/mbfilter_cp5022x.c index a4d1724e8f..b630d3fcb0 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_cp5022x.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_cp5022x.c @@ -409,9 +409,7 @@ mbfl_filt_conv_wchar_jis_ms(int c, mbfl_convert_filter *filter) /* do some transliteration */ if (s <= 0) { c1 = c & ~MBFL_WCSPLANE_MASK; - if (c1 == MBFL_WCSPLANE_JIS0208) { - s = c & MBFL_WCSPLANE_MASK; - } else if (c1 == MBFL_WCSPLANE_JIS0212) { + if (c1 == MBFL_WCSPLANE_JIS0212) { s = c & MBFL_WCSPLANE_MASK; s |= 0x8080; } else if (c == 0xa5) { /* YEN SIGN */ diff --git a/ext/mbstring/libmbfl/filters/mbfilter_cp51932.c b/ext/mbstring/libmbfl/filters/mbfilter_cp51932.c index cb95469408..25944a10dd 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_cp51932.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_cp51932.c @@ -216,14 +216,6 @@ mbfl_filt_conv_wchar_cp51932(int c, mbfl_convert_filter *filter) if (s1 >= ((85 + 0x20) << 8)) { /* 85ku - 120ku */ s1 = -1; } - } else if (c1 == MBFL_WCSPLANE_JIS0208) { - s1 = c & MBFL_WCSPLANE_MASK; - if ((s1 >= ((85 + 0x20) << 8) && /* 85ku - 94ku */ - s1 <= ((88 + 0x20) << 8)) ||/* IBM extension */ - (s1 >= ((93 + 0x20) << 8) && /* 89ku - 92ku */ - s1 <= ((94 + 0x20) << 8))) { - s1 = -1; - } } else if (c == 0xa5) { /* YEN SIGN */ s1 = 0x005c; /* YEN SIGN */ } else if (c == 0x203e) { /* OVER LINE */ diff --git a/ext/mbstring/libmbfl/filters/mbfilter_cp932.c b/ext/mbstring/libmbfl/filters/mbfilter_cp932.c index c0fe18e038..64ccd57361 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_cp932.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_cp932.c @@ -255,8 +255,6 @@ mbfl_filt_conv_wchar_cp932(int c, mbfl_convert_filter *filter) if (c1 == MBFL_WCSPLANE_WINCP932) { s1 = c & MBFL_WCSPLANE_MASK; s2 = 1; - } else if (c1 == MBFL_WCSPLANE_JIS0208) { - s1 = c & MBFL_WCSPLANE_MASK; } else if (c1 == MBFL_WCSPLANE_JIS0212) { s1 = c & MBFL_WCSPLANE_MASK; s1 |= 0x8080; diff --git 
a/ext/mbstring/libmbfl/filters/mbfilter_euc_jp_win.c b/ext/mbstring/libmbfl/filters/mbfilter_euc_jp_win.c index b93fc9101a..df51be0183 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_euc_jp_win.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_euc_jp_win.c @@ -279,11 +279,6 @@ mbfl_filt_conv_wchar_eucjpwin(int c, mbfl_convert_filter *filter) if (s1 >= ((85 + 0x20) << 8)) { /* 85ku - 120ku */ s1 = -1; } - } else if (c1 == MBFL_WCSPLANE_JIS0208) { - s1 = c & MBFL_WCSPLANE_MASK; - if (s1 >= ((85 + 0x20) << 8)) { /* 85ku - 94ku */ - s1 = -1; - } } else if (c1 == MBFL_WCSPLANE_JIS0212) { s1 = c & MBFL_WCSPLANE_MASK; if (s1 >= ((83 + 0x20) << 8)) { /* 83ku - 94ku */ diff --git a/ext/mbstring/libmbfl/filters/mbfilter_iso2022_jp_ms.c b/ext/mbstring/libmbfl/filters/mbfilter_iso2022_jp_ms.c index 950365045c..af54a1c605 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_iso2022_jp_ms.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_iso2022_jp_ms.c @@ -306,8 +306,6 @@ mbfl_filt_conv_wchar_2022jpms(int c, mbfl_convert_filter *filter) if (c1 == MBFL_WCSPLANE_WINCP932) { s1 = c & MBFL_WCSPLANE_MASK; s2 = 1; - } else if (c1 == MBFL_WCSPLANE_JIS0208) { - s1 = c & MBFL_WCSPLANE_MASK; } else if (c1 == MBFL_WCSPLANE_JIS0212) { s1 = c & MBFL_WCSPLANE_MASK; s1 |= 0x8080; diff --git a/ext/mbstring/libmbfl/filters/mbfilter_iso2022jp_mobile.c b/ext/mbstring/libmbfl/filters/mbfilter_iso2022jp_mobile.c index e6300675e8..1b4857dfe2 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_iso2022jp_mobile.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_iso2022jp_mobile.c @@ -334,8 +334,6 @@ mbfl_filt_conv_wchar_2022jp_mobile(int c, mbfl_convert_filter *filter) if (c1 == MBFL_WCSPLANE_WINCP932) { s1 = c & MBFL_WCSPLANE_MASK; s2 = 1; - } else if (c1 == MBFL_WCSPLANE_JIS0208) { - s1 = c & MBFL_WCSPLANE_MASK; } else if (c1 == MBFL_WCSPLANE_JIS0212) { s1 = c & MBFL_WCSPLANE_MASK; s1 |= 0x8080; diff --git a/ext/mbstring/libmbfl/filters/mbfilter_jis.c b/ext/mbstring/libmbfl/filters/mbfilter_jis.c index 
ba43872d83..8ba46a5b9a 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_jis.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_jis.c @@ -288,9 +288,7 @@ mbfl_filt_conv_wchar_jis(int c, mbfl_convert_filter *filter) } if (s <= 0) { c1 = c & ~MBFL_WCSPLANE_MASK; - if (c1 == MBFL_WCSPLANE_JIS0208) { - s = c & MBFL_WCSPLANE_MASK; - } else if (c1 == MBFL_WCSPLANE_JIS0212) { + if (c1 == MBFL_WCSPLANE_JIS0212) { s = c & MBFL_WCSPLANE_MASK; s |= 0x8080; } else if (c == 0xa5) { /* YEN SIGN */ diff --git a/ext/mbstring/libmbfl/filters/mbfilter_sjis_mac.c b/ext/mbstring/libmbfl/filters/mbfilter_sjis_mac.c index 45b87a8f98..6361195606 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_sjis_mac.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_sjis_mac.c @@ -420,8 +420,6 @@ mbfl_filt_conv_wchar_sjis_mac(int c, mbfl_convert_filter *filter) if (c1 == MBFL_WCSPLANE_WINCP932) { s1 = c & MBFL_WCSPLANE_MASK; s2 = 1; - } else if (c1 == MBFL_WCSPLANE_JIS0208) { - s1 = c & MBFL_WCSPLANE_MASK; } else if (c1 == MBFL_WCSPLANE_JIS0212) { s1 = c & MBFL_WCSPLANE_MASK; s1 |= 0x8080; diff --git a/ext/mbstring/libmbfl/filters/mbfilter_sjis_mobile.c b/ext/mbstring/libmbfl/filters/mbfilter_sjis_mobile.c index 255a457c58..10ac7d9ce4 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_sjis_mobile.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_sjis_mobile.c @@ -794,8 +794,6 @@ mbfl_filt_conv_wchar_sjis_mobile(int c, mbfl_convert_filter *filter) if (c1 == MBFL_WCSPLANE_WINCP932) { s1 = c & MBFL_WCSPLANE_MASK; s2 = 1; - } else if (c1 == MBFL_WCSPLANE_JIS0208) { - s1 = c & MBFL_WCSPLANE_MASK; } else if (c1 == MBFL_WCSPLANE_JIS0212) { s1 = c & MBFL_WCSPLANE_MASK; s1 |= 0x8080; diff --git a/ext/mbstring/libmbfl/filters/mbfilter_sjis_open.c b/ext/mbstring/libmbfl/filters/mbfilter_sjis_open.c index d37f01568e..ea19e6b105 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_sjis_open.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_sjis_open.c @@ -245,8 +245,6 @@ mbfl_filt_conv_wchar_sjis_open(int c, mbfl_convert_filter 
*filter) if (c1 == MBFL_WCSPLANE_WINCP932) { s1 = c & MBFL_WCSPLANE_MASK; s2 = 1; - } else if (c1 == MBFL_WCSPLANE_JIS0208) { - s1 = c & MBFL_WCSPLANE_MASK; } else if (c1 == MBFL_WCSPLANE_JIS0212) { s1 = c & MBFL_WCSPLANE_MASK; s1 |= 0x8080; |