diff options
author | Alex Dowad <alexinbeijing@gmail.com> | 2020-09-19 20:34:13 +0200 |
---|---|---|
committer | Alex Dowad <alexinbeijing@gmail.com> | 2020-10-16 22:17:45 +0200 |
commit | a6603b60f760f41f7a7e5b4bb5b3bf871972f6b4 (patch) | |
tree | 4ec4bda34c932e9f2d4a12524d97be161399aed7 /ext/mbstring/libmbfl | |
parent | 23270d7f9eb337ad640512aca79530a7c1f96a59 (diff) | |
download | php-git-a6603b60f760f41f7a7e5b4bb5b3bf871972f6b4.tar.gz |
Add identify filter for ISO-8859-6 (Latin/Arabic)
Note that some text encoding conversion libraries, such as Solaris iconv
and FreeBSD iconv, map bytes 0x30-0x39 to the Arabic-script (Arabic-Indic)
digits rather than to the ordinary ASCII digits 0-9, i.e. the Western Arabic
numerals. (That is, they map to Unicode codepoints U+0660-U+0669.)
Further, Windows CP28596 adds more mappings to use the unused bytes in
ISO-8859-6.
Diffstat (limited to 'ext/mbstring/libmbfl')
-rw-r--r-- | ext/mbstring/libmbfl/filters/mbfilter_iso8859_6.c | 12 |
1 files changed, 11 insertions, 1 deletions
diff --git a/ext/mbstring/libmbfl/filters/mbfilter_iso8859_6.c b/ext/mbstring/libmbfl/filters/mbfilter_iso8859_6.c
index c010908689..ead2b49e0b 100644
--- a/ext/mbstring/libmbfl/filters/mbfilter_iso8859_6.c
+++ b/ext/mbstring/libmbfl/filters/mbfilter_iso8859_6.c
@@ -31,6 +31,8 @@
 #include "mbfilter_iso8859_6.h"
 #include "unicode_table_iso8859_6.h"
 
+static int mbfl_filt_ident_iso8859_6(int c, mbfl_identify_filter *filter);
+
 static const char *mbfl_encoding_8859_6_aliases[] = {"ISO8859-6", "arabic", NULL};
 
 const mbfl_encoding mbfl_encoding_8859_6 = {
@@ -47,7 +49,7 @@ const mbfl_encoding mbfl_encoding_8859_6 = {
 const struct mbfl_identify_vtbl vtbl_identify_8859_6 = {
 	mbfl_no_encoding_8859_6,
 	mbfl_filt_ident_common_ctor,
-	mbfl_filt_ident_true
+	mbfl_filt_ident_iso8859_6
 };
 
 const struct mbfl_convert_vtbl vtbl_8859_6_wchar = {
@@ -132,3 +134,11 @@ int mbfl_filt_conv_wchar_8859_6(int c, mbfl_convert_filter *filter)
 
 	return c;
 }
+
+static int mbfl_filt_ident_iso8859_6(int c, mbfl_identify_filter *filter)
+{
+	if (c >= 0xA0 && !iso8859_6_ucs_table[c - 0xA0]) {
+		filter->status = 1;
+	}
+	return c;
+}