diff options
Diffstat (limited to 'ext/mbstring/libmbfl/filters/mbfilter_ucs2.c')
-rw-r--r-- | ext/mbstring/libmbfl/filters/mbfilter_ucs2.c | 125 |
1 files changed, 52 insertions, 73 deletions
diff --git a/ext/mbstring/libmbfl/filters/mbfilter_ucs2.c b/ext/mbstring/libmbfl/filters/mbfilter_ucs2.c index 68172efbe6..efe8cc2d4a 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_ucs2.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_ucs2.c @@ -30,15 +30,24 @@ #include "mbfilter.h" #include "mbfilter_ucs2.h" +static int mbfl_filt_conv_ucs2_wchar_flush(mbfl_convert_filter *filter); + static const char *mbfl_encoding_ucs2_aliases[] = {"ISO-10646-UCS-2", "UCS2" , "UNICODE", NULL}; +/* This library historically had encodings called 'byte2be' and 'byte2le' + * which were almost identical to UCS-2, except that they would truncate + * Unicode codepoints higher than 0xFFFF quietly + * Maintain minimal support by aliasing to UCS-2 */ +static const char *mbfl_encoding_ucs2be_aliases[] = {"byte2be", NULL}; +static const char *mbfl_encoding_ucs2le_aliases[] = {"byte2le", NULL}; + const mbfl_encoding mbfl_encoding_ucs2 = { mbfl_no_encoding_ucs2, "UCS-2", "UCS-2", - (const char *(*)[])&mbfl_encoding_ucs2_aliases, + mbfl_encoding_ucs2_aliases, NULL, - MBFL_ENCTYPE_WCS2BE, + MBFL_ENCTYPE_WCS2, &vtbl_ucs2_wchar, &vtbl_wchar_ucs2 }; @@ -47,9 +56,9 @@ const mbfl_encoding mbfl_encoding_ucs2be = { mbfl_no_encoding_ucs2be, "UCS-2BE", "UCS-2BE", + mbfl_encoding_ucs2be_aliases, NULL, - NULL, - MBFL_ENCTYPE_WCS2BE, + MBFL_ENCTYPE_WCS2, &vtbl_ucs2be_wchar, &vtbl_wchar_ucs2be }; @@ -58,9 +67,9 @@ const mbfl_encoding mbfl_encoding_ucs2le = { mbfl_no_encoding_ucs2le, "UCS-2LE", "UCS-2LE", + mbfl_encoding_ucs2le_aliases, NULL, - NULL, - MBFL_ENCTYPE_WCS2LE, + MBFL_ENCTYPE_WCS2, &vtbl_ucs2le_wchar, &vtbl_wchar_ucs2le }; @@ -71,7 +80,7 @@ const struct mbfl_convert_vtbl vtbl_ucs2_wchar = { mbfl_filt_conv_common_ctor, NULL, mbfl_filt_conv_ucs2_wchar, - mbfl_filt_conv_common_flush, + mbfl_filt_conv_ucs2_wchar_flush, NULL, }; @@ -91,7 +100,7 @@ const struct mbfl_convert_vtbl vtbl_ucs2be_wchar = { mbfl_filt_conv_common_ctor, NULL, mbfl_filt_conv_ucs2be_wchar, - mbfl_filt_conv_common_flush, + mbfl_filt_conv_ucs2_wchar_flush, NULL, }; @@ -111,7 +120,7 @@ const struct mbfl_convert_vtbl vtbl_ucs2le_wchar = { mbfl_filt_conv_common_ctor, NULL, mbfl_filt_conv_ucs2le_wchar, - mbfl_filt_conv_common_flush, + mbfl_filt_conv_ucs2_wchar_flush, NULL, }; @@ -127,113 +136,83 @@ const struct mbfl_convert_vtbl vtbl_wchar_ucs2le = { #define CK(statement) do { if ((statement) < 0) return (-1); } while (0) -/* - * UCS-2 => wchar - */ int mbfl_filt_conv_ucs2_wchar(int c, mbfl_convert_filter *filter) { - int n, endian; - - endian = filter->status & 0xff00; - switch (filter->status & 0xff) { - case 0: - if (endian) { - n = c & 0xff; - } else { - n = (c & 0xff) << 8; - } - filter->cache = n; - filter->status++; - break; - default: - if (endian) { - n = (c & 0xff) << 8; + if (filter->status == 0) { + filter->status = 1; + filter->cache = c & 0xFF; + } else { + filter->status = 0; + int n = (filter->cache << 8) | (c & 0xFF); + if (n == 0xFFFE) { + /* Found little-endian byte order mark */ + filter->filter_function = mbfl_filt_conv_ucs2le_wchar; } else { - n = c & 0xff; - } - n |= filter->cache; - if (n == 0xfffe) { - if (endian) { - filter->status = 0; /* big-endian */ - } else { - filter->status = 0x100; /* little-endian */ + filter->filter_function = mbfl_filt_conv_ucs2be_wchar; + if (n != 0xFEFF) { + CK((*filter->output_function)(n, filter->data)); } - CK((*filter->output_function)(0xfeff, filter->data)); - } else { - filter->status &= ~0xff; - CK((*filter->output_function)(n, filter->data)); } - break; } - return c; } -/* - * UCS-2BE => wchar - */ int mbfl_filt_conv_ucs2be_wchar(int c, mbfl_convert_filter *filter) { - int n; - if (filter->status == 0) { filter->status = 1; - n = (c & 0xff) << 8; - filter->cache = n; + filter->cache = (c & 0xFF) << 8; } else { filter->status = 0; - n = (c & 0xff) | filter->cache; - CK((*filter->output_function)(n, filter->data)); + CK((*filter->output_function)((c & 0xFF) | filter->cache, filter->data)); } return c; } -/* - * wchar => UCS-2BE - */ int mbfl_filt_conv_wchar_ucs2be(int c, mbfl_convert_filter *filter) { if (c >= 0 && c < MBFL_WCSPLANE_UCS2MAX) { - CK((*filter->output_function)((c >> 8) & 0xff, filter->data)); - CK((*filter->output_function)(c & 0xff, filter->data)); + CK((*filter->output_function)((c >> 8) & 0xFF, filter->data)); + CK((*filter->output_function)(c & 0xFF, filter->data)); } else { CK(mbfl_filt_conv_illegal_output(c, filter)); } - return c; } -/* - * UCS-2LE => wchar - */ int mbfl_filt_conv_ucs2le_wchar(int c, mbfl_convert_filter *filter) { - int n; - if (filter->status == 0) { filter->status = 1; - n = c & 0xff; - filter->cache = n; + filter->cache = c & 0xFF; } else { filter->status = 0; - n = ((c & 0xff) << 8) | filter->cache; - CK((*filter->output_function)(n, filter->data)); + CK((*filter->output_function)(((c & 0xFF) << 8) | filter->cache, filter->data)); } return c; } - -/* - * wchar => UCS-2LE - */ int mbfl_filt_conv_wchar_ucs2le(int c, mbfl_convert_filter *filter) { if (c >= 0 && c < MBFL_WCSPLANE_UCS2MAX) { - CK((*filter->output_function)(c & 0xff, filter->data)); - CK((*filter->output_function)((c >> 8) & 0xff, filter->data)); + CK((*filter->output_function)(c & 0xFF, filter->data)); + CK((*filter->output_function)((c >> 8) & 0xFF, filter->data)); } else { CK(mbfl_filt_conv_illegal_output(c, filter)); } - return c; } + +static int mbfl_filt_conv_ucs2_wchar_flush(mbfl_convert_filter *filter) +{ + if (filter->status) { + /* Input string was truncated */ + CK((*filter->output_function)(filter->cache | MBFL_WCSGROUP_THROUGH, filter->data)); + } + + if (filter->flush_function) { + (*filter->flush_function)(filter->data); + } + + return 0; +} |