diff options
Diffstat (limited to 'ext/mbstring/libmbfl/filters/mbfilter_utf32.c')
-rw-r--r-- | ext/mbstring/libmbfl/filters/mbfilter_utf32.c | 175 |
1 files changed, 58 insertions, 117 deletions
diff --git a/ext/mbstring/libmbfl/filters/mbfilter_utf32.c b/ext/mbstring/libmbfl/filters/mbfilter_utf32.c index b936ab4102..e56a728ddc 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_utf32.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_utf32.c @@ -30,15 +30,17 @@ #include "mbfilter.h" #include "mbfilter_utf32.h" +static int mbfl_filt_conv_utf32_wchar_flush(mbfl_convert_filter *filter); + static const char *mbfl_encoding_utf32_aliases[] = {"utf32", NULL}; const mbfl_encoding mbfl_encoding_utf32 = { mbfl_no_encoding_utf32, "UTF-32", "UTF-32", - (const char *(*)[])&mbfl_encoding_utf32_aliases, + mbfl_encoding_utf32_aliases, NULL, - MBFL_ENCTYPE_WCS4BE, + MBFL_ENCTYPE_WCS4, &vtbl_utf32_wchar, &vtbl_wchar_utf32 }; @@ -49,7 +51,7 @@ const mbfl_encoding mbfl_encoding_utf32be = { "UTF-32BE", NULL, NULL, - MBFL_ENCTYPE_WCS4BE, + MBFL_ENCTYPE_WCS4, &vtbl_utf32be_wchar, &vtbl_wchar_utf32be }; @@ -60,7 +62,7 @@ const mbfl_encoding mbfl_encoding_utf32le = { "UTF-32LE", NULL, NULL, - MBFL_ENCTYPE_WCS4LE, + MBFL_ENCTYPE_WCS4, &vtbl_utf32le_wchar, &vtbl_wchar_utf32le }; @@ -71,7 +73,7 @@ const struct mbfl_convert_vtbl vtbl_utf32_wchar = { mbfl_filt_conv_common_ctor, NULL, mbfl_filt_conv_utf32_wchar, - mbfl_filt_conv_common_flush, + mbfl_filt_conv_utf32_wchar_flush, NULL, }; @@ -91,7 +93,7 @@ const struct mbfl_convert_vtbl vtbl_utf32be_wchar = { mbfl_filt_conv_common_ctor, NULL, mbfl_filt_conv_utf32be_wchar, - mbfl_filt_conv_common_flush, + mbfl_filt_conv_utf32_wchar_flush, NULL, }; @@ -111,7 +113,7 @@ const struct mbfl_convert_vtbl vtbl_utf32le_wchar = { mbfl_filt_conv_common_ctor, NULL, mbfl_filt_conv_utf32le_wchar, - mbfl_filt_conv_common_flush, + mbfl_filt_conv_utf32_wchar_flush, NULL, }; @@ -127,106 +129,53 @@ const struct mbfl_convert_vtbl vtbl_wchar_utf32le = { #define CK(statement) do { if ((statement) < 0) return (-1); } while (0) -/* - * UTF-32 => wchar - */ -int mbfl_filt_conv_utf32_wchar(int c, mbfl_convert_filter *filter) +static int emit_char_if_valid(int n, mbfl_convert_filter *filter) { - int n, endian; + if (n < MBFL_WCSPLANE_UTF32MAX && (n < 0xD800 || n > 0xDFFF)) { + CK((*filter->output_function)(n, filter->data)); + } else { + n = (n & MBFL_WCSGROUP_MASK) | MBFL_WCSGROUP_THROUGH; + CK((*filter->output_function)(n, filter->data)); + } + return 0; +} - endian = filter->status & 0xff00; - switch (filter->status & 0xff) { - case 0: - if (endian) { - n = c & 0xff; - } else { - n = (c & 0xffu) << 24; - } - filter->cache = n; - filter->status++; - break; - case 1: - if (endian) { - n = (c & 0xff) << 8; - } else { - n = (c & 0xff) << 16; - } - filter->cache |= n; - filter->status++; - break; - case 2: - if (endian) { - n = (c & 0xff) << 16; - } else { - n = (c & 0xff) << 8; - } - filter->cache |= n; +int mbfl_filt_conv_utf32_wchar(int c, mbfl_convert_filter *filter) +{ + if (filter->status < 3) { + filter->cache = (filter->cache << 8) | (c & 0xFF); filter->status++; - break; - default: - if (endian) { - n = (c & 0xffu) << 24; - } else { - n = c & 0xff; - } - n |= filter->cache; - if ((n & 0xffff) == 0 && ((n >> 16) & 0xffff) == 0xfffe) { - if (endian) { - filter->status = 0; /* big-endian */ - } else { - filter->status = 0x100; /* little-endian */ - } - CK((*filter->output_function)(0xfeff, filter->data)); + } else { + int n = ((unsigned int)filter->cache << 8) | (c & 0xFF); + filter->cache = filter->status = 0; + + if (n == 0xFFFE0000) { + /* Found a little-endian byte order mark */ + filter->filter_function = mbfl_filt_conv_utf32le_wchar; } else { - filter->status &= ~0xff; - if (n < MBFL_WCSPLANE_UTF32MAX && (n < 0xd800 || n > 0xdfff)) { - CK((*filter->output_function)(n, filter->data)); - } else { - n = (n & MBFL_WCSGROUP_MASK) | MBFL_WCSGROUP_THROUGH; - CK((*filter->output_function)(n, filter->data)); + filter->filter_function = mbfl_filt_conv_utf32be_wchar; + if (n != 0xFEFF) { + CK(emit_char_if_valid(n, filter)); } } - break; } return c; } -/* - * UTF-32BE => wchar - */ int mbfl_filt_conv_utf32be_wchar(int c, mbfl_convert_filter *filter) { - int n; - - if (filter->status == 0) { - filter->status = 1; - n = (c & 0xffu) << 24; - filter->cache = n; - } else if (filter->status == 1) { - filter->status = 2; - n = (c & 0xff) << 16; - filter->cache |= n; - } else if (filter->status == 2) { - filter->status = 3; - n = (c & 0xff) << 8; - filter->cache |= n; + if (filter->status < 3) { + filter->cache = (filter->cache << 8) | (c & 0xFF); + filter->status++; } else { - filter->status = 0; - n = (c & 0xff) | filter->cache; - if (n < MBFL_WCSPLANE_UTF32MAX && (n < 0xd800 || n > 0xdfff)) { - CK((*filter->output_function)(n, filter->data)); - } else { - n = (n & MBFL_WCSGROUP_MASK) | MBFL_WCSGROUP_THROUGH; - CK((*filter->output_function)(n, filter->data)); - } + int n = ((unsigned int)filter->cache << 8) | (c & 0xFF); + filter->cache = filter->status = 0; + CK(emit_char_if_valid(n, filter)); } return c; } -/* - * wchar => UTF-32BE - */ int mbfl_filt_conv_wchar_utf32be(int c, mbfl_convert_filter *filter) { if (c >= 0 && c < MBFL_WCSPLANE_UTF32MAX) { @@ -241,41 +190,19 @@ int mbfl_filt_conv_wchar_utf32be(int c, mbfl_convert_filter *filter) return c; } -/* - * UTF-32LE => wchar - */ int mbfl_filt_conv_utf32le_wchar(int c, mbfl_convert_filter *filter) { - int n; - - if (filter->status == 0) { - filter->status = 1; - n = (c & 0xff); - filter->cache = n; - } else if (filter->status == 1) { - filter->status = 2; - n = (c & 0xff) << 8; - filter->cache |= n; - } else if (filter->status == 2) { - filter->status = 3; - n = (c & 0xff) << 16; - filter->cache |= n; + if (filter->status < 3) { + filter->cache |= ((c & 0xFFU) << (8 * filter->status)); + filter->status++; } else { - filter->status = 0; - n = ((c & 0xffu) << 24) | filter->cache; - if (n < MBFL_WCSPLANE_UTF32MAX && (n < 0xd800 || n > 0xdfff)) { - CK((*filter->output_function)(n, filter->data)); - } else { - n = (n & MBFL_WCSGROUP_MASK) | MBFL_WCSGROUP_THROUGH; - CK((*filter->output_function)(n, filter->data)); - } + int n = ((c & 0xFFU) << 24) | filter->cache; + filter->cache = filter->status = 0; + CK(emit_char_if_valid(n, filter)); } return c; } -/* - * wchar => UTF-32LE - */ int mbfl_filt_conv_wchar_utf32le(int c, mbfl_convert_filter *filter) { if (c >= 0 && c < MBFL_WCSPLANE_UTF32MAX) { @@ -289,3 +216,17 @@ int mbfl_filt_conv_wchar_utf32le(int c, mbfl_convert_filter *filter) return c; } + +static int mbfl_filt_conv_utf32_wchar_flush(mbfl_convert_filter *filter) +{ + if (filter->status) { + /* Input string was truncated */ + CK((*filter->output_function)(filter->cache | MBFL_WCSGROUP_THROUGH, filter->data)); + } + + if (filter->flush_function) { + (*filter->flush_function)(filter->data); + } + + return 0; +} |