From ea539c8b88c9278363b6de0b39446e4e8e043391 Mon Sep 17 00:00:00 2001 From: Moriyoshi Koizumi Date: Fri, 12 Mar 2010 04:55:37 +0000 Subject: - Update the bundled libmbfl to the latest on upstream. --- ext/mbstring/config.m4 | 9 +- ext/mbstring/config.w32 | 4 +- ext/mbstring/libmbfl/Makefile.am | 6 +- ext/mbstring/libmbfl/Makefile.bcc32 | 33 - ext/mbstring/libmbfl/config.h.bcc32 | 12 - ext/mbstring/libmbfl/config.h.in | 9 +- ext/mbstring/libmbfl/config.h.vc6 | 12 - ext/mbstring/libmbfl/configure.in | 2 +- ext/mbstring/libmbfl/filters/Makefile.am | 11 +- ext/mbstring/libmbfl/filters/Makefile.bcc32 | 59 - ext/mbstring/libmbfl/filters/mbfilter_cp5022x.c | 1299 ++++++++++++++++++++ ext/mbstring/libmbfl/filters/mbfilter_cp5022x.h | 64 + ext/mbstring/libmbfl/filters/mbfilter_cp51932.c | 19 +- ext/mbstring/libmbfl/filters/mbfilter_cp932.c | 40 +- ext/mbstring/libmbfl/filters/mbfilter_cp932.h | 12 +- ext/mbstring/libmbfl/filters/mbfilter_htmlent.c | 11 + .../libmbfl/filters/mbfilter_iso2022_jp_ms.c | 8 +- ext/mbstring/libmbfl/filters/mbfilter_iso2022_kr.c | 6 + ext/mbstring/libmbfl/filters/mbfilter_jis.c | 5 + ext/mbstring/libmbfl/filters/mbfilter_sjis_open.c | 350 ++++++ ext/mbstring/libmbfl/filters/mbfilter_sjis_open.h | 43 + .../filters/mbfilter_tl_jisx0201_jisx0208.c | 302 +++++ .../filters/mbfilter_tl_jisx0201_jisx0208.h | 79 ++ ext/mbstring/libmbfl/filters/mbfilter_utf7.c | 5 + .../filters/translit_kana_jisx0201_jisx0208.h | 67 + ext/mbstring/libmbfl/libmbfl.dsp | 807 ------------ ext/mbstring/libmbfl/libmbfl.dsw | 29 - ext/mbstring/libmbfl/libmbfl.sln | 19 - ext/mbstring/libmbfl/libmbfl.vcproj | 977 --------------- ext/mbstring/libmbfl/mbfl.def | 80 -- ext/mbstring/libmbfl/mbfl.rc | 24 - ext/mbstring/libmbfl/mbfl/Makefile.am | 2 +- ext/mbstring/libmbfl/mbfl/Makefile.bcc32 | 18 - ext/mbstring/libmbfl/mbfl/eaw_table.h | 4 +- ext/mbstring/libmbfl/mbfl/mbfilter.c | 757 +++++------- ext/mbstring/libmbfl/mbfl/mbfilter.h | 4 +- ext/mbstring/libmbfl/mbfl/mbfilter_8bit.h | 2 
+- ext/mbstring/libmbfl/mbfl/mbfilter_pass.h | 4 +- ext/mbstring/libmbfl/mbfl/mbfilter_wchar.h | 2 +- ext/mbstring/libmbfl/mbfl/mbfl_convert.c | 176 ++- ext/mbstring/libmbfl/mbfl/mbfl_convert.h | 7 + ext/mbstring/libmbfl/mbfl/mbfl_encoding.c | 14 +- ext/mbstring/libmbfl/mbfl/mbfl_encoding.h | 9 +- ext/mbstring/libmbfl/mbfl/mbfl_filter_output.c | 12 +- ext/mbstring/libmbfl/mbfl/mbfl_filter_output.h | 1 + ext/mbstring/libmbfl/mbfl/mbfl_ident.c | 9 +- ext/mbstring/libmbfl/mksbcc32.bat | 5 - ext/mbstring/libmbfl/nls/Makefile.am | 1 - ext/mbstring/libmbfl/nls/Makefile.bcc32 | 18 - ext/mbstring/libmbfl/rules.mak.bcc32 | 7 - .../libmbfl/tests/conv_encoding.tests/Makefile.am | 32 +- .../libmbfl/tests/conv_encoding.tests/gen_exp.c | 497 ++++++++ .../libmbfl/tests/conv_encoding.tests/yensign.exp | 99 ++ ext/mbstring/libmbfl/tests/sample.c | 50 +- .../libmbfl/tests/strcut.tests/iso2022jp.exp | 4 +- 55 files changed, 3429 insertions(+), 2708 deletions(-) delete mode 100644 ext/mbstring/libmbfl/Makefile.bcc32 delete mode 100644 ext/mbstring/libmbfl/config.h.bcc32 delete mode 100644 ext/mbstring/libmbfl/config.h.vc6 delete mode 100644 ext/mbstring/libmbfl/filters/Makefile.bcc32 create mode 100644 ext/mbstring/libmbfl/filters/mbfilter_cp5022x.c create mode 100644 ext/mbstring/libmbfl/filters/mbfilter_cp5022x.h create mode 100644 ext/mbstring/libmbfl/filters/mbfilter_sjis_open.c create mode 100644 ext/mbstring/libmbfl/filters/mbfilter_sjis_open.h create mode 100644 ext/mbstring/libmbfl/filters/mbfilter_tl_jisx0201_jisx0208.c create mode 100644 ext/mbstring/libmbfl/filters/mbfilter_tl_jisx0201_jisx0208.h create mode 100644 ext/mbstring/libmbfl/filters/translit_kana_jisx0201_jisx0208.h delete mode 100644 ext/mbstring/libmbfl/libmbfl.dsp delete mode 100644 ext/mbstring/libmbfl/libmbfl.dsw delete mode 100755 ext/mbstring/libmbfl/libmbfl.sln delete mode 100755 ext/mbstring/libmbfl/libmbfl.vcproj delete mode 100644 ext/mbstring/libmbfl/mbfl.def delete mode 100644 
ext/mbstring/libmbfl/mbfl.rc delete mode 100644 ext/mbstring/libmbfl/mbfl/Makefile.bcc32 delete mode 100644 ext/mbstring/libmbfl/mksbcc32.bat delete mode 100644 ext/mbstring/libmbfl/nls/Makefile.bcc32 delete mode 100644 ext/mbstring/libmbfl/rules.mak.bcc32 create mode 100644 ext/mbstring/libmbfl/tests/conv_encoding.tests/gen_exp.c create mode 100644 ext/mbstring/libmbfl/tests/conv_encoding.tests/yensign.exp diff --git a/ext/mbstring/config.m4 b/ext/mbstring/config.m4 index 0b24884042..a28af46629 100644 --- a/ext/mbstring/config.m4 +++ b/ext/mbstring/config.m4 @@ -229,17 +229,20 @@ AC_DEFUN([PHP_MBSTRING_SETUP_LIBMBFL], [ libmbfl/filters/mbfilter_cp1251.c libmbfl/filters/mbfilter_cp1252.c libmbfl/filters/mbfilter_cp1254.c + libmbfl/filters/mbfilter_cp5022x.c + libmbfl/filters/mbfilter_cp51932.c + libmbfl/filters/mbfilter_cp850.c libmbfl/filters/mbfilter_cp866.c libmbfl/filters/mbfilter_cp932.c libmbfl/filters/mbfilter_cp936.c libmbfl/filters/mbfilter_euc_cn.c libmbfl/filters/mbfilter_euc_jp.c libmbfl/filters/mbfilter_euc_jp_win.c - libmbfl/filters/mbfilter_cp51932.c libmbfl/filters/mbfilter_euc_kr.c libmbfl/filters/mbfilter_euc_tw.c libmbfl/filters/mbfilter_htmlent.c libmbfl/filters/mbfilter_hz.c + libmbfl/filters/mbfilter_iso2022_jp_ms.c libmbfl/filters/mbfilter_iso2022_kr.c libmbfl/filters/mbfilter_iso8859_1.c libmbfl/filters/mbfilter_iso8859_10.c @@ -256,11 +259,12 @@ AC_DEFUN([PHP_MBSTRING_SETUP_LIBMBFL], [ libmbfl/filters/mbfilter_iso8859_8.c libmbfl/filters/mbfilter_iso8859_9.c libmbfl/filters/mbfilter_jis.c - libmbfl/filters/mbfilter_iso2022_jp_ms.c libmbfl/filters/mbfilter_koi8r.c libmbfl/filters/mbfilter_armscii8.c libmbfl/filters/mbfilter_qprint.c libmbfl/filters/mbfilter_sjis.c + libmbfl/filters/mbfilter_sjis_open.c + libmbfl/filters/mbfilter_tl_jisx0201_jisx0208.c libmbfl/filters/mbfilter_ucs2.c libmbfl/filters/mbfilter_ucs4.c libmbfl/filters/mbfilter_uhc.c @@ -271,7 +275,6 @@ AC_DEFUN([PHP_MBSTRING_SETUP_LIBMBFL], [ libmbfl/filters/mbfilter_utf8.c 
libmbfl/filters/mbfilter_uuencode.c libmbfl/filters/mbfilter_koi8u.c - libmbfl/filters/mbfilter_cp850.c libmbfl/mbfl/mbfilter.c libmbfl/mbfl/mbfilter_8bit.c libmbfl/mbfl/mbfilter_pass.c diff --git a/ext/mbstring/config.w32 b/ext/mbstring/config.w32 index 0675840176..31ade6d9a7 100644 --- a/ext/mbstring/config.w32 +++ b/ext/mbstring/config.w32 @@ -34,7 +34,9 @@ if (PHP_MBSTRING != "no") { mbfilter_ucs4.c mbfilter_uhc.c mbfilter_utf16.c mbfilter_utf32.c \ mbfilter_utf7.c mbfilter_utf7imap.c mbfilter_utf8.c \ mbfilter_koi8u.c mbfilter_cp1254.c \ - mbfilter_uuencode.c mbfilter_armscii8.c mbfilter_cp850.c ", "mbstring"); + mbfilter_uuencode.c mbfilter_armscii8.c mbfilter_cp850.c \ + mbfilter_cp5022x.c mbfilter_sjis_open.c \ + mbfilter_tl_jisx0201_jisx0208.c", "mbstring"); ADD_SOURCES("ext/mbstring/libmbfl/mbfl", "mbfilter.c mbfilter_8bit.c \ mbfilter_pass.c mbfilter_wchar.c mbfl_convert.c mbfl_encoding.c \ diff --git a/ext/mbstring/libmbfl/Makefile.am b/ext/mbstring/libmbfl/Makefile.am index 070a7fcc62..7886104865 100644 --- a/ext/mbstring/libmbfl/Makefile.am +++ b/ext/mbstring/libmbfl/Makefile.am @@ -2,8 +2,6 @@ AUTOMAKE_OPTIONS=dejagnu DEJATOOL=conv_encoding conv_kana strwidth strcut RUNTESTDEFAULTFLAGS=--tool $$tool --srcdir "$$srcdir"/tests LANG=C -EXTRA_DIST=AUTHORS DISCLAIMER LICENSE Makefile.bcc32 \ - config.h.bcc32 config.h.vc6 \ - libmbfl.dsp libmbfl.dsw libmbfl.sln libmbfl.vcproj mbfl.rc \ - mksbcc32.bat rules.mak.bcc32 +EXTRA_DIST=AUTHORS DISCLAIMER LICENSE buildconf SUBDIRS = nls filters mbfl tests +CLEANFILES=*.log *.sum diff --git a/ext/mbstring/libmbfl/Makefile.bcc32 b/ext/mbstring/libmbfl/Makefile.bcc32 deleted file mode 100644 index 6dde6b8384..0000000000 --- a/ext/mbstring/libmbfl/Makefile.bcc32 +++ /dev/null @@ -1,33 +0,0 @@ -!include rules.mak.bcc32 - -SUBDIRS=mbfl nls filters - -all: mbfl.dll mbfl.lib - -mbfl.lib: mbfl.dll - implib -a mbfl.lib mbfl.dll - -mbfl.dll: compile mbfl.res - @if exist linker.rsp del linker.rsp - @for %i in ($(SUBDIRS)) do 
@for %j in (%i\*.obj) do @echo %j+ >> linker.rsp - ilink32 $(LDFLAGS) @linker.rsp,mbfl.dll,,$(LIBS),,mbfl.res - del linker.rsp - -mbfl.res: mbfl.rc - brcc32 -r mbfl.rc - -prepare: config.h.bcc32 - @if exist linker.rsp del linker.rsp - @copy config.h.bcc32 config.h - -clean: - @for %i in ($(SUBDIRS)) do $(COMSPEC) /C mksbcc32.bat $(MAKE) %i clean - @if exist linker.rsp del linker.rsp - @if exist mbfl.RES del mbfl.RES - @if exist mbfl.dll del mbfl.dll - @if exist mbfl.lib del mbfl.lib - @if exist config.h del config.h - -compile: prepare - @for %i in ($(SUBDIRS)) do $(COMSPEC) /C mksbcc32.bat $(MAKE) %i - diff --git a/ext/mbstring/libmbfl/config.h.bcc32 b/ext/mbstring/libmbfl/config.h.bcc32 deleted file mode 100644 index 7458e441eb..0000000000 --- a/ext/mbstring/libmbfl/config.h.bcc32 +++ /dev/null @@ -1,12 +0,0 @@ -#define HAVE_STDIO_H 1 -#define HAVE_STDLIB_H 1 -#define HAVE_STDDEF_H 1 -#define HAVE_ASSERT_H 1 -#define HAVE_MEMORY_H 1 -/* #undef HAVE_STRINGS_H */ -#define HAVE_STRING_H 1 -/* #undef HAVE_STRCASECMP */ -#define HAVE_STRICMP 1 -#define HAVE_WIN32_NATIVE_THREAD 1 -#define USE_WIN32_NATIVE_THREAD 1 -#define ENABLE_THREADS 1 diff --git a/ext/mbstring/libmbfl/config.h.in b/ext/mbstring/libmbfl/config.h.in index 8e7493257e..bded33ae79 100644 --- a/ext/mbstring/libmbfl/config.h.in +++ b/ext/mbstring/libmbfl/config.h.in @@ -50,6 +50,10 @@ /* Define to 1 if you have the header file. */ #undef HAVE_UNISTD_H +/* Define to the sub-directory in which libtool stores uninstalled libraries. + */ +#undef LT_OBJDIR + /* Name of package */ #undef PACKAGE @@ -65,6 +69,9 @@ /* Define to the one symbol short name of this package. */ #undef PACKAGE_TARNAME +/* Define to the home page for this package. */ +#undef PACKAGE_URL + /* Define to the version of this package. */ #undef PACKAGE_VERSION @@ -83,5 +90,5 @@ /* Define to rpl_realloc if the replacement function should be used. */ #undef realloc -/* Define to `unsigned' if does not define. 
*/ +/* Define to `unsigned int' if does not define. */ #undef size_t diff --git a/ext/mbstring/libmbfl/config.h.vc6 b/ext/mbstring/libmbfl/config.h.vc6 deleted file mode 100644 index 7458e441eb..0000000000 --- a/ext/mbstring/libmbfl/config.h.vc6 +++ /dev/null @@ -1,12 +0,0 @@ -#define HAVE_STDIO_H 1 -#define HAVE_STDLIB_H 1 -#define HAVE_STDDEF_H 1 -#define HAVE_ASSERT_H 1 -#define HAVE_MEMORY_H 1 -/* #undef HAVE_STRINGS_H */ -#define HAVE_STRING_H 1 -/* #undef HAVE_STRCASECMP */ -#define HAVE_STRICMP 1 -#define HAVE_WIN32_NATIVE_THREAD 1 -#define USE_WIN32_NATIVE_THREAD 1 -#define ENABLE_THREADS 1 diff --git a/ext/mbstring/libmbfl/configure.in b/ext/mbstring/libmbfl/configure.in index 9e0a1f7895..f7fd58f857 100644 --- a/ext/mbstring/libmbfl/configure.in +++ b/ext/mbstring/libmbfl/configure.in @@ -1,6 +1,6 @@ # Process this file with autoconf to produce a configure script. AC_INIT(mbfl/mbfilter.c) -AM_INIT_AUTOMAKE(libmbfl, 1.0.2) +AM_INIT_AUTOMAKE(libmbfl, 1.1.0) AC_CONFIG_SRCDIR(mbfl/mbfilter.c) AM_CONFIG_HEADER(config.h) diff --git a/ext/mbstring/libmbfl/filters/Makefile.am b/ext/mbstring/libmbfl/filters/Makefile.am index 804df823c4..99df73b8dc 100644 --- a/ext/mbstring/libmbfl/filters/Makefile.am +++ b/ext/mbstring/libmbfl/filters/Makefile.am @@ -1,4 +1,4 @@ -EXTRA_DIST=Makefile.bcc32 mk_sb_tbl.awk +EXTRA_DIST=mk_sb_tbl.awk noinst_LTLIBRARIES=libmbfl_filters.la INCLUDES=-I../mbfl libmbfl_filters_la_LDFLAGS=-version-info $(SHLIB_VERSION) @@ -32,6 +32,7 @@ libmbfl_filters_la_SOURCES=mbfilter_cp936.c \ mbfilter_uuencode.c \ mbfilter_base64.c \ mbfilter_sjis.c \ + mbfilter_sjis_open.c \ mbfilter_7bit.c \ mbfilter_qprint.c \ mbfilter_ucs4.c \ @@ -54,6 +55,8 @@ libmbfl_filters_la_SOURCES=mbfilter_cp936.c \ mbfilter_koi8u.c \ mbfilter_armscii8.c \ mbfilter_cp850.c \ + mbfilter_cp5022x.c \ + mbfilter_tl_jisx0201_jisx0208.c \ html_entities.c \ cp932_table.h \ html_entities.h \ @@ -98,6 +101,7 @@ libmbfl_filters_la_SOURCES=mbfilter_cp936.c \ mbfilter_armscii8.h \ 
mbfilter_qprint.h \ mbfilter_sjis.h \ + mbfilter_sjis_open.h \ mbfilter_ucs2.h \ mbfilter_ucs4.h \ mbfilter_uhc.h \ @@ -107,8 +111,10 @@ libmbfl_filters_la_SOURCES=mbfilter_cp936.c \ mbfilter_utf7imap.h \ mbfilter_utf8.h \ mbfilter_uuencode.h \ + mbfilter_cp5022x.h \ mbfilter_cp51932.h \ mbfilter_cp850.h \ + mbfilter_tl_jisx0201_jisx0208.h \ unicode_prop.h \ unicode_table_big5.h \ unicode_table_cns11643.h \ @@ -136,7 +142,8 @@ libmbfl_filters_la_SOURCES=mbfilter_cp936.c \ unicode_table_koi8u.h \ unicode_table_armscii8.h \ unicode_table_cp850.h \ - unicode_table_uhc.h + unicode_table_uhc.h \ + translit_kana_jisx0201_jisx0208.h mbfilter_iso8859_2.c: unicode_table_iso8859_2.h diff --git a/ext/mbstring/libmbfl/filters/Makefile.bcc32 b/ext/mbstring/libmbfl/filters/Makefile.bcc32 deleted file mode 100644 index 841c09632a..0000000000 --- a/ext/mbstring/libmbfl/filters/Makefile.bcc32 +++ /dev/null @@ -1,59 +0,0 @@ -!include ..\rules.mak.bcc32 -INCLUDES=$(INCLUDES) -I../mbfl -OBJS=mbfilter_cp936.obj \ - mbfilter_hz.obj \ - mbfilter_euc_tw.obj \ - mbfilter_big5.obj \ - mbfilter_euc_jp.obj \ - mbfilter_jis.obj \ - mbfilter_iso8859_1.obj \ - mbfilter_iso8859_2.obj \ - mbfilter_cp1252.obj \ - mbfilter_cp1251.obj \ - mbfilter_cp1254.obj \ - mbfilter_ascii.obj \ - mbfilter_iso8859_3.obj \ - mbfilter_iso8859_4.obj \ - mbfilter_iso8859_5.obj \ - mbfilter_iso8859_6.obj \ - mbfilter_iso8859_7.obj \ - mbfilter_iso8859_8.obj \ - mbfilter_iso8859_9.obj \ - mbfilter_iso8859_10.obj \ - mbfilter_iso8859_13.obj \ - mbfilter_iso8859_14.obj \ - mbfilter_iso8859_15.obj \ - mbfilter_iso8859_16.obj \ - mbfilter_htmlent.obj \ - mbfilter_byte2.obj \ - mbfilter_byte4.obj \ - mbfilter_uuencode.obj \ - mbfilter_base64.obj \ - mbfilter_sjis.obj \ - mbfilter_7bit.obj \ - mbfilter_qprint.obj \ - mbfilter_ucs4.obj \ - mbfilter_ucs2.obj \ - mbfilter_utf32.obj \ - mbfilter_utf16.obj \ - mbfilter_utf8.obj \ - mbfilter_utf7.obj \ - mbfilter_utf7imap.obj \ - mbfilter_euc_jp_win.obj \ - mbfilter_cp932.obj \ - 
mbfilter_euc_cn.obj \ - mbfilter_euc_kr.obj \ - mbfilter_uhc.obj \ - mbfilter_iso2022_kr.obj \ - mbfilter_cp866.obj \ - mbfilter_koi8r.obj \ - mbfilter_koi8u.obj \ - html_entities.obj \ - mbfilter_armscii8.obj \ - mbfilter_cp850.obj - -all: $(OBJS) - -clean: - @for %i in ($(OBJS)) do @if exist %i del %i - diff --git a/ext/mbstring/libmbfl/filters/mbfilter_cp5022x.c b/ext/mbstring/libmbfl/filters/mbfilter_cp5022x.c new file mode 100644 index 0000000000..587bff88cf --- /dev/null +++ b/ext/mbstring/libmbfl/filters/mbfilter_cp5022x.c @@ -0,0 +1,1299 @@ +/* + * "streamable kanji code filter and converter" + * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. + * + * LICENSE NOTICES + * + * This file is part of "streamable kanji code filter and converter", + * which is distributed under the terms of GNU Lesser General Public + * License (version 2) as published by the Free Software Foundation. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with "streamable kanji code filter and converter"; + * if not, write to the Free Software Foundation, Inc., 59 Temple Place, + * Suite 330, Boston, MA 02111-1307 USA + * + * The author of this file: Moriyoshi Koizumi + * + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "mbfilter.h" +#include "mbfilter_cp5022x.h" +#include "mbfilter_jis.h" +#include "mbfilter_tl_jisx0201_jisx0208.h" + +#include "unicode_table_cp932_ext.h" +#include "unicode_table_jis.h" +#include "cp932_table.h" + +typedef struct _mbfl_filt_conv_wchar_cp50220_ctx { + mbfl_filt_tl_jisx0201_jisx0208_param tl_param; + mbfl_convert_filter last; +} mbfl_filt_conv_wchar_cp50220_ctx; + +static int mbfl_filt_ident_jis_ms(int c, mbfl_identify_filter *filter); +static int mbfl_filt_ident_cp50220(int c, mbfl_identify_filter *filter); +static int mbfl_filt_ident_cp50221(int c, mbfl_identify_filter *filter); +static int mbfl_filt_ident_cp50222(int c, mbfl_identify_filter *filter); +static void mbfl_filt_conv_wchar_cp50220_ctor(mbfl_convert_filter *filt); +static void mbfl_filt_conv_wchar_cp50220_dtor(mbfl_convert_filter *filt); +static void mbfl_filt_conv_wchar_cp50220_copy(mbfl_convert_filter *src, mbfl_convert_filter *dest); + +const mbfl_encoding mbfl_encoding_jis_ms = { + mbfl_no_encoding_jis_ms, + "JIS-ms", + "ISO-2022-JP", + NULL, + NULL, + MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE +}; + +const mbfl_encoding mbfl_encoding_cp50220 = { + mbfl_no_encoding_cp50220, + "CP50220", + "ISO-2022-JP", + (const char *(*)[])NULL, + NULL, + MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE +}; + +const mbfl_encoding mbfl_encoding_cp50220raw = { + mbfl_no_encoding_cp50220raw, + "CP50220raw", + "ISO-2022-JP", + (const char *(*)[])NULL, + NULL, + MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE +}; + +const mbfl_encoding mbfl_encoding_cp50221 = { + mbfl_no_encoding_cp50221, + "CP50221", + "ISO-2022-JP", + NULL, + NULL, + 
MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE +}; + +const mbfl_encoding mbfl_encoding_cp50222 = { + mbfl_no_encoding_cp50222, + "CP50222", + "ISO-2022-JP", + NULL, + NULL, + MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE +}; + +const struct mbfl_identify_vtbl vtbl_identify_jis_ms = { + mbfl_no_encoding_jis_ms, + mbfl_filt_ident_common_ctor, + mbfl_filt_ident_common_dtor, + mbfl_filt_ident_jis_ms +}; + +const struct mbfl_identify_vtbl vtbl_identify_cp50220 = { + mbfl_no_encoding_cp50220, + mbfl_filt_ident_common_ctor, + mbfl_filt_ident_common_dtor, + mbfl_filt_ident_cp50220 +}; + +const struct mbfl_identify_vtbl vtbl_identify_cp50220raw = { + mbfl_no_encoding_cp50220raw, + mbfl_filt_ident_common_ctor, + mbfl_filt_ident_common_dtor, + mbfl_filt_ident_cp50220 +}; + +const struct mbfl_identify_vtbl vtbl_identify_cp50221 = { + mbfl_no_encoding_cp50221, + mbfl_filt_ident_common_ctor, + mbfl_filt_ident_common_dtor, + mbfl_filt_ident_cp50221 +}; + +const struct mbfl_identify_vtbl vtbl_identify_cp50222 = { + mbfl_no_encoding_cp50222, + mbfl_filt_ident_common_ctor, + mbfl_filt_ident_common_dtor, + mbfl_filt_ident_cp50222 +}; + +const struct mbfl_convert_vtbl vtbl_jis_ms_wchar = { + mbfl_no_encoding_jis_ms, + mbfl_no_encoding_wchar, + mbfl_filt_conv_common_ctor, + mbfl_filt_conv_common_dtor, + mbfl_filt_conv_jis_ms_wchar, + mbfl_filt_conv_common_flush, +}; + +const struct mbfl_convert_vtbl vtbl_wchar_jis_ms = { + mbfl_no_encoding_wchar, + mbfl_no_encoding_jis_ms, + mbfl_filt_conv_common_ctor, + mbfl_filt_conv_common_dtor, + mbfl_filt_conv_wchar_jis_ms, + mbfl_filt_conv_any_jis_flush +}; + +const struct mbfl_convert_vtbl vtbl_cp50220_wchar = { + mbfl_no_encoding_cp50220, + mbfl_no_encoding_wchar, + mbfl_filt_conv_common_ctor, + mbfl_filt_conv_common_dtor, + mbfl_filt_conv_jis_ms_wchar, + mbfl_filt_conv_common_flush +}; + +const struct mbfl_convert_vtbl vtbl_wchar_cp50220 = { + mbfl_no_encoding_wchar, + mbfl_no_encoding_cp50220, + mbfl_filt_conv_wchar_cp50220_ctor, + 
mbfl_filt_conv_wchar_cp50220_dtor, + mbfl_filt_conv_wchar_cp50221, + mbfl_filt_conv_any_jis_flush, + mbfl_filt_conv_wchar_cp50220_copy +}; + +const struct mbfl_convert_vtbl vtbl_cp50220raw_wchar = { + mbfl_no_encoding_cp50220raw, + mbfl_no_encoding_wchar, + mbfl_filt_conv_common_ctor, + mbfl_filt_conv_common_dtor, + mbfl_filt_conv_jis_ms_wchar, + mbfl_filt_conv_common_flush +}; + +const struct mbfl_convert_vtbl vtbl_wchar_cp50220raw = { + mbfl_no_encoding_wchar, + mbfl_no_encoding_cp50220raw, + mbfl_filt_conv_wchar_cp50220_ctor, + mbfl_filt_conv_wchar_cp50220_dtor, + mbfl_filt_conv_wchar_cp50220raw, + mbfl_filt_conv_any_jis_flush, + mbfl_filt_conv_wchar_cp50220_copy +}; + +const struct mbfl_convert_vtbl vtbl_cp50221_wchar = { + mbfl_no_encoding_cp50221, + mbfl_no_encoding_wchar, + mbfl_filt_conv_common_ctor, + mbfl_filt_conv_common_dtor, + mbfl_filt_conv_jis_ms_wchar, + mbfl_filt_conv_common_flush +}; + +const struct mbfl_convert_vtbl vtbl_wchar_cp50221 = { + mbfl_no_encoding_wchar, + mbfl_no_encoding_cp50221, + mbfl_filt_conv_common_ctor, + mbfl_filt_conv_common_dtor, + mbfl_filt_conv_wchar_cp50221, + mbfl_filt_conv_any_jis_flush +}; + +const struct mbfl_convert_vtbl vtbl_cp50222_wchar = { + mbfl_no_encoding_cp50222, + mbfl_no_encoding_wchar, + mbfl_filt_conv_common_ctor, + mbfl_filt_conv_common_dtor, + mbfl_filt_conv_jis_ms_wchar, + mbfl_filt_conv_common_flush +}; + +const struct mbfl_convert_vtbl vtbl_wchar_cp50222 = { + mbfl_no_encoding_wchar, + mbfl_no_encoding_cp50222, + mbfl_filt_conv_common_ctor, + mbfl_filt_conv_common_dtor, + mbfl_filt_conv_wchar_cp50222, + mbfl_filt_conv_wchar_cp50222_flush +}; + +#define CK(statement) do { if ((statement) < 0) return (-1); } while (0) + +/* + * JIS-ms => wchar + */ +int +mbfl_filt_conv_jis_ms_wchar(int c, mbfl_convert_filter *filter) +{ + int c1, s, w; + +retry: + switch (filter->status & 0xf) { +/* case 0x00: ASCII */ +/* case 0x10: X 0201 latin */ +/* case 0x20: X 0201 kana */ +/* case 0x80: X 0208 */ +/* case 0x90: X 
0212 */ + case 0: + if (c == 0x1b) { + filter->status += 2; + } else if (c == 0x0e) { /* "kana in" */ + filter->status = 0x20; + } else if (c == 0x0f) { /* "kana out" */ + filter->status = 0; + } else if (filter->status == 0x10 && c == 0x5c) { /* YEN SIGN */ + CK((*filter->output_function)(0xa5, filter->data)); + } else if (filter->status == 0x10 && c == 0x7e) { /* OVER LINE */ + CK((*filter->output_function)(0x203e, filter->data)); + } else if (filter->status == 0x20 && c > 0x20 && c < 0x60) { /* kana */ + CK((*filter->output_function)(0xff40 + c, filter->data)); + } else if ((filter->status == 0x80 || filter->status == 0x90) && c > 0x20 && c < 0x7f) { /* kanji first char */ + filter->cache = c; + filter->status += 1; + } else if (c >= 0 && c < 0x80) { /* latin, CTLs */ + CK((*filter->output_function)(c, filter->data)); + } else if (c > 0xa0 && c < 0xe0) { /* GR kana */ + CK((*filter->output_function)(0xfec0 + c, filter->data)); + } else { + w = c & MBFL_WCSGROUP_MASK; + w |= MBFL_WCSGROUP_THROUGH; + CK((*filter->output_function)(w, filter->data)); + } + break; + +/* case 0x81: X 0208 second char */ +/* case 0x91: X 0212 second char */ + case 1: + filter->status &= ~0xf; + c1 = filter->cache; + if (c > 0x20 && c < 0x7f) { + s = (c1 - 0x21)*94 + c - 0x21; + if (filter->status == 0x80) { + if (s >= 0 && s < jisx0208_ucs_table_size) { + w = jisx0208_ucs_table[s]; + } else if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) { + w = cp932ext1_ucs_table[s - cp932ext1_ucs_table_min]; + } else if (s >= cp932ext2_ucs_table_min && s < cp932ext2_ucs_table_max) { + w = cp932ext2_ucs_table[s - cp932ext2_ucs_table_min]; + } else if (s >= cp932ext3_ucs_table_min && s < cp932ext3_ucs_table_max) { + w = cp932ext3_ucs_table[s - cp932ext3_ucs_table_min]; + } else if (s >= 94 * 94 && s < 114 * 94) { + /* user-defined => PUA (Microsoft extended) */ + w = (s & 0xff) + ((s >> 8) - 94) * 94 + 0xe000; + } else if (s >= 212 * 94 && s < 222 * 94) { + /* user-defined => PUA (G3 
85 - 94 Ku) */ + w = (s & 0xff) + ((s >> 8) - 212) * 94 + 0xe000 + 10 * 94; + } else { + w = 0; + } + if (w <= 0) { + w = (c1 << 8) | c; + w &= MBFL_WCSPLANE_MASK; + w |= MBFL_WCSPLANE_JIS0208; + } + } else { + if (s >= 0 && s < jisx0212_ucs_table_size) { + w = jisx0212_ucs_table[s]; + } else { + w = 0; + } + if (w <= 0) { + w = (c1 << 8) | c; + w &= MBFL_WCSPLANE_MASK; + w |= MBFL_WCSPLANE_JIS0212; + } + } + CK((*filter->output_function)(w, filter->data)); + } else if (c == 0x1b) { + filter->status += 2; + } else if ((c >= 0 && c < 0x21) || c == 0x7f) { /* CTLs */ + CK((*filter->output_function)(c, filter->data)); + } else { + w = (c1 << 8) | c; + w &= MBFL_WCSGROUP_MASK; + w |= MBFL_WCSGROUP_THROUGH; + CK((*filter->output_function)(w, filter->data)); + } + break; + + /* ESC */ +/* case 0x02: */ +/* case 0x12: */ +/* case 0x22: */ +/* case 0x82: */ +/* case 0x92: */ + case 2: + if (c == 0x24) { /* '$' */ + filter->status++; + } else if (c == 0x28) { /* '(' */ + filter->status += 3; + } else { + filter->status &= ~0xf; + CK((*filter->output_function)(0x1b, filter->data)); + goto retry; + } + break; + + /* ESC $ */ +/* case 0x03: */ +/* case 0x13: */ +/* case 0x23: */ +/* case 0x83: */ +/* case 0x93: */ + case 3: + if (c == 0x40 || c == 0x42) { /* '@' or 'B' */ + filter->status = 0x80; + } else if (c == 0x28) { /* '(' */ + filter->status++; + } else { + filter->status &= ~0xf; + CK((*filter->output_function)(0x1b, filter->data)); + CK((*filter->output_function)(0x24, filter->data)); + goto retry; + } + break; + + /* ESC $ ( */ +/* case 0x04: */ +/* case 0x14: */ +/* case 0x24: */ +/* case 0x84: */ +/* case 0x94: */ + case 4: + if (c == 0x40 || c == 0x42) { /* '@' or 'B' */ + filter->status = 0x80; + } else if (c == 0x44) { /* 'D' */ + filter->status = 0x90; + } else { + filter->status &= ~0xf; + CK((*filter->output_function)(0x1b, filter->data)); + CK((*filter->output_function)(0x24, filter->data)); + CK((*filter->output_function)(0x28, filter->data)); + goto retry; 
+ } + break; + + /* ESC ( */ +/* case 0x05: */ +/* case 0x15: */ +/* case 0x25: */ +/* case 0x85: */ +/* case 0x95: */ + case 5: + if (c == 0x42 || c == 0x48) { /* 'B' or 'H' */ + filter->status = 0; + } else if (c == 0x4a) { /* 'J' */ + filter->status = 0x10; + } else if (c == 0x49) { /* 'I' */ + filter->status = 0x20; + } else { + filter->status &= ~0xf; + CK((*filter->output_function)(0x1b, filter->data)); + CK((*filter->output_function)(0x28, filter->data)); + goto retry; + } + break; + + default: + filter->status = 0; + break; + } + + return c; +} + +/* + * wchar => JIS + */ +int +mbfl_filt_conv_wchar_jis_ms(int c, mbfl_convert_filter *filter) +{ + int c1, s; + + s = 0; + if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) { + s = ucs_a1_jis_table[c - ucs_a1_jis_table_min]; + } else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) { + s = ucs_a2_jis_table[c - ucs_a2_jis_table_min]; + } else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) { + s = ucs_i_jis_table[c - ucs_i_jis_table_min]; + } else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) { + s = ucs_r_jis_table[c - ucs_r_jis_table_min]; + } else if (c >= 0xe000 && c < (0xe000 + 10 * 94)) { + /* PUA => Microsoft extended (pseudo 95ku - 114ku) */ + /* See http://www.opengroup.or.jp/jvc/cde/ucs-conv.html#ch4_2 */ + s = c - 0xe000; + s = (s / 94 + 0x75) << 8 | (s % 94 + 0x21); + } else if (c >= (0xe000 + 10 * 94) && c < (0xe000 + 20 * 94)) { + /* PUA => JISX0212 user-defined (G3 85ku - 94ku) */ + /* See http://www.opengroup.or.jp/jvc/cde/ucs-conv.html#ch4_2 */ + s = c - (0xe000 + 10 * 94); + s = (s / 94 + 0xf5) << 8 | (s % 94 + 0xa1); + } + + /* do some transliteration */ + if (s <= 0) { + c1 = c & ~MBFL_WCSPLANE_MASK; + if (c1 == MBFL_WCSPLANE_JIS0208) { + s = c & MBFL_WCSPLANE_MASK; + } else if (c1 == MBFL_WCSPLANE_JIS0212) { + s = c & MBFL_WCSPLANE_MASK; + s |= 0x8080; + } else if (c == 0xa5) { /* YEN SIGN */ + s = 0x1005c; + } else if (c == 0x203e) { /* OVER LINE */ + s 
= 0x1007e; + } else if (c == 0xff3c) { /* FULLWIDTH REVERSE SOLIDUS */ + s = 0x2140; + } else if (c == 0xff5e) { /* FULLWIDTH TILDE */ + s = 0x2141; + } else if (c == 0x2225) { /* PARALLEL TO */ + s = 0x2142; + } else if (c == 0xff0d) { /* FULLWIDTH HYPHEN-MINUS */ + s = 0x215d; + } else if (c == 0xffe0) { /* FULLWIDTH CENT SIGN */ + s = 0x2171; + } else if (c == 0xffe1) { /* FULLWIDTH POUND SIGN */ + s = 0x2172; + } else if (c == 0xffe2) { /* FULLWIDTH NOT SIGN */ + s = 0x224c; + } + } + if (s <= 0 || s >= 0x8080 && s < 0x10000) { + int i; + s = -1; + + for (i = 0; + i < cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; i++) { + const int oh = cp932ext1_ucs_table_min / 94; + + if (c == cp932ext1_ucs_table[i]) { + s = ((i / 94 + oh + 0x21) << 8) + (i % 94 + 0x21); + break; + } + } + + if (s < 0) { + const int oh = cp932ext2_ucs_table_min / 94; + const int cp932ext2_ucs_table_size = + cp932ext2_ucs_table_max - cp932ext2_ucs_table_min; + for (i = 0; i < cp932ext2_ucs_table_size; i++) { + if (c == cp932ext2_ucs_table[i]) { + s = ((i / 94 + oh + 0x21) << 8) + (i % 94 + 0x21); + break; + } + } + } + + if (s < 0) { + const int cp932ext3_ucs_table_size = + cp932ext3_ucs_table_max - cp932ext3_ucs_table_min; + const int limit = cp932ext3_ucs_table_size > + cp932ext3_eucjp_table_size ? 
+ cp932ext3_eucjp_table_size: + cp932ext3_ucs_table_size; + for (i = 0; i < limit; i++) { + if (c == cp932ext3_ucs_table[i]) { + s = cp932ext3_eucjp_table[i]; + break; + } + } + } + + if (c == 0) { + s = 0; + } else if (s <= 0) { + s = -1; + } + } + + if (s >= 0) { + if (s < 0x80) { /* ASCII */ + if ((filter->status & 0xff00) != 0) { + CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ + CK((*filter->output_function)(0x28, filter->data)); /* '(' */ + CK((*filter->output_function)(0x42, filter->data)); /* 'B' */ + } + filter->status = 0; + CK((*filter->output_function)(s, filter->data)); + } else if (s < 0x100) { /* kana */ + if ((filter->status & 0xff00) != 0x100) { + CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ + CK((*filter->output_function)(0x28, filter->data)); /* '(' */ + CK((*filter->output_function)(0x49, filter->data)); /* 'I' */ + } + filter->status = 0x100; + CK((*filter->output_function)(s & 0x7f, filter->data)); + } else if (s < 0x8080) { /* X 0208 */ + if ((filter->status & 0xff00) != 0x200) { + CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ + CK((*filter->output_function)(0x24, filter->data)); /* '$' */ + CK((*filter->output_function)(0x42, filter->data)); /* 'B' */ + } + filter->status = 0x200; + CK((*filter->output_function)((s >> 8) & 0x7f, filter->data)); + CK((*filter->output_function)(s & 0x7f, filter->data)); + } else if (s < 0x10000) { /* X 0212 */ + if ((filter->status & 0xff00) != 0x300) { + CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ + CK((*filter->output_function)(0x24, filter->data)); /* '$' */ + CK((*filter->output_function)(0x28, filter->data)); /* '(' */ + CK((*filter->output_function)(0x44, filter->data)); /* 'D' */ + } + filter->status = 0x300; + CK((*filter->output_function)((s >> 8) & 0x7f, filter->data)); + CK((*filter->output_function)(s & 0x7f, filter->data)); + } else { /* X 0201 latin */ + if ((filter->status & 0xff00) != 0x400) { + 
CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ + CK((*filter->output_function)(0x28, filter->data)); /* '(' */ + CK((*filter->output_function)(0x4a, filter->data)); /* 'J' */ + } + filter->status = 0x400; + CK((*filter->output_function)(s & 0x7f, filter->data)); + } + } else { + if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) { + CK(mbfl_filt_conv_illegal_output(c, filter)); + } + } + + return c; +} + +/* + * wchar => CP50220 + */ +static void +mbfl_filt_conv_wchar_cp50220_ctor(mbfl_convert_filter *filt) +{ + mbfl_filt_conv_wchar_cp50220_ctx *ctx; + + mbfl_filt_conv_common_ctor(filt); + + ctx = mbfl_malloc(sizeof(mbfl_filt_conv_wchar_cp50220_ctx)); + if (ctx == NULL) { + mbfl_filt_conv_common_dtor(filt); + return; + } + + ctx->tl_param.mode = MBFL_FILT_TL_HAN2ZEN_KATAKANA | MBFL_FILT_TL_HAN2ZEN_GLUE; + + ctx->last = *filt; + ctx->last.opaque = ctx; + ctx->last.data = filt->data; + filt->filter_function = vtbl_tl_jisx0201_jisx0208.filter_function; + filt->filter_flush = vtbl_tl_jisx0201_jisx0208.filter_flush; + filt->output_function = (int(*)(int, void *))ctx->last.filter_function; + filt->flush_function = (int(*)(void *))ctx->last.filter_flush; + filt->data = &ctx->last; + filt->opaque = ctx; + vtbl_tl_jisx0201_jisx0208.filter_ctor(filt); +} + +static void +mbfl_filt_conv_wchar_cp50220_copy(mbfl_convert_filter *src, mbfl_convert_filter *dest) +{ + mbfl_filt_conv_wchar_cp50220_ctx *ctx; + + *dest = *src; + ctx = mbfl_malloc(sizeof(mbfl_filt_conv_wchar_cp50220_ctx)); + if (ctx != NULL) { + *ctx = *(mbfl_filt_conv_wchar_cp50220_ctx*)src->opaque; + } + + dest->opaque = ctx; + dest->data = &ctx->last; +} + +static void +mbfl_filt_conv_wchar_cp50220_dtor(mbfl_convert_filter *filt) +{ + vtbl_tl_jisx0201_jisx0208.filter_dtor(filt); + + if (filt->opaque != NULL) { + mbfl_free(filt->opaque); + } + + mbfl_filt_conv_common_dtor(filt); +} + +/* + * wchar => cp50220raw + */ +int +mbfl_filt_conv_wchar_cp50220raw(int c, mbfl_convert_filter *filter) 
+{ + if (c & MBFL_WCSPLANE_JIS0208) { + const int s = c & MBFL_WCSPLANE_MASK; + + if ((filter->status & 0xff00) != 0x200) { + CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ + CK((*filter->output_function)(0x24, filter->data)); /* '$' */ + CK((*filter->output_function)(0x42, filter->data)); /* 'B' */ + filter->status = 0x200; + } + CK((*filter->output_function)((s >> 8) & 0x7f, filter->data)); + CK((*filter->output_function)(s & 0x7f, filter->data)); + return c; + } else { + return mbfl_filt_conv_wchar_cp50221(c, filter); + } +} + +/* + * wchar => CP50221 + */ +int +mbfl_filt_conv_wchar_cp50221(int c, mbfl_convert_filter *filter) +{ + int s = 0; + + if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) { + s = ucs_a1_jis_table[c - ucs_a1_jis_table_min]; + } else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) { + s = ucs_a2_jis_table[c - ucs_a2_jis_table_min]; + } else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) { + s = ucs_i_jis_table[c - ucs_i_jis_table_min]; + } else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) { + s = ucs_r_jis_table[c - ucs_r_jis_table_min]; + } else if (c >= 0xe000 && c < (0xe000 + 10 * 94)) { + /* PUE => Microsoft extended */ + /* See http://www.opengroup.or.jp/jvc/cde/ucs-conv.html#ch4_2 */ + s = c - 0xe000; + s = (s / 94 + 0x75) << 8 | (s % 94 + 0x21); + } else if (c >= (0xe000 + 10 * 94) && c <= (0xe000 + 20 * 94)) { + /* PUE => JISX0212 user-defined (G3 85ku - 94ku) */ + /* See http://www.opengroup.or.jp/jvc/cde/ucs-conv.html#ch4_2 */ + s = c - (0xe000 + 10 * 94); + s = (s / 94 + 0xf5) << 8 | (s % 94 + 0xa1); + } + + if (s <= 0) { + if (c == 0xa5) { /* YEN SIGN */ + s = 0x1005c; + } else if (c == 0x203e) { /* OVER LINE */ + s = 0x1007e; + } else if (c == 0xff3c) { /* FULLWIDTH REVERSE SOLIDUS */ + s = 0x2140; + } else if (c == 0xff5e) { /* FULLWIDTH TILDE */ + s = 0x2141; + } else if (c == 0x2225) { /* PARALLEL TO */ + s = 0x2142; + } else if (c == 0xff0d) { /* FULLWIDTH HYPHEN-MINUS 
*/ + s = 0x215d; + } else if (c == 0xffe0) { /* FULLWIDTH CENT SIGN */ + s = 0x2171; + } else if (c == 0xffe1) { /* FULLWIDTH POUND SIGN */ + s = 0x2172; + } else if (c == 0xffe2) { /* FULLWIDTH NOT SIGN */ + s = 0x224c; + } + } + if (s <= 0 || s >= 0x8080 && s < 0x10000) { + int i; + s = -1; + + for (i = 0; + i < cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; + i++) { + const int oh = cp932ext1_ucs_table_min / 94; + + if (c == cp932ext1_ucs_table[i]) { + s = ((i / 94 + oh + 0x21) << 8) + (i % 94 + 0x21); + break; + } + } + + if (s < 0) { + const int oh = cp932ext2_ucs_table_min / 94; + const int cp932ext2_ucs_table_size = + cp932ext2_ucs_table_max - cp932ext2_ucs_table_min; + for (i = 0; i < cp932ext2_ucs_table_size; i++) { + if (c == cp932ext2_ucs_table[i]) { + s = ((i / 94 + oh + 0x21) << 8) + (i % 94 + 0x21); + break; + } + } + } + + if (s < 0) { + const int cp932ext3_ucs_table_size = + cp932ext3_ucs_table_max - cp932ext3_ucs_table_min; + const int limit = cp932ext3_ucs_table_size > + cp932ext3_eucjp_table_size ? 
+ cp932ext3_eucjp_table_size: + cp932ext3_ucs_table_size; + for (i = 0; i < limit; i++) { + if (c == cp932ext3_ucs_table[i]) { + s = cp932ext3_eucjp_table[i]; + break; + } + } + } + + if (c == 0) { + s = 0; + } else if (s <= 0) { + s = -1; + } + } + + if (s >= 0) { + if (s < 0x80) { /* ASCII */ + if ((filter->status & 0xff00) != 0) { + CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ + CK((*filter->output_function)(0x28, filter->data)); /* '(' */ + CK((*filter->output_function)(0x42, filter->data)); /* 'B' */ + filter->status = 0; + } + CK((*filter->output_function)(s, filter->data)); + } else if (s >= 0xa0 && s < 0xe0) { /* X 0201 kana */ + if ((filter->status & 0xff00) != 0x500) { + CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ + CK((*filter->output_function)(0x28, filter->data)); /* '(' */ + CK((*filter->output_function)(0x49, filter->data)); /* 'I' */ + filter->status = 0x500; + } + CK((*filter->output_function)(s - 0x80, filter->data)); + } else if (s < 0x8080) { /* X 0208 */ + if ((filter->status & 0xff00) != 0x200) { + CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ + CK((*filter->output_function)(0x24, filter->data)); /* '$' */ + CK((*filter->output_function)(0x42, filter->data)); /* 'B' */ + filter->status = 0x200; + } + CK((*filter->output_function)((s >> 8) & 0x7f, filter->data)); + CK((*filter->output_function)(s & 0x7f, filter->data)); + } else if (s < 0x10000) { /* X0212 */ + if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) { + CK(mbfl_filt_conv_illegal_output(c, filter)); + } + } else { /* X 0201 latin */ + if ((filter->status & 0xff00) != 0x400) { + CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ + CK((*filter->output_function)(0x28, filter->data)); /* '(' */ + CK((*filter->output_function)(0x4a, filter->data)); /* 'J' */ + } + filter->status = 0x400; + CK((*filter->output_function)(s & 0x7f, filter->data)); + } + } else { + if (filter->illegal_mode != 
MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) { + CK(mbfl_filt_conv_illegal_output(c, filter)); + } + } + + return c; +} + +/* + * wchar => CP50222 + */ +int +mbfl_filt_conv_wchar_cp50222(int c, mbfl_convert_filter *filter) +{ + int s; + + s = 0; + + if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) { + s = ucs_a1_jis_table[c - ucs_a1_jis_table_min]; + } else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) { + s = ucs_a2_jis_table[c - ucs_a2_jis_table_min]; + } else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) { + s = ucs_i_jis_table[c - ucs_i_jis_table_min]; + } else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) { + s = ucs_r_jis_table[c - ucs_r_jis_table_min]; + } else if (c >= 0xe000 && c < (0xe000 + 10 * 94)) { + /* PUE => Microsoft extended */ + /* See http://www.opengroup.or.jp/jvc/cde/ucs-conv.html#ch4_2 */ + s = c - 0xe000; + s = (s / 94 + 0x75) << 8 | (s % 94 + 0x21); + } else if (c >= (0xe000 + 10 * 94) && c <= (0xe000 + 20 * 94)) { + /* PUE => JISX0212 user-defined (G3 85ku - 94ku) */ + /* See http://www.opengroup.or.jp/jvc/cde/ucs-conv.html#ch4_2 */ + s = c - (0xe000 + 10 * 94); + s = (s / 94 + 0xf5) << 8 | (s % 94 + 0xa1); + } + + if (s <= 0) { + if (c == 0xa5) { /* YEN SIGN */ + s = 0x1005c; + } else if (c == 0x203e) { /* OVER LINE */ + s = 0x1007e; + } else if (c == 0xff3c) { /* FULLWIDTH REVERSE SOLIDUS */ + s = 0x2140; + } else if (c == 0xff5e) { /* FULLWIDTH TILDE */ + s = 0x2141; + } else if (c == 0x2225) { /* PARALLEL TO */ + s = 0x2142; + } else if (c == 0xff0d) { /* FULLWIDTH HYPHEN-MINUS */ + s = 0x215d; + } else if (c == 0xffe0) { /* FULLWIDTH CENT SIGN */ + s = 0x2171; + } else if (c == 0xffe1) { /* FULLWIDTH POUND SIGN */ + s = 0x2172; + } else if (c == 0xffe2) { /* FULLWIDTH NOT SIGN */ + s = 0x224c; + } + } + if (s <= 0 || s >= 0x8080 && s < 0x10000) { + int i; + s = -1; + + for (i = 0; + i < cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; i++) { + const int oh = cp932ext1_ucs_table_min / 94; + + if 
(c == cp932ext1_ucs_table[i]) { + s = ((i / 94 + oh + 0x21) << 8) + (i % 94 + 0x21); + break; + } + } + + if (s <= 0) { + const int oh = cp932ext2_ucs_table_min / 94; + const int cp932ext2_ucs_table_size = + cp932ext2_ucs_table_max - cp932ext2_ucs_table_min; + for (i = 0; i < cp932ext2_ucs_table_size; i++) { + if (c == cp932ext2_ucs_table[i]) { + s = ((i / 94 + oh + 0x21) << 8) + (i % 94 + 0x21); + break; + } + } + } + + if (s <= 0) { + const int cp932ext3_ucs_table_size = + cp932ext3_ucs_table_max - cp932ext3_ucs_table_min; + const int limit = cp932ext3_ucs_table_size > + cp932ext3_eucjp_table_size ? + cp932ext3_eucjp_table_size: + cp932ext3_ucs_table_size; + for (i = 0; i < limit; i++) { + if (c == cp932ext3_ucs_table[i]) { + s = cp932ext3_eucjp_table[i]; + break; + } + } + } + + if (c == 0) { + s = 0; + } else if (s <= 0) { + s = -1; + } + } + + if (s >= 0) { + if (s < 0x80) { /* ASCII */ + if ((filter->status & 0xff00) == 0x500) { + CK((*filter->output_function)(0x0f, filter->data)); /* SI (0x0f): leave kana. NOTE(review): no ESC ( B is written here although the kana branch overwrites any earlier status with 0x500 - confirm this is intended */ + filter->status = 0; + } else if ((filter->status & 0xff00) != 0) { + CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ + CK((*filter->output_function)(0x28, filter->data)); /* '(' */ + CK((*filter->output_function)(0x42, filter->data)); /* 'B' */ + filter->status = 0; + } + CK((*filter->output_function)(s, filter->data)); + } else if (s >= 0xa0 && s < 0xe0) { /* X 0201 kana */ + if ((filter->status & 0xff00) != 0x500) { + CK((*filter->output_function)(0x0e, filter->data)); /* SO (0x0e): enter kana */ + filter->status = 0x500; + } + CK((*filter->output_function)(s - 0x80, filter->data)); + } else if (s < 0x8080) { /* X 0208 */ + if ((filter->status & 0xff00) == 0x500) { + CK((*filter->output_function)(0x0f, filter->data)); /* SI (0x0f): leave kana */ + filter->status = 0; + } + if ((filter->status & 0xff00) != 0x200) { + CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ + CK((*filter->output_function)(0x24, filter->data)); /* '$' */ + CK((*filter->output_function)(0x42, filter->data)); 
/* 'B' */ + filter->status = 0x200; + } + CK((*filter->output_function)((s >> 8) & 0x7f, filter->data)); + CK((*filter->output_function)(s & 0x7f, filter->data)); + } else if (s < 0x10000) { /* X0212 */ + if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) { + CK(mbfl_filt_conv_illegal_output(c, filter)); + } + } else { /* X 0201 latin */ + if ((filter->status & 0xff00) == 0x500) { + CK((*filter->output_function)(0x0f, filter->data)); /* SI (0x0f): leave kana */ + filter->status = 0; + } + if ((filter->status & 0xff00) != 0x400) { + CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ + CK((*filter->output_function)(0x28, filter->data)); /* '(' */ + CK((*filter->output_function)(0x4a, filter->data)); /* 'J' */ + } + filter->status = 0x400; + CK((*filter->output_function)(s & 0x7f, filter->data)); + } + } else { + if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) { + CK(mbfl_filt_conv_illegal_output(c, filter)); + } + } + + return c; +} + +int +mbfl_filt_conv_wchar_cp50222_flush(mbfl_convert_filter *filter) +{ + /* back to latin */ + if ((filter->status & 0xff00) == 0x500) { + CK((*filter->output_function)(0x0f, filter->data)); /* SI (0x0f): leave kana. NOTE(review): no ESC ( B is written on this path even if an ESC designation preceded the kana run - confirm this is intended */ + } else if ((filter->status & 0xff00) != 0) { + CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ + CK((*filter->output_function)(0x28, filter->data)); /* '(' */ + CK((*filter->output_function)(0x42, filter->data)); /* 'B' */ + } + filter->status &= 0xff; + + if (filter->flush_function != NULL) { + return (*filter->flush_function)(filter->data); + } + + return 0; +} + + +static int mbfl_filt_ident_jis_ms(int c, mbfl_identify_filter *filter) +{ +retry: + switch (filter->status & 0xf) { +/* case 0x00: ASCII */ +/* case 0x10: X 0201 latin */ +/* case 0x20: X 0201 kana */ +/* case 0x80: X 0208 */ +/* case 0x90: X 0212 */ + case 0: + if (c == 0x1b) { + filter->status += 2; + } else if (c == 0x0e) { /* "kana in" */ + filter->status = 0x20; + } else if (c == 0x0f) { /* "kana out" */ + filter->status = 0; + } 
else if ((filter->status == 0x80 || filter->status == 0x90) && c > 0x20 && c < 0x7f) { /* kanji first char */ + filter->status += 1; + } else if (c >= 0 && c < 0x80) { /* latin, CTLs */ + ; + } else { + filter->flag = 1; /* bad */ + } + break; + +/* case 0x81: X 0208 second char */ +/* case 0x91: X 0212 second char */ + case 1: + filter->status &= ~0xf; + if (c == 0x1b) { + goto retry; + } else if (c < 0x21 || c > 0x7e) { /* bad */ + filter->flag = 1; + } + break; + + /* ESC */ + case 2: + if (c == 0x24) { /* '$' */ + filter->status++; + } else if (c == 0x28) { /* '(' */ + filter->status += 3; + } else { + filter->flag = 1; /* bad */ + filter->status &= ~0xf; + goto retry; + } + break; + + /* ESC $ */ + case 3: + if (c == 0x40 || c == 0x42) { /* '@' or 'B' */ + filter->status = 0x80; + } else if (c == 0x28) { /* '(' */ + filter->status++; + } else { + filter->flag = 1; /* bad */ + filter->status &= ~0xf; + goto retry; + } + break; + + /* ESC $ ( */ + case 4: + if (c == 0x40 || c == 0x42) { /* '@' or 'B' */ + filter->status = 0x80; + } else if (c == 0x44) { /* 'D' */ + filter->status = 0x90; + } else { + filter->flag = 1; /* bad */ + filter->status &= ~0xf; + goto retry; + } + break; + + /* ESC ( */ + case 5: + if (c == 0x42 || c == 0x48) { /* 'B' or 'H' */ + filter->status = 0; + } else if (c == 0x4a) { /* 'J' */ + filter->status = 0x10; + } else if (c == 0x49) { /* 'I' */ + filter->status = 0x20; + } else { + filter->flag = 1; /* bad */ + filter->status &= ~0xf; + goto retry; + } + break; + + default: + filter->status = 0; + break; + } + + return c; +} + +static int mbfl_filt_ident_cp50220(int c, mbfl_identify_filter *filter) +{ +retry: + switch (filter->status & 0xf) { +/* case 0x00: ASCII */ +/* case 0x10: X 0201 latin */ +/* case 0x80: X 0208 */ + case 0: + if (c == 0x1b) { + filter->status += 2; + } else if (filter->status == 0x80 && c > 0x20 && c < 0x7f) { /* kanji first char */ + filter->status += 1; + } else if (c >= 0 && c < 0x80) { /* latin, CTLs */ + ; + 
} else { + filter->flag = 1; /* bad */ + } + break; + +/* case 0x81: X 0208 second char */ + case 1: + if (c == 0x1b) { + filter->status++; + } else { + filter->status &= ~0xf; + if (c < 0x21 || c > 0x7e) { /* bad */ + filter->flag = 1; + } + } + break; + + /* ESC */ + case 2: + if (c == 0x24) { /* '$' */ + filter->status++; + } else if (c == 0x28) { /* '(' */ + filter->status += 3; + } else { + filter->flag = 1; /* bad */ + filter->status &= ~0xf; + goto retry; + } + break; + + /* ESC $ */ + case 3: + if (c == 0x40 || c == 0x42) { /* '@' or 'B' */ + filter->status = 0x80; + } else { + filter->flag = 1; /* bad */ + filter->status &= ~0xf; + goto retry; + } + break; + + /* ESC ( */ + case 5: + if (c == 0x42) { /* 'B' */ + filter->status = 0; + } else if (c == 0x4a) { /* 'J' */ + filter->status = 0x10; + } else { + filter->flag = 1; /* bad */ + filter->status &= ~0xf; + goto retry; + } + break; + + default: + filter->status = 0; + break; + } + + return c; +} + +static int mbfl_filt_ident_cp50221(int c, mbfl_identify_filter *filter) +{ +retry: + switch (filter->status & 0xf) { +/* case 0x00: ASCII */ +/* case 0x10: X 0201 latin */ +/* case 0x80: X 0208 */ + case 0: + if (c == 0x1b) { + filter->status += 2; + } else if (filter->status == 0x80 && c > 0x20 && c < 0x7f) { /* kanji first char */ + filter->status += 1; + } else if (c >= 0 && c < 0x80) { /* latin, CTLs */ + ; + } else { + filter->flag = 1; /* bad */ + } + break; + +/* case 0x81: X 0208 second char */ + case 1: + if (c == 0x1b) { + filter->status++; + } else { + filter->status &= ~0xf; + if (c < 0x21 || c > 0x7e) { /* bad */ + filter->flag = 1; + } + } + break; + + /* ESC */ + case 2: + if (c == 0x24) { /* '$' */ + filter->status++; + } else if (c == 0x28) { /* '(' */ + filter->status += 3; + } else { + filter->flag = 1; /* bad */ + filter->status &= ~0xf; + goto retry; + } + break; + + /* ESC $ */ + case 3: + if (c == 0x40 || c == 0x42) { /* '@' or 'B' */ + filter->status = 0x80; + } else { + filter->flag = 
1; /* bad */ + filter->status &= ~0xf; + goto retry; + } + break; + + /* ESC ( */ + case 5: + if (c == 0x42) { /* 'B' */ + filter->status = 0; + } else if (c == 0x4a) { /* 'J' */ + filter->status = 0x10; + } else if (c == 0x49) { /* 'I' */ + filter->status = 0x20; + } else { + filter->flag = 1; /* bad */ + filter->status &= ~0xf; + goto retry; + } + break; + + default: + filter->status = 0; + break; + } + + return c; +} + +static int mbfl_filt_ident_cp50222(int c, mbfl_identify_filter *filter) +{ +retry: + switch (filter->status & 0xf) { +/* case 0x00: ASCII */ +/* case 0x10: X 0201 latin */ +/* case 0x80: X 0208 */ + case 0: + if (c == 0x1b) { + filter->status += 2; + } else if (filter->status == 0x80 && c > 0x20 && c < 0x7f) { /* kanji first char */ + filter->status += 1; + } else if (c >= 0 && c < 0x80) { /* latin, CTLs */ + ; + } else { + filter->flag = 1; /* bad */ + } + break; + +/* case 0x81: X 0208 second char */ + case 1: + if (c == 0x1b) { + filter->status++; + } else { + filter->status &= ~0xf; + if (c < 0x21 || c > 0x7e) { /* bad */ + filter->flag = 1; + } + } + break; + + /* ESC */ + case 2: + if (c == 0x24) { /* '$' */ + filter->status++; + } else if (c == 0x28) { /* '(' */ + filter->status += 3; + } else { + filter->flag = 1; /* bad */ + filter->status &= ~0xf; + goto retry; + } + break; + + /* ESC $ */ + case 3: + if (c == 0x40 || c == 0x42) { /* '@' or 'B' */ + filter->status = 0x80; + } else { + filter->flag = 1; /* bad */ + filter->status &= ~0xf; + goto retry; + } + break; + + /* ESC ( */ + case 5: + if (c == 0x42) { /* 'B' */ + filter->status = 0; + } else if (c == 0x4a) { /* 'J' */ + filter->status = 0x10; + } else { + filter->flag = 1; /* bad */ + filter->status &= ~0xf; + goto retry; + } + break; + + default: + filter->status = 0; + break; + } + + return c; +} + + + diff --git a/ext/mbstring/libmbfl/filters/mbfilter_cp5022x.h b/ext/mbstring/libmbfl/filters/mbfilter_cp5022x.h new file mode 100644 index 0000000000..a462f70ec4 --- /dev/null +++ 
b/ext/mbstring/libmbfl/filters/mbfilter_cp5022x.h @@ -0,0 +1,64 @@ +/* + * "streamable kanji code filter and converter" + * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. + * + * LICENSE NOTICES + * + * This file is part of "streamable kanji code filter and converter", + * which is distributed under the terms of GNU Lesser General Public + * License (version 2) as published by the Free Software Foundation. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with "streamable kanji code filter and converter"; + * if not, write to the Free Software Foundation, Inc., 59 Temple Place, + * Suite 330, Boston, MA 02111-1307 USA + * + * The author of this file: + * + */ +/* + * The source code included in this file was separated from mbfilter_ja.c + * by Moriyoshi Koizumi on 4 Dec 2002. 
+ * + */ + +#ifndef MBFL_MBFILTER_CP50221_h +#define MBFL_MBFILTER_CP50221_h + +#include "mbfilter.h" + +extern const mbfl_encoding mbfl_encoding_jis_ms; +extern const mbfl_encoding mbfl_encoding_cp50220; +extern const mbfl_encoding mbfl_encoding_cp50220raw; +extern const mbfl_encoding mbfl_encoding_cp50221; +extern const mbfl_encoding mbfl_encoding_cp50222; +extern const struct mbfl_identify_vtbl vtbl_identify_jis_ms; +extern const struct mbfl_identify_vtbl vtbl_identify_cp50220; +extern const struct mbfl_identify_vtbl vtbl_identify_cp50220raw; +extern const struct mbfl_identify_vtbl vtbl_identify_cp50221; +extern const struct mbfl_identify_vtbl vtbl_identify_cp50222; +extern const struct mbfl_convert_vtbl vtbl_jis_ms_wchar; +extern const struct mbfl_convert_vtbl vtbl_wchar_jis_ms; +extern const struct mbfl_convert_vtbl vtbl_cp50220_wchar; +extern const struct mbfl_convert_vtbl vtbl_wchar_cp50220; +extern const struct mbfl_convert_vtbl vtbl_cp50220raw_wchar; +extern const struct mbfl_convert_vtbl vtbl_wchar_cp50220raw; +extern const struct mbfl_convert_vtbl vtbl_cp50221_wchar; +extern const struct mbfl_convert_vtbl vtbl_wchar_cp50221; +extern const struct mbfl_convert_vtbl vtbl_cp50222_wchar; +extern const struct mbfl_convert_vtbl vtbl_wchar_cp50222; + +int mbfl_filt_conv_jis_ms_wchar(int c, mbfl_convert_filter *filter); +int mbfl_filt_conv_wchar_jis_ms(int c, mbfl_convert_filter *filter); +int mbfl_filt_conv_wchar_cp50220(int c, mbfl_convert_filter *filter); +int mbfl_filt_conv_wchar_cp50220raw(int c, mbfl_convert_filter *filter); +int mbfl_filt_conv_wchar_cp50221(int c, mbfl_convert_filter *filter); +int mbfl_filt_conv_wchar_cp50222(int c, mbfl_convert_filter *filter); +int mbfl_filt_conv_wchar_cp50222_flush(mbfl_convert_filter *filter); + +#endif /* MBFL_MBFILTER_CP50221_h */ diff --git a/ext/mbstring/libmbfl/filters/mbfilter_cp51932.c b/ext/mbstring/libmbfl/filters/mbfilter_cp51932.c index f1505ae9b0..f84be1e290 100644 --- 
a/ext/mbstring/libmbfl/filters/mbfilter_cp51932.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_cp51932.c @@ -200,21 +200,6 @@ mbfl_filt_conv_cp51932_wchar(int c, mbfl_convert_filter *filter) return c; } -int -cp932ext3_to_cp51932(int c) -{ - int idx; - - idx = sjistoidx(c >> 8, c & 0xff); - if (idx >= sjistoidx(0xfa, 0x5c)) - idx -= sjistoidx(0xfa, 0x5c) - sjistoidx(0xed, 0x40); - else if (idx >= sjistoidx(0xfa, 0x55)) - idx -= sjistoidx(0xfa, 0x55) - sjistoidx(0xee, 0xfa); - else if (idx >= sjistoidx(0xfa, 0x40)) - idx -= sjistoidx(0xfa, 0x40) - sjistoidx(0xee, 0xef); - return idxtoeuc1(idx) << 8 | idxtoeuc2(idx); -} - /* * wchar => cp51932 */ @@ -250,9 +235,9 @@ mbfl_filt_conv_wchar_cp51932(int c, mbfl_convert_filter *filter) s1 = -1; } } else if (c == 0xa5) { /* YEN SIGN */ - s1 = 0x216f; /* FULLWIDTH YEN SIGN */ + s1 = 0x005c; /* YEN SIGN */ } else if (c == 0x203e) { /* OVER LINE */ - s1 = 0x2131; /* FULLWIDTH MACRON */ + s1 = 0x007e; /* OVER LINE */ } else if (c == 0xff3c) { /* FULLWIDTH REVERSE SOLIDUS */ s1 = 0x2140; } else if (c == 0xff5e) { /* FULLWIDTH TILDE */ diff --git a/ext/mbstring/libmbfl/filters/mbfilter_cp932.c b/ext/mbstring/libmbfl/filters/mbfilter_cp932.c index 8fa254b6ce..6e54d53f44 100644 --- 
*mbfl_encoding_cp932_aliases[] = {"MS932", "Windows-31J", "MS_Kanji", NULL}; -const mbfl_encoding mbfl_encoding_sjis_win = { - mbfl_no_encoding_sjis_win, - "SJIS-win", +const mbfl_encoding mbfl_encoding_cp932 = { + mbfl_no_encoding_cp932, + "CP932", "Shift_JIS", - (const char *(*)[])&mbfl_encoding_sjis_win_aliases, + (const char *(*)[])&mbfl_encoding_cp932_aliases, mblen_table_sjis, MBFL_ENCTYPE_MBCS }; -const struct mbfl_identify_vtbl vtbl_identify_sjiswin = { - mbfl_no_encoding_sjis_win, +const struct mbfl_identify_vtbl vtbl_identify_cp932 = { + mbfl_no_encoding_cp932, mbfl_filt_ident_common_ctor, mbfl_filt_ident_common_dtor, - mbfl_filt_ident_sjiswin + mbfl_filt_ident_cp932 }; -const struct mbfl_convert_vtbl vtbl_sjiswin_wchar = { - mbfl_no_encoding_sjis_win, +const struct mbfl_convert_vtbl vtbl_cp932_wchar = { + mbfl_no_encoding_cp932, mbfl_no_encoding_wchar, mbfl_filt_conv_common_ctor, mbfl_filt_conv_common_dtor, - mbfl_filt_conv_sjiswin_wchar, + mbfl_filt_conv_cp932_wchar, mbfl_filt_conv_common_flush }; -const struct mbfl_convert_vtbl vtbl_wchar_sjiswin = { +const struct mbfl_convert_vtbl vtbl_wchar_cp932 = { mbfl_no_encoding_wchar, - mbfl_no_encoding_sjis_win, + mbfl_no_encoding_cp932, mbfl_filt_conv_common_ctor, mbfl_filt_conv_common_dtor, - mbfl_filt_conv_wchar_sjiswin, + mbfl_filt_conv_wchar_cp932, mbfl_filt_conv_common_flush }; @@ -144,7 +144,7 @@ const struct mbfl_convert_vtbl vtbl_wchar_sjiswin = { * SJIS-win => wchar */ int -mbfl_filt_conv_sjiswin_wchar(int c, mbfl_convert_filter *filter) +mbfl_filt_conv_cp932_wchar(int c, mbfl_convert_filter *filter) { int c1, s, s1, s2, w; @@ -229,7 +229,7 @@ mbfl_filt_conv_sjiswin_wchar(int c, mbfl_convert_filter *filter) * wchar => SJIS-win */ int -mbfl_filt_conv_wchar_sjiswin(int c, mbfl_convert_filter *filter) +mbfl_filt_conv_wchar_cp932(int c, mbfl_convert_filter *filter) { int c1, c2, s1, s2; @@ -261,9 +261,9 @@ mbfl_filt_conv_wchar_sjiswin(int c, mbfl_convert_filter *filter) s1 = c & MBFL_WCSPLANE_MASK; s1 |= 
0x8080; } else if (c == 0xa5) { /* YEN SIGN */ - s1 = 0x216f; /* FULLWIDTH YEN SIGN */ + s1 = 0x005c; /* YEN SIGN */ } else if (c == 0x203e) { /* OVER LINE */ - s1 = 0x2131; /* FULLWIDTH MACRON */ + s1 = 0x007e; /* OVER LINE */ } else if (c == 0xff3c) { /* FULLWIDTH REVERSE SOLIDUS */ s1 = 0x2140; } else if (c == 0xff5e) { /* FULLWIDTH TILDE */ @@ -327,7 +327,7 @@ mbfl_filt_conv_wchar_sjiswin(int c, mbfl_convert_filter *filter) return c; } -static int mbfl_filt_ident_sjiswin(int c, mbfl_identify_filter *filter) +static int mbfl_filt_ident_cp932(int c, mbfl_identify_filter *filter) { if (filter->status) { /* kanji second char */ if (c < 0x40 || c > 0xfc || c == 0x7f) { /* bad */ diff --git a/ext/mbstring/libmbfl/filters/mbfilter_cp932.h b/ext/mbstring/libmbfl/filters/mbfilter_cp932.h index f66baabea5..b6a211412e 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_cp932.h +++ b/ext/mbstring/libmbfl/filters/mbfilter_cp932.h @@ -32,12 +32,12 @@ #include "mbfilter.h" -extern const mbfl_encoding mbfl_encoding_sjis_win; -extern const struct mbfl_identify_vtbl vtbl_identify_sjiswin; -extern const struct mbfl_convert_vtbl vtbl_sjiswin_wchar; -extern const struct mbfl_convert_vtbl vtbl_wchar_sjiswin; +extern const mbfl_encoding mbfl_encoding_cp932; +extern const struct mbfl_identify_vtbl vtbl_identify_cp932; +extern const struct mbfl_convert_vtbl vtbl_cp932_wchar; +extern const struct mbfl_convert_vtbl vtbl_wchar_cp932; -int mbfl_filt_conv_sjiswin_wchar(int c, mbfl_convert_filter *filter); -int mbfl_filt_conv_wchar_sjiswin(int c, mbfl_convert_filter *filter); +int mbfl_filt_conv_cp932_wchar(int c, mbfl_convert_filter *filter); +int mbfl_filt_conv_wchar_cp932(int c, mbfl_convert_filter *filter); #endif /* MBFL_MBFILTER_CP932_H */ diff --git a/ext/mbstring/libmbfl/filters/mbfilter_htmlent.c b/ext/mbstring/libmbfl/filters/mbfilter_htmlent.c index 0163520f5b..1fe0e6b732 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_htmlent.c +++ 
b/ext/mbstring/libmbfl/filters/mbfilter_htmlent.c @@ -146,6 +146,11 @@ int mbfl_filt_conv_html_enc_flush(mbfl_convert_filter *filter) { filter->status = 0; filter->opaque = NULL; + + if (filter->flush_function != NULL) { + (*filter->flush_function)(filter->data); + } + return 0; } @@ -289,12 +294,18 @@ int mbfl_filt_conv_html_dec_flush(mbfl_convert_filter *filter) buffer = (unsigned char*)filter->opaque; status = filter->status; filter->status = 0; + /* flush fragments */ while (status--) { int e = (*filter->output_function)(buffer[pos++], filter->data); if (e != 0) err = e; } + + if (filter->flush_function != NULL) { + (*filter->flush_function)(filter->data); + } + return err; } diff --git a/ext/mbstring/libmbfl/filters/mbfilter_iso2022_jp_ms.c b/ext/mbstring/libmbfl/filters/mbfilter_iso2022_jp_ms.c index df961677d1..1bf77172b6 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_iso2022_jp_ms.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_iso2022_jp_ms.c @@ -321,7 +321,7 @@ mbfl_filt_conv_wchar_2022jpms(int c, mbfl_convert_filter *filter) s1 = c & MBFL_WCSPLANE_MASK; s1 |= 0x8080; } else if (c == 0xa5) { /* YEN SIGN */ - s1 = 0x216f; /* FULLWIDTH YEN SIGN */ + s1 = 0x216f; /* FULLWIDTH YEN SIGN */ } else if (c == 0x203e) { /* OVER LINE */ s1 = 0x2131; /* FULLWIDTH MACRON */ } else if (c == 0xff3c) { /* FULLWIDTH REVERSE SOLIDUS */ @@ -423,7 +423,13 @@ mbfl_filt_conv_any_2022jpms_flush(mbfl_convert_filter *filter) CK((*filter->output_function)(0x28, filter->data)); /* '(' */ CK((*filter->output_function)(0x42, filter->data)); /* 'B' */ } + filter->status &= 0xff; + + if (filter->flush_function != NULL) { + return (*filter->flush_function)(filter->data); + } + return 0; } diff --git a/ext/mbstring/libmbfl/filters/mbfilter_iso2022_kr.c b/ext/mbstring/libmbfl/filters/mbfilter_iso2022_kr.c index 263476d628..77c95c5ad2 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_iso2022_kr.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_iso2022_kr.c @@ -276,7 +276,13 @@ 
mbfl_filt_conv_any_2022kr_flush(mbfl_convert_filter *filter) if ((filter->status & 0xff00) != 0) { CK((*filter->output_function)(0x0f, filter->data)); /* SI */ } + filter->status &= 0xff; + + if (filter->flush_function != NULL) { + return (*filter->flush_function)(filter->data); + } + return 0; } diff --git a/ext/mbstring/libmbfl/filters/mbfilter_jis.c b/ext/mbstring/libmbfl/filters/mbfilter_jis.c index 58336d4e28..6b1aef3643 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_jis.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_jis.c @@ -478,6 +478,11 @@ mbfl_filt_conv_any_jis_flush(mbfl_convert_filter *filter) CK((*filter->output_function)(0x42, filter->data)); /* 'B' */ } filter->status &= 0xff; + + if (filter->flush_function != NULL) { + return (*filter->flush_function)(filter->data); + } + return 0; } diff --git a/ext/mbstring/libmbfl/filters/mbfilter_sjis_open.c b/ext/mbstring/libmbfl/filters/mbfilter_sjis_open.c new file mode 100644 index 0000000000..38244a0ac9 --- /dev/null +++ b/ext/mbstring/libmbfl/filters/mbfilter_sjis_open.c @@ -0,0 +1,350 @@ +/* + * "streamable kanji code filter and converter" + * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. + * + * LICENSE NOTICES + * + * This file is part of "streamable kanji code filter and converter", + * which is distributed under the terms of GNU Lesser General Public + * License (version 2) as published by the Free Software Foundation. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with "streamable kanji code filter and converter"; + * if not, write to the Free Software Foundation, Inc., 59 Temple Place, + * Suite 330, Boston, MA 02111-1307 USA + * + * The author of this file: + * + */ +/* + * The source code included in this file was separated from mbfilter_ja.c + * by Moriyoshi Koizumi on 4 Dec 2002. + * + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "mbfilter.h" +#include "mbfilter_sjis_open.h" + +#include "unicode_table_cp932_ext.h" +#include "unicode_table_jis.h" + +static int mbfl_filt_ident_sjis_open(int c, mbfl_identify_filter *filter); + +static const unsigned char mblen_table_sjis[] = { /* 256 entries: 2 at indexes 0x80-0x9f and 0xE0-0xFF, 1 elsewhere */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 +}; + +static const char *mbfl_encoding_sjis_open_aliases[] = {"SJIS-open", "SJIS-ms", NULL}; + +const mbfl_encoding mbfl_encoding_sjis_open = { + mbfl_no_encoding_sjis_open, + "SJIS-win", + "Shift_JIS", + (const char *(*)[])&mbfl_encoding_sjis_open_aliases, + mblen_table_sjis, + MBFL_ENCTYPE_MBCS +}; + +const struct mbfl_identify_vtbl vtbl_identify_sjis_open = { + mbfl_no_encoding_sjis_open, + mbfl_filt_ident_common_ctor, + 
mbfl_filt_ident_common_dtor, + mbfl_filt_ident_sjis_open +}; + +const struct mbfl_convert_vtbl vtbl_sjis_open_wchar = { + mbfl_no_encoding_sjis_open, + mbfl_no_encoding_wchar, + mbfl_filt_conv_common_ctor, + mbfl_filt_conv_common_dtor, + mbfl_filt_conv_sjis_open_wchar, + mbfl_filt_conv_common_flush +}; + +const struct mbfl_convert_vtbl vtbl_wchar_sjis_open = { + mbfl_no_encoding_wchar, + mbfl_no_encoding_sjis_open, + mbfl_filt_conv_common_ctor, + mbfl_filt_conv_common_dtor, + mbfl_filt_conv_wchar_sjis_open, + mbfl_filt_conv_common_flush +}; + +#define CK(statement) do { if ((statement) < 0) return (-1); } while (0) + +#define SJIS_ENCODE(c1,c2,s1,s2) \ + do { \ + s1 = c1; \ + s1--; \ + s1 >>= 1; \ + if ((c1) < 0x5f) { \ + s1 += 0x71; \ + } else { \ + s1 += 0xb1; \ + } \ + s2 = c2; \ + if ((c1) & 1) { \ + if ((c2) < 0x60) { \ + s2--; \ + } \ + s2 += 0x20; \ + } else { \ + s2 += 0x7e; \ + } \ + } while (0) + +#define SJIS_DECODE(c1,c2,s1,s2) \ + do { \ + s1 = c1; \ + if (s1 < 0xa0) { \ + s1 -= 0x81; \ + } else { \ + s1 -= 0xc1; \ + } \ + s1 <<= 1; \ + s1 += 0x21; \ + s2 = c2; \ + if (s2 < 0x9f) { \ + if (s2 < 0x7f) { \ + s2++; \ + } \ + s2 -= 0x20; \ + } else { \ + s1++; \ + s2 -= 0x7e; \ + } \ + } while (0) + + +/* + * SJIS-win => wchar + */ +int +mbfl_filt_conv_sjis_open_wchar(int c, mbfl_convert_filter *filter) +{ + int c1, s, s1, s2, w; + + switch (filter->status) { + case 0: + if (c >= 0 && c < 0x80) { /* latin */ + CK((*filter->output_function)(c, filter->data)); + } else if (c > 0xa0 && c < 0xe0) { /* kana */ + CK((*filter->output_function)(0xfec0 + c, filter->data)); + } else if (c > 0x80 && c < 0xfd && c != 0xa0) { /* kanji first char */ + filter->status = 1; + filter->cache = c; + } else { + w = c & MBFL_WCSGROUP_MASK; + w |= MBFL_WCSGROUP_THROUGH; + CK((*filter->output_function)(w, filter->data)); + } + break; + + case 1: /* kanji second char */ + filter->status = 0; + c1 = filter->cache; + if (c >= 0x40 && c <= 0xfc && c != 0x7f) { + w = 0; + 
SJIS_DECODE(c1, c, s1, s2); + s = (s1 - 0x21)*94 + s2 - 0x21; + if (s <= 137) { + if (s == 31) { + w = 0xff3c; /* FULLWIDTH REVERSE SOLIDUS */ + } else if (s == 32) { + w = 0xff5e; /* FULLWIDTH TILDE */ + } else if (s == 33) { + w = 0x2225; /* PARALLEL TO */ + } else if (s == 60) { + w = 0xff0d; /* FULLWIDTH HYPHEN-MINUS */ + } else if (s == 80) { + w = 0xffe0; /* FULLWIDTH CENT SIGN */ + } else if (s == 81) { + w = 0xffe1; /* FULLWIDTH POUND SIGN */ + } else if (s == 137) { + w = 0xffe2; /* FULLWIDTH NOT SIGN */ + } + } + if (w == 0) { + if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) { /* vendor ext1 (13ku) */ + w = cp932ext1_ucs_table[s - cp932ext1_ucs_table_min]; + } else if (s >= 0 && s < jisx0208_ucs_table_size) { /* X 0208 */ + w = jisx0208_ucs_table[s]; + } else if (s >= cp932ext2_ucs_table_min && s < cp932ext2_ucs_table_max) { /* vendor ext2 (89ku - 92ku) */ + w = cp932ext2_ucs_table[s - cp932ext2_ucs_table_min]; + } else if (s >= cp932ext3_ucs_table_min && s < cp932ext3_ucs_table_max) { /* vendor ext3 (115ku - 119ku) */ + w = cp932ext3_ucs_table[s - cp932ext3_ucs_table_min]; + } else if (s >= (94*94) && s < (114*94)) { /* user (95ku - 114ku) */ + w = s - (94*94) + 0xe000; + } + } + if (w <= 0) { + w = (s1 << 8) | s2; + w &= MBFL_WCSPLANE_MASK; + w |= MBFL_WCSPLANE_WINCP932; + } + CK((*filter->output_function)(w, filter->data)); + } else if ((c >= 0 && c < 0x21) || c == 0x7f) { /* CTLs */ + CK((*filter->output_function)(c, filter->data)); + } else { + w = (c1 << 8) | c; + w &= MBFL_WCSGROUP_MASK; + w |= MBFL_WCSGROUP_THROUGH; + CK((*filter->output_function)(w, filter->data)); + } + break; + + default: + filter->status = 0; + break; + } + + return c; +} + +/* + * wchar => SJIS-win + */ +int +mbfl_filt_conv_wchar_sjis_open(int c, mbfl_convert_filter *filter) +{ + int c1, c2, s1, s2; + + s1 = 0; + s2 = 0; + if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) { + s1 = ucs_a1_jis_table[c - ucs_a1_jis_table_min]; + } else if (c >= 
ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) { + s1 = ucs_a2_jis_table[c - ucs_a2_jis_table_min]; + } else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) { + s1 = ucs_i_jis_table[c - ucs_i_jis_table_min]; + } else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) { + s1 = ucs_r_jis_table[c - ucs_r_jis_table_min]; + } else if (c >= 0xe000 && c < (0xe000 + 20*94)) { /* user (95ku - 114ku) */ + s1 = c - 0xe000; + c1 = s1/94 + 0x7f; + c2 = s1%94 + 0x21; + s1 = (c1 << 8) | c2; + s2 = 1; + } + if (s1 <= 0) { + c1 = c & ~MBFL_WCSPLANE_MASK; + if (c1 == MBFL_WCSPLANE_WINCP932) { + s1 = c & MBFL_WCSPLANE_MASK; + s2 = 1; + } else if (c1 == MBFL_WCSPLANE_JIS0208) { + s1 = c & MBFL_WCSPLANE_MASK; + } else if (c1 == MBFL_WCSPLANE_JIS0212) { + s1 = c & MBFL_WCSPLANE_MASK; + s1 |= 0x8080; + } else if (c == 0xa5) { /* YEN SIGN */ + s1 = 0x216f; /* FULLWIDTH YEN SIGN */ + } else if (c == 0x203e) { /* OVER LINE */ + s1 = 0x2131; /* FULLWIDTH MACRON */ + } else if (c == 0xff3c) { /* FULLWIDTH REVERSE SOLIDUS */ + s1 = 0x2140; + } else if (c == 0xff5e) { /* FULLWIDTH TILDE */ + s1 = 0x2141; + } else if (c == 0x2225) { /* PARALLEL TO */ + s1 = 0x2142; + } else if (c == 0xff0d) { /* FULLWIDTH HYPHEN-MINUS */ + s1 = 0x215d; + } else if (c == 0xffe0) { /* FULLWIDTH CENT SIGN */ + s1 = 0x2171; + } else if (c == 0xffe1) { /* FULLWIDTH POUND SIGN */ + s1 = 0x2172; + } else if (c == 0xffe2) { /* FULLWIDTH NOT SIGN */ + s1 = 0x224c; + } + } + if ((s1 <= 0) || (s1 >= 0x8080 && s2 == 0)) { /* not found or X 0212 */ + s1 = -1; + c1 = 0; + c2 = cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; + while (c1 < c2) { /* CP932 vendor ext1 (13ku) */ + if (c == cp932ext1_ucs_table[c1]) { + s1 = ((c1/94 + 0x2d) << 8) + (c1%94 + 0x21); + break; + } + c1++; + } + if (s1 <= 0) { + c1 = 0; + c2 = cp932ext3_ucs_table_max - cp932ext3_ucs_table_min; + while (c1 < c2) { /* CP932 vendor ext3 (115ku - 119ku) */ + if (c == cp932ext3_ucs_table[c1]) { + s1 = ((c1/94 + 0x93) << 8) + (c1%94 + 
0x21); + break; + } + c1++; + } + } + if (c == 0) { + s1 = 0; + } else if (s1 <= 0) { + s1 = -1; + } + } + if (s1 >= 0) { + if (s1 < 0x100) { /* latin or kana */ + CK((*filter->output_function)(s1, filter->data)); + } else { /* kanji */ + c1 = (s1 >> 8) & 0xff; + c2 = s1 & 0xff; + SJIS_ENCODE(c1, c2, s1, s2); + CK((*filter->output_function)(s1, filter->data)); + CK((*filter->output_function)(s2, filter->data)); + } + } else { + if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) { + CK(mbfl_filt_conv_illegal_output(c, filter)); + } + } + + return c; +} + +static int mbfl_filt_ident_sjis_open(int c, mbfl_identify_filter *filter) +{ + if (filter->status) { /* kanji second char */ + if (c < 0x40 || c > 0xfc || c == 0x7f) { /* bad */ + filter->flag = 1; + } + filter->status = 0; + } else if (c >= 0 && c < 0x80) { /* latin ok */ + ; + } else if (c > 0xa0 && c < 0xe0) { /* kana ok */ + ; + } else if (c > 0x80 && c < 0xfd && c != 0xa0) { /* kanji first char */ + filter->status = 1; + } else { /* bad */ + filter->flag = 1; + } + + return c; +} + + diff --git a/ext/mbstring/libmbfl/filters/mbfilter_sjis_open.h b/ext/mbstring/libmbfl/filters/mbfilter_sjis_open.h new file mode 100644 index 0000000000..0bfe1d21ab --- /dev/null +++ b/ext/mbstring/libmbfl/filters/mbfilter_sjis_open.h @@ -0,0 +1,43 @@ +/* + * "streamable kanji code filter and converter" + * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. + * + * LICENSE NOTICES + * + * This file is part of "streamable kanji code filter and converter", + * which is distributed under the terms of GNU Lesser General Public + * License (version 2) as published by the Free Software Foundation. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with "streamable kanji code filter and converter"; + * if not, write to the Free Software Foundation, Inc., 59 Temple Place, + * Suite 330, Boston, MA 02111-1307 USA + * + * The author of this file: + * + */ +/* + * the source code included in this file was separated from mbfilter_ja.c + * by moriyoshi koizumi on 4 dec 2002. + * + */ + +#ifndef MBFL_MBFILTER_SJIS_OPEN_H +#define MBFL_MBFILTER_SJIS_OPEN_H + +#include "mbfilter.h" + +extern const mbfl_encoding mbfl_encoding_sjis_open; +extern const struct mbfl_identify_vtbl vtbl_identify_sjis_open; +extern const struct mbfl_convert_vtbl vtbl_sjis_open_wchar; +extern const struct mbfl_convert_vtbl vtbl_wchar_sjis_open; +/* Per-character converters defined in mbfilter_sjis_open.c; each returns c, or -1 when an output call fails */ +int mbfl_filt_conv_sjis_open_wchar(int c, mbfl_convert_filter *filter); +int mbfl_filt_conv_wchar_sjis_open(int c, mbfl_convert_filter *filter); + +#endif /* MBFL_MBFILTER_SJIS_OPEN_H */ diff --git a/ext/mbstring/libmbfl/filters/mbfilter_tl_jisx0201_jisx0208.c b/ext/mbstring/libmbfl/filters/mbfilter_tl_jisx0201_jisx0208.c new file mode 100644 index 0000000000..c66c51f248 --- /dev/null +++ b/ext/mbstring/libmbfl/filters/mbfilter_tl_jisx0201_jisx0208.c @@ -0,0 +1,302 @@ +/* + * "streamable kanji code filter and converter" + * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. + * + * LICENSE NOTICES + * + * This file is part of "streamable kanji code filter and converter", + * which is distributed under the terms of GNU Lesser General Public + * License (version 2) as published by the Free Software Foundation. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details.
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with "streamable kanji code filter and converter"; + * if not, write to the Free Software Foundation, Inc., 59 Temple Place, + * Suite 330, Boston, MA 02111-1307 USA + * + * The author of this file: Moriyoshi Koizumi + * + */ + +#include "mbfl_allocators.h" +#include "mbfilter_tl_jisx0201_jisx0208.h" +#include "translit_kana_jisx0201_jisx0208.h" +/* Constructor: only delegates to mbfl_filt_conv_common_ctor(). */ +void +mbfl_filt_tl_jisx0201_jisx0208_init(mbfl_convert_filter *filt) +{ + mbfl_filt_conv_common_ctor(filt); +} +/* Destructor: empty body. */ +void +mbfl_filt_tl_jisx0201_jisx0208_cleanup(mbfl_convert_filter *filt) +{ +} +/* Filter function: starts with s = c, lets each enabled MBFL_FILT_TL_ section below overwrite s in turn, and returns the result of passing the final s to filt->output_function. In the GLUE modes a halfwidth kana is parked in filt->cache with filt->status = 1 and c is returned without output, so that a following voiced or semi-voiced mark (0xff9e, 0xff9f) can be merged into one character. */ +int +mbfl_filt_tl_jisx0201_jisx0208(int c, mbfl_convert_filter *filt) +{ + int s, n; + int mode = ((mbfl_filt_tl_jisx0201_jisx0208_param *)filt->opaque)->mode; + + s = c; + + if ((mode & MBFL_FILT_TL_HAN2ZEN_ALL) + && c >= 0x21 && c <= 0x7d && c != 0x22 && c != 0x27 && c != 0x5c) { + /* all except <"> <'> <\> <~> */ + s = c + 0xfee0; + } else if ((mode & MBFL_FILT_TL_HAN2ZEN_ALPHA) && + ((c >= 0x41 && c <= 0x5a) || (c >= 0x61 && c <= 0x7a))) { + /* alpha */ + s = c + 0xfee0; + } else if ((mode & MBFL_FILT_TL_HAN2ZEN_NUMERIC) && + c >= 0x30 && c <= 0x39) { + /* num */ + s = c + 0xfee0; + } else if ((mode & MBFL_FILT_TL_HAN2ZEN_SPACE) && c == 0x20) { + /* space */ + s = 0x3000; + } + + if (mode & + (MBFL_FILT_TL_HAN2ZEN_KATAKANA | MBFL_FILT_TL_HAN2ZEN_HIRAGANA)) { + /* hankaku kana to zenkaku kana */ + if ((mode & MBFL_FILT_TL_HAN2ZEN_KATAKANA) && + (mode & MBFL_FILT_TL_HAN2ZEN_GLUE)) { + /* hankaku kana to zenkaku katakana and glue voiced sound mark */ + if (c >= 0xff61 && c <= 0xff9f) { + if (filt->status) { + n = (filt->cache - 0xff60) & 0x3f; + if (c == 0xff9e && ((n >= 22 && n <= 36) || (n >= 42 && n <= 46))) { + filt->status = 0; + s = 0x3001 + hankana2zenkana_table[n]; + } else if (c == 0xff9e && n == 19) { + filt->status = 0; + s = 0x30f4; + } else if (c == 0xff9f && (n >= 42 && n <= 46)) { + filt->status = 0; + s = 0x3002 +
hankana2zenkana_table[n]; + } else { + filt->status = 1; + filt->cache = c; + s = 0x3000 + hankana2zenkana_table[n]; + } + } else { + filt->status = 1; + filt->cache = c; + return c; + } + } else { + if (filt->status) { + n = (filt->cache - 0xff60) & 0x3f; + filt->status = 0; + (*filt->output_function)(0x3000 + hankana2zenkana_table[n], filt->data); /* NOTE(review): result of this output call is not checked */ + } + } + } else if ((mode & MBFL_FILT_TL_HAN2ZEN_HIRAGANA) && + (mode & MBFL_FILT_TL_HAN2ZEN_GLUE)) { + /* hankaku kana to zenkaku hiragana and glue voiced sound mark */ + if (c >= 0xff61 && c <= 0xff9f) { + if (filt->status) { + n = (filt->cache - 0xff60) & 0x3f; + if (c == 0xff9e && ((n >= 22 && n <= 36) || (n >= 42 && n <= 46))) { + filt->status = 0; + s = 0x3001 + hankana2zenhira_table[n]; + } else if (c == 0xff9f && (n >= 42 && n <= 46)) { + filt->status = 0; + s = 0x3002 + hankana2zenhira_table[n]; + } else { + filt->status = 1; + filt->cache = c; + s = 0x3000 + hankana2zenhira_table[n]; + } + } else { + filt->status = 1; + filt->cache = c; + return c; + } + } else { + if (filt->status) { + n = (filt->cache - 0xff60) & 0x3f; + filt->status = 0; + (*filt->output_function)(0x3000 + hankana2zenhira_table[n], filt->data); /* NOTE(review): result of this output call is not checked */ + } + } + } else if ((mode & MBFL_FILT_TL_HAN2ZEN_KATAKANA) && + c >= 0xff61 && c <= 0xff9f) { + /* hankaku kana to zenkaku katakana */ + s = 0x3000 + hankana2zenkana_table[c - 0xff60]; + } else if ((mode & MBFL_FILT_TL_HAN2ZEN_HIRAGANA) + && c >= 0xff61 && c <= 0xff9f) { + /* hankaku kana to zenkaku hiragana */ + s = 0x3000 + hankana2zenhira_table[c - 0xff60]; + } + } + + if (mode & MBFL_FILT_TL_HAN2ZEN_COMPAT1) { + /* special ascii to symbol */ + if (c == 0x5c) { + s = 0xffe5; /* FULLWIDTH YEN SIGN */ + } else if (c == 0xa5) { /* YEN SIGN */ + s = 0xffe5; /* FULLWIDTH YEN SIGN */ + } else if (c == 0x7e) { + s = 0xffe3; /* FULLWIDTH MACRON */ + } else if (c == 0x203e) { /* OVERLINE */ + s = 0xffe3; /* FULLWIDTH MACRON */ + } else if (c == 0x27) { + s = 0x2019; /* RIGHT SINGLE QUOTATION MARK */ +
} else if (c == 0x22) { + s = 0x201d; /* RIGHT DOUBLE QUOTATION MARK */ + } + } else if (mode & MBFL_FILT_TL_HAN2ZEN_COMPAT2) { + /* special ascii to symbol */ + if (c == 0x5c) { + s = 0xff3c; /* FULLWIDTH REVERSE SOLIDUS */ + } else if (c == 0x7e) { + s = 0xff5e; /* FULLWIDTH TILDE */ + } else if (c == 0x27) { + s = 0xff07; /* FULLWIDTH APOSTROPHE */ + } else if (c == 0x22) { + s = 0xff02; /* FULLWIDTH QUOTATION MARK */ + } + } + + if (mode & 0xf0) { /* zenkaku to hankaku; 0x10, 0x20, 0x40, 0x80 are the MBFL_FILT_TL_ZEN2HAN_ALL, _ALPHA, _NUMERIC, _SPACE bits */ + if ((mode & 0x10) && c >= 0xff01 && c <= 0xff5d && c != 0xff02 && c != 0xff07 && c!= 0xff3c) { /* all except <"> <'> <\> <~> */ + s = c - 0xfee0; + } else if ((mode & 0x20) && ((c >= 0xff21 && c <= 0xff3a) || (c >= 0xff41 && c <= 0xff5a))) { /* alpha */ + s = c - 0xfee0; + } else if ((mode & 0x40) && (c >= 0xff10 && c <= 0xff19)) { /* num */ + s = c - 0xfee0; + } else if ((mode & 0x80) && (c == 0x3000)) { /* space */ + s = 0x20; + } else if ((mode & 0x10) && (c == 0x2212)) { /* MINUS SIGN */ + s = 0x2d; + } + } + + if (mode & + (MBFL_FILT_TL_ZEN2HAN_KATAKANA | MBFL_FILT_TL_ZEN2HAN_HIRAGANA)) { + /* Zenkaku kana to hankaku kana */ + if ((mode & MBFL_FILT_TL_ZEN2HAN_KATAKANA) && + c >= 0x30a1 && c <= 0x30f4) { + /* Zenkaku katakana to hankaku kana */ + n = c - 0x30a1; + if (zenkana2hankana_table[n][1] != 0) { + (filt->output_function)(0xff00 + zenkana2hankana_table[n][0], filt->data); /* NOTE(review): result of this output call is not checked */ + s = 0xff00 + zenkana2hankana_table[n][1]; + } else { + s = 0xff00 + zenkana2hankana_table[n][0]; + } + } else if ((mode & MBFL_FILT_TL_ZEN2HAN_HIRAGANA) && + c >= 0x3041 && c <= 0x3093) { + /* Zenkaku hiragana to hankaku kana */ + n = c - 0x3041; + if (zenkana2hankana_table[n][1] != 0) { + (filt->output_function)(0xff00 + zenkana2hankana_table[n][0], filt->data); /* NOTE(review): result of this output call is not checked */ + s = 0xff00 + zenkana2hankana_table[n][1]; + } else { + s = 0xff00 + zenkana2hankana_table[n][0]; + } + } else if (c == 0x3001) { + s = 0xff64; /* HALFWIDTH IDEOGRAPHIC COMMA */ + } else if (c == 0x3002) { + s = 0xff61; /* HALFWIDTH IDEOGRAPHIC FULL
STOP */ + } else if (c == 0x300c) { + s = 0xff62; /* HALFWIDTH LEFT CORNER BRACKET */ + } else if (c == 0x300d) { + s = 0xff63; /* HALFWIDTH RIGHT CORNER BRACKET */ + } else if (c == 0x309b) { + s = 0xff9e; /* HALFWIDTH KATAKANA VOICED SOUND MARK */ + } else if (c == 0x309c) { + s = 0xff9f; /* HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK */ + } else if (c == 0x30fc) { + s = 0xff70; /* HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK */ + } else if (c == 0x30fb) { + s = 0xff65; /* HALFWIDTH KATAKANA MIDDLE DOT */ + } + } else if (mode & (MBFL_FILT_TL_ZEN2HAN_HIRA2KANA + | MBFL_FILT_TL_ZEN2HAN_KANA2HIRA)) { + if ((mode & MBFL_FILT_TL_ZEN2HAN_HIRA2KANA) && + c >= 0x3041 && c <= 0x3093) { + /* Zenkaku hiragana to Zenkaku katakana */ + s = c + 0x60; + } else if ((mode & MBFL_FILT_TL_ZEN2HAN_KANA2HIRA) && + c >= 0x30a1 && c <= 0x30f3) { + /* Zenkaku katakana to Zenkaku hiragana */ + s = c - 0x60; + } + } + + if (mode & MBFL_FILT_TL_ZEN2HAN_COMPAT1) { /* special symbol to ascii */ + if (c == 0xffe5) { /* FULLWIDTH YEN SIGN */ + s = 0x5c; + } else if (c == 0xff3c) { /* FULLWIDTH REVERSE SOLIDUS */ + s = 0x5c; + } else if (c == 0xffe3) { /* FULLWIDTH MACRON */ + s = 0x7e; + } else if (c == 0x203e) { /* OVERLINE */ + s = 0x7e; + } else if (c == 0x2018) { /* LEFT SINGLE QUOTATION MARK */ + s = 0x27; + } else if (c == 0x2019) { /* RIGHT SINGLE QUOTATION MARK */ + s = 0x27; + } else if (c == 0x201c) { /* LEFT DOUBLE QUOTATION MARK */ + s = 0x22; + } else if (c == 0x201d) { /* RIGHT DOUBLE QUOTATION MARK */ + s = 0x22; + } + } + + if (mode & MBFL_FILT_TL_ZEN2HAN_COMPAT2) { /* special symbol to ascii */ + if (c == 0xff3c) { /* FULLWIDTH REVERSE SOLIDUS */ + s = 0x5c; + } else if (c == 0xff5e) { /* FULLWIDTH TILDE */ + s = 0x7e; + } else if (c == 0xff07) { /* FULLWIDTH APOSTROPHE */ + s = 0x27; + } else if (c == 0xff02) { /* FULLWIDTH QUOTATION MARK */ + s = 0x22; + } + } + + return (*filt->output_function)(s, filt->data); +} +/* Flush: emits the halfwidth kana still parked in filt->cache (if any) in its zenkaku form, then calls filt->flush_function when it is set. NOTE(review): when flush_function is non-NULL its result is returned and ret from the pending-kana output is dropped. */ +int
+mbfl_filt_tl_jisx0201_jisx0208_flush(mbfl_convert_filter *filt) +{ + int ret, n; + int mode = ((mbfl_filt_tl_jisx0201_jisx0208_param *)filt->opaque)->mode; + + ret = 0; + if (filt->status) { + n = (filt->cache - 0xff60) & 0x3f; + if (mode & 0x100) { /* hankaku kana to zenkaku katakana (0x100 is MBFL_FILT_TL_HAN2ZEN_KATAKANA) */ + ret = (*filt->output_function)(0x3000 + hankana2zenkana_table[n], filt->data); + } else if (mode & 0x200) { /* hankaku kana to zenkaku hiragana (0x200 is MBFL_FILT_TL_HAN2ZEN_HIRAGANA) */ + ret = (*filt->output_function)(0x3000 + hankana2zenhira_table[n], filt->data); + } + filt->status = 0; + } + + if (filt->flush_function != NULL) { + return (*filt->flush_function)(filt->data); + } + + return ret; +} +/* Convert vtable (wchar to wchar) wiring up the init, cleanup, filter and flush functions of this file */ +const struct mbfl_convert_vtbl vtbl_tl_jisx0201_jisx0208 = { + mbfl_no_encoding_wchar, + mbfl_no_encoding_wchar, + mbfl_filt_tl_jisx0201_jisx0208_init, + mbfl_filt_tl_jisx0201_jisx0208_cleanup, + mbfl_filt_tl_jisx0201_jisx0208, + mbfl_filt_tl_jisx0201_jisx0208_flush +}; + diff --git a/ext/mbstring/libmbfl/filters/mbfilter_tl_jisx0201_jisx0208.h b/ext/mbstring/libmbfl/filters/mbfilter_tl_jisx0201_jisx0208.h new file mode 100644 index 0000000000..b52b38ee93 --- /dev/null +++ b/ext/mbstring/libmbfl/filters/mbfilter_tl_jisx0201_jisx0208.h @@ -0,0 +1,79 @@ +/* + * "streamable kanji code filter and converter" + * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. + * + * LICENSE NOTICES + * + * This file is part of "streamable kanji code filter and converter", + * which is distributed under the terms of GNU Lesser General Public + * License (version 2) as published by the Free Software Foundation. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details.
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with "streamable kanji code filter and converter"; + * if not, write to the Free Software Foundation, Inc., 59 Temple Place, + * Suite 330, Boston, MA 02111-1307 USA + * + * The author of this file: Moriyoshi Koizumi + * + */ + +#ifndef MBFILTER_TL_KANA_JISX0201_JISX0208_H +#define MBFILTER_TL_KANA_JISX0201_JISX0208_H + +#include "mbfl_convert.h" +/* Bit flags for mbfl_filt_tl_jisx0201_jisx0208_param.mode. NOTE(review): despite the ZEN2HAN prefix, HIRA2KANA and KANA2HIRA are used in mbfilter_tl_jisx0201_jisx0208.c to convert between zenkaku hiragana and zenkaku katakana. */ +#define MBFL_FILT_TL_HAN2ZEN_ALL 0x00000001 +#define MBFL_FILT_TL_HAN2ZEN_ALPHA 0x00000002 +#define MBFL_FILT_TL_HAN2ZEN_NUMERIC 0x00000004 +#define MBFL_FILT_TL_HAN2ZEN_SPACE 0x00000008 +#define MBFL_FILT_TL_ZEN2HAN_ALL 0x00000010 +#define MBFL_FILT_TL_ZEN2HAN_ALPHA 0x00000020 +#define MBFL_FILT_TL_ZEN2HAN_NUMERIC 0x00000040 +#define MBFL_FILT_TL_ZEN2HAN_SPACE 0x00000080 +#define MBFL_FILT_TL_HAN2ZEN_KATAKANA 0x00000100 +#define MBFL_FILT_TL_HAN2ZEN_HIRAGANA 0x00000200 +#define MBFL_FILT_TL_HAN2ZEN_GLUE 0x00000800 +#define MBFL_FILT_TL_ZEN2HAN_KATAKANA 0x00001000 +#define MBFL_FILT_TL_ZEN2HAN_HIRAGANA 0x00002000 +#define MBFL_FILT_TL_ZEN2HAN_HIRA2KANA 0x00010000 +#define MBFL_FILT_TL_ZEN2HAN_KANA2HIRA 0x00020000 +#define MBFL_FILT_TL_HAN2ZEN_COMPAT1 0x00100000 +#define MBFL_FILT_TL_ZEN2HAN_COMPAT1 0x00200000 +#define MBFL_FILT_TL_HAN2ZEN_COMPAT2 0x00400000 +#define MBFL_FILT_TL_ZEN2HAN_COMPAT2 0x00800000 +#define MBFL_FILT_TL_HAN2ZEN_MASK ( \ + MBFL_FILT_TL_HAN2ZEN_ALL |\ + MBFL_FILT_TL_HAN2ZEN_ALPHA |\ + MBFL_FILT_TL_HAN2ZEN_NUMERIC |\ + MBFL_FILT_TL_HAN2ZEN_SPACE |\ + MBFL_FILT_TL_HAN2ZEN_KATAKANA |\ + MBFL_FILT_TL_HAN2ZEN_HIRAGANA |\ + MBFL_FILT_TL_HAN2ZEN_GLUE |\ + MBFL_FILT_TL_HAN2ZEN_COMPAT1 |\ + MBFL_FILT_TL_HAN2ZEN_COMPAT2) +#define MBFL_FILT_TL_ZEN2HAN_MASK ( \ + MBFL_FILT_TL_ZEN2HAN_ALL | \ + MBFL_FILT_TL_ZEN2HAN_ALPHA | \ + MBFL_FILT_TL_ZEN2HAN_NUMERIC | \ + MBFL_FILT_TL_ZEN2HAN_SPACE | \ + MBFL_FILT_TL_ZEN2HAN_KATAKANA | \ + MBFL_FILT_TL_ZEN2HAN_HIRAGANA | \ + MBFL_FILT_TL_ZEN2HAN_HIRA2KANA | \ +
MBFL_FILT_TL_ZEN2HAN_KANA2HIRA | \ + MBFL_FILT_TL_ZEN2HAN_COMPAT1 | \ + MBFL_FILT_TL_ZEN2HAN_COMPAT2) + + +typedef struct _mbfl_filt_tl_jisx0201_jisx0208_param { + mbfl_convert_filter *next_filter; + int mode; /* bitwise OR of the MBFL_FILT_TL_ flags above; read through filt->opaque by the filter and flush functions */ +} mbfl_filt_tl_jisx0201_jisx0208_param; + +extern const struct mbfl_convert_vtbl vtbl_tl_jisx0201_jisx0208; + +#endif /* MBFILTER_TL_KANA_JISX0201_JISX0208_H */ diff --git a/ext/mbstring/libmbfl/filters/mbfilter_utf7.c b/ext/mbstring/libmbfl/filters/mbfilter_utf7.c index ea37073761..ad0205bee1 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_utf7.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_utf7.c @@ -405,6 +405,11 @@ int mbfl_filt_conv_wchar_utf7_flush(mbfl_convert_filter *filter) CK((*filter->output_function)(0x2d, filter->data)); /* '-' */ break; } + + if (filter->flush_function != NULL) { + (*filter->flush_function)(filter->data); /* NOTE(review): result of the downstream flush is discarded; 0 is returned below regardless */ + } + return 0; } diff --git a/ext/mbstring/libmbfl/filters/translit_kana_jisx0201_jisx0208.h b/ext/mbstring/libmbfl/filters/translit_kana_jisx0201_jisx0208.h new file mode 100644 index 0000000000..4f8f4b7b11 --- /dev/null +++ b/ext/mbstring/libmbfl/filters/translit_kana_jisx0201_jisx0208.h @@ -0,0 +1,67 @@ +/* + * "streamable kanji code filter and converter" + * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. + * + * LICENSE NOTICES + * + * This file is part of "streamable kanji code filter and converter", + * which is distributed under the terms of GNU Lesser General Public + * License (version 2) as published by the Free Software Foundation. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details.
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with "streamable kanji code filter and converter"; + * if not, write to the Free Software Foundation, Inc., 59 Temple Place, + * Suite 330, Boston, MA 02111-1307 USA + * + * The author of this file: Moriyoshi Koizumi + * + */ + +#ifndef TRANSLIT_KANA_JISX0201_JISX0208_H +#define TRANSLIT_KANA_JISX0201_JISX0208_H +/* Halfwidth kana to zenkaku katakana: the filter indexes this with (c - 0xff60) and adds the entry to 0x3000 (0x3001 or 0x3002 when a voiced or semi-voiced mark is glued on) */ +static const unsigned char hankana2zenkana_table[64] = { + 0x00,0x02,0x0C,0x0D,0x01,0xFB,0xF2,0xA1,0xA3,0xA5, + 0xA7,0xA9,0xE3,0xE5,0xE7,0xC3,0xFC,0xA2,0xA4,0xA6, + 0xA8,0xAA,0xAB,0xAD,0xAF,0xB1,0xB3,0xB5,0xB7,0xB9, + 0xBB,0xBD,0xBF,0xC1,0xC4,0xC6,0xC8,0xCA,0xCB,0xCC, + 0xCD,0xCE,0xCF,0xD2,0xD5,0xD8,0xDB,0xDE,0xDF,0xE0, + 0xE1,0xE2,0xE4,0xE6,0xE8,0xE9,0xEA,0xEB,0xEC,0xED, + 0xEF,0xF3,0x9B,0x9C +}; +/* Halfwidth kana to zenkaku hiragana: same indexing and 0x3000 base as hankana2zenkana_table */ +static const unsigned char hankana2zenhira_table[64] = { + 0x00,0x02,0x0C,0x0D,0x01,0xFB,0x92,0x41,0x43,0x45, + 0x47,0x49,0x83,0x85,0x87,0x63,0xFC,0x42,0x44,0x46, + 0x48,0x4A,0x4B,0x4D,0x4F,0x51,0x53,0x55,0x57,0x59, + 0x5B,0x5D,0x5F,0x61,0x64,0x66,0x68,0x6A,0x6B,0x6C, + 0x6D,0x6E,0x6F,0x72,0x75,0x78,0x7B,0x7E,0x7F,0x80, + 0x81,0x82,0x84,0x86,0x88,0x89,0x8A,0x8B,0x8C,0x8D, + 0x8F,0x93,0x9B,0x9C +}; +static const unsigned char zenkana2hankana_table[84][2] = { /* indexed by (c - 0x30a1) for katakana or (c - 0x3041) for hiragana; each entry is added to 0xff00; a non-zero second entry is emitted after the first */ + {0x67,0x00},{0x71,0x00},{0x68,0x00},{0x72,0x00},{0x69,0x00}, + {0x73,0x00},{0x6A,0x00},{0x74,0x00},{0x6B,0x00},{0x75,0x00}, + {0x76,0x00},{0x76,0x9E},{0x77,0x00},{0x77,0x9E},{0x78,0x00}, + {0x78,0x9E},{0x79,0x00},{0x79,0x9E},{0x7A,0x00},{0x7A,0x9E}, + {0x7B,0x00},{0x7B,0x9E},{0x7C,0x00},{0x7C,0x9E},{0x7D,0x00}, + {0x7D,0x9E},{0x7E,0x00},{0x7E,0x9E},{0x7F,0x00},{0x7F,0x9E}, + {0x80,0x00},{0x80,0x9E},{0x81,0x00},{0x81,0x9E},{0x6F,0x00}, + {0x82,0x00},{0x82,0x9E},{0x83,0x00},{0x83,0x9E},{0x84,0x00}, + {0x84,0x9E},{0x85,0x00},{0x86,0x00},{0x87,0x00},{0x88,0x00}, + {0x89,0x00},{0x8A,0x00},{0x8A,0x9E},{0x8A,0x9F},{0x8B,0x00}, + {0x8B,0x9E},{0x8B,0x9F},{0x8C,0x00},{0x8C,0x9E},{0x8C,0x9F}, +
{0x8D,0x00},{0x8D,0x9E},{0x8D,0x9F},{0x8E,0x00},{0x8E,0x9E}, + {0x8E,0x9F},{0x8F,0x00},{0x90,0x00},{0x91,0x00},{0x92,0x00}, + {0x93,0x00},{0x6C,0x00},{0x94,0x00},{0x6D,0x00},{0x95,0x00}, + {0x6E,0x00},{0x96,0x00},{0x97,0x00},{0x98,0x00},{0x99,0x00}, + {0x9A,0x00},{0x9B,0x00},{0x9C,0x00},{0x9C,0x00},{0x72,0x00}, + {0x74,0x00},{0x66,0x00},{0x9D,0x00},{0x73,0x9E} +}; + +#endif /* TRANSLIT_KANA_JISX0201_JISX0208_H */ diff --git a/ext/mbstring/libmbfl/libmbfl.dsp b/ext/mbstring/libmbfl/libmbfl.dsp deleted file mode 100644 index a6d9cfc720..0000000000 --- a/ext/mbstring/libmbfl/libmbfl.dsp +++ /dev/null @@ -1,807 +0,0 @@ -# Microsoft Developer Studio Project File - Name="libmbfl" - Package Owner=<4> -# Microsoft Developer Studio Generated Build File, Format Version 6.00 -# ** DO NOT EDIT ** - -# TARGTYPE "Win32 (x86) Dynamic-Link Library" 0x0102 - -CFG=libmbfl - Win32 Debug -!MESSAGE This is not a valid makefile. To build this project using NMAKE, -!MESSAGE use the Export Makefile command and run -!MESSAGE -!MESSAGE NMAKE /f "libmbfl.mak". -!MESSAGE -!MESSAGE You can specify a configuration when running NMAKE -!MESSAGE by defining the macro CFG on the command line.
For example: -!MESSAGE -!MESSAGE NMAKE /f "libmbfl.mak" CFG="libmbfl - Win32 Debug" -!MESSAGE -!MESSAGE Possible choices for configuration are: -!MESSAGE -!MESSAGE "libmbfl - Win32 Release" (based on "Win32 (x86) Dynamic-Link Library") -!MESSAGE "libmbfl - Win32 Debug" (based on "Win32 (x86) Dynamic-Link Library") -!MESSAGE - -# Begin Project -# PROP AllowPerConfigDependencies 0 -# PROP Scc_ProjName "" -# PROP Scc_LocalPath "" -CPP=cl.exe -MTL=midl.exe -RSC=rc.exe - -!IF "$(CFG)" == "libmbfl - Win32 Release" - -# PROP BASE Use_MFC 0 -# PROP BASE Use_Debug_Libraries 0 -# PROP BASE Output_Dir "Release" -# PROP BASE Intermediate_Dir "Release" -# PROP BASE Target_Dir "" -# PROP Use_MFC 0 -# PROP Use_Debug_Libraries 0 -# PROP Output_Dir "Release" -# PROP Intermediate_Dir "Release" -# PROP Target_Dir "" -# ADD BASE CPP /nologo /MT /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D "LIBMBFL_EXPORTS" /YX /FD /c -# ADD CPP /nologo /MT /W3 /GX /O2 /I "mbfl" /I "." /D "WIN32" /D "NDEBUG" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D "LIBMBFL_EXPORTS" /D "HAVE_CONFIG_H" /YX /FD /c -# ADD BASE MTL /nologo /D "NDEBUG" /mktyplib203 /win32 -# ADD MTL /nologo /D "NDEBUG" /mktyplib203 /win32 -# ADD BASE RSC /l 0x411 /d "NDEBUG" -# ADD RSC /l 0x409 /d "NDEBUG" -BSC32=bscmake.exe -# ADD BASE BSC32 /nologo -# ADD BSC32 /nologo -LINK32=link.exe -# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /dll /machine:I386 -# ADD LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /dll /machine:I386 - -!ELSEIF "$(CFG)" == "libmbfl - Win32 Debug" - -# PROP BASE Use_MFC 0 -# PROP BASE Use_Debug_Libraries 1 -# PROP BASE Output_Dir "Debug" -# PROP BASE Intermediate_Dir "Debug" -# PROP BASE Target_Dir "" -# PROP Use_MFC 0 -# PROP Use_Debug_Libraries 1 -# PROP 
Output_Dir "Debug" -# PROP Intermediate_Dir "Debug" -# PROP Ignore_Export_Lib 0 -# PROP Target_Dir "" -# ADD BASE CPP /nologo /MTd /W3 /Gm /GX /ZI /Od /D "WIN32" /D "_DEBUG" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D "LIBMBFL_EXPORTS" /YX /FD /GZ /c -# ADD CPP /nologo /MTd /W3 /Gm /GX /ZI /Od /I "mbfl" /I "." /D "WIN32" /D "_DEBUG" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D "LIBMBFL_EXPORTS" /D "MBFL_DLL_EXPORT" /D HAVE_CONFIG_H=1 /YX /FD /GZ /c -# ADD BASE MTL /nologo /D "_DEBUG" /mktyplib203 /win32 -# ADD MTL /nologo /D "_DEBUG" /mktyplib203 /win32 -# ADD BASE RSC /l 0x411 /d "_DEBUG" -# ADD RSC /l 0x411 /d "_DEBUG" -BSC32=bscmake.exe -# ADD BASE BSC32 /nologo -# ADD BSC32 /nologo -LINK32=link.exe -# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /dll /debug /machine:I386 /pdbtype:sept -# ADD LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /dll /debug /machine:I386 /pdbtype:sept - -!ENDIF - -# Begin Target - -# Name "libmbfl - Win32 Release" -# Name "libmbfl - Win32 Debug" -# Begin Group "Source Files" - -# PROP Default_Filter "vc6" -# Begin Source File - -SOURCE=.\filters\html_entities.c -# End Source File -# Begin Source File - -SOURCE=.\mbfl\mbfilter.c -# End Source File -# Begin Source File - -SOURCE=.\filters\mbfilter_7bit.c -# End Source File -# Begin Source File - -SOURCE=.\mbfl\mbfilter_8bit.c -# End Source File -# Begin Source File - -SOURCE=.\filters\mbfilter_ascii.c -# End Source File -# Begin Source File - -SOURCE=.\filters\mbfilter_base64.c -# End Source File -# Begin Source File - -SOURCE=.\filters\mbfilter_big5.c -# End Source File -# Begin Source File - -SOURCE=.\filters\mbfilter_byte2.c -# End Source File -# Begin Source File - -SOURCE=.\filters\mbfilter_byte4.c -# End Source File -# Begin Source File - 
-SOURCE=.\filters\mbfilter_cp1251.c -# End Source File -# Begin Source File - -SOURCE=.\filters\mbfilter_cp1252.c -# End Source File -# Begin Source File - -SOURCE=.\filters\mbfilter_cp866.c -# End Source File -# Begin Source File - -SOURCE=.\filters\mbfilter_cp932.c -# End Source File -# Begin Source File - -SOURCE=.\filters\mbfilter_cp936.c -# End Source File -# Begin Source File - -SOURCE=.\filters\mbfilter_euc_cn.c -# End Source File -# Begin Source File - -SOURCE=.\filters\mbfilter_euc_jp.c -# End Source File -# Begin Source File - -SOURCE=.\filters\mbfilter_euc_jp_win.c -# End Source File -# Begin Source File - -SOURCE=.\filters\mbfilter_euc_kr.c -# End Source File -# Begin Source File - -SOURCE=.\filters\mbfilter_euc_tw.c -# End Source File -# Begin Source File - -SOURCE=.\filters\mbfilter_htmlent.c -# End Source File -# Begin Source File - -SOURCE=.\filters\mbfilter_hz.c -# End Source File -# Begin Source File - -SOURCE=.\filters\mbfilter_iso2022_kr.c -# End Source File -# Begin Source File - -SOURCE=.\filters\mbfilter_iso8859_1.c -# End Source File -# Begin Source File - -SOURCE=.\filters\mbfilter_iso8859_10.c -# End Source File -# Begin Source File - -SOURCE=.\filters\mbfilter_iso8859_13.c -# End Source File -# Begin Source File - -SOURCE=.\filters\mbfilter_iso8859_14.c -# End Source File -# Begin Source File - -SOURCE=.\filters\mbfilter_iso8859_15.c -# End Source File -# Begin Source File - -SOURCE=.\filters\mbfilter_iso8859_16.c -# End Source File -# Begin Source File - -SOURCE=.\filters\mbfilter_iso8859_2.c -# End Source File -# Begin Source File - -SOURCE=.\filters\mbfilter_iso8859_3.c -# End Source File -# Begin Source File - -SOURCE=.\filters\mbfilter_iso8859_4.c -# End Source File -# Begin Source File - -SOURCE=.\filters\mbfilter_iso8859_5.c -# End Source File -# Begin Source File - -SOURCE=.\filters\mbfilter_iso8859_6.c -# End Source File -# Begin Source File - -SOURCE=.\filters\mbfilter_iso8859_7.c -# End Source File -# Begin Source File - 
-SOURCE=.\filters\mbfilter_iso8859_8.c -# End Source File -# Begin Source File - -SOURCE=.\filters\mbfilter_iso8859_9.c -# End Source File -# Begin Source File - -SOURCE=.\filters\mbfilter_jis.c -# End Source File -# Begin Source File - -SOURCE=.\filters\mbfilter_koi8r.c -# End Source File -# Begin Source File - -SOURCE=.\filters\mbfilter_koi8u.c -# End Source File -# Begin Source File - -SOURCE=.\filters\mbfilter_armscii8.c -# End Source File -# Begin Source File - -SOURCE=.\mbfl\mbfilter_pass.c -# End Source File -# Begin Source File - -SOURCE=.\filters\mbfilter_qprint.c -# End Source File -# Begin Source File - -SOURCE=.\filters\mbfilter_sjis.c -# End Source File -# Begin Source File - -SOURCE=.\filters\mbfilter_ucs2.c -# End Source File -# Begin Source File - -SOURCE=.\filters\mbfilter_ucs4.c -# End Source File -# Begin Source File - -SOURCE=.\filters\mbfilter_uhc.c -# End Source File -# Begin Source File - -SOURCE=.\filters\mbfilter_utf16.c -# End Source File -# Begin Source File - -SOURCE=.\filters\mbfilter_utf32.c -# End Source File -# Begin Source File - -SOURCE=.\filters\mbfilter_utf7.c -# End Source File -# Begin Source File - -SOURCE=.\filters\mbfilter_utf7imap.c -# End Source File -# Begin Source File - -SOURCE=.\filters\mbfilter_utf8.c -# End Source File -# Begin Source File - -SOURCE=.\filters\mbfilter_uuencode.c -# End Source File -# Begin Source File - -SOURCE=.\mbfl\mbfilter_wchar.c -# End Source File -# Begin Source File - -SOURCE=.\mbfl\mbfl_allocators.c -# End Source File -# Begin Source File - -SOURCE=.\mbfl\mbfl_convert.c -# End Source File -# Begin Source File - -SOURCE=.\mbfl\mbfl_encoding.c -# End Source File -# Begin Source File - -SOURCE=.\mbfl\mbfl_filter_output.c -# End Source File -# Begin Source File - -SOURCE=.\mbfl\mbfl_ident.c -# End Source File -# Begin Source File - -SOURCE=.\mbfl\mbfl_language.c -# End Source File -# Begin Source File - -SOURCE=.\mbfl\mbfl_memory_device.c -# End Source File -# Begin Source File - 
-SOURCE=.\mbfl\mbfl_string.c -# End Source File -# Begin Source File - -SOURCE=.\nls\nls_de.c -# End Source File -# Begin Source File - -SOURCE=.\nls\nls_en.c -# End Source File -# Begin Source File - -SOURCE=.\nls\nls_ja.c -# End Source File -# Begin Source File - -SOURCE=.\nls\nls_hy.c -# End Source File -# Begin Source File - -SOURCE=.\nls\nls_kr.c -# End Source File -# Begin Source File - -SOURCE=.\nls\nls_neutral.c -# End Source File -# Begin Source File - -SOURCE=.\nls\nls_ru.c -# End Source File -# Begin Source File - -SOURCE=.\nls\nls_uni.c -# End Source File -# Begin Source File - -SOURCE=.\nls\nls_zh.c -# End Source File -# End Group -# Begin Group "Header Files" - -# PROP Default_Filter "h;hpp;hxx;hm;inl" -# Begin Source File - -SOURCE=.\config.h.vc6 - -!IF "$(CFG)" == "libmbfl - Win32 Release" - -# Begin Custom Build -InputDir=. -InputPath=.\config.h.vc6 - -"$(InputDir)\config.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" - copy $(InputDir)\config.h.vc6 "$(InputDir)\config.h" - -# End Custom Build - -!ELSEIF "$(CFG)" == "libmbfl - Win32 Debug" - -# Begin Custom Build -InputDir=. 
-InputPath=.\config.h.vc6 - -"$(InputDir)\config.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" - copy $(InputDir)\config.h.vc6 "$(InputDir)\config.h" - -# End Custom Build - -!ENDIF - -# End Source File -# Begin Source File - -SOURCE=.\filters\cp932_table.h -# End Source File -# Begin Source File - -SOURCE=.\filters\html_entities.h -# End Source File -# Begin Source File - -SOURCE=.\mbfl\mbfilter.h -# End Source File -# Begin Source File - -SOURCE=.\filters\mbfilter_7bit.h -# End Source File -# Begin Source File - -SOURCE=.\mbfl\mbfilter_8bit.h -# End Source File -# Begin Source File - -SOURCE=.\filters\mbfilter_ascii.h -# End Source File -# Begin Source File - -SOURCE=.\filters\mbfilter_base64.h -# End Source File -# Begin Source File - -SOURCE=.\filters\mbfilter_big5.h -# End Source File -# Begin Source File - -SOURCE=.\filters\mbfilter_byte2.h -# End Source File -# Begin Source File - -SOURCE=.\filters\mbfilter_byte4.h -# End Source File -# Begin Source File - -SOURCE=.\filters\mbfilter_cp1251.h -# End Source File -# Begin Source File - -SOURCE=.\filters\mbfilter_cp1252.h -# End Source File -# Begin Source File - -SOURCE=.\filters\mbfilter_cp866.h -# End Source File -# Begin Source File - -SOURCE=.\filters\mbfilter_cp932.h -# End Source File -# Begin Source File - -SOURCE=.\filters\mbfilter_cp936.h -# End Source File -# Begin Source File - -SOURCE=.\filters\mbfilter_euc_cn.h -# End Source File -# Begin Source File - -SOURCE=.\filters\mbfilter_euc_jp.h -# End Source File -# Begin Source File - -SOURCE=.\filters\mbfilter_euc_jp_win.h -# End Source File -# Begin Source File - -SOURCE=.\filters\mbfilter_euc_kr.h -# End Source File -# Begin Source File - -SOURCE=.\filters\mbfilter_euc_tw.h -# End Source File -# Begin Source File - -SOURCE=.\filters\mbfilter_htmlent.h -# End Source File -# Begin Source File - -SOURCE=.\filters\mbfilter_hz.h -# End Source File -# Begin Source File - -SOURCE=.\filters\mbfilter_iso2022_kr.h -# End Source File -# Begin Source File - 
-SOURCE=.\filters\mbfilter_iso8859_1.h -# End Source File -# Begin Source File - -SOURCE=.\filters\mbfilter_iso8859_10.h -# End Source File -# Begin Source File - -SOURCE=.\filters\mbfilter_iso8859_13.h -# End Source File -# Begin Source File - -SOURCE=.\filters\mbfilter_iso8859_14.h -# End Source File -# Begin Source File - -SOURCE=.\filters\mbfilter_iso8859_15.h -# End Source File -# Begin Source File - -SOURCE=.\filters\mbfilter_iso8859_16.h -# End Source File -# Begin Source File - -SOURCE=.\filters\mbfilter_iso8859_2.h -# End Source File -# Begin Source File - -SOURCE=.\filters\mbfilter_iso8859_3.h -# End Source File -# Begin Source File - -SOURCE=.\filters\mbfilter_iso8859_4.h -# End Source File -# Begin Source File - -SOURCE=.\filters\mbfilter_iso8859_5.h -# End Source File -# Begin Source File - -SOURCE=.\filters\mbfilter_iso8859_6.h -# End Source File -# Begin Source File - -SOURCE=.\filters\mbfilter_iso8859_7.h -# End Source File -# Begin Source File - -SOURCE=.\filters\mbfilter_iso8859_8.h -# End Source File -# Begin Source File - -SOURCE=.\filters\mbfilter_iso8859_9.h -# End Source File -# Begin Source File - -SOURCE=.\filters\mbfilter_jis.h -# End Source File -# Begin Source File - -SOURCE=.\filters\mbfilter_koi8r.h -# End Source File -# Begin Source File - -SOURCE=.\filters\mbfilter_koi8u.h -# End Source File -# Begin Source File - -SOURCE=.\filters\mbfilter_armscii8.h -# End Source File -# Begin Source File - -SOURCE=.\mbfl\mbfilter_pass.h -# End Source File -# Begin Source File - -SOURCE=.\filters\mbfilter_qprint.h -# End Source File -# Begin Source File - -SOURCE=.\filters\mbfilter_sjis.h -# End Source File -# Begin Source File - -SOURCE=.\filters\mbfilter_ucs2.h -# End Source File -# Begin Source File - -SOURCE=.\filters\mbfilter_ucs4.h -# End Source File -# Begin Source File - -SOURCE=.\filters\mbfilter_uhc.h -# End Source File -# Begin Source File - -SOURCE=.\filters\mbfilter_utf16.h -# End Source File -# Begin Source File - 
-SOURCE=.\filters\mbfilter_utf32.h -# End Source File -# Begin Source File - -SOURCE=.\filters\mbfilter_utf7.h -# End Source File -# Begin Source File - -SOURCE=.\filters\mbfilter_utf7imap.h -# End Source File -# Begin Source File - -SOURCE=.\filters\mbfilter_utf8.h -# End Source File -# Begin Source File - -SOURCE=.\filters\mbfilter_uuencode.h -# End Source File -# Begin Source File - -SOURCE=.\mbfl\mbfilter_wchar.h -# End Source File -# Begin Source File - -SOURCE=.\mbfl\mbfl_allocators.h -# End Source File -# Begin Source File - -SOURCE=.\mbfl\mbfl_consts.h -# End Source File -# Begin Source File - -SOURCE=.\mbfl\mbfl_convert.h -# End Source File -# Begin Source File - -SOURCE=.\mbfl\mbfl_encoding.h -# End Source File -# Begin Source File - -SOURCE=.\mbfl\mbfl_filter_output.h -# End Source File -# Begin Source File - -SOURCE=.\mbfl\mbfl_ident.h -# End Source File -# Begin Source File - -SOURCE=.\mbfl\mbfl_language.h -# End Source File -# Begin Source File - -SOURCE=.\mbfl\mbfl_memory_device.h -# End Source File -# Begin Source File - -SOURCE=.\mbfl\mbfl_string.h -# End Source File -# Begin Source File - -SOURCE=.\nls\nls_de.h -# End Source File -# Begin Source File - -SOURCE=.\nls\nls_en.h -# End Source File -# Begin Source File - -SOURCE=.\nls\nls_ja.h -# End Source File -# Begin Source File - -SOURCE=.\nls\nls_hy.h -# End Source File -# Begin Source File - -SOURCE=.\nls\nls_kr.h -# End Source File -# Begin Source File - -SOURCE=.\nls\nls_neutral.h -# End Source File -# Begin Source File - -SOURCE=.\nls\nls_ru.h -# End Source File -# Begin Source File - -SOURCE=.\nls\nls_uni.h -# End Source File -# Begin Source File - -SOURCE=.\nls\nls_zh.h -# End Source File -# Begin Source File - -SOURCE=.\filters\unicode_prop.h -# End Source File -# Begin Source File - -SOURCE=.\filters\unicode_table_big5.h -# End Source File -# Begin Source File - -SOURCE=.\filters\unicode_table_cns11643.h -# End Source File -# Begin Source File - -SOURCE=.\filters\unicode_table_cp1251.h -# 
End Source File -# Begin Source File - -SOURCE=.\filters\unicode_table_cp1252.h -# End Source File -# Begin Source File - -SOURCE=.\filters\unicode_table_cp866.h -# End Source File -# Begin Source File - -SOURCE=.\filters\unicode_table_cp932_ext.h -# End Source File -# Begin Source File - -SOURCE=.\filters\unicode_table_cp936.h -# End Source File -# Begin Source File - -SOURCE=.\filters\unicode_table_iso8859_10.h -# End Source File -# Begin Source File - -SOURCE=.\filters\unicode_table_iso8859_13.h -# End Source File -# Begin Source File - -SOURCE=.\filters\unicode_table_iso8859_14.h -# End Source File -# Begin Source File - -SOURCE=.\filters\unicode_table_iso8859_15.h -# End Source File -# Begin Source File - -SOURCE=.\filters\unicode_table_iso8859_16.h -# End Source File -# Begin Source File - -SOURCE=.\filters\unicode_table_iso8859_2.h -# End Source File -# Begin Source File - -SOURCE=.\filters\unicode_table_iso8859_3.h -# End Source File -# Begin Source File - -SOURCE=.\filters\unicode_table_iso8859_4.h -# End Source File -# Begin Source File - -SOURCE=.\filters\unicode_table_iso8859_5.h -# End Source File -# Begin Source File - -SOURCE=.\filters\unicode_table_iso8859_6.h -# End Source File -# Begin Source File - -SOURCE=.\filters\unicode_table_iso8859_7.h -# End Source File -# Begin Source File - -SOURCE=.\filters\unicode_table_iso8859_8.h -# End Source File -# Begin Source File - -SOURCE=.\filters\unicode_table_iso8859_9.h -# End Source File -# Begin Source File - -SOURCE=.\filters\unicode_table_jis.h -# End Source File -# Begin Source File - -SOURCE=.\filters\unicode_table_koi8r.h -# End Source File -# Begin Source File - -SOURCE=.\filters\unicode_table_koi8u.h -# End Source File -# Begin Source File - -SOURCE=.\filters\unicode_table_armscii8.h -# End Source File -# Begin Source File - -SOURCE=.\filters\unicode_table_uhc.h -# End Source File -# End Group -# Begin Group "Resource Files" - -# PROP Default_Filter 
"ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe" -# Begin Source File - -SOURCE=.\mbfl.rc -# End Source File -# End Group -# End Target -# End Project diff --git a/ext/mbstring/libmbfl/libmbfl.dsw b/ext/mbstring/libmbfl/libmbfl.dsw deleted file mode 100644 index 69e7bf1b9d..0000000000 --- a/ext/mbstring/libmbfl/libmbfl.dsw +++ /dev/null @@ -1,29 +0,0 @@ -Microsoft Developer Studio Workspace File, Format Version 6.00 -# WARNING: DO NOT EDIT OR DELETE THIS WORKSPACE FILE! - -############################################################################### - -Project: "libmbfl"=".\libmbfl.dsp" - Package Owner=<4> - -Package=<5> -{{{ -}}} - -Package=<4> -{{{ -}}} - -############################################################################### - -Global: - -Package=<5> -{{{ -}}} - -Package=<3> -{{{ -}}} - -############################################################################### - diff --git a/ext/mbstring/libmbfl/libmbfl.sln b/ext/mbstring/libmbfl/libmbfl.sln deleted file mode 100755 index becef513a5..0000000000 --- a/ext/mbstring/libmbfl/libmbfl.sln +++ /dev/null @@ -1,19 +0,0 @@ -Microsoft Visual Studio Solution File, Format Version 10.00 -# Visual C++ Express 2008 -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "libmbfl", "libmbfl.vcproj", "{B3636594-A785-4270-A765-8EAE922B5207}" -EndProject -Global - GlobalSection(SolutionConfigurationPlatforms) = preSolution - Debug|Win32 = Debug|Win32 - Release|Win32 = Release|Win32 - EndGlobalSection - GlobalSection(ProjectConfigurationPlatforms) = postSolution - {B3636594-A785-4270-A765-8EAE922B5207}.Debug|Win32.ActiveCfg = Debug|Win32 - {B3636594-A785-4270-A765-8EAE922B5207}.Debug|Win32.Build.0 = Debug|Win32 - {B3636594-A785-4270-A765-8EAE922B5207}.Release|Win32.ActiveCfg = Release|Win32 - {B3636594-A785-4270-A765-8EAE922B5207}.Release|Win32.Build.0 = Release|Win32 - EndGlobalSection - GlobalSection(SolutionProperties) = preSolution - HideSolutionNode = FALSE - EndGlobalSection -EndGlobal diff --git 
a/ext/mbstring/libmbfl/libmbfl.vcproj b/ext/mbstring/libmbfl/libmbfl.vcproj deleted file mode 100755 index 0111012d65..0000000000 --- a/ext/mbstring/libmbfl/libmbfl.vcproj +++ /dev/null @@ -1,977 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/ext/mbstring/libmbfl/mbfl.def b/ext/mbstring/libmbfl/mbfl.def deleted file mode 100644 index e6844f5f88..0000000000 --- a/ext/mbstring/libmbfl/mbfl.def +++ /dev/null @@ -1,80 +0,0 @@ -LIBRARY mbfl.dll - -DESCRIPTION "streamable kanji code filter" - -EXPORTS - _mbfl_buffer_converter_new @1 - _mbfl_buffer_converter_delete @2 - _mbfl_buffer_converter_reset @3 - _mbfl_buffer_converter_illegal_mode @4 - _mbfl_buffer_converter_illegal_substchar @5 - _mbfl_buffer_converter_strncat @6 - _mbfl_buffer_converter_feed @7 - _mbfl_buffer_converter_flush @8 - _mbfl_buffer_converter_getbuffer @9 - _mbfl_buffer_converter_result @10 - _mbfl_buffer_converter_feed_result @11 - _mbfl_encoding_detector_new @12 - _mbfl_encoding_detector_delete @13 - _mbfl_encoding_detector_feed @14 - _mbfl_encoding_detector_judge @15 - _mbfl_convert_encoding @16 - _mbfl_identify_encoding @17 - _mbfl_identify_encoding_name @18 - 
_mbfl_identify_encoding_no @19 - _mbfl_strlen @20 - _mbfl_oddlen @21 - _mbfl_strpos @22 - _mbfl_substr_count @23 - _mbfl_substr @24 - _mbfl_strcut @25 - _mbfl_strwidth @26 - _mbfl_strimwidth @27 - _mbfl_mime_header_encode @28 - _mbfl_mime_header_decode @29 - _mbfl_html_numeric_entity @30 - _mbfl_ja_jp_hantozen @31 - ___mbfl_allocators @32 - _mbfl_convert_filter_list @33 - _mbfl_convert_filter_new @34 - _mbfl_convert_filter_delete @35 - _mbfl_convert_filter_feed @36 - _mbfl_convert_filter_flush @37 - _mbfl_convert_filter_reset @38 - _mbfl_convert_filter_copy @39 - _mbfl_filt_conv_illegal_output @40 - _mbfl_convert_filter_select_vtbl @41 - _mbfl_convert_filter_get_vtbl @42 - _mbfl_filt_conv_common_ctor @43 - _mbfl_filt_conv_common_flush @44 - _mbfl_filt_conv_common_dtor @45 - _mbfl_encoding_8bit @46 - _mbfl_encoding_pass @47 - _mbfl_encoding_wchar @48 - _mbfl_name2encoding @49 - _mbfl_no2encoding @50 - _mbfl_name2no_encoding @51 - _mbfl_no_encoding2name @52 - _mbfl_no2preferred_mime_name @53 - _mbfl_is_support_encoding @54 - _mbfl_filter_output_pipe @55 - _mbfl_filter_output_null @56 - _mbfl_name2language @57 - _mbfl_no2language @58 - _mbfl_name2no_language @59 - _mbfl_no_language2name @60 - _mbfl_memory_device_init @61 - _mbfl_memory_device_realloc @62 - _mbfl_memory_device_clear @63 - _mbfl_memory_device_reset @64 - _mbfl_memory_device_result @65 - _mbfl_memory_device_output @66 - _mbfl_memory_device_output2 @67 - _mbfl_memory_device_output4 @68 - _mbfl_memory_device_strcat @69 - _mbfl_memory_device_devcat @70 - _mbfl_wchar_device_init @71 - _mbfl_wchar_device_output @72 - _mbfl_string_init @73 - _mbfl_string_init_set @74 - _mbfl_string_clear @75 diff --git a/ext/mbstring/libmbfl/mbfl.rc b/ext/mbstring/libmbfl/mbfl.rc deleted file mode 100644 index 9d6a0c78e5..0000000000 --- a/ext/mbstring/libmbfl/mbfl.rc +++ /dev/null @@ -1,24 +0,0 @@ -/* $Id$ */ -1 VERSIONINFO -FILEVERSION 1,0,2,0 -PRODUCTVERSION 1,0,2,0 -FILEFLAGSMASK 0 -FILEOS 0x40000 -FILETYPE 1 -{ - BLOCK 
"StringFileInfo" - { - BLOCK "040904E4" - { - VALUE "CompanyName", "-\0" - VALUE "FileDescription", "streamable kanji code filter\0" - VALUE "FileVersion", "1.0.2\0" - VALUE "InternalName", "mbfl\0" - VALUE "LegalCopyright", "GNU Lesser Public License Version 2.0\0" - VALUE "OriginalFilename", "mbfl.dll\0" - VALUE "ProductName", "mbfl\0" - VALUE "ProductVersion", "1.0.2\0" - } - } -} - diff --git a/ext/mbstring/libmbfl/mbfl/Makefile.am b/ext/mbstring/libmbfl/mbfl/Makefile.am index 6e662d14e5..6774d88792 100644 --- a/ext/mbstring/libmbfl/mbfl/Makefile.am +++ b/ext/mbstring/libmbfl/mbfl/Makefile.am @@ -1,4 +1,4 @@ -EXTRA_DIST=Makefile.bcc32 mk_eaw_tbl.awk +EXTRA_DIST=mk_eaw_tbl.awk lib_LTLIBRARIES=libmbfl.la libmbfl_la_SOURCES=mbfilter.c \ mbfl_string.c \ diff --git a/ext/mbstring/libmbfl/mbfl/Makefile.bcc32 b/ext/mbstring/libmbfl/mbfl/Makefile.bcc32 deleted file mode 100644 index 1b43a49efe..0000000000 --- a/ext/mbstring/libmbfl/mbfl/Makefile.bcc32 +++ /dev/null @@ -1,18 +0,0 @@ -!include ..\rules.mak.bcc32 -OBJS=mbfilter.obj \ - mbfilter_8bit.obj \ - mbfilter_pass.obj \ - mbfilter_wchar.obj \ - mbfl_allocators.obj \ - mbfl_convert.obj \ - mbfl_encoding.obj \ - mbfl_filter_output.obj \ - mbfl_ident.obj \ - mbfl_language.obj \ - mbfl_memory_device.obj \ - mbfl_string.obj - -all: $(OBJS) - -clean: - @for %i in ($(OBJS)) do @if exist %i del %i diff --git a/ext/mbstring/libmbfl/mbfl/eaw_table.h b/ext/mbstring/libmbfl/mbfl/eaw_table.h index 95c895df1b..a4f1e4fdf1 100644 --- a/ext/mbstring/libmbfl/mbfl/eaw_table.h +++ b/ext/mbstring/libmbfl/mbfl/eaw_table.h @@ -2,7 +2,7 @@ static const struct { int begin; int end; } mbfl_eaw_table[] = { - { 0x1100, 0x1159 }, + { 0x1100, 0x1159 }, { 0x115f, 0x115f }, { 0x2329, 0x232a }, { 0x2e80, 0x2e99 }, @@ -32,5 +32,5 @@ static const struct { { 0xff01, 0xff60 }, { 0xffe0, 0xffe6 }, { 0x20000, 0x2fffd }, - { 0x30000, 0x3fffd } + { 0x30000, 0x3fffd } }; diff --git a/ext/mbstring/libmbfl/mbfl/mbfilter.c 
b/ext/mbstring/libmbfl/mbfl/mbfilter.c index 4997c5a886..d11cebe447 100644 --- a/ext/mbstring/libmbfl/mbfl/mbfilter.c +++ b/ext/mbstring/libmbfl/mbfl/mbfilter.c @@ -102,6 +102,7 @@ #include "mbfilter.h" #include "mbfl_filter_output.h" #include "mbfilter_pass.h" +#include "filters/mbfilter_tl_jisx0201_jisx0208.h" #include "eaw_table.h" @@ -149,11 +150,15 @@ mbfl_buffer_converter_new( convd->filter1 = NULL; convd->filter2 = NULL; if (mbfl_convert_filter_get_vtbl(convd->from->no_encoding, convd->to->no_encoding) != NULL) { - convd->filter1 = mbfl_convert_filter_new(convd->from->no_encoding, convd->to->no_encoding, mbfl_memory_device_output, 0, &convd->device); + convd->filter1 = mbfl_convert_filter_new(convd->from->no_encoding, convd->to->no_encoding, mbfl_memory_device_output, NULL, &convd->device); } else { - convd->filter2 = mbfl_convert_filter_new(mbfl_no_encoding_wchar, convd->to->no_encoding, mbfl_memory_device_output, 0, &convd->device); + convd->filter2 = mbfl_convert_filter_new(mbfl_no_encoding_wchar, convd->to->no_encoding, mbfl_memory_device_output, NULL, &convd->device); if (convd->filter2 != NULL) { - convd->filter1 = mbfl_convert_filter_new(convd->from->no_encoding, mbfl_no_encoding_wchar, (int (*)(int, void*))convd->filter2->filter_function, NULL, convd->filter2); + convd->filter1 = mbfl_convert_filter_new(convd->from->no_encoding, + mbfl_no_encoding_wchar, + (int (*)(int, void*))convd->filter2->filter_function, + (int (*)(void*))convd->filter2->filter_flush, + convd->filter2); if (convd->filter1 == NULL) { mbfl_convert_filter_delete(convd->filter2); } @@ -468,7 +473,8 @@ enum mbfl_no_encoding mbfl_encoding_detector_judge(mbfl_encoding_detector *ident } n--; } - + + /* fallback judge */ if (encoding == mbfl_no_encoding_invalid) { n = identd->filter_list_size - 1; while (n >= 0) { @@ -477,7 +483,7 @@ enum mbfl_no_encoding mbfl_encoding_detector_judge(mbfl_encoding_detector *ident encoding = filter->encoding->no_encoding; } n--; - } + } } } @@ -611,8 
+617,8 @@ mbfl_identify_encoding(mbfl_string *string, enum mbfl_no_encoding *elist, int el filter = &flist[i]; if (!filter->flag) { if (strict && filter->status) { - continue; - } + continue; + } encoding = filter->encoding; break; } @@ -628,7 +634,7 @@ mbfl_identify_encoding(mbfl_string *string, enum mbfl_no_encoding *elist, int el } } } - + /* cleanup */ /* dtors should be called in reverse order */ i = num; while (--i >= 0) { @@ -1326,7 +1332,6 @@ mbfl_substr( return result; } - /* * strcut */ @@ -1338,183 +1343,280 @@ mbfl_strcut( int length) { const mbfl_encoding *encoding; - int n, m, k, len, start, end; - unsigned char *p, *w; - const unsigned char *mbtab; mbfl_memory_device device; - mbfl_convert_filter *encoder, *encoder_tmp, *decoder, *decoder_tmp; + + /* validate the parameters */ + if (string == NULL || string->val == NULL || result == NULL) { + return NULL; + } + + if (from < 0 || length < 0) { + return NULL; + } + + if (from >= string->len) { + from = string->len; + } encoding = mbfl_no2encoding(string->no_encoding); - if (encoding == NULL || string == NULL || result == NULL) { + if (encoding == NULL) { return NULL; } + mbfl_string_init(result); result->no_language = string->no_language; result->no_encoding = string->no_encoding; - if ((encoding->flag & (MBFL_ENCTYPE_SBCS | MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE | MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) || - encoding->mblen_table != NULL) { - len = string->len; - start = from; - end = from + length; + if ((encoding->flag & (MBFL_ENCTYPE_SBCS + | MBFL_ENCTYPE_WCS2BE + | MBFL_ENCTYPE_WCS2LE + | MBFL_ENCTYPE_WCS4BE + | MBFL_ENCTYPE_WCS4LE)) + || encoding->mblen_table != NULL) { + const unsigned char *start = NULL; + const unsigned char *end = NULL; + unsigned char *w; + unsigned int sz; + if (encoding->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) { - start /= 2; - start *= 2; - end = length/2; - end *= 2; - end += start; + from &= -2; + + if (from + length >= string->len) { + length = 
string->len - from; + } + + start = string->val + from; + end = start + (length & -2); } else if (encoding->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) { - start /= 4; - start *= 4; - end = length/4; - end *= 4; - end += start; + from &= -4; + + if (from + length >= string->len) { + length = string->len - from; + } + + start = string->val + from; + end = start + (length & -4); + } else if ((encoding->flag & MBFL_ENCTYPE_SBCS)) { + start = string->val + from; + end = start + length; } else if (encoding->mblen_table != NULL) { - mbtab = encoding->mblen_table; - start = 0; - end = 0; - n = 0; - p = string->val; - if (p != NULL) { - /* search start position */ - for (;;) { - m = mbtab[*p]; - n += m; - p += m; - if (n > from) { - break; - } - start = n; - } - /* search end position */ - k = start + length; - if (k >= (int)string->len) { - end = string->len; - } else { - end = start; - while (n <= k) { - end = n; - m = mbtab[*p]; - n += m; - p += m; - } + const unsigned char *mbtab = encoding->mblen_table; + const unsigned char *p, *q; + int m; + + /* search start position */ + for (m = 0, p = string->val, q = p + from; + p < q; p += (m = mbtab[*p])); + + if (p > q) { + p -= m; + } + + start = p; + + /* search end position */ + if ((start - string->val) + length >= (int)string->len) { + end = string->val + string->len; + } else { + for (q = p + length; p < q; p += (m = mbtab[*p])); + + if (p > q) { + p -= m; } + end = p; } + } else { + /* never reached */ + return NULL; } - if (start > len) { - start = len; - } - if (start < 0) { - start = 0; - } - if (end > len) { - end = len; - } - if (end < 0) { - end = 0; - } - if (start > end) { - start = end; - } /* allocate memory and copy string */ - n = end - start; - result->len = 0; - result->val = w = (unsigned char*)mbfl_malloc((n + 8)*sizeof(unsigned char)); - if (w != NULL) { - result->len = n; - p = &(string->val[start]); - while (n > 0) { - *w++ = *p++; - n--; - } - *w++ = '\0'; - *w++ = '\0'; - *w++ = '\0'; - *w 
= '\0'; - } else { - result = NULL; + sz = end - start; + if ((w = (unsigned char*)mbfl_calloc(sz + 8, + sizeof(unsigned char))) == NULL) { + return NULL; } + + memcpy(w, start, sz); + w[sz] = '\0'; + w[sz + 1] = '\0'; + w[sz + 2] = '\0'; + w[sz + 3] = '\0'; + + result->val = w; + result->len = sz; } else { - /* wchar filter */ - encoder = mbfl_convert_filter_new( - string->no_encoding, - mbfl_no_encoding_wchar, - mbfl_filter_output_null, 0, 0); - encoder_tmp = mbfl_convert_filter_new( - string->no_encoding, - mbfl_no_encoding_wchar, - mbfl_filter_output_null, 0, 0); + mbfl_convert_filter *encoder = NULL; + mbfl_convert_filter *decoder = NULL; + const unsigned char *p, *q, *r; + struct { + mbfl_convert_filter encoder; + mbfl_convert_filter decoder; + const unsigned char *p; + int pos; + } bk, _bk; + /* output code filter */ - decoder = mbfl_convert_filter_new( - mbfl_no_encoding_wchar, - string->no_encoding, - mbfl_memory_device_output, 0, &device); - decoder_tmp = mbfl_convert_filter_new( - mbfl_no_encoding_wchar, - string->no_encoding, - mbfl_memory_device_output, 0, &device); - if (encoder == NULL || encoder_tmp == NULL || decoder == NULL || decoder_tmp == NULL) { - mbfl_convert_filter_delete(encoder); - mbfl_convert_filter_delete(encoder_tmp); + if (!(decoder = mbfl_convert_filter_new( + mbfl_no_encoding_wchar, + string->no_encoding, + mbfl_memory_device_output, 0, &device))) { + return NULL; + } + + /* wchar filter */ + if (!(encoder = mbfl_convert_filter_new( + string->no_encoding, + mbfl_no_encoding_wchar, + mbfl_filter_output_null, + NULL, NULL))) { mbfl_convert_filter_delete(decoder); - mbfl_convert_filter_delete(decoder_tmp); return NULL; } + mbfl_memory_device_init(&device, length + 8, 0); - k = 0; - n = 0; + p = string->val; - if (p != NULL) { - /* seartch start position */ - while (n < from) { - (*encoder->filter_function)(*p++, encoder); - n++; - } + + /* search start position */ + for (q = string->val + from; p < q; p++) { + 
(*encoder->filter_function)(*p, encoder); + } + + /* switch the drain direction */ + encoder->output_function = (int(*)(int,void *))decoder->filter_function; + encoder->flush_function = (int(*)(void *))decoder->filter_flush; + encoder->data = decoder; + + q = string->val + string->len; + + /* save the encoder, decoder state and the pointer */ + mbfl_convert_filter_copy(decoder, &_bk.decoder); + mbfl_convert_filter_copy(encoder, &_bk.encoder); + _bk.p = p; + _bk.pos = device.pos; + + if (length > q - p) { + length = q - p; + } + + if (length >= 20) { /* output a little shorter than "length" */ - encoder->output_function = mbfl_filter_output_pipe; - encoder->data = decoder; - k = length - 20; - len = string->len; - while (n < len && device.pos < k) { - (*encoder->filter_function)(*p++, encoder); - n++; - } - /* detect end position */ - for (;;) { - /* backup current state */ - k = device.pos; - mbfl_convert_filter_copy(encoder, encoder_tmp); - mbfl_convert_filter_copy(decoder, decoder_tmp); - if (n >= len) { - break; - } - /* feed 1byte and flush */ + /* XXX: the constant "20" was determined purely on the heuristics. 
*/ + for (r = p + length - 20; p < r; p++) { (*encoder->filter_function)(*p, encoder); + } + + /* if the offset of the resulting string exceeds the length, + * then restore the state */ + if (device.pos > length) { + p = _bk.p; + device.pos = _bk.pos; + decoder->filter_dtor(decoder); + encoder->filter_dtor(encoder); + mbfl_convert_filter_copy(&_bk.decoder, decoder); + mbfl_convert_filter_copy(&_bk.encoder, encoder); + bk = _bk; + } else { + /* save the encoder, decoder state and the pointer */ + mbfl_convert_filter_copy(decoder, &bk.decoder); + mbfl_convert_filter_copy(encoder, &bk.encoder); + bk.p = p; + bk.pos = device.pos; + + /* flush the stream */ (*encoder->filter_flush)(encoder); - (*decoder->filter_flush)(decoder); + + /* if the offset of the resulting string exceeds the length, + * then restore the state */ if (device.pos > length) { - break; + bk.decoder.filter_dtor(&bk.decoder); + bk.encoder.filter_dtor(&bk.encoder); + + p = _bk.p; + device.pos = _bk.pos; + decoder->filter_dtor(decoder); + encoder->filter_dtor(encoder); + mbfl_convert_filter_copy(&_bk.decoder, decoder); + mbfl_convert_filter_copy(&_bk.encoder, encoder); + bk = _bk; + } else { + _bk.decoder.filter_dtor(&_bk.decoder); + _bk.encoder.filter_dtor(&_bk.encoder); + + p = bk.p; + device.pos = bk.pos; + decoder->filter_dtor(decoder); + encoder->filter_dtor(encoder); + mbfl_convert_filter_copy(&bk.decoder, decoder); + mbfl_convert_filter_copy(&bk.encoder, encoder); } - /* restore filter and re-feed data */ - device.pos = k; - mbfl_convert_filter_copy(encoder_tmp, encoder); - mbfl_convert_filter_copy(decoder_tmp, decoder); - (*encoder->filter_function)(*p, encoder); - p++; - n++; } - device.pos = k; - mbfl_convert_filter_copy(encoder_tmp, encoder); - mbfl_convert_filter_copy(decoder_tmp, decoder); - mbfl_convert_filter_flush(encoder); - mbfl_convert_filter_flush(decoder); + } else { + bk = _bk; + } + + /* detect end position */ + while (p < q) { + (*encoder->filter_function)(*p, encoder); + + if 
(device.pos > length) { + /* restore filter */ + p = bk.p; + device.pos = bk.pos; + decoder->filter_dtor(decoder); + encoder->filter_dtor(encoder); + mbfl_convert_filter_copy(&bk.decoder, decoder); + mbfl_convert_filter_copy(&bk.encoder, encoder); + break; + } + + p++; + + /* backup current state */ + mbfl_convert_filter_copy(decoder, &_bk.decoder); + mbfl_convert_filter_copy(encoder, &_bk.encoder); + _bk.pos = device.pos; + _bk.p = p; + + (*encoder->filter_flush)(encoder); + + if (device.pos > length) { + _bk.decoder.filter_dtor(&_bk.decoder); + _bk.encoder.filter_dtor(&_bk.encoder); + + /* restore filter */ + p = bk.p; + device.pos = bk.pos; + decoder->filter_dtor(decoder); + encoder->filter_dtor(encoder); + mbfl_convert_filter_copy(&bk.decoder, decoder); + mbfl_convert_filter_copy(&bk.encoder, encoder); + break; + } + + bk.decoder.filter_dtor(&bk.decoder); + bk.encoder.filter_dtor(&bk.encoder); + + p = _bk.p; + device.pos = _bk.pos; + decoder->filter_dtor(decoder); + encoder->filter_dtor(encoder); + mbfl_convert_filter_copy(&_bk.decoder, decoder); + mbfl_convert_filter_copy(&_bk.encoder, encoder); + + bk = _bk; } + + (*encoder->filter_flush)(encoder); + + bk.decoder.filter_dtor(&bk.decoder); + bk.encoder.filter_dtor(&bk.encoder); + result = mbfl_memory_device_result(&device, result); + mbfl_convert_filter_delete(encoder); - mbfl_convert_filter_delete(encoder_tmp); mbfl_convert_filter_delete(decoder); - mbfl_convert_filter_delete(decoder_tmp); } return result; @@ -1731,276 +1833,6 @@ mbfl_strimwidth( return result; } - - -/* - * convert Hankaku and Zenkaku - */ -struct collector_hantozen_data { - mbfl_convert_filter *next_filter; - int mode; - int status; - int cache; -}; - -static const unsigned char hankana2zenkata_table[64] = { - 0x00,0x02,0x0C,0x0D,0x01,0xFB,0xF2,0xA1,0xA3,0xA5, - 0xA7,0xA9,0xE3,0xE5,0xE7,0xC3,0xFC,0xA2,0xA4,0xA6, - 0xA8,0xAA,0xAB,0xAD,0xAF,0xB1,0xB3,0xB5,0xB7,0xB9, - 0xBB,0xBD,0xBF,0xC1,0xC4,0xC6,0xC8,0xCA,0xCB,0xCC, - 
0xCD,0xCE,0xCF,0xD2,0xD5,0xD8,0xDB,0xDE,0xDF,0xE0, - 0xE1,0xE2,0xE4,0xE6,0xE8,0xE9,0xEA,0xEB,0xEC,0xED, - 0xEF,0xF3,0x9B,0x9C -}; -static const unsigned char hankana2zenhira_table[64] = { - 0x00,0x02,0x0C,0x0D,0x01,0xFB,0x92,0x41,0x43,0x45, - 0x47,0x49,0x83,0x85,0x87,0x63,0xFC,0x42,0x44,0x46, - 0x48,0x4A,0x4B,0x4D,0x4F,0x51,0x53,0x55,0x57,0x59, - 0x5B,0x5D,0x5F,0x61,0x64,0x66,0x68,0x6A,0x6B,0x6C, - 0x6D,0x6E,0x6F,0x72,0x75,0x78,0x7B,0x7E,0x7F,0x80, - 0x81,0x82,0x84,0x86,0x88,0x89,0x8A,0x8B,0x8C,0x8D, - 0x8F,0x93,0x9B,0x9C -}; -static const unsigned char zenkana2hankana_table[84][2] = { - {0x67,0x00},{0x71,0x00},{0x68,0x00},{0x72,0x00},{0x69,0x00}, - {0x73,0x00},{0x6A,0x00},{0x74,0x00},{0x6B,0x00},{0x75,0x00}, - {0x76,0x00},{0x76,0x9E},{0x77,0x00},{0x77,0x9E},{0x78,0x00}, - {0x78,0x9E},{0x79,0x00},{0x79,0x9E},{0x7A,0x00},{0x7A,0x9E}, - {0x7B,0x00},{0x7B,0x9E},{0x7C,0x00},{0x7C,0x9E},{0x7D,0x00}, - {0x7D,0x9E},{0x7E,0x00},{0x7E,0x9E},{0x7F,0x00},{0x7F,0x9E}, - {0x80,0x00},{0x80,0x9E},{0x81,0x00},{0x81,0x9E},{0x6F,0x00}, - {0x82,0x00},{0x82,0x9E},{0x83,0x00},{0x83,0x9E},{0x84,0x00}, - {0x84,0x9E},{0x85,0x00},{0x86,0x00},{0x87,0x00},{0x88,0x00}, - {0x89,0x00},{0x8A,0x00},{0x8A,0x9E},{0x8A,0x9F},{0x8B,0x00}, - {0x8B,0x9E},{0x8B,0x9F},{0x8C,0x00},{0x8C,0x9E},{0x8C,0x9F}, - {0x8D,0x00},{0x8D,0x9E},{0x8D,0x9F},{0x8E,0x00},{0x8E,0x9E}, - {0x8E,0x9F},{0x8F,0x00},{0x90,0x00},{0x91,0x00},{0x92,0x00}, - {0x93,0x00},{0x6C,0x00},{0x94,0x00},{0x6D,0x00},{0x95,0x00}, - {0x6E,0x00},{0x96,0x00},{0x97,0x00},{0x98,0x00},{0x99,0x00}, - {0x9A,0x00},{0x9B,0x00},{0x9C,0x00},{0x9C,0x00},{0x72,0x00}, - {0x74,0x00},{0x66,0x00},{0x9D,0x00},{0x73,0x9E} -}; - -static int -collector_hantozen(int c, void* data) -{ - int s, mode, n; - struct collector_hantozen_data *pc = (struct collector_hantozen_data*)data; - - s = c; - mode = pc->mode; - - if (mode & 0xf) { /* hankaku to zenkaku */ - if ((mode & 0x1) && c >= 0x21 && c <= 0x7d && c != 0x22 && c != 0x27 && c != 0x5c) { /* all except <"> <'> <\> 
<~> */ - s = c + 0xfee0; - } else if ((mode & 0x2) && ((c >= 0x41 && c <= 0x5a) || (c >= 0x61 && c <= 0x7a))) { /* alpha */ - s = c + 0xfee0; - } else if ((mode & 0x4) && c >= 0x30 && c <= 0x39) { /* num */ - s = c + 0xfee0; - } else if ((mode & 0x8) && c == 0x20) { /* spase */ - s = 0x3000; - } - } - - if (mode & 0xf0) { /* zenkaku to hankaku */ - if ((mode & 0x10) && c >= 0xff01 && c <= 0xff5d && c != 0xff02 && c != 0xff07 && c!= 0xff3c) { /* all except <"> <'> <\> <~> */ - s = c - 0xfee0; - } else if ((mode & 0x20) && ((c >= 0xff21 && c <= 0xff3a) || (c >= 0xff41 && c <= 0xff5a))) { /* alpha */ - s = c - 0xfee0; - } else if ((mode & 0x40) && (c >= 0xff10 && c <= 0xff19)) { /* num */ - s = c - 0xfee0; - } else if ((mode & 0x80) && (c == 0x3000)) { /* spase */ - s = 0x20; - } else if ((mode & 0x10) && (c == 0x2212)) { /* MINUS SIGN */ - s = 0x2d; - } - } - - if (mode & 0x300) { /* hankaku kana to zenkaku kana */ - if ((mode & 0x100) && (mode & 0x800)) { /* hankaku kana to zenkaku katakana and glue voiced sound mark */ - if (c >= 0xff61 && c <= 0xff9f) { - if (pc->status) { - n = (pc->cache - 0xff60) & 0x3f; - if (c == 0xff9e && ((n >= 22 && n <= 36) || (n >= 42 && n <= 46))) { - pc->status = 0; - s = 0x3001 + hankana2zenkata_table[n]; - } else if (c == 0xff9e && n == 19) { - pc->status = 0; - s = 0x30f4; - } else if (c == 0xff9f && (n >= 42 && n <= 46)) { - pc->status = 0; - s = 0x3002 + hankana2zenkata_table[n]; - } else { - pc->status = 1; - pc->cache = c; - s = 0x3000 + hankana2zenkata_table[n]; - } - } else { - pc->status = 1; - pc->cache = c; - return c; - } - } else { - if (pc->status) { - n = (pc->cache - 0xff60) & 0x3f; - pc->status = 0; - (*pc->next_filter->filter_function)(0x3000 + hankana2zenkata_table[n], pc->next_filter); - } - } - } else if ((mode & 0x200) && (mode & 0x800)) { /* hankaku kana to zenkaku hirangana and glue voiced sound mark */ - if (c >= 0xff61 && c <= 0xff9f) { - if (pc->status) { - n = (pc->cache - 0xff60) & 0x3f; - if (c == 0xff9e 
&& ((n >= 22 && n <= 36) || (n >= 42 && n <= 46))) { - pc->status = 0; - s = 0x3001 + hankana2zenhira_table[n]; - } else if (c == 0xff9f && (n >= 42 && n <= 46)) { - pc->status = 0; - s = 0x3002 + hankana2zenhira_table[n]; - } else { - pc->status = 1; - pc->cache = c; - s = 0x3000 + hankana2zenhira_table[n]; - } - } else { - pc->status = 1; - pc->cache = c; - return c; - } - } else { - if (pc->status) { - n = (pc->cache - 0xff60) & 0x3f; - pc->status = 0; - (*pc->next_filter->filter_function)(0x3000 + hankana2zenhira_table[n], pc->next_filter); - } - } - } else if ((mode & 0x100) && c >= 0xff61 && c <= 0xff9f) { /* hankaku kana to zenkaku katakana */ - s = 0x3000 + hankana2zenkata_table[c - 0xff60]; - } else if ((mode & 0x200) && c >= 0xff61 && c <= 0xff9f) { /* hankaku kana to zenkaku hirangana */ - s = 0x3000 + hankana2zenhira_table[c - 0xff60]; - } - } - - if (mode & 0x3000) { /* Zenkaku kana to hankaku kana */ - if ((mode & 0x1000) && c >= 0x30a1 && c <= 0x30f4) { /* Zenkaku katakana to hankaku kana */ - n = c - 0x30a1; - if (zenkana2hankana_table[n][1] != 0) { - (*pc->next_filter->filter_function)(0xff00 + zenkana2hankana_table[n][0], pc->next_filter); - s = 0xff00 + zenkana2hankana_table[n][1]; - } else { - s = 0xff00 + zenkana2hankana_table[n][0]; - } - } else if ((mode & 0x2000) && c >= 0x3041 && c <= 0x3093) { /* Zenkaku hirangana to hankaku kana */ - n = c - 0x3041; - if (zenkana2hankana_table[n][1] != 0) { - (*pc->next_filter->filter_function)(0xff00 + zenkana2hankana_table[n][0], pc->next_filter); - s = 0xff00 + zenkana2hankana_table[n][1]; - } else { - s = 0xff00 + zenkana2hankana_table[n][0]; - } - } else if (c == 0x3001) { - s = 0xff64; /* HALFWIDTH IDEOGRAPHIC COMMA */ - } else if (c == 0x3002) { - s = 0xff61; /* HALFWIDTH IDEOGRAPHIC FULL STOP */ - } else if (c == 0x300c) { - s = 0xff62; /* HALFWIDTH LEFT CORNER BRACKET */ - } else if (c == 0x300d) { - s = 0xff63; /* HALFWIDTH RIGHT CORNER BRACKET */ - } else if (c == 0x309b) { - s = 0xff9e; /* 
HALFWIDTH KATAKANA VOICED SOUND MARK */ - } else if (c == 0x309c) { - s = 0xff9f; /* HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK */ - } else if (c == 0x30fc) { - s = 0xff70; /* HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK */ - } else if (c == 0x30fb) { - s = 0xff65; /* HALFWIDTH KATAKANA MIDDLE DOT */ - } - } else if (mode & 0x30000) { - if ((mode & 0x10000) && c >= 0x3041 && c <= 0x3093) { /* Zenkaku hirangana to Zenkaku katakana */ - s = c + 0x60; - } else if ((mode & 0x20000) && c >= 0x30a1 && c <= 0x30f3) { /* Zenkaku katakana to Zenkaku hirangana */ - s = c - 0x60; - } - } - - if (mode & 0x100000) { /* special ascii to symbol */ - if (c == 0x5c) { - s = 0xffe5; /* FULLWIDTH YEN SIGN */ - } else if (c == 0xa5) { /* YEN SIGN */ - s = 0xffe5; /* FULLWIDTH YEN SIGN */ - } else if (c == 0x7e) { - s = 0xffe3; /* FULLWIDTH MACRON */ - } else if (c == 0x203e) { /* OVERLINE */ - s = 0xffe3; /* FULLWIDTH MACRON */ - } else if (c == 0x27) { - s = 0x2019; /* RIGHT SINGLE QUOTATION MARK */ - } else if (c == 0x22) { - s = 0x201d; /* RIGHT DOUBLE QUOTATION MARK */ - } - } else if (mode & 0x200000) { /* special symbol to ascii */ - if (c == 0xffe5) { /* FULLWIDTH YEN SIGN */ - s = 0x5c; - } else if (c == 0xff3c) { /* FULLWIDTH REVERSE SOLIDUS */ - s = 0x5c; - } else if (c == 0xffe3) { /* FULLWIDTH MACRON */ - s = 0x7e; - } else if (c == 0x203e) { /* OVERLINE */ - s = 0x7e; - } else if (c == 0x2018) { /* LEFT SINGLE QUOTATION MARK*/ - s = 0x27; - } else if (c == 0x2019) { /* RIGHT SINGLE QUOTATION MARK */ - s = 0x27; - } else if (c == 0x201c) { /* LEFT DOUBLE QUOTATION MARK */ - s = 0x22; - } else if (c == 0x201d) { /* RIGHT DOUBLE QUOTATION MARK */ - s = 0x22; - } - } - - if (mode & 0x400000) { /* special ascii to symbol */ - if (c == 0x5c) { - s = 0xff3c; /* FULLWIDTH REVERSE SOLIDUS */ - } else if (c == 0x7e) { - s = 0xff5e; /* FULLWIDTH TILDE */ - } else if (c == 0x27) { - s = 0xff07; /* FULLWIDTH APOSTROPHE */ - } else if (c == 0x22) { - s = 0xff02; /* FULLWIDTH 
QUOTATION MARK */ - } - } else if (mode & 0x800000) { /* special symbol to ascii */ - if (c == 0xff3c) { /* FULLWIDTH REVERSE SOLIDUS */ - s = 0x5c; - } else if (c == 0xff5e) { /* FULLWIDTH TILDE */ - s = 0x7e; - } else if (c == 0xff07) { /* FULLWIDTH APOSTROPHE */ - s = 0x27; - } else if (c == 0xff02) { /* FULLWIDTH QUOTATION MARK */ - s = 0x22; - } - } - - return (*pc->next_filter->filter_function)(s, pc->next_filter); -} - -static int -collector_hantozen_flush(struct collector_hantozen_data *pc) -{ - int ret, n; - - ret = 0; - if (pc->status) { - n = (pc->cache - 0xff60) & 0x3f; - if (pc->mode & 0x100) { /* hankaku kana to zenkaku katakana */ - ret = (*pc->next_filter->filter_function)(0x3000 + hankana2zenkata_table[n], pc->next_filter); - } else if (pc->mode & 0x200) { /* hankaku kana to zenkaku hirangana */ - ret = (*pc->next_filter->filter_function)(0x3000 + hankana2zenhira_table[n], pc->next_filter); - } - pc->status = 0; - } - - return ret; -} - mbfl_string * mbfl_ja_jp_hantozen( mbfl_string *string, @@ -2011,39 +1843,67 @@ mbfl_ja_jp_hantozen( unsigned char *p; const mbfl_encoding *encoding; mbfl_memory_device device; - struct collector_hantozen_data pc; - mbfl_convert_filter *decoder; - mbfl_convert_filter *encoder; + mbfl_convert_filter *decoder = NULL; + mbfl_convert_filter *encoder = NULL; + mbfl_convert_filter *tl_filter = NULL; + mbfl_convert_filter *next_filter = NULL; + mbfl_filt_tl_jisx0201_jisx0208_param *param = NULL; - /* initialize */ + /* validate parameters */ if (string == NULL || result == NULL) { return NULL; } + encoding = mbfl_no2encoding(string->no_encoding); if (encoding == NULL) { return NULL; } + mbfl_memory_device_init(&device, string->len, 0); mbfl_string_init(result); + result->no_language = string->no_language; result->no_encoding = string->no_encoding; + decoder = mbfl_convert_filter_new( - mbfl_no_encoding_wchar, - string->no_encoding, - mbfl_memory_device_output, 0, &device); + mbfl_no_encoding_wchar, + string->no_encoding, + 
mbfl_memory_device_output, 0, &device); + if (decoder == NULL) { + goto out; + } + next_filter = decoder; + + param = + (mbfl_filt_tl_jisx0201_jisx0208_param *)mbfl_malloc(sizeof(mbfl_filt_tl_jisx0201_jisx0208_param)); + if (param == NULL) { + goto out; + } + + param->mode = mode; + + tl_filter = mbfl_convert_filter_new2( + &vtbl_tl_jisx0201_jisx0208, + (int(*)(int, void*))next_filter->filter_function, + (int(*)(void*))next_filter->filter_flush, + next_filter); + if (tl_filter == NULL) { + mbfl_free(param); + goto out; + } + + tl_filter->opaque = param; + next_filter = tl_filter; + encoder = mbfl_convert_filter_new( - string->no_encoding, - mbfl_no_encoding_wchar, - collector_hantozen, 0, &pc); - if (decoder == NULL || encoder == NULL) { - mbfl_convert_filter_delete(encoder); - mbfl_convert_filter_delete(decoder); - return NULL; + string->no_encoding, + mbfl_no_encoding_wchar, + (int(*)(int, void*))next_filter->filter_function, + (int(*)(void*))next_filter->filter_flush, + next_filter); + if (encoder == NULL) { + goto out; } - pc.next_filter = decoder; - pc.mode = mode; - pc.status = 0; - pc.cache = 0; /* feed data */ p = string->val; @@ -2058,11 +1918,25 @@ mbfl_ja_jp_hantozen( } mbfl_convert_filter_flush(encoder); - collector_hantozen_flush(&pc); - mbfl_convert_filter_flush(decoder); result = mbfl_memory_device_result(&device, result); - mbfl_convert_filter_delete(encoder); - mbfl_convert_filter_delete(decoder); +out: + if (tl_filter != NULL) { + if (tl_filter->opaque != NULL) { + mbfl_free(tl_filter->opaque); + } + mbfl_convert_filter_delete(tl_filter); + } + + if (decoder != NULL) { + mbfl_convert_filter_delete(decoder); + } + + if (encoder != NULL) { + mbfl_convert_filter_delete(encoder); + } else { + /* every "goto out" above is taken before the encoder exists, so a NULL encoder means setup failed: report NULL as the replaced code did instead of handing back the caller's result. NOTE(review): the memory device initialised earlier is not released on this path - confirm whether it must be cleared here. */ + result = NULL; + } return result; } diff --git a/ext/mbstring/libmbfl/mbfl/mbfilter.h b/ext/mbstring/libmbfl/mbfl/mbfilter.h index a00c51b5f4..4565fc6985 100644 --- a/ext/mbstring/libmbfl/mbfl/mbfilter.h +++ b/ext/mbstring/libmbfl/mbfl/mbfilter.h @@ -102,8 +102,8 @@ * version information */
#define MBFL_VERSION_MAJOR 1 -#define MBFL_VERSION_MINOR 0 -#define MBFL_VERSION_TEENY 2 +#define MBFL_VERSION_MINOR 1 +#define MBFL_VERSION_TEENY 0 /* * convert filter diff --git a/ext/mbstring/libmbfl/mbfl/mbfilter_8bit.h b/ext/mbstring/libmbfl/mbfl/mbfilter_8bit.h index 4fc8922605..a87c564616 100644 --- a/ext/mbstring/libmbfl/mbfl/mbfilter_8bit.h +++ b/ext/mbstring/libmbfl/mbfl/mbfilter_8bit.h @@ -34,6 +34,6 @@ #include "mbfl_defs.h" #include "mbfilter.h" -extern const mbfl_encoding mbfl_encoding_8bit; +MBFLAPI extern const mbfl_encoding mbfl_encoding_8bit; #endif /* MBFL_MBFILTER_8BIT_H */ diff --git a/ext/mbstring/libmbfl/mbfl/mbfilter_pass.h b/ext/mbstring/libmbfl/mbfl/mbfilter_pass.h index 087aa2c3be..49d169c668 100644 --- a/ext/mbstring/libmbfl/mbfl/mbfilter_pass.h +++ b/ext/mbstring/libmbfl/mbfl/mbfilter_pass.h @@ -33,8 +33,8 @@ #include "mbfl_defs.h" #include "mbfilter.h" -extern const mbfl_encoding mbfl_encoding_pass; -extern const struct mbfl_convert_vtbl vtbl_pass; +MBFLAPI extern const mbfl_encoding mbfl_encoding_pass; +MBFLAPI extern const struct mbfl_convert_vtbl vtbl_pass; MBFLAPI extern int mbfl_filt_conv_pass(int c, mbfl_convert_filter *filter); diff --git a/ext/mbstring/libmbfl/mbfl/mbfilter_wchar.h b/ext/mbstring/libmbfl/mbfl/mbfilter_wchar.h index 24bf7473c1..9e9396a77f 100644 --- a/ext/mbstring/libmbfl/mbfl/mbfilter_wchar.h +++ b/ext/mbstring/libmbfl/mbfl/mbfilter_wchar.h @@ -34,6 +34,6 @@ #include "mbfl_defs.h" #include "mbfilter.h" -extern const mbfl_encoding mbfl_encoding_wchar; +MBFLAPI extern const mbfl_encoding mbfl_encoding_wchar; #endif /* MBFL_MBFILTER_WCHAR_H */ diff --git a/ext/mbstring/libmbfl/mbfl/mbfl_convert.c b/ext/mbstring/libmbfl/mbfl/mbfl_convert.c index 725a674b39..d81b533dd1 100644 --- a/ext/mbstring/libmbfl/mbfl/mbfl_convert.c +++ b/ext/mbstring/libmbfl/mbfl/mbfl_convert.c @@ -51,6 +51,7 @@ #include "filters/mbfilter_euc_kr.h" #include "filters/mbfilter_iso2022_kr.h" #include "filters/mbfilter_sjis.h" +#include 
"filters/mbfilter_sjis_open.h" #include "filters/mbfilter_cp51932.h" #include "filters/mbfilter_jis.h" #include "filters/mbfilter_iso2022_jp_ms.h" @@ -65,6 +66,7 @@ #include "filters/mbfilter_cp1251.h" #include "filters/mbfilter_cp1252.h" #include "filters/mbfilter_cp1254.h" +#include "filters/mbfilter_cp5022x.h" #include "filters/mbfilter_iso8859_1.h" #include "filters/mbfilter_iso8859_2.h" #include "filters/mbfilter_iso8859_3.h" @@ -95,8 +97,6 @@ #include "filters/mbfilter_armscii8.h" #include "filters/mbfilter_cp850.h" -static void mbfl_convert_filter_reset_vtbl(mbfl_convert_filter *filter); - /* hex character table "0123456789ABCDEF" */ static char mbfl_hexchar_table[] = { 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x41,0x42,0x43,0x44,0x45,0x46 @@ -109,18 +109,22 @@ const struct mbfl_convert_vtbl *mbfl_convert_filter_list[] = { &vtbl_wchar_eucjp, &vtbl_sjis_wchar, &vtbl_wchar_sjis, + &vtbl_sjis_open_wchar, + &vtbl_wchar_sjis_open, &vtbl_cp51932_wchar, &vtbl_wchar_cp51932, &vtbl_jis_wchar, &vtbl_wchar_jis, + &vtbl_jis_ms_wchar, + &vtbl_wchar_jis_ms, &vtbl_2022jp_wchar, &vtbl_wchar_2022jp, &vtbl_2022jpms_wchar, &vtbl_wchar_2022jpms, &vtbl_eucjpwin_wchar, &vtbl_wchar_eucjpwin, - &vtbl_sjiswin_wchar, - &vtbl_wchar_sjiswin, + &vtbl_cp932_wchar, + &vtbl_wchar_cp932, &vtbl_euccn_wchar, &vtbl_wchar_euccn, &vtbl_cp936_wchar, @@ -149,6 +153,14 @@ const struct mbfl_convert_vtbl *mbfl_convert_filter_list[] = { &vtbl_wchar_cp1252, &vtbl_cp1254_wchar, &vtbl_wchar_cp1254, + &vtbl_cp50220_wchar, + &vtbl_wchar_cp50220, + &vtbl_cp50220raw_wchar, + &vtbl_wchar_cp50220raw, + &vtbl_cp50221_wchar, + &vtbl_wchar_cp50221, + &vtbl_cp50222_wchar, + &vtbl_wchar_cp50222, &vtbl_ascii_wchar, &vtbl_wchar_ascii, &vtbl_8859_1_wchar, @@ -230,6 +242,48 @@ const struct mbfl_convert_vtbl *mbfl_convert_filter_list[] = { NULL }; +static int +mbfl_convert_filter_common_init( + mbfl_convert_filter *filter, + enum mbfl_no_encoding from, + enum mbfl_no_encoding to, + const struct 
mbfl_convert_vtbl *vtbl, + int (*output_function)(int, void* ), + int (*flush_function)(void*), + void* data) +{ + /* encoding structure */ + if ((filter->from = mbfl_no2encoding(from)) == NULL) { + return 1; + } + + if ((filter->to = mbfl_no2encoding(to)) == NULL) { + return 1; + } + + if (output_function != NULL) { + filter->output_function = output_function; + } else { + filter->output_function = mbfl_filter_output_null; + } + + filter->flush_function = flush_function; + filter->data = data; + filter->illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR; + filter->illegal_substchar = 0x3f; /* '?' */ + filter->num_illegalchar = 0; + filter->filter_ctor = vtbl->filter_ctor; + filter->filter_dtor = vtbl->filter_dtor; + filter->filter_function = vtbl->filter_function; + filter->filter_flush = vtbl->filter_flush; + filter->filter_copy = vtbl->filter_copy; + + (*filter->filter_ctor)(filter); + + return 0; +} + + mbfl_convert_filter * mbfl_convert_filter_new( enum mbfl_no_encoding from, @@ -239,6 +293,13 @@ mbfl_convert_filter_new( void* data) { mbfl_convert_filter * filter; + const struct mbfl_convert_vtbl *vtbl; + + vtbl = mbfl_convert_filter_get_vtbl(from, to); + + if (vtbl == NULL) { + vtbl = &vtbl_pass; + } /* allocate */ filter = (mbfl_convert_filter *)mbfl_malloc(sizeof(mbfl_convert_filter)); @@ -246,32 +307,39 @@ mbfl_convert_filter_new( return NULL; } - /* encoding structure */ - filter->from = mbfl_no2encoding(from); - filter->to = mbfl_no2encoding(to); - if (filter->from == NULL) { - filter->from = &mbfl_encoding_pass; - } - if (filter->to == NULL) { - filter->to = &mbfl_encoding_pass; + if (mbfl_convert_filter_common_init(filter, from, to, vtbl, + output_function, flush_function, data)) { + mbfl_free(filter); + return NULL; } - if (output_function != NULL) { - filter->output_function = output_function; - } else { - filter->output_function = mbfl_filter_output_null; + return filter; +} + +mbfl_convert_filter * +mbfl_convert_filter_new2( + const struct 
mbfl_convert_vtbl *vtbl, + int (*output_function)(int, void* ), + int (*flush_function)(void*), + void* data) +{ + mbfl_convert_filter * filter; + + if (vtbl == NULL) { + vtbl = &vtbl_pass; } - filter->flush_function = flush_function; - filter->data = data; - filter->illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR; - filter->illegal_substchar = 0x3f; /* '?' */ - filter->num_illegalchar = 0; - /* setup the function table */ - mbfl_convert_filter_reset_vtbl(filter); + /* allocate */ + filter = (mbfl_convert_filter *)mbfl_malloc(sizeof(mbfl_convert_filter)); + if (filter == NULL) { + return NULL; + } - /* constructor */ - (*filter->filter_ctor)(filter); + if (mbfl_convert_filter_common_init(filter, vtbl->from, vtbl->to, vtbl, + output_function, flush_function, data)) { + mbfl_free(filter); + return NULL; + } return filter; } @@ -301,39 +369,32 @@ mbfl_convert_filter_flush(mbfl_convert_filter *filter) void mbfl_convert_filter_reset(mbfl_convert_filter *filter, enum mbfl_no_encoding from, enum mbfl_no_encoding to) { + const struct mbfl_convert_vtbl *vtbl; + /* destruct old filter */ (*filter->filter_dtor)(filter); - /* resset filter member */ - filter->from = mbfl_no2encoding(from); - filter->to = mbfl_no2encoding(to); + vtbl = mbfl_convert_filter_get_vtbl(from, to); - /* set the vtbl */ - mbfl_convert_filter_reset_vtbl(filter); + if (vtbl == NULL) { + vtbl = &vtbl_pass; + } - /* construct new filter */ - (*filter->filter_ctor)(filter); + mbfl_convert_filter_common_init(filter, from, to, vtbl, + filter->output_function, filter->flush_function, filter->data); } void mbfl_convert_filter_copy( mbfl_convert_filter *src, - mbfl_convert_filter *dist) + mbfl_convert_filter *dest) { - dist->filter_ctor = src->filter_ctor; - dist->filter_dtor = src->filter_dtor; - dist->filter_function = src->filter_function; - dist->filter_flush = src->filter_flush; - dist->output_function = src->output_function; - dist->flush_function = src->flush_function; - dist->data = src->data; - 
dist->status = src->status; - dist->cache = src->cache; - dist->from = src->from; - dist->to = src->to; - dist->illegal_mode = src->illegal_mode; - dist->illegal_substchar = src->illegal_substchar; - dist->num_illegalchar = src->num_illegalchar; + if (src->filter_copy != NULL) { + src->filter_copy(src, dest); + return; + } + + *dest = *src; } int mbfl_convert_filter_devcat(mbfl_convert_filter *filter, mbfl_memory_device *src) @@ -466,6 +527,7 @@ mbfl_filt_conv_illegal_output(int c, mbfl_convert_filter *filter) } filter->illegal_mode = mode_backup; filter->num_illegalchar++; + return ret; } @@ -494,22 +556,6 @@ const struct mbfl_convert_vtbl * mbfl_convert_filter_get_vtbl(enum mbfl_no_encod return NULL; } - -static void mbfl_convert_filter_reset_vtbl(mbfl_convert_filter *filter) -{ - const struct mbfl_convert_vtbl *vtbl; - - vtbl = mbfl_convert_filter_get_vtbl(filter->from->no_encoding, filter->to->no_encoding); - if (vtbl == NULL) { - vtbl = &vtbl_pass; - } - - filter->filter_ctor = vtbl->filter_ctor; - filter->filter_dtor = vtbl->filter_dtor; - filter->filter_function = vtbl->filter_function; - filter->filter_flush = vtbl->filter_flush; -} - /* * commonly used constructor and destructor */ @@ -523,6 +569,10 @@ int mbfl_filt_conv_common_flush(mbfl_convert_filter *filter) { filter->status = 0; filter->cache = 0; + + if (filter->flush_function != NULL) { + (*filter->flush_function)(filter->data); + } return 0; } diff --git a/ext/mbstring/libmbfl/mbfl/mbfl_convert.h b/ext/mbstring/libmbfl/mbfl/mbfl_convert.h index bf8b3bd630..8b5ba5bc19 100644 --- a/ext/mbstring/libmbfl/mbfl/mbfl_convert.h +++ b/ext/mbstring/libmbfl/mbfl/mbfl_convert.h @@ -40,6 +40,7 @@ typedef struct _mbfl_convert_filter mbfl_convert_filter; struct _mbfl_convert_filter { void (*filter_ctor)(mbfl_convert_filter *filter); void (*filter_dtor)(mbfl_convert_filter *filter); + void (*filter_copy)(mbfl_convert_filter *src, mbfl_convert_filter *dest); int (*filter_function)(int c, mbfl_convert_filter 
*filter); int (*filter_flush)(mbfl_convert_filter *filter); int (*output_function)(int c, void *data); @@ -62,6 +63,7 @@ struct mbfl_convert_vtbl { void (*filter_dtor)(mbfl_convert_filter *filter); int (*filter_function)(int c, mbfl_convert_filter *filter); int (*filter_flush)(mbfl_convert_filter *filter); + void (*filter_copy)(mbfl_convert_filter *src, mbfl_convert_filter *dest); }; MBFLAPI extern const struct mbfl_convert_vtbl *mbfl_convert_filter_list[]; @@ -72,6 +74,11 @@ MBFLAPI extern mbfl_convert_filter *mbfl_convert_filter_new( int (*output_function)(int, void *), int (*flush_function)(void *), void *data ); +MBFLAPI extern mbfl_convert_filter *mbfl_convert_filter_new2( + const struct mbfl_convert_vtbl *vtbl, + int (*output_function)(int, void *), + int (*flush_function)(void *), + void *data ); MBFLAPI extern void mbfl_convert_filter_delete(mbfl_convert_filter *filter); MBFLAPI extern int mbfl_convert_filter_feed(int c, mbfl_convert_filter *filter); MBFLAPI extern int mbfl_convert_filter_flush(mbfl_convert_filter *filter); diff --git a/ext/mbstring/libmbfl/mbfl/mbfl_encoding.c b/ext/mbstring/libmbfl/mbfl/mbfl_encoding.c index 76956f0530..17955b2c9a 100644 --- a/ext/mbstring/libmbfl/mbfl/mbfl_encoding.c +++ b/ext/mbstring/libmbfl/mbfl/mbfl_encoding.c @@ -57,6 +57,7 @@ #include "filters/mbfilter_euc_kr.h" #include "filters/mbfilter_iso2022_kr.h" #include "filters/mbfilter_sjis.h" +#include "filters/mbfilter_sjis_open.h" #include "filters/mbfilter_cp51932.h" #include "filters/mbfilter_jis.h" #include "filters/mbfilter_iso2022_jp_ms.h" @@ -71,6 +72,7 @@ #include "filters/mbfilter_cp1251.h" #include "filters/mbfilter_cp1252.h" #include "filters/mbfilter_cp1254.h" +#include "filters/mbfilter_cp5022x.h" #include "filters/mbfilter_iso8859_1.h" #include "filters/mbfilter_iso8859_2.h" #include "filters/mbfilter_iso8859_3.h" @@ -153,7 +155,8 @@ static const mbfl_encoding *mbfl_encoding_ptr_list[] = { &mbfl_encoding_euc_jp, &mbfl_encoding_sjis, 
&mbfl_encoding_eucjp_win, - &mbfl_encoding_sjis_win, + &mbfl_encoding_sjis_open, + &mbfl_encoding_cp932, &mbfl_encoding_cp51932, &mbfl_encoding_jis, &mbfl_encoding_2022jp, @@ -188,6 +191,11 @@ static const mbfl_encoding *mbfl_encoding_ptr_list[] = { &mbfl_encoding_koi8u, &mbfl_encoding_armscii8, &mbfl_encoding_cp850, + &mbfl_encoding_jis_ms, + &mbfl_encoding_cp50220, + &mbfl_encoding_cp50220raw, + &mbfl_encoding_cp50221, + &mbfl_encoding_cp50222, NULL }; @@ -209,7 +217,7 @@ mbfl_name2encoding(const char *name) } } - /* search MIME charset name */ + /* serch MIME charset name */ i = 0; while ((encoding = mbfl_encoding_ptr_list[i++]) != NULL) { if (encoding->mime_name != NULL) { @@ -219,7 +227,7 @@ mbfl_name2encoding(const char *name) } } - /* search aliases */ + /* serch aliases */ i = 0; while ((encoding = mbfl_encoding_ptr_list[i++]) != NULL) { if (encoding->aliases != NULL) { diff --git a/ext/mbstring/libmbfl/mbfl/mbfl_encoding.h b/ext/mbstring/libmbfl/mbfl/mbfl_encoding.h index 2599e1107e..fd3a66df6e 100644 --- a/ext/mbstring/libmbfl/mbfl/mbfl_encoding.h +++ b/ext/mbstring/libmbfl/mbfl/mbfl_encoding.h @@ -68,8 +68,8 @@ enum mbfl_no_encoding { mbfl_no_encoding_euc_jp, mbfl_no_encoding_sjis, mbfl_no_encoding_eucjp_win, - mbfl_no_encoding_sjis_win, - mbfl_no_encoding_sjis_mac, + mbfl_no_encoding_sjis_open, + mbfl_no_encoding_cp932, mbfl_no_encoding_cp51932, mbfl_no_encoding_jis, mbfl_no_encoding_2022jp, @@ -104,6 +104,11 @@ enum mbfl_no_encoding { mbfl_no_encoding_8859_16, mbfl_no_encoding_armscii8, mbfl_no_encoding_cp850, + mbfl_no_encoding_jis_ms, + mbfl_no_encoding_cp50220, + mbfl_no_encoding_cp50220raw, + mbfl_no_encoding_cp50221, + mbfl_no_encoding_cp50222, mbfl_no_encoding_charset_max }; diff --git a/ext/mbstring/libmbfl/mbfl/mbfl_filter_output.c b/ext/mbstring/libmbfl/mbfl/mbfl_filter_output.c index 2b63478214..341047d5b0 100644 --- a/ext/mbstring/libmbfl/mbfl/mbfl_filter_output.c +++ b/ext/mbstring/libmbfl/mbfl/mbfl_filter_output.c @@ -41,9 +41,17 @@ int 
mbfl_filter_output_pipe(int c, void* data) return (*filter->filter_function)(c, filter); } +int mbfl_filter_output_pipe_flush(void *data) +{ + mbfl_convert_filter *filter = (mbfl_convert_filter*)data; + if (filter->filter_flush != NULL) { + return (*filter->filter_flush)(filter); + } + + return 0; +} + int mbfl_filter_output_null(int c, void* data) { return c; } - - diff --git a/ext/mbstring/libmbfl/mbfl/mbfl_filter_output.h b/ext/mbstring/libmbfl/mbfl/mbfl_filter_output.h index ce5192935e..d477653d3e 100644 --- a/ext/mbstring/libmbfl/mbfl/mbfl_filter_output.h +++ b/ext/mbstring/libmbfl/mbfl/mbfl_filter_output.h @@ -32,6 +32,7 @@ #define MBFL_FILTER_OUTPUT_H MBFLAPI extern int mbfl_filter_output_pipe(int c, void* data); +MBFLAPI extern int mbfl_filter_output_pipe_flush(void* data); MBFLAPI extern int mbfl_filter_output_null(int c, void* data); #endif /* MBFL_FILTER_OUTPUT_H */ diff --git a/ext/mbstring/libmbfl/mbfl/mbfl_ident.c b/ext/mbstring/libmbfl/mbfl/mbfl_ident.c index 4f3bd5c58d..9a89807053 100644 --- a/ext/mbstring/libmbfl/mbfl/mbfl_ident.c +++ b/ext/mbstring/libmbfl/mbfl/mbfl_ident.c @@ -50,6 +50,7 @@ #include "filters/mbfilter_euc_kr.h" #include "filters/mbfilter_iso2022_kr.h" #include "filters/mbfilter_sjis.h" +#include "filters/mbfilter_sjis_open.h" #include "filters/mbfilter_jis.h" #include "filters/mbfilter_iso2022_jp_ms.h" #include "filters/mbfilter_euc_jp.h" @@ -64,6 +65,7 @@ #include "filters/mbfilter_cp1252.h" #include "filters/mbfilter_cp1254.h" #include "filters/mbfilter_cp51932.h" +#include "filters/mbfilter_cp5022x.h" #include "filters/mbfilter_iso8859_1.h" #include "filters/mbfilter_iso8859_2.h" #include "filters/mbfilter_iso8859_3.h" @@ -107,8 +109,9 @@ static const struct mbfl_identify_vtbl *mbfl_identify_filter_list[] = { &vtbl_identify_ascii, &vtbl_identify_eucjp, &vtbl_identify_sjis, + &vtbl_identify_sjis_open, &vtbl_identify_eucjpwin, - &vtbl_identify_sjiswin, + &vtbl_identify_cp932, &vtbl_identify_jis, &vtbl_identify_2022jp, 
&vtbl_identify_2022jpms, @@ -142,6 +145,10 @@ static const struct mbfl_identify_vtbl *mbfl_identify_filter_list[] = { &vtbl_identify_8859_15, &vtbl_identify_armscii8, &vtbl_identify_cp850, + &vtbl_identify_jis_ms, + &vtbl_identify_cp50220, + &vtbl_identify_cp50221, + &vtbl_identify_cp50222, &vtbl_identify_false, NULL }; diff --git a/ext/mbstring/libmbfl/mksbcc32.bat b/ext/mbstring/libmbfl/mksbcc32.bat deleted file mode 100644 index cdc3dd6ca3..0000000000 --- a/ext/mbstring/libmbfl/mksbcc32.bat +++ /dev/null @@ -1,5 +0,0 @@ -@echo off -cd %2 -%1 -f Makefile.bcc32 %3 -cd .. -exit diff --git a/ext/mbstring/libmbfl/nls/Makefile.am b/ext/mbstring/libmbfl/nls/Makefile.am index 454a07c638..a9f987b337 100644 --- a/ext/mbstring/libmbfl/nls/Makefile.am +++ b/ext/mbstring/libmbfl/nls/Makefile.am @@ -1,4 +1,3 @@ -EXTRA_DIST=Makefile.bcc32 noinst_LTLIBRARIES=libmbfl_nls.la INCLUDES=-I../mbfl libmbfl_nls_la_LDFLAGS=-version-info $(SHLIB_VERSION) diff --git a/ext/mbstring/libmbfl/nls/Makefile.bcc32 b/ext/mbstring/libmbfl/nls/Makefile.bcc32 deleted file mode 100644 index dea8689cb1..0000000000 --- a/ext/mbstring/libmbfl/nls/Makefile.bcc32 +++ /dev/null @@ -1,18 +0,0 @@ -!include ..\rules.mak.bcc32 -INCLUDES=$(INCLUDES) -I..\mbfl -OBJS=nls_ja.obj \ - nls_de.obj \ - nls_en.obj \ - nls_hy.obj \ - nls_tr.obj \ - nls_kr.obj \ - nls_ru.obj \ - nls_ua.obj \ - nls_zh.obj \ - nls_uni.obj \ - nls_neutral.obj - -all: $(OBJS) - -clean: - @for %i in ($(OBJS)) do @if exist %i del %i diff --git a/ext/mbstring/libmbfl/rules.mak.bcc32 b/ext/mbstring/libmbfl/rules.mak.bcc32 deleted file mode 100644 index 50fafa96b1..0000000000 --- a/ext/mbstring/libmbfl/rules.mak.bcc32 +++ /dev/null @@ -1,7 +0,0 @@ -.suffixes .c -CFLAGS=-DHAVE_CONFIG_H -DMBFL_DLL_EXPORT -DWIN32 -INCLUDES=-I. -I.. 
-LIBS=import32.lib cw32mt.lib c0d32.obj -LDFLAGS=-c -Tpd -Gn -.c.obj: - bcc32 $(CFLAGS) $(INCLUDES) -c $< diff --git a/ext/mbstring/libmbfl/tests/conv_encoding.tests/Makefile.am b/ext/mbstring/libmbfl/tests/conv_encoding.tests/Makefile.am index 37713c3952..6a626bd5af 100644 --- a/ext/mbstring/libmbfl/tests/conv_encoding.tests/Makefile.am +++ b/ext/mbstring/libmbfl/tests/conv_encoding.tests/Makefile.am @@ -1 +1,31 @@ -EXTRA_DIST=*.exp +EXTRA_DIST=\ + cp51932_cp50220raw.exp\ + ujis_sjis.exp\ + utf8_sjis.exp\ + yensign.exp +noinst_PROGRAMS=gen_exp +gen_exp_SOURCES=gen_exp.c +CLEANFILES=\ + uni_to_cp932.exp\ + uni_to_cp50220.exp\ + uni_to_cp50222.exp\ + cp932_to_uni.exp + +CP932.TXT: + $(FETCH_VIA_FTP) ftp://unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP932.TXT + +cp932_to_uni.exp: gen_exp CP932.TXT + ./gen_exp from_cp932 > "$@" + +uni_to_cp932.exp: gen_exp CP932.TXT + ./gen_exp to_cp932 > "$@" + +uni_to_cp50220.exp: gen_exp CP932.TXT + ./gen_exp to_cp50220 > "$@" + +uni_to_cp50222.exp: gen_exp CP932.TXT + ./gen_exp to_cp50222 > "$@" + +check-local: uni_to_cp932.exp uni_to_cp50220.exp uni_to_cp50222.exp + +.PHONY: check-local diff --git a/ext/mbstring/libmbfl/tests/conv_encoding.tests/gen_exp.c b/ext/mbstring/libmbfl/tests/conv_encoding.tests/gen_exp.c new file mode 100644 index 0000000000..93a0fb30c9 --- /dev/null +++ b/ext/mbstring/libmbfl/tests/conv_encoding.tests/gen_exp.c @@ -0,0 +1,497 @@ +#include +#include +#include +#include +#include + +struct mappings_entry { + int cp_uni; + int n; + int cp_932[16]; +}; + +struct mappings { + size_t n; + size_t nalloc; + struct mappings_entry *entries; +}; + +static void mappings_init(struct mappings *map) +{ + map->n = 0; + map->nalloc = 0; + map->entries = 0; +} + +static void mappings_destroy(struct mappings *map) +{ + if (map->entries) + free(map->entries); +} + +static int mappings_grow(struct mappings *map) +{ + if (map->n >= map->nalloc) { + struct mappings_entry *new_entries; + size_t n = map->nalloc << 1, a; 
+ if (n == 0) + n = 1; + else if (n <= map->n) + return 2; + a = sizeof(*map->entries) * n; + if (a / n != sizeof(*map->entries)) + return 2; + new_entries = realloc(map->entries, a); + if (!new_entries) + return 2; + map->entries = new_entries; + map->nalloc = n; + } + return 0; +} + +static int mappings_add(struct mappings *map, int cp_uni, int cp_932) +{ + size_t i; + size_t s = 0, e = map->n; + struct mappings_entry *entry; + + for (;;) { + i = (s + e) / 2; + entry = &map->entries[i]; + if (e == i || entry->cp_uni > cp_uni) { + if (e == i) { + int r = mappings_grow(map); + if (r) + return r; + if (map->n > i) { + size_t n = map->n - i, a = sizeof(*map->entries) * n; + if (a / n != sizeof(*map->entries)) + return 2; + memmove(&map->entries[i + 1], &map->entries[i], a); + } + ++map->n; + entry = &map->entries[i]; + entry->cp_uni = cp_uni; + entry->n = 0; + break; + } + e = i; + } else if (entry->cp_uni < cp_uni) { + if (s == i) { + int r = mappings_grow(map); + if (r) + return r; + if (map->n > i + 1) { + size_t n = (map->n - i - 1), a = sizeof(*map->entries) * n; + if (a / n != sizeof(*map->entries)) + return 2; + memmove(&map->entries[i + 2], &map->entries[i + 1], a); + } + ++map->n; + entry = &map->entries[i + 1]; + entry->cp_uni = cp_uni; + entry->n = 0; + break; + } + s = i; + } else { + break; + } + } + if (entry->n >= sizeof(entry->cp_932) / sizeof(*entry->cp_932)) + return 1; + entry->cp_932[entry->n++] = cp_932; + return 0; +} + +struct generator_entry { + const char *name; + const char *prologue; + const char *epilogue; + void(*visitor)(const struct mappings_entry *); +}; + +static int utf32_utf8(char *buf, int k) +{ + int retval = 0; + + if (k < 0x80) { + buf[0] = k; + retval = 1; + } else if (k < 0x800) { + buf[0] = 0xc0 | (k >> 6); + buf[1] = 0x80 | (k & 0x3f); + retval = 2; + } else if (k < 0x10000) { + buf[0] = 0xe0 | (k >> 12); + buf[1] = 0x80 | ((k >> 6) & 0x3f); + buf[2] = 0x80 | (k & 0x3f); + retval = 3; + } else if (k < 0x200000) { + buf[0] = 
0xf0 | (k >> 18); + buf[1] = 0x80 | ((k >> 12) & 0x3f); + buf[2] = 0x80 | ((k >> 6) & 0x3f); + buf[3] = 0x80 | (k & 0x3f); + retval = 4; + } else if (k < 0x4000000) { + buf[0] = 0xf8 | (k >> 24); + buf[1] = 0x80 | ((k >> 18) & 0x3f); + buf[2] = 0x80 | ((k >> 12) & 0x3f); + buf[3] = 0x80 | ((k >> 6) & 0x3f); + buf[4] = 0x80 | (k & 0x3f); + retval = 5; + } else { + buf[0] = 0xfc | (k >> 30); + buf[1] = 0x80 | ((k >> 24) & 0x3f); + buf[2] = 0x80 | ((k >> 18) & 0x3f); + buf[3] = 0x80 | ((k >> 12) & 0x3f); + buf[4] = 0x80 | ((k >> 6) & 0x3f); + buf[5] = 0x80 | (k & 0x3f); + retval = 6; + } + buf[retval] = '\0'; + + return retval; +} + +static const char epilogue[] = +"close\n"; + +static const char prologue_to_cp932[] = +"#!/usr/bin/expect -f\n" +"spawn tests/conv_encoding Japanese CP932 UTF-8\n" +"set timeout 1\n" +"\n" +"expect_after {\n" +" \"\\[^\\r\\n\\]*\\r\\n\" { fail $test }\n" +"}\n"; + +static const char prologue_to_cp50220[] = +"#!/usr/bin/expect -f\n" +"spawn tests/conv_encoding Japanese CP50220 UTF-8\n" +"set timeout 1\n" +"\n" +"expect_after {\n" +" \"\\[^\\r\\n\\]*\\r\\n\" { fail $test }\n" +"}\n"; + +static const char prologue_to_cp50222[] = +"#!/usr/bin/expect -f\n" +"spawn tests/conv_encoding Japanese CP50222 UTF-8\n" +"set timeout 1\n" +"\n" +"expect_after {\n" +" \"\\[^\\r\\n\\]*\\r\\n\" { fail $test }\n" +"}\n"; + +static const char prologue_from_cp932[] = +"#!/usr/bin/expect -f\n" +"spawn tests/conv_encoding Japanese UTF-8 CP932\n" +"set timeout 1\n" +"\n" +"expect_after {\n" +" \"\\[^\\r\\n\\]*\\r\\n\" { fail $test }\n" +"}\n"; + +static void to_cp932_visitor(const struct mappings_entry *entry) +{ + char buf_uni[32], buf_cp932[8]; + int i; + + if (entry->cp_uni < 32 || entry->cp_uni == 127) + return; + + i = utf32_utf8(buf_uni, entry->cp_uni); + buf_uni[i * 4] = '\0'; + while (--i >= 0) { + unsigned char c = ((unsigned char *)buf_uni)[i]; + buf_uni[i * 4] = '\\'; + buf_uni[i * 4 + 1] = 'x'; + buf_uni[i * 4 + 2] = "0123456789abcdef"[c >> 4]; + 
buf_uni[i * 4 + 3] = "0123456789abcdef"[c & 15]; + } + + printf("set test \"U+%06X\"\n" + "send -- \"%s\r\"\n" + "sleep 0.001\n" + "expect {\n", entry->cp_uni, buf_uni); + + for (i = 0; i < entry->n; ++i) { + int len = 0; + const int c = entry->cp_932[i]; + if (c >= 0x100) { + len = 2; + sprintf(buf_cp932, "%%%02x%%%02x", (c >> 8) & 0xff, c & 0xff); + } else { + len = 1; + sprintf(buf_cp932, "%%%02x", c); + } + printf(" \"%s (%d)\\r\\n\" { pass $test }\n", buf_cp932, len); + } + + printf("}\n"); +} + +static void from_cp932_visitor(const struct mappings_entry *entry) +{ + char buf_uni[32], buf_cp932[16]; /* "\xNN\xNN" needs 8 chars plus the NUL; the former [8] overflowed by one byte */ + int i, len; + + if (entry->cp_uni < 32 || entry->cp_uni == 127) + return; + + len = utf32_utf8(buf_uni, entry->cp_uni); + buf_uni[len * 3] = '\0'; + i = len; + while (--i >= 0) { + unsigned char c = ((unsigned char *)buf_uni)[i]; + buf_uni[i * 3] = '%'; + buf_uni[i * 3 + 1] = "0123456789abcdef"[c >> 4]; + buf_uni[i * 3 + 2] = "0123456789abcdef"[c & 15]; + } + + for (i = 0; i < entry->n; ++i) { + const int c = entry->cp_932[i]; + if (c >= 0x100) + sprintf(buf_cp932, "\\x%02x\\x%02x", (c >> 8) & 0xff, c & 0xff); + else + sprintf(buf_cp932, "\\x%02x", c); + printf("set test \"U+%06X\"\n" + "send -- \"%s\r\"\n" + "sleep 0.001\n" + "expect {\n" + " \"%s (%d)\\r\\n\" { pass $test }\n" + "}\n", + entry->cp_uni, buf_cp932, buf_uni, len); + } +} + +static void to_cp50220_visitor(const struct mappings_entry *entry) +{ + char buf_uni[32], buf_cp50220[32]; + int i; + + if (entry->cp_uni < 32 || entry->cp_uni == 127) + return; + + i = utf32_utf8(buf_uni, entry->cp_uni); + buf_uni[i * 4] = '\0'; + while (--i >= 0) { + unsigned char c = ((unsigned char *)buf_uni)[i]; + buf_uni[i * 4] = '\\'; + buf_uni[i * 4 + 1] = 'x'; + buf_uni[i * 4 + 2] = "0123456789abcdef"[c >> 4]; + buf_uni[i * 4 + 3] = "0123456789abcdef"[c & 15]; + } + + printf("set test \"U+%06X\"\n" + "send -- \"%s\r\"\n" + "sleep 0.001\n" + "expect {\n", entry->cp_uni, buf_uni); + + for (i = 0; i < entry->n; ++i) { +
int len = 0; + const int c = entry->cp_932[i]; + if (c >= 0xa1 && c < 0xe0) { + static const int jisx0208_tl_map[] = { + 0x0000, 0x2123, 0x2156, 0x2157, 0x2122, 0x2126, 0x2572, 0x2521, + 0x2523, 0x2525, 0x2527, 0x2529, 0x2563, 0x2565, 0x2567, 0x2543, + 0x213c, 0x2522, 0x2524, 0x2526, 0x2528, 0x252a, 0x252b, 0x252d, + 0x252f, 0x2531, 0x2533, 0x2535, 0x2537, 0x2539, 0x253b, 0x253d, + 0x253f, 0x2541, 0x2544, 0x2546, 0x2548, 0x254a, 0x254b, 0x254c, + 0x254d, 0x254e, 0x254f, 0x2552, 0x2555, 0x2558, 0x255b, 0x255e, + 0x255f, 0x2560, 0x2561, 0x2562, 0x2564, 0x2566, 0x2568, 0x2569, + 0x256a, 0x256b, 0x256c, 0x256d, 0x256f, 0x2573, 0x212b, 0x212c + }; + const int j = jisx0208_tl_map[c - 0xa0]; + len = 8; + sprintf(buf_cp50220, "%%1b%%24%%42%%%02x%%%02x%%1b%%28%%42", j >> 8, j & 0xff); + } else if (c >= 0x100) { + const int j = ((((c & 0xff00) - (c >= 0xe000 ? 0xb000: 0x7000)) << 1) | ((c & 0xff) - (c & 0x80 ? 32: 31))) - ((c & 0xff) >= 159 ? 94: 0x100); + len = 8; + sprintf(buf_cp50220, "%%1b%%24%%42%%%02x%%%02x%%1b%%28%%42", j >> 8, j & 0xff); + } else { + len = 1; + sprintf(buf_cp50220, "%%%02x", c); + } + printf(" \"%s (%d)\\r\\n\" { pass $test }\n", buf_cp50220, len); + } + + printf("}\n"); +} + +static void to_cp50222_visitor(const struct mappings_entry *entry) +{ + char buf_uni[32], buf_cp50220[32]; + int i; + + if (entry->cp_uni < 32 || entry->cp_uni == 127) + return; + + i = utf32_utf8(buf_uni, entry->cp_uni); + buf_uni[i * 4] = '\0'; + while (--i >= 0) { + unsigned char c = ((unsigned char *)buf_uni)[i]; + buf_uni[i * 4] = '\\'; + buf_uni[i * 4 + 1] = 'x'; + buf_uni[i * 4 + 2] = "0123456789abcdef"[c >> 4]; + buf_uni[i * 4 + 3] = "0123456789abcdef"[c & 15]; + } + + printf("set test \"U+%06X\"\n" + "send -- \"%s\r\"\n" + "sleep 0.001\n" + "expect {\n", entry->cp_uni, buf_uni); + + for (i = 0; i < entry->n; ++i) { + int len = 0; + const int c = entry->cp_932[i]; + if (c >= 0xa1 && c < 0xe0) { + len = 3; + sprintf(buf_cp50220, "%%0e%%%02x%%0f", c - 0x80); + } else if 
(c >= 0x100) { + const int j = ((((c & 0xff00) - (c >= 0xe000 ? 0xb000: 0x7000)) << 1) | ((c & 0xff) - (c & 0x80 ? 32: 31))) - ((c & 0xff) >= 159 ? 94: 0x100); + len = 8; + sprintf(buf_cp50220, "%%1b%%24%%42%%%02x%%%02x%%1b%%28%%42", j >> 8, j & 0xff); + } else { + len = 1; + sprintf(buf_cp50220, "%%%02x", c); + } + printf(" \"%s (%d)\\r\\n\" { pass $test }\n", buf_cp50220, len); + } + + printf("}\n"); +} + + +static struct generator_entry entries[] = { + { "to_cp932", prologue_to_cp932, epilogue, to_cp932_visitor }, + { "to_cp50220", prologue_to_cp50220, epilogue, to_cp50220_visitor }, + { "to_cp50222", prologue_to_cp50222, epilogue, to_cp50222_visitor }, + { "from_cp932", prologue_from_cp932, epilogue, from_cp932_visitor }, + { NULL } +}; + +static const char cp932_txt[] = "CP932.TXT"; + +int main(int argc, char **argv) +{ + int retval = 0; + FILE *fp; + char buf[1024]; + struct generator_entry* gen; + struct mappings map; + + if (argc <= 1) { + fprintf(stderr, "usage: %s generator\n", argv[0]); + return 255; + } + + for (gen = entries;; ++gen) { + if (!gen->name) { + fprintf(stderr, "Unknown generator: %s\n", argv[1]); + return 1; + } + if (strcmp(gen->name, argv[1]) == 0) + break; + } + + fp = fopen(cp932_txt, "r"); + if (!fp) { + fprintf(stderr, "Failed to open %s\n", cp932_txt); + return 2; + } + + mappings_init(&map); + + while (fgets(buf, sizeof(buf), fp)) { + const char *fields[16]; + char *p = buf; + int field = 0; + int cp_932, cp_uni; + for (;;) { + char *q = 0; + int eol = 0; + + if (field >= sizeof(fields) / sizeof(*fields)) { + fprintf(stderr, "Too many fields (incorrect file?)\n"); + retval = 3; + goto out; + } + + for (;;) { + if (*p == '\0' || *p == '#' || *p == 0x0a) { + eol = 1; + break; + } else if (*p != ' ' && *p != '\t') { + break; + } + ++p; + } + + if (eol) + break; + + q = p; + + for (;;) { + if (*p == '\0' || *p == '#' || *p == 0x0a) { + eol = 1; + break; + } else if (*p == ' ' || *p == '\t') { + break; + } + ++p; + } + + *p = '\0'; + 
fields[field++] = q; + + if (eol) + break; + ++p; + } + if (field == 0 || field == 1) { + continue; + } else if (field != 2) { + fprintf(stderr, "Unexpected field count (expected 2, got %d)\n", field); + retval = 3; + goto out; + } + cp_932 = strtol(fields[0], NULL, 0); + if (errno == ERANGE || errno == EINVAL) { + fprintf(stderr, "Malformed field value: %s\n", fields[0]); + retval = 4; + goto out; + } + cp_uni = strtol(fields[1], NULL, 0); + if (errno == ERANGE || errno == EINVAL) { + fprintf(stderr, "Malformed field value: %s\n", fields[1]); + retval = 4; + goto out; + } + + if (mappings_add(&map, cp_uni, cp_932)) { + fprintf(stderr, "Too many mappings to the same Unicode codepoint (U+%06X)\n", cp_uni); + retval = 4; + goto out; + } + } + + { + size_t i; + printf("%s", gen->prologue); + for (i = 0; i < map.n; ++i) + gen->visitor(&map.entries[i]); + printf("%s", gen->epilogue); + } + +out: + mappings_destroy(&map); + return retval; +} + +/* + * vim: sts=4 sw=4 ts=4 noet + */ diff --git a/ext/mbstring/libmbfl/tests/conv_encoding.tests/yensign.exp b/ext/mbstring/libmbfl/tests/conv_encoding.tests/yensign.exp new file mode 100644 index 0000000000..03f2faffce --- /dev/null +++ b/ext/mbstring/libmbfl/tests/conv_encoding.tests/yensign.exp @@ -0,0 +1,99 @@ +#!/usr/bin/expect -f +set timeout 1 + +spawn tests/conv_encoding Japanese CP932 UTF-8 +expect_after { + "\[^\r\n\]*\r\n" { fail $test } +} +set test "CP932" +send "\xc2\xa5\r" +sleep 0.01 +expect { + "%5c (1)\r\n" { pass $test } +} +close + +spawn tests/conv_encoding Japanese SJIS-open UTF-8 +expect_after { + "\[^\r\n\]*\r\n" { fail $test } +} +set test "SJIS-open" +send "\xc2\xa5\r" +sleep 0.01 +expect { + "%81%8f (2)\r\n" { pass $test } +} +close + +spawn tests/conv_encoding Japanese eucJP-open UTF-8 +expect_after { + "\[^\r\n\]*\r\n" { fail $test } +} +set test "eucJP-open" +send "\xc2\xa5\r" +sleep 0.01 +expect { + "%a1%ef (2)\r\n" { pass $test } +} +close + +spawn tests/conv_encoding Japanese CP51932 UTF-8 
+expect_after { + "\[^\r\n\]*\r\n" { fail $test } +} +set test "CP51932" +send "\xc2\xa5\r" +sleep 0.01 +expect { + "%5c (1)\r\n" { pass $test } +} +close + +spawn tests/conv_encoding Japanese ISO-2022-JP-MS UTF-8 +expect_after { + "\[^\r\n\]*\r\n" { fail $test } +} +set test "ISO-2022-JP-MS" +send "\xc2\xa5\r" +sleep 0.01 +expect { + "%1b%24%42%21%6f%1b%28%42 (8)\r\n" { pass $test } +} +close + +spawn tests/conv_encoding Japanese CP50220 UTF-8 +expect_after { + "\[^\r\n\]*\r\n" { fail $test } +} +set test "CP50220" +send "\xc2\xa5\r" +sleep 0.01 +expect { + "%1b%28%4a%5c%1b%28%42 (7)\r\n" { pass $test } +} +close + +spawn tests/conv_encoding Japanese CP50221 UTF-8 +expect_after { + "\[^\r\n\]*\r\n" { fail $test } +} +set test "CP50221" +send "\xc2\xa5\r" +sleep 0.01 +expect { + "%1b%28%4a%5c%1b%28%42 (7)\r\n" { pass $test } +} +close + +spawn tests/conv_encoding Japanese CP50222 UTF-8 +expect_after { + "\[^\r\n\]*\r\n" { fail $test } +} +set test "CP50222" +send "\xc2\xa5\r" +sleep 0.01 +expect { + "%1b%28%4a%5c%1b%28%42 (7)\r\n" { pass $test } +} +close +# vim: sts=4 ts=4 sw=4 et diff --git a/ext/mbstring/libmbfl/tests/sample.c b/ext/mbstring/libmbfl/tests/sample.c index 54f809378a..d96196495f 100644 --- a/ext/mbstring/libmbfl/tests/sample.c +++ b/ext/mbstring/libmbfl/tests/sample.c @@ -1,8 +1,8 @@ -/* - this is a small sample script to use libmbfl. - Rui Hirokawa - - this file is encoded in EUC-JP. +/** + * this is a small sample script to use libmbfl. + * Rui Hirokawa + * + * this file is encoded in EUC-JP. 
*/ #include @@ -10,29 +10,29 @@ int main() { - enum mbfl_no_encoding from_encoding, to_encoding; - enum mbfl_no_language no_language; - mbfl_buffer_converter *convd = NULL; - mbfl_string string, result, *ret; - mbfl_language lang; - unsigned char input[] = "日本語文字列"; /* EUC-JP kanji string */ + enum mbfl_no_encoding from_encoding, to_encoding; + enum mbfl_no_language no_language; + mbfl_buffer_converter *convd = NULL; + mbfl_string string, result, *ret; + mbfl_language lang; + unsigned char input[] = "日本語文字列"; /* EUC-JP kanji string */ - no_language = mbfl_name2no_language("Japanese"); - from_encoding = mbfl_name2no_encoding("EUC-JP"); - to_encoding = mbfl_name2no_encoding("SJIS"); + no_language = mbfl_name2no_language("Japanese"); + from_encoding = mbfl_name2no_encoding("EUC-JP"); + to_encoding = mbfl_name2no_encoding("SJIS"); - mbfl_string_init(&string); - mbfl_string_init(&result); + mbfl_string_init(&string); + mbfl_string_init(&result); - string.no_encoding = from_encoding; - string.no_language = no_language; - string.val = (unsigned char *)input; - string.len = strlen(input); + string.no_encoding = from_encoding; + string.no_language = no_language; + string.val = (unsigned char *)input; + string.len = strlen(input); - convd = mbfl_buffer_converter_new(from_encoding, to_encoding, 0); - ret = mbfl_buffer_converter_feed_result(convd, &string, &result); - mbfl_buffer_converter_delete(convd); + convd = mbfl_buffer_converter_new(from_encoding, to_encoding, 0); + ret = mbfl_buffer_converter_feed_result(convd, &string, &result); + mbfl_buffer_converter_delete(convd); - puts(ret->val); - return 1; + puts(ret->val); + return 1; } diff --git a/ext/mbstring/libmbfl/tests/strcut.tests/iso2022jp.exp b/ext/mbstring/libmbfl/tests/strcut.tests/iso2022jp.exp index f203bbf5f0..7e3d0fd1ed 100644 --- a/ext/mbstring/libmbfl/tests/strcut.tests/iso2022jp.exp +++ b/ext/mbstring/libmbfl/tests/strcut.tests/iso2022jp.exp @@ -106,8 +106,8 @@ expect { begin_strcut_test 8 21 set test 
"non-asciish characters (2) ($from, $length)" -sleep 1 send "\x1b\x24\x42\x25\x46\x1b\x28\x42\x74\x1b\x24\x42\x25\x39\x1b\x28\x42\x74\x1b\x24\x42\x25\x48\x24\x46\x24\x39\x24\x48\x1b\x28\x49\x4a\x5e\x4a\x5e\x4a\x5e\x43\x3d\x44\x1b\x28\x42\x74\x1b\x24\x42\x25\x46\x1b\x28\x42\x74\x1b\x24\x42\x25\x39\x1b\x28\x42\x74\x1b\x24\x42\x25\x48\x1b\x28\x42\x74\x1b\x24\x42\x25\x46\x1b\x28\x42\x74\x1b\x24\x42\x25\x39\x1b\x28\x42\x74\x1b\x24\x42\x25\x48\x1b\x28\x42\x74\x1b\x24\x42\x25\x46\x1b\x28\x42\x74\x1b\x24\x42\x25\x39\x1b\x28\x42\x74\x1b\x24\x42\x25\x48\x1b\x28\x42\x74\r" +sleep 1 expect { -ex "%74%1b%24%42%25%39%1b%28%42%74%1b%24%42%25%48%24%46%1b%28%42 (20)\r\n" { pass $test @@ -117,8 +117,8 @@ expect { begin_strcut_test 11 17 set test "non-asciish characters (2) ($from, $length)" -sleep 1 send "\x1b\x24\x42\x25\x46\x1b\x28\x42\x74\x1b\x24\x42\x25\x39\x1b\x28\x42\x74\x1b\x24\x42\x25\x48\x24\x46\x24\x39\x24\x48\x1b\x28\x49\x4a\x5e\x4a\x5e\x4a\x5e\x43\x3d\x44\x1b\x28\x42\x74\x1b\x24\x42\x25\x46\x1b\x28\x42\x74\x1b\x24\x42\x25\x39\x1b\x28\x42\x74\x1b\x24\x42\x25\x48\x1b\x28\x42\x74\x1b\x24\x42\x25\x46\x1b\x28\x42\x74\x1b\x24\x42\x25\x39\x1b\x28\x42\x74\x1b\x24\x42\x25\x48\x1b\x28\x42\x74\x1b\x24\x42\x25\x46\x1b\x28\x42\x74\x1b\x24\x42\x25\x39\x1b\x28\x42\x74\x1b\x24\x42\x25\x48\x1b\x28\x42\x74\r" +sleep 1 expect { -ex "%1b%24%42%25%39%1b%28%42%74%1b%24%42%25%48%1b%28%42 (17)\r\n" { pass $test -- cgit v1.2.1