diff options
author | Lorry Tar Creator <lorry-tar-importer@baserock.org> | 2013-03-14 05:42:27 +0000 |
---|---|---|
committer | <> | 2013-04-03 16:25:08 +0000 |
commit | c4dd7a1a684490673e25aaf4fabec5df138854c4 (patch) | |
tree | 4d57c44caae4480efff02b90b9be86f44bf25409 /ext/mbstring/libmbfl/mbfl | |
download | php2-master.tar.gz |
Imported from /home/lorry/working-area/delta_php2/php-5.4.13.tar.bz2.HEADphp-5.4.13master
Diffstat (limited to 'ext/mbstring/libmbfl/mbfl')
29 files changed, 6662 insertions, 0 deletions
diff --git a/ext/mbstring/libmbfl/mbfl/Makefile.am b/ext/mbstring/libmbfl/mbfl/Makefile.am new file mode 100644 index 0000000..6774d88 --- /dev/null +++ b/ext/mbstring/libmbfl/mbfl/Makefile.am @@ -0,0 +1,48 @@ +EXTRA_DIST=mk_eaw_tbl.awk +lib_LTLIBRARIES=libmbfl.la +libmbfl_la_SOURCES=mbfilter.c \ + mbfl_string.c \ + mbfl_language.c \ + mbfl_encoding.c \ + mbfl_convert.c \ + mbfl_ident.c \ + mbfl_memory_device.c \ + mbfl_allocators.c \ + mbfl_filter_output.c \ + mbfilter_pass.c \ + mbfilter_wchar.c \ + mbfilter_8bit.c \ + eaw_table.h +libmbfl_filters_la=../filters/libmbfl_filters.la +libmbfl_nls_la=../nls/libmbfl_nls.la +libmbfl_la_LIBADD=$(libmbfl_filters_la) $(libmbfl_nls_la) +libmbfl_la_LDFLAGS=-version-info $(SHLIB_VERSION) +libmbfl_includedir=$(includedir)/mbfl +libmbfl_include_HEADERS=mbfilter.h \ + mbfl_consts.h \ + mbfl_encoding.h \ + mbfl_language.h \ + mbfl_string.h \ + mbfl_convert.h \ + mbfl_ident.h \ + mbfl_memory_device.h \ + mbfl_allocators.h \ + mbfl_defs.h \ + mbfl_filter_output.h \ + mbfilter_pass.h \ + mbfilter_wchar.h \ + mbfilter_8bit.h + +mbfilter.c: eaw_table.h + +eaw_table.h: mk_eaw_tbl.awk + $(AWK) -v TABLE_NAME=mbfl_eaw_table -f mk_eaw_tbl.awk EastAsianWidth.txt > $@ + +EastAsianWidth.txt: + $(FETCH_VIA_FTP) ftp://ftp.unicode.org/Public/UNIDATA/EastAsianWidth.txt + +$(libmbfl_filters_la): + $(MAKE) -C `dirname $(libmbfl_filters_la)` + +$(libmbfl_nls_la): + $(MAKE) -C `dirname $(libmbfl_nls_la)` diff --git a/ext/mbstring/libmbfl/mbfl/eaw_table.h b/ext/mbstring/libmbfl/mbfl/eaw_table.h new file mode 100644 index 0000000..af310ea --- /dev/null +++ b/ext/mbstring/libmbfl/mbfl/eaw_table.h @@ -0,0 +1,44 @@ +static const struct { + int begin; + int end; +} mbfl_eaw_table[] = { + { 0x1100, 0x115f }, + { 0x11a3, 0x11a7 }, + { 0x11fa, 0x11ff }, + { 0x2329, 0x232a }, + { 0x2e80, 0x2e99 }, + { 0x2e9b, 0x2ef3 }, + { 0x2f00, 0x2fd5 }, + { 0x2ff0, 0x2ffb }, + { 0x3000, 0x303e }, + { 0x3041, 0x3096 }, + { 0x3099, 0x30ff }, + { 0x3105, 0x312d }, + { 0x3131, 
0x318e }, + { 0x3190, 0x31ba }, + { 0x31c0, 0x31e3 }, + { 0x31f0, 0x321e }, + { 0x3220, 0x3247 }, + { 0x3250, 0x32fe }, + { 0x3300, 0x4dbf }, + { 0x4e00, 0xa48c }, + { 0xa490, 0xa4c6 }, + { 0xa960, 0xa97c }, + { 0xac00, 0xd7a3 }, + { 0xd7b0, 0xd7c6 }, + { 0xd7cb, 0xd7fb }, + { 0xf900, 0xfaff }, + { 0xfe10, 0xfe19 }, + { 0xfe30, 0xfe52 }, + { 0xfe54, 0xfe66 }, + { 0xfe68, 0xfe6b }, + { 0xff01, 0xff60 }, + { 0xffe0, 0xffe6 }, + { 0x1b000, 0x1b001 }, + { 0x1f200, 0x1f202 }, + { 0x1f210, 0x1f23a }, + { 0x1f240, 0x1f248 }, + { 0x1f250, 0x1f251 }, + { 0x20000, 0x2fffd }, + { 0x30000, 0x3fffd } +}; diff --git a/ext/mbstring/libmbfl/mbfl/mbfilter.c b/ext/mbstring/libmbfl/mbfl/mbfilter.c new file mode 100644 index 0000000..b3759f9 --- /dev/null +++ b/ext/mbstring/libmbfl/mbfl/mbfilter.c @@ -0,0 +1,3112 @@ +/* + * charset=UTF-8 + * vim600: encoding=utf-8 + */ + +/* + * "streamable kanji code filter and converter" + * + * Copyright (c) 1998,1999,2000,2001 HappySize, Inc. All rights reserved. + * + * This software is released under the GNU Lesser General Public License. + * (Version 2.1, February 1999) + * Please read the following detail of the licence (in japanese). 
+ * + * ◆使用許諾条件◆ + * + * このソフトウェアは株式会社ハッピーサイズによって開発されました。株式会社ハッ + * ピーサイズは、著作権法および万国著作権条約の定めにより、このソフトウェアに関 + * するすべての権利を留保する権利を持ち、ここに行使します。株式会社ハッピーサイ + * ズは以下に明記した条件に従って、このソフトウェアを使用する排他的ではない権利 + * をお客様に許諾します。何人たりとも、以下の条件に反してこのソフトウェアを使用 + * することはできません。 + * + * このソフトウェアを「GNU Lesser General Public License (Version 2.1, February + * 1999)」に示された条件で使用することを、全ての方に許諾します。「GNU Lesser + * General Public License」を満たさない使用には、株式会社ハッピーサイズから書面 + * による許諾を得る必要があります。 + * + * 「GNU Lesser General Public License」の全文は以下のウェブページから取得でき + * ます。「GNU Lesser General Public License」とは、これまでLibrary General + * Public Licenseと呼ばれていたものです。 + * http://www.gnu.org/ --- GNUウェブサイト + * http://www.gnu.org/copyleft/lesser.html --- ライセンス文面 + * このライセンスの内容がわからない方、守れない方には使用を許諾しません。 + * + * しかしながら、当社とGNUプロジェクトとの特定の関係を示唆または主張するもので + * はありません。 + * + * ◆保証内容◆ + * + * このソフトウェアは、期待された動作・機能・性能を持つことを目標として設計され + * 開発されていますが、これを保証するものではありません。このソフトウェアは「こ + * のまま」の状態で提供されており、たとえばこのソフトウェアの有用性ないし特定の + * 目的に合致することといった、何らかの保証内容が、明示されたり暗黙に示されてい + * る場合であっても、その保証は無効です。このソフトウェアを使用した結果ないし使 + * 用しなかった結果によって、直接あるいは間接に受けた身体的な傷害、財産上の損害 + * 、データの損失あるいはその他の全ての損害については、その損害の可能性が使用者 + * 、当社あるいは第三者によって警告されていた場合であっても、当社はその損害の賠 + * 償および補填を行いません。この規定は他の全ての、書面上または書面に無い保証・ + * 契約・規定に優先します。 + * + * ◆著作権者の連絡先および使用条件についての問い合わせ先◆ + * + * 〒102-0073 + * 東京都千代田区九段北1-13-5日本地所第一ビル4F + * 株式会社ハッピーサイズ + * Phone: 03-3512-3655, Fax: 03-3512-3656 + * Email: sales@happysize.co.jp + * Web: http://happysize.com/ + * + * ◆著者◆ + * + * 金本 茂 <sgk@happysize.co.jp> + * + * ◆履歴◆ + * + * 1998/11/10 sgk implementation in C++ + * 1999/4/25 sgk Cで書きなおし。 + * 1999/4/26 sgk 入力フィルタを実装。漢字コードを推定しながらフィルタを追加。 + * 1999/6/?? Unicodeサポート。 + * 1999/6/22 sgk ライセンスをLGPLに変更。 + * + */ + +/* + * Unicode support + * + * Portions copyright (c) 1999,2000,2001 by the PHP3 internationalization team. + * All rights reserved. 
+ * + */ + + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <stddef.h> + +#ifdef HAVE_STRING_H +#include <string.h> +#endif + +#ifdef HAVE_STRINGS_H +#include <strings.h> +#endif + +#ifdef HAVE_STDDEF_H +#include <stddef.h> +#endif + +#include "mbfilter.h" +#include "mbfl_filter_output.h" +#include "mbfilter_pass.h" +#include "filters/mbfilter_tl_jisx0201_jisx0208.h" + +#include "eaw_table.h" + +/* hex character table "0123456789ABCDEF" */ +static char mbfl_hexchar_table[] = { + 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x41,0x42,0x43,0x44,0x45,0x46 +}; + + + +/* + * encoding filter + */ +#define CK(statement) do { if ((statement) < 0) return (-1); } while (0) + + +/* + * buffering converter + */ +mbfl_buffer_converter * +mbfl_buffer_converter_new( + enum mbfl_no_encoding from, + enum mbfl_no_encoding to, + int buf_initsz) +{ + const mbfl_encoding *_from = mbfl_no2encoding(from); + const mbfl_encoding *_to = mbfl_no2encoding(to); + + return mbfl_buffer_converter_new2(_from ? _from: &mbfl_encoding_pass, _to ? 
_to: &mbfl_encoding_pass, buf_initsz); +} + +mbfl_buffer_converter * +mbfl_buffer_converter_new2( + const mbfl_encoding *from, + const mbfl_encoding *to, + int buf_initsz) +{ + mbfl_buffer_converter *convd; + + /* allocate */ + convd = (mbfl_buffer_converter*)mbfl_malloc(sizeof (mbfl_buffer_converter)); + if (convd == NULL) { + return NULL; + } + + /* initialize */ + convd->from = from; + convd->to = to; + + /* create convert filter */ + convd->filter1 = NULL; + convd->filter2 = NULL; + if (mbfl_convert_filter_get_vtbl(convd->from->no_encoding, convd->to->no_encoding) != NULL) { + convd->filter1 = mbfl_convert_filter_new(convd->from->no_encoding, convd->to->no_encoding, mbfl_memory_device_output, NULL, &convd->device); + } else { + convd->filter2 = mbfl_convert_filter_new(mbfl_no_encoding_wchar, convd->to->no_encoding, mbfl_memory_device_output, NULL, &convd->device); + if (convd->filter2 != NULL) { + convd->filter1 = mbfl_convert_filter_new(convd->from->no_encoding, + mbfl_no_encoding_wchar, + (int (*)(int, void*))convd->filter2->filter_function, + (int (*)(void*))convd->filter2->filter_flush, + convd->filter2); + if (convd->filter1 == NULL) { + mbfl_convert_filter_delete(convd->filter2); + } + } + } + if (convd->filter1 == NULL) { + return NULL; + } + + mbfl_memory_device_init(&convd->device, buf_initsz, buf_initsz/4); + + return convd; +} + + +void +mbfl_buffer_converter_delete(mbfl_buffer_converter *convd) +{ + if (convd != NULL) { + if (convd->filter1) { + mbfl_convert_filter_delete(convd->filter1); + } + if (convd->filter2) { + mbfl_convert_filter_delete(convd->filter2); + } + mbfl_memory_device_clear(&convd->device); + mbfl_free((void*)convd); + } +} + +void +mbfl_buffer_converter_reset(mbfl_buffer_converter *convd) +{ + mbfl_memory_device_reset(&convd->device); +} + +int +mbfl_buffer_converter_illegal_mode(mbfl_buffer_converter *convd, int mode) +{ + if (convd != NULL) { + if (convd->filter2 != NULL) { + convd->filter2->illegal_mode = mode; + } else if 
(convd->filter1 != NULL) { + convd->filter1->illegal_mode = mode; + } else { + return 0; + } + } + + return 1; +} + +int +mbfl_buffer_converter_illegal_substchar(mbfl_buffer_converter *convd, int substchar) +{ + if (convd != NULL) { + if (convd->filter2 != NULL) { + convd->filter2->illegal_substchar = substchar; + } else if (convd->filter1 != NULL) { + convd->filter1->illegal_substchar = substchar; + } else { + return 0; + } + } + + return 1; +} + +int +mbfl_buffer_converter_strncat(mbfl_buffer_converter *convd, const unsigned char *p, int n) +{ + mbfl_convert_filter *filter; + int (*filter_function)(int c, mbfl_convert_filter *filter); + + if (convd != NULL && p != NULL) { + filter = convd->filter1; + if (filter != NULL) { + filter_function = filter->filter_function; + while (n > 0) { + if ((*filter_function)(*p++, filter) < 0) { + break; + } + n--; + } + } + } + + return n; +} + +int +mbfl_buffer_converter_feed(mbfl_buffer_converter *convd, mbfl_string *string) +{ + return mbfl_buffer_converter_feed2(convd, string, NULL); +} + +int +mbfl_buffer_converter_feed2(mbfl_buffer_converter *convd, mbfl_string *string, int *loc) +{ + int n; + unsigned char *p; + mbfl_convert_filter *filter; + int (*filter_function)(int c, mbfl_convert_filter *filter); + + if (convd == NULL || string == NULL) { + return -1; + } + mbfl_memory_device_realloc(&convd->device, convd->device.pos + string->len, string->len/4); + /* feed data */ + n = string->len; + p = string->val; + + filter = convd->filter1; + if (filter != NULL) { + filter_function = filter->filter_function; + while (n > 0) { + if ((*filter_function)(*p++, filter) < 0) { + if (loc) { + *loc = p - string->val; + } + return -1; + } + n--; + } + } + if (loc) { + *loc = p - string->val; + } + return 0; +} + + +int +mbfl_buffer_converter_flush(mbfl_buffer_converter *convd) +{ + if (convd == NULL) { + return -1; + } + + if (convd->filter1 != NULL) { + mbfl_convert_filter_flush(convd->filter1); + } + if (convd->filter2 != NULL) { + 
mbfl_convert_filter_flush(convd->filter2); + } + + return 0; +} + +mbfl_string * +mbfl_buffer_converter_getbuffer(mbfl_buffer_converter *convd, mbfl_string *result) +{ + if (convd != NULL && result != NULL && convd->device.buffer != NULL) { + result->no_encoding = convd->to->no_encoding; + result->val = convd->device.buffer; + result->len = convd->device.pos; + } else { + result = NULL; + } + + return result; +} + +mbfl_string * +mbfl_buffer_converter_result(mbfl_buffer_converter *convd, mbfl_string *result) +{ + if (convd == NULL || result == NULL) { + return NULL; + } + result->no_encoding = convd->to->no_encoding; + return mbfl_memory_device_result(&convd->device, result); +} + +mbfl_string * +mbfl_buffer_converter_feed_result(mbfl_buffer_converter *convd, mbfl_string *string, + mbfl_string *result) +{ + if (convd == NULL || string == NULL || result == NULL) { + return NULL; + } + mbfl_buffer_converter_feed(convd, string); + if (convd->filter1 != NULL) { + mbfl_convert_filter_flush(convd->filter1); + } + if (convd->filter2 != NULL) { + mbfl_convert_filter_flush(convd->filter2); + } + result->no_encoding = convd->to->no_encoding; + return mbfl_memory_device_result(&convd->device, result); +} + +int mbfl_buffer_illegalchars(mbfl_buffer_converter *convd) +{ + int num_illegalchars = 0; + + if (convd == NULL) { + return 0; + } + + if (convd->filter1 != NULL) { + num_illegalchars += convd->filter1->num_illegalchar; + } + + if (convd->filter2 != NULL) { + num_illegalchars += convd->filter2->num_illegalchar; + } + + return (num_illegalchars); +} + +/* + * encoding detector + */ +mbfl_encoding_detector * +mbfl_encoding_detector_new(enum mbfl_no_encoding *elist, int elistsz, int strict) +{ + mbfl_encoding_detector *identd; + + int i, num; + mbfl_identify_filter *filter; + + if (elist == NULL || elistsz <= 0) { + return NULL; + } + + /* allocate */ + identd = (mbfl_encoding_detector*)mbfl_malloc(sizeof(mbfl_encoding_detector)); + if (identd == NULL) { + return NULL; + } + 
identd->filter_list = (mbfl_identify_filter **)mbfl_calloc(elistsz, sizeof(mbfl_identify_filter *)); + if (identd->filter_list == NULL) { + mbfl_free(identd); + return NULL; + } + + /* create filters */ + i = 0; + num = 0; + while (i < elistsz) { + filter = mbfl_identify_filter_new(elist[i]); + if (filter != NULL) { + identd->filter_list[num] = filter; + num++; + } + i++; + } + identd->filter_list_size = num; + + /* set strict flag */ + identd->strict = strict; + + return identd; +} + +mbfl_encoding_detector * +mbfl_encoding_detector_new2(const mbfl_encoding **elist, int elistsz, int strict) +{ + mbfl_encoding_detector *identd; + + int i, num; + mbfl_identify_filter *filter; + + if (elist == NULL || elistsz <= 0) { + return NULL; + } + + /* allocate */ + identd = (mbfl_encoding_detector*)mbfl_malloc(sizeof(mbfl_encoding_detector)); + if (identd == NULL) { + return NULL; + } + identd->filter_list = (mbfl_identify_filter **)mbfl_calloc(elistsz, sizeof(mbfl_identify_filter *)); + if (identd->filter_list == NULL) { + mbfl_free(identd); + return NULL; + } + + /* create filters */ + i = 0; + num = 0; + while (i < elistsz) { + filter = mbfl_identify_filter_new2(elist[i]); + if (filter != NULL) { + identd->filter_list[num] = filter; + num++; + } + i++; + } + identd->filter_list_size = num; + + /* set strict flag */ + identd->strict = strict; + + return identd; +} + + +void +mbfl_encoding_detector_delete(mbfl_encoding_detector *identd) +{ + int i; + + if (identd != NULL) { + if (identd->filter_list != NULL) { + i = identd->filter_list_size; + while (i > 0) { + i--; + mbfl_identify_filter_delete(identd->filter_list[i]); + } + mbfl_free((void *)identd->filter_list); + } + mbfl_free((void *)identd); + } +} + +int +mbfl_encoding_detector_feed(mbfl_encoding_detector *identd, mbfl_string *string) +{ + int i, n, num, bad, res; + unsigned char *p; + mbfl_identify_filter *filter; + + res = 0; + /* feed data */ + if (identd != NULL && string != NULL && string->val != NULL) { + num = 
identd->filter_list_size; + n = string->len; + p = string->val; + bad = 0; + while (n > 0) { + for (i = 0; i < num; i++) { + filter = identd->filter_list[i]; + if (!filter->flag) { + (*filter->filter_function)(*p, filter); + if (filter->flag) { + bad++; + } + } + } + if ((num - 1) <= bad) { + res = 1; + break; + } + p++; + n--; + } + } + + return res; +} + +const mbfl_encoding *mbfl_encoding_detector_judge2(mbfl_encoding_detector *identd) +{ + mbfl_identify_filter *filter; + const mbfl_encoding *encoding = NULL; + int n; + + /* judge */ + if (identd != NULL) { + n = identd->filter_list_size - 1; + while (n >= 0) { + filter = identd->filter_list[n]; + if (!filter->flag) { + if (!identd->strict || !filter->status) { + encoding = filter->encoding; + } + } + n--; + } + + /* fallback judge */ + if (!encoding) { + n = identd->filter_list_size - 1; + while (n >= 0) { + filter = identd->filter_list[n]; + if (!filter->flag) { + encoding = filter->encoding; + } + n--; + } + } + } + + return encoding; +} + +enum mbfl_no_encoding mbfl_encoding_detector_judge(mbfl_encoding_detector *identd) +{ + const mbfl_encoding *encoding = mbfl_encoding_detector_judge2(identd); + return !encoding ? 
mbfl_no_encoding_invalid: encoding->no_encoding; +} + + +/* + * encoding converter + */ +mbfl_string * +mbfl_convert_encoding( + mbfl_string *string, + mbfl_string *result, + enum mbfl_no_encoding toenc) +{ + int n; + unsigned char *p; + const mbfl_encoding *encoding; + mbfl_memory_device device; + mbfl_convert_filter *filter1; + mbfl_convert_filter *filter2; + + /* initialize */ + encoding = mbfl_no2encoding(toenc); + if (encoding == NULL || string == NULL || result == NULL) { + return NULL; + } + + filter1 = NULL; + filter2 = NULL; + if (mbfl_convert_filter_get_vtbl(string->no_encoding, toenc) != NULL) { + filter1 = mbfl_convert_filter_new(string->no_encoding, toenc, mbfl_memory_device_output, 0, &device); + } else { + filter2 = mbfl_convert_filter_new(mbfl_no_encoding_wchar, toenc, mbfl_memory_device_output, 0, &device); + if (filter2 != NULL) { + filter1 = mbfl_convert_filter_new(string->no_encoding, mbfl_no_encoding_wchar, (int (*)(int, void*))filter2->filter_function, NULL, filter2); + if (filter1 == NULL) { + mbfl_convert_filter_delete(filter2); + } + } + } + if (filter1 == NULL) { + return NULL; + } + + if (filter2 != NULL) { + filter2->illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR; + filter2->illegal_substchar = 0x3f; /* '?' 
*/ + } + + mbfl_memory_device_init(&device, string->len, (string->len >> 2) + 8); + + /* feed data */ + n = string->len; + p = string->val; + if (p != NULL) { + while (n > 0) { + if ((*filter1->filter_function)(*p++, filter1) < 0) { + break; + } + n--; + } + } + + mbfl_convert_filter_flush(filter1); + mbfl_convert_filter_delete(filter1); + if (filter2 != NULL) { + mbfl_convert_filter_flush(filter2); + mbfl_convert_filter_delete(filter2); + } + + return mbfl_memory_device_result(&device, result); +} + + +/* + * identify encoding + */ +const mbfl_encoding * +mbfl_identify_encoding(mbfl_string *string, enum mbfl_no_encoding *elist, int elistsz, int strict) +{ + int i, n, num, bad; + unsigned char *p; + mbfl_identify_filter *flist, *filter; + const mbfl_encoding *encoding; + + /* flist is an array of mbfl_identify_filter instances */ + flist = (mbfl_identify_filter *)mbfl_calloc(elistsz, sizeof(mbfl_identify_filter)); + if (flist == NULL) { + return NULL; + } + + num = 0; + if (elist != NULL) { + for (i = 0; i < elistsz; i++) { + if (!mbfl_identify_filter_init(&flist[num], elist[i])) { + num++; + } + } + } + + /* feed data */ + n = string->len; + p = string->val; + + if (p != NULL) { + bad = 0; + while (n > 0) { + for (i = 0; i < num; i++) { + filter = &flist[i]; + if (!filter->flag) { + (*filter->filter_function)(*p, filter); + if (filter->flag) { + bad++; + } + } + } + if ((num - 1) <= bad && !strict) { + break; + } + p++; + n--; + } + } + + /* judge */ + encoding = NULL; + + for (i = 0; i < num; i++) { + filter = &flist[i]; + if (!filter->flag) { + if (strict && filter->status) { + continue; + } + encoding = filter->encoding; + break; + } + } + + /* fall-back judge */ + if (!encoding) { + for (i = 0; i < num; i++) { + filter = &flist[i]; + if (!filter->flag && (!strict || !filter->status)) { + encoding = filter->encoding; + break; + } + } + } + + /* cleanup */ + /* dtors should be called in reverse order */ + i = num; while (--i >= 0) { + 
mbfl_identify_filter_cleanup(&flist[i]); + } + + mbfl_free((void *)flist); + + return encoding; +} + +const mbfl_encoding * +mbfl_identify_encoding2(mbfl_string *string, const mbfl_encoding **elist, int elistsz, int strict) +{ + int i, n, num, bad; + unsigned char *p; + mbfl_identify_filter *flist, *filter; + const mbfl_encoding *encoding; + + /* flist is an array of mbfl_identify_filter instances */ + flist = (mbfl_identify_filter *)mbfl_calloc(elistsz, sizeof(mbfl_identify_filter)); + if (flist == NULL) { + return NULL; + } + + num = 0; + if (elist != NULL) { + for (i = 0; i < elistsz; i++) { + if (!mbfl_identify_filter_init2(&flist[num], elist[i])) { + num++; + } + } + } + + /* feed data */ + n = string->len; + p = string->val; + + if (p != NULL) { + bad = 0; + while (n > 0) { + for (i = 0; i < num; i++) { + filter = &flist[i]; + if (!filter->flag) { + (*filter->filter_function)(*p, filter); + if (filter->flag) { + bad++; + } + } + } + if ((num - 1) <= bad && !strict) { + break; + } + p++; + n--; + } + } + + /* judge */ + encoding = NULL; + + for (i = 0; i < num; i++) { + filter = &flist[i]; + if (!filter->flag) { + if (strict && filter->status) { + continue; + } + encoding = filter->encoding; + break; + } + } + + /* fall-back judge */ + if (!encoding) { + for (i = 0; i < num; i++) { + filter = &flist[i]; + if (!filter->flag && (!strict || !filter->status)) { + encoding = filter->encoding; + break; + } + } + } + + /* cleanup */ + /* dtors should be called in reverse order */ + i = num; while (--i >= 0) { + mbfl_identify_filter_cleanup(&flist[i]); + } + + mbfl_free((void *)flist); + + return encoding; +} + +/* + * strlen + */ +static int +filter_count_output(int c, void *data) +{ + (*(int *)data)++; + return c; +} + +int +mbfl_strlen(mbfl_string *string) +{ + int len, n, m, k; + unsigned char *p; + const unsigned char *mbtab; + const mbfl_encoding *encoding; + + encoding = mbfl_no2encoding(string->no_encoding); + if (encoding == NULL || string == NULL) { + return 
-1; + } + + len = 0; + if (encoding->flag & MBFL_ENCTYPE_SBCS) { + len = string->len; + } else if (encoding->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) { + len = string->len/2; + } else if (encoding->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) { + len = string->len/4; + } else if (encoding->mblen_table != NULL) { + mbtab = encoding->mblen_table; + n = 0; + p = string->val; + k = string->len; + /* count */ + if (p != NULL) { + while (n < k) { + m = mbtab[*p]; + n += m; + p += m; + len++; + }; + } + } else { + /* wchar filter */ + mbfl_convert_filter *filter = mbfl_convert_filter_new( + string->no_encoding, + mbfl_no_encoding_wchar, + filter_count_output, 0, &len); + if (filter == NULL) { + return -1; + } + /* count */ + n = string->len; + p = string->val; + if (p != NULL) { + while (n > 0) { + (*filter->filter_function)(*p++, filter); + n--; + } + } + mbfl_convert_filter_delete(filter); + } + + return len; +} + + +/* + * strpos + */ +struct collector_strpos_data { + mbfl_convert_filter *next_filter; + mbfl_wchar_device needle; + int needle_len; + int start; + int output; + int found_pos; + int needle_pos; + int matched_pos; +}; + +static int +collector_strpos(int c, void* data) +{ + int *p, *h, *m, n; + struct collector_strpos_data *pc = (struct collector_strpos_data*)data; + + if (pc->output >= pc->start) { + if (c == (int)pc->needle.buffer[pc->needle_pos]) { + if (pc->needle_pos == 0) { + pc->found_pos = pc->output; /* found position */ + } + pc->needle_pos++; /* needle pointer */ + if (pc->needle_pos >= pc->needle_len) { + pc->matched_pos = pc->found_pos; /* matched position */ + pc->needle_pos--; + goto retry; + } + } else if (pc->needle_pos != 0) { +retry: + h = (int *)pc->needle.buffer; + h++; + for (;;) { + pc->found_pos++; + p = h; + m = (int *)pc->needle.buffer; + n = pc->needle_pos - 1; + while (n > 0 && *p == *m) { + n--; + p++; + m++; + } + if (n <= 0) { + if (*m != c) { + pc->needle_pos = 0; + } + break; + } else { + h++; + 
pc->needle_pos--; + } + } + } + } + + pc->output++; + return c; +} + +/* + * oddlen + */ +int +mbfl_oddlen(mbfl_string *string) +{ + int len, n, m, k; + unsigned char *p; + const unsigned char *mbtab; + const mbfl_encoding *encoding; + + + if (string == NULL) { + return -1; + } + encoding = mbfl_no2encoding(string->no_encoding); + if (encoding == NULL) { + return -1; + } + + len = 0; + if (encoding->flag & MBFL_ENCTYPE_SBCS) { + return 0; + } else if (encoding->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) { + return len % 2; + } else if (encoding->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) { + return len % 4; + } else if (encoding->mblen_table != NULL) { + mbtab = encoding->mblen_table; + n = 0; + p = string->val; + k = string->len; + /* count */ + if (p != NULL) { + while (n < k) { + m = mbtab[*p]; + n += m; + p += m; + }; + } + return n-k; + } else { + /* how can i do ? */ + return 0; + } + /* NOT REACHED */ +} + +int +mbfl_strpos( + mbfl_string *haystack, + mbfl_string *needle, + int offset, + int reverse) +{ + int result; + mbfl_string _haystack_u8, _needle_u8; + const mbfl_string *haystack_u8, *needle_u8; + const unsigned char *u8_tbl; + + if (haystack == NULL || haystack->val == NULL || needle == NULL || needle->val == NULL) { + return -8; + } + + { + const mbfl_encoding *u8_enc; + u8_enc = mbfl_no2encoding(mbfl_no_encoding_utf8); + if (u8_enc == NULL || u8_enc->mblen_table == NULL) { + return -8; + } + u8_tbl = u8_enc->mblen_table; + } + + if (haystack->no_encoding != mbfl_no_encoding_utf8) { + mbfl_string_init(&_haystack_u8); + haystack_u8 = mbfl_convert_encoding(haystack, &_haystack_u8, mbfl_no_encoding_utf8); + if (haystack_u8 == NULL) { + result = -4; + goto out; + } + } else { + haystack_u8 = haystack; + } + + if (needle->no_encoding != mbfl_no_encoding_utf8) { + mbfl_string_init(&_needle_u8); + needle_u8 = mbfl_convert_encoding(needle, &_needle_u8, mbfl_no_encoding_utf8); + if (needle_u8 == NULL) { + result = -4; + goto out; + } + } 
else { + needle_u8 = needle; + } + + if (needle_u8->len < 1) { + result = -8; + goto out; + } + + result = -1; + if (haystack_u8->len < needle_u8->len) { + goto out; + } + + if (!reverse) { + unsigned int jtbl[1 << (sizeof(unsigned char) * 8)]; + unsigned int needle_u8_len = needle_u8->len; + unsigned int i; + const unsigned char *p, *q, *e; + const unsigned char *haystack_u8_val = haystack_u8->val, + *needle_u8_val = needle_u8->val; + for (i = 0; i < sizeof(jtbl) / sizeof(*jtbl); ++i) { + jtbl[i] = needle_u8_len + 1; + } + for (i = 0; i < needle_u8_len - 1; ++i) { + jtbl[needle_u8_val[i]] = needle_u8_len - i; + } + e = haystack_u8_val + haystack_u8->len; + p = haystack_u8_val; + while (--offset >= 0) { + if (p >= e) { + result = -16; + goto out; + } + p += u8_tbl[*p]; + } + p += needle_u8_len; + if (p > e) { + goto out; + } + while (p <= e) { + const unsigned char *pv = p; + q = needle_u8_val + needle_u8_len; + for (;;) { + if (q == needle_u8_val) { + result = 0; + while (p > haystack_u8_val) { + unsigned char c = *--p; + if (c < 0x80) { + ++result; + } else if ((c & 0xc0) != 0x80) { + ++result; + } + } + goto out; + } + if (*--q != *--p) { + break; + } + } + p += jtbl[*p]; + if (p <= pv) { + p = pv + 1; + } + } + } else { + unsigned int jtbl[1 << (sizeof(unsigned char) * 8)]; + unsigned int needle_u8_len = needle_u8->len, needle_len = 0; + unsigned int i; + const unsigned char *p, *e, *q, *qe; + const unsigned char *haystack_u8_val = haystack_u8->val, + *needle_u8_val = needle_u8->val; + for (i = 0; i < sizeof(jtbl) / sizeof(*jtbl); ++i) { + jtbl[i] = needle_u8_len; + } + for (i = needle_u8_len - 1; i > 0; --i) { + unsigned char c = needle_u8_val[i]; + jtbl[c] = i; + if (c < 0x80) { + ++needle_len; + } else if ((c & 0xc0) != 0x80) { + ++needle_len; + } + } + { + unsigned char c = needle_u8_val[0]; + if (c < 0x80) { + ++needle_len; + } else if ((c & 0xc0) != 0x80) { + ++needle_len; + } + } + e = haystack_u8_val; + p = e + haystack_u8->len; + qe = needle_u8_val + 
needle_u8_len; + if (offset < 0) { + if (-offset > needle_len) { + offset += needle_len; + while (offset < 0) { + unsigned char c; + if (p <= e) { + result = -16; + goto out; + } + c = *(--p); + if (c < 0x80) { + ++offset; + } else if ((c & 0xc0) != 0x80) { + ++offset; + } + } + } + } else { + const unsigned char *ee = haystack_u8_val + haystack_u8->len; + while (--offset >= 0) { + if (e >= ee) { + result = -16; + goto out; + } + e += u8_tbl[*e]; + } + } + if (p < e + needle_u8_len) { + goto out; + } + p -= needle_u8_len; + while (p >= e) { + const unsigned char *pv = p; + q = needle_u8_val; + for (;;) { + if (q == qe) { + result = 0; + p -= needle_u8_len; + while (p > haystack_u8_val) { + unsigned char c = *--p; + if (c < 0x80) { + ++result; + } else if ((c & 0xc0) != 0x80) { + ++result; + } + } + goto out; + } + if (*q != *p) { + break; + } + ++p, ++q; + } + p -= jtbl[*p]; + if (p >= pv) { + p = pv - 1; + } + } + } +out: + if (haystack_u8 == &_haystack_u8) { + mbfl_string_clear(&_haystack_u8); + } + if (needle_u8 == &_needle_u8) { + mbfl_string_clear(&_needle_u8); + } + return result; +} + +/* + * substr_count + */ + +int +mbfl_substr_count( + mbfl_string *haystack, + mbfl_string *needle + ) +{ + int n, result = 0; + unsigned char *p; + mbfl_convert_filter *filter; + struct collector_strpos_data pc; + + if (haystack == NULL || needle == NULL) { + return -8; + } + /* needle is converted into wchar */ + mbfl_wchar_device_init(&pc.needle); + filter = mbfl_convert_filter_new( + needle->no_encoding, + mbfl_no_encoding_wchar, + mbfl_wchar_device_output, 0, &pc.needle); + if (filter == NULL) { + return -4; + } + p = needle->val; + n = needle->len; + if (p != NULL) { + while (n > 0) { + if ((*filter->filter_function)(*p++, filter) < 0) { + break; + } + n--; + } + } + mbfl_convert_filter_flush(filter); + mbfl_convert_filter_delete(filter); + pc.needle_len = pc.needle.pos; + if (pc.needle.buffer == NULL) { + return -4; + } + if (pc.needle_len <= 0) { + 
mbfl_wchar_device_clear(&pc.needle); + return -2; + } + /* initialize filter and collector data */ + filter = mbfl_convert_filter_new( + haystack->no_encoding, + mbfl_no_encoding_wchar, + collector_strpos, 0, &pc); + if (filter == NULL) { + mbfl_wchar_device_clear(&pc.needle); + return -4; + } + pc.start = 0; + pc.output = 0; + pc.needle_pos = 0; + pc.found_pos = 0; + pc.matched_pos = -1; + + /* feed data */ + p = haystack->val; + n = haystack->len; + if (p != NULL) { + while (n > 0) { + if ((*filter->filter_function)(*p++, filter) < 0) { + pc.matched_pos = -4; + break; + } + if (pc.matched_pos >= 0) { + ++result; + pc.matched_pos = -1; + pc.needle_pos = 0; + } + n--; + } + } + mbfl_convert_filter_flush(filter); + mbfl_convert_filter_delete(filter); + mbfl_wchar_device_clear(&pc.needle); + + return result; +}

/*
 * substr
 */

/*
 * Collector state used by the converter-based branch of mbfl_substr().
 *
 * next_filter  filter that receives the selected code points
 * start        index of the first code point to forward
 * stop         index one past the last code point to forward
 * output       number of code points seen so far
 */
struct collector_substr_data {
	mbfl_convert_filter *next_filter;
	int start;
	int stop;
	int output;
};

/*
 * Output callback of the wchar filter in mbfl_substr().
 *
 * Forwards code point `c` to pc->next_filter when its index lies in
 * [start, stop).  Returns -1 once `stop` has been reached, which makes the
 * feeding loop in mbfl_substr() stop early; returns `c` otherwise.
 */
static int
collector_substr(int c, void* data)
{
	struct collector_substr_data *pc = (struct collector_substr_data*)data;

	if (pc->output >= pc->stop) {
		return -1;
	}

	if (pc->output >= pc->start) {
		(*pc->next_filter->filter_function)(c, pc->next_filter);
	}

	pc->output++;

	return c;
}

/*
 * Copy `length` characters of `string`, starting at character index `from`,
 * into `result`.
 *
 * Encodings flagged as single-byte or fixed 2/4-byte, and encodings that
 * provide an mblen_table, are sliced directly on the byte buffer.  Every
 * other encoding is run through a wchar converter and the wanted code points
 * are re-encoded into a memory device.
 *
 * Returns `result` on success.  Returns NULL when `string` or `result` is
 * NULL, when the encoding is unknown, or when an allocation fails.
 */
mbfl_string *
mbfl_substr(
    mbfl_string *string,
    mbfl_string *result,
    int from,
    int length)
{
	const mbfl_encoding *encoding;
	int n, m, k, len, start, end;
	unsigned char *p, *w;
	const unsigned char *mbtab;

	/* validate the pointers before string->no_encoding is read */
	if (string == NULL || result == NULL) {
		return NULL;
	}
	encoding = mbfl_no2encoding(string->no_encoding);
	if (encoding == NULL) {
		return NULL;
	}
	mbfl_string_init(result);
	result->no_language = string->no_language;
	result->no_encoding = string->no_encoding;

	if ((encoding->flag & (MBFL_ENCTYPE_SBCS | MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE | MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) ||
	   encoding->mblen_table != NULL) {
		len = string->len;
		start = from;
		end = from + length;
		if (encoding->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) {
			/* two bytes per character */
			start *= 2;
			end = start + length*2;
		} else if (encoding->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) {
			/* four bytes per character */
			start *= 4;
			end = start + length*4;
		} else if (encoding->mblen_table != NULL) {
			/* walk the buffer using the per-lead-byte length table */
			mbtab = encoding->mblen_table;
			start = 0;
			end = 0;
			n = 0;
			k = 0;
			p = string->val;
			if (p != NULL) {
				/* search start position */
				while (k <= from) {
					start = n;
					if (n >= len) {
						break;
					}
					m = mbtab[*p];
					n += m;
					p += m;
					k++;
				}
				/* detect end position */
				k = 0;
				end = start;
				while (k < length) {
					end = n;
					if (n >= len) {
						break;
					}
					m = mbtab[*p];
					n += m;
					p += m;
					k++;
				}
			}
		}

		/* clamp the byte range to [0, len] and keep start <= end */
		if (start > len) {
			start = len;
		}
		if (start < 0) {
			start = 0;
		}
		if (end > len) {
			end = len;
		}
		if (end < 0) {
			end = 0;
		}
		if (start > end) {
			start = end;
		}

		/* allocate memory and copy */
		n = end - start;
		result->len = 0;
		result->val = w = (unsigned char*)mbfl_malloc((n + 8)*sizeof(unsigned char));
		if (w != NULL) {
			p = string->val;
			if (p != NULL) {
				p += start;
				result->len = n;
				while (n > 0) {
					*w++ = *p++;
					n--;
				}
			}
			/* four NUL bytes so that 4-byte encodings are terminated too */
			*w++ = '\0';
			*w++ = '\0';
			*w++ = '\0';
			*w = '\0';
		} else {
			result = NULL;
		}
	} else {
		mbfl_memory_device device;
		struct collector_substr_data pc;
		mbfl_convert_filter *decoder;
		mbfl_convert_filter *encoder;

		mbfl_memory_device_init(&device, length + 1, 0);
		/* output code filter */
		decoder = mbfl_convert_filter_new(
		    mbfl_no_encoding_wchar,
		    string->no_encoding,
		    mbfl_memory_device_output, 0, &device);
		/* wchar filter */
		encoder = mbfl_convert_filter_new(
		    string->no_encoding,
		    mbfl_no_encoding_wchar,
		    collector_substr, 0, &pc);
		if (decoder == NULL || encoder == NULL) {
			mbfl_convert_filter_delete(encoder);
			mbfl_convert_filter_delete(decoder);
			/* the device was initialised above; release it as well */
			mbfl_memory_device_clear(&device);
			return NULL;
		}
		pc.next_filter = decoder;
		pc.start = from;
		pc.stop = from + length;
		pc.output = 0;

		/* feed data; collector_substr() returns -1 once `stop` is reached */
		p = string->val;
		n = string->len;
		if (p != NULL) {
			while (n > 0) {
				if ((*encoder->filter_function)(*p++, encoder) < 0) {
					break;
				}
				n--;
			}
		}

		mbfl_convert_filter_flush(encoder);
		mbfl_convert_filter_flush(decoder);
		result = mbfl_memory_device_result(&device, result);
		mbfl_convert_filter_delete(encoder);
		mbfl_convert_filter_delete(decoder);
	}

	return result;
}

+/* + * strcut + */ +mbfl_string * +mbfl_strcut( + mbfl_string *string, + mbfl_string *result, + int from, + int length) +{ + const mbfl_encoding *encoding; + mbfl_memory_device device; + + /* validate the parameters */ + if (string == NULL || string->val == NULL || result == NULL) { + return NULL; + } + + if (from < 0 || length < 0) { + return NULL; + } + + if (from >= string->len) { + from = string->len; + } + + encoding = mbfl_no2encoding(string->no_encoding); + if (encoding == NULL) { + return NULL; + } + + mbfl_string_init(result); + result->no_language = string->no_language; + result->no_encoding = string->no_encoding; + + if ((encoding->flag & (MBFL_ENCTYPE_SBCS + | MBFL_ENCTYPE_WCS2BE + | MBFL_ENCTYPE_WCS2LE + | MBFL_ENCTYPE_WCS4BE + | MBFL_ENCTYPE_WCS4LE)) + || encoding->mblen_table != NULL) { + const unsigned char *start = NULL; + const unsigned char *end = NULL; + unsigned char *w; + unsigned int sz; + + if (encoding->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) { + from &= -2; + + if (from + length >= string->len) { + length = string->len - from; + } + + start = string->val + from; + end = start + (length & -2); + } else if (encoding->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) { + from &= -4; + + if (from + length >= string->len) { + length = string->len - from; + } + + start = string->val + from; + end = start + (length & -4); + } else if ((encoding->flag & MBFL_ENCTYPE_SBCS)) { + if (from + length >= string->len) { + length = string->len
- from; + } + + start = string->val + from; + end = start + length; + } else if (encoding->mblen_table != NULL) { + const unsigned char *mbtab = encoding->mblen_table; + const unsigned char *p, *q; + int m; + + /* search start position */ + for (m = 0, p = string->val, q = p + from; + p < q; p += (m = mbtab[*p])); + + if (p > q) { + p -= m; + } + + start = p; + + /* search end position */ + if ((start - string->val) + length >= (int)string->len) { + end = string->val + string->len; + } else { + for (q = p + length; p < q; p += (m = mbtab[*p])); + + if (p > q) { + p -= m; + } + end = p; + } + } else { + /* never reached */ + return NULL; + } + + /* allocate memory and copy string */ + sz = end - start; + if ((w = (unsigned char*)mbfl_calloc(sz + 8, + sizeof(unsigned char))) == NULL) { + return NULL; + } + + memcpy(w, start, sz); + w[sz] = '\0'; + w[sz + 1] = '\0'; + w[sz + 2] = '\0'; + w[sz + 3] = '\0'; + + result->val = w; + result->len = sz; + } else { + mbfl_convert_filter *encoder = NULL; + mbfl_convert_filter *decoder = NULL; + const unsigned char *p, *q, *r; + struct { + mbfl_convert_filter encoder; + mbfl_convert_filter decoder; + const unsigned char *p; + int pos; + } bk, _bk; + + /* output code filter */ + if (!(decoder = mbfl_convert_filter_new( + mbfl_no_encoding_wchar, + string->no_encoding, + mbfl_memory_device_output, 0, &device))) { + return NULL; + } + + /* wchar filter */ + if (!(encoder = mbfl_convert_filter_new( + string->no_encoding, + mbfl_no_encoding_wchar, + mbfl_filter_output_null, + NULL, NULL))) { + mbfl_convert_filter_delete(decoder); + return NULL; + } + + mbfl_memory_device_init(&device, length + 8, 0); + + p = string->val; + + /* search start position */ + for (q = string->val + from; p < q; p++) { + (*encoder->filter_function)(*p, encoder); + } + + /* switch the drain direction */ + encoder->output_function = (int(*)(int,void *))decoder->filter_function; + encoder->flush_function = (int(*)(void *))decoder->filter_flush; + encoder->data 
= decoder; + + q = string->val + string->len; + + /* save the encoder, decoder state and the pointer */ + mbfl_convert_filter_copy(decoder, &_bk.decoder); + mbfl_convert_filter_copy(encoder, &_bk.encoder); + _bk.p = p; + _bk.pos = device.pos; + + if (length > q - p) { + length = q - p; + } + + if (length >= 20) { + /* output a little shorter than "length" */ + /* XXX: the constant "20" was determined purely on the heuristics. */ + for (r = p + length - 20; p < r; p++) { + (*encoder->filter_function)(*p, encoder); + } + + /* if the offset of the resulting string exceeds the length, + * then restore the state */ + if (device.pos > length) { + p = _bk.p; + device.pos = _bk.pos; + decoder->filter_dtor(decoder); + encoder->filter_dtor(encoder); + mbfl_convert_filter_copy(&_bk.decoder, decoder); + mbfl_convert_filter_copy(&_bk.encoder, encoder); + bk = _bk; + } else { + /* save the encoder, decoder state and the pointer */ + mbfl_convert_filter_copy(decoder, &bk.decoder); + mbfl_convert_filter_copy(encoder, &bk.encoder); + bk.p = p; + bk.pos = device.pos; + + /* flush the stream */ + (*encoder->filter_flush)(encoder); + + /* if the offset of the resulting string exceeds the length, + * then restore the state */ + if (device.pos > length) { + bk.decoder.filter_dtor(&bk.decoder); + bk.encoder.filter_dtor(&bk.encoder); + + p = _bk.p; + device.pos = _bk.pos; + decoder->filter_dtor(decoder); + encoder->filter_dtor(encoder); + mbfl_convert_filter_copy(&_bk.decoder, decoder); + mbfl_convert_filter_copy(&_bk.encoder, encoder); + bk = _bk; + } else { + _bk.decoder.filter_dtor(&_bk.decoder); + _bk.encoder.filter_dtor(&_bk.encoder); + + p = bk.p; + device.pos = bk.pos; + decoder->filter_dtor(decoder); + encoder->filter_dtor(encoder); + mbfl_convert_filter_copy(&bk.decoder, decoder); + mbfl_convert_filter_copy(&bk.encoder, encoder); + } + } + } else { + bk = _bk; + } + + /* detect end position */ + while (p < q) { + (*encoder->filter_function)(*p, encoder); + + if (device.pos > 
length) { + /* restore filter */ + p = bk.p; + device.pos = bk.pos; + decoder->filter_dtor(decoder); + encoder->filter_dtor(encoder); + mbfl_convert_filter_copy(&bk.decoder, decoder); + mbfl_convert_filter_copy(&bk.encoder, encoder); + break; + } + + p++; + + /* backup current state */ + mbfl_convert_filter_copy(decoder, &_bk.decoder); + mbfl_convert_filter_copy(encoder, &_bk.encoder); + _bk.pos = device.pos; + _bk.p = p; + + (*encoder->filter_flush)(encoder); + + if (device.pos > length) { + _bk.decoder.filter_dtor(&_bk.decoder); + _bk.encoder.filter_dtor(&_bk.encoder); + + /* restore filter */ + p = bk.p; + device.pos = bk.pos; + decoder->filter_dtor(decoder); + encoder->filter_dtor(encoder); + mbfl_convert_filter_copy(&bk.decoder, decoder); + mbfl_convert_filter_copy(&bk.encoder, encoder); + break; + } + + bk.decoder.filter_dtor(&bk.decoder); + bk.encoder.filter_dtor(&bk.encoder); + + p = _bk.p; + device.pos = _bk.pos; + decoder->filter_dtor(decoder); + encoder->filter_dtor(encoder); + mbfl_convert_filter_copy(&_bk.decoder, decoder); + mbfl_convert_filter_copy(&_bk.encoder, encoder); + + bk = _bk; + } + + (*encoder->filter_flush)(encoder); + + bk.decoder.filter_dtor(&bk.decoder); + bk.encoder.filter_dtor(&bk.encoder); + + result = mbfl_memory_device_result(&device, result); + + mbfl_convert_filter_delete(encoder); + mbfl_convert_filter_delete(decoder); + } + + return result; +}


/*
 * strwidth
 */

/*
 * Report whether code point `c` falls inside one of the [begin, end] ranges
 * listed in mbfl_eaw_table.  Returns 1 when it does, 0 otherwise.
 */
static int is_fullwidth(int c)
{
	const int range_count = (int)(sizeof(mbfl_eaw_table) / sizeof(mbfl_eaw_table[0]));
	int idx = 0;

	/* anything below the first range's start is rejected without scanning */
	if (c < mbfl_eaw_table[0].begin) {
		return 0;
	}

	while (idx < range_count) {
		if (c >= mbfl_eaw_table[idx].begin && c <= mbfl_eaw_table[idx].end) {
			return 1;
		}
		idx++;
	}

	return 0;
}

+static int +filter_count_width(int c, void* data) +{ + (*(int *)data) += (is_fullwidth(c) ?
2: 1); + return c; +} + +int +mbfl_strwidth(mbfl_string *string) +{ + int len, n; + unsigned char *p; + mbfl_convert_filter *filter; + + len = 0; + if (string->len > 0 && string->val != NULL) { + /* wchar filter */ + filter = mbfl_convert_filter_new( + string->no_encoding, + mbfl_no_encoding_wchar, + filter_count_width, 0, &len); + if (filter == NULL) { + mbfl_convert_filter_delete(filter); + return -1; + } + + /* feed data */ + p = string->val; + n = string->len; + while (n > 0) { + (*filter->filter_function)(*p++, filter); + n--; + } + + mbfl_convert_filter_flush(filter); + mbfl_convert_filter_delete(filter); + } + + return len; +} + + +/* + * strimwidth + */ +struct collector_strimwidth_data { + mbfl_convert_filter *decoder; + mbfl_convert_filter *decoder_backup; + mbfl_memory_device device; + int from; + int width; + int outwidth; + int outchar; + int status; + int endpos; +}; + +static int +collector_strimwidth(int c, void* data) +{ + struct collector_strimwidth_data *pc = (struct collector_strimwidth_data*)data; + + switch (pc->status) { + case 10: + (*pc->decoder->filter_function)(c, pc->decoder); + break; + default: + if (pc->outchar >= pc->from) { + pc->outwidth += (is_fullwidth(c) ? 
2: 1); + + if (pc->outwidth > pc->width) { + if (pc->status == 0) { + pc->endpos = pc->device.pos; + mbfl_convert_filter_copy(pc->decoder, pc->decoder_backup); + } + pc->status++; + (*pc->decoder->filter_function)(c, pc->decoder); + c = -1; + } else { + (*pc->decoder->filter_function)(c, pc->decoder); + } + } + pc->outchar++; + break; + } + + return c; +} + +mbfl_string * +mbfl_strimwidth( + mbfl_string *string, + mbfl_string *marker, + mbfl_string *result, + int from, + int width) +{ + struct collector_strimwidth_data pc; + mbfl_convert_filter *encoder; + int n, mkwidth; + unsigned char *p; + + if (string == NULL || result == NULL) { + return NULL; + } + mbfl_string_init(result); + result->no_language = string->no_language; + result->no_encoding = string->no_encoding; + mbfl_memory_device_init(&pc.device, width, 0); + + /* output code filter */ + pc.decoder = mbfl_convert_filter_new( + mbfl_no_encoding_wchar, + string->no_encoding, + mbfl_memory_device_output, 0, &pc.device); + pc.decoder_backup = mbfl_convert_filter_new( + mbfl_no_encoding_wchar, + string->no_encoding, + mbfl_memory_device_output, 0, &pc.device); + /* wchar filter */ + encoder = mbfl_convert_filter_new( + string->no_encoding, + mbfl_no_encoding_wchar, + collector_strimwidth, 0, &pc); + if (pc.decoder == NULL || pc.decoder_backup == NULL || encoder == NULL) { + mbfl_convert_filter_delete(encoder); + mbfl_convert_filter_delete(pc.decoder); + mbfl_convert_filter_delete(pc.decoder_backup); + return NULL; + } + mkwidth = 0; + if (marker) { + mkwidth = mbfl_strwidth(marker); + } + pc.from = from; + pc.width = width - mkwidth; + pc.outwidth = 0; + pc.outchar = 0; + pc.status = 0; + pc.endpos = 0; + + /* feed data */ + p = string->val; + n = string->len; + if (p != NULL) { + while (n > 0) { + n--; + if ((*encoder->filter_function)(*p++, encoder) < 0) { + break; + } + } + mbfl_convert_filter_flush(encoder); + if (pc.status != 0 && mkwidth > 0) { + pc.width += mkwidth; + while (n > 0) { + if 
((*encoder->filter_function)(*p++, encoder) < 0) { + break; + } + n--; + } + mbfl_convert_filter_flush(encoder); + if (pc.status != 1) { + pc.status = 10; + pc.device.pos = pc.endpos; + mbfl_convert_filter_copy(pc.decoder_backup, pc.decoder); + mbfl_convert_filter_reset(encoder, marker->no_encoding, mbfl_no_encoding_wchar); + p = marker->val; + n = marker->len; + while (n > 0) { + if ((*encoder->filter_function)(*p++, encoder) < 0) { + break; + } + n--; + } + mbfl_convert_filter_flush(encoder); + } + } else if (pc.status != 0) { + pc.device.pos = pc.endpos; + mbfl_convert_filter_copy(pc.decoder_backup, pc.decoder); + } + mbfl_convert_filter_flush(pc.decoder); + } + result = mbfl_memory_device_result(&pc.device, result); + mbfl_convert_filter_delete(encoder); + mbfl_convert_filter_delete(pc.decoder); + mbfl_convert_filter_delete(pc.decoder_backup); + + return result; +} + +mbfl_string * +mbfl_ja_jp_hantozen( + mbfl_string *string, + mbfl_string *result, + int mode) +{ + int n; + unsigned char *p; + const mbfl_encoding *encoding; + mbfl_memory_device device; + mbfl_convert_filter *decoder = NULL; + mbfl_convert_filter *encoder = NULL; + mbfl_convert_filter *tl_filter = NULL; + mbfl_convert_filter *next_filter = NULL; + mbfl_filt_tl_jisx0201_jisx0208_param *param = NULL; + + /* validate parameters */ + if (string == NULL || result == NULL) { + return NULL; + } + + encoding = mbfl_no2encoding(string->no_encoding); + if (encoding == NULL) { + return NULL; + } + + mbfl_memory_device_init(&device, string->len, 0); + mbfl_string_init(result); + + result->no_language = string->no_language; + result->no_encoding = string->no_encoding; + + decoder = mbfl_convert_filter_new( + mbfl_no_encoding_wchar, + string->no_encoding, + mbfl_memory_device_output, 0, &device); + if (decoder == NULL) { + goto out; + } + next_filter = decoder; + + param = + (mbfl_filt_tl_jisx0201_jisx0208_param *)mbfl_malloc(sizeof(mbfl_filt_tl_jisx0201_jisx0208_param)); + if (param == NULL) { + goto out; + 
} + + param->mode = mode; + + tl_filter = mbfl_convert_filter_new2( + &vtbl_tl_jisx0201_jisx0208, + (int(*)(int, void*))next_filter->filter_function, + (int(*)(void*))next_filter->filter_flush, + next_filter); + if (tl_filter == NULL) { + mbfl_free(param); + goto out; + } + + tl_filter->opaque = param; + next_filter = tl_filter; + + encoder = mbfl_convert_filter_new( + string->no_encoding, + mbfl_no_encoding_wchar, + (int(*)(int, void*))next_filter->filter_function, + (int(*)(void*))next_filter->filter_flush, + next_filter); + if (encoder == NULL) { + goto out; + } + + /* feed data */ + p = string->val; + n = string->len; + if (p != NULL) { + while (n > 0) { + if ((*encoder->filter_function)(*p++, encoder) < 0) { + break; + } + n--; + } + } + + mbfl_convert_filter_flush(encoder); + result = mbfl_memory_device_result(&device, result); +out: + if (tl_filter != NULL) { + if (tl_filter->opaque != NULL) { + mbfl_free(tl_filter->opaque); + } + mbfl_convert_filter_delete(tl_filter); + } + + if (decoder != NULL) { + mbfl_convert_filter_delete(decoder); + } + + if (encoder != NULL) { + mbfl_convert_filter_delete(encoder); + } + + return result; +} + + +/* + * MIME header encode + */ +struct mime_header_encoder_data { + mbfl_convert_filter *conv1_filter; + mbfl_convert_filter *block_filter; + mbfl_convert_filter *conv2_filter; + mbfl_convert_filter *conv2_filter_backup; + mbfl_convert_filter *encod_filter; + mbfl_convert_filter *encod_filter_backup; + mbfl_memory_device outdev; + mbfl_memory_device tmpdev; + int status1; + int status2; + int prevpos; + int linehead; + int firstindent; + int encnamelen; + int lwsplen; + char encname[128]; + char lwsp[16]; +}; + +static int +mime_header_encoder_block_collector(int c, void *data) +{ + int n; + struct mime_header_encoder_data *pe = (struct mime_header_encoder_data *)data; + + switch (pe->status2) { + case 1: /* encoded word */ + pe->prevpos = pe->outdev.pos; + mbfl_convert_filter_copy(pe->conv2_filter, pe->conv2_filter_backup); 
+ mbfl_convert_filter_copy(pe->encod_filter, pe->encod_filter_backup); + (*pe->conv2_filter->filter_function)(c, pe->conv2_filter); + (*pe->conv2_filter->filter_flush)(pe->conv2_filter); + (*pe->encod_filter->filter_flush)(pe->encod_filter); + n = pe->outdev.pos - pe->linehead + pe->firstindent; + pe->outdev.pos = pe->prevpos; + mbfl_convert_filter_copy(pe->conv2_filter_backup, pe->conv2_filter); + mbfl_convert_filter_copy(pe->encod_filter_backup, pe->encod_filter); + if (n >= 74) { + (*pe->conv2_filter->filter_flush)(pe->conv2_filter); + (*pe->encod_filter->filter_flush)(pe->encod_filter); + mbfl_memory_device_strncat(&pe->outdev, "\x3f\x3d", 2); /* ?= */ + mbfl_memory_device_strncat(&pe->outdev, pe->lwsp, pe->lwsplen); + pe->linehead = pe->outdev.pos; + pe->firstindent = 0; + mbfl_memory_device_strncat(&pe->outdev, pe->encname, pe->encnamelen); + c = (*pe->conv2_filter->filter_function)(c, pe->conv2_filter); + } else { + c = (*pe->conv2_filter->filter_function)(c, pe->conv2_filter); + } + break; + + default: + mbfl_memory_device_strncat(&pe->outdev, pe->encname, pe->encnamelen); + c = (*pe->conv2_filter->filter_function)(c, pe->conv2_filter); + pe->status2 = 1; + break; + } + + return c; +} + +static int +mime_header_encoder_collector(int c, void *data) +{ + static int qp_table[256] = { + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00 */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00 */ + 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x20 */ + 0, 0, 0, 0, 0, 0, 0 ,0, 0, 0, 0, 0, 0, 1, 0, 1, /* 0x10 */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x40 */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, /* 0x50 */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x60 */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, /* 0x70 */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x80 */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x90 */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xA0 */ + 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xB0 */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xC0 */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xD0 */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xE0 */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 /* 0xF0 */ + }; + + int n; + struct mime_header_encoder_data *pe = (struct mime_header_encoder_data *)data; + + switch (pe->status1) { + case 11: /* encoded word */ + (*pe->block_filter->filter_function)(c, pe->block_filter); + break; + + default: /* ASCII */ + if (c <= 0x00ff && !qp_table[(c & 0xff)]) { /* ordinary characters */ + mbfl_memory_device_output(c, &pe->tmpdev); + pe->status1 = 1; + } else if (pe->status1 == 0 && c == 0x20) { /* repeat SPACE */ + mbfl_memory_device_output(c, &pe->tmpdev); + } else { + if (pe->tmpdev.pos < 74 && c == 0x20) { + n = pe->outdev.pos - pe->linehead + pe->tmpdev.pos + pe->firstindent; + if (n > 74) { + mbfl_memory_device_strncat(&pe->outdev, pe->lwsp, pe->lwsplen); /* LWSP */ + pe->linehead = pe->outdev.pos; + pe->firstindent = 0; + } else if (pe->outdev.pos > 0) { + mbfl_memory_device_output(0x20, &pe->outdev); + } + mbfl_memory_device_devcat(&pe->outdev, &pe->tmpdev); + mbfl_memory_device_reset(&pe->tmpdev); + pe->status1 = 0; + } else { + n = pe->outdev.pos - pe->linehead + pe->encnamelen + pe->firstindent; + if (n > 60) { + mbfl_memory_device_strncat(&pe->outdev, pe->lwsp, pe->lwsplen); /* LWSP */ + pe->linehead = pe->outdev.pos; + pe->firstindent = 0; + } else if (pe->outdev.pos > 0) { + mbfl_memory_device_output(0x20, &pe->outdev); + } + mbfl_convert_filter_devcat(pe->block_filter, &pe->tmpdev); + mbfl_memory_device_reset(&pe->tmpdev); + (*pe->block_filter->filter_function)(c, pe->block_filter); + pe->status1 = 11; + } + } + break; + } + + return c; +} + +mbfl_string * +mime_header_encoder_result(struct mime_header_encoder_data *pe, mbfl_string *result) +{ + if (pe->status1 >= 10) { + (*pe->conv2_filter->filter_flush)(pe->conv2_filter); + 
(*pe->encod_filter->filter_flush)(pe->encod_filter); + mbfl_memory_device_strncat(&pe->outdev, "\x3f\x3d", 2); /* ?= */ + } else if (pe->tmpdev.pos > 0) { + if (pe->outdev.pos > 0) { + if ((pe->outdev.pos - pe->linehead + pe->tmpdev.pos) > 74) { + mbfl_memory_device_strncat(&pe->outdev, pe->lwsp, pe->lwsplen); + } else { + mbfl_memory_device_output(0x20, &pe->outdev); + } + } + mbfl_memory_device_devcat(&pe->outdev, &pe->tmpdev); + } + mbfl_memory_device_reset(&pe->tmpdev); + pe->prevpos = 0; + pe->linehead = 0; + pe->status1 = 0; + pe->status2 = 0; + + return mbfl_memory_device_result(&pe->outdev, result); +}

/*
 * Create a MIME header encoder.
 *
 * incode    encoding of the bytes that will be fed in
 * outcode   charset announced in, and used for, the encoded words
 * transenc  mbfl_no_encoding_qprint selects "Q"; any other value selects
 *           base64 ("B")
 *
 * Returns NULL when `outcode` is unknown or has no MIME name, when that name
 * does not fit the encoded-word prefix buffer, or when an allocation fails.
 * Release the encoder with mime_header_encoder_delete().
 */
struct mime_header_encoder_data*
mime_header_encoder_new(
    enum mbfl_no_encoding incode,
    enum mbfl_no_encoding outcode,
    enum mbfl_no_encoding transenc)
{
	int n;
	const char *s;
	const mbfl_encoding *outencoding;
	struct mime_header_encoder_data *pe;

	/* get output encoding and check MIME charset name */
	outencoding = mbfl_no2encoding(outcode);
	if (outencoding == NULL || outencoding->mime_name == NULL || outencoding->mime_name[0] == '\0') {
		return NULL;
	}

	pe = (struct mime_header_encoder_data*)mbfl_malloc(sizeof(struct mime_header_encoder_data));
	if (pe == NULL) {
		return NULL;
	}

	mbfl_memory_device_init(&pe->outdev, 0, 0);
	mbfl_memory_device_init(&pe->tmpdev, 0, 0);
	pe->prevpos = 0;
	pe->linehead = 0;
	pe->firstindent = 0;
	pe->status1 = 0;
	pe->status2 = 0;

	/* make the encoding description string  exp. "=?ISO-2022-JP?B?" */
	n = 0;
	pe->encname[n++] = 0x3d;
	pe->encname[n++] = 0x3f;
	s = outencoding->mime_name;
	/* keep four bytes free for "?", the "B"/"Q" letter, "?" and the NUL */
	while (*s && n < (int)sizeof(pe->encname) - 4) {
		pe->encname[n++] = *s++;
	}
	if (*s) {
		/* charset name does not fit; no filter exists yet, so only the
		 * devices and the structure itself have to be released */
		mbfl_memory_device_clear(&pe->outdev);
		mbfl_memory_device_clear(&pe->tmpdev);
		mbfl_free((void*)pe);
		return NULL;
	}
	pe->encname[n++] = 0x3f;
	if (transenc == mbfl_no_encoding_qprint) {
		pe->encname[n++] = 0x51;
	} else {
		pe->encname[n++] = 0x42;
		transenc = mbfl_no_encoding_base64;
	}
	pe->encname[n++] = 0x3f;
	pe->encname[n] = '\0';
	pe->encnamelen = n;

	/* default folding sequence: CR LF SPACE */
	n = 0;
	pe->lwsp[n++] = 0x0d;
	pe->lwsp[n++] = 0x0a;
	pe->lwsp[n++] = 0x20;
	pe->lwsp[n] = '\0';
	pe->lwsplen = n;

	/* transfer encode filter */
	pe->encod_filter = mbfl_convert_filter_new(outcode, transenc, mbfl_memory_device_output, 0, &(pe->outdev));
	pe->encod_filter_backup = mbfl_convert_filter_new(outcode, transenc, mbfl_memory_device_output, 0, &(pe->outdev));

	/* Output code filter */
	pe->conv2_filter = mbfl_convert_filter_new(mbfl_no_encoding_wchar, outcode, mbfl_filter_output_pipe, 0, pe->encod_filter);
	pe->conv2_filter_backup = mbfl_convert_filter_new(mbfl_no_encoding_wchar, outcode, mbfl_filter_output_pipe, 0, pe->encod_filter);

	/* encoded block filter */
	pe->block_filter = mbfl_convert_filter_new(mbfl_no_encoding_wchar, mbfl_no_encoding_wchar, mime_header_encoder_block_collector, 0, pe);

	/* Input code filter */
	pe->conv1_filter = mbfl_convert_filter_new(incode, mbfl_no_encoding_wchar, mime_header_encoder_collector, 0, pe);

	/* every filter is dereferenced later on, block_filter included */
	if (pe->encod_filter == NULL ||
	    pe->encod_filter_backup == NULL ||
	    pe->conv2_filter == NULL ||
	    pe->conv2_filter_backup == NULL ||
	    pe->block_filter == NULL ||
	    pe->conv1_filter == NULL) {
		mime_header_encoder_delete(pe);
		return NULL;
	}

	if (transenc == mbfl_no_encoding_qprint) {
		pe->encod_filter->status |= MBFL_QPRINT_STS_MIME_HEADER;
		pe->encod_filter_backup->status |= MBFL_QPRINT_STS_MIME_HEADER;
	} else {
		pe->encod_filter->status |= MBFL_BASE64_STS_MIME_HEADER;
		pe->encod_filter_backup->status |= MBFL_BASE64_STS_MIME_HEADER;
	}

	return pe;
}

/*
 * Release an encoder created by mime_header_encoder_new(): its six filters,
 * both memory devices and the structure itself.  A NULL `pe` is ignored.
 */
void
mime_header_encoder_delete(struct mime_header_encoder_data *pe)
{
	if (pe) {
		mbfl_convert_filter_delete(pe->conv1_filter);
		mbfl_convert_filter_delete(pe->block_filter);
		mbfl_convert_filter_delete(pe->conv2_filter);
		mbfl_convert_filter_delete(pe->conv2_filter_backup);
		mbfl_convert_filter_delete(pe->encod_filter);
		mbfl_convert_filter_delete(pe->encod_filter_backup);
		mbfl_memory_device_clear(&pe->outdev);
		mbfl_memory_device_clear(&pe->tmpdev);
		mbfl_free((void*)pe);
	}
}

/*
 * Feed one input byte `c` to the encoder.  Returns whatever the input
 * filter's filter_function returns.
 */
int
mime_header_encoder_feed(int c, struct mime_header_encoder_data *pe)
{
	return (*pe->conv1_filter->filter_function)(c, pe->conv1_filter);
}

/*
 * Encode `string` as a MIME header value and store it in `result`.
 *
 * outcode   charset of the encoded words
 * encoding  transfer encoding, see mime_header_encoder_new()
 * linefeed  optional line break used when folding; at most 8 bytes of it are
 *           used and a SPACE is appended.  NULL keeps the default CR LF SPACE.
 * indent    columns already taken on the first line; honoured only when it is
 *           between 1 and 73
 *
 * Returns `result` on success, NULL when `string` or `result` is NULL or the
 * encoder cannot be created.
 */
mbfl_string *
mbfl_mime_header_encode(
    mbfl_string *string,
    mbfl_string *result,
    enum mbfl_no_encoding outcode,
    enum mbfl_no_encoding encoding,
    const char *linefeed,
    int indent)
{
	int n;
	unsigned char *p;
	struct mime_header_encoder_data *pe;

	if (string == NULL || result == NULL) {
		return NULL;
	}

	mbfl_string_init(result);
	result->no_language = string->no_language;
	result->no_encoding = mbfl_no_encoding_ascii;

	pe = mime_header_encoder_new(string->no_encoding, outcode, encoding);
	if (pe == NULL) {
		return NULL;
	}

	if (linefeed != NULL) {
		/* lwsp holds 16 bytes: up to 8 from linefeed, a SPACE and the NUL */
		n = 0;
		while (*linefeed && n < 8) {
			pe->lwsp[n++] = *linefeed++;
		}
		pe->lwsp[n++] = 0x20;
		pe->lwsp[n] = '\0';
		pe->lwsplen = n;
	}
	if (indent > 0 && indent < 74) {
		pe->firstindent = indent;
	}

	/* feed data; a NULL buffer is treated as empty input */
	n = string->len;
	p = string->val;
	if (p != NULL) {
		while (n > 0) {
			(*pe->conv1_filter->filter_function)(*p++, pe->conv1_filter);
			n--;
		}
	}

	result = mime_header_encoder_result(pe, result);
	mime_header_encoder_delete(pe);

	return result;
}


/*
 * MIME header decode
 */

/*
 * State of one MIME header decoder.
 *
 * deco_filter   transfer decoding (base64 / quoted-printable) -> conv1_filter
 * conv1_filter  charset of the current text -> wchar -> conv2_filter
 * conv2_filter  wchar -> outcode, writes into outdev
 * outdev        decoded output
 * tmpdev        bytes held back while a possible encoded word is recognised
 * cspos         offset in tmpdev where the charset name starts
 * status        state of mime_header_decoder_collector()
 * encoding      transfer encoding of the current encoded word
 * incode        charset of the current encoded word
 * outcode       requested output encoding
 */
struct mime_header_decoder_data {
	mbfl_convert_filter *deco_filter;
	mbfl_convert_filter *conv1_filter;
	mbfl_convert_filter *conv2_filter;
	mbfl_memory_device outdev;
	mbfl_memory_device tmpdev;
	int cspos;
	int status;
	enum mbfl_no_encoding encoding;
	enum mbfl_no_encoding incode;
	enum mbfl_no_encoding outcode;
};

+static int +mime_header_decoder_collector(int c, void*
data) +{ + const mbfl_encoding *encoding; + struct mime_header_decoder_data *pd = (struct mime_header_decoder_data*)data; + + switch (pd->status) { + case 1: + if (c == 0x3f) { /* ? */ + mbfl_memory_device_output(c, &pd->tmpdev); + pd->cspos = pd->tmpdev.pos; + pd->status = 2; + } else { + mbfl_convert_filter_devcat(pd->conv1_filter, &pd->tmpdev); + mbfl_memory_device_reset(&pd->tmpdev); + if (c == 0x3d) { /* = */ + mbfl_memory_device_output(c, &pd->tmpdev); + } else if (c == 0x0d || c == 0x0a) { /* CR or LF */ + pd->status = 9; + } else { + (*pd->conv1_filter->filter_function)(c, pd->conv1_filter); + pd->status = 0; + } + } + break; + case 2: /* store charset string */ + if (c == 0x3f) { /* ? */ + /* identify charset */ + mbfl_memory_device_output('\0', &pd->tmpdev); + encoding = mbfl_name2encoding((const char *)&pd->tmpdev.buffer[pd->cspos]); + if (encoding != NULL) { + pd->incode = encoding->no_encoding; + pd->status = 3; + } + mbfl_memory_device_unput(&pd->tmpdev); + mbfl_memory_device_output(c, &pd->tmpdev); + } else { + mbfl_memory_device_output(c, &pd->tmpdev); + if (pd->tmpdev.pos > 100) { /* too long charset string */ + pd->status = 0; + } else if (c == 0x0d || c == 0x0a) { /* CR or LF */ + mbfl_memory_device_unput(&pd->tmpdev); + pd->status = 9; + } + if (pd->status != 2) { + mbfl_convert_filter_devcat(pd->conv1_filter, &pd->tmpdev); + mbfl_memory_device_reset(&pd->tmpdev); + } + } + break; + case 3: /* identify encoding */ + mbfl_memory_device_output(c, &pd->tmpdev); + if (c == 0x42 || c == 0x62) { /* 'B' or 'b' */ + pd->encoding = mbfl_no_encoding_base64; + pd->status = 4; + } else if (c == 0x51 || c == 0x71) { /* 'Q' or 'q' */ + pd->encoding = mbfl_no_encoding_qprint; + pd->status = 4; + } else { + if (c == 0x0d || c == 0x0a) { /* CR or LF */ + mbfl_memory_device_unput(&pd->tmpdev); + pd->status = 9; + } else { + pd->status = 0; + } + mbfl_convert_filter_devcat(pd->conv1_filter, &pd->tmpdev); + mbfl_memory_device_reset(&pd->tmpdev); + } + break; + case 
4: /* reset filter */ + mbfl_memory_device_output(c, &pd->tmpdev); + if (c == 0x3f) { /* ? */ + /* charset convert filter */ + mbfl_convert_filter_reset(pd->conv1_filter, pd->incode, mbfl_no_encoding_wchar); + /* decode filter */ + mbfl_convert_filter_reset(pd->deco_filter, pd->encoding, mbfl_no_encoding_8bit); + pd->status = 5; + } else { + if (c == 0x0d || c == 0x0a) { /* CR or LF */ + mbfl_memory_device_unput(&pd->tmpdev); + pd->status = 9; + } else { + pd->status = 0; + } + mbfl_convert_filter_devcat(pd->conv1_filter, &pd->tmpdev); + } + mbfl_memory_device_reset(&pd->tmpdev); + break; + case 5: /* encoded block */ + if (c == 0x3f) { /* ? */ + pd->status = 6; + } else { + (*pd->deco_filter->filter_function)(c, pd->deco_filter); + } + break; + case 6: /* check end position */ + if (c == 0x3d) { /* = */ + /* flush and reset filter */ + (*pd->deco_filter->filter_flush)(pd->deco_filter); + (*pd->conv1_filter->filter_flush)(pd->conv1_filter); + mbfl_convert_filter_reset(pd->conv1_filter, mbfl_no_encoding_ascii, mbfl_no_encoding_wchar); + pd->status = 7; + } else { + (*pd->deco_filter->filter_function)(0x3f, pd->deco_filter); + if (c != 0x3f) { /* ? 
*/ + (*pd->deco_filter->filter_function)(c, pd->deco_filter); + pd->status = 5; + } + } + break; + case 7: /* after encoded block */ + if (c == 0x0d || c == 0x0a) { /* CR LF */ + pd->status = 8; + } else { + mbfl_memory_device_output(c, &pd->tmpdev); + if (c == 0x3d) { /* = */ + pd->status = 1; + } else if (c != 0x20 && c != 0x09) { /* not space */ + mbfl_convert_filter_devcat(pd->conv1_filter, &pd->tmpdev); + mbfl_memory_device_reset(&pd->tmpdev); + pd->status = 0; + } + } + break; + case 8: /* folding */ + case 9: /* folding */ + if (c != 0x0d && c != 0x0a && c != 0x20 && c != 0x09) { + if (c == 0x3d) { /* = */ + if (pd->status == 8) { + mbfl_memory_device_output(0x20, &pd->tmpdev); /* SPACE */ + } else { + (*pd->conv1_filter->filter_function)(0x20, pd->conv1_filter); + } + mbfl_memory_device_output(c, &pd->tmpdev); + pd->status = 1; + } else { + mbfl_memory_device_output(0x20, &pd->tmpdev); + mbfl_memory_device_output(c, &pd->tmpdev); + mbfl_convert_filter_devcat(pd->conv1_filter, &pd->tmpdev); + mbfl_memory_device_reset(&pd->tmpdev); + pd->status = 0; + } + } + break; + default: /* non encoded block */ + if (c == 0x0d || c == 0x0a) { /* CR LF */ + pd->status = 9; + } else if (c == 0x3d) { /* = */ + mbfl_memory_device_output(c, &pd->tmpdev); + pd->status = 1; + } else { + (*pd->conv1_filter->filter_function)(c, pd->conv1_filter); + } + break; + } + + return c; +}

/*
 * Finish decoding and hand the output over to `result`.
 *
 * Text still held back in tmpdev (states 1-4 and 7-9) is pushed through
 * conv1_filter as it is; an unterminated encoded block (states 5 and 6) is
 * flushed through the transfer decoder.  The decoder is left in state 0.
 * Returns what mbfl_memory_device_result() returns for outdev.
 */
mbfl_string *
mime_header_decoder_result(struct mime_header_decoder_data *pd, mbfl_string *result)
{
	switch (pd->status) {
	case 1:
	case 2:
	case 3:
	case 4:
	case 7:
	case 8:
	case 9:
		mbfl_convert_filter_devcat(pd->conv1_filter, &pd->tmpdev);
		break;
	case 5:
	case 6:
		(*pd->deco_filter->filter_flush)(pd->deco_filter);
		(*pd->conv1_filter->filter_flush)(pd->conv1_filter);
		break;
	default:
		/* state 0: nothing is pending */
		break;
	}
	(*pd->conv2_filter->filter_flush)(pd->conv2_filter);
	mbfl_memory_device_reset(&pd->tmpdev);
	pd->status = 0;

	return mbfl_memory_device_result(&pd->outdev, result);
}

/*
 * Create a MIME header decoder that produces `outcode`.
 *
 * The decoder starts out treating input as ASCII with pass-through transfer
 * encoding.  Returns NULL when an allocation fails; release the decoder with
 * mime_header_decoder_delete().
 */
struct mime_header_decoder_data*
mime_header_decoder_new(enum mbfl_no_encoding outcode)
{
	struct mime_header_decoder_data *pd;

	pd = (struct mime_header_decoder_data*)mbfl_malloc(sizeof(struct mime_header_decoder_data));
	if (pd == NULL) {
		return NULL;
	}

	mbfl_memory_device_init(&pd->outdev, 0, 0);
	mbfl_memory_device_init(&pd->tmpdev, 0, 0);
	pd->cspos = 0;
	pd->status = 0;
	pd->encoding = mbfl_no_encoding_pass;
	pd->incode = mbfl_no_encoding_ascii;
	pd->outcode = outcode;
	/* charset convert filter */
	pd->conv2_filter = mbfl_convert_filter_new(mbfl_no_encoding_wchar, pd->outcode, mbfl_memory_device_output, 0, &pd->outdev);
	pd->conv1_filter = mbfl_convert_filter_new(pd->incode, mbfl_no_encoding_wchar, mbfl_filter_output_pipe, 0, pd->conv2_filter);
	/* decode filter */
	pd->deco_filter = mbfl_convert_filter_new(pd->encoding, mbfl_no_encoding_8bit, mbfl_filter_output_pipe, 0, pd->conv1_filter);

	if (pd->conv1_filter == NULL || pd->conv2_filter == NULL || pd->deco_filter == NULL) {
		mime_header_decoder_delete(pd);
		return NULL;
	}

	return pd;
}

/*
 * Release a decoder created by mime_header_decoder_new(): its three filters,
 * both memory devices and the structure itself.  A NULL `pd` is ignored.
 */
void
mime_header_decoder_delete(struct mime_header_decoder_data *pd)
{
	if (pd) {
		mbfl_convert_filter_delete(pd->conv2_filter);
		mbfl_convert_filter_delete(pd->conv1_filter);
		mbfl_convert_filter_delete(pd->deco_filter);
		mbfl_memory_device_clear(&pd->outdev);
		mbfl_memory_device_clear(&pd->tmpdev);
		mbfl_free((void*)pd);
	}
}

/*
 * Feed one input byte `c` to the decoder.  Returns the value returned by
 * mime_header_decoder_collector().
 */
int
mime_header_decoder_feed(int c, struct mime_header_decoder_data *pd)
{
	return mime_header_decoder_collector(c, pd);
}

/*
 * Decode the MIME header value in `string` into `outcode` and store it in
 * `result`.
 *
 * Returns `result` on success, NULL when `string` or `result` is NULL or the
 * decoder cannot be created.
 */
mbfl_string *
mbfl_mime_header_decode(
    mbfl_string *string,
    mbfl_string *result,
    enum mbfl_no_encoding outcode)
{
	int n;
	unsigned char *p;
	struct mime_header_decoder_data *pd;

	if (string == NULL || result == NULL) {
		return NULL;
	}

	mbfl_string_init(result);
	result->no_language = string->no_language;
	result->no_encoding = outcode;

	pd = mime_header_decoder_new(outcode);
	if (pd == NULL) {
		return NULL;
	}

	/* feed data; a NULL buffer is treated as empty input */
	n = string->len;
	p = string->val;
	if (p != NULL) {
		while (n > 0) {
			mime_header_decoder_collector(*p++, pd);
			n--;
		}
	}

	result = mime_header_decoder_result(pd, result);
	mime_header_decoder_delete(pd);

	return result;
}



/*
 *  convert HTML numeric entity
 */

/*
 * Collector state shared by the HTML numeric entity encoder and decoders.
 *
 * decoder  filter that receives the produced code points
 * status   state of the entity parser (used by the decoding collector)
 * cache    numeric value accumulated while parsing an entity
 * digit    number of digits accumulated so far
 * convmap  conversion map, four ints per entry; the encoder reads them as
 *          lower bound, upper bound, offset and mask
 * mapsize  number of entries in convmap
 */
struct collector_htmlnumericentity_data {
	mbfl_convert_filter *decoder;
	int status;
	int cache;
	int digit;
	int *convmap;
	int mapsize;
};

/*
 * Output callback that writes code point `c` as a decimal entity "&#N;" when
 * a convmap entry covers it, and passes it through unchanged otherwise.
 *
 * For the first entry with lower <= c <= upper, N is (c + offset) & mask.
 * An entry that yields a negative N is skipped and the search continues.
 * Always returns `c`.
 */
static int
collector_encode_htmlnumericentity(int c, void *data)
{
	struct collector_htmlnumericentity_data *pc = (struct collector_htmlnumericentity_data *)data;
	int f, n, s, r, d, size, *mapelm;

	size = pc->mapsize;
	f = 0;	/* set once the entity has been written */
	n = 0;
	while (n < size) {
		mapelm = &(pc->convmap[n*4]);
		if (c >= mapelm[0] && c <= mapelm[1]) {
			s = (c + mapelm[2]) & mapelm[3];
			if (s >= 0) {
				(*pc->decoder->filter_function)(0x26, pc->decoder);	/* '&' */
				(*pc->decoder->filter_function)(0x23, pc->decoder);	/* '#' */
				/* emit decimal digits from the most significant one,
				 * suppressing leading zeros */
				r = 100000000;
				s %= r;
				while (r > 0) {
					d = s/r;
					if (d || f) {
						f = 1;
						s %= r;
						(*pc->decoder->filter_function)(mbfl_hexchar_table[d], pc->decoder);
					}
					r /= 10;
				}
				if (!f) {
					/* the value is zero: write a single '0' */
					f = 1;
					(*pc->decoder->filter_function)(mbfl_hexchar_table[0], pc->decoder);
				}
				(*pc->decoder->filter_function)(0x3b, pc->decoder);		/* ';' */
			}
		}
		if (f) {
			break;
		}
		n++;
	}
	if (!f) {
		(*pc->decoder->filter_function)(c, pc->decoder);
	}

	return c;
}

+static int +collector_decode_htmlnumericentity(int c, void *data) +{ + struct collector_htmlnumericentity_data *pc = (struct collector_htmlnumericentity_data *)data; + int f, n, s, r, d, size, *mapelm; + + switch (pc->status) { + case 1: + if (c == 0x23) { /* '#' */ + pc->status = 2; + } else { + pc->status = 0; + (*pc->decoder->filter_function)(0x26, pc->decoder); /* '&' */ + (*pc->decoder->filter_function)(c, pc->decoder); + } + break; + case 2: + if (c == 0x78) { /* 'x' */ + pc->status = 4; + } else if (c >= 0x30 && c <= 0x39) { /* '0' - '9' */ + pc->cache = c - 0x30; + pc->status = 3; + pc->digit = 1; + }
else { + pc->status = 0; + (*pc->decoder->filter_function)(0x26, pc->decoder); /* '&' */ + (*pc->decoder->filter_function)(0x23, pc->decoder); /* '#' */ + (*pc->decoder->filter_function)(c, pc->decoder); + } + break; + case 3: + s = 0; + f = 0; + if (c >= 0x30 && c <= 0x39) { /* '0' - '9' */ + if (pc->digit > 9) { + pc->status = 0; + s = pc->cache; + f = 1; + } else { + s = pc->cache*10 + c - 0x30; + pc->cache = s; + pc->digit++; + } + } else { + pc->status = 0; + s = pc->cache; + f = 1; + n = 0; + size = pc->mapsize; + while (n < size) { + mapelm = &(pc->convmap[n*4]); + d = s - mapelm[2]; + if (d >= mapelm[0] && d <= mapelm[1]) { + f = 0; + (*pc->decoder->filter_function)(d, pc->decoder); + if (c != 0x3b) { /* ';' */ + (*pc->decoder->filter_function)(c, pc->decoder); + } + break; + } + n++; + } + } + if (f) { + (*pc->decoder->filter_function)(0x26, pc->decoder); /* '&' */ + (*pc->decoder->filter_function)(0x23, pc->decoder); /* '#' */ + r = 1; + n = pc->digit; + while (n > 0) { + r *= 10; + n--; + } + s %= r; + r /= 10; + while (r > 0) { + d = s/r; + s %= r; + r /= 10; + (*pc->decoder->filter_function)(mbfl_hexchar_table[d], pc->decoder); + } + (*pc->decoder->filter_function)(c, pc->decoder); + } + break; + case 4: + if (c >= 0x30 && c <= 0x39) { /* '0' - '9' */ + pc->cache = c - 0x30; + pc->status = 5; + pc->digit = 1; + } else if (c >= 0x41 && c <= 0x46) { /* 'A' - 'F' */ + pc->cache = c - 0x41 + 10; + pc->status = 5; + pc->digit = 1; + } else if (c >= 0x61 && c <= 0x66) { /* 'a' - 'f' */ + pc->cache = c - 0x61 + 10; + pc->status = 5; + pc->digit = 1; + } else { + pc->status = 0; + (*pc->decoder->filter_function)(0x26, pc->decoder); /* '&' */ + (*pc->decoder->filter_function)(0x23, pc->decoder); /* '#' */ + (*pc->decoder->filter_function)(0x78, pc->decoder); /* 'x' */ + (*pc->decoder->filter_function)(c, pc->decoder); + } + break; + case 5: + s = 0; + f = 0; + if ((c >= 0x30 && c <= 0x39) || + (c >= 0x41 && c <= 0x46) || + (c >= 0x61 && c <= 0x66)) { /* '0' - 
'9' or 'a' - 'f' */ + if (pc->digit > 9) { + pc->status = 0; + s = pc->cache; + f = 1; + } else { + if (c >= 0x30 && c <= 0x39) { + s = pc->cache*16 + (c - 0x30); + } else if (c >= 0x41 && c <= 0x46) { + s = pc->cache*16 + (c - 0x41 + 10); + } else { + s = pc->cache*16 + (c - 0x61 + 10); + } + pc->cache = s; + pc->digit++; + } + } else { + pc->status = 0; + s = pc->cache; + f = 1; + n = 0; + size = pc->mapsize; + while (n < size) { + mapelm = &(pc->convmap[n*4]); + d = s - mapelm[2]; + if (d >= mapelm[0] && d <= mapelm[1]) { + f = 0; + (*pc->decoder->filter_function)(d, pc->decoder); + if (c != 0x3b) { /* ';' */ + (*pc->decoder->filter_function)(c, pc->decoder); + } + break; + } + n++; + } + } + if (f) { + (*pc->decoder->filter_function)(0x26, pc->decoder); /* '&' */ + (*pc->decoder->filter_function)(0x23, pc->decoder); /* '#' */ + (*pc->decoder->filter_function)(0x78, pc->decoder); /* 'x' */ + r = 1; + n = pc->digit; + while (n > 0) { + r *= 16; + n--; + } + s %= r; + r /= 16; + while (r > 0) { + d = s/r; + s %= r; + r /= 16; + (*pc->decoder->filter_function)(mbfl_hexchar_table[d], pc->decoder); + } + (*pc->decoder->filter_function)(c, pc->decoder); + } + break; + default: + if (c == 0x26) { /* '&' */ + pc->status = 1; + } else { + (*pc->decoder->filter_function)(c, pc->decoder); + } + break; + } + + return c; +} + +static int +collector_encode_hex_htmlnumericentity(int c, void *data) +{ + struct collector_htmlnumericentity_data *pc = (struct collector_htmlnumericentity_data *)data; + int f, n, s, r, d, size, *mapelm; + + size = pc->mapsize; + f = 0; + n = 0; + while (n < size) { + mapelm = &(pc->convmap[n*4]); + if (c >= mapelm[0] && c <= mapelm[1]) { + s = (c + mapelm[2]) & mapelm[3]; + if (s >= 0) { + (*pc->decoder->filter_function)(0x26, pc->decoder); /* '&' */ + (*pc->decoder->filter_function)(0x23, pc->decoder); /* '#' */ + (*pc->decoder->filter_function)(0x78, pc->decoder); /* 'x' */ + r = 0x1000000; + s %= r; + while (r > 0) { + d = s/r; + if (d || f) { + 
f = 1; + s %= r; + (*pc->decoder->filter_function)(mbfl_hexchar_table[d], pc->decoder); + } + r /= 16; + } + if (!f) { + f = 1; + (*pc->decoder->filter_function)(mbfl_hexchar_table[0], pc->decoder); + } + (*pc->decoder->filter_function)(0x3b, pc->decoder); /* ';' */ + } + } + if (f) { + break; + } + n++; + } + if (!f) { + (*pc->decoder->filter_function)(c, pc->decoder); + } + + return c; +} + +int mbfl_filt_decode_htmlnumericentity_flush(mbfl_convert_filter *filter) +{ + struct collector_htmlnumericentity_data *pc = (struct collector_htmlnumericentity_data *)filter; + int n, s, r, d; + + if (pc->status) { + switch (pc->status) { + case 1: /* '&' */ + (*pc->decoder->filter_function)(0x26, pc->decoder); /* '&' */ + break; + case 2: /* '#' */ + (*pc->decoder->filter_function)(0x26, pc->decoder); /* '&' */ + (*pc->decoder->filter_function)(0x23, pc->decoder); /* '#' */ + break; + case 3: /* '0'-'9' */ + (*pc->decoder->filter_function)(0x26, pc->decoder); /* '&' */ + (*pc->decoder->filter_function)(0x23, pc->decoder); /* '#' */ + + s = pc->cache; + r = 1; + n = pc->digit; + while (n > 0) { + r *= 10; + n--; + } + s %= r; + r /= 10; + while (r > 0) { + d = s/r; + s %= r; + r /= 10; + (*pc->decoder->filter_function)(mbfl_hexchar_table[d], pc->decoder); + } + + break; + case 4: /* 'x' */ + (*pc->decoder->filter_function)(0x26, pc->decoder); /* '&' */ + (*pc->decoder->filter_function)(0x23, pc->decoder); /* '#' */ + (*pc->decoder->filter_function)(0x78, pc->decoder); /* 'x' */ + break; + case 5: /* '0'-'9','a'-'f' */ + (*pc->decoder->filter_function)(0x26, pc->decoder); /* '&' */ + (*pc->decoder->filter_function)(0x23, pc->decoder); /* '#' */ + (*pc->decoder->filter_function)(0x78, pc->decoder); /* 'x' */ + + s = pc->cache; + r = 1; + n = pc->digit; + while (n > 0) { + r *= 16; + n--; + } + s %= r; + r /= 16; + while (r > 0) { + d = s/r; + s %= r; + r /= 16; + (*pc->decoder->filter_function)(mbfl_hexchar_table[d], pc->decoder); + } + break; + default: + break; + } + } + + 
pc->status = 0; + pc->cache = 0; + pc->digit = 0; + + return 0; +} + +/* + * Encode or decode HTML numeric entities in `string`, writing the + * converted text into `result`. + * + * convmap: conversion map, `mapsize` entries of four ints each, as + * consumed by the collector functions above. + * type: 0 encodes to decimal entities, 2 encodes to hexadecimal + * entities, any other value (documented as 1) decodes + * decimal and hexadecimal entities. + * + * Returns the pointer produced by mbfl_memory_device_result(), or NULL + * when `string` or `result` is NULL or a conversion filter could not be + * created. On the filter-creation failure path the output buffer + * allocated by mbfl_memory_device_init() is released before returning. + */ +mbfl_string * +mbfl_html_numeric_entity( + mbfl_string *string, + mbfl_string *result, + int *convmap, + int mapsize, + int type) +{ + struct collector_htmlnumericentity_data pc; + mbfl_memory_device device; + mbfl_convert_filter *encoder; + int n; + unsigned char *p; + + if (string == NULL || result == NULL) { + return NULL; + } + mbfl_string_init(result); + result->no_language = string->no_language; + result->no_encoding = string->no_encoding; + mbfl_memory_device_init(&device, string->len, 0); + + /* output code filter */ + pc.decoder = mbfl_convert_filter_new( + mbfl_no_encoding_wchar, + string->no_encoding, + mbfl_memory_device_output, 0, &device); + /* wchar filter */ + if (type == 0) { /* decimal output */ + encoder = mbfl_convert_filter_new( + string->no_encoding, + mbfl_no_encoding_wchar, + collector_encode_htmlnumericentity, 0, &pc); + } else if (type == 2) { /* hex output */ + encoder = mbfl_convert_filter_new( + string->no_encoding, + mbfl_no_encoding_wchar, + collector_encode_hex_htmlnumericentity, 0, &pc); + } else { /* type == 1: decimal/hex input */ + encoder = mbfl_convert_filter_new( + string->no_encoding, + mbfl_no_encoding_wchar, + collector_decode_htmlnumericentity, + (int (*)(void*))mbfl_filt_decode_htmlnumericentity_flush, &pc); + } + if (pc.decoder == NULL || encoder == NULL) { + mbfl_convert_filter_delete(encoder); + mbfl_convert_filter_delete(pc.decoder); + /* do not leak the buffer allocated by mbfl_memory_device_init() */ + mbfl_memory_device_clear(&device); + return NULL; + } + pc.status = 0; + pc.cache = 0; + pc.digit = 0; + pc.convmap = convmap; + pc.mapsize = mapsize; + + /* feed data */ + p = string->val; + n = string->len; + if (p != NULL) { + while (n > 0) { + if ((*encoder->filter_function)(*p++, encoder) < 0) { + break; + } + n--; + } + } + mbfl_convert_filter_flush(encoder); + mbfl_convert_filter_flush(pc.decoder); + result = mbfl_memory_device_result(&device, result); + mbfl_convert_filter_delete(encoder); + mbfl_convert_filter_delete(pc.decoder); + + return 
result; +} + +/* + * Local variables: + * tab-width: 4 + * c-basic-offset: 4 + * End: + */ diff --git a/ext/mbstring/libmbfl/mbfl/mbfilter.h b/ext/mbstring/libmbfl/mbfl/mbfilter.h new file mode 100644 index 0000000..6108f93 --- /dev/null +++ b/ext/mbstring/libmbfl/mbfl/mbfilter.h @@ -0,0 +1,290 @@ +/* charset=UTF-8 + * vim: encoding=utf-8: + * */ + +/* + * "streamable kanji code filter and converter" + * + * Copyright (c) 1998,1999,2000,2001 HappySize, Inc. All rights reserved. + * + * This software is released under the GNU Lesser General Public License. + * (Version 2.1, February 1999) + * Please read the following detail of the licence (in japanese). + * + * ◆使用許諾条件◆ + * + * このソフトウェアは株式会社ハッピーサイズによって開発されました。株式会社ハッ + * ピーサイズは、著作権法および万国著作権条約の定めにより、このソフトウェアに関 + * するすべての権利を留保する権利を持ち、ここに行使します。株式会社ハッピーサイ + * ズは以下に明記した条件に従って、このソフトウェアを使用する排他的ではない権利 + * をお客様に許諾します。何人たりとも、以下の条件に反してこのソフトウェアを使用 + * することはできません。 + * + * このソフトウェアを「GNU Lesser General Public License (Version 2.1, February + * 1999)」に示された条件で使用することを、全ての方に許諾します。「GNU Lesser + * General Public License」を満たさない使用には、株式会社ハッピーサイズから書面 + * による許諾を得る必要があります。 + * + * 「GNU Lesser General Public License」の全文は以下のウェブページから取得でき + * ます。「GNU Lesser General Public License」とは、これまでLibrary General + * Public Licenseと呼ばれていたものです。 + * http://www.gnu.org/ --- GNUウェブサイト + * http://www.gnu.org/copyleft/lesser.html --- ライセンス文面 + * このライセンスの内容がわからない方、守れない方には使用を許諾しません。 + * + * しかしながら、当社とGNUプロジェクトとの特定の関係を示唆または主張するもので + * はありません。 + * + * ◆保証内容◆ + * + * このソフトウェアは、期待された動作・機能・性能を持つことを目標として設計され + * 開発されていますが、これを保証するものではありません。このソフトウェアは「こ + * のまま」の状態で提供されており、たとえばこのソフトウェアの有用性ないし特定の + * 目的に合致することといった、何らかの保証内容が、明示されたり暗黙に示されてい + * る場合であっても、その保証は無効です。このソフトウェアを使用した結果ないし使 + * 用しなかった結果によって、直接あるいは間接に受けた身体的な傷害、財産上の損害 + * 、データの損失あるいはその他の全ての損害については、その損害の可能性が使用者 + * 、当社あるいは第三者によって警告されていた場合であっても、当社はその損害の賠 + * 償および補填を行いません。この規定は他の全ての、書面上または書面に無い保証・ + * 契約・規定に優先します。 + * + * ◆著作権者の連絡先および使用条件についての問い合わせ先◆ + * + * 〒102-0073 + * 東京都千代田区九段北1-13-5日本地所第一ビル4F + * 株式会社ハッピーサイズ + * 
Phone: 03-3512-3655, Fax: 03-3512-3656 + * Email: sales@happysize.co.jp + * Web: http://happysize.com/ + * + * ◆著者◆ + * + * 金本 茂 <sgk@happysize.co.jp> + * + * ◆履歴◆ + * + * 1998/11/10 sgk implementation in C++ + * 1999/4/25 sgk Cで書きなおし。 + * 1999/4/26 sgk 入力フィルタを実装。漢字コードを推定しながらフィルタを追加。 + * 1999/6/?? Unicodeサポート。 + * 1999/6/22 sgk ライセンスをLGPLに変更。 + * + */ + +/* + * Unicode support + * + * Portions copyright (c) 1999,2000,2001 by the PHP3 internationalization team. + * All rights reserved. + * + */ + +/* + * + * streamable kanji code filter and converter + * mbfl : Multi Byte FiLter Library + * + */ + +#ifndef MBFL_MBFILTER_H +#define MBFL_MBFILTER_H + +#include "mbfl_defs.h" +#include "mbfl_consts.h" +#include "mbfl_allocators.h" +#include "mbfl_encoding.h" +#include "mbfl_language.h" +#include "mbfl_string.h" +#include "mbfl_convert.h" +#include "mbfl_ident.h" + +/* + * version information + */ +#define MBFL_VERSION_MAJOR 1 +#define MBFL_VERSION_MINOR 3 +#define MBFL_VERSION_TEENY 2 + +/* + * convert filter + */ +#define MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE 0 +#define MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR 1 +#define MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG 2 +#define MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY 3 + +/* + * buffering converter + */ +typedef struct _mbfl_buffer_converter mbfl_buffer_converter; + +struct _mbfl_buffer_converter { + mbfl_convert_filter *filter1; + mbfl_convert_filter *filter2; + mbfl_memory_device device; + const mbfl_encoding *from; + const mbfl_encoding *to; +}; + +MBFLAPI extern mbfl_buffer_converter * mbfl_buffer_converter_new(enum mbfl_no_encoding from, enum mbfl_no_encoding to, int buf_initsz); +MBFLAPI extern mbfl_buffer_converter * mbfl_buffer_converter_new2(const mbfl_encoding *from, const mbfl_encoding *to, int buf_initsz); +MBFLAPI extern void mbfl_buffer_converter_delete(mbfl_buffer_converter *convd); +MBFLAPI extern void mbfl_buffer_converter_reset(mbfl_buffer_converter *convd); +MBFLAPI extern int 
mbfl_buffer_converter_illegal_mode(mbfl_buffer_converter *convd, int mode); +MBFLAPI extern int mbfl_buffer_converter_illegal_substchar(mbfl_buffer_converter *convd, int substchar); +MBFLAPI extern int mbfl_buffer_converter_strncat(mbfl_buffer_converter *convd, const unsigned char *p, int n); +MBFLAPI extern int mbfl_buffer_converter_feed(mbfl_buffer_converter *convd, mbfl_string *string); +MBFLAPI extern int mbfl_buffer_converter_feed2(mbfl_buffer_converter *convd, mbfl_string *string, int *loc); +MBFLAPI extern int mbfl_buffer_converter_flush(mbfl_buffer_converter *convd); +MBFLAPI extern mbfl_string * mbfl_buffer_converter_getbuffer(mbfl_buffer_converter *convd, mbfl_string *result); +MBFLAPI extern mbfl_string * mbfl_buffer_converter_result(mbfl_buffer_converter *convd, mbfl_string *result); +MBFLAPI extern mbfl_string * mbfl_buffer_converter_feed_result(mbfl_buffer_converter *convd, mbfl_string *string, mbfl_string *result); +MBFLAPI extern int mbfl_buffer_illegalchars(mbfl_buffer_converter *convd); + +/* + * encoding detector + */ +typedef struct _mbfl_encoding_detector mbfl_encoding_detector; + +struct _mbfl_encoding_detector { + mbfl_identify_filter **filter_list; + int filter_list_size; + int strict; +}; + +MBFLAPI extern mbfl_encoding_detector * mbfl_encoding_detector_new(enum mbfl_no_encoding *elist, int elistsz, int strict); +MBFLAPI extern mbfl_encoding_detector * mbfl_encoding_detector_new2(const mbfl_encoding **elist, int elistsz, int strict); +MBFLAPI extern void mbfl_encoding_detector_delete(mbfl_encoding_detector *identd); +MBFLAPI extern int mbfl_encoding_detector_feed(mbfl_encoding_detector *identd, mbfl_string *string); +MBFLAPI extern enum mbfl_no_encoding mbfl_encoding_detector_judge(mbfl_encoding_detector *identd); +MBFLAPI extern const mbfl_encoding *mbfl_encoding_detector_judge2(mbfl_encoding_detector *identd); + + +/* + * encoding converter + */ +MBFLAPI extern mbfl_string * +mbfl_convert_encoding(mbfl_string *string, mbfl_string *result, 
enum mbfl_no_encoding toenc); + + +/* + * identify encoding + */ +MBFLAPI extern const mbfl_encoding * +mbfl_identify_encoding(mbfl_string *string, enum mbfl_no_encoding *elist, int elistsz, int strict); + +MBFLAPI extern const mbfl_encoding * +mbfl_identify_encoding2(mbfl_string *string, const mbfl_encoding **elist, int elistsz, int strict); +/* + * strlen + */ +MBFLAPI extern int +mbfl_strlen(mbfl_string *string); + +/* + * oddlen + */ +MBFLAPI extern int +mbfl_oddlen(mbfl_string *string); + +/* + * strpos + */ +MBFLAPI extern int +mbfl_strpos(mbfl_string *haystack, mbfl_string *needle, int offset, int reverse); + + +/* + * substr_count + */ +MBFLAPI extern int +mbfl_substr_count(mbfl_string *haystack, mbfl_string *needle); + +/* + * substr + */ +MBFLAPI extern mbfl_string * +mbfl_substr(mbfl_string *string, mbfl_string *result, int from, int length); + +/* + * strcut + */ +MBFLAPI extern mbfl_string * +mbfl_strcut(mbfl_string *string, mbfl_string *result, int from, int length); + +/* + * strwidth + */ +MBFLAPI extern int +mbfl_strwidth(mbfl_string *string); + +/* + * strimwidth + */ +MBFLAPI extern mbfl_string * +mbfl_strimwidth(mbfl_string *string, mbfl_string *marker, mbfl_string *result, int from, int width); + +/* + * MIME header encode + */ +struct mime_header_encoder_data; /* forward declaration */ + +MBFLAPI extern struct mime_header_encoder_data * +mime_header_encoder_new( + enum mbfl_no_encoding incode, + enum mbfl_no_encoding outcode, + enum mbfl_no_encoding encoding); + +MBFLAPI extern void +mime_header_encoder_delete(struct mime_header_encoder_data *pe); + +MBFLAPI extern int +mime_header_encoder_feed(int c, struct mime_header_encoder_data *pe); + +MBFLAPI extern mbfl_string * +mime_header_encoder_result(struct mime_header_encoder_data *pe, mbfl_string *result); + +MBFLAPI extern mbfl_string * +mbfl_mime_header_encode( + mbfl_string *string, mbfl_string *result, + enum mbfl_no_encoding outcode, + enum mbfl_no_encoding encoding, + const char 
*linefeed, + int indent); + +/* + * MIME header decode + */ +struct mime_header_decoder_data; /* forward declaration */ + +MBFLAPI extern struct mime_header_decoder_data * +mime_header_decoder_new(enum mbfl_no_encoding outcode); + +MBFLAPI extern void +mime_header_decoder_delete(struct mime_header_decoder_data *pd); + +MBFLAPI extern int +mime_header_decoder_feed(int c, struct mime_header_decoder_data *pd); + +MBFLAPI extern mbfl_string * +mime_header_decoder_result(struct mime_header_decoder_data *pd, mbfl_string *result); + +MBFLAPI extern mbfl_string * +mbfl_mime_header_decode( + mbfl_string *string, + mbfl_string *result, + enum mbfl_no_encoding outcode); + +/* + * convert HTML numeric entity + */ +MBFLAPI extern mbfl_string * +mbfl_html_numeric_entity(mbfl_string *string, mbfl_string *result, int *convmap, int mapsize, int type); + +/* + * conversion between halfwidth and fullwidth characters for Japanese + */ +MBFLAPI extern mbfl_string * +mbfl_ja_jp_hantozen(mbfl_string *string, mbfl_string *result, int mode); + +#endif /* MBFL_MBFILTER_H */ diff --git a/ext/mbstring/libmbfl/mbfl/mbfilter_8bit.c b/ext/mbstring/libmbfl/mbfl/mbfilter_8bit.c new file mode 100644 index 0000000..9b19479 --- /dev/null +++ b/ext/mbstring/libmbfl/mbfl/mbfilter_8bit.c @@ -0,0 +1,50 @@ +/* + * "streamable kanji code filter and converter" + * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. + * + * LICENSE NOTICES + * + * This file is part of "streamable kanji code filter and converter", + * which is distributed under the terms of GNU Lesser General Public + * License (version 2) as published by the Free Software Foundation. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with "streamable kanji code filter and converter"; + * if not, write to the Free Software Foundation, Inc., 59 Temple Place, + * Suite 330, Boston, MA 02111-1307 USA + * + * The author of this file: + * + */ +/* + * The source code included in this files was separated from mbfilter.c + * by Moriyoshi Koizumi <moriyoshi@php.net> on 20 Dec 2002. The file + * mbfilter.c is included in this package . + * + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#ifdef HAVE_STDDEF_H +#include <stddef.h> +#endif + +#include "mbfilter.h" + +static const char *mbfl_encoding_8bit_aliases[] = {"binary", NULL}; + +const mbfl_encoding mbfl_encoding_8bit = { + mbfl_no_encoding_8bit, + "8bit", + "8bit", + (const char *(*)[])&mbfl_encoding_8bit_aliases, + NULL, + MBFL_ENCTYPE_SBCS +}; diff --git a/ext/mbstring/libmbfl/mbfl/mbfilter_8bit.h b/ext/mbstring/libmbfl/mbfl/mbfilter_8bit.h new file mode 100644 index 0000000..a87c564 --- /dev/null +++ b/ext/mbstring/libmbfl/mbfl/mbfilter_8bit.h @@ -0,0 +1,39 @@ +/* + * "streamable kanji code filter and converter" + * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. + * + * LICENSE NOTICES + * + * This file is part of "streamable kanji code filter and converter", + * which is distributed under the terms of GNU Lesser General Public + * License (version 2) as published by the Free Software Foundation. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with "streamable kanji code filter and converter"; + * if not, write to the Free Software Foundation, Inc., 59 Temple Place, + * Suite 330, Boston, MA 02111-1307 USA + * + * The author of this file: + * + */ +/* + * The source code included in this files was separated from mbfilter.c + * by Moriyoshi Koizumi <moriyoshi@php.net> on 20 Dec 2002. The file + * mbfilter.c is included in this package . + * + */ + +#ifndef MBFL_MBFILTER_8BIT_H +#define MBFL_MBFILTER_8BIT_H + +#include "mbfl_defs.h" +#include "mbfilter.h" + +MBFLAPI extern const mbfl_encoding mbfl_encoding_8bit; + +#endif /* MBFL_MBFILTER_8BIT_H */ diff --git a/ext/mbstring/libmbfl/mbfl/mbfilter_pass.c b/ext/mbstring/libmbfl/mbfl/mbfilter_pass.c new file mode 100644 index 0000000..b6da879 --- /dev/null +++ b/ext/mbstring/libmbfl/mbfl/mbfilter_pass.c @@ -0,0 +1,65 @@ +/* + * "streamable kanji code filter and converter" + * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. + * + * LICENSE NOTICES + * + * This file is part of "streamable kanji code filter and converter", + * which is distributed under the terms of GNU Lesser General Public + * License (version 2) as published by the Free Software Foundation. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with "streamable kanji code filter and converter"; + * if not, write to the Free Software Foundation, Inc., 59 Temple Place, + * Suite 330, Boston, MA 02111-1307 USA + * + * The author of this file: + * + */ +/* + * The source code included in this files was separated from mbfilter.c + * by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002. 
+ * + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#ifdef HAVE_STDDEF_H +#include <stddef.h> +#endif + +#include "mbfilter.h" +#include "mbfilter_pass.h" + +static const char *mbfl_encoding_pass_aliases[] = {"none", NULL}; + +const mbfl_encoding mbfl_encoding_pass = { + mbfl_no_encoding_pass, + "pass", + NULL, + (const char *(*)[])&mbfl_encoding_pass_aliases, + NULL, + 0 +}; + +const struct mbfl_convert_vtbl vtbl_pass = { + mbfl_no_encoding_pass, + mbfl_no_encoding_pass, + mbfl_filt_conv_common_ctor, + mbfl_filt_conv_common_dtor, + mbfl_filt_conv_pass, + mbfl_filt_conv_common_flush +}; + +int mbfl_filt_conv_pass(int c, mbfl_convert_filter *filter) +{ + return (*filter->output_function)(c, filter->data); +} + diff --git a/ext/mbstring/libmbfl/mbfl/mbfilter_pass.h b/ext/mbstring/libmbfl/mbfl/mbfilter_pass.h new file mode 100644 index 0000000..49d169c --- /dev/null +++ b/ext/mbstring/libmbfl/mbfl/mbfilter_pass.h @@ -0,0 +1,41 @@ +/* + * "streamable kanji code filter and converter" + * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. + * + * LICENSE NOTICES + * + * This file is part of "streamable kanji code filter and converter", + * which is distributed under the terms of GNU Lesser General Public + * License (version 2) as published by the Free Software Foundation. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with "streamable kanji code filter and converter"; + * if not, write to the Free Software Foundation, Inc., 59 Temple Place, + * Suite 330, Boston, MA 02111-1307 USA + * + * The author of this file: + * + */ +/* + * The source code included in this files was separated from mbfilter.c + * by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002. + * + */ + +#ifndef MBFL_MBFILTER_PASS_H +#define MBFL_MBFILTER_PASS_H + +#include "mbfl_defs.h" +#include "mbfilter.h" + +MBFLAPI extern const mbfl_encoding mbfl_encoding_pass; +MBFLAPI extern const struct mbfl_convert_vtbl vtbl_pass; + +MBFLAPI extern int mbfl_filt_conv_pass(int c, mbfl_convert_filter *filter); + +#endif /* MBFL_MBFILTER_PASS_H */ diff --git a/ext/mbstring/libmbfl/mbfl/mbfilter_wchar.c b/ext/mbstring/libmbfl/mbfl/mbfilter_wchar.c new file mode 100644 index 0000000..50f7629 --- /dev/null +++ b/ext/mbstring/libmbfl/mbfl/mbfilter_wchar.c @@ -0,0 +1,48 @@ +/* + * "streamable kanji code filter and converter" + * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. + * + * LICENSE NOTICES + * + * This file is part of "streamable kanji code filter and converter", + * which is distributed under the terms of GNU Lesser General Public + * License (version 2) as published by the Free Software Foundation. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with "streamable kanji code filter and converter"; + * if not, write to the Free Software Foundation, Inc., 59 Temple Place, + * Suite 330, Boston, MA 02111-1307 USA + * + * The author of this file: + * + */ +/* + * The source code included in this files was separated from mbfilter.c + * by Moriyoshi Koizumi <moriyoshi@php.net> on 20 Dec 2002. The file + * mbfilter.c is included in this package . + * + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#ifdef HAVE_STDDEF_H +#include <stddef.h> +#endif + +#include "mbfilter.h" + +const mbfl_encoding mbfl_encoding_wchar = { + mbfl_no_encoding_wchar, + "wchar", + NULL, + NULL, + NULL, + MBFL_ENCTYPE_WCS4BE +}; diff --git a/ext/mbstring/libmbfl/mbfl/mbfilter_wchar.h b/ext/mbstring/libmbfl/mbfl/mbfilter_wchar.h new file mode 100644 index 0000000..9e9396a --- /dev/null +++ b/ext/mbstring/libmbfl/mbfl/mbfilter_wchar.h @@ -0,0 +1,39 @@ +/* + * "streamable kanji code filter and converter" + * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. + * + * LICENSE NOTICES + * + * This file is part of "streamable kanji code filter and converter", + * which is distributed under the terms of GNU Lesser General Public + * License (version 2) as published by the Free Software Foundation. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with "streamable kanji code filter and converter"; + * if not, write to the Free Software Foundation, Inc., 59 Temple Place, + * Suite 330, Boston, MA 02111-1307 USA + * + * The author of this file: + * + */ +/* + * The source code included in this files was separated from mbfilter.c + * by Moriyoshi Koizumi <moriyoshi@php.net> on 20 Dec 2002. The file + * mbfilter.c is included in this package . + * + */ + +#ifndef MBFL_MBFILTER_WCHAR_H +#define MBFL_MBFILTER_WCHAR_H + +#include "mbfl_defs.h" +#include "mbfilter.h" + +MBFLAPI extern const mbfl_encoding mbfl_encoding_wchar; + +#endif /* MBFL_MBFILTER_WCHAR_H */ diff --git a/ext/mbstring/libmbfl/mbfl/mbfl_allocators.c b/ext/mbstring/libmbfl/mbfl/mbfl_allocators.c new file mode 100644 index 0000000..be9d345 --- /dev/null +++ b/ext/mbstring/libmbfl/mbfl/mbfl_allocators.c @@ -0,0 +1,93 @@ +/* + * "streamable kanji code filter and converter" + * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. + * + * LICENSE NOTICES + * + * This file is part of "streamable kanji code filter and converter", + * which is distributed under the terms of GNU Lesser General Public + * License (version 2) as published by the Free Software Foundation. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with "streamable kanji code filter and converter"; + * if not, write to the Free Software Foundation, Inc., 59 Temple Place, + * Suite 330, Boston, MA 02111-1307 USA + * + * The author of this file: + * + */ +/* + * The source code included in this files was separated from mbfilter.c + * by Moriyoshi Koizumi <moriyoshi@php.net> on 20 Dec 2002. 
The file + * mbfilter.c is included in this package . + * + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#ifdef HAVE_STDLIB_H +#include <stdlib.h> +#endif + +#ifdef HAVE_MEMORY_H +#include <memory.h> +#endif + +#ifdef HAVE_STRING_H +#include <string.h> +#endif + +#ifdef HAVE_STRINGS_H +#include <strings.h> +#endif + +#ifdef HAVE_STDDEF_H +#include <stddef.h> +#endif + +#include "mbfl_allocators.h" + +static void *__mbfl__malloc(unsigned int); +static void *__mbfl__realloc(void *, unsigned int); +static void *__mbfl__calloc(unsigned int, unsigned int); +static void __mbfl__free(void *); + +static mbfl_allocators default_allocators = { + __mbfl__malloc, + __mbfl__realloc, + __mbfl__calloc, + __mbfl__free, + __mbfl__malloc, + __mbfl__realloc, + __mbfl__free +}; + +mbfl_allocators *__mbfl_allocators = &default_allocators; + +static void *__mbfl__malloc(unsigned int sz) +{ + return malloc(sz); +} + +static void *__mbfl__realloc(void *ptr, unsigned int sz) +{ + return realloc(ptr, sz); +} + +static void *__mbfl__calloc(unsigned int nelems, unsigned int szelem) +{ + return calloc(nelems, szelem); +} + +static void __mbfl__free(void *ptr) +{ + free(ptr); +} + diff --git a/ext/mbstring/libmbfl/mbfl/mbfl_allocators.h b/ext/mbstring/libmbfl/mbfl/mbfl_allocators.h new file mode 100644 index 0000000..a1539f0 --- /dev/null +++ b/ext/mbstring/libmbfl/mbfl/mbfl_allocators.h @@ -0,0 +1,56 @@ +/* + * "streamable kanji code filter and converter" + * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. + * + * LICENSE NOTICES + * + * This file is part of "streamable kanji code filter and converter", + * which is distributed under the terms of GNU Lesser General Public + * License (version 2) as published by the Free Software Foundation. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with "streamable kanji code filter and converter"; + * if not, write to the Free Software Foundation, Inc., 59 Temple Place, + * Suite 330, Boston, MA 02111-1307 USA + * + * The author of this file: + * + */ +/* + * The source code included in this file was separated from mbfilter.h + * by Moriyoshi Koizumi <moriyoshi@php.net> on 20 Dec 2002. The file + * mbfilter.h is included in this package. + * + */ + +#ifndef MBFL_ALLOCATORS_H +#define MBFL_ALLOCATORS_H + +#include "mbfl_defs.h" +/* + * Table of allocator callbacks used by the library. The p-prefixed + * members are a second malloc/realloc/free set (presumably for + * persistent allocations -- confirm against the callers). + */ +typedef struct _mbfl_allocators { + void *(*malloc)(unsigned int); + void *(*realloc)(void *, unsigned int); + void *(*calloc)(unsigned int, unsigned int); + void (*free)(void *); + void *(*pmalloc)(unsigned int); + void *(*prealloc)(void *, unsigned int); + void (*pfree)(void *); +} mbfl_allocators; + +MBFLAPI extern mbfl_allocators *__mbfl_allocators; +/* + * Shorthand macros that dispatch through the table __mbfl_allocators + * points to. Each macro must name a member of mbfl_allocators exactly. + */ +#define mbfl_malloc (__mbfl_allocators->malloc) +#define mbfl_realloc (__mbfl_allocators->realloc) +#define mbfl_calloc (__mbfl_allocators->calloc) +#define mbfl_free (__mbfl_allocators->free) +#define mbfl_pmalloc (__mbfl_allocators->pmalloc) +#define mbfl_prealloc (__mbfl_allocators->prealloc) +#define mbfl_pfree (__mbfl_allocators->pfree) + +#endif /* MBFL_ALLOCATORS_H */ diff --git a/ext/mbstring/libmbfl/mbfl/mbfl_consts.h b/ext/mbstring/libmbfl/mbfl/mbfl_consts.h new file mode 100644 index 0000000..6a630c8 --- /dev/null +++ b/ext/mbstring/libmbfl/mbfl/mbfl_consts.h @@ -0,0 +1,94 @@ +/* + * "streamable kanji code filter and converter" + * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. + * + * LICENSE NOTICES + * + * This file is part of "streamable kanji code filter and converter", + * which is distributed under the terms of GNU Lesser General Public + * License (version 2) as published by the Free Software Foundation. 
+ * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with "streamable kanji code filter and converter"; + * if not, write to the Free Software Foundation, Inc., 59 Temple Place, + * Suite 330, Boston, MA 02111-1307 USA + * + * The author of this file: + * + */ +/* + * The source code included in this file was separated from mbfilter.h + * by Moriyoshi Koizumi <moriyoshi@php.net> on 20 Dec 2002. The file + * mbfilter.h is included in this package . + * + */ + +#ifndef MBFL_CONSTS_H +#define MBFL_CONSTS_H + +#define MBFL_ENCTYPE_SBCS 0x00000001 +#define MBFL_ENCTYPE_MBCS 0x00000002 +#define MBFL_ENCTYPE_WCS2BE 0x00000010 +#define MBFL_ENCTYPE_WCS2LE 0x00000020 +#define MBFL_ENCTYPE_MWC2BE 0x00000040 +#define MBFL_ENCTYPE_MWC2LE 0x00000080 +#define MBFL_ENCTYPE_WCS4BE 0x00000100 +#define MBFL_ENCTYPE_WCS4LE 0x00000200 +#define MBFL_ENCTYPE_MWC4BE 0x00000400 +#define MBFL_ENCTYPE_MWC4LE 0x00000800 +#define MBFL_ENCTYPE_SHFTCODE 0x00001000 +#define MBFL_ENCTYPE_ENC_STRM 0x00002000 +#define MBFL_ENCTYPE_GL_UNSAFE 0x00004000 + +/* wchar plane, special character */ +#define MBFL_WCSPLANE_MASK 0xffff /* low 16 bits of a wchar value; mbfl_filt_conv_illegal_output uses ~mask to pick the plane */ +#define MBFL_WCSPLANE_UCS2MAX 0x00010000 +#define MBFL_WCSPLANE_UTF32MAX 0x00110000 +#define MBFL_WCSPLANE_SUPMIN 0x00010000 +#define MBFL_WCSPLANE_SUPMAX 0x00200000 +#define MBFL_WCSPLANE_JIS0213 0x70e00000 /* JIS HEX : 2121h - 7E7Eh */ +#define MBFL_WCSPLANE_JIS0208 0x70e10000 /* JIS HEX : 2121h - 7E7Eh */ +#define MBFL_WCSPLANE_JIS0212 0x70e20000 /* JIS HEX : 2121h - 7E7Eh */ +#define MBFL_WCSPLANE_WINCP932 0x70e30000 /* JIS HEX : 2121h - 9898h */ +#define MBFL_WCSPLANE_8859_1 0x70e40000 /* 00h - FFh */ +#define MBFL_WCSPLANE_8859_2 0x70e50000 /* 00h - FFh */ +#define
MBFL_WCSPLANE_8859_3 0x70e60000 /* 00h - FFh */ +#define MBFL_WCSPLANE_8859_4 0x70e70000 /* 00h - FFh */ +#define MBFL_WCSPLANE_8859_5 0x70e80000 /* 00h - FFh */ +#define MBFL_WCSPLANE_8859_6 0x70e90000 /* 00h - FFh */ +#define MBFL_WCSPLANE_8859_7 0x70ea0000 /* 00h - FFh */ +#define MBFL_WCSPLANE_8859_8 0x70eb0000 /* 00h - FFh */ +#define MBFL_WCSPLANE_8859_9 0x70ec0000 /* 00h - FFh */ +#define MBFL_WCSPLANE_8859_10 0x70ed0000 /* 00h - FFh */ +#define MBFL_WCSPLANE_8859_13 0x70ee0000 /* 00h - FFh */ +#define MBFL_WCSPLANE_8859_14 0x70ef0000 /* 00h - FFh */ +#define MBFL_WCSPLANE_8859_15 0x70f00000 /* 00h - FFh */ +#define MBFL_WCSPLANE_KSC5601 0x70f10000 /* 2121h - 7E7Eh */ +#define MBFL_WCSPLANE_GB2312 0x70f20000 /* 2121h - 7E7Eh */ +#define MBFL_WCSPLANE_WINCP936 0x70f30000 /* 2121h - 9898h */ +#define MBFL_WCSPLANE_BIG5 0x70f40000 /* 2121h - 9898h */ +#define MBFL_WCSPLANE_CNS11643 0x70f50000 /* 2121h - 9898h */ +#define MBFL_WCSPLANE_UHC 0x70f60000 /* 8141h - fefeh */ +#define MBFL_WCSPLANE_CP1251 0x70f70000 +#define MBFL_WCSPLANE_CP866 0x70f80000 +#define MBFL_WCSPLANE_KOI8R 0x70f90000 +#define MBFL_WCSPLANE_8859_16 0x70fa0000 /* 00h - FFh */ +#define MBFL_WCSPLANE_ARMSCII8 0x70fb0000 +#define MBFL_WCSPLANE_KOI8U 0x70fc0000 +#define MBFL_WCSPLANE_CP1254 0x70fd0000 /* 00h - FFh */ +#define MBFL_WCSPLANE_CP850 0x70fe0000 /* 00h - FFh */ +#define MBFL_WCSPLANE_GB18030 0x70ff0000 /* a1a1h-e3329a35h */ +#define MBFL_WCSGROUP_MASK 0xffffff +#define MBFL_WCSGROUP_UCS4MAX 0x70000000 +#define MBFL_WCSGROUP_WCHARMAX 0x78000000 +#define MBFL_WCSGROUP_THROUGH 0x78000000 /* 000000h - FFFFFFh */ + +#define MBFL_QPRINT_STS_MIME_HEADER 0x1000000 +#define MBFL_BASE64_STS_MIME_HEADER 0x1000000 + +#endif /* MBFL_CONSTS_H */ diff --git a/ext/mbstring/libmbfl/mbfl/mbfl_convert.c b/ext/mbstring/libmbfl/mbfl/mbfl_convert.c new file mode 100644 index 0000000..ae8deb2 --- /dev/null +++ b/ext/mbstring/libmbfl/mbfl/mbfl_convert.c @@ -0,0 +1,627 @@ +/* + * "streamable kanji code filter 
and converter" + * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. + * + * LICENSE NOTICES + * + * This file is part of "streamable kanji code filter and converter", + * which is distributed under the terms of GNU Lesser General Public + * License (version 2) as published by the Free Software Foundation. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with "streamable kanji code filter and converter"; + * if not, write to the Free Software Foundation, Inc., 59 Temple Place, + * Suite 330, Boston, MA 02111-1307 USA + * + * The author of this file: + * + */ +/* + * The source code included in this files was separated from mbfilter.c + * by Moriyoshi Koizumi <moriyoshi@php.net> on 20 Dec 2002. The file + * mbfilter.c is included in this package . 
+ * + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#ifdef HAVE_STDDEF_H +#include <stddef.h> +#endif + +#include "mbfl_encoding.h" +#include "mbfl_allocators.h" +#include "mbfl_filter_output.h" +#include "mbfilter_pass.h" +#include "mbfilter_8bit.h" +#include "mbfilter_wchar.h" + +#include "filters/mbfilter_euc_cn.h" +#include "filters/mbfilter_hz.h" +#include "filters/mbfilter_euc_tw.h" +#include "filters/mbfilter_big5.h" +#include "filters/mbfilter_uhc.h" +#include "filters/mbfilter_euc_kr.h" +#include "filters/mbfilter_iso2022_kr.h" +#include "filters/mbfilter_sjis.h" +#include "filters/mbfilter_sjis_open.h" +#include "filters/mbfilter_sjis_2004.h" +#include "filters/mbfilter_sjis_mobile.h" +#include "filters/mbfilter_sjis_mac.h" +#include "filters/mbfilter_cp51932.h" +#include "filters/mbfilter_jis.h" +#include "filters/mbfilter_iso2022_jp_ms.h" +#include "filters/mbfilter_iso2022jp_2004.h" +#include "filters/mbfilter_iso2022jp_mobile.h" +#include "filters/mbfilter_euc_jp.h" +#include "filters/mbfilter_euc_jp_2004.h" +#include "filters/mbfilter_euc_jp_win.h" +#include "filters/mbfilter_gb18030.h" +#include "filters/mbfilter_ascii.h" +#include "filters/mbfilter_koi8r.h" +#include "filters/mbfilter_koi8u.h" +#include "filters/mbfilter_cp866.h" +#include "filters/mbfilter_cp932.h" +#include "filters/mbfilter_cp936.h" +#include "filters/mbfilter_cp1251.h" +#include "filters/mbfilter_cp1252.h" +#include "filters/mbfilter_cp1254.h" +#include "filters/mbfilter_cp5022x.h" +#include "filters/mbfilter_iso8859_1.h" +#include "filters/mbfilter_iso8859_2.h" +#include "filters/mbfilter_iso8859_3.h" +#include "filters/mbfilter_iso8859_4.h" +#include "filters/mbfilter_iso8859_5.h" +#include "filters/mbfilter_iso8859_6.h" +#include "filters/mbfilter_iso8859_7.h" +#include "filters/mbfilter_iso8859_8.h" +#include "filters/mbfilter_iso8859_9.h" +#include "filters/mbfilter_iso8859_10.h" +#include "filters/mbfilter_iso8859_13.h" +#include 
"filters/mbfilter_iso8859_14.h" +#include "filters/mbfilter_iso8859_15.h" +#include "filters/mbfilter_base64.h" +#include "filters/mbfilter_qprint.h" +#include "filters/mbfilter_uuencode.h" +#include "filters/mbfilter_7bit.h" +#include "filters/mbfilter_utf7.h" +#include "filters/mbfilter_utf7imap.h" +#include "filters/mbfilter_utf8.h" +#include "filters/mbfilter_utf8_mobile.h" +#include "filters/mbfilter_utf16.h" +#include "filters/mbfilter_utf32.h" +#include "filters/mbfilter_byte2.h" +#include "filters/mbfilter_byte4.h" +#include "filters/mbfilter_ucs4.h" +#include "filters/mbfilter_ucs2.h" +#include "filters/mbfilter_htmlent.h" +#include "filters/mbfilter_armscii8.h" +#include "filters/mbfilter_cp850.h" + +/* hex character table "0123456789ABCDEF" */ +static char mbfl_hexchar_table[] = { + 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x41,0x42,0x43,0x44,0x45,0x46 +}; + +const struct mbfl_convert_vtbl *mbfl_convert_filter_list[] = { + &vtbl_utf8_wchar, + &vtbl_wchar_utf8, + &vtbl_eucjp_wchar, + &vtbl_wchar_eucjp, + &vtbl_sjis_wchar, + &vtbl_wchar_sjis, + &vtbl_sjis_open_wchar, + &vtbl_wchar_sjis_open, + &vtbl_sjis2004_wchar, + &vtbl_wchar_sjis2004, + &vtbl_cp51932_wchar, + &vtbl_wchar_cp51932, + &vtbl_jis_wchar, + &vtbl_wchar_jis, + &vtbl_jis_ms_wchar, + &vtbl_wchar_jis_ms, + &vtbl_2022jp_wchar, + &vtbl_wchar_2022jp, + &vtbl_2022jpms_wchar, + &vtbl_wchar_2022jpms, + &vtbl_2022jp_2004_wchar, + &vtbl_wchar_2022jp_2004, + &vtbl_2022jp_kddi_wchar, + &vtbl_wchar_2022jp_kddi, + &vtbl_eucjpwin_wchar, + &vtbl_wchar_eucjpwin, + &vtbl_eucjp2004_wchar, + &vtbl_wchar_eucjp2004, + &vtbl_cp932_wchar, + &vtbl_wchar_cp932, + &vtbl_sjis_docomo_wchar, + &vtbl_wchar_sjis_docomo, + &vtbl_sjis_kddi_wchar, + &vtbl_wchar_sjis_kddi, + &vtbl_sjis_sb_wchar, + &vtbl_wchar_sjis_sb, + &vtbl_sjis_mac_wchar, + &vtbl_wchar_sjis_mac, + &vtbl_utf8_docomo_wchar, + &vtbl_wchar_utf8_docomo, + &vtbl_utf8_kddi_a_wchar, + &vtbl_wchar_utf8_kddi_a, + &vtbl_utf8_kddi_b_wchar, + 
&vtbl_wchar_utf8_kddi_b, + &vtbl_utf8_sb_wchar, + &vtbl_wchar_utf8_sb, + &vtbl_euccn_wchar, + &vtbl_wchar_euccn, + &vtbl_cp936_wchar, + &vtbl_wchar_cp936, + &vtbl_gb18030_wchar, + &vtbl_wchar_gb18030, + &vtbl_hz_wchar, + &vtbl_wchar_hz, + &vtbl_euctw_wchar, + &vtbl_wchar_euctw, + &vtbl_big5_wchar, + &vtbl_wchar_big5, + &vtbl_cp950_wchar, + &vtbl_wchar_cp950, + &vtbl_euckr_wchar, + &vtbl_wchar_euckr, + &vtbl_uhc_wchar, + &vtbl_wchar_uhc, + &vtbl_2022kr_wchar, + &vtbl_wchar_2022kr, + &vtbl_cp1251_wchar, + &vtbl_wchar_cp1251, + &vtbl_cp866_wchar, + &vtbl_wchar_cp866, + &vtbl_koi8r_wchar, + &vtbl_wchar_koi8r, + &vtbl_koi8u_wchar, + &vtbl_wchar_koi8u, + &vtbl_cp1252_wchar, + &vtbl_wchar_cp1252, + &vtbl_cp1254_wchar, + &vtbl_wchar_cp1254, + &vtbl_cp50220_wchar, + &vtbl_wchar_cp50220, + &vtbl_cp50220raw_wchar, + &vtbl_wchar_cp50220raw, + &vtbl_cp50221_wchar, + &vtbl_wchar_cp50221, + &vtbl_cp50222_wchar, + &vtbl_wchar_cp50222, + &vtbl_ascii_wchar, + &vtbl_wchar_ascii, + &vtbl_8859_1_wchar, + &vtbl_wchar_8859_1, + &vtbl_8859_2_wchar, + &vtbl_wchar_8859_2, + &vtbl_8859_3_wchar, + &vtbl_wchar_8859_3, + &vtbl_8859_4_wchar, + &vtbl_wchar_8859_4, + &vtbl_8859_5_wchar, + &vtbl_wchar_8859_5, + &vtbl_8859_6_wchar, + &vtbl_wchar_8859_6, + &vtbl_8859_7_wchar, + &vtbl_wchar_8859_7, + &vtbl_8859_8_wchar, + &vtbl_wchar_8859_8, + &vtbl_8859_9_wchar, + &vtbl_wchar_8859_9, + &vtbl_8859_10_wchar, + &vtbl_wchar_8859_10, + &vtbl_8859_13_wchar, + &vtbl_wchar_8859_13, + &vtbl_8859_14_wchar, + &vtbl_wchar_8859_14, + &vtbl_8859_15_wchar, + &vtbl_wchar_8859_15, + &vtbl_8bit_b64, + &vtbl_b64_8bit, + &vtbl_uuencode_8bit, + &vtbl_wchar_html, + &vtbl_html_wchar, + &vtbl_8bit_qprint, + &vtbl_qprint_8bit, + &vtbl_8bit_7bit, + &vtbl_7bit_8bit, + &vtbl_utf7_wchar, + &vtbl_wchar_utf7, + &vtbl_utf7imap_wchar, + &vtbl_wchar_utf7imap, + &vtbl_utf16_wchar, + &vtbl_wchar_utf16, + &vtbl_utf16be_wchar, + &vtbl_wchar_utf16be, + &vtbl_utf16le_wchar, + &vtbl_wchar_utf16le, + &vtbl_utf32_wchar, + &vtbl_wchar_utf32, + 
&vtbl_utf32be_wchar, + &vtbl_wchar_utf32be, + &vtbl_utf32le_wchar, + &vtbl_wchar_utf32le, + &vtbl_ucs4_wchar, + &vtbl_wchar_ucs4, + &vtbl_ucs4be_wchar, + &vtbl_wchar_ucs4be, + &vtbl_ucs4le_wchar, + &vtbl_wchar_ucs4le, + &vtbl_ucs2_wchar, + &vtbl_wchar_ucs2, + &vtbl_ucs2be_wchar, + &vtbl_wchar_ucs2be, + &vtbl_ucs2le_wchar, + &vtbl_wchar_ucs2le, + &vtbl_byte4be_wchar, + &vtbl_wchar_byte4be, + &vtbl_byte4le_wchar, + &vtbl_wchar_byte4le, + &vtbl_byte2be_wchar, + &vtbl_wchar_byte2be, + &vtbl_byte2le_wchar, + &vtbl_wchar_byte2le, + &vtbl_armscii8_wchar, + &vtbl_wchar_armscii8, + &vtbl_cp850_wchar, + &vtbl_wchar_cp850, + &vtbl_pass, + NULL +}; + +static int +mbfl_convert_filter_common_init( + mbfl_convert_filter *filter, + enum mbfl_no_encoding from, + enum mbfl_no_encoding to, + const struct mbfl_convert_vtbl *vtbl, + int (*output_function)(int, void* ), + int (*flush_function)(void*), + void* data) +{ + /* encoding structure */ + if ((filter->from = mbfl_no2encoding(from)) == NULL) { + return 1; + } + + if ((filter->to = mbfl_no2encoding(to)) == NULL) { + return 1; + } + + if (output_function != NULL) { + filter->output_function = output_function; + } else { + filter->output_function = mbfl_filter_output_null; + } + + filter->flush_function = flush_function; + filter->data = data; + filter->illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR; + filter->illegal_substchar = 0x3f; /* '?' 
*/ + filter->num_illegalchar = 0; + filter->filter_ctor = vtbl->filter_ctor; + filter->filter_dtor = vtbl->filter_dtor; + filter->filter_function = vtbl->filter_function; + filter->filter_flush = vtbl->filter_flush; + filter->filter_copy = vtbl->filter_copy; + + (*filter->filter_ctor)(filter); + + return 0; +} + + +mbfl_convert_filter * +mbfl_convert_filter_new( + enum mbfl_no_encoding from, + enum mbfl_no_encoding to, + int (*output_function)(int, void* ), + int (*flush_function)(void*), + void* data) +{ + mbfl_convert_filter * filter; + const struct mbfl_convert_vtbl *vtbl; + + vtbl = mbfl_convert_filter_get_vtbl(from, to); + + if (vtbl == NULL) { + vtbl = &vtbl_pass; + } + + /* allocate */ + filter = (mbfl_convert_filter *)mbfl_malloc(sizeof(mbfl_convert_filter)); + if (filter == NULL) { + return NULL; + } + + if (mbfl_convert_filter_common_init(filter, from, to, vtbl, + output_function, flush_function, data)) { + mbfl_free(filter); + return NULL; + } + + return filter; +} + +mbfl_convert_filter * +mbfl_convert_filter_new2( + const struct mbfl_convert_vtbl *vtbl, + int (*output_function)(int, void* ), + int (*flush_function)(void*), + void* data) +{ + mbfl_convert_filter * filter; + + if (vtbl == NULL) { + vtbl = &vtbl_pass; + } + + /* allocate */ + filter = (mbfl_convert_filter *)mbfl_malloc(sizeof(mbfl_convert_filter)); + if (filter == NULL) { + return NULL; + } + + if (mbfl_convert_filter_common_init(filter, vtbl->from, vtbl->to, vtbl, + output_function, flush_function, data)) { + mbfl_free(filter); + return NULL; + } + + return filter; +} + +void +mbfl_convert_filter_delete(mbfl_convert_filter *filter) +{ + if (filter) { + (*filter->filter_dtor)(filter); + mbfl_free((void*)filter); + } +} + +int +mbfl_convert_filter_feed(int c, mbfl_convert_filter *filter) +{ + return (*filter->filter_function)(c, filter); +} + +int +mbfl_convert_filter_flush(mbfl_convert_filter *filter) +{ + (*filter->filter_flush)(filter); + return (filter->flush_function ? 
(*filter->flush_function)(filter->data) : 0); +} + +void mbfl_convert_filter_reset(mbfl_convert_filter *filter, + enum mbfl_no_encoding from, enum mbfl_no_encoding to) +{ + const struct mbfl_convert_vtbl *vtbl; + + /* destruct old filter */ + (*filter->filter_dtor)(filter); + + vtbl = mbfl_convert_filter_get_vtbl(from, to); + + if (vtbl == NULL) { + vtbl = &vtbl_pass; + } + + mbfl_convert_filter_common_init(filter, from, to, vtbl, + filter->output_function, filter->flush_function, filter->data); +} + +void +mbfl_convert_filter_copy( + mbfl_convert_filter *src, + mbfl_convert_filter *dest) +{ + if (src->filter_copy != NULL) { + src->filter_copy(src, dest); + return; + } + + *dest = *src; +} + +int mbfl_convert_filter_devcat(mbfl_convert_filter *filter, mbfl_memory_device *src) +{ + int n; + unsigned char *p; + + p = src->buffer; + n = src->pos; + while (n > 0) { + if ((*filter->filter_function)(*p++, filter) < 0) { + return -1; + } + n--; + } + + return n; +} + +int mbfl_convert_filter_strcat(mbfl_convert_filter *filter, const unsigned char *p) +{ + int c; + + while ((c = *p++) != '\0') { + if ((*filter->filter_function)(c, filter) < 0) { + return -1; + } + } + + return 0; +} + +/* illegal character output function for conv-filter */ +int +mbfl_filt_conv_illegal_output(int c, mbfl_convert_filter *filter) +{ + int mode_backup, ret, n, m, r; + + ret = 0; + mode_backup = filter->illegal_mode; + filter->illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE; + switch (mode_backup) { + case MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR: + ret = (*filter->filter_function)(filter->illegal_substchar, filter); + break; + case MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG: + if (c >= 0) { + if (c < MBFL_WCSGROUP_UCS4MAX) { /* unicode */ + ret = mbfl_convert_filter_strcat(filter, (const unsigned char *)"U+"); + } else { + if (c < MBFL_WCSGROUP_WCHARMAX) { + m = c & ~MBFL_WCSPLANE_MASK; + switch (m) { + case MBFL_WCSPLANE_JIS0208: + ret = mbfl_convert_filter_strcat(filter, (const unsigned char 
*)"JIS+"); + break; + case MBFL_WCSPLANE_JIS0212: + ret = mbfl_convert_filter_strcat(filter, (const unsigned char *)"JIS2+"); + break; + case MBFL_WCSPLANE_JIS0213: + ret = mbfl_convert_filter_strcat(filter, (const unsigned char *)"JIS3+"); + break; + case MBFL_WCSPLANE_WINCP932: + ret = mbfl_convert_filter_strcat(filter, (const unsigned char *)"W932+"); + break; + case MBFL_WCSPLANE_GB18030: + ret = mbfl_convert_filter_strcat(filter, (const unsigned char *)"GB+"); + break; + case MBFL_WCSPLANE_8859_1: + ret = mbfl_convert_filter_strcat(filter, (const unsigned char *)"I8859_1+"); + break; + default: + ret = mbfl_convert_filter_strcat(filter, (const unsigned char *)"?+"); + break; + } + c &= MBFL_WCSPLANE_MASK; + } else { + ret = mbfl_convert_filter_strcat(filter, (const unsigned char *)"BAD+"); + c &= MBFL_WCSGROUP_MASK; + } + } + if (ret >= 0) { + /* emit c as upper-case hex, most significant nibble first, skipping leading zeros */ + m = 0; + r = 28; + while (r >= 0) { + n = (c >> r) & 0xf; + if (n || m) { + m = 1; + ret = (*filter->filter_function)(mbfl_hexchar_table[n], filter); + if (ret < 0) { + break; + } + } + r -= 4; + } + if (m == 0 && ret >= 0) { + ret = (*filter->filter_function)(mbfl_hexchar_table[0], filter); + } + } + } + break; + case MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY: + if (c >= 0) { + if (c < MBFL_WCSGROUP_UCS4MAX) { /* unicode */ + ret = mbfl_convert_filter_strcat(filter, (const unsigned char *)"&#x"); + if (ret < 0) + break; + + m = 0; + r = 28; + while (r >= 0) { + n = (c >> r) & 0xf; + if (n || m) { + m = 1; + ret = (*filter->filter_function)(mbfl_hexchar_table[n], filter); + if (ret < 0) { + break; + } + } + r -= 4; + } + if (ret < 0) { + break; + } + if (m == 0) { + ret = (*filter->filter_function)(mbfl_hexchar_table[0], filter); + } + /* do not let the ';' write below mask a failed '0' write */ + if (ret < 0) { + break; + } + ret = mbfl_convert_filter_strcat(filter, (const unsigned char *)";"); + } else { + ret = (*filter->filter_function)(filter->illegal_substchar, filter); + } + } + break; + default: + break; + } + /* restore the caller's mode (it was switched to NONE while emitting) and count this character */ + filter->illegal_mode = mode_backup; + filter->num_illegalchar++; + + return ret; +} + +const struct
mbfl_convert_vtbl * mbfl_convert_filter_get_vtbl(enum mbfl_no_encoding from, enum mbfl_no_encoding to) +{ + const struct mbfl_convert_vtbl *vtbl; + int i; + + if (to == mbfl_no_encoding_base64 || + to == mbfl_no_encoding_qprint || + to == mbfl_no_encoding_7bit) { + from = mbfl_no_encoding_8bit; + } else if (from == mbfl_no_encoding_base64 || + from == mbfl_no_encoding_qprint || + from == mbfl_no_encoding_uuencode) { + to = mbfl_no_encoding_8bit; + } + + i = 0; + while ((vtbl = mbfl_convert_filter_list[i++]) != NULL){ + if (vtbl->from == from && vtbl->to == to) { + return vtbl; + } + } + + return NULL; +} + +/* + * commonly used constructor and destructor + */ +void mbfl_filt_conv_common_ctor(mbfl_convert_filter *filter) +{ + filter->status = 0; + filter->cache = 0; +} + +int mbfl_filt_conv_common_flush(mbfl_convert_filter *filter) +{ + filter->status = 0; + filter->cache = 0; + + if (filter->flush_function != NULL) { + (*filter->flush_function)(filter->data); + } + return 0; +} + +void mbfl_filt_conv_common_dtor(mbfl_convert_filter *filter) +{ + filter->status = 0; + filter->cache = 0; +} + + diff --git a/ext/mbstring/libmbfl/mbfl/mbfl_convert.h b/ext/mbstring/libmbfl/mbfl/mbfl_convert.h new file mode 100644 index 0000000..8b5ba5b --- /dev/null +++ b/ext/mbstring/libmbfl/mbfl/mbfl_convert.h @@ -0,0 +1,97 @@ +/* + * "streamable kanji code filter and converter" + * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. + * + * LICENSE NOTICES + * + * This file is part of "streamable kanji code filter and converter", + * which is distributed under the terms of GNU Lesser General Public + * License (version 2) as published by the Free Software Foundation. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with "streamable kanji code filter and converter"; + * if not, write to the Free Software Foundation, Inc., 59 Temple Place, + * Suite 330, Boston, MA 02111-1307 USA + * + * The author of this file: + * + */ +/* + * The source code included in this files was separated from mbfilter.h + * by Moriyoshi Koizumi <moriyoshi@php.net> on 20 Dec 2002. The file + * mbfilter.h is included in this package . + * + */ + +#ifndef MBFL_CONVERT_H +#define MBFL_CONVERT_H + +#include "mbfl_defs.h" +#include "mbfl_encoding.h" +#include "mbfl_memory_device.h" + +typedef struct _mbfl_convert_filter mbfl_convert_filter; + +struct _mbfl_convert_filter { + void (*filter_ctor)(mbfl_convert_filter *filter); + void (*filter_dtor)(mbfl_convert_filter *filter); + void (*filter_copy)(mbfl_convert_filter *src, mbfl_convert_filter *dest); + int (*filter_function)(int c, mbfl_convert_filter *filter); + int (*filter_flush)(mbfl_convert_filter *filter); + int (*output_function)(int c, void *data); + int (*flush_function)(void *data); + void *data; + int status; + int cache; + const mbfl_encoding *from; + const mbfl_encoding *to; + int illegal_mode; + int illegal_substchar; + int num_illegalchar; + void *opaque; +}; + +struct mbfl_convert_vtbl { + enum mbfl_no_encoding from; + enum mbfl_no_encoding to; + void (*filter_ctor)(mbfl_convert_filter *filter); + void (*filter_dtor)(mbfl_convert_filter *filter); + int (*filter_function)(int c, mbfl_convert_filter *filter); + int (*filter_flush)(mbfl_convert_filter *filter); + void (*filter_copy)(mbfl_convert_filter *src, mbfl_convert_filter *dest); +}; + +MBFLAPI extern const struct mbfl_convert_vtbl *mbfl_convert_filter_list[]; + +MBFLAPI extern mbfl_convert_filter *mbfl_convert_filter_new( + enum mbfl_no_encoding from, + enum mbfl_no_encoding to, + int (*output_function)(int, void *), + int (*flush_function)(void *), + void *data ); +MBFLAPI extern 
mbfl_convert_filter *mbfl_convert_filter_new2( + const struct mbfl_convert_vtbl *vtbl, + int (*output_function)(int, void *), + int (*flush_function)(void *), + void *data ); +MBFLAPI extern void mbfl_convert_filter_delete(mbfl_convert_filter *filter); +MBFLAPI extern int mbfl_convert_filter_feed(int c, mbfl_convert_filter *filter); +MBFLAPI extern int mbfl_convert_filter_flush(mbfl_convert_filter *filter); +MBFLAPI extern void mbfl_convert_filter_reset(mbfl_convert_filter *filter, enum mbfl_no_encoding from, enum mbfl_no_encoding to); +MBFLAPI extern void mbfl_convert_filter_copy(mbfl_convert_filter *src, mbfl_convert_filter *dist); +MBFLAPI extern int mbfl_filt_conv_illegal_output(int c, mbfl_convert_filter *filter); +MBFLAPI extern const struct mbfl_convert_vtbl * mbfl_convert_filter_get_vtbl(enum mbfl_no_encoding from, enum mbfl_no_encoding to); + +MBFLAPI extern void mbfl_filt_conv_common_ctor(mbfl_convert_filter *filter); +MBFLAPI extern int mbfl_filt_conv_common_flush(mbfl_convert_filter *filter); +MBFLAPI extern void mbfl_filt_conv_common_dtor(mbfl_convert_filter *filter); + +MBFLAPI extern int mbfl_convert_filter_devcat(mbfl_convert_filter *filter, mbfl_memory_device *src); +MBFLAPI extern int mbfl_convert_filter_strcat(mbfl_convert_filter *filter, const unsigned char *p); + +#endif /* MBFL_CONVERT_H */ diff --git a/ext/mbstring/libmbfl/mbfl/mbfl_defs.h b/ext/mbstring/libmbfl/mbfl/mbfl_defs.h new file mode 100644 index 0000000..fcfac2b --- /dev/null +++ b/ext/mbstring/libmbfl/mbfl/mbfl_defs.h @@ -0,0 +1,56 @@ +/* + * "streamable kanji code filter and converter" + * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. + * + * LICENSE NOTICES + * + * This file is part of "streamable kanji code filter and converter", + * which is distributed under the terms of GNU Lesser General Public + * License (version 2) as published by the Free Software Foundation. 
+ * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with "streamable kanji code filter and converter"; + * if not, write to the Free Software Foundation, Inc., 59 Temple Place, + * Suite 330, Boston, MA 02111-1307 USA + * + * The author of this file: + * + */ +/* + * The source code included in this file was separated from mbfilter.h + * by Moriyoshi Koizumi <moriyoshi@php.net> on 20 Dec 2002. The file + * mbfilter.h is included in this package . + * + */ + +#ifndef MBFL_DEFS_H +#define MBFL_DEFS_H + +#ifndef NULL /* fallback only; used when no earlier header has defined NULL */ +#ifdef __cplusplus +#define NULL (0L) +#else +#define NULL ((void *)0) +#endif +#endif + +#ifdef WIN32 /* MBFLAPI: export/import/visibility decoration for public symbols */ +#ifdef MBFL_DLL_EXPORT +#define MBFLAPI __declspec(dllexport) +#else +#define MBFLAPI __declspec(dllimport) +#endif /* MBFL_DLL_EXPORT */ +#else +#if defined(__GNUC__) && __GNUC__ >= 4 +#define MBFLAPI __attribute__((visibility("default"))) +#else +#define MBFLAPI +#endif /* defined(__GNUC__) && __GNUC__ >= 4 */ +#endif /* WIN32 */ + +#endif /* MBFL_DEFS_H */ diff --git a/ext/mbstring/libmbfl/mbfl/mbfl_encoding.c b/ext/mbstring/libmbfl/mbfl/mbfl_encoding.c new file mode 100644 index 0000000..adf0c3a --- /dev/null +++ b/ext/mbstring/libmbfl/mbfl/mbfl_encoding.c @@ -0,0 +1,341 @@ +/* + * "streamable kanji code filter and converter" + * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. + * + * LICENSE NOTICES + * + * This file is part of "streamable kanji code filter and converter", + * which is distributed under the terms of GNU Lesser General Public + * License (version 2) as published by the Free Software Foundation.
+ * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with "streamable kanji code filter and converter"; + * if not, write to the Free Software Foundation, Inc., 59 Temple Place, + * Suite 330, Boston, MA 02111-1307 USA + * + * The author of this file: + * + */ +/* + * The source code included in this files was separated from mbfilter.c + * by Moriyoshi Koizumi <moriyoshi@php.net> on 20 Dec 2002. The file + * mbfilter.c is included in this package . + * + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#ifdef HAVE_STDDEF_H +#include <stddef.h> +#endif + +#ifdef HAVE_STRING_H +#include <string.h> +#endif + +#ifdef HAVE_STRINGS_H +#include <strings.h> +#endif + +#include "mbfl_encoding.h" +#include "mbfilter_pass.h" +#include "mbfilter_8bit.h" +#include "mbfilter_wchar.h" + +#include "filters/mbfilter_euc_cn.h" +#include "filters/mbfilter_hz.h" +#include "filters/mbfilter_euc_tw.h" +#include "filters/mbfilter_big5.h" +#include "filters/mbfilter_uhc.h" +#include "filters/mbfilter_euc_kr.h" +#include "filters/mbfilter_iso2022_kr.h" +#include "filters/mbfilter_sjis.h" +#include "filters/mbfilter_sjis_open.h" +#include "filters/mbfilter_sjis_mobile.h" +#include "filters/mbfilter_sjis_mac.h" +#include "filters/mbfilter_sjis_2004.h" +#include "filters/mbfilter_cp51932.h" +#include "filters/mbfilter_jis.h" +#include "filters/mbfilter_iso2022_jp_ms.h" +#include "filters/mbfilter_iso2022jp_2004.h" +#include "filters/mbfilter_iso2022jp_mobile.h" +#include "filters/mbfilter_euc_jp.h" +#include "filters/mbfilter_euc_jp_win.h" +#include "filters/mbfilter_euc_jp_2004.h" +#include "filters/mbfilter_gb18030.h" +#include "filters/mbfilter_ascii.h" +#include 
"filters/mbfilter_koi8r.h" +#include "filters/mbfilter_koi8u.h" +#include "filters/mbfilter_cp866.h" +#include "filters/mbfilter_cp932.h" +#include "filters/mbfilter_cp936.h" +#include "filters/mbfilter_cp1251.h" +#include "filters/mbfilter_cp1252.h" +#include "filters/mbfilter_cp1254.h" +#include "filters/mbfilter_cp5022x.h" +#include "filters/mbfilter_iso8859_1.h" +#include "filters/mbfilter_iso8859_2.h" +#include "filters/mbfilter_iso8859_3.h" +#include "filters/mbfilter_iso8859_4.h" +#include "filters/mbfilter_iso8859_5.h" +#include "filters/mbfilter_iso8859_6.h" +#include "filters/mbfilter_iso8859_7.h" +#include "filters/mbfilter_iso8859_8.h" +#include "filters/mbfilter_iso8859_9.h" +#include "filters/mbfilter_iso8859_10.h" +#include "filters/mbfilter_iso8859_13.h" +#include "filters/mbfilter_iso8859_14.h" +#include "filters/mbfilter_iso8859_15.h" +#include "filters/mbfilter_iso8859_16.h" +#include "filters/mbfilter_base64.h" +#include "filters/mbfilter_qprint.h" +#include "filters/mbfilter_uuencode.h" +#include "filters/mbfilter_7bit.h" +#include "filters/mbfilter_utf7.h" +#include "filters/mbfilter_utf7imap.h" +#include "filters/mbfilter_utf8.h" +#include "filters/mbfilter_utf8_mobile.h" +#include "filters/mbfilter_utf16.h" +#include "filters/mbfilter_utf32.h" +#include "filters/mbfilter_byte2.h" +#include "filters/mbfilter_byte4.h" +#include "filters/mbfilter_ucs4.h" +#include "filters/mbfilter_ucs2.h" +#include "filters/mbfilter_htmlent.h" +#include "filters/mbfilter_armscii8.h" +#include "filters/mbfilter_cp850.h" + +#ifndef HAVE_STRCASECMP +#ifdef HAVE_STRICMP +#define strcasecmp stricmp +#endif +#endif + + +static const char *mbfl_encoding_auto_aliases[] = {"unknown", NULL}; + +static const mbfl_encoding mbfl_encoding_auto = { + mbfl_no_encoding_auto, + "auto", + NULL, + (const char *(*)[])&mbfl_encoding_auto_aliases, + NULL, + 0 +}; + +static const mbfl_encoding *mbfl_encoding_ptr_list[] = { + &mbfl_encoding_pass, + &mbfl_encoding_auto, + 
&mbfl_encoding_wchar, + &mbfl_encoding_byte2be, + &mbfl_encoding_byte2le, + &mbfl_encoding_byte4be, + &mbfl_encoding_byte4le, + &mbfl_encoding_base64, + &mbfl_encoding_uuencode, + &mbfl_encoding_html_ent, + &mbfl_encoding_qprint, + &mbfl_encoding_7bit, + &mbfl_encoding_8bit, + &mbfl_encoding_ucs4, + &mbfl_encoding_ucs4be, + &mbfl_encoding_ucs4le, + &mbfl_encoding_ucs2, + &mbfl_encoding_ucs2be, + &mbfl_encoding_ucs2le, + &mbfl_encoding_utf32, + &mbfl_encoding_utf32be, + &mbfl_encoding_utf32le, + &mbfl_encoding_utf16, + &mbfl_encoding_utf16be, + &mbfl_encoding_utf16le, + &mbfl_encoding_utf8, + &mbfl_encoding_utf7, + &mbfl_encoding_utf7imap, + &mbfl_encoding_ascii, + &mbfl_encoding_euc_jp, + &mbfl_encoding_sjis, + &mbfl_encoding_eucjp_win, + &mbfl_encoding_eucjp2004, + &mbfl_encoding_sjis_open, + &mbfl_encoding_sjis_docomo, + &mbfl_encoding_sjis_kddi, + &mbfl_encoding_sjis_sb, + &mbfl_encoding_sjis_mac, + &mbfl_encoding_sjis2004, + &mbfl_encoding_utf8_docomo, + &mbfl_encoding_utf8_kddi_a, + &mbfl_encoding_utf8_kddi_b, + &mbfl_encoding_utf8_sb, + &mbfl_encoding_cp932, + &mbfl_encoding_cp51932, + &mbfl_encoding_jis, + &mbfl_encoding_2022jp, + &mbfl_encoding_2022jpms, + &mbfl_encoding_gb18030, + &mbfl_encoding_cp1252, + &mbfl_encoding_cp1254, + &mbfl_encoding_8859_1, + &mbfl_encoding_8859_2, + &mbfl_encoding_8859_3, + &mbfl_encoding_8859_4, + &mbfl_encoding_8859_5, + &mbfl_encoding_8859_6, + &mbfl_encoding_8859_7, + &mbfl_encoding_8859_8, + &mbfl_encoding_8859_9, + &mbfl_encoding_8859_10, + &mbfl_encoding_8859_13, + &mbfl_encoding_8859_14, + &mbfl_encoding_8859_15, + &mbfl_encoding_8859_16, + &mbfl_encoding_euc_cn, + &mbfl_encoding_cp936, + &mbfl_encoding_hz, + &mbfl_encoding_euc_tw, + &mbfl_encoding_big5, + &mbfl_encoding_cp950, + &mbfl_encoding_euc_kr, + &mbfl_encoding_uhc, + &mbfl_encoding_2022kr, + &mbfl_encoding_cp1251, + &mbfl_encoding_cp866, + &mbfl_encoding_koi8r, + &mbfl_encoding_koi8u, + &mbfl_encoding_armscii8, + &mbfl_encoding_cp850, + &mbfl_encoding_jis_ms, 
+ &mbfl_encoding_2022jp_2004, + &mbfl_encoding_2022jp_kddi, + &mbfl_encoding_cp50220, + &mbfl_encoding_cp50220raw, + &mbfl_encoding_cp50221, + &mbfl_encoding_cp50222, + NULL +}; + +/* encoding resolver */ +const mbfl_encoding * +mbfl_name2encoding(const char *name) +{ + const mbfl_encoding *encoding; + int i, j; + + if (name == NULL) { + return NULL; + } + + i = 0; + while ((encoding = mbfl_encoding_ptr_list[i++]) != NULL){ + if (strcasecmp(encoding->name, name) == 0) { + return encoding; + } + } + + /* serch MIME charset name */ + i = 0; + while ((encoding = mbfl_encoding_ptr_list[i++]) != NULL) { + if (encoding->mime_name != NULL) { + if (strcasecmp(encoding->mime_name, name) == 0) { + return encoding; + } + } + } + + /* serch aliases */ + i = 0; + while ((encoding = mbfl_encoding_ptr_list[i++]) != NULL) { + if (encoding->aliases != NULL) { + j = 0; + while ((*encoding->aliases)[j] != NULL) { + if (strcasecmp((*encoding->aliases)[j], name) == 0) { + return encoding; + } + j++; + } + } + } + + return NULL; +} + +const mbfl_encoding * +mbfl_no2encoding(enum mbfl_no_encoding no_encoding) +{ + const mbfl_encoding *encoding; + int i; + + i = 0; + while ((encoding = mbfl_encoding_ptr_list[i++]) != NULL){ + if (encoding->no_encoding == no_encoding) { + return encoding; + } + } + + return NULL; +} + +enum mbfl_no_encoding +mbfl_name2no_encoding(const char *name) +{ + const mbfl_encoding *encoding; + + encoding = mbfl_name2encoding(name); + if (encoding == NULL) { + return mbfl_no_encoding_invalid; + } else { + return encoding->no_encoding; + } +} + +const char * +mbfl_no_encoding2name(enum mbfl_no_encoding no_encoding) +{ + const mbfl_encoding *encoding; + + encoding = mbfl_no2encoding(no_encoding); + if (encoding == NULL) { + return ""; + } else { + return encoding->name; + } +} + +const mbfl_encoding ** +mbfl_get_supported_encodings(void) +{ + return mbfl_encoding_ptr_list; +} + +const char * +mbfl_no2preferred_mime_name(enum mbfl_no_encoding no_encoding) +{ + const 
mbfl_encoding *encoding; + + encoding = mbfl_no2encoding(no_encoding); + if (encoding != NULL && encoding->mime_name != NULL && encoding->mime_name[0] != '\0') { + return encoding->mime_name; + } else { + return NULL; + } +} + +int +mbfl_is_support_encoding(const char *name) +{ + const mbfl_encoding *encoding; + + encoding = mbfl_name2encoding(name); + if (encoding == NULL) { + return 0; + } else { + return 1; + } +} diff --git a/ext/mbstring/libmbfl/mbfl/mbfl_encoding.h b/ext/mbstring/libmbfl/mbfl/mbfl_encoding.h new file mode 100644 index 0000000..ca7717c --- /dev/null +++ b/ext/mbstring/libmbfl/mbfl/mbfl_encoding.h @@ -0,0 +1,152 @@ +/* + * "streamable kanji code filter and converter" + * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. + * + * LICENSE NOTICES + * + * This file is part of "streamable kanji code filter and converter", + * which is distributed under the terms of GNU Lesser General Public + * License (version 2) as published by the Free Software Foundation. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with "streamable kanji code filter and converter"; + * if not, write to the Free Software Foundation, Inc., 59 Temple Place, + * Suite 330, Boston, MA 02111-1307 USA + * + * The author of this file: + * + */ +/* + * The source code included in this files was separated from mbfilter.h + * by Moriyoshi Koizumi <moriyoshi@php.net> on 20 Dec 2002. The file + * mbfilter.h is included in this package . 
+ * + */ + +#ifndef MBFL_ENCODING_H +#define MBFL_ENCODING_H + +#include "mbfl_defs.h" + +enum mbfl_no_encoding { + mbfl_no_encoding_invalid = -1, + mbfl_no_encoding_pass, + mbfl_no_encoding_auto, + mbfl_no_encoding_wchar, + mbfl_no_encoding_byte2be, + mbfl_no_encoding_byte2le, + mbfl_no_encoding_byte4be, + mbfl_no_encoding_byte4le, + mbfl_no_encoding_base64, + mbfl_no_encoding_uuencode, + mbfl_no_encoding_html_ent, + mbfl_no_encoding_qprint, + mbfl_no_encoding_7bit, + mbfl_no_encoding_8bit, + mbfl_no_encoding_charset_min, + mbfl_no_encoding_ucs4, + mbfl_no_encoding_ucs4be, + mbfl_no_encoding_ucs4le, + mbfl_no_encoding_ucs2, + mbfl_no_encoding_ucs2be, + mbfl_no_encoding_ucs2le, + mbfl_no_encoding_utf32, + mbfl_no_encoding_utf32be, + mbfl_no_encoding_utf32le, + mbfl_no_encoding_utf16, + mbfl_no_encoding_utf16be, + mbfl_no_encoding_utf16le, + mbfl_no_encoding_utf8, + mbfl_no_encoding_utf8_docomo, + mbfl_no_encoding_utf8_kddi_a, + mbfl_no_encoding_utf8_kddi_b, + mbfl_no_encoding_utf8_sb, + mbfl_no_encoding_utf7, + mbfl_no_encoding_utf7imap, + mbfl_no_encoding_ascii, + mbfl_no_encoding_euc_jp, + mbfl_no_encoding_eucjp2004, + mbfl_no_encoding_sjis, + mbfl_no_encoding_eucjp_win, + mbfl_no_encoding_sjis_open, + mbfl_no_encoding_sjis_docomo, + mbfl_no_encoding_sjis_kddi, + mbfl_no_encoding_sjis_sb, + mbfl_no_encoding_sjis_mac, + mbfl_no_encoding_sjis2004, + mbfl_no_encoding_cp932, + mbfl_no_encoding_cp51932, + mbfl_no_encoding_jis, + mbfl_no_encoding_2022jp, + mbfl_no_encoding_2022jp_2004, + mbfl_no_encoding_2022jp_kddi, + mbfl_no_encoding_2022jpms, + mbfl_no_encoding_gb18030, + mbfl_no_encoding_cp1252, + mbfl_no_encoding_cp1254, + mbfl_no_encoding_8859_1, + mbfl_no_encoding_8859_2, + mbfl_no_encoding_8859_3, + mbfl_no_encoding_8859_4, + mbfl_no_encoding_8859_5, + mbfl_no_encoding_8859_6, + mbfl_no_encoding_8859_7, + mbfl_no_encoding_8859_8, + mbfl_no_encoding_8859_9, + mbfl_no_encoding_8859_10, + mbfl_no_encoding_8859_13, + mbfl_no_encoding_8859_14, + 
mbfl_no_encoding_8859_15, + mbfl_no_encoding_euc_cn, + mbfl_no_encoding_cp936, + mbfl_no_encoding_euc_tw, + mbfl_no_encoding_big5, + mbfl_no_encoding_cp950, + mbfl_no_encoding_euc_kr, + mbfl_no_encoding_2022kr, + mbfl_no_encoding_uhc, + mbfl_no_encoding_hz, + mbfl_no_encoding_cp1251, + mbfl_no_encoding_cp866, + mbfl_no_encoding_koi8r, + mbfl_no_encoding_koi8u, + mbfl_no_encoding_8859_16, + mbfl_no_encoding_armscii8, + mbfl_no_encoding_cp850, + mbfl_no_encoding_jis_ms, + mbfl_no_encoding_cp50220, + mbfl_no_encoding_cp50220raw, + mbfl_no_encoding_cp50221, + mbfl_no_encoding_cp50222, + mbfl_no_encoding_charset_max +}; + +typedef enum mbfl_no_encoding mbfl_encoding_id; + +/* + * encoding + */ +typedef struct _mbfl_encoding { + enum mbfl_no_encoding no_encoding; + const char *name; + const char *mime_name; + const char *(*aliases)[]; + const unsigned char *mblen_table; + unsigned int flag; +} mbfl_encoding; + +MBFLAPI extern const mbfl_encoding * mbfl_name2encoding(const char *name); +MBFLAPI extern const mbfl_encoding * mbfl_no2encoding(enum mbfl_no_encoding no_encoding); +MBFLAPI extern enum mbfl_no_encoding mbfl_name2no_encoding(const char *name); +MBFLAPI extern const mbfl_encoding ** mbfl_get_supported_encodings(); +MBFLAPI extern const char * mbfl_no_encoding2name(enum mbfl_no_encoding no_encoding); +MBFLAPI extern const char * mbfl_no2preferred_mime_name(enum mbfl_no_encoding no_encoding); +MBFLAPI extern int mbfl_is_support_encoding(const char *name); + + +#endif /* MBFL_ENCODING_H */ diff --git a/ext/mbstring/libmbfl/mbfl/mbfl_filter_output.c b/ext/mbstring/libmbfl/mbfl/mbfl_filter_output.c new file mode 100644 index 0000000..341047d --- /dev/null +++ b/ext/mbstring/libmbfl/mbfl/mbfl_filter_output.c @@ -0,0 +1,57 @@ +/* + * "streamable kanji code filter and converter" + * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. 
+ * + * LICENSE NOTICES + * + * This file is part of "streamable kanji code filter and converter", + * which is distributed under the terms of GNU Lesser General Public + * License (version 2) as published by the Free Software Foundation. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with "streamable kanji code filter and converter"; + * if not, write to the Free Software Foundation, Inc., 59 Temple Place, + * Suite 330, Boston, MA 02111-1307 USA + * + * The author of this file: + * + */ +/* + * The source code included in this files was separated from mbfilter.c + * by Moriyoshi Koizumi <moriyoshi@php.net> on 20 Dec 2002. The file + * mbfilter.c is included in this package . + * + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "mbfl_convert.h" +#include "mbfl_filter_output.h" + +int mbfl_filter_output_pipe(int c, void* data) +{ + mbfl_convert_filter *filter = (mbfl_convert_filter*)data; + return (*filter->filter_function)(c, filter); +} + +int mbfl_filter_output_pipe_flush(void *data) +{ + mbfl_convert_filter *filter = (mbfl_convert_filter*)data; + if (filter->filter_flush != NULL) { + return (*filter->filter_flush)(filter); + } + + return 0; +} + +int mbfl_filter_output_null(int c, void* data) +{ + return c; +} diff --git a/ext/mbstring/libmbfl/mbfl/mbfl_filter_output.h b/ext/mbstring/libmbfl/mbfl/mbfl_filter_output.h new file mode 100644 index 0000000..d477653 --- /dev/null +++ b/ext/mbstring/libmbfl/mbfl/mbfl_filter_output.h @@ -0,0 +1,38 @@ +/* + * "streamable kanji code filter and converter" + * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. 
+ * + * LICENSE NOTICES + * + * This file is part of "streamable kanji code filter and converter", + * which is distributed under the terms of GNU Lesser General Public + * License (version 2) as published by the Free Software Foundation. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with "streamable kanji code filter and converter"; + * if not, write to the Free Software Foundation, Inc., 59 Temple Place, + * Suite 330, Boston, MA 02111-1307 USA + * + * The author of this file: + * + */ +/* + * The source code included in this files was separated from mbfilter.h + * by Moriyoshi Koizumi <moriyoshi@php.net> on 20 Dec 2002. The file + * mbfilter.h is included in this package . + * + */ + +#ifndef MBFL_FILTER_OUTPUT_H +#define MBFL_FILTER_OUTPUT_H + +MBFLAPI extern int mbfl_filter_output_pipe(int c, void* data); +MBFLAPI extern int mbfl_filter_output_pipe_flush(void* data); +MBFLAPI extern int mbfl_filter_output_null(int c, void* data); + +#endif /* MBFL_FILTER_OUTPUT_H */ diff --git a/ext/mbstring/libmbfl/mbfl/mbfl_ident.c b/ext/mbstring/libmbfl/mbfl/mbfl_ident.c new file mode 100644 index 0000000..4d6283f --- /dev/null +++ b/ext/mbstring/libmbfl/mbfl/mbfl_ident.c @@ -0,0 +1,304 @@ +/* + * "streamable kanji code filter and converter" + * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. + * + * LICENSE NOTICES + * + * This file is part of "streamable kanji code filter and converter", + * which is distributed under the terms of GNU Lesser General Public + * License (version 2) as published by the Free Software Foundation. 
+ * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with "streamable kanji code filter and converter"; + * if not, write to the Free Software Foundation, Inc., 59 Temple Place, + * Suite 330, Boston, MA 02111-1307 USA + * + * The author of this file: + * + */ +/* + * The source code included in this files was separated from mbfilter.c + * by Moriyoshi Koizumi <moriyoshi@php.net> on 20 Dec 2002. The file + * mbfilter.c is included in this package . + * + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#ifdef HAVE_STDDEF_H +#include <stddef.h> +#endif + +#include "mbfl_ident.h" +#include "mbfl_allocators.h" +#include "mbfilter_pass.h" +#include "mbfilter_8bit.h" +#include "mbfilter_wchar.h" + +#include "filters/mbfilter_euc_cn.h" +#include "filters/mbfilter_hz.h" +#include "filters/mbfilter_euc_tw.h" +#include "filters/mbfilter_big5.h" +#include "filters/mbfilter_uhc.h" +#include "filters/mbfilter_euc_kr.h" +#include "filters/mbfilter_iso2022_kr.h" +#include "filters/mbfilter_sjis.h" +#include "filters/mbfilter_sjis_open.h" +#include "filters/mbfilter_sjis_mobile.h" +#include "filters/mbfilter_jis.h" +#include "filters/mbfilter_iso2022_jp_ms.h" +#include "filters/mbfilter_iso2022jp_2004.h" +#include "filters/mbfilter_iso2022jp_mobile.h" +#include "filters/mbfilter_euc_jp.h" +#include "filters/mbfilter_euc_jp_win.h" +#include "filters/mbfilter_euc_jp_2004.h" +#include "filters/mbfilter_utf8_mobile.h" +#include "filters/mbfilter_ascii.h" +#include "filters/mbfilter_koi8r.h" +#include "filters/mbfilter_koi8u.h" +#include "filters/mbfilter_cp866.h" +#include "filters/mbfilter_cp932.h" +#include "filters/mbfilter_cp936.h" +#include "filters/mbfilter_cp1251.h" 
+#include "filters/mbfilter_cp1252.h" +#include "filters/mbfilter_cp1254.h" +#include "filters/mbfilter_cp51932.h" +#include "filters/mbfilter_cp5022x.h" +#include "filters/mbfilter_gb18030.h" +#include "filters/mbfilter_iso8859_1.h" +#include "filters/mbfilter_iso8859_2.h" +#include "filters/mbfilter_iso8859_3.h" +#include "filters/mbfilter_iso8859_4.h" +#include "filters/mbfilter_iso8859_5.h" +#include "filters/mbfilter_iso8859_6.h" +#include "filters/mbfilter_iso8859_7.h" +#include "filters/mbfilter_iso8859_8.h" +#include "filters/mbfilter_iso8859_9.h" +#include "filters/mbfilter_iso8859_10.h" +#include "filters/mbfilter_iso8859_13.h" +#include "filters/mbfilter_iso8859_14.h" +#include "filters/mbfilter_iso8859_15.h" +#include "filters/mbfilter_base64.h" +#include "filters/mbfilter_qprint.h" +#include "filters/mbfilter_uuencode.h" +#include "filters/mbfilter_7bit.h" +#include "filters/mbfilter_utf7.h" +#include "filters/mbfilter_utf7imap.h" +#include "filters/mbfilter_utf8.h" +#include "filters/mbfilter_utf16.h" +#include "filters/mbfilter_utf32.h" +#include "filters/mbfilter_byte2.h" +#include "filters/mbfilter_byte4.h" +#include "filters/mbfilter_ucs4.h" +#include "filters/mbfilter_ucs2.h" +#include "filters/mbfilter_htmlent.h" +#include "filters/mbfilter_armscii8.h" +#include "filters/mbfilter_cp850.h" + +static const struct mbfl_identify_vtbl vtbl_identify_false = { + mbfl_no_encoding_pass, + mbfl_filt_ident_false_ctor, + mbfl_filt_ident_common_dtor, + mbfl_filt_ident_false }; + + +static const struct mbfl_identify_vtbl *mbfl_identify_filter_list[] = { + &vtbl_identify_utf8, + &vtbl_identify_utf7, + &vtbl_identify_ascii, + &vtbl_identify_eucjp, + &vtbl_identify_sjis, + &vtbl_identify_sjis_open, + &vtbl_identify_eucjpwin, + &vtbl_identify_eucjp2004, + &vtbl_identify_cp932, + &vtbl_identify_jis, + &vtbl_identify_2022jp, + &vtbl_identify_2022jpms, + &vtbl_identify_2022jp_2004, + &vtbl_identify_2022jp_kddi, + &vtbl_identify_cp51932, + &vtbl_identify_sjis_docomo, 
+ &vtbl_identify_sjis_kddi, + &vtbl_identify_sjis_sb, + &vtbl_identify_utf8_docomo, + &vtbl_identify_utf8_kddi_a, + &vtbl_identify_utf8_kddi_b, + &vtbl_identify_utf8_sb, + &vtbl_identify_euccn, + &vtbl_identify_cp936, + &vtbl_identify_hz, + &vtbl_identify_euctw, + &vtbl_identify_big5, + &vtbl_identify_cp950, + &vtbl_identify_euckr, + &vtbl_identify_uhc, + &vtbl_identify_2022kr, + &vtbl_identify_cp1251, + &vtbl_identify_cp866, + &vtbl_identify_koi8r, + &vtbl_identify_koi8u, + &vtbl_identify_cp1252, + &vtbl_identify_cp1254, + &vtbl_identify_8859_1, + &vtbl_identify_8859_2, + &vtbl_identify_8859_3, + &vtbl_identify_8859_4, + &vtbl_identify_8859_5, + &vtbl_identify_8859_6, + &vtbl_identify_8859_7, + &vtbl_identify_8859_8, + &vtbl_identify_8859_9, + &vtbl_identify_8859_10, + &vtbl_identify_8859_13, + &vtbl_identify_8859_14, + &vtbl_identify_8859_15, + &vtbl_identify_armscii8, + &vtbl_identify_cp850, + &vtbl_identify_jis_ms, + &vtbl_identify_cp50220, + &vtbl_identify_cp50221, + &vtbl_identify_cp50222, + &vtbl_identify_gb18030, + &vtbl_identify_false, + NULL +}; + + + +/* + * identify filter + */ +const struct mbfl_identify_vtbl * mbfl_identify_filter_get_vtbl(enum mbfl_no_encoding encoding) +{ + const struct mbfl_identify_vtbl * vtbl; + int i; + + i = 0; + while ((vtbl = mbfl_identify_filter_list[i++]) != NULL) { + if (vtbl->encoding == encoding) { + break; + } + } + + return vtbl; +} + +mbfl_identify_filter *mbfl_identify_filter_new(enum mbfl_no_encoding encoding) +{ + mbfl_identify_filter *filter; + + /* allocate */ + filter = (mbfl_identify_filter *)mbfl_malloc(sizeof(mbfl_identify_filter)); + if (filter == NULL) { + return NULL; + } + + if (mbfl_identify_filter_init(filter, encoding)) { + mbfl_free(filter); + return NULL; + } + + return filter; +} + +mbfl_identify_filter *mbfl_identify_filter_new2(const mbfl_encoding *encoding) +{ + mbfl_identify_filter *filter; + + /* allocate */ + filter = (mbfl_identify_filter *)mbfl_malloc(sizeof(mbfl_identify_filter)); + if 
(filter == NULL) { + return NULL; + } + + if (mbfl_identify_filter_init2(filter, encoding)) { + mbfl_free(filter); + return NULL; + } + + return filter; +} + + +int mbfl_identify_filter_init(mbfl_identify_filter *filter, enum mbfl_no_encoding encoding) +{ + const mbfl_encoding *enc = mbfl_no2encoding(encoding); + return mbfl_identify_filter_init2(filter, enc ? enc: &mbfl_encoding_pass); +} + +int mbfl_identify_filter_init2(mbfl_identify_filter *filter, const mbfl_encoding *encoding) +{ + const struct mbfl_identify_vtbl *vtbl; + + /* encoding structure */ + filter->encoding = encoding; + + filter->status = 0; + filter->flag = 0; + filter->score = 0; + + /* setup the function table */ + vtbl = mbfl_identify_filter_get_vtbl(filter->encoding->no_encoding); + if (vtbl == NULL) { + vtbl = &vtbl_identify_false; + } + filter->filter_ctor = vtbl->filter_ctor; + filter->filter_dtor = vtbl->filter_dtor; + filter->filter_function = vtbl->filter_function; + + /* constructor */ + (*filter->filter_ctor)(filter); + + return 0; +} + +void mbfl_identify_filter_delete(mbfl_identify_filter *filter) +{ + if (filter == NULL) { + return; + } + + mbfl_identify_filter_cleanup(filter); + mbfl_free((void*)filter); +} + +void mbfl_identify_filter_cleanup(mbfl_identify_filter *filter) +{ + (*filter->filter_dtor)(filter); +} + +void mbfl_filt_ident_common_ctor(mbfl_identify_filter *filter) +{ + filter->status = 0; + filter->flag = 0; +} + +void mbfl_filt_ident_common_dtor(mbfl_identify_filter *filter) +{ + filter->status = 0; +} + +int mbfl_filt_ident_false(int c, mbfl_identify_filter *filter) +{ + filter->flag = 1; /* bad */ + return c; +} + +void mbfl_filt_ident_false_ctor(mbfl_identify_filter *filter) +{ + filter->status = 0; + filter->flag = 1; +} + +int mbfl_filt_ident_true(int c, mbfl_identify_filter *filter) +{ + return c; +} diff --git a/ext/mbstring/libmbfl/mbfl/mbfl_ident.h b/ext/mbstring/libmbfl/mbfl/mbfl_ident.h new file mode 100644 index 0000000..12d81cd --- /dev/null +++ 
b/ext/mbstring/libmbfl/mbfl/mbfl_ident.h @@ -0,0 +1,74 @@ +/* + * "streamable kanji code filter and converter" + * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. + * + * LICENSE NOTICES + * + * This file is part of "streamable kanji code filter and converter", + * which is distributed under the terms of GNU Lesser General Public + * License (version 2) as published by the Free Software Foundation. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with "streamable kanji code filter and converter"; + * if not, write to the Free Software Foundation, Inc., 59 Temple Place, + * Suite 330, Boston, MA 02111-1307 USA + * + * The author of this file: + * + */ +/* + * The source code included in this files was separated from mbfilter.h + * by Moriyoshi Koizumi <moriyoshi@php.net> on 20 Dec 2002. The file + * mbfilter.h is included in this package . 
+ * + */ + +#ifndef MBFL_IDENT_H +#define MBFL_IDENT_H + +#include "mbfl_defs.h" +#include "mbfl_encoding.h" + +/* + * identify filter + */ +typedef struct _mbfl_identify_filter mbfl_identify_filter; + +struct _mbfl_identify_filter { + void (*filter_ctor)(mbfl_identify_filter *filter); + void (*filter_dtor)(mbfl_identify_filter *filter); + int (*filter_function)(int c, mbfl_identify_filter *filter); + int status; + int flag; + int score; + const mbfl_encoding *encoding; +}; + +struct mbfl_identify_vtbl { + enum mbfl_no_encoding encoding; + void (*filter_ctor)(mbfl_identify_filter *filter); + void (*filter_dtor)(mbfl_identify_filter *filter); + int (*filter_function)(int c, mbfl_identify_filter *filter); +}; + +MBFLAPI extern const struct mbfl_identify_vtbl * mbfl_identify_filter_get_vtbl(enum mbfl_no_encoding encoding); +MBFLAPI extern mbfl_identify_filter * mbfl_identify_filter_new(enum mbfl_no_encoding encoding); +MBFLAPI extern mbfl_identify_filter * mbfl_identify_filter_new2(const mbfl_encoding *encoding); +MBFLAPI extern void mbfl_identify_filter_delete(mbfl_identify_filter *filter); +MBFLAPI extern int mbfl_identify_filter_init(mbfl_identify_filter *filter, enum mbfl_no_encoding encoding); +MBFLAPI extern int mbfl_identify_filter_init2(mbfl_identify_filter *filter, const mbfl_encoding *encoding); +MBFLAPI void mbfl_identify_filter_cleanup(mbfl_identify_filter *filter); + +MBFLAPI extern void mbfl_filt_ident_common_ctor(mbfl_identify_filter *filter); +MBFLAPI extern void mbfl_filt_ident_common_dtor(mbfl_identify_filter *filter); +MBFLAPI extern void mbfl_filt_ident_false_ctor(mbfl_identify_filter *filter); + +MBFLAPI extern int mbfl_filt_ident_false(int c, mbfl_identify_filter *filter); +MBFLAPI extern int mbfl_filt_ident_true(int c, mbfl_identify_filter *filter); + +#endif /* MBFL_IDENT_H */ diff --git a/ext/mbstring/libmbfl/mbfl/mbfl_language.c b/ext/mbstring/libmbfl/mbfl/mbfl_language.c new file mode 100644 index 0000000..4dd9726 --- /dev/null +++ 
b/ext/mbstring/libmbfl/mbfl/mbfl_language.c @@ -0,0 +1,171 @@ +/* + * "streamable kanji code filter and converter" + * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. + * + * LICENSE NOTICES + * + * This file is part of "streamable kanji code filter and converter", + * which is distributed under the terms of GNU Lesser General Public + * License (version 2) as published by the Free Software Foundation. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with "streamable kanji code filter and converter"; + * if not, write to the Free Software Foundation, Inc., 59 Temple Place, + * Suite 330, Boston, MA 02111-1307 USA + * + * The author of this file: + * + */ +/* + * The source code included in this files was separated from mbfilter.c + * by Moriyoshi Koizumi <moriyoshi@php.net> on 20 Dec 2002. The file + * mbfilter.c is included in this package . 
+ * + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#ifdef HAVE_STDDEF_H +#include <stddef.h> +#endif + +#ifdef HAVE_STDDEF_H +#include <stddef.h> +#endif + +#ifdef HAVE_STRING_H +#include <string.h> +#endif + +#ifdef HAVE_STRINGS_H +#include <strings.h> +#endif + +#include "mbfl_encoding.h" +#include "mbfl_language.h" + +#include "nls/nls_ja.h" +#include "nls/nls_kr.h" +#include "nls/nls_zh.h" +#include "nls/nls_uni.h" +#include "nls/nls_de.h" +#include "nls/nls_ru.h" +#include "nls/nls_ua.h" +#include "nls/nls_en.h" +#include "nls/nls_hy.h" +#include "nls/nls_tr.h" +#include "nls/nls_neutral.h" + +#ifndef HAVE_STRCASECMP +#ifdef HAVE_STRICMP +#define strcasecmp stricmp +#endif +#endif + +static const mbfl_language *mbfl_language_ptr_table[] = { + &mbfl_language_uni, + &mbfl_language_japanese, + &mbfl_language_korean, + &mbfl_language_simplified_chinese, + &mbfl_language_traditional_chinese, + &mbfl_language_english, + &mbfl_language_german, + &mbfl_language_russian, + &mbfl_language_ukrainian, + &mbfl_language_armenian, + &mbfl_language_turkish, + &mbfl_language_neutral, + NULL +}; + +/* language resolver */ +const mbfl_language * +mbfl_name2language(const char *name) +{ + const mbfl_language *language; + int i, j; + + if (name == NULL) { + return NULL; + } + + i = 0; + while ((language = mbfl_language_ptr_table[i++]) != NULL){ + if (strcasecmp(language->name, name) == 0) { + return language; + } + } + + i = 0; + while ((language = mbfl_language_ptr_table[i++]) != NULL){ + if (strcasecmp(language->short_name, name) == 0) { + return language; + } + } + + /* serch aliases */ + i = 0; + while ((language = mbfl_language_ptr_table[i++]) != NULL) { + if (language->aliases != NULL) { + j = 0; + while ((*language->aliases)[j] != NULL) { + if (strcasecmp((*language->aliases)[j], name) == 0) { + return language; + } + j++; + } + } + } + + return NULL; +} + +const mbfl_language * +mbfl_no2language(enum mbfl_no_language no_language) +{ + const mbfl_language 
*language; + int i; + + i = 0; + while ((language = mbfl_language_ptr_table[i++]) != NULL){ + if (language->no_language == no_language) { + return language; + } + } + + return NULL; +} + +enum mbfl_no_language +mbfl_name2no_language(const char *name) +{ + const mbfl_language *language; + + language = mbfl_name2language(name); + if (language == NULL) { + return mbfl_no_language_invalid; + } else { + return language->no_language; + } +} + +const char * +mbfl_no_language2name(enum mbfl_no_language no_language) +{ + const mbfl_language *language; + + language = mbfl_no2language(no_language); + if (language == NULL) { + return ""; + } else { + return language->name; + } +} + diff --git a/ext/mbstring/libmbfl/mbfl/mbfl_language.h b/ext/mbstring/libmbfl/mbfl/mbfl_language.h new file mode 100644 index 0000000..af42a01 --- /dev/null +++ b/ext/mbstring/libmbfl/mbfl/mbfl_language.h @@ -0,0 +1,87 @@ +/* + * "streamable kanji code filter and converter" + * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. + * + * LICENSE NOTICES + * + * This file is part of "streamable kanji code filter and converter", + * which is distributed under the terms of GNU Lesser General Public + * License (version 2) as published by the Free Software Foundation. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with "streamable kanji code filter and converter"; + * if not, write to the Free Software Foundation, Inc., 59 Temple Place, + * Suite 330, Boston, MA 02111-1307 USA + * + * The author of this file: + * + */ +/* + * The source code included in this files was separated from mbfilter.h + * by Moriyoshi Koizumi <moriyoshi@php.net> on 20 Dec 2002. 
The file + * mbfilter.h is included in this package . + * + */ + +#ifndef MBFL_LANGUAGE_H +#define MBFL_LANGUAGE_H + +#include "mbfl_defs.h" +#include "mbfl_encoding.h" + +enum mbfl_no_language { + mbfl_no_language_invalid = -1, + mbfl_no_language_neutral, + mbfl_no_language_uni, + mbfl_no_language_min, + mbfl_no_language_catalan, /* ca */ + mbfl_no_language_danish, /* da */ + mbfl_no_language_german, /* de */ + mbfl_no_language_english, /* en */ + mbfl_no_language_estonian, /* et */ + mbfl_no_language_greek, /* el */ + mbfl_no_language_spanish, /* es */ + mbfl_no_language_french, /* fr */ + mbfl_no_language_italian, /* it */ + mbfl_no_language_japanese, /* ja */ + mbfl_no_language_korean, /* ko */ + mbfl_no_language_dutch, /* nl */ + mbfl_no_language_polish, /* pl */ + mbfl_no_language_portuguese, /* pt */ + mbfl_no_language_swedish, /* sv */ + mbfl_no_language_simplified_chinese, /* zh-cn */ + mbfl_no_language_traditional_chinese, /* zh-tw */ + mbfl_no_language_russian, /* ru */ + mbfl_no_language_ukrainian, /* ua */ + mbfl_no_language_armenian, /* hy */ + mbfl_no_language_turkish, /* tr */ + mbfl_no_language_max +}; + +typedef enum mbfl_no_language mbfl_language_id; + +/* + * language + */ +typedef struct _mbfl_language { + enum mbfl_no_language no_language; + const char *name; + const char *short_name; + const char *(*aliases)[]; + enum mbfl_no_encoding mail_charset; + enum mbfl_no_encoding mail_header_encoding; + enum mbfl_no_encoding mail_body_encoding; +} mbfl_language; + +MBFLAPI extern const mbfl_language * mbfl_name2language(const char *name); +MBFLAPI extern const mbfl_language * mbfl_no2language(enum mbfl_no_language no_language); +MBFLAPI extern enum mbfl_no_language mbfl_name2no_language(const char *name); +MBFLAPI extern const char * mbfl_no_language2name(enum mbfl_no_language no_language); + + +#endif /* MBFL_LANGUAGE_H */ diff --git a/ext/mbstring/libmbfl/mbfl/mbfl_memory_device.c b/ext/mbstring/libmbfl/mbfl/mbfl_memory_device.c new file mode 
100644 index 0000000..7509ef1 --- /dev/null +++ b/ext/mbstring/libmbfl/mbfl/mbfl_memory_device.c @@ -0,0 +1,350 @@ +/* + * "streamable kanji code filter and converter" + * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. + * + * LICENSE NOTICES + * + * This file is part of "streamable kanji code filter and converter", + * which is distributed under the terms of GNU Lesser General Public + * License (version 2) as published by the Free Software Foundation. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with "streamable kanji code filter and converter"; + * if not, write to the Free Software Foundation, Inc., 59 Temple Place, + * Suite 330, Boston, MA 02111-1307 USA + * + * The author of this file: + * + */ +/* + * The source code included in this files was separated from mbfilter.c + * by Moriyoshi Koizumi <moriyoshi@php.net> on 20 Dec 2002. The file + * mbfilter.c is included in this package . 
+ *
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#ifdef HAVE_STDDEF_H
+#include <stddef.h>
+#endif
+
+#include <limits.h>
+
+#include "mbfl_allocators.h"
+#include "mbfl_string.h"
+#include "mbfl_memory_device.h"
+
+/*
+ * memory device output functions
+ */
+
+/*
+ * Enlarge the byte buffer of `device` by `grow` bytes.
+ *
+ * Returns 0 on success.  Returns -1, leaving the device untouched, when
+ * `grow` is not positive, when length + grow would overflow an int, or
+ * when the reallocation fails.
+ */
+static int
+mbfl_memory_device_grow(mbfl_memory_device *device, int grow)
+{
+	int newlen;
+	unsigned char *tmp;
+
+	if (grow <= 0 || device->length > INT_MAX - grow) {
+		return -1;
+	}
+	newlen = device->length + grow;
+	tmp = (unsigned char *)mbfl_realloc((void *)device->buffer, newlen*sizeof(unsigned char));
+	if (tmp == NULL) {
+		return -1;
+	}
+	device->length = newlen;
+	device->buffer = tmp;
+
+	return 0;
+}
+
+/*
+ * Prepare `device` for use.  A NULL device is ignored.
+ *
+ * initsz:  number of bytes to allocate up front.  No buffer is allocated
+ *          when it is not positive; if the allocation fails the device
+ *          simply starts out empty.
+ * allocsz: growth step used by the output functions.  Values that are
+ *          not greater than MBFL_MEMORY_DEVICE_ALLOC_SIZE are replaced
+ *          by that constant.
+ */
+void
+mbfl_memory_device_init(mbfl_memory_device *device, int initsz, int allocsz)
+{
+	if (device) {
+		device->length = 0;
+		device->buffer = (unsigned char *)0;
+		if (initsz > 0) {
+			device->buffer = (unsigned char *)mbfl_malloc(initsz*sizeof(unsigned char));
+			if (device->buffer != NULL) {
+				device->length = initsz;
+			}
+		}
+		device->pos= 0;
+		if (allocsz > MBFL_MEMORY_DEVICE_ALLOC_SIZE) {
+			device->allocsz = allocsz;
+		} else {
+			device->allocsz = MBFL_MEMORY_DEVICE_ALLOC_SIZE;
+		}
+	}
+}
+
+/*
+ * Make sure the buffer holds at least `initsz` bytes and set the growth
+ * step from `allocsz` (same clamping as mbfl_memory_device_init).
+ * The buffer is never shrunk; a failed reallocation leaves it as it was.
+ * A NULL device is ignored.
+ */
+void
+mbfl_memory_device_realloc(mbfl_memory_device *device, int initsz, int allocsz)
+{
+	unsigned char *tmp;
+
+	if (device) {
+		if (initsz > device->length) {
+			tmp = (unsigned char *)mbfl_realloc((void *)device->buffer, initsz*sizeof(unsigned char));
+			if (tmp != NULL) {
+				device->buffer = tmp;
+				device->length = initsz;
+			}
+		}
+		if (allocsz > MBFL_MEMORY_DEVICE_ALLOC_SIZE) {
+			device->allocsz = allocsz;
+		} else {
+			device->allocsz = MBFL_MEMORY_DEVICE_ALLOC_SIZE;
+		}
+	}
+}
+
+/*
+ * Free the buffer and return the device to the empty state.  The growth
+ * step is kept.  A NULL device is ignored.
+ */
+void
+mbfl_memory_device_clear(mbfl_memory_device *device)
+{
+	if (device) {
+		if (device->buffer) {
+			mbfl_free(device->buffer);
+		}
+		device->buffer = (unsigned char *)0;
+		device->length = 0;
+		device->pos = 0;
+	}
+}
+
+/*
+ * Rewind the write position to 0 while keeping the allocated buffer.
+ * A NULL device is ignored.
+ */
+void
+mbfl_memory_device_reset(mbfl_memory_device *device)
+{
+	if (device) {
+		device->pos = 0;
+	}
+}
+
+/*
+ * Drop the most recently written byte, if there is one.
+ * A NULL device is ignored, like in the other maintenance functions.
+ */
+void
+mbfl_memory_device_unput(mbfl_memory_device *device)
+{
+	if (device && device->pos > 0) {
+		device->pos--;
+	}
+}
+
+/*
+ * Hand the collected bytes over to `result`.
+ *
+ * Four zero bytes are appended as a terminator, result->val takes over
+ * the buffer, result->len is the number of bytes written before the
+ * terminator, and the device is left empty.
+ *
+ * Returns `result`, or NULL when either argument is NULL or when the
+ * terminator cannot be appended.  In the latter case the device buffer
+ * is released and result->val / result->len are set to NULL / 0, so the
+ * caller never receives an unterminated string.
+ */
+mbfl_string *
+mbfl_memory_device_result(mbfl_memory_device *device, mbfl_string *result)
+{
+	if (device == NULL || result == NULL) {
+		return NULL;
+	}
+
+	result->len = device->pos;
+	if (mbfl_memory_device_output4('\0', device) < 0) {
+		/* out of memory while terminating: give up cleanly */
+		mbfl_memory_device_clear(device);
+		result->val = (unsigned char *)0;
+		result->len = 0;
+		return NULL;
+	}
+
+	/* transfer ownership of the buffer to the caller */
+	result->val = device->buffer;
+	device->buffer = (unsigned char *)0;
+	device->length = 0;
+	device->pos= 0;
+
+	return result;
+}
+
+/*
+ * Append the low 8 bits of `c`.  `data` is the mbfl_memory_device.
+ * Returns `c`, or -1 when the buffer cannot be enlarged.
+ */
+int
+mbfl_memory_device_output(int c, void *data)
+{
+	mbfl_memory_device *device = (mbfl_memory_device *)data;
+
+	if (device->pos >= device->length) {
+		/* reallocate buffer */
+		if (mbfl_memory_device_grow(device, device->allocsz) < 0) {
+			return -1;
+		}
+	}
+
+	device->buffer[device->pos++] = (unsigned char)c;
+	return c;
+}
+
+/*
+ * Append the low 16 bits of `c`, most significant byte first.
+ * Returns `c`, or -1 when the buffer cannot be enlarged.
+ */
+int
+mbfl_memory_device_output2(int c, void *data)
+{
+	mbfl_memory_device *device = (mbfl_memory_device *)data;
+
+	/* same test as pos + 2 >= length, written so that it cannot overflow */
+	if (device->length - device->pos <= 2) {
+		/* reallocate buffer */
+		if (mbfl_memory_device_grow(device, device->allocsz) < 0) {
+			return -1;
+		}
+	}
+
+	device->buffer[device->pos++] = (unsigned char)((c >> 8) & 0xff);
+	device->buffer[device->pos++] = (unsigned char)(c & 0xff);
+
+	return c;
+}
+
+/*
+ * Append all 32 bits of `c`, most significant byte first.
+ * Returns `c`, or -1 when the buffer cannot be enlarged.
+ */
+int
+mbfl_memory_device_output4(int c, void* data)
+{
+	mbfl_memory_device *device = (mbfl_memory_device *)data;
+
+	/* same test as pos + 4 >= length, written so that it cannot overflow */
+	if (device->length - device->pos <= 4) {
+		/* reallocate buffer */
+		if (mbfl_memory_device_grow(device, device->allocsz) < 0) {
+			return -1;
+		}
+	}
+
+	device->buffer[device->pos++] = (unsigned char)((c >> 24) & 0xff);
+	device->buffer[device->pos++] = (unsigned char)((c >> 16) & 0xff);
+	device->buffer[device->pos++] = (unsigned char)((c >> 8) & 0xff);
+	device->buffer[device->pos++] = (unsigned char)(c & 0xff);
+
+	return c;
+}
+
+/*
+ * Append the NUL-terminated string `psrc` (without its terminator).
+ * Returns 0 on success and -1 when the buffer cannot be enlarged.
+ */
+int
+mbfl_memory_device_strcat(mbfl_memory_device *device, const char *psrc)
+{
+	int len;
+	const unsigned char *p;
+
+	/* measured by hand; this file does not rely on <string.h> */
+	len = 0;
+	p = (const unsigned char*)psrc;
+	while (*p) {
+		p++;
+		len++;
+	}
+
+	return mbfl_memory_device_strncat(device, psrc, len);
+}
+
+/*
+ * Append `len` bytes starting at `psrc`.
+ * Returns 0 on success.  Returns -1 when `len` is negative or when the
+ * buffer cannot be enlarged; the device is unchanged in that case.
+ */
+int
+mbfl_memory_device_strncat(mbfl_memory_device *device, const char *psrc, int len)
+{
+	unsigned char *w;
+
+	if (len < 0) {
+		/* a negative count would move pos backwards */
+		return -1;
+	}
+
+	/* same test as pos + len >= length, written so that it cannot overflow */
+	if (len >= device->length - device->pos) {
+		/* reallocate buffer */
+		if (len > INT_MAX - MBFL_MEMORY_DEVICE_ALLOC_SIZE ||
+		    mbfl_memory_device_grow(device, len + MBFL_MEMORY_DEVICE_ALLOC_SIZE) < 0) {
+			return -1;
+		}
+	}
+
+	w = &device->buffer[device->pos];
+	device->pos += len;
+	while (len > 0) {
+		*w++ = *psrc++;
+		len--;
+	}
+
+	return len;
+}
+
+/*
+ * Append everything written to `src` so far to `dest`; `src` itself is
+ * not modified.
+ * Returns 0 on success and -1 when `dest` cannot be enlarged.
+ */
+int
+mbfl_memory_device_devcat(mbfl_memory_device *dest, mbfl_memory_device *src)
+{
+	int n;
+	unsigned char *p, *w;
+
+	/* same test as dest->pos + src->pos >= dest->length, overflow-free */
+	if (src->pos >= dest->length - dest->pos) {
+		/* reallocate buffer */
+		if (src->pos > INT_MAX - MBFL_MEMORY_DEVICE_ALLOC_SIZE ||
+		    mbfl_memory_device_grow(dest, src->pos + MBFL_MEMORY_DEVICE_ALLOC_SIZE) < 0) {
+			return -1;
+		}
+	}
+
+	p = src->buffer;
+	w = &dest->buffer[dest->pos];
+	n = src->pos;
+	dest->pos += n;
+	while (n > 0) {
+		*w++ = *p++;
+		n--;
+	}
+
+	return n;
+}
+
+/*
+ * Put a wide-character device into the empty state with the default
+ * growth step.  A NULL device is ignored.
+ */
+void
+mbfl_wchar_device_init(mbfl_wchar_device *device)
+{
+	if (device) {
+		device->buffer = (unsigned int *)0;
+		device->length = 0;
+		device->pos= 0;
+		device->allocsz = MBFL_MEMORY_DEVICE_ALLOC_SIZE;
+	}
+}
+
+/*
+ * Free the element buffer and return the device to the empty state.
+ * A NULL device is ignored.
+ */
+void
+mbfl_wchar_device_clear(mbfl_wchar_device *device)
+{
+	if (device) {
+		if (device->buffer) {
+			mbfl_free(device->buffer);
+		}
+		device->buffer = (unsigned int*)0;
+		device->length = 0;
+		device->pos = 0;
+	}
+}
+
+/*
+ * Append `c` as one element.  `data` is the mbfl_wchar_device; its
+ * length and pos count elements, not bytes.
+ * Returns `c`, or -1 when the buffer cannot be enlarged.
+ */
+int
+mbfl_wchar_device_output(int c, void *data)
+{
+	mbfl_wchar_device *device = (mbfl_wchar_device *)data;
+
+	if (device->pos >= device->length) {
+		/* reallocate buffer */
+		int newlen;
+		unsigned int *tmp;
+
+		if (device->allocsz <= 0 || device->length > INT_MAX - device->allocsz) {
+			/* element count would overflow */
+			return -1;
+		}
+		newlen = device->length + device->allocsz;
+		if ((unsigned int)newlen > UINT_MAX / sizeof(unsigned int)) {
+			/* byte count would overflow */
+			return -1;
+		}
+		tmp = (unsigned int *)mbfl_realloc((void *)device->buffer, newlen*sizeof(int));
+		if (tmp == NULL) {
+			return -1;
+		}
+		device->length = newlen;
+		device->buffer = tmp;
+	}
+
+	device->buffer[device->pos++] = c;
+
+	return c;
+}
diff --git a/ext/mbstring/libmbfl/mbfl/mbfl_memory_device.h b/ext/mbstring/libmbfl/mbfl/mbfl_memory_device.h
new file mode 100644
index 0000000..a2c1c40
--- /dev/null
+++ b/ext/mbstring/libmbfl/mbfl/mbfl_memory_device.h
@@ -0,0 +1,73 @@
+/*
+ * "streamable kanji code filter and converter"
+ * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
+ *
+ * LICENSE NOTICES
+ *
+ * This file is part of "streamable kanji code filter and converter",
+ * which is distributed under the terms of GNU Lesser General Public
+ * License (version 2) as published by the Free Software Foundation.
+ *
+ * This software is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with "streamable kanji code filter and converter";
+ * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
+ * Suite 330, Boston, MA 02111-1307 USA
+ *
+ * The author of this file:
+ *
+ */
+/*
+ * The source code included in this file was separated from mbfilter.h
+ * by Moriyoshi Koizumi <moriyoshi@php.net> on 20 Dec 2002. The file
+ * mbfilter.h is included in this package.
+ *
+ */
+
+#ifndef MBFL_MEMORY_DEVICE_H
+#define MBFL_MEMORY_DEVICE_H
+
+#include "mbfl_defs.h"
+#include "mbfl_string.h"
+
+#define MBFL_MEMORY_DEVICE_ALLOC_SIZE 64	/* smallest growth step a device uses */
+
+typedef struct _mbfl_memory_device {	/* growable byte buffer */
+	unsigned char *buffer;	/* storage from mbfl_malloc/mbfl_realloc, or NULL */
+	int length;		/* allocated size of buffer in bytes */
+	int pos;		/* bytes written so far; next write offset */
+	int allocsz;		/* bytes added when the output functions grow buffer */
+} mbfl_memory_device;
+
+typedef struct _mbfl_wchar_device {	/* growable buffer of unsigned int elements */
+	unsigned int *buffer;	/* storage from mbfl_realloc, or NULL */
+	int length;		/* allocated size of buffer in elements */
+	int pos;		/* elements written so far */
+	int allocsz;		/* elements added on each growth */
+} mbfl_wchar_device;
+/*
+ * Byte device API, implemented in mbfl_memory_device.c.
+ *
+ * init, realloc, clear, reset and result accept a NULL device; the
+ * other functions dereference it unconditionally.
+ *
+ *   init     start empty, optionally pre-allocating initsz bytes, and
+ *            set the growth step to allocsz (never less than
+ *            MBFL_MEMORY_DEVICE_ALLOC_SIZE)
+ *   realloc  enlarge the buffer to initsz bytes if it is smaller and
+ *            set the growth step as init does; allocation failure is
+ *            not reported
+ *   clear    free the buffer and zero length and pos
+ *   reset    set pos to 0, keeping the buffer
+ *   result   append four zero bytes, move the buffer into result->val
+ *            (result->len is the byte count before those zero bytes)
+ *            and leave the device empty; returns NULL if an argument
+ *            is NULL or the device ends up without a buffer
+ *   unput    decrement pos if it is greater than 0
+ *   output, output2, output4
+ *            append c as 1, 2 or 4 bytes, most significant byte first;
+ *            data is the mbfl_memory_device; return c, or -1 if the
+ *            buffer cannot be enlarged
+ *   strcat, strncat, devcat
+ *            append a NUL-terminated string, len bytes from psrc, or
+ *            the bytes written to src; return -1 if the buffer cannot
+ *            be enlarged
+ */
+MBFLAPI extern void mbfl_memory_device_init(mbfl_memory_device *device, int initsz, int allocsz);
+MBFLAPI extern void mbfl_memory_device_realloc(mbfl_memory_device *device, int initsz, int allocsz);
+MBFLAPI extern void mbfl_memory_device_clear(mbfl_memory_device *device);
+MBFLAPI extern void mbfl_memory_device_reset(mbfl_memory_device *device);
+MBFLAPI extern mbfl_string * mbfl_memory_device_result(mbfl_memory_device *device, mbfl_string *result);
+MBFLAPI extern void mbfl_memory_device_unput(mbfl_memory_device *device);
+MBFLAPI extern int mbfl_memory_device_output(int c, void *data);
+MBFLAPI extern int mbfl_memory_device_output2(int c, void *data);
+MBFLAPI extern int mbfl_memory_device_output4(int c, void *data);
+MBFLAPI extern int mbfl_memory_device_strcat(mbfl_memory_device *device, const char *psrc);
+MBFLAPI extern int mbfl_memory_device_strncat(mbfl_memory_device *device, const char *psrc, int len);
+MBFLAPI extern int mbfl_memory_device_devcat(mbfl_memory_device *dest, mbfl_memory_device *src);
+/*
+ * Wide-character device API.
+ *
+ *   init    start empty with growth step MBFL_MEMORY_DEVICE_ALLOC_SIZE
+ *   output  append c as one element; data is the mbfl_wchar_device;
+ *           returns c, or -1 if the buffer cannot be enlarged
+ *   clear   free the buffer and zero length and pos
+ *
+ * init and clear ignore a NULL device.
+ */
+MBFLAPI extern void mbfl_wchar_device_init(mbfl_wchar_device *device);
+MBFLAPI extern int mbfl_wchar_device_output(int c, void *data);
+MBFLAPI extern void mbfl_wchar_device_clear(mbfl_wchar_device *device);
+
+
+
+#endif /* MBFL_MEMORY_DEVICE_H */
+
diff --git a/ext/mbstring/libmbfl/mbfl/mbfl_string.c b/ext/mbstring/libmbfl/mbfl/mbfl_string.c
new file mode 100644
index 0000000..e0351a4
--- /dev/null
+++ b/ext/mbstring/libmbfl/mbfl/mbfl_string.c
@@ -0,0 +1,79 @@
+/*
+ * "streamable kanji code filter and converter"
+ * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
+ *
+ * LICENSE NOTICES
+ *
+ * This file is part of "streamable kanji code filter and converter",
+ * which is distributed under the terms of GNU Lesser General Public
+ * License (version 2) as published by the Free Software Foundation.
+ *
+ * This software is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with "streamable kanji code filter and converter";
+ * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
+ * Suite 330, Boston, MA 02111-1307 USA
+ *
+ * The author of this file:
+ *
+ */
+/*
+ * The source code included in this file was separated from mbfilter.c
+ * by Moriyoshi Koizumi <moriyoshi@php.net> on 20 Dec 2002. The file
+ * mbfilter.c is included in this package.
+ *
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#ifdef HAVE_STDDEF_H
+#include <stddef.h>
+#endif
+
+#include "mbfl_allocators.h"
+#include "mbfl_string.h"
+
+/*
+ * string object
+ */
+
+/*
+ * Reset `string` to an empty value tagged with mbfl_no_language_uni and
+ * mbfl_no_encoding_pass.  A NULL pointer is ignored.  Any previous
+ * string->val is overwritten, not freed.
+ */
+void
+mbfl_string_init(mbfl_string *string)
+{
+	mbfl_string_init_set(string, mbfl_no_language_uni, mbfl_no_encoding_pass);
+}
+
+/*
+ * Reset `string` to an empty value tagged with the given language and
+ * encoding ids.  A NULL pointer is ignored.  Any previous string->val
+ * is overwritten, not freed.
+ */
+void
+mbfl_string_init_set(mbfl_string *string, mbfl_language_id no_language, mbfl_encoding_id no_encoding)
+{
+	if (string == NULL) {
+		return;
+	}
+
+	string->val = NULL;
+	string->len = 0;
+	string->no_language = no_language;
+	string->no_encoding = no_encoding;
+}
+
+/*
+ * Release string->val with mbfl_free() when it is set and leave the
+ * string empty.  The language and encoding ids are kept.  A NULL
+ * pointer is ignored.
+ */
+void
+mbfl_string_clear(mbfl_string *string)
+{
+	if (string == NULL) {
+		return;
+	}
+
+	if (string->val != NULL) {
+		mbfl_free(string->val);
+	}
+	string->val = NULL;
+	string->len = 0;
+}
+
+
diff --git a/ext/mbstring/libmbfl/mbfl/mbfl_string.h b/ext/mbstring/libmbfl/mbfl/mbfl_string.h
new file mode 100644
index 0000000..d112131
--- /dev/null
+++ b/ext/mbstring/libmbfl/mbfl/mbfl_string.h
@@ -0,0 +1,56 @@
+/*
+ * "streamable kanji code filter and converter"
+ * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
+ *
+ * LICENSE NOTICES
+ *
+ * This file is part of "streamable kanji code filter and converter",
+ * which is distributed under the terms of GNU Lesser General Public
+ * License (version 2) as published by the Free Software Foundation.
+ *
+ * This software is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with "streamable kanji code filter and converter";
+ * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
+ * Suite 330, Boston, MA 02111-1307 USA
+ *
+ * The author of this file:
+ *
+ */
+/*
+ * The source code included in this file was separated from mbfilter.h
+ * by Moriyoshi Koizumi <moriyoshi@php.net> on 20 Dec 2002. The file
+ * mbfilter.h is included in this package.
+ *
+ */
+
+#ifndef MBFL_STRING_H
+#define MBFL_STRING_H
+
+#include "mbfl_defs.h"
+#include "mbfl_encoding.h"
+#include "mbfl_language.h"
+
+/*
+ * string object
+ *
+ * A byte buffer with an explicit length, tagged with a language id and
+ * an encoding id.  mbfl_string_clear() releases val with mbfl_free().
+ */
+typedef struct _mbfl_string {
+	enum mbfl_no_language no_language;	/* language id; mbfl_string_init() sets mbfl_no_language_uni */
+	enum mbfl_no_encoding no_encoding;	/* encoding id; mbfl_string_init() sets mbfl_no_encoding_pass */
+	unsigned char *val;			/* the bytes, or NULL when empty */
+	unsigned int len;			/* number of bytes in val */
+} mbfl_string;
+/*
+ *   init      empty string with the default language/encoding ids
+ *   init_set  empty string with the given language/encoding ids
+ *   clear     free val (if set) and reset val/len; the ids are kept
+ *
+ * All three ignore a NULL pointer.  init and init_set overwrite val
+ * without freeing it.
+ */
+MBFLAPI extern void mbfl_string_init(mbfl_string *string);
+MBFLAPI extern void mbfl_string_init_set(mbfl_string *string, mbfl_language_id no_language, mbfl_encoding_id no_encoding);
+MBFLAPI extern void mbfl_string_clear(mbfl_string *string);
+
+#ifndef NULL	/* fallback for builds that have not pulled in a definition of NULL */
+#define NULL 0
+#endif
+
+#endif /* MBFL_STRING_H */
diff --git a/ext/mbstring/libmbfl/mbfl/mk_eaw_tbl.awk b/ext/mbstring/libmbfl/mbfl/mk_eaw_tbl.awk
new file mode 100644
index 0000000..02c73f2
--- /dev/null
+++ b/ext/mbstring/libmbfl/mbfl/mk_eaw_tbl.awk
@@ -0,0 +1,81 @@
+#!/usr/bin/awk -f
+#
+# $Id$
+#
+# Description: a script to generate east asian width table.
+#
+# Input:  lines of the form "XXXX;P" or "XXXX..YYYY;P", where XXXX and
+#         YYYY are hexadecimal code points and P is the width property.
+# Output: a C table of { begin, end } ranges covering every code point
+#         whose property is W or F; adjacent ranges are merged.
+# The name of the generated table is taken from the variable TABLE_NAME
+# (pass it with -v TABLE_NAME=...).
+#
+
+# Convert a string of hexadecimal digits to a number.  Written in plain
+# awk because strtonum() is available in gawk only.
+function hex2num(s,    i, n) {
+	n = 0
+	s = tolower(s)
+	for (i = 1; i <= length(s); i++) {
+		n = n * 16 + index("0123456789abcdef", substr(s, i, 1)) - 1
+	}
+	return n
+}
+
+# Print the pending range first..prev as one table entry, preceded by a
+# separator when an entry has been printed before.
+function emit_range() {
+	if (comma) {
+		print ",\n\t"
+	}
+	printf("{ 0x%04x, 0x%04x }", first, prev)
+	comma = 1
+}
+
+# Account for the code points lo..hi.  Wide ranges extend the pending
+# range when they touch it and start a new one otherwise; a range that
+# is not wide closes the pending range.  prev < 0 means nothing pending.
+function add_range(lo, hi, wide) {
+	if (wide) {
+		if (prev < 0) {
+			first = lo
+		} else if (lo - prev > 1) {
+			emit_range()
+			first = lo
+		}
+		prev = hi
+	} else if (prev >= 0) {
+		emit_range()
+		prev = -1
+	}
+}
+
+BEGIN {
+	prev = -1
+	comma = 0
+	ORS = ""
+	FS = "[;.|# ]"
+	print "static const struct {\n\tint begin;\n\tint end;\n} " TABLE_NAME "[] = {\n\t"
+}
+
+# comment lines carry no data
+/^#/ {
+}
+
+# single code point: $1 is the code point, $2 the property
+/^[0-9a-fA-F]+;/ {
+	v = hex2num($1)
+	add_range(v, v, $2 == "W" || $2 == "F")
+}
+
+# code point range: the ".." yields an empty $2, so the end of the range
+# is $3 and the property is $4
+/^[0-9a-fA-F]+\.\./ {
+	add_range(hex2num($1), hex2num($3), $4 == "W" || $4 == "F")
+}
+
+END {
+	if (prev >= 0) {
+		emit_range()
+	}
+	print "\n};\n"
+}
|