diff options
| author | Moriyoshi Koizumi <moriyoshi@php.net> | 2010-12-19 16:36:37 +0000 |
|---|---|---|
| committer | Moriyoshi Koizumi <moriyoshi@php.net> | 2010-12-19 16:36:37 +0000 |
| commit | bbf3d43c1ee0ad53b03c3821cd630f0746d5e954 (patch) | |
| tree | fd11ea79a69ee445ffde8310a3760603bf3df821 /ext/mbstring | |
| parent | c28cac404d2d0590ba2811f41331c60d09adbf1e (diff) | |
| download | php-git-bbf3d43c1ee0ad53b03c3821cd630f0746d5e954.tar.gz | |
* Refactor zend_multibyte facility.
Now mbstring.script_encoding is superseded by zend.script_encoding.
Diffstat (limited to 'ext/mbstring')
23 files changed, 692 insertions, 712 deletions
diff --git a/ext/mbstring/libmbfl/filters/mbfilter_base64.c b/ext/mbstring/libmbfl/filters/mbfilter_base64.c index 13341f9e9f..198f38c3d2 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_base64.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_base64.c @@ -41,7 +41,7 @@ const mbfl_encoding mbfl_encoding_base64 = { "BASE64", NULL, NULL, - MBFL_ENCTYPE_SBCS + MBFL_ENCTYPE_ENC_STRM | MBFL_ENCTYPE_GL_UNSAFE }; const struct mbfl_convert_vtbl vtbl_8bit_b64 = { diff --git a/ext/mbstring/libmbfl/filters/mbfilter_big5.c b/ext/mbstring/libmbfl/filters/mbfilter_big5.c index fe5effe044..aa14e3058d 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_big5.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_big5.c @@ -65,7 +65,7 @@ const mbfl_encoding mbfl_encoding_big5 = { "BIG5", (const char *(*)[])&mbfl_encoding_big5_aliases, mblen_table_big5, - MBFL_ENCTYPE_MBCS + MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_GL_UNSAFE }; const struct mbfl_identify_vtbl vtbl_identify_big5 = { diff --git a/ext/mbstring/libmbfl/filters/mbfilter_cp5022x.c b/ext/mbstring/libmbfl/filters/mbfilter_cp5022x.c index 587bff88cf..148d825559 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_cp5022x.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_cp5022x.c @@ -54,7 +54,7 @@ const mbfl_encoding mbfl_encoding_jis_ms = { "ISO-2022-JP", NULL, NULL, - MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE + MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE | MBFL_ENCTYPE_GL_UNSAFE }; const mbfl_encoding mbfl_encoding_cp50220 = { @@ -63,7 +63,7 @@ const mbfl_encoding mbfl_encoding_cp50220 = { "ISO-2022-JP", (const char *(*)[])NULL, NULL, - MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE + MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE | MBFL_ENCTYPE_GL_UNSAFE }; const mbfl_encoding mbfl_encoding_cp50220raw = { @@ -72,7 +72,7 @@ const mbfl_encoding mbfl_encoding_cp50220raw = { "ISO-2022-JP", (const char *(*)[])NULL, NULL, - MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE + MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE | MBFL_ENCTYPE_GL_UNSAFE }; const mbfl_encoding mbfl_encoding_cp50221 = { @@ -81,7 +81,7 @@ const mbfl_encoding mbfl_encoding_cp50221 = { "ISO-2022-JP", NULL, NULL, - MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE + MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE | MBFL_ENCTYPE_GL_UNSAFE }; const mbfl_encoding mbfl_encoding_cp50222 = { @@ -90,7 +90,7 @@ const mbfl_encoding mbfl_encoding_cp50222 = { "ISO-2022-JP", NULL, NULL, - MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE + MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE | MBFL_ENCTYPE_GL_UNSAFE }; const struct mbfl_identify_vtbl vtbl_identify_jis_ms = { diff --git a/ext/mbstring/libmbfl/filters/mbfilter_cp932.c b/ext/mbstring/libmbfl/filters/mbfilter_cp932.c index 6e54d53f44..40ba849651 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_cp932.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_cp932.c @@ -66,7 +66,7 @@ const mbfl_encoding mbfl_encoding_cp932 = { "Shift_JIS", (const char *(*)[])&mbfl_encoding_cp932_aliases, mblen_table_sjis, - MBFL_ENCTYPE_MBCS + MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_GL_UNSAFE }; const struct mbfl_identify_vtbl vtbl_identify_cp932 = { diff --git a/ext/mbstring/libmbfl/filters/mbfilter_cp936.c b/ext/mbstring/libmbfl/filters/mbfilter_cp936.c index 561dc3003b..4cfaa8eb4e 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_cp936.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_cp936.c @@ -65,7 +65,7 @@ const mbfl_encoding mbfl_encoding_cp936 = { "CP936", (const char *(*)[])&mbfl_encoding_cp936_aliases, mblen_table_cp936, - MBFL_ENCTYPE_MBCS + MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_GL_UNSAFE }; const struct mbfl_identify_vtbl vtbl_identify_cp936 = { diff --git a/ext/mbstring/libmbfl/filters/mbfilter_htmlent.c b/ext/mbstring/libmbfl/filters/mbfilter_htmlent.c index 1fe0e6b732..56c364d867 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_htmlent.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_htmlent.c @@ -70,7 +70,7 @@ const mbfl_encoding mbfl_encoding_html_ent = { "HTML-ENTITIES", (const char *(*)[])&mbfl_encoding_html_ent_aliases, NULL, - MBFL_ENCTYPE_HTML_ENT + MBFL_ENCTYPE_ENC_STRM | MBFL_ENCTYPE_GL_UNSAFE }; const struct mbfl_convert_vtbl vtbl_wchar_html = { diff --git a/ext/mbstring/libmbfl/filters/mbfilter_hz.c b/ext/mbstring/libmbfl/filters/mbfilter_hz.c index 7c7eaffc07..81cea2bb3a 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_hz.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_hz.c @@ -44,7 +44,7 @@ const mbfl_encoding mbfl_encoding_hz = { "HZ-GB-2312", NULL, NULL, - MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE + MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE | MBFL_ENCTYPE_GL_UNSAFE }; const struct mbfl_identify_vtbl vtbl_identify_hz = { diff --git a/ext/mbstring/libmbfl/filters/mbfilter_iso2022_jp_ms.c b/ext/mbstring/libmbfl/filters/mbfilter_iso2022_jp_ms.c index 1bf77172b6..a93ee4e4bb 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_iso2022_jp_ms.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_iso2022_jp_ms.c @@ -48,7 +48,7 @@ const mbfl_encoding mbfl_encoding_2022jpms = { "ISO-2022-JP", (const char *(*)[])&mbfl_encoding_2022jpms_aliases, NULL, - MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE + MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE | MBFL_ENCTYPE_GL_UNSAFE }; const struct mbfl_identify_vtbl vtbl_identify_2022jpms = { diff --git a/ext/mbstring/libmbfl/filters/mbfilter_iso2022_kr.c b/ext/mbstring/libmbfl/filters/mbfilter_iso2022_kr.c index 77c95c5ad2..01c01a4477 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_iso2022_kr.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_iso2022_kr.c @@ -43,7 +43,7 @@ const mbfl_encoding mbfl_encoding_2022kr = { "ISO-2022-KR", NULL, NULL, - MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE + MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE | MBFL_ENCTYPE_GL_UNSAFE }; const struct mbfl_identify_vtbl vtbl_identify_2022kr = { diff --git a/ext/mbstring/libmbfl/filters/mbfilter_jis.c b/ext/mbstring/libmbfl/filters/mbfilter_jis.c index 6b1aef3643..7fa1fd35b9 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_jis.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_jis.c @@ -46,7 +46,7 @@ const mbfl_encoding mbfl_encoding_jis = { "ISO-2022-JP", NULL, NULL, - MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE + MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE | MBFL_ENCTYPE_GL_UNSAFE }; const mbfl_encoding mbfl_encoding_2022jp = { @@ -55,7 +55,7 @@ const mbfl_encoding mbfl_encoding_2022jp = { "ISO-2022-JP", NULL, NULL, - MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE + MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE | MBFL_ENCTYPE_GL_UNSAFE }; const struct mbfl_identify_vtbl vtbl_identify_jis = { diff --git a/ext/mbstring/libmbfl/filters/mbfilter_qprint.c b/ext/mbstring/libmbfl/filters/mbfilter_qprint.c index 188d088ed0..df9752bc3b 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_qprint.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_qprint.c @@ -43,7 +43,7 @@ const mbfl_encoding mbfl_encoding_qprint = { "Quoted-Printable", (const char *(*)[])&mbfl_encoding_qprint_aliases, NULL, - MBFL_ENCTYPE_SBCS + MBFL_ENCTYPE_ENC_STRM | MBFL_ENCTYPE_GL_UNSAFE }; const struct mbfl_convert_vtbl vtbl_8bit_qprint = { diff --git a/ext/mbstring/libmbfl/filters/mbfilter_sjis.c b/ext/mbstring/libmbfl/filters/mbfilter_sjis.c index 83ef565927..b74fca21d5 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_sjis.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_sjis.c @@ -66,7 +66,7 @@ const mbfl_encoding mbfl_encoding_sjis = { "Shift_JIS", (const char *(*)[])&mbfl_encoding_sjis_aliases, mblen_table_sjis, - MBFL_ENCTYPE_MBCS + MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_GL_UNSAFE }; const struct mbfl_identify_vtbl vtbl_identify_sjis = { diff --git a/ext/mbstring/libmbfl/filters/mbfilter_sjis_open.c b/ext/mbstring/libmbfl/filters/mbfilter_sjis_open.c index 38244a0ac9..f24210c22f 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_sjis_open.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_sjis_open.c @@ -66,7 +66,7 @@ const mbfl_encoding mbfl_encoding_sjis_open = { "Shift_JIS", (const char *(*)[])&mbfl_encoding_sjis_open_aliases, mblen_table_sjis, - MBFL_ENCTYPE_MBCS + MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_GL_UNSAFE }; const struct mbfl_identify_vtbl vtbl_identify_sjis_open = { diff --git a/ext/mbstring/libmbfl/filters/mbfilter_utf7.c b/ext/mbstring/libmbfl/filters/mbfilter_utf7.c index ad0205bee1..2bb1dfada1 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_utf7.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_utf7.c @@ -57,7 +57,7 @@ const mbfl_encoding mbfl_encoding_utf7 = { "UTF-7", (const char *(*)[])&mbfl_encoding_utf7_aliases, NULL, - MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE + MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE | MBFL_ENCTYPE_GL_UNSAFE }; const struct mbfl_identify_vtbl vtbl_identify_utf7 = { diff --git a/ext/mbstring/libmbfl/mbfl/mbfilter.c b/ext/mbstring/libmbfl/mbfl/mbfilter.c index b8b1db2683..85cf59656e 100644 --- a/ext/mbstring/libmbfl/mbfl/mbfilter.c +++ b/ext/mbstring/libmbfl/mbfl/mbfilter.c @@ -128,6 +128,18 @@ mbfl_buffer_converter_new( enum mbfl_no_encoding to, int buf_initsz) { + const mbfl_encoding *_from = mbfl_no2encoding(from); + const mbfl_encoding *_to = mbfl_no2encoding(to); + + return mbfl_buffer_converter_new2(_from ? _from: &mbfl_encoding_pass, _to ? _to: &mbfl_encoding_pass, buf_initsz); +} + +mbfl_buffer_converter * +mbfl_buffer_converter_new2( + const mbfl_encoding *from, + const mbfl_encoding *to, + int buf_initsz) +{ mbfl_buffer_converter *convd; /* allocate */ @@ -137,14 +149,8 @@ mbfl_buffer_converter_new( } /* initialize */ - convd->from = mbfl_no2encoding(from); - convd->to = mbfl_no2encoding(to); - if (convd->from == NULL) { - convd->from = &mbfl_encoding_pass; - } - if (convd->to == NULL) { - convd->to = &mbfl_encoding_pass; - } + convd->from = from; + convd->to = to; /* create convert filter */ convd->filter1 = NULL; @@ -173,6 +179,7 @@ mbfl_buffer_converter_new( return convd; } + void mbfl_buffer_converter_delete(mbfl_buffer_converter *convd) { @@ -251,6 +258,12 @@ mbfl_buffer_converter_strncat(mbfl_buffer_converter *convd, const unsigned char int mbfl_buffer_converter_feed(mbfl_buffer_converter *convd, mbfl_string *string) { + return mbfl_buffer_converter_feed2(convd, string, NULL); +} + +int +mbfl_buffer_converter_feed2(mbfl_buffer_converter *convd, mbfl_string *string, int *loc) +{ int n; unsigned char *p; mbfl_convert_filter *filter; @@ -263,20 +276,27 @@ mbfl_buffer_converter_feed(mbfl_buffer_converter *convd, mbfl_string *string) /* feed data */ n = string->len; p = string->val; + filter = convd->filter1; if (filter != NULL) { filter_function = filter->filter_function; while (n > 0) { if ((*filter_function)(*p++, filter) < 0) { + if (loc) { + *loc = p - string->val; + } return -1; } n--; } } - + if (loc) { + *loc = p - string->val; + } return 0; } + int mbfl_buffer_converter_flush(mbfl_buffer_converter *convd) { @@ -400,6 +420,49 @@ mbfl_encoding_detector_new(enum mbfl_no_encoding *elist, int elistsz, int strict return identd; } +mbfl_encoding_detector * +mbfl_encoding_detector_new2(const mbfl_encoding **elist, int elistsz, int strict) +{ + mbfl_encoding_detector *identd; + + int i, num; + mbfl_identify_filter *filter; + + if (elist == NULL || elistsz <= 0) { + return NULL; + } + + /* allocate */ + identd = (mbfl_encoding_detector*)mbfl_malloc(sizeof(mbfl_encoding_detector)); + if (identd == NULL) { + return NULL; + } + identd->filter_list = (mbfl_identify_filter **)mbfl_calloc(elistsz, sizeof(mbfl_identify_filter *)); + if (identd->filter_list == NULL) { + mbfl_free(identd); + return NULL; + } + + /* create filters */ + i = 0; + num = 0; + while (i < elistsz) { + filter = mbfl_identify_filter_new2(elist[i]); + if (filter != NULL) { + identd->filter_list[num] = filter; + num++; + } + i++; + } + identd->filter_list_size = num; + + /* set strict flag */ + identd->strict = strict; + + return identd; +} + + void mbfl_encoding_detector_delete(mbfl_encoding_detector *identd) { @@ -454,33 +517,32 @@ mbfl_encoding_detector_feed(mbfl_encoding_detector *identd, mbfl_string *string) return res; } -enum mbfl_no_encoding mbfl_encoding_detector_judge(mbfl_encoding_detector *identd) +const mbfl_encoding *mbfl_encoding_detector_judge2(mbfl_encoding_detector *identd) { mbfl_identify_filter *filter; - enum mbfl_no_encoding encoding; + const mbfl_encoding *encoding = NULL; int n; /* judge */ - encoding = mbfl_no_encoding_invalid; if (identd != NULL) { n = identd->filter_list_size - 1; while (n >= 0) { filter = identd->filter_list[n]; if (!filter->flag) { if (!identd->strict || !filter->status) { - encoding = filter->encoding->no_encoding; + encoding = filter->encoding; } } n--; } /* fallback judge */ - if (encoding == mbfl_no_encoding_invalid) { + if (!encoding) { n = identd->filter_list_size - 1; while (n >= 0) { filter = identd->filter_list[n]; if (!filter->flag) { - encoding = filter->encoding->no_encoding; + encoding = filter->encoding; } n--; } @@ -490,6 +552,12 @@ enum mbfl_no_encoding mbfl_encoding_detector_judge(mbfl_encoding_detector *ident return encoding; } +enum mbfl_no_encoding mbfl_encoding_detector_judge(mbfl_encoding_detector *identd) +{ + const mbfl_encoding *encoding = mbfl_encoding_detector_judge2(identd); + return !encoding ? mbfl_no_encoding_invalid: encoding->no_encoding; +} + /* * encoding converter @@ -646,36 +714,88 @@ mbfl_identify_encoding(mbfl_string *string, enum mbfl_no_encoding *elist, int el return encoding; } -const char* -mbfl_identify_encoding_name(mbfl_string *string, enum mbfl_no_encoding *elist, int elistsz, int strict) +const mbfl_encoding * +mbfl_identify_encoding2(mbfl_string *string, const mbfl_encoding **elist, int elistsz, int strict) { + int i, n, num, bad; + unsigned char *p; + mbfl_identify_filter *flist, *filter; const mbfl_encoding *encoding; - encoding = mbfl_identify_encoding(string, elist, elistsz, strict); - if (encoding != NULL && - encoding->no_encoding > mbfl_no_encoding_charset_min && - encoding->no_encoding < mbfl_no_encoding_charset_max) { - return encoding->name; - } else { + /* flist is an array of mbfl_identify_filter instances */ + flist = (mbfl_identify_filter *)mbfl_calloc(elistsz, sizeof(mbfl_identify_filter)); + if (flist == NULL) { return NULL; } -} -enum mbfl_no_encoding -mbfl_identify_encoding_no(mbfl_string *string, enum mbfl_no_encoding *elist, int elistsz, int strict) -{ - const mbfl_encoding *encoding; + num = 0; + if (elist != NULL) { + for (i = 0; i < elistsz; i++) { + if (!mbfl_identify_filter_init2(&flist[num], elist[i])) { + num++; + } + } + } - encoding = mbfl_identify_encoding(string, elist, elistsz, strict); - if (encoding != NULL && - encoding->no_encoding > mbfl_no_encoding_charset_min && - encoding->no_encoding < mbfl_no_encoding_charset_max) { - return encoding->no_encoding; - } else { - return mbfl_no_encoding_invalid; + /* feed data */ + n = string->len; + p = string->val; + + if (p != NULL) { + bad = 0; + while (n > 0) { + for (i = 0; i < num; i++) { + filter = &flist[i]; + if (!filter->flag) { + (*filter->filter_function)(*p, filter); + if (filter->flag) { + bad++; + } + } + } + if ((num - 1) <= bad && !strict) { + break; + } + p++; + n--; + } } -} + /* judge */ + encoding = NULL; + + for (i = 0; i < num; i++) { + filter = &flist[i]; + if (!filter->flag) { + if (strict && filter->status) { + continue; + } + encoding = filter->encoding; + break; + } + } + + /* fall-back judge */ + if (!encoding) { + for (i = 0; i < num; i++) { + filter = &flist[i]; + if (!filter->flag && (!strict || !filter->status)) { + encoding = filter->encoding; + break; + } + } + } + + /* cleanup */ + /* dtors should be called in reverse order */ + i = num; while (--i >= 0) { + mbfl_identify_filter_cleanup(&flist[i]); + } + + mbfl_free((void *)flist); + + return encoding; +} /* * strlen diff --git a/ext/mbstring/libmbfl/mbfl/mbfilter.h b/ext/mbstring/libmbfl/mbfl/mbfilter.h index 4565fc6985..8e073c94d2 100644 --- a/ext/mbstring/libmbfl/mbfl/mbfilter.h +++ b/ext/mbstring/libmbfl/mbfl/mbfilter.h @@ -127,12 +127,14 @@ struct _mbfl_buffer_converter { }; MBFLAPI extern mbfl_buffer_converter * mbfl_buffer_converter_new(enum mbfl_no_encoding from, enum mbfl_no_encoding to, int buf_initsz); +MBFLAPI extern mbfl_buffer_converter * mbfl_buffer_converter_new2(const mbfl_encoding *from, const mbfl_encoding *to, int buf_initsz); MBFLAPI extern void mbfl_buffer_converter_delete(mbfl_buffer_converter *convd); MBFLAPI extern void mbfl_buffer_converter_reset(mbfl_buffer_converter *convd); MBFLAPI extern int mbfl_buffer_converter_illegal_mode(mbfl_buffer_converter *convd, int mode); MBFLAPI extern int mbfl_buffer_converter_illegal_substchar(mbfl_buffer_converter *convd, int substchar); MBFLAPI extern int mbfl_buffer_converter_strncat(mbfl_buffer_converter *convd, const unsigned char *p, int n); MBFLAPI extern int mbfl_buffer_converter_feed(mbfl_buffer_converter *convd, mbfl_string *string); +MBFLAPI extern int mbfl_buffer_converter_feed2(mbfl_buffer_converter *convd, mbfl_string *string, int *loc); MBFLAPI extern int mbfl_buffer_converter_flush(mbfl_buffer_converter *convd); MBFLAPI extern mbfl_string * mbfl_buffer_converter_getbuffer(mbfl_buffer_converter *convd, mbfl_string *result); MBFLAPI extern mbfl_string * mbfl_buffer_converter_result(mbfl_buffer_converter *convd, mbfl_string *result); @@ -151,9 +153,11 @@ struct _mbfl_encoding_detector { }; MBFLAPI extern mbfl_encoding_detector * mbfl_encoding_detector_new(enum mbfl_no_encoding *elist, int elistsz, int strict); +MBFLAPI extern mbfl_encoding_detector * mbfl_encoding_detector_new2(const mbfl_encoding **elist, int elistsz, int strict); MBFLAPI extern void mbfl_encoding_detector_delete(mbfl_encoding_detector *identd); MBFLAPI extern int mbfl_encoding_detector_feed(mbfl_encoding_detector *identd, mbfl_string *string); MBFLAPI extern enum mbfl_no_encoding mbfl_encoding_detector_judge(mbfl_encoding_detector *identd); +MBFLAPI extern const mbfl_encoding *mbfl_encoding_detector_judge2(mbfl_encoding_detector *identd); /* @@ -169,12 +173,8 @@ mbfl_convert_encoding(mbfl_string *string, mbfl_string *result, enum mbfl_no_enc MBFLAPI extern const mbfl_encoding * mbfl_identify_encoding(mbfl_string *string, enum mbfl_no_encoding *elist, int elistsz, int strict); -MBFLAPI extern const char * -mbfl_identify_encoding_name(mbfl_string *string, enum mbfl_no_encoding *elist, int elistsz, int strict); - -MBFLAPI extern enum mbfl_no_encoding -mbfl_identify_encoding_no(mbfl_string *string, enum mbfl_no_encoding *elist, int elistsz, int strict); - +MBFLAPI extern const mbfl_encoding * +mbfl_identify_encoding2(mbfl_string *string, const mbfl_encoding **elist, int elistsz, int strict); /* * strlen */ diff --git a/ext/mbstring/libmbfl/mbfl/mbfl_consts.h b/ext/mbstring/libmbfl/mbfl/mbfl_consts.h index b6c0bb2d87..05f11cdf22 100644 --- a/ext/mbstring/libmbfl/mbfl/mbfl_consts.h +++ b/ext/mbstring/libmbfl/mbfl/mbfl_consts.h @@ -42,7 +42,8 @@ #define MBFL_ENCTYPE_MWC4BE 0x00000400 #define MBFL_ENCTYPE_MWC4LE 0x00000800 #define MBFL_ENCTYPE_SHFTCODE 0x00001000 -#define MBFL_ENCTYPE_HTML_ENT 0x00002000 +#define MBFL_ENCTYPE_ENC_STRM 0x00002000 +#define MBFL_ENCTYPE_GL_UNSAFE 0x00004000 /* wchar plane, special charactor */ #define MBFL_WCSPLANE_MASK 0xffff diff --git a/ext/mbstring/libmbfl/mbfl/mbfl_ident.c b/ext/mbstring/libmbfl/mbfl/mbfl_ident.c index 9a89807053..0d61169af3 100644 --- a/ext/mbstring/libmbfl/mbfl/mbfl_ident.c +++ b/ext/mbstring/libmbfl/mbfl/mbfl_ident.c @@ -191,15 +191,37 @@ mbfl_identify_filter *mbfl_identify_filter_new(enum mbfl_no_encoding encoding) return filter; } +mbfl_identify_filter *mbfl_identify_filter_new2(const mbfl_encoding *encoding) +{ + mbfl_identify_filter *filter; + + /* allocate */ + filter = (mbfl_identify_filter *)mbfl_malloc(sizeof(mbfl_identify_filter)); + if (filter == NULL) { + return NULL; + } + + if (mbfl_identify_filter_init2(filter, encoding)) { + mbfl_free(filter); + return NULL; + } + + return filter; +} + + int mbfl_identify_filter_init(mbfl_identify_filter *filter, enum mbfl_no_encoding encoding) { + const mbfl_encoding *enc = mbfl_no2encoding(encoding); + return mbfl_identify_filter_init2(filter, enc ? enc: &mbfl_encoding_pass); +} + +int mbfl_identify_filter_init2(mbfl_identify_filter *filter, const mbfl_encoding *encoding) +{ const struct mbfl_identify_vtbl *vtbl; /* encoding structure */ - filter->encoding = mbfl_no2encoding(encoding); - if (filter->encoding == NULL) { - filter->encoding = &mbfl_encoding_pass; - } + filter->encoding = encoding; filter->status = 0; filter->flag = 0; diff --git a/ext/mbstring/libmbfl/mbfl/mbfl_ident.h b/ext/mbstring/libmbfl/mbfl/mbfl_ident.h index b0721fc413..12d81cde8c 100644 --- a/ext/mbstring/libmbfl/mbfl/mbfl_ident.h +++ b/ext/mbstring/libmbfl/mbfl/mbfl_ident.h @@ -58,8 +58,10 @@ struct mbfl_identify_vtbl { MBFLAPI extern const struct mbfl_identify_vtbl * mbfl_identify_filter_get_vtbl(enum mbfl_no_encoding encoding); MBFLAPI extern mbfl_identify_filter * mbfl_identify_filter_new(enum mbfl_no_encoding encoding); +MBFLAPI extern mbfl_identify_filter * mbfl_identify_filter_new2(const mbfl_encoding *encoding); MBFLAPI extern void mbfl_identify_filter_delete(mbfl_identify_filter *filter); MBFLAPI extern int mbfl_identify_filter_init(mbfl_identify_filter *filter, enum mbfl_no_encoding encoding); +MBFLAPI extern int mbfl_identify_filter_init2(mbfl_identify_filter *filter, const mbfl_encoding *encoding); MBFLAPI void mbfl_identify_filter_cleanup(mbfl_identify_filter *filter); MBFLAPI extern void mbfl_filt_ident_common_ctor(mbfl_identify_filter *filter); diff --git a/ext/mbstring/mb_gpc.c b/ext/mbstring/mb_gpc.c index acfde4d5ae..396eb4a60a 100644 --- a/ext/mbstring/mb_gpc.c +++ b/ext/mbstring/mb_gpc.c @@ -27,6 +27,7 @@ #include "php.h" #include "php_ini.h" #include "php_variables.h" +#include "libmbfl/mbfl/mbfilter_pass.h" #include "mbstring.h" #include "ext/standard/php_string.h" #include "ext/standard/php_mail.h" @@ -56,7 +57,7 @@ MBSTRING_API SAPI_TREAT_DATA_FUNC(mbstr_treat_data) const char *c_var; zval *array_ptr; int free_buffer=0; - enum mbfl_no_encoding detected; + const mbfl_encoding *detected; php_mb_encoding_handler_info_t info; if (arg != PARSE_STRING) { @@ -136,16 +137,16 @@ MBSTRING_API SAPI_TREAT_DATA_FUNC(mbstr_treat_data) switch(arg) { case PARSE_POST: - MBSTRG(http_input_identify_post) = mbfl_no_encoding_invalid; + MBSTRG(http_input_identify_post) = NULL; break; case PARSE_GET: - MBSTRG(http_input_identify_get) = mbfl_no_encoding_invalid; + MBSTRG(http_input_identify_get) = NULL; break; case PARSE_COOKIE: - MBSTRG(http_input_identify_cookie) = mbfl_no_encoding_invalid; + MBSTRG(http_input_identify_cookie) = NULL; break; case PARSE_STRING: - MBSTRG(http_input_identify_string) = mbfl_no_encoding_invalid; + MBSTRG(http_input_identify_string) = NULL; break; } @@ -163,7 +164,7 @@ MBSTRING_API SAPI_TREAT_DATA_FUNC(mbstr_treat_data) detected = _php_mb_encoding_handler_ex(&info, array_ptr, res TSRMLS_CC); MBSTRG(http_input_identify) = detected; - if (detected != mbfl_no_encoding_invalid) { + if (detected) { switch(arg){ case PARSE_POST: MBSTRG(http_input_identify_post) = detected; @@ -191,7 +192,7 @@ MBSTRING_API SAPI_TREAT_DATA_FUNC(mbstr_treat_data) /* }}} */ /* {{{ mbfl_no_encoding _php_mb_encoding_handler_ex() */ -enum mbfl_no_encoding _php_mb_encoding_handler_ex(const php_mb_encoding_handler_info_t *info, zval *arg, char *res TSRMLS_DC) +const mbfl_encoding *_php_mb_encoding_handler_ex(const php_mb_encoding_handler_info_t *info, zval *arg, char *res TSRMLS_DC) { char *var, *val; const char *s1, *s2; @@ -200,13 +201,13 @@ enum mbfl_no_encoding _php_mb_encoding_handler_ex(const php_mb_encoding_handler_ int n, num, *len_list = NULL; unsigned int val_len, new_val_len; mbfl_string string, resvar, resval; - enum mbfl_no_encoding from_encoding = mbfl_no_encoding_invalid; + const mbfl_encoding *from_encoding = NULL; mbfl_encoding_detector *identd = NULL; mbfl_buffer_converter *convd = NULL; - mbfl_string_init_set(&string, info->to_language, info->to_encoding); - mbfl_string_init_set(&resvar, info->to_language, info->to_encoding); - mbfl_string_init_set(&resval, info->to_language, info->to_encoding); + mbfl_string_init_set(&string, info->to_language, info->to_encoding->no_encoding); + mbfl_string_init_set(&resvar, info->to_language, info->to_encoding->no_encoding); + mbfl_string_init_set(&resval, info->to_language, info->to_encoding->no_encoding); if (!res || *res == '\0') { goto out; @@ -257,12 +258,12 @@ enum mbfl_no_encoding _php_mb_encoding_handler_ex(const php_mb_encoding_handler_ /* initialize converter */ if (info->num_from_encodings <= 0) { - from_encoding = mbfl_no_encoding_pass; + from_encoding = &mbfl_encoding_pass; } else if (info->num_from_encodings == 1) { from_encoding = info->from_encodings[0]; } else { /* auto detect */ - from_encoding = mbfl_no_encoding_invalid; + from_encoding = NULL; identd = mbfl_encoding_detector_new((enum mbfl_no_encoding *)info->from_encodings, info->num_from_encodings, MBSTRG(strict_detection)); if (identd) { n = 0; @@ -274,10 +275,10 @@ enum mbfl_no_encoding _php_mb_encoding_handler_ex(const php_mb_encoding_handler_ } n++; } - from_encoding = mbfl_encoding_detector_judge(identd); + from_encoding = mbfl_encoding_detector_judge2(identd); mbfl_encoding_detector_delete(identd); } - if (from_encoding == mbfl_no_encoding_invalid) { + if (!from_encoding) { if (info->report_errors) { php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to detect encoding"); } @@ -286,8 +287,8 @@ enum mbfl_no_encoding _php_mb_encoding_handler_ex(const php_mb_encoding_handler_ } convd = NULL; - if (from_encoding != mbfl_no_encoding_pass) { - convd = mbfl_buffer_converter_new(from_encoding, info->to_encoding, 0); + if (from_encoding != &mbfl_encoding_pass) { + convd = mbfl_buffer_converter_new2(from_encoding, info->to_encoding, 0); if (convd != NULL) { mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode)); mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar)); @@ -300,7 +301,7 @@ enum mbfl_no_encoding _php_mb_encoding_handler_ex(const php_mb_encoding_handler_ } /* convert encoding */ - string.no_encoding = from_encoding; + string.no_encoding = from_encoding->no_encoding; n = 0; while (n < num) { @@ -312,10 +313,10 @@ enum mbfl_no_encoding _php_mb_encoding_handler_ex(const php_mb_encoding_handler_ var = val_list[n]; } n++; - string.val = val_list[n]; + string.val = (unsigned char *)val_list[n]; string.len = len_list[n]; if (convd != NULL && mbfl_buffer_converter_feed_result(convd, &string, &resval) != NULL) { - val = resval.val; + val = (char *)resval.val; val_len = resval.len; } else { val = val_list[n]; @@ -355,10 +356,10 @@ out: /* {{{ SAPI_POST_HANDLER_FUNC(php_mb_post_handler) */ SAPI_POST_HANDLER_FUNC(php_mb_post_handler) { - enum mbfl_no_encoding detected; + const mbfl_encoding *detected; php_mb_encoding_handler_info_t info; - MBSTRG(http_input_identify_post) = mbfl_no_encoding_invalid; + MBSTRG(http_input_identify_post) = NULL; info.data_type = PARSE_POST; info.separator = "&"; @@ -372,7 +373,7 @@ SAPI_POST_HANDLER_FUNC(php_mb_post_handler) detected = _php_mb_encoding_handler_ex(&info, arg, SG(request_info).post_data TSRMLS_CC); MBSTRG(http_input_identify) = detected; - if (detected != mbfl_no_encoding_invalid) { + if (detected) { MBSTRG(http_input_identify_post) = detected; } } diff --git a/ext/mbstring/mb_gpc.h b/ext/mbstring/mb_gpc.h index 83090c3bc9..ab6fcc86e0 100644 --- a/ext/mbstring/mb_gpc.h +++ b/ext/mbstring/mb_gpc.h @@ -34,10 +34,10 @@ typedef struct _php_mb_encoding_handler_info_t { const char *separator; unsigned int report_errors: 1; enum mbfl_no_language to_language; - enum mbfl_no_encoding to_encoding; + const mbfl_encoding *to_encoding; enum mbfl_no_language from_language; - int num_from_encodings; - const enum mbfl_no_encoding *from_encodings; + const mbfl_encoding **from_encodings; + size_t num_from_encodings; } php_mb_encoding_handler_info_t; /* }}}*/ @@ -47,7 +47,7 @@ SAPI_POST_HANDLER_FUNC(php_mb_post_handler); MBSTRING_API SAPI_TREAT_DATA_FUNC(mbstr_treat_data); int _php_mb_enable_encoding_translation(int flag); -enum mbfl_no_encoding _php_mb_encoding_handler_ex(const php_mb_encoding_handler_info_t *info, zval *arg, char *res TSRMLS_DC); +const mbfl_encoding *_php_mb_encoding_handler_ex(const php_mb_encoding_handler_info_t *info, zval *arg, char *res TSRMLS_DC); /* }}} */ #endif /* HAVE_MBSTRING */ diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c index d4119dda97..7013ebc0da 100644 --- a/ext/mbstring/mbstring.c +++ b/ext/mbstring/mbstring.c @@ -62,6 +62,7 @@ #include "ext/standard/info.h" #include "libmbfl/mbfl/mbfl_allocators.h" +#include "libmbfl/mbfl/mbfilter_pass.h" #include "php_variables.h" #include "php_globals.h" @@ -96,18 +97,15 @@ ZEND_DECLARE_MODULE_GLOBALS(mbstring) static PHP_GINIT_FUNCTION(mbstring); static PHP_GSHUTDOWN_FUNCTION(mbstring); -static const char* php_mb_internal_encoding_name(TSRMLS_D); -static size_t php_mb_oddlen(const unsigned char *string, size_t length, const char *encoding TSRMLS_DC); -static int php_mb_encoding_converter(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length, const char *encoding_to, const char *encoding_from TSRMLS_DC); -static char* php_mb_encoding_detector(const unsigned char *arg_string, size_t arg_length, char *arg_list TSRMLS_DC); -static int php_mb_set_zend_encoding(TSRMLS_D); +static void php_mb_populate_current_detect_order_list(TSRMLS_D); + /* }}} */ /* {{{ php_mb_default_identify_list */ typedef struct _php_mb_nls_ident_list { enum mbfl_no_language lang; - const enum mbfl_no_encoding* list; - int list_size; + const enum mbfl_no_encoding *list; + size_t list_size; } php_mb_nls_ident_list; static const enum mbfl_no_encoding php_mb_default_identify_list_ja[] = { @@ -650,12 +648,12 @@ static sapi_post_entry mbstr_post_entries[] = { * of parsed encodings. */ static int -php_mb_parse_encoding_list(const char *value, int value_length, enum mbfl_no_encoding **return_list, int *return_size, int persistent TSRMLS_DC) +php_mb_parse_encoding_list(const char *value, size_t value_length, const mbfl_encoding ***return_list, size_t *return_size, int persistent TSRMLS_DC) { - int n, l, size, bauto, ret = 1; + int size, bauto, ret = SUCCESS; + size_t n; char *p, *p1, *p2, *endp, *tmpstr; - enum mbfl_no_encoding no_encoding; - enum mbfl_no_encoding *src, *entry, *list; + const mbfl_encoding **entry, **list; list = NULL; if (value == NULL || value_length <= 0) { @@ -665,14 +663,8 @@ php_mb_parse_encoding_list(const char *value, int value_length, enum mbfl_no_enc if (return_size) { *return_size = 0; } - return 0; + return FAILURE; } else { - enum mbfl_no_encoding *identify_list; - int identify_list_size; - - identify_list = MBSTRG(default_detect_order_list); - identify_list_size = MBSTRG(default_detect_order_list_size); - /* copy the value string for work */ if (value[0]=='"' && value[value_length-1]=='"' && value_length>2) { tmpstr = (char *)estrndup(value+1, value_length-2); @@ -681,7 +673,7 @@ php_mb_parse_encoding_list(const char *value, int value_length, enum mbfl_no_enc else tmpstr = (char *)estrndup(value, value_length); if (tmpstr == NULL) { - return 0; + return FAILURE; } /* count the number of listed encoding names */ endp = tmpstr + value_length; @@ -691,9 +683,9 @@ php_mb_parse_encoding_list(const char *value, int value_length, enum mbfl_no_enc p1 = p2 + 1; n++; } - size = n + identify_list_size; + size = n + MBSTRG(default_detect_order_list_size); /* make list */ - list = (enum mbfl_no_encoding *)pecalloc(size, sizeof(int), persistent); + list = (const mbfl_encoding **)pecalloc(size, sizeof(mbfl_encoding*), persistent); if (list != NULL) { entry = list; n = 0; @@ -717,19 +709,19 @@ php_mb_parse_encoding_list(const char *value, int value_length, enum mbfl_no_enc /* convert to the encoding number and check encoding */ if (strcasecmp(p1, "auto") == 0) { if (!bauto) { + const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list); + const size_t identify_list_size = MBSTRG(default_detect_order_list_size); + size_t i; bauto = 1; - l = identify_list_size; - src = identify_list; - while (l > 0) { - *entry++ = *src++; - l--; + for (i = 0; i < identify_list_size; i++) { + *entry++ = mbfl_no2encoding(*src++); n++; } } } else { - no_encoding = mbfl_name2no_encoding(p1); - if (no_encoding != mbfl_no_encoding_invalid) { - *entry++ = no_encoding; + const mbfl_encoding *encoding = mbfl_name2encoding(p1); + if (encoding) { + *entry++ = encoding; n++; } else { ret = 0; @@ -769,40 +761,26 @@ php_mb_parse_encoding_list(const char *value, int value_length, enum mbfl_no_enc } /* }}} */ -/* {{{ MBSTRING_API php_mb_check_encoding_list */ -MBSTRING_API int php_mb_check_encoding_list(const char *encoding_list TSRMLS_DC) -{ - return php_mb_parse_encoding_list(encoding_list, strlen(encoding_list), NULL, NULL, 0 TSRMLS_CC); -} -/* }}} */ - /* {{{ static int php_mb_parse_encoding_array() * Return 0 if input contains any illegal encoding, otherwise 1. * Even if any illegal encoding is detected the result may contain a list * of parsed encodings. */ static int -php_mb_parse_encoding_array(zval *array, enum mbfl_no_encoding **return_list, int *return_size, int persistent TSRMLS_DC) +php_mb_parse_encoding_array(zval *array, const mbfl_encoding ***return_list, size_t *return_size, int persistent TSRMLS_DC) { zval **hash_entry; HashTable *target_hash; - int i, n, l, size, bauto,ret = 1; - enum mbfl_no_encoding no_encoding; - enum mbfl_no_encoding *src, *list, *entry; + int i, n, size, bauto, ret = SUCCESS; + const mbfl_encoding **list, **entry; list = NULL; if (Z_TYPE_P(array) == IS_ARRAY) { - enum mbfl_no_encoding *identify_list; - int identify_list_size; - - identify_list = MBSTRG(default_detect_order_list); - identify_list_size = MBSTRG(default_detect_order_list_size); - target_hash = Z_ARRVAL_P(array); zend_hash_internal_pointer_reset(target_hash); i = zend_hash_num_elements(target_hash); - size = i + identify_list_size; - list = (enum mbfl_no_encoding *)pecalloc(size, sizeof(int), persistent); + size = i + MBSTRG(default_detect_order_list_size); + list = (const mbfl_encoding **)pecalloc(size, sizeof(mbfl_encoding*), persistent); if (list != NULL) { entry = list; bauto = 0; @@ -814,22 +792,23 @@ php_mb_parse_encoding_array(zval *array, enum mbfl_no_encoding **return_list, in convert_to_string_ex(hash_entry); if (strcasecmp(Z_STRVAL_PP(hash_entry), "auto") == 0) { if (!bauto) { + const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list); + const size_t identify_list_size = MBSTRG(default_detect_order_list_size); + size_t j; + bauto = 1; - l = identify_list_size; - src = identify_list; - while (l > 0) { - *entry++ = *src++; - l--; + for (j = 0; j < identify_list_size; j++) { + *entry++ = mbfl_no2encoding(*src++); n++; } } } else { - no_encoding = mbfl_name2no_encoding(Z_STRVAL_PP(hash_entry)); - if (no_encoding != mbfl_no_encoding_invalid) { - *entry++ = no_encoding; + const mbfl_encoding *encoding = mbfl_name2encoding(Z_STRVAL_PP(hash_entry)); + if (encoding) { + *entry++ = encoding; n++; } else { - ret = 0; + ret = FAILURE; } } zend_hash_move_forward(target_hash); @@ -846,7 +825,7 @@ php_mb_parse_encoding_array(zval *array, enum mbfl_no_encoding **return_list, in if (return_list) { *return_list = NULL; } - ret = 0; + ret = FAILURE; } if (return_size) { *return_size = n; @@ -858,7 +837,7 @@ php_mb_parse_encoding_array(zval *array, enum mbfl_no_encoding **return_list, in if (return_size) { *return_size = 0; } - ret = 0; + ret = FAILURE; } } @@ -866,6 +845,118 @@ php_mb_parse_encoding_array(zval *array, enum mbfl_no_encoding **return_list, in } /* }}} */ +/* {{{ zend_multibyte interface */ +static const zend_encoding* php_mb_zend_encoding_fetcher(const char *encoding_name TSRMLS_DC) +{ + return (const zend_encoding*)mbfl_name2encoding(encoding_name); +} + +static const char *php_mb_zend_encoding_name_getter(const zend_encoding *encoding) +{ + return ((const mbfl_encoding *)encoding)->name; +} + +static int php_mb_zend_encoding_lexer_compatibility_checker(const zend_encoding *_encoding) +{ + const mbfl_encoding *encoding = (const mbfl_encoding*)_encoding; + if (encoding->flag & MBFL_ENCTYPE_SBCS) { + return 1; + } + if ((encoding->flag & (MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_GL_UNSAFE)) == MBFL_ENCTYPE_MBCS) { + return 1; + } + return 0; +} + +static const zend_encoding *php_mb_zend_encoding_detector(const unsigned char *arg_string, size_t arg_length, const zend_encoding **list, size_t list_size TSRMLS_DC) +{ + mbfl_string string; + + if (!list) { + list = (const zend_encoding **)MBSTRG(current_detect_order_list); + list_size = MBSTRG(current_detect_order_list_size); + } + + mbfl_string_init(&string); + string.no_language = MBSTRG(language); + string.val = (unsigned char *)arg_string; + string.len = arg_length; + return (const zend_encoding *) mbfl_identify_encoding2(&string, (const mbfl_encoding **)list, list_size, 0); +} + +static size_t php_mb_zend_encoding_converter(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length, const zend_encoding *encoding_to, const zend_encoding *encoding_from TSRMLS_DC) +{ + mbfl_string string, result; + mbfl_buffer_converter *convd; + int status, loc; + + /* new encoding */ + /* initialize string */ + mbfl_string_init(&string); + mbfl_string_init(&result); + string.no_encoding = ((const mbfl_encoding*)encoding_from)->no_encoding; + string.no_language = MBSTRG(language); + string.val = (unsigned char*)from; + string.len = from_length; + + /* initialize converter */ + convd = mbfl_buffer_converter_new2((const mbfl_encoding *)encoding_from, (const mbfl_encoding *)encoding_to, string.len); + if (convd == NULL) { + return -1; + } + mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode)); + mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar)); + + /* do it */ + status = mbfl_buffer_converter_feed2(convd, &string, &loc); + if (status) { + mbfl_buffer_converter_delete(convd); + return (size_t)-1; + } + + mbfl_buffer_converter_flush(convd); + if (!mbfl_buffer_converter_result(convd, &result)) { + mbfl_buffer_converter_delete(convd); + return (size_t)-1; + } + + *to = result.val; + *to_length = result.len; + + mbfl_buffer_converter_delete(convd); + + return loc; +} + +static int php_mb_zend_encoding_list_parser(const char *encoding_list, size_t encoding_list_len, const zend_encoding ***return_list, size_t *return_size, int persistent TSRMLS_DC) +{ + return php_mb_parse_encoding_list(encoding_list, encoding_list_len, (const mbfl_encoding ***)return_list, return_size, persistent TSRMLS_CC); +} + +static const zend_encoding *php_mb_zend_internal_encoding_getter(TSRMLS_D) +{ + return (const zend_encoding *)MBSTRG(internal_encoding); +} + +static int php_mb_zend_internal_encoding_setter(const zend_encoding *encoding TSRMLS_DC) +{ + MBSTRG(internal_encoding) = (const mbfl_encoding *)encoding; + return SUCCESS; +} + +static zend_multibyte_functions php_mb_zend_multibyte_functions = { + "mbstring", + php_mb_zend_encoding_fetcher, + php_mb_zend_encoding_name_getter, + php_mb_zend_encoding_lexer_compatibility_checker, + php_mb_zend_encoding_detector, + php_mb_zend_encoding_converter, + php_mb_zend_encoding_list_parser, + php_mb_zend_internal_encoding_getter, + php_mb_zend_internal_encoding_setter +}; +/* }}} */ + static void *_php_mb_compile_regex(const char *pattern TSRMLS_DC); static int _php_mb_match_regex(void *opaque, const char *str, size_t str_len); static void _php_mb_free_regex(void *opaque); @@ -940,7 +1031,7 @@ static void _php_mb_free_regex(void *opaque) #endif /* {{{ php_mb_nls_get_default_detect_order_list */ -static int php_mb_nls_get_default_detect_order_list(enum mbfl_no_language lang, enum mbfl_no_encoding **plist, int* plist_size) +static int php_mb_nls_get_default_detect_order_list(enum mbfl_no_language lang, enum mbfl_no_encoding **plist, size_t *plist_size) { size_t i; @@ -1048,23 +1139,27 @@ static PHP_INI_MH(OnUpdate_mbstring_language) /* {{{ static PHP_INI_MH(OnUpdate_mbstring_detect_order) */ static PHP_INI_MH(OnUpdate_mbstring_detect_order) { - enum mbfl_no_encoding *list; - int size; + const mbfl_encoding **list; + size_t size; - if (php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, 1 TSRMLS_CC)) { - if (MBSTRG(detect_order_list)) { - free(MBSTRG(detect_order_list)); - } - MBSTRG(detect_order_list) = list; - MBSTRG(detect_order_list_size) = size; - } else { + if (!new_value) { if (MBSTRG(detect_order_list)) { - free(MBSTRG(detect_order_list)); - MBSTRG(detect_order_list) = NULL; + pefree(MBSTRG(detect_order_list), 1); } + MBSTRG(detect_order_list) = NULL; + MBSTRG(detect_order_list_size) = 0; + return SUCCESS; + } + + if (FAILURE == php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, 1 TSRMLS_CC)) { return FAILURE; } + if (MBSTRG(detect_order_list)) { + pefree(MBSTRG(detect_order_list), 1); + } + MBSTRG(detect_order_list) = list; + MBSTRG(detect_order_list_size) = size; return SUCCESS; } /* }}} */ @@ -1072,24 +1167,28 @@ static PHP_INI_MH(OnUpdate_mbstring_detect_order) /* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_input) */ static PHP_INI_MH(OnUpdate_mbstring_http_input) { - enum mbfl_no_encoding *list; - int size; + const mbfl_encoding **list; + size_t size; - if (php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, 1 TSRMLS_CC)) { - if (MBSTRG(http_input_list)) { - free(MBSTRG(http_input_list)); - } - MBSTRG(http_input_list) = list; - MBSTRG(http_input_list_size) = size; - } else { + if (!new_value) { if (MBSTRG(http_input_list)) { - free(MBSTRG(http_input_list)); - MBSTRG(http_input_list) = NULL; + pefree(MBSTRG(http_input_list), 1); } + MBSTRG(http_input_list) = NULL; MBSTRG(http_input_list_size) = 0; + return SUCCESS; + } + + if (FAILURE == php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, 1 TSRMLS_CC)) { return FAILURE; } + if (MBSTRG(http_input_list)) { + pefree(MBSTRG(http_input_list), 1); + } + MBSTRG(http_input_list) = list; + MBSTRG(http_input_list_size) = size; + return SUCCESS; } /* }}} */ @@ -1097,20 +1196,23 @@ static PHP_INI_MH(OnUpdate_mbstring_http_input) /* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_output) */ static PHP_INI_MH(OnUpdate_mbstring_http_output) { - enum mbfl_no_encoding no_encoding; + const mbfl_encoding *encoding; - no_encoding = mbfl_name2no_encoding(new_value); - if (no_encoding != mbfl_no_encoding_invalid) { - MBSTRG(http_output_encoding) = no_encoding; - MBSTRG(current_http_output_encoding) = no_encoding; - } else { - MBSTRG(http_output_encoding) = mbfl_no_encoding_pass; - MBSTRG(current_http_output_encoding) = mbfl_no_encoding_pass; - if (new_value != NULL && new_value_length > 0) { - return FAILURE; - } + if (new_value == NULL || new_value_length == 0) { + MBSTRG(http_output_encoding) = &mbfl_encoding_pass; + MBSTRG(current_http_output_encoding) = &mbfl_encoding_pass; + return SUCCESS; + } + + encoding = mbfl_name2encoding(new_value); + if (!encoding) { + MBSTRG(http_output_encoding) = &mbfl_encoding_pass; + MBSTRG(current_http_output_encoding) = &mbfl_encoding_pass; + return FAILURE; } + MBSTRG(http_output_encoding) = encoding; + MBSTRG(current_http_output_encoding) = encoding; return SUCCESS; } /* }}} */ @@ -1118,46 +1220,44 @@ static PHP_INI_MH(OnUpdate_mbstring_http_output) /* {{{ static _php_mb_ini_mbstring_internal_encoding_set */ int _php_mb_ini_mbstring_internal_encoding_set(const char *new_value, uint new_value_length TSRMLS_DC) { - enum mbfl_no_encoding no_encoding; - - if (!new_value - || !*new_value - || (no_encoding = mbfl_name2no_encoding(new_value)) == mbfl_no_encoding_invalid) { + const mbfl_encoding *encoding; + + if (!new_value || new_value_length == 0 || !(encoding = mbfl_name2encoding(new_value))) { switch (MBSTRG(language)) { case mbfl_no_language_uni: - no_encoding = mbfl_no_encoding_utf8; + encoding = mbfl_no2encoding(mbfl_no_encoding_utf8); break; case mbfl_no_language_japanese: - no_encoding = mbfl_no_encoding_euc_jp; + encoding = mbfl_no2encoding(mbfl_no_encoding_euc_jp); break; case mbfl_no_language_korean: - no_encoding = mbfl_no_encoding_euc_kr; + encoding = mbfl_no2encoding(mbfl_no_encoding_euc_kr); break; case mbfl_no_language_simplified_chinese: - no_encoding = mbfl_no_encoding_euc_cn; + encoding = mbfl_no2encoding(mbfl_no_encoding_euc_cn); break; case mbfl_no_language_traditional_chinese: - no_encoding = mbfl_no_encoding_euc_tw; + encoding = mbfl_no2encoding(mbfl_no_encoding_euc_tw); break; case mbfl_no_language_russian: - no_encoding = mbfl_no_encoding_koi8r; + encoding = mbfl_no2encoding(mbfl_no_encoding_koi8r); break; case mbfl_no_language_german: - no_encoding = mbfl_no_encoding_8859_15; + encoding = mbfl_no2encoding(mbfl_no_encoding_8859_15); break; case mbfl_no_language_armenian: - no_encoding = mbfl_no_encoding_armscii8; + encoding = mbfl_no2encoding(mbfl_no_encoding_armscii8); break; case mbfl_no_language_turkish: - no_encoding = mbfl_no_encoding_8859_9; + encoding = mbfl_no2encoding(mbfl_no_encoding_8859_9); break; default: - no_encoding = mbfl_no_encoding_8859_1; + encoding = NULL; break; } } - MBSTRG(internal_encoding) = no_encoding; - MBSTRG(current_internal_encoding) = no_encoding; + MBSTRG(internal_encoding) = encoding; + MBSTRG(current_internal_encoding) = encoding; #if HAVE_MBREGEX { const char *enc_name = new_value; @@ -1194,33 +1294,6 @@ static PHP_INI_MH(OnUpdate_mbstring_internal_encoding) } /* }}} */ -/* {{{ static PHP_INI_MH(OnUpdate_mbstring_script_encoding) */ -static PHP_INI_MH(OnUpdate_mbstring_script_encoding) -{ - int *list, size; - - if (!CG(multibyte)) { - return FAILURE; - } - if (php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, 1 TSRMLS_CC)) { - if (MBSTRG(script_encoding_list) != NULL) { - free(MBSTRG(script_encoding_list)); - } - MBSTRG(script_encoding_list) = list; - MBSTRG(script_encoding_list_size) = size; - } else { - if (MBSTRG(script_encoding_list) != NULL) { - free(MBSTRG(script_encoding_list)); - } - MBSTRG(script_encoding_list) = NULL; - MBSTRG(script_encoding_list_size) = 0; - return FAILURE; - } - - return SUCCESS; -} -/* }}} */ - /* {{{ static PHP_INI_MH(OnUpdate_mbstring_substitute_character) */ static PHP_INI_MH(OnUpdate_mbstring_substitute_character) { @@ -1263,7 +1336,7 @@ static PHP_INI_MH(OnUpdate_mbstring_substitute_character) static PHP_INI_MH(OnUpdate_mbstring_encoding_translation) { if (new_value == NULL) { - return FAILURE; + return FAILURE; } OnUpdateBool(entry, new_value, new_value_length, mh_arg1, mh_arg2, mh_arg3, stage TSRMLS_CC); @@ -1318,7 +1391,6 @@ PHP_INI_BEGIN() PHP_INI_ENTRY("mbstring.http_input", "pass", PHP_INI_ALL, OnUpdate_mbstring_http_input) PHP_INI_ENTRY("mbstring.http_output", "pass", PHP_INI_ALL, OnUpdate_mbstring_http_output) STD_PHP_INI_ENTRY("mbstring.internal_encoding", NULL, PHP_INI_ALL, OnUpdate_mbstring_internal_encoding, internal_encoding_name, zend_mbstring_globals, mbstring_globals) - PHP_INI_ENTRY("mbstring.script_encoding", NULL, PHP_INI_ALL, OnUpdate_mbstring_script_encoding) PHP_INI_ENTRY("mbstring.substitute_character", NULL, PHP_INI_ALL, OnUpdate_mbstring_substitute_character) STD_PHP_INI_ENTRY("mbstring.func_overload", "0", PHP_INI_SYSTEM, OnUpdateLong, func_overload, zend_mbstring_globals, mbstring_globals) @@ -1343,17 +1415,15 @@ PHP_INI_END() static PHP_GINIT_FUNCTION(mbstring) { mbstring_globals->language = mbfl_no_language_uni; - mbstring_globals->internal_encoding = mbfl_no_encoding_invalid; + mbstring_globals->internal_encoding = NULL; mbstring_globals->current_internal_encoding = mbstring_globals->internal_encoding; - mbstring_globals->script_encoding_list = NULL; - mbstring_globals->script_encoding_list_size = 0; - mbstring_globals->http_output_encoding = mbfl_no_encoding_pass; - mbstring_globals->current_http_output_encoding = mbfl_no_encoding_pass; - mbstring_globals->http_input_identify = mbfl_no_encoding_invalid; - mbstring_globals->http_input_identify_get = mbfl_no_encoding_invalid; - mbstring_globals->http_input_identify_post = mbfl_no_encoding_invalid; - mbstring_globals->http_input_identify_cookie = mbfl_no_encoding_invalid; - mbstring_globals->http_input_identify_string = mbfl_no_encoding_invalid; + mbstring_globals->http_output_encoding = &mbfl_encoding_pass; + mbstring_globals->current_http_output_encoding = &mbfl_encoding_pass; + mbstring_globals->http_input_identify = NULL; + mbstring_globals->http_input_identify_get = NULL; + mbstring_globals->http_input_identify_post = NULL; + mbstring_globals->http_input_identify_cookie = NULL; + mbstring_globals->http_input_identify_string = NULL; mbstring_globals->http_input_list = NULL; mbstring_globals->http_input_list_size = 0; mbstring_globals->detect_order_list = NULL; @@ -1384,9 +1454,6 @@ static PHP_GSHUTDOWN_FUNCTION(mbstring) if (mbstring_globals->http_input_list) { free(mbstring_globals->http_input_list); } - if (mbstring_globals->script_encoding_list) { - free(mbstring_globals->script_encoding_list); - } if (mbstring_globals->detect_order_list) { free(mbstring_globals->detect_order_list); } @@ -1426,12 +1493,9 @@ PHP_MINIT_FUNCTION(mbstring) PHP_MINIT(mb_regex) (INIT_FUNC_ARGS_PASSTHRU); #endif - zend_multibyte_set_functions( - php_mb_encoding_detector, - php_mb_encoding_converter, - php_mb_oddlen, - php_mb_check_encoding_list, - php_mb_internal_encoding_name TSRMLS_CC); + if (FAILURE == zend_multibyte_set_functions(&php_mb_zend_multibyte_functions TSRMLS_CC)) { + return FAILURE; + } php_rfc1867_set_multibyte_callbacks( php_mb_encoding_translation, @@ -1460,8 +1524,6 @@ PHP_MSHUTDOWN_FUNCTION(mbstring) /* {{{ PHP_RINIT_FUNCTION(mbstring) */ PHP_RINIT_FUNCTION(mbstring) { - int n; - enum mbfl_no_encoding *list=NULL, *entry; zend_function *func, *orig; const struct mb_overload_def *p; @@ -1472,22 +1534,7 @@ PHP_RINIT_FUNCTION(mbstring) MBSTRG(illegalchars) = 0; - n = 0; - if (MBSTRG(detect_order_list)) { - list = MBSTRG(detect_order_list); - n = MBSTRG(detect_order_list_size); - } - if (n <= 0) { - list = MBSTRG(default_detect_order_list); - n = MBSTRG(default_detect_order_list_size); - } - entry = (enum mbfl_no_encoding *)safe_emalloc(n, sizeof(int), 0); - MBSTRG(current_detect_order_list) = entry; - MBSTRG(current_detect_order_list_size) = n; - while (n > 0) { - *entry++ = *list++; - n--; - } + php_mb_populate_current_detect_order_list(TSRMLS_C); /* override original function. */ if (MBSTRG(func_overload)){ @@ -1519,10 +1566,7 @@ PHP_RINIT_FUNCTION(mbstring) #if HAVE_MBREGEX PHP_RINIT(mb_regex) (INIT_FUNC_ARGS_PASSTHRU); #endif - if (CG(multibyte)) { - zend_multibyte_set_internal_encoding(mbfl_no_encoding2name(MBSTRG(internal_encoding)) TSRMLS_CC); - php_mb_set_zend_encoding(TSRMLS_C); - } + zend_multibyte_set_internal_encoding((const zend_encoding *)MBSTRG(internal_encoding) TSRMLS_CC); return SUCCESS; } @@ -1546,11 +1590,11 @@ PHP_RSHUTDOWN_FUNCTION(mbstring) } /* clear http input identification. */ - MBSTRG(http_input_identify) = mbfl_no_encoding_invalid; - MBSTRG(http_input_identify_post) = mbfl_no_encoding_invalid; - MBSTRG(http_input_identify_get) = mbfl_no_encoding_invalid; - MBSTRG(http_input_identify_cookie) = mbfl_no_encoding_invalid; - MBSTRG(http_input_identify_string) = mbfl_no_encoding_invalid; + MBSTRG(http_input_identify) = NULL; + MBSTRG(http_input_identify_post) = NULL; + MBSTRG(http_input_identify_get) = NULL; + MBSTRG(http_input_identify_cookie) = NULL; + MBSTRG(http_input_identify_string) = NULL; /* clear overloaded function. */ if (MBSTRG(func_overload)){ @@ -1625,31 +1669,27 @@ PHP_FUNCTION(mb_language) Sets the current internal encoding or Returns the current internal encoding as a string */ PHP_FUNCTION(mb_internal_encoding) { - char *name = NULL; + const char *name = NULL; int name_len; - enum mbfl_no_encoding no_encoding; + const mbfl_encoding *encoding; if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &name, &name_len) == FAILURE) { RETURN_FALSE; } if (name == NULL) { - name = (char *)mbfl_no_encoding2name(MBSTRG(current_internal_encoding)); + name = MBSTRG(current_internal_encoding) ? MBSTRG(current_internal_encoding)->name: NULL; if (name != NULL) { RETURN_STRING(name, 1); } else { RETURN_FALSE; } } else { - no_encoding = mbfl_name2no_encoding(name); - if (no_encoding == mbfl_no_encoding_invalid) { + encoding = mbfl_name2encoding(name); + if (!encoding) { php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", name); RETURN_FALSE; } else { - MBSTRG(current_internal_encoding) = no_encoding; - /* TODO: make independent from mbstring.encoding_translation? */ - if (CG(multibyte) && MBSTRG(encoding_translation)) { - zend_multibyte_set_internal_encoding(name TSRMLS_CC); - } + MBSTRG(current_internal_encoding) = encoding; RETURN_TRUE; } } @@ -1662,10 +1702,9 @@ PHP_FUNCTION(mb_http_input) { char *typ = NULL; int typ_len; - int retname, n; - char *name, *list, *temp; - enum mbfl_no_encoding *entry; - enum mbfl_no_encoding result = mbfl_no_encoding_invalid; + int retname; + char *list, *temp; + const mbfl_encoding *result = NULL; retname = 1; if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &typ, &typ_len) == FAILURE) { @@ -1693,40 +1732,38 @@ PHP_FUNCTION(mb_http_input) break; case 'I': case 'i': - array_init(return_value); - entry = MBSTRG(http_input_list); - n = MBSTRG(http_input_list_size); - while (n > 0) { - name = (char *)mbfl_no_encoding2name(*entry); - if (name) { - add_next_index_string(return_value, name, 1); + { + array_init(return_value); + const mbfl_encoding **entry = MBSTRG(http_input_list); + const size_t n = MBSTRG(http_input_list_size); + size_t i; + for (i = 0; i < n; i++) { + add_next_index_string(return_value, (*entry)->name, 1); + entry++; } - entry++; - n--; + retname = 0; } - retname = 0; break; case 'L': case 'l': - entry = MBSTRG(http_input_list); - n = MBSTRG(http_input_list_size); - list = NULL; - while (n > 0) { - name = (char *)mbfl_no_encoding2name(*entry); - if (name) { + { + const mbfl_encoding **entry = MBSTRG(http_input_list); + const size_t n = MBSTRG(http_input_list_size); + size_t i; + list = NULL; + for (i = 0; i < n; i++) { if (list) { temp = list; - spprintf(&list, 0, "%s,%s", temp, name); + spprintf(&list, 0, "%s,%s", temp, (*entry)->name); efree(temp); if (!list) { break; } } else { - list = estrdup(name); + list = estrdup((*entry)->name); } + entry++; } - entry++; - n--; } if (!list) { RETURN_FALSE; @@ -1741,9 +1778,8 @@ PHP_FUNCTION(mb_http_input) } if (retname) { - if (result != mbfl_no_encoding_invalid && - (name = (char *)mbfl_no_encoding2name(result)) != NULL) { - RETVAL_STRING(name, 1); + if (result) { + RETVAL_STRING(result->name, 1); } else { RETVAL_FALSE; } @@ -1755,28 +1791,28 @@ PHP_FUNCTION(mb_http_input) Sets the current output_encoding or returns the current output_encoding as a string */ PHP_FUNCTION(mb_http_output) { - char *name = NULL; + const char *name = NULL; int name_len; - enum mbfl_no_encoding no_encoding; + const mbfl_encoding *encoding; if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", (char **)&name, &name_len) == FAILURE) { RETURN_FALSE; } if (name == NULL) { - name = (char *)mbfl_no_encoding2name(MBSTRG(current_http_output_encoding)); + name = MBSTRG(current_http_output_encoding) ? MBSTRG(current_http_output_encoding)->name: NULL; if (name != NULL) { RETURN_STRING(name, 1); } else { RETURN_FALSE; } } else { - no_encoding = mbfl_name2no_encoding(name); - if (no_encoding == mbfl_no_encoding_invalid) { + encoding = mbfl_name2encoding(name); + if (!encoding) { php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", name); RETURN_FALSE; } else { - MBSTRG(current_http_output_encoding) = no_encoding; + MBSTRG(current_http_output_encoding) = encoding; RETURN_TRUE; } } @@ -1788,32 +1824,26 @@ PHP_FUNCTION(mb_http_output) PHP_FUNCTION(mb_detect_order) { zval **arg1 = NULL; - int n, size; - enum mbfl_no_encoding *list, *entry; - char *name; if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|Z", &arg1) == FAILURE) { return; } if (!arg1) { + size_t i; + size_t n = MBSTRG(current_detect_order_list_size); + const mbfl_encoding **entry = MBSTRG(current_detect_order_list); array_init(return_value); - entry = MBSTRG(current_detect_order_list); - n = MBSTRG(current_detect_order_list_size); - while (n > 0) { - name = (char *)mbfl_no_encoding2name(*entry); - if (name) { - add_next_index_string(return_value, name, 1); - } + for (i = 0; i < n; i++) { + add_next_index_string(return_value, (*entry)->name, 1); entry++; - n--; } } else { - list = NULL; - size = 0; + const mbfl_encoding **list = NULL; + size_t size = 0; switch (Z_TYPE_PP(arg1)) { case IS_ARRAY: - if (!php_mb_parse_encoding_array(*arg1, &list, &size, 0 TSRMLS_CC)) { + if (FAILURE == php_mb_parse_encoding_array(*arg1, &list, &size, 0 TSRMLS_CC)) { if (list) { efree(list); } @@ -1822,7 +1852,7 @@ PHP_FUNCTION(mb_detect_order) break; default: convert_to_string_ex(arg1); - if (!php_mb_parse_encoding_list(Z_STRVAL_PP(arg1), Z_STRLEN_PP(arg1), &list, &size, 0 TSRMLS_CC)) { + if (FAILURE == php_mb_parse_encoding_list(Z_STRVAL_PP(arg1), Z_STRLEN_PP(arg1), &list, &size, 0 TSRMLS_CC)) { if (list) { efree(list); } @@ -1942,7 +1972,7 @@ PHP_FUNCTION(mb_parse_str) char *encstr = NULL; int encstr_len; php_mb_encoding_handler_info_t info; - enum mbfl_no_encoding detected; + const mbfl_encoding *detected; track_vars_array = NULL; if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|z", &encstr, &encstr_len, &track_vars_array) == FAILURE) { @@ -1970,7 +2000,7 @@ PHP_FUNCTION(mb_parse_str) MBSTRG(http_input_identify) = detected; - RETVAL_BOOL(detected != mbfl_no_encoding_invalid); + RETVAL_BOOL(detected); if (encstr != NULL) efree(encstr); } @@ -1986,7 +2016,7 @@ PHP_FUNCTION(mb_output_handler) mbfl_string string, result; const char *charset; char *p; - enum mbfl_no_encoding encoding; + const mbfl_encoding *encoding; int last_feed, len; unsigned char send_text_mimetype = 0; char *s, *mimetype = NULL; @@ -2005,7 +2035,7 @@ PHP_FUNCTION(mb_output_handler) mbfl_buffer_converter_delete(MBSTRG(outconv)); MBSTRG(outconv) = NULL; } - if (encoding == mbfl_no_encoding_pass) { + if (encoding == &mbfl_encoding_pass) { RETURN_STRINGL(arg_string, arg_string_len, 1); } @@ -2027,7 +2057,7 @@ PHP_FUNCTION(mb_output_handler) /* if content-type is not yet set, set it and activate the converter */ if (SG(sapi_headers).send_default_content_type || send_text_mimetype) { - charset = mbfl_no2preferred_mime_name(encoding); + charset = encoding->mime_name; if (charset) { len = spprintf( &p, 0, "Content-Type: %s; charset=%s", mimetype, charset ); if (sapi_add_header(p, len, 0) != FAILURE) { @@ -2035,7 +2065,7 @@ PHP_FUNCTION(mb_output_handler) } } /* activate the converter */ - MBSTRG(outconv) = mbfl_buffer_converter_new(MBSTRG(current_internal_encoding), encoding, 0); + MBSTRG(outconv) = mbfl_buffer_converter_new2(MBSTRG(current_internal_encoding), encoding, 0); if (send_text_mimetype){ efree(mimetype); } @@ -2056,7 +2086,7 @@ PHP_FUNCTION(mb_output_handler) /* feed the string */ mbfl_string_init(&string); string.no_language = MBSTRG(language); - string.no_encoding = MBSTRG(current_internal_encoding); + string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; string.val = (unsigned char *)arg_string; string.len = arg_string_len; mbfl_buffer_converter_feed(MBSTRG(outconv), &string); @@ -2093,7 +2123,7 @@ PHP_FUNCTION(mb_strlen) string.no_language = MBSTRG(language); if (enc_name == NULL) { - string.no_encoding = MBSTRG(current_internal_encoding); + string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; } else { string.no_encoding = mbfl_name2no_encoding(enc_name); if (string.no_encoding == mbfl_no_encoding_invalid) { @@ -2124,9 +2154,9 @@ PHP_FUNCTION(mb_strpos) mbfl_string_init(&haystack); mbfl_string_init(&needle); haystack.no_language = MBSTRG(language); - haystack.no_encoding = MBSTRG(current_internal_encoding); + haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; needle.no_language = MBSTRG(language); - needle.no_encoding = MBSTRG(current_internal_encoding); + needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; offset = 0; if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|ls", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &offset, &enc_name, &enc_name_len) == FAILURE) { @@ -2191,9 +2221,9 @@ PHP_FUNCTION(mb_strrpos) mbfl_string_init(&haystack); mbfl_string_init(&needle); haystack.no_language = MBSTRG(language); - haystack.no_encoding = MBSTRG(current_internal_encoding); + haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; needle.no_language = MBSTRG(language); - needle.no_encoding = MBSTRG(current_internal_encoding); + needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|Zs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &zoffset, &enc_name, &enc_name_len) == FAILURE) { RETURN_FALSE; @@ -2280,7 +2310,7 @@ PHP_FUNCTION(mb_stripos) int n; long offset; mbfl_string haystack, needle; - char *from_encoding = (char*)mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding)); + const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name; int from_encoding_len; n = -1; offset = 0; @@ -2309,7 +2339,7 @@ PHP_FUNCTION(mb_strripos) int n; long offset; mbfl_string haystack, needle; - const char *from_encoding = mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding)); + const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name; int from_encoding_len; n = -1; offset = 0; @@ -2341,9 +2371,9 @@ PHP_FUNCTION(mb_strstr) mbfl_string_init(&haystack); mbfl_string_init(&needle); haystack.no_language = MBSTRG(language); - haystack.no_encoding = MBSTRG(current_internal_encoding); + haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; needle.no_language = MBSTRG(language); - needle.no_encoding = MBSTRG(current_internal_encoding); + needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|bs", (char **)&haystack.val, (int *)&haystack.len, (char **)&needle.val, (int *)&needle.len, &part, &enc_name, &enc_name_len) == FAILURE) { RETURN_FALSE; @@ -2399,9 +2429,9 @@ PHP_FUNCTION(mb_strrchr) mbfl_string_init(&haystack); mbfl_string_init(&needle); haystack.no_language = MBSTRG(language); - haystack.no_encoding = MBSTRG(current_internal_encoding); + haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; needle.no_language = MBSTRG(language); - needle.no_encoding = MBSTRG(current_internal_encoding); + needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|bs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &part, &enc_name, &enc_name_len) == FAILURE) { RETURN_FALSE; @@ -2454,13 +2484,13 @@ PHP_FUNCTION(mb_stristr) unsigned int from_encoding_len, len, mblen; int n; mbfl_string haystack, needle, result, *ret = NULL; - const char *from_encoding = mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding)); + const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name; mbfl_string_init(&haystack); mbfl_string_init(&needle); haystack.no_language = MBSTRG(language); - haystack.no_encoding = MBSTRG(current_internal_encoding); + haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; needle.no_language = MBSTRG(language); - needle.no_encoding = MBSTRG(current_internal_encoding); + needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|bs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &part, &from_encoding, &from_encoding_len) == FAILURE) { @@ -2512,13 +2542,13 @@ PHP_FUNCTION(mb_strrichr) zend_bool part = 0; int n, from_encoding_len, len, mblen; mbfl_string haystack, needle, result, *ret = NULL; - char *from_encoding = (char*)mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding)); + const char *from_encoding = MBSTRG(current_internal_encoding)->name; mbfl_string_init(&haystack); mbfl_string_init(&needle); haystack.no_language = MBSTRG(language); - haystack.no_encoding = MBSTRG(current_internal_encoding); + haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; needle.no_language = MBSTRG(language); - needle.no_encoding = MBSTRG(current_internal_encoding); + needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|bs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &part, &from_encoding, &from_encoding_len) == FAILURE) { @@ -2570,9 +2600,9 @@ PHP_FUNCTION(mb_substr_count) mbfl_string_init(&haystack); mbfl_string_init(&needle); haystack.no_language = MBSTRG(language); - haystack.no_encoding = MBSTRG(current_internal_encoding); + haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; needle.no_language = MBSTRG(language); - needle.no_encoding = MBSTRG(current_internal_encoding); + needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|s", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &enc_name, &enc_name_len) == FAILURE) { return; @@ -2616,7 +2646,7 @@ PHP_FUNCTION(mb_substr) mbfl_string_init(&string); string.no_language = MBSTRG(language); - string.no_encoding = MBSTRG(current_internal_encoding); + string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; if (argc == 4) { string.no_encoding = mbfl_name2no_encoding(encoding); @@ -2685,7 +2715,7 @@ PHP_FUNCTION(mb_strcut) mbfl_string_init(&string); string.no_language = MBSTRG(language); - string.no_encoding = MBSTRG(current_internal_encoding); + string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sl|ls", (char **)&string.val, (int **)&string.len, &from, &len, &encoding, &encoding_len) == FAILURE) { return; @@ -2748,7 +2778,7 @@ PHP_FUNCTION(mb_strwidth) mbfl_string_init(&string); string.no_language = MBSTRG(language); - string.no_encoding = MBSTRG(current_internal_encoding); + string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s", (char **)&string.val, &string.len, &enc_name, &enc_name_len) == FAILURE) { return; @@ -2787,9 +2817,9 @@ PHP_FUNCTION(mb_strimwidth) mbfl_string_init(&string); mbfl_string_init(&marker); string.no_language = MBSTRG(language); - string.no_encoding = MBSTRG(current_internal_encoding); + string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; marker.no_language = MBSTRG(language); - marker.no_encoding = MBSTRG(current_internal_encoding); + marker.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; marker.val = NULL; marker.len = 0; @@ -2833,9 +2863,10 @@ PHP_FUNCTION(mb_strimwidth) MBSTRING_API char * php_mb_convert_encoding(const char *input, size_t length, const char *_to_encoding, const char *_from_encodings, size_t *output_len TSRMLS_DC) { mbfl_string string, result, *ret; - enum mbfl_no_encoding from_encoding, to_encoding; + const mbfl_encoding *from_encoding, *to_encoding; mbfl_buffer_converter *convd; - int size, *list; + size_t size; + const mbfl_encoding **list; char *output=NULL; if (output_len) { @@ -2846,8 +2877,8 @@ MBSTRING_API char * php_mb_convert_encoding(const char *input, size_t length, co } /* new encoding */ if (_to_encoding && strlen(_to_encoding)) { - to_encoding = mbfl_name2no_encoding(_to_encoding); - if (to_encoding == mbfl_no_encoding_invalid) { + to_encoding = mbfl_name2encoding(_to_encoding); + if (!to_encoding) { php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", _to_encoding); return NULL; } @@ -2859,7 +2890,7 @@ MBSTRING_API char * php_mb_convert_encoding(const char *input, size_t length, co mbfl_string_init(&string); mbfl_string_init(&result); from_encoding = MBSTRG(current_internal_encoding); - string.no_encoding = from_encoding; + string.no_encoding = from_encoding->no_encoding; string.no_language = MBSTRG(language); string.val = (unsigned char *)input; string.len = length; @@ -2871,17 +2902,17 @@ MBSTRING_API char * php_mb_convert_encoding(const char *input, size_t length, co php_mb_parse_encoding_list(_from_encodings, strlen(_from_encodings), &list, &size, 0 TSRMLS_CC); if (size == 1) { from_encoding = *list; - string.no_encoding = from_encoding; + string.no_encoding = from_encoding->no_encoding; } else if (size > 1) { /* auto detect */ - from_encoding = mbfl_identify_encoding_no(&string, list, size, MBSTRG(strict_detection)); - if (from_encoding != mbfl_no_encoding_invalid) { - string.no_encoding = from_encoding; + from_encoding = mbfl_identify_encoding2(&string, list, size, MBSTRG(strict_detection)); + if (from_encoding) { + string.no_encoding = from_encoding->no_encoding; } else { php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to detect character encoding"); from_encoding = mbfl_no_encoding_pass; to_encoding = from_encoding; - string.no_encoding = from_encoding; + string.no_encoding = from_encoding->no_encoding; } } else { php_error_docref(NULL TSRMLS_CC, E_WARNING, "Illegal character encoding specified"); @@ -2892,7 +2923,7 @@ MBSTRING_API char * php_mb_convert_encoding(const char *input, size_t length, co } /* initialize converter */ - convd = mbfl_buffer_converter_new(from_encoding, to_encoding, string.len); + convd = mbfl_buffer_converter_new2(from_encoding, to_encoding, string.len); if (convd == NULL) { php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to create character encoding converter"); return NULL; @@ -2993,7 +3024,8 @@ PHP_FUNCTION(mb_convert_encoding) Returns a case-folded version of sourcestring */ PHP_FUNCTION(mb_convert_case) { - char *str, *from_encoding = (char*)mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding)); + const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name; + char *str; int str_len, from_encoding_len; long case_mode = 0; char *newstr; @@ -3017,7 +3049,8 @@ PHP_FUNCTION(mb_convert_case) */ PHP_FUNCTION(mb_strtoupper) { - char *str, *from_encoding = (char*)mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding)); + const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name; + char *str; int str_len, from_encoding_len; char *newstr; size_t ret_len; @@ -3040,7 +3073,8 @@ PHP_FUNCTION(mb_strtoupper) */ PHP_FUNCTION(mb_strtolower) { - char *str, *from_encoding = (char*)mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding)); + const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name; + char *str; int str_len, from_encoding_len; char *newstr; size_t ret_len; @@ -3068,9 +3102,9 @@ PHP_FUNCTION(mb_detect_encoding) zval *encoding_list; mbfl_string string; - const char *ret; - enum mbfl_no_encoding *elist; - int size, *list; + const mbfl_encoding *ret; + const mbfl_encoding **elist, **list; + size_t size; if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|zb", &str, &str_len, &encoding_list, &strict) == FAILURE) { return; @@ -3082,7 +3116,7 @@ PHP_FUNCTION(mb_detect_encoding) if (ZEND_NUM_ARGS() >= 2 && Z_STRVAL_P(encoding_list)) { switch (Z_TYPE_P(encoding_list)) { case IS_ARRAY: - if (!php_mb_parse_encoding_array(encoding_list, &list, &size, 0 TSRMLS_CC)) { + if (FAILURE == php_mb_parse_encoding_array(encoding_list, &list, &size, 0 TSRMLS_CC)) { if (list) { efree(list); list = NULL; @@ -3092,7 +3126,7 @@ PHP_FUNCTION(mb_detect_encoding) break; default: convert_to_string(encoding_list); - if (!php_mb_parse_encoding_list(Z_STRVAL_P(encoding_list), Z_STRLEN_P(encoding_list), &list, &size, 0 TSRMLS_CC)) { + if (FAILURE == php_mb_parse_encoding_list(Z_STRVAL_P(encoding_list), Z_STRLEN_P(encoding_list), &list, &size, 0 TSRMLS_CC)) { if (list) { efree(list); list = NULL; @@ -3121,7 +3155,7 @@ PHP_FUNCTION(mb_detect_encoding) string.no_language = MBSTRG(language); string.val = (unsigned char *)str; string.len = str_len; - ret = mbfl_identify_encoding_name(&string, elist, size, strict); + ret = mbfl_identify_encoding2(&string, elist, size, strict); if (list != NULL) { efree((void *)list); @@ -3131,7 +3165,7 @@ PHP_FUNCTION(mb_detect_encoding) RETURN_FALSE; } - RETVAL_STRING((char *)ret, 1); + RETVAL_STRING((char *)ret->name, 1); } /* }}} */ @@ -3196,7 +3230,7 @@ PHP_FUNCTION(mb_encode_mimeheader) mbfl_string_init(&string); string.no_language = MBSTRG(language); - string.no_encoding = MBSTRG(current_internal_encoding); + string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|sssl", (char **)&string.val, &string.len, &charset_name, &charset_name_len, &trans_enc_name, &trans_enc_name_len, &linefeed, &linefeed_len, &indent) == FAILURE) { return; @@ -3245,14 +3279,14 @@ PHP_FUNCTION(mb_decode_mimeheader) mbfl_string_init(&string); string.no_language = MBSTRG(language); - string.no_encoding = MBSTRG(current_internal_encoding); + string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", (char **)&string.val, &string.len) == FAILURE) { return; } mbfl_string_init(&result); - ret = mbfl_mime_header_decode(&string, &result, MBSTRG(current_internal_encoding)); + ret = mbfl_mime_header_decode(&string, &result, MBSTRG(current_internal_encoding)->no_encoding); if (ret != NULL) { RETVAL_STRINGL((char *)ret->val, ret->len, 0); /* the string is already strdup()'ed */ } else { @@ -3274,7 +3308,7 @@ PHP_FUNCTION(mb_convert_kana) mbfl_string_init(&string); string.no_language = MBSTRG(language); - string.no_encoding = MBSTRG(current_internal_encoding); + string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|ss", (char **)&string.val, &string.len, &optstr, &optstr_len, &encname, &encname_len) == FAILURE) { return; @@ -3373,12 +3407,13 @@ PHP_FUNCTION(mb_convert_variables) zval ***args, ***stack, **var, **hash_entry, **zfrom_enc; HashTable *target_hash; mbfl_string string, result, *ret; - enum mbfl_no_encoding from_encoding, to_encoding; + const mbfl_encoding *from_encoding, *to_encoding; mbfl_encoding_detector *identd; mbfl_buffer_converter *convd; - int n, to_enc_len, argc, stack_level, stack_max, elistsz; - enum mbfl_no_encoding *elist; - char *name, *to_enc; + int n, to_enc_len, argc, stack_level, stack_max; + size_t elistsz; + const mbfl_encoding **elist; + char *to_enc; void *ptmp; if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sZ+", &to_enc, &to_enc_len, &zfrom_enc, &args, &argc) == FAILURE) { @@ -3386,8 +3421,8 @@ PHP_FUNCTION(mb_convert_variables) } /* new encoding */ - to_encoding = mbfl_name2no_encoding(to_enc); - if (to_encoding == mbfl_no_encoding_invalid) { + to_encoding = mbfl_name2encoding(to_enc); + if (!to_encoding) { php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", to_enc); efree(args); RETURN_FALSE; @@ -3397,7 +3432,7 @@ PHP_FUNCTION(mb_convert_variables) mbfl_string_init(&string); mbfl_string_init(&result); from_encoding = MBSTRG(current_internal_encoding); - string.no_encoding = from_encoding; + string.no_encoding = from_encoding->no_encoding; string.no_language = MBSTRG(language); /* pre-conversion encoding */ @@ -3418,11 +3453,11 @@ PHP_FUNCTION(mb_convert_variables) from_encoding = *elist; } else { /* auto detect */ - from_encoding = mbfl_no_encoding_invalid; + from_encoding = NULL; stack_max = PHP_MBSTR_STACK_BLOCK_SIZE; stack = (zval ***)safe_emalloc(stack_max, sizeof(zval **), 0); stack_level = 0; - identd = mbfl_encoding_detector_new(elist, elistsz, MBSTRG(strict_detection)); + identd = mbfl_encoding_detector_new2(elist, elistsz, MBSTRG(strict_detection)); if (identd != NULL) { n = 0; while (n < argc || stack_level > 0) { @@ -3475,12 +3510,12 @@ PHP_FUNCTION(mb_convert_variables) } } detect_end: - from_encoding = mbfl_encoding_detector_judge(identd); + from_encoding = mbfl_encoding_detector_judge2(identd); mbfl_encoding_detector_delete(identd); } efree(stack); - if (from_encoding == mbfl_no_encoding_invalid) { + if (!from_encoding) { php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to detect encoding"); from_encoding = mbfl_no_encoding_pass; } @@ -3491,7 +3526,7 @@ detect_end: /* create converter */ convd = NULL; if (from_encoding != mbfl_no_encoding_pass) { - convd = mbfl_buffer_converter_new(from_encoding, to_encoding, 0); + convd = mbfl_buffer_converter_new2(from_encoding, to_encoding, 0); if (convd == NULL) { php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to create converter"); RETURN_FALSE; @@ -3573,9 +3608,8 @@ detect_end: efree(args); - name = (char *)mbfl_no_encoding2name(from_encoding); - if (name != NULL) { - RETURN_STRING(name, 1); + if (from_encoding) { + RETURN_STRING(from_encoding->name, 1); } else { RETURN_FALSE; } @@ -3602,7 +3636,7 @@ php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAMETERS, int type) mbfl_string_init(&string); string.no_language = MBSTRG(language); - string.no_encoding = MBSTRG(current_internal_encoding); + string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; string.val = (unsigned char *)str; string.len = str_len; @@ -4055,10 +4089,10 @@ PHP_FUNCTION(mb_send_mail) orig_str.no_language = MBSTRG(language); orig_str.val = (unsigned char *)subject; orig_str.len = subject_len; - orig_str.no_encoding = MBSTRG(current_internal_encoding); - if (orig_str.no_encoding == mbfl_no_encoding_invalid - || orig_str.no_encoding == mbfl_no_encoding_pass) { - orig_str.no_encoding = mbfl_identify_encoding_no(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection)); + orig_str.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; + if (orig_str.no_encoding == mbfl_no_encoding_invalid || orig_str.no_encoding == mbfl_no_encoding_pass) { + const mbfl_encoding *encoding = mbfl_identify_encoding2(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection)); + orig_str.no_encoding = encoding ? encoding->no_encoding: mbfl_no_encoding_invalid; } pstr = mbfl_mime_header_encode(&orig_str, &conv_str, tran_cs, head_enc, "\n", sizeof("Subject: [PHP-jp nnnnnnnn]")); if (pstr != NULL) { @@ -4074,11 +4108,11 @@ PHP_FUNCTION(mb_send_mail) orig_str.no_language = MBSTRG(language); orig_str.val = (unsigned char *)message; orig_str.len = (unsigned int)message_len; - orig_str.no_encoding = MBSTRG(current_internal_encoding); + orig_str.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; - if (orig_str.no_encoding == mbfl_no_encoding_invalid - || orig_str.no_encoding == mbfl_no_encoding_pass) { - orig_str.no_encoding = mbfl_identify_encoding_no(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection)); + if (orig_str.no_encoding == mbfl_no_encoding_invalid || orig_str.no_encoding == mbfl_no_encoding_pass) { + const mbfl_encoding *encoding = mbfl_identify_encoding2(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection)); + orig_str.no_encoding = encoding ? encoding->no_encoding: mbfl_no_encoding_invalid; } pstr = NULL; @@ -4186,13 +4220,13 @@ PHP_FUNCTION(mb_send_mail) PHP_FUNCTION(mb_get_info) { char *typ = NULL; - int typ_len, n; + int typ_len; + size_t n; char *name; const struct mb_overload_def *over_func; zval *row1, *row2; const mbfl_language *lang = mbfl_no2language(MBSTRG(language)); - enum mbfl_no_encoding *entry; - zval *row3; + const mbfl_encoding **entry; if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &typ, &typ_len) == FAILURE) { RETURN_FALSE; @@ -4200,14 +4234,14 @@ PHP_FUNCTION(mb_get_info) if (!typ || !strcasecmp("all", typ)) { array_init(return_value); - if ((name = (char *)mbfl_no_encoding2name(MBSTRG(current_internal_encoding))) != NULL) { - add_assoc_string(return_value, "internal_encoding", name, 1); + if (MBSTRG(current_internal_encoding)) { + add_assoc_string(return_value, "internal_encoding", (char *)MBSTRG(current_internal_encoding)->name, 1); } - if ((name = (char *)mbfl_no_encoding2name(MBSTRG(http_input_identify))) != NULL) { - add_assoc_string(return_value, "http_input", name, 1); + if (MBSTRG(http_input_identify)) { + add_assoc_string(return_value, "http_input", (char *)MBSTRG(http_input_identify)->name, 1); } - if ((name = (char *)mbfl_no_encoding2name(MBSTRG(current_http_output_encoding))) != NULL) { - add_assoc_string(return_value, "http_output", name, 1); + if (MBSTRG(current_http_output_encoding)) { + add_assoc_string(return_value, "http_output", (char *)MBSTRG(current_http_output_encoding)->name, 1); } if ((name = (char *)zend_ini_string("mbstring.http_output_conv_mimetypes", sizeof("mbstring.http_output_conv_mimetypes"), 0)) != NULL) { add_assoc_string(return_value, "http_output_conv_mimetypes", name, 1); @@ -4249,15 +4283,13 @@ PHP_FUNCTION(mb_get_info) } n = MBSTRG(current_detect_order_list_size); entry = MBSTRG(current_detect_order_list); - if(n > 0) { + if (n > 0) { + size_t i; MAKE_STD_ZVAL(row2); array_init(row2); - while (n > 0) { - if ((name = (char *)mbfl_no_encoding2name(*entry)) != NULL) { - add_next_index_string(row2, name, 1); - } + for (i = 0; i < n; i++) { + add_next_index_string(row2, (*entry)->name, 1); entry++; - n--; } add_assoc_zval(return_value, "detect_order", row2); } @@ -4275,33 +4307,17 @@ PHP_FUNCTION(mb_get_info) } else { add_assoc_string(return_value, "strict_detection", "Off", 1); } - if (CG(multibyte)) { - entry = MBSTRG(script_encoding_list); - n = MBSTRG(script_encoding_list_size); - if(n > 0) { - MAKE_STD_ZVAL(row3); - array_init(row3); - while (n > 0) { - if ((name = (char *)mbfl_no_encoding2name(*entry)) != NULL) { - add_next_index_string(row3, name, 1); - } - entry++; - n--; - } - add_assoc_zval(return_value, "script_encoding", row3); - } - } } else if (!strcasecmp("internal_encoding", typ)) { - if ((name = (char *)mbfl_no_encoding2name(MBSTRG(current_internal_encoding))) != NULL) { - RETVAL_STRING(name, 1); + if (MBSTRG(current_internal_encoding)) { + RETVAL_STRING((char *)MBSTRG(current_internal_encoding)->name, 1); } } else if (!strcasecmp("http_input", typ)) { - if ((name = (char *)mbfl_no_encoding2name(MBSTRG(http_input_identify))) != NULL) { - RETVAL_STRING(name, 1); + if (MBSTRG(http_input_identify)) { + RETVAL_STRING((char *)MBSTRG(http_input_identify)->name, 1); } } else if (!strcasecmp("http_output", typ)) { - if ((name = (char *)mbfl_no_encoding2name(MBSTRG(current_http_output_encoding))) != NULL) { - RETVAL_STRING(name, 1); + if (MBSTRG(current_http_output_encoding)) { + RETVAL_STRING((char *)MBSTRG(current_http_output_encoding)->name, 1); } } else if (!strcasecmp("http_output_conv_mimetypes", typ)) { if ((name = (char *)zend_ini_string("mbstring.http_output_conv_mimetypes", sizeof("mbstring.http_output_conv_mimetypes"), 0)) != NULL) { @@ -4349,15 +4365,11 @@ PHP_FUNCTION(mb_get_info) } else if (!strcasecmp("detect_order", typ)) { n = MBSTRG(current_detect_order_list_size); entry = MBSTRG(current_detect_order_list); - if(n > 0) { + if (n > 0) { + size_t i; array_init(return_value); - while (n > 0) { - name = (char *)mbfl_no_encoding2name(*entry); - if (name) { - add_next_index_string(return_value, name, 1); - } - entry++; - n--; + for (i = 0; i < n; i++) { + add_next_index_string(return_value, (*entry)->name, 1); } } } else if (!strcasecmp("substitute_character", typ)) { @@ -4377,22 +4389,6 @@ PHP_FUNCTION(mb_get_info) RETVAL_STRING("Off", 1); } } else { - if (CG(multibyte) && !strcasecmp("script_encoding", typ)) { - entry = MBSTRG(script_encoding_list); - n = MBSTRG(script_encoding_list_size); - if(n > 0) { - array_init(return_value); - while (n > 0) { - name = (char *)mbfl_no_encoding2name(*entry); - if (name) { - add_next_index_string(return_value, name, 1); - } - entry++; - n--; - } - } - return; - } RETURN_FALSE; } } @@ -4407,7 +4403,7 @@ PHP_FUNCTION(mb_check_encoding) char *enc = NULL; int enc_len; mbfl_buffer_converter *convd; - enum mbfl_no_encoding no_encoding = MBSTRG(current_internal_encoding); + const mbfl_encoding *encoding = MBSTRG(current_internal_encoding); mbfl_string string, result, *ret = NULL; long illegalchars = 0; @@ -4420,14 +4416,14 @@ PHP_FUNCTION(mb_check_encoding) } if (enc != NULL) { - no_encoding = mbfl_name2no_encoding(enc); - if (no_encoding == mbfl_no_encoding_invalid || no_encoding == mbfl_no_encoding_pass) { + encoding = mbfl_name2encoding(enc); + if (!encoding || encoding == &mbfl_encoding_pass) { php_error_docref(NULL TSRMLS_CC, E_WARNING, "Invalid encoding \"%s\"", enc); RETURN_FALSE; } } - convd = mbfl_buffer_converter_new(no_encoding, no_encoding, 0); + convd = mbfl_buffer_converter_new2(encoding, encoding, 0); if (convd == NULL) { php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to create converter"); RETURN_FALSE; @@ -4436,7 +4432,7 @@ PHP_FUNCTION(mb_check_encoding) mbfl_buffer_converter_illegal_substchar(convd, 0); /* initialize string */ - mbfl_string_init_set(&string, mbfl_no_language_neutral, no_encoding); + mbfl_string_init_set(&string, mbfl_no_language_neutral, encoding->no_encoding); mbfl_string_init(&result); string.val = (unsigned char *)var; @@ -4455,6 +4451,34 @@ PHP_FUNCTION(mb_check_encoding) } /* }}} */ + +/* {{{ php_mb_populate_current_detect_order_list */ +static void php_mb_populate_current_detect_order_list(TSRMLS_D) +{ + const mbfl_encoding **entry = 0; + size_t nentries; + + if (MBSTRG(current_detect_order_list)) { + return; + } + + if (MBSTRG(detect_order_list) && MBSTRG(detect_order_list_size)) { + nentries = MBSTRG(detect_order_list_size); + entry = (const mbfl_encoding **)safe_emalloc(nentries, sizeof(mbfl_encoding*), 0); + memcpy(entry, MBSTRG(detect_order_list), sizeof(mbfl_encoding*) * nentries); + } else { + const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list); + nentries = MBSTRG(default_detect_order_list_size); + entry = (const mbfl_encoding **)safe_emalloc(nentries, sizeof(mbfl_encoding*), 0); + size_t i; + for (i = 0; i < nentries; i++) { + entry[i] = mbfl_no2encoding(src[i]); + } + } + MBSTRG(current_detect_order_list) = entry; + MBSTRG(current_detect_order_list_size) = nentries; +} + /* {{{ MBSTRING_API int php_mb_encoding_translation() */ MBSTRING_API int php_mb_encoding_translation(TSRMLS_D) { @@ -4483,8 +4507,7 @@ MBSTRING_API size_t php_mb_mbchar_bytes_ex(const char *s, const mbfl_encoding *e /* {{{ MBSTRING_API size_t php_mb_mbchar_bytes() */ MBSTRING_API size_t php_mb_mbchar_bytes(const char *s TSRMLS_DC) { - return php_mb_mbchar_bytes_ex(s, - mbfl_no2encoding(MBSTRG(internal_encoding))); + return php_mb_mbchar_bytes_ex(s, MBSTRG(internal_encoding)); } /* }}} */ @@ -4532,8 +4555,7 @@ MBSTRING_API char *php_mb_safe_strrchr_ex(const char *s, unsigned int c, size_t /* {{{ MBSTRING_API char *php_mb_safe_strrchr() */ MBSTRING_API char *php_mb_safe_strrchr(const char *s, unsigned int c, size_t nbytes TSRMLS_DC) { - return php_mb_safe_strrchr_ex(s, c, nbytes, - mbfl_no2encoding(MBSTRG(internal_encoding))); + return php_mb_safe_strrchr_ex(s, c, nbytes, MBSTRG(internal_encoding)); } /* }}} */ @@ -4548,12 +4570,10 @@ MBSTRING_API char *php_mb_strrchr(const char *s, char c TSRMLS_DC) MBSTRING_API size_t php_mb_gpc_mbchar_bytes(const char *s TSRMLS_DC) { - if (MBSTRG(http_input_identify) != mbfl_no_encoding_invalid){ - return php_mb_mbchar_bytes_ex(s, - mbfl_no2encoding(MBSTRG(http_input_identify))); + if (MBSTRG(http_input_identify)) { + return php_mb_mbchar_bytes_ex(s, MBSTRG(http_input_identify)); } else { - return php_mb_mbchar_bytes_ex(s, - mbfl_no2encoding(MBSTRG(internal_encoding))); + return php_mb_mbchar_bytes_ex(s, MBSTRG(internal_encoding)); } } /* }}} */ @@ -4563,13 +4583,13 @@ MBSTRING_API int php_mb_gpc_encoding_converter(char **str, int *len, int num, co { int i; mbfl_string string, result, *ret = NULL; - enum mbfl_no_encoding from_encoding, to_encoding; + const mbfl_encoding *from_encoding, *to_encoding; mbfl_buffer_converter *convd; if (encoding_to) { /* new encoding */ - to_encoding = mbfl_name2no_encoding(encoding_to); - if (to_encoding == mbfl_no_encoding_invalid) { + to_encoding = mbfl_name2encoding(encoding_to); + if (!to_encoding) { return -1; } } else { @@ -4577,8 +4597,8 @@ MBSTRING_API int php_mb_gpc_encoding_converter(char **str, int *len, int num, co } if (encoding_from) { /* old encoding */ - from_encoding = mbfl_name2no_encoding(encoding_from); - if (from_encoding == mbfl_no_encoding_invalid) { + from_encoding = mbfl_name2encoding(encoding_from); + if (from_encoding) { return -1; } } else { @@ -4592,7 +4612,7 @@ MBSTRING_API int php_mb_gpc_encoding_converter(char **str, int *len, int num, co /* initialize string */ mbfl_string_init(&string); mbfl_string_init(&result); - string.no_encoding = from_encoding; + string.no_encoding = from_encoding->no_encoding; string.no_language = MBSTRG(language); for (i=0; i<num; i++){ @@ -4600,7 +4620,7 @@ MBSTRING_API int php_mb_gpc_encoding_converter(char **str, int *len, int num, co string.len = len[i]; /* initialize converter */ - convd = mbfl_buffer_converter_new(from_encoding, to_encoding, string.len); + convd = mbfl_buffer_converter_new2(from_encoding, to_encoding, string.len); if (convd == NULL) { return -1; } @@ -4628,23 +4648,17 @@ MBSTRING_API int php_mb_gpc_encoding_converter(char **str, int *len, int num, co MBSTRING_API int php_mb_gpc_encoding_detector(char **arg_string, int *arg_length, int num, char *arg_list TSRMLS_DC) { mbfl_string string; - enum mbfl_no_encoding *elist; - enum mbfl_no_encoding encoding = mbfl_no_encoding_invalid; + const mbfl_encoding **elist; + const mbfl_encoding *encoding = NULL; mbfl_encoding_detector *identd = NULL; - int size; - enum mbfl_no_encoding *list; + size_t size; + const mbfl_encoding **list; - if (MBSTRG(http_input_list_size) == 1 && - MBSTRG(http_input_list)[0] == mbfl_no_encoding_pass) { - MBSTRG(http_input_identify) = mbfl_no_encoding_pass; - return SUCCESS; - } + php_mb_populate_current_detect_order_list(TSRMLS_C); - if (MBSTRG(http_input_list_size) == 1 && - MBSTRG(http_input_list)[0] != mbfl_no_encoding_auto && - mbfl_no_encoding2name(MBSTRG(http_input_list)[0]) != NULL) { - MBSTRG(http_input_identify) = MBSTRG(http_input_list)[0]; + if (MBSTRG(http_input_list_size) == 1 && MBSTRG(http_input_list)[0] == &mbfl_encoding_pass) { + MBSTRG(http_input_identify) = &mbfl_encoding_pass; return SUCCESS; } @@ -4659,24 +4673,16 @@ MBSTRING_API int php_mb_gpc_encoding_detector(char **arg_string, int *arg_length } else { elist = MBSTRG(current_detect_order_list); size = MBSTRG(current_detect_order_list_size); - if (size <= 0){ - elist = MBSTRG(default_detect_order_list); - size = MBSTRG(default_detect_order_list_size); - } } } else { elist = MBSTRG(current_detect_order_list); size = MBSTRG(current_detect_order_list_size); - if (size <= 0){ - elist = MBSTRG(default_detect_order_list); - size = MBSTRG(default_detect_order_list_size); - } } mbfl_string_init(&string); string.no_language = MBSTRG(language); - identd = mbfl_encoding_detector_new(elist, size, MBSTRG(strict_detection)); + identd = mbfl_encoding_detector_new2(elist, size, MBSTRG(strict_detection)); if (identd) { int n = 0; @@ -4688,11 +4694,11 @@ MBSTRING_API int php_mb_gpc_encoding_detector(char **arg_string, int *arg_length } n++; } - encoding = mbfl_encoding_detector_judge(identd); + encoding = mbfl_encoding_detector_judge2(identd); mbfl_encoding_detector_delete(identd); } - if (encoding != mbfl_no_encoding_invalid) { + if (encoding) { MBSTRG(http_input_identify) = encoding; return SUCCESS; } else { @@ -4712,9 +4718,9 @@ MBSTRING_API int php_mb_stripos(int mode, const char *old_haystack, unsigned int mbfl_string_init(&haystack); mbfl_string_init(&needle); haystack.no_language = MBSTRG(language); - haystack.no_encoding = MBSTRG(current_internal_encoding); + haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; needle.no_language = MBSTRG(language); - needle.no_encoding = MBSTRG(current_internal_encoding); + needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; do { size_t len = 0; @@ -4778,176 +4784,6 @@ MBSTRING_API int php_mb_stripos(int mode, const char *old_haystack, unsigned int } /* }}} */ -/* {{{ php_mb_set_zend_encoding() */ -static int php_mb_set_zend_encoding(TSRMLS_D) -{ - /* 'd better use mbfl_memory_device? */ - char *name, *list = NULL; - int n, *entry, list_size = 0; - - /* notify script encoding to Zend Engine */ - entry = MBSTRG(script_encoding_list); - n = MBSTRG(script_encoding_list_size); - while (n > 0) { - name = (char *)mbfl_no_encoding2name(*entry); - if (name) { - list_size += strlen(name) + 1; - if (!list) { - list = (char*)emalloc(list_size); - *list = '\0'; - } else { - list = (char*)erealloc(list, list_size); - strcat(list, ","); - } - strcat(list, name); - } - entry++; - n--; - } - zend_multibyte_set_script_encoding(list, (list ? strlen(list) : 0) TSRMLS_CC); - if (list) { - efree(list); - } - - /* TODO: make independent from mbstring.encoding_translation? */ - if (MBSTRG(encoding_translation)) { - /* notify internal encoding to Zend Engine */ - name = (char*)mbfl_no_encoding2name(MBSTRG(current_internal_encoding)); - zend_multibyte_set_internal_encoding(name TSRMLS_CC); - } - - return 0; -} -/* }}} */ - -/* {{{ char *php_mb_encoding_detector() - * Interface for Zend Engine - */ -static char* php_mb_encoding_detector(const unsigned char *arg_string, size_t arg_length, char *arg_list TSRMLS_DC) -{ - mbfl_string string; - const char *ret; - enum mbfl_no_encoding *elist; - int size, *list; - - /* make encoding list */ - list = NULL; - size = 0; - php_mb_parse_encoding_list(arg_list, strlen(arg_list), &list, &size, 0 TSRMLS_CC); - if (size <= 0) { - return NULL; - } - if (size > 0 && list != NULL) { - elist = list; - } else { - elist = MBSTRG(current_detect_order_list); - size = MBSTRG(current_detect_order_list_size); - } - - mbfl_string_init(&string); - string.no_language = MBSTRG(language); - string.val = (unsigned char *)arg_string; - string.len = arg_length; - ret = mbfl_identify_encoding_name(&string, elist, size, 0); - if (list != NULL) { - efree((void *)list); - } - if (ret != NULL) { - return estrdup(ret); - } else { - return NULL; - } -} -/* }}} */ - -/* {{{ int php_mb_encoding_converter() */ -static int php_mb_encoding_converter(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length, const char *encoding_to, const char *encoding_from TSRMLS_DC) -{ - mbfl_string string, result, *ret; - enum mbfl_no_encoding from_encoding, to_encoding; - mbfl_buffer_converter *convd; - - /* new encoding */ - to_encoding = mbfl_name2no_encoding(encoding_to); - if (to_encoding == mbfl_no_encoding_invalid) { - return -1; - } - /* old encoding */ - from_encoding = mbfl_name2no_encoding(encoding_from); - if (from_encoding == mbfl_no_encoding_invalid) { - return -1; - } - /* initialize string */ - mbfl_string_init(&string); - mbfl_string_init(&result); - string.no_encoding = from_encoding; - string.no_language = MBSTRG(language); - string.val = (unsigned char*)from; - string.len = from_length; - - /* initialize converter */ - convd = mbfl_buffer_converter_new(from_encoding, to_encoding, string.len); - if (convd == NULL) { - return -1; - } - mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode)); - mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar)); - - /* do it */ - ret = mbfl_buffer_converter_feed_result(convd, &string, &result); - if (ret != NULL) { - *to = ret->val; - *to_length = ret->len; - } - - MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd); - mbfl_buffer_converter_delete(convd); - - return ret ? 0 : -1; -} -/* }}} */ - -/* {{{ int php_mb_oddlen() - * returns number of odd (e.g. appears only first byte of multibyte - * character) chars - */ -static size_t php_mb_oddlen(const unsigned char *string, size_t length, const char *encoding TSRMLS_DC) -{ - mbfl_string mb_string; - - mbfl_string_init(&mb_string); - mb_string.no_language = MBSTRG(language); - mb_string.no_encoding = mbfl_name2no_encoding(encoding); - mb_string.val = (unsigned char *)string; - mb_string.len = length; - - if (mb_string.no_encoding == mbfl_no_encoding_invalid) { - return 0; - } - return mbfl_oddlen(&mb_string); -} -/* }}} */ - -/* {{{ const char* php_mb_internal_encoding_name() - * returns name of internal encoding - */ -static const char* php_mb_internal_encoding_name(TSRMLS_D) -{ - const char *name = mbfl_no_encoding2name(MBSTRG(current_internal_encoding)); - - if (!name || - !*name || - (strlen(name) == 4 && - (!memcmp("pass", name, sizeof("pass") - 1) || - !memcmp("auto", name, sizeof("auto") - 1) || - !memcmp("none", name, sizeof("none") - 1)))) { - return NULL; - } - return name; -} -/* }}} */ - - #endif /* HAVE_MBSTRING */ /* diff --git a/ext/mbstring/mbstring.h b/ext/mbstring/mbstring.h index 77f1c9d5ef..6eae92f4d0 100644 --- a/ext/mbstring/mbstring.h +++ b/ext/mbstring/mbstring.h @@ -165,25 +165,23 @@ int _php_mb_ini_mbstring_internal_encoding_set(const char *new_value, uint new_v ZEND_BEGIN_MODULE_GLOBALS(mbstring) char *internal_encoding_name; enum mbfl_no_language language; - enum mbfl_no_encoding internal_encoding; - enum mbfl_no_encoding current_internal_encoding; - enum mbfl_no_encoding *script_encoding_list; - int script_encoding_list_size; - enum mbfl_no_encoding http_output_encoding; - enum mbfl_no_encoding current_http_output_encoding; - enum mbfl_no_encoding http_input_identify; - enum mbfl_no_encoding http_input_identify_get; - enum mbfl_no_encoding http_input_identify_post; - enum mbfl_no_encoding http_input_identify_cookie; - enum mbfl_no_encoding http_input_identify_string; - enum mbfl_no_encoding *http_input_list; - int http_input_list_size; - enum mbfl_no_encoding *detect_order_list; - int detect_order_list_size; - enum mbfl_no_encoding *current_detect_order_list; - int current_detect_order_list_size; + const mbfl_encoding *internal_encoding; + const mbfl_encoding *current_internal_encoding; + const mbfl_encoding *http_output_encoding; + const mbfl_encoding *current_http_output_encoding; + const mbfl_encoding *http_input_identify; + const mbfl_encoding *http_input_identify_get; + const mbfl_encoding *http_input_identify_post; + const mbfl_encoding *http_input_identify_cookie; + const mbfl_encoding *http_input_identify_string; + const mbfl_encoding **http_input_list; + size_t http_input_list_size; + const mbfl_encoding **detect_order_list; + size_t detect_order_list_size; + const mbfl_encoding **current_detect_order_list; + size_t current_detect_order_list_size; enum mbfl_no_encoding *default_detect_order_list; - int default_detect_order_list_size; + size_t default_detect_order_list_size; int filter_illegal_mode; int filter_illegal_substchar; int current_filter_illegal_mode; |
