summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--ext/mbstring/mbfilter.c246
-rw-r--r--ext/mbstring/mbfilter.h8
-rw-r--r--ext/mbstring/mbstring.c78
-rw-r--r--ext/mbstring/mbstring.h1
-rw-r--r--ext/mbstring/unicode_table_ru.h37
5 files changed, 334 insertions, 36 deletions
diff --git a/ext/mbstring/mbfilter.c b/ext/mbstring/mbfilter.c
index 2bf816279a..4e7c942afc 100644
--- a/ext/mbstring/mbfilter.c
+++ b/ext/mbstring/mbfilter.c
@@ -104,8 +104,9 @@
#if defined(HAVE_MBSTR_KR)
#include "mbfilter_kr.h"
#endif
-#if defined(HAVE_MBSTR_KR)
+#if defined(HAVE_MBSTR_RU)
#include "mbfilter_ru.h"
+#include "unicode_table_ru.h"
#endif
#include "zend.h"
@@ -1242,6 +1243,9 @@ static int mbfl_filt_ident_2022kr(int c, mbfl_identify_filter *filter TSRMLS_DC)
#endif /* HAVE_MBSTR_KR */
#if defined(HAVE_MBSTR_RU)
+static int mbfl_filt_get_rating_cp1251(int c, mbfl_identify_filter *filter TSRMLS_DC);
+static int mbfl_filt_get_rating_cp866(int c, mbfl_identify_filter *filter TSRMLS_DC);
+static int mbfl_filt_get_rating_koi8r(int c, mbfl_identify_filter *filter TSRMLS_DC);
static int mbfl_filt_ident_cp1251(int c, mbfl_identify_filter *filter TSRMLS_DC);
static int mbfl_filt_ident_cp866(int c, mbfl_identify_filter *filter TSRMLS_DC);
static int mbfl_filt_ident_koi8r(int c, mbfl_identify_filter *filter TSRMLS_DC);
@@ -2294,56 +2298,65 @@ static const struct mbfl_identify_vtbl vtbl_identify_ascii = {
mbfl_no_encoding_ascii,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_common_dtor,
- mbfl_filt_ident_ascii };
+ mbfl_filt_ident_ascii,
+ NULL };
static const struct mbfl_identify_vtbl vtbl_identify_utf8 = {
mbfl_no_encoding_utf8,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_common_dtor,
- mbfl_filt_ident_utf8 };
+ mbfl_filt_ident_utf8,
+ NULL };
static const struct mbfl_identify_vtbl vtbl_identify_utf7 = {
mbfl_no_encoding_utf7,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_common_dtor,
- mbfl_filt_ident_utf7 };
+ mbfl_filt_ident_utf7,
+ NULL };
#if defined(HAVE_MBSTR_JA)
static const struct mbfl_identify_vtbl vtbl_identify_eucjp = {
mbfl_no_encoding_euc_jp,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_common_dtor,
- mbfl_filt_ident_eucjp };
+ mbfl_filt_ident_eucjp,
+ NULL };
static const struct mbfl_identify_vtbl vtbl_identify_eucjpwin = {
mbfl_no_encoding_eucjp_win,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_common_dtor,
- mbfl_filt_ident_eucjp };
+ mbfl_filt_ident_eucjp,
+ NULL };
static const struct mbfl_identify_vtbl vtbl_identify_sjis = {
mbfl_no_encoding_sjis,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_common_dtor,
- mbfl_filt_ident_sjis };
+ mbfl_filt_ident_sjis,
+ NULL };
static const struct mbfl_identify_vtbl vtbl_identify_sjiswin = {
mbfl_no_encoding_sjis_win,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_common_dtor,
- mbfl_filt_ident_sjiswin };
+ mbfl_filt_ident_sjiswin,
+ NULL };
static const struct mbfl_identify_vtbl vtbl_identify_jis = {
mbfl_no_encoding_jis,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_common_dtor,
- mbfl_filt_ident_jis };
+ mbfl_filt_ident_jis,
+ NULL };
static const struct mbfl_identify_vtbl vtbl_identify_2022jp = {
mbfl_no_encoding_2022jp,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_common_dtor,
- mbfl_filt_ident_2022jp };
+ mbfl_filt_ident_2022jp,
+ NULL };
#endif /* HAVE_MBSTR_JA */
#if defined(HAVE_MBSTR_CN)
@@ -2351,19 +2364,22 @@ static struct mbfl_identify_vtbl vtbl_identify_euccn = {
mbfl_no_encoding_euc_cn,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_common_dtor,
- mbfl_filt_ident_euccn };
+ mbfl_filt_ident_euccn,
+ NULL };
static struct mbfl_identify_vtbl vtbl_identify_cp936 = {
mbfl_no_encoding_cp936,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_common_dtor,
- mbfl_filt_ident_cp936 };
+ mbfl_filt_ident_cp936,
+ NULL };
static struct mbfl_identify_vtbl vtbl_identify_hz = {
mbfl_no_encoding_hz,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_common_dtor,
- mbfl_filt_ident_hz };
+ mbfl_filt_ident_hz,
+ NULL };
#endif /* HAVE_MBSTR_CN */
@@ -2372,13 +2388,15 @@ static struct mbfl_identify_vtbl vtbl_identify_euctw = {
mbfl_no_encoding_euc_tw,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_common_dtor,
- mbfl_filt_ident_euctw };
+ mbfl_filt_ident_euctw,
+ NULL };
static struct mbfl_identify_vtbl vtbl_identify_big5 = {
mbfl_no_encoding_big5,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_common_dtor,
- mbfl_filt_ident_big5 };
+ mbfl_filt_ident_big5,
+ NULL };
#endif /* HAVE_MBSTR_TW */
#if defined(HAVE_MBSTR_KR)
@@ -2386,19 +2404,22 @@ static struct mbfl_identify_vtbl vtbl_identify_euckr = {
mbfl_no_encoding_euc_kr,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_common_dtor,
- mbfl_filt_ident_euckr };
+ mbfl_filt_ident_euckr,
+ NULL };
static struct mbfl_identify_vtbl vtbl_identify_uhc = {
mbfl_no_encoding_uhc,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_common_dtor,
- mbfl_filt_ident_uhc };
+ mbfl_filt_ident_uhc,
+ NULL };
static struct mbfl_identify_vtbl vtbl_identify_2022kr = {
mbfl_no_encoding_2022kr,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_common_dtor,
- mbfl_filt_ident_2022kr };
+ mbfl_filt_ident_2022kr,
+ NULL };
#endif /* HAVE_MBSTR_KR */
@@ -2407,110 +2428,128 @@ static struct mbfl_identify_vtbl vtbl_identify_cp1251 = {
mbfl_no_encoding_cp1251,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_common_dtor,
- mbfl_filt_ident_cp1251 };
+ mbfl_filt_ident_cp1251,
+ mbfl_filt_get_rating_cp1251 };
static struct mbfl_identify_vtbl vtbl_identify_cp866 = {
mbfl_no_encoding_cp866,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_common_dtor,
- mbfl_filt_ident_cp866 };
+ mbfl_filt_ident_cp866,
+ mbfl_filt_get_rating_cp866 };
static struct mbfl_identify_vtbl vtbl_identify_koi8r = {
mbfl_no_encoding_koi8r,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_common_dtor,
- mbfl_filt_ident_koi8r };
+ mbfl_filt_ident_koi8r,
+ mbfl_filt_get_rating_koi8r };
#endif /* HAVE_MBSTR_RU */
static const struct mbfl_identify_vtbl vtbl_identify_cp1252 = {
mbfl_no_encoding_cp1252,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_common_dtor,
- mbfl_filt_ident_cp1252 };
+ mbfl_filt_ident_cp1252,
+ NULL };
static const struct mbfl_identify_vtbl vtbl_identify_8859_1 = {
mbfl_no_encoding_8859_1,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_common_dtor,
- mbfl_filt_ident_true };
+ mbfl_filt_ident_true,
+ NULL };
static const struct mbfl_identify_vtbl vtbl_identify_8859_2 = {
mbfl_no_encoding_8859_2,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_common_dtor,
- mbfl_filt_ident_true };
+ mbfl_filt_ident_true,
+ NULL };
static const struct mbfl_identify_vtbl vtbl_identify_8859_3 = {
mbfl_no_encoding_8859_3,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_common_dtor,
- mbfl_filt_ident_true };
+ mbfl_filt_ident_true,
+ NULL };
static const struct mbfl_identify_vtbl vtbl_identify_8859_4 = {
mbfl_no_encoding_8859_4,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_common_dtor,
- mbfl_filt_ident_true };
+ mbfl_filt_ident_true,
+ NULL };
static const struct mbfl_identify_vtbl vtbl_identify_8859_5 = {
mbfl_no_encoding_8859_5,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_common_dtor,
- mbfl_filt_ident_true };
+ mbfl_filt_ident_true,
+ NULL };
static const struct mbfl_identify_vtbl vtbl_identify_8859_6 = {
mbfl_no_encoding_8859_6,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_common_dtor,
- mbfl_filt_ident_true };
+ mbfl_filt_ident_true,
+ NULL };
static const struct mbfl_identify_vtbl vtbl_identify_8859_7 = {
mbfl_no_encoding_8859_7,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_common_dtor,
- mbfl_filt_ident_true };
+ mbfl_filt_ident_true,
+ NULL };
static const struct mbfl_identify_vtbl vtbl_identify_8859_8 = {
mbfl_no_encoding_8859_8,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_common_dtor,
- mbfl_filt_ident_true };
+ mbfl_filt_ident_true,
+ NULL };
static const struct mbfl_identify_vtbl vtbl_identify_8859_9 = {
mbfl_no_encoding_8859_9,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_common_dtor,
- mbfl_filt_ident_true };
+ mbfl_filt_ident_true,
+ NULL };
static const struct mbfl_identify_vtbl vtbl_identify_8859_10 = {
mbfl_no_encoding_8859_10,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_common_dtor,
- mbfl_filt_ident_true };
+ mbfl_filt_ident_true,
+ NULL };
static const struct mbfl_identify_vtbl vtbl_identify_8859_13 = {
mbfl_no_encoding_8859_13,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_common_dtor,
- mbfl_filt_ident_true };
+ mbfl_filt_ident_true,
+ NULL };
static const struct mbfl_identify_vtbl vtbl_identify_8859_14 = {
mbfl_no_encoding_8859_14,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_common_dtor,
- mbfl_filt_ident_true };
+ mbfl_filt_ident_true,
+ NULL };
static const struct mbfl_identify_vtbl vtbl_identify_8859_15 = {
mbfl_no_encoding_8859_15,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_common_dtor,
- mbfl_filt_ident_true };
+ mbfl_filt_ident_true,
+ NULL };
static const struct mbfl_identify_vtbl vtbl_identify_false = {
mbfl_no_encoding_pass,
mbfl_filt_ident_false_ctor,
mbfl_filt_ident_common_dtor,
- mbfl_filt_ident_false };
+ mbfl_filt_ident_false,
+ NULL };
static const struct mbfl_identify_vtbl *mbfl_identify_filter_list[] = {
&vtbl_identify_utf8,
@@ -6537,6 +6576,30 @@ mbfl_filt_ident_cp1252(int c, mbfl_identify_filter *filter TSRMLS_DC)
}
#if defined(HAVE_MBSTR_RU)
+/*
+ * Rating of byte value c for CP1251: the cp1251_char_ratings_table entry
+ * for c, or 0 when c lies outside the range the table covers.
+ * The filter argument is not used.
+ */
+static int
+mbfl_filt_get_rating_cp1251(int c, mbfl_identify_filter *filter TSRMLS_DC)
+{
+	if (c < cp1251_char_ratings_table_min) {
+		return 0;
+	}
+	if (c >= cp1251_char_ratings_table_min + cp1251_char_ratings_table_len) {
+		return 0;
+	}
+	return cp1251_char_ratings_table[c - cp1251_char_ratings_table_min];
+}
+
+/*
+ * Rating of byte value c for CP866: the cp866_char_ratings_table entry
+ * for c, or 0 when c lies outside the range the table covers.
+ * The filter argument is not used.
+ */
+static int
+mbfl_filt_get_rating_cp866(int c, mbfl_identify_filter *filter TSRMLS_DC)
+{
+	if (c < cp866_char_ratings_table_min) {
+		return 0;
+	}
+	if (c >= cp866_char_ratings_table_min + cp866_char_ratings_table_len) {
+		return 0;
+	}
+	return cp866_char_ratings_table[c - cp866_char_ratings_table_min];
+}
+
+/*
+ * Rating of byte value c for KOI8-R: the koi8r_char_ratings_table entry
+ * for c, or 0 when c lies outside the range the table covers.
+ * The filter argument is not used.
+ */
+static int
+mbfl_filt_get_rating_koi8r(int c, mbfl_identify_filter *filter TSRMLS_DC)
+{
+	if (c < koi8r_char_ratings_table_min) {
+		return 0;
+	}
+	if (c >= koi8r_char_ratings_table_min + koi8r_char_ratings_table_len) {
+		return 0;
+	}
+	return koi8r_char_ratings_table[c - koi8r_char_ratings_table_min];
+}
+
// all of this is so ugly now!
static int
mbfl_filt_ident_cp1251(int c, mbfl_identify_filter *filter TSRMLS_DC)
@@ -6981,6 +7044,7 @@ mbfl_identify_filter_set_vtbl(mbfl_identify_filter *filter, const struct mbfl_id
filter->filter_ctor = vtbl->filter_ctor;
filter->filter_dtor = vtbl->filter_dtor;
filter->filter_function = vtbl->filter_function;
+ filter->get_rating_function = vtbl->get_rating_function;
}
}
@@ -7552,6 +7616,116 @@ mbfl_identify_encoding_name(mbfl_string *string, enum mbfl_no_encoding *elist, i
}
}
+/*
+ * guess encoding - charset detection based on per-character ratings
+ *
+ * A filter is set up for every entry of elist that has an identify vtbl.
+ * Each byte of string is handed to the get_rating_function of every filter
+ * that provides one, and the returned ratings are summed per filter.  The
+ * encoding of the filter with the highest non-zero sum is returned.
+ *
+ * Returns NULL when elist is NULL or empty, when an allocation fails, when
+ * none of the listed encodings has a rating function, or when every sum
+ * stays at 0.
+ */
+const mbfl_encoding *
+mbfl_guess_encoding(mbfl_string *string, enum mbfl_no_encoding *elist, int eliztsz TSRMLS_DC)
+{
+	int i, n, num, num_actual, overflow;
+	unsigned char *p;
+	const struct mbfl_identify_vtbl *vtbl;
+	mbfl_identify_filter *flist, *filter;
+	const mbfl_encoding *encoding;
+	unsigned long *ratings, add_rating, max_rating;
+
+	if (elist == NULL || eliztsz <= 0) {
+		return NULL;
+	}
+
+	/* initialize */
+	flist = (mbfl_identify_filter *)mbfl_calloc(eliztsz, sizeof(mbfl_identify_filter));
+	if (flist == NULL) {
+		return NULL;
+	}
+	encoding = NULL;
+	ratings = NULL;
+	num = 0;
+	num_actual = 0;
+	for (i = 0; i < eliztsz; i++) {
+		vtbl = mbfl_identify_filter_get_vtbl(elist[i]);
+		if (vtbl == NULL) {
+			continue;
+		}
+		filter = &flist[num];
+		mbfl_identify_filter_set_vtbl(filter, vtbl);
+		filter->encoding = mbfl_no2encoding(vtbl->encoding);
+		(*filter->filter_ctor)(filter TSRMLS_CC);
+		num++;
+		if (filter->get_rating_function != NULL) {
+			num_actual++;
+		}
+	}
+	if (num_actual == 0) {
+		/* no filters with character rating routines - exit */
+		goto cleanup;
+	}
+
+	ratings = (unsigned long *)mbfl_calloc(num, sizeof(unsigned long));
+	if (ratings == NULL) {
+		goto cleanup;
+	}
+
+	/* feed data */
+	n = string->len;
+	p = string->val;
+	if (p != NULL) {
+		overflow = 0;
+		while (n > 0 && !overflow) {
+			for (i = 0; i < num; i++) {
+				filter = &flist[i];
+				if (filter->get_rating_function == NULL) {
+					/* encoding has no rating routine; its sum stays 0 */
+					continue;
+				}
+				add_rating = (*filter->get_rating_function)(*p, filter TSRMLS_CC);
+				if ((ratings[i] + add_rating) < ratings[i]) {
+					/*
+					 * The sum would wrap around: keep the current
+					 * value so this filter is not penalised, and stop
+					 * after this byte - enough data has been seen.
+					 */
+					overflow = 1;
+				} else {
+					ratings[i] += add_rating;
+				}
+			}
+			p++;
+			n--;
+		}
+	}
+
+	/* judge: first filter with the strictly highest sum wins */
+	max_rating = 0;
+	for (i = 0; i < num; i++) {
+		if (ratings[i] > max_rating) {
+			max_rating = ratings[i];
+			encoding = flist[i].encoding;
+		}
+	}
+
+cleanup:
+	/* every filter counted in num had its ctor run, so run its dtor too */
+	for (i = 0; i < num; i++) {
+		filter = &flist[i];
+		(*filter->filter_dtor)(filter TSRMLS_CC);
+	}
+	if (ratings != NULL) {
+		mbfl_free((void *)ratings);
+	}
+	mbfl_free((void *)flist);
+
+	return encoding;
+}
+
+/*
+ * Name of the encoding picked by mbfl_guess_encoding(), or NULL when no
+ * encoding was picked or its number is not strictly between
+ * mbfl_no_encoding_charset_min and mbfl_no_encoding_charset_max.
+ */
+const char*
+mbfl_guess_encoding_name(mbfl_string *string, enum mbfl_no_encoding *elist, int eliztsz TSRMLS_DC)
+{
+	const mbfl_encoding *guessed = mbfl_guess_encoding(string, elist, eliztsz TSRMLS_CC);
+
+	if (guessed == NULL) {
+		return NULL;
+	}
+	if (guessed->no_encoding <= mbfl_no_encoding_charset_min) {
+		return NULL;
+	}
+	if (guessed->no_encoding >= mbfl_no_encoding_charset_max) {
+		return NULL;
+	}
+	return guessed->name;
+}
+
const enum mbfl_no_encoding
mbfl_identify_encoding_no(mbfl_string *string, enum mbfl_no_encoding *elist, int eliztsz TSRMLS_DC)
{
diff --git a/ext/mbstring/mbfilter.h b/ext/mbstring/mbfilter.h
index 563a11ea89..58da85c9ad 100644
--- a/ext/mbstring/mbfilter.h
+++ b/ext/mbstring/mbfilter.h
@@ -389,6 +389,7 @@ struct _mbfl_identify_filter {
void (*filter_ctor)(mbfl_identify_filter *filter TSRMLS_DC);
void (*filter_dtor)(mbfl_identify_filter *filter TSRMLS_DC);
int (*filter_function)(int c, mbfl_identify_filter *filter TSRMLS_DC);
+ int (*get_rating_function)(int c, mbfl_identify_filter *filter TSRMLS_DC);
int status;
int flag;
int score;
@@ -400,6 +401,7 @@ struct mbfl_identify_vtbl {
void (*filter_ctor)(mbfl_identify_filter *filter TSRMLS_DC);
void (*filter_dtor)(mbfl_identify_filter *filter TSRMLS_DC);
int (*filter_function)(int c, mbfl_identify_filter *filter TSRMLS_DC);
+ int (*get_rating_function)(int c, mbfl_identify_filter *filter TSRMLS_DC);
};
mbfl_identify_filter * mbfl_identify_filter_new(enum mbfl_no_encoding encoding TSRMLS_DC);
@@ -459,6 +461,12 @@ mbfl_convert_encoding(mbfl_string *string, mbfl_string *result, enum mbfl_no_enc
* identify encoding
*/
const mbfl_encoding *
+mbfl_guess_encoding(mbfl_string *string, enum mbfl_no_encoding *elist, int eliztsz TSRMLS_DC);
+
+const char *
+mbfl_guess_encoding_name(mbfl_string *string, enum mbfl_no_encoding *elist, int eliztsz TSRMLS_DC);
+
+const mbfl_encoding *
mbfl_identify_encoding(mbfl_string *string, enum mbfl_no_encoding *elist, int eliztsz TSRMLS_DC);
const char *
diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c
index 197ad3bc57..c0c82e759a 100644
--- a/ext/mbstring/mbstring.c
+++ b/ext/mbstring/mbstring.c
@@ -194,6 +194,7 @@ function_entry mbstring_functions[] = {
PHP_FE(mb_strimwidth, NULL)
PHP_FE(mb_convert_encoding, NULL)
PHP_FE(mb_detect_encoding, NULL)
+ PHP_FE(mb_guess_encoding, NULL)
PHP_FE(mb_convert_kana, NULL)
PHP_FE(mb_encode_mimeheader, NULL)
PHP_FE(mb_decode_mimeheader, NULL)
@@ -2607,6 +2608,83 @@ PHP_FUNCTION(mb_detect_encoding)
+/* {{{ proto string mb_guess_encoding(string str [, mixed encoding_list])
+   Returns the name of the guessed encoding of the given string, or false if none could be guessed */
+PHP_FUNCTION(mb_guess_encoding)
+{
+	pval **arg_str, **arg_list;
+	mbfl_string string;
+	const char *ret;
+	enum mbfl_no_encoding *elist;
+	int size, *list;
+
+	if (ZEND_NUM_ARGS() == 1) {
+		if (zend_get_parameters_ex(1, &arg_str) == FAILURE) {
+			WRONG_PARAM_COUNT;
+		}
+	} else if (ZEND_NUM_ARGS() == 2) {
+		if (zend_get_parameters_ex(2, &arg_str, &arg_list) == FAILURE) {
+			WRONG_PARAM_COUNT;
+		}
+	} else {
+		WRONG_PARAM_COUNT;
+	}
+
+	/* make encoding list */
+	list = NULL;
+	size = 0;
+	if (ZEND_NUM_ARGS() >= 2) {
+		switch (Z_TYPE_PP(arg_list)) {
+		case IS_ARRAY:
+			if (!php_mbstring_parse_encoding_array(*arg_list, &list, &size, 0)) {
+				if (list) {
+					efree(list);
+					/* reset so the efree() after the guess does not free it again */
+					list = NULL;
+					size = 0;
+				}
+			}
+			break;
+		default:
+			convert_to_string_ex(arg_list);
+			if (!php_mbstring_parse_encoding_list(Z_STRVAL_PP(arg_list), Z_STRLEN_PP(arg_list), &list, &size, 0)) {
+				if (list) {
+					efree(list);
+					/* reset so the efree() after the guess does not free it again */
+					list = NULL;
+					size = 0;
+				}
+			}
+			break;
+		}
+		if (size <= 0) {
+			php_error(E_WARNING, "%s() illegal argument",
+				get_active_function_name(TSRMLS_C));
+		}
+	}
+
+	/* use the caller's list when it parsed, else the current detect order */
+	if (size > 0 && list != NULL) {
+		elist = list;
+	} else {
+		elist = MBSTRG(current_detect_order_list);
+		size = MBSTRG(current_detect_order_list_size);
+	}
+
+	convert_to_string_ex(arg_str);
+	mbfl_string_init(&string);
+	string.no_language = MBSTRG(current_language);
+	string.val = Z_STRVAL_PP(arg_str);
+	string.len = Z_STRLEN_PP(arg_str);
+	ret = mbfl_guess_encoding_name(&string, elist, size TSRMLS_CC);
+	if (list != NULL) {
+		efree((void *)list);
+	}
+	if (ret != NULL) {
+		RETVAL_STRING((char *)ret, 1);
+	} else {
+		RETVAL_FALSE;
+	}
+}
+/* }}} */
+
+
+
/* {{{ proto string mb_encode_mimeheader(string str [, string charset [, string transfer-encoding [, string linefeed]]])
Converts the string to MIME "encoded-word" in the format of =?charset?(B|Q)?encoded_string?= */
PHP_FUNCTION(mb_encode_mimeheader)
diff --git a/ext/mbstring/mbstring.h b/ext/mbstring/mbstring.h
index 094ecc4cb3..e2d51501ff 100644
--- a/ext/mbstring/mbstring.h
+++ b/ext/mbstring/mbstring.h
@@ -94,6 +94,7 @@ PHP_FUNCTION(mb_strcut);
PHP_FUNCTION(mb_strwidth);
PHP_FUNCTION(mb_strimwidth);
PHP_FUNCTION(mb_convert_encoding);
+PHP_FUNCTION(mb_guess_encoding);
PHP_FUNCTION(mb_detect_encoding);
PHP_FUNCTION(mb_convert_kana);
PHP_FUNCTION(mb_encode_mimeheader);
diff --git a/ext/mbstring/unicode_table_ru.h b/ext/mbstring/unicode_table_ru.h
index 74d02fef34..270032ea93 100644
--- a/ext/mbstring/unicode_table_ru.h
+++ b/ext/mbstring/unicode_table_ru.h
@@ -67,3 +67,40 @@ static const int koi8r_ucs_table_min = 0x80;
static const int koi8r_ucs_table_len = (sizeof (koi8r_ucs_table) / sizeof (unsigned short));
static const int koi8r_ucs_table_max = 0x80 + (sizeof (koi8r_ucs_table) / sizeof (unsigned short));
+
+
+/*
+ * Per-byte ratings looked up by mbfl_filt_get_rating_cp1251(): entry i is
+ * the rating for byte value cp1251_char_ratings_table_min + i.  Bytes
+ * outside the table are rated 0 by that function.
+ */
+static const unsigned int cp1251_char_ratings_table[] = {
+ 14985, 3207, 9044, 2847, 6015,18094, 2305, 3456,
+ 15786, 2472, 6531, 7803, 6341,13494,21800, 6267,
+ 10139,10398,13877, 5094, 536, 2201, 855, 2665,
+ 1127, 981, 99, 4460, 3805, 426, 1516, 4341
+};
+/* byte value that corresponds to table index 0 */
+static const int cp1251_char_ratings_table_min = 0xe0;
+/* number of entries in the table */
+static const int cp1251_char_ratings_table_len = (sizeof (cp1251_char_ratings_table) / sizeof (unsigned int));
+
+
+/*
+ * Per-byte ratings looked up by mbfl_filt_get_rating_cp866(): entry i is
+ * the rating for byte value cp866_char_ratings_table_min + i.  Bytes
+ * outside the table are rated 0 by that function.
+ *
+ * NOTE(review): the rating 14985 sits at 0xe0 in the CP1251 table and at
+ * 0xc1 in the KOI8-R table (presumably the same Cyrillic letter in both
+ * code pages), but at 0x9e + 3 = 0xa1 here, while CP866 appears to place
+ * that letter at 0xa0.  Several neighbouring entries look shifted the same
+ * way - confirm this table against the CP866 layout.
+ */
+static const unsigned int cp866_char_ratings_table[] = {
+ 99, 0, 1516,14985, 2305, 536, 855, 6015,
+ 5094, 4460, 2201,15786, 2472, 6531, 7803, 6341,
+ 13494,21800, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 4341,10139,10398,13877,18094, 2847,
+ 3207, 981, 3456, 426, 2665, 3805, 9044, 1127,
+ 0, 6267
+};
+/* byte value that corresponds to table index 0 */
+static const int cp866_char_ratings_table_min = 0x9e;
+/* number of entries in the table */
+static const int cp866_char_ratings_table_len = (sizeof (cp866_char_ratings_table) / sizeof (unsigned int));
+
+
+/*
+ * Per-byte ratings looked up by mbfl_filt_get_rating_koi8r(): entry i is
+ * the rating for byte value koi8r_char_ratings_table_min + i.  Bytes
+ * outside the table are rated 0 by that function.
+ */
+static const unsigned int koi8r_char_ratings_table[] = {
+ 1516,14985, 3207, 855, 6015,18094, 536, 2847,
+ 2201,15786, 2472, 6531, 7803, 6341,13494,21800,
+ 6267, 4341,10139,10398,13877, 5094, 2305, 9044,
+ 3805, 4460, 3456, 1127, 426, 981, 2665, 99
+};
+/* byte value that corresponds to table index 0 */
+static const int koi8r_char_ratings_table_min = 0xc0;
+/* number of entries in the table */
+static const int koi8r_char_ratings_table_len = (sizeof (koi8r_char_ratings_table) / sizeof (unsigned int));