diff options
-rw-r--r-- | ext/mbstring/mbfilter.c | 246 | ||||
-rw-r--r-- | ext/mbstring/mbfilter.h | 8 | ||||
-rw-r--r-- | ext/mbstring/mbstring.c | 78 | ||||
-rw-r--r-- | ext/mbstring/mbstring.h | 1 | ||||
-rw-r--r-- | ext/mbstring/unicode_table_ru.h | 37 |
5 files changed, 334 insertions, 36 deletions
diff --git a/ext/mbstring/mbfilter.c b/ext/mbstring/mbfilter.c
index 2bf816279a..4e7c942afc 100644
--- a/ext/mbstring/mbfilter.c
+++ b/ext/mbstring/mbfilter.c
@@ -104,8 +104,9 @@
 #if defined(HAVE_MBSTR_KR)
 #include "mbfilter_kr.h"
 #endif
-#if defined(HAVE_MBSTR_KR)
+#if defined(HAVE_MBSTR_RU)
 #include "mbfilter_ru.h"
+#include "unicode_table_ru.h"
 #endif
 
 #include "zend.h"
@@ -1242,6 +1243,9 @@ static int mbfl_filt_ident_2022kr(int c, mbfl_identify_filter *filter TSRMLS_DC)
 #endif /* HAVE_MBSTR_KR */
 
 #if defined(HAVE_MBSTR_RU)
+static int mbfl_filt_get_rating_cp1251(int c, mbfl_identify_filter *filter TSRMLS_DC);
+static int mbfl_filt_get_rating_cp866(int c, mbfl_identify_filter *filter TSRMLS_DC);
+static int mbfl_filt_get_rating_koi8r(int c, mbfl_identify_filter *filter TSRMLS_DC);
 static int mbfl_filt_ident_cp1251(int c, mbfl_identify_filter *filter TSRMLS_DC);
 static int mbfl_filt_ident_cp866(int c, mbfl_identify_filter *filter TSRMLS_DC);
 static int mbfl_filt_ident_koi8r(int c, mbfl_identify_filter *filter TSRMLS_DC);
@@ -2294,56 +2298,65 @@ static const struct mbfl_identify_vtbl vtbl_identify_ascii = {
 	mbfl_no_encoding_ascii,
 	mbfl_filt_ident_common_ctor,
 	mbfl_filt_ident_common_dtor,
-	mbfl_filt_ident_ascii };
+	mbfl_filt_ident_ascii,
+	NULL };
 
 static const struct mbfl_identify_vtbl vtbl_identify_utf8 = {
 	mbfl_no_encoding_utf8,
 	mbfl_filt_ident_common_ctor,
 	mbfl_filt_ident_common_dtor,
-	mbfl_filt_ident_utf8 };
+	mbfl_filt_ident_utf8,
+	NULL };
 
 static const struct mbfl_identify_vtbl vtbl_identify_utf7 = {
 	mbfl_no_encoding_utf7,
 	mbfl_filt_ident_common_ctor,
 	mbfl_filt_ident_common_dtor,
-	mbfl_filt_ident_utf7 };
+	mbfl_filt_ident_utf7,
+	NULL };
 
 #if defined(HAVE_MBSTR_JA)
 static const struct mbfl_identify_vtbl vtbl_identify_eucjp = {
 	mbfl_no_encoding_euc_jp,
 	mbfl_filt_ident_common_ctor,
 	mbfl_filt_ident_common_dtor,
-	mbfl_filt_ident_eucjp };
+	mbfl_filt_ident_eucjp,
+	NULL };
 
 static const struct mbfl_identify_vtbl vtbl_identify_eucjpwin = {
 	mbfl_no_encoding_eucjp_win,
 	mbfl_filt_ident_common_ctor,
 	mbfl_filt_ident_common_dtor,
-	mbfl_filt_ident_eucjp };
+	mbfl_filt_ident_eucjp,
+	NULL };
 
 static const struct mbfl_identify_vtbl vtbl_identify_sjis = {
 	mbfl_no_encoding_sjis,
 	mbfl_filt_ident_common_ctor,
 	mbfl_filt_ident_common_dtor,
-	mbfl_filt_ident_sjis };
+	mbfl_filt_ident_sjis,
+	NULL };
 
 static const struct mbfl_identify_vtbl vtbl_identify_sjiswin = {
 	mbfl_no_encoding_sjis_win,
 	mbfl_filt_ident_common_ctor,
 	mbfl_filt_ident_common_dtor,
-	mbfl_filt_ident_sjiswin };
+	mbfl_filt_ident_sjiswin,
+	NULL };
 
 static const struct mbfl_identify_vtbl vtbl_identify_jis = {
 	mbfl_no_encoding_jis,
 	mbfl_filt_ident_common_ctor,
 	mbfl_filt_ident_common_dtor,
-	mbfl_filt_ident_jis };
+	mbfl_filt_ident_jis,
+	NULL };
 
 static const struct mbfl_identify_vtbl vtbl_identify_2022jp = {
 	mbfl_no_encoding_2022jp,
 	mbfl_filt_ident_common_ctor,
 	mbfl_filt_ident_common_dtor,
-	mbfl_filt_ident_2022jp };
+	mbfl_filt_ident_2022jp,
+	NULL };
 #endif /* HAVE_MBSTR_JA */
 
 #if defined(HAVE_MBSTR_CN)
@@ -2351,19 +2364,22 @@ static struct mbfl_identify_vtbl vtbl_identify_euccn = {
 	mbfl_no_encoding_euc_cn,
 	mbfl_filt_ident_common_ctor,
 	mbfl_filt_ident_common_dtor,
-	mbfl_filt_ident_euccn };
+	mbfl_filt_ident_euccn,
+	NULL };
 
 static struct mbfl_identify_vtbl vtbl_identify_cp936 = {
 	mbfl_no_encoding_cp936,
 	mbfl_filt_ident_common_ctor,
 	mbfl_filt_ident_common_dtor,
-	mbfl_filt_ident_cp936 };
+	mbfl_filt_ident_cp936,
+	NULL };
 
 static struct mbfl_identify_vtbl vtbl_identify_hz = {
 	mbfl_no_encoding_hz,
 	mbfl_filt_ident_common_ctor,
 	mbfl_filt_ident_common_dtor,
-	mbfl_filt_ident_hz };
+	mbfl_filt_ident_hz,
+	NULL };
 
 #endif /* HAVE_MBSTR_CN */
 
@@ -2372,13 +2388,15 @@ static struct mbfl_identify_vtbl vtbl_identify_euctw = {
 	mbfl_no_encoding_euc_tw,
 	mbfl_filt_ident_common_ctor,
 	mbfl_filt_ident_common_dtor,
-	mbfl_filt_ident_euctw };
+	mbfl_filt_ident_euctw,
+	NULL };
 
 static struct mbfl_identify_vtbl vtbl_identify_big5 = {
 	mbfl_no_encoding_big5,
 	mbfl_filt_ident_common_ctor,
 	mbfl_filt_ident_common_dtor,
-	mbfl_filt_ident_big5 };
+	mbfl_filt_ident_big5,
+	NULL };
 #endif /* HAVE_MBSTR_TW */
 
 #if defined(HAVE_MBSTR_KR)
@@ -2386,19 +2404,22 @@ static struct mbfl_identify_vtbl vtbl_identify_euckr = {
 	mbfl_no_encoding_euc_kr,
 	mbfl_filt_ident_common_ctor,
 	mbfl_filt_ident_common_dtor,
-	mbfl_filt_ident_euckr };
+	mbfl_filt_ident_euckr,
+	NULL };
 
 static struct mbfl_identify_vtbl vtbl_identify_uhc = {
 	mbfl_no_encoding_uhc,
 	mbfl_filt_ident_common_ctor,
 	mbfl_filt_ident_common_dtor,
-	mbfl_filt_ident_uhc };
+	mbfl_filt_ident_uhc,
+	NULL };
 
 static struct mbfl_identify_vtbl vtbl_identify_2022kr = {
 	mbfl_no_encoding_2022kr,
 	mbfl_filt_ident_common_ctor,
 	mbfl_filt_ident_common_dtor,
-	mbfl_filt_ident_2022kr };
+	mbfl_filt_ident_2022kr,
+	NULL };
 
 #endif /* HAVE_MBSTR_KR */
 
@@ -2407,110 +2428,128 @@ static struct mbfl_identify_vtbl vtbl_identify_cp1251 = {
 	mbfl_no_encoding_cp1251,
 	mbfl_filt_ident_common_ctor,
 	mbfl_filt_ident_common_dtor,
-	mbfl_filt_ident_cp1251 };
+	mbfl_filt_ident_cp1251,
+	mbfl_filt_get_rating_cp1251 };
 
 static struct mbfl_identify_vtbl vtbl_identify_cp866 = {
 	mbfl_no_encoding_cp866,
 	mbfl_filt_ident_common_ctor,
 	mbfl_filt_ident_common_dtor,
-	mbfl_filt_ident_cp866 };
+	mbfl_filt_ident_cp866,
+	mbfl_filt_get_rating_cp866 };
 
 static struct mbfl_identify_vtbl vtbl_identify_koi8r = {
 	mbfl_no_encoding_koi8r,
 	mbfl_filt_ident_common_ctor,
 	mbfl_filt_ident_common_dtor,
-	mbfl_filt_ident_koi8r };
+	mbfl_filt_ident_koi8r,
+	mbfl_filt_get_rating_koi8r };
 #endif /* HAVE_MBSTR_RU */
 
 static const struct mbfl_identify_vtbl vtbl_identify_cp1252 = {
 	mbfl_no_encoding_cp1252,
 	mbfl_filt_ident_common_ctor,
 	mbfl_filt_ident_common_dtor,
-	mbfl_filt_ident_cp1252 };
+	mbfl_filt_ident_cp1252,
+	NULL };
 
 static const struct mbfl_identify_vtbl vtbl_identify_8859_1 = {
 	mbfl_no_encoding_8859_1,
 	mbfl_filt_ident_common_ctor,
 	mbfl_filt_ident_common_dtor,
-	mbfl_filt_ident_true };
+	mbfl_filt_ident_true,
+	NULL };
 
 static const struct mbfl_identify_vtbl vtbl_identify_8859_2 = {
 	mbfl_no_encoding_8859_2,
 	mbfl_filt_ident_common_ctor,
 	mbfl_filt_ident_common_dtor,
-	mbfl_filt_ident_true };
+	mbfl_filt_ident_true,
+	NULL };
 
 static const struct mbfl_identify_vtbl vtbl_identify_8859_3 = {
 	mbfl_no_encoding_8859_3,
 	mbfl_filt_ident_common_ctor,
 	mbfl_filt_ident_common_dtor,
-	mbfl_filt_ident_true };
+	mbfl_filt_ident_true,
+	NULL };
 
 static const struct mbfl_identify_vtbl vtbl_identify_8859_4 = {
 	mbfl_no_encoding_8859_4,
 	mbfl_filt_ident_common_ctor,
 	mbfl_filt_ident_common_dtor,
-	mbfl_filt_ident_true };
+	mbfl_filt_ident_true,
+	NULL };
 
 static const struct mbfl_identify_vtbl vtbl_identify_8859_5 = {
 	mbfl_no_encoding_8859_5,
 	mbfl_filt_ident_common_ctor,
 	mbfl_filt_ident_common_dtor,
-	mbfl_filt_ident_true };
+	mbfl_filt_ident_true,
+	NULL };
 
 static const struct mbfl_identify_vtbl vtbl_identify_8859_6 = {
 	mbfl_no_encoding_8859_6,
 	mbfl_filt_ident_common_ctor,
 	mbfl_filt_ident_common_dtor,
-	mbfl_filt_ident_true };
+	mbfl_filt_ident_true,
+	NULL };
 
 static const struct mbfl_identify_vtbl vtbl_identify_8859_7 = {
 	mbfl_no_encoding_8859_7,
 	mbfl_filt_ident_common_ctor,
 	mbfl_filt_ident_common_dtor,
-	mbfl_filt_ident_true };
+	mbfl_filt_ident_true,
+	NULL };
 
 static const struct mbfl_identify_vtbl vtbl_identify_8859_8 = {
 	mbfl_no_encoding_8859_8,
 	mbfl_filt_ident_common_ctor,
 	mbfl_filt_ident_common_dtor,
-	mbfl_filt_ident_true };
+	mbfl_filt_ident_true,
+	NULL };
 
 static const struct mbfl_identify_vtbl vtbl_identify_8859_9 = {
 	mbfl_no_encoding_8859_9,
 	mbfl_filt_ident_common_ctor,
 	mbfl_filt_ident_common_dtor,
-	mbfl_filt_ident_true };
+	mbfl_filt_ident_true,
+	NULL };
 
 static const struct mbfl_identify_vtbl vtbl_identify_8859_10 = {
 	mbfl_no_encoding_8859_10,
 	mbfl_filt_ident_common_ctor,
 	mbfl_filt_ident_common_dtor,
-	mbfl_filt_ident_true };
+	mbfl_filt_ident_true,
+	NULL };
 
 static const struct mbfl_identify_vtbl vtbl_identify_8859_13 = {
 	mbfl_no_encoding_8859_13,
 	mbfl_filt_ident_common_ctor,
 	mbfl_filt_ident_common_dtor,
-	mbfl_filt_ident_true };
+	mbfl_filt_ident_true,
+	NULL };
 
 static const struct mbfl_identify_vtbl vtbl_identify_8859_14 = {
 	mbfl_no_encoding_8859_14,
 	mbfl_filt_ident_common_ctor,
 	mbfl_filt_ident_common_dtor,
-	mbfl_filt_ident_true };
+	mbfl_filt_ident_true,
+	NULL };
 
 static const struct mbfl_identify_vtbl vtbl_identify_8859_15 = {
 	mbfl_no_encoding_8859_15,
 	mbfl_filt_ident_common_ctor,
 	mbfl_filt_ident_common_dtor,
-	mbfl_filt_ident_true };
+	mbfl_filt_ident_true,
+	NULL };
 
 static const struct mbfl_identify_vtbl vtbl_identify_false = {
 	mbfl_no_encoding_pass,
 	mbfl_filt_ident_false_ctor,
 	mbfl_filt_ident_common_dtor,
-	mbfl_filt_ident_false };
+	mbfl_filt_ident_false,
+	NULL };
 
 static const struct mbfl_identify_vtbl *mbfl_identify_filter_list[] = {
 	&vtbl_identify_utf8,
@@ -6537,6 +6576,30 @@ mbfl_filt_ident_cp1252(int c, mbfl_identify_filter *filter TSRMLS_DC)
 }
 
 #if defined(HAVE_MBSTR_RU)
+static int
+mbfl_filt_get_rating_cp1251(int c, mbfl_identify_filter *filter TSRMLS_DC)
+{
+	if (c < cp1251_char_ratings_table_min || c >= (cp1251_char_ratings_table_min + cp1251_char_ratings_table_len))
+		return 0;	/* byte lies outside the CP1251 ratings table */
+	return cp1251_char_ratings_table[c - cp1251_char_ratings_table_min];	/* per-byte rating */
+}
+
+static int
+mbfl_filt_get_rating_cp866(int c, mbfl_identify_filter *filter TSRMLS_DC)
+{
+	if (c < cp866_char_ratings_table_min || c >= (cp866_char_ratings_table_min + cp866_char_ratings_table_len))
+		return 0;	/* byte lies outside the CP866 ratings table */
+	return cp866_char_ratings_table[c - cp866_char_ratings_table_min];	/* per-byte rating */
+}
+
+static int
+mbfl_filt_get_rating_koi8r(int c, mbfl_identify_filter *filter TSRMLS_DC)
+{
+	if (c < koi8r_char_ratings_table_min || c >= (koi8r_char_ratings_table_min + koi8r_char_ratings_table_len))
+		return 0;	/* byte lies outside the KOI8-R ratings table */
+	return koi8r_char_ratings_table[c - koi8r_char_ratings_table_min];	/* per-byte rating */
+}
+
 // all of this is so ugly now!
static int mbfl_filt_ident_cp1251(int c, mbfl_identify_filter *filter TSRMLS_DC) @@ -6981,6 +7044,7 @@ mbfl_identify_filter_set_vtbl(mbfl_identify_filter *filter, const struct mbfl_id filter->filter_ctor = vtbl->filter_ctor; filter->filter_dtor = vtbl->filter_dtor; filter->filter_function = vtbl->filter_function; + filter->get_rating_function = vtbl->get_rating_function; } } @@ -7552,6 +7616,116 @@ mbfl_identify_encoding_name(mbfl_string *string, enum mbfl_no_encoding *elist, i } } +/* + * guess encoding - uses another algorithm for charset detection based on symbols rating + */ +const mbfl_encoding * +mbfl_guess_encoding(mbfl_string *string, enum mbfl_no_encoding *elist, int eliztsz TSRMLS_DC) +{ + int i, n, num, num_actual, bad, overflow; + unsigned char *p; + const struct mbfl_identify_vtbl *vtbl; + mbfl_identify_filter *flist, *filter; + const mbfl_encoding *encoding; + unsigned long *ratings,add_rating,max_rating; + + if (elist == NULL) + return NULL; + + /* initialize */ + flist = (mbfl_identify_filter *)mbfl_calloc(eliztsz, sizeof(mbfl_identify_filter)); + if (flist == NULL) { + return NULL; + } + i = 0; + num = 0; + num_actual = 0; + if (elist != NULL) { + while (i < eliztsz) { + vtbl = mbfl_identify_filter_get_vtbl(elist[i]); + if (vtbl != NULL) { + filter = &flist[num]; + mbfl_identify_filter_set_vtbl(filter, vtbl); + filter->encoding = mbfl_no2encoding(vtbl->encoding); + (*filter->filter_ctor)(filter TSRMLS_CC); + num++; + if (filter->get_rating_function) + num_actual++; + } + i++; + } + } + if (num_actual == 0) { + /* no filters with character rating routines - exit */ + mbfl_free((void *)flist); + return NULL; + } + + ratings = (unsigned long *)mbfl_calloc(eliztsz, sizeof(unsigned long)); + if (ratings == NULL) { + mbfl_free((void *)flist); + return NULL; + } + + + /* feed data */ + n = string->len; + p = string->val; + if (p != NULL) { + while (n > 0) { + i = 0; + bad = 0; + overflow = 0; + while (i < num) { + filter = &flist[i]; + 
add_rating=(*filter->get_rating_function)(*p, filter TSRMLS_CC); + if ( (ratings[i] + add_rating) < ratings[i] ) + overflow = 1; + ratings[i] += add_rating; + i++; + } + if (overflow) + // overflow - enough data now - exit + break; + p++; + n--; + } + } + + /* judge */ + max_rating = 0; + i = 0; + encoding = NULL; + while (i < num) { + filter = &flist[i]; + if (ratings[i] > max_rating) { + max_rating = ratings[i]; + encoding = filter->encoding; + } + (*filter->filter_dtor)(filter TSRMLS_CC); + i++; + } + mbfl_free((void *)ratings); + mbfl_free((void *)flist); + + return encoding; +} + +const char* +mbfl_guess_encoding_name(mbfl_string *string, enum mbfl_no_encoding *elist, int eliztsz TSRMLS_DC) +{ + const mbfl_encoding *encoding; + + encoding = mbfl_guess_encoding(string, elist, eliztsz TSRMLS_CC); + if (encoding != NULL && + encoding->no_encoding > mbfl_no_encoding_charset_min && + encoding->no_encoding < mbfl_no_encoding_charset_max) { + return encoding->name; + } else { + return NULL; + } +} + const enum mbfl_no_encoding mbfl_identify_encoding_no(mbfl_string *string, enum mbfl_no_encoding *elist, int eliztsz TSRMLS_DC) { diff --git a/ext/mbstring/mbfilter.h b/ext/mbstring/mbfilter.h index 563a11ea89..58da85c9ad 100644 --- a/ext/mbstring/mbfilter.h +++ b/ext/mbstring/mbfilter.h @@ -389,6 +389,7 @@ struct _mbfl_identify_filter { void (*filter_ctor)(mbfl_identify_filter *filter TSRMLS_DC); void (*filter_dtor)(mbfl_identify_filter *filter TSRMLS_DC); int (*filter_function)(int c, mbfl_identify_filter *filter TSRMLS_DC); + int (*get_rating_function)(int c, mbfl_identify_filter *filter TSRMLS_DC); int status; int flag; int score; @@ -400,6 +401,7 @@ struct mbfl_identify_vtbl { void (*filter_ctor)(mbfl_identify_filter *filter TSRMLS_DC); void (*filter_dtor)(mbfl_identify_filter *filter TSRMLS_DC); int (*filter_function)(int c, mbfl_identify_filter *filter TSRMLS_DC); + int (*get_rating_function)(int c, mbfl_identify_filter *filter TSRMLS_DC); }; mbfl_identify_filter * 
mbfl_identify_filter_new(enum mbfl_no_encoding encoding TSRMLS_DC); @@ -459,6 +461,12 @@ mbfl_convert_encoding(mbfl_string *string, mbfl_string *result, enum mbfl_no_enc * identify encoding */ const mbfl_encoding * +mbfl_guess_encoding(mbfl_string *string, enum mbfl_no_encoding *elist, int eliztsz TSRMLS_DC); + +const char * +mbfl_guess_encoding_name(mbfl_string *string, enum mbfl_no_encoding *elist, int eliztsz TSRMLS_DC); + +const mbfl_encoding * mbfl_identify_encoding(mbfl_string *string, enum mbfl_no_encoding *elist, int eliztsz TSRMLS_DC); const char * diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c index 197ad3bc57..c0c82e759a 100644 --- a/ext/mbstring/mbstring.c +++ b/ext/mbstring/mbstring.c @@ -194,6 +194,7 @@ function_entry mbstring_functions[] = { PHP_FE(mb_strimwidth, NULL) PHP_FE(mb_convert_encoding, NULL) PHP_FE(mb_detect_encoding, NULL) + PHP_FE(mb_guess_encoding, NULL) PHP_FE(mb_convert_kana, NULL) PHP_FE(mb_encode_mimeheader, NULL) PHP_FE(mb_decode_mimeheader, NULL) @@ -2607,6 +2608,83 @@ PHP_FUNCTION(mb_detect_encoding) +/* {{{ proto string mb_guess_encoding(string str [, mixed encoding_list]) + Encodings of the given string is returned (as a string) */ +PHP_FUNCTION(mb_guess_encoding) +{ + pval **arg_str, **arg_list; + mbfl_string string; + const char *ret; + enum mbfl_no_encoding *elist; + int size, *list; + + if (ZEND_NUM_ARGS() == 1) { + if (zend_get_parameters_ex(1, &arg_str) == FAILURE) { + WRONG_PARAM_COUNT; + } + } else if (ZEND_NUM_ARGS() == 2) { + if (zend_get_parameters_ex(2, &arg_str, &arg_list) == FAILURE) { + WRONG_PARAM_COUNT; + } + } else { + WRONG_PARAM_COUNT; + } + + /* make encoding list */ + list = NULL; + size = 0; + if (ZEND_NUM_ARGS() >= 2) { + switch (Z_TYPE_PP(arg_list)) { + case IS_ARRAY: + if (!php_mbstring_parse_encoding_array(*arg_list, &list, &size, 0)) { + if (list) { + efree(list); + size = 0; + } + } + break; + default: + convert_to_string_ex(arg_list); + if 
(!php_mbstring_parse_encoding_list(Z_STRVAL_PP(arg_list), Z_STRLEN_PP(arg_list), &list, &size, 0)) { + if (list) { + efree(list); + size = 0; + } + } + break; + } + if (size <= 0) { + php_error(E_WARNING, "%s() illegal argument", + get_active_function_name(TSRMLS_C)); + } + } + + if (size > 0 && list != NULL) { + elist = list; + } else { + elist = MBSTRG(current_detect_order_list); + size = MBSTRG(current_detect_order_list_size); + } + + convert_to_string_ex(arg_str); + mbfl_string_init(&string); + string.no_language = MBSTRG(current_language); + string.val = Z_STRVAL_PP(arg_str); + string.len = Z_STRLEN_PP(arg_str); + ret = mbfl_guess_encoding_name(&string, elist, size TSRMLS_CC); + if (list != NULL) { + efree((void *)list); + } + if (ret != NULL) { + RETVAL_STRING((char *)ret, 1); + } else { + RETVAL_FALSE; + } +} +/* }}} */ + + + /* {{{ proto string mb_encode_mimeheader(string str [, string charset [, string transfer-encoding [, string linefeed]]]) Converts the string to MIME "encoded-word" in the format of =?charset?(B|Q)?encoded_string?= */ PHP_FUNCTION(mb_encode_mimeheader) diff --git a/ext/mbstring/mbstring.h b/ext/mbstring/mbstring.h index 094ecc4cb3..e2d51501ff 100644 --- a/ext/mbstring/mbstring.h +++ b/ext/mbstring/mbstring.h @@ -94,6 +94,7 @@ PHP_FUNCTION(mb_strcut); PHP_FUNCTION(mb_strwidth); PHP_FUNCTION(mb_strimwidth); PHP_FUNCTION(mb_convert_encoding); +PHP_FUNCTION(mb_guess_encoding); PHP_FUNCTION(mb_detect_encoding); PHP_FUNCTION(mb_convert_kana); PHP_FUNCTION(mb_encode_mimeheader); diff --git a/ext/mbstring/unicode_table_ru.h b/ext/mbstring/unicode_table_ru.h index 74d02fef34..270032ea93 100644 --- a/ext/mbstring/unicode_table_ru.h +++ b/ext/mbstring/unicode_table_ru.h @@ -67,3 +67,40 @@ static const int koi8r_ucs_table_min = 0x80; static const int koi8r_ucs_table_len = (sizeof (koi8r_ucs_table) / sizeof (unsigned short)); static const int koi8r_ucs_table_max = 0x80 + (sizeof (koi8r_ucs_table) / sizeof (unsigned short)); + + +static const unsigned 
int cp1251_char_ratings_table[] = { + 14985, 3207, 9044, 2847, 6015,18094, 2305, 3456, + 15786, 2472, 6531, 7803, 6341,13494,21800, 6267, + 10139,10398,13877, 5094, 536, 2201, 855, 2665, + 1127, 981, 99, 4460, 3805, 426, 1516, 4341 +}; +static const int cp1251_char_ratings_table_min = 0xe0; +static const int cp1251_char_ratings_table_len = (sizeof (cp1251_char_ratings_table) / sizeof (unsigned int)); + + +static const unsigned int cp866_char_ratings_table[] = { + 99, 0, 1516,14985, 2305, 536, 855, 6015, + 5094, 4460, 2201,15786, 2472, 6531, 7803, 6341, + 13494,21800, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 4341,10139,10398,13877,18094, 2847, + 3207, 981, 3456, 426, 2665, 3805, 9044, 1127, + 0, 6267 +}; +static const int cp866_char_ratings_table_min = 0x9e; +static const int cp866_char_ratings_table_len = (sizeof (cp866_char_ratings_table) / sizeof (unsigned int)); + + +static const unsigned int koi8r_char_ratings_table[] = { + 1516,14985, 3207, 855, 6015,18094, 536, 2847, + 2201,15786, 2472, 6531, 7803, 6341,13494,21800, + 6267, 4341,10139,10398,13877, 5094, 2305, 9044, + 3805, 4460, 3456, 1127, 426, 981, 2665, 99 +}; +static const int koi8r_char_ratings_table_min = 0xc0; +static const int koi8r_char_ratings_table_len = (sizeof (koi8r_char_ratings_table) / sizeof (unsigned int)); |