diff options
author | bar@mysql.com <> | 2004-06-11 16:29:16 +0500 |
---|---|---|
committer | bar@mysql.com <> | 2004-06-11 16:29:16 +0500 |
commit | c64d93b27403dc9d154eb601b88d95964f9fc05b (patch) | |
tree | 72245220b637b8d5caf3b8267ea6801af59f0bc7 | |
parent | 21c524e712875f8b66ad95c76da1d4305d0538d8 (diff) | |
download | mariadb-git-c64d93b27403dc9d154eb601b88d95964f9fc05b.tar.gz |
Allocate memory when a character set is requested:
- For simple character sets: from_uni conversion table.
- For UCA: alternative weight arrays.
Use mbminlen instead of MY_CS_NONTEXT
-rw-r--r-- | include/m_ctype.h | 5 | ||||
-rw-r--r-- | mysys/charset.c | 562 | ||||
-rw-r--r-- | sql/item_create.cc | 2 | ||||
-rw-r--r-- | sql/mysqld.cc | 3 | ||||
-rw-r--r-- | sql/sql_string.cc | 2 | ||||
-rw-r--r-- | sql/sql_table.cc | 2 | ||||
-rw-r--r-- | strings/ctype-big5.c | 2 | ||||
-rw-r--r-- | strings/ctype-bin.c | 2 | ||||
-rw-r--r-- | strings/ctype-czech.c | 1 | ||||
-rw-r--r-- | strings/ctype-euc_kr.c | 2 | ||||
-rw-r--r-- | strings/ctype-gb2312.c | 2 | ||||
-rw-r--r-- | strings/ctype-gbk.c | 2 | ||||
-rw-r--r-- | strings/ctype-latin1.c | 2 | ||||
-rw-r--r-- | strings/ctype-mb.c | 1 | ||||
-rw-r--r-- | strings/ctype-simple.c | 100 | ||||
-rw-r--r-- | strings/ctype-sjis.c | 2 | ||||
-rw-r--r-- | strings/ctype-tis620.c | 2 | ||||
-rw-r--r-- | strings/ctype-uca.c | 458 | ||||
-rw-r--r-- | strings/ctype-ucs2.c | 7 | ||||
-rw-r--r-- | strings/ctype-ujis.c | 6 | ||||
-rw-r--r-- | strings/ctype-utf8.c | 2 | ||||
-rw-r--r-- | strings/ctype-win1250ch.c | 1 |
22 files changed, 608 insertions, 560 deletions
diff --git a/include/m_ctype.h b/include/m_ctype.h index 002b77b5310..9be5538b48a 100644 --- a/include/m_ctype.h +++ b/include/m_ctype.h @@ -53,7 +53,6 @@ typedef struct unicase_info_st #define MY_SEQ_SPACES 2 /* My charsets_list flags */ -#define MY_NO_SETS 0 #define MY_CS_COMPILED 1 /* compiled-in sets */ #define MY_CS_CONFIG 2 /* sets that have a *.conf file */ #define MY_CS_INDEX 4 /* sets listed in the Index file */ @@ -62,7 +61,7 @@ typedef struct unicase_info_st #define MY_CS_PRIMARY 32 /* if primary collation */ #define MY_CS_STRNXFRM 64 /* if strnxfrm is used for sort */ #define MY_CS_UNICODE 128 /* is a charset is full unicode */ -#define MY_CS_NONTEXT 256 /* if a charset is not sprintf() compatible */ +#define MY_CS_READY 256 /* if a charset is initialized */ #define MY_CS_AVAILABLE 512 /* If either compiled-in or loaded*/ #define MY_CHARSET_UNDEFINED 0 @@ -102,6 +101,7 @@ struct charset_info_st; typedef struct my_collation_handler_st { + my_bool (*init)(struct charset_info_st *, void *(*alloc)(uint)); /* Collation routines */ int (*strnncoll)(struct charset_info_st *, const uchar *, uint, const uchar *, uint); @@ -140,6 +140,7 @@ extern MY_COLLATION_HANDLER my_collation_ucs2_uca_handler; typedef struct my_charset_handler_st { + my_bool (*init)(struct charset_info_st *, void *(*alloc)(uint)); /* Multibyte routines */ int (*ismbchar)(struct charset_info_st *, const char *, const char *); int (*mbcharlen)(struct charset_info_st *, uint); diff --git a/mysys/charset.c b/mysys/charset.c index d2d71689d7b..165fa19e3d5 100644 --- a/mysys/charset.c +++ b/mysys/charset.c @@ -23,354 +23,6 @@ /* - Collation language is implemented according to - subset of ICU Collation Customization (tailorings): - http://oss.software.ibm.com/icu/userguide/Collate_Customization.html - - Collation language elements: - Delimiters: - space - skipped - - <char> := A-Z | a-z | \uXXXX - - Shift command: - <shift> := & - reset at this letter. 
- - Diff command: - <d1> := < - Identifies a primary difference. - <d2> := << - Identifies a secondary difference. - <d3> := <<< - Idenfifies a tertiary difference. - - - Collation rules: - <ruleset> := <rule> { <ruleset> } - - <rule> := <d1> <string> - | <d2> <string> - | <d3> <string> - | <shift> <char> - - <string> := <char> [ <string> ] - - An example, Polish collation: - - &A < \u0105 <<< \u0104 - &C < \u0107 <<< \u0106 - &E < \u0119 <<< \u0118 - &L < \u0142 <<< \u0141 - &N < \u0144 <<< \u0143 - &O < \u00F3 <<< \u00D3 - &S < \u015B <<< \u015A - &Z < \u017A <<< \u017B -*/ - - -typedef enum my_coll_lexem_num_en -{ - MY_COLL_LEXEM_EOF = 0, - MY_COLL_LEXEM_DIFF = 1, - MY_COLL_LEXEM_SHIFT = 4, - MY_COLL_LEXEM_CHAR = 5, - MY_COLL_LEXEM_ERROR = 6 -} my_coll_lexem_num; - - -typedef struct my_coll_lexem_st -{ - const char *beg; - const char *end; - const char *prev; - int diff; - int code; -} MY_COLL_LEXEM; - - -/* - Initialize collation rule lexical anilizer - - SYNOPSIS - my_coll_lexem_init - lexem Lex analizer to init - str Const string to parse - strend End of the string - USAGE - - RETURN VALUES - N/A -*/ - -static void my_coll_lexem_init(MY_COLL_LEXEM *lexem, - const char *str, const char *strend) -{ - lexem->beg= str; - lexem->prev= str; - lexem->end= strend; - lexem->diff= 0; - lexem->code= 0; -} - - -/* - Print collation customization expression parse error, with context. 
- - SYNOPSIS - my_coll_lexem_print_error - lexem Lex analizer to take context from - errstr sting to write error to - errsize errstr size - txt error message - USAGE - - RETURN VALUES - N/A -*/ - -static void my_coll_lexem_print_error(MY_COLL_LEXEM *lexem, - char *errstr, size_t errsize, - const char *txt) -{ - char tail[30]; - size_t len= lexem->end - lexem->prev; - strmake (tail, lexem->prev, min(len, sizeof(tail)-1)); - errstr[errsize-1]= '\0'; - my_snprintf(errstr,errsize-1,"%s at '%s'", txt, tail); -} - - -/* - Convert a hex digit into its numeric value - - SYNOPSIS - ch2x - ch hex digit to convert - USAGE - - RETURN VALUES - an integer value in the range 0..15 - -1 on error -*/ - -static int ch2x(int ch) -{ - if (ch >= '0' && ch <= '9') - return ch - '0'; - - if (ch >= 'a' && ch <= 'f') - return 10 + ch - 'a'; - - if (ch >= 'A' && ch <= 'F') - return 10 + ch - 'A'; - - return -1; -} - - -/* - Collation language lexical parser: - Scans the next lexem. - - SYNOPSIS - my_coll_lexem_next - lexem Lex analizer, previously initialized by - my_coll_lexem_init. - USAGE - Call this function in a loop - - RETURN VALUES - Lexem number: eof, diff, shift, char or error. 
-*/ - -static my_coll_lexem_num my_coll_lexem_next(MY_COLL_LEXEM *lexem) -{ - for ( ;lexem->beg < lexem->end ; lexem->beg++) - { - lexem->prev= lexem->beg; - if (lexem->beg[0] == ' ' || lexem->beg[0] == '\t' || - lexem->beg[0] == '\r' || lexem->beg[0] == '\n') - continue; - - if (lexem->beg[0] == '&') - { - lexem->beg++; - return MY_COLL_LEXEM_SHIFT; - } - - if (lexem->beg[0] == '<') - { - for (lexem->beg++, lexem->diff=1; - (lexem->beg < lexem->end) && - (lexem->beg[0] == '<') && (lexem->diff<3); - lexem->beg++, lexem->diff++); - return MY_COLL_LEXEM_DIFF; - } - - if ((lexem->beg[0] >= 'a' && lexem->beg[0] <= 'z') || - (lexem->beg[0] >= 'A' && lexem->beg[0] <= 'Z')) - { - lexem->code= lexem->beg[0]; - lexem->beg++; - return MY_COLL_LEXEM_CHAR; - } - - if ((lexem->beg[0] == '\\') && - (lexem->beg+2 < lexem->end) && - (lexem->beg[1] == 'u')) - { - int ch; - - lexem->code= 0; - for (lexem->beg+=2; - (lexem->beg < lexem->end) && ((ch= ch2x(lexem->beg[0])) >= 0) ; - lexem->beg++) - { - lexem->code= (lexem->code << 4) + ch; - } - return MY_COLL_LEXEM_CHAR; - } - - return MY_COLL_LEXEM_ERROR; - } - return MY_COLL_LEXEM_EOF; -} - - -/* - Collation rule item -*/ - -typedef struct my_coll_rule_item_st -{ - uint base; /* Base character */ - uint curr; /* Current character */ - int diff[3]; /* Primary, Secondary and Tertiary difference */ -} MY_COLL_RULE; - - -/* - Collation language syntax parser. - Uses lexical parser. - - SYNOPSIS - my_coll_rule_parse - rule Collation rule list to load to. - str A string containin collation language expression. - strend End of the string. - USAGE - - RETURN VALUES - 0 - OK - 1 - ERROR, e.g. too many items. 
-*/ - -static int my_coll_rule_parse(MY_COLL_RULE *rule, size_t mitems, - const char *str, const char *strend, - char *errstr, size_t errsize) -{ - MY_COLL_LEXEM lexem; - my_coll_lexem_num lexnum; - my_coll_lexem_num prevlexnum= MY_COLL_LEXEM_ERROR; - MY_COLL_RULE item; - int state= 0; - size_t nitems= 0; - - /* Init all variables */ - errstr[0]= '\0'; - bzero(&item, sizeof(item)); - my_coll_lexem_init(&lexem, str, strend); - - while ((lexnum= my_coll_lexem_next(&lexem))) - { - if (lexnum == MY_COLL_LEXEM_ERROR) - { - my_coll_lexem_print_error(&lexem,errstr,errsize-1,"Unknown character"); - return -1; - } - - switch (state) { - case 0: - if (lexnum != MY_COLL_LEXEM_SHIFT) - { - my_coll_lexem_print_error(&lexem,errstr,errsize-1,"& expected"); - return -1; - } - prevlexnum= lexnum; - state= 2; - continue; - - case 1: - if (lexnum != MY_COLL_LEXEM_SHIFT && lexnum != MY_COLL_LEXEM_DIFF) - { - my_coll_lexem_print_error(&lexem,errstr,errsize-1,"& or < expected"); - return -1; - } - prevlexnum= lexnum; - state= 2; - continue; - - case 2: - if (lexnum != MY_COLL_LEXEM_CHAR) - { - my_coll_lexem_print_error(&lexem,errstr,errsize-1,"character expected"); - return -1; - } - - if (prevlexnum == MY_COLL_LEXEM_SHIFT) - { - item.base= lexem.code; - item.diff[0]= 0; - item.diff[1]= 0; - item.diff[2]= 0; - } - else if (prevlexnum == MY_COLL_LEXEM_DIFF) - { - item.curr= lexem.code; - if (lexem.diff == 3) - { - item.diff[2]++; - } - else if (lexem.diff == 2) - { - item.diff[1]++; - item.diff[2]= 0; - } - else if (lexem.diff == 1) - { - item.diff[0]++; - item.diff[1]= 0; - item.diff[2]= 0; - } - if (nitems >= mitems) - { - my_coll_lexem_print_error(&lexem,errstr,errsize-1,"Too many rules"); - return -1; - } - rule[nitems++]= item; - } - else - { - my_coll_lexem_print_error(&lexem,errstr,errsize-1,"Should never happen"); - return -1; - } - state= 1; - continue; - } - } - return (size_t) nitems; -} - - -typedef struct -{ - int nchars; - MY_UNI_IDX uidx; -} uni_idx; - -#define PLANE_SIZE 
0x100 -#define PLANE_NUM 0x100 -#define PLANE_NUMBER(x) (((x)>>8) % PLANE_NUM) - - -/* The code below implements this functionality: - Initializing charset related structures @@ -484,91 +136,6 @@ static void simple_cs_init_functions(CHARSET_INFO *cs) } -static int pcmp(const void * f, const void * s) -{ - const uni_idx *F= (const uni_idx*) f; - const uni_idx *S= (const uni_idx*) s; - int res; - - if (!(res=((S->nchars)-(F->nchars)))) - res=((F->uidx.from)-(S->uidx.to)); - return res; -} - - -static my_bool create_fromuni(CHARSET_INFO *cs) -{ - uni_idx idx[PLANE_NUM]; - int i,n; - - /* Clear plane statistics */ - bzero(idx,sizeof(idx)); - - /* Count number of characters in each plane */ - for (i=0; i< 0x100; i++) - { - uint16 wc=cs->tab_to_uni[i]; - int pl= PLANE_NUMBER(wc); - - if (wc || !i) - { - if (!idx[pl].nchars) - { - idx[pl].uidx.from=wc; - idx[pl].uidx.to=wc; - }else - { - idx[pl].uidx.from=wc<idx[pl].uidx.from?wc:idx[pl].uidx.from; - idx[pl].uidx.to=wc>idx[pl].uidx.to?wc:idx[pl].uidx.to; - } - idx[pl].nchars++; - } - } - - /* Sort planes in descending order */ - qsort(&idx,PLANE_NUM,sizeof(uni_idx),&pcmp); - - for (i=0; i < PLANE_NUM; i++) - { - int ch,numchars; - - /* Skip empty plane */ - if (!idx[i].nchars) - break; - - numchars=idx[i].uidx.to-idx[i].uidx.from+1; - if (!(idx[i].uidx.tab=(uchar*) my_once_alloc(numchars * - sizeof(*idx[i].uidx.tab), - MYF(MY_WME)))) - return TRUE; - - bzero(idx[i].uidx.tab,numchars*sizeof(*idx[i].uidx.tab)); - - for (ch=1; ch < PLANE_SIZE; ch++) - { - uint16 wc=cs->tab_to_uni[ch]; - if (wc >= idx[i].uidx.from && wc <= idx[i].uidx.to && wc) - { - int ofs= wc - idx[i].uidx.from; - idx[i].uidx.tab[ofs]= ch; - } - } - } - - /* Allocate and fill reverse table for each plane */ - n=i; - if (!(cs->tab_from_uni= (MY_UNI_IDX*) my_once_alloc(sizeof(MY_UNI_IDX)*(n+1), - MYF(MY_WME)))) - return TRUE; - - for (i=0; i< n; i++) - cs->tab_from_uni[i]= idx[i].uidx; - - /* Set end-of-list marker */ - 
bzero(&cs->tab_from_uni[i],sizeof(MY_UNI_IDX)); - return FALSE; -} - static int simple_cs_copy_data(CHARSET_INFO *to, CHARSET_INFO *from) { @@ -622,8 +189,6 @@ static int simple_cs_copy_data(CHARSET_INFO *to, CHARSET_INFO *from) if (!(to->tab_to_uni= (uint16*) my_once_memdup((char*)from->tab_to_uni, sz, MYF(MY_WME)))) goto err; - if (create_fromuni(to)) - goto err; } to->mbminlen= 1; to->mbmaxlen= 1; @@ -754,117 +319,6 @@ static my_tailoring tailoring[]= } }; -#define MY_MAX_COLL_RULE 64 - -/* - This function copies an UCS2 collation from - the default Unicode Collation Algorithm (UCA) - weights applying tailorings, i.e. a set of - alternative weights for some characters. - - The default UCA weights are stored in my_charset_ucs2_general_uca. - They consist of 256 pages, 256 character each. - - If a page is not overwritten by tailoring rules, - it is copies as is from UCA as is. - - If a page contains some overwritten characters, it is - allocated. Untouched characters are copied from the - default weights. -*/ - -static my_bool create_tailoring(CHARSET_INFO *cs) -{ - MY_COLL_RULE rule[MY_MAX_COLL_RULE]; - char errstr[128]; - uchar *newlengths; - uint16 **newweights; - const uchar *deflengths= my_charset_ucs2_general_uca.sort_order; - uint16 **defweights= my_charset_ucs2_general_uca.sort_order_big; - int rc, i; - - if (!cs->tailoring) - return 1; - - /* Parse ICU Collation Customization expression */ - if ((rc= my_coll_rule_parse(rule, MY_MAX_COLL_RULE, - cs->tailoring, - cs->tailoring + strlen(cs->tailoring), - errstr, sizeof(errstr))) <= 0) - { - /* - TODO: add error message reporting. - printf("Error: %d '%s'\n", rc, errstr); - */ - return 1; - } - - if (!(newweights= (uint16**) my_once_alloc(256*sizeof(uint16*),MYF(MY_WME)))) - return 1; - bzero(newweights, 256*sizeof(uint16*)); - - if (!(newlengths= (uchar*) my_once_memdup(deflengths,256,MYF(MY_WME)))) - return 1; - - /* - Calculate maximum lenghts for the pages - which will be overwritten. 
- */ - for (i=0; i < rc; i++) - { - uint pageb= (rule[i].base >> 8) & 0xFF; - uint pagec= (rule[i].curr >> 8) & 0xFF; - - if (newlengths[pagec] < deflengths[pageb]) - newlengths[pagec]= deflengths[pageb]; - } - - for (i=0; i < rc; i++) - { - uint pageb= (rule[i].base >> 8) & 0xFF; - uint pagec= (rule[i].curr >> 8) & 0xFF; - uint chb, chc; - - if (!newweights[pagec]) - { - /* Alloc new page and copy the default UCA weights */ - uint size= 256*newlengths[pagec]*sizeof(uint16); - - if (!(newweights[pagec]= (uint16*) my_once_alloc(size,MYF(MY_WME)))) - return 1; - bzero((void*) newweights[pagec], size); - - for (chc=0 ; chc < 256; chc++) - { - memcpy(newweights[pagec] + chc*newlengths[pagec], - defweights[pagec] + chc*deflengths[pagec], - deflengths[pagec]*sizeof(uint16)); - } - } - - /* - Aply the alternative rule: - shift to the base character and primary difference. - */ - chc= rule[i].curr & 0xFF; - chb= rule[i].base & 0xFF; - memcpy(newweights[pagec] + chc*newlengths[pagec], - defweights[pageb] + chb*deflengths[pageb], - deflengths[pageb]*sizeof(uint16)); - /* Apply primary difference */ - newweights[pagec][chc*newlengths[pagec]]+= rule[i].diff[0]; - } - - /* Copy non-overwritten pages from the default UCA weights */ - for (i= 0; i < 256 ; i++) - if (!newweights[i]) - newweights[i]= defweights[i]; - - cs->sort_order= newlengths; - cs->sort_order_big= newweights; - - return 0; -} static int ucs2_copy_data(CHARSET_INFO *to, CHARSET_INFO *from) @@ -894,7 +348,7 @@ static int ucs2_copy_data(CHARSET_INFO *to, CHARSET_INFO *from) to->mbminlen= 2; to->mbmaxlen= 2; - return create_tailoring(to); + return 0; err: return 1; @@ -997,7 +451,7 @@ static my_bool init_uca_charsets() CHARSET_INFO cs= my_charset_ucs2_general_uca; char name[64]; - cs.state= MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONTEXT; + cs.state= MY_CS_STRNXFRM|MY_CS_UNICODE; for (t= tailoring; t->tailoring; t++) { cs.number= 128 + t->number; @@ -1083,6 +537,10 @@ void add_compiled_collation(CHARSET_INFO *cs) 
cs->state|= MY_CS_AVAILABLE; } +static void *cs_alloc(uint size) +{ + return my_once_alloc(size, MYF(MY_WME)); +} #ifdef __NETWARE__ @@ -1207,6 +665,14 @@ static CHARSET_INFO *get_internal_charset(uint cs_number, myf flags) cs= (cs->state & MY_CS_AVAILABLE) ? cs : NULL; } pthread_mutex_unlock(&THR_LOCK_charset); + if (cs && !(cs->state & MY_CS_READY)) + { + if ((cs->cset->init && cs->cset->init(cs, cs_alloc)) || + (cs->coll->init && cs->coll->init(cs, cs_alloc))) + cs= NULL; + else + cs->state|= MY_CS_READY; + } return cs; } diff --git a/sql/item_create.cc b/sql/item_create.cc index 74f36de11ac..53d4f14d1ee 100644 --- a/sql/item_create.cc +++ b/sql/item_create.cc @@ -373,7 +373,7 @@ Item *create_func_space(Item *a) CHARSET_INFO *cs= current_thd->variables.collation_connection; Item *sp; - if (cs->state & MY_CS_NONTEXT) + if (cs->mbminlen > 1) { sp= new Item_string("",0,cs); if (sp) diff --git a/sql/mysqld.cc b/sql/mysqld.cc index 4e9847e99cc..841898ac505 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -329,7 +329,7 @@ char log_error_file[FN_REFLEN], glob_hostname[FN_REFLEN]; char* log_error_file_ptr= log_error_file; char mysql_real_data_home[FN_REFLEN], language[LIBLEN],reg_ext[FN_EXTLEN], mysql_charsets_dir[FN_REFLEN], - max_sort_char,*mysqld_user,*mysqld_chroot, *opt_init_file, + *mysqld_user,*mysqld_chroot, *opt_init_file, *opt_init_connect, *opt_init_slave, def_ft_boolean_syntax[sizeof(ft_boolean_syntax)]; @@ -5249,7 +5249,6 @@ static void mysql_init_variables(void) specialflag= opened_tables= created_tmp_tables= created_tmp_disk_tables= 0; binlog_cache_use= binlog_cache_disk_use= 0; max_used_connections= slow_launch_threads = 0; - max_sort_char= 0; mysqld_user= mysqld_chroot= opt_init_file= opt_bin_logname = 0; errmesg= 0; mysqld_unix_port= opt_mysql_tmpdir= my_bind_addr_str= NullS; diff --git a/sql/sql_string.cc b/sql/sql_string.cc index f7e4e436495..cf4f94ba966 100644 --- a/sql/sql_string.cc +++ b/sql/sql_string.cc @@ -370,7 +370,7 @@ bool 
String::copy(const char *str, uint32 arg_length, bool String::set_ascii(const char *str, uint32 arg_length) { - if (!(str_charset->state & MY_CS_NONTEXT)) + if (!(str_charset->mbminlen > 1)) { set(str, arg_length, str_charset); return 0; diff --git a/sql/sql_table.cc b/sql/sql_table.cc index aea6140226c..dc838f0e685 100644 --- a/sql/sql_table.cc +++ b/sql/sql_table.cc @@ -858,7 +858,7 @@ int mysql_prepare_table(THD *thd, HA_CREATE_INFO *create_info, sql_field->sql_type != FIELD_TYPE_VAR_STRING && !f_is_blob(sql_field->pack_flag)) || sql_field->charset == &my_charset_bin || - sql_field->charset->state & MY_CS_NONTEXT || // ucs2 doesn't work yet + sql_field->charset->mbminlen > 1 || // ucs2 doesn't work yet (ft_key_charset && sql_field->charset != ft_key_charset)) { my_printf_error(ER_BAD_FT_COLUMN,ER(ER_BAD_FT_COLUMN),MYF(0), diff --git a/strings/ctype-big5.c b/strings/ctype-big5.c index fb72dec7385..3d9cb92bf0d 100644 --- a/strings/ctype-big5.c +++ b/strings/ctype-big5.c @@ -6269,6 +6269,7 @@ my_mb_wc_big5(CHARSET_INFO *cs __attribute__((unused)), static MY_COLLATION_HANDLER my_collation_big5_chinese_ci_handler = { + NULL, /* init */ my_strnncoll_big5, my_strnncollsp_big5, my_strnxfrm_big5, @@ -6281,6 +6282,7 @@ static MY_COLLATION_HANDLER my_collation_big5_chinese_ci_handler = static MY_CHARSET_HANDLER my_charset_big5_handler= { + NULL, /* init */ ismbchar_big5, mbcharlen_big5, my_numchars_mb, diff --git a/strings/ctype-bin.c b/strings/ctype-bin.c index 7b3164bf438..54fe4476ae6 100644 --- a/strings/ctype-bin.c +++ b/strings/ctype-bin.c @@ -333,6 +333,7 @@ skip: MY_COLLATION_HANDLER my_collation_8bit_bin_handler = { + NULL, /* init */ my_strnncoll_binary, my_strnncoll_binary, my_strnxfrm_bin, @@ -346,6 +347,7 @@ MY_COLLATION_HANDLER my_collation_8bit_bin_handler = static MY_CHARSET_HANDLER my_charset_handler= { + NULL, /* init */ NULL, /* ismbchar */ my_mbcharlen_8bit, /* mbcharlen */ my_numchars_8bit, diff --git a/strings/ctype-czech.c b/strings/ctype-czech.c 
index 3218fdee673..8fde9498ed9 100644 --- a/strings/ctype-czech.c +++ b/strings/ctype-czech.c @@ -572,6 +572,7 @@ static MY_UNI_IDX idx_uni_8859_2[]={ static MY_COLLATION_HANDLER my_collation_latin2_czech_ci_handler = { + NULL, /* init */ my_strnncoll_czech, my_strnncollsp_czech, my_strnxfrm_czech, diff --git a/strings/ctype-euc_kr.c b/strings/ctype-euc_kr.c index c387246b4c6..e739339b9e4 100644 --- a/strings/ctype-euc_kr.c +++ b/strings/ctype-euc_kr.c @@ -8637,6 +8637,7 @@ my_mb_wc_euc_kr(CHARSET_INFO *cs __attribute__((unused)), static MY_COLLATION_HANDLER my_collation_ci_handler = { + NULL, /* init */ my_strnncoll_simple, /* strnncoll */ my_strnncollsp_simple, my_strnxfrm_simple, /* strnxfrm */ @@ -8649,6 +8650,7 @@ static MY_COLLATION_HANDLER my_collation_ci_handler = static MY_CHARSET_HANDLER my_charset_handler= { + NULL, /* init */ ismbchar_euc_kr, mbcharlen_euc_kr, my_numchars_mb, diff --git a/strings/ctype-gb2312.c b/strings/ctype-gb2312.c index fe1f72e7eda..aee4ed55af6 100644 --- a/strings/ctype-gb2312.c +++ b/strings/ctype-gb2312.c @@ -5688,6 +5688,7 @@ my_mb_wc_gb2312(CHARSET_INFO *cs __attribute__((unused)), static MY_COLLATION_HANDLER my_collation_ci_handler = { + NULL, /* init */ my_strnncoll_simple, /* strnncoll */ my_strnncollsp_simple, my_strnxfrm_simple, /* strnxfrm */ @@ -5700,6 +5701,7 @@ static MY_COLLATION_HANDLER my_collation_ci_handler = static MY_CHARSET_HANDLER my_charset_handler= { + NULL, /* init */ ismbchar_gb2312, mbcharlen_gb2312, my_numchars_mb, diff --git a/strings/ctype-gbk.c b/strings/ctype-gbk.c index 8b659cb55f9..d6063e9f80f 100644 --- a/strings/ctype-gbk.c +++ b/strings/ctype-gbk.c @@ -9918,6 +9918,7 @@ my_mb_wc_gbk(CHARSET_INFO *cs __attribute__((unused)), static MY_COLLATION_HANDLER my_collation_ci_handler = { + NULL, /* init */ my_strnncoll_gbk, my_strnncollsp_gbk, my_strnxfrm_gbk, @@ -9930,6 +9931,7 @@ static MY_COLLATION_HANDLER my_collation_ci_handler = static MY_CHARSET_HANDLER my_charset_handler= { + NULL, /* init */ 
ismbchar_gbk, mbcharlen_gbk, my_numchars_mb, diff --git a/strings/ctype-latin1.c b/strings/ctype-latin1.c index 03d4e71377b..86c80ff5a66 100644 --- a/strings/ctype-latin1.c +++ b/strings/ctype-latin1.c @@ -380,6 +380,7 @@ int my_wc_mb_latin1(CHARSET_INFO *cs __attribute__((unused)), static MY_CHARSET_HANDLER my_charset_handler= { + NULL, /* init */ NULL, my_mbcharlen_8bit, my_numchars_8bit, @@ -674,6 +675,7 @@ void my_hash_sort_latin1_de(CHARSET_INFO *cs __attribute__((unused)), static MY_COLLATION_HANDLER my_collation_german2_ci_handler= { + NULL, /* init */ my_strnncoll_latin1_de, my_strnncollsp_latin1_de, my_strnxfrm_latin1_de, diff --git a/strings/ctype-mb.c b/strings/ctype-mb.c index 9b02cd3b3da..c143994dbc3 100644 --- a/strings/ctype-mb.c +++ b/strings/ctype-mb.c @@ -512,6 +512,7 @@ static int my_wildcmp_mb_bin(CHARSET_INFO *cs, MY_COLLATION_HANDLER my_collation_mb_bin_handler = { + NULL, /* init */ my_strnncoll_mb_bin, my_strnncoll_mb_bin, my_strnxfrm_mb_bin, diff --git a/strings/ctype-simple.c b/strings/ctype-simple.c index 5f0a7426db3..c28df91ae86 100644 --- a/strings/ctype-simple.c +++ b/strings/ctype-simple.c @@ -1142,8 +1142,107 @@ skip: } +typedef struct +{ + int nchars; + MY_UNI_IDX uidx; +} uni_idx; + +#define PLANE_SIZE 0x100 +#define PLANE_NUM 0x100 +#define PLANE_NUMBER(x) (((x)>>8) % PLANE_NUM) + +static int pcmp(const void * f, const void * s) +{ + const uni_idx *F= (const uni_idx*) f; + const uni_idx *S= (const uni_idx*) s; + int res; + + if (!(res=((S->nchars)-(F->nchars)))) + res=((F->uidx.from)-(S->uidx.to)); + return res; +} + +static my_bool create_fromuni(CHARSET_INFO *cs, void *(*alloc)(uint)) +{ + uni_idx idx[PLANE_NUM]; + int i,n; + + /* Clear plane statistics */ + bzero(idx,sizeof(idx)); + + /* Count number of characters in each plane */ + for (i=0; i< 0x100; i++) + { + uint16 wc=cs->tab_to_uni[i]; + int pl= PLANE_NUMBER(wc); + + if (wc || !i) + { + if (!idx[pl].nchars) + { + idx[pl].uidx.from=wc; + idx[pl].uidx.to=wc; + }else + { + 
idx[pl].uidx.from=wc<idx[pl].uidx.from?wc:idx[pl].uidx.from; + idx[pl].uidx.to=wc>idx[pl].uidx.to?wc:idx[pl].uidx.to; + } + idx[pl].nchars++; + } + } + + /* Sort planes in descending order */ + qsort(&idx,PLANE_NUM,sizeof(uni_idx),&pcmp); + + for (i=0; i < PLANE_NUM; i++) + { + int ch,numchars; + + /* Skip empty plane */ + if (!idx[i].nchars) + break; + + numchars=idx[i].uidx.to-idx[i].uidx.from+1; + if (!(idx[i].uidx.tab=(uchar*) alloc(numchars * sizeof(*idx[i].uidx.tab)))) + return TRUE; + + bzero(idx[i].uidx.tab,numchars*sizeof(*idx[i].uidx.tab)); + + for (ch=1; ch < PLANE_SIZE; ch++) + { + uint16 wc=cs->tab_to_uni[ch]; + if (wc >= idx[i].uidx.from && wc <= idx[i].uidx.to && wc) + { + int ofs= wc - idx[i].uidx.from; + idx[i].uidx.tab[ofs]= ch; + } + } + } + + /* Allocate and fill reverse table for each plane */ + n=i; + if (!(cs->tab_from_uni= (MY_UNI_IDX*) alloc(sizeof(MY_UNI_IDX)*(n+1)))) + return TRUE; + + for (i=0; i< n; i++) + cs->tab_from_uni[i]= idx[i].uidx; + + /* Set end-of-list marker */ + bzero(&cs->tab_from_uni[i],sizeof(MY_UNI_IDX)); + return FALSE; +} + +static my_bool my_cset_init_8bit(CHARSET_INFO *cs, void *(*alloc)(uint)) +{ + return create_fromuni(cs, alloc); +} + + + MY_CHARSET_HANDLER my_charset_8bit_handler= { + my_cset_init_8bit, NULL, /* ismbchar */ my_mbcharlen_8bit, /* mbcharlen */ my_numchars_8bit, @@ -1170,6 +1269,7 @@ MY_CHARSET_HANDLER my_charset_8bit_handler= MY_COLLATION_HANDLER my_collation_8bit_simple_ci_handler = { + NULL, /* init */ my_strnncoll_simple, my_strnncollsp_simple, my_strnxfrm_simple, diff --git a/strings/ctype-sjis.c b/strings/ctype-sjis.c index b4a131d3410..3744711447a 100644 --- a/strings/ctype-sjis.c +++ b/strings/ctype-sjis.c @@ -4534,6 +4534,7 @@ my_mb_wc_sjis(CHARSET_INFO *cs __attribute__((unused)), static MY_COLLATION_HANDLER my_collation_ci_handler = { + NULL, /* init */ my_strnncoll_sjis, my_strnncollsp_sjis, my_strnxfrm_sjis, @@ -4547,6 +4548,7 @@ static MY_COLLATION_HANDLER my_collation_ci_handler = 
static MY_CHARSET_HANDLER my_charset_handler= { + NULL, /* init */ ismbchar_sjis, mbcharlen_sjis, my_numchars_mb, diff --git a/strings/ctype-tis620.c b/strings/ctype-tis620.c index 79ac2079720..a0ba1a266ea 100644 --- a/strings/ctype-tis620.c +++ b/strings/ctype-tis620.c @@ -906,6 +906,7 @@ int my_wc_mb_tis620(CHARSET_INFO *cs __attribute__((unused)), static MY_COLLATION_HANDLER my_collation_ci_handler = { + NULL, /* init */ my_strnncoll_tis620, my_strnncollsp_tis620, my_strnxfrm_tis620, @@ -918,6 +919,7 @@ static MY_COLLATION_HANDLER my_collation_ci_handler = static MY_CHARSET_HANDLER my_charset_handler= { + NULL, /* init */ NULL, /* ismbchar */ my_mbcharlen_8bit, /* mbcharlen */ my_numchars_8bit, diff --git a/strings/ctype-uca.c b/strings/ctype-uca.c index e6b68b8c9b2..846f17982c3 100644 --- a/strings/ctype-uca.c +++ b/strings/ctype-uca.c @@ -7036,8 +7036,464 @@ int my_wildcmp_uca(CHARSET_INFO *cs, } +/* + Collation language is implemented according to a + subset of ICU Collation Customization (tailorings): + http://oss.software.ibm.com/icu/userguide/Collate_Customization.html + + Collation language elements: + Delimiters: + space - skipped + + <char> := A-Z | a-z | \uXXXX + + Shift command: + <shift> := & - reset at this letter. + + Diff command: + <d1> := < - Identifies a primary difference. + <d2> := << - Identifies a secondary difference. + <d3> := <<< - Identifies a tertiary difference. 
+ + + Collation rules: + <ruleset> := <rule> { <ruleset> } + + <rule> := <d1> <string> + | <d2> <string> + | <d3> <string> + | <shift> <char> + + <string> := <char> [ <string> ] + + An example, Polish collation: + + &A < \u0105 <<< \u0104 + &C < \u0107 <<< \u0106 + &E < \u0119 <<< \u0118 + &L < \u0142 <<< \u0141 + &N < \u0144 <<< \u0143 + &O < \u00F3 <<< \u00D3 + &S < \u015B <<< \u015A + &Z < \u017A <<< \u0179 +*/ + + +typedef enum my_coll_lexem_num_en +{ + MY_COLL_LEXEM_EOF = 0, + MY_COLL_LEXEM_DIFF = 1, + MY_COLL_LEXEM_SHIFT = 4, + MY_COLL_LEXEM_CHAR = 5, + MY_COLL_LEXEM_ERROR = 6 +} my_coll_lexem_num; + + +typedef struct my_coll_lexem_st +{ + const char *beg; + const char *end; + const char *prev; + int diff; + int code; +} MY_COLL_LEXEM; + + +/* + Initialize collation rule lexical analyzer + + SYNOPSIS + my_coll_lexem_init + lexem Lex analyzer to init + str Const string to parse + strend End of the string + USAGE + + RETURN VALUES + N/A +*/ + +static void my_coll_lexem_init(MY_COLL_LEXEM *lexem, + const char *str, const char *strend) +{ + lexem->beg= str; + lexem->prev= str; + lexem->end= strend; + lexem->diff= 0; + lexem->code= 0; +} + + +/* + Print collation customization expression parse error, with context. 
+ + SYNOPSIS + my_coll_lexem_print_error + lexem Lex analyzer to take context from + errstr string to write error to + errsize errstr size + txt error message + USAGE + + RETURN VALUES + N/A +*/ + +static void my_coll_lexem_print_error(MY_COLL_LEXEM *lexem, + char *errstr, size_t errsize, + const char *txt) +{ + char tail[30]; + size_t len= lexem->end - lexem->prev; + strmake (tail, lexem->prev, min(len, sizeof(tail)-1)); + errstr[errsize-1]= '\0'; + my_snprintf(errstr,errsize-1,"%s at '%s'", txt, tail); +} + + +/* + Convert a hex digit into its numeric value + + SYNOPSIS + ch2x + ch hex digit to convert + USAGE + + RETURN VALUES + an integer value in the range 0..15 + -1 on error +*/ + +static int ch2x(int ch) +{ + if (ch >= '0' && ch <= '9') + return ch - '0'; + + if (ch >= 'a' && ch <= 'f') + return 10 + ch - 'a'; + + if (ch >= 'A' && ch <= 'F') + return 10 + ch - 'A'; + + return -1; +} + + +/* + Collation language lexical parser: + Scans the next lexem. + + SYNOPSIS + my_coll_lexem_next + lexem Lex analyzer, previously initialized by + my_coll_lexem_init. + USAGE + Call this function in a loop + + RETURN VALUES + Lexem number: eof, diff, shift, char or error. 
+*/ + +static my_coll_lexem_num my_coll_lexem_next(MY_COLL_LEXEM *lexem) +{ + for ( ;lexem->beg < lexem->end ; lexem->beg++) + { + lexem->prev= lexem->beg; + if (lexem->beg[0] == ' ' || lexem->beg[0] == '\t' || + lexem->beg[0] == '\r' || lexem->beg[0] == '\n') + continue; + + if (lexem->beg[0] == '&') + { + lexem->beg++; + return MY_COLL_LEXEM_SHIFT; + } + + if (lexem->beg[0] == '<') + { + for (lexem->beg++, lexem->diff=1; + (lexem->beg < lexem->end) && + (lexem->beg[0] == '<') && (lexem->diff<3); + lexem->beg++, lexem->diff++); + return MY_COLL_LEXEM_DIFF; + } + + if ((lexem->beg[0] >= 'a' && lexem->beg[0] <= 'z') || + (lexem->beg[0] >= 'A' && lexem->beg[0] <= 'Z')) + { + lexem->code= lexem->beg[0]; + lexem->beg++; + return MY_COLL_LEXEM_CHAR; + } + + if ((lexem->beg[0] == '\\') && + (lexem->beg+2 < lexem->end) && + (lexem->beg[1] == 'u')) + { + int ch; + + lexem->code= 0; + for (lexem->beg+=2; + (lexem->beg < lexem->end) && ((ch= ch2x(lexem->beg[0])) >= 0) ; + lexem->beg++) + { + lexem->code= (lexem->code << 4) + ch; + } + return MY_COLL_LEXEM_CHAR; + } + + return MY_COLL_LEXEM_ERROR; + } + return MY_COLL_LEXEM_EOF; +} + + +/* + Collation rule item +*/ + +typedef struct my_coll_rule_item_st +{ + uint base; /* Base character */ + uint curr; /* Current character */ + int diff[3]; /* Primary, Secondary and Tertiary difference */ +} MY_COLL_RULE; + + +/* + Collation language syntax parser. + Uses lexical parser. + + SYNOPSIS + my_coll_rule_parse + rule Collation rule list to load to. + str A string containing collation language expression. + strend End of the string. + USAGE + + RETURN VALUES + >= 0 - OK, the number of rules parsed + -1 - ERROR, e.g. too many items. 
+*/ + +static int my_coll_rule_parse(MY_COLL_RULE *rule, size_t mitems, + const char *str, const char *strend, + char *errstr, size_t errsize) +{ + MY_COLL_LEXEM lexem; + my_coll_lexem_num lexnum; + my_coll_lexem_num prevlexnum= MY_COLL_LEXEM_ERROR; + MY_COLL_RULE item; + int state= 0; + size_t nitems= 0; + + /* Init all variables */ + errstr[0]= '\0'; + bzero(&item, sizeof(item)); + my_coll_lexem_init(&lexem, str, strend); + + while ((lexnum= my_coll_lexem_next(&lexem))) + { + if (lexnum == MY_COLL_LEXEM_ERROR) + { + my_coll_lexem_print_error(&lexem,errstr,errsize-1,"Unknown character"); + return -1; + } + + switch (state) { + case 0: + if (lexnum != MY_COLL_LEXEM_SHIFT) + { + my_coll_lexem_print_error(&lexem,errstr,errsize-1,"& expected"); + return -1; + } + prevlexnum= lexnum; + state= 2; + continue; + + case 1: + if (lexnum != MY_COLL_LEXEM_SHIFT && lexnum != MY_COLL_LEXEM_DIFF) + { + my_coll_lexem_print_error(&lexem,errstr,errsize-1,"& or < expected"); + return -1; + } + prevlexnum= lexnum; + state= 2; + continue; + + case 2: + if (lexnum != MY_COLL_LEXEM_CHAR) + { + my_coll_lexem_print_error(&lexem,errstr,errsize-1,"character expected"); + return -1; + } + + if (prevlexnum == MY_COLL_LEXEM_SHIFT) + { + item.base= lexem.code; + item.diff[0]= 0; + item.diff[1]= 0; + item.diff[2]= 0; + } + else if (prevlexnum == MY_COLL_LEXEM_DIFF) + { + item.curr= lexem.code; + if (lexem.diff == 3) + { + item.diff[2]++; + } + else if (lexem.diff == 2) + { + item.diff[1]++; + item.diff[2]= 0; + } + else if (lexem.diff == 1) + { + item.diff[0]++; + item.diff[1]= 0; + item.diff[2]= 0; + } + if (nitems >= mitems) + { + my_coll_lexem_print_error(&lexem,errstr,errsize-1,"Too many rules"); + return -1; + } + rule[nitems++]= item; + } + else + { + my_coll_lexem_print_error(&lexem,errstr,errsize-1,"Should never happen"); + return -1; + } + state= 1; + continue; + } + } + return (size_t) nitems; +} + +#define MY_MAX_COLL_RULE 64 + +/* + This function copies an UCS2 collation from + the 
default Unicode Collation Algorithm (UCA) + weights applying tailorings, i.e. a set of + alternative weights for some characters. + + The default UCA weights are stored in my_charset_ucs2_general_uca. + They consist of 256 pages, 256 characters each. + + If a page is not overwritten by tailoring rules, + it is reused as is from UCA (the page pointer is shared, not copied). + + If a page contains some overwritten characters, it is + allocated. Untouched characters are copied from the + default weights. +*/ + +static my_bool create_tailoring(CHARSET_INFO *cs, void *(*alloc)(uint)) +{ + MY_COLL_RULE rule[MY_MAX_COLL_RULE]; + char errstr[128]; + uchar *newlengths; + uint16 **newweights; + const uchar *deflengths= my_charset_ucs2_general_uca.sort_order; + uint16 **defweights= my_charset_ucs2_general_uca.sort_order_big; + int rc, i; + + if (!cs->tailoring) + return 1; + + /* Parse ICU Collation Customization expression */ + if ((rc= my_coll_rule_parse(rule, MY_MAX_COLL_RULE, + cs->tailoring, + cs->tailoring + strlen(cs->tailoring), + errstr, sizeof(errstr))) <= 0) + { + /* + TODO: add error message reporting. + printf("Error: %d '%s'\n", rc, errstr); + */ + return 1; + } + + if (!(newweights= (uint16**) alloc(256*sizeof(uint16*)))) + return 1; + bzero(newweights, 256*sizeof(uint16*)); + + if (!(newlengths= (uchar*) alloc(256))) + return 1; + + memcpy(newlengths, deflengths, 256); + + /* + Calculate maximum lengths for the pages + which will be overwritten. 
+ */ + for (i=0; i < rc; i++) + { + uint pageb= (rule[i].base >> 8) & 0xFF; + uint pagec= (rule[i].curr >> 8) & 0xFF; + + if (newlengths[pagec] < deflengths[pageb]) + newlengths[pagec]= deflengths[pageb]; + } + + for (i=0; i < rc; i++) + { + uint pageb= (rule[i].base >> 8) & 0xFF; + uint pagec= (rule[i].curr >> 8) & 0xFF; + uint chb, chc; + + if (!newweights[pagec]) + { + /* Alloc new page and copy the default UCA weights */ + uint size= 256*newlengths[pagec]*sizeof(uint16); + + if (!(newweights[pagec]= (uint16*) alloc(size))) + return 1; + bzero((void*) newweights[pagec], size); + + for (chc=0 ; chc < 256; chc++) + { + memcpy(newweights[pagec] + chc*newlengths[pagec], + defweights[pagec] + chc*deflengths[pagec], + deflengths[pagec]*sizeof(uint16)); + } + } + + /* + Apply the alternative rule: + shift to the base character and primary difference. + */ + chc= rule[i].curr & 0xFF; + chb= rule[i].base & 0xFF; + memcpy(newweights[pagec] + chc*newlengths[pagec], + defweights[pageb] + chb*deflengths[pageb], + deflengths[pageb]*sizeof(uint16)); + /* Apply primary difference */ + newweights[pagec][chc*newlengths[pagec]]+= rule[i].diff[0]; + } + + /* Share non-overwritten pages with the default UCA weights (pointer reuse, no copy) */ + for (i= 0; i < 256 ; i++) + if (!newweights[i]) + newweights[i]= defweights[i]; + + cs->sort_order= newlengths; + cs->sort_order_big= newweights; + + return 0; +} + +static my_bool my_coll_init_uca(CHARSET_INFO *cs, void *(*alloc)(uint)) +{ + return create_tailoring(cs, alloc); +} + MY_COLLATION_HANDLER my_collation_ucs2_uca_handler = { + my_coll_init_uca, /* init */ my_strnncoll_uca, my_strnncollsp_uca, my_strnxfrm_uca, @@ -7051,7 +7507,7 @@ MY_COLLATION_HANDLER my_collation_ucs2_uca_handler = CHARSET_INFO my_charset_ucs2_general_uca= { 45,0,0, /* number */ - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONTEXT, + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, "ucs2", /* cs name */ "ucs2_general_uca", /* name */ "", /* comment */ diff --git a/strings/ctype-ucs2.c 
b/strings/ctype-ucs2.c index 9f1accf841f..f05e85a9d88 100644 --- a/strings/ctype-ucs2.c +++ b/strings/ctype-ucs2.c @@ -1372,6 +1372,7 @@ my_bool my_like_range_ucs2(CHARSET_INFO *cs, static MY_COLLATION_HANDLER my_collation_ucs2_general_ci_handler = { + NULL, /* init */ my_strnncoll_ucs2, my_strnncoll_ucs2, my_strnxfrm_ucs2, @@ -1385,6 +1386,7 @@ static MY_COLLATION_HANDLER my_collation_ucs2_general_ci_handler = static MY_COLLATION_HANDLER my_collation_ucs2_bin_handler = { + NULL, /* init */ my_strnncoll_ucs2_bin, my_strnncoll_ucs2_bin, my_strnxfrm_ucs2_bin, @@ -1398,6 +1400,7 @@ static MY_COLLATION_HANDLER my_collation_ucs2_bin_handler = MY_CHARSET_HANDLER my_charset_ucs2_handler= { + NULL, /* init */ my_ismbchar_ucs2, /* ismbchar */ my_mbcharlen_ucs2, /* mbcharlen */ my_numchars_ucs2, @@ -1426,7 +1429,7 @@ MY_CHARSET_HANDLER my_charset_ucs2_handler= CHARSET_INFO my_charset_ucs2_general_ci= { 35,0,0, /* number */ - MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONTEXT, + MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_STRNXFRM|MY_CS_UNICODE, "ucs2", /* cs name */ "ucs2_general_ci", /* name */ "", /* comment */ @@ -1452,7 +1455,7 @@ CHARSET_INFO my_charset_ucs2_general_ci= CHARSET_INFO my_charset_ucs2_bin= { 90,0,0, /* number */ - MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_UNICODE|MY_CS_NONTEXT, + MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_UNICODE, "ucs2", /* cs name */ "ucs2_bin", /* name */ "", /* comment */ diff --git a/strings/ctype-ujis.c b/strings/ctype-ujis.c index f28ea165f80..fb7946a6b98 100644 --- a/strings/ctype-ujis.c +++ b/strings/ctype-ujis.c @@ -8423,6 +8423,7 @@ my_wc_mb_euc_jp(CHARSET_INFO *c,my_wc_t wc, unsigned char *s, unsigned char *e) static MY_COLLATION_HANDLER my_collation_ci_handler = { + NULL, /* init */ my_strnncoll_simple,/* strnncoll */ my_strnncollsp_simple, my_strnxfrm_simple, /* strnxfrm */ @@ -8435,14 +8436,15 @@ static MY_COLLATION_HANDLER my_collation_ci_handler = static MY_CHARSET_HANDLER my_charset_handler= { + NULL, /* init */ 
ismbchar_ujis, mbcharlen_ujis, my_numchars_mb, my_charpos_mb, my_well_formed_len_mb, my_lengthsp_8bit, - my_mb_wc_euc_jp, /* mb_wc */ - my_wc_mb_euc_jp, /* wc_mb */ + my_mb_wc_euc_jp, /* mb_wc */ + my_wc_mb_euc_jp, /* wc_mb */ my_caseup_str_mb, my_casedn_str_mb, my_caseup_mb, diff --git a/strings/ctype-utf8.c b/strings/ctype-utf8.c index 39e9260ffed..99ac114de9c 100644 --- a/strings/ctype-utf8.c +++ b/strings/ctype-utf8.c @@ -2045,6 +2045,7 @@ static int my_mbcharlen_utf8(CHARSET_INFO *cs __attribute__((unused)) , uint c) static MY_COLLATION_HANDLER my_collation_ci_handler = { + NULL, /* init */ my_strnncoll_utf8, my_strnncollsp_utf8, my_strnxfrm_utf8, @@ -2057,6 +2058,7 @@ static MY_COLLATION_HANDLER my_collation_ci_handler = static MY_CHARSET_HANDLER my_charset_handler= { + NULL, /* init */ my_ismbchar_utf8, my_mbcharlen_utf8, my_numchars_mb, diff --git a/strings/ctype-win1250ch.c b/strings/ctype-win1250ch.c index 670318a082e..a2c5768b16c 100644 --- a/strings/ctype-win1250ch.c +++ b/strings/ctype-win1250ch.c @@ -605,6 +605,7 @@ my_like_range_win1250ch(CHARSET_INFO *cs __attribute__((unused)), static MY_COLLATION_HANDLER my_collation_czech_ci_handler = { + NULL, /* init */ my_strnncoll_win1250ch, my_strnncollsp_win1250ch, my_strnxfrm_win1250ch, |