diff options
author | unknown <bar@mysql.com> | 2004-06-11 16:29:16 +0500 |
---|---|---|
committer | unknown <bar@mysql.com> | 2004-06-11 16:29:16 +0500 |
commit | 5275eb21c25eef0c5b799315da10f26b779fbfe9 (patch) | |
tree | 72245220b637b8d5caf3b8267ea6801af59f0bc7 /strings | |
parent | 4047b5ade3b02d73c60eac7ddae7e5e928de7fd3 (diff) | |
download | mariadb-git-5275eb21c25eef0c5b799315da10f26b779fbfe9.tar.gz |
Allocate memory when a character set is requested:
- For simple character sets: from_uni convertion table.
- For UCA: alternative weight arrays.
Use mbminlen instead of MY_CS_NONTEXT
Diffstat (limited to 'strings')
-rw-r--r-- | strings/ctype-big5.c | 2 | ||||
-rw-r--r-- | strings/ctype-bin.c | 2 | ||||
-rw-r--r-- | strings/ctype-czech.c | 1 | ||||
-rw-r--r-- | strings/ctype-euc_kr.c | 2 | ||||
-rw-r--r-- | strings/ctype-gb2312.c | 2 | ||||
-rw-r--r-- | strings/ctype-gbk.c | 2 | ||||
-rw-r--r-- | strings/ctype-latin1.c | 2 | ||||
-rw-r--r-- | strings/ctype-mb.c | 1 | ||||
-rw-r--r-- | strings/ctype-simple.c | 100 | ||||
-rw-r--r-- | strings/ctype-sjis.c | 2 | ||||
-rw-r--r-- | strings/ctype-tis620.c | 2 | ||||
-rw-r--r-- | strings/ctype-uca.c | 458 | ||||
-rw-r--r-- | strings/ctype-ucs2.c | 7 | ||||
-rw-r--r-- | strings/ctype-ujis.c | 6 | ||||
-rw-r--r-- | strings/ctype-utf8.c | 2 | ||||
-rw-r--r-- | strings/ctype-win1250ch.c | 1 |
16 files changed, 587 insertions, 5 deletions
diff --git a/strings/ctype-big5.c b/strings/ctype-big5.c index fb72dec7385..3d9cb92bf0d 100644 --- a/strings/ctype-big5.c +++ b/strings/ctype-big5.c @@ -6269,6 +6269,7 @@ my_mb_wc_big5(CHARSET_INFO *cs __attribute__((unused)), static MY_COLLATION_HANDLER my_collation_big5_chinese_ci_handler = { + NULL, /* init */ my_strnncoll_big5, my_strnncollsp_big5, my_strnxfrm_big5, @@ -6281,6 +6282,7 @@ static MY_COLLATION_HANDLER my_collation_big5_chinese_ci_handler = static MY_CHARSET_HANDLER my_charset_big5_handler= { + NULL, /* init */ ismbchar_big5, mbcharlen_big5, my_numchars_mb, diff --git a/strings/ctype-bin.c b/strings/ctype-bin.c index 7b3164bf438..54fe4476ae6 100644 --- a/strings/ctype-bin.c +++ b/strings/ctype-bin.c @@ -333,6 +333,7 @@ skip: MY_COLLATION_HANDLER my_collation_8bit_bin_handler = { + NULL, /* init */ my_strnncoll_binary, my_strnncoll_binary, my_strnxfrm_bin, @@ -346,6 +347,7 @@ MY_COLLATION_HANDLER my_collation_8bit_bin_handler = static MY_CHARSET_HANDLER my_charset_handler= { + NULL, /* init */ NULL, /* ismbchar */ my_mbcharlen_8bit, /* mbcharlen */ my_numchars_8bit, diff --git a/strings/ctype-czech.c b/strings/ctype-czech.c index 3218fdee673..8fde9498ed9 100644 --- a/strings/ctype-czech.c +++ b/strings/ctype-czech.c @@ -572,6 +572,7 @@ static MY_UNI_IDX idx_uni_8859_2[]={ static MY_COLLATION_HANDLER my_collation_latin2_czech_ci_handler = { + NULL, /* init */ my_strnncoll_czech, my_strnncollsp_czech, my_strnxfrm_czech, diff --git a/strings/ctype-euc_kr.c b/strings/ctype-euc_kr.c index c387246b4c6..e739339b9e4 100644 --- a/strings/ctype-euc_kr.c +++ b/strings/ctype-euc_kr.c @@ -8637,6 +8637,7 @@ my_mb_wc_euc_kr(CHARSET_INFO *cs __attribute__((unused)), static MY_COLLATION_HANDLER my_collation_ci_handler = { + NULL, /* init */ my_strnncoll_simple, /* strnncoll */ my_strnncollsp_simple, my_strnxfrm_simple, /* strnxfrm */ @@ -8649,6 +8650,7 @@ static MY_COLLATION_HANDLER my_collation_ci_handler = static MY_CHARSET_HANDLER my_charset_handler= { + NULL, /* init */ ismbchar_euc_kr, mbcharlen_euc_kr, my_numchars_mb, diff --git a/strings/ctype-gb2312.c b/strings/ctype-gb2312.c index fe1f72e7eda..aee4ed55af6 100644 --- a/strings/ctype-gb2312.c +++ b/strings/ctype-gb2312.c @@ -5688,6 +5688,7 @@ my_mb_wc_gb2312(CHARSET_INFO *cs __attribute__((unused)), static MY_COLLATION_HANDLER my_collation_ci_handler = { + NULL, /* init */ my_strnncoll_simple, /* strnncoll */ my_strnncollsp_simple, my_strnxfrm_simple, /* strnxfrm */ @@ -5700,6 +5701,7 @@ static MY_COLLATION_HANDLER my_collation_ci_handler = static MY_CHARSET_HANDLER my_charset_handler= { + NULL, /* init */ ismbchar_gb2312, mbcharlen_gb2312, my_numchars_mb, diff --git a/strings/ctype-gbk.c b/strings/ctype-gbk.c index 8b659cb55f9..d6063e9f80f 100644 --- a/strings/ctype-gbk.c +++ b/strings/ctype-gbk.c @@ -9918,6 +9918,7 @@ my_mb_wc_gbk(CHARSET_INFO *cs __attribute__((unused)), static MY_COLLATION_HANDLER my_collation_ci_handler = { + NULL, /* init */ my_strnncoll_gbk, my_strnncollsp_gbk, my_strnxfrm_gbk, @@ -9930,6 +9931,7 @@ static MY_COLLATION_HANDLER my_collation_ci_handler = static MY_CHARSET_HANDLER my_charset_handler= { + NULL, /* init */ ismbchar_gbk, mbcharlen_gbk, my_numchars_mb, diff --git a/strings/ctype-latin1.c b/strings/ctype-latin1.c index 03d4e71377b..86c80ff5a66 100644 --- a/strings/ctype-latin1.c +++ b/strings/ctype-latin1.c @@ -380,6 +380,7 @@ int my_wc_mb_latin1(CHARSET_INFO *cs __attribute__((unused)), static MY_CHARSET_HANDLER my_charset_handler= { + NULL, /* init */ NULL, my_mbcharlen_8bit, my_numchars_8bit, @@ -674,6 +675,7 @@ void my_hash_sort_latin1_de(CHARSET_INFO *cs __attribute__((unused)), static MY_COLLATION_HANDLER my_collation_german2_ci_handler= { + NULL, /* init */ my_strnncoll_latin1_de, my_strnncollsp_latin1_de, my_strnxfrm_latin1_de, diff --git a/strings/ctype-mb.c b/strings/ctype-mb.c index 9b02cd3b3da..c143994dbc3 100644 --- a/strings/ctype-mb.c +++ b/strings/ctype-mb.c @@ -512,6 +512,7 @@ static int my_wildcmp_mb_bin(CHARSET_INFO *cs, MY_COLLATION_HANDLER my_collation_mb_bin_handler = { + NULL, /* init */ my_strnncoll_mb_bin, my_strnncoll_mb_bin, my_strnxfrm_mb_bin, diff --git a/strings/ctype-simple.c b/strings/ctype-simple.c index 5f0a7426db3..c28df91ae86 100644 --- a/strings/ctype-simple.c +++ b/strings/ctype-simple.c @@ -1142,8 +1142,107 @@ skip: } +typedef struct +{ + int nchars; + MY_UNI_IDX uidx; +} uni_idx; + +#define PLANE_SIZE 0x100 +#define PLANE_NUM 0x100 +#define PLANE_NUMBER(x) (((x)>>8) % PLANE_NUM) + +static int pcmp(const void * f, const void * s) +{ + const uni_idx *F= (const uni_idx*) f; + const uni_idx *S= (const uni_idx*) s; + int res; + + if (!(res=((S->nchars)-(F->nchars)))) + res=((F->uidx.from)-(S->uidx.to)); + return res; +} + +static my_bool create_fromuni(CHARSET_INFO *cs, void *(*alloc)(uint)) +{ + uni_idx idx[PLANE_NUM]; + int i,n; + + /* Clear plane statistics */ + bzero(idx,sizeof(idx)); + + /* Count number of characters in each plane */ + for (i=0; i< 0x100; i++) + { + uint16 wc=cs->tab_to_uni[i]; + int pl= PLANE_NUMBER(wc); + + if (wc || !i) + { + if (!idx[pl].nchars) + { + idx[pl].uidx.from=wc; + idx[pl].uidx.to=wc; + }else + { + idx[pl].uidx.from=wc<idx[pl].uidx.from?wc:idx[pl].uidx.from; + idx[pl].uidx.to=wc>idx[pl].uidx.to?wc:idx[pl].uidx.to; + } + idx[pl].nchars++; + } + } + + /* Sort planes in descending order */ + qsort(&idx,PLANE_NUM,sizeof(uni_idx),&pcmp); + + for (i=0; i < PLANE_NUM; i++) + { + int ch,numchars; + + /* Skip empty plane */ + if (!idx[i].nchars) + break; + + numchars=idx[i].uidx.to-idx[i].uidx.from+1; + if (!(idx[i].uidx.tab=(uchar*) alloc(numchars * sizeof(*idx[i].uidx.tab)))) + return TRUE; + + bzero(idx[i].uidx.tab,numchars*sizeof(*idx[i].uidx.tab)); + + for (ch=1; ch < PLANE_SIZE; ch++) + { + uint16 wc=cs->tab_to_uni[ch]; + if (wc >= idx[i].uidx.from && wc <= idx[i].uidx.to && wc) + { + int ofs= wc - idx[i].uidx.from; + idx[i].uidx.tab[ofs]= ch; + } + } + } + + /* Allocate and fill reverse table for each plane */ + n=i; + if (!(cs->tab_from_uni= (MY_UNI_IDX*) alloc(sizeof(MY_UNI_IDX)*(n+1)))) + return TRUE; + + for (i=0; i< n; i++) + cs->tab_from_uni[i]= idx[i].uidx; + + /* Set end-of-list marker */ + bzero(&cs->tab_from_uni[i],sizeof(MY_UNI_IDX)); + return FALSE; +} + +static my_bool my_cset_init_8bit(CHARSET_INFO *cs, void *(*alloc)(uint)) +{ + return create_fromuni(cs, alloc); +} + + + MY_CHARSET_HANDLER my_charset_8bit_handler= { + my_cset_init_8bit, NULL, /* ismbchar */ my_mbcharlen_8bit, /* mbcharlen */ my_numchars_8bit, @@ -1170,6 +1269,7 @@ MY_CHARSET_HANDLER my_charset_8bit_handler= MY_COLLATION_HANDLER my_collation_8bit_simple_ci_handler = { + NULL, /* init */ my_strnncoll_simple, my_strnncollsp_simple, my_strnxfrm_simple, diff --git a/strings/ctype-sjis.c b/strings/ctype-sjis.c index b4a131d3410..3744711447a 100644 --- a/strings/ctype-sjis.c +++ b/strings/ctype-sjis.c @@ -4534,6 +4534,7 @@ my_mb_wc_sjis(CHARSET_INFO *cs __attribute__((unused)), static MY_COLLATION_HANDLER my_collation_ci_handler = { + NULL, /* init */ my_strnncoll_sjis, my_strnncollsp_sjis, my_strnxfrm_sjis, @@ -4547,6 +4548,7 @@ static MY_COLLATION_HANDLER my_collation_ci_handler = static MY_CHARSET_HANDLER my_charset_handler= { + NULL, /* init */ ismbchar_sjis, mbcharlen_sjis, my_numchars_mb, diff --git a/strings/ctype-tis620.c b/strings/ctype-tis620.c index 79ac2079720..a0ba1a266ea 100644 --- a/strings/ctype-tis620.c +++ b/strings/ctype-tis620.c @@ -906,6 +906,7 @@ int my_wc_mb_tis620(CHARSET_INFO *cs __attribute__((unused)), static MY_COLLATION_HANDLER my_collation_ci_handler = { + NULL, /* init */ my_strnncoll_tis620, my_strnncollsp_tis620, my_strnxfrm_tis620, @@ -918,6 +919,7 @@ static MY_COLLATION_HANDLER my_collation_ci_handler = static MY_CHARSET_HANDLER my_charset_handler= { + NULL, /* init */ NULL, /* ismbchar */ my_mbcharlen_8bit, /* mbcharlen */ my_numchars_8bit, diff --git a/strings/ctype-uca.c b/strings/ctype-uca.c index e6b68b8c9b2..846f17982c3 100644 --- a/strings/ctype-uca.c +++ b/strings/ctype-uca.c @@ -7036,8 +7036,464 @@ int my_wildcmp_uca(CHARSET_INFO *cs, } +/* + Collation language is implemented according to + subset of ICU Collation Customization (tailorings): + http://oss.software.ibm.com/icu/userguide/Collate_Customization.html + + Collation language elements: + Delimiters: + space - skipped + + <char> := A-Z | a-z | \uXXXX + + Shift command: + <shift> := & - reset at this letter. + + Diff command: + <d1> := < - Identifies a primary difference. + <d2> := << - Identifies a secondary difference. + <d3> := <<< - Idenfifies a tertiary difference. + + + Collation rules: + <ruleset> := <rule> { <ruleset> } + + <rule> := <d1> <string> + | <d2> <string> + | <d3> <string> + | <shift> <char> + + <string> := <char> [ <string> ] + + An example, Polish collation: + + &A < \u0105 <<< \u0104 + &C < \u0107 <<< \u0106 + &E < \u0119 <<< \u0118 + &L < \u0142 <<< \u0141 + &N < \u0144 <<< \u0143 + &O < \u00F3 <<< \u00D3 + &S < \u015B <<< \u015A + &Z < \u017A <<< \u017B +*/ + + +typedef enum my_coll_lexem_num_en +{ + MY_COLL_LEXEM_EOF = 0, + MY_COLL_LEXEM_DIFF = 1, + MY_COLL_LEXEM_SHIFT = 4, + MY_COLL_LEXEM_CHAR = 5, + MY_COLL_LEXEM_ERROR = 6 +} my_coll_lexem_num; + + +typedef struct my_coll_lexem_st +{ + const char *beg; + const char *end; + const char *prev; + int diff; + int code; +} MY_COLL_LEXEM; + + +/* + Initialize collation rule lexical anilizer + + SYNOPSIS + my_coll_lexem_init + lexem Lex analizer to init + str Const string to parse + strend End of the string + USAGE + + RETURN VALUES + N/A +*/ + +static void my_coll_lexem_init(MY_COLL_LEXEM *lexem, + const char *str, const char *strend) +{ + lexem->beg= str; + lexem->prev= str; + lexem->end= strend; + lexem->diff= 0; + lexem->code= 0; +} + + +/* + Print collation customization expression parse error, with context. + + SYNOPSIS + my_coll_lexem_print_error + lexem Lex analizer to take context from + errstr sting to write error to + errsize errstr size + txt error message + USAGE + + RETURN VALUES + N/A +*/ + +static void my_coll_lexem_print_error(MY_COLL_LEXEM *lexem, + char *errstr, size_t errsize, + const char *txt) +{ + char tail[30]; + size_t len= lexem->end - lexem->prev; + strmake (tail, lexem->prev, min(len, sizeof(tail)-1)); + errstr[errsize-1]= '\0'; + my_snprintf(errstr,errsize-1,"%s at '%s'", txt, tail); +} + + +/* + Convert a hex digit into its numeric value + + SYNOPSIS + ch2x + ch hex digit to convert + USAGE + + RETURN VALUES + an integer value in the range 0..15 + -1 on error +*/ + +static int ch2x(int ch) +{ + if (ch >= '0' && ch <= '9') + return ch - '0'; + + if (ch >= 'a' && ch <= 'f') + return 10 + ch - 'a'; + + if (ch >= 'A' && ch <= 'F') + return 10 + ch - 'A'; + + return -1; +} + + +/* + Collation language lexical parser: + Scans the next lexem. + + SYNOPSIS + my_coll_lexem_next + lexem Lex analizer, previously initialized by + my_coll_lexem_init. + USAGE + Call this function in a loop + + RETURN VALUES + Lexem number: eof, diff, shift, char or error. +*/ + +static my_coll_lexem_num my_coll_lexem_next(MY_COLL_LEXEM *lexem) +{ + for ( ;lexem->beg < lexem->end ; lexem->beg++) + { + lexem->prev= lexem->beg; + if (lexem->beg[0] == ' ' || lexem->beg[0] == '\t' || + lexem->beg[0] == '\r' || lexem->beg[0] == '\n') + continue; + + if (lexem->beg[0] == '&') + { + lexem->beg++; + return MY_COLL_LEXEM_SHIFT; + } + + if (lexem->beg[0] == '<') + { + for (lexem->beg++, lexem->diff=1; + (lexem->beg < lexem->end) && + (lexem->beg[0] == '<') && (lexem->diff<3); + lexem->beg++, lexem->diff++); + return MY_COLL_LEXEM_DIFF; + } + + if ((lexem->beg[0] >= 'a' && lexem->beg[0] <= 'z') || + (lexem->beg[0] >= 'A' && lexem->beg[0] <= 'Z')) + { + lexem->code= lexem->beg[0]; + lexem->beg++; + return MY_COLL_LEXEM_CHAR; + } + + if ((lexem->beg[0] == '\\') && + (lexem->beg+2 < lexem->end) && + (lexem->beg[1] == 'u')) + { + int ch; + + lexem->code= 0; + for (lexem->beg+=2; + (lexem->beg < lexem->end) && ((ch= ch2x(lexem->beg[0])) >= 0) ; + lexem->beg++) + { + lexem->code= (lexem->code << 4) + ch; + } + return MY_COLL_LEXEM_CHAR; + } + + return MY_COLL_LEXEM_ERROR; + } + return MY_COLL_LEXEM_EOF; +} + + +/* + Collation rule item +*/ + +typedef struct my_coll_rule_item_st +{ + uint base; /* Base character */ + uint curr; /* Current character */ + int diff[3]; /* Primary, Secondary and Tertiary difference */ +} MY_COLL_RULE; + + +/* + Collation language syntax parser. + Uses lexical parser. + + SYNOPSIS + my_coll_rule_parse + rule Collation rule list to load to. + str A string containin collation language expression. + strend End of the string. + USAGE + + RETURN VALUES + 0 - OK + 1 - ERROR, e.g. too many items. +*/ + +static int my_coll_rule_parse(MY_COLL_RULE *rule, size_t mitems, + const char *str, const char *strend, + char *errstr, size_t errsize) +{ + MY_COLL_LEXEM lexem; + my_coll_lexem_num lexnum; + my_coll_lexem_num prevlexnum= MY_COLL_LEXEM_ERROR; + MY_COLL_RULE item; + int state= 0; + size_t nitems= 0; + + /* Init all variables */ + errstr[0]= '\0'; + bzero(&item, sizeof(item)); + my_coll_lexem_init(&lexem, str, strend); + + while ((lexnum= my_coll_lexem_next(&lexem))) + { + if (lexnum == MY_COLL_LEXEM_ERROR) + { + my_coll_lexem_print_error(&lexem,errstr,errsize-1,"Unknown character"); + return -1; + } + + switch (state) { + case 0: + if (lexnum != MY_COLL_LEXEM_SHIFT) + { + my_coll_lexem_print_error(&lexem,errstr,errsize-1,"& expected"); + return -1; + } + prevlexnum= lexnum; + state= 2; + continue; + + case 1: + if (lexnum != MY_COLL_LEXEM_SHIFT && lexnum != MY_COLL_LEXEM_DIFF) + { + my_coll_lexem_print_error(&lexem,errstr,errsize-1,"& or < expected"); + return -1; + } + prevlexnum= lexnum; + state= 2; + continue; + + case 2: + if (lexnum != MY_COLL_LEXEM_CHAR) + { + my_coll_lexem_print_error(&lexem,errstr,errsize-1,"character expected"); + return -1; + } + + if (prevlexnum == MY_COLL_LEXEM_SHIFT) + { + item.base= lexem.code; + item.diff[0]= 0; + item.diff[1]= 0; + item.diff[2]= 0; + } + else if (prevlexnum == MY_COLL_LEXEM_DIFF) + { + item.curr= lexem.code; + if (lexem.diff == 3) + { + item.diff[2]++; + } + else if (lexem.diff == 2) + { + item.diff[1]++; + item.diff[2]= 0; + } + else if (lexem.diff == 1) + { + item.diff[0]++; + item.diff[1]= 0; + item.diff[2]= 0; + } + if (nitems >= mitems) + { + my_coll_lexem_print_error(&lexem,errstr,errsize-1,"Too many rules"); + return -1; + } + rule[nitems++]= item; + } + else + { + my_coll_lexem_print_error(&lexem,errstr,errsize-1,"Should never happen"); + return -1; + } + state= 1; + continue; + } + } + return (size_t) nitems; +} + +#define MY_MAX_COLL_RULE 64 + +/* + This function copies an UCS2 collation from + the default Unicode Collation Algorithm (UCA) + weights applying tailorings, i.e. a set of + alternative weights for some characters. + + The default UCA weights are stored in my_charset_ucs2_general_uca. + They consist of 256 pages, 256 character each. + + If a page is not overwritten by tailoring rules, + it is copies as is from UCA as is. + + If a page contains some overwritten characters, it is + allocated. Untouched characters are copied from the + default weights. +*/ + +static my_bool create_tailoring(CHARSET_INFO *cs, void *(*alloc)(uint)) +{ + MY_COLL_RULE rule[MY_MAX_COLL_RULE]; + char errstr[128]; + uchar *newlengths; + uint16 **newweights; + const uchar *deflengths= my_charset_ucs2_general_uca.sort_order; + uint16 **defweights= my_charset_ucs2_general_uca.sort_order_big; + int rc, i; + + if (!cs->tailoring) + return 1; + + /* Parse ICU Collation Customization expression */ + if ((rc= my_coll_rule_parse(rule, MY_MAX_COLL_RULE, + cs->tailoring, + cs->tailoring + strlen(cs->tailoring), + errstr, sizeof(errstr))) <= 0) + { + /* + TODO: add error message reporting. + printf("Error: %d '%s'\n", rc, errstr); + */ + return 1; + } + + if (!(newweights= (uint16**) alloc(256*sizeof(uint16*)))) + return 1; + bzero(newweights, 256*sizeof(uint16*)); + + if (!(newlengths= (uchar*) alloc(256))) + return 1; + + memcpy(newlengths, deflengths, 256); + + /* + Calculate maximum lenghts for the pages + which will be overwritten. + */ + for (i=0; i < rc; i++) + { + uint pageb= (rule[i].base >> 8) & 0xFF; + uint pagec= (rule[i].curr >> 8) & 0xFF; + + if (newlengths[pagec] < deflengths[pageb]) + newlengths[pagec]= deflengths[pageb]; + } + + for (i=0; i < rc; i++) + { + uint pageb= (rule[i].base >> 8) & 0xFF; + uint pagec= (rule[i].curr >> 8) & 0xFF; + uint chb, chc; + + if (!newweights[pagec]) + { + /* Alloc new page and copy the default UCA weights */ + uint size= 256*newlengths[pagec]*sizeof(uint16); + + if (!(newweights[pagec]= (uint16*) alloc(size))) + return 1; + bzero((void*) newweights[pagec], size); + + for (chc=0 ; chc < 256; chc++) + { + memcpy(newweights[pagec] + chc*newlengths[pagec], + defweights[pagec] + chc*deflengths[pagec], + deflengths[pagec]*sizeof(uint16)); + } + } + + /* + Aply the alternative rule: + shift to the base character and primary difference. + */ + chc= rule[i].curr & 0xFF; + chb= rule[i].base & 0xFF; + memcpy(newweights[pagec] + chc*newlengths[pagec], + defweights[pageb] + chb*deflengths[pageb], + deflengths[pageb]*sizeof(uint16)); + /* Apply primary difference */ + newweights[pagec][chc*newlengths[pagec]]+= rule[i].diff[0]; + } + + /* Copy non-overwritten pages from the default UCA weights */ + for (i= 0; i < 256 ; i++) + if (!newweights[i]) + newweights[i]= defweights[i]; + + cs->sort_order= newlengths; + cs->sort_order_big= newweights; + + return 0; +} + +static my_bool my_coll_init_uca(CHARSET_INFO *cs, void *(*alloc)(uint)) +{ + return create_tailoring(cs, alloc); +} + MY_COLLATION_HANDLER my_collation_ucs2_uca_handler = { + my_coll_init_uca, /* init */ my_strnncoll_uca, my_strnncollsp_uca, my_strnxfrm_uca, @@ -7051,7 +7507,7 @@ MY_COLLATION_HANDLER my_collation_ucs2_uca_handler = CHARSET_INFO my_charset_ucs2_general_uca= { 45,0,0, /* number */ - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONTEXT, + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, "ucs2", /* cs name */ "ucs2_general_uca", /* name */ "", /* comment */ diff --git a/strings/ctype-ucs2.c b/strings/ctype-ucs2.c index 9f1accf841f..f05e85a9d88 100644 --- a/strings/ctype-ucs2.c +++ b/strings/ctype-ucs2.c @@ -1372,6 +1372,7 @@ my_bool my_like_range_ucs2(CHARSET_INFO *cs, static MY_COLLATION_HANDLER my_collation_ucs2_general_ci_handler = { + NULL, /* init */ my_strnncoll_ucs2, my_strnncoll_ucs2, my_strnxfrm_ucs2, @@ -1385,6 +1386,7 @@ static MY_COLLATION_HANDLER my_collation_ucs2_general_ci_handler = static MY_COLLATION_HANDLER my_collation_ucs2_bin_handler = { + NULL, /* init */ my_strnncoll_ucs2_bin, my_strnncoll_ucs2_bin, my_strnxfrm_ucs2_bin, @@ -1398,6 +1400,7 @@ static MY_COLLATION_HANDLER my_collation_ucs2_bin_handler = MY_CHARSET_HANDLER my_charset_ucs2_handler= { + NULL, /* init */ my_ismbchar_ucs2, /* ismbchar */ my_mbcharlen_ucs2, /* mbcharlen */ my_numchars_ucs2, @@ -1426,7 +1429,7 @@ MY_CHARSET_HANDLER my_charset_ucs2_handler= CHARSET_INFO my_charset_ucs2_general_ci= { 35,0,0, /* number */ - MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONTEXT, + MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_STRNXFRM|MY_CS_UNICODE, "ucs2", /* cs name */ "ucs2_general_ci", /* name */ "", /* comment */ @@ -1452,7 +1455,7 @@ CHARSET_INFO my_charset_ucs2_general_ci= CHARSET_INFO my_charset_ucs2_bin= { 90,0,0, /* number */ - MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_UNICODE|MY_CS_NONTEXT, + MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_UNICODE, "ucs2", /* cs name */ "ucs2_bin", /* name */ "", /* comment */ diff --git a/strings/ctype-ujis.c b/strings/ctype-ujis.c index f28ea165f80..fb7946a6b98 100644 --- a/strings/ctype-ujis.c +++ b/strings/ctype-ujis.c @@ -8423,6 +8423,7 @@ my_wc_mb_euc_jp(CHARSET_INFO *c,my_wc_t wc, unsigned char *s, unsigned char *e) static MY_COLLATION_HANDLER my_collation_ci_handler = { + NULL, /* init */ my_strnncoll_simple,/* strnncoll */ my_strnncollsp_simple, my_strnxfrm_simple, /* strnxfrm */ @@ -8435,14 +8436,15 @@ static MY_COLLATION_HANDLER my_collation_ci_handler = static MY_CHARSET_HANDLER my_charset_handler= { + NULL, /* init */ ismbchar_ujis, mbcharlen_ujis, my_numchars_mb, my_charpos_mb, my_well_formed_len_mb, my_lengthsp_8bit, - my_mb_wc_euc_jp, /* mb_wc */ - my_wc_mb_euc_jp, /* wc_mb */ + my_mb_wc_euc_jp, /* mb_wc */ + my_wc_mb_euc_jp, /* wc_mb */ my_caseup_str_mb, my_casedn_str_mb, my_caseup_mb, diff --git a/strings/ctype-utf8.c b/strings/ctype-utf8.c index 39e9260ffed..99ac114de9c 100644 --- a/strings/ctype-utf8.c +++ b/strings/ctype-utf8.c @@ -2045,6 +2045,7 @@ static int my_mbcharlen_utf8(CHARSET_INFO *cs __attribute__((unused)) , uint c) static MY_COLLATION_HANDLER my_collation_ci_handler = { + NULL, /* init */ my_strnncoll_utf8, my_strnncollsp_utf8, my_strnxfrm_utf8, @@ -2057,6 +2058,7 @@ static MY_COLLATION_HANDLER my_collation_ci_handler = static MY_CHARSET_HANDLER my_charset_handler= { + NULL, /* init */ my_ismbchar_utf8, my_mbcharlen_utf8, my_numchars_mb, diff --git a/strings/ctype-win1250ch.c b/strings/ctype-win1250ch.c index 670318a082e..a2c5768b16c 100644 --- a/strings/ctype-win1250ch.c +++ b/strings/ctype-win1250ch.c @@ -605,6 +605,7 @@ my_like_range_win1250ch(CHARSET_INFO *cs __attribute__((unused)), static MY_COLLATION_HANDLER my_collation_czech_ci_handler = { + NULL, /* init */ my_strnncoll_win1250ch, my_strnncollsp_win1250ch, my_strnxfrm_win1250ch, |