diff options
author | unknown <bar@mysql.com> | 2004-06-12 20:36:58 +0500 |
---|---|---|
committer | unknown <bar@mysql.com> | 2004-06-12 20:36:58 +0500 |
commit | f573ee08871ce04d43d7e309e3f3d5e540dd59a1 (patch) | |
tree | 2effa345d520890cd93cba2f06b5beae65ae07c2 /strings | |
parent | a86e6f1788f95146e1bdb236d6d3b9ece7827ce5 (diff) | |
download | mariadb-git-f573ee08871ce04d43d7e309e3f3d5e540dd59a1.tar.gz |
Unicode collation algorithm: contraction support.
E.g. 'Ch' is treated as a separate letter in Czech,
not as a combination of C+h.
Diffstat (limited to 'strings')
| Mode | File | Lines changed |
|---|---|---|
| -rw-r--r-- | strings/ctype-big5.c | 3 |
| -rw-r--r-- | strings/ctype-bin.c | 3 |
| -rw-r--r-- | strings/ctype-czech.c | 1 |
| -rw-r--r-- | strings/ctype-euc_kr.c | 2 |
| -rw-r--r-- | strings/ctype-extra.c | 1 |
| -rw-r--r-- | strings/ctype-gb2312.c | 6 |
| -rw-r--r-- | strings/ctype-gbk.c | 6 |
| -rw-r--r-- | strings/ctype-latin1.c | 3 |
| -rw-r--r-- | strings/ctype-sjis.c | 6 |
| -rw-r--r-- | strings/ctype-tis620.c | 6 |
| -rw-r--r-- | strings/ctype-uca.c | 273 |
| -rw-r--r-- | strings/ctype-ucs2.c | 4 |
| -rw-r--r-- | strings/ctype-ujis.c | 6 |
| -rw-r--r-- | strings/ctype-utf8.c | 8 |
| -rw-r--r-- | strings/ctype-win1250ch.c | 1 |
15 files changed, 304 insertions, 25 deletions
diff --git a/strings/ctype-big5.c b/strings/ctype-big5.c index c84c1c1a2bf..ff53f61c053 100644 --- a/strings/ctype-big5.c +++ b/strings/ctype-big5.c @@ -6300,7 +6300,6 @@ static MY_CHARSET_HANDLER my_charset_big5_handler= my_long10_to_str_8bit, my_longlong10_to_str_8bit, my_fill_8bit, - my_strntol_8bit, my_strntoul_8bit, my_strntoll_8bit, @@ -6321,6 +6320,7 @@ CHARSET_INFO my_charset_big5_chinese_ci= to_lower_big5, to_upper_big5, sort_order_big5, + NULL, /* contractions */ NULL, /* sort_order_big*/ NULL, /* tab_to_uni */ NULL, /* tab_from_uni */ @@ -6348,6 +6348,7 @@ CHARSET_INFO my_charset_big5_bin= to_lower_big5, to_upper_big5, sort_order_big5, + NULL, /* contractions */ NULL, /* sort_order_big*/ NULL, /* tab_to_uni */ NULL, /* tab_from_uni */ diff --git a/strings/ctype-bin.c b/strings/ctype-bin.c index 5c5e56290e0..cc83471f264 100644 --- a/strings/ctype-bin.c +++ b/strings/ctype-bin.c @@ -395,9 +395,10 @@ CHARSET_INFO my_charset_bin = bin_char_array, /* to_lower */ bin_char_array, /* to_upper */ bin_char_array, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ NULL, /* tab_to_uni */ NULL, /* tab_from_uni */ - NULL, /* sort_order_big*/ NULL, /* state_map */ NULL, /* ident_map */ 1, /* strxfrm_multiply */ diff --git a/strings/ctype-czech.c b/strings/ctype-czech.c index 08677d737ec..6f9e9f74d35 100644 --- a/strings/ctype-czech.c +++ b/strings/ctype-czech.c @@ -599,6 +599,7 @@ CHARSET_INFO my_charset_latin2_czech_ci = to_lower_czech, to_upper_czech, sort_order_czech, + NULL, /* contractions */ NULL, /* sort_order_big*/ tab_8859_2_uni, /* tab_to_uni */ idx_uni_8859_2, /* tab_from_uni */ diff --git a/strings/ctype-euc_kr.c b/strings/ctype-euc_kr.c index 1e6931244d2..fd8659a181c 100644 --- a/strings/ctype-euc_kr.c +++ b/strings/ctype-euc_kr.c @@ -8688,6 +8688,7 @@ CHARSET_INFO my_charset_euckr_korean_ci= to_lower_euc_kr, to_upper_euc_kr, sort_order_euc_kr, + NULL, /* contractions */ NULL, /* sort_order_big*/ NULL, /* tab_to_uni */ NULL, /* 
tab_from_uni */ @@ -8715,6 +8716,7 @@ CHARSET_INFO my_charset_euckr_bin= to_lower_euc_kr, to_upper_euc_kr, sort_order_euc_kr, + NULL, /* contractions */ NULL, /* sort_order_big*/ NULL, /* tab_to_uni */ NULL, /* tab_from_uni */ diff --git a/strings/ctype-extra.c b/strings/ctype-extra.c index baf1d319b00..3672dcd0b33 100644 --- a/strings/ctype-extra.c +++ b/strings/ctype-extra.c @@ -29,6 +29,7 @@ CHARSET_INFO compiled_charsets[] = { NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ + NULL, /* contractions */ NULL, /* sort_order_big*/ NULL, /* tab_to_uni */ NULL, /* tab_from_uni */ diff --git a/strings/ctype-gb2312.c b/strings/ctype-gb2312.c index 2c5aae83769..b9f61256717 100644 --- a/strings/ctype-gb2312.c +++ b/strings/ctype-gb2312.c @@ -5739,9 +5739,10 @@ CHARSET_INFO my_charset_gb2312_chinese_ci= to_lower_gb2312, to_upper_gb2312, sort_order_gb2312, + NULL, /* contractions */ + NULL, /* sort_order_big*/ NULL, /* tab_to_uni */ NULL, /* tab_from_uni */ - NULL, /* sort_order_big*/ NULL, /* state_map */ NULL, /* ident_map */ 1, /* strxfrm_multiply */ @@ -5765,9 +5766,10 @@ CHARSET_INFO my_charset_gb2312_bin= to_lower_gb2312, to_upper_gb2312, sort_order_gb2312, + NULL, /* contractions */ + NULL, /* sort_order_big*/ NULL, /* tab_to_uni */ NULL, /* tab_from_uni */ - NULL, /* sort_order_big*/ NULL, /* state_map */ NULL, /* ident_map */ 1, /* strxfrm_multiply */ diff --git a/strings/ctype-gbk.c b/strings/ctype-gbk.c index 2b31adc2f8a..2ef75e27d9a 100644 --- a/strings/ctype-gbk.c +++ b/strings/ctype-gbk.c @@ -9970,9 +9970,10 @@ CHARSET_INFO my_charset_gbk_chinese_ci= to_lower_gbk, to_upper_gbk, sort_order_gbk, + NULL, /* contractions */ + NULL, /* sort_order_big*/ NULL, /* tab_to_uni */ NULL, /* tab_from_uni */ - NULL, /* sort_order_big*/ NULL, /* state_map */ NULL, /* ident_map */ 1, /* strxfrm_multiply */ @@ -9996,9 +9997,10 @@ CHARSET_INFO my_charset_gbk_bin= to_lower_gbk, to_upper_gbk, sort_order_gbk, + NULL, /* contractions */ + NULL, /* sort_order_big*/ 
NULL, /* tab_to_uni */ NULL, /* tab_from_uni */ - NULL, /* sort_order_big*/ NULL, /* state_map */ NULL, /* ident_map */ 1, /* strxfrm_multiply */ diff --git a/strings/ctype-latin1.c b/strings/ctype-latin1.c index dd9ab4399fb..652794fa84d 100644 --- a/strings/ctype-latin1.c +++ b/strings/ctype-latin1.c @@ -418,6 +418,7 @@ CHARSET_INFO my_charset_latin1= to_lower_latin1, to_upper_latin1, sort_order_latin1, + NULL, /* contractions */ NULL, /* sort_order_big*/ cs_to_uni, /* tab_to_uni */ NULL, /* tab_from_uni */ @@ -700,6 +701,7 @@ CHARSET_INFO my_charset_latin1_german2_ci= to_lower_latin1, to_upper_latin1, sort_order_latin1_de, + NULL, /* contractions */ NULL, /* sort_order_big*/ cs_to_uni, /* tab_to_uni */ NULL, /* tab_from_uni */ @@ -727,6 +729,7 @@ CHARSET_INFO my_charset_latin1_bin= to_lower_latin1, to_upper_latin1, sort_order_latin1_de, + NULL, /* contractions */ NULL, /* sort_order_big*/ cs_to_uni, /* tab_to_uni */ NULL, /* tab_from_uni */ diff --git a/strings/ctype-sjis.c b/strings/ctype-sjis.c index 54a0df26f09..5fd005f842e 100644 --- a/strings/ctype-sjis.c +++ b/strings/ctype-sjis.c @@ -4589,9 +4589,10 @@ CHARSET_INFO my_charset_sjis_japanese_ci= to_lower_sjis, to_upper_sjis, sort_order_sjis, + NULL, /* contractions */ + NULL, /* sort_order_big*/ NULL, /* tab_to_uni */ NULL, /* tab_from_uni */ - NULL, /* sort_order_big*/ NULL, /* state_map */ NULL, /* ident_map */ 1, /* strxfrm_multiply */ @@ -4615,9 +4616,10 @@ CHARSET_INFO my_charset_sjis_bin= to_lower_sjis, to_upper_sjis, sort_order_sjis, + NULL, /* contractions */ + NULL, /* sort_order_big*/ NULL, /* tab_to_uni */ NULL, /* tab_from_uni */ - NULL, /* sort_order_big*/ NULL, /* state_map */ NULL, /* ident_map */ 1, /* strxfrm_multiply */ diff --git a/strings/ctype-tis620.c b/strings/ctype-tis620.c index dae778a8328..c7d859a6ead 100644 --- a/strings/ctype-tis620.c +++ b/strings/ctype-tis620.c @@ -962,9 +962,10 @@ CHARSET_INFO my_charset_tis620_thai_ci= to_lower_tis620, to_upper_tis620, sort_order_tis620, + 
NULL, /* contractions */ + NULL, /* sort_order_big*/ NULL, /* tab_to_uni */ NULL, /* tab_from_uni */ - NULL, /* sort_order_big*/ NULL, /* state_map */ NULL, /* ident_map */ 4, /* strxfrm_multiply */ @@ -988,9 +989,10 @@ CHARSET_INFO my_charset_tis620_bin= to_lower_tis620, to_upper_tis620, sort_order_tis620, + NULL, /* contractions */ + NULL, /* sort_order_big*/ NULL, /* tab_to_uni */ NULL, /* tab_from_uni */ - NULL, /* sort_order_big*/ NULL, /* state_map */ NULL, /* ident_map */ 1, /* strxfrm_multiply */ diff --git a/strings/ctype-uca.c b/strings/ctype-uca.c index 82ab3660111..7de373b3c8f 100644 --- a/strings/ctype-uca.c +++ b/strings/ctype-uca.c @@ -6620,6 +6620,41 @@ static const char turkish[]= "& U < \\u00FC <<< \\u00DC "; +static const char czech[]= + "& C < \\u010D <<< \\u010C " + "& H < ch <<< Ch <<< CH" + "& R < \\u0159 <<< \\u0158" + "& S < \\u0161 <<< \\u0160" + "& Z < \\u017E <<< \\u017D"; + +static const char danish[]= /* Also good for Norwegian */ + "& Y << \\u00FC <<< \\u00DC << \\u0171 <<< \\u0170" + "& Z < \\u00E6 <<< \\u00C6 << \\u00E4 <<< \\u00C4" + " < \\u00F8 <<< \\u00D8 << \\u00F6 <<< \\u00D6 << \\u0151 <<< \\u0150" + " < \\u00E5 <<< \\u00C5 << aa <<< Aa <<< AA"; + +static const char lithuanian[]= + "& C << ch <<< Ch <<< CH< \\u010D <<< \\u010C" + "& E << \\u0119 <<< \\u0118 << \\u0117 <<< \\u0116" + "& I << y <<< Y" + "& S < \\u0161 <<< \\u0160" + "& Z < \\u017E <<< \\u017D"; + +static const char slovak[]= + "& A < \\u00E4 <<< \\u00C4" + "& C < \\u010D <<< \\u010C" + "& H < ch <<< Ch <<< CH" + "& O < \\u00F4 <<< \\u00D4" + "& S < \\u0161 <<< \\u0160" + "& Z < \\u017E <<< \\017D"; + +static const char spanish2[]= /* Also good for Asturian and Galician */ + "&C < ch <<< Ch <<< CH" + "&L < ll <<< Ll <<< LL" + "&N < \\u00F1 <<< \\u00D1" + "&R << rr <<< Rr <<< RR"; + + /* Unicode Collation Algorithm: Collation element (weight) scanner, @@ -6633,6 +6668,7 @@ typedef struct my_uca_scanner_st const uchar *send; /* End of the input string */ uchar 
*uca_length; uint16 **uca_weight; + uint16 *contractions; uint16 implicit[2]; int page; int code; @@ -6666,6 +6702,7 @@ static void my_uca_scanner_init(my_uca_scanner *scanner, scanner->wbeg= nochar; scanner->uca_length= cs->sort_order; scanner->uca_weight= cs->sort_order_big; + scanner->contractions= cs->contractions; } @@ -6732,6 +6769,22 @@ static int my_uca_scanner_next(my_uca_scanner *scanner) scanner->code= (unsigned char)scanner->sbeg[1]; scanner->sbeg+= 2; + if (scanner->contractions && (scanner->sbeg <= scanner->send)) + { + int cweight; + + if (!scanner->page && !scanner->sbeg[0] && + (scanner->sbeg[1] > 0x40) && (scanner->sbeg[1] < 0x80) && + (scanner->code > 0x40) && (scanner->code < 0x80) && + (cweight= scanner->contractions[(scanner->code-0x40)*0x40+scanner->sbeg[1]-0x40])) + { + scanner->implicit[0]= 0; + scanner->wbeg= scanner->implicit; + scanner->sbeg+=2; + return cweight; + } + } + if (!ucaw[scanner->page]) goto implicit; scanner->wbeg= ucaw[scanner->page] + scanner->code * ucal[scanner->page]; @@ -7354,7 +7407,7 @@ static my_coll_lexem_num my_coll_lexem_next(MY_COLL_LEXEM *lexem) typedef struct my_coll_rule_item_st { uint base; /* Base character */ - uint curr; /* Current character */ + uint curr[2]; /* Current character */ int diff[3]; /* Primary, Secondary and Tertiary difference */ } MY_COLL_RULE; @@ -7436,7 +7489,18 @@ static int my_coll_rule_parse(MY_COLL_RULE *rule, size_t mitems, } else if (prevlexnum == MY_COLL_LEXEM_DIFF) { - item.curr= lexem.code; + MY_COLL_LEXEM savlex; + savlex= lexem; + item.curr[0]= lexem.code; + if ((lexnum= my_coll_lexem_next(&lexem)) == MY_COLL_LEXEM_CHAR) + { + item.curr[1]= lexem.code; + } + else + { + item.curr[1]= 0; + lexem=savlex; /* Restore previous parser state */ + } if (lexem.diff == 3) { item.diff[2]++; @@ -7499,7 +7563,8 @@ static my_bool create_tailoring(CHARSET_INFO *cs, void *(*alloc)(uint)) const uchar *deflengths= my_charset_ucs2_general_uca.sort_order; uint16 **defweights= 
my_charset_ucs2_general_uca.sort_order_big; int rc, i; - + int ncontractions= 0; + if (!cs->tailoring) return 1; @@ -7531,19 +7596,27 @@ static my_bool create_tailoring(CHARSET_INFO *cs, void *(*alloc)(uint)) */ for (i=0; i < rc; i++) { - uint pageb= (rule[i].base >> 8) & 0xFF; - uint pagec= (rule[i].curr >> 8) & 0xFF; + if (!rule[i].curr[1]) /* If not a contraction */ + { + uint pageb= (rule[i].base >> 8) & 0xFF; + uint pagec= (rule[i].curr[0] >> 8) & 0xFF; - if (newlengths[pagec] < deflengths[pageb]) - newlengths[pagec]= deflengths[pageb]; + if (newlengths[pagec] < deflengths[pageb]) + newlengths[pagec]= deflengths[pageb]; + } + else + ncontractions++; } for (i=0; i < rc; i++) { uint pageb= (rule[i].base >> 8) & 0xFF; - uint pagec= (rule[i].curr >> 8) & 0xFF; + uint pagec= (rule[i].curr[0] >> 8) & 0xFF; uint chb, chc; + if (rule[i].curr[1]) /* Skip contraction */ + continue; + if (!newweights[pagec]) { /* Alloc new page and copy the default UCA weights */ @@ -7565,7 +7638,7 @@ static my_bool create_tailoring(CHARSET_INFO *cs, void *(*alloc)(uint)) Aply the alternative rule: shift to the base character and primary difference. 
*/ - chc= rule[i].curr & 0xFF; + chc= rule[i].curr[0] & 0xFF; chb= rule[i].base & 0xFF; memcpy(newweights[pagec] + chc*newlengths[pagec], defweights[pageb] + chb*deflengths[pageb], @@ -7581,7 +7654,43 @@ static my_bool create_tailoring(CHARSET_INFO *cs, void *(*alloc)(uint)) cs->sort_order= newlengths; cs->sort_order_big= newweights; + cs->contractions= NULL; + /* Now process contractions */ + if (ncontractions) + { + uint size= 0x40*0x40*sizeof(uint16); /* 8K, for basic latin letter only */ + if (!(cs->contractions= (uint16*) alloc(size))) + return 1; + bzero((void*)cs->contractions, size); + for (i=0; i < rc; i++) + { + if (rule[i].curr[1]) + { + uint pageb= (rule[i].base >> 8) & 0xFF; + uint chb= rule[i].base & 0xFF; + uint16 *offsb= defweights[pageb] + chb*deflengths[pageb]; + uint offsc; + + if (offsb[1] || + rule[i].curr[0] < 0x40 || rule[i].curr[0] > 0x7f || + rule[i].curr[1] < 0x40 || rule[i].curr[1] > 0x7f) + { + /* + TODO: add error reporting; + We support only basic latin letters contractions at this point. + Also, We don't support contractions with weight longer than one. + Otherwise, we'd need much more memory. 
+ */ + return 1; + } + offsc= (rule[i].curr[0]-0x40)*0x40+(rule[i].curr[1]-0x40); + + /* Copy base weight applying primary difference */ + cs->contractions[offsc]= offsb[0] + rule[i].diff[0]; + } + } + } return 0; } @@ -7615,6 +7724,7 @@ CHARSET_INFO my_charset_ucs2_general_uca= NULL, /* to_lower */ NULL, /* to_upper */ uca_length, /* sort_order */ + NULL, /* contractions */ uca_weight, /* sort_order_big*/ NULL, /* tab_to_uni */ NULL, /* tab_from_uni */ @@ -7642,6 +7752,7 @@ CHARSET_INFO my_charset_ucs2_icelandic_uca_ci= NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ + NULL, /* contractions */ NULL, /* sort_order_big*/ NULL, /* tab_to_uni */ NULL, /* tab_from_uni */ @@ -7668,6 +7779,7 @@ CHARSET_INFO my_charset_ucs2_latvian_uca_ci= NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ + NULL, /* contractions */ NULL, /* sort_order_big*/ NULL, /* tab_to_uni */ NULL, /* tab_from_uni */ @@ -7694,6 +7806,7 @@ CHARSET_INFO my_charset_ucs2_romanian_uca_ci= NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ + NULL, /* contractions */ NULL, /* sort_order_big*/ NULL, /* tab_to_uni */ NULL, /* tab_from_uni */ @@ -7720,6 +7833,7 @@ CHARSET_INFO my_charset_ucs2_slovenian_uca_ci= NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ + NULL, /* contractions */ NULL, /* sort_order_big*/ NULL, /* tab_to_uni */ NULL, /* tab_from_uni */ @@ -7746,6 +7860,7 @@ CHARSET_INFO my_charset_ucs2_polish_uca_ci= NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ + NULL, /* contractions */ NULL, /* sort_order_big*/ NULL, /* tab_to_uni */ NULL, /* tab_from_uni */ @@ -7772,6 +7887,7 @@ CHARSET_INFO my_charset_ucs2_estonian_uca_ci= NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ + NULL, /* contractions */ NULL, /* sort_order_big*/ NULL, /* tab_to_uni */ NULL, /* tab_from_uni */ @@ -7798,6 +7914,7 @@ CHARSET_INFO my_charset_ucs2_spanish_uca_ci= NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ + 
NULL, /* contractions */ NULL, /* sort_order_big*/ NULL, /* tab_to_uni */ NULL, /* tab_from_uni */ @@ -7824,6 +7941,7 @@ CHARSET_INFO my_charset_ucs2_swedish_uca_ci= NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ + NULL, /* contractions */ NULL, /* sort_order_big*/ NULL, /* tab_to_uni */ NULL, /* tab_from_uni */ @@ -7850,6 +7968,143 @@ CHARSET_INFO my_charset_ucs2_turkish_uca_ci= NULL, /* to_lower */ NULL, /* to_upper */ NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 2, /* mbminlen */ + 2, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + &my_charset_ucs2_handler, + &my_collation_ucs2_uca_handler +}; + +CHARSET_INFO my_charset_ucs2_czech_uca_ci= +{ + 137,0,0, /* number */ + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, + "ucs2", /* cs name */ + "ucs2_czech_ci", /* name */ + "", /* comment */ + czech, /* tailoring */ + NULL, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 2, /* mbminlen */ + 2, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + &my_charset_ucs2_handler, + &my_collation_ucs2_uca_handler +}; + + +CHARSET_INFO my_charset_ucs2_danish_uca_ci= +{ + 138,0,0, /* number */ + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, + "ucs2", /* cs name */ + "ucs2_danish_ci", /* name */ + "", /* comment */ + danish, /* tailoring */ + NULL, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 2, /* mbminlen */ + 2, /* mbmaxlen */ 
+ 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + &my_charset_ucs2_handler, + &my_collation_ucs2_uca_handler +}; + +CHARSET_INFO my_charset_ucs2_lithuanian_uca_ci= +{ + 139,0,0, /* number */ + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, + "ucs2", /* cs name */ + "ucs2_lithuanian_ci",/* name */ + "", /* comment */ + lithuanian, /* tailoring */ + NULL, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 2, /* mbminlen */ + 2, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + &my_charset_ucs2_handler, + &my_collation_ucs2_uca_handler +}; + +CHARSET_INFO my_charset_ucs2_slovak_uca_ci= +{ + 140,0,0, /* number */ + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, + "ucs2", /* cs name */ + "ucs2_slovak_ci", /* name */ + "", /* comment */ + lithuanian, /* tailoring */ + NULL, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 2, /* mbminlen */ + 2, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + &my_charset_ucs2_handler, + &my_collation_ucs2_uca_handler +}; + +CHARSET_INFO my_charset_ucs2_spanish2_uca_ci= +{ + 141,0,0, /* number */ + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, + "ucs2", /* cs name */ + "ucs2_spanish2_ci", /* name */ + "", /* comment */ + spanish2, /* tailoring */ + NULL, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ NULL, /* sort_order_big*/ NULL, /* tab_to_uni */ NULL, /* tab_from_uni */ diff --git a/strings/ctype-ucs2.c b/strings/ctype-ucs2.c index 58df303a79f..20a5ff58d3a 100644 --- a/strings/ctype-ucs2.c +++ 
b/strings/ctype-ucs2.c @@ -1454,9 +1454,10 @@ CHARSET_INFO my_charset_ucs2_general_ci= to_lower_ucs2, /* to_lower */ to_upper_ucs2, /* to_upper */ to_upper_ucs2, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ NULL, /* tab_to_uni */ NULL, /* tab_from_uni */ - NULL, /* sort_order_big*/ NULL, /* state_map */ NULL, /* ident_map */ 1, /* strxfrm_multiply */ @@ -1480,6 +1481,7 @@ CHARSET_INFO my_charset_ucs2_bin= to_lower_ucs2, /* to_lower */ to_upper_ucs2, /* to_upper */ to_upper_ucs2, /* sort_order */ + NULL, /* contractions */ NULL, /* sort_order_big*/ NULL, /* tab_to_uni */ NULL, /* tab_from_uni */ diff --git a/strings/ctype-ujis.c b/strings/ctype-ujis.c index 7f88a16cac6..3f53a07f527 100644 --- a/strings/ctype-ujis.c +++ b/strings/ctype-ujis.c @@ -8475,9 +8475,10 @@ CHARSET_INFO my_charset_ujis_japanese_ci= to_lower_ujis, to_upper_ujis, sort_order_ujis, + NULL, /* sort_order_big*/ + NULL, /* contractions */ NULL, /* tab_to_uni */ NULL, /* tab_from_uni */ - NULL, /* sort_order_big*/ NULL, /* state_map */ NULL, /* ident_map */ 1, /* strxfrm_multiply */ @@ -8502,9 +8503,10 @@ CHARSET_INFO my_charset_ujis_bin= to_lower_ujis, to_upper_ujis, sort_order_ujis, + NULL, /* contractions */ + NULL, /* sort_order_big*/ NULL, /* tab_to_uni */ NULL, /* tab_from_uni */ - NULL, /* sort_order_big*/ NULL, /* state_map */ NULL, /* ident_map */ 1, /* strxfrm_multiply */ diff --git a/strings/ctype-utf8.c b/strings/ctype-utf8.c index 58f684a0f16..dd496aa8fa2 100644 --- a/strings/ctype-utf8.c +++ b/strings/ctype-utf8.c @@ -2098,9 +2098,10 @@ CHARSET_INFO my_charset_utf8_general_ci= to_lower_utf8, /* to_lower */ to_upper_utf8, /* to_upper */ to_upper_utf8, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ NULL, /* tab_to_uni */ NULL, /* tab_from_uni */ - NULL, /* sort_order_big*/ NULL, /* state_map */ NULL, /* ident_map */ 1, /* strxfrm_multiply */ @@ -2125,9 +2126,10 @@ CHARSET_INFO my_charset_utf8_bin= to_lower_utf8, /* to_lower */ 
to_upper_utf8, /* to_upper */ to_upper_utf8, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ NULL, /* tab_to_uni */ NULL, /* tab_from_uni */ - NULL, /* sort_order_big*/ NULL, /* state_map */ NULL, /* ident_map */ 1, /* strxfrm_multiply */ @@ -2173,7 +2175,7 @@ int main() test_mb(cs,(uchar*)str); - pr1;2cintf("orig :'%s'\n",str); + printf("orig :'%s'\n",str); my_caseup_utf8(cs,str,15); printf("caseup :'%s'\n",str); diff --git a/strings/ctype-win1250ch.c b/strings/ctype-win1250ch.c index 83aaa6839e4..b4dbda3e8ed 100644 --- a/strings/ctype-win1250ch.c +++ b/strings/ctype-win1250ch.c @@ -634,6 +634,7 @@ CHARSET_INFO my_charset_cp1250_czech_ci = to_lower_win1250ch, to_upper_win1250ch, sort_order_win1250ch, + NULL, /* contractions */ NULL, /* sort_order_big*/ tab_cp1250_uni, /* tab_to_uni */ idx_uni_cp1250, /* tab_from_uni */ |