summaryrefslogtreecommitdiff
path: root/strings
diff options
context:
space:
mode:
authorunknown <bar@mysql.com>2004-06-12 20:36:58 +0500
committerunknown <bar@mysql.com>2004-06-12 20:36:58 +0500
commitf573ee08871ce04d43d7e309e3f3d5e540dd59a1 (patch)
tree2effa345d520890cd93cba2f06b5beae65ae07c2 /strings
parenta86e6f1788f95146e1bdb236d6d3b9ece7827ce5 (diff)
downloadmariadb-git-f573ee08871ce04d43d7e309e3f3d5e540dd59a1.tar.gz
Unicode collation algorithm: contraction support.
E.g. 'Ch' is treated as a separate letter in Czech, not as a combination of C+h.
Diffstat (limited to 'strings')
-rw-r--r--strings/ctype-big5.c3
-rw-r--r--strings/ctype-bin.c3
-rw-r--r--strings/ctype-czech.c1
-rw-r--r--strings/ctype-euc_kr.c2
-rw-r--r--strings/ctype-extra.c1
-rw-r--r--strings/ctype-gb2312.c6
-rw-r--r--strings/ctype-gbk.c6
-rw-r--r--strings/ctype-latin1.c3
-rw-r--r--strings/ctype-sjis.c6
-rw-r--r--strings/ctype-tis620.c6
-rw-r--r--strings/ctype-uca.c273
-rw-r--r--strings/ctype-ucs2.c4
-rw-r--r--strings/ctype-ujis.c6
-rw-r--r--strings/ctype-utf8.c8
-rw-r--r--strings/ctype-win1250ch.c1
15 files changed, 304 insertions, 25 deletions
diff --git a/strings/ctype-big5.c b/strings/ctype-big5.c
index c84c1c1a2bf..ff53f61c053 100644
--- a/strings/ctype-big5.c
+++ b/strings/ctype-big5.c
@@ -6300,7 +6300,6 @@ static MY_CHARSET_HANDLER my_charset_big5_handler=
my_long10_to_str_8bit,
my_longlong10_to_str_8bit,
my_fill_8bit,
-
my_strntol_8bit,
my_strntoul_8bit,
my_strntoll_8bit,
@@ -6321,6 +6320,7 @@ CHARSET_INFO my_charset_big5_chinese_ci=
to_lower_big5,
to_upper_big5,
sort_order_big5,
+ NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
@@ -6348,6 +6348,7 @@ CHARSET_INFO my_charset_big5_bin=
to_lower_big5,
to_upper_big5,
sort_order_big5,
+ NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
diff --git a/strings/ctype-bin.c b/strings/ctype-bin.c
index 5c5e56290e0..cc83471f264 100644
--- a/strings/ctype-bin.c
+++ b/strings/ctype-bin.c
@@ -395,9 +395,10 @@ CHARSET_INFO my_charset_bin =
bin_char_array, /* to_lower */
bin_char_array, /* to_upper */
bin_char_array, /* sort_order */
+ NULL, /* contractions */
+ NULL, /* sort_order_big*/
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- NULL, /* sort_order_big*/
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */
diff --git a/strings/ctype-czech.c b/strings/ctype-czech.c
index 08677d737ec..6f9e9f74d35 100644
--- a/strings/ctype-czech.c
+++ b/strings/ctype-czech.c
@@ -599,6 +599,7 @@ CHARSET_INFO my_charset_latin2_czech_ci =
to_lower_czech,
to_upper_czech,
sort_order_czech,
+ NULL, /* contractions */
NULL, /* sort_order_big*/
tab_8859_2_uni, /* tab_to_uni */
idx_uni_8859_2, /* tab_from_uni */
diff --git a/strings/ctype-euc_kr.c b/strings/ctype-euc_kr.c
index 1e6931244d2..fd8659a181c 100644
--- a/strings/ctype-euc_kr.c
+++ b/strings/ctype-euc_kr.c
@@ -8688,6 +8688,7 @@ CHARSET_INFO my_charset_euckr_korean_ci=
to_lower_euc_kr,
to_upper_euc_kr,
sort_order_euc_kr,
+ NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
@@ -8715,6 +8716,7 @@ CHARSET_INFO my_charset_euckr_bin=
to_lower_euc_kr,
to_upper_euc_kr,
sort_order_euc_kr,
+ NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
diff --git a/strings/ctype-extra.c b/strings/ctype-extra.c
index baf1d319b00..3672dcd0b33 100644
--- a/strings/ctype-extra.c
+++ b/strings/ctype-extra.c
@@ -29,6 +29,7 @@ CHARSET_INFO compiled_charsets[] = {
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
+ NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
diff --git a/strings/ctype-gb2312.c b/strings/ctype-gb2312.c
index 2c5aae83769..b9f61256717 100644
--- a/strings/ctype-gb2312.c
+++ b/strings/ctype-gb2312.c
@@ -5739,9 +5739,10 @@ CHARSET_INFO my_charset_gb2312_chinese_ci=
to_lower_gb2312,
to_upper_gb2312,
sort_order_gb2312,
+ NULL, /* contractions */
+ NULL, /* sort_order_big*/
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- NULL, /* sort_order_big*/
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */
@@ -5765,9 +5766,10 @@ CHARSET_INFO my_charset_gb2312_bin=
to_lower_gb2312,
to_upper_gb2312,
sort_order_gb2312,
+ NULL, /* contractions */
+ NULL, /* sort_order_big*/
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- NULL, /* sort_order_big*/
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */
diff --git a/strings/ctype-gbk.c b/strings/ctype-gbk.c
index 2b31adc2f8a..2ef75e27d9a 100644
--- a/strings/ctype-gbk.c
+++ b/strings/ctype-gbk.c
@@ -9970,9 +9970,10 @@ CHARSET_INFO my_charset_gbk_chinese_ci=
to_lower_gbk,
to_upper_gbk,
sort_order_gbk,
+ NULL, /* contractions */
+ NULL, /* sort_order_big*/
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- NULL, /* sort_order_big*/
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */
@@ -9996,9 +9997,10 @@ CHARSET_INFO my_charset_gbk_bin=
to_lower_gbk,
to_upper_gbk,
sort_order_gbk,
+ NULL, /* contractions */
+ NULL, /* sort_order_big*/
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- NULL, /* sort_order_big*/
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */
diff --git a/strings/ctype-latin1.c b/strings/ctype-latin1.c
index dd9ab4399fb..652794fa84d 100644
--- a/strings/ctype-latin1.c
+++ b/strings/ctype-latin1.c
@@ -418,6 +418,7 @@ CHARSET_INFO my_charset_latin1=
to_lower_latin1,
to_upper_latin1,
sort_order_latin1,
+ NULL, /* contractions */
NULL, /* sort_order_big*/
cs_to_uni, /* tab_to_uni */
NULL, /* tab_from_uni */
@@ -700,6 +701,7 @@ CHARSET_INFO my_charset_latin1_german2_ci=
to_lower_latin1,
to_upper_latin1,
sort_order_latin1_de,
+ NULL, /* contractions */
NULL, /* sort_order_big*/
cs_to_uni, /* tab_to_uni */
NULL, /* tab_from_uni */
@@ -727,6 +729,7 @@ CHARSET_INFO my_charset_latin1_bin=
to_lower_latin1,
to_upper_latin1,
sort_order_latin1_de,
+ NULL, /* contractions */
NULL, /* sort_order_big*/
cs_to_uni, /* tab_to_uni */
NULL, /* tab_from_uni */
diff --git a/strings/ctype-sjis.c b/strings/ctype-sjis.c
index 54a0df26f09..5fd005f842e 100644
--- a/strings/ctype-sjis.c
+++ b/strings/ctype-sjis.c
@@ -4589,9 +4589,10 @@ CHARSET_INFO my_charset_sjis_japanese_ci=
to_lower_sjis,
to_upper_sjis,
sort_order_sjis,
+ NULL, /* contractions */
+ NULL, /* sort_order_big*/
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- NULL, /* sort_order_big*/
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */
@@ -4615,9 +4616,10 @@ CHARSET_INFO my_charset_sjis_bin=
to_lower_sjis,
to_upper_sjis,
sort_order_sjis,
+ NULL, /* contractions */
+ NULL, /* sort_order_big*/
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- NULL, /* sort_order_big*/
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */
diff --git a/strings/ctype-tis620.c b/strings/ctype-tis620.c
index dae778a8328..c7d859a6ead 100644
--- a/strings/ctype-tis620.c
+++ b/strings/ctype-tis620.c
@@ -962,9 +962,10 @@ CHARSET_INFO my_charset_tis620_thai_ci=
to_lower_tis620,
to_upper_tis620,
sort_order_tis620,
+ NULL, /* contractions */
+ NULL, /* sort_order_big*/
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- NULL, /* sort_order_big*/
NULL, /* state_map */
NULL, /* ident_map */
4, /* strxfrm_multiply */
@@ -988,9 +989,10 @@ CHARSET_INFO my_charset_tis620_bin=
to_lower_tis620,
to_upper_tis620,
sort_order_tis620,
+ NULL, /* contractions */
+ NULL, /* sort_order_big*/
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- NULL, /* sort_order_big*/
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */
diff --git a/strings/ctype-uca.c b/strings/ctype-uca.c
index 82ab3660111..7de373b3c8f 100644
--- a/strings/ctype-uca.c
+++ b/strings/ctype-uca.c
@@ -6620,6 +6620,41 @@ static const char turkish[]=
"& U < \\u00FC <<< \\u00DC ";
+static const char czech[]= /* Czech tailoring; the two-letter "ch" is a contraction placed after H */
+ "& C < \\u010D <<< \\u010C "
+ "& H < ch <<< Ch <<< CH"
+ "& R < \\u0159 <<< \\u0158"
+ "& S < \\u0161 <<< \\u0160"
+ "& Z < \\u017E <<< \\u017D";
+
+static const char danish[]= /* Also good for Norwegian; "aa" is a two-letter contraction */
+ "& Y << \\u00FC <<< \\u00DC << \\u0171 <<< \\u0170"
+ "& Z < \\u00E6 <<< \\u00C6 << \\u00E4 <<< \\u00C4"
+ " < \\u00F8 <<< \\u00D8 << \\u00F6 <<< \\u00D6 << \\u0151 <<< \\u0150"
+ " < \\u00E5 <<< \\u00C5 << aa <<< Aa <<< AA";
+
+static const char lithuanian[]= /* Lithuanian tailoring; "ch" is a two-letter contraction based on C */
+ "& C << ch <<< Ch <<< CH< \\u010D <<< \\u010C"
+ "& E << \\u0119 <<< \\u0118 << \\u0117 <<< \\u0116"
+ "& I << y <<< Y"
+ "& S < \\u0161 <<< \\u0160"
+ "& Z < \\u017E <<< \\u017D";
+
+static const char slovak[]= /* Slovak tailoring; the two-letter "ch" is a contraction placed after H */
+ "& A < \\u00E4 <<< \\u00C4"
+ "& C < \\u010D <<< \\u010C"
+ "& H < ch <<< Ch <<< CH"
+ "& O < \\u00F4 <<< \\u00D4"
+ "& S < \\u0161 <<< \\u0160"
+ "& Z < \\u017E <<< \\u017D";
+
+static const char spanish2[]= /* Also good for Asturian and Galician; "ch", "ll", "rr" are contractions */
+ "&C < ch <<< Ch <<< CH"
+ "&L < ll <<< Ll <<< LL"
+ "&N < \\u00F1 <<< \\u00D1"
+ "&R << rr <<< Rr <<< RR";
+
+
/*
Unicode Collation Algorithm:
Collation element (weight) scanner,
@@ -6633,6 +6668,7 @@ typedef struct my_uca_scanner_st
const uchar *send; /* End of the input string */
uchar *uca_length;
uint16 **uca_weight;
+ uint16 *contractions;
uint16 implicit[2];
int page;
int code;
@@ -6666,6 +6702,7 @@ static void my_uca_scanner_init(my_uca_scanner *scanner,
scanner->wbeg= nochar;
scanner->uca_length= cs->sort_order;
scanner->uca_weight= cs->sort_order_big;
+ scanner->contractions= cs->contractions;
}
@@ -6732,6 +6769,22 @@ static int my_uca_scanner_next(my_uca_scanner *scanner)
scanner->code= (unsigned char)scanner->sbeg[1];
scanner->sbeg+= 2;
+ if (scanner->contractions && (scanner->sbeg <= scanner->send))
+ {
+ int cweight;
+
+ if (!scanner->page && !scanner->sbeg[0] &&
+ (scanner->sbeg[1] > 0x40) && (scanner->sbeg[1] < 0x80) &&
+ (scanner->code > 0x40) && (scanner->code < 0x80) &&
+ (cweight= scanner->contractions[(scanner->code-0x40)*0x40+scanner->sbeg[1]-0x40]))
+ {
+ scanner->implicit[0]= 0;
+ scanner->wbeg= scanner->implicit;
+ scanner->sbeg+=2;
+ return cweight;
+ }
+ }
+
if (!ucaw[scanner->page])
goto implicit;
scanner->wbeg= ucaw[scanner->page] + scanner->code * ucal[scanner->page];
@@ -7354,7 +7407,7 @@ static my_coll_lexem_num my_coll_lexem_next(MY_COLL_LEXEM *lexem)
typedef struct my_coll_rule_item_st
{
uint base; /* Base character */
- uint curr; /* Current character */
+ uint curr[2]; /* Current character; curr[1] is the 2nd character of a contraction, 0 otherwise */
int diff[3]; /* Primary, Secondary and Tertiary difference */
} MY_COLL_RULE;
@@ -7436,7 +7489,18 @@ static int my_coll_rule_parse(MY_COLL_RULE *rule, size_t mitems,
}
else if (prevlexnum == MY_COLL_LEXEM_DIFF)
{
- item.curr= lexem.code;
+ MY_COLL_LEXEM savlex;
+ savlex= lexem;
+ item.curr[0]= lexem.code;
+ if ((lexnum= my_coll_lexem_next(&lexem)) == MY_COLL_LEXEM_CHAR)
+ {
+ item.curr[1]= lexem.code;
+ }
+ else
+ {
+ item.curr[1]= 0;
+ lexem=savlex; /* Restore previous parser state */
+ }
if (lexem.diff == 3)
{
item.diff[2]++;
@@ -7499,7 +7563,8 @@ static my_bool create_tailoring(CHARSET_INFO *cs, void *(*alloc)(uint))
const uchar *deflengths= my_charset_ucs2_general_uca.sort_order;
uint16 **defweights= my_charset_ucs2_general_uca.sort_order_big;
int rc, i;
-
+ int ncontractions= 0;
+
if (!cs->tailoring)
return 1;
@@ -7531,19 +7596,27 @@ static my_bool create_tailoring(CHARSET_INFO *cs, void *(*alloc)(uint))
*/
for (i=0; i < rc; i++)
{
- uint pageb= (rule[i].base >> 8) & 0xFF;
- uint pagec= (rule[i].curr >> 8) & 0xFF;
+ if (!rule[i].curr[1]) /* If not a contraction */
+ {
+ uint pageb= (rule[i].base >> 8) & 0xFF;
+ uint pagec= (rule[i].curr[0] >> 8) & 0xFF;
- if (newlengths[pagec] < deflengths[pageb])
- newlengths[pagec]= deflengths[pageb];
+ if (newlengths[pagec] < deflengths[pageb])
+ newlengths[pagec]= deflengths[pageb];
+ }
+ else
+ ncontractions++;
}
for (i=0; i < rc; i++)
{
uint pageb= (rule[i].base >> 8) & 0xFF;
- uint pagec= (rule[i].curr >> 8) & 0xFF;
+ uint pagec= (rule[i].curr[0] >> 8) & 0xFF;
uint chb, chc;
+ if (rule[i].curr[1]) /* Skip contraction */
+ continue;
+
if (!newweights[pagec])
{
/* Alloc new page and copy the default UCA weights */
@@ -7565,7 +7638,7 @@ static my_bool create_tailoring(CHARSET_INFO *cs, void *(*alloc)(uint))
Aply the alternative rule:
shift to the base character and primary difference.
*/
- chc= rule[i].curr & 0xFF;
+ chc= rule[i].curr[0] & 0xFF;
chb= rule[i].base & 0xFF;
memcpy(newweights[pagec] + chc*newlengths[pagec],
defweights[pageb] + chb*deflengths[pageb],
@@ -7581,7 +7654,43 @@ static my_bool create_tailoring(CHARSET_INFO *cs, void *(*alloc)(uint))
cs->sort_order= newlengths;
cs->sort_order_big= newweights;
+ cs->contractions= NULL;
+ /* Now process contractions */
+ if (ncontractions)
+ {
+ uint size= 0x40*0x40*sizeof(uint16); /* 8K, for basic latin letter only */
+ if (!(cs->contractions= (uint16*) alloc(size)))
+ return 1;
+ bzero((void*)cs->contractions, size);
+ for (i=0; i < rc; i++)
+ {
+ if (rule[i].curr[1])
+ {
+ uint pageb= (rule[i].base >> 8) & 0xFF;
+ uint chb= rule[i].base & 0xFF;
+ uint16 *offsb= defweights[pageb] + chb*deflengths[pageb];
+ uint offsc;
+
+ if (offsb[1] ||
+ rule[i].curr[0] < 0x40 || rule[i].curr[0] > 0x7f ||
+ rule[i].curr[1] < 0x40 || rule[i].curr[1] > 0x7f)
+ {
+ /*
+ TODO: add error reporting;
+ We support only basic latin letters contractions at this point.
+ Also, We don't support contractions with weight longer than one.
+ Otherwise, we'd need much more memory.
+ */
+ return 1;
+ }
+ offsc= (rule[i].curr[0]-0x40)*0x40+(rule[i].curr[1]-0x40);
+
+ /* Copy base weight applying primary difference */
+ cs->contractions[offsc]= offsb[0] + rule[i].diff[0];
+ }
+ }
+ }
return 0;
}
@@ -7615,6 +7724,7 @@ CHARSET_INFO my_charset_ucs2_general_uca=
NULL, /* to_lower */
NULL, /* to_upper */
uca_length, /* sort_order */
+ NULL, /* contractions */
uca_weight, /* sort_order_big*/
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
@@ -7642,6 +7752,7 @@ CHARSET_INFO my_charset_ucs2_icelandic_uca_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
+ NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
@@ -7668,6 +7779,7 @@ CHARSET_INFO my_charset_ucs2_latvian_uca_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
+ NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
@@ -7694,6 +7806,7 @@ CHARSET_INFO my_charset_ucs2_romanian_uca_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
+ NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
@@ -7720,6 +7833,7 @@ CHARSET_INFO my_charset_ucs2_slovenian_uca_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
+ NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
@@ -7746,6 +7860,7 @@ CHARSET_INFO my_charset_ucs2_polish_uca_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
+ NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
@@ -7772,6 +7887,7 @@ CHARSET_INFO my_charset_ucs2_estonian_uca_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
+ NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
@@ -7798,6 +7914,7 @@ CHARSET_INFO my_charset_ucs2_spanish_uca_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
+ NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
@@ -7824,6 +7941,7 @@ CHARSET_INFO my_charset_ucs2_swedish_uca_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
+ NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
@@ -7850,6 +7968,143 @@ CHARSET_INFO my_charset_ucs2_turkish_uca_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
+ NULL, /* contractions */
+ NULL, /* sort_order_big*/
+ NULL, /* tab_to_uni */
+ NULL, /* tab_from_uni */
+ NULL, /* state_map */
+ NULL, /* ident_map */
+ 8, /* strxfrm_multiply */
+ 2, /* mbminlen */
+ 2, /* mbmaxlen */
+ 9, /* min_sort_char */
+ 0xFFFF, /* max_sort_char */
+ &my_charset_ucs2_handler,
+ &my_collation_ucs2_uca_handler
+};
+
+CHARSET_INFO my_charset_ucs2_czech_uca_ci= /* "ucs2" charset, collation described by the czech tailoring rules */
+{
+ 137,0,0, /* number */
+ MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
+ "ucs2", /* cs name */
+ "ucs2_czech_ci", /* name */
+ "", /* comment */
+ czech, /* tailoring */
+ NULL, /* ctype */
+ NULL, /* to_lower */
+ NULL, /* to_upper */
+ NULL, /* sort_order */
+ NULL, /* contractions */
+ NULL, /* sort_order_big*/
+ NULL, /* tab_to_uni */
+ NULL, /* tab_from_uni */
+ NULL, /* state_map */
+ NULL, /* ident_map */
+ 8, /* strxfrm_multiply */
+ 2, /* mbminlen */
+ 2, /* mbmaxlen */
+ 9, /* min_sort_char */
+ 0xFFFF, /* max_sort_char */
+ &my_charset_ucs2_handler,
+ &my_collation_ucs2_uca_handler
+};
+
+
+CHARSET_INFO my_charset_ucs2_danish_uca_ci= /* "ucs2" charset, collation described by the danish tailoring rules */
+{
+ 138,0,0, /* number */
+ MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
+ "ucs2", /* cs name */
+ "ucs2_danish_ci", /* name */
+ "", /* comment */
+ danish, /* tailoring */
+ NULL, /* ctype */
+ NULL, /* to_lower */
+ NULL, /* to_upper */
+ NULL, /* sort_order */
+ NULL, /* contractions */
+ NULL, /* sort_order_big*/
+ NULL, /* tab_to_uni */
+ NULL, /* tab_from_uni */
+ NULL, /* state_map */
+ NULL, /* ident_map */
+ 8, /* strxfrm_multiply */
+ 2, /* mbminlen */
+ 2, /* mbmaxlen */
+ 9, /* min_sort_char */
+ 0xFFFF, /* max_sort_char */
+ &my_charset_ucs2_handler,
+ &my_collation_ucs2_uca_handler
+};
+
+CHARSET_INFO my_charset_ucs2_lithuanian_uca_ci= /* "ucs2" charset, collation described by the lithuanian tailoring rules */
+{
+ 139,0,0, /* number */
+ MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
+ "ucs2", /* cs name */
+ "ucs2_lithuanian_ci",/* name */
+ "", /* comment */
+ lithuanian, /* tailoring */
+ NULL, /* ctype */
+ NULL, /* to_lower */
+ NULL, /* to_upper */
+ NULL, /* sort_order */
+ NULL, /* contractions */
+ NULL, /* sort_order_big*/
+ NULL, /* tab_to_uni */
+ NULL, /* tab_from_uni */
+ NULL, /* state_map */
+ NULL, /* ident_map */
+ 8, /* strxfrm_multiply */
+ 2, /* mbminlen */
+ 2, /* mbmaxlen */
+ 9, /* min_sort_char */
+ 0xFFFF, /* max_sort_char */
+ &my_charset_ucs2_handler,
+ &my_collation_ucs2_uca_handler
+};
+
+CHARSET_INFO my_charset_ucs2_slovak_uca_ci= /* "ucs2" charset, collation described by the slovak tailoring rules */
+{
+ 140,0,0, /* number */
+ MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
+ "ucs2", /* cs name */
+ "ucs2_slovak_ci", /* name */
+ "", /* comment */
+ slovak, /* tailoring */
+ NULL, /* ctype */
+ NULL, /* to_lower */
+ NULL, /* to_upper */
+ NULL, /* sort_order */
+ NULL, /* contractions */
+ NULL, /* sort_order_big*/
+ NULL, /* tab_to_uni */
+ NULL, /* tab_from_uni */
+ NULL, /* state_map */
+ NULL, /* ident_map */
+ 8, /* strxfrm_multiply */
+ 2, /* mbminlen */
+ 2, /* mbmaxlen */
+ 9, /* min_sort_char */
+ 0xFFFF, /* max_sort_char */
+ &my_charset_ucs2_handler,
+ &my_collation_ucs2_uca_handler
+};
+
+CHARSET_INFO my_charset_ucs2_spanish2_uca_ci=
+{
+ 141,0,0, /* number */
+ MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
+ "ucs2", /* cs name */
+ "ucs2_spanish2_ci", /* name */
+ "", /* comment */
+ spanish2, /* tailoring */
+ NULL, /* ctype */
+ NULL, /* to_lower */
+ NULL, /* to_upper */
+ NULL, /* sort_order */
+ NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
diff --git a/strings/ctype-ucs2.c b/strings/ctype-ucs2.c
index 58df303a79f..20a5ff58d3a 100644
--- a/strings/ctype-ucs2.c
+++ b/strings/ctype-ucs2.c
@@ -1454,9 +1454,10 @@ CHARSET_INFO my_charset_ucs2_general_ci=
to_lower_ucs2, /* to_lower */
to_upper_ucs2, /* to_upper */
to_upper_ucs2, /* sort_order */
+ NULL, /* contractions */
+ NULL, /* sort_order_big*/
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- NULL, /* sort_order_big*/
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */
@@ -1480,6 +1481,7 @@ CHARSET_INFO my_charset_ucs2_bin=
to_lower_ucs2, /* to_lower */
to_upper_ucs2, /* to_upper */
to_upper_ucs2, /* sort_order */
+ NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
diff --git a/strings/ctype-ujis.c b/strings/ctype-ujis.c
index 7f88a16cac6..3f53a07f527 100644
--- a/strings/ctype-ujis.c
+++ b/strings/ctype-ujis.c
@@ -8475,9 +8475,10 @@ CHARSET_INFO my_charset_ujis_japanese_ci=
to_lower_ujis,
to_upper_ujis,
sort_order_ujis,
+ NULL, /* contractions */
+ NULL, /* sort_order_big*/
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- NULL, /* sort_order_big*/
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */
@@ -8502,9 +8503,10 @@ CHARSET_INFO my_charset_ujis_bin=
to_lower_ujis,
to_upper_ujis,
sort_order_ujis,
+ NULL, /* contractions */
+ NULL, /* sort_order_big*/
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- NULL, /* sort_order_big*/
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */
diff --git a/strings/ctype-utf8.c b/strings/ctype-utf8.c
index 58f684a0f16..dd496aa8fa2 100644
--- a/strings/ctype-utf8.c
+++ b/strings/ctype-utf8.c
@@ -2098,9 +2098,10 @@ CHARSET_INFO my_charset_utf8_general_ci=
to_lower_utf8, /* to_lower */
to_upper_utf8, /* to_upper */
to_upper_utf8, /* sort_order */
+ NULL, /* contractions */
+ NULL, /* sort_order_big*/
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- NULL, /* sort_order_big*/
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */
@@ -2125,9 +2126,10 @@ CHARSET_INFO my_charset_utf8_bin=
to_lower_utf8, /* to_lower */
to_upper_utf8, /* to_upper */
to_upper_utf8, /* sort_order */
+ NULL, /* contractions */
+ NULL, /* sort_order_big*/
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- NULL, /* sort_order_big*/
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */
@@ -2173,7 +2175,7 @@ int main()
test_mb(cs,(uchar*)str);
- pr1;2cintf("orig :'%s'\n",str);
+ printf("orig :'%s'\n",str);
my_caseup_utf8(cs,str,15);
printf("caseup :'%s'\n",str);
diff --git a/strings/ctype-win1250ch.c b/strings/ctype-win1250ch.c
index 83aaa6839e4..b4dbda3e8ed 100644
--- a/strings/ctype-win1250ch.c
+++ b/strings/ctype-win1250ch.c
@@ -634,6 +634,7 @@ CHARSET_INFO my_charset_cp1250_czech_ci =
to_lower_win1250ch,
to_upper_win1250ch,
sort_order_win1250ch,
+ NULL, /* contractions */
NULL, /* sort_order_big*/
tab_cp1250_uni, /* tab_to_uni */
idx_uni_cp1250, /* tab_from_uni */