diff options
author | unknown <bar@bar.intranet.mysql.r18.ru> | 2004-05-25 17:40:20 +0500 |
---|---|---|
committer | unknown <bar@bar.intranet.mysql.r18.ru> | 2004-05-25 17:40:20 +0500 |
commit | 8ee5b216a15dcd5fb81d8e7d231185c6fd1ac65c (patch) | |
tree | a9a4fa11270cbdaa126493450d0bb6e8552335ed /strings | |
parent | 6c09db274773a36350b8a3f794a847684bd0ca30 (diff) | |
download | mariadb-git-8ee5b216a15dcd5fb81d8e7d231185c6fd1ac65c.tar.gz |
Preparation for user-defined Unicode collations:
weights data now comes not from static variables
but from the charset structure.
Diffstat (limited to 'strings')
-rw-r--r-- | strings/ctype-big5.c | 2 | ||||
-rw-r--r-- | strings/ctype-bin.c | 4 | ||||
-rw-r--r-- | strings/ctype-czech.c | 1 | ||||
-rw-r--r-- | strings/ctype-euc_kr.c | 2 | ||||
-rw-r--r-- | strings/ctype-extra.c | 1 | ||||
-rw-r--r-- | strings/ctype-gb2312.c | 2 | ||||
-rw-r--r-- | strings/ctype-gbk.c | 2 | ||||
-rw-r--r-- | strings/ctype-latin1.c | 3 | ||||
-rw-r--r-- | strings/ctype-sjis.c | 2 | ||||
-rw-r--r-- | strings/ctype-tis620.c | 2 | ||||
-rw-r--r-- | strings/ctype-uca.c | 45 | ||||
-rw-r--r-- | strings/ctype-ucs2.c | 28 | ||||
-rw-r--r-- | strings/ctype-ujis.c | 8 | ||||
-rw-r--r-- | strings/ctype-utf8.c | 2 | ||||
-rw-r--r-- | strings/ctype-win1250ch.c | 1 |
15 files changed, 72 insertions, 33 deletions
diff --git a/strings/ctype-big5.c b/strings/ctype-big5.c index ccb8d07e786..2071759ddae 100644 --- a/strings/ctype-big5.c +++ b/strings/ctype-big5.c @@ -6317,6 +6317,7 @@ CHARSET_INFO my_charset_big5_chinese_ci= to_lower_big5, to_upper_big5, sort_order_big5, + NULL, /* sort_order_big*/ NULL, /* tab_to_uni */ NULL, /* tab_from_uni */ "", @@ -6342,6 +6343,7 @@ CHARSET_INFO my_charset_big5_bin= to_lower_big5, to_upper_big5, sort_order_big5, + NULL, /* sort_order_big*/ NULL, /* tab_to_uni */ NULL, /* tab_from_uni */ "", diff --git a/strings/ctype-bin.c b/strings/ctype-bin.c index ea0a471ef74..7cac8c7c337 100644 --- a/strings/ctype-bin.c +++ b/strings/ctype-bin.c @@ -384,7 +384,9 @@ CHARSET_INFO my_charset_bin = bin_char_array, /* sort_order */ NULL, /* tab_to_uni */ NULL, /* tab_from_uni */ - "","", + NULL, /* sort_order_big*/ + "", + "", 1, /* strxfrm_multiply */ 1, /* mbminlen */ 1, /* mbmaxlen */ diff --git a/strings/ctype-czech.c b/strings/ctype-czech.c index f4350f87153..2eb2fac46e9 100644 --- a/strings/ctype-czech.c +++ b/strings/ctype-czech.c @@ -593,6 +593,7 @@ CHARSET_INFO my_charset_latin2_czech_ci = to_lower_czech, to_upper_czech, sort_order_czech, + NULL, /* sort_order_big*/ tab_8859_2_uni, /* tab_to_uni */ idx_uni_8859_2, /* tab_from_uni */ "","", diff --git a/strings/ctype-euc_kr.c b/strings/ctype-euc_kr.c index 7f89b8152e8..8f955c15a73 100644 --- a/strings/ctype-euc_kr.c +++ b/strings/ctype-euc_kr.c @@ -8685,6 +8685,7 @@ CHARSET_INFO my_charset_euckr_korean_ci= to_lower_euc_kr, to_upper_euc_kr, sort_order_euc_kr, + NULL, /* sort_order_big*/ NULL, /* tab_to_uni */ NULL, /* tab_from_uni */ "", @@ -8710,6 +8711,7 @@ CHARSET_INFO my_charset_euckr_bin= to_lower_euc_kr, to_upper_euc_kr, sort_order_euc_kr, + NULL, /* sort_order_big*/ NULL, /* tab_to_uni */ NULL, /* tab_from_uni */ "", diff --git a/strings/ctype-extra.c b/strings/ctype-extra.c index 55dab3ca2a8..51a9531fbf5 100644 --- a/strings/ctype-extra.c +++ b/strings/ctype-extra.c @@ -28,6 +28,7 @@ 
CHARSET_INFO compiled_charsets[] = { NULL, NULL, NULL, + NULL, /* sort_order_big*/ NULL, /* tab_to_uni */ NULL, /* tab_from_uni */ "","", diff --git a/strings/ctype-gb2312.c b/strings/ctype-gb2312.c index e5dfaf45276..b76511fc4f3 100644 --- a/strings/ctype-gb2312.c +++ b/strings/ctype-gb2312.c @@ -5738,6 +5738,7 @@ CHARSET_INFO my_charset_gb2312_chinese_ci= sort_order_gb2312, NULL, /* tab_to_uni */ NULL, /* tab_from_uni */ + NULL, /* sort_order_big*/ "", "", 1, /* strxfrm_multiply */ @@ -5762,6 +5763,7 @@ CHARSET_INFO my_charset_gb2312_bin= sort_order_gb2312, NULL, /* tab_to_uni */ NULL, /* tab_from_uni */ + NULL, /* sort_order_big*/ "", "", 1, /* strxfrm_multiply */ diff --git a/strings/ctype-gbk.c b/strings/ctype-gbk.c index 577f8a33275..cc0f226d01c 100644 --- a/strings/ctype-gbk.c +++ b/strings/ctype-gbk.c @@ -9968,6 +9968,7 @@ CHARSET_INFO my_charset_gbk_chinese_ci= sort_order_gbk, NULL, /* tab_to_uni */ NULL, /* tab_from_uni */ + NULL, /* sort_order_big*/ "", "", 1, /* strxfrm_multiply */ @@ -9992,6 +9993,7 @@ CHARSET_INFO my_charset_gbk_bin= sort_order_gbk, NULL, /* tab_to_uni */ NULL, /* tab_from_uni */ + NULL, /* sort_order_big*/ "", "", 1, /* strxfrm_multiply */ diff --git a/strings/ctype-latin1.c b/strings/ctype-latin1.c index 520fec676b1..0b439964c7c 100644 --- a/strings/ctype-latin1.c +++ b/strings/ctype-latin1.c @@ -416,6 +416,7 @@ CHARSET_INFO my_charset_latin1= to_lower_latin1, to_upper_latin1, sort_order_latin1, + NULL, /* sort_order_big*/ cs_to_uni, /* tab_to_uni */ NULL, /* tab_from_uni */ "","", @@ -693,6 +694,7 @@ CHARSET_INFO my_charset_latin1_german2_ci= to_lower_latin1, to_upper_latin1, sort_order_latin1_de, + NULL, /* sort_order_big*/ cs_to_uni, /* tab_to_uni */ NULL, /* tab_from_uni */ "","", @@ -717,6 +719,7 @@ CHARSET_INFO my_charset_latin1_bin= to_lower_latin1, to_upper_latin1, sort_order_latin1_de, + NULL, /* sort_order_big*/ cs_to_uni, /* tab_to_uni */ NULL, /* tab_from_uni */ "", diff --git a/strings/ctype-sjis.c 
b/strings/ctype-sjis.c index 5f413305c88..72666175a1f 100644 --- a/strings/ctype-sjis.c +++ b/strings/ctype-sjis.c @@ -4585,6 +4585,7 @@ CHARSET_INFO my_charset_sjis_japanese_ci= sort_order_sjis, NULL, /* tab_to_uni */ NULL, /* tab_from_uni */ + NULL, /* sort_order_big*/ "", "", 1, /* strxfrm_multiply */ @@ -4609,6 +4610,7 @@ CHARSET_INFO my_charset_sjis_bin= sort_order_sjis, NULL, /* tab_to_uni */ NULL, /* tab_from_uni */ + NULL, /* sort_order_big*/ "", "", 1, /* strxfrm_multiply */ diff --git a/strings/ctype-tis620.c b/strings/ctype-tis620.c index aecb4f9753c..1b6b1edc8b9 100644 --- a/strings/ctype-tis620.c +++ b/strings/ctype-tis620.c @@ -957,6 +957,7 @@ CHARSET_INFO my_charset_tis620_thai_ci= sort_order_tis620, NULL, /* tab_to_uni */ NULL, /* tab_from_uni */ + NULL, /* sort_order_big*/ "", "", 4, /* strxfrm_multiply */ @@ -981,6 +982,7 @@ CHARSET_INFO my_charset_tis620_bin= sort_order_tis620, NULL, /* tab_to_uni */ NULL, /* tab_from_uni */ + NULL, /* sort_order_big*/ "", "", 1, /* strxfrm_multiply */ diff --git a/strings/ctype-uca.c b/strings/ctype-uca.c index 72c28d92ca8..81073d47554 100644 --- a/strings/ctype-uca.c +++ b/strings/ctype-uca.c @@ -6436,7 +6436,7 @@ uint16 page0FFdata[]= { /* FF00 (3 weights per char) */ 0x0DC5,0x0000,0x0000, 0x0DC6,0x0000,0x0000, 0xFBC1,0xFFFE,0x0000, 0xFBC1,0xFFFF,0x0000 }; -uchar ucal[256]={ +uchar uca_length[256]={ 4,3,3,4,3,3,3,3,0,3,3,3,3,3,3,3, 3,3,3,3,3,2,3,3,3,3,0,0,0,3,3,3, 5,5,4,3,5,2,3,3,2,2,5,3,0,0,3,3, @@ -6454,7 +6454,7 @@ uchar ucal[256]={ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,3,3,4,3,9,3,3 }; -uint16 *ucaw[256]={ +uint16 *uca_weight[256]={ page000data,page001data,page002data,page003data, page004data,page005data,page006data,page007data, NULL ,page009data,page00Adata,page00Bdata, @@ -6533,6 +6533,8 @@ typedef struct my_uca_scanner_st const uint16 *wbeg; /* Beginning of the current weight string */ const uchar *sbeg; /* Beginning of the input string */ const uchar *send; /* End of the input string */ + 
uchar *uca_length; + uint16 **uca_weight; uint16 implicit[2]; int page; int code; @@ -6564,6 +6566,8 @@ static void my_uca_scanner_init(my_uca_scanner *scanner, scanner->sbeg= str; scanner->send= str + length - 2; scanner->wbeg= nochar; + scanner->uca_length= cs->sort_order; + scanner->uca_weight= cs->sort_order_big; } @@ -6620,6 +6624,9 @@ static int my_uca_scanner_next(my_uca_scanner *scanner) do { + uint16 **ucaw= scanner->uca_weight; + uchar *ucal= scanner->uca_length; + if (scanner->sbeg > scanner->send) return -1; @@ -6866,10 +6873,12 @@ static int my_strnxfrm_uca(CHARSET_INFO *cs, This fact allows us to use memcmp() safely, on both little-endian and big-endian machines. */ -static int my_uca_charcmp(my_wc_t wc1, my_wc_t wc2) +static int my_uca_charcmp(CHARSET_INFO *cs, my_wc_t wc1, my_wc_t wc2) { size_t page1= wc1 >> MY_UCA_PSHIFT; size_t page2= wc2 >> MY_UCA_PSHIFT; + uchar *ucal= cs->sort_order; + uint16 **ucaw= cs->sort_order_big; size_t length1= ucal[page1]; size_t length2= ucal[page2]; uint16 *weight1= ucaw[page1] + (wc1 & MY_UCA_CMASK) * ucal[page1]; @@ -6943,7 +6952,7 @@ int my_wildcmp_uca(CHARSET_INFO *cs, } else { - if (my_uca_charcmp(s_wc,w_wc)) + if (my_uca_charcmp(cs,s_wc,w_wc)) return 1; } if (wildstr == wildend) @@ -7006,7 +7015,7 @@ int my_wildcmp_uca(CHARSET_INFO *cs, (const uchar*)str_end)) <= 0) return 1; - if (!my_uca_charcmp(s_wc,w_wc)) + if (!my_uca_charcmp(cs,s_wc,w_wc)) break; str+= scan; } @@ -7039,4 +7048,30 @@ MY_COLLATION_HANDLER my_collation_ucs2_uca_handler = my_hash_sort_uca }; +CHARSET_INFO my_charset_ucs2_general_uca= +{ + 45,0,0, /* number */ + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONTEXT, + "ucs2", /* cs name */ + "ucs2_general_uca", /* name */ + "", /* comment */ + NULL, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + uca_length, /* sort_order */ + uca_weight, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + "", + "", + 8, /* strxfrm_multiply */ + 2, /* mbminlen */ + 2, /* 
mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + &my_charset_ucs2_handler, + &my_collation_ucs2_uca_handler +}; + + #endif diff --git a/strings/ctype-ucs2.c b/strings/ctype-ucs2.c index 59c1706fd26..67340fdd4f4 100644 --- a/strings/ctype-ucs2.c +++ b/strings/ctype-ucs2.c @@ -1397,7 +1397,7 @@ static MY_COLLATION_HANDLER my_collation_ucs2_bin_handler = }; -static MY_CHARSET_HANDLER my_charset_ucs2_handler= +MY_CHARSET_HANDLER my_charset_ucs2_handler= { my_ismbchar_ucs2, /* ismbchar */ my_mbcharlen_ucs2, /* mbcharlen */ @@ -1437,6 +1437,7 @@ CHARSET_INFO my_charset_ucs2_general_ci= to_upper_ucs2, /* sort_order */ NULL, /* tab_to_uni */ NULL, /* tab_from_uni */ + NULL, /* sort_order_big*/ "", "", 1, /* strxfrm_multiply */ @@ -1448,30 +1449,6 @@ CHARSET_INFO my_charset_ucs2_general_ci= &my_collation_ucs2_general_ci_handler }; -CHARSET_INFO my_charset_ucs2_general_uca= -{ - 45,0,0, /* number */ - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONTEXT, - "ucs2", /* cs name */ - "ucs2_general_uca", /* name */ - "", /* comment */ - ctype_ucs2, /* ctype */ - to_lower_ucs2, /* to_lower */ - to_upper_ucs2, /* to_upper */ - to_upper_ucs2, /* sort_order */ - NULL, /* tab_to_uni */ - NULL, /* tab_from_uni */ - "", - "", - 8, /* strxfrm_multiply */ - 2, /* mbminlen */ - 2, /* mbmaxlen */ - 9, /* min_sort_char */ - 0xFFFF, /* max_sort_char */ - &my_charset_ucs2_handler, - &my_collation_ucs2_uca_handler -}; - CHARSET_INFO my_charset_ucs2_bin= { 90,0,0, /* number */ @@ -1483,6 +1460,7 @@ CHARSET_INFO my_charset_ucs2_bin= to_lower_ucs2, /* to_lower */ to_upper_ucs2, /* to_upper */ to_upper_ucs2, /* sort_order */ + NULL, /* sort_order_big*/ NULL, /* tab_to_uni */ NULL, /* tab_from_uni */ "", diff --git a/strings/ctype-ujis.c b/strings/ctype-ujis.c index 51b7cd4f7d7..fd3692553be 100644 --- a/strings/ctype-ujis.c +++ b/strings/ctype-ujis.c @@ -8474,7 +8474,9 @@ CHARSET_INFO my_charset_ujis_japanese_ci= sort_order_ujis, NULL, /* tab_to_uni */ NULL, /* tab_from_uni 
*/ - "","", + NULL, /* sort_order_big*/ + "", + "", 1, /* strxfrm_multiply */ 1, /* mbminlen */ 3, /* mbmaxlen */ @@ -8498,7 +8500,9 @@ CHARSET_INFO my_charset_ujis_bin= sort_order_ujis, NULL, /* tab_to_uni */ NULL, /* tab_from_uni */ - "","", + NULL, /* sort_order_big*/ + "", + "", 1, /* strxfrm_multiply */ 1, /* mbminlen */ 3, /* mbmaxlen */ diff --git a/strings/ctype-utf8.c b/strings/ctype-utf8.c index 82787f2b65f..29d2c5d1358 100644 --- a/strings/ctype-utf8.c +++ b/strings/ctype-utf8.c @@ -2090,6 +2090,7 @@ CHARSET_INFO my_charset_utf8_general_ci= to_upper_utf8, /* sort_order */ NULL, /* tab_to_uni */ NULL, /* tab_from_uni */ + NULL, /* sort_order_big*/ "", "", 1, /* strxfrm_multiply */ @@ -2115,6 +2116,7 @@ CHARSET_INFO my_charset_utf8_bin= to_upper_utf8, /* sort_order */ NULL, /* tab_to_uni */ NULL, /* tab_from_uni */ + NULL, /* sort_order_big*/ "", "", 1, /* strxfrm_multiply */ diff --git a/strings/ctype-win1250ch.c b/strings/ctype-win1250ch.c index ed6bb877dc8..2eefb570170 100644 --- a/strings/ctype-win1250ch.c +++ b/strings/ctype-win1250ch.c @@ -627,6 +627,7 @@ CHARSET_INFO my_charset_cp1250_czech_ci = to_lower_win1250ch, to_upper_win1250ch, sort_order_win1250ch, + NULL, /* sort_order_big*/ tab_cp1250_uni, /* tab_to_uni */ idx_uni_cp1250, /* tab_from_uni */ "","", |