summaryrefslogtreecommitdiff
path: root/strings
diff options
context:
space:
mode:
author unknown <bar@mysql.com> 2005-01-26 16:34:09 +0400
committer unknown <bar@mysql.com> 2005-01-26 16:34:09 +0400
commit 9bb464487d6c1c0359dba9aa79e88f927cd24c93 (patch)
tree 91906f2e462ed709b34b9973ff6afcb3ec9072a9 /strings
parent e8ae338356d203942944e6bf4d4d40489b1a7625 (diff)
download mariadb-git-9bb464487d6c1c0359dba9aa79e88f927cd24c93.tar.gz
CSC#4385: slow sorting for large UTF8 tables:
my_strnxfrm_utf8 now requires 2 bytes per character in the filesort key instead of 3 bytes per character. Shorter filesort keys make sorting faster.
Diffstat (limited to 'strings')
-rw-r--r-- strings/ctype-big5.c 1
-rw-r--r-- strings/ctype-bin.c 2
-rw-r--r-- strings/ctype-cp932.c 1
-rw-r--r-- strings/ctype-czech.c 1
-rw-r--r-- strings/ctype-euc_kr.c 1
-rw-r--r-- strings/ctype-eucjpms.c 1
-rw-r--r-- strings/ctype-gb2312.c 1
-rw-r--r-- strings/ctype-gbk.c 1
-rw-r--r-- strings/ctype-latin1.c 1
-rw-r--r-- strings/ctype-mb.c 1
-rw-r--r-- strings/ctype-simple.c 10
-rw-r--r-- strings/ctype-sjis.c 1
-rw-r--r-- strings/ctype-tis620.c 1
-rw-r--r-- strings/ctype-uca.c 2
-rw-r--r-- strings/ctype-ucs2.c 2
-rw-r--r-- strings/ctype-ujis.c 1
-rw-r--r-- strings/ctype-utf8.c 37
-rw-r--r-- strings/ctype-win1250ch.c 1
18 files changed, 53 insertions, 13 deletions
diff --git a/strings/ctype-big5.c b/strings/ctype-big5.c
index a2db7de244e..70c5ec633be 100644
--- a/strings/ctype-big5.c
+++ b/strings/ctype-big5.c
@@ -6293,6 +6293,7 @@ static MY_COLLATION_HANDLER my_collation_big5_chinese_ci_handler =
my_strnncoll_big5,
my_strnncollsp_big5,
my_strnxfrm_big5,
+ my_strnxfrmlen_simple,
my_like_range_big5,
my_wildcmp_mb,
my_strcasecmp_mb,
diff --git a/strings/ctype-bin.c b/strings/ctype-bin.c
index 401605a462f..50c66a63e97 100644
--- a/strings/ctype-bin.c
+++ b/strings/ctype-bin.c
@@ -447,6 +447,7 @@ MY_COLLATION_HANDLER my_collation_8bit_bin_handler =
my_strnncoll_8bit_bin,
my_strnncollsp_8bit_bin,
my_strnxfrm_8bit_bin,
+ my_strnxfrmlen_simple,
my_like_range_simple,
my_wildcmp_bin,
my_strcasecmp_bin,
@@ -461,6 +462,7 @@ static MY_COLLATION_HANDLER my_collation_binary_handler =
my_strnncoll_binary,
my_strnncollsp_binary,
my_strnxfrm_bin,
+ my_strnxfrmlen_simple,
my_like_range_simple,
my_wildcmp_bin,
my_strcasecmp_bin,
diff --git a/strings/ctype-cp932.c b/strings/ctype-cp932.c
index 804f87b2a5b..c47f2c2d8ce 100644
--- a/strings/ctype-cp932.c
+++ b/strings/ctype-cp932.c
@@ -5454,6 +5454,7 @@ static MY_COLLATION_HANDLER my_collation_ci_handler =
my_strnncoll_cp932,
my_strnncollsp_cp932,
my_strnxfrm_cp932,
+ my_strnxfrmlen_simple,
my_like_range_cp932,
my_wildcmp_mb, /* wildcmp */
my_strcasecmp_8bit,
diff --git a/strings/ctype-czech.c b/strings/ctype-czech.c
index 2834dbb28ff..f5a410afc50 100644
--- a/strings/ctype-czech.c
+++ b/strings/ctype-czech.c
@@ -593,6 +593,7 @@ static MY_COLLATION_HANDLER my_collation_latin2_czech_ci_handler =
my_strnncoll_czech,
my_strnncollsp_czech,
my_strnxfrm_czech,
+ my_strnxfrmlen_simple,
my_like_range_czech,
my_wildcmp_8bit,
my_strcasecmp_8bit,
diff --git a/strings/ctype-euc_kr.c b/strings/ctype-euc_kr.c
index ee792d9c3e4..289b7309ea0 100644
--- a/strings/ctype-euc_kr.c
+++ b/strings/ctype-euc_kr.c
@@ -8641,6 +8641,7 @@ static MY_COLLATION_HANDLER my_collation_ci_handler =
my_strnncoll_simple, /* strnncoll */
my_strnncollsp_simple,
my_strnxfrm_simple, /* strnxfrm */
+ my_strnxfrmlen_simple,
my_like_range_simple, /* like_range */
my_wildcmp_mb, /* wildcmp */
my_strcasecmp_mb,
diff --git a/strings/ctype-eucjpms.c b/strings/ctype-eucjpms.c
index 5b108d24f4b..8c8d237cf48 100644
--- a/strings/ctype-eucjpms.c
+++ b/strings/ctype-eucjpms.c
@@ -8636,6 +8636,7 @@ static MY_COLLATION_HANDLER my_collation_ci_handler =
my_strnncoll_simple,/* strnncoll */
my_strnncollsp_simple,
my_strnxfrm_simple, /* strnxfrm */
+ my_strnxfrmlen_simple,
my_like_range_simple,/* like_range */
my_wildcmp_mb, /* wildcmp */
my_strcasecmp_mb,
diff --git a/strings/ctype-gb2312.c b/strings/ctype-gb2312.c
index f17cc94723f..73e4132dd7f 100644
--- a/strings/ctype-gb2312.c
+++ b/strings/ctype-gb2312.c
@@ -5692,6 +5692,7 @@ static MY_COLLATION_HANDLER my_collation_ci_handler =
my_strnncoll_simple, /* strnncoll */
my_strnncollsp_simple,
my_strnxfrm_simple, /* strnxfrm */
+ my_strnxfrmlen_simple,
my_like_range_simple, /* like_range */
my_wildcmp_mb, /* wildcmp */
my_strcasecmp_mb, /* instr */
diff --git a/strings/ctype-gbk.c b/strings/ctype-gbk.c
index dc4aea60096..6b47b537fb9 100644
--- a/strings/ctype-gbk.c
+++ b/strings/ctype-gbk.c
@@ -9939,6 +9939,7 @@ static MY_COLLATION_HANDLER my_collation_ci_handler =
my_strnncoll_gbk,
my_strnncollsp_gbk,
my_strnxfrm_gbk,
+ my_strnxfrmlen_simple,
my_like_range_gbk,
my_wildcmp_mb,
my_strcasecmp_mb,
diff --git a/strings/ctype-latin1.c b/strings/ctype-latin1.c
index b5da99a7452..043645684cf 100644
--- a/strings/ctype-latin1.c
+++ b/strings/ctype-latin1.c
@@ -693,6 +693,7 @@ static MY_COLLATION_HANDLER my_collation_german2_ci_handler=
my_strnncoll_latin1_de,
my_strnncollsp_latin1_de,
my_strnxfrm_latin1_de,
+ my_strnxfrmlen_simple,
my_like_range_simple,
my_wildcmp_8bit,
my_strcasecmp_8bit,
diff --git a/strings/ctype-mb.c b/strings/ctype-mb.c
index 4be21599fef..e902730d65a 100644
--- a/strings/ctype-mb.c
+++ b/strings/ctype-mb.c
@@ -912,6 +912,7 @@ MY_COLLATION_HANDLER my_collation_mb_bin_handler =
my_strnncoll_mb_bin,
my_strnncollsp_mb_bin,
my_strnxfrm_mb_bin,
+ my_strnxfrmlen_simple,
my_like_range_simple,
my_wildcmp_mb_bin,
my_strcasecmp_mb_bin,
diff --git a/strings/ctype-simple.c b/strings/ctype-simple.c
index bb623ef66f1..e436d5f8702 100644
--- a/strings/ctype-simple.c
+++ b/strings/ctype-simple.c
@@ -22,6 +22,15 @@
#include "stdarg.h"
/*
+ Returns the number of bytes required for strnxfrm(): len * cs->strxfrm_multiply, where a zero multiplier is treated as 1.
+*/
+uint my_strnxfrmlen_simple(CHARSET_INFO *cs, uint len)
+{
+ return len * (cs->strxfrm_multiply ? cs->strxfrm_multiply : 1);
+}
+
+
+/*
Converts a string into its sort key.
SYNOPSIS
@@ -1365,6 +1374,7 @@ MY_COLLATION_HANDLER my_collation_8bit_simple_ci_handler =
my_strnncoll_simple,
my_strnncollsp_simple,
my_strnxfrm_simple,
+ my_strnxfrmlen_simple,
my_like_range_simple,
my_wildcmp_8bit,
my_strcasecmp_8bit,
diff --git a/strings/ctype-sjis.c b/strings/ctype-sjis.c
index c1e41dc2d94..22cc8d9818d 100644
--- a/strings/ctype-sjis.c
+++ b/strings/ctype-sjis.c
@@ -4627,6 +4627,7 @@ static MY_COLLATION_HANDLER my_collation_ci_handler =
my_strnncoll_sjis,
my_strnncollsp_sjis,
my_strnxfrm_sjis,
+ my_strnxfrmlen_simple,
my_like_range_sjis,
my_wildcmp_mb, /* wildcmp */
my_strcasecmp_8bit,
diff --git a/strings/ctype-tis620.c b/strings/ctype-tis620.c
index c6bdd106ad4..9ba35e1c8ec 100644
--- a/strings/ctype-tis620.c
+++ b/strings/ctype-tis620.c
@@ -927,6 +927,7 @@ static MY_COLLATION_HANDLER my_collation_ci_handler =
my_strnncoll_tis620,
my_strnncollsp_tis620,
my_strnxfrm_tis620,
+ my_strnxfrmlen_simple,
my_like_range_tis620,
my_wildcmp_8bit, /* wildcmp */
my_strcasecmp_8bit,
diff --git a/strings/ctype-uca.c b/strings/ctype-uca.c
index 2353c9397a2..8345d0474f2 100644
--- a/strings/ctype-uca.c
+++ b/strings/ctype-uca.c
@@ -8024,6 +8024,7 @@ MY_COLLATION_HANDLER my_collation_ucs2_uca_handler =
my_strnncoll_ucs2_uca,
my_strnncollsp_ucs2_uca,
my_strnxfrm_ucs2_uca,
+ my_strnxfrmlen_simple,
my_like_range_ucs2,
my_wildcmp_uca,
NULL,
@@ -8504,6 +8505,7 @@ MY_COLLATION_HANDLER my_collation_any_uca_handler =
my_strnncoll_any_uca,
my_strnncollsp_any_uca,
my_strnxfrm_any_uca,
+ my_strnxfrmlen_simple,
my_like_range_mb,
my_wildcmp_uca,
NULL,
diff --git a/strings/ctype-ucs2.c b/strings/ctype-ucs2.c
index adfd4794e36..0d45cceb64d 100644
--- a/strings/ctype-ucs2.c
+++ b/strings/ctype-ucs2.c
@@ -1499,6 +1499,7 @@ static MY_COLLATION_HANDLER my_collation_ucs2_general_ci_handler =
my_strnncoll_ucs2,
my_strnncollsp_ucs2,
my_strnxfrm_ucs2,
+ my_strnxfrmlen_simple,
my_like_range_ucs2,
my_wildcmp_ucs2_ci,
my_strcasecmp_ucs2,
@@ -1513,6 +1514,7 @@ static MY_COLLATION_HANDLER my_collation_ucs2_bin_handler =
my_strnncoll_ucs2_bin,
my_strnncollsp_ucs2_bin,
my_strnxfrm_ucs2_bin,
+ my_strnxfrmlen_simple,
my_like_range_simple,
my_wildcmp_ucs2_bin,
my_strcasecmp_ucs2_bin,
diff --git a/strings/ctype-ujis.c b/strings/ctype-ujis.c
index fc1496df280..deaddcc76f6 100644
--- a/strings/ctype-ujis.c
+++ b/strings/ctype-ujis.c
@@ -8501,6 +8501,7 @@ static MY_COLLATION_HANDLER my_collation_ci_handler =
my_strnncoll_simple,/* strnncoll */
my_strnncollsp_simple,
my_strnxfrm_simple, /* strnxfrm */
+ my_strnxfrmlen_simple,
my_like_range_simple,/* like_range */
my_wildcmp_mb, /* wildcmp */
my_strcasecmp_mb,
diff --git a/strings/ctype-utf8.c b/strings/ctype-utf8.c
index 1f9f158a73d..e17e7587e85 100644
--- a/strings/ctype-utf8.c
+++ b/strings/ctype-utf8.c
@@ -2238,6 +2238,12 @@ int my_wildcmp_utf8(CHARSET_INFO *cs,
}
+static
+uint my_strnxfrmlen_utf8(CHARSET_INFO *cs __attribute__((unused)), uint len)
+{
+ return (len * 2 + 2) / 3; /* 2/3 of len, rounded up; presumably len counts 3 bytes per character and the key stores 2 -- confirm against callers */
+}
+
static int my_strnxfrm_utf8(CHARSET_INFO *cs,
uchar *dst, uint dstlen,
const uchar *src, uint srclen)
@@ -2245,29 +2251,33 @@ static int my_strnxfrm_utf8(CHARSET_INFO *cs,
my_wc_t wc;
int res;
int plane;
- uchar *de = dst + dstlen;
+ uchar *de= dst + dstlen; /* one past the last writable byte of the key */
+ uchar *de_beg= de - 1; /* a 2-byte weight fits only while dst < de_beg */
const uchar *se = src + srclen;
- while( src < se && dst < de )
+ while (dst < de_beg) /* stop when fewer than 2 key bytes remain */
{
- if ((res=my_utf8_uni(cs,&wc, src, se))<0)
- {
+ if ((res=my_utf8_uni(cs,&wc, src, se)) <= 0) /* non-positive result: stop converting, pad the rest */
break;
- }
src+=res;
- srclen-=res;
plane=(wc>>8) & 0xFF;
wc = uni_plane[plane] ? uni_plane[plane][wc & 0xFF].sort : wc;
- if ((res=my_uni_utf8(cs,wc,dst,de)) <0)
- {
- break;
- }
- dst+=res;
+ *dst++= wc >> 8; /* weight is stored as 2 bytes, high byte first */
+ *dst++= wc & 0xFF;
+
}
- if (dst < de)
- bfill(dst, de - dst, ' ');
+
+ while (dst < de_beg) /* Fill the tail with keys for space character */
+ {
+ *dst++= 0x00;
+ *dst++= 0x20;
+ }
+
+ if (dst < de) /* Clear the last byte, if "dstlen" was an odd number */
+ *dst= 0x00; /* write at dst, not de: "de" points one past the end of the buffer */
+
return dstlen;
}
@@ -2306,6 +2316,7 @@ static MY_COLLATION_HANDLER my_collation_ci_handler =
my_strnncoll_utf8,
my_strnncollsp_utf8,
my_strnxfrm_utf8,
+ my_strnxfrmlen_utf8,
my_like_range_mb,
my_wildcmp_utf8,
my_strcasecmp_utf8,
diff --git a/strings/ctype-win1250ch.c b/strings/ctype-win1250ch.c
index b58a8f0f1e5..37611a5bd20 100644
--- a/strings/ctype-win1250ch.c
+++ b/strings/ctype-win1250ch.c
@@ -626,6 +626,7 @@ static MY_COLLATION_HANDLER my_collation_czech_ci_handler =
my_strnncoll_win1250ch,
my_strnncollsp_win1250ch,
my_strnxfrm_win1250ch,
+ my_strnxfrmlen_simple,
my_like_range_win1250ch,
my_wildcmp_8bit,
my_strcasecmp_8bit,