summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authormonty@mysql.com <>2004-03-25 15:05:01 +0200
committermonty@mysql.com <>2004-03-25 15:05:01 +0200
commitebc3b3afac575b2c2a3e89823dfba217436a85c4 (patch)
tree27e5290a14e7d3e6c7aaced5b2ce86546de4721e
parent042af556d57f305734e64c7aa047e4709288f5bd (diff)
downloadmariadb-git-ebc3b3afac575b2c2a3e89823dfba217436a85c4.tar.gz
BTREE-indexes in HEAP tables can now be used to optimize ORDER BY
Don't read character set files if we are using only the default charset; in most cases the user will no longer get a warning about missing character set files. Compare strings with space extension instead of space stripping, so that the following comparisons now hold: "a" == "a " and "a\t" < "a" (Bug #3152). Note: because of this fix, one has to run REPAIR on any table that has an ASCII character < 32 as the last character in a CHAR/VARCHAR/TEXT column.
-rw-r--r--heap/hp_hash.c41
-rw-r--r--include/my_handler.h2
-rw-r--r--myisam/ft_boolean_search.c8
-rw-r--r--myisam/ft_nlq_search.c2
-rw-r--r--myisam/ft_parser.c2
-rw-r--r--myisam/ft_stopwords.c2
-rw-r--r--myisam/ft_update.c4
-rw-r--r--myisam/mi_check.c2
-rw-r--r--myisam/mi_search.c123
-rw-r--r--myisam/mi_unique.c2
-rw-r--r--myisam/mi_write.c2
-rw-r--r--myisam/myisam_ftdump.c2
-rw-r--r--mysql-test/r/ctype_collate.result12
-rw-r--r--mysql-test/r/endspace.result196
-rw-r--r--mysql-test/r/heap_btree.result17
-rw-r--r--mysql-test/t/endspace.test96
-rw-r--r--mysql-test/t/heap_btree.test8
-rw-r--r--mysys/charset.c27
-rw-r--r--mysys/my_handler.c69
-rw-r--r--mysys/tree.c5
-rw-r--r--sql/field.cc13
-rw-r--r--sql/filesort.cc10
-rw-r--r--sql/ha_heap.cc10
-rw-r--r--sql/ha_heap.h2
-rw-r--r--strings/ctype-big5.c90
-rw-r--r--strings/ctype-czech.c297
-rw-r--r--strings/ctype-gbk.c88
-rw-r--r--strings/ctype-latin1.c128
-rw-r--r--strings/ctype-mb.c17
-rw-r--r--strings/ctype-simple.c90
-rw-r--r--strings/ctype-sjis.c116
-rw-r--r--strings/ctype-tis620.c16
-rw-r--r--strings/ctype-ucs2.c48
-rw-r--r--strings/ctype-utf8.c8
-rw-r--r--strings/ctype-win1250ch.c4
35 files changed, 1096 insertions, 463 deletions
diff --git a/heap/hp_hash.c b/heap/hp_hash.c
index d30cbc9b82f..d040f37aea0 100644
--- a/heap/hp_hash.c
+++ b/heap/hp_hash.c
@@ -20,6 +20,38 @@
#include <m_ctype.h>
#include <assert.h>
+
+
+/*
+ Find out how many rows there are in the given range
+
+ SYNOPSIS
+ hp_rb_records_in_range()
+ info HEAP handler
+ inx Index to use
+ start_key Start of range. Null pointer if from first key
+ start_key_len Length of start key
+ start_search_flag Flag if start key should be included or not
+ end_key End of range. Null pointer if to last key
+ end_key_len Length of end key
+ end_search_flag Flag if end key should be included or not
+
+ NOTES
+ start_search_flag can have one of the following values:
+ HA_READ_KEY_EXACT Include the key in the range
+ HA_READ_AFTER_KEY Don't include key in range
+
+ end_search_flag can have one of the following values:
+ HA_READ_BEFORE_KEY Don't include key in range
+ HA_READ_AFTER_KEY Include all 'end_key' values in the range
+
+ RETURN
+ HA_POS_ERROR Something is wrong with the index tree.
+ 0 There are no matching keys in the given range
+ number > 0 There are approximately 'number' matching rows in
+ the range.
+*/
+
ha_rows hp_rb_records_in_range(HP_INFO *info, int inx, const byte *start_key,
uint start_key_len,
enum ha_rkey_function start_search_flag,
@@ -30,6 +62,7 @@ ha_rows hp_rb_records_in_range(HP_INFO *info, int inx, const byte *start_key,
HP_KEYDEF *keyinfo= info->s->keydef + inx;
TREE *rb_tree = &keyinfo->rb_tree;
heap_rb_param custom_arg;
+ DBUG_ENTER("hp_rb_records_in_range");
info->lastinx= inx;
custom_arg.keyseg= keyinfo->seg;
@@ -59,10 +92,12 @@ ha_rows hp_rb_records_in_range(HP_INFO *info, int inx, const byte *start_key,
end_pos= rb_tree->elements_in_tree + (ha_rows)1;
}
+ DBUG_PRINT("info",("start_pos: %lu end_pos: %lu", (ulong) start_pos,
+ (ulong) end_pos));
if (start_pos == HA_POS_ERROR || end_pos == HA_POS_ERROR)
- return HA_POS_ERROR;
- return end_pos < start_pos ? (ha_rows) 0 :
- (end_pos == start_pos ? (ha_rows) 1 : end_pos - start_pos);
+ DBUG_RETURN(HA_POS_ERROR);
+ DBUG_RETURN(end_pos < start_pos ? (ha_rows) 0 :
+ (end_pos == start_pos ? (ha_rows) 1 : end_pos - start_pos));
}
/* Search after a record based on a key */
diff --git a/include/my_handler.h b/include/my_handler.h
index 618d1df1a6e..18a6234d3f6 100644
--- a/include/my_handler.h
+++ b/include/my_handler.h
@@ -58,7 +58,7 @@ typedef struct st_HA_KEYSEG /* Key-portion */
}
extern int mi_compare_text(CHARSET_INFO *, uchar *, uint, uchar *, uint ,
- my_bool);
+ my_bool, my_bool);
extern int ha_key_cmp(register HA_KEYSEG *keyseg, register uchar *a,
register uchar *b, uint key_length, uint nextflag,
uint *diff_pos);
diff --git a/myisam/ft_boolean_search.c b/myisam/ft_boolean_search.c
index bff1232750e..1d4bfee86a4 100644
--- a/myisam/ft_boolean_search.c
+++ b/myisam/ft_boolean_search.c
@@ -124,7 +124,7 @@ static int FTB_WORD_cmp_list(CHARSET_INFO *cs, FTB_WORD **a, FTB_WORD **b)
{
/* ORDER BY word DESC, ndepth DESC */
int i= mi_compare_text(cs, (uchar*) (*b)->word+1,(*b)->len-1,
- (uchar*) (*a)->word+1,(*a)->len-1,0);
+ (uchar*) (*a)->word+1,(*a)->len-1,0,0);
if (!i)
i=CMP_NUM((*b)->ndepth,(*a)->ndepth);
return i;
@@ -228,7 +228,7 @@ static int _ft2_search(FTB *ftb, FTB_WORD *ftbw, my_bool init_search)
ftbw->len - (ftbw->flags & FTB_FLAG_TRUNC),
(uchar*) ftbw->word + (ftbw->flags & FTB_FLAG_TRUNC),
ftbw->len - (ftbw->flags & FTB_FLAG_TRUNC),
- 0);
+ 0,0);
}
if (r) /* not found */
@@ -633,7 +633,7 @@ float ft_boolean_find_relevance(FT_INFO *ftb, byte *record, uint length)
ftbw=ftb->list[c];
if (mi_compare_text(ftb->charset, (uchar*) word.pos, word.len,
(uchar*) ftbw->word+1, ftbw->len-1,
- (my_bool) (ftbw->flags&FTB_FLAG_TRUNC)) >0)
+ (my_bool) (ftbw->flags&FTB_FLAG_TRUNC),0) >0)
b=c;
else
a=c;
@@ -643,7 +643,7 @@ float ft_boolean_find_relevance(FT_INFO *ftb, byte *record, uint length)
ftbw=ftb->list[c];
if (mi_compare_text(ftb->charset, (uchar*) word.pos, word.len,
(uchar*) ftbw->word+1,ftbw->len-1,
- (my_bool) (ftbw->flags&FTB_FLAG_TRUNC)))
+ (my_bool) (ftbw->flags&FTB_FLAG_TRUNC),0))
break;
if (ftbw->docid[1] == docid)
continue;
diff --git a/myisam/ft_nlq_search.c b/myisam/ft_nlq_search.c
index 45d13f56c6e..03875abe7b0 100644
--- a/myisam/ft_nlq_search.c
+++ b/myisam/ft_nlq_search.c
@@ -96,7 +96,7 @@ static int walk_and_match(FT_WORD *word, uint32 count, ALL_IN_ONE *aio)
if (keylen &&
mi_compare_text(aio->charset,info->lastkey+1,
- info->lastkey_length-extra-1, keybuff+1,keylen-1,0))
+ info->lastkey_length-extra-1, keybuff+1,keylen-1,0,0))
break;
subkeys=ft_sintXkorr(info->lastkey+info->lastkey_length-extra);
diff --git a/myisam/ft_parser.c b/myisam/ft_parser.c
index 250e92566b7..b0fe180d0fb 100644
--- a/myisam/ft_parser.c
+++ b/myisam/ft_parser.c
@@ -27,7 +27,7 @@ typedef struct st_ft_docstat {
static int FT_WORD_cmp(CHARSET_INFO* cs, FT_WORD *w1, FT_WORD *w2)
{
return mi_compare_text(cs, (uchar*) w1->pos, w1->len,
- (uchar*) w2->pos, w2->len, 0);
+ (uchar*) w2->pos, w2->len, 0, 0);
}
static int walk_and_copy(FT_WORD *word,uint32 count,FT_DOCSTAT *docstat)
diff --git a/myisam/ft_stopwords.c b/myisam/ft_stopwords.c
index 6682de18c65..112af87d201 100644
--- a/myisam/ft_stopwords.c
+++ b/myisam/ft_stopwords.c
@@ -32,7 +32,7 @@ static int FT_STOPWORD_cmp(void* cmp_arg __attribute__((unused)),
{
return mi_compare_text(default_charset_info,
(uchar *)w1->pos,w1->len,
- (uchar *)w2->pos,w2->len,0);
+ (uchar *)w2->pos,w2->len,0,0);
}
static void FT_STOPWORD_free(FT_STOPWORD *w, TREE_FREE action,
diff --git a/myisam/ft_update.c b/myisam/ft_update.c
index 4015abbbeba..beccc062270 100644
--- a/myisam/ft_update.c
+++ b/myisam/ft_update.c
@@ -179,7 +179,7 @@ int _mi_ft_cmp(MI_INFO *info, uint keynr, const byte *rec1, const byte *rec2)
if ((ftsi1.pos != ftsi2.pos) &&
(!ftsi1.pos || !ftsi2.pos ||
mi_compare_text(cs, (uchar*) ftsi1.pos,ftsi1.len,
- (uchar*) ftsi2.pos,ftsi2.len,0)))
+ (uchar*) ftsi2.pos,ftsi2.len,0,0)))
DBUG_RETURN(THOSE_TWO_DAMN_KEYS_ARE_REALLY_DIFFERENT);
}
DBUG_RETURN(GEE_THEY_ARE_ABSOLUTELY_IDENTICAL);
@@ -207,7 +207,7 @@ int _mi_ft_update(MI_INFO *info, uint keynr, byte *keybuf,
while(old_word->pos && new_word->pos)
{
cmp= mi_compare_text(cs, (uchar*) old_word->pos,old_word->len,
- (uchar*) new_word->pos,new_word->len,0);
+ (uchar*) new_word->pos,new_word->len,0,0);
cmp2= cmp ? 0 : (fabs(old_word->weight - new_word->weight) > 1.e-5);
if (cmp < 0 || cmp2)
diff --git a/myisam/mi_check.c b/myisam/mi_check.c
index cdaed59cd01..9be191f349c 100644
--- a/myisam/mi_check.c
+++ b/myisam/mi_check.c
@@ -3313,7 +3313,7 @@ static int sort_ft_key_write(MI_SORT_PARAM *sort_param, const void *a)
if (val_off == a_len &&
mi_compare_text(sort_param->seg->charset,
((uchar *)a)+1,a_len-1,
- ft_buf->lastkey+1,val_off-1, 0)==0)
+ ft_buf->lastkey+1,val_off-1, 0, 0)==0)
{
if (!ft_buf->buf) /* store in second-level tree */
{
diff --git a/myisam/mi_search.c b/myisam/mi_search.c
index 2871633102d..73a4d229fd6 100644
--- a/myisam/mi_search.c
+++ b/myisam/mi_search.c
@@ -273,7 +273,8 @@ int _mi_prefix_search(MI_INFO *info, register MI_KEYDEF *keyinfo, uchar *page,
uchar *sort_order=keyinfo->seg->charset->sort_order;
uchar tt_buff[MI_MAX_KEY_BUFF+2], *t_buff=tt_buff+2;
uchar *saved_from, *saved_to, *saved_vseg;
- uint saved_length=0, saved_prefix_len=0;
+ uint saved_length=0, saved_prefix_len=0;
+ uint length_pack;
DBUG_ENTER("_mi_prefix_search");
LINT_INIT(length);
@@ -289,26 +290,24 @@ int _mi_prefix_search(MI_INFO *info, register MI_KEYDEF *keyinfo, uchar *page,
page+=2+nod_flag;
*ret_pos=page;
kseg=key;
- {
- uint lenght_pack;
- get_key_pack_length(kseg_len,lenght_pack,kseg);
- key_len_skip=lenght_pack+kseg_len;
- key_len_left=(int) key_len- (int) key_len_skip;
- cmplen=(key_len_left>=0) ? kseg_len : key_len-lenght_pack;
- DBUG_PRINT("info",("key: '%.*s'",kseg_len,kseg));
- }
-/*
- Keys are compressed the following way:
+ get_key_pack_length(kseg_len,length_pack,kseg);
+ key_len_skip=length_pack+kseg_len;
+ key_len_left=(int) key_len- (int) key_len_skip;
+ cmplen=(key_len_left>=0) ? kseg_len : key_len-length_pack;
+ DBUG_PRINT("info",("key: '%.*s'",kseg_len,kseg));
- If the max length of first key segment <= 127 characters the prefix is
- 1 byte else it's 2 byte
+ /*
+ Keys are compressed the following way:
- prefix The high bit is set if this is a prefix for the prev key
- length Packed length if the previous was a prefix byte
- [length] Length character of data
- next-key-seg Next key segments
-*/
+ If the max length of first key segment <= 127 characters the prefix is
+ 1 byte else it's 2 byte
+
+ prefix The high bit is set if this is a prefix for the prev key
+ length Packed length if the previous was a prefix byte
+ [length] Length character of data
+ next-key-seg Next key segments
+ */
matched=0; /* how many char's from prefix were alredy matched */
len=0; /* length of previous key unpacked */
@@ -350,7 +349,8 @@ int _mi_prefix_search(MI_INFO *info, register MI_KEYDEF *keyinfo, uchar *page,
saved_vseg=vseg;
saved_prefix_len=prefix_len;
- DBUG_PRINT("loop",("page: '%.*s%.*s'",prefix_len,t_buff+seg_len_pack,suffix_len,vseg));
+ DBUG_PRINT("loop",("page: '%.*s%.*s'",prefix_len,t_buff+seg_len_pack,
+ suffix_len,vseg));
{
uchar *from=vseg+suffix_len;
HA_KEYSEG *keyseg;
@@ -396,14 +396,15 @@ int _mi_prefix_search(MI_INFO *info, register MI_KEYDEF *keyinfo, uchar *page,
matched=prefix_len+left;
- for(my_flag=0;left;left--)
+ for (my_flag=0;left;left--)
if ((my_flag= (int) sort_order[*vseg++] - (int) sort_order[*k++]))
break;
if (my_flag>0) /* mismatch */
break;
- else if (my_flag==0) /* match */
- { /*
+ if (my_flag==0) /* match */
+ {
+ /*
** len cmplen seg_left_len more_segs
** < matched=len; continue search
** > = prefix ? found : (matched=len; continue search)
@@ -414,30 +415,68 @@ int _mi_prefix_search(MI_INFO *info, register MI_KEYDEF *keyinfo, uchar *page,
*/
if (len < cmplen)
{
- my_flag= -1;
+ if ((keyinfo->seg->type != HA_KEYTYPE_TEXT &&
+ keyinfo->seg->type != HA_KEYTYPE_VARTEXT))
+ my_flag= -1;
+ else
+ {
+ /* We have to compare k and vseg as if they were space extended */
+ uchar *end= k+ (cmplen - len);
+ for ( ; k < end && *k == ' '; k++) ;
+ if (k == end)
+ goto cmp_rest; /* should never happen */
+ if (*k < (uchar) ' ')
+ {
+ my_flag= 1; /* Compared string is smaller */
+ break;
+ }
+ my_flag= -1; /* Continue searching */
+ }
}
else if (len > cmplen)
{
- if ((my_flag= (!(nextflag & SEARCH_PREFIX) || key_len_left>0)))
- break;
- goto fix_flag;
- }
- else if (key_len_left>0)
- {
- uint not_used;
- if ((flag = ha_key_cmp(keyinfo->seg+1,vseg,
- k,key_len_left,nextflag,&not_used)) >= 0)
- break;
+ uchar *end;
+ if ((nextflag & SEARCH_PREFIX) && key_len_left == 0)
+ goto fix_flag;
+
+ /* We have to compare k and vseg as if they were space extended */
+ for (end=vseg + (len-cmplen) ;
+ vseg < end && *vseg == (uchar) ' ';
+ vseg++) ;
+ if (vseg == end)
+ goto cmp_rest; /* should never happen */
+
+ if (*vseg > (uchar) ' ')
+ {
+ my_flag= 1; /* Compared string is smaller */
+ break;
+ }
+ my_flag= -1; /* Continue searching */
}
else
- {
- /* at this line flag==-1 if the following lines were already
- visited and 0 otherwise, i.e. flag <=0 here always !!! */
- fix_flag:
- if (nextflag & (SEARCH_NO_FIND | SEARCH_LAST))
- flag=(nextflag & (SEARCH_BIGGER | SEARCH_LAST)) ? -1 : 1;
- if (flag>=0) break;
- }
+ {
+ cmp_rest:
+ if (key_len_left>0)
+ {
+ uint not_used;
+ if ((flag = ha_key_cmp(keyinfo->seg+1,vseg,
+ k,key_len_left,nextflag,&not_used)) >= 0)
+ break;
+ }
+ else
+ {
+ /*
+ at this line flag==-1 if the following lines were already
+ visited and 0 otherwise, i.e. flag <=0 here always !!!
+ */
+ fix_flag:
+ DBUG_ASSERT(flag <= 0);
+ if (nextflag & (SEARCH_NO_FIND | SEARCH_LAST))
+ flag=(nextflag & (SEARCH_BIGGER | SEARCH_LAST)) ? -1 : 1;
+ if (flag>=0)
+ break;
+ }
+ }
}
matched-=left;
}
@@ -1567,7 +1606,7 @@ _mi_calc_var_pack_key_length(MI_KEYDEF *keyinfo,uint nod_flag,uchar *next_key,
n_length-=tmp_length;
length-=tmp_length+next_length_pack; /* We gained these chars */
}
- if (n_length == 0)
+ if (n_length == 0 && ref_length == new_key_length)
{
s_temp->n_ref_length=pack_marker; /* Same as prev key */
}
diff --git a/myisam/mi_unique.c b/myisam/mi_unique.c
index f4ee39e55ca..38b4ed93311 100644
--- a/myisam/mi_unique.c
+++ b/myisam/mi_unique.c
@@ -180,7 +180,7 @@ int mi_unique_comp(MI_UNIQUEDEF *def, const byte *a, const byte *b,
if (type == HA_KEYTYPE_TEXT || type == HA_KEYTYPE_VARTEXT)
{
if (mi_compare_text(keyseg->charset, (uchar *) pos_a, length,
- (uchar *) pos_b, length, 0))
+ (uchar *) pos_b, length, 0, 0))
return 1;
}
else
diff --git a/myisam/mi_write.c b/myisam/mi_write.c
index 88e7f070642..382fb1156cb 100644
--- a/myisam/mi_write.c
+++ b/myisam/mi_write.c
@@ -500,7 +500,7 @@ int _mi_insert(register MI_INFO *info, register MI_KEYDEF *keyinfo,
get_key_length(alen,a);
DBUG_ASSERT(info->ft1_to_ft2==0);
if (alen == blen &&
- mi_compare_text(keyinfo->seg->charset, a, alen, b, blen, 0)==0)
+ mi_compare_text(keyinfo->seg->charset, a, alen, b, blen, 0, 0)==0)
{
/* yup. converting */
info->ft1_to_ft2=(DYNAMIC_ARRAY *)
diff --git a/myisam/myisam_ftdump.c b/myisam/myisam_ftdump.c
index 7d1b20eb854..8ab6a7600b2 100644
--- a/myisam/myisam_ftdump.c
+++ b/myisam/myisam_ftdump.c
@@ -68,7 +68,7 @@ int main(int argc,char *argv[])
struct { MI_INFO *info; } aio0, *aio=&aio0; /* for GWS_IN_USE */
MY_INIT(argv[0]);
- if (error=handle_options(&argc, &argv, my_long_options, get_one_option))
+ if ((error=handle_options(&argc, &argv, my_long_options, get_one_option)))
exit(error);
if (count || dump)
verbose=0;
diff --git a/mysql-test/r/ctype_collate.result b/mysql-test/r/ctype_collate.result
index add730fe68f..c837d676b6e 100644
--- a/mysql-test/r/ctype_collate.result
+++ b/mysql-test/r/ctype_collate.result
@@ -337,15 +337,12 @@ SELECT latin1_f,count(*) FROM t1 GROUP BY latin1_f COLLATE latin1_german2_ci;
latin1_f count(*)
A 4
AD 2
-AE 2
-Ä 2
+AE 4
AF 2
B 2
-SS 2
-ß 1
+SS 3
U 2
-UE 2
-Ü 2
+UE 4
Y 2
Z 2
SELECT latin1_f,count(*) FROM t1 GROUP BY latin1_f COLLATE latin1_general_ci;
@@ -431,13 +428,10 @@ A
AD
AE
AF
B
U
UE
SS
Y
Z
SELECT DISTINCT latin1_f COLLATE latin1_general_ci FROM t1;
diff --git a/mysql-test/r/endspace.result b/mysql-test/r/endspace.result
new file mode 100644
index 00000000000..d2519523f36
--- /dev/null
+++ b/mysql-test/r/endspace.result
@@ -0,0 +1,196 @@
+drop table if exists t1;
+select 'a' = 'a', 'a' = 'a ', 'a ' = 'a';
+'a' = 'a' 'a' = 'a ' 'a ' = 'a'
+1 1 1
+select 'a\0' = 'a', 'a\0' < 'a', 'a\0' > 'a';
+'a\0' = 'a' 'a\0' < 'a' 'a\0' > 'a'
+0 1 0
+select 'a' = 'a\0', 'a' < 'a\0', 'a' > 'a\0';
+'a' = 'a\0' 'a' < 'a\0' 'a' > 'a\0'
+0 0 1
+select 'a\0' = 'a ', 'a\0' < 'a ', 'a\0' > 'a ';
+'a\0' = 'a ' 'a\0' < 'a ' 'a\0' > 'a '
+0 1 0
+select 'a ' = 'a\0', 'a ' < 'a\0', 'a ' > 'a\0';
+'a ' = 'a\0' 'a ' < 'a\0' 'a ' > 'a\0'
+0 0 1
+select 'a a' > 'a', 'a \0' < 'a';
+'a a' > 'a' 'a \0' < 'a'
+1 1
+select binary 'a a' > 'a', binary 'a \0' > 'a', binary 'a\0' > 'a';
+binary 'a a' > 'a' binary 'a \0' > 'a' binary 'a\0' > 'a'
+1 1 1
+create table t1 (text1 varchar(32) not NULL, KEY key1 (text1));
+insert into t1 values ('teststring'), ('nothing'), ('teststring\t');
+check table t1;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+select * from t1 ignore key (key1) where text1='teststring' or text1 like 'teststring_%';
+text1
+teststring
+teststring
+select * from t1 where text1='teststring' or text1 like 'teststring_%';
+text1
+teststring
+teststring
+select * from t1 where text1='teststring' or text1 > 'teststring\t';
+text1
+teststring
+select * from t1 order by text1;
+text1
+nothing
+teststring
+teststring
+explain select * from t1 order by text1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index NULL key1 32 NULL 3 Using index
+alter table t1 modify text1 char(32) binary not null;
+check table t1;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+select * from t1 ignore key (key1) where text1='teststring' or text1 like 'teststring_%';
+text1
+teststring
+teststring
+select * from t1 where text1='teststring' or text1 like 'teststring_%';
+text1
+teststring
+teststring
+select * from t1 where text1='teststring' or text1 > 'teststring\t';
+text1
+teststring
+select text1, length(text1) from t1 order by text1;
+text1 length(text1)
+nothing 7
+teststring 11
+teststring 10
+select text1, length(text1) from t1 order by binary text1;
+text1 length(text1)
+nothing 7
+teststring 10
+teststring 11
+alter table t1 modify text1 blob not null, drop key key1, add key key1 (text1(20));
+insert into t1 values ('teststring ');
+select concat('|', text1, '|') from t1 order by text1;
+concat('|', text1, '|')
+|nothing|
+|teststring|
+|teststring |
+|teststring |
+alter table t1 modify text1 text not null, pack_keys=1;
+select * from t1 where text1 like 'teststring_%';
+text1
+teststring
+teststring
+select * from t1 where text1='teststring' or text1 like 'teststring_%';
+text1
+teststring
+teststring
+teststring
+select * from t1 where text1='teststring' or text1 > 'teststring\t';
+text1
+teststring
+teststring
+select concat('|', text1, '|') from t1 order by text1;
+concat('|', text1, '|')
+|nothing|
+|teststring |
+|teststring|
+|teststring |
+drop table t1;
+create table t1 (text1 varchar(32) not NULL, KEY key1 (text1)) pack_keys=0;
+insert into t1 values ('teststring'), ('nothing'), ('teststring\t');
+select * from t1 where text1='teststring' or text1 like 'teststring_%';
+text1
+teststring
+teststring
+select * from t1 where text1='teststring' or text1 >= 'teststring\t';
+text1
+teststring
+teststring
+drop table t1;
+create table t1 (text1 varchar(32) not NULL, KEY key1 using BTREE (text1)) engine=heap;
+insert into t1 values ('teststring'), ('nothing'), ('teststring\t');
+select * from t1 ignore key (key1) where text1='teststring' or text1 like 'teststring_%';
+text1
+teststring
+teststring
+select * from t1 where text1='teststring' or text1 like 'teststring_%';
+text1
+teststring
+teststring
+select * from t1 where text1='teststring' or text1 >= 'teststring\t';
+text1
+teststring
+teststring
+select * from t1 order by text1;
+text1
+nothing
+teststring
+teststring
+explain select * from t1 order by text1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index NULL key1 32 NULL 3
+alter table t1 modify text1 char(32) binary not null;
+select * from t1 order by text1;
+text1
+nothing
+teststring
+teststring
+drop table t1;
+create table t1 (text1 varchar(32) not NULL, KEY key1 (text1)) engine=innodb;
+insert into t1 values ('teststring'), ('nothing'), ('teststring\t');
+check table t1;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+select * from t1 where text1='teststring' or text1 like 'teststring_%';
+text1
+teststring
+teststring
+select * from t1 where text1='teststring' or text1 > 'teststring\t';
+text1
+teststring
+select * from t1 order by text1;
+text1
+nothing
+teststring
+teststring
+explain select * from t1 order by text1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index NULL key1 32 NULL 4 Using index
+alter table t1 modify text1 char(32) binary not null;
+select * from t1 order by text1;
+text1
+nothing
+teststring
+teststring
+alter table t1 modify text1 blob not null, drop key key1, add key key1 (text1(20));
+insert into t1 values ('teststring ');
+select concat('|', text1, '|') from t1 order by text1;
+concat('|', text1, '|')
+|nothing|
+|teststring|
+|teststring |
+|teststring |
+alter table t1 modify text1 text not null, pack_keys=1;
+select * from t1 where text1 like 'teststring_%';
+text1
+teststring
+teststring
+select text1, length(text1) from t1 where text1='teststring' or text1 like 'teststring_%';
+text1 length(text1)
+teststring 10
+teststring 11
+teststring 11
+select text1, length(text1) from t1 where text1='teststring' or text1 >= 'teststring\t';
+text1 length(text1)
+teststring 10
+teststring 11
+teststring 11
+select concat('|', text1, '|') from t1 order by text1;
+concat('|', text1, '|')
+|nothing|
+|teststring |
+|teststring|
+|teststring |
+drop table t1;
diff --git a/mysql-test/r/heap_btree.result b/mysql-test/r/heap_btree.result
index ef63b1d4e00..f92a8653a69 100644
--- a/mysql-test/r/heap_btree.result
+++ b/mysql-test/r/heap_btree.result
@@ -173,9 +173,22 @@ f1 f2
drop table t1;
create table t1 (btn char(10) not null, key using BTREE (btn)) engine=heap;
insert into t1 values ("hello"),("hello"),("hello"),("hello"),("hello"),("a"),("b"),("c"),("d"),("e"),("f"),("g"),("h"),("i");
-explain select * from t1 where btn like "q%";
+explain select * from t1 where btn like "i%";
id select_type table type possible_keys key key_len ref rows Extra
-1 SIMPLE t1 ALL btn NULL NULL NULL 14 Using where
+1 SIMPLE t1 range btn btn 10 NULL 1 Using where
+explain select * from t1 where btn like "h%";
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range btn btn 10 NULL 4 Using where
+explain select * from t1 where btn like "a%";
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range btn btn 10 NULL 1 Using where
+explain select * from t1 where btn like "b%";
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range btn btn 10 NULL 1 Using where
+select * from t1 where btn like "ff%";
+btn
+select * from t1 where btn like " %";
+btn
select * from t1 where btn like "q%";
btn
alter table t1 add column new_col char(1) not null, add key using BTREE (btn,new_col), drop key btn;
diff --git a/mysql-test/t/endspace.test b/mysql-test/t/endspace.test
new file mode 100644
index 00000000000..a9933ff93b5
--- /dev/null
+++ b/mysql-test/t/endspace.test
@@ -0,0 +1,96 @@
+#
+# Test problem with characters < ' ' at end of strings (Bug #3152)
+#
+
+-- source include/have_innodb.inc
+--disable_warnings
+drop table if exists t1;
+--enable_warnings
+
+select 'a' = 'a', 'a' = 'a ', 'a ' = 'a';
+select 'a\0' = 'a', 'a\0' < 'a', 'a\0' > 'a';
+select 'a' = 'a\0', 'a' < 'a\0', 'a' > 'a\0';
+select 'a\0' = 'a ', 'a\0' < 'a ', 'a\0' > 'a ';
+select 'a ' = 'a\0', 'a ' < 'a\0', 'a ' > 'a\0';
+select 'a a' > 'a', 'a \0' < 'a';
+select binary 'a a' > 'a', binary 'a \0' > 'a', binary 'a\0' > 'a';
+
+#
+# Test MyISAM tables.
+#
+
+create table t1 (text1 varchar(32) not NULL, KEY key1 (text1));
+insert into t1 values ('teststring'), ('nothing'), ('teststring\t');
+check table t1;
+select * from t1 ignore key (key1) where text1='teststring' or text1 like 'teststring_%';
+select * from t1 where text1='teststring' or text1 like 'teststring_%';
+select * from t1 where text1='teststring' or text1 > 'teststring\t';
+select * from t1 order by text1;
+explain select * from t1 order by text1;
+
+alter table t1 modify text1 char(32) binary not null;
+check table t1;
+select * from t1 ignore key (key1) where text1='teststring' or text1 like 'teststring_%';
+select * from t1 where text1='teststring' or text1 like 'teststring_%';
+select * from t1 where text1='teststring' or text1 > 'teststring\t';
+select text1, length(text1) from t1 order by text1;
+select text1, length(text1) from t1 order by binary text1;
+
+alter table t1 modify text1 blob not null, drop key key1, add key key1 (text1(20));
+insert into t1 values ('teststring ');
+select concat('|', text1, '|') from t1 order by text1;
+
+alter table t1 modify text1 text not null, pack_keys=1;
+select * from t1 where text1 like 'teststring_%';
+select * from t1 where text1='teststring' or text1 like 'teststring_%';
+select * from t1 where text1='teststring' or text1 > 'teststring\t';
+select concat('|', text1, '|') from t1 order by text1;
+drop table t1;
+
+create table t1 (text1 varchar(32) not NULL, KEY key1 (text1)) pack_keys=0;
+insert into t1 values ('teststring'), ('nothing'), ('teststring\t');
+select * from t1 where text1='teststring' or text1 like 'teststring_%';
+select * from t1 where text1='teststring' or text1 >= 'teststring\t';
+drop table t1;
+
+# Test HEAP tables (with BTREE keys)
+
+create table t1 (text1 varchar(32) not NULL, KEY key1 using BTREE (text1)) engine=heap;
+insert into t1 values ('teststring'), ('nothing'), ('teststring\t');
+select * from t1 ignore key (key1) where text1='teststring' or text1 like 'teststring_%';
+select * from t1 where text1='teststring' or text1 like 'teststring_%';
+select * from t1 where text1='teststring' or text1 >= 'teststring\t';
+select * from t1 order by text1;
+explain select * from t1 order by text1;
+
+alter table t1 modify text1 char(32) binary not null;
+select * from t1 order by text1;
+drop table t1;
+
+#
+# Test InnoDB tables
+#
+
+create table t1 (text1 varchar(32) not NULL, KEY key1 (text1)) engine=innodb;
+insert into t1 values ('teststring'), ('nothing'), ('teststring\t');
+check table t1;
+select * from t1 where text1='teststring' or text1 like 'teststring_%';
+select * from t1 where text1='teststring' or text1 > 'teststring\t';
+select * from t1 order by text1;
+explain select * from t1 order by text1;
+
+alter table t1 modify text1 char(32) binary not null;
+select * from t1 order by text1;
+
+alter table t1 modify text1 blob not null, drop key key1, add key key1 (text1(20));
+insert into t1 values ('teststring ');
+select concat('|', text1, '|') from t1 order by text1;
+
+alter table t1 modify text1 text not null, pack_keys=1;
+select * from t1 where text1 like 'teststring_%';
+
+# The following gives wrong result in InnoDB
+select text1, length(text1) from t1 where text1='teststring' or text1 like 'teststring_%';
+select text1, length(text1) from t1 where text1='teststring' or text1 >= 'teststring\t';
+select concat('|', text1, '|') from t1 order by text1;
+drop table t1;
diff --git a/mysql-test/t/heap_btree.test b/mysql-test/t/heap_btree.test
index a520065a8b3..ad78636d002 100644
--- a/mysql-test/t/heap_btree.test
+++ b/mysql-test/t/heap_btree.test
@@ -110,7 +110,13 @@ drop table t1;
create table t1 (btn char(10) not null, key using BTREE (btn)) engine=heap;
insert into t1 values ("hello"),("hello"),("hello"),("hello"),("hello"),("a"),("b"),("c"),("d"),("e"),("f"),("g"),("h"),("i");
-explain select * from t1 where btn like "q%";
+explain select * from t1 where btn like "i%";
+explain select * from t1 where btn like "h%";
+explain select * from t1 where btn like "a%";
+explain select * from t1 where btn like "b%";
+# For the following, the BTREE MAY notice that there are no possible matches
+select * from t1 where btn like "ff%";
+select * from t1 where btn like " %";
select * from t1 where btn like "q%";
alter table t1 add column new_col char(1) not null, add key using BTREE (btn,new_col), drop key btn;
update t1 set new_col=left(btn,1);
diff --git a/mysys/charset.c b/mysys/charset.c
index c422ead89c0..80f62b06a3e 100644
--- a/mysys/charset.c
+++ b/mysys/charset.c
@@ -459,7 +459,9 @@ static my_bool init_available_charsets(myf myflags)
init_compiled_charsets(myflags);
/* Copy compiled charsets */
- for (cs=all_charsets; cs < all_charsets+255 ; cs++)
+ for (cs=all_charsets;
+ cs < all_charsets+array_elements(all_charsets)-1 ;
+ cs++)
{
if (*cs)
{
@@ -486,10 +488,11 @@ void free_charsets(void)
uint get_collation_number(const char *name)
{
CHARSET_INFO **cs;
- if (init_available_charsets(MYF(0))) /* If it isn't initialized */
- return 0;
+ init_available_charsets(MYF(0));
- for (cs= all_charsets; cs < all_charsets+255; ++cs)
+ for (cs= all_charsets;
+ cs < all_charsets+array_elements(all_charsets)-1 ;
+ cs++)
{
if ( cs[0] && cs[0]->name &&
!my_strcasecmp(&my_charset_latin1, cs[0]->name, name))
@@ -498,13 +501,15 @@ uint get_collation_number(const char *name)
return 0; /* this mimics find_type() */
}
+
uint get_charset_number(const char *charset_name, uint cs_flags)
{
CHARSET_INFO **cs;
- if (init_available_charsets(MYF(0))) /* If it isn't initialized */
- return 0;
+ init_available_charsets(MYF(0));
- for (cs= all_charsets; cs < all_charsets+255; ++cs)
+ for (cs= all_charsets;
+ cs < all_charsets+array_elements(all_charsets)-1 ;
+ cs++)
{
if ( cs[0] && cs[0]->csname && (cs[0]->state & cs_flags) &&
!my_strcasecmp(&my_charset_latin1, cs[0]->csname, charset_name))
@@ -517,8 +522,7 @@ uint get_charset_number(const char *charset_name, uint cs_flags)
const char *get_charset_name(uint charset_number)
{
CHARSET_INFO *cs;
- if (init_available_charsets(MYF(0))) /* If it isn't initialized */
- return "?";
+ init_available_charsets(MYF(0));
cs=all_charsets[charset_number];
if (cs && (cs->number == charset_number) && cs->name )
@@ -554,9 +558,12 @@ static CHARSET_INFO *get_internal_charset(uint cs_number, myf flags)
CHARSET_INFO *get_charset(uint cs_number, myf flags)
{
CHARSET_INFO *cs;
+ if (cs_number == default_charset_info->number)
+ return default_charset_info;
+
(void) init_available_charsets(MYF(0)); /* If it isn't initialized */
- if (!cs_number)
+ if (!cs_number || cs_number >= array_elements(all_charsets)-1)
return NULL;
cs=get_internal_charset(cs_number, flags);
diff --git a/mysys/my_handler.c b/mysys/my_handler.c
index 190c279aadf..35f620ccbcb 100644
--- a/mysys/my_handler.c
+++ b/mysys/my_handler.c
@@ -18,15 +18,21 @@
#include "my_handler.h"
int mi_compare_text(CHARSET_INFO *charset_info, uchar *a, uint a_length,
- uchar *b, uint b_length, my_bool part_key)
+ uchar *b, uint b_length, my_bool part_key,
+ my_bool skip_end_space)
{
if (part_key && b_length < a_length)
a_length=b_length;
- return my_strnncoll(charset_info, a, a_length, b, b_length);
+ if (skip_end_space)
+ return charset_info->coll->strnncollsp(charset_info, a, a_length,
+ b, b_length);
+ return charset_info->coll->strnncoll(charset_info, a, a_length,
+ b, b_length);
}
+
static int compare_bin(uchar *a, uint a_length, uchar *b, uint b_length,
- my_bool part_key)
+ my_bool part_key, my_bool skip_end_space)
{
uint length= min(a_length,b_length);
uchar *end= a+ length;
@@ -37,6 +43,31 @@ static int compare_bin(uchar *a, uint a_length, uchar *b, uint b_length,
return flag;
if (part_key && b_length < a_length)
return 0;
+ if (skip_end_space && a_length != b_length)
+ {
+ int swap= 0;
+ /*
+ We are using space compression. We have to check if longer key
+ has next character < ' ', in which case it's less than the shorter
+ key that has an implicit space afterwards.
+
+ This code is identical to the one in
+ strings/ctype-simple.c:my_strnncollsp_simple
+ */
+ if (a_length < b_length)
+ {
+ /* put longer key in a */
+ a_length= b_length;
+ a= b;
+ swap= -1; /* swap sign of result */
+ }
+ for (end= a + a_length-length; a < end ; a++)
+ {
+ if (*a != ' ')
+ return ((int) *a - (int) ' ') ^ swap;
+ }
+ return 0;
+ }
return (int) (a_length-b_length);
}
@@ -128,7 +159,8 @@ int ha_key_cmp(register HA_KEYSEG *keyseg, register uchar *a,
if (piks &&
(flag=mi_compare_text(keyseg->charset,a,a_length,b,b_length,
(my_bool) ((nextflag & SEARCH_PREFIX) &&
- next_key_length <= 0))))
+ next_key_length <= 0),
+ !(nextflag & SEARCH_PREFIX))))
return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag);
a+=a_length;
b+=b_length;
@@ -137,17 +169,11 @@ int ha_key_cmp(register HA_KEYSEG *keyseg, register uchar *a,
else
{
uint length=(uint) (end-a), a_length=length, b_length=length;
- if (!(nextflag & SEARCH_PREFIX))
- {
- while (a_length && a[a_length-1] == ' ')
- a_length--;
- while (b_length && b[b_length-1] == ' ')
- b_length--;
- }
if (piks &&
(flag= mi_compare_text(keyseg->charset, a, a_length, b, b_length,
(my_bool) ((nextflag & SEARCH_PREFIX) &&
- next_key_length <= 0))))
+ next_key_length <= 0),
+ !(nextflag & SEARCH_PREFIX))))
return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag);
a=end;
b+=length;
@@ -164,7 +190,7 @@ int ha_key_cmp(register HA_KEYSEG *keyseg, register uchar *a,
if (piks &&
(flag=compare_bin(a,a_length,b,b_length,
(my_bool) ((nextflag & SEARCH_PREFIX) &&
- next_key_length <= 0))))
+ next_key_length <= 0),1)))
return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag);
a+=a_length;
b+=b_length;
@@ -176,7 +202,7 @@ int ha_key_cmp(register HA_KEYSEG *keyseg, register uchar *a,
if (piks &&
(flag=compare_bin(a,length,b,length,
(my_bool) ((nextflag & SEARCH_PREFIX) &&
- next_key_length <= 0))))
+ next_key_length <= 0),0)))
return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag);
a+=length;
b+=length;
@@ -191,18 +217,13 @@ int ha_key_cmp(register HA_KEYSEG *keyseg, register uchar *a,
full_b_length= b_length;
next_key_length=key_length-b_length-pack_length;
- if ((nextflag & (SEARCH_FIND | SEARCH_UPDATE)) == SEARCH_FIND)
- {
- while (a_length && a[a_length-1] == ' ')
- a_length--;
- while (b_length && b[b_length-1] == ' ')
- b_length--;
- }
-
if (piks &&
(flag= mi_compare_text(keyseg->charset,a,a_length,b,b_length,
(my_bool) ((nextflag & SEARCH_PREFIX) &&
- next_key_length <= 0))))
+ next_key_length <= 0),
+ (my_bool) ((nextflag & (SEARCH_FIND |
+ SEARCH_UPDATE)) ==
+ SEARCH_FIND))))
return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag);
a+= full_a_length;
b+= full_b_length;
@@ -219,7 +240,7 @@ int ha_key_cmp(register HA_KEYSEG *keyseg, register uchar *a,
if (piks &&
(flag=compare_bin(a,a_length,b,b_length,
(my_bool) ((nextflag & SEARCH_PREFIX) &&
- next_key_length <= 0))))
+ next_key_length <= 0), 0)))
return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag);
a+=a_length;
b+=b_length;
diff --git a/mysys/tree.c b/mysys/tree.c
index 0b30ffa4971..063c8739e58 100644
--- a/mysys/tree.c
+++ b/mysys/tree.c
@@ -481,7 +481,6 @@ ha_rows tree_record_pos(TREE *tree, const void *key,
TREE_ELEMENT *element= tree->root;
double left= 1;
double right= tree->elements_in_tree;
- ha_rows last_equal_pos= HA_POS_ERROR;
while (element != &tree->null_element)
{
@@ -490,9 +489,6 @@ ha_rows tree_record_pos(TREE *tree, const void *key,
{
switch (flag) {
case HA_READ_KEY_EXACT:
- last_equal_pos= (ha_rows) ((left + right) / 2);
- cmp= 1;
- break;
case HA_READ_BEFORE_KEY:
cmp= 1;
break;
@@ -516,7 +512,6 @@ ha_rows tree_record_pos(TREE *tree, const void *key,
}
switch (flag) {
case HA_READ_KEY_EXACT:
- return last_equal_pos;
case HA_READ_BEFORE_KEY:
return (ha_rows) right;
case HA_READ_AFTER_KEY:
diff --git a/sql/field.cc b/sql/field.cc
index 00b7b9ebdb9..574800b6180 100644
--- a/sql/field.cc
+++ b/sql/field.cc
@@ -4209,10 +4209,10 @@ int Field_string::cmp(const char *a_ptr, const char *b_ptr)
void Field_string::sort_string(char *to,uint length)
{
uint tmp=my_strnxfrm(field_charset,
- (unsigned char *)to, length,
- (unsigned char *) ptr, field_length);
+ (unsigned char *) to, length,
+ (unsigned char *) ptr, field_length);
if (tmp < length)
- bzero(to + tmp, length - tmp);
+ field_charset->cset->fill(field_charset, to + tmp, length - tmp, ' ');
}
@@ -4384,7 +4384,8 @@ void Field_varstring::sort_string(char *to,uint length)
(unsigned char *) to, length,
(unsigned char *)ptr+2, tot_length);
if (tot_length < length)
- bzero(to+tot_length,length-tot_length);
+ field_charset->cset->fill(field_charset, to+tot_length,length-tot_length,
+ binary() ? (char) 0 : ' ');
}
@@ -4838,7 +4839,9 @@ void Field_blob::sort_string(char *to,uint length)
(unsigned char *)to, length,
(unsigned char *)blob, blob_length);
if (blob_length < length)
- bzero(to+blob_length, length-blob_length);
+ field_charset->cset->fill(field_charset, to+blob_length,
+ length-blob_length,
+ binary() ? (char) 0 : ' ');
}
}
diff --git a/sql/filesort.cc b/sql/filesort.cc
index 064e92b7888..a15ffb43f6f 100644
--- a/sql/filesort.cc
+++ b/sql/filesort.cc
@@ -528,6 +528,8 @@ static void make_sortkey(register SORTPARAM *param,
case STRING_RESULT:
{
CHARSET_INFO *cs=item->collation.collation;
+ char fill_char= ((cs->state & MY_CS_BINSORT) ? (char) 0 : ' ');
+
if ((maybe_null=item->maybe_null))
*to++=1;
/* All item->str() to use some extra byte for end null.. */
@@ -564,14 +566,16 @@ static void make_sortkey(register SORTPARAM *param,
uint tmp_length=my_strnxfrm(cs,to,sort_field->length,
(unsigned char *) from, length);
if (tmp_length < sort_field->length)
- bzero((char*) to+tmp_length,sort_field->length-tmp_length);
+ cs->cset->fill(cs, (char*) to+tmp_length,
+ sort_field->length-tmp_length,
+ fill_char);
}
else
{
my_strnxfrm(cs,(uchar*)to,length,(const uchar*)res->ptr(),length);
- bzero((char *)to+length,diff);
+ cs->cset->fill(cs, (char *)to+length,diff,fill_char);
}
- break;
+ break;
}
case INT_RESULT:
{
diff --git a/sql/ha_heap.cc b/sql/ha_heap.cc
index 807b6e35a29..c84f0da0d25 100644
--- a/sql/ha_heap.cc
+++ b/sql/ha_heap.cc
@@ -44,6 +44,16 @@ int ha_heap::open(const char *name, int mode, uint test_if_locked)
}
}
ref_length= sizeof(HEAP_PTR);
+ if (file)
+ {
+ /* Initialize variables for the opened table */
+ btree_keys.clear_all();
+ for (uint i= 0 ; i < table->keys ; i++)
+ {
+ if (table->key_info[i].algorithm == HA_KEY_ALG_BTREE)
+ btree_keys.set_bit(i);
+ }
+ }
return (file ? 0 : 1);
}
diff --git a/sql/ha_heap.h b/sql/ha_heap.h
index c369c7029b4..feadc0c3c0f 100644
--- a/sql/ha_heap.h
+++ b/sql/ha_heap.h
@@ -26,6 +26,7 @@
class ha_heap: public handler
{
HP_INFO *file;
+ key_map btree_keys;
public:
ha_heap(TABLE *table): handler(table), file(0) {}
@@ -49,6 +50,7 @@ class ha_heap: public handler
(HA_ONLY_WHOLE_INDEX | HA_WRONG_ASCII_ORDER |
HA_NOT_READ_PREFIX_LAST));
}
+ const key_map *keys_to_use_for_scanning() { return &btree_keys; }
uint max_record_length() const { return HA_MAX_REC_LENGTH; }
uint max_keys() const { return MAX_KEY; }
uint max_key_parts() const { return MAX_REF_PARTS; }
diff --git a/strings/ctype-big5.c b/strings/ctype-big5.c
index ee55cfda6c1..2bde29ecc47 100644
--- a/strings/ctype-big5.c
+++ b/strings/ctype-big5.c
@@ -218,40 +218,80 @@ static uint16 big5strokexfrm(uint16 i)
return 0xA140;
}
-static int my_strnncoll_big5(CHARSET_INFO *cs __attribute__((unused)),
- const uchar * s1, uint len1,
- const uchar * s2, uint len2)
+
+
+static int my_strnncoll_big5_internal(const uchar **a_res,
+ const uchar **b_res, uint length)
{
- uint len;
+ const char *a= *a_res, *b= *b_res;
- len = min(len1,len2);
- while (len--)
+ while (length--)
{
- if ((len > 0) && isbig5code(*s1,*(s1+1)) && isbig5code(*s2, *(s2+1)))
+ if ((length > 0) && isbig5code(*a,*(a+1)) && isbig5code(*b, *(b+1)))
{
- if (*s1 != *s2 || *(s1+1) != *(s2+1))
- return ((int) big5code(*s1,*(s1+1)) -
- (int) big5code(*s2,*(s2+1)));
- s1 +=2;
- s2 +=2;
- len--;
- } else if (sort_order_big5[(uchar) *s1++] != sort_order_big5[(uchar) *s2++])
- return ((int) sort_order_big5[(uchar) s1[-1]] -
- (int) sort_order_big5[(uchar) s2[-1]]);
+ if (*a != *b || *(a+1) != *(b+1))
+ return ((int) big5code(*a,*(a+1)) -
+ (int) big5code(*b,*(b+1)));
+ a+= 2;
+ b+= 2;
+ length--;
+ }
+ else if (sort_order_big5[(uchar) *a++] !=
+ sort_order_big5[(uchar) *b++])
+ return ((int) sort_order_big5[(uchar) a[-1]] -
+ (int) sort_order_big5[(uchar) b[-1]]);
}
- return (int) (len1-len2);
+ *a_res= a;
+ *b_res= b;
+ return 0;
}
-static
-int my_strnncollsp_big5(CHARSET_INFO * cs,
- const uchar *s, uint slen,
- const uchar *t, uint tlen)
+
+/* Compare strings */
+
+static int my_strnncoll_big5(CHARSET_INFO *cs __attribute__((unused)),
+ const uchar *a, uint a_length,
+ const uchar *b, uint b_length)
{
- for ( ; slen && s[slen-1] == ' ' ; slen--);
- for ( ; tlen && t[tlen-1] == ' ' ; tlen--);
- return my_strnncoll_big5(cs,s,slen,t,tlen);
+ uint length= min(a_length, b_length);
+ int res= my_strnncoll_big5_internal(&a, &b, length);
+ return res ? res : (int) (a_length - b_length);
}
+
+/* compare strings, ignore end space */
+
+static int my_strnncollsp_big5(CHARSET_INFO * cs __attribute__((unused)),
+ const uchar *a, uint a_length,
+ const uchar *b, uint b_length)
+{
+ uint length= min(a_length, b_length);
+ int res= my_strnncoll_big5_internal(&a, &b, length);
+ if (!res && a_length != b_length)
+ {
+ const uchar *end;
+ int swap= 0;
+ /*
+ Check the next non-space character of the longer key. If it's < ' ',
+ then the longer key is smaller than the other key.
+ */
+ if (a_length < b_length)
+ {
+ /* put longer key in a */
+ a_length= b_length;
+ a= b;
+ swap= -1; /* swap sign of result */
+ }
+ for (end= a + a_length-length; a < end ; a++)
+ {
+ if (*a != ' ')
+ return ((int) *a - (int) ' ') ^ swap;
+ }
+ }
+ return res;
+}
+
+
static int my_strnxfrm_big5(CHARSET_INFO *cs __attribute__((unused)),
uchar * dest, uint len,
const uchar * src, uint srclen)
@@ -377,7 +417,7 @@ static my_bool my_like_range_big5(CHARSET_INFO *cs __attribute__((unused)),
*min_length= (uint) (min_str-min_org);
*max_length= res_length;
do {
- *min_str++ = ' '; /* Because if key compression */
+ *min_str++ = 0;
*max_str++ = max_sort_char;
} while (min_str != min_end);
return 0;
diff --git a/strings/ctype-czech.c b/strings/ctype-czech.c
index ed8c0b5b415..5094a7c45da 100644
--- a/strings/ctype-czech.c
+++ b/strings/ctype-czech.c
@@ -165,169 +165,144 @@ static struct wordvalue doubles[] = {
Na konci připojíme znak 0
*/
-#define ADD_TO_RESULT(dest, len, totlen, value) \
- if ((totlen) < (len)) { dest[totlen] = value; } (totlen++);
-
-#define NEXT_CMP_VALUE(src, p, store, pass, value, len) \
- while (1) /* we will make a loop */ \
- { \
- if (IS_END(p, src, len)) \
- /* when we are at the end of string */ \
- { /* return either 0 for end of string */ \
- /* or 1 for end of pass */ \
- if (pass == 3) { value = 0; break; } \
- if (pass == 0) p = store; \
- else p = src; \
- value = 1; pass++; break; \
- } \
- /* not at end of string */ \
- value = CZ_SORT_TABLE[pass][*p]; \
- \
- if (value == 0) { p++; continue; } /* ignore value */ \
- if (value == 2) /* space */ \
- { \
- const uchar * tmp; \
- const uchar * runner = ++p; \
- while (!(IS_END(runner, src, len)) && (CZ_SORT_TABLE[pass][*runner] == 2)) \
- runner++; /* skip all spaces */ \
- if (IS_END(runner, src, len) && SKIP_TRAILING_SPACES) \
- p = runner; \
- if ((pass <= 2) && !(IS_END(runner, src, len))) \
- p = runner; \
- if (IS_END(p, src, len)) \
- continue; \
- /* we switch passes */ \
- if (pass > 1) \
- break; \
- tmp = p; \
- if (pass == 0) pass = 1; \
- else pass = 0; \
- p = store; store = tmp; \
- break; \
- } \
- if (value == 255) \
- { \
- int i; \
- for (i = 0; i < (int) sizeof(doubles); i++) \
- { \
- const char * pattern = doubles[i].word; \
- const char * q = (const char *) p; \
- int j = 0; \
- while (pattern[j]) \
- { \
- if (IS_END(q, src, len) || (*q != pattern[j])) \
- { break ; } \
- j++; q++; \
- } \
- if (!(pattern[j])) \
- { \
- value = (int)(doubles[i].outvalue[pass]); \
- p = (const uchar *) q - 1; \
- break; \
- } \
- } \
- } \
- p++; \
- break; \
- }
-
-#define IS_END(p, src, len) (!(*p))
-
-#if 0
-/* Function strcoll, with Czech sorting, for zero terminated strings */
-static int my_strcoll_czech(const uchar * s1, const uchar * s2)
- {
- int v1, v2;
- const uchar * p1, * p2, * store1, * store2;
- int pass1 = 0, pass2 = 0;
- int diff;
-
- p1 = s1; p2 = s2;
- store1 = s1; store2 = s2;
-
- do
- {
- NEXT_CMP_VALUE(s1, p1, store1, pass1, v1, 0);
- NEXT_CMP_VALUE(s2, p2, store2, pass2, v2, 0);
- diff = v1 - v2;
- if (diff != 0) return diff;
- }
- while (v1);
- return 0;
- }
-#endif
+#define ADD_TO_RESULT(dest, len, totlen, value) \
+if ((totlen) < (len)) { dest[totlen] = value; } (totlen++);
+#define IS_END(p, src, len) (((char *)p - (char *)src) >= (len))
-#if 0
-/* Function strxfrm, with Czech sorting, for zero terminated strings */
-static int my_strxfrm_czech(uchar * dest, const uchar * src, int len)
+#define NEXT_CMP_VALUE(src, p, store, pass, value, len) \
+while (1) \
+{ \
+ if (IS_END(p, src, len)) \
+ { \
+ /* when we are at the end of string */ \
+ /* return either 0 for end of string */ \
+ /* or 1 for end of pass */ \
+ value= 0; \
+ if (pass != 3) \
+ { \
+ p= (pass++ == 0) ? store : src; \
+ value = 1; \
+ } \
+ break; \
+ } \
+ /* not at end of string */ \
+ value = CZ_SORT_TABLE[pass][*p]; \
+ if (value == 0) \
+ { p++; continue; } /* ignore value */ \
+ if (value == 2) /* space */ \
+ { \
+ const uchar * tmp; \
+ const uchar * runner = ++p; \
+ while (!(IS_END(runner, src, len)) && (CZ_SORT_TABLE[pass][*runner] == 2)) \
+ runner++; /* skip all spaces */ \
+ if (IS_END(runner, src, len) && SKIP_TRAILING_SPACES) \
+ p = runner; \
+ if ((pass <= 2) && !(IS_END(runner, src, len))) \
+ p = runner; \
+ if (IS_END(p, src, len)) \
+ continue; \
+ /* we switch passes */ \
+ if (pass > 1) \
+ break; \
+ tmp = p; \
+ pass= 1-pass; \
+ p = store; store = tmp; \
+ break; \
+ } \
+ if (value == 255) \
+ { \
+ int i; \
+ for (i = 0; i < (int) sizeof(doubles); i++) \
+ { \
+ const char * pattern = doubles[i].word; \
+ const char * q = (const char *) p; \
+ int j = 0; \
+ while (pattern[j]) \
+ { \
+ if (IS_END(q, src, len) || (*q != pattern[j])) \
+ break; \
+ j++; q++; \
+ } \
+ if (!(pattern[j])) \
+ { \
+ value = (int)(doubles[i].outvalue[pass]); \
+ p= (const uchar *) q - 1; \
+ break; \
+ } \
+ } \
+ } \
+ p++; \
+ break; \
+}
+
+/*
+ Function strnncoll, actually strcoll, with Czech sorting, which expects
+ the lengths of the strings to be specified
+*/
+
+static int my_strnncoll_czech(CHARSET_INFO *cs __attribute__((unused)),
+ const uchar * s1, uint len1,
+ const uchar * s2, uint len2)
{
- int value;
- const uchar * p, * store;
- int pass = 0;
- int totlen = 0;
- p = store = src;
-
- do
- {
- NEXT_CMP_VALUE(src, p, store, pass, value, 0);
- ADD_TO_RESULT(dest, len, totlen, value);
- }
- while (value);
- return totlen;
- }
-#endif
+ int v1, v2;
+ const uchar * p1, * p2, * store1, * store2;
+ int pass1 = 0, pass2 = 0;
+ p1 = s1; p2 = s2;
+ store1 = s1; store2 = s2;
-#undef IS_END
+ do
+ {
+ int diff;
+ NEXT_CMP_VALUE(s1, p1, store1, pass1, v1, (int)len1);
+ NEXT_CMP_VALUE(s2, p2, store2, pass2, v2, (int)len2);
+ if ((diff = v1 - v2))
+ return diff;
+ }
+ while (v1);
+ return 0;
+}
-#define IS_END(p, src, len) (((char *)p - (char *)src) >= (len))
-/* Function strnncoll, actually strcoll, with Czech sorting, which expect
- the length of the strings being specified */
-static int my_strnncoll_czech(CHARSET_INFO *cs __attribute__((unused)),
- const uchar * s1, uint len1,
- const uchar * s2, uint len2)
- {
- int v1, v2;
- const uchar * p1, * p2, * store1, * store2;
- int pass1 = 0, pass2 = 0;
- int diff;
-
- p1 = s1; p2 = s2;
- store1 = s1; store2 = s2;
-
- do
- {
- NEXT_CMP_VALUE(s1, p1, store1, pass1, v1, (int)len1);
- NEXT_CMP_VALUE(s2, p2, store2, pass2, v2, (int)len2);
- diff = v1 - v2;
-
- if (diff != 0) return diff;
- }
- while (v1);
- return 0;
- }
-
-/* Function strnxfrm, actually strxfrm, with Czech sorting, which expect
- the length of the strings being specified */
+
+/*
+ TODO: Fix this one to compare strings the way it is done in ctype-simple.c
+*/
+
+static
+int my_strnncollsp_czech(CHARSET_INFO * cs,
+ const uchar *s, uint slen,
+ const uchar *t, uint tlen)
+{
+ for ( ; slen && s[slen-1] == ' ' ; slen--);
+ for ( ; tlen && t[tlen-1] == ' ' ; tlen--);
+ return my_strnncoll_czech(cs,s,slen,t,tlen);
+}
+
+
+/*
+ Function strnxfrm, actually strxfrm, with Czech sorting, which expects
+ the lengths of the strings to be specified
+*/
+
static int my_strnxfrm_czech(CHARSET_INFO *cs __attribute__((unused)),
- uchar * dest, uint len,
- const uchar * src, uint srclen)
- {
- int value;
- const uchar * p, * store;
- int pass = 0;
- int totlen = 0;
- p = src; store = src;
-
- do
- {
- NEXT_CMP_VALUE(src, p, store, pass, value, (int)srclen);
- ADD_TO_RESULT(dest, (int)len, totlen, value);
- }
- while (value);
- return totlen;
- }
+ uchar * dest, uint len,
+ const uchar * src, uint srclen)
+{
+ int value;
+ const uchar * p, * store;
+ int pass = 0;
+ int totlen = 0;
+ p = src; store = src;
+
+ do
+ {
+ NEXT_CMP_VALUE(src, p, store, pass, value, (int)srclen);
+ ADD_TO_RESULT(dest, (int)len, totlen, value);
+ }
+ while (value);
+ return totlen;
+}
#undef IS_END
@@ -595,16 +570,6 @@ static MY_UNI_IDX idx_uni_8859_2[]={
};
-static
-int my_strnncollsp_czech(CHARSET_INFO * cs,
- const uchar *s, uint slen,
- const uchar *t, uint tlen)
-{
- for ( ; slen && s[slen-1] == ' ' ; slen--);
- for ( ; tlen && t[tlen-1] == ' ' ; tlen--);
- return my_strnncoll_czech(cs,s,slen,t,tlen);
-}
-
static MY_COLLATION_HANDLER my_collation_latin2_czech_ci_handler =
{
my_strnncoll_czech,
diff --git a/strings/ctype-gbk.c b/strings/ctype-gbk.c
index 98511406ba9..1990060e67b 100644
--- a/strings/ctype-gbk.c
+++ b/strings/ctype-gbk.c
@@ -2582,40 +2582,74 @@ static uint16 gbksortorder(uint16 i)
}
-int my_strnncoll_gbk(CHARSET_INFO *cs __attribute__((unused)),
- const uchar * s1, uint len1,
- const uchar * s2, uint len2)
+int my_strnncoll_gbk_internal(const uchar **a_res, const uchar **b_res,
+ uint length)
{
- uint len,c1,c2;
+ const char *a= *a_res, *b= *b_res;
+ uint a_char,b_char;
- len = min(len1,len2);
- while (len--)
+ while (length--)
{
- if ((len > 0) && isgbkcode(*s1,*(s1+1)) && isgbkcode(*s2, *(s2+1)))
+ if ((length > 0) && isgbkcode(*a,*(a+1)) && isgbkcode(*b, *(b+1)))
{
- c1=gbkcode(*s1,*(s1+1));
- c2=gbkcode(*s2,*(s2+1));
- if (c1!=c2)
- return ((int) gbksortorder((uint16) c1) -
- (int) gbksortorder((uint16) c2));
- s1+=2;
- s2+=2;
- --len;
- } else if (sort_order_gbk[(uchar) *s1++] != sort_order_gbk[(uchar) *s2++])
- return ((int) sort_order_gbk[(uchar) s1[-1]] -
- (int) sort_order_gbk[(uchar) s2[-1]]);
+ a_char= gbkcode(*a,*(a+1));
+ b_char= gbkcode(*b,*(b+1));
+ if (a_char != b_char)
+ return ((int) gbksortorder((uint16) a_char) -
+ (int) gbksortorder((uint16) b_char));
+ a+= 2;
+ b+= 2;
+ length--;
+ }
+ else if (sort_order_gbk[(uchar) *a++] != sort_order_gbk[(uchar) *b++])
+ return ((int) sort_order_gbk[(uchar) a[-1]] -
+ (int) sort_order_gbk[(uchar) b[-1]]);
}
- return (int) (len1-len2);
+ *a_res= a;
+ *b_res= b;
+ return 0;
}
-static
-int my_strnncollsp_gbk(CHARSET_INFO * cs,
- const uchar *s, uint slen,
- const uchar *t, uint tlen)
+
+
+int my_strnncoll_gbk(CHARSET_INFO *cs __attribute__((unused)),
+ const uchar *a, uint a_length,
+ const uchar *b, uint b_length)
{
- for ( ; slen && s[slen-1] == ' ' ; slen--);
- for ( ; tlen && t[tlen-1] == ' ' ; tlen--);
- return my_strnncoll_gbk(cs,s,slen,t,tlen);
+ uint length= min(a_length, b_length);
+ int res= my_strnncoll_gbk_internal(&a, &b, length);
+ return res ? res : (int) (a_length - b_length);
+}
+
+
+static int my_strnncollsp_gbk(CHARSET_INFO * cs __attribute__((unused)),
+ const uchar *a, uint a_length,
+ const uchar *b, uint b_length)
+{
+ uint length= min(a_length, b_length);
+ int res= my_strnncoll_gbk_internal(&a, &b, length);
+ if (!res && a_length != b_length)
+ {
+ const uchar *end;
+ int swap= 0;
+ /*
+ Check the next non-space character of the longer key. If it's < ' ',
+ then the longer key is smaller than the other key.
+ */
+ if (a_length < b_length)
+ {
+ /* put longer key in a */
+ a_length= b_length;
+ a= b;
+ swap= -1; /* swap sign of result */
+ }
+ for (end= a + a_length-length; a < end ; a++)
+ {
+ if (*a != ' ')
+ return ((int) *a - (int) ' ') ^ swap;
+ }
+ }
+ return res;
}
@@ -2696,7 +2730,7 @@ static my_bool my_like_range_gbk(CHARSET_INFO *cs __attribute__((unused)),
*min_length= (uint) (min_str - min_org);
*max_length= res_length;
do {
- *min_str++ = '\0'; /* Because if key compression */
+ *min_str++= 0;
*max_str++ = max_sort_char;
} while (min_str != min_end);
return 0;
diff --git a/strings/ctype-latin1.c b/strings/ctype-latin1.c
index 0682b15d135..7a010c3bef8 100644
--- a/strings/ctype-latin1.c
+++ b/strings/ctype-latin1.c
@@ -319,51 +319,105 @@ uchar combo2map[]={
static int my_strnncoll_latin1_de(CHARSET_INFO *cs __attribute__((unused)),
- const uchar *s1, uint len1,
- const uchar *s2, uint len2)
+ const uchar *a, uint a_length,
+ const uchar *b, uint b_length)
{
- const uchar *e1 = s1 + len1;
- const uchar *e2 = s2 + len2;
- uchar c1, c12=0, c2, c22=0;
+ const uchar *a_end= a + a_length;
+ const uchar *b_end= b + b_length;
+ uchar a_char, a_extend= 0, b_char, b_extend= 0;
- while ((s1 < e1 || c12) && (s2 < e2 || c22))
+ while ((a < a_end || a_extend) && (b < b_end || b_extend))
{
- if (c12)
+ if (a_extend)
{
- c1=c12; c12=0;
+ a_char=a_extend; a_extend=0;
}
else
{
- c12=combo2map[*s1];
- c1=combo1map[*s1++];
+ a_extend=combo2map[*a];
+ a_char=combo1map[*a++];
}
- if (c22)
+ if (b_extend)
{
- c2=c22; c22=0;
+ b_char=b_extend; b_extend=0;
}
else
{
- c22=combo2map[*s2];
- c2=combo1map[*s2++];
+ b_extend=combo2map[*b];
+ b_char=combo1map[*b++];
}
- if (c1 != c2) return (int)c1 - (int)c2;
+ if (a_char != b_char)
+ return (int) a_char - (int) b_char;
}
-
/*
A simple test of string lengths won't work -- we test to see
which string ran out first
*/
- return (s1 < e1 || c12) ? 1 : (s2 < e2 || c22) ? -1 : 0;
+ return ((a < a_end || a_extend) ? 1 :
+ (b < b_end || b_extend) ? -1 : 0);
}
-static int my_strnncollsp_latin1_de(CHARSET_INFO *cs,
- const uchar *s, uint slen,
- const uchar *t, uint tlen)
+static int my_strnncollsp_latin1_de(CHARSET_INFO *cs __attribute__((unused)),
+ const uchar *a, uint a_length,
+ const uchar *b, uint b_length)
{
- for ( ; slen && s[slen-1] == ' ' ; slen--);
- for ( ; tlen && t[tlen-1] == ' ' ; tlen--);
- return my_strnncoll_latin1_de(cs,s,slen,t,tlen);
+ const uchar *a_end= a + a_length;
+ const uchar *b_end= b + b_length;
+ uchar a_char, a_extend= 0, b_char, b_extend= 0;
+
+ while ((a < a_end || a_extend) && (b < b_end || b_extend))
+ {
+ if (a_extend)
+ {
+ a_char=a_extend;
+ a_extend= 0;
+ }
+ else
+ {
+ a_extend= combo2map[*a];
+ a_char= combo1map[*a++];
+ }
+ if (b_extend)
+ {
+ b_char= b_extend;
+ b_extend= 0;
+ }
+ else
+ {
+ b_extend= combo2map[*b];
+ b_char= combo1map[*b++];
+ }
+ if (a_char != b_char)
+ return (int) a_char - (int) b_char;
+ }
+ /* Check if double character last */
+ if (a_extend)
+ return 1;
+ if (b_extend)
+ return -1;
+
+ if (a != a_end || b != b_end)
+ {
+ int swap= 0;
+ /*
+ Check the next non-space character of the longer key. If it's < ' ',
+ then the longer key is smaller than the other key.
+ */
+ if (a == a_end)
+ {
+ /* put longer key in a */
+ a_end= b_end;
+ a= b;
+ swap= -1; /* swap sign of result */
+ }
+ for ( ; a < a_end ; a++)
+ {
+ if (*a != ' ')
+ return ((int) *a - (int) ' ') ^ swap;
+ }
+ }
+ return 0;
}
@@ -385,6 +439,32 @@ static int my_strnxfrm_latin1_de(CHARSET_INFO *cs __attribute__((unused)),
}
+void my_hash_sort_latin1_de(CHARSET_INFO *cs __attribute__((unused)),
+ const uchar *key, uint len,
+ ulong *nr1, ulong *nr2)
+{
+ const uchar *end= key+len;
+ /*
+ Remove end space. We have to do this to be able to compare
+ 'AE' and 'Ä' as identical
+ */
+ while (end > key && end[-1] == ' ')
+ end--;
+
+ for (; key < end ; key++)
+ {
+ uint X= (uint) combo1map[(uint) *key];
+ nr1[0]^=(ulong) ((((uint) nr1[0] & 63)+nr2[0]) * X) + (nr1[0] << 8);
+ nr2[0]+=3;
+ if ((X= combo2map[*key]))
+ {
+ nr1[0]^=(ulong) ((((uint) nr1[0] & 63)+nr2[0]) * X) + (nr1[0] << 8);
+ nr2[0]+=3;
+ }
+ }
+}
+
+
static MY_COLLATION_HANDLER my_collation_german2_ci_handler=
{
my_strnncoll_latin1_de,
@@ -394,7 +474,7 @@ static MY_COLLATION_HANDLER my_collation_german2_ci_handler=
my_wildcmp_8bit,
my_strcasecmp_8bit,
my_instr_simple,
- my_hash_sort_simple
+ my_hash_sort_latin1_de
};
diff --git a/strings/ctype-mb.c b/strings/ctype-mb.c
index 2f7cf698664..ed772a68845 100644
--- a/strings/ctype-mb.c
+++ b/strings/ctype-mb.c
@@ -347,6 +347,7 @@ uint my_instr_mb(CHARSET_INFO *cs,
return 0;
}
+
/* BINARY collations handlers for MB charsets */
static int my_strnncoll_mb_bin(CHARSET_INFO * cs __attribute__((unused)),
@@ -357,20 +358,6 @@ static int my_strnncoll_mb_bin(CHARSET_INFO * cs __attribute__((unused)),
return cmp ? cmp : (int) (slen - tlen);
}
-static int my_strnncollsp_mb_bin(CHARSET_INFO * cs __attribute__((unused)),
- const uchar *s, uint slen,
- const uchar *t, uint tlen)
-{
- int len, cmp;
-
- for ( ; slen && s[slen-1] == ' ' ; slen--);
- for ( ; tlen && t[tlen-1] == ' ' ; tlen--);
-
- len = ( slen > tlen ) ? tlen : slen;
-
- cmp= memcmp(s,t,len);
- return cmp ? cmp : (int) (slen - tlen);
-}
static int my_strnxfrm_mb_bin(CHARSET_INFO *cs __attribute__((unused)),
uchar * dest, uint len,
@@ -526,7 +513,7 @@ static int my_wildcmp_mb_bin(CHARSET_INFO *cs,
MY_COLLATION_HANDLER my_collation_mb_bin_handler =
{
my_strnncoll_mb_bin,
- my_strnncollsp_mb_bin,
+ my_strnncoll_mb_bin,
my_strnxfrm_mb_bin,
my_like_range_simple,
my_wildcmp_mb_bin,
diff --git a/strings/ctype-simple.c b/strings/ctype-simple.c
index 0aae60a0b56..c8eb3c07a3f 100644
--- a/strings/ctype-simple.c
+++ b/strings/ctype-simple.c
@@ -60,25 +60,69 @@ int my_strnncoll_simple(CHARSET_INFO * cs, const uchar *s, uint slen,
}
-int my_strnncollsp_simple(CHARSET_INFO * cs, const uchar *s, uint slen,
- const uchar *t, uint tlen)
+/*
+ Compare strings, discarding end space
+
+ SYNOPSIS
+ my_strnncollsp_simple()
+ cs character set handler
+ a First string to compare
+ a_length Length of 'a'
+ b Second string to compare
+ b_length Length of 'b'
+
+ IMPLEMENTATION
+ If one string is shorter than the other, then we space-extend the shorter
+ one so that the strings have equal length.
+
+ This will ensure that the following things hold:
+
+ "a" == "a "
+ "a\0" < "a"
+ "a\0" < "a "
+
+ RETURN
+ < 0 a < b
+ = 0 a == b
+ > 0 a > b
+*/
+
+int my_strnncollsp_simple(CHARSET_INFO * cs, const uchar *a, uint a_length,
+ const uchar *b, uint b_length)
{
- uchar *map= cs->sort_order;
- int len;
-
- for ( ; slen && s[slen-1] == ' ' ; slen--);
- for ( ; tlen && t[tlen-1] == ' ' ; tlen--);
-
- len = ( slen > tlen ) ? tlen : slen;
-
- while (len--)
+ const uchar *map= cs->sort_order, *end;
+ uint length;
+
+ end= a + (length= min(a_length, b_length));
+ while (a < end)
{
- if (map[*s++] != map[*t++])
- return ((int) map[s[-1]] - (int) map[t[-1]]);
+ if (map[*a++] != map[*b++])
+ return ((int) map[a[-1]] - (int) map[b[-1]]);
}
- return (int) (slen-tlen);
+ if (a_length != b_length)
+ {
+ int swap= 0;
+ /*
+ Check the next non-space character of the longer key. If it's < ' ',
+ then the longer key is smaller than the other key.
+ */
+ if (a_length < b_length)
+ {
+ /* put longer key in a */
+ a_length= b_length;
+ a= b;
+ swap= -1; /* swap sign of result */
+ }
+ for (end= a + a_length-length; a < end ; a++)
+ {
+ if (*a != ' ')
+ return ((int) *a - (int) ' ') ^ swap;
+ }
+ }
+ return 0;
}
+
void my_caseup_str_8bit(CHARSET_INFO * cs,char *str)
{
register uchar *map=cs->to_upper;
@@ -169,8 +213,8 @@ int my_snprintf_8bit(CHARSET_INFO *cs __attribute__((unused)),
void my_hash_sort_simple(CHARSET_INFO *cs,
- const uchar *key, uint len,
- ulong *nr1, ulong *nr2)
+ const uchar *key, uint len,
+ ulong *nr1, ulong *nr2)
{
register uchar *sort_order=cs->sort_order;
const uchar *pos = key;
@@ -953,9 +997,10 @@ my_bool my_like_range_simple(CHARSET_INFO *cs,
{
*min_length= (uint) (min_str - min_org);
*max_length=res_length;
- do {
- *min_str++ = ' '; /* Because if key compression */
- *max_str++ = (char) cs->max_sort_char;
+ do
+ {
+ *min_str++= 0;
+ *max_str++= (char) cs->max_sort_char;
} while (min_str != min_end);
return 0;
}
@@ -963,13 +1008,6 @@ my_bool my_like_range_simple(CHARSET_INFO *cs,
}
*min_length= *max_length = (uint) (min_str - min_org);
- /* Temporary fix for handling w_one at end of string (key compression) */
- {
- char *tmp;
- for (tmp= min_str ; tmp > min_org && tmp[-1] == '\0';)
- *--tmp=' ';
- }
-
while (min_str != min_end)
*min_str++ = *max_str++ = ' '; /* Because if key compression */
return 0;
diff --git a/strings/ctype-sjis.c b/strings/ctype-sjis.c
index c337b8122fb..91a24fa8bee 100644
--- a/strings/ctype-sjis.c
+++ b/strings/ctype-sjis.c
@@ -184,7 +184,7 @@ static uchar NEAR sort_order_sjis[]=
static int ismbchar_sjis(CHARSET_INFO *cs __attribute__((unused)),
- const char* p, const char *e)
+ const char* p, const char *e)
{
return (issjishead((uchar) *p) && (e-p)>1 && issjistail((uchar)p[1]) ? 2: 0);
}
@@ -197,59 +197,101 @@ static int mbcharlen_sjis(CHARSET_INFO *cs __attribute__((unused)),uint c)
#define sjiscode(c,d) ((((uint) (uchar)(c)) << 8) | (uint) (uchar) (d))
-static int my_strnncoll_sjis(CHARSET_INFO *cs __attribute__((unused)),
- const uchar *s1, uint len1,
- const uchar *s2, uint len2)
+
+static int my_strnncoll_sjis_internal(CHARSET_INFO *cs,
+ const uchar **a_res, uint a_length,
+ const uchar **b_res, uint b_length)
{
- const uchar *e1 = s1 + len1;
- const uchar *e2 = s2 + len2;
- while (s1 < e1 && s2 < e2) {
- if (ismbchar_sjis(cs,(char*) s1, (char*) e1) &&
- ismbchar_sjis(cs,(char*) s2, (char*) e2)) {
- uint c1 = sjiscode(*s1, *(s1+1));
- uint c2 = sjiscode(*s2, *(s2+1));
- if (c1 != c2)
- return c1 - c2;
- s1 += 2;
- s2 += 2;
- } else {
- if (sort_order_sjis[(uchar)*s1] != sort_order_sjis[(uchar)*s2])
- return sort_order_sjis[(uchar)*s1] - sort_order_sjis[(uchar)*s2];
- s1++;
- s2++;
+ const uchar *a= *a_res, *b= *b_res;
+ const uchar *a_end= a + a_length;
+ const uchar *b_end= b + b_length;
+ while (a < a_end && b < b_end)
+ {
+ if (ismbchar_sjis(cs,(char*) a, (char*) a_end) &&
+ ismbchar_sjis(cs,(char*) b, (char*) b_end))
+ {
+ uint a_char= sjiscode(*a, *(a+1));
+ uint b_char= sjiscode(*b, *(b+1));
+ if (a_char != b_char)
+ return a_char - b_char;
+ a += 2;
+ b += 2;
+ } else
+ {
+ if (sort_order_sjis[(uchar)*a] != sort_order_sjis[(uchar)*b])
+ return sort_order_sjis[(uchar)*a] - sort_order_sjis[(uchar)*b];
+ a++;
+ b++;
}
}
- return len1 - len2;
+ *a_res= a;
+ *b_res= b;
+ return 0;
+}
+
+
+static int my_strnncoll_sjis(CHARSET_INFO *cs __attribute__((unused)),
+ const uchar *a, uint a_length,
+ const uchar *b, uint b_length)
+{
+ int res= my_strnncoll_sjis_internal(cs, &a, a_length, &b, b_length);
+ return res ? res : (int) (a_length - b_length);
}
-static
-int my_strnncollsp_sjis(CHARSET_INFO * cs,
- const uchar *s, uint slen,
- const uchar *t, uint tlen)
+
+static int my_strnncollsp_sjis(CHARSET_INFO *cs __attribute__((unused)),
+ const uchar *a, uint a_length,
+ const uchar *b, uint b_length)
{
- for ( ; slen && s[slen-1] == ' ' ; slen--);
- for ( ; tlen && t[tlen-1] == ' ' ; tlen--);
- return my_strnncoll_sjis(cs,s,slen,t,tlen);
+ const uchar *a_end= a + a_length;
+ const uchar *b_end= b + b_length;
+ int res= my_strnncoll_sjis_internal(cs, &a, a_length, &b, b_length);
+ if (!res && (a != a_end || b != b_end))
+ {
+ int swap= 0;
+ /*
+ Check the next non-space character of the longer key. If it's < ' ',
+ then the longer key is smaller than the other key.
+ */
+ if (a == a_end)
+ {
+ /* put longer key in a */
+ a_end= b_end;
+ a= b;
+ swap= -1; /* swap sign of result */
+ }
+ for (; a < a_end ; a++)
+ {
+ if (*a != ' ')
+ return ((int) *a - (int) ' ') ^ swap;
+ }
+ }
+ return res;
}
+
+
static int my_strnxfrm_sjis(CHARSET_INFO *cs __attribute__((unused)),
uchar *dest, uint len,
const uchar *src, uint srclen)
{
uchar *d_end = dest + len;
uchar *s_end = (uchar*) src + srclen;
- while (dest < d_end && src < s_end) {
- if (ismbchar_sjis(cs,(char*) src, (char*) s_end)) {
+ while (dest < d_end && src < s_end)
+ {
+ if (ismbchar_sjis(cs,(char*) src, (char*) s_end))
+ {
*dest++ = *src++;
if (dest < d_end && src < s_end)
*dest++ = *src++;
- } else {
- *dest++ = sort_order_sjis[(uchar)*src++];
}
+ else
+ *dest++ = sort_order_sjis[(uchar)*src++];
}
return srclen;
}
+
/*
** Calculate min_str and max_str that ranges a LIKE string.
** Arguments:
@@ -300,12 +342,14 @@ static my_bool my_like_range_sjis(CHARSET_INFO *cs __attribute__((unused)),
ptr++;
continue;
}
- if (*ptr == w_many) { /* '%' in SQL */
+ if (*ptr == w_many)
+ { /* '%' in SQL */
*min_length = (uint)(min_str - min_org);
*max_length = res_length;
- do {
- *min_str++ = ' '; /* Because if key compression */
- *max_str++ = max_sort_char;
+ do
+ {
+ *min_str++= 0;
+ *max_str++= max_sort_char;
} while (min_str < min_end);
return 0;
}
diff --git a/strings/ctype-tis620.c b/strings/ctype-tis620.c
index 59be820863a..954a3768536 100644
--- a/strings/ctype-tis620.c
+++ b/strings/ctype-tis620.c
@@ -518,6 +518,10 @@ static uint thai2sortable(uchar *tstr, uint len)
strncoll() replacement, compare 2 string, both are converted to sortable
string
+ NOTE:
+ We can't cut the strings at the first \0, as this would break comparison
+ with LIKE ranges, where the min range is filled with trailing \0 bytes
+
Arg: 2 Strings and it compare length
Ret: strcmp result
*/
@@ -530,9 +534,6 @@ int my_strnncoll_tis620(CHARSET_INFO *cs __attribute__((unused)),
uchar *tc1, *tc2;
int i;
- /* Cut strings at end \0 */
- len1= (int) strnlen((char*) s1,len1);
- len2= (int) strnlen((char*) s2,len2);
tc1= buf;
if ((len1 + len2 +2) > (int) sizeof(buf))
tc1= (uchar*) malloc(len1+len2);
@@ -550,6 +551,10 @@ int my_strnncoll_tis620(CHARSET_INFO *cs __attribute__((unused)),
}
+/*
+ TODO: Has to be fixed like strnncollsp in ctype-simple.c
+*/
+
static
int my_strnncollsp_tis620(CHARSET_INFO * cs,
const uchar *s, uint slen,
@@ -637,8 +642,9 @@ my_bool my_like_range_tis620(CHARSET_INFO *cs __attribute__((unused)),
{
*min_length= (uint) (min_str - min_org);
*max_length=res_length;
- do {
- *min_str++ = ' '; /* Because of key compression */
+ do
+ {
+ *min_str++ = 0;
*max_str++ = max_sort_chr;
} while (min_str != min_end);
return 0;
diff --git a/strings/ctype-ucs2.c b/strings/ctype-ucs2.c
index f4c1a22939a..7d32dcb1b61 100644
--- a/strings/ctype-ucs2.c
+++ b/strings/ctype-ucs2.c
@@ -90,8 +90,8 @@ static uchar to_upper_ucs2[] = {
};
-static int my_ucs2_uni (CHARSET_INFO *cs __attribute__((unused)),
- my_wc_t * pwc, const uchar *s, const uchar *e)
+static int my_ucs2_uni(CHARSET_INFO *cs __attribute__((unused)),
+ my_wc_t * pwc, const uchar *s, const uchar *e)
{
if (s+2 > e) /* Need 2 characters */
return MY_CS_TOOFEW(0);
@@ -100,8 +100,8 @@ static int my_ucs2_uni (CHARSET_INFO *cs __attribute__((unused)),
return 2;
}
-static int my_uni_ucs2 (CHARSET_INFO *cs __attribute__((unused)) ,
- my_wc_t wc, uchar *r, uchar *e)
+static int my_uni_ucs2(CHARSET_INFO *cs __attribute__((unused)) ,
+ my_wc_t wc, uchar *r, uchar *e)
{
if ( r+2 > e )
return MY_CS_TOOSMALL;
@@ -128,13 +128,15 @@ static void my_caseup_ucs2(CHARSET_INFO *cs, char *s, uint slen)
}
}
-static void my_hash_sort_ucs2(CHARSET_INFO *cs, const uchar *s, uint slen, ulong *n1, ulong *n2)
+
+static void my_hash_sort_ucs2(CHARSET_INFO *cs, const uchar *s, uint slen,
+ ulong *n1, ulong *n2)
{
my_wc_t wc;
int res;
const uchar *e=s+slen;
- while ((s < e) && (res=my_ucs2_uni(cs,&wc, (uchar *)s, (uchar*)e))>0 )
+ while ((s < e) && (res=my_ucs2_uni(cs,&wc, (uchar *)s, (uchar*)e)) >0)
{
int plane = (wc>>8) & 0xFF;
wc = uni_plane[plane] ? uni_plane[plane][wc & 0xFF].sort : wc;
@@ -148,7 +150,7 @@ static void my_hash_sort_ucs2(CHARSET_INFO *cs, const uchar *s, uint slen, ulong
static void my_caseup_str_ucs2(CHARSET_INFO * cs __attribute__((unused)),
- char * s __attribute__((unused)))
+ char * s __attribute__((unused)))
{
}
@@ -173,13 +175,14 @@ static void my_casedn_ucs2(CHARSET_INFO *cs, char *s, uint slen)
}
static void my_casedn_str_ucs2(CHARSET_INFO *cs __attribute__((unused)),
- char * s __attribute__((unused)))
+ char * s __attribute__((unused)))
{
}
static int my_strnncoll_ucs2(CHARSET_INFO *cs,
- const uchar *s, uint slen, const uchar *t, uint tlen)
+ const uchar *s, uint slen,
+ const uchar *t, uint tlen)
{
int s_res,t_res;
my_wc_t s_wc,t_wc;
@@ -213,8 +216,9 @@ static int my_strnncoll_ucs2(CHARSET_INFO *cs,
return ( (se-s) - (te-t) );
}
+
static int my_strncasecmp_ucs2(CHARSET_INFO *cs,
- const char *s, const char *t, uint len)
+ const char *s, const char *t, uint len)
{
int s_res,t_res;
my_wc_t s_wc,t_wc;
@@ -249,6 +253,7 @@ static int my_strncasecmp_ucs2(CHARSET_INFO *cs,
return ( (se-s) - (te-t) );
}
+
static int my_strcasecmp_ucs2(CHARSET_INFO *cs, const char *s, const char *t)
{
uint s_len=strlen(s);
@@ -257,6 +262,7 @@ static int my_strcasecmp_ucs2(CHARSET_INFO *cs, const char *s, const char *t)
return my_strncasecmp_ucs2(cs, s, t, len);
}
+
static int my_strnxfrm_ucs2(CHARSET_INFO *cs,
uchar *dst, uint dstlen, const uchar *src, uint srclen)
{
@@ -288,6 +294,7 @@ static int my_strnxfrm_ucs2(CHARSET_INFO *cs,
return dst - dst_orig;
}
+
static int my_ismbchar_ucs2(CHARSET_INFO *cs __attribute__((unused)),
const char *b __attribute__((unused)),
const char *e __attribute__((unused)))
@@ -295,6 +302,7 @@ static int my_ismbchar_ucs2(CHARSET_INFO *cs __attribute__((unused)),
return 2;
}
+
static int my_mbcharlen_ucs2(CHARSET_INFO *cs __attribute__((unused)) ,
uint c __attribute__((unused)))
{
@@ -380,8 +388,8 @@ static int my_vsnprintf_ucs2(char *dst, uint n, const char* fmt, va_list ap)
return (uint) (dst - start);
}
-static int my_snprintf_ucs2(CHARSET_INFO *cs __attribute__((unused))
- ,char* to, uint n, const char* fmt, ...)
+static int my_snprintf_ucs2(CHARSET_INFO *cs __attribute__((unused)),
+ char* to, uint n, const char* fmt, ...)
{
va_list args;
va_start(args,fmt);
@@ -389,9 +397,9 @@ static int my_snprintf_ucs2(CHARSET_INFO *cs __attribute__((unused))
}
-long my_strntol_ucs2(CHARSET_INFO *cs,
- const char *nptr, uint l, int base,
- char **endptr, int *err)
+long my_strntol_ucs2(CHARSET_INFO *cs,
+ const char *nptr, uint l, int base,
+ char **endptr, int *err)
{
int negative=0;
int overflow;
@@ -504,9 +512,9 @@ bs:
}
-ulong my_strntoul_ucs2(CHARSET_INFO *cs,
- const char *nptr, uint l, int base,
- char **endptr, int *err)
+ulong my_strntoul_ucs2(CHARSET_INFO *cs,
+ const char *nptr, uint l, int base,
+ char **endptr, int *err)
{
int negative=0;
int overflow;
@@ -1334,8 +1342,8 @@ my_bool my_like_range_ucs2(CHARSET_INFO *cs,
*min_length= (uint) (min_str - min_org);
*max_length=res_length;
do {
- *min_str++ = '\0';
- *min_str++ = ' '; /* Because if key compression */
+ *min_str++ = 0;
+ *min_str++ = 0;
*max_str++ = (char) cs->max_sort_char >>8;
*max_str++ = (char) cs->max_sort_char & 255;
} while (min_str + 1 < min_end);
diff --git a/strings/ctype-utf8.c b/strings/ctype-utf8.c
index dca73e5a79f..886ecfbd0c9 100644
--- a/strings/ctype-utf8.c
+++ b/strings/ctype-utf8.c
@@ -1801,7 +1801,8 @@ static void my_casedn_str_utf8(CHARSET_INFO *cs, char * s)
static int my_strnncoll_utf8(CHARSET_INFO *cs,
- const uchar *s, uint slen, const uchar *t, uint tlen)
+ const uchar *s, uint slen,
+ const uchar *t, uint tlen)
{
int s_res,t_res;
my_wc_t s_wc,t_wc;
@@ -1835,6 +1836,11 @@ static int my_strnncoll_utf8(CHARSET_INFO *cs,
return ( (se-s) - (te-t) );
}
+
+/*
+ TODO: Has to be fixed as strnncollsp in ctype-simple
+*/
+
static
int my_strnncollsp_utf8(CHARSET_INFO * cs,
const uchar *s, uint slen,
diff --git a/strings/ctype-win1250ch.c b/strings/ctype-win1250ch.c
index bda349f1988..8fd4e612713 100644
--- a/strings/ctype-win1250ch.c
+++ b/strings/ctype-win1250ch.c
@@ -467,6 +467,10 @@ static int my_strnncoll_win1250ch(CHARSET_INFO *cs __attribute__((unused)),
}
+/*
+ TODO: Has to be fixed as strnncollsp in ctype-simple
+*/
+
static
int my_strnncollsp_win1250ch(CHARSET_INFO * cs,
const uchar *s, uint slen,