diff options
author | unknown <monty@mysql.com> | 2004-03-25 15:05:01 +0200 |
---|---|---|
committer | unknown <monty@mysql.com> | 2004-03-25 15:05:01 +0200 |
commit | 3c46af6cf4e935683e5288c55f8b4ff4badaa553 (patch) | |
tree | 27e5290a14e7d3e6c7aaced5b2ce86546de4721e /myisam | |
parent | 2d20eddcbbfdac3403313f860464937d4250eb8e (diff) | |
download | mariadb-git-3c46af6cf4e935683e5288c55f8b4ff4badaa553.tar.gz |
BTREE-indexes in HEAP tables can now be used to optimize ORDER BY
Don't read character set files if we are using only the default charset. In most cases the user will no longer get a warning about missing character set files.
Compare strings with space extend instead of space strip. Now the following comparisons hold: "a" == "a " and "a\t" < "a". (Bug #3152).
Note: Because of the above fix, one has to do a REPAIR on any table that has an ASCII character < 32 as the last character in a CHAR/VARCHAR/TEXT column.
heap/hp_hash.c:
Comments and DBUG information
include/my_handler.h:
Updated prototype for mi_compare_text
myisam/ft_boolean_search.c:
Updated calls to mi_compare_text
myisam/ft_nlq_search.c:
Updated calls to mi_compare_text
myisam/ft_parser.c:
Updated calls to mi_compare_text
myisam/ft_stopwords.c:
Updated calls to mi_compare_text
myisam/ft_update.c:
Updated calls to mi_compare_text
myisam/mi_check.c:
Updated calls to mi_compare_text
myisam/mi_search.c:
Changed all string comparisons that removed end space to instead extend the shorter string with space
myisam/mi_unique.c:
Updated calls to mi_compare_text
myisam/mi_write.c:
Updated calls to mi_compare_text
myisam/myisam_ftdump.c:
Removed compiler warning
mysql-test/r/ctype_collate.result:
Fixed wrong result
mysql-test/r/heap_btree.result:
More tests
mysql-test/t/heap_btree.test:
more tests
mysys/charset.c:
Don't read charsets if we are only using default charset
Don't require 'init_available_charsets' to succeed.
mysys/my_handler.c:
Compare strings with space extend instead of space strip
mysys/tree.c:
Fixed code to get better results for range optimizer
sql/field.cc:
Compare strings with space extend instead of space strip
sql/filesort.cc:
Compare strings with space extend instead of space strip
sql/ha_heap.cc:
Created bit map for keys that are using BTREE. This allows the optimizer to use BTREEs for sorting
sql/ha_heap.h:
Created bit map for keys that are using BTREE. This allows the optimizer to use BTREEs for sorting
strings/ctype-big5.c:
Compare strings with space extend instead of space strip
strings/ctype-czech.c:
Indentation cleanup. Should be fixed to use space extend
strings/ctype-gbk.c:
Compare strings with space extend instead of space strip
strings/ctype-latin1.c:
Compare strings with space extend instead of space strip
Added missing my_hash_sort_latin1_de function
strings/ctype-mb.c:
For binary strings, don't remove end space when comparing
strings/ctype-simple.c:
Compare strings with space extend instead of space strip
strings/ctype-sjis.c:
Compare strings with space extend instead of space strip
strings/ctype-tis620.c:
Added comments that we should fix end space handling
strings/ctype-ucs2.c:
indentation fixes
strings/ctype-utf8.c:
Added comments that we should fix end space handling
strings/ctype-win1250ch.c:
Added comments that we should fix end space handling
Diffstat (limited to 'myisam')
-rw-r--r-- | myisam/ft_boolean_search.c | 8 | ||||
-rw-r--r-- | myisam/ft_nlq_search.c | 2 | ||||
-rw-r--r-- | myisam/ft_parser.c | 2 | ||||
-rw-r--r-- | myisam/ft_stopwords.c | 2 | ||||
-rw-r--r-- | myisam/ft_update.c | 4 | ||||
-rw-r--r-- | myisam/mi_check.c | 2 | ||||
-rw-r--r-- | myisam/mi_search.c | 123 | ||||
-rw-r--r-- | myisam/mi_unique.c | 2 | ||||
-rw-r--r-- | myisam/mi_write.c | 2 | ||||
-rw-r--r-- | myisam/myisam_ftdump.c | 2 |
10 files changed, 94 insertions, 55 deletions
diff --git a/myisam/ft_boolean_search.c b/myisam/ft_boolean_search.c index bff1232750e..1d4bfee86a4 100644 --- a/myisam/ft_boolean_search.c +++ b/myisam/ft_boolean_search.c @@ -124,7 +124,7 @@ static int FTB_WORD_cmp_list(CHARSET_INFO *cs, FTB_WORD **a, FTB_WORD **b) { /* ORDER BY word DESC, ndepth DESC */ int i= mi_compare_text(cs, (uchar*) (*b)->word+1,(*b)->len-1, - (uchar*) (*a)->word+1,(*a)->len-1,0); + (uchar*) (*a)->word+1,(*a)->len-1,0,0); if (!i) i=CMP_NUM((*b)->ndepth,(*a)->ndepth); return i; @@ -228,7 +228,7 @@ static int _ft2_search(FTB *ftb, FTB_WORD *ftbw, my_bool init_search) ftbw->len - (ftbw->flags & FTB_FLAG_TRUNC), (uchar*) ftbw->word + (ftbw->flags & FTB_FLAG_TRUNC), ftbw->len - (ftbw->flags & FTB_FLAG_TRUNC), - 0); + 0,0); } if (r) /* not found */ @@ -633,7 +633,7 @@ float ft_boolean_find_relevance(FT_INFO *ftb, byte *record, uint length) ftbw=ftb->list[c]; if (mi_compare_text(ftb->charset, (uchar*) word.pos, word.len, (uchar*) ftbw->word+1, ftbw->len-1, - (my_bool) (ftbw->flags&FTB_FLAG_TRUNC)) >0) + (my_bool) (ftbw->flags&FTB_FLAG_TRUNC),0) >0) b=c; else a=c; @@ -643,7 +643,7 @@ float ft_boolean_find_relevance(FT_INFO *ftb, byte *record, uint length) ftbw=ftb->list[c]; if (mi_compare_text(ftb->charset, (uchar*) word.pos, word.len, (uchar*) ftbw->word+1,ftbw->len-1, - (my_bool) (ftbw->flags&FTB_FLAG_TRUNC))) + (my_bool) (ftbw->flags&FTB_FLAG_TRUNC),0)) break; if (ftbw->docid[1] == docid) continue; diff --git a/myisam/ft_nlq_search.c b/myisam/ft_nlq_search.c index 45d13f56c6e..03875abe7b0 100644 --- a/myisam/ft_nlq_search.c +++ b/myisam/ft_nlq_search.c @@ -96,7 +96,7 @@ static int walk_and_match(FT_WORD *word, uint32 count, ALL_IN_ONE *aio) if (keylen && mi_compare_text(aio->charset,info->lastkey+1, - info->lastkey_length-extra-1, keybuff+1,keylen-1,0)) + info->lastkey_length-extra-1, keybuff+1,keylen-1,0,0)) break; subkeys=ft_sintXkorr(info->lastkey+info->lastkey_length-extra); diff --git a/myisam/ft_parser.c b/myisam/ft_parser.c index 
250e92566b7..b0fe180d0fb 100644 --- a/myisam/ft_parser.c +++ b/myisam/ft_parser.c @@ -27,7 +27,7 @@ typedef struct st_ft_docstat { static int FT_WORD_cmp(CHARSET_INFO* cs, FT_WORD *w1, FT_WORD *w2) { return mi_compare_text(cs, (uchar*) w1->pos, w1->len, - (uchar*) w2->pos, w2->len, 0); + (uchar*) w2->pos, w2->len, 0, 0); } static int walk_and_copy(FT_WORD *word,uint32 count,FT_DOCSTAT *docstat) diff --git a/myisam/ft_stopwords.c b/myisam/ft_stopwords.c index 6682de18c65..112af87d201 100644 --- a/myisam/ft_stopwords.c +++ b/myisam/ft_stopwords.c @@ -32,7 +32,7 @@ static int FT_STOPWORD_cmp(void* cmp_arg __attribute__((unused)), { return mi_compare_text(default_charset_info, (uchar *)w1->pos,w1->len, - (uchar *)w2->pos,w2->len,0); + (uchar *)w2->pos,w2->len,0,0); } static void FT_STOPWORD_free(FT_STOPWORD *w, TREE_FREE action, diff --git a/myisam/ft_update.c b/myisam/ft_update.c index 4015abbbeba..beccc062270 100644 --- a/myisam/ft_update.c +++ b/myisam/ft_update.c @@ -179,7 +179,7 @@ int _mi_ft_cmp(MI_INFO *info, uint keynr, const byte *rec1, const byte *rec2) if ((ftsi1.pos != ftsi2.pos) && (!ftsi1.pos || !ftsi2.pos || mi_compare_text(cs, (uchar*) ftsi1.pos,ftsi1.len, - (uchar*) ftsi2.pos,ftsi2.len,0))) + (uchar*) ftsi2.pos,ftsi2.len,0,0))) DBUG_RETURN(THOSE_TWO_DAMN_KEYS_ARE_REALLY_DIFFERENT); } DBUG_RETURN(GEE_THEY_ARE_ABSOLUTELY_IDENTICAL); @@ -207,7 +207,7 @@ int _mi_ft_update(MI_INFO *info, uint keynr, byte *keybuf, while(old_word->pos && new_word->pos) { cmp= mi_compare_text(cs, (uchar*) old_word->pos,old_word->len, - (uchar*) new_word->pos,new_word->len,0); + (uchar*) new_word->pos,new_word->len,0,0); cmp2= cmp ? 
0 : (fabs(old_word->weight - new_word->weight) > 1.e-5); if (cmp < 0 || cmp2) diff --git a/myisam/mi_check.c b/myisam/mi_check.c index cdaed59cd01..9be191f349c 100644 --- a/myisam/mi_check.c +++ b/myisam/mi_check.c @@ -3313,7 +3313,7 @@ static int sort_ft_key_write(MI_SORT_PARAM *sort_param, const void *a) if (val_off == a_len && mi_compare_text(sort_param->seg->charset, ((uchar *)a)+1,a_len-1, - ft_buf->lastkey+1,val_off-1, 0)==0) + ft_buf->lastkey+1,val_off-1, 0, 0)==0) { if (!ft_buf->buf) /* store in second-level tree */ { diff --git a/myisam/mi_search.c b/myisam/mi_search.c index 2871633102d..73a4d229fd6 100644 --- a/myisam/mi_search.c +++ b/myisam/mi_search.c @@ -273,7 +273,8 @@ int _mi_prefix_search(MI_INFO *info, register MI_KEYDEF *keyinfo, uchar *page, uchar *sort_order=keyinfo->seg->charset->sort_order; uchar tt_buff[MI_MAX_KEY_BUFF+2], *t_buff=tt_buff+2; uchar *saved_from, *saved_to, *saved_vseg; - uint saved_length=0, saved_prefix_len=0; + uint saved_length=0, saved_prefix_len=0; + uint length_pack; DBUG_ENTER("_mi_prefix_search"); LINT_INIT(length); @@ -289,26 +290,24 @@ int _mi_prefix_search(MI_INFO *info, register MI_KEYDEF *keyinfo, uchar *page, page+=2+nod_flag; *ret_pos=page; kseg=key; - { - uint lenght_pack; - get_key_pack_length(kseg_len,lenght_pack,kseg); - key_len_skip=lenght_pack+kseg_len; - key_len_left=(int) key_len- (int) key_len_skip; - cmplen=(key_len_left>=0) ? kseg_len : key_len-lenght_pack; - DBUG_PRINT("info",("key: '%.*s'",kseg_len,kseg)); - } -/* - Keys are compressed the following way: + get_key_pack_length(kseg_len,length_pack,kseg); + key_len_skip=length_pack+kseg_len; + key_len_left=(int) key_len- (int) key_len_skip; + cmplen=(key_len_left>=0) ? 
kseg_len : key_len-length_pack; + DBUG_PRINT("info",("key: '%.*s'",kseg_len,kseg)); - If the max length of first key segment <= 127 characters the prefix is - 1 byte else it's 2 byte + /* + Keys are compressed the following way: - prefix The high bit is set if this is a prefix for the prev key - length Packed length if the previous was a prefix byte - [length] Length character of data - next-key-seg Next key segments -*/ + If the max length of first key segment <= 127 characters the prefix is + 1 byte else it's 2 byte + + prefix The high bit is set if this is a prefix for the prev key + length Packed length if the previous was a prefix byte + [length] Length character of data + next-key-seg Next key segments + */ matched=0; /* how many char's from prefix were alredy matched */ len=0; /* length of previous key unpacked */ @@ -350,7 +349,8 @@ int _mi_prefix_search(MI_INFO *info, register MI_KEYDEF *keyinfo, uchar *page, saved_vseg=vseg; saved_prefix_len=prefix_len; - DBUG_PRINT("loop",("page: '%.*s%.*s'",prefix_len,t_buff+seg_len_pack,suffix_len,vseg)); + DBUG_PRINT("loop",("page: '%.*s%.*s'",prefix_len,t_buff+seg_len_pack, + suffix_len,vseg)); { uchar *from=vseg+suffix_len; HA_KEYSEG *keyseg; @@ -396,14 +396,15 @@ int _mi_prefix_search(MI_INFO *info, register MI_KEYDEF *keyinfo, uchar *page, matched=prefix_len+left; - for(my_flag=0;left;left--) + for (my_flag=0;left;left--) if ((my_flag= (int) sort_order[*vseg++] - (int) sort_order[*k++])) break; if (my_flag>0) /* mismatch */ break; - else if (my_flag==0) /* match */ - { /* + if (my_flag==0) /* match */ + { + /* ** len cmplen seg_left_len more_segs ** < matched=len; continue search ** > = prefix ? 
found : (matched=len; continue search) @@ -414,30 +415,68 @@ int _mi_prefix_search(MI_INFO *info, register MI_KEYDEF *keyinfo, uchar *page, */ if (len < cmplen) { - my_flag= -1; + if ((keyinfo->seg->type != HA_KEYTYPE_TEXT && + keyinfo->seg->type != HA_KEYTYPE_VARTEXT)) + my_flag= -1; + else + { + /* We have to compare k and vseg as if they where space extended */ + uchar *end= k+ (cmplen - len); + for ( ; k < end && *k == ' '; k++) ; + if (k == end) + goto cmp_rest; /* should never happen */ + if (*k < (uchar) ' ') + { + my_flag= 1; /* Compared string is smaller */ + break; + } + my_flag= -1; /* Continue searching */ + } } else if (len > cmplen) { - if ((my_flag= (!(nextflag & SEARCH_PREFIX) || key_len_left>0))) - break; - goto fix_flag; - } - else if (key_len_left>0) - { - uint not_used; - if ((flag = ha_key_cmp(keyinfo->seg+1,vseg, - k,key_len_left,nextflag,¬_used)) >= 0) - break; + uchar *end; + if ((nextflag & SEARCH_PREFIX) && key_len_left == 0) + goto fix_flag; + + /* We have to compare k and vseg as if they where space extended */ + for (end=vseg + (len-cmplen) ; + vseg < end && *vseg == (uchar) ' '; + vseg++) ; + if (vseg == end) + goto cmp_rest; /* should never happen */ + + if (*vseg > (uchar) ' ') + { + my_flag= 1; /* Compared string is smaller */ + break; + } + my_flag= -1; /* Continue searching */ } else - { - /* at this line flag==-1 if the following lines were already - visited and 0 otherwise, i.e. flag <=0 here always !!! */ - fix_flag: - if (nextflag & (SEARCH_NO_FIND | SEARCH_LAST)) - flag=(nextflag & (SEARCH_BIGGER | SEARCH_LAST)) ? -1 : 1; - if (flag>=0) break; - } + { + cmp_rest: + if (key_len_left>0) + { + uint not_used; + if ((flag = ha_key_cmp(keyinfo->seg+1,vseg, + k,key_len_left,nextflag,¬_used)) >= 0) + break; + } + else + { + /* + at this line flag==-1 if the following lines were already + visited and 0 otherwise, i.e. flag <=0 here always !!! 
+ */ + fix_flag: + DBUG_ASSERT(flag <= 0); + if (nextflag & (SEARCH_NO_FIND | SEARCH_LAST)) + flag=(nextflag & (SEARCH_BIGGER | SEARCH_LAST)) ? -1 : 1; + if (flag>=0) + break; + } + } } matched-=left; } @@ -1567,7 +1606,7 @@ _mi_calc_var_pack_key_length(MI_KEYDEF *keyinfo,uint nod_flag,uchar *next_key, n_length-=tmp_length; length-=tmp_length+next_length_pack; /* We gained these chars */ } - if (n_length == 0) + if (n_length == 0 && ref_length == new_key_length) { s_temp->n_ref_length=pack_marker; /* Same as prev key */ } diff --git a/myisam/mi_unique.c b/myisam/mi_unique.c index f4ee39e55ca..38b4ed93311 100644 --- a/myisam/mi_unique.c +++ b/myisam/mi_unique.c @@ -180,7 +180,7 @@ int mi_unique_comp(MI_UNIQUEDEF *def, const byte *a, const byte *b, if (type == HA_KEYTYPE_TEXT || type == HA_KEYTYPE_VARTEXT) { if (mi_compare_text(keyseg->charset, (uchar *) pos_a, length, - (uchar *) pos_b, length, 0)) + (uchar *) pos_b, length, 0, 0)) return 1; } else diff --git a/myisam/mi_write.c b/myisam/mi_write.c index 88e7f070642..382fb1156cb 100644 --- a/myisam/mi_write.c +++ b/myisam/mi_write.c @@ -500,7 +500,7 @@ int _mi_insert(register MI_INFO *info, register MI_KEYDEF *keyinfo, get_key_length(alen,a); DBUG_ASSERT(info->ft1_to_ft2==0); if (alen == blen && - mi_compare_text(keyinfo->seg->charset, a, alen, b, blen, 0)==0) + mi_compare_text(keyinfo->seg->charset, a, alen, b, blen, 0, 0)==0) { /* yup. converting */ info->ft1_to_ft2=(DYNAMIC_ARRAY *) diff --git a/myisam/myisam_ftdump.c b/myisam/myisam_ftdump.c index 7d1b20eb854..8ab6a7600b2 100644 --- a/myisam/myisam_ftdump.c +++ b/myisam/myisam_ftdump.c @@ -68,7 +68,7 @@ int main(int argc,char *argv[]) struct { MI_INFO *info; } aio0, *aio=&aio0; /* for GWS_IN_USE */ MY_INIT(argv[0]); - if (error=handle_options(&argc, &argv, my_long_options, get_one_option)) + if ((error=handle_options(&argc, &argv, my_long_options, get_one_option))) exit(error); if (count || dump) verbose=0; |