diff options
author | unknown <serg@serg.mylan> | 2003-10-02 20:22:29 +0200 |
---|---|---|
committer | unknown <serg@serg.mylan> | 2003-10-02 20:22:29 +0200 |
commit | 3205118eaf1ea9a9c22b0b8437fdf5ac06c5ff4a (patch) | |
tree | 4ccd6f1ab67583e81c02d66fd35d83a3d9bee0fe | |
parent | 43a4d55eefaf7409f708377b712ff306c28cc1e1 (diff) | |
download | mariadb-git-3205118eaf1ea9a9c22b0b8437fdf5ac06c5ff4a.tar.gz |
ft1->ft2 auto-conversion on INSERT (WL#725)
-rw-r--r-- | myisam/ft_update.c | 59 | ||||
-rw-r--r-- | myisam/fulltext.h | 2 | ||||
-rw-r--r-- | myisam/mi_delete.c | 18 | ||||
-rw-r--r-- | myisam/mi_open.c | 7 | ||||
-rw-r--r-- | myisam/mi_write.c | 85 | ||||
-rw-r--r-- | myisam/myisamdef.h | 6 | ||||
-rw-r--r-- | mysql-test/r/fulltext2.result | 104 | ||||
-rw-r--r-- | mysql-test/t/fulltext2.test | 80 |
8 files changed, 331 insertions, 30 deletions
diff --git a/myisam/ft_update.c b/myisam/ft_update.c index cdf3b306087..8423b6898cd 100644 --- a/myisam/ft_update.c +++ b/myisam/ft_update.c @@ -21,13 +21,6 @@ #include "ftdefs.h" #include <math.h> -/************************************************************** - This is to make ft-code to ignore keyseg.length at all * - and to index the whole VARCHAR/BLOB instead... */ -#undef set_if_smaller -#define set_if_smaller(A,B) /* no op */ -/**************************************************************/ - void _mi_ft_segiterator_init(MI_INFO *info, uint keynr, const byte *record, FT_SEG_ITERATOR *ftsi) { @@ -88,7 +81,6 @@ uint _mi_ft_segiterator(register FT_SEG_ITERATOR *ftsi) { ftsi->len=uint2korr(ftsi->pos); ftsi->pos+=2; /* Skip VARCHAR length */ - set_if_smaller(ftsi->len,ftsi->seg->length); DBUG_RETURN(1); } if (ftsi->seg->flag & HA_BLOB_PART) @@ -96,7 +88,6 @@ uint _mi_ft_segiterator(register FT_SEG_ITERATOR *ftsi) ftsi->len=_mi_calc_blob_length(ftsi->seg->bit_start,ftsi->pos); memcpy_fixed((char*) &ftsi->pos, ftsi->pos+ftsi->seg->bit_start, sizeof(char*)); - set_if_smaller(ftsi->len,ftsi->seg->length); DBUG_RETURN(1); } ftsi->len=ftsi->seg->length; @@ -305,3 +296,53 @@ uint _ft_make_key(MI_INFO *info, uint keynr, byte *keybuf, FT_WORD *wptr, memcpy(buf+HA_FT_WLEN+2,wptr->pos,wptr->len); DBUG_RETURN(_mi_make_key(info,keynr,(uchar*) keybuf,buf,filepos)); } + +/* + convert key value to ft2 +*/ +uint _mi_ft_convert_to_ft2(MI_INFO *info, uint keynr, uchar *key) +{ + my_off_t root; + DYNAMIC_ARRAY *da=info->ft1_to_ft2; + MI_KEYDEF *keyinfo=&info->s->ft2_keyinfo; + uchar *key_ptr=dynamic_array_ptr(da, 0), *end; + uint length, key_length; + DBUG_ENTER("_mi_ft_convert_to_ft2"); + + /* we'll generate one pageful at once, and insert the rest one-by-one */ + /* calculating the length of this page ...*/ + length=(keyinfo->block_length-2) / keyinfo->keylength; + set_if_smaller(length, da->elements); + length=length * keyinfo->keylength; + + get_key_full_length_rdonly(key_length, key); + while (_mi_ck_delete(info, keynr, key, key_length) == 0) + /* nothing to do here. + _mi_ck_delete() will populate info->ft1_to_ft2 with deleted keys + */; + + /* creating pageful of keys */ + mi_putint(info->buff,length+2,0); + memcpy(info->buff+2, key_ptr, length); + info->buff_used=info->page_changed=1; /* info->buff is used */ + if ((root= _mi_new(info,keyinfo)) == HA_OFFSET_ERROR || + _mi_write_keypage(info,keyinfo,root,info->buff)) + DBUG_RETURN(-1); + + /* inserting the rest of key values */ + end=dynamic_array_ptr(da, da->elements); + for (key_ptr+=length; key_ptr < end; key_ptr+=keyinfo->keylength) + if(_mi_ck_real_write_btree(info, keyinfo, key_ptr, 0, &root, SEARCH_SAME)) + DBUG_RETURN(-1); + + /* now, writing the word key entry */ + ft_intXstore(key+key_length, -da->elements); + _mi_dpointer(info, key+key_length+HA_FT_WLEN, root); + + DBUG_RETURN(_mi_ck_real_write_btree(info, + info->s->keyinfo+keynr, + key, 0, + &info->s->state.key_root[keynr], + SEARCH_SAME)); +} + diff --git a/myisam/fulltext.h b/myisam/fulltext.h index ec267eb3e86..d8c74d4e94b 100644 --- a/myisam/fulltext.h +++ b/myisam/fulltext.h @@ -34,3 +34,5 @@ int _mi_ft_cmp(MI_INFO *, uint, const byte *, const byte *); int _mi_ft_add(MI_INFO *, uint, byte *, const byte *, my_off_t); int _mi_ft_del(MI_INFO *, uint, byte *, const byte *, my_off_t); +uint _mi_ft_convert_to_ft2(MI_INFO *, uint, uchar *); + diff --git a/myisam/mi_delete.c b/myisam/mi_delete.c index 2ab5c5d0319..d8e1aef5eb6 100644 --- a/myisam/mi_delete.c +++ b/myisam/mi_delete.c @@ -18,6 +18,7 @@ #include "fulltext.h" #include "rt_index.h" +#include <assert.h> #ifdef __WIN__ #include <errno.h> @@ -231,13 +232,22 @@ static int d_search(register MI_INFO *info, register MI_KEYDEF *keyinfo, get_key_full_length_rdonly(off, lastkey); subkeys=ft_sintXkorr(lastkey+off); + DBUG_ASSERT(info->ft1_to_ft2==0 || subkeys >=0); comp_flag=SEARCH_SAME; if (subkeys >= 0) { /* normal word, one-level tree structure */ - DBUG_PRINT("info",("FT1")); - flag=(*keyinfo->bin_search)(info,keyinfo,anc_buff,key,USE_WHOLE_KEY, - comp_flag, &keypos, lastkey, &last_key); + if (info->ft1_to_ft2) + { + /* we're in ft1->ft2 conversion mode. Saving key data */ + insert_dynamic(info->ft1_to_ft2, lastkey+off); + } + else + { + /* we need exact match only if not in ft1->ft2 conversion mode */ + flag=(*keyinfo->bin_search)(info,keyinfo,anc_buff,key,USE_WHOLE_KEY, + comp_flag, &keypos, lastkey, &last_key); + } /* fall through to normal delete */ } else @@ -252,13 +262,11 @@ static int d_search(register MI_INFO *info, register MI_KEYDEF *keyinfo, if (subkeys == -1) { /* the last entry in sub-tree */ - DBUG_PRINT("info",("FT2: the last entry")); _mi_dispose(info, keyinfo, root); /* fall through to normal delete */ } else { - DBUG_PRINT("info",("FT2: going down")); keyinfo=&info->s->ft2_keyinfo; kpos-=keyinfo->keylength+nod_flag; /* we'll modify key entry 'in vivo' */ key+=off; diff --git a/myisam/mi_open.c b/myisam/mi_open.c index c4b24acdb77..744bb9bb3b6 100644 --- a/myisam/mi_open.c +++ b/myisam/mi_open.c @@ -513,8 +513,8 @@ MI_INFO *mi_open(const char *name, int mode, uint open_flags) NullS)) goto err; errpos=6; - - if (!have_rtree) + + if (!have_rtree) info.rtree_recursion_state= NULL; strmov(info.filename,org_name); @@ -536,6 +536,7 @@ MI_INFO *mi_open(const char *name, int mode, uint open_flags) info.lock_type=F_UNLCK; info.quick_mode=0; info.bulk_insert=0; + info.ft1_to_ft2=0; info.errkey= -1; info.page_changed=1; pthread_mutex_lock(&share->intern_lock); @@ -1112,7 +1113,7 @@ char *mi_recinfo_read(char *ptr, MI_COLUMNDEF *recinfo) /************************************************************************** Open data file with or without RAID -We can't use dup() here as the data file descriptors need to have different +We can't use dup() here as the data file descriptors need to have different active seek-positions. The argument file_to_dup is here for the future if there would on some OS diff --git a/myisam/mi_write.c b/myisam/mi_write.c index 8e0b7e3530c..b6a7bf50dd0 100644 --- a/myisam/mi_write.c +++ b/myisam/mi_write.c @@ -18,6 +18,7 @@ #include "fulltext.h" #include "rt_index.h" +#include <assert.h> #ifdef __WIN__ #include <errno.h> @@ -124,7 +125,7 @@ int mi_write(MI_INFO *info, byte *record) else { if (share->keyinfo[i].ck_insert(info,i,buff, - _mi_make_key(info,i,buff,record,filepos))) + _mi_make_key(info,i,buff,record,filepos))) { if (local_lock_tree) rw_unlock(&share->key_root_lock[i]); @@ -264,13 +265,32 @@ int _mi_ck_write_btree(register MI_INFO *info, uint keynr, uchar *key, else comp_flag=SEARCH_SAME; /* Keys in rec-pos order */ + error=_mi_ck_real_write_btree(info, keyinfo, key, key_length, + root, comp_flag); + if (info->ft1_to_ft2) + { + if (!error) + error= _mi_ft_convert_to_ft2(info, keynr, key); + delete_dynamic(info->ft1_to_ft2); + my_free(info->ft1_to_ft2, MYF(0)); + info->ft1_to_ft2=0; + } + DBUG_RETURN(error); +} /* _mi_ck_write_btree */ + +int _mi_ck_real_write_btree(MI_INFO *info, MI_KEYDEF *keyinfo, + uchar *key, uint key_length, my_off_t *root, uint comp_flag) +{ + int error; + DBUG_ENTER("_mi_ck_real_write_btree"); + /* key_length parameter is used only if comp_flag is SEARCH_FIND */ if (*root == HA_OFFSET_ERROR || (error=w_search(info, keyinfo, comp_flag, key, key_length, *root, (uchar *) 0, (uchar*) 0, (my_off_t) 0, 1)) > 0) error=_mi_enlarge_root(info,keyinfo,key,root); DBUG_RETURN(error); -} /* _mi_ck_write_btree */ +} /* _mi_ck_real_write_btree */ /* Make a new root with key as only pointer */ @@ -359,13 +379,11 @@ static int w_search(register MI_INFO *info, register MI_KEYDEF *keyinfo, keyinfo=&info->s->ft2_keyinfo; key+=off; keypos-=keyinfo->keylength; /* we'll modify key entry 'in vivo' */ - if ((error=w_search(info, keyinfo, comp_flag, key, HA_FT_WLEN, root, - (uchar *) 0, (uchar*) 0, (my_off_t) 0, 1)) > 0) - { - error=_mi_enlarge_root(info, keyinfo, key, &root); - _mi_dpointer(info, keypos+HA_FT_WLEN, root); - } + error=_mi_ck_real_write_btree(info, keyinfo, key, 0, + &root, comp_flag); + _mi_dpointer(info, keypos+HA_FT_WLEN, root); subkeys--; /* should there be underflow protection ? */ + DBUG_ASSERT(subkeys < 0); ft_intXstore(keypos, subkeys); if (!error) error=_mi_write_keypage(info,keyinfo,page,temp_buff); @@ -410,7 +428,6 @@ int _mi_insert(register MI_INFO *info, register MI_KEYDEF *keyinfo, uchar *key, uchar *anc_buff, uchar *key_pos, uchar *key_buff, uchar *father_buff, uchar *father_key_pos, my_off_t father_page, my_bool insert_last) - { uint a_length,nod_flag; int t_length; @@ -464,8 +481,56 @@ int _mi_insert(register MI_INFO *info, register MI_KEYDEF *keyinfo, a_length+=t_length; mi_putint(anc_buff,a_length,nod_flag); if (a_length <= keyinfo->block_length) + { + if (keyinfo->block_length - a_length < 32 && + keyinfo->flag & HA_FULLTEXT && key_pos == endpos && + info->s->base.key_reflength <= info->s->base.rec_reflength && + info->s->options & (HA_OPTION_PACK_RECORD | HA_OPTION_COMPRESS_RECORD)) + { + /* + Normal word. One-level tree. Page is almost full. + Let's consider converting. + We'll compare 'key' and the first key at anc_buff + */ + uchar *a=key, *b=anc_buff+2+nod_flag; + uint alen, blen, ft2len=info->s->ft2_keyinfo.keylength; + /* the very first key on the page is always unpacked */ + DBUG_ASSERT((*b & 128) == 0); +#if HA_FT_MAXLEN >= 127 + blen= mi_uint2korr(b); b+=2; +#else + blen= *b++; +#endif + get_key_length(alen,a); + DBUG_ASSERT(info->ft1_to_ft2==0); + if (alen == blen && + mi_compare_text(keyinfo->seg->charset, a, alen, b, blen, 0)==0) + { + /* yup. converting */ + info->ft1_to_ft2=(DYNAMIC_ARRAY *) + my_malloc(sizeof(DYNAMIC_ARRAY), MYF(MY_WME)); + my_init_dynamic_array(info->ft1_to_ft2, ft2len, 300, 50); + + /* + now, adding all keys from the page to dynarray + if the page is a leaf (if not keys will be deleted later) + */ + if (!nod_flag) + { + /* let's leave the first key on the page, though, because + we cannot easily dispatch an empty page here */ + b+=blen+ft2len+2; + for (a=anc_buff+a_length ; b < a ; b+=ft2len+2) + insert_dynamic(info->ft1_to_ft2, b); + + /* fixing the page's length - it contains only one key now */ + mi_putint(anc_buff,2+blen+ft2len+2,0); + } + /* the rest will be done when we're back from recursion */ + } + } DBUG_RETURN(0); /* There is room on page */ - + } /* Page is full */ if (nod_flag) insert_last=0; diff --git a/myisam/myisamdef.h b/myisam/myisamdef.h index 83e28a42797..6ec7908c014 100644 --- a/myisam/myisamdef.h +++ b/myisam/myisamdef.h @@ -222,7 +222,8 @@ struct st_myisam_info { MI_BLOB *blobs; /* Pointer to blobs */ MI_BIT_BUFF bit_buff; /* accumulate indexfile changes between write's */ - TREE *bulk_insert; + TREE *bulk_insert; + DYNAMIC_ARRAY *ft1_to_ft2; /* used only in ft1->ft2 conversion */ char *filename; /* parameter to open filename */ uchar *buff, /* Temp area for key */ *lastkey,*lastkey2; /* Last used search key */ @@ -464,6 +465,9 @@ extern int _mi_delete_static_record(MI_INFO *info); extern int _mi_cmp_static_record(MI_INFO *info,const byte *record); extern int _mi_read_rnd_static_record(MI_INFO*, byte *,my_off_t, my_bool); extern int _mi_ck_write(MI_INFO *info,uint keynr,uchar *key,uint length); +extern int _mi_ck_real_write_btree(MI_INFO *info, MI_KEYDEF *keyinfo, + uchar *key, uint key_length, + my_off_t *root, uint comp_flag); extern int _mi_enlarge_root(MI_INFO *info,MI_KEYDEF *keyinfo,uchar *key, my_off_t *root); extern int _mi_insert(MI_INFO *info,MI_KEYDEF *keyinfo,uchar *key, uchar *anc_buff,uchar *key_pos,uchar *key_buff, diff --git a/mysql-test/r/fulltext2.result b/mysql-test/r/fulltext2.result index a35be210ece..687bc6edfa0 100644 --- a/mysql-test/r/fulltext2.result +++ b/mysql-test/r/fulltext2.result @@ -103,4 +103,106 @@ count(*) select count(*) from t1 where match a against ('aaazzz' in boolean mode); count(*) 262 -DROP TABLE IF EXISTS t1; +drop table t1; +CREATE TABLE t1 ( +i int(10) unsigned not null auto_increment primary key, +a varchar(255) not null, +FULLTEXT KEY (a) +) TYPE=MyISAM; +select count(*) from t1 where match a against ('aaaxxx'); +count(*) +260 +select count(*) from t1 where match a against ('aaayyy'); +count(*) +250 +select count(*) from t1 where match a against ('aaazzz'); +count(*) +255 +select count(*) from t1 where match a against ('aaaxxx' in boolean mode); +count(*) +260 +select count(*) from t1 where match a against ('aaayyy' in boolean mode); +count(*) +250 +select count(*) from t1 where match a against ('aaazzz' in boolean mode); +count(*) +255 +select count(*) from t1 where match a against ('aaaxxx aaayyy aaazzz'); +count(*) +765 +select count(*) from t1 where match a against ('aaaxxx aaayyy aaazzz' in boolean mode); +count(*) +765 +select count(*) from t1 where match a against ('aaax*' in boolean mode); +count(*) +260 +select count(*) from t1 where match a against ('aaay*' in boolean mode); +count(*) +250 +select count(*) from t1 where match a against ('aaa*' in boolean mode); +count(*) +765 +insert t1 (a) values ('aaaxxx'),('aaayyy'); +insert t1 (a) values ('aaazzz'),('aaazzz'),('aaazzz'),('aaazzz'),('aaazzz'); +select count(*) from t1 where match a against ('aaaxxx'); +count(*) +261 +select count(*) from t1 where match a against ('aaayyy'); +count(*) +251 +select count(*) from t1 where match a against ('aaazzz'); +count(*) +260 +insert t1 (a) values ('aaaxxx 000000'); +select count(*) from t1 where match a against ('000000'); +count(*) +1 +delete from t1 where match a against ('000000'); +select count(*) from t1 where match a against ('000000'); +count(*) +0 +select count(*) from t1 where match a against ('aaaxxx'); +count(*) +261 +delete from t1 where match a against ('aaazzz'); +select count(*) from t1 where match a against ('aaaxxx' in boolean mode); +count(*) +261 +select count(*) from t1 where match a against ('aaayyy' in boolean mode); +count(*) +251 +select count(*) from t1 where match a against ('aaazzz' in boolean mode); +count(*) +0 +select count(*) from t1 where a = 'aaaxxx'; +count(*) +261 +select count(*) from t1 where a = 'aaayyy'; +count(*) +251 +select count(*) from t1 where a = 'aaazzz'; +count(*) +0 +insert t1 (a) values ('aaaxxx 000000'); +select count(*) from t1 where match a against ('000000'); +count(*) +1 +update t1 set a='aaazzz' where match a against ('000000'); +select count(*) from t1 where match a against ('aaaxxx' in boolean mode); +count(*) +261 +select count(*) from t1 where match a against ('aaazzz' in boolean mode); +count(*) +1 +update t1 set a='aaazzz' where a = 'aaaxxx'; +update t1 set a='aaaxxx' where a = 'aaayyy'; +select count(*) from t1 where match a against ('aaaxxx' in boolean mode); +count(*) +251 +select count(*) from t1 where match a against ('aaayyy' in boolean mode); +count(*) +0 +select count(*) from t1 where match a against ('aaazzz' in boolean mode); +count(*) +262 +drop table t1; diff --git a/mysql-test/t/fulltext2.test b/mysql-test/t/fulltext2.test index b739d60e3b3..a01ec3a59c9 100644 --- a/mysql-test/t/fulltext2.test +++ b/mysql-test/t/fulltext2.test @@ -94,5 +94,83 @@ select count(*) from t1 where match a against ('aaaxxx' in boolean mode); select count(*) from t1 where match a against ('aaayyy' in boolean mode); select count(*) from t1 where match a against ('aaazzz' in boolean mode); -DROP TABLE IF EXISTS t1; +drop table t1; + +CREATE TABLE t1 ( + i int(10) unsigned not null auto_increment primary key, + a varchar(255) not null, + FULLTEXT KEY (a) +) TYPE=MyISAM; + +# two-level entry, second-level tree with depth 2 +--disable_query_log +let $1=260; +while ($1) +{ + eval insert t1 (a) values ('aaaxxx'); + dec $1; +} +let $1=255; +while ($1) +{ + eval insert t1 (a) values ('aaazzz'); + dec $1; +} +let $1=250; +while ($1) +{ + eval insert t1 (a) values ('aaayyy'); + dec $1; +} +--enable_query_log + +select count(*) from t1 where match a against ('aaaxxx'); +select count(*) from t1 where match a against ('aaayyy'); +select count(*) from t1 where match a against ('aaazzz'); +select count(*) from t1 where match a against ('aaaxxx' in boolean mode); +select count(*) from t1 where match a against ('aaayyy' in boolean mode); +select count(*) from t1 where match a against ('aaazzz' in boolean mode); +select count(*) from t1 where match a against ('aaaxxx aaayyy aaazzz'); +select count(*) from t1 where match a against ('aaaxxx aaayyy aaazzz' in boolean mode); + +select count(*) from t1 where match a against ('aaax*' in boolean mode); +select count(*) from t1 where match a against ('aaay*' in boolean mode); +select count(*) from t1 where match a against ('aaa*' in boolean mode); + +# mi_write: + +insert t1 (a) values ('aaaxxx'),('aaayyy'); +insert t1 (a) values ('aaazzz'),('aaazzz'),('aaazzz'),('aaazzz'),('aaazzz'); +select count(*) from t1 where match a against ('aaaxxx'); +select count(*) from t1 where match a against ('aaayyy'); +select count(*) from t1 where match a against ('aaazzz'); + +# mi_delete +insert t1 (a) values ('aaaxxx 000000'); +select count(*) from t1 where match a against ('000000'); +delete from t1 where match a against ('000000'); +select count(*) from t1 where match a against ('000000'); +select count(*) from t1 where match a against ('aaaxxx'); +delete from t1 where match a against ('aaazzz'); +select count(*) from t1 where match a against ('aaaxxx' in boolean mode); +select count(*) from t1 where match a against ('aaayyy' in boolean mode); +select count(*) from t1 where match a against ('aaazzz' in boolean mode); +# double-check without index +select count(*) from t1 where a = 'aaaxxx'; +select count(*) from t1 where a = 'aaayyy'; +select count(*) from t1 where a = 'aaazzz'; + +# update +insert t1 (a) values ('aaaxxx 000000'); +select count(*) from t1 where match a against ('000000'); +update t1 set a='aaazzz' where match a against ('000000'); +select count(*) from t1 where match a against ('aaaxxx' in boolean mode); +select count(*) from t1 where match a against ('aaazzz' in boolean mode); +update t1 set a='aaazzz' where a = 'aaaxxx'; +update t1 set a='aaaxxx' where a = 'aaayyy'; +select count(*) from t1 where match a against ('aaaxxx' in boolean mode); +select count(*) from t1 where match a against ('aaayyy' in boolean mode); +select count(*) from t1 where match a against ('aaazzz' in boolean mode); + +drop table t1; |