diff options
Diffstat (limited to 'myisam')
62 files changed, 6137 insertions, 1346 deletions
diff --git a/myisam/.cvsignore b/myisam/.cvsignore index d9adcedd308..ef6d92c6e18 100644 --- a/myisam/.cvsignore +++ b/myisam/.cvsignore @@ -7,6 +7,8 @@ ft_test1 mi_test1 mi_test2 mi_test3 +rt_test +sp_test myisamchk myisamlog myisampack diff --git a/myisam/Makefile.am b/myisam/Makefile.am index aeb3b34ee15..fdcfc6d0d41 100644 --- a/myisam/Makefile.am +++ b/myisam/Makefile.am @@ -17,7 +17,7 @@ EXTRA_DIST = mi_test_all.sh mi_test_all.res pkgdata_DATA = mi_test_all mi_test_all.res -INCLUDES = @MT_INCLUDES@ -I$(srcdir)/../include -I../include +INCLUDES = @MT_INCLUDES@ -I$(top_srcdir)/include LDADD = @CLIENT_EXTRA_LDFLAGS@ libmyisam.a ../mysys/libmysys.a \ ../dbug/libdbug.a ../strings/libmystrings.a pkglib_LIBRARIES = libmyisam.a @@ -25,14 +25,16 @@ bin_PROGRAMS = myisamchk myisamlog myisampack myisamchk_DEPENDENCIES= $(LIBRARIES) myisamlog_DEPENDENCIES= $(LIBRARIES) myisampack_DEPENDENCIES=$(LIBRARIES) -noinst_PROGRAMS = mi_test1 mi_test2 mi_test3 ft_dump #ft_test1 ft_eval -noinst_HEADERS = myisamdef.h fulltext.h ftdefs.h ft_test1.h ft_eval.h +noinst_PROGRAMS = mi_test1 mi_test2 mi_test3 rt_test sp_test ft_dump #ft_test1 ft_eval +noinst_HEADERS = myisamdef.h rt_index.h rt_key.h rt_mbr.h sp_defs.h fulltext.h ftdefs.h ft_test1.h ft_eval.h mi_test1_DEPENDENCIES= $(LIBRARIES) mi_test2_DEPENDENCIES= $(LIBRARIES) mi_test3_DEPENDENCIES= $(LIBRARIES) #ft_test1_DEPENDENCIES= $(LIBRARIES) #ft_eval_DEPENDENCIES= $(LIBRARIES) ft_dump_DEPENDENCIES= $(LIBRARIES) +rt_test_DEPENDENCIES= $(LIBRARIES) +sp_test_DEPENDENCIES= $(LIBRARIES) libmyisam_a_SOURCES = mi_open.c mi_extra.c mi_info.c mi_rkey.c \ mi_rnext.c mi_rnext_same.c \ mi_search.c mi_page.c mi_key.c mi_locking.c \ @@ -45,9 +47,11 @@ libmyisam_a_SOURCES = mi_open.c mi_extra.c mi_info.c mi_rkey.c \ mi_range.c mi_dbug.c mi_checksum.c mi_log.c \ mi_changed.c mi_static.c mi_delete_all.c \ mi_delete_table.c mi_rename.c mi_check.c \ + mi_keycache.c mi_preload.c \ ft_parser.c ft_stopwords.c ft_static.c \ - ft_update.c ft_boolean_search.c ft_nlq_search.c sort.c -CLEANFILES = test?.MY? FT?.MY? isam.log mi_test_all + ft_update.c ft_boolean_search.c ft_nlq_search.c sort.c \ + rt_index.c rt_key.c rt_mbr.c rt_split.c sp_key.c +CLEANFILES = test?.MY? FT?.MY? isam.log mi_test_all rt_test.MY? sp_test.MY? DEFS = -DMAP_TO_USE_RAID # Move to automake rules ? diff --git a/myisam/ft_boolean_search.c b/myisam/ft_boolean_search.c index 30b52a20060..aa41bc11513 100644 --- a/myisam/ft_boolean_search.c +++ b/myisam/ft_boolean_search.c @@ -20,7 +20,6 @@ #define FT_CORE #include "ftdefs.h" -#include <queues.h> /* search with boolean queries */ @@ -62,45 +61,47 @@ typedef struct st_ftb_expr FTB_EXPR; struct st_ftb_expr { FTB_EXPR *up; - float weight; - uint flags; my_off_t docid[2]; /* ^^^^^^^^^^^^^^^^^^ FTB_{EXPR,WORD} common section */ + float weight; float cur_weight; byte *quot, *qend; - int yesses; /* number of "yes" words matched */ - int nos; /* number of "no" words matched */ - int ythresh; /* number of "yes" words in expr */ - int yweaks; /* number of "yes" words for scan only */ + uint flags; + uint yesses; /* number of "yes" words matched */ + uint nos; /* number of "no" words matched */ + uint ythresh; /* number of "yes" words in expr */ + uint yweaks; /* number of "yes" words for scan only */ }; typedef struct st_ftb_word { - FTB_EXPR *up; - float weight; - uint flags; - my_off_t docid[2]; + FTB_EXPR *up; + MI_KEYDEF *keyinfo; + my_off_t docid[2]; /* for index search and for scan */ /* ^^^^^^^^^^^^^^^^^^ FTB_{EXPR,WORD} common section */ - uint ndepth; - int len; - /* ... docid cache can be added here. SerG */ - byte word[1]; + my_off_t key_root; + float weight; + uint ndepth; + uint flags; + uint len; + uchar off; + byte word[1]; } FTB_WORD; typedef struct st_ft_info { struct _ft_vft *please; MI_INFO *info; - uint keynr; CHARSET_INFO *charset; - enum { UNINITIALIZED, READY, INDEX_SEARCH, INDEX_DONE /*, SCAN*/ } state; - uint with_scan; - my_off_t lastpos; FTB_EXPR *root; - QUEUE queue; - TREE no_dupes; FTB_WORD **list; MEM_ROOT mem_root; + QUEUE queue; + TREE no_dupes; + my_off_t lastpos; + uint keynr; + uchar with_scan; + enum { UNINITIALIZED, READY, INDEX_SEARCH, INDEX_DONE } state; } FTB; static int FTB_WORD_cmp(my_off_t *v, FTB_WORD *a, FTB_WORD *b) @@ -121,7 +122,7 @@ static int FTB_WORD_cmp(my_off_t *v, FTB_WORD *a, FTB_WORD *b) static int FTB_WORD_cmp_list(CHARSET_INFO *cs, FTB_WORD **a, FTB_WORD **b) { /* ORDER BY word DESC, ndepth DESC */ - int i=_mi_compare_text(cs, (uchar*) (*b)->word+1,(*b)->len-1, + int i= mi_compare_text(cs, (uchar*) (*b)->word+1,(*b)->len-1, (uchar*) (*a)->word+1,(*a)->len-1,0); if (!i) i=CMP_NUM((*b)->ndepth,(*a)->ndepth); @@ -143,7 +144,7 @@ static void _ftb_parse_query(FTB *ftb, byte **start, byte *end, param.prev=' '; param.quot=up->quot; - while ((res=ft_get_word(start,end,&w,¶m))) + while ((res=ft_get_word(ftb->charset,start,end,&w,¶m))) { int r=param.plusminus; float weight= (float) (param.pmsign ? nwghts : wghts)[(r>5)?5:((r<-5)?-5:r)]; @@ -162,11 +163,16 @@ static void _ftb_parse_query(FTB *ftb, byte **start, byte *end, ftbw->up=up; ftbw->docid[0]=ftbw->docid[1]=HA_POS_ERROR; ftbw->ndepth= (param.yesno<0) + depth; + ftbw->key_root=HA_POS_ERROR; memcpy(ftbw->word+1, w.pos, w.len); ftbw->word[0]=w.len; if (param.yesno > 0) up->ythresh++; queue_insert(& ftb->queue, (byte *)ftbw); +#ifdef TO_BE_REMOVED + /* after removing the following line, + ftb->with_scan handling can be simplified (no longer a bitmap) */ ftb->with_scan|=(param.trunc & FTB_FLAG_TRUNC); +#endif break; case 2: /* left bracket */ ftbe=(FTB_EXPR *)alloc_root(&ftb->mem_root, sizeof(FTB_EXPR)); @@ -196,22 +202,98 @@ static int _ftb_no_dupes_cmp(void* not_used __attribute__((unused)), return CMP_NUM((*((my_off_t*)a)), (*((my_off_t*)b))); } +/* returns 1 if the search was finished (must-word wasn't found) */ +static int _ft2_search(FTB *ftb, FTB_WORD *ftbw, my_bool init_search) +{ + int r; + uint off; + int subkeys; + MI_INFO *info=ftb->info; + + if (init_search) + { + ftbw->key_root=info->s->state.key_root[ftb->keynr]; + ftbw->keyinfo=info->s->keyinfo+ftb->keynr; + ftbw->off=0; + + r=_mi_search(info, ftbw->keyinfo, (uchar*) ftbw->word, ftbw->len, + SEARCH_FIND | SEARCH_BIGGER, ftbw->key_root); + } + else + { + r=_mi_search(info, ftbw->keyinfo, (uchar*) ftbw->word+ftbw->off, + USE_WHOLE_KEY, SEARCH_BIGGER, ftbw->key_root); + } + if (!r && !ftbw->off) + { + r= mi_compare_text(ftb->charset, + info->lastkey + (ftbw->flags & FTB_FLAG_TRUNC), + ftbw->len - (ftbw->flags & FTB_FLAG_TRUNC), + (uchar*) ftbw->word + (ftbw->flags & FTB_FLAG_TRUNC), + ftbw->len - (ftbw->flags & FTB_FLAG_TRUNC), + 0); + } + + if (r) /* not found */ + { + if (!ftbw->off || !(ftbw->flags & FTB_FLAG_TRUNC)) + { + ftbw->docid[0]=HA_POS_ERROR; + if ((ftbw->flags & FTB_FLAG_YES) && ftbw->up->up==0) + { + /* + This word MUST BE present in every document returned, + so we can stop the search right now + */ + ftb->state=INDEX_DONE; + return 1; /* search is done */ + } + else + return 0; + } + + /* going up to the first-level tree to continue search there */ + _mi_dpointer(info, (uchar*) (ftbw->word+ftbw->off+HA_FT_WLEN), + ftbw->key_root); + ftbw->key_root=info->s->state.key_root[ftb->keynr]; + ftbw->keyinfo=info->s->keyinfo+ftb->keynr; + ftbw->off=0; + return _ft2_search(ftb, ftbw, 0); + } + + /* matching key found */ + memcpy(ftbw->word+ftbw->off, info->lastkey, info->lastkey_length); + if (!ftbw->off && (init_search || (ftbw->flags & FTB_FLAG_TRUNC))) + { + /* going down ? */ + get_key_full_length_rdonly(off, info->lastkey); + subkeys=ft_sintXkorr(info->lastkey+off); + if (subkeys<0) + { + /* yep, going down, to the second-level tree */ + /* TODO here: subkey-based optimization */ + ftbw->off=off; + ftbw->key_root=info->lastpos; + ftbw->keyinfo=& info->s->ft2_keyinfo; + r=_mi_search_first(info, ftbw->keyinfo, ftbw->key_root); + DBUG_ASSERT(r==0); /* found something */ + memcpy(ftbw->word+off, info->lastkey, info->lastkey_length); + } + } + ftbw->docid[0]=info->lastpos; + return 0; +} + static void _ftb_init_index_search(FT_INFO *ftb) { - int i, r; + int i; FTB_WORD *ftbw; - MI_INFO *info=ftb->info; - MI_KEYDEF *keyinfo; - my_off_t keyroot; if ((ftb->state != READY && ftb->state !=INDEX_DONE) || ftb->keynr == NO_SUCH_KEY) return; ftb->state=INDEX_SEARCH; - keyinfo=info->s->keyinfo+ftb->keynr; - keyroot=info->s->state.key_root[ftb->keynr]; - for (i=ftb->queue.elements; i; i--) { ftbw=(FTB_WORD *)(ftb->queue.root[i]); @@ -261,42 +343,16 @@ static void _ftb_init_index_search(FT_INFO *ftb) else reset_tree(& ftb->no_dupes); } - r=_mi_search(info, keyinfo, (uchar*) ftbw->word, ftbw->len, - SEARCH_FIND | SEARCH_BIGGER, keyroot); - if (!r) - { - r=_mi_compare_text(ftb->charset, - info->lastkey + (ftbw->flags&FTB_FLAG_TRUNC), - ftbw->len - (ftbw->flags&FTB_FLAG_TRUNC), - (uchar*) ftbw->word + (ftbw->flags&FTB_FLAG_TRUNC), - ftbw->len - (ftbw->flags&FTB_FLAG_TRUNC), - 0); - } - if (r) /* not found */ - { - if (ftbw->flags&FTB_FLAG_YES && ftbw->up->up==0) - { - /* - This word MUST BE present in every document returned, - so we can abort the search right now - */ - ftb->state=INDEX_DONE; - return; - } - } - else - { - memcpy(ftbw->word, info->lastkey, info->lastkey_length); - ftbw->docid[0]=info->lastpos; - } + + if (_ft2_search(ftb, ftbw, 1)) + return; } queue_fix(& ftb->queue); } FT_INFO * ft_init_boolean_search(MI_INFO *info, uint keynr, byte *query, - uint query_len, - my_bool presort __attribute__((unused))) + uint query_len) { FTB *ftb; FTB_EXPR *ftbe; @@ -309,8 +365,7 @@ FT_INFO * ft_init_boolean_search(MI_INFO *info, uint keynr, byte *query, ftb->info=info; ftb->keynr=keynr; ftb->charset= ((keynr==NO_SUCH_KEY) ? - default_charset_info : - info->s->keyinfo[keynr].seg->charset); + default_charset_info : info->s->keyinfo[keynr].seg->charset); ftb->with_scan=0; ftb->lastpos=HA_POS_ERROR; bzero(& ftb->no_dupes, sizeof(TREE)); @@ -346,25 +401,34 @@ FT_INFO * ft_init_boolean_search(MI_INFO *info, uint keynr, byte *query, } -/* returns 1 if str0 contain str1 */ +/* returns 1 if str0 ~= /\<str1\>/ */ static int _ftb_strstr(const byte *s0, const byte *e0, const byte *s1, const byte *e1, CHARSET_INFO *cs) { - const byte *p; + const byte *p0, *p1; + my_bool s_after, e_before; - while (s0 < e0) + s_after=true_word_char(cs, s1[0]); + e_before=true_word_char(cs, e1[-1]); + p0=s0; + + while (p0 < e0) { - while (s0 < e0 && cs->to_upper[(uint) (uchar) *s0++] != + while (p0 < e0 && cs->to_upper[(uint) (uchar) *p0++] != cs->to_upper[(uint) (uchar) *s1]) /* no-op */; - if (s0 >= e0) + if (p0 >= e0) return 0; - p=s1+1; - while (s0 < e0 && p < e1 && cs->to_upper[(uint) (uchar) *s0] == - cs->to_upper[(uint) (uchar) *p]) - s0++, p++; - if (p >= e1) + + if (s_after && p0-1 > s0 && true_word_char(cs, p0[-2])) + continue; + + p1=s1+1; + while (p0 < e0 && p1 < e1 && cs->to_upper[(uint) (uchar) *p0] == + cs->to_upper[(uint) (uchar) *p1]) + p0++, p1++; + if (p1 == e1 && (!e_before || p0 == e0 || !true_word_char(cs, p0[0]))) return 1; } return 0; @@ -394,7 +458,7 @@ static void _ftb_climb_the_tree(FTB *ftb, FTB_WORD *ftbw, FT_SEG_ITERATOR *ftsi_ { weight /= ftbe->ythresh; ftbe->cur_weight += weight; - if (++ftbe->yesses == ythresh) + if ((int) ++ftbe->yesses == ythresh) { yn=ftbe->flags; weight=ftbe->cur_weight*ftbe->weight; @@ -435,9 +499,9 @@ static void _ftb_climb_the_tree(FTB *ftb, FTB_WORD *ftbw, FT_SEG_ITERATOR *ftsi_ if (ftbe->ythresh) weight/=3; ftbe->cur_weight += weight; - if (ftbe->yesses < ythresh) + if ((int) ftbe->yesses < ythresh) break; - yn= (ftbe->yesses++ == ythresh) ? ftbe->flags : 0 ; + yn= ((int) ftbe->yesses++ == ythresh) ? ftbe->flags : 0 ; weight*= ftbe->weight; } } @@ -449,10 +513,7 @@ int ft_boolean_read_next(FT_INFO *ftb, char *record) FTB_EXPR *ftbe; FTB_WORD *ftbw; MI_INFO *info=ftb->info; - MI_KEYDEF *keyinfo=info->s->keyinfo+ftb->keynr; - my_off_t keyroot=info->s->state.key_root[ftb->keynr]; my_off_t curdoc; - int r; if (ftb->state != INDEX_SEARCH && ftb->state != INDEX_DONE) return -1; @@ -474,39 +535,12 @@ int ft_boolean_read_next(FT_INFO *ftb, char *record) (curdoc=((FTB_WORD *)queue_top(& ftb->queue))->docid[0]) != HA_POS_ERROR) { - while (curdoc==(ftbw=(FTB_WORD *)queue_top(& ftb->queue))->docid[0]) + while (curdoc == (ftbw=(FTB_WORD *)queue_top(& ftb->queue))->docid[0]) { _ftb_climb_the_tree(ftb, ftbw, 0); /* update queue */ - r=_mi_search(info, keyinfo, (uchar*) ftbw->word, USE_WHOLE_KEY, - SEARCH_BIGGER , keyroot); - if (!r) - { - r=_mi_compare_text(ftb->charset, - info->lastkey + (ftbw->flags&FTB_FLAG_TRUNC), - ftbw->len - (ftbw->flags&FTB_FLAG_TRUNC), - (uchar*) ftbw->word + (ftbw->flags&FTB_FLAG_TRUNC), - ftbw->len - (ftbw->flags&FTB_FLAG_TRUNC), - 0); - } - if (r) /* not found */ - { - ftbw->docid[0]=HA_POS_ERROR; - if (ftbw->flags&FTB_FLAG_YES && ftbw->up->up==0) - { - /* - This word MUST BE present in every document returned, - so we can stop the search right now - */ - ftb->state=INDEX_DONE; - } - } - else - { - memcpy(ftbw->word, info->lastkey, info->lastkey_length); - ftbw->docid[0]=info->lastpos; - } + _ft2_search(ftb, ftbw, 0); queue_replaced(& ftb->queue); } @@ -515,12 +549,14 @@ int ft_boolean_read_next(FT_INFO *ftb, char *record) ftbe->yesses>=(ftbe->ythresh-ftbe->yweaks) && !ftbe->nos) { /* curdoc matched ! */ - if (is_tree_inited(& ftb->no_dupes) && - tree_insert(& ftb->no_dupes, &curdoc, 0)->count >1) - /* but it managed to get past this line once */ + if (is_tree_inited(&ftb->no_dupes) && + tree_insert(&ftb->no_dupes, &curdoc, 0, + ftb->no_dupes.custom_arg)->count >1) + /* but it managed already to get past this line once */ continue; info->lastpos=curdoc; + /* Clear all states, except that the table was updated */ info->update&= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED); if (!(*info->read_record)(info,curdoc,record)) @@ -583,15 +619,16 @@ float ft_boolean_find_relevance(FT_INFO *ftb, byte *record, uint length) continue; end=ftsi.pos+ftsi.len; - while (ft_simple_get_word((byte **) &ftsi.pos,(byte *) end, &word)) + while (ft_simple_get_word(ftb->charset, + (byte **) &ftsi.pos, (byte *) end, &word)) { int a, b, c; for (a=0, b=ftb->queue.elements, c=(a+b)/2; b-a>1; c=(a+b)/2) { ftbw=ftb->list[c]; - if (_mi_compare_text(ftb->charset, (uchar*) word.pos, word.len, - (uchar*) ftbw->word+1, ftbw->len-1, - (my_bool) (ftbw->flags&FTB_FLAG_TRUNC)) >0) + if (mi_compare_text(ftb->charset, (uchar*) word.pos, word.len, + (uchar*) ftbw->word+1, ftbw->len-1, + (my_bool) (ftbw->flags&FTB_FLAG_TRUNC)) >0) b=c; else a=c; @@ -599,9 +636,9 @@ float ft_boolean_find_relevance(FT_INFO *ftb, byte *record, uint length) for (; c>=0; c--) { ftbw=ftb->list[c]; - if (_mi_compare_text(ftb->charset, (uchar*) word.pos, word.len, - (uchar*) ftbw->word+1,ftbw->len-1, - (my_bool) (ftbw->flags&FTB_FLAG_TRUNC))) + if (mi_compare_text(ftb->charset, (uchar*) word.pos, word.len, + (uchar*) ftbw->word+1,ftbw->len-1, + (my_bool) (ftbw->flags&FTB_FLAG_TRUNC))) break; if (ftbw->docid[1] == docid) continue; diff --git a/myisam/ft_dump.c b/myisam/ft_dump.c index 69fef529fa9..8c40878cf00 100644 --- a/myisam/ft_dump.c +++ b/myisam/ft_dump.c @@ -34,21 +34,23 @@ static uint lengths[256]; static struct my_option my_long_options[] = { - {"dump", 'd', "Dump index (incl. data offsets and word weights)", + {"dump", 'd', "Dump index (incl. data offsets and word weights).", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, - {"stats", 's', "Report global stats", + {"stats", 's', "Report global stats.", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, - {"verbose", 'v', "Be verbose", + {"verbose", 'v', "Be verbose.", (gptr*) &verbose, (gptr*) &verbose, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, - {"count", 'c', "Calculate per-word stats (counts and global weights)", + {"count", 'c', "Calculate per-word stats (counts and global weights).", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, - {"length", 'l', "Report length distribution", + {"length", 'l', "Report length distribution.", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, - {"execute", 'e', "Execute given query", (gptr*) &query, (gptr*) &query, 0, +#ifdef DISABLED + {"execute", 'e', "Execute given query.", (gptr*) &query, (gptr*) &query, 0, GET_STR_ALLOC, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, - {"help", 'h', "Display help and exit", +#endif + {"help", 'h', "Display help and exit.", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, - {"help", '?', "Synonym for -h", + {"help", '?', "Synonym for -h.", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, { 0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0} }; @@ -56,9 +58,9 @@ static struct my_option my_long_options[] = int main(int argc,char *argv[]) { - int error=0; + int error=0, subkeys; uint keylen, keylen2=0, inx, doc_cnt=0; - float weight; + float weight= 1.0; double gws, min_gws=0, avg_gws=0; MI_INFO *info; char buf[MAX_LEN], buf2[MAX_LEN], buf_maxlen[MAX_LEN], buf_min_gws[MAX_LEN]; @@ -78,6 +80,8 @@ int main(int argc,char *argv[]) if (argc < 2) usage(); + init_key_cache(dflt_key_cache,MI_KEY_BLOCK_LENGTH,USE_BUFFER_INIT, 0, 0); + if (!(info=mi_open(argv[0],2,HA_OPEN_ABORT_IF_LOCKED))) goto err; @@ -124,18 +128,16 @@ int main(int argc,char *argv[]) { keylen=*(info->lastkey); -#if HA_FT_WTYPE == HA_KEYTYPE_FLOAT - mi_float4get(weight,info->lastkey+keylen+1); -#else -#error -#endif + subkeys=ft_sintXkorr(info->lastkey+keylen+1); + if (subkeys >= 0) + weight=*(float*)&subkeys; #ifdef HAVE_SNPRINTF snprintf(buf,MAX_LEN,"%.*s",(int) keylen,info->lastkey+1); #else sprintf(buf,"%.*s",(int) keylen,info->lastkey+1); #endif - casedn_str(buf); + my_casedn_str(default_charset_info,buf); total++; lengths[keylen]++; @@ -168,9 +170,13 @@ int main(int argc,char *argv[]) } } if (dump) - printf("%9qx %20.7f %s\n",info->lastpos,weight,buf); - - if(verbose && (total%HOW_OFTEN_TO_WRITE)==0) + { + if (subkeys>=0) + printf("%9lx %20.7f %s\n", (long) info->lastpos,weight,buf); + else + printf("%9lx => %17d %s\n",(long) info->lastpos,-subkeys,buf); + } + if (verbose && (total%HOW_OFTEN_TO_WRITE)==0) printf("%10ld\r",total); } @@ -183,12 +189,12 @@ int main(int argc,char *argv[]) if ((ulong) count >= total/2) break; } - printf("Total rows: %qu\nTotal words: %lu\n" + printf("Total rows: %lu\nTotal words: %lu\n" "Unique words: %lu\nLongest word: %lu chars (%s)\n" "Median length: %u\n" "Average global weight: %f\n" "Most common word: %lu times, weight: %f (%s)\n", - (ulonglong)info->state->records, total, uniq, maxlen, buf_maxlen, + (long) info->state->records, total, uniq, maxlen, buf_maxlen, inx, avg_gws/uniq, max_doc_cnt, min_gws, buf_min_gws); } if (lstats) @@ -220,18 +226,18 @@ get_one_option(int optid, const struct my_option *opt __attribute__((unused)), { switch(optid) { case 'd': - dump=1; + dump=1; complain(count || query); break; - case 's': - stats=1; + case 's': + stats=1; complain(query!=0); break; - case 'c': + case 'c': count= 1; complain(dump || query); break; - case 'l': + case 'l': lstats=1; complain(query!=0); break; diff --git a/myisam/ft_eval.h b/myisam/ft_eval.h index 68be3a39f33..5501fe9d34b 100644 --- a/myisam/ft_eval.h +++ b/myisam/ft_eval.h @@ -33,7 +33,7 @@ FILE *df,*qf; MI_COLUMNDEF recinfo[3]; MI_KEYDEF keyinfo[2]; -MI_KEYSEG keyseg[10]; +HA_KEYSEG keyseg[10]; #define SWL_INIT 500 #define SWL_PLUS 50 diff --git a/myisam/ft_nlq_search.c b/myisam/ft_nlq_search.c index f426b88d77d..1e3d47577d2 100644 --- a/myisam/ft_nlq_search.c +++ b/myisam/ft_nlq_search.c @@ -42,8 +42,6 @@ typedef struct st_all_in_one uint keynr; CHARSET_INFO *charset; uchar *keybuff; - MI_KEYDEF *keyinfo; - my_off_t key_root; TREE dtree; } ALL_IN_ONE; @@ -66,10 +64,16 @@ static int FT_SUPERDOC_cmp(void* cmp_arg __attribute__((unused)), static int walk_and_match(FT_WORD *word, uint32 count, ALL_IN_ONE *aio) { + int subkeys; uint keylen, r, doc_cnt; FT_SUPERDOC sdoc, *sptr; TREE_ELEMENT *selem; - double gweight=1; + double gweight=1; + MI_INFO *info=aio->info; + uchar *keybuff=aio->keybuff; + MI_KEYDEF *keyinfo=info->s->keyinfo+aio->keynr; + my_off_t key_root=info->s->state.key_root[aio->keynr]; + uint extra=HA_FT_WLEN+info->s->base.rec_reflength+1; #if HA_FT_WTYPE == HA_KEYTYPE_FLOAT float tmp_weight; #else @@ -80,32 +84,49 @@ static int walk_and_match(FT_WORD *word, uint32 count, ALL_IN_ONE *aio) word->weight=LWS_FOR_QUERY; - keylen=_ft_make_key(aio->info,aio->keynr,(char*) aio->keybuff,word,0); + keylen=_ft_make_key(info,aio->keynr,(char*) keybuff,word,0); keylen-=HA_FT_WLEN; - doc_cnt=0; - r=_mi_search(aio->info, aio->keyinfo, aio->keybuff, keylen, - SEARCH_FIND | SEARCH_PREFIX, aio->key_root); - aio->info->update|= HA_STATE_AKTIV; /* for _mi_test_if_changed() */ + r=_mi_search(info, keyinfo, keybuff, keylen, SEARCH_FIND, key_root); + info->update|= HA_STATE_AKTIV; /* for _mi_test_if_changed() */ while (!r && gweight) { - if (_mi_compare_text(aio->charset, - aio->info->lastkey,keylen, - aio->keybuff,keylen,0)) break; + if (keylen && + mi_compare_text(aio->charset,info->lastkey+1, + info->lastkey_length-extra, keybuff+1,keylen-1,0)) + break; + + subkeys=ft_sintXkorr(info->lastkey+keylen); + if (subkeys<0) + { + if (doc_cnt) + DBUG_RETURN(1); /* index is corrupted */ + /* + TODO here: unsafe optimization, should this word + be skipped (based on subkeys) ? + */ + keybuff+=keylen; + keyinfo=& info->s->ft2_keyinfo; + key_root=info->lastpos; + keylen=0; + r=_mi_search_first(info, keyinfo, key_root); + continue; + } #if HA_FT_WTYPE == HA_KEYTYPE_FLOAT - mi_float4get(tmp_weight,aio->info->lastkey+keylen); + tmp_weight=*(float*)&subkeys; #else #error #endif - if(tmp_weight==0) DBUG_RETURN(doc_cnt); /* stopword, doc_cnt should be 0 */ + if (tmp_weight==0) + DBUG_RETURN(doc_cnt); /* stopword, doc_cnt should be 0 */ - sdoc.doc.dpos=aio->info->lastpos; + sdoc.doc.dpos=info->lastpos; /* saving document matched into dtree */ - if (!(selem=tree_insert(&aio->dtree, &sdoc, 0))) + if (!(selem=tree_insert(&aio->dtree, &sdoc, 0, aio->dtree.custom_arg))) DBUG_RETURN(1); sptr=(FT_SUPERDOC *)ELEMENT_KEY((&aio->dtree), selem); @@ -124,16 +145,13 @@ static int walk_and_match(FT_WORD *word, uint32 count, ALL_IN_ONE *aio) if (gweight < 0 || doc_cnt > 2000000) gweight=0; - if (_mi_test_if_changed(aio->info) == 0) - r=_mi_search_next(aio->info, aio->keyinfo, aio->info->lastkey, - aio->info->lastkey_length, SEARCH_BIGGER, - aio->key_root); + if (_mi_test_if_changed(info) == 0) + r=_mi_search_next(info, keyinfo, info->lastkey, info->lastkey_length, + SEARCH_BIGGER, key_root); else - r=_mi_search(aio->info, aio->keyinfo, aio->info->lastkey, - aio->info->lastkey_length, SEARCH_BIGGER, - aio->key_root); + r=_mi_search(info, keyinfo, info->lastkey, info->lastkey_length, + SEARCH_BIGGER, key_root); } - word->weight=gweight; DBUG_RETURN(0); @@ -151,17 +169,28 @@ static int walk_and_copy(FT_SUPERDOC *from, DBUG_RETURN(0); } +static int walk_and_push(FT_SUPERDOC *from, + uint32 count __attribute__((unused)), QUEUE *best) +{ + DBUG_ENTER("walk_and_copy"); + from->doc.weight+=from->tmp_weight*from->word_ptr->weight; + set_if_smaller(best->elements, ft_query_expansion_limit-1) + queue_insert(best, (byte *)& from->doc); + DBUG_RETURN(0); +} -static int FT_DOC_cmp(FT_DOC *a, FT_DOC *b) + +static int FT_DOC_cmp(void *unused __attribute__((unused)), + FT_DOC *a, FT_DOC *b) { return sgn(b->weight - a->weight); } FT_INFO *ft_init_nlq_search(MI_INFO *info, uint keynr, byte *query, - uint query_len, my_bool presort) + uint query_len, uint flags, byte *record) { - TREE allocated_wtree, *wtree=&allocated_wtree; + TREE wtree; ALL_IN_ONE aio; FT_DOC *dptr; FT_INFO *dlist=NULL; @@ -177,29 +206,51 @@ FT_INFO *ft_init_nlq_search(MI_INFO *info, uint keynr, byte *query, aio.info=info; aio.keynr=keynr; - aio.keyinfo=info->s->keyinfo+keynr; - aio.charset=aio.keyinfo->seg->charset; + aio.charset=info->s->keyinfo[keynr].seg->charset; aio.keybuff=info->lastkey+info->s->base.max_key_length; - aio.key_root=info->s->state.key_root[keynr]; - bzero(&allocated_wtree,sizeof(allocated_wtree)); + bzero(&wtree,sizeof(wtree)); init_tree(&aio.dtree,0,0,sizeof(FT_SUPERDOC),(qsort_cmp2)&FT_SUPERDOC_cmp,0, NULL, NULL); - ft_parse_init(&allocated_wtree, aio.charset); - if (ft_parse(&allocated_wtree,query,query_len)) + ft_parse_init(&wtree, aio.charset); + if (ft_parse(&wtree,query,query_len,0)) goto err; - if (tree_walk(wtree, (tree_walk_action)&walk_and_match, &aio, + if (tree_walk(&wtree, (tree_walk_action)&walk_and_match, &aio, left_root_right)) - goto err2; + goto err; + + if (flags & FT_EXPAND && ft_query_expansion_limit) + { + QUEUE best; + init_queue(&best,ft_query_expansion_limit,0,0, (queue_compare) &FT_DOC_cmp, + 0); + tree_walk(&aio.dtree, (tree_walk_action) &walk_and_push, + &best, left_root_right); + while (best.elements) + { + my_off_t docid=((FT_DOC *)queue_remove(& best, 0))->dpos; + if (!(*info->read_record)(info,docid,record)) + { + info->update|= HA_STATE_AKTIV; + _mi_ft_parse(&wtree, info, keynr, record,1); + } + } + delete_queue(&best); + reset_tree(&aio.dtree); + if (tree_walk(&wtree, (tree_walk_action)&walk_and_match, &aio, + left_root_right)) + goto err; + + } dlist=(FT_INFO *)my_malloc(sizeof(FT_INFO)+ sizeof(FT_DOC)*(aio.dtree.elements_in_tree-1), MYF(0)); - if(!dlist) - goto err2; + if (!dlist) + goto err; dlist->please= (struct _ft_vft *) & _ft_vft_nlq; dlist->ndocs=aio.dtree.elements_in_tree; @@ -210,14 +261,12 @@ FT_INFO *ft_init_nlq_search(MI_INFO *info, uint keynr, byte *query, tree_walk(&aio.dtree, (tree_walk_action) &walk_and_copy, &dptr, left_root_right); - if (presort) - qsort(dlist->doc, dlist->ndocs, sizeof(FT_DOC), (qsort_cmp)&FT_DOC_cmp); - -err2: - delete_tree(wtree); - delete_tree(&aio.dtree); + if (flags & FT_SORTED) + qsort2(dlist->doc, dlist->ndocs, sizeof(FT_DOC), (qsort2_cmp)&FT_DOC_cmp, 0); err: + delete_tree(&aio.dtree); + delete_tree(&wtree); info->lastpos=saved_lastpos; DBUG_RETURN(dlist); } diff --git a/myisam/ft_parser.c b/myisam/ft_parser.c index c25ed6022a0..0318bcbed10 100644 --- a/myisam/ft_parser.c +++ b/myisam/ft_parser.c @@ -18,41 +18,22 @@ #include "ftdefs.h" -#ifdef EVAL_RUN -#ifdef PIVOT_STAT -ulong collstat=0; -#endif -#endif /* EVAL_RUN */ - typedef struct st_ft_docstat { FT_WORD *list; uint uniq; double sum; -#ifdef EVAL_RUN - uint words, totlen; - double max, nsum, nsum2; -#endif /* EVAL_RUN */ - } FT_DOCSTAT; static int FT_WORD_cmp(CHARSET_INFO* cs, FT_WORD *w1, FT_WORD *w2) { - return _mi_compare_text(cs, (uchar*) w1->pos, w1->len, - (uchar*) w2->pos, w2->len, 0); + return mi_compare_text(cs, (uchar*) w1->pos, w1->len, + (uchar*) w2->pos, w2->len, 0); } static int walk_and_copy(FT_WORD *word,uint32 count,FT_DOCSTAT *docstat) { word->weight=LWS_IN_USE; - -#ifdef EVAL_RUN - word->cnt= (uchar) count; - if(docstat->max < word->weight) docstat->max=word->weight; - docstat->words+=count; - docstat->totlen+=word->len; -#endif /* EVAL_RUN */ docstat->sum+=word->weight; - memcpy_fixed((docstat->list)++,word,sizeof(FT_WORD)); return 0; } @@ -70,9 +51,6 @@ FT_WORD * ft_linearize(TREE *wtree) { docstat.list=wlist; docstat.uniq=wtree->elements_in_tree; -#ifdef EVAL_RUN - docstat.nsum=docstat.nsum2=docstat.max=docstat.words=docstat.totlen= -#endif /* EVAL_RUN */ docstat.sum=0; tree_walk(wtree,(tree_walk_action)&walk_and_copy,&docstat,left_root_right); } @@ -85,18 +63,8 @@ FT_WORD * ft_linearize(TREE *wtree) for (p=wlist;p->pos;p++) { p->weight=PRENORM_IN_USE; -#ifdef EVAL_RUN - docstat.nsum+=p->weight; - docstat.nsum2+=p->weight*p->weight; -#endif /* EVAL_RUN */ } -#ifdef EVAL_RUN -#ifdef PIVOT_STAT - collstat+=PIVOT_STAT; -#endif -#endif /* EVAL_RUN */ - for (p=wlist;p->pos;p++) { p->weight/=NORM_IN_USE; @@ -105,25 +73,17 @@ FT_WORD * ft_linearize(TREE *wtree) DBUG_RETURN(wlist); } -#define true_word_char(X) (isalnum(X) || (X)=='_') -#ifdef HYPHEN_IS_DELIM -#define misc_word_char(X) ((X)=='\'') -#else -#define misc_word_char(X) ((X)=='\'' || (X)=='-') -#endif -#define word_char(X) (true_word_char(X) || misc_word_char(X)) - - /* returns: * 0 - eof * 1 - word found * 2 - left bracket * 3 - right bracket */ -byte ft_get_word(byte **start, byte *end, FT_WORD *word, FTB_PARAM *param) +byte ft_get_word(CHARSET_INFO *cs, byte **start, byte *end, + FT_WORD *word, FTB_PARAM *param) { byte *doc=*start; - int mwc; + uint mwc, length; param->yesno=(FTB_YES==' ') ? 1 : (param->quot != 0); param->plusminus=param->pmsign=0; @@ -132,7 +92,7 @@ byte ft_get_word(byte **start, byte *end, FT_WORD *word, FTB_PARAM *param) { for (;doc<end;doc++) { - if (true_word_char(*doc)) break; + if (true_word_char(cs,*doc)) break; if (*doc == FTB_RQUOT && param->quot) { param->quot=doc; *start=doc+1; @@ -160,9 +120,9 @@ byte ft_get_word(byte **start, byte *end, FT_WORD *word, FTB_PARAM *param) param->plusminus=param->pmsign=0; } - mwc=0; - for (word->pos=doc; doc<end; doc++) - if (true_word_char(*doc)) + mwc=length=0; + for (word->pos=doc; doc<end; length++, doc+=my_mbcharlen(cs, *(uchar *)doc)) + if (true_word_char(cs,*doc)) mwc=0; else if (!misc_word_char(*doc) || mwc++) break; @@ -172,8 +132,8 @@ byte ft_get_word(byte **start, byte *end, FT_WORD *word, FTB_PARAM *param) if ((param->trunc=(doc<end && *doc == FTB_TRUNC))) doc++; - if (((word->len >= ft_min_word_len && !is_stopword(word->pos, word->len)) - || param->trunc) && word->len < ft_max_word_len) + if (((length >= ft_min_word_len && !is_stopword(word->pos, word->len)) + || param->trunc) && length < ft_max_word_len) { *start=doc; return 1; @@ -182,32 +142,33 @@ byte ft_get_word(byte **start, byte *end, FT_WORD *word, FTB_PARAM *param) return 0; } -byte ft_simple_get_word(byte **start, byte *end, FT_WORD *word) +byte ft_simple_get_word(CHARSET_INFO *cs, byte **start, byte *end, + FT_WORD *word) { - byte *doc=*start; - int mwc; + byte *doc= *start; + uint mwc, length; DBUG_ENTER("ft_simple_get_word"); while (doc<end) { for (;doc<end;doc++) { - if (true_word_char(*doc)) break; + if (true_word_char(cs,*doc)) break; } - mwc=0; - for(word->pos=doc; doc<end; doc++) - if (true_word_char(*doc)) - mwc=0; + mwc= length= 0; + for (word->pos=doc; doc<end; length++, doc+=my_mbcharlen(cs, *(uchar *)doc)) + if (true_word_char(cs,*doc)) + mwc= 0; else if (!misc_word_char(*doc) || mwc++) break; word->len= (uint)(doc-word->pos) - mwc; - if (word->len >= ft_min_word_len && word->len < ft_max_word_len && + if (length >= ft_min_word_len && length < ft_max_word_len && !is_stopword(word->pos, word->len)) { - *start=doc; + *start= doc; DBUG_RETURN(1); } } @@ -222,15 +183,24 @@ void ft_parse_init(TREE *wtree, CHARSET_INFO *cs) DBUG_VOID_RETURN; } -int ft_parse(TREE *wtree, byte *doc, int doclen) +int ft_parse(TREE *wtree, byte *doc, int doclen, my_bool with_alloc) { byte *end=doc+doclen; FT_WORD w; DBUG_ENTER("ft_parse"); - while (ft_simple_get_word(&doc,end,&w)) + while (ft_simple_get_word(wtree->custom_arg, &doc,end,&w)) { - if (!tree_insert(wtree, &w, 0)) + if (with_alloc) + { + byte *ptr; + /* allocating the data in the tree - to avoid mallocs and frees */ + DBUG_ASSERT(wtree->with_delete==0); + ptr=(byte *)alloc_root(& wtree->mem_root,w.len); + memcpy(ptr, w.pos, w.len); + w.pos=ptr; + } + if (!tree_insert(wtree, &w, 0, wtree->custom_arg)) goto err; } DBUG_RETURN(0); diff --git a/myisam/ft_static.c b/myisam/ft_static.c index 7f78a11bb2f..cf4f3d6a02a 100644 --- a/myisam/ft_static.c +++ b/myisam/ft_static.c @@ -20,31 +20,27 @@ ulong ft_min_word_len=4; ulong ft_max_word_len=HA_FT_MAXLEN; -ulong ft_max_word_len_for_sort=20; +ulong ft_query_expansion_limit=5; const char *ft_boolean_syntax="+ -><()~*:\"\"&|"; -const MI_KEYSEG ft_keysegs[FT_SEGS]={ +const HA_KEYSEG ft_keysegs[FT_SEGS]={ { HA_KEYTYPE_VARTEXT, /* type */ - 7, /* language (will be overwritten) */ + 63, /* language (will be overwritten) */ 0, 0, 0, /* null_bit, bit_start, bit_end */ HA_VAR_LENGTH | HA_PACK_KEY, /* flag */ HA_FT_MAXLEN, /* length */ -#ifdef EVAL_RUN - HA_FT_WLEN+1, /* start */ -#else /* EVAL_RUN */ HA_FT_WLEN, /* start */ -#endif /* EVAL_RUN */ 0, /* null_pos */ - NULL /* sort_order */ + NULL /* charset */ }, -#ifdef EVAL_RUN { - HA_KEYTYPE_INT8, 7, 0, 0, 0, 0, 1, HA_FT_WLEN, 0, NULL - }, -#endif /* EVAL_RUN */ - { - HA_FT_WTYPE, 7, 0, 0, 0, HA_NO_SORT, HA_FT_WLEN, 0, 0, NULL +/* + Note, this (and the last HA_KEYTYPE_END) segment should NOT + be packed in any way, otherwise w_search() won't be able to + update key entry 'in vivo' +*/ + HA_FT_WTYPE, 63, 0, 0, 0, HA_NO_SORT, HA_FT_WLEN, 0, 0, NULL } }; @@ -57,14 +53,17 @@ const struct _ft_vft _ft_vft_boolean = { ft_boolean_get_relevance, ft_boolean_reinit_search }; -FT_INFO *(*_ft_init_vft[2])(MI_INFO *, uint, byte *, uint, my_bool) = -{ ft_init_nlq_search, ft_init_boolean_search }; -FT_INFO *ft_init_search(uint mode, void *info, uint keynr, - byte *query, uint query_len, my_bool presort) +FT_INFO *ft_init_search(uint flags, void *info, uint keynr, + byte *query, uint query_len, byte *record) { - return (*_ft_init_vft[mode])((MI_INFO *)info, keynr, - query, query_len, presort); + FT_INFO *res; + if (flags & FT_BOOL) + res= ft_init_boolean_search((MI_INFO *)info, keynr, query, query_len); + else + res= ft_init_nlq_search((MI_INFO *)info, keynr, query, query_len, flags, + record); + return res; } const char *ft_stopword_file = 0; diff --git a/myisam/ft_stopwords.c b/myisam/ft_stopwords.c index 298df9a54cf..6682de18c65 100644 --- a/myisam/ft_stopwords.c +++ b/myisam/ft_stopwords.c @@ -17,8 +17,10 @@ /* Written by Sergei A. Golubchik, who has a shared copyright to this code */ #include "ftdefs.h" +#include "my_handler.h" -typedef struct st_ft_stopwords { +typedef struct st_ft_stopwords +{ const char * pos; uint len; } FT_STOPWORD; @@ -28,9 +30,9 @@ static TREE *stopwords3=NULL; static int FT_STOPWORD_cmp(void* cmp_arg __attribute__((unused)), FT_STOPWORD *w1, FT_STOPWORD *w2) { - return _mi_compare_text(default_charset_info, - (uchar *)w1->pos,w1->len, - (uchar *)w2->pos,w2->len,0); + return mi_compare_text(default_charset_info, + (uchar *)w1->pos,w1->len, + (uchar *)w2->pos,w2->len,0); } static void FT_STOPWORD_free(FT_STOPWORD *w, TREE_FREE action, @@ -45,7 +47,7 @@ static int ft_add_stopword(const char *w) FT_STOPWORD sw; return !w || (((sw.len= (uint) strlen(sw.pos=w)) >= ft_min_word_len) && - (tree_insert(stopwords3, &sw, 0)==NULL)); + (tree_insert(stopwords3, &sw, 0, stopwords3->custom_arg)==NULL)); } int ft_init_stopwords() @@ -79,7 +81,7 @@ int ft_init_stopwords() goto err0; len=my_read(fd, buffer, len, MYF(MY_WME)); end=start+len; - while (ft_simple_get_word(&start, end, &w)) + while (ft_simple_get_word(default_charset_info, &start, end, &w)) { if (ft_add_stopword(my_strdup_with_length(w.pos, w.len, MYF(0)))) goto err1; @@ -111,7 +113,7 @@ int is_stopword(char *word, uint len) FT_STOPWORD sw; sw.pos=word; sw.len=len; - return tree_search(stopwords3,&sw) != NULL; + return tree_search(stopwords3,&sw, stopwords3->custom_arg) != NULL; } diff --git a/myisam/ft_test1.c b/myisam/ft_test1.c index cb0b6054f0a..f4884f8ca39 100644 --- a/myisam/ft_test1.c +++ b/myisam/ft_test1.c @@ -64,7 +64,7 @@ int main(int argc, char *argv[]) static MI_COLUMNDEF recinfo[3]; static MI_KEYDEF keyinfo[2]; -static MI_KEYSEG keyseg[10]; +static HA_KEYSEG keyseg[10]; static int run_test(const char *filename) { diff --git a/myisam/ft_update.c b/myisam/ft_update.c index a68cc2a4cf4..b94a174b292 100644 --- a/myisam/ft_update.c +++ b/myisam/ft_update.c @@ -21,28 +21,27 @@ #include "ftdefs.h" #include <math.h> -/************************************************************** - This is to make ft-code to ignore keyseg.length at all * - and to index the whole VARCHAR/BLOB instead... */ -#undef set_if_smaller -#define set_if_smaller(A,B) /* no op */ -/**************************************************************/ - void _mi_ft_segiterator_init(MI_INFO *info, uint keynr, const byte *record, FT_SEG_ITERATOR *ftsi) { - ftsi->num=info->s->keyinfo[keynr].keysegs-FT_SEGS; + DBUG_ENTER("_mi_ft_segiterator_init"); + + ftsi->num=info->s->keyinfo[keynr].keysegs; ftsi->seg=info->s->keyinfo[keynr].seg; ftsi->rec=record; + DBUG_VOID_RETURN; } void _mi_ft_segiterator_dummy_init(const byte *record, uint len, FT_SEG_ITERATOR *ftsi) { + DBUG_ENTER("_mi_ft_segiterator_dummy_init"); + ftsi->num=1; ftsi->seg=0; ftsi->pos=record; ftsi->len=len; + DBUG_VOID_RETURN; } /* @@ -56,84 +55,98 @@ void _mi_ft_segiterator_dummy_init(const byte *record, uint len, uint _mi_ft_segiterator(register FT_SEG_ITERATOR *ftsi) { - if (!ftsi->num) return 0; else ftsi->num--; - if (!ftsi->seg) return 1; else ftsi->seg--; + DBUG_ENTER("_mi_ft_segiterator"); + + if (!ftsi->num) + { + DBUG_RETURN(0); + } + else + ftsi->num--; + if (!ftsi->seg) + { + DBUG_RETURN(1); + } + else + ftsi->seg--; if (ftsi->seg->null_bit && (ftsi->rec[ftsi->seg->null_pos] & ftsi->seg->null_bit)) { ftsi->pos=0; - return 1; + DBUG_RETURN(1); } ftsi->pos= ftsi->rec+ftsi->seg->start; if (ftsi->seg->flag & HA_VAR_LENGTH) { ftsi->len=uint2korr(ftsi->pos); ftsi->pos+=2; /* Skip VARCHAR length */ - set_if_smaller(ftsi->len,ftsi->seg->length); - return 1; + DBUG_RETURN(1); } if (ftsi->seg->flag & HA_BLOB_PART) { ftsi->len=_mi_calc_blob_length(ftsi->seg->bit_start,ftsi->pos); memcpy_fixed((char*) &ftsi->pos, ftsi->pos+ftsi->seg->bit_start, sizeof(char*)); - set_if_smaller(ftsi->len,ftsi->seg->length); - return 1; + DBUG_RETURN(1); } ftsi->len=ftsi->seg->length; - return 1; + DBUG_RETURN(1); } /* parses a document i.e. calls ft_parse for every keyseg */ -uint _mi_ft_parse(TREE *parsed, MI_INFO *info, uint keynr, const byte *record) +uint _mi_ft_parse(TREE *parsed, MI_INFO *info, uint keynr, + const byte *record, my_bool with_alloc) { FT_SEG_ITERATOR ftsi; + DBUG_ENTER("_mi_ft_parse"); + _mi_ft_segiterator_init(info, keynr, record, &ftsi); ft_parse_init(parsed, info->s->keyinfo[keynr].seg->charset); while (_mi_ft_segiterator(&ftsi)) { if (ftsi.pos) - if (ft_parse(parsed, (byte *)ftsi.pos, ftsi.len)) - return 1; + if (ft_parse(parsed, (byte *)ftsi.pos, ftsi.len, with_alloc)) + DBUG_RETURN(1); } - return 0; + DBUG_RETURN(0); } -FT_WORD * _mi_ft_parserecord(MI_INFO *info, uint keynr, - byte *keybuf __attribute__((unused)), - const byte *record) +FT_WORD * _mi_ft_parserecord(MI_INFO *info, uint keynr, const byte *record) { TREE ptree; + DBUG_ENTER("_mi_ft_parserecord"); bzero((char*) &ptree, sizeof(ptree)); - if (_mi_ft_parse(&ptree, info, keynr, record)) - return NULL; + if (_mi_ft_parse(&ptree, info, keynr, record,0)) + DBUG_RETURN(NULL); - return ft_linearize(/*info, keynr, keybuf, */ &ptree); + DBUG_RETURN(ft_linearize(&ptree)); } static int _mi_ft_store(MI_INFO *info, uint keynr, byte *keybuf, FT_WORD *wlist, my_off_t filepos) { uint key_length; + DBUG_ENTER("_mi_ft_store"); for (; wlist->pos; wlist++) { key_length=_ft_make_key(info,keynr,keybuf,wlist,filepos); if (_mi_ck_write(info,keynr,(uchar*) keybuf,key_length)) - return 1; + DBUG_RETURN(1); } - return 0; + DBUG_RETURN(0); } static int _mi_ft_erase(MI_INFO *info, uint keynr, byte *keybuf, FT_WORD *wlist, my_off_t filepos) { uint key_length, err=0; + DBUG_ENTER("_mi_ft_erase"); for (; wlist->pos; wlist++) { @@ -141,7 +154,7 @@ static int _mi_ft_erase(MI_INFO *info, uint keynr, byte *keybuf, if (_mi_ck_delete(info,keynr,(uchar*) keybuf,key_length)) err=1; } - return err; + DBUG_RETURN(err); } /* @@ -156,6 +169,8 @@ int _mi_ft_cmp(MI_INFO *info, uint keynr, const byte *rec1, const byte *rec2) { FT_SEG_ITERATOR ftsi1, ftsi2; CHARSET_INFO *cs=info->s->keyinfo[keynr].seg->charset; + DBUG_ENTER("_mi_ft_cmp"); + _mi_ft_segiterator_init(info, keynr, rec1, &ftsi1); _mi_ft_segiterator_init(info, keynr, rec2, &ftsi2); @@ -163,11 +178,11 @@ int _mi_ft_cmp(MI_INFO *info, uint keynr, const byte *rec1, const byte *rec2) { if ((ftsi1.pos != ftsi2.pos) && (!ftsi1.pos || !ftsi2.pos || - _mi_compare_text(cs, (uchar*) ftsi1.pos,ftsi1.len, - (uchar*) ftsi2.pos,ftsi2.len,0))) - return THOSE_TWO_DAMN_KEYS_ARE_REALLY_DIFFERENT; + mi_compare_text(cs, (uchar*) ftsi1.pos,ftsi1.len, + (uchar*) ftsi2.pos,ftsi2.len,0))) + DBUG_RETURN(THOSE_TWO_DAMN_KEYS_ARE_REALLY_DIFFERENT); } - return GEE_THEY_ARE_ABSOLUTELY_IDENTICAL; + DBUG_RETURN(GEE_THEY_ARE_ABSOLUTELY_IDENTICAL); } @@ -181,16 +196,17 @@ int _mi_ft_update(MI_INFO *info, uint keynr, byte *keybuf, CHARSET_INFO *cs=info->s->keyinfo[keynr].seg->charset; uint key_length; int cmp, cmp2; + DBUG_ENTER("_mi_ft_update"); - if (!(old_word=oldlist=_mi_ft_parserecord(info, keynr, keybuf, oldrec))) + if (!(old_word=oldlist=_mi_ft_parserecord(info, keynr, oldrec))) goto err0; - if (!(new_word=newlist=_mi_ft_parserecord(info, keynr, keybuf, newrec))) + if (!(new_word=newlist=_mi_ft_parserecord(info, keynr, newrec))) goto err1; error=0; while(old_word->pos && new_word->pos) { - cmp=_mi_compare_text(cs, (uchar*) old_word->pos,old_word->len, + cmp= mi_compare_text(cs, (uchar*) old_word->pos,old_word->len, (uchar*) new_word->pos,new_word->len,0); cmp2= cmp ? 0 : (fabs(old_word->weight - new_word->weight) > 1.e-5); @@ -219,7 +235,7 @@ err2: err1: my_free((char*) oldlist,MYF(0)); err0: - return error; + DBUG_RETURN(error); } @@ -230,13 +246,14 @@ int _mi_ft_add(MI_INFO *info, uint keynr, byte *keybuf, const byte *record, { int error= -1; FT_WORD *wlist; + DBUG_ENTER("_mi_ft_add"); - if ((wlist=_mi_ft_parserecord(info, keynr, keybuf, record))) + if ((wlist=_mi_ft_parserecord(info, keynr, record))) { error=_mi_ft_store(info,keynr,keybuf,wlist,pos); my_free((char*) wlist,MYF(0)); } - return error; + DBUG_RETURN(error); } @@ -247,33 +264,84 @@ int _mi_ft_del(MI_INFO *info, uint keynr, byte *keybuf, const byte *record, { int error= -1; FT_WORD *wlist; - if ((wlist=_mi_ft_parserecord(info, keynr, keybuf, record))) + DBUG_ENTER("_mi_ft_del"); + DBUG_PRINT("enter",("keynr: %d",keynr)); + + if ((wlist=_mi_ft_parserecord(info, keynr, record))) { error=_mi_ft_erase(info,keynr,keybuf,wlist,pos); my_free((char*) wlist,MYF(0)); } - return error; + DBUG_PRINT("exit",("Return: %d",error)); + DBUG_RETURN(error); } uint _ft_make_key(MI_INFO *info, uint keynr, byte *keybuf, FT_WORD *wptr, my_off_t filepos) { byte buf[HA_FT_MAXLEN+16]; + DBUG_ENTER("_ft_make_key"); #if HA_FT_WTYPE == HA_KEYTYPE_FLOAT - float weight=(float) ((filepos==HA_OFFSET_ERROR) ? 0 : wptr->weight); - mi_float4store(buf,weight); + { + float weight=(float) ((filepos==HA_OFFSET_ERROR) ? 0 : wptr->weight); + mi_float4store(buf,weight); + } #else #error #endif -#ifdef EVAL_RUN - *(buf+HA_FT_WLEN)=wptr->cnt; - int2store(buf+HA_FT_WLEN+1,wptr->len); - memcpy(buf+HA_FT_WLEN+3,wptr->pos,wptr->len); -#else /* EVAL_RUN */ int2store(buf+HA_FT_WLEN,wptr->len); memcpy(buf+HA_FT_WLEN+2,wptr->pos,wptr->len); -#endif /* EVAL_RUN */ - return _mi_make_key(info,keynr,(uchar*) keybuf,buf,filepos); + DBUG_RETURN(_mi_make_key(info,keynr,(uchar*) keybuf,buf,filepos)); } + +/* + convert key value to ft2 +*/ +uint _mi_ft_convert_to_ft2(MI_INFO *info, uint keynr, uchar *key) +{ + my_off_t root; + DYNAMIC_ARRAY *da=info->ft1_to_ft2; + MI_KEYDEF *keyinfo=&info->s->ft2_keyinfo; + uchar *key_ptr= (uchar*) dynamic_array_ptr(da, 0), *end; + uint length, key_length; + DBUG_ENTER("_mi_ft_convert_to_ft2"); + + /* we'll generate one pageful at once, and insert the rest one-by-one */ + /* calculating the length of this page ...*/ + length=(keyinfo->block_length-2) / keyinfo->keylength; + set_if_smaller(length, da->elements); + length=length * keyinfo->keylength; + + get_key_full_length_rdonly(key_length, key); + while (_mi_ck_delete(info, keynr, key, key_length) == 0) + /* nothing to do here. + _mi_ck_delete() will populate info->ft1_to_ft2 with deleted keys + */; + + /* creating pageful of keys */ + mi_putint(info->buff,length+2,0); + memcpy(info->buff+2, key_ptr, length); + info->buff_used=info->page_changed=1; /* info->buff is used */ + if ((root= _mi_new(info,keyinfo,DFLT_INIT_HITS)) == HA_OFFSET_ERROR || + _mi_write_keypage(info,keyinfo,root,DFLT_INIT_HITS,info->buff)) + DBUG_RETURN(-1); + + /* inserting the rest of key values */ + end= (uchar*) dynamic_array_ptr(da, da->elements); + for (key_ptr+=length; key_ptr < end; key_ptr+=keyinfo->keylength) + if(_mi_ck_real_write_btree(info, keyinfo, key_ptr, 0, &root, SEARCH_SAME)) + DBUG_RETURN(-1); + + /* now, writing the word key entry */ + ft_intXstore(key+key_length, - (int) da->elements); + _mi_dpointer(info, key+key_length+HA_FT_WLEN, root); + + DBUG_RETURN(_mi_ck_real_write_btree(info, + info->s->keyinfo+keynr, + key, 0, + &info->s->state.key_root[keynr], + SEARCH_SAME)); +} + diff --git a/myisam/ftbench/Ecompare.pl b/myisam/ftbench/Ecompare.pl new file mode 100755 index 00000000000..265534e704d --- /dev/null +++ b/myisam/ftbench/Ecompare.pl @@ -0,0 +1,96 @@ +#!/usr/bin/perl + +# compares out-files (as created by Ereport.pl) from dir1/*.out and dir2/*.out +# for each effectiveness column computes the probability of the hypothesis +# "Both files have the same effectiveness" + +# sign test is used to verify that test results are statistically +# significant to support the hypothesis. Function is computed on the fly. + +# basic formula is \sum_{r=0}^R C_N^r 2^{-N} +# As N can be big, we'll work with logarithms +$log2=log(2); +sub probab { + my $N=shift, $R=shift; + + my $r, $sum=0; + + for $r (0..$R) { + $sum+=exp(logfac($N)-logfac($r)-logfac($N-$r)-$N*$log2); + } + return $sum; +} + +# log(N!) +# for N<20 exact value from the table (below) is taken +# otherwise, Stirling approximation for N! is used +sub logfac { + my $n=shift; die "n=$n<0" if $n<0; + return $logfactab[$n] if $n<=$#logfactab; + return $n*log($n)-$n+log(2*3.14159265358*$n)/2; +} +@logfactab=( +0, 0, 0.693147180559945, 1.79175946922805, 3.17805383034795, +4.78749174278205, 6.57925121201010, 8.52516136106541, 10.6046029027453, +12.8018274800815, 15.1044125730755, 17.5023078458739, 19.9872144956619, +22.5521638531234, 25.1912211827387, 27.8992713838409, 30.6718601060807, +33.5050734501369, 36.3954452080331, 39.3398841871995, 42.3356164607535, +); + +############################# main () ############################### +#$p=shift; $m=shift; $p-=$m; +#if($p>$m) { +# print "1 > 2 [+$p-$m]: ", probab($p+$m, $m), "\n"; +#} elsif($p<$m) { +# print "1 < 2 [+$p-$m]: ", probab($p+$m, $p), "\n"; +#} else { +# print "1 = 2 [+$p-$m]: ", probab($p+$m, $m), "\n"; +#} +#exit; + +die "Use: $0 dir1 dir2\n" unless @ARGV==2 && + -d ($dir1=shift) && -d ($dir2=shift); +$_=`cd $dir1; echo *.out`; +s/\.out\b//g; +$total=""; + +for $file (split) { + open(OUT1,$out1="$dir1/$file.out") || die "Cannot open $out1: $!"; + open(OUT2,$out2="$dir2/$file.out") || die "Cannot open $out2: $!"; + + @p=@m=(); + while(!eof(OUT1) || !eof(OUT2)) { + $_=<OUT1>; @l1=split; shift @l1; + $_=<OUT2>; @l2=split; shift @l2; + + die "Number of columns differ in line $.\n" unless $#l1 == $#l2; + + for (0..$#l1) { + $p[$_]+= $l1[$_] > $l2[$_]; + $m[$_]+= $l1[$_] < $l2[$_]; + } + } + + for (0..$#l1) { + $pp[$_]+=$p[$_]; $mm[$_]+=$m[$_]; + $total[$_].=rep($file, ($#l1 ? $_ : undef), $p[$_], $m[$_]); + } + close OUT1; + close OUT2; +} + +for (0..$#l1) { + rep($total[$_], ($#l1 ? $_ : undef), $pp[$_], $mm[$_]); +} + +sub rep { + my ($test, $n, $p, $m, $c, $r)=@_; + + if ($p>$m) { $c=">"; $r="+"; } + elsif($p<$m) { $c="<"; $r="-"; } + else { $c="="; $r="="; } + $n=" $n: " if defined $n; + printf "%-8s $n $dir1 $c $dir2 [+%03d-%03d]: %16.15f\n", + $test, $p, $m, probab($p+$m, ($p>=$m ? $m : $p)); + $r; +} diff --git a/myisam/ftbench/Ecreate.pl b/myisam/ftbench/Ecreate.pl new file mode 100755 index 00000000000..d90a6f7a0ad --- /dev/null +++ b/myisam/ftbench/Ecreate.pl @@ -0,0 +1,44 @@ +#!/usr/bin/perl + +$test=shift || die "Usage $0 testname [option]"; +$option=shift; + +open(D, "<data/$test.d") || die "Cannot open(<data/$test.d): $!"; +open(Q, "<data/$test.q") || die "Cannot open(<data/$test.q): $!"; + +$N=0; + +print <<__HEADER__; +DROP TABLE IF EXISTS $test; +CREATE TABLE $test ( + id int(10) unsigned NOT NULL, + text text NOT NULL, + FULLTEXT KEY text (text) +) TYPE=MyISAM CHARSET=latin1; + +ALTER TABLE $test DISABLE KEYS; +__HEADER__ + +while (<D>) { chomp; + s/'/\\'/g; ++$N; + print "INSERT $test VALUES ($N, '$_');\n"; +} + +print <<__PREP__; +ALTER TABLE $test ENABLE KEYS; +SELECT $N; +__PREP__ + +$N=0; + +while (<Q>) { chomp; + s/'/\\'/g; ++$N; + $_="MATCH text AGAINST ('$_' $option)"; + print "SELECT $N, id, $_ FROM $test WHERE $_;\n"; +} + +print <<__FOOTER__; +DROP TABLE $test; +__FOOTER__ + + diff --git a/myisam/ftbench/Ereport.pl b/myisam/ftbench/Ereport.pl new file mode 100755 index 00000000000..5969304da09 --- /dev/null +++ b/myisam/ftbench/Ereport.pl @@ -0,0 +1,49 @@ +#!/usr/bin/perl + +die "Use: $0 eval_output qrels_file\n" unless @ARGV==2; + +open(EOUT,$eout=shift) || die "Cannot open $eout: $!"; +open(RELJ,$relj=shift) || die "Cannot open $relj: $!"; + +$_=<EOUT>; +die "$eout must start with a number!\n "unless /^[1-9][0-9]*\n/; +$ndocs=$_+0; + +$qid=0; +$relj_str=<RELJ>; +$eout_str=<EOUT>; + +while(!eof(RELJ) || !eof(EOUT)) { + ++$qid; + %dq=(); + $A=$B=$AB=0; + $Ravg=$Pavg=0; + + while($relj_str =~ /^0*$qid\s+(\d+)/) { + ++$A; + $dq{$1+0}=1; + last unless $relj_str=<RELJ>; + } + # Favg measure = 1/(a/Pavg+(1-a)/Ravg) +sub Favg { my $a=shift; $Pavg*$Ravg ? 1/($a/$Pavg+(1-$a)/$Ravg) : 0; } + # F0 : a=0 -- ignore precision + # F5 : a=0.5 + # F1 : a=1 -- ignore recall + while($eout_str =~ /^$qid\s+(\d+)\s+(\d+(?:\.\d+)?)/) { + $B++; + $AB++ if $dq{$1+0}; + $Ravg+=$AB; + $Pavg+=$AB/$B; + last unless $eout_str=<EOUT>; + } + next unless $A; + + $Ravg/=$B*$A if $B; + $Pavg/=$B if $B; + + printf "%5d %1.12f %1.12f %1.12f\n", $qid, Favg(0),Favg(0.5),Favg(1); +} + +exit 0; + + diff --git a/myisam/ftbench/README b/myisam/ftbench/README new file mode 100644 index 00000000000..b1f8b66b15f --- /dev/null +++ b/myisam/ftbench/README @@ -0,0 +1,43 @@ +1. should be run from myisam/ftbench/ +2. myisam/ftdefs.h should NOT be locked (bk get, not bk edit!) +3. there should be ./data/ subdir with test collections, files: + test1.d + test1.q + test1.r + test2.d + test2.q + test2.r + where test1, test2, etc - are arbitrary test names + + *.[dq] files contain documents/queries one item per line. + + *.r files have the structure: + 1 16 .....blablabla + 1 09 .....blablabla + 2 116 .....blablabla + ... + + that is /^\d+\s+\d+/ + and are sorted by the first number (not necessarily by the second) + +4. there should be ./t/ subdir with test directories + + ./t + ./t/BEST/ + ./t/testdir1/ + ./t/testdir2/ + ... + + there *must* be ./t/BEST/ subdir or a symlink to one of other dirs in ./t + all other names (besides BEST) can be arbitrary + + all test results are compared with BEST results. + + test directories may contain ftdefs.h, my.cnf, ft_mode + (the last one is used as in ... MATCH ... AGAINST ("..." $ft_mode) ...) + NOTE: all *.out files in test directories will NOT be overwritten! + delete them to re-test + +5. run ./ft-test-run.sh +6. go make some coffee + diff --git a/myisam/ftbench/ft-test-run.sh b/myisam/ftbench/ft-test-run.sh new file mode 100755 index 00000000000..ceba818fa5c --- /dev/null +++ b/myisam/ftbench/ft-test-run.sh @@ -0,0 +1,98 @@ +#!/bin/sh + +if [ ! -x ./ft-test-run.sh ] ; then + echo "Usage: ./ft-test-run.sh" + exit 1 +fi + +BASE=`pwd` +DATA=$BASE/var +ROOT=`cd ../..; pwd` +MYSQLD=$ROOT/sql/mysqld +MYSQL=$ROOT/client/mysql +MYSQLADMIN=$ROOT/client/mysqladmin +SOCK=$DATA/mysql.sock +PID=$DATA/mysql.pid +H=../ftdefs.h +OPTS="--no-defaults --socket=$SOCK --character-sets-dir=$ROOT/sql/share/charsets" +DELAY=10 + +stop_myslqd() +{ + [ -S $SOCK ] && $MYSQLADMIN $OPTS shutdown + [ -f $PID ] && kill `cat $PID` && sleep 15 && [ -f $PID ] && kill -9 `cat $PID` +} + +if [ ! -d t/BEST ] ; then + echo "No ./t/BEST directory! Aborting..." + exit 1 +fi +rm -f t/BEST/report.txt +if [ -w $H ] ; then + echo "$H is writeable! Aborting..." + exit 1 +fi + +stop_myslqd +rm -rf var > /dev/null 2>&1 +mkdir var +mkdir var/test + +for batch in t/* ; do + [ ! -d $batch ] && continue + [ $batch -ef t/BEST -a $batch != t/BEST ] && continue + + rm -rf var/test/* > /dev/null 2>&1 + rm -f $H + if [ -f $BASE/$batch/ftdefs.h ] ; then + cat $BASE/$batch/ftdefs.h > $H + chmod a-wx $H + else + bk get -q $H + fi + OPTS="--defaults-file=$BASE/$batch/my.cnf --socket=$SOCK --character-sets-dir=$ROOT/sql/share/charsets" + stop_myslqd + rm -f $MYSQLD + echo "building $batch" + echo "============== $batch ===============" >> var/ft_test.log + (cd $ROOT; gmake) >> var/ft_test.log 2>&1 + + for prog in $MYSQLD $MYSQL $MYSQLADMIN ; do + if [ ! -x $prog ] ; then + echo "build failed: no $prog" + exit 1 + fi + done + + echo "=====================================" >> var/ft_test.log + $MYSQLD $OPTS --basedir=$BASE --skip-bdb --pid-file=$PID \ + --language=$ROOT/sql/share/english \ + --skip-grant-tables --skip-innodb \ + --skip-networking --tmpdir=$DATA >> var/ft_test.log 2>&1 & + + sleep $DELAY + $MYSQLADMIN $OPTS ping + if [ $? != 0 ] ; then + echo "$MYSQLD refused to start" + exit 1 + fi + for test in `cd data; echo *.r|sed "s/\.r//g"` ; do + if [ -f $batch/$test.out ] ; then + echo "skipping $batch/$test.out" + continue + fi + echo "testing $batch/$test" + FT_MODE=`cat $batch/ft_mode 2>/dev/null` + ./Ecreate.pl $test "$FT_MODE" | $MYSQL $OPTS --skip-column-names test >var/$test.eval + echo "reporting $batch/$test" + ./Ereport.pl var/$test.eval data/$test.r > $batch/$test.out || exit + done + stop_myslqd + rm -f $H + bk get -q $H + if [ ! $batch -ef t/BEST ] ; then + echo "comparing $batch" + ./Ecompare.pl t/BEST $batch >> t/BEST/report.txt + fi +done + diff --git a/myisam/ftdefs.h b/myisam/ftdefs.h index 62fa4362e19..a97485ec021 100644 --- a/myisam/ftdefs.h +++ b/myisam/ftdefs.h @@ -21,9 +21,14 @@ #include "fulltext.h" #include <m_ctype.h> #include <my_tree.h> +#include <queues.h> +#include <assert.h> -#define HYPHEN_IS_DELIM -#define HYPHEN_IS_CONCAT /* not used for now */ +#define true_word_char(s,X) (my_isalnum(s,X) || (X)=='_') +#define misc_word_char(X) ((X)=='\'') +#define word_char(s,X) (true_word_char(s,X) || misc_word_char(X)) + +#define FT_MAX_WORD_LEN_FOR_SORT 20 #define COMPILE_STOPWORDS_IN @@ -60,16 +65,6 @@ #define NORM_SUM (docstat.nsum) #define NORM_COS (sqrt(docstat.nsum2)) -#ifdef EVAL_RUN -/* -extern ulong collstat; -#define PIVOT_STAT (docstat.uniq) -#define PIVOT_SLOPE (0.69) -#define PIVOT_PIVOT ((double)collstat/(info->state->records+1)) -#define NORM_PIVOT ((1-PIVOT_SLOPE)*PIVOT_PIVOT+PIVOT_SLOPE*docstat.uniq) -*/ -#endif /* EVAL_RUN */ - #define PIVOT_VAL (0.0115) #define NORM_PIVOT (1+PIVOT_VAL*docstat.uniq) /*---------------------------------------------------------------*/ @@ -102,9 +97,6 @@ typedef struct st_ft_word { byte * pos; uint len; double weight; -#ifdef EVAL_RUN - byte cnt; -#endif /* EVAL_RUN */ } FT_WORD; typedef struct st_ftb_param { @@ -120,12 +112,12 @@ int is_stopword(char *word, uint len); uint _ft_make_key(MI_INFO *, uint , byte *, FT_WORD *, my_off_t); -byte ft_get_word(byte **, byte *, FT_WORD *, FTB_PARAM *); -byte ft_simple_get_word(byte **, byte *, FT_WORD *); +byte ft_get_word(CHARSET_INFO *, byte **, byte *, FT_WORD *, FTB_PARAM *); +byte ft_simple_get_word(CHARSET_INFO *, byte **, byte *, FT_WORD *); typedef struct _st_ft_seg_iterator { uint num, len; - MI_KEYSEG *seg; + HA_KEYSEG *seg; const byte *rec, *pos; } FT_SEG_ITERATOR; @@ -134,13 +126,15 @@ void _mi_ft_segiterator_dummy_init(const byte *, uint, FT_SEG_ITERATOR *); uint _mi_ft_segiterator(FT_SEG_ITERATOR *); void ft_parse_init(TREE *, CHARSET_INFO *); -int ft_parse(TREE *, byte *, int); +int ft_parse(TREE *, byte *, int, my_bool); FT_WORD * ft_linearize(TREE *); -FT_WORD * _mi_ft_parserecord(MI_INFO *, uint, byte *, const byte *); -uint _mi_ft_parse(TREE *parsed, MI_INFO *info, uint keynr, const byte *record); +FT_WORD * _mi_ft_parserecord(MI_INFO *, uint, const byte *); +uint _mi_ft_parse(TREE *, MI_INFO *, uint, const byte *, my_bool); + +FT_INFO *ft_init_nlq_search(MI_INFO *, uint, byte *, uint, uint, byte *); +FT_INFO *ft_init_boolean_search(MI_INFO *, uint, byte *, uint); extern const struct _ft_vft _ft_vft_nlq; -FT_INFO *ft_init_nlq_search(MI_INFO *, uint, byte *, uint, my_bool); int ft_nlq_read_next(FT_INFO *, char *); float ft_nlq_find_relevance(FT_INFO *, byte *, uint); void ft_nlq_close_search(FT_INFO *); @@ -149,10 +143,10 @@ my_off_t ft_nlq_get_docid(FT_INFO *); void ft_nlq_reinit_search(FT_INFO *); extern const struct _ft_vft _ft_vft_boolean; -FT_INFO *ft_init_boolean_search(MI_INFO *, uint, byte *, uint, my_bool); int ft_boolean_read_next(FT_INFO *, char *); float ft_boolean_find_relevance(FT_INFO *, byte *, uint); void ft_boolean_close_search(FT_INFO *); float ft_boolean_get_relevance(FT_INFO *); my_off_t ft_boolean_get_docid(FT_INFO *); void ft_boolean_reinit_search(FT_INFO *); + diff --git a/myisam/fulltext.h b/myisam/fulltext.h index f787c9bcfe8..d8c74d4e94b 100644 --- a/myisam/fulltext.h +++ b/myisam/fulltext.h @@ -21,19 +21,18 @@ #include "myisamdef.h" #include "ft_global.h" -/* shoudn't be def'ed when linking with mysql */ -#undef EVAL_RUN - #define HA_FT_WTYPE HA_KEYTYPE_FLOAT #define HA_FT_WLEN 4 -#ifdef EVAL_RUN -#define FT_SEGS 3 -#else /* EVAL_RUN */ #define FT_SEGS 2 -#endif /* EVAL_RUN */ -extern const MI_KEYSEG ft_keysegs[FT_SEGS]; +#define ft_sintXkorr(A) mi_sint4korr(A) +#define ft_intXstore(T,A) mi_int4store(T,A) + +extern const HA_KEYSEG ft_keysegs[FT_SEGS]; int _mi_ft_cmp(MI_INFO *, uint, const byte *, const byte *); int _mi_ft_add(MI_INFO *, uint, byte *, const byte *, my_off_t); int _mi_ft_del(MI_INFO *, uint, byte *, const byte *, my_off_t); + +uint _mi_ft_convert_to_ft2(MI_INFO *, uint, uchar *); + diff --git a/myisam/mi_check.c b/myisam/mi_check.c index 7108fdcba9e..bd8c8c60e33 100644 --- a/myisam/mi_check.c +++ b/myisam/mi_check.c @@ -20,7 +20,6 @@ #include <m_ctype.h> #include <stdarg.h> #include <my_getopt.h> -#include <assert.h> #ifdef HAVE_SYS_VADVISE_H #include <sys/vadvise.h> #endif @@ -49,10 +48,11 @@ static int sort_key_read(MI_SORT_PARAM *sort_param,void *key); static int sort_ft_key_read(MI_SORT_PARAM *sort_param,void *key); static int sort_get_next_record(MI_SORT_PARAM *sort_param); static int sort_key_cmp(MI_SORT_PARAM *sort_param, const void *a,const void *b); +static int sort_ft_key_write(MI_SORT_PARAM *sort_param, const void *a); static int sort_key_write(MI_SORT_PARAM *sort_param, const void *a); static my_off_t get_record_for_key(MI_INFO *info,MI_KEYDEF *keyinfo, uchar *key); -static int sort_insert_key(MI_SORT_PARAM *sort_param, +static int sort_insert_key(MI_SORT_PARAM *sort_param, reg1 SORT_KEY_BLOCKS *key_block, uchar *key, my_off_t prev_block); static int sort_delete_record(MI_SORT_PARAM *sort_param); @@ -62,22 +62,6 @@ static SORT_KEY_BLOCKS *alloc_key_blocks(MI_CHECK *param, uint blocks, static ha_checksum mi_byte_checksum(const byte *buf, uint length); static void set_data_file_type(SORT_INFO *sort_info, MYISAM_SHARE *share); -#ifdef __WIN__ -static double ulonglong2double(ulonglong value) -{ - longlong nr=(longlong) value; - if (nr >= 0) - return (double) nr; - return (18446744073709551616.0 + (double) nr); -} - -#if SIZEOF_OFF_T > 4 -#define my_off_t2double(A) ulonglong2double(A) -#else -#define my_off_t2double(A) ((double) (A)) -#endif /* SIZEOF_OFF_T > 4 */ -#endif /* __WIN__ */ - void myisamchk_init(MI_CHECK *param) { bzero((gptr) param,sizeof(*param)); @@ -93,6 +77,8 @@ void myisamchk_init(MI_CHECK *param) param->tmpfile_createflag=O_RDWR | O_TRUNC | O_EXCL; param->myf_rw=MYF(MY_NABP | MY_WME | MY_WAIT_IF_FULL); param->start_check_pos=0; + param->max_record_length= LONGLONG_MAX; + param->key_cache_block_size= KEY_CACHE_BLOCK_SIZE; } /* Check the status flags for the table */ @@ -257,7 +243,9 @@ static int check_k_link(MI_CHECK *param, register MI_INFO *info, uint nr) if (next_link > info->state->key_file_length || next_link & (info->s->blocksize-1)) DBUG_RETURN(1); - if (!(buff=key_cache_read(info->s->kfile, next_link, (byte*) info->buff, + if (!(buff=key_cache_read(info->s->key_cache, + info->s->kfile, next_link, DFLT_INIT_HITS, + (byte*) info->buff, myisam_block_size, block_size, 1))) DBUG_RETURN(1); next_link=mi_sizekorr(buff); @@ -275,7 +263,7 @@ static int check_k_link(MI_CHECK *param, register MI_INFO *info, uint nr) } /* check_k_link */ - /* Kontrollerar storleken p} filerna */ + /* Check sizes of files */ int chk_size(MI_CHECK *param, register MI_INFO *info) { @@ -286,7 +274,9 @@ int chk_size(MI_CHECK *param, register MI_INFO *info) if (!(param->testflag & T_SILENT)) puts("- check file-size"); - flush_key_blocks(info->s->kfile, FLUSH_FORCE_WRITE); /* If called externally */ + /* The following is needed if called externally (not from myisamchk) */ + flush_key_blocks(info->s->key_cache, + info->s->kfile, FLUSH_FORCE_WRITE); size=my_seek(info->s->kfile,0L,MY_SEEK_END,MYF(0)); if ((skr=(my_off_t) info->state->key_file_length) != size) @@ -396,8 +386,8 @@ int chk_key(MI_CHECK *param, register MI_INFO *info) if (share->state.key_root[key] == HA_OFFSET_ERROR && (info->state->records == 0 || keyinfo->flag & HA_FULLTEXT)) continue; - if (!_mi_fetch_keypage(info,keyinfo,share->state.key_root[key],info->buff, - 0)) + if (!_mi_fetch_keypage(info,keyinfo,share->state.key_root[key], + DFLT_INIT_HITS,info->buff,0)) { mi_check_print_error(param,"Can't read indexpage from filepos: %s", llstr(share->state.key_root[key],buff)); @@ -514,7 +504,7 @@ int chk_key(MI_CHECK *param, register MI_INFO *info) param->record_checksum=old_record_checksum-init_checksum; /* Remove delete links */ else param->record_checksum=0; - DBUG_RETURN(0); + DBUG_RETURN(result); } /* chk_key */ @@ -534,7 +524,7 @@ static int chk_index(MI_CHECK *param, MI_INFO *info, MI_KEYDEF *keyinfo, if (!(temp_buff=(uchar*) my_alloca((uint) keyinfo->block_length))) { - mi_check_print_error(param,"Not Enough memory"); + mi_check_print_error(param,"Not enough memory for keyblock"); DBUG_RETURN(-1); } @@ -575,7 +565,8 @@ static int chk_index(MI_CHECK *param, MI_INFO *info, MI_KEYDEF *keyinfo, info->state->key_file_length=(max_length & ~ (my_off_t) (info->s->blocksize-1)); } - if (!_mi_fetch_keypage(info,keyinfo,next_page,temp_buff,0)) + if (!_mi_fetch_keypage(info,keyinfo,next_page, + DFLT_INIT_HITS,temp_buff,0)) { mi_check_print_error(param,"Can't read key from filepos: %s",llstr(next_page,llbuff)); goto err; @@ -595,8 +586,8 @@ static int chk_index(MI_CHECK *param, MI_INFO *info, MI_KEYDEF *keyinfo, goto err; } if ((*keys)++ && - (flag=_mi_key_cmp(keyinfo->seg,info->lastkey,key,key_length, - comp_flag, ¬_used)) >=0) + (flag=ha_key_cmp(keyinfo->seg,info->lastkey,key,key_length, + comp_flag, ¬_used)) >=0) { DBUG_DUMP("old",(byte*) info->lastkey, info->lastkey_length); DBUG_DUMP("new",(byte*) key, key_length); @@ -613,9 +604,9 @@ static int chk_index(MI_CHECK *param, MI_INFO *info, MI_KEYDEF *keyinfo, if (*keys != 1L) /* not first_key */ { uint diff; - _mi_key_cmp(keyinfo->seg,info->lastkey,key,USE_WHOLE_KEY, - SEARCH_FIND | SEARCH_NULL_ARE_NOT_EQUAL, - &diff); + ha_key_cmp(keyinfo->seg,info->lastkey,key,USE_WHOLE_KEY, + SEARCH_FIND | SEARCH_NULL_ARE_NOT_EQUAL, + &diff); param->unique_count[diff-1]++; } } @@ -682,7 +673,7 @@ static ha_checksum calc_checksum(ha_rows count) static uint isam_key_length(MI_INFO *info, register MI_KEYDEF *keyinfo) { uint length; - MI_KEYSEG *keyseg; + HA_KEYSEG *keyseg; DBUG_ENTER("isam_key_length"); length= info->s->rec_reflength; @@ -722,7 +713,7 @@ int chk_data_link(MI_CHECK *param, MI_INFO *info,int extend) if (!(record= (byte*) my_malloc(info->s->base.pack_reclength,MYF(0)))) { - mi_check_print_error(param,"Not Enough memory"); + mi_check_print_error(param,"Not enough memory for record"); DBUG_RETURN(-1); } records=del_blocks=0; @@ -825,16 +816,17 @@ int chk_data_link(MI_CHECK *param, MI_INFO *info,int extend) goto next; } mi_check_print_error(param,"Wrong bytesec: %d-%d-%d at linkstart: %s", - block_info.header[0],block_info.header[1], - block_info.header[2], - llstr(start_block,llbuff)); + block_info.header[0],block_info.header[1], + block_info.header[2], + llstr(start_block,llbuff)); goto err2; } if (info->state->data_file_length < block_info.filepos+ block_info.block_len) { - mi_check_print_error(param,"Recordlink that points outside datafile at %s", - llstr(pos,llbuff)); + mi_check_print_error(param, + "Recordlink that points outside datafile at %s", + llstr(pos,llbuff)); got_error=1; break; } @@ -845,9 +837,9 @@ int chk_data_link(MI_CHECK *param, MI_INFO *info,int extend) pos=block_info.filepos+block_info.block_len; if (block_info.rec_len > (uint) info->s->base.max_pack_length) { - mi_check_print_error(param,"Found too long record (%d) at %s", - block_info.rec_len, - llstr(start_recpos,llbuff)); + mi_check_print_error(param,"Found too long record (%lu) at %s", + (ulong) block_info.rec_len, + llstr(start_recpos,llbuff)); got_error=1; break; } @@ -856,8 +848,10 @@ int chk_data_link(MI_CHECK *param, MI_INFO *info,int extend) if (!(to= mi_alloc_rec_buff(info, block_info.rec_len, &info->rec_buff))) { - mi_check_print_error(param,"Not enough memory for blob at %s", - llstr(start_recpos,llbuff)); + mi_check_print_error(param, + "Not enough memory (%lu) for blob at %s", + (ulong) block_info.rec_len, + llstr(start_recpos,llbuff)); got_error=1; break; } @@ -868,9 +862,11 @@ int chk_data_link(MI_CHECK *param, MI_INFO *info,int extend) } if (left_length < block_info.data_len) { - mi_check_print_error(param,"Found too long record at %s", - llstr(start_recpos,llbuff)); - got_error=1; break; + mi_check_print_error(param,"Found too long record (%lu) at %s", + (ulong) block_info.data_len, + llstr(start_recpos,llbuff)); + got_error=1; + break; } if (_mi_read_cache(¶m->read_cache,(byte*) to,block_info.filepos, (uint) block_info.data_len, @@ -1157,7 +1153,8 @@ int mi_repair(MI_CHECK *param, register MI_INFO *info, param->testflag|=T_CALC_CHECKSUM; if (!param->using_global_keycache) - VOID(init_key_cache(param->use_buffers)); + VOID(init_key_cache(dflt_key_cache, param->key_cache_block_size, + param->use_buffers, 0, 0)); if (init_io_cache(¶m->read_cache,info->dfile, (uint) param->read_buffer_length, @@ -1176,7 +1173,7 @@ int mi_repair(MI_CHECK *param, register MI_INFO *info, MYF(0))) || !mi_alloc_rec_buff(info, -1, &sort_param.rec_buff)) { - mi_check_print_error(param,"Not enough memory for extra record"); + mi_check_print_error(param, "Not enough memory for extra record"); goto err; } @@ -1379,7 +1376,7 @@ err: VOID(end_io_cache(¶m->read_cache)); info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED); VOID(end_io_cache(&info->rec_cache)); - got_error|=flush_blocks(param,share->kfile); + got_error|=flush_blocks(param, share->key_cache, share->kfile); if (!got_error && param->testflag & T_UNPACK) { share->state.header.options[0]&= (uchar) ~HA_OPTION_COMPRESS_RECORD; @@ -1479,7 +1476,8 @@ int movepoint(register MI_INFO *info, byte *record, my_off_t oldpos, nod_flag=mi_test_if_nod(info->buff); _mi_dpointer(info,info->int_keypos-nod_flag- info->s->rec_reflength,newpos); - if (_mi_write_keypage(info,keyinfo,info->last_keypage,info->buff)) + if (_mi_write_keypage(info,keyinfo,info->last_keypage, + DFLT_INIT_HITS,info->buff)) DBUG_RETURN(-1); } else @@ -1514,15 +1512,15 @@ void lock_memory(MI_CHECK *param __attribute__((unused))) /* Flush all changed blocks to disk */ -int flush_blocks(MI_CHECK *param, File file) +int flush_blocks(MI_CHECK *param, KEY_CACHE *key_cache, File file) { - if (flush_key_blocks(file,FLUSH_RELEASE)) + if (flush_key_blocks(key_cache, file, FLUSH_RELEASE)) { mi_check_print_error(param,"%d when trying to write bufferts",my_errno); return(1); } if (!param->using_global_keycache) - end_key_cache(); + end_key_cache(key_cache,1); return 0; } /* flush_blocks */ @@ -1577,7 +1575,7 @@ int mi_sort_index(MI_CHECK *param, register MI_INFO *info, my_string name) } /* Flush key cache for this file if we are calling this outside myisamchk */ - flush_key_blocks(share->kfile, FLUSH_IGNORE_CHANGED); + flush_key_blocks(share->key_cache,share->kfile, FLUSH_IGNORE_CHANGED); share->state.version=(ulong) time((time_t*) 0); old_state= share->state; /* save state if not stored */ @@ -1638,10 +1636,10 @@ static int sort_one_index(MI_CHECK *param, MI_INFO *info, MI_KEYDEF *keyinfo, if (!(buff=(uchar*) my_alloca((uint) keyinfo->block_length))) { - mi_check_print_error(param,"Not Enough memory"); + mi_check_print_error(param,"Not enough memory for key block"); DBUG_RETURN(-1); } - if (!_mi_fetch_keypage(info,keyinfo,pagepos,buff,0)) + if (!_mi_fetch_keypage(info,keyinfo,pagepos,DFLT_INIT_HITS,buff,0)) { mi_check_print_error(param,"Can't read key block from filepos: %s", llstr(pagepos,llbuff)); @@ -1805,7 +1803,7 @@ int mi_repair_by_sort(MI_CHECK *param, register MI_INFO *info, File new_file; MI_SORT_PARAM sort_param; MYISAM_SHARE *share=info->s; - MI_KEYSEG *keyseg; + HA_KEYSEG *keyseg; ulong *rec_per_key_part; char llbuff[22]; SORT_INFO sort_info; @@ -1850,7 +1848,7 @@ int mi_repair_by_sort(MI_CHECK *param, register MI_INFO *info, MYF(0))) || !mi_alloc_rec_buff(info, -1, &sort_param.rec_buff)) { - mi_check_print_error(param,"Not enough memory for extra record"); + mi_check_print_error(param, "Not enough memory for extra record"); goto err; } if (!rep_quick) @@ -1890,7 +1888,7 @@ int mi_repair_by_sort(MI_CHECK *param, register MI_INFO *info, Flush key cache for this file if we are calling this outside myisamchk */ - flush_key_blocks(share->kfile, FLUSH_IGNORE_CHANGED); + flush_key_blocks(share->key_cache,share->kfile, FLUSH_IGNORE_CHANGED); /* Clear the pointers to the given rows */ for (i=0 ; i < share->base.keys ; i++) share->state.key_root[i]= HA_OFFSET_ERROR; @@ -1900,7 +1898,7 @@ int mi_repair_by_sort(MI_CHECK *param, register MI_INFO *info, } else { - if (flush_key_blocks(share->kfile, FLUSH_FORCE_WRITE)) + if (flush_key_blocks(share->key_cache,share->kfile, FLUSH_FORCE_WRITE)) goto err; key_map= ~key_map; /* Create the missing keys */ } @@ -1927,7 +1925,6 @@ int mi_repair_by_sort(MI_CHECK *param, register MI_INFO *info, ((param->testflag & T_CREATE_MISSING_KEYS) ? info->state->records : (ha_rows) (sort_info.filelength/length+1)); sort_param.key_cmp=sort_key_cmp; - sort_param.key_write=sort_key_write; sort_param.lock_in_memory=lock_memory; sort_param.tmpdir=param->tmpdir; sort_param.sort_info=&sort_info; @@ -1945,6 +1942,7 @@ int mi_repair_by_sort(MI_CHECK *param, register MI_INFO *info, { sort_param.read_cache=param->read_cache; sort_param.keyinfo=share->keyinfo+sort_param.key; + sort_param.seg=sort_param.keyinfo->seg; if (!(((ulonglong) 1 << sort_param.key) & key_map)) { /* Remember old statistics for key */ @@ -1958,7 +1956,7 @@ int mi_repair_by_sort(MI_CHECK *param, register MI_INFO *info, if ((!(param->testflag & T_SILENT))) printf ("- Fixing index %d\n",sort_param.key+1); sort_param.max_pos=sort_param.pos=share->pack.header_length; - keyseg=sort_param.keyinfo->seg; + keyseg=sort_param.seg; bzero((char*) sort_param.unique,sizeof(sort_param.unique)); sort_param.key_length=share->rec_reflength; for (i=0 ; keyseg[i].type != HA_KEYTYPE_END; i++) @@ -1977,13 +1975,17 @@ int mi_repair_by_sort(MI_CHECK *param, register MI_INFO *info, if (sort_param.keyinfo->flag & HA_FULLTEXT) { sort_info.max_records= - (ha_rows) (sort_info.filelength/ft_max_word_len_for_sort+1); + (ha_rows) (sort_info.filelength/FT_MAX_WORD_LEN_FOR_SORT+1); sort_param.key_read=sort_ft_key_read; - sort_param.key_length+=ft_max_word_len_for_sort-HA_FT_MAXLEN; + sort_param.key_write=sort_ft_key_write; + sort_param.key_length+=FT_MAX_WORD_LEN_FOR_SORT-HA_FT_MAXLEN; } else + { sort_param.key_read=sort_key_read; + sort_param.key_write=sort_key_write; + } if (_create_index_by_sort(&sort_param, (my_bool) (!(param->testflag & T_VERBOSE)), @@ -2029,9 +2031,6 @@ int mi_repair_by_sort(MI_CHECK *param, register MI_INFO *info, else info->state->data_file_length=sort_param.max_pos; - /*if (flush_pending_blocks(param)) - goto err;*/ - param->read_cache.file=info->dfile; /* re-init read cache */ reinit_io_cache(¶m->read_cache,READ_CACHE,share->pack.header_length, 1,1); @@ -2091,7 +2090,7 @@ int mi_repair_by_sort(MI_CHECK *param, register MI_INFO *info, memcpy( &share->state.state, info->state, sizeof(*info->state)); err: - got_error|= flush_blocks(param,share->kfile); + got_error|= flush_blocks(param, share->key_cache, share->kfile); VOID(end_io_cache(&info->rec_cache)); if (!got_error) { @@ -2130,6 +2129,7 @@ err: MYF(MY_ALLOW_ZERO_PTR)); my_free(sort_param.record,MYF(MY_ALLOW_ZERO_PTR)); my_free((gptr) sort_info.key_block,MYF(MY_ALLOW_ZERO_PTR)); + my_free((gptr) sort_info.ft_buf, MYF(MY_ALLOW_ZERO_PTR)); my_free(sort_info.buff,MYF(MY_ALLOW_ZERO_PTR)); VOID(end_io_cache(¶m->read_cache)); info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED); @@ -2176,7 +2176,7 @@ int mi_repair_parallel(MI_CHECK *param, register MI_INFO *info, MI_SORT_PARAM *sort_param=0; MYISAM_SHARE *share=info->s; ulong *rec_per_key_part; - MI_KEYSEG *keyseg; + HA_KEYSEG *keyseg; char llbuff[22]; IO_CACHE_SHARE io_share; SORT_INFO sort_info; @@ -2254,7 +2254,7 @@ int mi_repair_parallel(MI_CHECK *param, register MI_INFO *info, Flush key cache for this file if we are calling this outside myisamchk */ - flush_key_blocks(share->kfile, FLUSH_IGNORE_CHANGED); + flush_key_blocks(share->key_cache,share->kfile, FLUSH_IGNORE_CHANGED); /* Clear the pointers to the given rows */ for (i=0 ; i < share->base.keys ; i++) share->state.key_root[i]= HA_OFFSET_ERROR; @@ -2264,7 +2264,7 @@ int mi_repair_parallel(MI_CHECK *param, register MI_INFO *info, } else { - if (flush_key_blocks(share->kfile, FLUSH_FORCE_WRITE)) + if (flush_key_blocks(share->key_cache,share->kfile, FLUSH_FORCE_WRITE)) goto err; key_map= ~key_map; /* Create the missing keys */ } @@ -2284,17 +2284,19 @@ int mi_repair_parallel(MI_CHECK *param, register MI_INFO *info, rec_length=share->base.min_block_length; else rec_length=share->base.pack_reclength; + /* + +1 below is required hack for parallel repair mode. + The info->state->records value, that is compared later + to sort_info.max_records and cannot exceed it, is + increased in sort_key_write. In mi_repair_by_sort, sort_key_write + is called after sort_key_read, where the comparison is performed, + but in parallel mode master thread can call sort_key_write + before some other repair thread calls sort_key_read. + Furthermore I'm not even sure +1 would be enough. + May be sort_info.max_records shold be always set to max value in + parallel mode. + */ sort_info.max_records= - /* +1 below is required hack for parallel repair mode. - The info->state->records value, that is compared later - to sort_info.max_records and cannot exceed it, is - increased in sort_key_write. In mi_repair_by_sort, sort_key_write - is called after sort_key_read, where the comparison is performed, - but in parallel mode master thread can call sort_key_write - before some other repair thread calls sort_key_read. - Furthermore I'm not even sure +1 would be enough. - May be sort_info.max_records shold be always set to max value in - parallel mode. */ ((param->testflag & T_CREATE_MISSING_KEYS) ? info->state->records + 1: (ha_rows) (sort_info.filelength/rec_length+1)); @@ -2308,7 +2310,7 @@ int mi_repair_parallel(MI_CHECK *param, register MI_INFO *info, (sizeof(MI_SORT_PARAM) + share->base.pack_reclength), MYF(MY_ZEROFILL)))) { - mi_check_print_error(param,"Not enough memory!"); + mi_check_print_error(param,"Not enough memory for key!"); goto err; } total_key_length=0; @@ -2321,6 +2323,7 @@ int mi_repair_parallel(MI_CHECK *param, register MI_INFO *info, { sort_param[i].key=key; sort_param[i].keyinfo=share->keyinfo+key; + sort_param[i].seg=sort_param[i].keyinfo->seg; if (!(((ulonglong) 1 << key) & key_map)) { /* Remember old statistics for key */ @@ -2334,10 +2337,17 @@ int mi_repair_parallel(MI_CHECK *param, register MI_INFO *info, istep=1; if ((!(param->testflag & T_SILENT))) printf ("- Fixing index %d\n",key+1); - sort_param[i].key_read= ((sort_param[i].keyinfo->flag & HA_FULLTEXT) ? - sort_ft_key_read : sort_key_read); + if (sort_param[i].keyinfo->flag & HA_FULLTEXT) + { + sort_param[i].key_read=sort_ft_key_read; + sort_param[i].key_write=sort_ft_key_write; + } + else + { + sort_param[i].key_read=sort_key_read; + sort_param[i].key_write=sort_key_write; + } sort_param[i].key_cmp=sort_key_cmp; - sort_param[i].key_write=sort_key_write; sort_param[i].lock_in_memory=lock_memory; sort_param[i].tmpdir=param->tmpdir; sort_param[i].sort_info=&sort_info; @@ -2351,7 +2361,7 @@ int mi_repair_parallel(MI_CHECK *param, register MI_INFO *info, (share->base.pack_reclength * i)); sort_param[i].key_length=share->rec_reflength; - for (keyseg=sort_param[i].keyinfo->seg; keyseg->type != HA_KEYTYPE_END; + for (keyseg=sort_param[i].seg; keyseg->type != HA_KEYTYPE_END; keyseg++) { sort_param[i].key_length+=keyseg->length; @@ -2365,7 +2375,7 @@ int mi_repair_parallel(MI_CHECK *param, register MI_INFO *info, total_key_length+=sort_param[i].key_length; if (sort_param[i].keyinfo->flag & HA_FULLTEXT) - sort_param[i].key_length+=ft_max_word_len_for_sort-ft_max_word_len; + sort_param[i].key_length+=FT_MAX_WORD_LEN_FOR_SORT-ft_max_word_len; } sort_info.total_keys=i; sort_param[0].master= 1; @@ -2492,7 +2502,7 @@ int mi_repair_parallel(MI_CHECK *param, register MI_INFO *info, memcpy(&share->state.state, info->state, sizeof(*info->state)); err: - got_error|= flush_blocks(param,share->kfile); + got_error|= flush_blocks(param, share->key_cache, share->kfile); VOID(end_io_cache(&info->rec_cache)); if (!got_error) { @@ -2530,6 +2540,7 @@ err: pthread_cond_destroy (&sort_info.cond); pthread_mutex_destroy(&sort_info.mutex); + my_free((gptr) sort_info.ft_buf, MYF(MY_ALLOW_ZERO_PTR)); my_free((gptr) sort_info.key_block,MYF(MY_ALLOW_ZERO_PTR)); my_free((gptr) sort_param,MYF(MY_ALLOW_ZERO_PTR)); my_free(sort_info.buff,MYF(MY_ALLOW_ZERO_PTR)); @@ -2563,7 +2574,7 @@ static int sort_key_read(MI_SORT_PARAM *sort_param, void *key) DBUG_RETURN(1); } sort_param->real_key_length= - (info->s->rec_reflength+ + (info->s->rec_reflength+ _mi_make_key(info, sort_param->key, (uchar*) key, sort_param->record, sort_param->filepos)); #ifdef HAVE_purify @@ -2589,8 +2600,7 @@ static int sort_ft_key_read(MI_SORT_PARAM *sort_param, void *key) my_free((char*) wptr, MYF(MY_ALLOW_ZERO_PTR)); if ((error=sort_get_next_record(sort_param))) DBUG_RETURN(error); - if (!(wptr=_mi_ft_parserecord(info,sort_param->key, - key,sort_param->record))) + if (!(wptr=_mi_ft_parserecord(info,sort_param->key,sort_param->record))) DBUG_RETURN(1); if (wptr->pos) break; @@ -2642,6 +2652,9 @@ static int sort_get_next_record(MI_SORT_PARAM *sort_param) char llbuff[22],llbuff2[22]; DBUG_ENTER("sort_get_next_record"); + if (*killed_ptr(param)) + DBUG_RETURN(1); + switch (share->data_file_type) { case STATIC_RECORD: for (;;) @@ -2853,9 +2866,20 @@ static int sort_get_next_record(MI_SORT_PARAM *sort_param) if (!(to=mi_alloc_rec_buff(info,block_info.rec_len, &(sort_param->rec_buff)))) { - mi_check_print_error(param,"Not enough memory for blob at %s (need %lu)", - llstr(sort_param->start_recpos,llbuff), block_info.rec_len); - DBUG_RETURN(1); + if (param->max_record_length >= block_info.rec_len) + { + mi_check_print_error(param,"Not enough memory for blob at %s (need %lu)", + llstr(sort_param->start_recpos,llbuff), + (ulong) block_info.rec_len); + DBUG_RETURN(1); + } + else + { + mi_check_print_info(param,"Not enough memory for blob at %s (need %lu); Row skipped", + llstr(sort_param->start_recpos,llbuff), + (ulong) block_info.rec_len); + goto try_next; + } } } else @@ -2863,14 +2887,16 @@ static int sort_get_next_record(MI_SORT_PARAM *sort_param) } if (left_length < block_info.data_len || ! block_info.data_len) { - mi_check_print_info(param,"Found block with too small length at %s; Skipped", + mi_check_print_info(param, + "Found block with too small length at %s; Skipped", llstr(sort_param->start_recpos,llbuff)); goto try_next; } if (block_info.filepos + block_info.data_len > sort_param->read_cache.end_of_file) { - mi_check_print_info(param,"Found block that points outside data file at %s", + mi_check_print_info(param, + "Found block that points outside data file at %s", llstr(sort_param->start_recpos,llbuff)); goto try_next; } @@ -3094,7 +3120,8 @@ int sort_write_record(MI_SORT_PARAM *sort_param) (info->state->records % WRITE_COUNT) == 0) { char llbuff[22]; - printf("%s\r", llstr(info->state->records,llbuff)); VOID(fflush(stdout)); + printf("%s\r", llstr(info->state->records,llbuff)); + VOID(fflush(stdout)); } } DBUG_RETURN(0); @@ -3107,8 +3134,8 @@ static int sort_key_cmp(MI_SORT_PARAM *sort_param, const void *a, const void *b) { uint not_used; - return (_mi_key_cmp(sort_param->keyinfo->seg,*((uchar**) a),*((uchar**) b), - USE_WHOLE_KEY, SEARCH_SAME,¬_used)); + return (ha_key_cmp(sort_param->seg, *((uchar**) a), *((uchar**) b), + USE_WHOLE_KEY, SEARCH_SAME,¬_used)); } /* sort_key_cmp */ @@ -3122,9 +3149,9 @@ static int sort_key_write(MI_SORT_PARAM *sort_param, const void *a) if (sort_info->key_block->inited) { - cmp=_mi_key_cmp(sort_param->keyinfo->seg, sort_info->key_block->lastkey, - (uchar*) a, USE_WHOLE_KEY, SEARCH_FIND | SEARCH_UPDATE, - &diff_pos); + cmp=ha_key_cmp(sort_param->seg,sort_info->key_block->lastkey, + (uchar*) a, USE_WHOLE_KEY,SEARCH_FIND | SEARCH_UPDATE, + &diff_pos); sort_param->unique[diff_pos-1]++; } else @@ -3135,16 +3162,19 @@ static int sort_key_write(MI_SORT_PARAM *sort_param, const void *a) { sort_info->dupp++; sort_info->info->lastpos=get_record_for_key(sort_info->info, - sort_param->keyinfo, - (uchar*) a); + sort_param->keyinfo, + (uchar*) a); mi_check_print_warning(param, - "Duplicate key for record at %10s against record at %10s", - llstr(sort_info->info->lastpos,llbuff), - llstr(get_record_for_key(sort_info->info, sort_param->keyinfo, - sort_info->key_block->lastkey), llbuff2)); + "Duplicate key for record at %10s against record at %10s", + llstr(sort_info->info->lastpos,llbuff), + llstr(get_record_for_key(sort_info->info, + sort_param->keyinfo, + sort_info->key_block-> + lastkey), + llbuff2)); param->testflag|=T_RETRY_WITHOUT_QUICK; if (sort_info->param->testflag & T_VERBOSE) - _mi_print_key(stdout,sort_param->keyinfo->seg,(uchar*) a, USE_WHOLE_KEY); + _mi_print_key(stdout,sort_param->seg,(uchar*) a, USE_WHOLE_KEY); return (sort_delete_record(sort_param)); } #ifndef DBUG_OFF @@ -3159,6 +3189,141 @@ static int sort_key_write(MI_SORT_PARAM *sort_param, const void *a) (uchar*) a, HA_OFFSET_ERROR)); } /* sort_key_write */ +int sort_ft_buf_flush(MI_SORT_PARAM *sort_param) +{ + SORT_INFO *sort_info=sort_param->sort_info; + SORT_KEY_BLOCKS *key_block=sort_info->key_block; + MYISAM_SHARE *share=sort_info->info->s; + uint val_off, val_len; + int error; + SORT_FT_BUF *ft_buf=sort_info->ft_buf; + uchar *from, *to; + + val_len=share->ft2_keyinfo.keylength; + get_key_full_length_rdonly(val_off, ft_buf->lastkey); + to=ft_buf->lastkey+val_off; + + if (ft_buf->buf) + { + /* flushing first-level tree */ + error=sort_insert_key(sort_param,key_block,ft_buf->lastkey, + HA_OFFSET_ERROR); + for (from=to+val_len; + !error && from < ft_buf->buf; + from+= val_len) + { + memcpy(to, from, val_len); + error=sort_insert_key(sort_param,key_block,ft_buf->lastkey, + HA_OFFSET_ERROR); + } + return error; + } + /* flushing second-level tree keyblocks */ + error=flush_pending_blocks(sort_param); + /* updating lastkey with second-level tree info */ + ft_intXstore(ft_buf->lastkey+val_off, -ft_buf->count); + _mi_dpointer(sort_info->info, ft_buf->lastkey+val_off+HA_FT_WLEN, + share->state.key_root[sort_param->key]); + /* restoring first level tree data in sort_info/sort_param */ + sort_info->key_block=sort_info->key_block_end- sort_info->param->sort_key_blocks; + sort_param->keyinfo=share->keyinfo+sort_param->key; + share->state.key_root[sort_param->key]=HA_OFFSET_ERROR; + /* writing lastkey in first-level tree */ + return error ? error : + sort_insert_key(sort_param,sort_info->key_block, + ft_buf->lastkey,HA_OFFSET_ERROR); +} + +static int sort_ft_key_write(MI_SORT_PARAM *sort_param, const void *a) +{ + uint a_len, val_off, val_len, error; + uchar *p; + SORT_INFO *sort_info=sort_param->sort_info; + SORT_FT_BUF *ft_buf=sort_info->ft_buf; + SORT_KEY_BLOCKS *key_block=sort_info->key_block; + + val_len=HA_FT_WLEN+sort_info->info->s->base.rec_reflength; + get_key_full_length_rdonly(a_len, (uchar *)a); + + if (!ft_buf) + { + /* + use two-level tree only if key_reflength fits in rec_reflength place + and row format is NOT static - for _mi_dpointer not to garble offsets + */ + if ((sort_info->info->s->base.key_reflength <= + sort_info->info->s->base.rec_reflength) && + (sort_info->info->s->options & + (HA_OPTION_PACK_RECORD | HA_OPTION_COMPRESS_RECORD))) + ft_buf=(SORT_FT_BUF *)my_malloc(sort_param->keyinfo->block_length + + sizeof(SORT_FT_BUF), MYF(MY_WME)); + + if (!ft_buf) + { + sort_param->key_write=sort_key_write; + return sort_key_write(sort_param, a); + } + sort_info->ft_buf=ft_buf; + goto word_init_ft_buf; /* no need to duplicate the code */ + } + get_key_full_length_rdonly(val_off, ft_buf->lastkey); + + if (val_off == a_len && + mi_compare_text(sort_param->seg->charset, + ((uchar *)a)+1,a_len-1, + ft_buf->lastkey+1,val_off-1, 0)==0) + { + if (!ft_buf->buf) /* store in second-level tree */ + { + ft_buf->count++; + return sort_insert_key(sort_param,key_block, + ((uchar *)a)+val_off, HA_OFFSET_ERROR); + } + + /* storing the key in the buffer. */ + memcpy (ft_buf->buf, (char *)a+val_off, val_len); + ft_buf->buf+=val_len; + if (ft_buf->buf < ft_buf->end) + return 0; + + /* converting to two-level tree */ + p=ft_buf->lastkey+val_off; + + while (key_block->inited) + key_block++; + sort_info->key_block=key_block; + sort_param->keyinfo=& sort_info->info->s->ft2_keyinfo; + ft_buf->count=(ft_buf->buf - p)/val_len; + + /* flushing buffer to second-level tree */ + for (error=0; !error && p < ft_buf->buf; p+= val_len) + error=sort_insert_key(sort_param,key_block,p,HA_OFFSET_ERROR); + ft_buf->buf=0; + return error; + } + else + { + /* flushing buffer */ + if ((error=sort_ft_buf_flush(sort_param))) + return error; + +word_init_ft_buf: + a_len+=val_len; + memcpy(ft_buf->lastkey, a, a_len); + ft_buf->buf=ft_buf->lastkey+a_len; + ft_buf->end=ft_buf->lastkey+ (sort_param->keyinfo->block_length-32); + /* 32 is just a safety margin here + (at least max(val_len, sizeof(nod_flag)) should be there). + May be better performance could be achieved if we'd put + (sort_info->keyinfo->block_length-32)/XXX + instead. + TODO: benchmark the best value for XXX. + */ + + return 0; + } + return -1; /* impossible */ +} /* sort_ft_key_write */ /* get pointer to record from a key */ @@ -3229,13 +3394,13 @@ static int sort_insert_key(MI_SORT_PARAM *sort_param, bzero((byte*) anc_buff+key_block->last_length, keyinfo->block_length- key_block->last_length); key_file_length=info->state->key_file_length; - if ((filepos=_mi_new(info,keyinfo)) == HA_OFFSET_ERROR) + if ((filepos=_mi_new(info,keyinfo,DFLT_INIT_HITS)) == HA_OFFSET_ERROR) DBUG_RETURN(1); /* If we read the page from the key cache, we have to write it back to it */ if (key_file_length == info->state->key_file_length) { - if (_mi_write_keypage(info, keyinfo, filepos, anc_buff)) + if (_mi_write_keypage(info, keyinfo, filepos, DFLT_INIT_HITS, anc_buff)) DBUG_RETURN(1); } else if (my_pwrite(info->s->kfile,(byte*) anc_buff, @@ -3318,7 +3483,7 @@ int flush_pending_blocks(MI_SORT_PARAM *sort_param) my_off_t filepos,key_file_length; SORT_KEY_BLOCKS *key_block; SORT_INFO *sort_info= sort_param->sort_info; - MI_CHECK *param=sort_info->param; + myf myf_rw=sort_info->param->myf_rw; MI_INFO *info=sort_info->info; MI_KEYDEF *keyinfo=sort_param->keyinfo; DBUG_ENTER("flush_pending_blocks"); @@ -3333,17 +3498,18 @@ int flush_pending_blocks(MI_SORT_PARAM *sort_param) _mi_kpointer(info,key_block->end_pos,filepos); key_file_length=info->state->key_file_length; bzero((byte*) key_block->buff+length, keyinfo->block_length-length); - if ((filepos=_mi_new(info,keyinfo)) == HA_OFFSET_ERROR) + if ((filepos=_mi_new(info,keyinfo,DFLT_INIT_HITS)) == HA_OFFSET_ERROR) DBUG_RETURN(1); /* If we read the page from the key cache, we have to write it back */ if (key_file_length == info->state->key_file_length) { - if (_mi_write_keypage(info, keyinfo, filepos, key_block->buff)) + if (_mi_write_keypage(info, keyinfo, filepos, + DFLT_INIT_HITS, key_block->buff)) DBUG_RETURN(1); } else if (my_pwrite(info->s->kfile,(byte*) key_block->buff, - (uint) keyinfo->block_length,filepos, param->myf_rw)) + (uint) keyinfo->block_length,filepos, myf_rw)) DBUG_RETURN(1); DBUG_DUMP("buff",(byte*) key_block->buff,length); nod_flag=1; @@ -3365,7 +3531,7 @@ static SORT_KEY_BLOCKS *alloc_key_blocks(MI_CHECK *param, uint blocks, buffer_length+IO_SIZE)*blocks, MYF(0)))) { - mi_check_print_error(param,"Not Enough memory for sort-key-blocks"); + mi_check_print_error(param,"Not enough memory for sort-key-blocks"); return(0); } for (i=0 ; i < blocks ; i++) @@ -3397,7 +3563,7 @@ int recreate_table(MI_CHECK *param, MI_INFO **org_info, char *filename) MI_INFO info; MYISAM_SHARE share; MI_KEYDEF *keyinfo,*key,*key_end; - MI_KEYSEG *keysegs,*keyseg; + HA_KEYSEG *keysegs,*keyseg; MI_COLUMNDEF *recdef,*rec,*end; MI_UNIQUEDEF *uniquedef,*u_ptr,*u_end; MI_STATUS_INFO status_info; @@ -3419,7 +3585,7 @@ int recreate_table(MI_CHECK *param, MI_INFO **org_info, char *filename) (size_t) (sizeof(MI_KEYDEF)*share.base.keys)); key_parts= share.base.all_key_parts; - if (!(keysegs=(MI_KEYSEG*) my_alloca(sizeof(MI_KEYSEG)* + if (!(keysegs=(HA_KEYSEG*) my_alloca(sizeof(HA_KEYSEG)* (key_parts+share.base.keys)))) { my_afree((gptr) keyinfo); @@ -3455,7 +3621,7 @@ int recreate_table(MI_CHECK *param, MI_INFO **org_info, char *filename) /* Change the new key to point at the saved key segments */ memcpy((byte*) keysegs,(byte*) share.keyparts, - (size_t) (sizeof(MI_KEYSEG)*(key_parts+share.base.keys+ + (size_t) (sizeof(HA_KEYSEG)*(key_parts+share.base.keys+ share.state.header.uniques))); keyseg=keysegs; for (key=keyinfo,key_end=keyinfo+share.base.keys; key != key_end ; key++) @@ -3749,13 +3915,10 @@ static my_bool mi_too_big_key_for_sort(MI_KEYDEF *key, ha_rows rows) { uint key_maxlength=key->maxlength; if (key->flag & HA_FULLTEXT) - key_maxlength+=ft_max_word_len_for_sort-HA_FT_MAXLEN; + key_maxlength+=FT_MAX_WORD_LEN_FOR_SORT-HA_FT_MAXLEN; return (key->flag & (HA_BINARY_PACK_KEY | HA_VAR_LENGTH_KEY | HA_FULLTEXT) && ((ulonglong) rows * key_maxlength > - (ulonglong) myisam_max_temp_length || - (ulonglong) rows * (key_maxlength - key->minlength) / 2 > - myisam_max_extra_temp_length || - (rows == 0 && (key_maxlength / key->minlength) > 2))); + (ulonglong) myisam_max_temp_length)); } @@ -3768,8 +3931,8 @@ void mi_disable_non_unique_index(MI_INFO *info, ha_rows rows) MI_KEYDEF *key=share->keyinfo; for (i=0 ; i < share->base.keys ; i++,key++) { - if (!(key->flag & (HA_NOSAME|HA_AUTO_KEY)) && - ! mi_too_big_key_for_sort(key,rows)) + if (!(key->flag & (HA_NOSAME | HA_SPATIAL | HA_AUTO_KEY)) && + ! mi_too_big_key_for_sort(key,rows) && info->s->base.auto_key != i+1) { share->state.key_map&= ~ ((ulonglong) 1 << i); info->update|= HA_STATE_CHANGED; diff --git a/myisam/mi_checksum.c b/myisam/mi_checksum.c index a760b03a032..95338434211 100644 --- a/myisam/mi_checksum.c +++ b/myisam/mi_checksum.c @@ -27,31 +27,29 @@ ha_checksum mi_checksum(MI_INFO *info, const byte *buf) for (i=info->s->base.fields ; i-- ; buf+=(rec++)->length) { const byte *pos; - const byte *end; + ulong length; switch (rec->type) { case FIELD_BLOB: { - ulong length=_mi_calc_blob_length(rec->length- + length=_mi_calc_blob_length(rec->length- mi_portable_sizeof_char_ptr, buf); memcpy((char*) &pos, buf+rec->length- mi_portable_sizeof_char_ptr, sizeof(char*)); - end=pos+length; break; } case FIELD_VARCHAR: { - uint length; length=uint2korr(buf); - pos=buf+2; end=pos+length; + pos=buf+2; break; } default: - pos=buf; end=buf+rec->length; + length=rec->length; + pos=buf; break; } - for ( ; pos != end ; pos++) - crc=((crc << 8) + *((uchar*) pos)) + (crc >> (8*sizeof(ha_checksum)-8)); + crc=my_checksum(crc, pos ? pos : "", length); } return crc; } @@ -59,9 +57,5 @@ ha_checksum mi_checksum(MI_INFO *info, const byte *buf) ha_checksum mi_static_checksum(MI_INFO *info, const byte *pos) { - ha_checksum crc; - const byte *end=pos+info->s->base.reclength; - for (crc=0; pos != end; pos++) - crc=((crc << 8) + *((uchar*) pos)) + (crc >> (8*sizeof(ha_checksum)-8)); - return crc; + return my_checksum(0, pos, info->s->base.reclength); } diff --git a/myisam/mi_close.c b/myisam/mi_close.c index dbaaebb1143..deb0ccee8f3 100644 --- a/myisam/mi_close.c +++ b/myisam/mi_close.c @@ -64,7 +64,7 @@ int mi_close(register MI_INFO *info) if (flag) { if (share->kfile >= 0 && - flush_key_blocks(share->kfile, + flush_key_blocks(share->key_cache, share->kfile, share->temporary ? FLUSH_IGNORE_CHANGED : FLUSH_RELEASE)) error=my_errno; diff --git a/myisam/mi_create.c b/myisam/mi_create.c index 9f39c6f522e..0982e5bdaf6 100644 --- a/myisam/mi_create.c +++ b/myisam/mi_create.c @@ -17,6 +17,8 @@ /* Create a MyISAM table */ #include "fulltext.h" +#include "sp_defs.h" + #if defined(MSDOS) || defined(__WIN__) #ifdef __WIN__ #include <fcntl.h> @@ -42,7 +44,7 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs, uint fields,length,max_key_length,packed,pointer, key_length,info_length,key_segs,options,min_key_length_skipp, base_pos,varchar_count,long_varchar_count,varchar_length, - max_key_block_length,unique_key_parts,offset; + max_key_block_length,unique_key_parts,fulltext_keys,offset; ulong reclength, real_reclength,min_pack_length; char filename[FN_REFLEN],linkname[FN_REFLEN], *linkname_ptr; ulong pack_reclength; @@ -51,7 +53,7 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs, MYISAM_SHARE share; MI_KEYDEF *keydef,tmp_keydef; MI_UNIQUEDEF *uniquedef; - MI_KEYSEG *keyseg,tmp_keyseg; + HA_KEYSEG *keyseg,tmp_keyseg; MI_COLUMNDEF *rec; ulong *rec_per_key_part; my_off_t key_root[MI_MAX_POSSIBLE_KEY],key_del[MI_MAX_KEY_BLOCK_SIZE]; @@ -221,6 +223,7 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs, reclength+=long_varchar_count; /* We need space for this! */ max_key_length=0; tot_length=0 ; key_segs=0; + fulltext_keys=0; max_key_block_length=0; share.state.rec_per_key_part=rec_per_key_part; share.state.key_root=key_root; @@ -233,28 +236,50 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs, for (i=0, keydef=keydefs ; i < keys ; i++ , keydef++) { - share.state.key_root[i]= HA_OFFSET_ERROR; + share.state.key_root[i]= HA_OFFSET_ERROR; min_key_length_skipp=length=0; key_length=pointer; - - if (keydef->flag & HA_FULLTEXT) /* SerG */ + if (keydef->flag & HA_SPATIAL) { - keydef->flag=HA_FULLTEXT | HA_PACK_KEY | HA_VAR_LENGTH_KEY; - options|=HA_OPTION_PACK_KEYS; /* Using packed keys */ + /* BAR TODO to support 3D and more dimensions in the future */ + uint sp_segs=SPDIMS*2; + keydef->flag=HA_SPATIAL; if (flags & HA_DONT_TOUCH_DATA) { - /* called by myisamchk - i.e. table structure was taken from - MYI file and FULLTEXT key *do has* additional FT_SEGS keysegs. + /* + called by myisamchk - i.e. table structure was taken from + MYI file and SPATIAL key *does have* additional sp_segs keysegs. We'd better delete them now */ - keydef->keysegs-=FT_SEGS; + keydef->keysegs-=sp_segs; } for (j=0, keyseg=keydef->seg ; (int) j < keydef->keysegs ; j++, keyseg++) { + if (keyseg->type != HA_KEYTYPE_BINARY && + keyseg->type != HA_KEYTYPE_VARBINARY) + { + my_errno=HA_WRONG_CREATE_OPTION; + goto err; + } + } + keydef->keysegs+=sp_segs; + key_length+=SPLEN*sp_segs; + length++; /* At least one length byte */ + min_key_length_skipp+=SPLEN*2*SPDIMS; + } + else + if (keydef->flag & HA_FULLTEXT) + { + keydef->flag=HA_FULLTEXT | HA_PACK_KEY | HA_VAR_LENGTH_KEY; + options|=HA_OPTION_PACK_KEYS; /* Using packed keys */ + + for (j=0, keyseg=keydef->seg ; (int) j < keydef->keysegs ; + j++, keyseg++) + { if (keyseg->type != HA_KEYTYPE_TEXT && keyseg->type != HA_KEYTYPE_VARTEXT) { @@ -262,19 +287,11 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs, goto err; } } - keydef->keysegs+=FT_SEGS; + fulltext_keys++; key_length+= HA_FT_MAXLEN+HA_FT_WLEN; -#ifdef EVAL_RUN - key_length++; -#endif - length++; /* At least one length byte */ min_key_length_skipp+=HA_FT_MAXLEN; -#if HA_FT_MAXLEN >= 255 - min_key_length_skipp+=2; /* prefix may be 3 bytes */ - length+=2; -#endif } else { @@ -411,7 +428,7 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs, info_length=base_pos+(uint) (MI_BASE_INFO_SIZE+ keys * MI_KEYDEF_SIZE+ uniques * MI_UNIQUEDEF_SIZE + - (key_segs + unique_key_parts)*MI_KEYSEG_SIZE+ + (key_segs + unique_key_parts)*HA_KEYSEG_SIZE+ columns*MI_COLUMNDEF_SIZE); bmove(share.state.header.file_version,(byte*) myisam_file_magic,4); @@ -424,7 +441,7 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs, mi_int2store(share.state.header.base_info_length,MI_BASE_INFO_SIZE); mi_int2store(share.state.header.base_pos,base_pos); share.state.header.language= (ci->language ? - ci->language : MY_CHARSET_CURRENT); + ci->language : default_charset_info->number); share.state.header.max_block_size=max_key_block_length/MI_MIN_KEY_BLOCK_LENGTH; share.state.dellink = HA_OFFSET_ERROR; @@ -440,8 +457,9 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs, mi_get_pointer_length((tot_length + max_key_block_length * keys * MI_INDEX_BLOCK_MARGIN) / MI_MIN_KEY_BLOCK_LENGTH, 3); - share.base.keys= share.state.header.keys = keys; + share.base.keys= share.state.header.keys= keys; share.state.header.uniques= uniques; + share.state.header.fulltext_keys= fulltext_keys; mi_int2store(share.state.header.key_parts,key_segs); mi_int2store(share.state.header.unique_key_parts,unique_key_parts); @@ -533,7 +551,7 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs, linkname_ptr=0; create_flag=MY_DELETE_OLD; } - if ((dfile= + if ((dfile= my_create_with_symlink(linkname_ptr, filename, 0,O_RDWR | O_TRUNC, MYF(MY_WME | create_flag))) < 0) @@ -557,18 +575,26 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs, /* Write key and keyseg definitions */ for (i=0 ; i < share.base.keys - uniques; i++) { - uint ft_segs=(keydefs[i].flag & HA_FULLTEXT) ? FT_SEGS : 0; + uint sp_segs=(keydefs[i].flag & HA_SPATIAL) ? 2*SPDIMS : 0; if (mi_keydef_write(file, &keydefs[i])) goto err; - for (j=0 ; j < keydefs[i].keysegs-ft_segs ; j++) + for (j=0 ; j < keydefs[i].keysegs-sp_segs ; j++) if (mi_keyseg_write(file, &keydefs[i].seg[j])) - goto err; - for (j=0 ; j < ft_segs ; j++) + goto err; + for (j=0 ; j < sp_segs ; j++) { - MI_KEYSEG seg=ft_keysegs[j]; - seg.language= keydefs[i].seg[0].language; - if (mi_keyseg_write(file, &seg)) + HA_KEYSEG sseg; + sseg.type=SPTYPE; + sseg.language= 7; + sseg.null_bit=0; + sseg.bit_start=0; + sseg.bit_end=0; + sseg.length=SPLEN; + sseg.null_pos=0; + sseg.start=j*SPLEN; + sseg.flag= HA_SWAP_KEY; + if (mi_keyseg_write(file, &sseg)) goto err; } } diff --git a/myisam/mi_dbug.c b/myisam/mi_dbug.c index 482287938c0..6548b38c135 100644 --- a/myisam/mi_dbug.c +++ b/myisam/mi_dbug.c @@ -20,7 +20,7 @@ /* Print a key in user understandable format */ -void _mi_print_key(FILE *stream, register MI_KEYSEG *keyseg, +void _mi_print_key(FILE *stream, register HA_KEYSEG *keyseg, const uchar *key, uint length) { int flag; diff --git a/myisam/mi_delete.c b/myisam/mi_delete.c index 6f94e3c4256..f21b0e928b6 100644 --- a/myisam/mi_delete.c +++ b/myisam/mi_delete.c @@ -17,20 +17,25 @@ /* Remove a row from a MyISAM table */ #include "fulltext.h" +#include "rt_index.h" +#include <assert.h> + #ifdef __WIN__ #include <errno.h> #endif -static int d_search(MI_INFO *info,MI_KEYDEF *keyinfo,uchar *key, - uint key_length, my_off_t page, uchar *anc_buff); +static int d_search(MI_INFO *info,MI_KEYDEF *keyinfo,uint comp_flag, + uchar *key,uint key_length,my_off_t page,uchar *anc_buff); static int del(MI_INFO *info,MI_KEYDEF *keyinfo,uchar *key,uchar *anc_buff, my_off_t leaf_page,uchar *leaf_buff,uchar *keypos, my_off_t next_block,uchar *ret_key); static int underflow(MI_INFO *info,MI_KEYDEF *keyinfo,uchar *anc_buff, - my_off_t leaf_page, uchar *leaf_buff,uchar *keypos); + my_off_t leaf_page,uchar *leaf_buff,uchar *keypos); static uint remove_key(MI_KEYDEF *keyinfo,uint nod_flag,uchar *keypos, uchar *lastkey,uchar *page_end, my_off_t *next_block); +static int _mi_ck_real_delete(register MI_INFO *info,MI_KEYDEF *keyinfo, + uchar *key, uint key_length, my_off_t *root); int mi_delete(MI_INFO *info,const byte *record) @@ -71,7 +76,6 @@ int mi_delete(MI_INFO *info,const byte *record) if (((ulonglong) 1 << i) & info->s->state.key_map) { info->s->keyinfo[i].version++; - /* The following code block is for text searching by SerG */ if (info->s->keyinfo[i].flag & HA_FULLTEXT ) { if (_mi_ft_del(info,i,(char*) old_key,record,info->lastpos)) @@ -79,9 +83,9 @@ int mi_delete(MI_INFO *info,const byte *record) } else { - uint key_length=_mi_make_key(info,i,old_key,record,info->lastpos); - if (_mi_ck_delete(info,i,old_key,key_length)) - goto err; + if (info->s->keyinfo[i].ck_delete(info,i,old_key, + _mi_make_key(info,i,old_key,record,info->lastpos))) + goto err; } } } @@ -127,18 +131,24 @@ err: int _mi_ck_delete(register MI_INFO *info, uint keynr, uchar *key, uint key_length) { + return _mi_ck_real_delete(info, info->s->keyinfo+keynr, key, key_length, + &info->s->state.key_root[keynr]); +} /* _mi_ck_delete */ + + +static int _mi_ck_real_delete(register MI_INFO *info, MI_KEYDEF *keyinfo, + uchar *key, uint key_length, my_off_t *root) +{ int error; uint nod_flag; my_off_t old_root; uchar *root_buff; - MI_KEYDEF *keyinfo; - DBUG_ENTER("_mi_ck_delete"); + DBUG_ENTER("_mi_ck_real_delete"); - if ((old_root=info->s->state.key_root[keynr]) == HA_OFFSET_ERROR) + if ((old_root=*root) == HA_OFFSET_ERROR) { DBUG_RETURN(my_errno=HA_ERR_CRASHED); } - keyinfo=info->s->keyinfo+keynr; if (!(root_buff= (uchar*) my_alloca((uint) keyinfo->block_length+ MI_MAX_KEY_BUFF*2))) { @@ -146,17 +156,20 @@ int _mi_ck_delete(register MI_INFO *info, uint keynr, uchar *key, DBUG_RETURN(my_errno=ENOMEM); } DBUG_PRINT("info",("root_page: %ld",old_root)); - if (!_mi_fetch_keypage(info,keyinfo,old_root,root_buff,0)) + if (!_mi_fetch_keypage(info,keyinfo,old_root,DFLT_INIT_HITS,root_buff,0)) { error= -1; goto err; } - if ((error=d_search(info,keyinfo,key,key_length,old_root,root_buff)) >0) + if ((error=d_search(info,keyinfo, + (keyinfo->flag & HA_FULLTEXT ? SEARCH_FIND + : SEARCH_SAME), + key,key_length,old_root,root_buff)) >0) { if (error == 2) { DBUG_PRINT("test",("Enlarging of root when deleting")); - error=_mi_enlarge_root(info,keynr,key); + error=_mi_enlarge_root(info,keyinfo,key,root); } else /* error == 1 */ { @@ -164,21 +177,22 @@ int _mi_ck_delete(register MI_INFO *info, uint keynr, uchar *key, { error=0; if (nod_flag) - info->s->state.key_root[keynr]=_mi_kpos(nod_flag, - root_buff+2+nod_flag); + *root=_mi_kpos(nod_flag,root_buff+2+nod_flag); else - info->s->state.key_root[keynr]= HA_OFFSET_ERROR; - if (_mi_dispose(info,keyinfo,old_root)) + *root=HA_OFFSET_ERROR; + if (_mi_dispose(info,keyinfo,old_root,DFLT_INIT_HITS)) error= -1; } else - error=_mi_write_keypage(info,keyinfo,old_root,root_buff); + error=_mi_write_keypage(info,keyinfo,old_root, + DFLT_INIT_HITS,root_buff); } } err: my_afree((gptr) root_buff); + DBUG_PRINT("exit",("Return: %d",error)); DBUG_RETURN(error); -} /* _mi_ck_delete */ +} /* _mi_ck_real_delete */ /* @@ -190,11 +204,11 @@ err: */ static int d_search(register MI_INFO *info, register MI_KEYDEF *keyinfo, - uchar *key, uint key_length, my_off_t page, - uchar *anc_buff) + uint comp_flag, uchar *key, uint key_length, + my_off_t page, uchar *anc_buff) { int flag,ret_value,save_flag; - uint length,nod_flag; + uint length,nod_flag,search_key_length; my_bool last_key; uchar *leaf_buff,*keypos; my_off_t leaf_page,next_block; @@ -202,9 +216,9 @@ static int d_search(register MI_INFO *info, register MI_KEYDEF *keyinfo, DBUG_ENTER("d_search"); DBUG_DUMP("page",(byte*) anc_buff,mi_getint(anc_buff)); - flag=(*keyinfo->bin_search)(info,keyinfo,anc_buff,key, USE_WHOLE_KEY, - SEARCH_SAME, - &keypos, lastkey, &last_key); + search_key_length= (comp_flag & SEARCH_FIND) ? key_length : USE_WHOLE_KEY; + flag=(*keyinfo->bin_search)(info,keyinfo,anc_buff,key, search_key_length, + comp_flag, &keypos, lastkey, &last_key); if (flag == MI_FOUND_WRONG_KEY) { DBUG_PRINT("error",("Found wrong key")); @@ -212,6 +226,64 @@ static int d_search(register MI_INFO *info, register MI_KEYDEF *keyinfo, } nod_flag=mi_test_if_nod(anc_buff); + if (!flag && keyinfo->flag & HA_FULLTEXT) + { + uint off; + int subkeys; + + get_key_full_length_rdonly(off, lastkey); + subkeys=ft_sintXkorr(lastkey+off); + DBUG_ASSERT(info->ft1_to_ft2==0 || subkeys >=0); + comp_flag=SEARCH_SAME; + if (subkeys >= 0) + { + /* normal word, one-level tree structure */ + if (info->ft1_to_ft2) + { + /* we're in ft1->ft2 conversion mode. Saving key data */ + insert_dynamic(info->ft1_to_ft2, lastkey+off); + } + else + { + /* we need exact match only if not in ft1->ft2 conversion mode */ + flag=(*keyinfo->bin_search)(info,keyinfo,anc_buff,key,USE_WHOLE_KEY, + comp_flag, &keypos, lastkey, &last_key); + } + /* fall through to normal delete */ + } + else + { + /* popular word. two-level tree. going down */ + uint tmp_key_length; + my_off_t root; + uchar *kpos=keypos; + + tmp_key_length=(*keyinfo->get_key)(keyinfo,nod_flag,&kpos,lastkey); + root=_mi_dpos(info,nod_flag,kpos); + if (subkeys == -1) + { + /* the last entry in sub-tree */ + _mi_dispose(info, keyinfo, root,DFLT_INIT_HITS); + /* fall through to normal delete */ + } + else + { + keyinfo=&info->s->ft2_keyinfo; + kpos-=keyinfo->keylength+nod_flag; /* we'll modify key entry 'in vivo' */ + key+=off; + ret_value=_mi_ck_real_delete(info, &info->s->ft2_keyinfo, + key, HA_FT_WLEN, &root); + _mi_dpointer(info, kpos+HA_FT_WLEN, root); + subkeys++; + ft_intXstore(kpos, subkeys); + if (!ret_value) + ret_value=_mi_write_keypage(info,keyinfo,page, + DFLT_INIT_HITS,anc_buff); + DBUG_PRINT("exit",("Return: %d",ret_value)); + DBUG_RETURN(ret_value); + } + } + } leaf_buff=0; LINT_INIT(leaf_page); if (nod_flag) @@ -222,9 +294,10 @@ static int d_search(register MI_INFO *info, register MI_KEYDEF *keyinfo, { DBUG_PRINT("error",("Couldn't allocate memory")); my_errno=ENOMEM; + DBUG_PRINT("exit",("Return: %d",-1)); DBUG_RETURN(-1); } - if (!_mi_fetch_keypage(info,keyinfo,leaf_page,leaf_buff,0)) + if (!_mi_fetch_keypage(info,keyinfo,leaf_page,DFLT_INIT_HITS,leaf_buff,0)) goto err; } @@ -237,7 +310,8 @@ static int d_search(register MI_INFO *info, register MI_KEYDEF *keyinfo, goto err; } save_flag=0; - ret_value=d_search(info,keyinfo,key,key_length,leaf_page,leaf_buff); + ret_value=d_search(info,keyinfo,comp_flag,key,key_length, + leaf_page,leaf_buff); } else { /* Found key */ @@ -246,14 +320,20 @@ static int d_search(register MI_INFO *info, register MI_KEYDEF *keyinfo, tmp=remove_key(keyinfo,nod_flag,keypos,lastkey,anc_buff+length, &next_block); if (tmp == 0) + { + DBUG_PRINT("exit",("Return: %d",0)); DBUG_RETURN(0); + } length-= tmp; mi_putint(anc_buff,length,nod_flag); if (!nod_flag) { /* On leaf page */ - if (_mi_write_keypage(info,keyinfo,page,anc_buff)) + if (_mi_write_keypage(info,keyinfo,page,DFLT_INIT_HITS,anc_buff)) + { + DBUG_PRINT("exit",("Return: %d",-1)); DBUG_RETURN(-1); + } /* Page will be update later if we return 1 */ DBUG_RETURN(test(length <= (info->quick_mode ? MI_MIN_KEYBLOCK_LENGTH : (uint) keyinfo->underflow_block_length))); @@ -284,12 +364,13 @@ static int d_search(register MI_INFO *info, register MI_KEYDEF *keyinfo, ret_value=_mi_split_page(info,keyinfo,key,anc_buff,lastkey,0) | 2; } if (save_flag && ret_value != 1) - ret_value|=_mi_write_keypage(info,keyinfo,page,anc_buff); + ret_value|=_mi_write_keypage(info,keyinfo,page,DFLT_INIT_HITS,anc_buff); else { DBUG_DUMP("page",(byte*) anc_buff,mi_getint(anc_buff)); } my_afree((byte*) leaf_buff); + DBUG_PRINT("exit",("Return: %d",ret_value)); DBUG_RETURN(ret_value); err: my_afree((byte*) leaf_buff); @@ -327,7 +408,7 @@ static int del(register MI_INFO *info, register MI_KEYDEF *keyinfo, uchar *key, if (!(next_buff= (uchar*) my_alloca((uint) keyinfo->block_length+ MI_MAX_KEY_BUFF*2))) DBUG_RETURN(-1); - if (!_mi_fetch_keypage(info,keyinfo,next_page,next_buff,0)) + if (!_mi_fetch_keypage(info,keyinfo,next_page,DFLT_INIT_HITS,next_buff,0)) ret_value= -1; else { @@ -355,7 +436,7 @@ static int del(register MI_INFO *info, register MI_KEYDEF *keyinfo, uchar *key, (uchar*) 0,(uchar*) 0,(my_off_t) 0,0); } } - if (_mi_write_keypage(info,keyinfo,leaf_page,leaf_buff)) + if (_mi_write_keypage(info,keyinfo,leaf_page,DFLT_INIT_HITS,leaf_buff)) goto err; } my_afree((byte*) next_buff); @@ -365,7 +446,7 @@ static int del(register MI_INFO *info, register MI_KEYDEF *keyinfo, uchar *key, /* Remove last key from leaf page */ mi_putint(leaf_buff,key_start-leaf_buff,nod_flag); - if (_mi_write_keypage(info,keyinfo,leaf_page,leaf_buff)) + if (_mi_write_keypage(info,keyinfo,leaf_page,DFLT_INIT_HITS,leaf_buff)) goto err; /* Place last key in ancestor page on deleted key position */ @@ -433,7 +514,7 @@ static int underflow(register MI_INFO *info, register MI_KEYDEF *keyinfo, if (info->s->keyinfo+info->lastinx == keyinfo) info->page_changed=1; - if ((keypos < anc_buff+anc_length && (share->rnd++ & 1)) || + if ((keypos < anc_buff+anc_length && (info->state->records & 1)) || keypos == anc_buff+2+key_reflength) { /* Use page right of anc-page */ DBUG_PRINT("test",("use right page")); @@ -453,7 +534,7 @@ static int underflow(register MI_INFO *info, register MI_KEYDEF *keyinfo, goto err; } next_page= _mi_kpos(key_reflength,next_keypos); - if (!_mi_fetch_keypage(info,keyinfo,next_page,buff,0)) + if (!_mi_fetch_keypage(info,keyinfo,next_page,DFLT_INIT_HITS,buff,0)) goto err; buff_length=mi_getint(buff); DBUG_DUMP("next",(byte*) buff,buff_length); @@ -492,7 +573,7 @@ static int underflow(register MI_INFO *info, register MI_KEYDEF *keyinfo, if (buff_length <= keyinfo->block_length) { /* Keys in one page */ memcpy((byte*) leaf_buff,(byte*) buff,(size_t) buff_length); - if (_mi_dispose(info,keyinfo,next_page)) + if (_mi_dispose(info,keyinfo,next_page,DFLT_INIT_HITS)) goto err; } else @@ -541,10 +622,10 @@ static int underflow(register MI_INFO *info, register MI_KEYDEF *keyinfo, (*keyinfo->store_key)(keyinfo,buff+p_length,&s_temp); mi_putint(buff,length+t_length+p_length,nod_flag); - if (_mi_write_keypage(info,keyinfo,next_page,buff)) + if (_mi_write_keypage(info,keyinfo,next_page,DFLT_INIT_HITS,buff)) goto err; } - if (_mi_write_keypage(info,keyinfo,leaf_page,leaf_buff)) + if (_mi_write_keypage(info,keyinfo,leaf_page,DFLT_INIT_HITS,leaf_buff)) goto err; DBUG_RETURN(anc_length <= ((info->quick_mode ? MI_MIN_BLOCK_LENGTH : (uint) keyinfo->underflow_block_length))); @@ -556,7 +637,7 @@ static int underflow(register MI_INFO *info, register MI_KEYDEF *keyinfo, if (!keypos) goto err; next_page= _mi_kpos(key_reflength,keypos); - if (!_mi_fetch_keypage(info,keyinfo,next_page,buff,0)) + if (!_mi_fetch_keypage(info,keyinfo,next_page,DFLT_INIT_HITS,buff,0)) goto err; buff_length=mi_getint(buff); endpos=buff+buff_length; @@ -600,7 +681,7 @@ static int underflow(register MI_INFO *info, register MI_KEYDEF *keyinfo, if (buff_length <= keyinfo->block_length) { /* Keys in one page */ - if (_mi_dispose(info,keyinfo,leaf_page)) + if (_mi_dispose(info,keyinfo,leaf_page,DFLT_INIT_HITS)) goto err; } else @@ -647,11 +728,11 @@ static int underflow(register MI_INFO *info, register MI_KEYDEF *keyinfo, (size_t) length); (*keyinfo->store_key)(keyinfo,leaf_buff+p_length,&s_temp); mi_putint(leaf_buff,length+t_length+p_length,nod_flag); - if (_mi_write_keypage(info,keyinfo,leaf_page,leaf_buff)) + if (_mi_write_keypage(info,keyinfo,leaf_page,DFLT_INIT_HITS,leaf_buff)) goto err; mi_putint(buff,endpos-buff,nod_flag); } - if (_mi_write_keypage(info,keyinfo,next_page,buff)) + if (_mi_write_keypage(info,keyinfo,next_page,DFLT_INIT_HITS,buff)) goto err; DBUG_RETURN(anc_length <= (uint) keyinfo->block_length/2); err: diff --git a/myisam/mi_delete_all.c b/myisam/mi_delete_all.c index 45e56626d59..3033249886f 100644 --- a/myisam/mi_delete_all.c +++ b/myisam/mi_delete_all.c @@ -53,7 +53,7 @@ int mi_delete_all_rows(MI_INFO *info) If we are using delayed keys or if the user has done changes to the tables since it was locked then there may be key blocks in the key cache */ - flush_key_blocks(share->kfile, FLUSH_IGNORE_CHANGED); + flush_key_blocks(share->key_cache, share->kfile, FLUSH_IGNORE_CHANGED); if (my_chsize(info->dfile, 0, 0, MYF(MY_WME)) || my_chsize(share->kfile, share->base.keystart, 0, MYF(MY_WME)) ) goto err; diff --git a/myisam/mi_extra.c b/myisam/mi_extra.c index 1d45fd300e7..fcc753b62e5 100644 --- a/myisam/mi_extra.c +++ b/myisam/mi_extra.c @@ -283,7 +283,7 @@ int mi_extra(MI_INFO *info, enum ha_extra_function function, void *extra_arg) #ifdef __WIN__ /* Close the isam and data files as Win32 can't drop an open table */ pthread_mutex_lock(&share->intern_lock); - if (flush_key_blocks(share->kfile, + if (flush_key_blocks(share->key_cache, share->kfile, (function == HA_EXTRA_FORCE_REOPEN ? FLUSH_RELEASE : FLUSH_IGNORE_CHANGED))) { @@ -329,7 +329,7 @@ int mi_extra(MI_INFO *info, enum ha_extra_function function, void *extra_arg) break; case HA_EXTRA_FLUSH: if (!share->temporary) - flush_key_blocks(share->kfile,FLUSH_KEEP); + flush_key_blocks(share->key_cache, share->kfile, FLUSH_KEEP); #ifdef HAVE_PWRITE _mi_decrement_open_count(info); #endif @@ -359,6 +359,9 @@ int mi_extra(MI_INFO *info, enum ha_extra_function function, void *extra_arg) if (!share->state.header.uniques) info->opt_flag|= OPT_NO_ROWS; break; + case HA_EXTRA_PRELOAD_BUFFER_SIZE: + info->preload_buff_size= *((ulong *) extra_arg); + break; case HA_EXTRA_KEY_CACHE: case HA_EXTRA_NO_KEY_CACHE: default: diff --git a/myisam/mi_key.c b/myisam/mi_key.c index 89f6bc490fa..93e3b9621f3 100644 --- a/myisam/mi_key.c +++ b/myisam/mi_key.c @@ -18,6 +18,7 @@ #include "myisamdef.h" #include "m_ctype.h" +#include "sp_defs.h" #ifdef HAVE_IEEEFP_H #include <ieeefp.h> #endif @@ -36,9 +37,17 @@ uint _mi_make_key(register MI_INFO *info, uint keynr, uchar *key, { byte *pos,*end; uchar *start; - reg1 MI_KEYSEG *keyseg; + reg1 HA_KEYSEG *keyseg; DBUG_ENTER("_mi_make_key"); + if(info->s->keyinfo[keynr].flag & HA_SPATIAL) + { + /* + TODO: nulls processing + */ + return sp_make_key(info,keynr,key,record,filepos); + } + start=key; for (keyseg=info->s->keyinfo[keynr].seg ; keyseg->type ;keyseg++) { @@ -155,11 +164,11 @@ uint _mi_make_key(register MI_INFO *info, uint keynr, uchar *key, */ uint _mi_pack_key(register MI_INFO *info, uint keynr, uchar *key, uchar *old, - uint k_length, MI_KEYSEG **last_used_keyseg) + uint k_length, HA_KEYSEG **last_used_keyseg) { uint length; uchar *pos,*end,*start_key=key; - reg1 MI_KEYSEG *keyseg; + reg1 HA_KEYSEG *keyseg; enum ha_base_keytype type; DBUG_ENTER("_mi_pack_key"); @@ -260,7 +269,7 @@ static int _mi_put_key_in_record(register MI_INFO *info, uint keynr, { reg2 byte *key; byte *pos,*key_end; - reg1 MI_KEYSEG *keyseg; + reg1 HA_KEYSEG *keyseg; byte *blob_ptr; DBUG_ENTER("_mi_put_key_in_record"); @@ -394,7 +403,7 @@ int _mi_read_key_record(MI_INFO *info, my_off_t filepos, byte *buf) void update_auto_increment(MI_INFO *info,const byte *record) { ulonglong value; - MI_KEYSEG *keyseg=info->s->keyinfo[info->s->base.auto_key-1].seg; + HA_KEYSEG *keyseg=info->s->keyinfo[info->s->base.auto_key-1].seg; const uchar *key=(uchar*) record+keyseg->start; switch (keyseg->type) { diff --git a/myisam/mi_keycache.c b/myisam/mi_keycache.c new file mode 100644 index 00000000000..fc51f331d76 --- /dev/null +++ b/myisam/mi_keycache.c @@ -0,0 +1,161 @@ +/* Copyright (C) 2003 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* + Key cache assignments +*/ + +#include "myisamdef.h" + +/* + Assign pages of the index file for a table to a key cache + + SYNOPSIS + mi_assign_to_key_cache() + info open table + key_map map of indexes to assign to the key cache + key_cache_ptr pointer to the key cache handle + assign_lock Mutex to lock during assignment + + PREREQUESTS + One must have a READ lock or a WRITE lock on the table when calling + the function to ensure that there is no other writers to it. + + The caller must also ensure that one doesn't call this function from + two different threads with the same table. + + NOTES + At present pages for all indexes must be assigned to the same key cache. + In future only pages for indexes specified in the key_map parameter + of the table will be assigned to the specified key cache. + + RETURN VALUE + 0 If a success + # Error code +*/ + +int mi_assign_to_key_cache(MI_INFO *info, + ulonglong key_map __attribute__((unused)), + KEY_CACHE *key_cache) +{ + int error= 0; + MYISAM_SHARE* share= info->s; + DBUG_ENTER("mi_assign_to_key_cache"); + DBUG_PRINT("enter",("old_key_cache_handle: %lx new_key_cache_handle: %lx", + share->key_cache, key_cache)); + + /* + Skip operation if we didn't change key cache. This can happen if we + call this for all open instances of the same table + */ + if (share->key_cache == key_cache) + DBUG_RETURN(0); + + /* + First flush all blocks for the table in the old key cache. + This is to ensure that the disk is consistent with the data pages + in memory (which may not be the case if the table uses delayed_key_write) + + Note that some other read thread may still fill in the key cache with + new blocks during this call and after, but this doesn't matter as + all threads will start using the new key cache for their next call to + myisam library and we know that there will not be any changed blocks + in the old key cache. + */ + + if (flush_key_blocks(share->key_cache, share->kfile, FLUSH_REMOVE)) + { + error= my_errno; + mi_mark_crashed(info); /* Mark that table must be checked */ + } + + /* + Flush the new key cache for this file. This is needed to ensure + that there is no old blocks (with outdated data) left in the new key + cache from an earlier assign_to_keycache operation + + (This can never fail as there is never any not written data in the + new key cache) + */ + (void) flush_key_blocks(key_cache, share->kfile, FLUSH_REMOVE); + + /* + ensure that setting the key cache and changing the multi_key_cache + is done atomicly + */ + pthread_mutex_lock(&share->intern_lock); + /* + Tell all threads to use the new key cache + This should be seen at the lastes for the next call to an myisam function. + */ + share->key_cache= key_cache; + + /* store the key cache in the global hash structure for future opens */ + if (multi_key_cache_set(share->unique_file_name, share->unique_name_length, + share->key_cache)) + error= my_errno; + pthread_mutex_unlock(&share->intern_lock); + DBUG_RETURN(error); +} + + +/* + Change all MyISAM entries that uses one key cache to another key cache + + SYNOPSIS + mi_change_key_cache() + old_key_cache Old key cache + new_key_cache New key cache + + NOTES + This is used when we delete one key cache. + + To handle the case where some other threads tries to open an MyISAM + table associated with the to-be-deleted key cache while this operation + is running, we have to call 'multi_key_cache_change()' from this + function while we have a lock on the MyISAM table list structure. + + This is safe as long as it's only MyISAM that is using this specific + key cache. +*/ + + +void mi_change_key_cache(KEY_CACHE *old_key_cache, + KEY_CACHE *new_key_cache) +{ + LIST *pos; + DBUG_ENTER("mi_change_key_cache"); + + /* + Lock list to ensure that no one can close the table while we manipulate it + */ + pthread_mutex_lock(&THR_LOCK_myisam); + for (pos=myisam_open_list ; pos ; pos=pos->next) + { + MI_INFO *info= (MI_INFO*) pos->data; + MYISAM_SHARE *share= info->s; + if (share->key_cache == old_key_cache) + mi_assign_to_key_cache(info, (ulonglong) ~0, new_key_cache); + } + + /* + We have to do the following call while we have the lock on the + MyISAM list structure to ensure that another thread is not trying to + open a new table that will be associted with the old key cache + */ + multi_key_cache_change(old_key_cache, new_key_cache); + pthread_mutex_unlock(&THR_LOCK_myisam); +} diff --git a/myisam/mi_locking.c b/myisam/mi_locking.c index f3bfa8deb90..67ccca52d08 100644 --- a/myisam/mi_locking.c +++ b/myisam/mi_locking.c @@ -61,7 +61,8 @@ int mi_lock_database(MI_INFO *info, int lock_type) count= --share->w_locks; --share->tot_locks; if (info->lock_type == F_WRLCK && !share->w_locks && - !share->delay_key_write && flush_key_blocks(share->kfile,FLUSH_KEEP)) + !share->delay_key_write && flush_key_blocks(share->key_cache, + share->kfile,FLUSH_KEEP)) { error=my_errno; mi_mark_crashed(info); /* Mark that table must be checked */ @@ -388,7 +389,7 @@ int _mi_test_if_changed(register MI_INFO *info) { /* Keyfile has changed */ DBUG_PRINT("info",("index file changed")); if (share->state.process != share->this_process) - VOID(flush_key_blocks(share->kfile,FLUSH_RELEASE)); + VOID(flush_key_blocks(share->key_cache, share->kfile, FLUSH_RELEASE)); share->last_process=share->state.process; info->last_unique= share->state.unique; info->last_loop= share->state.update_count; diff --git a/myisam/mi_open.c b/myisam/mi_open.c index 580261d8078..d5492ae5e35 100644 --- a/myisam/mi_open.c +++ b/myisam/mi_open.c @@ -17,6 +17,8 @@ /* open a isam-database */ #include "fulltext.h" +#include "sp_defs.h" +#include "rt_index.h" #include <m_ctype.h> #if defined(MSDOS) || defined(__WIN__) @@ -73,9 +75,9 @@ static MI_INFO *test_if_reopen(char *filename) MI_INFO *mi_open(const char *name, int mode, uint open_flags) { - int lock_error,kfile,open_mode,save_errno; + int lock_error,kfile,open_mode,save_errno,have_rtree=0; uint i,j,len,errpos,head_length,base_pos,offset,info_length,keys, - key_parts,unique_key_parts,tmp_length,uniques; + key_parts,unique_key_parts,fulltext_keys,uniques; char name_buff[FN_REFLEN], org_name [FN_REFLEN], index_name[FN_REFLEN], data_name[FN_REFLEN]; char *disk_cache, *disk_pos, *end_pos; @@ -102,6 +104,7 @@ MI_INFO *mi_open(const char *name, int mode, uint open_flags) share_buff.state.rec_per_key_part=rec_per_key_part; share_buff.state.key_root=key_root; share_buff.state.key_del=key_del; + share_buff.key_cache= multi_key_cache_search(name_buff, strlen(name_buff)); if ((kfile=my_open(name_buff,(open_mode=O_RDWR) | O_SHARE,MYF(0))) < 0) { @@ -134,8 +137,7 @@ MI_INFO *mi_open(const char *name, int mode, uint open_flags) HA_OPTION_TEMP_COMPRESS_RECORD | HA_OPTION_CHECKSUM | HA_OPTION_TMP_TABLE | HA_OPTION_DELAY_KEY_WRITE)) { - DBUG_PRINT("error",("wrong options: 0x%lx", - share->options)); + DBUG_PRINT("error",("wrong options: 0x%lx", share->options)); my_errno=HA_ERR_OLD_FILE; goto err; } @@ -174,11 +176,9 @@ MI_INFO *mi_open(const char *name, int mode, uint open_flags) len=mi_uint2korr(share->state.header.state_info_length); keys= (uint) share->state.header.keys; uniques= (uint) share->state.header.uniques; + fulltext_keys= (uint) share->state.header.fulltext_keys; key_parts= mi_uint2korr(share->state.header.key_parts); unique_key_parts= mi_uint2korr(share->state.header.unique_key_parts); - tmp_length=(MI_STATE_INFO_SIZE + keys * MI_STATE_KEY_SIZE + - key_parts*MI_STATE_KEYSEG_SIZE + - share->state.header.max_block_size*MI_STATE_KEYBLOCK_SIZE); if (len != MI_STATE_INFO_SIZE) { DBUG_PRINT("warning", @@ -187,9 +187,6 @@ MI_INFO *mi_open(const char *name, int mode, uint open_flags) } share->state_diff_length=len-MI_STATE_INFO_SIZE; - if (share->state.header.fulltext_keys) - fprintf(stderr, "Warning: table file %s was created in MySQL 4.1+, use REPAIR TABLE ... USE_FRM to recreate it as a valid MySQL 4.0 table\n", name_buff); - mi_state_info_read(disk_cache, &share->state); len= mi_uint2korr(share->state.header.base_info_length); if (len != MI_BASE_INFO_SIZE) @@ -218,6 +215,7 @@ MI_INFO *mi_open(const char *name, int mode, uint open_flags) goto err; } + key_parts+=fulltext_keys*FT_SEGS; if (share->base.max_key_length > MI_MAX_KEY_BUFF || keys > MI_MAX_KEY || key_parts >= MI_MAX_KEY * MI_MAX_KEY_SEG) { @@ -226,7 +224,7 @@ MI_INFO *mi_open(const char *name, int mode, uint open_flags) goto err; } - /* Correct max_file_length based on length of sizeof_t */ + /* Correct max_file_length based on length of sizeof(off_t) */ max_data_file_length= (share->options & (HA_OPTION_PACK_RECORD | HA_OPTION_COMPRESS_RECORD)) ? (((ulonglong) 1 << (share->base.rec_reflength*8))-1) : @@ -271,7 +269,7 @@ MI_INFO *mi_open(const char *name, int mode, uint open_flags) &share->uniqueinfo,uniques*sizeof(MI_UNIQUEDEF), &share->keyparts, (key_parts+unique_key_parts+keys+uniques) * - sizeof(MI_KEYSEG), + sizeof(HA_KEYSEG), &share->rec, (share->base.fields+1)*sizeof(MI_COLUMNDEF), &share->blobs,sizeof(MI_BLOB)*share->base.blobs, @@ -296,22 +294,26 @@ MI_INFO *mi_open(const char *name, int mode, uint open_flags) (char*) key_del, (sizeof(my_off_t) * share->state.header.max_block_size)); strmov(share->unique_file_name, name_buff); + share->unique_name_length= strlen(name_buff); strmov(share->index_file_name, index_name); strmov(share->data_file_name, data_name); share->blocksize=min(IO_SIZE,myisam_block_size); { - MI_KEYSEG *pos=share->keyparts; + HA_KEYSEG *pos=share->keyparts; for (i=0 ; i < keys ; i++) { disk_pos=mi_keydef_read(disk_pos, &share->keyinfo[i]); - disk_pos_assert(disk_pos + share->keyinfo[i].keysegs * MI_KEYSEG_SIZE, - end_pos); + disk_pos_assert(disk_pos + share->keyinfo[i].keysegs * HA_KEYSEG_SIZE, + end_pos); + if (share->keyinfo[i].key_alg == HA_KEY_ALG_RTREE) + have_rtree=1; set_if_smaller(share->blocksize,share->keyinfo[i].block_length); share->keyinfo[i].seg=pos; for (j=0 ; j < share->keyinfo[i].keysegs; j++,pos++) { disk_pos=mi_keyseg_read(disk_pos, pos); + if (pos->type == HA_KEYTYPE_TEXT || pos->type == HA_KEYTYPE_VARTEXT) { if (!pos->language) @@ -323,11 +325,49 @@ MI_INFO *mi_open(const char *name, int mode, uint open_flags) } } } - if (share->keyinfo[i].flag & HA_FULLTEXT) + if (share->keyinfo[i].flag & HA_SPATIAL) + { + uint sp_segs=SPDIMS*2; + share->keyinfo[i].seg=pos-sp_segs; + share->keyinfo[i].keysegs--; + } + else if (share->keyinfo[i].flag & HA_FULLTEXT) { - share->keyinfo[i].seg=pos-FT_SEGS; - share->fulltext_index=1; + if (!fulltext_keys) + { /* 4.0 compatibility code, to be removed in 5.0 */ + share->keyinfo[i].seg=pos-FT_SEGS; + share->keyinfo[i].keysegs-=FT_SEGS; + } + else + { + uint j; + share->keyinfo[i].seg=pos; + for (j=0; j < FT_SEGS; j++) + { + *pos=ft_keysegs[j]; + pos[0].language= pos[-1].language; + if (!(pos[0].charset= pos[-1].charset)) + { + my_errno=HA_ERR_CRASHED; + goto err; + } + pos++; + } + } + if (!share->ft2_keyinfo.seg) + { + memcpy(& share->ft2_keyinfo, & share->keyinfo[i], sizeof(MI_KEYDEF)); + share->ft2_keyinfo.keysegs=1; + share->ft2_keyinfo.flag=0; + share->ft2_keyinfo.keylength= + share->ft2_keyinfo.minlength= + share->ft2_keyinfo.maxlength=HA_FT_WLEN+share->base.rec_reflength; + share->ft2_keyinfo.seg=pos-1; + share->ft2_keyinfo.end=pos; + setup_key_functions(& share->ft2_keyinfo); + } } + setup_key_functions(share->keyinfo+i); share->keyinfo[i].end=pos; pos->type=HA_KEYTYPE_END; /* End */ pos->length=share->base.rec_reflength; @@ -339,7 +379,7 @@ MI_INFO *mi_open(const char *name, int mode, uint open_flags) { disk_pos=mi_uniquedef_read(disk_pos, &share->uniqueinfo[i]); disk_pos_assert(disk_pos + share->uniqueinfo[i].keysegs * - MI_KEYSEG_SIZE, end_pos); + HA_KEYSEG_SIZE, end_pos); share->uniqueinfo[i].seg=pos; for (j=0 ; j < share->uniqueinfo[i].keysegs; j++,pos++) { @@ -362,8 +402,6 @@ MI_INFO *mi_open(const char *name, int mode, uint open_flags) pos++; } } - for (i=0 ; i < keys ; i++) - setup_key_functions(share->keyinfo+i); disk_pos_assert(disk_pos + share->base.fields *MI_COLUMNDEF_SIZE, end_pos); for (i=j=offset=0 ; i < share->base.fields ; i++) @@ -395,10 +433,6 @@ MI_INFO *mi_open(const char *name, int mode, uint open_flags) share->kfile=kfile; share->this_process=(ulong) getpid(); - share->rnd= (int) share->this_process; /* rnd-counter for splits */ -#ifndef DBUG_OFF - share->rnd=0; /* To make things repeatable */ -#endif share->last_process= share->state.process; share->base.key_parts=key_parts; share->base.all_key_parts=key_parts+unique_key_parts; @@ -443,7 +477,8 @@ MI_INFO *mi_open(const char *name, int mode, uint open_flags) ((share->options & (HA_OPTION_READ_ONLY_DATA | HA_OPTION_TMP_TABLE | HA_OPTION_COMPRESS_RECORD | HA_OPTION_TEMP_COMPRESS_RECORD)) || - (open_flags & HA_OPEN_TMP_TABLE)) ? 0 : 1; + (open_flags & HA_OPEN_TMP_TABLE) || + have_rtree) ? 0 : 1; if (share->concurrent_insert) { share->lock.get_status=mi_get_status; @@ -465,6 +500,7 @@ MI_INFO *mi_open(const char *name, int mode, uint open_flags) if (mi_open_datafile(&info, share, old_info->dfile)) goto err; errpos=5; + have_rtree= old_info->rtree_recursion_state != NULL; } /* alloc and set up private structure parts */ @@ -474,11 +510,16 @@ MI_INFO *mi_open(const char *name, int mode, uint open_flags) &info.buff,(share->base.max_key_block_length*2+ share->base.max_key_length), &info.lastkey,share->base.max_key_length*3+1, + &info.first_mbr_key, share->base.max_key_length, &info.filename,strlen(org_name)+1, + &info.rtree_recursion_state,have_rtree ? 1024 : 0, NullS)) goto err; errpos=6; + if (!have_rtree) + info.rtree_recursion_state= NULL; + strmov(info.filename,org_name); memcpy(info.blobs,share->blobs,sizeof(MI_BLOB)*share->base.blobs); info.lastkey2=info.lastkey+share->base.max_key_length; @@ -498,6 +539,7 @@ MI_INFO *mi_open(const char *name, int mode, uint open_flags) info.lock_type=F_UNLCK; info.quick_mode=0; info.bulk_insert=0; + info.ft1_to_ft2=0; info.errkey= -1; info.page_changed=1; pthread_mutex_lock(&share->intern_lock); @@ -682,6 +724,16 @@ void mi_setup_functions(register MYISAM_SHARE *share) static void setup_key_functions(register MI_KEYDEF *keyinfo) { + if (keyinfo->key_alg == HA_KEY_ALG_RTREE) + { + keyinfo->ck_insert = rtree_insert; + keyinfo->ck_delete = rtree_delete; + } + else + { + keyinfo->ck_insert = _mi_ck_write; + keyinfo->ck_delete = _mi_ck_delete; + } if (keyinfo->flag & HA_BINARY_PACK_KEY) { /* Simple prefix compression */ keyinfo->bin_search=_mi_seq_search; @@ -694,7 +746,7 @@ static void setup_key_functions(register MI_KEYDEF *keyinfo) keyinfo->get_key= _mi_get_pack_key; if (keyinfo->seg[0].flag & HA_PACK_KEY) { /* Prefix compression */ - if (!keyinfo->seg->charset || use_strcoll(keyinfo->seg->charset) || + if (!keyinfo->seg->charset || use_strnxfrm(keyinfo->seg->charset) || (keyinfo->seg->flag & HA_NULL_PART)) keyinfo->bin_search=_mi_seq_search; else @@ -720,9 +772,9 @@ static void setup_key_functions(register MI_KEYDEF *keyinfo) } -/*************************************************************************** -** Function to save and store the header in the index file (.MSI) -***************************************************************************/ +/* + Function to save and store the header in the index file (.MYI) +*/ uint mi_state_info_write(File file, MI_STATE_INFO *state, uint pWrite) { @@ -948,7 +1000,7 @@ uint mi_keydef_write(File file, MI_KEYDEF *keydef) uchar *ptr=buff; *ptr++ = (uchar) keydef->keysegs; - *ptr++ = 0; /* not used */ + *ptr++ = keydef->key_alg; /* Rtree or Btree */ mi_int2store(ptr,keydef->flag); ptr +=2; mi_int2store(ptr,keydef->block_length); ptr +=2; mi_int2store(ptr,keydef->keylength); ptr +=2; @@ -960,7 +1012,8 @@ uint mi_keydef_write(File file, MI_KEYDEF *keydef) char *mi_keydef_read(char *ptr, MI_KEYDEF *keydef) { keydef->keysegs = (uint) *ptr++; - ptr++; + keydef->key_alg = *ptr++; /* Rtree or Btree */ + keydef->flag = mi_uint2korr(ptr); ptr +=2; keydef->block_length = mi_uint2korr(ptr); ptr +=2; keydef->keylength = mi_uint2korr(ptr); ptr +=2; @@ -976,9 +1029,9 @@ char *mi_keydef_read(char *ptr, MI_KEYDEF *keydef) ** mi_keyseg ***************************************************************************/ -int mi_keyseg_write(File file, const MI_KEYSEG *keyseg) +int mi_keyseg_write(File file, const HA_KEYSEG *keyseg) { - uchar buff[MI_KEYSEG_SIZE]; + uchar buff[HA_KEYSEG_SIZE]; uchar *ptr=buff; *ptr++ =keyseg->type; @@ -996,7 +1049,7 @@ int mi_keyseg_write(File file, const MI_KEYSEG *keyseg) } -char *mi_keyseg_read(char *ptr, MI_KEYSEG *keyseg) +char *mi_keyseg_read(char *ptr, HA_KEYSEG *keyseg) { keyseg->type = *ptr++; keyseg->language = *ptr++; @@ -1063,7 +1116,7 @@ char *mi_recinfo_read(char *ptr, MI_COLUMNDEF *recinfo) /************************************************************************** Open data file with or without RAID -We can't use dup() here as the data file descriptors need to have different +We can't use dup() here as the data file descriptors need to have different active seek-positions. The argument file_to_dup is here for the future if there would on some OS diff --git a/myisam/mi_page.c b/myisam/mi_page.c index 1d40980e309..8c6981afa00 100644 --- a/myisam/mi_page.c +++ b/myisam/mi_page.c @@ -24,11 +24,16 @@ /* Fetch a key-page in memory */ uchar *_mi_fetch_keypage(register MI_INFO *info, MI_KEYDEF *keyinfo, - my_off_t page, uchar *buff, int return_buffer) + my_off_t page, int level, + uchar *buff, int return_buffer) { uchar *tmp; uint page_size; - tmp=(uchar*) key_cache_read(info->s->kfile,page,(byte*) buff, + DBUG_ENTER("_mi_fetch_keypage"); + DBUG_PRINT("enter",("page: %ld",page)); + + tmp=(uchar*) key_cache_read(info->s->key_cache, + info->s->kfile, page, level, (byte*) buff, (uint) keyinfo->block_length, (uint) keyinfo->block_length, return_buffer); @@ -39,7 +44,7 @@ uchar *_mi_fetch_keypage(register MI_INFO *info, MI_KEYDEF *keyinfo, DBUG_PRINT("error",("Got errno: %d from key_cache_read",my_errno)); info->last_keypage=HA_OFFSET_ERROR; my_errno=HA_ERR_CRASHED; - return 0; + DBUG_RETURN(0); } info->last_keypage=page; page_size=mi_getint(tmp); @@ -47,20 +52,23 @@ uchar *_mi_fetch_keypage(register MI_INFO *info, MI_KEYDEF *keyinfo, { DBUG_PRINT("error",("page %lu had wrong page length: %u", (ulong) page, page_size)); + DBUG_DUMP("page", tmp, keyinfo->block_length); info->last_keypage = HA_OFFSET_ERROR; my_errno = HA_ERR_CRASHED; tmp = 0; } - return tmp; + DBUG_RETURN(tmp); } /* _mi_fetch_keypage */ /* Write a key-page on disk */ int _mi_write_keypage(register MI_INFO *info, register MI_KEYDEF *keyinfo, - my_off_t page, uchar *buff) + my_off_t page, int level, uchar *buff) { reg3 uint length; + DBUG_ENTER("_mi_write_keypage"); + #ifndef FAST /* Safety check */ if (page < info->s->base.keystart || page+keyinfo->block_length > info->state->key_file_length || @@ -71,7 +79,7 @@ int _mi_write_keypage(register MI_INFO *info, register MI_KEYDEF *keyinfo, (long) info->state->key_file_length, (long) page)); my_errno=EINVAL; - return(-1); + DBUG_RETURN((-1)); } DBUG_PRINT("page",("write page at: %lu",(long) page,buff)); DBUG_DUMP("buff",(byte*) buff,mi_getint(buff)); @@ -87,16 +95,18 @@ int _mi_write_keypage(register MI_INFO *info, register MI_KEYDEF *keyinfo, length=keyinfo->block_length; } #endif - return (key_cache_write(info->s->kfile,page,(byte*) buff,length, + DBUG_RETURN((key_cache_write(info->s->key_cache, + info->s->kfile,page, level, (byte*) buff,length, (uint) keyinfo->block_length, (int) ((info->lock_type != F_UNLCK) || - info->s->delay_key_write))); + info->s->delay_key_write)))); } /* mi_write_keypage */ /* Remove page from disk */ -int _mi_dispose(register MI_INFO *info, MI_KEYDEF *keyinfo, my_off_t pos) +int _mi_dispose(register MI_INFO *info, MI_KEYDEF *keyinfo, my_off_t pos, + int level) { my_off_t old_link; char buff[8]; @@ -107,7 +117,8 @@ int _mi_dispose(register MI_INFO *info, MI_KEYDEF *keyinfo, my_off_t pos) info->s->state.key_del[keyinfo->block_size]=pos; mi_sizestore(buff,old_link); info->s->state.changed|= STATE_NOT_SORTED_PAGES; - DBUG_RETURN(key_cache_write(info->s->kfile,pos,buff, + DBUG_RETURN(key_cache_write(info->s->key_cache, + info->s->kfile, pos , level, buff, sizeof(buff), (uint) keyinfo->block_length, (int) (info->lock_type != F_UNLCK))); @@ -116,7 +127,7 @@ int _mi_dispose(register MI_INFO *info, MI_KEYDEF *keyinfo, my_off_t pos) /* Make new page on disk */ -my_off_t _mi_new(register MI_INFO *info, MI_KEYDEF *keyinfo) +my_off_t _mi_new(register MI_INFO *info, MI_KEYDEF *keyinfo, int level) { my_off_t pos; char buff[8]; @@ -135,7 +146,8 @@ my_off_t _mi_new(register MI_INFO *info, MI_KEYDEF *keyinfo) } else { - if (!key_cache_read(info->s->kfile,pos, + if (!key_cache_read(info->s->key_cache, + info->s->kfile, pos, level, buff, (uint) sizeof(buff), (uint) keyinfo->block_length,0)) diff --git a/myisam/mi_panic.c b/myisam/mi_panic.c index bd0b07b097e..78698d88c54 100644 --- a/myisam/mi_panic.c +++ b/myisam/mi_panic.c @@ -48,7 +48,7 @@ int mi_panic(enum ha_panic_function flag) if (info->s->options & HA_OPTION_READ_ONLY_DATA) break; #endif - if (flush_key_blocks(info->s->kfile,FLUSH_RELEASE)) + if (flush_key_blocks(info->s->key_cache, info->s->kfile, FLUSH_RELEASE)) error=my_errno; if (info->opt_flag & WRITE_CACHE_USED) if (flush_io_cache(&info->rec_cache)) diff --git a/myisam/mi_preload.c b/myisam/mi_preload.c new file mode 100644 index 00000000000..80489cbcd13 --- /dev/null +++ b/myisam/mi_preload.c @@ -0,0 +1,118 @@ +/* Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* + Preload indexes into key cache +*/ + +#include "myisamdef.h" + + +/* + Preload pages of the index file for a table into the key cache + + SYNOPSIS + mi_preload() + info open table + map map of indexes to preload into key cache + ignore_leaves only non-leaves pages are to be preloaded + + RETURN VALUE + 0 if a success. error code - otherwise. + + NOTES. + At present pages for all indexes are preloaded. + In future only pages for indexes specified in the key_map parameter + of the table will be preloaded. +*/ + +int mi_preload(MI_INFO *info, ulonglong key_map, my_bool ignore_leaves) +{ + uint i; + ulong length, block_length= 0; + uchar *buff= NULL; + MYISAM_SHARE* share= info->s; + uint keys= share->state.header.keys; + MI_KEYDEF *keyinfo= share->keyinfo; + my_off_t key_file_length= share->state.state.key_file_length; + my_off_t pos= share->base.keystart; + DBUG_ENTER("mi_preload"); + + if (!keys || !key_map || key_file_length == pos) + DBUG_RETURN(0); + + block_length= keyinfo[0].block_length; + + /* Check whether all indexes use the same block size */ + for (i= 1 ; i < keys ; i++) + { + if (keyinfo[i].block_length != block_length) + DBUG_RETURN(my_errno= HA_ERR_NON_UNIQUE_BLOCK_SIZE); + } + + length= info->preload_buff_size/block_length * block_length; + set_if_bigger(length, block_length); + + if (!(buff= (uchar *) my_malloc(length, MYF(MY_WME)))) + DBUG_RETURN(my_errno= HA_ERR_OUT_OF_MEM); + + if (flush_key_blocks(share->key_cache,share->kfile, FLUSH_RELEASE)) + goto err; + + do + { + /* Read the next block of index file into the preload buffer */ + if ((my_off_t) length > (key_file_length-pos)) + length= (ulong) (key_file_length-pos); + if (my_pread(share->kfile, (byte*) buff, length, pos, MYF(MY_FAE))) + goto err; + + if (ignore_leaves) + { + uchar *end= buff+length; + do + { + if (mi_test_if_nod(buff)) + { + if (key_cache_insert(share->key_cache, + share->kfile, pos, DFLT_INIT_HITS, + (byte*) buff, block_length)) + goto err; + } + pos+= block_length; + } + while ((buff+= block_length) != end); + buff= end-length; + } + else + { + if (key_cache_insert(share->key_cache, + share->kfile, pos, DFLT_INIT_HITS, + (byte*) buff, length)) + goto err; + pos+= length; + } + } + while (pos != key_file_length); + + my_free(buff, MYF(0)); + DBUG_RETURN(0); + +err: + my_free(buff, MYF(MY_ALLOW_ZERO_PTR)); + DBUG_RETURN(my_errno= errno); +} + diff --git a/myisam/mi_range.c b/myisam/mi_range.c index 8e85afc5f80..caa57ce6187 100644 --- a/myisam/mi_range.c +++ b/myisam/mi_range.c @@ -20,6 +20,7 @@ */ #include "myisamdef.h" +#include "rt_index.h" static ha_rows _mi_record_pos(MI_INFO *info,const byte *key,uint key_len, enum ha_rkey_function search_flag); @@ -39,7 +40,7 @@ ha_rows mi_records_in_range(MI_INFO *info, int inx, const byte *start_key, const byte *end_key, uint end_key_len, enum ha_rkey_function end_search_flag) { - ha_rows start_pos,end_pos; + ha_rows start_pos,end_pos,res; DBUG_ENTER("mi_records_in_range"); if ((inx = _mi_check_index(info,inx)) < 0) @@ -50,20 +51,40 @@ ha_rows mi_records_in_range(MI_INFO *info, int inx, const byte *start_key, info->update&= (HA_STATE_CHANGED+HA_STATE_ROW_CHANGED); if (info->s->concurrent_insert) rw_rdlock(&info->s->key_root_lock[inx]); - start_pos= (start_key ? + + switch(info->s->keyinfo[inx].key_alg){ + case HA_KEY_ALG_RTREE: + { + uchar * key_buff; + if (start_key_len == 0) + start_key_len=USE_WHOLE_KEY; + key_buff=info->lastkey+info->s->base.max_key_length; + start_key_len= _mi_pack_key(info,inx,key_buff,(uchar*) start_key, + start_key_len, (HA_KEYSEG**) 0); + res=rtree_estimate(info, inx, key_buff, start_key_len, myisam_read_vec[start_search_flag]); + res=res?res:1; + break; + } + case HA_KEY_ALG_BTREE: + default: + start_pos= (start_key ? _mi_record_pos(info,start_key,start_key_len,start_search_flag) : (ha_rows) 0); - end_pos= (end_key ? + end_pos= (end_key ? _mi_record_pos(info,end_key,end_key_len,end_search_flag) : info->state->records+ (ha_rows) 1); + res=end_pos < start_pos ? (ha_rows) 0 : + (end_pos == start_pos ? (ha_rows) 1 : end_pos-start_pos); + if (start_pos == HA_POS_ERROR || end_pos == HA_POS_ERROR) + res=HA_POS_ERROR; + } + if (info->s->concurrent_insert) rw_unlock(&info->s->key_root_lock[inx]); fast_mi_writeinfo(info); - if (start_pos == HA_POS_ERROR || end_pos == HA_POS_ERROR) - DBUG_RETURN(HA_POS_ERROR); - DBUG_PRINT("info",("records: %ld",(ulong) (end_pos-start_pos))); - DBUG_RETURN(end_pos < start_pos ? (ha_rows) 0 : - (end_pos == start_pos ? (ha_rows) 1 : end_pos-start_pos)); + + DBUG_PRINT("info",("records: %ld",(ulong) (res))); + DBUG_RETURN(res); } @@ -84,7 +105,7 @@ static ha_rows _mi_record_pos(MI_INFO *info, const byte *key, uint key_len, key_len=USE_WHOLE_KEY; key_buff=info->lastkey+info->s->base.max_key_length; key_len=_mi_pack_key(info,inx,key_buff,(uchar*) key,key_len, - (MI_KEYSEG**) 0); + (HA_KEYSEG**) 0); DBUG_EXECUTE("key",_mi_print_key(DBUG_FILE,keyinfo->seg, (uchar*) key_buff,key_len);); nextflag=myisam_read_vec[search_flag]; @@ -121,7 +142,7 @@ static double _mi_search_pos(register MI_INFO *info, if (pos == HA_OFFSET_ERROR) DBUG_RETURN(0.5); - if (!(buff=_mi_fetch_keypage(info,keyinfo,pos,info->buff,1))) + if (!(buff=_mi_fetch_keypage(info,keyinfo,pos,DFLT_INIT_HITS,info->buff,1))) goto err; flag=(*keyinfo->bin_search)(info,keyinfo,buff,key,key_len,nextflag, &keypos,info->lastkey, &after_key); diff --git a/myisam/mi_rkey.c b/myisam/mi_rkey.c index 1bb478efd3d..ddfac0a39a2 100644 --- a/myisam/mi_rkey.c +++ b/myisam/mi_rkey.c @@ -17,7 +17,7 @@ /* Read record based on a key */ #include "myisamdef.h" - +#include "rt_index.h" /* Read a record using key */ /* Ordinary search_flag is 0 ; Give error if no record with key */ @@ -28,7 +28,7 @@ int mi_rkey(MI_INFO *info, byte *buf, int inx, const byte *key, uint key_len, uchar *key_buff; MYISAM_SHARE *share=info->s; MI_KEYDEF *keyinfo; - MI_KEYSEG *last_used_keyseg; + HA_KEYSEG *last_used_keyseg; uint pack_key_length, use_key_length, nextflag; DBUG_ENTER("mi_rkey"); DBUG_PRINT("enter",("base: %lx inx: %d search_flag: %d", @@ -38,20 +38,12 @@ int mi_rkey(MI_INFO *info, byte *buf, int inx, const byte *key, uint key_len, DBUG_RETURN(my_errno); info->update&= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED); + info->last_key_func= search_flag; keyinfo= share->keyinfo + inx; - if (!info->use_packed_key) - { - if (key_len == 0) - key_len=USE_WHOLE_KEY; - key_buff=info->lastkey+info->s->base.max_key_length; - pack_key_length=_mi_pack_key(info, (uint) inx, key_buff, (uchar*) key, - key_len, &last_used_keyseg); - DBUG_EXECUTE("key",_mi_print_key(DBUG_FILE, keyinfo->seg, - key_buff, pack_key_length);); - } - else + if (info->once_flags & USE_PACKED_KEYS) { + info->once_flags&= ~USE_PACKED_KEYS; /* Reset flag */ /* key is already packed!; This happens when we are using a MERGE TABLE */ @@ -60,6 +52,16 @@ int mi_rkey(MI_INFO *info, byte *buf, int inx, const byte *key, uint key_len, bmove(key_buff,key,key_len); last_used_keyseg= 0; } + else + { + if (key_len == 0) + key_len=USE_WHOLE_KEY; + key_buff=info->lastkey+info->s->base.max_key_length; + pack_key_length=_mi_pack_key(info,(uint) inx, key_buff, (uchar*) key, + key_len, &last_used_keyseg); + DBUG_EXECUTE("key",_mi_print_key(DBUG_FILE, keyinfo->seg, + key_buff, pack_key_length);); + } if (fast_mi_readinfo(info)) goto err; @@ -71,22 +73,33 @@ int mi_rkey(MI_INFO *info, byte *buf, int inx, const byte *key, uint key_len, if (!(nextflag & (SEARCH_FIND | SEARCH_NO_FIND | SEARCH_LAST))) use_key_length=USE_WHOLE_KEY; - if (!_mi_search(info,keyinfo, key_buff, use_key_length, + switch (info->s->keyinfo[inx].key_alg) { + case HA_KEY_ALG_RTREE: + if (rtree_find_first(info,inx,key_buff,use_key_length,nextflag) < 0) + { + my_errno=HA_ERR_CRASHED; + goto err; + } + break; + case HA_KEY_ALG_BTREE: + default: + if (!_mi_search(info, keyinfo, key_buff, use_key_length, myisam_read_vec[search_flag], info->s->state.key_root[inx])) - { - while (info->lastpos >= info->state->data_file_length) { - /* - Skip rows that are inserted by other threads since we got a lock - Note that this can only happen if we are not searching after an - exact key, because the keys are sorted according to position - */ - - if (_mi_search_next(info, keyinfo, info->lastkey, + while (info->lastpos >= info->state->data_file_length) + { + /* + Skip rows that are inserted by other threads since we got a lock + Note that this can only happen if we are not searching after an + exact key, because the keys are sorted according to position + */ + + if (_mi_search_next(info, keyinfo, info->lastkey, info->lastkey_length, myisam_readnext_vec[search_flag], info->s->state.key_root[inx])) - break; + break; + } } } if (share->concurrent_insert) diff --git a/myisam/mi_rnext.c b/myisam/mi_rnext.c index 6d135462f96..e1cf916d6d9 100644 --- a/myisam/mi_rnext.c +++ b/myisam/mi_rnext.c @@ -16,6 +16,8 @@ #include "myisamdef.h" +#include "rt_index.h" + /* Read next row with the same key as previous read One may have done a write, update or delete of the previous row. @@ -42,27 +44,55 @@ int mi_rnext(MI_INFO *info, byte *buf, int inx) changed=_mi_test_if_changed(info); if (!flag) { - error=_mi_search_first(info,info->s->keyinfo+inx, + switch(info->s->keyinfo[inx].key_alg){ + case HA_KEY_ALG_RTREE: + error=rtree_get_first(info,inx,info->lastkey_length); + break; + case HA_KEY_ALG_BTREE: + default: + error=_mi_search_first(info,info->s->keyinfo+inx, info->s->state.key_root[inx]); + break; + } } - else if (!changed) - error=_mi_search_next(info,info->s->keyinfo+inx,info->lastkey, - info->lastkey_length,flag, - info->s->state.key_root[inx]); else - error=_mi_search(info,info->s->keyinfo+inx,info->lastkey, - USE_WHOLE_KEY,flag, info->s->state.key_root[inx]); - - if (!error) { - while (info->lastpos >= info->state->data_file_length) + switch(info->s->keyinfo[inx].key_alg) { - /* Skip rows that are inserted by other threads since we got a lock */ - if ((error=_mi_search_next(info,info->s->keyinfo+inx,info->lastkey, - info->lastkey_length, - SEARCH_BIGGER, - info->s->state.key_root[inx]))) - break; + case HA_KEY_ALG_RTREE: + /* + Note that rtree doesn't support that the table + may be changed since last call, so we do need + to skip rows inserted by other threads like in btree + */ + error=rtree_get_next(info,inx,info->lastkey_length); + break; + + case HA_KEY_ALG_BTREE: + default: + if (!changed) + { + error=_mi_search_next(info,info->s->keyinfo+inx,info->lastkey, + info->lastkey_length,flag, + info->s->state.key_root[inx]); + } + else + { + error=_mi_search(info,info->s->keyinfo+inx,info->lastkey, + USE_WHOLE_KEY,flag, info->s->state.key_root[inx]); + } + if (!error && info->s->concurrent_insert) + { + while (info->lastpos >= info->state->data_file_length) + { + /* Skip rows that are inserted by other threads since we got a lock */ + if ((error=_mi_search_next(info,info->s->keyinfo+inx,info->lastkey, + info->lastkey_length, + SEARCH_BIGGER, + info->s->state.key_root[inx]))) + break; + } + } } } diff --git a/myisam/mi_rnext_same.c b/myisam/mi_rnext_same.c index a9d1953323c..19190a60246 100644 --- a/myisam/mi_rnext_same.c +++ b/myisam/mi_rnext_same.c @@ -15,6 +15,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include "myisamdef.h" +#include "rt_index.h" /* Read next row with the same key as previous read, but abort if @@ -37,26 +38,42 @@ int mi_rnext_same(MI_INFO *info, byte *buf) if (fast_mi_readinfo(info)) DBUG_RETURN(my_errno); - memcpy(info->lastkey2,info->lastkey,info->last_rkey_length); if (info->s->concurrent_insert) rw_rdlock(&info->s->key_root_lock[inx]); - for (;;) + + switch (keyinfo->key_alg) { - if ((error=_mi_search_next(info,keyinfo,info->lastkey, + case HA_KEY_ALG_RTREE: + if ((error=rtree_find_next(info,inx, + myisam_read_vec[info->last_key_func]))) + { + error=1; + my_errno=HA_ERR_END_OF_FILE; + info->lastpos= HA_OFFSET_ERROR; + break; + } + break; + case HA_KEY_ALG_BTREE: + default: + memcpy(info->lastkey2,info->lastkey,info->last_rkey_length); + for (;;) + { + if ((error=_mi_search_next(info,keyinfo,info->lastkey, info->lastkey_length,SEARCH_BIGGER, info->s->state.key_root[inx]))) - break; - if (_mi_key_cmp(keyinfo->seg,info->lastkey2,info->lastkey, + break; + if (ha_key_cmp(keyinfo->seg,info->lastkey2,info->lastkey, info->last_rkey_length, SEARCH_FIND, ¬_used)) - { - error=1; - my_errno=HA_ERR_END_OF_FILE; - info->lastpos= HA_OFFSET_ERROR; - break; - } - /* Skip rows that are inserted by other threads since we got a lock */ - if (info->lastpos < info->state->data_file_length) - break; + { + error=1; + my_errno=HA_ERR_END_OF_FILE; + info->lastpos= HA_OFFSET_ERROR; + break; + } + /* Skip rows that are inserted by other threads since we got a lock */ + if (info->lastpos < info->state->data_file_length) + break; + } } if (info->s->concurrent_insert) rw_unlock(&info->s->key_root_lock[inx]); diff --git a/myisam/mi_rrnd.c b/myisam/mi_rrnd.c index f8009441cff..29f686b0456 100644 --- a/myisam/mi_rrnd.c +++ b/myisam/mi_rrnd.c @@ -46,7 +46,10 @@ int mi_rrnd(MI_INFO *info, byte *buf, register my_off_t filepos) filepos= info->nextpos; } - info->lastinx= -1; /* Can't forward or backward */ + if (info->once_flags & RRND_PRESERVE_LASTINX) + info->once_flags&= ~RRND_PRESERVE_LASTINX; + else + info->lastinx= -1; /* Can't forward or backward */ /* Init all but update-flag */ info->update&= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED); diff --git a/myisam/mi_search.c b/myisam/mi_search.c index 423b15ff8f7..2871633102d 100644 --- a/myisam/mi_search.c +++ b/myisam/mi_search.c @@ -76,7 +76,7 @@ int _mi_search(register MI_INFO *info, register MI_KEYDEF *keyinfo, DBUG_RETURN(1); /* Search at upper levels */ } - if (!(buff=_mi_fetch_keypage(info,keyinfo,pos,info->buff, + if (!(buff=_mi_fetch_keypage(info,keyinfo,pos,DFLT_INIT_HITS,info->buff, test(!(nextflag & SEARCH_SAVE_BUFF))))) goto err; DBUG_DUMP("page",(byte*) buff,mi_getint(buff)); @@ -119,7 +119,7 @@ int _mi_search(register MI_INFO *info, register MI_KEYDEF *keyinfo, if (pos != info->last_keypage) { uchar *old_buff=buff; - if (!(buff=_mi_fetch_keypage(info,keyinfo,pos,info->buff, + if (!(buff=_mi_fetch_keypage(info,keyinfo,pos,DFLT_INIT_HITS,info->buff, test(!(nextflag & SEARCH_SAVE_BUFF))))) goto err; keypos=buff+(keypos-old_buff); @@ -132,8 +132,8 @@ int _mi_search(register MI_INFO *info, register MI_KEYDEF *keyinfo, if (_mi_get_prev_key(info,keyinfo, buff, info->lastkey, keypos, &info->lastkey_length)) goto err; - if ((nextflag & SEARCH_LAST) && - _mi_key_cmp(keyinfo->seg, info->lastkey, key, key_len, SEARCH_FIND, + if (!(nextflag & SEARCH_SMALLER) && + ha_key_cmp(keyinfo->seg, info->lastkey, key, key_len, SEARCH_FIND, ¬_used)) { my_errno=HA_ERR_KEY_NOT_FOUND; /* Didn't find key */ @@ -191,7 +191,7 @@ int _mi_bin_search(MI_INFO *info, register MI_KEYDEF *keyinfo, uchar *page, while (start != end) { mid= (start+end)/2; - if ((flag=_mi_key_cmp(keyinfo->seg,page+(uint) mid*totlength,key,key_len, + if ((flag=ha_key_cmp(keyinfo->seg,page+(uint) mid*totlength,key,key_len, comp_flag,¬_used)) >= 0) end=mid; @@ -199,7 +199,7 @@ int _mi_bin_search(MI_INFO *info, register MI_KEYDEF *keyinfo, uchar *page, start=mid+1; } if (mid != start) - flag=_mi_key_cmp(keyinfo->seg,page+(uint) start*totlength,key,key_len, + flag=ha_key_cmp(keyinfo->seg,page+(uint) start*totlength,key,key_len, comp_flag,¬_used); if (flag < 0) start++; /* point at next, bigger key */ @@ -239,7 +239,7 @@ int _mi_seq_search(MI_INFO *info, register MI_KEYDEF *keyinfo, uchar *page, length,page,end)); DBUG_RETURN(MI_FOUND_WRONG_KEY); } - if ((flag=_mi_key_cmp(keyinfo->seg,t_buff,key,key_len,comp_flag, + if ((flag=ha_key_cmp(keyinfo->seg,t_buff,key,key_len,comp_flag, ¬_used)) >= 0) break; #ifdef EXTRA_DEBUG @@ -263,7 +263,7 @@ int _mi_prefix_search(MI_INFO *info, register MI_KEYDEF *keyinfo, uchar *page, /* my_flag is raw comparison result to be changed according to SEARCH_NO_FIND,SEARCH_LAST and HA_REVERSE_SORT flags. - flag is the value returned by _mi_key_cmp and as treated as final + flag is the value returned by ha_key_cmp and as treated as final */ int flag=0, my_flag=-1; uint nod_flag, length, len, matched, cmplen, kseg_len; @@ -353,7 +353,7 @@ int _mi_prefix_search(MI_INFO *info, register MI_KEYDEF *keyinfo, uchar *page, DBUG_PRINT("loop",("page: '%.*s%.*s'",prefix_len,t_buff+seg_len_pack,suffix_len,vseg)); { uchar *from=vseg+suffix_len; - MI_KEYSEG *keyseg; + HA_KEYSEG *keyseg; uint l; for (keyseg=keyinfo->seg+1 ; keyseg->type ; keyseg++ ) @@ -425,7 +425,7 @@ int _mi_prefix_search(MI_INFO *info, register MI_KEYDEF *keyinfo, uchar *page, else if (key_len_left>0) { uint not_used; - if ((flag = _mi_key_cmp(keyinfo->seg+1,vseg, + if ((flag = ha_key_cmp(keyinfo->seg+1,vseg, k,key_len_left,nextflag,¬_used)) >= 0) break; } @@ -653,414 +653,6 @@ void _mi_dpointer(MI_INFO *info, uchar *buff, my_off_t pos) } /* _mi_dpointer */ -int _mi_compare_text(CHARSET_INFO *charset_info, uchar *a, uint a_length, - uchar *b, uint b_length, my_bool part_key) -{ - int flag; - -#ifdef USE_STRCOLL - if (use_strcoll(charset_info)) - { - if (part_key && b_length < a_length) - a_length=b_length; - return my_strnncoll(charset_info, a, a_length, b, b_length); - } - else -#endif - { - uint length= min(a_length,b_length); - uchar *end= a+ length; - uchar *sort_order=charset_info->sort_order; - while (a < end) - if ((flag= (int) sort_order[*a++] - (int) sort_order[*b++])) - return flag; - } - if (part_key && b_length < a_length) - return 0; - return (int) (a_length-b_length); -} - - -static int compare_bin(uchar *a, uint a_length, uchar *b, uint b_length, - my_bool part_key) -{ - uint length= min(a_length,b_length); - uchar *end= a+ length; - int flag; - - while (a < end) - if ((flag= (int) *a++ - (int) *b++)) - return flag; - if (part_key && b_length < a_length) - return 0; - return (int) (a_length-b_length); -} - - -/* - Compare two keys - - SYNOPSIS - _mi_key_cmp() - keyseg Key segments of key to compare - a First key to compare, in format from _mi_pack_key() - This is normally key specified by user - b Second key to compare. This is always from a row - key_length Length of key to compare. This can be shorter than - a to just compare sub keys - next_flag How keys should be compared - If bit SEARCH_FIND is not set the keys includes the row - position and this should also be compared - - NOTES - Number-keys can't be splited - - RETURN VALUES - <0 If a < b - 0 If a == b - >0 If a > b -*/ - -#define FCMP(A,B) ((int) (A) - (int) (B)) - -int _mi_key_cmp(register MI_KEYSEG *keyseg, register uchar *a, - register uchar *b, uint key_length, uint nextflag, - uint *diff_pos) -{ - int flag; - int16 s_1,s_2; - int32 l_1,l_2; - uint32 u_1,u_2; - float f_1,f_2; - double d_1,d_2; - uint next_key_length; - - *diff_pos=0; - for ( ; (int) key_length >0 ; key_length=next_key_length, keyseg++) - { - uchar *end; - uint piks=! (keyseg->flag & HA_NO_SORT); - (*diff_pos)++; - - /* Handle NULL part */ - if (keyseg->null_bit) - { - key_length--; - if (*a != *b && piks) - { - flag = (int) *a - (int) *b; - return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag); - } - b++; - if (!*a++) /* If key was NULL */ - { - if (nextflag == (SEARCH_FIND | SEARCH_UPDATE)) - nextflag=SEARCH_SAME; /* Allow duplicate keys */ - else if (nextflag & SEARCH_NULL_ARE_NOT_EQUAL) - { - /* - This is only used from mi_check() to calculate cardinality. - It can't be used when searching for a key as this would cause - compare of (a,b) and (b,a) to return the same value. - */ - return -1; - } - next_key_length=key_length; - continue; /* To next key part */ - } - } - end= a+ min(keyseg->length,key_length); - next_key_length=key_length-keyseg->length; - - switch ((enum ha_base_keytype) keyseg->type) { - case HA_KEYTYPE_TEXT: /* Ascii; Key is converted */ - if (keyseg->flag & HA_SPACE_PACK) - { - int a_length,b_length,pack_length; - get_key_length(a_length,a); - get_key_pack_length(b_length,pack_length,b); - next_key_length=key_length-b_length-pack_length; - - if (piks && - (flag=_mi_compare_text(keyseg->charset,a,a_length,b,b_length, - (my_bool) ((nextflag & SEARCH_PREFIX) && next_key_length <= 0)))) - return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag); - a+=a_length; - b+=b_length; - break; - } - else - { - uint length=(uint) (end-a), a_length=length, b_length=length; - if (!(nextflag & SEARCH_PREFIX)) - { - while (a_length && a[a_length-1] == ' ') - a_length--; - while (b_length && b[b_length-1] == ' ') - b_length--; - } - if (piks && - (flag=_mi_compare_text(keyseg->charset,a,a_length,b,b_length, - (my_bool) ((nextflag & SEARCH_PREFIX) && next_key_length <= 0)))) - return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag); - a=end; - b+=length; - } - break; - case HA_KEYTYPE_BINARY: - if (keyseg->flag & HA_SPACE_PACK) - { - int a_length,b_length,pack_length; - get_key_length(a_length,a); - get_key_pack_length(b_length,pack_length,b); - next_key_length=key_length-b_length-pack_length; - - if (piks && - (flag=compare_bin(a,a_length,b,b_length, - (my_bool) ((nextflag & SEARCH_PREFIX) && next_key_length <= 0)))) - return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag); - a+=a_length; - b+=b_length; - break; - } - else - { - uint length=keyseg->length; - if (piks && - (flag=compare_bin(a,length,b,length, - (my_bool) ((nextflag & SEARCH_PREFIX) && next_key_length <= 0)))) - return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag); - a+=length; - b+=length; - } - break; - case HA_KEYTYPE_VARTEXT: - { - int a_length,b_length,pack_length; - get_key_length(a_length,a); - get_key_pack_length(b_length,pack_length,b); - next_key_length=key_length-b_length-pack_length; - - if (piks && - (flag=_mi_compare_text(keyseg->charset,a,a_length,b,b_length, - (my_bool) ((nextflag & SEARCH_PREFIX) && next_key_length <= 0)))) - return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag); - a+=a_length; - b+=b_length; - break; - } - break; - case HA_KEYTYPE_VARBINARY: - { - int a_length,b_length,pack_length; - get_key_length(a_length,a); - get_key_pack_length(b_length,pack_length,b); - next_key_length=key_length-b_length-pack_length; - - if (piks && - (flag=compare_bin(a,a_length,b,b_length, - (my_bool) ((nextflag & SEARCH_PREFIX) && next_key_length <= 0)))) - return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag); - a+=a_length; - b+=b_length; - break; - } - break; - case HA_KEYTYPE_INT8: - { - int i_1= (int) *((signed char*) a); - int i_2= (int) *((signed char*) b); - if (piks && (flag = CMP_NUM(i_1,i_2))) - return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag); - a= end; - b++; - break; - } - case HA_KEYTYPE_SHORT_INT: - s_1= mi_sint2korr(a); - s_2= mi_sint2korr(b); - if (piks && (flag = CMP_NUM(s_1,s_2))) - return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag); - a= end; - b+= 2; /* sizeof(short int); */ - break; - case HA_KEYTYPE_USHORT_INT: - { - uint16 us_1,us_2; - us_1= mi_sint2korr(a); - us_2= mi_sint2korr(b); - if (piks && (flag = CMP_NUM(us_1,us_2))) - return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag); - a= end; - b+=2; /* sizeof(short int); */ - break; - } - case HA_KEYTYPE_LONG_INT: - l_1= mi_sint4korr(a); - l_2= mi_sint4korr(b); - if (piks && (flag = CMP_NUM(l_1,l_2))) - return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag); - a= end; - b+= 4; /* sizeof(long int); */ - break; - case HA_KEYTYPE_ULONG_INT: - u_1= mi_sint4korr(a); - u_2= mi_sint4korr(b); - if (piks && (flag = CMP_NUM(u_1,u_2))) - return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag); - a= end; - b+= 4; /* sizeof(long int); */ - break; - case HA_KEYTYPE_INT24: - l_1=mi_sint3korr(a); - l_2=mi_sint3korr(b); - if (piks && (flag = CMP_NUM(l_1,l_2))) - return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag); - a= end; - b+= 3; - break; - case HA_KEYTYPE_UINT24: - l_1=mi_uint3korr(a); - l_2=mi_uint3korr(b); - if (piks && (flag = CMP_NUM(l_1,l_2))) - return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag); - a= end; - b+= 3; - break; - case HA_KEYTYPE_FLOAT: - mi_float4get(f_1,a); - mi_float4get(f_2,b); - if (piks && (flag = CMP_NUM(f_1,f_2))) - return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag); - a= end; - b+= 4; /* sizeof(float); */ - break; - case HA_KEYTYPE_DOUBLE: - mi_float8get(d_1,a); - mi_float8get(d_2,b); - if (piks && (flag = CMP_NUM(d_1,d_2))) - return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag); - a= end; - b+= 8; /* sizeof(double); */ - break; - case HA_KEYTYPE_NUM: /* Numeric key */ - { - int swap_flag= 0; - int alength,blength; - - if (keyseg->flag & HA_REVERSE_SORT) - { - swap(uchar*,a,b); - swap_flag=1; /* Remember swap of a & b */ - end= a+ (int) (end-b); - } - if (keyseg->flag & HA_SPACE_PACK) - { - alength= *a++; blength= *b++; - end=a+alength; - next_key_length=key_length-blength-1; - } - else - { - alength= (int) (end-a); - blength=keyseg->length; - /* remove pre space from keys */ - for ( ; alength && *a == ' ' ; a++, alength--) ; - for ( ; blength && *b == ' ' ; b++, blength--) ; - } - if (piks) - { - if (*a == '-') - { - if (*b != '-') - return -1; - a++; b++; - swap(uchar*,a,b); - swap(int,alength,blength); - swap_flag=1-swap_flag; - alength--; blength--; - end=a+alength; - } - else if (*b == '-') - return 1; - while (alength && (*a == '+' || *a == '0')) - { - a++; alength--; - } - while (blength && (*b == '+' || *b == '0')) - { - b++; blength--; - } - if (alength != blength) - return (alength < blength) ? -1 : 1; - while (a < end) - if (*a++ != *b++) - return ((int) a[-1] - (int) b[-1]); - } - else - { - b+=(end-a); - a=end; - } - - if (swap_flag) /* Restore pointers */ - swap(uchar*,a,b); - break; - } -#ifdef HAVE_LONG_LONG - case HA_KEYTYPE_LONGLONG: - { - longlong ll_a,ll_b; - ll_a= mi_sint8korr(a); - ll_b= mi_sint8korr(b); - if (piks && (flag = CMP_NUM(ll_a,ll_b))) - return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag); - a= end; - b+= 8; - break; - } - case HA_KEYTYPE_ULONGLONG: - { - ulonglong ll_a,ll_b; - ll_a= mi_uint8korr(a); - ll_b= mi_uint8korr(b); - if (piks && (flag = CMP_NUM(ll_a,ll_b))) - return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag); - a= end; - b+= 8; - break; - } -#endif - case HA_KEYTYPE_END: /* Ready */ - goto end; /* diff_pos is incremented */ - } - } - (*diff_pos)++; -end: - if (!(nextflag & SEARCH_FIND)) - { - uint i; - if (nextflag & (SEARCH_NO_FIND | SEARCH_LAST)) /* Find record after key */ - return (nextflag & (SEARCH_BIGGER | SEARCH_LAST)) ? -1 : 1; - flag=0; - for (i=keyseg->length ; i-- > 0 ; ) - { - if (*a++ != *b++) - { - flag= FCMP(a[-1],b[-1]); - break; - } - } - if (nextflag & SEARCH_SAME) - return (flag); /* read same */ - if (nextflag & SEARCH_BIGGER) - return (flag <= 0 ? -1 : 1); /* read next */ - return (flag < 0 ? -1 : 1); /* read previous */ - } - return 0; -} /* _mi_key_cmp */ - - /* Get key from key-block */ /* page points at previous key; its advanced to point at next key */ /* key should contain previous key */ @@ -1084,7 +676,7 @@ uint _mi_get_static_key(register MI_KEYDEF *keyinfo, uint nod_flag, uint _mi_get_pack_key(register MI_KEYDEF *keyinfo, uint nod_flag, register uchar **page_pos, register uchar *key) { - reg1 MI_KEYSEG *keyseg; + reg1 HA_KEYSEG *keyseg; uchar *start_key,*page=*page_pos; uint length; @@ -1217,7 +809,7 @@ uint _mi_get_pack_key(register MI_KEYDEF *keyinfo, uint nod_flag, uint _mi_get_binary_pack_key(register MI_KEYDEF *keyinfo, uint nod_flag, register uchar **page_pos, register uchar *key) { - reg1 MI_KEYSEG *keyseg; + reg1 HA_KEYSEG *keyseg; uchar *start_key,*page,*page_end,*from,*from_end; uint length,tmp; @@ -1419,7 +1011,7 @@ uchar *_mi_get_last_key(MI_INFO *info, MI_KEYDEF *keyinfo, uchar *page, uint _mi_keylength(MI_KEYDEF *keyinfo, register uchar *key) { - reg1 MI_KEYSEG *keyseg; + reg1 HA_KEYSEG *keyseg; uchar *start; if (! (keyinfo->flag & (HA_VAR_LENGTH_KEY | HA_BINARY_PACK_KEY))) @@ -1453,9 +1045,9 @@ uint _mi_keylength(MI_KEYDEF *keyinfo, register uchar *key) */ uint _mi_keylength_part(MI_KEYDEF *keyinfo, register uchar *key, - MI_KEYSEG *end) + HA_KEYSEG *end) { - reg1 MI_KEYSEG *keyseg; + reg1 HA_KEYSEG *keyseg; uchar *start= key; for (keyseg=keyinfo->seg ; keyseg != end ; keyseg++) @@ -1516,7 +1108,7 @@ int _mi_search_next(register MI_INFO *info, register MI_KEYDEF *keyinfo, if (info->buff_used) { if (!_mi_fetch_keypage(info,keyinfo,info->last_search_keypage, - info->buff,0)) + DFLT_INIT_HITS,info->buff,0)) DBUG_RETURN(-1); info->buff_used=0; } @@ -1585,7 +1177,7 @@ int _mi_search_first(register MI_INFO *info, register MI_KEYDEF *keyinfo, do { - if (!_mi_fetch_keypage(info,keyinfo,pos,info->buff,0)) + if (!_mi_fetch_keypage(info,keyinfo,pos,DFLT_INIT_HITS,info->buff,0)) { info->lastpos= HA_OFFSET_ERROR; DBUG_RETURN(-1); @@ -1628,7 +1220,7 @@ int _mi_search_last(register MI_INFO *info, register MI_KEYDEF *keyinfo, buff=info->buff; do { - if (!_mi_fetch_keypage(info,keyinfo,pos,buff,0)) + if (!_mi_fetch_keypage(info,keyinfo,pos,DFLT_INIT_HITS,buff,0)) { info->lastpos= HA_OFFSET_ERROR; DBUG_RETURN(-1); @@ -1716,7 +1308,7 @@ _mi_calc_var_pack_key_length(MI_KEYDEF *keyinfo,uint nod_flag,uchar *next_key, uchar *org_key, uchar *prev_key, uchar *key, MI_KEY_PARAM *s_temp) { - reg1 MI_KEYSEG *keyseg; + reg1 HA_KEYSEG *keyseg; int length; uint key_length,ref_length,org_key_length=0, length_pack,new_key_length,diff_flag,pack_marker; @@ -1731,7 +1323,7 @@ _mi_calc_var_pack_key_length(MI_KEYDEF *keyinfo,uint nod_flag,uchar *next_key, if ((keyinfo->flag & HA_FULLTEXT) && ((keyseg->type == HA_KEYTYPE_TEXT) || (keyseg->type == HA_KEYTYPE_VARTEXT)) && - !use_strcoll(keyseg->charset)) + !use_strnxfrm(keyseg->charset)) sort_order=keyseg->charset->sort_order; /* diff flag contains how many bytes is needed to pack key */ diff --git a/myisam/mi_static.c b/myisam/mi_static.c index 37b9ac04b7a..a36c38e3c01 100644 --- a/myisam/mi_static.c +++ b/myisam/mi_static.c @@ -51,11 +51,12 @@ uint NEAR myisam_read_vec[]= { SEARCH_FIND, SEARCH_FIND | SEARCH_BIGGER, SEARCH_FIND | SEARCH_SMALLER, SEARCH_NO_FIND | SEARCH_BIGGER, SEARCH_NO_FIND | SEARCH_SMALLER, - SEARCH_FIND | SEARCH_PREFIX, SEARCH_LAST + SEARCH_FIND | SEARCH_PREFIX, SEARCH_LAST, SEARCH_LAST | SEARCH_SMALLER, + MBR_CONTAIN, MBR_INTERSECT, MBR_WITHIN, MBR_DISJOINT, MBR_EQUAL }; uint NEAR myisam_readnext_vec[]= { SEARCH_BIGGER, SEARCH_BIGGER, SEARCH_SMALLER, SEARCH_BIGGER, SEARCH_SMALLER, - SEARCH_BIGGER, SEARCH_SMALLER + SEARCH_BIGGER, SEARCH_SMALLER, SEARCH_SMALLER }; diff --git a/myisam/mi_test1.c b/myisam/mi_test1.c index 8ea97c8e489..64ffbe2db1d 100644 --- a/myisam/mi_test1.c +++ b/myisam/mi_test1.c @@ -36,8 +36,8 @@ static my_bool key_cacheing, null_fields, silent, skip_update, opt_unique, verbose; static MI_COLUMNDEF recinfo[4]; static MI_KEYDEF keyinfo[10]; -static MI_KEYSEG keyseg[10]; -static MI_KEYSEG uniqueseg[10]; +static HA_KEYSEG keyseg[10]; +static HA_KEYSEG uniqueseg[10]; static int run_test(const char *filename); static void get_options(int argc, char *argv[]); @@ -50,7 +50,7 @@ int main(int argc,char *argv[]) MY_INIT(argv[0]); my_init(); if (key_cacheing) - init_key_cache(IO_SIZE*16); + init_key_cache(dflt_key_cache,KEY_CACHE_BLOCK_SIZE,IO_SIZE*16,0,0); get_options(argc,argv); exit(run_test("test1")); @@ -92,13 +92,14 @@ static int run_test(const char *filename) /* Define a key over the first column */ keyinfo[0].seg=keyseg; keyinfo[0].keysegs=1; + keyinfo[0].key_alg=HA_KEY_ALG_BTREE; keyinfo[0].seg[0].type= key_type; keyinfo[0].seg[0].flag= pack_seg; keyinfo[0].seg[0].start=1; keyinfo[0].seg[0].length=key_length; keyinfo[0].seg[0].null_bit= null_fields ? 2 : 0; keyinfo[0].seg[0].null_pos=0; - keyinfo[0].seg[0].language=MY_CHARSET_CURRENT; + keyinfo[0].seg[0].language= default_charset_info->number; if (pack_seg & HA_BLOB_PART) { keyinfo[0].seg[0].bit_start=4; /* Length of blob length */ @@ -121,7 +122,7 @@ static int run_test(const char *filename) uniqueseg[i].start=start; start+=recinfo[i+1].length; uniqueseg[i].length=recinfo[i+1].length; - uniqueseg[i].language=MY_CHARSET_CURRENT; + uniqueseg[i].language= default_charset_info->number; } uniqueseg[0].type= key_type; uniqueseg[0].null_bit= null_fields ? 2 : 0; @@ -460,19 +461,19 @@ static void update_record(char *record) ptr=blob_key; memcpy_fixed(pos+4,&ptr,sizeof(char*)); /* Store pointer to new key */ if (keyinfo[0].seg[0].type != HA_KEYTYPE_NUM) - casedn(blob_key,length); + my_casedn(default_charset_info,blob_key,length); pos+=recinfo[1].length; } else if (recinfo[1].type == FIELD_VARCHAR) { uint length=uint2korr(pos); - casedn(pos+2,length); + my_casedn(default_charset_info,pos+2,length); pos+=recinfo[1].length; } else { if (keyinfo[0].seg[0].type != HA_KEYTYPE_NUM) - casedn(pos,keyinfo[0].seg[0].length); + my_casedn(default_charset_info,pos,keyinfo[0].seg[0].length); pos+=recinfo[1].length; } diff --git a/myisam/mi_test2.c b/myisam/mi_test2.c index 11253f1fdee..d3c3cc2c492 100644 --- a/myisam/mi_test2.c +++ b/myisam/mi_test2.c @@ -49,13 +49,14 @@ static int verbose=0,testflag=0, static int pack_seg=HA_SPACE_PACK,pack_type=HA_PACK_KEY,remove_count=-1, create_flag=0; static ulong key_cache_size=IO_SIZE*16; +static uint key_cache_block_size= KEY_CACHE_BLOCK_SIZE; static uint keys=MYISAM_KEYS,recant=1000; static uint use_blob=0; static uint16 key1[1001],key3[5000]; static char record[300],record2[300],key[100],key2[100], read_record[300],read_record2[300],read_record3[300]; -static MI_KEYSEG glob_keyseg[MYISAM_KEYS][MAX_PARTS]; +static HA_KEYSEG glob_keyseg[MYISAM_KEYS][MAX_PARTS]; /* Test program */ @@ -87,10 +88,11 @@ int main(int argc, char *argv[]) keyinfo[0].seg[0].start=0; keyinfo[0].seg[0].length=6; keyinfo[0].seg[0].type=HA_KEYTYPE_TEXT; - keyinfo[0].seg[0].language=MY_CHARSET_CURRENT; + keyinfo[0].seg[0].language= default_charset_info->number; keyinfo[0].seg[0].flag=(uint8) pack_seg; keyinfo[0].seg[0].null_bit=0; keyinfo[0].seg[0].null_pos=0; + keyinfo[0].key_alg=HA_KEY_ALG_BTREE; keyinfo[0].keysegs=1; keyinfo[0].flag = pack_type; keyinfo[1].seg= &glob_keyseg[1][0]; @@ -106,6 +108,7 @@ int main(int argc, char *argv[]) keyinfo[1].seg[1].flag=HA_REVERSE_SORT; keyinfo[1].seg[1].null_bit=0; keyinfo[1].seg[1].null_pos=0; + keyinfo[1].key_alg=HA_KEY_ALG_BTREE; keyinfo[1].keysegs=2; keyinfo[1].flag =0; keyinfo[2].seg= &glob_keyseg[2][0]; @@ -115,36 +118,40 @@ int main(int argc, char *argv[]) keyinfo[2].seg[0].flag=HA_REVERSE_SORT; keyinfo[2].seg[0].null_bit=0; keyinfo[2].seg[0].null_pos=0; + keyinfo[2].key_alg=HA_KEY_ALG_BTREE; keyinfo[2].keysegs=1; keyinfo[2].flag =HA_NOSAME; keyinfo[3].seg= &glob_keyseg[3][0]; keyinfo[3].seg[0].start=0; keyinfo[3].seg[0].length=reclength-(use_blob ? 8 : 0); keyinfo[3].seg[0].type=HA_KEYTYPE_TEXT; - keyinfo[3].seg[0].language=MY_CHARSET_CURRENT; + keyinfo[3].seg[0].language=default_charset_info->number; keyinfo[3].seg[0].flag=(uint8) pack_seg; keyinfo[3].seg[0].null_bit=0; keyinfo[3].seg[0].null_pos=0; + keyinfo[3].key_alg=HA_KEY_ALG_BTREE; keyinfo[3].keysegs=1; keyinfo[3].flag = pack_type; keyinfo[4].seg= &glob_keyseg[4][0]; keyinfo[4].seg[0].start=0; keyinfo[4].seg[0].length=5; keyinfo[4].seg[0].type=HA_KEYTYPE_TEXT; - keyinfo[4].seg[0].language=MY_CHARSET_CURRENT; + keyinfo[4].seg[0].language=default_charset_info->number; keyinfo[4].seg[0].flag=0; keyinfo[4].seg[0].null_bit=0; keyinfo[4].seg[0].null_pos=0; + keyinfo[4].key_alg=HA_KEY_ALG_BTREE; keyinfo[4].keysegs=1; keyinfo[4].flag = pack_type; keyinfo[5].seg= &glob_keyseg[5][0]; keyinfo[5].seg[0].start=0; keyinfo[5].seg[0].length=4; keyinfo[5].seg[0].type=HA_KEYTYPE_TEXT; - keyinfo[5].seg[0].language=MY_CHARSET_CURRENT; + keyinfo[5].seg[0].language=default_charset_info->number; keyinfo[5].seg[0].flag=pack_seg; keyinfo[5].seg[0].null_bit=0; keyinfo[5].seg[0].null_pos=0; + keyinfo[5].key_alg=HA_KEY_ALG_BTREE; keyinfo[5].keysegs=1; keyinfo[5].flag = pack_type; @@ -208,7 +215,7 @@ int main(int argc, char *argv[]) if (!silent) printf("- Writing key:s\n"); if (key_cacheing) - init_key_cache(key_cache_size); /* Use a small cache */ + init_key_cache(dflt_key_cache,key_cache_block_size,key_cache_size,0,0); if (locking) mi_lock_database(file,F_WRLCK); if (write_cacheing) @@ -269,7 +276,7 @@ int main(int argc, char *argv[]) } } if (key_cacheing) - resize_key_cache(key_cache_size*2); + resize_key_cache(dflt_key_cache,key_cache_block_size,key_cache_size*2,0,0); if (!silent) printf("- Delete\n"); @@ -810,16 +817,19 @@ end: puts("Locking used"); if (use_blob) puts("blobs used"); +#if 0 printf("key cache status: \n\ blocks used:%10lu\n\ w_requests: %10lu\n\ writes: %10lu\n\ r_requests: %10lu\n\ reads: %10lu\n", - _my_blocks_used,_my_cache_w_requests, _my_cache_write, - _my_cache_r_requests,_my_cache_read); + my_blocks_used, + my_cache_w_requests, my_cache_write, + my_cache_r_requests, my_cache_read); +#endif } - end_key_cache(); + end_key_cache(dflt_key_cache,1); if (blob_buffer) my_free(blob_buffer,MYF(0)); my_end(silent ? MY_CHECK_ERROR : MY_CHECK_ERROR | MY_GIVE_INFO); @@ -1013,7 +1023,7 @@ static void put_blob_in_record(char *blob_pos, char **blob_buffer) static void copy_key(MI_INFO *info,uint inx,uchar *rec,uchar *key_buff) { - MI_KEYSEG *keyseg; + HA_KEYSEG *keyseg; for (keyseg=info->s->keyinfo[inx].seg ; keyseg->type ; keyseg++) { diff --git a/myisam/mi_test3.c b/myisam/mi_test3.c index 6111167b38f..dca04a9a64b 100644 --- a/myisam/mi_test3.c +++ b/myisam/mi_test3.c @@ -61,7 +61,7 @@ int main(int argc,char **argv) uint i=0; MI_KEYDEF keyinfo[10]; MI_COLUMNDEF recinfo[10]; - MI_KEYSEG keyseg[10][2]; + HA_KEYSEG keyseg[10][2]; MY_INIT(argv[0]); get_options(argc,argv); @@ -72,6 +72,7 @@ int main(int argc,char **argv) keyinfo[0].seg[0].length=8; keyinfo[0].seg[0].type=HA_KEYTYPE_TEXT; keyinfo[0].seg[0].flag=HA_SPACE_PACK; + keyinfo[0].key_alg=HA_KEY_ALG_BTREE; keyinfo[0].keysegs=1; keyinfo[0].flag = (uint8) HA_PACK_KEY; keyinfo[1].seg= &keyseg[1][0]; @@ -79,6 +80,7 @@ int main(int argc,char **argv) keyinfo[1].seg[0].length=4; /* Long is always 4 in myisam */ keyinfo[1].seg[0].type=HA_KEYTYPE_LONG_INT; keyinfo[1].seg[0].flag=0; + keyinfo[1].key_alg=HA_KEY_ALG_BTREE; keyinfo[1].keysegs=1; keyinfo[1].flag =HA_NOSAME; @@ -175,7 +177,7 @@ void start_test(int id) exit(1); } if (key_cacheing && rnd(2) == 0) - init_key_cache(65536L); + init_key_cache(dflt_key_cache, KEY_CACHE_BLOCK_SIZE, 65536L, 0, 0); printf("Process %d, pid: %d\n",id,getpid()); fflush(stdout); for (error=i=0 ; i < tests && !error; i++) @@ -205,7 +207,7 @@ void start_test(int id) { mi_status(file1,&isam_info,HA_STATUS_VARIABLE); printf("%2d: End of test. Records: %ld Deleted: %ld\n", - id,isam_info.records,isam_info.deleted); + id,(long) isam_info.records, (long) isam_info.deleted); fflush(stdout); } diff --git a/myisam/mi_unique.c b/myisam/mi_unique.c index ddba40214e7..f4ee39e55ca 100644 --- a/myisam/mi_unique.c +++ b/myisam/mi_unique.c @@ -70,7 +70,7 @@ ha_checksum mi_unique_hash(MI_UNIQUEDEF *def, const byte *record) { const byte *pos, *end; ha_checksum crc=0; - MI_KEYSEG *keyseg; + HA_KEYSEG *keyseg; for (keyseg=def->seg ; keyseg < def->end ; keyseg++) { @@ -108,11 +108,9 @@ ha_checksum mi_unique_hash(MI_UNIQUEDEF *def, const byte *record) end= pos+length; if (type == HA_KEYTYPE_TEXT || type == HA_KEYTYPE_VARTEXT) { - uchar *sort_order=keyseg->charset->sort_order; - while (pos != end) - crc=((crc << 8) + - (((uchar) sort_order[*(uchar*) pos++]))) + - (crc >> (8*sizeof(ha_checksum)-8)); + ulong nr=1, nr2=4; + keyseg->charset->coll->hash_sort(keyseg->charset,(const uchar*)pos,length,&nr, &nr2); + crc=nr; } else while (pos != end) @@ -131,7 +129,7 @@ int mi_unique_comp(MI_UNIQUEDEF *def, const byte *a, const byte *b, my_bool null_are_equal) { const byte *pos_a, *pos_b, *end; - MI_KEYSEG *keyseg; + HA_KEYSEG *keyseg; for (keyseg=def->seg ; keyseg < def->end ; keyseg++) { @@ -181,8 +179,8 @@ int mi_unique_comp(MI_UNIQUEDEF *def, const byte *a, const byte *b, } if (type == HA_KEYTYPE_TEXT || type == HA_KEYTYPE_VARTEXT) { - if (_mi_compare_text(keyseg->charset, (uchar *)pos_a, length, - (uchar *)pos_b, length, 0)) + if (mi_compare_text(keyseg->charset, (uchar *) pos_a, length, + (uchar *) pos_b, length, 0)) return 1; } else diff --git a/myisam/mi_update.c b/myisam/mi_update.c index f9ed969d8b5..d1d41ac351a 100644 --- a/myisam/mi_update.c +++ b/myisam/mi_update.c @@ -17,6 +17,8 @@ /* Update an old row in a MyISAM table */ #include "fulltext.h" +#include "rt_index.h" + #ifdef __WIN__ #include <errno.h> #endif @@ -61,6 +63,7 @@ int mi_update(register MI_INFO *info, const byte *oldrec, byte *newrec) goto err_end; /* Record has changed */ } + /* Calculate and check all unique constraints */ key_changed=0; for (i=0 ; i < share->state.header.uniques ; i++) @@ -88,7 +91,6 @@ int mi_update(register MI_INFO *info, const byte *oldrec, byte *newrec) { if (((ulonglong) 1 << i) & share->state.key_map) { - /* The following code block is for text searching by SerG */ if (share->keyinfo[i].flag & HA_FULLTEXT ) { if (_mi_ft_cmp(info,i,oldrec, newrec)) @@ -118,8 +120,8 @@ int mi_update(register MI_INFO *info, const byte *oldrec, byte *newrec) key_changed|=HA_STATE_WRITTEN; /* Mark that keyfile changed */ changed|=((ulonglong) 1 << i); share->keyinfo[i].version++; - if (_mi_ck_delete(info,i,old_key,old_length)) goto err; - if (_mi_ck_write(info,i,new_key,new_length)) goto err; + if (share->keyinfo[i].ck_delete(info,i,old_key,old_length)) goto err; + if (share->keyinfo[i].ck_insert(info,i,new_key,new_length)) goto err; if (share->base.auto_key == i+1) auto_key_changed=1; } @@ -190,7 +192,6 @@ err: { if (((ulonglong) 1 << i) & changed) { - /* The following code block is for text searching by SerG */ if (share->keyinfo[i].flag & HA_FULLTEXT) { if ((flag++ && _mi_ft_del(info,i,(char*) new_key,newrec,pos)) || diff --git a/myisam/mi_write.c b/myisam/mi_write.c index 40e2f301fce..86c45e0692f 100644 --- a/myisam/mi_write.c +++ b/myisam/mi_write.c @@ -17,6 +17,9 @@ /* Write a row to a MyISAM table */ #include "fulltext.h" +#include "rt_index.h" +#include <assert.h> + #ifdef __WIN__ #include <errno.h> #endif @@ -36,9 +39,9 @@ static int _mi_balance_page(MI_INFO *info,MI_KEYDEF *keyinfo,uchar *key, static uchar *_mi_find_last_pos(MI_KEYDEF *keyinfo, uchar *page, uchar *key, uint *return_key_length, uchar **after_key); -int _mi_ck_write_tree(register MI_INFO *info, uint keynr, uchar *key, +int _mi_ck_write_tree(register MI_INFO *info, uint keynr,uchar *key, uint key_length); -int _mi_ck_write_btree(register MI_INFO *info, uint keynr, uchar *key, +int _mi_ck_write_btree(register MI_INFO *info, uint keynr,uchar *key, uint key_length); /* Write new record to database */ @@ -121,17 +124,17 @@ int mi_write(MI_INFO *info, byte *record) } else { - uint key_length=_mi_make_key(info,i,buff,record,filepos); - if (_mi_ck_write(info,i,buff,key_length)) - { - if (local_lock_tree) - rw_unlock(&share->key_root_lock[i]); - DBUG_PRINT("error",("Got error: %d on write",my_errno)); - goto err; - } + if (share->keyinfo[i].ck_insert(info,i,buff, + _mi_make_key(info,i,buff,record,filepos))) + { + if (local_lock_tree) + rw_unlock(&share->key_root_lock[i]); + DBUG_PRINT("error",("Got error: %d on write",my_errno)); + goto err; + } } if (local_lock_tree) - rw_unlock(&share->key_root_lock[i]); + rw_unlock(&share->key_root_lock[i]); } } if (share->calc_checksum) @@ -248,11 +251,12 @@ int _mi_ck_write_btree(register MI_INFO *info, uint keynr, uchar *key, int error; uint comp_flag; MI_KEYDEF *keyinfo=info->s->keyinfo+keynr; + my_off_t *root=&info->s->state.key_root[keynr]; DBUG_ENTER("_mi_ck_write_btree"); if (keyinfo->flag & HA_SORT_ALLOWS_SAME) comp_flag=SEARCH_BIGGER; /* Put after same key */ - else if (keyinfo->flag & HA_NOSAME) + else if (keyinfo->flag & (HA_NOSAME|HA_FULLTEXT)) { comp_flag=SEARCH_FIND | SEARCH_UPDATE; /* No dupplicates */ if (keyinfo->flag & HA_NULL_ARE_EQUAL) @@ -261,37 +265,53 @@ int _mi_ck_write_btree(register MI_INFO *info, uint keynr, uchar *key, else comp_flag=SEARCH_SAME; /* Keys in rec-pos order */ - if (info->s->state.key_root[keynr] == HA_OFFSET_ERROR || + error=_mi_ck_real_write_btree(info, keyinfo, key, key_length, + root, comp_flag); + if (info->ft1_to_ft2) + { + if (!error) + error= _mi_ft_convert_to_ft2(info, keynr, key); + delete_dynamic(info->ft1_to_ft2); + my_free((gptr)info->ft1_to_ft2, MYF(0)); + info->ft1_to_ft2=0; + } + DBUG_RETURN(error); +} /* _mi_ck_write_btree */ + +int _mi_ck_real_write_btree(MI_INFO *info, MI_KEYDEF *keyinfo, + uchar *key, uint key_length, my_off_t *root, uint comp_flag) +{ + int error; + DBUG_ENTER("_mi_ck_real_write_btree"); + /* key_length parameter is used only if comp_flag is SEARCH_FIND */ + if (*root == HA_OFFSET_ERROR || (error=w_search(info, keyinfo, comp_flag, key, key_length, - info->s->state.key_root[keynr], (uchar *) 0, (uchar*) 0, + *root, (uchar *) 0, (uchar*) 0, (my_off_t) 0, 1)) > 0) - error=_mi_enlarge_root(info,keynr,key); + error=_mi_enlarge_root(info,keyinfo,key,root); DBUG_RETURN(error); -} /* _mi_ck_write_btree */ +} /* _mi_ck_real_write_btree */ /* Make a new root with key as only pointer */ -int _mi_enlarge_root(register MI_INFO *info, uint keynr, uchar *key) +int _mi_enlarge_root(MI_INFO *info, MI_KEYDEF *keyinfo, uchar *key, + my_off_t *root) { uint t_length,nod_flag; - reg2 MI_KEYDEF *keyinfo; MI_KEY_PARAM s_temp; MYISAM_SHARE *share=info->s; DBUG_ENTER("_mi_enlarge_root"); - nod_flag= (share->state.key_root[keynr] != HA_OFFSET_ERROR) ? - share->base.key_reflength : 0; - _mi_kpointer(info,info->buff+2,share->state.key_root[keynr]); /* if nod */ - keyinfo=share->keyinfo+keynr; + nod_flag= (*root != HA_OFFSET_ERROR) ? share->base.key_reflength : 0; + _mi_kpointer(info,info->buff+2,*root); /* if nod */ t_length=(*keyinfo->pack_key)(keyinfo,nod_flag,(uchar*) 0, (uchar*) 0, (uchar*) 0, key,&s_temp); mi_putint(info->buff,t_length+2+nod_flag,nod_flag); (*keyinfo->store_key)(keyinfo,info->buff+2+nod_flag,&s_temp); info->buff_used=info->page_changed=1; /* info->buff is used */ - if ((share->state.key_root[keynr]= _mi_new(info,keyinfo)) == - HA_OFFSET_ERROR || - _mi_write_keypage(info,keyinfo,share->state.key_root[keynr],info->buff)) + if ((*root= _mi_new(info,keyinfo,DFLT_INIT_HITS)) == HA_OFFSET_ERROR || + _mi_write_keypage(info,keyinfo,*root,DFLT_INIT_HITS,info->buff)) DBUG_RETURN(-1); DBUG_RETURN(0); } /* _mi_enlarge_root */ @@ -322,7 +342,7 @@ static int w_search(register MI_INFO *info, register MI_KEYDEF *keyinfo, if (!(temp_buff= (uchar*) my_alloca((uint) keyinfo->block_length+ MI_MAX_KEY_BUFF*2))) DBUG_RETURN(-1); - if (!_mi_fetch_keypage(info,keyinfo,page,temp_buff,0)) + if (!_mi_fetch_keypage(info,keyinfo,page,DFLT_INIT_HITS,temp_buff,0)) goto err; flag=(*keyinfo->bin_search)(info,keyinfo,temp_buff,key,search_key_length, @@ -331,15 +351,52 @@ static int w_search(register MI_INFO *info, register MI_KEYDEF *keyinfo, if (flag == 0) { uint tmp_key_length; - my_errno=HA_ERR_FOUND_DUPP_KEY; /* get position to record with duplicated key */ tmp_key_length=(*keyinfo->get_key)(keyinfo,nod_flag,&keypos,keybuff); if (tmp_key_length) info->dupp_key_pos=_mi_dpos(info,0,keybuff+tmp_key_length); else info->dupp_key_pos= HA_OFFSET_ERROR; - my_afree((byte*) temp_buff); - DBUG_RETURN(-1); + if (keyinfo->flag & HA_FULLTEXT) + { + uint off; + int subkeys; + + get_key_full_length_rdonly(off, keybuff); + subkeys=ft_sintXkorr(keybuff+off); + comp_flag=SEARCH_SAME; + if (subkeys >= 0) + { + /* normal word, one-level tree structure */ + flag=(*keyinfo->bin_search)(info, keyinfo, temp_buff, key, + USE_WHOLE_KEY, comp_flag, + &keypos, keybuff, &was_last_key); + } + else + { + /* popular word. two-level tree. going down */ + my_off_t root=info->dupp_key_pos; + keyinfo=&info->s->ft2_keyinfo; + key+=off; + keypos-=keyinfo->keylength; /* we'll modify key entry 'in vivo' */ + error=_mi_ck_real_write_btree(info, keyinfo, key, 0, + &root, comp_flag); + _mi_dpointer(info, keypos+HA_FT_WLEN, root); + subkeys--; /* should there be underflow protection ? */ + DBUG_ASSERT(subkeys < 0); + ft_intXstore(keypos, subkeys); + if (!error) + error=_mi_write_keypage(info,keyinfo,page,DFLT_INIT_HITS,temp_buff); + my_afree((byte*) temp_buff); + DBUG_RETURN(error); + } + } + else /* not HA_FULLTEXT, normal HA_NOSAME key */ + { + my_afree((byte*) temp_buff); + my_errno=HA_ERR_FOUND_DUPP_KEY; + DBUG_RETURN(-1); + } } if (flag == MI_FOUND_WRONG_KEY) DBUG_RETURN(-1); @@ -352,7 +409,7 @@ static int w_search(register MI_INFO *info, register MI_KEYDEF *keyinfo, { error=_mi_insert(info,keyinfo,key,temp_buff,keypos,keybuff,father_buff, father_keypos,father_page, insert_last); - if (_mi_write_keypage(info,keyinfo,page,temp_buff)) + if (_mi_write_keypage(info,keyinfo,page,DFLT_INIT_HITS,temp_buff)) goto err; } my_afree((byte*) temp_buff); @@ -371,7 +428,6 @@ int _mi_insert(register MI_INFO *info, register MI_KEYDEF *keyinfo, uchar *key, uchar *anc_buff, uchar *key_pos, uchar *key_buff, uchar *father_buff, uchar *father_key_pos, my_off_t father_page, my_bool insert_last) - { uint a_length,nod_flag; int t_length; @@ -392,7 +448,9 @@ int _mi_insert(register MI_INFO *info, register MI_KEYDEF *keyinfo, #ifndef DBUG_OFF if (key_pos != anc_buff+2+nod_flag && (keyinfo->flag & (HA_BINARY_PACK_KEY | HA_PACK_KEY))) + { DBUG_DUMP("prev_key",(byte*) key_buff,_mi_keylength(keyinfo,key_buff)); + } if (keyinfo->flag & HA_PACK_KEY) { DBUG_PRINT("test",("t_length: %d ref_len: %d", @@ -423,8 +481,56 @@ int _mi_insert(register MI_INFO *info, register MI_KEYDEF *keyinfo, a_length+=t_length; mi_putint(anc_buff,a_length,nod_flag); if (a_length <= keyinfo->block_length) + { + if (keyinfo->block_length - a_length < 32 && + keyinfo->flag & HA_FULLTEXT && key_pos == endpos && + info->s->base.key_reflength <= info->s->base.rec_reflength && + info->s->options & (HA_OPTION_PACK_RECORD | HA_OPTION_COMPRESS_RECORD)) + { + /* + Normal word. One-level tree. Page is almost full. + Let's consider converting. + We'll compare 'key' and the first key at anc_buff + */ + uchar *a=key, *b=anc_buff+2+nod_flag; + uint alen, blen, ft2len=info->s->ft2_keyinfo.keylength; + /* the very first key on the page is always unpacked */ + DBUG_ASSERT((*b & 128) == 0); +#if HA_FT_MAXLEN >= 127 + blen= mi_uint2korr(b); b+=2; +#else + blen= *b++; +#endif + get_key_length(alen,a); + DBUG_ASSERT(info->ft1_to_ft2==0); + if (alen == blen && + mi_compare_text(keyinfo->seg->charset, a, alen, b, blen, 0)==0) + { + /* yup. converting */ + info->ft1_to_ft2=(DYNAMIC_ARRAY *) + my_malloc(sizeof(DYNAMIC_ARRAY), MYF(MY_WME)); + my_init_dynamic_array(info->ft1_to_ft2, ft2len, 300, 50); + + /* + now, adding all keys from the page to dynarray + if the page is a leaf (if not keys will be deleted later) + */ + if (!nod_flag) + { + /* let's leave the first key on the page, though, because + we cannot easily dispatch an empty page here */ + b+=blen+ft2len+2; + for (a=anc_buff+a_length ; b < a ; b+=ft2len+2) + insert_dynamic(info->ft1_to_ft2, b); + + /* fixing the page's length - it contains only one key now */ + mi_putint(anc_buff,2+blen+ft2len+2,0); + } + /* the rest will be done when we're back from recursion */ + } + } DBUG_RETURN(0); /* There is room on page */ - + } /* Page is full */ if (nod_flag) insert_last=0; @@ -474,7 +580,7 @@ int _mi_split_page(register MI_INFO *info, register MI_KEYDEF *keyinfo, } /* Move middle item to key and pointer to new page */ - if ((new_pos=_mi_new(info,keyinfo)) == HA_OFFSET_ERROR) + if ((new_pos=_mi_new(info,keyinfo,DFLT_INIT_HITS)) == HA_OFFSET_ERROR) DBUG_RETURN(-1); _mi_kpointer(info,_mi_move_key(keyinfo,key,key_buff),new_pos); @@ -490,7 +596,7 @@ int _mi_split_page(register MI_INFO *info, register MI_KEYDEF *keyinfo, (*keyinfo->store_key)(keyinfo,info->buff+key_ref_length,&s_temp); mi_putint(info->buff,length+t_length+key_ref_length,nod_flag); - if (_mi_write_keypage(info,keyinfo,new_pos,info->buff)) + if (_mi_write_keypage(info,keyinfo,new_pos,DFLT_INIT_HITS,info->buff)) DBUG_RETURN(-1); DBUG_DUMP("key",(byte*) key,_mi_keylength(keyinfo,key)); DBUG_RETURN(2); /* Middle key up */ @@ -621,7 +727,8 @@ static int _mi_balance_page(register MI_INFO *info, MI_KEYDEF *keyinfo, curr_keylength=k_length+nod_flag; info->page_changed=1; - if ((father_key_pos != father_buff+father_length && (info->s->rnd++ & 1)) || + if ((father_key_pos != father_buff+father_length && + (info->state->records & 1)) || father_key_pos == father_buff+2+info->s->base.key_reflength) { right=1; @@ -640,7 +747,7 @@ static int _mi_balance_page(register MI_INFO *info, MI_KEYDEF *keyinfo, DBUG_PRINT("test",("use left page: %lu",next_page)); } /* father_key_pos ptr to parting key */ - if (!_mi_fetch_keypage(info,keyinfo,next_page,info->buff,0)) + if (!_mi_fetch_keypage(info,keyinfo,next_page,DFLT_INIT_HITS,info->buff,0)) goto err; DBUG_DUMP("next",(byte*) info->buff,mi_getint(info->buff)); @@ -680,8 +787,8 @@ static int _mi_balance_page(register MI_INFO *info, MI_KEYDEF *keyinfo, memcpy((byte*) buff+2,(byte*) pos+k_length,(size_t) length); } - if (_mi_write_keypage(info,keyinfo,next_page,info->buff) || - _mi_write_keypage(info,keyinfo,father_page,father_buff)) + if (_mi_write_keypage(info,keyinfo,next_page,DFLT_INIT_HITS,info->buff) || + _mi_write_keypage(info,keyinfo,father_page,DFLT_INIT_HITS,father_buff)) goto err; DBUG_RETURN(0); } @@ -721,12 +828,13 @@ static int _mi_balance_page(register MI_INFO *info, MI_KEYDEF *keyinfo, memcpy((byte*) (right ? key : father_key_pos),pos,(size_t) k_length); memcpy((byte*) (right ? father_key_pos : key),tmp_part_key, k_length); - if ((new_pos=_mi_new(info,keyinfo)) == HA_OFFSET_ERROR) + if ((new_pos=_mi_new(info,keyinfo,DFLT_INIT_HITS)) == HA_OFFSET_ERROR) goto err; _mi_kpointer(info,key+k_length,new_pos); if (_mi_write_keypage(info,keyinfo,(right ? new_pos : next_page), - info->buff) || - _mi_write_keypage(info,keyinfo,(right ? next_page : new_pos),extra_buff)) + DFLT_INIT_HITS,info->buff) || + _mi_write_keypage(info,keyinfo,(right ? next_page : new_pos), + DFLT_INIT_HITS,extra_buff)) goto err; DBUG_RETURN(1); /* Middle key up */ @@ -751,7 +859,8 @@ int _mi_ck_write_tree(register MI_INFO *info, uint keynr, uchar *key, DBUG_ENTER("_mi_ck_write_tree"); error= tree_insert(&info->bulk_insert[keynr], key, - key_length + info->s->rec_reflength) ? 0 : HA_ERR_OUT_OF_MEM ; + key_length + info->s->rec_reflength, + info->bulk_insert[keynr].custom_arg) ? 0 : HA_ERR_OUT_OF_MEM ; DBUG_RETURN(error); } /* _mi_ck_write_tree */ @@ -762,7 +871,7 @@ int _mi_ck_write_tree(register MI_INFO *info, uint keynr, uchar *key, static int keys_compare(bulk_insert_param *param, uchar *key1, uchar *key2) { uint not_used; - return _mi_key_cmp(param->info->s->keyinfo[param->keynr].seg, + return ha_key_cmp(param->info->s->keyinfo[param->keynr].seg, key1, key2, USE_WHOLE_KEY, SEARCH_SAME, ¬_used); } diff --git a/myisam/myisamchk.c b/myisam/myisamchk.c index f1deda41f8b..61a1736c521 100644 --- a/myisam/myisamchk.c +++ b/myisam/myisamchk.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB +/* Copyright (C) 2000-2003 MySQL AB This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -41,11 +41,13 @@ SET_STACK_SIZE(9000) /* Minimum stack size for program */ static uint decode_bits; static char **default_argv; static const char *load_default_groups[]= { "myisamchk", 0 }; -static const char *set_charset_name; +static const char *set_charset_name, *opt_tmpdir; static CHARSET_INFO *set_charset; static long opt_myisam_block_size; +static long opt_key_cache_block_size; static const char *my_progname_short; static int stopwords_inited= 0; +static MY_TMPDIR myisamchk_tmpdir; static const char *type_names[]= { "?","char","binary", "short", "long", "float", @@ -108,7 +110,8 @@ int main(int argc, char **argv) VOID(fflush(stderr)); if ((check_param.error_printed | check_param.warning_printed) && (check_param.testflag & T_FORCE_CREATE) && - (!(check_param.testflag & (T_REP | T_SORT_RECORDS | T_SORT_INDEX)))) + (!(check_param.testflag & (T_REP | T_REP_BY_SORT | T_SORT_RECORDS | + T_SORT_INDEX)))) { uint old_testflag=check_param.testflag; if (!(check_param.testflag & T_REP)) @@ -136,6 +139,7 @@ int main(int argc, char **argv) llstr(check_param.total_deleted,buff2)); } free_defaults(default_argv); + free_tmpdir(&myisamchk_tmpdir); ft_free_stopwords(); my_end(check_param.testflag & T_INFO ? MY_CHECK_ERROR | MY_GIVE_INFO : MY_CHECK_ERROR); exit(error); @@ -146,10 +150,11 @@ int main(int argc, char **argv) enum options_mc { OPT_CHARSETS_DIR=256, OPT_SET_CHARSET,OPT_START_CHECK_POS, - OPT_CORRECT_CHECKSUM, OPT_KEY_BUFFER_SIZE, OPT_MYISAM_BLOCK_SIZE, + OPT_CORRECT_CHECKSUM, OPT_KEY_BUFFER_SIZE, + OPT_KEY_CACHE_BLOCK_SIZE, OPT_MYISAM_BLOCK_SIZE, OPT_READ_BUFFER_SIZE, OPT_WRITE_BUFFER_SIZE, OPT_SORT_BUFFER_SIZE, OPT_SORT_KEY_BLOCKS, OPT_DECODE_BITS, OPT_FT_MIN_WORD_LEN, - OPT_FT_MAX_WORD_LEN, OPT_FT_MAX_WORD_LEN_FOR_SORT + OPT_FT_MAX_WORD_LEN, OPT_FT_MAX_WORD_LEN_FOR_SORT, OPT_MAX_RECORD_LENGTH }; static struct my_option my_long_options[] = @@ -161,7 +166,7 @@ static struct my_option my_long_options[] = "No help available.", 0, 0, 0, GET_ULONG, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, {"backup", 'B', - "Make a backup of the .MYD file as 'filename-time.BAK'", + "Make a backup of the .MYD file as 'filename-time.BAK'.", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, {"character-sets-dir", OPT_CHARSETS_DIR, "Directory where character sets are.", @@ -170,7 +175,7 @@ static struct my_option my_long_options[] = "Check table for errors.", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, {"check-only-changed", 'C', - "Check only tables that have changed since last check.", + "Check only tables that have changed since last check. It also applies to other requested actions (e.g. --analyze will be ignored if the table is already analyzed).", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, {"correct-checksum", OPT_CORRECT_CHECKSUM, "Correct checksum information for table.", @@ -192,7 +197,7 @@ static struct my_option my_long_options[] = "If used when checking a table, ensure that the table is 100 percent consistent, which will take a long time. If used when repairing a table, try to recover every possible row from the data file. Normally this will also find a lot of garbage rows; Don't use this option with repair if you are not totally desperate.", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, {"fast", 'F', - "Check only tables that haven't been closed properly.", + "Check only tables that haven't been closed properly. It also applies to other requested actions (e.g. --analyze will be ignored if the table is already analyzed).", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, {"force", 'f', "Restart with -r if there are any errors in the table. States will be updated as with --update-state.", @@ -207,10 +212,15 @@ static struct my_option my_long_options[] = "Print statistics information about table that is checked.", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, {"keys-used", 'k', - "Tell MyISAM to update only some specific keys. # is a bit mask of which keys to use. This can be used to get faster inserts!", + "Tell MyISAM to update only some specific keys. # is a bit mask of which keys to use. This can be used to get faster inserts.", (gptr*) &check_param.keys_in_use, (gptr*) &check_param.keys_in_use, 0, GET_ULL, REQUIRED_ARG, -1, 0, 0, 0, 0, 0}, + {"max-record-length", OPT_MAX_RECORD_LENGTH, + "Skip rows bigger than this if myisamchk can't allocate memory to hold it", + (gptr*) &check_param.max_record_length, + (gptr*) &check_param.max_record_length, + 0, GET_ULL, REQUIRED_ARG, LONGLONG_MAX, 0, LONGLONG_MAX, 0, 0, 0}, {"medium-check", 'm', "Faster than extend-check, but only finds 99.99% of all errors. Should be good enough for most cases.", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, @@ -223,7 +233,7 @@ static struct my_option my_long_options[] = "Can fix almost anything except unique keys that aren't unique.", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, {"parallel-recover", 'p', - "Same as '-r' but creates all the keys in parallel", + "Same as '-r' but creates all the keys in parallel.", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, {"safe-recover", 'o', "Uses old recovery method; Slower than '-r' but can handle a couple of cases where '-r' reports that it can't fix the data file.", @@ -260,7 +270,7 @@ static struct my_option my_long_options[] = 0, GET_UINT, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, {"tmpdir", 't', "Path for temporary files.", - (gptr*) &check_param.tmpdir, + (gptr*) &opt_tmpdir, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, {"update-state", 'U', "Mark tables as crashed if any errors were found.", @@ -281,6 +291,11 @@ static struct my_option my_long_options[] = (gptr*) &check_param.use_buffers, (gptr*) &check_param.use_buffers, 0, GET_ULONG, REQUIRED_ARG, (long) USE_BUFFER_INIT, (long) MALLOC_OVERHEAD, (long) ~0L, (long) MALLOC_OVERHEAD, (long) IO_SIZE, 0}, + { "key_cache_block_size", OPT_KEY_CACHE_BLOCK_SIZE, "", + (gptr*) &opt_key_cache_block_size, + (gptr*) &opt_key_cache_block_size, 0, + GET_LONG, REQUIRED_ARG, MI_KEY_BLOCK_LENGTH, MI_MIN_KEY_BLOCK_LENGTH, + MI_MAX_KEY_BLOCK_LENGTH, 0, MI_MIN_KEY_BLOCK_LENGTH, 0}, { "myisam_block_size", OPT_MYISAM_BLOCK_SIZE, "", (gptr*) &opt_myisam_block_size, (gptr*) &opt_myisam_block_size, 0, GET_LONG, REQUIRED_ARG, MI_KEY_BLOCK_LENGTH, MI_MIN_KEY_BLOCK_LENGTH, @@ -312,16 +327,13 @@ static struct my_option my_long_options[] = { "ft_max_word_len", OPT_FT_MAX_WORD_LEN, "", (gptr*) &ft_max_word_len, (gptr*) &ft_max_word_len, 0, GET_ULONG, REQUIRED_ARG, HA_FT_MAXLEN, 10, HA_FT_MAXLEN, 0, 1, 0}, - { "ft_max_word_len_for_sort", OPT_FT_MAX_WORD_LEN_FOR_SORT, "", - (gptr*) &ft_max_word_len_for_sort, (gptr*) &ft_max_word_len_for_sort, 0, - GET_ULONG, REQUIRED_ARG, 20, 4, HA_FT_MAXLEN, 0, 1, 0}, - { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} + { 0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0} }; static void print_version(void) { - printf("%s Ver 2.6 for %s at %s\n", my_progname, SYSTEM_TYPE, + printf("%s Ver 2.7 for %s at %s\n", my_progname, SYSTEM_TYPE, MACHINE_TYPE); } @@ -335,17 +347,25 @@ static void usage(void) puts("Used without options all tables on the command will be checked for errors"); printf("Usage: %s [OPTIONS] tables[.MYI]\n", my_progname_short); puts("\nGlobal options:\n\ - -#, --debug=... Output debug log. Often this is 'd:t:o,filename'\n\ + -#, --debug=... Output debug log. Often this is 'd:t:o,filename'.\n\ -?, --help Display this help and exit.\n\ - -O, --set-variable var=option\n\ + -O, --set-variable var=option.\n\ Change the value of a variable. Please note that\n\ this option is deprecated; you can set variables\n\ directly with '--variable-name=value'.\n\ - -t, --tmpdir=path Path for temporary files\n\ + -t, --tmpdir=path Path for temporary files. Multiple paths can be\n\ + specified, separated by " +#if defined( __WIN__) || defined(OS2) + "semicolon (;)" +#else + "colon (:)" +#endif + ", they will be used\n\ + in a round-robin fashion.\n\ -s, --silent Only print errors. One can use two -s to make\n\ - myisamchk very silent\n\ + myisamchk very silent.\n\ -v, --verbose Print more information. This can be used with\n\ - --description and --check. Use many -v for more verbosity!\n\ + --description and --check. Use many -v for more verbosity.\n\ -V, --version Print version and exit.\n\ -w, --wait Wait if table is locked.\n"); #ifdef DEBUG @@ -353,37 +373,36 @@ static void usage(void) #endif puts("Check options (check is the default action for myisamchk):\n\ - -c, --check Check table for errors\n\ + -c, --check Check table for errors.\n\ -e, --extend-check Check the table VERY throughly. Only use this in\n\ extreme cases as myisamchk should normally be able to\n\ - find out if the table is ok even without this switch\n\ - -F, --fast Check only tables that haven't been closed properly.\n\ - It also applies to other requested actions (e.g. --analyze\n\ - will be ignored if the table is already analyzed).\n\ - -f, --force Restart with '-r' if there are any errors in the table.\n\ - States will be updated as with '--update-state'\n\ + find out if the table is ok even without this switch.\n\ + -F, --fast Check only tables that haven't been closed properly.\n\ -C, --check-only-changed\n\ - Check only tables that have changed since last check.\n\ - It also applies to other requested actions (e.g. --analyze\n\ - will be ignored if the table is already analyzed).\n\ - -i, --information Print statistics information about table that is checked\n\ + Check only tables that have changed since last check.\n\ + -f, --force Restart with '-r' if there are any errors in the table.\n\ + States will be updated as with '--update-state'.\n\ + -i, --information Print statistics information about table that is checked.\n\ -m, --medium-check Faster than extend-check, but only finds 99.99% of\n\ - all errors. Should be good enough for most cases\n\ - -U --update-state Mark tables as crashed if you find any errors\n\ - -T, --read-only Don't mark table as checked\n"); + all errors. Should be good enough for most cases.\n\ + -U --update-state Mark tables as crashed if you find any errors.\n\ + -T, --read-only Don't mark table as checked.\n"); - puts("Repair options (When using '-r' or '-o') \n\ - -B, --backup Make a backup of the .MYD file as 'filename-time.BAK'\n\ + puts("Repair options (When using '-r' or '-o'):\n\ + -B, --backup Make a backup of the .MYD file as 'filename-time.BAK'.\n\ --correct-checksum Correct checksum information for table.\n\ -D, --data-file-length=# Max length of data file (when recreating data\n\ - file when it's full)\n\ + file when it's full).\n\ -e, --extend-check Try to recover every possible row from the data file\n\ Normally this will also find a lot of garbage rows;\n\ Don't use this option if you are not totally desperate.\n\ -f, --force Overwrite old temporary files.\n\ -k, --keys-used=# Tell MyISAM to update only some specific keys. # is a\n\ bit mask of which keys to use. This can be used to\n\ - get faster inserts!\n\ + get faster inserts.\n\ + --max-record-length=#\n\ + Skip rows bigger than this if myisamchk can't allocate\n\ + memory to hold it.\n\ -r, --recover Can fix almost anything except unique keys that aren't\n\ unique.\n\ -n, --sort-recover Forces recovering with sorting even if the temporary\n\ @@ -391,14 +410,13 @@ static void usage(void) -p, --parallel-recover\n\ Uses the same technique as '-r' and '-n', but creates\n\ all the keys in parallel, in different threads.\n\ - THIS IS ALPHA CODE. USE AT YOUR OWN RISK!\n\ -o, --safe-recover Uses old recovery method; Slower than '-r' but can\n\ handle a couple of cases where '-r' reports that it\n\ can't fix the data file.\n\ --character-sets-dir=...\n\ - Directory where character sets are\n\ + Directory where character sets are.\n\ --set-character-set=name\n\ - Change the character set used by the index\n\ + Change the character set used by the index.\n\ -q, --quick Faster repair by not modifying the data file.\n\ One can give a second '-q' to force myisamchk to\n\ modify the original datafile in case of duplicate keys.\n\ @@ -417,11 +435,11 @@ static void usage(void) If no value is given, then sets the next auto_increment\n\ value to the highest used value for the auto key + 1.\n\ -S, --sort-index Sort index blocks. This speeds up 'read-next' in\n\ - applications\n\ + applications.\n\ -R, --sort-records=#\n\ Sort records according to an index. This makes your\n\ data much more localized and may speed up things\n\ - (It may be VERY slow to do a sort the first time!)\n\ + (It may be VERY slow to do a sort the first time!).\n\ -b, --block-search=#\n\ Find a record, a block at given offset belongs to."); @@ -702,6 +720,12 @@ static void get_options(register int *argc,register char ***argv) exit(1); } + if (init_tmpdir(&myisamchk_tmpdir, opt_tmpdir)) + exit(1); + + check_param.tmpdir=&myisamchk_tmpdir; + check_param.key_cache_block_size= opt_key_cache_block_size; + if (set_charset_name) if (!(set_charset=get_charset_by_name(set_charset_name, MYF(MY_WME)))) exit(1); @@ -871,7 +895,7 @@ static int myisamchk(MI_CHECK *param, my_string filename) } else { - if (share->fulltext_index && !stopwords_inited++) + if (!stopwords_inited++) ft_init_stopwords(); if (!(param->testflag & T_READONLY)) @@ -1017,7 +1041,8 @@ static int myisamchk(MI_CHECK *param, my_string filename) !(param->testflag & (T_FAST | T_FORCE_CREATE))) { if (param->testflag & (T_EXTEND | T_MEDIUM)) - VOID(init_key_cache(param->use_buffers)); + VOID(init_key_cache(dflt_key_cache,opt_key_cache_block_size, + param->use_buffers, 0, 0)); VOID(init_io_cache(¶m->read_cache,datafile, (uint) param->read_buffer_length, READ_CACHE, @@ -1031,7 +1056,7 @@ static int myisamchk(MI_CHECK *param, my_string filename) HA_OPTION_COMPRESS_RECORD)) || (param->testflag & (T_EXTEND | T_MEDIUM))) error|=chk_data_link(param, info, param->testflag & T_EXTEND); - error|=flush_blocks(param,share->kfile); + error|=flush_blocks(param, share->key_cache, share->kfile); VOID(end_io_cache(¶m->read_cache)); } if (!error) @@ -1120,7 +1145,7 @@ static void descript(MI_CHECK *param, register MI_INFO *info, my_string name) { uint key,keyseg_nr,field,start; reg3 MI_KEYDEF *keyinfo; - reg2 MI_KEYSEG *keyseg; + reg2 HA_KEYSEG *keyseg; reg4 const char *text; char buff[160],length[10],*pos,*end; enum en_fieldtype type; @@ -1440,7 +1465,8 @@ static int mi_sort_records(MI_CHECK *param, if (share->state.key_root[sort_key] == HA_OFFSET_ERROR) DBUG_RETURN(0); /* Nothing to do */ - init_key_cache(param->use_buffers); + init_key_cache(dflt_key_cache, opt_key_cache_block_size, param->use_buffers, + 0, 0); if (init_io_cache(&info->rec_cache,-1,(uint) param->write_buffer_length, WRITE_CACHE,share->pack.header_length,1, MYF(MY_WME | MY_WAIT_IF_FULL))) @@ -1554,7 +1580,8 @@ err: my_free(sort_info.buff,MYF(MY_ALLOW_ZERO_PTR)); sort_info.buff=0; share->state.sortkey=sort_key; - DBUG_RETURN(flush_blocks(param, share->kfile) | got_error); + DBUG_RETURN(flush_blocks(param, share->key_cache, share->kfile) | + got_error); } /* sort_records */ @@ -1649,6 +1676,10 @@ err: DBUG_RETURN(1); } /* sort_record_index */ +volatile bool *killed_ptr(MI_CHECK *param) +{ + return (bool *)(& param->thd); /* always NULL */ +} /* print warnings and errors */ /* VARARGS */ diff --git a/myisam/myisamdef.h b/myisam/myisamdef.h index e1df1d73191..c92d5a76815 100644 --- a/myisam/myisamdef.h +++ b/myisam/myisamdef.h @@ -55,8 +55,8 @@ typedef struct st_mi_state_info uchar uniques; /* number of UNIQUE definitions */ uchar language; /* Language for indexes */ uchar max_block_size; /* max keyblock size */ - uchar fulltext_keys; /* reserved for 4.1 */ - uchar not_used; /* To align to 8 */ + uchar fulltext_keys; + uchar not_used; /* To align to 8 */ } header; MI_STATUS_INFO state; @@ -91,13 +91,13 @@ typedef struct st_mi_state_info } MI_STATE_INFO; #define MI_STATE_INFO_SIZE (24+14*8+7*4+2*2+8) -#define MI_STATE_KEY_SIZE 8 +#define MI_STATE_KEY_SIZE 8 #define MI_STATE_KEYBLOCK_SIZE 8 #define MI_STATE_KEYSEG_SIZE 4 #define MI_STATE_EXTRA_SIZE ((MI_MAX_KEY+MI_MAX_KEY_BLOCK_SIZE)*MI_STATE_KEY_SIZE + MI_MAX_KEY*MI_MAX_KEY_SEG*MI_STATE_KEYSEG_SIZE) #define MI_KEYDEF_SIZE (2+ 5*2) #define MI_UNIQUEDEF_SIZE (2+1+1) -#define MI_KEYSEG_SIZE (6+ 2*2 + 4*2) +#define HA_KEYSEG_SIZE (6+ 2*2 + 4*2) #define MI_COLUMNDEF_SIZE (2*3+1) #define MI_BASE_INFO_SIZE (5*8 + 8*4 + 4 + 4*2 + 16) #define MI_INDEX_BLOCK_MARGIN 16 /* Safety margin for .MYI tables */ @@ -155,9 +155,10 @@ typedef struct st_mi_isam_pack { typedef struct st_mi_isam_share { /* Shared between opens */ MI_STATE_INFO state; MI_BASE_INFO base; + MI_KEYDEF ft2_keyinfo; /* Second-level ft-key definition */ MI_KEYDEF *keyinfo; /* Key definitions */ MI_UNIQUEDEF *uniqueinfo; /* unique definitions */ - MI_KEYSEG *keyparts; /* key part info */ + HA_KEYSEG *keyparts; /* key part info */ MI_COLUMNDEF *rec; /* Pointer to field information */ MI_PACK pack; /* Data about packed records */ MI_BLOB *blobs; /* Pointer to blobs */ @@ -165,6 +166,7 @@ typedef struct st_mi_isam_share { /* Shared between opens */ char *data_file_name, /* Resolved path names from symlinks */ *index_file_name; byte *file_map; /* mem-map of file if possible */ + KEY_CACHE *key_cache; /* ref to the current key cache */ MI_DECODE_TREE *decode_trees; uint16 *decode_tables; int (*read_record)(struct st_myisam_info*, my_off_t, byte*); @@ -185,6 +187,7 @@ typedef struct st_mi_isam_share { /* Shared between opens */ ulong max_pack_length; ulong state_diff_length; uint rec_reflength; /* rec_reflength in use now */ + uint unique_name_length; File kfile; /* Shared keyfile */ File data_file; /* Shared data file */ int mode; /* mode of file on open */ @@ -192,14 +195,12 @@ typedef struct st_mi_isam_share { /* Shared between opens */ uint w_locks,r_locks,tot_locks; /* Number of read/write locks */ uint blocksize; /* blocksize of keyfile */ myf write_flag; - int rnd; /* rnd-counter */ enum data_file_type data_file_type; my_bool changed, /* If changed since lock */ global_changed, /* If changed since open */ not_flushed, temporary,delay_key_write, - concurrent_insert, - fulltext_index; + concurrent_insert; #ifdef THREAD THR_LOCK lock; pthread_mutex_t intern_lock; /* Locking for use with _locking */ @@ -223,13 +224,17 @@ struct st_myisam_info { MI_BLOB *blobs; /* Pointer to blobs */ MI_BIT_BUFF bit_buff; /* accumulate indexfile changes between write's */ - TREE *bulk_insert; + TREE *bulk_insert; + DYNAMIC_ARRAY *ft1_to_ft2; /* used only in ft1->ft2 conversion */ char *filename; /* parameter to open filename */ uchar *buff, /* Temp area for key */ *lastkey,*lastkey2; /* Last used search key */ + uchar *first_mbr_key; /* Searhed spatial key */ byte *rec_buff; /* Tempbuff for recordpack */ uchar *int_keypos, /* Save position for next/previous */ *int_maxpos; /* -""- */ + uint int_nod_flag; /* -""- */ + uint32 int_keytree_version; /* -""- */ int (*read_record)(struct st_myisam_info*, my_off_t, byte*); invalidator_by_filename invalidator; /* query cache invalidator */ ulong this_unique; /* uniq filenumber or thread */ @@ -248,10 +253,10 @@ struct st_myisam_info { int dfile; /* The datafile */ uint opt_flag; /* Optim. for space/speed */ uint update; /* If file changed since open */ - uint int_nod_flag; /* -""- */ int lastinx; /* Last used index */ uint lastkey_length; /* Length of key in lastkey */ uint last_rkey_length; /* Last length in mi_rkey() */ + enum ha_rkey_function last_key_func; /* CONTAIN, OVERLAP, etc */ uint save_lastkey_length; int errkey; /* Got last error on this key */ int lock_type; /* How database was locked */ @@ -259,21 +264,58 @@ struct st_myisam_info { uint data_changed; /* Somebody has changed data */ uint save_update; /* When using KEY_READ */ int save_lastinx; - uint32 int_keytree_version; /* -""- */ LIST open_list; IO_CACHE rec_cache; /* When cacheing records */ + uint preload_buff_size; /* When preloading indexes */ myf lock_wait; /* is 0 or MY_DONT_WAIT */ my_bool was_locked; /* Was locked in panic */ my_bool quick_mode; my_bool page_changed; /* If info->buff can't be used for rnext */ my_bool buff_used; /* If info->buff has to be reread for rnext */ - my_bool use_packed_key; /* For MYISAMMRG */ + my_bool once_flags; /* For MYISAMMRG */ #ifdef THREAD THR_LOCK_DATA lock; #endif + uchar *rtree_recursion_state; /* For RTREE */ + int rtree_recursion_depth; }; +typedef struct st_buffpek { + my_off_t file_pos; /* Where we are in the sort file */ + uchar *base,*key; /* Key pointers */ + ha_rows count; /* Number of rows in table */ + ulong mem_count; /* numbers of keys in memory */ + ulong max_keys; /* Max keys in buffert */ +} BUFFPEK; +typedef struct st_mi_sort_param +{ + pthread_t thr; + IO_CACHE read_cache, tempfile, tempfile_for_exceptions; + DYNAMIC_ARRAY buffpek; + ulonglong unique[MI_MAX_KEY_SEG+1]; + my_off_t pos,max_pos,filepos,start_recpos; + uint key, key_length,real_key_length,sortbuff_size; + uint maxbuffers, keys, find_length, sort_keys_length; + my_bool fix_datafile, master; + MI_KEYDEF *keyinfo; + HA_KEYSEG *seg; + SORT_INFO *sort_info; + uchar **sort_keys; + byte *rec_buff; + void *wordlist, *wordptr; + char *record; + MY_TMPDIR *tmpdir; + int (*key_cmp)(struct st_mi_sort_param *, const void *, const void *); + int (*key_read)(struct st_mi_sort_param *,void *); + int (*key_write)(struct st_mi_sort_param *, const void *); + void (*lock_in_memory)(MI_CHECK *); + NEAR int (*write_keys)(struct st_mi_sort_param *, register uchar **, + uint , struct st_buffpek *, IO_CACHE *); + NEAR uint (*read_to_buffer)(IO_CACHE *,struct st_buffpek *, uint); + NEAR int (*write_key)(struct st_mi_sort_param *, IO_CACHE *,char *, + uint, uint); +} MI_SORT_PARAM; /* Some defines used by isam-funktions */ #define USE_WHOLE_KEY MI_MAX_KEY_BUFF*2 /* Use whole key in _mi_search() */ @@ -286,6 +328,10 @@ struct st_myisam_info { #define WRITEINFO_UPDATE_KEYFILE 1 #define WRITEINFO_NO_UNLOCK 2 + /* once_flags */ +#define USE_PACKED_KEYS 1 +#define RRND_PRESERVE_LASTINX 2 + /* bits in state.changed */ #define STATE_CHANGED 1 @@ -325,13 +371,6 @@ struct st_myisam_info { { *(key)=255; mi_int2store((key)+1,(length)); } \ } -#define get_key_length(length,key) \ -{ if ((uchar) *(key) != 255) \ - length= (uint) (uchar) *((key)++); \ - else \ - { length=mi_uint2korr((key)+1); (key)+=3; } \ -} - #define get_key_full_length(length,key) \ { if ((uchar) *(key) != 255) \ length= ((uint) (uchar) *((key)++))+1; \ @@ -339,11 +378,11 @@ struct st_myisam_info { { length=mi_uint2korr((key)+1)+3; (key)+=3; } \ } -#define get_key_pack_length(length,length_pack,key) \ +#define get_key_full_length_rdonly(length,key) \ { if ((uchar) *(key) != 255) \ - { length= (uint) (uchar) *((key)++); length_pack=1; }\ + length= ((uint) (uchar) *((key)))+1; \ else \ - { length=mi_uint2korr((key)+1); (key)+=3; length_pack=3; } \ + { length=mi_uint2korr((key)+1)+3; } \ } #define get_pack_length(length) ((length) >= 255 ? 3 : 1) @@ -366,10 +405,10 @@ struct st_myisam_info { #define PACK_TYPE_SELECTED 1 /* Bits in field->pack_type */ #define PACK_TYPE_SPACE_FIELDS 2 #define PACK_TYPE_ZERO_FILL 4 -#define MI_FOUND_WRONG_KEY 32738 /* Impossible value from _mi_key_cmp */ +#define MI_FOUND_WRONG_KEY 32738 /* Impossible value from ha_key_cmp */ #define MI_MAX_KEY_BLOCK_SIZE (MI_MAX_KEY_BLOCK_LENGTH/MI_MIN_KEY_BLOCK_LENGTH) -#define MI_BLOCK_SIZE(key_length,data_pointer,key_pointer) ((((key_length+data_pointer+key_pointer)*4+key_pointer+2)/myisam_block_size+1)*myisam_block_size) +#define MI_BLOCK_SIZE(key_length,data_pointer,key_pointer) (((((key_length)+(data_pointer)+(key_pointer))*4+(key_pointer)+2)/myisam_block_size+1)*myisam_block_size) #define MI_MAX_KEYPTR_SIZE 5 /* For calculating block lengths */ #define MI_MIN_KEYBLOCK_LENGTH 50 /* When to split delete blocks */ @@ -429,7 +468,10 @@ extern int _mi_delete_static_record(MI_INFO *info); extern int _mi_cmp_static_record(MI_INFO *info,const byte *record); extern int _mi_read_rnd_static_record(MI_INFO*, byte *,my_off_t, my_bool); extern int _mi_ck_write(MI_INFO *info,uint keynr,uchar *key,uint length); -extern int _mi_enlarge_root(MI_INFO *info,uint keynr,uchar *key); +extern int _mi_ck_real_write_btree(MI_INFO *info, MI_KEYDEF *keyinfo, + uchar *key, uint key_length, + my_off_t *root, uint comp_flag); +extern int _mi_enlarge_root(MI_INFO *info,MI_KEYDEF *keyinfo,uchar *key, my_off_t *root); extern int _mi_insert(MI_INFO *info,MI_KEYDEF *keyinfo,uchar *key, uchar *anc_buff,uchar *key_pos,uchar *key_buff, uchar *father_buff, uchar *father_keypos, @@ -484,14 +526,12 @@ extern int _mi_seq_search(MI_INFO *info,MI_KEYDEF *keyinfo,uchar *page, extern int _mi_prefix_search(MI_INFO *info,MI_KEYDEF *keyinfo,uchar *page, uchar *key,uint key_len,uint comp_flag, uchar **ret_pos,uchar *buff, my_bool *was_last_key); -extern int _mi_compare_text(CHARSET_INFO *, uchar *, uint, uchar *, uint , - my_bool); extern my_off_t _mi_kpos(uint nod_flag,uchar *after_key); extern void _mi_kpointer(MI_INFO *info,uchar *buff,my_off_t pos); extern my_off_t _mi_dpos(MI_INFO *info, uint nod_flag,uchar *after_key); extern my_off_t _mi_rec_pos(MYISAM_SHARE *info, uchar *ptr); extern void _mi_dpointer(MI_INFO *info, uchar *buff,my_off_t pos); -extern int _mi_key_cmp(MI_KEYSEG *keyseg, uchar *a,uchar *b, +extern int ha_key_cmp(HA_KEYSEG *keyseg, uchar *a,uchar *b, uint key_length,uint nextflag,uint *diff_length); extern uint _mi_get_static_key(MI_KEYDEF *keyinfo,uint nod_flag,uchar * *page, uchar *key); @@ -506,22 +546,23 @@ extern uchar *_mi_get_key(MI_INFO *info, MI_KEYDEF *keyinfo, uchar *page, uchar *key, uchar *keypos, uint *return_key_length); extern uint _mi_keylength(MI_KEYDEF *keyinfo,uchar *key); extern uint _mi_keylength_part(MI_KEYDEF *keyinfo, register uchar *key, - MI_KEYSEG *end); + HA_KEYSEG *end); extern uchar *_mi_move_key(MI_KEYDEF *keyinfo,uchar *to,uchar *from); extern int _mi_search_next(MI_INFO *info,MI_KEYDEF *keyinfo,uchar *key, uint key_length,uint nextflag,my_off_t pos); extern int _mi_search_first(MI_INFO *info,MI_KEYDEF *keyinfo,my_off_t pos); extern int _mi_search_last(MI_INFO *info,MI_KEYDEF *keyinfo,my_off_t pos); extern uchar *_mi_fetch_keypage(MI_INFO *info,MI_KEYDEF *keyinfo,my_off_t page, - uchar *buff,int return_buffer); + int level,uchar *buff,int return_buffer); extern int _mi_write_keypage(MI_INFO *info,MI_KEYDEF *keyinfo,my_off_t page, - uchar *buff); -extern int _mi_dispose(MI_INFO *info,MI_KEYDEF *keyinfo,my_off_t pos); -extern my_off_t _mi_new(MI_INFO *info,MI_KEYDEF *keyinfo); + int level, uchar *buff); +extern int _mi_dispose(MI_INFO *info,MI_KEYDEF *keyinfo,my_off_t pos, + int level); +extern my_off_t _mi_new(MI_INFO *info,MI_KEYDEF *keyinfo,int level); extern uint _mi_make_key(MI_INFO *info,uint keynr,uchar *key, const byte *record,my_off_t filepos); extern uint _mi_pack_key(MI_INFO *info,uint keynr,uchar *key,uchar *old, - uint key_length, MI_KEYSEG **last_used_keyseg); + uint key_length, HA_KEYSEG **last_used_keyseg); extern int _mi_read_key_record(MI_INFO *info,my_off_t filepos,byte *buf); extern int _mi_read_cache(IO_CACHE *info,byte *buff,my_off_t pos, uint length,int re_read_if_possibly); @@ -540,7 +581,7 @@ extern my_bool _mi_rec_check(MI_INFO *info,const char *record, byte *packpos); extern int _mi_write_part_record(MI_INFO *info,my_off_t filepos,ulong length, my_off_t next_filepos,byte **record, ulong *reclength,int *flag); -extern void _mi_print_key(FILE *stream,MI_KEYSEG *keyseg,const uchar *key, +extern void _mi_print_key(FILE *stream,HA_KEYSEG *keyseg,const uchar *key, uint length); extern my_bool _mi_read_pack_info(MI_INFO *info,pbool fix_keys); extern int _mi_read_pack_record(MI_INFO *info,my_off_t filepos,byte *buf); @@ -630,8 +671,8 @@ char *mi_state_info_read(char *ptr, MI_STATE_INFO *state); uint mi_state_info_read_dsk(File file, MI_STATE_INFO *state, my_bool pRead); uint mi_base_info_write(File file, MI_BASE_INFO *base); char *my_n_base_info_read(char *ptr, MI_BASE_INFO *base); -int mi_keyseg_write(File file, const MI_KEYSEG *keyseg); -char *mi_keyseg_read(char *ptr, MI_KEYSEG *keyseg); +int mi_keyseg_write(File file, const HA_KEYSEG *keyseg); +char *mi_keyseg_read(char *ptr, HA_KEYSEG *keyseg); uint mi_keydef_write(File file, MI_KEYDEF *keydef); char *mi_keydef_read(char *ptr, MI_KEYDEF *keydef); uint mi_uniquedef_write(File file, MI_UNIQUEDEF *keydef); @@ -662,14 +703,20 @@ int mi_open_keyfile(MYISAM_SHARE *share); void mi_setup_functions(register MYISAM_SHARE *share); /* Functions needed by mi_check */ +volatile bool *killed_ptr(MI_CHECK *param); void mi_check_print_error _VARARGS((MI_CHECK *param, const char *fmt,...)); void mi_check_print_warning _VARARGS((MI_CHECK *param, const char *fmt,...)); void mi_check_print_info _VARARGS((MI_CHECK *param, const char *fmt,...)); int flush_pending_blocks(MI_SORT_PARAM *param); +int sort_ft_buf_flush(MI_SORT_PARAM *sort_param); int thr_write_keys(MI_SORT_PARAM *sort_param); #ifdef THREAD pthread_handler_decl(thr_find_all_keys,arg); #endif +int flush_blocks(MI_CHECK *param, KEY_CACHE *key_cache, File file); + +int sort_write_record(MI_SORT_PARAM *sort_param); +int _create_index_by_sort(MI_SORT_PARAM *info,my_bool no_messages, ulong); #ifdef __cplusplus } diff --git a/myisam/myisamlog.c b/myisam/myisamlog.c index 091f9ad1d7e..c9b00be7d9e 100644 --- a/myisam/myisamlog.c +++ b/myisam/myisamlog.c @@ -71,7 +71,7 @@ static void printf_log(const char *str,...); static bool cmp_filename(struct file_info *file_info,my_string name); static uint verbose=0,update=0,test_info=0,max_files=0,re_open_count=0, - recover=0,prefix_remove=0,opt_processes=0,opt_myisam_with_debug=0; + recover=0,prefix_remove=0,opt_processes=0; static my_string log_filename=0,filepath=0,write_filename=0,record_pos_file=0; static ulong com_count[10][3],number_of_commands=(ulong) ~0L, isamlog_process; @@ -201,9 +201,6 @@ static void get_options(register int *argc, register char ***argv) update=1; recover++; break; - case 'D': - opt_myisam_with_debug=1; - break; case 'P': opt_processes=1; break; @@ -333,7 +330,8 @@ static int examine_log(my_string file_name, char **table_names) bzero((gptr) com_count,sizeof(com_count)); init_tree(&tree,0,0,sizeof(file_info),(qsort_cmp2) file_info_compare,1, (tree_element_free) file_info_free, NULL); - VOID(init_key_cache(KEY_CACHE_SIZE)); + VOID(init_key_cache(dflt_key_cache,KEY_CACHE_BLOCK_SIZE,KEY_CACHE_SIZE, + 0, 0)); files_open=0; access_time=0; while (access_time++ != number_of_commands && @@ -345,7 +343,8 @@ static int examine_log(my_string file_name, char **table_names) if (!opt_processes) file_info.process=0; result= mi_uint2korr(head+7); - if ((curr_file_info=(struct file_info*) tree_search(&tree,&file_info))) + if ((curr_file_info=(struct file_info*) tree_search(&tree, &file_info, + tree.custom_arg))) { curr_file_info->accessed=access_time; if (update && curr_file_info->used && curr_file_info->closed) @@ -455,12 +454,8 @@ static int examine_log(my_string file_name, char **table_names) goto end; files_open++; file_info.closed=0; - if (opt_myisam_with_debug) - file_info.isam->s->rnd= 0; - else - file_info.isam->s->rnd= isamlog_process; } - VOID(tree_insert(&tree,(gptr) &file_info,0)); + VOID(tree_insert(&tree, (gptr) &file_info, 0, tree.custom_arg)); if (file_info.used) { if (verbose && !record_pos_file) @@ -479,7 +474,7 @@ static int examine_log(my_string file_name, char **table_names) { if (!curr_file_info->closed) files_open--; - VOID(tree_delete(&tree,(gptr) curr_file_info)); + VOID(tree_delete(&tree, (gptr) curr_file_info, tree.custom_arg)); } break; case MI_LOG_EXTRA: @@ -650,7 +645,7 @@ static int examine_log(my_string file_name, char **table_names) goto end; } } - end_key_cache(); + end_key_cache(dflt_key_cache,1); delete_tree(&tree); VOID(end_io_cache(&cache)); VOID(my_close(file,MYF(0))); @@ -670,7 +665,7 @@ static int examine_log(my_string file_name, char **table_names) llstr(isamlog_filepos,llbuff))); fflush(stderr); end: - end_key_cache(); + end_key_cache(dflt_key_cache, 1); delete_tree(&tree); VOID(end_io_cache(&cache)); VOID(my_close(file,MYF(0))); @@ -813,7 +808,6 @@ static int close_some_file(TREE *tree) (void*) &access_param,left_root_right)); if (!access_param.found) return 1; /* No open file that is possibly to close */ - access_param.found->rnd=access_param.found->isam->s->rnd; if (mi_close(access_param.found->isam)) return 1; access_param.found->closed=1; @@ -833,7 +827,6 @@ static int reopen_closed_file(TREE *tree, struct file_info *fileinfo) if (!(fileinfo->isam= mi_open(name,O_RDWR,HA_OPEN_WAIT_IF_LOCKED))) return 1; fileinfo->closed=0; - fileinfo->isam->s->rnd=fileinfo->rnd; re_open_count++; return 0; } diff --git a/myisam/myisampack.c b/myisam/myisampack.c index 800203a30e2..5ca57248204 100644 --- a/myisam/myisampack.c +++ b/myisam/myisampack.c @@ -235,12 +235,12 @@ enum options_mp {OPT_CHARSETS_DIR_MP=256}; static struct my_option my_long_options[] = { - {"backup", 'b', "Make a backup of the table as table_name.OLD", + {"backup", 'b', "Make a backup of the table as table_name.OLD.", (gptr*) &backup, (gptr*) &backup, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, {"character-sets-dir", OPT_CHARSETS_DIR_MP, "Directory where character sets are.", (gptr*) &charsets_dir, (gptr*) &charsets_dir, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, - {"debug", '#', "Output debug log. Often this is 'd:t:o,filename'", + {"debug", '#', "Output debug log. Often this is 'd:t:o,filename'.", 0, 0, 0, GET_STR, OPT_ARG, 0, 0, 0, 0, 0, 0}, {"force", 'f', "Force packing of table even if it gets bigger or if tempfile exists.", @@ -665,7 +665,8 @@ static HUFF_COUNTS *init_huff_count(MI_INFO *info,my_off_t records) (type == FIELD_NORMAL || type == FIELD_SKIP_ZERO)) count[i].max_zero_fill= count[i].field_length; - init_tree(&count[i].int_tree,0,0,-1,(qsort_cmp2) compare_tree,0,NULL,NULL); + init_tree(&count[i].int_tree,0,0,-1,(qsort_cmp2) compare_tree,0, NULL, + NULL); if (records && type != FIELD_BLOB && type != FIELD_VARCHAR) count[i].tree_pos=count[i].tree_buff = my_malloc(count[i].field_length > 1 ? tree_buff_length : 2, @@ -763,7 +764,8 @@ static int get_statistic(PACK_MRG_INFO *mrg,HUFF_COUNTS *huff_counts) if (count->tree_buff) { global_count=count; - if (!(element=tree_insert(&count->int_tree,pos,0)) || + if (!(element=tree_insert(&count->int_tree,pos, 0, + count->int_tree.custom_arg)) || (element->count == 1 && count->tree_buff + tree_buff_length < count->tree_pos + count->field_length) || @@ -1786,7 +1788,8 @@ static int compress_isam_file(PACK_MRG_INFO *mrg, HUFF_COUNTS *huff_counts) break; case FIELD_INTERVALL: global_count=count; - pos=(byte*) tree_search(&count->int_tree,start_pos); + pos=(byte*) tree_search(&count->int_tree, start_pos, + count->int_tree.custom_arg); intervall=(uint) (pos - count->tree_buff)/field_length; write_bits(tree->code[intervall],(uint) tree->code_len[intervall]); start_pos=end_pos; diff --git a/myisam/rt_index.c b/myisam/rt_index.c new file mode 100644 index 00000000000..30146b9fd67 --- /dev/null +++ b/myisam/rt_index.c @@ -0,0 +1,993 @@ +/* Copyright (C) 2000 MySQL AB & Ramil Kalimullin & MySQL Finland AB + & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include "myisamdef.h" + +#include "rt_index.h" +#include "rt_key.h" +#include "rt_mbr.h" + +#define REINSERT_BUFFER_INC 10 + +typedef struct st_page_level +{ + uint level; + my_off_t offs; +} stPageLevel; + +typedef struct st_page_list +{ + ulong n_pages; + ulong m_pages; + stPageLevel *pages; +} stPageList; + + +/* + Find next key in r-tree according to search_flag recursively + + NOTES + Used in rtree_find_first() and rtree_find_next() + + RETURN + -1 Error + 0 Found + 1 Not found +*/ + +static int rtree_find_req(MI_INFO *info, MI_KEYDEF *keyinfo, uint search_flag, + uint nod_cmp_flag, my_off_t page, int level) +{ + uchar *k; + uchar *last; + uint nod_flag; + int res; + uchar *page_buf; + int k_len; + uint *saved_key = (uint*) (info->rtree_recursion_state) + level; + + if (!(page_buf = (uchar*)my_alloca((uint)keyinfo->block_length))) + { + my_errno = HA_ERR_OUT_OF_MEM; + return -1; + } + if (!_mi_fetch_keypage(info, keyinfo, page, DFLT_INIT_HITS, page_buf, 0)) + goto err1; + nod_flag = mi_test_if_nod(page_buf); + + k_len = keyinfo->keylength - info->s->base.rec_reflength; + + if(info->rtree_recursion_depth >= level) + { + k = page_buf + *saved_key; + } + else + { + k = rt_PAGE_FIRST_KEY(page_buf, nod_flag); + } + last = rt_PAGE_END(page_buf); + + for (; k < last; k = rt_PAGE_NEXT_KEY(k, k_len, nod_flag)) + { + if (nod_flag) + { + /* this is an internal node in the tree */ + if (!(res = rtree_key_cmp(keyinfo->seg, info->first_mbr_key, k, + info->last_rkey_length, nod_cmp_flag))) + { + switch ((res = rtree_find_req(info, keyinfo, search_flag, nod_cmp_flag, + _mi_kpos(nod_flag, k), level + 1))) + { + case 0: /* found - exit from recursion */ + *saved_key = k - page_buf; + goto ok; + case 1: /* not found - continue searching */ + info->rtree_recursion_depth = level; + break; + default: /* error */ + case -1: + goto err1; + } + } + } + else + { + /* this is a leaf */ + if (!rtree_key_cmp(keyinfo->seg, info->first_mbr_key, k, + info->last_rkey_length, search_flag)) + { + uchar *after_key = rt_PAGE_NEXT_KEY(k, k_len, nod_flag); + info->lastpos = _mi_dpos(info, 0, after_key); + info->lastkey_length = k_len + info->s->base.rec_reflength; + memcpy(info->lastkey, k, info->lastkey_length); + info->rtree_recursion_depth = level; + *saved_key = last - page_buf; + + if (after_key < last) + { + info->int_keypos = info->buff; + info->int_maxpos = info->buff + (last - after_key); + memcpy(info->buff, after_key, last - after_key); + info->buff_used = 0; + } + else + { + info->buff_used = 1; + } + + res = 0; + goto ok; + } + } + } + info->lastpos = HA_OFFSET_ERROR; + my_errno = HA_ERR_KEY_NOT_FOUND; + res = 1; + +ok: + my_afree((byte*)page_buf); + return res; + +err1: + my_afree((byte*)page_buf); + info->lastpos = HA_OFFSET_ERROR; + return -1; +} + + +/* + Find first key in r-tree according to search_flag condition + + SYNOPSIS + rtree_find_first() + info Handler to MyISAM file + uint keynr Key number to use + key Key to search for + key_length Length of 'key' + search_flag Bitmap of flags how to do the search + + RETURN + -1 Error + 0 Found + 1 Not found +*/ + +int rtree_find_first(MI_INFO *info, uint keynr, uchar *key, uint key_length, + uint search_flag) +{ + my_off_t root; + uint nod_cmp_flag; + MI_KEYDEF *keyinfo = info->s->keyinfo + keynr; + + if ((root = info->s->state.key_root[keynr]) == HA_OFFSET_ERROR) + { + my_errno= HA_ERR_END_OF_FILE; + return -1; + } + + /* Save searched key */ + memcpy(info->first_mbr_key, key, keyinfo->keylength - + info->s->base.rec_reflength); + info->last_rkey_length = key_length; + + info->rtree_recursion_depth = -1; + info->buff_used = 1; + + nod_cmp_flag = ((search_flag & (MBR_EQUAL | MBR_WITHIN)) ? + MBR_WITHIN : MBR_INTERSECT); + return rtree_find_req(info, keyinfo, search_flag, nod_cmp_flag, root, 0); +} + + +/* + Find next key in r-tree according to search_flag condition + + SYNOPSIS + rtree_find_next() + info Handler to MyISAM file + uint keynr Key number to use + search_flag Bitmap of flags how to do the search + + RETURN + -1 Error + 0 Found + 1 Not found +*/ + +int rtree_find_next(MI_INFO *info, uint keynr, uint search_flag) +{ + my_off_t root; + uint nod_cmp_flag; + MI_KEYDEF *keyinfo = info->s->keyinfo + keynr; + + if (info->update & HA_STATE_DELETED) + return rtree_find_first(info, keynr, info->lastkey, info->lastkey_length, + search_flag); + + if (!info->buff_used) + { + uchar *key= info->int_keypos; + + while (key < info->int_maxpos) + { + if (!rtree_key_cmp(keyinfo->seg, info->first_mbr_key, key, + info->last_rkey_length, search_flag)) + { + uchar *after_key = key + keyinfo->keylength; + + info->lastpos= _mi_dpos(info, 0, after_key); + memcpy(info->lastkey, key, info->lastkey_length); + + if (after_key < info->int_maxpos) + info->int_keypos= after_key; + else + info->buff_used= 1; + return 0; + } + key+= keyinfo->keylength; + } + } + if ((root = info->s->state.key_root[keynr]) == HA_OFFSET_ERROR) + { + my_errno= HA_ERR_END_OF_FILE; + return -1; + } + + nod_cmp_flag = ((search_flag & (MBR_EQUAL | MBR_WITHIN)) ? + MBR_WITHIN : MBR_INTERSECT); + return rtree_find_req(info, keyinfo, search_flag, nod_cmp_flag, root, 0); +} + + +/* + Get next key in r-tree recursively + + NOTES + Used in rtree_get_first() and rtree_get_next() + + RETURN + -1 Error + 0 Found + 1 Not found +*/ + +static int rtree_get_req(MI_INFO *info, MI_KEYDEF *keyinfo, uint key_length, + my_off_t page, int level) +{ + uchar *k; + uchar *last; + uint nod_flag; + int res; + uchar *page_buf; + uint k_len; + uint *saved_key = (uint*) (info->rtree_recursion_state) + level; + + if (!(page_buf = (uchar*)my_alloca((uint)keyinfo->block_length))) + return -1; + if (!_mi_fetch_keypage(info, keyinfo, page, DFLT_INIT_HITS, page_buf, 0)) + goto err1; + nod_flag = mi_test_if_nod(page_buf); + + k_len = keyinfo->keylength - info->s->base.rec_reflength; + + if(info->rtree_recursion_depth >= level) + { + k = page_buf + *saved_key; + if (!nod_flag) + { + /* Only leaf pages contain data references. */ + /* Need to check next key with data reference. */ + k = rt_PAGE_NEXT_KEY(k, k_len, nod_flag); + } + } + else + { + k = rt_PAGE_FIRST_KEY(page_buf, nod_flag); + } + last = rt_PAGE_END(page_buf); + + for (; k < last; k = rt_PAGE_NEXT_KEY(k, k_len, nod_flag)) + { + if (nod_flag) + { + /* this is an internal node in the tree */ + switch ((res = rtree_get_req(info, keyinfo, key_length, + _mi_kpos(nod_flag, k), level + 1))) + { + case 0: /* found - exit from recursion */ + *saved_key = k - page_buf; + goto ok; + case 1: /* not found - continue searching */ + info->rtree_recursion_depth = level; + break; + default: + case -1: /* error */ + goto err1; + } + } + else + { + /* this is a leaf */ + uchar *after_key = rt_PAGE_NEXT_KEY(k, k_len, nod_flag); + info->lastpos = _mi_dpos(info, 0, after_key); + info->lastkey_length = k_len + info->s->base.rec_reflength; + memcpy(info->lastkey, k, info->lastkey_length); + + info->rtree_recursion_depth = level; + *saved_key = k - page_buf; + + if (after_key < last) + { + info->int_keypos = (uchar*)saved_key; + memcpy(info->buff, page_buf, keyinfo->block_length); + info->int_maxpos = rt_PAGE_END(info->buff); + info->buff_used = 0; + } + else + { + info->buff_used = 1; + } + + res = 0; + goto ok; + } + } + info->lastpos = HA_OFFSET_ERROR; + my_errno = HA_ERR_KEY_NOT_FOUND; + res = 1; + +ok: + my_afree((byte*)page_buf); + return res; + +err1: + my_afree((byte*)page_buf); + info->lastpos = HA_OFFSET_ERROR; + return -1; +} + + +/* + Get first key in r-tree + + RETURN + -1 Error + 0 Found + 1 Not found +*/ + +int rtree_get_first(MI_INFO *info, uint keynr, uint key_length) +{ + my_off_t root; + MI_KEYDEF *keyinfo = info->s->keyinfo + keynr; + + if ((root = info->s->state.key_root[keynr]) == HA_OFFSET_ERROR) + { + my_errno= HA_ERR_END_OF_FILE; + return -1; + } + + info->rtree_recursion_depth = -1; + info->buff_used = 1; + + return rtree_get_req(info, &keyinfo[keynr], key_length, root, 0); +} + + +/* + Get next key in r-tree + + RETURN + -1 Error + 0 Found + 1 Not found +*/ + +int rtree_get_next(MI_INFO *info, uint keynr, uint key_length) +{ + my_off_t root; + MI_KEYDEF *keyinfo = info->s->keyinfo + keynr; + + if (!info->buff_used) + { + uint k_len = keyinfo->keylength - info->s->base.rec_reflength; + /* rt_PAGE_NEXT_KEY(info->int_keypos) */ + uchar *key = info->buff + *(int*)info->int_keypos + k_len + + info->s->base.rec_reflength; + /* rt_PAGE_NEXT_KEY(key) */ + uchar *after_key = key + k_len + info->s->base.rec_reflength; + + info->lastpos = _mi_dpos(info, 0, after_key); + info->lastkey_length = k_len + info->s->base.rec_reflength; + memcpy(info->lastkey, key, k_len + info->s->base.rec_reflength); + + *(int*)info->int_keypos = key - info->buff; + if (after_key >= info->int_maxpos) + { + info->buff_used = 1; + } + + return 0; + } + else + { + if ((root = info->s->state.key_root[keynr]) == HA_OFFSET_ERROR) + { + my_errno= HA_ERR_END_OF_FILE; + return -1; + } + + return rtree_get_req(info, &keyinfo[keynr], key_length, root, 0); + } +} + + +/* + Go down and insert key into tree + + RETURN + -1 Error + 0 Child was not split + 1 Child was split +*/ + +static int rtree_insert_req(MI_INFO *info, MI_KEYDEF *keyinfo, uchar *key, + uint key_length, my_off_t page, my_off_t *new_page, + int ins_level, int level) +{ + uchar *k; + uint nod_flag; + uchar *page_buf; + int res; + + if (!(page_buf = (uchar*)my_alloca((uint)keyinfo->block_length + + MI_MAX_KEY_BUFF))) + { + my_errno = HA_ERR_OUT_OF_MEM; + return -1; + } + if (!_mi_fetch_keypage(info, keyinfo, page, DFLT_INIT_HITS, page_buf, 0)) + goto err1; + nod_flag = mi_test_if_nod(page_buf); + + if ((ins_level == -1 && nod_flag) || /* key: go down to leaf */ + (ins_level > -1 && ins_level > level)) /* branch: go down to ins_level */ + { + if ((k = rtree_choose_key(info, keyinfo, key, key_length, page_buf, + nod_flag)) == NULL) + goto err1; + switch ((res = rtree_insert_req(info, keyinfo, key, key_length, + _mi_kpos(nod_flag, k), new_page, ins_level, level + 1))) + { + case 0: /* child was not split */ + { + rtree_combine_rect(keyinfo->seg, k, key, k, key_length); + if (_mi_write_keypage(info, keyinfo, page, DFLT_INIT_HITS, page_buf)) + goto err1; + goto ok; + } + case 1: /* child was split */ + { + uchar *new_key = page_buf + keyinfo->block_length + nod_flag; + /* set proper MBR for key */ + if (rtree_set_key_mbr(info, keyinfo, k, key_length, + _mi_kpos(nod_flag, k))) + goto err1; + /* add new key for new page */ + _mi_kpointer(info, new_key - nod_flag, *new_page); + if (rtree_set_key_mbr(info, keyinfo, new_key, key_length, *new_page)) + goto err1; + res = rtree_add_key(info, keyinfo, new_key, key_length, + page_buf, new_page); + if (_mi_write_keypage(info, keyinfo, page, DFLT_INIT_HITS, page_buf)) + goto err1; + goto ok; + } + default: + case -1: /* error */ + { + goto err1; + } + } + } + else + { + res = rtree_add_key(info, keyinfo, key, key_length, page_buf, new_page); + if (_mi_write_keypage(info, keyinfo, page, DFLT_INIT_HITS, page_buf)) + goto err1; + goto ok; + } + +ok: + my_afree((byte*)page_buf); + return res; + +err1: + my_afree((byte*)page_buf); + return -1; +} + + +/* + Insert key into the tree + + RETURN + -1 Error + 0 Root was not split + 1 Root was split +*/ + +static int rtree_insert_level(MI_INFO *info, uint keynr, uchar *key, + uint key_length, int ins_level) +{ + my_off_t old_root; + MI_KEYDEF *keyinfo = info->s->keyinfo + keynr; + int res; + my_off_t new_page; + + if ((old_root = info->s->state.key_root[keynr]) == HA_OFFSET_ERROR) + { + int res; + + if ((old_root = _mi_new(info, keyinfo, DFLT_INIT_HITS)) == HA_OFFSET_ERROR) + return -1; + info->buff_used = 1; + mi_putint(info->buff, 2, 0); + res = rtree_add_key(info, keyinfo, key, key_length, info->buff, NULL); + if (_mi_write_keypage(info, keyinfo, old_root, DFLT_INIT_HITS, info->buff)) + return 1; + info->s->state.key_root[keynr] = old_root; + return res; + } + + switch ((res = rtree_insert_req(info, keyinfo, key, key_length, + old_root, &new_page, ins_level, 0))) + { + case 0: /* root was not split */ + { + break; + } + case 1: /* root was split, grow a new root */ + { + uchar *new_root_buf; + my_off_t new_root; + uchar *new_key; + uint nod_flag = info->s->base.key_reflength; + + if (!(new_root_buf = (uchar*)my_alloca((uint)keyinfo->block_length + + MI_MAX_KEY_BUFF))) + { + my_errno = HA_ERR_OUT_OF_MEM; + return -1; + } + + mi_putint(new_root_buf, 2, nod_flag); + if ((new_root = _mi_new(info, keyinfo, DFLT_INIT_HITS)) == + HA_OFFSET_ERROR) + goto err1; + + new_key = new_root_buf + keyinfo->block_length + nod_flag; + + _mi_kpointer(info, new_key - nod_flag, old_root); + if (rtree_set_key_mbr(info, keyinfo, new_key, key_length, old_root)) + goto err1; + if (rtree_add_key(info, keyinfo, new_key, key_length, new_root_buf, NULL) + == -1) + goto err1; + _mi_kpointer(info, new_key - nod_flag, new_page); + if (rtree_set_key_mbr(info, keyinfo, new_key, key_length, new_page)) + goto err1; + if (rtree_add_key(info, keyinfo, new_key, key_length, new_root_buf, NULL) + == -1) + goto err1; + if (_mi_write_keypage(info, keyinfo, new_root, + DFLT_INIT_HITS, new_root_buf)) + goto err1; + info->s->state.key_root[keynr] = new_root; + + my_afree((byte*)new_root_buf); + break; +err1: + my_afree((byte*)new_root_buf); + return -1; + } + default: + case -1: /* error */ + { + break; + } + } + return res; +} + + +/* + Insert key into the tree - interface function + + RETURN + -1 Error + 0 OK +*/ + +int rtree_insert(MI_INFO *info, uint keynr, uchar *key, uint key_length) +{ + return (rtree_insert_level(info, keynr, key, key_length, -1) == -1) ? -1 : 0; +} + + +/* + Fill reinsert page buffer + + RETURN + -1 Error + 0 OK +*/ + +static int rtree_fill_reinsert_list(stPageList *ReinsertList, my_off_t page, + int level) +{ + if (ReinsertList->n_pages == ReinsertList->m_pages) + { + ReinsertList->m_pages += REINSERT_BUFFER_INC; + if (!(ReinsertList->pages = (stPageLevel*)my_realloc((gptr)ReinsertList->pages, + ReinsertList->m_pages * sizeof(stPageLevel), MYF(MY_ALLOW_ZERO_PTR)))) + goto err1; + } + /* save page to ReinsertList */ + ReinsertList->pages[ReinsertList->n_pages].offs = page; + ReinsertList->pages[ReinsertList->n_pages].level = level; + ReinsertList->n_pages++; + return 0; + +err1: + return -1; +} + + +/* + Go down and delete key from the tree + + RETURN + -1 Error + 0 Deleted + 1 Not found + 2 Empty leaf +*/ + +static int rtree_delete_req(MI_INFO *info, MI_KEYDEF *keyinfo, uchar *key, + uint key_length, my_off_t page, uint *page_size, + stPageList *ReinsertList, int level) +{ + uchar *k; + uchar *last; + ulong i; + uint nod_flag; + uchar *page_buf; + int res; + + if (!(page_buf = (uchar*)my_alloca((uint)keyinfo->block_length))) + { + my_errno = HA_ERR_OUT_OF_MEM; + return -1; + } + if (!_mi_fetch_keypage(info, keyinfo, page, DFLT_INIT_HITS, page_buf, 0)) + goto err1; + nod_flag = mi_test_if_nod(page_buf); + + k = rt_PAGE_FIRST_KEY(page_buf, nod_flag); + last = rt_PAGE_END(page_buf); + + for (i = 0; k < last; k = rt_PAGE_NEXT_KEY(k, key_length, nod_flag), ++i) + { + if (nod_flag) + { + /* not leaf */ + if (!rtree_key_cmp(keyinfo->seg, key, k, key_length, MBR_WITHIN)) + { + switch ((res = rtree_delete_req(info, keyinfo, key, key_length, + _mi_kpos(nod_flag, k), page_size, ReinsertList, level + 1))) + { + case 0: /* deleted */ + { + /* test page filling */ + if (*page_size + key_length >= rt_PAGE_MIN_SIZE(keyinfo->block_length)) + { + /* OK */ + if (rtree_set_key_mbr(info, keyinfo, k, key_length, + _mi_kpos(nod_flag, k))) + goto err1; + if (_mi_write_keypage(info, keyinfo, page, + DFLT_INIT_HITS, page_buf)) + goto err1; + } + else + { + /* too small: delete key & add it descendant to reinsert list */ + if (rtree_fill_reinsert_list(ReinsertList, _mi_kpos(nod_flag, k), + level + 1)) + goto err1; + rtree_delete_key(info, page_buf, k, key_length, nod_flag); + if (_mi_write_keypage(info, keyinfo, page, + DFLT_INIT_HITS, page_buf)) + goto err1; + *page_size = mi_getint(page_buf); + } + + goto ok; + } + case 1: /* not found - continue searching */ + { + break; + } + case 2: /* vacuous case: last key in the leaf */ + { + rtree_delete_key(info, page_buf, k, key_length, nod_flag); + if (_mi_write_keypage(info, keyinfo, page, + DFLT_INIT_HITS, page_buf)) + goto err1; + *page_size = mi_getint(page_buf); + res = 0; + goto ok; + } + default: /* error */ + case -1: + { + goto err1; + } + } + } + } + else + { + /* leaf */ + if (!rtree_key_cmp(keyinfo->seg, key, k, key_length, MBR_EQUAL | MBR_DATA)) + { + rtree_delete_key(info, page_buf, k, key_length, nod_flag); + *page_size = mi_getint(page_buf); + if (*page_size == 2) + { + /* last key in the leaf */ + res = 2; + if (_mi_dispose(info, keyinfo, page, DFLT_INIT_HITS)) + goto err1; + } + else + { + res = 0; + if (_mi_write_keypage(info, keyinfo, page, DFLT_INIT_HITS, page_buf)) + goto err1; + } + goto ok; + } + } + } + res = 1; + +ok: + my_afree((byte*)page_buf); + return res; + +err1: + my_afree((byte*)page_buf); + return -1; +} + + +/* + Delete key - interface function + + RETURN + -1 Error + 0 Deleted +*/ + +int rtree_delete(MI_INFO *info, uint keynr, uchar *key, uint key_length) +{ + uint page_size; + stPageList ReinsertList; + my_off_t old_root; + MI_KEYDEF *keyinfo = info->s->keyinfo + keynr; + + if ((old_root = info->s->state.key_root[keynr]) == HA_OFFSET_ERROR) + { + my_errno= HA_ERR_END_OF_FILE; + return -1; + } + + ReinsertList.pages = NULL; + ReinsertList.n_pages = 0; + ReinsertList.m_pages = 0; + + switch (rtree_delete_req(info, keyinfo, key, key_length, old_root, + &page_size, &ReinsertList, 0)) + { + case 2: + { + info->s->state.key_root[keynr] = HA_OFFSET_ERROR; + return 0; + } + case 0: + { + uint nod_flag; + ulong i; + for (i = 0; i < ReinsertList.n_pages; ++i) + { + uchar *page_buf; + uint nod_flag; + uchar *k; + uchar *last; + + if (!(page_buf = (uchar*)my_alloca((uint)keyinfo->block_length))) + { + my_errno = HA_ERR_OUT_OF_MEM; + goto err1; + } + if (!_mi_fetch_keypage(info, keyinfo, ReinsertList.pages[i].offs, + DFLT_INIT_HITS, page_buf, 0)) + goto err1; + nod_flag = mi_test_if_nod(page_buf); + k = rt_PAGE_FIRST_KEY(page_buf, nod_flag); + last = rt_PAGE_END(page_buf); + for (; k < last; k = rt_PAGE_NEXT_KEY(k, key_length, nod_flag)) + { + if (rtree_insert_level(info, keynr, k, key_length, + ReinsertList.pages[i].level) == -1) + { + my_afree((byte*)page_buf); + goto err1; + } + } + my_afree((byte*)page_buf); + if (_mi_dispose(info, keyinfo, ReinsertList.pages[i].offs, + DFLT_INIT_HITS)) + goto err1; + } + if (ReinsertList.pages) + my_free((byte*) ReinsertList.pages, MYF(0)); + + /* check for redundant root (not leaf, 1 child) and eliminate */ + if ((old_root = info->s->state.key_root[keynr]) == HA_OFFSET_ERROR) + goto err1; + if (!_mi_fetch_keypage(info, keyinfo, old_root, DFLT_INIT_HITS, + info->buff, 0)) + goto err1; + nod_flag = mi_test_if_nod(info->buff); + page_size = mi_getint(info->buff); + if (nod_flag && (page_size == 2 + key_length + + (nod_flag ? nod_flag : info->s->base.rec_reflength))) + { + my_off_t new_root = _mi_kpos(nod_flag, + rt_PAGE_FIRST_KEY(info->buff, nod_flag)); + if (_mi_dispose(info, keyinfo, old_root, DFLT_INIT_HITS)) + goto err1; + info->s->state.key_root[keynr] = new_root; + } + info->update= HA_STATE_DELETED; + return 0; + +err1: + return -1; + } + case 1: /* not found */ + { + my_errno = HA_ERR_KEY_NOT_FOUND; + return -1; + } + default: + case -1: /* error */ + { + return -1; + } + } +} + + +/* + Estimate number of suitable keys in the tree + + RETURN + estimated value +*/ + +ha_rows rtree_estimate(MI_INFO *info, uint keynr, uchar *key, + uint key_length, uint flag) +{ + MI_KEYDEF *keyinfo = info->s->keyinfo + keynr; + my_off_t root; + uint i = 0; + uchar *k; + uchar *last; + uint nod_flag; + uchar *page_buf; + uint k_len; + double area = 0; + ha_rows res = 0; + + if (flag & MBR_DISJOINT) + return info->state->records; + + if ((root = info->s->state.key_root[keynr]) == HA_OFFSET_ERROR) + return HA_POS_ERROR; + if (!(page_buf = (uchar*)my_alloca((uint)keyinfo->block_length))) + return HA_POS_ERROR; + if (!_mi_fetch_keypage(info, keyinfo, root, DFLT_INIT_HITS, page_buf, 0)) + goto err1; + nod_flag = mi_test_if_nod(page_buf); + + k_len = keyinfo->keylength - info->s->base.rec_reflength; + + k = rt_PAGE_FIRST_KEY(page_buf, nod_flag); + last = rt_PAGE_END(page_buf); + + for (; k < last; k = rt_PAGE_NEXT_KEY(k, k_len, nod_flag), ++i) + { + if (nod_flag) + { + double k_area = rtree_rect_volume(keyinfo->seg, k, key_length); + + if (k_area == 0) + { + if (flag & (MBR_CONTAIN | MBR_INTERSECT)) + { + area += 1; + } + else if (flag & (MBR_WITHIN | MBR_EQUAL)) + { + if (!rtree_key_cmp(keyinfo->seg, key, k, key_length, MBR_WITHIN)) + area += 1; + } + else + goto err1; + } + else + { + if (flag & (MBR_CONTAIN | MBR_INTERSECT)) + { + area += rtree_overlapping_area(keyinfo->seg, key, k, key_length) / + k_area; + } + else if (flag & (MBR_WITHIN | MBR_EQUAL)) + { + if (!rtree_key_cmp(keyinfo->seg, key, k, key_length, MBR_WITHIN)) + area += rtree_rect_volume(keyinfo->seg, key, key_length) / + k_area; + } + else + goto err1; + } + } + else + { + if (!rtree_key_cmp(keyinfo->seg, key, k, key_length, flag)) + ++res; + } + } + if (nod_flag) + { + if (i) + res = (ha_rows) (area / i * info->state->records); + else + res = HA_POS_ERROR; + } + + my_afree((byte*)page_buf); + return res; + +err1: + my_afree((byte*)page_buf); + return HA_POS_ERROR; +} diff --git a/myisam/rt_index.h b/myisam/rt_index.h new file mode 100644 index 00000000000..1a0fce72a82 --- /dev/null +++ b/myisam/rt_index.h @@ -0,0 +1,44 @@ +/* Copyright (C) 2000 MySQL AB & Ramil Kalimullin & MySQL Finland AB + & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#ifndef _rt_index_h +#define _rt_index_h + +#define rt_PAGE_FIRST_KEY(page, nod_flag) (page + 2 + nod_flag) +#define rt_PAGE_NEXT_KEY(key, key_length, nod_flag) (key + key_length + \ + (nod_flag ? nod_flag : info->s->base.rec_reflength)) +#define rt_PAGE_END(page) (page + mi_getint(page)) + +#define rt_PAGE_MIN_SIZE(block_length) ((uint)(block_length) / 2) + +int rtree_insert(MI_INFO *info, uint keynr, uchar *key, uint key_length); +int rtree_delete(MI_INFO *info, uint keynr, uchar *key, uint key_length); + +int rtree_find_first(MI_INFO *info, uint keynr, uchar *key, uint key_length, + uint search_flag); +int rtree_find_next(MI_INFO *info, uint keynr, uint search_flag); + +int rtree_get_first(MI_INFO *info, uint keynr, uint key_length); +int rtree_get_next(MI_INFO *info, uint keynr, uint key_length); + +ha_rows rtree_estimate(MI_INFO *info, uint keynr, uchar *key, + uint key_length, uint flag); + +int rtree_split_page(MI_INFO *info, MI_KEYDEF *keyinfo, uchar *page, uchar *key, + uint key_length, my_off_t *new_page_offs); + +#endif /* _rt_index_h */ diff --git a/myisam/rt_key.c b/myisam/rt_key.c new file mode 100644 index 00000000000..f18d13af8d8 --- /dev/null +++ b/myisam/rt_key.c @@ -0,0 +1,139 @@ +/* Copyright (C) 2000 MySQL AB & Ramil Kalimullin + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include "myisamdef.h" + +#include "rt_index.h" +#include "rt_key.h" +#include "rt_mbr.h" + +/* + Add key to the page + + RESULT VALUES + -1 Error + 0 Not split + 1 Split +*/ + +int rtree_add_key(MI_INFO *info, MI_KEYDEF *keyinfo, uchar *key, + uint key_length, uchar *page_buf, my_off_t *new_page) +{ + uint page_size = mi_getint(page_buf); + uint nod_flag = mi_test_if_nod(page_buf); + + if (page_size + key_length + nod_flag <= keyinfo->block_length) + { + /* split won't be necessary */ + if (nod_flag) + { + /* save key */ + memcpy(rt_PAGE_END(page_buf), key - nod_flag, key_length + nod_flag); + page_size += key_length + nod_flag; + } + else + { + /* save key */ + memcpy(rt_PAGE_END(page_buf), key, key_length + + info->s->base.rec_reflength); + page_size += key_length + info->s->base.rec_reflength; + } + mi_putint(page_buf, page_size, nod_flag); + return 0; + } + + return (rtree_split_page(info, keyinfo, page_buf, key, key_length, + new_page) ? -1 : 1); +} + +/* + Delete key from the page +*/ +int rtree_delete_key(MI_INFO *info, uchar *page_buf, uchar *key, + uint key_length, uint nod_flag) +{ + uint16 page_size = mi_getint(page_buf); + uchar *key_start; + + key_start= key - nod_flag; + if (!nod_flag) + key_length += info->s->base.rec_reflength; + + memmove(key_start, key + key_length, page_size - key_length - + (key - page_buf)); + page_size-= key_length + nod_flag; + + mi_putint(page_buf, page_size, nod_flag); + return 0; +} + + +/* + Calculate and store key MBR +*/ + +int rtree_set_key_mbr(MI_INFO *info, MI_KEYDEF *keyinfo, uchar *key, + uint key_length, my_off_t child_page) +{ + if (!_mi_fetch_keypage(info, keyinfo, child_page, + DFLT_INIT_HITS, info->buff, 0)) + return -1; + + return rtree_page_mbr(info, keyinfo->seg, info->buff, key, key_length); +} + + +/* + Choose non-leaf better key for insertion +*/ + +uchar *rtree_choose_key(MI_INFO *info, MI_KEYDEF *keyinfo, uchar *key, + uint key_length, uchar *page_buf, uint nod_flag) +{ + double increase; + double best_incr = DBL_MAX; + double area; + double best_area; + uchar *best_key; + uchar *k = rt_PAGE_FIRST_KEY(page_buf, nod_flag); + uchar *last = rt_PAGE_END(page_buf); + + LINT_INIT(best_area); + LINT_INIT(best_key); + + for (; k < last; k = rt_PAGE_NEXT_KEY(k, key_length, nod_flag)) + { + if ((increase = rtree_area_increase(keyinfo->seg, key, k, key_length, + &area)) == -1) + return NULL; + if (increase < best_incr) + { + best_key = k; + best_area = area; + best_incr = increase; + } + else + { + if ((increase == best_incr) && (area < best_area)) + { + best_key = k; + best_area = area; + best_incr = increase; + } + } + } + return best_key; +} diff --git a/myisam/rt_key.h b/myisam/rt_key.h new file mode 100644 index 00000000000..dfd7b874b54 --- /dev/null +++ b/myisam/rt_key.h @@ -0,0 +1,31 @@ +/* Copyright (C) 2000 MySQL AB & Ramil Kalimullin & MySQL Finland AB + & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Written by Ramil Kalimullin, who has a shared copyright to this code */ + +#ifndef _rt_key_h +#define _rt_key_h + +int rtree_add_key(MI_INFO *info, MI_KEYDEF *keyinfo, uchar *key, + uint key_length, uchar *page_buf, my_off_t *new_page); +int rtree_delete_key(MI_INFO *info, uchar *page, uchar *key, + uint key_length, uint nod_flag); +int rtree_set_key_mbr(MI_INFO *info, MI_KEYDEF *keyinfo, uchar *key, + uint key_length, my_off_t child_page); +uchar *rtree_choose_key(MI_INFO *info, MI_KEYDEF *keyinfo, uchar *key, + uint key_length, uchar *page_buf, uint nod_flag); +#endif /* _rt_key_h */ diff --git a/myisam/rt_mbr.c b/myisam/rt_mbr.c new file mode 100644 index 00000000000..bb13c0769b3 --- /dev/null +++ b/myisam/rt_mbr.c @@ -0,0 +1,759 @@ +/* Copyright (C) 2000 MySQL AB & Ramil Kalimullin & MySQL Finland AB + & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include "myisamdef.h" + +#include "rt_index.h" +#include "rt_mbr.h" + +#define INTERSECT_CMP(amin, amax, bmin, bmax) ((amin > bmax) || (bmin > amax)) +#define CONTAIN_CMP(amin, amax, bmin, bmax) ((bmin > amin) || (bmax < amax)) +#define WITHIN_CMP(amin, amax, bmin, bmax) ((amin > bmin) || (amax < bmax)) +#define DISJOINT_CMP(amin, amax, bmin, bmax) ((amin <= bmax) && (bmin <= amax)) +#define EQUAL_CMP(amix, amax, bmin, bmax) ((amix != bmin) || (amax != bmax)) + +#define FCMP(A, B) ((int)(A) - (int)(B)) +#define p_inc(A, B, X) {A += X; B += X;} + +#define RT_CMP(nextflag) \ + if (nextflag & MBR_INTERSECT) \ + { \ + if (INTERSECT_CMP(amin, amax, bmin, bmax)) \ + return 1; \ + } \ + else if (nextflag & MBR_CONTAIN) \ + { \ + if (CONTAIN_CMP(amin, amax, bmin, bmax)) \ + return 1; \ + } \ + else if (nextflag & MBR_WITHIN) \ + { \ + if (WITHIN_CMP(amin, amax, bmin, bmax)) \ + return 1; \ + } \ + else if (nextflag & MBR_EQUAL) \ + { \ + if (EQUAL_CMP(amin, amax, bmin, bmax)) \ + return 1; \ + } \ + else /* if (nextflag & MBR_DISJOINT) */ \ + { \ + if (DISJOINT_CMP(amin, amax, bmin, bmax)) \ + return 1; \ + } + +#define RT_CMP_KORR(type, korr_func, len, nextflag) \ +{ \ + type amin, amax, bmin, bmax; \ + amin = korr_func(a); \ + bmin = korr_func(b); \ + p_inc(a, b, len); \ + amax = korr_func(a); \ + bmax = korr_func(b); \ + RT_CMP(nextflag); \ + p_inc(a, b, len); \ + break; \ +} + +#define RT_CMP_GET(type, get_func, len, nextflag) \ +{ \ + type amin, amax, bmin, bmax; \ + get_func(amin, a); \ + get_func(bmin, b); \ + p_inc(a, b, len); \ + get_func(amax, a); \ + get_func(bmax, b); \ + RT_CMP(nextflag); \ + p_inc(a, b, len); \ + break; \ +} + +/* + Compares two keys a and b depending on nextflag + nextflag can contain these flags: + MBR_INTERSECT(a,b) a overlaps b + MBR_CONTAIN(a,b) a contains b + MBR_DISJOINT(a,b) a disjoint b + MBR_WITHIN(a,b) a within b + MBR_EQUAL(a,b) All coordinates of MBRs are equal + MBR_DATA(a,b) Data reference is the same + Returns 0 on success. +*/ +int rtree_key_cmp(HA_KEYSEG *keyseg, uchar *b, uchar *a, uint key_length, + uint nextflag) +{ + for (; (int) key_length > 0; keyseg += 2 ) + { + key_length -= keyseg->length * 2; + + switch ((enum ha_base_keytype) keyseg->type) { + case HA_KEYTYPE_TEXT: + case HA_KEYTYPE_BINARY: + case HA_KEYTYPE_VARTEXT: + case HA_KEYTYPE_VARBINARY: + case HA_KEYTYPE_NUM: + default: + return 1; + break; + case HA_KEYTYPE_INT8: + { + int amin,amax,bmin,bmax; + amin = (int)*((signed char *)a); + bmin = (int)*((signed char *)b); + p_inc(a, b, 1); + amax = (int)*((signed char *)a); + bmax = (int)*((signed char *)b); + RT_CMP(nextflag); + p_inc(a, b, 1); + break; + } + case HA_KEYTYPE_SHORT_INT: + RT_CMP_KORR(int16, mi_sint2korr, 2, nextflag); + case HA_KEYTYPE_USHORT_INT: + RT_CMP_KORR(uint16, mi_uint2korr, 2, nextflag); + case HA_KEYTYPE_INT24: + RT_CMP_KORR(int32, mi_sint3korr, 3, nextflag); + case HA_KEYTYPE_UINT24: + RT_CMP_KORR(uint32, mi_uint3korr, 3, nextflag); + case HA_KEYTYPE_LONG_INT: + RT_CMP_KORR(int32, mi_sint4korr, 4, nextflag); + case HA_KEYTYPE_ULONG_INT: + RT_CMP_KORR(uint32, mi_uint4korr, 4, nextflag); +#ifdef HAVE_LONG_LONG + case HA_KEYTYPE_LONGLONG: + RT_CMP_KORR(longlong, mi_sint8korr, 8, nextflag) + case HA_KEYTYPE_ULONGLONG: + RT_CMP_KORR(ulonglong, mi_uint8korr, 8, nextflag) +#endif + case HA_KEYTYPE_FLOAT: + RT_CMP_GET(float, mi_float4get, 4, nextflag); + case HA_KEYTYPE_DOUBLE: + RT_CMP_GET(double, mi_float8get, 8, nextflag); + case HA_KEYTYPE_END: + goto end; + } + } + +end: + if (nextflag & MBR_DATA) + { + uchar *end = a + keyseg->length; + do + { + if (*a++ != *b++) + return FCMP(a[-1], b[-1]); + } while (a != end); + } + return 0; +} + +#define RT_VOL_KORR(type, korr_func, len, cast) \ +{ \ + type amin, amax; \ + amin = korr_func(a); \ + a += len; \ + amax = korr_func(a); \ + a += len; \ + res *= (cast(amax) - cast(amin)); \ + break; \ +} + +#define RT_VOL_GET(type, get_func, len, cast) \ +{ \ + type amin, amax; \ + get_func(amin, a); \ + a += len; \ + get_func(amax, a); \ + a += len; \ + res *= (cast(amax) - cast(amin)); \ + break; \ +} + +/* + Calculates rectangle volume +*/ +double rtree_rect_volume(HA_KEYSEG *keyseg, uchar *a, uint key_length) +{ + double res = 1; + for (; (int)key_length > 0; keyseg += 2) + { + key_length -= keyseg->length * 2; + + switch ((enum ha_base_keytype) keyseg->type) { + case HA_KEYTYPE_TEXT: + case HA_KEYTYPE_BINARY: + case HA_KEYTYPE_VARTEXT: + case HA_KEYTYPE_VARBINARY: + case HA_KEYTYPE_NUM: + default: + return 1; + break; + case HA_KEYTYPE_INT8: + { + int amin, amax; + amin = (int)*((signed char *)a); + a += 1; + amax = (int)*((signed char *)a); + a += 1; + res *= ((double)amax - (double)amin); + break; + } + case HA_KEYTYPE_SHORT_INT: + RT_VOL_KORR(int16, mi_sint2korr, 2, (double)); + case HA_KEYTYPE_USHORT_INT: + RT_VOL_KORR(uint16, mi_uint2korr, 2, (double)); + case HA_KEYTYPE_INT24: + RT_VOL_KORR(int32, mi_sint3korr, 3, (double)); + case HA_KEYTYPE_UINT24: + RT_VOL_KORR(uint32, mi_uint3korr, 3, (double)); + case HA_KEYTYPE_LONG_INT: + RT_VOL_KORR(int32, mi_sint4korr, 4, (double)); + case HA_KEYTYPE_ULONG_INT: + RT_VOL_KORR(uint32, mi_uint4korr, 4, (double)); +#ifdef HAVE_LONG_LONG + case HA_KEYTYPE_LONGLONG: + RT_VOL_KORR(longlong, mi_sint8korr, 8, (double)); + case HA_KEYTYPE_ULONGLONG: + RT_VOL_KORR(longlong, mi_sint8korr, 8, ulonglong2double); +#endif + case HA_KEYTYPE_FLOAT: + RT_VOL_GET(float, mi_float4get, 4, (double)); + case HA_KEYTYPE_DOUBLE: + RT_VOL_GET(double, mi_float8get, 8, (double)); + case HA_KEYTYPE_END: + key_length = 0; + break; + } + } + return res; +} + +#define RT_D_MBR_KORR(type, korr_func, len, cast) \ +{ \ + type amin, amax; \ + amin = korr_func(a); \ + a += len; \ + amax = korr_func(a); \ + a += len; \ + *res++ = cast(amin); \ + *res++ = cast(amax); \ + break; \ +} + +#define RT_D_MBR_GET(type, get_func, len, cast) \ +{ \ + type amin, amax; \ + get_func(amin, a); \ + a += len; \ + get_func(amax, a); \ + a += len; \ + *res++ = cast(amin); \ + *res++ = cast(amax); \ + break; \ +} + +/* + Creates an MBR as an array of doubles. +*/ +int rtree_d_mbr(HA_KEYSEG *keyseg, uchar *a, uint key_length, double *res) +{ + for (; (int)key_length > 0; keyseg += 2) + { + key_length -= keyseg->length * 2; + + switch ((enum ha_base_keytype) keyseg->type) { + case HA_KEYTYPE_TEXT: + case HA_KEYTYPE_BINARY: + case HA_KEYTYPE_VARTEXT: + case HA_KEYTYPE_VARBINARY: + case HA_KEYTYPE_NUM: + default: + return 1; + break; + case HA_KEYTYPE_INT8: + { + int amin, amax; + amin = (int)*((signed char *)a); + a += 1; + amax = (int)*((signed char *)a); + a += 1; + *res++ = (double)amin; + *res++ = (double)amax; + break; + } + case HA_KEYTYPE_SHORT_INT: + RT_D_MBR_KORR(int16, mi_sint2korr, 2, (double)); + case HA_KEYTYPE_USHORT_INT: + RT_D_MBR_KORR(uint16, mi_uint2korr, 2, (double)); + case HA_KEYTYPE_INT24: + RT_D_MBR_KORR(int32, mi_sint3korr, 3, (double)); + case HA_KEYTYPE_UINT24: + RT_D_MBR_KORR(uint32, mi_uint3korr, 3, (double)); + case HA_KEYTYPE_LONG_INT: + RT_D_MBR_KORR(int32, mi_sint4korr, 4, (double)); + case HA_KEYTYPE_ULONG_INT: + RT_D_MBR_KORR(uint32, mi_uint4korr, 4, (double)); +#ifdef HAVE_LONG_LONG + case HA_KEYTYPE_LONGLONG: + RT_D_MBR_KORR(longlong, mi_sint8korr, 8, (double)); + case HA_KEYTYPE_ULONGLONG: + RT_D_MBR_KORR(longlong, mi_sint8korr, 8, ulonglong2double); +#endif + case HA_KEYTYPE_FLOAT: + RT_D_MBR_GET(float, mi_float4get, 4, (double)); + case HA_KEYTYPE_DOUBLE: + RT_D_MBR_GET(double, mi_float8get, 8, (double)); + case HA_KEYTYPE_END: + key_length = 0; + break; + } + } + return 0; +} + +#define RT_COMB_KORR(type, korr_func, store_func, len) \ +{ \ + type amin, amax, bmin, bmax; \ + amin = korr_func(a); \ + bmin = korr_func(b); \ + p_inc(a, b, len); \ + amax = korr_func(a); \ + bmax = korr_func(b); \ + p_inc(a, b, len); \ + amin = min(amin, bmin); \ + amax = max(amax, bmax); \ + store_func(c, amin); \ + c += len; \ + store_func(c, amax); \ + c += len; \ + break; \ +} + +#define RT_COMB_GET(type, get_func, store_func, len) \ +{ \ + type amin, amax, bmin, bmax; \ + get_func(amin, a); \ + get_func(bmin, b); \ + p_inc(a, b, len); \ + get_func(amax, a); \ + get_func(bmax, b); \ + p_inc(a, b, len); \ + amin = min(amin, bmin); \ + amax = max(amax, bmax); \ + store_func(c, amin); \ + c += len; \ + store_func(c, amax); \ + c += len; \ + break; \ +} + +/* + Creates common minimal bounding rectungle + for two input rectagnles a and b + Result is written to c +*/ + +int rtree_combine_rect(HA_KEYSEG *keyseg, uchar* a, uchar* b, uchar* c, + uint key_length) +{ + + for ( ; (int) key_length > 0 ; keyseg += 2) + { + key_length -= keyseg->length * 2; + + switch ((enum ha_base_keytype) keyseg->type) { + case HA_KEYTYPE_TEXT: + case HA_KEYTYPE_BINARY: + case HA_KEYTYPE_VARTEXT: + case HA_KEYTYPE_VARBINARY: + case HA_KEYTYPE_NUM: + default: + return 1; + break; + case HA_KEYTYPE_INT8: + { + int amin, amax, bmin, bmax; + amin = (int)*((signed char *)a); + bmin = (int)*((signed char *)b); + p_inc(a, b, 1); + amax = (int)*((signed char *)a); + bmax = (int)*((signed char *)b); + p_inc(a, b, 1); + amin = min(amin, bmin); + amax = max(amax, bmax); + *((signed char*)c) = amin; + c += 1; + *((signed char*)c) = amax; + c += 1; + break; + } + case HA_KEYTYPE_SHORT_INT: + RT_COMB_KORR(int16, mi_sint2korr, mi_int2store, 2); + case HA_KEYTYPE_USHORT_INT: + RT_COMB_KORR(uint16, mi_uint2korr, mi_int2store, 2); + case HA_KEYTYPE_INT24: + RT_COMB_KORR(int32, mi_sint3korr, mi_int3store, 3); + case HA_KEYTYPE_UINT24: + RT_COMB_KORR(uint32, mi_uint3korr, mi_int3store, 3); + case HA_KEYTYPE_LONG_INT: + RT_COMB_KORR(int32, mi_sint4korr, mi_int4store, 4); + case HA_KEYTYPE_ULONG_INT: + RT_COMB_KORR(uint32, mi_uint4korr, mi_int4store, 4); +#ifdef HAVE_LONG_LONG + case HA_KEYTYPE_LONGLONG: + RT_COMB_KORR(longlong, mi_sint8korr, mi_int8store, 8); + case HA_KEYTYPE_ULONGLONG: + RT_COMB_KORR(ulonglong, mi_uint8korr, mi_int8store, 8); +#endif + case HA_KEYTYPE_FLOAT: + RT_COMB_GET(float, mi_float4get, mi_float4store, 4); + case HA_KEYTYPE_DOUBLE: + RT_COMB_GET(double, mi_float8get, mi_float8store, 8); + case HA_KEYTYPE_END: + return 0; + } + } + return 0; +} + +#define RT_OVL_AREA_KORR(type, korr_func, len) \ +{ \ + type amin, amax, bmin, bmax; \ + amin = korr_func(a); \ + bmin = korr_func(b); \ + p_inc(a, b, len); \ + amax = korr_func(a); \ + bmax = korr_func(b); \ + p_inc(a, b, len); \ + amin = max(amin, bmin); \ + amax = min(amax, bmax); \ + if (amin >= amax) \ + return 0; \ + res *= amax - amin; \ + break; \ +} + +#define RT_OVL_AREA_GET(type, get_func, len) \ +{ \ + type amin, amax, bmin, bmax; \ + get_func(amin, a); \ + get_func(bmin, b); \ + p_inc(a, b, len); \ + get_func(amax, a); \ + get_func(bmax, b); \ + p_inc(a, b, len); \ + amin = max(amin, bmin); \ + amax = min(amax, bmax); \ + if (amin >= amax) \ + return 0; \ + res *= amax - amin; \ + break; \ +} + +/* +Calculates overlapping area of two MBRs a & b +*/ +double rtree_overlapping_area(HA_KEYSEG *keyseg, uchar* a, uchar* b, + uint key_length) +{ + double res = 1; + for (; (int) key_length > 0 ; keyseg += 2) + { + key_length -= keyseg->length * 2; + + switch ((enum ha_base_keytype) keyseg->type) { + case HA_KEYTYPE_TEXT: + case HA_KEYTYPE_BINARY: + case HA_KEYTYPE_VARTEXT: + case HA_KEYTYPE_VARBINARY: + case HA_KEYTYPE_NUM: + default: + return -1; + break; + case HA_KEYTYPE_INT8: + { + int amin, amax, bmin, bmax; + amin = (int)*((signed char *)a); + bmin = (int)*((signed char *)b); + p_inc(a, b, 1); + amax = (int)*((signed char *)a); + bmax = (int)*((signed char *)b); + p_inc(a, b, 1); + amin = max(amin, bmin); + amax = min(amax, bmax); + if (amin >= amax) + return 0; + res *= amax - amin; + break; + } + case HA_KEYTYPE_SHORT_INT: + RT_OVL_AREA_KORR(int16, mi_sint2korr, 2); + case HA_KEYTYPE_USHORT_INT: + RT_OVL_AREA_KORR(uint16, mi_uint2korr, 2); + case HA_KEYTYPE_INT24: + RT_OVL_AREA_KORR(int32, mi_sint3korr, 3); + case HA_KEYTYPE_UINT24: + RT_OVL_AREA_KORR(uint32, mi_uint3korr, 3); + case HA_KEYTYPE_LONG_INT: + RT_OVL_AREA_KORR(int32, mi_sint4korr, 4); + case HA_KEYTYPE_ULONG_INT: + RT_OVL_AREA_KORR(uint32, mi_uint4korr, 4); +#ifdef HAVE_LONG_LONG + case HA_KEYTYPE_LONGLONG: + RT_OVL_AREA_KORR(longlong, mi_sint8korr, 8); + case HA_KEYTYPE_ULONGLONG: + RT_OVL_AREA_KORR(longlong, mi_sint8korr, 8); +#endif + case HA_KEYTYPE_FLOAT: + RT_OVL_AREA_GET(float, mi_float4get, 4); + case HA_KEYTYPE_DOUBLE: + RT_OVL_AREA_GET(double, mi_float8get, 8); + case HA_KEYTYPE_END: + return res; + } + } + return res; +} + +#define RT_AREA_INC_KORR(type, korr_func, len) \ +{ \ + type amin, amax, bmin, bmax; \ + amin = korr_func(a); \ + bmin = korr_func(b); \ + p_inc(a, b, len); \ + amax = korr_func(a); \ + bmax = korr_func(b); \ + p_inc(a, b, len); \ + a_area *= (((double)amax) - ((double)amin)); \ + *ab_area *= ((double)max(amax, bmax) - (double)min(amin, bmin)); \ + break; \ +} + +#define RT_AREA_INC_GET(type, get_func, len)\ +{\ + type amin, amax, bmin, bmax; \ + get_func(amin, a); \ + get_func(bmin, b); \ + p_inc(a, b, len); \ + get_func(amax, a); \ + get_func(bmax, b); \ + p_inc(a, b, len); \ + a_area *= (((double)amax) - ((double)amin)); \ + *ab_area *= ((double)max(amax, bmax) - (double)min(amin, bmin)); \ + break; \ +} + +/* +Calculates MBR_AREA(a+b) - MBR_AREA(a) +*/ +double rtree_area_increase(HA_KEYSEG *keyseg, uchar* a, uchar* b, + uint key_length, double *ab_area) +{ + double a_area = 1; + + *ab_area = 1; + for (; (int)key_length > 0; keyseg += 2) + { + key_length -= keyseg->length * 2; + + /* Handle NULL part */ + if (keyseg->null_bit) + { + return -1; + } + + switch ((enum ha_base_keytype) keyseg->type) { + case HA_KEYTYPE_TEXT: + case HA_KEYTYPE_BINARY: + case HA_KEYTYPE_VARTEXT: + case HA_KEYTYPE_VARBINARY: + case HA_KEYTYPE_NUM: + default: + return 1; + break; + case HA_KEYTYPE_INT8: + { + int amin, amax, bmin, bmax; + amin = (int)*((signed char *)a); + bmin = (int)*((signed char *)b); + p_inc(a, b, 1); + amax = (int)*((signed char *)a); + bmax = (int)*((signed char *)b); + p_inc(a, b, 1); + a_area *= (((double)amax) - ((double)amin)); + *ab_area *= ((double)max(amax, bmax) - (double)min(amin, bmin)); + break; + } + case HA_KEYTYPE_SHORT_INT: + RT_AREA_INC_KORR(int16, mi_sint2korr, 2); + case HA_KEYTYPE_USHORT_INT: + RT_AREA_INC_KORR(uint16, mi_uint2korr, 2); + case HA_KEYTYPE_INT24: + RT_AREA_INC_KORR(int32, mi_sint3korr, 3); + case HA_KEYTYPE_UINT24: + RT_AREA_INC_KORR(int32, mi_uint3korr, 3); + case HA_KEYTYPE_LONG_INT: + RT_AREA_INC_KORR(int32, mi_sint4korr, 4); + case HA_KEYTYPE_ULONG_INT: + RT_AREA_INC_KORR(uint32, mi_uint4korr, 4); +#ifdef HAVE_LONG_LONG + case HA_KEYTYPE_LONGLONG: + RT_AREA_INC_KORR(longlong, mi_sint8korr, 8); + case HA_KEYTYPE_ULONGLONG: + RT_AREA_INC_KORR(longlong, mi_sint8korr, 8); +#endif + case HA_KEYTYPE_FLOAT: + RT_AREA_INC_GET(float, mi_float4get, 4); + case HA_KEYTYPE_DOUBLE: + RT_AREA_INC_GET(double, mi_float8get, 8); + case HA_KEYTYPE_END: + return *ab_area - a_area; + } + } + return *ab_area - a_area; +} + +#define RT_PAGE_MBR_KORR(type, korr_func, store_func, len) \ +{ \ + type amin, amax, bmin, bmax; \ + amin = korr_func(k + inc); \ + amax = korr_func(k + inc + len); \ + k = rt_PAGE_NEXT_KEY(k, k_len, nod_flag); \ + for (; k < last; k = rt_PAGE_NEXT_KEY(k, k_len, nod_flag)) \ +{ \ + bmin = korr_func(k + inc); \ + bmax = korr_func(k + inc + len); \ + if (amin > bmin) \ + amin = bmin; \ + if (amax < bmax) \ + amax = bmax; \ +} \ + store_func(c, amin); \ + c += len; \ + store_func(c, amax); \ + c += len; \ + inc += 2 * len; \ + break; \ +} + +#define RT_PAGE_MBR_GET(type, get_func, store_func, len) \ +{ \ + type amin, amax, bmin, bmax; \ + get_func(amin, k + inc); \ + get_func(amax, k + inc + len); \ + k = rt_PAGE_NEXT_KEY(k, k_len, nod_flag); \ + for (; k < last; k = rt_PAGE_NEXT_KEY(k, k_len, nod_flag)) \ +{ \ + get_func(bmin, k + inc); \ + get_func(bmax, k + inc + len); \ + if (amin > bmin) \ + amin = bmin; \ + if (amax < bmax) \ + amax = bmax; \ +} \ + store_func(c, amin); \ + c += len; \ + store_func(c, amax); \ + c += len; \ + inc += 2 * len; \ + break; \ +} + +/* +Calculates key page total MBR = MBR(key1) + MBR(key2) + ... +*/ +int rtree_page_mbr(MI_INFO *info, HA_KEYSEG *keyseg, uchar *page_buf, + uchar *c, uint key_length) +{ + uint inc = 0; + uint k_len = key_length; + uint nod_flag = mi_test_if_nod(page_buf); + uchar *k; + uchar *last = rt_PAGE_END(page_buf); + + for (; (int)key_length > 0; keyseg += 2) + { + key_length -= keyseg->length * 2; + + /* Handle NULL part */ + if (keyseg->null_bit) + { + return 1; + } + + k = rt_PAGE_FIRST_KEY(page_buf, nod_flag); + + switch ((enum ha_base_keytype) keyseg->type) { + case HA_KEYTYPE_TEXT: + case HA_KEYTYPE_BINARY: + case HA_KEYTYPE_VARTEXT: + case HA_KEYTYPE_VARBINARY: + case HA_KEYTYPE_NUM: + default: + return 1; + break; + case HA_KEYTYPE_INT8: + { + int amin, amax, bmin, bmax; + amin = (int)*((signed char *)(k + inc)); + amax = (int)*((signed char *)(k + inc + 1)); + k = rt_PAGE_NEXT_KEY(k, k_len, nod_flag); + for (; k < last; k = rt_PAGE_NEXT_KEY(k, k_len, nod_flag)) + { + bmin = (int)*((signed char *)(k + inc)); + bmax = (int)*((signed char *)(k + inc + 1)); + + if (amin > bmin) + amin = bmin; + if (amax < bmax) + amax = bmax; + } + *((signed char*)c) = amin; + c += 1; + *((signed char*)c) = amax; + c += 1; + inc += 1 * 2; + break; + } + case HA_KEYTYPE_SHORT_INT: + RT_PAGE_MBR_KORR(int16, mi_sint2korr, mi_int2store, 2); + case HA_KEYTYPE_USHORT_INT: + RT_PAGE_MBR_KORR(uint16, mi_uint2korr, mi_int2store, 2); + case HA_KEYTYPE_INT24: + RT_PAGE_MBR_KORR(int32, mi_sint3korr, mi_int3store, 3); + case HA_KEYTYPE_UINT24: + RT_PAGE_MBR_KORR(uint32, mi_uint3korr, mi_int3store, 3); + case HA_KEYTYPE_LONG_INT: + RT_PAGE_MBR_KORR(int32, mi_sint4korr, mi_int4store, 4); + case HA_KEYTYPE_ULONG_INT: + RT_PAGE_MBR_KORR(uint32, mi_uint4korr, mi_int4store, 4); +#ifdef HAVE_LONG_LONG + case HA_KEYTYPE_LONGLONG: + RT_PAGE_MBR_KORR(longlong, mi_sint8korr, mi_int8store, 8); + case HA_KEYTYPE_ULONGLONG: + RT_PAGE_MBR_KORR(ulonglong, mi_uint8korr, mi_int8store, 8); +#endif + case HA_KEYTYPE_FLOAT: + RT_PAGE_MBR_GET(float, mi_float4get, mi_float4store, 4); + case HA_KEYTYPE_DOUBLE: + RT_PAGE_MBR_GET(double, mi_float8get, mi_float8store, 8); + case HA_KEYTYPE_END: + return 0; + } + } + return 0; +} diff --git a/myisam/rt_mbr.h b/myisam/rt_mbr.h new file mode 100644 index 00000000000..a68807370f9 --- /dev/null +++ b/myisam/rt_mbr.h @@ -0,0 +1,33 @@ +/* Copyright (C) 2000 MySQL AB & Ramil Kalimullin & MySQL Finland AB + & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#ifndef _rt_mbr_h +#define _rt_mbr_h + +int rtree_key_cmp(HA_KEYSEG *keyseg, uchar *a, uchar *b, uint key_length, + uint nextflag); +int rtree_combine_rect(HA_KEYSEG *keyseg,uchar *, uchar *, uchar*, + uint key_length); +double rtree_rect_volume(HA_KEYSEG *keyseg, uchar*, uint key_length); +int rtree_d_mbr(HA_KEYSEG *keyseg, uchar *a, uint key_length, double *res); +double rtree_overlapping_area(HA_KEYSEG *keyseg, uchar *a, uchar *b, + uint key_length); +double rtree_area_increase(HA_KEYSEG *keyseg, uchar *a, uchar *b, + uint key_length, double *ab_area); +int rtree_page_mbr(MI_INFO *info, HA_KEYSEG *keyseg, uchar *page_buf, + uchar* c, uint key_length); +#endif /* _rt_mbr_h */ diff --git a/myisam/rt_split.c b/myisam/rt_split.c new file mode 100644 index 00000000000..62b8ea6a65b --- /dev/null +++ b/myisam/rt_split.c @@ -0,0 +1,348 @@ +/* Copyright (C) 2000 MySQL AB & Alexey Botchkov & MySQL Finland AB + & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include "myisamdef.h" + +#include "rt_index.h" +#include "rt_key.h" +#include "rt_mbr.h" + +typedef struct +{ + double square; + int n_node; + uchar *key; + double *coords; +} SplitStruct; + +inline static double *reserve_coords(double **d_buffer, int n_dim) +{ + double *coords = *d_buffer; + (*d_buffer) += n_dim * 2; + return coords; +} + +static void mbr_join(double *a, const double *b, int n_dim) +{ + double *end = a + n_dim * 2; + do + { + if (a[0] > b[0]) + a[0] = b[0]; + + if (a[1] < b[1]) + a[1] = b[1]; + + a += 2; + b += 2; + }while (a != end); +} + +/* +Counts the square of mbr which is a join of a and b +*/ +static double mbr_join_square(const double *a, const double *b, int n_dim) +{ + const double *end = a + n_dim * 2; + double square = 1.0; + do + { + square *= + ((a[1] < b[1]) ? b[1] : a[1]) - ((a[0] > b[0]) ? b[0] : a[0]); + + a += 2; + b += 2; + }while (a != end); + + return square; +} + +static double count_square(const double *a, int n_dim) +{ + const double *end = a + n_dim * 2; + double square = 1.0; + do + { + square *= a[1] - a[0]; + a += 2; + }while (a != end); + return square; +} + +inline static void copy_coords(double *dst, const double *src, int n_dim) +{ + memcpy(dst, src, sizeof(double) * (n_dim * 2)); +} + +/* +Select two nodes to collect group upon +*/ +static void pick_seeds(SplitStruct *node, int n_entries, + SplitStruct **seed_a, SplitStruct **seed_b, int n_dim) +{ + SplitStruct *cur1; + SplitStruct *lim1 = node + (n_entries - 1); + SplitStruct *cur2; + SplitStruct *lim2 = node + n_entries; + + double max_d = -DBL_MAX; + double d; + + for (cur1 = node; cur1 < lim1; ++cur1) + { + for (cur2=cur1 + 1; cur2 < lim2; ++cur2) + { + + d = mbr_join_square(cur1->coords, cur2->coords, n_dim) - cur1->square - + cur2->square; + if (d > max_d) + { + max_d = d; + *seed_a = cur1; + *seed_b = cur2; + } + } + } +} + +/* +Select next node and group where to add +*/ +static void pick_next(SplitStruct *node, int n_entries, double *g1, double *g2, + SplitStruct **choice, int *n_group, int n_dim) +{ + SplitStruct *cur = node; + SplitStruct *end = node + n_entries; + + double max_diff = -DBL_MAX; + + for (; cur<end; ++cur) + { + double diff; + double abs_diff; + + if (cur->n_node) + { + continue; + } + + diff = mbr_join_square(g1, cur->coords, n_dim) - + mbr_join_square(g2, cur->coords, n_dim); + + abs_diff = fabs(diff); + if (abs_diff > max_diff) + { + max_diff = abs_diff; + *n_group = 1 + (diff > 0); + *choice = cur; + } + } +} + +/* +Mark not-in-group entries as n_group +*/ +static void mark_all_entries(SplitStruct *node, int n_entries, int n_group) +{ + SplitStruct *cur = node; + SplitStruct *end = node + n_entries; + for (; cur<end; ++cur) + { + if (cur->n_node) + { + continue; + } + cur->n_node = n_group; + } +} + +static int split_rtree_node(SplitStruct *node, int n_entries, + int all_size, /* Total key's size */ + int key_size, + int min_size, /* Minimal group size */ + int size1, int size2 /* initial group sizes */, + double **d_buffer, int n_dim) +{ + SplitStruct *cur; + SplitStruct *a; + SplitStruct *b; + double *g1 = reserve_coords(d_buffer, n_dim); + double *g2 = reserve_coords(d_buffer, n_dim); + SplitStruct *next; + int next_node; + int i; + SplitStruct *end = node + n_entries; + + if (all_size < min_size * 2) + { + return 1; + } + + cur = node; + for (; cur<end; ++cur) + { + cur->square = count_square(cur->coords, n_dim); + cur->n_node = 0; + } + + pick_seeds(node, n_entries, &a, &b, n_dim); + a->n_node = 1; + b->n_node = 2; + + + copy_coords(g1, a->coords, n_dim); + size1 += key_size; + copy_coords(g2, b->coords, n_dim); + size2 += key_size; + + + for (i=n_entries - 2; i>0; --i) + { + if (all_size - (size2 + key_size) < min_size) /* Can't write into group 2 */ + { + mark_all_entries(node, n_entries, 1); + break; + } + + if (all_size - (size1 + key_size) < min_size) /* Can't write into group 1 */ + { + mark_all_entries(node, n_entries, 2); + break; + } + + pick_next(node, n_entries, g1, g2, &next, &next_node, n_dim); + if (next_node == 1) + { + size1 += key_size; + mbr_join(g1, next->coords, n_dim); + } + else + { + size2 += key_size; + mbr_join(g2, next->coords, n_dim); + } + next->n_node = next_node; + } + + return 0; +} + +int rtree_split_page(MI_INFO *info, MI_KEYDEF *keyinfo, uchar *page, uchar *key, + uint key_length, my_off_t *new_page_offs) +{ + int n1, n2; /* Number of items in groups */ + + SplitStruct *task; + SplitStruct *cur; + SplitStruct *stop; + double *coord_buf; + double *next_coord; + double *old_coord; + int n_dim; + uchar *source_cur, *cur1, *cur2; + uchar *new_page; + int err_code = 0; + + uint nod_flag = mi_test_if_nod(page); + uint full_length = key_length + (nod_flag ? nod_flag : + info->s->base.rec_reflength); + + int max_keys = (mi_getint(page)-2) / (full_length); + + n_dim = keyinfo->keysegs / 2; + + if (!my_multi_malloc(MYF(0), + &coord_buf, n_dim * 2 * sizeof(double) * (max_keys + 1 + 4), + &task, sizeof(SplitStruct) * (max_keys + 1), + NullS)) + return -1; + + next_coord = coord_buf; + + stop = task + max_keys; + source_cur = rt_PAGE_FIRST_KEY(page, nod_flag); + + for (cur = task; cur < stop; ++cur, source_cur = rt_PAGE_NEXT_KEY(source_cur, + key_length, nod_flag)) + { + cur->coords = reserve_coords(&next_coord, n_dim); + cur->key = source_cur; + rtree_d_mbr(keyinfo->seg, source_cur, key_length, cur->coords); + } + + cur->coords = reserve_coords(&next_coord, n_dim); + rtree_d_mbr(keyinfo->seg, key, key_length, cur->coords); + cur->key = key; + + old_coord = next_coord; + + if (split_rtree_node(task, max_keys + 1, + mi_getint(page) + full_length + 2, full_length, + rt_PAGE_MIN_SIZE(keyinfo->block_length), + 2, 2, &next_coord, n_dim)) + { + err_code = 1; + goto split_err; + } + + if (!(new_page = (uchar*)my_alloca((uint)keyinfo->block_length))) + { + err_code= -1; + goto split_err; + } + + stop = task + (max_keys + 1); + cur1 = rt_PAGE_FIRST_KEY(page, nod_flag); + cur2 = rt_PAGE_FIRST_KEY(new_page, nod_flag); + + n1 = 0; + n2 = 0; + for (cur = task; cur < stop; ++cur) + { + uchar *to; + if (cur->n_node == 1) + { + to = cur1; + cur1 = rt_PAGE_NEXT_KEY(cur1, key_length, nod_flag); + ++n1; + } + else + { + to = cur2; + cur2 = rt_PAGE_NEXT_KEY(cur2, key_length, nod_flag); + ++n2; + } + if (to != cur->key) + memcpy(to - nod_flag, cur->key - nod_flag, full_length); + } + + mi_putint(page, 2 + n1 * full_length, nod_flag); + mi_putint(new_page, 2 + n2 * full_length, nod_flag); + + if ((*new_page_offs= _mi_new(info, keyinfo, DFLT_INIT_HITS)) == + HA_OFFSET_ERROR) + err_code= -1; + else + err_code= _mi_write_keypage(info, keyinfo, *new_page_offs, + DFLT_INIT_HITS, new_page); + + my_afree((byte*)new_page); + +split_err: + my_free((gptr) coord_buf, MYF(0)); + return err_code; +} diff --git a/myisam/rt_test.c b/myisam/rt_test.c new file mode 100644 index 00000000000..61890bfded1 --- /dev/null +++ b/myisam/rt_test.c @@ -0,0 +1,421 @@ +/* Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Testing of the basic functions of a MyISAM rtree table */ +/* Written by Alex Barkov who has a shared copyright to this code */ + + +#include "myisam.h" +#include "rt_index.h" + +#define MAX_REC_LENGTH 1024 +#define ndims 2 +#define KEYALG HA_KEY_ALG_RTREE + +static int read_with_pos(MI_INFO * file, int silent); +static void create_record(char *record,uint rownr); +static void create_record1(char *record,uint rownr); +static void print_record(char * record,my_off_t offs,const char * tail); +static int run_test(const char *filename); + + +int main(int argc __attribute__((unused)),char *argv[] __attribute__((unused))) +{ + MY_INIT(argv[0]); + exit(run_test("rt_test")); +} + + + +int run_test(const char *filename) +{ + MI_INFO *file; + MI_UNIQUEDEF uniquedef; + MI_CREATE_INFO create_info; + MI_COLUMNDEF recinfo[20]; + MI_KEYDEF keyinfo[20]; + HA_KEYSEG keyseg[20]; + + int silent=0; + int opt_unique=0; + int create_flag=0; + int key_type=HA_KEYTYPE_DOUBLE; + int key_length=8; + int null_fields=0; + int nrecords=300; + int rec_length=0; + int uniques=0; + int i; + int error; + int row_count=0; + char record[MAX_REC_LENGTH]; + char read_record[MAX_REC_LENGTH]; + int upd= 10; + ha_rows hrows; + + + + /* Define a column for NULLs and DEL markers*/ + + recinfo[0].type=FIELD_NORMAL; + recinfo[0].length=1; /* For NULL bits */ + rec_length=1; + + + /* Define 2*ndims columns for coordinates*/ + + for (i=1; i<=2*ndims ;i++){ + recinfo[i].type=FIELD_NORMAL; + recinfo[i].length=key_length; + rec_length+=key_length; + } + + + /* Define a key with 2*ndims segments */ + + keyinfo[0].seg=keyseg; + keyinfo[0].keysegs=2*ndims; + keyinfo[0].flag=0; + keyinfo[0].key_alg=KEYALG; + + for (i=0; i<2*ndims; i++){ + keyinfo[0].seg[i].type= key_type; + keyinfo[0].seg[i].flag=0; /* Things like HA_REVERSE_SORT */ + keyinfo[0].seg[i].start= (key_length*i)+1; + keyinfo[0].seg[i].length=key_length; + keyinfo[0].seg[i].null_bit= null_fields ? 2 : 0; + keyinfo[0].seg[i].null_pos=0; + keyinfo[0].seg[i].language=default_charset_info->number; + } + + + if(!silent) + printf("- Creating isam-file\n"); + + bzero((char*) &create_info,sizeof(create_info)); + create_info.max_rows=10000000; + + if (mi_create(filename, + 1, /* keys */ + keyinfo, + 1+2*ndims+opt_unique, /* columns */ + recinfo,uniques,&uniquedef,&create_info,create_flag)) + goto err; + + + + + if(!silent) + printf("- Open isam-file\n"); + + if (!(file=mi_open(filename,2,HA_OPEN_ABORT_IF_LOCKED))) + goto err; + + + if (!silent) + printf("- Writing key:s\n"); + + for (i=0; i<nrecords; i++ ) + { + create_record(record,i); + error=mi_write(file,record); + print_record(record,mi_position(file),"\n"); + if (!error) + { + row_count++; + } + else + { + printf("mi_write: %d\n", error); + goto err; + } + } + + + if((error=read_with_pos(file,silent))) + goto err; + + + if (!silent) + printf("- Reading rows with key\n"); + + for (i=0 ; i < nrecords ; i++) + { + my_errno=0; + create_record(record,i); + + bzero((char*) read_record,MAX_REC_LENGTH); + error=mi_rkey(file,read_record,0,record+1,0,HA_READ_MBR_EQUAL); + + if(error && error!=HA_ERR_KEY_NOT_FOUND) + { + printf(" mi_rkey: %3d errno: %3d\n",error,my_errno); + goto err; + } + if(error == HA_ERR_KEY_NOT_FOUND) + { + print_record(record,mi_position(file)," NOT FOUND\n"); + continue; + } + print_record(read_record,mi_position(file),"\n"); + } + + + + + + if (!silent) + printf("- Deleting rows\n"); + for (i=0; i < nrecords/4; i++) + { + my_errno=0; + bzero((char*) read_record,MAX_REC_LENGTH); + error=mi_rrnd(file,read_record,i == 0 ? 0L : HA_OFFSET_ERROR); + if(error) + { + printf("pos: %2d mi_rrnd: %3d errno: %3d\n",i,error,my_errno); + goto err; + } + print_record(read_record,mi_position(file),"\n"); + + error=mi_delete(file,read_record); + if(error) + { + printf("pos: %2d mi_delete: %3d errno: %3d\n",i,error,my_errno); + goto err; + } + } + + + if (!silent) + printf("- Updating rows with position\n"); + for (i=0; i < (nrecords - nrecords/4) ; i++) + { + my_errno=0; + bzero((char*) read_record,MAX_REC_LENGTH); + error=mi_rrnd(file,read_record,i == 0 ? 0L : HA_OFFSET_ERROR); + if(error) + { + if(error==HA_ERR_RECORD_DELETED) + continue; + printf("pos: %2d mi_rrnd: %3d errno: %3d\n",i,error,my_errno); + goto err; + } + print_record(read_record,mi_position(file),""); + create_record(record,i+nrecords*upd); + printf("\t-> "); + print_record(record,mi_position(file),"\n"); + error=mi_update(file,read_record,record); + if(error) + { + printf("pos: %2d mi_update: %3d errno: %3d\n",i,error,my_errno); + goto err; + } + } + + + if((error=read_with_pos(file,silent))) + goto err; + + + + if (!silent) + printf("- Test mi_rkey then a sequence of mi_rnext_same\n"); + + create_record(record, nrecords*4/5); + print_record(record,0," search for\n"); + + if ((error=mi_rkey(file,read_record,0,record+1,0,HA_READ_MBR_INTERSECT))) + { + printf("mi_rkey: %3d errno: %3d\n",error,my_errno); + goto err; + } + print_record(read_record,mi_position(file)," mi_rkey\n"); + row_count=1; + + + do { + if((error=mi_rnext_same(file,read_record))) + { + if(error==HA_ERR_END_OF_FILE) + break; + printf("mi_next: %3d errno: %3d\n",error,my_errno); + goto err; + } + print_record(read_record,mi_position(file)," mi_rnext_same\n"); + row_count++; + }while(1); + printf(" %d rows\n",row_count); + + + + + + + if (!silent) + printf("- Test mi_rfirst then a sequence of mi_rnext\n"); + + error=mi_rfirst(file,read_record,0); + if (error) + { + printf("mi_rfirst: %3d errno: %3d\n",error,my_errno); + goto err; + } + row_count=1; + print_record(read_record,mi_position(file)," mi_frirst\n"); + + for(i=0;i<nrecords;i++) { + if((error=mi_rnext(file,read_record,0))) + { + if(error==HA_ERR_END_OF_FILE) + break; + printf("mi_next: %3d errno: %3d\n",error,my_errno); + goto err; + } + print_record(read_record,mi_position(file)," mi_rnext\n"); + row_count++; + } + printf(" %d rows\n",row_count); + + + if (!silent) + printf("- Test mi_records_in_range()\n"); + + create_record1(record, nrecords*4/5); + print_record(record,0,"\n"); + hrows=mi_records_in_range(file,0,record+1,0,HA_READ_MBR_INTERSECT,record+1,0,0); + printf(" %ld rows\n", (long) hrows); + + + if (mi_close(file)) goto err; + my_end(MY_CHECK_ERROR); + + return 0; + +err: + printf("got error: %3d when using myisam-database\n",my_errno); + return 1; /* skipp warning */ +} + + + +static int read_with_pos (MI_INFO * file,int silent) +{ + int error; + int i; + char read_record[MAX_REC_LENGTH]; + + if (!silent) + printf("- Reading rows with position\n"); + for (i=0;;i++) + { + my_errno=0; + bzero((char*) read_record,MAX_REC_LENGTH); + error=mi_rrnd(file,read_record,i == 0 ? 0L : HA_OFFSET_ERROR); + if(error) + { + if(error==HA_ERR_END_OF_FILE) + break; + if(error==HA_ERR_RECORD_DELETED) + continue; + printf("pos: %2d mi_rrnd: %3d errno: %3d\n",i,error,my_errno); + return error; + } + print_record(read_record,mi_position(file),"\n"); + } + return 0; +} + + +#ifdef NOT_USED +static void bprint_record(char * record, + my_off_t offs __attribute__((unused)), + const char * tail) +{ + int i; + char * pos; + i=(unsigned char)record[0]; + printf("%02X ",i); + + for( pos=record+1, i=0; i<32; i++,pos++){ + int b=(unsigned char)*pos; + printf("%02X",b); + } + printf("%s",tail); +} +#endif + + +static void print_record(char * record, + my_off_t offs __attribute__((unused)), + const char * tail) +{ + int i; + char * pos; + double c; + + printf(" rec=(%d)",(unsigned char)record[0]); + for ( pos=record+1, i=0; i<2*ndims; i++) + { + memcpy(&c,pos,sizeof(c)); + float8get(c,pos); + printf(" %.14g ",c); + pos+=sizeof(c); + } + printf("pos=%ld",(long int)offs); + printf("%s",tail); +} + + + +static void create_record1(char *record,uint rownr) +{ + int i; + char * pos; + double c=rownr+10; + + bzero((char*) record,MAX_REC_LENGTH); + record[0]=0x01; /* DEL marker */ + + for ( pos=record+1, i=0; i<2*ndims; i++) + { + memcpy(pos,&c,sizeof(c)); + float8store(pos,c); + pos+=sizeof(c); + } +} + + +static void create_record(char *record,uint rownr) +{ + int i; + char * pos; + double c=rownr+10; + double c0=0; + + bzero((char*) record,MAX_REC_LENGTH); + record[0]=0x01; /* DEL marker */ + + for ( pos=record+1, i=0; i<ndims; i++) + { + memcpy(pos,&c0,sizeof(c0)); + float8store(pos,c0); + pos+=sizeof(c0); + memcpy(pos,&c,sizeof(c)); + float8store(pos,c); + pos+=sizeof(c); + } +} diff --git a/myisam/sort.c b/myisam/sort.c index 95ede6ddaa1..651b2331cd1 100644 --- a/myisam/sort.c +++ b/myisam/sort.c @@ -39,13 +39,10 @@ #define MYF_RW MYF(MY_NABP | MY_WME | MY_WAIT_IF_FULL) #define DISK_BUFFER_SIZE (IO_SIZE*16) -typedef struct st_buffpek { - my_off_t file_pos; /* Where we are in the sort file */ - uchar *base,*key; /* Key pointers */ - ha_rows count; /* Number of rows in table */ - ulong mem_count; /* numbers of keys in memory */ - ulong max_keys; /* Max keys in buffert */ -} BUFFPEK; + +/* + Pointers of functions for store and read keys from temp file +*/ extern void print_error _VARARGS((const char *fmt,...)); @@ -56,7 +53,7 @@ static ha_rows NEAR_F find_all_keys(MI_SORT_PARAM *info,uint keys, DYNAMIC_ARRAY *buffpek,int *maxbuffer, IO_CACHE *tempfile, IO_CACHE *tempfile_for_exceptions); -static int NEAR_F write_keys(MI_SORT_PARAM *info,uchar * *sort_keys, +static int NEAR_F write_keys(MI_SORT_PARAM *info,uchar **sort_keys, uint count, BUFFPEK *buffpek,IO_CACHE *tempfile); static int NEAR_F write_key(MI_SORT_PARAM *info, uchar *key, IO_CACHE *tempfile); @@ -74,8 +71,20 @@ static int NEAR_F merge_buffers(MI_SORT_PARAM *info,uint keys, BUFFPEK *Fb, BUFFPEK *Tb); static int NEAR_F merge_index(MI_SORT_PARAM *,uint,uchar **,BUFFPEK *, int, IO_CACHE *); - - +static int flush_ft_buf(MI_SORT_PARAM *info); + +static int NEAR_F write_keys_varlen(MI_SORT_PARAM *info,uchar **sort_keys, + uint count, BUFFPEK *buffpek, + IO_CACHE *tempfile); +static uint NEAR_F read_to_buffer_varlen(IO_CACHE *fromfile,BUFFPEK *buffpek, + uint sort_length); +static int NEAR_F write_merge_key(MI_SORT_PARAM *info, IO_CACHE *to_file, + char *key, uint sort_length, uint count); +static int NEAR_F write_merge_key_varlen(MI_SORT_PARAM *info, + IO_CACHE *to_file, + char* key, uint sort_length, + uint count); +inline int my_var_write(MI_SORT_PARAM *info,IO_CACHE *to_file, byte *bufs); /* Creates a index of sorted keys @@ -102,6 +111,19 @@ int _create_index_by_sort(MI_SORT_PARAM *info,my_bool no_messages, DBUG_ENTER("_create_index_by_sort"); DBUG_PRINT("enter",("sort_length: %d", info->key_length)); + if (info->keyinfo->flag & HA_VAR_LENGTH_KEY) + { + info->write_keys=write_keys_varlen; + info->read_to_buffer=read_to_buffer_varlen; + info->write_key=write_merge_key_varlen; + } + else + { + info->write_keys=write_keys; + info->read_to_buffer=read_to_buffer; + info->write_key=write_merge_key; + } + my_b_clear(&tempfile); my_b_clear(&tempfile_for_exceptions); bzero((char*) &buffpek,sizeof(buffpek)); @@ -188,7 +210,7 @@ int _create_index_by_sort(MI_SORT_PARAM *info,my_bool no_messages, goto err; /* purecov: inspected */ } - if (flush_pending_blocks(info)) + if (flush_ft_buf(info) || flush_pending_blocks(info)) goto err; if (my_b_inited(&tempfile_for_exceptions)) @@ -249,7 +271,7 @@ static ha_rows NEAR_F find_all_keys(MI_SORT_PARAM *info, uint keys, if (++idx == keys) { - if (write_keys(info,sort_keys,idx-1,(BUFFPEK *)alloc_dynamic(buffpek), + if (info->write_keys(info,sort_keys,idx-1,(BUFFPEK *)alloc_dynamic(buffpek), tempfile)) DBUG_RETURN(HA_POS_ERROR); /* purecov: inspected */ @@ -263,7 +285,7 @@ static ha_rows NEAR_F find_all_keys(MI_SORT_PARAM *info, uint keys, DBUG_RETURN(HA_POS_ERROR); /* Aborted by get_key */ /* purecov: inspected */ if (buffpek->elements) { - if (write_keys(info,sort_keys,idx,(BUFFPEK *)alloc_dynamic(buffpek), + if (info->write_keys(info,sort_keys,idx,(BUFFPEK *)alloc_dynamic(buffpek), tempfile)) DBUG_RETURN(HA_POS_ERROR); /* purecov: inspected */ *maxbuffer=buffpek->elements-1; @@ -295,6 +317,19 @@ pthread_handler_decl(thr_find_all_keys,arg) if (info->sort_info->got_error) goto err; + if (info->keyinfo->flag && HA_VAR_LENGTH_KEY) + { + info->write_keys=write_keys_varlen; + info->read_to_buffer=read_to_buffer_varlen; + info->write_key=write_merge_key_varlen; + } + else + { + info->write_keys=write_keys; + info->read_to_buffer=read_to_buffer; + info->write_key=write_merge_key; + } + my_b_clear(&info->tempfile); my_b_clear(&info->tempfile_for_exceptions); bzero((char*) &info->buffpek,sizeof(info->buffpek)); @@ -367,7 +402,7 @@ pthread_handler_decl(thr_find_all_keys,arg) if (++idx == keys) { - if (write_keys(info,sort_keys,idx-1, + if (info->write_keys(info,sort_keys,idx-1, (BUFFPEK *)alloc_dynamic(&info->buffpek), &info->tempfile)) goto err; @@ -381,7 +416,7 @@ pthread_handler_decl(thr_find_all_keys,arg) goto err; if (info->buffpek.elements) { - if (write_keys(info,sort_keys, idx, + if (info->write_keys(info,sort_keys, idx, (BUFFPEK *) alloc_dynamic(&info->buffpek), &info->tempfile)) goto err; info->keys=(info->buffpek.elements-1)*(keys-1)+idx; @@ -449,7 +484,7 @@ int thr_write_keys(MI_SORT_PARAM *sort_param) fflush(stdout); } if (write_index(sinfo, sinfo->sort_keys, sinfo->keys) || - flush_pending_blocks(sinfo)) + flush_ft_buf(sinfo) || flush_pending_blocks(sinfo)) got_error=1; } } @@ -469,6 +504,18 @@ int thr_write_keys(MI_SORT_PARAM *sort_param) { if (got_error) continue; + if (sinfo->keyinfo->flag && HA_VAR_LENGTH_KEY) + { + sinfo->write_keys=write_keys_varlen; + sinfo->read_to_buffer=read_to_buffer_varlen; + sinfo->write_key=write_merge_key_varlen; + } + else + { + sinfo->write_keys=write_keys; + sinfo->read_to_buffer=read_to_buffer; + sinfo->write_key=write_merge_key; + } if (sinfo->buffpek.elements) { uint maxbuffer=sinfo->buffpek.elements-1; @@ -510,6 +557,7 @@ int thr_write_keys(MI_SORT_PARAM *sort_param) if (merge_index(sinfo, keys, (uchar **)mergebuf, dynamic_element(&sinfo->buffpek,0,BUFFPEK *), maxbuffer,&sinfo->tempfile) || + flush_ft_buf(sinfo) || flush_pending_blocks(sinfo)) { got_error=1; @@ -559,8 +607,8 @@ static int NEAR_F write_keys(MI_SORT_PARAM *info, register uchar **sort_keys, qsort2((byte*) sort_keys,count,sizeof(byte*),(qsort2_cmp) info->key_cmp, info); if (!my_b_inited(tempfile) && - open_cached_file(tempfile, info->tmpdir, "ST", DISK_BUFFER_SIZE, - info->sort_info->param->myf_rw)) + open_cached_file(tempfile, my_tmpdir(info->tmpdir), "ST", + DISK_BUFFER_SIZE, info->sort_info->param->myf_rw)) DBUG_RETURN(1); /* purecov: inspected */ buffpek->file_pos=my_b_tell(tempfile); @@ -575,6 +623,47 @@ static int NEAR_F write_keys(MI_SORT_PARAM *info, register uchar **sort_keys, } /* write_keys */ +inline int my_var_write(MI_SORT_PARAM *info, IO_CACHE *to_file, byte *bufs) +{ + int err; + uint16 len = _mi_keylength(info->keyinfo, (uchar*) bufs); + + /* The following is safe as this is a local file */ + if ((err= my_b_write(to_file, (byte*)&len, sizeof(len)))) + return (err); + if ((err= my_b_write(to_file,bufs, (uint) len))) + return (err); + return (0); +} + + +static int NEAR_F write_keys_varlen(MI_SORT_PARAM *info, + register uchar **sort_keys, + uint count, BUFFPEK *buffpek, + IO_CACHE *tempfile) +{ + uchar **end; + int err; + DBUG_ENTER("write_keys_varlen"); + + qsort2((byte*) sort_keys,count,sizeof(byte*),(qsort2_cmp) info->key_cmp, + info); + if (!my_b_inited(tempfile) && + open_cached_file(tempfile, my_tmpdir(info->tmpdir), "ST", + DISK_BUFFER_SIZE, info->sort_info->param->myf_rw)) + DBUG_RETURN(1); /* purecov: inspected */ + + buffpek->file_pos=my_b_tell(tempfile); + buffpek->count=count; + for (end=sort_keys+count ; sort_keys != end ; sort_keys++) + { + if ((err= my_var_write(info,tempfile, (byte*) *sort_keys))) + DBUG_RETURN(err); + } + DBUG_RETURN(0); +} /* write_keys_varlen */ + + static int NEAR_F write_key(MI_SORT_PARAM *info, uchar *key, IO_CACHE *tempfile) { @@ -582,8 +671,8 @@ static int NEAR_F write_key(MI_SORT_PARAM *info, uchar *key, DBUG_ENTER("write_key"); if (!my_b_inited(tempfile) && - open_cached_file(tempfile, info->tmpdir, "ST", DISK_BUFFER_SIZE, - info->sort_info->param->myf_rw)) + open_cached_file(tempfile, my_tmpdir(info->tmpdir), "ST", + DISK_BUFFER_SIZE, info->sort_info->param->myf_rw)) DBUG_RETURN(1); if (my_b_write(tempfile,(byte*)&key_length,sizeof(key_length)) || @@ -625,8 +714,8 @@ static int NEAR_F merge_many_buff(MI_SORT_PARAM *info, uint keys, if (*maxbuffer < MERGEBUFF2) DBUG_RETURN(0); /* purecov: inspected */ if (flush_io_cache(t_file) || - open_cached_file(&t_file2,info->tmpdir,"ST",DISK_BUFFER_SIZE, - info->sort_info->param->myf_rw)) + open_cached_file(&t_file2,my_tmpdir(info->tmpdir),"ST", + DISK_BUFFER_SIZE, info->sort_info->param->myf_rw)) DBUG_RETURN(1); /* purecov: inspected */ from_file= t_file ; to_file= &t_file2; @@ -689,6 +778,62 @@ static uint NEAR_F read_to_buffer(IO_CACHE *fromfile, BUFFPEK *buffpek, return (count*sort_length); } /* read_to_buffer */ +static uint NEAR_F read_to_buffer_varlen(IO_CACHE *fromfile, BUFFPEK *buffpek, + uint sort_length) +{ + register uint count; + uint16 length_of_key = 0; + uint idx; + uchar *buffp; + + if ((count=(uint) min((ha_rows) buffpek->max_keys,buffpek->count))) + { + buffp = buffpek->base; + + for (idx=1;idx<=count;idx++) + { + if (my_pread(fromfile->file,(byte*)&length_of_key,sizeof(length_of_key), + buffpek->file_pos,MYF_RW)) + return((uint) -1); + buffpek->file_pos+=sizeof(length_of_key); + if (my_pread(fromfile->file,(byte*) buffp,length_of_key, + buffpek->file_pos,MYF_RW)) + return((uint) -1); + buffpek->file_pos+=length_of_key; + buffp = buffp + sort_length; + } + buffpek->key=buffpek->base; + buffpek->count-= count; + buffpek->mem_count= count; + } + return (count*sort_length); +} /* read_to_buffer_varlen */ + + +static int NEAR_F write_merge_key_varlen(MI_SORT_PARAM *info, + IO_CACHE *to_file,char* key, + uint sort_length, uint count) +{ + uint idx; + + char *bufs = key; + for (idx=1;idx<=count;idx++) + { + int err; + if ((err= my_var_write(info,to_file, (byte*) bufs))) + return (err); + bufs=bufs+sort_length; + } + return(0); +} + + +static int NEAR_F write_merge_key(MI_SORT_PARAM *info __attribute__((unused)), + IO_CACHE *to_file, char* key, + uint sort_length, uint count) +{ + return my_b_write(to_file,(byte*) key,(uint) sort_length*count); +} /* Merge buffers to one buffer @@ -707,6 +852,7 @@ merge_buffers(MI_SORT_PARAM *info, uint keys, IO_CACHE *from_file, uchar *strpos; BUFFPEK *buffpek,**refpek; QUEUE queue; + volatile bool *killed= killed_ptr(info->sort_info->param); DBUG_ENTER("merge_buffers"); count=error=0; @@ -727,8 +873,7 @@ merge_buffers(MI_SORT_PARAM *info, uint keys, IO_CACHE *from_file, count+= buffpek->count; buffpek->base= strpos; buffpek->max_keys=maxcount; - strpos+= (uint) (error=(int) read_to_buffer(from_file,buffpek, - sort_length)); + strpos+= (uint) (error=(int) info->read_to_buffer(from_file,buffpek,sort_length)); if (error == -1) goto err; /* purecov: inspected */ queue_insert(&queue,(char*) buffpek); @@ -738,10 +883,14 @@ merge_buffers(MI_SORT_PARAM *info, uint keys, IO_CACHE *from_file, { for (;;) { + if (*killed) + { + error=1; goto err; + } buffpek=(BUFFPEK*) queue_top(&queue); if (to_file) { - if (my_b_write(to_file,(byte*) buffpek->key,(uint) sort_length)) + if (info->write_key(info,to_file,(byte*) buffpek->key,(uint) sort_length,1)) { error=1; goto err; /* purecov: inspected */ } @@ -756,7 +905,7 @@ merge_buffers(MI_SORT_PARAM *info, uint keys, IO_CACHE *from_file, buffpek->key+=sort_length; if (! --buffpek->mem_count) { - if (!(error=(int) read_to_buffer(from_file,buffpek,sort_length))) + if (!(error=(int) info->read_to_buffer(from_file,buffpek,sort_length))) { uchar *base=buffpek->base; uint max_keys=buffpek->max_keys; @@ -796,8 +945,8 @@ merge_buffers(MI_SORT_PARAM *info, uint keys, IO_CACHE *from_file, { if (to_file) { - if (my_b_write(to_file,(byte*) buffpek->key, - (sort_length*buffpek->mem_count))) + if (info->write_key(info,to_file,(byte*) buffpek->key, + sort_length,buffpek->mem_count)) { error=1; goto err; /* purecov: inspected */ } @@ -817,7 +966,7 @@ merge_buffers(MI_SORT_PARAM *info, uint keys, IO_CACHE *from_file, } } } - while ((error=(int) read_to_buffer(from_file,buffpek,sort_length)) != -1 && + while ((error=(int) info->read_to_buffer(from_file,buffpek,sort_length)) != -1 && error != 0); lastbuff->count=count; @@ -842,3 +991,16 @@ merge_index(MI_SORT_PARAM *info, uint keys, uchar **sort_keys, DBUG_RETURN(0); } /* merge_index */ +static int +flush_ft_buf(MI_SORT_PARAM *info) +{ + int err=0; + if (info->sort_info->ft_buf) + { + err=sort_ft_buf_flush(info); + my_free((gptr)info->sort_info->ft_buf, MYF(0)); + info->sort_info->ft_buf=0; + } + return err; +} + diff --git a/myisam/sp_defs.h b/myisam/sp_defs.h new file mode 100644 index 00000000000..0acefe32f80 --- /dev/null +++ b/myisam/sp_defs.h @@ -0,0 +1,45 @@ +/* Copyright (C) 2000 MySQL AB & Ramil Kalimullin & MySQL Finland AB + & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#ifndef _SP_DEFS_H +#define _SP_DEFS_H + +#define SPDIMS 2 +#define SPTYPE HA_KEYTYPE_DOUBLE +#define SPLEN 8 + +enum wkbType +{ + wkbPoint = 1, + wkbLineString = 2, + wkbPolygon = 3, + wkbMultiPoint = 4, + wkbMultiLineString = 5, + wkbMultiPolygon = 6, + wkbGeometryCollection = 7 +}; + +enum wkbByteOrder +{ + wkbXDR = 0, /* Big Endian */ + wkbNDR = 1 /* Little Endian */ +}; + +uint sp_make_key(register MI_INFO *info, uint keynr, uchar *key, + const byte *record, my_off_t filepos); + +#endif /* _SP_DEFS_H */ diff --git a/myisam/sp_key.c b/myisam/sp_key.c new file mode 100644 index 00000000000..f669d217026 --- /dev/null +++ b/myisam/sp_key.c @@ -0,0 +1,286 @@ +/* Copyright (C) 2000 MySQL AB & Ramil Kalimullin + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include "myisamdef.h" +#include "sp_defs.h" + +static int sp_add_point_to_mbr(uchar *(*wkb), uchar *end, uint n_dims, + uchar byte_order, double *mbr); +static int sp_get_point_mbr(uchar *(*wkb), uchar *end, uint n_dims, + uchar byte_order, double *mbr); +static int sp_get_linestring_mbr(uchar *(*wkb), uchar *end, uint n_dims, + uchar byte_order, double *mbr); +static int sp_get_polygon_mbr(uchar *(*wkb), uchar *end, uint n_dims, + uchar byte_order, double *mbr); +static int sp_get_geometry_mbr(uchar *(*wkb), uchar *end, uint n_dims, + double *mbr, int top); +static int sp_mbr_from_wkb(uchar (*wkb), uint size, uint n_dims, double *mbr); + + +uint sp_make_key(register MI_INFO *info, uint keynr, uchar *key, + const byte *record, my_off_t filepos) +{ + HA_KEYSEG *keyseg; + MI_KEYDEF *keyinfo = &info->s->keyinfo[keynr]; + uint len = 0; + byte *pos; + uint dlen; + uchar *dptr; + double mbr[SPDIMS * 2]; + uint i; + + keyseg = &keyinfo->seg[-1]; + pos = (byte*)record + keyseg->start; + + dlen = _mi_calc_blob_length(keyseg->bit_start, pos); + memcpy_fixed(&dptr, pos + keyseg->bit_start, sizeof(char*)); + sp_mbr_from_wkb(dptr + 4, dlen - 4, SPDIMS, mbr); /* SRID */ + + for (i = 0, keyseg = keyinfo->seg; keyseg->type; keyseg++, i++) + { + uint length = keyseg->length; + + pos = ((byte*)mbr) + keyseg->start; + if (keyseg->flag & HA_SWAP_KEY) + { +#ifdef HAVE_ISNAN + if (keyseg->type == HA_KEYTYPE_FLOAT) + { + float nr; + float4get(nr, pos); + if (isnan(nr)) + { + /* Replace NAN with zero */ + bzero(key, length); + key+= length; + continue; + } + } + else if (keyseg->type == HA_KEYTYPE_DOUBLE) + { + double nr; + float8get(nr, pos); + if (isnan(nr)) + { + bzero(key, length); + key+= length; + continue; + } + } +#endif + pos += length; + while (length--) + { + *key++ = *--pos; + } + } + else + { + memcpy((byte*)key, pos, length); + key += keyseg->length; + } + len += keyseg->length; + } + _mi_dpointer(info, key, filepos); + return len; +} + +/* +Calculate minimal bounding rectangle (mbr) of the spatial object +stored in "well-known binary representation" (wkb) format. +*/ +static int sp_mbr_from_wkb(uchar *wkb, uint size, uint n_dims, double *mbr) +{ + uint i; + + for (i=0; i < n_dims; ++i) + { + mbr[i * 2] = DBL_MAX; + mbr[i * 2 + 1] = -DBL_MAX; + } + + return sp_get_geometry_mbr(&wkb, wkb + size, n_dims, mbr, 1); +} + +/* + Add one point stored in wkb to mbr +*/ + +static int sp_add_point_to_mbr(uchar *(*wkb), uchar *end, uint n_dims, + uchar byte_order __attribute__((unused)), + double *mbr) +{ + double ord; + double *mbr_end= mbr + n_dims * 2; + + while (mbr < mbr_end) + { + if ((*wkb) > end - 8) + return -1; + float8get(ord, (*wkb)); + (*wkb)+= 8; + if (ord < *mbr) + float8store((char*) mbr, ord); + mbr++; + if (ord > *mbr) + float8store((char*) mbr, ord); + mbr++; + } + return 0; +} + + +static int sp_get_point_mbr(uchar *(*wkb), uchar *end, uint n_dims, + uchar byte_order, double *mbr) +{ + return sp_add_point_to_mbr(wkb, end, n_dims, byte_order, mbr); +} + + +static int sp_get_linestring_mbr(uchar *(*wkb), uchar *end, uint n_dims, + uchar byte_order, double *mbr) +{ + uint n_points; + + n_points = uint4korr(*wkb); + (*wkb) += 4; + for (; n_points > 0; --n_points) + { + /* Add next point to mbr */ + if (sp_add_point_to_mbr(wkb, end, n_dims, byte_order, mbr)) + return -1; + } + return 0; +} + + +static int sp_get_polygon_mbr(uchar *(*wkb), uchar *end, uint n_dims, + uchar byte_order, double *mbr) +{ + uint n_linear_rings; + uint n_points; + + n_linear_rings = uint4korr((*wkb)); + (*wkb) += 4; + + for (; n_linear_rings > 0; --n_linear_rings) + { + n_points = uint4korr((*wkb)); + (*wkb) += 4; + for (; n_points > 0; --n_points) + { + /* Add next point to mbr */ + if (sp_add_point_to_mbr(wkb, end, n_dims, byte_order, mbr)) + return -1; + } + } + return 0; +} + +static int sp_get_geometry_mbr(uchar *(*wkb), uchar *end, uint n_dims, + double *mbr, int top) +{ + int res; + uchar byte_order; + uint wkb_type; + + byte_order = *(*wkb); + ++(*wkb); + + wkb_type = uint4korr((*wkb)); + (*wkb) += 4; + + switch ((enum wkbType) wkb_type) + { + case wkbPoint: + res = sp_get_point_mbr(wkb, end, n_dims, byte_order, mbr); + break; + case wkbLineString: + res = sp_get_linestring_mbr(wkb, end, n_dims, byte_order, mbr); + break; + case wkbPolygon: + res = sp_get_polygon_mbr(wkb, end, n_dims, byte_order, mbr); + break; + case wkbMultiPoint: + { + uint n_items; + n_items = uint4korr((*wkb)); + (*wkb) += 4; + for (; n_items > 0; --n_items) + { + byte_order = *(*wkb); + ++(*wkb); + (*wkb) += 4; + if (sp_get_point_mbr(wkb, end, n_dims, byte_order, mbr)) + return -1; + } + res = 0; + break; + } + case wkbMultiLineString: + { + uint n_items; + n_items = uint4korr((*wkb)); + (*wkb) += 4; + for (; n_items > 0; --n_items) + { + byte_order = *(*wkb); + ++(*wkb); + (*wkb) += 4; + if (sp_get_linestring_mbr(wkb, end, n_dims, byte_order, mbr)) + return -1; + } + res = 0; + break; + } + case wkbMultiPolygon: + { + uint n_items; + n_items = uint4korr((*wkb)); + (*wkb) += 4; + for (; n_items > 0; --n_items) + { + byte_order = *(*wkb); + ++(*wkb); + (*wkb) += 4; + if (sp_get_polygon_mbr(wkb, end, n_dims, byte_order, mbr)) + return -1; + } + res = 0; + break; + } + case wkbGeometryCollection: + { + uint n_items; + + if (!top) + return -1; + + n_items = uint4korr((*wkb)); + (*wkb) += 4; + for (; n_items > 0; --n_items) + { + if (sp_get_geometry_mbr(wkb, end, n_dims, mbr, 0)) + return -1; + } + res = 0; + break; + } + default: + res = -1; + } + return res; +} diff --git a/myisam/sp_test.c b/myisam/sp_test.c new file mode 100644 index 00000000000..16a97771887 --- /dev/null +++ b/myisam/sp_test.c @@ -0,0 +1,577 @@ +/* Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Testing of the basic functions of a MyISAM spatial table */ +/* Written by Alex Barkov, who has a shared copyright to this code */ + +#include "myisam.h" +#include "sp_defs.h" + +#define MAX_REC_LENGTH 1024 +#define KEYALG HA_KEY_ALG_RTREE + +static void create_linestring(char *record,uint rownr); +static void print_record(char * record,my_off_t offs,const char * tail); + +static void create_key(char *key,uint rownr); +static void print_key(const char *key,const char * tail); + +static int run_test(const char *filename); +static int read_with_pos(MI_INFO * file, int silent); + +static int rtree_CreateLineStringWKB(double *ords, uint n_dims, uint n_points, uchar *wkb); +static void rtree_PrintWKB(uchar *wkb, uint n_dims); + + +static char blob_key[MAX_REC_LENGTH]; + + +int main(int argc __attribute__((unused)),char *argv[]) +{ + MY_INIT(argv[0]); + exit(run_test("sp_test")); +} + + + +int run_test(const char *filename) +{ + MI_INFO *file; + MI_UNIQUEDEF uniquedef; + MI_CREATE_INFO create_info; + MI_COLUMNDEF recinfo[20]; + MI_KEYDEF keyinfo[20]; + HA_KEYSEG keyseg[20]; + + int silent=0; + int create_flag=0; + int null_fields=0; + int nrecords=30; + int uniques=0; + int i; + int error; + int row_count=0; + char record[MAX_REC_LENGTH]; + char key[MAX_REC_LENGTH]; + char read_record[MAX_REC_LENGTH]; + int upd=10; + ha_rows hrows; + + /* Define a column for NULLs and DEL markers*/ + + recinfo[0].type=FIELD_NORMAL; + recinfo[0].length=1; /* For NULL bits */ + + + /* Define spatial column */ + + recinfo[1].type=FIELD_BLOB; + recinfo[1].length=4 + mi_portable_sizeof_char_ptr; + + + + /* Define a key with 1 spatial segment */ + + keyinfo[0].seg=keyseg; + keyinfo[0].keysegs=1; + keyinfo[0].flag=HA_SPATIAL; + keyinfo[0].key_alg=KEYALG; + + keyinfo[0].seg[0].type= HA_KEYTYPE_BINARY; + keyinfo[0].seg[0].flag=0; + keyinfo[0].seg[0].start= 1; + keyinfo[0].seg[0].length=1; /* Spatial ignores it anyway */ + keyinfo[0].seg[0].null_bit= null_fields ? 2 : 0; + keyinfo[0].seg[0].null_pos=0; + keyinfo[0].seg[0].language=default_charset_info->number; + keyinfo[0].seg[0].bit_start=4; /* Long BLOB */ + + + if(!silent) + printf("- Creating isam-file\n"); + + bzero((char*) &create_info,sizeof(create_info)); + create_info.max_rows=10000000; + + if (mi_create(filename, + 1, /* keys */ + keyinfo, + 2, /* columns */ + recinfo,uniques,&uniquedef,&create_info,create_flag)) + goto err; + + + + + if(!silent) + printf("- Open isam-file\n"); + + if (!(file=mi_open(filename,2,HA_OPEN_ABORT_IF_LOCKED))) + goto err; + + + + if (!silent) + printf("- Writing key:s\n"); + + for (i=0; i<nrecords; i++ ) + { + create_linestring(record,i); + error=mi_write(file,record); + print_record(record,mi_position(file),"\n"); + if (!error) + { + row_count++; + } + else + { + printf("mi_write: %d\n", error); + goto err; + } + } + + + if((error=read_with_pos(file,silent))) + goto err; + + + if (!silent) + printf("- Deleting rows with position\n"); + for (i=0; i < nrecords/4; i++) + { + my_errno=0; + bzero((char*) read_record,MAX_REC_LENGTH); + error=mi_rrnd(file,read_record,i == 0 ? 0L : HA_OFFSET_ERROR); + if(error) + { + printf("pos: %2d mi_rrnd: %3d errno: %3d\n",i,error,my_errno); + goto err; + } + print_record(read_record,mi_position(file),"\n"); + error=mi_delete(file,read_record); + if(error) + { + printf("pos: %2d mi_delete: %3d errno: %3d\n",i,error,my_errno); + goto err; + } + } + + + + + if (!silent) + printf("- Updating rows with position\n"); + for (i=0; i < nrecords/2 ; i++) + { + my_errno=0; + bzero((char*) read_record,MAX_REC_LENGTH); + error=mi_rrnd(file,read_record,i == 0 ? 0L : HA_OFFSET_ERROR); + if(error) + { + if(error==HA_ERR_RECORD_DELETED) + continue; + printf("pos: %2d mi_rrnd: %3d errno: %3d\n",i,error,my_errno); + goto err; + } + print_record(read_record,mi_position(file),""); + create_linestring(record,i+nrecords*upd); + printf("\t-> "); + print_record(record,mi_position(file),"\n"); + error=mi_update(file,read_record,record); + if(error) + { + printf("pos: %2d mi_update: %3d errno: %3d\n",i,error,my_errno); + goto err; + } + } + + + + if((error=read_with_pos(file,silent))) + goto err; + + + + if (!silent) + printf("- Test mi_rkey then a sequence of mi_rnext_same\n"); + + create_key(key, nrecords*4/5); + print_key(key," search for INTERSECT\n"); + + if ((error=mi_rkey(file,read_record,0,key,0,HA_READ_MBR_INTERSECT))) + { + printf("mi_rkey: %3d errno: %3d\n",error,my_errno); + goto err; + } + print_record(read_record,mi_position(file)," mi_rkey\n"); + row_count=1; + + + do { + if((error=mi_rnext_same(file,read_record))) + { + if(error==HA_ERR_END_OF_FILE) + break; + printf("mi_next: %3d errno: %3d\n",error,my_errno); + goto err; + } + print_record(read_record,mi_position(file)," mi_rnext_same\n"); + row_count++; + }while(1); + printf(" %d rows\n",row_count); + + + + + + + if (!silent) + printf("- Test mi_rfirst then a sequence of mi_rnext\n"); + + error=mi_rfirst(file,read_record,0); + if (error) + { + printf("mi_rfirst: %3d errno: %3d\n",error,my_errno); + goto err; + } + row_count=1; + print_record(read_record,mi_position(file)," mi_frirst\n"); + + for(i=0;i<nrecords;i++) { + if((error=mi_rnext(file,read_record,0))) + { + if(error==HA_ERR_END_OF_FILE) + break; + printf("mi_next: %3d errno: %3d\n",error,my_errno); + goto err; + } + print_record(read_record,mi_position(file)," mi_rnext\n"); + row_count++; + } + printf(" %d rows\n",row_count); + + + + + if (!silent) + printf("- Test mi_records_in_range()\n"); + + create_key(key, nrecords*upd); + print_key(key," INTERSECT\n"); + hrows=mi_records_in_range(file,0,key,0,HA_READ_MBR_INTERSECT,record+1,0, + HA_READ_KEY_EXACT); + printf(" %ld rows\n", (long) hrows); + + + if (mi_close(file)) goto err; + my_end(MY_CHECK_ERROR); + + return 0; + +err: + printf("got error: %3d when using myisam-database\n",my_errno); + return 1; /* skipp warning */ +} + + + +static int read_with_pos (MI_INFO * file,int silent) +{ + int error; + int i; + char read_record[MAX_REC_LENGTH]; + int rows=0; + + if (!silent) + printf("- Reading rows with position\n"); + for (i=0;;i++) + { + my_errno=0; + bzero((char*) read_record,MAX_REC_LENGTH); + error=mi_rrnd(file,read_record,i == 0 ? 0L : HA_OFFSET_ERROR); + if(error) + { + if(error==HA_ERR_END_OF_FILE) + break; + if(error==HA_ERR_RECORD_DELETED) + continue; + printf("pos: %2d mi_rrnd: %3d errno: %3d\n",i,error,my_errno); + return error; + } + rows++; + print_record(read_record,mi_position(file),"\n"); + } + printf(" %d rows\n",rows); + return 0; +} + + +#ifdef NOT_USED +static void bprint_record(char * record, + my_off_t offs __attribute__((unused)), + const char * tail) +{ + int i; + char * pos; + i=(unsigned char)record[0]; + printf("%02X ",i); + + for( pos=record+1, i=0; i<32; i++,pos++) + { + int b=(unsigned char)*pos; + printf("%02X",b); + } + printf("%s",tail); +} +#endif + + +static void print_record(char * record, my_off_t offs,const char * tail) +{ + char *pos; + char *ptr; + uint len; + + printf(" rec=(%d)",(unsigned char)record[0]); + pos=record+1; + len=sint4korr(pos); + pos+=4; + printf(" len=%d ",len); + memcpy_fixed(&ptr,pos,sizeof(char*)); + if(ptr) + rtree_PrintWKB((uchar*) ptr,SPDIMS); + else + printf("<NULL> "); + printf(" offs=%ld ",(long int)offs); + printf("%s",tail); +} + + + +#ifdef NOT_USED +static void create_point(char *record,uint rownr) +{ + uint tmp; + char *ptr; + char *pos=record; + double x[200]; + int i; + + for(i=0;i<SPDIMS;i++) + x[i]=rownr; + + bzero((char*) record,MAX_REC_LENGTH); + *pos=0x01; /* DEL marker */ + pos++; + + memset(blob_key,0,sizeof(blob_key)); + tmp=rtree_CreatePointWKB(x,SPDIMS,blob_key); + + int4store(pos,tmp); + pos+=4; + + ptr=blob_key; + memcpy_fixed(pos,&ptr,sizeof(char*)); +} +#endif + + +static void create_linestring(char *record,uint rownr) +{ + uint tmp; + char *ptr; + char *pos=record; + double x[200]; + int i,j; + int npoints=2; + + for(j=0;j<npoints;j++) + for(i=0;i<SPDIMS;i++) + x[i+j*SPDIMS]=rownr*j; + + bzero((char*) record,MAX_REC_LENGTH); + *pos=0x01; /* DEL marker */ + pos++; + + memset(blob_key,0,sizeof(blob_key)); + tmp=rtree_CreateLineStringWKB(x,SPDIMS,npoints, (uchar*) blob_key); + + int4store(pos,tmp); + pos+=4; + + ptr=blob_key; + memcpy_fixed(pos,&ptr,sizeof(char*)); +} + + +static void create_key(char *key,uint rownr) +{ + double c=rownr; + char *pos; + uint i; + + bzero(key,MAX_REC_LENGTH); + for ( pos=key, i=0; i<2*SPDIMS; i++) + { + float8store(pos,c); + pos+=sizeof(c); + } +} + +static void print_key(const char *key,const char * tail) +{ + double c; + uint i; + + printf(" key="); + for (i=0; i<2*SPDIMS; i++) + { + float8get(c,key); + key+=sizeof(c); + printf("%.14g ",c); + } + printf("%s",tail); +} + + + +#ifdef NOT_USED + +static int rtree_CreatePointWKB(double *ords, uint n_dims, uchar *wkb) +{ + uint i; + + *wkb = wkbXDR; + ++wkb; + int4store(wkb, wkbPoint); + wkb += 4; + + for (i=0; i < n_dims; ++i) + { + float8store(wkb, ords[i]); + wkb += 8; + } + return 5 + n_dims * 8; +} +#endif + + +static int rtree_CreateLineStringWKB(double *ords, uint n_dims, uint n_points, + uchar *wkb) +{ + uint i; + uint n_ords = n_dims * n_points; + + *wkb = wkbXDR; + ++wkb; + int4store(wkb, wkbLineString); + wkb += 4; + int4store(wkb, n_points); + wkb += 4; + for (i=0; i < n_ords; ++i) + { + float8store(wkb, ords[i]); + wkb += 8; + } + return 9 + n_points * n_dims * 8; +} + +static void rtree_PrintWKB(uchar *wkb, uint n_dims) +{ + uint wkb_type; + + ++wkb; + wkb_type = uint4korr(wkb); + wkb += 4; + + switch ((enum wkbType)wkb_type) + { + case wkbPoint: + { + uint i; + double ord; + + printf("POINT("); + for (i=0; i < n_dims; ++i) + { + float8get(ord, wkb); + wkb += 8; + printf("%.14g", ord); + if (i < n_dims - 1) + printf(" "); + else + printf(")"); + } + break; + } + case wkbLineString: + { + uint p, i; + uint n_points; + double ord; + + printf("LineString("); + n_points = uint4korr(wkb); + wkb += 4; + for (p=0; p < n_points; ++p) + { + for (i=0; i < n_dims; ++i) + { + float8get(ord, wkb); + wkb += 8; + printf("%.14g", ord); + if (i < n_dims - 1) + printf(" "); + } + if (p < n_points - 1) + printf(", "); + else + printf(")"); + } + break; + } + case wkbPolygon: + { + printf("POLYGON(...)"); + break; + } + case wkbMultiPoint: + { + printf("MULTIPOINT(...)"); + break; + } + case wkbMultiLineString: + { + printf("MULTILINESTRING(...)"); + break; + } + case wkbMultiPolygon: + { + printf("MULTIPOLYGON(...)"); + break; + } + case wkbGeometryCollection: + { + printf("GEOMETRYCOLLECTION(...)"); + break; + } + default: + { + printf("UNKNOWN GEOMETRY TYPE"); + break; + } + } +} |