diff options
author | unknown <serg@serg.mysql.com> | 2001-12-11 14:40:56 +0100 |
---|---|---|
committer | unknown <serg@serg.mysql.com> | 2001-12-11 14:40:56 +0100 |
commit | a3bbc574cf06313d6e362b6b861cf3aab62d5b9e (patch) | |
tree | b5cdf1ea38e8f1be9349c1caffb845b362ab7c83 | |
parent | 718825a9b0b9517201b9109c4f88cfaa6b2f20a6 (diff) | |
parent | e8b18cc0c5521f0a7fbc5ccb1e470070101ef9b6 (diff) | |
download | mariadb-git-a3bbc574cf06313d6e362b6b861cf3aab62d5b9e.tar.gz |
Merge work:/home/bk/mysql-4.0
into serg.mysql.com:/usr/home/serg/Abk/mysql-4.0
myisam/ft_boolean_search.c:
Auto merged
myisam/ft_update.c:
Auto merged
-rwxr-xr-x | build-tags | 11 | ||||
-rw-r--r-- | myisam/ft_boolean_search.c | 8 | ||||
-rw-r--r-- | myisam/ft_update.c | 140 | ||||
-rw-r--r-- | myisam/ftdefs.h | 10 | ||||
-rw-r--r-- | mysql-test/r/fulltext.result | 2 | ||||
-rw-r--r-- | mysql-test/t/fulltext.test | 2 | ||||
-rw-r--r-- | mysys/queues.c | 6 |
7 files changed, 93 insertions, 86 deletions
diff --git a/build-tags b/build-tags deleted file mode 100755 index 90b957eb3bc..00000000000 --- a/build-tags +++ /dev/null @@ -1,11 +0,0 @@ -#! /bin/sh - -if [ ! -f configure.in ] ; then - echo "$0 must be run from MySQL source root" - exit 1 -fi - -rm -f TAGS -find -not -path \*SCCS\* -and \ - \( -name \*.cc -or -name \*.h -or -name \*.yy -or -name \*.c \) \ - -print -exec etags -o TAGS --append {} \; diff --git a/myisam/ft_boolean_search.c b/myisam/ft_boolean_search.c index c41c699bf5b..4acce93e3ab 100644 --- a/myisam/ft_boolean_search.c +++ b/myisam/ft_boolean_search.c @@ -229,11 +229,12 @@ FT_INFO * ft_init_boolean_search(MI_INFO *info, uint keynr, byte *query, return ftb; } -void _ftb_climb_the_tree(FTB_WORD *ftbw, my_off_t curdoc) +void _ftb_climb_the_tree(FTB_WORD *ftbw) { FTB_EXPR *ftbe; float weight=ftbw->weight; int yn=ftbw->yesno; + my_off_t curdoc=ftbw->docid; for (ftbe=ftbw->up; ftbe; ftbe=ftbe->up) { @@ -307,7 +308,7 @@ int ft_boolean_read_next(FT_INFO *ftb, char *record) { while (curdoc==(ftbw=(FTB_WORD *)queue_top(& ftb->queue))->docid) { - _ftb_climb_the_tree(ftbw, curdoc); + _ftb_climb_the_tree(ftbw); /* update queue */ r=_mi_search(info, keyinfo, (uchar*) ftbw->word, USE_WHOLE_KEY, @@ -401,12 +402,13 @@ float ft_boolean_find_relevance(FT_INFO *ftb, byte *record, uint length) for (i=1; i<=ftb->queue.elements; i++) { ftbw=(FTB_WORD *)(ftb->queue.root[i]); + ftbw->docid=docid; ptree.custom_arg=(void *)(ftbw->trunc); word.pos=ftbw->word+1; word.len=ftbw->len-1; if (tree_search(& ptree, & word)) { /* found! */ - _ftb_climb_the_tree(ftbw, docid); + _ftb_climb_the_tree(ftbw); } else { /* not found! */ diff --git a/myisam/ft_update.c b/myisam/ft_update.c index e83f0a21491..e942b2d0975 100644 --- a/myisam/ft_update.c +++ b/myisam/ft_update.c @@ -28,39 +28,75 @@ #define set_if_smaller(A,B) /* no op */ /**************************************************************/ +void _mi_ft_segiterator_init(MI_INFO *info, uint keynr, const byte *record, + FT_SEG_ITERATOR *ftsi) +{ + ftsi->num=info->s->keyinfo[keynr].keysegs-FT_SEGS; + ftsi->seg=info->s->keyinfo[keynr].seg; + ftsi->rec=record; +} -/* parses a document i.e. calls ft_parse for every keyseg */ -uint _mi_ft_parse(TREE *parsed, MI_INFO *info, uint keynr, const byte *record) +void _mi_ft_segiterator_dummy_init(const byte *record, uint len, + FT_SEG_ITERATOR *ftsi) { - byte *pos; - uint i; - MI_KEYSEG *keyseg=info->s->keyinfo[keynr].seg; + ftsi->num=1; + ftsi->seg=0; + ftsi->pos=record; + ftsi->len=len; +} - for (i=info->s->keyinfo[keynr].keysegs-FT_SEGS ; i-- ; ) - { - uint len; +/* This function breaks convention "return 0 in success" + but it's easier to use like this - keyseg--; - if (keyseg->null_bit && (record[keyseg->null_pos] & keyseg->null_bit)) - continue; /* NULL field */ - pos= (byte *)record+keyseg->start; - if (keyseg->flag & HA_VAR_LENGTH) - { - len=uint2korr(pos); - pos+=2; /* Skip VARCHAR length */ - set_if_smaller(len,keyseg->length); - } - else if (keyseg->flag & HA_BLOB_PART) - { - len=_mi_calc_blob_length(keyseg->bit_start,pos); - memcpy_fixed(&pos,pos+keyseg->bit_start,sizeof(char*)); - set_if_smaller(len,keyseg->length); - } - else - len=keyseg->length; - if (ft_parse(parsed, pos, len)) + while(_mi_ft_segiterator()) + + so "1" means "OK", "0" means "EOF" +*/ + +uint _mi_ft_segiterator(register FT_SEG_ITERATOR *ftsi) +{ + if(!ftsi->num) + return 0; + if (!ftsi->seg) + return 1; + + ftsi->seg--; ftsi->num--; + if (ftsi->seg->null_bit && + (ftsi->rec[ftsi->seg->null_pos] & ftsi->seg->null_bit)) + { + ftsi->pos=0; return 1; } + ftsi->pos= ftsi->rec+ftsi->seg->start; + if (ftsi->seg->flag & HA_VAR_LENGTH) + { + ftsi->len=uint2korr(ftsi->pos); + ftsi->pos+=2; /* Skip VARCHAR length */ + set_if_smaller(ftsi->len,ftsi->seg->length); + return 1; + } + if (ftsi->seg->flag & HA_BLOB_PART) + { + ftsi->len=_mi_calc_blob_length(ftsi->seg->bit_start,ftsi->pos); + memcpy_fixed(&ftsi->pos,ftsi->pos+ftsi->seg->bit_start,sizeof(char*)); + set_if_smaller(ftsi->len,ftsi->seg->length); + return 1; + } + ftsi->len=ftsi->seg->length; + return 1; +} + +/* parses a document i.e. calls ft_parse for every keyseg */ +uint _mi_ft_parse(TREE *parsed, MI_INFO *info, uint keynr, const byte *record) +{ + FT_SEG_ITERATOR ftsi; + _mi_ft_segiterator_init(info, keynr, record, &ftsi); + + while (_mi_ft_segiterator(&ftsi)) + if (ftsi.pos) + if (ft_parse(parsed, (byte *)ftsi.pos, ftsi.len)) + return 1; + /* Handle the case where all columns are NULL */ if (!is_tree_inited(parsed) && ft_parse(parsed, (byte*) "", 0)) return 1; @@ -118,50 +154,16 @@ static int _mi_ft_erase(MI_INFO *info, uint keynr, byte *keybuf, FT_WORD *wlist, int _mi_ft_cmp(MI_INFO *info, uint keynr, const byte *rec1, const byte *rec2) { - MI_KEYSEG *keyseg; - byte *pos1, *pos2; - uint i; + FT_SEG_ITERATOR ftsi1, ftsi2; + _mi_ft_segiterator_init(info, keynr, rec1, &ftsi1); + _mi_ft_segiterator_init(info, keynr, rec2, &ftsi2); - i=info->s->keyinfo[keynr].keysegs-FT_SEGS; - keyseg=info->s->keyinfo[keynr].seg; - while(i--) + while(_mi_ft_segiterator(&ftsi1) && _mi_ft_segiterator(&ftsi2)) { - uint len1, len2; - LINT_INIT(len1); LINT_INIT(len2); - keyseg--; - if (keyseg->null_bit) - { - if ( (rec1[keyseg->null_pos] ^ rec2[keyseg->null_pos]) - & keyseg->null_bit ) - return THOSE_TWO_DAMN_KEYS_ARE_REALLY_DIFFERENT; - if (rec1[keyseg->null_pos] & keyseg->null_bit ) - continue; /* NULL field */ - } - pos1= (byte *)rec1+keyseg->start; - pos2= (byte *)rec2+keyseg->start; - if (keyseg->flag & HA_VAR_LENGTH) - { - len1=uint2korr(pos1); - pos1+=2; /* Skip VARCHAR length */ - set_if_smaller(len1,keyseg->length); - len2=uint2korr(pos2); - pos2+=2; /* Skip VARCHAR length */ - set_if_smaller(len2,keyseg->length); - } - else if (keyseg->flag & HA_BLOB_PART) - { - len1=_mi_calc_blob_length(keyseg->bit_start,pos1); - memcpy_fixed(&pos1,pos1+keyseg->bit_start,sizeof(char*)); - set_if_smaller(len1,keyseg->length); - len2=_mi_calc_blob_length(keyseg->bit_start,pos2); - memcpy_fixed(&pos2,pos2+keyseg->bit_start,sizeof(char*)); - set_if_smaller(len2,keyseg->length); - } - else /* fixed length key */ - { - len1=len2=keyseg->length; - } - if ((len1 != len2) || memcmp(pos1, pos2, len1)) + if ((ftsi1.pos != ftsi2.pos) && + _mi_compare_text(default_charset_info, + (uchar*) ftsi1.pos,ftsi1.len, + (uchar*) ftsi2.pos,ftsi2.len,0)) return THOSE_TWO_DAMN_KEYS_ARE_REALLY_DIFFERENT; } return GEE_THEY_ARE_ABSOLUTELY_IDENTICAL; diff --git a/myisam/ftdefs.h b/myisam/ftdefs.h index ee35ccbb14a..f8b50cb45b9 100644 --- a/myisam/ftdefs.h +++ b/myisam/ftdefs.h @@ -120,6 +120,16 @@ uint _ft_make_key(MI_INFO *, uint , byte *, FT_WORD *, my_off_t); byte ft_get_word(byte **, byte *, FT_WORD *, FTB_PARAM *); byte ft_simple_get_word(byte **, byte *, FT_WORD *); +typedef struct _st_ft_seg_iterator { + uint num, len; + MI_KEYSEG *seg; + const byte *rec, *pos; +} FT_SEG_ITERATOR; + +void _mi_ft_segiterator_init(MI_INFO *, uint, const byte *, FT_SEG_ITERATOR *); +void _mi_ft_segiterator_dummy_init(const byte *, uint, FT_SEG_ITERATOR *); +uint _mi_ft_segiterator(FT_SEG_ITERATOR *); + int ft_parse(TREE *, byte *, int); FT_WORD * ft_linearize(/*MI_INFO *, uint, byte *, */TREE *); FT_WORD * _mi_ft_parserecord(MI_INFO *, uint, byte *, const byte *); diff --git a/mysql-test/r/fulltext.result b/mysql-test/r/fulltext.result index 839db356e74..68d3b6ecc62 100644 --- a/mysql-test/r/fulltext.result +++ b/mysql-test/r/fulltext.result @@ -55,7 +55,7 @@ Full-text indexes are called collections 1 Only MyISAM tables support collections 2 Function MATCH ... AGAINST() is used to do a search 0 Full-text search in MySQL implements vector space model 0 -select * from t1 where MATCH a AGAINST ("search" IN BOOLEAN MODE); +select * from t1 where MATCH a AGAINST ("sear*" IN BOOLEAN MODE); a b Full-text search in MySQL implements vector space model delete from t1 where a like "MySQL%"; diff --git a/mysql-test/t/fulltext.test b/mysql-test/t/fulltext.test index ab3fc194891..ce003ee33ad 100644 --- a/mysql-test/t/fulltext.test +++ b/mysql-test/t/fulltext.test @@ -31,7 +31,7 @@ select *, MATCH(a,b) AGAINST("support collections" IN BOOLEAN MODE) as x from t # boolean w/o index: -select * from t1 where MATCH a AGAINST ("search" IN BOOLEAN MODE); +select * from t1 where MATCH a AGAINST ("sear*" IN BOOLEAN MODE); #update/delete with fulltext index diff --git a/mysys/queues.c b/mysys/queues.c index 087df48eb72..50ef3944a3f 100644 --- a/mysys/queues.c +++ b/mysys/queues.c @@ -173,7 +173,11 @@ static int queue_fix_cmp(QUEUE *queue, void **a, void **b) (char*) (*b)+queue->offset_to_key); } - /* Fix heap when every element was changed */ +/* Fix heap when every element was changed + actually, it can be done in linear time, + not in n*log(n), but some code (myisam/ft_boolean_search.c) + requires a strict order here, not just a queue property +*/ void queue_fix(QUEUE *queue) { qsort2(queue->root+1,queue->elements, sizeof(void *), |