diff options
author | unknown <svoj@mysql.com/june.mysql.com> | 2007-07-05 15:17:06 +0500 |
---|---|---|
committer | unknown <svoj@mysql.com/june.mysql.com> | 2007-07-05 15:17:06 +0500 |
commit | 3bd52b0b7b4cddc973b74af64c097033cb75e149 (patch) | |
tree | 8b54b83ae7e0a63dac8e6950d3eb250c5680d3ae /storage/myisam/ft_parser.c | |
parent | 435df1859fc503c884cdec9eaceb7f5f102af70d (diff) | |
download | mariadb-git-3bd52b0b7b4cddc973b74af64c097033cb75e149.tar.gz |
BUG#29464 - LOAD DATA INFILE into a table with a big5 Chinese fulltext index
hangs at 100% CPU
The fulltext parser may fall into an infinite loop when it gets an illegal
multibyte sequence (or a sequence that doesn't have a mapping to Unicode).
Affects 5.1 only.
mysql-test/r/fulltext.result:
A test case for BUG#29464.
mysql-test/t/fulltext.test:
A test case for BUG#29464.
storage/myisam/ft_parser.c:
ctype() may return a negative value, which was stored in an unsigned
variable.
Also, ctype() may return a negative length for a correct multibyte
sequence that doesn't have a mapping to Unicode. Such sequences
are skipped correctly with this patch.
Diffstat (limited to 'storage/myisam/ft_parser.c')
-rw-r--r-- | storage/myisam/ft_parser.c | 16 |
1 files changed, 10 insertions, 6 deletions
diff --git a/storage/myisam/ft_parser.c b/storage/myisam/ft_parser.c index ba858c37aee..befe2bab066 100644 --- a/storage/myisam/ft_parser.c +++ b/storage/myisam/ft_parser.c @@ -111,7 +111,8 @@ uchar ft_get_word(CHARSET_INFO *cs, uchar **start, uchar *end, { uchar *doc=*start; int ctype; - uint mwc, length, mbl; + uint mwc, length; + int mbl; param->yesno=(FTB_YES==' ') ? 1 : (param->quot != 0); param->weight_adjust= param->wasign= 0; @@ -119,7 +120,7 @@ uchar ft_get_word(CHARSET_INFO *cs, uchar **start, uchar *end, while (doc<end) { - for (; doc < end; doc+= (mbl > 0 ? mbl : 1)) + for (; doc < end; doc+= (mbl > 0 ? mbl : (mbl < 0 ? -mbl : 1))) { mbl= cs->cset->ctype(cs, &ctype, (uchar*)doc, (uchar*)end); if (true_word_char(ctype, *doc)) @@ -157,7 +158,8 @@ uchar ft_get_word(CHARSET_INFO *cs, uchar **start, uchar *end, } mwc=length=0; - for (word->pos= doc; doc < end; length++, doc+= (mbl > 0 ? mbl : 1)) + for (word->pos= doc; doc < end; length++, + doc+= (mbl > 0 ? mbl : (mbl < 0 ? -mbl : 1))) { mbl= cs->cset->ctype(cs, &ctype, (uchar*)doc, (uchar*)end); if (true_word_char(ctype, *doc)) @@ -200,13 +202,14 @@ uchar ft_simple_get_word(CHARSET_INFO *cs, uchar **start, const uchar *end, FT_WORD *word, my_bool skip_stopwords) { uchar *doc= *start; - uint mwc, length, mbl; + uint mwc, length; + int mbl; int ctype; DBUG_ENTER("ft_simple_get_word"); do { - for (;; doc+= (mbl > 0 ? mbl : 1)) + for (;; doc+= (mbl > 0 ? mbl : (mbl < 0 ? -mbl : 1))) { if (doc >= end) DBUG_RETURN(0); @@ -216,7 +219,8 @@ uchar ft_simple_get_word(CHARSET_INFO *cs, uchar **start, const uchar *end, } mwc= length= 0; - for (word->pos= doc; doc < end; length++, doc+= (mbl > 0 ? mbl : 1)) + for (word->pos= doc; doc < end; length++, + doc+= (mbl > 0 ? mbl : (mbl < 0 ? -mbl : 1))) { mbl= cs->cset->ctype(cs, &ctype, (uchar*)doc, (uchar*)end); if (true_word_char(ctype, *doc)) |