summaryrefslogtreecommitdiff
path: root/storage/myisam/ft_parser.c
diff options
context:
space:
mode:
author: unknown <svoj@mysql.com/june.mysql.com> 2007-07-05 15:17:06 +0500
committer: unknown <svoj@mysql.com/june.mysql.com> 2007-07-05 15:17:06 +0500
commit: 3bd52b0b7b4cddc973b74af64c097033cb75e149 (patch)
tree: 8b54b83ae7e0a63dac8e6950d3eb250c5680d3ae /storage/myisam/ft_parser.c
parent: 435df1859fc503c884cdec9eaceb7f5f102af70d (diff)
download: mariadb-git-3bd52b0b7b4cddc973b74af64c097033cb75e149.tar.gz
BUG#29464 - load data infile into table with big5 chinese fulltext index hangs 100% cpu

The fulltext parser may fall into an infinite loop when it gets an illegal multibyte sequence (or a sequence that doesn't have a mapping to Unicode). Affects 5.1 only.

mysql-test/r/fulltext.result: A test case for BUG#29464.
mysql-test/t/fulltext.test: A test case for BUG#29464.
storage/myisam/ft_parser.c: ctype() may return a negative value, which was stored in an unsigned variable. Also, ctype() may return a negative length for a correct multibyte sequence that doesn't have a mapping to Unicode. These characters are skipped correctly with this patch.
Diffstat (limited to 'storage/myisam/ft_parser.c')
-rw-r--r-- storage/myisam/ft_parser.c | 16
1 files changed, 10 insertions, 6 deletions
diff --git a/storage/myisam/ft_parser.c b/storage/myisam/ft_parser.c
index ba858c37aee..befe2bab066 100644
--- a/storage/myisam/ft_parser.c
+++ b/storage/myisam/ft_parser.c
@@ -111,7 +111,8 @@ uchar ft_get_word(CHARSET_INFO *cs, uchar **start, uchar *end,
{
uchar *doc=*start;
int ctype;
- uint mwc, length, mbl;
+ uint mwc, length;
+ int mbl;
param->yesno=(FTB_YES==' ') ? 1 : (param->quot != 0);
param->weight_adjust= param->wasign= 0;
@@ -119,7 +120,7 @@ uchar ft_get_word(CHARSET_INFO *cs, uchar **start, uchar *end,
while (doc<end)
{
- for (; doc < end; doc+= (mbl > 0 ? mbl : 1))
+ for (; doc < end; doc+= (mbl > 0 ? mbl : (mbl < 0 ? -mbl : 1)))
{
mbl= cs->cset->ctype(cs, &ctype, (uchar*)doc, (uchar*)end);
if (true_word_char(ctype, *doc))
@@ -157,7 +158,8 @@ uchar ft_get_word(CHARSET_INFO *cs, uchar **start, uchar *end,
}
mwc=length=0;
- for (word->pos= doc; doc < end; length++, doc+= (mbl > 0 ? mbl : 1))
+ for (word->pos= doc; doc < end; length++,
+ doc+= (mbl > 0 ? mbl : (mbl < 0 ? -mbl : 1)))
{
mbl= cs->cset->ctype(cs, &ctype, (uchar*)doc, (uchar*)end);
if (true_word_char(ctype, *doc))
@@ -200,13 +202,14 @@ uchar ft_simple_get_word(CHARSET_INFO *cs, uchar **start, const uchar *end,
FT_WORD *word, my_bool skip_stopwords)
{
uchar *doc= *start;
- uint mwc, length, mbl;
+ uint mwc, length;
+ int mbl;
int ctype;
DBUG_ENTER("ft_simple_get_word");
do
{
- for (;; doc+= (mbl > 0 ? mbl : 1))
+ for (;; doc+= (mbl > 0 ? mbl : (mbl < 0 ? -mbl : 1)))
{
if (doc >= end)
DBUG_RETURN(0);
@@ -216,7 +219,8 @@ uchar ft_simple_get_word(CHARSET_INFO *cs, uchar **start, const uchar *end,
}
mwc= length= 0;
- for (word->pos= doc; doc < end; length++, doc+= (mbl > 0 ? mbl : 1))
+ for (word->pos= doc; doc < end; length++,
+ doc+= (mbl > 0 ? mbl : (mbl < 0 ? -mbl : 1)))
{
mbl= cs->cset->ctype(cs, &ctype, (uchar*)doc, (uchar*)end);
if (true_word_char(ctype, *doc))