summary refs log tree commit diff
path: root/sql
diff options
context:
space:
mode:
author Alexander Barkov <bar@mariadb.com> 2019-04-21 12:07:30 +0400
committer Alexander Barkov <bar@mariadb.com> 2019-04-21 12:07:30 +0400
commit 765ae6e82165d1bc4cf6cc9f0d556d66a5e172d1 (patch)
tree 79056428f6d0c7d262f042ff3fd9b1f28f464ef6 /sql
parent f4019f5b3544a18f3ddf32df2c5214c3f8dabdce (diff)
download mariadb-git-765ae6e82165d1bc4cf6cc9f0d556d66a5e172d1.tar.gz
MDEV-19239 ERROR 1300 (HY000): Invalid utf8 character string in 10.3.13-MariaDB
A sequence of <digits>e<mbhead><mbtail> was not scanned correctly, e.g. "SELECT 123eXYzzz FROM t1;", where XY is a multi-byte character. The multi-byte head byte X was appended to "123e" separately from the multi-byte tail byte Y, so a pointer to "Yzzz" was passed into scan_ident_start(), which failed on a bad multi-byte sequence. After this change, scan_ident_start() gets a pointer to "XYzzz", so it correctly sees the whole multi-byte character.
Diffstat (limited to 'sql')
-rw-r--r-- sql/sql_lex.cc 20
1 files changed, 19 insertions, 1 deletions
diff --git a/sql/sql_lex.cc b/sql/sql_lex.cc
index c52005e7683..b5ff060ecc6 100644
--- a/sql/sql_lex.cc
+++ b/sql/sql_lex.cc
@@ -1587,9 +1587,27 @@ int Lex_input_stream::lex_one_token(YYSTYPE *yylval, THD *thd)
return(FLOAT_NUM);
}
}
+ /*
+ We've found:
+ - A sequence of digits
+ - Followed by 'e' or 'E'
+ - Followed by some byte XX which is not a known exponent start,
+ and it's known to be a valid identifier part.
+ XX can be either an 8-bit identifier character, or a multi-byte head.
+ */
yyUnget();
+ return scan_ident_start(thd, &yylval->ident_cli);
}
- // fall through
+ /*
+ We've found:
+ - A sequence of digits
+ - Followed by some character XX, which is neither 'e' nor 'E',
+ and it's known to be a valid identifier part.
+ XX can be an 8-bit identifier character, or a multi-byte head.
+ */
+ yyUnget();
+ return scan_ident_start(thd, &yylval->ident_cli);
+
case MY_LEX_IDENT_START: // We come here after '.'
return scan_ident_start(thd, &yylval->ident_cli);