diff options
author | Alexander Barkov <bar@mariadb.com> | 2019-04-21 12:07:30 +0400 |
---|---|---|
committer | Alexander Barkov <bar@mariadb.com> | 2019-04-21 12:07:30 +0400 |
commit | 765ae6e82165d1bc4cf6cc9f0d556d66a5e172d1 (patch) | |
tree | 79056428f6d0c7d262f042ff3fd9b1f28f464ef6 /sql | |
parent | f4019f5b3544a18f3ddf32df2c5214c3f8dabdce (diff) | |
download | mariadb-git-765ae6e82165d1bc4cf6cc9f0d556d66a5e172d1.tar.gz |
MDEV-19239 ERROR 1300 (HY000): Invalid utf8 character string in 10.3.13-MariaDB
A sequence of <digits>e<mbhead><mbtail>, e.g.:
SELECT 123eXYzzz FROM t1;
was not scanned correctly (where XY is a multi-byte character).
The multi-byte head byte X was consumed together with "123e", separately
from the multi-byte tail byte Y, so a pointer to "Yzzz" was passed into
scan_ident_start(), which failed because a tail byte without its head byte
is a bad multi-byte sequence.
After this change, scan_ident_start() gets a pointer to "XYzzz",
so it correctly sees the whole multi-byte character.
Diffstat (limited to 'sql')
-rw-r--r-- | sql/sql_lex.cc | 20 |
1 file changed, 19 insertions, 1 deletion
diff --git a/sql/sql_lex.cc b/sql/sql_lex.cc index c52005e7683..b5ff060ecc6 100644 --- a/sql/sql_lex.cc +++ b/sql/sql_lex.cc @@ -1587,9 +1587,27 @@ int Lex_input_stream::lex_one_token(YYSTYPE *yylval, THD *thd) return(FLOAT_NUM); } } + /* + We've found: + - A sequence of digits + - Followed by 'e' or 'E' + - Followed by some byte XX which is not a known mantissa start, + and it's known to be a valid identifier part. + XX can be either a 8bit identifier character, or a multi-byte head. + */ yyUnget(); + return scan_ident_start(thd, &yylval->ident_cli); } - // fall through + /* + We've found: + - A sequence of digits + - Followed by some character XX, which is neither 'e' nor 'E', + and it's known to be a valid identifier part. + XX can be a 8bit identifier character, or a multi-byte head. + */ + yyUnget(); + return scan_ident_start(thd, &yylval->ident_cli); + case MY_LEX_IDENT_START: // We come here after '.' return scan_ident_start(thd, &yylval->ident_cli); |