summaryrefslogtreecommitdiff
path: root/sql/sql_lex.cc
diff options
context:
space:
mode:
Diffstat (limited to 'sql/sql_lex.cc')
-rw-r--r--sql/sql_lex.cc270
1 files changed, 242 insertions, 28 deletions
diff --git a/sql/sql_lex.cc b/sql/sql_lex.cc
index f0bc582985b..e93ba7fc10f 100644
--- a/sql/sql_lex.cc
+++ b/sql/sql_lex.cc
@@ -1,5 +1,5 @@
/* Copyright (c) 2000, 2014, Oracle and/or its affiliates.
- Copyright (c) 2009, 2015, MariaDB
+ Copyright (c) 2009, 2016, MariaDB
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -324,9 +324,7 @@ void Lex_input_stream::body_utf8_start(THD *thd, const char *begin_ptr)
DBUG_ASSERT(begin_ptr);
DBUG_ASSERT(m_cpp_buf <= begin_ptr && begin_ptr <= m_cpp_buf + m_buf_length);
- uint body_utf8_length=
- (m_buf_length / thd->variables.character_set_client->mbminlen) *
- my_charset_utf8_bin.mbmaxlen;
+ uint body_utf8_length= get_body_utf8_maximum_length(thd);
m_body_utf8= (char *) thd->alloc(body_utf8_length + 1);
m_body_utf8_ptr= m_body_utf8;
@@ -335,6 +333,22 @@ void Lex_input_stream::body_utf8_start(THD *thd, const char *begin_ptr)
m_cpp_utf8_processed_ptr= begin_ptr;
}
+
+uint Lex_input_stream::get_body_utf8_maximum_length(THD *thd)
+{
+ /*
+ String literals can grow during escaping:
+ 1a. Character string '<TAB>' can grow to '\t', 3 bytes to 4 bytes growth.
+ 1b. Character string '1000 times <TAB>' grows from
+ 1002 to 2002 bytes (including quotes), which gives a little bit
+ less than 2 times growth.
+ "2" should be a reasonable multiplier that safely covers escaping needs.
+ */
+ return (m_buf_length / thd->variables.character_set_client->mbminlen) *
+ my_charset_utf8_bin.mbmaxlen * 2/*for escaping*/;
+}
+
+
/**
@brief The operation appends unprocessed part of pre-processed buffer till
the given pointer (ptr) and sets m_cpp_utf8_processed_ptr to end_ptr.
@@ -402,15 +416,15 @@ void Lex_input_stream::body_utf8_append(const char *ptr)
operation.
*/
-void Lex_input_stream::body_utf8_append_literal(THD *thd,
- const LEX_STRING *txt,
- CHARSET_INFO *txt_cs,
- const char *end_ptr)
+void Lex_input_stream::body_utf8_append_ident(THD *thd,
+ const LEX_STRING *txt,
+ const char *end_ptr)
{
if (!m_cpp_utf8_processed_ptr)
return;
LEX_STRING utf_txt;
+ CHARSET_INFO *txt_cs= thd->charset();
if (!my_charset_same(txt_cs, &my_charset_utf8_general_ci))
{
@@ -434,6 +448,189 @@ void Lex_input_stream::body_utf8_append_literal(THD *thd,
m_cpp_utf8_processed_ptr= end_ptr;
}
+
+
+
+extern "C" {
+
+/**
+ Escape a character. Consequently puts "escape" and "wc" characters into
+ the destination utf8 string.
+ @param cs - the character set (utf8)
+ @param escape - the escape character (backslash, single quote, double quote)
+ @param wc - the character to be escaped
+ @param str - the destination string
+ @param end - the end of the destination string
+ @returns - a code according to the wc_mb() convension.
+*/
+int my_wc_mb_utf8_with_escape(CHARSET_INFO *cs, my_wc_t escape, my_wc_t wc,
+ uchar *str, uchar *end)
+{
+ DBUG_ASSERT(escape > 0);
+ if (str + 1 >= end)
+ return MY_CS_TOOSMALL2; // Not enough space, need at least two bytes.
+ *str= escape;
+ int cnvres= my_charset_utf8_handler.wc_mb(cs, wc, str + 1, end);
+ if (cnvres > 0)
+ return cnvres + 1; // The character was normally put
+ if (cnvres == MY_CS_ILUNI)
+ return MY_CS_ILUNI; // Could not encode "wc" (e.g. non-BMP character)
+ DBUG_ASSERT(cnvres <= MY_CS_TOOSMALL);
+ return cnvres - 1; // Not enough space
+}
+
+
+/**
+ Optionally escape a character.
+ If "escape" is non-zero, then both "escape" and "wc" are put to
+ the destination string. Otherwise, only "wc" is put.
+ @param cs - the character set (utf8)
+ @param wc - the character to be optionally escaped
+ @param escape - the escape character, or 0
+ @param ewc - the escaped replacement of "wc" (e.g. 't' for '\t')
+ @param str - the destination string
+ @param end - the end of the destination string
+ @returns - a code according to the wc_mb() conversion.
+*/
+int my_wc_mb_utf8_opt_escape(CHARSET_INFO *cs,
+ my_wc_t wc, my_wc_t escape, my_wc_t ewc,
+ uchar *str, uchar *end)
+{
+ return escape ? my_wc_mb_utf8_with_escape(cs, escape, ewc, str, end) :
+ my_charset_utf8_handler.wc_mb(cs, wc, str, end);
+}
+
+/**
+ Encode a character with optional backlash escaping and quote escaping.
+ Quote marks are escaped using another quote mark.
+ Additionally, if "escape" is non-zero, then special characters are
+ also escaped using "escape".
+ Otherwise (if "escape" is zero, e.g. in case of MODE_NO_BACKSLASH_ESCAPES),
+ then special characters are not escaped and handled as normal characters.
+
+ @param cs - the character set (utf8)
+ @param wc - the character to be encoded
+ @param str - the destination string
+ @param end - the end of the destination string
+ @param sep - the string delimiter (e.g. ' or ")
+ @param escape - the escape character (backslash, or 0)
+ @returns - a code according to the wc_mb() convension.
+*/
+int my_wc_mb_utf8_escape(CHARSET_INFO *cs, my_wc_t wc, uchar *str, uchar *end,
+ my_wc_t sep, my_wc_t escape)
+{
+ DBUG_ASSERT(escape == 0 || escape == '\\');
+ DBUG_ASSERT(sep == '"' || sep == '\'');
+ switch (wc) {
+ case 0: return my_wc_mb_utf8_opt_escape(cs, wc, escape, '0', str, end);
+ case '\t': return my_wc_mb_utf8_opt_escape(cs, wc, escape, 't', str, end);
+ case '\r': return my_wc_mb_utf8_opt_escape(cs, wc, escape, 'r', str, end);
+ case '\n': return my_wc_mb_utf8_opt_escape(cs, wc, escape, 'n', str, end);
+ case '\032': return my_wc_mb_utf8_opt_escape(cs, wc, escape, 'Z', str, end);
+ case '\'':
+ case '\"':
+ if (wc == sep)
+ return my_wc_mb_utf8_with_escape(cs, wc, wc, str, end);
+ }
+ return my_charset_utf8_handler.wc_mb(cs, wc, str, end); // No escaping needed
+}
+
+
+/** wc_mb() compatible routines for all sql_mode and delimiter combinations */
+int my_wc_mb_utf8_escape_single_quote_and_backslash(CHARSET_INFO *cs,
+ my_wc_t wc,
+ uchar *str, uchar *end)
+{
+ return my_wc_mb_utf8_escape(cs, wc, str, end, '\'', '\\');
+}
+
+
+int my_wc_mb_utf8_escape_double_quote_and_backslash(CHARSET_INFO *cs,
+ my_wc_t wc,
+ uchar *str, uchar *end)
+{
+ return my_wc_mb_utf8_escape(cs, wc, str, end, '"', '\\');
+}
+
+
+int my_wc_mb_utf8_escape_single_quote(CHARSET_INFO *cs, my_wc_t wc,
+ uchar *str, uchar *end)
+{
+ return my_wc_mb_utf8_escape(cs, wc, str, end, '\'', 0);
+}
+
+
+int my_wc_mb_utf8_escape_double_quote(CHARSET_INFO *cs, my_wc_t wc,
+ uchar *str, uchar *end)
+{
+ return my_wc_mb_utf8_escape(cs, wc, str, end, '"', 0);
+}
+
+}; // End of extern "C"
+
+
+/**
+ Get an escaping function, depending on the current sql_mode and the
+ string separator.
+*/
+my_charset_conv_wc_mb
+Lex_input_stream::get_escape_func(THD *thd, my_wc_t sep) const
+{
+ return thd->backslash_escapes() ?
+ (sep == '"' ? my_wc_mb_utf8_escape_double_quote_and_backslash:
+ my_wc_mb_utf8_escape_single_quote_and_backslash) :
+ (sep == '"' ? my_wc_mb_utf8_escape_double_quote:
+ my_wc_mb_utf8_escape_single_quote);
+}
+
+
+/**
+ Append a text literal to the end of m_body_utf8.
+ The string is escaped according to the current sql_mode and the
+ string delimiter (e.g. ' or ").
+
+ @param thd - current THD
+ @param txt - the string to be appended to m_body_utf8.
+ Note, the string must be already unescaped.
+ @param cs - the character set of the string
+ @param end_ptr - m_cpp_utf8_processed_ptr will be set to this value
+ (see body_utf8_append_ident for details)
+ @param sep - the string delimiter (single or double quote)
+*/
+void Lex_input_stream::body_utf8_append_escape(THD *thd,
+ const LEX_STRING *txt,
+ CHARSET_INFO *cs,
+ const char *end_ptr,
+ my_wc_t sep)
+{
+ DBUG_ASSERT(sep == '\'' || sep == '"');
+ if (!m_cpp_utf8_processed_ptr)
+ return;
+ uint errors;
+ /**
+ We previously alloced m_body_utf8 to be able to store the query with all
+ strings properly escaped. See get_body_utf8_maximum_length().
+ So here we have guaranteedly enough space to append any string literal
+ with escaping. Passing txt->length*2 as "available space" is always safe.
+ For better safety purposes we could calculate get_body_utf8_maximum_length()
+ every time we append a string, but this would affect performance negatively,
+ so let's check that we don't get beyond the allocated buffer in
+ debug build only.
+ */
+ DBUG_ASSERT(m_body_utf8 + get_body_utf8_maximum_length(thd) >=
+ m_body_utf8_ptr + txt->length * 2);
+ uint32 cnv_length= my_convert_using_func(m_body_utf8_ptr, txt->length * 2,
+ &my_charset_utf8_general_ci,
+ get_escape_func(thd, sep),
+ txt->str, txt->length,
+ cs, cs->cset->mb_wc,
+ &errors);
+ m_body_utf8_ptr+= cnv_length;
+ *m_body_utf8_ptr= 0;
+ m_cpp_utf8_processed_ptr= end_ptr;
+}
+
+
void Lex_input_stream::add_digest_token(uint token, LEX_YYSTYPE yylval)
{
if (m_digest != NULL)
@@ -796,14 +993,14 @@ Lex_input_stream::unescape(CHARSET_INFO *cs, char *to,
Fix sometimes to do only one scan of the string
*/
-bool Lex_input_stream::get_text(LEX_STRING *dst, int pre_skip, int post_skip)
+bool Lex_input_stream::get_text(LEX_STRING *dst, uint sep,
+ int pre_skip, int post_skip)
{
- reg1 uchar c,sep;
+ reg1 uchar c;
uint found_escape=0;
CHARSET_INFO *cs= m_thd->charset();
tok_bitmap= 0;
- sep= yyGetLast(); // String should end with this
while (! eof())
{
c= yyGet();
@@ -1168,6 +1365,8 @@ static int lex_one_token(YYSTYPE *yylval, THD *thd)
return((int) c);
case MY_LEX_IDENT_OR_NCHAR:
+ {
+ uint sep;
if (lip->yyPeek() != '\'')
{
state= MY_LEX_IDENT;
@@ -1175,14 +1374,20 @@ static int lex_one_token(YYSTYPE *yylval, THD *thd)
}
/* Found N'string' */
lip->yySkip(); // Skip '
- if (lip->get_text(&yylval->lex_str, 2, 1))
+ if (lip->get_text(&yylval->lex_str, (sep= lip->yyGetLast()), 2, 1))
{
state= MY_LEX_CHAR; // Read char by char
break;
}
+
+ lip->body_utf8_append(lip->m_cpp_text_start);
+ lip->body_utf8_append_escape(thd, &yylval->lex_str,
+ national_charset_info,
+ lip->m_cpp_text_end, sep);
+
lex->text_string_is_7bit= (lip->tok_bitmap & 0x80) ? 0 : 1;
return(NCHAR_STRING);
-
+ }
case MY_LEX_IDENT_OR_HEX:
if (lip->yyPeek() == '\'')
{ // Found x'hex-number'
@@ -1285,8 +1490,7 @@ static int lex_one_token(YYSTYPE *yylval, THD *thd)
lip->body_utf8_append(lip->m_cpp_text_start);
- lip->body_utf8_append_literal(thd, &yylval->lex_str, cs,
- lip->m_cpp_text_end);
+ lip->body_utf8_append_ident(thd, &yylval->lex_str, lip->m_cpp_text_end);
return(result_state); // IDENT or IDENT_QUOTED
@@ -1390,8 +1594,7 @@ static int lex_one_token(YYSTYPE *yylval, THD *thd)
lip->body_utf8_append(lip->m_cpp_text_start);
- lip->body_utf8_append_literal(thd, &yylval->lex_str, cs,
- lip->m_cpp_text_end);
+ lip->body_utf8_append_ident(thd, &yylval->lex_str, lip->m_cpp_text_end);
return(result_state);
@@ -1434,8 +1637,7 @@ static int lex_one_token(YYSTYPE *yylval, THD *thd)
lip->body_utf8_append(lip->m_cpp_text_start);
- lip->body_utf8_append_literal(thd, &yylval->lex_str, cs,
- lip->m_cpp_text_end);
+ lip->body_utf8_append_ident(thd, &yylval->lex_str, lip->m_cpp_text_end);
return(IDENT_QUOTED);
}
@@ -1540,23 +1742,23 @@ static int lex_one_token(YYSTYPE *yylval, THD *thd)
}
/* " used for strings */
case MY_LEX_STRING: // Incomplete text string
- if (lip->get_text(&yylval->lex_str, 1, 1))
+ {
+ uint sep;
+ if (lip->get_text(&yylval->lex_str, (sep= lip->yyGetLast()), 1, 1))
{
state= MY_LEX_CHAR; // Read char by char
break;
}
-
+ CHARSET_INFO *strcs= lip->m_underscore_cs ? lip->m_underscore_cs : cs;
lip->body_utf8_append(lip->m_cpp_text_start);
- lip->body_utf8_append_literal(thd, &yylval->lex_str,
- lip->m_underscore_cs ? lip->m_underscore_cs : cs,
- lip->m_cpp_text_end);
-
+ lip->body_utf8_append_escape(thd, &yylval->lex_str, strcs,
+ lip->m_cpp_text_end, sep);
lip->m_underscore_cs= NULL;
lex->text_string_is_7bit= (lip->tok_bitmap & 0x80) ? 0 : 1;
return(TEXT_STRING);
-
+ }
case MY_LEX_COMMENT: // Comment
lex->select_lex.options|= OPTION_FOUND_COMMENT;
while ((c = lip->yyGet()) != '\n' && c) ;
@@ -1805,8 +2007,7 @@ static int lex_one_token(YYSTYPE *yylval, THD *thd)
lip->body_utf8_append(lip->m_cpp_text_start);
- lip->body_utf8_append_literal(thd, &yylval->lex_str, cs,
- lip->m_cpp_text_end);
+ lip->body_utf8_append_ident(thd, &yylval->lex_str, lip->m_cpp_text_end);
return(result_state);
}
@@ -3915,6 +4116,19 @@ void SELECT_LEX::update_used_tables()
tl->on_expr->update_used_tables();
tl->on_expr->walk(&Item::eval_not_null_tables, 0, NULL);
}
+ /*
+ - There is no need to check sj_on_expr, because merged semi-joins inject
+ sj_on_expr into the parent's WHERE clase.
+ - For non-merged semi-joins (aka JTBMs), we need to check their
+ left_expr. There is no need to check the rest of the subselect, we know
+ it is uncorrelated and so cannot refer to any tables in this select.
+ */
+ if (tl->jtbm_subselect)
+ {
+ Item *left_expr= tl->jtbm_subselect->left_expr;
+ left_expr->walk(&Item::update_table_bitmaps_processor, FALSE, NULL);
+ }
+
embedding= tl->embedding;
while (embedding)
{