summaryrefslogtreecommitdiff
path: root/sql/sql_lex.h
diff options
context:
space:
mode:
Diffstat (limited to 'sql/sql_lex.h')
-rw-r--r--sql/sql_lex.h334
1 files changed, 314 insertions, 20 deletions
diff --git a/sql/sql_lex.h b/sql/sql_lex.h
index 2cecf282e7f..7057bf4b5ef 100644
--- a/sql/sql_lex.h
+++ b/sql/sql_lex.h
@@ -1049,8 +1049,40 @@ struct st_parsing_options
/**
+ The state of the lexical parser, when parsing comments.
+*/
+enum enum_comment_state
+{
+ /**
+ Not parsing comments.
+ */
+ NO_COMMENT,
+ /**
+ Parsing comments that need to be preserved.
+ Typically, these are user comments '/' '*' ... '*' '/'.
+ */
+ PRESERVE_COMMENT,
+ /**
+ Parsing comments that need to be discarded.
+ Typically, these are special comments '/' '*' '!' ... '*' '/',
+ or '/' '*' '!' 'M' 'M' 'm' 'm' 'm' ... '*' '/', where the comment
+ markers should not be expanded.
+ */
+ DISCARD_COMMENT
+};
+
+
+/**
This class represents the character input stream consumed during
lexical analysis.
+ In addition to consuming the input stream, this class performs some
+ comment pre processing, by filtering out out of bound special text
+ from the query input stream.
+ Two buffers, with pointers inside each buffers, are maintained in
+ parallel. The 'raw' buffer is the original query text, which may
+ contain out-of-bound comments. The 'cpp' (for comments pre processor)
+ is the pre-processed buffer that contains only the query text that
+ should be seen once out-of-bound data is removed.
*/
class Lex_input_stream
{
@@ -1058,6 +1090,218 @@ public:
Lex_input_stream(THD *thd, const char* buff, unsigned int length);
~Lex_input_stream();
+ /**
+ Set the echo mode.
+ When echo is true, characters parsed from the raw input stream are
+ preserved. When false, characters parsed are silently ignored.
+ @param echo the echo mode.
+ */
+ void set_echo(bool echo)
+ {
+ m_echo= echo;
+ }
+
+ /**
+ Skip binary from the input stream.
+ @param n number of bytes to accept.
+ */
+ void skip_binary(int n)
+ {
+ if (m_echo)
+ {
+ memcpy(m_cpp_ptr, m_ptr, n);
+ m_cpp_ptr += n;
+ }
+ m_ptr += n;
+ }
+
+ /**
+ Get a character, and advance in the stream.
+ @return the next character to parse.
+ */
+ char yyGet()
+ {
+ char c= *m_ptr++;
+ if (m_echo)
+ *m_cpp_ptr++ = c;
+ return c;
+ }
+
+ /**
+ Get the last character accepted.
+ @return the last character accepted.
+ */
+ char yyGetLast()
+ {
+ return m_ptr[-1];
+ }
+
+ /**
+ Look at the next character to parse, but do not accept it.
+ */
+ char yyPeek()
+ {
+ return m_ptr[0];
+ }
+
+ /**
+ Look ahead at some character to parse.
+ @param n offset of the character to look up
+ */
+ char yyPeekn(int n)
+ {
+ return m_ptr[n];
+ }
+
+ /**
+ Cancel the effect of the last yyGet() or yySkip().
+ Note that the echo mode should not change between calls to yyGet / yySkip
+ and yyUnget. The caller is responsible for ensuring that.
+ */
+ void yyUnget()
+ {
+ m_ptr--;
+ if (m_echo)
+ m_cpp_ptr--;
+ }
+
+ /**
+ Accept a character, by advancing the input stream.
+ */
+ void yySkip()
+ {
+ if (m_echo)
+ *m_cpp_ptr++ = *m_ptr++;
+ else
+ m_ptr++;
+ }
+
+ /**
+ Accept multiple characters at once.
+ @param n the number of characters to accept.
+ */
+ void yySkipn(int n)
+ {
+ if (m_echo)
+ {
+ memcpy(m_cpp_ptr, m_ptr, n);
+ m_cpp_ptr += n;
+ }
+ m_ptr += n;
+ }
+
+ /**
+ End of file indicator for the query text to parse.
+ @return true if there are no more characters to parse
+ */
+ bool eof()
+ {
+ return (m_ptr >= m_end_of_query);
+ }
+
+ /**
+ End of file indicator for the query text to parse.
+ @param n number of characters expected
+ @return true if there are less than n characters to parse
+ */
+ bool eof(int n)
+ {
+ return ((m_ptr + n) >= m_end_of_query);
+ }
+
+ /** Get the raw query buffer. */
+ const char* get_buf()
+ {
+ return m_buf;
+ }
+
+ /** Get the pre-processed query buffer. */
+ const char* get_cpp_buf()
+ {
+ return m_cpp_buf;
+ }
+
+ /** Get the end of the raw query buffer. */
+ const char* get_end_of_query()
+ {
+ return m_end_of_query;
+ }
+
+ /** Mark the stream position as the start of a new token. */
+ void start_token()
+ {
+ m_tok_start_prev= m_tok_start;
+ m_tok_start= m_ptr;
+ m_tok_end= m_ptr;
+
+ m_cpp_tok_start_prev= m_cpp_tok_start;
+ m_cpp_tok_start= m_cpp_ptr;
+ m_cpp_tok_end= m_cpp_ptr;
+ }
+
+ /**
+ Adjust the starting position of the current token.
+ This is used to compensate for starting whitespace.
+ */
+ void restart_token()
+ {
+ m_tok_start= m_ptr;
+ m_cpp_tok_start= m_cpp_ptr;
+ }
+
+ /** Get the token start position, in the raw buffer. */
+ const char* get_tok_start()
+ {
+ return m_tok_start;
+ }
+
+ /** Get the token start position, in the pre-processed buffer. */
+ const char* get_cpp_tok_start()
+ {
+ return m_cpp_tok_start;
+ }
+
+ /** Get the token end position, in the raw buffer. */
+ const char* get_tok_end()
+ {
+ return m_tok_end;
+ }
+
+ /** Get the token end position, in the pre-processed buffer. */
+ const char* get_cpp_tok_end()
+ {
+ return m_cpp_tok_end;
+ }
+
+ /** Get the previous token start position, in the raw buffer. */
+ const char* get_tok_start_prev()
+ {
+ return m_tok_start_prev;
+ }
+
+ /** Get the current stream pointer, in the raw buffer. */
+ const char* get_ptr()
+ {
+ return m_ptr;
+ }
+
+ /** Get the current stream pointer, in the pre-processed buffer. */
+ const char* get_cpp_ptr()
+ {
+ return m_cpp_ptr;
+ }
+
+ /** Get the length of the current token, in the raw buffer. */
+ uint yyLength()
+ {
+ /*
+ The assumption is that the lexical analyser is always 1 character ahead,
+ which the -1 account for.
+ */
+ DBUG_ASSERT(m_ptr > m_tok_start);
+ return (uint) ((m_ptr - m_tok_start) - 1);
+ }
+
/** Current thread. */
THD *m_thd;
@@ -1070,37 +1314,74 @@ public:
/** Interface with bison, value of the last token parsed. */
LEX_YYSTYPE yylval;
- /** Pointer to the current position in the input stream. */
- const char* ptr;
+private:
+ /** Pointer to the current position in the raw input stream. */
+ const char* m_ptr;
+
+ /** Starting position of the last token parsed, in the raw buffer. */
+ const char* m_tok_start;
- /** Starting position of the last token parsed. */
- const char* tok_start;
+ /** Ending position of the previous token parsed, in the raw buffer. */
+ const char* m_tok_end;
- /** Ending position of the last token parsed. */
- const char* tok_end;
+ /** End of the query text in the input stream, in the raw buffer. */
+ const char* m_end_of_query;
- /** End of the query text in the input stream. */
- const char* end_of_query;
+ /** Starting position of the previous token parsed, in the raw buffer. */
+ const char* m_tok_start_prev;
- /** Starting position of the previous token parsed. */
- const char* tok_start_prev;
+ /** Begining of the query text in the input stream, in the raw buffer. */
+ const char* m_buf;
- /** Begining of the query text in the input stream. */
- const char* buf;
+ /** Echo the parsed stream to the pre-processed buffer. */
+ bool m_echo;
+
+ /** Pre-processed buffer. */
+ char* m_cpp_buf;
+
+ /** Pointer to the current position in the pre-processed input stream. */
+ char* m_cpp_ptr;
+
+ /**
+ Starting position of the last token parsed,
+ in the pre-processed buffer.
+ */
+ const char* m_cpp_tok_start;
+
+ /**
+ Starting position of the previous token parsed,
+ in the pre-procedded buffer.
+ */
+ const char* m_cpp_tok_start_prev;
+
+ /**
+ Ending position of the previous token parsed,
+ in the pre-processed buffer.
+ */
+ const char* m_cpp_tok_end;
+
+public:
/** Current state of the lexical analyser. */
enum my_lex_states next_state;
- /** Position of ';' in the stream, to delimit multiple queries. */
+ /**
+ Position of ';' in the stream, to delimit multiple queries.
+ This delimiter is in the raw buffer.
+ */
const char* found_semicolon;
/** SQL_MODE = IGNORE_SPACE. */
bool ignore_space;
- /*
+
+ /**
TRUE if we're parsing a prepared statement: in this mode
we should allow placeholders and disallow multi-statements.
*/
bool stmt_prepare_mode;
+
+ /** State of the lexical analyser for comments. */
+ enum_comment_state in_comment;
};
@@ -1138,8 +1419,17 @@ typedef struct st_lex : public Query_tables_list
CHARSET_INFO *charset, *underscore_charset;
/* store original leaf_tables for INSERT SELECT and PS/SP */
TABLE_LIST *leaf_tables_insert;
- /* Position (first character index) of SELECT of CREATE VIEW statement */
- uint create_view_select_start;
+
+ /** Start of SELECT of CREATE VIEW statement */
+ const char* create_view_select_start;
+ /** End of SELECT of CREATE VIEW statement */
+ const char* create_view_select_end;
+
+ /** Start of 'ON <table>', in trigger statements. */
+ const char* raw_trg_on_table_name_begin;
+ /** End of 'ON <table>', in trigger statements. */
+ const char* raw_trg_on_table_name_end;
+
/* Partition info structure filled in by PARTITION BY parse part */
partition_info *part_info;
@@ -1238,7 +1528,9 @@ typedef struct st_lex : public Query_tables_list
uint8 create_view_algorithm;
uint8 create_view_check;
bool drop_if_exists, drop_temporary, local_file, one_shot_set;
- bool in_comment, verbose, no_write_to_binlog;
+
+ bool verbose, no_write_to_binlog;
+
bool tx_chain, tx_release;
/*
Special JOIN::prepare mode: changing of query is prohibited.
@@ -1302,10 +1594,12 @@ typedef struct st_lex : public Query_tables_list
- CREATE FUNCTION (points to "FUNCTION" or "AGGREGATE");
This pointer is required to add possibly omitted DEFINER-clause to the
- DDL-statement before dumping it to the binlog.
+ DDL-statement before dumping it to the binlog.
*/
const char *stmt_definition_begin;
+ const char *stmt_definition_end;
+
/*
Pointers to part of LOAD DATA statement that should be rewritten
during replication ("LOCAL 'filename' REPLACE INTO" part).
@@ -1434,8 +1728,8 @@ extern void lex_free(void);
extern void lex_start(THD *thd);
extern void lex_end(LEX *lex);
extern int MYSQLlex(void *arg, void *yythd);
-extern const char *skip_rear_comments(CHARSET_INFO *cs, const char *ubegin,
- const char *uend);
+
+extern void trim_whitespace(CHARSET_INFO *cs, LEX_STRING *str);
extern bool is_lex_native_function(const LEX_STRING *name);