diff options
Diffstat (limited to 'sql/sql_lex.h')
-rw-r--r-- | sql/sql_lex.h | 334 |
1 files changed, 314 insertions, 20 deletions
diff --git a/sql/sql_lex.h b/sql/sql_lex.h index 2cecf282e7f..7057bf4b5ef 100644 --- a/sql/sql_lex.h +++ b/sql/sql_lex.h @@ -1049,8 +1049,40 @@ struct st_parsing_options /** + The state of the lexical parser, when parsing comments. +*/ +enum enum_comment_state +{ + /** + Not parsing comments. + */ + NO_COMMENT, + /** + Parsing comments that need to be preserved. + Typically, these are user comments '/' '*' ... '*' '/'. + */ + PRESERVE_COMMENT, + /** + Parsing comments that need to be discarded. + Typically, these are special comments '/' '*' '!' ... '*' '/', + or '/' '*' '!' 'M' 'M' 'm' 'm' 'm' ... '*' '/', where the comment + markers should not be expanded. + */ + DISCARD_COMMENT +}; + + +/** This class represents the character input stream consumed during lexical analysis. + In addition to consuming the input stream, this class performs some + comment pre processing, by filtering out out of bound special text + from the query input stream. + Two buffers, with pointers inside each buffers, are maintained in + parallel. The 'raw' buffer is the original query text, which may + contain out-of-bound comments. The 'cpp' (for comments pre processor) + is the pre-processed buffer that contains only the query text that + should be seen once out-of-bound data is removed. */ class Lex_input_stream { @@ -1058,6 +1090,218 @@ public: Lex_input_stream(THD *thd, const char* buff, unsigned int length); ~Lex_input_stream(); + /** + Set the echo mode. + When echo is true, characters parsed from the raw input stream are + preserved. When false, characters parsed are silently ignored. + @param echo the echo mode. + */ + void set_echo(bool echo) + { + m_echo= echo; + } + + /** + Skip binary from the input stream. + @param n number of bytes to accept. + */ + void skip_binary(int n) + { + if (m_echo) + { + memcpy(m_cpp_ptr, m_ptr, n); + m_cpp_ptr += n; + } + m_ptr += n; + } + + /** + Get a character, and advance in the stream. + @return the next character to parse. + */ + char yyGet() + { + char c= *m_ptr++; + if (m_echo) + *m_cpp_ptr++ = c; + return c; + } + + /** + Get the last character accepted. + @return the last character accepted. + */ + char yyGetLast() + { + return m_ptr[-1]; + } + + /** + Look at the next character to parse, but do not accept it. + */ + char yyPeek() + { + return m_ptr[0]; + } + + /** + Look ahead at some character to parse. + @param n offset of the character to look up + */ + char yyPeekn(int n) + { + return m_ptr[n]; + } + + /** + Cancel the effect of the last yyGet() or yySkip(). + Note that the echo mode should not change between calls to yyGet / yySkip + and yyUnget. The caller is responsible for ensuring that. + */ + void yyUnget() + { + m_ptr--; + if (m_echo) + m_cpp_ptr--; + } + + /** + Accept a character, by advancing the input stream. + */ + void yySkip() + { + if (m_echo) + *m_cpp_ptr++ = *m_ptr++; + else + m_ptr++; + } + + /** + Accept multiple characters at once. + @param n the number of characters to accept. + */ + void yySkipn(int n) + { + if (m_echo) + { + memcpy(m_cpp_ptr, m_ptr, n); + m_cpp_ptr += n; + } + m_ptr += n; + } + + /** + End of file indicator for the query text to parse. + @return true if there are no more characters to parse + */ + bool eof() + { + return (m_ptr >= m_end_of_query); + } + + /** + End of file indicator for the query text to parse. + @param n number of characters expected + @return true if there are less than n characters to parse + */ + bool eof(int n) + { + return ((m_ptr + n) >= m_end_of_query); + } + + /** Get the raw query buffer. */ + const char* get_buf() + { + return m_buf; + } + + /** Get the pre-processed query buffer. */ + const char* get_cpp_buf() + { + return m_cpp_buf; + } + + /** Get the end of the raw query buffer. */ + const char* get_end_of_query() + { + return m_end_of_query; + } + + /** Mark the stream position as the start of a new token. */ + void start_token() + { + m_tok_start_prev= m_tok_start; + m_tok_start= m_ptr; + m_tok_end= m_ptr; + + m_cpp_tok_start_prev= m_cpp_tok_start; + m_cpp_tok_start= m_cpp_ptr; + m_cpp_tok_end= m_cpp_ptr; + } + + /** + Adjust the starting position of the current token. + This is used to compensate for starting whitespace. + */ + void restart_token() + { + m_tok_start= m_ptr; + m_cpp_tok_start= m_cpp_ptr; + } + + /** Get the token start position, in the raw buffer. */ + const char* get_tok_start() + { + return m_tok_start; + } + + /** Get the token start position, in the pre-processed buffer. */ + const char* get_cpp_tok_start() + { + return m_cpp_tok_start; + } + + /** Get the token end position, in the raw buffer. */ + const char* get_tok_end() + { + return m_tok_end; + } + + /** Get the token end position, in the pre-processed buffer. */ + const char* get_cpp_tok_end() + { + return m_cpp_tok_end; + } + + /** Get the previous token start position, in the raw buffer. */ + const char* get_tok_start_prev() + { + return m_tok_start_prev; + } + + /** Get the current stream pointer, in the raw buffer. */ + const char* get_ptr() + { + return m_ptr; + } + + /** Get the current stream pointer, in the pre-processed buffer. */ + const char* get_cpp_ptr() + { + return m_cpp_ptr; + } + + /** Get the length of the current token, in the raw buffer. */ + uint yyLength() + { + /* + The assumption is that the lexical analyser is always 1 character ahead, + which the -1 account for. + */ + DBUG_ASSERT(m_ptr > m_tok_start); + return (uint) ((m_ptr - m_tok_start) - 1); + } + /** Current thread. */ THD *m_thd; @@ -1070,37 +1314,74 @@ public: /** Interface with bison, value of the last token parsed. */ LEX_YYSTYPE yylval; - /** Pointer to the current position in the input stream. */ - const char* ptr; +private: + /** Pointer to the current position in the raw input stream. */ + const char* m_ptr; + + /** Starting position of the last token parsed, in the raw buffer. */ + const char* m_tok_start; - /** Starting position of the last token parsed. */ - const char* tok_start; + /** Ending position of the previous token parsed, in the raw buffer. */ + const char* m_tok_end; - /** Ending position of the last token parsed. */ - const char* tok_end; + /** End of the query text in the input stream, in the raw buffer. */ + const char* m_end_of_query; - /** End of the query text in the input stream. */ - const char* end_of_query; + /** Starting position of the previous token parsed, in the raw buffer. */ + const char* m_tok_start_prev; - /** Starting position of the previous token parsed. */ - const char* tok_start_prev; + /** Begining of the query text in the input stream, in the raw buffer. */ + const char* m_buf; - /** Begining of the query text in the input stream. */ - const char* buf; + /** Echo the parsed stream to the pre-processed buffer. */ + bool m_echo; + + /** Pre-processed buffer. */ + char* m_cpp_buf; + + /** Pointer to the current position in the pre-processed input stream. */ + char* m_cpp_ptr; + + /** + Starting position of the last token parsed, + in the pre-processed buffer. + */ + const char* m_cpp_tok_start; + + /** + Starting position of the previous token parsed, + in the pre-procedded buffer. + */ + const char* m_cpp_tok_start_prev; + + /** + Ending position of the previous token parsed, + in the pre-processed buffer. + */ + const char* m_cpp_tok_end; + +public: /** Current state of the lexical analyser. */ enum my_lex_states next_state; - /** Position of ';' in the stream, to delimit multiple queries. */ + /** + Position of ';' in the stream, to delimit multiple queries. + This delimiter is in the raw buffer. + */ const char* found_semicolon; /** SQL_MODE = IGNORE_SPACE. */ bool ignore_space; - /* + + /** TRUE if we're parsing a prepared statement: in this mode we should allow placeholders and disallow multi-statements. */ bool stmt_prepare_mode; + + /** State of the lexical analyser for comments. */ + enum_comment_state in_comment; }; @@ -1138,8 +1419,17 @@ typedef struct st_lex : public Query_tables_list CHARSET_INFO *charset, *underscore_charset; /* store original leaf_tables for INSERT SELECT and PS/SP */ TABLE_LIST *leaf_tables_insert; - /* Position (first character index) of SELECT of CREATE VIEW statement */ - uint create_view_select_start; + + /** Start of SELECT of CREATE VIEW statement */ + const char* create_view_select_start; + /** End of SELECT of CREATE VIEW statement */ + const char* create_view_select_end; + + /** Start of 'ON <table>', in trigger statements. */ + const char* raw_trg_on_table_name_begin; + /** End of 'ON <table>', in trigger statements. */ + const char* raw_trg_on_table_name_end; + /* Partition info structure filled in by PARTITION BY parse part */ partition_info *part_info; @@ -1238,7 +1528,9 @@ typedef struct st_lex : public Query_tables_list uint8 create_view_algorithm; uint8 create_view_check; bool drop_if_exists, drop_temporary, local_file, one_shot_set; - bool in_comment, verbose, no_write_to_binlog; + + bool verbose, no_write_to_binlog; + bool tx_chain, tx_release; /* Special JOIN::prepare mode: changing of query is prohibited. @@ -1302,10 +1594,12 @@ typedef struct st_lex : public Query_tables_list - CREATE FUNCTION (points to "FUNCTION" or "AGGREGATE"); This pointer is required to add possibly omitted DEFINER-clause to the - DDL-statement before dumping it to the binlog. + DDL-statement before dumping it to the binlog. */ const char *stmt_definition_begin; + const char *stmt_definition_end; + /* Pointers to part of LOAD DATA statement that should be rewritten during replication ("LOCAL 'filename' REPLACE INTO" part). @@ -1434,8 +1728,8 @@ extern void lex_free(void); extern void lex_start(THD *thd); extern void lex_end(LEX *lex); extern int MYSQLlex(void *arg, void *yythd); -extern const char *skip_rear_comments(CHARSET_INFO *cs, const char *ubegin, - const char *uend); + +extern void trim_whitespace(CHARSET_INFO *cs, LEX_STRING *str); extern bool is_lex_native_function(const LEX_STRING *name); |