summaryrefslogtreecommitdiff
path: root/sql/sql_lex.h
diff options
context:
space:
mode:
authorunknown <malff/marcsql@weblab.(none)>2007-06-12 15:23:58 -0600
committerunknown <malff/marcsql@weblab.(none)>2007-06-12 15:23:58 -0600
commitf09496c8c23160a9b775a8123ae0b080f314885e (patch)
tree5827319d574182509c192b85dcb24886e07ced70 /sql/sql_lex.h
parent7afb6b58907754697908be4edeee311c56d5f944 (diff)
downloadmariadb-git-f09496c8c23160a9b775a8123ae0b080f314885e.tar.gz
Bug#25411 (trigger code truncated), PART II
Bug 28127 (Some valid identifiers names are not parsed correctly) Bug 26302 (MySQL server cuts off trailing "*/" from comments in SP/func) This patch is the second part of a major cleanup, required to fix Bug 25411 (trigger code truncated). The root cause of the issue stems from the function skip_rear_comments, which was a work around to remove "extra" "*/" characters from the query text, when parsing a query and reusing the text fragments to represent a view, trigger, function or stored procedure. The reason for this work around is that "special comments", like /*!50002 XXX */, were not parsed properly, so that a query like: AAA /*!50002 BBB */ CCC would be seen by the parser as "AAA BBB */ CCC" when the current version is greater or equal to 5.0.2 The root cause of this stems from how special comments are parsed. Special comments are really out-of-bound text that appear inside a query, that affects how the parser behave. In nature, /*!50002 XXX */ in MySQL is similar to the C concept of preprocessing : #if VERSION >= 50002 XXX #endif Depending on the current VERSION of the server, either the special comment should be expanded or it should be ignored, but in all cases the "text" of the query should be re-written to strip the "/*!50002" and "*/" markers, which does not belong to the SQL language itself. Prior to this fix, these markers would leak into : - the storage format for VIEW, - the storage format for FUNCTION, - the storage format for FUNCTION parameters, in mysql.proc (param_list), - the storage format for PROCEDURE, - the storage format for PROCEDURE parameters, in mysql.proc (param_list), - the storage format for TRIGGER, - the binary log used for replication. In all cases, not only this cause format corruption, but also provide a vector for dormant security issues, by allowing to tunnel code that will be activated after an upgrade. The proper solution is to deal with special comments strictly during parsing, when accepting a query from the outside world. Once a query is parsed and an object is created with a persistant representation, this object should not arbitrarily mutate after an upgrade. In short, special comments are a useful but limited feature for MYSQLdump, when used at an *interface* level to facilitate import/export, but bloating the server *internal* storage format is *not* the proper way to deal with configuration management of the user logic. With this fix: - the Lex_input_stream class now acts as a comment pre-processor, and either expands or ignore special comments on the fly. - MYSQLlex and sql_yacc.yy have been cleaned up to strictly use the public interface of Lex_input_stream. In particular, how the input stream accepts or rejects a character is private to Lex_input_stream, and the internal buffer pointers of that class are strictly private, and should not be tempered with during parsing. This caused many changes mostly in sql_lex.cc. During the code cleanup in case MY_LEX_NUMBER_IDENT, Bug 28127 (Some valid identifiers names are not parsed correctly) was found and fixed. By parsing special comments properly, and removing the function 'skip_rear_comments' [sic], Bug 26302 (MySQL server cuts off trailing "*/" from comments in SP/func) has been fixed as well. sql/event_data_objects.cc: Cleanup of the code that extracts the query text sql/sp.cc: Cleanup of the code that extracts the query text sql/sp_head.cc: Cleanup of the code that extracts the query text sql/sql_trigger.cc: Cleanup of the code that extracts the query text sql/sql_view.cc: Cleanup of the code that extracts the query text mysql-test/r/comments.result: Bug#25411 (trigger code truncated) mysql-test/r/sp.result: Bug#25411 (trigger code truncated) Bug 26302 (MySQL server cuts off trailing "*/" from comments in SP/func) mysql-test/r/trigger.result: Bug#25411 (trigger code truncated) mysql-test/r/varbinary.result: Bug 28127 (Some valid identifiers names are not parsed correctly) mysql-test/t/comments.test: Bug#25411 (trigger code truncated) mysql-test/t/sp.test: Bug#25411 (trigger code truncated) Bug 26302 (MySQL server cuts off trailing "*/" from comments in SP/func) mysql-test/t/trigger.test: Bug#25411 (trigger code truncated) mysql-test/t/varbinary.test: Bug 28127 (Some valid identifiers names are not parsed correctly) sql/sql_lex.cc: Implemented comment pre-processing in Lex_input_stream, major cleanup of the lex/yacc code to not use Lex_input_stream private members. sql/sql_lex.h: Implemented comment pre-processing in Lex_input_stream, major cleanup of the lex/yacc code to not use Lex_input_stream private members. sql/sql_yacc.yy: post merge fix : view_check_options must be parsed before signaling the end of the query
Diffstat (limited to 'sql/sql_lex.h')
-rw-r--r--sql/sql_lex.h334
1 files changed, 314 insertions, 20 deletions
diff --git a/sql/sql_lex.h b/sql/sql_lex.h
index 2cecf282e7f..7057bf4b5ef 100644
--- a/sql/sql_lex.h
+++ b/sql/sql_lex.h
@@ -1049,8 +1049,40 @@ struct st_parsing_options
/**
+ The state of the lexical parser, when parsing comments.
+*/
+enum enum_comment_state
+{
+ /**
+ Not parsing comments.
+ */
+ NO_COMMENT,
+ /**
+ Parsing comments that need to be preserved.
+ Typically, these are user comments '/' '*' ... '*' '/'.
+ */
+ PRESERVE_COMMENT,
+ /**
+ Parsing comments that need to be discarded.
+ Typically, these are special comments '/' '*' '!' ... '*' '/',
+ or '/' '*' '!' 'M' 'M' 'm' 'm' 'm' ... '*' '/', where the comment
+ markers should not be expanded.
+ */
+ DISCARD_COMMENT
+};
+
+
+/**
This class represents the character input stream consumed during
lexical analysis.
+ In addition to consuming the input stream, this class performs some
+ comment pre processing, by filtering out out of bound special text
+ from the query input stream.
+ Two buffers, with pointers inside each buffers, are maintained in
+ parallel. The 'raw' buffer is the original query text, which may
+ contain out-of-bound comments. The 'cpp' (for comments pre processor)
+ is the pre-processed buffer that contains only the query text that
+ should be seen once out-of-bound data is removed.
*/
class Lex_input_stream
{
@@ -1058,6 +1090,218 @@ public:
Lex_input_stream(THD *thd, const char* buff, unsigned int length);
~Lex_input_stream();
+ /**
+ Set the echo mode.
+ When echo is true, characters parsed from the raw input stream are
+ preserved. When false, characters parsed are silently ignored.
+ @param echo the echo mode.
+ */
+ void set_echo(bool echo)
+ {
+ m_echo= echo;
+ }
+
+ /**
+ Skip binary from the input stream.
+ @param n number of bytes to accept.
+ */
+ void skip_binary(int n)
+ {
+ if (m_echo)
+ {
+ memcpy(m_cpp_ptr, m_ptr, n);
+ m_cpp_ptr += n;
+ }
+ m_ptr += n;
+ }
+
+ /**
+ Get a character, and advance in the stream.
+ @return the next character to parse.
+ */
+ char yyGet()
+ {
+ char c= *m_ptr++;
+ if (m_echo)
+ *m_cpp_ptr++ = c;
+ return c;
+ }
+
+ /**
+ Get the last character accepted.
+ @return the last character accepted.
+ */
+ char yyGetLast()
+ {
+ return m_ptr[-1];
+ }
+
+ /**
+ Look at the next character to parse, but do not accept it.
+ */
+ char yyPeek()
+ {
+ return m_ptr[0];
+ }
+
+ /**
+ Look ahead at some character to parse.
+ @param n offset of the character to look up
+ */
+ char yyPeekn(int n)
+ {
+ return m_ptr[n];
+ }
+
+ /**
+ Cancel the effect of the last yyGet() or yySkip().
+ Note that the echo mode should not change between calls to yyGet / yySkip
+ and yyUnget. The caller is responsible for ensuring that.
+ */
+ void yyUnget()
+ {
+ m_ptr--;
+ if (m_echo)
+ m_cpp_ptr--;
+ }
+
+ /**
+ Accept a character, by advancing the input stream.
+ */
+ void yySkip()
+ {
+ if (m_echo)
+ *m_cpp_ptr++ = *m_ptr++;
+ else
+ m_ptr++;
+ }
+
+ /**
+ Accept multiple characters at once.
+ @param n the number of characters to accept.
+ */
+ void yySkipn(int n)
+ {
+ if (m_echo)
+ {
+ memcpy(m_cpp_ptr, m_ptr, n);
+ m_cpp_ptr += n;
+ }
+ m_ptr += n;
+ }
+
+ /**
+ End of file indicator for the query text to parse.
+ @return true if there are no more characters to parse
+ */
+ bool eof()
+ {
+ return (m_ptr >= m_end_of_query);
+ }
+
+ /**
+ End of file indicator for the query text to parse.
+ @param n number of characters expected
+ @return true if there are less than n characters to parse
+ */
+ bool eof(int n)
+ {
+ return ((m_ptr + n) >= m_end_of_query);
+ }
+
+ /** Get the raw query buffer. */
+ const char* get_buf()
+ {
+ return m_buf;
+ }
+
+ /** Get the pre-processed query buffer. */
+ const char* get_cpp_buf()
+ {
+ return m_cpp_buf;
+ }
+
+ /** Get the end of the raw query buffer. */
+ const char* get_end_of_query()
+ {
+ return m_end_of_query;
+ }
+
+ /** Mark the stream position as the start of a new token. */
+ void start_token()
+ {
+ m_tok_start_prev= m_tok_start;
+ m_tok_start= m_ptr;
+ m_tok_end= m_ptr;
+
+ m_cpp_tok_start_prev= m_cpp_tok_start;
+ m_cpp_tok_start= m_cpp_ptr;
+ m_cpp_tok_end= m_cpp_ptr;
+ }
+
+ /**
+ Adjust the starting position of the current token.
+ This is used to compensate for starting whitespace.
+ */
+ void restart_token()
+ {
+ m_tok_start= m_ptr;
+ m_cpp_tok_start= m_cpp_ptr;
+ }
+
+ /** Get the token start position, in the raw buffer. */
+ const char* get_tok_start()
+ {
+ return m_tok_start;
+ }
+
+ /** Get the token start position, in the pre-processed buffer. */
+ const char* get_cpp_tok_start()
+ {
+ return m_cpp_tok_start;
+ }
+
+ /** Get the token end position, in the raw buffer. */
+ const char* get_tok_end()
+ {
+ return m_tok_end;
+ }
+
+ /** Get the token end position, in the pre-processed buffer. */
+ const char* get_cpp_tok_end()
+ {
+ return m_cpp_tok_end;
+ }
+
+ /** Get the previous token start position, in the raw buffer. */
+ const char* get_tok_start_prev()
+ {
+ return m_tok_start_prev;
+ }
+
+ /** Get the current stream pointer, in the raw buffer. */
+ const char* get_ptr()
+ {
+ return m_ptr;
+ }
+
+ /** Get the current stream pointer, in the pre-processed buffer. */
+ const char* get_cpp_ptr()
+ {
+ return m_cpp_ptr;
+ }
+
+ /** Get the length of the current token, in the raw buffer. */
+ uint yyLength()
+ {
+ /*
+ The assumption is that the lexical analyser is always 1 character ahead,
+ which the -1 account for.
+ */
+ DBUG_ASSERT(m_ptr > m_tok_start);
+ return (uint) ((m_ptr - m_tok_start) - 1);
+ }
+
/** Current thread. */
THD *m_thd;
@@ -1070,37 +1314,74 @@ public:
/** Interface with bison, value of the last token parsed. */
LEX_YYSTYPE yylval;
- /** Pointer to the current position in the input stream. */
- const char* ptr;
+private:
+ /** Pointer to the current position in the raw input stream. */
+ const char* m_ptr;
+
+ /** Starting position of the last token parsed, in the raw buffer. */
+ const char* m_tok_start;
- /** Starting position of the last token parsed. */
- const char* tok_start;
+ /** Ending position of the previous token parsed, in the raw buffer. */
+ const char* m_tok_end;
- /** Ending position of the last token parsed. */
- const char* tok_end;
+ /** End of the query text in the input stream, in the raw buffer. */
+ const char* m_end_of_query;
- /** End of the query text in the input stream. */
- const char* end_of_query;
+ /** Starting position of the previous token parsed, in the raw buffer. */
+ const char* m_tok_start_prev;
- /** Starting position of the previous token parsed. */
- const char* tok_start_prev;
+ /** Begining of the query text in the input stream, in the raw buffer. */
+ const char* m_buf;
- /** Begining of the query text in the input stream. */
- const char* buf;
+ /** Echo the parsed stream to the pre-processed buffer. */
+ bool m_echo;
+
+ /** Pre-processed buffer. */
+ char* m_cpp_buf;
+
+ /** Pointer to the current position in the pre-processed input stream. */
+ char* m_cpp_ptr;
+
+ /**
+ Starting position of the last token parsed,
+ in the pre-processed buffer.
+ */
+ const char* m_cpp_tok_start;
+
+ /**
+ Starting position of the previous token parsed,
+ in the pre-procedded buffer.
+ */
+ const char* m_cpp_tok_start_prev;
+
+ /**
+ Ending position of the previous token parsed,
+ in the pre-processed buffer.
+ */
+ const char* m_cpp_tok_end;
+
+public:
/** Current state of the lexical analyser. */
enum my_lex_states next_state;
- /** Position of ';' in the stream, to delimit multiple queries. */
+ /**
+ Position of ';' in the stream, to delimit multiple queries.
+ This delimiter is in the raw buffer.
+ */
const char* found_semicolon;
/** SQL_MODE = IGNORE_SPACE. */
bool ignore_space;
- /*
+
+ /**
TRUE if we're parsing a prepared statement: in this mode
we should allow placeholders and disallow multi-statements.
*/
bool stmt_prepare_mode;
+
+ /** State of the lexical analyser for comments. */
+ enum_comment_state in_comment;
};
@@ -1138,8 +1419,17 @@ typedef struct st_lex : public Query_tables_list
CHARSET_INFO *charset, *underscore_charset;
/* store original leaf_tables for INSERT SELECT and PS/SP */
TABLE_LIST *leaf_tables_insert;
- /* Position (first character index) of SELECT of CREATE VIEW statement */
- uint create_view_select_start;
+
+ /** Start of SELECT of CREATE VIEW statement */
+ const char* create_view_select_start;
+ /** End of SELECT of CREATE VIEW statement */
+ const char* create_view_select_end;
+
+ /** Start of 'ON <table>', in trigger statements. */
+ const char* raw_trg_on_table_name_begin;
+ /** End of 'ON <table>', in trigger statements. */
+ const char* raw_trg_on_table_name_end;
+
/* Partition info structure filled in by PARTITION BY parse part */
partition_info *part_info;
@@ -1238,7 +1528,9 @@ typedef struct st_lex : public Query_tables_list
uint8 create_view_algorithm;
uint8 create_view_check;
bool drop_if_exists, drop_temporary, local_file, one_shot_set;
- bool in_comment, verbose, no_write_to_binlog;
+
+ bool verbose, no_write_to_binlog;
+
bool tx_chain, tx_release;
/*
Special JOIN::prepare mode: changing of query is prohibited.
@@ -1302,10 +1594,12 @@ typedef struct st_lex : public Query_tables_list
- CREATE FUNCTION (points to "FUNCTION" or "AGGREGATE");
This pointer is required to add possibly omitted DEFINER-clause to the
- DDL-statement before dumping it to the binlog.
+ DDL-statement before dumping it to the binlog.
*/
const char *stmt_definition_begin;
+ const char *stmt_definition_end;
+
/*
Pointers to part of LOAD DATA statement that should be rewritten
during replication ("LOCAL 'filename' REPLACE INTO" part).
@@ -1434,8 +1728,8 @@ extern void lex_free(void);
extern void lex_start(THD *thd);
extern void lex_end(LEX *lex);
extern int MYSQLlex(void *arg, void *yythd);
-extern const char *skip_rear_comments(CHARSET_INFO *cs, const char *ubegin,
- const char *uend);
+
+extern void trim_whitespace(CHARSET_INFO *cs, LEX_STRING *str);
extern bool is_lex_native_function(const LEX_STRING *name);