diff options
Diffstat (limited to 'sql')
-rw-r--r-- | sql/CMakeLists.txt | 30 | ||||
-rw-r--r-- | sql/event_scheduler.cc | 5 | ||||
-rw-r--r-- | sql/gen_lex_token.cc | 353 | ||||
-rw-r--r-- | sql/log_event.cc | 16 | ||||
-rw-r--r-- | sql/mysqld.cc | 3 | ||||
-rw-r--r-- | sql/mysqld.h | 1 | ||||
-rw-r--r-- | sql/sp_head.cc | 5 | ||||
-rw-r--r-- | sql/sql_class.cc | 9 | ||||
-rw-r--r-- | sql/sql_class.h | 9 | ||||
-rw-r--r-- | sql/sql_cursor.cc | 4 | ||||
-rw-r--r-- | sql/sql_digest.cc | 683 | ||||
-rw-r--r-- | sql/sql_digest.h | 130 | ||||
-rw-r--r-- | sql/sql_digest_stream.h | 51 | ||||
-rw-r--r-- | sql/sql_lex.cc | 27 | ||||
-rw-r--r-- | sql/sql_lex.h | 30 | ||||
-rw-r--r-- | sql/sql_parse.cc | 50 | ||||
-rw-r--r-- | sql/sql_yacc.yy | 8 | ||||
-rw-r--r-- | sql/sys_vars.cc | 6 |
18 files changed, 1400 insertions, 20 deletions
diff --git a/sql/CMakeLists.txt b/sql/CMakeLists.txt index fba1a68fa46..88ec26eb6f6 100644 --- a/sql/CMakeLists.txt +++ b/sql/CMakeLists.txt @@ -28,8 +28,26 @@ ${CMAKE_CURRENT_BINARY_DIR}/sql_yacc.h ${CMAKE_CURRENT_BINARY_DIR}/sql_yacc.cc ${CMAKE_CURRENT_BINARY_DIR}/lex_hash.h ) +SET(GEN_DIGEST_SOURCES + ${CMAKE_CURRENT_BINARY_DIR}/lex_token.h +) + +SET_SOURCE_FILES_PROPERTIES(${GEN_SOURCES} + ${GEN_DIGEST_SOURCES} + PROPERTIES GENERATED 1) + +# Gen_lex_token +# Make sure sql_yacc.h is generated before compiling gen_lex_token +IF(NOT CMAKE_CROSSCOMPILING) + ADD_EXECUTABLE(gen_lex_token gen_lex_token.cc) + ADD_DEPENDENCIES(gen_lex_token GenServerSource) +ENDIF() -SET_SOURCE_FILES_PROPERTIES(${GEN_SOURCES} PROPERTIES GENERATED 1) +ADD_CUSTOM_COMMAND( + OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/lex_token.h + COMMAND gen_lex_token > lex_token.h + DEPENDS gen_lex_token +) ADD_DEFINITIONS(-DMYSQL_SERVER -DHAVE_EVENT_SCHEDULER) @@ -59,7 +77,8 @@ SET (SQL_SOURCE slave.cc sp.cc sp_cache.cc sp_head.cc sp_pcontext.cc sp_rcontext.cc spatial.cc sql_acl.cc sql_analyse.cc sql_base.cc sql_cache.cc sql_class.cc sql_client.cc sql_crypt.cc sql_crypt.h - sql_cursor.cc sql_db.cc sql_delete.cc sql_derived.cc sql_do.cc + sql_cursor.cc sql_db.cc sql_delete.cc sql_derived.cc + sql_digest.cc sql_do.cc sql_error.cc sql_handler.cc sql_get_diagnostics.cc sql_help.cc sql_insert.cc sql_lex.cc sql_list.cc sql_load.cc sql_manager.cc @@ -96,6 +115,7 @@ SET (SQL_SOURCE table_cache.cc ${CMAKE_CURRENT_BINARY_DIR}/sql_builtin.cc ${GEN_SOURCES} + ${GEN_DIGEST_SOURCES} ${MYSYS_LIBWRAP_SOURCE} ) @@ -116,6 +136,7 @@ RECOMPILE_FOR_EMBEDDED) ADD_LIBRARY(sql STATIC ${SQL_SOURCE}) ADD_DEPENDENCIES(sql GenServerSource) +ADD_DEPENDENCIES(sql GenDigestServerSource) DTRACE_INSTRUMENT(sql) TARGET_LINK_LIBRARIES(sql ${MYSQLD_STATIC_PLUGIN_LIBS} mysys mysys_ssl dbug strings vio pcre ${LIBJEMALLOC} @@ -245,6 +266,11 @@ ADD_CUSTOM_TARGET( DEPENDS ${GEN_SOURCES} ) +ADD_CUSTOM_TARGET( + GenDigestServerSource + DEPENDS 
${GEN_DIGEST_SOURCES} +) + #Need this only for embedded SET_TARGET_PROPERTIES(GenServerSource PROPERTIES EXCLUDE_FROM_ALL TRUE) diff --git a/sql/event_scheduler.cc b/sql/event_scheduler.cc index f2b3a77f414..5c4926c830c 100644 --- a/sql/event_scheduler.cc +++ b/sql/event_scheduler.cc @@ -301,6 +301,9 @@ Event_worker_thread::run(THD *thd, Event_queue_element_for_exec *event) Event_job_data job_data; bool res; + DBUG_ASSERT(thd->m_digest == NULL); + DBUG_ASSERT(thd->m_statement_psi == NULL); + thd->thread_stack= &my_stack; // remember where our stack is res= post_init_event_thread(thd); @@ -329,6 +332,8 @@ Event_worker_thread::run(THD *thd, Event_queue_element_for_exec *event) job_data.definer.str, job_data.dbname.str, job_data.name.str); end: + DBUG_ASSERT(thd->m_statement_psi == NULL); + DBUG_ASSERT(thd->m_digest == NULL); DBUG_PRINT("info", ("Done with Event %s.%s", event->dbname.str, event->name.str)); diff --git a/sql/gen_lex_token.cc b/sql/gen_lex_token.cc new file mode 100644 index 00000000000..3584dd60c62 --- /dev/null +++ b/sql/gen_lex_token.cc @@ -0,0 +1,353 @@ +/* + Copyright (c) 2011, 2015, Oracle and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA */ + +#include <my_global.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> + +/* We only need the tokens here */ +#define YYSTYPE_IS_DECLARED +#include <sql_yacc.h> +#include <lex.h> + +#include <welcome_copyright_notice.h> /* ORACLE_WELCOME_COPYRIGHT_NOTICE */ + +/* + This is a tool used during build only, + so MY_MAX_TOKEN does not need to be exact, + only big enough to hold: + - 256 character terminal tokens + - YYNTOKENS named terminal tokens + from bison. + See also YYMAXUTOK. +*/ +#define MY_MAX_TOKEN 1000 +/** Generated token. */ +struct gen_lex_token_string +{ + const char *m_token_string; + int m_token_length; + bool m_append_space; + bool m_start_expr; +}; + +gen_lex_token_string compiled_token_array[MY_MAX_TOKEN]; +int max_token_seen= 0; + +char char_tokens[256]; + +int tok_generic_value= 0; +int tok_generic_value_list= 0; +int tok_row_single_value= 0; +int tok_row_single_value_list= 0; +int tok_row_multiple_value= 0; +int tok_row_multiple_value_list= 0; +int tok_unused= 0; + +void set_token(int tok, const char *str) +{ + if (tok <= 0) + { + fprintf(stderr, "Bad token found\n"); + exit(1); + } + + if (tok > max_token_seen) + { + max_token_seen= tok; + } + + if (max_token_seen >= MY_MAX_TOKEN) + { + fprintf(stderr, "Added that many new keywords ? Increase MY_MAX_TOKEN\n"); + exit(1); + } + + compiled_token_array[tok].m_token_string= str; + compiled_token_array[tok].m_token_length= strlen(str); + compiled_token_array[tok].m_append_space= true; + compiled_token_array[tok].m_start_expr= false; +} + +void set_start_expr_token(int tok) +{ + compiled_token_array[tok].m_start_expr= true; +} + +void compute_tokens() +{ + int tok; + unsigned int i; + char *str; + + /* + Default value. 
+ */ + for (tok= 0; tok < MY_MAX_TOKEN; tok++) + { + compiled_token_array[tok].m_token_string= "(unknown)"; + compiled_token_array[tok].m_token_length= 9; + compiled_token_array[tok].m_append_space= true; + compiled_token_array[tok].m_start_expr= false; + } + + /* + Tokens made of just one terminal character + */ + for (tok=0; tok < 256; tok++) + { + str= & char_tokens[tok]; + str[0]= (char) tok; + compiled_token_array[tok].m_token_string= str; + compiled_token_array[tok].m_token_length= 1; + compiled_token_array[tok].m_append_space= true; + } + + max_token_seen= 255; + + /* + String terminal tokens, used in sql_yacc.yy + */ + set_token(NEG, "~"); + set_token(TABLE_REF_PRIORITY, "TABLE_REF_PRIORITY"); + + /* + Tokens hard coded in sql_lex.cc + */ + + set_token(WITH_CUBE_SYM, "WITH CUBE"); + set_token(WITH_ROLLUP_SYM, "WITH ROLLUP"); + set_token(NOT2_SYM, "!"); + set_token(OR2_SYM, "|"); + set_token(PARAM_MARKER, "?"); + set_token(SET_VAR, ":="); + set_token(UNDERSCORE_CHARSET, "(_charset)"); + set_token(END_OF_INPUT, ""); + + /* + Values. + These tokens are all normalized later, + so this strings will never be displayed. + */ + set_token(BIN_NUM, "(bin)"); + set_token(DECIMAL_NUM, "(decimal)"); + set_token(FLOAT_NUM, "(float)"); + set_token(HEX_NUM, "(hex)"); + set_token(LEX_HOSTNAME, "(hostname)"); + set_token(LONG_NUM, "(long)"); + set_token(NUM, "(num)"); + set_token(TEXT_STRING, "(text)"); + set_token(NCHAR_STRING, "(nchar)"); + set_token(ULONGLONG_NUM, "(ulonglong)"); + + /* + Identifiers. 
+ */ + set_token(IDENT, "(id)"); + set_token(IDENT_QUOTED, "(id_quoted)"); + + /* + Unused tokens + */ + set_token(LOCATOR_SYM, "LOCATOR"); + set_token(SERVER_OPTIONS, "SERVER_OPTIONS"); + set_token(UDF_RETURNS_SYM, "UDF_RETURNS"); + + /* + See symbols[] in sql/lex.h + */ + for (i= 0; i< sizeof(symbols)/sizeof(symbols[0]); i++) + { + set_token(symbols[i].tok, symbols[i].name); + } + + /* + See sql_functions[] in sql/lex.h + */ + for (i= 0; i< sizeof(sql_functions)/sizeof(sql_functions[0]); i++) + { + set_token(sql_functions[i].tok, sql_functions[i].name); + } + + /* + Additional FAKE tokens, + used internally to normalize a digest text. + */ + + max_token_seen++; + tok_generic_value= max_token_seen; + set_token(tok_generic_value, "?"); + + max_token_seen++; + tok_generic_value_list= max_token_seen; + set_token(tok_generic_value_list, "?, ..."); + + max_token_seen++; + tok_row_single_value= max_token_seen; + set_token(tok_row_single_value, "(?)"); + + max_token_seen++; + tok_row_single_value_list= max_token_seen; + set_token(tok_row_single_value_list, "(?) /* , ... */"); + + max_token_seen++; + tok_row_multiple_value= max_token_seen; + set_token(tok_row_multiple_value, "(...)"); + + max_token_seen++; + tok_row_multiple_value_list= max_token_seen; + set_token(tok_row_multiple_value_list, "(...) /* , ... */"); + + max_token_seen++; + tok_unused= max_token_seen; + set_token(tok_unused, "UNUSED"); + + /* + Fix whitespace for some special tokens. + */ + + /* + The lexer parses "@@variable" as '@', '@', 'variable', + returning a token for '@' alone. + + This is incorrect, '@' is not really a token, + because the syntax "@ @ variable" (with spaces) is not accepted: + The lexer keeps some internal state after the '@' fake token. + + To work around this, digest text are printed as "@@variable". + */ + compiled_token_array[(int) '@'].m_append_space= false; + + /* + Define additional properties for tokens. + + List all the token that are followed by an expression. 
+ This is needed to differentiate unary from binary + '+' and '-' operators, because we want to: + - reduce <unary +> <NUM> to <?>, + - preserve <...> <binary +> <NUM> as is. + */ + set_start_expr_token('('); + set_start_expr_token(','); + set_start_expr_token(EVERY_SYM); + set_start_expr_token(AT_SYM); + set_start_expr_token(STARTS_SYM); + set_start_expr_token(ENDS_SYM); + set_start_expr_token(DEFAULT); + set_start_expr_token(RETURN_SYM); + set_start_expr_token(IF); + set_start_expr_token(ELSEIF_SYM); + set_start_expr_token(CASE_SYM); + set_start_expr_token(WHEN_SYM); + set_start_expr_token(WHILE_SYM); + set_start_expr_token(UNTIL_SYM); + set_start_expr_token(SELECT_SYM); + + set_start_expr_token(OR_SYM); + set_start_expr_token(OR2_SYM); + set_start_expr_token(XOR); + set_start_expr_token(AND_SYM); + set_start_expr_token(AND_AND_SYM); + set_start_expr_token(NOT_SYM); + set_start_expr_token(BETWEEN_SYM); + set_start_expr_token(LIKE); + set_start_expr_token(REGEXP); + + set_start_expr_token('|'); + set_start_expr_token('&'); + set_start_expr_token(SHIFT_LEFT); + set_start_expr_token(SHIFT_RIGHT); + set_start_expr_token('+'); + set_start_expr_token('-'); + set_start_expr_token(INTERVAL_SYM); + set_start_expr_token('*'); + set_start_expr_token('/'); + set_start_expr_token('%'); + set_start_expr_token(DIV_SYM); + set_start_expr_token(MOD_SYM); + set_start_expr_token('^'); +} + +void print_tokens() +{ + int tok; + + printf("#ifdef LEX_TOKEN_WITH_DEFINITION\n"); + printf("lex_token_string lex_token_array[]=\n"); + printf("{\n"); + printf("/* PART 1: character tokens. */\n"); + + for (tok= 0; tok<256; tok++) + { + printf("/* %03d */ { \"\\x%02x\", 1, %s, %s},\n", + tok, + tok, + compiled_token_array[tok].m_append_space ? "true" : "false", + compiled_token_array[tok].m_start_expr ? "true" : "false"); + } + + printf("/* PART 2: named tokens. 
*/\n"); + + for (tok= 256; tok<= max_token_seen; tok++) + { + printf("/* %03d */ { \"%s\", %d, %s, %s},\n", + tok, + compiled_token_array[tok].m_token_string, + compiled_token_array[tok].m_token_length, + compiled_token_array[tok].m_append_space ? "true" : "false", + compiled_token_array[tok].m_start_expr ? "true" : "false"); + } + + printf("/* DUMMY */ { \"\", 0, false, false}\n"); + printf("};\n"); + printf("#endif /* LEX_TOKEN_WITH_DEFINITION */\n"); + + printf("/* DIGEST specific tokens. */\n"); + printf("#define TOK_GENERIC_VALUE %d\n", tok_generic_value); + printf("#define TOK_GENERIC_VALUE_LIST %d\n", tok_generic_value_list); + printf("#define TOK_ROW_SINGLE_VALUE %d\n", tok_row_single_value); + printf("#define TOK_ROW_SINGLE_VALUE_LIST %d\n", tok_row_single_value_list); + printf("#define TOK_ROW_MULTIPLE_VALUE %d\n", tok_row_multiple_value); + printf("#define TOK_ROW_MULTIPLE_VALUE_LIST %d\n", tok_row_multiple_value_list); + printf("#define TOK_UNUSED %d\n", tok_unused); +} + +int main(int argc,char **argv) +{ + puts("/*"); + puts(ORACLE_WELCOME_COPYRIGHT_NOTICE("2011")); + puts("*/"); + + printf("/*\n"); + printf(" This file is generated, do not edit.\n"); + printf(" See file sql/gen_lex_token.cc.\n"); + printf("*/\n"); + printf("struct lex_token_string\n"); + printf("{\n"); + printf(" const char *m_token_string;\n"); + printf(" int m_token_length;\n"); + printf(" bool m_append_space;\n"); + printf(" bool m_start_expr;\n"); + printf("};\n"); + printf("typedef struct lex_token_string lex_token_string;\n"); + + compute_tokens(); + print_tokens(); + + return 0; +} + diff --git a/sql/log_event.cc b/sql/log_event.cc index 5c5f5cdb9f1..29cb10c0abf 100644 --- a/sql/log_event.cc +++ b/sql/log_event.cc @@ -49,6 +49,7 @@ #include <base64.h> #include <my_bitmap.h> #include "rpl_utility.h" +#include "sql_digest.h" #define my_b_write_string(A, B) my_b_write((A), (B), (uint) (sizeof(B) - 1)) @@ -4282,12 +4283,17 @@ int Query_log_event::do_apply_event(rpl_group_info 
*rgi, Parser_state parser_state; if (!parser_state.init(thd, thd->query(), thd->query_length())) { + DBUG_ASSERT(thd->m_digest == NULL); + thd->m_digest= & thd->m_digest_state; + DBUG_ASSERT(thd->m_statement_psi == NULL); thd->m_statement_psi= MYSQL_START_STATEMENT(&thd->m_statement_state, stmt_info_rpl.m_key, thd->db, thd->db_length, thd->charset()); THD_STAGE_INFO(thd, stage_init); MYSQL_SET_STATEMENT_TEXT(thd->m_statement_psi, thd->query(), thd->query_length()); + if (thd->m_digest != NULL) + thd->m_digest->reset(thd->m_token_array, max_digest_length); mysql_parse(thd, thd->query(), thd->query_length(), &parser_state); /* Finalize server status flags after executing a statement. */ @@ -4379,11 +4385,10 @@ compare_errors: !ignored_error_code(expected_error)) { rli->report(ERROR_LEVEL, 0, rgi->gtid_info(), - "\ -Query caused different errors on master and slave. \ -Error on master: message (format)='%s' error code=%d ; \ -Error on slave: actual message='%s', error code=%d. \ -Default database: '%s'. Query: '%s'", + "Query caused different errors on master and slave. " + "Error on master: message (format)='%s' error code=%d ; " + "Error on slave: actual message='%s', error code=%d. " + "Default database: '%s'. Query: '%s'", ER_SAFE(expected_error), expected_error, actual_error ? thd->get_stmt_da()->message() : "no error", @@ -4479,6 +4484,7 @@ end: /* Mark the statement completed. 
*/ MYSQL_END_STATEMENT(thd->m_statement_psi, thd->get_stmt_da()); thd->m_statement_psi= NULL; + thd->m_digest= NULL; /* As a disk space optimization, future masters will not log an event for diff --git a/sql/mysqld.cc b/sql/mysqld.cc index 2eaf8096795..6f28244f6c2 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -515,6 +515,7 @@ ulong binlog_cache_use= 0, binlog_cache_disk_use= 0; ulong binlog_stmt_cache_use= 0, binlog_stmt_cache_disk_use= 0; ulong max_connections, max_connect_errors; ulong extra_max_connections; +ulong max_digest_length= 0; ulong slave_retried_transactions; ulong feature_files_opened_with_delayed_keys; ulonglong denied_connections; @@ -5274,6 +5275,8 @@ int mysqld_main(int argc, char **argv) pfs_param.m_hints.m_table_open_cache= tc_size; pfs_param.m_hints.m_max_connections= max_connections; pfs_param.m_hints.m_open_files_limit= open_files_limit; + /* the performance schema digest size is the same as the SQL layer */ + pfs_param.m_max_digest_length= max_digest_length; PSI_hook= initialize_performance_schema(&pfs_param); if (PSI_hook == NULL) { diff --git a/sql/mysqld.h b/sql/mysqld.h index f6d9dbea48a..5f47c9d5b4e 100644 --- a/sql/mysqld.h +++ b/sql/mysqld.h @@ -167,6 +167,7 @@ extern ulong query_cache_limit; extern ulong query_cache_min_res_unit; extern ulong slow_launch_threads, slow_launch_time; extern MYSQL_PLUGIN_IMPORT ulong max_connections; +extern ulong max_digest_length; extern ulong max_connect_errors, connect_timeout; extern my_bool slave_allow_batching; extern my_bool allow_slave_start; diff --git a/sql/sp_head.cc b/sql/sp_head.cc index d59e0aec541..e181e14611b 100644 --- a/sql/sp_head.cc +++ b/sql/sp_head.cc @@ -1365,8 +1365,13 @@ sp_head::execute(THD *thd, bool merge_da_on_success) if (thd->locked_tables_mode <= LTM_LOCK_TABLES) thd->user_var_events_alloc= thd->mem_root; + sql_digest_state *parent_digest= thd->m_digest; + thd->m_digest= NULL; + err_status= i->execute(thd, &ip); + thd->m_digest= parent_digest; + if (i->free_list) 
cleanup_items(i->free_list); diff --git a/sql/sql_class.cc b/sql/sql_class.cc index 1ef0827b943..ed2bddd7e8d 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -874,6 +874,7 @@ THD::THD() stmt_depends_on_first_successful_insert_id_in_prev_stmt(FALSE), m_examined_row_count(0), accessed_rows_and_keys(0), + m_digest(NULL), m_statement_psi(NULL), m_idle_psi(NULL), thread_id(0), @@ -1044,6 +1045,13 @@ THD::THD() substitute_null_with_insert_id = FALSE; thr_lock_info_init(&lock_info); /* safety: will be reset after start */ + m_token_array= NULL; + if (max_digest_length > 0) + { + m_token_array= (unsigned char*) my_malloc(max_digest_length, + MYF(MY_WME|MY_THREAD_SPECIFIC)); + } + m_internal_handler= NULL; m_binlog_invoker= INVOKER_NONE; arena_for_cached_items= 0; @@ -1625,6 +1633,7 @@ THD::~THD() #endif free_root(&main_mem_root, MYF(0)); + my_free(m_token_array); main_da.free_memory(); if (status_var.memory_used != 0) { diff --git a/sql/sql_class.h b/sql/sql_class.h index 0746ac79b97..a8d8444571e 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -38,6 +38,8 @@ #include "violite.h" /* vio_is_connected */ #include "thr_lock.h" /* thr_lock_type, THR_LOCK_DATA, THR_LOCK_INFO */ +#include "sql_digest_stream.h" // sql_digest_state + #include <mysql/psi/mysql_stage.h> #include <mysql/psi/mysql_statement.h> #include <mysql/psi/mysql_idle.h> @@ -2505,6 +2507,13 @@ public: PROFILING profiling; #endif + /** Current statement digest. */ + sql_digest_state *m_digest; + /** Current statement digest token array. */ + unsigned char *m_token_array; + /** Top level statement digest. */ + sql_digest_state m_digest_state; + /** Current statement instrumentation. 
*/ PSI_statement_locker *m_statement_psi; #ifdef HAVE_PSI_STATEMENT_INTERFACE diff --git a/sql/sql_cursor.cc b/sql/sql_cursor.cc index 99b7b1e58d0..c09f3269d7a 100644 --- a/sql/sql_cursor.cc +++ b/sql/sql_cursor.cc @@ -98,6 +98,7 @@ public: int mysql_open_cursor(THD *thd, select_result *result, Server_side_cursor **pcursor) { + sql_digest_state *parent_digest; PSI_statement_locker *parent_locker; select_result *save_result; Select_materialize *result_materialize; @@ -117,11 +118,14 @@ int mysql_open_cursor(THD *thd, select_result *result, &thd->security_ctx->priv_user[0], (char *) thd->security_ctx->host_or_ip, 2); + parent_digest= thd->m_digest; parent_locker= thd->m_statement_psi; + thd->m_digest= NULL; thd->m_statement_psi= NULL; /* Mark that we can't use query cache with cursors */ thd->query_cache_is_applicable= 0; rc= mysql_execute_command(thd); + thd->m_digest= parent_digest; thd->m_statement_psi= parent_locker; MYSQL_QUERY_EXEC_DONE(rc); diff --git a/sql/sql_digest.cc b/sql/sql_digest.cc new file mode 100644 index 00000000000..324f2fbd428 --- /dev/null +++ b/sql/sql_digest.cc @@ -0,0 +1,683 @@ +/* Copyright (c) 2008, 2015, Oracle and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA */ + +/* + This code needs extra visibility in the lexer structures +*/ + +#include "my_global.h" +#include "my_md5.h" +#include "mysqld_error.h" + +#include "sql_string.h" +#include "sql_class.h" +#include "sql_lex.h" +#include "sql_digest.h" +#include "sql_digest_stream.h" + +#include "sql_get_diagnostics.h" + +#ifdef NEVER +#include "my_sys.h" +#include "sql_signal.h" +#endif + +/* Generated code */ +#include "sql_yacc.h" +#define LEX_TOKEN_WITH_DEFINITION +#include "lex_token.h" + +/* Name pollution from sql/sql_lex.h */ +#ifdef LEX_YYSTYPE +#undef LEX_YYSTYPE +#endif + +#define LEX_YYSTYPE YYSTYPE* + +#define SIZE_OF_A_TOKEN 2 + +/** + Read a single token from token array. +*/ +inline uint read_token(const sql_digest_storage *digest_storage, + uint index, uint *tok) +{ + uint safe_byte_count= digest_storage->m_byte_count; + + if (index + SIZE_OF_A_TOKEN <= safe_byte_count && + safe_byte_count <= digest_storage->m_token_array_length) + { + const unsigned char *src= & digest_storage->m_token_array[index]; + *tok= src[0] | (src[1] << 8); + return index + SIZE_OF_A_TOKEN; + } + + /* The input byte stream is exhausted. */ + *tok= 0; + return MAX_DIGEST_STORAGE_SIZE + 1; +} + +/** + Store a single token in token array. +*/ +inline void store_token(sql_digest_storage* digest_storage, uint token) +{ + DBUG_ASSERT(digest_storage->m_byte_count <= digest_storage->m_token_array_length); + + if (digest_storage->m_byte_count + SIZE_OF_A_TOKEN <= digest_storage->m_token_array_length) + { + unsigned char* dest= & digest_storage->m_token_array[digest_storage->m_byte_count]; + dest[0]= token & 0xff; + dest[1]= (token >> 8) & 0xff; + digest_storage->m_byte_count+= SIZE_OF_A_TOKEN; + } + else + { + digest_storage->m_full= true; + } +} + +/** + Read an identifier from token array. 
+*/ +inline uint read_identifier(const sql_digest_storage* digest_storage, + uint index, char ** id_string, int *id_length) +{ + uint new_index; + uint safe_byte_count= digest_storage->m_byte_count; + + DBUG_ASSERT(index <= safe_byte_count); + DBUG_ASSERT(safe_byte_count <= digest_storage->m_token_array_length); + + /* + token + length + string are written in an atomic way, + so we do always expect a length + string here + */ + + uint bytes_needed= SIZE_OF_A_TOKEN; + /* If we can read token and identifier length */ + if ((index + bytes_needed) <= safe_byte_count) + { + const unsigned char *src= & digest_storage->m_token_array[index]; + /* Read the length of identifier */ + uint length= src[0] | (src[1] << 8); + bytes_needed+= length; + /* If we can read entire identifier from token array */ + if ((index + bytes_needed) <= safe_byte_count) + { + *id_string= (char *) (src + 2); + *id_length= length; + + new_index= index + bytes_needed; + DBUG_ASSERT(new_index <= safe_byte_count); + return new_index; + } + } + + /* The input byte stream is exhausted. */ + return MAX_DIGEST_STORAGE_SIZE + 1; +} + +/** + Store an identifier in token array. 
+*/ +inline void store_token_identifier(sql_digest_storage* digest_storage, + uint token, + size_t id_length, const char *id_name) +{ + DBUG_ASSERT(digest_storage->m_byte_count <= digest_storage->m_token_array_length); + + size_t bytes_needed= 2 * SIZE_OF_A_TOKEN + id_length; + if (digest_storage->m_byte_count + bytes_needed <= (unsigned int)digest_storage->m_token_array_length) + { + unsigned char* dest= & digest_storage->m_token_array[digest_storage->m_byte_count]; + /* Write the token */ + dest[0]= token & 0xff; + dest[1]= (token >> 8) & 0xff; + /* Write the string length */ + dest[2]= id_length & 0xff; + dest[3]= (id_length >> 8) & 0xff; + /* Write the string data */ + if (id_length > 0) + memcpy((char *)(dest + 4), id_name, id_length); + digest_storage->m_byte_count+= bytes_needed; + } + else + { + digest_storage->m_full= true; + } +} + +void compute_digest_md5(const sql_digest_storage *digest_storage, unsigned char *md5) +{ + compute_md5_hash((char *) md5, + (const char *) digest_storage->m_token_array, + digest_storage->m_byte_count); +} + +/* + Iterate token array and updates digest_text. +*/ +void compute_digest_text(const sql_digest_storage* digest_storage, + String *digest_text) +{ + DBUG_ASSERT(digest_storage != NULL); + uint byte_count= digest_storage->m_byte_count; + String *digest_output= digest_text; + uint tok= 0; + uint current_byte= 0; + lex_token_string *tok_data; + + /* Reset existing data */ + digest_output->length(0); + + if (byte_count > digest_storage->m_token_array_length) + { + digest_output->append("\0", 1); + return; + } + + /* Convert text to utf8 */ + const CHARSET_INFO *from_cs= get_charset(digest_storage->m_charset_number, MYF(0)); + const CHARSET_INFO *to_cs= &my_charset_utf8_bin; + + if (from_cs == NULL) + { + /* + Can happen, as we do dirty reads on digest_storage, + which can be written to in another thread. 
+ */ + digest_output->append("\0", 1); + return; + } + + char id_buffer[NAME_LEN + 1]= {'\0'}; + char *id_string; + size_t id_length; + bool convert_text= !my_charset_same(from_cs, to_cs); + + while (current_byte < byte_count) + { + current_byte= read_token(digest_storage, current_byte, &tok); + + if (tok <= 0 || tok >= array_elements(lex_token_array) + || current_byte > max_digest_length) + return; + + tok_data= &lex_token_array[tok]; + + switch (tok) + { + /* All identifiers are printed with their name. */ + case IDENT: + case IDENT_QUOTED: + { + char *id_ptr= NULL; + int id_len= 0; + uint err_cs= 0; + + /* Get the next identifier from the storage buffer. */ + current_byte= read_identifier(digest_storage, current_byte, + &id_ptr, &id_len); + if (current_byte > max_digest_length) + return; + + if (convert_text) + { + /* Verify that the converted text will fit. */ + if (to_cs->mbmaxlen*id_len > NAME_LEN) + { + digest_output->append("...", 3); + break; + } + /* Convert identifier string into the storage character set. */ + id_length= my_convert(id_buffer, NAME_LEN, to_cs, + id_ptr, id_len, from_cs, &err_cs); + id_string= id_buffer; + } + else + { + id_string= id_ptr; + id_length= id_len; + } + + if (id_length == 0 || err_cs != 0) + { + break; + } + /* Copy the converted identifier into the digest string. */ + if (tok == IDENT_QUOTED) + digest_output->append("`", 1); + if (id_length > 0) + digest_output->append(id_string, id_length); + if (tok == IDENT_QUOTED) + digest_output->append("`", 1); + digest_output->append(" ", 1); + } + break; + + /* Everything else is printed as is. */ + default: + /* + Make sure not to overflow digest_text buffer. + +1 is to make sure extra space for ' '. 
+ */ + int tok_length= tok_data->m_token_length; + + digest_output->append(tok_data->m_token_string, tok_length); + if (tok_data->m_append_space) + digest_output->append(" ", 1); + break; + } + } +} + +static inline uint peek_token(const sql_digest_storage *digest, uint index) +{ + uint token; + DBUG_ASSERT(index + SIZE_OF_A_TOKEN <= digest->m_byte_count); + DBUG_ASSERT(digest->m_byte_count <= digest->m_token_array_length); + + token= ((digest->m_token_array[index + 1])<<8) | digest->m_token_array[index]; + return token; +} + +/** + Function to read last two tokens from token array. If an identifier + is found, do not look for token before that. +*/ +static inline void peek_last_two_tokens(const sql_digest_storage* digest_storage, + uint last_id_index, uint *t1, uint *t2) +{ + uint byte_count= digest_storage->m_byte_count; + uint peek_index= byte_count; + + if (last_id_index + SIZE_OF_A_TOKEN <= peek_index) + { + /* Take last token. */ + peek_index-= SIZE_OF_A_TOKEN; + *t1= peek_token(digest_storage, peek_index); + + if (last_id_index + SIZE_OF_A_TOKEN <= peek_index) + { + /* Take 2nd token from last. */ + peek_index-= SIZE_OF_A_TOKEN; + *t2= peek_token(digest_storage, peek_index); + } + else + { + *t2= TOK_UNUSED; + } + } + else + { + *t1= TOK_UNUSED; + *t2= TOK_UNUSED; + } +} + +/** + Function to read last three tokens from token array. If an identifier + is found, do not look for token before that. +*/ +static inline void peek_last_three_tokens(const sql_digest_storage* digest_storage, + uint last_id_index, uint *t1, uint *t2, uint *t3) +{ + uint byte_count= digest_storage->m_byte_count; + uint peek_index= byte_count; + + if (last_id_index + SIZE_OF_A_TOKEN <= peek_index) + { + /* Take last token. */ + peek_index-= SIZE_OF_A_TOKEN; + *t1= peek_token(digest_storage, peek_index); + + if (last_id_index + SIZE_OF_A_TOKEN <= peek_index) + { + /* Take 2nd token from last. 
*/ + peek_index-= SIZE_OF_A_TOKEN; + *t2= peek_token(digest_storage, peek_index); + + if (last_id_index + SIZE_OF_A_TOKEN <= peek_index) + { + /* Take 3rd token from last. */ + peek_index-= SIZE_OF_A_TOKEN; + *t3= peek_token(digest_storage, peek_index); + } + else + { + *t3= TOK_UNUSED; + } + } + else + { + *t2= TOK_UNUSED; + *t3= TOK_UNUSED; + } + } + else + { + *t1= TOK_UNUSED; + *t2= TOK_UNUSED; + *t3= TOK_UNUSED; + } +} + +sql_digest_state* digest_add_token(sql_digest_state *state, + uint token, + LEX_YYSTYPE yylval) +{ + sql_digest_storage *digest_storage= NULL; + + digest_storage= &state->m_digest_storage; + + /* + Stop collecting further tokens if digest storage is full or + if END token is received. + */ + if (digest_storage->m_full || token == END_OF_INPUT) + return NULL; + + /* + Take last_token 2 tokens collected till now. These tokens will be used + in reduce for normalisation. Make sure not to consider ID tokens in reduce. + */ + uint last_token; + uint last_token2; + + switch (token) + { + case NUM: + case LONG_NUM: + case ULONGLONG_NUM: + case DECIMAL_NUM: + case FLOAT_NUM: + case BIN_NUM: + case HEX_NUM: + { + bool found_unary; + do + { + found_unary= false; + peek_last_two_tokens(digest_storage, state->m_last_id_index, + &last_token, &last_token2); + + if ((last_token == '-') || (last_token == '+')) + { + /* + We need to differentiate: + - a <unary minus> operator + - a <unary plus> operator + from + - a <binary minus> operator + - a <binary plus> operator + to only reduce "a = -1" to "a = ?", and not change "b - 1" to "b ?" + + Binary operators are found inside an expression, + while unary operators are found at the beginning of an expression, or after operators. + + To achieve this, every token that is followed by an <expr> expression + in the SQL grammar is flagged. 
+ See sql/sql_yacc.yy + See sql/gen_lex_token.cc + + For example, + "(-1)" is parsed as "(", "-", NUM, ")", and lex_token_array["("].m_start_expr is true, + so reduction of the "-" NUM is done, the result is "(?)". + "(a-1)" is parsed as "(", ID, "-", NUM, ")", and lex_token_array[ID].m_start_expr is false, + so the operator is binary, no reduction is done, and the result is "(a-?)". + */ + if (lex_token_array[last_token2].m_start_expr) + { + /* + REDUCE: + TOK_GENERIC_VALUE := (UNARY_PLUS | UNARY_MINUS) (NUM | LOG_NUM | ... | FLOAT_NUM) + + REDUCE: + TOK_GENERIC_VALUE := (UNARY_PLUS | UNARY_MINUS) TOK_GENERIC_VALUE + */ + token= TOK_GENERIC_VALUE; + digest_storage->m_byte_count-= SIZE_OF_A_TOKEN; + found_unary= true; + } + } + } while (found_unary); + } + /* fall through, for case NULL_SYM below */ + case LEX_HOSTNAME: + case TEXT_STRING: + case NCHAR_STRING: + case PARAM_MARKER: + { + /* + REDUCE: + TOK_GENERIC_VALUE := BIN_NUM | DECIMAL_NUM | ... | ULONGLONG_NUM + */ + token= TOK_GENERIC_VALUE; + + peek_last_two_tokens(digest_storage, state->m_last_id_index, + &last_token, &last_token2); + + if ((last_token2 == TOK_GENERIC_VALUE || + last_token2 == TOK_GENERIC_VALUE_LIST) && + (last_token == ',')) + { + /* + REDUCE: + TOK_GENERIC_VALUE_LIST := + TOK_GENERIC_VALUE ',' TOK_GENERIC_VALUE + + REDUCE: + TOK_GENERIC_VALUE_LIST := + TOK_GENERIC_VALUE_LIST ',' TOK_GENERIC_VALUE + */ + digest_storage->m_byte_count-= 2*SIZE_OF_A_TOKEN; + token= TOK_GENERIC_VALUE_LIST; + } + /* + Add this token or the resulting reduce to digest storage. 
+ */ + store_token(digest_storage, token); + break; + } + case ')': + { + peek_last_two_tokens(digest_storage, state->m_last_id_index, + &last_token, &last_token2); + + if (last_token == TOK_GENERIC_VALUE && + last_token2 == '(') + { + /* + REDUCE: + TOK_ROW_SINGLE_VALUE := + '(' TOK_GENERIC_VALUE ')' + */ + digest_storage->m_byte_count-= 2*SIZE_OF_A_TOKEN; + token= TOK_ROW_SINGLE_VALUE; + + /* Read last two tokens again */ + peek_last_two_tokens(digest_storage, state->m_last_id_index, + &last_token, &last_token2); + + if ((last_token2 == TOK_ROW_SINGLE_VALUE || + last_token2 == TOK_ROW_SINGLE_VALUE_LIST) && + (last_token == ',')) + { + /* + REDUCE: + TOK_ROW_SINGLE_VALUE_LIST := + TOK_ROW_SINGLE_VALUE ',' TOK_ROW_SINGLE_VALUE + + REDUCE: + TOK_ROW_SINGLE_VALUE_LIST := + TOK_ROW_SINGLE_VALUE_LIST ',' TOK_ROW_SINGLE_VALUE + */ + digest_storage->m_byte_count-= 2*SIZE_OF_A_TOKEN; + token= TOK_ROW_SINGLE_VALUE_LIST; + } + } + else if (last_token == TOK_GENERIC_VALUE_LIST && + last_token2 == '(') + { + /* + REDUCE: + TOK_ROW_MULTIPLE_VALUE := + '(' TOK_GENERIC_VALUE_LIST ')' + */ + digest_storage->m_byte_count-= 2*SIZE_OF_A_TOKEN; + token= TOK_ROW_MULTIPLE_VALUE; + + /* Read last two tokens again */ + peek_last_two_tokens(digest_storage, state->m_last_id_index, + &last_token, &last_token2); + + if ((last_token2 == TOK_ROW_MULTIPLE_VALUE || + last_token2 == TOK_ROW_MULTIPLE_VALUE_LIST) && + (last_token == ',')) + { + /* + REDUCE: + TOK_ROW_MULTIPLE_VALUE_LIST := + TOK_ROW_MULTIPLE_VALUE ',' TOK_ROW_MULTIPLE_VALUE + + REDUCE: + TOK_ROW_MULTIPLE_VALUE_LIST := + TOK_ROW_MULTIPLE_VALUE_LIST ',' TOK_ROW_MULTIPLE_VALUE + */ + digest_storage->m_byte_count-= 2*SIZE_OF_A_TOKEN; + token= TOK_ROW_MULTIPLE_VALUE_LIST; + } + } + /* + Add this token or the resulting reduce to digest storage. 
+ */ + store_token(digest_storage, token); + break; + } + case IDENT: + case IDENT_QUOTED: + { + YYSTYPE *lex_token= yylval; + char *yytext= lex_token->lex_str.str; + size_t yylen= lex_token->lex_str.length; + + /* Add this token and identifier string to digest storage. */ + store_token_identifier(digest_storage, token, yylen, yytext); + + /* Update the index of last identifier found. */ + state->m_last_id_index= digest_storage->m_byte_count; + break; + } + default: + { + /* Add this token to digest storage. */ + store_token(digest_storage, token); + break; + } + } + + return state; +} + +sql_digest_state* digest_reduce_token(sql_digest_state *state, + uint token_left, uint token_right) +{ + sql_digest_storage *digest_storage= NULL; + + digest_storage= &state->m_digest_storage; + + /* + Stop collecting further tokens if digest storage is full. + */ + if (digest_storage->m_full) + return NULL; + + uint last_token; + uint last_token2; + uint last_token3; + uint token_to_push= TOK_UNUSED; + + peek_last_two_tokens(digest_storage, state->m_last_id_index, + &last_token, &last_token2); + + /* + There is only one caller of digest_reduce_token(), + see sql/sql_yacc.yy, rule literal := NULL_SYM. + REDUCE: + token_left := token_right + Used for: + TOK_GENERIC_VALUE := NULL_SYM + */ + + if (last_token == token_right) + { + /* + Current stream is like: + TOKEN_X TOKEN_RIGHT . + REDUCE to + TOKEN_X TOKEN_LEFT . + */ + digest_storage->m_byte_count-= SIZE_OF_A_TOKEN; + store_token(digest_storage, token_left); + } + else + { + /* + Current stream is like: + TOKEN_X TOKEN_RIGHT TOKEN_Y . + Pop TOKEN_Y + TOKEN_X TOKEN_RIGHT . TOKEN_Y + REDUCE to + TOKEN_X TOKEN_LEFT . 
TOKEN_Y + */ + DBUG_ASSERT(last_token2 == token_right); + digest_storage->m_byte_count-= 2 * SIZE_OF_A_TOKEN; + store_token(digest_storage, token_left); + token_to_push= last_token; + } + + peek_last_three_tokens(digest_storage, state->m_last_id_index, + &last_token, &last_token2, &last_token3); + + if ((last_token3 == TOK_GENERIC_VALUE || + last_token3 == TOK_GENERIC_VALUE_LIST) && + (last_token2 == ',') && + (last_token == TOK_GENERIC_VALUE)) + { + /* + REDUCE: + TOK_GENERIC_VALUE_LIST := + TOK_GENERIC_VALUE ',' TOK_GENERIC_VALUE + + REDUCE: + TOK_GENERIC_VALUE_LIST := + TOK_GENERIC_VALUE_LIST ',' TOK_GENERIC_VALUE + */ + digest_storage->m_byte_count-= 3*SIZE_OF_A_TOKEN; + store_token(digest_storage, TOK_GENERIC_VALUE_LIST); + } + + if (token_to_push != TOK_UNUSED) + { + /* + Push TOKEN_Y + */ + store_token(digest_storage, token_to_push); + } + + return state; +} + diff --git a/sql/sql_digest.h b/sql/sql_digest.h new file mode 100644 index 00000000000..ce159283d4d --- /dev/null +++ b/sql/sql_digest.h @@ -0,0 +1,130 @@ +/* Copyright (c) 2008, 2015, Oracle and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA */ + +#ifndef SQL_DIGEST_H +#define SQL_DIGEST_H + +#include <string.h> +class String; +#include "my_md5.h" + +#define MAX_DIGEST_STORAGE_SIZE (1024*1024) + +/** + Structure to store token count/array for a statement + on which digest is to be calculated. +*/ +struct sql_digest_storage +{ + bool m_full; + uint m_byte_count; + unsigned char m_md5[MD5_HASH_SIZE]; + /** Character set number. */ + uint m_charset_number; + /** + Token array. + Token array is an array of bytes to store tokens received during parsing. + Following is the way token array is formed. + ... <non-id-token> <non-id-token> <id-token> <id_len> <id_text> ... + For Example: + SELECT * FROM T1; + <SELECT_TOKEN> <*> <FROM_TOKEN> <ID_TOKEN> <2> <T1> + */ + unsigned char *m_token_array; + /* Length of the token array to be considered for DIGEST_TEXT calculation. */ + uint m_token_array_length; + + sql_digest_storage() + { + reset(NULL, 0); + } + + inline void reset(unsigned char *token_array, uint length) + { + m_token_array= token_array; + m_token_array_length= length; + reset(); + } + + inline void reset() + { + m_full= false; + m_byte_count= 0; + m_charset_number= 0; + if (m_token_array_length > 0) + { + memset(m_token_array, 0, m_token_array_length); + } + memset(m_md5, 0, MD5_HASH_SIZE); + } + + inline bool is_empty() + { + return (m_byte_count == 0); + } + + inline void copy(const sql_digest_storage *from) + { + /* + Keep in mind this is a dirty copy of something that may change, + as the thread producing the digest is executing concurrently, + without any lock enforced. + */ + uint byte_count_copy= m_token_array_length < from->m_byte_count ? 
+ m_token_array_length : from->m_byte_count; + + if (byte_count_copy > 0) + { + m_full= from->m_full; + m_byte_count= byte_count_copy; + m_charset_number= from->m_charset_number; + memcpy(m_token_array, from->m_token_array, m_byte_count); + memcpy(m_md5, from->m_md5, MD5_HASH_SIZE); + } + else + { + m_full= false; + m_byte_count= 0; + m_charset_number= 0; + } + } +}; +typedef struct sql_digest_storage sql_digest_storage; + +/** + Compute a digest hash. + @param digest_storage The digest + @param [out] md5 The computed digest hash. This parameter is a buffer of size @c MD5_HASH_SIZE. +*/ +void compute_digest_md5(const sql_digest_storage *digest_storage, unsigned char *md5); + +/** + Compute a digest text. + A 'digest text' is a textual representation of a query, + where: + - comments are removed, + - non-significant spaces are removed, + - literal values are replaced with a special '?' marker, + - lists of values are collapsed using a shorter notation + @param digest_storage The digest + @param [out] digest_text String that receives the digest text +*/ +void compute_digest_text(const sql_digest_storage *digest_storage, + String *digest_text); + +#endif + diff --git a/sql/sql_digest_stream.h b/sql/sql_digest_stream.h new file mode 100644 index 00000000000..55f7e2293c6 --- /dev/null +++ b/sql/sql_digest_stream.h @@ -0,0 +1,51 @@ +/* Copyright (c) 2008, 2015, Oracle and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details.
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA */ + +#ifndef SQL_DIGEST_STREAM_H +#define SQL_DIGEST_STREAM_H + +#include "sql_digest.h" + +/** + State data storage for @c digest_start, @c digest_add_token. + This structure extends the @c sql_digest_storage structure + with temporary state used only during parsing. +*/ +struct sql_digest_state +{ + /** + Index, in the digest token array, of the last identifier seen. + Reduce rules used in the digest computation can not + apply to tokens seen before an identifier. + @sa digest_add_token + */ + int m_last_id_index; + sql_digest_storage m_digest_storage; + + inline void reset(unsigned char *token_array, uint length) + { + m_last_id_index= 0; + m_digest_storage.reset(token_array, length); + } + + inline bool is_empty() + { + return m_digest_storage.is_empty(); + } +}; +typedef struct sql_digest_state sql_digest_state; + +#endif + diff --git a/sql/sql_lex.cc b/sql/sql_lex.cc index ba7d101a4cd..d8f40897a59 100644 --- a/sql/sql_lex.cc +++ b/sql/sql_lex.cc @@ -436,6 +436,21 @@ void Lex_input_stream::body_utf8_append_literal(THD *thd, m_cpp_utf8_processed_ptr= end_ptr; } +void Lex_input_stream::add_digest_token(uint token, LEX_YYSTYPE yylval) +{ + if (m_digest != NULL) + { + m_digest= digest_add_token(m_digest, token, yylval); + } +} + +void Lex_input_stream::reduce_digest_token(uint token_left, uint token_right) +{ + if (m_digest != NULL) + { + m_digest= digest_reduce_token(m_digest, token_left, token_right); + } +} /* This is called before every query that is to be parsed. 
@@ -982,7 +997,7 @@ int MYSQLlex(YYSTYPE *yylval, THD *thd) lip->lookahead_token= -1; *yylval= *(lip->lookahead_yylval); lip->lookahead_yylval= NULL; - lip->m_digest_psi= MYSQL_ADD_TOKEN(lip->m_digest_psi, token, yylval); + lip->add_digest_token(token, yylval); return token; } @@ -1000,12 +1015,10 @@ int MYSQLlex(YYSTYPE *yylval, THD *thd) token= lex_one_token(yylval, thd); switch(token) { case CUBE_SYM: - lip->m_digest_psi= MYSQL_ADD_TOKEN(lip->m_digest_psi, WITH_CUBE_SYM, - yylval); + lip->add_digest_token(WITH_CUBE_SYM, yylval); return WITH_CUBE_SYM; case ROLLUP_SYM: - lip->m_digest_psi= MYSQL_ADD_TOKEN(lip->m_digest_psi, WITH_ROLLUP_SYM, - yylval); + lip->add_digest_token(WITH_ROLLUP_SYM, yylval); return WITH_ROLLUP_SYM; default: /* @@ -1014,7 +1027,7 @@ int MYSQLlex(YYSTYPE *yylval, THD *thd) lip->lookahead_yylval= lip->yylval; lip->yylval= NULL; lip->lookahead_token= token; - lip->m_digest_psi= MYSQL_ADD_TOKEN(lip->m_digest_psi, WITH, yylval); + lip->add_digest_token(WITH, yylval); return WITH; } break; @@ -1022,7 +1035,7 @@ int MYSQLlex(YYSTYPE *yylval, THD *thd) break; } - lip->m_digest_psi= MYSQL_ADD_TOKEN(lip->m_digest_psi, token, yylval); + lip->add_digest_token(token, yylval); return token; } diff --git a/sql/sql_lex.h b/sql/sql_lex.h index 38f1417b40c..b17f0f4ec63 100644 --- a/sql/sql_lex.h +++ b/sql/sql_lex.h @@ -47,6 +47,7 @@ class sys_var; class Item_func_match; class File_parser; class Key_part_spec; +struct sql_digest_state; #ifdef MYSQL_SERVER /* @@ -2048,6 +2049,10 @@ public: /** LALR(2) resolution, value of the look ahead token.*/ LEX_YYSTYPE lookahead_yylval; + void add_digest_token(uint token, LEX_YYSTYPE yylval); + + void reduce_digest_token(uint token_left, uint token_right); + private: /** Pointer to the current position in the raw input stream. */ char *m_ptr; @@ -2167,7 +2172,7 @@ public: /** Current statement digest instrumentation. 
*/ - PSI_digest_locker* m_digest_psi; + sql_digest_state* m_digest; }; /** @@ -2863,6 +2868,18 @@ public: }; /** + Input parameters to the parser. +*/ +struct Parser_input +{ + bool m_compute_digest; + + Parser_input() + : m_compute_digest(false) + {} +}; + +/** Internal state of the parser. The complete state consist of: - state data used during lexical parsing, @@ -2889,9 +2906,15 @@ public: ~Parser_state() {} + Parser_input m_input; Lex_input_stream m_lip; Yacc_state m_yacc; + /** + Current performance digest instrumentation. + */ + PSI_digest_locker* m_digest_psi; + void reset(char *found_semicolon, unsigned int length) { m_lip.reset(found_semicolon, length); @@ -2899,6 +2922,11 @@ public: } }; +extern sql_digest_state * +digest_add_token(sql_digest_state *state, uint token, LEX_YYSTYPE yylval); + +extern sql_digest_state * +digest_reduce_token(sql_digest_state *state, uint token_left, uint token_right); struct st_lex_local: public LEX { diff --git a/sql/sql_parse.cc b/sql/sql_parse.cc index 9ecf168c60e..5635e9ad4b7 100644 --- a/sql/sql_parse.cc +++ b/sql/sql_parse.cc @@ -83,6 +83,8 @@ #include "rpl_handler.h" #include "rpl_mi.h" +#include "sql_digest.h" + #include "sp_head.h" #include "sp.h" #include "sp_cache.h" @@ -949,6 +951,7 @@ bool do_command(THD *thd) /* Mark the statement completed. */ MYSQL_END_STATEMENT(thd->m_statement_psi, thd->get_stmt_da()); thd->m_statement_psi= NULL; + thd->m_digest= NULL; if (net->error != 3) { @@ -998,6 +1001,7 @@ bool do_command(THD *thd) out: /* The statement instrumentation must be closed in all cases. 
*/ + DBUG_ASSERT(thd->m_digest == NULL); DBUG_ASSERT(thd->m_statement_psi == NULL); DBUG_RETURN(return_value); } @@ -1278,6 +1282,10 @@ bool dispatch_command(enum enum_server_command command, THD *thd, } case COM_QUERY: { + DBUG_ASSERT(thd->m_digest == NULL); + thd->m_digest= & thd->m_digest_state; + thd->m_digest->reset(thd->m_token_array, max_digest_length); + if (alloc_query(thd, packet, packet_length)) break; // fatal error is set MYSQL_QUERY_START(thd->query(), thd->thread_id, @@ -1337,6 +1345,7 @@ bool dispatch_command(enum enum_server_command command, THD *thd, /* PSI end */ MYSQL_END_STATEMENT(thd->m_statement_psi, thd->get_stmt_da()); thd->m_statement_psi= NULL; + thd->m_digest= NULL; /* DTRACE end */ if (MYSQL_QUERY_DONE_ENABLED()) @@ -1357,6 +1366,8 @@ bool dispatch_command(enum enum_server_command command, THD *thd, (char *) thd->security_ctx->host_or_ip); /* PSI begin */ + thd->m_digest= & thd->m_digest_state; + thd->m_statement_psi= MYSQL_START_STATEMENT(&thd->m_statement_state, com_statement_info[command].m_key, thd->db, thd->db_length, @@ -1741,6 +1752,7 @@ bool dispatch_command(enum enum_server_command command, THD *thd, /* Performance Schema Interface instrumentation, end */ MYSQL_END_STATEMENT(thd->m_statement_psi, thd->get_stmt_da()); thd->m_statement_psi= NULL; + thd->m_digest= NULL; thd->set_time(); dec_thread_running(); @@ -8551,11 +8563,27 @@ bool parse_sql(THD *thd, Parser_state *parser_state, thd->m_parser_state= parser_state; -#ifdef HAVE_PSI_STATEMENT_DIGEST_INTERFACE - /* Start Digest */ - thd->m_parser_state->m_lip.m_digest_psi= - MYSQL_DIGEST_START(do_pfs_digest ? 
thd->m_statement_psi : NULL); -#endif + parser_state->m_digest_psi= NULL; + parser_state->m_lip.m_digest= NULL; + + if (do_pfs_digest) + { + /* Start Digest */ + parser_state->m_digest_psi= MYSQL_DIGEST_START(thd->m_statement_psi); + + if (parser_state->m_input.m_compute_digest || + (parser_state->m_digest_psi != NULL)) + { + /* + If either: + - the caller wants to compute a digest + - the performance schema wants to compute a digest + set the digest listener in the lexer. + */ + parser_state->m_lip.m_digest= thd->m_digest; + parser_state->m_lip.m_digest->m_digest_storage.m_charset_number= thd->charset()->number; + } + } /* Parse the query. */ @@ -8588,6 +8616,18 @@ bool parse_sql(THD *thd, Parser_state *parser_state, /* That's it. */ ret_value= mysql_parse_status || thd->is_fatal_error; + + if ((ret_value == 0) && (parser_state->m_digest_psi != NULL)) + { + /* + On parsing success, record the digest in the performance schema. + */ + DBUG_ASSERT(do_pfs_digest); + DBUG_ASSERT(thd->m_digest != NULL); + MYSQL_DIGEST_END(parser_state->m_digest_psi, + & thd->m_digest->m_digest_storage); + } + MYSQL_QUERY_PARSE_DONE(ret_value); DBUG_RETURN(ret_value); } diff --git a/sql/sql_yacc.yy b/sql/sql_yacc.yy index 03022c7ed19..9c7898b1b02 100644 --- a/sql/sql_yacc.yy +++ b/sql/sql_yacc.yy @@ -63,6 +63,7 @@ #include "keycaches.h" #include "set_var.h" #include "rpl_mi.h" +#include "lex_token.h" /* this is to get the bison compilation windows warnings out */ #ifdef _MSC_VER @@ -13370,6 +13371,13 @@ literal: | temporal_literal { $$= $1; } | NULL_SYM { + /* + For the digest computation, in this context only, + NULL is considered a literal, hence reduced to '?' 
+ REDUCE: + TOK_GENERIC_VALUE := NULL_SYM + */ + YYLIP->reduce_digest_token(TOK_GENERIC_VALUE, NULL_SYM); $$ = new (thd->mem_root) Item_null(); if ($$ == NULL) MYSQL_YYABORT; diff --git a/sql/sys_vars.cc b/sql/sys_vars.cc index 21ea70def63..94466db5fd9 100644 --- a/sql/sys_vars.cc +++ b/sql/sys_vars.cc @@ -1313,6 +1313,12 @@ static Sys_var_ulong Sys_max_connect_errors( VALID_RANGE(1, UINT_MAX), DEFAULT(MAX_CONNECT_ERRORS), BLOCK_SIZE(1)); +static Sys_var_long Sys_max_digest_length( + "max_digest_length", "Maximum length considered for digest text.", + PARSED_EARLY READ_ONLY GLOBAL_VAR(max_digest_length), + CMD_LINE(REQUIRED_ARG), + VALID_RANGE(0, 1024 * 1024), DEFAULT(1024), BLOCK_SIZE(1)); + static bool check_max_delayed_threads(sys_var *self, THD *thd, set_var *var) { return var->type != OPT_GLOBAL && |