diff options
Diffstat (limited to 'sql/log.cc')
-rw-r--r-- | sql/log.cc | 4136 |
1 files changed, 3141 insertions, 995 deletions
diff --git a/sql/log.cc b/sql/log.cc index 5a1cfe46686..a36a0c14d07 100644 --- a/sql/log.cc +++ b/sql/log.cc @@ -1,4 +1,4 @@ -/* Copyright (C) 2000-2003 MySQL AB +/* Copyright 2000-2008 MySQL AB, 2008 Sun Microsystems, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -14,15 +14,20 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -/* logging of commands */ -/* TODO: Abort logging when we get an error in reading or writing log files */ +/** + @file -#ifdef __EMX__ -#include <io.h> -#endif + @brief + logging of commands + + @todo + Abort logging when we get an error in reading or writing log files +*/ #include "mysql_priv.h" #include "sql_repl.h" +#include "rpl_filter.h" +#include "rpl_rli.h" #include <my_dir.h> #include <stdarg.h> @@ -32,138 +37,1525 @@ #include "message.h" #endif -MYSQL_LOG mysql_log, mysql_slow_log, mysql_bin_log; -ulong sync_binlog_counter= 0; +#include <mysql/plugin.h> + +/* max size of the log message */ +#define MAX_LOG_BUFFER_SIZE 1024 +#define MAX_USER_HOST_SIZE 512 +#define MAX_TIME_SIZE 32 +#define MY_OFF_T_UNDEF (~(my_off_t)0UL) -static Muted_query_log_event invisible_commit; +#define FLAGSTR(V,F) ((V)&(F)?#F" ":"") + +LOGGER logger; + +MYSQL_BIN_LOG mysql_bin_log; +ulong sync_binlog_counter= 0; static bool test_if_number(const char *str, long *res, bool allow_wildcards); -static bool binlog_init(); -static int binlog_close_connection(THD *thd); -static int binlog_savepoint_set(THD *thd, void *sv); -static int binlog_savepoint_rollback(THD *thd, void *sv); -static int binlog_commit(THD *thd, bool all); -static int binlog_rollback(THD *thd, bool all); -static int binlog_prepare(THD *thd, bool all); - -handlerton binlog_hton = { - "binlog", - SHOW_OPTION_YES, - "This is a meta storage engine to represent the binlog in a transaction", - DB_TYPE_UNKNOWN, /* IGNORE for now */ - binlog_init, - 0, - sizeof(my_off_t), /* savepoint size = binlog offset */ - binlog_close_connection, - binlog_savepoint_set, - binlog_savepoint_rollback, - NULL, /* savepoint_release */ - binlog_commit, - binlog_rollback, - binlog_prepare, - NULL, /* recover */ - NULL, /* commit_by_xid */ - NULL, /* rollback_by_xid */ - NULL, /* create_cursor_read_view */ - NULL, /* set_cursor_read_view */ - NULL, /* close_cursor_read_view */ - HTON_HIDDEN +static int binlog_init(void *p); +static int binlog_close_connection(handlerton *hton, THD *thd); +static int binlog_savepoint_set(handlerton *hton, THD *thd, void *sv); +static int binlog_savepoint_rollback(handlerton *hton, THD *thd, void *sv); +static int binlog_commit(handlerton *hton, THD *thd, bool all); +static int binlog_rollback(handlerton *hton, THD *thd, bool all); +static int binlog_prepare(handlerton *hton, THD *thd, bool all); + +/** + Silence all errors and warnings reported when performing a write + to a log table. + Errors and warnings are not reported to the client or SQL exception + handlers, so that the presence of logging does not interfere and affect + the logic of an application. +*/ +class Silence_log_table_errors : public Internal_error_handler +{ + char m_message[MYSQL_ERRMSG_SIZE]; +public: + Silence_log_table_errors() + { + m_message[0]= '\0'; + } + + virtual ~Silence_log_table_errors() {} + + virtual bool handle_error(uint sql_errno, const char *message, + MYSQL_ERROR::enum_warning_level level, + THD *thd); + const char *message() const { return m_message; } +}; + +bool +Silence_log_table_errors::handle_error(uint /* sql_errno */, + const char *message_arg, + MYSQL_ERROR::enum_warning_level /* level */, + THD * /* thd */) +{ + strmake(m_message, message_arg, sizeof(m_message)-1); + return TRUE; +} + + +sql_print_message_func sql_print_message_handlers[3] = +{ + sql_print_information, + sql_print_warning, + sql_print_error +}; + + +char *make_default_log_name(char *buff,const char* log_ext) +{ + strmake(buff, pidfile_name, FN_REFLEN-5); + return fn_format(buff, buff, mysql_data_home, log_ext, + MYF(MY_UNPACK_FILENAME|MY_REPLACE_EXT)); +} + +/* + Helper class to hold a mutex for the duration of the + block. + + Eliminates the need for explicit unlocking of mutexes on, e.g., + error returns. On passing a null pointer, the sentry will not do + anything. + */ +class Mutex_sentry +{ +public: + Mutex_sentry(pthread_mutex_t *mutex) + : m_mutex(mutex) + { + if (m_mutex) + pthread_mutex_lock(mutex); + } + + ~Mutex_sentry() + { + if (m_mutex) + pthread_mutex_unlock(m_mutex); +#ifndef DBUG_OFF + m_mutex= 0; +#endif + } + +private: + pthread_mutex_t *m_mutex; + + // It's not allowed to copy this object in any way + Mutex_sentry(Mutex_sentry const&); + void operator=(Mutex_sentry const&); }; /* + Helper class to store binary log transaction data. +*/ +class binlog_trx_data { +public: + binlog_trx_data() + : at_least_one_stmt(0), m_pending(0), before_stmt_pos(MY_OFF_T_UNDEF) + { + trans_log.end_of_file= max_binlog_cache_size; + } + + ~binlog_trx_data() + { + DBUG_ASSERT(pending() == NULL); + close_cached_file(&trans_log); + } + + my_off_t position() const { + return my_b_tell(&trans_log); + } + + bool empty() const + { + return pending() == NULL && my_b_tell(&trans_log) == 0; + } + + /* + Truncate the transaction cache to a certain position. This + includes deleting the pending event. + */ + void truncate(my_off_t pos) + { + DBUG_PRINT("info", ("truncating to position %lu", (ulong) pos)); + DBUG_PRINT("info", ("before_stmt_pos=%lu", (ulong) pos)); + delete pending(); + set_pending(0); + reinit_io_cache(&trans_log, WRITE_CACHE, pos, 0, 0); + if (pos < before_stmt_pos) + before_stmt_pos= MY_OFF_T_UNDEF; + + /* + The only valid positions that can be truncated to are at the + beginning of a statement. We are relying on this fact to be able + to set the at_least_one_stmt flag correctly. In other word, if + we are truncating to the beginning of the transaction cache, + there will be no statements in the cache, otherwhise, we will + have at least one statement in the transaction cache. + */ + at_least_one_stmt= (pos > 0); + } + + /* + Reset the entire contents of the transaction cache, emptying it + completely. + */ + void reset() { + if (!empty()) + truncate(0); + before_stmt_pos= MY_OFF_T_UNDEF; + trans_log.end_of_file= max_binlog_cache_size; + } + + Rows_log_event *pending() const + { + return m_pending; + } + + void set_pending(Rows_log_event *const pending) + { + m_pending= pending; + } + + IO_CACHE trans_log; // The transaction cache + + /** + Boolean that is true if there is at least one statement in the + transaction cache. + */ + bool at_least_one_stmt; + +private: + /* + Pending binrows event. This event is the event where the rows are + currently written. + */ + Rows_log_event *m_pending; + +public: + /* + Binlog position before the start of the current statement. + */ + my_off_t before_stmt_pos; +}; + +handlerton *binlog_hton; + +bool LOGGER::is_log_table_enabled(uint log_table_type) +{ + switch (log_table_type) { + case QUERY_LOG_SLOW: + return (table_log_handler != NULL) && opt_slow_log; + case QUERY_LOG_GENERAL: + return (table_log_handler != NULL) && opt_log ; + default: + DBUG_ASSERT(0); + return FALSE; /* make compiler happy */ + } +} + + +/* Check if a given table is opened log table */ +int check_if_log_table(uint db_len, const char *db, uint table_name_len, + const char *table_name, uint check_if_opened) +{ + if (db_len == 5 && + !(lower_case_table_names ? + my_strcasecmp(system_charset_info, db, "mysql") : + strcmp(db, "mysql"))) + { + if (table_name_len == 11 && !(lower_case_table_names ? + my_strcasecmp(system_charset_info, + table_name, "general_log") : + strcmp(table_name, "general_log"))) + { + if (!check_if_opened || logger.is_log_table_enabled(QUERY_LOG_GENERAL)) + return QUERY_LOG_GENERAL; + return 0; + } + + if (table_name_len == 8 && !(lower_case_table_names ? + my_strcasecmp(system_charset_info, table_name, "slow_log") : + strcmp(table_name, "slow_log"))) + { + if (!check_if_opened || logger.is_log_table_enabled(QUERY_LOG_SLOW)) + return QUERY_LOG_SLOW; + return 0; + } + } + return 0; +} + + +Log_to_csv_event_handler::Log_to_csv_event_handler() +{ +} + + +Log_to_csv_event_handler::~Log_to_csv_event_handler() +{ +} + + +void Log_to_csv_event_handler::cleanup() +{ + logger.is_log_tables_initialized= FALSE; +} + +/* log event handlers */ + +/** + Log command to the general log table + + Log given command to the general log table. + + @param event_time command start timestamp + @param user_host the pointer to the string with user@host info + @param user_host_len length of the user_host string. this is computed + once and passed to all general log event handlers + @param thread_id Id of the thread, issued a query + @param command_type the type of the command being logged + @param command_type_len the length of the string above + @param sql_text the very text of the query being executed + @param sql_text_len the length of sql_text string + + + @return This function attempts to never call my_error(). This is + necessary, because general logging happens already after a statement + status has been sent to the client, so the client can not see the + error anyway. Besides, the error is not related to the statement + being executed and is internal, and thus should be handled + internally (@todo: how?). + If a write to the table has failed, the function attempts to + write to a short error message to the file. The failure is also + indicated in the return value. + + @retval FALSE OK + @retval TRUE error occured +*/ + +bool Log_to_csv_event_handler:: + log_general(THD *thd, time_t event_time, const char *user_host, + uint user_host_len, int thread_id, + const char *command_type, uint command_type_len, + const char *sql_text, uint sql_text_len, + CHARSET_INFO *client_cs) +{ + TABLE_LIST table_list; + TABLE *table; + bool result= TRUE; + bool need_close= FALSE; + bool need_pop= FALSE; + bool need_rnd_end= FALSE; + uint field_index; + Silence_log_table_errors error_handler; + Open_tables_state open_tables_backup; + ulonglong save_thd_options; + bool save_time_zone_used; + + /* + CSV uses TIME_to_timestamp() internally if table needs to be repaired + which will set thd->time_zone_used + */ + save_time_zone_used= thd->time_zone_used; + + save_thd_options= thd->options; + thd->options&= ~OPTION_BIN_LOG; + + bzero(& table_list, sizeof(TABLE_LIST)); + table_list.alias= table_list.table_name= GENERAL_LOG_NAME.str; + table_list.table_name_length= GENERAL_LOG_NAME.length; + + table_list.lock_type= TL_WRITE_CONCURRENT_INSERT; + + table_list.db= MYSQL_SCHEMA_NAME.str; + table_list.db_length= MYSQL_SCHEMA_NAME.length; + + /* + 1) open_performance_schema_table generates an error of the + table can not be opened or is corrupted. + 2) "INSERT INTO general_log" can generate warning sometimes. + + Suppress these warnings and errors, they can't be dealt with + properly anyway. + + QQ: this problem needs to be studied in more detail. + Comment this 2 lines and run "cast.test" to see what's happening. + */ + thd->push_internal_handler(& error_handler); + need_pop= TRUE; + + if (!(table= open_performance_schema_table(thd, & table_list, + & open_tables_backup))) + goto err; + + need_close= TRUE; + + if (table->file->extra(HA_EXTRA_MARK_AS_LOG_TABLE) || + table->file->ha_rnd_init(0)) + goto err; + + need_rnd_end= TRUE; + + /* Honor next number columns if present */ + table->next_number_field= table->found_next_number_field; + + /* + NOTE: we do not call restore_record() here, as all fields are + filled by the Logger (=> no need to load default ones). + */ + + /* + We do not set a value for table->field[0], as it will use + default value (which is CURRENT_TIMESTAMP). + */ + + /* check that all columns exist */ + if (table->s->fields < 6) + goto err; + + DBUG_ASSERT(table->field[0]->type() == MYSQL_TYPE_TIMESTAMP); + + ((Field_timestamp*) table->field[0])->store_timestamp((my_time_t) + event_time); + + /* do a write */ + if (table->field[1]->store(user_host, user_host_len, client_cs) || + table->field[2]->store((longlong) thread_id, TRUE) || + table->field[3]->store((longlong) server_id, TRUE) || + table->field[4]->store(command_type, command_type_len, client_cs)) + goto err; + + /* + A positive return value in store() means truncation. + Still logging a message in the log in this case. + */ + table->field[5]->flags|= FIELDFLAG_HEX_ESCAPE; + if (table->field[5]->store(sql_text, sql_text_len, client_cs) < 0) + goto err; + + /* mark all fields as not null */ + table->field[1]->set_notnull(); + table->field[2]->set_notnull(); + table->field[3]->set_notnull(); + table->field[4]->set_notnull(); + table->field[5]->set_notnull(); + + /* Set any extra columns to their default values */ + for (field_index= 6 ; field_index < table->s->fields ; field_index++) + { + table->field[field_index]->set_default(); + } + + /* log table entries are not replicated */ + if (table->file->ha_write_row(table->record[0])) + goto err; + + result= FALSE; + +err: + if (result && !thd->killed) + sql_print_error("Failed to write to mysql.general_log: %s", + error_handler.message()); + + if (need_rnd_end) + { + table->file->ha_rnd_end(); + table->file->ha_release_auto_increment(); + } + if (need_pop) + thd->pop_internal_handler(); + if (need_close) + close_performance_schema_table(thd, & open_tables_backup); + + thd->options= save_thd_options; + thd->time_zone_used= save_time_zone_used; + return result; +} + + +/* + Log a query to the slow log table + + SYNOPSIS + log_slow() + thd THD of the query + current_time current timestamp + query_start_arg command start timestamp + user_host the pointer to the string with user@host info + user_host_len length of the user_host string. this is computed once + and passed to all general log event handlers + query_time Amount of time the query took to execute (in microseconds) + lock_time Amount of time the query was locked (in microseconds) + is_command The flag, which determines, whether the sql_text is a + query or an administrator command (these are treated + differently by the old logging routines) + sql_text the very text of the query or administrator command + processed + sql_text_len the length of sql_text string + + DESCRIPTION + + Log a query to the slow log table + + RETURN + FALSE - OK + TRUE - error occured +*/ + +bool Log_to_csv_event_handler:: + log_slow(THD *thd, time_t current_time, time_t query_start_arg, + const char *user_host, uint user_host_len, + ulonglong query_utime, ulonglong lock_utime, bool is_command, + const char *sql_text, uint sql_text_len) +{ + TABLE_LIST table_list; + TABLE *table; + bool result= TRUE; + bool need_close= FALSE; + bool need_rnd_end= FALSE; + Silence_log_table_errors error_handler; + Open_tables_state open_tables_backup; + CHARSET_INFO *client_cs= thd->variables.character_set_client; + bool save_time_zone_used; + DBUG_ENTER("Log_to_csv_event_handler::log_slow"); + + thd->push_internal_handler(& error_handler); + /* + CSV uses TIME_to_timestamp() internally if table needs to be repaired + which will set thd->time_zone_used + */ + save_time_zone_used= thd->time_zone_used; + + bzero(& table_list, sizeof(TABLE_LIST)); + table_list.alias= table_list.table_name= SLOW_LOG_NAME.str; + table_list.table_name_length= SLOW_LOG_NAME.length; + + table_list.lock_type= TL_WRITE_CONCURRENT_INSERT; + + table_list.db= MYSQL_SCHEMA_NAME.str; + table_list.db_length= MYSQL_SCHEMA_NAME.length; + + if (!(table= open_performance_schema_table(thd, & table_list, + & open_tables_backup))) + goto err; + + need_close= TRUE; + + if (table->file->extra(HA_EXTRA_MARK_AS_LOG_TABLE) || + table->file->ha_rnd_init(0)) + goto err; + + need_rnd_end= TRUE; + + /* Honor next number columns if present */ + table->next_number_field= table->found_next_number_field; + + restore_record(table, s->default_values); // Get empty record + + /* check that all columns exist */ + if (table->s->fields < 11) + goto err; + + /* store the time and user values */ + DBUG_ASSERT(table->field[0]->type() == MYSQL_TYPE_TIMESTAMP); + ((Field_timestamp*) table->field[0])->store_timestamp((my_time_t) + current_time); + if (table->field[1]->store(user_host, user_host_len, client_cs)) + goto err; + + if (query_start_arg) + { + longlong query_time= (longlong) (query_utime/1000000); + longlong lock_time= (longlong) (lock_utime/1000000); + /* + A TIME field can not hold the full longlong range; query_time or + lock_time may be truncated without warning here, if greater than + 839 hours (~35 days) + */ + MYSQL_TIME t; + t.neg= 0; + + /* fill in query_time field */ + calc_time_from_sec(&t, (long) min(query_time, (longlong) TIME_MAX_VALUE_SECONDS), 0); + if (table->field[2]->store_time(&t, MYSQL_TIMESTAMP_TIME)) + goto err; + /* lock_time */ + calc_time_from_sec(&t, (long) min(lock_time, (longlong) TIME_MAX_VALUE_SECONDS), 0); + if (table->field[3]->store_time(&t, MYSQL_TIMESTAMP_TIME)) + goto err; + /* rows_sent */ + if (table->field[4]->store((longlong) thd->sent_row_count, TRUE)) + goto err; + /* rows_examined */ + if (table->field[5]->store((longlong) thd->examined_row_count, TRUE)) + goto err; + } + else + { + table->field[2]->set_null(); + table->field[3]->set_null(); + table->field[4]->set_null(); + table->field[5]->set_null(); + } + /* fill database field */ + if (thd->db) + { + if (table->field[6]->store(thd->db, thd->db_length, client_cs)) + goto err; + table->field[6]->set_notnull(); + } + + if (thd->stmt_depends_on_first_successful_insert_id_in_prev_stmt) + { + if (table-> + field[7]->store((longlong) + thd->first_successful_insert_id_in_prev_stmt_for_binlog, + TRUE)) + goto err; + table->field[7]->set_notnull(); + } + + /* + Set value if we do an insert on autoincrement column. Note that for + some engines (those for which get_auto_increment() does not leave a + table lock until the statement ends), this is just the first value and + the next ones used may not be contiguous to it. + */ + if (thd->auto_inc_intervals_in_cur_stmt_for_binlog.nb_elements() > 0) + { + if (table-> + field[8]->store((longlong) + thd->auto_inc_intervals_in_cur_stmt_for_binlog.minimum(), TRUE)) + goto err; + table->field[8]->set_notnull(); + } + + if (table->field[9]->store((longlong) server_id, TRUE)) + goto err; + table->field[9]->set_notnull(); + + /* + Column sql_text. + A positive return value in store() means truncation. + Still logging a message in the log in this case. + */ + if (table->field[10]->store(sql_text, sql_text_len, client_cs) < 0) + goto err; + + /* log table entries are not replicated */ + if (table->file->ha_write_row(table->record[0])) + goto err; + + result= FALSE; + +err: + thd->pop_internal_handler(); + + if (result && !thd->killed) + sql_print_error("Failed to write to mysql.slow_log: %s", + error_handler.message()); + + if (need_rnd_end) + { + table->file->ha_rnd_end(); + table->file->ha_release_auto_increment(); + } + if (need_close) + close_performance_schema_table(thd, & open_tables_backup); + thd->time_zone_used= save_time_zone_used; + DBUG_RETURN(result); +} + +int Log_to_csv_event_handler:: + activate_log(THD *thd, uint log_table_type) +{ + TABLE_LIST table_list; + TABLE *table; + int result; + Open_tables_state open_tables_backup; + + DBUG_ENTER("Log_to_csv_event_handler::activate_log"); + + bzero(& table_list, sizeof(TABLE_LIST)); + + if (log_table_type == QUERY_LOG_GENERAL) + { + table_list.alias= table_list.table_name= GENERAL_LOG_NAME.str; + table_list.table_name_length= GENERAL_LOG_NAME.length; + } + else + { + DBUG_ASSERT(log_table_type == QUERY_LOG_SLOW); + table_list.alias= table_list.table_name= SLOW_LOG_NAME.str; + table_list.table_name_length= SLOW_LOG_NAME.length; + } + + table_list.lock_type= TL_WRITE_CONCURRENT_INSERT; + + table_list.db= MYSQL_SCHEMA_NAME.str; + table_list.db_length= MYSQL_SCHEMA_NAME.length; + + table= open_performance_schema_table(thd, & table_list, + & open_tables_backup); + if (table) + { + result= 0; + close_performance_schema_table(thd, & open_tables_backup); + } + else + result= 1; + + DBUG_RETURN(result); +} + +bool Log_to_csv_event_handler:: + log_error(enum loglevel level, const char *format, va_list args) +{ + /* No log table is implemented */ + DBUG_ASSERT(0); + return FALSE; +} + +bool Log_to_file_event_handler:: + log_error(enum loglevel level, const char *format, + va_list args) +{ + return vprint_msg_to_log(level, format, args); +} + +void Log_to_file_event_handler::init_pthread_objects() +{ + mysql_log.init_pthread_objects(); + mysql_slow_log.init_pthread_objects(); +} + + +/** Wrapper around MYSQL_LOG::write() for slow log. */ + +bool Log_to_file_event_handler:: + log_slow(THD *thd, time_t current_time, time_t query_start_arg, + const char *user_host, uint user_host_len, + ulonglong query_utime, ulonglong lock_utime, bool is_command, + const char *sql_text, uint sql_text_len) +{ + Silence_log_table_errors error_handler; + thd->push_internal_handler(&error_handler); + bool retval= mysql_slow_log.write(thd, current_time, query_start_arg, + user_host, user_host_len, + query_utime, lock_utime, is_command, + sql_text, sql_text_len); + thd->pop_internal_handler(); + return retval; +} + + +/** + Wrapper around MYSQL_LOG::write() for general log. We need it since we + want all log event handlers to have the same signature. +*/ + +bool Log_to_file_event_handler:: + log_general(THD *thd, time_t event_time, const char *user_host, + uint user_host_len, int thread_id, + const char *command_type, uint command_type_len, + const char *sql_text, uint sql_text_len, + CHARSET_INFO *client_cs) +{ + Silence_log_table_errors error_handler; + thd->push_internal_handler(&error_handler); + bool retval= mysql_log.write(event_time, user_host, user_host_len, + thread_id, command_type, command_type_len, + sql_text, sql_text_len); + thd->pop_internal_handler(); + return retval; +} + + +bool Log_to_file_event_handler::init() +{ + if (!is_initialized) + { + if (opt_slow_log) + mysql_slow_log.open_slow_log(sys_var_slow_log_path.value); + + if (opt_log) + mysql_log.open_query_log(sys_var_general_log_path.value); + + is_initialized= TRUE; + } + + return FALSE; +} + + +void Log_to_file_event_handler::cleanup() +{ + mysql_log.cleanup(); + mysql_slow_log.cleanup(); +} + +void Log_to_file_event_handler::flush() +{ + /* reopen log files */ + if (opt_log) + mysql_log.reopen_file(); + if (opt_slow_log) + mysql_slow_log.reopen_file(); +} + +/* + Log error with all enabled log event handlers + + SYNOPSIS + error_log_print() + + level The level of the error significance: NOTE, + WARNING or ERROR. + format format string for the error message + args list of arguments for the format string + + RETURN + FALSE - OK + TRUE - error occured +*/ + +bool LOGGER::error_log_print(enum loglevel level, const char *format, + va_list args) +{ + bool error= FALSE; + Log_event_handler **current_handler; + + /* currently we don't need locking here as there is no error_log table */ + for (current_handler= error_log_handler_list ; *current_handler ;) + error= (*current_handler++)->log_error(level, format, args) || error; + + return error; +} + + +void LOGGER::cleanup_base() +{ + DBUG_ASSERT(inited == 1); + rwlock_destroy(&LOCK_logger); + if (table_log_handler) + { + table_log_handler->cleanup(); + delete table_log_handler; + } + if (file_log_handler) + file_log_handler->cleanup(); +} + + +void LOGGER::cleanup_end() +{ + DBUG_ASSERT(inited == 1); + if (file_log_handler) + delete file_log_handler; +} + + +/** + Perform basic log initialization: create file-based log handler and + init error log. +*/ +void LOGGER::init_base() +{ + DBUG_ASSERT(inited == 0); + inited= 1; + + /* + Here we create file log handler. We don't do it for the table log handler + here as it cannot be created so early. The reason is THD initialization, + which depends on the system variables (parsed later). + */ + if (!file_log_handler) + file_log_handler= new Log_to_file_event_handler; + + /* by default we use traditional error log */ + init_error_log(LOG_FILE); + + file_log_handler->init_pthread_objects(); + my_rwlock_init(&LOCK_logger, NULL); +} + + +void LOGGER::init_log_tables() +{ + if (!table_log_handler) + table_log_handler= new Log_to_csv_event_handler; + + if (!is_log_tables_initialized && + !table_log_handler->init() && !file_log_handler->init()) + is_log_tables_initialized= TRUE; +} + + +bool LOGGER::flush_logs(THD *thd) +{ + int rc= 0; + + /* + Now we lock logger, as nobody should be able to use logging routines while + log tables are closed + */ + logger.lock_exclusive(); + + /* reopen log files */ + file_log_handler->flush(); + + /* end of log flush */ + logger.unlock(); + return rc; +} + + +/* + Log slow query with all enabled log event handlers + + SYNOPSIS + slow_log_print() + + thd THD of the query being logged + query The query being logged + query_length The length of the query string + current_utime Current time in microseconds (from undefined start) + + RETURN + FALSE OK + TRUE error occured +*/ + +bool LOGGER::slow_log_print(THD *thd, const char *query, uint query_length, + ulonglong current_utime) + +{ + bool error= FALSE; + Log_event_handler **current_handler; + bool is_command= FALSE; + char user_host_buff[MAX_USER_HOST_SIZE]; + Security_context *sctx= thd->security_ctx; + uint user_host_len= 0; + ulonglong query_utime, lock_utime; + + /* + Print the message to the buffer if we have slow log enabled + */ + + if (*slow_log_handler_list) + { + time_t current_time; + + /* do not log slow queries from replication threads */ + if (thd->slave_thread && !opt_log_slow_slave_statements) + return 0; + + lock_shared(); + if (!opt_slow_log) + { + unlock(); + return 0; + } + + /* fill in user_host value: the format is "%s[%s] @ %s [%s]" */ + user_host_len= (strxnmov(user_host_buff, MAX_USER_HOST_SIZE, + sctx->priv_user ? sctx->priv_user : "", "[", + sctx->user ? sctx->user : "", "] @ ", + sctx->host ? sctx->host : "", " [", + sctx->ip ? sctx->ip : "", "]", NullS) - + user_host_buff); + + current_time= my_time_possible_from_micro(current_utime); + if (thd->start_utime) + { + query_utime= (current_utime - thd->start_utime); + lock_utime= (thd->utime_after_lock - thd->start_utime); + } + else + { + query_utime= lock_utime= 0; + } + + if (!query) + { + is_command= TRUE; + query= command_name[thd->command].str; + query_length= command_name[thd->command].length; + } + + for (current_handler= slow_log_handler_list; *current_handler ;) + error= (*current_handler++)->log_slow(thd, current_time, thd->start_time, + user_host_buff, user_host_len, + query_utime, lock_utime, is_command, + query, query_length) || error; + + unlock(); + } + return error; +} + +bool LOGGER::general_log_write(THD *thd, enum enum_server_command command, + const char *query, uint query_length) +{ + bool error= FALSE; + Log_event_handler **current_handler= general_log_handler_list; + char user_host_buff[MAX_USER_HOST_SIZE]; + Security_context *sctx= thd->security_ctx; + ulong id; + uint user_host_len= 0; + time_t current_time; + + if (thd) + id= thd->thread_id; /* Normal thread */ + else + id= 0; /* Log from connect handler */ + + lock_shared(); + if (!opt_log) + { + unlock(); + return 0; + } + user_host_len= strxnmov(user_host_buff, MAX_USER_HOST_SIZE, + sctx->priv_user ? sctx->priv_user : "", "[", + sctx->user ? sctx->user : "", "] @ ", + sctx->host ? sctx->host : "", " [", + sctx->ip ? sctx->ip : "", "]", NullS) - + user_host_buff; + + current_time= my_time(0); + while (*current_handler) + error|= (*current_handler++)-> + log_general(thd, current_time, user_host_buff, + user_host_len, id, + command_name[(uint) command].str, + command_name[(uint) command].length, + query, query_length, + thd->variables.character_set_client) || error; + unlock(); + + return error; +} + +bool LOGGER::general_log_print(THD *thd, enum enum_server_command command, + const char *format, va_list args) +{ + uint message_buff_len= 0; + char message_buff[MAX_LOG_BUFFER_SIZE]; + + /* prepare message */ + if (format) + message_buff_len= my_vsnprintf(message_buff, sizeof(message_buff), + format, args); + else + message_buff[0]= '\0'; + + return general_log_write(thd, command, message_buff, message_buff_len); +} + +void LOGGER::init_error_log(uint error_log_printer) +{ + if (error_log_printer & LOG_NONE) + { + error_log_handler_list[0]= 0; + return; + } + + switch (error_log_printer) { + case LOG_FILE: + error_log_handler_list[0]= file_log_handler; + error_log_handler_list[1]= 0; + break; + /* these two are disabled for now */ + case LOG_TABLE: + DBUG_ASSERT(0); + break; + case LOG_TABLE|LOG_FILE: + DBUG_ASSERT(0); + break; + } +} + +void LOGGER::init_slow_log(uint slow_log_printer) +{ + if (slow_log_printer & LOG_NONE) + { + slow_log_handler_list[0]= 0; + return; + } + + switch (slow_log_printer) { + case LOG_FILE: + slow_log_handler_list[0]= file_log_handler; + slow_log_handler_list[1]= 0; + break; + case LOG_TABLE: + slow_log_handler_list[0]= table_log_handler; + slow_log_handler_list[1]= 0; + break; + case LOG_TABLE|LOG_FILE: + slow_log_handler_list[0]= file_log_handler; + slow_log_handler_list[1]= table_log_handler; + slow_log_handler_list[2]= 0; + break; + } +} + +void LOGGER::init_general_log(uint general_log_printer) +{ + if (general_log_printer & LOG_NONE) + { + general_log_handler_list[0]= 0; + return; + } + + switch (general_log_printer) { + case LOG_FILE: + general_log_handler_list[0]= file_log_handler; + general_log_handler_list[1]= 0; + break; + case LOG_TABLE: + general_log_handler_list[0]= table_log_handler; + general_log_handler_list[1]= 0; + break; + case LOG_TABLE|LOG_FILE: + general_log_handler_list[0]= file_log_handler; + general_log_handler_list[1]= table_log_handler; + general_log_handler_list[2]= 0; + break; + } +} + + +bool LOGGER::activate_log_handler(THD* thd, uint log_type) +{ + MYSQL_QUERY_LOG *file_log; + bool res= FALSE; + lock_exclusive(); + switch (log_type) { + case QUERY_LOG_SLOW: + if (!opt_slow_log) + { + file_log= file_log_handler->get_mysql_slow_log(); + + file_log->open_slow_log(sys_var_slow_log_path.value); + if (table_log_handler->activate_log(thd, QUERY_LOG_SLOW)) + { + /* Error printed by open table in activate_log() */ + res= TRUE; + file_log->close(0); + } + else + { + init_slow_log(log_output_options); + opt_slow_log= TRUE; + } + } + break; + case QUERY_LOG_GENERAL: + if (!opt_log) + { + file_log= file_log_handler->get_mysql_log(); + + file_log->open_query_log(sys_var_general_log_path.value); + if (table_log_handler->activate_log(thd, QUERY_LOG_GENERAL)) + { + /* Error printed by open table in activate_log() */ + res= TRUE; + file_log->close(0); + } + else + { + init_general_log(log_output_options); + opt_log= TRUE; + } + } + break; + default: + DBUG_ASSERT(0); + } + unlock(); + return res; +} + + +void LOGGER::deactivate_log_handler(THD *thd, uint log_type) +{ + my_bool *tmp_opt= 0; + MYSQL_LOG *file_log; + + switch (log_type) { + case QUERY_LOG_SLOW: + tmp_opt= &opt_slow_log; + file_log= file_log_handler->get_mysql_slow_log(); + break; + case QUERY_LOG_GENERAL: + tmp_opt= &opt_log; + file_log= file_log_handler->get_mysql_log(); + break; + default: + assert(0); // Impossible + } + + if (!(*tmp_opt)) + return; + + lock_exclusive(); + file_log->close(0); + *tmp_opt= FALSE; + unlock(); +} + + +/* the parameters are unused for the log tables */ +bool Log_to_csv_event_handler::init() +{ + return 0; +} + +int LOGGER::set_handlers(uint error_log_printer, + uint slow_log_printer, + uint general_log_printer) +{ + /* error log table is not supported yet */ + DBUG_ASSERT(error_log_printer < LOG_TABLE); + + lock_exclusive(); + + if ((slow_log_printer & LOG_TABLE || general_log_printer & LOG_TABLE) && + !is_log_tables_initialized) + { + slow_log_printer= (slow_log_printer & ~LOG_TABLE) | LOG_FILE; + general_log_printer= (general_log_printer & ~LOG_TABLE) | LOG_FILE; + + sql_print_error("Failed to initialize log tables. " + "Falling back to the old-fashioned logs"); + } + + init_error_log(error_log_printer); + init_slow_log(slow_log_printer); + init_general_log(general_log_printer); + + unlock(); + + return 0; +} + + + /* + Save position of binary log transaction cache. + + SYNPOSIS + binlog_trans_log_savepos() + + thd The thread to take the binlog data from + pos Pointer to variable where the position will be stored + + DESCRIPTION + + Save the current position in the binary log transaction cache into + the variable pointed to by 'pos' + */ + +static void +binlog_trans_log_savepos(THD *thd, my_off_t *pos) +{ + DBUG_ENTER("binlog_trans_log_savepos"); + DBUG_ASSERT(pos != NULL); + if (thd_get_ha_data(thd, binlog_hton) == NULL) + thd->binlog_setup_trx_data(); + binlog_trx_data *const trx_data= + (binlog_trx_data*) thd_get_ha_data(thd, binlog_hton); + DBUG_ASSERT(mysql_bin_log.is_open()); + *pos= trx_data->position(); + DBUG_PRINT("return", ("*pos: %lu", (ulong) *pos)); + DBUG_VOID_RETURN; +} + + +/* + Truncate the binary log transaction cache. + + SYNPOSIS + binlog_trans_log_truncate() + + thd The thread to take the binlog data from + pos Position to truncate to + + DESCRIPTION + + Truncate the binary log to the given position. Will not change + anything else. + + */ +static void +binlog_trans_log_truncate(THD *thd, my_off_t pos) +{ + DBUG_ENTER("binlog_trans_log_truncate"); + DBUG_PRINT("enter", ("pos: %lu", (ulong) pos)); + + DBUG_ASSERT(thd_get_ha_data(thd, binlog_hton) != NULL); + /* Only true if binlog_trans_log_savepos() wasn't called before */ + DBUG_ASSERT(pos != ~(my_off_t) 0); + + binlog_trx_data *const trx_data= + (binlog_trx_data*) thd_get_ha_data(thd, binlog_hton); + trx_data->truncate(pos); + DBUG_VOID_RETURN; +} + + +/* this function is mostly a placeholder. - conceptually, binlog initialization (now mostly done in MYSQL_LOG::open) + conceptually, binlog initialization (now mostly done in MYSQL_BIN_LOG::open) should be moved here. */ -bool binlog_init() +int binlog_init(void *p) { - return !opt_bin_log; + binlog_hton= (handlerton *)p; + binlog_hton->state=opt_bin_log ? SHOW_OPTION_YES : SHOW_OPTION_NO; + binlog_hton->db_type=DB_TYPE_BINLOG; + binlog_hton->savepoint_offset= sizeof(my_off_t); + binlog_hton->close_connection= binlog_close_connection; + binlog_hton->savepoint_set= binlog_savepoint_set; + binlog_hton->savepoint_rollback= binlog_savepoint_rollback; + binlog_hton->commit= binlog_commit; + binlog_hton->rollback= binlog_rollback; + binlog_hton->prepare= binlog_prepare; + binlog_hton->flags= HTON_NOT_USER_SELECTABLE | HTON_HIDDEN; + return 0; } -static int binlog_close_connection(THD *thd) +static int binlog_close_connection(handlerton *hton, THD *thd) { - IO_CACHE *trans_log= (IO_CACHE*)thd->ha_data[binlog_hton.slot]; - DBUG_ASSERT(!my_b_tell(trans_log)); - close_cached_file(trans_log); - my_free((gptr)trans_log, MYF(0)); + binlog_trx_data *const trx_data= + (binlog_trx_data*) thd_get_ha_data(thd, binlog_hton); + DBUG_ASSERT(trx_data->empty()); + thd_set_ha_data(thd, binlog_hton, NULL); + trx_data->~binlog_trx_data(); + my_free((uchar*)trx_data, MYF(0)); return 0; } -static int binlog_end_trans(THD *thd, IO_CACHE *trans_log, Log_event *end_ev) +/* + End a transaction. + + SYNOPSIS + binlog_end_trans() + + thd The thread whose transaction should be ended + trx_data Pointer to the transaction data to use + end_ev The end event to use, or NULL + all True if the entire transaction should be ended, false if + only the statement transaction should be ended. + + DESCRIPTION + + End the currently open transaction. The transaction can be either + a real transaction (if 'all' is true) or a statement transaction + (if 'all' is false). + + If 'end_ev' is NULL, the transaction is a rollback of only + transactional tables, so the transaction cache will be truncated + to either just before the last opened statement transaction (if + 'all' is false), or reset completely (if 'all' is true). + */ +static int +binlog_end_trans(THD *thd, binlog_trx_data *trx_data, + Log_event *end_ev, bool all) { - int error=0; DBUG_ENTER("binlog_end_trans"); + int error=0; + IO_CACHE *trans_log= &trx_data->trans_log; + DBUG_PRINT("enter", ("transaction: %s end_ev: 0x%lx", + all ? "all" : "stmt", (long) end_ev)); + DBUG_PRINT("info", ("thd->options={ %s%s}", + FLAGSTR(thd->options, OPTION_NOT_AUTOCOMMIT), + FLAGSTR(thd->options, OPTION_BEGIN))); + + thd->binlog_flush_pending_rows_event(TRUE); - /* NULL denotes ROLLBACK with nothing to replicate */ + /* + NULL denotes ROLLBACK with nothing to replicate: i.e., rollback of + only transactional tables. If the transaction contain changes to + any non-transactiona tables, we need write the transaction and log + a ROLLBACK last. + */ if (end_ev != NULL) - error= mysql_bin_log.write(thd, trans_log, end_ev); + { + /* + Doing a commit or a rollback including non-transactional tables, + i.e., ending a transaction where we might write the transaction + cache to the binary log. + + We can always end the statement when ending a transaction since + transactions are not allowed inside stored functions. If they + were, we would have to ensure that we're not ending a statement + inside a stored function. + */ + error= mysql_bin_log.write(thd, &trx_data->trans_log, end_ev); + trx_data->reset(); - statistic_increment(binlog_cache_use, &LOCK_status); - if (trans_log->disk_writes != 0) + /* + We need to step the table map version after writing the + transaction cache to disk. + */ + mysql_bin_log.update_table_map_version(); + statistic_increment(binlog_cache_use, &LOCK_status); + if (trans_log->disk_writes != 0) + { + statistic_increment(binlog_cache_disk_use, &LOCK_status); + trans_log->disk_writes= 0; + } + } + else { - statistic_increment(binlog_cache_disk_use, &LOCK_status); - trans_log->disk_writes= 0; + /* + If rolling back an entire transaction or a single statement not + inside a transaction, we reset the transaction cache. + + If rolling back a statement in a transaction, we truncate the + transaction cache to remove the statement. + */ + thd->binlog_remove_pending_rows_event(TRUE); + if (all || !(thd->options & (OPTION_BEGIN | OPTION_NOT_AUTOCOMMIT))) + trx_data->reset(); + else // ...statement + trx_data->truncate(trx_data->before_stmt_pos); + + /* + We need to step the table map version on a rollback to ensure + that a new table map event is generated instead of the one that + was written to the thrown-away transaction cache. + */ + mysql_bin_log.update_table_map_version(); } - reinit_io_cache(trans_log, WRITE_CACHE, (my_off_t) 0, 0, 1); // cannot fail - trans_log->end_of_file= max_binlog_cache_size; + DBUG_RETURN(error); } -static int binlog_prepare(THD *thd, bool all) +static int binlog_prepare(handlerton *hton, THD *thd, bool all) { /* do nothing. just pretend we can do 2pc, so that MySQL won't switch to 1pc. - real work will be done in MYSQL_LOG::log_xid() + real work will be done in MYSQL_BIN_LOG::log_xid() */ return 0; } +#define YESNO(X) ((X) ? "yes" : "no") + /** This function is called once after each statement. It has the responsibility to flush the transaction cache to the binlog file on commits. + @param hton The binlog handlerton. @param thd The client thread that executes the transaction. - @param all true if this is the last statement before a COMMIT - statement; false if either this is a statement in a - transaction but not the last, or if this is a statement - not inside a BEGIN block and autocommit is on. + @param all This is @c true if this is a real transaction commit, and + @false otherwise. @see handlerton::commit */ -static int binlog_commit(THD *thd, bool all) +static int binlog_commit(handlerton *hton, THD *thd, bool all) { - IO_CACHE *trans_log= (IO_CACHE*)thd->ha_data[binlog_hton.slot]; DBUG_ENTER("binlog_commit"); - DBUG_ASSERT( - (all || !(thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)))); + binlog_trx_data *const trx_data= + (binlog_trx_data*) thd_get_ha_data(thd, binlog_hton); - if (my_b_tell(trans_log) == 0) + if (trx_data->empty()) { - // we're here because trans_log was flushed in MYSQL_LOG::log_xid() + // we're here because trans_log was flushed in MYSQL_BIN_LOG::log_xid() + trx_data->reset(); DBUG_RETURN(0); } + /* - Write commit event if at least one of the following holds: - - the user sends an explicit COMMIT; or - - the autocommit flag is on, and we are not inside a BEGIN. - However, if the user has not sent an explicit COMMIT, and we are - either inside a BEGIN or run with autocommit off, then this is not - the end of a transaction and we should not write a commit event. + Decision table for committing a transaction. The top part, the + *conditions* represent different cases that can occur, and hte + bottom part, the *actions*, represent what should be done in that + particular case. + + Real transaction 'all' was true + + Statement in cache There were at least one statement in the + transaction cache + + In transaction We are inside a transaction + + Stmt modified non-trans The statement being committed modified a + non-transactional table + + All modified non-trans Some statement before this one in the + transaction modified a non-transactional + table + + + ============================= = = = = = = = = = = = = = = = = + Real transaction N N N N N N N N N N N N N N N N + Statement in cache N N N N N N N N Y Y Y Y Y Y Y Y + In transaction N N N N Y Y Y Y N N N N Y Y Y Y + Stmt modified non-trans N N Y Y N N Y Y N N Y Y N N Y Y + All modified non-trans N Y N Y N Y N Y N Y N Y N Y N Y + + Action: (C)ommit/(A)ccumulate C C - C A C - C - - - - A A - A + ============================= = = = = = = = = = = = = = = = = + + + ============================= = = = = = = = = = = = = = = = = + Real transaction Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y + Statement in cache N N N N N N N N Y Y Y Y Y Y Y Y + In transaction N N N N Y Y Y Y N N N N Y Y Y Y + Stmt modified non-trans N N Y Y N N Y Y N N Y Y N N Y Y + All modified non-trans N Y N Y N Y N Y N Y N Y N Y N Y + + (C)ommit/(A)ccumulate/(-) - - - - C C - C - - - - C C - C + ============================= = = = = = = = = = = = = = = = = + + In other words, we commit the transaction if and only if both of + the following are true: + - We are not in a transaction and committing a statement + + - We are in a transaction and one (or more) of the following are + true: + + - A full transaction is committed + + OR + + - A non-transactional statement is committed and there is + no statement cached + + Otherwise, we accumulate the statement */ - if (all || !(thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) + ulonglong const in_transaction= + thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN); + DBUG_PRINT("debug", + ("all: %d, empty: %s, in_transaction: %s, all.modified_non_trans_table: %s, stmt.modified_non_trans_table: %s", + all, + YESNO(trx_data->empty()), + YESNO(in_transaction), + YESNO(thd->transaction.all.modified_non_trans_table), + YESNO(thd->transaction.stmt.modified_non_trans_table))); + if (in_transaction && + (all || + (!trx_data->at_least_one_stmt && + thd->transaction.stmt.modified_non_trans_table)) || + !in_transaction && !all) { Query_log_event qev(thd, STRING_WITH_LEN("COMMIT"), TRUE, FALSE); qev.error_code= 0; // see comment in MYSQL_LOG::write(THD, IO_CACHE) - DBUG_RETURN(binlog_end_trans(thd, trans_log, &qev)); + int error= binlog_end_trans(thd, trx_data, &qev, all); + DBUG_RETURN(error); } - else - DBUG_RETURN(binlog_end_trans(thd, trans_log, &invisible_commit)); + DBUG_RETURN(0); } /** @@ -174,46 +1566,64 @@ static int binlog_commit(THD *thd, bool all) binlog file. However, if the transaction does not involve non-transactional tables, nothing needs to be logged. + @param hton The binlog handlerton. @param thd The client thread that executes the transaction. - @param all true if this is the last statement before a COMMIT - statement; false if either this is a statement in a - transaction but not the last, or if this is a statement - not inside a BEGIN block and autocommit is on. + @param all This is @c true if this is a real transaction rollback, and + @false otherwise. @see handlerton::rollback */ -static int binlog_rollback(THD *thd, bool all) +static int binlog_rollback(handlerton *hton, THD *thd, bool all) { - int error=0; - IO_CACHE *trans_log= (IO_CACHE*)thd->ha_data[binlog_hton.slot]; DBUG_ENTER("binlog_rollback"); - /* - First assert is guaranteed - see trans_register_ha() call below. - The second must be true. If it is not, we're registering - unnecessary, doing extra work. The cause should be found and eliminated - */ - DBUG_ASSERT(all || !(thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))); - DBUG_ASSERT(my_b_tell(trans_log)); - /* - Update the binary log with a BEGIN/ROLLBACK block if we have - cached some queries and we updated some non-transactional - table. Such cases should be rare (updating a - non-transactional table inside a transaction...) - */ - if (unlikely(thd->transaction.all.modified_non_trans_table)) + int error=0; + binlog_trx_data *const trx_data= + (binlog_trx_data*) thd_get_ha_data(thd, binlog_hton); + + if (trx_data->empty()) { + trx_data->reset(); + DBUG_RETURN(0); + } + + DBUG_PRINT("debug", ("all: %s, all.modified_non_trans_table: %s, stmt.modified_non_trans_table: %s", + YESNO(all), + YESNO(thd->transaction.all.modified_non_trans_table), + YESNO(thd->transaction.stmt.modified_non_trans_table))); + if (all && thd->transaction.all.modified_non_trans_table || + !all && thd->transaction.stmt.modified_non_trans_table || + (thd->options & OPTION_KEEP_LOG)) { + /* + We write the transaction cache with a rollback last if we have + modified any non-transactional table. We do this even if we are + committing a single statement that has modified a + non-transactional table since it can have modified a + transactional table in that statement as well, which needs to be + rolled back on the slave. + */ Query_log_event qev(thd, STRING_WITH_LEN("ROLLBACK"), TRUE, FALSE); qev.error_code= 0; // see comment in MYSQL_LOG::write(THD, IO_CACHE) - error= binlog_end_trans(thd, trans_log, &qev); + error= binlog_end_trans(thd, trx_data, &qev, all); + } + else if (all && !thd->transaction.all.modified_non_trans_table || + !all && !thd->transaction.stmt.modified_non_trans_table) + { + /* + If we have modified only transactional tables, we can truncate + the transaction cache without writing anything to the binary + log. + */ + error= binlog_end_trans(thd, trx_data, 0, all); } - else - error= binlog_end_trans(thd, trans_log, 0); DBUG_RETURN(error); } -/* - NOTE: how do we handle this (unlikely but legal) case: - [transaction] + [update to non-trans table] + [rollback to savepoint] ? +/** + @note + How do we handle this (unlikely but legal) case: + @verbatim + [transaction] + [update to non-trans table] + [rollback to savepoint] ? + @endverbatim The problem occurs when a savepoint is before the update to the non-transactional table. Then when there's a rollback to the savepoint, if we simply truncate the binlog cache, we lose the part of the binlog cache where @@ -232,44 +1642,47 @@ static int binlog_rollback(THD *thd, bool all) that case there is no need to have it in the binlog). */ -static int binlog_savepoint_set(THD *thd, void *sv) +static int binlog_savepoint_set(handlerton *hton, THD *thd, void *sv) { - IO_CACHE *trans_log= (IO_CACHE*)thd->ha_data[binlog_hton.slot]; DBUG_ENTER("binlog_savepoint_set"); - DBUG_ASSERT(my_b_tell(trans_log)); - *(my_off_t *)sv= my_b_tell(trans_log); + binlog_trans_log_savepos(thd, (my_off_t*) sv); /* Write it to the binary log */ - Query_log_event qinfo(thd, thd->query, thd->query_length, TRUE, FALSE); - DBUG_RETURN(mysql_bin_log.write(&qinfo)); + + int const error= + thd->binlog_query(THD::STMT_QUERY_TYPE, + thd->query, thd->query_length, TRUE, FALSE); + DBUG_RETURN(error); } -static int binlog_savepoint_rollback(THD *thd, void *sv) +static int binlog_savepoint_rollback(handlerton *hton, THD *thd, void *sv) { - IO_CACHE *trans_log= (IO_CACHE*)thd->ha_data[binlog_hton.slot]; DBUG_ENTER("binlog_savepoint_rollback"); - DBUG_ASSERT(my_b_tell(trans_log)); /* Write ROLLBACK TO SAVEPOINT to the binlog cache if we have updated some non-transactional table. Otherwise, truncate the binlog cache starting from the SAVEPOINT command. */ - if (unlikely(thd->transaction.all.modified_non_trans_table)) + if (unlikely(thd->transaction.all.modified_non_trans_table || + (thd->options & OPTION_KEEP_LOG))) { - Query_log_event qinfo(thd, thd->query, thd->query_length, TRUE, FALSE); - DBUG_RETURN(mysql_bin_log.write(&qinfo)); + int error= + thd->binlog_query(THD::STMT_QUERY_TYPE, + thd->query, thd->query_length, TRUE, FALSE); + DBUG_RETURN(error); } - reinit_io_cache(trans_log, WRITE_CACHE, *(my_off_t *)sv, 0, 0); + binlog_trans_log_truncate(thd, *(my_off_t*)sv); DBUG_RETURN(0); } + int check_binlog_magic(IO_CACHE* log, const char** errmsg) { char magic[4]; DBUG_ASSERT(my_b_tell(log) == 0); - if (my_b_read(log, (byte*) magic, sizeof(magic))) + if (my_b_read(log, (uchar*) magic, sizeof(magic))) { *errmsg = "I/O error reading the header from the binary log"; sql_print_error("%s, errno=%d, io cache code=%d", *errmsg, my_errno, @@ -284,6 +1697,7 @@ int check_binlog_magic(IO_CACHE* log, const char** errmsg) return 0; } + File open_binlog(IO_CACHE *log, const char *log_file_name, const char **errmsg) { File file; @@ -356,11 +1770,14 @@ static void setup_windows_event_source() #endif /* __NT__ */ -/**************************************************************************** -** Find a uniq filename for 'filename.#'. -** Set # to a number as low as possible -** returns != 0 if not possible to get uniq filename -****************************************************************************/ +/** + Find a unique filename for 'filename.#'. + + Set '#' to a number as low as possible. + + @return + nonzero if not possible to get unique filename +*/ static int find_uniq_filename(char *name) { @@ -370,15 +1787,16 @@ static int find_uniq_filename(char *name) struct st_my_dir *dir_info; reg1 struct fileinfo *file_info; ulong max_found=0; - + size_t buf_length, length; + char *start, *end; DBUG_ENTER("find_uniq_filename"); - uint length = dirname_part(buff,name); - char *start = name + length; - char *end = strend(start); + length= dirname_part(buff, name, &buf_length); + start= name + length; + end= strend(start); *end='.'; - length= (uint) (end-start+1); + length= (size_t) (end-start+1); if (!(dir_info = my_dir(buff,MYF(MY_DONT_SORT)))) { // This shouldn't happen @@ -388,7 +1806,7 @@ static int find_uniq_filename(char *name) file_info= dir_info->dir_entry; for (i=dir_info->number_off_files ; i-- ; file_info++) { - if (bcmp(file_info->name,start,length) == 0 && + if (bcmp((uchar*) file_info->name, (uchar*) start, length) == 0 && test_if_number(file_info->name+length, &number,0)) { set_if_bigger(max_found,(ulong) number); @@ -402,11 +1820,120 @@ static int find_uniq_filename(char *name) } +void MYSQL_LOG::init(enum_log_type log_type_arg, + enum cache_type io_cache_type_arg) +{ + DBUG_ENTER("MYSQL_LOG::init"); + log_type= log_type_arg; + io_cache_type= io_cache_type_arg; + DBUG_PRINT("info",("log_type: %d", log_type)); + DBUG_VOID_RETURN; +} + + +/* + Open a (new) log file. + + SYNOPSIS + open() + + log_name The name of the log to open + log_type_arg The type of the log. E.g. LOG_NORMAL + new_name The new name for the logfile. This is only needed + when the method is used to open the binlog file. + io_cache_type_arg The type of the IO_CACHE to use for this log file + + DESCRIPTION + Open the logfile, init IO_CACHE and write startup messages + (in case of general and slow query logs). + + RETURN VALUES + 0 ok + 1 error +*/ + +bool MYSQL_LOG::open(const char *log_name, enum_log_type log_type_arg, + const char *new_name, enum cache_type io_cache_type_arg) +{ + char buff[FN_REFLEN]; + File file= -1; + int open_flags= O_CREAT | O_BINARY; + DBUG_ENTER("MYSQL_LOG::open"); + DBUG_PRINT("enter", ("log_type: %d", (int) log_type_arg)); + + write_error= 0; + + init(log_type_arg, io_cache_type_arg); + + if (!(name= my_strdup(log_name, MYF(MY_WME)))) + { + name= (char *)log_name; // for the error message + goto err; + } + + if (new_name) + strmov(log_file_name, new_name); + else if (generate_new_name(log_file_name, name)) + goto err; + + if (io_cache_type == SEQ_READ_APPEND) + open_flags |= O_RDWR | O_APPEND; + else + open_flags |= O_WRONLY | (log_type == LOG_BIN ? 0 : O_APPEND); + + db[0]= 0; + + if ((file= my_open(log_file_name, open_flags, + MYF(MY_WME | ME_WAITTANG))) < 0 || + init_io_cache(&log_file, file, IO_SIZE, io_cache_type, + my_tell(file, MYF(MY_WME)), 0, + MYF(MY_WME | MY_NABP | + ((log_type == LOG_BIN) ? MY_WAIT_IF_FULL : 0)))) + goto err; + + if (log_type == LOG_NORMAL) + { + char *end; + int len=my_snprintf(buff, sizeof(buff), "%s, Version: %s (%s). " +#ifdef EMBEDDED_LIBRARY + "embedded library\n", + my_progname, server_version, MYSQL_COMPILATION_COMMENT +#elif __NT__ + "started with:\nTCP Port: %d, Named Pipe: %s\n", + my_progname, server_version, MYSQL_COMPILATION_COMMENT, + mysqld_port, mysqld_unix_port +#else + "started with:\nTcp port: %d Unix socket: %s\n", + my_progname, server_version, MYSQL_COMPILATION_COMMENT, + mysqld_port, mysqld_unix_port +#endif + ); + end= strnmov(buff + len, "Time Id Command Argument\n", + sizeof(buff) - len); + if (my_b_write(&log_file, (uchar*) buff, (uint) (end-buff)) || + flush_io_cache(&log_file)) + goto err; + } + + log_state= LOG_OPENED; + DBUG_RETURN(0); + +err: + sql_print_error("Could not use %s for logging (error %d). \ +Turning logging off for the whole duration of the MySQL server process. \ +To turn it on again: fix the cause, \ +shutdown the MySQL server and restart it.", name, errno); + if (file >= 0) + my_close(file, MYF(0)); + end_io_cache(&log_file); + safeFree(name); + log_state= LOG_CLOSED; + DBUG_RETURN(1); +} + MYSQL_LOG::MYSQL_LOG() - :bytes_written(0), last_time(0), query_start(0), name(0), - prepared_xids(0), log_type(LOG_CLOSED), file_id(1), open_count(1), - write_error(FALSE), inited(FALSE), need_start_event(TRUE), - description_event_for_exec(0), description_event_for_queue(0) + : name(0), write_error(FALSE), inited(FALSE), log_type(LOG_UNKNOWN), + log_state(LOG_CLOSED) { /* We don't want to initialize LOCK_Log here as such initialization depends on @@ -414,12 +1941,57 @@ MYSQL_LOG::MYSQL_LOG() called only in main(). Doing initialization here would make it happen before main(). */ - index_file_name[0] = 0; - bzero((char*) &log_file,sizeof(log_file)); - bzero((char*) &index_file, sizeof(index_file)); + bzero((char*) &log_file, sizeof(log_file)); } -/* this is called only once */ +void MYSQL_LOG::init_pthread_objects() +{ + DBUG_ASSERT(inited == 0); + inited= 1; + (void) pthread_mutex_init(&LOCK_log, MY_MUTEX_INIT_SLOW); +} + +/* + Close the log file + + SYNOPSIS + close() + exiting Bitmask. For the slow and general logs the only used bit is + LOG_CLOSE_TO_BE_OPENED. This is used if we intend to call + open at once after close. + + NOTES + One can do an open on the object at once after doing a close. + The internal structures are not freed until cleanup() is called +*/ + +void MYSQL_LOG::close(uint exiting) +{ // One can't set log_type here! + DBUG_ENTER("MYSQL_LOG::close"); + DBUG_PRINT("enter",("exiting: %d", (int) exiting)); + if (log_state == LOG_OPENED) + { + end_io_cache(&log_file); + + if (my_sync(log_file.file, MYF(MY_WME)) && ! write_error) + { + write_error= 1; + sql_print_error(ER(ER_ERROR_ON_WRITE), name, errno); + } + + if (my_close(log_file.file, MYF(MY_WME)) && ! write_error) + { + write_error= 1; + sql_print_error(ER(ER_ERROR_ON_WRITE), name, errno); + } + } + + log_state= (exiting & LOG_CLOSE_TO_BE_OPENED) ? LOG_TO_BE_OPENED : LOG_CLOSED; + safeFree(name); + DBUG_VOID_RETURN; +} + +/** This is called only once. */ void MYSQL_LOG::cleanup() { @@ -427,12 +1999,8 @@ void MYSQL_LOG::cleanup() if (inited) { inited= 0; - close(LOG_CLOSE_INDEX|LOG_CLOSE_STOP_EVENT); - delete description_event_for_queue; - delete description_event_for_exec; (void) pthread_mutex_destroy(&LOCK_log); - (void) pthread_mutex_destroy(&LOCK_index); - (void) pthread_cond_destroy(&update_cond); + close(0); } DBUG_VOID_RETURN; } @@ -440,8 +2008,8 @@ void MYSQL_LOG::cleanup() int MYSQL_LOG::generate_new_name(char *new_name, const char *log_name) { - fn_format(new_name,log_name,mysql_data_home,"",4); - if (log_type != LOG_NORMAL) + fn_format(new_name, log_name, mysql_data_home, "", 4); + if (log_type == LOG_BIN) { if (!fn_ext(log_name)[0]) { @@ -456,53 +2024,384 @@ int MYSQL_LOG::generate_new_name(char *new_name, const char *log_name) } -void MYSQL_LOG::init(enum_log_type log_type_arg, - enum cache_type io_cache_type_arg, - bool no_auto_events_arg, - ulong max_size_arg) +/* + Reopen the log file + + SYNOPSIS + reopen_file() + + DESCRIPTION + Reopen the log file. The method is used during FLUSH LOGS + and locks LOCK_log mutex +*/ + + +void MYSQL_QUERY_LOG::reopen_file() { - DBUG_ENTER("MYSQL_LOG::init"); - log_type = log_type_arg; - io_cache_type = io_cache_type_arg; - no_auto_events = no_auto_events_arg; - max_size=max_size_arg; - DBUG_PRINT("info",("log_type: %d max_size: %lu", log_type, max_size)); + char *save_name; + + DBUG_ENTER("MYSQL_LOG::reopen_file"); + if (!is_open()) + { + DBUG_PRINT("info",("log is closed")); + DBUG_VOID_RETURN; + } + + pthread_mutex_lock(&LOCK_log); + + save_name= name; + name= 0; // Don't free name + close(LOG_CLOSE_TO_BE_OPENED); + + /* + Note that at this point, log_state != LOG_CLOSED (important for is_open()). + */ + + open(save_name, log_type, 0, io_cache_type); + my_free(save_name, MYF(0)); + + pthread_mutex_unlock(&LOCK_log); + DBUG_VOID_RETURN; } -void MYSQL_LOG::init_pthread_objects() +/* + Write a command to traditional general log file + + SYNOPSIS + write() + + event_time command start timestamp + user_host the pointer to the string with user@host info + user_host_len length of the user_host string. this is computed once + and passed to all general log event handlers + thread_id Id of the thread, issued a query + command_type the type of the command being logged + command_type_len the length of the string above + sql_text the very text of the query being executed + sql_text_len the length of sql_text string + + DESCRIPTION + + Log given command to to normal (not rotable) log file + + RETURN + FASE - OK + TRUE - error occured +*/ + +bool MYSQL_QUERY_LOG::write(time_t event_time, const char *user_host, + uint user_host_len, int thread_id, + const char *command_type, uint command_type_len, + const char *sql_text, uint sql_text_len) { - DBUG_ASSERT(inited == 0); - inited= 1; - (void) pthread_mutex_init(&LOCK_log,MY_MUTEX_INIT_SLOW); - (void) pthread_mutex_init(&LOCK_index, MY_MUTEX_INIT_SLOW); - (void) pthread_cond_init(&update_cond, 0); + char buff[32]; + uint length= 0; + char local_time_buff[MAX_TIME_SIZE]; + struct tm start; + uint time_buff_len= 0; + + (void) pthread_mutex_lock(&LOCK_log); + + /* Test if someone closed between the is_open test and lock */ + if (is_open()) + { + /* Note that my_b_write() assumes it knows the length for this */ + if (event_time != last_time) + { + last_time= event_time; + + localtime_r(&event_time, &start); + + time_buff_len= my_snprintf(local_time_buff, MAX_TIME_SIZE, + "%02d%02d%02d %2d:%02d:%02d", + start.tm_year % 100, start.tm_mon + 1, + start.tm_mday, start.tm_hour, + start.tm_min, start.tm_sec); + + if (my_b_write(&log_file, (uchar*) local_time_buff, time_buff_len)) + goto err; + } + else + if (my_b_write(&log_file, (uchar*) "\t\t" ,2) < 0) + goto err; + + /* command_type, thread_id */ + length= my_snprintf(buff, 32, "%5ld ", (long) thread_id); + + if (my_b_write(&log_file, (uchar*) buff, length)) + goto err; + + if (my_b_write(&log_file, (uchar*) command_type, command_type_len)) + goto err; + + if (my_b_write(&log_file, (uchar*) "\t", 1)) + goto err; + + /* sql_text */ + if (my_b_write(&log_file, (uchar*) sql_text, sql_text_len)) + goto err; + + if (my_b_write(&log_file, (uchar*) "\n", 1) || + flush_io_cache(&log_file)) + goto err; + } + + (void) pthread_mutex_unlock(&LOCK_log); + return FALSE; +err: + + if (!write_error) + { + write_error= 1; + sql_print_error(ER(ER_ERROR_ON_WRITE), name, errno); + } + (void) pthread_mutex_unlock(&LOCK_log); + return TRUE; +} + + +/* + Log a query to the traditional slow log file + + SYNOPSIS + write() + + thd THD of the query + current_time current timestamp + query_start_arg command start timestamp + user_host the pointer to the string with user@host info + user_host_len length of the user_host string. this is computed once + and passed to all general log event handlers + query_utime Amount of time the query took to execute (in microseconds) + lock_utime Amount of time the query was locked (in microseconds) + is_command The flag, which determines, whether the sql_text is a + query or an administrator command. + sql_text the very text of the query or administrator command + processed + sql_text_len the length of sql_text string + + DESCRIPTION + + Log a query to the slow log file. + + RETURN + FALSE - OK + TRUE - error occured +*/ + +bool MYSQL_QUERY_LOG::write(THD *thd, time_t current_time, + time_t query_start_arg, const char *user_host, + uint user_host_len, ulonglong query_utime, + ulonglong lock_utime, bool is_command, + const char *sql_text, uint sql_text_len) +{ + bool error= 0; + DBUG_ENTER("MYSQL_QUERY_LOG::write"); + + (void) pthread_mutex_lock(&LOCK_log); + + if (!is_open()) + { + (void) pthread_mutex_unlock(&LOCK_log); + DBUG_RETURN(0); + } + + if (is_open()) + { // Safety agains reopen + int tmp_errno= 0; + char buff[80], *end; + char query_time_buff[22+7], lock_time_buff[22+7]; + uint buff_len; + end= buff; + + if (!(specialflag & SPECIAL_SHORT_LOG_FORMAT)) + { + if (current_time != last_time) + { + last_time= current_time; + struct tm start; + localtime_r(¤t_time, &start); + + buff_len= my_snprintf(buff, sizeof buff, + "# Time: %02d%02d%02d %2d:%02d:%02d\n", + start.tm_year % 100, start.tm_mon + 1, + start.tm_mday, start.tm_hour, + start.tm_min, start.tm_sec); + + /* Note that my_b_write() assumes it knows the length for this */ + if (my_b_write(&log_file, (uchar*) buff, buff_len)) + tmp_errno= errno; + } + const uchar uh[]= "# User@Host: "; + if (my_b_write(&log_file, uh, sizeof(uh) - 1)) + tmp_errno= errno; + if (my_b_write(&log_file, (uchar*) user_host, user_host_len)) + tmp_errno= errno; + if (my_b_write(&log_file, (uchar*) "\n", 1)) + tmp_errno= errno; + } + /* For slow query log */ + sprintf(query_time_buff, "%.6f", ulonglong2double(query_utime)/1000000.0); + sprintf(lock_time_buff, "%.6f", ulonglong2double(lock_utime)/1000000.0); + if (my_b_printf(&log_file, + "# Query_time: %s Lock_time: %s" + " Rows_sent: %lu Rows_examined: %lu\n", + query_time_buff, lock_time_buff, + (ulong) thd->sent_row_count, + (ulong) thd->examined_row_count) == (uint) -1) + tmp_errno= errno; + if (thd->db && strcmp(thd->db, db)) + { // Database changed + if (my_b_printf(&log_file,"use %s;\n",thd->db) == (uint) -1) + tmp_errno= errno; + strmov(db,thd->db); + } + if (thd->stmt_depends_on_first_successful_insert_id_in_prev_stmt) + { + end=strmov(end, ",last_insert_id="); + end=longlong10_to_str((longlong) + thd->first_successful_insert_id_in_prev_stmt_for_binlog, + end, -10); + } + // Save value if we do an insert. + if (thd->auto_inc_intervals_in_cur_stmt_for_binlog.nb_elements() > 0) + { + if (!(specialflag & SPECIAL_SHORT_LOG_FORMAT)) + { + end=strmov(end,",insert_id="); + end=longlong10_to_str((longlong) + thd->auto_inc_intervals_in_cur_stmt_for_binlog.minimum(), + end, -10); + } + } + + /* + This info used to show up randomly, depending on whether the query + checked the query start time or not. now we always write current + timestamp to the slow log + */ + end= strmov(end, ",timestamp="); + end= int10_to_str((long) current_time, end, 10); + + if (end != buff) + { + *end++=';'; + *end='\n'; + if (my_b_write(&log_file, (uchar*) "SET ", 4) || + my_b_write(&log_file, (uchar*) buff + 1, (uint) (end-buff))) + tmp_errno= errno; + } + if (is_command) + { + end= strxmov(buff, "# administrator command: ", NullS); + buff_len= (ulong) (end - buff); + my_b_write(&log_file, (uchar*) buff, buff_len); + } + if (my_b_write(&log_file, (uchar*) sql_text, sql_text_len) || + my_b_write(&log_file, (uchar*) ";\n",2) || + flush_io_cache(&log_file)) + tmp_errno= errno; + if (tmp_errno) + { + error= 1; + if (! write_error) + { + write_error= 1; + sql_print_error(ER(ER_ERROR_ON_WRITE), name, error); + } + } + } + (void) pthread_mutex_unlock(&LOCK_log); + DBUG_RETURN(error); } + +/** + @todo + The following should be using fn_format(); We just need to + first change fn_format() to cut the file name if it's too long. +*/ const char *MYSQL_LOG::generate_name(const char *log_name, - const char *suffix, - bool strip_ext, char *buff) + const char *suffix, + bool strip_ext, char *buff) { if (!log_name || !log_name[0]) { strmake(buff, pidfile_name, FN_REFLEN - strlen(suffix) - 1); return (const char *) fn_format(buff, buff, "", suffix, MYF(MY_REPLACE_EXT|MY_REPLACE_DIR)); - } // get rid of extension if the log is binary to avoid problems if (strip_ext) { - char *p = fn_ext(log_name); - uint length=(uint) (p-log_name); - strmake(buff,log_name,min(length,FN_REFLEN)); + char *p= fn_ext(log_name); + uint length= (uint) (p - log_name); + strmake(buff, log_name, min(length, FN_REFLEN)); return (const char*)buff; } return log_name; } -bool MYSQL_LOG::open_index_file(const char *index_file_name_arg, + + +MYSQL_BIN_LOG::MYSQL_BIN_LOG() + :bytes_written(0), prepared_xids(0), file_id(1), open_count(1), + need_start_event(TRUE), m_table_map_version(0), + description_event_for_exec(0), description_event_for_queue(0) +{ + /* + We don't want to initialize locks here as such initialization depends on + safe_mutex (when using safe_mutex) which depends on MY_INIT(), which is + called only in main(). Doing initialization here would make it happen + before main(). + */ + index_file_name[0] = 0; + bzero((char*) &index_file, sizeof(index_file)); +} + +/* this is called only once */ + +void MYSQL_BIN_LOG::cleanup() +{ + DBUG_ENTER("cleanup"); + if (inited) + { + inited= 0; + close(LOG_CLOSE_INDEX|LOG_CLOSE_STOP_EVENT); + delete description_event_for_queue; + delete description_event_for_exec; + (void) pthread_mutex_destroy(&LOCK_log); + (void) pthread_mutex_destroy(&LOCK_index); + (void) pthread_cond_destroy(&update_cond); + } + DBUG_VOID_RETURN; +} + + +/* Init binlog-specific vars */ +void MYSQL_BIN_LOG::init(bool no_auto_events_arg, ulong max_size_arg) +{ + DBUG_ENTER("MYSQL_BIN_LOG::init"); + no_auto_events= no_auto_events_arg; + max_size= max_size_arg; + DBUG_PRINT("info",("max_size: %lu", max_size)); + DBUG_VOID_RETURN; +} + + +void MYSQL_BIN_LOG::init_pthread_objects() +{ + DBUG_ASSERT(inited == 0); + inited= 1; + (void) pthread_mutex_init(&LOCK_log, MY_MUTEX_INIT_SLOW); + (void) pthread_mutex_init(&LOCK_index, MY_MUTEX_INIT_SLOW); + (void) pthread_cond_init(&update_cond, 0); +} + + +bool MYSQL_BIN_LOG::open_index_file(const char *index_file_name_arg, const char *log_name) { File index_file_nr= -1; @@ -530,6 +2429,11 @@ bool MYSQL_LOG::open_index_file(const char *index_file_name_arg, my_seek(index_file_nr,0L,MY_SEEK_END,MYF(0)), 0, MYF(MY_WME | MY_WAIT_IF_FULL))) { + /* + TODO: all operations creating/deleting the index file or a log, should + call my_sync_dir() or my_sync_dir_by_file() to be durable. + TODO: file creation should be done with my_create() not my_open(). + */ if (index_file_nr >= 0) my_close(index_file_nr,MYF(0)); return TRUE; @@ -538,111 +2442,44 @@ bool MYSQL_LOG::open_index_file(const char *index_file_name_arg, } -/* - Open a (new) log file. +/** + Open a (new) binlog file. - DESCRIPTION - - If binary logs, also open the index file and register the new - file name in it + - Open the log file and the index file. Register the new + file name in it - When calling this when the file is in use, you must have a locks - on LOCK_log and LOCK_index. + on LOCK_log and LOCK_index. - RETURN VALUES + @retval 0 ok + @retval 1 error */ -bool MYSQL_LOG::open(const char *log_name, - enum_log_type log_type_arg, - const char *new_name, - enum cache_type io_cache_type_arg, - bool no_auto_events_arg, - ulong max_size_arg, - bool null_created_arg) +bool MYSQL_BIN_LOG::open(const char *log_name, + enum_log_type log_type_arg, + const char *new_name, + enum cache_type io_cache_type_arg, + bool no_auto_events_arg, + ulong max_size_arg, + bool null_created_arg) { - char buff[FN_REFLEN]; File file= -1; - int open_flags = O_CREAT | O_BINARY; - DBUG_ENTER("MYSQL_LOG::open"); + DBUG_ENTER("MYSQL_BIN_LOG::open"); DBUG_PRINT("enter",("log_type: %d",(int) log_type_arg)); - last_time=query_start=0; write_error=0; - init(log_type_arg,io_cache_type_arg,no_auto_events_arg,max_size_arg); + /* open the main log file */ + if (MYSQL_LOG::open(log_name, log_type_arg, new_name, io_cache_type_arg)) + DBUG_RETURN(1); /* all warnings issued */ - if (!(name=my_strdup(log_name,MYF(MY_WME)))) - { - name= (char *)log_name; // for the error message - goto err; - } - if (new_name) - strmov(log_file_name,new_name); - else if (generate_new_name(log_file_name, name)) - goto err; + init(no_auto_events_arg, max_size_arg); - if (io_cache_type == SEQ_READ_APPEND) - open_flags |= O_RDWR | O_APPEND; - else - open_flags |= O_WRONLY | (log_type == LOG_BIN ? 0 : O_APPEND); - - db[0]=0; open_count++; - if ((file=my_open(log_file_name,open_flags, - MYF(MY_WME | ME_WAITTANG))) < 0 || - init_io_cache(&log_file, file, IO_SIZE, io_cache_type, - my_tell(file,MYF(MY_WME)), 0, - MYF(MY_WME | MY_NABP | - ((log_type == LOG_BIN) ? MY_WAIT_IF_FULL : 0)))) - goto err; - switch (log_type) { - case LOG_NORMAL: - { - char *end; - int len=my_snprintf(buff, sizeof(buff), "%s, Version: %s (%s). " -#ifdef EMBEDDED_LIBRARY - "embedded library\n", - my_progname, server_version, MYSQL_COMPILATION_COMMENT -#elif __NT__ - "started with:\nTCP Port: %d, Named Pipe: %s\n", - my_progname, server_version, MYSQL_COMPILATION_COMMENT, - mysqld_port, mysqld_unix_port -#else - "started with:\nTcp port: %d Unix socket: %s\n", - my_progname, server_version, MYSQL_COMPILATION_COMMENT, - mysqld_port, mysqld_unix_port -#endif - ); - end=strnmov(buff+len,"Time Id Command Argument\n", - sizeof(buff)-len); - if (my_b_write(&log_file, (byte*) buff,(uint) (end-buff)) || - flush_io_cache(&log_file)) - goto err; - break; - } - case LOG_NEW: - { - uint len; - time_t skr=time(NULL); - struct tm tm_tmp; - - localtime_r(&skr,&tm_tmp); - len= my_snprintf(buff,sizeof(buff), - "# %s, Version: %s at %02d%02d%02d %2d:%02d:%02d\n", - my_progname,server_version, - tm_tmp.tm_year % 100, - tm_tmp.tm_mon+1, - tm_tmp.tm_mday, - tm_tmp.tm_hour, - tm_tmp.tm_min, - tm_tmp.tm_sec); - if (my_b_write(&log_file, (byte*) buff, len) || - flush_io_cache(&log_file)) - goto err; - break; - } - case LOG_BIN: + DBUG_ASSERT(log_type == LOG_BIN); + { bool write_file_name_to_index_file=0; @@ -654,7 +2491,7 @@ bool MYSQL_LOG::open(const char *log_name, an extension for the binary log files. In this case we write a standard header to it. */ - if (my_b_safe_write(&log_file, (byte*) BINLOG_MAGIC, + if (my_b_safe_write(&log_file, (uchar*) BINLOG_MAGIC, BIN_LOG_HEADER_SIZE)) goto err; bytes_written+= BIN_LOG_HEADER_SIZE; @@ -679,8 +2516,7 @@ bool MYSQL_LOG::open(const char *log_name, s.flags|= LOG_EVENT_BINLOG_IN_USE_F; if (!s.is_valid()) goto err; - if (null_created_arg) - s.created= 0; + s.dont_set_created= null_created_arg; if (s.write(&log_file)) goto err; bytes_written+= s.data_written; @@ -706,7 +2542,7 @@ bool MYSQL_LOG::open(const char *log_name, /* Set 'created' to 0, so that in next relay logs this event does not trigger cleaning actions on the slave in - Format_description_log_event::exec_event(). + Format_description_log_event::apply_event_impl(). */ description_event_for_queue->created= 0; /* Don't set log_pos in event header */ @@ -726,20 +2562,16 @@ bool MYSQL_LOG::open(const char *log_name, As this is a new log file, we write the file name to the index file. As every time we write to the index file, we sync it. */ - if (my_b_write(&index_file, (byte*) log_file_name, + if (my_b_write(&index_file, (uchar*) log_file_name, strlen(log_file_name)) || - my_b_write(&index_file, (byte*) "\n", 1) || + my_b_write(&index_file, (uchar*) "\n", 1) || flush_io_cache(&index_file) || my_sync(index_file.file, MYF(MY_WME))) goto err; } - break; - } - case LOG_CLOSED: // Impossible - case LOG_TO_BE_OPENED: - DBUG_ASSERT(1); - break; } + log_state= LOG_OPENED; + DBUG_RETURN(0); err: @@ -752,12 +2584,12 @@ shutdown the MySQL server and restart it.", name, errno); end_io_cache(&log_file); end_io_cache(&index_file); safeFree(name); - log_type= LOG_CLOSED; + log_state= LOG_CLOSED; DBUG_RETURN(1); } -int MYSQL_LOG::get_current_log(LOG_INFO* linfo) +int MYSQL_BIN_LOG::get_current_log(LOG_INFO* linfo) { pthread_mutex_lock(&LOCK_log); int ret = raw_get_current_log(linfo); @@ -765,31 +2597,27 @@ int MYSQL_LOG::get_current_log(LOG_INFO* linfo) return ret; } -int MYSQL_LOG::raw_get_current_log(LOG_INFO* linfo) +int MYSQL_BIN_LOG::raw_get_current_log(LOG_INFO* linfo) { strmake(linfo->log_file_name, log_file_name, sizeof(linfo->log_file_name)-1); linfo->pos = my_b_tell(&log_file); return 0; } -/* - Move all data up in a file in an filename index file - - SYNOPSIS - copy_up_file_and_fill() - index_file File to move - offset Move everything from here to beginning - - NOTE - File will be truncated to be 'offset' shorter or filled up with - newlines +/** + Move all data up in a file in an filename index file. - IMPLEMENTATION We do the copy outside of the IO_CACHE as the cache buffers would just make things slower and more complicated. In most cases the copy loop should only do one read. - RETURN VALUES + @param index_file File to move + @param offset Move everything from here to beginning + + @note + File will be truncated to be 'offset' shorter or filled up with newlines + + @retval 0 ok */ @@ -800,7 +2628,7 @@ static bool copy_up_file_and_fill(IO_CACHE *index_file, my_off_t offset) int bytes_read; my_off_t init_offset= offset; File file= index_file->file; - byte io_buf[IO_SIZE*2]; + uchar io_buf[IO_SIZE*2]; DBUG_ENTER("copy_up_file_and_fill"); for (;; offset+= bytes_read) @@ -812,7 +2640,7 @@ static bool copy_up_file_and_fill(IO_CACHE *index_file, my_off_t offset) if (!bytes_read) break; // end of file (void) my_seek(file, offset-init_offset, MY_SEEK_SET, MYF(0)); - if (my_write(file, (byte*) io_buf, bytes_read, MYF(MY_WME | MY_NABP))) + if (my_write(file, io_buf, bytes_read, MYF(MY_WME | MY_NABP))) goto err; } /* The following will either truncate the file or fill the end with \n' */ @@ -830,29 +2658,29 @@ err: #endif /* HAVE_REPLICATION */ -/* - Find the position in the log-index-file for the given log name +/** + Find the position in the log-index-file for the given log name. - SYNOPSIS - find_log_pos() - linfo Store here the found log file name and position to - the NEXT log file name in the index file. - log_name Filename to find in the index file. - Is a null pointer if we want to read the first entry - need_lock Set this to 1 if the parent doesn't already have a - lock on LOCK_index + @param linfo Store here the found log file name and position to + the NEXT log file name in the index file. + @param log_name Filename to find in the index file. + Is a null pointer if we want to read the first entry + @param need_lock Set this to 1 if the parent doesn't already have a + lock on LOCK_index - NOTE + @note On systems without the truncate function the file will end with one or more empty lines. These will be ignored when reading the file. - RETURN VALUES + @retval 0 ok - LOG_INFO_EOF End of log-index-file found + @retval + LOG_INFO_EOF End of log-index-file found + @retval LOG_INFO_IO Got IO error while reading file */ -int MYSQL_LOG::find_log_pos(LOG_INFO *linfo, const char *log_name, +int MYSQL_BIN_LOG::find_log_pos(LOG_INFO *linfo, const char *log_name, bool need_lock) { int error= 0; @@ -904,29 +2732,31 @@ int MYSQL_LOG::find_log_pos(LOG_INFO *linfo, const char *log_name, } -/* - Find the position in the log-index-file for the given log name +/** + Find the position in the log-index-file for the given log name. - SYNOPSIS - find_next_log() + @param linfo Store here the next log file name and position to the file name after that. + @param need_lock Set this to 1 if the parent doesn't already have a lock on LOCK_index - NOTE + @note - Before calling this function, one has to call find_log_pos() - to set up 'linfo' + to set up 'linfo' - Mutex needed because we need to make sure the file pointer does not move - from under our feet + from under our feet - RETURN VALUES + @retval 0 ok - LOG_INFO_EOF End of log-index-file found + @retval + LOG_INFO_EOF End of log-index-file found + @retval LOG_INFO_IO Got IO error while reading file */ -int MYSQL_LOG::find_next_log(LOG_INFO* linfo, bool need_lock) +int MYSQL_BIN_LOG::find_next_log(LOG_INFO* linfo, bool need_lock) { int error= 0; uint length; @@ -956,32 +2786,31 @@ err: } -/* - Delete all logs refered to in the index file - Start writing to a new log file. The new index file will only contain - this file. +/** + Delete all logs refered to in the index file. + Start writing to a new log file. - SYNOPSIS - reset_logs() - thd Thread + The new index file will only contain this file. - NOTE - If not called from slave thread, write start event to new log + @param thd Thread + @note + If not called from slave thread, write start event to new log - RETURN VALUES + @retval 0 ok + @retval 1 error */ -bool MYSQL_LOG::reset_logs(THD* thd) +bool MYSQL_BIN_LOG::reset_logs(THD* thd) { LOG_INFO linfo; bool error=0; const char* save_name; - enum_log_type save_log_type; DBUG_ENTER("reset_logs"); + ha_reset_logs(thd); /* We need to get both locks to be sure that no one is trying to write to the index log file. @@ -995,12 +2824,11 @@ bool MYSQL_LOG::reset_logs(THD* thd) thread. If the transaction involved MyISAM tables, it should go into binlog even on rollback. */ - (void) pthread_mutex_lock(&LOCK_thread_count); + VOID(pthread_mutex_lock(&LOCK_thread_count)); /* Save variables so that we can reopen the log */ save_name=name; name=0; // Protect against free - save_log_type=log_type; close(LOG_CLOSE_TO_BE_OPENED); /* First delete all old log files */ @@ -1013,67 +2841,116 @@ bool MYSQL_LOG::reset_logs(THD* thd) for (;;) { - my_delete_allow_opened(linfo.log_file_name, MYF(MY_WME)); + if ((error= my_delete_allow_opened(linfo.log_file_name, MYF(0))) != 0) + { + if (my_errno == ENOENT) + { + push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN, + ER_LOG_PURGE_NO_FILE, ER(ER_LOG_PURGE_NO_FILE), + linfo.log_file_name); + sql_print_information("Failed to delete file '%s'", + linfo.log_file_name); + my_errno= 0; + error= 0; + } + else + { + push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_ERROR, + ER_BINLOG_PURGE_FATAL_ERR, + "a problem with deleting %s; " + "consider examining correspondence " + "of your binlog index file " + "to the actual binlog files", + linfo.log_file_name); + error= 1; + goto err; + } + } if (find_next_log(&linfo, 0)) break; } /* Start logging with a new file */ close(LOG_CLOSE_INDEX); - my_delete_allow_opened(index_file_name, MYF(MY_WME)); // Reset (open will update) + if ((error= my_delete_allow_opened(index_file_name, MYF(0)))) // Reset (open will update) + { + if (my_errno == ENOENT) + { + push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN, + ER_LOG_PURGE_NO_FILE, ER(ER_LOG_PURGE_NO_FILE), + index_file_name); + sql_print_information("Failed to delete file '%s'", + index_file_name); + my_errno= 0; + error= 0; + } + else + { + push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_ERROR, + ER_BINLOG_PURGE_FATAL_ERR, + "a problem with deleting %s; " + "consider examining correspondence " + "of your binlog index file " + "to the actual binlog files", + index_file_name); + error= 1; + goto err; + } + } if (!thd->slave_thread) need_start_event=1; if (!open_index_file(index_file_name, 0)) - open(save_name, save_log_type, 0, - io_cache_type, no_auto_events, max_size, 0); - my_free((gptr) save_name, MYF(0)); + open(save_name, log_type, 0, io_cache_type, no_auto_events, max_size, 0); + my_free((uchar*) save_name, MYF(0)); err: - (void) pthread_mutex_unlock(&LOCK_thread_count); + VOID(pthread_mutex_unlock(&LOCK_thread_count)); pthread_mutex_unlock(&LOCK_index); pthread_mutex_unlock(&LOCK_log); DBUG_RETURN(error); } -/* +/** Delete relay log files prior to rli->group_relay_log_name (i.e. all logs which are not involved in a non-finished group - (transaction)), remove them from the index file and start on next relay log. + (transaction)), remove them from the index file and start on next + relay log. - SYNOPSIS - purge_first_log() - rli Relay log information - included If false, all relay logs that are strictly before - rli->group_relay_log_name are deleted ; if true, the latter is - deleted too (i.e. all relay logs - read by the SQL slave thread are deleted). - - NOTE + IMPLEMENTATION + - Protects index file with LOCK_index + - Delete relevant relay log files + - Copy all file names after these ones to the front of the index file + - If the OS has truncate, truncate the file, else fill it with \n' + - Read the next file name from the index file and store in rli->linfo + + @param rli Relay log information + @param included If false, all relay logs that are strictly before + rli->group_relay_log_name are deleted ; if true, the + latter is deleted too (i.e. all relay logs + read by the SQL slave thread are deleted). + + @note - This is only called from the slave-execute thread when it has read - all commands from a relay log and want to switch to a new relay log. + all commands from a relay log and want to switch to a new relay log. - When this happens, we can be in an active transaction as - a transaction can span over two relay logs - (although it is always written as a single block to the master's binary - log, hence cannot span over two master's binary logs). + a transaction can span over two relay logs + (although it is always written as a single block to the master's binary + log, hence cannot span over two master's binary logs). - IMPLEMENTATION - - Protects index file with LOCK_index - - Delete relevant relay log files - - Copy all file names after these ones to the front of the index file - - If the OS has truncate, truncate the file, else fill it with \n' - - Read the next file name from the index file and store in rli->linfo - - RETURN VALUES + @retval 0 ok - LOG_INFO_EOF End of log-index-file found + @retval + LOG_INFO_EOF End of log-index-file found + @retval LOG_INFO_SEEK Could not allocate IO cache + @retval LOG_INFO_IO Got IO error while reading file */ #ifdef HAVE_REPLICATION -int MYSQL_LOG::purge_first_log(struct st_relay_log_info* rli, bool included) +int MYSQL_BIN_LOG::purge_first_log(Relay_log_info* rli, bool included) { int error; DBUG_ENTER("purge_first_log"); @@ -1145,11 +3022,11 @@ err: DBUG_RETURN(error); } -/* - Update log index_file +/** + Update log index_file. */ -int MYSQL_LOG::update_log_index(LOG_INFO* log_info, bool need_update_threads) +int MYSQL_BIN_LOG::update_log_index(LOG_INFO* log_info, bool need_update_threads) { if (copy_up_file_and_fill(&index_file, log_info->index_file_start_offset)) return LOG_INFO_IO; @@ -1160,37 +3037,38 @@ int MYSQL_LOG::update_log_index(LOG_INFO* log_info, bool need_update_threads) return 0; } -/* +/** Remove all logs before the given log from disk and from the index file. - SYNOPSIS - purge_logs() - to_log Delete all log file name before this file. - included If true, to_log is deleted too. - need_mutex - need_update_threads If we want to update the log coordinates of - all threads. False for relay logs, true otherwise. - freed_log_space If not null, decrement this variable of - the amount of log space freed + @param to_log Delete all log file name before this file. + @param included If true, to_log is deleted too. + @param need_mutex + @param need_update_threads If we want to update the log coordinates of + all threads. False for relay logs, true otherwise. + @param freed_log_space If not null, decrement this variable of + the amount of log space freed - NOTES + @note If any of the logs before the deleted one is in use, only purge logs up to this one. - RETURN VALUES - 0 ok + @retval + 0 ok + @retval LOG_INFO_EOF to_log not found + LOG_INFO_EMFILE too many files opened LOG_INFO_FATAL if any other than ENOENT error from my_stat() or my_delete() */ -int MYSQL_LOG::purge_logs(const char *to_log, +int MYSQL_BIN_LOG::purge_logs(const char *to_log, bool included, bool need_mutex, bool need_update_threads, ulonglong *decrease_log_space) { int error; + int ret = 0; bool exit_loop= 0; LOG_INFO log_info; THD *thd= current_thd; @@ -1247,11 +3125,11 @@ int MYSQL_LOG::purge_logs(const char *to_log, } else { - sql_print_information("Failed to delete log file '%s'; " - "consider examining correspondence " - "of your binlog index file " - "to the actual binlog files", - log_info.log_file_name); + sql_print_information("Failed to delete log file '%s'; " + "consider examining correspondence " + "of your binlog index file " + "to the actual binlog files", + log_info.log_file_name); } error= LOG_INFO_FATAL; goto err; @@ -1299,11 +3177,21 @@ int MYSQL_LOG::purge_logs(const char *to_log, "to the actual binlog files", log_info.log_file_name); } + if (my_errno == EMFILE) + { + DBUG_PRINT("info", + ("my_errno: %d, set ret = LOG_INFO_EMFILE", my_errno)); + error= LOG_INFO_EMFILE; + goto err; + } error= LOG_INFO_FATAL; goto err; } } } + + ha_binlog_index_purge_file(current_thd, log_info.log_file_name); + if (find_next_log(&log_info, 0) || exit_loop) break; } @@ -1313,6 +3201,9 @@ int MYSQL_LOG::purge_logs(const char *to_log, the log index file after restart - otherwise, this should be safe */ error= update_log_index(&log_info, need_update_threads); + if (error == 0) { + error = ret; + } err: if (need_mutex) @@ -1320,27 +3211,26 @@ err: DBUG_RETURN(error); } -/* +/** Remove all logs before the given file date from disk and from the index file. - SYNOPSIS - purge_logs_before_date() - thd Thread pointer - before_date Delete all log files before given date. + @param thd Thread pointer + @param before_date Delete all log files before given date. - NOTES + @note If any of the logs before the deleted one is in use, only purge logs up to this one. - RETURN VALUES + @retval 0 ok + @retval LOG_INFO_PURGE_NO_ROTATE Binary file that can't be rotated LOG_INFO_FATAL if any other than ENOENT error from my_stat() or my_delete() */ -int MYSQL_LOG::purge_logs_before_date(time_t purge_time) +int MYSQL_BIN_LOG::purge_logs_before_date(time_t purge_time) { int error; LOG_INFO log_info; @@ -1368,7 +3258,7 @@ int MYSQL_LOG::purge_logs_before_date(time_t purge_time) { /* It's not fatal if we can't stat a log file that does not exist. - */ + */ if (thd) { push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN, @@ -1414,9 +3304,9 @@ int MYSQL_LOG::purge_logs_before_date(time_t purge_time) /* It's not fatal even if we can't delete a log file */ if (thd) { - push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN, - ER_LOG_PURGE_NO_FILE, ER(ER_LOG_PURGE_NO_FILE), - log_info.log_file_name); + push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN, + ER_LOG_PURGE_NO_FILE, ER(ER_LOG_PURGE_NO_FILE), + log_info.log_file_name); } sql_print_information("Failed to delete file '%s'", log_info.log_file_name); @@ -1443,6 +3333,7 @@ int MYSQL_LOG::purge_logs_before_date(time_t purge_time) goto err; } } + ha_binlog_index_purge_file(current_thd, log_info.log_file_name); } if (find_next_log(&log_info, 0)) break; @@ -1461,18 +3352,16 @@ err: #endif /* HAVE_REPLICATION */ -/* - Create a new log file name +/** + Create a new log file name. - SYNOPSIS - make_log_name() - buf buf of at least FN_REFLEN where new name is stored + @param buf buf of at least FN_REFLEN where new name is stored - NOTE + @note If file name will be longer then FN_REFLEN it will be truncated */ -void MYSQL_LOG::make_log_name(char* buf, const char* log_ident) +void MYSQL_BIN_LOG::make_log_name(char* buf, const char* log_ident) { uint dir_len = dirname_length(log_file_name); if (dir_len >= FN_REFLEN) @@ -1482,33 +3371,50 @@ void MYSQL_LOG::make_log_name(char* buf, const char* log_ident) } -/* - Check if we are writing/reading to the given log file +/** + Check if we are writing/reading to the given log file. */ -bool MYSQL_LOG::is_active(const char *log_file_name_arg) +bool MYSQL_BIN_LOG::is_active(const char *log_file_name_arg) { return !strcmp(log_file_name, log_file_name_arg); } /* - Start writing to a new log file or reopen the old file + Wrappers around new_file_impl to avoid using argument + to control locking. The argument 1) less readable 2) breaks + incapsulation 3) allows external access to the class without + a lock (which is not possible with private new_file_without_locking + method). +*/ - SYNOPSIS - new_file() - need_lock Set to 1 if caller has not locked LOCK_log +void MYSQL_BIN_LOG::new_file() +{ + new_file_impl(1); +} + + +void MYSQL_BIN_LOG::new_file_without_locking() +{ + new_file_impl(0); +} + + +/** + Start writing to a new log file or reopen the old file. + + @param need_lock Set to 1 if caller has not locked LOCK_log - NOTE + @note The new file name is stored last in the index file */ -void MYSQL_LOG::new_file(bool need_lock) +void MYSQL_BIN_LOG::new_file_impl(bool need_lock) { char new_name[FN_REFLEN], *new_name_ptr, *old_name; - enum_log_type save_log_type; - DBUG_ENTER("MYSQL_LOG::new_file"); + DBUG_ENTER("MYSQL_BIN_LOG::new_file_impl"); if (!is_open()) { DBUG_PRINT("info",("log is closed")); @@ -1535,8 +3441,10 @@ void MYSQL_LOG::new_file(bool need_lock) { tc_log_page_waits++; pthread_mutex_lock(&LOCK_prep_xids); - while (prepared_xids) + while (prepared_xids) { + DBUG_PRINT("info", ("prepared_xids=%lu", prepared_xids)); pthread_cond_wait(&COND_prep_xids, &LOCK_prep_xids); + } pthread_mutex_unlock(&LOCK_prep_xids); } @@ -1560,8 +3468,7 @@ void MYSQL_LOG::new_file(bool need_lock) We log the whole file name for log file as the user may decide to change base names at some point. */ - THD *thd = current_thd; /* may be 0 if we are reacting to SIGHUP */ - Rotate_log_event r(thd,new_name+dirname_length(new_name), + Rotate_log_event r(new_name+dirname_length(new_name), 0, LOG_EVENT_OFFSET, 0); r.write(&log_file); bytes_written += r.data_written; @@ -1574,12 +3481,11 @@ void MYSQL_LOG::new_file(bool need_lock) signal_update(); } old_name=name; - save_log_type=log_type; name=0; // Don't free name close(LOG_CLOSE_TO_BE_OPENED); /* - Note that at this point, log_type != LOG_CLOSED (important for is_open()). + Note that at this point, log_state != LOG_CLOSED (important for is_open()). */ /* @@ -1591,7 +3497,7 @@ void MYSQL_LOG::new_file(bool need_lock) trigger temp tables deletion on slaves. */ - open(old_name, save_log_type, new_name_ptr, + open(old_name, log_type, new_name_ptr, io_cache_type, no_auto_events, max_size, 1); my_free(old_name,MYF(0)); @@ -1604,11 +3510,11 @@ end: } -bool MYSQL_LOG::append(Log_event* ev) +bool MYSQL_BIN_LOG::append(Log_event* ev) { bool error = 0; pthread_mutex_lock(&LOCK_log); - DBUG_ENTER("MYSQL_LOG::append"); + DBUG_ENTER("MYSQL_BIN_LOG::append"); DBUG_ASSERT(log_file.type == SEQ_READ_APPEND); /* @@ -1623,7 +3529,7 @@ bool MYSQL_LOG::append(Log_event* ev) bytes_written+= ev->data_written; DBUG_PRINT("info",("max_size: %lu",max_size)); if ((uint) my_b_append_tell(&log_file) > max_size) - new_file(0); + new_file_without_locking(); err: pthread_mutex_unlock(&LOCK_log); @@ -1632,10 +3538,10 @@ err: } -bool MYSQL_LOG::appendv(const char* buf, uint len,...) +bool MYSQL_BIN_LOG::appendv(const char* buf, uint len,...) { bool error= 0; - DBUG_ENTER("MYSQL_LOG::appendv"); + DBUG_ENTER("MYSQL_BIN_LOG::appendv"); va_list(args); va_start(args,len); @@ -1644,7 +3550,7 @@ bool MYSQL_LOG::appendv(const char* buf, uint len,...) safe_mutex_assert_owner(&LOCK_log); do { - if (my_b_append(&log_file,(byte*) buf,len)) + if (my_b_append(&log_file,(uchar*) buf,len)) { error= 1; goto err; @@ -1653,7 +3559,7 @@ bool MYSQL_LOG::appendv(const char* buf, uint len,...) } while ((buf=va_arg(args,const char*)) && (len=va_arg(args,uint))); DBUG_PRINT("info",("max_size: %lu",max_size)); if ((uint) my_b_append_tell(&log_file) > max_size) - new_file(0); + new_file_without_locking(); err: if (!error) @@ -1662,97 +3568,7 @@ err: } -/* - Write to normal (not rotable) log - This is the format for the 'normal' log. -*/ - -bool MYSQL_LOG::write(THD *thd,enum enum_server_command command, - const char *format,...) -{ - if (is_open() && (what_to_log & (1L << (uint) command))) - { - uint length; - int error= 0; - VOID(pthread_mutex_lock(&LOCK_log)); - - /* Test if someone closed between the is_open test and lock */ - if (is_open()) - { - time_t skr; - ulong id; - va_list args; - va_start(args,format); - char buff[32]; - - if (thd) - { // Normal thread - if ((thd->options & OPTION_LOG_OFF) -#ifndef NO_EMBEDDED_ACCESS_CHECKS - && (thd->security_ctx->master_access & SUPER_ACL) -#endif -) - { - VOID(pthread_mutex_unlock(&LOCK_log)); - return 0; // No logging - } - id=thd->thread_id; - if (thd->user_time || !(skr=thd->query_start())) - skr=time(NULL); // Connected - } - else - { // Log from connect handler - skr=time(NULL); - id=0; - } - if (skr != last_time) - { - last_time=skr; - struct tm tm_tmp; - struct tm *start; - localtime_r(&skr,&tm_tmp); - start=&tm_tmp; - /* Note that my_b_write() assumes it knows the length for this */ - sprintf(buff,"%02d%02d%02d %2d:%02d:%02d\t", - start->tm_year % 100, - start->tm_mon+1, - start->tm_mday, - start->tm_hour, - start->tm_min, - start->tm_sec); - if (my_b_write(&log_file, (byte*) buff,16)) - error=errno; - } - else if (my_b_write(&log_file, (byte*) "\t\t",2) < 0) - error=errno; - length=my_sprintf(buff, - (buff, "%7ld %-11.11s", id, - command_name[(uint) command])); - if (my_b_write(&log_file, (byte*) buff,length)) - error=errno; - if (format) - { - if (my_b_write(&log_file, (byte*) " ",1) || - my_b_vprintf(&log_file,format,args) == (uint) -1) - error=errno; - } - if (my_b_write(&log_file, (byte*) "\n",1) || - flush_io_cache(&log_file)) - error=errno; - if (error && ! write_error) - { - write_error=1; - sql_print_error(ER(ER_ERROR_ON_WRITE),name,error); - } - va_end(args); - } - VOID(pthread_mutex_unlock(&LOCK_log)); - return error != 0; - } - return 0; -} - -bool MYSQL_LOG::flush_and_sync() +bool MYSQL_BIN_LOG::flush_and_sync() { int err=0, fd=log_file.file; safe_mutex_assert_owner(&LOCK_log); @@ -1766,7 +3582,7 @@ bool MYSQL_LOG::flush_and_sync() return err; } -void MYSQL_LOG::start_union_events(THD *thd, query_id_t query_id_param) +void MYSQL_BIN_LOG::start_union_events(THD *thd, query_id_t query_id_param) { DBUG_ASSERT(!thd->binlog_evt_union.do_union); thd->binlog_evt_union.do_union= TRUE; @@ -1775,27 +3591,307 @@ void MYSQL_LOG::start_union_events(THD *thd, query_id_t query_id_param) thd->binlog_evt_union.first_query_id= query_id_param; } -void MYSQL_LOG::stop_union_events(THD *thd) +void MYSQL_BIN_LOG::stop_union_events(THD *thd) { DBUG_ASSERT(thd->binlog_evt_union.do_union); thd->binlog_evt_union.do_union= FALSE; } -bool MYSQL_LOG::is_query_in_union(THD *thd, query_id_t query_id_param) +bool MYSQL_BIN_LOG::is_query_in_union(THD *thd, query_id_t query_id_param) { return (thd->binlog_evt_union.do_union && query_id_param >= thd->binlog_evt_union.first_query_id); } + /* - Write an event to the binary log + These functions are placed in this file since they need access to + binlog_hton, which has internal linkage. */ -bool MYSQL_LOG::write(Log_event *event_info) +int THD::binlog_setup_trx_data() +{ + DBUG_ENTER("THD::binlog_setup_trx_data"); + binlog_trx_data *trx_data= + (binlog_trx_data*) thd_get_ha_data(this, binlog_hton); + + if (trx_data) + DBUG_RETURN(0); // Already set up + + trx_data= (binlog_trx_data*) my_malloc(sizeof(binlog_trx_data), MYF(MY_ZEROFILL)); + if (!trx_data || + open_cached_file(&trx_data->trans_log, mysql_tmpdir, + LOG_PREFIX, binlog_cache_size, MYF(MY_WME))) + { + my_free((uchar*)trx_data, MYF(MY_ALLOW_ZERO_PTR)); + DBUG_RETURN(1); // Didn't manage to set it up + } + thd_set_ha_data(this, binlog_hton, trx_data); + + trx_data= new (thd_get_ha_data(this, binlog_hton)) binlog_trx_data; + + DBUG_RETURN(0); +} + +/* + Function to start a statement and optionally a transaction for the + binary log. + + SYNOPSIS + binlog_start_trans_and_stmt() + + DESCRIPTION + + This function does three things: + - Start a transaction if not in autocommit mode or if a BEGIN + statement has been seen. + + - Start a statement transaction to allow us to truncate the binary + log. + + - Save the currrent binlog position so that we can roll back the + statement by truncating the transaction log. + + We only update the saved position if the old one was undefined, + the reason is that there are some cases (e.g., for CREATE-SELECT) + where the position is saved twice (e.g., both in + select_create::prepare() and THD::binlog_write_table_map()) , but + we should use the first. This means that calls to this function + can be used to start the statement before the first table map + event, to include some extra events. + */ + +void +THD::binlog_start_trans_and_stmt() +{ + binlog_trx_data *trx_data= (binlog_trx_data*) thd_get_ha_data(this, binlog_hton); + DBUG_ENTER("binlog_start_trans_and_stmt"); + DBUG_PRINT("enter", ("trx_data: 0x%lx trx_data->before_stmt_pos: %lu", + (long) trx_data, + (trx_data ? (ulong) trx_data->before_stmt_pos : + (ulong) 0))); + + if (trx_data == NULL || + trx_data->before_stmt_pos == MY_OFF_T_UNDEF) + { + this->binlog_set_stmt_begin(); + if (options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) + trans_register_ha(this, TRUE, binlog_hton); + trans_register_ha(this, FALSE, binlog_hton); + /* + Mark statement transaction as read/write. We never start + a binary log transaction and keep it read-only, + therefore it's best to mark the transaction read/write just + at the same time we start it. + Not necessary to mark the normal transaction read/write + since the statement-level flag will be propagated automatically + inside ha_commit_trans. + */ + ha_data[binlog_hton->slot].ha_info[0].set_trx_read_write(); + } + DBUG_VOID_RETURN; +} + +void THD::binlog_set_stmt_begin() { + binlog_trx_data *trx_data= + (binlog_trx_data*) thd_get_ha_data(this, binlog_hton); + + /* + The call to binlog_trans_log_savepos() might create the trx_data + structure, if it didn't exist before, so we save the position + into an auto variable and then write it into the transaction + data for the binary log (i.e., trx_data). + */ + my_off_t pos= 0; + binlog_trans_log_savepos(this, &pos); + trx_data= (binlog_trx_data*) thd_get_ha_data(this, binlog_hton); + trx_data->before_stmt_pos= pos; +} + + +/* + Write a table map to the binary log. + */ + +int THD::binlog_write_table_map(TABLE *table, bool is_trans) +{ + int error; + DBUG_ENTER("THD::binlog_write_table_map"); + DBUG_PRINT("enter", ("table: 0x%lx (%s: #%lu)", + (long) table, table->s->table_name.str, + table->s->table_map_id)); + + /* Pre-conditions */ + DBUG_ASSERT(current_stmt_binlog_row_based && mysql_bin_log.is_open()); + DBUG_ASSERT(table->s->table_map_id != ULONG_MAX); + + Table_map_log_event::flag_set const + flags= Table_map_log_event::TM_NO_FLAGS; + + Table_map_log_event + the_event(this, table, table->s->table_map_id, is_trans, flags); + + if (is_trans && binlog_table_maps == 0) + binlog_start_trans_and_stmt(); + + if ((error= mysql_bin_log.write(&the_event))) + DBUG_RETURN(error); + + binlog_table_maps++; + table->s->table_map_version= mysql_bin_log.table_map_version(); + DBUG_RETURN(0); +} + +Rows_log_event* +THD::binlog_get_pending_rows_event() const +{ + binlog_trx_data *const trx_data= + (binlog_trx_data*) thd_get_ha_data(this, binlog_hton); + /* + This is less than ideal, but here's the story: If there is no + trx_data, prepare_pending_rows_event() has never been called + (since the trx_data is set up there). In that case, we just return + NULL. + */ + return trx_data ? trx_data->pending() : NULL; +} + +void +THD::binlog_set_pending_rows_event(Rows_log_event* ev) +{ + if (thd_get_ha_data(this, binlog_hton) == NULL) + binlog_setup_trx_data(); + + binlog_trx_data *const trx_data= + (binlog_trx_data*) thd_get_ha_data(this, binlog_hton); + + DBUG_ASSERT(trx_data); + trx_data->set_pending(ev); +} + + +/** + Remove the pending rows event, discarding any outstanding rows. + + If there is no pending rows event available, this is effectively a + no-op. + */ +int +MYSQL_BIN_LOG::remove_pending_rows_event(THD *thd) +{ + DBUG_ENTER("MYSQL_BIN_LOG::remove_pending_rows_event"); + + binlog_trx_data *const trx_data= + (binlog_trx_data*) thd_get_ha_data(thd, binlog_hton); + + DBUG_ASSERT(trx_data); + + if (Rows_log_event* pending= trx_data->pending()) + { + delete pending; + trx_data->set_pending(NULL); + } + + DBUG_RETURN(0); +} + +/* + Moves the last bunch of rows from the pending Rows event to the binlog + (either cached binlog if transaction, or disk binlog). Sets a new pending + event. +*/ +int +MYSQL_BIN_LOG::flush_and_set_pending_rows_event(THD *thd, + Rows_log_event* event) +{ + DBUG_ENTER("MYSQL_BIN_LOG::flush_and_set_pending_rows_event(event)"); + DBUG_ASSERT(mysql_bin_log.is_open()); + DBUG_PRINT("enter", ("event: 0x%lx", (long) event)); + + int error= 0; + + binlog_trx_data *const trx_data= + (binlog_trx_data*) thd_get_ha_data(thd, binlog_hton); + + DBUG_ASSERT(trx_data); + + DBUG_PRINT("info", ("trx_data->pending(): 0x%lx", (long) trx_data->pending())); + + if (Rows_log_event* pending= trx_data->pending()) + { + IO_CACHE *file= &log_file; + + /* + Decide if we should write to the log file directly or to the + transaction log. + */ + if (pending->get_cache_stmt() || my_b_tell(&trx_data->trans_log)) + file= &trx_data->trans_log; + + /* + If we are writing to the log file directly, we could avoid + locking the log. This does not work since we need to step the + m_table_map_version below, and that change has to be protected + by the LOCK_log mutex. + */ + pthread_mutex_lock(&LOCK_log); + + /* + Write pending event to log file or transaction cache + */ + if (pending->write(file)) + { + pthread_mutex_unlock(&LOCK_log); + DBUG_RETURN(1); + } + + /* + We step the table map version if we are writing an event + representing the end of a statement. We do this regardless of + wheather we write to the transaction cache or to directly to the + file. + + In an ideal world, we could avoid stepping the table map version + if we were writing to a transaction cache, since we could then + reuse the table map that was written earlier in the transaction + cache. This does not work since STMT_END_F implies closing all + table mappings on the slave side. + + TODO: Find a solution so that table maps does not have to be + written several times within a transaction. + */ + if (pending->get_flags(Rows_log_event::STMT_END_F)) + ++m_table_map_version; + + delete pending; + + if (file == &log_file) + { + error= flush_and_sync(); + if (!error) + { + signal_update(); + rotate_and_purge(RP_LOCK_LOG_IS_ALREADY_LOCKED); + } + } + + pthread_mutex_unlock(&LOCK_log); + } + + thd->binlog_set_pending_rows_event(event); + + DBUG_RETURN(error); +} + +/** + Write an event to the binary log. +*/ + +bool MYSQL_BIN_LOG::write(Log_event *event_info) { THD *thd= event_info->thd; bool error= 1; - DBUG_ENTER("MYSQL_LOG::write(Log_event *)"); + DBUG_ENTER("MYSQL_BIN_LOG::write(Log_event *)"); if (thd->binlog_evt_union.do_union) { @@ -1807,7 +3903,21 @@ bool MYSQL_LOG::write(Log_event *event_info) thd->binlog_evt_union.unioned_events_trans |= event_info->cache_stmt; DBUG_RETURN(0); } - + + /* + Flush the pending rows event to the transaction cache or to the + log file. Since this function potentially aquire the LOCK_log + mutex, we do this before aquiring the LOCK_log mutex in this + function. + + We only end the statement if we are in a top-level statement. If + we are inside a stored function, we do not end the statement since + this will close all tables on the slave. + */ + bool const end_stmt= + thd->prelocked_mode && thd->lex->requires_prelocking(); + thd->binlog_flush_pending_rows_event(end_stmt); + pthread_mutex_lock(&LOCK_log); /* @@ -1826,15 +3936,14 @@ bool MYSQL_LOG::write(Log_event *event_info) */ const char *local_db= event_info->get_db(); if ((thd && !(thd->options & OPTION_BIN_LOG)) || - (!db_ok(local_db, binlog_do_db, binlog_ignore_db))) + (!binlog_filter->db_ok(local_db))) { VOID(pthread_mutex_unlock(&LOCK_log)); - DBUG_PRINT("error",("!db_ok('%s')", local_db)); DBUG_RETURN(0); } #endif /* HAVE_REPLICATION */ -#ifdef USING_TRANSACTIONS +#if defined(USING_TRANSACTIONS) /* Should we write to the binlog cache or to the binlog on disk? Write to the binlog cache if: @@ -1846,40 +3955,31 @@ bool MYSQL_LOG::write(Log_event *event_info) */ if (opt_using_transactions && thd) { - IO_CACHE *trans_log= (IO_CACHE*)thd->ha_data[binlog_hton.slot]; + if (thd->binlog_setup_trx_data()) + goto err; - if (event_info->get_cache_stmt()) + binlog_trx_data *const trx_data= + (binlog_trx_data*) thd_get_ha_data(thd, binlog_hton); + IO_CACHE *trans_log= &trx_data->trans_log; + my_off_t trans_log_pos= my_b_tell(trans_log); + if (event_info->get_cache_stmt() || trans_log_pos != 0) { - if (!trans_log) - { - thd->ha_data[binlog_hton.slot]= trans_log= (IO_CACHE *) - my_malloc(sizeof(IO_CACHE), MYF(MY_ZEROFILL)); - if (!trans_log || open_cached_file(trans_log, mysql_tmpdir, - LOG_PREFIX, - binlog_cache_size, MYF(MY_WME))) - { - my_free((gptr)trans_log, MYF(MY_ALLOW_ZERO_PTR)); - thd->ha_data[binlog_hton.slot]= trans_log= 0; - goto err; - } - trans_log->end_of_file= max_binlog_cache_size; - trans_register_ha(thd, - test(thd->options & (OPTION_NOT_AUTOCOMMIT | - OPTION_BEGIN)), - &binlog_hton); - } - else if (!my_b_tell(trans_log)) - trans_register_ha(thd, - test(thd->options & (OPTION_NOT_AUTOCOMMIT | - OPTION_BEGIN)), - &binlog_hton); + DBUG_PRINT("info", ("Using trans_log: cache: %d, trans_log_pos: %lu", + event_info->get_cache_stmt(), + (ulong) trans_log_pos)); + if (trans_log_pos == 0) + thd->binlog_start_trans_and_stmt(); file= trans_log; } - else if (trans_log && my_b_tell(trans_log)) - file= trans_log; + /* + TODO as Mats suggested, for all the cases above where we write to + trans_log, it sounds unnecessary to lock LOCK_log. We should rather + test first if we want to write to trans_log, and if not, lock + LOCK_log. + */ } -#endif - DBUG_PRINT("info",("event type=%d",event_info->get_type_code())); +#endif /* USING_TRANSACTIONS */ + DBUG_PRINT("info",("event type: %d",event_info->get_type_code())); /* No check for auto events flag here - this write method should @@ -1891,42 +3991,54 @@ bool MYSQL_LOG::write(Log_event *event_info) of the SQL command */ + /* + If row-based binlogging, Insert_id, Rand and other kind of "setting + context" events are not needed. + */ if (thd) { - if (thd->last_insert_id_used_bin_log) - { - Intvar_log_event e(thd,(uchar) LAST_INSERT_ID_EVENT, - thd->current_insert_id); - if (e.write(file)) - goto err; - } - if (thd->insert_id_used) - { - Intvar_log_event e(thd,(uchar) INSERT_ID_EVENT,thd->last_insert_id); - if (e.write(file)) - goto err; - } - if (thd->rand_used) - { - Rand_log_event e(thd,thd->rand_saved_seed1,thd->rand_saved_seed2); - if (e.write(file)) - goto err; - } - if (thd->user_var_events.elements) + if (!thd->current_stmt_binlog_row_based) { - for (uint i= 0; i < thd->user_var_events.elements; i++) - { - BINLOG_USER_VAR_EVENT *user_var_event; - get_dynamic(&thd->user_var_events,(gptr) &user_var_event, i); - User_var_log_event e(thd, user_var_event->user_var_event->name.str, - user_var_event->user_var_event->name.length, - user_var_event->value, - user_var_event->length, - user_var_event->type, - user_var_event->charset_number); - if (e.write(file)) - goto err; - } + if (thd->stmt_depends_on_first_successful_insert_id_in_prev_stmt) + { + Intvar_log_event e(thd,(uchar) LAST_INSERT_ID_EVENT, + thd->first_successful_insert_id_in_prev_stmt_for_binlog); + if (e.write(file)) + goto err; + } + if (thd->auto_inc_intervals_in_cur_stmt_for_binlog.nb_elements() > 0) + { + DBUG_PRINT("info",("number of auto_inc intervals: %u", + thd->auto_inc_intervals_in_cur_stmt_for_binlog. + nb_elements())); + Intvar_log_event e(thd, (uchar) INSERT_ID_EVENT, + thd->auto_inc_intervals_in_cur_stmt_for_binlog. + minimum()); + if (e.write(file)) + goto err; + } + if (thd->rand_used) + { + Rand_log_event e(thd,thd->rand_saved_seed1,thd->rand_saved_seed2); + if (e.write(file)) + goto err; + } + if (thd->user_var_events.elements) + { + for (uint i= 0; i < thd->user_var_events.elements; i++) + { + BINLOG_USER_VAR_EVENT *user_var_event; + get_dynamic(&thd->user_var_events,(uchar*) &user_var_event, i); + User_var_log_event e(thd, user_var_event->user_var_event->name.str, + user_var_event->user_var_event->name.length, + user_var_event->value, + user_var_event->length, + user_var_event->type, + user_var_event->charset_number); + if (e.write(file)) + goto err; + } + } } } @@ -1957,22 +4069,95 @@ err: } } + if (event_info->flags & LOG_EVENT_UPDATE_TABLE_MAP_VERSION_F) + ++m_table_map_version; + pthread_mutex_unlock(&LOCK_log); DBUG_RETURN(error); } -void MYSQL_LOG::rotate_and_purge(uint flags) + +int error_log_print(enum loglevel level, const char *format, + va_list args) +{ + return logger.error_log_print(level, format, args); +} + + +bool slow_log_print(THD *thd, const char *query, uint query_length, + ulonglong current_utime) +{ + return logger.slow_log_print(thd, query, query_length, current_utime); +} + + +bool LOGGER::log_command(THD *thd, enum enum_server_command command) +{ +#ifndef NO_EMBEDDED_ACCESS_CHECKS + Security_context *sctx= thd->security_ctx; +#endif + /* + Log command if we have at least one log event handler enabled and want + to log this king of commands + */ + if (*general_log_handler_list && (what_to_log & (1L << (uint) command))) + { + if ((thd->options & OPTION_LOG_OFF) +#ifndef NO_EMBEDDED_ACCESS_CHECKS + && (sctx->master_access & SUPER_ACL) +#endif + ) + { + /* No logging */ + return FALSE; + } + + return TRUE; + } + + return FALSE; +} + + +bool general_log_print(THD *thd, enum enum_server_command command, + const char *format, ...) +{ + va_list args; + uint error= 0; + + /* Print the message to the buffer if we want to log this king of commands */ + if (! logger.log_command(thd, command)) + return FALSE; + + va_start(args, format); + error= logger.general_log_print(thd, command, format, args); + va_end(args); + + return error; +} + +bool general_log_write(THD *thd, enum enum_server_command command, + const char *query, uint query_length) +{ + /* Write the message to the log if we want to log this king of commands */ + if (logger.log_command(thd, command)) + return logger.general_log_write(thd, command, query, query_length); + + return FALSE; +} + +void MYSQL_BIN_LOG::rotate_and_purge(uint flags) { if (!(flags & RP_LOCK_LOG_IS_ALREADY_LOCKED)) pthread_mutex_lock(&LOCK_log); if ((flags & RP_FORCE_ROTATE) || (my_b_tell(&log_file) >= (my_off_t) max_size)) { - new_file(0); + new_file_without_locking(); #ifdef HAVE_REPLICATION if (expire_logs_days) { - time_t purge_time= time(0) - expire_logs_days*24*60*60; + time_t purge_time= my_time(0) - expire_logs_days*24*60*60; if (purge_time >= 0) purge_logs_before_date(purge_time); } @@ -1982,7 +4167,7 @@ void MYSQL_LOG::rotate_and_purge(uint flags) pthread_mutex_unlock(&LOCK_log); } -uint MYSQL_LOG::next_file_id() +uint MYSQL_BIN_LOG::next_file_id() { uint res; pthread_mutex_lock(&LOCK_log); @@ -1993,205 +4178,239 @@ uint MYSQL_LOG::next_file_id() /* - Write a cached log entry to the binary log + Write the contents of a cache to the binary log. SYNOPSIS - write() - thd - cache The cache to copy to the binlog - - NOTE - - We only come here if there is something in the cache. - - The thing in the cache is always a complete transaction - - 'cache' needs to be reinitialized after this functions returns. + write_cache() + cache Cache to write to the binary log + lock_log True if the LOCK_log mutex should be aquired, false otherwise + sync_log True if the log should be flushed and sync:ed - IMPLEMENTATION - - To support transaction over replication, we wrap the transaction - with BEGIN/COMMIT or BEGIN/ROLLBACK in the binary log. - If a transaction that only involves transactional tables is - rolled back, we do not binlog it. However, we write a - BEGIN/ROLLBACK block when a non-transactional table was updated - in a transaction which was rolled back. This is to ensure that - the same updates are run on the slave. -*/ + DESCRIPTION + Write the contents of the cache to the binary log. The cache will + be reset as a READ_CACHE to be able to read the contents from it. + */ -bool MYSQL_LOG::write(THD *thd, IO_CACHE *cache, Log_event *commit_event) +int MYSQL_BIN_LOG::write_cache(IO_CACHE *cache, bool lock_log, bool sync_log) { - DBUG_ENTER("MYSQL_LOG::write(THD *, IO_CACHE *, Log_event *)"); - VOID(pthread_mutex_lock(&LOCK_log)); + Mutex_sentry sentry(lock_log ? &LOCK_log : NULL); - /* NULL would represent nothing to replicate after ROLLBACK */ - DBUG_ASSERT(commit_event != NULL); + if (reinit_io_cache(cache, READ_CACHE, 0, 0, 0)) + return ER_ERROR_ON_WRITE; + uint length= my_b_bytes_in_cache(cache), group, carry, hdr_offs; + long val; + uchar header[LOG_EVENT_HEADER_LEN]; - DBUG_ASSERT(is_open()); - if (likely(is_open())) // Should always be true + /* + The events in the buffer have incorrect end_log_pos data + (relative to beginning of group rather than absolute), + so we'll recalculate them in situ so the binlog is always + correct, even in the middle of a group. This is possible + because we now know the start position of the group (the + offset of this cache in the log, if you will); all we need + to do is to find all event-headers, and add the position of + the group to the end_log_pos of each event. This is pretty + straight forward, except that we read the cache in segments, + so an event-header might end up on the cache-border and get + split. + */ + + group= (uint)my_b_tell(&log_file); + hdr_offs= carry= 0; + + do { - uint length, group, carry, hdr_offs, val; - byte header[LOG_EVENT_HEADER_LEN]; /* - Log "BEGIN" at the beginning of every transaction. Here, a - transaction is either a BEGIN..COMMIT block or a single - statement in autocommit mode. + if we only got a partial header in the last iteration, + get the other half now and process a full header. */ - Query_log_event qinfo(thd, STRING_WITH_LEN("BEGIN"), TRUE, FALSE); - /* - Imagine this is rollback due to net timeout, after all - statements of the transaction succeeded. Then we want a - zero-error code in BEGIN. In other words, if there was a - really serious error code it's already in the statement's - events, there is no need to put it also in this internally - generated event, and as this event is generated late it would - lead to false alarms. - - This is safer than thd->clear_error() against kills at shutdown. - */ - qinfo.error_code= 0; - /* - Now this Query_log_event has artificial log_pos 0. It must be - adjusted to reflect the real position in the log. Not doing it - would confuse the slave: it would prevent this one from - knowing where he is in the master's binlog, which would result - in wrong positions being shown to the user, MASTER_POS_WAIT - undue waiting etc. - */ - if (qinfo.write(&log_file)) - goto err; - - /* Read from the file used to cache the queries .*/ - if (reinit_io_cache(cache, READ_CACHE, 0, 0, 0)) - goto err; + if (unlikely(carry > 0)) + { + DBUG_ASSERT(carry < LOG_EVENT_HEADER_LEN); - length= my_b_bytes_in_cache(cache); - DBUG_EXECUTE_IF("half_binlogged_transaction", length-=100;); + /* assemble both halves */ + memcpy(&header[carry], (char *)cache->read_pos, LOG_EVENT_HEADER_LEN - carry); - /* - The events in the buffer have incorrect end_log_pos data - (relative to beginning of group rather than absolute), - so we'll recalculate them in situ so the binlog is always - correct, even in the middle of a group. This is possible - because we now know the start position of the group (the - offset of this cache in the log, if you will); all we need - to do is to find all event-headers, and add the position of - the group to the end_log_pos of each event. This is pretty - straight forward, except that we read the cache in segments, - so an event-header might end up on the cache-border and get - split. - */ + /* fix end_log_pos */ + val= uint4korr(&header[LOG_POS_OFFSET]) + group; + int4store(&header[LOG_POS_OFFSET], val); - group= (uint)my_b_tell(&log_file); - hdr_offs= carry= 0; - - do - { + /* write the first half of the split header */ + if (my_b_write(&log_file, header, carry)) + return ER_ERROR_ON_WRITE; /* - if we only got a partial header in the last iteration, - get the other half now and process a full header. + copy fixed second half of header to cache so the correct + version will be written later. */ - if (unlikely(carry > 0)) - { - DBUG_ASSERT(carry < LOG_EVENT_HEADER_LEN); + memcpy((char *)cache->read_pos, &header[carry], LOG_EVENT_HEADER_LEN - carry); + + /* next event header at ... */ + hdr_offs = uint4korr(&header[EVENT_LEN_OFFSET]) - carry; - /* assemble both halves */ - memcpy(&header[carry], (char *)cache->read_pos, LOG_EVENT_HEADER_LEN - carry); + carry= 0; + } - /* fix end_log_pos */ - val= uint4korr(&header[LOG_POS_OFFSET]) + group; - int4store(&header[LOG_POS_OFFSET], val); + /* if there is anything to write, process it. */ - /* write the first half of the split header */ - if (my_b_write(&log_file, header, carry)) - goto err; + if (likely(length > 0)) + { + /* + process all event-headers in this (partial) cache. + if next header is beyond current read-buffer, + we'll get it later (though not necessarily in the + very next iteration, just "eventually"). + */ + while (hdr_offs < length) + { /* - copy fixed second half of header to cache so the correct - version will be written later. + partial header only? save what we can get, process once + we get the rest. */ - memcpy((char *)cache->read_pos, &header[carry], LOG_EVENT_HEADER_LEN - carry); - /* next event header at ... */ - hdr_offs = uint4korr(&header[EVENT_LEN_OFFSET]) - carry; + if (hdr_offs + LOG_EVENT_HEADER_LEN > length) + { + carry= length - hdr_offs; + memcpy(header, (char *)cache->read_pos + hdr_offs, carry); + length= hdr_offs; + } + else + { + /* we've got a full event-header, and it came in one piece */ - carry= 0; - } + uchar *log_pos= (uchar *)cache->read_pos + hdr_offs + LOG_POS_OFFSET; - /* if there is anything to write, process it. */ + /* fix end_log_pos */ + val= uint4korr(log_pos) + group; + int4store(log_pos, val); - if (likely(length > 0)) - { - /* - process all event-headers in this (partial) cache. - if next header is beyond current read-buffer, - we'll get it later (though not necessarily in the - very next iteration, just "eventually"). - */ + /* next event header at ... */ + log_pos= (uchar *)cache->read_pos + hdr_offs + EVENT_LEN_OFFSET; + hdr_offs += uint4korr(log_pos); - while (hdr_offs < length) - { - /* - partial header only? save what we can get, process once - we get the rest. - */ + } + } - if (hdr_offs + LOG_EVENT_HEADER_LEN > length) - { - carry= length - hdr_offs; - memcpy(header, (char *)cache->read_pos + hdr_offs, carry); - length= hdr_offs; - } - else - { - /* we've got a full event-header, and it came in one piece */ + /* + Adjust hdr_offs. Note that it may still point beyond the segment + read in the next iteration; if the current event is very long, + it may take a couple of read-iterations (and subsequent adjustments + of hdr_offs) for it to point into the then-current segment. + If we have a split header (!carry), hdr_offs will be set at the + beginning of the next iteration, overwriting the value we set here: + */ + hdr_offs -= length; + } - uchar *log_pos= (uchar *)cache->read_pos + hdr_offs + LOG_POS_OFFSET; + /* Write data to the binary log file */ + if (my_b_write(&log_file, cache->read_pos, length)) + return ER_ERROR_ON_WRITE; + cache->read_pos=cache->read_end; // Mark buffer used up + } while ((length= my_b_fill(cache))); - /* fix end_log_pos */ - val= uint4korr(log_pos) + group; - int4store(log_pos, val); + DBUG_ASSERT(carry == 0); - /* next event header at ... */ - log_pos= (uchar *)cache->read_pos + hdr_offs + EVENT_LEN_OFFSET; - hdr_offs += uint4korr(log_pos); + if (sync_log) + flush_and_sync(); - } - } + return 0; // All OK +} - /* - Adjust hdr_offs. Note that it may still point beyond the segment - read in the next iteration; if the current event is very long, - it may take a couple of read-iterations (and subsequent adjustments - of hdr_offs) for it to point into the then-current segment. - If we have a split header (!carry), hdr_offs will be set at the - beginning of the next iteration, overwriting the value we set here: - */ - hdr_offs -= length; - } +/** + Write a cached log entry to the binary log. + - To support transaction over replication, we wrap the transaction + with BEGIN/COMMIT or BEGIN/ROLLBACK in the binary log. + We want to write a BEGIN/ROLLBACK block when a non-transactional table + was updated in a transaction which was rolled back. This is to ensure + that the same updates are run on the slave. + + @param thd + @param cache The cache to copy to the binlog + @param commit_event The commit event to print after writing the + contents of the cache. + + @note + We only come here if there is something in the cache. + @note + The thing in the cache is always a complete transaction. + @note + 'cache' needs to be reinitialized after this functions returns. +*/ - /* Write data to the binary log file */ - if (my_b_write(&log_file, cache->read_pos, length)) - goto err; - cache->read_pos=cache->read_end; // Mark buffer used up - DBUG_EXECUTE_IF("half_binlogged_transaction", goto DBUG_skip_commit;); - } while ((length=my_b_fill(cache))); +bool MYSQL_BIN_LOG::write(THD *thd, IO_CACHE *cache, Log_event *commit_event) +{ + DBUG_ENTER("MYSQL_BIN_LOG::write(THD *, IO_CACHE *, Log_event *)"); + VOID(pthread_mutex_lock(&LOCK_log)); - DBUG_ASSERT(carry == 0); + /* NULL would represent nothing to replicate after ROLLBACK */ + DBUG_ASSERT(commit_event != NULL); - if (commit_event->write(&log_file)) - goto err; -#ifndef DBUG_OFF -DBUG_skip_commit: -#endif - if (flush_and_sync()) - goto err; - DBUG_EXECUTE_IF("half_binlogged_transaction", abort();); - if (cache->error) // Error on read + DBUG_ASSERT(is_open()); + if (likely(is_open())) // Should always be true + { + /* + We only bother to write to the binary log if there is anything + to write. + */ + if (my_b_tell(cache) > 0) { - sql_print_error(ER(ER_ERROR_ON_READ), cache->file_name, errno); - write_error=1; // Don't give more errors - goto err; + /* + Log "BEGIN" at the beginning of every transaction. Here, a + transaction is either a BEGIN..COMMIT block or a single + statement in autocommit mode. + */ + Query_log_event qinfo(thd, STRING_WITH_LEN("BEGIN"), TRUE, FALSE); + /* + Imagine this is rollback due to net timeout, after all + statements of the transaction succeeded. Then we want a + zero-error code in BEGIN. In other words, if there was a + really serious error code it's already in the statement's + events, there is no need to put it also in this internally + generated event, and as this event is generated late it would + lead to false alarms. + + This is safer than thd->clear_error() against kills at shutdown. + */ + qinfo.error_code= 0; + /* + Now this Query_log_event has artificial log_pos 0. It must be + adjusted to reflect the real position in the log. Not doing it + would confuse the slave: it would prevent this one from + knowing where he is in the master's binlog, which would result + in wrong positions being shown to the user, MASTER_POS_WAIT + undue waiting etc. + */ + if (qinfo.write(&log_file)) + goto err; + + DBUG_EXECUTE_IF("crash_before_writing_xid", + { + if ((write_error= write_cache(cache, false, true))) + DBUG_PRINT("info", ("error writing binlog cache: %d", + write_error)); + DBUG_PRINT("info", ("crashing before writing xid")); + abort(); + }); + + if ((write_error= write_cache(cache, false, false))) + goto err; + + if (commit_event && commit_event->write(&log_file)) + goto err; + if (flush_and_sync()) + goto err; + DBUG_EXECUTE_IF("half_binlogged_transaction", abort();); + if (cache->error) // Error on read + { + sql_print_error(ER(ER_ERROR_ON_READ), cache->file_name, errno); + write_error=1; // Don't give more errors + goto err; + } + signal_update(); } - signal_update(); + /* if commit_event is Xid_log_event, increase the number of prepared_xids (it's decreasd in ::unlog()). Binlog cannot be rotated @@ -2200,7 +4419,7 @@ DBUG_skip_commit: If the commit_event is not Xid_log_event (then it's a Query_log_event) rotate binlog, if necessary. */ - if (commit_event->get_type_code() == XID_EVENT) + if (commit_event && commit_event->get_type_code() == XID_EVENT) { pthread_mutex_lock(&LOCK_prep_xids); prepared_xids++; @@ -2224,152 +4443,21 @@ err: } -/* - Write to the slow query log. -*/ - -bool MYSQL_LOG::write(THD *thd,const char *query, uint query_length, - time_t query_start_arg) -{ - bool error=0; - time_t current_time; - if (!is_open()) - return 0; - DBUG_ENTER("MYSQL_LOG::write"); - - VOID(pthread_mutex_lock(&LOCK_log)); - if (is_open()) - { // Safety agains reopen - int tmp_errno=0; - char buff[80],*end; - end=buff; - if (!(thd->options & OPTION_UPDATE_LOG)) - { - VOID(pthread_mutex_unlock(&LOCK_log)); - DBUG_RETURN(0); - } - if (!(specialflag & SPECIAL_SHORT_LOG_FORMAT) || query_start_arg) - { - Security_context *sctx= thd->security_ctx; - current_time=time(NULL); - if (current_time != last_time) - { - last_time=current_time; - struct tm tm_tmp; - struct tm *start; - localtime_r(¤t_time,&tm_tmp); - start=&tm_tmp; - /* Note that my_b_write() assumes it knows the length for this */ - sprintf(buff,"# Time: %02d%02d%02d %2d:%02d:%02d\n", - start->tm_year % 100, - start->tm_mon+1, - start->tm_mday, - start->tm_hour, - start->tm_min, - start->tm_sec); - if (my_b_write(&log_file, (byte*) buff,24)) - tmp_errno=errno; - } - if (my_b_printf(&log_file, "# User@Host: %s[%s] @ %s [%s]\n", - sctx->priv_user ? - sctx->priv_user : "", - sctx->user ? sctx->user : "", - sctx->host ? sctx->host : "", - sctx->ip ? sctx->ip : "") == - (uint) -1) - tmp_errno=errno; - } - if (query_start_arg) - { - /* For slow query log */ - if (my_b_printf(&log_file, - "# Query_time: %lu Lock_time: %lu Rows_sent: %lu Rows_examined: %lu\n", - (ulong) (current_time - query_start_arg), - (ulong) (thd->time_after_lock - query_start_arg), - (ulong) thd->sent_row_count, - (ulong) thd->examined_row_count) == (uint) -1) - tmp_errno=errno; - } - if (thd->db && strcmp(thd->db,db)) - { // Database changed - if (my_b_printf(&log_file,"use %s;\n",thd->db) == (uint) -1) - tmp_errno=errno; - strmov(db,thd->db); - } - if (thd->last_insert_id_used_bin_log) - { - end=strmov(end,",last_insert_id="); - end=longlong10_to_str((longlong) thd->current_insert_id,end,-10); - } - // Save value if we do an insert. - if (thd->insert_id_used) - { - if (!(specialflag & SPECIAL_SHORT_LOG_FORMAT)) - { - end=strmov(end,",insert_id="); - end=longlong10_to_str((longlong) thd->last_insert_id,end,-10); - } - } - if (thd->query_start_used) - { - if (query_start_arg != thd->query_start()) - { - query_start_arg=thd->query_start(); - end=strmov(end,",timestamp="); - end=int10_to_str((long) query_start_arg,end,10); - } - } - if (end != buff) - { - *end++=';'; - *end='\n'; - if (my_b_write(&log_file, (byte*) "SET ",4) || - my_b_write(&log_file, (byte*) buff+1,(uint) (end-buff))) - tmp_errno=errno; - } - if (!query) - { - end=strxmov(buff, "# administrator command: ", - command_name[thd->command], NullS); - query_length=(ulong) (end-buff); - query=buff; - } - if (my_b_write(&log_file, (byte*) query,query_length) || - my_b_write(&log_file, (byte*) ";\n",2) || - flush_io_cache(&log_file)) - tmp_errno=errno; - if (tmp_errno) - { - error=1; - if (! write_error) - { - write_error=1; - sql_print_error(ER(ER_ERROR_ON_WRITE),name,error); - } - } - } - VOID(pthread_mutex_unlock(&LOCK_log)); - DBUG_RETURN(error); -} - - -/* - Wait until we get a signal that the binary log has been updated +/** + Wait until we get a signal that the binary log has been updated. - SYNOPSIS - wait_for_update() - thd Thread variable - is_slave If 0, the caller is the Binlog_dump thread from master; - if 1, the caller is the SQL thread from the slave. This - influences only thd->proc_info. + @param thd Thread variable + @param is_slave If 0, the caller is the Binlog_dump thread from master; + if 1, the caller is the SQL thread from the slave. This + influences only thd->proc_info. - NOTES + @note One must have a lock on LOCK_log before calling this function. This lock will be released before return! That's required by THD::enter_cond() (see NOTES in sql_class.h). */ -void MYSQL_LOG::wait_for_update(THD* thd, bool is_slave) +void MYSQL_BIN_LOG::wait_for_update(THD* thd, bool is_slave) { const char *old_msg; DBUG_ENTER("wait_for_update"); @@ -2386,27 +4474,25 @@ void MYSQL_LOG::wait_for_update(THD* thd, bool is_slave) } -/* - Close the log file +/** + Close the log file. - SYNOPSIS - close() - exiting Bitmask for one or more of the following bits: - LOG_CLOSE_INDEX if we should close the index file - LOG_CLOSE_TO_BE_OPENED if we intend to call open - at once after close. - LOG_CLOSE_STOP_EVENT write a 'stop' event to the log + @param exiting Bitmask for one or more of the following bits: + - LOG_CLOSE_INDEX : if we should close the index file + - LOG_CLOSE_TO_BE_OPENED : if we intend to call open + at once after close. + - LOG_CLOSE_STOP_EVENT : write a 'stop' event to the log - NOTES + @note One can do an open on the object at once after doing a close. The internal structures are not freed until cleanup() is called */ -void MYSQL_LOG::close(uint exiting) +void MYSQL_BIN_LOG::close(uint exiting) { // One can't set log_type here! - DBUG_ENTER("MYSQL_LOG::close"); + DBUG_ENTER("MYSQL_BIN_LOG::close"); DBUG_PRINT("enter",("exiting: %d", (int) exiting)); - if (log_type != LOG_CLOSED && log_type != LOG_TO_BE_OPENED) + if (log_state == LOG_OPENED) { #ifdef HAVE_REPLICATION if (log_type == LOG_BIN && !no_auto_events && @@ -2418,26 +4504,25 @@ void MYSQL_LOG::close(uint exiting) signal_update(); } #endif /* HAVE_REPLICATION */ - end_io_cache(&log_file); /* don't pwrite in a file opened with O_APPEND - it doesn't work */ if (log_file.type == WRITE_CACHE && log_type == LOG_BIN) { my_off_t offset= BIN_LOG_HEADER_SIZE + FLAGS_OFFSET; - byte flags=0; // clearing LOG_EVENT_BINLOG_IN_USE_F + my_off_t org_position= my_tell(log_file.file, MYF(0)); + uchar flags= 0; // clearing LOG_EVENT_BINLOG_IN_USE_F my_pwrite(log_file.file, &flags, 1, offset, MYF(0)); + /* + Restore position so that anything we have in the IO_cache is written + to the correct position. + We need the seek here, as my_pwrite() is not guaranteed to keep the + original position on system that doesn't support pwrite(). + */ + my_seek(log_file.file, org_position, MY_SEEK_SET, MYF(0)); } - if (my_sync(log_file.file,MYF(MY_WME)) && ! write_error) - { - write_error=1; - sql_print_error(ER(ER_ERROR_ON_WRITE), name, errno); - } - if (my_close(log_file.file,MYF(MY_WME)) && ! write_error) - { - write_error=1; - sql_print_error(ER(ER_ERROR_ON_WRITE), name, errno); - } + /* this will cleanup IO_CACHE, sync and close the file */ + MYSQL_LOG::close(exiting); } /* @@ -2454,13 +4539,13 @@ void MYSQL_LOG::close(uint exiting) sql_print_error(ER(ER_ERROR_ON_WRITE), index_file_name, errno); } } - log_type= (exiting & LOG_CLOSE_TO_BE_OPENED) ? LOG_TO_BE_OPENED : LOG_CLOSED; + log_state= (exiting & LOG_CLOSE_TO_BE_OPENED) ? LOG_TO_BE_OPENED : LOG_CLOSED; safeFree(name); DBUG_VOID_RETURN; } -void MYSQL_LOG::set_max_size(ulong max_size_arg) +void MYSQL_BIN_LOG::set_max_size(ulong max_size_arg) { /* We need to take locks, otherwise this may happen: @@ -2469,7 +4554,7 @@ void MYSQL_LOG::set_max_size(ulong max_size_arg) uses the old_max_size argument, so max_size_arg has been overwritten and it's like if the SET command was never run. */ - DBUG_ENTER("MYSQL_LOG::set_max_size"); + DBUG_ENTER("MYSQL_BIN_LOG::set_max_size"); pthread_mutex_lock(&LOCK_log); if (is_open()) max_size= max_size_arg; @@ -2478,21 +4563,20 @@ void MYSQL_LOG::set_max_size(ulong max_size_arg) } -/* - Check if a string is a valid number +/** + Check if a string is a valid number. - SYNOPSIS - test_if_number() - str String to test - res Store value here - allow_wildcards Set to 1 if we should ignore '%' and '_' + @param str String to test + @param res Store value here + @param allow_wildcards Set to 1 if we should ignore '%' and '_' - NOTE + @note For the moment the allow_wildcards argument is not used Should be move to some other file. - RETURN VALUES + @retval 1 String is a number + @retval 0 Error */ @@ -2557,17 +4641,21 @@ bool flush_error_log() (void) my_delete(err_temp, MYF(0)); if (freopen(err_temp,"a+",stdout)) { + int fd; + size_t bytes; + uchar buf[IO_SIZE]; + freopen(err_temp,"a+",stderr); (void) my_delete(err_renamed, MYF(0)); my_rename(log_error_file,err_renamed,MYF(0)); if (freopen(log_error_file,"a+",stdout)) freopen(log_error_file,"a+",stderr); - int fd, bytes; - char buf[IO_SIZE]; + if ((fd = my_open(err_temp, O_RDONLY, MYF(0))) >= 0) { - while ((bytes = (int) my_read(fd, (byte*) buf, IO_SIZE, MYF(0))) > 0) - my_fwrite(stderr, (byte*) buf, bytes, MYF(0)); + while ((bytes= my_read(fd, buf, IO_SIZE, MYF(0))) && + bytes != MY_FILE_ERROR) + my_fwrite(stderr, buf, bytes, MYF(0)); my_close(fd, MYF(0)); } (void) my_delete(err_temp, MYF(0)); @@ -2586,9 +4674,9 @@ bool flush_error_log() return result; } -void MYSQL_LOG::signal_update() +void MYSQL_BIN_LOG::signal_update() { - DBUG_ENTER("MYSQL_LOG::signal_update"); + DBUG_ENTER("MYSQL_BIN_LOG::signal_update"); pthread_cond_broadcast(&update_cond); DBUG_VOID_RETURN; } @@ -2629,24 +4717,21 @@ static void print_buffer_to_nt_eventlog(enum loglevel level, char *buff, #endif /* __NT__ */ -/* +/** Prints a printf style message to the error log and, under NT, to the Windows event log. - SYNOPSIS - vprint_msg_to_log() - event_type Type of event to write (Error, Warning, or Info) - format Printf style format of message - args va_list list of arguments for the message - - NOTE + This function prints the message into a buffer and then sends that buffer + to other functions to write that message to other logging sources. - IMPLEMENTATION - This function prints the message into a buffer and then sends that buffer - to other functions to write that message to other logging sources. + @param event_type Type of event to write (Error, Warning, or Info) + @param format Printf style format of message + @param args va_list list of arguments for the message - RETURN VALUES - void + @returns + The function always returns 0. The return value is present in the + signature to be compatible with other logging routines, which could + return an error (e.g. logging to the log tables) */ #ifndef EMBEDDED_LIBRARY @@ -2660,9 +4745,10 @@ static void print_buffer_to_file(enum loglevel level, const char *buffer) VOID(pthread_mutex_lock(&LOCK_error_log)); - skr=time(NULL); + skr= my_time(0); localtime_r(&skr, &tm_tmp); start=&tm_tmp; + fprintf(stderr, "%02d%02d%02d %2d:%02d:%02d [%s] %s\n", start->tm_year % 100, start->tm_mon+1, @@ -2681,7 +4767,7 @@ static void print_buffer_to_file(enum loglevel level, const char *buffer) } -void vprint_msg_to_log(enum loglevel level, const char *format, va_list args) +int vprint_msg_to_log(enum loglevel level, const char *format, va_list args) { char buff[1024]; size_t length; @@ -2694,7 +4780,7 @@ void vprint_msg_to_log(enum loglevel level, const char *format, va_list args) print_buffer_to_nt_eventlog(level, buff, length, sizeof(buff)); #endif - DBUG_VOID_RETURN; + DBUG_RETURN(0); } #endif /*EMBEDDED_LIBRARY*/ @@ -2705,7 +4791,7 @@ void sql_print_error(const char *format, ...) DBUG_ENTER("sql_print_error"); va_start(args, format); - vprint_msg_to_log(ERROR_LEVEL, format, args); + error_log_print(ERROR_LEVEL, format, args); va_end(args); DBUG_VOID_RETURN; @@ -2718,7 +4804,7 @@ void sql_print_warning(const char *format, ...) DBUG_ENTER("sql_print_warning"); va_start(args, format); - vprint_msg_to_log(WARNING_LEVEL, format, args); + error_log_print(WARNING_LEVEL, format, args); va_end(args); DBUG_VOID_RETURN; @@ -2731,7 +4817,7 @@ void sql_print_information(const char *format, ...) DBUG_ENTER("sql_print_information"); va_start(args, format); - vprint_msg_to_log(INFORMATION_LEVEL, format, args); + error_log_print(INFORMATION_LEVEL, format, args); va_end(args); DBUG_VOID_RETURN; @@ -2888,16 +4974,18 @@ err: return 1; } -/* - there is no active page, let's got one from the pool +/** + there is no active page, let's got one from the pool. - two strategies here: - 1. take the first from the pool - 2. if there're waiters - take the one with the most free space + Two strategies here: + -# take the first from the pool + -# if there're waiters - take the one with the most free space. - TODO page merging. try to allocate adjacent page first, - so that they can be flushed both in one sync + @todo + TODO page merging. try to allocate adjacent page first, + so that they can be flushed both in one sync */ + void TC_LOG_MMAP::get_active_from_pool() { PAGE **p, **best_p=0; @@ -2940,6 +5028,10 @@ void TC_LOG_MMAP::get_active_from_pool() pthread_mutex_unlock(&LOCK_pool); } +/** + @todo + perhaps, increase log size ? +*/ int TC_LOG_MMAP::overflow() { /* @@ -2952,10 +5044,9 @@ int TC_LOG_MMAP::overflow() return 1; // always return 1 } -/* - Record that transaction XID is committed on the persistent storage +/** + Record that transaction XID is committed on the persistent storage. - NOTES This function is called in the middle of two-phase commit: First all resources prepare the transaction, then tc_log->log() is called, then all resources commit the transaction, then tc_log->unlog() is called. @@ -2966,18 +5057,18 @@ int TC_LOG_MMAP::overflow() threads waiting for a page, but then all these threads will be waiting for a fsync() anyway - IMPLEMENTATION If tc_log == MYSQL_LOG then tc_log writes transaction to binlog and records XID in a special Xid_log_event. If tc_log = TC_LOG_MMAP then xid is written in a special memory-mapped log. - RETURN - 0 Error - # "cookie", a number that will be passed as an argument - to unlog() call. tc_log can define it any way it wants, - and use for whatever purposes. TC_LOG_MMAP sets it - to the position in memory where xid was logged to. + @retval + 0 - error + @retval + \# - otherwise, "cookie", a number that will be passed as an argument + to unlog() call. tc_log can define it any way it wants, + and use for whatever purposes. TC_LOG_MMAP sets it + to the position in memory where xid was logged to. */ int TC_LOG_MMAP::log_xid(THD *thd, my_xid xid) @@ -3085,9 +5176,9 @@ int TC_LOG_MMAP::sync() return err; } -/* +/** erase xid from the page, update page free space counters/pointers. - cookie points directly to the memory where xid was logged + cookie points directly to the memory where xid was logged. */ void TC_LOG_MMAP::unlog(ulong cookie, my_xid xid) @@ -3130,9 +5221,9 @@ void TC_LOG_MMAP::close() pthread_cond_destroy(&pages[i].cond); } case 3: - my_free((gptr)pages, MYF(0)); + my_free((uchar*)pages, MYF(0)); case 2: - my_munmap((byte*)data, (size_t)file_length); + my_munmap((char*)data, (size_t)file_length); case 1: my_close(fd, MYF(0)); } @@ -3166,13 +5257,13 @@ int TC_LOG_MMAP::recover() } if (hash_init(&xids, &my_charset_bin, tc_log_page_size/3, 0, - sizeof(my_xid), 0, 0, MYF(0))) + sizeof(my_xid), 0, 0, MYF(0))) goto err1; for ( ; p < end_p ; p++) { for (my_xid *x=p->start; x < p->end; x++) - if (*x && my_hash_insert(&xids, (byte *)x)) + if (*x && my_hash_insert(&xids, (uchar *)x)) goto err2; // OOM } @@ -3198,16 +5289,17 @@ TC_LOG *tc_log; TC_LOG_DUMMY tc_log_dummy; TC_LOG_MMAP tc_log_mmap; -/* - Perform heuristic recovery, if --tc-heuristic-recover was used - - RETURN VALUE - 0 no heuristic recovery was requested - 1 heuristic recovery was performed +/** + Perform heuristic recovery, if --tc-heuristic-recover was used. - NOTE + @note no matter whether heuristic recovery was successful or not mysqld must exit. So, return value is the same in both cases. + + @retval + 0 no heuristic recovery was requested + @retval + 1 heuristic recovery was performed */ int TC_LOG::using_heuristic_recover() @@ -3223,10 +5315,11 @@ int TC_LOG::using_heuristic_recover() } /****** transaction coordinator log for 2pc - binlog() based solution ******/ -#define TC_LOG_BINLOG MYSQL_LOG +#define TC_LOG_BINLOG MYSQL_BIN_LOG -/* - TODO keep in-memory list of prepared transactions +/** + @todo + keep in-memory list of prepared transactions (add to list in log(), remove on unlog()) and copy it to the new binlog if rotated but let's check the behaviour of tc_log_page_waits first! @@ -3317,7 +5410,7 @@ err: return error; } -/* this is called on shutdown, after ha_panic */ +/** This is called on shutdown, after ha_panic. */ void TC_LOG_BINLOG::close() { DBUG_ASSERT(prepared_xids==0); @@ -3325,25 +5418,36 @@ void TC_LOG_BINLOG::close() pthread_cond_destroy (&COND_prep_xids); } -/* - TODO group commit +/** + @todo + group commit - RETURN - 0 - error - 1 - success + @retval + 0 error + @retval + 1 success */ int TC_LOG_BINLOG::log_xid(THD *thd, my_xid xid) { + DBUG_ENTER("TC_LOG_BINLOG::log"); Xid_log_event xle(thd, xid); - IO_CACHE *trans_log= (IO_CACHE*)thd->ha_data[binlog_hton.slot]; - return !binlog_end_trans(thd, trans_log, &xle); // invert return value + binlog_trx_data *trx_data= + (binlog_trx_data*) thd_get_ha_data(thd, binlog_hton); + /* + We always commit the entire transaction when writing an XID. Also + note that the return value is inverted. + */ + DBUG_RETURN(!binlog_end_trans(thd, trx_data, &xle, TRUE)); } void TC_LOG_BINLOG::unlog(ulong cookie, my_xid xid) { pthread_mutex_lock(&LOCK_prep_xids); - if (--prepared_xids == 0) + DBUG_ASSERT(prepared_xids > 0); + if (--prepared_xids == 0) { + DBUG_PRINT("info", ("prepared_xids=%lu", prepared_xids)); pthread_cond_signal(&COND_prep_xids); + } pthread_mutex_unlock(&LOCK_prep_xids); rotate_and_purge(0); // as ::write() did not rotate } @@ -3356,7 +5460,7 @@ int TC_LOG_BINLOG::recover(IO_CACHE *log, Format_description_log_event *fdle) if (! fdle->is_valid() || hash_init(&xids, &my_charset_bin, TC_LOG_PAGE_SIZE/3, 0, - sizeof(my_xid), 0, 0, MYF(0))) + sizeof(my_xid), 0, 0, MYF(0))) goto err1; init_alloc_root(&mem_root, TC_LOG_PAGE_SIZE, TC_LOG_PAGE_SIZE); @@ -3368,8 +5472,8 @@ int TC_LOG_BINLOG::recover(IO_CACHE *log, Format_description_log_event *fdle) if (ev->get_type_code() == XID_EVENT) { Xid_log_event *xev=(Xid_log_event *)ev; - byte *x=(byte *)memdup_root(&mem_root, (char *)& xev->xid, - sizeof(xev->xid)); + uchar *x= (uchar *) memdup_root(&mem_root, (uchar*) &xev->xid, + sizeof(xev->xid)); if (! x) goto err2; my_hash_insert(&xids, x); @@ -3395,3 +5499,45 @@ err1: return 1; } + +#ifdef INNODB_COMPATIBILITY_HOOKS +/** + Get the file name of the MySQL binlog. + @return the name of the binlog file +*/ +extern "C" +const char* mysql_bin_log_file_name(void) +{ + return mysql_bin_log.get_log_fname(); +} +/** + Get the current position of the MySQL binlog. + @return byte offset from the beginning of the binlog +*/ +extern "C" +ulonglong mysql_bin_log_file_pos(void) +{ + return (ulonglong) mysql_bin_log.get_log_file()->pos_in_file; +} +#endif /* INNODB_COMPATIBILITY_HOOKS */ + + +struct st_mysql_storage_engine binlog_storage_engine= +{ MYSQL_HANDLERTON_INTERFACE_VERSION }; + +mysql_declare_plugin(binlog) +{ + MYSQL_STORAGE_ENGINE_PLUGIN, + &binlog_storage_engine, + "binlog", + "MySQL AB", + "This is a pseudo storage engine to represent the binlog in a transaction", + PLUGIN_LICENSE_GPL, + binlog_init, /* Plugin Init */ + NULL, /* Plugin Deinit */ + 0x0100 /* 1.0 */, + NULL, /* status variables */ + NULL, /* system variables */ + NULL /* config options */ +} +mysql_declare_plugin_end; |