diff options
author | Jan Lindström <jan.lindstrom@skysql.com> | 2014-08-06 15:39:15 +0300 |
---|---|---|
committer | Jan Lindström <jan.lindstrom@skysql.com> | 2014-08-26 15:43:46 +0300 |
commit | df4dd593f29aec8e2116aec1775ad4b8833d8c93 (patch) | |
tree | becae67f02054e15ead58e929e91c044f0b7aa15 /sql | |
parent | e974b564389af8251c2ba51060e6129e45431586 (diff) | |
download | mariadb-git-df4dd593f29aec8e2116aec1775ad4b8833d8c93.tar.gz |
MDEV-6247: Merge 10.0-galera to 10.1.
Merged lp:maria/maria-10.0-galera up to revision 3879.
Added a new functions to handler API to forcefully abort_transaction,
producing fake_trx_id, get_checkpoint and set_checkpoint for XA. These
were added for future possiblity to add more storage engines that
could use galera replication.
Diffstat (limited to 'sql')
70 files changed, 10782 insertions, 159 deletions
diff --git a/sql/CMakeLists.txt b/sql/CMakeLists.txt index 1eb8dc7cdd6..803e14988d2 100644 --- a/sql/CMakeLists.txt +++ b/sql/CMakeLists.txt @@ -13,6 +13,25 @@ # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +IF(WITH_WSREP AND NOT EMBEDDED_LIBRARY) + BUILD_WITH_WSREP() + SET(WSREP_INCLUDES ${CMAKE_SOURCE_DIR}/wsrep) + SET(WSREP_SOURCES + wsrep_check_opts.cc + wsrep_hton.cc + wsrep_mysqld.cc + wsrep_notify.cc + wsrep_sst.cc + wsrep_utils.cc + wsrep_var.cc + wsrep_binlog.cc + wsrep_applier.cc + wsrep_thd.cc + ) + SET(WSREP_LIB wsrep) +ENDIF() + INCLUDE_DIRECTORIES( ${CMAKE_SOURCE_DIR}/include ${CMAKE_SOURCE_DIR}/sql @@ -20,6 +39,7 @@ ${PCRE_INCLUDES} ${ZLIB_INCLUDE_DIR} ${SSL_INCLUDE_DIRS} ${CMAKE_BINARY_DIR}/sql +${WSREP_INCLUDES} ) SET(GEN_SOURCES @@ -91,6 +111,7 @@ SET (SQL_SOURCE ../sql-common/mysql_async.c my_apc.cc my_apc.h rpl_gtid.cc rpl_parallel.cc + ${WSREP_SOURCES} table_cache.cc ${CMAKE_CURRENT_BINARY_DIR}/sql_builtin.cc ${GEN_SOURCES} @@ -112,6 +133,7 @@ DTRACE_INSTRUMENT(sql) TARGET_LINK_LIBRARIES(sql ${MYSQLD_STATIC_PLUGIN_LIBS} mysys mysys_ssl dbug strings vio pcre ${LIBJEMALLOC} ${LIBWRAP} ${LIBCRYPT} ${LIBDL} ${CMAKE_THREAD_LIBS_INIT} + ${WSREP_LIB} ${SSL_LIBRARIES}) IF(WIN32) diff --git a/sql/event_data_objects.cc b/sql/event_data_objects.cc index d65180a60be..26d2e1ef985 100644 --- a/sql/event_data_objects.cc +++ b/sql/event_data_objects.cc @@ -1472,8 +1472,25 @@ end: bool save_tx_read_only= thd->tx_read_only; thd->tx_read_only= false; +#ifdef WITH_WSREP + if (WSREP(thd)) + { + // sql_print_information("sizeof(LEX) = %d", sizeof(struct LEX)); + // sizeof(LEX) = 4512, so it's relatively safe to allocate it on stack. + LEX lex; + LEX* saved = thd->lex; + lex.sql_command = SQLCOM_DROP_EVENT; + thd->lex = &lex; + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL); + thd->lex = saved; + } +#endif + ret= Events::drop_event(thd, dbname, name, FALSE); + WSREP_TO_ISOLATION_END; + + error: thd->tx_read_only= save_tx_read_only; thd->security_ctx->master_access= saved_master_access; } diff --git a/sql/events.cc b/sql/events.cc index 63627b21777..725a7613e69 100644 --- a/sql/events.cc +++ b/sql/events.cc @@ -1128,7 +1128,20 @@ Events::load_events_from_db(THD *thd) delete et; goto end; } - +#ifdef WITH_WSREP + // when SST from master node who initials event, the event status is ENABLED + // this is problematic because there are two nodes with same events and both enabled. + if (WSREP(thd) && et->originator != thd->variables.server_id) + { + store_record(table, record[1]); + table->field[ET_FIELD_STATUS]-> + store((longlong) Event_parse_data::SLAVESIDE_DISABLED, + TRUE); + (void) table->file->ha_update_row(table->record[1], table->record[0]); + delete et; + continue; + } +#endif /** Since the Event_queue_element object could be deleted inside Event_queue::create_event we should save the value of dropped flag @@ -1174,6 +1187,47 @@ end: DBUG_RETURN(ret); } +#ifdef WITH_WSREP +int wsrep_create_event_query(THD *thd, uchar** buf, size_t* buf_len) +{ + char buffer[1024]; + String log_query(buffer, sizeof(buffer), &my_charset_bin); + + if (create_query_string(thd, &log_query)) + { + WSREP_WARN("events create string failed: %s", thd->query()); + return 1; + } + return wsrep_to_buf_helper(thd, log_query.ptr(), log_query.length(), buf, buf_len); +} +static int +wsrep_alter_query_string(THD *thd, String *buf) +{ + /* Append the "ALTER" part of the query */ + if (buf->append(STRING_WITH_LEN("ALTER "))) + return 1; + /* Append definer */ + append_definer(thd, buf, &(thd->lex->definer->user), &(thd->lex->definer->host)); + /* Append the left part of thd->query after event name part */ + if (buf->append(thd->lex->stmt_definition_begin, + thd->lex->stmt_definition_end - + thd->lex->stmt_definition_begin)) + return 1; + + return 0; +} +int wsrep_alter_event_query(THD *thd, uchar** buf, size_t* buf_len) +{ + String log_query; + + if (wsrep_alter_query_string(thd, &log_query)) + { + WSREP_WARN("events alter string failed: %s", thd->query()); + return 1; + } + return wsrep_to_buf_helper(thd, log_query.ptr(), log_query.length(), buf, buf_len); +} +#endif /* WITH_WSREP */ /** @} (End of group Event_Scheduler) */ diff --git a/sql/ha_partition.cc b/sql/ha_partition.cc index f503b2f54e5..23af3825756 100644 --- a/sql/ha_partition.cc +++ b/sql/ha_partition.cc @@ -388,6 +388,15 @@ const char *ha_partition::table_type() const } +#if defined(WITH_WSREP) && !defined(EMBEDDED_LIBRARY) +int ha_partition::wsrep_db_type() const +{ + // we can do this since we only support a single engine type + return partition_ht()->db_type; +} +#endif /* WITH_WSREP && !EMBEDDED_LIBRARY */ + + /* Destructor method diff --git a/sql/ha_partition.h b/sql/ha_partition.h index 71ae84b06a0..c3c72394374 100644 --- a/sql/ha_partition.h +++ b/sql/ha_partition.h @@ -1282,7 +1282,9 @@ public: DBUG_ASSERT(h == m_file[i]->ht); return h; } - +#if defined(WITH_WSREP) && !defined(EMBEDDED_LIBRARY) + virtual int wsrep_db_type() const; +#endif /* WITH_WSREP && !EMBEDDED_LIBRARY */ friend int cmp_key_rowid_part_id(void *ptr, uchar *ref1, uchar *ref2); }; diff --git a/sql/handler.cc b/sql/handler.cc index 56e7da6430d..63ddb9c6d6e 100644 --- a/sql/handler.cc +++ b/sql/handler.cc @@ -50,6 +50,9 @@ #include "../storage/maria/ha_maria.h" #endif +#include "wsrep_mysqld.h" +#include "wsrep.h" + /* While we have legacy_db_type, we have this array to check for dups and to find handlerton from legacy_db_type. @@ -1165,10 +1168,25 @@ int ha_prepare(THD *thd) { if ((err= ht->prepare(ht, thd, all))) { - my_error(ER_ERROR_DURING_COMMIT, MYF(0), err); - ha_rollback_trans(thd, all); - error=1; - break; +#ifdef WITH_WSREP + if (ht == wsrep_hton) + { + error= 1; + /* avoid sending error, if we need to replay */ + if (thd->wsrep_conflict_state!= MUST_REPLAY) + { + my_error(ER_LOCK_DEADLOCK, MYF(0), err); + } + } + else +#endif + { + /* not wsrep hton, bail to native mysql behavior */ + my_error(ER_ERROR_DURING_COMMIT, MYF(0), err); + ha_rollback_trans(thd, all); + error=1; + break; + } } } else @@ -1366,8 +1384,9 @@ int ha_commit_trans(THD *thd, bool all) mdl_request.init(MDL_key::COMMIT, "", "", MDL_INTENTION_EXCLUSIVE, MDL_EXPLICIT); - if (thd->mdl_context.acquire_lock(&mdl_request, - thd->variables.lock_wait_timeout)) + if (IF_WSREP(!WSREP(thd),1) && + thd->mdl_context.acquire_lock(&mdl_request, + thd->variables.lock_wait_timeout)) { ha_rollback_trans(thd, all); thd->wakeup_subsequent_commits(1); @@ -1414,8 +1433,28 @@ int ha_commit_trans(THD *thd, bool all) err= ht->prepare(ht, thd, all); status_var_increment(thd->status_var.ha_prepare_count); if (err) - my_error(ER_ERROR_DURING_COMMIT, MYF(0), err); - + { +#ifdef WITH_WSREP + if (ht == wsrep_hton) + { + switch (err) { + case WSREP_TRX_SIZE_EXCEEDED: + /* give user size exeeded error from wsrep_api.h */ + my_error(ER_ERROR_DURING_COMMIT, MYF(0), WSREP_SIZE_EXCEEDED); + break; + case WSREP_TRX_CERT_FAIL: + case WSREP_TRX_ERROR: + /* avoid sending error, if we need to replay */ + if (thd->wsrep_conflict_state!= MUST_REPLAY) + { + my_error(ER_LOCK_DEADLOCK, MYF(0), err); + } + } + goto err; + } +#endif /* WITH_WSREP */ + my_error(ER_ERROR_DURING_COMMIT, MYF(0), err); + } if (err) goto err; @@ -1425,6 +1464,14 @@ int ha_commit_trans(THD *thd, bool all) DEBUG_SYNC(thd, "ha_commit_trans_after_prepare"); DBUG_EXECUTE_IF("crash_commit_after_prepare", DBUG_SUICIDE();); +#ifdef WITH_WSREP + if (!error && WSREP_ON && wsrep_is_wsrep_xid(&thd->transaction.xid_state.xid)) + { + // xid was rewritten by wsrep + xid= wsrep_xid_seqno(&thd->transaction.xid_state.xid); + } +#endif // WITH_WSREP + if (!is_real_trans) { error= commit_one_phase_2(thd, all, trans, is_real_trans); @@ -1801,7 +1848,10 @@ static my_bool xarecover_handlerton(THD *unused, plugin_ref plugin, got, hton_name(hton)->str); for (int i=0; i < got; i ++) { - my_xid x=info->list[i].get_my_xid(); + my_xid x= IF_WSREP(WSREP_ON && wsrep_is_wsrep_xid(&info->list[i]) ? + wsrep_xid_seqno(&info->list[i]) : + info->list[i].get_my_xid(), + info->list[i].get_my_xid()); if (!x) // not "mine" - that is generated by external TM { #ifndef DBUG_OFF @@ -3086,10 +3136,12 @@ int handler::update_auto_increment() variables->auto_increment_increment); auto_inc_intervals_count++; /* Row-based replication does not need to store intervals in binlog */ - if (mysql_bin_log.is_open() && !thd->is_current_stmt_binlog_format_row()) - thd->auto_inc_intervals_in_cur_stmt_for_binlog.append(auto_inc_interval_for_cur_row.minimum(), - auto_inc_interval_for_cur_row.values(), - variables->auto_increment_increment); + if (IF_WSREP(((WSREP(thd) && wsrep_emulate_bin_log ) || mysql_bin_log.is_open()), mysql_bin_log.is_open()) + && !thd->is_current_stmt_binlog_format_row()) + thd->auto_inc_intervals_in_cur_stmt_for_binlog. + append(auto_inc_interval_for_cur_row.minimum(), + auto_inc_interval_for_cur_row.values(), + variables->auto_increment_increment); } /* @@ -5707,7 +5759,10 @@ static bool check_table_binlog_row_based(THD *thd, TABLE *table) return (thd->is_current_stmt_binlog_format_row() && table->s->cached_row_logging_check && (thd->variables.option_bits & OPTION_BIN_LOG) && - mysql_bin_log.is_open()); + /* applier and replayer should not binlog */ + (IF_WSREP((WSREP_EMULATE_BINLOG(thd) && + (thd->wsrep_exec_mode != REPL_RECV)) || + mysql_bin_log.is_open(), mysql_bin_log.is_open()))); } @@ -5807,6 +5862,12 @@ static int binlog_log_row(TABLE* table, bool error= 0; THD *const thd= table->in_use; +#ifdef WITH_WSREP + /* only InnoDB tables will be replicated through binlog emulation */ + if (WSREP_EMULATE_BINLOG(thd) && + table->file->partition_ht()->db_type != DB_TYPE_INNODB) + return 0; +#endif /* WITH_WSREP */ if (check_table_binlog_row_based(thd, table)) { MY_BITMAP cols; @@ -6136,6 +6197,76 @@ void handler::set_lock_type(enum thr_lock_type lock) table->reginfo.lock_type= lock; } +#ifdef WITH_WSREP +/** + @details + This function makes the storage engine to force the victim transaction + to abort. Currently, only innodb has this functionality, but any SE + implementing the wsrep API should provide this service to support + multi-master operation. + + @param bf_thd brute force THD asking for the abort + @param victim_thd victim THD to be aborted + + @return + always 0 +*/ + +int ha_abort_transaction(THD *bf_thd, THD *victim_thd, my_bool signal) +{ + DBUG_ENTER("ha_abort_transaction"); + if (!WSREP(bf_thd) && + !(wsrep_OSU_method_options == WSREP_OSU_RSU && + bf_thd->wsrep_exec_mode == TOTAL_ORDER)) { + DBUG_RETURN(0); + } + + THD_TRANS *trans= &victim_thd->transaction.all; + Ha_trx_info *ha_info= trans->ha_list, *ha_info_next; + + for (; ha_info; ha_info= ha_info_next) + { + handlerton *hton= ha_info->ht(); + if (!hton->abort_transaction) + { + WSREP_WARN("cannot abort transaction"); + } + else + hton->abort_transaction(hton, bf_thd, victim_thd, signal); + ha_info_next= ha_info->next(); + ha_info->reset(); /* keep it conveniently zero-filled */ + } + DBUG_RETURN(0); +} + +void ha_fake_trx_id(THD *thd) +{ + DBUG_ENTER("ha_fake_trx_id"); + if (!WSREP(thd)) + { + DBUG_VOID_RETURN; + } + + THD_TRANS *trans= &thd->transaction.all; + Ha_trx_info *ha_info= trans->ha_list, *ha_info_next; + + for (; ha_info; ha_info= ha_info_next) + { + handlerton *hton= ha_info->ht(); + if (!hton->fake_trx_id) + { + WSREP_WARN("cannot get fake InnoDB transaction ID"); + } + else + hton->fake_trx_id(hton, thd); + ha_info_next= ha_info->next(); + ha_info->reset(); /* keep it conveniently zero-filled */ + } + DBUG_VOID_RETURN; +} +#endif /* WITH_WSREP */ + + #ifdef TRANS_LOG_MGM_EXAMPLE_CODE /* Example of transaction log management functions based on assumption that logs diff --git a/sql/handler.h b/sql/handler.h index 1e4fe5557b6..169ed209403 100644 --- a/sql/handler.h +++ b/sql/handler.h @@ -1230,6 +1230,11 @@ struct handlerton enum handler_create_iterator_result (*create_iterator)(handlerton *hton, enum handler_iterator_type type, struct handler_iterator *fill_this_in); + int (*abort_transaction)(handlerton *hton, THD *bf_thd, + THD *victim_thd, my_bool signal); + int (*set_checkpoint)(handlerton *hton, const XID* xid); + int (*get_checkpoint)(handlerton *hton, XID* xid); + void (*fake_trx_id)(handlerton *hton, THD *thd); /* Optional clauses in the CREATE/ALTER TABLE */ @@ -3962,6 +3967,9 @@ bool key_uses_partial_cols(TABLE_SHARE *table, uint keyno); extern const char *ha_row_type[]; extern MYSQL_PLUGIN_IMPORT const char *tx_isolation_names[]; extern MYSQL_PLUGIN_IMPORT const char *binlog_format_names[]; +#ifdef WITH_WSREP +extern MYSQL_PLUGIN_IMPORT const char *wsrep_binlog_format_names[]; +#endif /* WITH_WSREP */ extern TYPELIB tx_isolation_typelib; extern const char *myisam_stats_method_names[]; extern ulong total_ha, total_ha_2pc; @@ -4080,6 +4088,10 @@ int ha_rollback_to_savepoint(THD *thd, SAVEPOINT *sv); bool ha_rollback_to_savepoint_can_release_mdl(THD *thd); int ha_savepoint(THD *thd, SAVEPOINT *sv); int ha_release_savepoint(THD *thd, SAVEPOINT *sv); +#ifdef WITH_WSREP +int ha_abort_transaction(THD *bf_thd, THD *victim_thd, my_bool signal); +void ha_fake_trx_id(THD *thd); +#endif /* WITH_WSREP */ /* these are called by storage engines */ void trans_register_ha(THD *thd, bool all, handlerton *ht); @@ -4110,6 +4122,9 @@ int ha_binlog_end(THD *thd); #define ha_binlog_wait(a) do {} while (0) #define ha_binlog_end(a) do {} while (0) #endif +#ifdef WITH_WSREP +void wsrep_brute_force_aborts(); +#endif const char *get_canonical_filename(handler *file, const char *path, char *tmp_path); diff --git a/sql/item_func.cc b/sql/item_func.cc index ccb7ec56021..13803cff2d6 100644 --- a/sql/item_func.cc +++ b/sql/item_func.cc @@ -2769,7 +2769,19 @@ void Item_func_rand::seed_random(Item *arg) TODO: do not do reinit 'rand' for every execute of PS/SP if args[0] is a constant. */ - uint32 tmp= (uint32) arg->val_int(); + uint32 tmp; +#ifdef WITH_WSREP + if (WSREP(current_thd)) + { + if (current_thd->wsrep_exec_mode==REPL_RECV) + tmp= current_thd->wsrep_rand; + else + tmp= current_thd->wsrep_rand= (uint32) arg->val_int(); + } + else +#endif /* WITH_WSREP */ + tmp= (uint32) arg->val_int(); + my_rnd_init(rand, (uint32) (tmp*0x10001L+55555555L), (uint32) (tmp*0x10000001L)); } diff --git a/sql/lock.cc b/sql/lock.cc index 54c7720e750..a74a12c41c3 100644 --- a/sql/lock.cc +++ b/sql/lock.cc @@ -83,6 +83,7 @@ #include "sql_acl.h" // SUPER_ACL #include <hash.h> #include <assert.h> +#include "wsrep_mysqld.h" /** @defgroup Locking Locking @@ -314,6 +315,7 @@ bool mysql_lock_tables(THD *thd, MYSQL_LOCK *sql_lock, uint flags) /* Copy the lock data array. thr_multi_lock() reorders its contents. */ memmove(sql_lock->locks + sql_lock->lock_count, sql_lock->locks, sql_lock->lock_count * sizeof(*sql_lock->locks)); + /* Lock on the copied half of the lock data array. */ rc= thr_lock_errno_to_mysql[(int) thr_multi_lock(sql_lock->locks + sql_lock->lock_count, @@ -329,7 +331,10 @@ end: { thd->send_kill_message(); if (!rc) + { mysql_unlock_tables(thd, sql_lock, 0); + THD_STAGE_INFO(thd, stage_after_table_lock); + } rc= 1; } else if (rc > 1) @@ -380,6 +385,8 @@ static int lock_external(THD *thd, TABLE **tables, uint count) void mysql_unlock_tables(THD *thd, MYSQL_LOCK *sql_lock, bool free_lock) { DBUG_ENTER("mysql_unlock_tables"); + THD_STAGE_INFO(thd, stage_unlocking_tables); + if (sql_lock->table_count) unlock_external(thd, sql_lock->table, sql_lock->table_count); if (sql_lock->lock_count) @@ -1052,6 +1059,13 @@ void Global_read_lock::unlock_global_read_lock(THD *thd) { thd->mdl_context.release_lock(m_mdl_blocks_commits_lock); m_mdl_blocks_commits_lock= NULL; +#ifdef WITH_WSREP + if (WSREP_ON) + { + wsrep_locked_seqno= WSREP_SEQNO_UNDEFINED; + wsrep->resume(wsrep); + } +#endif /* WITH_WSREP */ } thd->mdl_context.release_lock(m_mdl_global_shared_lock); m_mdl_global_shared_lock= NULL; @@ -1084,9 +1098,22 @@ bool Global_read_lock::make_global_read_lock_block_commit(THD *thd) If we didn't succeed lock_global_read_lock(), or if we already suceeded make_global_read_lock_block_commit(), do nothing. */ + if (m_state != GRL_ACQUIRED) DBUG_RETURN(0); +#ifdef WITH_WSREP + if (WSREP_ON && m_mdl_blocks_commits_lock) + { + WSREP_DEBUG("GRL was in block commit mode when entering " + "make_global_read_lock_block_commit"); + thd->mdl_context.release_lock(m_mdl_blocks_commits_lock); + m_mdl_blocks_commits_lock= NULL; + wsrep_locked_seqno= WSREP_SEQNO_UNDEFINED; + wsrep->resume(wsrep); + } +#endif /* WITH_WSREP */ + mdl_request.init(MDL_key::COMMIT, "", "", MDL_SHARED, MDL_EXPLICIT); if (thd->mdl_context.acquire_lock(&mdl_request, @@ -1096,6 +1123,25 @@ bool Global_read_lock::make_global_read_lock_block_commit(THD *thd) m_mdl_blocks_commits_lock= mdl_request.ticket; m_state= GRL_ACQUIRED_AND_BLOCKS_COMMIT; +#ifdef WITH_WSREP + if (WSREP_ON) + { + long long ret = wsrep->pause(wsrep); + if (ret >= 0) + { + wsrep_locked_seqno= ret; + } + else if (ret != -ENOSYS) /* -ENOSYS - no provider */ + { + WSREP_ERROR("Failed to pause provider: %lld (%s)", -ret, strerror(-ret)); + + DBUG_ASSERT(m_mdl_blocks_commits_lock == NULL); + wsrep_locked_seqno= WSREP_SEQNO_UNDEFINED; + my_error(ER_LOCK_DEADLOCK, MYF(0)); + DBUG_RETURN(TRUE); + } + } +#endif /* WITH_WSREP */ DBUG_RETURN(FALSE); } diff --git a/sql/log.cc b/sql/log.cc index 75a895e25f8..371f9c06cce 100644 --- a/sql/log.cc +++ b/sql/log.cc @@ -52,9 +52,11 @@ #include "sql_plugin.h" #include "rpl_handler.h" +#include "wsrep_mysqld.h" #include "debug_sync.h" #include "sql_show.h" #include "my_pthread.h" +#include "wsrep_mysqld.h" /* max size of the log message */ #define MAX_LOG_BUFFER_SIZE 1024 @@ -63,6 +65,7 @@ #define FLAGSTR(V,F) ((V)&(F)?#F" ":"") +handlerton *binlog_hton; LOGGER logger; MYSQL_BIN_LOG mysql_bin_log(&sync_binlog_period); @@ -511,8 +514,6 @@ private: binlog_cache_mngr(const binlog_cache_mngr& info); }; -handlerton *binlog_hton; - bool LOGGER::is_log_table_enabled(uint log_table_type) { switch (log_table_type) { @@ -526,7 +527,6 @@ bool LOGGER::is_log_table_enabled(uint log_table_type) } } - /** Check if a given table is opened log table @@ -1577,7 +1577,8 @@ binlog_trans_log_savepos(THD *thd, my_off_t *pos) DBUG_ENTER("binlog_trans_log_savepos"); DBUG_ASSERT(pos != NULL); binlog_cache_mngr *const cache_mngr= thd->binlog_setup_trx_data(); - DBUG_ASSERT(mysql_bin_log.is_open()); + DBUG_ASSERT_IF_WSREP(((WSREP(thd) && wsrep_emulate_bin_log)) || mysql_bin_log.is_open()); + DBUG_ASSERT(IF_WSREP(1, mysql_bin_log.is_open())); *pos= cache_mngr->trx_cache.get_byte_position(); DBUG_PRINT("return", ("*pos: %lu", (ulong) *pos)); DBUG_VOID_RETURN; @@ -1625,7 +1626,8 @@ binlog_trans_log_truncate(THD *thd, my_off_t pos) int binlog_init(void *p) { binlog_hton= (handlerton *)p; - binlog_hton->state=opt_bin_log ? SHOW_OPTION_YES : SHOW_OPTION_NO; + binlog_hton->state=IF_WSREP(WSREP_ON || opt_bin_log, opt_bin_log) ? + SHOW_OPTION_YES : SHOW_OPTION_NO; binlog_hton->db_type=DB_TYPE_BINLOG; binlog_hton->savepoint_offset= sizeof(my_off_t); binlog_hton->close_connection= binlog_close_connection; @@ -1745,6 +1747,16 @@ binlog_commit_flush_stmt_cache(THD *thd, bool all, binlog_cache_mngr *cache_mngr) { DBUG_ENTER("binlog_commit_flush_stmt_cache"); +#ifdef WITH_WSREP + if (thd->wsrep_mysql_replicated > 0) + { + DBUG_ASSERT(WSREP_ON); + WSREP_DEBUG("avoiding binlog_commit_flush_trx_cache: %d", + thd->wsrep_mysql_replicated); + return 0; + } +#endif + Query_log_event end_evt(thd, STRING_WITH_LEN("COMMIT"), FALSE, TRUE, TRUE, 0); DBUG_RETURN(binlog_flush_cache(thd, cache_mngr, &end_evt, all, TRUE, FALSE)); @@ -1900,12 +1912,12 @@ static bool trans_cannot_safely_rollback(THD *thd, bool all) return ((thd->variables.option_bits & OPTION_KEEP_LOG) || (trans_has_updated_non_trans_table(thd) && - thd->variables.binlog_format == BINLOG_FORMAT_STMT) || + WSREP_FORMAT(thd->variables.binlog_format) == BINLOG_FORMAT_STMT) || (cache_mngr->trx_cache.changes_to_non_trans_temp_table() && - thd->variables.binlog_format == BINLOG_FORMAT_MIXED) || + WSREP_FORMAT(thd->variables.binlog_format) == BINLOG_FORMAT_MIXED) || (trans_has_updated_non_trans_table(thd) && ending_single_stmt_trans(thd,all) && - thd->variables.binlog_format == BINLOG_FORMAT_MIXED)); + WSREP_FORMAT(thd->variables.binlog_format) == BINLOG_FORMAT_MIXED)); } @@ -1927,6 +1939,13 @@ static int binlog_commit(handlerton *hton, THD *thd, bool all) DBUG_ENTER("binlog_commit"); binlog_cache_mngr *const cache_mngr= (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton); +#ifdef WITH_WSREP + if (!cache_mngr) + { + DBUG_ASSERT(WSREP(thd)); + DBUG_RETURN(0); + } +#endif /* WITH_WSREP */ DBUG_PRINT("debug", ("all: %d, in_transaction: %s, all.modified_non_trans_table: %s, stmt.modified_non_trans_table: %s", @@ -1983,6 +2002,14 @@ static int binlog_rollback(handlerton *hton, THD *thd, bool all) int error= 0; binlog_cache_mngr *const cache_mngr= (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton); +#ifdef WITH_WSREP + if (!cache_mngr) + { + DBUG_ASSERT(WSREP(thd)); + DBUG_RETURN(0); + } + +#endif /* WITH_WSREP */ DBUG_PRINT("debug", ("all: %s, all.modified_non_trans_table: %s, stmt.modified_non_trans_table: %s", YESNO(all), @@ -2011,8 +2038,8 @@ static int binlog_rollback(handlerton *hton, THD *thd, bool all) cache_mngr->reset(false, true); DBUG_RETURN(error); } - - if (mysql_bin_log.check_write_error(thd)) + if (IF_WSREP(!wsrep_emulate_bin_log,1) && + mysql_bin_log.check_write_error(thd)) { /* "all == true" means that a "rollback statement" triggered the error and @@ -2043,9 +2070,9 @@ static int binlog_rollback(handlerton *hton, THD *thd, bool all) else if (ending_trans(thd, all) || (!(thd->variables.option_bits & OPTION_KEEP_LOG) && (!stmt_has_updated_non_trans_table(thd) || - thd->variables.binlog_format != BINLOG_FORMAT_STMT) && + WSREP_FORMAT(thd->variables.binlog_format) != BINLOG_FORMAT_STMT) && (!cache_mngr->trx_cache.changes_to_non_trans_temp_table() || - thd->variables.binlog_format != BINLOG_FORMAT_MIXED))) + WSREP_FORMAT(thd->variables.binlog_format) != BINLOG_FORMAT_MIXED))) error= binlog_truncate_trx_cache(thd, cache_mngr, all); } @@ -2150,8 +2177,13 @@ bool MYSQL_BIN_LOG::check_write_error(THD *thd) static int binlog_savepoint_set(handlerton *hton, THD *thd, void *sv) { - DBUG_ENTER("binlog_savepoint_set"); int error= 1; + DBUG_ENTER("binlog_savepoint_set"); + +#ifdef WITH_WSREP + if (wsrep_emulate_bin_log) + DBUG_RETURN(0); +#endif /* WITH_WSREP */ char buf[1024]; String log_query(buf, sizeof(buf), &my_charset_bin); @@ -2190,7 +2222,8 @@ static int binlog_savepoint_rollback(handlerton *hton, THD *thd, void *sv) non-transactional table. Otherwise, truncate the binlog cache starting from the SAVEPOINT command. */ - if (unlikely(trans_has_updated_non_trans_table(thd) || + if (IF_WSREP(!wsrep_emulate_bin_log, 1) && + unlikely(trans_has_updated_non_trans_table(thd) || (thd->variables.option_bits & OPTION_KEEP_LOG))) { char buf[1024]; @@ -2204,7 +2237,10 @@ static int binlog_savepoint_rollback(handlerton *hton, THD *thd, void *sv) TRUE, FALSE, TRUE, errcode); DBUG_RETURN(mysql_bin_log.write(&qinfo)); } - binlog_trans_log_truncate(thd, *(my_off_t*)sv); + + if (IF_WSREP(!wsrep_emulate_bin_log, 1)) + binlog_trans_log_truncate(thd, *(my_off_t*)sv); + DBUG_RETURN(0); } @@ -5332,7 +5368,8 @@ int THD::binlog_write_table_map(TABLE *table, bool is_transactional, is_transactional= 1; /* Pre-conditions */ - DBUG_ASSERT(is_current_stmt_binlog_format_row() && mysql_bin_log.is_open()); + DBUG_ASSERT(is_current_stmt_binlog_format_row()); + DBUG_ASSERT(IF_WSREP(WSREP_EMULATE_BINLOG(this), 0) || mysql_bin_log.is_open()); DBUG_ASSERT(table->s->table_map_id != ULONG_MAX); Table_map_log_event @@ -5465,7 +5502,7 @@ MYSQL_BIN_LOG::flush_and_set_pending_rows_event(THD *thd, bool is_transactional) { DBUG_ENTER("MYSQL_BIN_LOG::flush_and_set_pending_rows_event(event)"); - DBUG_ASSERT(mysql_bin_log.is_open()); + DBUG_ASSERT(IF_WSREP(WSREP_EMULATE_BINLOG(thd),0) || mysql_bin_log.is_open()); DBUG_PRINT("enter", ("event: 0x%lx", (long) event)); int error= 0; @@ -5791,7 +5828,8 @@ bool MYSQL_BIN_LOG::write(Log_event *event_info, my_bool *with_annotate) mostly called if is_open() *was* true a few instructions before, but it could have changed since. */ - if (likely(is_open())) + /* applier and replayer can skip writing binlog events */ + if (IF_WSREP((WSREP_EMULATE_BINLOG(thd) && (thd->wsrep_exec_mode != REPL_RECV)) || is_open(), likely(is_open()))) { my_off_t UNINIT_VAR(my_org_b_tell); #ifdef HAVE_REPLICATION @@ -6130,6 +6168,17 @@ int MYSQL_BIN_LOG::rotate(bool force_rotate, bool* check_purge) int error= 0; DBUG_ENTER("MYSQL_BIN_LOG::rotate"); +#ifdef WITH_WSREP + if (wsrep_to_isolation) + { + DBUG_ASSERT(WSREP_ON); + *check_purge= false; + WSREP_DEBUG("avoiding binlog rotate due to TO isolation: %d", + wsrep_to_isolation); + DBUG_RETURN(0); + } +#endif + //todo: fix the macro def and restore safe_mutex_assert_owner(&LOCK_log); *check_purge= false; @@ -6675,6 +6724,11 @@ MYSQL_BIN_LOG::write_transaction_to_binlog(THD *thd, Ha_trx_info *ha_info; DBUG_ENTER("MYSQL_BIN_LOG::write_transaction_to_binlog"); +#ifdef WITH_WSREP + if (wsrep_emulate_bin_log) + DBUG_RETURN(0); +#endif /* WITH_WSREP */ + entry.thd= thd; entry.cache_mngr= cache_mngr; entry.error= 0; @@ -6683,6 +6737,7 @@ MYSQL_BIN_LOG::write_transaction_to_binlog(THD *thd, entry.using_trx_cache= using_trx_cache; entry.need_unlog= false; ha_info= all ? thd->transaction.all.ha_list : thd->transaction.stmt.ha_list; + for (; ha_info; ha_info= ha_info->next()) { if (ha_info->is_started() && ha_info->ht() != binlog_hton && @@ -8819,7 +8874,10 @@ TC_LOG_BINLOG::log_and_order(THD *thd, my_xid xid, bool all, binlog_cache_mngr *cache_mngr= thd->binlog_setup_trx_data(); if (!cache_mngr) + { + WSREP_DEBUG("Skipping empty log_xid: %s", thd->query()); DBUG_RETURN(0); + } cache_mngr->using_xa= TRUE; cache_mngr->xa_xid= xid; @@ -9649,3 +9707,50 @@ maria_declare_plugin(binlog) MariaDB_PLUGIN_MATURITY_STABLE /* maturity */ } maria_declare_plugin_end; + +#ifdef WITH_WSREP +IO_CACHE * get_trans_log(THD * thd) +{ + binlog_cache_mngr *cache_mngr = (binlog_cache_mngr*) + thd_get_ha_data(thd, binlog_hton); + if (cache_mngr) + return cache_mngr->get_binlog_cache_log(true); + + WSREP_DEBUG("binlog cache not initialized, conn :%ld", thd->thread_id); + return NULL; +} + + +bool wsrep_trans_cache_is_empty(THD *thd) +{ + binlog_cache_mngr *const cache_mngr= + (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton); + return (!cache_mngr || cache_mngr->trx_cache.empty()); +} + + +void thd_binlog_trx_reset(THD * thd) +{ + /* + todo: fix autocommit select to not call the caller + */ + if (thd_get_ha_data(thd, binlog_hton) != NULL) + { + binlog_cache_mngr *const cache_mngr= + (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton); + if (cache_mngr) + cache_mngr->reset(false, true); + } + thd->clear_binlog_table_maps(); +} + + +void thd_binlog_rollback_stmt(THD * thd) +{ + WSREP_DEBUG("thd_binlog_rollback_stmt :%ld", thd->thread_id); + binlog_cache_mngr *const cache_mngr= + (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton); + if (cache_mngr) + cache_mngr->trx_cache.set_prev_position(MY_OFF_T_UNDEF); +} +#endif /* WITH_WSREP */ diff --git a/sql/log.h b/sql/log.h index 67fcf068ec4..3e9a11b8b94 100644 --- a/sql/log.h +++ b/sql/log.h @@ -19,6 +19,8 @@ #include "unireg.h" // REQUIRED: for other includes #include "handler.h" /* my_xid */ +#include "wsrep.h" +#include "wsrep_mysqld.h" class Relay_log_info; @@ -106,7 +108,11 @@ public: int log_and_order(THD *thd, my_xid xid, bool all, bool need_prepare_ordered, bool need_commit_ordered) { - DBUG_ASSERT(0 /* Internal error - TC_LOG_DUMMY::log_and_order() called */); + /* + If we are not using WSREP this is an Internal error + - TC_LOG_DUMMY::log_and_order() called + */ + DBUG_ASSERT(IF_WSREP(1,0)); return 1; } int unlog(ulong cookie, my_xid xid) { return 0; } diff --git a/sql/log_event.cc b/sql/log_event.cc index 6a85893803e..8e5f7a1f23e 100644 --- a/sql/log_event.cc +++ b/sql/log_event.cc @@ -44,7 +44,7 @@ #include <mysql/psi/mysql_statement.h> #include <strfunc.h> #include "compat56.h" - +#include "wsrep_mysqld.h" #endif /* MYSQL_CLIENT */ #include <base64.h> @@ -3092,6 +3092,15 @@ Query_log_event::Query_log_event(THD* thd_arg, const char* query_arg, { time_t end_time; +#ifdef WITH_WSREP + /* + If Query_log_event will contain non trans keyword (not BEGIN, COMMIT, + SAVEPOINT or ROLLBACK) we disable PA for this transaction. + */ + if (WSREP_ON && !is_trans_keyword()) + thd->wsrep_PA_safe= false; +#endif /* WITH_WSREP */ + memset(&user, 0, sizeof(user)); memset(&host, 0, sizeof(host)); @@ -4519,6 +4528,22 @@ Query_log_event::do_shall_skip(rpl_group_info *rgi) DBUG_RETURN(Log_event::EVENT_SKIP_COUNT); } } +#ifdef WITH_WSREP + else if (WSREP_ON && wsrep_mysql_replication_bundle && opt_slave_domain_parallel_threads == 0 && + thd->wsrep_mysql_replicated > 0 && + (is_begin() || is_commit())) + { + if (++thd->wsrep_mysql_replicated < (int)wsrep_mysql_replication_bundle) + { + WSREP_DEBUG("skipping wsrep commit %d", thd->wsrep_mysql_replicated); + DBUG_RETURN(Log_event::EVENT_SKIP_IGNORE); + } + else + { + thd->wsrep_mysql_replicated = 0; + } + } +#endif DBUG_RETURN(Log_event::do_shall_skip(rgi)); } @@ -7348,6 +7373,21 @@ Xid_log_event::do_shall_skip(rpl_group_info *rgi) thd->variables.option_bits&= ~(OPTION_BEGIN | OPTION_GTID_BEGIN); DBUG_RETURN(Log_event::EVENT_SKIP_COUNT); } +#ifdef WITH_WSREP + else if (wsrep_mysql_replication_bundle && WSREP_ON && + opt_slave_domain_parallel_threads == 0) + { + if (++thd->wsrep_mysql_replicated < (int)wsrep_mysql_replication_bundle) + { + WSREP_DEBUG("skipping wsrep commit %d", thd->wsrep_mysql_replicated); + DBUG_RETURN(Log_event::EVENT_SKIP_IGNORE); + } + else + { + thd->wsrep_mysql_replicated = 0; + } + } +#endif DBUG_RETURN(Log_event::do_shall_skip(rgi)); } #endif /* !MYSQL_CLIENT */ @@ -9625,6 +9665,18 @@ int Rows_log_event::do_apply_event(rpl_group_info *rgi) if (open_and_lock_tables(thd, rgi->tables_to_lock, FALSE, 0)) { uint actual_error= thd->get_stmt_da()->sql_errno(); +#ifdef WITH_WSREP + if (WSREP(thd)) + { + WSREP_WARN("BF applier failed to open_and_lock_tables: %u, fatal: %d " + "wsrep = (exec_mode: %d conflict_state: %d seqno: %lld)", + thd->get_stmt_da()->sql_errno(), + thd->is_fatal_error, + thd->wsrep_exec_mode, + thd->wsrep_conflict_state, + (long long)wsrep_thd_trx_seqno(thd)); + } +#endif if (thd->is_slave_error || thd->is_fatal_error) { /* @@ -10771,8 +10823,8 @@ check_table_map(rpl_group_info *rgi, RPL_TABLE_LIST *table_list) DBUG_ENTER("check_table_map"); enum_tbl_map_status res= OK_TO_PROCESS; Relay_log_info *rli= rgi->rli; - - if (rgi->thd->slave_thread /* filtering is for slave only */ && + if ((rgi->thd->slave_thread /* filtering is for slave only */ || + IF_WSREP((WSREP(rgi->thd) && rgi->thd->wsrep_applier), 0)) && (!rli->mi->rpl_filter->db_ok(table_list->db) || (rli->mi->rpl_filter->is_on() && !rli->mi->rpl_filter->tables_ok("", table_list)))) res= FILTERED_OUT; @@ -11520,7 +11572,19 @@ int Write_rows_log_event::do_exec_row(rpl_group_info *rgi) { DBUG_ASSERT(m_table != NULL); + const char *tmp= thd->get_proc_info(); + const char *message= "Write_rows_log_event::write_row()"; + +#ifdef WSREP_PROC_INFO + my_snprintf(thd->wsrep_info, sizeof(thd->wsrep_info) - 1, + "Write_rows_log_event::write_row(%lld)", + (long long) wsrep_thd_trx_seqno(thd)); + message= thd->wsrep_info; +#endif /* WSREP_PROC_INFO */ + + thd_proc_info(thd, message); int error= write_row(rgi, slave_exec_mode == SLAVE_EXEC_MODE_IDEMPOTENT); + thd_proc_info(thd, tmp); if (error && !thd->is_error()) { @@ -12198,15 +12262,34 @@ Delete_rows_log_event::do_after_row_operations(const Slave_reporting_capability int Delete_rows_log_event::do_exec_row(rpl_group_info *rgi) { int error; + const char *tmp= thd->get_proc_info(); + const char *message= "Delete_rows_log_event::find_row()"; const bool invoke_triggers= slave_run_triggers_for_rbr && !master_had_triggers && m_table->triggers; DBUG_ASSERT(m_table != NULL); +#ifdef WSREP_PROC_INFO + my_snprintf(thd->wsrep_info, sizeof(thd->wsrep_info) - 1, + "Delete_rows_log_event::find_row(%lld)", + (long long) wsrep_thd_trx_seqno(thd)); + message= thd->wsrep_info; +#endif /* WSREP_PROC_INFO */ + + thd_proc_info(thd, message); if (!(error= find_row(rgi))) { /* Delete the record found, located in record[0] */ + message= "Delete_rows_log_event::ha_delete_row()"; +#ifdef WSREP_PROC_INFO + snprintf(thd->wsrep_info, sizeof(thd->wsrep_info) - 1, + "Delete_rows_log_event::ha_delete_row(%lld)", + (long long) wsrep_thd_trx_seqno(thd)); + message= thd->wsrep_info; +#endif + thd_proc_info(thd, message); + if (invoke_triggers && process_triggers(TRG_EVENT_DELETE, TRG_ACTION_BEFORE, FALSE)) error= HA_ERR_GENERIC; // in case if error is not set yet @@ -12217,6 +12300,7 @@ int Delete_rows_log_event::do_exec_row(rpl_group_info *rgi) error= HA_ERR_GENERIC; // in case if error is not set yet m_table->file->ha_index_or_rnd_end(); } + thd_proc_info(thd, tmp); return error; } @@ -12344,8 +12428,18 @@ Update_rows_log_event::do_exec_row(rpl_group_info *rgi) { const bool invoke_triggers= slave_run_triggers_for_rbr && !master_had_triggers && m_table->triggers; + const char *tmp= thd->get_proc_info(); + const char *message= "Update_rows_log_event::find_row()"; DBUG_ASSERT(m_table != NULL); +#ifdef WSREP_PROC_INFO + my_snprintf(thd->wsrep_info, sizeof(thd->wsrep_info) - 1, + "Update_rows_log_event::find_row(%lld)", + (long long) wsrep_thd_trx_seqno(thd)); + message= thd->wsrep_info; +#endif /* WSREP_PROC_INFO */ + + thd_proc_info(thd, message); int error= find_row(rgi); if (error) { @@ -12355,6 +12449,7 @@ Update_rows_log_event::do_exec_row(rpl_group_info *rgi) */ m_curr_row= m_curr_row_end; unpack_current_row(rgi); + thd_proc_info(thd, tmp); return error; } @@ -12372,7 +12467,16 @@ Update_rows_log_event::do_exec_row(rpl_group_info *rgi) store_record(m_table,record[1]); m_curr_row= m_curr_row_end; + message= "Update_rows_log_event::unpack_current_row()"; +#ifdef WSREP_PROC_INFO + my_snprintf(thd->wsrep_info, sizeof(thd->wsrep_info) - 1, + "Update_rows_log_event::unpack_current_row(%lld)", + (long long) wsrep_thd_trx_seqno(thd)); + message= thd->wsrep_info; +#endif /* WSREP_PROC_INFO */ + /* this also updates m_curr_row_end */ + thd_proc_info(thd, message); if ((error= unpack_current_row(rgi))) goto err; @@ -12390,6 +12494,15 @@ Update_rows_log_event::do_exec_row(rpl_group_info *rgi) DBUG_DUMP("new values", m_table->record[0], m_table->s->reclength); #endif + message= "Update_rows_log_event::ha_update_row()"; +#ifdef WSREP_PROC_INFO + my_snprintf(thd->wsrep_info, sizeof(thd->wsrep_info) - 1, + "Update_rows_log_event::ha_update_row(%lld)", + (long long) wsrep_thd_trx_seqno(thd)); + message= thd->wsrep_info; +#endif /* WSREP_PROC_INFO */ + + thd_proc_info(thd, message); if (invoke_triggers && process_triggers(TRG_EVENT_UPDATE, TRG_ACTION_BEFORE, TRUE)) { @@ -12405,6 +12518,8 @@ Update_rows_log_event::do_exec_row(rpl_group_info *rgi) process_triggers(TRG_EVENT_UPDATE, TRG_ACTION_AFTER, TRUE)) error= HA_ERR_GENERIC; // in case if error is not set yet + thd_proc_info(thd, tmp); + err: m_table->file->ha_index_or_rnd_end(); return error; @@ -12494,6 +12609,49 @@ void Incident_log_event::pack_info(THD *thd, Protocol *protocol) m_incident, description(), m_message.str); protocol->store(buf, bytes, &my_charset_bin); } +#endif /* MYSQL_CLIENT */ + + +#if WITH_WSREP && !defined(MYSQL_CLIENT) +Format_description_log_event *wsrep_format_desc; // TODO: free them at the end +/* + read the first event from (*buf). The size of the (*buf) is (*buf_len). + At the end (*buf) is shitfed to point to the following event or NULL and + (*buf_len) will be changed to account just being read bytes of the 1st event. +*/ +#define WSREP_MAX_ALLOWED_PACKET 1024*1024*1024 // current protocol max + +Log_event* wsrep_read_log_event( + char **arg_buf, size_t *arg_buf_len, + const Format_description_log_event *description_event) +{ + char *head= (*arg_buf); + uint data_len = uint4korr(head + EVENT_LEN_OFFSET); + char *buf= (*arg_buf); + const char *error= 0; + Log_event *res= 0; + DBUG_ENTER("wsrep_read_log_event"); + + if (data_len > WSREP_MAX_ALLOWED_PACKET) + { + error = "Event too big"; + goto err; + } + + res= Log_event::read_log_event(buf, data_len, &error, description_event, false); + +err: + if (!res) + { + DBUG_ASSERT(error != 0); + sql_print_error("Error in Log_event::read_log_event(): " + "'%s', data_len: %d, event_type: %d", + error,data_len,head[EVENT_TYPE_OFFSET]); + } + (*arg_buf)+= data_len; + (*arg_buf_len)-= data_len; + DBUG_RETURN(res); +} #endif diff --git a/sql/log_event.h b/sql/log_event.h index 2091d968558..212215d97b6 100644 --- a/sql/log_event.h +++ b/sql/log_event.h @@ -1340,7 +1340,11 @@ public: */ int apply_event(rpl_group_info *rgi) { - return do_apply_event(rgi); + int res; + THD_STAGE_INFO(thd, stage_apply_event); + res= do_apply_event(rgi); + THD_STAGE_INFO(thd, stage_after_apply_event); + return res; } diff --git a/sql/mdl.cc b/sql/mdl.cc index 2c2d64e96b2..07130b4d002 100644 --- a/sql/mdl.cc +++ b/sql/mdl.cc @@ -22,6 +22,8 @@ #include <mysql/plugin.h> #include <mysql/service_thd_wait.h> #include <mysql/psi/mysql_stage.h> +#include "wsrep_mysqld.h" +#include "wsrep_thd.h" #ifdef HAVE_PSI_INTERFACE static PSI_mutex_key key_MDL_map_mutex; @@ -1497,11 +1499,53 @@ void MDL_lock::Ticket_list::add_ticket(MDL_ticket *ticket) called by other threads. */ DBUG_ASSERT(ticket->get_lock()); - /* - Add ticket to the *back* of the queue to ensure fairness - among requests with the same priority. - */ - m_list.push_back(ticket); +#ifdef WITH_WSREP + if ((this == &(ticket->get_lock()->m_waiting)) && + wsrep_thd_is_BF((void *)(ticket->get_ctx()->get_thd()), false)) + { + Ticket_iterator itw(ticket->get_lock()->m_waiting); + Ticket_iterator itg(ticket->get_lock()->m_granted); + + DBUG_ASSERT(WSREP_ON); + MDL_ticket *waiting, *granted; + MDL_ticket *prev=NULL; + bool added= false; + + while ((waiting= itw++) && !added) + { + if (!wsrep_thd_is_BF((void *)(waiting->get_ctx()->get_thd()), true)) + { + WSREP_DEBUG("MDL add_ticket inserted before: %lu %s", + wsrep_thd_thread_id(waiting->get_ctx()->get_thd()), + wsrep_thd_query(waiting->get_ctx()->get_thd())); + m_list.insert_after(prev, ticket); + added= true; + } + prev= waiting; + } + if (!added) m_list.push_back(ticket); + + while ((granted= itg++)) + { + if (granted->get_ctx() != ticket->get_ctx() && + granted->is_incompatible_when_granted(ticket->get_type())) + { + if (!wsrep_grant_mdl_exception(ticket->get_ctx(), granted)) + { + WSREP_DEBUG("MDL victim killed at add_ticket"); + } + } + } + } + else +#endif /* WITH_WSREP */ + { + /* + Add ticket to the *back* of the queue to ensure fairness + among requests with the same priority. + */ + m_list.push_back(ticket); + } m_bitmap|= MDL_BIT(ticket->get_type()); } @@ -1842,6 +1886,7 @@ MDL_lock::can_grant_lock(enum_mdl_type type_arg, bool can_grant= FALSE; bitmap_t waiting_incompat_map= incompatible_waiting_types_bitmap()[type_arg]; bitmap_t granted_incompat_map= incompatible_granted_types_bitmap()[type_arg]; + bool wsrep_can_grant= TRUE; /* New lock request can be satisfied iff: @@ -1864,12 +1909,53 @@ MDL_lock::can_grant_lock(enum_mdl_type type_arg, { if (ticket->get_ctx() != requestor_ctx && ticket->is_incompatible_when_granted(type_arg)) + { +#ifdef WITH_WSREP + if (wsrep_thd_is_BF((void *)(requestor_ctx->get_thd()),false) && + key.mdl_namespace() == MDL_key::GLOBAL) + { + WSREP_DEBUG("global lock granted for BF: %lu %s", + wsrep_thd_thread_id(requestor_ctx->get_thd()), + wsrep_thd_query(requestor_ctx->get_thd())); + can_grant = true; + } + else if (!wsrep_grant_mdl_exception(requestor_ctx, ticket)) + { + wsrep_can_grant= FALSE; + if (wsrep_log_conflicts) + { + MDL_lock * lock = ticket->get_lock(); + WSREP_INFO( + "MDL conflict db=%s table=%s ticket=%d solved by %s", + lock->key.db_name(), lock->key.name(), ticket->get_type(), + "abort" ); + } + } + else + can_grant= TRUE; + /* Continue loop */ +#else break; +#endif /* WITH_WSREP */ + } } - if (ticket == NULL) /* Incompatible locks are our own. */ - can_grant= TRUE; + if ((ticket == NULL) && IF_WSREP(wsrep_can_grant, 1)) + can_grant= TRUE; /* Incompatible locks are our own. */ } } +#ifdef WITH_WSREP + else + { + if (wsrep_thd_is_BF((void *)(requestor_ctx->get_thd()), false) && + key.mdl_namespace() == MDL_key::GLOBAL) + { + WSREP_DEBUG("global lock granted for BF (waiting queue): %lu %s", + wsrep_thd_thread_id(requestor_ctx->get_thd()), + wsrep_thd_query(requestor_ctx->get_thd())); + can_grant = true; + } + } +#endif /* WITH_WSREP */ return can_grant; } @@ -3222,3 +3308,44 @@ void MDL_context::set_transaction_duration_for_all_locks() ticket->m_duration= MDL_TRANSACTION; #endif } + + +#ifdef WITH_WSREP + +void MDL_context::release_explicit_locks() +{ + release_locks_stored_before(MDL_EXPLICIT, NULL); +} + + +void MDL_ticket::wsrep_report(bool debug) +{ + if (debug) + { + const PSI_stage_info *psi_stage = m_lock->key.get_wait_state_name(); + + WSREP_DEBUG("MDL ticket: type: %s space: %s db: %s name: %s (%s)", + (get_type() == MDL_INTENTION_EXCLUSIVE) ? "intention exclusive" : + ((get_type() == MDL_SHARED) ? "shared" : + ((get_type() == MDL_SHARED_HIGH_PRIO ? "shared high prio" : + ((get_type() == MDL_SHARED_READ) ? "shared read" : + ((get_type() == MDL_SHARED_WRITE) ? "shared write" : + ((get_type() == MDL_SHARED_NO_WRITE) ? "shared no write" : + ((get_type() == MDL_SHARED_NO_READ_WRITE) ? "shared no read write" : + ((get_type() == MDL_EXCLUSIVE) ? "exclusive" : + "UNKNOWN")))))))), + (m_lock->key.mdl_namespace() == MDL_key::GLOBAL) ? "GLOBAL" : + ((m_lock->key.mdl_namespace() == MDL_key::SCHEMA) ? "SCHEMA" : + ((m_lock->key.mdl_namespace() == MDL_key::TABLE) ? "TABLE" : + ((m_lock->key.mdl_namespace() == MDL_key::TABLE) ? "FUNCTION" : + ((m_lock->key.mdl_namespace() == MDL_key::TABLE) ? "PROCEDURE" : + ((m_lock->key.mdl_namespace() == MDL_key::TABLE) ? "TRIGGER" : + ((m_lock->key.mdl_namespace() == MDL_key::TABLE) ? "EVENT" : + ((m_lock->key.mdl_namespace() == MDL_key::COMMIT) ? "COMMIT" : + (char *)"UNKNOWN"))))))), + m_lock->key.db_name(), + m_lock->key.name(), + psi_stage->m_name); + } +} +#endif /* WITH_WSREP */ diff --git a/sql/mdl.h b/sql/mdl.h index 47c587eb3be..231f95d7419 100644 --- a/sql/mdl.h +++ b/sql/mdl.h @@ -586,6 +586,9 @@ public: MDL_ticket *next_in_lock; MDL_ticket **prev_in_lock; public: +#ifdef WITH_WSREP + void wsrep_report(bool debug); +#endif /* WITH_WSREP */ bool has_pending_conflicting_lock() const; MDL_context *get_ctx() const { return m_ctx; } @@ -773,6 +776,10 @@ public: m_tickets[MDL_TRANSACTION].is_empty() && m_tickets[MDL_EXPLICIT].is_empty()); } + inline bool has_transactional_locks() const + { + return !m_tickets[MDL_TRANSACTION].is_empty(); + } MDL_savepoint mdl_savepoint() { @@ -786,6 +793,9 @@ public: void release_statement_locks(); void release_transactional_locks(); +#ifdef WITH_WSREP + void release_explicit_locks(); +#endif void rollback_to_savepoint(const MDL_savepoint &mdl_savepoint); MDL_context_owner *get_owner() { return m_owner; } @@ -910,7 +920,6 @@ private: */ MDL_wait_for_subgraph *m_waiting_for; private: - THD *get_thd() const { return m_owner->get_thd(); } MDL_ticket *find_ticket(MDL_request *mdl_req, enum_mdl_duration *duration); void release_locks_stored_before(enum_mdl_duration duration, MDL_ticket *sentinel); @@ -919,6 +928,7 @@ private: MDL_ticket **out_ticket); public: + THD *get_thd() const { return m_owner->get_thd(); } void find_deadlock(); ulong get_thread_id() const { return thd_get_thread_id(get_thd()); } diff --git a/sql/mysqld.cc b/sql/mysqld.cc index 066dae224eb..97e9cff3b79 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -71,6 +71,11 @@ #include "scheduler.h" #include <waiting_threads.h> #include "debug_sync.h" +#include "wsrep_mysqld.h" +#include "wsrep_var.h" +#include "wsrep_thd.h" +#include "wsrep_sst.h" + #include "sql_callback.h" #include "threadpool.h" @@ -352,7 +357,8 @@ static bool volatile select_thread_in_use, signal_thread_in_use; static volatile bool ready_to_exit; static my_bool opt_debugging= 0, opt_external_locking= 0, opt_console= 0; static my_bool opt_short_log_format= 0; -static uint kill_cached_threads, wake_thread; +uint kill_cached_threads; +static uint wake_thread; ulong max_used_connections; static volatile ulong cached_thread_count= 0; static char *mysqld_user, *mysqld_chroot; @@ -360,14 +366,15 @@ static char *default_character_set_name; static char *character_set_filesystem_name; static char *lc_messages; static char *lc_time_names_name; -static char *my_bind_addr_str; +char *my_bind_addr_str; static char *default_collation_name; char *default_storage_engine, *default_tmp_storage_engine; static char compiled_default_collation_name[]= MYSQL_DEFAULT_COLLATION_NAME; static I_List<THD> thread_cache; static bool binlog_format_used= false; LEX_STRING opt_init_connect, opt_init_slave; -static mysql_cond_t COND_thread_cache, COND_flush_thread_cache; +mysql_cond_t COND_thread_cache; +static mysql_cond_t COND_flush_thread_cache; static DYNAMIC_ARRAY all_options; /* Global variables */ @@ -731,6 +738,27 @@ mysql_cond_t COND_server_started; int mysqld_server_started=0, mysqld_server_initialized= 0; File_parser_dummy_hook file_parser_dummy_hook; +#ifdef WITH_WSREP +mysql_mutex_t LOCK_wsrep_ready; +mysql_cond_t COND_wsrep_ready; +mysql_mutex_t LOCK_wsrep_sst; +mysql_cond_t COND_wsrep_sst; +mysql_mutex_t LOCK_wsrep_sst_init; +mysql_cond_t COND_wsrep_sst_init; +mysql_mutex_t LOCK_wsrep_rollback; +mysql_cond_t COND_wsrep_rollback; +wsrep_aborting_thd_t wsrep_aborting_thd= NULL; +mysql_mutex_t LOCK_wsrep_replaying; +mysql_cond_t COND_wsrep_replaying; +mysql_mutex_t LOCK_wsrep_slave_threads; +mysql_mutex_t LOCK_wsrep_desync; +int wsrep_replaying= 0; +ulong wsrep_running_threads = 0; // # of currently running wsrep threads +ulong my_bind_addr; +const char *wsrep_binlog_format_names[]= + {"MIXED", "STATEMENT", "ROW", "NONE", NullS}; +#endif /* WITH_WSREP */ + /* replication parameters, if master_host is not NULL, we are a slave */ uint report_port= 0; ulong master_retry_count=0; @@ -868,6 +896,12 @@ PSI_mutex_key key_BINLOG_LOCK_index, key_BINLOG_LOCK_xid_list, key_LOCK_error_messages, key_LOG_INFO_lock, key_LOCK_thread_count, key_LOCK_thread_cache, key_PARTITION_LOCK_auto_inc; +#ifdef WITH_WSREP +PSI_mutex_key key_LOCK_wsrep_rollback, key_LOCK_wsrep_thd, + key_LOCK_wsrep_replaying, key_LOCK_wsrep_ready, key_LOCK_wsrep_sst, + key_LOCK_wsrep_sst_thread, key_LOCK_wsrep_sst_init, + key_LOCK_wsrep_slave_threads, key_LOCK_wsrep_desync; +#endif PSI_mutex_key key_RELAYLOG_LOCK_index; PSI_mutex_key key_LOCK_slave_state, key_LOCK_binlog_state, key_LOCK_rpl_thread, key_LOCK_rpl_thread_pool, key_LOCK_parallel_entry; @@ -942,6 +976,18 @@ static PSI_mutex_info all_server_mutexes[]= { &key_LOCK_prepare_ordered, "LOCK_prepare_ordered", PSI_FLAG_GLOBAL}, { &key_LOCK_commit_ordered, "LOCK_commit_ordered", PSI_FLAG_GLOBAL}, { &key_LOG_INFO_lock, "LOG_INFO::lock", 0}, +#ifdef WITH_WSREP + { &key_LOCK_wsrep_ready, "LOCK_wsrep_ready", PSI_FLAG_GLOBAL}, + { &key_LOCK_wsrep_sst, "LOCK_wsrep_sst", PSI_FLAG_GLOBAL}, + { &key_LOCK_wsrep_sst_thread, "wsrep_sst_thread", 0}, + { &key_LOCK_wsrep_sst_init, "LOCK_wsrep_sst_init", PSI_FLAG_GLOBAL}, + { &key_LOCK_wsrep_sst, "LOCK_wsrep_sst", PSI_FLAG_GLOBAL}, + { &key_LOCK_wsrep_rollback, "LOCK_wsrep_rollback", PSI_FLAG_GLOBAL}, + { &key_LOCK_wsrep_thd, "THD::LOCK_wsrep_thd", 0}, + { &key_LOCK_wsrep_replaying, "LOCK_wsrep_replaying", PSI_FLAG_GLOBAL}, + { &key_LOCK_wsrep_slave_threads, "LOCK_wsrep_slave_threads", PSI_FLAG_GLOBAL}, + { &key_LOCK_wsrep_desync, "LOCK_wsrep_desync", PSI_FLAG_GLOBAL}, +#endif { &key_LOCK_thread_count, "LOCK_thread_count", PSI_FLAG_GLOBAL}, { &key_LOCK_thread_cache, "LOCK_thread_cache", PSI_FLAG_GLOBAL}, { &key_PARTITION_LOCK_auto_inc, "HA_DATA_PARTITION::LOCK_auto_inc", 0}, @@ -988,6 +1034,11 @@ PSI_cond_key key_BINLOG_COND_xid_list, key_BINLOG_update_cond, key_TABLE_SHARE_cond, key_user_level_lock_cond, key_COND_thread_count, key_COND_thread_cache, key_COND_flush_thread_cache, key_BINLOG_COND_queue_busy; +#ifdef WITH_WSREP +PSI_cond_key key_COND_wsrep_rollback, key_COND_wsrep_thd, + key_COND_wsrep_replaying, key_COND_wsrep_ready, key_COND_wsrep_sst, + key_COND_wsrep_sst_init, key_COND_wsrep_sst_thread; +#endif /* WITH_WSREP */ PSI_cond_key key_RELAYLOG_update_cond, key_COND_wakeup_ready, key_COND_wait_commit; PSI_cond_key key_RELAYLOG_COND_queue_busy; @@ -1037,6 +1088,15 @@ static PSI_cond_info all_server_conds[]= { &key_user_level_lock_cond, "User_level_lock::cond", 0}, { &key_COND_thread_count, "COND_thread_count", PSI_FLAG_GLOBAL}, { &key_COND_thread_cache, "COND_thread_cache", PSI_FLAG_GLOBAL}, +#ifdef WITH_WSREP + { &key_COND_wsrep_ready, "COND_wsrep_ready", PSI_FLAG_GLOBAL}, + { &key_COND_wsrep_sst, "COND_wsrep_sst", PSI_FLAG_GLOBAL}, + { &key_COND_wsrep_sst_init, "COND_wsrep_sst_init", PSI_FLAG_GLOBAL}, + { &key_COND_wsrep_sst_thread, "wsrep_sst_thread", 0}, + { &key_COND_wsrep_rollback, "COND_wsrep_rollback", PSI_FLAG_GLOBAL}, + { &key_COND_wsrep_thd, "THD::COND_wsrep_thd", 0}, + { &key_COND_wsrep_replaying, "COND_wsrep_replaying", PSI_FLAG_GLOBAL}, +#endif { &key_COND_flush_thread_cache, "COND_flush_thread_cache", PSI_FLAG_GLOBAL}, { &key_COND_rpl_thread, "COND_rpl_thread", 0}, { &key_COND_rpl_thread_queue, "COND_rpl_thread_queue", 0}, @@ -1398,7 +1458,7 @@ bool mysqld_embedded=0; bool mysqld_embedded=1; #endif -static my_bool plugins_are_initialized= FALSE; +my_bool plugins_are_initialized= FALSE; #ifndef DBUG_OFF static const char* default_dbug_option; @@ -1622,6 +1682,11 @@ static void close_connections(void) if (tmp->slave_thread) continue; +#ifdef WITH_WSREP + /* skip wsrep system threads as well */ + if (WSREP(tmp) && (tmp->wsrep_exec_mode==REPL_RECV || tmp->wsrep_applier)) + continue; +#endif tmp->killed= KILL_SERVER_HARD; MYSQL_CALLBACK(thread_scheduler, post_kill_notification, (tmp)); mysql_mutex_lock(&tmp->LOCK_thd_data); @@ -1698,6 +1763,34 @@ static void close_connections(void) close_connection(tmp,ER_SERVER_SHUTDOWN); } #endif +#ifdef WITH_WSREP + /* + * WSREP_TODO: + * this code block may turn out redundant. wsrep->disconnect() + * should terminate slave threads gracefully, and we don't need + * to signal them here. + * The code here makes sure mysqld will not hang during shutdown + * even if wsrep provider has problems in shutting down. + */ + if (WSREP(tmp) && tmp->wsrep_exec_mode==REPL_RECV) + { + sql_print_information("closing wsrep system thread"); + tmp->killed= KILL_CONNECTION; + MYSQL_CALLBACK(thread_scheduler, post_kill_notification, (tmp)); + if (tmp->mysys_var) + { + tmp->mysys_var->abort=1; + mysql_mutex_lock(&tmp->mysys_var->mutex); + if (tmp->mysys_var->current_cond) + { + mysql_mutex_lock(tmp->mysys_var->current_mutex); + mysql_cond_broadcast(tmp->mysys_var->current_cond); + mysql_mutex_unlock(tmp->mysys_var->current_mutex); + } + mysql_mutex_unlock(&tmp->mysys_var->mutex); + } + } +#endif DBUG_PRINT("quit",("Unlocking LOCK_thread_count")); mysql_mutex_unlock(&LOCK_thread_count); } @@ -1852,7 +1945,18 @@ static void __cdecl kill_server(int sig_ptr) } #endif +#ifdef WITH_WSREP + if (WSREP_ON) + wsrep_stop_replication(NULL); +#endif + close_connections(); + +#ifdef WITH_WSREP + if (wsrep_inited == 1) + wsrep_deinit(true); +#endif + if (sig != MYSQL_KILL_SIGNAL && sig != 0) unireg_abort(1); /* purecov: inspected */ @@ -1947,6 +2051,30 @@ extern "C" void unireg_abort(int exit_code) usage(); if (exit_code) sql_print_error("Aborting\n"); + +#ifdef WITH_WSREP + /* Check if wsrep class is used. If yes, then cleanup wsrep */ + if (wsrep) + { + /* + This is an abort situation, we cannot expect to gracefully close all + wsrep threads here, we can only diconnect from service + */ + wsrep_close_client_connections(FALSE); + shutdown_in_progress= 1; + THD *thd(0); + wsrep->disconnect(wsrep); + WSREP_INFO("Service disconnected."); + wsrep_close_threads(thd); /* this won't close all threads */ + sleep(1); /* so give some time to exit for those which can */ + WSREP_INFO("Some threads may fail to exit."); + + /* In bootstrap mode we deinitialize wsrep here. */ + if (opt_bootstrap && wsrep_inited) + wsrep_deinit(true); + } +#endif // WITH_WSREP + clean_up(!opt_abort && (exit_code || !opt_bootstrap)); /* purecov: inspected */ DBUG_PRINT("quit",("done with cleanup in unireg_abort")); mysqld_exit(exit_code); @@ -2166,6 +2294,20 @@ static void clean_up_mutexes() mysql_cond_destroy(&COND_thread_count); mysql_cond_destroy(&COND_thread_cache); mysql_cond_destroy(&COND_flush_thread_cache); +#ifdef WITH_WSREP + (void) mysql_mutex_destroy(&LOCK_wsrep_ready); + (void) mysql_cond_destroy(&COND_wsrep_ready); + (void) mysql_mutex_destroy(&LOCK_wsrep_sst); + (void) mysql_cond_destroy(&COND_wsrep_sst); + (void) mysql_mutex_destroy(&LOCK_wsrep_sst_init); + (void) mysql_cond_destroy(&COND_wsrep_sst_init); + (void) mysql_mutex_destroy(&LOCK_wsrep_rollback); + (void) mysql_cond_destroy(&COND_wsrep_rollback); + (void) mysql_mutex_destroy(&LOCK_wsrep_replaying); + (void) mysql_cond_destroy(&COND_wsrep_replaying); + (void) mysql_mutex_destroy(&LOCK_wsrep_slave_threads); + (void) mysql_mutex_destroy(&LOCK_wsrep_desync); +#endif mysql_mutex_destroy(&LOCK_server_started); mysql_cond_destroy(&COND_server_started); mysql_mutex_destroy(&LOCK_prepare_ordered); @@ -2481,6 +2623,11 @@ static MYSQL_SOCKET activate_tcp_port(uint port) socket_errno); unireg_abort(1); } +#if defined(WITH_WSREP) && defined(HAVE_FCNTL) && defined(FD_CLOEXEC) + if (WSREP_ON) + (void) fcntl(mysql_socket_getfd(ip_sock), F_SETFD, FD_CLOEXEC); +#endif /* WITH_WSREP */ + DBUG_RETURN(ip_sock); } @@ -2607,6 +2754,10 @@ static void network_init(void) if (mysql_socket_listen(unix_sock,(int) back_log) < 0) sql_print_warning("listen() on Unix socket failed with error %d", socket_errno); +#if defined(WITH_WSREP) && defined(HAVE_FCNTL) && defined(FD_CLOEXEC) + if (WSREP_ON) + (void) fcntl(mysql_socket_getfd(unix_sock), F_SETFD, FD_CLOEXEC); +#endif /* WITH_WSREP */ } #endif DBUG_PRINT("info",("server started")); @@ -2681,6 +2832,16 @@ void thd_cleanup(THD *thd) void dec_connection_count(THD *thd) { +#ifdef WITH_WSREP + /* + Do not decrement when its wsrep system thread. wsrep_applier is set for + applier as well as rollbacker threads. + */ + if (thd->wsrep_applier) + return; +#endif /* WITH_WSREP */ + + DBUG_ASSERT(*thd->scheduler->connection_count > 0); mysql_mutex_lock(&LOCK_connection_count); (*thd->scheduler->connection_count)--; mysql_mutex_unlock(&LOCK_connection_count); @@ -2851,10 +3012,15 @@ static bool cache_thread() bool one_thread_per_connection_end(THD *thd, bool put_in_cache) { DBUG_ENTER("one_thread_per_connection_end"); +#ifdef WITH_WSREP + const bool wsrep_applier(thd->wsrep_applier); +#endif + unlink_thd(thd); /* Mark that current_thd is not valid anymore */ set_current_thd(0); - if (put_in_cache && cache_thread()) + + if (put_in_cache && cache_thread() && IF_WSREP(!wsrep_applier, 1)) DBUG_RETURN(0); // Thread is reused /* @@ -3976,6 +4142,14 @@ static int init_common_variables() else opt_log_basename= glob_hostname; +#ifdef WITH_WSREP + if (0 == wsrep_node_name || 0 == wsrep_node_name[0]) + { + my_free((void *)wsrep_node_name); + wsrep_node_name= my_strdup(glob_hostname, MYF(MY_WME)); + } +#endif /* WITH_WSREP */ + if (!*pidfile_name) { strmake(pidfile_name, opt_log_basename, sizeof(pidfile_name)-5); @@ -4036,6 +4210,15 @@ static int init_common_variables() SQLCOM_END + 8); #endif +#ifdef WITH_WSREP + /* + This is a protection against mutually incompatible option values. + Note WSREP_ON == global_system_variables.wsrep_on + */ + if (WSREP_ON && wsrep_check_opts (remaining_argc, remaining_argv)) + global_system_variables.wsrep_on= 0; +#endif /* WITH_WSREP */ + if (get_options(&remaining_argc, &remaining_argv)) return 1; set_server_version(); @@ -4430,6 +4613,28 @@ static int init_thread_environment() rpl_init_gtid_waiting(); #endif +#ifdef WITH_WSREP + mysql_mutex_init(key_LOCK_wsrep_ready, + &LOCK_wsrep_ready, MY_MUTEX_INIT_FAST); + mysql_cond_init(key_COND_wsrep_ready, &COND_wsrep_ready, NULL); + mysql_mutex_init(key_LOCK_wsrep_sst, + &LOCK_wsrep_sst, MY_MUTEX_INIT_FAST); + mysql_cond_init(key_COND_wsrep_sst, &COND_wsrep_sst, NULL); + mysql_mutex_init(key_LOCK_wsrep_sst_init, + &LOCK_wsrep_sst_init, MY_MUTEX_INIT_FAST); + mysql_cond_init(key_COND_wsrep_sst_init, &COND_wsrep_sst_init, NULL); + mysql_mutex_init(key_LOCK_wsrep_rollback, + &LOCK_wsrep_rollback, MY_MUTEX_INIT_FAST); + mysql_cond_init(key_COND_wsrep_rollback, &COND_wsrep_rollback, NULL); + mysql_mutex_init(key_LOCK_wsrep_replaying, + &LOCK_wsrep_replaying, MY_MUTEX_INIT_FAST); + mysql_cond_init(key_COND_wsrep_replaying, &COND_wsrep_replaying, NULL); + mysql_mutex_init(key_LOCK_wsrep_slave_threads, + &LOCK_wsrep_slave_threads, MY_MUTEX_INIT_FAST); + mysql_mutex_init(key_LOCK_wsrep_desync, + &LOCK_wsrep_desync, MY_MUTEX_INIT_FAST); +#endif + DBUG_RETURN(0); } @@ -4727,10 +4932,10 @@ static int init_server_components() /* need to configure logging before initializing storage engines */ if (!opt_bin_log_used) { - if (opt_log_slave_updates) + if (IF_WSREP(!WSREP_ON,1) && opt_log_slave_updates) sql_print_warning("You need to use --log-bin to make " "--log-slave-updates work."); - if (binlog_format_used) + if (IF_WSREP(!WSREP_ON, 1) && binlog_format_used) sql_print_warning("You need to use --log-bin to make " "--binlog-format work."); } @@ -4805,10 +5010,67 @@ a file name for --log-bin-index option", opt_binlog_index_name); { opt_bin_logname= my_once_strdup(buf, MYF(MY_WME)); } +#ifdef WITH_WSREP /* WSREP BEFORE SE */ + /* + Wsrep initialization must happen at this point, because: + - opt_bin_logname must be known when starting replication + since SST may need it + - SST may modify binlog index file, so it must be opened + after SST has happened + */ + } + if (WSREP_ON && !wsrep_recovery) + { + if (opt_bootstrap) // bootsrap option given - disable wsrep functionality + { + wsrep_provider_init(WSREP_NONE); + if (wsrep_init()) unireg_abort(1); + } + else // full wsrep initialization + { + // add basedir/bin to PATH to resolve wsrep script names + char* const tmp_path((char*)alloca(strlen(mysql_home) + + strlen("/bin") + 1)); + if (tmp_path) + { + strcpy(tmp_path, mysql_home); + strcat(tmp_path, "/bin"); + wsrep_prepend_PATH(tmp_path); + } + else + { + WSREP_ERROR("Could not append %s/bin to PATH", mysql_home); + } + + if (wsrep_before_SE()) + { + set_ports(); // this is also called in network_init() later but we need + // to know mysqld_port now - lp:1071882 + wsrep_init_startup(true); + } + } + } + if (opt_bin_log) + { + /* + Variable ln is not defined at this scope. We use opt_bin_logname instead. + It should be the same as ln since + - mysql_bin_log.generate_name() returns first argument if new log name + is not generated + - if new log name is generated, return value is assigned to ln and copied + to opt_bin_logname above + */ + if (mysql_bin_log.open_index_file(opt_binlog_index_name, opt_bin_logname, + TRUE)) + { + unireg_abort(1); + } +#else if (mysql_bin_log.open_index_file(opt_binlog_index_name, ln, TRUE)) { unireg_abort(1); } +#endif /* WITH_WSREP */ } /* call ha_init_key_cache() on all key caches to init them */ @@ -4930,10 +5192,40 @@ a file name for --log-bin-index option", opt_binlog_index_name); internal_tmp_table_max_key_segments= myisam_max_key_segments(); #endif +#ifdef WITH_WSREP + if (WSREP_ON && !opt_bin_log) + { + wsrep_emulate_bin_log= 1; + } +#endif + + /* if total_ha_2pc <= 1 + tc_log = tc_log_dummy + else + if opt_bin_log == true + tc_log = mysql_bin_log + else + if WITH_WSREP + if WSREP_ON + tc_log = tc_log_dummy + else + tc_log = tc_log_mmap + else + tc_log=tc_log_mmap + */ tc_log= (total_ha_2pc > 1 ? (opt_bin_log ? (TC_LOG *) &mysql_bin_log : - (TC_LOG *) &tc_log_mmap) : - (TC_LOG *) &tc_log_dummy); + IF_WSREP((WSREP_ON ? (TC_LOG *) &tc_log_dummy : + (TC_LOG *) &tc_log_mmap), (TC_LOG *) &tc_log_mmap)) : + (TC_LOG *) &tc_log_dummy); + +#ifdef WITH_WSREP + WSREP_DEBUG("Initial TC log open: %s", + (tc_log == &mysql_bin_log) ? "binlog" : + (tc_log == &tc_log_mmap) ? "mmap" : + (tc_log == &tc_log_dummy) ? "dummy" : "unknown" + ); +#endif if (tc_log->open(opt_bin_log ? opt_bin_logname : opt_tc_log_file)) { @@ -5192,6 +5484,10 @@ int mysqld_main(int argc, char **argv) return 1; } #endif +#ifdef WITH_WSREP + if (WSREP_ON) + wsrep_filter_new_cluster (&argc, argv); +#endif /* WITH_WSREP */ orig_argc= argc; orig_argv= argv; @@ -5408,6 +5704,15 @@ int mysqld_main(int argc, char **argv) } #endif +#ifdef WITH_WSREP /* WSREP AFTER SE */ + if (WSREP_ON && wsrep_recovery) + { + select_thread_in_use= 0; + wsrep_recover(); + unireg_abort(0); + } +#endif /* WITH_WSREP */ + /* init signals & alarm After this we can't quit by a simple unireg_abort @@ -5464,7 +5769,35 @@ int mysqld_main(int argc, char **argv) if (Events::init(opt_noacl || opt_bootstrap)) unireg_abort(1); - if (opt_bootstrap) +#ifdef WITH_WSREP /* WSREP AFTER SE */ + if (WSREP_ON) + { + if (opt_bootstrap) + { + /*! bootstrap wsrep init was taken care of above */ + } + else + { + wsrep_SE_initialized(); + + if (wsrep_before_SE()) + { + /*! in case of no SST wsrep waits in view handler callback */ + wsrep_SE_init_grab(); + wsrep_SE_init_done(); + /*! in case of SST wsrep waits for wsrep->sst_received */ + wsrep_sst_continue(); + } + else + { + wsrep_init_startup (false); + } + + wsrep_create_appliers(wsrep_slave_threads - 1); + } + } +#endif /* WITH_WSREP */ + if (opt_bootstrap) { select_thread_in_use= 0; // Allow 'kill' to work bootstrap(mysql_stdin); @@ -5532,6 +5865,7 @@ int mysqld_main(int argc, char **argv) #ifdef EXTRA_DEBUG2 sql_print_error("Before Lock_thread_count"); #endif + WSREP_DEBUG("Before Lock_thread_count"); mysql_mutex_lock(&LOCK_thread_count); DBUG_PRINT("quit", ("Got thread_count mutex")); select_thread_in_use=0; // For close_connections @@ -5797,6 +6131,9 @@ static void bootstrap(MYSQL_FILE *file) DBUG_ENTER("bootstrap"); THD *thd= new THD; +#ifdef WITH_WSREP + thd->variables.wsrep_on= 0; +#endif thd->bootstrap=1; my_net_init(&thd->net,(st_vio*) 0, MYF(0)); thd->max_client_packet_length= thd->net.max_packet; @@ -6198,6 +6535,9 @@ void handle_connections_sockets() sleep(1); // Give other threads some time continue; } +#if defined(WITH_WSREP) && defined(HAVE_FCNTL) && defined(FD_CLOEXEC) + (void) fcntl(mysql_socket_getfd(new_sock), F_SETFD, FD_CLOEXEC); +#endif /* WITH_WSREP */ #ifdef HAVE_LIBWRAP { @@ -7971,6 +8311,21 @@ SHOW_VAR status_vars[]= { #ifdef ENABLED_PROFILING {"Uptime_since_flush_status",(char*) &show_flushstatustime, SHOW_SIMPLE_FUNC}, #endif +#ifdef WITH_WSREP + {"wsrep_connected", (char*) &wsrep_connected, SHOW_BOOL}, + {"wsrep_ready", (char*) &wsrep_ready, SHOW_BOOL}, + {"wsrep_cluster_state_uuid", (char*) &wsrep_cluster_state_uuid,SHOW_CHAR_PTR}, + {"wsrep_cluster_conf_id", (char*) &wsrep_cluster_conf_id, SHOW_LONGLONG}, + {"wsrep_cluster_status", (char*) &wsrep_cluster_status, SHOW_CHAR_PTR}, + {"wsrep_cluster_size", (char*) &wsrep_cluster_size, SHOW_LONG_NOFLUSH}, + {"wsrep_local_index", (char*) &wsrep_local_index, SHOW_LONG_NOFLUSH}, + {"wsrep_local_bf_aborts", (char*) &wsrep_show_bf_aborts, SHOW_SIMPLE_FUNC}, + {"wsrep_provider_name", (char*) &wsrep_provider_name, SHOW_CHAR_PTR}, + {"wsrep_provider_version", (char*) &wsrep_provider_version, SHOW_CHAR_PTR}, + {"wsrep_provider_vendor", (char*) &wsrep_provider_vendor, SHOW_CHAR_PTR}, + {"wsrep_thread_count", (char*) &wsrep_running_threads, SHOW_LONG_NOFLUSH}, + {"wsrep", (char*) &wsrep_show_status, SHOW_FUNC}, +#endif {NullS, NullS, SHOW_LONG} }; @@ -8314,6 +8669,10 @@ static int mysql_init_variables(void) tmpenv = DEFAULT_MYSQL_HOME; strmake_buf(mysql_home, tmpenv); #endif +#ifdef WITH_WSREP + if (WSREP_ON && wsrep_init_vars()) + return 1; +#endif return 0; } @@ -8561,6 +8920,14 @@ mysqld_get_one_option(int optid, case OPT_LOWER_CASE_TABLE_NAMES: lower_case_table_names_used= 1; break; +#ifdef WITH_WSREP + case OPT_WSREP_START_POSITION: + wsrep_start_position_init (argument); + break; + case OPT_WSREP_SST_AUTH: + wsrep_sst_auth_init (argument); + break; +#endif #if defined(ENABLED_DEBUG_SYNC) case OPT_DEBUG_SYNC_TIMEOUT: /* @@ -9029,6 +9396,9 @@ void set_server_version(void) #ifdef EMBEDDED_LIBRARY end= strmov(end, "-embedded"); #endif +#ifdef WITH_WSREP + end= strmov(end, "-wsrep"); +#endif #ifndef DBUG_OFF if (!strstr(MYSQL_SERVER_SUFFIX_STR, "-debug")) end= strmov(end, "-debug"); @@ -9321,6 +9691,10 @@ void refresh_status(THD *thd) /* Reset some global variables */ reset_status_vars(); +#ifdef WITH_WSREP + if (WSREP_ON) + wsrep->stats_reset(wsrep); +#endif /* WITH_WSREP */ /* Reset the counters of all key caches (default and named). */ process_key_caches(reset_key_cache_counters, 0); @@ -9372,6 +9746,7 @@ static PSI_file_info all_server_files[]= }; #endif /* HAVE_PSI_INTERFACE */ +PSI_stage_info stage_after_apply_event= { 0, "after apply log event", 0}; PSI_stage_info stage_after_create= { 0, "After create", 0}; PSI_stage_info stage_after_opening_tables= { 0, "After opening tables", 0}; PSI_stage_info stage_after_table_lock= { 0, "After table lock", 0}; @@ -9379,6 +9754,7 @@ PSI_stage_info stage_allocating_local_table= { 0, "allocating local table", 0}; PSI_stage_info stage_alter_inplace_prepare= { 0, "preparing for alter table", 0}; PSI_stage_info stage_alter_inplace= { 0, "altering table", 0}; PSI_stage_info stage_alter_inplace_commit= { 0, "committing alter table to storage engine", 0}; +PSI_stage_info stage_apply_event= { 0, "apply log event", 0}; PSI_stage_info stage_changing_master= { 0, "Changing master", 0}; PSI_stage_info stage_checking_master_version= { 0, "Checking master version", 0}; PSI_stage_info stage_checking_permissions= { 0, "checking permissions", 0}; @@ -9452,6 +9828,7 @@ PSI_stage_info stage_sql_thd_waiting_until_delay= { 0, "Waiting until MASTER_DEL PSI_stage_info stage_storing_result_in_query_cache= { 0, "storing result in query cache", 0}; PSI_stage_info stage_storing_row_into_queue= { 0, "storing row into queue", 0}; PSI_stage_info stage_system_lock= { 0, "System lock", 0}; +PSI_stage_info stage_unlocking_tables= { 0, "Unlocking tables", 0}; PSI_stage_info stage_update= { 0, "update", 0}; PSI_stage_info stage_updating= { 0, "updating", 0}; PSI_stage_info stage_updating_main_table= { 0, "updating main table", 0}; @@ -9496,6 +9873,7 @@ PSI_stage_info stage_gtid_wait_other_connection= { 0, "Waiting for other master PSI_stage_info *all_server_stages[]= { + & stage_after_apply_event, & stage_after_create, & stage_after_opening_tables, & stage_after_table_lock, @@ -9503,6 +9881,7 @@ PSI_stage_info *all_server_stages[]= & stage_alter_inplace, & stage_alter_inplace_commit, & stage_alter_inplace_prepare, + & stage_apply_event, & stage_binlog_processing_checkpoint_notify, & stage_binlog_stopping_background_thread, & stage_binlog_waiting_background_tasks, @@ -9584,6 +9963,7 @@ PSI_stage_info *all_server_stages[]= & stage_storing_result_in_query_cache, & stage_storing_row_into_queue, & stage_system_lock, + & stage_unlocking_tables, & stage_update, & stage_updating, & stage_updating_main_table, diff --git a/sql/mysqld.h b/sql/mysqld.h index a1656a49047..8a6794cc30c 100644 --- a/sql/mysqld.h +++ b/sql/mysqld.h @@ -25,6 +25,7 @@ #include "sql_list.h" /* I_List */ #include "sql_cmd.h" #include <my_rnd.h> +#include "my_pthread.h" class THD; struct handlerton; @@ -241,6 +242,11 @@ extern PSI_mutex_key key_PAGE_lock, key_LOCK_sync, key_LOCK_active, key_LOCK_pool, key_LOCK_pending_checkpoint; #endif /* HAVE_MMAP */ +#ifdef WITH_WSREP +extern PSI_mutex_key key_LOCK_wsrep_thd; +extern PSI_cond_key key_COND_wsrep_thd; +#endif /* WITH_WSREP */ + #ifdef HAVE_OPENSSL extern PSI_mutex_key key_LOCK_des_key_file; #endif @@ -328,6 +334,7 @@ void init_server_psi_keys(); MAINTAINER: Please keep this list in order, to limit merge collisions. Hint: grep PSI_stage_info | sort -u */ +extern PSI_stage_info stage_apply_event; extern PSI_stage_info stage_after_create; extern PSI_stage_info stage_after_opening_tables; extern PSI_stage_info stage_after_table_lock; @@ -335,6 +342,7 @@ extern PSI_stage_info stage_allocating_local_table; extern PSI_stage_info stage_alter_inplace_prepare; extern PSI_stage_info stage_alter_inplace; extern PSI_stage_info stage_alter_inplace_commit; +extern PSI_stage_info stage_after_apply_event; extern PSI_stage_info stage_changing_master; extern PSI_stage_info stage_checking_master_version; extern PSI_stage_info stage_checking_permissions; @@ -408,6 +416,7 @@ extern PSI_stage_info stage_statistics; extern PSI_stage_info stage_storing_result_in_query_cache; extern PSI_stage_info stage_storing_row_into_queue; extern PSI_stage_info stage_system_lock; +extern PSI_stage_info stage_unlocking_tables; extern PSI_stage_info stage_update; extern PSI_stage_info stage_updating; extern PSI_stage_info stage_updating_main_table; @@ -595,6 +604,15 @@ enum options_mysqld OPT_WANT_CORE, OPT_MYSQL_COMPATIBILITY, OPT_MYSQL_TO_BE_IMPLEMENTED, +#ifdef WITH_WSREP + OPT_WSREP_PROVIDER, + OPT_WSREP_PROVIDER_OPTIONS, + OPT_WSREP_CLUSTER_ADDRESS, + OPT_WSREP_START_POSITION, + OPT_WSREP_SST_AUTH, + OPT_WSREP_RECOVER, +#endif /* WITH_WSREP */ + OPT_which_is_always_the_last }; #endif diff --git a/sql/protocol.cc b/sql/protocol.cc index 2400dadfadc..caa00dd80e3 100644 --- a/sql/protocol.cc +++ b/sql/protocol.cc @@ -488,6 +488,8 @@ static uchar *net_store_length_fast(uchar *packet, uint length) void Protocol::end_statement() { + /* sanity check*/ + DBUG_ASSERT_IF_WSREP(!(WSREP_ON && WSREP(thd) && thd->wsrep_conflict_state == REPLAYING)); DBUG_ENTER("Protocol::end_statement"); DBUG_ASSERT(! thd->get_stmt_da()->is_sent()); bool error= FALSE; diff --git a/sql/rpl_record.cc b/sql/rpl_record.cc index a6d93d10f11..29a1b9728e5 100644 --- a/sql/rpl_record.cc +++ b/sql/rpl_record.cc @@ -305,9 +305,8 @@ unpack_row(rpl_group_info *rgi, normal unpack operation. */ uint16 const metadata= tabledef->field_metadata(i); -#ifndef DBUG_OFF uchar const *const old_pack_ptr= pack_ptr; -#endif + pack_ptr= f->unpack(f->ptr, pack_ptr, row_end, metadata); DBUG_PRINT("debug", ("field: %s; metadata: 0x%x;" " pack_ptr: 0x%lx; pack_ptr': 0x%lx; bytes: %d", @@ -316,6 +315,24 @@ unpack_row(rpl_group_info *rgi, (int) (pack_ptr - old_pack_ptr))); if (!pack_ptr) { +#ifdef WITH_WSREP + if (WSREP_ON) + { + /* + Debug message to troubleshoot bug: + https://mariadb.atlassian.net/browse/MDEV-4404 + Galera Node throws "Could not read field" error and drops out of cluster + */ + WSREP_WARN("ROW event unpack field: %s metadata: 0x%x;" + " pack_ptr: 0x%lx; conv_table %p conv_field %p table %s" + " row_end: 0x%lx", + f->field_name, metadata, + (ulong) old_pack_ptr, conv_table, conv_field, + (table_found) ? "found" : "not found", (ulong)row_end + ); + } +#endif /* WITH_WSREP */ + rgi->rli->report(ERROR_LEVEL, ER_SLAVE_CORRUPT_EVENT, "Could not read field '%s' of table '%s.%s'", f->field_name, table->s->db.str, diff --git a/sql/set_var.cc b/sql/set_var.cc index 5c1e00af33e..7ad5dc31f6e 100644 --- a/sql/set_var.cc +++ b/sql/set_var.cc @@ -556,7 +556,11 @@ int mysql_del_sys_var_chain(sys_var *first) static int show_cmp(SHOW_VAR *a, SHOW_VAR *b) { +#ifdef WITH_WSREP + return my_strcasecmp(system_charset_info, a->name, b->name); +#else return strcmp(a->name, b->name); +#endif /* WITH_WSREP */ } diff --git a/sql/set_var.h b/sql/set_var.h index bb92e555aa7..e72a8af6210 100644 --- a/sql/set_var.h +++ b/sql/set_var.h @@ -249,6 +249,9 @@ public: int check(THD *thd); int update(THD *thd); int light_check(THD *thd); +#ifdef WITH_WSREP + int wsrep_store_variable(THD *thd); +#endif }; @@ -355,6 +358,9 @@ extern sys_var *Sys_autocommit_ptr; CHARSET_INFO *get_old_charset_by_name(const char *old_name); +#ifdef WITH_WSREP +int sql_set_wsrep_variables(THD *thd, List<set_var_base> *var_list); +#endif int sys_var_init(); int sys_var_add_options(DYNAMIC_ARRAY *long_options, int parse_flags); void sys_var_end(void); diff --git a/sql/slave.cc b/sql/slave.cc index f7d019a6c39..746b1d48458 100644 --- a/sql/slave.cc +++ b/sql/slave.cc @@ -52,6 +52,7 @@ #include "log_event.h" // Rotate_log_event, // Create_file_log_event, // Format_description_log_event +#include "wsrep_mysqld.h" #ifdef HAVE_REPLICATION @@ -4367,6 +4368,7 @@ pthread_handler_t handle_slave_sql(void *arg) my_off_t saved_skip= 0; Master_info *mi= ((Master_info*)arg); Relay_log_info* rli = &mi->rli; + my_bool wsrep_node_dropped= FALSE; const char *errmsg; rpl_group_info *serial_rgi; rpl_sql_thread_info sql_info(mi->rpl_filter); @@ -4375,6 +4377,8 @@ pthread_handler_t handle_slave_sql(void *arg) my_thread_init(); DBUG_ENTER("handle_slave_sql"); + wsrep_restart_point: + LINT_INIT(saved_master_log_pos); LINT_INIT(saved_log_pos); @@ -4503,6 +4507,12 @@ pthread_handler_t handle_slave_sql(void *arg) } #endif +#ifdef WITH_WSREP + thd->wsrep_exec_mode= LOCAL_STATE; + /* synchronize with wsrep replication */ + if (WSREP_ON) + wsrep_ready_wait(); +#endif DBUG_PRINT("master_info",("log_file_name: %s position: %s", rli->group_master_log_name, llstr(rli->group_master_log_pos,llbuff))); @@ -4603,13 +4613,21 @@ log '%s' at position %s, relay log '%s' position: %s%s", RPL_LOG_NAME, rli->group_master_log_name, (ulong) rli->group_master_log_pos); saved_skip= 0; } - + if (exec_relay_log_event(thd, rli, serial_rgi)) { DBUG_PRINT("info", ("exec_relay_log_event() failed")); // do not scare the user if SQL thread was simply killed or stopped if (!sql_slave_killed(serial_rgi)) + { slave_output_error_info(rli, thd); +#ifdef WITH_WSREP + if (WSREP_ON && rli->last_error().number == ER_UNKNOWN_COM_ERROR) + { + wsrep_node_dropped= TRUE; + } +#endif /* WITH_WSREP */ + } goto err; } } @@ -4694,6 +4712,27 @@ err_during_init: delete serial_rgi; delete thd; mysql_mutex_unlock(&LOCK_thread_count); +#ifdef WITH_WSREP + /* if slave stopped due to node going non primary, we set global flag to + trigger automatic restart of slave when node joins back to cluster + */ + if (WSREP_ON && wsrep_node_dropped && wsrep_restart_slave) + { + if (wsrep_ready) + { + WSREP_INFO("Slave error due to node temporarily non-primary" + "SQL slave will continue"); + wsrep_node_dropped= FALSE; + mysql_mutex_unlock(&rli->run_lock); + goto wsrep_restart_point; + } else { + WSREP_INFO("Slave error due to node going non-primary"); + WSREP_INFO("wsrep_restart_slave was set and therefore slave will be " + "automatically restarted when node joins back to cluster"); + wsrep_restart_slave_activated= TRUE; + } + } +#endif /* WITH_WSREP */ /* Note: the order of the broadcast and unlock calls below (first broadcast, then unlock) is important. Otherwise a killer_thread can execute between the calls and diff --git a/sql/sp.cc b/sql/sp.cc index 188b311ae86..261299464c5 100644 --- a/sql/sp.cc +++ b/sql/sp.cc @@ -32,7 +32,7 @@ #include <my_user.h> -static bool +bool create_string(THD *thd, String *buf, stored_procedure_type sp_type, const char *db, ulong dblen, @@ -924,7 +924,7 @@ end: } -static void +void sp_returns_type(THD *thd, String &result, sp_head *sp) { TABLE table; @@ -2126,7 +2126,7 @@ int sp_cache_routine(THD *thd, enum stored_procedure_type type, sp_name *name, @return Returns TRUE on success, FALSE on (alloc) failure. */ -static bool +bool create_string(THD *thd, String *buf, stored_procedure_type type, const char *db, ulong dblen, @@ -2271,3 +2271,4 @@ sp_load_for_information_schema(THD *thd, TABLE *proc_table, String *db, thd->lex= old_lex; return sp; } + @@ -214,4 +214,19 @@ bool load_collation(MEM_ROOT *mem_root, CHARSET_INFO *dflt_cl, CHARSET_INFO **cl); +void sp_returns_type(THD *thd, + String &result, + sp_head *sp); + +bool create_string(THD *thd, String *buf, + stored_procedure_type type, + const char *db, ulong dblen, + const char *name, ulong namelen, + const char *params, ulong paramslen, + const char *returns, ulong returnslen, + const char *body, ulong bodylen, + st_sp_chistics *chistics, + const LEX_STRING *definer_user, + const LEX_STRING *definer_host, + ulonglong sql_mode); #endif /* _SP_H_ */ diff --git a/sql/sql_acl.cc b/sql/sql_acl.cc index 106036e1e83..e67a019d1f7 100644 --- a/sql/sql_acl.cc +++ b/sql/sql_acl.cc @@ -2559,7 +2559,9 @@ int check_alter_user(THD *thd, const char *host, const char *user) my_error(ER_OPTION_PREVENTS_STATEMENT, MYF(0), "--skip-grant-tables"); goto end; } - if (!thd->slave_thread && !thd->security_ctx->priv_user[0]) + + if (IF_WSREP((!WSREP(thd) || !thd->wsrep_applier), 1) && + !thd->slave_thread && !thd->security_ctx->priv_user[0]) { my_message(ER_PASSWORD_ANONYMOUS_USER, ER(ER_PASSWORD_ANONYMOUS_USER), MYF(0)); @@ -2570,7 +2572,9 @@ int check_alter_user(THD *thd, const char *host, const char *user) my_error(ER_PASSWORD_NO_MATCH, MYF(0)); goto end; } + if (!thd->slave_thread && + IF_WSREP((!WSREP(thd) || !thd->wsrep_applier),1) && (strcmp(thd->security_ctx->priv_user, user) || my_strcasecmp(system_charset_info, host, thd->security_ctx->priv_host))) @@ -2635,10 +2639,12 @@ bool change_password(THD *thd, const char *host, const char *user, TABLE_LIST tables[TABLES_MAX]; /* Buffer should be extended when password length is extended. */ char buff[512]; - ulong query_length; + ulong query_length=0; enum_binlog_format save_binlog_format; uint new_password_len= (uint) strlen(new_password); - int result; + int result=0; + const CSET_STRING query_save = thd->query_string; + DBUG_ENTER("change_password"); DBUG_PRINT("enter",("host: '%s' user: '%s' new_password: '%s'", host,user,new_password)); @@ -2647,6 +2653,19 @@ bool change_password(THD *thd, const char *host, const char *user, if (check_change_password(thd, host, user, new_password, new_password_len)) DBUG_RETURN(1); +#ifdef WITH_WSREP + if (WSREP(thd) && !thd->wsrep_applier) + { + query_length= sprintf(buff, "SET PASSWORD FOR '%-.120s'@'%-.120s'='%-.120s'", + user ? user : "", + host ? host : "", + new_password); + thd->set_query_inner(buff, query_length, system_charset_info); + + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, (char*)"user", NULL); + } +#endif /* WITH_WSREP */ + if ((result= open_grant_tables(thd, tables, TL_WRITE, Table_user))) DBUG_RETURN(result != 1); @@ -2708,9 +2727,23 @@ bool change_password(THD *thd, const char *host, const char *user, } end: close_mysql_tables(thd); +#ifdef WITH_WSREP + if (WSREP(thd) && !thd->wsrep_applier) + { + WSREP_TO_ISOLATION_END; + + thd->query_string = query_save; + thd->wsrep_exec_mode = LOCAL_STATE; + } +#endif /* WITH_WSREP */ thd->restore_stmt_binlog_format(save_binlog_format); DBUG_RETURN(result); + +error: + WSREP_ERROR("Repliation of SET PASSWORD failed: %s", buff); + DBUG_RETURN(result); + } int acl_check_set_default_role(THD *thd, const char *host, const char *user) @@ -2727,8 +2760,9 @@ int acl_set_default_role(THD *thd, const char *host, const char *user, int result= 1; int error; bool clear_role= FALSE; + char buff[512]; enum_binlog_format save_binlog_format; - + const CSET_STRING query_save = thd->query_string; DBUG_ENTER("acl_set_default_role"); DBUG_PRINT("enter",("host: '%s' user: '%s' rolename: '%s'", @@ -2823,7 +2857,6 @@ int acl_set_default_role(THD *thd, const char *host, const char *user, result= 0; if (mysql_bin_log.is_open()) { - char buff[512]; int query_length= sprintf(buff,"SET DEFAULT ROLE '%-.120s' FOR '%-.120s'@'%-.120s'", safe_str(acl_user->default_rolename.str), @@ -2835,6 +2868,17 @@ int acl_set_default_role(THD *thd, const char *host, const char *user, } end: close_mysql_tables(thd); + +#ifdef WITH_WSREP + if (WSREP(thd) && !thd->wsrep_applier) + { + WSREP_TO_ISOLATION_END; + + thd->query_string = query_save; + thd->wsrep_exec_mode = LOCAL_STATE; + } +#endif /* WITH_WSREP */ + thd->restore_stmt_binlog_format(save_binlog_format); DBUG_RETURN(result); diff --git a/sql/sql_admin.cc b/sql/sql_admin.cc index 34a076cc327..0b610718cd0 100644 --- a/sql/sql_admin.cc +++ b/sql/sql_admin.cc @@ -1143,6 +1143,8 @@ bool Sql_cmd_analyze_table::execute(THD *thd) FALSE, UINT_MAX, FALSE)) goto error; thd->enable_slow_log= opt_log_slow_admin_statements; + WSREP_TO_ISOLATION_BEGIN(first_table->db, first_table->table_name, NULL); + res= mysql_admin_table(thd, first_table, &m_lex->check_opt, "analyze", lock_type, 1, 0, 0, 0, &handler::ha_analyze, 0); @@ -1197,6 +1199,7 @@ bool Sql_cmd_optimize_table::execute(THD *thd) if (check_table_access(thd, SELECT_ACL | INSERT_ACL, first_table, FALSE, UINT_MAX, FALSE)) goto error; /* purecov: inspected */ + WSREP_TO_ISOLATION_BEGIN(first_table->db, first_table->table_name, NULL) thd->enable_slow_log= opt_log_slow_admin_statements; res= (specialflag & SPECIAL_NO_NEW_FUNC) ? mysql_recreate_table(thd, first_table, true) : @@ -1230,6 +1233,7 @@ bool Sql_cmd_repair_table::execute(THD *thd) FALSE, UINT_MAX, FALSE)) goto error; /* purecov: inspected */ thd->enable_slow_log= opt_log_slow_admin_statements; + WSREP_TO_ISOLATION_BEGIN(first_table->db, first_table->table_name, NULL) res= mysql_admin_table(thd, first_table, &m_lex->check_opt, "repair", TL_WRITE, 1, MY_TEST(m_lex->check_opt.sql_flags & TT_USEFRM), diff --git a/sql/sql_alter.cc b/sql/sql_alter.cc index 97b9c127c22..cfe360217c2 100644 --- a/sql/sql_alter.cc +++ b/sql/sql_alter.cc @@ -18,6 +18,7 @@ // mysql_exchange_partition #include "sql_base.h" // open_temporary_tables #include "sql_alter.h" +#include "wsrep_mysqld.h" Alter_info::Alter_info(const Alter_info &rhs, MEM_ROOT *mem_root) :drop_list(rhs.drop_list, mem_root), @@ -303,6 +304,22 @@ bool Sql_cmd_alter_table::execute(THD *thd) thd->enable_slow_log= opt_log_slow_admin_statements; +#ifdef WITH_WSREP + TABLE *find_temporary_table(THD *thd, const TABLE_LIST *tl); + + if (WSREP(thd) && + (!thd->is_current_stmt_binlog_format_row() || + !find_temporary_table(thd, first_table)) && + wsrep_to_isolation_begin(thd, + lex->name.str ? select_lex->db : NULL, + lex->name.str ? lex->name.str : NULL, + first_table)) + { + WSREP_WARN("ALTER TABLE isolation failure"); + DBUG_RETURN(TRUE); + } +#endif /* WITH_WSREP */ + result= mysql_alter_table(thd, select_lex->db, lex->name.str, &create_info, first_table, diff --git a/sql/sql_base.cc b/sql/sql_base.cc index d60506dcad7..68f325e20ab 100644 --- a/sql/sql_base.cc +++ b/sql/sql_base.cc @@ -61,7 +61,8 @@ #ifdef __WIN__ #include <io.h> #endif - +#include "wsrep_mysqld.h" +#include "wsrep_thd.h" bool No_such_table_error_handler::handle_condition(THD *, @@ -3557,7 +3558,7 @@ thr_lock_type read_lock_type_for_table(THD *thd, */ bool log_on= mysql_bin_log.is_open() && thd->variables.sql_log_bin; ulong binlog_format= thd->variables.binlog_format; - if ((log_on == FALSE) || (binlog_format == BINLOG_FORMAT_ROW) || + if ((log_on == FALSE) || (WSREP_FORMAT(binlog_format) == BINLOG_FORMAT_ROW) || (table_list->table->s->table_category == TABLE_CATEGORY_LOG) || (table_list->table->s->table_category == TABLE_CATEGORY_PERFORMANCE) || !(is_update_query(prelocking_ctx->sql_command) || @@ -4585,8 +4586,29 @@ restart: } } +#ifdef WITH_WSREP + if (WSREP_ON && + (thd->lex->sql_command== SQLCOM_INSERT || + thd->lex->sql_command== SQLCOM_INSERT_SELECT || + thd->lex->sql_command== SQLCOM_REPLACE || + thd->lex->sql_command== SQLCOM_REPLACE_SELECT || + thd->lex->sql_command== SQLCOM_UPDATE || + thd->lex->sql_command== SQLCOM_UPDATE_MULTI || + thd->lex->sql_command== SQLCOM_LOAD || + thd->lex->sql_command== SQLCOM_DELETE) && + wsrep_replicate_myisam && + (*start) && + (*start)->table && (*start)->table->file->ht->db_type == DB_TYPE_MYISAM) + { + WSREP_TO_ISOLATION_BEGIN(NULL, NULL, (*start)); + } + error: +#endif + err: THD_STAGE_INFO(thd, stage_after_opening_tables); + thd_proc_info(thd, 0); + free_root(&new_frm_mem, MYF(0)); // Free pre-alloced block if (error && *table_to_open) @@ -5040,6 +5062,8 @@ end: close_thread_tables(thd); } THD_STAGE_INFO(thd, stage_after_opening_tables); + + thd_proc_info(thd, 0); DBUG_RETURN(table); } @@ -5303,7 +5327,7 @@ bool lock_tables(THD *thd, TABLE_LIST *tables, uint count, We can solve these problems in mixed mode by switching to binlogging if at least one updated table is used by sub-statement */ - if (thd->variables.binlog_format != BINLOG_FORMAT_ROW && tables && + if (WSREP_FORMAT(thd->variables.binlog_format) != BINLOG_FORMAT_ROW && tables && has_write_table_with_auto_increment(thd->lex->first_not_own_table())) thd->lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_AUTOINC_COLUMNS); } @@ -8993,7 +9017,17 @@ bool mysql_notify_thread_having_shared_lock(THD *thd, THD *in_use, (e.g. see partitioning code). */ if (!thd_table->needs_reopen()) - signalled|= mysql_lock_abort_for_thread(thd, thd_table); + { + signalled|= mysql_lock_abort_for_thread(thd, thd_table); +#ifdef WITH_WSREP + if (thd && WSREP(thd) && wsrep_thd_is_BF((void *)thd, true)) + { + WSREP_DEBUG("remove_table_from_cache: %llu", + (unsigned long long) thd->real_id); + wsrep_abort_thd((void *)thd, (void *)in_use, FALSE); + } +#endif + } } mysql_mutex_unlock(&in_use->LOCK_thd_data); } diff --git a/sql/sql_builtin.cc.in b/sql/sql_builtin.cc.in index 63850650ac9..2de475b0a76 100644 --- a/sql/sql_builtin.cc.in +++ b/sql/sql_builtin.cc.in @@ -25,7 +25,11 @@ extern #endif builtin_maria_plugin @mysql_mandatory_plugins@ @mysql_optional_plugins@ - builtin_maria_binlog_plugin, builtin_maria_mysql_password_plugin; + builtin_maria_binlog_plugin, +#ifdef WITH_WSREP + builtin_wsrep_plugin@mysql_plugin_defs@, +#endif /* WITH_WSREP */ + builtin_maria_mysql_password_plugin; struct st_maria_plugin *mysql_optional_plugins[]= { @@ -35,5 +39,8 @@ struct st_maria_plugin *mysql_optional_plugins[]= struct st_maria_plugin *mysql_mandatory_plugins[]= { builtin_maria_binlog_plugin, builtin_maria_mysql_password_plugin, +#ifdef WITH_WSREP + builtin_wsrep_plugin@mysql_plugin_defs@, +#endif /* WITH_WSREP */ @mysql_mandatory_plugins@ 0 }; diff --git a/sql/sql_class.cc b/sql/sql_class.cc index 8d6ddc0bb08..443da8557ad 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -64,6 +64,8 @@ #include "sql_parse.h" // is_update_query #include "sql_callback.h" #include "lock.h" +#include "wsrep_mysqld.h" +#include "wsrep_thd.h" #include "sql_connect.h" /* @@ -859,7 +861,7 @@ bool Drop_table_error_handler::handle_condition(THD *thd, } -THD::THD() +THD::THD(bool is_applier) :Statement(&main_lex, &main_mem_root, STMT_CONVENTIONAL_EXECUTION, /* statement id */ 0), rli_fake(0), rgi_fake(0), rgi_slave(NULL), @@ -889,6 +891,16 @@ THD::THD() bootstrap(0), derived_tables_processing(FALSE), spcont(NULL), +#ifdef WITH_WSREP + wsrep_applier(is_applier), + wsrep_applier_closing(false), + wsrep_client_thread(false), + wsrep_po_handle(WSREP_PO_INITIALIZER), + wsrep_po_cnt(0), +// wsrep_po_in_trans(false), + wsrep_apply_format(0), + wsrep_apply_toi(false), +#endif m_parser_state(NULL), #if defined(ENABLED_DEBUG_SYNC) debug_sync_control(0), @@ -1004,6 +1016,22 @@ THD::THD() m_command=COM_CONNECT; *scramble= '\0'; +#ifdef WITH_WSREP + mysql_mutex_init(key_LOCK_wsrep_thd, &LOCK_wsrep_thd, MY_MUTEX_INIT_FAST); + mysql_cond_init(key_COND_wsrep_thd, &COND_wsrep_thd, NULL); + wsrep_ws_handle.trx_id = WSREP_UNDEFINED_TRX_ID; + wsrep_ws_handle.opaque = NULL; + wsrep_retry_counter = 0; + wsrep_PA_safe = true; + wsrep_retry_query = NULL; + wsrep_retry_query_len = 0; + wsrep_retry_command = COM_CONNECT; + wsrep_consistency_check = NO_CONSISTENCY_CHECK; + wsrep_status_vars = 0; + wsrep_mysql_replicated = 0; + wsrep_TOI_pre_query = NULL; + wsrep_TOI_pre_query_len = 0; +#endif /* Call to init() below requires fully initialized Open_tables_state. */ reset_open_tables_state(this); @@ -1043,6 +1071,9 @@ THD::THD() my_rnd_init(&rand, tmp + (ulong) &rand, tmp + (ulong) ::global_query_id); substitute_null_with_insert_id = FALSE; thr_lock_info_init(&lock_info); /* safety: will be reset after start */ + lock_info.mysql_thd= (void *)this; + + wsrep_info[sizeof(wsrep_info) - 1] = '\0'; /* make sure it is 0-terminated */ m_internal_handler= NULL; m_binlog_invoker= INVOKER_NONE; @@ -1387,7 +1418,24 @@ void THD::init(void) bzero((char *) &org_status_var, sizeof(org_status_var)); start_bytes_received= 0; last_commit_gtid.seq_no= 0; - +#ifdef WITH_WSREP + wsrep_exec_mode= wsrep_applier ? REPL_RECV : LOCAL_STATE; + wsrep_conflict_state= NO_CONFLICT; + wsrep_query_state= QUERY_IDLE; + wsrep_last_query_id= 0; + wsrep_trx_meta.gtid= WSREP_GTID_UNDEFINED; + wsrep_trx_meta.depends_on= WSREP_SEQNO_UNDEFINED; + wsrep_converted_lock_session= false; + wsrep_retry_counter= 0; + wsrep_rli= NULL; + wsrep_rgi= NULL; + wsrep_PA_safe= true; + wsrep_consistency_check = NO_CONSISTENCY_CHECK; + wsrep_mysql_replicated = 0; + + wsrep_TOI_pre_query = NULL; + wsrep_TOI_pre_query_len = 0; +#endif if (variables.sql_log_bin) variables.option_bits|= OPTION_BIN_LOG; else @@ -1582,6 +1630,14 @@ THD::~THD() mysql_mutex_lock(&LOCK_thd_data); mysql_mutex_unlock(&LOCK_thd_data); +#ifdef WITH_WSREP + mysql_mutex_lock(&LOCK_wsrep_thd); + mysql_mutex_unlock(&LOCK_wsrep_thd); + mysql_mutex_destroy(&LOCK_wsrep_thd); + if (wsrep_rli) delete wsrep_rli; + if (wsrep_rgi) delete wsrep_rgi; + if (wsrep_status_vars) wsrep->stats_free(wsrep, wsrep_status_vars); +#endif /* Close connection */ #ifndef EMBEDDED_LIBRARY if (net.vio) @@ -1892,7 +1948,17 @@ bool THD::notify_shared_lock(MDL_context_owner *ctx_in_use, (e.g. see partitioning code). */ if (!thd_table->needs_reopen()) + { signalled|= mysql_lock_abort_for_thread(this, thd_table); +#if WITH_WSREP + if (this && WSREP(this) && wsrep_thd_is_BF((void *)this, FALSE)) + { + WSREP_DEBUG("remove_table_from_cache: %llu", + (unsigned long long) this->real_id); + wsrep_abort_thd((void *)this, (void *)in_use, FALSE); + } +#endif /* WITH_WSREP */ + } } mysql_mutex_unlock(&in_use->LOCK_thd_data); } @@ -2074,6 +2140,12 @@ void THD::cleanup_after_query() /* reset table map for multi-table update */ table_map_for_update= 0; m_binlog_invoker= INVOKER_NONE; +#ifdef WITH_WSREP + if (TOTAL_ORDER == wsrep_exec_mode) + { + wsrep_exec_mode = LOCAL_STATE; + } +#endif /* WITH_WSREP */ #ifndef EMBEDDED_LIBRARY if (rgi_slave) @@ -2496,6 +2568,13 @@ bool sql_exchange::escaped_given(void) bool select_send::send_result_set_metadata(List<Item> &list, uint flags) { bool res; +#ifdef WITH_WSREP + if (WSREP(thd) && thd->wsrep_retry_query) + { + WSREP_DEBUG("skipping select metadata"); + return FALSE; + } +#endif /* WITH_WSREP */ if (!(res= thd->protocol->send_result_set_metadata(&list, flags))) is_result_set_started= 1; return res; @@ -4241,8 +4320,10 @@ extern "C" int thd_non_transactional_update(const MYSQL_THD thd) extern "C" int thd_binlog_format(const MYSQL_THD thd) { - if (mysql_bin_log.is_open() && (thd->variables.option_bits & OPTION_BIN_LOG)) - return (int) thd->variables.binlog_format; + if (IF_WSREP(((WSREP(thd) && wsrep_emulate_bin_log) || mysql_bin_log.is_open()), + mysql_bin_log.is_open()) && + thd->variables.option_bits & OPTION_BIN_LOG) + return (int) WSREP_FORMAT(thd->variables.binlog_format); else return BINLOG_FORMAT_UNSPEC; } @@ -4972,7 +5053,7 @@ int THD::decide_logging_format(TABLE_LIST *tables) binlog by filtering rules. */ if (mysql_bin_log.is_open() && (variables.option_bits & OPTION_BIN_LOG) && - !(variables.binlog_format == BINLOG_FORMAT_STMT && + !(WSREP_FORMAT(variables.binlog_format) == BINLOG_FORMAT_STMT && !binlog_filter->db_ok(db))) { /* @@ -5182,7 +5263,7 @@ int THD::decide_logging_format(TABLE_LIST *tables) */ my_error((error= ER_BINLOG_ROW_INJECTION_AND_STMT_ENGINE), MYF(0)); } - else if (variables.binlog_format == BINLOG_FORMAT_ROW && + else if (WSREP_FORMAT(variables.binlog_format) == BINLOG_FORMAT_ROW && sqlcom_can_generate_row_events(this)) { /* @@ -5211,7 +5292,7 @@ int THD::decide_logging_format(TABLE_LIST *tables) else { /* binlog_format = STATEMENT */ - if (variables.binlog_format == BINLOG_FORMAT_STMT) + if (WSREP_FORMAT(variables.binlog_format) == BINLOG_FORMAT_STMT) { if (lex->is_stmt_row_injection()) { @@ -5228,7 +5309,10 @@ int THD::decide_logging_format(TABLE_LIST *tables) 5. Error: Cannot modify table that uses a storage engine limited to row-logging when binlog_format = STATEMENT */ - my_error((error= ER_BINLOG_STMT_MODE_AND_ROW_ENGINE), MYF(0), ""); + if (IF_WSREP((!WSREP(this) || wsrep_exec_mode == LOCAL_STATE),1)) + { + my_error((error= ER_BINLOG_STMT_MODE_AND_ROW_ENGINE), MYF(0), ""); + } } else if (is_write && (unsafe_flags= lex->get_stmt_unsafe_flags()) != 0) { @@ -5340,7 +5424,7 @@ int THD::decide_logging_format(TABLE_LIST *tables) "and binlog_filter->db_ok(db) = %d", mysql_bin_log.is_open(), (variables.option_bits & OPTION_BIN_LOG), - variables.binlog_format, + WSREP_FORMAT(variables.binlog_format), binlog_filter->db_ok(db))); #endif @@ -5576,9 +5660,11 @@ CPP_UNNAMED_NS_END int THD::binlog_write_row(TABLE* table, bool is_trans, MY_BITMAP const* cols, size_t colcnt, uchar const *record) -{ - DBUG_ASSERT(is_current_stmt_binlog_format_row() && mysql_bin_log.is_open()); +{ + DBUG_ASSERT(is_current_stmt_binlog_format_row() && + IF_WSREP(((WSREP(this) && wsrep_emulate_bin_log) || mysql_bin_log.is_open()), + mysql_bin_log.is_open())); /* Pack records into format for transfer. We are allocating more memory than needed, but that doesn't matter. @@ -5610,8 +5696,10 @@ int THD::binlog_update_row(TABLE* table, bool is_trans, MY_BITMAP const* cols, size_t colcnt, const uchar *before_record, const uchar *after_record) -{ - DBUG_ASSERT(is_current_stmt_binlog_format_row() && mysql_bin_log.is_open()); +{ + DBUG_ASSERT(is_current_stmt_binlog_format_row() && + IF_WSREP(((WSREP(this) && wsrep_emulate_bin_log) || mysql_bin_log.is_open()), + mysql_bin_log.is_open())); size_t const before_maxlen = max_row_length(table, before_record); size_t const after_maxlen = max_row_length(table, after_record); @@ -5659,8 +5747,10 @@ int THD::binlog_update_row(TABLE* table, bool is_trans, int THD::binlog_delete_row(TABLE* table, bool is_trans, MY_BITMAP const* cols, size_t colcnt, uchar const *record) -{ - DBUG_ASSERT(is_current_stmt_binlog_format_row() && mysql_bin_log.is_open()); +{ + DBUG_ASSERT(is_current_stmt_binlog_format_row() && + IF_WSREP(((WSREP(this) && wsrep_emulate_bin_log) || mysql_bin_log.is_open()), + mysql_bin_log.is_open())); /* Pack records into format for transfer. We are allocating more @@ -5695,7 +5785,8 @@ int THD::binlog_remove_pending_rows_event(bool clear_maps, { DBUG_ENTER("THD::binlog_remove_pending_rows_event"); - if (!mysql_bin_log.is_open()) + IF_WSREP(!(WSREP_EMULATE_BINLOG(this) || mysql_bin_log.is_open()), + !mysql_bin_log.is_open()); DBUG_RETURN(0); /* Ensure that all events in a GTID group are in the same cache */ @@ -5718,7 +5809,8 @@ int THD::binlog_flush_pending_rows_event(bool stmt_end, bool is_transactional) mode: it might be the case that we left row-based mode before flushing anything (e.g., if we have explicitly locked tables). */ - if (!mysql_bin_log.is_open()) + if(IF_WSREP(!(WSREP_EMULATE_BINLOG(this) || mysql_bin_log.is_open()), + !mysql_bin_log.is_open())) DBUG_RETURN(0); /* Ensure that all events in a GTID group are in the same cache */ @@ -5970,7 +6062,10 @@ int THD::binlog_query(THD::enum_binlog_query_type qtype, char const *query_arg, DBUG_ENTER("THD::binlog_query"); DBUG_PRINT("enter", ("qtype: %s query: '%-.*s'", show_query_type(qtype), (int) query_len, query_arg)); - DBUG_ASSERT(query_arg && mysql_bin_log.is_open()); + + DBUG_ASSERT(query_arg && + IF_WSREP((WSREP_EMULATE_BINLOG(this) || mysql_bin_log.is_open()), + mysql_bin_log.is_open())); /* If this is withing a BEGIN ... COMMIT group, don't log it */ if (variables.option_bits & OPTION_GTID_BEGIN) diff --git a/sql/sql_class.h b/sql/sql_class.h index f3537086132..d3b305d34f4 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -50,12 +50,13 @@ void set_thd_stage_info(void *thd, const char *calling_func, const char *calling_file, const unsigned int calling_line); - + #define THD_STAGE_INFO(thd, stage) \ (thd)->enter_stage(& stage, NULL, __func__, __FILE__, __LINE__) #include "my_apc.h" #include "rpl_gtid.h" +#include "wsrep_mysqld.h" class Reprepare_observer; class Relay_log_info; @@ -638,6 +639,12 @@ typedef struct system_variables ulong wt_timeout_short, wt_deadlock_search_depth_short; ulong wt_timeout_long, wt_deadlock_search_depth_long; +#ifdef WITH_WSREP + my_bool wsrep_on; + my_bool wsrep_causal_reads; + uint wsrep_sync_wait; + ulong wsrep_retry_autocommit; +#endif double long_query_time_double; my_bool pseudo_slave_mode; @@ -2072,7 +2079,7 @@ public: int is_current_stmt_binlog_format_row() const { DBUG_ASSERT(current_stmt_binlog_format == BINLOG_FORMAT_STMT || current_stmt_binlog_format == BINLOG_FORMAT_ROW); - return current_stmt_binlog_format == BINLOG_FORMAT_ROW; + return (WSREP_FORMAT((ulong)current_stmt_binlog_format) == BINLOG_FORMAT_ROW); } enum binlog_filter_state @@ -2725,6 +2732,45 @@ public: query_id_t first_query_id; } binlog_evt_union; +#ifdef WITH_WSREP + const bool wsrep_applier; /* dedicated slave applier thread */ + bool wsrep_applier_closing; /* applier marked to close */ + bool wsrep_client_thread; /* to identify client threads*/ + bool wsrep_PA_safe; + bool wsrep_converted_lock_session; + bool wsrep_apply_toi; /* applier processing in TOI */ + enum wsrep_exec_mode wsrep_exec_mode; + query_id_t wsrep_last_query_id; + enum wsrep_query_state wsrep_query_state; + enum wsrep_conflict_state wsrep_conflict_state; + mysql_mutex_t LOCK_wsrep_thd; + mysql_cond_t COND_wsrep_thd; + // changed from wsrep_seqno_t to wsrep_trx_meta_t in wsrep API rev 75 + // wsrep_seqno_t wsrep_trx_seqno; + wsrep_trx_meta_t wsrep_trx_meta; + uint32 wsrep_rand; + Relay_log_info* wsrep_rli; + rpl_group_info* wsrep_rgi; + wsrep_ws_handle_t wsrep_ws_handle; + ulong wsrep_retry_counter; // of autocommit + char* wsrep_retry_query; + size_t wsrep_retry_query_len; + enum enum_server_command wsrep_retry_command; + enum wsrep_consistency_check_mode + wsrep_consistency_check; + wsrep_stats_var* wsrep_status_vars; + int wsrep_mysql_replicated; + const char* wsrep_TOI_pre_query; /* a query to apply before + the actual TOI query */ + size_t wsrep_TOI_pre_query_len; + wsrep_po_handle_t wsrep_po_handle; + size_t wsrep_po_cnt; +#ifdef GTID_SUPPORT + rpl_sid wsrep_po_sid; +#endif /* GTID_SUPPORT */ + void* wsrep_apply_format; +#endif /* WITH_WSREP */ + char wsrep_info[128]; /* string for dynamic proc info */ /** Internal parser state. Note that since the parser is not re-entrant, we keep only one parser @@ -2756,7 +2802,8 @@ public: /* Debug Sync facility. See debug_sync.cc. */ struct st_debug_sync_control *debug_sync_control; #endif /* defined(ENABLED_DEBUG_SYNC) */ - THD(); + THD(bool is_applier= false); + ~THD(); void init(void); @@ -3270,7 +3317,7 @@ public: tests fail and so force them to propagate the lex->binlog_row_based_if_mixed upwards to the caller. */ - if ((variables.binlog_format == BINLOG_FORMAT_MIXED) && + if ((WSREP_FORMAT(variables.binlog_format) == BINLOG_FORMAT_MIXED) && (in_sub_stmt == 0)) set_current_stmt_binlog_format_row(); @@ -3322,7 +3369,7 @@ public: show_system_thread(system_thread))); if (in_sub_stmt == 0) { - if (variables.binlog_format == BINLOG_FORMAT_ROW) + if (WSREP_FORMAT(variables.binlog_format) == BINLOG_FORMAT_ROW) set_current_stmt_binlog_format_row(); else if (temporary_tables == NULL) set_current_stmt_binlog_format_stmt(); diff --git a/sql/sql_connect.cc b/sql/sql_connect.cc index 433f3303ad7..0065edcc14d 100644 --- a/sql/sql_connect.cc +++ b/sql/sql_connect.cc @@ -37,6 +37,7 @@ // reset_host_errors #include "sql_acl.h" // acl_getroot, NO_ACCESS, SUPER_ACL #include "sql_callback.h" +#include "wsrep_mysqld.h" HASH global_user_stats, global_client_stats, global_table_stats; HASH global_index_stats; @@ -1172,6 +1173,17 @@ bool login_connection(THD *thd) void end_connection(THD *thd) { NET *net= &thd->net; +#ifdef WITH_WSREP + if (WSREP(thd)) + { + wsrep_status_t rcode= wsrep->free_connection(wsrep, thd->thread_id); + if (rcode) { + WSREP_WARN("wsrep failed to free connection context: %lu, code: %d", + thd->thread_id, rcode); + } + } + thd->wsrep_client_thread= 0; +#endif plugin_thdvar_cleanup(thd); if (thd->user_connect) @@ -1307,6 +1319,9 @@ bool thd_prepare_connection(THD *thd) (char *) thd->security_ctx->host_or_ip); prepare_new_connection_state(thd); +#ifdef WITH_WSREP + thd->wsrep_client_thread= 1; +#endif /* WITH_WSREP */ return FALSE; } @@ -1380,7 +1395,15 @@ void do_handle_one_connection(THD *thd_arg) break; } end_connection(thd); - + +#ifdef WITH_WSREP + if (WSREP(thd)) + { + mysql_mutex_lock(&thd->LOCK_wsrep_thd); + thd->wsrep_query_state= QUERY_EXITING; + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + } +#endif end_thread: close_connection(thd); diff --git a/sql/sql_delete.cc b/sql/sql_delete.cc index 38fb897c4f8..5eb4c405c4b 100644 --- a/sql/sql_delete.cc +++ b/sql/sql_delete.cc @@ -642,18 +642,19 @@ cleanup: if (!transactional_table && deleted > 0) thd->transaction.stmt.modified_non_trans_table= thd->transaction.all.modified_non_trans_table= TRUE; - + /* See similar binlogging code in sql_update.cc, for comments */ if ((error < 0) || thd->transaction.stmt.modified_non_trans_table) { - if (mysql_bin_log.is_open()) + if(IF_WSREP((WSREP_EMULATE_BINLOG(thd) || mysql_bin_log.is_open()), + mysql_bin_log.is_open())) { int errcode= 0; if (error < 0) thd->clear_error(); else errcode= query_error_code(thd, killed_status == NOT_KILLED); - + /* [binlog]: If 'handler::delete_all_rows()' was called and the storage engine does not inject the rows itself, we replicate @@ -1107,13 +1108,14 @@ void multi_delete::abort_result_set() DBUG_ASSERT(error_handled); DBUG_VOID_RETURN; } - + if (thd->transaction.stmt.modified_non_trans_table) { - /* + /* there is only side effects; to binlog with the error */ - if (mysql_bin_log.is_open()) + if (IF_WSREP((WSREP_EMULATE_BINLOG(thd) || mysql_bin_log.is_open()), + mysql_bin_log.is_open())) { int errcode= query_error_code(thd, thd->killed == NOT_KILLED); /* possible error of writing binary log is ignored deliberately */ @@ -1289,7 +1291,8 @@ bool multi_delete::send_eof() } if ((local_error == 0) || thd->transaction.stmt.modified_non_trans_table) { - if (mysql_bin_log.is_open()) + if(IF_WSREP((WSREP_EMULATE_BINLOG(thd) || mysql_bin_log.is_open()), + mysql_bin_log.is_open())) { int errcode= 0; if (local_error == 0) diff --git a/sql/sql_insert.cc b/sql/sql_insert.cc index d61af758ced..f90e9cd2439 100644 --- a/sql/sql_insert.cc +++ b/sql/sql_insert.cc @@ -1014,7 +1014,8 @@ bool mysql_insert(THD *thd,TABLE_LIST *table_list, thd->transaction.stmt.modified_non_trans_table || was_insert_delayed) { - if (mysql_bin_log.is_open()) + if(IF_WSREP((WSREP_EMULATE_BINLOG(thd) || mysql_bin_log.is_open()), + mysql_bin_log.is_open())) { int errcode= 0; if (error <= 0) @@ -3195,6 +3196,13 @@ bool Delayed_insert::handle_inserts(void) mysql_cond_broadcast(&cond_client); // If waiting clients } } + +#ifdef WITH_WSREP + if (WSREP((&thd))) + thd_proc_info(&thd, "insert done"); + else +#endif /* WITH_WSREP */ + thd_proc_info(&thd, 0); mysql_mutex_unlock(&mutex); /* @@ -3647,8 +3655,11 @@ bool select_insert::send_eof() DBUG_PRINT("enter", ("trans_table=%d, table_type='%s'", trans_table, table->file->table_type())); - error= (thd->locked_tables_mode <= LTM_LOCK_TABLES ? - table->file->ha_end_bulk_insert() : 0); + error = IF_WSREP((thd->wsrep_conflict_state == MUST_ABORT || + thd->wsrep_conflict_state == CERT_FAILURE) ? -1 :, ) + (thd->locked_tables_mode <= LTM_LOCK_TABLES ? + table->file->ha_end_bulk_insert() : 0); + if (!error && thd->is_error()) error= thd->get_stmt_da()->sql_errno(); @@ -3676,8 +3687,9 @@ bool select_insert::send_eof() events are in the transaction cache and will be written when ha_autocommit_or_rollback() is issued below. */ - if (mysql_bin_log.is_open() && - (!error || thd->transaction.stmt.modified_non_trans_table)) + if(IF_WSREP((WSREP_EMULATE_BINLOG(thd) || mysql_bin_log.is_open()), + mysql_bin_log.is_open()) && + (!error || thd->transaction.stmt.modified_non_trans_table)) { int errcode= 0; if (!error) @@ -3761,7 +3773,8 @@ void select_insert::abort_result_set() { if (!can_rollback_data()) thd->transaction.all.modified_non_trans_table= TRUE; - if (mysql_bin_log.is_open()) + if(IF_WSREP((WSREP_EMULATE_BINLOG(thd) || mysql_bin_log.is_open()), + mysql_bin_log.is_open())) { int errcode= query_error_code(thd, thd->killed == NOT_KILLED); /* error of writing binary log is ignored */ @@ -4169,7 +4182,8 @@ select_create::binlog_show_create_table(TABLE **tables, uint count) create_info->table_was_deleted); DBUG_ASSERT(result == 0); /* store_create_info() always return 0 */ - if (mysql_bin_log.is_open()) + if(IF_WSREP((WSREP_EMULATE_BINLOG(thd) || mysql_bin_log.is_open()), + mysql_bin_log.is_open())) { int errcode= query_error_code(thd, thd->killed == NOT_KILLED); result= thd->binlog_query(THD::STMT_QUERY_TYPE, @@ -4179,6 +4193,9 @@ select_create::binlog_show_create_table(TABLE **tables, uint count) /* suppress_use */ FALSE, errcode); } + + IF_WSREP(ha_fake_trx_id(thd), ); + return result; } @@ -4208,6 +4225,21 @@ bool select_create::send_eof() trans_commit_stmt(thd); if (!(thd->variables.option_bits & OPTION_GTID_BEGIN)) trans_commit_implicit(thd); +#ifdef WITH_WSREP + if (WSREP_ON) + { + mysql_mutex_lock(&thd->LOCK_wsrep_thd); + if (thd->wsrep_conflict_state != NO_CONFLICT) + { + WSREP_DEBUG("select_create commit failed, thd: %lu err: %d %s", + thd->thread_id, thd->wsrep_conflict_state, thd->query()); + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + abort_result_set(); + return TRUE; + } + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + } +#endif /* WITH_WSREP */ } else if (!thd->is_current_stmt_binlog_format_row()) table->s->table_creation_was_logged= 1; diff --git a/sql/sql_lex.cc b/sql/sql_lex.cc index cd9f9238f71..926aa7d5e5a 100644 --- a/sql/sql_lex.cc +++ b/sql/sql_lex.cc @@ -1,5 +1,5 @@ /* Copyright (c) 2000, 2013, Oracle and/or its affiliates. - Copyright (c) 2009, 2013, Monty Program Ab. + Copyright (c) 2009, 2014, Monty Program Ab. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -1579,6 +1579,17 @@ int lex_one_token(void *arg, THD *thd) } else { +#ifdef WITH_WSREP + if (WSREP(thd) && version == 99997 && thd->wsrep_exec_mode == LOCAL_STATE) + { + WSREP_DEBUG("consistency check: %s", thd->query()); + thd->wsrep_consistency_check= CONSISTENCY_CHECK_DECLARED; + lip->yySkipn(5); + lip->set_echo(TRUE); + state=MY_LEX_START; + break; /* Do not treat contents as a comment. */ + } +#endif /* WITH_WSREP */ /* Patch and skip the conditional comment to avoid it being propagated infinitely (eg. to a slave). diff --git a/sql/sql_parse.cc b/sql/sql_parse.cc index 019fd55e3d8..066acfb015a 100644 --- a/sql/sql_parse.cc +++ b/sql/sql_parse.cc @@ -1,5 +1,5 @@ /* Copyright (c) 2000, 2013, Oracle and/or its affiliates. - Copyright (c) 2008, 2013, Monty Program Ab + Copyright (c) 2008, 2014, Monty Program Ab This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -103,6 +103,14 @@ #include "../storage/maria/ha_maria.h" #endif +#include "wsrep_mysqld.h" +#include "wsrep_thd.h" + +#ifdef WITH_WSREP +static void wsrep_mysql_parse(THD *thd, char *rawbuf, uint length, + Parser_state *parser_state); +#endif /* WITH_WSREP */ + /** @defgroup Runtime_Environment Runtime Environment @{ @@ -890,6 +898,19 @@ bool do_command(THD *thd) enum enum_server_command command; DBUG_ENTER("do_command"); +#ifdef WITH_WSREP + if (WSREP(thd)) + { + mysql_mutex_lock(&thd->LOCK_wsrep_thd); + thd->wsrep_query_state= QUERY_IDLE; + if (thd->wsrep_conflict_state==MUST_ABORT) + { + wsrep_client_rollback(thd); + } + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + } +#endif /* WITH_WSREP */ + /* indicator of uninitialized lex => normal flow of errors handling (see my_message_sql) @@ -937,6 +958,29 @@ bool do_command(THD *thd) thd->m_server_idle= TRUE; packet_length= my_net_read(net); thd->m_server_idle= FALSE; +#ifdef WITH_WSREP + if (WSREP(thd)) { + mysql_mutex_lock(&thd->LOCK_wsrep_thd); + + /* these THD's are aborted or are aborting during being idle */ + if (thd->wsrep_conflict_state == ABORTING) + { + while (thd->wsrep_conflict_state == ABORTING) { + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + my_sleep(1000); + mysql_mutex_lock(&thd->LOCK_wsrep_thd); + } + thd->store_globals(); + } + else if (thd->wsrep_conflict_state == ABORTED) + { + thd->store_globals(); + } + + thd->wsrep_query_state= QUERY_EXEC; + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + } +#endif /* WITH_WSREP */ if (packet_length == packet_error) { @@ -944,6 +988,19 @@ bool do_command(THD *thd) net->error, vio_description(net->vio))); +#ifdef WITH_WSREP + if (WSREP(thd)) + { + mysql_mutex_lock(&thd->LOCK_wsrep_thd); + if (thd->wsrep_conflict_state == MUST_ABORT) + { + DBUG_PRINT("wsrep",("aborted for wsrep rollback: %lu", thd->real_id)); + wsrep_client_rollback(thd); + } + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + } +#endif /* WITH_WSREP */ + /* Instrument this broken statement as "statement/com/error" */ thd->m_statement_psi= MYSQL_REFINE_STATEMENT(thd->m_statement_psi, com_statement_info[COM_END]. @@ -998,12 +1055,71 @@ bool do_command(THD *thd) vio_description(net->vio), command, command_name[command].str)); +#ifdef WITH_WSREP + if (WSREP(thd)) + { + /* + * bail out if DB snapshot has not been installed. We however, + * allow queries "SET" and "SHOW", they are trapped later in execute_command + */ + if (thd->variables.wsrep_on && !thd->wsrep_applier && !wsrep_ready && + command != COM_QUERY && + command != COM_PING && + command != COM_QUIT && + command != COM_PROCESS_INFO && + command != COM_PROCESS_KILL && + command != COM_SET_OPTION && + command != COM_SHUTDOWN && + command != COM_SLEEP && + command != COM_STATISTICS && + command != COM_TIME && + command != COM_END + ) { + my_message(ER_UNKNOWN_COM_ERROR, + "WSREP has not yet prepared node for application use", + MYF(0)); + thd->protocol->end_statement(); + return_value= FALSE; + goto out; + } + } +#endif /* WITH_WSREP */ /* Restore read timeout value */ my_net_set_read_timeout(net, thd->variables.net_read_timeout); DBUG_ASSERT(packet_length); DBUG_ASSERT(!thd->apc_target.is_enabled()); return_value= dispatch_command(command, thd, packet+1, (uint) (packet_length-1)); +#ifdef WITH_WSREP + if (WSREP(thd)) + { + while (thd->wsrep_conflict_state== RETRY_AUTOCOMMIT) + { + WSREP_DEBUG("Retry autocommit for: %s\n", thd->wsrep_retry_query); + CHARSET_INFO *current_charset = thd->variables.character_set_client; + if (!is_supported_parser_charset(current_charset)) + { + /* Do not use non-supported parser character sets */ + WSREP_WARN("Current client character set is non-supported parser " + "character set: %s", current_charset->csname); + thd->variables.character_set_client = &my_charset_latin1; + WSREP_WARN("For retry temporally setting character set to : %s", + my_charset_latin1.csname); + } + return_value= dispatch_command(command, thd, thd->wsrep_retry_query, + thd->wsrep_retry_query_len); + thd->variables.character_set_client = current_charset; + } + + if (thd->wsrep_retry_query && thd->wsrep_conflict_state != REPLAYING) + { + my_free(thd->wsrep_retry_query); + thd->wsrep_retry_query = NULL; + thd->wsrep_retry_query_len = 0; + thd->wsrep_retry_command = COM_CONNECT; + } + } +#endif /* WITH_WSREP */ DBUG_ASSERT(!thd->apc_target.is_enabled()); out: @@ -1027,7 +1143,7 @@ out: @retval FALSE The statement isn't updating any relevant tables. */ -static my_bool deny_updates_if_read_only_option(THD *thd, +my_bool deny_updates_if_read_only_option(THD *thd, TABLE_LIST *all_tables) { DBUG_ENTER("deny_updates_if_read_only_option"); @@ -1081,6 +1197,7 @@ static my_bool deny_updates_if_read_only_option(THD *thd, DBUG_RETURN(FALSE); } + /** Perform one connection-level (COM_XXXX) command. @@ -1110,6 +1227,43 @@ bool dispatch_command(enum enum_server_command command, THD *thd, DBUG_ENTER("dispatch_command"); DBUG_PRINT("info", ("command: %d", command)); +#ifdef WITH_WSREP + if (WSREP(thd)) + { + if (!thd->in_multi_stmt_transaction_mode()) + { + thd->wsrep_PA_safe= true; + } + + mysql_mutex_lock(&thd->LOCK_wsrep_thd); + thd->wsrep_query_state= QUERY_EXEC; + if (thd->wsrep_conflict_state== RETRY_AUTOCOMMIT) + { + thd->wsrep_conflict_state= NO_CONFLICT; + } + if (thd->wsrep_conflict_state== MUST_ABORT) + { + wsrep_client_rollback(thd); + } + if (thd->wsrep_conflict_state== ABORTED) + { + my_error(ER_LOCK_DEADLOCK, MYF(0), "wsrep aborted transaction"); + WSREP_DEBUG("Deadlock error for: %s", thd->query()); + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + thd->killed = NOT_KILLED; + thd->mysys_var->abort = 0; + thd->wsrep_conflict_state = NO_CONFLICT; + thd->wsrep_retry_counter = 0; + /* + Increment threads running to compensate dec_thread_running() called + after dispatch_end label. + */ + inc_thread_running(); + goto dispatch_end; + } + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + } +#endif /* WITH_WSREP */ #if defined(ENABLED_PROFILING) thd->profiling.start_new_query(); #endif @@ -1306,7 +1460,12 @@ bool dispatch_command(enum enum_server_command command, THD *thd, if (parser_state.init(thd, thd->query(), thd->query_length())) break; - mysql_parse(thd, thd->query(), thd->query_length(), &parser_state); +#ifdef WITH_WSREP + if (WSREP_ON) + wsrep_mysql_parse(thd, thd->query(), thd->query_length(), &parser_state); + else +#endif /* WITH_WSREP */ + mysql_parse(thd, thd->query(), thd->query_length(), &parser_state); while (!thd->killed && (parser_state.m_lip.found_semicolon != NULL) && ! thd->is_error()) @@ -1380,10 +1539,20 @@ bool dispatch_command(enum enum_server_command command, THD *thd, Count each statement from the client. */ statistic_increment(thd->status_var.questions, &LOCK_status); - thd->set_time(); /* Reset the query start time. */ + + if(IF_WSREP(!WSREP(thd), 1)) + thd->set_time(); /* Reset the query start time. */ + parser_state.reset(beginning_of_next_stmt, length); /* TODO: set thd->lex->sql_command to SQLCOM_END here */ - mysql_parse(thd, beginning_of_next_stmt, length, &parser_state); + +#ifdef WITH_WSREP + if (WSREP_ON) + wsrep_mysql_parse(thd, beginning_of_next_stmt, length, &parser_state); + else +#endif /* WITH_WSREP */ + mysql_parse(thd, beginning_of_next_stmt, length, &parser_state); + } DBUG_PRINT("info",("query ready")); @@ -1720,15 +1889,39 @@ bool dispatch_command(enum enum_server_command command, THD *thd, my_message(ER_UNKNOWN_COM_ERROR, ER(ER_UNKNOWN_COM_ERROR), MYF(0)); break; } - DBUG_ASSERT(thd->derived_tables == NULL && - (thd->open_tables == NULL || +#ifdef WITH_WSREP + dispatch_end: + + if (WSREP(thd)) + { + /* wsrep BF abort in query exec phase */ + mysql_mutex_lock(&thd->LOCK_wsrep_thd); + if ((thd->wsrep_conflict_state != REPLAYING) && + (thd->wsrep_conflict_state != RETRY_AUTOCOMMIT)) + { + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + thd->update_server_status(); + thd->protocol->end_statement(); + query_cache_end_of_result(thd); + } + else + { + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + } + } + else +#endif /* WITH_WSREP */ + { + DBUG_ASSERT(thd->derived_tables == NULL && + (thd->open_tables == NULL || (thd->locked_tables_mode == LTM_LOCK_TABLES))); - thd_proc_info(thd, "updating status"); - /* Finalize server status flags after executing a command. */ - thd->update_server_status(); - thd->protocol->end_statement(); - query_cache_end_of_result(thd); + thd_proc_info(thd, "updating status"); + /* Finalize server status flags after executing a command. */ + thd->update_server_status(); + thd->protocol->end_statement(); + query_cache_end_of_result(thd); + } if (!thd->is_error() && !thd->killed_errno()) mysql_audit_general(thd, MYSQL_AUDIT_GENERAL_RESULT, 0, 0); @@ -2381,7 +2574,47 @@ mysql_execute_command(THD *thd) #ifdef HAVE_REPLICATION } /* endif unlikely slave */ #endif +#ifdef WITH_WSREP + if (WSREP(thd)) + { + /* + change LOCK TABLE WRITE to transaction + */ + if (lex->sql_command== SQLCOM_LOCK_TABLES && wsrep_convert_LOCK_to_trx) + { + for (TABLE_LIST *table= all_tables; table; table= table->next_global) + { + if (table->lock_type >= TL_WRITE_ALLOW_WRITE) + { + lex->sql_command= SQLCOM_BEGIN; + thd->wsrep_converted_lock_session= true; + break; + } + } + } + if (lex->sql_command== SQLCOM_UNLOCK_TABLES && + thd->wsrep_converted_lock_session) + { + thd->wsrep_converted_lock_session= false; + lex->sql_command= SQLCOM_COMMIT; + lex->tx_release= TVL_NO; + } + /* + * bail out if DB snapshot has not been installed. We however, + * allow SET and SHOW queries + */ + if (thd->variables.wsrep_on && !thd->wsrep_applier && !wsrep_ready && + lex->sql_command != SQLCOM_SET_OPTION && + !wsrep_is_show_query(lex->sql_command)) + { + my_message(ER_UNKNOWN_COM_ERROR, + "WSREP has not yet prepared node for application use", + MYF(0)); + goto error; + } + } +#endif /* WITH_WSREP */ status_var_increment(thd->status_var.com_stat[lex->sql_command]); thd->progress.report_to_client= MY_TEST(sql_command_flags[lex->sql_command] & CF_REPORT_PROGRESS); @@ -2423,12 +2656,16 @@ mysql_execute_command(THD *thd) { /* Commit the normal transaction if one is active. */ if (trans_commit_implicit(thd)) + { + thd->mdl_context.release_transactional_locks(); + WSREP_DEBUG("implicit commit failed, MDL released: %lu", thd->thread_id); goto error; + } /* Release metadata locks acquired in this transaction. */ thd->mdl_context.release_transactional_locks(); } } - + #ifndef DBUG_OFF if (lex->sql_command != SQLCOM_SET_OPTION) DEBUG_SYNC(thd,"before_execute_sql_command"); @@ -2481,6 +2718,10 @@ mysql_execute_command(THD *thd) #endif case SQLCOM_SHOW_STATUS: { +#ifdef WITH_WSREP + if (WSREP_CLIENT(thd) && wsrep_sync_wait(thd)) + goto error; +#endif /* WITH_WSREP */ execute_show_status(thd, all_tables); break; } @@ -2513,6 +2754,11 @@ mysql_execute_command(THD *thd) } case SQLCOM_SHOW_STATUS_PROC: case SQLCOM_SHOW_STATUS_FUNC: +#ifdef WITH_WSREP + if (WSREP_CLIENT(thd) && wsrep_sync_wait(thd)) + goto error; +#endif /* WITH_WSREP */ + case SQLCOM_SHOW_DATABASES: case SQLCOM_SHOW_TABLES: case SQLCOM_SHOW_TRIGGERS: @@ -2521,16 +2767,27 @@ mysql_execute_command(THD *thd) case SQLCOM_SHOW_PLUGINS: case SQLCOM_SHOW_FIELDS: case SQLCOM_SHOW_KEYS: +#ifndef WITH_WSREP case SQLCOM_SHOW_VARIABLES: case SQLCOM_SHOW_CHARSETS: case SQLCOM_SHOW_COLLATIONS: case SQLCOM_SHOW_STORAGE_ENGINES: case SQLCOM_SHOW_PROFILE: +#endif /* WITH_WSREP */ case SQLCOM_SHOW_CLIENT_STATS: case SQLCOM_SHOW_USER_STATS: case SQLCOM_SHOW_TABLE_STATS: case SQLCOM_SHOW_INDEX_STATS: case SQLCOM_SELECT: +#ifdef WITH_WSREP + if (WSREP_CLIENT(thd) && wsrep_sync_wait(thd)) + goto error; + case SQLCOM_SHOW_VARIABLES: + case SQLCOM_SHOW_CHARSETS: + case SQLCOM_SHOW_COLLATIONS: + case SQLCOM_SHOW_STORAGE_ENGINES: + case SQLCOM_SHOW_PROFILE: +#endif /* WITH_WSREP */ { thd->status_var.last_query_cost= 0.0; @@ -2554,7 +2811,7 @@ mysql_execute_command(THD *thd) res= execute_sqlcom_select(thd, all_tables); break; } -case SQLCOM_PREPARE: + case SQLCOM_PREPARE: { mysql_sql_stmt_prepare(thd); break; @@ -2888,7 +3145,7 @@ case SQLCOM_PREPARE: */ if(lex->ignore) lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_CREATE_IGNORE_SELECT); - + if(lex->duplicates == DUP_REPLACE) lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_CREATE_REPLACE_SELECT); @@ -2900,9 +3157,9 @@ case SQLCOM_PREPARE: raise a warning, as it may cause problems (see 'NAME_CONST issues' in 'Binary Logging of Stored Programs') */ - if (thd->query_name_consts && + if (thd->query_name_consts && mysql_bin_log.is_open() && - thd->variables.binlog_format == BINLOG_FORMAT_STMT && + WSREP_FORMAT(thd->variables.binlog_format) == BINLOG_FORMAT_STMT && !mysql_bin_log.is_query_in_union(thd, thd->query_id)) { List_iterator_fast<Item> it(select_lex->item_list); @@ -3017,6 +3274,15 @@ case SQLCOM_PREPARE: } else { +#ifdef WITH_WSREP + /* in STATEMENT format, we probably have to replicate also temporary + tables, like mysql replication does + */ + if (WSREP_ON && (!thd->is_current_stmt_binlog_format_row() || + !(create_info.options & HA_LEX_CREATE_TMP_TABLE))) + WSREP_TO_ISOLATION_BEGIN(create_table->db, create_table->table_name, + NULL) +#endif /* WITH_WSREP */ /* Regular CREATE TABLE */ res= mysql_create_table(thd, create_table, &create_info, &alter_info); @@ -3055,6 +3321,7 @@ end_with_restore_list: DBUG_ASSERT(first_table == all_tables && first_table != 0); if (check_one_table_access(thd, INDEX_ACL, all_tables)) goto error; /* purecov: inspected */ + WSREP_TO_ISOLATION_BEGIN(first_table->db, first_table->table_name, NULL) /* Currently CREATE INDEX or DROP INDEX cause a full table rebuild and thus classify as slow administrative statements just like @@ -3172,6 +3439,7 @@ end_with_restore_list: #endif /* HAVE_REPLICATION */ case SQLCOM_RENAME_TABLE: { + WSREP_TO_ISOLATION_BEGIN(0, 0, first_table) if (execute_rename_table(thd, first_table, all_tables)) goto error; break; @@ -3192,13 +3460,19 @@ end_with_restore_list: #endif #endif /* EMBEDDED_LIBRARY */ case SQLCOM_SHOW_CREATE: + { DBUG_ASSERT(first_table == all_tables && first_table != 0); #ifdef DONT_ALLOW_SHOW_COMMANDS my_message(ER_NOT_ALLOWED_COMMAND, ER(ER_NOT_ALLOWED_COMMAND), MYF(0)); /* purecov: inspected */ goto error; #else - { + +#ifdef WITH_WSREP + if (WSREP_CLIENT(thd) && wsrep_sync_wait(thd)) + goto error; +#endif /* WITH_WSREP */ + /* Access check: SHOW CREATE TABLE require any privileges on the table level (ie @@ -3256,11 +3530,16 @@ end_with_restore_list: /* Access is granted. Execute the command. */ res= mysqld_show_create(thd, first_table); break; - } #endif + } case SQLCOM_CHECKSUM: { DBUG_ASSERT(first_table == all_tables && first_table != 0); +#ifdef WITH_WSREP + if (WSREP_CLIENT(thd) && wsrep_sync_wait(thd)) + goto error; +#endif /* WITH_WSREP */ + if (check_table_access(thd, SELECT_ACL, all_tables, FALSE, UINT_MAX, FALSE)) goto error; /* purecov: inspected */ @@ -3272,6 +3551,12 @@ end_with_restore_list: { ha_rows found= 0, updated= 0; DBUG_ASSERT(first_table == all_tables && first_table != 0); +#ifdef WITH_WSREP + if (WSREP_CLIENT(thd) && + wsrep_sync_wait(thd, WSREP_SYNC_WAIT_BEFORE_UPDATE_DELETE)) + goto error; +#endif /* WITH_WSREP */ + if (update_precheck(thd, all_tables)) break; @@ -3308,6 +3593,11 @@ end_with_restore_list: /* if we switched from normal update, rights are checked */ if (up_result != 2) { +#ifdef WITH_WSREP + if (WSREP_CLIENT(thd) && + wsrep_sync_wait(thd, WSREP_SYNC_WAIT_BEFORE_UPDATE_DELETE)) + goto error; +#endif /* WITH_WSREP */ if ((res= multi_update_precheck(thd, all_tables))) break; } @@ -3377,6 +3667,12 @@ end_with_restore_list: break; } case SQLCOM_REPLACE: + { +#ifdef WITH_WSREP + if (WSREP_CLIENT(thd) && + wsrep_sync_wait(thd, WSREP_SYNC_WAIT_BEFORE_INSERT_REPLACE)) + goto error; +#endif /* WITH_WSREP */ #ifndef DBUG_OFF if (mysql_bin_log.is_open()) { @@ -3411,10 +3707,17 @@ end_with_restore_list: DBUG_PRINT("debug", ("Just after generate_incident()")); } #endif + } case SQLCOM_INSERT: { DBUG_ASSERT(first_table == all_tables && first_table != 0); +#ifdef WITH_WSREP + if (WSREP_CLIENT(thd) && + wsrep_sync_wait(thd, WSREP_SYNC_WAIT_BEFORE_INSERT_REPLACE)) + goto error; +#endif /* WITH_WSREP */ + /* Since INSERT DELAYED doesn't support temporary tables, we could not pre-open temporary tables for SQLCOM_INSERT / SQLCOM_REPLACE. @@ -3469,8 +3772,22 @@ end_with_restore_list: select_result *sel_result; bool explain= MY_TEST(lex->describe); DBUG_ASSERT(first_table == all_tables && first_table != 0); +#ifdef WITH_WSREP + if (WSREP_CLIENT(thd) && + wsrep_sync_wait(thd, WSREP_SYNC_WAIT_BEFORE_INSERT_REPLACE)) + goto error; +#endif /* WITH_WSREP */ + if ((res= insert_precheck(thd, all_tables))) break; +#ifdef WITH_WSREP + if (WSREP(thd) && thd->wsrep_consistency_check == CONSISTENCY_CHECK_DECLARED) + { + thd->wsrep_consistency_check = CONSISTENCY_CHECK_RUNNING; + WSREP_TO_ISOLATION_BEGIN(first_table->db, first_table->table_name, NULL); + } +#endif /* WITH_WSREP */ + /* INSERT...SELECT...ON DUPLICATE KEY UPDATE/REPLACE SELECT/ INSERT...IGNORE...SELECT can be unsafe, unless ORDER BY PRIMARY KEY @@ -3561,6 +3878,12 @@ end_with_restore_list: { select_result *sel_result=lex->result; DBUG_ASSERT(first_table == all_tables && first_table != 0); +#ifdef WITH_WSREP + if (WSREP_CLIENT(thd) && + wsrep_sync_wait(thd, WSREP_SYNC_WAIT_BEFORE_UPDATE_DELETE)) + goto error; +#endif /* WITH_WSREP */ + if ((res= delete_precheck(thd, all_tables))) break; DBUG_ASSERT(select_lex->offset_limit == 0); @@ -3617,6 +3940,11 @@ end_with_restore_list: DBUG_ASSERT(first_table == all_tables && first_table != 0); TABLE_LIST *aux_tables= thd->lex->auxiliary_table_list.first; multi_delete *result; +#ifdef WITH_WSREP + if (WSREP_CLIENT(thd) && + wsrep_sync_wait(thd, WSREP_SYNC_WAIT_BEFORE_UPDATE_DELETE)) + goto error; +#endif /* WITH_WSREP */ if ((res= multi_delete_precheck(thd, all_tables))) break; @@ -3687,6 +4015,21 @@ end_with_restore_list: /* So that DROP TEMPORARY TABLE gets to binlog at commit/rollback */ thd->variables.option_bits|= OPTION_KEEP_LOG; } +#ifdef WITH_WSREP + if (WSREP(thd)) + { + for (TABLE_LIST *table= all_tables; table; table= table->next_global) + { + if (!lex->drop_temporary && + (!thd->is_current_stmt_binlog_format_row() || + !find_temporary_table(thd, table))) + { + WSREP_TO_ISOLATION_BEGIN(NULL, NULL, all_tables); + break; + } + } + } +#endif /* WITH_WSREP */ /* If we are a slave, we should add IF EXISTS if the query executed on the master without an error. This will help a slave to @@ -3700,8 +4043,8 @@ end_with_restore_list: /* DDL and binlog write order are protected by metadata locks. */ res= mysql_rm_table(thd, first_table, lex->check_exists, lex->drop_temporary); + break; } - break; case SQLCOM_SHOW_PROCESSLIST: if (!thd->security_ctx->priv_user[0] && check_global_access(thd,PROCESS_ACL)) @@ -3891,6 +4234,7 @@ end_with_restore_list: #endif if (check_access(thd, CREATE_ACL, lex->name.str, NULL, NULL, 1, 0)) break; + WSREP_TO_ISOLATION_BEGIN(lex->name.str, NULL, NULL) res= mysql_create_db(thd, lex->name.str, &create_info, 0); break; } @@ -3922,6 +4266,7 @@ end_with_restore_list: #endif if (check_access(thd, DROP_ACL, lex->name.str, NULL, NULL, 1, 0)) break; + WSREP_TO_ISOLATION_BEGIN(lex->name.str, NULL, NULL) res= mysql_rm_db(thd, lex->name.str, lex->check_exists, 0); break; } @@ -3953,6 +4298,7 @@ end_with_restore_list: res= 1; break; } + WSREP_TO_ISOLATION_BEGIN(db->str, NULL, NULL) res= mysql_upgrade_db(thd, db); if (!res) my_ok(thd); @@ -3988,6 +4334,7 @@ end_with_restore_list: #endif if (check_access(thd, ALTER_ACL, db->str, NULL, NULL, 1, 0)) break; + WSREP_TO_ISOLATION_BEGIN(db->str, NULL, NULL) res= mysql_alter_db(thd, db->str, &create_info); break; } @@ -4026,6 +4373,7 @@ end_with_restore_list: if (res) break; + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL) switch (lex->sql_command) { case SQLCOM_CREATE_EVENT: { @@ -4060,6 +4408,7 @@ end_with_restore_list: lex->spname->m_name); break; case SQLCOM_DROP_EVENT: + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL) if (!(res= Events::drop_event(thd, lex->spname->m_db, lex->spname->m_name, lex->check_exists))) @@ -4074,6 +4423,7 @@ end_with_restore_list: if (check_access(thd, INSERT_ACL, "mysql", NULL, NULL, 1, 0)) break; #ifdef HAVE_DLOPEN + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL) if (!(res = mysql_create_function(thd, &lex->udf))) my_ok(thd); #else @@ -4089,6 +4439,7 @@ end_with_restore_list: if (check_access(thd, INSERT_ACL, "mysql", NULL, NULL, 1, 1) && check_global_access(thd,CREATE_USER_ACL)) break; + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL) /* Conditionally writes to binlog */ if (!(res= mysql_create_user(thd, lex->users_list, lex->sql_command == SQLCOM_CREATE_ROLE))) @@ -4102,6 +4453,7 @@ end_with_restore_list: check_global_access(thd,CREATE_USER_ACL)) break; /* Conditionally writes to binlog */ + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL) if (!(res= mysql_drop_user(thd, lex->users_list, lex->sql_command == SQLCOM_DROP_ROLE))) my_ok(thd); @@ -4113,6 +4465,7 @@ end_with_restore_list: check_global_access(thd,CREATE_USER_ACL)) break; /* Conditionally writes to binlog */ + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL) if (!(res= mysql_rename_user(thd, lex->users_list))) my_ok(thd); break; @@ -4124,6 +4477,7 @@ end_with_restore_list: break; /* Conditionally writes to binlog */ + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL) if (!(res = mysql_revoke_all(thd, lex->users_list))) my_ok(thd); break; @@ -4206,6 +4560,7 @@ end_with_restore_list: lex->type == TYPE_ENUM_PROCEDURE, 0)) goto error; /* Conditionally writes to binlog */ + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL) res= mysql_routine_grant(thd, all_tables, lex->type == TYPE_ENUM_PROCEDURE, lex->users_list, grants, @@ -4219,6 +4574,7 @@ end_with_restore_list: all_tables, FALSE, UINT_MAX, FALSE)) goto error; /* Conditionally writes to binlog */ + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL) res= mysql_table_grant(thd, all_tables, lex->users_list, lex->columns, lex->grant, lex->sql_command == SQLCOM_REVOKE); @@ -4234,6 +4590,7 @@ end_with_restore_list: } else { + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL) /* Conditionally writes to binlog */ res= mysql_grant(thd, select_lex->db, lex->users_list, lex->grant, lex->sql_command == SQLCOM_REVOKE, @@ -4402,6 +4759,7 @@ end_with_restore_list: able to open it (with SQLCOM_HA_OPEN) in the first place. */ unit->set_limit(select_lex); + res= mysql_ha_read(thd, first_table, lex->ha_read_mode, lex->ident.str, lex->insert_list, lex->ha_rkey_mode, select_lex->where, unit->select_limit_cnt, unit->offset_limit_cnt); @@ -4410,7 +4768,11 @@ end_with_restore_list: case SQLCOM_BEGIN: DBUG_PRINT("info", ("Executing SQLCOM_BEGIN thd: %p", thd)); if (trans_begin(thd, lex->start_transaction_opt)) + { + thd->mdl_context.release_transactional_locks(); + WSREP_DEBUG("BEGIN failed, MDL released: %lu", thd->thread_id); goto error; + } my_ok(thd); break; case SQLCOM_COMMIT: @@ -4424,7 +4786,11 @@ end_with_restore_list: (thd->variables.completion_type == 2 && lex->tx_release != TVL_NO)); if (trans_commit(thd)) + { + thd->mdl_context.release_transactional_locks(); + WSREP_DEBUG("COMMIT failed, MDL released: %lu", thd->thread_id); goto error; + } thd->mdl_context.release_transactional_locks(); /* Begin transaction with the same isolation level. */ if (tx_chain) @@ -4444,7 +4810,17 @@ end_with_restore_list: thd->killed= KILL_CONNECTION; thd->print_aborted_warning(3, "RELEASE"); } - my_ok(thd); +#ifdef WITH_WSREP + if (WSREP(thd)) + { + if (thd->wsrep_conflict_state == NO_CONFLICT || + thd->wsrep_conflict_state == REPLAYING) + { + my_ok(thd); + } + } else +#endif /* WITH_WSREP */ + my_ok(thd); break; } case SQLCOM_ROLLBACK: @@ -4459,7 +4835,11 @@ end_with_restore_list: lex->tx_release != TVL_NO)); if (trans_rollback(thd)) + { + thd->mdl_context.release_transactional_locks(); + WSREP_DEBUG("rollback failed, MDL released: %lu", thd->thread_id); goto error; + } thd->mdl_context.release_transactional_locks(); /* Begin transaction with the same isolation level. */ if (tx_chain) @@ -4476,8 +4856,17 @@ end_with_restore_list: /* Disconnect the current client connection. */ if (tx_release) thd->killed= KILL_CONNECTION; - my_ok(thd); - break; +#ifdef WITH_WSREP + if (WSREP(thd)) + { + if (thd->wsrep_conflict_state == NO_CONFLICT) { + my_ok(thd); + } + } + else +#endif /* WITH_WSREP */ + my_ok(thd); + break; } case SQLCOM_RELEASE_SAVEPOINT: if (trans_release_savepoint(thd, lex->ident)) @@ -4545,6 +4934,7 @@ end_with_restore_list: if (sp_process_definer(thd)) goto create_sp_error; + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL) res= (sp_result= sp_create_routine(thd, lex->sphead->m_type, lex->sphead)); switch (sp_result) { case SP_OK: { @@ -4826,6 +5216,7 @@ create_sp_error: if (check_routine_access(thd, ALTER_PROC_ACL, db, name, lex->sql_command == SQLCOM_DROP_PROCEDURE, 0)) goto error; + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL) /* Conditionally writes to binlog */ sp_result= sp_drop_routine(thd, type, lex->spname); @@ -4943,6 +5334,7 @@ create_sp_error: Note: SQLCOM_CREATE_VIEW also handles 'ALTER VIEW' commands as specified through the thd->lex->create_view_mode flag. */ + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL) res= mysql_create_view(thd, first_table, thd->lex->create_view_mode); break; } @@ -4951,12 +5343,14 @@ create_sp_error: if (check_table_access(thd, DROP_ACL, all_tables, FALSE, UINT_MAX, FALSE)) goto error; /* Conditionally writes to binlog. */ + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL) res= mysql_drop_view(thd, first_table, thd->lex->drop_mode); break; } case SQLCOM_CREATE_TRIGGER: { /* Conditionally writes to binlog. */ + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL) res= mysql_create_or_drop_trigger(thd, all_tables, 1); break; @@ -4964,6 +5358,7 @@ create_sp_error: case SQLCOM_DROP_TRIGGER: { /* Conditionally writes to binlog. */ + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL) res= mysql_create_or_drop_trigger(thd, all_tables, 0); break; } @@ -4984,7 +5379,11 @@ create_sp_error: break; case SQLCOM_XA_COMMIT: if (trans_xa_commit(thd)) + { + thd->mdl_context.release_transactional_locks(); + WSREP_DEBUG("XA commit failed, MDL released: %lu", thd->thread_id); goto error; + } thd->mdl_context.release_transactional_locks(); /* We've just done a commit, reset transaction @@ -4996,7 +5395,11 @@ create_sp_error: break; case SQLCOM_XA_ROLLBACK: if (trans_xa_rollback(thd)) + { + thd->mdl_context.release_transactional_locks(); + WSREP_DEBUG("XA rollback failed, MDL released: %lu", thd->thread_id); goto error; + } thd->mdl_context.release_transactional_locks(); /* We've just done a rollback, reset transaction @@ -5016,11 +5419,13 @@ create_sp_error: my_ok(thd); break; case SQLCOM_INSTALL_PLUGIN: + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL) if (! (res= mysql_install_plugin(thd, &thd->lex->comment, &thd->lex->ident))) my_ok(thd); break; case SQLCOM_UNINSTALL_PLUGIN: + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL) if (! (res= mysql_uninstall_plugin(thd, &thd->lex->comment, &thd->lex->ident))) my_ok(thd); @@ -5169,6 +5574,9 @@ finish: /* Free tables */ close_thread_tables(thd); +#ifdef WITH_WSREP + thd->wsrep_consistency_check= NO_CONSISTENCY_CHECK; +#endif /* WITH_WSREP */ #ifndef DBUG_OFF if (lex->sql_command != SQLCOM_SET_OPTION && ! thd->in_sub_stmt) @@ -5222,6 +5630,22 @@ finish: { thd->mdl_context.release_statement_locks(); } + WSREP_TO_ISOLATION_END; + +#ifdef WITH_WSREP + /* + Force release of transactional locks if not in active MST and wsrep is on. + */ + if (WSREP(thd) && + ! thd->in_sub_stmt && + ! thd->in_active_multi_stmt_transaction() && + thd->mdl_context.has_transactional_locks()) + { + WSREP_DEBUG("Forcing release of transactional locks for thd %lu", + thd->thread_id); + thd->mdl_context.release_transactional_locks(); + } +#endif /* WITH_WSREP */ DBUG_RETURN(res || thd->is_error()); } @@ -5322,6 +5746,10 @@ static bool execute_sqlcom_select(THD *thd, TABLE_LIST *all_tables) status_var_increment(thd->status_var.empty_queries); else status_var_add(thd->status_var.rows_sent, thd->get_sent_row_count()); +#ifdef WITH_WSREP + if (WSREP_ON && lex->sql_command == SQLCOM_SHOW_STATUS) + wsrep_free_status(thd); +#endif /* WITH_WSREP */ return res; } @@ -6160,6 +6588,27 @@ void THD::reset_for_next_command() thd->auto_inc_intervals_in_cur_stmt_for_binlog.empty(); thd->stmt_depends_on_first_successful_insert_id_in_prev_stmt= 0; +#ifdef WITH_WSREP + /* + Autoinc variables should be adjusted only for locally executed + transactions. Appliers and replayers are either processing ROW + events or get autoinc variable values from Query_log_event. + */ + if (WSREP(thd) && thd->wsrep_exec_mode == LOCAL_STATE) + { + if (wsrep_auto_increment_control) + { + if (thd->variables.auto_increment_offset != + global_system_variables.auto_increment_offset) + thd->variables.auto_increment_offset= + global_system_variables.auto_increment_offset; + if (thd->variables.auto_increment_increment != + global_system_variables.auto_increment_increment) + thd->variables.auto_increment_increment= + global_system_variables.auto_increment_increment; + } + } +#endif /* WITH_WSREP */ thd->query_start_used= 0; thd->query_start_sec_part_used= 0; thd->is_fatal_error= thd->time_zone_used= 0; @@ -6363,6 +6812,109 @@ void mysql_init_multi_delete(LEX *lex) lex->query_tables_last= &lex->query_tables; } +#ifdef WITH_WSREP +static void wsrep_mysql_parse(THD *thd, char *rawbuf, uint length, + Parser_state *parser_state) +{ + bool is_autocommit= + !thd->in_multi_stmt_transaction_mode() && + thd->wsrep_conflict_state == NO_CONFLICT && + !thd->wsrep_applier && + wsrep_read_only_option(thd, thd->lex->query_tables); + + do + { + if (thd->wsrep_conflict_state== RETRY_AUTOCOMMIT) + { + thd->wsrep_conflict_state= NO_CONFLICT; + /* Performance Schema Interface instrumentation, begin */ + thd->m_statement_psi= MYSQL_REFINE_STATEMENT(thd->m_statement_psi, + com_statement_info[thd->get_command()].m_key); + MYSQL_SET_STATEMENT_TEXT(thd->m_statement_psi, thd->query(), + thd->query_length()); + } + mysql_parse(thd, rawbuf, length, parser_state); + + if (WSREP(thd)) { + /* wsrep BF abort in query exec phase */ + mysql_mutex_lock(&thd->LOCK_wsrep_thd); + if (thd->wsrep_conflict_state == MUST_ABORT) { + wsrep_client_rollback(thd); + + WSREP_DEBUG("abort in exec query state, avoiding autocommit"); + } + + if (thd->wsrep_conflict_state== MUST_REPLAY) + { + wsrep_replay_transaction(thd); + } + + /* setting error code for BF aborted trxs */ + if (thd->wsrep_conflict_state == ABORTED || + thd->wsrep_conflict_state == CERT_FAILURE) + { + mysql_reset_thd_for_next_command(thd); + thd->killed= NOT_KILLED; + if (is_autocommit && + thd->lex->sql_command != SQLCOM_SELECT && + (thd->wsrep_retry_counter < thd->variables.wsrep_retry_autocommit)) + { + WSREP_DEBUG("wsrep retrying AC query: %s", + (thd->query()) ? thd->query() : "void"); + + /* Performance Schema Interface instrumentation, end */ + MYSQL_END_STATEMENT(thd->m_statement_psi, thd->get_stmt_da()); + thd->m_statement_psi= NULL; + close_thread_tables(thd); + + thd->wsrep_conflict_state= RETRY_AUTOCOMMIT; + thd->wsrep_retry_counter++; // grow + wsrep_copy_query(thd); + thd->set_time(); + parser_state->reset(rawbuf, length); + } + else + { + WSREP_DEBUG("%s, thd: %lu is_AC: %d, retry: %lu - %lu SQL: %s", + (thd->wsrep_conflict_state == ABORTED) ? + "BF Aborted" : "cert failure", + thd->thread_id, is_autocommit, thd->wsrep_retry_counter, + thd->variables.wsrep_retry_autocommit, thd->query()); + my_error(ER_LOCK_DEADLOCK, MYF(0), "wsrep aborted transaction"); + thd->killed= NOT_KILLED; + thd->wsrep_conflict_state= NO_CONFLICT; + if (thd->wsrep_conflict_state != REPLAYING) + thd->wsrep_retry_counter= 0; // reset + } + } + else + { + set_if_smaller(thd->wsrep_retry_counter, 0); // reset; eventually ok + } + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + } + + /* If retry is requested clean up explain structure */ + if (thd->wsrep_conflict_state == RETRY_AUTOCOMMIT && thd->lex->explain) + delete_explain_query(thd->lex); + + } while (thd->wsrep_conflict_state== RETRY_AUTOCOMMIT); + + if (thd->wsrep_retry_query) + { + WSREP_DEBUG("releasing retry_query: conf %d sent %d kill %d errno %d SQL %s", + thd->wsrep_conflict_state, + thd->get_stmt_da()->is_sent(), + thd->killed, + thd->get_stmt_da()->is_error() ? thd->get_stmt_da()->sql_errno() : 0, + thd->wsrep_retry_query); + my_free(thd->wsrep_retry_query); + thd->wsrep_retry_query = NULL; + thd->wsrep_retry_query_len = 0; + thd->wsrep_retry_command = COM_CONNECT; + } +} +#endif /* WITH_WSREP */ /* When you modify mysql_parse(), you may need to mofify @@ -7392,8 +7944,9 @@ kill_one_thread(THD *thd, longlong id, killed_state kill_signal, killed_type typ faster and do a harder kill than KILL_SYSTEM_THREAD; */ - if ((thd->security_ctx->master_access & SUPER_ACL) || - thd->security_ctx->user_matches(tmp->security_ctx)) + if (((thd->security_ctx->master_access & SUPER_ACL) || + thd->security_ctx->user_matches(tmp->security_ctx)) && + IF_WSREP(!wsrep_thd_is_BF((void *)tmp, true), 1)) { tmp->awake(kill_signal); error=0; diff --git a/sql/sql_parse.h b/sql/sql_parse.h index 926a4d800ad..87f8854e050 100644 --- a/sql/sql_parse.h +++ b/sql/sql_parse.h @@ -203,5 +203,4 @@ inline bool is_supported_parser_charset(CHARSET_INFO *cs) return MY_TEST(cs->mbminlen == 1); } - #endif /* SQL_PARSE_INCLUDED */ diff --git a/sql/sql_partition_admin.cc b/sql/sql_partition_admin.cc index 8c59febeb77..9db8b1c136a 100644 --- a/sql/sql_partition_admin.cc +++ b/sql/sql_partition_admin.cc @@ -1,4 +1,5 @@ /* Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved. + Copyright (c) 2014, SkySQL Ab. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -763,6 +764,23 @@ bool Sql_cmd_alter_table_truncate_partition::execute(THD *thd) if (check_one_table_access(thd, DROP_ACL, first_table)) DBUG_RETURN(TRUE); +#ifdef WITH_WSREP + if (WSREP_ON) + { + TABLE *find_temporary_table(THD *thd, const TABLE_LIST *tl); + + if ((!thd->is_current_stmt_binlog_format_row() || + !find_temporary_table(thd, first_table)) && + wsrep_to_isolation_begin( + thd, first_table->db, first_table->table_name, NULL) + ) + { + WSREP_WARN("ALTER TABLE isolation failure"); + DBUG_RETURN(TRUE); + } + } +#endif /* WITH_WSREP */ + if (open_tables(thd, &first_table, &table_counter, 0)) DBUG_RETURN(true); diff --git a/sql/sql_plugin.cc b/sql/sql_plugin.cc index b2dd38ad720..ea9748c8cb2 100644 --- a/sql/sql_plugin.cc +++ b/sql/sql_plugin.cc @@ -3087,15 +3087,19 @@ void plugin_thdvar_init(THD *thd) thd->variables.dynamic_variables_size= 0; thd->variables.dynamic_variables_ptr= 0; - mysql_mutex_lock(&LOCK_plugin); - thd->variables.table_plugin= + if (IF_WSREP((!WSREP(thd) || !thd->wsrep_applier),1)) + { + mysql_mutex_lock(&LOCK_plugin); + thd->variables.table_plugin= intern_plugin_lock(NULL, global_system_variables.table_plugin); - if (global_system_variables.tmp_table_plugin) - thd->variables.tmp_table_plugin= + if (global_system_variables.tmp_table_plugin) + thd->variables.tmp_table_plugin= intern_plugin_lock(NULL, global_system_variables.tmp_table_plugin); - intern_plugin_unlock(NULL, old_table_plugin); - intern_plugin_unlock(NULL, old_tmp_table_plugin); - mysql_mutex_unlock(&LOCK_plugin); + intern_plugin_unlock(NULL, old_table_plugin); + intern_plugin_unlock(NULL, old_tmp_table_plugin); + mysql_mutex_unlock(&LOCK_plugin); + } + DBUG_VOID_RETURN; } diff --git a/sql/sql_prepare.cc b/sql/sql_prepare.cc index ebceae70ee5..d9365e14494 100644 --- a/sql/sql_prepare.cc +++ b/sql/sql_prepare.cc @@ -1,5 +1,5 @@ /* Copyright (c) 2002, 2013, Oracle and/or its affiliates. - Copyright (c) 2008, 2013, Monty Program Ab + Copyright (c) 2008, 2014, Monty Program Ab This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -117,6 +117,7 @@ When one supplies long data for a placeholder: #include "lock.h" // MYSQL_OPEN_FORCE_SHARED_MDL #include "sql_handler.h" #include "transaction.h" // trans_rollback_implicit +#include "wsrep_mysqld.h" /** A result class used to send cursor rows using the binary protocol. @@ -3626,6 +3627,27 @@ reexecute: error= execute(expanded_query, open_cursor) || thd->is_error(); thd->m_reprepare_observer= NULL; +#ifdef WITH_WSREP + + if (WSREP_ON) + { + mysql_mutex_lock(&thd->LOCK_wsrep_thd); + switch (thd->wsrep_conflict_state) + { + case CERT_FAILURE: + WSREP_DEBUG("PS execute fail for CERT_FAILURE: thd: %ld err: %d", + thd->thread_id, thd->get_stmt_da()->sql_errno() ); + thd->wsrep_conflict_state = NO_CONFLICT; + break; + + case MUST_REPLAY: + (void)wsrep_replay_transaction(thd); + default: + break; + } + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + } +#endif /* WITH_WSREP */ if ((sql_command_flags[lex->sql_command] & CF_REEXECUTION_FRAGILE) && error && !thd->is_fatal_error && !thd->killed && diff --git a/sql/sql_reload.cc b/sql/sql_reload.cc index bb3d5bb899a..f1f9ba9452f 100644 --- a/sql/sql_reload.cc +++ b/sql/sql_reload.cc @@ -253,7 +253,18 @@ bool reload_acl_and_cache(THD *thd, unsigned long long options, } if (options & REFRESH_CHECKPOINT) disable_checkpoints(thd); - } +#ifdef WITH_WSREP + /* + We need to do it second time after wsrep appliers were blocked in + make_global_read_lock_block_commit(thd) above since they could have + modified the tables too. + */ + if (WSREP(thd) && + close_cached_tables(thd, tables, (options & REFRESH_FAST) ? + FALSE : TRUE, TRUE)) + result= 1; +#endif /* WITH_WSREP */ + } else { if (thd && thd->locked_tables_mode) diff --git a/sql/sql_repl.cc b/sql/sql_repl.cc index c7bd28259ae..d8db5c55c3b 100644 --- a/sql/sql_repl.cc +++ b/sql/sql_repl.cc @@ -1,5 +1,5 @@ /* Copyright (c) 2000, 2013, Oracle and/or its affiliates. - Copyright (c) 2008, 2013, Monty Program Ab + Copyright (c) 2008, 2014, Monty Program Ab This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -2946,6 +2946,7 @@ int start_slave(THD* thd , Master_info* mi, bool net_report) err: unlock_slave_threads(mi); + thd_proc_info(thd, 0); if (slave_errno) { diff --git a/sql/sql_show.cc b/sql/sql_show.cc index 79c444ea442..bb8ae16189a 100644 --- a/sql/sql_show.cc +++ b/sql/sql_show.cc @@ -2931,11 +2931,39 @@ static bool show_status_array(THD *thd, const char *wild, *prefix_end++= '_'; len=name_buffer + sizeof(name_buffer) - prefix_end; +#ifdef WITH_WSREP + bool is_wsrep_var= FALSE; + /* + This is a workaround for lp:1306875 (PBX) to skip switching of wsrep + status variable name's first letter to uppercase. This is an optimization + for status variables defined under wsrep plugin. + TODO: remove once lp:1306875 has been addressed. + */ + if (*prefix && !my_strcasecmp(system_charset_info, prefix, "wsrep")) + { + is_wsrep_var= TRUE; + } +#endif /* WITH_WSREP */ + for (; variables->name; variables++) { bool wild_checked; strnmov(prefix_end, variables->name, len); name_buffer[sizeof(name_buffer)-1]=0; /* Safety */ + +#ifdef WITH_WSREP + /* + If the prefix is NULL, that means we are looking into the status variables + defined directly under mysqld.cc. Do not capitalize wsrep status variable + names until lp:1306875 has been fixed. + TODO: remove once lp:1306875 has been addressed. + */ + if (!(*prefix) && !strncasecmp(name_buffer, "wsrep", strlen("wsrep"))) + { + is_wsrep_var= TRUE; + } +#endif /* WITH_WSREP */ + if (ucase_names) my_caseup_str(system_charset_info, name_buffer); else @@ -2944,8 +2972,9 @@ static bool show_status_array(THD *thd, const char *wild, DBUG_ASSERT(name_buffer[0] >= 'a'); DBUG_ASSERT(name_buffer[0] <= 'z'); - /* traditionally status variables have a first letter uppercased */ - if (status_var) + // WSREP_TODO: remove once lp:1306875 has been addressed. + if (IF_WSREP(is_wsrep_var == FALSE, 1) && + status_var) name_buffer[0]-= 'a' - 'A'; } diff --git a/sql/sql_table.cc b/sql/sql_table.cc index 80ac0978834..b86de2f6cc1 100644 --- a/sql/sql_table.cc +++ b/sql/sql_table.cc @@ -5264,6 +5264,12 @@ bool mysql_create_like_table(THD* thd, TABLE_LIST* table, int create_res; DBUG_ENTER("mysql_create_like_table"); +#ifdef WITH_WSREP + if (WSREP_ON && !thd->wsrep_applier && + wsrep_create_like_table(thd, table, src_table, create_info)) + goto end; +#endif + /* We the open source table to get its description in HA_CREATE_INFO and Alter_info objects. This also acquires a shared metadata lock @@ -5525,6 +5531,8 @@ err: thd->query_length(), is_trans)) res= 1; } + +end: DBUG_RETURN(res); } @@ -8117,6 +8125,7 @@ simple_rename_or_index_change(THD *thd, TABLE_LIST *table_list, if (!error) { error= write_bin_log(thd, TRUE, thd->query(), thd->query_length()); + if (!error) my_ok(thd); } diff --git a/sql/sql_trigger.cc b/sql/sql_trigger.cc index 70ac6265046..268fad5d90e 100644 --- a/sql/sql_trigger.cc +++ b/sql/sql_trigger.cc @@ -434,7 +434,8 @@ bool mysql_create_or_drop_trigger(THD *thd, TABLE_LIST *tables, bool create) binlogged, so they share the same danger, so trust_function_creators applies to them too. */ - if (!trust_function_creators && mysql_bin_log.is_open() && + if (!trust_function_creators && + IF_WSREP((WSREP_EMULATE_BINLOG(thd) || mysql_bin_log.is_open()), mysql_bin_log.is_open()) && !(thd->security_ctx->master_access & SUPER_ACL)) { my_error(ER_BINLOG_CREATE_ROUTINE_NEED_SUPER, MYF(0)); @@ -2437,3 +2438,4 @@ bool load_table_name_for_trigger(THD *thd, DBUG_RETURN(FALSE); } + diff --git a/sql/sql_truncate.cc b/sql/sql_truncate.cc index e98679b1d51..5e23c6ee32b 100644 --- a/sql/sql_truncate.cc +++ b/sql/sql_truncate.cc @@ -24,6 +24,7 @@ #include "sql_acl.h" // DROP_ACL #include "sql_parse.h" // check_one_table_access() #include "sql_truncate.h" +#include "wsrep_mysqld.h" #include "sql_show.h" //append_identifier() @@ -411,6 +412,12 @@ bool Sql_cmd_truncate_table::truncate_table(THD *thd, TABLE_LIST *table_ref) { bool hton_can_recreate; +#ifdef WITH_WSREP + if (WSREP(thd) && wsrep_to_isolation_begin(thd, + table_ref->db, + table_ref->table_name, NULL)) + DBUG_RETURN(TRUE); +#endif /* WITH_WSREP */ if (lock_table(thd, table_ref, &hton_can_recreate)) DBUG_RETURN(TRUE); diff --git a/sql/sql_update.cc b/sql/sql_update.cc index faf8e4c61d2..c458f01303d 100644 --- a/sql/sql_update.cc +++ b/sql/sql_update.cc @@ -1,5 +1,5 @@ /* Copyright (c) 2000, 2013, Oracle and/or its affiliates. - Copyright (c) 2011, 2013, Monty Program Ab. + Copyright (c) 2011, 2014, Monty Program Ab. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -976,7 +976,8 @@ int mysql_update(THD *thd, */ if ((error < 0) || thd->transaction.stmt.modified_non_trans_table) { - if (mysql_bin_log.is_open()) + if (IF_WSREP((WSREP_EMULATE_BINLOG(thd) || mysql_bin_log.is_open()), + mysql_bin_log.is_open())) { int errcode= 0; if (error < 0) @@ -2219,7 +2220,8 @@ void multi_update::abort_result_set() The query has to binlog because there's a modified non-transactional table either from the query's list or via a stored routine: bug#13270,23333 */ - if (mysql_bin_log.is_open()) + if (IF_WSREP((WSREP_EMULATE_BINLOG(thd) || mysql_bin_log.is_open()), + mysql_bin_log.is_open())) { /* THD::killed status might not have been set ON at time of an error @@ -2488,7 +2490,8 @@ bool multi_update::send_eof() if (local_error == 0 || thd->transaction.stmt.modified_non_trans_table) { - if (mysql_bin_log.is_open()) + if (IF_WSREP((WSREP_EMULATE_BINLOG(thd) || mysql_bin_log.is_open()), + mysql_bin_log.is_open())) { int errcode= 0; if (local_error == 0) diff --git a/sql/sql_yacc.yy b/sql/sql_yacc.yy index a261d611aa6..a06d702f620 100644 --- a/sql/sql_yacc.yy +++ b/sql/sql_yacc.yy @@ -7156,7 +7156,7 @@ alter: } view_tail {} - | ALTER definer_opt EVENT_SYM sp_name + | ALTER definer_opt remember_name EVENT_SYM sp_name { /* It is safe to use Lex->spname because @@ -7168,9 +7168,12 @@ alter: if (!(Lex->event_parse_data= Event_parse_data::new_instance(thd))) MYSQL_YYABORT; - Lex->event_parse_data->identifier= $4; + Lex->event_parse_data->identifier= $5; Lex->sql_command= SQLCOM_ALTER_EVENT; +#ifdef WITH_WSREP + Lex->stmt_definition_begin= $3; +#endif } ev_alter_on_schedule_completion opt_ev_rename_to @@ -7178,7 +7181,7 @@ alter: opt_ev_comment opt_ev_sql_stmt { - if (!($6 || $7 || $8 || $9 || $10)) + if (!($7 || $8 || $9 || $10 || $11)) { my_parse_error(ER(ER_SYNTAX_ERROR)); MYSQL_YYABORT; @@ -7188,6 +7191,9 @@ alter: can overwrite it */ Lex->sql_command= SQLCOM_ALTER_EVENT; +#ifdef WITH_WSREP + Lex->stmt_definition_end= (char*)YYLIP->get_cpp_ptr(); +#endif } | ALTER TABLESPACE alter_tablespace_info { diff --git a/sql/sys_vars.cc b/sql/sys_vars.cc index 9495d16247d..c76ee5e6358 100644 --- a/sql/sys_vars.cc +++ b/sql/sys_vars.cc @@ -465,6 +465,26 @@ static bool binlog_format_check(sys_var *self, THD *thd, set_var *var) ER_INSIDE_TRANSACTION_PREVENTS_SWITCH_BINLOG_FORMAT)) return true; +#ifdef WITH_WSREP + /* MariaDB Galera does not support STATEMENT or MIXED binlog + format currently */ + if (WSREP(thd) && + (var->save_result.ulonglong_value == BINLOG_FORMAT_STMT || + var->save_result.ulonglong_value == BINLOG_FORMAT_MIXED)) + { + WSREP_DEBUG("MariaDB Galera does not support binlog format : %s", + var->save_result.ulonglong_value == BINLOG_FORMAT_STMT ? + "STATEMENT" : "MIXED"); + /* Push also warning, because error message is general */ + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_UNKNOWN_ERROR, + "MariaDB Galera does not support binlog format: %s", + var->save_result.ulonglong_value == BINLOG_FORMAT_STMT ? + "STATEMENT" : "MIXED"); + return true; + } +#endif + return false; } @@ -3317,6 +3337,8 @@ static bool fix_autocommit(sys_var *self, THD *thd, enum_var_type type) if (trans_commit_stmt(thd) || trans_commit(thd)) { thd->variables.option_bits&= ~OPTION_AUTOCOMMIT; + thd->mdl_context.release_transactional_locks(); + WSREP_DEBUG("autocommit, MDL TRX lock released: %lu", thd->thread_id); return true; } /* @@ -4412,6 +4434,270 @@ static Sys_var_tz Sys_time_zone( SESSION_VAR(time_zone), NO_CMD_LINE, DEFAULT(&default_tz), NO_MUTEX_GUARD, IN_BINLOG); +#ifdef WITH_WSREP +#include "wsrep_var.h" +#include "wsrep_sst.h" +#include "wsrep_binlog.h" + +static Sys_var_charptr Sys_wsrep_provider( + "wsrep_provider", "Path to replication provider library", + PREALLOCATED GLOBAL_VAR(wsrep_provider), CMD_LINE(REQUIRED_ARG, OPT_WSREP_PROVIDER), + IN_FS_CHARSET, DEFAULT(WSREP_NONE), + NO_MUTEX_GUARD, NOT_IN_BINLOG, + ON_CHECK(wsrep_provider_check), ON_UPDATE(wsrep_provider_update)); + +static Sys_var_charptr Sys_wsrep_provider_options( + "wsrep_provider_options", "provider specific options", + PREALLOCATED GLOBAL_VAR(wsrep_provider_options), + CMD_LINE(REQUIRED_ARG, OPT_WSREP_PROVIDER_OPTIONS), + IN_FS_CHARSET, DEFAULT(""), NO_MUTEX_GUARD, NOT_IN_BINLOG, + ON_CHECK(wsrep_provider_options_check), + ON_UPDATE(wsrep_provider_options_update)); + +static Sys_var_charptr Sys_wsrep_data_home_dir( + "wsrep_data_home_dir", "home directory for wsrep provider", + READ_ONLY GLOBAL_VAR(wsrep_data_home_dir), CMD_LINE(REQUIRED_ARG), + IN_FS_CHARSET, DEFAULT(""), + NO_MUTEX_GUARD, NOT_IN_BINLOG); + +static Sys_var_charptr Sys_wsrep_cluster_name( + "wsrep_cluster_name", "Name for the cluster", + PREALLOCATED GLOBAL_VAR(wsrep_cluster_name), CMD_LINE(REQUIRED_ARG), + IN_FS_CHARSET, DEFAULT(WSREP_CLUSTER_NAME), + NO_MUTEX_GUARD, NOT_IN_BINLOG, + ON_CHECK(wsrep_cluster_name_check), + ON_UPDATE(wsrep_cluster_name_update)); + +static PolyLock_mutex PLock_wsrep_slave_threads(&LOCK_wsrep_slave_threads); +static Sys_var_charptr Sys_wsrep_cluster_address ( + "wsrep_cluster_address", "Address to initially connect to cluster", + PREALLOCATED GLOBAL_VAR(wsrep_cluster_address), + CMD_LINE(REQUIRED_ARG, OPT_WSREP_CLUSTER_ADDRESS), + IN_FS_CHARSET, DEFAULT(""), + &PLock_wsrep_slave_threads, NOT_IN_BINLOG, + ON_CHECK(wsrep_cluster_address_check), + ON_UPDATE(wsrep_cluster_address_update)); + +static Sys_var_charptr Sys_wsrep_node_name ( + "wsrep_node_name", "Node name", + PREALLOCATED GLOBAL_VAR(wsrep_node_name), CMD_LINE(REQUIRED_ARG), + IN_FS_CHARSET, DEFAULT(""), NO_MUTEX_GUARD, NOT_IN_BINLOG, + wsrep_node_name_check, wsrep_node_name_update); + +static Sys_var_charptr Sys_wsrep_node_address ( + "wsrep_node_address", "Node address", + PREALLOCATED GLOBAL_VAR(wsrep_node_address), CMD_LINE(REQUIRED_ARG), + IN_FS_CHARSET, DEFAULT(""), + NO_MUTEX_GUARD, NOT_IN_BINLOG, + ON_CHECK(wsrep_node_address_check), + ON_UPDATE(wsrep_node_address_update)); + +static Sys_var_charptr Sys_wsrep_node_incoming_address( + "wsrep_node_incoming_address", "Client connection address", + PREALLOCATED GLOBAL_VAR(wsrep_node_incoming_address),CMD_LINE(REQUIRED_ARG), + IN_FS_CHARSET, DEFAULT(WSREP_NODE_INCOMING_AUTO), + NO_MUTEX_GUARD, NOT_IN_BINLOG); + +static Sys_var_ulong Sys_wsrep_slave_threads( + "wsrep_slave_threads", "Number of slave appliers to launch", + GLOBAL_VAR(wsrep_slave_threads), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(1, 512), DEFAULT(1), BLOCK_SIZE(1), + &PLock_wsrep_slave_threads, NOT_IN_BINLOG, + ON_CHECK(wsrep_slave_threads_check), + ON_UPDATE(wsrep_slave_threads_update)); + +static Sys_var_charptr Sys_wsrep_dbug_option( + "wsrep_dbug_option", "DBUG options to provider library", + GLOBAL_VAR(wsrep_dbug_option),CMD_LINE(REQUIRED_ARG), + IN_FS_CHARSET, DEFAULT(""), + NO_MUTEX_GUARD, NOT_IN_BINLOG); + +static Sys_var_mybool Sys_wsrep_debug( + "wsrep_debug", "To enable debug level logging", + GLOBAL_VAR(wsrep_debug), CMD_LINE(OPT_ARG), DEFAULT(FALSE)); + +static Sys_var_mybool Sys_wsrep_convert_LOCK_to_trx( + "wsrep_convert_LOCK_to_trx", "To convert locking sessions " + "into transactions", + GLOBAL_VAR(wsrep_convert_LOCK_to_trx), + CMD_LINE(OPT_ARG), DEFAULT(FALSE)); + +static Sys_var_ulong Sys_wsrep_retry_autocommit( + "wsrep_retry_autocommit", "Max number of times to retry " + "a failed autocommit statement", + SESSION_VAR(wsrep_retry_autocommit), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(0, 10000), DEFAULT(1), BLOCK_SIZE(1)); + +static Sys_var_mybool Sys_wsrep_auto_increment_control( + "wsrep_auto_increment_control", "To automatically control the " + "assignment of autoincrement variables", + GLOBAL_VAR(wsrep_auto_increment_control), + CMD_LINE(OPT_ARG), DEFAULT(TRUE)); + +static Sys_var_mybool Sys_wsrep_drupal_282555_workaround( + "wsrep_drupal_282555_workaround", "To use a workaround for" + "bad autoincrement value", + GLOBAL_VAR(wsrep_drupal_282555_workaround), + CMD_LINE(OPT_ARG), DEFAULT(FALSE)); + +static Sys_var_charptr sys_wsrep_sst_method( + "wsrep_sst_method", "State snapshot transfer method", + GLOBAL_VAR(wsrep_sst_method),CMD_LINE(REQUIRED_ARG), + IN_FS_CHARSET, DEFAULT(WSREP_SST_DEFAULT), NO_MUTEX_GUARD, NOT_IN_BINLOG, + ON_CHECK(wsrep_sst_method_check), + ON_UPDATE(wsrep_sst_method_update)); + +static Sys_var_charptr Sys_wsrep_sst_receive_address( + "wsrep_sst_receive_address", "Address where node is waiting for " + "SST contact", + GLOBAL_VAR(wsrep_sst_receive_address),CMD_LINE(REQUIRED_ARG), + IN_FS_CHARSET, DEFAULT(WSREP_SST_ADDRESS_AUTO), NO_MUTEX_GUARD, + NOT_IN_BINLOG, + ON_CHECK(wsrep_sst_receive_address_check), + ON_UPDATE(wsrep_sst_receive_address_update)); + +static Sys_var_charptr Sys_wsrep_sst_auth( + "wsrep_sst_auth", "Authentication for SST connection", + PREALLOCATED GLOBAL_VAR(wsrep_sst_auth), CMD_LINE(REQUIRED_ARG, OPT_WSREP_SST_AUTH), + IN_FS_CHARSET, DEFAULT(NULL), NO_MUTEX_GUARD, + NOT_IN_BINLOG, + ON_CHECK(wsrep_sst_auth_check), + ON_UPDATE(wsrep_sst_auth_update)); + +static Sys_var_charptr Sys_wsrep_sst_donor( + "wsrep_sst_donor", "preferred donor node for the SST", + GLOBAL_VAR(wsrep_sst_donor),CMD_LINE(REQUIRED_ARG), + IN_FS_CHARSET, DEFAULT(""), NO_MUTEX_GUARD, NOT_IN_BINLOG, + ON_CHECK(wsrep_sst_donor_check), + ON_UPDATE(wsrep_sst_donor_update)); + +static Sys_var_mybool Sys_wsrep_sst_donor_rejects_queries( + "wsrep_sst_donor_rejects_queries", "Reject client queries " + "when donating state snapshot transfer", + GLOBAL_VAR(wsrep_sst_donor_rejects_queries), + CMD_LINE(OPT_ARG), DEFAULT(FALSE)); + +static Sys_var_mybool Sys_wsrep_on ( + "wsrep_on", "To enable wsrep replication ", + SESSION_VAR(wsrep_on), + CMD_LINE(OPT_ARG), DEFAULT(TRUE), + NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0), + ON_UPDATE(wsrep_on_update)); + +static Sys_var_charptr Sys_wsrep_start_position ( + "wsrep_start_position", "global transaction position to start from ", + PREALLOCATED GLOBAL_VAR(wsrep_start_position), + CMD_LINE(REQUIRED_ARG, OPT_WSREP_START_POSITION), + IN_FS_CHARSET, DEFAULT(WSREP_START_POSITION_ZERO), + NO_MUTEX_GUARD, NOT_IN_BINLOG, + ON_CHECK(wsrep_start_position_check), + ON_UPDATE(wsrep_start_position_update)); + +static Sys_var_ulong Sys_wsrep_max_ws_size ( + "wsrep_max_ws_size", "Max write set size (bytes)", + GLOBAL_VAR(wsrep_max_ws_size), CMD_LINE(REQUIRED_ARG), + /* Upper limit is 65K short of 4G to avoid overlows on 32-bit systems */ + VALID_RANGE(1024, WSREP_MAX_WS_SIZE), DEFAULT(1073741824UL), BLOCK_SIZE(1)); + +static Sys_var_ulong Sys_wsrep_max_ws_rows ( + "wsrep_max_ws_rows", "Max number of rows in write set", + GLOBAL_VAR(wsrep_max_ws_rows), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(1, 1048576), DEFAULT(131072), BLOCK_SIZE(1)); + +static Sys_var_charptr Sys_wsrep_notify_cmd( + "wsrep_notify_cmd", "", + GLOBAL_VAR(wsrep_notify_cmd),CMD_LINE(REQUIRED_ARG), + IN_FS_CHARSET, DEFAULT(""), NO_MUTEX_GUARD, NOT_IN_BINLOG); + +static Sys_var_mybool Sys_wsrep_certify_nonPK( + "wsrep_certify_nonPK", "Certify tables with no primary key", + GLOBAL_VAR(wsrep_certify_nonPK), + CMD_LINE(OPT_ARG), DEFAULT(TRUE)); + +static Sys_var_mybool Sys_wsrep_causal_reads( + "wsrep_causal_reads", "(DEPRECATED) Setting this variable is equivalent " + "to setting wsrep_sync_wait READ flag", + SESSION_VAR(wsrep_causal_reads), CMD_LINE(OPT_ARG), DEFAULT(FALSE), + NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0), + ON_UPDATE(wsrep_causal_reads_update)); + +static Sys_var_uint Sys_wsrep_sync_wait( + "wsrep_sync_wait", "Ensure \"synchronous\" read view before executing " + "an operation of the type specified by bitmask: 1 - READ(includes " + "SELECT, SHOW and BEGIN/START TRANSACTION); 2 - UPDATE and DELETE; 4 - " + "INSERT and REPLACE", + SESSION_VAR(wsrep_sync_wait), CMD_LINE(OPT_ARG), + VALID_RANGE(WSREP_SYNC_WAIT_NONE, WSREP_SYNC_WAIT_MAX), + DEFAULT(WSREP_SYNC_WAIT_NONE), BLOCK_SIZE(1), + NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0), + ON_UPDATE(wsrep_sync_wait_update)); + +static const char *wsrep_OSU_method_names[]= { "TOI", "RSU", NullS }; +static Sys_var_enum Sys_wsrep_OSU_method( + "wsrep_OSU_method", "Method for Online Schema Upgrade", + GLOBAL_VAR(wsrep_OSU_method_options), CMD_LINE(OPT_ARG), + wsrep_OSU_method_names, DEFAULT(WSREP_OSU_TOI), + NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0), + ON_UPDATE(0)); + +static PolyLock_mutex PLock_wsrep_desync(&LOCK_wsrep_desync); +static Sys_var_mybool Sys_wsrep_desync ( + "wsrep_desync", "To desynchronize the node from the cluster", + GLOBAL_VAR(wsrep_desync), + CMD_LINE(OPT_ARG), DEFAULT(FALSE), + &PLock_wsrep_desync, NOT_IN_BINLOG, + ON_CHECK(wsrep_desync_check), + ON_UPDATE(wsrep_desync_update)); + +static Sys_var_enum Sys_wsrep_forced_binlog_format( + "wsrep_forced_binlog_format", "binlog format to take effect over user's choice", + GLOBAL_VAR(wsrep_forced_binlog_format), + CMD_LINE(REQUIRED_ARG, OPT_BINLOG_FORMAT), + wsrep_binlog_format_names, DEFAULT(BINLOG_FORMAT_UNSPEC), + NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0), + ON_UPDATE(0)); + +static Sys_var_mybool Sys_wsrep_recover_datadir( + "wsrep_recover", "Recover database state after crash and exit", + READ_ONLY GLOBAL_VAR(wsrep_recovery), + CMD_LINE(OPT_ARG, OPT_WSREP_RECOVER), DEFAULT(FALSE)); + +static Sys_var_mybool Sys_wsrep_replicate_myisam( + "wsrep_replicate_myisam", "To enable myisam replication", + GLOBAL_VAR(wsrep_replicate_myisam), CMD_LINE(OPT_ARG), DEFAULT(FALSE)); + +static Sys_var_mybool Sys_wsrep_log_conflicts( + "wsrep_log_conflicts", "To log multi-master conflicts", + GLOBAL_VAR(wsrep_log_conflicts), CMD_LINE(OPT_ARG), DEFAULT(FALSE)); + +static Sys_var_ulong Sys_wsrep_mysql_replication_bundle( + "wsrep_mysql_replication_bundle", "mysql replication group commit ", + GLOBAL_VAR(wsrep_mysql_replication_bundle), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(0, 1000), DEFAULT(0), BLOCK_SIZE(1)); + +static Sys_var_mybool Sys_wsrep_load_data_splitting( + "wsrep_load_data_splitting", "To commit LOAD DATA " + "transaction after every 10K rows inserted", + GLOBAL_VAR(wsrep_load_data_splitting), + CMD_LINE(OPT_ARG), DEFAULT(TRUE)); + +static Sys_var_mybool Sys_wsrep_slave_FK_checks( + "wsrep_slave_FK_checks", "Should slave thread do " + "foreign key constraint checks", + GLOBAL_VAR(wsrep_slave_FK_checks), + CMD_LINE(OPT_ARG), DEFAULT(TRUE)); + +static Sys_var_mybool Sys_wsrep_slave_UK_checks( + "wsrep_slave_UK_checks", "Should slave thread do " + "secondary index uniqueness checks", + GLOBAL_VAR(wsrep_slave_UK_checks), + CMD_LINE(OPT_ARG), DEFAULT(FALSE)); + +static Sys_var_mybool Sys_wsrep_restart_slave( + "wsrep_restart_slave", "Should MySQL slave be restarted automatically, when node joins back to cluster", + GLOBAL_VAR(wsrep_restart_slave), CMD_LINE(OPT_ARG), DEFAULT(FALSE)); +#endif /* WITH_WSREP */ + static bool fix_host_cache_size(sys_var *, THD *, enum_var_type) { hostname_cache_resize((uint) host_cache_size); diff --git a/sql/table.cc b/sql/table.cc index d720a16f6e5..735dbf6eedd 100644 --- a/sql/table.cc +++ b/sql/table.cc @@ -40,6 +40,7 @@ #include "sql_statistics.h" #include "discover.h" #include "mdl.h" // MDL_wait_for_graph_visitor +#include "ha_partition.h" /* INFORMATION_SCHEMA name */ LEX_STRING INFORMATION_SCHEMA_NAME= {C_STRING_WITH_LEN("information_schema")}; diff --git a/sql/transaction.cc b/sql/transaction.cc index 933e39ae357..9b3507de2eb 100644 --- a/sql/transaction.cc +++ b/sql/transaction.cc @@ -97,6 +97,10 @@ static bool xa_trans_force_rollback(THD *thd) by ha_rollback()/THD::transaction::cleanup(). */ thd->transaction.xid_state.rm_error= 0; +#ifdef WITH_WSREP + if (WSREP_ON) + wsrep_register_hton(thd, TRUE); +#endif /* WITH_WSREP */ if (ha_rollback_trans(thd, true)) { my_error(ER_XAER_RMERR, MYF(0)); @@ -135,10 +139,18 @@ bool trans_begin(THD *thd, uint flags) (thd->variables.option_bits & OPTION_TABLE_LOCK)) { thd->variables.option_bits&= ~OPTION_TABLE_LOCK; +#ifdef WITH_WSREP + if (WSREP_ON) + wsrep_register_hton(thd, TRUE); +#endif /* WITH_WSREP */ thd->server_status&= ~(SERVER_STATUS_IN_TRANS | SERVER_STATUS_IN_TRANS_READONLY); DBUG_PRINT("info", ("clearing SERVER_STATUS_IN_TRANS")); res= MY_TEST(ha_commit_trans(thd, TRUE)); +#ifdef WITH_WSREP + if (WSREP_ON) + wsrep_post_commit(thd, TRUE); +#endif /* WITH_WSREP */ } thd->variables.option_bits&= ~(OPTION_BEGIN | OPTION_KEEP_LOG); @@ -181,6 +193,12 @@ bool trans_begin(THD *thd, uint flags) thd->tx_read_only= false; } +#ifdef WITH_WSREP + thd->wsrep_PA_safe= true; + if (WSREP_CLIENT(thd) && wsrep_sync_wait(thd)) + DBUG_RETURN(TRUE); +#endif /* WITH_WSREP */ + thd->variables.option_bits|= OPTION_BEGIN; thd->server_status|= SERVER_STATUS_IN_TRANS; if (thd->tx_read_only) @@ -212,10 +230,18 @@ bool trans_commit(THD *thd) if (trans_check(thd)) DBUG_RETURN(TRUE); +#ifdef WITH_WSREP + if (WSREP_ON) + wsrep_register_hton(thd, TRUE); +#endif /* WITH_WSREP */ thd->server_status&= ~(SERVER_STATUS_IN_TRANS | SERVER_STATUS_IN_TRANS_READONLY); DBUG_PRINT("info", ("clearing SERVER_STATUS_IN_TRANS")); res= ha_commit_trans(thd, TRUE); +#ifdef WITH_WSREP + if (WSREP_ON) + wsrep_post_commit(thd, TRUE); +#endif /* WITH_WSREP */ /* if res is non-zero, then ha_commit_trans has rolled back the transaction, so the hooks for rollback will be called. @@ -261,10 +287,18 @@ bool trans_commit_implicit(THD *thd) /* Safety if one did "drop table" on locked tables */ if (!thd->locked_tables_mode) thd->variables.option_bits&= ~OPTION_TABLE_LOCK; +#ifdef WITH_WSREP + if (WSREP_ON) + wsrep_register_hton(thd, TRUE); +#endif /* WITH_WSREP */ thd->server_status&= ~(SERVER_STATUS_IN_TRANS | SERVER_STATUS_IN_TRANS_READONLY); DBUG_PRINT("info", ("clearing SERVER_STATUS_IN_TRANS")); res= MY_TEST(ha_commit_trans(thd, TRUE)); +#ifdef WITH_WSREP + if (WSREP_ON) + wsrep_post_commit(thd, TRUE); +#endif /* WITH_WSREP */ } thd->variables.option_bits&= ~(OPTION_BEGIN | OPTION_KEEP_LOG); @@ -297,9 +331,16 @@ bool trans_rollback(THD *thd) int res; DBUG_ENTER("trans_rollback"); +#ifdef WITH_WSREP + thd->wsrep_PA_safe= true; +#endif /* WITH_WSREP */ if (trans_check(thd)) DBUG_RETURN(TRUE); +#ifdef WITH_WSREP + if (WSREP_ON) + wsrep_register_hton(thd, TRUE); +#endif /* WITH_WSREP */ thd->server_status&= ~(SERVER_STATUS_IN_TRANS | SERVER_STATUS_IN_TRANS_READONLY); DBUG_PRINT("info", ("clearing SERVER_STATUS_IN_TRANS")); @@ -390,11 +431,19 @@ bool trans_commit_stmt(THD *thd) if (thd->transaction.stmt.ha_list) { +#ifdef WITH_WSREP + if (WSREP_ON) + wsrep_register_hton(thd, FALSE); +#endif /* WITH_WSREP */ res= ha_commit_trans(thd, FALSE); if (! thd->in_active_multi_stmt_transaction()) { thd->tx_isolation= (enum_tx_isolation) thd->variables.tx_isolation; thd->tx_read_only= thd->variables.tx_read_only; +#ifdef WITH_WSREP + if (WSREP_ON) + wsrep_post_commit(thd, FALSE); +#endif /* WITH_WSREP */ } } @@ -435,6 +484,10 @@ bool trans_rollback_stmt(THD *thd) if (thd->transaction.stmt.ha_list) { +#ifdef WITH_WSREP + if (WSREP_ON) + wsrep_register_hton(thd, FALSE); +#endif /* WITH_WSREP */ ha_rollback_trans(thd, FALSE); if (! thd->in_active_multi_stmt_transaction()) { @@ -812,9 +865,17 @@ bool trans_xa_commit(THD *thd) } else if (xa_state == XA_IDLE && thd->lex->xa_opt == XA_ONE_PHASE) { +#ifdef WITH_WSREP + if (WSREP_ON) + wsrep_register_hton(thd, TRUE); +#endif /* WITH_WSREP */ int r= ha_commit_trans(thd, TRUE); if ((res= MY_TEST(r))) my_error(r == 1 ? ER_XA_RBROLLBACK : ER_XAER_RMERR, MYF(0)); +#ifdef WITH_WSREP + if (WSREP_ON) + wsrep_post_commit(thd, TRUE); +#endif /* WITH_WSREP */ } else if (xa_state == XA_PREPARED && thd->lex->xa_opt == XA_NONE) { @@ -833,6 +894,10 @@ bool trans_xa_commit(THD *thd) if (thd->mdl_context.acquire_lock(&mdl_request, thd->variables.lock_wait_timeout)) { +#ifdef WITH_WSREP + if (WSREP_ON) + wsrep_register_hton(thd, TRUE); +#endif /* WITH_WSREP */ ha_rollback_trans(thd, TRUE); my_error(ER_XAER_RMERR, MYF(0)); } diff --git a/sql/tztime.cc b/sql/tztime.cc index d3b4fec6335..577474cd78f 100644 --- a/sql/tztime.cc +++ b/sql/tztime.cc @@ -2708,6 +2708,13 @@ main(int argc, char **argv) free_defaults(default_argv); return 1; } + +#ifdef WITH_WSREP + // Replicate MyISAM DDL for this session, cf. lp:1161432 + // timezone info unfixable in XtraDB Cluster + printf("SET GLOBAL wsrep_replicate_myisam= ON;\n"); +#endif /* WITH_WSREP */ + if (argc == 1 && !opt_leap) { /* Argument is timezonedir */ @@ -2755,6 +2762,11 @@ main(int argc, char **argv) free_root(&tz_storage, MYF(0)); } +#ifdef WITH_WSREP + // Reset wsrep_replicate_myisam. lp:1161432 + printf("SET GLOBAL wsrep_replicate_myisam= OFF;\n"); +#endif /* WITH_WSREP */ + free_defaults(default_argv); my_end(0); return 0; diff --git a/sql/wsrep_applier.cc b/sql/wsrep_applier.cc new file mode 100644 index 00000000000..23687e98c32 --- /dev/null +++ b/sql/wsrep_applier.cc @@ -0,0 +1,387 @@ +/* Copyright (C) 2013 Codership Oy <info@codership.com> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */ + +#include "wsrep_priv.h" +#include "wsrep_binlog.h" // wsrep_dump_rbr_buf() + +#include "log_event.h" // EVENT_LEN_OFFSET, etc. +#include "wsrep_applier.h" + +/* + read the first event from (*buf). The size of the (*buf) is (*buf_len). + At the end (*buf) is shitfed to point to the following event or NULL and + (*buf_len) will be changed to account just being read bytes of the 1st event. +*/ + +static Log_event* wsrep_read_log_event( + char **arg_buf, size_t *arg_buf_len, + const Format_description_log_event *description_event) +{ + DBUG_ENTER("wsrep_read_log_event"); + char *head= (*arg_buf); + + uint data_len = uint4korr(head + EVENT_LEN_OFFSET); + char *buf= (*arg_buf); + const char *error= 0; + Log_event *res= 0; + + if (data_len > wsrep_max_ws_size) + { + error = "Event too big"; + goto err; + } + + res= Log_event::read_log_event(buf, data_len, &error, description_event, true); + +err: + if (!res) + { + DBUG_ASSERT(error != 0); + sql_print_error("Error in Log_event::read_log_event(): " + "'%s', data_len: %d, event_type: %d", + error,data_len,head[EVENT_TYPE_OFFSET]); + } + (*arg_buf)+= data_len; + (*arg_buf_len)-= data_len; + DBUG_RETURN(res); +} + +#include "transaction.h" // trans_commit(), trans_rollback() +#include "rpl_rli.h" // class Relay_log_info; +#include "sql_base.h" // close_temporary_table() + +static inline void +wsrep_set_apply_format(THD* thd, Format_description_log_event* ev) +{ + if (thd->wsrep_apply_format) + { + delete (Format_description_log_event*)thd->wsrep_apply_format; + } + thd->wsrep_apply_format= ev; +} + +static inline Format_description_log_event* +wsrep_get_apply_format(THD* thd) +{ + if (thd->wsrep_apply_format) + return (Format_description_log_event*) thd->wsrep_apply_format; + /* TODO: mariadb does not support rli->get_rli_description_event() + * => look for alternative way to remember last FDE in replication + */ + //return thd->wsrep_rli->get_rli_description_event(); + thd->wsrep_apply_format = new Format_description_log_event(4); + return (Format_description_log_event*) thd->wsrep_apply_format; +} + +static wsrep_cb_status_t wsrep_apply_events(THD* thd, + const void* events_buf, + size_t buf_len) +{ + char *buf= (char *)events_buf; + int rcode= 0; + int event= 1; + + DBUG_ENTER("wsrep_apply_events"); + + if (thd->killed == KILL_CONNECTION) + { + WSREP_INFO("applier has been aborted, skipping apply_rbr: %lld", + (long long) wsrep_thd_trx_seqno(thd)); + DBUG_RETURN(WSREP_CB_FAILURE); + } + + mysql_mutex_lock(&thd->LOCK_wsrep_thd); + thd->wsrep_query_state= QUERY_EXEC; + if (thd->wsrep_conflict_state!= REPLAYING) + thd->wsrep_conflict_state= NO_CONFLICT; + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + + if (!buf_len) WSREP_DEBUG("empty rbr buffer to apply: %lld", + (long long) wsrep_thd_trx_seqno(thd)); + + while(buf_len) + { + int exec_res; + Log_event* ev= wsrep_read_log_event(&buf, &buf_len, + wsrep_get_apply_format(thd)); + + if (!ev) + { + WSREP_ERROR("applier could not read binlog event, seqno: %lld, len: %zu", + (long long)wsrep_thd_trx_seqno(thd), buf_len); + rcode= 1; + goto error; + } + + switch (ev->get_type_code()) { + case FORMAT_DESCRIPTION_EVENT: + wsrep_set_apply_format(thd, (Format_description_log_event*)ev); + continue; +#ifdef GTID_SUPPORT + case GTID_LOG_EVENT: + { + Gtid_log_event* gev= (Gtid_log_event*)ev; + if (gev->get_gno() == 0) + { + /* Skip GTID log event to make binlog to generate LTID on commit */ + delete ev; + continue; + } + } +#endif /* GTID_SUPPORT */ + default: + break; + } + + thd->set_server_id(ev->server_id); // use the original server id for logging + thd->set_time(); // time the query + wsrep_xid_init(&thd->transaction.xid_state.xid, + &thd->wsrep_trx_meta.gtid.uuid, + thd->wsrep_trx_meta.gtid.seqno); + thd->lex->current_select= 0; + if (!ev->when) + { + my_hrtime_t hrtime= my_hrtime(); + ev->when= hrtime_to_my_time(hrtime); + ev->when_sec_part= hrtime_sec_part(hrtime); + } + + ev->thd = thd; + //exec_res = ev->apply_event(thd->wsrep_rli); + exec_res = ev->apply_event(thd->wsrep_rgi); + DBUG_PRINT("info", ("exec_event result: %d", exec_res)); + + if (exec_res) + { + WSREP_WARN("RBR event %d %s apply warning: %d, %lld", + event, ev->get_type_str(), exec_res, + (long long) wsrep_thd_trx_seqno(thd)); + rcode= exec_res; + /* stop processing for the first error */ + delete ev; + goto error; + } + event++; + + if (thd->wsrep_conflict_state!= NO_CONFLICT && + thd->wsrep_conflict_state!= REPLAYING) + WSREP_WARN("conflict state after RBR event applying: %d, %lld", + thd->wsrep_query_state, (long long)wsrep_thd_trx_seqno(thd)); + + if (thd->wsrep_conflict_state == MUST_ABORT) { + WSREP_WARN("RBR event apply failed, rolling back: %lld", + (long long) wsrep_thd_trx_seqno(thd)); + trans_rollback(thd); + thd->locked_tables_list.unlock_locked_tables(thd); + /* Release transactional metadata locks. */ + thd->mdl_context.release_transactional_locks(); + thd->wsrep_conflict_state= NO_CONFLICT; + DBUG_RETURN(WSREP_CB_FAILURE); + } + + delete ev; + } + + error: + mysql_mutex_lock(&thd->LOCK_wsrep_thd); + thd->wsrep_query_state= QUERY_IDLE; + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + + assert(thd->wsrep_exec_mode== REPL_RECV); + + if (thd->killed == KILL_CONNECTION) + WSREP_INFO("applier aborted: %lld", (long long)wsrep_thd_trx_seqno(thd)); + + if (rcode) DBUG_RETURN(WSREP_CB_FAILURE); + DBUG_RETURN(WSREP_CB_SUCCESS); +} + +wsrep_cb_status_t wsrep_apply_cb(void* const ctx, + const void* const buf, + size_t const buf_len, + uint32_t const flags, + const wsrep_trx_meta_t* meta) +{ + THD* const thd((THD*)ctx); + + thd->wsrep_trx_meta = *meta; + +#ifdef WSREP_PROC_INFO + snprintf(thd->wsrep_info, sizeof(thd->wsrep_info) - 1, + "applying write set %lld: %p, %zu", + (long long)wsrep_thd_trx_seqno(thd), buf, buf_len); + thd_proc_info(thd, thd->wsrep_info); +#else + thd_proc_info(thd, "applying write set"); +#endif /* WSREP_PROC_INFO */ + + /* tune FK and UK checking policy */ + if (wsrep_slave_UK_checks == FALSE) + thd->variables.option_bits|= OPTION_RELAXED_UNIQUE_CHECKS; + else + thd->variables.option_bits&= ~OPTION_RELAXED_UNIQUE_CHECKS; + + if (wsrep_slave_FK_checks == FALSE) + thd->variables.option_bits|= OPTION_NO_FOREIGN_KEY_CHECKS; + else + thd->variables.option_bits&= ~OPTION_NO_FOREIGN_KEY_CHECKS; + + if (flags & WSREP_FLAG_ISOLATION) + { + thd->wsrep_apply_toi= true; + /* + Don't run in transaction mode with TOI actions. + */ + thd->variables.option_bits&= ~OPTION_BEGIN; + thd->server_status&= ~SERVER_STATUS_IN_TRANS; + } + wsrep_cb_status_t rcode(wsrep_apply_events(thd, buf, buf_len)); + +#ifdef WSREP_PROC_INFO + snprintf(thd->wsrep_info, sizeof(thd->wsrep_info) - 1, + "applied write set %lld", (long long)wsrep_thd_trx_seqno(thd)); + thd_proc_info(thd, thd->wsrep_info); +#else + thd_proc_info(thd, "applied write set"); +#endif /* WSREP_PROC_INFO */ + + if (WSREP_CB_SUCCESS != rcode) + { + wsrep_dump_rbr_buf(thd, buf, buf_len); + } + + TABLE *tmp; + while ((tmp = thd->temporary_tables)) + { + WSREP_DEBUG("Applier %lu, has temporary tables: %s.%s", + thd->thread_id, + (tmp->s) ? tmp->s->db.str : "void", + (tmp->s) ? tmp->s->table_name.str : "void"); + close_temporary_table(thd, tmp, 1, 1); + } + + return rcode; +} + +static wsrep_cb_status_t wsrep_commit(THD* const thd, + wsrep_seqno_t const global_seqno) +{ +#ifdef WSREP_PROC_INFO + snprintf(thd->wsrep_info, sizeof(thd->wsrep_info) - 1, + "committing %lld", (long long)wsrep_thd_trx_seqno(thd)); + thd_proc_info(thd, thd->wsrep_info); +#else + thd_proc_info(thd, "committing"); +#endif /* WSREP_PROC_INFO */ + + wsrep_cb_status_t const rcode(trans_commit(thd) ? + WSREP_CB_FAILURE : WSREP_CB_SUCCESS); + +#ifdef WSREP_PROC_INFO + snprintf(thd->wsrep_info, sizeof(thd->wsrep_info) - 1, + "committed %lld", (long long)wsrep_thd_trx_seqno(thd)); + thd_proc_info(thd, thd->wsrep_info); +#else + thd_proc_info(thd, "committed"); +#endif /* WSREP_PROC_INFO */ + + if (WSREP_CB_SUCCESS == rcode) + { + thd->wsrep_rgi->cleanup_context(thd, false); +#ifdef GTID_SUPPORT + thd->variables.gtid_next.set_automatic(); +#endif /* GTID_SUPPORT */ + // TODO: mark snapshot with global_seqno. + } + + return rcode; +} + +static wsrep_cb_status_t wsrep_rollback(THD* const thd, + wsrep_seqno_t const global_seqno) +{ +#ifdef WSREP_PROC_INFO + snprintf(thd->wsrep_info, sizeof(thd->wsrep_info) - 1, + "rolling back %lld", (long long)wsrep_thd_trx_seqno(thd)); + thd_proc_info(thd, thd->wsrep_info); +#else + thd_proc_info(thd, "rolling back"); +#endif /* WSREP_PROC_INFO */ + + wsrep_cb_status_t const rcode(trans_rollback(thd) ? + WSREP_CB_FAILURE : WSREP_CB_SUCCESS); + +#ifdef WSREP_PROC_INFO + snprintf(thd->wsrep_info, sizeof(thd->wsrep_info) - 1, + "rolled back %lld", (long long)wsrep_thd_trx_seqno(thd)); + thd_proc_info(thd, thd->wsrep_info); +#else + thd_proc_info(thd, "rolled back"); +#endif /* WSREP_PROC_INFO */ + + return rcode; +} + +wsrep_cb_status_t wsrep_commit_cb(void* const ctx, + uint32_t const flags, + const wsrep_trx_meta_t* meta, + wsrep_bool_t* const exit, + bool const commit) +{ + THD* const thd((THD*)ctx); + + assert(meta->gtid.seqno == wsrep_thd_trx_seqno(thd)); + + wsrep_cb_status_t rcode; + + if (commit) + rcode = wsrep_commit(thd, meta->gtid.seqno); + else + rcode = wsrep_rollback(thd, meta->gtid.seqno); + + wsrep_set_apply_format(thd, NULL); + thd->mdl_context.release_transactional_locks(); + free_root(thd->mem_root,MYF(MY_KEEP_PREALLOC)); + thd->tx_isolation= (enum_tx_isolation) thd->variables.tx_isolation; + + if (wsrep_slave_count_change < 0 && commit && WSREP_CB_SUCCESS == rcode) + { + mysql_mutex_lock(&LOCK_wsrep_slave_threads); + if (wsrep_slave_count_change < 0) + { + wsrep_slave_count_change++; + *exit = true; + } + mysql_mutex_unlock(&LOCK_wsrep_slave_threads); + } + + if (*exit == false && thd->wsrep_applier) + { + /* From trans_begin() */ + thd->variables.option_bits|= OPTION_BEGIN; + thd->server_status|= SERVER_STATUS_IN_TRANS; + thd->wsrep_apply_toi= false; + } + + return rcode; +} + + +wsrep_cb_status_t wsrep_unordered_cb(void* const ctx, + const void* const data, + size_t const size) +{ + return WSREP_CB_SUCCESS; +} diff --git a/sql/wsrep_applier.h b/sql/wsrep_applier.h new file mode 100644 index 00000000000..816970db67c --- /dev/null +++ b/sql/wsrep_applier.h @@ -0,0 +1,38 @@ +/* Copyright 2013 Codership Oy <http://www.codership.com> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#ifndef WSREP_APPLIER_H +#define WSREP_APPLIER_H + +#include <sys/types.h> + +/* wsrep callback prototypes */ + +wsrep_cb_status_t wsrep_apply_cb(void *ctx, + const void* buf, size_t buf_len, + uint32_t flags, + const wsrep_trx_meta_t* meta); + +wsrep_cb_status_t wsrep_commit_cb(void *ctx, + uint32_t flags, + const wsrep_trx_meta_t* meta, + wsrep_bool_t* exit, + bool commit); + +wsrep_cb_status_t wsrep_unordered_cb(void* ctx, + const void* data, + size_t size); + +#endif /* WSREP_APPLIER_H */ diff --git a/sql/wsrep_binlog.cc b/sql/wsrep_binlog.cc new file mode 100644 index 00000000000..ee8a9cb130b --- /dev/null +++ b/sql/wsrep_binlog.cc @@ -0,0 +1,414 @@ +/* Copyright (C) 2013 Codership Oy <info@codership.com> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */ + +#include "wsrep_binlog.h" +#include "wsrep_priv.h" +#include "log.h" + +extern handlerton *binlog_hton; +/* + Write the contents of a cache to a memory buffer. + + This function quite the same as MYSQL_BIN_LOG::write_cache(), + with the exception that here we write in buffer instead of log file. + */ +int wsrep_write_cache_buf(IO_CACHE *cache, uchar **buf, size_t *buf_len) +{ + *buf= NULL; + *buf_len= 0; + + my_off_t const saved_pos(my_b_tell(cache)); + + if (reinit_io_cache(cache, READ_CACHE, 0, 0, 0)) + { + WSREP_ERROR("failed to initialize io-cache"); + return ER_ERROR_ON_WRITE; + } + + uint length = my_b_bytes_in_cache(cache); + if (unlikely(0 == length)) length = my_b_fill(cache); + + size_t total_length = 0; + + if (likely(length > 0)) do + { + total_length += length; + /* + Bail out if buffer grows too large. + A temporary fix to avoid allocating indefinitely large buffer, + not a real limit on a writeset size which includes other things + like header and keys. + */ + if (total_length > wsrep_max_ws_size) + { + WSREP_WARN("transaction size limit (%lu) exceeded: %zu", + wsrep_max_ws_size, total_length); + goto error; + } + uchar* tmp = (uchar *)my_realloc(*buf, total_length, + MYF(MY_ALLOW_ZERO_PTR)); + if (!tmp) + { + WSREP_ERROR("could not (re)allocate buffer: %zu + %u", + *buf_len, length); + goto error; + } + *buf = tmp; + + memcpy(*buf + *buf_len, cache->read_pos, length); + *buf_len = total_length; + cache->read_pos = cache->read_end; + } while ((cache->file >= 0) && (length = my_b_fill(cache))); + + if (reinit_io_cache(cache, WRITE_CACHE, saved_pos, 0, 0)) + { + WSREP_WARN("failed to initialize io-cache"); + goto cleanup; + } + + return 0; + +error: + if (reinit_io_cache(cache, WRITE_CACHE, saved_pos, 0, 0)) + { + WSREP_WARN("failed to initialize io-cache"); + } +cleanup: + my_free(*buf); + *buf= NULL; + *buf_len= 0; + return ER_ERROR_ON_WRITE; +} + +#define STACK_SIZE 4096 /* 4K - for buffer preallocated on the stack: + * many transactions would fit in there + * so there is no need to reach for the heap */ + +/* Returns minimum multiple of HEAP_PAGE_SIZE that is >= length */ +static inline size_t +heap_size(size_t length) +{ + return (length + HEAP_PAGE_SIZE - 1)/HEAP_PAGE_SIZE*HEAP_PAGE_SIZE; +} + +/* append data to writeset */ +static inline wsrep_status_t +wsrep_append_data(wsrep_t* const wsrep, + wsrep_ws_handle_t* const ws, + const void* const data, + size_t const len) +{ + struct wsrep_buf const buff = { data, len }; + wsrep_status_t const rc(wsrep->append_data(wsrep, ws, &buff, 1, + WSREP_DATA_ORDERED, true)); + if (rc != WSREP_OK) + { + WSREP_WARN("append_data() returned %d", rc); + } + + return rc; +} + +/* + Write the contents of a cache to wsrep provider. + + This function quite the same as MYSQL_BIN_LOG::write_cache(), + with the exception that here we write in buffer instead of log file. + + This version reads all of cache into single buffer and then appends to a + writeset at once. + */ +static int wsrep_write_cache_once(wsrep_t* const wsrep, + THD* const thd, + IO_CACHE* const cache, + size_t* const len) +{ + my_off_t const saved_pos(my_b_tell(cache)); + + if (reinit_io_cache(cache, READ_CACHE, 0, 0, 0)) + { + WSREP_ERROR("failed to initialize io-cache"); + return ER_ERROR_ON_WRITE; + } + + int err(WSREP_OK); + + size_t total_length(0); + uchar stack_buf[STACK_SIZE]; /* to avoid dynamic allocations for few data*/ + uchar* heap_buf(NULL); + uchar* buf(stack_buf); + size_t allocated(sizeof(stack_buf)); + size_t used(0); + + uint length(my_b_bytes_in_cache(cache)); + if (unlikely(0 == length)) length = my_b_fill(cache); + + if (likely(length > 0)) do + { + total_length += length; + /* + Bail out if buffer grows too large. + A temporary fix to avoid allocating indefinitely large buffer, + not a real limit on a writeset size which includes other things + like header and keys. + */ + if (unlikely(total_length > wsrep_max_ws_size)) + { + WSREP_WARN("transaction size limit (%lu) exceeded: %zu", + wsrep_max_ws_size, total_length); + err = WSREP_TRX_SIZE_EXCEEDED; + goto cleanup; + } + + if (total_length > allocated) + { + size_t const new_size(heap_size(total_length)); + uchar* tmp = (uchar *)my_realloc(heap_buf, new_size, + MYF(MY_ALLOW_ZERO_PTR)); + if (!tmp) + { + WSREP_ERROR("could not (re)allocate buffer: %zu + %u", + allocated, length); + err = WSREP_TRX_SIZE_EXCEEDED; + goto cleanup; + } + + heap_buf = tmp; + buf = heap_buf; + allocated = new_size; + + if (used <= STACK_SIZE && used > 0) // there's data in stack_buf + { + DBUG_ASSERT(buf == stack_buf); + memcpy(heap_buf, stack_buf, used); + } + } + + memcpy(buf + used, cache->read_pos, length); + used = total_length; + cache->read_pos = cache->read_end; + } while ((cache->file >= 0) && (length = my_b_fill(cache))); + + if (used > 0) + err = wsrep_append_data(wsrep, &thd->wsrep_ws_handle, buf, used); + + if (WSREP_OK == err) *len = total_length; + +cleanup: + if (reinit_io_cache(cache, WRITE_CACHE, saved_pos, 0, 0)) + { + WSREP_ERROR("failed to reinitialize io-cache"); + } + + if (unlikely(WSREP_OK != err)) wsrep_dump_rbr_buf(thd, buf, used); + + my_free(heap_buf); + return err; +} + +/* + Write the contents of a cache to wsrep provider. + + This function quite the same as MYSQL_BIN_LOG::write_cache(), + with the exception that here we write in buffer instead of log file. + + This version uses incremental data appending as it reads it from cache. + */ +static int wsrep_write_cache_inc(wsrep_t* const wsrep, + THD* const thd, + IO_CACHE* const cache, + size_t* const len) +{ + my_off_t const saved_pos(my_b_tell(cache)); + + if (reinit_io_cache(cache, READ_CACHE, 0, 0, 0)) + { + WSREP_ERROR("failed to initialize io-cache"); + return WSREP_TRX_ERROR; + } + + int err(WSREP_OK); + + size_t total_length(0); + + uint length(my_b_bytes_in_cache(cache)); + if (unlikely(0 == length)) length = my_b_fill(cache); + + if (likely(length > 0)) do + { + total_length += length; + /* bail out if buffer grows too large + not a real limit on a writeset size which includes other things + like header and keys. + */ + if (unlikely(total_length > wsrep_max_ws_size)) + { + WSREP_WARN("transaction size limit (%lu) exceeded: %zu", + wsrep_max_ws_size, total_length); + err = WSREP_TRX_SIZE_EXCEEDED; + goto cleanup; + } + + if(WSREP_OK != (err=wsrep_append_data(wsrep, &thd->wsrep_ws_handle, + cache->read_pos, length))) + goto cleanup; + + cache->read_pos = cache->read_end; + } while ((cache->file >= 0) && (length = my_b_fill(cache))); + + if (WSREP_OK == err) *len = total_length; + +cleanup: + if (reinit_io_cache(cache, WRITE_CACHE, saved_pos, 0, 0)) + { + WSREP_ERROR("failed to reinitialize io-cache"); + } + + return err; +} + +/* + Write the contents of a cache to wsrep provider. + + This function quite the same as MYSQL_BIN_LOG::write_cache(), + with the exception that here we write in buffer instead of log file. + */ +int wsrep_write_cache(wsrep_t* const wsrep, + THD* const thd, + IO_CACHE* const cache, + size_t* const len) +{ + if (wsrep_incremental_data_collection) { + return wsrep_write_cache_inc(wsrep, thd, cache, len); + } + else { + return wsrep_write_cache_once(wsrep, thd, cache, len); + } +} + +void wsrep_dump_rbr_buf(THD *thd, const void* rbr_buf, size_t buf_len) +{ + char filename[PATH_MAX]= {0}; + int len= snprintf(filename, PATH_MAX, "%s/GRA_%ld_%lld.log", + wsrep_data_home_dir, thd->thread_id, + (long long)wsrep_thd_trx_seqno(thd)); + if (len >= PATH_MAX) + { + WSREP_ERROR("RBR dump path too long: %d, skipping dump.", len); + return; + } + + FILE *of= fopen(filename, "wb"); + if (of) + { + fwrite (rbr_buf, buf_len, 1, of); + fclose(of); + } + else + { + WSREP_ERROR("Failed to open file '%s': %d (%s)", + filename, errno, strerror(errno)); + } +} + +/* + wsrep exploits binlog's caches even if binlogging itself is not + activated. In such case connection close needs calling + actual binlog's method. + Todo: split binlog hton from its caches to use ones by wsrep + without referring to binlog's stuff. +*/ +int wsrep_binlog_close_connection(THD* thd) +{ + DBUG_ENTER("wsrep_binlog_close_connection"); + if (thd_get_ha_data(thd, binlog_hton) != NULL) + binlog_hton->close_connection (binlog_hton, thd); + DBUG_RETURN(0); +} + +int wsrep_binlog_savepoint_set(THD *thd, void *sv) +{ + if (!wsrep_emulate_bin_log) return 0; + int rcode = binlog_hton->savepoint_set(binlog_hton, thd, sv); + return rcode; +} + +int wsrep_binlog_savepoint_rollback(THD *thd, void *sv) +{ + if (!wsrep_emulate_bin_log) return 0; + int rcode = binlog_hton->savepoint_rollback(binlog_hton, thd, sv); + return rcode; +} + +void wsrep_dump_rbr_direct(THD* thd, IO_CACHE* cache) +{ + char filename[PATH_MAX]= {0}; + int len= snprintf(filename, PATH_MAX, "%s/GRA_%ld_%lld.log", + wsrep_data_home_dir, thd->thread_id, + (long long)wsrep_thd_trx_seqno(thd)); + size_t bytes_in_cache = 0; + // check path + if (len >= PATH_MAX) + { + WSREP_ERROR("RBR dump path too long: %d, skipping dump.", len); + return ; + } + // init cache + my_off_t const saved_pos(my_b_tell(cache)); + if (reinit_io_cache(cache, READ_CACHE, 0, 0, 0)) + { + WSREP_ERROR("failed to initialize io-cache"); + return ; + } + // open file + FILE* of = fopen(filename, "wb"); + if (!of) + { + WSREP_ERROR("Failed to open file '%s': %d (%s)", + filename, errno, strerror(errno)); + goto cleanup; + } + // ready to write + bytes_in_cache= my_b_bytes_in_cache(cache); + if (unlikely(bytes_in_cache == 0)) bytes_in_cache = my_b_fill(cache); + if (likely(bytes_in_cache > 0)) do + { + if (my_fwrite(of, cache->read_pos, bytes_in_cache, + MYF(MY_WME | MY_NABP)) == (size_t) -1) + { + WSREP_ERROR("Failed to write file '%s'", filename); + goto cleanup; + } + cache->read_pos= cache->read_end; + } while ((cache->file >= 0) && (bytes_in_cache= my_b_fill(cache))); + if(cache->error == -1) + { + WSREP_ERROR("RBR inconsistent"); + goto cleanup; + } +cleanup: + // init back + if (reinit_io_cache(cache, WRITE_CACHE, saved_pos, 0, 0)) + { + WSREP_ERROR("failed to reinitialize io-cache"); + } + // close file + if (of) fclose(of); +} + +void thd_binlog_flush_pending_rows_event(THD *thd, bool stmt_end) +{ + thd->binlog_flush_pending_rows_event(stmt_end); +} diff --git a/sql/wsrep_binlog.h b/sql/wsrep_binlog.h new file mode 100644 index 00000000000..a3d8ec6ec2c --- /dev/null +++ b/sql/wsrep_binlog.h @@ -0,0 +1,59 @@ +/* Copyright (C) 2013 Codership Oy <info@codership.com> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */ + +#ifndef WSREP_BINLOG_H +#define WSREP_BINLOG_H + +#include "sql_class.h" // THD, IO_CACHE + +#define HEAP_PAGE_SIZE 65536 /* 64K */ +#define WSREP_MAX_WS_SIZE (0xFFFFFFFFUL - HEAP_PAGE_SIZE) + +/* + Write the contents of a cache to a memory buffer. + + This function quite the same as MYSQL_BIN_LOG::write_cache(), + with the exception that here we write in buffer instead of log file. + */ +int wsrep_write_cache_buf(IO_CACHE *cache, uchar **buf, size_t *buf_len); + +/* + Write the contents of a cache to wsrep provider. + + This function quite the same as MYSQL_BIN_LOG::write_cache(), + with the exception that here we write in buffer instead of log file. + + @param len total amount of data written + @return wsrep error status + */ +int wsrep_write_cache (wsrep_t* wsrep, + THD* thd, + IO_CACHE* cache, + size_t* len); + +/* Dump replication buffer to disk */ +void wsrep_dump_rbr_buf(THD *thd, const void* rbr_buf, size_t buf_len); + +/* Dump replication buffer to disk without intermediate buffer */ +void wsrep_dump_rbr_direct(THD* thd, IO_CACHE* cache); + +int wsrep_binlog_close_connection(THD* thd); +int wsrep_binlog_savepoint_set(THD *thd, void *sv); +int wsrep_binlog_savepoint_rollback(THD *thd, void *sv); + +/* Dump replication buffer to disk without intermediate buffer */ +void wsrep_dump_rbr_direct(THD* thd, IO_CACHE* cache); + +#endif /* WSREP_BINLOG_H */ diff --git a/sql/wsrep_check_opts.cc b/sql/wsrep_check_opts.cc new file mode 100644 index 00000000000..5ec18c79978 --- /dev/null +++ b/sql/wsrep_check_opts.cc @@ -0,0 +1,379 @@ +/* Copyright 2011 Codership Oy <http://www.codership.com> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +//#include <mysqld.h> +#include <sql_class.h> +//#include <sql_plugin.h> +//#include <set_var.h> + +#include "wsrep_mysqld.h" + +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <ctype.h> + +/* This file is about checking for correctness of mysql configuration options */ + +struct opt +{ + const char* const name; + const char* value; +}; + +/* A list of options to check. + * At first we assume default values and then see if they are changed on CLI or + * in my.cnf */ +static struct opt opts[] = +{ + { "wsrep_slave_threads", "1" }, // mysqld.cc + { "bind_address", "0.0.0.0" }, // mysqld.cc + { "wsrep_sst_method", "rsync" }, // mysqld.cc + { "wsrep_sst_receive_address","AUTO"}, // mysqld.cc + { "binlog_format", "ROW" }, // mysqld.cc + { "wsrep_provider", "none" }, // mysqld.cc + { "query_cache_type", "0" }, // mysqld.cc + { "query_cache_size", "0" }, // mysqld.cc + { "locked_in_memory", "0" }, // mysqld.cc + { "wsrep_cluster_address", "0" }, // mysqld.cc + { "locks_unsafe_for_binlog", "0" }, // ha_innodb.cc + { "autoinc_lock_mode", "1" }, // ha_innodb.cc + { 0, 0 } +}; + +enum +{ + WSREP_SLAVE_THREADS, + BIND_ADDRESS, + WSREP_SST_METHOD, + WSREP_SST_RECEIVE_ADDRESS, + BINLOG_FORMAT, + WSREP_PROVIDER, + QUERY_CACHE_TYPE, + QUERY_CACHE_SIZE, + LOCKED_IN_MEMORY, + WSREP_CLUSTER_ADDRESS, + LOCKS_UNSAFE_FOR_BINLOG, + AUTOINC_LOCK_MODE +}; + + +/* A class to make a copy of argv[] vector */ +struct argv_copy +{ + int const argc_; + char** argv_; + + argv_copy (int const argc, const char* const argv[]) : + argc_ (argc), + argv_ (reinterpret_cast<char**>(calloc(argc_, sizeof(char*)))) + { + if (argv_) + { + for (int i = 0; i < argc_; ++i) + { + argv_[i] = strdup(argv[i]); + + if (!argv_[i]) + { + argv_free (); // free whatever bee allocated + return; + } + } + } + } + + ~argv_copy () { argv_free (); } + +private: + argv_copy (const argv_copy&); + argv_copy& operator= (const argv_copy&); + + void argv_free() + { + if (argv_) + { + for (int i = 0; (i < argc_) && argv_[i] ; ++i) free (argv_[i]); + free (argv_); + argv_ = 0; + } + } +}; + +/* a short corresponding to '--' byte sequence */ +static short const long_opt_prefix ('-' + ('-' << 8)); + +/* Normalizes long options to have '_' instead of '-' */ +static int +normalize_opts (argv_copy& a) +{ + if (a.argv_) + { + for (int i = 0; i < a.argc_; ++i) + { + char* ptr = a.argv_[i]; + if (long_opt_prefix == *(short*)ptr) // long option + { + ptr += 2; + const char* end = strchr(ptr, '='); + + if (!end) end = ptr + strlen(ptr); + + for (; ptr != end; ++ptr) if ('-' == *ptr) *ptr = '_'; + } + } + + return 0; + } + + return EINVAL; +} + +/* Find required options in the argument list and change their values */ +static int +find_opts (argv_copy& a, struct opt* const opts) +{ + for (int i = 0; i < a.argc_; ++i) + { + char* ptr = a.argv_[i] + 2; // we're interested only in long options + + struct opt* opt = opts; + for (; 0 != opt->name; ++opt) + { + if (!strstr(ptr, opt->name)) continue; // try next option + + /* 1. try to find value after the '=' */ + opt->value = strchr(ptr, '=') + 1; + + /* 2. if no '=', try next element in the argument vector */ + if (reinterpret_cast<void*>(1) == opt->value) + { + /* also check that the next element is not an option itself */ + if (i + 1 < a.argc_ && *(a.argv_[i + 1]) != '-') + { + ++i; + opt->value = a.argv_[i]; + } + else opt->value = ""; // no value supplied (like boolean opt) + } + + break; // option found, break inner loop + } + } + + return 0; +} + +/* Parses string for an integer. Returns 0 on success. */ +int get_long_long (const struct opt& opt, long long* const val, int const base) +{ + const char* const str = opt.value; + + if ('\0' != *str) + { + char* endptr; + + *val = strtoll (str, &endptr, base); + + if ('k' == *endptr || 'K' == *endptr) + { + *val *= 1024L; + endptr++; + } + else if ('m' == *endptr || 'M' == *endptr) + { + *val *= 1024L * 1024L; + endptr++; + } + else if ('g' == *endptr || 'G' == *endptr) + { + *val *= 1024L * 1024L * 1024L; + endptr++; + } + + if ('\0' == *endptr) return 0; // the whole string was a valid integer + } + + WSREP_ERROR ("Bad value for *%s: '%s'. Should be integer.", + opt.name, opt.value); + + return EINVAL; +} + +/* This is flimzy coz hell knows how mysql interprets boolean strings... + * and, no, I'm not going to become versed in how mysql handles options - + * I'd rather sing. + + Aha, http://dev.mysql.com/doc/refman/5.1/en/dynamic-system-variables.html: + Variables that have a type of “boolean” can be set to 0, 1, ON or OFF. (If you + set them on the command line or in an option file, use the numeric values.) + + So it is '0' for FALSE, '1' or empty string for TRUE + + */ +int get_bool (const struct opt& opt, bool* const val) +{ + const char* str = opt.value; + + while (isspace(*str)) ++str; // skip initial whitespaces + + ssize_t str_len = strlen(str); + switch (str_len) + { + case 0: + *val = true; + return 0; + case 1: + if ('0' == *str || '1' == *str) + { + *val = ('1' == *str); + return 0; + } + } + + WSREP_ERROR ("Bad value for *%s: '%s'. Should be '0', '1' or empty string.", + opt.name, opt.value); + + return EINVAL; +} + +static int +check_opts (int const argc, const char* const argv[], struct opt opts[]) +{ + /* First, make a copy of argv to be able to manipulate it */ + argv_copy a(argc, argv); + + if (!a.argv_) + { + WSREP_ERROR ("Could not copy argv vector: not enough memory."); + return ENOMEM; + } + + int err = normalize_opts (a); + if (err) + { + WSREP_ERROR ("Failed to normalize options."); + return err; + } + + err = find_opts (a, opts); + if (err) + { + WSREP_ERROR ("Failed to parse options."); + return err; + } + + /* At this point we have updated default values in our option list to + what has been specified on the command line / my.cnf */ + + long long slave_threads; + err = get_long_long (opts[WSREP_SLAVE_THREADS], &slave_threads, 10); + if (err) return err; + + int rcode = 0; + + if (slave_threads > 1) + /* Need to check AUTOINC_LOCK_MODE and LOCKS_UNSAFE_FOR_BINLOG */ + { + long long autoinc_lock_mode; + err = get_long_long (opts[AUTOINC_LOCK_MODE], &autoinc_lock_mode, 10); + if (err) return err; + + bool locks_unsafe_for_binlog; + err = get_bool (opts[LOCKS_UNSAFE_FOR_BINLOG],&locks_unsafe_for_binlog); + if (err) return err; + + if (autoinc_lock_mode != 2) + { + WSREP_ERROR ("Parallel applying (wsrep_slave_threads > 1) requires" + " innodb_autoinc_lock_mode = 2."); + rcode = EINVAL; + } + } + + bool locked_in_memory; + err = get_bool (opts[LOCKED_IN_MEMORY], &locked_in_memory); + if (err) { WSREP_ERROR("get_bool error: %s", strerror(err)); return err; } + if (locked_in_memory) + { + WSREP_ERROR ("Memory locking is not supported (locked_in_memory=%s)", + locked_in_memory ? "ON" : "OFF"); + rcode = EINVAL; + } + + if (!strcasecmp(opts[WSREP_SST_METHOD].value,"mysqldump")) + { + if (!strcasecmp(opts[BIND_ADDRESS].value, "127.0.0.1") || + !strcasecmp(opts[BIND_ADDRESS].value, "localhost")) + { + WSREP_ERROR ("wsrep_sst_method is set to 'mysqldump' yet " + "mysqld bind_address is set to '%s', which makes it " + "impossible to receive state transfer from another " + "node, since mysqld won't accept such connections. " + "If you wish to use mysqldump state transfer method, " + "set bind_address to allow mysql client connections " + "from other cluster members (e.g. 0.0.0.0).", + opts[BIND_ADDRESS].value); + rcode = EINVAL; + } + } + else + { + // non-mysqldump SST requires wsrep_cluster_address on startup + if (strlen(opts[WSREP_CLUSTER_ADDRESS].value) == 0) + { + WSREP_ERROR ("%s SST method requires wsrep_cluster_address to be " + "configured on startup.",opts[WSREP_SST_METHOD].value); + rcode = EINVAL; + } + } + + if (strcasecmp(opts[WSREP_SST_RECEIVE_ADDRESS].value, "AUTO")) + { + if (!strncasecmp(opts[WSREP_SST_RECEIVE_ADDRESS].value, + "127.0.0.1", strlen("127.0.0.1")) || + !strncasecmp(opts[WSREP_SST_RECEIVE_ADDRESS].value, + "localhost", strlen("localhost"))) + { + WSREP_WARN ("wsrep_sst_receive_address is set to '%s' which " + "makes it impossible for another host to reach this " + "one. Please set it to the address which this node " + "can be connected at by other cluster members.", + opts[WSREP_SST_RECEIVE_ADDRESS].value); +// rcode = EINVAL; + } + } + + if (strcasecmp(opts[WSREP_PROVIDER].value, "none")) + { + if (strcasecmp(opts[BINLOG_FORMAT].value, "ROW")) + { + WSREP_ERROR ("Only binlog_format = 'ROW' is currently supported. " + "Configured value: '%s'. Please adjust your " + "configuration.", opts[BINLOG_FORMAT].value); + + rcode = EINVAL; + } + } + + return rcode; +} + +int +wsrep_check_opts (int const argc, char* const* const argv) +{ + return check_opts (argc, argv, opts); +} + diff --git a/sql/wsrep_hton.cc b/sql/wsrep_hton.cc new file mode 100644 index 00000000000..96f1431a82f --- /dev/null +++ b/sql/wsrep_hton.cc @@ -0,0 +1,577 @@ +/* Copyright 2008 Codership Oy <http://www.codership.com> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include <mysqld.h> +#include "sql_base.h" +#include "rpl_filter.h" +#include <sql_class.h> +#include "wsrep_mysqld.h" +#include "wsrep_binlog.h" +#include <cstdio> +#include <cstdlib> + +extern ulonglong thd_to_trx_id(THD *thd); + +extern "C" int thd_binlog_format(const MYSQL_THD thd); +// todo: share interface with ha_innodb.c + +enum wsrep_trx_status wsrep_run_wsrep_commit(THD *thd, handlerton *hton, + bool all); + +/* + Cleanup after local transaction commit/rollback, replay or TOI. +*/ +void wsrep_cleanup_transaction(THD *thd) +{ + if (wsrep_emulate_bin_log) thd_binlog_trx_reset(thd); + thd->wsrep_ws_handle.trx_id= WSREP_UNDEFINED_TRX_ID; + thd->wsrep_trx_meta.gtid= WSREP_GTID_UNDEFINED; + thd->wsrep_trx_meta.depends_on= WSREP_SEQNO_UNDEFINED; + thd->wsrep_exec_mode= LOCAL_STATE; + return; +} + +/* + wsrep hton +*/ +handlerton *wsrep_hton; + + +/* + Registers wsrep hton at commit time if transaction has registered htons + for supported engine types. + + Hton should not be registered for TOTAL_ORDER operations. + + Registration is needed for both LOCAL_MODE and REPL_RECV transactions to run + commit in 2pc so that wsrep position gets properly recorded in storage + engines. + + Note that all hton calls should immediately return for threads that are + in REPL_RECV mode as their states are controlled by wsrep appliers or + replaying code. Only threads in LOCAL_MODE should run wsrep callbacks + from hton methods. +*/ +void wsrep_register_hton(THD* thd, bool all) +{ + if (thd->wsrep_exec_mode != TOTAL_ORDER && !thd->wsrep_apply_toi) + { + THD_TRANS *trans=all ? &thd->transaction.all : &thd->transaction.stmt; + for (Ha_trx_info *i= trans->ha_list; WSREP(thd) && i; i = i->next()) + { + if ((i->ht()->db_type == DB_TYPE_INNODB) || + (i->ht()->db_type == DB_TYPE_TOKUDB)) + { + trans_register_ha(thd, all, wsrep_hton); + + /* follow innodb read/write settting + * but, as an exception: CTAS with empty result set will not be + * replicated unless we declare wsrep hton as read/write here + */ + if (i->is_trx_read_write() || + (thd->lex->sql_command == SQLCOM_CREATE_TABLE && + thd->wsrep_exec_mode == LOCAL_STATE)) + { + thd->ha_data[wsrep_hton->slot].ha_info[all].set_trx_read_write(); + } + break; + } + } + } +} + +/* + Calls wsrep->post_commit() for locally executed transactions that have + got seqno from provider (must commit) and don't require replaying. + */ +void wsrep_post_commit(THD* thd, bool all) +{ + if (thd->wsrep_exec_mode == LOCAL_COMMIT) + { + DBUG_ASSERT(thd->wsrep_trx_meta.gtid.seqno != WSREP_SEQNO_UNDEFINED); + if (wsrep->post_commit(wsrep, &thd->wsrep_ws_handle)) + { + DBUG_PRINT("wsrep", ("set committed fail")); + WSREP_WARN("set committed fail: %llu %d", + (long long)thd->real_id, thd->get_stmt_da()->status()); + } + wsrep_cleanup_transaction(thd); + } +} + +/* + wsrep exploits binlog's caches even if binlogging itself is not + activated. In such case connection close needs calling + actual binlog's method. + Todo: split binlog hton from its caches to use ones by wsrep + without referring to binlog's stuff. +*/ +static int +wsrep_close_connection(handlerton* hton, THD* thd) +{ + DBUG_ENTER("wsrep_close_connection"); + + if (thd->wsrep_exec_mode == REPL_RECV) + { + DBUG_RETURN(0); + } + DBUG_RETURN(wsrep_binlog_close_connection (thd)); +} + +/* + prepare/wsrep_run_wsrep_commit can fail in two ways + - certification test or an equivalent. As a result, + the current transaction just rolls back + Error codes: + WSREP_TRX_CERT_FAIL, WSREP_TRX_SIZE_EXCEEDED, WSREP_TRX_ERROR + - a post-certification failure makes this server unable to + commit its own WS and therefore the server must abort +*/ +static int wsrep_prepare(handlerton *hton, THD *thd, bool all) +{ + DBUG_ENTER("wsrep_prepare"); + + if (thd->wsrep_exec_mode == REPL_RECV) + { + DBUG_RETURN(0); + } + + DBUG_ASSERT(thd->ha_data[wsrep_hton->slot].ha_info[all].is_trx_read_write()); + DBUG_ASSERT(thd->wsrep_exec_mode == LOCAL_STATE); + DBUG_ASSERT(thd->wsrep_trx_meta.gtid.seqno == WSREP_SEQNO_UNDEFINED); + + if ((all || + !thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) && + (thd->variables.wsrep_on && !wsrep_trans_cache_is_empty(thd))) + { + DBUG_RETURN (wsrep_run_wsrep_commit(thd, hton, all)); + } + DBUG_RETURN(0); +} + +static int wsrep_savepoint_set(handlerton *hton, THD *thd, void *sv) +{ + DBUG_ENTER("wsrep_savepoint_set"); + + if (thd->wsrep_exec_mode == REPL_RECV) + { + DBUG_RETURN(0); + } + + if (!wsrep_emulate_bin_log) DBUG_RETURN(0); + int rcode = wsrep_binlog_savepoint_set(thd, sv); + DBUG_RETURN(rcode); +} + +static int wsrep_savepoint_rollback(handlerton *hton, THD *thd, void *sv) +{ + DBUG_ENTER("wsrep_savepoint_rollback"); + + if (thd->wsrep_exec_mode == REPL_RECV) + { + DBUG_RETURN(0); + } + + if (!wsrep_emulate_bin_log) DBUG_RETURN(0); + int rcode = wsrep_binlog_savepoint_rollback(thd, sv); + DBUG_RETURN(rcode); +} + +static int wsrep_rollback(handlerton *hton, THD *thd, bool all) +{ + DBUG_ENTER("wsrep_rollback"); + + if (thd->wsrep_exec_mode == REPL_RECV) + { + DBUG_RETURN(0); + } + + mysql_mutex_lock(&thd->LOCK_wsrep_thd); + switch (thd->wsrep_exec_mode) + { + case TOTAL_ORDER: + case REPL_RECV: + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + WSREP_DEBUG("Avoiding wsrep rollback for failed DDL: %s", thd->query()); + DBUG_RETURN(0); + default: break; + } + + if ((all || !thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) && + (thd->variables.wsrep_on && thd->wsrep_conflict_state != MUST_REPLAY)) + { + if (wsrep->post_rollback(wsrep, &thd->wsrep_ws_handle)) + { + DBUG_PRINT("wsrep", ("setting rollback fail")); + WSREP_ERROR("settting rollback fail: thd: %llu SQL: %s", + (long long)thd->real_id, thd->query()); + } + wsrep_cleanup_transaction(thd); + } + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + DBUG_RETURN(0); +} + +int wsrep_commit(handlerton *hton, THD *thd, bool all) +{ + DBUG_ENTER("wsrep_commit"); + + if (thd->wsrep_exec_mode == REPL_RECV) + { + DBUG_RETURN(0); + } + + mysql_mutex_lock(&thd->LOCK_wsrep_thd); + if ((all || !thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) && + (thd->variables.wsrep_on && thd->wsrep_conflict_state != MUST_REPLAY)) + { + if (thd->wsrep_exec_mode == LOCAL_COMMIT) + { + DBUG_ASSERT(thd->ha_data[wsrep_hton->slot].ha_info[all].is_trx_read_write()); + /* + Call to wsrep->post_commit() (moved to wsrep_post_commit()) must + be done only after commit has done for all involved htons. + */ + DBUG_PRINT("wsrep", ("commit")); + } + else + { + /* + Transaction didn't go through wsrep->pre_commit() so just roll back + possible changes to clean state. + */ + if (WSREP_PROVIDER_EXISTS) { + if (wsrep->post_rollback(wsrep, &thd->wsrep_ws_handle)) + { + DBUG_PRINT("wsrep", ("setting rollback fail")); + WSREP_ERROR("settting rollback fail: thd: %llu SQL: %s", + (long long)thd->real_id, thd->query()); + } + } + wsrep_cleanup_transaction(thd); + } + } + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + DBUG_RETURN(0); +} + + +extern Rpl_filter* binlog_filter; +extern my_bool opt_log_slave_updates; + +enum wsrep_trx_status +wsrep_run_wsrep_commit(THD *thd, handlerton *hton, bool all) +{ + int rcode= -1; + size_t data_len= 0; + IO_CACHE *cache; + int replay_round= 0; + + if (thd->get_stmt_da()->is_error()) { + WSREP_ERROR("commit issue, error: %d %s", + thd->get_stmt_da()->sql_errno(), thd->get_stmt_da()->message()); + } + + DBUG_ENTER("wsrep_run_wsrep_commit"); + + if (thd->slave_thread && !opt_log_slave_updates) DBUG_RETURN(WSREP_TRX_OK); + + if (thd->wsrep_exec_mode == REPL_RECV) { + + mysql_mutex_lock(&thd->LOCK_wsrep_thd); + if (thd->wsrep_conflict_state == MUST_ABORT) { + if (wsrep_debug) + WSREP_INFO("WSREP: must abort for BF"); + DBUG_PRINT("wsrep", ("BF apply commit fail")); + thd->wsrep_conflict_state = NO_CONFLICT; + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + // + // TODO: test all calls of the rollback. + // rollback must happen automagically innobase_rollback(hton, thd, 1); + // + DBUG_RETURN(WSREP_TRX_ERROR); + } + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + } + + if (thd->wsrep_exec_mode != LOCAL_STATE) DBUG_RETURN(WSREP_TRX_OK); + + if (thd->wsrep_consistency_check == CONSISTENCY_CHECK_RUNNING) { + WSREP_DEBUG("commit for consistency check: %s", thd->query()); + DBUG_RETURN(WSREP_TRX_OK); + } + + DBUG_PRINT("wsrep", ("replicating commit")); + + mysql_mutex_lock(&thd->LOCK_wsrep_thd); + if (thd->wsrep_conflict_state == MUST_ABORT) { + DBUG_PRINT("wsrep", ("replicate commit fail")); + thd->wsrep_conflict_state = ABORTED; + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + if (wsrep_debug) { + WSREP_INFO("innobase_commit, abort %s", + (thd->query()) ? thd->query() : "void"); + } + DBUG_RETURN(WSREP_TRX_CERT_FAIL); + } + + mysql_mutex_lock(&LOCK_wsrep_replaying); + + while (wsrep_replaying > 0 && + thd->wsrep_conflict_state == NO_CONFLICT && + thd->killed == NOT_KILLED && + !shutdown_in_progress) + { + + mysql_mutex_unlock(&LOCK_wsrep_replaying); + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + + mysql_mutex_lock(&thd->mysys_var->mutex); + thd_proc_info(thd, "wsrep waiting on replaying"); + thd->mysys_var->current_mutex= &LOCK_wsrep_replaying; + thd->mysys_var->current_cond= &COND_wsrep_replaying; + mysql_mutex_unlock(&thd->mysys_var->mutex); + + mysql_mutex_lock(&LOCK_wsrep_replaying); + // Using timedwait is a hack to avoid deadlock in case if BF victim + // misses the signal. + struct timespec wtime = {0, 1000000}; + mysql_cond_timedwait(&COND_wsrep_replaying, &LOCK_wsrep_replaying, + &wtime); + + if (replay_round++ % 100000 == 0) + WSREP_DEBUG("commit waiting for replaying: replayers %d, thd: (%lu) " + "conflict: %d (round: %d)", + wsrep_replaying, thd->thread_id, + thd->wsrep_conflict_state, replay_round); + + mysql_mutex_unlock(&LOCK_wsrep_replaying); + + mysql_mutex_lock(&thd->mysys_var->mutex); + thd->mysys_var->current_mutex= 0; + thd->mysys_var->current_cond= 0; + mysql_mutex_unlock(&thd->mysys_var->mutex); + + mysql_mutex_lock(&thd->LOCK_wsrep_thd); + mysql_mutex_lock(&LOCK_wsrep_replaying); + } + mysql_mutex_unlock(&LOCK_wsrep_replaying); + + if (thd->wsrep_conflict_state == MUST_ABORT) { + DBUG_PRINT("wsrep", ("replicate commit fail")); + thd->wsrep_conflict_state = ABORTED; + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + WSREP_DEBUG("innobase_commit abort after replaying wait %s", + (thd->query()) ? thd->query() : "void"); + DBUG_RETURN(WSREP_TRX_CERT_FAIL); + } + + thd->wsrep_query_state = QUERY_COMMITTING; + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + + cache = get_trans_log(thd); + rcode = 0; + if (cache) { + thd->binlog_flush_pending_rows_event(true); + rcode = wsrep_write_cache(wsrep, thd, cache, &data_len); + if (WSREP_OK != rcode) { + WSREP_ERROR("rbr write fail, data_len: %zu, %d", data_len, rcode); + DBUG_RETURN(WSREP_TRX_SIZE_EXCEEDED); + } + } + + if (data_len == 0) + { + if (thd->get_stmt_da()->is_ok() && + thd->get_stmt_da()->affected_rows() > 0 && + !binlog_filter->is_on()) + { + WSREP_DEBUG("empty rbr buffer, query: %s, " + "affected rows: %llu, " + "changed tables: %d, " + "sql_log_bin: %d, " + "wsrep status (%d %d %d)", + thd->query(), thd->get_stmt_da()->affected_rows(), + stmt_has_updated_trans_table(thd), thd->variables.sql_log_bin, + thd->wsrep_exec_mode, thd->wsrep_query_state, + thd->wsrep_conflict_state); + } + else + { + WSREP_DEBUG("empty rbr buffer, query: %s", thd->query()); + } + thd->wsrep_query_state= QUERY_EXEC; + DBUG_RETURN(WSREP_TRX_OK); + } + + if (WSREP_UNDEFINED_TRX_ID == thd->wsrep_ws_handle.trx_id) + { + WSREP_WARN("SQL statement was ineffective, THD: %lu, buf: %zu\n" + "QUERY: %s\n" + " => Skipping replication", + thd->thread_id, data_len, thd->query()); + rcode = WSREP_TRX_FAIL; + } + else if (!rcode) + { + if (WSREP_OK == rcode) + rcode = wsrep->pre_commit(wsrep, + (wsrep_conn_id_t)thd->thread_id, + &thd->wsrep_ws_handle, + WSREP_FLAG_COMMIT | + ((thd->wsrep_PA_safe) ? + 0ULL : WSREP_FLAG_PA_UNSAFE), + &thd->wsrep_trx_meta); + + if (rcode == WSREP_TRX_MISSING) { + WSREP_WARN("Transaction missing in provider, thd: %ld, SQL: %s", + thd->thread_id, thd->query()); + rcode = WSREP_TRX_FAIL; + } else if (rcode == WSREP_BF_ABORT) { + WSREP_DEBUG("thd %lu seqno %lld BF aborted by provider, will replay", + thd->thread_id, (long long)thd->wsrep_trx_meta.gtid.seqno); + mysql_mutex_lock(&thd->LOCK_wsrep_thd); + thd->wsrep_conflict_state = MUST_REPLAY; + DBUG_ASSERT(wsrep_thd_trx_seqno(thd) > 0); + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + mysql_mutex_lock(&LOCK_wsrep_replaying); + wsrep_replaying++; + WSREP_DEBUG("replaying increased: %d, thd: %lu", + wsrep_replaying, thd->thread_id); + mysql_mutex_unlock(&LOCK_wsrep_replaying); + } + } else { + WSREP_ERROR("I/O error reading from thd's binlog iocache: " + "errno=%d, io cache code=%d", my_errno, cache->error); + DBUG_ASSERT(0); // failure like this can not normally happen + DBUG_RETURN(WSREP_TRX_ERROR); + } + + mysql_mutex_lock(&thd->LOCK_wsrep_thd); + switch(rcode) { + case 0: + /* + About MUST_ABORT: We assume that even if thd conflict state was set + to MUST_ABORT, underlying transaction was not rolled back or marked + as deadlock victim in QUERY_COMMITTING state. Conflict state is + set to NO_CONFLICT and commit proceeds as usual. + */ + if (thd->wsrep_conflict_state == MUST_ABORT) + thd->wsrep_conflict_state= NO_CONFLICT; + + if (thd->wsrep_conflict_state != NO_CONFLICT) + { + WSREP_WARN("thd %lu seqno %lld: conflict state %d after post commit", + thd->thread_id, + (long long)thd->wsrep_trx_meta.gtid.seqno, + thd->wsrep_conflict_state); + } + thd->wsrep_exec_mode= LOCAL_COMMIT; + DBUG_ASSERT(thd->wsrep_trx_meta.gtid.seqno != WSREP_SEQNO_UNDEFINED); + /* Override XID iff it was generated by mysql */ + if (thd->transaction.xid_state.xid.get_my_xid()) + { + wsrep_xid_init(&thd->transaction.xid_state.xid, + &thd->wsrep_trx_meta.gtid.uuid, + thd->wsrep_trx_meta.gtid.seqno); + } + DBUG_PRINT("wsrep", ("replicating commit success")); + break; + case WSREP_BF_ABORT: + DBUG_ASSERT(thd->wsrep_trx_meta.gtid.seqno != WSREP_SEQNO_UNDEFINED); + case WSREP_TRX_FAIL: + WSREP_DEBUG("commit failed for reason: %d", rcode); + DBUG_PRINT("wsrep", ("replicating commit fail")); + + thd->wsrep_query_state= QUERY_EXEC; + + if (thd->wsrep_conflict_state == MUST_ABORT) { + thd->wsrep_conflict_state= ABORTED; + } + else + { + WSREP_DEBUG("conflict state: %d", thd->wsrep_conflict_state); + if (thd->wsrep_conflict_state == NO_CONFLICT) + { + thd->wsrep_conflict_state = CERT_FAILURE; + WSREP_LOG_CONFLICT(NULL, thd, FALSE); + } + } + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + + DBUG_RETURN(WSREP_TRX_CERT_FAIL); + + case WSREP_SIZE_EXCEEDED: + WSREP_ERROR("transaction size exceeded"); + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + DBUG_RETURN(WSREP_TRX_SIZE_EXCEEDED); + case WSREP_CONN_FAIL: + WSREP_ERROR("connection failure"); + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + DBUG_RETURN(WSREP_TRX_ERROR); + default: + WSREP_ERROR("unknown connection failure"); + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + DBUG_RETURN(WSREP_TRX_ERROR); + } + + thd->wsrep_query_state= QUERY_EXEC; + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + + DBUG_RETURN(WSREP_TRX_OK); +} + + +static int wsrep_hton_init(void *p) +{ + wsrep_hton= (handlerton *)p; + //wsrep_hton->state=opt_bin_log ? SHOW_OPTION_YES : SHOW_OPTION_NO; + wsrep_hton->state= SHOW_OPTION_YES; + wsrep_hton->db_type=(legacy_db_type)0; + wsrep_hton->savepoint_offset= sizeof(my_off_t); + wsrep_hton->close_connection= wsrep_close_connection; + wsrep_hton->savepoint_set= wsrep_savepoint_set; + wsrep_hton->savepoint_rollback= wsrep_savepoint_rollback; + wsrep_hton->commit= wsrep_commit; + wsrep_hton->rollback= wsrep_rollback; + wsrep_hton->prepare= wsrep_prepare; + wsrep_hton->flags= HTON_NOT_USER_SELECTABLE | HTON_HIDDEN; // todo: fix flags + wsrep_hton->slot= 0; + return 0; +} + + +struct st_mysql_storage_engine wsrep_storage_engine= +{ MYSQL_HANDLERTON_INTERFACE_VERSION }; + + +mysql_declare_plugin(wsrep) +{ + MYSQL_STORAGE_ENGINE_PLUGIN, + &wsrep_storage_engine, + "wsrep", + "Codership Oy", + "A pseudo storage engine to represent transactions in multi-master " + "synchornous replication", + PLUGIN_LICENSE_GPL, + wsrep_hton_init, /* Plugin Init */ + NULL, /* Plugin Deinit */ + 0x0100 /* 1.0 */, + NULL, /* status variables */ + NULL, /* system variables */ + NULL, /* config options */ + 0, /* flags */ +} +mysql_declare_plugin_end; diff --git a/sql/wsrep_mysqld.cc b/sql/wsrep_mysqld.cc new file mode 100644 index 00000000000..c1b15bd02c2 --- /dev/null +++ b/sql/wsrep_mysqld.cc @@ -0,0 +1,2433 @@ +/* Copyright 2008-2014 Codership Oy <http://www.codership.com> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include <mysqld.h> +#include <sql_class.h> +#include <sql_parse.h> +#include "slave.h" +#include "rpl_mi.h" +#include "sql_repl.h" +#include "rpl_filter.h" +#include "sql_callback.h" +#include "sp_head.h" +#include "sp.h" +#include "wsrep_priv.h" +#include "wsrep_thd.h" +#include "wsrep_sst.h" +#include "wsrep_utils.h" +#include "wsrep_var.h" +#include "wsrep_binlog.h" +#include "wsrep_applier.h" +#include <cstdio> +#include <cstdlib> +#include "log_event.h" +#include <slave.h> + +wsrep_t *wsrep = NULL; +my_bool wsrep_emulate_bin_log = FALSE; // activating parts of binlog interface +#ifdef GTID_SUPPORT +/* Sidno in global_sid_map corresponding to group uuid */ +rpl_sidno wsrep_sidno= -1; +#endif /* GTID_SUPPORT */ +my_bool wsrep_preordered_opt= FALSE; + +/* + * Begin configuration options and their default values + */ + +extern int wsrep_replaying; +extern ulong wsrep_running_threads; +extern ulong my_bind_addr; +extern my_bool plugins_are_initialized; +extern uint kill_cached_threads; +extern mysql_cond_t COND_thread_cache; + +const char* wsrep_data_home_dir = NULL; +const char* wsrep_dbug_option = ""; + +long wsrep_slave_threads = 1; // # of slave action appliers wanted +int wsrep_slave_count_change = 0; // # of appliers to stop or start +my_bool wsrep_debug = 0; // enable debug level logging +my_bool wsrep_convert_LOCK_to_trx = 1; // convert locking sessions to trx +ulong wsrep_retry_autocommit = 5; // retry aborted autocommit trx +my_bool wsrep_auto_increment_control = 1; // control auto increment variables +my_bool wsrep_drupal_282555_workaround = 1; // retry autoinc insert after dupkey +my_bool wsrep_incremental_data_collection = 0; // incremental data collection +ulong wsrep_max_ws_size = 1073741824UL;//max ws (RBR buffer) size +ulong wsrep_max_ws_rows = 65536; // max number of rows in ws +int wsrep_to_isolation = 0; // # of active TO isolation threads +my_bool wsrep_certify_nonPK = 1; // certify, even when no primary key +long wsrep_max_protocol_version = 3; // maximum protocol version to use +ulong wsrep_forced_binlog_format = BINLOG_FORMAT_UNSPEC; +my_bool wsrep_recovery = 0; // recovery +my_bool wsrep_replicate_myisam = 0; // enable myisam replication +my_bool wsrep_log_conflicts = 0; +ulong wsrep_mysql_replication_bundle = 0; +my_bool wsrep_desync = 0; // desynchronize the node from the + // cluster +my_bool wsrep_load_data_splitting = 1; // commit load data every 10K intervals +my_bool wsrep_restart_slave = 0; // should mysql slave thread be + // restarted, if node joins back +my_bool wsrep_restart_slave_activated = 0; // node has dropped, and slave + // restart will be needed +my_bool wsrep_slave_UK_checks = 0; // slave thread does UK checks +my_bool wsrep_slave_FK_checks = 0; // slave thread does FK checks +/* + * End configuration options + */ + +/* + * Other wsrep global variables. + */ +my_bool wsrep_inited = 0; // initialized ? + +static const wsrep_uuid_t cluster_uuid = WSREP_UUID_UNDEFINED; +static char cluster_uuid_str[40]= { 0, }; +static const char* cluster_status_str[WSREP_VIEW_MAX] = +{ + "Primary", + "non-Primary", + "Disconnected" +}; + +static char provider_name[256]= { 0, }; +static char provider_version[256]= { 0, }; +static char provider_vendor[256]= { 0, }; + +/* + * wsrep status variables + */ +my_bool wsrep_connected = FALSE; +my_bool wsrep_ready = FALSE; // node can accept queries +const char* wsrep_cluster_state_uuid = cluster_uuid_str; +long long wsrep_cluster_conf_id = WSREP_SEQNO_UNDEFINED; +const char* wsrep_cluster_status = cluster_status_str[WSREP_VIEW_DISCONNECTED]; +long wsrep_cluster_size = 0; +long wsrep_local_index = -1; +long long wsrep_local_bf_aborts = 0; +const char* wsrep_provider_name = provider_name; +const char* wsrep_provider_version = provider_version; +const char* wsrep_provider_vendor = provider_vendor; +/* End wsrep status variables */ + +wsrep_uuid_t local_uuid = WSREP_UUID_UNDEFINED; +wsrep_seqno_t local_seqno = WSREP_SEQNO_UNDEFINED; +wsp::node_status local_status; +long wsrep_protocol_version = 3; + +// Boolean denoting if server is in initial startup phase. This is needed +// to make sure that main thread waiting in wsrep_sst_wait() is signaled +// if there was no state gap on receiving first view event. +static my_bool wsrep_startup = TRUE; + + +static void wsrep_log_cb(wsrep_log_level_t level, const char *msg) { + switch (level) { + case WSREP_LOG_INFO: + sql_print_information("WSREP: %s", msg); + break; + case WSREP_LOG_WARN: + sql_print_warning("WSREP: %s", msg); + break; + case WSREP_LOG_ERROR: + case WSREP_LOG_FATAL: + sql_print_error("WSREP: %s", msg); + break; + case WSREP_LOG_DEBUG: + if (wsrep_debug) sql_print_information ("[Debug] WSREP: %s", msg); + default: + break; + } +} + +static void wsrep_log_states (wsrep_log_level_t const level, + const wsrep_uuid_t* const group_uuid, + wsrep_seqno_t const group_seqno, + const wsrep_uuid_t* const node_uuid, + wsrep_seqno_t const node_seqno) +{ + char uuid_str[37]; + char msg[256]; + + wsrep_uuid_print (group_uuid, uuid_str, sizeof(uuid_str)); + snprintf (msg, 255, "WSREP: Group state: %s:%lld", + uuid_str, (long long)group_seqno); + wsrep_log_cb (level, msg); + + wsrep_uuid_print (node_uuid, uuid_str, sizeof(uuid_str)); + snprintf (msg, 255, "WSREP: Local state: %s:%lld", + uuid_str, (long long)node_seqno); + wsrep_log_cb (level, msg); +} + +static my_bool set_SE_checkpoint(THD* unused, plugin_ref plugin, void* arg) +{ + XID* xid= reinterpret_cast<XID*>(arg); + handlerton* hton= plugin_data(plugin, handlerton *); + if (hton->set_checkpoint) + { + const wsrep_uuid_t* uuid(wsrep_xid_uuid(xid)); + char uuid_str[40] = {0, }; + wsrep_uuid_print(uuid, uuid_str, sizeof(uuid_str)); + WSREP_DEBUG("Set WSREPXid for InnoDB: %s:%lld", + uuid_str, (long long)wsrep_xid_seqno(xid)); + hton->set_checkpoint(hton, xid); + } + return FALSE; +} + +void wsrep_set_SE_checkpoint(XID* xid) +{ + plugin_foreach(NULL, set_SE_checkpoint, MYSQL_STORAGE_ENGINE_PLUGIN, xid); +} + +static my_bool get_SE_checkpoint(THD* unused, plugin_ref plugin, void* arg) +{ + XID* xid= reinterpret_cast<XID*>(arg); + handlerton* hton= plugin_data(plugin, handlerton *); + if (hton->get_checkpoint) + { + hton->get_checkpoint(hton, xid); + const wsrep_uuid_t* uuid(wsrep_xid_uuid(xid)); + char uuid_str[40] = {0, }; + wsrep_uuid_print(uuid, uuid_str, sizeof(uuid_str)); + WSREP_DEBUG("Read WSREPXid from InnoDB: %s:%lld", + uuid_str, (long long)wsrep_xid_seqno(xid)); + } + return FALSE; +} + +void wsrep_get_SE_checkpoint(XID* xid) +{ + plugin_foreach(NULL, get_SE_checkpoint, MYSQL_STORAGE_ENGINE_PLUGIN, xid); +} + +#ifdef GTID_SUPPORT +void wsrep_init_sidno(const wsrep_uuid_t& uuid) +{ + /* generate new Sid map entry from inverted uuid */ + rpl_sid sid; + wsrep_uuid_t ltid_uuid; + for (size_t i= 0; i < sizeof(ltid_uuid.data); ++i) + { + ltid_uuid.data[i] = ~local_uuid.data[i]; + } + sid.copy_from(ltid_uuid.data); + global_sid_lock->wrlock(); + wsrep_sidno= global_sid_map->add_sid(sid); + WSREP_INFO("inited wsrep sidno %d", wsrep_sidno); + global_sid_lock->unlock(); +} +#endif /* GTID_SUPPORT */ + +static wsrep_cb_status_t +wsrep_view_handler_cb (void* app_ctx, + void* recv_ctx, + const wsrep_view_info_t* view, + const char* state, + size_t state_len, + void** sst_req, + size_t* sst_req_len) +{ + *sst_req = NULL; + *sst_req_len = 0; + + wsrep_member_status_t new_status= local_status.get(); + + if (memcmp(&cluster_uuid, &view->state_id.uuid, sizeof(wsrep_uuid_t))) + { + memcpy((wsrep_uuid_t*)&cluster_uuid, &view->state_id.uuid, + sizeof(cluster_uuid)); + + wsrep_uuid_print (&cluster_uuid, cluster_uuid_str, + sizeof(cluster_uuid_str)); + } + + wsrep_cluster_conf_id= view->view; + wsrep_cluster_status= cluster_status_str[view->status]; + wsrep_cluster_size= view->memb_num; + wsrep_local_index= view->my_idx; + + WSREP_INFO("New cluster view: global state: %s:%lld, view# %lld: %s, " + "number of nodes: %ld, my index: %ld, protocol version %d", + wsrep_cluster_state_uuid, (long long)view->state_id.seqno, + (long long)wsrep_cluster_conf_id, wsrep_cluster_status, + wsrep_cluster_size, wsrep_local_index, view->proto_ver); + + /* Proceed further only if view is PRIMARY */ + if (WSREP_VIEW_PRIMARY != view->status) { + wsrep_ready_set(FALSE); + new_status= WSREP_MEMBER_UNDEFINED; + /* Always record local_uuid and local_seqno in non-prim since this + * may lead to re-initializing provider and start position is + * determined according to these variables */ + // WRONG! local_uuid should be the last primary configuration uuid we were + // a member of. local_seqno should be updated in commit calls. + // local_uuid= cluster_uuid; + // local_seqno= view->first - 1; + goto out; + } + + switch (view->proto_ver) + { + case 0: + case 1: + case 2: + case 3: + // version change + if (view->proto_ver != wsrep_protocol_version) + { + my_bool wsrep_ready_saved= wsrep_ready; + wsrep_ready_set(FALSE); + WSREP_INFO("closing client connections for " + "protocol change %ld -> %d", + wsrep_protocol_version, view->proto_ver); + wsrep_close_client_connections(TRUE); + wsrep_protocol_version= view->proto_ver; + wsrep_ready_set(wsrep_ready_saved); + } + break; + default: + WSREP_ERROR("Unsupported application protocol version: %d", + view->proto_ver); + unireg_abort(1); + } + + if (view->state_gap) + { + WSREP_WARN("Gap in state sequence. Need state transfer."); + + /* After that wsrep will call wsrep_sst_prepare. */ + /* keep ready flag 0 until we receive the snapshot */ + wsrep_ready_set(FALSE); + + /* Close client connections to ensure that they don't interfere + * with SST */ + WSREP_DEBUG("[debug]: closing client connections for PRIM"); + wsrep_close_client_connections(TRUE); + + ssize_t const req_len= wsrep_sst_prepare (sst_req); + + if (req_len < 0) + { + WSREP_ERROR("SST preparation failed: %zd (%s)", -req_len, + strerror(-req_len)); + new_status= WSREP_MEMBER_UNDEFINED; + } + else + { + assert(sst_req != NULL); + *sst_req_len= req_len; + new_status= WSREP_MEMBER_JOINER; + } + } + else + { + /* + * NOTE: Initialize wsrep_group_uuid here only if it wasn't initialized + * before - OR - it was reinitilized on startup (lp:992840) + */ + if (wsrep_startup) + { + if (wsrep_before_SE()) + { + wsrep_SE_init_grab(); + // Signal mysqld init thread to continue + wsrep_sst_complete (&cluster_uuid, view->state_id.seqno, false); + // and wait for SE initialization + wsrep_SE_init_wait(); + } + else + { + local_uuid= cluster_uuid; + local_seqno= view->state_id.seqno; + } + /* Init storage engine XIDs from first view */ + XID xid; + wsrep_xid_init(&xid, &local_uuid, local_seqno); + wsrep_set_SE_checkpoint(&xid); + new_status= WSREP_MEMBER_JOINED; +#ifdef GTID_SUPPORT + wsrep_init_sidno(local_uuid); +#endif /* GTID_SUPPORT */ + } + + // just some sanity check + if (memcmp (&local_uuid, &cluster_uuid, sizeof (wsrep_uuid_t))) + { + WSREP_ERROR("Undetected state gap. Can't continue."); + wsrep_log_states(WSREP_LOG_FATAL, &cluster_uuid, view->state_id.seqno, + &local_uuid, -1); + unireg_abort(1); + } + } + + if (wsrep_auto_increment_control) + { + global_system_variables.auto_increment_offset= view->my_idx + 1; + global_system_variables.auto_increment_increment= view->memb_num; + } + + { /* capabilities may be updated on new configuration */ + uint64_t const caps(wsrep->capabilities (wsrep)); + + my_bool const idc((caps & WSREP_CAP_INCREMENTAL_WRITESET) != 0); + if (TRUE == wsrep_incremental_data_collection && FALSE == idc) + { + WSREP_WARN("Unsupported protocol downgrade: " + "incremental data collection disabled. Expect abort."); + } + wsrep_incremental_data_collection = idc; + } + +out: + if (view->status == WSREP_VIEW_PRIMARY) wsrep_startup= FALSE; + local_status.set(new_status, view); + + return WSREP_CB_SUCCESS; +} + +void wsrep_ready_set (my_bool x) +{ + WSREP_DEBUG("Setting wsrep_ready to %d", x); + if (mysql_mutex_lock (&LOCK_wsrep_ready)) abort(); + if (wsrep_ready != x) + { + wsrep_ready= x; + mysql_cond_signal (&COND_wsrep_ready); + } + mysql_mutex_unlock (&LOCK_wsrep_ready); +} + +// Wait until wsrep has reached ready state +void wsrep_ready_wait () +{ + if (mysql_mutex_lock (&LOCK_wsrep_ready)) abort(); + while (!wsrep_ready) + { + WSREP_INFO("Waiting to reach ready state"); + mysql_cond_wait (&COND_wsrep_ready, &LOCK_wsrep_ready); + } + WSREP_INFO("ready state reached"); + mysql_mutex_unlock (&LOCK_wsrep_ready); +} + +static void wsrep_synced_cb(void* app_ctx) +{ + WSREP_INFO("Synchronized with group, ready for connections"); + bool signal_main= false; + if (mysql_mutex_lock (&LOCK_wsrep_ready)) abort(); + if (!wsrep_ready) + { + wsrep_ready= TRUE; + mysql_cond_signal (&COND_wsrep_ready); + signal_main= true; + + } + local_status.set(WSREP_MEMBER_SYNCED); + mysql_mutex_unlock (&LOCK_wsrep_ready); + + if (signal_main) + { + wsrep_SE_init_grab(); + // Signal mysqld init thread to continue + wsrep_sst_complete (&local_uuid, local_seqno, false); + // and wait for SE initialization + wsrep_SE_init_wait(); + } + if (wsrep_restart_slave_activated) + { + int rcode; + WSREP_INFO("MySQL slave restart"); + wsrep_restart_slave_activated= FALSE; + + mysql_mutex_lock(&LOCK_active_mi); + if ((rcode = start_slave_threads(1 /* need mutex */, + 0 /* no wait for start*/, + active_mi, + master_info_file, + relay_log_info_file, + SLAVE_SQL))) + { + WSREP_WARN("Failed to create slave threads: %d", rcode); + } + mysql_mutex_unlock(&LOCK_active_mi); + + } +} + +static void wsrep_init_position() +{ + /* read XIDs from storage engines */ + XID xid; + memset(&xid, 0, sizeof(xid)); + xid.formatID= -1; + wsrep_get_SE_checkpoint(&xid); + + if (xid.formatID == -1) + { + WSREP_INFO("Read nil XID from storage engines, skipping position init"); + return; + } + else if (!wsrep_is_wsrep_xid(&xid)) + { + WSREP_WARN("Read non-wsrep XID from storage engines, skipping position init"); + return; + } + + const wsrep_uuid_t* uuid= wsrep_xid_uuid(&xid); + const wsrep_seqno_t seqno= wsrep_xid_seqno(&xid); + + char uuid_str[40] = {0, }; + wsrep_uuid_print(uuid, uuid_str, sizeof(uuid_str)); + WSREP_INFO("Initial position: %s:%lld", uuid_str, (long long)seqno); + + + if (!memcmp(&local_uuid, &WSREP_UUID_UNDEFINED, sizeof(local_uuid)) && + local_seqno == WSREP_SEQNO_UNDEFINED) + { + // Initial state + local_uuid= *uuid; + local_seqno= seqno; + } + else if (memcmp(&local_uuid, uuid, sizeof(local_uuid)) || + local_seqno != seqno) + { + WSREP_WARN("Initial position was provided by configuration or SST, " + "avoiding override"); + } +} + +extern char* my_bind_addr_str; + +int wsrep_init() +{ + int rcode= -1; + DBUG_ASSERT(wsrep_inited == 0); + + wsrep_ready_set(FALSE); + assert(wsrep_provider); + + wsrep_init_position(); + + if ((rcode= wsrep_load(wsrep_provider, &wsrep, wsrep_log_cb)) != WSREP_OK) + { + if (strcasecmp(wsrep_provider, WSREP_NONE)) + { + WSREP_ERROR("wsrep_load(%s) failed: %s (%d). Reverting to no provider.", + wsrep_provider, strerror(rcode), rcode); + strcpy((char*)wsrep_provider, WSREP_NONE); // damn it's a dirty hack + (void) wsrep_init(); + return rcode; + } + else /* this is for recursive call above */ + { + WSREP_ERROR("Could not revert to no provider: %s (%d). Need to abort.", + strerror(rcode), rcode); + unireg_abort(1); + } + } + + if (!WSREP_PROVIDER_EXISTS) + { + // enable normal operation in case no provider is specified + wsrep_ready_set(TRUE); + wsrep_inited= 1; + global_system_variables.wsrep_on = 0; + wsrep_init_args args; + args.logger_cb = wsrep_log_cb; + args.options = (wsrep_provider_options) ? + wsrep_provider_options : ""; + rcode = wsrep->init(wsrep, &args); + if (rcode) + { + DBUG_PRINT("wsrep",("wsrep::init() failed: %d", rcode)); + WSREP_ERROR("wsrep::init() failed: %d, must shutdown", rcode); + wsrep->free(wsrep); + free(wsrep); + wsrep = NULL; + } + return rcode; + } + else + { + global_system_variables.wsrep_on = 1; + strncpy(provider_name, + wsrep->provider_name, sizeof(provider_name) - 1); + strncpy(provider_version, + wsrep->provider_version, sizeof(provider_version) - 1); + strncpy(provider_vendor, + wsrep->provider_vendor, sizeof(provider_vendor) - 1); + } + + if (!wsrep_data_home_dir || strlen(wsrep_data_home_dir) == 0) + wsrep_data_home_dir = mysql_real_data_home; + + char node_addr[512]= { 0, }; + size_t const node_addr_max= sizeof(node_addr) - 1; + if (!wsrep_node_address || !strcmp(wsrep_node_address, "")) + { + size_t const ret= wsrep_guess_ip(node_addr, node_addr_max); + if (!(ret > 0 && ret < node_addr_max)) + { + WSREP_WARN("Failed to guess base node address. Set it explicitly via " + "wsrep_node_address."); + node_addr[0]= '\0'; + } + } + else + { + strncpy(node_addr, wsrep_node_address, node_addr_max); + } + + char inc_addr[512]= { 0, }; + size_t const inc_addr_max= sizeof (inc_addr); + if ((!wsrep_node_incoming_address || + !strcmp (wsrep_node_incoming_address, WSREP_NODE_INCOMING_AUTO))) + { + unsigned int my_bind_ip= INADDR_ANY; // default if not set + if (my_bind_addr_str && strlen(my_bind_addr_str)) + { + my_bind_ip= wsrep_check_ip(my_bind_addr_str); + } + + if (INADDR_ANY != my_bind_ip) + { + if (INADDR_NONE != my_bind_ip && INADDR_LOOPBACK != my_bind_ip) + { + snprintf(inc_addr, inc_addr_max, "%s:%u", + my_bind_addr_str, (int)mysqld_port); + } // else leave inc_addr an empty string - mysqld is not listening for + // client connections on network interfaces. + } + else // mysqld binds to 0.0.0.0, take IP from wsrep_node_address if possible + { + size_t const node_addr_len= strlen(node_addr); + if (node_addr_len > 0) + { + const char* const colon= strrchr(node_addr, ':'); + if (strchr(node_addr, ':') == colon) // 1 or 0 ':' + { + size_t const ip_len= colon ? colon - node_addr : node_addr_len; + if (ip_len + 7 /* :55555\0 */ < inc_addr_max) + { + memcpy (inc_addr, node_addr, ip_len); + snprintf(inc_addr + ip_len, inc_addr_max - ip_len, ":%u", + (int)mysqld_port); + } + else + { + WSREP_WARN("Guessing address for incoming client connections: " + "address too long."); + inc_addr[0]= '\0'; + } + } + else + { + WSREP_WARN("Guessing address for incoming client connections: " + "too many colons :) ."); + inc_addr[0]= '\0'; + } + } + + if (!strlen(inc_addr)) + { + WSREP_WARN("Guessing address for incoming client connections failed. " + "Try setting wsrep_node_incoming_address explicitly."); + } + } + } + else if (!strchr(wsrep_node_incoming_address, ':')) // no port included + { + if ((int)inc_addr_max <= + snprintf(inc_addr, inc_addr_max, "%s:%u", + wsrep_node_incoming_address,(int)mysqld_port)) + { + WSREP_WARN("Guessing address for incoming client connections: " + "address too long."); + inc_addr[0]= '\0'; + } + } + else + { + size_t const need = strlen (wsrep_node_incoming_address); + if (need >= inc_addr_max) { + WSREP_WARN("wsrep_node_incoming_address too long: %zu", need); + inc_addr[0]= '\0'; + } + else { + memcpy (inc_addr, wsrep_node_incoming_address, need); + } + } + + struct wsrep_init_args wsrep_args; + + struct wsrep_gtid const state_id = { local_uuid, local_seqno }; + + wsrep_args.data_dir = wsrep_data_home_dir; + wsrep_args.node_name = (wsrep_node_name) ? wsrep_node_name : ""; + wsrep_args.node_address = node_addr; + wsrep_args.node_incoming = inc_addr; + wsrep_args.options = (wsrep_provider_options) ? + wsrep_provider_options : ""; + wsrep_args.proto_ver = wsrep_max_protocol_version; + + wsrep_args.state_id = &state_id; + + wsrep_args.logger_cb = wsrep_log_cb; + wsrep_args.view_handler_cb = wsrep_view_handler_cb; + wsrep_args.apply_cb = wsrep_apply_cb; + wsrep_args.commit_cb = wsrep_commit_cb; + wsrep_args.unordered_cb = wsrep_unordered_cb; + wsrep_args.sst_donate_cb = wsrep_sst_donate_cb; + wsrep_args.synced_cb = wsrep_synced_cb; + + rcode = wsrep->init(wsrep, &wsrep_args); + + if (rcode) + { + DBUG_PRINT("wsrep",("wsrep::init() failed: %d", rcode)); + WSREP_ERROR("wsrep::init() failed: %d, must shutdown", rcode); + wsrep->free(wsrep); + free(wsrep); + wsrep = NULL; + } else { + wsrep_inited= 1; + } + + return rcode; +} + +extern int wsrep_on(void *); + +void wsrep_init_startup (bool first) +{ + if (wsrep_init()) unireg_abort(1); + + wsrep_thr_lock_init(wsrep_thd_is_BF, wsrep_abort_thd, + wsrep_debug, wsrep_convert_LOCK_to_trx, wsrep_on); + + /* Skip replication start if no cluster address */ + if (!wsrep_cluster_address || strlen(wsrep_cluster_address) == 0) return; + + if (first) wsrep_sst_grab(); // do it so we can wait for SST below + + if (!wsrep_start_replication()) unireg_abort(1); + + wsrep_create_rollbacker(); + wsrep_create_appliers(1); + + if (first && !wsrep_sst_wait()) unireg_abort(1);// wait until SST is completed +} + + +void wsrep_deinit(bool free_options) +{ + DBUG_ASSERT(wsrep_inited == 1); + wsrep_unload(wsrep); + wsrep= 0; + provider_name[0]= '\0'; + provider_version[0]= '\0'; + provider_vendor[0]= '\0'; + wsrep_inited= 0; + + if (free_options) + { + wsrep_sst_auth_free(); + } +} + +void wsrep_recover() +{ + if (!memcmp(&local_uuid, &WSREP_UUID_UNDEFINED, sizeof(wsrep_uuid_t)) && + local_seqno == -2) + { + char uuid_str[40]; + wsrep_uuid_print(&local_uuid, uuid_str, sizeof(uuid_str)); + WSREP_INFO("Position %s:%lld given at startup, skipping position recovery", + uuid_str, (long long)local_seqno); + return; + } + XID xid; + memset(&xid, 0, sizeof(xid)); + xid.formatID= -1; + wsrep_get_SE_checkpoint(&xid); + char uuid_str[40]; + wsrep_uuid_print(wsrep_xid_uuid(&xid), uuid_str, sizeof(uuid_str)); + WSREP_INFO("Recovered position: %s:%lld", uuid_str, + (long long)wsrep_xid_seqno(&xid)); +} + + +void wsrep_stop_replication(THD *thd) +{ + WSREP_INFO("Stop replication"); + if (!wsrep) + { + WSREP_INFO("Provider was not loaded, in stop replication"); + return; + } + + /* disconnect from group first to get wsrep_ready == FALSE */ + WSREP_DEBUG("Provider disconnect"); + wsrep->disconnect(wsrep); + + wsrep_connected= FALSE; + + wsrep_close_client_connections(TRUE); + + /* wait until appliers have stopped */ + wsrep_wait_appliers_close(thd); + + return; +} + +/* This one is set to true when --wsrep-new-cluster is found in the command + * line arguments */ +static my_bool wsrep_new_cluster= FALSE; +#define WSREP_NEW_CLUSTER "--wsrep-new-cluster" +/* Finds and hides --wsrep-new-cluster from the arguments list + * by moving it to the end of the list and decrementing argument count */ +void wsrep_filter_new_cluster (int* argc, char* argv[]) +{ + int i; + for (i= *argc - 1; i > 0; i--) + { + /* make a copy of the argument to convert possible underscores to hyphens. + * the copy need not to be longer than WSREP_NEW_CLUSTER option */ + char arg[sizeof(WSREP_NEW_CLUSTER) + 1]= { 0, }; + strncpy(arg, argv[i], sizeof(arg) - 1); + char* underscore(arg); + while (NULL != (underscore= strchr(underscore, '_'))) *underscore= '-'; + + if (!strcmp(arg, WSREP_NEW_CLUSTER)) + { + wsrep_new_cluster= TRUE; + *argc -= 1; + /* preserve the order of remaining arguments AND + * preserve the original argument pointers - just in case */ + char* wnc= argv[i]; + memmove(&argv[i], &argv[i + 1], (*argc - i)*sizeof(argv[i])); + argv[*argc]= wnc; /* this will be invisible to the rest of the program */ + } + } +} + +bool wsrep_start_replication() +{ + wsrep_status_t rcode; + + /* + if provider is trivial, don't even try to connect, + but resume local node operation + */ + if (!WSREP_PROVIDER_EXISTS) + { + // enable normal operation in case no provider is specified + wsrep_ready_set(TRUE); + return true; + } + + if (!wsrep_cluster_address || strlen(wsrep_cluster_address)== 0) + { + // if provider is non-trivial, but no address is specified, wait for address + wsrep_ready_set(FALSE); + return true; + } + + bool const bootstrap(TRUE == wsrep_new_cluster); + wsrep_new_cluster= FALSE; + + WSREP_INFO("Start replication"); + + if ((rcode = wsrep->connect(wsrep, + wsrep_cluster_name, + wsrep_cluster_address, + wsrep_sst_donor, + bootstrap))) + { + if (-ESOCKTNOSUPPORT == rcode) + { + DBUG_PRINT("wsrep",("unrecognized cluster address: '%s', rcode: %d", + wsrep_cluster_address, rcode)); + WSREP_ERROR("unrecognized cluster address: '%s', rcode: %d", + wsrep_cluster_address, rcode); + } + else + { + DBUG_PRINT("wsrep",("wsrep->connect() failed: %d", rcode)); + WSREP_ERROR("wsrep::connect() failed: %d", rcode); + } + + return false; + } + else + { + wsrep_connected= TRUE; + + char* opts= wsrep->options_get(wsrep); + if (opts) + { + wsrep_provider_options_init(opts); + free(opts); + } + else + { + WSREP_WARN("Failed to get wsrep options"); + } + } + + return true; +} + +bool wsrep_sync_wait (THD* thd, uint mask) +{ + if ((thd->variables.wsrep_sync_wait & mask) && + thd->variables.wsrep_on && + !thd->in_active_multi_stmt_transaction() && + thd->wsrep_conflict_state != REPLAYING) + { + WSREP_DEBUG("wsrep_sync_wait: thd->variables.wsrep_sync_wait = %u, mask = %u", + thd->variables.wsrep_sync_wait, mask); + // This allows autocommit SELECTs and a first SELECT after SET AUTOCOMMIT=0 + // TODO: modify to check if thd has locked any rows. + wsrep_gtid_t gtid; + wsrep_status_t ret= wsrep->causal_read (wsrep, >id); + + if (unlikely(WSREP_OK != ret)) + { + const char* msg; + int err; + + // Possibly relevant error codes: + // ER_CHECKREAD, ER_ERROR_ON_READ, ER_INVALID_DEFAULT, ER_EMPTY_QUERY, + // ER_FUNCTION_NOT_DEFINED, ER_NOT_ALLOWED_COMMAND, ER_NOT_SUPPORTED_YET, + // ER_FEATURE_DISABLED, ER_QUERY_INTERRUPTED + + switch (ret) + { + case WSREP_NOT_IMPLEMENTED: + msg= "synchronous reads by wsrep backend. " + "Please unset wsrep_causal_reads variable."; + err= ER_NOT_SUPPORTED_YET; + break; + default: + msg= "Synchronous wait failed."; + err= ER_LOCK_WAIT_TIMEOUT; // NOTE: the above msg won't be displayed + // with ER_LOCK_WAIT_TIMEOUT + } + + my_error(err, MYF(0), msg); + + return true; + } + } + + return false; +} + +/* + * Helpers to deal with TOI key arrays + */ +typedef struct wsrep_key_arr +{ + wsrep_key_t* keys; + size_t keys_len; +} wsrep_key_arr_t; + + +static void wsrep_keys_free(wsrep_key_arr_t* key_arr) +{ + for (size_t i= 0; i < key_arr->keys_len; ++i) + { + my_free((void*)key_arr->keys[i].key_parts); + } + my_free(key_arr->keys); + key_arr->keys= 0; + key_arr->keys_len= 0; +} + + +/*! + * @param db Database string + * @param table Table string + * @param key Array of wsrep_key_t + * @param key_len In: number of elements in key array, Out: number of + * elements populated + * + * @return true if preparation was successful, otherwise false. + */ + +static bool wsrep_prepare_key_for_isolation(const char* db, + const char* table, + wsrep_buf_t* key, + size_t* key_len) +{ + if (*key_len < 2) return false; + + switch (wsrep_protocol_version) + { + case 0: + *key_len= 0; + break; + case 1: + case 2: + case 3: + { + *key_len= 0; + if (db) + { + // sql_print_information("%s.%s", db, table); + if (db) + { + key[*key_len].ptr= db; + key[*key_len].len= strlen(db); + ++(*key_len); + if (table) + { + key[*key_len].ptr= table; + key[*key_len].len= strlen(table); + ++(*key_len); + } + } + } + break; + } + default: + return false; + } + + return true; +} + +/* Prepare key list from db/table and table_list */ +static bool wsrep_prepare_keys_for_isolation(THD* thd, + const char* db, + const char* table, + const TABLE_LIST* table_list, + wsrep_key_arr_t* ka) +{ + ka->keys= 0; + ka->keys_len= 0; + + extern TABLE* find_temporary_table(THD*, const TABLE_LIST*); + + if (db || table) + { + TABLE_LIST tmp_table; + MDL_request mdl_request; + + memset(&tmp_table, 0, sizeof(tmp_table)); + tmp_table.table_name= (char*)table; + tmp_table.db= (char*)db; + tmp_table.mdl_request.init(MDL_key::GLOBAL, (db) ? db : "", + (table) ? table : "", + MDL_INTENTION_EXCLUSIVE, MDL_STATEMENT); + + if (!table || !find_temporary_table(thd, &tmp_table)) + { + if (!(ka->keys= (wsrep_key_t*)my_malloc(sizeof(wsrep_key_t), MYF(0)))) + { + WSREP_ERROR("Can't allocate memory for key_array"); + goto err; + } + ka->keys_len= 1; + if (!(ka->keys[0].key_parts= (wsrep_buf_t*) + my_malloc(sizeof(wsrep_buf_t)*2, MYF(0)))) + { + WSREP_ERROR("Can't allocate memory for key_parts"); + goto err; + } + ka->keys[0].key_parts_num= 2; + if (!wsrep_prepare_key_for_isolation( + db, table, + (wsrep_buf_t*)ka->keys[0].key_parts, + &ka->keys[0].key_parts_num)) + { + WSREP_ERROR("Preparing keys for isolation failed"); + goto err; + } + } + } + + for (const TABLE_LIST* table= table_list; table; table= table->next_global) + { + if (!find_temporary_table(thd, table)) + { + wsrep_key_t* tmp; + tmp= (wsrep_key_t*)my_realloc( + ka->keys, (ka->keys_len + 1) * sizeof(wsrep_key_t), + MYF(MY_ALLOW_ZERO_PTR)); + + if (!tmp) + { + WSREP_ERROR("Can't allocate memory for key_array"); + goto err; + } + ka->keys= tmp; + if (!(ka->keys[ka->keys_len].key_parts= (wsrep_buf_t*) + my_malloc(sizeof(wsrep_buf_t)*2, MYF(0)))) + { + WSREP_ERROR("Can't allocate memory for key_parts"); + goto err; + } + ka->keys[ka->keys_len].key_parts_num= 2; + ++ka->keys_len; + if (!wsrep_prepare_key_for_isolation( + table->db, table->table_name, + (wsrep_buf_t*)ka->keys[ka->keys_len - 1].key_parts, + &ka->keys[ka->keys_len - 1].key_parts_num)) + { + WSREP_ERROR("Preparing keys for isolation failed"); + goto err; + } + } + } + return true; +err: + wsrep_keys_free(ka); + return false; +} + + +bool wsrep_prepare_key_for_innodb(const uchar* cache_key, + size_t cache_key_len, + const uchar* row_id, + size_t row_id_len, + wsrep_buf_t* key, + size_t* key_len) +{ + if (*key_len < 3) return false; + + *key_len= 0; + switch (wsrep_protocol_version) + { + case 0: + { + key[0].ptr = cache_key; + key[0].len = cache_key_len; + + *key_len = 1; + break; + } + case 1: + case 2: + case 3: + { + key[0].ptr = cache_key; + key[0].len = strlen( (char*)cache_key ); + + key[1].ptr = cache_key + strlen( (char*)cache_key ) + 1; + key[1].len = strlen( (char*)(key[1].ptr) ); + + *key_len = 2; + break; + } + default: + return false; + } + + key[*key_len].ptr = row_id; + key[*key_len].len = row_id_len; + ++(*key_len); + + return true; +} + + +/* + * Construct Query_log_Event from thd query and serialize it + * into buffer. + * + * Return 0 in case of success, 1 in case of error. + */ +int wsrep_to_buf_helper( + THD* thd, const char *query, uint query_len, uchar** buf, size_t* buf_len) +{ + IO_CACHE tmp_io_cache; + if (open_cached_file(&tmp_io_cache, mysql_tmpdir, TEMP_PREFIX, + 65536, MYF(MY_WME))) + return 1; + int ret(0); + +#ifdef GTID_SUPPORT + if (thd->variables.gtid_next.type == GTID_GROUP) + { + Gtid_log_event gtid_ev(thd, FALSE, &thd->variables.gtid_next); + if (!gtid_ev.is_valid()) ret= 0; + if (!ret && gtid_ev.write(&tmp_io_cache)) ret= 1; + } +#endif /* GTID_SUPPORT */ + + /* if there is prepare query, add event for it */ + if (!ret && thd->wsrep_TOI_pre_query) + { + Query_log_event ev(thd, thd->wsrep_TOI_pre_query, + thd->wsrep_TOI_pre_query_len, + FALSE, FALSE, FALSE, 0); + if (ev.write(&tmp_io_cache)) ret= 1; + } + + /* continue to append the actual query */ + Query_log_event ev(thd, query, query_len, FALSE, FALSE, FALSE, 0); + if (!ret && ev.write(&tmp_io_cache)) ret= 1; + if (!ret && wsrep_write_cache_buf(&tmp_io_cache, buf, buf_len)) ret= 1; + close_cached_file(&tmp_io_cache); + return ret; +} + +#include "sql_show.h" +static int +create_view_query(THD *thd, uchar** buf, size_t* buf_len) +{ + LEX *lex= thd->lex; + SELECT_LEX *select_lex= &lex->select_lex; + TABLE_LIST *first_table= select_lex->table_list.first; + TABLE_LIST *views = first_table; + + String buff; + const LEX_STRING command[3]= + {{ C_STRING_WITH_LEN("CREATE ") }, + { C_STRING_WITH_LEN("ALTER ") }, + { C_STRING_WITH_LEN("CREATE OR REPLACE ") }}; + + buff.append(command[thd->lex->create_view_mode].str, + command[thd->lex->create_view_mode].length); + + if (!lex->definer) + { + /* + DEFINER-clause is missing; we have to create default definer in + persistent arena to be PS/SP friendly. + If this is an ALTER VIEW then the current user should be set as + the definer. + */ + + if (!(lex->definer= create_default_definer(thd, false))) + { + WSREP_WARN("view default definer issue"); + } + } + + views->algorithm = lex->create_view_algorithm; + views->definer.user = lex->definer->user; + views->definer.host = lex->definer->host; + views->view_suid = lex->create_view_suid; + views->with_check = lex->create_view_check; + + view_store_options(thd, views, &buff); + buff.append(STRING_WITH_LEN("VIEW ")); + /* Test if user supplied a db (ie: we did not use thd->db) */ + if (views->db && views->db[0] && + (thd->db == NULL || strcmp(views->db, thd->db))) + { + append_identifier(thd, &buff, views->db, + views->db_length); + buff.append('.'); + } + append_identifier(thd, &buff, views->table_name, + views->table_name_length); + if (lex->view_list.elements) + { + List_iterator_fast<LEX_STRING> names(lex->view_list); + LEX_STRING *name; + int i; + + for (i= 0; (name= names++); i++) + { + buff.append(i ? ", " : "("); + append_identifier(thd, &buff, name->str, name->length); + } + buff.append(')'); + } + buff.append(STRING_WITH_LEN(" AS ")); + //buff.append(views->source.str, views->source.length); + buff.append(thd->lex->create_view_select.str, + thd->lex->create_view_select.length); + //int errcode= query_error_code(thd, TRUE); + //if (thd->binlog_query(THD::STMT_QUERY_TYPE, + // buff.ptr(), buff.length(), FALSE, FALSE, FALSE, errcod + return wsrep_to_buf_helper(thd, buff.ptr(), buff.length(), buf, buf_len); +} + +static int wsrep_TOI_begin(THD *thd, char *db_, char *table_, + const TABLE_LIST* table_list) +{ + wsrep_status_t ret(WSREP_WARNING); + uchar* buf(0); + size_t buf_len(0); + int buf_err; + + WSREP_DEBUG("TO BEGIN: %lld, %d : %s", (long long)wsrep_thd_trx_seqno(thd), + thd->wsrep_exec_mode, thd->query() ); + switch (thd->lex->sql_command) + { + case SQLCOM_CREATE_VIEW: + buf_err= create_view_query(thd, &buf, &buf_len); + break; + case SQLCOM_CREATE_PROCEDURE: + case SQLCOM_CREATE_SPFUNCTION: + buf_err= wsrep_create_sp(thd, &buf, &buf_len); + break; + case SQLCOM_CREATE_TRIGGER: + buf_err= wsrep_create_trigger_query(thd, &buf, &buf_len); + break; + case SQLCOM_CREATE_EVENT: + buf_err= wsrep_create_event_query(thd, &buf, &buf_len); + break; + case SQLCOM_ALTER_EVENT: + buf_err= wsrep_alter_event_query(thd, &buf, &buf_len); + break; + default: + buf_err= wsrep_to_buf_helper(thd, thd->query(), thd->query_length(), &buf, + &buf_len); + break; + } + + wsrep_key_arr_t key_arr= {0, 0}; + struct wsrep_buf buff = { buf, buf_len }; + if (!buf_err && + wsrep_prepare_keys_for_isolation(thd, db_, table_, table_list, &key_arr)&& + WSREP_OK == (ret = wsrep->to_execute_start(wsrep, thd->thread_id, + key_arr.keys, key_arr.keys_len, + &buff, 1, + &thd->wsrep_trx_meta))) + { + thd->wsrep_exec_mode= TOTAL_ORDER; + wsrep_to_isolation++; + my_free(buf); + wsrep_keys_free(&key_arr); + WSREP_DEBUG("TO BEGIN: %lld, %d",(long long)wsrep_thd_trx_seqno(thd), + thd->wsrep_exec_mode); + } + else { + /* jump to error handler in mysql_execute_command() */ + WSREP_WARN("TO isolation failed for: %d, sql: %s. Check wsrep " + "connection state and retry the query.", + ret, (thd->query()) ? thd->query() : "void"); + my_error(ER_LOCK_DEADLOCK, MYF(0), "WSREP replication failed. Check " + "your wsrep connection state and retry the query."); + if (buf) my_free(buf); + wsrep_keys_free(&key_arr); + return -1; + } + return 0; +} + +static void wsrep_TOI_end(THD *thd) { + wsrep_status_t ret; + wsrep_to_isolation--; + + WSREP_DEBUG("TO END: %lld, %d : %s", (long long)wsrep_thd_trx_seqno(thd), + thd->wsrep_exec_mode, (thd->query()) ? thd->query() : "void"); + + XID xid; + wsrep_xid_init(&xid, &thd->wsrep_trx_meta.gtid.uuid, + thd->wsrep_trx_meta.gtid.seqno); + wsrep_set_SE_checkpoint(&xid); + WSREP_DEBUG("TO END: %lld, update seqno", + (long long)wsrep_thd_trx_seqno(thd)); + + if (WSREP_OK == (ret = wsrep->to_execute_end(wsrep, thd->thread_id))) { + WSREP_DEBUG("TO END: %lld", (long long)wsrep_thd_trx_seqno(thd)); + } + else { + WSREP_WARN("TO isolation end failed for: %d, sql: %s", + ret, (thd->query()) ? thd->query() : "void"); + } +} + +static int wsrep_RSU_begin(THD *thd, char *db_, char *table_) +{ + wsrep_status_t ret(WSREP_WARNING); + WSREP_DEBUG("RSU BEGIN: %lld, %d : %s", (long long)wsrep_thd_trx_seqno(thd), + thd->wsrep_exec_mode, thd->query() ); + + ret = wsrep->desync(wsrep); + if (ret != WSREP_OK) + { + WSREP_WARN("RSU desync failed %d for %s", ret, thd->query()); + my_error(ER_LOCK_DEADLOCK, MYF(0)); + return(ret); + } + mysql_mutex_lock(&LOCK_wsrep_replaying); + wsrep_replaying++; + mysql_mutex_unlock(&LOCK_wsrep_replaying); + + if (wsrep_wait_committing_connections_close(5000)) + { + /* no can do, bail out from DDL */ + WSREP_WARN("RSU failed due to pending transactions, %s", thd->query()); + mysql_mutex_lock(&LOCK_wsrep_replaying); + wsrep_replaying--; + mysql_mutex_unlock(&LOCK_wsrep_replaying); + + ret = wsrep->resync(wsrep); + if (ret != WSREP_OK) + { + WSREP_WARN("resync failed %d for %s", ret, thd->query()); + } + my_error(ER_LOCK_DEADLOCK, MYF(0)); + return(1); + } + + wsrep_seqno_t seqno = wsrep->pause(wsrep); + if (seqno == WSREP_SEQNO_UNDEFINED) + { + WSREP_WARN("pause failed %lld for %s", (long long)seqno, thd->query()); + return(1); + } + WSREP_DEBUG("paused at %lld", (long long)seqno); + thd->variables.wsrep_on = 0; + return 0; +} + +static void wsrep_RSU_end(THD *thd) +{ + wsrep_status_t ret(WSREP_WARNING); + WSREP_DEBUG("RSU END: %lld, %d : %s", (long long)wsrep_thd_trx_seqno(thd), + thd->wsrep_exec_mode, thd->query() ); + + + mysql_mutex_lock(&LOCK_wsrep_replaying); + wsrep_replaying--; + mysql_mutex_unlock(&LOCK_wsrep_replaying); + + ret = wsrep->resume(wsrep); + if (ret != WSREP_OK) + { + WSREP_WARN("resume failed %d for %s", ret, thd->query()); + } + ret = wsrep->resync(wsrep); + if (ret != WSREP_OK) + { + WSREP_WARN("resync failed %d for %s", ret, thd->query()); + return; + } + thd->variables.wsrep_on = 1; +} + +int wsrep_to_isolation_begin(THD *thd, char *db_, char *table_, + const TABLE_LIST* table_list) +{ + int ret= 0; + + /* + No isolation for applier or replaying threads. + */ + if (thd->wsrep_exec_mode == REPL_RECV) + return 0; + + mysql_mutex_lock(&thd->LOCK_wsrep_thd); + + if (thd->wsrep_conflict_state == MUST_ABORT) + { + WSREP_INFO("thread: %lu, %s has been aborted due to multi-master conflict", + thd->thread_id, thd->query()); + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + return WSREP_TRX_FAIL; + } + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + + DBUG_ASSERT(thd->wsrep_exec_mode == LOCAL_STATE); + DBUG_ASSERT(thd->wsrep_trx_meta.gtid.seqno == WSREP_SEQNO_UNDEFINED); + + if (thd->global_read_lock.can_acquire_protection()) + { + WSREP_DEBUG("Aborting TOI: Global Read-Lock (FTWRL) in place: %s %lu", + thd->query(), thd->thread_id); + return -1; + } + + if (wsrep_debug && thd->mdl_context.has_locks()) + { + WSREP_DEBUG("thread holds MDL locks at TI begin: %s %lu", + thd->query(), thd->thread_id); + } + + /* + It makes sense to set auto_increment_* to defaults in TOI operations. + Must be done before wsrep_TOI_begin() since Query_log_event encapsulating + TOI statement and auto inc variables for wsrep replication is constructed + there. Variables are reset back in THD::reset_for_next_command() before + processing of next command. + */ + if (wsrep_auto_increment_control) + { + thd->variables.auto_increment_offset = 1; + thd->variables.auto_increment_increment = 1; + } + + if (thd->variables.wsrep_on && thd->wsrep_exec_mode==LOCAL_STATE) + { + switch (wsrep_OSU_method_options) { + case WSREP_OSU_TOI: ret = wsrep_TOI_begin(thd, db_, table_, + table_list); break; + case WSREP_OSU_RSU: ret = wsrep_RSU_begin(thd, db_, table_); break; + } + if (!ret) + { + thd->wsrep_exec_mode= TOTAL_ORDER; + } + } + return ret; +} + +void wsrep_to_isolation_end(THD *thd) +{ + if (thd->wsrep_exec_mode == TOTAL_ORDER) + { + switch(wsrep_OSU_method_options) + { + case WSREP_OSU_TOI: wsrep_TOI_end(thd); break; + case WSREP_OSU_RSU: wsrep_RSU_end(thd); break; + } + wsrep_cleanup_transaction(thd); + } +} + +#define WSREP_MDL_LOG(severity, msg, req, gra) \ + WSREP_##severity( \ + "%s\n" \ + "request: (%lu \tseqno %lld \twsrep (%d, %d, %d) cmd %d %d \t%s)\n" \ + "granted: (%lu \tseqno %lld \twsrep (%d, %d, %d) cmd %d %d \t%s)", \ + msg, \ + req->thread_id, (long long)wsrep_thd_trx_seqno(req), \ + req->wsrep_exec_mode, req->wsrep_query_state, req->wsrep_conflict_state, \ + req->get_command(), req->lex->sql_command, req->query(), \ + gra->thread_id, (long long)wsrep_thd_trx_seqno(gra), \ + gra->wsrep_exec_mode, gra->wsrep_query_state, gra->wsrep_conflict_state, \ + gra->get_command(), gra->lex->sql_command, gra->query()); + +bool +wsrep_grant_mdl_exception(MDL_context *requestor_ctx, + MDL_ticket *ticket +) { + if (!WSREP_ON) return FALSE; + + THD *request_thd = requestor_ctx->get_thd(); + THD *granted_thd = ticket->get_ctx()->get_thd(); + bool ret = FALSE; + + mysql_mutex_lock(&request_thd->LOCK_wsrep_thd); + if (request_thd->wsrep_exec_mode == TOTAL_ORDER || + request_thd->wsrep_exec_mode == REPL_RECV) + { + mysql_mutex_unlock(&request_thd->LOCK_wsrep_thd); + WSREP_MDL_LOG(DEBUG, "MDL conflict ", request_thd, granted_thd); + ticket->wsrep_report(wsrep_debug); + + mysql_mutex_lock(&granted_thd->LOCK_wsrep_thd); + if (granted_thd->wsrep_exec_mode == TOTAL_ORDER || + granted_thd->wsrep_exec_mode == REPL_RECV) + { + WSREP_MDL_LOG(INFO, "MDL BF-BF conflict", request_thd, granted_thd); + ticket->wsrep_report(true); + mysql_mutex_unlock(&granted_thd->LOCK_wsrep_thd); + ret = TRUE; + } + else if (granted_thd->lex->sql_command == SQLCOM_FLUSH) + { + WSREP_DEBUG("mdl granted over FLUSH BF"); + ticket->wsrep_report(wsrep_debug); + mysql_mutex_unlock(&granted_thd->LOCK_wsrep_thd); + ret = TRUE; + } + else if (request_thd->lex->sql_command == SQLCOM_DROP_TABLE) + { + WSREP_DEBUG("DROP caused BF abort"); + ticket->wsrep_report(wsrep_debug); + mysql_mutex_unlock(&granted_thd->LOCK_wsrep_thd); + wsrep_abort_thd((void*)request_thd, (void*)granted_thd, 1); + ret = FALSE; + } + else if (granted_thd->wsrep_query_state == QUERY_COMMITTING) + { + WSREP_DEBUG("mdl granted, but commiting thd abort scheduled"); + ticket->wsrep_report(wsrep_debug); + mysql_mutex_unlock(&granted_thd->LOCK_wsrep_thd); + wsrep_abort_thd((void*)request_thd, (void*)granted_thd, 1); + ret = FALSE; + } + else + { + WSREP_MDL_LOG(DEBUG, "MDL conflict-> BF abort", request_thd, granted_thd); + ticket->wsrep_report(wsrep_debug); + mysql_mutex_unlock(&granted_thd->LOCK_wsrep_thd); + wsrep_abort_thd((void*)request_thd, (void*)granted_thd, 1); + ret = FALSE; + } + } + else + { + mysql_mutex_unlock(&request_thd->LOCK_wsrep_thd); + } + return ret; +} + + +pthread_handler_t start_wsrep_THD(void *arg) +{ + THD *thd; + rpl_sql_thread_info sql_info(NULL); + wsrep_thd_processor_fun processor= (wsrep_thd_processor_fun)arg; + + if (my_thread_init()) + { + WSREP_ERROR("Could not initialize thread"); + return(NULL); + } + + if (!(thd= new THD(true))) + { + return(NULL); + } + mysql_mutex_lock(&LOCK_thread_count); + thd->thread_id=thread_id++; + + thd->real_id=pthread_self(); // Keep purify happy + thread_count++; + thread_created++; + threads.append(thd); + + my_net_init(&thd->net,(st_vio*) 0, MYF(0)); + + DBUG_PRINT("wsrep",(("creating thread %lld"), (long long)thd->thread_id)); + thd->prior_thr_create_utime= thd->start_utime= microsecond_interval_timer(); + (void) mysql_mutex_unlock(&LOCK_thread_count); + + /* from bootstrap()... */ + thd->bootstrap=1; + thd->max_client_packet_length= thd->net.max_packet; + thd->security_ctx->master_access= ~(ulong)0; + thd->system_thread_info.rpl_sql_info= &sql_info; + + /* from handle_one_connection... */ + pthread_detach_this_thread(); + + mysql_thread_set_psi_id(thd->thread_id); + thd->thr_create_utime= microsecond_interval_timer(); + if (MYSQL_CALLBACK_ELSE(thread_scheduler, init_new_connection_thread, (), 0)) + { + close_connection(thd, ER_OUT_OF_RESOURCES); + statistic_increment(aborted_connects,&LOCK_status); + MYSQL_CALLBACK(thread_scheduler, end_thread, (thd, 0)); + + return(NULL); + } + +// </5.1.17> + /* + handle_one_connection() is normally the only way a thread would + start and would always be on the very high end of the stack , + therefore, the thread stack always starts at the address of the + first local variable of handle_one_connection, which is thd. We + need to know the start of the stack so that we could check for + stack overruns. + */ + DBUG_PRINT("wsrep", ("handle_one_connection called by thread %lld\n", + (long long)thd->thread_id)); + /* now that we've called my_thread_init(), it is safe to call DBUG_* */ + + thd->thread_stack= (char*) &thd; + if (thd->store_globals()) + { + close_connection(thd, ER_OUT_OF_RESOURCES); + statistic_increment(aborted_connects,&LOCK_status); + MYSQL_CALLBACK(thread_scheduler, end_thread, (thd, 0)); + delete thd; + + return(NULL); + } + + thd->system_thread= SYSTEM_THREAD_SLAVE_SQL; + thd->security_ctx->skip_grants(); + + /* handle_one_connection() again... */ + //thd->version= refresh_version; + thd->proc_info= 0; + thd->set_command(COM_SLEEP); + thd->set_time(); + thd->init_for_queries(); + + mysql_mutex_lock(&LOCK_thread_count); + wsrep_running_threads++; + mysql_cond_broadcast(&COND_thread_count); + mysql_mutex_unlock(&LOCK_thread_count); + + processor(thd); + + close_connection(thd, 0); + + mysql_mutex_lock(&LOCK_thread_count); + wsrep_running_threads--; + WSREP_DEBUG("wsrep running threads now: %lu", wsrep_running_threads); + mysql_cond_broadcast(&COND_thread_count); + mysql_mutex_unlock(&LOCK_thread_count); + + // Note: We can't call THD destructor without crashing + // if plugins have not been initialized. However, in most of the + // cases this means that pre SE initialization SST failed and + // we are going to exit anyway. + if (plugins_are_initialized) + { + net_end(&thd->net); + MYSQL_CALLBACK(thread_scheduler, end_thread, (thd, 1)); + } + else + { + // TODO: lightweight cleanup to get rid of: + // 'Error in my_thread_global_end(): 2 threads didn't exit' + // at server shutdown + } + + my_thread_end(); + if (thread_handling > SCHEDULER_ONE_THREAD_PER_CONNECTION) + { + mysql_mutex_lock(&LOCK_thread_count); + delete thd; + thread_count--; + mysql_mutex_unlock(&LOCK_thread_count); + } + return(NULL); +} + + +/**/ +static bool abort_replicated(THD *thd) +{ + bool ret_code= false; + if (thd->wsrep_query_state== QUERY_COMMITTING) + { + if (wsrep_debug) WSREP_INFO("aborting replicated trx: %lu", thd->real_id); + + (void)wsrep_abort_thd(thd, thd, TRUE); + ret_code= true; + } + return ret_code; +} + + +/**/ +static inline bool is_client_connection(THD *thd) +{ + return (thd->wsrep_client_thread && thd->variables.wsrep_on); +} + + +static inline bool is_replaying_connection(THD *thd) +{ + bool ret; + + mysql_mutex_lock(&thd->LOCK_wsrep_thd); + ret= (thd->wsrep_conflict_state == REPLAYING) ? true : false; + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + + return ret; +} + + +static inline bool is_committing_connection(THD *thd) +{ + bool ret; + + mysql_mutex_lock(&thd->LOCK_wsrep_thd); + ret= (thd->wsrep_query_state == QUERY_COMMITTING) ? true : false; + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + + return ret; +} + + +static bool have_client_connections() +{ + THD *tmp; + + I_List_iterator<THD> it(threads); + while ((tmp=it++)) + { + DBUG_PRINT("quit",("Informing thread %ld that it's time to die", + tmp->thread_id)); + if (is_client_connection(tmp) && tmp->killed == KILL_CONNECTION) + { + (void)abort_replicated(tmp); + return true; + } + } + return false; +} + +/* + returns the number of wsrep appliers running. + However, the caller (thd parameter) is not taken in account + */ +static int have_wsrep_appliers(THD *thd) +{ + int ret= 0; + THD *tmp; + + I_List_iterator<THD> it(threads); + while ((tmp=it++)) + { + ret+= (tmp != thd && tmp->wsrep_applier); + } + return ret; +} + + +static void wsrep_close_thread(THD *thd) +{ + thd->killed= KILL_CONNECTION; + MYSQL_CALLBACK(thread_scheduler, post_kill_notification, (thd)); + if (thd->mysys_var) + { + thd->mysys_var->abort=1; + mysql_mutex_lock(&thd->mysys_var->mutex); + if (thd->mysys_var->current_cond) + { + mysql_mutex_lock(thd->mysys_var->current_mutex); + mysql_cond_broadcast(thd->mysys_var->current_cond); + mysql_mutex_unlock(thd->mysys_var->current_mutex); + } + mysql_mutex_unlock(&thd->mysys_var->mutex); + } +} + + +static my_bool have_committing_connections() +{ + THD *tmp; + mysql_mutex_lock(&LOCK_thread_count); // For unlink from list + + I_List_iterator<THD> it(threads); + while ((tmp=it++)) + { + if (!is_client_connection(tmp)) + continue; + + if (is_committing_connection(tmp)) + { + return TRUE; + } + } + mysql_mutex_unlock(&LOCK_thread_count); + return FALSE; +} + + +int wsrep_wait_committing_connections_close(int wait_time) +{ + int sleep_time= 100; + + while (have_committing_connections() && wait_time > 0) + { + WSREP_DEBUG("wait for committing transaction to close: %d", wait_time); + my_sleep(sleep_time); + wait_time -= sleep_time; + } + if (have_committing_connections()) + { + return 1; + } + return 0; +} + + +void wsrep_close_client_connections(my_bool wait_to_end) +{ + /* + First signal all threads that it's time to die + */ + + THD *tmp; + mysql_mutex_lock(&LOCK_thread_count); // For unlink from list + + bool kill_cached_threads_saved= kill_cached_threads; + kill_cached_threads= true; // prevent future threads caching + mysql_cond_broadcast(&COND_thread_cache); // tell cached threads to die + + I_List_iterator<THD> it(threads); + while ((tmp=it++)) + { + DBUG_PRINT("quit",("Informing thread %ld that it's time to die", + tmp->thread_id)); + /* We skip slave threads & scheduler on this first loop through. */ + if (!is_client_connection(tmp)) + continue; + + if (is_replaying_connection(tmp)) + { + tmp->killed= KILL_CONNECTION; + continue; + } + + /* replicated transactions must be skipped */ + if (abort_replicated(tmp)) + continue; + + WSREP_DEBUG("closing connection %ld", tmp->thread_id); + wsrep_close_thread(tmp); + } + mysql_mutex_unlock(&LOCK_thread_count); + + if (thread_count) + sleep(2); // Give threads time to die + + mysql_mutex_lock(&LOCK_thread_count); + /* + Force remaining threads to die by closing the connection to the client + */ + + I_List_iterator<THD> it2(threads); + while ((tmp=it2++)) + { +#ifndef __bsdi__ // Bug in BSDI kernel + if (is_client_connection(tmp) && + !abort_replicated(tmp) && + !is_replaying_connection(tmp)) + { + WSREP_INFO("killing local connection: %ld",tmp->thread_id); + close_connection(tmp,0); + } +#endif + } + + DBUG_PRINT("quit",("Waiting for threads to die (count=%u)",thread_count)); + if (wsrep_debug) + WSREP_INFO("waiting for client connections to close: %u", thread_count); + + while (wait_to_end && have_client_connections()) + { + mysql_cond_wait(&COND_thread_count, &LOCK_thread_count); + DBUG_PRINT("quit",("One thread died (count=%u)", thread_count)); + } + + kill_cached_threads= kill_cached_threads_saved; + + mysql_mutex_unlock(&LOCK_thread_count); + + /* All client connection threads have now been aborted */ +} + + +void wsrep_close_applier(THD *thd) +{ + WSREP_DEBUG("closing applier %ld", thd->thread_id); + wsrep_close_thread(thd); +} + + +void wsrep_close_threads(THD *thd) +{ + THD *tmp; + mysql_mutex_lock(&LOCK_thread_count); // For unlink from list + + I_List_iterator<THD> it(threads); + while ((tmp=it++)) + { + DBUG_PRINT("quit",("Informing thread %ld that it's time to die", + tmp->thread_id)); + /* We skip slave threads & scheduler on this first loop through. */ + if (tmp->wsrep_applier && tmp != thd) + { + WSREP_DEBUG("closing wsrep thread %ld", tmp->thread_id); + wsrep_close_thread (tmp); + } + } + + mysql_mutex_unlock(&LOCK_thread_count); +} + + +void wsrep_close_applier_threads(int count) +{ + THD *tmp; + mysql_mutex_lock(&LOCK_thread_count); // For unlink from list + + I_List_iterator<THD> it(threads); + while ((tmp=it++) && count) + { + DBUG_PRINT("quit",("Informing thread %ld that it's time to die", + tmp->thread_id)); + /* We skip slave threads & scheduler on this first loop through. */ + if (tmp->wsrep_applier) + { + WSREP_DEBUG("closing wsrep applier thread %ld", tmp->thread_id); + tmp->wsrep_applier_closing= TRUE; + count--; + } + } + + mysql_mutex_unlock(&LOCK_thread_count); +} + + +void wsrep_wait_appliers_close(THD *thd) +{ + /* Wait for wsrep appliers to gracefully exit */ + mysql_mutex_lock(&LOCK_thread_count); + while (have_wsrep_appliers(thd) > 1) + // 1 is for rollbacker thread which needs to be killed explicitly. + // This gotta be fixed in a more elegant manner if we gonna have arbitrary + // number of non-applier wsrep threads. + { + if (thread_handling > SCHEDULER_ONE_THREAD_PER_CONNECTION) + { + mysql_mutex_unlock(&LOCK_thread_count); + my_sleep(100); + mysql_mutex_lock(&LOCK_thread_count); + } + else + mysql_cond_wait(&COND_thread_count,&LOCK_thread_count); + DBUG_PRINT("quit",("One applier died (count=%u)",thread_count)); + } + mysql_mutex_unlock(&LOCK_thread_count); + /* Now kill remaining wsrep threads: rollbacker */ + wsrep_close_threads (thd); + /* and wait for them to die */ + mysql_mutex_lock(&LOCK_thread_count); + while (have_wsrep_appliers(thd) > 0) + { + if (thread_handling > SCHEDULER_ONE_THREAD_PER_CONNECTION) + { + mysql_mutex_unlock(&LOCK_thread_count); + my_sleep(100); + mysql_mutex_lock(&LOCK_thread_count); + } + else + mysql_cond_wait(&COND_thread_count,&LOCK_thread_count); + DBUG_PRINT("quit",("One thread died (count=%u)",thread_count)); + } + mysql_mutex_unlock(&LOCK_thread_count); + + /* All wsrep applier threads have now been aborted. However, if this thread + is also applier, we are still running... + */ +} + + +void wsrep_kill_mysql(THD *thd) +{ + if (mysqld_server_started) + { + if (!shutdown_in_progress) + { + WSREP_INFO("starting shutdown"); + kill_mysql(); + } + } + else + { + unireg_abort(1); + } +} + + +int wsrep_create_sp(THD *thd, uchar** buf, size_t* buf_len) +{ + String log_query; + sp_head *sp = thd->lex->sphead; + ulong saved_mode= thd->variables.sql_mode; + String retstr(64); + retstr.set_charset(system_charset_info); + + log_query.set_charset(system_charset_info); + + if (sp->m_type == TYPE_ENUM_FUNCTION) + { + sp_returns_type(thd, retstr, sp); + } + + if (!create_string(thd, &log_query, + sp->m_type, + (sp->m_explicit_name ? sp->m_db.str : NULL), + (sp->m_explicit_name ? sp->m_db.length : 0), + sp->m_name.str, sp->m_name.length, + sp->m_params.str, sp->m_params.length, + retstr.c_ptr(), retstr.length(), + sp->m_body.str, sp->m_body.length, + sp->m_chistics, &(thd->lex->definer->user), + &(thd->lex->definer->host), + saved_mode)) + { + WSREP_WARN("SP create string failed: %s", thd->query()); + return 1; + } + + return wsrep_to_buf_helper(thd, log_query.ptr(), log_query.length(), buf, buf_len); +} + + +extern int wsrep_on(void *thd) +{ + return (int)(WSREP(((THD*)thd))); +} + + +extern "C" bool wsrep_thd_is_wsrep_on(THD *thd) +{ + return thd->variables.wsrep_on; +} + + +extern "C" bool wsrep_consistency_check(void *thd) +{ + return ((THD*)thd)->wsrep_consistency_check == CONSISTENCY_CHECK_RUNNING; +} + + +extern "C" void wsrep_thd_set_exec_mode(THD *thd, enum wsrep_exec_mode mode) +{ + thd->wsrep_exec_mode= mode; +} + + +extern "C" void wsrep_thd_set_query_state( + THD *thd, enum wsrep_query_state state) +{ + thd->wsrep_query_state= state; +} + + +extern "C" void wsrep_thd_set_conflict_state( + THD *thd, enum wsrep_conflict_state state) +{ + thd->wsrep_conflict_state= state; +} + + +extern "C" enum wsrep_exec_mode wsrep_thd_exec_mode(THD *thd) +{ + return thd->wsrep_exec_mode; +} + + +extern "C" const char *wsrep_thd_exec_mode_str(THD *thd) +{ + return + (!thd) ? "void" : + (thd->wsrep_exec_mode == LOCAL_STATE) ? "local" : + (thd->wsrep_exec_mode == REPL_RECV) ? "applier" : + (thd->wsrep_exec_mode == TOTAL_ORDER) ? "total order" : + (thd->wsrep_exec_mode == LOCAL_COMMIT) ? "local commit" : "void"; +} + + +extern "C" enum wsrep_query_state wsrep_thd_query_state(THD *thd) +{ + return thd->wsrep_query_state; +} + + +extern "C" const char *wsrep_thd_query_state_str(THD *thd) +{ + return + (!thd) ? "void" : + (thd->wsrep_query_state == QUERY_IDLE) ? "idle" : + (thd->wsrep_query_state == QUERY_EXEC) ? "executing" : + (thd->wsrep_query_state == QUERY_COMMITTING) ? "committing" : + (thd->wsrep_query_state == QUERY_EXITING) ? "exiting" : + (thd->wsrep_query_state == QUERY_ROLLINGBACK) ? "rolling back" : "void"; +} + + +extern "C" enum wsrep_conflict_state wsrep_thd_conflict_state(THD *thd) +{ + return thd->wsrep_conflict_state; +} + + +extern "C" const char *wsrep_thd_conflict_state_str(THD *thd) +{ + return + (!thd) ? "void" : + (thd->wsrep_conflict_state == NO_CONFLICT) ? "no conflict" : + (thd->wsrep_conflict_state == MUST_ABORT) ? "must abort" : + (thd->wsrep_conflict_state == ABORTING) ? "aborting" : + (thd->wsrep_conflict_state == MUST_REPLAY) ? "must replay" : + (thd->wsrep_conflict_state == REPLAYING) ? "replaying" : + (thd->wsrep_conflict_state == RETRY_AUTOCOMMIT) ? "retrying" : + (thd->wsrep_conflict_state == CERT_FAILURE) ? "cert failure" : "void"; +} + + +extern "C" wsrep_ws_handle_t* wsrep_thd_ws_handle(THD *thd) +{ + return &thd->wsrep_ws_handle; +} + + +extern "C" void wsrep_thd_LOCK(THD *thd) +{ + mysql_mutex_lock(&thd->LOCK_wsrep_thd); +} + + +extern "C" void wsrep_thd_UNLOCK(THD *thd) +{ + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); +} + + +extern "C" time_t wsrep_thd_query_start(THD *thd) +{ + return thd->query_start(); +} + + +extern "C" uint32 wsrep_thd_wsrep_rand(THD *thd) +{ + return thd->wsrep_rand; +} + + +extern "C" my_thread_id wsrep_thd_thread_id(THD *thd) +{ + return thd->thread_id; +} + + +extern "C" wsrep_seqno_t wsrep_thd_trx_seqno(THD *thd) +{ + return (thd) ? thd->wsrep_trx_meta.gtid.seqno : WSREP_SEQNO_UNDEFINED; +} + + +extern "C" query_id_t wsrep_thd_query_id(THD *thd) +{ + return thd->query_id; +} + + +extern "C" char *wsrep_thd_query(THD *thd) +{ + return (thd) ? thd->query() : NULL; +} + + +extern "C" query_id_t wsrep_thd_wsrep_last_query_id(THD *thd) +{ + return thd->wsrep_last_query_id; +} + + +extern "C" void wsrep_thd_set_wsrep_last_query_id(THD *thd, query_id_t id) +{ + thd->wsrep_last_query_id= id; +} + + +extern "C" void wsrep_thd_awake(THD *thd, my_bool signal) +{ + if (signal) + { + mysql_mutex_lock(&thd->LOCK_thd_data); + thd->awake(KILL_QUERY); + mysql_mutex_unlock(&thd->LOCK_thd_data); + } + else + { + mysql_mutex_lock(&LOCK_wsrep_replaying); + mysql_cond_broadcast(&COND_wsrep_replaying); + mysql_mutex_unlock(&LOCK_wsrep_replaying); + } +} + + +extern "C" int wsrep_thd_retry_counter(THD *thd) +{ + return(thd->wsrep_retry_counter); +} + + +extern int +wsrep_trx_order_before(void *thd1, void *thd2) +{ + if (wsrep_thd_trx_seqno((THD*)thd1) < wsrep_thd_trx_seqno((THD*)thd2)) { + WSREP_DEBUG("BF conflict, order: %lld %lld\n", + (long long)wsrep_thd_trx_seqno((THD*)thd1), + (long long)wsrep_thd_trx_seqno((THD*)thd2)); + return 1; + } + WSREP_DEBUG("waiting for BF, trx order: %lld %lld\n", + (long long)wsrep_thd_trx_seqno((THD*)thd1), + (long long)wsrep_thd_trx_seqno((THD*)thd2)); + return 0; +} + + +extern "C" int +wsrep_trx_is_aborting(void *thd_ptr) +{ + if (thd_ptr) { + if ((((THD *)thd_ptr)->wsrep_conflict_state == MUST_ABORT) || + (((THD *)thd_ptr)->wsrep_conflict_state == ABORTING)) { + return 1; + } + } + return 0; +} + + +my_bool wsrep_read_only_option(THD *thd, TABLE_LIST *all_tables) +{ + int opt_readonly_saved = opt_readonly; + ulong flag_saved = (ulong)(thd->security_ctx->master_access & SUPER_ACL); + + opt_readonly = 0; + thd->security_ctx->master_access &= ~SUPER_ACL; + + my_bool ret = !deny_updates_if_read_only_option(thd, all_tables); + + opt_readonly = opt_readonly_saved; + thd->security_ctx->master_access |= flag_saved; + + return ret; +} + + +void wsrep_copy_query(THD *thd) +{ + thd->wsrep_retry_command = thd->get_command(); + thd->wsrep_retry_query_len = thd->query_length(); + if (thd->wsrep_retry_query) { + my_free(thd->wsrep_retry_query); + } + thd->wsrep_retry_query = (char *)my_malloc( + thd->wsrep_retry_query_len + 1, MYF(0)); + strncpy(thd->wsrep_retry_query, thd->query(), thd->wsrep_retry_query_len); + thd->wsrep_retry_query[thd->wsrep_retry_query_len] = '\0'; +} + + +bool wsrep_is_show_query(enum enum_sql_command command) +{ + DBUG_ASSERT(command >= 0 && command <= SQLCOM_END); + return (sql_command_flags[command] & CF_STATUS_COMMAND) != 0; +} + +bool wsrep_create_like_table(THD* thd, TABLE_LIST* table, + TABLE_LIST* src_table, + HA_CREATE_INFO *create_info) +{ + TABLE *tmp_table; + bool is_tmp_table= FALSE; + + for (tmp_table= thd->temporary_tables; tmp_table; tmp_table=tmp_table->next) + { + if (!strcmp(src_table->db, tmp_table->s->db.str) && + !strcmp(src_table->table_name, tmp_table->s->table_name.str)) + { + is_tmp_table= TRUE; + break; + } + } + if (create_info->options & HA_LEX_CREATE_TMP_TABLE) + { + + /* CREATE TEMPORARY TABLE LIKE must be skipped from replication */ + WSREP_DEBUG("CREATE TEMPORARY TABLE LIKE... skipped replication\n %s", + thd->query()); + } + else if (!is_tmp_table) + { + /* this is straight CREATE TABLE LIKE... eith no tmp tables */ + WSREP_TO_ISOLATION_BEGIN(table->db, table->table_name, NULL); + } + else + { + /* here we have CREATE TABLE LIKE <temporary table> + the temporary table definition will be needed in slaves to + enable the create to succeed + */ + TABLE_LIST tbl; + bzero((void*) &tbl, sizeof(tbl)); + tbl.db= src_table->db; + tbl.table_name= tbl.alias= src_table->table_name; + tbl.table= tmp_table; + char buf[2048]; + String query(buf, sizeof(buf), system_charset_info); + query.length(0); // Have to zero it since constructor doesn't + + (void) store_create_info(thd, &tbl, &query, NULL, TRUE, FALSE); + WSREP_DEBUG("TMP TABLE: %s", query.ptr()); + + thd->wsrep_TOI_pre_query= query.ptr(); + thd->wsrep_TOI_pre_query_len= query.length(); + + WSREP_TO_ISOLATION_BEGIN(table->db, table->table_name, NULL); + + thd->wsrep_TOI_pre_query= NULL; + thd->wsrep_TOI_pre_query_len= 0; + } + + return(false); + +error: + thd->wsrep_TOI_pre_query= NULL; + return (true); +} + + +int wsrep_create_trigger_query(THD *thd, uchar** buf, size_t* buf_len) +{ + LEX *lex= thd->lex; + String stmt_query; + + LEX_STRING definer_user; + LEX_STRING definer_host; + + if (!lex->definer) + { + if (!thd->slave_thread) + { + if (!(lex->definer= create_default_definer(thd, false))) + return 1; + } + } + + if (lex->definer) + { + /* SUID trigger. */ + + definer_user= lex->definer->user; + definer_host= lex->definer->host; + } + else + { + /* non-SUID trigger. */ + + definer_user.str= 0; + definer_user.length= 0; + + definer_host.str= 0; + definer_host.length= 0; + } + + stmt_query.append(STRING_WITH_LEN("CREATE ")); + + append_definer(thd, &stmt_query, &definer_user, &definer_host); + + LEX_STRING stmt_definition; + stmt_definition.str= (char*) thd->lex->stmt_definition_begin; + stmt_definition.length= thd->lex->stmt_definition_end + - thd->lex->stmt_definition_begin; + trim_whitespace(thd->charset(), & stmt_definition); + + stmt_query.append(stmt_definition.str, stmt_definition.length); + + return wsrep_to_buf_helper(thd, stmt_query.c_ptr(), stmt_query.length(), + buf, buf_len); +} diff --git a/sql/wsrep_mysqld.h b/sql/wsrep_mysqld.h new file mode 100644 index 00000000000..2150ac85bf0 --- /dev/null +++ b/sql/wsrep_mysqld.h @@ -0,0 +1,370 @@ +/* Copyright 2008-2013 Codership Oy <http://www.codership.com> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include <wsrep.h> + +#if !defined(WSREP_MYSQLD_H) && defined(WITH_WSREP) +#define WSREP_MYSQLD_H + +typedef struct st_mysql_show_var SHOW_VAR; +#include <sql_priv.h> +//#include "rpl_gtid.h" +#include "../wsrep/wsrep_api.h" +#include "mdl.h" +#include "mysqld.h" +#include "sql_table.h" + +#define WSREP_UNDEFINED_TRX_ID ULONGLONG_MAX + +class set_var; +class THD; + +enum wsrep_exec_mode { + LOCAL_STATE, + REPL_RECV, + TOTAL_ORDER, + LOCAL_COMMIT +}; + +enum wsrep_query_state { + QUERY_IDLE, + QUERY_EXEC, + QUERY_COMMITTING, + QUERY_EXITING, + QUERY_ROLLINGBACK, +}; + +enum wsrep_conflict_state { + NO_CONFLICT, + MUST_ABORT, + ABORTING, + ABORTED, + MUST_REPLAY, + REPLAYING, + RETRY_AUTOCOMMIT, + CERT_FAILURE, +}; + +enum wsrep_consistency_check_mode { + NO_CONSISTENCY_CHECK, + CONSISTENCY_CHECK_DECLARED, + CONSISTENCY_CHECK_RUNNING, +}; + +struct wsrep_thd_shadow { + ulonglong options; + uint server_status; + enum wsrep_exec_mode wsrep_exec_mode; + Vio *vio; + ulong tx_isolation; + char *db; + size_t db_length; +}; + +// Global wsrep parameters +extern wsrep_t* wsrep; + +// MySQL wsrep options +extern const char* wsrep_provider; +extern const char* wsrep_provider_options; +extern const char* wsrep_cluster_name; +extern const char* wsrep_cluster_address; +extern const char* wsrep_node_name; +extern const char* wsrep_node_address; +extern const char* wsrep_node_incoming_address; +extern const char* wsrep_data_home_dir; +extern const char* wsrep_dbug_option; +extern long wsrep_slave_threads; +extern int wsrep_slave_count_change; +extern MYSQL_PLUGIN_IMPORT my_bool wsrep_debug; +extern my_bool wsrep_convert_LOCK_to_trx; +extern ulong wsrep_retry_autocommit; +extern my_bool wsrep_auto_increment_control; +extern my_bool wsrep_drupal_282555_workaround; +extern my_bool wsrep_incremental_data_collection; +extern const char* wsrep_start_position; +extern ulong wsrep_max_ws_size; +extern ulong wsrep_max_ws_rows; +extern const char* wsrep_notify_cmd; +extern my_bool wsrep_certify_nonPK; +extern long wsrep_max_protocol_version; +extern long wsrep_protocol_version; +extern ulong wsrep_forced_binlog_format; +extern ulong wsrep_OSU_method_options; +extern my_bool wsrep_desync; +extern my_bool wsrep_recovery; +extern my_bool wsrep_replicate_myisam; +extern my_bool wsrep_log_conflicts; +extern ulong wsrep_mysql_replication_bundle; +extern my_bool wsrep_load_data_splitting; +extern my_bool wsrep_restart_slave; +extern my_bool wsrep_restart_slave_activated; +extern my_bool wsrep_slave_FK_checks; +extern my_bool wsrep_slave_UK_checks; + +enum enum_wsrep_OSU_method { WSREP_OSU_TOI, WSREP_OSU_RSU }; +enum enum_wsrep_sync_wait { + WSREP_SYNC_WAIT_NONE = 0x0, + // show, select, begin + WSREP_SYNC_WAIT_BEFORE_READ = 0x1, + WSREP_SYNC_WAIT_BEFORE_UPDATE_DELETE = 0x2, + WSREP_SYNC_WAIT_BEFORE_INSERT_REPLACE = 0x4, + WSREP_SYNC_WAIT_MAX = 0x7 +}; + +// MySQL status variables +extern my_bool wsrep_connected; +extern my_bool wsrep_ready; +extern const char* wsrep_cluster_state_uuid; +extern long long wsrep_cluster_conf_id; +extern const char* wsrep_cluster_status; +extern long wsrep_cluster_size; +extern long wsrep_local_index; +extern long long wsrep_local_bf_aborts; +extern const char* wsrep_provider_name; +extern const char* wsrep_provider_version; +extern const char* wsrep_provider_vendor; + +int wsrep_show_status(THD *thd, SHOW_VAR *var, char *buff); +void wsrep_free_status(THD *thd); + +/* Filters out --wsrep-new-cluster oprtion from argv[] + * should be called in the very beginning of main() */ +void wsrep_filter_new_cluster (int* argc, char* argv[]); + +int wsrep_init(); +void wsrep_deinit(bool free_options); +void wsrep_recover(); +bool wsrep_before_SE(); // initialize wsrep before storage + // engines (true) or after (false) +/* wsrep initialization sequence at startup + * @param before wsrep_before_SE() value */ +void wsrep_init_startup(bool before); + +// Other wsrep global variables +extern my_bool wsrep_inited; // whether wsrep is initialized ? + +extern "C" enum wsrep_exec_mode wsrep_thd_exec_mode(THD *thd); +extern "C" enum wsrep_conflict_state wsrep_thd_conflict_state(THD *thd); +extern "C" enum wsrep_query_state wsrep_thd_query_state(THD *thd); +extern "C" const char * wsrep_thd_exec_mode_str(THD *thd); +extern "C" const char * wsrep_thd_conflict_state_str(THD *thd); +extern "C" const char * wsrep_thd_query_state_str(THD *thd); +extern "C" wsrep_ws_handle_t* wsrep_thd_ws_handle(THD *thd); + +extern "C" void wsrep_thd_set_exec_mode(THD *thd, enum wsrep_exec_mode mode); +extern "C" void wsrep_thd_set_query_state( + THD *thd, enum wsrep_query_state state); +extern "C" void wsrep_thd_set_conflict_state( + THD *thd, enum wsrep_conflict_state state); + +extern "C" void wsrep_thd_set_trx_to_replay(THD *thd, uint64 trx_id); + +extern "C" void wsrep_thd_LOCK(THD *thd); +extern "C" void wsrep_thd_UNLOCK(THD *thd); +extern "C" uint32 wsrep_thd_wsrep_rand(THD *thd); +extern "C" time_t wsrep_thd_query_start(THD *thd); +extern "C" my_thread_id wsrep_thd_thread_id(THD *thd); +extern "C" int64_t wsrep_thd_trx_seqno(THD *thd); +extern "C" query_id_t wsrep_thd_query_id(THD *thd); +extern "C" char * wsrep_thd_query(THD *thd); +extern "C" query_id_t wsrep_thd_wsrep_last_query_id(THD *thd); +extern "C" void wsrep_thd_set_wsrep_last_query_id(THD *thd, query_id_t id); +extern "C" void wsrep_thd_awake(THD *thd, my_bool signal); +extern "C" int wsrep_thd_retry_counter(THD *thd); + + +extern void wsrep_close_client_connections(my_bool wait_to_end); +extern int wsrep_wait_committing_connections_close(int wait_time); +extern void wsrep_close_applier(THD *thd); +extern void wsrep_wait_appliers_close(THD *thd); +extern void wsrep_close_applier_threads(int count); +extern void wsrep_kill_mysql(THD *thd); + +/* new defines */ +extern void wsrep_stop_replication(THD *thd); +extern bool wsrep_start_replication(); +extern bool wsrep_sync_wait (THD* thd, uint mask = WSREP_SYNC_WAIT_BEFORE_READ); +extern int wsrep_check_opts (int argc, char* const* argv); +extern void wsrep_prepend_PATH (const char* path); +/* some inline functions are defined in wsrep_mysqld_inl.h */ + +/* Other global variables */ +extern wsrep_seqno_t wsrep_locked_seqno; + +#define WSREP_ON \ + (global_system_variables.wsrep_on) + +#define WSREP(thd) \ + (WSREP_ON && (thd && thd->variables.wsrep_on)) + +#define WSREP_CLIENT(thd) \ + (WSREP(thd) && thd->wsrep_client_thread) + +#define WSREP_EMULATE_BINLOG(thd) \ + (WSREP(thd) && wsrep_emulate_bin_log) + +// MySQL logging functions don't seem to understand long long length modifer. +// This is a workaround. It also prefixes all messages with "WSREP" +#define WSREP_LOG(fun, ...) \ + { \ + char msg[1024] = {'\0'}; \ + snprintf(msg, sizeof(msg) - 1, ## __VA_ARGS__); \ + fun("WSREP: %s", msg); \ + } + +#define WSREP_LOG_CONFLICT_THD(thd, role) \ + WSREP_LOG(sql_print_information, \ + "%s: \n " \ + " THD: %lu, mode: %s, state: %s, conflict: %s, seqno: %lld\n " \ + " SQL: %s", \ + role, wsrep_thd_thread_id(thd), wsrep_thd_exec_mode_str(thd), \ + wsrep_thd_query_state_str(thd), \ + wsrep_thd_conflict_state_str(thd), (long long)wsrep_thd_trx_seqno(thd), \ + wsrep_thd_query(thd) \ + ); + +#define WSREP_LOG_CONFLICT(bf_thd, victim_thd, bf_abort) \ + if (wsrep_debug || wsrep_log_conflicts) \ + { \ + WSREP_LOG(sql_print_information, "cluster conflict due to %s for threads:",\ + (bf_abort) ? "high priority abort" : "certification failure" \ + ); \ + if (bf_thd != NULL) WSREP_LOG_CONFLICT_THD(bf_thd, "Winning thread"); \ + if (victim_thd) WSREP_LOG_CONFLICT_THD(victim_thd, "Victim thread"); \ + } + +#define WSREP_PROVIDER_EXISTS \ + (wsrep_provider && strncasecmp(wsrep_provider, WSREP_NONE, FN_REFLEN)) + +extern void wsrep_ready_wait(); + +enum wsrep_trx_status { + WSREP_TRX_OK, + WSREP_TRX_CERT_FAIL, /* certification failure, must abort */ + WSREP_TRX_SIZE_EXCEEDED, /* trx size exceeded */ + WSREP_TRX_ERROR, /* native mysql error */ +}; + +extern enum wsrep_trx_status +wsrep_run_wsrep_commit(THD *thd, handlerton *hton, bool all); +class Ha_trx_info; +struct THD_TRANS; +void wsrep_register_hton(THD* thd, bool all); +void wsrep_post_commit(THD* thd, bool all); +void wsrep_brute_force_killer(THD *thd); +int wsrep_hire_brute_force_killer(THD *thd, uint64_t trx_id); + +extern "C" bool wsrep_consistency_check(void *thd_ptr); + +/* this is visible for client build so that innodb plugin gets this */ +typedef struct wsrep_aborting_thd { + struct wsrep_aborting_thd *next; + THD *aborting_thd; +} *wsrep_aborting_thd_t; + +extern mysql_mutex_t LOCK_wsrep_ready; +extern mysql_cond_t COND_wsrep_ready; +extern mysql_mutex_t LOCK_wsrep_sst; +extern mysql_cond_t COND_wsrep_sst; +extern mysql_mutex_t LOCK_wsrep_sst_init; +extern mysql_cond_t COND_wsrep_sst_init; +extern mysql_mutex_t LOCK_wsrep_rollback; +extern mysql_cond_t COND_wsrep_rollback; +extern int wsrep_replaying; +extern mysql_mutex_t LOCK_wsrep_replaying; +extern mysql_cond_t COND_wsrep_replaying; +extern mysql_mutex_t LOCK_wsrep_slave_threads; +extern mysql_mutex_t LOCK_wsrep_desync; +extern wsrep_aborting_thd_t wsrep_aborting_thd; +extern my_bool wsrep_emulate_bin_log; +extern int wsrep_to_isolation; +#ifdef GTID_SUPPORT +extern rpl_sidno wsrep_sidno; +#endif /* GTID_SUPPORT */ +extern my_bool wsrep_preordered_opt; +extern handlerton *wsrep_hton; + +#ifdef HAVE_PSI_INTERFACE +extern PSI_mutex_key key_LOCK_wsrep_ready; +extern PSI_mutex_key key_COND_wsrep_ready; +extern PSI_mutex_key key_LOCK_wsrep_sst; +extern PSI_cond_key key_COND_wsrep_sst; +extern PSI_mutex_key key_LOCK_wsrep_sst_init; +extern PSI_cond_key key_COND_wsrep_sst_init; +extern PSI_mutex_key key_LOCK_wsrep_sst_thread; +extern PSI_cond_key key_COND_wsrep_sst_thread; +extern PSI_mutex_key key_LOCK_wsrep_rollback; +extern PSI_cond_key key_COND_wsrep_rollback; +extern PSI_mutex_key key_LOCK_wsrep_replaying; +extern PSI_cond_key key_COND_wsrep_replaying; +extern PSI_mutex_key key_LOCK_wsrep_slave_threads; +extern PSI_mutex_key key_LOCK_wsrep_desync; +#endif /* HAVE_PSI_INTERFACE */ +struct TABLE_LIST; +int wsrep_to_isolation_begin(THD *thd, char *db_, char *table_, + const TABLE_LIST* table_list); +void wsrep_to_isolation_end(THD *thd); +void wsrep_cleanup_transaction(THD *thd); +int wsrep_to_buf_helper( + THD* thd, const char *query, uint query_len, uchar** buf, size_t* buf_len); +int wsrep_create_sp(THD *thd, uchar** buf, size_t* buf_len); +int wsrep_create_trigger_query(THD *thd, uchar** buf, size_t* buf_len); +int wsrep_create_event_query(THD *thd, uchar** buf, size_t* buf_len); +int wsrep_alter_event_query(THD *thd, uchar** buf, size_t* buf_len); + +struct xid_t; +void wsrep_get_SE_checkpoint(xid_t*); +void wsrep_set_SE_checkpoint(xid_t*); +void wsrep_init_sidno(const wsrep_uuid_t&); +void wsrep_xid_init(xid_t*, const wsrep_uuid_t*, wsrep_seqno_t); +const wsrep_uuid_t* wsrep_xid_uuid(const xid_t*); +wsrep_seqno_t wsrep_xid_seqno(const xid_t*); +extern "C" int wsrep_is_wsrep_xid(const void* xid); + +extern "C" my_thread_id wsrep_thd_thread_id(THD *thd); +extern "C" char *wsrep_thd_query(THD *thd); + +extern bool +wsrep_grant_mdl_exception(MDL_context *requestor_ctx, + MDL_ticket *ticket); +IO_CACHE * get_trans_log(THD * thd); +bool wsrep_trans_cache_is_empty(THD *thd); +void thd_binlog_flush_pending_rows_event(THD *thd, bool stmt_end); +void thd_binlog_rollback_stmt(THD * thd); +void thd_binlog_trx_reset(THD * thd); + +typedef void (*wsrep_thd_processor_fun)(THD *); +pthread_handler_t start_wsrep_THD(void *arg); +int wsrep_wait_committing_connections_close(int wait_time); +void wsrep_close_client_connections(my_bool wait_to_end); +void wsrep_close_applier(THD *thd); +void wsrep_close_applier_threads(int count); +void wsrep_wait_appliers_close(THD *thd); +void wsrep_kill_mysql(THD *thd); +void wsrep_close_threads(THD *thd); +int wsrep_create_sp(THD *thd, uchar** buf, size_t* buf_len); +my_bool wsrep_read_only_option(THD *thd, TABLE_LIST *all_tables); +void wsrep_copy_query(THD *thd); +bool wsrep_is_show_query(enum enum_sql_command command); +void wsrep_replay_transaction(THD *thd); +bool wsrep_create_like_table(THD* thd, TABLE_LIST* table, + TABLE_LIST* src_table, + HA_CREATE_INFO *create_info); +int wsrep_create_trigger_query(THD *thd, uchar** buf, size_t* buf_len); + +extern my_bool deny_updates_if_read_only_option(THD *thd, + TABLE_LIST *all_tables); +#endif /* WSREP_MYSQLD_H */ diff --git a/sql/wsrep_notify.cc b/sql/wsrep_notify.cc new file mode 100644 index 00000000000..6eefb961b62 --- /dev/null +++ b/sql/wsrep_notify.cc @@ -0,0 +1,111 @@ +/* Copyright 2010 Codership Oy <http://www.codership.com> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include <mysqld.h> +#include "wsrep_priv.h" +#include "wsrep_utils.h" + +const char* wsrep_notify_cmd=""; + +static const char* _status_str(wsrep_member_status_t status) +{ + switch (status) + { + case WSREP_MEMBER_UNDEFINED: return "Undefined"; + case WSREP_MEMBER_JOINER: return "Joiner"; + case WSREP_MEMBER_DONOR: return "Donor"; + case WSREP_MEMBER_JOINED: return "Joined"; + case WSREP_MEMBER_SYNCED: return "Synced"; + default: return "Error(?)"; + } +} + +void wsrep_notify_status (wsrep_member_status_t status, + const wsrep_view_info_t* view) +{ + if (!wsrep_notify_cmd || 0 == strlen(wsrep_notify_cmd)) + { + WSREP_INFO("wsrep_notify_cmd is not defined, skipping notification."); + return; + } + + char cmd_buf[1 << 16]; // this can be long + long cmd_len = sizeof(cmd_buf) - 1; + char* cmd_ptr = cmd_buf; + long cmd_off = 0; + + cmd_off += snprintf (cmd_ptr + cmd_off, cmd_len - cmd_off, "%s", + wsrep_notify_cmd); + + if (status >= WSREP_MEMBER_UNDEFINED && status < WSREP_MEMBER_ERROR) + { + cmd_off += snprintf (cmd_ptr + cmd_off, cmd_len - cmd_off, " --status %s", + _status_str(status)); + } + else + { + /* here we preserve provider error codes */ + cmd_off += snprintf (cmd_ptr + cmd_off, cmd_len - cmd_off, + " --status 'Error(%d)'", status); + } + + if (0 != view) + { + char uuid_str[40]; + + wsrep_uuid_print (&view->state_id.uuid, uuid_str, sizeof(uuid_str)); + cmd_off += snprintf (cmd_ptr + cmd_off, cmd_len - cmd_off, + " --uuid %s", uuid_str); + + cmd_off += snprintf (cmd_ptr + cmd_off, cmd_len - cmd_off, + " --primary %s", view->view >= 0 ? "yes" : "no"); + + cmd_off += snprintf (cmd_ptr + cmd_off, cmd_len - cmd_off, + " --index %d", view->my_idx); + + if (view->memb_num) + { + cmd_off += snprintf (cmd_ptr + cmd_off, cmd_len - cmd_off, " --members"); + + for (int i = 0; i < view->memb_num; i++) + { + wsrep_uuid_print (&view->members[i].id, uuid_str, sizeof(uuid_str)); + cmd_off += snprintf (cmd_ptr + cmd_off, cmd_len - cmd_off, + "%c%s/%s/%s", i > 0 ? ',' : ' ', + uuid_str, view->members[i].name, + view->members[i].incoming); + } + } + } + + if (cmd_off == cmd_len) + { + WSREP_ERROR("Notification buffer too short (%ld). Aborting notification.", + cmd_len); + return; + } + + wsp::process p(cmd_ptr, "r"); + + p.wait(); + int err = p.error(); + + if (err) + { + WSREP_ERROR("Notification command failed: %d (%s): \"%s\"", + err, strerror(err), cmd_ptr); + } +} + diff --git a/sql/wsrep_priv.h b/sql/wsrep_priv.h new file mode 100644 index 00000000000..5c66587d757 --- /dev/null +++ b/sql/wsrep_priv.h @@ -0,0 +1,53 @@ +/* Copyright 2010 Codership Oy <http://www.codership.com> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +//! @file declares symbols private to wsrep integration layer + +#ifndef WSREP_PRIV_H +#define WSREP_PRIV_H + +#include "wsrep_mysqld.h" +#include "../wsrep/wsrep_api.h" + +#include <log.h> +#include <pthread.h> +#include <cstdio> + +void wsrep_ready_set (my_bool x); + +ssize_t wsrep_sst_prepare (void** msg); +wsrep_cb_status wsrep_sst_donate_cb (void* app_ctx, + void* recv_ctx, + const void* msg, size_t msg_len, + const wsrep_gtid_t* current_id, + const char* state, size_t state_len, + bool bypass); +extern unsigned int wsrep_check_ip (const char* addr); +extern size_t wsrep_guess_ip (char* buf, size_t buf_len); +extern size_t wsrep_guess_address(char* buf, size_t buf_len); + +extern wsrep_uuid_t local_uuid; +extern wsrep_seqno_t local_seqno; + +// a helper function +extern void wsrep_sst_received(wsrep_t*, const wsrep_uuid_t*, wsrep_seqno_t, + const void*, size_t); +/*! SST thread signals init thread about sst completion */ +extern void wsrep_sst_complete(const wsrep_uuid_t*, wsrep_seqno_t, bool); + +void wsrep_notify_status (wsrep_member_status_t new_status, + const wsrep_view_info_t* view = 0); +#endif /* WSREP_PRIV_H */ diff --git a/sql/wsrep_sst.cc b/sql/wsrep_sst.cc new file mode 100644 index 00000000000..c122d88f7cc --- /dev/null +++ b/sql/wsrep_sst.cc @@ -0,0 +1,1127 @@ +/* Copyright 2008-2012 Codership Oy <http://www.codership.com> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include "wsrep_sst.h" + +#include <mysqld.h> +#include <m_ctype.h> +#include <my_sys.h> +#include <strfunc.h> +#include <sql_class.h> +#include <set_var.h> +#include <sql_acl.h> +#include <sql_reload.h> +#include <sql_parse.h> +#include "wsrep_priv.h" +#include "wsrep_utils.h" +#include <cstdio> +#include <cstdlib> + +extern const char wsrep_defaults_file[]; + +const char* wsrep_sst_method = WSREP_SST_DEFAULT; +const char* wsrep_sst_receive_address = WSREP_SST_ADDRESS_AUTO; +const char* wsrep_sst_donor = ""; + char* wsrep_sst_auth = NULL; + +// container for real auth string +static const char* sst_auth_real = NULL; +my_bool wsrep_sst_donor_rejects_queries = FALSE; + +bool wsrep_sst_method_check (sys_var *self, THD* thd, set_var* var) +{ + if ((! var->save_result.string_value.str) || + (var->save_result.string_value.length == 0 )) + { + my_error(ER_WRONG_VALUE_FOR_VAR, MYF(0), var->var->name.str, + var->save_result.string_value.str ? + var->save_result.string_value.str : "NULL"); + return 1; + } + + return 0; +} + +bool wsrep_sst_method_update (sys_var *self, THD* thd, enum_var_type type) +{ + return 0; +} + +// TODO: Improve address verification. +static bool sst_receive_address_check (const char* str) +{ + if (!strncasecmp(str, "127.0.0.1", strlen("127.0.0.1")) || + !strncasecmp(str, "localhost", strlen("localhost"))) + { + return 1; + } + + return 0; +} + +bool wsrep_sst_receive_address_check (sys_var *self, THD* thd, set_var* var) +{ + char addr_buf[FN_REFLEN]; + + if ((! var->save_result.string_value.str) || + (var->save_result.string_value.length > (FN_REFLEN - 1))) // safety + { + goto err; + } + + memcpy(addr_buf, var->save_result.string_value.str, + var->save_result.string_value.length); + addr_buf[var->save_result.string_value.length]= 0; + + if (sst_receive_address_check(addr_buf)) + { + goto err; + } + + return 0; + +err: + my_error(ER_WRONG_VALUE_FOR_VAR, MYF(0), var->var->name.str, + var->save_result.string_value.str ? + var->save_result.string_value.str : "NULL"); + return 1; +} + +bool wsrep_sst_receive_address_update (sys_var *self, THD* thd, + enum_var_type type) +{ + return 0; +} + +bool wsrep_sst_auth_check (sys_var *self, THD* thd, set_var* var) +{ + return 0; +} + +static bool sst_auth_real_set (const char* value) +{ + const char* v= NULL; + + if (value) + { + v= my_strdup(value, MYF(0)); + } + else // its NULL + { + wsrep_sst_auth_free(); + return 0; + } + + if (v) + { + // set sst_auth_real + if (sst_auth_real) { my_free((void *) sst_auth_real); } + sst_auth_real = v; + + // mask wsrep_sst_auth + if (strlen(sst_auth_real)) + { + if (wsrep_sst_auth) { my_free((void*) wsrep_sst_auth); } + wsrep_sst_auth= my_strdup(WSREP_SST_AUTH_MASK, MYF(0)); + } + return 0; + } + return 1; +} + +void wsrep_sst_auth_free() +{ + if (wsrep_sst_auth) { my_free((void *) wsrep_sst_auth); } + if (sst_auth_real) { my_free((void *) sst_auth_real); } + wsrep_sst_auth= NULL; + sst_auth_real= NULL; +} + +bool wsrep_sst_auth_update (sys_var *self, THD* thd, enum_var_type type) +{ + return sst_auth_real_set (wsrep_sst_auth); +} + +void wsrep_sst_auth_init (const char* value) +{ + if (wsrep_sst_auth == value) wsrep_sst_auth = NULL; + if (value) sst_auth_real_set (value); +} + +bool wsrep_sst_donor_check (sys_var *self, THD* thd, set_var* var) +{ + return 0; +} + +bool wsrep_sst_donor_update (sys_var *self, THD* thd, enum_var_type type) +{ + return 0; +} + +static wsrep_uuid_t cluster_uuid = WSREP_UUID_UNDEFINED; + +bool wsrep_before_SE() +{ + return (wsrep_provider != NULL + && strcmp (wsrep_provider, WSREP_NONE) + && strcmp (wsrep_sst_method, WSREP_SST_SKIP) + && strcmp (wsrep_sst_method, WSREP_SST_MYSQLDUMP)); +} + +static bool sst_complete = false; +static bool sst_needed = false; + +void wsrep_sst_grab () +{ + WSREP_INFO("wsrep_sst_grab()"); + if (mysql_mutex_lock (&LOCK_wsrep_sst)) abort(); + sst_complete = false; + mysql_mutex_unlock (&LOCK_wsrep_sst); +} + +// Wait for end of SST +bool wsrep_sst_wait () +{ + if (mysql_mutex_lock (&LOCK_wsrep_sst)) abort(); + while (!sst_complete) + { + WSREP_INFO("Waiting for SST to complete."); + mysql_cond_wait (&COND_wsrep_sst, &LOCK_wsrep_sst); + } + + if (local_seqno >= 0) + { + WSREP_INFO("SST complete, seqno: %lld", (long long) local_seqno); + } + else + { + WSREP_ERROR("SST failed: %d (%s)", + int(-local_seqno), strerror(-local_seqno)); + } + + mysql_mutex_unlock (&LOCK_wsrep_sst); + + return (local_seqno >= 0); +} + +// Signal end of SST +void wsrep_sst_complete (const wsrep_uuid_t* sst_uuid, + wsrep_seqno_t sst_seqno, + bool needed) +{ + if (mysql_mutex_lock (&LOCK_wsrep_sst)) abort(); + if (!sst_complete) + { + sst_complete = true; + sst_needed = needed; + local_uuid = *sst_uuid; + local_seqno = sst_seqno; + mysql_cond_signal (&COND_wsrep_sst); + } + else + { + /* This can happen when called from wsrep_synced_cb(). + At the moment there is no way to check there + if main thread is still waiting for signal, + so wsrep_sst_complete() is called from there + each time wsrep_ready changes from FALSE -> TRUE. + */ + WSREP_DEBUG("Nobody is waiting for SST."); + } + mysql_mutex_unlock (&LOCK_wsrep_sst); +} + +void wsrep_sst_received (wsrep_t* const wsrep, + const wsrep_uuid_t* const uuid, + wsrep_seqno_t const seqno, + const void* const state, + size_t const state_len) +{ + int const rcode(seqno < 0 ? seqno : 0); + wsrep_gtid_t const state_id = { + *uuid, (rcode ? WSREP_SEQNO_UNDEFINED : seqno) + }; +#ifdef GTID_SUPPORT + wsrep_init_sidno(state_id.uuid); +#endif /* GTID_SUPPORT */ + wsrep->sst_received(wsrep, &state_id, state, state_len, rcode); +} + +// Let applier threads to continue +void wsrep_sst_continue () +{ + if (sst_needed) + { + WSREP_INFO("Signalling provider to continue."); + wsrep_sst_received (wsrep, &local_uuid, local_seqno, NULL, 0); + } +} + +struct sst_thread_arg +{ + const char* cmd; + int err; + char* ret_str; + mysql_mutex_t lock; + mysql_cond_t cond; + + sst_thread_arg (const char* c) : cmd(c), err(-1), ret_str(0) + { + mysql_mutex_init(key_LOCK_wsrep_sst_thread, &lock, MY_MUTEX_INIT_FAST); + mysql_cond_init(key_COND_wsrep_sst_thread, &cond, NULL); + } + + ~sst_thread_arg() + { + mysql_cond_destroy (&cond); + mysql_mutex_unlock (&lock); + mysql_mutex_destroy (&lock); + } +}; + +static int sst_scan_uuid_seqno (const char* str, + wsrep_uuid_t* uuid, wsrep_seqno_t* seqno) +{ + int offt = wsrep_uuid_scan (str, strlen(str), uuid); + if (offt > 0 && strlen(str) > (unsigned int)offt && ':' == str[offt]) + { + *seqno = strtoll (str + offt + 1, NULL, 10); + if (*seqno != LLONG_MAX || errno != ERANGE) + { + return 0; + } + } + + WSREP_ERROR("Failed to parse uuid:seqno pair: '%s'", str); + return EINVAL; +} + +// get rid of trailing \n +static char* my_fgets (char* buf, size_t buf_len, FILE* stream) +{ + char* ret= fgets (buf, buf_len, stream); + + if (ret) + { + size_t len = strlen(ret); + if (len > 0 && ret[len - 1] == '\n') ret[len - 1] = '\0'; + } + + return ret; +} + +/* + Generate opt_binlog_opt_val for sst_donate_other(), sst_prepare_other(). + + Returns zero on success, negative error code otherwise. + + String containing binlog name is stored in param ret if binlog is enabled + and GTID mode is on, otherwise empty string. Returned string should be + freed with my_free(). + */ +static int generate_binlog_opt_val(char** ret) +{ + DBUG_ASSERT(ret); + *ret= NULL; + if (opt_bin_log) + { + assert(opt_bin_logname); + *ret= strcmp(opt_bin_logname, "0") ? + my_strdup(opt_bin_logname, MYF(0)) : my_strdup("", MYF(0)); + } + else + { + *ret= my_strdup("", MYF(0)); + } + if (!*ret) return -ENOMEM; + return 0; +} + +static void* sst_joiner_thread (void* a) +{ + sst_thread_arg* arg= (sst_thread_arg*) a; + int err= 1; + + { + const char magic[] = "ready"; + const size_t magic_len = sizeof(magic) - 1; + const size_t out_len = 512; + char out[out_len]; + + WSREP_INFO("Running: '%s'", arg->cmd); + + wsp::process proc (arg->cmd, "r"); + + if (proc.pipe() && !proc.error()) + { + const char* tmp= my_fgets (out, out_len, proc.pipe()); + + if (!tmp || strlen(tmp) < (magic_len + 2) || + strncasecmp (tmp, magic, magic_len)) + { + WSREP_ERROR("Failed to read '%s <addr>' from: %s\n\tRead: '%s'", + magic, arg->cmd, tmp); + proc.wait(); + if (proc.error()) err = proc.error(); + } + else + { + err = 0; + } + } + else + { + err = proc.error(); + WSREP_ERROR("Failed to execute: %s : %d (%s)", + arg->cmd, err, strerror(err)); + } + + // signal sst_prepare thread with ret code, + // it will go on sending SST request + mysql_mutex_lock (&arg->lock); + if (!err) + { + arg->ret_str = strdup (out + magic_len + 1); + if (!arg->ret_str) err = ENOMEM; + } + arg->err = -err; + mysql_cond_signal (&arg->cond); + mysql_mutex_unlock (&arg->lock); //! @note arg is unusable after that. + + if (err) return NULL; /* lp:808417 - return immediately, don't signal + * initializer thread to ensure single thread of + * shutdown. */ + + wsrep_uuid_t ret_uuid = WSREP_UUID_UNDEFINED; + wsrep_seqno_t ret_seqno = WSREP_SEQNO_UNDEFINED; + + // in case of successfull receiver start, wait for SST completion/end + char* tmp = my_fgets (out, out_len, proc.pipe()); + + proc.wait(); + err= EINVAL; + + if (!tmp) + { + WSREP_ERROR("Failed to read uuid:seqno from joiner script."); + if (proc.error()) err = proc.error(); + } + else + { + err= sst_scan_uuid_seqno (out, &ret_uuid, &ret_seqno); + } + + if (err) + { + ret_uuid= WSREP_UUID_UNDEFINED; + ret_seqno= -err; + } + + // Tell initializer thread that SST is complete + wsrep_sst_complete (&ret_uuid, ret_seqno, true); + } + + return NULL; +} + +static ssize_t sst_prepare_other (const char* method, + const char* addr_in, + const char** addr_out) +{ + ssize_t cmd_len= 1024; + char cmd_str[cmd_len]; + const char* sst_dir= mysql_real_data_home; + const char* binlog_opt= ""; + char* binlog_opt_val= NULL; + + int ret; + if ((ret= generate_binlog_opt_val(&binlog_opt_val))) + { + WSREP_ERROR("sst_prepare_other(): generate_binlog_opt_val() failed: %d", + ret); + return ret; + } + if (strlen(binlog_opt_val)) binlog_opt= WSREP_SST_OPT_BINLOG; + + + ret= snprintf (cmd_str, cmd_len, + "wsrep_sst_%s " + WSREP_SST_OPT_ROLE" 'joiner' " + WSREP_SST_OPT_ADDR" '%s' " + WSREP_SST_OPT_AUTH" '%s' " + WSREP_SST_OPT_DATA" '%s' " + WSREP_SST_OPT_CONF" '%s' " + WSREP_SST_OPT_PARENT" '%d'" + " %s '%s' ", + method, addr_in, (sst_auth_real) ? sst_auth_real : "", + sst_dir, wsrep_defaults_file, (int)getpid(), + binlog_opt, binlog_opt_val); + my_free(binlog_opt_val); + + if (ret < 0 || ret >= cmd_len) + { + WSREP_ERROR("sst_prepare_other(): snprintf() failed: %d", ret); + return (ret < 0 ? ret : -EMSGSIZE); + } + + pthread_t tmp; + sst_thread_arg arg(cmd_str); + mysql_mutex_lock (&arg.lock); + ret = pthread_create (&tmp, NULL, sst_joiner_thread, &arg); + if (ret) + { + WSREP_ERROR("sst_prepare_other(): pthread_create() failed: %d (%s)", + ret, strerror(ret)); + return ret; + } + mysql_cond_wait (&arg.cond, &arg.lock); + + *addr_out= arg.ret_str; + + if (!arg.err) + ret = strlen(*addr_out); + else + { + assert (arg.err < 0); + ret = arg.err; + } + + pthread_detach (tmp); + + return ret; +} + +extern uint mysqld_port; + +/*! Just tells donor where to send mysqldump */ +static ssize_t sst_prepare_mysqldump (const char* addr_in, + const char** addr_out) +{ + ssize_t ret = strlen (addr_in); + + if (!strrchr(addr_in, ':')) + { + ssize_t s = ret + 7; + char* tmp = (char*) malloc (s); + + if (tmp) + { + ret= snprintf (tmp, s, "%s:%u", addr_in, mysqld_port); + + if (ret > 0 && ret < s) + { + *addr_out= tmp; + return ret; + } + if (ret > 0) /* buffer too short */ ret = -EMSGSIZE; + free (tmp); + } + else { + ret= -ENOMEM; + } + + WSREP_ERROR ("Could not prepare state transfer request: " + "adding default port failed: %zd.", ret); + } + else { + *addr_out= addr_in; + } + + return ret; +} + +static bool SE_initialized = false; + +ssize_t wsrep_sst_prepare (void** msg) +{ + const ssize_t ip_max= 256; + char ip_buf[ip_max]; + const char* addr_in= NULL; + const char* addr_out= NULL; + + if (!strcmp(wsrep_sst_method, WSREP_SST_SKIP)) + { + ssize_t ret = strlen(WSREP_STATE_TRANSFER_TRIVIAL) + 1; + *msg = strdup(WSREP_STATE_TRANSFER_TRIVIAL); + if (!msg) + { + WSREP_ERROR("Could not allocate %zd bytes for state request", ret); + unireg_abort(1); + } + return ret; + } + + // Figure out SST address. Common for all SST methods + if (wsrep_sst_receive_address && + strcmp (wsrep_sst_receive_address, WSREP_SST_ADDRESS_AUTO)) + { + addr_in= wsrep_sst_receive_address; + } + else if (wsrep_node_address && strlen(wsrep_node_address)) + { + const char* const colon= strchr (wsrep_node_address, ':'); + if (colon) + { + ptrdiff_t const len= colon - wsrep_node_address; + strncpy (ip_buf, wsrep_node_address, len); + ip_buf[len]= '\0'; + addr_in= ip_buf; + } + else + { + addr_in= wsrep_node_address; + } + } + else + { + ssize_t ret= wsrep_guess_ip (ip_buf, ip_max); + + if (ret && ret < ip_max) + { + addr_in= ip_buf; + } + else + { + WSREP_ERROR("Could not prepare state transfer request: " + "failed to guess address to accept state transfer at. " + "wsrep_sst_receive_address must be set manually."); + unireg_abort(1); + } + } + + ssize_t addr_len= -ENOSYS; + if (!strcmp(wsrep_sst_method, WSREP_SST_MYSQLDUMP)) + { + addr_len= sst_prepare_mysqldump (addr_in, &addr_out); + if (addr_len < 0) unireg_abort(1); + } + else + { + /*! A heuristic workaround until we learn how to stop and start engines */ + if (SE_initialized) + { + // we already did SST at initializaiton, now engines are running + // sql_print_information() is here because the message is too long + // for WSREP_INFO. + sql_print_information ("WSREP: " + "You have configured '%s' state snapshot transfer method " + "which cannot be performed on a running server. " + "Wsrep provider won't be able to fall back to it " + "if other means of state transfer are unavailable. " + "In that case you will need to restart the server.", + wsrep_sst_method); + *msg = 0; + return 0; + } + + addr_len = sst_prepare_other (wsrep_sst_method, addr_in, &addr_out); + if (addr_len < 0) + { + WSREP_ERROR("Failed to prepare for '%s' SST. Unrecoverable.", + wsrep_sst_method); + unireg_abort(1); + } + } + + size_t const method_len(strlen(wsrep_sst_method)); + size_t const msg_len (method_len + addr_len + 2 /* + auth_len + 1*/); + + *msg = malloc (msg_len); + if (NULL != *msg) { + char* const method_ptr(reinterpret_cast<char*>(*msg)); + strcpy (method_ptr, wsrep_sst_method); + char* const addr_ptr(method_ptr + method_len + 1); + strcpy (addr_ptr, addr_out); + + WSREP_INFO ("Prepared SST request: %s|%s", method_ptr, addr_ptr); + } + else { + WSREP_ERROR("Failed to allocate SST request of size %zu. Can't continue.", + msg_len); + unireg_abort(1); + } + + if (addr_out != addr_in) /* malloc'ed */ free ((char*)addr_out); + + return msg_len; +} + +// helper method for donors +static int sst_run_shell (const char* cmd_str, int max_tries) +{ + int ret = 0; + + for (int tries=1; tries <= max_tries; tries++) + { + wsp::process proc (cmd_str, "r"); + + if (NULL != proc.pipe()) + { + proc.wait(); + } + + if ((ret = proc.error())) + { + WSREP_ERROR("Try %d/%d: '%s' failed: %d (%s)", + tries, max_tries, proc.cmd(), ret, strerror(ret)); + sleep (1); + } + else + { + WSREP_DEBUG("SST script successfully completed."); + break; + } + } + + return -ret; +} + +static void sst_reject_queries(my_bool close_conn) +{ + wsrep_ready_set (FALSE); // this will be resotred when donor becomes synced + WSREP_INFO("Rejecting client queries for the duration of SST."); + if (TRUE == close_conn) wsrep_close_client_connections(FALSE); +} + +static int sst_mysqldump_check_addr (const char* user, const char* pswd, + const char* host, const char* port) +{ + return 0; +} + +static int sst_donate_mysqldump (const char* addr, + const wsrep_uuid_t* uuid, + const char* uuid_str, + wsrep_seqno_t seqno, + bool bypass) +{ + size_t host_len; + const char* port = strchr (addr, ':'); + + if (port) + { + port += 1; + host_len = port - addr; + } + else + { + port = ""; + host_len = strlen (addr) + 1; + } + + char host[host_len]; + + strncpy (host, addr, host_len - 1); + host[host_len - 1] = '\0'; + + const char* auth = sst_auth_real; + const char* pswd = (auth) ? strchr (auth, ':') : NULL; + size_t user_len; + + if (pswd) + { + pswd += 1; + user_len = pswd - auth; + } + else + { + pswd = ""; + user_len = (auth) ? strlen (auth) + 1 : 1; + } + + char user[user_len]; + + strncpy (user, (auth) ? auth : "", user_len - 1); + user[user_len - 1] = '\0'; + + int ret = sst_mysqldump_check_addr (user, pswd, host, port); + if (!ret) + { + size_t cmd_len= 1024; + char cmd_str[cmd_len]; + + if (!bypass && wsrep_sst_donor_rejects_queries) sst_reject_queries(TRUE); + + snprintf (cmd_str, cmd_len, + "wsrep_sst_mysqldump " + WSREP_SST_OPT_USER" '%s' " + WSREP_SST_OPT_PSWD" '%s' " + WSREP_SST_OPT_HOST" '%s' " + WSREP_SST_OPT_PORT" '%s' " + WSREP_SST_OPT_LPORT" '%u' " + WSREP_SST_OPT_SOCKET" '%s' " + WSREP_SST_OPT_CONF" '%s' " + WSREP_SST_OPT_GTID" '%s:%lld'" + "%s", + user, pswd, host, port, mysqld_port, mysqld_unix_port, + wsrep_defaults_file, uuid_str, + (long long)seqno, bypass ? " "WSREP_SST_OPT_BYPASS : ""); + + WSREP_DEBUG("Running: '%s'", cmd_str); + + ret= sst_run_shell (cmd_str, 3); + } + + wsrep_gtid_t const state_id = { *uuid, (ret ? WSREP_SEQNO_UNDEFINED : seqno)}; + + wsrep->sst_sent (wsrep, &state_id, ret); + + return ret; +} + +wsrep_seqno_t wsrep_locked_seqno= WSREP_SEQNO_UNDEFINED; + +static int run_sql_command(THD *thd, const char *query) +{ + thd->set_query((char *)query, strlen(query)); + + Parser_state ps; + if (ps.init(thd, thd->query(), thd->query_length())) + { + WSREP_ERROR("SST query: %s failed", query); + return -1; + } + + mysql_parse(thd, thd->query(), thd->query_length(), &ps); + if (thd->is_error()) + { + int const err= thd->get_stmt_da()->sql_errno(); + WSREP_WARN ("error executing '%s': %d (%s)%s", + query, err, thd->get_stmt_da()->message(), + err == ER_UNKNOWN_SYSTEM_VARIABLE ? + ". Was mysqld built with --with-innodb-disallow-writes ?" : ""); + thd->clear_error(); + return -1; + } + return 0; +} + +static int sst_flush_tables(THD* thd) +{ + WSREP_INFO("Flushing tables for SST..."); + + int err; + int not_used; + CHARSET_INFO *current_charset; + + current_charset = thd->variables.character_set_client; + + if (!is_supported_parser_charset(current_charset)) + { + /* Do not use non-supported parser character sets */ + WSREP_WARN("Current client character set is non-supported parser character set: %s", current_charset->csname); + thd->variables.character_set_client = &my_charset_latin1; + WSREP_WARN("For SST temporally setting character set to : %s", + my_charset_latin1.csname); + } + + if (run_sql_command(thd, "FLUSH TABLES WITH READ LOCK")) + { + WSREP_ERROR("Failed to flush and lock tables"); + err = -1; + } + else + { + /* make sure logs are flushed after global read lock acquired */ + err= reload_acl_and_cache(thd, REFRESH_ENGINE_LOG | REFRESH_BINARY_LOG, + (TABLE_LIST*) 0, ¬_used); + } + + thd->variables.character_set_client = current_charset; + + + if (err) + { + WSREP_ERROR("Failed to flush tables: %d (%s)", err, strerror(err)); + } + else + { + WSREP_INFO("Tables flushed."); + const char base_name[]= "tables_flushed"; + ssize_t const full_len= strlen(mysql_real_data_home) + strlen(base_name)+2; + char real_name[full_len]; + sprintf(real_name, "%s/%s", mysql_real_data_home, base_name); + char tmp_name[full_len + 4]; + sprintf(tmp_name, "%s.tmp", real_name); + + FILE* file= fopen(tmp_name, "w+"); + if (0 == file) + { + err= errno; + WSREP_ERROR("Failed to open '%s': %d (%s)", tmp_name, err,strerror(err)); + } + else + { + fprintf(file, "%s:%lld\n", + wsrep_cluster_state_uuid, (long long)wsrep_locked_seqno); + fsync(fileno(file)); + fclose(file); + if (rename(tmp_name, real_name) == -1) + { + err= errno; + WSREP_ERROR("Failed to rename '%s' to '%s': %d (%s)", + tmp_name, real_name, err,strerror(err)); + } + } + } + + return err; +} + +static void sst_disallow_writes (THD* thd, bool yes) +{ + char query_str[64] = { 0, }; + ssize_t const query_max = sizeof(query_str) - 1; + CHARSET_INFO *current_charset; + + current_charset = thd->variables.character_set_client; + + if (!is_supported_parser_charset(current_charset)) + { + /* Do not use non-supported parser character sets */ + WSREP_WARN("Current client character set is non-supported parser character set: %s", current_charset->csname); + thd->variables.character_set_client = &my_charset_latin1; + WSREP_WARN("For SST temporally setting character set to : %s", + my_charset_latin1.csname); + } + + snprintf (query_str, query_max, "SET GLOBAL innodb_disallow_writes=%d", + yes ? 1 : 0); + + if (run_sql_command(thd, query_str)) + { + WSREP_ERROR("Failed to disallow InnoDB writes"); + } + thd->variables.character_set_client = current_charset; +} + +static void* sst_donor_thread (void* a) +{ + sst_thread_arg* arg= (sst_thread_arg*)a; + + WSREP_INFO("Running: '%s'", arg->cmd); + + int err= 1; + bool locked= false; + + const char* out= NULL; + const size_t out_len= 128; + char out_buf[out_len]; + + wsrep_uuid_t ret_uuid= WSREP_UUID_UNDEFINED; + wsrep_seqno_t ret_seqno= WSREP_SEQNO_UNDEFINED; // seqno of complete SST + + wsp::thd thd(FALSE); // we turn off wsrep_on for this THD so that it can + // operate with wsrep_ready == OFF + wsp::process proc(arg->cmd, "r"); + + err= proc.error(); + +/* Inform server about SST script startup and release TO isolation */ + mysql_mutex_lock (&arg->lock); + arg->err = -err; + mysql_cond_signal (&arg->cond); + mysql_mutex_unlock (&arg->lock); //! @note arg is unusable after that. + + if (proc.pipe() && !err) + { +wait_signal: + out= my_fgets (out_buf, out_len, proc.pipe()); + + if (out) + { + const char magic_flush[]= "flush tables"; + const char magic_cont[]= "continue"; + const char magic_done[]= "done"; + + if (!strcasecmp (out, magic_flush)) + { + err= sst_flush_tables (thd.ptr); + if (!err) + { + sst_disallow_writes (thd.ptr, true); + locked= true; + goto wait_signal; + } + } + else if (!strcasecmp (out, magic_cont)) + { + if (locked) + { + sst_disallow_writes (thd.ptr, false); + thd.ptr->global_read_lock.unlock_global_read_lock (thd.ptr); + locked= false; + } + err= 0; + goto wait_signal; + } + else if (!strncasecmp (out, magic_done, strlen(magic_done))) + { + err= sst_scan_uuid_seqno (out + strlen(magic_done) + 1, + &ret_uuid, &ret_seqno); + } + else + { + WSREP_WARN("Received unknown signal: '%s'", out); + } + } + else + { + WSREP_ERROR("Failed to read from: %s", proc.cmd()); + proc.wait(); + } + if (!err && proc.error()) err= proc.error(); + } + else + { + WSREP_ERROR("Failed to execute: %s : %d (%s)", + proc.cmd(), err, strerror(err)); + } + + if (locked) // don't forget to unlock server before return + { + sst_disallow_writes (thd.ptr, false); + thd.ptr->global_read_lock.unlock_global_read_lock (thd.ptr); + } + + // signal to donor that SST is over + struct wsrep_gtid const state_id = { + ret_uuid, err ? WSREP_SEQNO_UNDEFINED : ret_seqno + }; + wsrep->sst_sent (wsrep, &state_id, -err); + proc.wait(); + + return NULL; +} + + + +static int sst_donate_other (const char* method, + const char* addr, + const char* uuid, + wsrep_seqno_t seqno, + bool bypass) +{ + ssize_t cmd_len = 4096; + char cmd_str[cmd_len]; + const char* binlog_opt= ""; + char* binlog_opt_val= NULL; + + int ret; + if ((ret= generate_binlog_opt_val(&binlog_opt_val))) + { + WSREP_ERROR("sst_donate_other(): generate_binlog_opt_val() failed: %d",ret); + return ret; + } + if (strlen(binlog_opt_val)) binlog_opt= WSREP_SST_OPT_BINLOG; + + ret= snprintf (cmd_str, cmd_len, + "wsrep_sst_%s " + WSREP_SST_OPT_ROLE" 'donor' " + WSREP_SST_OPT_ADDR" '%s' " + WSREP_SST_OPT_AUTH" '%s' " + WSREP_SST_OPT_SOCKET" '%s' " + WSREP_SST_OPT_DATA" '%s' " + WSREP_SST_OPT_CONF" '%s' " + " %s '%s' " + WSREP_SST_OPT_GTID" '%s:%lld'" + "%s", + method, addr, sst_auth_real, mysqld_unix_port, + mysql_real_data_home, wsrep_defaults_file, + binlog_opt, binlog_opt_val, + uuid, (long long) seqno, + bypass ? " "WSREP_SST_OPT_BYPASS : ""); + my_free(binlog_opt_val); + + if (ret < 0 || ret >= cmd_len) + { + WSREP_ERROR("sst_donate_other(): snprintf() failed: %d", ret); + return (ret < 0 ? ret : -EMSGSIZE); + } + + if (!bypass && wsrep_sst_donor_rejects_queries) sst_reject_queries(FALSE); + + pthread_t tmp; + sst_thread_arg arg(cmd_str); + mysql_mutex_lock (&arg.lock); + ret = pthread_create (&tmp, NULL, sst_donor_thread, &arg); + if (ret) + { + WSREP_ERROR("sst_donate_other(): pthread_create() failed: %d (%s)", + ret, strerror(ret)); + return ret; + } + mysql_cond_wait (&arg.cond, &arg.lock); + + WSREP_INFO("sst_donor_thread signaled with %d", arg.err); + return arg.err; +} + +wsrep_cb_status_t wsrep_sst_donate_cb (void* app_ctx, void* recv_ctx, + const void* msg, size_t msg_len, + const wsrep_gtid_t* current_gtid, + const char* state, size_t state_len, + bool bypass) +{ + /* This will be reset when sync callback is called. + * Should we set wsrep_ready to FALSE here too? */ +// wsrep_notify_status(WSREP_MEMBER_DONOR); + local_status.set(WSREP_MEMBER_DONOR); + + const char* method = (char*)msg; + size_t method_len = strlen (method); + const char* data = method + method_len + 1; + + char uuid_str[37]; + wsrep_uuid_print (¤t_gtid->uuid, uuid_str, sizeof(uuid_str)); + + int ret; + if (!strcmp (WSREP_SST_MYSQLDUMP, method)) + { + ret = sst_donate_mysqldump(data, ¤t_gtid->uuid, uuid_str, + current_gtid->seqno, bypass); + } + else + { + ret = sst_donate_other(method, data, uuid_str, current_gtid->seqno,bypass); + } + + return (ret > 0 ? WSREP_CB_SUCCESS : WSREP_CB_FAILURE); +} + +void wsrep_SE_init_grab() +{ + if (mysql_mutex_lock (&LOCK_wsrep_sst_init)) abort(); +} + +void wsrep_SE_init_wait() +{ + while (SE_initialized == false) + { + mysql_cond_wait (&COND_wsrep_sst_init, &LOCK_wsrep_sst_init); + } + mysql_mutex_unlock (&LOCK_wsrep_sst_init); +} + +void wsrep_SE_init_done() +{ + mysql_cond_signal (&COND_wsrep_sst_init); + mysql_mutex_unlock (&LOCK_wsrep_sst_init); +} + +void wsrep_SE_initialized() +{ + SE_initialized = true; +} diff --git a/sql/wsrep_sst.h b/sql/wsrep_sst.h new file mode 100644 index 00000000000..d85fed97fca --- /dev/null +++ b/sql/wsrep_sst.h @@ -0,0 +1,68 @@ +/* Copyright (C) 2013 Codership Oy <info@codership.com> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */ + +#if !defined(WSREP_SST_H) && defined(WITH_WSREP) +#define WSREP_SST_H + +#include <mysql.h> // my_bool + +#define WSREP_SST_OPT_ROLE "--role" +#define WSREP_SST_OPT_ADDR "--address" +#define WSREP_SST_OPT_AUTH "--auth" +#define WSREP_SST_OPT_DATA "--datadir" +#define WSREP_SST_OPT_CONF "--defaults-file" +#define WSREP_SST_OPT_PARENT "--parent" +#define WSREP_SST_OPT_BINLOG "--binlog" + +// mysqldump-specific options +#define WSREP_SST_OPT_USER "--user" +#define WSREP_SST_OPT_PSWD "--password" +#define WSREP_SST_OPT_HOST "--host" +#define WSREP_SST_OPT_PORT "--port" +#define WSREP_SST_OPT_LPORT "--local-port" + +// donor-specific +#define WSREP_SST_OPT_SOCKET "--socket" +#define WSREP_SST_OPT_GTID "--gtid" +#define WSREP_SST_OPT_BYPASS "--bypass" + +#define WSREP_SST_MYSQLDUMP "mysqldump" +#define WSREP_SST_RSYNC "rsync" +#define WSREP_SST_SKIP "skip" +#define WSREP_SST_DEFAULT WSREP_SST_RSYNC +#define WSREP_SST_ADDRESS_AUTO "AUTO" +#define WSREP_SST_AUTH_MASK "********" + +/* system variables */ +extern const char* wsrep_sst_method; +extern const char* wsrep_sst_receive_address; +extern const char* wsrep_sst_donor; +extern char* wsrep_sst_auth; +extern my_bool wsrep_sst_donor_rejects_queries; + +/*! Synchronizes applier thread start with init thread */ +extern void wsrep_sst_grab(); +/*! Init thread waits for SST completion */ +extern bool wsrep_sst_wait(); +/*! Signals wsrep that initialization is complete, writesets can be applied */ +extern void wsrep_sst_continue(); +extern void wsrep_sst_auth_free(); + +extern void wsrep_SE_init_grab(); /*! grab init critical section */ +extern void wsrep_SE_init_wait(); /*! wait for SE init to complete */ +extern void wsrep_SE_init_done(); /*! signal that SE init is complte */ +extern void wsrep_SE_initialized(); /*! mark SE initialization complete */ + +#endif /* WSREP_SST_H */ diff --git a/sql/wsrep_thd.cc b/sql/wsrep_thd.cc new file mode 100644 index 00000000000..fabced2d11a --- /dev/null +++ b/sql/wsrep_thd.cc @@ -0,0 +1,602 @@ +/* Copyright (C) 2013 Codership Oy <info@codership.com> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */ + +#include "wsrep_thd.h" + +#include "transaction.h" +#include "rpl_rli.h" +#include "log_event.h" +#include "sql_parse.h" +//#include "global_threads.h" // LOCK_thread_count, etc. +#include "sql_base.h" // close_thread_tables() +#include "mysqld.h" // start_wsrep_THD(); + +#include "slave.h" // opt_log_slave_updates +#include "rpl_filter.h" +#include "rpl_rli.h" +#include "rpl_mi.h" + +#if (__LP64__) +static volatile int64 wsrep_bf_aborts_counter(0); +#define WSREP_ATOMIC_LOAD_LONG my_atomic_load64 +#define WSREP_ATOMIC_ADD_LONG my_atomic_add64 +#else +static volatile int32 wsrep_bf_aborts_counter(0); +#define WSREP_ATOMIC_LOAD_LONG my_atomic_load32 +#define WSREP_ATOMIC_ADD_LONG my_atomic_add32 +#endif + +int wsrep_show_bf_aborts (THD *thd, SHOW_VAR *var, char *buff) +{ + wsrep_local_bf_aborts = WSREP_ATOMIC_LOAD_LONG(&wsrep_bf_aborts_counter); + var->type = SHOW_LONGLONG; + var->value = (char*)&wsrep_local_bf_aborts; + return 0; +} + +/* must have (&thd->LOCK_wsrep_thd) */ +void wsrep_client_rollback(THD *thd) +{ + WSREP_DEBUG("client rollback due to BF abort for (%ld), query: %s", + thd->thread_id, thd->query()); + + WSREP_ATOMIC_ADD_LONG(&wsrep_bf_aborts_counter, 1); + + thd->wsrep_conflict_state= ABORTING; + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + trans_rollback(thd); + + if (thd->locked_tables_mode && thd->lock) + { + WSREP_DEBUG("unlocking tables for BF abort (%ld)", thd->thread_id); + thd->locked_tables_list.unlock_locked_tables(thd); + thd->variables.option_bits&= ~(OPTION_TABLE_LOCK); + } + + if (thd->global_read_lock.is_acquired()) + { + WSREP_DEBUG("unlocking GRL for BF abort (%ld)", thd->thread_id); + thd->global_read_lock.unlock_global_read_lock(thd); + } + + /* Release transactional metadata locks. */ + thd->mdl_context.release_transactional_locks(); + + /* release explicit MDL locks */ + thd->mdl_context.release_explicit_locks(); + + if (thd->get_binlog_table_maps()) + { + WSREP_DEBUG("clearing binlog table map for BF abort (%ld)", thd->thread_id); + thd->clear_binlog_table_maps(); + } + mysql_mutex_lock(&thd->LOCK_wsrep_thd); + thd->wsrep_conflict_state= ABORTED; +} + +#define NUMBER_OF_FIELDS_TO_IDENTIFY_COORDINATOR 1 +#define NUMBER_OF_FIELDS_TO_IDENTIFY_WORKER 2 +//#include "rpl_info_factory.h" + +static Relay_log_info* wsrep_relay_log_init(const char* log_fname) +{ + + /* MySQL 5.6 version has rli factory: */ +#ifdef MYSQL_56 + uint rli_option = INFO_REPOSITORY_DUMMY; + Relay_log_info *rli= NULL; + rli = Rpl_info_factory::create_rli(rli_option, false); + rli->set_rli_description_event( + new Format_description_log_event(BINLOG_VERSION)); +#endif + Relay_log_info* rli= new Relay_log_info(false); + rli->sql_driver_thd= current_thd; + + rli->no_storage= true; + rli->relay_log.description_event_for_exec= + new Format_description_log_event(4); + + return rli; +} + +class Master_info; + +static rpl_group_info* wsrep_relay_group_init(const char* log_fname) +{ + Relay_log_info* rli= new Relay_log_info(false); + + rli->no_storage= true; + if (!rli->relay_log.description_event_for_exec) + { + rli->relay_log.description_event_for_exec= + new Format_description_log_event(4); + } + static LEX_STRING dbname= { C_STRING_WITH_LEN("mysql") }; + + rli->mi = new Master_info( &dbname, false); + //rli->mi = new Master_info( &(C_STRING_WITH_LEN("wsrep")), false); + + rli->mi->rpl_filter = new Rpl_filter; + copy_filter_setting(rli->mi->rpl_filter, get_or_create_rpl_filter("", 0)); + + rli->sql_driver_thd= current_thd; + + struct rpl_group_info *rgi= new rpl_group_info(rli); + rgi->thd= current_thd; + + return rgi; +} + +static void wsrep_prepare_bf_thd(THD *thd, struct wsrep_thd_shadow* shadow) +{ + shadow->options = thd->variables.option_bits; + shadow->server_status = thd->server_status; + shadow->wsrep_exec_mode = thd->wsrep_exec_mode; + shadow->vio = thd->net.vio; + + if (opt_log_slave_updates) + thd->variables.option_bits|= OPTION_BIN_LOG; + else + thd->variables.option_bits&= ~(OPTION_BIN_LOG); + + //if (!thd->wsrep_rli) thd->wsrep_rli= wsrep_relay_log_init("wsrep_relay"); + if (!thd->wsrep_rgi) thd->wsrep_rgi= wsrep_relay_group_init("wsrep_relay"); + // thd->wsrep_rli->info_thd = thd; + + thd->wsrep_exec_mode= REPL_RECV; + thd->net.vio= 0; + thd->clear_error(); + + shadow->tx_isolation = thd->variables.tx_isolation; + thd->variables.tx_isolation = ISO_READ_COMMITTED; + thd->tx_isolation = ISO_READ_COMMITTED; + + shadow->db = thd->db; + shadow->db_length = thd->db_length; + thd->reset_db(NULL, 0); +} + +static void wsrep_return_from_bf_mode(THD *thd, struct wsrep_thd_shadow* shadow) +{ + thd->variables.option_bits = shadow->options; + thd->server_status = shadow->server_status; + thd->wsrep_exec_mode = shadow->wsrep_exec_mode; + thd->net.vio = shadow->vio; + thd->variables.tx_isolation = shadow->tx_isolation; + thd->reset_db(shadow->db, shadow->db_length); + + delete thd->wsrep_rgi->rli->mi->rpl_filter; + delete thd->wsrep_rgi->rli->mi; + delete thd->wsrep_rgi->rli; + delete thd->wsrep_rgi; + thd->wsrep_rgi = NULL; +; +} + +void wsrep_replay_transaction(THD *thd) +{ + /* checking if BF trx must be replayed */ + if (thd->wsrep_conflict_state== MUST_REPLAY) { + DBUG_ASSERT(wsrep_thd_trx_seqno(thd)); + if (thd->wsrep_exec_mode!= REPL_RECV) { + if (thd->get_stmt_da()->is_sent()) + { + WSREP_ERROR("replay issue, thd has reported status already"); + } + thd->get_stmt_da()->reset_diagnostics_area(); + + thd->wsrep_conflict_state= REPLAYING; + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + + mysql_reset_thd_for_next_command(thd); + thd->killed= NOT_KILLED; + close_thread_tables(thd); + if (thd->locked_tables_mode && thd->lock) + { + WSREP_DEBUG("releasing table lock for replaying (%ld)", + thd->thread_id); + thd->locked_tables_list.unlock_locked_tables(thd); + thd->variables.option_bits&= ~(OPTION_TABLE_LOCK); + } + thd->mdl_context.release_transactional_locks(); + /* + Replaying will call MYSQL_START_STATEMENT when handling + BEGIN Query_log_event so end statement must be called before + replaying. + */ + MYSQL_END_STATEMENT(thd->m_statement_psi, thd->get_stmt_da()); + thd->m_statement_psi= NULL; + thd_proc_info(thd, "wsrep replaying trx"); + WSREP_DEBUG("replay trx: %s %lld", + thd->query() ? thd->query() : "void", + (long long)wsrep_thd_trx_seqno(thd)); + struct wsrep_thd_shadow shadow; + wsrep_prepare_bf_thd(thd, &shadow); + + /* From trans_begin() */ + thd->variables.option_bits|= OPTION_BEGIN; + thd->server_status|= SERVER_STATUS_IN_TRANS; + + int rcode = wsrep->replay_trx(wsrep, + &thd->wsrep_ws_handle, + (void *)thd); + + wsrep_return_from_bf_mode(thd, &shadow); + if (thd->wsrep_conflict_state!= REPLAYING) + WSREP_WARN("lost replaying mode: %d", thd->wsrep_conflict_state ); + + mysql_mutex_lock(&thd->LOCK_wsrep_thd); + + switch (rcode) + { + case WSREP_OK: + thd->wsrep_conflict_state= NO_CONFLICT; + wsrep->post_commit(wsrep, &thd->wsrep_ws_handle); + WSREP_DEBUG("trx_replay successful for: %ld %llu", + thd->thread_id, (long long)thd->real_id); + if (thd->get_stmt_da()->is_sent()) + { + WSREP_WARN("replay ok, thd has reported status"); + } + else if (thd->get_stmt_da()->is_set()) + { + if (thd->get_stmt_da()->status() != Diagnostics_area::DA_OK) + { + WSREP_WARN("replay ok, thd has error status %d", + thd->get_stmt_da()->status()); + } + } + else + { + my_ok(thd); + } + break; + case WSREP_TRX_FAIL: + if (thd->get_stmt_da()->is_sent()) + { + WSREP_ERROR("replay failed, thd has reported status"); + } + else + { + WSREP_DEBUG("replay failed, rolling back"); + //my_error(ER_LOCK_DEADLOCK, MYF(0), "wsrep aborted transaction"); + } + thd->wsrep_conflict_state= ABORTED; + wsrep->post_rollback(wsrep, &thd->wsrep_ws_handle); + break; + default: + WSREP_ERROR("trx_replay failed for: %d, query: %s", + rcode, thd->query() ? thd->query() : "void"); + /* we're now in inconsistent state, must abort */ + unireg_abort(1); + break; + } + + wsrep_cleanup_transaction(thd); + + mysql_mutex_lock(&LOCK_wsrep_replaying); + wsrep_replaying--; + WSREP_DEBUG("replaying decreased: %d, thd: %lu", + wsrep_replaying, thd->thread_id); + mysql_cond_broadcast(&COND_wsrep_replaying); + mysql_mutex_unlock(&LOCK_wsrep_replaying); + } + } +} + +static void wsrep_replication_process(THD *thd) +{ + int rcode; + DBUG_ENTER("wsrep_replication_process"); + + struct wsrep_thd_shadow shadow; + wsrep_prepare_bf_thd(thd, &shadow); + + /* From trans_begin() */ + thd->variables.option_bits|= OPTION_BEGIN; + thd->server_status|= SERVER_STATUS_IN_TRANS; + + rcode = wsrep->recv(wsrep, (void *)thd); + DBUG_PRINT("wsrep",("wsrep_repl returned: %d", rcode)); + + WSREP_INFO("applier thread exiting (code:%d)", rcode); + + switch (rcode) { + case WSREP_OK: + case WSREP_NOT_IMPLEMENTED: + case WSREP_CONN_FAIL: + /* provider does not support slave operations / disconnected from group, + * just close applier thread */ + break; + case WSREP_NODE_FAIL: + /* data inconsistency => SST is needed */ + /* Note: we cannot just blindly restart replication here, + * SST might require server restart if storage engines must be + * initialized after SST */ + WSREP_ERROR("node consistency compromised, aborting"); + wsrep_kill_mysql(thd); + break; + case WSREP_WARNING: + case WSREP_TRX_FAIL: + case WSREP_TRX_MISSING: + /* these suggests a bug in provider code */ + WSREP_WARN("bad return from recv() call: %d", rcode); + /* fall through to node shutdown */ + case WSREP_FATAL: + /* Cluster connectivity is lost. + * + * If applier was killed on purpose (KILL_CONNECTION), we + * avoid mysql shutdown. This is because the killer will then handle + * shutdown processing (or replication restarting) + */ + if (thd->killed != KILL_CONNECTION) + { + wsrep_kill_mysql(thd); + } + break; + } + + mysql_mutex_lock(&LOCK_thread_count); + wsrep_close_applier(thd); + mysql_cond_broadcast(&COND_thread_count); + mysql_mutex_unlock(&LOCK_thread_count); + + TABLE *tmp; + while ((tmp = thd->temporary_tables)) + { + WSREP_WARN("Applier %lu, has temporary tables at exit: %s.%s", + thd->thread_id, + (tmp->s) ? tmp->s->db.str : "void", + (tmp->s) ? tmp->s->table_name.str : "void"); + } + wsrep_return_from_bf_mode(thd, &shadow); + DBUG_VOID_RETURN; +} + +void wsrep_create_appliers(long threads) +{ + if (!wsrep_connected) + { + /* see wsrep_replication_start() for the logic */ + if (wsrep_cluster_address && strlen(wsrep_cluster_address) && + wsrep_provider && strcasecmp(wsrep_provider, "none")) + { + WSREP_ERROR("Trying to launch slave threads before creating " + "connection at '%s'", wsrep_cluster_address); + assert(0); + } + return; + } + + long wsrep_threads=0; + pthread_t hThread; + while (wsrep_threads++ < threads) { + if (pthread_create( + &hThread, &connection_attrib, + start_wsrep_THD, (void*)wsrep_replication_process)) + WSREP_WARN("Can't create thread to manage wsrep replication"); + } +} + +static void wsrep_rollback_process(THD *thd) +{ + DBUG_ENTER("wsrep_rollback_process"); + + mysql_mutex_lock(&LOCK_wsrep_rollback); + wsrep_aborting_thd= NULL; + + while (thd->killed == NOT_KILLED) { + thd_proc_info(thd, "wsrep aborter idle"); + thd->mysys_var->current_mutex= &LOCK_wsrep_rollback; + thd->mysys_var->current_cond= &COND_wsrep_rollback; + + mysql_cond_wait(&COND_wsrep_rollback,&LOCK_wsrep_rollback); + + WSREP_DEBUG("WSREP rollback thread wakes for signal"); + + mysql_mutex_lock(&thd->mysys_var->mutex); + thd_proc_info(thd, "wsrep aborter active"); + thd->mysys_var->current_mutex= 0; + thd->mysys_var->current_cond= 0; + mysql_mutex_unlock(&thd->mysys_var->mutex); + + /* check for false alarms */ + if (!wsrep_aborting_thd) + { + WSREP_DEBUG("WSREP rollback thread has empty abort queue"); + } + /* process all entries in the queue */ + while (wsrep_aborting_thd) { + THD *aborting; + wsrep_aborting_thd_t next = wsrep_aborting_thd->next; + aborting = wsrep_aborting_thd->aborting_thd; + my_free(wsrep_aborting_thd); + wsrep_aborting_thd= next; + /* + * must release mutex, appliers my want to add more + * aborting thds in our work queue, while we rollback + */ + mysql_mutex_unlock(&LOCK_wsrep_rollback); + + mysql_mutex_lock(&aborting->LOCK_wsrep_thd); + if (aborting->wsrep_conflict_state== ABORTED) + { + WSREP_DEBUG("WSREP, thd already aborted: %llu state: %d", + (long long)aborting->real_id, + aborting->wsrep_conflict_state); + + mysql_mutex_unlock(&aborting->LOCK_wsrep_thd); + mysql_mutex_lock(&LOCK_wsrep_rollback); + continue; + } + aborting->wsrep_conflict_state= ABORTING; + + mysql_mutex_unlock(&aborting->LOCK_wsrep_thd); + + set_current_thd(aborting); + aborting->store_globals(); + + mysql_mutex_lock(&aborting->LOCK_wsrep_thd); + wsrep_client_rollback(aborting); + WSREP_DEBUG("WSREP rollbacker aborted thd: (%lu %llu)", + aborting->thread_id, (long long)aborting->real_id); + mysql_mutex_unlock(&aborting->LOCK_wsrep_thd); + + set_current_thd(thd); + thd->store_globals(); + + mysql_mutex_lock(&LOCK_wsrep_rollback); + } + } + + mysql_mutex_unlock(&LOCK_wsrep_rollback); + sql_print_information("WSREP: rollbacker thread exiting"); + + DBUG_PRINT("wsrep",("wsrep rollbacker thread exiting")); + DBUG_VOID_RETURN; +} + +void wsrep_create_rollbacker() +{ + if (wsrep_provider && strcasecmp(wsrep_provider, "none")) + { + pthread_t hThread; + /* create rollbacker */ + if (pthread_create( &hThread, &connection_attrib, + start_wsrep_THD, (void*)wsrep_rollback_process)) + WSREP_WARN("Can't create thread to manage wsrep rollback"); + } +} + +void wsrep_thd_set_PA_safe(void *thd_ptr, my_bool safe) +{ + if (thd_ptr) + { + THD* thd = (THD*)thd_ptr; + thd->wsrep_PA_safe = safe; + } +} + +int wsrep_thd_conflict_state(void *thd_ptr, my_bool sync) +{ + int state = -1; + if (thd_ptr) + { + THD* thd = (THD*)thd_ptr; + if (sync) mysql_mutex_lock(&thd->LOCK_wsrep_thd); + + state = thd->wsrep_conflict_state; + if (sync) mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + } + return state; +} + +my_bool wsrep_thd_is_wsrep(void *thd_ptr) +{ + my_bool status = FALSE; + if (thd_ptr) + { + THD* thd = (THD*)thd_ptr; + + status = (WSREP(thd) && WSREP_PROVIDER_EXISTS); + } + return status; +} + +my_bool wsrep_thd_is_BF(void *thd_ptr, my_bool sync) +{ + my_bool status = FALSE; + if (thd_ptr) + { + THD* thd = (THD*)thd_ptr; + // THD can be BF only if provider exists + if (wsrep_thd_is_wsrep(thd_ptr)) + { + if (sync) + mysql_mutex_lock(&thd->LOCK_wsrep_thd); + + status = ((thd->wsrep_exec_mode == REPL_RECV) || + (thd->wsrep_exec_mode == TOTAL_ORDER)); + if (sync) + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + } + } + return status; +} + +extern "C" +my_bool wsrep_thd_is_BF_or_commit(void *thd_ptr, my_bool sync) +{ + bool status = FALSE; + if (thd_ptr) + { + THD* thd = (THD*)thd_ptr; + if (sync) mysql_mutex_lock(&thd->LOCK_wsrep_thd); + + status = ((thd->wsrep_exec_mode == REPL_RECV) || + (thd->wsrep_exec_mode == TOTAL_ORDER) || + (thd->wsrep_exec_mode == LOCAL_COMMIT)); + if (sync) mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + } + return status; +} + +extern "C" +my_bool wsrep_thd_is_local(void *thd_ptr, my_bool sync) +{ + bool status = FALSE; + if (thd_ptr) + { + THD* thd = (THD*)thd_ptr; + if (sync) mysql_mutex_lock(&thd->LOCK_wsrep_thd); + + status = (thd->wsrep_exec_mode == LOCAL_STATE); + if (sync) mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + } + return status; +} + +int wsrep_abort_thd(void *bf_thd_ptr, void *victim_thd_ptr, my_bool signal) +{ + THD *victim_thd = (THD *) victim_thd_ptr; + THD *bf_thd = (THD *) bf_thd_ptr; + DBUG_ENTER("wsrep_abort_thd"); + + if ( (WSREP(bf_thd) || + ( (WSREP_ON || wsrep_OSU_method_options == WSREP_OSU_RSU) && + bf_thd->wsrep_exec_mode == TOTAL_ORDER) ) && + victim_thd) + { + WSREP_DEBUG("wsrep_abort_thd, by: %llu, victim: %llu", (bf_thd) ? + (long long)bf_thd->real_id : 0, (long long)victim_thd->real_id); + ha_abort_transaction(bf_thd, victim_thd, signal); + } + else + { + WSREP_DEBUG("wsrep_abort_thd not effective: %p %p", bf_thd, victim_thd); + } + + DBUG_RETURN(1); +} + +extern "C" +int wsrep_thd_in_locking_session(void *thd_ptr) +{ + if (thd_ptr && ((THD *)thd_ptr)->in_lock_tables) { + return 1; + } + return 0; +} + diff --git a/sql/wsrep_thd.h b/sql/wsrep_thd.h new file mode 100644 index 00000000000..c5a497bb428 --- /dev/null +++ b/sql/wsrep_thd.h @@ -0,0 +1,40 @@ +/* Copyright (C) 2013 Codership Oy <info@codership.com> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */ + +#if !defined(WSREP_THD_H) && defined(WITH_WSREP) +#define WSREP_THD_H + +#include "sql_class.h" + +int wsrep_show_bf_aborts (THD *thd, SHOW_VAR *var, char *buff); +void wsrep_client_rollback(THD *thd); +void wsrep_replay_transaction(THD *thd); +void wsrep_create_appliers(long threads); +void wsrep_create_rollbacker(); + +int wsrep_abort_thd(void *bf_thd_ptr, void *victim_thd_ptr, + my_bool signal); + +extern void wsrep_thd_set_PA_safe(void *thd_ptr, my_bool safe); +extern my_bool wsrep_thd_is_BF(void *thd_ptr, my_bool sync); +extern my_bool wsrep_thd_is_wsrep(void *thd_ptr); + +extern int wsrep_thd_conflict_state(void *thd_ptr, my_bool sync); +//extern "C" my_bool wsrep_thd_is_BF(void *thd_ptr, my_bool sync); +extern "C" my_bool wsrep_thd_is_BF_or_commit(void *thd_ptr, my_bool sync); +extern "C" my_bool wsrep_thd_is_local(void *thd_ptr, my_bool sync); +extern "C" int wsrep_thd_in_locking_session(void *thd_ptr); + +#endif /* WSREP_THD_H */ diff --git a/sql/wsrep_utils.cc b/sql/wsrep_utils.cc new file mode 100644 index 00000000000..cdee1c2cece --- /dev/null +++ b/sql/wsrep_utils.cc @@ -0,0 +1,524 @@ +/* Copyright 2010 Codership Oy <http://www.codership.com> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +//! @file some utility functions and classes not directly related to replication + +#ifndef _GNU_SOURCE +#define _GNU_SOURCE // POSIX_SPAWN_USEVFORK flag +#endif + +#include "wsrep_utils.h" +#include "wsrep_mysqld.h" + +#include <sql_class.h> + +#include <spawn.h> // posix_spawn() +#include <unistd.h> // pipe() +#include <errno.h> // errno +#include <string.h> // strerror() +#include <sys/wait.h> // waitpid() +#include <sys/types.h> +#include <sys/socket.h> +#include <netdb.h> // getaddrinfo() + +extern char** environ; // environment variables + +static wsp::string wsrep_PATH; + +void +wsrep_prepend_PATH (const char* path) +{ + int count = 0; + + while (environ[count]) + { + if (strncmp (environ[count], "PATH=", 5)) + { + count++; + continue; + } + + char* const old_path (environ[count]); + + if (strstr (old_path, path)) return; // path already there + + size_t const new_path_len(strlen(old_path) + strlen(":") + + strlen(path) + 1); + + char* const new_path (reinterpret_cast<char*>(malloc(new_path_len))); + + if (new_path) + { + snprintf (new_path, new_path_len, "PATH=%s:%s", path, + old_path + strlen("PATH=")); + + wsrep_PATH.set (new_path); + environ[count] = new_path; + } + else + { + WSREP_ERROR ("Failed to allocate 'PATH' environment variable " + "buffer of size %zu.", new_path_len); + } + + return; + } + + WSREP_ERROR ("Failed to find 'PATH' environment variable. " + "State snapshot transfer may not be working."); +} + +namespace wsp +{ + +#define PIPE_READ 0 +#define PIPE_WRITE 1 +#define STDIN_FD 0 +#define STDOUT_FD 1 + +#ifndef POSIX_SPAWN_USEVFORK +# define POSIX_SPAWN_USEVFORK 0 +#endif + +process::process (const char* cmd, const char* type) + : str_(cmd ? strdup(cmd) : strdup("")), io_(NULL), err_(EINVAL), pid_(0) +{ + if (0 == str_) + { + WSREP_ERROR ("Can't allocate command line of size: %zu", strlen(cmd)); + err_ = ENOMEM; + return; + } + + if (0 == strlen(str_)) + { + WSREP_ERROR ("Can't start a process: null or empty command line."); + return; + } + + if (NULL == type || (strcmp (type, "w") && strcmp(type, "r"))) + { + WSREP_ERROR ("type argument should be either \"r\" or \"w\"."); + return; + } + + int pipe_fds[2] = { -1, }; + if (::pipe(pipe_fds)) + { + err_ = errno; + WSREP_ERROR ("pipe() failed: %d (%s)", err_, strerror(err_)); + return; + } + + // which end of pipe will be returned to parent + int const parent_end (strcmp(type,"w") ? PIPE_READ : PIPE_WRITE); + int const child_end (parent_end == PIPE_READ ? PIPE_WRITE : PIPE_READ); + int const close_fd (parent_end == PIPE_READ ? STDOUT_FD : STDIN_FD); + + char* const pargv[4] = { strdup("sh"), strdup("-c"), strdup(str_), NULL }; + if (!(pargv[0] && pargv[1] && pargv[2])) + { + err_ = ENOMEM; + WSREP_ERROR ("Failed to allocate pargv[] array."); + goto cleanup_pipe; + } + + posix_spawnattr_t attr; + err_ = posix_spawnattr_init (&attr); + if (err_) + { + WSREP_ERROR ("posix_spawnattr_init() failed: %d (%s)", + err_, strerror(err_)); + goto cleanup_pipe; + } + + err_ = posix_spawnattr_setflags (&attr, POSIX_SPAWN_SETSIGDEF | + POSIX_SPAWN_USEVFORK); + if (err_) + { + WSREP_ERROR ("posix_spawnattr_setflags() failed: %d (%s)", + err_, strerror(err_)); + goto cleanup_attr; + } + + posix_spawn_file_actions_t fact; + err_ = posix_spawn_file_actions_init (&fact); + if (err_) + { + WSREP_ERROR ("posix_spawn_file_actions_init() failed: %d (%s)", + err_, strerror(err_)); + goto cleanup_attr; + } + + // close child's stdout|stdin depending on what we returning + err_ = posix_spawn_file_actions_addclose (&fact, close_fd); + if (err_) + { + WSREP_ERROR ("posix_spawn_file_actions_addclose() failed: %d (%s)", + err_, strerror(err_)); + goto cleanup_fact; + } + + // substitute our pipe descriptor in place of the closed one + err_ = posix_spawn_file_actions_adddup2 (&fact, + pipe_fds[child_end], close_fd); + if (err_) + { + WSREP_ERROR ("posix_spawn_file_actions_addup2() failed: %d (%s)", + err_, strerror(err_)); + goto cleanup_fact; + } + + err_ = posix_spawnp (&pid_, pargv[0], &fact, &attr, pargv, environ); + if (err_) + { + WSREP_ERROR ("posix_spawnp(%s) failed: %d (%s)", + pargv[2], err_, strerror(err_)); + pid_ = 0; // just to make sure it was not messed up in the call + goto cleanup_fact; + } + + io_ = fdopen (pipe_fds[parent_end], type); + + if (io_) + { + pipe_fds[parent_end] = -1; // skip close on cleanup + } + else + { + err_ = errno; + WSREP_ERROR ("fdopen() failed: %d (%s)", err_, strerror(err_)); + } + +cleanup_fact: + int err; // to preserve err_ code + err = posix_spawn_file_actions_destroy (&fact); + if (err) + { + WSREP_ERROR ("posix_spawn_file_actions_destroy() failed: %d (%s)\n", + err, strerror(err)); + } + +cleanup_attr: + err = posix_spawnattr_destroy (&attr); + if (err) + { + WSREP_ERROR ("posix_spawnattr_destroy() failed: %d (%s)", + err, strerror(err)); + } + +cleanup_pipe: + if (pipe_fds[0] >= 0) close (pipe_fds[0]); + if (pipe_fds[1] >= 0) close (pipe_fds[1]); + + free (pargv[0]); + free (pargv[1]); + free (pargv[2]); +} + +process::~process () +{ + if (io_) + { + assert (pid_); + assert (str_); + + WSREP_WARN("Closing pipe to child process: %s, PID(%ld) " + "which might still be running.", str_, (long)pid_); + + if (fclose (io_) == -1) + { + err_ = errno; + WSREP_ERROR("fclose() failed: %d (%s)", err_, strerror(err_)); + } + } + + if (str_) free (const_cast<char*>(str_)); +} + +int +process::wait () +{ + if (pid_) + { + int status; + if (-1 == waitpid(pid_, &status, 0)) + { + err_ = errno; assert (err_); + WSREP_ERROR("Waiting for process failed: %s, PID(%ld): %d (%s)", + str_, (long)pid_, err_, strerror (err_)); + } + else + { // command completed, check exit status + if (WIFEXITED (status)) { + err_ = WEXITSTATUS (status); + } + else { // command didn't complete with exit() + WSREP_ERROR("Process was aborted."); + err_ = errno ? errno : ECHILD; + } + + if (err_) { + switch (err_) /* Translate error codes to more meaningful */ + { + case 126: err_ = EACCES; break; /* Permission denied */ + case 127: err_ = ENOENT; break; /* No such file or directory */ + } + WSREP_ERROR("Process completed with error: %s: %d (%s)", + str_, err_, strerror(err_)); + } + + pid_ = 0; + if (io_) fclose (io_); + io_ = NULL; + } + } + else { + assert (NULL == io_); + WSREP_ERROR("Command did not run: %s", str_); + } + + return err_; +} + +thd::thd (my_bool won) : init(), ptr(new THD) +{ + if (ptr) + { + ptr->thread_stack= (char*) &ptr; + ptr->store_globals(); + ptr->variables.option_bits&= ~OPTION_BIN_LOG; // disable binlog + ptr->variables.wsrep_on = won; + ptr->security_ctx->master_access= ~(ulong)0; + lex_start(ptr); + } +} + +thd::~thd () +{ + if (ptr) + { + delete ptr; + my_pthread_setspecific_ptr (THR_THD, 0); + } +} + +} // namespace wsp + +/* Returns INADDR_NONE, INADDR_ANY, INADDR_LOOPBACK or something else */ +unsigned int wsrep_check_ip (const char* const addr) +{ +#if 0 + if (addr && 0 == strcasecmp(addr, MY_BIND_ALL_ADDRESSES)) return INADDR_ANY; +#endif + + unsigned int ret = INADDR_NONE; + struct addrinfo *res, hints; + + memset (&hints, 0, sizeof(hints)); + hints.ai_flags= AI_PASSIVE/*|AI_ADDRCONFIG*/; + hints.ai_socktype= SOCK_STREAM; + hints.ai_family= AF_UNSPEC; + + int gai_ret = getaddrinfo(addr, NULL, &hints, &res); + if (0 == gai_ret) + { + if (AF_INET == res->ai_family) /* IPv4 */ + { + struct sockaddr_in* a= (struct sockaddr_in*)res->ai_addr; + ret= htonl(a->sin_addr.s_addr); + } + else /* IPv6 */ + { + struct sockaddr_in6* a= (struct sockaddr_in6*)res->ai_addr; + if (IN6_IS_ADDR_UNSPECIFIED(&a->sin6_addr)) + ret= INADDR_ANY; + else if (IN6_IS_ADDR_LOOPBACK(&a->sin6_addr)) + ret= INADDR_LOOPBACK; + else + ret= 0xdeadbeef; + } + freeaddrinfo (res); + } + else { + WSREP_ERROR ("getaddrinfo() failed on '%s': %d (%s)", + addr, gai_ret, gai_strerror(gai_ret)); + } + + // uint8_t* b= (uint8_t*)&ret; + // fprintf (stderr, "########## wsrep_check_ip returning: %hhu.%hhu.%hhu.%hhu\n", + // b[0], b[1], b[2], b[3]); + + return ret; +} + +extern char* my_bind_addr_str; + +size_t wsrep_guess_ip (char* buf, size_t buf_len) +{ + size_t ip_len = 0; + + if (my_bind_addr_str && strlen(my_bind_addr_str)) + { + unsigned int const ip_type= wsrep_check_ip(my_bind_addr_str); + + if (INADDR_NONE == ip_type) { + WSREP_ERROR("Networking not configured, cannot receive state transfer."); + return 0; + } + + if (INADDR_ANY != ip_type) {; + strncpy (buf, my_bind_addr_str, buf_len); + return strlen(buf); + } + } + + // mysqld binds to all interfaces - try IP from wsrep_node_address + if (wsrep_node_address && wsrep_node_address[0] != '\0') { + const char* const colon_ptr = strchr(wsrep_node_address, ':'); + + if (colon_ptr) + ip_len = colon_ptr - wsrep_node_address; + else + ip_len = strlen(wsrep_node_address); + + if (ip_len >= buf_len) { + WSREP_WARN("default_ip(): buffer too short: %zu <= %zd", buf_len, ip_len); + return 0; + } + + memcpy (buf, wsrep_node_address, ip_len); + buf[ip_len] = '\0'; + return ip_len; + } + + // try to find the address of the first one +#if (TARGET_OS_LINUX == 1) + const char cmd[] = "/sbin/ifconfig | " +// "grep -m1 -1 -E '^[a-z]?eth[0-9]' | tail -n 1 | " + "grep -E '^[[:space:]]+inet addr:' | grep -m1 -v 'inet addr:127' | " + "sed 's/:/ /' | awk '{ print $3 }'"; +#elif defined(__sun__) + const char cmd[] = "/sbin/ifconfig -a | " + "/usr/gnu/bin/grep -m1 -1 -E 'net[0-9]:' | tail -n 1 | awk '{ print $2 }'"; +#elif defined(__APPLE__) || defined(__FreeBSD__) + const char cmd[] = "/sbin/route -nv get 8.8.8.8 | tail -n1 | awk '{print $(NF)}'"; +#else + char *cmd; +#error "OS not supported" +#endif + wsp::process proc (cmd, "r"); + + if (NULL != proc.pipe()) { + char* ret; + + ret = fgets (buf, buf_len, proc.pipe()); + + if (proc.wait()) return 0; + + if (NULL == ret) { + WSREP_ERROR("Failed to read output of: '%s'", cmd); + return 0; + } + } + else { + WSREP_ERROR("Failed to execute: '%s'", cmd); + return 0; + } + + // clear possible \n at the end of ip string left by fgets() + ip_len = strlen (buf); + if (ip_len > 0 && '\n' == buf[ip_len - 1]) { + ip_len--; + buf[ip_len] = '\0'; + } + + if (INADDR_NONE == inet_addr(buf)) { + if (strlen(buf) != 0) { + WSREP_WARN("Shell command returned invalid address: '%s'", buf); + } + return 0; + } + + return ip_len; +} + +size_t wsrep_guess_address(char* buf, size_t buf_len) +{ + size_t addr_len = wsrep_guess_ip (buf, buf_len); + + if (addr_len && addr_len < buf_len) { + addr_len += snprintf (buf + addr_len, buf_len - addr_len, + ":%u", mysqld_port); + } + + return addr_len; +} + +/* + * WSREPXid + */ + +#define WSREP_XID_PREFIX "WSREPXid" +#define WSREP_XID_PREFIX_LEN MYSQL_XID_PREFIX_LEN +#define WSREP_XID_UUID_OFFSET 8 +#define WSREP_XID_SEQNO_OFFSET (WSREP_XID_UUID_OFFSET + sizeof(wsrep_uuid_t)) +#define WSREP_XID_GTRID_LEN (WSREP_XID_SEQNO_OFFSET + sizeof(wsrep_seqno_t)) + +void wsrep_xid_init(XID* xid, const wsrep_uuid_t* uuid, wsrep_seqno_t seqno) +{ + xid->formatID= 1; + xid->gtrid_length= WSREP_XID_GTRID_LEN; + xid->bqual_length= 0; + memset(xid->data, 0, sizeof(xid->data)); + memcpy(xid->data, WSREP_XID_PREFIX, WSREP_XID_PREFIX_LEN); + memcpy(xid->data + WSREP_XID_UUID_OFFSET, uuid, sizeof(wsrep_uuid_t)); + memcpy(xid->data + WSREP_XID_SEQNO_OFFSET, &seqno, sizeof(wsrep_seqno_t)); +} + +const wsrep_uuid_t* wsrep_xid_uuid(const XID* xid) +{ + if (wsrep_is_wsrep_xid(xid)) + return reinterpret_cast<const wsrep_uuid_t*>(xid->data + + WSREP_XID_UUID_OFFSET); + else + return &WSREP_UUID_UNDEFINED; +} + +wsrep_seqno_t wsrep_xid_seqno(const XID* xid) +{ + + if (wsrep_is_wsrep_xid(xid)) + { + wsrep_seqno_t seqno; + memcpy(&seqno, xid->data + WSREP_XID_SEQNO_OFFSET, sizeof(wsrep_seqno_t)); + return seqno; + } + else + { + return WSREP_SEQNO_UNDEFINED; + } +} + +extern +int wsrep_is_wsrep_xid(const void* xid_ptr) +{ + const XID* xid= reinterpret_cast<const XID*>(xid_ptr); + return (xid->formatID == 1 && + xid->gtrid_length == WSREP_XID_GTRID_LEN && + xid->bqual_length == 0 && + !memcmp(xid->data, WSREP_XID_PREFIX, WSREP_XID_PREFIX_LEN)); +} diff --git a/sql/wsrep_utils.h b/sql/wsrep_utils.h new file mode 100644 index 00000000000..337678238f8 --- /dev/null +++ b/sql/wsrep_utils.h @@ -0,0 +1,208 @@ +/* Copyright (C) 2013 Codership Oy <info@codership.com> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */ + +#ifndef WSREP_UTILS_H +#define WSREP_UTILS_H + +#include "wsrep_priv.h" + +unsigned int wsrep_check_ip (const char* addr); +size_t wsrep_guess_ip (char* buf, size_t buf_len); +size_t wsrep_guess_address(char* buf, size_t buf_len); + +namespace wsp { +class node_status +{ +public: + node_status() : status(WSREP_MEMBER_UNDEFINED) {} + void set(wsrep_member_status_t new_status, + const wsrep_view_info_t* view = 0) + { + if (status != new_status || 0 != view) + { + wsrep_notify_status(new_status, view); + status = new_status; + } + } + wsrep_member_status_t get() const { return status; } +private: + wsrep_member_status_t status; +}; +} /* namespace wsp */ + +extern wsp::node_status local_status; + +namespace wsp { +/* A small class to run external programs. */ +class process +{ +private: + const char* const str_; + FILE* io_; + int err_; + pid_t pid_; + +public: +/*! @arg type is a pointer to a null-terminated string which must contain + either the letter 'r' for reading or the letter 'w' for writing. + */ + process (const char* cmd, const char* type); + ~process (); + + FILE* pipe () { return io_; } + int error() { return err_; } + int wait (); + const char* cmd() { return str_; } +}; + +class thd +{ + class thd_init + { + public: + thd_init() { my_thread_init(); } + ~thd_init() { my_thread_end(); } + } + init; + + thd (const thd&); + thd& operator= (const thd&); + +public: + + thd(my_bool wsrep_on); + ~thd(); + THD* const ptr; +}; + +class string +{ +public: + string() : string_(0) {} + void set(char* str) { if (string_) free (string_); string_ = str; } + ~string() { set (0); } +private: + char* string_; +}; + +#ifdef REMOVED +class lock +{ + pthread_mutex_t* const mtx_; + +public: + + lock (pthread_mutex_t* mtx) : mtx_(mtx) + { + int err = pthread_mutex_lock (mtx_); + + if (err) + { + WSREP_ERROR("Mutex lock failed: %s", strerror(err)); + abort(); + } + } + + virtual ~lock () + { + int err = pthread_mutex_unlock (mtx_); + + if (err) + { + WSREP_ERROR("Mutex unlock failed: %s", strerror(err)); + abort(); + } + } + + inline void wait (pthread_cond_t* cond) + { + pthread_cond_wait (cond, mtx_); + } + +private: + + lock (const lock&); + lock& operator=(const lock&); + +}; + +class monitor +{ + int mutable refcnt; + pthread_mutex_t mutable mtx; + pthread_cond_t mutable cond; + +public: + + monitor() : refcnt(0) + { + pthread_mutex_init (&mtx, NULL); + pthread_cond_init (&cond, NULL); + } + + ~monitor() + { + pthread_mutex_destroy (&mtx); + pthread_cond_destroy (&cond); + } + + void enter() const + { + lock l(&mtx); + + while (refcnt) + { + l.wait(&cond); + } + refcnt++; + } + + void leave() const + { + lock l(&mtx); + + refcnt--; + if (refcnt == 0) + { + pthread_cond_signal (&cond); + } + } + +private: + + monitor (const monitor&); + monitor& operator= (const monitor&); +}; + +class critical +{ + const monitor& mon; + +public: + + critical(const monitor& m) : mon(m) { mon.enter(); } + + ~critical() { mon.leave(); } + +private: + + critical (const critical&); + critical& operator= (const critical&); +}; +#endif + +} // namespace wsrep + +#endif /* WSREP_UTILS_H */ diff --git a/sql/wsrep_var.cc b/sql/wsrep_var.cc new file mode 100644 index 00000000000..2272945535d --- /dev/null +++ b/sql/wsrep_var.cc @@ -0,0 +1,602 @@ +/* Copyright 2008 Codership Oy <http://www.codership.com> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include "wsrep_var.h" + +#include <mysqld.h> +#include <sql_class.h> +#include <sql_plugin.h> +#include <set_var.h> +#include <sql_acl.h> +#include "wsrep_priv.h" +#include "wsrep_thd.h" +#include <my_dir.h> +#include <cstdio> +#include <cstdlib> + +const char* wsrep_provider = 0; +const char* wsrep_provider_options = 0; +const char* wsrep_cluster_address = 0; +const char* wsrep_cluster_name = 0; +const char* wsrep_node_name = 0; +const char* wsrep_node_address = 0; +const char* wsrep_node_incoming_address = 0; +const char* wsrep_start_position = 0; +ulong wsrep_OSU_method_options; + +int wsrep_init_vars() +{ + wsrep_provider = my_strdup(WSREP_NONE, MYF(MY_WME)); + wsrep_provider_options= my_strdup("", MYF(MY_WME)); + wsrep_cluster_address = my_strdup("", MYF(MY_WME)); + wsrep_cluster_name = my_strdup(WSREP_CLUSTER_NAME, MYF(MY_WME)); + wsrep_node_name = my_strdup("", MYF(MY_WME)); + wsrep_node_address = my_strdup("", MYF(MY_WME)); + wsrep_node_incoming_address= my_strdup(WSREP_NODE_INCOMING_AUTO, MYF(MY_WME)); + wsrep_start_position = my_strdup(WSREP_START_POSITION_ZERO, MYF(MY_WME)); + + global_system_variables.binlog_format=BINLOG_FORMAT_ROW; + return 0; +} + +bool wsrep_on_update (sys_var *self, THD* thd, enum_var_type var_type) +{ + if (var_type == OPT_GLOBAL) { + // FIXME: this variable probably should be changed only per session + thd->variables.wsrep_on = global_system_variables.wsrep_on; + } + return false; +} + +bool wsrep_causal_reads_update (sys_var *self, THD* thd, enum_var_type var_type) +{ + // global setting should not affect session setting. + // if (var_type == OPT_GLOBAL) { + // thd->variables.wsrep_causal_reads = global_system_variables.wsrep_causal_reads; + // } + if (thd->variables.wsrep_causal_reads) { + thd->variables.wsrep_sync_wait |= WSREP_SYNC_WAIT_BEFORE_READ; + } else { + thd->variables.wsrep_sync_wait &= ~WSREP_SYNC_WAIT_BEFORE_READ; + } + return false; +} + +bool wsrep_sync_wait_update (sys_var* self, THD* thd, enum_var_type var_type) +{ + // global setting should not affect session setting. + // if (var_type == OPT_GLOBAL) { + // thd->variables.wsrep_sync_wait = global_system_variables.wsrep_sync_wait; + // } + thd->variables.wsrep_causal_reads = thd->variables.wsrep_sync_wait & + WSREP_SYNC_WAIT_BEFORE_READ; + return false; +} + +static int wsrep_start_position_verify (const char* start_str) +{ + size_t start_len; + wsrep_uuid_t uuid; + ssize_t uuid_len; + + start_len = strlen (start_str); + if (start_len < 34) + return 1; + + uuid_len = wsrep_uuid_scan (start_str, start_len, &uuid); + if (uuid_len < 0 || (start_len - uuid_len) < 2) + return 1; + + if (start_str[uuid_len] != ':') // separator should follow UUID + return 1; + + char* endptr; + wsrep_seqno_t const seqno __attribute__((unused)) // to avoid GCC warnings + (strtoll(&start_str[uuid_len + 1], &endptr, 10)); + + if (*endptr == '\0') return 0; // remaining string was seqno + + return 1; +} + +bool wsrep_start_position_check (sys_var *self, THD* thd, set_var* var) +{ + char start_pos_buf[FN_REFLEN]; + + if ((! var->save_result.string_value.str) || + (var->save_result.string_value.length > (FN_REFLEN - 1))) // safety + goto err; + + memcpy(start_pos_buf, var->save_result.string_value.str, + var->save_result.string_value.length); + start_pos_buf[var->save_result.string_value.length]= 0; + + if (!wsrep_start_position_verify(start_pos_buf)) return 0; + +err: + my_error(ER_WRONG_VALUE_FOR_VAR, MYF(0), var->var->name.str, + var->save_result.string_value.str ? + var->save_result.string_value.str : "NULL"); + return 1; +} + +void wsrep_set_local_position (const char* value) +{ + size_t value_len = strlen (value); + size_t uuid_len = wsrep_uuid_scan (value, value_len, &local_uuid); + + local_seqno = strtoll (value + uuid_len + 1, NULL, 10); + + XID xid; + wsrep_xid_init(&xid, &local_uuid, local_seqno); + wsrep_set_SE_checkpoint(&xid); + WSREP_INFO ("wsrep_start_position var submitted: '%s'", wsrep_start_position); +} + +bool wsrep_start_position_update (sys_var *self, THD* thd, enum_var_type type) +{ + // since this value passed wsrep_start_position_check, don't check anything + // here + wsrep_set_local_position (wsrep_start_position); + + if (wsrep) { + wsrep_sst_received (wsrep, &local_uuid, local_seqno, NULL, 0); + } + + return 0; +} + +void wsrep_start_position_init (const char* val) +{ + if (NULL == val || wsrep_start_position_verify (val)) + { + WSREP_ERROR("Bad initial value for wsrep_start_position: %s", + (val ? val : "")); + return; + } + + wsrep_set_local_position (val); +} + +static bool refresh_provider_options() +{ + WSREP_DEBUG("refresh_provider_options: %s", + (wsrep_provider_options) ? wsrep_provider_options : "null"); + char* opts= wsrep->options_get(wsrep); + if (opts) + { + if (wsrep_provider_options) my_free((void *)wsrep_provider_options); + wsrep_provider_options = (char*)my_memdup(opts, strlen(opts) + 1, + MYF(MY_WME)); + } + else + { + WSREP_ERROR("Failed to get provider options"); + return true; + } + return false; +} + +static int wsrep_provider_verify (const char* provider_str) +{ + MY_STAT f_stat; + char path[FN_REFLEN]; + + if (!provider_str || strlen(provider_str)== 0) + return 1; + + if (!strcmp(provider_str, WSREP_NONE)) + return 0; + + if (!unpack_filename(path, provider_str)) + return 1; + + /* check that provider file exists */ + bzero(&f_stat, sizeof(MY_STAT)); + if (!my_stat(path, &f_stat, MYF(0))) + { + return 1; + } + return 0; +} + +bool wsrep_provider_check (sys_var *self, THD* thd, set_var* var) +{ + char wsrep_provider_buf[FN_REFLEN]; + + if ((! var->save_result.string_value.str) || + (var->save_result.string_value.length > (FN_REFLEN - 1))) // safety + goto err; + + memcpy(wsrep_provider_buf, var->save_result.string_value.str, + var->save_result.string_value.length); + wsrep_provider_buf[var->save_result.string_value.length]= 0; + + if (!wsrep_provider_verify(wsrep_provider_buf)) return 0; + +err: + my_error(ER_WRONG_VALUE_FOR_VAR, MYF(0), var->var->name.str, + var->save_result.string_value.str ? + var->save_result.string_value.str : "NULL"); + return 1; +} + +bool wsrep_provider_update (sys_var *self, THD* thd, enum_var_type type) +{ + bool rcode= false; + + bool wsrep_on_saved= thd->variables.wsrep_on; + thd->variables.wsrep_on= false; + + WSREP_DEBUG("wsrep_provider_update: %s", wsrep_provider); + + /* stop replication is heavy operation, and includes closing all client + connections. Closing clients may need to get LOCK_global_system_variables + at least in MariaDB. + + Note: releasing LOCK_global_system_variables may cause race condition, if + there can be several concurrent clients changing wsrep_provider + */ + mysql_mutex_unlock(&LOCK_global_system_variables); + wsrep_stop_replication(thd); + mysql_mutex_lock(&LOCK_global_system_variables); + + if (wsrep_inited == 1) + wsrep_deinit(false); + + char* tmp= strdup(wsrep_provider); // wsrep_init() rewrites provider + //when fails + if (wsrep_init()) + { + my_error(ER_CANT_OPEN_LIBRARY, MYF(0), tmp); + rcode = true; + } + free(tmp); + + // we sure don't want to use old address with new provider + wsrep_cluster_address_init(NULL); + wsrep_provider_options_init(NULL); + + thd->variables.wsrep_on= wsrep_on_saved; + + refresh_provider_options(); + + return rcode; +} + +void wsrep_provider_init (const char* value) +{ + WSREP_DEBUG("wsrep_provider_init: %s -> %s", + (wsrep_provider) ? wsrep_provider : "null", + (value) ? value : "null"); + if (NULL == value || wsrep_provider_verify (value)) + { + WSREP_ERROR("Bad initial value for wsrep_provider: %s", + (value ? value : "")); + return; + } + + if (wsrep_provider) my_free((void *)wsrep_provider); + wsrep_provider = my_strdup(value, MYF(0)); +} + +bool wsrep_provider_options_check(sys_var *self, THD* thd, set_var* var) +{ + return 0; +} + +bool wsrep_provider_options_update(sys_var *self, THD* thd, enum_var_type type) +{ + wsrep_status_t ret= wsrep->options_set(wsrep, wsrep_provider_options); + if (ret != WSREP_OK) + { + WSREP_ERROR("Set options returned %d", ret); + refresh_provider_options(); + return true; + } + return refresh_provider_options(); +} + +void wsrep_provider_options_init(const char* value) +{ + if (wsrep_provider_options && wsrep_provider_options != value) + my_free((void *)wsrep_provider_options); + wsrep_provider_options = (value) ? my_strdup(value, MYF(0)) : NULL; +} + +static int wsrep_cluster_address_verify (const char* cluster_address_str) +{ + /* There is no predefined address format, it depends on provider. */ + return 0; +} + +bool wsrep_cluster_address_check (sys_var *self, THD* thd, set_var* var) +{ + char addr_buf[FN_REFLEN]; + + if ((! var->save_result.string_value.str) || + (var->save_result.string_value.length > (FN_REFLEN - 1))) // safety + goto err; + + memcpy(addr_buf, var->save_result.string_value.str, + var->save_result.string_value.length); + addr_buf[var->save_result.string_value.length]= 0; + + if (!wsrep_cluster_address_verify(addr_buf)) return 0; + + err: + my_error(ER_WRONG_VALUE_FOR_VAR, MYF(0), var->var->name.str, + var->save_result.string_value.str ? + var->save_result.string_value.str : "NULL"); + return 1; +} + +bool wsrep_cluster_address_update (sys_var *self, THD* thd, enum_var_type type) +{ + bool wsrep_on_saved= thd->variables.wsrep_on; + thd->variables.wsrep_on= false; + + /* stop replication is heavy operation, and includes closing all client + connections. Closing clients may need to get LOCK_global_system_variables + at least in MariaDB. + + Note: releasing LOCK_global_system_variables may cause race condition, if + there can be several concurrent clients changing wsrep_provider + */ + mysql_mutex_unlock(&LOCK_global_system_variables); + wsrep_stop_replication(thd); + mysql_mutex_lock(&LOCK_global_system_variables); + + if (wsrep_start_replication()) + { + wsrep_create_rollbacker(); + wsrep_create_appliers(wsrep_slave_threads); + } + + thd->variables.wsrep_on= wsrep_on_saved; + + return false; +} + +void wsrep_cluster_address_init (const char* value) +{ + WSREP_DEBUG("wsrep_cluster_address_init: %s -> %s", + (wsrep_cluster_address) ? wsrep_cluster_address : "null", + (value) ? value : "null"); + + if (wsrep_cluster_address) my_free ((void*)wsrep_cluster_address); + wsrep_cluster_address = (value) ? my_strdup(value, MYF(0)) : NULL; +} + +/* wsrep_cluster_name cannot be NULL or an empty string. */ +bool wsrep_cluster_name_check (sys_var *self, THD* thd, set_var* var) +{ + if (!var->save_result.string_value.str || + (var->save_result.string_value.length == 0)) + { + my_error(ER_WRONG_VALUE_FOR_VAR, MYF(0), var->var->name.str, + (var->save_result.string_value.str ? + var->save_result.string_value.str : "NULL")); + return 1; + } + return 0; +} + +bool wsrep_cluster_name_update (sys_var *self, THD* thd, enum_var_type type) +{ + return 0; +} + +bool wsrep_node_name_check (sys_var *self, THD* thd, set_var* var) +{ + // TODO: for now 'allow' 0-length string to be valid (default) + if (!var->save_result.string_value.str) + { + my_error(ER_WRONG_VALUE_FOR_VAR, MYF(0), var->var->name.str, + (var->save_result.string_value.str ? + var->save_result.string_value.str : "NULL")); + return 1; + } + return 0; +} + +bool wsrep_node_name_update (sys_var *self, THD* thd, enum_var_type type) +{ + return 0; +} + +// TODO: do something more elaborate, like checking connectivity +bool wsrep_node_address_check (sys_var *self, THD* thd, set_var* var) +{ + char addr_buf[FN_REFLEN]; + + if ((! var->save_result.string_value.str) || + (var->save_result.string_value.length > (FN_REFLEN - 1))) // safety + goto err; + + memcpy(addr_buf, var->save_result.string_value.str, + var->save_result.string_value.length); + addr_buf[var->save_result.string_value.length]= 0; + + // TODO: for now 'allow' 0-length string to be valid (default) + return 0; + +err: + my_error(ER_WRONG_VALUE_FOR_VAR, MYF(0), var->var->name.str, + var->save_result.string_value.str ? + var->save_result.string_value.str : "NULL"); + return 1; +} + +bool wsrep_node_address_update (sys_var *self, THD* thd, enum_var_type type) +{ + return 0; +} + +void wsrep_node_address_init (const char* value) +{ + if (wsrep_node_address && strcmp(wsrep_node_address, value)) + my_free ((void*)wsrep_node_address); + + wsrep_node_address = (value) ? my_strdup(value, MYF(0)) : NULL; +} + +bool wsrep_slave_threads_check (sys_var *self, THD* thd, set_var* var) +{ + mysql_mutex_lock(&LOCK_wsrep_slave_threads); + wsrep_slave_count_change += (var->save_result.ulonglong_value - + wsrep_slave_threads); + mysql_mutex_unlock(&LOCK_wsrep_slave_threads); + + return 0; +} + +bool wsrep_slave_threads_update (sys_var *self, THD* thd, enum_var_type type) +{ + if (wsrep_slave_count_change > 0) + { + wsrep_create_appliers(wsrep_slave_count_change); + wsrep_slave_count_change = 0; + } + return false; +} + +bool wsrep_desync_check (sys_var *self, THD* thd, set_var* var) +{ + bool new_wsrep_desync= (bool) var->save_result.ulonglong_value; + if (wsrep_desync == new_wsrep_desync) { + if (new_wsrep_desync) { + push_warning (thd, Sql_condition::WARN_LEVEL_WARN, + ER_WRONG_VALUE_FOR_VAR, + "'wsrep_desync' is already ON."); + } else { + push_warning (thd, Sql_condition::WARN_LEVEL_WARN, + ER_WRONG_VALUE_FOR_VAR, + "'wsrep_desync' is already OFF."); + } + } + return 0; +} + +bool wsrep_desync_update (sys_var *self, THD* thd, enum_var_type type) +{ + wsrep_status_t ret(WSREP_WARNING); + if (wsrep_desync) { + ret = wsrep->desync (wsrep); + if (ret != WSREP_OK) { + WSREP_WARN ("SET desync failed %d for %s", ret, thd->query()); + my_error (ER_CANNOT_USER, MYF(0), "'desync'", thd->query()); + return true; + } + } else { + ret = wsrep->resync (wsrep); + if (ret != WSREP_OK) { + WSREP_WARN ("SET resync failed %d for %s", ret, thd->query()); + my_error (ER_CANNOT_USER, MYF(0), "'resync'", thd->query()); + return true; + } + } + return false; +} + +/* + * Status variables stuff below + */ +static inline void +wsrep_assign_to_mysql (SHOW_VAR* mysql, wsrep_stats_var* wsrep) +{ + mysql->name = wsrep->name; + switch (wsrep->type) { + case WSREP_VAR_INT64: + mysql->value = (char*) &wsrep->value._int64; + mysql->type = SHOW_LONGLONG; + break; + case WSREP_VAR_STRING: + mysql->value = (char*) &wsrep->value._string; + mysql->type = SHOW_CHAR_PTR; + break; + case WSREP_VAR_DOUBLE: + mysql->value = (char*) &wsrep->value._double; + mysql->type = SHOW_DOUBLE; + break; + } +} + +#if DYNAMIC +// somehow this mysql status thing works only with statically allocated arrays. +static SHOW_VAR* mysql_status_vars = NULL; +static int mysql_status_len = -1; +#else +static SHOW_VAR mysql_status_vars[512 + 1]; +static const int mysql_status_len = 512; +#endif + +static void export_wsrep_status_to_mysql(THD* thd) +{ + int wsrep_status_len, i; + + thd->wsrep_status_vars = wsrep->stats_get(wsrep); + + if (!thd->wsrep_status_vars) { + return; + } + + for (wsrep_status_len = 0; + thd->wsrep_status_vars[wsrep_status_len].name != NULL; + wsrep_status_len++); + +#if DYNAMIC + if (wsrep_status_len != mysql_status_len) { + void* tmp = realloc (mysql_status_vars, + (wsrep_status_len + 1) * sizeof(SHOW_VAR)); + if (!tmp) { + + sql_print_error ("Out of memory for wsrep status variables." + "Number of variables: %d", wsrep_status_len); + return; + } + + mysql_status_len = wsrep_status_len; + mysql_status_vars = (SHOW_VAR*)tmp; + } + /* @TODO: fix this: */ +#else + if (mysql_status_len < wsrep_status_len) wsrep_status_len= mysql_status_len; +#endif + + for (i = 0; i < wsrep_status_len; i++) + wsrep_assign_to_mysql (mysql_status_vars + i, thd->wsrep_status_vars + i); + + mysql_status_vars[wsrep_status_len].name = NullS; + mysql_status_vars[wsrep_status_len].value = NullS; + mysql_status_vars[wsrep_status_len].type = SHOW_LONG; +} + +int wsrep_show_status (THD *thd, SHOW_VAR *var, char *buff) +{ + export_wsrep_status_to_mysql(thd); + var->type= SHOW_ARRAY; + var->value= (char *) &mysql_status_vars; + return 0; +} + +void wsrep_free_status (THD* thd) +{ + if (thd->wsrep_status_vars) + { + wsrep->stats_free (wsrep, thd->wsrep_status_vars); + thd->wsrep_status_vars = 0; + } +} diff --git a/sql/wsrep_var.h b/sql/wsrep_var.h new file mode 100644 index 00000000000..58d0374baa5 --- /dev/null +++ b/sql/wsrep_var.h @@ -0,0 +1,86 @@ +/* Copyright (C) 2013 Codership Oy <info@codership.com> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */ + +#if !defined (WSREP_VAR_H) && defined(WITH_WSREP) +#define WSREP_VAR_H + +#define WSREP_CLUSTER_NAME "my_wsrep_cluster" +#define WSREP_NODE_INCOMING_AUTO "AUTO" +#define WSREP_START_POSITION_ZERO "00000000-0000-0000-0000-000000000000:-1" + +// MySQL variables funcs + +#include "sql_priv.h" +class sys_var; +class set_var; +class THD; + +int wsrep_init_vars(); + +#define CHECK_ARGS (sys_var *self, THD* thd, set_var *var) +#define UPDATE_ARGS (sys_var *self, THD* thd, enum_var_type type) +#define DEFAULT_ARGS (THD* thd, enum_var_type var_type) +#define INIT_ARGS (const char* opt) + +extern bool wsrep_on_update UPDATE_ARGS; +extern bool wsrep_causal_reads_update UPDATE_ARGS; +extern bool wsrep_sync_wait_update UPDATE_ARGS; +extern bool wsrep_start_position_check CHECK_ARGS; +extern bool wsrep_start_position_update UPDATE_ARGS; +extern void wsrep_start_position_init INIT_ARGS; + +extern bool wsrep_provider_check CHECK_ARGS; +extern bool wsrep_provider_update UPDATE_ARGS; +extern void wsrep_provider_init INIT_ARGS; + +extern bool wsrep_provider_options_check CHECK_ARGS; +extern bool wsrep_provider_options_update UPDATE_ARGS; +extern void wsrep_provider_options_init INIT_ARGS; + +extern bool wsrep_cluster_address_check CHECK_ARGS; +extern bool wsrep_cluster_address_update UPDATE_ARGS; +extern void wsrep_cluster_address_init INIT_ARGS; + +extern bool wsrep_cluster_name_check CHECK_ARGS; +extern bool wsrep_cluster_name_update UPDATE_ARGS; + +extern bool wsrep_node_name_check CHECK_ARGS; +extern bool wsrep_node_name_update UPDATE_ARGS; + +extern bool wsrep_node_address_check CHECK_ARGS; +extern bool wsrep_node_address_update UPDATE_ARGS; +extern void wsrep_node_address_init INIT_ARGS; + +extern bool wsrep_sst_method_check CHECK_ARGS; +extern bool wsrep_sst_method_update UPDATE_ARGS; +extern void wsrep_sst_method_init INIT_ARGS; + +extern bool wsrep_sst_receive_address_check CHECK_ARGS; +extern bool wsrep_sst_receive_address_update UPDATE_ARGS; + +extern bool wsrep_sst_auth_check CHECK_ARGS; +extern bool wsrep_sst_auth_update UPDATE_ARGS; +extern void wsrep_sst_auth_init INIT_ARGS; + +extern bool wsrep_sst_donor_check CHECK_ARGS; +extern bool wsrep_sst_donor_update UPDATE_ARGS; + +extern bool wsrep_slave_threads_check CHECK_ARGS; +extern bool wsrep_slave_threads_update UPDATE_ARGS; + +extern bool wsrep_desync_check CHECK_ARGS; +extern bool wsrep_desync_update UPDATE_ARGS; + +#endif /* WSREP_VAR_H */ |