diff options
Diffstat (limited to 'sql')
75 files changed, 12026 insertions, 134 deletions
diff --git a/sql/CMakeLists.txt b/sql/CMakeLists.txt index ca2b059eeef..0f771d26bb5 100644 --- a/sql/CMakeLists.txt +++ b/sql/CMakeLists.txt @@ -14,6 +14,10 @@ # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +IF(WITH_WSREP) + SET(WSREP_INCLUDES ${CMAKE_SOURCE_DIR}/wsrep) +ENDIF() + INCLUDE_DIRECTORIES( ${CMAKE_SOURCE_DIR}/include ${CMAKE_SOURCE_DIR}/sql @@ -21,6 +25,7 @@ ${PCRE_INCLUDES} ${ZLIB_INCLUDE_DIR} ${SSL_INCLUDE_DIRS} ${CMAKE_BINARY_DIR}/sql +${WSREP_INCLUDES} ) SET(GEN_SOURCES @@ -62,6 +67,23 @@ IF(SSL_DEFINES) ADD_DEFINITIONS(${SSL_DEFINES}) ENDIF() +IF(WITH_WSREP) + SET(WSREP_SOURCES + wsrep_utils.cc + wsrep_xid.cc + wsrep_check_opts.cc + wsrep_hton.cc + wsrep_mysqld.cc + wsrep_notify.cc + wsrep_sst.cc + wsrep_var.cc + wsrep_binlog.cc + wsrep_applier.cc + wsrep_thd.cc + ) + SET(WSREP_LIB wsrep) +ENDIF() + SET (SQL_SOURCE ../sql-common/client.c compat56.cc derror.cc des_key_file.cc discover.cc ../libmysql/errmsg.c field.cc field_conv.cc @@ -107,7 +129,6 @@ SET (SQL_SOURCE sql_signal.cc rpl_handler.cc mdl.cc sql_admin.cc transaction.cc sys_vars.cc sql_truncate.cc datadict.cc sql_reload.cc sql_cmd.h item_inetfunc.cc - # added in MariaDB: sql_explain.h sql_explain.cc sql_lifo_buffer.h sql_join_cache.h sql_join_cache.cc @@ -119,6 +140,7 @@ SET (SQL_SOURCE ../sql-common/mysql_async.c my_apc.cc my_apc.h rpl_gtid.cc rpl_parallel.cc + ${WSREP_SOURCES} table_cache.cc ${CMAKE_CURRENT_BINARY_DIR}/sql_builtin.cc ${GEN_SOURCES} @@ -148,6 +170,7 @@ DTRACE_INSTRUMENT(sql) TARGET_LINK_LIBRARIES(sql ${MYSQLD_STATIC_PLUGIN_LIBS} mysys mysys_ssl dbug strings vio pcre ${LIBJEMALLOC} ${LIBWRAP} ${LIBCRYPT} ${LIBDL} ${CMAKE_THREAD_LIBS_INIT} + ${WSREP_LIB} ${SSL_LIBRARIES}) IF(WIN32) diff --git a/sql/event_data_objects.cc b/sql/event_data_objects.cc index 09256a34853..e7bdc42b2e6 100644 --- a/sql/event_data_objects.cc +++ b/sql/event_data_objects.cc @@ -1472,8 +1472,25 @@ end: bool save_tx_read_only= thd->tx_read_only; thd->tx_read_only= false; +#ifdef WITH_WSREP + if (WSREP(thd)) { + // sql_print_information("sizeof(LEX) = %d", sizeof(struct LEX)); + // sizeof(LEX) = 4512, so it's relatively safe to allocate it on stack. + LEX lex; + LEX* saved = thd->lex; + lex.sql_command = SQLCOM_DROP_EVENT; + thd->lex = &lex; + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL); + thd->lex = saved; + } +#endif + ret= Events::drop_event(thd, dbname, name, FALSE); +#ifdef WITH_WSREP + WSREP_TO_ISOLATION_END; + error: +#endif thd->tx_read_only= save_tx_read_only; thd->security_ctx->master_access= saved_master_access; } diff --git a/sql/events.cc b/sql/events.cc index 8d78497a29e..0be9ce4e6f5 100644 --- a/sql/events.cc +++ b/sql/events.cc @@ -1145,7 +1145,20 @@ Events::load_events_from_db(THD *thd) delete et; goto end; } - +#ifdef WITH_WSREP + // when SST from master node who initials event, the event status is ENABLED + // this is problematic because there are two nodes with same events and both enabled. + if (et->originator != (longlong) thd->variables.server_id) + { + store_record(table, record[1]); + table->field[ET_FIELD_STATUS]-> + store((longlong) Event_parse_data::SLAVESIDE_DISABLED, + TRUE); + (void) table->file->ha_update_row(table->record[1], table->record[0]); + delete et; + continue; + } +#endif /** Since the Event_queue_element object could be deleted inside Event_queue::create_event we should save the value of dropped flag @@ -1194,6 +1207,48 @@ end: DBUG_RETURN(ret); } +#ifdef WITH_WSREP +int wsrep_create_event_query(THD *thd, uchar** buf, size_t* buf_len) +{ + String log_query; + + if (create_query_string(thd, &log_query)) + { + WSREP_WARN("events create string failed: schema: %s, query: %s", + (thd->db ? thd->db : "(null)"), thd->query()); + return 1; + } + return wsrep_to_buf_helper(thd, log_query.ptr(), log_query.length(), buf, buf_len); +} +static int +wsrep_alter_query_string(THD *thd, String *buf) +{ + /* Append the "ALTER" part of the query */ + if (buf->append(STRING_WITH_LEN("ALTER "))) + return 1; + /* Append definer */ + append_definer(thd, buf, &(thd->lex->definer->user), &(thd->lex->definer->host)); + /* Append the left part of thd->query after event name part */ + if (buf->append(thd->lex->stmt_definition_begin, + thd->lex->stmt_definition_end - + thd->lex->stmt_definition_begin)) + return 1; + + return 0; +} +int wsrep_alter_event_query(THD *thd, uchar** buf, size_t* buf_len) +{ + String log_query; + + if (wsrep_alter_query_string(thd, &log_query)) + { + WSREP_WARN("events alter string failed: schema: %s, query: %s", + (thd->db ? thd->db : "(null)"), thd->query()); + return 1; + } + return wsrep_to_buf_helper(thd, log_query.ptr(), log_query.length(), buf, buf_len); +} +#endif /* WITH_WSREP */ /** @} (End of group Event_Scheduler) */ diff --git a/sql/ha_partition.cc b/sql/ha_partition.cc index 0824fc9b76b..4441e975f93 100644 --- a/sql/ha_partition.cc +++ b/sql/ha_partition.cc @@ -387,7 +387,13 @@ const char *ha_partition::table_type() const // we can do this since we only support a single engine type return m_file[0]->table_type(); } - +#ifdef WITH_WSREP +int ha_partition::wsrep_db_type() const +{ + // we can do this since we only support a single engine type + return ha_legacy_type(m_file[0]->ht); +} +#endif /* WITH_WSREP */ /* Destructor method diff --git a/sql/ha_partition.h b/sql/ha_partition.h index 71ae84b06a0..33a1a8ab43b 100644 --- a/sql/ha_partition.h +++ b/sql/ha_partition.h @@ -1282,7 +1282,9 @@ public: DBUG_ASSERT(h == m_file[i]->ht); return h; } - +#ifdef WITH_WSREP + virtual int wsrep_db_type() const; +#endif /* WITH_WSREP */ friend int cmp_key_rowid_part_id(void *ptr, uchar *ref1, uchar *ref2); }; diff --git a/sql/handler.cc b/sql/handler.cc index 2696d69bfcf..19f648219ac 100644 --- a/sql/handler.cc +++ b/sql/handler.cc @@ -51,6 +51,10 @@ #include "../storage/maria/ha_maria.h" #endif +#ifdef WITH_WSREP +#include "wsrep_mysqld.h" +#include "wsrep_xid.h" +#endif /* While we have legacy_db_type, we have this array to check for dups and to find handlerton from legacy_db_type. @@ -1157,10 +1161,27 @@ int ha_prepare(THD *thd) { if ((err= ht->prepare(ht, thd, all))) { +#ifdef WITH_WSREP + if (WSREP(thd) && ht->db_type== DB_TYPE_WSREP) + { + error= 1; + /* avoid sending error, if we need to replay */ + if (thd->wsrep_conflict_state!= MUST_REPLAY) + { + my_error(ER_LOCK_DEADLOCK, MYF(0), err); + } + } + else + { + /* not wsrep hton, bail to native mysql behavior */ +#endif my_error(ER_ERROR_DURING_COMMIT, MYF(0), err); ha_rollback_trans(thd, all); error=1; break; +#ifdef WITH_WSREP + } +#endif } } else @@ -1356,7 +1377,12 @@ int ha_commit_trans(THD *thd, bool all) mdl_request.init(MDL_key::COMMIT, "", "", MDL_INTENTION_EXCLUSIVE, MDL_EXPLICIT); +#ifdef WITH_WSREP + if (!WSREP(thd) && + thd->mdl_context.acquire_lock(&mdl_request, +#else if (thd->mdl_context.acquire_lock(&mdl_request, +#endif /* WITH_WSREP */ thd->variables.lock_wait_timeout)) { ha_rollback_trans(thd, all); @@ -1403,7 +1429,33 @@ int ha_commit_trans(THD *thd, bool all) err= ht->prepare(ht, thd, all); status_var_increment(thd->status_var.ha_prepare_count); if (err) - my_error(ER_ERROR_DURING_COMMIT, MYF(0), err); +#ifdef WITH_WSREP + { + if (WSREP(thd) && ht->db_type== DB_TYPE_WSREP) + { + error= 1; + switch (err) + { + case WSREP_TRX_SIZE_EXCEEDED: + /* give user size exeeded erro from wsrep_api.h */ + my_error(ER_ERROR_DURING_COMMIT, MYF(0), WSREP_SIZE_EXCEEDED); + break; + case WSREP_TRX_CERT_FAIL: + case WSREP_TRX_ERROR: + /* avoid sending error, if we need to replay */ + if (thd->wsrep_conflict_state!= MUST_REPLAY) + { + my_error(ER_LOCK_DEADLOCK, MYF(0), err); + } + } + } + else + /* not wsrep hton, bail to native mysql behavior */ +#endif /* WITH_WSREP */ + my_error(ER_ERROR_DURING_COMMIT, MYF(0), err); +#ifdef WITH_WSREP + } +#endif /* WITH_WSREP */ if (err) goto err; @@ -1414,6 +1466,13 @@ int ha_commit_trans(THD *thd, bool all) DEBUG_SYNC(thd, "ha_commit_trans_after_prepare"); DBUG_EXECUTE_IF("crash_commit_after_prepare", DBUG_SUICIDE();); +#ifdef WITH_WSREP + if (!error && wsrep_is_wsrep_xid(&thd->transaction.xid_state.xid)) + { + // xid was rewritten by wsrep + xid= wsrep_xid_seqno(thd->transaction.xid_state.xid); + } +#endif // WITH_WSREP if (!is_real_trans) { error= commit_one_phase_2(thd, all, trans, is_real_trans); @@ -1818,7 +1877,13 @@ static my_bool xarecover_handlerton(THD *unused, plugin_ref plugin, got, hton_name(hton)->str); for (int i=0; i < got; i ++) { +#ifdef WITH_WSREP + my_xid x=(wsrep_is_wsrep_xid(&info->list[i]) ? + wsrep_xid_seqno(info->list[i]) : + info->list[i].get_my_xid()); +#else my_xid x=info->list[i].get_my_xid(); +#endif /* WITH_WSREP */ if (!x) // not "mine" - that is generated by external TM { #ifndef DBUG_OFF @@ -3133,7 +3198,12 @@ int handler::update_auto_increment() variables->auto_increment_increment); auto_inc_intervals_count++; /* Row-based replication does not need to store intervals in binlog */ +#ifdef WITH_WSREP + if (((WSREP(thd) && wsrep_emulate_bin_log) || mysql_bin_log.is_open()) && + !thd->is_current_stmt_binlog_format_row()) +#else if (mysql_bin_log.is_open() && !thd->is_current_stmt_binlog_format_row()) +#endif /* WITH_WSREP */ thd->auto_inc_intervals_in_cur_stmt_for_binlog.append(auto_inc_interval_for_cur_row.minimum(), auto_inc_interval_for_cur_row.values(), variables->auto_increment_increment); @@ -5758,7 +5828,13 @@ static bool check_table_binlog_row_based(THD *thd, TABLE *table) return (thd->is_current_stmt_binlog_format_row() && table->s->cached_row_logging_check && (thd->variables.option_bits & OPTION_BIN_LOG) && +#ifdef WITH_WSREP + /* applier and replayer should not binlog */ + ((WSREP_EMULATE_BINLOG(thd) && (thd->wsrep_exec_mode != REPL_RECV)) || + mysql_bin_log.is_open())); +#else mysql_bin_log.is_open()); +#endif } @@ -5858,6 +5934,30 @@ static int binlog_log_row(TABLE* table, bool error= 0; THD *const thd= table->in_use; +#ifdef WITH_WSREP + /* only InnoDB tables will be replicated through binlog emulation */ + if (WSREP_EMULATE_BINLOG(thd) && + table->file->ht->db_type != DB_TYPE_INNODB && + !(table->file->ht->db_type == DB_TYPE_PARTITION_DB && + (((ha_partition*)(table->file))->wsrep_db_type() == DB_TYPE_INNODB))) + { + return 0; + } + + /* enforce wsrep_max_ws_rows */ + if (table->s->tmp_table == NO_TMP_TABLE && WSREP(thd)) + { + thd->wsrep_affected_rows++; + if (wsrep_max_ws_rows && + thd->wsrep_exec_mode != REPL_RECV && + thd->wsrep_affected_rows > wsrep_max_ws_rows) + { + trans_rollback_stmt(thd) || trans_rollback(thd); + my_message(ER_ERROR_DURING_COMMIT, "wsrep_max_ws_rows exceeded", MYF(0)); + return ER_ERROR_DURING_COMMIT; + } + } +#endif /* WITH_WSREP */ if (check_table_binlog_row_based(thd, table)) { MY_BITMAP cols; @@ -6189,6 +6289,64 @@ void handler::set_lock_type(enum thr_lock_type lock) table->reginfo.lock_type= lock; } +#ifdef WITH_WSREP +/** + @details + This function makes the storage engine to force the victim transaction + to abort. Currently, only innodb has this functionality, but any SE + implementing the wsrep API should provide this service to support + multi-master operation. + + @param bf_thd brute force THD asking for the abort + @param victim_thd victim THD to be aborted + + @return + always 0 +*/ + +int ha_wsrep_abort_transaction(THD *bf_thd, THD *victim_thd, my_bool signal) +{ + DBUG_ENTER("ha_wsrep_abort_transaction"); + if (!WSREP(bf_thd) && + !(bf_thd->variables.wsrep_OSU_method == WSREP_OSU_RSU && + bf_thd->wsrep_exec_mode == TOTAL_ORDER)) { + DBUG_RETURN(0); + } + + handlerton *hton= installed_htons[DB_TYPE_INNODB]; + if (hton && hton->wsrep_abort_transaction) + { + hton->wsrep_abort_transaction(hton, bf_thd, victim_thd, signal); + } + else + { + WSREP_WARN("cannot abort InnoDB transaction"); + } + + DBUG_RETURN(0); +} + +void ha_wsrep_fake_trx_id(THD *thd) +{ + DBUG_ENTER("ha_wsrep_fake_trx_id"); + if (!WSREP(thd)) + { + DBUG_VOID_RETURN; + } + + handlerton *hton= installed_htons[DB_TYPE_INNODB]; + if (hton && hton->wsrep_fake_trx_id) + { + hton->wsrep_fake_trx_id(hton, thd); + } + else + { + WSREP_WARN("cannot get fake InnoDB transaction ID"); + } + + DBUG_VOID_RETURN; +} +#endif /* WITH_WSREP */ #ifdef TRANS_LOG_MGM_EXAMPLE_CODE /* Example of transaction log management functions based on assumption that logs diff --git a/sql/handler.h b/sql/handler.h index 772f2e68dab..51b301ae22e 100644 --- a/sql/handler.h +++ b/sql/handler.h @@ -441,6 +441,7 @@ enum legacy_db_type DB_TYPE_BINLOG=21, DB_TYPE_PBXT=23, DB_TYPE_PERFORMANCE_SCHEMA=28, + DB_TYPE_WSREP=41, DB_TYPE_ARIA=42, DB_TYPE_TOKUDB=43, DB_TYPE_FIRST_DYNAMIC=44, @@ -1234,6 +1235,7 @@ struct handlerton enum handler_create_iterator_result (*create_iterator)(handlerton *hton, enum handler_iterator_type type, struct handler_iterator *fill_this_in); + /* Optional clauses in the CREATE/ALTER TABLE */ @@ -1346,6 +1348,14 @@ struct handlerton */ int (*discover_table_structure)(handlerton *hton, THD* thd, TABLE_SHARE *share, HA_CREATE_INFO *info); + +#ifdef WITH_WSREP + int (*wsrep_abort_transaction)(handlerton *hton, THD *bf_thd, + THD *victim_thd, my_bool signal); + int (*wsrep_set_checkpoint)(handlerton *hton, const XID* xid); + int (*wsrep_get_checkpoint)(handlerton *hton, XID* xid); + void (*wsrep_fake_trx_id)(handlerton *hton, THD *thd); +#endif /* WITH_WSREP */ }; @@ -3997,6 +4007,9 @@ bool key_uses_partial_cols(TABLE_SHARE *table, uint keyno); extern const char *ha_row_type[]; extern MYSQL_PLUGIN_IMPORT const char *tx_isolation_names[]; extern MYSQL_PLUGIN_IMPORT const char *binlog_format_names[]; +#ifdef WITH_WSREP +extern MYSQL_PLUGIN_IMPORT const char *wsrep_binlog_format_names[]; +#endif /* WITH_WSREP */ extern TYPELIB tx_isolation_typelib; extern const char *myisam_stats_method_names[]; extern ulong total_ha, total_ha_2pc; @@ -4116,6 +4129,10 @@ int ha_rollback_to_savepoint(THD *thd, SAVEPOINT *sv); bool ha_rollback_to_savepoint_can_release_mdl(THD *thd); int ha_savepoint(THD *thd, SAVEPOINT *sv); int ha_release_savepoint(THD *thd, SAVEPOINT *sv); +#ifdef WITH_WSREP +int ha_wsrep_abort_transaction(THD *bf_thd, THD *victim_thd, my_bool signal); +void ha_wsrep_fake_trx_id(THD *thd); +#endif /* WITH_WSREP */ /* these are called by storage engines */ void trans_register_ha(THD *thd, bool all, handlerton *ht); @@ -4146,6 +4163,9 @@ int ha_binlog_end(THD *thd); #define ha_binlog_wait(a) do {} while (0) #define ha_binlog_end(a) do {} while (0) #endif +#ifdef WITH_WSREP +void wsrep_brute_force_aborts(); +#endif const char *get_canonical_filename(handler *file, const char *path, char *tmp_path); diff --git a/sql/item_func.cc b/sql/item_func.cc index 0ca8f700bfc..7e4ddb7427c 100644 --- a/sql/item_func.cc +++ b/sql/item_func.cc @@ -2759,7 +2759,19 @@ void Item_func_rand::seed_random(Item *arg) TODO: do not do reinit 'rand' for every execute of PS/SP if args[0] is a constant. */ +#ifdef WITH_WSREP + uint32 tmp; + if (WSREP(current_thd)) + { + if (current_thd->wsrep_exec_mode==REPL_RECV) + tmp= current_thd->wsrep_rand; + else + tmp= current_thd->wsrep_rand= (uint32) arg->val_int(); + } else + tmp= (uint32) arg->val_int(); +#else uint32 tmp= (uint32) arg->val_int(); +#endif /* WITH_WSREP */ my_rnd_init(rand, (uint32) (tmp*0x10001L+55555555L), (uint32) (tmp*0x10000001L)); } diff --git a/sql/keycaches.cc b/sql/keycaches.cc index 120aa7e1029..a559c99bbd8 100644 --- a/sql/keycaches.cc +++ b/sql/keycaches.cc @@ -223,6 +223,7 @@ Rpl_filter *get_or_create_rpl_filter(const char *name, uint length) void free_rpl_filter(const char *name, Rpl_filter *filter) { delete filter; + filter= 0; } void free_all_rpl_filters() diff --git a/sql/lock.cc b/sql/lock.cc index 3354da2640b..965f7dcab99 100644 --- a/sql/lock.cc +++ b/sql/lock.cc @@ -83,6 +83,10 @@ #include "sql_acl.h" // SUPER_ACL #include <hash.h> +#ifdef WITH_WSREP +#include "wsrep_mysqld.h" +#endif /* WITH_WSREP */ + /** @defgroup Locking Locking @{ @@ -313,6 +317,10 @@ bool mysql_lock_tables(THD *thd, MYSQL_LOCK *sql_lock, uint flags) /* Copy the lock data array. thr_multi_lock() reorders its contents. */ memmove(sql_lock->locks + sql_lock->lock_count, sql_lock->locks, sql_lock->lock_count * sizeof(*sql_lock->locks)); +#ifdef WITH_WSREP + thd->lock_info.in_lock_tables= thd->in_lock_tables; +#endif + /* Lock on the copied half of the lock data array. */ rc= thr_lock_errno_to_mysql[(int) thr_multi_lock(sql_lock->locks + sql_lock->lock_count, @@ -323,6 +331,11 @@ bool mysql_lock_tables(THD *thd, MYSQL_LOCK *sql_lock, uint flags) end: THD_STAGE_INFO(thd, org_stage); +#ifdef WITH_WSREP + thd_proc_info(thd, "mysql_lock_tables(): unlocking tables II"); +#else /* WITH_WSREP */ + thd_proc_info(thd, 0); +#endif /* WITH_WSREP */ if (thd->killed) { @@ -335,6 +348,9 @@ end: my_error(rc, MYF(0)); thd->set_time_after_lock(); +#ifdef WITH_WSREP + thd_proc_info(thd, "exit mysqld_lock_tables()"); +#endif /* WITH_WSREP */ DBUG_RETURN(rc); } @@ -1024,11 +1040,29 @@ void Global_read_lock::unlock_global_read_lock(THD *thd) { thd->mdl_context.release_lock(m_mdl_blocks_commits_lock); m_mdl_blocks_commits_lock= NULL; +#ifdef WITH_WSREP + if (WSREP(thd) || wsrep_node_is_donor()) + { + wsrep_locked_seqno= WSREP_SEQNO_UNDEFINED; + wsrep->resume(wsrep); + /* resync here only if we did implicit desync earlier */ + if (!wsrep_desync && wsrep_node_is_synced()) + { + int ret = wsrep->resync(wsrep); + if (ret != WSREP_OK) + { + WSREP_WARN("resync failed %d for FTWRL: db: %s, query: %s", ret, + (thd->db ? thd->db : "(null)"), thd->query()); + DBUG_VOID_RETURN; + } + } + } +#endif /* WITH_WSREP */ } thd->mdl_context.release_lock(m_mdl_global_shared_lock); m_mdl_global_shared_lock= NULL; m_state= GRL_NONE; - + DBUG_VOID_RETURN; } @@ -1056,6 +1090,16 @@ bool Global_read_lock::make_global_read_lock_block_commit(THD *thd) If we didn't succeed lock_global_read_lock(), or if we already suceeded make_global_read_lock_block_commit(), do nothing. */ + +#ifdef WITH_WSREP + if (WSREP(thd) && m_mdl_blocks_commits_lock) + { + WSREP_DEBUG("GRL was in block commit mode when entering " + "make_global_read_lock_block_commit"); + DBUG_RETURN(FALSE); + } +#endif /* WITH_WSREP */ + if (m_state != GRL_ACQUIRED) DBUG_RETURN(0); @@ -1068,6 +1112,53 @@ bool Global_read_lock::make_global_read_lock_block_commit(THD *thd) m_mdl_blocks_commits_lock= mdl_request.ticket; m_state= GRL_ACQUIRED_AND_BLOCKS_COMMIT; +#ifdef WITH_WSREP + /* Native threads should bail out before wsrep oprations to follow. + Donor servicing thread is an exception, it should pause provider but not desync, + as it is already desynced in donor state + */ + if (!WSREP(thd) && !wsrep_node_is_donor()) + { + DBUG_RETURN(FALSE); + } + + /* if already desynced or donor, avoid double desyncing + if not in PC and synced, desyncing is not possible either + */ + if (wsrep_desync || !wsrep_node_is_synced()) + { + WSREP_DEBUG("desync set upfont, skipping implicit desync for FTWRL: %d", + wsrep_desync); + } + else + { + int rcode; + WSREP_DEBUG("running implicit desync for node"); + rcode = wsrep->desync(wsrep); + if (rcode != WSREP_OK) + { + WSREP_WARN("FTWRL desync failed %d for schema: %s, query: %s", + rcode, (thd->db ? thd->db : "(null)"), thd->query()); + my_message(ER_LOCK_DEADLOCK, "wsrep desync failed for FTWRL", MYF(0)); + DBUG_RETURN(TRUE); + } + } + + long long ret = wsrep->pause(wsrep); + if (ret >= 0) + { + wsrep_locked_seqno= ret; + } + else if (ret != -ENOSYS) /* -ENOSYS - no provider */ + { + WSREP_ERROR("Failed to pause provider: %lld (%s)", -ret, strerror(-ret)); + + /* m_mdl_blocks_commits_lock is always NULL here */ + wsrep_locked_seqno= WSREP_SEQNO_UNDEFINED; + my_error(ER_LOCK_DEADLOCK, MYF(0)); + DBUG_RETURN(TRUE); + } +#endif /* WITH_WSREP */ DBUG_RETURN(FALSE); } diff --git a/sql/log.cc b/sql/log.cc index ee92f22adb8..c1c6a4758d7 100644 --- a/sql/log.cc +++ b/sql/log.cc @@ -52,6 +52,9 @@ #include "sql_plugin.h" #include "rpl_handler.h" +#ifdef WITH_WSREP +#include "wsrep_mysqld.h" +#endif /* WITH_WSREP */ #include "debug_sync.h" #include "sql_show.h" #include "my_pthread.h" @@ -524,6 +527,9 @@ private: }; handlerton *binlog_hton; +#ifdef WITH_WSREP +extern handlerton *wsrep_hton; +#endif bool LOGGER::is_log_table_enabled(uint log_table_type) { @@ -538,6 +544,66 @@ bool LOGGER::is_log_table_enabled(uint log_table_type) } } +#ifdef WITH_WSREP +IO_CACHE * get_trans_log(THD * thd) +{ + binlog_cache_mngr *cache_mngr = (binlog_cache_mngr*) + thd_get_ha_data(thd, binlog_hton); + if (cache_mngr) + { + return cache_mngr->get_binlog_cache_log(true); + } + else + { + WSREP_DEBUG("binlog cache not initialized, conn :%ld", thd->thread_id); + return NULL; + } +} + + +bool wsrep_trans_cache_is_empty(THD *thd) +{ + binlog_cache_mngr *const cache_mngr= + (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton); + return (!cache_mngr || cache_mngr->trx_cache.empty()); +} + +void thd_binlog_flush_pending_rows_event(THD *thd, bool stmt_end) +{ + thd->binlog_flush_pending_rows_event(stmt_end); +} +void thd_binlog_trx_reset(THD * thd) +{ + /* + todo: fix autocommit select to not call the caller + */ + if (thd_get_ha_data(thd, binlog_hton) != NULL) + { + binlog_cache_mngr *const cache_mngr= + (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton); + if (cache_mngr) + { + cache_mngr->reset(false, true); + if (!cache_mngr->stmt_cache.empty()) + { + WSREP_DEBUG("pending events in stmt cache, sql: %s", thd->query()); + cache_mngr->stmt_cache.reset(); + } + } + } + thd->clear_binlog_table_maps(); +} + +void thd_binlog_rollback_stmt(THD * thd) +{ + WSREP_DEBUG("thd_binlog_rollback_stmt :%ld", thd->thread_id); + binlog_cache_mngr *const cache_mngr= + (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton); + if (cache_mngr) cache_mngr->trx_cache.set_prev_position(MY_OFF_T_UNDEF); +} + +#endif + /** Check if a given table is opened log table @@ -1589,7 +1655,11 @@ binlog_trans_log_savepos(THD *thd, my_off_t *pos) DBUG_ENTER("binlog_trans_log_savepos"); DBUG_ASSERT(pos != NULL); binlog_cache_mngr *const cache_mngr= thd->binlog_setup_trx_data(); +#ifdef WITH_WSREP + DBUG_ASSERT((WSREP(thd) && wsrep_emulate_bin_log) || mysql_bin_log.is_open()); +#else DBUG_ASSERT(mysql_bin_log.is_open()); +#endif *pos= cache_mngr->trx_cache.get_byte_position(); DBUG_PRINT("return", ("*pos: %lu", (ulong) *pos)); DBUG_VOID_RETURN; @@ -1637,7 +1707,16 @@ binlog_trans_log_truncate(THD *thd, my_off_t pos) int binlog_init(void *p) { binlog_hton= (handlerton *)p; +#ifdef WITH_WSREP + if (WSREP_ON) + binlog_hton->state= SHOW_OPTION_YES; + else + { +#endif /* WITH_WSREP */ binlog_hton->state=opt_bin_log ? SHOW_OPTION_YES : SHOW_OPTION_NO; +#ifdef WITH_WSREP + } +#endif /* WITH_WSREP */ binlog_hton->db_type=DB_TYPE_BINLOG; binlog_hton->savepoint_offset= sizeof(my_off_t); binlog_hton->close_connection= binlog_close_connection; @@ -1653,15 +1732,36 @@ int binlog_init(void *p) return 0; } +#ifdef WITH_WSREP +#include "wsrep_binlog.h" +#endif /* WITH_WSREP */ static int binlog_close_connection(handlerton *hton, THD *thd) { + DBUG_ENTER("binlog_close_connection"); binlog_cache_mngr *const cache_mngr= (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton); +#ifdef WITH_WSREP + if (cache_mngr && !cache_mngr->trx_cache.empty()) { + IO_CACHE* cache= get_trans_log(thd); + uchar *buf; + size_t len=0; + wsrep_write_cache_buf(cache, &buf, &len); + WSREP_WARN("binlog trx cache not empty (%lu bytes) @ connection close %lu", + len, thd->thread_id); + if (len > 0) wsrep_dump_rbr_buf(thd, buf, len); + + cache = cache_mngr->get_binlog_cache_log(false); + wsrep_write_cache_buf(cache, &buf, &len); + WSREP_WARN("binlog stmt cache not empty (%lu bytes) @ connection close %lu", + len, thd->thread_id); + if (len > 0) wsrep_dump_rbr_buf(thd, buf, len); + } +#endif /* WITH_WSREP */ DBUG_ASSERT(cache_mngr->trx_cache.empty() && cache_mngr->stmt_cache.empty()); thd_set_ha_data(thd, binlog_hton, NULL); cache_mngr->~binlog_cache_mngr(); my_free(cache_mngr); - return 0; + DBUG_RETURN(0); } /* @@ -1757,6 +1857,14 @@ binlog_commit_flush_stmt_cache(THD *thd, bool all, binlog_cache_mngr *cache_mngr) { DBUG_ENTER("binlog_commit_flush_stmt_cache"); +#ifdef WITH_WSREP + if (thd->wsrep_mysql_replicated > 0) + { + WSREP_DEBUG("avoiding binlog_commit_flush_trx_cache: %d", thd->wsrep_mysql_replicated); + return 0; + } +#endif + Query_log_event end_evt(thd, STRING_WITH_LEN("COMMIT"), FALSE, TRUE, TRUE, 0); DBUG_RETURN(binlog_flush_cache(thd, cache_mngr, &end_evt, all, TRUE, FALSE)); @@ -1912,12 +2020,12 @@ static bool trans_cannot_safely_rollback(THD *thd, bool all) return ((thd->variables.option_bits & OPTION_KEEP_LOG) || (trans_has_updated_non_trans_table(thd) && - thd->variables.binlog_format == BINLOG_FORMAT_STMT) || + WSREP_FORMAT(thd->variables.binlog_format) == BINLOG_FORMAT_STMT) || (cache_mngr->trx_cache.changes_to_non_trans_temp_table() && - thd->variables.binlog_format == BINLOG_FORMAT_MIXED) || + WSREP_FORMAT(thd->variables.binlog_format) == BINLOG_FORMAT_MIXED) || (trans_has_updated_non_trans_table(thd) && ending_single_stmt_trans(thd,all) && - thd->variables.binlog_format == BINLOG_FORMAT_MIXED)); + WSREP_FORMAT(thd->variables.binlog_format) == BINLOG_FORMAT_MIXED)); } @@ -1939,6 +2047,9 @@ static int binlog_commit(handlerton *hton, THD *thd, bool all) DBUG_ENTER("binlog_commit"); binlog_cache_mngr *const cache_mngr= (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton); +#ifdef WITH_WSREP + if (!cache_mngr) DBUG_RETURN(0); +#endif /* WITH_WSREP */ DBUG_PRINT("debug", ("all: %d, in_transaction: %s, all.modified_non_trans_table: %s, stmt.modified_non_trans_table: %s", @@ -1995,6 +2106,9 @@ static int binlog_rollback(handlerton *hton, THD *thd, bool all) int error= 0; binlog_cache_mngr *const cache_mngr= (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton); +#ifdef WITH_WSREP + if (!cache_mngr) DBUG_RETURN(0); +#endif /* WITH_WSREP */ DBUG_PRINT("debug", ("all: %s, all.modified_non_trans_table: %s, stmt.modified_non_trans_table: %s", YESNO(all), @@ -2023,8 +2137,12 @@ static int binlog_rollback(handlerton *hton, THD *thd, bool all) cache_mngr->reset(false, true); DBUG_RETURN(error); } - +#ifdef WITH_WSREP + if (!wsrep_emulate_bin_log && + mysql_bin_log.check_write_error(thd)) +#else if (mysql_bin_log.check_write_error(thd)) +#endif { /* "all == true" means that a "rollback statement" triggered the error and @@ -2040,7 +2158,7 @@ static int binlog_rollback(handlerton *hton, THD *thd, bool all) error |= binlog_truncate_trx_cache(thd, cache_mngr, all); } else if (!error) - { + { if (ending_trans(thd, all) && trans_cannot_safely_rollback(thd, all)) error= binlog_rollback_flush_trx_cache(thd, all, cache_mngr); /* @@ -2055,9 +2173,9 @@ static int binlog_rollback(handlerton *hton, THD *thd, bool all) else if (ending_trans(thd, all) || (!(thd->variables.option_bits & OPTION_KEEP_LOG) && (!stmt_has_updated_non_trans_table(thd) || - thd->variables.binlog_format != BINLOG_FORMAT_STMT) && + WSREP_FORMAT(thd->variables.binlog_format) != BINLOG_FORMAT_STMT) && (!cache_mngr->trx_cache.changes_to_non_trans_temp_table() || - thd->variables.binlog_format != BINLOG_FORMAT_MIXED))) + WSREP_FORMAT(thd->variables.binlog_format) != BINLOG_FORMAT_MIXED))) error= binlog_truncate_trx_cache(thd, cache_mngr, all); } @@ -2164,8 +2282,15 @@ static int binlog_savepoint_set(handlerton *hton, THD *thd, void *sv) { DBUG_ENTER("binlog_savepoint_set"); int error= 1; - char buf[1024]; +#ifdef WITH_WSREP + if (wsrep_emulate_bin_log) DBUG_RETURN(0); + /* + Clear table maps before writing SAVEPOINT event. This enforces + recreation of table map events for the following row event. + */ + thd->clear_binlog_table_maps(); +#endif /* WITH_WSREP */ String log_query(buf, sizeof(buf), &my_charset_bin); if (log_query.copy(STRING_WITH_LEN("SAVEPOINT "), &my_charset_bin) || append_identifier(thd, &log_query, @@ -2202,7 +2327,12 @@ static int binlog_savepoint_rollback(handlerton *hton, THD *thd, void *sv) non-transactional table. Otherwise, truncate the binlog cache starting from the SAVEPOINT command. */ +#ifdef WITH_WSREP + if (!wsrep_emulate_bin_log && + unlikely(trans_has_updated_non_trans_table(thd) || +#else if (unlikely(trans_has_updated_non_trans_table(thd) || +#endif (thd->variables.option_bits & OPTION_KEEP_LOG))) { char buf[1024]; @@ -2216,7 +2346,10 @@ static int binlog_savepoint_rollback(handlerton *hton, THD *thd, void *sv) TRUE, FALSE, TRUE, errcode); DBUG_RETURN(mysql_bin_log.write(&qinfo)); } - binlog_trans_log_truncate(thd, *(my_off_t*)sv); +#ifdef WITH_WSREP + if (!wsrep_emulate_bin_log) +#endif + binlog_trans_log_truncate(thd, *(my_off_t*)sv); DBUG_RETURN(0); } @@ -3553,7 +3686,10 @@ bool MYSQL_BIN_LOG::open(const char *log_name, new_xid_list_entry->binlog_id= current_binlog_id; /* Remove any initial entries with no pending XIDs. */ while ((b= binlog_xid_count_list.head()) && b->xid_count == 0) + { my_free(binlog_xid_count_list.get()); + } + mysql_cond_broadcast(&COND_xid_list); binlog_xid_count_list.push_back(new_xid_list_entry); mysql_mutex_unlock(&LOCK_xid_list); @@ -4074,6 +4210,7 @@ err: DBUG_ASSERT(b->xid_count == 0); my_free(binlog_xid_count_list.get()); } + mysql_cond_broadcast(&COND_xid_list); reset_master_pending--; mysql_mutex_unlock(&LOCK_xid_list); } @@ -4084,6 +4221,26 @@ err: } +void MYSQL_BIN_LOG::wait_for_last_checkpoint_event() +{ + mysql_mutex_lock(&LOCK_xid_list); + for (;;) + { + if (binlog_xid_count_list.is_last(binlog_xid_count_list.head())) + break; + mysql_cond_wait(&COND_xid_list, &LOCK_xid_list); + } + mysql_mutex_unlock(&LOCK_xid_list); + + /* + LOCK_xid_list and LOCK_log are chained, so the LOCK_log will only be + obtained after mark_xid_done() has written the last checkpoint event. + */ + mysql_mutex_lock(&LOCK_log); + mysql_mutex_unlock(&LOCK_log); +} + + /** Delete relay log files prior to rli->group_relay_log_name (i.e. all logs which are not involved in a non-finished group @@ -5267,6 +5424,7 @@ binlog_cache_mngr *THD::binlog_setup_trx_data() DBUG_RETURN(cache_mngr); } + /* Function to start a statement and optionally a transaction for the binary log. @@ -5387,7 +5545,12 @@ int THD::binlog_write_table_map(TABLE *table, bool is_transactional, is_transactional= 1; /* Pre-conditions */ +#ifdef WITH_WSREP + DBUG_ASSERT(is_current_stmt_binlog_format_row() && + (WSREP_EMULATE_BINLOG(this) || mysql_bin_log.is_open())); +#else DBUG_ASSERT(is_current_stmt_binlog_format_row() && mysql_bin_log.is_open()); +#endif DBUG_ASSERT(table->s->table_map_id != ULONG_MAX); Table_map_log_event @@ -5527,7 +5690,11 @@ MYSQL_BIN_LOG::flush_and_set_pending_rows_event(THD *thd, bool is_transactional) { DBUG_ENTER("MYSQL_BIN_LOG::flush_and_set_pending_rows_event(event)"); +#ifdef WITH_WSREP + DBUG_ASSERT(WSREP_EMULATE_BINLOG(thd) || mysql_bin_log.is_open()); +#else DBUG_ASSERT(mysql_bin_log.is_open()); +#endif DBUG_PRINT("enter", ("event: 0x%lx", (long) event)); int error= 0; @@ -5822,11 +5989,23 @@ bool MYSQL_BIN_LOG::write(Log_event *event_info, my_bool *with_annotate) binlog_cache_data *cache_data= 0; bool is_trans_cache= FALSE; bool using_trans= event_info->use_trans_cache(); - bool direct= event_info->use_direct_logging(); + bool direct; ulong prev_binlog_id; DBUG_ENTER("MYSQL_BIN_LOG::write(Log_event *)"); LINT_INIT(prev_binlog_id); +#ifdef WITH_WSREP + /* + When binary logging is not enabled (--log-bin=0), wsrep-patch partially + enables it without opening the binlog file (MSQL_BIN_LOG::open(). + So, avoid writing directly to binlog file. + */ + if (wsrep_emulate_bin_log) + direct= false; + else +#endif /* WITH_WSREP */ + direct= event_info->use_direct_logging(); + if (thd->variables.option_bits & OPTION_GTID_BEGIN) { DBUG_PRINT("info", ("OPTION_GTID_BEGIN was set")); @@ -5861,7 +6040,13 @@ bool MYSQL_BIN_LOG::write(Log_event *event_info, my_bool *with_annotate) mostly called if is_open() *was* true a few instructions before, but it could have changed since. */ +#ifdef WITH_WSREP + /* applier and replayer can skip writing binlog events */ + if ((WSREP_EMULATE_BINLOG(thd) && (thd->wsrep_exec_mode != REPL_RECV)) || + is_open()) +#else if (likely(is_open())) +#endif { my_off_t UNINIT_VAR(my_org_b_tell); #ifdef HAVE_REPLICATION @@ -6212,6 +6397,15 @@ int MYSQL_BIN_LOG::rotate(bool force_rotate, bool* check_purge) { int error= 0; DBUG_ENTER("MYSQL_BIN_LOG::rotate"); +#ifdef WITH_WSREP + if (WSREP_ON && wsrep_to_isolation) + { + *check_purge= false; + WSREP_DEBUG("avoiding binlog rotate due to TO isolation: %d", + wsrep_to_isolation); + DBUG_RETURN(0); + } +#endif //todo: fix the macro def and restore safe_mutex_assert_owner(&LOCK_log); *check_purge= false; @@ -6761,6 +6955,13 @@ MYSQL_BIN_LOG::write_transaction_to_binlog(THD *thd, Ha_trx_info *ha_info; DBUG_ENTER("MYSQL_BIN_LOG::write_transaction_to_binlog"); +#ifdef WITH_WSREP + /* + Control should not be allowed beyond this point in wsrep_emulate_bin_log + mode. + */ + if (wsrep_emulate_bin_log) DBUG_RETURN(0); +#endif /* WITH_WSREP */ entry.thd= thd; entry.cache_mngr= cache_mngr; entry.error= 0; @@ -6769,6 +6970,7 @@ MYSQL_BIN_LOG::write_transaction_to_binlog(THD *thd, entry.using_trx_cache= using_trx_cache; entry.need_unlog= false; ha_info= all ? thd->transaction.all.ha_list : thd->transaction.stmt.ha_list; + for (; ha_info; ha_info= ha_info->next()) { if (ha_info->is_started() && ha_info->ht() != binlog_hton && @@ -8374,7 +8576,7 @@ ulong tc_log_page_waits= 0; static const uchar tc_log_magic[]={(uchar) 254, 0x23, 0x05, 0x74}; -ulong opt_tc_log_size= TC_LOG_MIN_SIZE; +ulong opt_tc_log_size; ulong tc_log_max_pages_used=0, tc_log_page_size=0, tc_log_cur_pages_used=0; int TC_LOG_MMAP::open(const char *opt_name) @@ -8387,7 +8589,6 @@ int TC_LOG_MMAP::open(const char *opt_name) DBUG_ASSERT(opt_name && opt_name[0]); tc_log_page_size= my_getpagesize(); - DBUG_ASSERT(TC_LOG_PAGE_SIZE % tc_log_page_size == 0); fn_format(logname,opt_name,mysql_data_home,"",MY_UNPACK_FILENAME); if ((fd= mysql_file_open(key_file_tclog, logname, O_RDWR, MYF(0))) < 0) @@ -8726,6 +8927,7 @@ mmap_do_checkpoint_callback(void *data) int TC_LOG_MMAP::unlog(ulong cookie, my_xid xid) { pending_cookies *full_buffer= NULL; + uint32 ncookies= tc_log_page_size / sizeof(my_xid); DBUG_ASSERT(*(my_xid *)(data+cookie) == xid); /* @@ -8739,7 +8941,7 @@ int TC_LOG_MMAP::unlog(ulong cookie, my_xid xid) mysql_mutex_lock(&LOCK_pending_checkpoint); if (pending_checkpoint == NULL) { - uint32 size= sizeof(*pending_checkpoint); + uint32 size= sizeof(*pending_checkpoint) + sizeof(ulong) * (ncookies - 1); if (!(pending_checkpoint= (pending_cookies *)my_malloc(size, MYF(MY_ZEROFILL)))) { @@ -8750,8 +8952,7 @@ int TC_LOG_MMAP::unlog(ulong cookie, my_xid xid) } pending_checkpoint->cookies[pending_checkpoint->count++]= cookie; - if (pending_checkpoint->count == sizeof(pending_checkpoint->cookies) / - sizeof(pending_checkpoint->cookies[0])) + if (pending_checkpoint->count == ncookies) { full_buffer= pending_checkpoint; pending_checkpoint= NULL; @@ -8785,7 +8986,7 @@ TC_LOG_MMAP::commit_checkpoint_notify(void *cookie) if (count == 0) { uint i; - for (i= 0; i < sizeof(pending->cookies)/sizeof(pending->cookies[0]); ++i) + for (i= 0; i < tc_log_page_size / sizeof(my_xid); ++i) delete_entry(pending->cookies[i]); my_free(pending); } @@ -8990,7 +9191,14 @@ TC_LOG_BINLOG::log_and_order(THD *thd, my_xid xid, bool all, binlog_cache_mngr *cache_mngr= thd->binlog_setup_trx_data(); if (!cache_mngr) +#ifdef WITH_WSREP + { + WSREP_DEBUG("Skipping empty log_xid: %s", thd->query()); + DBUG_RETURN(0); + } +#else DBUG_RETURN(0); +#endif /* WITH_WSREP */ cache_mngr->using_xa= TRUE; cache_mngr->xa_xid= xid; @@ -9092,7 +9300,7 @@ TC_LOG_BINLOG::mark_xid_done(ulong binlog_id, bool write_checkpoint) */ if (unlikely(reset_master_pending)) { - mysql_cond_signal(&COND_xid_list); + mysql_cond_broadcast(&COND_xid_list); mysql_mutex_unlock(&LOCK_xid_list); DBUG_VOID_RETURN; } @@ -9130,8 +9338,7 @@ TC_LOG_BINLOG::mark_xid_done(ulong binlog_id, bool write_checkpoint) mysql_mutex_lock(&LOCK_log); mysql_mutex_lock(&LOCK_xid_list); --mark_xid_done_waiting; - if (unlikely(reset_master_pending)) - mysql_cond_signal(&COND_xid_list); + mysql_cond_broadcast(&COND_xid_list); /* We need to reload current_binlog_id due to release/re-take of lock. */ current= current_binlog_id; diff --git a/sql/log.h b/sql/log.h index 1c5e09c5c09..9eb9f88031d 100644 --- a/sql/log.h +++ b/sql/log.h @@ -105,7 +105,7 @@ public: int log_and_order(THD *thd, my_xid xid, bool all, bool need_prepare_ordered, bool need_commit_ordered) { - DBUG_ASSERT(0 /* Internal error - TC_LOG_DUMMY::log_and_order() called */); + //DBUG_ASSERT(0 /* Internal error - TC_LOG_DUMMY::log_and_order() called */); return 1; } int unlog(ulong cookie, my_xid xid) { return 0; } @@ -113,7 +113,6 @@ public: }; #define TC_LOG_PAGE_SIZE 8192 -#define TC_LOG_MIN_SIZE (3*TC_LOG_PAGE_SIZE) #ifdef HAVE_MMAP class TC_LOG_MMAP: public TC_LOG @@ -128,7 +127,7 @@ class TC_LOG_MMAP: public TC_LOG struct pending_cookies { uint count; uint pending_count; - ulong cookies[TC_LOG_PAGE_SIZE/sizeof(my_xid)]; + ulong cookies[1]; }; private: @@ -775,6 +774,7 @@ public: bool need_mutex); bool reset_logs(THD* thd, bool create_new_log, rpl_gtid *init_state, uint32 init_state_len); + void wait_for_last_checkpoint_event(); void close(uint exiting); void clear_inuse_flag_when_closing(File file); @@ -978,12 +978,29 @@ public: }; enum enum_binlog_format { + /* + statement-based except for cases where only row-based can work (UUID() + etc): + */ BINLOG_FORMAT_MIXED= 0, ///< statement if safe, otherwise row - autodetected BINLOG_FORMAT_STMT= 1, ///< statement-based BINLOG_FORMAT_ROW= 2, ///< row-based BINLOG_FORMAT_UNSPEC=3 ///< thd_binlog_format() returns it when binlog is closed }; +#ifdef WITH_WSREP +IO_CACHE * get_trans_log(THD * thd); +bool wsrep_trans_cache_is_empty(THD *thd); +void thd_binlog_flush_pending_rows_event(THD *thd, bool stmt_end); +void thd_binlog_trx_reset(THD * thd); +void thd_binlog_rollback_stmt(THD * thd); + +#define WSREP_FORMAT(my_format) \ + ((wsrep_forced_binlog_format != BINLOG_FORMAT_UNSPEC) ? \ + wsrep_forced_binlog_format : my_format) +#else +#define WSREP_FORMAT(my_format) my_format +#endif int query_error_code(THD *thd, bool not_killed); uint purge_log_get_error_code(int res); diff --git a/sql/log_event.cc b/sql/log_event.cc index 43e8df7b801..1ae65a02f15 100644 --- a/sql/log_event.cc +++ b/sql/log_event.cc @@ -44,6 +44,9 @@ #include <strfunc.h> #include "compat56.h" +#if WITH_WSREP +#include "wsrep_mysqld.h" +#endif #endif /* MYSQL_CLIENT */ #include <base64.h> @@ -3134,7 +3137,14 @@ Query_log_event::Query_log_event(THD* thd_arg, const char* query_arg, master_data_written(0) { time_t end_time; - +#ifdef WITH_WSREP + /* + If Query_log_event will contain non trans keyword (not BEGIN, COMMIT, + SAVEPOINT or ROLLBACK) we disable PA for this transaction. + */ + if (!is_trans_keyword()) + thd->wsrep_PA_safe= false; +#endif /* WITH_WSREP */ memset(&user, 0, sizeof(user)); memset(&host, 0, sizeof(host)); @@ -4113,7 +4123,11 @@ int Query_log_event::do_apply_event(rpl_group_info *rgi, uint64 sub_id= 0; rpl_gtid gtid; Relay_log_info const *rli= rgi->rli; +#ifdef WITH_WSREP + Rpl_filter *rpl_filter= (rli->mi) ? rli->mi->rpl_filter: NULL; +#else Rpl_filter *rpl_filter= rli->mi->rpl_filter; +#endif /* WITH_WSREP */ bool current_stmt_is_commit; DBUG_ENTER("Query_log_event::do_apply_event"); @@ -4602,6 +4616,21 @@ Query_log_event::do_shall_skip(rpl_group_info *rgi) DBUG_RETURN(Log_event::EVENT_SKIP_COUNT); } } +#ifdef WITH_WSREP + else if (wsrep_mysql_replication_bundle && WSREP_ON && thd->wsrep_mysql_replicated > 0 && + (!strncasecmp(query , "BEGIN", 5) || !strncasecmp(query , "COMMIT", 6))) + { + if (++thd->wsrep_mysql_replicated < (int)wsrep_mysql_replication_bundle) + { + WSREP_DEBUG("skipping wsrep commit %d", thd->wsrep_mysql_replicated); + DBUG_RETURN(Log_event::EVENT_SKIP_IGNORE); + } + else + { + thd->wsrep_mysql_replicated = 0; + } + } +#endif DBUG_RETURN(Log_event::do_shall_skip(rgi)); } @@ -7395,6 +7424,10 @@ int Xid_log_event::do_apply_event(rpl_group_info *rgi) Record any GTID in the same transaction, so slave state is transactionally consistent. */ + + /*Set wsrep_affected_rows = 0 */ + thd->wsrep_affected_rows= 0; + if (rgi->gtid_pending) { sub_id= rgi->gtid_sub_id; @@ -7455,6 +7488,20 @@ Xid_log_event::do_shall_skip(rpl_group_info *rgi) thd->variables.option_bits&= ~(OPTION_BEGIN | OPTION_GTID_BEGIN); DBUG_RETURN(Log_event::EVENT_SKIP_COUNT); } +#ifdef WITH_WSREP + else if (wsrep_mysql_replication_bundle && WSREP_ON) + { + if (++thd->wsrep_mysql_replicated < (int)wsrep_mysql_replication_bundle) + { + WSREP_DEBUG("skipping wsrep commit %d", thd->wsrep_mysql_replicated); + DBUG_RETURN(Log_event::EVENT_SKIP_IGNORE); + } + else + { + thd->wsrep_mysql_replicated = 0; + } + } +#endif DBUG_RETURN(Log_event::do_shall_skip(rgi)); } #endif /* !MYSQL_CLIENT */ @@ -8477,6 +8524,14 @@ err: end_io_cache(&file); if (fd >= 0) mysql_file_close(fd, MYF(0)); +#ifdef WITH_WSREP + if (WSREP(thd)) + thd_proc_info(thd, "exit Create_file_log_event::do_apply_event()"); + else + thd_proc_info(thd, 0); +#else /* WITH_WSREP */ + thd_proc_info(thd, 0); +#endif /* WITH_WSREP */ return error != 0; } #endif /* defined(HAVE_REPLICATION) && !defined(MYSQL_CLIENT) */ @@ -8648,6 +8703,14 @@ int Append_block_log_event::do_apply_event(rpl_group_info *rgi) err: if (fd >= 0) mysql_file_close(fd, MYF(0)); +#ifdef WITH_WSREP + if (WSREP(thd)) + thd_proc_info(thd, "exit Append_block_log_event::do_apply_event()"); + else + thd_proc_info(thd, 0); +#else /* WITH_WSREP */ + thd_proc_info(thd, 0); +#endif /* WITH_WSREP */ DBUG_RETURN(error); } #endif @@ -9734,6 +9797,20 @@ int Rows_log_event::do_apply_event(rpl_group_info *rgi) if (open_and_lock_tables(thd, rgi->tables_to_lock, FALSE, 0)) { uint actual_error= thd->get_stmt_da()->sql_errno(); + +#ifdef WITH_WSREP + if (WSREP(thd)) + { + WSREP_WARN("BF applier failed to open_and_lock_tables: %u, fatal: %d " + "wsrep = (exec_mode: %d conflict_state: %d seqno: %lld)", + thd->get_stmt_da()->sql_errno(), + thd->is_fatal_error, + thd->wsrep_exec_mode, + thd->wsrep_conflict_state, + (long long)wsrep_thd_trx_seqno(thd)); + } +#endif + if ((thd->is_slave_error || thd->is_fatal_error) && !is_parallel_retry_error(rgi, actual_error)) { @@ -9868,7 +9945,18 @@ int Rows_log_event::do_apply_event(rpl_group_info *rgi) } #ifdef HAVE_QUERY_CACHE +#ifdef WITH_WSREP + /* + Moved invalidation right before the call to rows_event_stmt_cleanup(), + to avoid query cache being polluted with stale entries. + */ + if (! (WSREP(thd) && (thd->wsrep_exec_mode == REPL_RECV))) + { +#endif /* WITH_WSREP */ query_cache.invalidate_locked_for_write(thd, rgi->tables_to_lock); +#ifdef WITH_WSREP + } +#endif /* WITH_WSREP */ #endif } @@ -10056,6 +10144,14 @@ int Rows_log_event::do_apply_event(rpl_group_info *rgi) /* remove trigger's tables */ if (slave_run_triggers_for_rbr) restore_empty_query_table_list(thd->lex); + +#if defined(WITH_WSREP) && defined(HAVE_QUERY_CACHE) + if (WSREP(thd) && thd->wsrep_exec_mode == REPL_RECV) + { + query_cache.invalidate_locked_for_write(thd, rgi->tables_to_lock); + } +#endif /* WITH_WSREP && HAVE_QUERY_CACHE */ + if (get_flags(STMT_END_F) && (error= rows_event_stmt_cleanup(rgi, thd))) slave_rows_error_report(ERROR_LEVEL, thd->is_error() ? 0 : error, @@ -10913,7 +11009,12 @@ check_table_map(rpl_group_info *rgi, RPL_TABLE_LIST *table_list) enum_tbl_map_status res= OK_TO_PROCESS; Relay_log_info *rli= rgi->rli; +#ifdef WITH_WSREP + if ((rgi->thd->slave_thread /* filtering is for slave only */ || + (WSREP(rgi->thd) && rgi->thd->wsrep_applier)) && +#else if (rgi->thd->slave_thread /* filtering is for slave only */ && +#endif /* WITH_WSREP */ (!rli->mi->rpl_filter->db_ok(table_list->db) || (rli->mi->rpl_filter->is_on() && !rli->mi->rpl_filter->tables_ok("", table_list)))) res= FILTERED_OUT; @@ -11670,8 +11771,23 @@ int Write_rows_log_event::do_exec_row(rpl_group_info *rgi) { DBUG_ASSERT(m_table != NULL); +#ifdef WITH_WSREP +#ifdef WSREP_PROC_INFO + char info[64]; + info[sizeof(info) - 1] = '\0'; + snprintf(info, sizeof(info) - 1, "Write_rows_log_event::write_row(%lld)", + (long long) wsrep_thd_trx_seqno(thd)); + const char* tmp = (WSREP(thd)) ? thd_proc_info(thd, info) : NULL; +#else + const char* tmp = (WSREP(thd)) ? + thd_proc_info(thd,"Write_rows_log_event::write_row()") : NULL; +#endif /* WSREP_PROC_INFO */ +#endif /* WITH_WSREP */ int error= write_row(rgi, slave_exec_mode == SLAVE_EXEC_MODE_IDEMPOTENT); +#ifdef WITH_WSREP + if (WSREP(thd)) thd_proc_info(thd, tmp); +#endif /* WITH_WSREP */ if (error && !thd->is_error()) { DBUG_ASSERT(0); @@ -12354,11 +12470,33 @@ int Delete_rows_log_event::do_exec_row(rpl_group_info *rgi) slave_run_triggers_for_rbr && !master_had_triggers && m_table->triggers; DBUG_ASSERT(m_table != NULL); +#ifdef WITH_WSREP +#ifdef WSREP_PROC_INFO + char info[64]; + info[sizeof(info) - 1] = '\0'; + snprintf(info, sizeof(info) - 1, "Delete_rows_log_event::find_row(%lld)", + (long long) wsrep_thd_trx_seqno(thd)); + const char* tmp = (WSREP(thd)) ? thd_proc_info(thd, info) : NULL; +#else + const char* tmp = (WSREP(thd)) ? + thd_proc_info(thd,"Delete_rows_log_event::find_row()") : NULL; +#endif /* WSREP_PROC_INFO */ +#endif /* WITH_WSREP */ if (!(error= find_row(rgi))) { /* Delete the record found, located in record[0] */ +#ifdef WITH_WSREP +#ifdef WSREP_PROC_INFO + snprintf(info, sizeof(info) - 1, + "Delete_rows_log_event::ha_delete_row(%lld)", + (long long) wsrep_thd_trx_seqno(thd)); + if (WSREP(thd)) thd_proc_info(thd, info); +#else + if (WSREP(thd)) thd_proc_info(thd,"Delete_rows_log_event::ha_delete_row()"); +#endif /* WSREP_PROC_INFO */ +#endif /* WITH_WSREP */ if (invoke_triggers && process_triggers(TRG_EVENT_DELETE, TRG_ACTION_BEFORE, FALSE)) error= HA_ERR_GENERIC; // in case if error is not set yet @@ -12369,6 +12507,9 @@ int Delete_rows_log_event::do_exec_row(rpl_group_info *rgi) error= HA_ERR_GENERIC; // in case if error is not set yet m_table->file->ha_index_or_rnd_end(); } +#ifdef WITH_WSREP + if (WSREP(thd)) thd_proc_info(thd, tmp); +#endif /* WITH_WSREP */ return error; } @@ -12498,6 +12639,18 @@ Update_rows_log_event::do_exec_row(rpl_group_info *rgi) slave_run_triggers_for_rbr && !master_had_triggers && m_table->triggers; DBUG_ASSERT(m_table != NULL); +#ifdef WITH_WSREP +#ifdef WSREP_PROC_INFO + char info[64]; + info[sizeof(info) - 1] = '\0'; + snprintf(info, sizeof(info) - 1, "Update_rows_log_event::find_row(%lld)", + (long long) wsrep_thd_trx_seqno(thd)); + const char* tmp = (WSREP(thd)) ? thd_proc_info(thd, info) : NULL; +#else + const char* tmp = (WSREP(thd)) ? + thd_proc_info(thd,"Update_rows_log_event::find_row()") : NULL; +#endif /* WSREP_PROC_INFO */ +#endif /* WITH_WSREP */ int error= find_row(rgi); if (error) { @@ -12524,6 +12677,17 @@ Update_rows_log_event::do_exec_row(rpl_group_info *rgi) store_record(m_table,record[1]); m_curr_row= m_curr_row_end; +#ifdef WITH_WSREP +#ifdef WSREP_PROC_INFO + snprintf(info, sizeof(info) - 1, + "Update_rows_log_event::unpack_current_row(%lld)", + (long long) wsrep_thd_trx_seqno(thd)); + if (WSREP(thd)) thd_proc_info(thd, info); +#else + if (WSREP(thd)) + thd_proc_info(thd,"Update_rows_log_event::unpack_current_row()"); +#endif /* WSREP_PROC_INFO */ +#endif /* WITH_WSREP */ /* this also updates m_curr_row_end */ if ((error= unpack_current_row(rgi))) goto err; @@ -12542,6 +12706,17 @@ Update_rows_log_event::do_exec_row(rpl_group_info *rgi) DBUG_DUMP("new values", m_table->record[0], m_table->s->reclength); #endif +#ifdef WITH_WSREP +#ifdef WSREP_PROC_INFO + snprintf(info, sizeof(info) - 1, + "Update_rows_log_event::ha_update_row(%lld)", + (long long) wsrep_thd_trx_seqno(thd)); + if (WSREP(thd)) thd_proc_info(thd, info); +#else + if (WSREP(thd)) thd_proc_info(thd,"Update_rows_log_event::ha_update_row()"); +#endif /* WSREP_PROC_INFO */ +#endif /* WITH_WSREP */ + if (invoke_triggers && process_triggers(TRG_EVENT_UPDATE, TRG_ACTION_BEFORE, TRUE)) { @@ -12557,6 +12732,9 @@ Update_rows_log_event::do_exec_row(rpl_group_info *rgi) process_triggers(TRG_EVENT_UPDATE, TRG_ACTION_AFTER, TRUE)) error= HA_ERR_GENERIC; // in case if error is not set yet +#ifdef WITH_WSREP + if (WSREP(thd)) thd_proc_info(thd, tmp); +#endif /* WITH_WSREP */ err: m_table->file->ha_index_or_rnd_end(); return error; @@ -12657,6 +12835,47 @@ void Incident_log_event::pack_info(THD *thd, Protocol *protocol) protocol->store(buf, bytes, &my_charset_bin); } #endif +#if WITH_WSREP && !defined(MYSQL_CLIENT) +/* + read the first event from (*buf). The size of the (*buf) is (*buf_len). + At the end (*buf) is shitfed to point to the following event or NULL and + (*buf_len) will be changed to account just being read bytes of the 1st event. +*/ +#define WSREP_MAX_ALLOWED_PACKET 1024*1024*1024 // current protocol max + +Log_event* wsrep_read_log_event( + char **arg_buf, size_t *arg_buf_len, + const Format_description_log_event *description_event) +{ + DBUG_ENTER("wsrep_read_log_event"); + char *head= (*arg_buf); + + uint data_len = uint4korr(head + EVENT_LEN_OFFSET); + char *buf= (*arg_buf); + const char *error= 0; + Log_event *res= 0; + + if (data_len > WSREP_MAX_ALLOWED_PACKET) + { + error = "Event too big"; + goto err; + } + + res= Log_event::read_log_event(buf, data_len, &error, description_event, false); + +err: + if (!res) + { + DBUG_ASSERT(error != 0); + sql_print_error("Error in Log_event::read_log_event(): " + "'%s', data_len: %d, event_type: %d", + error,data_len,head[EVENT_TYPE_OFFSET]); + } + (*arg_buf)+= data_len; + (*arg_buf_len)-= data_len; + DBUG_RETURN(res); +} +#endif #ifdef MYSQL_CLIENT diff --git a/sql/mdl.cc b/sql/mdl.cc index 57d5d8e7283..0f1c961aa29 100644 --- a/sql/mdl.cc +++ b/sql/mdl.cc @@ -24,6 +24,20 @@ #include <mysql/service_thd_wait.h> #include <mysql/psi/mysql_stage.h> +#ifdef WITH_WSREP +#include "debug_sync.h" +#include "wsrep_mysqld.h" +#include "wsrep_thd.h" +extern "C" my_thread_id wsrep_thd_thread_id(THD *thd); +extern "C" char *wsrep_thd_query(THD *thd); +void sql_print_information(const char *format, ...) + ATTRIBUTE_FORMAT(printf, 1, 2); + +extern bool +wsrep_grant_mdl_exception(MDL_context *requestor_ctx, + MDL_ticket *ticket, + const MDL_key *key); +#endif /* WITH_WSREP */ #ifdef HAVE_PSI_INTERFACE static PSI_mutex_key key_MDL_map_mutex; static PSI_mutex_key key_MDL_wait_LOCK_wait_status; @@ -1435,6 +1449,22 @@ MDL_wait::timed_wait(MDL_context_owner *owner, struct timespec *abs_timeout, while (!m_wait_status && !owner->is_killed() && wait_result != ETIMEDOUT && wait_result != ETIME) { +#ifdef WITH_WSREP + // Allow tests to block the applier thread using the DBUG facilities + DBUG_EXECUTE_IF("sync.wsrep_before_mdl_wait", + { + const char act[]= + "now " + "wait_for signal.wsrep_before_mdl_wait"; + DBUG_ASSERT(!debug_sync_set_action((owner->get_thd()), + STRING_WITH_LEN(act))); + };); + if (wsrep_thd_is_BF(owner->get_thd(), false)) + { + wait_result= mysql_cond_wait(&m_COND_wait_status, &m_LOCK_wait_status); + } + else +#endif /* WITH_WSREP */ wait_result= mysql_cond_timedwait(&m_COND_wait_status, &m_LOCK_wait_status, abs_timeout); } @@ -1501,11 +1531,58 @@ void MDL_lock::Ticket_list::add_ticket(MDL_ticket *ticket) called by other threads. */ DBUG_ASSERT(ticket->get_lock()); +#ifdef WITH_WSREP + if ((this == &(ticket->get_lock()->m_waiting)) && + wsrep_thd_is_BF((void *)(ticket->get_ctx()->get_thd()), false)) + { + Ticket_iterator itw(ticket->get_lock()->m_waiting); + Ticket_iterator itg(ticket->get_lock()->m_granted); + + MDL_ticket *waiting, *granted; + MDL_ticket *prev=NULL; + bool added= false; + + while ((waiting= itw++) && !added) + { + if (!wsrep_thd_is_BF((void *)(waiting->get_ctx()->get_thd()), true)) + { + WSREP_DEBUG("MDL add_ticket inserted before: %lu %s", + wsrep_thd_thread_id(waiting->get_ctx()->get_thd()), + wsrep_thd_query(waiting->get_ctx()->get_thd())); + /* Insert the ticket before the first non-BF waiting thd. */ + m_list.insert_after(prev, ticket); + added= true; + } + prev= waiting; + } + + /* Otherwise, insert the ticket at the back of the waiting list. */ + if (!added) m_list.push_back(ticket); + + while ((granted= itg++)) + { + if (granted->get_ctx() != ticket->get_ctx() && + granted->is_incompatible_when_granted(ticket->get_type())) + { + if (!wsrep_grant_mdl_exception(ticket->get_ctx(), granted, + &ticket->get_lock()->key)) + { + WSREP_DEBUG("MDL victim killed at add_ticket"); + } + } + } + } + else + { +#endif /* WITH_WSREP */ /* Add ticket to the *back* of the queue to ensure fairness among requests with the same priority. */ m_list.push_back(ticket); +#ifdef WITH_WSREP + } +#endif /* WITH_WSREP */ m_bitmap|= MDL_BIT(ticket->get_type()); } @@ -1821,7 +1898,6 @@ MDL_object_lock::m_waiting_incompatible[MDL_TYPE_END] = 0 }; - /** Check if request for the metadata lock can be satisfied given its current state. @@ -1846,6 +1922,9 @@ MDL_lock::can_grant_lock(enum_mdl_type type_arg, bool can_grant= FALSE; bitmap_t waiting_incompat_map= incompatible_waiting_types_bitmap()[type_arg]; bitmap_t granted_incompat_map= incompatible_granted_types_bitmap()[type_arg]; +#ifdef WITH_WSREP + bool wsrep_can_grant= TRUE; +#endif /* WITH_WSREP */ /* New lock request can be satisfied iff: @@ -1868,12 +1947,59 @@ MDL_lock::can_grant_lock(enum_mdl_type type_arg, { if (ticket->get_ctx() != requestor_ctx && ticket->is_incompatible_when_granted(type_arg)) +#ifdef WITH_WSREP + { + if (wsrep_thd_is_BF((void *)(requestor_ctx->get_thd()),false) && + key.mdl_namespace() == MDL_key::GLOBAL) + { + WSREP_DEBUG("global lock granted for BF: %lu %s", + wsrep_thd_thread_id(requestor_ctx->get_thd()), + wsrep_thd_query(requestor_ctx->get_thd())); + can_grant = true; + } + else if (!wsrep_grant_mdl_exception(requestor_ctx, ticket, &key)) + { + wsrep_can_grant= FALSE; + if (wsrep_log_conflicts) + { + MDL_lock * lock = ticket->get_lock(); + WSREP_INFO( + "MDL conflict db=%s table=%s ticket=%d solved by %s", + lock->key.db_name(), lock->key.name(), ticket->get_type(), "abort" + ); + } + } + else + { + can_grant= TRUE; + } + } +#else break; +#endif /* WITH_WSREP */ } +#ifdef WITH_WSREP + if ((ticket == NULL) && wsrep_can_grant) +#else if (ticket == NULL) /* Incompatible locks are our own. */ +#endif /* WITH_WSREP */ + can_grant= TRUE; } } +#ifdef WITH_WSREP + else + { + if (wsrep_thd_is_BF((void *)(requestor_ctx->get_thd()), false) && + key.mdl_namespace() == MDL_key::GLOBAL) + { + WSREP_DEBUG("global lock granted for BF (waiting queue): %lu %s", + wsrep_thd_thread_id(requestor_ctx->get_thd()), + wsrep_thd_query(requestor_ctx->get_thd())); + can_grant = true; + } + } +#endif /* WITH_WSREP */ return can_grant; } @@ -2980,7 +3106,12 @@ void MDL_context::release_locks_stored_before(enum_mdl_duration duration, DBUG_VOID_RETURN; } - +#ifdef WITH_WSREP +void MDL_context::release_explicit_locks() +{ + release_locks_stored_before(MDL_EXPLICIT, NULL); +} +#endif /** Release all explicit locks in the context which correspond to the same name/object as this lock request. @@ -3288,3 +3419,49 @@ void MDL_context::set_transaction_duration_for_all_locks() ticket->m_duration= MDL_TRANSACTION; #endif } +#ifdef WITH_WSREP +void MDL_ticket::wsrep_report(bool debug) +{ + if (debug) + { + const PSI_stage_info *psi_stage = m_lock->key.get_wait_state_name(); + + WSREP_DEBUG("MDL ticket: type: %s space: %s db: %s name: %s (%s)", + (get_type() == MDL_INTENTION_EXCLUSIVE) ? "intention exclusive" : + ((get_type() == MDL_SHARED) ? "shared" : + ((get_type() == MDL_SHARED_HIGH_PRIO ? "shared high prio" : + ((get_type() == MDL_SHARED_READ) ? "shared read" : + ((get_type() == MDL_SHARED_WRITE) ? "shared write" : + ((get_type() == MDL_SHARED_NO_WRITE) ? "shared no write" : + ((get_type() == MDL_SHARED_NO_READ_WRITE) ? "shared no read write" : + ((get_type() == MDL_EXCLUSIVE) ? "exclusive" : + "UNKNOWN")))))))), + (m_lock->key.mdl_namespace() == MDL_key::GLOBAL) ? "GLOBAL" : + ((m_lock->key.mdl_namespace() == MDL_key::SCHEMA) ? "SCHEMA" : + ((m_lock->key.mdl_namespace() == MDL_key::TABLE) ? "TABLE" : + ((m_lock->key.mdl_namespace() == MDL_key::TABLE) ? "FUNCTION" : + ((m_lock->key.mdl_namespace() == MDL_key::TABLE) ? "PROCEDURE" : + ((m_lock->key.mdl_namespace() == MDL_key::TABLE) ? "TRIGGER" : + ((m_lock->key.mdl_namespace() == MDL_key::TABLE) ? "EVENT" : + ((m_lock->key.mdl_namespace() == MDL_key::COMMIT) ? "COMMIT" : + (char *)"UNKNOWN"))))))), + m_lock->key.db_name(), + m_lock->key.name(), + psi_stage->m_name); + } +} +bool MDL_context::wsrep_has_explicit_locks() +{ + MDL_ticket *ticket = NULL; + + Ticket_iterator it(m_tickets[MDL_EXPLICIT]); + + while ((ticket = it++)) + { + return true; + } + + return false; +} + +#endif /* WITH_WSREP */ diff --git a/sql/mdl.h b/sql/mdl.h index 13de60284da..86f681c90f0 100644 --- a/sql/mdl.h +++ b/sql/mdl.h @@ -457,6 +457,7 @@ public: MDL_key key; public: + static void *operator new(size_t size, MEM_ROOT *mem_root) throw () { return alloc_root(mem_root, size); } static void operator delete(void *ptr, MEM_ROOT *mem_root) {} @@ -586,6 +587,9 @@ public: MDL_ticket *next_in_lock; MDL_ticket **prev_in_lock; public: +#ifdef WITH_WSREP + void wsrep_report(bool debug); +#endif /* WITH_WSREP */ bool has_pending_conflicting_lock() const; MDL_context *get_ctx() const { return m_ctx; } @@ -774,6 +778,13 @@ public: m_tickets[MDL_EXPLICIT].is_empty()); } +#ifdef WITH_WSREP + inline bool has_transactional_locks() const + { + return !m_tickets[MDL_TRANSACTION].is_empty(); + } +#endif /* WITH_WSREP */ + MDL_savepoint mdl_savepoint() { return MDL_savepoint(m_tickets[MDL_STATEMENT].front(), @@ -786,6 +797,9 @@ public: void release_statement_locks(); void release_transactional_locks(); +#ifdef WITH_WSREP + void release_explicit_locks(); +#endif void rollback_to_savepoint(const MDL_savepoint &mdl_savepoint); MDL_context_owner *get_owner() { return m_owner; } @@ -918,6 +932,9 @@ private: MDL_ticket **out_ticket); public: +#ifdef WITH_WSREP + bool wsrep_has_explicit_locks(); +#endif /* WITH_WSREP */ THD *get_thd() const { return m_owner->get_thd(); } void find_deadlock(); diff --git a/sql/mysqld.cc b/sql/mysqld.cc index d9320fa3bcf..45e6b3666bf 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -71,6 +71,13 @@ #include "scheduler.h" #include <waiting_threads.h> #include "debug_sync.h" +#ifdef WITH_WSREP +#include "wsrep_mysqld.h" +#include "wsrep_var.h" +#include "wsrep_thd.h" +#include "wsrep_sst.h" +ulong wsrep_running_threads = 0; // # of currently running wsrep threads +#endif #include "sql_callback.h" #include "threadpool.h" @@ -357,7 +364,11 @@ static char *default_character_set_name; static char *character_set_filesystem_name; static char *lc_messages; static char *lc_time_names_name; +#ifndef WITH_WSREP static char *my_bind_addr_str; +#else +char *my_bind_addr_str; +#endif /* WITH_WSREP */ static char *default_collation_name; char *default_storage_engine; static char compiled_default_collation_name[]= MYSQL_DEFAULT_COLLATION_NAME; @@ -370,6 +381,10 @@ static DYNAMIC_ARRAY all_options; /* Global variables */ +#ifdef WITH_WSREP +ulong my_bind_addr; +bool wsrep_new_cluster= false; +#endif /* WITH_WSREP */ bool opt_bin_log, opt_bin_log_used=0, opt_ignore_builtin_innodb= 0; my_bool opt_log, opt_slow_log, debug_assert_if_crashed_table= 0, opt_help= 0; static my_bool opt_abort; @@ -463,6 +478,10 @@ ulong opt_binlog_rows_event_max_size; my_bool opt_master_verify_checksum= 0; my_bool opt_slave_sql_verify_checksum= 1; const char *binlog_format_names[]= {"MIXED", "STATEMENT", "ROW", NullS}; +#ifdef WITH_WSREP +const char *wsrep_binlog_format_names[]= + {"MIXED", "STATEMENT", "ROW", "NONE", NullS}; +#endif /* WITH_WSREP */ volatile sig_atomic_t calling_initgroups= 0; /**< Used in SIGSEGV handler. */ uint mysqld_port, test_flags, select_errors, dropping_tables, ha_open_options; uint mysqld_extra_port; @@ -733,6 +752,23 @@ mysql_cond_t COND_server_started; int mysqld_server_started=0, mysqld_server_initialized= 0; File_parser_dummy_hook file_parser_dummy_hook; +#ifdef WITH_WSREP +mysql_mutex_t LOCK_wsrep_ready; +mysql_cond_t COND_wsrep_ready; +mysql_mutex_t LOCK_wsrep_sst; +mysql_cond_t COND_wsrep_sst; +mysql_mutex_t LOCK_wsrep_sst_init; +mysql_cond_t COND_wsrep_sst_init; +mysql_mutex_t LOCK_wsrep_rollback; +mysql_cond_t COND_wsrep_rollback; +wsrep_aborting_thd_t wsrep_aborting_thd= NULL; +mysql_mutex_t LOCK_wsrep_replaying; +mysql_cond_t COND_wsrep_replaying; +mysql_mutex_t LOCK_wsrep_slave_threads; +mysql_mutex_t LOCK_wsrep_desync; +int wsrep_replaying= 0; +static void wsrep_close_threads(THD* thd); +#endif /* WITH_WSREP */ /* replication parameters, if master_host is not NULL, we are a slave */ uint report_port= 0; @@ -872,6 +908,12 @@ PSI_mutex_key key_BINLOG_LOCK_index, key_BINLOG_LOCK_xid_list, key_LOCK_error_messages, key_LOG_INFO_lock, key_LOCK_thread_count, key_LOCK_thread_cache, key_PARTITION_LOCK_auto_inc; +#ifdef WITH_WSREP +PSI_mutex_key key_LOCK_wsrep_rollback, key_LOCK_wsrep_thd, + key_LOCK_wsrep_replaying, key_LOCK_wsrep_ready, key_LOCK_wsrep_sst, + key_LOCK_wsrep_sst_thread, key_LOCK_wsrep_sst_init, + key_LOCK_wsrep_slave_threads, key_LOCK_wsrep_desync; +#endif PSI_mutex_key key_RELAYLOG_LOCK_index; PSI_mutex_key key_LOCK_slave_state, key_LOCK_binlog_state, key_LOCK_rpl_thread, key_LOCK_rpl_thread_pool, key_LOCK_parallel_entry; @@ -950,6 +992,18 @@ static PSI_mutex_info all_server_mutexes[]= { &key_LOCK_commit_ordered, "LOCK_commit_ordered", PSI_FLAG_GLOBAL}, { &key_LOCK_slave_init, "LOCK_slave_init", PSI_FLAG_GLOBAL}, { &key_LOG_INFO_lock, "LOG_INFO::lock", 0}, +#ifdef WITH_WSREP + { &key_LOCK_wsrep_ready, "LOCK_wsrep_ready", PSI_FLAG_GLOBAL}, + { &key_LOCK_wsrep_sst, "LOCK_wsrep_sst", PSI_FLAG_GLOBAL}, + { &key_LOCK_wsrep_sst_thread, "wsrep_sst_thread", 0}, + { &key_LOCK_wsrep_sst_init, "LOCK_wsrep_sst_init", PSI_FLAG_GLOBAL}, + { &key_LOCK_wsrep_sst, "LOCK_wsrep_sst", PSI_FLAG_GLOBAL}, + { &key_LOCK_wsrep_rollback, "LOCK_wsrep_rollback", PSI_FLAG_GLOBAL}, + { &key_LOCK_wsrep_thd, "THD::LOCK_wsrep_thd", 0}, + { &key_LOCK_wsrep_replaying, "LOCK_wsrep_replaying", PSI_FLAG_GLOBAL}, + { &key_LOCK_wsrep_slave_threads, "LOCK_wsrep_slave_threads", PSI_FLAG_GLOBAL}, + { &key_LOCK_wsrep_desync, "LOCK_wsrep_desync", PSI_FLAG_GLOBAL}, +#endif { &key_LOCK_thread_count, "LOCK_thread_count", PSI_FLAG_GLOBAL}, { &key_LOCK_thread_cache, "LOCK_thread_cache", PSI_FLAG_GLOBAL}, { &key_PARTITION_LOCK_auto_inc, "HA_DATA_PARTITION::LOCK_auto_inc", 0}, @@ -996,6 +1050,11 @@ PSI_cond_key key_BINLOG_COND_xid_list, key_BINLOG_update_cond, key_TABLE_SHARE_cond, key_user_level_lock_cond, key_COND_thread_count, key_COND_thread_cache, key_COND_flush_thread_cache, key_BINLOG_COND_queue_busy; +#ifdef WITH_WSREP +PSI_cond_key key_COND_wsrep_rollback, + key_COND_wsrep_replaying, key_COND_wsrep_ready, key_COND_wsrep_sst, + key_COND_wsrep_sst_init, key_COND_wsrep_sst_thread; +#endif /* WITH_WSREP */ PSI_cond_key key_RELAYLOG_update_cond, key_COND_wakeup_ready, key_COND_wait_commit; PSI_cond_key key_RELAYLOG_COND_queue_busy; @@ -1045,6 +1104,14 @@ static PSI_cond_info all_server_conds[]= { &key_user_level_lock_cond, "User_level_lock::cond", 0}, { &key_COND_thread_count, "COND_thread_count", PSI_FLAG_GLOBAL}, { &key_COND_thread_cache, "COND_thread_cache", PSI_FLAG_GLOBAL}, +#ifdef WITH_WSREP + { &key_COND_wsrep_ready, "COND_wsrep_ready", PSI_FLAG_GLOBAL}, + { &key_COND_wsrep_sst, "COND_wsrep_sst", PSI_FLAG_GLOBAL}, + { &key_COND_wsrep_sst_init, "COND_wsrep_sst_init", PSI_FLAG_GLOBAL}, + { &key_COND_wsrep_sst_thread, "wsrep_sst_thread", 0}, + { &key_COND_wsrep_rollback, "COND_wsrep_rollback", PSI_FLAG_GLOBAL}, + { &key_COND_wsrep_replaying, "COND_wsrep_replaying", PSI_FLAG_GLOBAL}, +#endif { &key_COND_flush_thread_cache, "COND_flush_thread_cache", PSI_FLAG_GLOBAL}, { &key_COND_rpl_thread, "COND_rpl_thread", 0}, { &key_COND_rpl_thread_queue, "COND_rpl_thread_queue", 0}, @@ -1403,7 +1470,9 @@ bool mysqld_embedded=0; bool mysqld_embedded=1; #endif +#ifndef EMBEDDED_LIBRARY static my_bool plugins_are_initialized= FALSE; +#endif #ifndef DBUG_OFF static const char* default_dbug_option; @@ -1626,6 +1695,11 @@ static void close_connections(void) if (tmp->slave_thread) continue; +#ifdef WITH_WSREP + /* skip wsrep system threads as well */ + if (WSREP(tmp) && (tmp->wsrep_exec_mode==REPL_RECV || tmp->wsrep_applier)) + continue; +#endif tmp->killed= KILL_SERVER_HARD; MYSQL_CALLBACK(thread_scheduler, post_kill_notification, (tmp)); mysql_mutex_lock(&tmp->LOCK_thd_data); @@ -1702,6 +1776,33 @@ static void close_connections(void) close_connection(tmp,ER_SERVER_SHUTDOWN); } #endif +#ifdef WITH_WSREP + /* + * TODO: this code block may turn out redundant. wsrep->disconnect() + * should terminate slave threads gracefully, and we don't need + * to signal them here. + * The code here makes sure mysqld will not hang during shutdown + * even if wsrep provider has problems in shutting down. + */ + if (WSREP(tmp) && tmp->wsrep_exec_mode==REPL_RECV) + { + sql_print_information("closing wsrep system thread"); + tmp->killed= KILL_CONNECTION; + MYSQL_CALLBACK(thread_scheduler, post_kill_notification, (tmp)); + if (tmp->mysys_var) + { + tmp->mysys_var->abort=1; + mysql_mutex_lock(&tmp->mysys_var->mutex); + if (tmp->mysys_var->current_cond) + { + mysql_mutex_lock(tmp->mysys_var->current_mutex); + mysql_cond_broadcast(tmp->mysys_var->current_cond); + mysql_mutex_unlock(tmp->mysys_var->current_mutex); + } + mysql_mutex_unlock(&tmp->mysys_var->mutex); + } + } +#endif DBUG_PRINT("quit",("Unlocking LOCK_thread_count")); mysql_mutex_unlock(&LOCK_thread_count); } @@ -1856,8 +1957,16 @@ static void __cdecl kill_server(int sig_ptr) } } #endif +#ifdef WITH_WSREP + /* Stop wsrep threads in case they are running. */ + wsrep_stop_replication(NULL); +#endif close_connections(); +#ifdef WITH_WSREP + if (wsrep_inited == 1) + wsrep_deinit(true); +#endif if (sig != MYSQL_KILL_SIGNAL && sig != 0) unireg_abort(1); /* purecov: inspected */ @@ -1952,6 +2061,25 @@ extern "C" void unireg_abort(int exit_code) usage(); if (exit_code) sql_print_error("Aborting\n"); +#ifdef WITH_WSREP + if (wsrep) + { + /* This is an abort situation, we cannot expect to gracefully close all + * wsrep threads here, we can only diconnect from service */ + wsrep_close_client_connections(FALSE); + shutdown_in_progress= 1; + THD* thd(0); + wsrep->disconnect(wsrep); + WSREP_INFO("Service disconnected."); + wsrep_close_threads(thd); /* this won't close all threads */ + sleep(1); /* so give some time to exit for those which can */ + WSREP_INFO("Some threads may fail to exit."); + + /* In bootstrap mode we deinitialize wsrep here. */ + if (opt_bootstrap && wsrep_inited) + wsrep_deinit(true); + } +#endif // WITH_WSREP clean_up(!opt_abort && (exit_code || !opt_bootstrap)); /* purecov: inspected */ DBUG_PRINT("quit",("done with cleanup in unireg_abort")); mysqld_exit(exit_code); @@ -2167,6 +2295,20 @@ static void clean_up_mutexes() mysql_cond_destroy(&COND_thread_count); mysql_cond_destroy(&COND_thread_cache); mysql_cond_destroy(&COND_flush_thread_cache); +#ifdef WITH_WSREP + (void) mysql_mutex_destroy(&LOCK_wsrep_ready); + (void) mysql_cond_destroy(&COND_wsrep_ready); + (void) mysql_mutex_destroy(&LOCK_wsrep_sst); + (void) mysql_cond_destroy(&COND_wsrep_sst); + (void) mysql_mutex_destroy(&LOCK_wsrep_sst_init); + (void) mysql_cond_destroy(&COND_wsrep_sst_init); + (void) mysql_mutex_destroy(&LOCK_wsrep_rollback); + (void) mysql_cond_destroy(&COND_wsrep_rollback); + (void) mysql_mutex_destroy(&LOCK_wsrep_replaying); + (void) mysql_cond_destroy(&COND_wsrep_replaying); + (void) mysql_mutex_destroy(&LOCK_wsrep_slave_threads); + (void) mysql_mutex_destroy(&LOCK_wsrep_desync); +#endif mysql_mutex_destroy(&LOCK_server_started); mysql_cond_destroy(&COND_server_started); mysql_mutex_destroy(&LOCK_prepare_ordered); @@ -2451,6 +2593,10 @@ static MYSQL_SOCKET activate_tcp_port(uint port) socket_errno); unireg_abort(1); } +#if defined(WITH_WSREP) && defined(HAVE_FCNTL) && defined(FD_CLOEXEC) + (void) fcntl(mysql_socket_getfd(ip_sock), F_SETFD, FD_CLOEXEC); +#endif /* WITH_WSREP */ + DBUG_RETURN(ip_sock); } @@ -2568,6 +2714,9 @@ static void network_init(void) if (mysql_socket_listen(unix_sock,(int) back_log) < 0) sql_print_warning("listen() on Unix socket failed with error %d", socket_errno); +#if defined(WITH_WSREP) && defined(HAVE_FCNTL) + (void) fcntl(mysql_socket_getfd(unix_sock), F_SETFD, FD_CLOEXEC); +#endif /* WITH_WSREP */ } #endif DBUG_PRINT("info",("server started")); @@ -2642,9 +2791,19 @@ void thd_cleanup(THD *thd) void dec_connection_count(THD *thd) { - mysql_mutex_lock(&LOCK_connection_count); - (*thd->scheduler->connection_count)--; - mysql_mutex_unlock(&LOCK_connection_count); +#ifdef WITH_WSREP + /* + Do not decrement when its wsrep system thread. wsrep_applier is set for + applier as well as rollbacker threads. + */ + if (!thd->wsrep_applier) +#endif /* WITH_WSREP */ + { + DBUG_ASSERT(*thd->scheduler->connection_count > 0); + mysql_mutex_lock(&LOCK_connection_count); + (*thd->scheduler->connection_count)--; + mysql_mutex_unlock(&LOCK_connection_count); + } } @@ -2814,10 +2973,19 @@ static bool cache_thread() bool one_thread_per_connection_end(THD *thd, bool put_in_cache) { DBUG_ENTER("one_thread_per_connection_end"); +#ifdef WITH_WSREP + const bool wsrep_applier(thd->wsrep_applier); +#endif + unlink_thd(thd); /* Mark that current_thd is not valid anymore */ set_current_thd(0); + +#ifdef WITH_WSREP + if (!wsrep_applier && put_in_cache && cache_thread()) +#else if (put_in_cache && cache_thread()) +#endif /* WITH_WSREP */ DBUG_RETURN(0); // Thread is reused signal_thd_deleted(); @@ -3251,8 +3419,8 @@ pthread_handler_t signal_hand(void *arg __attribute__((unused))) should not be any other mysql_cond_signal() calls. */ mysql_mutex_lock(&LOCK_thread_count); - mysql_mutex_unlock(&LOCK_thread_count); mysql_cond_broadcast(&COND_thread_count); + mysql_mutex_unlock(&LOCK_thread_count); (void) pthread_sigmask(SIG_BLOCK,&set,NULL); for (;;) @@ -3930,7 +4098,6 @@ static int init_common_variables() } else opt_log_basename= glob_hostname; - if (!*pidfile_name) { strmake(pidfile_name, opt_log_basename, sizeof(pidfile_name)-5); @@ -3989,7 +4156,11 @@ static int init_common_variables() compile_time_assert(sizeof(com_status_vars)/sizeof(com_status_vars[0]) - 1 == SQLCOM_END + 8); #endif - +#ifdef WITH_WSREP + /* This is a protection against mutually incompatible option values. */ + if (WSREP_ON && wsrep_check_opts (remaining_argc, remaining_argv)) + global_system_variables.wsrep_on= 0; +#endif /* WITH_WSREP */ if (get_options(&remaining_argc, &remaining_argv)) return 1; set_server_version(); @@ -4415,6 +4586,28 @@ static int init_thread_environment() rpl_init_gtid_waiting(); #endif +#ifdef WITH_WSREP + mysql_mutex_init(key_LOCK_wsrep_ready, + &LOCK_wsrep_ready, MY_MUTEX_INIT_FAST); + mysql_cond_init(key_COND_wsrep_ready, &COND_wsrep_ready, NULL); + mysql_mutex_init(key_LOCK_wsrep_sst, + &LOCK_wsrep_sst, MY_MUTEX_INIT_FAST); + mysql_cond_init(key_COND_wsrep_sst, &COND_wsrep_sst, NULL); + mysql_mutex_init(key_LOCK_wsrep_sst_init, + &LOCK_wsrep_sst_init, MY_MUTEX_INIT_FAST); + mysql_cond_init(key_COND_wsrep_sst_init, &COND_wsrep_sst_init, NULL); + mysql_mutex_init(key_LOCK_wsrep_rollback, + &LOCK_wsrep_rollback, MY_MUTEX_INIT_FAST); + mysql_cond_init(key_COND_wsrep_rollback, &COND_wsrep_rollback, NULL); + mysql_mutex_init(key_LOCK_wsrep_replaying, + &LOCK_wsrep_replaying, MY_MUTEX_INIT_FAST); + mysql_cond_init(key_COND_wsrep_replaying, &COND_wsrep_replaying, NULL); + mysql_mutex_init(key_LOCK_wsrep_slave_threads, + &LOCK_wsrep_slave_threads, MY_MUTEX_INIT_FAST); + mysql_mutex_init(key_LOCK_wsrep_desync, + &LOCK_wsrep_desync, MY_MUTEX_INIT_FAST); +#endif + DBUG_RETURN(0); } @@ -4667,10 +4860,18 @@ static int init_server_components() /* need to configure logging before initializing storage engines */ if (!opt_bin_log_used) { +#ifdef WITH_WSREP + if (!WSREP_ON && opt_log_slave_updates) +#else if (opt_log_slave_updates) +#endif sql_print_warning("You need to use --log-bin to make " "--log-slave-updates work."); +#ifdef WITH_WSREP + if (!WSREP_ON && binlog_format_used) +#else if (binlog_format_used) +#endif sql_print_warning("You need to use --log-bin to make " "--binlog-format work."); } @@ -4697,8 +4898,6 @@ static int init_server_components() } #endif - DBUG_ASSERT(!opt_bin_log || opt_bin_logname); - if (opt_bin_log) { /* Reports an error and aborts, if the --log-bin's path @@ -4747,10 +4946,72 @@ static int init_server_components() { opt_bin_logname= my_once_strdup(buf, MYF(MY_WME)); } +#ifdef WITH_WSREP /* WSREP BEFORE SE */ + /* + Wsrep initialization must happen at this point, because: + - opt_bin_logname must be known when starting replication + since SST may need it + - SST may modify binlog index file, so it must be opened + after SST has happened + */ + } + if (!wsrep_recovery && !opt_help) + { + if (opt_bootstrap) // bootsrap option given - disable wsrep functionality + { + wsrep_provider_init(WSREP_NONE); + if (wsrep_init()) unireg_abort(1); + } + else // full wsrep initialization + { + // add basedir/bin to PATH to resolve wsrep script names + char* const tmp_path((char*)alloca(strlen(mysql_home) + + strlen("/bin") + 1)); + if (tmp_path) + { + strcpy(tmp_path, mysql_home); + strcat(tmp_path, "/bin"); + wsrep_prepend_PATH(tmp_path); + } + else + { + WSREP_ERROR("Could not append %s/bin to PATH", mysql_home); + } + + if (wsrep_before_SE()) + { + set_ports(); // this is also called in network_init() later but we need + // to know mysqld_port now - lp:1071882 + /* + Plugin initialization (plugin_init()) hasn't happened yet, set + maria_hton to 0. + */ + maria_hton= 0; + wsrep_init_startup(true); + } + } + } + if (opt_bin_log) + { + /* + Variable ln is not defined at this scope. We use opt_bin_logname instead. + It should be the same as ln since + - mysql_bin_log.generate_name() returns first argument if new log name + is not generated + - if new log name is generated, return value is assigned to ln and copied + to opt_bin_logname above + */ + if (mysql_bin_log.open_index_file(opt_binlog_index_name, opt_bin_logname, + TRUE)) + { + unireg_abort(1); + } +#else if (mysql_bin_log.open_index_file(opt_binlog_index_name, ln, TRUE)) { unireg_abort(1); } +#endif /* WITH_WSREP */ } /* call ha_init_key_cache() on all key caches to init them */ @@ -4774,6 +5035,49 @@ static int init_server_components() } plugins_are_initialized= TRUE; /* Don't separate from init function */ +#ifdef WITH_WSREP + /* Wait for wsrep threads to get created. */ + if (wsrep_creating_startup_threads == 1) { + mysql_mutex_lock(&LOCK_thread_count); + while (wsrep_running_threads < 2) + { + mysql_cond_wait(&COND_thread_count, &LOCK_thread_count); + } + + /* Now is the time to initialize threads for queries. */ + THD *tmp; + I_List_iterator<THD> it(threads); + while ((tmp= it++)) + { + if (tmp->wsrep_applier == true) + { + /* + Save/restore server_status and variables.option_bits and they get + altered during init_for_queries(). + */ + unsigned int server_status_saved= tmp->server_status; + ulonglong option_bits_saved= tmp->variables.option_bits; + + /* + Set THR_THD to temporarily point to this THD to register all the + variables that allocates memory for this THD. + */ + THD *current_thd_saved= current_thd; + set_current_thd(tmp); + + tmp->init_for_queries(); + + /* Restore current_thd. */ + set_current_thd(current_thd_saved); + + tmp->server_status= server_status_saved; + tmp->variables.option_bits= option_bits_saved; + } + } + mysql_mutex_unlock(&LOCK_thread_count); + } +#endif + /* we do want to exit if there are any other unknown options */ if (remaining_argc > 1) { @@ -4900,8 +5204,33 @@ static int init_server_components() internal_tmp_table_max_key_segments= myisam_max_key_segments(); #endif +#ifdef WITH_WSREP + if (!opt_bin_log) + { + wsrep_emulate_bin_log= 1; + } +#endif + tc_log= get_tc_log_implementation(); +#ifdef WITH_WSREP + if (tc_log == &tc_log_mmap) + { + /* + wsrep hton raises total_ha_2pc count to 2, even in native mysql mode. + Have to force using tc_log_dummy here, as tc_log_mmap segfaults. + */ + if (WSREP_ON || total_ha_2pc <= 2) + tc_log= &tc_log_dummy; + } + + WSREP_DEBUG("Initial TC log open: %s", + (tc_log == &mysql_bin_log) ? "binlog" : + (tc_log == &tc_log_mmap) ? "mmap" : + (tc_log == &tc_log_dummy) ? "dummy" : "unknown" + ); +#endif + if (tc_log->open(opt_bin_log ? opt_bin_logname : opt_tc_log_file)) { sql_print_error("Can't init tc log"); @@ -4992,6 +5321,437 @@ static void create_shutdown_thread() #endif /* EMBEDDED_LIBRARY */ +#ifdef WITH_WSREP +typedef void (*wsrep_thd_processor_fun)(THD *); + +pthread_handler_t start_wsrep_THD(void *arg) +{ + THD *thd; + wsrep_thd_processor_fun processor= (wsrep_thd_processor_fun)arg; + + if (my_thread_init() || (!(thd= new THD(true)))) + { + goto error; + } + + mysql_mutex_lock(&LOCK_thread_count); + thd->thread_id=thread_id++; + + thd->real_id=pthread_self(); // Keep purify happy + thread_count++; + thread_created++; + threads.append(thd); + + my_net_init(&thd->net,(st_vio*) 0, MYF(0)); + + DBUG_PRINT("wsrep",(("creating thread %lld"), (long long)thd->thread_id)); + thd->prior_thr_create_utime= thd->start_utime= microsecond_interval_timer(); + (void) mysql_mutex_unlock(&LOCK_thread_count); + + /* from bootstrap()... */ + thd->bootstrap=1; + thd->max_client_packet_length= thd->net.max_packet; + thd->security_ctx->master_access= ~(ulong)0; + + /* from handle_one_connection... */ + pthread_detach_this_thread(); + + mysql_thread_set_psi_id(thd->thread_id); + thd->thr_create_utime= microsecond_interval_timer(); + + if (MYSQL_CALLBACK_ELSE(thread_scheduler, init_new_connection_thread, (), 0)) + { + close_connection(thd, ER_OUT_OF_RESOURCES); + statistic_increment(aborted_connects,&LOCK_status); + MYSQL_CALLBACK(thread_scheduler, end_thread, (thd, 0)); + goto error; + } + +// </5.1.17> + /* + handle_one_connection() is normally the only way a thread would + start and would always be on the very high end of the stack , + therefore, the thread stack always starts at the address of the + first local variable of handle_one_connection, which is thd. We + need to know the start of the stack so that we could check for + stack overruns. + */ + DBUG_PRINT("wsrep", ("handle_one_connection called by thread %lld\n", + (long long)thd->thread_id)); + /* now that we've called my_thread_init(), it is safe to call DBUG_* */ + + thd->thread_stack= (char*) &thd; + if (thd->store_globals()) + { + close_connection(thd, ER_OUT_OF_RESOURCES); + statistic_increment(aborted_connects,&LOCK_status); + MYSQL_CALLBACK(thread_scheduler, end_thread, (thd, 0)); + delete thd; + goto error; + } + + thd->system_thread= SYSTEM_THREAD_SLAVE_SQL; + thd->security_ctx->skip_grants(); + + /* handle_one_connection() again... */ + //thd->version= refresh_version; + thd->proc_info= 0; + thd->set_command(COM_SLEEP); + + if (wsrep_creating_startup_threads == 0) + { + thd->init_for_queries(); + } + + mysql_mutex_lock(&LOCK_thread_count); + wsrep_running_threads++; + mysql_cond_broadcast(&COND_thread_count); + + if (wsrep_running_threads > 2) + { + wsrep_creating_startup_threads= 0; + } + + mysql_mutex_unlock(&LOCK_thread_count); + + processor(thd); + + close_connection(thd, 0); + + mysql_mutex_lock(&LOCK_thread_count); + wsrep_running_threads--; + WSREP_DEBUG("wsrep running threads now: %lu", wsrep_running_threads); + mysql_cond_broadcast(&COND_thread_count); + mysql_mutex_unlock(&LOCK_thread_count); + + // Note: We can't call THD destructor without crashing + // if plugins have not been initialized. However, in most of the + // cases this means that pre SE initialization SST failed and + // we are going to exit anyway. + if (plugins_are_initialized) + { + net_end(&thd->net); + MYSQL_CALLBACK(thread_scheduler, end_thread, (thd, 1)); + } + else + { + // TODO: lightweight cleanup to get rid of: + // 'Error in my_thread_global_end(): 2 threads didn't exit' + // at server shutdown + } + + my_thread_end(); + if (thread_handling > SCHEDULER_ONE_THREAD_PER_CONNECTION) + { + mysql_mutex_lock(&LOCK_thread_count); + delete thd; + thread_count--; + mysql_mutex_unlock(&LOCK_thread_count); + } + return(NULL); + +error: + WSREP_ERROR("Failed to create/initialize system thread"); + + /* Abort if its the first applier/rollbacker thread. */ + if (wsrep_creating_startup_threads == 1) + unireg_abort(1); + else + return NULL; +} + +/**/ +static bool abort_replicated(THD *thd) +{ + bool ret_code= false; + if (thd->wsrep_query_state== QUERY_COMMITTING) + { + WSREP_DEBUG("aborting replicated trx: %lu", thd->real_id); + + (void)wsrep_abort_thd(thd, thd, TRUE); + ret_code= true; + } + return ret_code; +} +/**/ +static inline bool is_client_connection(THD *thd) +{ +#if REMOVE +// REMOVE THIS LATER (lp:777201). Below we had to add an explicit check for +// wsrep_applier since wsrep_exec_mode didn't seem to always work +if (thd->wsrep_applier && thd->wsrep_exec_mode != REPL_RECV) +WSREP_WARN("applier has wsrep_exec_mode = %d", thd->wsrep_exec_mode); + + if ( thd->slave_thread || /* declared as mysql slave */ + thd->system_thread || /* declared as system thread */ + !thd->vio_ok() || /* server internal thread */ + thd->wsrep_exec_mode==REPL_RECV || /* applier or replaying thread */ + thd->wsrep_applier || /* wsrep slave applier */ + !thd->variables.wsrep_on) /* client, but fenced outside wsrep */ + return false; + + return true; +#else + return (thd->wsrep_client_thread && thd->variables.wsrep_on); +#endif /* REMOVE */ +} + +static inline bool is_replaying_connection(THD *thd) +{ + bool ret; + + mysql_mutex_lock(&thd->LOCK_wsrep_thd); + ret= (thd->wsrep_conflict_state == REPLAYING) ? true : false; + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + + return ret; +} + +static inline bool is_committing_connection(THD *thd) +{ + bool ret; + + mysql_mutex_lock(&thd->LOCK_wsrep_thd); + ret= (thd->wsrep_query_state == QUERY_COMMITTING) ? true : false; + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + + return ret; +} + +static bool have_client_connections() +{ + THD *tmp; + + I_List_iterator<THD> it(threads); + while ((tmp=it++)) + { + DBUG_PRINT("quit",("Informing thread %ld that it's time to die", + tmp->thread_id)); + if (is_client_connection(tmp) && tmp->killed == KILL_CONNECTION) + { + (void)abort_replicated(tmp); + return true; + } + } + return false; +} + +static void wsrep_close_thread(THD *thd) +{ + thd->killed= KILL_CONNECTION; + MYSQL_CALLBACK(thread_scheduler, post_kill_notification, (thd)); + if (thd->mysys_var) + { + thd->mysys_var->abort=1; + mysql_mutex_lock(&thd->mysys_var->mutex); + if (thd->mysys_var->current_cond) + { + mysql_mutex_lock(thd->mysys_var->current_mutex); + mysql_cond_broadcast(thd->mysys_var->current_cond); + mysql_mutex_unlock(thd->mysys_var->current_mutex); + } + mysql_mutex_unlock(&thd->mysys_var->mutex); + } +} + +static my_bool have_committing_connections() +{ + THD *tmp; + mysql_mutex_lock(&LOCK_thread_count); // For unlink from list + + I_List_iterator<THD> it(threads); + while ((tmp=it++)) + { + if (!is_client_connection(tmp)) + continue; + + if (is_committing_connection(tmp)) + { + return TRUE; + } + } + mysql_mutex_unlock(&LOCK_thread_count); + return FALSE; +} + +int wsrep_wait_committing_connections_close(int wait_time) +{ + int sleep_time= 100; + + while (have_committing_connections() && wait_time > 0) + { + WSREP_DEBUG("wait for committing transaction to close: %d", wait_time); + my_sleep(sleep_time); + wait_time -= sleep_time; + } + if (have_committing_connections()) + { + return 1; + } + return 0; +} + +void wsrep_close_client_connections(my_bool wait_to_end) +{ + /* + First signal all threads that it's time to die + */ + + THD *tmp; + mysql_mutex_lock(&LOCK_thread_count); // For unlink from list + + bool kill_cached_threads_saved= kill_cached_threads; + kill_cached_threads= true; // prevent future threads caching + mysql_cond_broadcast(&COND_thread_cache); // tell cached threads to die + + I_List_iterator<THD> it(threads); + while ((tmp=it++)) + { + DBUG_PRINT("quit",("Informing thread %ld that it's time to die", + tmp->thread_id)); + /* We skip slave threads & scheduler on this first loop through. */ + if (!is_client_connection(tmp)) + continue; + + if (is_replaying_connection(tmp)) + { + tmp->killed= KILL_CONNECTION; + continue; + } + + /* replicated transactions must be skipped */ + if (abort_replicated(tmp)) + continue; + + WSREP_DEBUG("closing connection %ld", tmp->thread_id); + wsrep_close_thread(tmp); + } + mysql_mutex_unlock(&LOCK_thread_count); + + if (thread_count) + sleep(2); // Give threads time to die + + mysql_mutex_lock(&LOCK_thread_count); + /* + Force remaining threads to die by closing the connection to the client + */ + + I_List_iterator<THD> it2(threads); + while ((tmp=it2++)) + { +#ifndef __bsdi__ // Bug in BSDI kernel + if (is_client_connection(tmp) && + !abort_replicated(tmp) && + !is_replaying_connection(tmp)) + { + WSREP_INFO("killing local connection: %ld",tmp->thread_id); + close_connection(tmp,0); + } +#endif + } + + DBUG_PRINT("quit",("Waiting for threads to die (count=%u)",thread_count)); + WSREP_DEBUG("waiting for client connections to close: %u", thread_count); + + while (wait_to_end && have_client_connections()) + { + mysql_cond_wait(&COND_thread_count, &LOCK_thread_count); + DBUG_PRINT("quit",("One thread died (count=%u)", thread_count)); + } + + kill_cached_threads= kill_cached_threads_saved; + + mysql_mutex_unlock(&LOCK_thread_count); + + /* All client connection threads have now been aborted */ +} + +void wsrep_close_applier(THD *thd) +{ + WSREP_DEBUG("closing applier %ld", thd->thread_id); + wsrep_close_thread(thd); +} + +static void wsrep_close_threads(THD *thd) +{ + THD *tmp; + mysql_mutex_lock(&LOCK_thread_count); // For unlink from list + + I_List_iterator<THD> it(threads); + while ((tmp=it++)) + { + DBUG_PRINT("quit",("Informing thread %ld that it's time to die", + tmp->thread_id)); + /* We skip slave threads & scheduler on this first loop through. */ + if (tmp->wsrep_applier && tmp != thd) + { + WSREP_DEBUG("closing wsrep thread %ld", tmp->thread_id); + wsrep_close_thread (tmp); + } + } + + mysql_mutex_unlock(&LOCK_thread_count); +} + +void wsrep_wait_appliers_close(THD *thd) +{ + /* Wait for wsrep appliers to gracefully exit */ + mysql_mutex_lock(&LOCK_thread_count); + while (wsrep_running_threads > 1) + // 1 is for rollbacker thread which needs to be killed explicitly. + // This gotta be fixed in a more elegant manner if we gonna have arbitrary + // number of non-applier wsrep threads. + { + if (thread_handling > SCHEDULER_ONE_THREAD_PER_CONNECTION) + { + mysql_mutex_unlock(&LOCK_thread_count); + my_sleep(100); + mysql_mutex_lock(&LOCK_thread_count); + } + else + mysql_cond_wait(&COND_thread_count,&LOCK_thread_count); + DBUG_PRINT("quit",("One applier died (count=%u)",thread_count)); + } + mysql_mutex_unlock(&LOCK_thread_count); + /* Now kill remaining wsrep threads: rollbacker */ + wsrep_close_threads (thd); + /* and wait for them to die */ + mysql_mutex_lock(&LOCK_thread_count); + while (wsrep_running_threads > 0) + { + if (thread_handling > SCHEDULER_ONE_THREAD_PER_CONNECTION) + { + mysql_mutex_unlock(&LOCK_thread_count); + my_sleep(100); + mysql_mutex_lock(&LOCK_thread_count); + } + else + mysql_cond_wait(&COND_thread_count,&LOCK_thread_count); + DBUG_PRINT("quit",("One thread died (count=%u)",thread_count)); + } + mysql_mutex_unlock(&LOCK_thread_count); + + /* All wsrep applier threads have now been aborted. However, if this thread + is also applier, we are still running... + */ +} + +void wsrep_kill_mysql(THD *thd) +{ + if (mysqld_server_started) + { + if (!shutdown_in_progress) + { + WSREP_INFO("starting shutdown"); + kill_mysql(); + } + } + else + { + unireg_abort(1); + } +} +#endif /* WITH_WSREP */ #if (defined(_WIN32) || defined(HAVE_SMEM)) && !defined(EMBEDDED_LIBRARY) static void handle_connections_methods() @@ -5390,6 +6150,15 @@ int mysqld_main(int argc, char **argv) } #endif +#ifdef WITH_WSREP /* WSREP AFTER SE */ + if (wsrep_recovery) + { + select_thread_in_use= 0; + wsrep_recover(); + unireg_abort(0); + } +#endif /* WITH_WSREP */ + /* init signals & alarm After this we can't quit by a simple unireg_abort @@ -5448,8 +6217,33 @@ int mysqld_main(int argc, char **argv) if (Events::init((THD*) 0, opt_noacl || opt_bootstrap)) unireg_abort(1); +#ifdef WITH_WSREP /* WSREP AFTER SE */ if (opt_bootstrap) { + /*! bootstrap wsrep init was taken care of above */ + } + else + { + wsrep_SE_initialized(); + + if (wsrep_before_SE()) + { + /*! in case of no SST wsrep waits in view handler callback */ + wsrep_SE_init_grab(); + wsrep_SE_init_done(); + /*! in case of SST wsrep waits for wsrep->sst_received */ + wsrep_sst_continue(); + } + else + { + wsrep_init_startup (false); + } + + wsrep_create_appliers(wsrep_slave_threads - 1); + } +#endif /* WITH_WSREP */ + if (opt_bootstrap) + { select_thread_in_use= 0; // Allow 'kill' to work bootstrap(mysql_stdin); if (!kill_in_progress) @@ -5492,7 +6286,14 @@ int mysqld_main(int argc, char **argv) (char*) "" : mysqld_unix_port), mysqld_port, MYSQL_COMPILATION_COMMENT); - fclose(stdin); + + // try to keep fd=0 busy + if (!freopen(IF_WIN("NUL","/dev/null"), "r", stdin)) + { + // fall back on failure + fclose(stdin); + } + #if defined(_WIN32) && !defined(EMBEDDED_LIBRARY) Service.SetRunning(); #endif @@ -5519,6 +6320,9 @@ int mysqld_main(int argc, char **argv) #ifdef EXTRA_DEBUG2 sql_print_error("Before Lock_thread_count"); #endif +#ifdef WITH_WSREP + WSREP_DEBUG("Before Lock_thread_count"); +#endif mysql_mutex_lock(&LOCK_thread_count); DBUG_PRINT("quit", ("Got thread_count mutex")); select_thread_in_use=0; // For close_connections @@ -5784,6 +6588,9 @@ static void bootstrap(MYSQL_FILE *file) DBUG_ENTER("bootstrap"); THD *thd= new THD; +#ifdef WITH_WSREP + thd->variables.wsrep_on= 0; +#endif thd->bootstrap=1; my_net_init(&thd->net,(st_vio*) 0, MYF(0)); thd->max_client_packet_length= thd->net.max_packet; @@ -6185,6 +6992,9 @@ void handle_connections_sockets() sleep(1); // Give other threads some time continue; } +#if defined(WITH_WSREP) && defined(HAVE_FCNTL) && defined(FD_CLOEXEC) + (void) fcntl(mysql_socket_getfd(new_sock), F_SETFD, FD_CLOEXEC); +#endif /* WITH_WSREP */ #ifdef HAVE_LIBWRAP { @@ -6928,12 +7738,6 @@ struct my_option my_long_options[]= "more than one storage engine, when binary log is disabled).", &opt_tc_log_file, &opt_tc_log_file, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, -#ifdef HAVE_MMAP - {"log-tc-size", 0, "Size of transaction coordinator log.", - &opt_tc_log_size, &opt_tc_log_size, 0, GET_ULONG, - REQUIRED_ARG, TC_LOG_MIN_SIZE, TC_LOG_MIN_SIZE, (ulonglong) ULONG_MAX, 0, - TC_LOG_PAGE_SIZE, 0}, -#endif {"master-info-file", 0, "The location and name of the file that remembers the master and where " "the I/O replication thread is in the master's binlogs. Defaults to " @@ -7117,6 +7921,13 @@ struct my_option my_long_options[]= {"table_cache", 0, "Deprecated; use --table-open-cache instead.", &tc_size, &tc_size, 0, GET_ULONG, REQUIRED_ARG, TABLE_OPEN_CACHE_DEFAULT, 1, 512*1024L, 0, 1, 0}, +#ifdef WITH_WSREP + {"wsrep-new-cluster", 0, "Bootstrap a cluster. It works by overriding the " + "current value of wsrep_cluster_address. It is recommended not to add this " + "option to the config file as this will trigger bootstrap on every server " + "start.", &wsrep_new_cluster, &wsrep_new_cluster, 0, GET_BOOL, NO_ARG, + 0, 0, 0, 0, 0, 0}, +#endif /* The following options exist in 5.6 but not in 10.0 */ MYSQL_TO_BE_IMPLEMENTED_OPTION("default-tmp-storage-engine"), @@ -7968,6 +8779,21 @@ SHOW_VAR status_vars[]= { #ifdef ENABLED_PROFILING {"Uptime_since_flush_status",(char*) &show_flushstatustime, SHOW_SIMPLE_FUNC}, #endif +#ifdef WITH_WSREP + {"wsrep_connected", (char*) &wsrep_connected, SHOW_BOOL}, + {"wsrep_ready", (char*) &wsrep_ready, SHOW_BOOL}, + {"wsrep_cluster_state_uuid", (char*) &wsrep_cluster_state_uuid,SHOW_CHAR_PTR}, + {"wsrep_cluster_conf_id", (char*) &wsrep_cluster_conf_id, SHOW_LONGLONG}, + {"wsrep_cluster_status", (char*) &wsrep_cluster_status, SHOW_CHAR_PTR}, + {"wsrep_cluster_size", (char*) &wsrep_cluster_size, SHOW_LONG_NOFLUSH}, + {"wsrep_local_index", (char*) &wsrep_local_index, SHOW_LONG_NOFLUSH}, + {"wsrep_local_bf_aborts", (char*) &wsrep_show_bf_aborts, SHOW_SIMPLE_FUNC}, + {"wsrep_provider_name", (char*) &wsrep_provider_name, SHOW_CHAR_PTR}, + {"wsrep_provider_version", (char*) &wsrep_provider_version, SHOW_CHAR_PTR}, + {"wsrep_provider_vendor", (char*) &wsrep_provider_vendor, SHOW_CHAR_PTR}, + {"wsrep_thread_count", (char*) &wsrep_running_threads, SHOW_LONG_NOFLUSH}, + {"wsrep", (char*) &wsrep_show_status, SHOW_FUNC}, +#endif {NullS, NullS, SHOW_LONG} }; @@ -8309,6 +9135,10 @@ static int mysql_init_variables(void) tmpenv = DEFAULT_MYSQL_HOME; strmake_buf(mysql_home, tmpenv); #endif +#ifdef WITH_WSREP + if (wsrep_init_vars()) + return 1; +#endif return 0; } @@ -8559,6 +9389,14 @@ mysqld_get_one_option(int optid, case OPT_LOWER_CASE_TABLE_NAMES: lower_case_table_names_used= 1; break; +#ifdef WITH_WSREP + case OPT_WSREP_START_POSITION: + wsrep_start_position_init (argument); + break; + case OPT_WSREP_SST_AUTH: + wsrep_sst_auth_init (argument); + break; +#endif #if defined(ENABLED_DEBUG_SYNC) case OPT_DEBUG_SYNC_TIMEOUT: /* @@ -8859,6 +9697,40 @@ static int get_options(int *argc_ptr, char ***argv_ptr) else global_system_variables.option_bits&= ~OPTION_BIG_SELECTS; +#ifdef WITH_WSREP + if (!opt_bootstrap && WSREP_PROVIDER_EXISTS && + global_system_variables.binlog_format != BINLOG_FORMAT_ROW) { + + WSREP_ERROR ("Only binlog_format = 'ROW' is currently supported. " + "Configured value: '%s'. Please adjust your configuration.", + binlog_format_names[global_system_variables.binlog_format]); + return 1; + } + + if (global_system_variables.wsrep_causal_reads) { + WSREP_WARN("option --wsrep-causal-reads is deprecated"); + if (!(global_system_variables.wsrep_sync_wait & + WSREP_SYNC_WAIT_BEFORE_READ)) { + WSREP_WARN("--wsrep-causal-reads=ON takes precedence over --wsrep-sync-wait=%u. " + "WSREP_SYNC_WAIT_BEFORE_READ is on", + global_system_variables.wsrep_sync_wait); + global_system_variables.wsrep_sync_wait |= WSREP_SYNC_WAIT_BEFORE_READ; + } else { + // they are both turned on. + } + } else { + if (global_system_variables.wsrep_sync_wait & + WSREP_SYNC_WAIT_BEFORE_READ) { + WSREP_WARN("--wsrep-sync-wait=%u takes precedence over --wsrep-causal-reads=OFF. " + "WSREP_SYNC_WAIT_BEFORE_READ is on", + global_system_variables.wsrep_sync_wait); + global_system_variables.wsrep_causal_reads = 1; + } else { + // they are both turned off. + } + } +#endif // WITH_WSREP + // Synchronize @@global.autocommit on --autocommit const ulonglong turn_bit_on= opt_autocommit ? OPTION_AUTOCOMMIT : OPTION_NOT_AUTOCOMMIT; @@ -9024,6 +9896,9 @@ void set_server_version(void) #ifdef EMBEDDED_LIBRARY end= strnmov(end, "-embedded", (version_end-end)); #endif +#ifdef WITH_WSREP + end= strmov(end, "-wsrep"); +#endif #ifndef DBUG_OFF if (!strstr(MYSQL_SERVER_SUFFIX_STR, "-debug")) end= strnmov(end, "-debug", (version_end-end)); @@ -9252,8 +10127,6 @@ static int test_if_case_insensitive(const char *dir_name) DBUG_PRINT("exit", ("result: %d", result)); DBUG_RETURN(result); } - - #ifndef EMBEDDED_LIBRARY /** @@ -9317,6 +10190,9 @@ void refresh_status(THD *thd) /* Reset some global variables */ reset_status_vars(); +#ifdef WITH_WSREP + wsrep->stats_reset(wsrep); +#endif /* WITH_WSREP */ /* Reset the counters of all key caches (default and named). */ process_key_caches(reset_key_cache_counters, 0); diff --git a/sql/mysqld.h b/sql/mysqld.h index 78e832e4abc..4af04a3df75 100644 --- a/sql/mysqld.h +++ b/sql/mysqld.h @@ -248,6 +248,10 @@ extern PSI_mutex_key key_PAGE_lock, key_LOCK_sync, key_LOCK_active, key_LOCK_pool, key_LOCK_pending_checkpoint; #endif /* HAVE_MMAP */ +#ifdef WITH_WSREP +extern PSI_mutex_key key_LOCK_wsrep_thd; +#endif /* WITH_WSREP */ + #ifdef HAVE_OPENSSL extern PSI_mutex_key key_LOCK_des_key_file; #endif @@ -604,6 +608,15 @@ enum options_mysqld OPT_WANT_CORE, OPT_MYSQL_COMPATIBILITY, OPT_MYSQL_TO_BE_IMPLEMENTED, +#ifdef WITH_WSREP + OPT_WSREP_PROVIDER, + OPT_WSREP_PROVIDER_OPTIONS, + OPT_WSREP_CLUSTER_ADDRESS, + OPT_WSREP_START_POSITION, + OPT_WSREP_SST_AUTH, + OPT_WSREP_RECOVER, +#endif /* WITH_WSREP */ + OPT_which_is_always_the_last }; #endif @@ -767,4 +780,9 @@ extern uint internal_tmp_table_max_key_segments; extern uint volatile global_disable_checkpoint; extern my_bool opt_help; +#ifdef WITH_WSREP +#include "my_pthread.h" +pthread_handler_t start_wsrep_THD(void*); +#endif /* WITH_WSREP */ + #endif /* MYSQLD_INCLUDED */ diff --git a/sql/protocol.cc b/sql/protocol.cc index 777f124f502..67fb8924764 100644 --- a/sql/protocol.cc +++ b/sql/protocol.cc @@ -497,6 +497,14 @@ static uchar *net_store_length_fast(uchar *packet, uint length) void Protocol::end_statement() { +#ifdef WITH_WSREP + /*sanity check, can be removed before 1.0 release */ + if (WSREP(thd) && thd->wsrep_conflict_state== REPLAYING) + { + WSREP_ERROR("attempting net_end_statement while replaying"); + return; + } +#endif DBUG_ENTER("Protocol::end_statement"); DBUG_ASSERT(! thd->get_stmt_da()->is_sent()); bool error= FALSE; diff --git a/sql/rpl_gtid.cc b/sql/rpl_gtid.cc index 683f2492097..52cec9f0a85 100644 --- a/sql/rpl_gtid.cc +++ b/sql/rpl_gtid.cc @@ -560,6 +560,14 @@ rpl_slave_state::record_gtid(THD *thd, const rpl_gtid *gtid, uint64 sub_id, if ((err= gtid_check_rpl_slave_state_table(table))) goto end; +#ifdef WITH_WSREP + /* + Updates in slave state table should not be appended to galera transaction + writeset. + */ + thd->wsrep_skip_append_keys= true; +#endif + if (!in_transaction) { DBUG_PRINT("info", ("resetting OPTION_BEGIN")); @@ -673,6 +681,10 @@ IF_DBUG(dbug_break:, ) end: +#ifdef WITH_WSREP + thd->wsrep_skip_append_keys= false; +#endif + if (table_opened) { if (err || (err= ha_commit_trans(thd, FALSE))) diff --git a/sql/rpl_mi.cc b/sql/rpl_mi.cc index c810e030bf2..c23190cda2b 100644 --- a/sql/rpl_mi.cc +++ b/sql/rpl_mi.cc @@ -111,7 +111,13 @@ void Master_info::wait_until_free() Master_info::~Master_info() { - wait_until_free(); +#ifdef WITH_WSREP + /* + Do not free "wsrep" rpl_filter. It will eventually be freed by + free_all_rpl_filters() when server terminates. + */ + if (strncmp(connection_name.str, STRING_WITH_LEN("wsrep"))) +#endif rpl_filters.delete_element(connection_name.str, connection_name.length, (void (*)(const char*, uchar*)) free_rpl_filter); my_free(connection_name.str); diff --git a/sql/rpl_record.cc b/sql/rpl_record.cc index 5e48cfb02e5..21b8a8028a6 100644 --- a/sql/rpl_record.cc +++ b/sql/rpl_record.cc @@ -308,7 +308,11 @@ unpack_row(rpl_group_info *rgi, uint16 const metadata= tabledef->field_metadata(i); #ifndef DBUG_OFF uchar const *const old_pack_ptr= pack_ptr; -#endif +#else +#ifdef WITH_WSREP + uchar const *const old_pack_ptr= pack_ptr; +#endif /* WITH_WSREP */ +#endif /* !DBUF_OFF */ pack_ptr= f->unpack(f->ptr, pack_ptr, row_end, metadata); DBUG_PRINT("debug", ("field: %s; metadata: 0x%x;" " pack_ptr: 0x%lx; pack_ptr': 0x%lx; bytes: %d", @@ -317,6 +321,20 @@ unpack_row(rpl_group_info *rgi, (int) (pack_ptr - old_pack_ptr))); if (!pack_ptr) { +#ifdef WITH_WSREP + /* + Debug message to troubleshoot bug: + https://mariadb.atlassian.net/browse/MDEV-4404 + */ + WSREP_WARN("ROW event unpack field: %s metadata: 0x%x;" + " pack_ptr: 0x%lx; conv_table %p conv_field %p table %s" + " row_end: 0x%lx", + f->field_name, metadata, + (ulong) old_pack_ptr, conv_table, conv_field, + (table_found) ? "found" : "not found", (ulong)row_end + ); +#endif /* WITH_WSREP */ + rgi->rli->report(ERROR_LEVEL, ER_SLAVE_CORRUPT_EVENT, rgi->gtid_info(), "Could not read field '%s' of table '%s.%s'", diff --git a/sql/set_var.cc b/sql/set_var.cc index 0b9699e39f7..7ad528f0eae 100644 --- a/sql/set_var.cc +++ b/sql/set_var.cc @@ -556,7 +556,11 @@ int mysql_del_sys_var_chain(sys_var *first) static int show_cmp(SHOW_VAR *a, SHOW_VAR *b) { +#ifdef WITH_WSREP + return my_strcasecmp(system_charset_info, a->name, b->name); +#else return strcmp(a->name, b->name); +#endif /* WITH_WSREP */ } diff --git a/sql/set_var.h b/sql/set_var.h index 83ba662b76c..064da1404fa 100644 --- a/sql/set_var.h +++ b/sql/set_var.h @@ -247,6 +247,9 @@ public: int check(THD *thd); int update(THD *thd); int light_check(THD *thd); +#ifdef WITH_WSREP + int wsrep_store_variable(THD *thd); +#endif }; @@ -341,6 +344,9 @@ extern sys_var *Sys_autocommit_ptr; CHARSET_INFO *get_old_charset_by_name(const char *old_name); +#ifdef WITH_WSREP +int sql_set_wsrep_variables(THD *thd, List<set_var_base> *var_list); +#endif int sys_var_init(); int sys_var_add_options(DYNAMIC_ARRAY *long_options, int parse_flags); void sys_var_end(void); diff --git a/sql/slave.cc b/sql/slave.cc index 45af5fcd337..f785e4f345c 100644 --- a/sql/slave.cc +++ b/sql/slave.cc @@ -53,6 +53,9 @@ // Create_file_log_event, // Format_description_log_event +#ifdef WITH_WSREP +#include "wsrep_mysqld.h" +#endif #ifdef HAVE_REPLICATION #include "rpl_tblmap.h" @@ -1029,8 +1032,7 @@ static bool io_slave_killed(Master_info* mi) In the event of deffering decision @rli->last_event_start_time waiting timer is set to force the killed status be accepted upon its expiration. - @param thd pointer to a THD instance - @param rli pointer to Relay_log_info instance + @param rgi pointer to relay_group_info instance @return TRUE the killed status is recognized, FALSE a possible killed status is deferred. @@ -1367,6 +1369,10 @@ bool is_network_error(uint errorno) errorno == ER_NET_READ_INTERRUPTED || errorno == ER_SERVER_SHUTDOWN) return TRUE; +#ifdef WITH_WSREP + if (errorno == ER_UNKNOWN_COM_ERROR) + return TRUE; +#endif return FALSE; } @@ -3310,6 +3316,17 @@ int apply_event_and_update_pos(Log_event* ev, THD* thd, if (reason == Log_event::EVENT_SKIP_NOT) exec_res= ev->apply_event(rgi); +#ifdef WITH_WSREP + if (exec_res && thd->wsrep_conflict_state != NO_CONFLICT) + { + WSREP_DEBUG("SQL apply failed, res %d conflict state: %d", + exec_res, thd->wsrep_conflict_state); + rli->abort_slave= 1; + rli->report(ERROR_LEVEL, ER_UNKNOWN_COM_ERROR, rgi->gtid_info(), + "Node has dropped from cluster"); + } +#endif + #ifndef DBUG_OFF /* This only prints information to the debug trace. @@ -3641,6 +3658,10 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli, serial_rgi->event_relay_log_pos= rli->event_relay_log_pos; exec_res= apply_event_and_update_pos(ev, thd, serial_rgi, NULL); +#ifdef WITH_WSREP + WSREP_DEBUG("apply_event_and_update_pos() result: %d", exec_res); +#endif /* WITH_WSREP */ + delete_or_keep_event_post_apply(serial_rgi, typ, ev); /* @@ -3650,6 +3671,12 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli, if (exec_res == 2) DBUG_RETURN(1); +#ifdef WITH_WSREP + mysql_mutex_lock(&thd->LOCK_wsrep_thd); + if (thd->wsrep_conflict_state == NO_CONFLICT) + { + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); +#endif /* WITH_WSREP */ if (slave_trans_retries) { int temp_err; @@ -3723,6 +3750,12 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli, serial_rgi->trans_retries)); } } +#ifdef WITH_WSREP + } else { + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + } +#endif /* WITH_WSREP */ + thread_safe_increment64(&rli->executed_entries, &slave_executed_entries_lock); DBUG_RETURN(exec_res); @@ -4465,6 +4498,9 @@ pthread_handler_t handle_slave_sql(void *arg) my_off_t saved_skip= 0; Master_info *mi= ((Master_info*)arg); Relay_log_info* rli = &mi->rli; +#ifdef WITH_WSREP + my_bool wsrep_node_dropped= FALSE; +#endif /* WITH_WSREP */ const char *errmsg; rpl_group_info *serial_rgi; rpl_sql_thread_info sql_info(mi->rpl_filter); @@ -4472,6 +4508,9 @@ pthread_handler_t handle_slave_sql(void *arg) // needs to call my_thread_init(), otherwise we get a coredump in DBUG_ stuff my_thread_init(); DBUG_ENTER("handle_slave_sql"); +#ifdef WITH_WSREP + wsrep_restart_point: +#endif /* WITH_WSREP */ LINT_INIT(saved_master_log_pos); LINT_INIT(saved_log_pos); @@ -4629,6 +4668,11 @@ pthread_handler_t handle_slave_sql(void *arg) } #endif +#ifdef WITH_WSREP + thd->wsrep_exec_mode= LOCAL_STATE; + /* synchronize with wsrep replication */ + if (WSREP_ON) wsrep_ready_wait(); +#endif DBUG_PRINT("master_info",("log_file_name: %s position: %s", rli->group_master_log_name, llstr(rli->group_master_log_pos,llbuff))); @@ -4752,10 +4796,27 @@ log '%s' at position %s, relay log '%s' position: %s%s", RPL_LOG_NAME, if (exec_relay_log_event(thd, rli, serial_rgi)) { +#ifdef WITH_WSREP + if (thd->wsrep_conflict_state != NO_CONFLICT) + { + wsrep_node_dropped= TRUE; + rli->abort_slave= TRUE; + } +#endif /* WITH_WSREP */ + DBUG_PRINT("info", ("exec_relay_log_event() failed")); // do not scare the user if SQL thread was simply killed or stopped if (!sql_slave_killed(serial_rgi)) + { slave_output_error_info(serial_rgi, thd); +#ifdef WITH_WSREP + uint32 const last_errno= rli->last_error().number; + if (WSREP_ON && last_errno == ER_UNKNOWN_COM_ERROR) + { + wsrep_node_dropped= TRUE; + } +#endif /* WITH_WSREP */ + } goto err; } } @@ -4882,6 +4943,33 @@ err_during_init: thd->rgi_fake= thd->rgi_slave= NULL; delete serial_rgi; mysql_mutex_unlock(&LOCK_thread_count); + +#ifdef WITH_WSREP + /* + If slave stopped due to node going non primary, we set global flag to + trigger automatic restart of slave when node joins back to cluster. + */ + if (wsrep_node_dropped && wsrep_restart_slave) + { + if (wsrep_ready) + { + WSREP_INFO("Slave error due to node temporarily non-primary" + "SQL slave will continue"); + wsrep_node_dropped= FALSE; + mysql_mutex_unlock(&rli->run_lock); + WSREP_DEBUG("wsrep_conflict_state now: %d", thd->wsrep_conflict_state); + WSREP_INFO("slave restart: %d", thd->wsrep_conflict_state); + thd->wsrep_conflict_state= NO_CONFLICT; + goto wsrep_restart_point; + } else { + WSREP_INFO("Slave error due to node going non-primary"); + WSREP_INFO("wsrep_restart_slave was set and therefore slave will be " + "automatically restarted when node joins back to cluster."); + wsrep_restart_slave_activated= TRUE; + } + } +#endif /* WITH_WSREP */ + /* Note: the order of the broadcast and unlock calls below (first broadcast, then unlock) is important. Otherwise a killer_thread can execute between the calls and diff --git a/sql/sp.cc b/sql/sp.cc index 52d3c04cbdf..78de0209e6f 100644 --- a/sql/sp.cc +++ b/sql/sp.cc @@ -2274,3 +2274,38 @@ sp_load_for_information_schema(THD *thd, TABLE *proc_table, String *db, thd->lex= old_lex; return sp; } +#ifdef WITH_WSREP +int wsrep_create_sp(THD *thd, uchar** buf, size_t* buf_len) +{ + String log_query; + sp_head *sp = thd->lex->sphead; + ulong saved_mode= thd->variables.sql_mode; + String retstr(64); + retstr.set_charset(system_charset_info); + + log_query.set_charset(system_charset_info); + + if (sp->m_type == TYPE_ENUM_FUNCTION) + { + sp_returns_type(thd, retstr, sp); + } + + if (!create_string(thd, &log_query, + sp->m_type, + (sp->m_explicit_name ? sp->m_db.str : NULL), + (sp->m_explicit_name ? sp->m_db.length : 0), + sp->m_name.str, sp->m_name.length, + sp->m_params.str, sp->m_params.length, + retstr.c_ptr(), retstr.length(), + sp->m_body.str, sp->m_body.length, + sp->m_chistics, &(thd->lex->definer->user), + &(thd->lex->definer->host), + saved_mode)) + { + WSREP_WARN("SP create string failed: schema: %s, query: %s", + (thd->db ? thd->db : "(null)"), thd->query()); + return 1; + } + return wsrep_to_buf_helper(thd, log_query.ptr(), log_query.length(), buf, buf_len); +} +#endif /* WITH_WSREP */ diff --git a/sql/sql_acl.cc b/sql/sql_acl.cc index c7e47c84db0..3ed1a687c5a 100644 --- a/sql/sql_acl.cc +++ b/sql/sql_acl.cc @@ -2553,8 +2553,15 @@ int check_change_password(THD *thd, const char *host, const char *user, my_error(ER_OPTION_PREVENTS_STATEMENT, MYF(0), "--skip-grant-tables"); return(1); } + +#ifdef WITH_WSREP + if ((!WSREP(thd) || !thd->wsrep_applier) && + !thd->slave_thread && !thd->security_ctx->priv_user[0] && + !in_bootstrap) +#else if (!thd->slave_thread && !thd->security_ctx->priv_user[0] && !in_bootstrap) +#endif /* WITH_WSREP */ { my_message(ER_PASSWORD_ANONYMOUS_USER, ER(ER_PASSWORD_ANONYMOUS_USER), MYF(0)); @@ -2565,7 +2572,11 @@ int check_change_password(THD *thd, const char *host, const char *user, my_error(ER_PASSWORD_NO_MATCH, MYF(0)); return 1; } + if (!thd->slave_thread && +#ifdef WITH_WSREP + (!WSREP(thd) || !thd->wsrep_applier) && +#endif /* WITH_WSREP */ (strcmp(thd->security_ctx->priv_user, user) || my_strcasecmp(system_charset_info, host, thd->security_ctx->priv_host))) @@ -2604,11 +2615,14 @@ bool change_password(THD *thd, const char *host, const char *user, Rpl_filter *rpl_filter; /* Buffer should be extended when password length is extended. */ char buff[512]; - ulong query_length; + ulong query_length=0; enum_binlog_format save_binlog_format; uint new_password_len= (uint) strlen(new_password); bool result= 1; bool use_salt= 0; +#ifdef WITH_WSREP + const CSET_STRING query_save = thd->query_string; +#endif /* WITH_WSREP */ DBUG_ENTER("change_password"); DBUG_PRINT("enter",("host: '%s' user: '%s' new_password: '%s'", host,user,new_password)); @@ -2616,6 +2630,18 @@ bool change_password(THD *thd, const char *host, const char *user, if (check_change_password(thd, host, user, new_password, new_password_len)) DBUG_RETURN(1); +#ifdef WITH_WSREP + if (WSREP(thd) && !thd->wsrep_applier) + { + query_length= sprintf(buff, "SET PASSWORD FOR '%-.120s'@'%-.120s'='%-.120s'", + user ? user : "", + host ? host : "", + new_password); + thd->set_query_inner(buff, query_length, system_charset_info); + + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, (char*)"user", NULL); + } +#endif /* WITH_WSREP */ tables.init_one_table("mysql", 5, "user", 4, "user", TL_WRITE); @@ -2697,9 +2723,23 @@ bool change_password(THD *thd, const char *host, const char *user, } end: close_mysql_tables(thd); +#ifdef WITH_WSREP + if (WSREP(thd) && !thd->wsrep_applier) + { + WSREP_TO_ISOLATION_END; + + thd->query_string = query_save; + thd->wsrep_exec_mode = LOCAL_STATE; + } +#endif /* WITH_WSREP */ thd->restore_stmt_binlog_format(save_binlog_format); DBUG_RETURN(result); +#ifdef WITH_WSREP + error: + WSREP_ERROR("Replication of SET PASSWORD failed: %s", buff); + DBUG_RETURN(result); +#endif /* WITH_WSREP */ } diff --git a/sql/sql_admin.cc b/sql/sql_admin.cc index 2cb7b596473..e1c66bceedc 100644 --- a/sql/sql_admin.cc +++ b/sql/sql_admin.cc @@ -1194,6 +1194,7 @@ bool Sql_cmd_analyze_table::execute(THD *thd) if (check_table_access(thd, SELECT_ACL | INSERT_ACL, first_table, FALSE, UINT_MAX, FALSE)) goto error; + WSREP_TO_ISOLATION_BEGIN_WRTCHK(NULL, NULL, first_table); thd->enable_slow_log= opt_log_slow_admin_statements; res= mysql_admin_table(thd, first_table, &m_lex->check_opt, "analyze", lock_type, 1, 0, 0, 0, @@ -1249,6 +1250,7 @@ bool Sql_cmd_optimize_table::execute(THD *thd) if (check_table_access(thd, SELECT_ACL | INSERT_ACL, first_table, FALSE, UINT_MAX, FALSE)) goto error; /* purecov: inspected */ + WSREP_TO_ISOLATION_BEGIN_WRTCHK(NULL, NULL, first_table); thd->enable_slow_log= opt_log_slow_admin_statements; res= (specialflag & SPECIAL_NO_NEW_FUNC) ? mysql_recreate_table(thd, first_table, true) : @@ -1282,6 +1284,7 @@ bool Sql_cmd_repair_table::execute(THD *thd) FALSE, UINT_MAX, FALSE)) goto error; /* purecov: inspected */ thd->enable_slow_log= opt_log_slow_admin_statements; + WSREP_TO_ISOLATION_BEGIN_WRTCHK(NULL, NULL, first_table); res= mysql_admin_table(thd, first_table, &m_lex->check_opt, "repair", TL_WRITE, 1, MY_TEST(m_lex->check_opt.sql_flags & TT_USEFRM), diff --git a/sql/sql_alter.cc b/sql/sql_alter.cc index 97b9c127c22..a39f07ae35d 100644 --- a/sql/sql_alter.cc +++ b/sql/sql_alter.cc @@ -18,7 +18,9 @@ // mysql_exchange_partition #include "sql_base.h" // open_temporary_tables #include "sql_alter.h" - +#ifdef WITH_WSREP +#include "wsrep_mysqld.h" +#endif /* WITH_WSREP */ Alter_info::Alter_info(const Alter_info &rhs, MEM_ROOT *mem_root) :drop_list(rhs.drop_list, mem_root), alter_list(rhs.alter_list, mem_root), @@ -303,6 +305,17 @@ bool Sql_cmd_alter_table::execute(THD *thd) thd->enable_slow_log= opt_log_slow_admin_statements; +#ifdef WITH_WSREP + TABLE *find_temporary_table(THD *thd, const TABLE_LIST *tl); + + if ((!thd->is_current_stmt_binlog_format_row() || + !find_temporary_table(thd, first_table))) + { + WSREP_TO_ISOLATION_BEGIN(((lex->name.str) ? select_lex->db : NULL), + ((lex->name.str) ? lex->name.str : NULL), + first_table); + } +#endif /* WITH_WSREP */ result= mysql_alter_table(thd, select_lex->db, lex->name.str, &create_info, first_table, @@ -312,6 +325,12 @@ bool Sql_cmd_alter_table::execute(THD *thd) lex->ignore); DBUG_RETURN(result); + +#ifdef WITH_WSREP +error: + WSREP_WARN("ALTER TABLE isolation failure"); + DBUG_RETURN(TRUE); +#endif /* WITH_WSREP */ } bool Sql_cmd_discard_import_tablespace::execute(THD *thd) diff --git a/sql/sql_base.cc b/sql/sql_base.cc index 73f71a07f15..2c7f3147901 100644 --- a/sql/sql_base.cc +++ b/sql/sql_base.cc @@ -62,6 +62,10 @@ #include <io.h> #endif +#ifdef WITH_WSREP +#include "wsrep_mysqld.h" +#include "wsrep_thd.h" +#endif // WITH_WSREP bool No_such_table_error_handler::handle_condition(THD *, @@ -3667,7 +3671,7 @@ thr_lock_type read_lock_type_for_table(THD *thd, */ bool log_on= mysql_bin_log.is_open() && thd->variables.sql_log_bin; ulong binlog_format= thd->variables.binlog_format; - if ((log_on == FALSE) || (binlog_format == BINLOG_FORMAT_ROW) || + if ((log_on == FALSE) || (WSREP_FORMAT(binlog_format) == BINLOG_FORMAT_ROW) || (table_list->table->s->table_category == TABLE_CATEGORY_LOG) || (table_list->table->s->table_category == TABLE_CATEGORY_PERFORMANCE) || !(is_update_query(prelocking_ctx->sql_command) || @@ -4715,9 +4719,38 @@ restart: tbl->reginfo.lock_type= tables->lock_type; } } +#ifdef WITH_WSREP + if (wsrep_replicate_myisam && + (*start) && + (*start)->table && + (*start)->table->file->ht->db_type == DB_TYPE_MYISAM && + thd->get_command() != COM_STMT_PREPARE && + ((thd->lex->sql_command == SQLCOM_INSERT || + thd->lex->sql_command == SQLCOM_INSERT_SELECT || + thd->lex->sql_command == SQLCOM_REPLACE || + thd->lex->sql_command == SQLCOM_REPLACE_SELECT || + thd->lex->sql_command == SQLCOM_UPDATE || + thd->lex->sql_command == SQLCOM_UPDATE_MULTI || + thd->lex->sql_command == SQLCOM_LOAD || + thd->lex->sql_command == SQLCOM_DELETE))) + { + WSREP_TO_ISOLATION_BEGIN(NULL, NULL, (*start)); + } + error: +#endif err: THD_STAGE_INFO(thd, stage_after_opening_tables); + +#ifdef WITH_WSREP + if (WSREP(thd)) + thd_proc_info(thd, "exit open_tables()"); + else + thd_proc_info(thd, 0); +#else /* WITH_WSREP */ + thd_proc_info(thd, 0); +#endif /* WITH_WSREP */ + free_root(&new_frm_mem, MYF(0)); // Free pre-alloced block if (error && *table_to_open) @@ -5183,7 +5216,18 @@ end: trans_rollback_stmt(thd); close_thread_tables(thd); } + THD_STAGE_INFO(thd, stage_after_opening_tables); + +#ifdef WITH_WSREP + if (WSREP(thd)) + thd_proc_info(thd, "End opening table"); + else + thd_proc_info(thd, 0); +#else /* WITH_WSREP */ + thd_proc_info(thd, 0); +#endif /* WITH_WSREP */ + DBUG_RETURN(table); } @@ -9051,7 +9095,19 @@ bool mysql_notify_thread_having_shared_lock(THD *thd, THD *in_use, (e.g. see partitioning code). */ if (!thd_table->needs_reopen()) +#ifdef WITH_WSREP + { + signalled|= mysql_lock_abort_for_thread(thd, thd_table); + if (thd && WSREP(thd) && wsrep_thd_is_BF((void *)thd, true)) + { + WSREP_DEBUG("remove_table_from_cache: %llu", + (unsigned long long) thd->real_id); + wsrep_abort_thd((void *)thd, (void *)in_use, FALSE); + } + } +#else signalled|= mysql_lock_abort_for_thread(thd, thd_table); +#endif } mysql_mutex_unlock(&in_use->LOCK_thd_data); } diff --git a/sql/sql_builtin.cc.in b/sql/sql_builtin.cc.in index 63850650ac9..2de475b0a76 100644 --- a/sql/sql_builtin.cc.in +++ b/sql/sql_builtin.cc.in @@ -25,7 +25,11 @@ extern #endif builtin_maria_plugin @mysql_mandatory_plugins@ @mysql_optional_plugins@ - builtin_maria_binlog_plugin, builtin_maria_mysql_password_plugin; + builtin_maria_binlog_plugin, +#ifdef WITH_WSREP + builtin_wsrep_plugin@mysql_plugin_defs@, +#endif /* WITH_WSREP */ + builtin_maria_mysql_password_plugin; struct st_maria_plugin *mysql_optional_plugins[]= { @@ -35,5 +39,8 @@ struct st_maria_plugin *mysql_optional_plugins[]= struct st_maria_plugin *mysql_mandatory_plugins[]= { builtin_maria_binlog_plugin, builtin_maria_mysql_password_plugin, +#ifdef WITH_WSREP + builtin_wsrep_plugin@mysql_plugin_defs@, +#endif /* WITH_WSREP */ @mysql_mandatory_plugins@ 0 }; diff --git a/sql/sql_cache.cc b/sql/sql_cache.cc index e1efb1e85d6..2e9e90346fb 100644 --- a/sql/sql_cache.cc +++ b/sql/sql_cache.cc @@ -1945,6 +1945,13 @@ def_week_frmt: %lu, in_trans: %d, autocommit: %d", (int)flags.autocommit)); memcpy((uchar *)(sql + (tot_length - QUERY_CACHE_FLAGS_SIZE)), (uchar*) &flags, QUERY_CACHE_FLAGS_SIZE); + +#ifdef WITH_WSREP + bool once_more; + once_more= true; +lookup: +#endif /* WITH_WSREP */ + query_block = (Query_cache_block *) my_hash_search(&queries, (uchar*) sql, tot_length); /* Quick abort on unlocked data */ @@ -1957,6 +1964,19 @@ def_week_frmt: %lu, in_trans: %d, autocommit: %d", } DBUG_PRINT("qcache", ("Query in query hash 0x%lx", (ulong)query_block)); +#ifdef WITH_WSREP + if (once_more && WSREP_CLIENT(thd) && wsrep_must_sync_wait(thd)) + { + unlock(); + if (wsrep_sync_wait(thd)) + goto err; + if (try_lock(thd, Query_cache::TIMEOUT)) + goto err; + once_more= false; + goto lookup; + } +#endif /* WITH_WSREP */ + /* Now lock and test that nothing changed while blocks was unlocked */ BLOCK_LOCK_RD(query_block); diff --git a/sql/sql_class.cc b/sql/sql_class.cc index da1a0e43ac1..f0543becc0c 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -63,6 +63,10 @@ #include "sql_parse.h" // is_update_query #include "sql_callback.h" #include "lock.h" +#ifdef WITH_WSREP +#include "wsrep_mysqld.h" +#include "wsrep_thd.h" +#endif #include "sql_connect.h" /* @@ -813,6 +817,180 @@ char *thd_get_error_context_description(THD *thd, char *buffer, return buffer; } +#ifdef WITH_WSREP +extern int wsrep_on(void *thd) +{ + return (int)(WSREP(((THD*)thd))); +} +extern "C" bool wsrep_thd_is_wsrep_on(THD *thd) +{ + return thd->variables.wsrep_on; +} + +extern "C" bool wsrep_consistency_check(void *thd) +{ + return ((THD*)thd)->wsrep_consistency_check == CONSISTENCY_CHECK_RUNNING; +} + +extern "C" void wsrep_thd_set_exec_mode(THD *thd, enum wsrep_exec_mode mode) +{ + thd->wsrep_exec_mode= mode; +} +extern "C" void wsrep_thd_set_query_state( + THD *thd, enum wsrep_query_state state) +{ + thd->wsrep_query_state= state; +} +extern "C" void wsrep_thd_set_conflict_state( + THD *thd, enum wsrep_conflict_state state) +{ + if (WSREP(thd)) thd->wsrep_conflict_state= state; +} + + +extern "C" enum wsrep_exec_mode wsrep_thd_exec_mode(THD *thd) +{ + return thd->wsrep_exec_mode; +} + +extern "C" const char *wsrep_thd_exec_mode_str(THD *thd) +{ + return + (!thd) ? "void" : + (thd->wsrep_exec_mode == LOCAL_STATE) ? "local" : + (thd->wsrep_exec_mode == REPL_RECV) ? "applier" : + (thd->wsrep_exec_mode == TOTAL_ORDER) ? "total order" : + (thd->wsrep_exec_mode == LOCAL_COMMIT) ? "local commit" : "void"; +} + +extern "C" enum wsrep_query_state wsrep_thd_query_state(THD *thd) +{ + return thd->wsrep_query_state; +} + +extern "C" const char *wsrep_thd_query_state_str(THD *thd) +{ + return + (!thd) ? "void" : + (thd->wsrep_query_state == QUERY_IDLE) ? "idle" : + (thd->wsrep_query_state == QUERY_EXEC) ? "executing" : + (thd->wsrep_query_state == QUERY_COMMITTING) ? "committing" : + (thd->wsrep_query_state == QUERY_EXITING) ? "exiting" : + (thd->wsrep_query_state == QUERY_ROLLINGBACK) ? "rolling back" : "void"; +} + +extern "C" enum wsrep_conflict_state wsrep_thd_conflict_state(THD *thd) +{ + return thd->wsrep_conflict_state; +} +extern "C" const char *wsrep_thd_conflict_state_str(THD *thd) +{ + return + (!thd) ? "void" : + (thd->wsrep_conflict_state == NO_CONFLICT) ? "no conflict" : + (thd->wsrep_conflict_state == MUST_ABORT) ? "must abort" : + (thd->wsrep_conflict_state == ABORTING) ? "aborting" : + (thd->wsrep_conflict_state == MUST_REPLAY) ? "must replay" : + (thd->wsrep_conflict_state == REPLAYING) ? "replaying" : + (thd->wsrep_conflict_state == RETRY_AUTOCOMMIT) ? "retrying" : + (thd->wsrep_conflict_state == CERT_FAILURE) ? "cert failure" : "void"; +} + +extern "C" wsrep_ws_handle_t* wsrep_thd_ws_handle(THD *thd) +{ + return &thd->wsrep_ws_handle; +} + +extern "C" void wsrep_thd_LOCK(THD *thd) +{ + mysql_mutex_lock(&thd->LOCK_wsrep_thd); +} +extern "C" void wsrep_thd_UNLOCK(THD *thd) +{ + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); +} +extern "C" time_t wsrep_thd_query_start(THD *thd) +{ + return thd->query_start(); +} +extern "C" uint32 wsrep_thd_wsrep_rand(THD *thd) +{ + return thd->wsrep_rand; +} +extern "C" my_thread_id wsrep_thd_thread_id(THD *thd) +{ + return thd->thread_id; +} +extern "C" wsrep_seqno_t wsrep_thd_trx_seqno(THD *thd) +{ + return (thd) ? thd->wsrep_trx_meta.gtid.seqno : WSREP_SEQNO_UNDEFINED; +} +extern "C" query_id_t wsrep_thd_query_id(THD *thd) +{ + return thd->query_id; +} +extern "C" char *wsrep_thd_query(THD *thd) +{ + return (thd) ? thd->query() : NULL; +} +extern "C" query_id_t wsrep_thd_wsrep_last_query_id(THD *thd) +{ + return thd->wsrep_last_query_id; +} +extern "C" void wsrep_thd_set_wsrep_last_query_id(THD *thd, query_id_t id) +{ + thd->wsrep_last_query_id= id; +} +extern "C" void wsrep_thd_awake(THD *thd, my_bool signal) +{ + if (signal) + { + mysql_mutex_lock(&thd->LOCK_thd_data); + thd->awake(KILL_QUERY); + mysql_mutex_unlock(&thd->LOCK_thd_data); + } + else + { + mysql_mutex_lock(&LOCK_wsrep_replaying); + mysql_cond_broadcast(&COND_wsrep_replaying); + mysql_mutex_unlock(&LOCK_wsrep_replaying); + } +} +extern "C" int wsrep_thd_retry_counter(THD *thd) +{ + return(thd->wsrep_retry_counter); +} +extern "C" bool wsrep_thd_skip_append_keys(THD *thd) +{ + return thd->wsrep_skip_append_keys; +} + +extern int +wsrep_trx_order_before(void *thd1, void *thd2) +{ + if (wsrep_thd_trx_seqno((THD*)thd1) < wsrep_thd_trx_seqno((THD*)thd2)) { + WSREP_DEBUG("BF conflict, order: %lld %lld\n", + (long long)wsrep_thd_trx_seqno((THD*)thd1), + (long long)wsrep_thd_trx_seqno((THD*)thd2)); + return 1; + } + WSREP_DEBUG("waiting for BF, trx order: %lld %lld\n", + (long long)wsrep_thd_trx_seqno((THD*)thd1), + (long long)wsrep_thd_trx_seqno((THD*)thd2)); + return 0; +} +extern "C" int +wsrep_trx_is_aborting(void *thd_ptr) +{ + if (thd_ptr) { + if ((((THD *)thd_ptr)->wsrep_conflict_state == MUST_ABORT) || + (((THD *)thd_ptr)->wsrep_conflict_state == ABORTING)) { + return 1; + } + } + return 0; +} +#endif #if MARIA_PLUGIN_INTERFACE_VERSION < 0x0200 /** @@ -856,7 +1034,11 @@ bool Drop_table_error_handler::handle_condition(THD *thd, } +#ifdef WITH_WSREP +THD::THD(bool is_applier) +#else THD::THD() +#endif :Statement(&main_lex, &main_mem_root, STMT_CONVENTIONAL_EXECUTION, /* statement id */ 0), rli_fake(0), rgi_fake(0), rgi_slave(NULL), @@ -892,8 +1074,20 @@ THD::THD() debug_sync_control(0), #endif /* defined(ENABLED_DEBUG_SYNC) */ wait_for_commit_ptr(0), - main_da(0, false, false), + main_da(0, false, false), m_stmt_da(&main_da) +#ifdef WITH_WSREP + , + wsrep_applier(is_applier), + wsrep_applier_closing(FALSE), + wsrep_client_thread(0), + wsrep_po_handle(WSREP_PO_INITIALIZER), + wsrep_po_cnt(0), + wsrep_po_in_trans(FALSE), + wsrep_apply_format(0), + wsrep_apply_toi(false), + wsrep_skip_append_keys(false) +#endif { ulong tmp; @@ -1003,6 +1197,23 @@ THD::THD() m_command=COM_CONNECT; *scramble= '\0'; +#ifdef WITH_WSREP + mysql_mutex_init(key_LOCK_wsrep_thd, &LOCK_wsrep_thd, MY_MUTEX_INIT_FAST); + wsrep_ws_handle.trx_id = WSREP_UNDEFINED_TRX_ID; + wsrep_ws_handle.opaque = NULL; + wsrep_retry_counter = 0; + wsrep_PA_safe = true; + wsrep_retry_query = NULL; + wsrep_retry_query_len = 0; + wsrep_retry_command = COM_CONNECT; + wsrep_consistency_check = NO_CONSISTENCY_CHECK; + wsrep_status_vars = 0; + wsrep_mysql_replicated = 0; + wsrep_TOI_pre_query = NULL; + wsrep_TOI_pre_query_len = 0; + wsrep_sync_wait_gtid = WSREP_GTID_UNDEFINED; + wsrep_affected_rows = 0; +#endif /* Call to init() below requires fully initialized Open_tables_state. */ reset_open_tables_state(this); @@ -1042,6 +1253,13 @@ THD::THD() my_rnd_init(&rand, tmp + (ulong) &rand, tmp + (ulong) ::global_query_id); substitute_null_with_insert_id = FALSE; thr_lock_info_init(&lock_info); /* safety: will be reset after start */ +#ifdef WITH_WSREP + lock_info.mysql_thd= (void *)this; + lock_info.in_lock_tables= false; +#ifdef WSREP_PROC_INFO + wsrep_info[sizeof(wsrep_info) - 1] = '\0'; /* make sure it is 0-terminated */ +#endif /* WSREP_PROC_INFO */ +#endif /* WITH_WSREP */ m_token_array= NULL; if (max_digest_length > 0) @@ -1396,6 +1614,32 @@ void THD::init(void) last_commit_gtid.seq_no= 0; status_in_global= 0; +#ifdef WITH_WSREP + wsrep_exec_mode= wsrep_applier ? REPL_RECV : LOCAL_STATE; + wsrep_conflict_state= NO_CONFLICT; + wsrep_query_state= QUERY_IDLE; + wsrep_last_query_id= 0; + wsrep_trx_meta.gtid= WSREP_GTID_UNDEFINED; + wsrep_trx_meta.depends_on= WSREP_SEQNO_UNDEFINED; + wsrep_converted_lock_session= false; + wsrep_retry_counter= 0; + wsrep_rgi= NULL; + wsrep_PA_safe= true; + wsrep_consistency_check = NO_CONSISTENCY_CHECK; + wsrep_mysql_replicated = 0; + wsrep_TOI_pre_query = NULL; + wsrep_TOI_pre_query_len = 0; + wsrep_sync_wait_gtid = WSREP_GTID_UNDEFINED; + wsrep_affected_rows = 0; + + /* + @@wsrep_causal_reads is now being handled via wsrep_sync_wait, update it + appropriately. + */ + if (variables.wsrep_causal_reads) + variables.wsrep_sync_wait|= WSREP_SYNC_WAIT_BEFORE_READ; +#endif /* WITH_WSREP */ + if (variables.sql_log_bin) variables.option_bits|= OPTION_BIN_LOG; else @@ -1591,6 +1835,13 @@ THD::~THD() mysql_mutex_lock(&LOCK_thd_data); mysql_mutex_unlock(&LOCK_thd_data); +#ifdef WITH_WSREP + mysql_mutex_lock(&LOCK_wsrep_thd); + mysql_mutex_unlock(&LOCK_wsrep_thd); + mysql_mutex_destroy(&LOCK_wsrep_thd); + if (wsrep_rgi) delete wsrep_rgi; + wsrep_free_status(this); +#endif /* Close connection */ #ifndef EMBEDDED_LIBRARY if (net.vio) @@ -1772,6 +2023,9 @@ void THD::awake(killed_state state_to_set) /* Interrupt target waiting inside a storage engine. */ if (state_to_set != NOT_KILLED) +#ifdef WITH_WSREP + /* TODO: prevent applier close here */ +#endif /* WITH_WSREP */ ha_kill_query(this, thd_kill_level(this)); /* Broadcast a condition to kick the target if it is waiting on it. */ @@ -1920,7 +2174,19 @@ bool THD::notify_shared_lock(MDL_context_owner *ctx_in_use, thread can see those instances (e.g. see partitioning code). */ if (!thd_table->needs_reopen()) +#ifdef WITH_WSREP + { signalled|= mysql_lock_abort_for_thread(this, thd_table); + if (WSREP(this) && wsrep_thd_is_BF((void *)this, FALSE)) + { + WSREP_DEBUG("remove_table_from_cache: %llu", + (unsigned long long) this->real_id); + wsrep_abort_thd((void *)this, (void *)in_use, FALSE); + } + } +#else + signalled|= mysql_lock_abort_for_thread(this, thd_table); +#endif } } mysql_mutex_unlock(&in_use->LOCK_thd_data); @@ -2103,12 +2369,25 @@ void THD::cleanup_after_query() /* reset table map for multi-table update */ table_map_for_update= 0; m_binlog_invoker= INVOKER_NONE; +#ifdef WITH_WSREP + if (TOTAL_ORDER == wsrep_exec_mode) + { + wsrep_exec_mode = LOCAL_STATE; + } + //wsrep_trx_seqno = 0; +#endif /* WITH_WSREP */ #ifndef EMBEDDED_LIBRARY if (rgi_slave) rgi_slave->cleanup_after_query(); #endif +#ifdef WITH_WSREP + wsrep_sync_wait_gtid= WSREP_GTID_UNDEFINED; + if (!in_active_multi_stmt_transaction()) + wsrep_affected_rows= 0; +#endif /* WITH_WSREP */ + DBUG_VOID_RETURN; } @@ -2508,6 +2787,13 @@ bool sql_exchange::escaped_given(void) bool select_send::send_result_set_metadata(List<Item> &list, uint flags) { bool res; +#ifdef WITH_WSREP + if (WSREP(thd) && thd->wsrep_retry_query) + { + WSREP_DEBUG("skipping select metadata"); + return FALSE; + } +#endif /* WITH_WSREP */ if (!(res= thd->protocol->send_result_set_metadata(&list, flags))) is_result_set_started= 1; return res; @@ -4485,8 +4771,13 @@ extern "C" int thd_non_transactional_update(const MYSQL_THD thd) extern "C" int thd_binlog_format(const MYSQL_THD thd) { +#ifdef WITH_WSREP + if (((WSREP(thd) && wsrep_emulate_bin_log) || mysql_bin_log.is_open()) && + (thd->variables.option_bits & OPTION_BIN_LOG)) +#else if (mysql_bin_log.is_open() && (thd->variables.option_bits & OPTION_BIN_LOG)) - return (int) thd->variables.binlog_format; +#endif + return (int) WSREP_FORMAT(thd->variables.binlog_format); else return BINLOG_FORMAT_UNSPEC; } @@ -4965,7 +5256,11 @@ void THD::get_definer(LEX_USER *definer, bool role) { binlog_invoker(role); #if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) +#ifdef WITH_WSREP + if ((wsrep_applier || slave_thread) && has_invoker()) +#else if (slave_thread && has_invoker()) +#endif { definer->user = invoker_user; definer->host= invoker_host; @@ -5315,7 +5610,7 @@ int THD::decide_logging_format(TABLE_LIST *tables) binlog by filtering rules. */ if (mysql_bin_log.is_open() && (variables.option_bits & OPTION_BIN_LOG) && - !(variables.binlog_format == BINLOG_FORMAT_STMT && + !(WSREP_FORMAT(variables.binlog_format) == BINLOG_FORMAT_STMT && !binlog_filter->db_ok(db))) { /* @@ -5552,7 +5847,7 @@ int THD::decide_logging_format(TABLE_LIST *tables) */ my_error((error= ER_BINLOG_ROW_INJECTION_AND_STMT_ENGINE), MYF(0)); } - else if (variables.binlog_format == BINLOG_FORMAT_ROW && + else if (WSREP_FORMAT(variables.binlog_format) == BINLOG_FORMAT_ROW && sqlcom_can_generate_row_events(this)) { /* @@ -5581,7 +5876,7 @@ int THD::decide_logging_format(TABLE_LIST *tables) else { /* binlog_format = STATEMENT */ - if (variables.binlog_format == BINLOG_FORMAT_STMT) + if (WSREP_FORMAT(variables.binlog_format) == BINLOG_FORMAT_STMT) { if (lex->is_stmt_row_injection()) { @@ -5598,7 +5893,14 @@ int THD::decide_logging_format(TABLE_LIST *tables) 5. Error: Cannot modify table that uses a storage engine limited to row-logging when binlog_format = STATEMENT */ +#ifdef WITH_WSREP + if (!WSREP(this) || wsrep_exec_mode == LOCAL_STATE) + { +#endif /* WITH_WSREP */ my_error((error= ER_BINLOG_STMT_MODE_AND_ROW_ENGINE), MYF(0), ""); +#ifdef WITH_WSREP + } +#endif /* WITH_WSREP */ } else if (is_write && (unsafe_flags= lex->get_stmt_unsafe_flags()) != 0) { @@ -5710,7 +6012,7 @@ int THD::decide_logging_format(TABLE_LIST *tables) "and binlog_filter->db_ok(db) = %d", mysql_bin_log.is_open(), (variables.option_bits & OPTION_BIN_LOG), - variables.binlog_format, + WSREP_FORMAT(variables.binlog_format), binlog_filter->db_ok(db))); #endif @@ -5947,7 +6249,13 @@ int THD::binlog_write_row(TABLE* table, bool is_trans, MY_BITMAP const* cols, size_t colcnt, uchar const *record) { +#ifdef WITH_WSREP + DBUG_ASSERT(is_current_stmt_binlog_format_row() && + ((WSREP(this) && wsrep_emulate_bin_log) || + mysql_bin_log.is_open())); +#else DBUG_ASSERT(is_current_stmt_binlog_format_row() && mysql_bin_log.is_open()); +#endif /* Pack records into format for transfer. We are allocating more @@ -5981,7 +6289,13 @@ int THD::binlog_update_row(TABLE* table, bool is_trans, const uchar *before_record, const uchar *after_record) { +#ifdef WITH_WSREP + DBUG_ASSERT(is_current_stmt_binlog_format_row() && + ((WSREP(this) && wsrep_emulate_bin_log) + || mysql_bin_log.is_open())); +#else DBUG_ASSERT(is_current_stmt_binlog_format_row() && mysql_bin_log.is_open()); +#endif size_t const before_maxlen = max_row_length(table, before_record); size_t const after_maxlen = max_row_length(table, after_record); @@ -6030,7 +6344,13 @@ int THD::binlog_delete_row(TABLE* table, bool is_trans, MY_BITMAP const* cols, size_t colcnt, uchar const *record) { +#ifdef WITH_WSREP + DBUG_ASSERT(is_current_stmt_binlog_format_row() && + ((WSREP(this) && wsrep_emulate_bin_log) + || mysql_bin_log.is_open())); +#else DBUG_ASSERT(is_current_stmt_binlog_format_row() && mysql_bin_log.is_open()); +#endif /* Pack records into format for transfer. We are allocating more @@ -6065,7 +6385,11 @@ int THD::binlog_remove_pending_rows_event(bool clear_maps, { DBUG_ENTER("THD::binlog_remove_pending_rows_event"); +#ifdef WITH_WSREP + if (!(WSREP_EMULATE_BINLOG(this) || mysql_bin_log.is_open())) +#else if (!mysql_bin_log.is_open()) +#endif DBUG_RETURN(0); /* Ensure that all events in a GTID group are in the same cache */ @@ -6088,7 +6412,11 @@ int THD::binlog_flush_pending_rows_event(bool stmt_end, bool is_transactional) mode: it might be the case that we left row-based mode before flushing anything (e.g., if we have explicitly locked tables). */ +#ifdef WITH_WSREP + if (!(WSREP_EMULATE_BINLOG(this) || mysql_bin_log.is_open())) +#else if (!mysql_bin_log.is_open()) +#endif DBUG_RETURN(0); /* Ensure that all events in a GTID group are in the same cache */ @@ -6350,7 +6678,12 @@ int THD::binlog_query(THD::enum_binlog_query_type qtype, char const *query_arg, DBUG_ENTER("THD::binlog_query"); DBUG_PRINT("enter", ("qtype: %s query: '%-.*s'", show_query_type(qtype), (int) query_len, query_arg)); +#ifdef WITH_WSREP + DBUG_ASSERT(query_arg && (WSREP_EMULATE_BINLOG(this) + || mysql_bin_log.is_open())); +#else DBUG_ASSERT(query_arg && mysql_bin_log.is_open()); +#endif /* If this is withing a BEGIN ... COMMIT group, don't log it */ if (variables.option_bits & OPTION_GTID_BEGIN) @@ -6368,7 +6701,6 @@ int THD::binlog_query(THD::enum_binlog_query_type qtype, char const *query_arg, */ DBUG_RETURN(0); } - /* If we are not in prelocked mode, mysql_unlock_tables() will be called after this binlog_query(), so we have to flush the pending diff --git a/sql/sql_class.h b/sql/sql_class.h index 6d2c9ef89fd..beef22a8140 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -58,6 +58,20 @@ void set_thd_stage_info(void *thd, #include "my_apc.h" #include "rpl_gtid.h" +#ifdef WITH_WSREP +#include "wsrep_mysqld.h" +struct wsrep_thd_shadow { + ulonglong options; + uint server_status; + enum wsrep_exec_mode wsrep_exec_mode; + Vio *vio; + ulong tx_isolation; + char *db; + size_t db_length; + my_hrtime_t user_time; + longlong row_count_func; +}; +#endif class Reprepare_observer; class Relay_log_info; struct rpl_group_info; @@ -640,6 +654,14 @@ typedef struct system_variables ulong wt_timeout_short, wt_deadlock_search_depth_short; ulong wt_timeout_long, wt_deadlock_search_depth_long; +#ifdef WITH_WSREP + my_bool wsrep_on; + my_bool wsrep_causal_reads; + my_bool wsrep_dirty_reads; + uint wsrep_sync_wait; + ulong wsrep_retry_autocommit; + ulong wsrep_OSU_method; +#endif double long_query_time_double; my_bool pseudo_slave_mode; @@ -2816,7 +2838,11 @@ public: /* Debug Sync facility. See debug_sync.cc. */ struct st_debug_sync_control *debug_sync_control; #endif /* defined(ENABLED_DEBUG_SYNC) */ +#ifdef WITH_WSREP + THD(bool is_applier = false); +#else THD(); +#endif ~THD(); void init(void); @@ -3345,7 +3371,7 @@ public: tests fail and so force them to propagate the lex->binlog_row_based_if_mixed upwards to the caller. */ - if ((variables.binlog_format == BINLOG_FORMAT_MIXED) && + if ((WSREP_FORMAT(variables.binlog_format) == BINLOG_FORMAT_MIXED) && (in_sub_stmt == 0)) set_current_stmt_binlog_format_row(); @@ -3397,7 +3423,7 @@ public: show_system_thread(system_thread))); if (in_sub_stmt == 0) { - if (variables.binlog_format == BINLOG_FORMAT_ROW) + if (WSREP_FORMAT(variables.binlog_format) == BINLOG_FORMAT_ROW) set_current_stmt_binlog_format_row(); else if (temporary_tables == NULL) set_current_stmt_binlog_format_stmt(); @@ -3798,6 +3824,50 @@ public: return (temporary_tables || (rgi_slave && unlikely(rgi_have_temporary_tables()))); } + +#ifdef WITH_WSREP + const bool wsrep_applier; /* dedicated slave applier thread */ + bool wsrep_applier_closing; /* applier marked to close */ + bool wsrep_client_thread; /* to identify client threads*/ + enum wsrep_exec_mode wsrep_exec_mode; + query_id_t wsrep_last_query_id; + enum wsrep_query_state wsrep_query_state; + enum wsrep_conflict_state wsrep_conflict_state; + mysql_mutex_t LOCK_wsrep_thd; + // changed from wsrep_seqno_t to wsrep_trx_meta_t in wsrep API rev 75 + // wsrep_seqno_t wsrep_trx_seqno; + wsrep_trx_meta_t wsrep_trx_meta; + uint32 wsrep_rand; + rpl_group_info* wsrep_rgi; + bool wsrep_converted_lock_session; + wsrep_ws_handle_t wsrep_ws_handle; +#ifdef WSREP_PROC_INFO + char wsrep_info[128]; /* string for dynamic proc info */ +#endif /* WSREP_PROC_INFO */ + ulong wsrep_retry_counter; // of autocommit + bool wsrep_PA_safe; + char* wsrep_retry_query; + size_t wsrep_retry_query_len; + enum enum_server_command wsrep_retry_command; + enum wsrep_consistency_check_mode + wsrep_consistency_check; + wsrep_stats_var* wsrep_status_vars; + int wsrep_mysql_replicated; + const char* wsrep_TOI_pre_query; /* a query to apply before + the actual TOI query */ + size_t wsrep_TOI_pre_query_len; + wsrep_po_handle_t wsrep_po_handle; + size_t wsrep_po_cnt; + my_bool wsrep_po_in_trans; +#ifdef GTID_SUPPORT + rpl_sid wsrep_po_sid; +#endif /* GTID_SUPPORT */ + void* wsrep_apply_format; + bool wsrep_apply_toi; /* applier processing in TOI */ + bool wsrep_skip_append_keys; + wsrep_gtid_t wsrep_sync_wait_gtid; + ulong wsrep_affected_rows; +#endif /* WITH_WSREP */ }; @@ -4113,6 +4183,8 @@ class select_insert :public select_result_interceptor { virtual int send_data(List<Item> &items); virtual void store_values(List<Item> &values); virtual bool can_rollback_data() { return 0; } + bool prepare_eof(); + bool send_ok_packet(); bool send_eof(); virtual void abort_result_set(); /* not implemented: select_insert is never re-used in prepared statements */ diff --git a/sql/sql_connect.cc b/sql/sql_connect.cc index 63f9bfae47a..695d1095125 100644 --- a/sql/sql_connect.cc +++ b/sql/sql_connect.cc @@ -43,6 +43,9 @@ HASH global_index_stats; extern mysql_mutex_t LOCK_global_user_client_stats; extern mysql_mutex_t LOCK_global_table_stats; extern mysql_mutex_t LOCK_global_index_stats; +#ifdef WITH_WSREP +#include "wsrep_mysqld.h" +#endif /* Get structure for logging connection data for the current user @@ -1170,6 +1173,17 @@ exit: void end_connection(THD *thd) { NET *net= &thd->net; +#ifdef WITH_WSREP + if (WSREP(thd)) + { + wsrep_status_t rcode= wsrep->free_connection(wsrep, thd->thread_id); + if (rcode) { + WSREP_WARN("wsrep failed to free connection context: %lu, code: %d", + thd->thread_id, rcode); + } + } + thd->wsrep_client_thread= 0; +#endif plugin_thdvar_cleanup(thd); if (thd->user_connect) @@ -1305,6 +1319,9 @@ bool thd_prepare_connection(THD *thd) (char *) thd->security_ctx->host_or_ip); prepare_new_connection_state(thd); +#ifdef WITH_WSREP + thd->wsrep_client_thread= 1; +#endif /* WITH_WSREP */ return FALSE; } @@ -1378,7 +1395,15 @@ void do_handle_one_connection(THD *thd_arg) break; } end_connection(thd); - + +#ifdef WITH_WSREP + if (WSREP(thd)) + { + mysql_mutex_lock(&thd->LOCK_wsrep_thd); + thd->wsrep_query_state= QUERY_EXITING; + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + } +#endif end_thread: close_connection(thd); diff --git a/sql/sql_delete.cc b/sql/sql_delete.cc index 6b0135d92d9..ad5bc23a31b 100644 --- a/sql/sql_delete.cc +++ b/sql/sql_delete.cc @@ -643,7 +643,11 @@ cleanup: /* See similar binlogging code in sql_update.cc, for comments */ if ((error < 0) || thd->transaction.stmt.modified_non_trans_table) { +#ifdef WITH_WSREP + if ((WSREP_EMULATE_BINLOG(thd) || mysql_bin_log.is_open())) +#else if (mysql_bin_log.is_open()) +#endif { int errcode= 0; if (error < 0) @@ -1094,7 +1098,11 @@ void multi_delete::abort_result_set() /* there is only side effects; to binlog with the error */ +#ifdef WITH_WSREP + if (WSREP_EMULATE_BINLOG(thd) || mysql_bin_log.is_open()) +#else if (mysql_bin_log.is_open()) +#endif { int errcode= query_error_code(thd, thd->killed == NOT_KILLED); /* possible error of writing binary log is ignored deliberately */ @@ -1270,7 +1278,11 @@ bool multi_delete::send_eof() } if ((local_error == 0) || thd->transaction.stmt.modified_non_trans_table) { +#ifdef WITH_WSREP + if (WSREP_EMULATE_BINLOG(thd) || mysql_bin_log.is_open()) +#else if (mysql_bin_log.is_open()) +#endif { int errcode= 0; if (local_error == 0) diff --git a/sql/sql_insert.cc b/sql/sql_insert.cc index ef4b4703455..2e69dc89800 100644 --- a/sql/sql_insert.cc +++ b/sql/sql_insert.cc @@ -1018,7 +1018,11 @@ bool mysql_insert(THD *thd,TABLE_LIST *table_list, thd->transaction.stmt.modified_non_trans_table || was_insert_delayed) { +#ifdef WITH_WSREP + if (WSREP_EMULATE_BINLOG(thd) || mysql_bin_log.is_open()) +#else if (mysql_bin_log.is_open()) +#endif { int errcode= 0; if (error <= 0) @@ -3245,6 +3249,12 @@ bool Delayed_insert::handle_inserts(void) mysql_cond_broadcast(&cond_client); // If waiting clients } } +#ifdef WITH_WSREP + if (WSREP((&thd))) + thd_proc_info(&thd, "insert done"); + else +#endif /* WITH_WSREP */ + thd_proc_info(&thd, 0); mysql_mutex_unlock(&mutex); /* @@ -3687,19 +3697,26 @@ void select_insert::store_values(List<Item> &values) TRG_EVENT_INSERT); } -bool select_insert::send_eof() +bool select_insert::prepare_eof() { int error; bool const trans_table= table->file->has_transactions(); - ulonglong id, row_count; bool changed; killed_state killed_status= thd->killed; - DBUG_ENTER("select_insert::send_eof"); + + DBUG_ENTER("select_insert::prepare_eof"); DBUG_PRINT("enter", ("trans_table=%d, table_type='%s'", trans_table, table->file->table_type())); +#ifdef WITH_WSREP + error= (thd->wsrep_conflict_state == MUST_ABORT || + thd->wsrep_conflict_state == CERT_FAILURE) ? -1 : + (thd->locked_tables_mode <= LTM_LOCK_TABLES ? + table->file->ha_end_bulk_insert() : 0); +#else error= (thd->locked_tables_mode <= LTM_LOCK_TABLES ? table->file->ha_end_bulk_insert() : 0); +#endif /* WITH_WSREP */ if (!error && thd->is_error()) error= thd->get_stmt_da()->sql_errno(); @@ -3727,7 +3744,11 @@ bool select_insert::send_eof() events are in the transaction cache and will be written when ha_autocommit_or_rollback() is issued below. */ +#ifdef WITH_WSREP + if ((WSREP_EMULATE_BINLOG(thd) || mysql_bin_log.is_open()) && +#else if (mysql_bin_log.is_open() && +#endif (!error || thd->transaction.stmt.modified_non_trans_table)) { int errcode= 0; @@ -3740,7 +3761,7 @@ bool select_insert::send_eof() trans_table, FALSE, FALSE, errcode)) { table->file->ha_release_auto_increment(); - DBUG_RETURN(1); + DBUG_RETURN(true); } } table->file->ha_release_auto_increment(); @@ -3748,27 +3769,49 @@ bool select_insert::send_eof() if (error) { table->file->print_error(error,MYF(0)); - DBUG_RETURN(1); + DBUG_RETURN(true); } - char buff[160]; + + DBUG_RETURN(false); +} + +bool select_insert::send_ok_packet() { + char message[160]; /* status message */ + ulong row_count; /* rows affected */ + ulong id; /* last insert-id */ + + DBUG_ENTER("select_insert::send_ok_packet"); + if (info.ignore) - sprintf(buff, ER(ER_INSERT_INFO), (ulong) info.records, - (ulong) (info.records - info.copied), - (long) thd->get_stmt_da()->current_statement_warn_count()); + my_snprintf(message, sizeof(message), ER(ER_INSERT_INFO), + (ulong) info.records, (ulong) (info.records - info.copied), + (long) thd->get_stmt_da()->current_statement_warn_count()); else - sprintf(buff, ER(ER_INSERT_INFO), (ulong) info.records, - (ulong) (info.deleted+info.updated), - (long) thd->get_stmt_da()->current_statement_warn_count()); + my_snprintf(message, sizeof(message), ER(ER_INSERT_INFO), + (ulong) info.records, (ulong) (info.deleted + info.updated), + (long) thd->get_stmt_da()->current_statement_warn_count()); + row_count= info.copied + info.deleted + - ((thd->client_capabilities & CLIENT_FOUND_ROWS) ? - info.touched : info.updated); + ((thd->client_capabilities & CLIENT_FOUND_ROWS) ? + info.touched : info.updated); + id= (thd->first_successful_insert_id_in_cur_stmt > 0) ? thd->first_successful_insert_id_in_cur_stmt : (thd->arg_of_last_insert_id_function ? thd->first_successful_insert_id_in_prev_stmt : (info.copied ? autoinc_value_of_last_inserted_row : 0)); - ::my_ok(thd, row_count, id, buff); - DBUG_RETURN(0); + + ::my_ok(thd, row_count, id, message); + + DBUG_RETURN(false); +} + +bool select_insert::send_eof() +{ + bool res; + DBUG_ENTER("select_insert::send_eof"); + res= (prepare_eof() || send_ok_packet()); + DBUG_RETURN(res); } void select_insert::abort_result_set() { @@ -3812,7 +3855,11 @@ void select_insert::abort_result_set() { if (!can_rollback_data()) thd->transaction.all.modified_non_trans_table= TRUE; +#ifdef WITH_WSREP + if (WSREP_EMULATE_BINLOG(thd) || mysql_bin_log.is_open()) +#else if (mysql_bin_log.is_open()) +#endif { int errcode= query_error_code(thd, thd->killed == NOT_KILLED); /* error of writing binary log is ignored */ @@ -4242,7 +4289,11 @@ select_create::binlog_show_create_table(TABLE **tables, uint count) WITH_DB_NAME); DBUG_ASSERT(result == 0); /* show_create_table() always return 0 */ +#ifdef WITH_WSREP + if (WSREP_EMULATE_BINLOG(thd) || mysql_bin_log.is_open()) +#else if (mysql_bin_log.is_open()) +#endif /* WITH_WSREP */ { int errcode= query_error_code(thd, thd->killed == NOT_KILLED); result= thd->binlog_query(THD::STMT_QUERY_TYPE, @@ -4252,6 +4303,9 @@ select_create::binlog_show_create_table(TABLE **tables, uint count) /* suppress_use */ FALSE, errcode); } +#ifdef WITH_WSREP + ha_wsrep_fake_trx_id(thd); +#endif return result; } @@ -4264,13 +4318,13 @@ void select_create::store_values(List<Item> &values) bool select_create::send_eof() { - if (select_insert::send_eof()) + DBUG_ENTER("select_create::send_eof"); + if (prepare_eof()) { abort_result_set(); - return 1; + DBUG_RETURN(true); } - exit_done= 1; // Avoid double calls /* Do an implicit commit at end of statement for non-temporary tables. This can fail, but we should unlock the table @@ -4281,13 +4335,33 @@ bool select_create::send_eof() trans_commit_stmt(thd); if (!(thd->variables.option_bits & OPTION_GTID_BEGIN)) trans_commit_implicit(thd); +#ifdef WITH_WSREP + mysql_mutex_lock(&thd->LOCK_wsrep_thd); + if (thd->wsrep_conflict_state != NO_CONFLICT) + { + WSREP_DEBUG("select_create commit failed, thd: %lu err: %d %s", + thd->thread_id, thd->wsrep_conflict_state, thd->query()); + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + abort_result_set(); + DBUG_RETURN(true); + } + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); +#endif /* WITH_WSREP */ } else if (!thd->is_current_stmt_binlog_format_row()) table->s->table_creation_was_logged= 1; + /* + exit_done must only be set after last potential call to + abort_result_set(). + */ + exit_done= 1; // Avoid double calls + table->file->extra(HA_EXTRA_NO_IGNORE_DUP_KEY); table->file->extra(HA_EXTRA_WRITE_CANNOT_REPLACE); + send_ok_packet(); + if (m_plock) { MYSQL_LOCK *lock= *m_plock; @@ -4308,12 +4382,12 @@ bool select_create::send_eof() create_info-> pos_in_locked_tables, table, lock)) - return 0; // ok + DBUG_RETURN(false); // ok /* Fail. Continue without locking the table */ } mysql_unlock_tables(thd, lock); } - return 0; + DBUG_RETURN(false); } diff --git a/sql/sql_lex.cc b/sql/sql_lex.cc index 19568bdd42c..5f22a657dfe 100644 --- a/sql/sql_lex.cc +++ b/sql/sql_lex.cc @@ -1622,6 +1622,17 @@ static int lex_one_token(YYSTYPE *yylval, THD *thd) } else { +#ifdef WITH_WSREP + if (version == 99997 && thd->wsrep_exec_mode == LOCAL_STATE) + { + WSREP_DEBUG("consistency check: %s", thd->query()); + thd->wsrep_consistency_check= CONSISTENCY_CHECK_DECLARED; + lip->yySkipn(5); + lip->set_echo(TRUE); + state=MY_LEX_START; + break; /* Do not treat contents as a comment. */ + } +#endif /* WITH_WSREP */ /* Patch and skip the conditional comment to avoid it being propagated infinitely (eg. to a slave). diff --git a/sql/sql_parse.cc b/sql/sql_parse.cc index efcb218eb4c..9f0b6e25a5c 100644 --- a/sql/sql_parse.cc +++ b/sql/sql_parse.cc @@ -50,7 +50,7 @@ #include "sql_connect.h" // decrease_user_connections, // check_mqh, // reset_mqh -#include "sql_rename.h" // mysql_rename_table +#include "sql_rename.h" // mysql_rename_tables #include "sql_tablespace.h" // mysql_alter_tablespace #include "hostname.h" // hostname_cache_refresh #include "sql_acl.h" // *_ACL, check_grant, is_acl_user, @@ -104,6 +104,13 @@ #include "../storage/maria/ha_maria.h" #endif +#ifdef WITH_WSREP +#include "wsrep_mysqld.h" +#include "wsrep_thd.h" +#include "wsrep_binlog.h" +static void wsrep_mysql_parse(THD *thd, char *rawbuf, uint length, + Parser_state *parser_state); +#endif /* WITH_WSREP */ /** @defgroup Runtime_Environment Runtime Environment @{ @@ -124,7 +131,7 @@ static void sql_kill(THD *thd, longlong id, killed_state state, killed_type type static void sql_kill_user(THD *thd, LEX_USER *user, killed_state state); static bool lock_tables_precheck(THD *thd, TABLE_LIST *tables); static bool execute_show_status(THD *, TABLE_LIST *); -static bool execute_rename_table(THD *, TABLE_LIST *, TABLE_LIST *); +static bool check_rename_table(THD *, TABLE_LIST *, TABLE_LIST *); const char *any_db="*any*"; // Special symbol for check_access @@ -879,11 +886,26 @@ bool do_command(THD *thd) { bool return_value; char *packet= 0; +#ifdef WITH_WSREP + ulong packet_length= 0; // just to avoid (false positive) compiler warning +#else ulong packet_length; +#endif /* WITH_WSREP */ NET *net= &thd->net; enum enum_server_command command; DBUG_ENTER("do_command"); - +#ifdef WITH_WSREP + if (WSREP(thd)) + { + mysql_mutex_lock(&thd->LOCK_wsrep_thd); + thd->wsrep_query_state= QUERY_IDLE; + if (thd->wsrep_conflict_state==MUST_ABORT) + { + wsrep_client_rollback(thd); + } + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + } +#endif /* WITH_WSREP */ /* indicator of uninitialized lex => normal flow of errors handling (see my_message_sql) @@ -930,12 +952,46 @@ bool do_command(THD *thd) packet_length= my_net_read_packet(net, 1); +#ifdef WITH_WSREP + if (WSREP(thd)) { + mysql_mutex_lock(&thd->LOCK_wsrep_thd); + + /* these THD's are aborted or are aborting during being idle */ + if (thd->wsrep_conflict_state == ABORTING) + { + while (thd->wsrep_conflict_state == ABORTING) { + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + my_sleep(1000); + mysql_mutex_lock(&thd->LOCK_wsrep_thd); + } + thd->store_globals(); + } + else if (thd->wsrep_conflict_state == ABORTED) + { + thd->store_globals(); + } + + thd->wsrep_query_state= QUERY_EXEC; + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + } +#endif /* WITH_WSREP */ if (packet_length == packet_error) { DBUG_PRINT("info",("Got error %d reading command from socket %s", net->error, vio_description(net->vio))); +#ifdef WITH_WSREP + if (WSREP(thd)) { + mysql_mutex_lock(&thd->LOCK_wsrep_thd); + if (thd->wsrep_conflict_state == MUST_ABORT) + { + DBUG_PRINT("wsrep",("aborted for wsrep rollback: %lu", thd->real_id)); + wsrep_client_rollback(thd); + } + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + } +#endif /* WITH_WSREP */ /* Instrument this broken statement as "statement/com/error" */ thd->m_statement_psi= MYSQL_REFINE_STATEMENT(thd->m_statement_psi, com_statement_info[COM_END]. @@ -991,12 +1047,79 @@ bool do_command(THD *thd) vio_description(net->vio), command, command_name[command].str)); +#ifdef WITH_WSREP + if (WSREP(thd)) { + /* + * bail out if DB snapshot has not been installed. We however, + * allow queries "SET" and "SHOW", they are trapped later in execute_command + */ + if (thd->variables.wsrep_on && !thd->wsrep_applier && !wsrep_ready && + command != COM_QUERY && + command != COM_PING && + command != COM_QUIT && + command != COM_PROCESS_INFO && + command != COM_PROCESS_KILL && + command != COM_SET_OPTION && + command != COM_SHUTDOWN && + command != COM_SLEEP && + command != COM_STATISTICS && + command != COM_TIME && + command != COM_STMT_PREPARE && + command != COM_STMT_SEND_LONG_DATA && + command != COM_STMT_EXECUTE && + command != COM_STMT_RESET && + command != COM_STMT_CLOSE && + command != COM_END + ) { + my_message(ER_UNKNOWN_COM_ERROR, + "WSREP has not yet prepared node for application use", + MYF(0)); + thd->protocol->end_statement(); + + /* Performance Schema Interface instrumentation end */ + MYSQL_END_STATEMENT(thd->m_statement_psi, thd->get_stmt_da()); + thd->m_statement_psi= NULL; + thd->m_digest= NULL; + + return_value= FALSE; + goto out; + } + } +#endif /* WITH_WSREP */ /* Restore read timeout value */ my_net_set_read_timeout(net, thd->variables.net_read_timeout); DBUG_ASSERT(packet_length); DBUG_ASSERT(!thd->apc_target.is_enabled()); return_value= dispatch_command(command, thd, packet+1, (uint) (packet_length-1)); +#ifdef WITH_WSREP + if (WSREP(thd)) { + while (thd->wsrep_conflict_state== RETRY_AUTOCOMMIT) + { + WSREP_DEBUG("Retry autocommit for: %s\n", thd->wsrep_retry_query); + CHARSET_INFO *current_charset = thd->variables.character_set_client; + if (!is_supported_parser_charset(current_charset)) + { + /* Do not use non-supported parser character sets */ + WSREP_WARN("Current client character set is non-supported parser " + "character set: %s", current_charset->csname); + thd->variables.character_set_client = &my_charset_latin1; + WSREP_WARN("For retry temporally setting character set to : %s", + my_charset_latin1.csname); + } + return_value= dispatch_command(command, thd, thd->wsrep_retry_query, + thd->wsrep_retry_query_len); + thd->variables.character_set_client = current_charset; + } + } + if (thd->wsrep_retry_query && thd->wsrep_conflict_state != REPLAYING) + { + my_free(thd->wsrep_retry_query); + thd->wsrep_retry_query = NULL; + thd->wsrep_retry_query_len = 0; + thd->wsrep_retry_command = COM_CONNECT; + } +#endif /* WITH_WSREP */ DBUG_ASSERT(!thd->apc_target.is_enabled()); out: @@ -1071,6 +1194,21 @@ static my_bool deny_updates_if_read_only_option(THD *thd, DBUG_RETURN(FALSE); } +#ifdef WITH_WSREP +static void wsrep_copy_query(THD *thd) +{ + thd->wsrep_retry_command = thd->get_command(); + thd->wsrep_retry_query_len = thd->query_length(); + if (thd->wsrep_retry_query) { + my_free(thd->wsrep_retry_query); + } + thd->wsrep_retry_query = (char *)my_malloc( + thd->wsrep_retry_query_len + 1, MYF(0)); + strncpy(thd->wsrep_retry_query, thd->query(), thd->wsrep_retry_query_len); + thd->wsrep_retry_query[thd->wsrep_retry_query_len] = '\0'; +} +#endif /* WITH_WSREP */ + /** Perform one connection-level (COM_XXXX) command. @@ -1100,6 +1238,42 @@ bool dispatch_command(enum enum_server_command command, THD *thd, DBUG_ENTER("dispatch_command"); DBUG_PRINT("info", ("command: %d", command)); +#ifdef WITH_WSREP + if (WSREP(thd)) { + if (!thd->in_multi_stmt_transaction_mode()) + { + thd->wsrep_PA_safe= true; + } + + mysql_mutex_lock(&thd->LOCK_wsrep_thd); + thd->wsrep_query_state= QUERY_EXEC; + if (thd->wsrep_conflict_state== RETRY_AUTOCOMMIT) + { + thd->wsrep_conflict_state= NO_CONFLICT; + } + if (thd->wsrep_conflict_state== MUST_ABORT) + { + wsrep_client_rollback(thd); + } + if (thd->wsrep_conflict_state== ABORTED) + { + my_error(ER_LOCK_DEADLOCK, MYF(0), "wsrep aborted transaction"); + WSREP_DEBUG("Deadlock error for: %s", thd->query()); + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + thd->killed = NOT_KILLED; + thd->mysys_var->abort = 0; + thd->wsrep_conflict_state = NO_CONFLICT; + thd->wsrep_retry_counter = 0; + /* + Increment threads running to compensate dec_thread_running() called + after dispatch_end label. + */ + inc_thread_running(); + goto dispatch_end; + } + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + } +#endif /* WITH_WSREP */ #if defined(ENABLED_PROFILING) thd->profiling.start_new_query(); #endif @@ -1302,7 +1476,11 @@ bool dispatch_command(enum enum_server_command command, THD *thd, if (parser_state.init(thd, thd->query(), thd->query_length())) break; +#ifdef WITH_WSREP + wsrep_mysql_parse(thd, thd->query(), thd->query_length(), &parser_state); +#else mysql_parse(thd, thd->query(), thd->query_length(), &parser_state); +#endif /* WITH_WSREP */ while (!thd->killed && (parser_state.m_lip.found_semicolon != NULL) && ! thd->is_error()) @@ -1379,10 +1557,19 @@ bool dispatch_command(enum enum_server_command command, THD *thd, Count each statement from the client. */ statistic_increment(thd->status_var.questions, &LOCK_status); +#ifdef WITH_WSREP + if (!WSREP(thd)) + thd->set_time(); /* Reset the query start time. */ +#else thd->set_time(); /* Reset the query start time. */ +#endif /* WITH_WSREP */ parser_state.reset(beginning_of_next_stmt, length); /* TODO: set thd->lex->sql_command to SQLCOM_END here */ +#ifdef WITH_WSREP + wsrep_mysql_parse(thd, beginning_of_next_stmt, length, &parser_state); +#else mysql_parse(thd, beginning_of_next_stmt, length, &parser_state); +#endif /* WITH_WSREP */ } DBUG_PRINT("info",("query ready")); @@ -1722,6 +1909,26 @@ bool dispatch_command(enum enum_server_command command, THD *thd, my_message(ER_UNKNOWN_COM_ERROR, ER(ER_UNKNOWN_COM_ERROR), MYF(0)); break; } +#ifdef WITH_WSREP + dispatch_end: + + if (WSREP(thd)) { + /* wsrep BF abort in query exec phase */ + mysql_mutex_lock(&thd->LOCK_wsrep_thd); + if ((thd->wsrep_conflict_state != REPLAYING) && + (thd->wsrep_conflict_state != RETRY_AUTOCOMMIT)) + { + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + thd->update_server_status(); + thd->protocol->end_statement(); + query_cache_end_of_result(thd); + } + else + { + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + } + } else { /* if (WSREP(thd))... */ +#endif /* WITH_WSREP */ DBUG_ASSERT(thd->derived_tables == NULL && (thd->open_tables == NULL || (thd->locked_tables_mode == LTM_LOCK_TABLES))); @@ -1731,6 +1938,9 @@ bool dispatch_command(enum enum_server_command command, THD *thd, thd->update_server_status(); thd->protocol->end_statement(); query_cache_end_of_result(thd); +#ifdef WITH_WSREP + } +#endif /* WITH_WSREP */ if (!thd->is_error() && !thd->killed_errno()) mysql_audit_general(thd, MYSQL_AUDIT_GENERAL_RESULT, 0, 0); @@ -2184,6 +2394,13 @@ err: return TRUE; } +#ifdef WITH_WSREP +static bool wsrep_is_show_query(enum enum_sql_command command) +{ + DBUG_ASSERT(command >= 0 && command <= SQLCOM_END); + return (sql_command_flags[command] & CF_STATUS_COMMAND) != 0; +} +#endif /* WITH_WSREP */ /** Execute command saved in thd and lex->sql_command. @@ -2389,7 +2606,51 @@ mysql_execute_command(THD *thd) #ifdef HAVE_REPLICATION } /* endif unlikely slave */ #endif +#ifdef WITH_WSREP + if (WSREP(thd)) { + /* + change LOCK TABLE WRITE to transaction + */ + if (lex->sql_command== SQLCOM_LOCK_TABLES && wsrep_convert_LOCK_to_trx) + { + for (TABLE_LIST *table= all_tables; table; table= table->next_global) + { + if (table->lock_type >= TL_WRITE_ALLOW_WRITE) + { + lex->sql_command= SQLCOM_BEGIN; + thd->wsrep_converted_lock_session= true; + break; + } + } + } + if (lex->sql_command== SQLCOM_UNLOCK_TABLES && + thd->wsrep_converted_lock_session) + { + thd->wsrep_converted_lock_session= false; + lex->sql_command= SQLCOM_COMMIT; + lex->tx_release= TVL_NO; + } + + /* + Bail out if DB snapshot has not been installed. SET and SHOW commands, + however, are always allowed. + We additionally allow all other commands that do not change data in + case wsrep_dirty_reads is enabled. + */ + if (thd->variables.wsrep_on && !thd->wsrep_applier && !wsrep_ready && + lex->sql_command != SQLCOM_SET_OPTION && + !(thd->variables.wsrep_dirty_reads && + !is_update_query(lex->sql_command)) && + !wsrep_is_show_query(lex->sql_command)) + { + my_message(ER_UNKNOWN_COM_ERROR, + "WSREP has not yet prepared node for application use", + MYF(0)); + goto error; + } + } +#endif /* WITH_WSREP */ status_var_increment(thd->status_var.com_stat[lex->sql_command]); thd->progress.report_to_client= MY_TEST(sql_command_flags[lex->sql_command] & CF_REPORT_PROGRESS); @@ -2431,7 +2692,13 @@ mysql_execute_command(THD *thd) { /* Commit the normal transaction if one is active. */ if (trans_commit_implicit(thd)) + { + thd->mdl_context.release_transactional_locks(); +#ifdef WITH_WSREP + WSREP_DEBUG("implicit commit failed, MDL released: %lu", thd->thread_id); +#endif /* WITH_WSREP */ goto error; + } /* Release metadata locks acquired in this transaction. */ thd->mdl_context.release_transactional_locks(); } @@ -2521,6 +2788,10 @@ mysql_execute_command(THD *thd) /* fall through */ case SQLCOM_SHOW_STATUS_PROC: case SQLCOM_SHOW_STATUS_FUNC: +#ifdef WITH_WSREP + if (WSREP_CLIENT(thd) && wsrep_sync_wait(thd)) goto error; +#endif /* WITH_WSREP */ + case SQLCOM_SHOW_DATABASES: case SQLCOM_SHOW_TABLES: case SQLCOM_SHOW_TRIGGERS: @@ -2529,17 +2800,27 @@ mysql_execute_command(THD *thd) case SQLCOM_SHOW_PLUGINS: case SQLCOM_SHOW_FIELDS: case SQLCOM_SHOW_KEYS: +#ifndef WITH_WSREP case SQLCOM_SHOW_VARIABLES: case SQLCOM_SHOW_CHARSETS: case SQLCOM_SHOW_COLLATIONS: case SQLCOM_SHOW_STORAGE_ENGINES: case SQLCOM_SHOW_PROFILE: +#endif /* WITH_WSREP */ case SQLCOM_SHOW_CLIENT_STATS: case SQLCOM_SHOW_USER_STATS: case SQLCOM_SHOW_TABLE_STATS: case SQLCOM_SHOW_INDEX_STATS: case SQLCOM_SELECT: - { +#ifdef WITH_WSREP + if (WSREP_CLIENT(thd) && wsrep_sync_wait(thd)) goto error; + case SQLCOM_SHOW_VARIABLES: + case SQLCOM_SHOW_CHARSETS: + case SQLCOM_SHOW_COLLATIONS: + case SQLCOM_SHOW_STORAGE_ENGINES: + case SQLCOM_SHOW_PROFILE: +#endif /* WITH_WSREP */ + { thd->status_var.last_query_cost= 0.0; /* @@ -2916,7 +3197,7 @@ case SQLCOM_PREPARE: */ if (thd->query_name_consts && mysql_bin_log.is_open() && - thd->variables.binlog_format == BINLOG_FORMAT_STMT && + WSREP_FORMAT(thd->variables.binlog_format) == BINLOG_FORMAT_STMT && !mysql_bin_log.is_query_in_union(thd, thd->query_id)) { List_iterator_fast<Item> it(select_lex->item_list); @@ -3001,6 +3282,7 @@ case SQLCOM_PREPARE: /* select_create is currently not re-execution friendly and needs to be created for every execution of a PS/SP. + Note: In wsrep-patch, CTAS is handled like a regular transaction. */ if ((result= new select_create(create_table, &create_info, @@ -3035,6 +3317,15 @@ case SQLCOM_PREPARE: } else { +#ifdef WITH_WSREP + /* in STATEMENT format, we probably have to replicate also temporary + tables, like mysql replication does + */ + if (!thd->is_current_stmt_binlog_format_row() || + !(create_info.options & HA_LEX_CREATE_TMP_TABLE)) + WSREP_TO_ISOLATION_BEGIN(create_table->db, create_table->table_name, + NULL) +#endif /* WITH_WSREP */ /* Regular CREATE TABLE */ res= mysql_create_table(thd, create_table, &create_info, &alter_info); @@ -3072,6 +3363,7 @@ end_with_restore_list: DBUG_ASSERT(first_table == all_tables && first_table != 0); if (check_one_table_access(thd, INDEX_ACL, all_tables)) goto error; /* purecov: inspected */ + WSREP_TO_ISOLATION_BEGIN(first_table->db, first_table->table_name, NULL) /* Currently CREATE INDEX or DROP INDEX cause a full table rebuild and thus classify as slow administrative statements just like @@ -3194,7 +3486,12 @@ end_with_restore_list: #endif /* HAVE_REPLICATION */ case SQLCOM_RENAME_TABLE: { - if (execute_rename_table(thd, first_table, all_tables)) + if (check_rename_table(thd, first_table, all_tables)) + goto error; + + WSREP_TO_ISOLATION_BEGIN(0, 0, first_table) + + if (mysql_rename_tables(thd, first_table, 0)) goto error; break; } @@ -3221,6 +3518,10 @@ end_with_restore_list: goto error; #else { +#ifdef WITH_WSREP + if (WSREP_CLIENT(thd) && wsrep_sync_wait(thd)) goto error; +#endif /* WITH_WSREP */ + /* Access check: SHOW CREATE TABLE require any privileges on the table level (ie @@ -3283,6 +3584,10 @@ end_with_restore_list: case SQLCOM_CHECKSUM: { DBUG_ASSERT(first_table == all_tables && first_table != 0); +#ifdef WITH_WSREP + if (WSREP_CLIENT(thd) && wsrep_sync_wait(thd)) goto error; +#endif /* WITH_WSREP */ + if (check_table_access(thd, SELECT_ACL, all_tables, FALSE, UINT_MAX, FALSE)) goto error; /* purecov: inspected */ @@ -3291,6 +3596,10 @@ end_with_restore_list: break; } case SQLCOM_UPDATE: +#ifdef WITH_WSREP + if (WSREP_CLIENT(thd) && + wsrep_sync_wait(thd, WSREP_SYNC_WAIT_BEFORE_UPDATE_DELETE)) goto error; +#endif /* WITH_WSREP */ { ha_rows found= 0, updated= 0; DBUG_ASSERT(first_table == all_tables && first_table != 0); @@ -3330,6 +3639,10 @@ end_with_restore_list: /* if we switched from normal update, rights are checked */ if (up_result != 2) { +#ifdef WITH_WSREP + if (WSREP_CLIENT(thd) && + wsrep_sync_wait(thd, WSREP_SYNC_WAIT_BEFORE_UPDATE_DELETE)) goto error; +#endif /* WITH_WSREP */ if ((res= multi_update_precheck(thd, all_tables))) break; } @@ -3399,6 +3712,10 @@ end_with_restore_list: break; } case SQLCOM_REPLACE: +#ifdef WITH_WSREP + if (WSREP_CLIENT(thd) && + wsrep_sync_wait(thd, WSREP_SYNC_WAIT_BEFORE_INSERT_REPLACE)) goto error; +#endif /* WITH_WSREP */ #ifndef DBUG_OFF if (mysql_bin_log.is_open()) { @@ -3435,6 +3752,10 @@ end_with_restore_list: #endif /* fall through */ case SQLCOM_INSERT: +#ifdef WITH_WSREP + if (WSREP_CLIENT(thd) && + wsrep_sync_wait(thd, WSREP_SYNC_WAIT_BEFORE_INSERT_REPLACE)) goto error; +#endif /* WITH_WSREP */ { DBUG_ASSERT(first_table == all_tables && first_table != 0); @@ -3488,12 +3809,24 @@ end_with_restore_list: } case SQLCOM_REPLACE_SELECT: case SQLCOM_INSERT_SELECT: +#ifdef WITH_WSREP + if (WSREP_CLIENT(thd) && + wsrep_sync_wait(thd, WSREP_SYNC_WAIT_BEFORE_INSERT_REPLACE)) goto error; +#endif /* WITH_WSREP */ { select_result *sel_result; bool explain= MY_TEST(lex->describe); DBUG_ASSERT(first_table == all_tables && first_table != 0); if ((res= insert_precheck(thd, all_tables))) break; +#ifdef WITH_WSREP + if (thd->wsrep_consistency_check == CONSISTENCY_CHECK_DECLARED) + { + thd->wsrep_consistency_check = CONSISTENCY_CHECK_RUNNING; + WSREP_TO_ISOLATION_BEGIN(first_table->db, first_table->table_name, NULL); + } + +#endif /* INSERT...SELECT...ON DUPLICATE KEY UPDATE/REPLACE SELECT/ INSERT...IGNORE...SELECT can be unsafe, unless ORDER BY PRIMARY KEY @@ -3594,6 +3927,10 @@ end_with_restore_list: break; } case SQLCOM_DELETE: +#ifdef WITH_WSREP + if (WSREP_CLIENT(thd) && + wsrep_sync_wait(thd, WSREP_SYNC_WAIT_BEFORE_UPDATE_DELETE)) goto error; +#endif /* WITH_WSREP */ { select_result *sel_result=lex->result; DBUG_ASSERT(first_table == all_tables && first_table != 0); @@ -3614,6 +3951,10 @@ end_with_restore_list: break; } case SQLCOM_DELETE_MULTI: +#ifdef WITH_WSREP + if (WSREP_CLIENT(thd) && + wsrep_sync_wait(thd, WSREP_SYNC_WAIT_BEFORE_UPDATE_DELETE)) goto error; +#endif /* WITH_WSREP */ { DBUG_ASSERT(first_table == all_tables && first_table != 0); TABLE_LIST *aux_tables= thd->lex->auxiliary_table_list.first; @@ -3689,6 +4030,18 @@ end_with_restore_list: /* So that DROP TEMPORARY TABLE gets to binlog at commit/rollback */ thd->variables.option_bits|= OPTION_KEEP_LOG; } +#ifdef WITH_WSREP + for (TABLE_LIST *table= all_tables; table; table= table->next_global) + { + if (!lex->drop_temporary && + (!thd->is_current_stmt_binlog_format_row() || + !find_temporary_table(thd, table))) + { + WSREP_TO_ISOLATION_BEGIN(NULL, NULL, all_tables); + break; + } + } +#endif /* WITH_WSREP */ /* If we are a slave, we should add IF EXISTS if the query executed on the master without an error. This will help a slave to @@ -3742,7 +4095,6 @@ end_with_restore_list: if (!mysql_change_db(thd, &db_str, FALSE)) my_ok(thd); - break; } @@ -3893,6 +4245,7 @@ end_with_restore_list: #endif if (check_access(thd, CREATE_ACL, lex->name.str, NULL, NULL, 1, 0)) break; + WSREP_TO_ISOLATION_BEGIN(lex->name.str, NULL, NULL) res= mysql_create_db(thd, lex->name.str, &create_info, 0); break; } @@ -3924,6 +4277,7 @@ end_with_restore_list: #endif if (check_access(thd, DROP_ACL, lex->name.str, NULL, NULL, 1, 0)) break; + WSREP_TO_ISOLATION_BEGIN(lex->name.str, NULL, NULL) res= mysql_rm_db(thd, lex->name.str, lex->check_exists, 0); break; } @@ -3955,6 +4309,7 @@ end_with_restore_list: res= 1; break; } + WSREP_TO_ISOLATION_BEGIN(db->str, NULL, NULL) res= mysql_upgrade_db(thd, db); if (!res) my_ok(thd); @@ -3990,6 +4345,7 @@ end_with_restore_list: #endif if (check_access(thd, ALTER_ACL, db->str, NULL, NULL, 1, 0)) break; + WSREP_TO_ISOLATION_BEGIN(db->str, NULL, NULL) res= mysql_alter_db(thd, db->str, &create_info); break; } @@ -4003,6 +4359,11 @@ end_with_restore_list: db_name.str= db_name_buff; db_name.length= lex->name.length; strmov(db_name.str, lex->name.str); + +#ifdef WITH_WSREP + if (WSREP_CLIENT(thd) && wsrep_sync_wait(thd)) goto error; +#endif /* WITH_WSREP */ + if (check_db_name(&db_name)) { my_error(ER_WRONG_DB_NAME, MYF(0), db_name.str); @@ -4028,6 +4389,7 @@ end_with_restore_list: if (res) break; + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL) switch (lex->sql_command) { case SQLCOM_CREATE_EVENT: { @@ -4058,10 +4420,14 @@ end_with_restore_list: /* lex->unit.cleanup() is called outside, no need to call it here */ break; case SQLCOM_SHOW_CREATE_EVENT: +#ifdef WITH_WSREP + if (WSREP_CLIENT(thd) && wsrep_sync_wait(thd)) goto error; +#endif /* WITH_WSREP */ res= Events::show_create_event(thd, lex->spname->m_db, lex->spname->m_name); break; case SQLCOM_DROP_EVENT: + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL) if (!(res= Events::drop_event(thd, lex->spname->m_db, lex->spname->m_name, lex->check_exists))) @@ -4076,6 +4442,7 @@ end_with_restore_list: if (check_access(thd, INSERT_ACL, "mysql", NULL, NULL, 1, 0)) break; #ifdef HAVE_DLOPEN + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL) if (!(res = mysql_create_function(thd, &lex->udf))) my_ok(thd); #else @@ -4091,6 +4458,7 @@ end_with_restore_list: if (check_access(thd, INSERT_ACL, "mysql", NULL, NULL, 1, 1) && check_global_access(thd,CREATE_USER_ACL)) break; + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL) /* Conditionally writes to binlog */ if (!(res= mysql_create_user(thd, lex->users_list, lex->sql_command == SQLCOM_CREATE_ROLE))) @@ -4104,6 +4472,7 @@ end_with_restore_list: check_global_access(thd,CREATE_USER_ACL)) break; /* Conditionally writes to binlog */ + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL) if (!(res= mysql_drop_user(thd, lex->users_list, lex->sql_command == SQLCOM_DROP_ROLE))) my_ok(thd); @@ -4115,6 +4484,7 @@ end_with_restore_list: check_global_access(thd,CREATE_USER_ACL)) break; /* Conditionally writes to binlog */ + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL) if (!(res= mysql_rename_user(thd, lex->users_list))) my_ok(thd); break; @@ -4126,6 +4496,7 @@ end_with_restore_list: break; /* Conditionally writes to binlog */ + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL) if (!(res = mysql_revoke_all(thd, lex->users_list))) my_ok(thd); break; @@ -4208,6 +4579,7 @@ end_with_restore_list: lex->type == TYPE_ENUM_PROCEDURE, 0)) goto error; /* Conditionally writes to binlog */ + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL) res= mysql_routine_grant(thd, all_tables, lex->type == TYPE_ENUM_PROCEDURE, lex->users_list, grants, @@ -4221,6 +4593,7 @@ end_with_restore_list: all_tables, FALSE, UINT_MAX, FALSE)) goto error; /* Conditionally writes to binlog */ + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL) res= mysql_table_grant(thd, all_tables, lex->users_list, lex->columns, lex->grant, lex->sql_command == SQLCOM_REVOKE); @@ -4236,6 +4609,7 @@ end_with_restore_list: } else { + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL) /* Conditionally writes to binlog */ res= mysql_grant(thd, select_lex->db, lex->users_list, lex->grant, lex->sql_command == SQLCOM_REVOKE, @@ -4261,6 +4635,7 @@ end_with_restore_list: case SQLCOM_REVOKE_ROLE: case SQLCOM_GRANT_ROLE: { + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL) if (!(res= mysql_grant_role(thd, lex->users_list, lex->sql_command != SQLCOM_GRANT_ROLE))) my_ok(thd); @@ -4294,6 +4669,38 @@ end_with_restore_list: break; } +#ifdef WITH_WSREP + if (lex->type & ( + REFRESH_GRANT | + REFRESH_HOSTS | +#ifdef HAVE_OPENSSL + REFRESH_DES_KEY_FILE | +#endif + /* + Write all flush log statements except + FLUSH LOGS + FLUSH BINARY LOGS + Check reload_acl_and_cache for why. + */ + REFRESH_RELAY_LOG | + REFRESH_SLOW_LOG | + REFRESH_GENERAL_LOG | + REFRESH_ENGINE_LOG | + REFRESH_ERROR_LOG | +#ifdef HAVE_QUERY_CACHE + REFRESH_QUERY_CACHE_FREE | +#endif /* HAVE_QUERY_CACHE */ + REFRESH_STATUS | + REFRESH_USER_RESOURCES | + REFRESH_TABLE_STATS | + REFRESH_INDEX_STATS | + REFRESH_USER_STATS | + REFRESH_CLIENT_STATS)) + { + WSREP_TO_ISOLATION_BEGIN_WRTCHK(WSREP_MYSQL_DB, NULL, NULL) + } +#endif /* WITH_WSREP*/ + #ifdef HAVE_REPLICATION if (lex->type & REFRESH_READ_LOCK) { @@ -4307,12 +4714,32 @@ end_with_restore_list: goto error; } #endif + /* reload_acl_and_cache() will tell us if we are allowed to write to the binlog or not. */ if (!reload_acl_and_cache(thd, lex->type, first_table, &write_to_binlog)) { +#ifdef WITH_WSREP + if ((lex->type & REFRESH_TABLES) && !(lex->type & (REFRESH_FOR_EXPORT|REFRESH_READ_LOCK))) + { + /* + This is done after reload_acl_and_cache is because + LOCK TABLES is not replicated in galera, the upgrade of which + is checked in reload_acl_and_cache. + Hence, done after/if we are able to upgrade locks. + */ + if (first_table) + { + WSREP_TO_ISOLATION_BEGIN_WRTCHK(NULL, NULL, first_table); + } + else + { + WSREP_TO_ISOLATION_BEGIN_WRTCHK(WSREP_MYSQL_DB, NULL, NULL); + } + } +#endif /* WITH_WSREP */ /* We WANT to write and we CAN write. ! we write after unlocking the table. @@ -4414,15 +4841,29 @@ end_with_restore_list: able to open it (with SQLCOM_HA_OPEN) in the first place. */ unit->set_limit(select_lex); +#ifdef WITH_WSREP + { char* tmp_info= NULL; + if (WSREP(thd)) tmp_info = (char *)thd_proc_info(thd, "mysql_ha_read()"); +#endif /* WITH_WSREP */ res= mysql_ha_read(thd, first_table, lex->ha_read_mode, lex->ident.str, lex->insert_list, lex->ha_rkey_mode, select_lex->where, unit->select_limit_cnt, unit->offset_limit_cnt); +#ifdef WITH_WSREP + if (WSREP(thd)) thd_proc_info(thd, tmp_info); + } +#endif /* WITH_WSREP */ break; case SQLCOM_BEGIN: DBUG_PRINT("info", ("Executing SQLCOM_BEGIN thd: %p", thd)); if (trans_begin(thd, lex->start_transaction_opt)) + { + thd->mdl_context.release_transactional_locks(); +#ifdef WITH_WSREP + WSREP_DEBUG("BEGIN failed, MDL released: %lu", thd->thread_id); +#endif /* WITH_WSREP */ goto error; + } my_ok(thd); break; case SQLCOM_COMMIT: @@ -4436,7 +4877,13 @@ end_with_restore_list: (thd->variables.completion_type == 2 && lex->tx_release != TVL_NO)); if (trans_commit(thd)) + { + thd->mdl_context.release_transactional_locks(); +#ifdef WITH_WSREP + WSREP_DEBUG("COMMIT failed, MDL released: %lu", thd->thread_id); +#endif /* WITH_WSREP */ goto error; + } thd->mdl_context.release_transactional_locks(); /* Begin transaction with the same isolation level. */ if (tx_chain) @@ -4456,7 +4903,20 @@ end_with_restore_list: thd->killed= KILL_CONNECTION; thd->print_aborted_warning(3, "RELEASE"); } +#ifdef WITH_WSREP + if (WSREP(thd)) { + + if (thd->wsrep_conflict_state == NO_CONFLICT || + thd->wsrep_conflict_state == REPLAYING) + { + my_ok(thd); + } + } else { +#endif /* WITH_WSREP */ my_ok(thd); +#ifdef WITH_WSREP + } +#endif /* WITH_WSREP */ break; } case SQLCOM_ROLLBACK: @@ -4471,7 +4931,13 @@ end_with_restore_list: lex->tx_release != TVL_NO)); if (trans_rollback(thd)) + { + thd->mdl_context.release_transactional_locks(); +#ifdef WITH_WSREP + WSREP_DEBUG("rollback failed, MDL released: %lu", thd->thread_id); +#endif /* WITH_WSREP */ goto error; + } thd->mdl_context.release_transactional_locks(); /* Begin transaction with the same isolation level. */ if (tx_chain) @@ -4488,8 +4954,18 @@ end_with_restore_list: /* Disconnect the current client connection. */ if (tx_release) thd->killed= KILL_CONNECTION; - my_ok(thd); - break; +#ifdef WITH_WSREP + if (WSREP(thd)) { + if (thd->wsrep_conflict_state == NO_CONFLICT) { + my_ok(thd); + } + } else { +#endif /* WITH_WSREP */ + my_ok(thd); +#ifdef WITH_WSREP + } +#endif /* WITH_WSREP */ + break; } case SQLCOM_RELEASE_SAVEPOINT: if (trans_release_savepoint(thd, lex->ident)) @@ -4557,6 +5033,7 @@ end_with_restore_list: if (sp_process_definer(thd)) goto create_sp_error; + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL) res= (sp_result= sp_create_routine(thd, lex->sphead->m_type, lex->sphead)); switch (sp_result) { case SP_OK: { @@ -4767,6 +5244,7 @@ create_sp_error: already puts on CREATE FUNCTION. */ /* Conditionally writes to binlog */ + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL) sp_result= sp_update_routine(thd, type, lex->spname, &lex->sp_chistics); switch (sp_result) { @@ -4838,6 +5316,7 @@ create_sp_error: if (check_routine_access(thd, ALTER_PROC_ACL, db, name, lex->sql_command == SQLCOM_DROP_PROCEDURE, 0)) goto error; + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL) /* Conditionally writes to binlog */ sp_result= sp_drop_routine(thd, type, lex->spname); @@ -4902,12 +5381,18 @@ create_sp_error: } case SQLCOM_SHOW_CREATE_PROC: { +#ifdef WITH_WSREP + if (WSREP_CLIENT(thd) && wsrep_sync_wait(thd)) goto error; +#endif /* WITH_WSREP */ if (sp_show_create_routine(thd, TYPE_ENUM_PROCEDURE, lex->spname)) goto error; break; } case SQLCOM_SHOW_CREATE_FUNC: { +#ifdef WITH_WSREP + if (WSREP_CLIENT(thd) && wsrep_sync_wait(thd)) goto error; +#endif /* WITH_WSREP */ if (sp_show_create_routine(thd, TYPE_ENUM_FUNCTION, lex->spname)) goto error; break; @@ -4920,6 +5405,9 @@ create_sp_error: stored_procedure_type type= (lex->sql_command == SQLCOM_SHOW_PROC_CODE ? TYPE_ENUM_PROCEDURE : TYPE_ENUM_FUNCTION); +#ifdef WITH_WSREP + if (WSREP_CLIENT(thd) && wsrep_sync_wait(thd)) goto error; +#endif /* WITH_WSREP */ if (sp_cache_routine(thd, type, lex->spname, FALSE, &sp)) goto error; if (!sp || sp->show_routine_code(thd)) @@ -4941,6 +5429,9 @@ create_sp_error: if (check_ident_length(&lex->spname->m_name)) goto error; +#ifdef WITH_WSREP + if (WSREP_CLIENT(thd) && wsrep_sync_wait(thd)) goto error; +#endif /* WITH_WSREP */ if (show_create_trigger(thd, lex->spname)) goto error; /* Error has been already logged. */ @@ -4952,6 +5443,7 @@ create_sp_error: Note: SQLCOM_CREATE_VIEW also handles 'ALTER VIEW' commands as specified through the thd->lex->create_view_mode flag. */ + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL) res= mysql_create_view(thd, first_table, thd->lex->create_view_mode); break; } @@ -4960,12 +5452,14 @@ create_sp_error: if (check_table_access(thd, DROP_ACL, all_tables, FALSE, UINT_MAX, FALSE)) goto error; /* Conditionally writes to binlog. */ + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL) res= mysql_drop_view(thd, first_table, thd->lex->drop_mode); break; } case SQLCOM_CREATE_TRIGGER: { /* Conditionally writes to binlog. */ + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL) res= mysql_create_or_drop_trigger(thd, all_tables, 1); break; @@ -4973,6 +5467,7 @@ create_sp_error: case SQLCOM_DROP_TRIGGER: { /* Conditionally writes to binlog. */ + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL) res= mysql_create_or_drop_trigger(thd, all_tables, 0); break; } @@ -4993,7 +5488,13 @@ create_sp_error: break; case SQLCOM_XA_COMMIT: if (trans_xa_commit(thd)) + { + thd->mdl_context.release_transactional_locks(); +#ifdef WITH_WSREP + WSREP_DEBUG("XA commit failed, MDL released: %lu", thd->thread_id); +#endif /* WITH_WSREP */ goto error; + } thd->mdl_context.release_transactional_locks(); /* We've just done a commit, reset transaction @@ -5005,7 +5506,13 @@ create_sp_error: break; case SQLCOM_XA_ROLLBACK: if (trans_xa_rollback(thd)) + { + thd->mdl_context.release_transactional_locks(); +#ifdef WITH_WSREP + WSREP_DEBUG("XA rollback failed, MDL released: %lu", thd->thread_id); +#endif /* WITH_WSREP */ goto error; + } thd->mdl_context.release_transactional_locks(); /* We've just done a rollback, reset transaction @@ -5025,11 +5532,13 @@ create_sp_error: my_ok(thd); break; case SQLCOM_INSTALL_PLUGIN: + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL) if (! (res= mysql_install_plugin(thd, &thd->lex->comment, &thd->lex->ident))) my_ok(thd); break; case SQLCOM_UNINSTALL_PLUGIN: + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL) if (! (res= mysql_uninstall_plugin(thd, &thd->lex->comment, &thd->lex->ident))) my_ok(thd); @@ -5174,6 +5683,9 @@ finish: /* Free tables */ close_thread_tables(thd); +#ifdef WITH_WSREP + thd->wsrep_consistency_check= NO_CONSISTENCY_CHECK; +#endif /* WITH_WSREP */ #ifndef DBUG_OFF if (lex->sql_command != SQLCOM_SET_OPTION && ! thd->in_sub_stmt) @@ -5227,6 +5739,22 @@ finish: { thd->mdl_context.release_statement_locks(); } + WSREP_TO_ISOLATION_END; + +#ifdef WITH_WSREP + /* + Force release of transactional locks if not in active MST and wsrep is on. + */ + if (WSREP(thd) && + ! thd->in_sub_stmt && + ! thd->in_active_multi_stmt_transaction() && + thd->mdl_context.has_transactional_locks()) + { + WSREP_DEBUG("Forcing release of transactional locks for thd %lu", + thd->thread_id); + thd->mdl_context.release_transactional_locks(); + } +#endif /* WITH_WSREP */ DBUG_RETURN(res || thd->is_error()); } @@ -5309,6 +5837,9 @@ static bool execute_sqlcom_select(THD *thd, TABLE_LIST *all_tables) status_var_increment(thd->status_var.empty_queries); else status_var_add(thd->status_var.rows_sent, thd->get_sent_row_count()); +#ifdef WITH_WSREP + if (lex->sql_command == SQLCOM_SHOW_STATUS) wsrep_free_status(thd); +#endif /* WITH_WSREP */ return res; } @@ -5338,8 +5869,8 @@ static bool execute_show_status(THD *thd, TABLE_LIST *all_tables) } -static bool execute_rename_table(THD *thd, TABLE_LIST *first_table, - TABLE_LIST *all_tables) +static bool check_rename_table(THD *thd, TABLE_LIST *first_table, + TABLE_LIST *all_tables) { DBUG_ASSERT(first_table == all_tables && first_table != 0); TABLE_LIST *table; @@ -5353,7 +5884,7 @@ static bool execute_rename_table(THD *thd, TABLE_LIST *first_table, &table->next_local->grant.privilege, &table->next_local->grant.m_internal, 0, 0)) - return 1; + return true; TABLE_LIST old_list, new_list; /* we do not need initialize old_list and new_list because we will @@ -5366,10 +5897,10 @@ static bool execute_rename_table(THD *thd, TABLE_LIST *first_table, INSERT_ACL | CREATE_ACL) && check_grant(thd, INSERT_ACL | CREATE_ACL, &new_list, FALSE, 1, FALSE))) - return 1; + return true; } - return mysql_rename_tables(thd, first_table, 0); + return false; } @@ -6275,6 +6806,24 @@ void THD::reset_for_next_command() thd->auto_inc_intervals_in_cur_stmt_for_binlog.empty(); thd->stmt_depends_on_first_successful_insert_id_in_prev_stmt= 0; +#ifdef WITH_WSREP + /* + Autoinc variables should be adjusted only for locally executed + transactions. Appliers and replayers are either processing ROW + events or get autoinc variable values from Query_log_event and + mysql slave may be processing STATEMENT format events, but he should + use autoinc values passed in binlog events, not the values forced by + the cluster. + */ + if (WSREP(thd) && thd->wsrep_exec_mode == LOCAL_STATE && + !thd->slave_thread && wsrep_auto_increment_control) + { + thd->variables.auto_increment_offset= + global_system_variables.auto_increment_offset; + thd->variables.auto_increment_increment= + global_system_variables.auto_increment_increment; + } +#endif /* WITH_WSREP */ thd->query_start_used= 0; thd->query_start_sec_part_used= 0; thd->is_fatal_error= thd->time_zone_used= 0; @@ -6480,6 +7029,109 @@ void mysql_init_multi_delete(LEX *lex) lex->query_tables_last= &lex->query_tables; } +#ifdef WITH_WSREP +static void wsrep_mysql_parse(THD *thd, char *rawbuf, uint length, + Parser_state *parser_state) +{ + bool is_autocommit= + !thd->in_multi_stmt_transaction_mode() && + thd->wsrep_conflict_state == NO_CONFLICT && + !thd->wsrep_applier; + + do + { + if (thd->wsrep_conflict_state== RETRY_AUTOCOMMIT) + { + thd->wsrep_conflict_state= NO_CONFLICT; + /* Performance Schema Interface instrumentation, begin */ + thd->m_statement_psi= MYSQL_REFINE_STATEMENT(thd->m_statement_psi, + com_statement_info[thd->get_command()].m_key); + MYSQL_SET_STATEMENT_TEXT(thd->m_statement_psi, thd->query(), + thd->query_length()); + } + mysql_parse(thd, rawbuf, length, parser_state); + + if (WSREP(thd)) { + /* wsrep BF abort in query exec phase */ + mysql_mutex_lock(&thd->LOCK_wsrep_thd); + if (thd->wsrep_conflict_state == MUST_ABORT) { + wsrep_client_rollback(thd); + + WSREP_DEBUG("abort in exec query state, avoiding autocommit"); + } + + if (thd->wsrep_conflict_state == MUST_REPLAY) + { + wsrep_replay_transaction(thd); + } + + /* setting error code for BF aborted trxs */ + if (thd->wsrep_conflict_state == ABORTED || + thd->wsrep_conflict_state == CERT_FAILURE) + { + mysql_reset_thd_for_next_command(thd); + thd->killed= NOT_KILLED; + if (is_autocommit && + thd->lex->sql_command != SQLCOM_SELECT && + (thd->wsrep_retry_counter < thd->variables.wsrep_retry_autocommit)) + { + WSREP_DEBUG("wsrep retrying AC query: %s", + (thd->query()) ? thd->query() : "void"); + + /* Performance Schema Interface instrumentation, end */ + MYSQL_END_STATEMENT(thd->m_statement_psi, thd->get_stmt_da()); + thd->m_statement_psi= NULL; + thd->m_digest= NULL; + close_thread_tables(thd); + + thd->wsrep_conflict_state= RETRY_AUTOCOMMIT; + thd->wsrep_retry_counter++; // grow + wsrep_copy_query(thd); + thd->set_time(); + parser_state->reset(rawbuf, length); + } + else + { + WSREP_DEBUG("%s, thd: %lu is_AC: %d, retry: %lu - %lu SQL: %s", + (thd->wsrep_conflict_state == ABORTED) ? + "BF Aborted" : "cert failure", + thd->thread_id, is_autocommit, thd->wsrep_retry_counter, + thd->variables.wsrep_retry_autocommit, thd->query()); + my_error(ER_LOCK_DEADLOCK, MYF(0), "wsrep aborted transaction"); + thd->killed= NOT_KILLED; + thd->wsrep_conflict_state= NO_CONFLICT; + if (thd->wsrep_conflict_state != REPLAYING) + thd->wsrep_retry_counter= 0; // reset + } + } + else + { + set_if_smaller(thd->wsrep_retry_counter, 0); // reset; eventually ok + } + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + } + + /* If retry is requested clean up explain structure */ + if (thd->wsrep_conflict_state == RETRY_AUTOCOMMIT && thd->lex->explain) + delete_explain_query(thd->lex); + + } while (thd->wsrep_conflict_state== RETRY_AUTOCOMMIT); + + if (thd->wsrep_retry_query) + { + WSREP_DEBUG("releasing retry_query: conf %d sent %d kill %d errno %d SQL %s", + thd->wsrep_conflict_state, + thd->get_stmt_da()->is_sent(), + thd->killed, + thd->get_stmt_da()->is_error() ? thd->get_stmt_da()->sql_errno() : 0, + thd->wsrep_retry_query); + my_free(thd->wsrep_retry_query); + thd->wsrep_retry_query = NULL; + thd->wsrep_retry_query_len = 0; + thd->wsrep_retry_command = COM_CONNECT; + } +} +#endif /* WITH_WSREP */ /* When you modify mysql_parse(), you may need to modify @@ -6534,6 +7186,7 @@ void mysql_parse(THD *thd, char *rawbuf, uint length, MYSQL_REFINE_STATEMENT(thd->m_statement_psi, sql_statement_info[thd->lex->sql_command]. m_key); + #ifndef NO_EMBEDDED_ACCESS_CHECKS if (mqh_used && thd->user_connect && check_mqh(thd, lex->sql_command)) @@ -6608,6 +7261,12 @@ void mysql_parse(THD *thd, char *rawbuf, uint length, sql_statement_info[SQLCOM_SELECT].m_key); status_var_increment(thd->status_var.com_stat[SQLCOM_SELECT]); thd->update_stats(); +#ifdef WITH_WSREP + if (WSREP_CLIENT(thd)) + { + thd->wsrep_sync_wait_gtid= WSREP_GTID_UNDEFINED; + } +#endif /* WITH_WSREP */ } DBUG_VOID_RETURN; } @@ -7506,8 +8165,14 @@ kill_one_thread(THD *thd, longlong id, killed_state kill_signal, killed_type typ faster and do a harder kill than KILL_SYSTEM_THREAD; */ +#ifdef WITH_WSREP + if (((thd->security_ctx->master_access & SUPER_ACL) || + thd->security_ctx->user_matches(tmp->security_ctx)) && + !wsrep_thd_is_BF((void *)tmp, true)) +#else if ((thd->security_ctx->master_access & SUPER_ACL) || thd->security_ctx->user_matches(tmp->security_ctx)) +#endif /* WITH_WSREP */ { tmp->awake(kill_signal); error=0; @@ -8384,7 +9049,6 @@ LEX_USER *create_definer(THD *thd, LEX_STRING *user_name, LEX_STRING *host_name) return definer; } - /** Check that byte length of a string does not exceed some limit. diff --git a/sql/sql_parse.h b/sql/sql_parse.h index fa414911093..368bba91c20 100644 --- a/sql/sql_parse.h +++ b/sql/sql_parse.h @@ -208,6 +208,31 @@ inline bool is_supported_parser_charset(CHARSET_INFO *cs) { return MY_TEST(cs->mbminlen == 1); } +#ifdef WITH_WSREP + +#define WSREP_MYSQL_DB (char *)"mysql" +#define WSREP_TO_ISOLATION_BEGIN(db_, table_, table_list_) \ + if (WSREP(thd) && wsrep_to_isolation_begin(thd, db_, table_, table_list_)) goto error; + +#define WSREP_TO_ISOLATION_END \ + if (WSREP(thd) || (thd && thd->wsrep_exec_mode==TOTAL_ORDER)) \ + wsrep_to_isolation_end(thd); + +/* + Checks if lex->no_write_to_binlog is set for statements that use LOCAL or + NO_WRITE_TO_BINLOG. +*/ +#define WSREP_TO_ISOLATION_BEGIN_WRTCHK(db_, table_, table_list_) \ + if (WSREP(thd) && !thd->lex->no_write_to_binlog \ + && wsrep_to_isolation_begin(thd, db_, table_, table_list_)) goto error; + +#else + +#define WSREP_TO_ISOLATION_BEGIN(db_, table_, table_list_) +#define WSREP_TO_ISOLATION_END +#define WSREP_TO_ISOLATION_BEGIN_WRTCHK(db_, table_, table_list_) + +#endif /* WITH_WSREP */ #endif /* SQL_PARSE_INCLUDED */ diff --git a/sql/sql_partition_admin.cc b/sql/sql_partition_admin.cc index 9b471f97521..2a76c8d6671 100644 --- a/sql/sql_partition_admin.cc +++ b/sql/sql_partition_admin.cc @@ -530,6 +530,21 @@ bool Sql_cmd_alter_table_exchange_partition:: &alter_prelocking_strategy)) DBUG_RETURN(true); +#ifdef WITH_WSREP + /* Forward declaration */ + TABLE *find_temporary_table(THD *thd, const TABLE_LIST *tl); + + if ((!thd->is_current_stmt_binlog_format_row() || + /* TODO: Do we really need to check for temp tables in this case? */ + !find_temporary_table(thd, table_list)) && + wsrep_to_isolation_begin(thd, table_list->db, table_list->table_name, + NULL)) + { + WSREP_WARN("ALTER TABLE EXCHANGE PARTITION isolation failure"); + DBUG_RETURN(TRUE); + } +#endif /* WITH_WSREP */ + part_table= table_list->table; swap_table= swap_table_list->table; @@ -763,6 +778,20 @@ bool Sql_cmd_alter_table_truncate_partition::execute(THD *thd) if (check_one_table_access(thd, DROP_ACL, first_table)) DBUG_RETURN(TRUE); +#ifdef WITH_WSREP + /* Forward declaration */ + TABLE *find_temporary_table(THD *thd, const TABLE_LIST *tl); + + if (WSREP(thd) && (!thd->is_current_stmt_binlog_format_row() || + !find_temporary_table(thd, first_table)) && + wsrep_to_isolation_begin( + thd, first_table->db, first_table->table_name, NULL) + ) + { + WSREP_WARN("ALTER TABLE TRUNCATE PARTITION isolation failure"); + DBUG_RETURN(TRUE); + } +#endif /* WITH_WSREP */ if (open_tables(thd, &first_table, &table_counter, 0)) DBUG_RETURN(true); diff --git a/sql/sql_plugin.cc b/sql/sql_plugin.cc index e34409b3532..df24f398e97 100644 --- a/sql/sql_plugin.cc +++ b/sql/sql_plugin.cc @@ -3005,11 +3005,17 @@ void plugin_thdvar_init(THD *thd) thd->variables.dynamic_variables_size= 0; thd->variables.dynamic_variables_ptr= 0; +#ifdef WITH_WSREP + if (!WSREP(thd) || !thd->wsrep_applier) { +#endif mysql_mutex_lock(&LOCK_plugin); thd->variables.table_plugin= intern_plugin_lock(NULL, global_system_variables.table_plugin); intern_plugin_unlock(NULL, old_table_plugin); mysql_mutex_unlock(&LOCK_plugin); +#ifdef WITH_WSREP + } +#endif DBUG_VOID_RETURN; } diff --git a/sql/sql_prepare.cc b/sql/sql_prepare.cc index 1a02a2ae84c..1bb465d220b 100644 --- a/sql/sql_prepare.cc +++ b/sql/sql_prepare.cc @@ -3549,6 +3549,10 @@ Prepared_statement::set_parameters(String *expanded_query, return res; } +#ifdef WITH_WSREP +/* forward declaration */ +void wsrep_replay_transaction(THD *thd); +#endif /* WITH_WSREP */ /** Execute a prepared statement. Re-prepare it a limited number @@ -3628,6 +3632,24 @@ reexecute: error= execute(expanded_query, open_cursor) || thd->is_error(); thd->m_reprepare_observer= NULL; +#ifdef WITH_WSREP + mysql_mutex_lock(&thd->LOCK_wsrep_thd); + switch (thd->wsrep_conflict_state) + { + case CERT_FAILURE: + WSREP_DEBUG("PS execute fail for CERT_FAILURE: thd: %ld err: %d", + thd->thread_id, thd->get_stmt_da()->sql_errno() ); + thd->wsrep_conflict_state = NO_CONFLICT; + break; + + case MUST_REPLAY: + (void) wsrep_replay_transaction(thd); + /* fallthrough */ + + default: break; + } + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); +#endif /* WITH_WSREP */ if ((sql_command_flags[lex->sql_command] & CF_REEXECUTION_FRAGILE) && error && !thd->is_fatal_error && !thd->killed && diff --git a/sql/sql_reload.cc b/sql/sql_reload.cc index f9c5757f721..704e2f84437 100644 --- a/sql/sql_reload.cc +++ b/sql/sql_reload.cc @@ -30,6 +30,7 @@ #include "debug_sync.h" #include "des_key_file.h" + static void disable_checkpoints(THD *thd); /** @@ -154,6 +155,12 @@ bool reload_acl_and_cache(THD *thd, unsigned long long options, { if (mysql_bin_log.rotate_and_purge(true)) *write_to_binlog= -1; + + if (WSREP_ON) + { + /* Wait for last binlog checkpoint event to be logged. */ + mysql_bin_log.wait_for_last_checkpoint_event(); + } } } if (options & REFRESH_RELAY_LOG) @@ -253,7 +260,18 @@ bool reload_acl_and_cache(THD *thd, unsigned long long options, } if (options & REFRESH_CHECKPOINT) disable_checkpoints(thd); - } +#ifdef WITH_WSREP + /* + We need to do it second time after wsrep appliers were blocked in + make_global_read_lock_block_commit(thd) above since they could have + modified the tables too. + */ + if (WSREP(thd) && + close_cached_tables(thd, tables, (options & REFRESH_FAST) ? + FALSE : TRUE, TRUE)) + result= 1; +#endif /* WITH_WSREP */ + } else { if (thd && thd->locked_tables_mode) @@ -297,6 +315,16 @@ bool reload_acl_and_cache(THD *thd, unsigned long long options, } } +#ifdef WITH_WSREP + if (thd && thd->wsrep_applier) + { + /* + In case of applier thread, do not wait for table share(s) to be + removed from table definition cache. + */ + options|= REFRESH_FAST; + } +#endif if (close_cached_tables(thd, tables, ((options & REFRESH_FAST) ? FALSE : TRUE), (thd ? thd->variables.lock_wait_timeout : diff --git a/sql/sql_repl.cc b/sql/sql_repl.cc index a2acf52d44e..f1bbe58a8b8 100644 --- a/sql/sql_repl.cc +++ b/sql/sql_repl.cc @@ -2996,6 +2996,15 @@ int start_slave(THD* thd , Master_info* mi, bool net_report) err: mi->unlock_slave_threads(); +#ifdef WITH_WSREP + if (WSREP(thd)) + thd_proc_info(thd, "exit stop_slave()"); + else + thd_proc_info(thd, 0); +#else /* WITH_WSREP */ + thd_proc_info(thd, 0); +#endif /* WITH_WSREP */ + if (slave_errno) { if (net_report) diff --git a/sql/sql_show.cc b/sql/sql_show.cc index a3d834b0e42..1918cbab720 100644 --- a/sql/sql_show.cc +++ b/sql/sql_show.cc @@ -59,6 +59,10 @@ #include "debug_sync.h" #include "keycaches.h" +#if !defined(MYSQL_MAX_VARIABLE_VALUE_LEN) +#define MYSQL_MAX_VARIABLE_VALUE_LEN 1024 +#endif // !defined(MYSQL_MAX_VARIABLE_VALUE_LEN) + #ifdef WITH_PARTITION_STORAGE_ENGINE #include "ha_partition.h" #endif @@ -118,8 +122,6 @@ static void get_cs_converted_string_value(THD *thd, static int show_create_view(THD *thd, TABLE_LIST *table, String *buff); -static void append_algorithm(TABLE_LIST *table, String *buff); - static COND * make_cond_for_info_schema(COND *cond, TABLE_LIST *table); /** @@ -2099,32 +2101,30 @@ static void store_key_options(THD *thd, String *packet, TABLE *table, } } - -void -view_store_options(THD *thd, TABLE_LIST *table, String *buff) -{ - append_algorithm(table, buff); - append_definer(thd, buff, &table->definer.user, &table->definer.host); - if (table->view_suid) - buff->append(STRING_WITH_LEN("SQL SECURITY DEFINER ")); - else - buff->append(STRING_WITH_LEN("SQL SECURITY INVOKER ")); -} - - /* - Append DEFINER clause to the given buffer. + Append ALGORITHM clause to the given buffer. SYNOPSIS - append_definer() - thd [in] thread handle - buffer [inout] buffer to hold DEFINER clause - definer_user [in] user name part of definer - definer_host [in] host name part of definer + append_algorithm() + table [in] table list + buff [inout] buffer to hold the ALGORITHM clause + check_inherit [in] if true, do nothing if algorithm is INHERIT */ -static void append_algorithm(TABLE_LIST *table, String *buff) +static void append_algorithm(TABLE_LIST *table, String *buff, + bool check_inherit) { + int16 algorithm= (int16) table->algorithm; + + DBUG_ENTER("append_algorithm"); + + /* + Handle a special case when ALGORITHM is not specified, in which case we + simply return. + */ + if (check_inherit && (algorithm == VIEW_ALGORITHM_INHERIT)) + DBUG_VOID_RETURN; + buff->append(STRING_WITH_LEN("ALGORITHM=")); switch ((int16)table->algorithm) { case VIEW_ALGORITHM_UNDEFINED: @@ -2139,6 +2139,8 @@ static void append_algorithm(TABLE_LIST *table, String *buff) default: DBUG_ASSERT(0); // never should happen } + + DBUG_VOID_RETURN; } /* @@ -2157,7 +2159,7 @@ void append_definer(THD *thd, String *buffer, const LEX_STRING *definer_user, { buffer->append(STRING_WITH_LEN("DEFINER=")); append_identifier(thd, buffer, definer_user->str, definer_user->length); - if (definer_host->str[0]) + if (definer_host->str && definer_host->str[0]) { buffer->append('@'); append_identifier(thd, buffer, definer_host->str, definer_host->length); @@ -2165,6 +2167,23 @@ void append_definer(THD *thd, String *buffer, const LEX_STRING *definer_user, buffer->append(' '); } +void +view_store_options4(THD *thd, TABLE_LIST *table, String *buff, + bool check_inherit) +{ + append_algorithm(table, buff, check_inherit); + append_definer(thd, buff, &table->definer.user, &table->definer.host); + if (table->view_suid) + buff->append(STRING_WITH_LEN("SQL SECURITY DEFINER ")); + else + buff->append(STRING_WITH_LEN("SQL SECURITY INVOKER ")); +} + +void +view_store_options(THD *thd, TABLE_LIST *table, String *buff) +{ + view_store_options4(thd, table, buff, false); +} static int show_create_view(THD *thd, TABLE_LIST *table, String *buff) { @@ -3019,11 +3038,39 @@ static bool show_status_array(THD *thd, const char *wild, *prefix_end++= '_'; len=name_buffer + sizeof(name_buffer) - prefix_end; +#ifdef WITH_WSREP + bool is_wsrep_var= FALSE; + /* + This is a workaround for lp:1306875 (PBX) to skip switching of wsrep + status variable name's first letter to uppercase. This is an optimization + for status variables defined under wsrep plugin. + TODO: remove once lp:1306875 has been addressed. + */ + if (*prefix && !my_strcasecmp(system_charset_info, prefix, "wsrep")) + { + is_wsrep_var= TRUE; + } +#endif /* WITH_WSREP */ + for (; variables->name; variables++) { bool wild_checked= 0; strnmov(prefix_end, variables->name, len); name_buffer[sizeof(name_buffer)-1]=0; /* Safety */ + +#ifdef WITH_WSREP + /* + If the prefix is NULL, that means we are looking into the status variables + defined directly under mysqld.cc. Do not capitalize wsrep status variable + names until lp:1306875 has been fixed. + TODO: remove once lp:1306875 has been addressed. + */ + if (!(*prefix) && !strncasecmp(name_buffer, "wsrep", strlen("wsrep"))) + { + is_wsrep_var= TRUE; + } +#endif /* WITH_WSREP */ + if (ucase_names) my_caseup_str(system_charset_info, name_buffer); else @@ -3032,8 +3079,13 @@ static bool show_status_array(THD *thd, const char *wild, DBUG_ASSERT(name_buffer[0] >= 'a'); DBUG_ASSERT(name_buffer[0] <= 'z'); +#ifdef WITH_WSREP + // TODO: remove once lp:1306875 has been addressed. + if (status_var && (is_wsrep_var == FALSE)) +#else /* traditionally status variables have a first letter uppercased */ if (status_var) +#endif /* WITH_WSREP */ name_buffer[0]-= 'a' - 'A'; } @@ -8934,7 +8986,8 @@ ST_FIELD_INFO variables_fields_info[]= { {"VARIABLE_NAME", 64, MYSQL_TYPE_STRING, 0, 0, "Variable_name", SKIP_OPEN_TABLE}, - {"VARIABLE_VALUE", 1024, MYSQL_TYPE_STRING, 0, 1, "Value", SKIP_OPEN_TABLE}, + {"VARIABLE_VALUE", MYSQL_MAX_VARIABLE_VALUE_LEN, MYSQL_TYPE_STRING, 0, 1, + "Value", SKIP_OPEN_TABLE}, {0, 0, MYSQL_TYPE_STRING, 0, 0, 0, SKIP_OPEN_TABLE} }; diff --git a/sql/sql_show.h b/sql/sql_show.h index 9ca60557cc0..c1c6388216d 100644 --- a/sql/sql_show.h +++ b/sql/sql_show.h @@ -109,6 +109,8 @@ void free_status_vars(); void reset_status_vars(); bool show_create_trigger(THD *thd, const sp_name *trg_name); void view_store_options(THD *thd, TABLE_LIST *table, String *buff); +void view_store_options4(THD *thd, TABLE_LIST *table, String *buff, + bool check_inherit); void init_fill_schema_files_row(TABLE* table); bool schema_table_store_record(THD *thd, TABLE *table); diff --git a/sql/sql_table.cc b/sql/sql_table.cc index 2eff8fd5e2f..e591b8a1eb7 100644 --- a/sql/sql_table.cc +++ b/sql/sql_table.cc @@ -5306,8 +5306,64 @@ bool mysql_create_like_table(THD* thd, TABLE_LIST* table, bool do_logging= FALSE; uint not_used; int create_res; + uint save_thd_create_info_options; DBUG_ENTER("mysql_create_like_table"); +#ifdef WITH_WSREP + if (WSREP(thd) && !thd->wsrep_applier) + { + TABLE *tmp_table; + bool is_tmp_table= FALSE; + + for (tmp_table= thd->temporary_tables; tmp_table; tmp_table=tmp_table->next) + { + if (!strcmp(src_table->db, tmp_table->s->db.str) && + !strcmp(src_table->table_name, tmp_table->s->table_name.str)) + { + is_tmp_table= TRUE; + break; + } + } + if (create_info->options & HA_LEX_CREATE_TMP_TABLE) + { + /* CREATE TEMPORARY TABLE LIKE must be skipped from replication */ + WSREP_DEBUG("CREATE TEMPORARY TABLE LIKE... skipped replication\n %s", + thd->query()); + } + else if (!is_tmp_table) + { + /* this is straight CREATE TABLE LIKE... eith no tmp tables */ + WSREP_TO_ISOLATION_BEGIN(table->db, table->table_name, NULL); + } + else + { + /* here we have CREATE TABLE LIKE <temporary table> + the temporary table definition will be needed in slaves to + enable the create to succeed + */ + TABLE_LIST tbl; + bzero((void*) &tbl, sizeof(tbl)); + tbl.db= src_table->db; + tbl.table_name= tbl.alias= src_table->table_name; + tbl.table= tmp_table; + char buf[2048]; + String query(buf, sizeof(buf), system_charset_info); + query.length(0); // Have to zero it since constructor doesn't + + (void) show_create_table(thd, &tbl, &query, NULL, WITH_DB_NAME); + WSREP_DEBUG("TMP TABLE: %s", query.ptr()); + + thd->wsrep_TOI_pre_query= query.ptr(); + thd->wsrep_TOI_pre_query_len= query.length(); + + WSREP_TO_ISOLATION_BEGIN(table->db, table->table_name, NULL); + + thd->wsrep_TOI_pre_query= NULL; + thd->wsrep_TOI_pre_query_len= 0; + } + } +#endif + /* We the open source table to get its description in HA_CREATE_INFO and Alter_info objects. This also acquires a shared metadata lock @@ -5321,7 +5377,7 @@ bool mysql_create_like_table(THD* thd, TABLE_LIST* table, */ /* Copy temporarily the statement flags to thd for lock_table_names() */ - uint save_thd_create_info_options= thd->lex->create_info.options; + save_thd_create_info_options= thd->lex->create_info.options; thd->lex->create_info.options|= create_info->options; res= open_tables(thd, &thd->lex->query_tables, ¬_used, 0); thd->lex->create_info.options= save_thd_create_info_options; @@ -5595,6 +5651,13 @@ err: res= 1; } DBUG_RETURN(res); + +#ifdef WITH_WSREP + error: + thd->wsrep_TOI_pre_query= NULL; + DBUG_RETURN(TRUE); +#endif /* WITH_WSREP */ + } @@ -8166,6 +8229,10 @@ simple_rename_or_index_change(THD *thd, TABLE_LIST *table_list, : HA_EXTRA_FORCE_REOPEN; DBUG_ENTER("simple_rename_or_index_change"); +#ifdef WITH_WSREP + bool do_log_write(true); +#endif /* WITH_WSREP */ + if (keys_onoff != Alter_info::LEAVE_AS_IS) { if (wait_while_table_is_used(thd, table, extra_func)) @@ -8225,7 +8292,14 @@ simple_rename_or_index_change(THD *thd, TABLE_LIST *table_list, if (!error) { - error= write_bin_log(thd, TRUE, thd->query(), thd->query_length()); +#ifdef WITH_WSREP + if (!WSREP(thd) || do_log_write) { +#endif /* WITH_WSREP */ + error= write_bin_log(thd, TRUE, thd->query(), thd->query_length()); +#ifdef WITH_WSREP + } +#endif /* !WITH_WSREP */ + if (!error) my_ok(thd); } @@ -8352,6 +8426,17 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name, DEBUG_SYNC(thd, "alter_opened_table"); +#ifdef WITH_WSREP + DBUG_EXECUTE_IF("sync.alter_opened_table", + { + const char act[]= + "now " + "wait_for signal.alter_opened_table"; + DBUG_ASSERT(!debug_sync_set_action(thd, + STRING_WITH_LEN(act))); + };); +#endif // WITH_WSREP + if (error) DBUG_RETURN(true); diff --git a/sql/sql_trigger.cc b/sql/sql_trigger.cc index b17775abb7c..327b5bb0260 100644 --- a/sql/sql_trigger.cc +++ b/sql/sql_trigger.cc @@ -434,8 +434,14 @@ bool mysql_create_or_drop_trigger(THD *thd, TABLE_LIST *tables, bool create) binlogged, so they share the same danger, so trust_function_creators applies to them too. */ +#ifdef WITH_WSREP + if (!trust_function_creators && + (WSREP_EMULATE_BINLOG(thd) || mysql_bin_log.is_open()) && + !(thd->security_ctx->master_access & SUPER_ACL)) +#else if (!trust_function_creators && mysql_bin_log.is_open() && !(thd->security_ctx->master_access & SUPER_ACL)) +#endif /* WITH_WSREP */ { my_error(ER_BINLOG_CREATE_ROUTINE_NEED_SUPER, MYF(0)); DBUG_RETURN(TRUE); @@ -2440,3 +2446,59 @@ bool load_table_name_for_trigger(THD *thd, DBUG_RETURN(FALSE); } +#ifdef WITH_WSREP +int wsrep_create_trigger_query(THD *thd, uchar** buf, size_t* buf_len) +{ + LEX *lex= thd->lex; + String stmt_query; + + LEX_STRING definer_user; + LEX_STRING definer_host; + + if (!lex->definer) + { + if (!thd->slave_thread) + { + if (!(lex->definer= create_default_definer(thd, false))) + return 1; + } + } + + if (lex->definer) + { + /* SUID trigger. */ + LEX_USER *d= get_current_user(thd, lex->definer); + + if (!d) + return 1; + + definer_user= d->user; + definer_host= d->host; + } + else + { + /* non-SUID trigger. */ + + definer_user.str= 0; + definer_user.length= 0; + + definer_host.str= 0; + definer_host.length= 0; + } + + stmt_query.append(STRING_WITH_LEN("CREATE ")); + + append_definer(thd, &stmt_query, &definer_user, &definer_host); + + LEX_STRING stmt_definition; + stmt_definition.str= (char*) thd->lex->stmt_definition_begin; + stmt_definition.length= thd->lex->stmt_definition_end + - thd->lex->stmt_definition_begin; + trim_whitespace(thd->charset(), & stmt_definition); + + stmt_query.append(stmt_definition.str, stmt_definition.length); + + return wsrep_to_buf_helper(thd, stmt_query.c_ptr(), stmt_query.length(), + buf, buf_len); +} +#endif /* WITH_WSREP */ diff --git a/sql/sql_truncate.cc b/sql/sql_truncate.cc index 05869b70c8f..16c2a5027e3 100644 --- a/sql/sql_truncate.cc +++ b/sql/sql_truncate.cc @@ -24,6 +24,9 @@ #include "sql_acl.h" // DROP_ACL #include "sql_parse.h" // check_one_table_access() #include "sql_truncate.h" +#ifdef WITH_WSREP +#include "wsrep_mysqld.h" +#endif /* WITH_WSREP */ #include "sql_show.h" //append_identifier() @@ -483,6 +486,12 @@ bool Sql_cmd_truncate_table::truncate_table(THD *thd, TABLE_LIST *table_ref) { bool hton_can_recreate; +#ifdef WITH_WSREP + if (WSREP(thd) && wsrep_to_isolation_begin(thd, + table_ref->db, + table_ref->table_name, NULL)) + DBUG_RETURN(TRUE); +#endif /* WITH_WSREP */ if (lock_table(thd, table_ref, &hton_can_recreate)) DBUG_RETURN(TRUE); @@ -565,7 +574,6 @@ bool Sql_cmd_truncate_table::execute(THD *thd) if (! (res= truncate_table(thd, first_table))) my_ok(thd); - DBUG_RETURN(res); } diff --git a/sql/sql_update.cc b/sql/sql_update.cc index 9ca96bd702e..1a136e5158b 100644 --- a/sql/sql_update.cc +++ b/sql/sql_update.cc @@ -983,7 +983,11 @@ int mysql_update(THD *thd, */ if ((error < 0) || thd->transaction.stmt.modified_non_trans_table) { +#ifdef WITH_WSREP + if (WSREP_EMULATE_BINLOG(thd) || mysql_bin_log.is_open()) +#else if (mysql_bin_log.is_open()) +#endif { int errcode= 0; if (error < 0) @@ -2255,7 +2259,11 @@ void multi_update::abort_result_set() The query has to binlog because there's a modified non-transactional table either from the query's list or via a stored routine: bug#13270,23333 */ +#ifdef WITH_WSREP + if (WSREP_EMULATE_BINLOG(thd) || mysql_bin_log.is_open()) +#else if (mysql_bin_log.is_open()) +#endif { /* THD::killed status might not have been set ON at time of an error @@ -2527,7 +2535,11 @@ bool multi_update::send_eof() if (local_error == 0 || thd->transaction.stmt.modified_non_trans_table) { +#ifdef WITH_WSREP + if (WSREP_EMULATE_BINLOG(thd) || mysql_bin_log.is_open()) +#else if (mysql_bin_log.is_open()) +#endif { int errcode= 0; if (local_error == 0) diff --git a/sql/sql_yacc.yy b/sql/sql_yacc.yy index 95a683165e2..5a0c18b5109 100644 --- a/sql/sql_yacc.yy +++ b/sql/sql_yacc.yy @@ -7151,7 +7151,7 @@ alter: } view_tail {} - | ALTER definer_opt EVENT_SYM sp_name + | ALTER definer_opt remember_name EVENT_SYM sp_name { /* It is safe to use Lex->spname because @@ -7163,9 +7163,12 @@ alter: if (!(Lex->event_parse_data= Event_parse_data::new_instance(thd))) MYSQL_YYABORT; - Lex->event_parse_data->identifier= $4; + Lex->event_parse_data->identifier= $5; Lex->sql_command= SQLCOM_ALTER_EVENT; +#ifdef WITH_WSREP + Lex->stmt_definition_begin= $3; +#endif } ev_alter_on_schedule_completion opt_ev_rename_to @@ -7173,7 +7176,7 @@ alter: opt_ev_comment opt_ev_sql_stmt { - if (!($6 || $7 || $8 || $9 || $10)) + if (!($7 || $8 || $9 || $10 || $11)) { my_parse_error(ER(ER_SYNTAX_ERROR)); MYSQL_YYABORT; @@ -7183,6 +7186,9 @@ alter: can overwrite it */ Lex->sql_command= SQLCOM_ALTER_EVENT; +#ifdef WITH_WSREP + Lex->stmt_definition_end= (char*)YYLIP->get_cpp_ptr(); +#endif } | ALTER TABLESPACE alter_tablespace_info { diff --git a/sql/sys_vars.cc b/sql/sys_vars.cc index 8d164ea86f5..000a424faeb 100644 --- a/sql/sys_vars.cc +++ b/sql/sys_vars.cc @@ -446,6 +446,27 @@ static bool binlog_format_check(sys_var *self, THD *thd, set_var *var) if (check_has_super(self, thd, var)) return true; +#ifdef WITH_WSREP + /* + MariaDB Galera does not support STATEMENT or MIXED binlog format currently. + */ + if (WSREP(thd) && + var->save_result.ulonglong_value != BINLOG_FORMAT_ROW) + { + // Push a warning to the error log. + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, ER_UNKNOWN_ERROR, + "MariaDB Galera does not support binlog format: %s", + binlog_format_names[var->save_result.ulonglong_value]); + + if (var->type == OPT_GLOBAL) + { + WSREP_ERROR("MariaDB Galera does not support binlog format: %s", + binlog_format_names[var->save_result.ulonglong_value]); + return true; + } + } +#endif + if (var->type == OPT_GLOBAL) return false; @@ -3363,6 +3384,10 @@ static bool fix_autocommit(sys_var *self, THD *thd, enum_var_type type) if (trans_commit_stmt(thd) || trans_commit(thd)) { thd->variables.option_bits&= ~OPTION_AUTOCOMMIT; + thd->mdl_context.release_transactional_locks(); +#ifdef WITH_WSREP + WSREP_DEBUG("autocommit, MDL TRX lock released: %lu", thd->thread_id); +#endif /* WITH_WSREP */ return true; } /* @@ -4474,6 +4499,278 @@ static Sys_var_tz Sys_time_zone( SESSION_VAR(time_zone), NO_CMD_LINE, DEFAULT(&default_tz), NO_MUTEX_GUARD, IN_BINLOG); +#ifdef WITH_WSREP +#include "wsrep_var.h" +#include "wsrep_sst.h" +#include "wsrep_binlog.h" + +static Sys_var_charptr Sys_wsrep_provider( + "wsrep_provider", "Path to replication provider library", + PREALLOCATED GLOBAL_VAR(wsrep_provider), CMD_LINE(REQUIRED_ARG, OPT_WSREP_PROVIDER), + IN_FS_CHARSET, DEFAULT(WSREP_NONE), + NO_MUTEX_GUARD, NOT_IN_BINLOG, + ON_CHECK(wsrep_provider_check), ON_UPDATE(wsrep_provider_update)); + +static Sys_var_charptr Sys_wsrep_provider_options( + "wsrep_provider_options", "provider specific options", + PREALLOCATED GLOBAL_VAR(wsrep_provider_options), + CMD_LINE(REQUIRED_ARG, OPT_WSREP_PROVIDER_OPTIONS), + IN_FS_CHARSET, DEFAULT(""), NO_MUTEX_GUARD, NOT_IN_BINLOG, + ON_CHECK(wsrep_provider_options_check), + ON_UPDATE(wsrep_provider_options_update)); + +static Sys_var_charptr Sys_wsrep_data_home_dir( + "wsrep_data_home_dir", "home directory for wsrep provider", + READ_ONLY GLOBAL_VAR(wsrep_data_home_dir), CMD_LINE(REQUIRED_ARG), + IN_FS_CHARSET, DEFAULT(mysql_real_data_home), + NO_MUTEX_GUARD, NOT_IN_BINLOG); + +static Sys_var_charptr Sys_wsrep_cluster_name( + "wsrep_cluster_name", "Name for the cluster", + PREALLOCATED GLOBAL_VAR(wsrep_cluster_name), CMD_LINE(REQUIRED_ARG), + IN_FS_CHARSET, DEFAULT(WSREP_CLUSTER_NAME), + NO_MUTEX_GUARD, NOT_IN_BINLOG, + ON_CHECK(wsrep_cluster_name_check), + ON_UPDATE(wsrep_cluster_name_update)); + +static PolyLock_mutex PLock_wsrep_slave_threads(&LOCK_wsrep_slave_threads); +static Sys_var_charptr Sys_wsrep_cluster_address ( + "wsrep_cluster_address", "Address to initially connect to cluster", + PREALLOCATED GLOBAL_VAR(wsrep_cluster_address), + CMD_LINE(REQUIRED_ARG, OPT_WSREP_CLUSTER_ADDRESS), + IN_FS_CHARSET, DEFAULT(""), + &PLock_wsrep_slave_threads, NOT_IN_BINLOG, + ON_CHECK(wsrep_cluster_address_check), + ON_UPDATE(wsrep_cluster_address_update)); + +static Sys_var_charptr Sys_wsrep_node_name ( + "wsrep_node_name", "Node name", + PREALLOCATED GLOBAL_VAR(wsrep_node_name), CMD_LINE(REQUIRED_ARG), + IN_FS_CHARSET, DEFAULT(glob_hostname), NO_MUTEX_GUARD, NOT_IN_BINLOG, + wsrep_node_name_check, wsrep_node_name_update); + +static Sys_var_charptr Sys_wsrep_node_address ( + "wsrep_node_address", "Node address", + PREALLOCATED GLOBAL_VAR(wsrep_node_address), CMD_LINE(REQUIRED_ARG), + IN_FS_CHARSET, DEFAULT(""), + NO_MUTEX_GUARD, NOT_IN_BINLOG, + ON_CHECK(wsrep_node_address_check), + ON_UPDATE(wsrep_node_address_update)); + +static Sys_var_charptr Sys_wsrep_node_incoming_address( + "wsrep_node_incoming_address", "Client connection address", + PREALLOCATED GLOBAL_VAR(wsrep_node_incoming_address),CMD_LINE(REQUIRED_ARG), + IN_FS_CHARSET, DEFAULT(WSREP_NODE_INCOMING_AUTO), + NO_MUTEX_GUARD, NOT_IN_BINLOG); + +static Sys_var_ulong Sys_wsrep_slave_threads( + "wsrep_slave_threads", "Number of slave appliers to launch", + GLOBAL_VAR(wsrep_slave_threads), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(1, 512), DEFAULT(1), BLOCK_SIZE(1), + &PLock_wsrep_slave_threads, NOT_IN_BINLOG, + ON_CHECK(wsrep_slave_threads_check), + ON_UPDATE(wsrep_slave_threads_update)); + +static Sys_var_charptr Sys_wsrep_dbug_option( + "wsrep_dbug_option", "DBUG options to provider library", + GLOBAL_VAR(wsrep_dbug_option),CMD_LINE(REQUIRED_ARG), + IN_FS_CHARSET, DEFAULT(""), + NO_MUTEX_GUARD, NOT_IN_BINLOG); + +static Sys_var_mybool Sys_wsrep_debug( + "wsrep_debug", "To enable debug level logging", + GLOBAL_VAR(wsrep_debug), CMD_LINE(OPT_ARG), DEFAULT(FALSE)); + +static Sys_var_mybool Sys_wsrep_convert_LOCK_to_trx( + "wsrep_convert_LOCK_to_trx", "To convert locking sessions " + "into transactions", + GLOBAL_VAR(wsrep_convert_LOCK_to_trx), + CMD_LINE(OPT_ARG), DEFAULT(FALSE)); + +static Sys_var_ulong Sys_wsrep_retry_autocommit( + "wsrep_retry_autocommit", "Max number of times to retry " + "a failed autocommit statement", + SESSION_VAR(wsrep_retry_autocommit), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(0, 10000), DEFAULT(1), BLOCK_SIZE(1)); + +static Sys_var_mybool Sys_wsrep_auto_increment_control( + "wsrep_auto_increment_control", "To automatically control the " + "assignment of autoincrement variables", + GLOBAL_VAR(wsrep_auto_increment_control), + CMD_LINE(OPT_ARG), DEFAULT(TRUE)); + +static Sys_var_mybool Sys_wsrep_drupal_282555_workaround( + "wsrep_drupal_282555_workaround", "To use a workaround for" + "bad autoincrement value", + GLOBAL_VAR(wsrep_drupal_282555_workaround), + CMD_LINE(OPT_ARG), DEFAULT(FALSE)); + +static Sys_var_charptr sys_wsrep_sst_method( + "wsrep_sst_method", "State snapshot transfer method", + GLOBAL_VAR(wsrep_sst_method),CMD_LINE(REQUIRED_ARG), + IN_FS_CHARSET, DEFAULT(WSREP_SST_DEFAULT), NO_MUTEX_GUARD, NOT_IN_BINLOG, + ON_CHECK(wsrep_sst_method_check), + ON_UPDATE(wsrep_sst_method_update)); + +static Sys_var_charptr Sys_wsrep_sst_receive_address( + "wsrep_sst_receive_address", "Address where node is waiting for " + "SST contact", + GLOBAL_VAR(wsrep_sst_receive_address),CMD_LINE(REQUIRED_ARG), + IN_FS_CHARSET, DEFAULT(WSREP_SST_ADDRESS_AUTO), NO_MUTEX_GUARD, + NOT_IN_BINLOG, + ON_CHECK(wsrep_sst_receive_address_check), + ON_UPDATE(wsrep_sst_receive_address_update)); + +static Sys_var_charptr Sys_wsrep_sst_auth( + "wsrep_sst_auth", "Authentication for SST connection", + PREALLOCATED GLOBAL_VAR(wsrep_sst_auth), CMD_LINE(REQUIRED_ARG, OPT_WSREP_SST_AUTH), + IN_FS_CHARSET, DEFAULT(NULL), NO_MUTEX_GUARD, + NOT_IN_BINLOG, + ON_CHECK(wsrep_sst_auth_check), + ON_UPDATE(wsrep_sst_auth_update)); + +static Sys_var_charptr Sys_wsrep_sst_donor( + "wsrep_sst_donor", "preferred donor node for the SST", + GLOBAL_VAR(wsrep_sst_donor),CMD_LINE(REQUIRED_ARG), + IN_FS_CHARSET, DEFAULT(""), NO_MUTEX_GUARD, NOT_IN_BINLOG, + ON_CHECK(wsrep_sst_donor_check), + ON_UPDATE(wsrep_sst_donor_update)); + +static Sys_var_mybool Sys_wsrep_sst_donor_rejects_queries( + "wsrep_sst_donor_rejects_queries", "Reject client queries " + "when donating state snapshot transfer", + GLOBAL_VAR(wsrep_sst_donor_rejects_queries), + CMD_LINE(OPT_ARG), DEFAULT(FALSE)); + +static Sys_var_mybool Sys_wsrep_on ( + "wsrep_on", "To enable wsrep replication ", + SESSION_VAR(wsrep_on), + CMD_LINE(OPT_ARG), DEFAULT(TRUE), + NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0), + ON_UPDATE(wsrep_on_update)); + +static Sys_var_charptr Sys_wsrep_start_position ( + "wsrep_start_position", "global transaction position to start from ", + PREALLOCATED GLOBAL_VAR(wsrep_start_position), + CMD_LINE(REQUIRED_ARG, OPT_WSREP_START_POSITION), + IN_FS_CHARSET, DEFAULT(WSREP_START_POSITION_ZERO), + NO_MUTEX_GUARD, NOT_IN_BINLOG, + ON_CHECK(wsrep_start_position_check), + ON_UPDATE(wsrep_start_position_update)); + +static Sys_var_ulong Sys_wsrep_max_ws_size ( + "wsrep_max_ws_size", "Max write set size (bytes)", + GLOBAL_VAR(wsrep_max_ws_size), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(1024, WSREP_MAX_WS_SIZE), DEFAULT(WSREP_MAX_WS_SIZE), + BLOCK_SIZE(1), NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0), + ON_UPDATE(wsrep_max_ws_size_update)); + +static Sys_var_ulong Sys_wsrep_max_ws_rows ( + "wsrep_max_ws_rows", "Max number of rows in write set", + GLOBAL_VAR(wsrep_max_ws_rows), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(0, 1048576), DEFAULT(0), BLOCK_SIZE(1)); + +static Sys_var_charptr Sys_wsrep_notify_cmd( + "wsrep_notify_cmd", "", + GLOBAL_VAR(wsrep_notify_cmd),CMD_LINE(REQUIRED_ARG), + IN_FS_CHARSET, DEFAULT(""), NO_MUTEX_GUARD, NOT_IN_BINLOG); + +static Sys_var_mybool Sys_wsrep_certify_nonPK( + "wsrep_certify_nonPK", "Certify tables with no primary key", + GLOBAL_VAR(wsrep_certify_nonPK), + CMD_LINE(OPT_ARG), DEFAULT(TRUE)); + +static Sys_var_mybool Sys_wsrep_causal_reads( + "wsrep_causal_reads", "(DEPRECATED) Setting this variable is equivalent " + "to setting wsrep_sync_wait READ flag", + SESSION_VAR(wsrep_causal_reads), CMD_LINE(OPT_ARG), DEFAULT(FALSE), + NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0), + ON_UPDATE(wsrep_causal_reads_update)); + +static Sys_var_uint Sys_wsrep_sync_wait( + "wsrep_sync_wait", "Ensure \"synchronous\" read view before executing " + "an operation of the type specified by bitmask: 1 - READ(includes " + "SELECT, SHOW and BEGIN/START TRANSACTION); 2 - UPDATE and DELETE; 4 - " + "INSERT and REPLACE", + SESSION_VAR(wsrep_sync_wait), CMD_LINE(OPT_ARG), + VALID_RANGE(WSREP_SYNC_WAIT_NONE, WSREP_SYNC_WAIT_MAX), + DEFAULT(WSREP_SYNC_WAIT_NONE), BLOCK_SIZE(1), + NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0), + ON_UPDATE(wsrep_sync_wait_update)); + +static const char *wsrep_OSU_method_names[]= { "TOI", "RSU", NullS }; +static Sys_var_enum Sys_wsrep_OSU_method( + "wsrep_OSU_method", "Method for Online Schema Upgrade", + SESSION_VAR(wsrep_OSU_method), CMD_LINE(OPT_ARG), + wsrep_OSU_method_names, DEFAULT(WSREP_OSU_TOI), + NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0), + ON_UPDATE(0)); + +static PolyLock_mutex PLock_wsrep_desync(&LOCK_wsrep_desync); +static Sys_var_mybool Sys_wsrep_desync ( + "wsrep_desync", "To desynchronize the node from the cluster", + GLOBAL_VAR(wsrep_desync), + CMD_LINE(OPT_ARG), DEFAULT(FALSE), + &PLock_wsrep_desync, NOT_IN_BINLOG, + ON_CHECK(wsrep_desync_check), + ON_UPDATE(wsrep_desync_update)); + +static Sys_var_enum Sys_wsrep_forced_binlog_format( + "wsrep_forced_binlog_format", "binlog format to take effect over user's choice", + GLOBAL_VAR(wsrep_forced_binlog_format), + CMD_LINE(REQUIRED_ARG, OPT_BINLOG_FORMAT), + wsrep_binlog_format_names, DEFAULT(BINLOG_FORMAT_UNSPEC), + NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0), + ON_UPDATE(0)); + +static Sys_var_mybool Sys_wsrep_recover_datadir( + "wsrep_recover", "Recover database state after crash and exit", + READ_ONLY GLOBAL_VAR(wsrep_recovery), + CMD_LINE(OPT_ARG, OPT_WSREP_RECOVER), DEFAULT(FALSE)); + +static Sys_var_mybool Sys_wsrep_replicate_myisam( + "wsrep_replicate_myisam", "To enable myisam replication", + GLOBAL_VAR(wsrep_replicate_myisam), CMD_LINE(OPT_ARG), DEFAULT(FALSE)); + +static Sys_var_mybool Sys_wsrep_log_conflicts( + "wsrep_log_conflicts", "To log multi-master conflicts", + GLOBAL_VAR(wsrep_log_conflicts), CMD_LINE(OPT_ARG), DEFAULT(FALSE)); + +static Sys_var_ulong Sys_wsrep_mysql_replication_bundle( + "wsrep_mysql_replication_bundle", "mysql replication group commit ", + GLOBAL_VAR(wsrep_mysql_replication_bundle), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(0, 1000), DEFAULT(0), BLOCK_SIZE(1)); + +static Sys_var_mybool Sys_wsrep_load_data_splitting( + "wsrep_load_data_splitting", "To commit LOAD DATA " + "transaction after every 10K rows inserted", + GLOBAL_VAR(wsrep_load_data_splitting), + CMD_LINE(OPT_ARG), DEFAULT(TRUE)); + +static Sys_var_mybool Sys_wsrep_slave_FK_checks( + "wsrep_slave_FK_checks", "Should slave thread do " + "foreign key constraint checks", + GLOBAL_VAR(wsrep_slave_FK_checks), + CMD_LINE(OPT_ARG), DEFAULT(TRUE)); + +static Sys_var_mybool Sys_wsrep_slave_UK_checks( + "wsrep_slave_UK_checks", "Should slave thread do " + "secondary index uniqueness checks", + GLOBAL_VAR(wsrep_slave_UK_checks), + CMD_LINE(OPT_ARG), DEFAULT(FALSE)); + +static Sys_var_mybool Sys_wsrep_restart_slave( + "wsrep_restart_slave", "Should MySQL slave be restarted automatically, when node joins back to cluster", + GLOBAL_VAR(wsrep_restart_slave), CMD_LINE(OPT_ARG), DEFAULT(FALSE)); + +static Sys_var_mybool Sys_wsrep_dirty_reads( + "wsrep_dirty_reads", + "Allow reads even when the node is not in the primary component.", + SESSION_VAR(wsrep_dirty_reads), CMD_LINE(OPT_ARG), + DEFAULT(FALSE), NO_MUTEX_GUARD, NOT_IN_BINLOG); + +#endif /* WITH_WSREP */ + static bool fix_host_cache_size(sys_var *, THD *, enum_var_type) { hostname_cache_resize((uint) host_cache_size); @@ -4830,3 +5127,13 @@ static Sys_var_mybool Sys_pseudo_slave_mode( SESSION_ONLY(pseudo_slave_mode), NO_CMD_LINE, DEFAULT(FALSE), NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(check_pseudo_slave_mode)); +#ifdef HAVE_MMAP +static Sys_var_ulong Sys_log_tc_size( + "log_tc_size", + "Size of transaction coordinator log.", + READ_ONLY GLOBAL_VAR(opt_tc_log_size), + CMD_LINE(REQUIRED_ARG), + VALID_RANGE(my_getpagesize() * 3, ULONG_MAX), + DEFAULT(my_getpagesize() * 6), + BLOCK_SIZE(my_getpagesize())); +#endif diff --git a/sql/table.cc b/sql/table.cc index 975d9d53882..7b43944e008 100644 --- a/sql/table.cc +++ b/sql/table.cc @@ -39,6 +39,9 @@ #include "sql_statistics.h" #include "discover.h" #include "mdl.h" // MDL_wait_for_graph_visitor +#ifdef WITH_WSREP +#include "ha_partition.h" +#endif /* WITH_WSREP */ /* INFORMATION_SCHEMA name */ LEX_STRING INFORMATION_SCHEMA_NAME= {C_STRING_WITH_LEN("information_schema")}; diff --git a/sql/transaction.cc b/sql/transaction.cc index a70c075e142..f478b4a18f7 100644 --- a/sql/transaction.cc +++ b/sql/transaction.cc @@ -98,6 +98,9 @@ static bool xa_trans_force_rollback(THD *thd) by ha_rollback()/THD::transaction::cleanup(). */ thd->transaction.xid_state.rm_error= 0; +#ifdef WITH_WSREP + wsrep_register_hton(thd, TRUE); +#endif /* WITH_WSREP */ if (ha_rollback_trans(thd, true)) { my_error(ER_XAER_RMERR, MYF(0)); @@ -136,10 +139,16 @@ bool trans_begin(THD *thd, uint flags) (thd->variables.option_bits & OPTION_TABLE_LOCK)) { thd->variables.option_bits&= ~OPTION_TABLE_LOCK; +#ifdef WITH_WSREP + wsrep_register_hton(thd, TRUE); +#endif /* WITH_WSREP */ thd->server_status&= ~(SERVER_STATUS_IN_TRANS | SERVER_STATUS_IN_TRANS_READONLY); DBUG_PRINT("info", ("clearing SERVER_STATUS_IN_TRANS")); res= MY_TEST(ha_commit_trans(thd, TRUE)); +#ifdef WITH_WSREP + wsrep_post_commit(thd, TRUE); +#endif /* WITH_WSREP */ } thd->variables.option_bits&= ~(OPTION_BEGIN | OPTION_KEEP_LOG); @@ -184,6 +193,12 @@ bool trans_begin(THD *thd, uint flags) thd->tx_read_only= false; } +#ifdef WITH_WSREP + thd->wsrep_PA_safe= true; + if (WSREP_CLIENT(thd) && wsrep_sync_wait(thd)) + DBUG_RETURN(TRUE); +#endif /* WITH_WSREP */ + thd->variables.option_bits|= OPTION_BEGIN; thd->server_status|= SERVER_STATUS_IN_TRANS; if (thd->tx_read_only) @@ -215,10 +230,16 @@ bool trans_commit(THD *thd) if (trans_check(thd)) DBUG_RETURN(TRUE); +#ifdef WITH_WSREP + wsrep_register_hton(thd, TRUE); +#endif /* WITH_WSREP */ thd->server_status&= ~(SERVER_STATUS_IN_TRANS | SERVER_STATUS_IN_TRANS_READONLY); DBUG_PRINT("info", ("clearing SERVER_STATUS_IN_TRANS")); res= ha_commit_trans(thd, TRUE); +#ifdef WITH_WSREP + wsrep_post_commit(thd, TRUE); +#endif /* WITH_WSREP */ /* if res is non-zero, then ha_commit_trans has rolled back the transaction, so the hooks for rollback will be called. @@ -264,10 +285,16 @@ bool trans_commit_implicit(THD *thd) /* Safety if one did "drop table" on locked tables */ if (!thd->locked_tables_mode) thd->variables.option_bits&= ~OPTION_TABLE_LOCK; +#ifdef WITH_WSREP + wsrep_register_hton(thd, TRUE); +#endif /* WITH_WSREP */ thd->server_status&= ~(SERVER_STATUS_IN_TRANS | SERVER_STATUS_IN_TRANS_READONLY); DBUG_PRINT("info", ("clearing SERVER_STATUS_IN_TRANS")); res= MY_TEST(ha_commit_trans(thd, TRUE)); +#ifdef WITH_WSREP + wsrep_post_commit(thd, TRUE); +#endif /* WITH_WSREP */ } thd->variables.option_bits&= ~(OPTION_BEGIN | OPTION_KEEP_LOG); @@ -300,9 +327,15 @@ bool trans_rollback(THD *thd) int res; DBUG_ENTER("trans_rollback"); +#ifdef WITH_WSREP + thd->wsrep_PA_safe= true; +#endif /* WITH_WSREP */ if (trans_check(thd)) DBUG_RETURN(TRUE); +#ifdef WITH_WSREP + wsrep_register_hton(thd, TRUE); +#endif /* WITH_WSREP */ thd->server_status&= ~(SERVER_STATUS_IN_TRANS | SERVER_STATUS_IN_TRANS_READONLY); DBUG_PRINT("info", ("clearing SERVER_STATUS_IN_TRANS")); @@ -393,11 +426,17 @@ bool trans_commit_stmt(THD *thd) if (thd->transaction.stmt.ha_list) { +#ifdef WITH_WSREP + wsrep_register_hton(thd, FALSE); +#endif /* WITH_WSREP */ res= ha_commit_trans(thd, FALSE); if (! thd->in_active_multi_stmt_transaction()) { thd->tx_isolation= (enum_tx_isolation) thd->variables.tx_isolation; thd->tx_read_only= thd->variables.tx_read_only; +#ifdef WITH_WSREP + wsrep_post_commit(thd, FALSE); +#endif /* WITH_WSREP */ } } @@ -438,6 +477,9 @@ bool trans_rollback_stmt(THD *thd) if (thd->transaction.stmt.ha_list) { +#ifdef WITH_WSREP + wsrep_register_hton(thd, FALSE); +#endif /* WITH_WSREP */ ha_rollback_trans(thd, FALSE); if (! thd->in_active_multi_stmt_transaction()) { @@ -815,9 +857,15 @@ bool trans_xa_commit(THD *thd) } else if (xa_state == XA_IDLE && thd->lex->xa_opt == XA_ONE_PHASE) { +#ifdef WITH_WSREP + wsrep_register_hton(thd, TRUE); +#endif /* WITH_WSREP */ int r= ha_commit_trans(thd, TRUE); if ((res= MY_TEST(r))) my_error(r == 1 ? ER_XA_RBROLLBACK : ER_XAER_RMERR, MYF(0)); +#ifdef WITH_WSREP + wsrep_post_commit(thd, TRUE); +#endif /* WITH_WSREP */ } else if (xa_state == XA_PREPARED && thd->lex->xa_opt == XA_NONE) { @@ -836,6 +884,9 @@ bool trans_xa_commit(THD *thd) if (thd->mdl_context.acquire_lock(&mdl_request, thd->variables.lock_wait_timeout)) { +#ifdef WITH_WSREP + wsrep_register_hton(thd, TRUE); +#endif /* WITH_WSREP */ ha_rollback_trans(thd, TRUE); my_error(ER_XAER_RMERR, MYF(0)); } diff --git a/sql/tztime.cc b/sql/tztime.cc index 75b732f4436..d4321604a7a 100644 --- a/sql/tztime.cc +++ b/sql/tztime.cc @@ -2705,6 +2705,12 @@ main(int argc, char **argv) free_defaults(default_argv); return 1; } + +#ifdef WITH_WSREP + // Replicate MyISAM DDL for this session, cf. lp:1161432 + printf("SET GLOBAL wsrep_replicate_myisam= ON;\n"); +#endif /* WITH_WSREP */ + if (argc == 1 && !opt_leap) { /* Argument is timezonedir */ @@ -2752,6 +2758,11 @@ main(int argc, char **argv) free_root(&tz_storage, MYF(0)); } +#ifdef WITH_WSREP + // Reset wsrep_replicate_myisam. lp:1161432 + printf("SET GLOBAL wsrep_replicate_myisam= OFF;\n"); +#endif /* WITH_WSREP */ + free_defaults(default_argv); my_end(0); return 0; diff --git a/sql/wsrep_applier.cc b/sql/wsrep_applier.cc new file mode 100644 index 00000000000..73a43185162 --- /dev/null +++ b/sql/wsrep_applier.cc @@ -0,0 +1,400 @@ +/* Copyright (C) 2013-2015 Codership Oy <info@codership.com> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */ + +#include "wsrep_applier.h" +#include "wsrep_priv.h" +#include "wsrep_binlog.h" // wsrep_dump_rbr_buf() +#include "wsrep_xid.h" + +#include "log_event.h" // class THD, EVENT_LEN_OFFSET, etc. +#include "debug_sync.h" + +/* + read the first event from (*buf). The size of the (*buf) is (*buf_len). + At the end (*buf) is shitfed to point to the following event or NULL and + (*buf_len) will be changed to account just being read bytes of the 1st event. +*/ + +static Log_event* wsrep_read_log_event( + char **arg_buf, size_t *arg_buf_len, + const Format_description_log_event *description_event) +{ + DBUG_ENTER("wsrep_read_log_event"); + char *head= (*arg_buf); + + uint data_len = uint4korr(head + EVENT_LEN_OFFSET); + char *buf= (*arg_buf); + const char *error= 0; + Log_event *res= 0; + + res= Log_event::read_log_event(buf, data_len, &error, description_event, + true); + + if (!res) + { + DBUG_ASSERT(error != 0); + sql_print_error("Error in Log_event::read_log_event(): " + "'%s', data_len: %d, event_type: %d", + error,data_len,head[EVENT_TYPE_OFFSET]); + } + (*arg_buf)+= data_len; + (*arg_buf_len)-= data_len; + DBUG_RETURN(res); +} + +#include "transaction.h" // trans_commit(), trans_rollback() +#include "rpl_rli.h" // class Relay_log_info; +#include "sql_base.h" // close_temporary_table() + +static inline void +wsrep_set_apply_format(THD* thd, Format_description_log_event* ev) +{ + if (thd->wsrep_apply_format) + { + delete (Format_description_log_event*)thd->wsrep_apply_format; + } + thd->wsrep_apply_format= ev; +} + +static inline Format_description_log_event* +wsrep_get_apply_format(THD* thd) +{ + if (thd->wsrep_apply_format) + { + return (Format_description_log_event*) thd->wsrep_apply_format; + } + return thd->wsrep_rgi->rli->relay_log.description_event_for_exec; +} + +static wsrep_cb_status_t wsrep_apply_events(THD* thd, + const void* events_buf, + size_t buf_len) +{ + char *buf= (char *)events_buf; + int rcode= 0; + int event= 1; + Log_event_type typ; + + DBUG_ENTER("wsrep_apply_events"); + + if (thd->killed == KILL_CONNECTION && + thd->wsrep_conflict_state != REPLAYING) + { + WSREP_INFO("applier has been aborted, skipping apply_rbr: %lld", + (long long) wsrep_thd_trx_seqno(thd)); + DBUG_RETURN(WSREP_CB_FAILURE); + } + + mysql_mutex_lock(&thd->LOCK_wsrep_thd); + thd->wsrep_query_state= QUERY_EXEC; + if (thd->wsrep_conflict_state!= REPLAYING) + thd->wsrep_conflict_state= NO_CONFLICT; + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + + if (!buf_len) WSREP_DEBUG("empty rbr buffer to apply: %lld", + (long long) wsrep_thd_trx_seqno(thd)); + + while(buf_len) + { + int exec_res; + Log_event* ev= wsrep_read_log_event(&buf, &buf_len, + wsrep_get_apply_format(thd)); + + if (!ev) + { + WSREP_ERROR("applier could not read binlog event, seqno: %lld, len: %zu", + (long long)wsrep_thd_trx_seqno(thd), buf_len); + rcode= 1; + goto error; + } + + typ= ev->get_type_code(); + + switch (typ) { + case FORMAT_DESCRIPTION_EVENT: + wsrep_set_apply_format(thd, (Format_description_log_event*)ev); + continue; +#ifdef GTID_SUPPORT + case GTID_LOG_EVENT: + { + Gtid_log_event* gev= (Gtid_log_event*)ev; + if (gev->get_gno() == 0) + { + /* Skip GTID log event to make binlog to generate LTID on commit */ + delete ev; + continue; + } + } +#endif /* GTID_SUPPORT */ + default: + break; + } + + /* Use the original server id for logging. */ + thd->set_server_id(ev->server_id); + thd->set_time(); // time the query + wsrep_xid_init(&thd->transaction.xid_state.xid, + thd->wsrep_trx_meta.gtid.uuid, + thd->wsrep_trx_meta.gtid.seqno); + thd->lex->current_select= 0; + if (!ev->when) + { + my_hrtime_t hrtime= my_hrtime(); + ev->when= hrtime_to_my_time(hrtime); + ev->when_sec_part= hrtime_sec_part(hrtime); + } + + thd->variables.option_bits= + (thd->variables.option_bits & ~OPTION_SKIP_REPLICATION) | + (ev->flags & LOG_EVENT_SKIP_REPLICATION_F ? OPTION_SKIP_REPLICATION : 0); + + ev->thd = thd; + exec_res = ev->apply_event(thd->wsrep_rgi); + DBUG_PRINT("info", ("exec_event result: %d", exec_res)); + + if (exec_res) + { + WSREP_WARN("RBR event %d %s apply warning: %d, %lld", + event, ev->get_type_str(), exec_res, + (long long) wsrep_thd_trx_seqno(thd)); + rcode= exec_res; + /* stop processing for the first error */ + delete ev; + goto error; + } + event++; + + if (thd->wsrep_conflict_state!= NO_CONFLICT && + thd->wsrep_conflict_state!= REPLAYING) + WSREP_WARN("conflict state after RBR event applying: %d, %lld", + thd->wsrep_query_state, (long long)wsrep_thd_trx_seqno(thd)); + + if (thd->wsrep_conflict_state == MUST_ABORT) { + WSREP_WARN("RBR event apply failed, rolling back: %lld", + (long long) wsrep_thd_trx_seqno(thd)); + trans_rollback(thd); + thd->locked_tables_list.unlock_locked_tables(thd); + /* Release transactional metadata locks. */ + thd->mdl_context.release_transactional_locks(); + thd->wsrep_conflict_state= NO_CONFLICT; + DBUG_RETURN(WSREP_CB_FAILURE); + } + + delete_or_keep_event_post_apply(thd->wsrep_rgi, typ, ev); + } + + error: + mysql_mutex_lock(&thd->LOCK_wsrep_thd); + thd->wsrep_query_state= QUERY_IDLE; + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + + assert(thd->wsrep_exec_mode== REPL_RECV); + + if (thd->killed == KILL_CONNECTION) + WSREP_INFO("applier aborted: %lld", (long long)wsrep_thd_trx_seqno(thd)); + + if (rcode) DBUG_RETURN(WSREP_CB_FAILURE); + DBUG_RETURN(WSREP_CB_SUCCESS); +} + +wsrep_cb_status_t wsrep_apply_cb(void* const ctx, + const void* const buf, + size_t const buf_len, + uint32_t const flags, + const wsrep_trx_meta_t* meta) +{ + THD* const thd((THD*)ctx); + + // Allow tests to block the applier thread using the DBUG facilities. + DBUG_EXECUTE_IF("sync.wsrep_apply_cb", + { + const char act[]= + "now " + "wait_for signal.wsrep_apply_cb"; + DBUG_ASSERT(!debug_sync_set_action(thd, + STRING_WITH_LEN(act))); + };); + + thd->wsrep_trx_meta = *meta; + +#ifdef WSREP_PROC_INFO + snprintf(thd->wsrep_info, sizeof(thd->wsrep_info) - 1, + "applying write set %lld: %p, %zu", + (long long)wsrep_thd_trx_seqno(thd), buf, buf_len); + thd_proc_info(thd, thd->wsrep_info); +#else + thd_proc_info(thd, "applying write set"); +#endif /* WSREP_PROC_INFO */ + + /* tune FK and UK checking policy */ + if (wsrep_slave_UK_checks == FALSE) + thd->variables.option_bits|= OPTION_RELAXED_UNIQUE_CHECKS; + else + thd->variables.option_bits&= ~OPTION_RELAXED_UNIQUE_CHECKS; + + if (wsrep_slave_FK_checks == FALSE) + thd->variables.option_bits|= OPTION_NO_FOREIGN_KEY_CHECKS; + else + thd->variables.option_bits&= ~OPTION_NO_FOREIGN_KEY_CHECKS; + + if (flags & WSREP_FLAG_ISOLATION) + { + thd->wsrep_apply_toi= true; + /* + Don't run in transaction mode with TOI actions. + */ + thd->variables.option_bits&= ~OPTION_BEGIN; + thd->server_status&= ~SERVER_STATUS_IN_TRANS; + } + wsrep_cb_status_t rcode(wsrep_apply_events(thd, buf, buf_len)); + +#ifdef WSREP_PROC_INFO + snprintf(thd->wsrep_info, sizeof(thd->wsrep_info) - 1, + "applied write set %lld", (long long)wsrep_thd_trx_seqno(thd)); + thd_proc_info(thd, thd->wsrep_info); +#else + thd_proc_info(thd, "applied write set"); +#endif /* WSREP_PROC_INFO */ + + if (WSREP_CB_SUCCESS != rcode) + { + wsrep_dump_rbr_buf(thd, buf, buf_len); + } + + TABLE *tmp; + while ((tmp = thd->temporary_tables)) + { + WSREP_DEBUG("Applier %lu, has temporary tables: %s.%s", + thd->thread_id, + (tmp->s) ? tmp->s->db.str : "void", + (tmp->s) ? tmp->s->table_name.str : "void"); + close_temporary_table(thd, tmp, 1, 1); + } + + return rcode; +} + +static wsrep_cb_status_t wsrep_commit(THD* const thd) +{ +#ifdef WSREP_PROC_INFO + snprintf(thd->wsrep_info, sizeof(thd->wsrep_info) - 1, + "committing %lld", (long long)wsrep_thd_trx_seqno(thd)); + thd_proc_info(thd, thd->wsrep_info); +#else + thd_proc_info(thd, "committing"); +#endif /* WSREP_PROC_INFO */ + + wsrep_cb_status_t const rcode(trans_commit(thd) ? + WSREP_CB_FAILURE : WSREP_CB_SUCCESS); + + if (WSREP_CB_SUCCESS == rcode) + { + thd->wsrep_rgi->cleanup_context(thd, false); +#ifdef GTID_SUPPORT + thd->variables.gtid_next.set_automatic(); +#endif /* GTID_SUPPORT */ + if (thd->wsrep_apply_toi) + { + wsrep_set_SE_checkpoint(thd->wsrep_trx_meta.gtid.uuid, + thd->wsrep_trx_meta.gtid.seqno); + } + } + +#ifdef WSREP_PROC_INFO + snprintf(thd->wsrep_info, sizeof(thd->wsrep_info) - 1, + "committed %lld", (long long) wsrep_thd_trx_seqno(thd)); + thd_proc_info(thd, thd->wsrep_info); +#else + thd_proc_info(thd, "committed"); +#endif /* WSREP_PROC_INFO */ + + return rcode; +} + +static wsrep_cb_status_t wsrep_rollback(THD* const thd) +{ +#ifdef WSREP_PROC_INFO + snprintf(thd->wsrep_info, sizeof(thd->wsrep_info) - 1, + "rolling back %lld", (long long)wsrep_thd_trx_seqno(thd)); + thd_proc_info(thd, thd->wsrep_info); +#else + thd_proc_info(thd, "rolling back"); +#endif /* WSREP_PROC_INFO */ + + wsrep_cb_status_t const rcode(trans_rollback(thd) ? + WSREP_CB_FAILURE : WSREP_CB_SUCCESS); + +#ifdef WSREP_PROC_INFO + snprintf(thd->wsrep_info, sizeof(thd->wsrep_info) - 1, + "rolled back %lld", (long long)wsrep_thd_trx_seqno(thd)); + thd_proc_info(thd, thd->wsrep_info); +#else + thd_proc_info(thd, "rolled back"); +#endif /* WSREP_PROC_INFO */ + + return rcode; +} + +wsrep_cb_status_t wsrep_commit_cb(void* const ctx, + uint32_t const flags, + const wsrep_trx_meta_t* meta, + wsrep_bool_t* const exit, + bool const commit) +{ + THD* const thd((THD*)ctx); + + assert(meta->gtid.seqno == wsrep_thd_trx_seqno(thd)); + + wsrep_cb_status_t rcode; + + if (commit) + rcode = wsrep_commit(thd); + else + rcode = wsrep_rollback(thd); + + wsrep_set_apply_format(thd, NULL); + thd->mdl_context.release_transactional_locks(); + free_root(thd->mem_root,MYF(MY_KEEP_PREALLOC)); + thd->tx_isolation= (enum_tx_isolation) thd->variables.tx_isolation; + + if (wsrep_slave_count_change < 0 && commit && WSREP_CB_SUCCESS == rcode) + { + mysql_mutex_lock(&LOCK_wsrep_slave_threads); + if (wsrep_slave_count_change < 0) + { + wsrep_slave_count_change++; + *exit = true; + } + mysql_mutex_unlock(&LOCK_wsrep_slave_threads); + } + + if (*exit == false && thd->wsrep_applier) + { + /* From trans_begin() */ + thd->variables.option_bits|= OPTION_BEGIN; + thd->server_status|= SERVER_STATUS_IN_TRANS; + thd->wsrep_apply_toi= false; + } + + return rcode; +} + + +wsrep_cb_status_t wsrep_unordered_cb(void* const ctx, + const void* const data, + size_t const size) +{ + return WSREP_CB_SUCCESS; +} diff --git a/sql/wsrep_applier.h b/sql/wsrep_applier.h new file mode 100644 index 00000000000..424db466e53 --- /dev/null +++ b/sql/wsrep_applier.h @@ -0,0 +1,39 @@ +/* Copyright 2013 Codership Oy <http://www.codership.com> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#ifndef WSREP_APPLIER_H +#define WSREP_APPLIER_H + +#include <my_config.h> +#include "../wsrep/wsrep_api.h" + +/* wsrep callback prototypes */ + +wsrep_cb_status_t wsrep_apply_cb(void *ctx, + const void* buf, size_t buf_len, + uint32_t flags, + const wsrep_trx_meta_t* meta); + +wsrep_cb_status_t wsrep_commit_cb(void *ctx, + uint32_t flags, + const wsrep_trx_meta_t* meta, + wsrep_bool_t* exit, + bool commit); + +wsrep_cb_status_t wsrep_unordered_cb(void* ctx, + const void* data, + size_t size); + +#endif /* WSREP_APPLIER_H */ diff --git a/sql/wsrep_binlog.cc b/sql/wsrep_binlog.cc new file mode 100644 index 00000000000..5c5ebb9f780 --- /dev/null +++ b/sql/wsrep_binlog.cc @@ -0,0 +1,412 @@ +/* Copyright (C) 2013 Codership Oy <info@codership.com> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */ + +#include "wsrep_binlog.h" +#include "wsrep_priv.h" + +/* + Write the contents of a cache to a memory buffer. + + This function quite the same as MYSQL_BIN_LOG::write_cache(), + with the exception that here we write in buffer instead of log file. + */ +int wsrep_write_cache_buf(IO_CACHE *cache, uchar **buf, size_t *buf_len) +{ + *buf= NULL; + *buf_len= 0; + + my_off_t const saved_pos(my_b_tell(cache)); + + if (reinit_io_cache(cache, READ_CACHE, 0, 0, 0)) + { + WSREP_ERROR("failed to initialize io-cache"); + return ER_ERROR_ON_WRITE; + } + + uint length = my_b_bytes_in_cache(cache); + if (unlikely(0 == length)) length = my_b_fill(cache); + + size_t total_length = 0; + + if (likely(length > 0)) do + { + total_length += length; + /* + Bail out if buffer grows too large. + A temporary fix to avoid allocating indefinitely large buffer, + not a real limit on a writeset size which includes other things + like header and keys. + */ + if (total_length > wsrep_max_ws_size) + { + WSREP_WARN("transaction size limit (%lu) exceeded: %zu", + wsrep_max_ws_size, total_length); + goto error; + } + uchar* tmp = (uchar *)my_realloc(*buf, total_length, + MYF(MY_ALLOW_ZERO_PTR)); + if (!tmp) + { + WSREP_ERROR("could not (re)allocate buffer: %zu + %u", + *buf_len, length); + goto error; + } + *buf = tmp; + + memcpy(*buf + *buf_len, cache->read_pos, length); + *buf_len = total_length; + cache->read_pos = cache->read_end; + } while ((cache->file >= 0) && (length = my_b_fill(cache))); + + if (reinit_io_cache(cache, WRITE_CACHE, saved_pos, 0, 0)) + { + WSREP_WARN("failed to initialize io-cache"); + goto cleanup; + } + + return 0; + +error: + if (reinit_io_cache(cache, WRITE_CACHE, saved_pos, 0, 0)) + { + WSREP_WARN("failed to initialize io-cache"); + } +cleanup: + my_free(*buf); + *buf= NULL; + *buf_len= 0; + return ER_ERROR_ON_WRITE; +} + +#define STACK_SIZE 4096 /* 4K - for buffer preallocated on the stack: + * many transactions would fit in there + * so there is no need to reach for the heap */ + +/* Returns minimum multiple of HEAP_PAGE_SIZE that is >= length */ +static inline size_t +heap_size(size_t length) +{ + return (length + HEAP_PAGE_SIZE - 1)/HEAP_PAGE_SIZE*HEAP_PAGE_SIZE; +} + +/* append data to writeset */ +static inline wsrep_status_t +wsrep_append_data(wsrep_t* const wsrep, + wsrep_ws_handle_t* const ws, + const void* const data, + size_t const len) +{ + struct wsrep_buf const buff = { data, len }; + wsrep_status_t const rc(wsrep->append_data(wsrep, ws, &buff, 1, + WSREP_DATA_ORDERED, true)); + if (rc != WSREP_OK) + { + WSREP_WARN("append_data() returned %d", rc); + } + + return rc; +} + +/* + Write the contents of a cache to wsrep provider. + + This function quite the same as MYSQL_BIN_LOG::write_cache(), + with the exception that here we write in buffer instead of log file. + + This version reads all of cache into single buffer and then appends to a + writeset at once. + */ +static int wsrep_write_cache_once(wsrep_t* const wsrep, + THD* const thd, + IO_CACHE* const cache, + size_t* const len) +{ + my_off_t const saved_pos(my_b_tell(cache)); + + if (reinit_io_cache(cache, READ_CACHE, 0, 0, 0)) + { + WSREP_ERROR("failed to initialize io-cache"); + return ER_ERROR_ON_WRITE; + } + + int err(WSREP_OK); + + size_t total_length(0); + uchar stack_buf[STACK_SIZE]; /* to avoid dynamic allocations for few data*/ + uchar* heap_buf(NULL); + uchar* buf(stack_buf); + size_t allocated(sizeof(stack_buf)); + size_t used(0); + + uint length(my_b_bytes_in_cache(cache)); + if (unlikely(0 == length)) length = my_b_fill(cache); + + if (likely(length > 0)) do + { + total_length += length; + /* + Bail out if buffer grows too large. + A temporary fix to avoid allocating indefinitely large buffer, + not a real limit on a writeset size which includes other things + like header and keys. + */ + if (unlikely(total_length > wsrep_max_ws_size)) + { + WSREP_WARN("transaction size limit (%lu) exceeded: %zu", + wsrep_max_ws_size, total_length); + err = WSREP_TRX_SIZE_EXCEEDED; + goto cleanup; + } + + if (total_length > allocated) + { + size_t const new_size(heap_size(total_length)); + uchar* tmp = (uchar *)my_realloc(heap_buf, new_size, + MYF(MY_ALLOW_ZERO_PTR)); + if (!tmp) + { + WSREP_ERROR("could not (re)allocate buffer: %zu + %u", + allocated, length); + err = WSREP_TRX_SIZE_EXCEEDED; + goto cleanup; + } + + heap_buf = tmp; + buf = heap_buf; + allocated = new_size; + + if (used <= STACK_SIZE && used > 0) // there's data in stack_buf + { + DBUG_ASSERT(buf == stack_buf); + memcpy(heap_buf, stack_buf, used); + } + } + + memcpy(buf + used, cache->read_pos, length); + used = total_length; + cache->read_pos = cache->read_end; + } while ((cache->file >= 0) && (length = my_b_fill(cache))); + + if (used > 0) + err = wsrep_append_data(wsrep, &thd->wsrep_ws_handle, buf, used); + + if (WSREP_OK == err) *len = total_length; + +cleanup: + if (reinit_io_cache(cache, WRITE_CACHE, saved_pos, 0, 0)) + { + WSREP_ERROR("failed to reinitialize io-cache"); + } + + if (unlikely(WSREP_OK != err)) wsrep_dump_rbr_buf(thd, buf, used); + + my_free(heap_buf); + return err; +} + +/* + Write the contents of a cache to wsrep provider. + + This function quite the same as MYSQL_BIN_LOG::write_cache(), + with the exception that here we write in buffer instead of log file. + + This version uses incremental data appending as it reads it from cache. + */ +static int wsrep_write_cache_inc(wsrep_t* const wsrep, + THD* const thd, + IO_CACHE* const cache, + size_t* const len) +{ + my_off_t const saved_pos(my_b_tell(cache)); + + if (reinit_io_cache(cache, READ_CACHE, 0, 0, 0)) + { + WSREP_ERROR("failed to initialize io-cache"); + return WSREP_TRX_ERROR; + } + + int err(WSREP_OK); + + size_t total_length(0); + + uint length(my_b_bytes_in_cache(cache)); + if (unlikely(0 == length)) length = my_b_fill(cache); + + if (likely(length > 0)) do + { + total_length += length; + /* bail out if buffer grows too large + not a real limit on a writeset size which includes other things + like header and keys. + */ + if (unlikely(total_length > wsrep_max_ws_size)) + { + WSREP_WARN("transaction size limit (%lu) exceeded: %zu", + wsrep_max_ws_size, total_length); + err = WSREP_TRX_SIZE_EXCEEDED; + goto cleanup; + } + + if(WSREP_OK != (err=wsrep_append_data(wsrep, &thd->wsrep_ws_handle, + cache->read_pos, length))) + goto cleanup; + + cache->read_pos = cache->read_end; + } while ((cache->file >= 0) && (length = my_b_fill(cache))); + + if (WSREP_OK == err) *len = total_length; + +cleanup: + if (reinit_io_cache(cache, WRITE_CACHE, saved_pos, 0, 0)) + { + WSREP_ERROR("failed to reinitialize io-cache"); + } + + return err; +} + +/* + Write the contents of a cache to wsrep provider. + + This function quite the same as MYSQL_BIN_LOG::write_cache(), + with the exception that here we write in buffer instead of log file. + */ +int wsrep_write_cache(wsrep_t* const wsrep, + THD* const thd, + IO_CACHE* const cache, + size_t* const len) +{ + if (wsrep_incremental_data_collection) { + return wsrep_write_cache_inc(wsrep, thd, cache, len); + } + else { + return wsrep_write_cache_once(wsrep, thd, cache, len); + } +} + +void wsrep_dump_rbr_buf(THD *thd, const void* rbr_buf, size_t buf_len) +{ + char filename[PATH_MAX]= {0}; + int len= snprintf(filename, PATH_MAX, "%s/GRA_%ld_%lld.log", + wsrep_data_home_dir, thd->thread_id, + (long long)wsrep_thd_trx_seqno(thd)); + if (len >= PATH_MAX) + { + WSREP_ERROR("RBR dump path too long: %d, skipping dump.", len); + return; + } + + FILE *of= fopen(filename, "wb"); + + if (of) + { + if (fwrite(rbr_buf, buf_len, 1, of) == 0) + WSREP_ERROR("Failed to write buffer of length %llu to '%s'", + (unsigned long long)buf_len, filename); + + fclose(of); + } + else + { + WSREP_ERROR("Failed to open file '%s': %d (%s)", + filename, errno, strerror(errno)); + } +} +extern handlerton *binlog_hton; + +/* + wsrep exploits binlog's caches even if binlogging itself is not + activated. In such case connection close needs calling + actual binlog's method. + Todo: split binlog hton from its caches to use ones by wsrep + without referring to binlog's stuff. +*/ +int wsrep_binlog_close_connection(THD* thd) +{ + DBUG_ENTER("wsrep_binlog_close_connection"); + if (thd_get_ha_data(thd, binlog_hton) != NULL) + binlog_hton->close_connection (binlog_hton, thd); + DBUG_RETURN(0); +} + +int wsrep_binlog_savepoint_set(THD *thd, void *sv) +{ + if (!wsrep_emulate_bin_log) return 0; + int rcode = binlog_hton->savepoint_set(binlog_hton, thd, sv); + return rcode; +} + +int wsrep_binlog_savepoint_rollback(THD *thd, void *sv) +{ + if (!wsrep_emulate_bin_log) return 0; + int rcode = binlog_hton->savepoint_rollback(binlog_hton, thd, sv); + return rcode; +} + +void wsrep_dump_rbr_direct(THD* thd, IO_CACHE* cache) +{ + char filename[PATH_MAX]= {0}; + int len= snprintf(filename, PATH_MAX, "%s/GRA_%ld_%lld.log", + wsrep_data_home_dir, thd->thread_id, + (long long)wsrep_thd_trx_seqno(thd)); + size_t bytes_in_cache = 0; + // check path + if (len >= PATH_MAX) + { + WSREP_ERROR("RBR dump path too long: %d, skipping dump.", len); + return ; + } + // init cache + my_off_t const saved_pos(my_b_tell(cache)); + if (reinit_io_cache(cache, READ_CACHE, 0, 0, 0)) + { + WSREP_ERROR("failed to initialize io-cache"); + return ; + } + // open file + FILE* of = fopen(filename, "wb"); + if (!of) + { + WSREP_ERROR("Failed to open file '%s': %d (%s)", + filename, errno, strerror(errno)); + goto cleanup; + } + // ready to write + bytes_in_cache= my_b_bytes_in_cache(cache); + if (unlikely(bytes_in_cache == 0)) bytes_in_cache = my_b_fill(cache); + if (likely(bytes_in_cache > 0)) do + { + if (my_fwrite(of, cache->read_pos, bytes_in_cache, + MYF(MY_WME | MY_NABP)) == (size_t) -1) + { + WSREP_ERROR("Failed to write file '%s'", filename); + goto cleanup; + } + cache->read_pos= cache->read_end; + } while ((cache->file >= 0) && (bytes_in_cache= my_b_fill(cache))); + if(cache->error == -1) + { + WSREP_ERROR("RBR inconsistent"); + goto cleanup; + } +cleanup: + // init back + if (reinit_io_cache(cache, WRITE_CACHE, saved_pos, 0, 0)) + { + WSREP_ERROR("failed to reinitialize io-cache"); + } + // close file + if (of) fclose(of); +} diff --git a/sql/wsrep_binlog.h b/sql/wsrep_binlog.h new file mode 100644 index 00000000000..c29d51caf2c --- /dev/null +++ b/sql/wsrep_binlog.h @@ -0,0 +1,56 @@ +/* Copyright (C) 2013 Codership Oy <info@codership.com> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */ + +#ifndef WSREP_BINLOG_H +#define WSREP_BINLOG_H + +#include "sql_class.h" // THD, IO_CACHE + +#define HEAP_PAGE_SIZE 65536 /* 64K */ +#define WSREP_MAX_WS_SIZE 2147483647 /* 2GB */ + +/* + Write the contents of a cache to a memory buffer. + + This function quite the same as MYSQL_BIN_LOG::write_cache(), + with the exception that here we write in buffer instead of log file. + */ +int wsrep_write_cache_buf(IO_CACHE *cache, uchar **buf, size_t *buf_len); + +/* + Write the contents of a cache to wsrep provider. + + This function quite the same as MYSQL_BIN_LOG::write_cache(), + with the exception that here we write in buffer instead of log file. + + @param len total amount of data written + @return wsrep error status + */ +int wsrep_write_cache (wsrep_t* wsrep, + THD* thd, + IO_CACHE* cache, + size_t* len); + +/* Dump replication buffer to disk */ +void wsrep_dump_rbr_buf(THD *thd, const void* rbr_buf, size_t buf_len); + +/* Dump replication buffer to disk without intermediate buffer */ +void wsrep_dump_rbr_direct(THD* thd, IO_CACHE* cache); + +int wsrep_binlog_close_connection(THD* thd); +int wsrep_binlog_savepoint_set(THD *thd, void *sv); +int wsrep_binlog_savepoint_rollback(THD *thd, void *sv); + +#endif /* WSREP_BINLOG_H */ diff --git a/sql/wsrep_check_opts.cc b/sql/wsrep_check_opts.cc new file mode 100644 index 00000000000..188f0696bff --- /dev/null +++ b/sql/wsrep_check_opts.cc @@ -0,0 +1,396 @@ +/* Copyright 2011 Codership Oy <http://www.codership.com> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +//#include <mysqld.h> +#include <sql_class.h> +//#include <sql_plugin.h> +//#include <set_var.h> + +#include "wsrep_mysqld.h" + +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <ctype.h> + +/* This file is about checking for correctness of mysql configuration options */ + +struct opt +{ + const char* const name; + const char* value; +}; + +/* A list of options to check. + * At first we assume default values and then see if they are changed on CLI or + * in my.cnf */ +static struct opt opts[] = +{ + { "wsrep_slave_threads", "1" }, // mysqld.cc + { "bind_address", "0.0.0.0" }, // mysqld.cc + { "wsrep_sst_method", "rsync" }, // mysqld.cc + { "wsrep_sst_receive_address","AUTO"}, // mysqld.cc + { "binlog_format", "ROW" }, // mysqld.cc + { "wsrep_provider", "none" }, // mysqld.cc +#if 0 + { "query_cache_type", "0" }, // mysqld.cc + { "query_cache_size", "0" }, // mysqld.cc +#endif + { "locked_in_memory", "0" }, // mysqld.cc + { "wsrep_cluster_address", "0" }, // mysqld.cc + { "locks_unsafe_for_binlog", "0" }, // ha_innodb.cc + { "autoinc_lock_mode", "1" }, // ha_innodb.cc + { 0, 0 } +}; + +enum +{ + WSREP_SLAVE_THREADS, + BIND_ADDRESS, + WSREP_SST_METHOD, + WSREP_SST_RECEIVE_ADDRESS, + BINLOG_FORMAT, + WSREP_PROVIDER, +#if 0 + QUERY_CACHE_TYPE, + QUERY_CACHE_SIZE, +#endif + LOCKED_IN_MEMORY, + WSREP_CLUSTER_ADDRESS, + LOCKS_UNSAFE_FOR_BINLOG, + AUTOINC_LOCK_MODE +}; + + +/* A class to make a copy of argv[] vector */ +struct argv_copy +{ + int const argc_; + char** argv_; + + argv_copy (int const argc, const char* const argv[]) : + argc_ (argc), + argv_ (reinterpret_cast<char**>(calloc(argc_, sizeof(char*)))) + { + if (argv_) + { + for (int i = 0; i < argc_; ++i) + { + argv_[i] = strdup(argv[i]); + + if (!argv_[i]) + { + argv_free (); // free whatever bee allocated + return; + } + } + } + } + + ~argv_copy () { argv_free (); } + +private: + argv_copy (const argv_copy&); + argv_copy& operator= (const argv_copy&); + + void argv_free() + { + if (argv_) + { + for (int i = 0; (i < argc_) && argv_[i] ; ++i) free (argv_[i]); + free (argv_); + argv_ = 0; + } + } +}; + +/* a short corresponding to '--' byte sequence */ +static short const long_opt_prefix ('-' + ('-' << 8)); + +/* Normalizes long options to have '_' instead of '-' */ +static int +normalize_opts (argv_copy& a) +{ + if (a.argv_) + { + for (int i = 0; i < a.argc_; ++i) + { + char* ptr = a.argv_[i]; + if (long_opt_prefix == *(short*)ptr) // long option + { + ptr += 2; + const char* end = strchr(ptr, '='); + + if (!end) end = ptr + strlen(ptr); + + for (; ptr != end; ++ptr) if ('-' == *ptr) *ptr = '_'; + } + } + + return 0; + } + + return EINVAL; +} + +/* Find required options in the argument list and change their values */ +static int +find_opts (argv_copy& a, struct opt* const opts) +{ + for (int i = 0; i < a.argc_; ++i) + { + char *ptr; + + /* + We're interested only in long options, ensure that the arg is of + sufficient length. + */ + if (strlen(a.argv_[i]) > 2) + { + ptr= a.argv_[i] + 2; + } + else + { + continue; + } + + struct opt* opt = opts; + for (; 0 != opt->name; ++opt) + { + if (!strstr(ptr, opt->name)) continue; // try next option + + /* 1. try to find value after the '=' */ + opt->value = strchr(ptr, '=') + 1; + + /* 2. if no '=', try next element in the argument vector */ + if (reinterpret_cast<void*>(1) == opt->value) + { + /* also check that the next element is not an option itself */ + if (i + 1 < a.argc_ && *(a.argv_[i + 1]) != '-') + { + ++i; + opt->value = a.argv_[i]; + } + else opt->value = ""; // no value supplied (like boolean opt) + } + + break; // option found, break inner loop + } + } + + return 0; +} + +/* Parses string for an integer. Returns 0 on success. */ +int get_long_long (const struct opt& opt, long long* const val, int const base) +{ + const char* const str = opt.value; + + if ('\0' != *str) + { + char* endptr; + + *val = strtoll (str, &endptr, base); + + if ('k' == *endptr || 'K' == *endptr) + { + *val *= 1024L; + endptr++; + } + else if ('m' == *endptr || 'M' == *endptr) + { + *val *= 1024L * 1024L; + endptr++; + } + else if ('g' == *endptr || 'G' == *endptr) + { + *val *= 1024L * 1024L * 1024L; + endptr++; + } + + if ('\0' == *endptr) return 0; // the whole string was a valid integer + } + + WSREP_ERROR ("Bad value for *%s: '%s'. Should be integer.", + opt.name, opt.value); + + return EINVAL; +} + +/* This is flimzy coz hell knows how mysql interprets boolean strings... + * and, no, I'm not going to become versed in how mysql handles options - + * I'd rather sing. + + Aha, http://dev.mysql.com/doc/refman/5.1/en/dynamic-system-variables.html: + Variables that have a type of “boolean” can be set to 0, 1, ON or OFF. (If you + set them on the command line or in an option file, use the numeric values.) + + So it is '0' for FALSE, '1' or empty string for TRUE + + */ +int get_bool (const struct opt& opt, bool* const val) +{ + const char* str = opt.value; + + while (isspace(*str)) ++str; // skip initial whitespaces + + ssize_t str_len = strlen(str); + switch (str_len) + { + case 0: + *val = true; + return 0; + case 1: + if ('0' == *str || '1' == *str) + { + *val = ('1' == *str); + return 0; + } + } + + WSREP_ERROR ("Bad value for *%s: '%s'. Should be '0', '1' or empty string.", + opt.name, opt.value); + + return EINVAL; +} + +static int +check_opts (int const argc, const char* const argv[], struct opt opts[]) +{ + /* First, make a copy of argv to be able to manipulate it */ + argv_copy a(argc, argv); + + if (!a.argv_) + { + WSREP_ERROR ("Could not copy argv vector: not enough memory."); + return ENOMEM; + } + + int err = normalize_opts (a); + if (err) + { + WSREP_ERROR ("Failed to normalize options."); + return err; + } + + err = find_opts (a, opts); + if (err) + { + WSREP_ERROR ("Failed to parse options."); + return err; + } + + /* At this point we have updated default values in our option list to + what has been specified on the command line / my.cnf */ + + long long slave_threads; + err = get_long_long (opts[WSREP_SLAVE_THREADS], &slave_threads, 10); + if (err) return err; + + int rcode = 0; + + if (slave_threads > 1) + /* Need to check AUTOINC_LOCK_MODE and LOCKS_UNSAFE_FOR_BINLOG */ + { + long long autoinc_lock_mode; + err = get_long_long (opts[AUTOINC_LOCK_MODE], &autoinc_lock_mode, 10); + if (err) return err; + + bool locks_unsafe_for_binlog; + err = get_bool (opts[LOCKS_UNSAFE_FOR_BINLOG],&locks_unsafe_for_binlog); + if (err) return err; + + if (autoinc_lock_mode != 2) + { + WSREP_ERROR ("Parallel applying (wsrep_slave_threads > 1) requires" + " innodb_autoinc_lock_mode = 2."); + rcode = EINVAL; + } + } + + bool locked_in_memory; + err = get_bool (opts[LOCKED_IN_MEMORY], &locked_in_memory); + if (err) { WSREP_ERROR("get_bool error: %s", strerror(err)); return err; } + if (locked_in_memory) + { + WSREP_ERROR ("Memory locking is not supported (locked_in_memory=%s)", + locked_in_memory ? "ON" : "OFF"); + rcode = EINVAL; + } + + if (!strcasecmp(opts[WSREP_SST_METHOD].value,"mysqldump")) + { + if (!strcasecmp(opts[BIND_ADDRESS].value, "127.0.0.1") || + !strcasecmp(opts[BIND_ADDRESS].value, "localhost")) + { + WSREP_ERROR ("wsrep_sst_method is set to 'mysqldump' yet " + "mysqld bind_address is set to '%s', which makes it " + "impossible to receive state transfer from another " + "node, since mysqld won't accept such connections. " + "If you wish to use mysqldump state transfer method, " + "set bind_address to allow mysql client connections " + "from other cluster members (e.g. 0.0.0.0).", + opts[BIND_ADDRESS].value); + rcode = EINVAL; + } + } + else + { + // non-mysqldump SST requires wsrep_cluster_address on startup + if (strlen(opts[WSREP_CLUSTER_ADDRESS].value) == 0) + { + WSREP_ERROR ("%s SST method requires wsrep_cluster_address to be " + "configured on startup.",opts[WSREP_SST_METHOD].value); + rcode = EINVAL; + } + } + + if (strcasecmp(opts[WSREP_SST_RECEIVE_ADDRESS].value, "AUTO")) + { + if (!strncasecmp(opts[WSREP_SST_RECEIVE_ADDRESS].value, + "127.0.0.1", strlen("127.0.0.1")) || + !strncasecmp(opts[WSREP_SST_RECEIVE_ADDRESS].value, + "localhost", strlen("localhost"))) + { + WSREP_WARN ("wsrep_sst_receive_address is set to '%s' which " + "makes it impossible for another host to reach this " + "one. Please set it to the address which this node " + "can be connected at by other cluster members.", + opts[WSREP_SST_RECEIVE_ADDRESS].value); +// rcode = EINVAL; + } + } + + if (strcasecmp(opts[WSREP_PROVIDER].value, "none")) + { + if (strcasecmp(opts[BINLOG_FORMAT].value, "ROW")) + { + WSREP_ERROR ("Only binlog_format = 'ROW' is currently supported. " + "Configured value: '%s'. Please adjust your " + "configuration.", opts[BINLOG_FORMAT].value); + + rcode = EINVAL; + } + } + + return rcode; +} + +int +wsrep_check_opts (int const argc, char* const* const argv) +{ + return check_opts (argc, argv, opts); +} + diff --git a/sql/wsrep_hton.cc b/sql/wsrep_hton.cc new file mode 100644 index 00000000000..78d189fbd61 --- /dev/null +++ b/sql/wsrep_hton.cc @@ -0,0 +1,615 @@ +/* Copyright 2008-2015 Codership Oy <http://www.codership.com> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include <mysqld.h> +#include "sql_base.h" +#include "rpl_filter.h" +#include <sql_class.h> +#include "wsrep_mysqld.h" +#include "wsrep_binlog.h" +#include "wsrep_xid.h" +#include <cstdio> +#include <cstdlib> +#include "debug_sync.h" + +extern ulonglong thd_to_trx_id(THD *thd); + +extern "C" int thd_binlog_format(const MYSQL_THD thd); +// todo: share interface with ha_innodb.c + +enum wsrep_trx_status wsrep_run_wsrep_commit(THD *thd, handlerton *hton, + bool all); + +/* + Cleanup after local transaction commit/rollback, replay or TOI. +*/ +void wsrep_cleanup_transaction(THD *thd) +{ + if (!WSREP(thd)) return; + + if (wsrep_emulate_bin_log) thd_binlog_trx_reset(thd); + thd->wsrep_ws_handle.trx_id= WSREP_UNDEFINED_TRX_ID; + thd->wsrep_trx_meta.gtid= WSREP_GTID_UNDEFINED; + thd->wsrep_trx_meta.depends_on= WSREP_SEQNO_UNDEFINED; + thd->wsrep_exec_mode= LOCAL_STATE; + thd->wsrep_affected_rows= 0; + return; +} + +/* + wsrep hton +*/ +handlerton *wsrep_hton; + + +/* + Registers wsrep hton at commit time if transaction has registered htons + for supported engine types. + + Hton should not be registered for TOTAL_ORDER operations. + + Registration is needed for both LOCAL_MODE and REPL_RECV transactions to run + commit in 2pc so that wsrep position gets properly recorded in storage + engines. + + Note that all hton calls should immediately return for threads that are + in REPL_RECV mode as their states are controlled by wsrep appliers or + replaying code. Only threads in LOCAL_MODE should run wsrep callbacks + from hton methods. +*/ +void wsrep_register_hton(THD* thd, bool all) +{ + if (thd->wsrep_exec_mode != TOTAL_ORDER && !thd->wsrep_apply_toi) + { + if (thd->wsrep_exec_mode == LOCAL_STATE && + (thd_sql_command(thd) == SQLCOM_OPTIMIZE || + thd_sql_command(thd) == SQLCOM_ANALYZE || + thd_sql_command(thd) == SQLCOM_REPAIR) && + thd->lex->no_write_to_binlog == 1) + { + WSREP_DEBUG("Skipping wsrep_register_hton for LOCAL sql admin command : %s", + thd->query()); + return; + } + + THD_TRANS *trans=all ? &thd->transaction.all : &thd->transaction.stmt; + for (Ha_trx_info *i= trans->ha_list; WSREP(thd) && i; i = i->next()) + { + if ((i->ht()->db_type == DB_TYPE_INNODB) || + (i->ht()->db_type == DB_TYPE_TOKUDB)) + { + trans_register_ha(thd, all, wsrep_hton); + + /* follow innodb read/write settting + * but, as an exception: CTAS with empty result set will not be + * replicated unless we declare wsrep hton as read/write here + */ + if (i->is_trx_read_write() || + (thd->lex->sql_command == SQLCOM_CREATE_TABLE && + thd->wsrep_exec_mode == LOCAL_STATE)) + { + thd->ha_data[wsrep_hton->slot].ha_info[all].set_trx_read_write(); + } + break; + } + } + } +} + +/* + Calls wsrep->post_commit() for locally executed transactions that have + got seqno from provider (must commit) and don't require replaying. + */ +void wsrep_post_commit(THD* thd, bool all) +{ + if (!WSREP(thd)) return; + + switch (thd->wsrep_exec_mode) + { + case LOCAL_COMMIT: + { + DBUG_ASSERT(thd->wsrep_trx_meta.gtid.seqno != WSREP_SEQNO_UNDEFINED); + if (wsrep->post_commit(wsrep, &thd->wsrep_ws_handle)) + { + DBUG_PRINT("wsrep", ("set committed fail")); + WSREP_WARN("set committed fail: %llu %d", + (long long)thd->real_id, thd->get_stmt_da()->status()); + } + wsrep_cleanup_transaction(thd); + break; + } + case LOCAL_STATE: + { + /* + Non-InnoDB statements may have populated events in stmt cache => cleanup + */ + WSREP_DEBUG("cleanup transaction for LOCAL_STATE: %s", thd->query()); + wsrep_cleanup_transaction(thd); + break; + } + default: break; + } + +} + +/* + wsrep exploits binlog's caches even if binlogging itself is not + activated. In such case connection close needs calling + actual binlog's method. + Todo: split binlog hton from its caches to use ones by wsrep + without referring to binlog's stuff. +*/ +static int +wsrep_close_connection(handlerton* hton, THD* thd) +{ + DBUG_ENTER("wsrep_close_connection"); + + if (thd->wsrep_exec_mode == REPL_RECV) + { + DBUG_RETURN(0); + } + DBUG_RETURN(wsrep_binlog_close_connection (thd)); +} + +/* + prepare/wsrep_run_wsrep_commit can fail in two ways + - certification test or an equivalent. As a result, + the current transaction just rolls back + Error codes: + WSREP_TRX_CERT_FAIL, WSREP_TRX_SIZE_EXCEEDED, WSREP_TRX_ERROR + - a post-certification failure makes this server unable to + commit its own WS and therefore the server must abort +*/ +static int wsrep_prepare(handlerton *hton, THD *thd, bool all) +{ + DBUG_ENTER("wsrep_prepare"); + + if (thd->wsrep_exec_mode == REPL_RECV) + { + DBUG_RETURN(0); + } + + DBUG_ASSERT(thd->ha_data[wsrep_hton->slot].ha_info[all].is_trx_read_write()); + DBUG_ASSERT(thd->wsrep_exec_mode == LOCAL_STATE); + DBUG_ASSERT(thd->wsrep_trx_meta.gtid.seqno == WSREP_SEQNO_UNDEFINED); + + if ((all || + !thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) && + (thd->variables.wsrep_on && !wsrep_trans_cache_is_empty(thd))) + { + DBUG_RETURN (wsrep_run_wsrep_commit(thd, hton, all)); + } + DBUG_RETURN(0); +} + +static int wsrep_savepoint_set(handlerton *hton, THD *thd, void *sv) +{ + DBUG_ENTER("wsrep_savepoint_set"); + + if (thd->wsrep_exec_mode == REPL_RECV) + { + DBUG_RETURN(0); + } + + if (!wsrep_emulate_bin_log) DBUG_RETURN(0); + int rcode = wsrep_binlog_savepoint_set(thd, sv); + DBUG_RETURN(rcode); +} + +static int wsrep_savepoint_rollback(handlerton *hton, THD *thd, void *sv) +{ + DBUG_ENTER("wsrep_savepoint_rollback"); + + if (thd->wsrep_exec_mode == REPL_RECV) + { + DBUG_RETURN(0); + } + + if (!wsrep_emulate_bin_log) DBUG_RETURN(0); + int rcode = wsrep_binlog_savepoint_rollback(thd, sv); + DBUG_RETURN(rcode); +} + +static int wsrep_rollback(handlerton *hton, THD *thd, bool all) +{ + DBUG_ENTER("wsrep_rollback"); + + if (thd->wsrep_exec_mode == REPL_RECV) + { + DBUG_RETURN(0); + } + + mysql_mutex_lock(&thd->LOCK_wsrep_thd); + switch (thd->wsrep_exec_mode) + { + case TOTAL_ORDER: + case REPL_RECV: + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + WSREP_DEBUG("Avoiding wsrep rollback for failed DDL: %s", thd->query()); + DBUG_RETURN(0); + default: break; + } + + if ((all || !thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) && + thd->wsrep_conflict_state != MUST_REPLAY) + { + if (WSREP(thd) && wsrep->post_rollback(wsrep, &thd->wsrep_ws_handle)) + { + DBUG_PRINT("wsrep", ("setting rollback fail")); + WSREP_ERROR("settting rollback fail: thd: %llu, schema: %s, SQL: %s", + (long long)thd->real_id, (thd->db ? thd->db : "(null)"), + thd->query()); + } + wsrep_cleanup_transaction(thd); + } + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + DBUG_RETURN(0); +} + +int wsrep_commit(handlerton *hton, THD *thd, bool all) +{ + DBUG_ENTER("wsrep_commit"); + + if (thd->wsrep_exec_mode == REPL_RECV) + { + DBUG_RETURN(0); + } + + mysql_mutex_lock(&thd->LOCK_wsrep_thd); + if ((all || !thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) && + (thd->variables.wsrep_on && thd->wsrep_conflict_state != MUST_REPLAY)) + { + if (thd->wsrep_exec_mode == LOCAL_COMMIT) + { + DBUG_ASSERT(thd->ha_data[wsrep_hton->slot].ha_info[all].is_trx_read_write()); + /* + Call to wsrep->post_commit() (moved to wsrep_post_commit()) must + be done only after commit has done for all involved htons. + */ + DBUG_PRINT("wsrep", ("commit")); + } + else + { + /* + Transaction didn't go through wsrep->pre_commit() so just roll back + possible changes to clean state. + */ + if (WSREP(thd) && wsrep->post_rollback(wsrep, &thd->wsrep_ws_handle)) + { + DBUG_PRINT("wsrep", ("setting rollback fail")); + WSREP_ERROR("settting rollback fail: thd: %llu, schema: %s, SQL: %s", + (long long)thd->real_id, (thd->db ? thd->db : "(null)"), + thd->query()); + } + wsrep_cleanup_transaction(thd); + } + } + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + DBUG_RETURN(0); +} + + +extern Rpl_filter* binlog_filter; +extern my_bool opt_log_slave_updates; + +enum wsrep_trx_status +wsrep_run_wsrep_commit(THD *thd, handlerton *hton, bool all) +{ + int rcode= -1; + size_t data_len= 0; + IO_CACHE *cache; + int replay_round= 0; + + if (thd->get_stmt_da()->is_error()) { + WSREP_DEBUG("commit issue, error: %d %s", + thd->get_stmt_da()->sql_errno(), thd->get_stmt_da()->message()); + } + + DBUG_ENTER("wsrep_run_wsrep_commit"); + + DEBUG_SYNC(thd, "wsrep_before_replication"); + + if (thd->slave_thread && !opt_log_slave_updates) DBUG_RETURN(WSREP_TRX_OK); + + if (thd->wsrep_exec_mode == REPL_RECV) { + + mysql_mutex_lock(&thd->LOCK_wsrep_thd); + if (thd->wsrep_conflict_state == MUST_ABORT) { + if (wsrep_debug) + WSREP_INFO("WSREP: must abort for BF"); + DBUG_PRINT("wsrep", ("BF apply commit fail")); + thd->wsrep_conflict_state = NO_CONFLICT; + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + // + // TODO: test all calls of the rollback. + // rollback must happen automagically innobase_rollback(hton, thd, 1); + // + DBUG_RETURN(WSREP_TRX_ERROR); + } + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + } + + if (thd->wsrep_exec_mode != LOCAL_STATE) DBUG_RETURN(WSREP_TRX_OK); + + if (thd->wsrep_consistency_check == CONSISTENCY_CHECK_RUNNING) { + WSREP_DEBUG("commit for consistency check: %s", thd->query()); + DBUG_RETURN(WSREP_TRX_OK); + } + + DBUG_PRINT("wsrep", ("replicating commit")); + + mysql_mutex_lock(&thd->LOCK_wsrep_thd); + if (thd->wsrep_conflict_state == MUST_ABORT) { + DBUG_PRINT("wsrep", ("replicate commit fail")); + thd->wsrep_conflict_state = ABORTED; + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + if (wsrep_debug) { + WSREP_INFO("innobase_commit, abort %s", + (thd->query()) ? thd->query() : "void"); + } + DBUG_RETURN(WSREP_TRX_CERT_FAIL); + } + + mysql_mutex_lock(&LOCK_wsrep_replaying); + + while (wsrep_replaying > 0 && + thd->wsrep_conflict_state == NO_CONFLICT && + thd->killed == NOT_KILLED && + !shutdown_in_progress) + { + + mysql_mutex_unlock(&LOCK_wsrep_replaying); + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + + mysql_mutex_lock(&thd->mysys_var->mutex); + thd_proc_info(thd, "wsrep waiting on replaying"); + thd->mysys_var->current_mutex= &LOCK_wsrep_replaying; + thd->mysys_var->current_cond= &COND_wsrep_replaying; + mysql_mutex_unlock(&thd->mysys_var->mutex); + + mysql_mutex_lock(&LOCK_wsrep_replaying); + // Using timedwait is a hack to avoid deadlock in case if BF victim + // misses the signal. + struct timespec wtime = {0, 1000000}; + mysql_cond_timedwait(&COND_wsrep_replaying, &LOCK_wsrep_replaying, + &wtime); + + if (replay_round++ % 100000 == 0) + WSREP_DEBUG("commit waiting for replaying: replayers %d, thd: (%lu) " + "conflict: %d (round: %d)", + wsrep_replaying, thd->thread_id, + thd->wsrep_conflict_state, replay_round); + + mysql_mutex_unlock(&LOCK_wsrep_replaying); + + mysql_mutex_lock(&thd->mysys_var->mutex); + thd->mysys_var->current_mutex= 0; + thd->mysys_var->current_cond= 0; + mysql_mutex_unlock(&thd->mysys_var->mutex); + + mysql_mutex_lock(&thd->LOCK_wsrep_thd); + mysql_mutex_lock(&LOCK_wsrep_replaying); + } + mysql_mutex_unlock(&LOCK_wsrep_replaying); + + if (thd->wsrep_conflict_state == MUST_ABORT) { + DBUG_PRINT("wsrep", ("replicate commit fail")); + thd->wsrep_conflict_state = ABORTED; + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + WSREP_DEBUG("innobase_commit abort after replaying wait %s", + (thd->query()) ? thd->query() : "void"); + DBUG_RETURN(WSREP_TRX_CERT_FAIL); + } + + thd->wsrep_query_state = QUERY_COMMITTING; + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + + cache = get_trans_log(thd); + rcode = 0; + if (cache) { + thd->binlog_flush_pending_rows_event(true); + rcode = wsrep_write_cache(wsrep, thd, cache, &data_len); + if (WSREP_OK != rcode) { + WSREP_ERROR("rbr write fail, data_len: %zu, %d", data_len, rcode); + DBUG_RETURN(WSREP_TRX_SIZE_EXCEEDED); + } + } + + if (data_len == 0) + { + if (thd->get_stmt_da()->is_ok() && + thd->get_stmt_da()->affected_rows() > 0 && + !binlog_filter->is_on()) + { + WSREP_DEBUG("empty rbr buffer, query: %s, " + "affected rows: %llu, " + "changed tables: %d, " + "sql_log_bin: %d, " + "wsrep status (%d %d %d)", + thd->query(), thd->get_stmt_da()->affected_rows(), + stmt_has_updated_trans_table(thd), thd->variables.sql_log_bin, + thd->wsrep_exec_mode, thd->wsrep_query_state, + thd->wsrep_conflict_state); + } + else + { + WSREP_DEBUG("empty rbr buffer, query: %s", thd->query()); + } + thd->wsrep_query_state= QUERY_EXEC; + DBUG_RETURN(WSREP_TRX_OK); + } + + if (WSREP_UNDEFINED_TRX_ID == thd->wsrep_ws_handle.trx_id) + { + WSREP_WARN("SQL statement was ineffective, THD: %lu, buf: %zu\n" + "schema: %s \n" + "QUERY: %s\n" + " => Skipping replication", + thd->thread_id, data_len, + (thd->db ? thd->db : "(null)"), thd->query()); + rcode = WSREP_TRX_FAIL; + } + else if (!rcode) + { + if (WSREP_OK == rcode) + rcode = wsrep->pre_commit(wsrep, + (wsrep_conn_id_t)thd->thread_id, + &thd->wsrep_ws_handle, + WSREP_FLAG_COMMIT | + ((thd->wsrep_PA_safe) ? + 0ULL : WSREP_FLAG_PA_UNSAFE), + &thd->wsrep_trx_meta); + + if (rcode == WSREP_TRX_MISSING) { + WSREP_WARN("Transaction missing in provider, thd: %ld, schema: %s, SQL: %s", + thd->thread_id, (thd->db ? thd->db : "(null)"), thd->query()); + rcode = WSREP_TRX_FAIL; + } else if (rcode == WSREP_BF_ABORT) { + WSREP_DEBUG("thd %lu seqno %lld BF aborted by provider, will replay", + thd->thread_id, (long long)thd->wsrep_trx_meta.gtid.seqno); + mysql_mutex_lock(&thd->LOCK_wsrep_thd); + thd->wsrep_conflict_state = MUST_REPLAY; + DBUG_ASSERT(wsrep_thd_trx_seqno(thd) > 0); + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + mysql_mutex_lock(&LOCK_wsrep_replaying); + wsrep_replaying++; + WSREP_DEBUG("replaying increased: %d, thd: %lu", + wsrep_replaying, thd->thread_id); + mysql_mutex_unlock(&LOCK_wsrep_replaying); + } + } else { + WSREP_ERROR("I/O error reading from thd's binlog iocache: " + "errno=%d, io cache code=%d", my_errno, cache->error); + DBUG_ASSERT(0); // failure like this can not normally happen + DBUG_RETURN(WSREP_TRX_ERROR); + } + + mysql_mutex_lock(&thd->LOCK_wsrep_thd); + switch(rcode) { + case 0: + /* + About MUST_ABORT: We assume that even if thd conflict state was set + to MUST_ABORT, underlying transaction was not rolled back or marked + as deadlock victim in QUERY_COMMITTING state. Conflict state is + set to NO_CONFLICT and commit proceeds as usual. + */ + if (thd->wsrep_conflict_state == MUST_ABORT) + thd->wsrep_conflict_state= NO_CONFLICT; + + if (thd->wsrep_conflict_state != NO_CONFLICT) + { + WSREP_WARN("thd %lu seqno %lld: conflict state %d after post commit", + thd->thread_id, + (long long)thd->wsrep_trx_meta.gtid.seqno, + thd->wsrep_conflict_state); + } + thd->wsrep_exec_mode= LOCAL_COMMIT; + DBUG_ASSERT(thd->wsrep_trx_meta.gtid.seqno != WSREP_SEQNO_UNDEFINED); + /* Override XID iff it was generated by mysql */ + if (thd->transaction.xid_state.xid.get_my_xid()) + { + wsrep_xid_init(&thd->transaction.xid_state.xid, + thd->wsrep_trx_meta.gtid.uuid, + thd->wsrep_trx_meta.gtid.seqno); + } + DBUG_PRINT("wsrep", ("replicating commit success")); + break; + case WSREP_BF_ABORT: + DBUG_ASSERT(thd->wsrep_trx_meta.gtid.seqno != WSREP_SEQNO_UNDEFINED); + case WSREP_TRX_FAIL: + WSREP_DEBUG("commit failed for reason: %d %lu %s", rcode, thd->thread_id, + thd->query()); + DBUG_PRINT("wsrep", ("replicating commit fail")); + + thd->wsrep_query_state= QUERY_EXEC; + + if (thd->wsrep_conflict_state == MUST_ABORT) { + thd->wsrep_conflict_state= ABORTED; + } + else + { + WSREP_DEBUG("conflict state: %d", thd->wsrep_conflict_state); + if (thd->wsrep_conflict_state == NO_CONFLICT) + { + thd->wsrep_conflict_state = CERT_FAILURE; + WSREP_LOG_CONFLICT(NULL, thd, FALSE); + } + } + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + + DBUG_RETURN(WSREP_TRX_CERT_FAIL); + + case WSREP_SIZE_EXCEEDED: + WSREP_ERROR("transaction size exceeded"); + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + DBUG_RETURN(WSREP_TRX_SIZE_EXCEEDED); + case WSREP_CONN_FAIL: + WSREP_ERROR("connection failure"); + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + DBUG_RETURN(WSREP_TRX_ERROR); + default: + WSREP_ERROR("unknown connection failure"); + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + DBUG_RETURN(WSREP_TRX_ERROR); + } + + thd->wsrep_query_state= QUERY_EXEC; + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + + DBUG_RETURN(WSREP_TRX_OK); +} + + +static int wsrep_hton_init(void *p) +{ + wsrep_hton= (handlerton *)p; + //wsrep_hton->state=opt_bin_log ? SHOW_OPTION_YES : SHOW_OPTION_NO; + wsrep_hton->state= SHOW_OPTION_YES; + wsrep_hton->db_type=DB_TYPE_WSREP; + wsrep_hton->savepoint_offset= sizeof(my_off_t); + wsrep_hton->close_connection= wsrep_close_connection; + wsrep_hton->savepoint_set= wsrep_savepoint_set; + wsrep_hton->savepoint_rollback= wsrep_savepoint_rollback; + wsrep_hton->commit= wsrep_commit; + wsrep_hton->rollback= wsrep_rollback; + wsrep_hton->prepare= wsrep_prepare; + wsrep_hton->flags= HTON_NOT_USER_SELECTABLE | HTON_HIDDEN; // todo: fix flags + wsrep_hton->slot= 0; + return 0; +} + + +struct st_mysql_storage_engine wsrep_storage_engine= +{ MYSQL_HANDLERTON_INTERFACE_VERSION }; + + +mysql_declare_plugin(wsrep) +{ + MYSQL_STORAGE_ENGINE_PLUGIN, + &wsrep_storage_engine, + "wsrep", + "Codership Oy", + "A pseudo storage engine to represent transactions in multi-master " + "synchornous replication", + PLUGIN_LICENSE_GPL, + wsrep_hton_init, /* Plugin Init */ + NULL, /* Plugin Deinit */ + 0x0100 /* 1.0 */, + NULL, /* status variables */ + NULL, /* system variables */ + NULL, /* config options */ + 0, /* flags */ +} +mysql_declare_plugin_end; diff --git a/sql/wsrep_mysqld.cc b/sql/wsrep_mysqld.cc new file mode 100644 index 00000000000..353911dcfde --- /dev/null +++ b/sql/wsrep_mysqld.cc @@ -0,0 +1,1601 @@ +/* Copyright 2008-2015 Codership Oy <http://www.codership.com> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include <mysqld.h> +#include <sql_class.h> +#include <sql_parse.h> +#include "wsrep_priv.h" +#include "wsrep_thd.h" +#include "wsrep_sst.h" +#include "wsrep_utils.h" +#include "wsrep_var.h" +#include "wsrep_binlog.h" +#include "wsrep_applier.h" +#include "wsrep_xid.h" +#include <cstdio> +#include <cstdlib> +#include "log_event.h" +#include <slave.h> + +wsrep_t *wsrep = NULL; +my_bool wsrep_emulate_bin_log = FALSE; // activating parts of binlog interface +#ifdef GTID_SUPPORT +/* Sidno in global_sid_map corresponding to group uuid */ +rpl_sidno wsrep_sidno= -1; +#endif /* GTID_SUPPORT */ +my_bool wsrep_preordered_opt= FALSE; + +/* + * Begin configuration options and their default values + */ + +const char* wsrep_data_home_dir = NULL; +const char* wsrep_dbug_option = ""; + +long wsrep_slave_threads = 1; // # of slave action appliers wanted +int wsrep_slave_count_change = 0; // # of appliers to stop or start +my_bool wsrep_debug = 0; // enable debug level logging +my_bool wsrep_convert_LOCK_to_trx = 1; // convert locking sessions to trx +ulong wsrep_retry_autocommit = 5; // retry aborted autocommit trx +my_bool wsrep_auto_increment_control = 1; // control auto increment variables +my_bool wsrep_drupal_282555_workaround = 1; // retry autoinc insert after dupkey +my_bool wsrep_incremental_data_collection = 0; // incremental data collection +ulong wsrep_max_ws_size = 1073741824UL;//max ws (RBR buffer) size +ulong wsrep_max_ws_rows = 65536; // max number of rows in ws +int wsrep_to_isolation = 0; // # of active TO isolation threads +my_bool wsrep_certify_nonPK = 1; // certify, even when no primary key +long wsrep_max_protocol_version = 3; // maximum protocol version to use +ulong wsrep_forced_binlog_format = BINLOG_FORMAT_UNSPEC; +my_bool wsrep_recovery = 0; // recovery +my_bool wsrep_replicate_myisam = 0; // enable myisam replication +my_bool wsrep_log_conflicts = 0; +ulong wsrep_mysql_replication_bundle = 0; +my_bool wsrep_desync = 0; // desynchronize the node from the + // cluster +my_bool wsrep_load_data_splitting = 1; // commit load data every 10K intervals +my_bool wsrep_restart_slave = 0; // should mysql slave thread be + // restarted, if node joins back +my_bool wsrep_restart_slave_activated = 0; // node has dropped, and slave + // restart will be needed +my_bool wsrep_slave_UK_checks = 0; // slave thread does UK checks +my_bool wsrep_slave_FK_checks = 0; // slave thread does FK checks +// Allow reads even if the node is not in the primary component. +bool wsrep_dirty_reads = false; + +/* + Set during the creation of first wsrep applier and rollback threads. + Since these threads are critical, abort if the thread creation fails. +*/ +my_bool wsrep_creating_startup_threads = 0; + +/* + * End configuration options + */ + +/* + * Other wsrep global variables. + */ +my_bool wsrep_inited = 0; // initialized ? + +static wsrep_uuid_t cluster_uuid = WSREP_UUID_UNDEFINED; +static char cluster_uuid_str[40]= { 0, }; +static const char* cluster_status_str[WSREP_VIEW_MAX] = +{ + "Primary", + "non-Primary", + "Disconnected" +}; + +static char provider_name[256]= { 0, }; +static char provider_version[256]= { 0, }; +static char provider_vendor[256]= { 0, }; + +/* + * wsrep status variables + */ +my_bool wsrep_connected = FALSE; +my_bool wsrep_ready = FALSE; // node can accept queries +const char* wsrep_cluster_state_uuid = cluster_uuid_str; +long long wsrep_cluster_conf_id = WSREP_SEQNO_UNDEFINED; +const char* wsrep_cluster_status = cluster_status_str[WSREP_VIEW_DISCONNECTED]; +long wsrep_cluster_size = 0; +long wsrep_local_index = -1; +long long wsrep_local_bf_aborts = 0; +const char* wsrep_provider_name = provider_name; +const char* wsrep_provider_version = provider_version; +const char* wsrep_provider_vendor = provider_vendor; +/* End wsrep status variables */ + +wsrep_uuid_t local_uuid = WSREP_UUID_UNDEFINED; +wsrep_seqno_t local_seqno = WSREP_SEQNO_UNDEFINED; +wsp::node_status local_status; +long wsrep_protocol_version = 3; + +// Boolean denoting if server is in initial startup phase. This is needed +// to make sure that main thread waiting in wsrep_sst_wait() is signaled +// if there was no state gap on receiving first view event. +static my_bool wsrep_startup = TRUE; + + +static void wsrep_log_cb(wsrep_log_level_t level, const char *msg) { + switch (level) { + case WSREP_LOG_INFO: + sql_print_information("WSREP: %s", msg); + break; + case WSREP_LOG_WARN: + sql_print_warning("WSREP: %s", msg); + break; + case WSREP_LOG_ERROR: + case WSREP_LOG_FATAL: + sql_print_error("WSREP: %s", msg); + break; + case WSREP_LOG_DEBUG: + if (wsrep_debug) sql_print_information ("[Debug] WSREP: %s", msg); + default: + break; + } +} + +static void wsrep_log_states (wsrep_log_level_t const level, + const wsrep_uuid_t* const group_uuid, + wsrep_seqno_t const group_seqno, + const wsrep_uuid_t* const node_uuid, + wsrep_seqno_t const node_seqno) +{ + char uuid_str[37]; + char msg[256]; + + wsrep_uuid_print (group_uuid, uuid_str, sizeof(uuid_str)); + snprintf (msg, 255, "WSREP: Group state: %s:%lld", + uuid_str, (long long)group_seqno); + wsrep_log_cb (level, msg); + + wsrep_uuid_print (node_uuid, uuid_str, sizeof(uuid_str)); + snprintf (msg, 255, "WSREP: Local state: %s:%lld", + uuid_str, (long long)node_seqno); + wsrep_log_cb (level, msg); +} + +#ifdef GTID_SUPPORT +void wsrep_init_sidno(const wsrep_uuid_t& wsrep_uuid) +{ + /* generate new Sid map entry from inverted uuid */ + rpl_sid sid; + wsrep_uuid_t ltid_uuid; + + for (size_t i= 0; i < sizeof(ltid_uuid.data); ++i) + { + ltid_uuid.data[i] = ~wsrep_uuid.data[i]; + } + + sid.copy_from(ltid_uuid.data); + global_sid_lock->wrlock(); + wsrep_sidno= global_sid_map->add_sid(sid); + WSREP_INFO("Initialized wsrep sidno %d", wsrep_sidno); + global_sid_lock->unlock(); +} +#endif /* GTID_SUPPORT */ + +static wsrep_cb_status_t +wsrep_view_handler_cb (void* app_ctx, + void* recv_ctx, + const wsrep_view_info_t* view, + const char* state, + size_t state_len, + void** sst_req, + size_t* sst_req_len) +{ + *sst_req = NULL; + *sst_req_len = 0; + + wsrep_member_status_t new_status= local_status.get(); + + if (memcmp(&cluster_uuid, &view->state_id.uuid, sizeof(wsrep_uuid_t))) + { + memcpy((wsrep_uuid_t*)&cluster_uuid, &view->state_id.uuid, + sizeof(cluster_uuid)); + + wsrep_uuid_print (&cluster_uuid, cluster_uuid_str, + sizeof(cluster_uuid_str)); + } + + wsrep_cluster_conf_id= view->view; + wsrep_cluster_status= cluster_status_str[view->status]; + wsrep_cluster_size= view->memb_num; + wsrep_local_index= view->my_idx; + + WSREP_INFO("New cluster view: global state: %s:%lld, view# %lld: %s, " + "number of nodes: %ld, my index: %ld, protocol version %d", + wsrep_cluster_state_uuid, (long long)view->state_id.seqno, + (long long)wsrep_cluster_conf_id, wsrep_cluster_status, + wsrep_cluster_size, wsrep_local_index, view->proto_ver); + + /* Proceed further only if view is PRIMARY */ + if (WSREP_VIEW_PRIMARY != view->status) + { +#ifdef HAVE_QUERY_CACHE + // query cache must be initialised by now + query_cache.flush(); +#endif /* HAVE_QUERY_CACHE */ + + wsrep_ready_set(FALSE); + new_status= WSREP_MEMBER_UNDEFINED; + /* Always record local_uuid and local_seqno in non-prim since this + * may lead to re-initializing provider and start position is + * determined according to these variables */ + // WRONG! local_uuid should be the last primary configuration uuid we were + // a member of. local_seqno should be updated in commit calls. + // local_uuid= cluster_uuid; + // local_seqno= view->first - 1; + goto out; + } + + switch (view->proto_ver) + { + case 0: + case 1: + case 2: + case 3: + // version change + if (view->proto_ver != wsrep_protocol_version) + { + my_bool wsrep_ready_saved= wsrep_ready; + wsrep_ready_set(FALSE); + WSREP_INFO("closing client connections for " + "protocol change %ld -> %d", + wsrep_protocol_version, view->proto_ver); + wsrep_close_client_connections(TRUE); + wsrep_protocol_version= view->proto_ver; + wsrep_ready_set(wsrep_ready_saved); + } + break; + default: + WSREP_ERROR("Unsupported application protocol version: %d", + view->proto_ver); + unireg_abort(1); + } + + if (view->state_gap) + { + WSREP_WARN("Gap in state sequence. Need state transfer."); + + /* After that wsrep will call wsrep_sst_prepare. */ + /* keep ready flag 0 until we receive the snapshot */ + wsrep_ready_set(FALSE); + + /* Close client connections to ensure that they don't interfere + * with SST. Necessary only if storage engines are initialized + * before SST. + * TODO: Just killing all ongoing transactions should be enough + * since wsrep_ready is OFF and no new transactions can start. + */ + if (!wsrep_before_SE()) + { + WSREP_DEBUG("[debug]: closing client connections for PRIM"); + wsrep_close_client_connections(TRUE); + } + + ssize_t const req_len= wsrep_sst_prepare (sst_req); + + if (req_len < 0) + { + WSREP_ERROR("SST preparation failed: %zd (%s)", -req_len, + strerror(-req_len)); + new_status= WSREP_MEMBER_UNDEFINED; + } + else + { + assert(sst_req != NULL); + *sst_req_len= req_len; + new_status= WSREP_MEMBER_JOINER; + } + } + else + { + /* + * NOTE: Initialize wsrep_group_uuid here only if it wasn't initialized + * before - OR - it was reinitilized on startup (lp:992840) + */ + if (wsrep_startup) + { + if (wsrep_before_SE()) + { + wsrep_SE_init_grab(); + // Signal mysqld init thread to continue + wsrep_sst_complete (&cluster_uuid, view->state_id.seqno, false); + // and wait for SE initialization + wsrep_SE_init_wait(); + } + else + { + local_uuid= cluster_uuid; + local_seqno= view->state_id.seqno; + } + /* Init storage engine XIDs from first view */ + wsrep_set_SE_checkpoint(local_uuid, local_seqno); +#ifdef GTID_SUPPORT + wsrep_init_sidno(local_uuid); +#endif /* GTID_SUPPORT */ + new_status= WSREP_MEMBER_JOINED; + } + + // just some sanity check + if (memcmp (&local_uuid, &cluster_uuid, sizeof (wsrep_uuid_t))) + { + WSREP_ERROR("Undetected state gap. Can't continue."); + wsrep_log_states(WSREP_LOG_FATAL, &cluster_uuid, view->state_id.seqno, + &local_uuid, -1); + unireg_abort(1); + } + } + + if (wsrep_auto_increment_control) + { + global_system_variables.auto_increment_offset= view->my_idx + 1; + global_system_variables.auto_increment_increment= view->memb_num; + } + + { /* capabilities may be updated on new configuration */ + uint64_t const caps(wsrep->capabilities (wsrep)); + + my_bool const idc((caps & WSREP_CAP_INCREMENTAL_WRITESET) != 0); + if (TRUE == wsrep_incremental_data_collection && FALSE == idc) + { + WSREP_WARN("Unsupported protocol downgrade: " + "incremental data collection disabled. Expect abort."); + } + wsrep_incremental_data_collection = idc; + } + +out: + if (view->status == WSREP_VIEW_PRIMARY) wsrep_startup= FALSE; + local_status.set(new_status, view); + + return WSREP_CB_SUCCESS; +} + +void wsrep_ready_set (my_bool x) +{ + WSREP_DEBUG("Setting wsrep_ready to %d", x); + if (mysql_mutex_lock (&LOCK_wsrep_ready)) abort(); + if (wsrep_ready != x) + { + wsrep_ready= x; + mysql_cond_signal (&COND_wsrep_ready); + } + mysql_mutex_unlock (&LOCK_wsrep_ready); +} + +// Wait until wsrep has reached ready state +void wsrep_ready_wait () +{ + if (mysql_mutex_lock (&LOCK_wsrep_ready)) abort(); + while (!wsrep_ready) + { + WSREP_INFO("Waiting to reach ready state"); + mysql_cond_wait (&COND_wsrep_ready, &LOCK_wsrep_ready); + } + WSREP_INFO("ready state reached"); + mysql_mutex_unlock (&LOCK_wsrep_ready); +} + +static void wsrep_synced_cb(void* app_ctx) +{ + WSREP_INFO("Synchronized with group, ready for connections"); + bool signal_main= false; + if (mysql_mutex_lock (&LOCK_wsrep_ready)) abort(); + if (!wsrep_ready) + { + wsrep_ready= TRUE; + mysql_cond_signal (&COND_wsrep_ready); + signal_main= true; + + } + local_status.set(WSREP_MEMBER_SYNCED); + mysql_mutex_unlock (&LOCK_wsrep_ready); + + if (signal_main) + { + wsrep_SE_init_grab(); + // Signal mysqld init thread to continue + wsrep_sst_complete (&local_uuid, local_seqno, false); + // and wait for SE initialization + wsrep_SE_init_wait(); + } + if (wsrep_restart_slave_activated) + { + int rcode; + WSREP_INFO("MySQL slave restart"); + wsrep_restart_slave_activated= FALSE; + + mysql_mutex_lock(&LOCK_active_mi); + if ((rcode = start_slave_threads(1 /* need mutex */, + 0 /* no wait for start*/, + active_mi, + master_info_file, + relay_log_info_file, + SLAVE_SQL))) + { + WSREP_WARN("Failed to create slave threads: %d", rcode); + } + mysql_mutex_unlock(&LOCK_active_mi); + + } +} + +static void wsrep_init_position() +{ + /* read XIDs from storage engines */ + wsrep_uuid_t uuid; + wsrep_seqno_t seqno; + wsrep_get_SE_checkpoint(uuid, seqno); + + if (!memcmp(&uuid, &WSREP_UUID_UNDEFINED, sizeof(wsrep_uuid_t))) + { + WSREP_INFO("Read nil XID from storage engines, skipping position init"); + return; + } + + char uuid_str[40] = {0, }; + wsrep_uuid_print(&uuid, uuid_str, sizeof(uuid_str)); + WSREP_INFO("Initial position: %s:%lld", uuid_str, (long long)seqno); + + if (!memcmp(&local_uuid, &WSREP_UUID_UNDEFINED, sizeof(local_uuid)) && + local_seqno == WSREP_SEQNO_UNDEFINED) + { + // Initial state + local_uuid= uuid; + local_seqno= seqno; + } + else if (memcmp(&local_uuid, &uuid, sizeof(local_uuid)) || + local_seqno != seqno) + { + WSREP_WARN("Initial position was provided by configuration or SST, " + "avoiding override"); + } +} + +extern char* my_bind_addr_str; + +int wsrep_init() +{ + int rcode= -1; + DBUG_ASSERT(wsrep_inited == 0); + + wsrep_ready_set(FALSE); + assert(wsrep_provider); + + wsrep_init_position(); + + if ((rcode= wsrep_load(wsrep_provider, &wsrep, wsrep_log_cb)) != WSREP_OK) + { + if (strcasecmp(wsrep_provider, WSREP_NONE)) + { + WSREP_ERROR("wsrep_load(%s) failed: %s (%d). Reverting to no provider.", + wsrep_provider, strerror(rcode), rcode); + strcpy((char*)wsrep_provider, WSREP_NONE); // damn it's a dirty hack + return wsrep_init(); + } + else /* this is for recursive call above */ + { + WSREP_ERROR("Could not revert to no provider: %s (%d). Need to abort.", + strerror(rcode), rcode); + unireg_abort(1); + } + } + + if (!WSREP_PROVIDER_EXISTS) + { + // enable normal operation in case no provider is specified + wsrep_ready_set(TRUE); + wsrep_inited= 1; + global_system_variables.wsrep_on = 0; + wsrep_init_args args; + args.logger_cb = wsrep_log_cb; + args.options = (wsrep_provider_options) ? + wsrep_provider_options : ""; + rcode = wsrep->init(wsrep, &args); + if (rcode) + { + DBUG_PRINT("wsrep",("wsrep::init() failed: %d", rcode)); + WSREP_ERROR("wsrep::init() failed: %d, must shutdown", rcode); + wsrep->free(wsrep); + free(wsrep); + wsrep = NULL; + } + return rcode; + } + else + { + global_system_variables.wsrep_on = 1; + strncpy(provider_name, + wsrep->provider_name, sizeof(provider_name) - 1); + strncpy(provider_version, + wsrep->provider_version, sizeof(provider_version) - 1); + strncpy(provider_vendor, + wsrep->provider_vendor, sizeof(provider_vendor) - 1); + } + + char node_addr[512]= { 0, }; + size_t const node_addr_max= sizeof(node_addr) - 1; + if (!wsrep_node_address || !strcmp(wsrep_node_address, "")) + { + size_t const ret= wsrep_guess_ip(node_addr, node_addr_max); + if (!(ret > 0 && ret < node_addr_max)) + { + WSREP_WARN("Failed to guess base node address. Set it explicitly via " + "wsrep_node_address."); + node_addr[0]= '\0'; + } + } + else + { + strncpy(node_addr, wsrep_node_address, node_addr_max); + } + + char inc_addr[512]= { 0, }; + size_t const inc_addr_max= sizeof (inc_addr); + if ((!wsrep_node_incoming_address || + !strcmp (wsrep_node_incoming_address, WSREP_NODE_INCOMING_AUTO))) + { + unsigned int my_bind_ip= INADDR_ANY; // default if not set + if (my_bind_addr_str && strlen(my_bind_addr_str)) + { + my_bind_ip= wsrep_check_ip(my_bind_addr_str); + } + + if (INADDR_ANY != my_bind_ip) + { + if (INADDR_NONE != my_bind_ip && INADDR_LOOPBACK != my_bind_ip) + { + snprintf(inc_addr, inc_addr_max, "%s:%u", + my_bind_addr_str, (int)mysqld_port); + } // else leave inc_addr an empty string - mysqld is not listening for + // client connections on network interfaces. + } + else // mysqld binds to 0.0.0.0, take IP from wsrep_node_address if possible + { + size_t const node_addr_len= strlen(node_addr); + if (node_addr_len > 0) + { + const char* const colon= strrchr(node_addr, ':'); + if (strchr(node_addr, ':') == colon) // 1 or 0 ':' + { + size_t const ip_len= colon ? colon - node_addr : node_addr_len; + if (ip_len + 7 /* :55555\0 */ < inc_addr_max) + { + memcpy (inc_addr, node_addr, ip_len); + snprintf(inc_addr + ip_len, inc_addr_max - ip_len, ":%u", + (int)mysqld_port); + } + else + { + WSREP_WARN("Guessing address for incoming client connections: " + "address too long."); + inc_addr[0]= '\0'; + } + } + else + { + WSREP_WARN("Guessing address for incoming client connections: " + "too many colons :) ."); + inc_addr[0]= '\0'; + } + } + + if (!strlen(inc_addr)) + { + WSREP_WARN("Guessing address for incoming client connections failed. " + "Try setting wsrep_node_incoming_address explicitly."); + } + } + } + else if (!strchr(wsrep_node_incoming_address, ':')) // no port included + { + if ((int)inc_addr_max <= + snprintf(inc_addr, inc_addr_max, "%s:%u", + wsrep_node_incoming_address,(int)mysqld_port)) + { + WSREP_WARN("Guessing address for incoming client connections: " + "address too long."); + inc_addr[0]= '\0'; + } + } + else + { + size_t const need = strlen (wsrep_node_incoming_address); + if (need >= inc_addr_max) { + WSREP_WARN("wsrep_node_incoming_address too long: %zu", need); + inc_addr[0]= '\0'; + } + else { + memcpy (inc_addr, wsrep_node_incoming_address, need); + } + } + + struct wsrep_init_args wsrep_args; + + struct wsrep_gtid const state_id = { local_uuid, local_seqno }; + + wsrep_args.data_dir = wsrep_data_home_dir; + wsrep_args.node_name = (wsrep_node_name) ? wsrep_node_name : ""; + wsrep_args.node_address = node_addr; + wsrep_args.node_incoming = inc_addr; + wsrep_args.options = (wsrep_provider_options) ? + wsrep_provider_options : ""; + wsrep_args.proto_ver = wsrep_max_protocol_version; + + wsrep_args.state_id = &state_id; + + wsrep_args.logger_cb = wsrep_log_cb; + wsrep_args.view_handler_cb = wsrep_view_handler_cb; + wsrep_args.apply_cb = wsrep_apply_cb; + wsrep_args.commit_cb = wsrep_commit_cb; + wsrep_args.unordered_cb = wsrep_unordered_cb; + wsrep_args.sst_donate_cb = wsrep_sst_donate_cb; + wsrep_args.synced_cb = wsrep_synced_cb; + + rcode = wsrep->init(wsrep, &wsrep_args); + + if (rcode) + { + DBUG_PRINT("wsrep",("wsrep::init() failed: %d", rcode)); + WSREP_ERROR("wsrep::init() failed: %d, must shutdown", rcode); + wsrep->free(wsrep); + free(wsrep); + wsrep = NULL; + } else { + wsrep_inited= 1; + } + + return rcode; +} + +extern int wsrep_on(void *); + +void wsrep_init_startup (bool first) +{ + if (wsrep_init()) unireg_abort(1); + + wsrep_thr_lock_init(wsrep_thd_is_BF, wsrep_abort_thd, + wsrep_debug, wsrep_convert_LOCK_to_trx, wsrep_on); + + /* Skip replication start if dummy wsrep provider is loaded */ + if (!strcmp(wsrep_provider, WSREP_NONE)) return; + + /* Skip replication start if no cluster address */ + if (!wsrep_cluster_address || strlen(wsrep_cluster_address) == 0) return; + + if (first) wsrep_sst_grab(); // do it so we can wait for SST below + + if (!wsrep_start_replication()) unireg_abort(1); + + wsrep_creating_startup_threads= 1; + wsrep_create_rollbacker(); + wsrep_create_appliers(1); + + if (first && !wsrep_sst_wait()) unireg_abort(1);// wait until SST is completed +} + + +void wsrep_deinit(bool free_options) +{ + DBUG_ASSERT(wsrep_inited == 1); + wsrep_unload(wsrep); + wsrep= 0; + provider_name[0]= '\0'; + provider_version[0]= '\0'; + provider_vendor[0]= '\0'; + + wsrep_inited= 0; + + if (free_options) + { + wsrep_sst_auth_free(); + } +} + +void wsrep_recover() +{ + if (!memcmp(&local_uuid, &WSREP_UUID_UNDEFINED, sizeof(wsrep_uuid_t)) && + local_seqno == -2) + { + char uuid_str[40]; + wsrep_uuid_print(&local_uuid, uuid_str, sizeof(uuid_str)); + WSREP_INFO("Position %s:%lld given at startup, skipping position recovery", + uuid_str, (long long)local_seqno); + return; + } + wsrep_uuid_t uuid; + wsrep_seqno_t seqno; + wsrep_get_SE_checkpoint(uuid, seqno); + char uuid_str[40]; + wsrep_uuid_print(&uuid, uuid_str, sizeof(uuid_str)); + WSREP_INFO("Recovered position: %s:%lld", uuid_str, (long long)seqno); +} + + +void wsrep_stop_replication(THD *thd) +{ + WSREP_INFO("Stop replication"); + if (!wsrep) + { + WSREP_INFO("Provider was not loaded, in stop replication"); + return; + } + + /* disconnect from group first to get wsrep_ready == FALSE */ + WSREP_DEBUG("Provider disconnect"); + wsrep->disconnect(wsrep); + + wsrep_connected= FALSE; + + wsrep_close_client_connections(TRUE); + + /* wait until appliers have stopped */ + wsrep_wait_appliers_close(thd); + + return; +} + + +bool wsrep_start_replication() +{ + wsrep_status_t rcode; + + /* + if provider is trivial, don't even try to connect, + but resume local node operation + */ + if (!WSREP_PROVIDER_EXISTS) + { + // enable normal operation in case no provider is specified + wsrep_ready_set(TRUE); + return true; + } + + if (!wsrep_cluster_address || strlen(wsrep_cluster_address)== 0) + { + // if provider is non-trivial, but no address is specified, wait for address + wsrep_ready_set(FALSE); + return true; + } + + bool const bootstrap= wsrep_new_cluster; + + WSREP_INFO("Start replication"); + + if (wsrep_new_cluster) + { + WSREP_INFO("'wsrep-new-cluster' option used, bootstrapping the cluster"); + wsrep_new_cluster= false; + } + + if ((rcode = wsrep->connect(wsrep, + wsrep_cluster_name, + wsrep_cluster_address, + wsrep_sst_donor, + bootstrap))) + { + DBUG_PRINT("wsrep",("wsrep->connect(%s) failed: %d", + wsrep_cluster_address, rcode)); + WSREP_ERROR("wsrep::connect(%s) failed: %d", + wsrep_cluster_address, rcode); + return false; + } + else + { + wsrep_connected= TRUE; + + char* opts= wsrep->options_get(wsrep); + if (opts) + { + wsrep_provider_options_init(opts); + free(opts); + } + else + { + WSREP_WARN("Failed to get wsrep options"); + } + } + + return true; +} + +bool wsrep_must_sync_wait (THD* thd, uint mask) +{ + return (thd->variables.wsrep_sync_wait & mask) && + thd->variables.wsrep_on && + !(thd->variables.wsrep_dirty_reads && + !is_update_query(thd->lex->sql_command)) && + !thd->in_active_multi_stmt_transaction() && + thd->wsrep_conflict_state != REPLAYING && + thd->wsrep_sync_wait_gtid.seqno == WSREP_SEQNO_UNDEFINED; +} + +bool wsrep_sync_wait (THD* thd, uint mask) +{ + if (wsrep_must_sync_wait(thd, mask)) + { + WSREP_DEBUG("wsrep_sync_wait: thd->variables.wsrep_sync_wait = %u, mask = %u", + thd->variables.wsrep_sync_wait, mask); + // This allows autocommit SELECTs and a first SELECT after SET AUTOCOMMIT=0 + // TODO: modify to check if thd has locked any rows. + wsrep_status_t ret= wsrep->causal_read (wsrep, &thd->wsrep_sync_wait_gtid); + + if (unlikely(WSREP_OK != ret)) + { + const char* msg; + int err; + + // Possibly relevant error codes: + // ER_CHECKREAD, ER_ERROR_ON_READ, ER_INVALID_DEFAULT, ER_EMPTY_QUERY, + // ER_FUNCTION_NOT_DEFINED, ER_NOT_ALLOWED_COMMAND, ER_NOT_SUPPORTED_YET, + // ER_FEATURE_DISABLED, ER_QUERY_INTERRUPTED + + switch (ret) + { + case WSREP_NOT_IMPLEMENTED: + msg= "synchronous reads by wsrep backend. " + "Please unset wsrep_causal_reads variable."; + err= ER_NOT_SUPPORTED_YET; + break; + default: + msg= "Synchronous wait failed."; + err= ER_LOCK_WAIT_TIMEOUT; // NOTE: the above msg won't be displayed + // with ER_LOCK_WAIT_TIMEOUT + } + + my_error(err, MYF(0), msg); + + return true; + } + } + + return false; +} + +/* + * Helpers to deal with TOI key arrays + */ +typedef struct wsrep_key_arr +{ + wsrep_key_t* keys; + size_t keys_len; +} wsrep_key_arr_t; + + +static void wsrep_keys_free(wsrep_key_arr_t* key_arr) +{ + for (size_t i= 0; i < key_arr->keys_len; ++i) + { + my_free((void*)key_arr->keys[i].key_parts); + } + my_free(key_arr->keys); + key_arr->keys= 0; + key_arr->keys_len= 0; +} + + +/*! + * @param db Database string + * @param table Table string + * @param key Array of wsrep_key_t + * @param key_len In: number of elements in key array, Out: number of + * elements populated + * + * @return true if preparation was successful, otherwise false. + */ + +static bool wsrep_prepare_key_for_isolation(const char* db, + const char* table, + wsrep_buf_t* key, + size_t* key_len) +{ + if (*key_len < 2) return false; + + switch (wsrep_protocol_version) + { + case 0: + *key_len= 0; + break; + case 1: + case 2: + case 3: + { + *key_len= 0; + if (db) + { + // sql_print_information("%s.%s", db, table); + if (db) + { + key[*key_len].ptr= db; + key[*key_len].len= strlen(db); + ++(*key_len); + if (table) + { + key[*key_len].ptr= table; + key[*key_len].len= strlen(table); + ++(*key_len); + } + } + } + break; + } + default: + return false; + } + + return true; +} + +/* Prepare key list from db/table and table_list */ +static bool wsrep_prepare_keys_for_isolation(THD* thd, + const char* db, + const char* table, + const TABLE_LIST* table_list, + wsrep_key_arr_t* ka) +{ + ka->keys= 0; + ka->keys_len= 0; + + extern TABLE* find_temporary_table(THD*, const TABLE_LIST*); + + if (db || table) + { + TABLE_LIST tmp_table; + MDL_request mdl_request; + + memset(&tmp_table, 0, sizeof(tmp_table)); + tmp_table.table_name= (char*)table; + tmp_table.db= (char*)db; + tmp_table.mdl_request.init(MDL_key::GLOBAL, (db) ? db : "", + (table) ? table : "", + MDL_INTENTION_EXCLUSIVE, MDL_STATEMENT); + + if (!table || !find_temporary_table(thd, &tmp_table)) + { + if (!(ka->keys= (wsrep_key_t*)my_malloc(sizeof(wsrep_key_t), MYF(0)))) + { + WSREP_ERROR("Can't allocate memory for key_array"); + goto err; + } + ka->keys_len= 1; + if (!(ka->keys[0].key_parts= (wsrep_buf_t*) + my_malloc(sizeof(wsrep_buf_t)*2, MYF(0)))) + { + WSREP_ERROR("Can't allocate memory for key_parts"); + goto err; + } + ka->keys[0].key_parts_num= 2; + if (!wsrep_prepare_key_for_isolation( + db, table, + (wsrep_buf_t*)ka->keys[0].key_parts, + &ka->keys[0].key_parts_num)) + { + WSREP_ERROR("Preparing keys for isolation failed"); + goto err; + } + } + } + + for (const TABLE_LIST* table= table_list; table; table= table->next_global) + { + if (!find_temporary_table(thd, table)) + { + wsrep_key_t* tmp; + tmp= (wsrep_key_t*)my_realloc( + ka->keys, (ka->keys_len + 1) * sizeof(wsrep_key_t), + MYF(MY_ALLOW_ZERO_PTR)); + + if (!tmp) + { + WSREP_ERROR("Can't allocate memory for key_array"); + goto err; + } + ka->keys= tmp; + if (!(ka->keys[ka->keys_len].key_parts= (wsrep_buf_t*) + my_malloc(sizeof(wsrep_buf_t)*2, MYF(0)))) + { + WSREP_ERROR("Can't allocate memory for key_parts"); + goto err; + } + ka->keys[ka->keys_len].key_parts_num= 2; + ++ka->keys_len; + if (!wsrep_prepare_key_for_isolation( + table->db, table->table_name, + (wsrep_buf_t*)ka->keys[ka->keys_len - 1].key_parts, + &ka->keys[ka->keys_len - 1].key_parts_num)) + { + WSREP_ERROR("Preparing keys for isolation failed"); + goto err; + } + } + } + return true; +err: + wsrep_keys_free(ka); + return false; +} + + +bool wsrep_prepare_key_for_innodb(const uchar* cache_key, + size_t cache_key_len, + const uchar* row_id, + size_t row_id_len, + wsrep_buf_t* key, + size_t* key_len) +{ + if (*key_len < 3) return false; + + *key_len= 0; + switch (wsrep_protocol_version) + { + case 0: + { + key[0].ptr = cache_key; + key[0].len = cache_key_len; + + *key_len = 1; + break; + } + case 1: + case 2: + case 3: + { + key[0].ptr = cache_key; + key[0].len = strlen( (char*)cache_key ); + + key[1].ptr = cache_key + strlen( (char*)cache_key ) + 1; + key[1].len = strlen( (char*)(key[1].ptr) ); + + *key_len = 2; + break; + } + default: + return false; + } + + key[*key_len].ptr = row_id; + key[*key_len].len = row_id_len; + ++(*key_len); + + return true; +} + + +/* + * Construct Query_log_Event from thd query and serialize it + * into buffer. + * + * Return 0 in case of success, 1 in case of error. + */ +int wsrep_to_buf_helper( + THD* thd, const char *query, uint query_len, uchar** buf, size_t* buf_len) +{ + IO_CACHE tmp_io_cache; + if (open_cached_file(&tmp_io_cache, mysql_tmpdir, TEMP_PREFIX, + 65536, MYF(MY_WME))) + return 1; + int ret(0); + + Format_description_log_event *tmp_fd= new Format_description_log_event(4); + tmp_fd->checksum_alg= binlog_checksum_options; + tmp_fd->write(&tmp_io_cache); + delete tmp_fd; + +#ifdef GTID_SUPPORT + if (thd->variables.gtid_next.type == GTID_GROUP) + { + Gtid_log_event gtid_ev(thd, FALSE, &thd->variables.gtid_next); + if (!gtid_ev.is_valid()) ret= 0; + if (!ret && gtid_ev.write(&tmp_io_cache)) ret= 1; + } +#endif /* GTID_SUPPORT */ + + /* if there is prepare query, add event for it */ + if (!ret && thd->wsrep_TOI_pre_query) + { + Query_log_event ev(thd, thd->wsrep_TOI_pre_query, + thd->wsrep_TOI_pre_query_len, + FALSE, FALSE, FALSE, 0); + if (ev.write(&tmp_io_cache)) ret= 1; + } + + /* continue to append the actual query */ + Query_log_event ev(thd, query, query_len, FALSE, FALSE, FALSE, 0); + if (!ret && ev.write(&tmp_io_cache)) ret= 1; + if (!ret && wsrep_write_cache_buf(&tmp_io_cache, buf, buf_len)) ret= 1; + close_cached_file(&tmp_io_cache); + return ret; +} + +#include "sql_show.h" +static int +create_view_query(THD *thd, uchar** buf, size_t* buf_len) +{ + LEX *lex= thd->lex; + SELECT_LEX *select_lex= &lex->select_lex; + TABLE_LIST *first_table= select_lex->table_list.first; + TABLE_LIST *views = first_table; + + String buff; + const LEX_STRING command[3]= + {{ C_STRING_WITH_LEN("CREATE ") }, + { C_STRING_WITH_LEN("ALTER ") }, + { C_STRING_WITH_LEN("CREATE OR REPLACE ") }}; + + buff.append(command[thd->lex->create_view_mode].str, + command[thd->lex->create_view_mode].length); + + LEX_USER *definer; + + if (lex->definer) + { + definer= get_current_user(thd, lex->definer); + } + else + { + /* + DEFINER-clause is missing; we have to create default definer in + persistent arena to be PS/SP friendly. + If this is an ALTER VIEW then the current user should be set as + the definer. + */ + definer= create_default_definer(thd, false); + } + + if (definer) + { + views->definer.user = definer->user; + views->definer.host = definer->host; + } else { + WSREP_ERROR("Failed to get DEFINER for VIEW."); + return 1; + } + + views->algorithm = lex->create_view_algorithm; + views->view_suid = lex->create_view_suid; + views->with_check = lex->create_view_check; + + view_store_options4(thd, views, &buff, true); + buff.append(STRING_WITH_LEN("VIEW ")); + /* Test if user supplied a db (ie: we did not use thd->db) */ + if (views->db && views->db[0] && + (thd->db == NULL || strcmp(views->db, thd->db))) + { + append_identifier(thd, &buff, views->db, + views->db_length); + buff.append('.'); + } + append_identifier(thd, &buff, views->table_name, + views->table_name_length); + if (lex->view_list.elements) + { + List_iterator_fast<LEX_STRING> names(lex->view_list); + LEX_STRING *name; + int i; + + for (i= 0; (name= names++); i++) + { + buff.append(i ? ", " : "("); + append_identifier(thd, &buff, name->str, name->length); + } + buff.append(')'); + } + buff.append(STRING_WITH_LEN(" AS ")); + //buff.append(views->source.str, views->source.length); + buff.append(thd->lex->create_view_select.str, + thd->lex->create_view_select.length); + //int errcode= query_error_code(thd, TRUE); + //if (thd->binlog_query(THD::STMT_QUERY_TYPE, + // buff.ptr(), buff.length(), FALSE, FALSE, FALSE, errcod + return wsrep_to_buf_helper(thd, buff.ptr(), buff.length(), buf, buf_len); +} + +/* + returns: + 0: statement was replicated as TOI + 1: TOI replication was skipped + -1: TOI replication failed + */ +static int wsrep_TOI_begin(THD *thd, char *db_, char *table_, + const TABLE_LIST* table_list) +{ + wsrep_status_t ret(WSREP_WARNING); + uchar* buf(0); + size_t buf_len(0); + int buf_err; + int rc= 0; + + WSREP_DEBUG("TO BEGIN: %lld, %d : %s", (long long)wsrep_thd_trx_seqno(thd), + thd->wsrep_exec_mode, thd->query() ); + switch (thd->lex->sql_command) + { + case SQLCOM_CREATE_VIEW: + buf_err= create_view_query(thd, &buf, &buf_len); + break; + case SQLCOM_CREATE_PROCEDURE: + case SQLCOM_CREATE_SPFUNCTION: + buf_err= wsrep_create_sp(thd, &buf, &buf_len); + break; + case SQLCOM_CREATE_TRIGGER: + buf_err= wsrep_create_trigger_query(thd, &buf, &buf_len); + break; + case SQLCOM_CREATE_EVENT: + buf_err= wsrep_create_event_query(thd, &buf, &buf_len); + break; + case SQLCOM_ALTER_EVENT: + buf_err= wsrep_alter_event_query(thd, &buf, &buf_len); + break; + case SQLCOM_CREATE_ROLE: + if (sp_process_definer(thd)) + { + WSREP_WARN("Failed to set CREATE ROLE definer for TOI."); + } + /* fallthrough */ + default: + buf_err= wsrep_to_buf_helper(thd, thd->query(), thd->query_length(), &buf, + &buf_len); + break; + } + + wsrep_key_arr_t key_arr= {0, 0}; + struct wsrep_buf buff = { buf, buf_len }; + if (!buf_err && + wsrep_prepare_keys_for_isolation(thd, db_, table_, table_list, &key_arr)&& + key_arr.keys_len > 0 && + WSREP_OK == (ret = wsrep->to_execute_start(wsrep, thd->thread_id, + key_arr.keys, key_arr.keys_len, + &buff, 1, + &thd->wsrep_trx_meta))) + { + thd->wsrep_exec_mode= TOTAL_ORDER; + wsrep_to_isolation++; + wsrep_keys_free(&key_arr); + WSREP_DEBUG("TO BEGIN: %lld, %d",(long long)wsrep_thd_trx_seqno(thd), + thd->wsrep_exec_mode); + } + else if (key_arr.keys_len > 0) { + /* jump to error handler in mysql_execute_command() */ + WSREP_WARN("TO isolation failed for: %d, schema: %s, sql: %s. Check wsrep " + "connection state and retry the query.", + ret, + (thd->db ? thd->db : "(null)"), + (thd->query()) ? thd->query() : "void"); + my_error(ER_LOCK_DEADLOCK, MYF(0), "WSREP replication failed. Check " + "your wsrep connection state and retry the query."); + wsrep_keys_free(&key_arr); + rc= -1; + } + else { + /* non replicated DDL, affecting temporary tables only */ + WSREP_DEBUG("TO isolation skipped for: %d, sql: %s." + "Only temporary tables affected.", + ret, (thd->query()) ? thd->query() : "void"); + rc= 1; + } + if (buf) my_free(buf); + return rc; +} + +static void wsrep_TOI_end(THD *thd) { + wsrep_status_t ret; + wsrep_to_isolation--; + + WSREP_DEBUG("TO END: %lld, %d : %s", (long long)wsrep_thd_trx_seqno(thd), + thd->wsrep_exec_mode, (thd->query()) ? thd->query() : "void"); + + wsrep_set_SE_checkpoint(thd->wsrep_trx_meta.gtid.uuid, + thd->wsrep_trx_meta.gtid.seqno); + WSREP_DEBUG("TO END: %lld, update seqno", + (long long)wsrep_thd_trx_seqno(thd)); + + if (WSREP_OK == (ret = wsrep->to_execute_end(wsrep, thd->thread_id))) { + WSREP_DEBUG("TO END: %lld", (long long)wsrep_thd_trx_seqno(thd)); + } + else { + WSREP_WARN("TO isolation end failed for: %d, schema: %s, sql: %s", + ret, + (thd->db ? thd->db : "(null)"), + (thd->query()) ? thd->query() : "void"); + } +} + +static int wsrep_RSU_begin(THD *thd, char *db_, char *table_) +{ + wsrep_status_t ret(WSREP_WARNING); + WSREP_DEBUG("RSU BEGIN: %lld, %d : %s", (long long)wsrep_thd_trx_seqno(thd), + thd->wsrep_exec_mode, thd->query() ); + + ret = wsrep->desync(wsrep); + if (ret != WSREP_OK) + { + WSREP_WARN("RSU desync failed %d for schema: %s, query: %s", + ret, (thd->db ? thd->db : "(null)"), thd->query()); + my_error(ER_LOCK_DEADLOCK, MYF(0)); + return(ret); + } + + mysql_mutex_lock(&LOCK_wsrep_replaying); + wsrep_replaying++; + mysql_mutex_unlock(&LOCK_wsrep_replaying); + + if (wsrep_wait_committing_connections_close(5000)) + { + /* no can do, bail out from DDL */ + WSREP_WARN("RSU failed due to pending transactions, schema: %s, query %s", + (thd->db ? thd->db : "(null)"), + thd->query()); + mysql_mutex_lock(&LOCK_wsrep_replaying); + wsrep_replaying--; + mysql_mutex_unlock(&LOCK_wsrep_replaying); + + ret = wsrep->resync(wsrep); + if (ret != WSREP_OK) + { + WSREP_WARN("resync failed %d for schema: %s, query: %s", + ret, (thd->db ? thd->db : "(null)"), thd->query()); + } + + my_error(ER_LOCK_DEADLOCK, MYF(0)); + return(1); + } + + wsrep_seqno_t seqno = wsrep->pause(wsrep); + if (seqno == WSREP_SEQNO_UNDEFINED) + { + WSREP_WARN("pause failed %lld for schema: %s, query: %s", (long long)seqno, + (thd->db ? thd->db : "(null)"), + thd->query()); + return(1); + } + WSREP_DEBUG("paused at %lld", (long long)seqno); + thd->variables.wsrep_on = 0; + return 0; +} + +static void wsrep_RSU_end(THD *thd) +{ + wsrep_status_t ret(WSREP_WARNING); + WSREP_DEBUG("RSU END: %lld, %d : %s", (long long)wsrep_thd_trx_seqno(thd), + thd->wsrep_exec_mode, thd->query() ); + + + mysql_mutex_lock(&LOCK_wsrep_replaying); + wsrep_replaying--; + mysql_mutex_unlock(&LOCK_wsrep_replaying); + + ret = wsrep->resume(wsrep); + if (ret != WSREP_OK) + { + WSREP_WARN("resume failed %d for schema: %s, query: %s", ret, + (thd->db ? thd->db : "(null)"), + thd->query()); + } + + ret = wsrep->resync(wsrep); + if (ret != WSREP_OK) + { + WSREP_WARN("resync failed %d for schema: %s, query: %s", ret, + (thd->db ? thd->db : "(null)"), thd->query()); + return; + } + + thd->variables.wsrep_on = 1; +} + +int wsrep_to_isolation_begin(THD *thd, char *db_, char *table_, + const TABLE_LIST* table_list) +{ + + /* + No isolation for applier or replaying threads. + */ + if (thd->wsrep_exec_mode == REPL_RECV) return 0; + + int ret= 0; + mysql_mutex_lock(&thd->LOCK_wsrep_thd); + + if (thd->wsrep_conflict_state == MUST_ABORT) + { + WSREP_INFO("thread: %lu, schema: %s, query: %s has been aborted due to multi-master conflict", + thd->thread_id, + (thd->db ? thd->db : "(null)"), + thd->query()); + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + return WSREP_TRX_FAIL; + } + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + + DBUG_ASSERT(thd->wsrep_exec_mode == LOCAL_STATE); + DBUG_ASSERT(thd->wsrep_trx_meta.gtid.seqno == WSREP_SEQNO_UNDEFINED); + + if (thd->global_read_lock.can_acquire_protection()) + { + WSREP_DEBUG("Aborting TOI: Global Read-Lock (FTWRL) in place: %s %lu", + thd->query(), thd->thread_id); + return -1; + } + + if (wsrep_debug && thd->mdl_context.has_locks()) + { + WSREP_DEBUG("thread holds MDL locks at TI begin: %s %lu", + thd->query(), thd->thread_id); + } + + /* + It makes sense to set auto_increment_* to defaults in TOI operations. + Must be done before wsrep_TOI_begin() since Query_log_event encapsulating + TOI statement and auto inc variables for wsrep replication is constructed + there. Variables are reset back in THD::reset_for_next_command() before + processing of next command. + */ + if (wsrep_auto_increment_control) + { + thd->variables.auto_increment_offset = 1; + thd->variables.auto_increment_increment = 1; + } + + if (thd->variables.wsrep_on && thd->wsrep_exec_mode==LOCAL_STATE) + { + switch (thd->variables.wsrep_OSU_method) { + case WSREP_OSU_TOI: ret = wsrep_TOI_begin(thd, db_, table_, + table_list); break; + case WSREP_OSU_RSU: ret = wsrep_RSU_begin(thd, db_, table_); break; + default: + WSREP_ERROR("Unsupported OSU method: %lu", + thd->variables.wsrep_OSU_method); + ret= -1; + break; + } + switch (ret) { + case 0: thd->wsrep_exec_mode= TOTAL_ORDER; break; + case 1: + /* TOI replication skipped, treat as success */ + ret = 0; + break; + case -1: + /* TOI replication failed, treat as error */ + break; + } + } + return ret; +} + +void wsrep_to_isolation_end(THD *thd) +{ + if (thd->wsrep_exec_mode == TOTAL_ORDER) + { + switch(thd->variables.wsrep_OSU_method) + { + case WSREP_OSU_TOI: wsrep_TOI_end(thd); break; + case WSREP_OSU_RSU: wsrep_RSU_end(thd); break; + default: + WSREP_WARN("Unsupported wsrep OSU method at isolation end: %lu", + thd->variables.wsrep_OSU_method); + break; + } + wsrep_cleanup_transaction(thd); + } +} + +#define WSREP_MDL_LOG(severity, msg, schema, schema_len, req, gra) \ + WSREP_##severity( \ + "%s\n" \ + "schema: %.*s\n" \ + "request: (%lu \tseqno %lld \twsrep (%d, %d, %d) cmd %d %d \t%s)\n" \ + "granted: (%lu \tseqno %lld \twsrep (%d, %d, %d) cmd %d %d \t%s)", \ + msg, schema_len, schema, \ + req->thread_id, (long long)wsrep_thd_trx_seqno(req), \ + req->wsrep_exec_mode, req->wsrep_query_state, req->wsrep_conflict_state, \ + req->get_command(), req->lex->sql_command, req->query(), \ + gra->thread_id, (long long)wsrep_thd_trx_seqno(gra), \ + gra->wsrep_exec_mode, gra->wsrep_query_state, gra->wsrep_conflict_state, \ + gra->get_command(), gra->lex->sql_command, gra->query()); + +/** + Check if request for the metadata lock should be granted to the requester. + + @param requestor_ctx The MDL context of the requestor + @param ticket MDL ticket for the requested lock + + @retval TRUE Lock request can be granted + @retval FALSE Lock request cannot be granted +*/ + +bool +wsrep_grant_mdl_exception(MDL_context *requestor_ctx, + MDL_ticket *ticket, + const MDL_key *key +) { + /* Fallback to the non-wsrep behaviour */ + if (!WSREP_ON) return FALSE; + + THD *request_thd = requestor_ctx->get_thd(); + THD *granted_thd = ticket->get_ctx()->get_thd(); + bool ret = FALSE; + + const char* schema= key->db_name(); + int schema_len= key->db_name_length(); + + mysql_mutex_lock(&request_thd->LOCK_wsrep_thd); + if (request_thd->wsrep_exec_mode == TOTAL_ORDER || + request_thd->wsrep_exec_mode == REPL_RECV) + { + mysql_mutex_unlock(&request_thd->LOCK_wsrep_thd); + WSREP_MDL_LOG(DEBUG, "MDL conflict ", schema, schema_len, + request_thd, granted_thd); + ticket->wsrep_report(wsrep_debug); + + mysql_mutex_lock(&granted_thd->LOCK_wsrep_thd); + if (granted_thd->wsrep_exec_mode == TOTAL_ORDER || + granted_thd->wsrep_exec_mode == REPL_RECV) + { + WSREP_MDL_LOG(INFO, "MDL BF-BF conflict", schema, schema_len, + request_thd, granted_thd); + ticket->wsrep_report(true); + mysql_mutex_unlock(&granted_thd->LOCK_wsrep_thd); + ret = TRUE; + } + else if (granted_thd->lex->sql_command == SQLCOM_FLUSH || + granted_thd->mdl_context.wsrep_has_explicit_locks()) + { + WSREP_DEBUG("BF thread waiting for FLUSH"); + ticket->wsrep_report(wsrep_debug); + mysql_mutex_unlock(&granted_thd->LOCK_wsrep_thd); + ret = FALSE; + } + else if (request_thd->lex->sql_command == SQLCOM_DROP_TABLE) + { + WSREP_DEBUG("DROP caused BF abort"); + ticket->wsrep_report(wsrep_debug); + mysql_mutex_unlock(&granted_thd->LOCK_wsrep_thd); + wsrep_abort_thd((void*)request_thd, (void*)granted_thd, 1); + ret = FALSE; + } + else if (granted_thd->wsrep_query_state == QUERY_COMMITTING) + { + WSREP_DEBUG("mdl granted, but commiting thd abort scheduled"); + ticket->wsrep_report(wsrep_debug); + mysql_mutex_unlock(&granted_thd->LOCK_wsrep_thd); + wsrep_abort_thd((void*)request_thd, (void*)granted_thd, 1); + ret = FALSE; + } + else + { + WSREP_MDL_LOG(DEBUG, "MDL conflict-> BF abort", schema, schema_len, + request_thd, granted_thd); + ticket->wsrep_report(wsrep_debug); + mysql_mutex_unlock(&granted_thd->LOCK_wsrep_thd); + wsrep_abort_thd((void*)request_thd, (void*)granted_thd, 1); + ret = FALSE; + } + } + else + { + mysql_mutex_unlock(&request_thd->LOCK_wsrep_thd); + } + return ret; +} + +bool wsrep_node_is_donor() +{ + return (WSREP_ON) ? (local_status.get() == 2) : false; +} +bool wsrep_node_is_synced() +{ + return (WSREP_ON) ? (local_status.get() == 4) : false; +} diff --git a/sql/wsrep_mysqld.h b/sql/wsrep_mysqld.h new file mode 100644 index 00000000000..5ec183f7186 --- /dev/null +++ b/sql/wsrep_mysqld.h @@ -0,0 +1,338 @@ +/* Copyright 2008-2015 Codership Oy <http://www.codership.com> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#ifndef WSREP_MYSQLD_H +#define WSREP_MYSQLD_H + +#include "mysqld.h" +typedef struct st_mysql_show_var SHOW_VAR; +#include <sql_priv.h> +//#include "rpl_gtid.h" +#include "../wsrep/wsrep_api.h" + +#define WSREP_UNDEFINED_TRX_ID ULONGLONG_MAX + +class set_var; +class THD; + +enum wsrep_exec_mode { + /* Transaction processing before replication. */ + LOCAL_STATE, + /* Slave thread applying write sets from other nodes or replaying thread. */ + REPL_RECV, + /* Total-order-isolation mode */ + TOTAL_ORDER, + /* + Transaction procession after it has been replicated in prepare stage and + has passed certification + */ + LOCAL_COMMIT +}; + +enum wsrep_query_state { + QUERY_IDLE, + QUERY_EXEC, + QUERY_COMMITTING, + QUERY_EXITING, + QUERY_ROLLINGBACK, +}; + +enum wsrep_conflict_state { + NO_CONFLICT, + MUST_ABORT, + ABORTING, + ABORTED, + MUST_REPLAY, + REPLAYING, + RETRY_AUTOCOMMIT, + CERT_FAILURE, +}; + +enum wsrep_consistency_check_mode { + NO_CONSISTENCY_CHECK, + CONSISTENCY_CHECK_DECLARED, + CONSISTENCY_CHECK_RUNNING, +}; + + +// Global wsrep parameters +extern wsrep_t* wsrep; + +// MySQL wsrep options +extern const char* wsrep_provider; +extern const char* wsrep_provider_options; +extern const char* wsrep_cluster_name; +extern const char* wsrep_cluster_address; +extern const char* wsrep_node_name; +extern const char* wsrep_node_address; +extern const char* wsrep_node_incoming_address; +extern const char* wsrep_data_home_dir; +extern const char* wsrep_dbug_option; +extern long wsrep_slave_threads; +extern int wsrep_slave_count_change; +extern MYSQL_PLUGIN_IMPORT my_bool wsrep_debug; +extern my_bool wsrep_convert_LOCK_to_trx; +extern ulong wsrep_retry_autocommit; +extern my_bool wsrep_auto_increment_control; +extern my_bool wsrep_drupal_282555_workaround; +extern my_bool wsrep_incremental_data_collection; +extern bool wsrep_dirty_reads; +extern const char* wsrep_start_position; +extern ulong wsrep_max_ws_size; +extern ulong wsrep_max_ws_rows; +extern const char* wsrep_notify_cmd; +extern my_bool wsrep_certify_nonPK; +extern long wsrep_max_protocol_version; +extern long wsrep_protocol_version; +extern ulong wsrep_forced_binlog_format; +extern my_bool wsrep_desync; +extern my_bool wsrep_recovery; +extern my_bool wsrep_replicate_myisam; +extern my_bool wsrep_log_conflicts; +extern ulong wsrep_mysql_replication_bundle; +extern my_bool wsrep_load_data_splitting; +extern my_bool wsrep_restart_slave; +extern my_bool wsrep_restart_slave_activated; +extern my_bool wsrep_slave_FK_checks; +extern my_bool wsrep_slave_UK_checks; +extern bool wsrep_new_cluster; // bootstrap the cluster ? +extern my_bool wsrep_creating_startup_threads; + +enum enum_wsrep_OSU_method { + WSREP_OSU_TOI, + WSREP_OSU_RSU, + WSREP_OSU_NONE, +}; + +enum enum_wsrep_sync_wait { + WSREP_SYNC_WAIT_NONE = 0x0, + // show, select, begin + WSREP_SYNC_WAIT_BEFORE_READ = 0x1, + WSREP_SYNC_WAIT_BEFORE_UPDATE_DELETE = 0x2, + WSREP_SYNC_WAIT_BEFORE_INSERT_REPLACE = 0x4, + WSREP_SYNC_WAIT_MAX = 0x7 +}; + +// MySQL status variables +extern my_bool wsrep_connected; +extern my_bool wsrep_ready; +extern const char* wsrep_cluster_state_uuid; +extern long long wsrep_cluster_conf_id; +extern const char* wsrep_cluster_status; +extern long wsrep_cluster_size; +extern long wsrep_local_index; +extern long long wsrep_local_bf_aborts; +extern const char* wsrep_provider_name; +extern const char* wsrep_provider_version; +extern const char* wsrep_provider_vendor; + +int wsrep_show_status(THD *thd, SHOW_VAR *var, char *buff); +void wsrep_free_status(THD *thd); + +int wsrep_init(); +void wsrep_deinit(bool free_options); +void wsrep_recover(); +bool wsrep_before_SE(); // initialize wsrep before storage + // engines (true) or after (false) +/* wsrep initialization sequence at startup + * @param before wsrep_before_SE() value */ +void wsrep_init_startup(bool before); + +// Other wsrep global variables +extern my_bool wsrep_inited; // whether wsrep is initialized ? + +extern "C" enum wsrep_exec_mode wsrep_thd_exec_mode(THD *thd); +extern "C" enum wsrep_conflict_state wsrep_thd_conflict_state(THD *thd); +extern "C" enum wsrep_query_state wsrep_thd_query_state(THD *thd); +extern "C" const char * wsrep_thd_exec_mode_str(THD *thd); +extern "C" const char * wsrep_thd_conflict_state_str(THD *thd); +extern "C" const char * wsrep_thd_query_state_str(THD *thd); +extern "C" wsrep_ws_handle_t* wsrep_thd_ws_handle(THD *thd); + +extern "C" void wsrep_thd_set_exec_mode(THD *thd, enum wsrep_exec_mode mode); +extern "C" void wsrep_thd_set_query_state( + THD *thd, enum wsrep_query_state state); +extern "C" void wsrep_thd_set_conflict_state( + THD *thd, enum wsrep_conflict_state state); + +extern "C" void wsrep_thd_set_trx_to_replay(THD *thd, uint64 trx_id); + +extern "C" void wsrep_thd_LOCK(THD *thd); +extern "C" void wsrep_thd_UNLOCK(THD *thd); +extern "C" uint32 wsrep_thd_wsrep_rand(THD *thd); +extern "C" time_t wsrep_thd_query_start(THD *thd); +extern "C" my_thread_id wsrep_thd_thread_id(THD *thd); +extern "C" int64_t wsrep_thd_trx_seqno(THD *thd); +extern "C" query_id_t wsrep_thd_query_id(THD *thd); +extern "C" char * wsrep_thd_query(THD *thd); +extern "C" query_id_t wsrep_thd_wsrep_last_query_id(THD *thd); +extern "C" void wsrep_thd_set_wsrep_last_query_id(THD *thd, query_id_t id); +extern "C" void wsrep_thd_awake(THD *thd, my_bool signal); +extern "C" int wsrep_thd_retry_counter(THD *thd); +extern "C" bool wsrep_thd_skip_append_keys(THD *thd); + +extern void wsrep_close_client_connections(my_bool wait_to_end); +extern int wsrep_wait_committing_connections_close(int wait_time); +extern void wsrep_close_applier(THD *thd); +extern void wsrep_wait_appliers_close(THD *thd); +extern void wsrep_close_applier_threads(int count); +extern void wsrep_kill_mysql(THD *thd); + +/* new defines */ +extern void wsrep_stop_replication(THD *thd); +extern bool wsrep_start_replication(); +extern bool wsrep_must_sync_wait (THD* thd, uint mask = WSREP_SYNC_WAIT_BEFORE_READ); +extern bool wsrep_sync_wait (THD* thd, uint mask = WSREP_SYNC_WAIT_BEFORE_READ); +extern int wsrep_check_opts (int argc, char* const* argv); +extern void wsrep_prepend_PATH (const char* path); + +/* Other global variables */ +extern wsrep_seqno_t wsrep_locked_seqno; + +#define WSREP_ON \ + ((global_system_variables.wsrep_on) && \ + wsrep_provider && \ + strcmp(wsrep_provider, WSREP_NONE)) + +#define WSREP(thd) \ + (WSREP_ON && wsrep && (thd && thd->variables.wsrep_on)) + +#define WSREP_CLIENT(thd) \ + (WSREP(thd) && thd->wsrep_client_thread) + +#define WSREP_EMULATE_BINLOG(thd) \ + (WSREP(thd) && wsrep_emulate_bin_log) + +// MySQL logging functions don't seem to understand long long length modifer. +// This is a workaround. It also prefixes all messages with "WSREP" +#define WSREP_LOG(fun, ...) \ + { \ + char msg[1024] = {'\0'}; \ + snprintf(msg, sizeof(msg) - 1, ## __VA_ARGS__); \ + fun("WSREP: %s", msg); \ + } + +#define WSREP_DEBUG(...) \ + if (wsrep_debug) WSREP_LOG(sql_print_information, ##__VA_ARGS__) +#define WSREP_INFO(...) WSREP_LOG(sql_print_information, ##__VA_ARGS__) +#define WSREP_WARN(...) WSREP_LOG(sql_print_warning, ##__VA_ARGS__) +#define WSREP_ERROR(...) WSREP_LOG(sql_print_error, ##__VA_ARGS__) + +#define WSREP_LOG_CONFLICT_THD(thd, role) \ + WSREP_LOG(sql_print_information, \ + "%s: \n " \ + " THD: %lu, mode: %s, state: %s, conflict: %s, seqno: %lld\n " \ + " SQL: %s", \ + role, wsrep_thd_thread_id(thd), wsrep_thd_exec_mode_str(thd), \ + wsrep_thd_query_state_str(thd), \ + wsrep_thd_conflict_state_str(thd), (long long)wsrep_thd_trx_seqno(thd), \ + wsrep_thd_query(thd) \ + ); + +#define WSREP_LOG_CONFLICT(bf_thd, victim_thd, bf_abort) \ + if (wsrep_debug || wsrep_log_conflicts) \ + { \ + WSREP_LOG(sql_print_information, "cluster conflict due to %s for threads:",\ + (bf_abort) ? "high priority abort" : "certification failure" \ + ); \ + if (bf_thd != NULL) WSREP_LOG_CONFLICT_THD(bf_thd, "Winning thread"); \ + if (victim_thd) WSREP_LOG_CONFLICT_THD(victim_thd, "Victim thread"); \ + } + +#define WSREP_PROVIDER_EXISTS \ + (wsrep_provider && strncasecmp(wsrep_provider, WSREP_NONE, FN_REFLEN)) + +extern void wsrep_ready_wait(); + +enum wsrep_trx_status { + WSREP_TRX_OK, + WSREP_TRX_CERT_FAIL, /* certification failure, must abort */ + WSREP_TRX_SIZE_EXCEEDED, /* trx size exceeded */ + WSREP_TRX_ERROR, /* native mysql error */ +}; + +extern enum wsrep_trx_status +wsrep_run_wsrep_commit(THD *thd, handlerton *hton, bool all); +class Ha_trx_info; +struct THD_TRANS; +void wsrep_register_hton(THD* thd, bool all); +void wsrep_post_commit(THD* thd, bool all); +void wsrep_brute_force_killer(THD *thd); +int wsrep_hire_brute_force_killer(THD *thd, uint64_t trx_id); + +extern "C" bool wsrep_consistency_check(void *thd_ptr); + +/* this is visible for client build so that innodb plugin gets this */ +typedef struct wsrep_aborting_thd { + struct wsrep_aborting_thd *next; + THD *aborting_thd; +} *wsrep_aborting_thd_t; + +extern mysql_mutex_t LOCK_wsrep_ready; +extern mysql_cond_t COND_wsrep_ready; +extern mysql_mutex_t LOCK_wsrep_sst; +extern mysql_cond_t COND_wsrep_sst; +extern mysql_mutex_t LOCK_wsrep_sst_init; +extern mysql_cond_t COND_wsrep_sst_init; +extern mysql_mutex_t LOCK_wsrep_rollback; +extern mysql_cond_t COND_wsrep_rollback; +extern int wsrep_replaying; +extern mysql_mutex_t LOCK_wsrep_replaying; +extern mysql_cond_t COND_wsrep_replaying; +extern mysql_mutex_t LOCK_wsrep_slave_threads; +extern mysql_mutex_t LOCK_wsrep_desync; +extern wsrep_aborting_thd_t wsrep_aborting_thd; +extern my_bool wsrep_emulate_bin_log; +extern int wsrep_to_isolation; +#ifdef GTID_SUPPORT +extern rpl_sidno wsrep_sidno; +#endif /* GTID_SUPPORT */ +extern my_bool wsrep_preordered_opt; + +#ifdef HAVE_PSI_INTERFACE +extern PSI_mutex_key key_LOCK_wsrep_ready; +extern PSI_mutex_key key_COND_wsrep_ready; +extern PSI_mutex_key key_LOCK_wsrep_sst; +extern PSI_cond_key key_COND_wsrep_sst; +extern PSI_mutex_key key_LOCK_wsrep_sst_init; +extern PSI_cond_key key_COND_wsrep_sst_init; +extern PSI_mutex_key key_LOCK_wsrep_sst_thread; +extern PSI_cond_key key_COND_wsrep_sst_thread; +extern PSI_mutex_key key_LOCK_wsrep_rollback; +extern PSI_cond_key key_COND_wsrep_rollback; +extern PSI_mutex_key key_LOCK_wsrep_replaying; +extern PSI_cond_key key_COND_wsrep_replaying; +extern PSI_mutex_key key_LOCK_wsrep_slave_threads; +extern PSI_mutex_key key_LOCK_wsrep_desync; +#endif /* HAVE_PSI_INTERFACE */ +struct TABLE_LIST; +int wsrep_to_isolation_begin(THD *thd, char *db_, char *table_, + const TABLE_LIST* table_list); +void wsrep_to_isolation_end(THD *thd); +void wsrep_cleanup_transaction(THD *thd); +int wsrep_to_buf_helper( + THD* thd, const char *query, uint query_len, uchar** buf, size_t* buf_len); +int wsrep_create_sp(THD *thd, uchar** buf, size_t* buf_len); +int wsrep_create_trigger_query(THD *thd, uchar** buf, size_t* buf_len); +int wsrep_create_event_query(THD *thd, uchar** buf, size_t* buf_len); +int wsrep_alter_event_query(THD *thd, uchar** buf, size_t* buf_len); + +#ifdef GTID_SUPPORT +void wsrep_init_sidno(const wsrep_uuid_t&); +#endif /* GTID_SUPPORT */ + +bool wsrep_node_is_donor(); +bool wsrep_node_is_synced(); +#endif /* WSREP_MYSQLD_H */ diff --git a/sql/wsrep_notify.cc b/sql/wsrep_notify.cc new file mode 100644 index 00000000000..e7d30d5a9c1 --- /dev/null +++ b/sql/wsrep_notify.cc @@ -0,0 +1,111 @@ +/* Copyright 2010 Codership Oy <http://www.codership.com> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include <mysqld.h> +#include "wsrep_priv.h" +#include "wsrep_utils.h" + +const char* wsrep_notify_cmd=""; + +static const char* _status_str(wsrep_member_status_t status) +{ + switch (status) + { + case WSREP_MEMBER_UNDEFINED: return "Undefined"; + case WSREP_MEMBER_JOINER: return "Joiner"; + case WSREP_MEMBER_DONOR: return "Donor"; + case WSREP_MEMBER_JOINED: return "Joined"; + case WSREP_MEMBER_SYNCED: return "Synced"; + default: return "Error(?)"; + } +} + +void wsrep_notify_status (wsrep_member_status_t status, + const wsrep_view_info_t* view) +{ + if (!wsrep_notify_cmd || 0 == strlen(wsrep_notify_cmd)) + { + WSREP_INFO("wsrep_notify_cmd is not defined, skipping notification."); + return; + } + + char cmd_buf[1 << 16]; // this can be long + long cmd_len = sizeof(cmd_buf) - 1; + char* cmd_ptr = cmd_buf; + long cmd_off = 0; + + cmd_off += snprintf (cmd_ptr + cmd_off, cmd_len - cmd_off, "%s", + wsrep_notify_cmd); + + if (status >= WSREP_MEMBER_UNDEFINED && status < WSREP_MEMBER_ERROR) + { + cmd_off += snprintf (cmd_ptr + cmd_off, cmd_len - cmd_off, " --status %s", + _status_str(status)); + } + else + { + /* here we preserve provider error codes */ + cmd_off += snprintf (cmd_ptr + cmd_off, cmd_len - cmd_off, + " --status 'Error(%d)'", status); + } + + if (0 != view) + { + char uuid_str[40]; + + wsrep_uuid_print (&view->state_id.uuid, uuid_str, sizeof(uuid_str)); + cmd_off += snprintf (cmd_ptr + cmd_off, cmd_len - cmd_off, + " --uuid %s", uuid_str); + + cmd_off += snprintf (cmd_ptr + cmd_off, cmd_len - cmd_off, + " --primary %s", view->view >= 0 ? "yes" : "no"); + + cmd_off += snprintf (cmd_ptr + cmd_off, cmd_len - cmd_off, + " --index %d", view->my_idx); + + if (view->memb_num) + { + cmd_off += snprintf (cmd_ptr + cmd_off, cmd_len - cmd_off, " --members"); + + for (int i = 0; i < view->memb_num; i++) + { + wsrep_uuid_print (&view->members[i].id, uuid_str, sizeof(uuid_str)); + cmd_off += snprintf (cmd_ptr + cmd_off, cmd_len - cmd_off, + "%c%s/%s/%s", i > 0 ? ',' : ' ', + uuid_str, view->members[i].name, + view->members[i].incoming); + } + } + } + + if (cmd_off == cmd_len) + { + WSREP_ERROR("Notification buffer too short (%ld). Aborting notification.", + cmd_len); + return; + } + + wsp::process p(cmd_ptr, "r", NULL); + + p.wait(); + int err = p.error(); + + if (err) + { + WSREP_ERROR("Notification command failed: %d (%s): \"%s\"", + err, strerror(err), cmd_ptr); + } +} + diff --git a/sql/wsrep_priv.h b/sql/wsrep_priv.h new file mode 100644 index 00000000000..30dce78c1a4 --- /dev/null +++ b/sql/wsrep_priv.h @@ -0,0 +1,51 @@ +/* Copyright 2010 Codership Oy <http://www.codership.com> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +//! @file declares symbols private to wsrep integration layer + +#ifndef WSREP_PRIV_H +#define WSREP_PRIV_H + +#include "wsrep_mysqld.h" +#include "../wsrep/wsrep_api.h" + +#include <log.h> +#include <pthread.h> +#include <cstdio> + +void wsrep_ready_set (my_bool x); + +ssize_t wsrep_sst_prepare (void** msg); +wsrep_cb_status wsrep_sst_donate_cb (void* app_ctx, + void* recv_ctx, + const void* msg, size_t msg_len, + const wsrep_gtid_t* state_id, + const char* state, size_t state_len, + bool bypass); + +extern wsrep_uuid_t local_uuid; +extern wsrep_seqno_t local_seqno; + +// a helper function +void wsrep_sst_received(wsrep_t*, const wsrep_uuid_t&, wsrep_seqno_t, + const void*, size_t); +/*! SST thread signals init thread about sst completion */ +void wsrep_sst_complete(const wsrep_uuid_t*, wsrep_seqno_t, bool); + +void wsrep_notify_status (wsrep_member_status_t new_status, + const wsrep_view_info_t* view = 0); + +#endif /* WSREP_PRIV_H */ diff --git a/sql/wsrep_sst.cc b/sql/wsrep_sst.cc new file mode 100644 index 00000000000..11698089582 --- /dev/null +++ b/sql/wsrep_sst.cc @@ -0,0 +1,1224 @@ +/* Copyright 2008-2015 Codership Oy <http://www.codership.com> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include "wsrep_sst.h" + +#include <mysqld.h> +#include <m_ctype.h> +#include <my_sys.h> +#include <strfunc.h> +#include <sql_class.h> +#include <set_var.h> +#include <sql_acl.h> +#include <sql_reload.h> +#include <sql_parse.h> +#include "wsrep_priv.h" +#include "wsrep_utils.h" +#include "wsrep_xid.h" +#include <cstdio> +#include <cstdlib> + +extern const char wsrep_defaults_file[]; +extern const char wsrep_defaults_group_suffix[]; + +const char* wsrep_sst_method = WSREP_SST_DEFAULT; +const char* wsrep_sst_receive_address = WSREP_SST_ADDRESS_AUTO; +const char* wsrep_sst_donor = ""; + char* wsrep_sst_auth = NULL; + +// container for real auth string +static const char* sst_auth_real = NULL; +my_bool wsrep_sst_donor_rejects_queries = FALSE; + +bool wsrep_sst_method_check (sys_var *self, THD* thd, set_var* var) +{ + if ((! var->save_result.string_value.str) || + (var->save_result.string_value.length == 0 )) + { + my_error(ER_WRONG_VALUE_FOR_VAR, MYF(0), var->var->name.str, + var->save_result.string_value.str ? + var->save_result.string_value.str : "NULL"); + return 1; + } + + return 0; +} + +bool wsrep_sst_method_update (sys_var *self, THD* thd, enum_var_type type) +{ + return 0; +} + +// TODO: Improve address verification. +static bool sst_receive_address_check (const char* str) +{ + if (!strncasecmp(str, "127.0.0.1", strlen("127.0.0.1")) || + !strncasecmp(str, "localhost", strlen("localhost"))) + { + return 1; + } + + return 0; +} + +bool wsrep_sst_receive_address_check (sys_var *self, THD* thd, set_var* var) +{ + char addr_buf[FN_REFLEN]; + + if ((! var->save_result.string_value.str) || + (var->save_result.string_value.length > (FN_REFLEN - 1))) // safety + { + goto err; + } + + memcpy(addr_buf, var->save_result.string_value.str, + var->save_result.string_value.length); + addr_buf[var->save_result.string_value.length]= 0; + + if (sst_receive_address_check(addr_buf)) + { + goto err; + } + + return 0; + +err: + my_error(ER_WRONG_VALUE_FOR_VAR, MYF(0), var->var->name.str, + var->save_result.string_value.str ? + var->save_result.string_value.str : "NULL"); + return 1; +} + +bool wsrep_sst_receive_address_update (sys_var *self, THD* thd, + enum_var_type type) +{ + return 0; +} + +bool wsrep_sst_auth_check (sys_var *self, THD* thd, set_var* var) +{ + return 0; +} + +static bool sst_auth_real_set (const char* value) +{ + const char* v= NULL; + + if (value) + { + v= my_strdup(value, MYF(0)); + } + else // its NULL + { + wsrep_sst_auth_free(); + return 0; + } + + if (v) + { + // set sst_auth_real + if (sst_auth_real) { my_free((void *) sst_auth_real); } + sst_auth_real = v; + + // mask wsrep_sst_auth + if (strlen(sst_auth_real)) + { + if (wsrep_sst_auth) { my_free((void*) wsrep_sst_auth); } + wsrep_sst_auth= my_strdup(WSREP_SST_AUTH_MASK, MYF(0)); + } + return 0; + } + return 1; +} + +void wsrep_sst_auth_free() +{ + if (wsrep_sst_auth) { my_free((void *) wsrep_sst_auth); } + if (sst_auth_real) { my_free((void *) sst_auth_real); } + wsrep_sst_auth= NULL; + sst_auth_real= NULL; +} + +bool wsrep_sst_auth_update (sys_var *self, THD* thd, enum_var_type type) +{ + return sst_auth_real_set (wsrep_sst_auth); +} + +void wsrep_sst_auth_init (const char* value) +{ + if (wsrep_sst_auth == value) wsrep_sst_auth = NULL; + if (value) sst_auth_real_set (value); +} + +bool wsrep_sst_donor_check (sys_var *self, THD* thd, set_var* var) +{ + return 0; +} + +bool wsrep_sst_donor_update (sys_var *self, THD* thd, enum_var_type type) +{ + return 0; +} + +static wsrep_uuid_t cluster_uuid = WSREP_UUID_UNDEFINED; + +bool wsrep_before_SE() +{ + return (wsrep_provider != NULL + && strcmp (wsrep_provider, WSREP_NONE) + && strcmp (wsrep_sst_method, WSREP_SST_SKIP) + && strcmp (wsrep_sst_method, WSREP_SST_MYSQLDUMP)); +} + +static bool sst_complete = false; +static bool sst_needed = false; + +void wsrep_sst_grab () +{ + WSREP_INFO("wsrep_sst_grab()"); + if (mysql_mutex_lock (&LOCK_wsrep_sst)) abort(); + sst_complete = false; + mysql_mutex_unlock (&LOCK_wsrep_sst); +} + +// Wait for end of SST +bool wsrep_sst_wait () +{ + if (mysql_mutex_lock (&LOCK_wsrep_sst)) abort(); + while (!sst_complete) + { + WSREP_INFO("Waiting for SST to complete."); + mysql_cond_wait (&COND_wsrep_sst, &LOCK_wsrep_sst); + } + + if (local_seqno >= 0) + { + WSREP_INFO("SST complete, seqno: %lld", (long long) local_seqno); + } + else + { + WSREP_ERROR("SST failed: %d (%s)", + int(-local_seqno), strerror(-local_seqno)); + } + + mysql_mutex_unlock (&LOCK_wsrep_sst); + + return (local_seqno >= 0); +} + +// Signal end of SST +void wsrep_sst_complete (const wsrep_uuid_t* sst_uuid, + wsrep_seqno_t sst_seqno, + bool needed) +{ + if (mysql_mutex_lock (&LOCK_wsrep_sst)) abort(); + if (!sst_complete) + { + sst_complete = true; + sst_needed = needed; + local_uuid = *sst_uuid; + local_seqno = sst_seqno; + mysql_cond_signal (&COND_wsrep_sst); + } + else + { + /* This can happen when called from wsrep_synced_cb(). + At the moment there is no way to check there + if main thread is still waiting for signal, + so wsrep_sst_complete() is called from there + each time wsrep_ready changes from FALSE -> TRUE. + */ + WSREP_DEBUG("Nobody is waiting for SST."); + } + mysql_mutex_unlock (&LOCK_wsrep_sst); +} + +void wsrep_sst_received (wsrep_t* const wsrep, + const wsrep_uuid_t& uuid, + wsrep_seqno_t const seqno, + const void* const state, + size_t const state_len) +{ + wsrep_get_SE_checkpoint(local_uuid, local_seqno); + + if (memcmp(&local_uuid, &uuid, sizeof(wsrep_uuid_t)) || + local_seqno < seqno || seqno < 0) + { + wsrep_set_SE_checkpoint(uuid, seqno); + local_uuid = uuid; + local_seqno = seqno; + } + else if (local_seqno > seqno) + { + WSREP_WARN("SST postion is in the past: %lld, current: %lld. " + "Can't continue.", + (long long)seqno, (long long)local_seqno); + unireg_abort(1); + } + +#ifdef GTID_SUPPORT + wsrep_init_sidno(uuid); +#endif /* GTID_SUPPORT */ + + if (wsrep) + { + int const rcode(seqno < 0 ? seqno : 0); + wsrep_gtid_t const state_id = { + uuid, (rcode ? WSREP_SEQNO_UNDEFINED : seqno) + }; + + wsrep->sst_received(wsrep, &state_id, state, state_len, rcode); + } +} + +// Let applier threads to continue +void wsrep_sst_continue () +{ + if (sst_needed) + { + WSREP_INFO("Signalling provider to continue."); + wsrep_sst_received (wsrep, local_uuid, local_seqno, NULL, 0); + } +} + +struct sst_thread_arg +{ + const char* cmd; + char** env; + char* ret_str; + int err; + mysql_mutex_t lock; + mysql_cond_t cond; + + sst_thread_arg (const char* c, char** e) + : cmd(c), env(e), ret_str(0), err(-1) + { + mysql_mutex_init(key_LOCK_wsrep_sst_thread, &lock, MY_MUTEX_INIT_FAST); + mysql_cond_init(key_COND_wsrep_sst_thread, &cond, NULL); + } + + ~sst_thread_arg() + { + mysql_cond_destroy (&cond); + mysql_mutex_unlock (&lock); + mysql_mutex_destroy (&lock); + } +}; + +static int sst_scan_uuid_seqno (const char* str, + wsrep_uuid_t* uuid, wsrep_seqno_t* seqno) +{ + int offt = wsrep_uuid_scan (str, strlen(str), uuid); + if (offt > 0 && strlen(str) > (unsigned int)offt && ':' == str[offt]) + { + *seqno = strtoll (str + offt + 1, NULL, 10); + if (*seqno != LLONG_MAX || errno != ERANGE) + { + return 0; + } + } + + WSREP_ERROR("Failed to parse uuid:seqno pair: '%s'", str); + return EINVAL; +} + +// get rid of trailing \n +static char* my_fgets (char* buf, size_t buf_len, FILE* stream) +{ + char* ret= fgets (buf, buf_len, stream); + + if (ret) + { + size_t len = strlen(ret); + if (len > 0 && ret[len - 1] == '\n') ret[len - 1] = '\0'; + } + + return ret; +} + +/* + Generate opt_binlog_opt_val for sst_donate_other(), sst_prepare_other(). + + Returns zero on success, negative error code otherwise. + + String containing binlog name is stored in param ret if binlog is enabled + and GTID mode is on, otherwise empty string. Returned string should be + freed with my_free(). + */ +static int generate_binlog_opt_val(char** ret) +{ + DBUG_ASSERT(ret); + *ret= NULL; + if (opt_bin_log) + { + assert(opt_bin_logname); + *ret= strcmp(opt_bin_logname, "0") ? + my_strdup(opt_bin_logname, MYF(0)) : my_strdup("", MYF(0)); + } + else + { + *ret= my_strdup("", MYF(0)); + } + if (!*ret) return -ENOMEM; + return 0; +} + +static void* sst_joiner_thread (void* a) +{ + sst_thread_arg* arg= (sst_thread_arg*) a; + int err= 1; + + { + const char magic[] = "ready"; + const size_t magic_len = sizeof(magic) - 1; + const size_t out_len = 512; + char out[out_len]; + + WSREP_INFO("Running: '%s'", arg->cmd); + + wsp::process proc (arg->cmd, "r", arg->env); + + if (proc.pipe() && !proc.error()) + { + const char* tmp= my_fgets (out, out_len, proc.pipe()); + + if (!tmp || strlen(tmp) < (magic_len + 2) || + strncasecmp (tmp, magic, magic_len)) + { + WSREP_ERROR("Failed to read '%s <addr>' from: %s\n\tRead: '%s'", + magic, arg->cmd, tmp); + proc.wait(); + if (proc.error()) err = proc.error(); + } + else + { + err = 0; + } + } + else + { + err = proc.error(); + WSREP_ERROR("Failed to execute: %s : %d (%s)", + arg->cmd, err, strerror(err)); + } + + // signal sst_prepare thread with ret code, + // it will go on sending SST request + mysql_mutex_lock (&arg->lock); + if (!err) + { + arg->ret_str = strdup (out + magic_len + 1); + if (!arg->ret_str) err = ENOMEM; + } + arg->err = -err; + mysql_cond_signal (&arg->cond); + mysql_mutex_unlock (&arg->lock); //! @note arg is unusable after that. + + if (err) return NULL; /* lp:808417 - return immediately, don't signal + * initializer thread to ensure single thread of + * shutdown. */ + + wsrep_uuid_t ret_uuid = WSREP_UUID_UNDEFINED; + wsrep_seqno_t ret_seqno = WSREP_SEQNO_UNDEFINED; + + // in case of successfull receiver start, wait for SST completion/end + char* tmp = my_fgets (out, out_len, proc.pipe()); + + proc.wait(); + err= EINVAL; + + if (!tmp) + { + WSREP_ERROR("Failed to read uuid:seqno from joiner script."); + if (proc.error()) err = proc.error(); + } + else + { + err= sst_scan_uuid_seqno (out, &ret_uuid, &ret_seqno); + } + + if (err) + { + ret_uuid= WSREP_UUID_UNDEFINED; + ret_seqno= -err; + } + + // Tell initializer thread that SST is complete + wsrep_sst_complete (&ret_uuid, ret_seqno, true); + } + + return NULL; +} + +#define WSREP_SST_AUTH_ENV "WSREP_SST_OPT_AUTH" + +static int sst_append_auth_env(wsp::env& env, const char* sst_auth) +{ + int const sst_auth_size= strlen(WSREP_SST_AUTH_ENV) + 1 /* = */ + + (sst_auth ? strlen(sst_auth) : 0) + 1 /* \0 */; + + wsp::string sst_auth_str(sst_auth_size); // for automatic cleanup on return + if (!sst_auth_str()) return -ENOMEM; + + int ret= snprintf(sst_auth_str(), sst_auth_size, "%s=%s", + WSREP_SST_AUTH_ENV, sst_auth ? sst_auth : ""); + + if (ret < 0 || ret >= sst_auth_size) + { + WSREP_ERROR("sst_append_auth_env(): snprintf() failed: %d", ret); + return (ret < 0 ? ret : -EMSGSIZE); + } + + env.append(sst_auth_str()); + return -env.error(); +} + +static ssize_t sst_prepare_other (const char* method, + const char* sst_auth, + const char* addr_in, + const char** addr_out) +{ + int const cmd_len= 4096; + wsp::string cmd_str(cmd_len); + + if (!cmd_str()) + { + WSREP_ERROR("sst_prepare_other(): could not allocate cmd buffer of %d bytes", + cmd_len); + return -ENOMEM; + } + + const char* binlog_opt= ""; + char* binlog_opt_val= NULL; + + int ret; + if ((ret= generate_binlog_opt_val(&binlog_opt_val))) + { + WSREP_ERROR("sst_prepare_other(): generate_binlog_opt_val() failed: %d", + ret); + return ret; + } + if (strlen(binlog_opt_val)) binlog_opt= WSREP_SST_OPT_BINLOG; + + ret= snprintf (cmd_str(), cmd_len, + "wsrep_sst_%s " + WSREP_SST_OPT_ROLE" 'joiner' " + WSREP_SST_OPT_ADDR" '%s' " + WSREP_SST_OPT_DATA" '%s' " + WSREP_SST_OPT_CONF" '%s' " + WSREP_SST_OPT_CONF_SUFFIX" '%s' " + WSREP_SST_OPT_PARENT" '%d'" + " %s '%s' ", + method, addr_in, mysql_real_data_home, + wsrep_defaults_file, wsrep_defaults_group_suffix, + (int)getpid(), binlog_opt, binlog_opt_val); + my_free(binlog_opt_val); + + if (ret < 0 || ret >= cmd_len) + { + WSREP_ERROR("sst_prepare_other(): snprintf() failed: %d", ret); + return (ret < 0 ? ret : -EMSGSIZE); + } + + wsp::env env(NULL); + if (env.error()) + { + WSREP_ERROR("sst_prepare_other(): env. var ctor failed: %d", -env.error()); + return -env.error(); + } + + if ((ret= sst_append_auth_env(env, sst_auth))) + { + WSREP_ERROR("sst_prepare_other(): appending auth failed: %d", ret); + return ret; + } + + pthread_t tmp; + sst_thread_arg arg(cmd_str(), env()); + mysql_mutex_lock (&arg.lock); + ret = pthread_create (&tmp, NULL, sst_joiner_thread, &arg); + if (ret) + { + WSREP_ERROR("sst_prepare_other(): pthread_create() failed: %d (%s)", + ret, strerror(ret)); + return -ret; + } + mysql_cond_wait (&arg.cond, &arg.lock); + + *addr_out= arg.ret_str; + + if (!arg.err) + ret = strlen(*addr_out); + else + { + assert (arg.err < 0); + ret = arg.err; + } + + pthread_detach (tmp); + + return ret; +} + +extern uint mysqld_port; + +/*! Just tells donor where to send mysqldump */ +static ssize_t sst_prepare_mysqldump (const char* addr_in, + const char** addr_out) +{ + ssize_t ret = strlen (addr_in); + + if (!strrchr(addr_in, ':')) + { + ssize_t s = ret + 7; + char* tmp = (char*) malloc (s); + + if (tmp) + { + ret= snprintf (tmp, s, "%s:%u", addr_in, mysqld_port); + + if (ret > 0 && ret < s) + { + *addr_out= tmp; + return ret; + } + if (ret > 0) /* buffer too short */ ret = -EMSGSIZE; + free (tmp); + } + else { + ret= -ENOMEM; + } + + WSREP_ERROR ("Could not prepare state transfer request: " + "adding default port failed: %zd.", ret); + } + else { + *addr_out= addr_in; + } + + return ret; +} + +static bool SE_initialized = false; + +ssize_t wsrep_sst_prepare (void** msg) +{ + const ssize_t ip_max= 256; + char ip_buf[ip_max]; + const char* addr_in= NULL; + const char* addr_out= NULL; + + if (!strcmp(wsrep_sst_method, WSREP_SST_SKIP)) + { + ssize_t ret = strlen(WSREP_STATE_TRANSFER_TRIVIAL) + 1; + *msg = strdup(WSREP_STATE_TRANSFER_TRIVIAL); + if (!msg) + { + WSREP_ERROR("Could not allocate %zd bytes for state request", ret); + unireg_abort(1); + } + return ret; + } + + // Figure out SST address. Common for all SST methods + if (wsrep_sst_receive_address && + strcmp (wsrep_sst_receive_address, WSREP_SST_ADDRESS_AUTO)) + { + addr_in= wsrep_sst_receive_address; + } + else if (wsrep_node_address && strlen(wsrep_node_address)) + { + const char* const colon= strchr (wsrep_node_address, ':'); + if (colon) + { + ptrdiff_t const len= colon - wsrep_node_address; + strncpy (ip_buf, wsrep_node_address, len); + ip_buf[len]= '\0'; + addr_in= ip_buf; + } + else + { + addr_in= wsrep_node_address; + } + } + else + { + ssize_t ret= wsrep_guess_ip (ip_buf, ip_max); + + if (ret && ret < ip_max) + { + addr_in= ip_buf; + } + else + { + WSREP_ERROR("Could not prepare state transfer request: " + "failed to guess address to accept state transfer at. " + "wsrep_sst_receive_address must be set manually."); + unireg_abort(1); + } + } + + ssize_t addr_len= -ENOSYS; + if (!strcmp(wsrep_sst_method, WSREP_SST_MYSQLDUMP)) + { + addr_len= sst_prepare_mysqldump (addr_in, &addr_out); + if (addr_len < 0) unireg_abort(1); + } + else + { + /*! A heuristic workaround until we learn how to stop and start engines */ + if (SE_initialized) + { + // we already did SST at initializaiton, now engines are running + // sql_print_information() is here because the message is too long + // for WSREP_INFO. + sql_print_information ("WSREP: " + "You have configured '%s' state snapshot transfer method " + "which cannot be performed on a running server. " + "Wsrep provider won't be able to fall back to it " + "if other means of state transfer are unavailable. " + "In that case you will need to restart the server.", + wsrep_sst_method); + *msg = 0; + return 0; + } + + addr_len = sst_prepare_other (wsrep_sst_method, sst_auth_real, + addr_in, &addr_out); + if (addr_len < 0) + { + WSREP_ERROR("Failed to prepare for '%s' SST. Unrecoverable.", + wsrep_sst_method); + unireg_abort(1); + } + } + + size_t const method_len(strlen(wsrep_sst_method)); + size_t const msg_len (method_len + addr_len + 2 /* + auth_len + 1*/); + + *msg = malloc (msg_len); + if (NULL != *msg) { + char* const method_ptr(reinterpret_cast<char*>(*msg)); + strcpy (method_ptr, wsrep_sst_method); + char* const addr_ptr(method_ptr + method_len + 1); + strcpy (addr_ptr, addr_out); + + WSREP_INFO ("Prepared SST request: %s|%s", method_ptr, addr_ptr); + } + else { + WSREP_ERROR("Failed to allocate SST request of size %zu. Can't continue.", + msg_len); + unireg_abort(1); + } + + if (addr_out != addr_in) /* malloc'ed */ free ((char*)addr_out); + + return msg_len; +} + +// helper method for donors +static int sst_run_shell (const char* cmd_str, char** env, int max_tries) +{ + int ret = 0; + + for (int tries=1; tries <= max_tries; tries++) + { + wsp::process proc (cmd_str, "r", env); + + if (NULL != proc.pipe()) + { + proc.wait(); + } + + if ((ret = proc.error())) + { + WSREP_ERROR("Try %d/%d: '%s' failed: %d (%s)", + tries, max_tries, proc.cmd(), ret, strerror(ret)); + sleep (1); + } + else + { + WSREP_DEBUG("SST script successfully completed."); + break; + } + } + + return -ret; +} + +static void sst_reject_queries(my_bool close_conn) +{ + wsrep_ready_set (FALSE); // this will be resotred when donor becomes synced + WSREP_INFO("Rejecting client queries for the duration of SST."); + if (TRUE == close_conn) wsrep_close_client_connections(FALSE); +} + +static int sst_donate_mysqldump (const char* addr, + const wsrep_uuid_t* uuid, + const char* uuid_str, + wsrep_seqno_t seqno, + bool bypass, + char** env) // carries auth info +{ + size_t host_len; + const char* port = strchr (addr, ':'); + + if (port) + { + port += 1; + host_len = port - addr; + } + else + { + port = ""; + host_len = strlen (addr) + 1; + } + + char *host= (char *) alloca(host_len); + + strncpy (host, addr, host_len - 1); + host[host_len - 1] = '\0'; + + int const cmd_len= 4096; + wsp::string cmd_str(cmd_len); + + if (!cmd_str()) + { + WSREP_ERROR("sst_donate_mysqldump(): " + "could not allocate cmd buffer of %d bytes", cmd_len); + return -ENOMEM; + } + + if (!bypass && wsrep_sst_donor_rejects_queries) sst_reject_queries(TRUE); + + int ret= snprintf (cmd_str(), cmd_len, + "wsrep_sst_mysqldump " + WSREP_SST_OPT_HOST" '%s' " + WSREP_SST_OPT_PORT" '%s' " + WSREP_SST_OPT_LPORT" '%u' " + WSREP_SST_OPT_SOCKET" '%s' " + WSREP_SST_OPT_CONF" '%s' " + WSREP_SST_OPT_GTID" '%s:%lld'" + "%s", + host, port, mysqld_port, mysqld_unix_port, + wsrep_defaults_file, uuid_str, + (long long)seqno, bypass ? " " WSREP_SST_OPT_BYPASS : ""); + + if (ret < 0 || ret >= cmd_len) + { + WSREP_ERROR("sst_donate_mysqldump(): snprintf() failed: %d", ret); + return (ret < 0 ? ret : -EMSGSIZE); + } + + WSREP_DEBUG("Running: '%s'", cmd_str()); + + ret= sst_run_shell (cmd_str(), env, 3); + + wsrep_gtid_t const state_id = { *uuid, (ret ? WSREP_SEQNO_UNDEFINED : seqno)}; + + wsrep->sst_sent (wsrep, &state_id, ret); + + return ret; +} + +wsrep_seqno_t wsrep_locked_seqno= WSREP_SEQNO_UNDEFINED; + +static int run_sql_command(THD *thd, const char *query) +{ + thd->set_query((char *)query, strlen(query)); + + Parser_state ps; + if (ps.init(thd, thd->query(), thd->query_length())) + { + WSREP_ERROR("SST query: %s failed", query); + return -1; + } + + mysql_parse(thd, thd->query(), thd->query_length(), &ps); + if (thd->is_error()) + { + int const err= thd->get_stmt_da()->sql_errno(); + WSREP_WARN ("error executing '%s': %d (%s)%s", + query, err, thd->get_stmt_da()->message(), + err == ER_UNKNOWN_SYSTEM_VARIABLE ? + ". Was mysqld built with --with-innodb-disallow-writes ?" : ""); + thd->clear_error(); + return -1; + } + return 0; +} + +static int sst_flush_tables(THD* thd) +{ + WSREP_INFO("Flushing tables for SST..."); + + int err; + int not_used; + CHARSET_INFO *current_charset; + + current_charset = thd->variables.character_set_client; + + if (!is_supported_parser_charset(current_charset)) + { + /* Do not use non-supported parser character sets */ + WSREP_WARN("Current client character set is non-supported parser character set: %s", current_charset->csname); + thd->variables.character_set_client = &my_charset_latin1; + WSREP_WARN("For SST temporally setting character set to : %s", + my_charset_latin1.csname); + } + + if (run_sql_command(thd, "FLUSH TABLES WITH READ LOCK")) + { + WSREP_ERROR("Failed to flush and lock tables"); + err = -1; + } + else + { + /* make sure logs are flushed after global read lock acquired */ + err= reload_acl_and_cache(thd, REFRESH_ENGINE_LOG | REFRESH_BINARY_LOG, + (TABLE_LIST*) 0, ¬_used); + } + + thd->variables.character_set_client = current_charset; + + + if (err) + { + WSREP_ERROR("Failed to flush tables: %d (%s)", err, strerror(err)); + } + else + { + WSREP_INFO("Tables flushed."); + const char base_name[]= "tables_flushed"; + + ssize_t const full_len= strlen(mysql_real_data_home) + strlen(base_name)+2; + char *real_name= (char *) alloca(full_len); + snprintf(real_name, (size_t) full_len, "%s/%s", mysql_real_data_home, + base_name); + char *tmp_name= (char *) alloca(full_len + 4); + snprintf(tmp_name, (size_t) full_len + 4, "%s.tmp", real_name); + + FILE* file= fopen(tmp_name, "w+"); + if (0 == file) + { + err= errno; + WSREP_ERROR("Failed to open '%s': %d (%s)", tmp_name, err,strerror(err)); + } + else + { + fprintf(file, "%s:%lld\n", + wsrep_cluster_state_uuid, (long long)wsrep_locked_seqno); + fsync(fileno(file)); + fclose(file); + if (rename(tmp_name, real_name) == -1) + { + err= errno; + WSREP_ERROR("Failed to rename '%s' to '%s': %d (%s)", + tmp_name, real_name, err,strerror(err)); + } + } + } + + return err; +} + +static void sst_disallow_writes (THD* thd, bool yes) +{ + char query_str[64] = { 0, }; + ssize_t const query_max = sizeof(query_str) - 1; + CHARSET_INFO *current_charset; + + current_charset = thd->variables.character_set_client; + + if (!is_supported_parser_charset(current_charset)) + { + /* Do not use non-supported parser character sets */ + WSREP_WARN("Current client character set is non-supported parser character set: %s", current_charset->csname); + thd->variables.character_set_client = &my_charset_latin1; + WSREP_WARN("For SST temporally setting character set to : %s", + my_charset_latin1.csname); + } + + snprintf (query_str, query_max, "SET GLOBAL innodb_disallow_writes=%d", + yes ? 1 : 0); + + if (run_sql_command(thd, query_str)) + { + WSREP_ERROR("Failed to disallow InnoDB writes"); + } + thd->variables.character_set_client = current_charset; +} + +static void* sst_donor_thread (void* a) +{ + sst_thread_arg* arg= (sst_thread_arg*)a; + + WSREP_INFO("Running: '%s'", arg->cmd); + + int err= 1; + bool locked= false; + + const char* out= NULL; + const size_t out_len= 128; + char out_buf[out_len]; + + wsrep_uuid_t ret_uuid= WSREP_UUID_UNDEFINED; + wsrep_seqno_t ret_seqno= WSREP_SEQNO_UNDEFINED; // seqno of complete SST + + wsp::thd thd(FALSE); // we turn off wsrep_on for this THD so that it can + // operate with wsrep_ready == OFF + wsp::process proc(arg->cmd, "r", arg->env); + + err= proc.error(); + +/* Inform server about SST script startup and release TO isolation */ + mysql_mutex_lock (&arg->lock); + arg->err = -err; + mysql_cond_signal (&arg->cond); + mysql_mutex_unlock (&arg->lock); //! @note arg is unusable after that. + + if (proc.pipe() && !err) + { +wait_signal: + out= my_fgets (out_buf, out_len, proc.pipe()); + + if (out) + { + const char magic_flush[]= "flush tables"; + const char magic_cont[]= "continue"; + const char magic_done[]= "done"; + + if (!strcasecmp (out, magic_flush)) + { + err= sst_flush_tables (thd.ptr); + if (!err) + { + sst_disallow_writes (thd.ptr, true); + /* + Lets also keep statements that modify binary logs (like RESET LOGS, + RESET MASTER) from proceeding until the files have been transferred + to the joiner node. + */ + if (mysql_bin_log.is_open()) + { + mysql_mutex_lock(mysql_bin_log.get_log_lock()); + } + + locked= true; + goto wait_signal; + } + } + else if (!strcasecmp (out, magic_cont)) + { + if (locked) + { + if (mysql_bin_log.is_open()) + { + mysql_mutex_assert_owner(mysql_bin_log.get_log_lock()); + mysql_mutex_unlock(mysql_bin_log.get_log_lock()); + } + sst_disallow_writes (thd.ptr, false); + thd.ptr->global_read_lock.unlock_global_read_lock (thd.ptr); + locked= false; + } + err= 0; + goto wait_signal; + } + else if (!strncasecmp (out, magic_done, strlen(magic_done))) + { + err= sst_scan_uuid_seqno (out + strlen(magic_done) + 1, + &ret_uuid, &ret_seqno); + } + else + { + WSREP_WARN("Received unknown signal: '%s'", out); + } + } + else + { + WSREP_ERROR("Failed to read from: %s", proc.cmd()); + proc.wait(); + } + if (!err && proc.error()) err= proc.error(); + } + else + { + WSREP_ERROR("Failed to execute: %s : %d (%s)", + proc.cmd(), err, strerror(err)); + } + + if (locked) // don't forget to unlock server before return + { + if (mysql_bin_log.is_open()) + { + mysql_mutex_assert_owner(mysql_bin_log.get_log_lock()); + mysql_mutex_unlock(mysql_bin_log.get_log_lock()); + } + sst_disallow_writes (thd.ptr, false); + thd.ptr->global_read_lock.unlock_global_read_lock (thd.ptr); + } + + // signal to donor that SST is over + struct wsrep_gtid const state_id = { + ret_uuid, err ? WSREP_SEQNO_UNDEFINED : ret_seqno + }; + wsrep->sst_sent (wsrep, &state_id, -err); + proc.wait(); + + return NULL; +} + + + +static int sst_donate_other (const char* method, + const char* addr, + const char* uuid, + wsrep_seqno_t seqno, + bool bypass, + char** env) // carries auth info +{ + int const cmd_len= 4096; + wsp::string cmd_str(cmd_len); + + if (!cmd_str()) + { + WSREP_ERROR("sst_donate_other(): " + "could not allocate cmd buffer of %d bytes", cmd_len); + return -ENOMEM; + } + + const char* binlog_opt= ""; + char* binlog_opt_val= NULL; + + int ret; + if ((ret= generate_binlog_opt_val(&binlog_opt_val))) + { + WSREP_ERROR("sst_donate_other(): generate_binlog_opt_val() failed: %d",ret); + return ret; + } + if (strlen(binlog_opt_val)) binlog_opt= WSREP_SST_OPT_BINLOG; + + ret= snprintf (cmd_str(), cmd_len, + "wsrep_sst_%s " + WSREP_SST_OPT_ROLE" 'donor' " + WSREP_SST_OPT_ADDR" '%s' " + WSREP_SST_OPT_SOCKET" '%s' " + WSREP_SST_OPT_DATA" '%s' " + WSREP_SST_OPT_CONF" '%s' " + WSREP_SST_OPT_CONF_SUFFIX" '%s' " + " %s '%s' " + WSREP_SST_OPT_GTID" '%s:%lld'" + "%s", + method, addr, mysqld_unix_port, mysql_real_data_home, + wsrep_defaults_file, wsrep_defaults_group_suffix, + binlog_opt, binlog_opt_val, + uuid, (long long) seqno, + bypass ? " " WSREP_SST_OPT_BYPASS : ""); + my_free(binlog_opt_val); + + if (ret < 0 || ret >= cmd_len) + { + WSREP_ERROR("sst_donate_other(): snprintf() failed: %d", ret); + return (ret < 0 ? ret : -EMSGSIZE); + } + + if (!bypass && wsrep_sst_donor_rejects_queries) sst_reject_queries(FALSE); + + pthread_t tmp; + sst_thread_arg arg(cmd_str(), env); + mysql_mutex_lock (&arg.lock); + ret = pthread_create (&tmp, NULL, sst_donor_thread, &arg); + if (ret) + { + WSREP_ERROR("sst_donate_other(): pthread_create() failed: %d (%s)", + ret, strerror(ret)); + return ret; + } + mysql_cond_wait (&arg.cond, &arg.lock); + + WSREP_INFO("sst_donor_thread signaled with %d", arg.err); + return arg.err; +} + +wsrep_cb_status_t wsrep_sst_donate_cb (void* app_ctx, void* recv_ctx, + const void* msg, size_t msg_len, + const wsrep_gtid_t* current_gtid, + const char* state, size_t state_len, + bool bypass) +{ + /* This will be reset when sync callback is called. + * Should we set wsrep_ready to FALSE here too? */ +// wsrep_notify_status(WSREP_MEMBER_DONOR); + local_status.set(WSREP_MEMBER_DONOR); + + const char* method = (char*)msg; + size_t method_len = strlen (method); + const char* data = method + method_len + 1; + + char uuid_str[37]; + wsrep_uuid_print (¤t_gtid->uuid, uuid_str, sizeof(uuid_str)); + + wsp::env env(NULL); + if (env.error()) + { + WSREP_ERROR("wsrep_sst_donate_cb(): env var ctor failed: %d", -env.error()); + return WSREP_CB_FAILURE; + } + + int ret; + if ((ret= sst_append_auth_env(env, sst_auth_real))) + { + WSREP_ERROR("wsrep_sst_donate_cb(): appending auth env failed: %d", ret); + return WSREP_CB_FAILURE; + } + + if (!strcmp (WSREP_SST_MYSQLDUMP, method)) + { + ret = sst_donate_mysqldump(data, ¤t_gtid->uuid, uuid_str, + current_gtid->seqno, bypass, env()); + } + else + { + ret = sst_donate_other(method, data, uuid_str, + current_gtid->seqno, bypass, env()); + } + + return (ret >= 0 ? WSREP_CB_SUCCESS : WSREP_CB_FAILURE); +} + +void wsrep_SE_init_grab() +{ + if (mysql_mutex_lock (&LOCK_wsrep_sst_init)) abort(); +} + +void wsrep_SE_init_wait() +{ + while (SE_initialized == false) + { + mysql_cond_wait (&COND_wsrep_sst_init, &LOCK_wsrep_sst_init); + } + mysql_mutex_unlock (&LOCK_wsrep_sst_init); +} + +void wsrep_SE_init_done() +{ + mysql_cond_signal (&COND_wsrep_sst_init); + mysql_mutex_unlock (&LOCK_wsrep_sst_init); +} + +void wsrep_SE_initialized() +{ + SE_initialized = true; +} diff --git a/sql/wsrep_sst.h b/sql/wsrep_sst.h new file mode 100644 index 00000000000..42f1055bde2 --- /dev/null +++ b/sql/wsrep_sst.h @@ -0,0 +1,70 @@ +/* Copyright (C) 2013 Codership Oy <info@codership.com> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */ + +#ifndef WSREP_SST_H +#define WSREP_SST_H + +#include <my_config.h> +#include <mysql.h> // my_bool + +#define WSREP_SST_OPT_ROLE "--role" +#define WSREP_SST_OPT_ADDR "--address" +#define WSREP_SST_OPT_AUTH "--auth" +#define WSREP_SST_OPT_DATA "--datadir" +#define WSREP_SST_OPT_CONF "--defaults-file" +#define WSREP_SST_OPT_CONF_SUFFIX "--defaults-group-suffix" +#define WSREP_SST_OPT_PARENT "--parent" +#define WSREP_SST_OPT_BINLOG "--binlog" + +// mysqldump-specific options +#define WSREP_SST_OPT_USER "--user" +#define WSREP_SST_OPT_PSWD "--password" +#define WSREP_SST_OPT_HOST "--host" +#define WSREP_SST_OPT_PORT "--port" +#define WSREP_SST_OPT_LPORT "--local-port" + +// donor-specific +#define WSREP_SST_OPT_SOCKET "--socket" +#define WSREP_SST_OPT_GTID "--gtid" +#define WSREP_SST_OPT_BYPASS "--bypass" + +#define WSREP_SST_MYSQLDUMP "mysqldump" +#define WSREP_SST_RSYNC "rsync" +#define WSREP_SST_SKIP "skip" +#define WSREP_SST_DEFAULT WSREP_SST_RSYNC +#define WSREP_SST_ADDRESS_AUTO "AUTO" +#define WSREP_SST_AUTH_MASK "********" + +/* system variables */ +extern const char* wsrep_sst_method; +extern const char* wsrep_sst_receive_address; +extern const char* wsrep_sst_donor; +extern char* wsrep_sst_auth; +extern my_bool wsrep_sst_donor_rejects_queries; + +/*! Synchronizes applier thread start with init thread */ +extern void wsrep_sst_grab(); +/*! Init thread waits for SST completion */ +extern bool wsrep_sst_wait(); +/*! Signals wsrep that initialization is complete, writesets can be applied */ +extern void wsrep_sst_continue(); +extern void wsrep_sst_auth_free(); + +extern void wsrep_SE_init_grab(); /*! grab init critical section */ +extern void wsrep_SE_init_wait(); /*! wait for SE init to complete */ +extern void wsrep_SE_init_done(); /*! signal that SE init is complte */ +extern void wsrep_SE_initialized(); /*! mark SE initialization complete */ + +#endif /* WSREP_SST_H */ diff --git a/sql/wsrep_thd.cc b/sql/wsrep_thd.cc new file mode 100644 index 00000000000..307745ff1b0 --- /dev/null +++ b/sql/wsrep_thd.cc @@ -0,0 +1,666 @@ +/* Copyright (C) 2013 Codership Oy <info@codership.com> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */ + +#include "wsrep_thd.h" + +#include "transaction.h" +#include "rpl_rli.h" +#include "log_event.h" +#include "sql_parse.h" +//#include "global_threads.h" // LOCK_thread_count, etc. +#include "sql_base.h" // close_thread_tables() +#include "mysqld.h" // start_wsrep_THD(); + +#include "slave.h" // opt_log_slave_updates +#include "rpl_filter.h" +#include "rpl_rli.h" +#include "rpl_mi.h" + +#if (__LP64__) +static volatile int64 wsrep_bf_aborts_counter(0); +#define WSREP_ATOMIC_LOAD_LONG my_atomic_load64 +#define WSREP_ATOMIC_ADD_LONG my_atomic_add64 +#else +static volatile int32 wsrep_bf_aborts_counter(0); +#define WSREP_ATOMIC_LOAD_LONG my_atomic_load32 +#define WSREP_ATOMIC_ADD_LONG my_atomic_add32 +#endif + +int wsrep_show_bf_aborts (THD *thd, SHOW_VAR *var, char *buff) +{ + wsrep_local_bf_aborts = WSREP_ATOMIC_LOAD_LONG(&wsrep_bf_aborts_counter); + var->type = SHOW_LONGLONG; + var->value = (char*)&wsrep_local_bf_aborts; + return 0; +} + +/* must have (&thd->LOCK_wsrep_thd) */ +void wsrep_client_rollback(THD *thd) +{ + WSREP_DEBUG("client rollback due to BF abort for (%ld), query: %s", + thd->thread_id, thd->query()); + + WSREP_ATOMIC_ADD_LONG(&wsrep_bf_aborts_counter, 1); + + thd->wsrep_conflict_state= ABORTING; + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + trans_rollback(thd); + + if (thd->locked_tables_mode && thd->lock) + { + WSREP_DEBUG("unlocking tables for BF abort (%ld)", thd->thread_id); + thd->locked_tables_list.unlock_locked_tables(thd); + thd->variables.option_bits&= ~(OPTION_TABLE_LOCK); + } + + if (thd->global_read_lock.is_acquired()) + { + WSREP_DEBUG("unlocking GRL for BF abort (%ld)", thd->thread_id); + thd->global_read_lock.unlock_global_read_lock(thd); + } + + /* Release transactional metadata locks. */ + thd->mdl_context.release_transactional_locks(); + + /* release explicit MDL locks */ + thd->mdl_context.release_explicit_locks(); + + if (thd->get_binlog_table_maps()) + { + WSREP_DEBUG("clearing binlog table map for BF abort (%ld)", thd->thread_id); + thd->clear_binlog_table_maps(); + } + mysql_mutex_lock(&thd->LOCK_wsrep_thd); + thd->wsrep_conflict_state= ABORTED; +} + +#define NUMBER_OF_FIELDS_TO_IDENTIFY_COORDINATOR 1 +#define NUMBER_OF_FIELDS_TO_IDENTIFY_WORKER 2 + +static rpl_group_info* wsrep_relay_group_init(const char* log_fname) +{ + Relay_log_info* rli= new Relay_log_info(false); + + rli->no_storage= true; + if (!rli->relay_log.description_event_for_exec) + { + rli->relay_log.description_event_for_exec= + new Format_description_log_event(4); + } + + static LEX_STRING connection_name= { C_STRING_WITH_LEN("wsrep") }; + + /* + Master_info's constructor initializes rpl_filter by either an already + constructed Rpl_filter object from global 'rpl_filters' list if the + specified connection name is same, or it constructs a new Rpl_filter + object and adds it to rpl_filters. This object is later destructed by + Mater_info's destructor by looking it up based on connection name in + rpl_filters list. + + However, since all Master_info objects created here would share same + connection name ("wsrep"), destruction of any of the existing Master_info + objects (in wsrep_return_from_bf_mode()) would free rpl_filter referenced + by any/all existing Master_info objects. + + In order to avoid that, we have added a check in Master_info's destructor + to not free the "wsrep" rpl_filter. It will eventually be freed by + free_all_rpl_filters() when server terminates. + */ + rli->mi = new Master_info(&connection_name, false); + + struct rpl_group_info *rgi= new rpl_group_info(rli); + rgi->thd= rli->sql_driver_thd= current_thd; + + if ((rgi->deferred_events_collecting= rli->mi->rpl_filter->is_on())) + { + rgi->deferred_events= new Deferred_log_events(rli); + } + + return rgi; +} + +static void wsrep_prepare_bf_thd(THD *thd, struct wsrep_thd_shadow* shadow) +{ + shadow->options = thd->variables.option_bits; + shadow->server_status = thd->server_status; + shadow->wsrep_exec_mode = thd->wsrep_exec_mode; + shadow->vio = thd->net.vio; + + // Disable general logging on applier threads + thd->variables.option_bits |= OPTION_LOG_OFF; + // Enable binlogging if opt_log_slave_updates is set + if (opt_log_slave_updates) + thd->variables.option_bits|= OPTION_BIN_LOG; + else + thd->variables.option_bits&= ~(OPTION_BIN_LOG); + + if (!thd->wsrep_rgi) thd->wsrep_rgi= wsrep_relay_group_init("wsrep_relay"); + + /* thd->system_thread_info.rpl_sql_info isn't initialized. */ + thd->system_thread_info.rpl_sql_info= + new rpl_sql_thread_info(thd->wsrep_rgi->rli->mi->rpl_filter); + + thd->wsrep_exec_mode= REPL_RECV; + thd->net.vio= 0; + thd->clear_error(); + + shadow->tx_isolation = thd->variables.tx_isolation; + thd->variables.tx_isolation = ISO_READ_COMMITTED; + thd->tx_isolation = ISO_READ_COMMITTED; + + shadow->db = thd->db; + shadow->db_length = thd->db_length; + shadow->user_time = thd->user_time; + shadow->row_count_func= thd->get_row_count_func(); + thd->reset_db(NULL, 0); +} + +static void wsrep_return_from_bf_mode(THD *thd, struct wsrep_thd_shadow* shadow) +{ + thd->variables.option_bits = shadow->options; + thd->server_status = shadow->server_status; + thd->wsrep_exec_mode = shadow->wsrep_exec_mode; + thd->net.vio = shadow->vio; + thd->variables.tx_isolation = shadow->tx_isolation; + thd->user_time = shadow->user_time; + thd->reset_db(shadow->db, shadow->db_length); + + delete thd->system_thread_info.rpl_sql_info; + delete thd->wsrep_rgi->rli->mi; + delete thd->wsrep_rgi->rli; + + thd->wsrep_rgi->cleanup_after_session(); + delete thd->wsrep_rgi; + thd->wsrep_rgi = NULL; + thd->set_row_count_func(shadow->row_count_func); +} + +void wsrep_replay_transaction(THD *thd) +{ + DBUG_ENTER("wsrep_replay_transaction"); + /* checking if BF trx must be replayed */ + if (thd->wsrep_conflict_state== MUST_REPLAY) { + DBUG_ASSERT(wsrep_thd_trx_seqno(thd)); + if (thd->wsrep_exec_mode!= REPL_RECV) { + if (thd->get_stmt_da()->is_sent()) + { + WSREP_ERROR("replay issue, thd has reported status already"); + } + + + /* + PS reprepare observer should have been removed already. + open_table() will fail if we have dangling observer here. + */ + DBUG_ASSERT(thd->m_reprepare_observer == NULL); + + struct da_shadow + { + enum Diagnostics_area::enum_diagnostics_status status; + ulonglong affected_rows; + ulonglong last_insert_id; + char message[MYSQL_ERRMSG_SIZE]; + }; + struct da_shadow da_status; + da_status.status= thd->get_stmt_da()->status(); + if (da_status.status == Diagnostics_area::DA_OK) + { + da_status.affected_rows= thd->get_stmt_da()->affected_rows(); + da_status.last_insert_id= thd->get_stmt_da()->last_insert_id(); + strmake(da_status.message, + thd->get_stmt_da()->message(), + sizeof(da_status.message)-1); + } + + thd->get_stmt_da()->reset_diagnostics_area(); + + thd->wsrep_conflict_state= REPLAYING; + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + + mysql_reset_thd_for_next_command(thd); + thd->killed= NOT_KILLED; + close_thread_tables(thd); + if (thd->locked_tables_mode && thd->lock) + { + WSREP_DEBUG("releasing table lock for replaying (%ld)", + thd->thread_id); + thd->locked_tables_list.unlock_locked_tables(thd); + thd->variables.option_bits&= ~(OPTION_TABLE_LOCK); + } + thd->mdl_context.release_transactional_locks(); + /* + Replaying will call MYSQL_START_STATEMENT when handling + BEGIN Query_log_event so end statement must be called before + replaying. + */ + MYSQL_END_STATEMENT(thd->m_statement_psi, thd->get_stmt_da()); + thd->m_statement_psi= NULL; + thd->m_digest= NULL; + thd_proc_info(thd, "wsrep replaying trx"); + WSREP_DEBUG("replay trx: %s %lld", + thd->query() ? thd->query() : "void", + (long long)wsrep_thd_trx_seqno(thd)); + struct wsrep_thd_shadow shadow; + wsrep_prepare_bf_thd(thd, &shadow); + + /* From trans_begin() */ + thd->variables.option_bits|= OPTION_BEGIN; + thd->server_status|= SERVER_STATUS_IN_TRANS; + + int rcode = wsrep->replay_trx(wsrep, + &thd->wsrep_ws_handle, + (void *)thd); + + wsrep_return_from_bf_mode(thd, &shadow); + if (thd->wsrep_conflict_state!= REPLAYING) + WSREP_WARN("lost replaying mode: %d", thd->wsrep_conflict_state ); + + mysql_mutex_lock(&thd->LOCK_wsrep_thd); + + switch (rcode) + { + case WSREP_OK: + thd->wsrep_conflict_state= NO_CONFLICT; + wsrep->post_commit(wsrep, &thd->wsrep_ws_handle); + WSREP_DEBUG("trx_replay successful for: %ld %llu", + thd->thread_id, (long long)thd->real_id); + if (thd->get_stmt_da()->is_sent()) + { + WSREP_WARN("replay ok, thd has reported status"); + } + else if (thd->get_stmt_da()->is_set()) + { + if (thd->get_stmt_da()->status() != Diagnostics_area::DA_OK) + { + WSREP_WARN("replay ok, thd has error status %d", + thd->get_stmt_da()->status()); + } + } + else + { + if (da_status.status == Diagnostics_area::DA_OK) + { + my_ok(thd, + da_status.affected_rows, + da_status.last_insert_id, + da_status.message); + } + else + { + my_ok(thd); + } + } + break; + case WSREP_TRX_FAIL: + if (thd->get_stmt_da()->is_sent()) + { + WSREP_ERROR("replay failed, thd has reported status"); + } + else + { + WSREP_DEBUG("replay failed, rolling back"); + //my_error(ER_LOCK_DEADLOCK, MYF(0), "wsrep aborted transaction"); + } + thd->wsrep_conflict_state= ABORTED; + wsrep->post_rollback(wsrep, &thd->wsrep_ws_handle); + break; + default: + WSREP_ERROR("trx_replay failed for: %d, schema: %s, query: %s", + rcode, + (thd->db ? thd->db : "(null)"), + thd->query() ? thd->query() : "void"); + /* we're now in inconsistent state, must abort */ + + /* http://bazaar.launchpad.net/~codership/codership-mysql/5.6/revision/3962#sql/wsrep_thd.cc */ + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + + unireg_abort(1); + break; + } + + wsrep_cleanup_transaction(thd); + + mysql_mutex_lock(&LOCK_wsrep_replaying); + wsrep_replaying--; + WSREP_DEBUG("replaying decreased: %d, thd: %lu", + wsrep_replaying, thd->thread_id); + mysql_cond_broadcast(&COND_wsrep_replaying); + mysql_mutex_unlock(&LOCK_wsrep_replaying); + } + } + DBUG_VOID_RETURN; +} + +static void wsrep_replication_process(THD *thd) +{ + int rcode; + DBUG_ENTER("wsrep_replication_process"); + + struct wsrep_thd_shadow shadow; + wsrep_prepare_bf_thd(thd, &shadow); + + /* From trans_begin() */ + thd->variables.option_bits|= OPTION_BEGIN; + thd->server_status|= SERVER_STATUS_IN_TRANS; + + rcode = wsrep->recv(wsrep, (void *)thd); + DBUG_PRINT("wsrep",("wsrep_repl returned: %d", rcode)); + + WSREP_INFO("applier thread exiting (code:%d)", rcode); + + switch (rcode) { + case WSREP_OK: + case WSREP_NOT_IMPLEMENTED: + case WSREP_CONN_FAIL: + /* provider does not support slave operations / disconnected from group, + * just close applier thread */ + break; + case WSREP_NODE_FAIL: + /* data inconsistency => SST is needed */ + /* Note: we cannot just blindly restart replication here, + * SST might require server restart if storage engines must be + * initialized after SST */ + WSREP_ERROR("node consistency compromised, aborting"); + wsrep_kill_mysql(thd); + break; + case WSREP_WARNING: + case WSREP_TRX_FAIL: + case WSREP_TRX_MISSING: + /* these suggests a bug in provider code */ + WSREP_WARN("bad return from recv() call: %d", rcode); + /* fall through to node shutdown */ + case WSREP_FATAL: + /* Cluster connectivity is lost. + * + * If applier was killed on purpose (KILL_CONNECTION), we + * avoid mysql shutdown. This is because the killer will then handle + * shutdown processing (or replication restarting) + */ + if (thd->killed != KILL_CONNECTION) + { + wsrep_kill_mysql(thd); + } + break; + } + + mysql_mutex_lock(&LOCK_thread_count); + wsrep_close_applier(thd); + mysql_cond_broadcast(&COND_thread_count); + mysql_mutex_unlock(&LOCK_thread_count); + + TABLE *tmp; + while ((tmp = thd->temporary_tables)) + { + WSREP_WARN("Applier %lu, has temporary tables at exit: %s.%s", + thd->thread_id, + (tmp->s) ? tmp->s->db.str : "void", + (tmp->s) ? tmp->s->table_name.str : "void"); + } + wsrep_return_from_bf_mode(thd, &shadow); + DBUG_VOID_RETURN; +} + +void wsrep_create_appliers(long threads) +{ + if (!wsrep_connected) + { + /* see wsrep_replication_start() for the logic */ + if (wsrep_cluster_address && strlen(wsrep_cluster_address) && + wsrep_provider && strcasecmp(wsrep_provider, "none")) + { + WSREP_ERROR("Trying to launch slave threads before creating " + "connection at '%s'", wsrep_cluster_address); + assert(0); + } + return; + } + + long wsrep_threads=0; + pthread_t hThread; + while (wsrep_threads++ < threads) { + if (pthread_create( + &hThread, &connection_attrib, + start_wsrep_THD, (void*)wsrep_replication_process)) + WSREP_WARN("Can't create thread to manage wsrep replication"); + } +} + +static void wsrep_rollback_process(THD *thd) +{ + DBUG_ENTER("wsrep_rollback_process"); + + mysql_mutex_lock(&LOCK_wsrep_rollback); + wsrep_aborting_thd= NULL; + + while (thd->killed == NOT_KILLED) { + thd_proc_info(thd, "wsrep aborter idle"); + thd->mysys_var->current_mutex= &LOCK_wsrep_rollback; + thd->mysys_var->current_cond= &COND_wsrep_rollback; + + mysql_cond_wait(&COND_wsrep_rollback,&LOCK_wsrep_rollback); + + WSREP_DEBUG("WSREP rollback thread wakes for signal"); + + mysql_mutex_lock(&thd->mysys_var->mutex); + thd_proc_info(thd, "wsrep aborter active"); + thd->mysys_var->current_mutex= 0; + thd->mysys_var->current_cond= 0; + mysql_mutex_unlock(&thd->mysys_var->mutex); + + /* check for false alarms */ + if (!wsrep_aborting_thd) + { + WSREP_DEBUG("WSREP rollback thread has empty abort queue"); + } + /* process all entries in the queue */ + while (wsrep_aborting_thd) { + THD *aborting; + wsrep_aborting_thd_t next = wsrep_aborting_thd->next; + aborting = wsrep_aborting_thd->aborting_thd; + my_free(wsrep_aborting_thd); + wsrep_aborting_thd= next; + /* + * must release mutex, appliers my want to add more + * aborting thds in our work queue, while we rollback + */ + mysql_mutex_unlock(&LOCK_wsrep_rollback); + + mysql_mutex_lock(&aborting->LOCK_wsrep_thd); + if (aborting->wsrep_conflict_state== ABORTED) + { + WSREP_DEBUG("WSREP, thd already aborted: %llu state: %d", + (long long)aborting->real_id, + aborting->wsrep_conflict_state); + + mysql_mutex_unlock(&aborting->LOCK_wsrep_thd); + mysql_mutex_lock(&LOCK_wsrep_rollback); + continue; + } + aborting->wsrep_conflict_state= ABORTING; + + mysql_mutex_unlock(&aborting->LOCK_wsrep_thd); + + set_current_thd(aborting); + aborting->store_globals(); + + mysql_mutex_lock(&aborting->LOCK_wsrep_thd); + wsrep_client_rollback(aborting); + WSREP_DEBUG("WSREP rollbacker aborted thd: (%lu %llu)", + aborting->thread_id, (long long)aborting->real_id); + mysql_mutex_unlock(&aborting->LOCK_wsrep_thd); + + set_current_thd(thd); + thd->store_globals(); + + mysql_mutex_lock(&LOCK_wsrep_rollback); + } + } + + mysql_mutex_unlock(&LOCK_wsrep_rollback); + sql_print_information("WSREP: rollbacker thread exiting"); + + DBUG_PRINT("wsrep",("wsrep rollbacker thread exiting")); + DBUG_VOID_RETURN; +} + +void wsrep_create_rollbacker() +{ + if (wsrep_provider && strcasecmp(wsrep_provider, "none")) + { + pthread_t hThread; + /* create rollbacker */ + if (pthread_create( &hThread, &connection_attrib, + start_wsrep_THD, (void*)wsrep_rollback_process)) + WSREP_WARN("Can't create thread to manage wsrep rollback"); + } +} + +void wsrep_thd_set_PA_safe(void *thd_ptr, my_bool safe) +{ + if (thd_ptr) + { + THD* thd = (THD*)thd_ptr; + thd->wsrep_PA_safe = safe; + } +} + +int wsrep_thd_conflict_state(void *thd_ptr, my_bool sync) +{ + int state = -1; + if (thd_ptr) + { + THD* thd = (THD*)thd_ptr; + if (sync) mysql_mutex_lock(&thd->LOCK_wsrep_thd); + + state = thd->wsrep_conflict_state; + if (sync) mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + } + return state; +} + +my_bool wsrep_thd_is_wsrep(void *thd_ptr) +{ + my_bool status = FALSE; + if (thd_ptr) + { + THD* thd = (THD*)thd_ptr; + + status = (WSREP(thd) && WSREP_PROVIDER_EXISTS); + } + return status; +} + +my_bool wsrep_thd_is_BF(void *thd_ptr, my_bool sync) +{ + my_bool status = FALSE; + if (thd_ptr) + { + THD* thd = (THD*)thd_ptr; + // THD can be BF only if provider exists + if (wsrep_thd_is_wsrep(thd_ptr)) + { + if (sync) + mysql_mutex_lock(&thd->LOCK_wsrep_thd); + + status = ((thd->wsrep_exec_mode == REPL_RECV) || + (thd->wsrep_exec_mode == TOTAL_ORDER)); + if (sync) + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + } + } + return status; +} + +extern "C" +my_bool wsrep_thd_is_BF_or_commit(void *thd_ptr, my_bool sync) +{ + bool status = FALSE; + if (thd_ptr) + { + THD* thd = (THD*)thd_ptr; + if (sync) mysql_mutex_lock(&thd->LOCK_wsrep_thd); + + status = ((thd->wsrep_exec_mode == REPL_RECV) || + (thd->wsrep_exec_mode == TOTAL_ORDER) || + (thd->wsrep_exec_mode == LOCAL_COMMIT)); + if (sync) mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + } + return status; +} + +extern "C" +my_bool wsrep_thd_is_local(void *thd_ptr, my_bool sync) +{ + bool status = FALSE; + if (thd_ptr) + { + THD* thd = (THD*)thd_ptr; + if (sync) mysql_mutex_lock(&thd->LOCK_wsrep_thd); + + status = (thd->wsrep_exec_mode == LOCAL_STATE); + if (sync) mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + } + return status; +} + +int wsrep_abort_thd(void *bf_thd_ptr, void *victim_thd_ptr, my_bool signal) +{ + THD *victim_thd = (THD *) victim_thd_ptr; + THD *bf_thd = (THD *) bf_thd_ptr; + DBUG_ENTER("wsrep_abort_thd"); + + if ( (WSREP(bf_thd) || + ( (WSREP_ON || bf_thd->variables.wsrep_OSU_method == WSREP_OSU_RSU) && + bf_thd->wsrep_exec_mode == TOTAL_ORDER) ) && + victim_thd) + { + if ((victim_thd->wsrep_conflict_state == MUST_ABORT) || + (victim_thd->wsrep_conflict_state == ABORTED) || + (victim_thd->wsrep_conflict_state == ABORTING)) + { + WSREP_DEBUG("wsrep_abort_thd called by %llu with victim %llu already " + "aborted. Ignoring.", + (bf_thd) ? (long long)bf_thd->real_id : 0, + (long long)victim_thd->real_id); + DBUG_RETURN(1); + } + + WSREP_DEBUG("wsrep_abort_thd, by: %llu, victim: %llu", (bf_thd) ? + (long long)bf_thd->real_id : 0, (long long)victim_thd->real_id); + ha_wsrep_abort_transaction(bf_thd, victim_thd, signal); + } + else + { + WSREP_DEBUG("wsrep_abort_thd not effective: %p %p", bf_thd, victim_thd); + } + + DBUG_RETURN(1); +} + +extern "C" +int wsrep_thd_in_locking_session(void *thd_ptr) +{ + if (thd_ptr && ((THD *)thd_ptr)->in_lock_tables) { + return 1; + } + return 0; +} + +bool wsrep_thd_has_explicit_locks(THD *thd) +{ + assert(thd); + return (thd->mdl_context.wsrep_has_explicit_locks()); +} diff --git a/sql/wsrep_thd.h b/sql/wsrep_thd.h new file mode 100644 index 00000000000..700e0f1cc56 --- /dev/null +++ b/sql/wsrep_thd.h @@ -0,0 +1,40 @@ +/* Copyright (C) 2013 Codership Oy <info@codership.com> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */ + +#ifndef WSREP_THD_H +#define WSREP_THD_H + +#include "sql_class.h" + +int wsrep_show_bf_aborts (THD *thd, SHOW_VAR *var, char *buff); +void wsrep_client_rollback(THD *thd); +void wsrep_replay_transaction(THD *thd); +void wsrep_create_appliers(long threads); +void wsrep_create_rollbacker(); + +int wsrep_abort_thd(void *bf_thd_ptr, void *victim_thd_ptr, + my_bool signal); + +extern void wsrep_thd_set_PA_safe(void *thd_ptr, my_bool safe); +extern my_bool wsrep_thd_is_BF(void *thd_ptr, my_bool sync); +extern my_bool wsrep_thd_is_wsrep(void *thd_ptr); + +extern int wsrep_thd_conflict_state(void *thd_ptr, my_bool sync); +//extern "C" my_bool wsrep_thd_is_BF(void *thd_ptr, my_bool sync); +extern "C" my_bool wsrep_thd_is_BF_or_commit(void *thd_ptr, my_bool sync); +extern "C" my_bool wsrep_thd_is_local(void *thd_ptr, my_bool sync); +extern "C" int wsrep_thd_in_locking_session(void *thd_ptr); + +#endif /* WSREP_THD_H */ diff --git a/sql/wsrep_utils.cc b/sql/wsrep_utils.cc new file mode 100644 index 00000000000..719e8e6b473 --- /dev/null +++ b/sql/wsrep_utils.cc @@ -0,0 +1,556 @@ +/* Copyright 2010-2015 Codership Oy <http://www.codership.com> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +//! @file some utility functions and classes not directly related to replication + +#ifndef _GNU_SOURCE +#define _GNU_SOURCE // POSIX_SPAWN_USEVFORK flag +#endif + +#include "wsrep_utils.h" +#include "wsrep_mysqld.h" + +#include <sql_class.h> + +#include <spawn.h> // posix_spawn() +#include <unistd.h> // pipe() +#include <errno.h> // errno +#include <string.h> // strerror() +#include <sys/wait.h> // waitpid() +#include <sys/types.h> +#include <sys/socket.h> +#include <netdb.h> // getaddrinfo() + +#ifdef HAVE_GETIFADDRS +#include <net/if.h> +#include <ifaddrs.h> +#endif /* HAVE_GETIFADDRS */ + +extern char** environ; // environment variables + +static wsp::string wsrep_PATH; + +void +wsrep_prepend_PATH (const char* path) +{ + int count = 0; + + while (environ[count]) + { + if (strncmp (environ[count], "PATH=", 5)) + { + count++; + continue; + } + + char* const old_path (environ[count]); + + if (strstr (old_path, path)) return; // path already there + + size_t const new_path_len(strlen(old_path) + strlen(":") + + strlen(path) + 1); + + char* const new_path (static_cast<char*>(malloc(new_path_len))); + + if (new_path) + { + snprintf (new_path, new_path_len, "PATH=%s:%s", path, + old_path + strlen("PATH=")); + + wsrep_PATH.set (new_path); + environ[count] = new_path; + } + else + { + WSREP_ERROR ("Failed to allocate 'PATH' environment variable " + "buffer of size %zu.", new_path_len); + } + + return; + } + + WSREP_ERROR ("Failed to find 'PATH' environment variable. " + "State snapshot transfer may not be working."); +} + +namespace wsp +{ + +bool +env::ctor_common(char** e) +{ + env_ = static_cast<char**>(malloc((len_ + 1) * sizeof(char*))); + + if (env_) + { + for (size_t i(0); i < len_; ++i) + { + assert(e[i]); // caller should make sure about len_ + env_[i] = strdup(e[i]); + if (!env_[i]) + { + errno_ = errno; + WSREP_ERROR("Failed to allocate env. var: %s", e[i]); + return true; + } + } + + env_[len_] = NULL; + return false; + } + else + { + errno_ = errno; + WSREP_ERROR("Failed to allocate env. var vector of length: %zu", len_); + return true; + } +} + +void +env::dtor() +{ + if (env_) + { + /* don't need to go beyond the first NULL */ + for (size_t i(0); env_[i] != NULL; ++i) { free(env_[i]); } + free(env_); + env_ = NULL; + } + len_ = 0; +} + +env::env(char** e) + : len_(0), env_(NULL), errno_(0) +{ + if (!e) { e = environ; } + /* count the size of the vector */ + while (e[len_]) { ++len_; } + + if (ctor_common(e)) dtor(); +} + +env::env(const env& e) + : len_(e.len_), env_(0), errno_(0) +{ + if (ctor_common(e.env_)) dtor(); +} + +env::~env() { dtor(); } + +int +env::append(const char* val) +{ + char** tmp = static_cast<char**>(realloc(env_, (len_ + 2)*sizeof(char*))); + + if (tmp) + { + env_ = tmp; + env_[len_] = strdup(val); + + if (env_[len_]) + { + ++len_; + env_[len_] = NULL; + } + else errno_ = errno; + } + else errno_ = errno; + + return errno_; +} + + +#define PIPE_READ 0 +#define PIPE_WRITE 1 +#define STDIN_FD 0 +#define STDOUT_FD 1 + +#ifndef POSIX_SPAWN_USEVFORK +# define POSIX_SPAWN_USEVFORK 0 +#endif + +process::process (const char* cmd, const char* type, char** env) + : str_(cmd ? strdup(cmd) : strdup("")), io_(NULL), err_(EINVAL), pid_(0) +{ + if (0 == str_) + { + WSREP_ERROR ("Can't allocate command line of size: %zu", strlen(cmd)); + err_ = ENOMEM; + return; + } + + if (0 == strlen(str_)) + { + WSREP_ERROR ("Can't start a process: null or empty command line."); + return; + } + + if (NULL == type || (strcmp (type, "w") && strcmp(type, "r"))) + { + WSREP_ERROR ("type argument should be either \"r\" or \"w\"."); + return; + } + + if (NULL == env) { env = environ; } // default to global environment + + int pipe_fds[2] = { -1, }; + if (::pipe(pipe_fds)) + { + err_ = errno; + WSREP_ERROR ("pipe() failed: %d (%s)", err_, strerror(err_)); + return; + } + + // which end of pipe will be returned to parent + int const parent_end (strcmp(type,"w") ? PIPE_READ : PIPE_WRITE); + int const child_end (parent_end == PIPE_READ ? PIPE_WRITE : PIPE_READ); + int const close_fd (parent_end == PIPE_READ ? STDOUT_FD : STDIN_FD); + + char* const pargv[4] = { strdup("sh"), strdup("-c"), strdup(str_), NULL }; + if (!(pargv[0] && pargv[1] && pargv[2])) + { + err_ = ENOMEM; + WSREP_ERROR ("Failed to allocate pargv[] array."); + goto cleanup_pipe; + } + + posix_spawnattr_t attr; + err_ = posix_spawnattr_init (&attr); + if (err_) + { + WSREP_ERROR ("posix_spawnattr_init() failed: %d (%s)", + err_, strerror(err_)); + goto cleanup_pipe; + } + + /* make sure that no signlas are masked in child process */ + sigset_t sigmask_empty; sigemptyset(&sigmask_empty); + err_ = posix_spawnattr_setsigmask(&attr, &sigmask_empty); + if (err_) + { + WSREP_ERROR ("posix_spawnattr_setsigmask() failed: %d (%s)", + err_, strerror(err_)); + goto cleanup_attr; + } + + /* make sure the following signals are not ignored in child process */ + sigset_t default_signals; sigemptyset(&default_signals); + sigaddset(&default_signals, SIGHUP); + sigaddset(&default_signals, SIGINT); + sigaddset(&default_signals, SIGQUIT); + sigaddset(&default_signals, SIGPIPE); + sigaddset(&default_signals, SIGTERM); + sigaddset(&default_signals, SIGCHLD); + err_ = posix_spawnattr_setsigdefault(&attr, &default_signals); + if (err_) + { + WSREP_ERROR ("posix_spawnattr_setsigdefault() failed: %d (%s)", + err_, strerror(err_)); + goto cleanup_attr; + } + + err_ = posix_spawnattr_setflags (&attr, POSIX_SPAWN_SETSIGDEF | + POSIX_SPAWN_SETSIGMASK | + /* start a new process group */ POSIX_SPAWN_SETPGROUP | + POSIX_SPAWN_USEVFORK); + if (err_) + { + WSREP_ERROR ("posix_spawnattr_setflags() failed: %d (%s)", + err_, strerror(err_)); + goto cleanup_attr; + } + + posix_spawn_file_actions_t fact; + err_ = posix_spawn_file_actions_init (&fact); + if (err_) + { + WSREP_ERROR ("posix_spawn_file_actions_init() failed: %d (%s)", + err_, strerror(err_)); + goto cleanup_attr; + } + + // close child's stdout|stdin depending on what we returning + err_ = posix_spawn_file_actions_addclose (&fact, close_fd); + if (err_) + { + WSREP_ERROR ("posix_spawn_file_actions_addclose() failed: %d (%s)", + err_, strerror(err_)); + goto cleanup_fact; + } + + // substitute our pipe descriptor in place of the closed one + err_ = posix_spawn_file_actions_adddup2 (&fact, + pipe_fds[child_end], close_fd); + if (err_) + { + WSREP_ERROR ("posix_spawn_file_actions_addup2() failed: %d (%s)", + err_, strerror(err_)); + goto cleanup_fact; + } + + err_ = posix_spawnp (&pid_, pargv[0], &fact, &attr, pargv, env); + if (err_) + { + WSREP_ERROR ("posix_spawnp(%s) failed: %d (%s)", + pargv[2], err_, strerror(err_)); + pid_ = 0; // just to make sure it was not messed up in the call + goto cleanup_fact; + } + + io_ = fdopen (pipe_fds[parent_end], type); + + if (io_) + { + pipe_fds[parent_end] = -1; // skip close on cleanup + } + else + { + err_ = errno; + WSREP_ERROR ("fdopen() failed: %d (%s)", err_, strerror(err_)); + } + +cleanup_fact: + int err; // to preserve err_ code + err = posix_spawn_file_actions_destroy (&fact); + if (err) + { + WSREP_ERROR ("posix_spawn_file_actions_destroy() failed: %d (%s)\n", + err, strerror(err)); + } + +cleanup_attr: + err = posix_spawnattr_destroy (&attr); + if (err) + { + WSREP_ERROR ("posix_spawnattr_destroy() failed: %d (%s)", + err, strerror(err)); + } + +cleanup_pipe: + if (pipe_fds[0] >= 0) close (pipe_fds[0]); + if (pipe_fds[1] >= 0) close (pipe_fds[1]); + + free (pargv[0]); + free (pargv[1]); + free (pargv[2]); +} + +process::~process () +{ + if (io_) + { + assert (pid_); + assert (str_); + + WSREP_WARN("Closing pipe to child process: %s, PID(%ld) " + "which might still be running.", str_, (long)pid_); + + if (fclose (io_) == -1) + { + err_ = errno; + WSREP_ERROR("fclose() failed: %d (%s)", err_, strerror(err_)); + } + } + + if (str_) free (const_cast<char*>(str_)); +} + +int +process::wait () +{ + if (pid_) + { + int status; + if (-1 == waitpid(pid_, &status, 0)) + { + err_ = errno; assert (err_); + WSREP_ERROR("Waiting for process failed: %s, PID(%ld): %d (%s)", + str_, (long)pid_, err_, strerror (err_)); + } + else + { // command completed, check exit status + if (WIFEXITED (status)) { + err_ = WEXITSTATUS (status); + } + else { // command didn't complete with exit() + WSREP_ERROR("Process was aborted."); + err_ = errno ? errno : ECHILD; + } + + if (err_) { + switch (err_) /* Translate error codes to more meaningful */ + { + case 126: err_ = EACCES; break; /* Permission denied */ + case 127: err_ = ENOENT; break; /* No such file or directory */ + case 143: err_ = EINTR; break; /* Subprocess killed */ + } + WSREP_ERROR("Process completed with error: %s: %d (%s)", + str_, err_, strerror(err_)); + } + + pid_ = 0; + if (io_) fclose (io_); + io_ = NULL; + } + } + else { + assert (NULL == io_); + WSREP_ERROR("Command did not run: %s", str_); + } + + return err_; +} + +thd::thd (my_bool won) : init(), ptr(new THD) +{ + if (ptr) + { + ptr->thread_stack= (char*) &ptr; + ptr->store_globals(); + ptr->variables.option_bits&= ~OPTION_BIN_LOG; // disable binlog + ptr->variables.wsrep_on = won; + ptr->security_ctx->master_access= ~(ulong)0; + lex_start(ptr); + } +} + +thd::~thd () +{ + if (ptr) + { + delete ptr; + my_pthread_setspecific_ptr (THR_THD, 0); + } +} + +} // namespace wsp + +/* Returns INADDR_NONE, INADDR_ANY, INADDR_LOOPBACK or something else */ +unsigned int wsrep_check_ip (const char* const addr) +{ + unsigned int ret = INADDR_NONE; + struct addrinfo *res, hints; + + memset (&hints, 0, sizeof(hints)); + hints.ai_flags= AI_PASSIVE/*|AI_ADDRCONFIG*/; + hints.ai_socktype= SOCK_STREAM; + hints.ai_family= AF_UNSPEC; + + int gai_ret = getaddrinfo(addr, NULL, &hints, &res); + if (0 == gai_ret) + { + if (AF_INET == res->ai_family) /* IPv4 */ + { + struct sockaddr_in* a= (struct sockaddr_in*)res->ai_addr; + ret= htonl(a->sin_addr.s_addr); + } + else /* IPv6 */ + { + struct sockaddr_in6* a= (struct sockaddr_in6*)res->ai_addr; + if (IN6_IS_ADDR_UNSPECIFIED(&a->sin6_addr)) + ret= INADDR_ANY; + else if (IN6_IS_ADDR_LOOPBACK(&a->sin6_addr)) + ret= INADDR_LOOPBACK; + else + ret= 0xdeadbeef; + } + freeaddrinfo (res); + } + else { + WSREP_ERROR ("getaddrinfo() failed on '%s': %d (%s)", + addr, gai_ret, gai_strerror(gai_ret)); + } + + // uint8_t* b= (uint8_t*)&ret; + // fprintf (stderr, "########## wsrep_check_ip returning: %hhu.%hhu.%hhu.%hhu\n", + // b[0], b[1], b[2], b[3]); + + return ret; +} + +extern char* my_bind_addr_str; + +size_t wsrep_guess_ip (char* buf, size_t buf_len) +{ + size_t ip_len = 0; + + if (my_bind_addr_str && my_bind_addr_str[0] != '\0') + { + unsigned int const ip_type= wsrep_check_ip(my_bind_addr_str); + + if (INADDR_NONE == ip_type) { + WSREP_ERROR("Networking not configured, cannot receive state " + "transfer."); + return 0; + } + + if (INADDR_ANY != ip_type) { + strncpy (buf, my_bind_addr_str, buf_len); + return strlen(buf); + } + } + + // mysqld binds to all interfaces - try IP from wsrep_node_address + if (wsrep_node_address && wsrep_node_address[0] != '\0') { + const char* const colon_ptr = strchr(wsrep_node_address, ':'); + + if (colon_ptr) + ip_len = colon_ptr - wsrep_node_address; + else + ip_len = strlen(wsrep_node_address); + + if (ip_len >= buf_len) { + WSREP_WARN("default_ip(): buffer too short: %zu <= %zd", buf_len, ip_len); + return 0; + } + + memcpy (buf, wsrep_node_address, ip_len); + buf[ip_len] = '\0'; + return ip_len; + } + + /* + getifaddrs() is avaiable at least on Linux since glib 2.3, FreeBSD, + MAC OSX, OpenSolaris, Solaris. + + On platforms which do not support getifaddrs() this function returns + a failure and user is prompted to do manual configuration. + */ +#if HAVE_GETIFADDRS + struct ifaddrs *ifaddr, *ifa; + if (getifaddrs(&ifaddr) == 0) + { + for (ifa= ifaddr; ifa != NULL; ifa = ifa->ifa_next) + { + if (!ifa->ifa_addr || ifa->ifa_addr->sa_family != AF_INET) // TODO AF_INET6 + continue; + + // Skip loopback interfaces (like lo:127.0.0.1) + if (ifa->ifa_flags & IFF_LOOPBACK) + continue; + + if (vio_getnameinfo(ifa->ifa_addr, buf, buf_len, NULL, 0, NI_NUMERICHOST)) + continue; + + freeifaddrs(ifaddr); + return strlen(buf); + } + freeifaddrs(ifaddr); + } +#endif /* HAVE_GETIFADDRS */ + + return 0; +} diff --git a/sql/wsrep_utils.h b/sql/wsrep_utils.h new file mode 100644 index 00000000000..7d864603c7f --- /dev/null +++ b/sql/wsrep_utils.h @@ -0,0 +1,229 @@ +/* Copyright (C) 2013-2015 Codership Oy <info@codership.com> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */ + +#ifndef WSREP_UTILS_H +#define WSREP_UTILS_H + +#include "wsrep_priv.h" + +unsigned int wsrep_check_ip (const char* addr); +size_t wsrep_guess_ip (char* buf, size_t buf_len); + +namespace wsp { +class node_status +{ +public: + node_status() : status(WSREP_MEMBER_UNDEFINED) {} + void set(wsrep_member_status_t new_status, + const wsrep_view_info_t* view = 0) + { + if (status != new_status || 0 != view) + { + wsrep_notify_status(new_status, view); + status = new_status; + } + } + wsrep_member_status_t get() const { return status; } +private: + wsrep_member_status_t status; +}; +} /* namespace wsp */ + +extern wsp::node_status local_status; + +namespace wsp { +/* a class to manage env vars array */ +class env +{ +private: + size_t len_; + char** env_; + int errno_; + bool ctor_common(char** e); + void dtor(); + env& operator =(env); +public: + explicit env(char** env); + explicit env(const env&); + ~env(); + int append(const char* var); /* add a new env. var */ + int error() const { return errno_; } + char** operator()() { return env_; } +}; + +/* A small class to run external programs. */ +class process +{ +private: + const char* const str_; + FILE* io_; + int err_; + pid_t pid_; + +public: +/*! @arg type is a pointer to a null-terminated string which must contain + either the letter 'r' for reading or the letter 'w' for writing. + @arg env optional null-terminated vector of environment variables + */ + process (const char* cmd, const char* type, char** env); + ~process (); + + FILE* pipe () { return io_; } + int error() { return err_; } + int wait (); + const char* cmd() { return str_; } +}; + +class thd +{ + class thd_init + { + public: + thd_init() { my_thread_init(); } + ~thd_init() { my_thread_end(); } + } + init; + + thd (const thd&); + thd& operator= (const thd&); + +public: + + thd(my_bool wsrep_on); + ~thd(); + THD* const ptr; +}; + +class string +{ +public: + string() : string_(0) {} + explicit string(size_t s) : string_(static_cast<char*>(malloc(s))) {} + char* operator()() { return string_; } + void set(char* str) { if (string_) free (string_); string_ = str; } + ~string() { set (0); } +private: + char* string_; +}; + +#ifdef REMOVED +class lock +{ + pthread_mutex_t* const mtx_; + +public: + + lock (pthread_mutex_t* mtx) : mtx_(mtx) + { + int err = pthread_mutex_lock (mtx_); + + if (err) + { + WSREP_ERROR("Mutex lock failed: %s", strerror(err)); + abort(); + } + } + + virtual ~lock () + { + int err = pthread_mutex_unlock (mtx_); + + if (err) + { + WSREP_ERROR("Mutex unlock failed: %s", strerror(err)); + abort(); + } + } + + inline void wait (pthread_cond_t* cond) + { + pthread_cond_wait (cond, mtx_); + } + +private: + + lock (const lock&); + lock& operator=(const lock&); + +}; + +class monitor +{ + int mutable refcnt; + pthread_mutex_t mutable mtx; + pthread_cond_t mutable cond; + +public: + + monitor() : refcnt(0) + { + pthread_mutex_init (&mtx, NULL); + pthread_cond_init (&cond, NULL); + } + + ~monitor() + { + pthread_mutex_destroy (&mtx); + pthread_cond_destroy (&cond); + } + + void enter() const + { + lock l(&mtx); + + while (refcnt) + { + l.wait(&cond); + } + refcnt++; + } + + void leave() const + { + lock l(&mtx); + + refcnt--; + if (refcnt == 0) + { + pthread_cond_signal (&cond); + } + } + +private: + + monitor (const monitor&); + monitor& operator= (const monitor&); +}; + +class critical +{ + const monitor& mon; + +public: + + critical(const monitor& m) : mon(m) { mon.enter(); } + + ~critical() { mon.leave(); } + +private: + + critical (const critical&); + critical& operator= (const critical&); +}; +#endif + +} // namespace wsrep + +#endif /* WSREP_UTILS_H */ diff --git a/sql/wsrep_var.cc b/sql/wsrep_var.cc new file mode 100644 index 00000000000..8a507711daf --- /dev/null +++ b/sql/wsrep_var.cc @@ -0,0 +1,670 @@ +/* Copyright 2008-2015 Codership Oy <http://www.codership.com> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include "wsrep_var.h" + +#include <mysqld.h> +#include <sql_class.h> +#include <sql_plugin.h> +#include <set_var.h> +#include <sql_acl.h> +#include "wsrep_priv.h" +#include "wsrep_thd.h" +#include "wsrep_xid.h" +#include <my_dir.h> +#include <cstdio> +#include <cstdlib> + +const char* wsrep_provider = 0; +const char* wsrep_provider_options = 0; +const char* wsrep_cluster_address = 0; +const char* wsrep_cluster_name = 0; +const char* wsrep_node_name = 0; +const char* wsrep_node_address = 0; +const char* wsrep_node_incoming_address = 0; +const char* wsrep_start_position = 0; + +int wsrep_init_vars() +{ + wsrep_provider = my_strdup(WSREP_NONE, MYF(MY_WME)); + wsrep_provider_options= my_strdup("", MYF(MY_WME)); + wsrep_cluster_address = my_strdup("", MYF(MY_WME)); + wsrep_cluster_name = my_strdup(WSREP_CLUSTER_NAME, MYF(MY_WME)); + wsrep_node_name = my_strdup("", MYF(MY_WME)); + wsrep_node_address = my_strdup("", MYF(MY_WME)); + wsrep_node_incoming_address= my_strdup(WSREP_NODE_INCOMING_AUTO, MYF(MY_WME)); + wsrep_start_position = my_strdup(WSREP_START_POSITION_ZERO, MYF(MY_WME)); + + global_system_variables.binlog_format=BINLOG_FORMAT_ROW; + return 0; +} + +bool wsrep_on_update (sys_var *self, THD* thd, enum_var_type var_type) +{ + if (var_type == OPT_GLOBAL) { + // FIXME: this variable probably should be changed only per session + thd->variables.wsrep_on = global_system_variables.wsrep_on; + } + return false; +} + +bool wsrep_causal_reads_update (sys_var *self, THD* thd, enum_var_type var_type) +{ + // wsrep_sync_wait should also be updated. + if (var_type == OPT_GLOBAL) { + if (global_system_variables.wsrep_causal_reads) { + global_system_variables.wsrep_sync_wait |= WSREP_SYNC_WAIT_BEFORE_READ; + } else { + global_system_variables.wsrep_sync_wait &= ~WSREP_SYNC_WAIT_BEFORE_READ; + } + } else { + if (thd->variables.wsrep_causal_reads) { + thd->variables.wsrep_sync_wait |= WSREP_SYNC_WAIT_BEFORE_READ; + } else { + thd->variables.wsrep_sync_wait &= ~WSREP_SYNC_WAIT_BEFORE_READ; + } + } + + return false; +} + +bool wsrep_sync_wait_update (sys_var* self, THD* thd, enum_var_type var_type) +{ + // wsrep_causal_reads should also be updated. + if (var_type == OPT_GLOBAL) { + global_system_variables.wsrep_causal_reads= + global_system_variables.wsrep_sync_wait & WSREP_SYNC_WAIT_BEFORE_READ; + } else { + thd->variables.wsrep_causal_reads= + thd->variables.wsrep_sync_wait & WSREP_SYNC_WAIT_BEFORE_READ; + } + return false; +} + +static int wsrep_start_position_verify (const char* start_str) +{ + size_t start_len; + wsrep_uuid_t uuid; + ssize_t uuid_len; + + start_len = strlen (start_str); + if (start_len < 34) + return 1; + + uuid_len = wsrep_uuid_scan (start_str, start_len, &uuid); + if (uuid_len < 0 || (start_len - uuid_len) < 2) + return 1; + + if (start_str[uuid_len] != ':') // separator should follow UUID + return 1; + + char* endptr; + wsrep_seqno_t const seqno __attribute__((unused)) // to avoid GCC warnings + (strtoll(&start_str[uuid_len + 1], &endptr, 10)); + + if (*endptr == '\0') return 0; // remaining string was seqno + + return 1; +} + +bool wsrep_start_position_check (sys_var *self, THD* thd, set_var* var) +{ + char start_pos_buf[FN_REFLEN]; + + if ((! var->save_result.string_value.str) || + (var->save_result.string_value.length > (FN_REFLEN - 1))) // safety + goto err; + + memcpy(start_pos_buf, var->save_result.string_value.str, + var->save_result.string_value.length); + start_pos_buf[var->save_result.string_value.length]= 0; + + if (!wsrep_start_position_verify(start_pos_buf)) return 0; + +err: + my_error(ER_WRONG_VALUE_FOR_VAR, MYF(0), var->var->name.str, + var->save_result.string_value.str ? + var->save_result.string_value.str : "NULL"); + return 1; +} + +static +void wsrep_set_local_position(const char* const value, bool const sst) +{ + size_t const value_len = strlen(value); + wsrep_uuid_t uuid; + size_t const uuid_len = wsrep_uuid_scan(value, value_len, &uuid); + wsrep_seqno_t const seqno = strtoll(value + uuid_len + 1, NULL, 10); + + if (sst) { + wsrep_sst_received (wsrep, uuid, seqno, NULL, 0); + } else { + // initialization + local_uuid = uuid; + local_seqno = seqno; + } +} + +bool wsrep_start_position_update (sys_var *self, THD* thd, enum_var_type type) +{ + WSREP_INFO ("wsrep_start_position var submitted: '%s'", + wsrep_start_position); + // since this value passed wsrep_start_position_check, don't check anything + // here + wsrep_set_local_position (wsrep_start_position, true); + return 0; +} + +void wsrep_start_position_init (const char* val) +{ + if (NULL == val || wsrep_start_position_verify (val)) + { + WSREP_ERROR("Bad initial value for wsrep_start_position: %s", + (val ? val : "")); + return; + } + + wsrep_set_local_position (val, false); +} + +static int get_provider_option_value(const char* opts, + const char* opt_name, + ulong* opt_value) +{ + int ret= 1; + ulong opt_value_tmp; + char *opt_value_str, *s, *opts_copy= my_strdup(opts, MYF(MY_WME)); + + if ((opt_value_str= strstr(opts_copy, opt_name)) == NULL) + goto end; + opt_value_str= strtok_r(opt_value_str, "=", &s); + if (opt_value_str == NULL) goto end; + opt_value_str= strtok_r(NULL, ";", &s); + if (opt_value_str == NULL) goto end; + + opt_value_tmp= strtoul(opt_value_str, NULL, 10); + if (errno == ERANGE) goto end; + + *opt_value= opt_value_tmp; + ret= 0; + +end: + my_free(opts_copy); + return ret; +} + +static bool refresh_provider_options() +{ + WSREP_DEBUG("refresh_provider_options: %s", + (wsrep_provider_options) ? wsrep_provider_options : "null"); + char* opts= wsrep->options_get(wsrep); + if (opts) + { + wsrep_provider_options_init(opts); + get_provider_option_value(wsrep_provider_options, + (char*)"repl.max_ws_size", + &wsrep_max_ws_size); + free(opts); + } + else + { + WSREP_ERROR("Failed to get provider options"); + return true; + } + return false; +} + +static int wsrep_provider_verify (const char* provider_str) +{ + MY_STAT f_stat; + char path[FN_REFLEN]; + + if (!provider_str || strlen(provider_str)== 0) + return 1; + + if (!strcmp(provider_str, WSREP_NONE)) + return 0; + + if (!unpack_filename(path, provider_str)) + return 1; + + /* check that provider file exists */ + memset(&f_stat, 0, sizeof(MY_STAT)); + if (!my_stat(path, &f_stat, MYF(0))) + { + return 1; + } + return 0; +} + +bool wsrep_provider_check (sys_var *self, THD* thd, set_var* var) +{ + char wsrep_provider_buf[FN_REFLEN]; + + if ((! var->save_result.string_value.str) || + (var->save_result.string_value.length > (FN_REFLEN - 1))) // safety + goto err; + + memcpy(wsrep_provider_buf, var->save_result.string_value.str, + var->save_result.string_value.length); + wsrep_provider_buf[var->save_result.string_value.length]= 0; + + if (!wsrep_provider_verify(wsrep_provider_buf)) return 0; + +err: + my_error(ER_WRONG_VALUE_FOR_VAR, MYF(0), var->var->name.str, + var->save_result.string_value.str ? + var->save_result.string_value.str : "NULL"); + return 1; +} + +bool wsrep_provider_update (sys_var *self, THD* thd, enum_var_type type) +{ + bool rcode= false; + + bool wsrep_on_saved= thd->variables.wsrep_on; + thd->variables.wsrep_on= false; + + WSREP_DEBUG("wsrep_provider_update: %s", wsrep_provider); + + /* stop replication is heavy operation, and includes closing all client + connections. Closing clients may need to get LOCK_global_system_variables + at least in MariaDB. + + Note: releasing LOCK_global_system_variables may cause race condition, if + there can be several concurrent clients changing wsrep_provider + */ + mysql_mutex_unlock(&LOCK_global_system_variables); + wsrep_stop_replication(thd); + mysql_mutex_lock(&LOCK_global_system_variables); + + if (wsrep_inited == 1) + wsrep_deinit(false); + + char* tmp= strdup(wsrep_provider); // wsrep_init() rewrites provider + //when fails + if (wsrep_init()) + { + my_error(ER_CANT_OPEN_LIBRARY, MYF(0), tmp); + rcode = true; + } + free(tmp); + + // we sure don't want to use old address with new provider + wsrep_cluster_address_init(NULL); + wsrep_provider_options_init(NULL); + + thd->variables.wsrep_on= wsrep_on_saved; + + refresh_provider_options(); + + return rcode; +} + +void wsrep_provider_init (const char* value) +{ + WSREP_DEBUG("wsrep_provider_init: %s -> %s", + (wsrep_provider) ? wsrep_provider : "null", + (value) ? value : "null"); + if (NULL == value || wsrep_provider_verify (value)) + { + WSREP_ERROR("Bad initial value for wsrep_provider: %s", + (value ? value : "")); + return; + } + + if (wsrep_provider) my_free((void *)wsrep_provider); + wsrep_provider = my_strdup(value, MYF(0)); +} + +bool wsrep_provider_options_check(sys_var *self, THD* thd, set_var* var) +{ + return 0; +} + +bool wsrep_provider_options_update(sys_var *self, THD* thd, enum_var_type type) +{ + wsrep_status_t ret= wsrep->options_set(wsrep, wsrep_provider_options); + if (ret != WSREP_OK) + { + WSREP_ERROR("Set options returned %d", ret); + refresh_provider_options(); + return true; + } + return refresh_provider_options(); +} + +void wsrep_provider_options_init(const char* value) +{ + if (wsrep_provider_options && wsrep_provider_options != value) + my_free((void *)wsrep_provider_options); + wsrep_provider_options = (value) ? my_strdup(value, MYF(0)) : NULL; +} + +static int wsrep_cluster_address_verify (const char* cluster_address_str) +{ + /* There is no predefined address format, it depends on provider. */ + return 0; +} + +bool wsrep_cluster_address_check (sys_var *self, THD* thd, set_var* var) +{ + char addr_buf[FN_REFLEN]; + + if ((! var->save_result.string_value.str) || + (var->save_result.string_value.length > (FN_REFLEN - 1))) // safety + goto err; + + memcpy(addr_buf, var->save_result.string_value.str, + var->save_result.string_value.length); + addr_buf[var->save_result.string_value.length]= 0; + + if (!wsrep_cluster_address_verify(addr_buf)) return 0; + + err: + my_error(ER_WRONG_VALUE_FOR_VAR, MYF(0), var->var->name.str, + var->save_result.string_value.str ? + var->save_result.string_value.str : "NULL"); + return 1; +} + +bool wsrep_cluster_address_update (sys_var *self, THD* thd, enum_var_type type) +{ + bool wsrep_on_saved= thd->variables.wsrep_on; + thd->variables.wsrep_on= false; + + /* stop replication is heavy operation, and includes closing all client + connections. Closing clients may need to get LOCK_global_system_variables + at least in MariaDB. + + Note: releasing LOCK_global_system_variables may cause race condition, if + there can be several concurrent clients changing wsrep_provider + */ + mysql_mutex_unlock(&LOCK_global_system_variables); + wsrep_stop_replication(thd); + + /* + Unlock and lock LOCK_wsrep_slave_threads to maintain lock order & avoid + any potential deadlock. + */ + mysql_mutex_unlock(&LOCK_wsrep_slave_threads); + mysql_mutex_lock(&LOCK_global_system_variables); + mysql_mutex_lock(&LOCK_wsrep_slave_threads); + + if (wsrep_start_replication()) + { + wsrep_create_rollbacker(); + wsrep_create_appliers(wsrep_slave_threads); + } + + thd->variables.wsrep_on= wsrep_on_saved; + + return false; +} + +void wsrep_cluster_address_init (const char* value) +{ + WSREP_DEBUG("wsrep_cluster_address_init: %s -> %s", + (wsrep_cluster_address) ? wsrep_cluster_address : "null", + (value) ? value : "null"); + + if (wsrep_cluster_address) my_free ((void*)wsrep_cluster_address); + wsrep_cluster_address = (value) ? my_strdup(value, MYF(0)) : NULL; +} + +/* wsrep_cluster_name cannot be NULL or an empty string. */ +bool wsrep_cluster_name_check (sys_var *self, THD* thd, set_var* var) +{ + if (!var->save_result.string_value.str || + (var->save_result.string_value.length == 0)) + { + my_error(ER_WRONG_VALUE_FOR_VAR, MYF(0), var->var->name.str, + (var->save_result.string_value.str ? + var->save_result.string_value.str : "NULL")); + return 1; + } + return 0; +} + +bool wsrep_cluster_name_update (sys_var *self, THD* thd, enum_var_type type) +{ + return 0; +} + +bool wsrep_node_name_check (sys_var *self, THD* thd, set_var* var) +{ + // TODO: for now 'allow' 0-length string to be valid (default) + if (!var->save_result.string_value.str) + { + my_error(ER_WRONG_VALUE_FOR_VAR, MYF(0), var->var->name.str, + (var->save_result.string_value.str ? + var->save_result.string_value.str : "NULL")); + return 1; + } + return 0; +} + +bool wsrep_node_name_update (sys_var *self, THD* thd, enum_var_type type) +{ + return 0; +} + +// TODO: do something more elaborate, like checking connectivity +bool wsrep_node_address_check (sys_var *self, THD* thd, set_var* var) +{ + char addr_buf[FN_REFLEN]; + + if ((! var->save_result.string_value.str) || + (var->save_result.string_value.length > (FN_REFLEN - 1))) // safety + goto err; + + memcpy(addr_buf, var->save_result.string_value.str, + var->save_result.string_value.length); + addr_buf[var->save_result.string_value.length]= 0; + + // TODO: for now 'allow' 0-length string to be valid (default) + return 0; + +err: + my_error(ER_WRONG_VALUE_FOR_VAR, MYF(0), var->var->name.str, + var->save_result.string_value.str ? + var->save_result.string_value.str : "NULL"); + return 1; +} + +bool wsrep_node_address_update (sys_var *self, THD* thd, enum_var_type type) +{ + return 0; +} + +void wsrep_node_address_init (const char* value) +{ + if (wsrep_node_address && strcmp(wsrep_node_address, value)) + my_free ((void*)wsrep_node_address); + + wsrep_node_address = (value) ? my_strdup(value, MYF(0)) : NULL; +} + +bool wsrep_slave_threads_check (sys_var *self, THD* thd, set_var* var) +{ + mysql_mutex_lock(&LOCK_wsrep_slave_threads); + wsrep_slave_count_change += (var->save_result.ulonglong_value - + wsrep_slave_threads); + mysql_mutex_unlock(&LOCK_wsrep_slave_threads); + + return 0; +} + +bool wsrep_slave_threads_update (sys_var *self, THD* thd, enum_var_type type) +{ + if (wsrep_slave_count_change > 0) + { + wsrep_create_appliers(wsrep_slave_count_change); + wsrep_slave_count_change = 0; + } + return false; +} + +bool wsrep_desync_check (sys_var *self, THD* thd, set_var* var) +{ + bool new_wsrep_desync= (bool) var->save_result.ulonglong_value; + if (wsrep_desync == new_wsrep_desync) { + if (new_wsrep_desync) { + push_warning (thd, Sql_condition::WARN_LEVEL_WARN, + ER_WRONG_VALUE_FOR_VAR, + "'wsrep_desync' is already ON."); + } else { + push_warning (thd, Sql_condition::WARN_LEVEL_WARN, + ER_WRONG_VALUE_FOR_VAR, + "'wsrep_desync' is already OFF."); + } + return false; + } + wsrep_status_t ret(WSREP_WARNING); + if (new_wsrep_desync) { + ret = wsrep->desync (wsrep); + if (ret != WSREP_OK) { + WSREP_WARN ("SET desync failed %d for schema: %s, query: %s", ret, + (thd->db ? thd->db : "(null)"), + thd->query()); + my_error (ER_CANNOT_USER, MYF(0), "'desync'", thd->query()); + return true; + } + } else { + ret = wsrep->resync (wsrep); + if (ret != WSREP_OK) { + WSREP_WARN ("SET resync failed %d for schema: %s, query: %s", ret, + (thd->db ? thd->db : "(null)"), + thd->query()); + my_error (ER_CANNOT_USER, MYF(0), "'resync'", thd->query()); + return true; + } + } + return false; +} + +bool wsrep_desync_update (sys_var *self, THD* thd, enum_var_type type) +{ + return false; +} + +bool wsrep_max_ws_size_update (sys_var *self, THD *thd, enum_var_type) +{ + char max_ws_size_opt[128]; + my_snprintf(max_ws_size_opt, sizeof(max_ws_size_opt), + "repl.max_ws_size=%d", wsrep_max_ws_size); + wsrep_status_t ret= wsrep->options_set(wsrep, max_ws_size_opt); + if (ret != WSREP_OK) + { + WSREP_ERROR("Set options returned %d", ret); + refresh_provider_options(); + return true; + } + return refresh_provider_options(); +} + +/* + * Status variables stuff below + */ +static inline void +wsrep_assign_to_mysql (SHOW_VAR* mysql, wsrep_stats_var* wsrep) +{ + mysql->name = wsrep->name; + switch (wsrep->type) { + case WSREP_VAR_INT64: + mysql->value = (char*) &wsrep->value._int64; + mysql->type = SHOW_LONGLONG; + break; + case WSREP_VAR_STRING: + mysql->value = (char*) &wsrep->value._string; + mysql->type = SHOW_CHAR_PTR; + break; + case WSREP_VAR_DOUBLE: + mysql->value = (char*) &wsrep->value._double; + mysql->type = SHOW_DOUBLE; + break; + } +} + +#if DYNAMIC +// somehow this mysql status thing works only with statically allocated arrays. +static SHOW_VAR* mysql_status_vars = NULL; +static int mysql_status_len = -1; +#else +static SHOW_VAR mysql_status_vars[512 + 1]; +static const int mysql_status_len = 512; +#endif + +static void export_wsrep_status_to_mysql(THD* thd) +{ + int wsrep_status_len, i; + + wsrep_free_status(thd); + + thd->wsrep_status_vars = wsrep->stats_get(wsrep); + + if (!thd->wsrep_status_vars) { + return; + } + + for (wsrep_status_len = 0; + thd->wsrep_status_vars[wsrep_status_len].name != NULL; + wsrep_status_len++) { + /* */ + } + +#if DYNAMIC + if (wsrep_status_len != mysql_status_len) { + void* tmp = realloc (mysql_status_vars, + (wsrep_status_len + 1) * sizeof(SHOW_VAR)); + if (!tmp) { + + sql_print_error ("Out of memory for wsrep status variables." + "Number of variables: %d", wsrep_status_len); + return; + } + + mysql_status_len = wsrep_status_len; + mysql_status_vars = (SHOW_VAR*)tmp; + } + /* @TODO: fix this: */ +#else + if (mysql_status_len < wsrep_status_len) wsrep_status_len= mysql_status_len; +#endif + + for (i = 0; i < wsrep_status_len; i++) + wsrep_assign_to_mysql (mysql_status_vars + i, thd->wsrep_status_vars + i); + + mysql_status_vars[wsrep_status_len].name = NullS; + mysql_status_vars[wsrep_status_len].value = NullS; + mysql_status_vars[wsrep_status_len].type = SHOW_LONG; +} + +int wsrep_show_status (THD *thd, SHOW_VAR *var, char *buff) +{ + export_wsrep_status_to_mysql(thd); + var->type= SHOW_ARRAY; + var->value= (char *) &mysql_status_vars; + return 0; +} + +void wsrep_free_status (THD* thd) +{ + if (thd->wsrep_status_vars) + { + wsrep->stats_free (wsrep, thd->wsrep_status_vars); + thd->wsrep_status_vars = 0; + } +} diff --git a/sql/wsrep_var.h b/sql/wsrep_var.h new file mode 100644 index 00000000000..f72df9d098a --- /dev/null +++ b/sql/wsrep_var.h @@ -0,0 +1,88 @@ +/* Copyright (C) 2013 Codership Oy <info@codership.com> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */ + +#ifndef WSREP_VAR_H +#define WSREP_VAR_H + +#define WSREP_CLUSTER_NAME "my_wsrep_cluster" +#define WSREP_NODE_INCOMING_AUTO "AUTO" +#define WSREP_START_POSITION_ZERO "00000000-0000-0000-0000-000000000000:-1" + +// MySQL variables funcs + +#include "sql_priv.h" +class sys_var; +class set_var; +class THD; + +int wsrep_init_vars(); + +#define CHECK_ARGS (sys_var *self, THD* thd, set_var *var) +#define UPDATE_ARGS (sys_var *self, THD* thd, enum_var_type type) +#define DEFAULT_ARGS (THD* thd, enum_var_type var_type) +#define INIT_ARGS (const char* opt) + +extern bool wsrep_on_update UPDATE_ARGS; +extern bool wsrep_causal_reads_update UPDATE_ARGS; +extern bool wsrep_sync_wait_update UPDATE_ARGS; +extern bool wsrep_start_position_check CHECK_ARGS; +extern bool wsrep_start_position_update UPDATE_ARGS; +extern void wsrep_start_position_init INIT_ARGS; + +extern bool wsrep_provider_check CHECK_ARGS; +extern bool wsrep_provider_update UPDATE_ARGS; +extern void wsrep_provider_init INIT_ARGS; + +extern bool wsrep_provider_options_check CHECK_ARGS; +extern bool wsrep_provider_options_update UPDATE_ARGS; +extern void wsrep_provider_options_init INIT_ARGS; + +extern bool wsrep_cluster_address_check CHECK_ARGS; +extern bool wsrep_cluster_address_update UPDATE_ARGS; +extern void wsrep_cluster_address_init INIT_ARGS; + +extern bool wsrep_cluster_name_check CHECK_ARGS; +extern bool wsrep_cluster_name_update UPDATE_ARGS; + +extern bool wsrep_node_name_check CHECK_ARGS; +extern bool wsrep_node_name_update UPDATE_ARGS; + +extern bool wsrep_node_address_check CHECK_ARGS; +extern bool wsrep_node_address_update UPDATE_ARGS; +extern void wsrep_node_address_init INIT_ARGS; + +extern bool wsrep_sst_method_check CHECK_ARGS; +extern bool wsrep_sst_method_update UPDATE_ARGS; +extern void wsrep_sst_method_init INIT_ARGS; + +extern bool wsrep_sst_receive_address_check CHECK_ARGS; +extern bool wsrep_sst_receive_address_update UPDATE_ARGS; + +extern bool wsrep_sst_auth_check CHECK_ARGS; +extern bool wsrep_sst_auth_update UPDATE_ARGS; +extern void wsrep_sst_auth_init INIT_ARGS; + +extern bool wsrep_sst_donor_check CHECK_ARGS; +extern bool wsrep_sst_donor_update UPDATE_ARGS; + +extern bool wsrep_slave_threads_check CHECK_ARGS; +extern bool wsrep_slave_threads_update UPDATE_ARGS; + +extern bool wsrep_desync_check CHECK_ARGS; +extern bool wsrep_desync_update UPDATE_ARGS; + +extern bool wsrep_max_ws_size_update UPDATE_ARGS; + +#endif /* WSREP_VAR_H */ diff --git a/sql/wsrep_xid.cc b/sql/wsrep_xid.cc new file mode 100644 index 00000000000..056da5748b9 --- /dev/null +++ b/sql/wsrep_xid.cc @@ -0,0 +1,150 @@ +/* Copyright 2015 Codership Oy <http://www.codership.com> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +//! @file some utility functions and classes not directly related to replication + +#include "wsrep_xid.h" +#include "sql_class.h" +#include "wsrep_mysqld.h" // for logging macros + +/* + * WSREPXid + */ + +#define WSREP_XID_PREFIX "WSREPXid" +#define WSREP_XID_PREFIX_LEN MYSQL_XID_PREFIX_LEN +#define WSREP_XID_UUID_OFFSET 8 +#define WSREP_XID_SEQNO_OFFSET (WSREP_XID_UUID_OFFSET + sizeof(wsrep_uuid_t)) +#define WSREP_XID_GTRID_LEN (WSREP_XID_SEQNO_OFFSET + sizeof(wsrep_seqno_t)) + +void wsrep_xid_init(XID* xid, const wsrep_uuid_t& uuid, wsrep_seqno_t seqno) +{ + xid->formatID= 1; + xid->gtrid_length= WSREP_XID_GTRID_LEN; + xid->bqual_length= 0; + memset(xid->data, 0, sizeof(xid->data)); + memcpy(xid->data, WSREP_XID_PREFIX, WSREP_XID_PREFIX_LEN); + memcpy(xid->data + WSREP_XID_UUID_OFFSET, &uuid, sizeof(wsrep_uuid_t)); + memcpy(xid->data + WSREP_XID_SEQNO_OFFSET, &seqno, sizeof(wsrep_seqno_t)); +} + +int wsrep_is_wsrep_xid(const void* xid_ptr) +{ + const XID* xid= reinterpret_cast<const XID*>(xid_ptr); + return (xid->formatID == 1 && + xid->gtrid_length == WSREP_XID_GTRID_LEN && + xid->bqual_length == 0 && + !memcmp(xid->data, WSREP_XID_PREFIX, WSREP_XID_PREFIX_LEN)); +} + +const wsrep_uuid_t* wsrep_xid_uuid(const XID& xid) +{ + if (wsrep_is_wsrep_xid(&xid)) + return reinterpret_cast<const wsrep_uuid_t*>(xid.data + + WSREP_XID_UUID_OFFSET); + else + return &WSREP_UUID_UNDEFINED; +} + +wsrep_seqno_t wsrep_xid_seqno(const XID& xid) +{ + if (wsrep_is_wsrep_xid(&xid)) + { + wsrep_seqno_t seqno; + memcpy(&seqno, xid.data + WSREP_XID_SEQNO_OFFSET, sizeof(wsrep_seqno_t)); + return seqno; + } + else + { + return WSREP_SEQNO_UNDEFINED; + } +} + +static my_bool set_SE_checkpoint(THD* unused, plugin_ref plugin, void* arg) +{ + XID* xid= static_cast<XID*>(arg); + handlerton* hton= plugin_data(plugin, handlerton *); + + if (hton->db_type == DB_TYPE_INNODB) + { + const wsrep_uuid_t* uuid(wsrep_xid_uuid(*xid)); + char uuid_str[40] = {0, }; + wsrep_uuid_print(uuid, uuid_str, sizeof(uuid_str)); + WSREP_DEBUG("Set WSREPXid for InnoDB: %s:%lld", + uuid_str, (long long)wsrep_xid_seqno(*xid)); + hton->wsrep_set_checkpoint(hton, xid); + } + + return FALSE; +} + +void wsrep_set_SE_checkpoint(XID& xid) +{ + plugin_foreach(NULL, set_SE_checkpoint, MYSQL_STORAGE_ENGINE_PLUGIN, &xid); +} + +void wsrep_set_SE_checkpoint(const wsrep_uuid_t& uuid, wsrep_seqno_t seqno) +{ + XID xid; + wsrep_xid_init(&xid, uuid, seqno); + wsrep_set_SE_checkpoint(xid); +} + +static my_bool get_SE_checkpoint(THD* unused, plugin_ref plugin, void* arg) +{ + XID* xid= reinterpret_cast<XID*>(arg); + handlerton* hton= plugin_data(plugin, handlerton *); + + if (hton->db_type == DB_TYPE_INNODB) + { + hton->wsrep_get_checkpoint(hton, xid); + const wsrep_uuid_t* uuid(wsrep_xid_uuid(*xid)); + char uuid_str[40] = {0, }; + wsrep_uuid_print(uuid, uuid_str, sizeof(uuid_str)); + WSREP_DEBUG("Read WSREPXid from InnoDB: %s:%lld", + uuid_str, (long long)wsrep_xid_seqno(*xid)); + } + + return FALSE; +} + +void wsrep_get_SE_checkpoint(XID& xid) +{ + plugin_foreach(NULL, get_SE_checkpoint, MYSQL_STORAGE_ENGINE_PLUGIN, &xid); +} + +void wsrep_get_SE_checkpoint(wsrep_uuid_t& uuid, wsrep_seqno_t& seqno) +{ + uuid= WSREP_UUID_UNDEFINED; + seqno= WSREP_SEQNO_UNDEFINED; + + XID xid; + memset(&xid, 0, sizeof(xid)); + xid.formatID= -1; + + wsrep_get_SE_checkpoint(xid); + + if (xid.formatID == -1) return; // nil XID + + if (!wsrep_is_wsrep_xid(&xid)) + { + WSREP_WARN("Read non-wsrep XID from storage engines."); + return; + } + + uuid= *wsrep_xid_uuid(xid); + seqno= wsrep_xid_seqno(xid); +} diff --git a/sql/wsrep_xid.h b/sql/wsrep_xid.h new file mode 100644 index 00000000000..8a43e49c733 --- /dev/null +++ b/sql/wsrep_xid.h @@ -0,0 +1,33 @@ +/* Copyright (C) 2015 Codership Oy <info@codership.com> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */ + +#ifndef WSREP_XID_H +#define WSREP_XID_H + +#include <my_config.h> +#include "../wsrep/wsrep_api.h" +#include "handler.h" // XID typedef + +void wsrep_xid_init(xid_t*, const wsrep_uuid_t&, wsrep_seqno_t); +int wsrep_is_wsrep_xid(const void* xid); +const wsrep_uuid_t* wsrep_xid_uuid(const XID&); +wsrep_seqno_t wsrep_xid_seqno(const XID&); + +//void wsrep_get_SE_checkpoint(XID&); /* uncomment if needed */ +void wsrep_get_SE_checkpoint(wsrep_uuid_t&, wsrep_seqno_t&); +//void wsrep_set_SE_checkpoint(XID&); /* uncomment if needed */ +void wsrep_set_SE_checkpoint(const wsrep_uuid_t&, wsrep_seqno_t); + +#endif /* WSREP_UTILS_H */ |