diff options
Diffstat (limited to 'sql/handler.cc')
-rw-r--r-- | sql/handler.cc | 324 |
1 files changed, 155 insertions, 169 deletions
diff --git a/sql/handler.cc b/sql/handler.cc index 518ca7b860d..cded7fb8cd8 100644 --- a/sql/handler.cc +++ b/sql/handler.cc @@ -32,6 +32,7 @@ #include "sql_acl.h" // SUPER_ACL #include "sql_base.h" // free_io_cache #include "discover.h" // extension_based_table_discovery, etc +#include "log.h" // for assert_LOCK_log_owner #include "log_event.h" // *_rows_log_event #include "create_options.h" #include "rpl_filter.h" @@ -51,6 +52,9 @@ #include "../storage/maria/ha_maria.h" #endif +#include "wsrep_mysqld.h" +#include "wsrep.h" + /* While we have legacy_db_type, we have this array to check for dups and to find handlerton from legacy_db_type. @@ -75,7 +79,6 @@ ulong savepoint_alloc_size= 0; static const LEX_STRING sys_table_aliases[]= { { C_STRING_WITH_LEN("INNOBASE") }, { C_STRING_WITH_LEN("INNODB") }, - { C_STRING_WITH_LEN("NDB") }, { C_STRING_WITH_LEN("NDBCLUSTER") }, { C_STRING_WITH_LEN("HEAP") }, { C_STRING_WITH_LEN("MEMORY") }, { C_STRING_WITH_LEN("MERGE") }, { C_STRING_WITH_LEN("MRG_MYISAM") }, { C_STRING_WITH_LEN("Maria") }, { C_STRING_WITH_LEN("Aria") }, @@ -106,6 +109,14 @@ static plugin_ref ha_default_plugin(THD *thd) return my_plugin_lock(thd, global_system_variables.table_plugin); } +static plugin_ref ha_default_tmp_plugin(THD *thd) +{ + if (thd->variables.tmp_table_plugin) + return thd->variables.tmp_table_plugin; + if (global_system_variables.tmp_table_plugin) + return my_plugin_lock(thd, global_system_variables.tmp_table_plugin); + return ha_default_plugin(thd); +} /** @brief Return the default storage engine handlerton for thread @@ -127,6 +138,16 @@ handlerton *ha_default_handlerton(THD *thd) } +handlerton *ha_default_tmp_handlerton(THD *thd) +{ + plugin_ref plugin= ha_default_tmp_plugin(thd); + DBUG_ASSERT(plugin); + handlerton *hton= plugin_hton(plugin); + DBUG_ASSERT(hton); + return hton; +} + + /** @brief Return the storage engine handlerton for the supplied name @@ -138,7 +159,7 @@ handlerton *ha_default_handlerton(THD *thd) RETURN pointer to storage engine plugin handle */ -plugin_ref ha_resolve_by_name(THD *thd, const LEX_STRING *name) +plugin_ref ha_resolve_by_name(THD *thd, const LEX_STRING *name, bool tmp_table) { const LEX_STRING *table_alias; plugin_ref plugin; @@ -148,7 +169,7 @@ redo: if (thd && !my_charset_latin1.coll->strnncoll(&my_charset_latin1, (const uchar *)name->str, name->length, (const uchar *)STRING_WITH_LEN("DEFAULT"), 0)) - return ha_default_plugin(thd); + return tmp_table ? ha_default_tmp_plugin(thd) : ha_default_plugin(thd); if ((plugin= my_plugin_lock_by_name(thd, name, MYSQL_STORAGE_ENGINE_PLUGIN))) { @@ -252,7 +273,8 @@ handler *get_new_handler(TABLE_SHARE *share, MEM_ROOT *alloc, Here the call to current_thd() is ok as we call this function a lot of times but we enter this branch very seldom. */ - DBUG_RETURN(get_new_handler(share, alloc, ha_default_handlerton(current_thd))); + file= get_new_handler(share, alloc, ha_default_handlerton(current_thd)); + DBUG_RETURN(file); } @@ -1133,6 +1155,25 @@ void trans_register_ha(THD *thd, bool all, handlerton *ht_arg) DBUG_VOID_RETURN; } + +static int prepare_or_error(handlerton *ht, THD *thd, bool all) +{ + int err= ht->prepare(ht, thd, all); + status_var_increment(thd->status_var.ha_prepare_count); + if (err) + { + /* avoid sending error, if we're going to replay the transaction */ +#ifdef WITH_WSREP + if (ht == wsrep_hton && + err != EMSGSIZE && + thd->wsrep_conflict_state != MUST_REPLAY) +#endif + my_error(ER_ERROR_DURING_COMMIT, MYF(0), err); + } + return err; +} + + /** @retval 0 ok @@ -1150,14 +1191,11 @@ int ha_prepare(THD *thd) { for (; ha_info; ha_info= ha_info->next()) { - int err; handlerton *ht= ha_info->ht(); - status_var_increment(thd->status_var.ha_prepare_count); if (ht->prepare) { - if ((err= ht->prepare(ht, thd, all))) + if (prepare_or_error(ht, thd, all)) { - my_error(ER_ERROR_DURING_COMMIT, MYF(0), err); ha_rollback_trans(thd, all); error=1; break; @@ -1355,8 +1393,9 @@ int ha_commit_trans(THD *thd, bool all) mdl_request.init(MDL_key::COMMIT, "", "", MDL_INTENTION_EXCLUSIVE, MDL_EXPLICIT); - if (thd->mdl_context.acquire_lock(&mdl_request, - thd->variables.lock_wait_timeout)) + if (!WSREP(thd) && + thd->mdl_context.acquire_lock(&mdl_request, + thd->variables.lock_wait_timeout)) { ha_rollback_trans(thd, all); DBUG_RETURN(1); @@ -1386,7 +1425,6 @@ int ha_commit_trans(THD *thd, bool all) for (Ha_trx_info *hi= ha_info; hi; hi= hi->next()) { - int err; handlerton *ht= hi->ht(); /* Do not call two-phase commit if this particular @@ -1399,12 +1437,7 @@ int ha_commit_trans(THD *thd, bool all) Sic: we know that prepare() is not NULL since otherwise trans->no_2pc would have been set. */ - err= ht->prepare(ht, thd, all); - status_var_increment(thd->status_var.ha_prepare_count); - if (err) - my_error(ER_ERROR_DURING_COMMIT, MYF(0), err); - - if (err) + if (prepare_or_error(ht, thd, all)) goto err; need_prepare_ordered|= (ht->prepare_ordered != NULL); @@ -1413,6 +1446,12 @@ int ha_commit_trans(THD *thd, bool all) DEBUG_SYNC(thd, "ha_commit_trans_after_prepare"); DBUG_EXECUTE_IF("crash_commit_after_prepare", DBUG_SUICIDE();); + if (!error && WSREP_ON && wsrep_is_wsrep_xid(&thd->transaction.xid_state.xid)) + { + // xid was rewritten by wsrep + xid= wsrep_xid_seqno(&thd->transaction.xid_state.xid); + } + if (!is_real_trans) { error= commit_one_phase_2(thd, all, trans, is_real_trans); @@ -1439,6 +1478,12 @@ int ha_commit_trans(THD *thd, bool all) done: DBUG_EXECUTE_IF("crash_commit_after", DBUG_SUICIDE();); + + /* documentation of which mutexes are (not) owned */ + mysql_mutex_assert_not_owner(&LOCK_prepare_ordered); + assert_LOCK_log_owner(false); + mysql_mutex_assert_not_owner(&LOCK_after_binlog_sync); + mysql_mutex_assert_not_owner(&LOCK_commit_ordered); RUN_HOOK(transaction, after_commit, (thd, FALSE)); goto end; @@ -1785,7 +1830,9 @@ static my_bool xarecover_handlerton(THD *unused, plugin_ref plugin, got, hton_name(hton)->str); for (int i=0; i < got; i ++) { - my_xid x=info->list[i].get_my_xid(); + my_xid x= WSREP_ON && wsrep_is_wsrep_xid(&info->list[i]) ? + wsrep_xid_seqno(&info->list[i]) : + info->list[i].get_my_xid(); if (!x) // not "mine" - that is generated by external TM { #ifndef DBUG_OFF @@ -3070,10 +3117,12 @@ int handler::update_auto_increment() variables->auto_increment_increment); auto_inc_intervals_count++; /* Row-based replication does not need to store intervals in binlog */ - if (mysql_bin_log.is_open() && !thd->is_current_stmt_binlog_format_row()) - thd->auto_inc_intervals_in_cur_stmt_for_binlog.append(auto_inc_interval_for_cur_row.minimum(), - auto_inc_interval_for_cur_row.values(), - variables->auto_increment_increment); + if (((WSREP(thd) && wsrep_emulate_bin_log ) || mysql_bin_log.is_open()) + && !thd->is_current_stmt_binlog_format_row()) + thd->auto_inc_intervals_in_cur_stmt_for_binlog. + append(auto_inc_interval_for_cur_row.minimum(), + auto_inc_interval_for_cur_row.values(), + variables->auto_increment_increment); } /* @@ -4366,10 +4415,10 @@ handler::ha_rename_partitions(const char *path) /** Tell the storage engine that it is allowed to "disable transaction" in the - handler. It is a hint that ACID is not required - it is used in NDB for + handler. It is a hint that ACID is not required - it was used in NDB for ALTER TABLE, for example, when data are copied to temporary table. A storage engine may treat this hint any way it likes. NDB for example - starts to commit every now and then automatically. + started to commit every now and then automatically. This hint can be safely ignored. */ int ha_enable_transaction(THD *thd, bool on) @@ -4950,12 +4999,12 @@ bool ha_table_exists(THD *thd, const char *db, const char *table_name, else if (engines_with_discover) hton= &dummy; - TABLE_SHARE *share= tdc_lock_share(db, table_name); - if (share) + TDC_element *element= tdc_lock_share(thd, db, table_name); + if (element && element != MY_ERRPTR) { if (hton) - *hton= share->db_type(); - tdc_unlock_share(share); + *hton= element->share->db_type(); + tdc_unlock_share(element); DBUG_RETURN(TRUE); } @@ -5188,145 +5237,6 @@ int ha_discover_table_names(THD *thd, LEX_STRING *db, MY_DIR *dirp, } -#ifdef HAVE_NDB_BINLOG -/* - TODO: change this into a dynamic struct - List<handlerton> does not work as - 1. binlog_end is called when MEM_ROOT is gone - 2. cannot work with thd MEM_ROOT as memory should be freed -*/ -#define MAX_HTON_LIST_ST 63 -struct hton_list_st -{ - handlerton *hton[MAX_HTON_LIST_ST]; - uint sz; -}; - -struct binlog_func_st -{ - enum_binlog_func fn; - void *arg; -}; - -/** @brief - Listing handlertons first to avoid recursive calls and deadlock -*/ -static my_bool binlog_func_list(THD *thd, plugin_ref plugin, void *arg) -{ - hton_list_st *hton_list= (hton_list_st *)arg; - handlerton *hton= plugin_hton(plugin); - if (hton->state == SHOW_OPTION_YES && hton->binlog_func) - { - uint sz= hton_list->sz; - if (sz == MAX_HTON_LIST_ST-1) - { - /* list full */ - return FALSE; - } - hton_list->hton[sz]= hton; - hton_list->sz= sz+1; - } - return FALSE; -} - -static my_bool binlog_func_foreach(THD *thd, binlog_func_st *bfn) -{ - hton_list_st hton_list; - uint i, sz; - - hton_list.sz= 0; - plugin_foreach(thd, binlog_func_list, - MYSQL_STORAGE_ENGINE_PLUGIN, &hton_list); - - for (i= 0, sz= hton_list.sz; i < sz ; i++) - hton_list.hton[i]->binlog_func(hton_list.hton[i], thd, bfn->fn, bfn->arg); - return FALSE; -} - -int ha_reset_logs(THD *thd) -{ - binlog_func_st bfn= {BFN_RESET_LOGS, 0}; - binlog_func_foreach(thd, &bfn); - return 0; -} - -void ha_reset_slave(THD* thd) -{ - binlog_func_st bfn= {BFN_RESET_SLAVE, 0}; - binlog_func_foreach(thd, &bfn); -} - -void ha_binlog_wait(THD* thd) -{ - binlog_func_st bfn= {BFN_BINLOG_WAIT, 0}; - binlog_func_foreach(thd, &bfn); -} - -int ha_binlog_end(THD* thd) -{ - binlog_func_st bfn= {BFN_BINLOG_END, 0}; - binlog_func_foreach(thd, &bfn); - return 0; -} - -int ha_binlog_index_purge_file(THD *thd, const char *file) -{ - binlog_func_st bfn= {BFN_BINLOG_PURGE_FILE, (void *)file}; - binlog_func_foreach(thd, &bfn); - return 0; -} - -struct binlog_log_query_st -{ - enum_binlog_command binlog_command; - const char *query; - uint query_length; - const char *db; - const char *table_name; -}; - -static my_bool binlog_log_query_handlerton2(THD *thd, - handlerton *hton, - void *args) -{ - struct binlog_log_query_st *b= (struct binlog_log_query_st*)args; - if (hton->state == SHOW_OPTION_YES && hton->binlog_log_query) - hton->binlog_log_query(hton, thd, - b->binlog_command, - b->query, - b->query_length, - b->db, - b->table_name); - return FALSE; -} - -static my_bool binlog_log_query_handlerton(THD *thd, - plugin_ref plugin, - void *args) -{ - return binlog_log_query_handlerton2(thd, plugin_hton(plugin), args); -} - -void ha_binlog_log_query(THD *thd, handlerton *hton, - enum_binlog_command binlog_command, - const char *query, uint query_length, - const char *db, const char *table_name) -{ - struct binlog_log_query_st b; - b.binlog_command= binlog_command; - b.query= query; - b.query_length= query_length; - b.db= db; - b.table_name= table_name; - if (hton == 0) - plugin_foreach(thd, binlog_log_query_handlerton, - MYSQL_STORAGE_ENGINE_PLUGIN, &b); - else - binlog_log_query_handlerton2(thd, hton, &b); -} -#endif - - /** Read first row between two ranges. Store ranges for future calls to read_range_next. @@ -5689,10 +5599,13 @@ static bool check_table_binlog_row_based(THD *thd, TABLE *table) DBUG_ASSERT(table->s->cached_row_logging_check == 0 || table->s->cached_row_logging_check == 1); - return (thd->is_current_stmt_binlog_format_row() && + return thd->is_current_stmt_binlog_format_row() && table->s->cached_row_logging_check && (thd->variables.option_bits & OPTION_BIN_LOG) && - mysql_bin_log.is_open()); + /* applier and replayer should not binlog */ + ((IF_WSREP(WSREP_EMULATE_BINLOG(thd) && + thd->wsrep_exec_mode != REPL_RECV, 0)) || + mysql_bin_log.is_open()); } @@ -5792,6 +5705,11 @@ static int binlog_log_row(TABLE* table, bool error= 0; THD *const thd= table->in_use; + /* only InnoDB tables will be replicated through binlog emulation */ + if (WSREP_EMULATE_BINLOG(thd) && + table->file->partition_ht()->db_type != DB_TYPE_INNODB) + return 0; + if (check_table_binlog_row_based(thd, table)) { MY_BITMAP cols; @@ -6121,6 +6039,74 @@ void handler::set_lock_type(enum thr_lock_type lock) table->reginfo.lock_type= lock; } +#ifdef WITH_WSREP +/** + @details + This function makes the storage engine to force the victim transaction + to abort. Currently, only innodb has this functionality, but any SE + implementing the wsrep API should provide this service to support + multi-master operation. + + @param bf_thd brute force THD asking for the abort + @param victim_thd victim THD to be aborted + + @return + always 0 +*/ + +int ha_abort_transaction(THD *bf_thd, THD *victim_thd, my_bool signal) +{ + DBUG_ENTER("ha_abort_transaction"); + if (!WSREP(bf_thd) && + !(wsrep_OSU_method_options == WSREP_OSU_RSU && + bf_thd->wsrep_exec_mode == TOTAL_ORDER)) { + DBUG_RETURN(0); + } + + THD_TRANS *trans= &victim_thd->transaction.all; + Ha_trx_info *ha_info= trans->ha_list, *ha_info_next; + + for (; ha_info; ha_info= ha_info_next) + { + handlerton *hton= ha_info->ht(); + if (!hton->abort_transaction) + WSREP_WARN("cannot abort transaction"); + else + hton->abort_transaction(hton, bf_thd, victim_thd, signal); + ha_info_next= ha_info->next(); + ha_info->reset(); /* keep it conveniently zero-filled */ + } + DBUG_RETURN(0); +} + +void ha_fake_trx_id(THD *thd) +{ + DBUG_ENTER("ha_fake_trx_id"); + if (!WSREP(thd)) + { + DBUG_VOID_RETURN; + } + + THD_TRANS *trans= &thd->transaction.all; + Ha_trx_info *ha_info= trans->ha_list, *ha_info_next; + + for (; ha_info; ha_info= ha_info_next) + { + handlerton *hton= ha_info->ht(); + if (!hton->fake_trx_id) + { + WSREP_WARN("cannot get fake InnoDB transaction ID"); + } + else + hton->fake_trx_id(hton, thd); + ha_info_next= ha_info->next(); + ha_info->reset(); /* keep it conveniently zero-filled */ + } + DBUG_VOID_RETURN; +} +#endif /* WITH_WSREP */ + + #ifdef TRANS_LOG_MGM_EXAMPLE_CODE /* Example of transaction log management functions based on assumption that logs |