diff options
Diffstat (limited to 'sql/handler.cc')
-rw-r--r-- | sql/handler.cc | 619 |
1 files changed, 555 insertions, 64 deletions
diff --git a/sql/handler.cc b/sql/handler.cc index eba9b0dc5ea..0fb61895a61 100644 --- a/sql/handler.cc +++ b/sql/handler.cc @@ -576,6 +576,295 @@ void ha_close_connection(THD* thd) ======================= TRANSACTIONS ===================================*/ /** + Transaction handling in the server + ================================== + + In each client connection, MySQL maintains two transactional + states: + - a statement transaction, + - a standard, also called normal transaction. + + Historical note + --------------- + "Statement transaction" is a non-standard term that comes + from the times when MySQL supported BerkeleyDB storage engine. + + First of all, it should be said that in BerkeleyDB auto-commit + mode auto-commits operations that are atomic to the storage + engine itself, such as a write of a record, and are too + fine-grained to be atomic from the application perspective + (MySQL). One SQL statement could involve many BerkeleyDB + auto-committed operations and thus BerkeleyDB auto-commit was of + little use to MySQL. + + Secondly, instead of SQL standard savepoints, BerkeleyDB + provided the concept of "nested transactions". In a nutshell, + transactions could be arbitrarily nested, but when the parent + transaction was committed or aborted, all its child (nested) + transactions were committed or aborted as well. + Commit of a nested transaction, in turn, made its changes + visible, but not durable: it destroyed the nested transaction, + all its changes would become available to the parent and + currently active nested transactions of this parent. + + So the mechanism of nested transactions was employed to + provide "all or nothing" guarantee of SQL statements + required by the standard. + A nested transaction would be created at start of each SQL + statement, and destroyed (committed or aborted) at statement + end. Such nested transaction was internally referred to as + a "statement transaction" and gave birth to the term. 
+ + <Historical note ends> + + Since then a statement transaction is started for each statement + that accesses transactional tables or uses the binary log. If + the statement succeeds, the statement transaction is committed. + If the statement fails, the transaction is rolled back. Commits + of statement transactions are not durable -- each such + transaction is nested in the normal transaction, and if the + normal transaction is rolled back, the effects of all enclosed + statement transactions are undone as well. Technically, + a statement transaction can be viewed as a savepoint which is + maintained automatically in order to make effects of one + statement atomic. + + The normal transaction is started by the user and is ended + usually upon a user request as well. The normal transaction + encloses transactions of all statements issued between + its beginning and its end. + In autocommit mode, the normal transaction is equivalent + to the statement transaction. + + Since MySQL supports PSEA (pluggable storage engine + architecture), more than one transactional engine can be + active at a time. Hence transactions, from the server + point of view, are always distributed. In particular, + transactional state is maintained independently for each + engine. In order to commit a transaction the two phase + commit protocol is employed. + + Not all statements are executed in context of a transaction. + Administrative and status information statements do not modify + engine data, and thus do not start a statement transaction and + also have no effect on the normal transaction. Examples of such + statements are SHOW STATUS and RESET SLAVE. + + Similarly DDL statements are not transactional, + and therefore a transaction is [almost] never started for a DDL + statement. The difference between a DDL statement and a purely + administrative statement though is that a DDL statement always + commits the current transaction before proceeding, if there is + any. 
+ + At last, SQL statements that work with non-transactional + engines also have no effect on the transaction state of the + connection. Even though they are written to the binary log, + and the binary log is, overall, transactional, the writes + are done in "write-through" mode, directly to the binlog + file, followed by an OS cache sync, in other words, + bypassing the binlog undo log (translog). + They do not commit the current normal transaction. + A failure of a statement that uses non-transactional tables + would cause a rollback of the statement transaction, but + in case no transactional tables are used, + no statement transaction is started. + + Data layout + ----------- + + The server stores its transaction-related data in + thd->transaction. This structure has two members of type + THD_TRANS. These members correspond to the statement and + normal transactions respectively: + + - thd->transaction.stmt contains a list of engines + that are participating in the given statement + - thd->transaction.all contains a list of engines that + have participated in any of the statement transactions started + within the context of the normal transaction. + Each element of the list contains a pointer to the storage + engine, engine-specific transactional data, and engine-specific + transaction flags. + + In autocommit mode thd->transaction.all is empty. + Instead, data of thd->transaction.stmt is + used to commit/rollback the normal transaction. + + The list of registered engines has a few important properties: + - no engine is registered in the list twice + - engines are present in the list in reverse temporal order -- + new participants are always added to the beginning of the list. + + Transaction life cycle + ---------------------- + + When a new connection is established, thd->transaction + members are initialized to an empty state. + If a statement uses any tables, all affected engines + are registered in the statement engine list. 
In + non-autocommit mode, the same engines are registered in + the normal transaction list. + At the end of the statement, the server issues a commit + or a roll back for all engines in the statement list. + At this point transaction flags of an engine, if any, are + propagated from the statement list to the list of the normal + transaction. + When commit/rollback is finished, the statement list is + cleared. It will be filled in again by the next statement, + and emptied again at the next statement's end. + + The normal transaction is committed in a similar way + (by going over all engines in thd->transaction.all list) + but at different times: + - when a COMMIT SQL statement is issued by the user + - implicitly, by the server, at the beginning of a DDL statement + or SET AUTOCOMMIT={0|1} statement. + + The normal transaction can be rolled back as well: + - if the user has requested so, by issuing ROLLBACK SQL + statement + - if one of the storage engines requested a rollback + by setting thd->transaction_rollback_request. This may + happen, e.g., when the transaction in the engine was + chosen as a victim of the internal deadlock resolution algorithm + and rolled back internally. When such a situation happens, there + is little the server can do and the only option is to rollback + transactions in all other participating engines. In this case + the rollback is accompanied by an error sent to the user. + + As follows from the use cases above, the normal transaction + is never committed when there is an outstanding statement + transaction. In most cases there is no conflict, since + commits of the normal transaction are issued by a stand-alone + administrative or DDL statement, thus no outstanding statement + transaction of the previous statement exists. Besides, + all statements that manipulate the normal transaction + are prohibited in stored functions and triggers, therefore + no conflicting situation can occur in a sub-statement either. 
+ The remaining rare cases when the server explicitly has + to commit the statement transaction prior to committing the normal + one cover error-handling scenarios (see for example + SQLCOM_LOCK_TABLES). + + When committing a statement or a normal transaction, the server + either uses the two-phase commit protocol, or issues a commit + in each engine independently. The two-phase commit protocol + is used only if: + - all participating engines support two-phase commit (provide + handlerton::prepare PSEA API call) and + - transactions in at least two engines modify data (i.e. are + not read-only). + + Note that the two phase commit is used for + statement transactions, even though they are not durable anyway. + This is done to ensure logical consistency of data in a multiple- + engine transaction. + For example, imagine that some day MySQL supports unique + constraint checks deferred till the end of statement. In such + case a commit in one of the engines may yield ER_DUP_KEY, + and MySQL should be able to gracefully abort statement + transactions of other participants. + + After the normal transaction has been committed, + thd->transaction.all list is cleared. + + When a connection is closed, the current normal transaction, if + any, is rolled back. + + Roles and responsibilities + -------------------------- + + The server has no way to know that an engine participates in + the statement and a transaction has been started + in it unless the engine says so. Thus, in order to be + a part of a transaction, the engine must "register" itself. + This is done by invoking trans_register_ha() server call. + Normally the engine registers itself whenever handler::external_lock() + is called. trans_register_ha() can be invoked many times: if + an engine is already registered, the call does nothing. + In case autocommit is not set, the engine must register itself + twice -- both in the statement list and in the normal transaction + list. 
+ In which list to register is a parameter of trans_register_ha(). + + Note, that although the registration interface in itself is + fairly clear, the current usage practice often leads to undesired + effects. E.g. since a call to trans_register_ha() in most engines + is embedded into implementation of handler::external_lock(), some + DDL statements start a transaction (at least from the server + point of view) even though they are not expected to. E.g. + CREATE TABLE does not start a transaction, since + handler::external_lock() is never called during CREATE TABLE. But + CREATE TABLE ... SELECT does, since handler::external_lock() is + called for the table that is being selected from. This has no + practical effects currently, but must be kept in mind + nevertheless. + + Once an engine is registered, the server will do the rest + of the work. + + During statement execution, whenever any of the data-modifying + PSEA API methods is used, e.g. handler::write_row() or + handler::update_row(), the read-write flag is raised in the + statement transaction for the involved engine. + Currently all PSEA calls are "traced", and the data can not be + changed in a way other than issuing a PSEA call. Important: + unless this invariant is preserved the server will not know that + a transaction in a given engine is read-write and will not + involve the two-phase commit protocol! + + At the end of a statement, server call + ha_autocommit_or_rollback() is invoked. This call in turn + invokes handlerton::prepare() for every involved engine. + Prepare is followed by a call to handlerton::commit_one_phase(). + If a one-phase commit will suffice, handlerton::prepare() is not + invoked and the server only calls handlerton::commit_one_phase(). + At statement commit, the statement-related read-write engine + flag is propagated to the corresponding flag in the normal + transaction. When the commit is complete, the list of registered + engines is cleared. 
+ + Rollback is handled in a similar fashion. + + Additional notes on DDL and the normal transaction. + --------------------------------------------------- + + DDLs and operations with non-transactional engines + do not "register" in thd->transaction lists, and thus do not + modify the transaction state. Besides, each DDL in + MySQL is prefixed with an implicit normal transaction commit + (a call to end_active_trans()), and thus leaves nothing + to modify. + However, as it has been pointed out with CREATE TABLE .. SELECT, + some DDL statements can start a *new* transaction. + + Behaviour of the server in this case is currently badly + defined. + DDL statements use a form of "semantic" logging + to maintain atomicity: if CREATE TABLE .. SELECT failed, + the newly created table is deleted. + In addition, some DDL statements issue interim transaction + commits: e.g. ALTER TABLE issues a commit after data is copied + from the original table to the internal temporary table. Other + statements, e.g. CREATE TABLE ... SELECT do not always commit + after themselves. + And finally there is a group of DDL statements such as + RENAME/DROP TABLE that doesn't start a new transaction + and doesn't commit. + + This diversity makes it hard to say what will happen if + by chance a stored function is invoked during a DDL -- + whether any modifications it makes will be committed or not + is not clear. Fortunately, the SQL grammar of only a few DDLs allows + invocation of a stored function. + + A consistent behaviour is perhaps to always commit the normal + transaction after all DDLs, just like the statement transaction + is always committed at the end of all statements. +*/ + +/** Register a storage engine for a transaction. 
Every storage engine MUST call this function when it starts @@ -592,7 +881,7 @@ void ha_close_connection(THD* thd) void trans_register_ha(THD *thd, bool all, handlerton *ht_arg) { THD_TRANS *trans; - handlerton **ht; + Ha_trx_info *ha_info; DBUG_ENTER("trans_register_ha"); DBUG_PRINT("enter",("%s", all ? "all" : "stmt")); @@ -604,12 +893,13 @@ void trans_register_ha(THD *thd, bool all, handlerton *ht_arg) else trans= &thd->transaction.stmt; - for (ht=trans->ht; *ht; ht++) - if (*ht == ht_arg) - DBUG_VOID_RETURN; /* already registered, return */ + ha_info= thd->ha_data[ht_arg->slot].ha_info + static_cast<unsigned>(all); + + if (ha_info->is_started()) + DBUG_VOID_RETURN; /* already registered, return */ + + ha_info->register_ha(trans, ht_arg); - trans->ht[trans->nht++]=ht_arg; - DBUG_ASSERT(*ht == ht_arg); trans->no_2pc|=(ht_arg->prepare==0); if (thd->transaction.xid_state.xid.is_null()) thd->transaction.xid_state.xid.set(thd->query_id); @@ -626,18 +916,19 @@ int ha_prepare(THD *thd) { int error=0, all=1; THD_TRANS *trans=all ? &thd->transaction.all : &thd->transaction.stmt; - handlerton **ht=trans->ht; + Ha_trx_info *ha_info= trans->ha_list; DBUG_ENTER("ha_prepare"); #ifdef USING_TRANSACTIONS - if (trans->nht) + if (ha_info) { - for (; *ht; ht++) + for (; ha_info; ha_info= ha_info->next()) { int err; + handlerton *ht= ha_info->ht(); status_var_increment(thd->status_var.ha_prepare_count); - if ((*ht)->prepare) + if (ht->prepare) { - if ((err= (*(*ht)->prepare)(*ht, thd, all))) + if ((err= ht->prepare(ht, thd, all))) { my_error(ER_ERROR_DURING_COMMIT, MYF(0), err); ha_rollback_trans(thd, all); @@ -649,7 +940,7 @@ int ha_prepare(THD *thd) { push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN, ER_ILLEGAL_HA, ER(ER_ILLEGAL_HA), - ha_resolve_storage_engine_name(*ht)); + ha_resolve_storage_engine_name(ht)); } } } @@ -658,6 +949,62 @@ int ha_prepare(THD *thd) } /** + Check if we can skip the two-phase commit. 
+ + A helper function to evaluate if two-phase commit is mandatory. + As a side effect, propagates the read-only/read-write flags + of the statement transaction to its enclosing normal transaction. + + @retval TRUE we must run a two-phase commit. Returned + if we have at least two engines with read-write changes. + @retval FALSE Don't need two-phase commit. Even if we have two + transactional engines, we can run two independent + commits if changes in one of the engines are read-only. +*/ + +static +bool +ha_check_and_coalesce_trx_read_only(THD *thd, Ha_trx_info *ha_list, + bool all) +{ + /* The number of storage engines that have actual changes. */ + unsigned rw_ha_count= 0; + Ha_trx_info *ha_info; + + for (ha_info= ha_list; ha_info; ha_info= ha_info->next()) + { + if (ha_info->is_trx_read_write()) + ++rw_ha_count; + + if (! all) + { + Ha_trx_info *ha_info_all= &thd->ha_data[ha_info->ht()->slot].ha_info[1]; + DBUG_ASSERT(ha_info != ha_info_all); + /* + Merge read-only/read-write information about statement + transaction to its enclosing normal transaction. Do this + only if in a real transaction -- that is, if we know + that ha_info_all is registered in thd->transaction.all. + Since otherwise we only clutter the normal transaction flags. + */ + if (ha_info_all->is_started()) /* FALSE if autocommit. */ + ha_info_all->coalesce_trx_with(ha_info); + } + else if (rw_ha_count > 1) + { + /* + It is a normal transaction, so we don't need to merge read/write + information up, and the need for two-phase commit has been + already established. Break the loop prematurely. + */ + break; + } + } + return rw_ha_count > 1; +} + + +/** @retval 0 ok @retval @@ -674,12 +1021,25 @@ int ha_prepare(THD *thd) int ha_commit_trans(THD *thd, bool all) { int error= 0, cookie= 0; + /* + 'all' means that this is either an explicit commit issued by + user, or an implicit commit issued by a DDL. + */ THD_TRANS *trans= all ? 
&thd->transaction.all : &thd->transaction.stmt; - bool is_real_trans= all || thd->transaction.all.nht == 0; - handlerton **ht= trans->ht; + bool is_real_trans= all || thd->transaction.all.ha_list == 0; + Ha_trx_info *ha_info= trans->ha_list; my_xid xid= thd->transaction.xid_state.xid.get_my_xid(); DBUG_ENTER("ha_commit_trans"); + /* + We must not commit the normal transaction if a statement + transaction is pending. Otherwise statement transaction + flags will not get propagated to its normal transaction's + counterpart. + */ + DBUG_ASSERT(thd->transaction.stmt.ha_list == NULL || + trans == &thd->transaction.stmt); + if (thd->in_sub_stmt) { /* @@ -701,8 +1061,10 @@ int ha_commit_trans(THD *thd, bool all) DBUG_RETURN(2); } #ifdef USING_TRANSACTIONS - if (trans->nht) + if (ha_info) { + bool must_2pc; + if (is_real_trans && wait_if_global_read_lock(thd, 0, 0)) { ha_rollback_trans(thd, all); @@ -727,12 +1089,26 @@ int ha_commit_trans(THD *thd, bool all) if (is_real_trans) /* not a statement commit */ thd->stmt_map.close_transient_cursors(); - if (!trans->no_2pc && trans->nht > 1) + must_2pc= ha_check_and_coalesce_trx_read_only(thd, ha_info, all); + + if (!trans->no_2pc && must_2pc) { - for (; *ht && !error; ht++) + for (; ha_info && !error; ha_info= ha_info->next()) { int err; - if ((err= (*(*ht)->prepare)(*ht, thd, all))) + handlerton *ht= ha_info->ht(); + /* + Do not call two-phase commit if this particular + transaction is read-only. This allows for simpler + implementation in engines that are always read-only. + */ + if (! ha_info->is_trx_read_write()) + continue; + /* + Sic: we know that prepare() is not NULL since otherwise + trans->no_2pc would have been set. + */ + if ((err= ht->prepare(ht, thd, all))) { my_error(ER_ERROR_DURING_COMMIT, MYF(0), err); error= 1; @@ -770,24 +1146,26 @@ int ha_commit_one_phase(THD *thd, bool all) { int error=0; THD_TRANS *trans=all ? 
&thd->transaction.all : &thd->transaction.stmt; - bool is_real_trans=all || thd->transaction.all.nht == 0; - handlerton **ht=trans->ht; + bool is_real_trans=all || thd->transaction.all.ha_list == 0; + Ha_trx_info *ha_info= trans->ha_list, *ha_info_next; DBUG_ENTER("ha_commit_one_phase"); #ifdef USING_TRANSACTIONS - if (trans->nht) + if (ha_info) { - for (ht=trans->ht; *ht; ht++) + for (; ha_info; ha_info= ha_info_next) { int err; - if ((err= (*(*ht)->commit)(*ht, thd, all))) + handlerton *ht= ha_info->ht(); + if ((err= ht->commit(ht, thd, all))) { my_error(ER_ERROR_DURING_COMMIT, MYF(0), err); error=1; } status_var_increment(thd->status_var.ha_commit_count); - *ht= 0; + ha_info_next= ha_info->next(); + ha_info->reset(); /* keep it conveniently zero-filled */ } - trans->nht=0; + trans->ha_list= 0; trans->no_2pc=0; if (is_real_trans) thd->transaction.xid_state.xid.null(); @@ -810,8 +1188,17 @@ int ha_rollback_trans(THD *thd, bool all) { int error=0; THD_TRANS *trans=all ? &thd->transaction.all : &thd->transaction.stmt; - bool is_real_trans=all || thd->transaction.all.nht == 0; + Ha_trx_info *ha_info= trans->ha_list, *ha_info_next; + bool is_real_trans=all || thd->transaction.all.ha_list == 0; DBUG_ENTER("ha_rollback_trans"); + + /* + We must not rollback the normal transaction if a statement + transaction is pending. 
+ */ + DBUG_ASSERT(thd->transaction.stmt.ha_list == NULL || + trans == &thd->transaction.stmt); + if (thd->in_sub_stmt) { /* @@ -826,24 +1213,26 @@ int ha_rollback_trans(THD *thd, bool all) DBUG_RETURN(1); } #ifdef USING_TRANSACTIONS - if (trans->nht) + if (ha_info) { /* Close all cursors that can not survive ROLLBACK */ if (is_real_trans) /* not a statement commit */ thd->stmt_map.close_transient_cursors(); - for (handlerton **ht=trans->ht; *ht; ht++) + for (; ha_info; ha_info= ha_info_next) { int err; - if ((err= (*(*ht)->rollback)(*ht, thd, all))) + handlerton *ht= ha_info->ht(); + if ((err= ht->rollback(ht, thd, all))) { // cannot happen my_error(ER_ERROR_DURING_ROLLBACK, MYF(0), err); error=1; } status_var_increment(thd->status_var.ha_rollback_count); - *ht= 0; + ha_info_next= ha_info->next(); + ha_info->reset(); /* keep it conveniently zero-filled */ } - trans->nht=0; + trans->ha_list= 0; trans->no_2pc=0; if (is_real_trans) thd->transaction.xid_state.xid.null(); @@ -889,17 +1278,19 @@ int ha_autocommit_or_rollback(THD *thd, int error) { DBUG_ENTER("ha_autocommit_or_rollback"); #ifdef USING_TRANSACTIONS - if (thd->transaction.stmt.nht) + if (thd->transaction.stmt.ha_list) { if (!error) { - if (ha_commit_stmt(thd)) + if (ha_commit_trans(thd, 0)) error=1; } - else if (thd->transaction_rollback_request && !thd->in_sub_stmt) - (void) ha_rollback(thd); - else - (void) ha_rollback_stmt(thd); + else + { + (void) ha_rollback_trans(thd, 0); + if (thd->transaction_rollback_request && !thd->in_sub_stmt) + (void) ha_rollback(thd); + } thd->variables.tx_isolation=thd->session_tx_isolation; } @@ -1199,7 +1590,7 @@ bool mysql_xa_recover(THD *thd) } pthread_mutex_unlock(&LOCK_xid_cache); - send_eof(thd); + my_eof(thd); DBUG_RETURN(0); } @@ -1246,43 +1637,49 @@ int ha_rollback_to_savepoint(THD *thd, SAVEPOINT *sv) int error=0; THD_TRANS *trans= (thd->in_sub_stmt ? 
&thd->transaction.stmt : &thd->transaction.all); - handlerton **ht=trans->ht, **end_ht; + Ha_trx_info *ha_info, *ha_info_next; + DBUG_ENTER("ha_rollback_to_savepoint"); - trans->nht=sv->nht; trans->no_2pc=0; - end_ht=ht+sv->nht; /* rolling back to savepoint in all storage engines that were part of the transaction when the savepoint was set */ - for (; ht < end_ht; ht++) + for (ha_info= sv->ha_list; ha_info; ha_info= ha_info->next()) { int err; - DBUG_ASSERT((*ht)->savepoint_set != 0); - if ((err= (*(*ht)->savepoint_rollback)(*ht, thd, (uchar *)(sv+1)+(*ht)->savepoint_offset))) + handlerton *ht= ha_info->ht(); + DBUG_ASSERT(ht); + DBUG_ASSERT(ht->savepoint_set != 0); + if ((err= ht->savepoint_rollback(ht, thd, + (uchar *)(sv+1)+ht->savepoint_offset))) { // cannot happen my_error(ER_ERROR_DURING_ROLLBACK, MYF(0), err); error=1; } status_var_increment(thd->status_var.ha_savepoint_rollback_count); - trans->no_2pc|=(*ht)->prepare == 0; + trans->no_2pc|= ht->prepare == 0; } /* rolling back the transaction in all storage engines that were not part of the transaction when the savepoint was set */ - for (; *ht ; ht++) + for (ha_info= trans->ha_list; ha_info != sv->ha_list; + ha_info= ha_info_next) { int err; - if ((err= (*(*ht)->rollback)(*ht, thd, !thd->in_sub_stmt))) + handlerton *ht= ha_info->ht(); + if ((err= ht->rollback(ht, thd, !thd->in_sub_stmt))) { // cannot happen my_error(ER_ERROR_DURING_ROLLBACK, MYF(0), err); error=1; } status_var_increment(thd->status_var.ha_rollback_count); - *ht=0; // keep it conveniently zero-filled + ha_info_next= ha_info->next(); + ha_info->reset(); /* keep it conveniently zero-filled */ } + trans->ha_list= sv->ha_list; DBUG_RETURN(error); } @@ -1297,26 +1694,32 @@ int ha_savepoint(THD *thd, SAVEPOINT *sv) int error=0; THD_TRANS *trans= (thd->in_sub_stmt ? 
&thd->transaction.stmt : &thd->transaction.all); - handlerton **ht=trans->ht; + Ha_trx_info *ha_info= trans->ha_list; DBUG_ENTER("ha_savepoint"); #ifdef USING_TRANSACTIONS - for (; *ht; ht++) + for (; ha_info; ha_info= ha_info->next()) { int err; - if (! (*ht)->savepoint_set) + handlerton *ht= ha_info->ht(); + DBUG_ASSERT(ht); + if (! ht->savepoint_set) { my_error(ER_CHECK_NOT_IMPLEMENTED, MYF(0), "SAVEPOINT"); error=1; break; } - if ((err= (*(*ht)->savepoint_set)(*ht, thd, (uchar *)(sv+1)+(*ht)->savepoint_offset))) + if ((err= ht->savepoint_set(ht, thd, (uchar *)(sv+1)+ht->savepoint_offset))) { // cannot happen my_error(ER_GET_ERRNO, MYF(0), err); error=1; } status_var_increment(thd->status_var.ha_savepoint_count); } - sv->nht=trans->nht; + /* + Remember the list of registered storage engines. All new + engines are prepended to the beginning of the list. + */ + sv->ha_list= trans->ha_list; #endif /* USING_TRANSACTIONS */ DBUG_RETURN(error); } @@ -1324,20 +1727,19 @@ int ha_savepoint(THD *thd, SAVEPOINT *sv) int ha_release_savepoint(THD *thd, SAVEPOINT *sv) { int error=0; - THD_TRANS *trans= (thd->in_sub_stmt ? &thd->transaction.stmt : - &thd->transaction.all); - handlerton **ht=trans->ht, **end_ht; + Ha_trx_info *ha_info= sv->ha_list; DBUG_ENTER("ha_release_savepoint"); - end_ht=ht+sv->nht; - for (; ht < end_ht; ht++) + for (; ha_info; ha_info= ha_info->next()) { int err; - if (!(*ht)->savepoint_release) + handlerton *ht= ha_info->ht(); + /* Savepoint life time is enclosed into transaction life time. 
*/ + DBUG_ASSERT(ht); + if (!ht->savepoint_release) continue; - if ((err= (*(*ht)->savepoint_release)(*ht, thd, - (uchar *)(sv+1)+ - (*ht)->savepoint_offset))) + if ((err= ht->savepoint_release(ht, thd, + (uchar *)(sv+1) + ht->savepoint_offset))) { // cannot happen my_error(ER_GET_ERRNO, MYF(0), err); error=1; @@ -2506,6 +2908,36 @@ int handler::ha_check(THD *thd, HA_CHECK_OPT *check_opt) return update_frm_version(table); } +/** + A helper function to mark a transaction read-write, + if it is started. +*/ + +inline +void +handler::mark_trx_read_write() +{ + Ha_trx_info *ha_info= &ha_thd()->ha_data[ht->slot].ha_info[0]; + /* + When a storage engine method is called, the transaction must + have been started, unless it's a DDL call, for which the + storage engine starts the transaction internally, and commits + it internally, without registering in the ha_list. + Unfortunately here we can't know know for sure if the engine + has registered the transaction or not, so we must check. + */ + if (ha_info->is_started()) + { + DBUG_ASSERT(has_transactions()); + /* + table_share can be NULL in ha_delete_table(). See implementation + of standalone function ha_delete_table() in sql_base.cc. + */ + if (table_share == NULL || table_share->tmp_table == NO_TMP_TABLE) + ha_info->set_trx_read_write(); + } +} + /** Repair table: public interface. 
@@ -2516,6 +2948,9 @@ int handler::ha_check(THD *thd, HA_CHECK_OPT *check_opt) int handler::ha_repair(THD* thd, HA_CHECK_OPT* check_opt) { int result; + + mark_trx_read_write(); + if ((result= repair(thd, check_opt))) return result; return update_frm_version(table); @@ -2532,6 +2967,8 @@ int handler::ha_bulk_update_row(const uchar *old_data, uchar *new_data, uint *dup_key_found) { + mark_trx_read_write(); + return bulk_update_row(old_data, new_data, dup_key_found); } @@ -2545,6 +2982,8 @@ handler::ha_bulk_update_row(const uchar *old_data, uchar *new_data, int handler::ha_delete_all_rows() { + mark_trx_read_write(); + return delete_all_rows(); } @@ -2558,6 +2997,8 @@ handler::ha_delete_all_rows() int handler::ha_reset_auto_increment(ulonglong value) { + mark_trx_read_write(); + return reset_auto_increment(value); } @@ -2571,6 +3012,8 @@ handler::ha_reset_auto_increment(ulonglong value) int handler::ha_backup(THD* thd, HA_CHECK_OPT* check_opt) { + mark_trx_read_write(); + return backup(thd, check_opt); } @@ -2584,6 +3027,8 @@ handler::ha_backup(THD* thd, HA_CHECK_OPT* check_opt) int handler::ha_restore(THD* thd, HA_CHECK_OPT* check_opt) { + mark_trx_read_write(); + return restore(thd, check_opt); } @@ -2597,6 +3042,8 @@ handler::ha_restore(THD* thd, HA_CHECK_OPT* check_opt) int handler::ha_optimize(THD* thd, HA_CHECK_OPT* check_opt) { + mark_trx_read_write(); + return optimize(thd, check_opt); } @@ -2610,6 +3057,8 @@ handler::ha_optimize(THD* thd, HA_CHECK_OPT* check_opt) int handler::ha_analyze(THD* thd, HA_CHECK_OPT* check_opt) { + mark_trx_read_write(); + return analyze(thd, check_opt); } @@ -2623,6 +3072,8 @@ handler::ha_analyze(THD* thd, HA_CHECK_OPT* check_opt) bool handler::ha_check_and_repair(THD *thd) { + mark_trx_read_write(); + return check_and_repair(thd); } @@ -2636,6 +3087,8 @@ handler::ha_check_and_repair(THD *thd) int handler::ha_disable_indexes(uint mode) { + mark_trx_read_write(); + return disable_indexes(mode); } @@ -2649,6 +3102,8 @@ 
handler::ha_disable_indexes(uint mode) int handler::ha_enable_indexes(uint mode) { + mark_trx_read_write(); + return enable_indexes(mode); } @@ -2662,6 +3117,8 @@ handler::ha_enable_indexes(uint mode) int handler::ha_discard_or_import_tablespace(my_bool discard) { + mark_trx_read_write(); + return discard_or_import_tablespace(discard); } @@ -2677,6 +3134,8 @@ handler::ha_discard_or_import_tablespace(my_bool discard) void handler::ha_prepare_for_alter() { + mark_trx_read_write(); + prepare_for_alter(); } @@ -2690,6 +3149,8 @@ handler::ha_prepare_for_alter() int handler::ha_rename_table(const char *from, const char *to) { + mark_trx_read_write(); + return rename_table(from, to); } @@ -2703,6 +3164,8 @@ handler::ha_rename_table(const char *from, const char *to) int handler::ha_delete_table(const char *name) { + mark_trx_read_write(); + return delete_table(name); } @@ -2716,6 +3179,8 @@ handler::ha_delete_table(const char *name) void handler::ha_drop_table(const char *name) { + mark_trx_read_write(); + return drop_table(name); } @@ -2729,6 +3194,8 @@ handler::ha_drop_table(const char *name) int handler::ha_create(const char *name, TABLE *form, HA_CREATE_INFO *info) { + mark_trx_read_write(); + return create(name, form, info); } @@ -2743,6 +3210,8 @@ int handler::ha_create_handler_files(const char *name, const char *old_name, int action_flag, HA_CREATE_INFO *info) { + mark_trx_read_write(); + return create_handler_files(name, old_name, action_flag, info); } @@ -2761,6 +3230,8 @@ handler::ha_change_partitions(HA_CREATE_INFO *create_info, const uchar *pack_frm_data, size_t pack_frm_len) { + mark_trx_read_write(); + return change_partitions(create_info, path, copied, deleted, pack_frm_data, pack_frm_len); } @@ -2775,6 +3246,8 @@ handler::ha_change_partitions(HA_CREATE_INFO *create_info, int handler::ha_drop_partitions(const char *path) { + mark_trx_read_write(); + return drop_partitions(path); } @@ -2788,6 +3261,8 @@ handler::ha_drop_partitions(const char *path) int 
handler::ha_rename_partitions(const char *path) { + mark_trx_read_write(); + return rename_partitions(path); } @@ -2801,6 +3276,8 @@ handler::ha_rename_partitions(const char *path) int handler::ha_optimize_partitions(THD *thd) { + mark_trx_read_write(); + return optimize_partitions(thd); } @@ -2814,6 +3291,8 @@ handler::ha_optimize_partitions(THD *thd) int handler::ha_analyze_partitions(THD *thd) { + mark_trx_read_write(); + return analyze_partitions(thd); } @@ -2827,6 +3306,8 @@ handler::ha_analyze_partitions(THD *thd) int handler::ha_check_partitions(THD *thd) { + mark_trx_read_write(); + return check_partitions(thd); } @@ -2840,6 +3321,8 @@ handler::ha_check_partitions(THD *thd) int handler::ha_repair_partitions(THD *thd) { + mark_trx_read_write(); + return repair_partitions(thd); } @@ -2866,7 +3349,7 @@ int ha_enable_transaction(THD *thd, bool on) is an optimization hint that storage engine is free to ignore. So, let's commit an open transaction (if any) now. */ - if (!(error= ha_commit_stmt(thd))) + if (!(error= ha_commit_trans(thd, 0))) error= end_trans(thd, COMMIT); } DBUG_RETURN(error); @@ -3826,7 +4309,7 @@ bool ha_show_status(THD *thd, handlerton *db_type, enum ha_stat_type stat) } if (!result) - send_eof(thd); + my_eof(thd); return result; } @@ -4044,6 +4527,9 @@ int handler::ha_write_row(uchar *buf) { int error; DBUG_ENTER("handler::ha_write_row"); + + mark_trx_read_write(); + if (unlikely(error= write_row(buf))) DBUG_RETURN(error); if (unlikely(error= binlog_log_row<Write_rows_log_event>(table, 0, buf))) @@ -4062,6 +4548,8 @@ int handler::ha_update_row(const uchar *old_data, uchar *new_data) */ DBUG_ASSERT(new_data == table->record[0]); + mark_trx_read_write(); + if (unlikely(error= update_row(old_data, new_data))) return error; if (unlikely(error= binlog_log_row<Update_rows_log_event>(table, old_data, new_data))) @@ -4072,6 +4560,9 @@ int handler::ha_update_row(const uchar *old_data, uchar *new_data) int handler::ha_delete_row(const uchar *buf) { int 
error; + + mark_trx_read_write(); + if (unlikely(error= delete_row(buf))) return error; if (unlikely(error= binlog_log_row<Delete_rows_log_event>(table, buf, 0))) |