summaryrefslogtreecommitdiff
path: root/storage/xtradb/handler/ha_innodb.cc
diff options
context:
space:
mode:
Diffstat (limited to 'storage/xtradb/handler/ha_innodb.cc')
-rw-r--r--storage/xtradb/handler/ha_innodb.cc322
1 files changed, 194 insertions, 128 deletions
diff --git a/storage/xtradb/handler/ha_innodb.cc b/storage/xtradb/handler/ha_innodb.cc
index e4208e8d0ea..0964f4925f3 100644
--- a/storage/xtradb/handler/ha_innodb.cc
+++ b/storage/xtradb/handler/ha_innodb.cc
@@ -61,6 +61,9 @@ Place, Suite 330, Boston, MA 02111-1307 USA
#include <log_event.h> // rpl_get_position_info
#endif /* MYSQL_SERVER */
+#ifdef _WIN32
+#include <io.h>
+#endif
/** @file ha_innodb.cc */
/* Include necessary InnoDB headers */
@@ -113,8 +116,6 @@ extern ib_int64_t trx_sys_mysql_relay_log_pos;
/** to protect innobase_open_files */
static mysql_mutex_t innobase_share_mutex;
-/** to force correct commit order in binlog */
-static mysql_mutex_t prepare_commit_mutex;
static ulong commit_threads = 0;
static mysql_mutex_t commit_threads_m;
static mysql_cond_t commit_cond;
@@ -122,7 +123,7 @@ static mysql_mutex_t commit_cond_m;
static bool innodb_inited = 0;
C_MODE_START
-static int index_cond_func_innodb(void *arg);
+static xtradb_icp_result_t index_cond_func_innodb(void *arg);
C_MODE_END
@@ -249,7 +250,6 @@ static const char* innobase_change_buffering_values[IBUF_USE_COUNT] = {
/* Keys to register pthread mutexes/cond in the current file with
performance schema */
static mysql_pfs_key_t innobase_share_mutex_key;
-static mysql_pfs_key_t prepare_commit_mutex_key;
static mysql_pfs_key_t commit_threads_m_key;
static mysql_pfs_key_t commit_cond_mutex_key;
static mysql_pfs_key_t commit_cond_key;
@@ -257,8 +257,7 @@ static mysql_pfs_key_t commit_cond_key;
static PSI_mutex_info all_pthread_mutexes[] = {
{&commit_threads_m_key, "commit_threads_m", 0},
{&commit_cond_mutex_key, "commit_cond_mutex", 0},
- {&innobase_share_mutex_key, "innobase_share_mutex", 0},
- {&prepare_commit_mutex_key, "prepare_commit_mutex", 0}
+ {&innobase_share_mutex_key, "innobase_share_mutex", 0}
};
static PSI_cond_info all_innodb_conds[] = {
@@ -376,6 +375,7 @@ static PSI_file_info all_innodb_files[] = {
static INNOBASE_SHARE *get_share(const char *table_name);
static void free_share(INNOBASE_SHARE *share);
static int innobase_close_connection(handlerton *hton, THD* thd);
+static void innobase_commit_ordered(handlerton *hton, THD* thd, bool all);
static int innobase_commit(handlerton *hton, THD* thd, bool all);
static int innobase_rollback(handlerton *hton, THD* thd, bool all);
static int innobase_rollback_to_savepoint(handlerton *hton, THD* thd,
@@ -632,6 +632,17 @@ bool innobase_show_status(handlerton *hton, THD* thd,
stat_print_fn* stat_print,
enum ha_stat_type stat_type);
+/* Enable / disable checkpoints */
+static int innobase_checkpoint_state(handlerton *hton, bool disable)
+{
+ if (disable)
+ (void) log_disable_checkpoint();
+ else
+ log_enable_checkpoint();
+ return 0;
+}
+
+
/*****************************************************************//**
Commits a transaction in an InnoDB database. */
static
@@ -1104,6 +1115,9 @@ convert_error_code_to_mysql(
case DB_RECORD_NOT_FOUND:
return(HA_ERR_NO_ACTIVE_RECORD);
+ case DB_SEARCH_ABORTED_BY_USER:
+ return(HA_ERR_ABORTED_BY_USER);
+
case DB_DEADLOCK:
/* Since we rolled back the whole transaction, we must
tell it also to MySQL so that MySQL knows to empty the
@@ -1675,7 +1689,6 @@ innobase_trx_init(
trx_t* trx) /*!< in/out: InnoDB transaction handle */
{
DBUG_ENTER("innobase_trx_init");
- DBUG_ASSERT(EQ_CURRENT_THD(thd));
DBUG_ASSERT(thd == trx->mysql_thd);
trx->check_foreigns = !thd_test_options(
@@ -1734,8 +1747,6 @@ check_trx_exists(
{
trx_t*& trx = thd_to_trx(thd);
- ut_ad(EQ_CURRENT_THD(thd));
-
if (trx == NULL) {
trx = innobase_trx_allocate(thd);
} else if (UNIV_UNLIKELY(trx->magic_n != TRX_MAGIC_N)) {
@@ -1787,15 +1798,15 @@ trx_is_registered_for_2pc(
}
/*********************************************************************//**
-Note that a transaction owns the prepare_commit_mutex. */
+Note that innobase_commit_ordered() was run. */
static inline
void
-trx_owns_prepare_commit_mutex_set(
+trx_set_active_commit_ordered(
/*==============================*/
trx_t* trx) /* in: transaction */
{
ut_a(trx_is_registered_for_2pc(trx));
- trx->owns_prepare_mutex = 1;
+ trx->active_commit_ordered = 1;
}
/*********************************************************************//**
@@ -1807,7 +1818,7 @@ trx_register_for_2pc(
trx_t* trx) /* in: transaction */
{
trx->is_registered = 1;
- ut_ad(trx->owns_prepare_mutex == 0);
+ ut_ad(trx->active_commit_ordered == 0);
}
/*********************************************************************//**
@@ -1819,19 +1830,18 @@ trx_deregister_from_2pc(
trx_t* trx) /* in: transaction */
{
trx->is_registered = 0;
- trx->owns_prepare_mutex = 0;
+ trx->active_commit_ordered = 0;
}
/*********************************************************************//**
-Check whether atransaction owns the prepare_commit_mutex.
-@return true if transaction owns the prepare commit mutex */
+Check whether a transaction has active_commit_ordered set */
static inline
bool
-trx_has_prepare_commit_mutex(
+trx_is_active_commit_ordered(
/*=========================*/
const trx_t* trx) /* in: transaction */
{
- return(trx->owns_prepare_mutex == 1);
+ return(trx->active_commit_ordered == 1);
}
/*********************************************************************//**
@@ -1852,7 +1862,7 @@ UNIV_INTERN
ha_innobase::ha_innobase(handlerton *hton, TABLE_SHARE *table_arg)
:handler(hton, table_arg),
int_table_flags(HA_REC_NOT_IN_SEQ |
- HA_NULL_IN_KEY |
+ HA_NULL_IN_KEY | HA_CAN_VIRTUAL_COLUMNS |
HA_CAN_INDEX_BLOBS |
HA_CAN_SQL_HANDLER |
HA_PRIMARY_KEY_REQUIRED_FOR_POSITION |
@@ -2386,12 +2396,14 @@ innobase_init(
innobase_hton->savepoint_set=innobase_savepoint;
innobase_hton->savepoint_rollback=innobase_rollback_to_savepoint;
innobase_hton->savepoint_release=innobase_release_savepoint;
+ innobase_hton->commit_ordered=innobase_commit_ordered;
innobase_hton->commit=innobase_commit;
innobase_hton->rollback=innobase_rollback;
innobase_hton->prepare=innobase_xa_prepare;
innobase_hton->recover=innobase_xa_recover;
innobase_hton->commit_by_xid=innobase_commit_by_xid;
innobase_hton->rollback_by_xid=innobase_rollback_by_xid;
+ innobase_hton->checkpoint_state= innobase_checkpoint_state;
innobase_hton->create_cursor_read_view=innobase_create_cursor_view;
innobase_hton->set_cursor_read_view=innobase_set_cursor_view;
innobase_hton->close_cursor_read_view=innobase_close_cursor_view;
@@ -2958,8 +2970,6 @@ skip_overwrite:
mysql_mutex_init(innobase_share_mutex_key,
&innobase_share_mutex,
MY_MUTEX_INIT_FAST);
- mysql_mutex_init(prepare_commit_mutex_key,
- &prepare_commit_mutex, MY_MUTEX_INIT_FAST);
mysql_mutex_init(commit_threads_m_key,
&commit_threads_m, MY_MUTEX_INIT_FAST);
mysql_mutex_init(commit_cond_mutex_key,
@@ -3010,7 +3020,6 @@ innobase_end(
srv_free_paths_and_sizes();
my_free(internal_innobase_data_file_path);
mysql_mutex_destroy(&innobase_share_mutex);
- mysql_mutex_destroy(&prepare_commit_mutex);
mysql_mutex_destroy(&commit_threads_m);
mysql_mutex_destroy(&commit_cond_m);
mysql_cond_destroy(&commit_cond);
@@ -3135,6 +3144,108 @@ innobase_start_trx_and_assign_read_view(
DBUG_RETURN(0);
}
+static
+void
+innobase_commit_ordered_2(
+/*============*/
+ trx_t* trx, /*!< in: Innodb transaction */
+ THD* thd) /*!< in: MySQL thread handle */
+{
+ ulonglong tmp_pos;
+ DBUG_ENTER("innobase_commit_ordered");
+
+ /* We need current binlog position for ibbackup to work.
+ Note, the position is current because commit_ordered is guaranteed
+ to be called in same sequenece as writing to binlog. */
+
+retry:
+ if (innobase_commit_concurrency > 0) {
+ mysql_mutex_lock(&commit_cond_m);
+ commit_threads++;
+
+ if (commit_threads > innobase_commit_concurrency) {
+ commit_threads--;
+ mysql_cond_wait(&commit_cond,
+ &commit_cond_m);
+ mysql_mutex_unlock(&commit_cond_m);
+ goto retry;
+ }
+ else {
+ mysql_mutex_unlock(&commit_cond_m);
+ }
+ }
+
+ mysql_bin_log_commit_pos(thd, &tmp_pos, &(trx->mysql_log_file_name));
+ trx->mysql_log_offset = (ib_int64_t) tmp_pos;
+
+ /* Don't do write + flush right now. For group commit
+ to work we want to do the flush in the innobase_commit()
+ method, which runs without holding any locks. */
+ trx->flush_log_later = TRUE;
+ innobase_commit_low(trx);
+ trx->flush_log_later = FALSE;
+
+ if (innobase_commit_concurrency > 0) {
+ mysql_mutex_lock(&commit_cond_m);
+ commit_threads--;
+ mysql_cond_signal(&commit_cond);
+ mysql_mutex_unlock(&commit_cond_m);
+ }
+
+ DBUG_VOID_RETURN;
+}
+
+/*****************************************************************//**
+Perform the first, fast part of InnoDB commit.
+
+Doing it in this call ensures that we get the same commit order here
+as in binlog and any other participating transactional storage engines.
+
+Note that we want to do as little as really needed here, as we run
+under a global mutex. The expensive fsync() is done later, in
+innobase_commit(), without a lock so group commit can take place.
+
+Note also that this method can be called from a different thread than
+the one handling the rest of the transaction. */
+static
+void
+innobase_commit_ordered(
+/*============*/
+ handlerton *hton, /*!< in: Innodb handlerton */
+ THD* thd, /*!< in: MySQL thread handle of the user for whom
+ the transaction should be committed */
+ bool all) /*!< in: TRUE - commit transaction
+ FALSE - the current SQL statement ended */
+{
+ trx_t* trx;
+ DBUG_ENTER("innobase_commit_ordered");
+ DBUG_ASSERT(hton == innodb_hton_ptr);
+
+ trx = check_trx_exists(thd);
+
+ /* Since we will reserve the kernel mutex, we must not be holding the
+ search system latch, or we will disobey the latching order. But we
+ already released it in innobase_xa_prepare() (if not before), so just
+ have an assert here.*/
+ ut_ad(!trx->has_search_latch);
+
+ if (!trx_is_registered_for_2pc(trx) && trx_is_started(trx)) {
+ /* We cannot throw error here; instead we will catch this error
+ again in innobase_commit() and report it from there. */
+ DBUG_VOID_RETURN;
+ }
+
+ /* commit_ordered is only called when committing the whole transaction
+ (or an SQL statement when autocommit is on). */
+ DBUG_ASSERT(all ||
+ (!thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)));
+
+ innobase_commit_ordered_2(trx, thd);
+
+ trx_set_active_commit_ordered(trx);
+ DBUG_VOID_RETURN;
+}
+
/*****************************************************************//**
Commits a transaction in an InnoDB database or marks an SQL statement
ended.
@@ -3160,7 +3271,7 @@ innobase_commit(
/* Since we will reserve the kernel mutex, we have to release
the search system latch first to obey the latching order. */
- if (trx->has_search_latch) {
+ if (trx->has_search_latch && !trx_is_active_commit_ordered(trx)) {
trx_search_latch_release_if_reserved(trx);
}
@@ -3178,68 +3289,18 @@ innobase_commit(
if (all
|| (!thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) {
- /* We were instructed to commit the whole transaction, or
- this is an SQL statement end and autocommit is on */
-
- /* We need current binlog position for ibbackup to work.
- Note, the position is current because of
- prepare_commit_mutex */
-retry:
- if (innobase_commit_concurrency > 0) {
- mysql_mutex_lock(&commit_cond_m);
- commit_threads++;
-
- if (commit_threads > innobase_commit_concurrency) {
- commit_threads--;
- mysql_cond_wait(&commit_cond,
- &commit_cond_m);
- mysql_mutex_unlock(&commit_cond_m);
- goto retry;
- }
- else {
- mysql_mutex_unlock(&commit_cond_m);
- }
- }
-
- /* The following calls to read the MySQL binary log
- file name and the position return consistent results:
- 1) Other InnoDB transactions cannot intervene between
- these calls as we are holding prepare_commit_mutex.
- 2) Binary logging of other engines is not relevant
- to InnoDB as all InnoDB requires is that committing
- InnoDB transactions appear in the same order in the
- MySQL binary log as they appear in InnoDB logs.
- 3) A MySQL log file rotation cannot happen because
- MySQL protects against this by having a counter of
- transactions in prepared state and it only allows
- a rotation when the counter drops to zero. See
- LOCK_prep_xids and COND_prep_xids in log.cc. */
- trx->mysql_log_file_name = mysql_bin_log_file_name();
- trx->mysql_log_offset = (ib_int64_t) mysql_bin_log_file_pos();
-
- /* Don't do write + flush right now. For group commit
- to work we want to do the flush after releasing the
- prepare_commit_mutex. */
- trx->flush_log_later = TRUE;
- innobase_commit_low(trx);
- trx->flush_log_later = FALSE;
-
- if (innobase_commit_concurrency > 0) {
- mysql_mutex_lock(&commit_cond_m);
- commit_threads--;
- mysql_cond_signal(&commit_cond);
- mysql_mutex_unlock(&commit_cond_m);
+ /* Run the fast part of commit if we did not already. */
+ if (!trx_is_active_commit_ordered(trx)) {
+ innobase_commit_ordered_2(trx, thd);
}
- if (trx_has_prepare_commit_mutex(trx)) {
-
- mysql_mutex_unlock(&prepare_commit_mutex);
- }
-
- trx_deregister_from_2pc(trx);
+ /* We were instructed to commit the whole transaction, or
+ this is an SQL statement end and autocommit is on */
- /* Now do a write + flush of logs. */
+ /* We did the first part already in innobase_commit_ordered(),
+ Now finish by doing a write + flush of logs. */
trx_commit_complete_for_mysql(trx);
+ trx_deregister_from_2pc(trx);
} else {
/* We just mark the SQL statement ended and do not do a
transaction commit */
@@ -3615,12 +3676,15 @@ UNIV_INTERN
ulong
ha_innobase::index_flags(
/*=====================*/
- uint,
- uint,
- bool)
+ uint index,
+ uint part,
+ bool all_parts)
const
{
- return(HA_READ_NEXT | HA_READ_PREV | HA_READ_ORDER
+ ulong extra_flag= 0;
+ if (table && index == table->s->primary_key)
+ extra_flag= HA_CLUSTERED_INDEX;
+ return(HA_READ_NEXT | HA_READ_PREV | HA_READ_ORDER | extra_flag
| HA_READ_RANGE | HA_KEYREAD_ONLY | HA_DO_INDEX_COND_PUSHDOWN);
}
@@ -4273,6 +4337,7 @@ retry:
of length ref_length! */
if (!row_table_got_default_clust_index(ib_table)) {
+
prebuilt->clust_index_was_generated = FALSE;
if (UNIV_UNLIKELY(primary_key >= MAX_KEY)) {
@@ -4678,12 +4743,24 @@ get_innobase_type_from_mysql_type(
case MYSQL_TYPE_SHORT:
case MYSQL_TYPE_INT24:
case MYSQL_TYPE_DATE:
- case MYSQL_TYPE_DATETIME:
case MYSQL_TYPE_YEAR:
case MYSQL_TYPE_NEWDATE:
+ return(DATA_INT);
+
case MYSQL_TYPE_TIME:
+ case MYSQL_TYPE_DATETIME:
case MYSQL_TYPE_TIMESTAMP:
- return(DATA_INT);
+ /*
+ XtraDB should ideally just check field->keytype() and never
+ field->type(). The following check is here to only
+ change the new hires datetime/timestamp/time fields to
+ use DATA_FIXBINARY. We can't convert this function to
+ just test for field->keytype() as then the check if a
+ table is compatible will fail for old tables.
+ */
+ if (field->key_type() == HA_KEYTYPE_BINARY)
+ return(DATA_FIXBINARY);
+ return(DATA_INT);
case MYSQL_TYPE_FLOAT:
return(DATA_FLOAT);
case MYSQL_TYPE_DOUBLE:
@@ -4697,10 +4774,7 @@ get_innobase_type_from_mysql_type(
case MYSQL_TYPE_LONG_BLOB:
return(DATA_BLOB);
case MYSQL_TYPE_NULL:
- /* MySQL currently accepts "NULL" datatype, but will
- reject such datatype in the next release. We will cope
- with it and not trigger assertion failure in 5.1 */
- break;
+ return(DATA_FIXBINARY);
default:
ut_error;
}
@@ -9387,6 +9461,7 @@ ha_innobase::extra(
pushed_idx_cond= FALSE;
pushed_idx_cond_keyno= MAX_KEY;
prebuilt->idx_cond_func= NULL;
+ in_range_check_pushed_down= FALSE;
break;
case HA_EXTRA_NO_KEYREAD:
prebuilt->read_just_key = 0;
@@ -9437,6 +9512,7 @@ ha_innobase::reset()
/* Reset index condition pushdown state */
pushed_idx_cond_keyno= MAX_KEY;
pushed_idx_cond= NULL;
+ in_range_check_pushed_down= FALSE;
ds_mrr.dsmrr_close();
prebuilt->idx_cond_func= NULL;
@@ -10881,33 +10957,6 @@ innobase_xa_prepare(
srv_active_wake_master_thread();
- if (thd_sql_command(thd) != SQLCOM_XA_PREPARE
- && (all
- || !thd_test_options(
- thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) {
-
- /* For ibbackup to work the order of transactions in binlog
- and InnoDB must be the same. Consider the situation
-
- thread1> prepare; write to binlog; ...
- <context switch>
- thread2> prepare; write to binlog; commit
- thread1> ... commit
-
- To ensure this will not happen we're taking the mutex on
- prepare, and releasing it on commit.
-
- Note: only do it for normal commits, done via ha_commit_trans.
- If 2pc protocol is executed by external transaction
- coordinator, it will be just a regular MySQL client
- executing XA PREPARE and XA COMMIT commands.
- In this case we cannot know how many minutes or hours
- will be between XA PREPARE and XA COMMIT, and we don't want
- to block for undefined period of time. */
- mysql_mutex_lock(&prepare_commit_mutex);
- trx_owns_prepare_commit_mutex_set(trx);
- }
-
return(error);
}
@@ -12230,7 +12279,7 @@ static MYSQL_SYSVAR_ENUM(corrupt_table_action, srv_pass_corrupt_table,
"except for the deletion.",
NULL, NULL, 0, &corrupt_table_action_typelib);
-static MYSQL_SYSVAR_ULONG(lazy_drop_table, srv_lazy_drop_table,
+static MYSQL_SYSVAR_ULINT(lazy_drop_table, srv_lazy_drop_table,
PLUGIN_VAR_RQCMDARG,
"At deleting tablespace, only miminum needed processes at the time are done. "
"e.g. for http://bugs.mysql.com/51325",
@@ -12563,12 +12612,13 @@ test_innobase_convert_name()
*/
int ha_innobase::multi_range_read_init(RANGE_SEQ_IF *seq, void *seq_init_param,
- uint n_ranges, uint mode, HANDLER_BUFFER *buf)
+ uint n_ranges, uint mode,
+ HANDLER_BUFFER *buf)
{
return ds_mrr.dsmrr_init(this, seq, seq_init_param, n_ranges, mode, buf);
}
-int ha_innobase::multi_range_read_next(char **range_info)
+int ha_innobase::multi_range_read_next(range_id_t *range_info)
{
return ds_mrr.dsmrr_next(range_info);
}
@@ -12590,16 +12640,29 @@ ha_rows ha_innobase::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
return res;
}
-ha_rows ha_innobase::multi_range_read_info(uint keyno, uint n_ranges,
- uint keys, uint *bufsz,
+ha_rows ha_innobase::multi_range_read_info(uint keyno, uint n_ranges, uint keys,
+ uint key_parts, uint *bufsz,
uint *flags, COST_VECT *cost)
{
ds_mrr.init(this, table);
- ha_rows res= ds_mrr.dsmrr_info(keyno, n_ranges, keys, bufsz, flags, cost);
+ ha_rows res= ds_mrr.dsmrr_info(keyno, n_ranges, keys, key_parts, bufsz,
+ flags, cost);
return res;
}
+int ha_innobase::multi_range_read_explain_info(uint mrr_mode, char *str, size_t size)
+{
+ return ds_mrr.dsmrr_explain_info(mrr_mode, str, size);
+}
+
+/*
+ A helper function used only in index_cond_func_innodb
+*/
+bool ha_innobase::is_thd_killed()
+{
+ return thd_killed(user_thd);
+}
/**
* Index Condition Pushdown interface implementation
@@ -12612,15 +12675,18 @@ C_MODE_START
See note on ICP_RESULT for return values description.
*/
-static int index_cond_func_innodb(void *arg)
+static xtradb_icp_result_t index_cond_func_innodb(void *arg)
{
ha_innobase *h= (ha_innobase*)arg;
+ if (h->is_thd_killed())
+ return XTRADB_ICP_ABORTED_BY_USER;
+
if (h->end_range)
{
if (h->compare_key2(h->end_range) > 0)
- return ICP_OUT_OF_RANGE; /* caller should return HA_ERR_END_OF_FILE already */
+ return XTRADB_ICP_OUT_OF_RANGE; /* caller should return HA_ERR_END_OF_FILE already */
}
- return h->pushed_idx_cond->val_int()? ICP_MATCH : ICP_NO_MATCH;
+ return h->pushed_idx_cond->val_int()? XTRADB_ICP_MATCH : XTRADB_ICP_NO_MATCH;
}
C_MODE_END
@@ -12628,7 +12694,7 @@ C_MODE_END
Item *ha_innobase::idx_cond_push(uint keyno_arg, Item* idx_cond_arg)
{
- if ((keyno_arg != primary_key) && (prebuilt->select_lock_type == LOCK_NONE))
+ if (keyno_arg != primary_key && prebuilt->select_lock_type != LOCK_X)
{
pushed_idx_cond_keyno= keyno_arg;
pushed_idx_cond= idx_cond_arg;