summary | refs | log | tree | commit | diff
path: root/storage
diff options
context:
space:
mode:
author: Sergei Petrunia <psergey@askmonty.org> 2017-12-21 23:34:49 +0300
committer: Sergei Petrunia <psergey@askmonty.org> 2017-12-21 23:34:49 +0300
commit: 9c28fd7a3337a4d6773f3b53e70af9a3b0dbb919 (patch)
tree: 3ce00a77220f8f31a19f936472d5da4feaebb9e8 /storage
parent: 15219eb08a28261aa730c301583f1c47a92790b8 (diff)
parent: 207976d6b9c42868d982a54a2bdd16b77a3c73da (diff)
download: mariadb-git-9c28fd7a3337a4d6773f3b53e70af9a3b0dbb919.tar.gz
Merge branch '10.2' into bb-10.2-mariarocks
Diffstat (limited to 'storage')
-rw-r--r--  storage/innobase/dict/dict0stats.cc  4
-rw-r--r--  storage/innobase/ha/ha0ha.cc  58
-rw-r--r--  storage/innobase/handler/ha_innodb.cc  14
-rw-r--r--  storage/innobase/include/ha0ha.h  7
-rw-r--r--  storage/innobase/include/que0que.h  3
-rw-r--r--  storage/innobase/include/row0vers.h  20
-rw-r--r--  storage/innobase/include/trx0roll.h  7
-rw-r--r--  storage/innobase/row/row0mysql.cc  129
-rw-r--r--  storage/innobase/row/row0quiesce.cc  4
-rw-r--r--  storage/innobase/row/row0sel.cc  51
-rw-r--r--  storage/innobase/row/row0umod.cc  18
-rw-r--r--  storage/innobase/row/row0undo.cc  8
-rw-r--r--  storage/innobase/row/row0vers.cc  49
-rw-r--r--  storage/innobase/srv/srv0srv.cc  28
-rw-r--r--  storage/innobase/srv/srv0start.cc  4
-rw-r--r--  storage/innobase/trx/trx0purge.cc  70
-rw-r--r--  storage/innobase/trx/trx0roll.cc  165
-rw-r--r--  storage/innobase/trx/trx0undo.cc  16
-rw-r--r--  storage/maria/ma_loghandler.c  1
-rw-r--r--  storage/rocksdb/ha_rocksdb.cc  71
-rw-r--r--  storage/rocksdb/ha_rocksdb.h  3
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/add_index_inplace.result  9
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/mariadb_port_fixes.result  26
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/t/add_index_inplace.test  9
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/t/disabled.def  11
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/t/mariadb_port_fixes.test  19
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb_rpl/t/disabled.def  14
-rw-r--r--  storage/rocksdb/mysql-test/storage_engine/disabled.def  1
-rw-r--r--  storage/rocksdb/mysql-test/storage_engine/type_bit_indexes.rdiff  20
-rw-r--r--  storage/rocksdb/mysql-test/storage_engine/type_enum_indexes.rdiff  11
-rw-r--r--  storage/rocksdb/mysql-test/storage_engine/type_set_indexes.rdiff  20
-rw-r--r--  storage/rocksdb/rdb_datadic.cc  2
-rw-r--r--  storage/rocksdb/rdb_datadic.h  2
-rw-r--r--  storage/rocksdb/rdb_i_s.cc  22
-rw-r--r--  storage/tokudb/CMakeLists.txt  8
-rw-r--r--  storage/tokudb/mysql-test/rpl/r/rpl_tokudb_row_log.result  2
-rw-r--r--  storage/tokudb/mysql-test/rpl/r/rpl_tokudb_stm_log.result  2
-rw-r--r--  storage/xtradb/handler/ha_innodb.cc  21
-rw-r--r--  storage/xtradb/include/que0que.h  3
-rw-r--r--  storage/xtradb/include/trx0roll.h  9
-rw-r--r--  storage/xtradb/lock/lock0lock.cc  91
-rw-r--r--  storage/xtradb/lock/lock0wait.cc  39
-rw-r--r--  storage/xtradb/row/row0mysql.cc  136
-rw-r--r--  storage/xtradb/row/row0undo.cc  8
-rw-r--r--  storage/xtradb/trx/trx0roll.cc  164
-rw-r--r--  storage/xtradb/trx/trx0undo.cc  16
46 files changed, 765 insertions, 630 deletions
diff --git a/storage/innobase/dict/dict0stats.cc b/storage/innobase/dict/dict0stats.cc
index e4aca96da4b..662ea959b9e 100644
--- a/storage/innobase/dict/dict0stats.cc
+++ b/storage/innobase/dict/dict0stats.cc
@@ -3659,8 +3659,8 @@ dict_stats_rename_table(
new_db_utf8, new_table_utf8, trx);
mutex_exit(&dict_sys->mutex);
/* fall through */
- case DB_DEADLOCK:
case DB_LOCK_WAIT_TIMEOUT:
+ trx->error_state = DB_SUCCESS;
os_thread_sleep(200000 /* 0.2 sec */);
continue;
case DB_STATS_DO_NOT_EXIST:
@@ -3701,8 +3701,8 @@ dict_stats_rename_table(
new_db_utf8, new_table_utf8, trx);
mutex_exit(&dict_sys->mutex);
/* fall through */
- case DB_DEADLOCK:
case DB_LOCK_WAIT_TIMEOUT:
+ trx->error_state = DB_SUCCESS;
os_thread_sleep(200000 /* 0.2 sec */);
continue;
case DB_STATS_DO_NOT_EXIST:
diff --git a/storage/innobase/ha/ha0ha.cc b/storage/innobase/ha/ha0ha.cc
index f620db6f62e..da542d4f742 100644
--- a/storage/innobase/ha/ha0ha.cc
+++ b/storage/innobase/ha/ha0ha.cc
@@ -489,62 +489,4 @@ ha_validate(
return(ok);
}
#endif /* defined UNIV_AHI_DEBUG || defined UNIV_DEBUG */
-
-/*************************************************************//**
-Prints info of a hash table. */
-void
-ha_print_info(
-/*==========*/
- FILE* file, /*!< in: file where to print */
- hash_table_t* table) /*!< in: hash table */
-{
-#ifdef UNIV_DEBUG
-/* Some of the code here is disabled for performance reasons in production
-builds, see http://bugs.mysql.com/36941 */
-#define PRINT_USED_CELLS
-#endif /* UNIV_DEBUG */
-
-#ifdef PRINT_USED_CELLS
- hash_cell_t* cell;
- ulint cells = 0;
- ulint i;
-#endif /* PRINT_USED_CELLS */
- ulint n_bufs;
-
- ut_ad(table);
- ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
-#ifdef PRINT_USED_CELLS
- for (i = 0; i < hash_get_n_cells(table); i++) {
-
- cell = hash_get_nth_cell(table, i);
-
- if (cell->node) {
-
- cells++;
- }
- }
-#endif /* PRINT_USED_CELLS */
-
- fprintf(file, "Hash table size %lu",
- (ulong) hash_get_n_cells(table));
-
-#ifdef PRINT_USED_CELLS
- fprintf(file, ", used cells %lu", (ulong) cells);
-#endif /* PRINT_USED_CELLS */
-
- if (table->heaps == NULL && table->heap != NULL) {
-
- /* This calculation is intended for the adaptive hash
- index: how many buffer frames we have reserved? */
-
- n_bufs = UT_LIST_GET_LEN(table->heap->base) - 1;
-
- if (table->heap->free_block) {
- n_bufs++;
- }
-
- fprintf(file, ", node heap has %lu buffer(s)\n",
- (ulong) n_bufs);
- }
-}
#endif /* BTR_CUR_HASH_ADAPT */
diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
index e7a4532a604..2297149f272 100644
--- a/storage/innobase/handler/ha_innodb.cc
+++ b/storage/innobase/handler/ha_innodb.cc
@@ -3866,8 +3866,7 @@ innobase_init(
/* Currently, Galera does not support VATS lock schedule algorithm. */
if (innodb_lock_schedule_algorithm == INNODB_LOCK_SCHEDULE_ALGORITHM_VATS
&& global_system_variables.wsrep_on) {
- ib::info() << "In Galera environment Variance-Aware-Transaction-Sheduling Algorithm"
- " is not supported. Falling back to First-Come-First-Served order. ";
+ ib::info() << "For Galera, using innodb_lock_schedule_algorithm=fcfs";
innodb_lock_schedule_algorithm = INNODB_LOCK_SCHEDULE_ALGORITHM_FCFS;
}
#endif /* WITH_WSREP */
@@ -8614,8 +8613,8 @@ no_commit:
whether we update the table autoinc counter or not. */
col_max_value = innobase_get_int_col_max_value(table->next_number_field);
- /* Get the value that MySQL attempted to store in the table. */
- auto_inc = table->next_number_field->val_int();
+ /* Get the value that MySQL attempted to store in the table.*/
+ auto_inc = table->next_number_field->val_uint();
switch (error) {
case DB_DUPLICATE_KEY:
@@ -9081,12 +9080,7 @@ calc_row_difference(
if (field != table->found_next_number_field
|| dfield_is_null(&ufield->new_val)) {
} else {
- auto_inc = row_parse_int(
- static_cast<const byte*>(
- ufield->new_val.data),
- ufield->new_val.len,
- col->mtype,
- col->prtype & DATA_UNSIGNED);
+ auto_inc = field->val_uint();
}
}
n_changed++;
diff --git a/storage/innobase/include/ha0ha.h b/storage/innobase/include/ha0ha.h
index db53b6c6580..ca4cb0a5f8f 100644
--- a/storage/innobase/include/ha0ha.h
+++ b/storage/innobase/include/ha0ha.h
@@ -198,13 +198,6 @@ ha_validate(
ulint start_index, /*!< in: start index */
ulint end_index); /*!< in: end index */
#endif /* defined UNIV_AHI_DEBUG || defined UNIV_DEBUG */
-/*************************************************************//**
-Prints info of a hash table. */
-void
-ha_print_info(
-/*==========*/
- FILE* file, /*!< in: file where to print */
- hash_table_t* table); /*!< in: hash table */
/** The hash table external chain node */
struct ha_node_t {
diff --git a/storage/innobase/include/que0que.h b/storage/innobase/include/que0que.h
index 763b16820d8..13be7291f00 100644
--- a/storage/innobase/include/que0que.h
+++ b/storage/innobase/include/que0que.h
@@ -381,9 +381,6 @@ struct que_thr_t{
thrs; /*!< list of thread nodes of the fork
node */
UT_LIST_NODE_T(que_thr_t)
- trx_thrs; /*!< lists of threads in wait list of
- the trx */
- UT_LIST_NODE_T(que_thr_t)
queue; /*!< list of runnable thread nodes in
the server task queue */
ulint fk_cascade_depth; /*!< maximum cascading call depth
diff --git a/storage/innobase/include/row0vers.h b/storage/innobase/include/row0vers.h
index b28533578e1..576b53358f8 100644
--- a/storage/innobase/include/row0vers.h
+++ b/storage/innobase/include/row0vers.h
@@ -1,6 +1,7 @@
/*****************************************************************************
Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2017, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -38,19 +39,18 @@ Created 2/6/1997 Heikki Tuuri
// Forward declaration
class ReadView;
-/*****************************************************************//**
-Finds out if an active transaction has inserted or modified a secondary
+/** Determine if an active transaction has inserted or modified a secondary
index record.
-@return 0 if committed, else the active transaction id;
-NOTE that this function can return false positives but never false
-negatives. The caller must confirm all positive results by calling
-trx_is_active() while holding lock_sys->mutex. */
+@param[in] rec secondary index record
+@param[in] index secondary index
+@param[in] offsets rec_get_offsets(rec, index)
+@return the active transaction; trx_release_reference() must be invoked
+@retval NULL if the record was committed */
trx_t*
row_vers_impl_x_locked(
-/*===================*/
- const rec_t* rec, /*!< in: record in a secondary index */
- dict_index_t* index, /*!< in: the secondary index */
- const ulint* offsets);/*!< in: rec_get_offsets(rec, index) */
+ const rec_t* rec,
+ dict_index_t* index,
+ const ulint* offsets);
/*****************************************************************//**
Finds out if we must preserve a delete marked earlier version of a clustered
diff --git a/storage/innobase/include/trx0roll.h b/storage/innobase/include/trx0roll.h
index 8908376bff1..f7b999ae70a 100644
--- a/storage/innobase/include/trx0roll.h
+++ b/storage/innobase/include/trx0roll.h
@@ -33,7 +33,8 @@ Created 3/26/1996 Heikki Tuuri
#include "mtr0mtr.h"
#include "trx0sys.h"
-extern bool trx_rollback_or_clean_is_active;
+extern bool trx_rollback_or_clean_is_active;
+extern const trx_t* trx_roll_crash_recv_trx;
/*******************************************************************//**
Determines if this transaction is rolling back an incomplete transaction
@@ -62,6 +63,10 @@ trx_undo_rec_t*
trx_roll_pop_top_rec_of_trx(trx_t* trx, roll_ptr_t* roll_ptr, mem_heap_t* heap)
MY_ATTRIBUTE((nonnull, warn_unused_result));
+/** Report progress when rolling back a row of a recovered transaction.
+@return whether the rollback should be aborted due to pending shutdown */
+bool
+trx_roll_must_shutdown();
/*******************************************************************//**
Rollback or clean up any incomplete transactions which were
encountered in crash recovery. If the transaction already was
diff --git a/storage/innobase/row/row0mysql.cc b/storage/innobase/row/row0mysql.cc
index 5b2e80bc50e..335ca35db95 100644
--- a/storage/innobase/row/row0mysql.cc
+++ b/storage/innobase/row/row0mysql.cc
@@ -75,7 +75,7 @@ ibool row_rollback_on_timeout = FALSE;
/** Chain node of the list of tables to drop in the background. */
struct row_mysql_drop_t{
- char* table_name; /*!< table name */
+ table_id_t table_id; /*!< table id */
UT_LIST_NODE_T(row_mysql_drop_t)row_mysql_drop_list;
/*!< list chain node */
};
@@ -112,19 +112,6 @@ row_mysql_is_system_table(
|| 0 == strcmp(name + 6, "db"));
}
-/*********************************************************************//**
-If a table is not yet in the drop list, adds the table to the list of tables
-which the master thread drops in background. We need this on Unix because in
-ALTER TABLE MySQL may call drop table even if the table has running queries on
-it. Also, if there are running foreign key checks on the table, we drop the
-table lazily.
-@return TRUE if the table was not yet in the drop list, and was added there */
-static
-ibool
-row_add_table_to_background_drop_list(
-/*==================================*/
- const char* name); /*!< in: table name */
-
#ifdef UNIV_DEBUG
/** Wait for the background drop list to become empty. */
void
@@ -2817,7 +2804,7 @@ loop:
mutex_enter(&row_drop_list_mutex);
ut_a(row_mysql_drop_list_inited);
-
+next:
drop = UT_LIST_GET_FIRST(row_mysql_drop_list);
n_tables = UT_LIST_GET_LEN(row_mysql_drop_list);
@@ -2830,61 +2817,38 @@ loop:
return(n_tables + n_tables_dropped);
}
- DBUG_EXECUTE_IF("row_drop_tables_in_background_sleep",
- os_thread_sleep(5000000);
- );
-
- table = dict_table_open_on_name(drop->table_name, FALSE, FALSE,
- DICT_ERR_IGNORE_NONE);
-
- if (table == NULL) {
- /* If for some reason the table has already been dropped
- through some other mechanism, do not try to drop it */
+ table = dict_table_open_on_id(drop->table_id, FALSE,
+ DICT_TABLE_OP_OPEN_ONLY_IF_CACHED);
- goto already_dropped;
+ if (!table) {
+ n_tables_dropped++;
+ mutex_enter(&row_drop_list_mutex);
+ UT_LIST_REMOVE(row_mysql_drop_list, drop);
+ MONITOR_DEC(MONITOR_BACKGROUND_DROP_TABLE);
+ ut_free(drop);
+ goto next;
}
+ ut_a(!table->can_be_evicted);
+
if (!table->to_be_dropped) {
- /* There is a scenario: the old table is dropped
- just after it's added into drop list, and new
- table with the same name is created, then we try
- to drop the new table in background. */
dict_table_close(table, FALSE, FALSE);
- goto already_dropped;
+ mutex_enter(&row_drop_list_mutex);
+ UT_LIST_REMOVE(row_mysql_drop_list, drop);
+ UT_LIST_ADD_LAST(row_mysql_drop_list, drop);
+ goto next;
}
- ut_a(!table->can_be_evicted);
-
dict_table_close(table, FALSE, FALSE);
if (DB_SUCCESS != row_drop_table_for_mysql_in_background(
- drop->table_name)) {
+ table->name.m_name)) {
/* If the DROP fails for some table, we return, and let the
main thread retry later */
-
return(n_tables + n_tables_dropped);
}
- n_tables_dropped++;
-
-already_dropped:
- mutex_enter(&row_drop_list_mutex);
-
- UT_LIST_REMOVE(row_mysql_drop_list, drop);
-
- MONITOR_DEC(MONITOR_BACKGROUND_DROP_TABLE);
-
- ib::info() << "Dropped table "
- << ut_get_name(NULL, drop->table_name)
- << " in background drop queue.",
-
- ut_free(drop->table_name);
-
- ut_free(drop);
-
- mutex_exit(&row_drop_list_mutex);
-
goto loop;
}
@@ -2915,14 +2879,13 @@ which the master thread drops in background. We need this on Unix because in
ALTER TABLE MySQL may call drop table even if the table has running queries on
it. Also, if there are running foreign key checks on the table, we drop the
table lazily.
-@return TRUE if the table was not yet in the drop list, and was added there */
+@return whether background DROP TABLE was scheduled for the first time */
static
-ibool
-row_add_table_to_background_drop_list(
-/*==================================*/
- const char* name) /*!< in: table name */
+bool
+row_add_table_to_background_drop_list(table_id_t table_id)
{
row_mysql_drop_t* drop;
+ bool added = true;
mutex_enter(&row_drop_list_mutex);
@@ -2933,27 +2896,21 @@ row_add_table_to_background_drop_list(
drop != NULL;
drop = UT_LIST_GET_NEXT(row_mysql_drop_list, drop)) {
- if (strcmp(drop->table_name, name) == 0) {
- /* Already in the list */
-
- mutex_exit(&row_drop_list_mutex);
-
- return(FALSE);
+ if (drop->table_id == table_id) {
+ added = false;
+ goto func_exit;
}
}
- drop = static_cast<row_mysql_drop_t*>(
- ut_malloc_nokey(sizeof(row_mysql_drop_t)));
-
- drop->table_name = mem_strdup(name);
+ drop = static_cast<row_mysql_drop_t*>(ut_malloc_nokey(sizeof *drop));
+ drop->table_id = table_id;
UT_LIST_ADD_LAST(row_mysql_drop_list, drop);
MONITOR_INC(MONITOR_BACKGROUND_DROP_TABLE);
-
+func_exit:
mutex_exit(&row_drop_list_mutex);
-
- return(TRUE);
+ return added;
}
/** Reassigns the table identifier of a table.
@@ -3687,7 +3644,7 @@ row_drop_table_for_mysql(
DBUG_EXECUTE_IF("row_drop_table_add_to_background",
- row_add_table_to_background_drop_list(table->name.m_name);
+ row_add_table_to_background_drop_list(table->id);
err = DB_SUCCESS;
goto funct_exit;
);
@@ -3699,28 +3656,17 @@ row_drop_table_for_mysql(
checks take an IS or IX lock on the table. */
if (table->n_foreign_key_checks_running > 0) {
-
- const char* save_tablename = table->name.m_name;
- ibool added;
-
- added = row_add_table_to_background_drop_list(save_tablename);
-
- if (added) {
+ if (row_add_table_to_background_drop_list(table->id)) {
ib::info() << "You are trying to drop table "
<< table->name
<< " though there is a foreign key check"
" running on it. Adding the table to the"
" background drop queue.";
-
- /* We return DB_SUCCESS to MySQL though the drop will
- happen lazily later */
-
- err = DB_SUCCESS;
- } else {
- /* The table is already in the background drop list */
- err = DB_ERROR;
}
+ /* We return DB_SUCCESS to MySQL though the drop will
+ happen lazily later */
+ err = DB_SUCCESS;
goto funct_exit;
}
@@ -3745,12 +3691,7 @@ row_drop_table_for_mysql(
lock_remove_all_on_table(table, TRUE);
ut_a(table->n_rec_locks == 0);
} else if (table->get_ref_count() > 0 || table->n_rec_locks > 0) {
- ibool added;
-
- added = row_add_table_to_background_drop_list(
- table->name.m_name);
-
- if (added) {
+ if (row_add_table_to_background_drop_list(table->id)) {
ib::info() << "MySQL is trying to drop table "
<< table->name
<< " though there are still open handles to"
diff --git a/storage/innobase/row/row0quiesce.cc b/storage/innobase/row/row0quiesce.cc
index dd6289c91e6..21cc67620f6 100644
--- a/storage/innobase/row/row0quiesce.cc
+++ b/storage/innobase/row/row0quiesce.cc
@@ -520,7 +520,7 @@ row_quiesce_table_start(
ut_ad(fil_space_get(table->space) != NULL);
ib::info() << "Sync to disk of " << table->name << " started.";
- if (trx_purge_state() != PURGE_STATE_DISABLED) {
+ if (srv_undo_sources) {
trx_purge_stop();
}
@@ -603,7 +603,7 @@ row_quiesce_table_complete(
ib::info() << "Deleting the meta-data file '" << cfg_name << "'";
}
- if (trx_purge_state() != PURGE_STATE_DISABLED) {
+ if (srv_undo_sources) {
trx_purge_run();
}
diff --git a/storage/innobase/row/row0sel.cc b/storage/innobase/row/row0sel.cc
index 9bf71592f54..9cad3ab7de6 100644
--- a/storage/innobase/row/row0sel.cc
+++ b/storage/innobase/row/row0sel.cc
@@ -4890,9 +4890,44 @@ wrong_offs:
ulint lock_type;
+ if (srv_locks_unsafe_for_binlog
+ || trx->isolation_level <= TRX_ISO_READ_COMMITTED) {
+ /* At READ COMMITTED or READ UNCOMMITTED
+ isolation levels, do not lock committed
+ delete-marked records. */
+ if (!rec_get_deleted_flag(rec, comp)) {
+ goto no_gap_lock;
+ }
+ if (index == clust_index) {
+ trx_id_t trx_id = row_get_rec_trx_id(
+ rec, index, offsets);
+ /* In delete-marked records, DB_TRX_ID must
+ always refer to an existing undo log record. */
+ ut_ad(trx_id);
+ if (!trx_rw_is_active(trx_id, NULL, false)) {
+ /* The clustered index record
+ was delete-marked in a committed
+ transaction. Ignore the record. */
+ goto locks_ok_del_marked;
+ }
+ } else if (trx_t* trx = row_vers_impl_x_locked(
+ rec, index, offsets)) {
+ /* The record belongs to an active
+ transaction. We must acquire a lock. */
+ trx_release_reference(trx);
+ } else {
+ /* The secondary index record does not
+ point to a delete-marked clustered index
+ record that belongs to an active transaction.
+ Ignore the secondary index record, because
+ it is not locked. */
+ goto next_rec;
+ }
+
+ goto no_gap_lock;
+ }
+
if (!set_also_gap_locks
- || srv_locks_unsafe_for_binlog
- || trx->isolation_level <= TRX_ISO_READ_COMMITTED
|| (unique_search && !rec_get_deleted_flag(rec, comp))
|| dict_index_is_spatial(index)) {
@@ -5096,6 +5131,7 @@ locks_ok:
page_rec_is_comp() cannot be used! */
if (rec_get_deleted_flag(rec, comp)) {
+locks_ok_del_marked:
/* In delete-marked records, DB_TRX_ID must
always refer to an existing undo log record. */
ut_ad(index != clust_index
@@ -5103,17 +5139,6 @@ locks_ok:
/* The record is delete-marked: we can skip it */
- if ((srv_locks_unsafe_for_binlog
- || trx->isolation_level <= TRX_ISO_READ_COMMITTED)
- && prebuilt->select_lock_type != LOCK_NONE
- && !did_semi_consistent_read) {
-
- /* No need to keep a lock on a delete-marked record
- if we do not want to use next-key locking. */
-
- row_unlock_for_mysql(prebuilt, TRUE);
- }
-
/* This is an optimization to skip setting the next key lock
on the record that follows this delete-marked record. This
optimization works because of the unique search criteria
diff --git a/storage/innobase/row/row0umod.cc b/storage/innobase/row/row0umod.cc
index 2269711ffef..b13b770f0bd 100644
--- a/storage/innobase/row/row0umod.cc
+++ b/storage/innobase/row/row0umod.cc
@@ -1178,7 +1178,16 @@ close_table:
if (!row_undo_search_clust_to_pcur(node)) {
/* As long as this rolling-back transaction exists,
the PRIMARY KEY value pointed to by the undo log
- record must exist. But, it is possible that the record
+ record should exist.
+
+ However, if InnoDB is killed during a rollback, or
+ shut down during the rollback of recovered
+ transactions, then after restart we may try to roll
+ back some of the same undo log records again, because
+ trx_roll_try_truncate() is not being invoked after
+ every undo log record.
+
+ It is also possible that the record
was not modified yet (the DB_ROLL_PTR does not match
node->roll_ptr) and thus there is nothing to roll back.
@@ -1186,8 +1195,11 @@ close_table:
record after successfully acquiring an exclusive lock
on the the clustered index record. That lock will not
be released before the transaction is committed or
- fully rolled back. */
- ut_ad(node->pcur.btr_cur.low_match == node->ref->n_fields);
+ fully rolled back. (Exception: if the server was
+ killed, restarted, and shut down again before the
+ rollback of the recovered transaction was completed,
+ it is possible that the transaction was partially
+ rolled back and locks released.) */
goto close_table;
}
diff --git a/storage/innobase/row/row0undo.cc b/storage/innobase/row/row0undo.cc
index 5a0cd0dc985..252dfb4a6a6 100644
--- a/storage/innobase/row/row0undo.cc
+++ b/storage/innobase/row/row0undo.cc
@@ -1,6 +1,7 @@
/*****************************************************************************
Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2017, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -339,6 +340,13 @@ row_undo_step(
ut_ad(que_node_get_type(node) == QUE_NODE_UNDO);
+ if (UNIV_UNLIKELY(trx == trx_roll_crash_recv_trx)
+ && trx_roll_must_shutdown()) {
+ /* Shutdown has been initiated. */
+ trx->error_state = DB_INTERRUPTED;
+ return(NULL);
+ }
+
err = row_undo(node, thr);
trx->error_state = err;
diff --git a/storage/innobase/row/row0vers.cc b/storage/innobase/row/row0vers.cc
index c5d08e1ece6..23ba751ea67 100644
--- a/storage/innobase/row/row0vers.cc
+++ b/storage/innobase/row/row0vers.cc
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, MariaDB Corporation
+Copyright (c) 2017, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -63,23 +63,25 @@ row_vers_non_vc_match(
const dtuple_t* ientry,
mem_heap_t* heap,
ulint* n_non_v_col);
-/*****************************************************************//**
-Finds out if an active transaction has inserted or modified a secondary
+/** Determine if an active transaction has inserted or modified a secondary
index record.
-@return 0 if committed, else the active transaction id;
-NOTE that this function can return false positives but never false
-negatives. The caller must confirm all positive results by calling
-trx_is_active() while holding lock_sys->mutex. */
+@param[in] clust_rec clustered index record
+@param[in] clust_index clustered index
+@param[in] rec secondary index record
+@param[in] index secondary index
+@param[in] offsets rec_get_offsets(rec, index)
+@param[in,out] mtr mini-transaction
+@return the active transaction; trx_release_reference() must be invoked
+@retval NULL if the record was committed */
UNIV_INLINE
trx_t*
row_vers_impl_x_locked_low(
-/*=======================*/
- const rec_t* clust_rec, /*!< in: clustered index record */
- dict_index_t* clust_index, /*!< in: the clustered index */
- const rec_t* rec, /*!< in: secondary index record */
- dict_index_t* index, /*!< in: the secondary index */
- const ulint* offsets, /*!< in: rec_get_offsets(rec, index) */
- mtr_t* mtr) /*!< in/out: mini-transaction */
+ const rec_t* clust_rec,
+ dict_index_t* clust_index,
+ const rec_t* rec,
+ dict_index_t* index,
+ const ulint* offsets,
+ mtr_t* mtr)
{
trx_id_t trx_id;
ibool corrupt;
@@ -325,19 +327,18 @@ result_check:
DBUG_RETURN(trx);
}
-/*****************************************************************//**
-Finds out if an active transaction has inserted or modified a secondary
+/** Determine if an active transaction has inserted or modified a secondary
index record.
-@return 0 if committed, else the active transaction id;
-NOTE that this function can return false positives but never false
-negatives. The caller must confirm all positive results by calling
-trx_is_active() while holding lock_sys->mutex. */
+@param[in] rec secondary index record
+@param[in] index secondary index
+@param[in] offsets rec_get_offsets(rec, index)
+@return the active transaction; trx_release_reference() must be invoked
+@retval NULL if the record was committed */
trx_t*
row_vers_impl_x_locked(
-/*===================*/
- const rec_t* rec, /*!< in: record in a secondary index */
- dict_index_t* index, /*!< in: the secondary index */
- const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */
+ const rec_t* rec,
+ dict_index_t* index,
+ const ulint* offsets)
{
mtr_t mtr;
trx_t* trx;
diff --git a/storage/innobase/srv/srv0srv.cc b/storage/innobase/srv/srv0srv.cc
index b6d9fbe635d..19be088912a 100644
--- a/storage/innobase/srv/srv0srv.cc
+++ b/storage/innobase/srv/srv0srv.cc
@@ -1323,9 +1323,28 @@ srv_printf_innodb_monitor(
#ifdef BTR_CUR_HASH_ADAPT
for (ulint i = 0; i < btr_ahi_parts; ++i) {
- rw_lock_s_lock(btr_search_latches[i]);
- ha_print_info(file, btr_search_sys->hash_tables[i]);
- rw_lock_s_unlock(btr_search_latches[i]);
+ const hash_table_t* table = btr_search_sys->hash_tables[i];
+
+ ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
+ /* this is only used for buf_pool->page_hash */
+ ut_ad(!table->heaps);
+ /* this is used for the adaptive hash index */
+ ut_ad(table->heap);
+
+ const mem_heap_t* heap = table->heap;
+ /* The heap may change during the following call,
+ so the data displayed may be garbage. We intentionally
+ avoid acquiring btr_search_latches[] so that the
+ diagnostic output will not stop here even in case another
+ thread hangs while holding btr_search_latches[].
+
+ This should be safe from crashes, because
+ table->heap will be pointing to the same object
+ for the full lifetime of the server. Even during
+ btr_search_disable() the heap will stay valid. */
+ fprintf(file, "Hash table size " ULINTPF
+ ", node heap has " ULINTPF " buffer(s)\n",
+ table->n_cells, heap->base.count - !heap->free_block);
}
fprintf(file,
@@ -2853,6 +2872,9 @@ DECLARE_THREAD(srv_purge_coordinator_thread)(
purge_sys->running = false;
+ /* Ensure that the wait in trx_purge_stop() will terminate. */
+ os_event_set(purge_sys->event);
+
rw_lock_x_unlock(&purge_sys->latch);
#ifdef UNIV_DEBUG_THREAD_CREATION
diff --git a/storage/innobase/srv/srv0start.cc b/storage/innobase/srv/srv0start.cc
index d776595568b..886d47e44c0 100644
--- a/storage/innobase/srv/srv0start.cc
+++ b/storage/innobase/srv/srv0start.cc
@@ -2633,8 +2633,6 @@ files_checked:
trx_temp_rseg_create();
}
- srv_is_being_started = false;
-
ut_a(trx_purge_state() == PURGE_STATE_INIT);
/* Create the master thread which does purge and other utility
@@ -2683,6 +2681,8 @@ files_checked:
purge_sys->state = PURGE_STATE_DISABLED;
}
+ srv_is_being_started = false;
+
if (!srv_read_only_mode) {
/* wake main loop of page cleaner up */
os_event_set(buf_flush_event);
diff --git a/storage/innobase/trx/trx0purge.cc b/storage/innobase/trx/trx0purge.cc
index 61c36637a4e..4fd9333c0ba 100644
--- a/storage/innobase/trx/trx0purge.cc
+++ b/storage/innobase/trx/trx0purge.cc
@@ -1770,52 +1770,48 @@ void
trx_purge_stop(void)
/*================*/
{
- ut_a(srv_n_purge_threads > 0);
-
rw_lock_x_lock(&purge_sys->latch);
- const int64_t sig_count = os_event_reset(purge_sys->event);
- const purge_state_t state = purge_sys->state;
-
- ut_a(state == PURGE_STATE_RUN || state == PURGE_STATE_STOP);
-
- ++purge_sys->n_stop;
-
- if (state == PURGE_STATE_RUN) {
+ switch (purge_sys->state) {
+ case PURGE_STATE_INIT:
+ case PURGE_STATE_DISABLED:
+ ut_error;
+ case PURGE_STATE_EXIT:
+ /* Shutdown must have been initiated during
+ FLUSH TABLES FOR EXPORT. */
+ ut_ad(!srv_undo_sources);
+unlock:
+ rw_lock_x_unlock(&purge_sys->latch);
+ break;
+ case PURGE_STATE_STOP:
+ ut_ad(srv_n_purge_threads > 0);
+ ++purge_sys->n_stop;
+ purge_sys->state = PURGE_STATE_STOP;
+ if (!purge_sys->running) {
+ goto unlock;
+ }
+ ib::info() << "Waiting for purge to stop";
+ do {
+ rw_lock_x_unlock(&purge_sys->latch);
+ os_thread_sleep(10000);
+ rw_lock_x_lock(&purge_sys->latch);
+ } while (purge_sys->running);
+ goto unlock;
+ case PURGE_STATE_RUN:
+ ut_ad(srv_n_purge_threads > 0);
+ ++purge_sys->n_stop;
ib::info() << "Stopping purge";
/* We need to wakeup the purge thread in case it is suspended,
so that it can acknowledge the state change. */
+ const int64_t sig_count = os_event_reset(purge_sys->event);
+ purge_sys->state = PURGE_STATE_STOP;
srv_purge_wakeup();
- }
-
- purge_sys->state = PURGE_STATE_STOP;
-
- if (state != PURGE_STATE_STOP) {
rw_lock_x_unlock(&purge_sys->latch);
/* Wait for purge coordinator to signal that it
is suspended. */
os_event_wait_low(purge_sys->event, sig_count);
- } else {
- bool once = true;
-
- /* Wait for purge to signal that it has actually stopped. */
- while (purge_sys->running) {
-
- if (once) {
- ib::info() << "Waiting for purge to stop";
- once = false;
- }
-
- rw_lock_x_unlock(&purge_sys->latch);
-
- os_thread_sleep(10000);
-
- rw_lock_x_lock(&purge_sys->latch);
- }
-
- rw_lock_x_unlock(&purge_sys->latch);
}
MONITOR_INC_VALUE(MONITOR_PURGE_STOP_COUNT, 1);
@@ -1830,8 +1826,12 @@ trx_purge_run(void)
rw_lock_x_lock(&purge_sys->latch);
switch (purge_sys->state) {
- case PURGE_STATE_INIT:
case PURGE_STATE_EXIT:
+ /* Shutdown must have been initiated during
+ FLUSH TABLES FOR EXPORT. */
+ ut_ad(!srv_undo_sources);
+ break;
+ case PURGE_STATE_INIT:
case PURGE_STATE_DISABLED:
ut_error;
diff --git a/storage/innobase/trx/trx0roll.cc b/storage/innobase/trx/trx0roll.cc
index d6857b892da..c9c77acba11 100644
--- a/storage/innobase/trx/trx0roll.cc
+++ b/storage/innobase/trx/trx0roll.cc
@@ -24,8 +24,10 @@ Transaction rollback
Created 3/26/1996 Heikki Tuuri
*******************************************************/
-#include "ha_prototypes.h"
+#include "my_config.h"
+#include <my_systemd.h>
+#include "ha_prototypes.h"
#include "trx0roll.h"
#include <mysql/service_wsrep.h>
@@ -56,14 +58,7 @@ static const ulint TRX_ROLL_TRUNC_THRESHOLD = 1;
bool trx_rollback_or_clean_is_active;
/** In crash recovery, the current trx to be rolled back; NULL otherwise */
-static const trx_t* trx_roll_crash_recv_trx = NULL;
-
-/** In crash recovery we set this to the undo n:o of the current trx to be
-rolled back. Then we can print how many % the rollback has progressed. */
-static undo_no_t trx_roll_max_undo_no;
-
-/** Auxiliary variable which tells the previous progress % we printed */
-static ulint trx_roll_progress_printed_pct;
+const trx_t* trx_roll_crash_recv_trx;
/****************************************************************//**
Finishes a transaction rollback. */
@@ -631,8 +626,6 @@ trx_rollback_active(
que_thr_t* thr;
roll_node_t* roll_node;
dict_table_t* table;
- int64_t rows_to_undo;
- const char* unit = "";
ibool dictionary_locked = FALSE;
heap = mem_heap_create(512);
@@ -651,28 +644,8 @@ trx_rollback_active(
ut_a(thr == que_fork_start_command(fork));
- trx_sys_mutex_enter();
-
trx_roll_crash_recv_trx = trx;
- trx_roll_max_undo_no = trx->undo_no;
-
- trx_roll_progress_printed_pct = 0;
-
- rows_to_undo = trx_roll_max_undo_no;
-
- trx_sys_mutex_exit();
-
- if (rows_to_undo > 1000000000) {
- rows_to_undo = rows_to_undo / 1000000;
- unit = "M";
- }
-
- const trx_id_t trx_id = trx_get_id_for_print(trx);
-
- ib::info() << "Rolling back trx with id " << trx_id << ", "
- << rows_to_undo << unit << " rows to undo";
-
if (trx_get_dict_operation(trx) != TRX_DICT_OP_NONE) {
row_mysql_lock_data_dictionary(trx);
dictionary_locked = TRUE;
@@ -683,6 +656,17 @@ trx_rollback_active(
que_run_threads(roll_node->undo_thr);
+ if (trx->error_state != DB_SUCCESS) {
+ ut_ad(trx->error_state == DB_INTERRUPTED);
+ ut_ad(!srv_is_being_started);
+ ut_ad(!srv_undo_sources);
+ ut_ad(srv_fast_shutdown);
+ ut_ad(!dictionary_locked);
+ que_graph_free(static_cast<que_t*>(
+ roll_node->undo_thr->common.parent));
+ goto func_exit;
+ }
+
trx_rollback_finish(thr_get_trx(roll_node->undo_thr));
/* Free the memory reserved by the undo graph */
@@ -714,11 +698,13 @@ trx_rollback_active(
}
}
+func_exit:
if (dictionary_locked) {
row_mysql_unlock_data_dictionary(trx);
}
- ib::info() << "Rollback of trx with id " << trx_id << " completed";
+ ib::info() << "Rollback of trx with id " << ib::hex(trx->id)
+ << " completed";
mem_heap_free(heap);
@@ -736,7 +722,7 @@ ibool
trx_rollback_resurrected(
/*=====================*/
trx_t* trx, /*!< in: transaction to rollback or clean */
- ibool all) /*!< in: FALSE=roll back dictionary transactions;
+ ibool* all) /*!< in/out: FALSE=roll back dictionary transactions;
TRUE=roll back all non-PREPARED transactions */
{
ut_ad(trx_sys_mutex_own());
@@ -747,40 +733,102 @@ trx_rollback_resurrected(
to accidentally clean up a non-recovered transaction here. */
trx_mutex_enter(trx);
- bool is_recovered = trx->is_recovered;
- trx_state_t state = trx->state;
- trx_mutex_exit(trx);
-
- if (!is_recovered) {
+ if (!trx->is_recovered) {
+func_exit:
+ trx_mutex_exit(trx);
return(FALSE);
}
- switch (state) {
+ switch (trx->state) {
case TRX_STATE_COMMITTED_IN_MEMORY:
+ trx_mutex_exit(trx);
trx_sys_mutex_exit();
- ib::info() << "Cleaning up trx with id "
- << trx_get_id_for_print(trx);
+ ib::info() << "Cleaning up trx with id " << ib::hex(trx->id);
trx_cleanup_at_db_startup(trx);
trx_free_resurrected(trx);
return(TRUE);
case TRX_STATE_ACTIVE:
- if (all || trx_get_dict_operation(trx) != TRX_DICT_OP_NONE) {
+ if (!srv_is_being_started
+ && !srv_undo_sources && srv_fast_shutdown) {
+fake_prepared:
+ trx->state = TRX_STATE_PREPARED;
+ trx_sys->n_prepared_trx++;
+ trx_sys->n_prepared_recovered_trx++;
+ *all = FALSE;
+ goto func_exit;
+ }
+ trx_mutex_exit(trx);
+
+ if (*all || trx_get_dict_operation(trx) != TRX_DICT_OP_NONE) {
trx_sys_mutex_exit();
trx_rollback_active(trx);
+ if (trx->error_state != DB_SUCCESS) {
+ ut_ad(trx->error_state == DB_INTERRUPTED);
+ trx->error_state = DB_SUCCESS;
+ ut_ad(!srv_undo_sources);
+ ut_ad(srv_fast_shutdown);
+ mutex_enter(&trx_sys->mutex);
+ trx_mutex_enter(trx);
+ goto fake_prepared;
+ }
trx_free_for_background(trx);
return(TRUE);
}
return(FALSE);
case TRX_STATE_PREPARED:
- return(FALSE);
+ goto func_exit;
case TRX_STATE_NOT_STARTED:
case TRX_STATE_FORCED_ROLLBACK:
break;
}
ut_error;
- return(FALSE);
+ goto func_exit;
+}
+
+/** Report progress when rolling back a row of a recovered transaction.
+@return whether the rollback should be aborted due to pending shutdown */
+bool
+trx_roll_must_shutdown()
+{
+ const trx_t* trx = trx_roll_crash_recv_trx;
+ ut_ad(trx);
+ ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE));
+ ut_ad(trx->in_rollback);
+
+ if (trx_get_dict_operation(trx) == TRX_DICT_OP_NONE
+ && !srv_is_being_started
+ && !srv_undo_sources && srv_fast_shutdown) {
+ return true;
+ }
+
+ ib_time_t time = ut_time();
+ mutex_enter(&trx_sys->mutex);
+ mutex_enter(&recv_sys->mutex);
+
+ if (recv_sys->report(time)) {
+ ulint n_trx = 0, n_rows = 0;
+ for (const trx_t* t = UT_LIST_GET_FIRST(trx_sys->rw_trx_list);
+ t != NULL;
+ t = UT_LIST_GET_NEXT(trx_list, t)) {
+
+ assert_trx_in_rw_list(t);
+ if (t->is_recovered
+ && trx_state_eq(t, TRX_STATE_ACTIVE)) {
+ n_trx++;
+ n_rows += t->undo_no;
+ }
+ }
+ ib::info() << "To roll back: " << n_trx << " transactions, "
+ << n_rows << " rows";
+ sd_notifyf(0, "STATUS=To roll back: " ULINTPF " transactions, "
+ ULINTPF " rows", n_trx, n_rows);
+ }
+
+ mutex_exit(&recv_sys->mutex);
+ mutex_exit(&trx_sys->mutex);
+ return false;
}
/*******************************************************************//**
@@ -825,17 +873,11 @@ trx_rollback_or_clean_recovered(
assert_trx_in_rw_list(trx);
- if (srv_shutdown_state != SRV_SHUTDOWN_NONE
- && srv_fast_shutdown != 0) {
- all = FALSE;
- break;
- }
-
/* If this function does a cleanup or rollback
then it will release the trx_sys->mutex, therefore
we need to reacquire it before retrying the loop. */
- if (trx_rollback_resurrected(trx, all)) {
+ if (trx_rollback_resurrected(trx, &all)) {
trx_sys_mutex_enter();
@@ -1042,27 +1084,6 @@ trx_roll_pop_top_rec_of_trx(trx_t* trx, roll_ptr_t* roll_ptr, mem_heap_t* heap)
ut_ad(trx_roll_check_undo_rec_ordering(
undo_no, undo->rseg->space, trx));
- /* We print rollback progress info if we are in a crash recovery
- and the transaction has at least 1000 row operations to undo. */
-
- if (trx == trx_roll_crash_recv_trx && trx_roll_max_undo_no > 1000) {
-
- ulint progress_pct = 100 - (ulint)
- ((undo_no * 100) / trx_roll_max_undo_no);
- if (progress_pct != trx_roll_progress_printed_pct) {
- if (trx_roll_progress_printed_pct == 0) {
- fprintf(stderr,
- "\nInnoDB: Progress in percents:"
- " %lu", (ulong) progress_pct);
- } else {
- fprintf(stderr,
- " %lu", (ulong) progress_pct);
- }
- fflush(stderr);
- trx_roll_progress_printed_pct = progress_pct;
- }
- }
-
trx->undo_no = undo_no;
trx->undo_rseg_space = undo->rseg->space;
mutex_exit(&trx->undo_mutex);
diff --git a/storage/innobase/trx/trx0undo.cc b/storage/innobase/trx/trx0undo.cc
index 5e6b67007ad..11df7011027 100644
--- a/storage/innobase/trx/trx0undo.cc
+++ b/storage/innobase/trx/trx0undo.cc
@@ -1826,10 +1826,14 @@ trx_undo_free_prepared(
/* fall through */
case TRX_UNDO_ACTIVE:
/* lock_trx_release_locks() assigns
- trx->is_recovered=false */
+ trx->is_recovered=false and
+ trx->state = TRX_STATE_COMMITTED_IN_MEMORY,
+ also for transactions that we faked
+ to TRX_STATE_PREPARED in trx_rollback_resurrected(). */
ut_a(!srv_was_started
|| srv_read_only_mode
- || srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO);
+ || srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO
+ || srv_fast_shutdown);
break;
default:
ut_error;
@@ -1854,10 +1858,14 @@ trx_undo_free_prepared(
/* fall through */
case TRX_UNDO_ACTIVE:
/* lock_trx_release_locks() assigns
- trx->is_recovered=false */
+ trx->is_recovered=false and
+ trx->state = TRX_STATE_COMMITTED_IN_MEMORY,
+ also for transactions that we faked
+ to TRX_STATE_PREPARED in trx_rollback_resurrected(). */
ut_a(!srv_was_started
|| srv_read_only_mode
- || srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO);
+ || srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO
+ || srv_fast_shutdown);
break;
default:
ut_error;
diff --git a/storage/maria/ma_loghandler.c b/storage/maria/ma_loghandler.c
index bc30adad43a..e30ddb756a6 100644
--- a/storage/maria/ma_loghandler.c
+++ b/storage/maria/ma_loghandler.c
@@ -6342,7 +6342,6 @@ my_bool translog_write_record(LSN *lsn,
short_trid, &parts, trn, hook_arg);
break;
case LOGRECTYPE_NOT_ALLOWED:
- DBUG_ASSERT(0);
default:
DBUG_ASSERT(0);
rc= 1;
diff --git a/storage/rocksdb/ha_rocksdb.cc b/storage/rocksdb/ha_rocksdb.cc
index e08329ab61e..8857b2e8cba 100644
--- a/storage/rocksdb/ha_rocksdb.cc
+++ b/storage/rocksdb/ha_rocksdb.cc
@@ -5957,13 +5957,37 @@ rdb_is_index_collation_supported(const my_core::Field *const field) {
const my_core::enum_field_types type = field->real_type();
/* Handle [VAR](CHAR|BINARY) or TEXT|BLOB */
if (type == MYSQL_TYPE_VARCHAR || type == MYSQL_TYPE_STRING ||
- type == MYSQL_TYPE_BLOB) {
- return RDB_INDEX_COLLATIONS.find(field->charset()->number) !=
- RDB_INDEX_COLLATIONS.end();
+ type == MYSQL_TYPE_BLOB) {
+
+ return (RDB_INDEX_COLLATIONS.find(field->charset()->number) !=
+ RDB_INDEX_COLLATIONS.end()) ||
+ rdb_is_collation_supported(field->charset());
}
return true;
}
+
+static bool
+rdb_field_uses_nopad_collation(const my_core::Field *const field) {
+ const my_core::enum_field_types type = field->real_type();
+ /* Handle [VAR](CHAR|BINARY) or TEXT|BLOB */
+ if (type == MYSQL_TYPE_VARCHAR || type == MYSQL_TYPE_STRING ||
+ type == MYSQL_TYPE_BLOB) {
+
+ /*
+ This is technically a NOPAD collation but it's a binary collation
+ that we can handle.
+ */
+ if (RDB_INDEX_COLLATIONS.find(field->charset()->number) !=
+ RDB_INDEX_COLLATIONS.end())
+ return false;
+
+ return (field->charset()->state & MY_CS_NOPAD);
+ }
+ return false;
+}
+
+
/*
Create structures needed for storing data in rocksdb. This is called when the
table is created. The structures will be shared by all TABLE* objects.
@@ -6072,8 +6096,7 @@ int ha_rocksdb::create_cfs(
for (uint i = 0; i < tbl_def_arg->m_key_count; i++) {
rocksdb::ColumnFamilyHandle *cf_handle;
- if (rocksdb_strict_collation_check &&
- !is_hidden_pk(i, table_arg, tbl_def_arg) &&
+ if (!is_hidden_pk(i, table_arg, tbl_def_arg) &&
tbl_def_arg->base_tablename().find(tmp_file_prefix) != 0) {
if (!tsys_set)
{
@@ -6085,21 +6108,28 @@ int ha_rocksdb::create_cfs(
for (uint part = 0; part < table_arg->key_info[i].ext_key_parts;
part++)
{
- if (!rdb_is_index_collation_supported(
+ /* MariaDB: disallow NOPAD collations */
+ if (rdb_field_uses_nopad_collation(
+ table_arg->key_info[i].key_part[part].field))
+ {
+ my_error(ER_MYROCKS_CANT_NOPAD_COLLATION, MYF(0));
+ DBUG_RETURN(HA_EXIT_FAILURE);
+ }
+
+ if (rocksdb_strict_collation_check &&
+ !rdb_is_index_collation_supported(
table_arg->key_info[i].key_part[part].field) &&
!rdb_collation_exceptions->matches(tablename_sys)) {
- std::string collation_err;
- for (const auto &coll : RDB_INDEX_COLLATIONS) {
- if (collation_err != "") {
- collation_err += ", ";
- }
- collation_err += get_charset_name(coll);
- }
- my_error(ER_UNSUPPORTED_COLLATION, MYF(0),
- tbl_def_arg->full_tablename().c_str(),
- table_arg->key_info[i].key_part[part].field->field_name,
- collation_err.c_str());
- DBUG_RETURN(HA_EXIT_FAILURE);
+
+ char buf[1024];
+ my_snprintf(buf, sizeof(buf),
+ "Indexed column %s.%s uses a collation that does not "
+ "allow index-only access in secondary key and has "
+ "reduced disk space efficiency in primary key.",
+ tbl_def_arg->full_tablename().c_str(),
+ table_arg->key_info[i].key_part[part].field->field_name);
+
+ my_error(ER_INTERNAL_ERROR, MYF(ME_JUST_WARNING), buf);
}
}
}
@@ -12307,6 +12337,7 @@ void rocksdb_set_update_cf_options(THD *const /* unused */,
// Basic sanity checking and parsing the options into a map. If this fails
// then there's no point to proceed.
if (!Rdb_cf_options::parse_cf_options(val, &option_map)) {
+ my_free(*reinterpret_cast<char**>(var_ptr));
*reinterpret_cast<char**>(var_ptr) = nullptr;
// NO_LINT_DEBUG
@@ -12375,6 +12406,7 @@ void rocksdb_set_update_cf_options(THD *const /* unused */,
// the CF options. This will results in consistent behavior and avoids
// dealing with cases when only a subset of CF-s was successfully updated.
if (val) {
+ my_free(*reinterpret_cast<char**>(var_ptr));
*reinterpret_cast<char**>(var_ptr) = my_strdup(val, MYF(0));
} else {
*reinterpret_cast<char**>(var_ptr) = nullptr;
@@ -12469,6 +12501,7 @@ void print_keydup_error(TABLE *table, KEY *key, myf errflag,
its name generation.
*/
+
struct st_mysql_storage_engine rocksdb_storage_engine = {
MYSQL_HANDLERTON_INTERFACE_VERSION};
@@ -12485,7 +12518,7 @@ maria_declare_plugin(rocksdb_se){
myrocks::rocksdb_status_vars, /* status variables */
myrocks::rocksdb_system_variables, /* system variables */
"1.0", /* string version */
- MariaDB_PLUGIN_MATURITY_ALPHA /* maturity */
+ myrocks::MYROCKS_MARIADB_PLUGIN_MATURITY_LEVEL
},
myrocks::rdb_i_s_cfstats, myrocks::rdb_i_s_dbstats,
myrocks::rdb_i_s_perf_context, myrocks::rdb_i_s_perf_context_global,
diff --git a/storage/rocksdb/ha_rocksdb.h b/storage/rocksdb/ha_rocksdb.h
index d064988d0f3..82819bbf7b2 100644
--- a/storage/rocksdb/ha_rocksdb.h
+++ b/storage/rocksdb/ha_rocksdb.h
@@ -1411,4 +1411,7 @@ private:
Rdb_inplace_alter_ctx(const Rdb_inplace_alter_ctx &);
Rdb_inplace_alter_ctx &operator=(const Rdb_inplace_alter_ctx &);
};
+
+const int MYROCKS_MARIADB_PLUGIN_MATURITY_LEVEL= MariaDB_PLUGIN_MATURITY_ALPHA;
+
} // namespace myrocks
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/add_index_inplace.result b/storage/rocksdb/mysql-test/rocksdb/r/add_index_inplace.result
index 01fa9f1d35b..6325dc97cf5 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/add_index_inplace.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/add_index_inplace.result
@@ -279,8 +279,15 @@ DROP TABLE t1;
set @tmp_rocksdb_strict_collation_check= @@rocksdb_strict_collation_check;
set global rocksdb_strict_collation_check=1;
CREATE TABLE t1 (a INT, b TEXT);
+# MariaDB no longer gives ER_UNSUPPORTED_COLLATION
ALTER TABLE t1 ADD KEY kb(b(10));
-ERROR HY000: Unsupported collation on string indexed column test.t1.b Use binary collation (latin1_bin, binary, utf8_bin).
+ALTER TABLE t1 ADD PRIMARY KEY(a);
+DROP TABLE t1;
+CREATE TABLE t1 (a INT, b TEXT collate utf8_general_ci);
+# MariaDB no longer gives ER_UNSUPPORTED_COLLATION
+ALTER TABLE t1 ADD KEY kb(b(10));
+Warnings:
+Warning 1815 Internal error: Indexed column test.t1.b uses a collation that does not allow index-only access in secondary key and has reduced disk space efficiency in primary key.
ALTER TABLE t1 ADD PRIMARY KEY(a);
DROP TABLE t1;
set global rocksdb_strict_collation_check= @tmp_rocksdb_strict_collation_check;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/mariadb_port_fixes.result b/storage/rocksdb/mysql-test/rocksdb/r/mariadb_port_fixes.result
index 686b5637f7d..9952314cd2c 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/mariadb_port_fixes.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/mariadb_port_fixes.result
@@ -55,3 +55,29 @@ id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t2 range a a 32 NULL # Using where
drop table t1,t2;
set global rocksdb_strict_collation_check=@tmp_rscc;
+#
+# MDEV-14389: MyRocks and NOPAD collations
+#
+create table t1 (pk varchar(10) collate latin1_nopad_bin, primary key(pk)) engine=rocksdb;
+ERROR HY000: MyRocks doesn't currently support collations with "No pad" attribute.
+set global rocksdb_strict_collation_check=off;
+create table t1 (pk varchar(10) collate latin1_nopad_bin, primary key(pk)) engine=rocksdb;
+ERROR HY000: MyRocks doesn't currently support collations with "No pad" attribute.
+set global rocksdb_strict_collation_check=@tmp_rscc;
+#
+# MDEV-14679: RocksdB plugin fails to load with "Loading of unknown plugin ROCKSDB_CFSTATS
+#
+select plugin_name, plugin_maturity from information_schema.plugins where plugin_name like '%rocksdb%';
+plugin_name plugin_maturity
+ROCKSDB Alpha
+ROCKSDB_CFSTATS Alpha
+ROCKSDB_DBSTATS Alpha
+ROCKSDB_PERF_CONTEXT Alpha
+ROCKSDB_PERF_CONTEXT_GLOBAL Alpha
+ROCKSDB_CF_OPTIONS Alpha
+ROCKSDB_COMPACTION_STATS Alpha
+ROCKSDB_GLOBAL_INFO Alpha
+ROCKSDB_DDL Alpha
+ROCKSDB_INDEX_FILE_MAP Alpha
+ROCKSDB_LOCKS Alpha
+ROCKSDB_TRX Alpha
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/add_index_inplace.test b/storage/rocksdb/mysql-test/rocksdb/t/add_index_inplace.test
index c1a91c2a5a2..876ef2c9965 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/add_index_inplace.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/add_index_inplace.test
@@ -173,10 +173,17 @@ set @tmp_rocksdb_strict_collation_check= @@rocksdb_strict_collation_check;
set global rocksdb_strict_collation_check=1;
CREATE TABLE t1 (a INT, b TEXT);
---error ER_UNSUPPORTED_COLLATION
+--echo # MariaDB no longer gives ER_UNSUPPORTED_COLLATION
ALTER TABLE t1 ADD KEY kb(b(10));
ALTER TABLE t1 ADD PRIMARY KEY(a);
DROP TABLE t1;
+
+CREATE TABLE t1 (a INT, b TEXT collate utf8_general_ci);
+--echo # MariaDB no longer gives ER_UNSUPPORTED_COLLATION
+ALTER TABLE t1 ADD KEY kb(b(10));
+ALTER TABLE t1 ADD PRIMARY KEY(a);
+DROP TABLE t1;
+
set global rocksdb_strict_collation_check= @tmp_rocksdb_strict_collation_check;
# make sure race condition between connection close and alter on another
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/disabled.def b/storage/rocksdb/mysql-test/rocksdb/t/disabled.def
index a0810fe63ec..dc2e04b93d7 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/disabled.def
+++ b/storage/rocksdb/mysql-test/rocksdb/t/disabled.def
@@ -41,7 +41,7 @@ rocksdb_deadlock_stress_rr: stress test
##
persistent_cache: Upstream RocksDB bug https://github.com/facebook/mysql-5.6/issues/579
collation: Fails on gcc 4.8 and before, MDEV-12433
-col_opt_zerofill: MDEV-14165: not MyRocks -problem in ps-protocol, happens in upstream too
+col_opt_zerofill: MDEV-14729 (also MDEV-14165 which was fixed): problem in the client
##
@@ -68,19 +68,12 @@ lock_wait_timeout_stats: MDEV-13404
compact_deletes: MDEV-12663 : rocksdb.compact_deletes times out and causes other tests to fail
blind_delete_without_tx_api: MDEV-12286: rocksdb.blind_delete_without_tx_api test fails
-# Enabling these didn't seem to cause any trouble:
-# autoinc_vars_thread : MDEV-12474 Regularly fails on buildbot
-# unique_check : MDEV-12474 Regularly fails on buildbot
-# bloomfilter : MDEV-12474 Regularly fails on buildbot
-# unique_sec : Intermittent failures in BB
-
-
##
## Tests that fail for some other reason
##
+
information_schema : MariaRocks: requires GTIDs
mysqlbinlog_gtid_skip_empty_trans_rocksdb : MariaRocks: requires GTIDs
-#read_only_tx : MariaRocks: requires GTIDs
rpl_row_triggers : MariaRocks: requires GTIDs
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/mariadb_port_fixes.test b/storage/rocksdb/mysql-test/rocksdb/t/mariadb_port_fixes.test
index f003aaf2032..980f2e302b2 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/mariadb_port_fixes.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/mariadb_port_fixes.test
@@ -54,3 +54,22 @@ explain select a from t2 where a <'zzz';
drop table t1,t2;
set global rocksdb_strict_collation_check=@tmp_rscc;
+
+--echo #
+--echo # MDEV-14389: MyRocks and NOPAD collations
+--echo #
+
+--error ER_MYROCKS_CANT_NOPAD_COLLATION
+create table t1 (pk varchar(10) collate latin1_nopad_bin, primary key(pk)) engine=rocksdb;
+
+set global rocksdb_strict_collation_check=off;
+--error ER_MYROCKS_CANT_NOPAD_COLLATION
+create table t1 (pk varchar(10) collate latin1_nopad_bin, primary key(pk)) engine=rocksdb;
+
+set global rocksdb_strict_collation_check=@tmp_rscc;
+
+--echo #
+--echo # MDEV-14679: RocksdB plugin fails to load with "Loading of unknown plugin ROCKSDB_CFSTATS
+--echo #
+select plugin_name, plugin_maturity from information_schema.plugins where plugin_name like '%rocksdb%';
+
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/disabled.def b/storage/rocksdb/mysql-test/rocksdb_rpl/t/disabled.def
index 07a2738eee5..e46e89517a9 100644
--- a/storage/rocksdb/mysql-test/rocksdb_rpl/t/disabled.def
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/disabled.def
@@ -1,15 +1,21 @@
+##
+## Tests that require FB/MySQL specific features for which there are
+## no plans to port them to MariaDB
+##
+rpl_no_unique_check_on_lag : unique_check_lag_threshold is not available in MariaDB
+rpl_no_unique_check_on_lag_mts : unique_check_lag_threshold is not available in MariaDB
+consistent_snapshot_mixed_engines : Tests START TRANSACTION WITH CONSISTENT $ENGINE_NAME SNAPSHOT
-# rpl_rocksdb_2pc_crash_recover
+##
+## Tests that are disabled for other reasons
+##
-consistent_snapshot_mixed_engines : Didn't try with MariaDB, yet
multiclient_2pc : Didn't try with MariaDB, yet
rpl_crash_safe_wal_corrupt : Didn't try with MariaDB, yet
rpl_ddl_high_priority : Didn't try with MariaDB, yet
rpl_gtid_crash_safe : Didn't try with MariaDB, yet
rpl_gtid_crash_safe_wal_corrupt : Didn't try with MariaDB, yet
rpl_gtid_rocksdb_sys_header : Didn't try with MariaDB, yet
-rpl_no_unique_check_on_lag : Didn't try with MariaDB, yet
-rpl_no_unique_check_on_lag_mts : Didn't try with MariaDB, yet
rpl_rocksdb_snapshot : Didn't try with MariaDB, yet
rpl_rocksdb_snapshot_without_gtid : Didn't try with MariaDB, yet
rpl_rocksdb_stress_crash : Didn't try with MariaDB, yet
diff --git a/storage/rocksdb/mysql-test/storage_engine/disabled.def b/storage/rocksdb/mysql-test/storage_engine/disabled.def
index 0643b2052e2..930e1d82b87 100644
--- a/storage/rocksdb/mysql-test/storage_engine/disabled.def
+++ b/storage/rocksdb/mysql-test/storage_engine/disabled.def
@@ -14,6 +14,7 @@ lock_concurrent : MDEV-13148 - LOCK TABLE on RocksDB table fails with a bog
optimize_table : MDEV-13148 - LOCK TABLE on RocksDB table fails with a bogus error message
repair_table : MDEV-13148 - LOCK TABLE on RocksDB table fails with a bogus error message
select_high_prio : Not supported
+show_engine : SHOW ENGINE produces different number of lines depending on previous tests
show_table_status : MDEV-13152 - Indeterministic row number in SHOW TABLE STATUS on RocksDB table
tbl_opt_data_dir : Not supported
tbl_opt_index_dir : Not supported
diff --git a/storage/rocksdb/mysql-test/storage_engine/type_bit_indexes.rdiff b/storage/rocksdb/mysql-test/storage_engine/type_bit_indexes.rdiff
new file mode 100644
index 00000000000..e53a33b4fba
--- /dev/null
+++ b/storage/rocksdb/mysql-test/storage_engine/type_bit_indexes.rdiff
@@ -0,0 +1,20 @@
+--- suite/storage_engine/type_bit_indexes.result 2017-12-12 20:34:34.000000000 +0200
++++ suite/storage_engine/type_bit_indexes.reject 2017-12-12 20:35:24.539330056 +0200
+@@ -69,7 +69,7 @@
+ (1,0xFFFF,0xFFFFFFFF,0xFFFFFFFFFFFFFFFF);
+ EXPLAIN SELECT HEX(b+c) FROM t1 WHERE c > 1 OR HEX(b) < 0xFFFFFF;
+ id select_type table type possible_keys key key_len ref rows Extra
+-# # # # # b_c # # # #
++# # # # # NULL # # # #
+ SELECT HEX(b+c) FROM t1 WHERE c > 1 OR HEX(b) < 0xFFFFFF;
+ HEX(b+c)
+ 10
+@@ -98,7 +98,7 @@
+ (1,0xFFFF,0xFFFFFFFF,0xFFFFFFFFFFFFFFFF);
+ EXPLAIN SELECT DISTINCT a+0 FROM t1 ORDER BY a;
+ id select_type table type possible_keys key key_len ref rows Extra
+-# # # # # a # # # #
++# # # # # NULL # # # #
+ SELECT DISTINCT a+0 FROM t1 ORDER BY a;
+ a+0
+ 0
diff --git a/storage/rocksdb/mysql-test/storage_engine/type_enum_indexes.rdiff b/storage/rocksdb/mysql-test/storage_engine/type_enum_indexes.rdiff
new file mode 100644
index 00000000000..be83fb6e212
--- /dev/null
+++ b/storage/rocksdb/mysql-test/storage_engine/type_enum_indexes.rdiff
@@ -0,0 +1,11 @@
+--- suite/storage_engine/type_enum_indexes.result 2017-03-12 04:38:50.000000000 +0200
++++ suite/storage_engine/type_enum_indexes.reject 2017-12-12 20:36:47.455331726 +0200
+@@ -30,7 +30,7 @@
+ t1 0 a_b 2 b # # NULL NULL # #
+ EXPLAIN SELECT a FROM t1 WHERE b > 'test2' ORDER BY a;
+ id select_type table type possible_keys key key_len ref rows Extra
+-# # # # # a_b # # # #
++# # # # # NULL # # # #
+ SELECT a FROM t1 WHERE b > 'test2' ORDER BY a;
+ a
+ Africa
diff --git a/storage/rocksdb/mysql-test/storage_engine/type_set_indexes.rdiff b/storage/rocksdb/mysql-test/storage_engine/type_set_indexes.rdiff
new file mode 100644
index 00000000000..2703e81b745
--- /dev/null
+++ b/storage/rocksdb/mysql-test/storage_engine/type_set_indexes.rdiff
@@ -0,0 +1,20 @@
+--- suite/storage_engine/type_set_indexes.result 2017-03-12 04:38:50.000000000 +0200
++++ suite/storage_engine/type_set_indexes.reject 2017-12-12 20:37:16.187332305 +0200
+@@ -97,7 +97,7 @@
+ Warning 1265 Data truncated for column 'b' at row 7
+ EXPLAIN SELECT a FROM t1 WHERE FIND_IN_SET('Europe',a) > 0;
+ id select_type table type possible_keys key key_len ref rows Extra
+-# # # # # a # # # #
++# # # # # NULL # # # #
+ SELECT a FROM t1 WHERE FIND_IN_SET('Europe',a) > 0;
+ a
+ Africa,Europe,Asia
+@@ -124,7 +124,7 @@
+ Warning 1265 Data truncated for column 'b' at row 7
+ EXPLAIN SELECT DISTINCT a, b FROM t1 ORDER BY b DESC, a;
+ id select_type table type possible_keys key key_len ref rows Extra
+-# # # # # b_a # # # #
++# # # # # NULL # # # #
+ SELECT DISTINCT a, b FROM t1 ORDER BY b DESC, a;
+ a b
+ test1,test3
diff --git a/storage/rocksdb/rdb_datadic.cc b/storage/rocksdb/rdb_datadic.cc
index 3efd3ae6433..df743ff0c48 100644
--- a/storage/rocksdb/rdb_datadic.cc
+++ b/storage/rocksdb/rdb_datadic.cc
@@ -2921,7 +2921,7 @@ std::array<const Rdb_collation_codec *, MY_ALL_CHARSETS_SIZE>
rdb_collation_data;
mysql_mutex_t rdb_collation_data_mutex;
-static bool rdb_is_collation_supported(const my_core::CHARSET_INFO *const cs) {
+bool rdb_is_collation_supported(const my_core::CHARSET_INFO *const cs) {
return cs->strxfrm_multiply==1 && cs->mbmaxlen == 1 &&
!(cs->state & (MY_CS_BINSORT | MY_CS_NOPAD));
}
diff --git a/storage/rocksdb/rdb_datadic.h b/storage/rocksdb/rdb_datadic.h
index b1ecef045d1..5796132de39 100644
--- a/storage/rocksdb/rdb_datadic.h
+++ b/storage/rocksdb/rdb_datadic.h
@@ -1358,4 +1358,6 @@ struct Rdb_index_info {
uint64 m_ttl_duration = 0;
};
+bool rdb_is_collation_supported(const my_core::CHARSET_INFO *const cs);
+
} // namespace myrocks
diff --git a/storage/rocksdb/rdb_i_s.cc b/storage/rocksdb/rdb_i_s.cc
index 346a2994ec1..424a9e6c1f4 100644
--- a/storage/rocksdb/rdb_i_s.cc
+++ b/storage/rocksdb/rdb_i_s.cc
@@ -1484,7 +1484,7 @@ struct st_maria_plugin rdb_i_s_cfstats = {
nullptr, /* status variables */
nullptr, /* system variables */
nullptr, /* config options */
- 0, /* flags */
+ MYROCKS_MARIADB_PLUGIN_MATURITY_LEVEL
};
struct st_maria_plugin rdb_i_s_dbstats = {
@@ -1500,7 +1500,7 @@ struct st_maria_plugin rdb_i_s_dbstats = {
nullptr, /* status variables */
nullptr, /* system variables */
nullptr, /* config options */
- 0, /* flags */
+ MYROCKS_MARIADB_PLUGIN_MATURITY_LEVEL
};
struct st_maria_plugin rdb_i_s_perf_context = {
@@ -1516,7 +1516,7 @@ struct st_maria_plugin rdb_i_s_perf_context = {
nullptr, /* status variables */
nullptr, /* system variables */
nullptr, /* config options */
- 0, /* flags */
+ MYROCKS_MARIADB_PLUGIN_MATURITY_LEVEL
};
struct st_maria_plugin rdb_i_s_perf_context_global = {
@@ -1532,7 +1532,7 @@ struct st_maria_plugin rdb_i_s_perf_context_global = {
nullptr, /* status variables */
nullptr, /* system variables */
nullptr, /* config options */
- 0, /* flags */
+ MYROCKS_MARIADB_PLUGIN_MATURITY_LEVEL
};
struct st_maria_plugin rdb_i_s_cfoptions = {
@@ -1548,7 +1548,7 @@ struct st_maria_plugin rdb_i_s_cfoptions = {
nullptr, /* status variables */
nullptr, /* system variables */
nullptr, /* config options */
- 0, /* flags */
+ MYROCKS_MARIADB_PLUGIN_MATURITY_LEVEL
};
struct st_maria_plugin rdb_i_s_global_info = {
@@ -1564,7 +1564,7 @@ struct st_maria_plugin rdb_i_s_global_info = {
nullptr, /* status variables */
nullptr, /* system variables */
nullptr, /* config options */
- 0, /* flags */
+ MYROCKS_MARIADB_PLUGIN_MATURITY_LEVEL
};
struct st_maria_plugin rdb_i_s_compact_stats = {
@@ -1580,7 +1580,7 @@ struct st_maria_plugin rdb_i_s_compact_stats = {
nullptr, /* status variables */
nullptr, /* system variables */
nullptr, /* config options */
- 0, /* flags */
+ MYROCKS_MARIADB_PLUGIN_MATURITY_LEVEL
};
struct st_maria_plugin rdb_i_s_ddl = {
@@ -1596,7 +1596,7 @@ struct st_maria_plugin rdb_i_s_ddl = {
nullptr, /* status variables */
nullptr, /* system variables */
nullptr, /* config options */
- 0, /* flags */
+ MYROCKS_MARIADB_PLUGIN_MATURITY_LEVEL
};
struct st_maria_plugin rdb_i_s_index_file_map = {
@@ -1612,7 +1612,7 @@ struct st_maria_plugin rdb_i_s_index_file_map = {
nullptr, /* status variables */
nullptr, /* system variables */
nullptr, /* config options */
- 0, /* flags */
+ MYROCKS_MARIADB_PLUGIN_MATURITY_LEVEL
};
struct st_maria_plugin rdb_i_s_lock_info = {
@@ -1628,7 +1628,7 @@ struct st_maria_plugin rdb_i_s_lock_info = {
nullptr, /* status variables */
nullptr, /* system variables */
nullptr, /* config options */
- 0, /* flags */
+ MYROCKS_MARIADB_PLUGIN_MATURITY_LEVEL
};
struct st_maria_plugin rdb_i_s_trx_info = {
@@ -1644,6 +1644,6 @@ struct st_maria_plugin rdb_i_s_trx_info = {
nullptr, /* status variables */
nullptr, /* system variables */
nullptr, /* config options */
- 0, /* flags */
+ MYROCKS_MARIADB_PLUGIN_MATURITY_LEVEL
};
} // namespace myrocks
diff --git a/storage/tokudb/CMakeLists.txt b/storage/tokudb/CMakeLists.txt
index 566a0856925..7490f4e620b 100644
--- a/storage/tokudb/CMakeLists.txt
+++ b/storage/tokudb/CMakeLists.txt
@@ -4,13 +4,17 @@ IF(CMAKE_VERSION VERSION_LESS "2.8.9")
MESSAGE(STATUS "CMake 2.8.9 or higher is required by TokuDB")
ELSEIF(CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" OR
CMAKE_SYSTEM_PROCESSOR STREQUAL "amd64")
-# tokudb requires F_NOCACHE, O_DIRECT, and designated initializers
+# tokudb requires F_NOCACHE or O_DIRECT, and designated initializers
CHECK_CXX_SOURCE_COMPILES(
"
#include <fcntl.h>
struct a {int b; int c; };
struct a d = { .b=1, .c=2 };
-int main() { return F_NOCACHE + O_DIRECT; }
+#if defined(O_DIRECT) || defined(F_NOCACHE)
+int main() { return 0; }
+#else
+#error
+#endif
" TOKUDB_OK)
ENDIF()
diff --git a/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_row_log.result b/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_row_log.result
index 73c010c6eb7..ab33725fa3f 100644
--- a/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_row_log.result
+++ b/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_row_log.result
@@ -226,7 +226,6 @@ master-bin.000001 # Xid # # COMMIT /* XID */
master-bin.000001 # Rotate # # master-bin.000002;pos=POS
include/show_binlog_events.inc
Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000002 # Binlog_checkpoint # # master-bin.000002
master-bin.000002 # Gtid # # GTID #-#-#
master-bin.000002 # Query # # use `test`; create table t3 (a int)ENGINE=TokuDB
master-bin.000002 # Gtid # # GTID #-#-#
@@ -268,7 +267,6 @@ slave-bin.000001 # Query # # use `test`; create table t3 (a int)ENGINE=TokuDB
slave-bin.000001 # Rotate # # slave-bin.000002;pos=POS
include/show_binlog_events.inc
Log_name Pos Event_type Server_id End_log_pos Info
-slave-bin.000002 # Binlog_checkpoint # # slave-bin.000002
slave-bin.000002 # Gtid # # GTID #-#-#
slave-bin.000002 # Query # # use `test`; create table t2 (n int)ENGINE=TokuDB
slave-bin.000002 # Gtid # # BEGIN GTID #-#-#
diff --git a/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_stm_log.result b/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_stm_log.result
index 83335b0237c..652ef18c039 100644
--- a/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_stm_log.result
+++ b/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_stm_log.result
@@ -222,7 +222,6 @@ master-bin.000001 # Xid # # COMMIT /* XID */
master-bin.000001 # Rotate # # master-bin.000002;pos=POS
include/show_binlog_events.inc
Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000002 # Binlog_checkpoint # # master-bin.000002
master-bin.000002 # Gtid # # GTID #-#-#
master-bin.000002 # Query # # use `test`; create table t3 (a int)ENGINE=TokuDB
master-bin.000002 # Gtid # # GTID #-#-#
@@ -260,7 +259,6 @@ slave-bin.000001 # Query # # use `test`; create table t3 (a int)ENGINE=TokuDB
slave-bin.000001 # Rotate # # slave-bin.000002;pos=POS
include/show_binlog_events.inc
Log_name Pos Event_type Server_id End_log_pos Info
-slave-bin.000002 # Binlog_checkpoint # # slave-bin.000002
slave-bin.000002 # Gtid # # GTID #-#-#
slave-bin.000002 # Query # # use `test`; create table t2 (n int)ENGINE=TokuDB
slave-bin.000002 # Gtid # # BEGIN GTID #-#-#
diff --git a/storage/xtradb/handler/ha_innodb.cc b/storage/xtradb/handler/ha_innodb.cc
index 4c3d6a1c11c..68813635b6a 100644
--- a/storage/xtradb/handler/ha_innodb.cc
+++ b/storage/xtradb/handler/ha_innodb.cc
@@ -3903,6 +3903,17 @@ innobase_init(
}
}
+#ifdef WITH_WSREP
+ /* Currently, Galera does not support VATS lock schedule algorithm. */
+ if (innodb_lock_schedule_algorithm == INNODB_LOCK_SCHEDULE_ALGORITHM_VATS
+ && global_system_variables.wsrep_on) {
+ /* Do not allow InnoDB startup with VATS and Galera */
+ sql_print_error("In Galera, innodb_lock_schedule_algorithm=vats"
+ " is not supported.");
+ goto error;
+ }
+#endif /* WITH_WSREP */
+
#ifndef HAVE_LZ4
if (innodb_compression_algorithm == PAGE_LZ4_ALGORITHM) {
sql_print_error("InnoDB: innodb_compression_algorithm = %lu unsupported.\n"
@@ -5454,8 +5465,8 @@ innobase_kill_connection(
wsrep_thd_is_BF(current_thd, FALSE),
lock_get_info(trx->lock.wait_lock).c_str());
- if (!wsrep_thd_is_BF(trx->mysql_thd, FALSE) &&
- trx->abort_type == TRX_SERVER_ABORT) {
+ if (!wsrep_thd_is_BF(trx->mysql_thd, FALSE)
+ && trx->abort_type == TRX_SERVER_ABORT) {
ut_ad(!lock_mutex_own());
lock_mutex_enter();
}
@@ -8833,7 +8844,7 @@ no_commit:
table->next_number_field);
/* Get the value that MySQL attempted to store in the table.*/
- auto_inc = table->next_number_field->val_int();
+ auto_inc = table->next_number_field->val_uint();
switch (error) {
case DB_DUPLICATE_KEY:
@@ -9425,7 +9436,7 @@ ha_innobase::update_row(
ulonglong auto_inc;
ulonglong col_max_value;
- auto_inc = table->next_number_field->val_int();
+ auto_inc = table->next_number_field->val_uint();
/* We need the upper limit of the col type to check for
whether we update the table autoinc counter or not. */
@@ -20463,7 +20474,7 @@ static MYSQL_SYSVAR_ENUM(empty_free_list_algorithm,
&innodb_empty_free_list_algorithm_typelib);
static MYSQL_SYSVAR_ENUM(lock_schedule_algorithm, innodb_lock_schedule_algorithm,
- PLUGIN_VAR_RQCMDARG,
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
"The algorithm Innodb uses for deciding which locks to grant next when"
" a lock is released. Possible values are"
" FCFS"
diff --git a/storage/xtradb/include/que0que.h b/storage/xtradb/include/que0que.h
index e5b2a1ba3fc..005f28d2af1 100644
--- a/storage/xtradb/include/que0que.h
+++ b/storage/xtradb/include/que0que.h
@@ -385,9 +385,6 @@ struct que_thr_t{
thrs; /*!< list of thread nodes of the fork
node */
UT_LIST_NODE_T(que_thr_t)
- trx_thrs; /*!< lists of threads in wait list of
- the trx */
- UT_LIST_NODE_T(que_thr_t)
queue; /*!< list of runnable thread nodes in
the server task queue */
ulint fk_cascade_depth; /*!< maximum cascading call depth
diff --git a/storage/xtradb/include/trx0roll.h b/storage/xtradb/include/trx0roll.h
index b2e9d8a077f..565079b17b4 100644
--- a/storage/xtradb/include/trx0roll.h
+++ b/storage/xtradb/include/trx0roll.h
@@ -1,6 +1,7 @@
/*****************************************************************************
Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2017, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -33,7 +34,8 @@ Created 3/26/1996 Heikki Tuuri
#include "mtr0mtr.h"
#include "trx0sys.h"
-extern bool trx_rollback_or_clean_is_active;
+extern bool trx_rollback_or_clean_is_active;
+extern const trx_t* trx_roll_crash_recv_trx;
/*******************************************************************//**
Determines if this transaction is rolling back an incomplete transaction
@@ -104,6 +106,11 @@ trx_undo_rec_release(
/*=================*/
trx_t* trx, /*!< in/out: transaction */
undo_no_t undo_no);/*!< in: undo number */
+/** Report progress when rolling back a row of a recovered transaction.
+@return whether the rollback should be aborted due to pending shutdown */
+UNIV_INTERN
+bool
+trx_roll_must_shutdown();
/*******************************************************************//**
Rollback or clean up any incomplete transactions which were
encountered in crash recovery. If the transaction already was
diff --git a/storage/xtradb/lock/lock0lock.cc b/storage/xtradb/lock/lock0lock.cc
index ddaeff69f10..20e3f5adeb7 100644
--- a/storage/xtradb/lock/lock0lock.cc
+++ b/storage/xtradb/lock/lock0lock.cc
@@ -937,14 +937,21 @@ lock_reset_lock_and_trx_wait(
ib_logf(IB_LOG_LEVEL_INFO,
"Trx id " TRX_ID_FMT
- " is waiting a lock in statement %s"
+ " is waiting a lock "
" for this trx id " TRX_ID_FMT
- " and statement %s wait_lock %p",
+ " wait_lock %p",
lock->trx->id,
- stmt ? stmt : "NULL",
trx_id,
- stmt2 ? stmt2 : "NULL",
lock->trx->lock.wait_lock);
+
+ if (stmt) {
+ ib_logf(IB_LOG_LEVEL_INFO, " SQL1: %s\n", stmt);
+ }
+
+ if (stmt2) {
+ ib_logf(IB_LOG_LEVEL_INFO, " SQL2: %s\n", stmt2);
+ }
+
ut_ad(lock->trx->lock.wait_lock == lock);
}
@@ -1162,7 +1169,7 @@ lock_rec_has_to_wait(
type_mode, lock_is_on_supremum);
fprintf(stderr,
"conflicts states: my %d locked %d\n",
- wsrep_thd_conflict_state(trx->mysql_thd, FALSE),
+ wsrep_thd_conflict_state(trx->mysql_thd, FALSE),
wsrep_thd_conflict_state(lock2->trx->mysql_thd, FALSE) );
lock_rec_print(stderr, lock2);
if (for_locking) return FALSE;
@@ -1714,7 +1721,7 @@ lock_rec_other_has_expl_req(
ulint heap_no,/*!< in: heap number of the record */
trx_id_t trx_id) /*!< in: transaction */
{
- const lock_t* lock;
+ lock_t* lock;
ut_ad(lock_mutex_own());
ut_ad(mode == LOCK_X || mode == LOCK_S);
@@ -1723,7 +1730,7 @@ lock_rec_other_has_expl_req(
for (lock = lock_rec_get_first(block, heap_no);
lock != NULL;
- lock = lock_rec_get_next_const(heap_no, lock)) {
+ lock = lock_rec_get_next(heap_no, lock)) {
if (lock->trx->id != trx_id
&& (gap
@@ -1810,7 +1817,7 @@ Checks if some other transaction has a conflicting explicit lock request
in the queue, so that we have to wait.
@return lock or NULL */
static
-const lock_t*
+lock_t*
lock_rec_other_has_conflicting(
/*===========================*/
enum lock_mode mode, /*!< in: LOCK_S or LOCK_X,
@@ -1822,7 +1829,7 @@ lock_rec_other_has_conflicting(
ulint heap_no,/*!< in: heap number of the record */
const trx_t* trx) /*!< in: our transaction */
{
- const lock_t* lock;
+ lock_t* lock;
ibool is_supremum;
ut_ad(lock_mutex_own());
@@ -1831,13 +1838,16 @@ lock_rec_other_has_conflicting(
for (lock = lock_rec_get_first(block, heap_no);
lock != NULL;
- lock = lock_rec_get_next_const(heap_no, lock)) {
+ lock = lock_rec_get_next(heap_no, lock)) {
#ifdef WITH_WSREP
if (lock_rec_has_to_wait(TRUE, trx, mode, lock, is_supremum)) {
if (wsrep_on_trx(trx)) {
trx_mutex_enter(lock->trx);
- wsrep_kill_victim(trx, lock);
+ /* Below function will roll back either trx
+ or lock->trx depending on priority of the
+ transaction. */
+ wsrep_kill_victim(const_cast<trx_t*>(trx), lock);
trx_mutex_exit(lock->trx);
}
#else
@@ -2045,15 +2055,17 @@ wsrep_print_wait_locks(
{
if (wsrep_debug && c_lock->trx->lock.wait_lock != c_lock) {
fprintf(stderr, "WSREP: c_lock != wait lock\n");
- if (lock_get_type_low(c_lock) & LOCK_TABLE)
+ if (lock_get_type_low(c_lock) & LOCK_TABLE) {
lock_table_print(stderr, c_lock);
- else
+ } else {
lock_rec_print(stderr, c_lock);
+ }
- if (lock_get_type_low(c_lock->trx->lock.wait_lock) & LOCK_TABLE)
+ if (lock_get_type_low(c_lock->trx->lock.wait_lock) & LOCK_TABLE) {
lock_table_print(stderr, c_lock->trx->lock.wait_lock);
- else
+ } else {
lock_rec_print(stderr, c_lock->trx->lock.wait_lock);
+ }
}
}
#endif /* WITH_WSREP */
@@ -2358,8 +2370,8 @@ lock_rec_create(
if (wsrep_debug) {
fprintf(
stderr,
- "WSREP: c_lock canceled %llu\n",
- (ulonglong) c_lock->trx->id);
+ "WSREP: c_lock canceled " TRX_ID_FMT "\n",
+ c_lock->trx->id);
}
/* have to bail out here to avoid lock_set_lock... */
@@ -2551,6 +2563,16 @@ lock_rec_enqueue_waiting(
err = DB_LOCK_WAIT;
}
+#ifdef WITH_WSREP
+ if (!lock_get_wait(lock) && wsrep_thd_is_BF(trx->mysql_thd, FALSE)) {
+ if (wsrep_debug) {
+ fprintf(stderr, "WSREP: BF thread got lock granted early, ID " TRX_ID_FMT
+ "\n",
+ lock->trx->id);
+ }
+ return(DB_SUCCESS);
+ }
+#endif /* WITH_WSREP */
// Move it only when it does not cause a deadlock.
if (err != DB_DEADLOCK
&& innodb_lock_schedule_algorithm
@@ -2981,6 +3003,15 @@ lock_rec_has_to_wait_in_queue(
#ifdef WITH_WSREP
if (wsrep_thd_is_BF(wait_lock->trx->mysql_thd, FALSE) &&
wsrep_thd_is_BF(lock->trx->mysql_thd, TRUE)) {
+ if (wsrep_debug) {
+ fprintf(stderr,
+ "BF-BF lock conflict " TRX_ID_FMT
+ " : " TRX_ID_FMT "\n",
+ wait_lock->trx->id,
+ lock->trx->id);
+ lock_rec_print(stderr, wait_lock);
+ lock_rec_print(stderr, lock);
+ }
/* don't wait for another BF lock */
continue;
}
@@ -3139,7 +3170,7 @@ lock_grant_and_move_on_page(
&& !lock_rec_has_to_wait_in_queue(lock)) {
lock_grant(lock, false);
-
+
if (previous != NULL) {
/* Move the lock to the head of the list. */
HASH_GET_NEXT(hash, previous) = HASH_GET_NEXT(hash, lock);
@@ -5017,8 +5048,8 @@ lock_table_create(
}
if (wsrep_debug) {
- fprintf(stderr, "WSREP: c_lock canceled %llu\n",
- (ulonglong) c_lock->trx->id);
+ fprintf(stderr, "WSREP: c_lock canceled " TRX_ID_FMT "\n",
+ c_lock->trx->id);
}
}
if (c_lock) {
@@ -5297,7 +5328,7 @@ Checks if other transactions have an incompatible mode lock request in
the lock queue.
@return lock or NULL */
UNIV_INLINE
-const lock_t*
+lock_t*
lock_table_other_has_incompatible(
/*==============================*/
const trx_t* trx, /*!< in: transaction, or NULL if all
@@ -5308,7 +5339,7 @@ lock_table_other_has_incompatible(
const dict_table_t* table, /*!< in: table */
enum lock_mode mode) /*!< in: lock mode */
{
- const lock_t* lock;
+ lock_t* lock;
ut_ad(lock_mutex_own());
@@ -5361,7 +5392,7 @@ lock_table(
#endif
trx_t* trx;
dberr_t err;
- const lock_t* wait_for;
+ lock_t* wait_for;
ut_ad(table != NULL);
ut_ad(thr != NULL);
@@ -5412,13 +5443,13 @@ lock_table(
if (wait_for != NULL) {
#ifdef WITH_WSREP
- err = lock_table_enqueue_waiting((ib_lock_t*)wait_for, mode | flags, table, thr);
+ err = lock_table_enqueue_waiting(wait_for, mode | flags, table, thr);
#else
err = lock_table_enqueue_waiting(mode | flags, table, thr);
#endif
} else {
#ifdef WITH_WSREP
- lock_table_create(c_lock, table, mode | flags, trx);
+ lock_table_create(c_lock, table, mode | flags, trx);
#else
lock_table_create(table, mode | flags, trx);
#endif
@@ -7101,10 +7132,10 @@ lock_rec_insert_check_and_lock(
on the successor, which produced an unnecessary deadlock. */
#ifdef WITH_WSREP
- if ((c_lock = (ib_lock_t*)lock_rec_other_has_conflicting(
- static_cast<enum lock_mode>(
- LOCK_X | LOCK_GAP | LOCK_INSERT_INTENTION),
- block, next_rec_heap_no, trx))) {
+ if ((c_lock = lock_rec_other_has_conflicting(
+ static_cast<enum lock_mode>(
+ LOCK_X | LOCK_GAP | LOCK_INSERT_INTENTION),
+ block, next_rec_heap_no, trx))) {
#else
if (lock_rec_other_has_conflicting(
static_cast<enum lock_mode>(
@@ -7117,7 +7148,7 @@ lock_rec_insert_check_and_lock(
#ifdef WITH_WSREP
err = lock_rec_enqueue_waiting(c_lock,
- LOCK_X | LOCK_GAP | LOCK_INSERT_INTENTION,
+ LOCK_X | LOCK_GAP | LOCK_INSERT_INTENTION,
block, next_rec_heap_no, index, thr);
#else
err = lock_rec_enqueue_waiting(
diff --git a/storage/xtradb/lock/lock0wait.cc b/storage/xtradb/lock/lock0wait.cc
index ca9d05a4829..a0f557e18e5 100644
--- a/storage/xtradb/lock/lock0wait.cc
+++ b/storage/xtradb/lock/lock0wait.cc
@@ -191,22 +191,25 @@ lock_wait_table_reserve_slot(
/*********************************************************************//**
check if lock timeout was for priority thread,
as a side effect trigger lock monitor
+@param[in] trx transaction owning the lock
+@param[in] locked true if trx and lock_sys_mutex is owned
@return false for regular lock timeout */
-static ibool
+static
+bool
wsrep_is_BF_lock_timeout(
-/*====================*/
- trx_t* trx) /* in: trx to check for lock priority */
+ const trx_t* trx,
+ bool locked = true)
{
- if (wsrep_on_trx(trx) &&
- wsrep_thd_is_BF(trx->mysql_thd, FALSE)) {
- fprintf(stderr, "WSREP: BF lock wait long\n");
- srv_print_innodb_monitor = TRUE;
- srv_print_innodb_lock_monitor = TRUE;
- os_event_set(srv_monitor_event);
- return TRUE;
- }
- return FALSE;
- }
+ if (wsrep_on_trx(trx)
+ && wsrep_thd_is_BF(trx->mysql_thd, FALSE)) {
+ fprintf(stderr, "WSREP: BF lock wait long for trx " TRX_ID_FMT "\n", trx->id);
+ srv_print_innodb_monitor = TRUE;
+ srv_print_innodb_lock_monitor = TRUE;
+ os_event_set(srv_monitor_event);
+ return true;
+ }
+ return false;
+}
#endif /* WITH_WSREP */
/***************************************************************//**
@@ -402,15 +405,15 @@ lock_wait_suspend_thread(
if (lock_wait_timeout < 100000000
&& wait_time > (double) lock_wait_timeout) {
#ifdef WITH_WSREP
- if (!wsrep_on_trx(trx) ||
- (!wsrep_is_BF_lock_timeout(trx) &&
- trx->error_state != DB_DEADLOCK)) {
+ if (!wsrep_on_trx(trx) ||
+ (!wsrep_is_BF_lock_timeout(trx) &&
+ trx->error_state != DB_DEADLOCK)) {
#endif /* WITH_WSREP */
- trx->error_state = DB_LOCK_WAIT_TIMEOUT;
+ trx->error_state = DB_LOCK_WAIT_TIMEOUT;
#ifdef WITH_WSREP
- }
+ }
#endif /* WITH_WSREP */
MONITOR_INC(MONITOR_TIMEOUT);
}
diff --git a/storage/xtradb/row/row0mysql.cc b/storage/xtradb/row/row0mysql.cc
index 3f79c3af6c8..43f4d4bcdd3 100644
--- a/storage/xtradb/row/row0mysql.cc
+++ b/storage/xtradb/row/row0mysql.cc
@@ -74,7 +74,7 @@ UNIV_INTERN ibool row_rollback_on_timeout = FALSE;
/** Chain node of the list of tables to drop in the background. */
struct row_mysql_drop_t{
- char* table_name; /*!< table name */
+ table_id_t table_id; /*!< table id */
UT_LIST_NODE_T(row_mysql_drop_t)row_mysql_drop_list;
/*!< list chain node */
};
@@ -137,19 +137,6 @@ row_mysql_is_system_table(
|| 0 == strcmp(name + 6, "db"));
}
-/*********************************************************************//**
-If a table is not yet in the drop list, adds the table to the list of tables
-which the master thread drops in background. We need this on Unix because in
-ALTER TABLE MySQL may call drop table even if the table has running queries on
-it. Also, if there are running foreign key checks on the table, we drop the
-table lazily.
-@return TRUE if the table was not yet in the drop list, and was added there */
-static
-ibool
-row_add_table_to_background_drop_list(
-/*==================================*/
- const char* name); /*!< in: table name */
-
/*******************************************************************//**
Delays an INSERT, DELETE or UPDATE operation if the purge is lagging. */
static
@@ -2787,7 +2774,7 @@ loop:
mutex_enter(&row_drop_list_mutex);
ut_a(row_mysql_drop_list_inited);
-
+next:
drop = UT_LIST_GET_FIRST(row_mysql_drop_list);
n_tables = UT_LIST_GET_LEN(row_mysql_drop_list);
@@ -2800,62 +2787,39 @@ loop:
return(n_tables + n_tables_dropped);
}
- DBUG_EXECUTE_IF("row_drop_tables_in_background_sleep",
- os_thread_sleep(5000000);
- );
-
- table = dict_table_open_on_name(drop->table_name, FALSE, FALSE,
- DICT_ERR_IGNORE_NONE);
-
- if (table == NULL) {
- /* If for some reason the table has already been dropped
- through some other mechanism, do not try to drop it */
+ table = dict_table_open_on_id(drop->table_id, FALSE,
+ DICT_TABLE_OP_NORMAL);
- goto already_dropped;
+ if (!table) {
+ n_tables_dropped++;
+ mutex_enter(&row_drop_list_mutex);
+ UT_LIST_REMOVE(row_mysql_drop_list, row_mysql_drop_list, drop);
+ MONITOR_DEC(MONITOR_BACKGROUND_DROP_TABLE);
+ ut_free(drop);
+ goto next;
}
+ ut_a(!table->can_be_evicted);
+
if (!table->to_be_dropped) {
- /* There is a scenario: the old table is dropped
- just after it's added into drop list, and new
- table with the same name is created, then we try
- to drop the new table in background. */
dict_table_close(table, FALSE, FALSE);
- goto already_dropped;
+ mutex_enter(&row_drop_list_mutex);
+ UT_LIST_REMOVE(row_mysql_drop_list, row_mysql_drop_list, drop);
+ UT_LIST_ADD_LAST(row_mysql_drop_list, row_mysql_drop_list,
+ drop);
+ goto next;
}
- ut_a(!table->can_be_evicted);
-
dict_table_close(table, FALSE, FALSE);
if (DB_SUCCESS != row_drop_table_for_mysql_in_background(
- drop->table_name)) {
+ table->name)) {
/* If the DROP fails for some table, we return, and let the
main thread retry later */
-
return(n_tables + n_tables_dropped);
}
- n_tables_dropped++;
-
-already_dropped:
- mutex_enter(&row_drop_list_mutex);
-
- UT_LIST_REMOVE(row_mysql_drop_list, row_mysql_drop_list, drop);
-
- MONITOR_DEC(MONITOR_BACKGROUND_DROP_TABLE);
-
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Dropped table ", stderr);
- ut_print_name(stderr, NULL, TRUE, drop->table_name);
- fputs(" in background drop queue.\n", stderr);
-
- mem_free(drop->table_name);
-
- mem_free(drop);
-
- mutex_exit(&row_drop_list_mutex);
-
goto loop;
}
@@ -2887,14 +2851,13 @@ which the master thread drops in background. We need this on Unix because in
ALTER TABLE MySQL may call drop table even if the table has running queries on
it. Also, if there are running foreign key checks on the table, we drop the
table lazily.
-@return TRUE if the table was not yet in the drop list, and was added there */
+@return whether background DROP TABLE was scheduled for the first time */
static
-ibool
-row_add_table_to_background_drop_list(
-/*==================================*/
- const char* name) /*!< in: table name */
+bool
+row_add_table_to_background_drop_list(table_id_t table_id)
{
row_mysql_drop_t* drop;
+ bool added = true;
mutex_enter(&row_drop_list_mutex);
@@ -2905,31 +2868,21 @@ row_add_table_to_background_drop_list(
drop != NULL;
drop = UT_LIST_GET_NEXT(row_mysql_drop_list, drop)) {
- if (strcmp(drop->table_name, name) == 0) {
- /* Already in the list */
-
- mutex_exit(&row_drop_list_mutex);
-
- return(FALSE);
+ if (drop->table_id == table_id) {
+ added = false;
+ goto func_exit;
}
}
- drop = static_cast<row_mysql_drop_t*>(
- mem_alloc(sizeof(row_mysql_drop_t)));
-
- drop->table_name = mem_strdup(name);
+ drop = static_cast<row_mysql_drop_t*>(ut_malloc(sizeof *drop));
+ drop->table_id = table_id;
UT_LIST_ADD_LAST(row_mysql_drop_list, row_mysql_drop_list, drop);
MONITOR_INC(MONITOR_BACKGROUND_DROP_TABLE);
-
- /* fputs("InnoDB: Adding table ", stderr);
- ut_print_name(stderr, trx, TRUE, drop->table_name);
- fputs(" to background drop list\n", stderr); */
-
+func_exit:
mutex_exit(&row_drop_list_mutex);
-
- return(TRUE);
+ return added;
}
/*********************************************************************//**
@@ -4150,7 +4103,7 @@ row_drop_table_for_mysql(
DBUG_EXECUTE_IF("row_drop_table_add_to_background",
- row_add_table_to_background_drop_list(table->name);
+ row_add_table_to_background_drop_list(table->id);
err = DB_SUCCESS;
goto funct_exit;
);
@@ -4162,33 +4115,22 @@ row_drop_table_for_mysql(
checks take an IS or IX lock on the table. */
if (table->n_foreign_key_checks_running > 0) {
-
- const char* save_tablename = table->name;
- ibool added;
-
- added = row_add_table_to_background_drop_list(save_tablename);
-
- if (added) {
+ if (row_add_table_to_background_drop_list(table->id)) {
ut_print_timestamp(stderr);
fputs(" InnoDB: You are trying to drop table ",
stderr);
- ut_print_name(stderr, trx, TRUE, save_tablename);
+ ut_print_name(stderr, trx, TRUE, table->name);
fputs("\n"
"InnoDB: though there is a"
" foreign key check running on it.\n"
"InnoDB: Adding the table to"
" the background drop queue.\n",
stderr);
-
- /* We return DB_SUCCESS to MySQL though the drop will
- happen lazily later */
-
- err = DB_SUCCESS;
- } else {
- /* The table is already in the background drop list */
- err = DB_ERROR;
}
+ /* We return DB_SUCCESS to MySQL though the drop will
+ happen lazily later */
+ err = DB_SUCCESS;
goto funct_exit;
}
@@ -4213,11 +4155,7 @@ row_drop_table_for_mysql(
lock_remove_all_on_table(table, TRUE);
ut_a(table->n_rec_locks == 0);
} else if (table->n_ref_count > 0 || table->n_rec_locks > 0) {
- ibool added;
-
- added = row_add_table_to_background_drop_list(table->name);
-
- if (added) {
+ if (row_add_table_to_background_drop_list(table->id)) {
ut_print_timestamp(stderr);
fputs(" InnoDB: Warning: MySQL is"
" trying to drop table ", stderr);
diff --git a/storage/xtradb/row/row0undo.cc b/storage/xtradb/row/row0undo.cc
index 82b1ab049fa..552b99ab4d4 100644
--- a/storage/xtradb/row/row0undo.cc
+++ b/storage/xtradb/row/row0undo.cc
@@ -1,6 +1,7 @@
/*****************************************************************************
Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2017, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -348,6 +349,13 @@ row_undo_step(
ut_ad(que_node_get_type(node) == QUE_NODE_UNDO);
+ if (UNIV_UNLIKELY(trx == trx_roll_crash_recv_trx)
+ && trx_roll_must_shutdown()) {
+ /* Shutdown has been initiated. */
+ trx->error_state = DB_INTERRUPTED;
+ return(NULL);
+ }
+
err = row_undo(node, thr);
trx->error_state = err;
diff --git a/storage/xtradb/trx/trx0roll.cc b/storage/xtradb/trx/trx0roll.cc
index 335ef8859c4..9a5fcea71de 100644
--- a/storage/xtradb/trx/trx0roll.cc
+++ b/storage/xtradb/trx/trx0roll.cc
@@ -24,6 +24,9 @@ Transaction rollback
Created 3/26/1996 Heikki Tuuri
*******************************************************/
+#include "my_config.h"
+#include <my_systemd.h>
+
#include "trx0roll.h"
#ifdef UNIV_NONINL
@@ -60,14 +63,7 @@ rollback */
bool trx_rollback_or_clean_is_active;
/** In crash recovery, the current trx to be rolled back; NULL otherwise */
-static const trx_t* trx_roll_crash_recv_trx = NULL;
-
-/** In crash recovery we set this to the undo n:o of the current trx to be
-rolled back. Then we can print how many % the rollback has progressed. */
-static undo_no_t trx_roll_max_undo_no;
-
-/** Auxiliary variable which tells the previous progress % we printed */
-static ulint trx_roll_progress_printed_pct;
+const trx_t* trx_roll_crash_recv_trx;
/****************************************************************//**
Finishes a transaction rollback. */
@@ -564,8 +560,6 @@ trx_rollback_active(
que_thr_t* thr;
roll_node_t* roll_node;
dict_table_t* table;
- ib_int64_t rows_to_undo;
- const char* unit = "";
ibool dictionary_locked = FALSE;
heap = mem_heap_create(512);
@@ -584,30 +578,8 @@ trx_rollback_active(
ut_a(thr == que_fork_start_command(fork));
- mutex_enter(&trx_sys->mutex);
-
trx_roll_crash_recv_trx = trx;
- trx_roll_max_undo_no = trx->undo_no;
-
- trx_roll_progress_printed_pct = 0;
-
- rows_to_undo = trx_roll_max_undo_no;
-
- mutex_exit(&trx_sys->mutex);
-
- if (rows_to_undo > 1000000000) {
- rows_to_undo = rows_to_undo / 1000000;
- unit = "M";
- }
-
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Rolling back trx with id " TRX_ID_FMT ", %lu%s"
- " rows to undo\n",
- trx->id,
- (ulong) rows_to_undo, unit);
-
if (trx_get_dict_operation(trx) != TRX_DICT_OP_NONE) {
row_mysql_lock_data_dictionary(trx);
dictionary_locked = TRUE;
@@ -618,6 +590,16 @@ trx_rollback_active(
que_run_threads(roll_node->undo_thr);
+ if (trx->error_state != DB_SUCCESS) {
+ ut_ad(trx->error_state == DB_INTERRUPTED);
+ ut_ad(!srv_undo_sources);
+ ut_ad(srv_fast_shutdown);
+ ut_ad(!dictionary_locked);
+ que_graph_free(static_cast<que_t*>(
+ roll_node->undo_thr->common.parent));
+ goto func_exit;
+ }
+
trx_rollback_finish(thr_get_trx(roll_node->undo_thr));
/* Free the memory reserved by the undo graph */
@@ -662,13 +644,14 @@ trx_rollback_active(
}
}
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Rollback of trx with id " TRX_ID_FMT " completed", trx->id);
+
+func_exit:
if (dictionary_locked) {
row_mysql_unlock_data_dictionary(trx);
}
- ib_logf(IB_LOG_LEVEL_INFO,
- "Rollback of trx with id " TRX_ID_FMT " completed", trx->id);
-
mem_heap_free(heap);
trx_roll_crash_recv_trx = NULL;
@@ -685,7 +668,7 @@ ibool
trx_rollback_resurrected(
/*=====================*/
trx_t* trx, /*!< in: transaction to rollback or clean */
- ibool all) /*!< in: FALSE=roll back dictionary transactions;
+ ibool* all) /*!< in/out: FALSE=roll back dictionary transactions;
TRUE=roll back all non-PREPARED transactions */
{
ut_ad(mutex_own(&trx_sys->mutex));
@@ -696,16 +679,15 @@ trx_rollback_resurrected(
to accidentally clean up a non-recovered transaction here. */
trx_mutex_enter(trx);
- bool is_recovered = trx->is_recovered;
- trx_state_t state = trx->state;
- trx_mutex_exit(trx);
-
- if (!is_recovered) {
+ if (!trx->is_recovered) {
+func_exit:
+ trx_mutex_exit(trx);
return(FALSE);
}
- switch (state) {
+ switch (trx->state) {
case TRX_STATE_COMMITTED_IN_MEMORY:
+ trx_mutex_exit(trx);
mutex_exit(&trx_sys->mutex);
fprintf(stderr,
"InnoDB: Cleaning up trx with id " TRX_ID_FMT "\n",
@@ -714,21 +696,83 @@ trx_rollback_resurrected(
trx_free_for_background(trx);
return(TRUE);
case TRX_STATE_ACTIVE:
- if (all || trx_get_dict_operation(trx) != TRX_DICT_OP_NONE) {
+ if (!srv_undo_sources && srv_fast_shutdown) {
+fake_prepared:
+ trx->state = TRX_STATE_PREPARED;
+ trx_sys->n_prepared_trx++;
+ trx_sys->n_prepared_recovered_trx++;
+ *all = FALSE;
+ goto func_exit;
+ }
+ trx_mutex_exit(trx);
+
+ if (*all || trx_get_dict_operation(trx) != TRX_DICT_OP_NONE) {
mutex_exit(&trx_sys->mutex);
trx_rollback_active(trx);
+ if (trx->error_state != DB_SUCCESS) {
+ ut_ad(trx->error_state == DB_INTERRUPTED);
+ ut_ad(!srv_undo_sources);
+ ut_ad(srv_fast_shutdown);
+ mutex_enter(&trx_sys->mutex);
+ trx_mutex_enter(trx);
+ goto fake_prepared;
+ }
trx_free_for_background(trx);
return(TRUE);
}
return(FALSE);
case TRX_STATE_PREPARED:
- return(FALSE);
+ goto func_exit;
case TRX_STATE_NOT_STARTED:
break;
}
ut_error;
- return(FALSE);
+ goto func_exit;
+}
+
+/** Report progress when rolling back a row of a recovered transaction.
+@return whether the rollback should be aborted due to pending shutdown */
+UNIV_INTERN
+bool
+trx_roll_must_shutdown()
+{
+ const trx_t* trx = trx_roll_crash_recv_trx;
+ ut_ad(trx);
+ ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE));
+
+ if (trx_get_dict_operation(trx) == TRX_DICT_OP_NONE
+ && !srv_undo_sources && srv_fast_shutdown) {
+ return true;
+ }
+
+ ib_time_t time = ut_time();
+ mutex_enter(&trx_sys->mutex);
+ mutex_enter(&recv_sys->mutex);
+
+ if (recv_sys->report(time)) {
+ ulint n_trx = 0, n_rows = 0;
+ for (const trx_t* t = UT_LIST_GET_FIRST(trx_sys->rw_trx_list);
+ t != NULL;
+ t = UT_LIST_GET_NEXT(trx_list, t)) {
+
+ assert_trx_in_rw_list(t);
+ if (t->is_recovered
+ && trx_state_eq(t, TRX_STATE_ACTIVE)) {
+ n_trx++;
+ n_rows += t->undo_no;
+ }
+ }
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "To roll back: " ULINTPF " transactions, "
+ ULINTPF " rows", n_trx, n_rows);
+ sd_notifyf(0, "STATUS=To roll back: " ULINTPF " transactions, "
+ ULINTPF " rows", n_trx, n_rows);
+ }
+
+ mutex_exit(&recv_sys->mutex);
+ mutex_exit(&trx_sys->mutex);
+ return false;
}
/*******************************************************************//**
@@ -775,17 +819,11 @@ trx_rollback_or_clean_recovered(
assert_trx_in_rw_list(trx);
- if (srv_shutdown_state != SRV_SHUTDOWN_NONE
- && srv_fast_shutdown != 0) {
- all = FALSE;
- break;
- }
-
/* If this function does a cleanup or rollback
then it will release the trx_sys->mutex, therefore
we need to reacquire it before retrying the loop. */
- if (trx_rollback_resurrected(trx, all)) {
+ if (trx_rollback_resurrected(trx, &all)) {
mutex_enter(&trx_sys->mutex);
@@ -1118,7 +1156,6 @@ trx_roll_pop_top_rec_of_trx(
undo_no_t undo_no;
ibool is_insert;
trx_rseg_t* rseg;
- ulint progress_pct;
mtr_t mtr;
rseg = trx->rseg;
@@ -1176,27 +1213,6 @@ try_again:
ut_ad(undo_no + 1 == trx->undo_no);
- /* We print rollback progress info if we are in a crash recovery
- and the transaction has at least 1000 row operations to undo. */
-
- if (trx == trx_roll_crash_recv_trx && trx_roll_max_undo_no > 1000) {
-
- progress_pct = 100 - (ulint)
- ((undo_no * 100) / trx_roll_max_undo_no);
- if (progress_pct != trx_roll_progress_printed_pct) {
- if (trx_roll_progress_printed_pct == 0) {
- fprintf(stderr,
- "\nInnoDB: Progress in percents:"
- " %lu", (ulong) progress_pct);
- } else {
- fprintf(stderr,
- " %lu", (ulong) progress_pct);
- }
- fflush(stderr);
- trx_roll_progress_printed_pct = progress_pct;
- }
- }
-
trx->undo_no = undo_no;
if (!trx_undo_arr_store_info(trx, undo_no)) {
diff --git a/storage/xtradb/trx/trx0undo.cc b/storage/xtradb/trx/trx0undo.cc
index 3259bcb70b1..24d14e06080 100644
--- a/storage/xtradb/trx/trx0undo.cc
+++ b/storage/xtradb/trx/trx0undo.cc
@@ -2023,10 +2023,14 @@ trx_undo_free_prepared(
/* fall through */
case TRX_UNDO_ACTIVE:
/* lock_trx_release_locks() assigns
- trx->is_recovered=false */
+ trx->is_recovered=false and
+ trx->state = TRX_STATE_COMMITTED_IN_MEMORY,
+ also for transactions that we faked
+ to TRX_STATE_PREPARED in trx_rollback_resurrected(). */
ut_a(srv_read_only_mode
|| srv_apply_log_only
- || srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO);
+ || srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO
+ || srv_fast_shutdown);
break;
default:
ut_error;
@@ -2048,10 +2052,14 @@ trx_undo_free_prepared(
/* fall through */
case TRX_UNDO_ACTIVE:
/* lock_trx_release_locks() assigns
- trx->is_recovered=false */
+ trx->is_recovered=false and
+ trx->state = TRX_STATE_COMMITTED_IN_MEMORY,
+ also for transactions that we faked
+ to TRX_STATE_PREPARED in trx_rollback_resurrected(). */
ut_a(srv_read_only_mode
|| srv_apply_log_only
- || srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO);
+ || srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO
+ || srv_fast_shutdown);
break;
default:
ut_error;