-rw-r--r--  storage/rocksdb/CMakeLists.txt | 6
-rw-r--r--  storage/rocksdb/ha_rocksdb.cc | 819
-rw-r--r--  storage/rocksdb/ha_rocksdb.h | 72
-rw-r--r--  storage/rocksdb/ha_rocksdb_proto.h | 1
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/include/prefix_index_only_query_check.inc | 21
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/my.cnf | 1
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/2pc_group_commit.result | 14
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/add_index_inplace.result | 13
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/bulk_load_errors.result | 26
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted.result | 101
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted_errors.result | 4
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/bytes_written.result | 10
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/cons_snapshot_read_committed.result | 12
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/cons_snapshot_repeatable_read.result | 2
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/covered_unpack_info_format.result | 73
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/ddl_high_priority.result | 1009
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/deadlock_tracking.result | 490
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/drop_table.result | 3
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/drop_table2.result | 2
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/drop_table3.result | 2
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/dup_key_update.result | 4
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/fast_prefix_index_fetch.result | 80
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/i_s_ddl.result | 6
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/index.result | 27
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/index_primary.result | 23
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/index_type_btree.result | 27
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/index_type_hash.result | 27
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/issue243_transactionStatus.result | 18
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/multi_varchar_sk_lookup.result | 37
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/rocksdb.result | 48
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/show_engine.result | 33
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/skip_validate_tmp_table.result | 4
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/truncate_table3.result | 2
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/ttl_primary.result | 4
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/ttl_secondary.result | 709
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/ttl_secondary_read_filtering.result | 494
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/ttl_secondary_read_filtering_multiple_index.result | 82
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/ttl_secondary_with_partitions.result | 389
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/r/write_sync.result | 25
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/t/2pc_group_commit.test | 6
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/t/add_index_inplace.test | 8
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/t/bulk_load_errors.test | 26
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/t/bulk_load_unsorted.test | 136
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/t/bulk_load_unsorted_errors.test | 8
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/t/bytes_written.test | 22
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/t/covered_unpack_info_format.test | 79
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/t/ddl_high_priority.test | 18
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/t/deadlock_tracking.test | 153
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/t/drop_table-master.opt | 1
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/t/drop_table.test | 5
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/t/drop_table2.test | 4
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/t/drop_table3.inc | 4
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/t/dup_key_update.test | 4
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/t/fast_prefix_index_fetch.test | 120
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/t/index.inc | 34
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/t/index_primary.test | 32
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/t/multi_varchar_sk_lookup.test | 49
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/t/rocksdb.test | 2
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/t/show_engine.test | 13
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/t/skip_validate_tmp_table.test | 39
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/t/ttl_primary.test | 14
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary-master.opt | 2
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary.test | 780
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary_read_filtering-master.opt | 1
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary_read_filtering.test | 500
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary_read_filtering_multiple_index.test | 87
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary_with_partitions-master.opt | 1
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary_with_partitions.test | 300
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb/t/write_sync.test | 23
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb_rpl/r/multiclient_2pc.result | 9
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_ddl_high_priority.result | 39
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb_rpl/t/multiclient_2pc.test | 29
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_ddl_high_priority.test | 2
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb_sys_vars/my.cnf | 4
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_bulk_load_allow_unsorted_basic.result | 100
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_concurrent_prepare_basic.result | 14
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_deadlock_detect_depth_basic.result | 79
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_debug_ttl_ignore_pk_basic.result | 64
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_force_compute_memtable_stats_basic.result | 4
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_force_compute_memtable_stats_cachetime_basic.result | 68
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_large_prefix_basic.result | 64
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_manual_wal_flush_basic.result | 14
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_max_latest_deadlocks_basic.result | 53
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_merge_tmp_file_removal_delay_ms_basic.result | 93
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_sim_cache_size_basic.result | 7
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_use_clock_cache_basic.result | 19
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_bulk_load_allow_unsorted_basic.test | 18
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_concurrent_prepare_basic.test | 16
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_deadlock_detect_depth_basic.test | 17
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_debug_ttl_ignore_pk_basic.test | 18
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_force_compute_memtable_stats_basic.test | 4
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_force_compute_memtable_stats_cachetime_basic.test | 18
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_large_prefix_basic.test | 18
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_manual_wal_flush_basic.test | 16
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_max_latest_deadlocks_basic.test | 17
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_merge_tmp_file_removal_delay_ms_basic.test | 49
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_sim_cache_size_basic.test | 6
-rw-r--r--  storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_use_clock_cache_basic.test | 21
-rw-r--r--  storage/rocksdb/rdb_buff.h | 16
-rw-r--r--  storage/rocksdb/rdb_compact_filter.h | 8
-rw-r--r--  storage/rocksdb/rdb_datadic.cc | 357
-rw-r--r--  storage/rocksdb/rdb_datadic.h | 66
-rw-r--r--  storage/rocksdb/rdb_i_s.cc | 15
-rw-r--r--  storage/rocksdb/rdb_index_merge.cc | 186
-rw-r--r--  storage/rocksdb/rdb_index_merge.h | 70
-rw-r--r--  storage/rocksdb/rdb_perf_context.cc | 22
-rw-r--r--  storage/rocksdb/rdb_perf_context.h | 16
107 files changed, 8417 insertions(+), 410 deletions(-)
diff --git a/storage/rocksdb/CMakeLists.txt b/storage/rocksdb/CMakeLists.txt
index ec9705891e0..9e6c8160e32 100644
--- a/storage/rocksdb/CMakeLists.txt
+++ b/storage/rocksdb/CMakeLists.txt
@@ -106,6 +106,12 @@ IF (NOT "$ENV{WITH_ZSTD}" STREQUAL "")
ADD_DEFINITIONS(-DZSTD)
ENDIF()
+IF (NOT "$ENV{WITH_TBB}" STREQUAL "")
+ SET(rocksdb_static_libs ${rocksdb_static_libs}
+ $ENV{WITH_TBB}/libtbb${PIC_EXT}.a)
+ ADD_DEFINITIONS(-DTBB)
+ENDIF()
+
CHECK_INCLUDE_FILES(zlib.h HAVE_ZLIB_H)
IF (HAVE_ZLIB_H)
diff --git a/storage/rocksdb/ha_rocksdb.cc b/storage/rocksdb/ha_rocksdb.cc
index 0df153b4853..cc52b343dd1 100644
--- a/storage/rocksdb/ha_rocksdb.cc
+++ b/storage/rocksdb/ha_rocksdb.cc
@@ -36,6 +36,7 @@
#include "./debug_sync.h"
#include "./my_bit.h"
#include "./my_stacktrace.h"
+#include "./my_sys.h"
#include "./sql_audit.h"
#include "./sql_table.h"
#include <mysql/psi/mysql_table.h>
@@ -57,6 +58,7 @@
#include "rocksdb/utilities/checkpoint.h"
#include "rocksdb/utilities/convenience.h"
#include "rocksdb/utilities/memory_util.h"
+#include "rocksdb/utilities/sim_cache.h"
#include "util/stop_watch.h"
/* MyRocks includes */
@@ -101,6 +103,7 @@ namespace myrocks {
static st_global_stats global_stats;
static st_export_stats export_stats;
static st_memory_stats memory_stats;
+static st_io_stall_stats io_stall_stats;
const std::string DEFAULT_CF_NAME("default");
const std::string DEFAULT_SYSTEM_CF_NAME("__system__");
@@ -127,6 +130,14 @@ bool can_use_bloom_filter(THD *thd, const Rdb_key_def &kd,
const rocksdb::Slice &eq_cond,
const bool use_all_keys);
+static rocksdb::CompactRangeOptions getCompactRangeOptions() {
+ rocksdb::CompactRangeOptions compact_range_options;
+ compact_range_options.bottommost_level_compaction =
+ rocksdb::BottommostLevelCompaction::kForce;
+ compact_range_options.exclusive_manual_compaction = false;
+ return compact_range_options;
+}
+
///////////////////////////////////////////////////////////
// Parameters and settings
///////////////////////////////////////////////////////////
@@ -149,7 +160,6 @@ static std::shared_ptr<Rdb_tbl_prop_coll_factory> properties_collector_factory;
Rdb_dict_manager dict_manager;
Rdb_cf_manager cf_manager;
Rdb_ddl_manager ddl_manager;
-const char *m_mysql_gtid;
Rdb_binlog_manager binlog_manager;
Rdb_io_watchdog *io_watchdog = nullptr;
@@ -165,7 +175,7 @@ static Rdb_background_thread rdb_bg_thread;
// collation check requirement.
Regex_list_handler *rdb_collation_exceptions;
-static const char **rdb_get_error_messages();
+static const char *rdb_get_error_message(int nr);
static void rocksdb_flush_all_memtables() {
const Rdb_cf_manager &cf_manager = rdb_get_cf_manager();
@@ -192,7 +202,7 @@ static int rocksdb_compact_column_family(THD *const thd,
if (cfh != nullptr && rdb != nullptr) {
sql_print_information("RocksDB: Manual compaction of column family: %s\n",
cf);
- rdb->CompactRange(rocksdb::CompactRangeOptions(), cfh, nullptr, nullptr);
+ rdb->CompactRange(getCompactRangeOptions(), cfh, nullptr, nullptr);
}
}
return HA_EXIT_SUCCESS;
@@ -388,6 +398,10 @@ static void rocksdb_set_delayed_write_rate(THD *thd,
struct st_mysql_sys_var *var,
void *var_ptr, const void *save);
+static void rocksdb_set_max_latest_deadlocks(THD *thd,
+ struct st_mysql_sys_var *var,
+ void *var_ptr, const void *save);
+
static void rdb_set_collation_exception_list(const char *exception_list);
static void rocksdb_set_collation_exception_list(THD *thd,
struct st_mysql_sys_var *var,
@@ -404,6 +418,10 @@ rocksdb_set_bulk_load(THD *thd,
struct st_mysql_sys_var *var MY_ATTRIBUTE((__unused__)),
void *var_ptr, const void *save);
+static void rocksdb_set_bulk_load_allow_unsorted(
+ THD *thd, struct st_mysql_sys_var *var MY_ATTRIBUTE((__unused__)),
+ void *var_ptr, const void *save);
+
static void rocksdb_set_max_background_jobs(THD *thd,
struct st_mysql_sys_var *const var,
void *const var_ptr,
@@ -412,12 +430,15 @@ static void rocksdb_set_max_background_jobs(THD *thd,
// Options definitions
//////////////////////////////////////////////////////////////////////////////
static long long rocksdb_block_cache_size;
+static long long rocksdb_sim_cache_size;
+static my_bool rocksdb_use_clock_cache;
/* Use unsigned long long instead of uint64_t because of MySQL compatibility */
static unsigned long long // NOLINT(runtime/int)
rocksdb_rate_limiter_bytes_per_sec;
static unsigned long long // NOLINT(runtime/int)
rocksdb_sst_mgr_rate_bytes_per_sec;
static unsigned long long rocksdb_delayed_write_rate;
+static uint32_t rocksdb_max_latest_deadlocks;
static unsigned long // NOLINT(runtime/int)
rocksdb_persistent_cache_size_mb;
static uint64_t rocksdb_info_log_level;
@@ -427,6 +448,7 @@ static uint64_t rocksdb_index_type;
static uint32_t rocksdb_flush_log_at_trx_commit;
static uint32_t rocksdb_debug_optimizer_n_rows;
static my_bool rocksdb_force_compute_memtable_stats;
+static uint32_t rocksdb_force_compute_memtable_stats_cachetime;
static my_bool rocksdb_debug_optimizer_no_zero_cardinality;
static uint32_t rocksdb_wal_recovery_mode;
static uint32_t rocksdb_access_hint_on_compaction_start;
@@ -444,6 +466,7 @@ static my_bool rocksdb_enable_ttl_read_filtering = 1;
static int rocksdb_debug_ttl_rec_ts = 0;
static int rocksdb_debug_ttl_snapshot_ts = 0;
static int rocksdb_debug_ttl_read_filter_ts = 0;
+static my_bool rocksdb_debug_ttl_ignore_pk = 0;
static my_bool rocksdb_reset_stats = 0;
static uint32_t rocksdb_io_write_timeout_secs = 0;
static uint64_t rocksdb_number_stat_computes = 0;
@@ -456,6 +479,7 @@ static char *rocksdb_datadir;
static uint32_t rocksdb_table_stats_sampling_pct;
static my_bool rocksdb_enable_bulk_load_api = 1;
static my_bool rocksdb_print_snapshot_conflict_queries = 0;
+static my_bool rocksdb_large_prefix = 0;
std::atomic<uint64_t> rocksdb_snapshot_conflict_errors(0);
std::atomic<uint64_t> rocksdb_wal_group_syncs(0);
@@ -468,6 +492,8 @@ static std::unique_ptr<rocksdb::DBOptions> rdb_init_rocksdb_db_options(void) {
o->info_log_level = rocksdb::InfoLogLevel::INFO_LEVEL;
o->max_subcompactions = DEFAULT_SUBCOMPACTIONS;
+ o->concurrent_prepare = true;
+ o->manual_wal_flush = true;
return o;
}
@@ -559,9 +585,12 @@ const size_t RDB_DEFAULT_MERGE_BUF_SIZE = 64 * 1024 * 1024;
const size_t RDB_MIN_MERGE_BUF_SIZE = 100;
const size_t RDB_DEFAULT_MERGE_COMBINE_READ_SIZE = 1024 * 1024 * 1024;
const size_t RDB_MIN_MERGE_COMBINE_READ_SIZE = 100;
+const size_t RDB_DEFAULT_MERGE_TMP_FILE_REMOVAL_DELAY = 0;
+const size_t RDB_MIN_MERGE_TMP_FILE_REMOVAL_DELAY = 0;
const int64 RDB_DEFAULT_BLOCK_CACHE_SIZE = 512 * 1024 * 1024;
const int64 RDB_MIN_BLOCK_CACHE_SIZE = 1024;
const int RDB_MAX_CHECKSUMS_PCT = 100;
+const ulong RDB_DEADLOCK_DETECT_DEPTH = 50;
// TODO: 0 means don't wait at all, and we don't support it yet?
static MYSQL_THDVAR_ULONG(lock_wait_timeout, PLUGIN_VAR_RQCMDARG,
@@ -572,6 +601,14 @@ static MYSQL_THDVAR_ULONG(lock_wait_timeout, PLUGIN_VAR_RQCMDARG,
static MYSQL_THDVAR_BOOL(deadlock_detect, PLUGIN_VAR_RQCMDARG,
"Enables deadlock detection", nullptr, nullptr, FALSE);
+static MYSQL_THDVAR_ULONG(deadlock_detect_depth, PLUGIN_VAR_RQCMDARG,
+ "Number of transactions deadlock detection will "
+ "traverse through before assuming deadlock",
+ nullptr, nullptr,
+ /*default*/ RDB_DEADLOCK_DETECT_DEPTH,
+ /*min*/ 2,
+ /*max*/ ULONG_MAX, 0);
+
static MYSQL_THDVAR_BOOL(
trace_sst_api, PLUGIN_VAR_RQCMDARG,
"Generate trace output in the log for each call to the SstFileWriter",
@@ -583,6 +620,11 @@ static MYSQL_THDVAR_BOOL(
"unique_checks and enables rocksdb_commit_in_the_middle.",
nullptr, rocksdb_set_bulk_load, FALSE);
+static MYSQL_THDVAR_BOOL(bulk_load_allow_unsorted, PLUGIN_VAR_RQCMDARG,
+ "Allow unsorted input during bulk-load. "
+ "Can be changed only when bulk load is disabled.",
+ nullptr, rocksdb_set_bulk_load_allow_unsorted, FALSE);
+
static MYSQL_SYSVAR_BOOL(enable_bulk_load_api, rocksdb_enable_bulk_load_api,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
"Enables using SstFileWriter for bulk loading",
@@ -662,6 +704,18 @@ static MYSQL_THDVAR_ULONGLONG(
/* min (100B) */ RDB_MIN_MERGE_COMBINE_READ_SIZE,
/* max */ SIZE_T_MAX, 1);
+static MYSQL_THDVAR_ULONGLONG(
+ merge_tmp_file_removal_delay_ms, PLUGIN_VAR_RQCMDARG,
+ "Fast index creation creates a large tmp file on disk during index "
+ "creation. Removing this large file all at once when index creation is "
+ "complete can cause trim stalls on Flash. This variable specifies a "
+ "duration to sleep (in milliseconds) between calling chsize() to truncate "
+ "the file in chunks. The chunk size is the same as merge_buf_size.",
+ nullptr, nullptr,
+ /* default (0ms) */ RDB_DEFAULT_MERGE_TMP_FILE_REMOVAL_DELAY,
+ /* min (0ms) */ RDB_MIN_MERGE_TMP_FILE_REMOVAL_DELAY,
+ /* max */ SIZE_T_MAX, 1);
+
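
The help text above describes the mechanism behind merge_tmp_file_removal_delay_ms: instead of unlinking the large merge-sort temp file in one shot, the file is shrunk chunk by chunk, sleeping between truncations so the flash device is not hit with a single huge trim. The real logic lives in rdb_index_merge.cc (listed in the diffstat but not shown in this excerpt); the following is only a minimal sketch of that idea with hypothetical names, using POSIX ftruncate() rather than chsize():

    // Sketch only: shrink a temp file in chunk-sized steps, pausing
    // delay_ms between truncations before finally removing it.
    #include <sys/stat.h>   // fstat, struct stat
    #include <unistd.h>     // ftruncate, close, unlink
    #include <chrono>
    #include <thread>

    static void remove_tmp_file_in_chunks(int fd, const char *path,
                                          off_t chunk, unsigned delay_ms) {
      struct stat st;
      if (fstat(fd, &st) == 0 && chunk > 0) {
        off_t size = st.st_size;
        while (size > 0) {
          size = (size > chunk) ? size - chunk : 0;
          if (ftruncate(fd, size) != 0) break;   // shrink by one chunk
          std::this_thread::sleep_for(std::chrono::milliseconds(delay_ms));
        }
      }
      close(fd);
      unlink(path);
    }
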
static MYSQL_SYSVAR_BOOL(
create_if_missing,
*reinterpret_cast<my_bool *>(&rocksdb_db_options->create_if_missing),
@@ -670,6 +724,20 @@ static MYSQL_SYSVAR_BOOL(
rocksdb_db_options->create_if_missing);
static MYSQL_SYSVAR_BOOL(
+ concurrent_prepare,
+ *reinterpret_cast<my_bool *>(&rocksdb_db_options->concurrent_prepare),
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+ "DBOptions::concurrent_prepare for RocksDB", nullptr, nullptr,
+ rocksdb_db_options->concurrent_prepare);
+
+static MYSQL_SYSVAR_BOOL(
+ manual_wal_flush,
+ *reinterpret_cast<my_bool *>(&rocksdb_db_options->manual_wal_flush),
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+ "DBOptions::manual_wal_flush for RocksDB", nullptr, nullptr,
+ rocksdb_db_options->manual_wal_flush);
+
+static MYSQL_SYSVAR_BOOL(
create_missing_column_families,
*reinterpret_cast<my_bool *>(
&rocksdb_db_options->create_missing_column_families),
@@ -712,6 +780,13 @@ static MYSQL_SYSVAR_ULONGLONG(delayed_write_rate, rocksdb_delayed_write_rate,
rocksdb_db_options->delayed_write_rate, 0,
UINT64_MAX, 0);
+static MYSQL_SYSVAR_UINT(max_latest_deadlocks, rocksdb_max_latest_deadlocks,
+ PLUGIN_VAR_RQCMDARG,
+ "Maximum number of recent "
+ "deadlocks to store",
+ nullptr, rocksdb_set_max_latest_deadlocks,
+ rocksdb::kInitialMaxDeadlocks, 0, UINT32_MAX, 0);
+
static MYSQL_SYSVAR_ENUM(
info_log_level, rocksdb_info_log_level, PLUGIN_VAR_RQCMDARG,
"Filter level for info logs to be written mysqld error log. "
@@ -988,6 +1063,22 @@ static MYSQL_SYSVAR_LONGLONG(block_cache_size, rocksdb_block_cache_size,
/* max */ LONGLONG_MAX,
/* Block size */ RDB_MIN_BLOCK_CACHE_SIZE);
+static MYSQL_SYSVAR_LONGLONG(sim_cache_size, rocksdb_sim_cache_size,
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+ "Simulated cache size for RocksDB", nullptr,
+ nullptr,
+ /* default */ 0,
+ /* min */ 0,
+ /* max */ LONGLONG_MAX,
+ /* Block size */ 0);
+
+static MYSQL_SYSVAR_BOOL(
+ use_clock_cache,
+ rocksdb_use_clock_cache,
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+ "Use ClockCache instead of default LRUCache for RocksDB",
+ nullptr, nullptr, false);
+
static MYSQL_SYSVAR_BOOL(
cache_index_and_filter_blocks,
*reinterpret_cast<my_bool *>(
@@ -1076,12 +1167,21 @@ static MYSQL_SYSVAR_STR(update_cf_options, rocksdb_update_cf_options,
"Option updates per column family for RocksDB", nullptr,
rocksdb_set_update_cf_options, nullptr);
+enum rocksdb_flush_log_at_trx_commit_type : unsigned int {
+ FLUSH_LOG_NEVER = 0,
+ FLUSH_LOG_SYNC,
+ FLUSH_LOG_BACKGROUND,
+ FLUSH_LOG_MAX /* must be last */
+};
+
static MYSQL_SYSVAR_UINT(flush_log_at_trx_commit,
rocksdb_flush_log_at_trx_commit, PLUGIN_VAR_RQCMDARG,
"Sync on transaction commit. Similar to "
"innodb_flush_log_at_trx_commit. 1: sync on commit, "
"0,2: not sync on commit",
- nullptr, nullptr, 1, 0, 2, 0);
+ nullptr, nullptr, /* default */ FLUSH_LOG_SYNC,
+ /* min */ FLUSH_LOG_NEVER,
+ /* max */ FLUSH_LOG_BACKGROUND, 0);
static MYSQL_THDVAR_BOOL(write_disable_wal, PLUGIN_VAR_RQCMDARG,
"WriteOptions::disableWAL for RocksDB", nullptr,
@@ -1125,6 +1225,13 @@ static MYSQL_SYSVAR_BOOL(force_compute_memtable_stats,
"Force to always compute memtable stats",
nullptr, nullptr, TRUE);
+static MYSQL_SYSVAR_UINT(force_compute_memtable_stats_cachetime,
+ rocksdb_force_compute_memtable_stats_cachetime,
+ PLUGIN_VAR_RQCMDARG,
+ "Time in usecs to cache memtable estimates", nullptr,
+ nullptr, /* default */ 60 * 1000 * 1000,
+ /* min */ 0, /* max */ INT_MAX, 0);
+
static MYSQL_SYSVAR_BOOL(
debug_optimizer_no_zero_cardinality,
rocksdb_debug_optimizer_no_zero_cardinality, PLUGIN_VAR_RQCMDARG,
@@ -1192,6 +1299,12 @@ static MYSQL_SYSVAR_INT(
nullptr, nullptr, 0, /* min */ -3600, /* max */ 3600, 0);
static MYSQL_SYSVAR_BOOL(
+ debug_ttl_ignore_pk, rocksdb_debug_ttl_ignore_pk, PLUGIN_VAR_RQCMDARG,
+ "For debugging purposes only. If true, compaction filtering will not occur "
+ "on PK TTL data. This variable is a no-op in non-debug builds.",
+ nullptr, nullptr, FALSE);
+
+static MYSQL_SYSVAR_BOOL(
reset_stats, rocksdb_reset_stats, PLUGIN_VAR_RQCMDARG,
"Reset the RocksDB internal statistics without restarting the DB.", nullptr,
rocksdb_set_reset_stats, FALSE);
@@ -1346,15 +1459,23 @@ static MYSQL_SYSVAR_UINT(
RDB_DEFAULT_TBL_STATS_SAMPLE_PCT, /* everything */ 0,
/* max */ RDB_TBL_STATS_SAMPLE_PCT_MAX, 0);
+static MYSQL_SYSVAR_BOOL(
+ large_prefix, rocksdb_large_prefix, PLUGIN_VAR_RQCMDARG,
+ "Support large index prefix length of 3072 bytes. If off, the maximum "
+ "index prefix length is 767.",
+ nullptr, nullptr, FALSE);
+
static const int ROCKSDB_ASSUMED_KEY_VALUE_DISK_SIZE = 100;
static struct st_mysql_sys_var *rocksdb_system_variables[] = {
MYSQL_SYSVAR(lock_wait_timeout),
MYSQL_SYSVAR(deadlock_detect),
+ MYSQL_SYSVAR(deadlock_detect_depth),
MYSQL_SYSVAR(max_row_locks),
MYSQL_SYSVAR(write_batch_max_bytes),
MYSQL_SYSVAR(lock_scanned_rows),
MYSQL_SYSVAR(bulk_load),
+ MYSQL_SYSVAR(bulk_load_allow_unsorted),
MYSQL_SYSVAR(skip_unique_check_tables),
MYSQL_SYSVAR(trace_sst_api),
MYSQL_SYSVAR(commit_in_the_middle),
@@ -1365,15 +1486,19 @@ static struct st_mysql_sys_var *rocksdb_system_variables[] = {
MYSQL_SYSVAR(enable_bulk_load_api),
MYSQL_SYSVAR(tmpdir),
MYSQL_SYSVAR(merge_combine_read_size),
+ MYSQL_SYSVAR(merge_tmp_file_removal_delay_ms),
MYSQL_SYSVAR(skip_bloom_filter_on_read),
MYSQL_SYSVAR(create_if_missing),
+ MYSQL_SYSVAR(concurrent_prepare),
+ MYSQL_SYSVAR(manual_wal_flush),
MYSQL_SYSVAR(create_missing_column_families),
MYSQL_SYSVAR(error_if_exists),
MYSQL_SYSVAR(paranoid_checks),
MYSQL_SYSVAR(rate_limiter_bytes_per_sec),
MYSQL_SYSVAR(sst_mgr_rate_bytes_per_sec),
MYSQL_SYSVAR(delayed_write_rate),
+ MYSQL_SYSVAR(max_latest_deadlocks),
MYSQL_SYSVAR(info_log_level),
MYSQL_SYSVAR(max_open_files),
MYSQL_SYSVAR(max_total_wal_size),
@@ -1413,6 +1538,8 @@ static struct st_mysql_sys_var *rocksdb_system_variables[] = {
MYSQL_SYSVAR(enable_write_thread_adaptive_yield),
MYSQL_SYSVAR(block_cache_size),
+ MYSQL_SYSVAR(sim_cache_size),
+ MYSQL_SYSVAR(use_clock_cache),
MYSQL_SYSVAR(cache_index_and_filter_blocks),
MYSQL_SYSVAR(pin_l0_filter_and_index_blocks_in_cache),
MYSQL_SYSVAR(index_type),
@@ -1438,6 +1565,7 @@ static struct st_mysql_sys_var *rocksdb_system_variables[] = {
MYSQL_SYSVAR(force_index_records_in_range),
MYSQL_SYSVAR(debug_optimizer_n_rows),
MYSQL_SYSVAR(force_compute_memtable_stats),
+ MYSQL_SYSVAR(force_compute_memtable_stats_cachetime),
MYSQL_SYSVAR(debug_optimizer_no_zero_cardinality),
MYSQL_SYSVAR(compact_cf),
@@ -1454,6 +1582,7 @@ static struct st_mysql_sys_var *rocksdb_system_variables[] = {
MYSQL_SYSVAR(debug_ttl_rec_ts),
MYSQL_SYSVAR(debug_ttl_snapshot_ts),
MYSQL_SYSVAR(debug_ttl_read_filter_ts),
+ MYSQL_SYSVAR(debug_ttl_ignore_pk),
MYSQL_SYSVAR(reset_stats),
MYSQL_SYSVAR(io_write_timeout),
MYSQL_SYSVAR(flush_memtable_on_analyze),
@@ -1475,13 +1604,15 @@ static struct st_mysql_sys_var *rocksdb_system_variables[] = {
MYSQL_SYSVAR(validate_tables),
MYSQL_SYSVAR(table_stats_sampling_pct),
+
+ MYSQL_SYSVAR(large_prefix),
nullptr};
static rocksdb::WriteOptions
rdb_get_rocksdb_write_options(my_core::THD *const thd) {
rocksdb::WriteOptions opt;
- opt.sync = (rocksdb_flush_log_at_trx_commit == 1);
+ opt.sync = (rocksdb_flush_log_at_trx_commit == FLUSH_LOG_SYNC);
opt.disableWAL = THDVAR(thd, write_disable_wal);
opt.ignore_missing_column_families =
THDVAR(thd, write_ignore_missing_column_families);
@@ -1743,6 +1874,13 @@ public:
}
}
+ void update_bytes_written(ulonglong bytes_written) {
+ if (m_tbl_io_perf != nullptr) {
+ m_tbl_io_perf->update_bytes_written(rocksdb_perf_context_level(m_thd),
+ bytes_written);
+ }
+ }
+
void set_params(int timeout_sec_arg, int max_row_locks_arg) {
m_timeout_sec = timeout_sec_arg;
m_max_row_locks = max_row_locks_arg;
@@ -2238,9 +2376,10 @@ public:
tx_opts.set_snapshot = false;
tx_opts.lock_timeout = rdb_convert_sec_to_ms(m_timeout_sec);
tx_opts.deadlock_detect = THDVAR(m_thd, deadlock_detect);
+ tx_opts.deadlock_detect_depth = THDVAR(m_thd, deadlock_detect_depth);
tx_opts.max_write_batch_size = THDVAR(m_thd, write_batch_max_bytes);
- write_opts.sync = (rocksdb_flush_log_at_trx_commit == 1);
+ write_opts.sync = (rocksdb_flush_log_at_trx_commit == FLUSH_LOG_SYNC);
write_opts.disableWAL = THDVAR(m_thd, write_disable_wal);
write_opts.ignore_missing_column_families =
THDVAR(m_thd, write_ignore_missing_column_families);
@@ -2459,7 +2598,7 @@ public:
void start_tx() override {
reset();
- write_opts.sync = (rocksdb_flush_log_at_trx_commit == 1);
+ write_opts.sync = (rocksdb_flush_log_at_trx_commit == FLUSH_LOG_SYNC);
write_opts.disableWAL = THDVAR(m_thd, write_disable_wal);
write_opts.ignore_missing_column_families =
THDVAR(m_thd, write_ignore_missing_column_families);
@@ -2503,14 +2642,21 @@ namespace {
class Rdb_perf_context_guard {
Rdb_io_perf m_io_perf;
- THD *m_thd;
+ Rdb_io_perf *m_io_perf_ptr;
+ Rdb_transaction *m_tx;
+ uint m_level;
-public:
+ public:
Rdb_perf_context_guard(const Rdb_perf_context_guard &) = delete;
Rdb_perf_context_guard &operator=(const Rdb_perf_context_guard &) = delete;
- explicit Rdb_perf_context_guard(THD *const thd) : m_thd(thd) {
- Rdb_transaction *&tx = get_tx_from_thd(m_thd);
+ explicit Rdb_perf_context_guard(Rdb_io_perf *io_perf, uint level)
+ : m_io_perf_ptr(io_perf), m_tx(nullptr), m_level(level) {
+ m_io_perf_ptr->start(m_level);
+ }
+
+ explicit Rdb_perf_context_guard(Rdb_transaction *tx, uint level)
+ : m_io_perf_ptr(nullptr), m_tx(tx), m_level(level) {
/*
if perf_context information is already being recorded, this becomes a
no-op
@@ -2521,9 +2667,10 @@ public:
}
~Rdb_perf_context_guard() {
- Rdb_transaction *&tx = get_tx_from_thd(m_thd);
- if (tx != nullptr) {
- tx->io_perf_end_and_record();
+ if (m_tx != nullptr) {
+ m_tx->io_perf_end_and_record();
+ } else if (m_io_perf_ptr != nullptr) {
+ m_io_perf_ptr->end_and_record(m_level);
}
}
};
@@ -2611,8 +2758,17 @@ static std::string rdb_xid_to_string(const XID &src) {
static bool rocksdb_flush_wal(handlerton *const hton MY_ATTRIBUTE((__unused__)),
ulonglong target_lsn MY_ATTRIBUTE((__unused__))) {
DBUG_ASSERT(rdb != nullptr);
- rocksdb_wal_group_syncs++;
- const rocksdb::Status s = rdb->SyncWAL();
+
+ rocksdb::Status s;
+ /*
+ target_lsn is set to 0 when MySQL wants to sync the wal files
+ */
+ if (target_lsn == 0 || rocksdb_flush_log_at_trx_commit != FLUSH_LOG_NEVER) {
+ rocksdb_wal_group_syncs++;
+ s = rdb->FlushWAL(target_lsn == 0 ||
+ rocksdb_flush_log_at_trx_commit == FLUSH_LOG_SYNC);
+ }
+
if (!s.ok()) {
rdb_log_status_error(s);
return HA_EXIT_FAILURE;
@@ -2651,7 +2807,7 @@ static int rocksdb_prepare(handlerton *const hton, THD *const thd,
return HA_EXIT_FAILURE;
}
if (thd->durability_property == HA_IGNORE_DURABILITY &&
- (rocksdb_flush_log_at_trx_commit == 1)) {
+ (rocksdb_flush_log_at_trx_commit != FLUSH_LOG_NEVER)) {
/**
we set the log sequence as '1' just to trigger hton->flush_logs
*/
@@ -2812,12 +2968,12 @@ static int rocksdb_commit(handlerton *const hton, THD *const thd,
rocksdb::StopWatchNano timer(rocksdb::Env::Default(), true);
- /* this will trigger saving of perf_context information */
- Rdb_perf_context_guard guard(thd);
-
/* note: h->external_lock(F_UNLCK) is called after this function is called) */
Rdb_transaction *&tx = get_tx_from_thd(thd);
+ /* this will trigger saving of perf_context information */
+ Rdb_perf_context_guard guard(tx, rocksdb_perf_context_level(thd));
+
if (tx != nullptr) {
if (commit_tx || (!my_core::thd_test_options(thd, OPTION_NOT_AUTOCOMMIT |
OPTION_BEGIN))) {
@@ -2854,8 +3010,8 @@ static int rocksdb_commit(handlerton *const hton, THD *const thd,
static int rocksdb_rollback(handlerton *const hton, THD *const thd,
bool rollback_tx) {
- Rdb_perf_context_guard guard(thd);
Rdb_transaction *&tx = get_tx_from_thd(thd);
+ Rdb_perf_context_guard guard(tx, rocksdb_perf_context_level(thd));
if (tx != nullptr) {
if (rollback_tx) {
@@ -2978,7 +3134,82 @@ private:
"=========================================\n";
}
-public:
+ static std::string get_dlock_txn_info(const rocksdb::DeadlockInfo &txn,
+ const GL_INDEX_ID &gl_index_id,
+ bool is_last_path = false) {
+ std::string txn_data;
+
+ /* extract table name and index names using the index id */
+ std::string table_name = ddl_manager.safe_get_table_name(gl_index_id);
+ if (table_name.empty()) {
+ table_name =
+ "NOT FOUND; INDEX_ID: " + std::to_string(gl_index_id.index_id);
+ }
+ auto kd = ddl_manager.safe_find(gl_index_id);
+ std::string idx_name =
+ (kd) ? kd->get_name()
+ : "NOT FOUND; INDEX_ID: " + std::to_string(gl_index_id.index_id);
+
+ /* get the name of the column family */
+ rocksdb::ColumnFamilyHandle *cfh = cf_manager.get_cf(txn.m_cf_id);
+ std::string cf_name = cfh->GetName();
+
+ txn_data += format_string(
+ "TRANSACTIONID: %u\n"
+ "COLUMN FAMILY NAME: %s\n"
+ "WAITING KEY: %s\n"
+ "LOCK TYPE: %s\n"
+ "INDEX NAME: %s\n"
+ "TABLE NAME: %s\n",
+ txn.m_txn_id, cf_name.c_str(),
+ rdb_hexdump(txn.m_waiting_key.c_str(), txn.m_waiting_key.length())
+ .c_str(),
+ txn.m_exclusive ? "EXCLUSIVE" : "SHARED", idx_name.c_str(),
+ table_name.c_str());
+ if (!is_last_path) {
+ txn_data += "---------------WAITING FOR---------------\n";
+ }
+ return txn_data;
+ }
+
+ static std::string
+ get_dlock_path_info(const rocksdb::DeadlockPath &path_entry) {
+ std::string path_data;
+ if (path_entry.limit_exceeded) {
+ path_data += "\n-------DEADLOCK EXCEEDED MAX DEPTH-------\n";
+ } else {
+ path_data += "\n*** DEADLOCK PATH\n"
+ "=========================================\n";
+ for (auto it = path_entry.path.begin(); it != path_entry.path.end();
+ it++) {
+ auto txn = *it;
+ const GL_INDEX_ID gl_index_id = {
+ txn.m_cf_id, rdb_netbuf_to_uint32(reinterpret_cast<const uchar *>(
+ txn.m_waiting_key.c_str()))};
+ path_data += get_dlock_txn_info(txn, gl_index_id);
+ }
+
+ DBUG_ASSERT_IFF(path_entry.limit_exceeded, path_entry.path.empty());
+ /* print the first txn in the path to display the full deadlock cycle */
+ if (!path_entry.path.empty() && !path_entry.limit_exceeded) {
+ auto txn = path_entry.path[0];
+ const GL_INDEX_ID gl_index_id = {
+ txn.m_cf_id, rdb_netbuf_to_uint32(reinterpret_cast<const uchar *>(
+ txn.m_waiting_key.c_str()))};
+ path_data += get_dlock_txn_info(txn, gl_index_id, true);
+
+ /* prints the txn id of the transaction that caused the deadlock */
+ auto deadlocking_txn = *(path_entry.path.end() - 1);
+ path_data +=
+ format_string("\n--------TRANSACTIONID: %u GOT DEADLOCK---------\n",
+ deadlocking_txn.m_txn_id);
+ }
+ }
+
+ return path_data;
+ }
+
+ public:
Rdb_snapshot_status() : m_data(get_header()) {}
std::string getResult() { return m_data + get_footer(); }
@@ -3007,6 +3238,15 @@ public:
tx->get_delete_count());
}
}
+
+ void populate_deadlock_buffer() {
+ auto dlock_buffer = rdb->GetDeadlockInfoBuffer();
+ m_data += "----------LATEST DETECTED DEADLOCKS----------\n";
+
+ for (auto path_entry : dlock_buffer) {
+ m_data += get_dlock_path_info(path_entry);
+ }
+ }
};
/**
@@ -3103,10 +3343,10 @@ static bool rocksdb_show_snapshot_status(handlerton *const hton, THD *const thd,
Rdb_snapshot_status showStatus;
Rdb_transaction::walk_tx_list(&showStatus);
+ showStatus.populate_deadlock_buffer();
/* Send the result data back to MySQL */
- return print_stats(thd, "SNAPSHOTS", "rocksdb", showStatus.getResult(),
- stat_print);
+ return print_stats(thd, "rocksdb", "", showStatus.getResult(), stat_print);
}
/*
@@ -3332,8 +3572,6 @@ static int rocksdb_start_tx_and_assign_read_view(
char **gtid_executed, /* out: Gtids logged until last commit */
int *const gtid_executed_length) /*out: Length of gtid_executed string */
{
- Rdb_perf_context_guard guard(thd);
-
ulong const tx_isolation = my_core::thd_tx_isolation(thd);
if (tx_isolation != ISO_REPEATABLE_READ) {
@@ -3349,6 +3587,8 @@ static int rocksdb_start_tx_and_assign_read_view(
}
Rdb_transaction *const tx = get_or_create_tx(thd);
+ Rdb_perf_context_guard guard(tx, rocksdb_perf_context_level(thd));
+
DBUG_ASSERT(!tx->has_snapshot());
tx->set_tx_read_only(true);
rocksdb_register_tx(hton, thd, tx);
@@ -3395,6 +3635,7 @@ static void rocksdb_update_table_stats(
int n_lock_wait, int n_lock_wait_timeout, int n_lock_deadlock,
const char *engine)) {
my_io_perf_t io_perf_read;
+ my_io_perf_t io_perf_write;
my_io_perf_t io_perf;
page_stats_t page_stats;
comp_stats_t comp_stats;
@@ -3409,6 +3650,7 @@ static void rocksdb_update_table_stats(
memset(&io_perf, 0, sizeof(io_perf));
memset(&page_stats, 0, sizeof(page_stats));
memset(&comp_stats, 0, sizeof(comp_stats));
+ memset(&io_perf_write, 0, sizeof(io_perf_write));
tablenames = rdb_open_tables.get_table_names();
@@ -3441,6 +3683,8 @@ static void rocksdb_update_table_stats(
io_perf_read.bytes = table_handler->m_io_perf_read.bytes.load();
io_perf_read.requests = table_handler->m_io_perf_read.requests.load();
+ io_perf_write.bytes = table_handler->m_io_perf_write.bytes.load();
+ io_perf_write.requests = table_handler->m_io_perf_write.requests.load();
lock_wait_timeout_stats = table_handler->m_lock_wait_timeout_counter.load();
deadlock_stats = table_handler->m_deadlock_counter.load();
@@ -3468,9 +3712,10 @@ static void rocksdb_update_table_stats(
sizeof(dbname_sys));
my_core::filename_to_tablename(tablename.c_str(), tablename_sys,
sizeof(tablename_sys));
- (*cb)(dbname_sys, tablename_sys, is_partition, &io_perf_read, &io_perf,
- &io_perf, &io_perf, &io_perf, &page_stats, &comp_stats, 0,
- lock_wait_timeout_stats, deadlock_stats, rocksdb_hton_name);
+ (*cb)(dbname_sys, tablename_sys, is_partition, &io_perf_read,
+ &io_perf_write, &io_perf, &io_perf, &io_perf, &page_stats,
+ &comp_stats, 0, lock_wait_timeout_stats, deadlock_stats,
+ rocksdb_hton_name);
}
}
@@ -3680,8 +3925,18 @@ static int rocksdb_init_func(void *const p) {
(rocksdb::BlockBasedTableOptions::IndexType)rocksdb_index_type;
if (!rocksdb_tbl_options->no_block_cache) {
- rocksdb_tbl_options->block_cache =
- rocksdb::NewLRUCache(rocksdb_block_cache_size);
+ std::shared_ptr<rocksdb::Cache> block_cache = rocksdb_use_clock_cache
+ ? rocksdb::NewClockCache(rocksdb_block_cache_size)
+ : rocksdb::NewLRUCache(rocksdb_block_cache_size);
+ if (rocksdb_sim_cache_size > 0) {
+ // Simulated cache enabled
+ // Wrap block cache inside a simulated cache and pass it to RocksDB
+ rocksdb_tbl_options->block_cache =
+ rocksdb::NewSimCache(block_cache, rocksdb_sim_cache_size, 6);
+ } else {
+ // Pass block cache to RocksDB
+ rocksdb_tbl_options->block_cache = block_cache;
+ }
}
// Using newer BlockBasedTable format version for better compression
// and better memory allocation.
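
Two related knobs come together in the hunk above: rocksdb_use_clock_cache selects rocksdb::NewClockCache() instead of NewLRUCache(), and rocksdb_sim_cache_size wraps whichever block cache was built in a SimCache so hit rates can be simulated at a different capacity. ClockCache is only available when RocksDB is built with TBB (which is what the WITH_TBB change in CMakeLists.txt above provides), and NewClockCache() is documented to return nullptr when unsupported. A defensive caller might therefore fall back to LRU, roughly as in this sketch (not the code in this patch):

    // Sketch: pick the block cache, falling back to LRU if ClockCache is
    // unavailable, and optionally wrap it in a simulated cache.
    #include <rocksdb/cache.h>
    #include <rocksdb/utilities/sim_cache.h>

    std::shared_ptr<rocksdb::Cache> make_block_cache(bool use_clock_cache,
                                                     size_t capacity,
                                                     size_t sim_capacity) {
      std::shared_ptr<rocksdb::Cache> cache;
      if (use_clock_cache) {
        cache = rocksdb::NewClockCache(capacity);  // nullptr without TBB
      }
      if (!cache) {
        cache = rocksdb::NewLRUCache(capacity);    // fall back to LRU
      }
      if (sim_capacity > 0) {
        // Wrap the real cache so hit/miss behaviour can be measured as if
        // the cache had sim_capacity bytes; 6 is the num_shard_bits value
        // the patch passes to NewSimCache().
        cache = rocksdb::NewSimCache(cache, sim_capacity, 6);
      }
      return cache;
    }
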
@@ -3870,7 +4125,7 @@ static int rocksdb_init_func(void *const p) {
}
#endif
- err = my_error_register(rdb_get_error_messages, HA_ERR_ROCKSDB_FIRST,
+ err = my_error_register(rdb_get_error_message, HA_ERR_ROCKSDB_FIRST,
HA_ERR_ROCKSDB_LAST);
if (err != 0) {
// NO_LINT_DEBUG
@@ -4064,6 +4319,7 @@ Rdb_open_tables_map::get_table_handler(const char *const table_name) {
thr_lock_init(&table_handler->m_thr_lock);
table_handler->m_io_perf_read.init();
+ table_handler->m_io_perf_write.init();
}
DBUG_ASSERT(table_handler->m_ref_count >= 0);
table_handler->m_ref_count++;
@@ -4330,11 +4586,11 @@ bool ha_rocksdb::init_with_fields() {
rows within a transaction, etc, because the compaction filter ignores
snapshots when filtering keys.
*/
-bool ha_rocksdb::should_hide_ttl_rec(const rocksdb::Slice &ttl_rec_val,
+bool ha_rocksdb::should_hide_ttl_rec(const Rdb_key_def &kd,
+ const rocksdb::Slice &ttl_rec_val,
const int64_t curr_ts) {
- DBUG_ASSERT(m_pk_descr != nullptr);
- DBUG_ASSERT(m_pk_descr->has_ttl());
- DBUG_ASSERT(m_pk_descr->m_ttl_rec_offset != UINT_MAX);
+ DBUG_ASSERT(kd.has_ttl());
+ DBUG_ASSERT(kd.m_ttl_rec_offset != UINT_MAX);
/*
Curr_ts can only be 0 if there are no snapshots open.
@@ -4360,7 +4616,7 @@ bool ha_rocksdb::should_hide_ttl_rec(const rocksdb::Slice &ttl_rec_val,
Find where the 8-byte ttl is for each record in this index.
*/
uint64 ts;
- if (!reader.read(m_pk_descr->m_ttl_rec_offset) || reader.read_uint64(&ts)) {
+ if (!reader.read(kd.m_ttl_rec_offset) || reader.read_uint64(&ts)) {
/*
This condition should never be reached since all TTL records have an
8 byte ttl field in front. Don't filter the record out, and log an error.
@@ -4368,7 +4624,7 @@ bool ha_rocksdb::should_hide_ttl_rec(const rocksdb::Slice &ttl_rec_val,
std::string buf;
buf = rdb_hexdump(ttl_rec_val.data(), ttl_rec_val.size(),
RDB_MAX_HEXDUMP_LEN);
- const GL_INDEX_ID gl_index_id = m_pk_descr->get_gl_index_id();
+ const GL_INDEX_ID gl_index_id = kd.get_gl_index_id();
// NO_LINT_DEBUG
sql_print_error("Decoding ttl from PK value failed, "
"for index (%u,%u), val: %s",
@@ -4382,10 +4638,23 @@ bool ha_rocksdb::should_hide_ttl_rec(const rocksdb::Slice &ttl_rec_val,
#ifndef NDEBUG
read_filter_ts += rdb_dbug_set_ttl_read_filter_ts();
#endif
- return ts + m_pk_descr->m_ttl_duration + read_filter_ts <=
+ return ts + kd.m_ttl_duration + read_filter_ts <=
static_cast<uint64>(curr_ts);
}
+void ha_rocksdb::rocksdb_skip_expired_records(const Rdb_key_def &kd,
+ rocksdb::Iterator *const iter,
+ bool seek_backward) {
+ if (kd.has_ttl()) {
+ while (iter->Valid() &&
+ should_hide_ttl_rec(
+ kd, iter->value(),
+ get_or_create_tx(table->in_use)->m_snapshot_timestamp)) {
+ rocksdb_smart_next(seek_backward, iter);
+ }
+ }
+}
+
/**
Convert record from table->record[0] form into a form that can be written
into rocksdb.
@@ -4411,6 +4680,7 @@ int ha_rocksdb::convert_record_to_storage_format(
if (has_ttl) {
/* If it's a TTL record, reserve space for 8 byte TTL value in front. */
m_storage_record.fill(ROCKSDB_SIZEOF_TTL_RECORD + m_null_bytes_in_rec, 0);
+ m_ttl_bytes_updated = false;
/*
If the TTL is contained within the key, we use the offset to find the
@@ -4441,6 +4711,8 @@ int ha_rocksdb::convert_record_to_storage_format(
rdb_netbuf_to_uint64(reinterpret_cast<const uchar *>(data)) +
rdb_dbug_set_ttl_rec_ts());
#endif
+ // Also store in m_ttl_bytes to propagate to update_sk
+ memcpy(m_ttl_bytes, data, ROCKSDB_SIZEOF_TTL_RECORD);
} else if (!has_ttl_column) {
/*
For implicitly generated TTL records we need to copy over the old
@@ -4459,6 +4731,8 @@ int ha_rocksdb::convert_record_to_storage_format(
#endif
char *const data = const_cast<char *>(m_storage_record.ptr());
rdb_netbuf_store_uint64(reinterpret_cast<uchar *>(data), ts);
+ // Also store in m_ttl_bytes to propagate to update_sk
+ memcpy(m_ttl_bytes, data, ROCKSDB_SIZEOF_TTL_RECORD);
}
}
} else {
@@ -4540,6 +4814,15 @@ int ha_rocksdb::convert_record_to_storage_format(
ts += rdb_dbug_set_ttl_rec_ts();
#endif
rdb_netbuf_store_uint64(reinterpret_cast<uchar *>(data), ts);
+
+ // If this is an update and the timestamp has been updated, take note
+ // so we can avoid updating SKs unnecessarily.
+ if (!row_info.old_pk_slice.empty()) {
+ m_ttl_bytes_updated =
+ memcmp(m_ttl_bytes, data, ROCKSDB_SIZEOF_TTL_RECORD);
+ }
+ // Store timestamp in m_ttl_bytes to propagate to update_sk
+ memcpy(m_ttl_bytes, data, ROCKSDB_SIZEOF_TTL_RECORD);
}
}
}
@@ -4818,8 +5101,9 @@ int ha_rocksdb::convert_record_from_storage_format(
}
if (m_maybe_unpack_info) {
- unpack_info = reader.read(RDB_UNPACK_HEADER_SIZE);
- if (!unpack_info || unpack_info[0] != RDB_UNPACK_DATA_TAG) {
+ unpack_info = reader.get_current_ptr();
+ if (!unpack_info || !Rdb_key_def::is_unpack_data_tag(unpack_info[0]) ||
+ !reader.read(Rdb_key_def::get_unpack_header_size(unpack_info[0]))) {
return HA_ERR_ROCKSDB_CORRUPT_DATA;
}
@@ -4827,7 +5111,8 @@ int ha_rocksdb::convert_record_from_storage_format(
rdb_netbuf_to_uint16(reinterpret_cast<const uchar *>(unpack_info + 1));
unpack_slice = rocksdb::Slice(unpack_info, unpack_info_len);
- reader.read(unpack_info_len - RDB_UNPACK_HEADER_SIZE);
+ reader.read(unpack_info_len -
+ Rdb_key_def::get_unpack_header_size(unpack_info[0]));
}
int err = m_pk_descr->unpack_record(table, buf, &rowkey_slice,
@@ -4983,8 +5268,7 @@ void ha_rocksdb::setup_field_converters() {
If hidden pk exists, we skip this check since the field will never be
part of the hidden pk.
*/
- if (!has_hidden_pk(table) &&
- field->part_of_key.is_set(table->s->primary_key)) {
+ if (!has_hidden_pk(table)) {
KEY *const pk_info = &table->key_info[table->s->primary_key];
for (uint kp = 0; kp < pk_info->user_defined_key_parts; kp++) {
/* key_part->fieldnr is counted from 1 */
@@ -5185,7 +5469,10 @@ int ha_rocksdb::open(const char *const name, int mode, uint test_if_locked) {
my_core::thr_lock_data_init(&m_table_handler->m_thr_lock, &m_db_lock,
nullptr);
m_io_perf.init(&m_table_handler->m_table_perf_context,
- &m_table_handler->m_io_perf_read, &stats);
+ &m_table_handler->m_io_perf_read,
+ &m_table_handler->m_io_perf_write, &stats);
+ Rdb_perf_context_guard guard(&m_io_perf,
+ rocksdb_perf_context_level(ha_thd()));
std::string fullname;
err = rdb_normalize_tablename(name, &fullname);
@@ -5282,6 +5569,13 @@ int ha_rocksdb::close(void) {
m_table_handler = nullptr;
}
+ // These are needed to suppress valgrind errors in rocksdb.partition
+ m_storage_record.free();
+ m_last_rowkey.free();
+ m_sk_tails.free();
+ m_sk_tails_old.free();
+ m_pk_unpack_info.free();
+
DBUG_RETURN(HA_EXIT_SUCCESS);
}
@@ -5317,7 +5611,9 @@ static_assert((sizeof(rdb_error_messages) / sizeof(rdb_error_messages[0])) ==
((HA_ERR_ROCKSDB_LAST - HA_ERR_ROCKSDB_FIRST) + 1),
"Number of error messages doesn't match number of error codes");
-static const char **rdb_get_error_messages() { return rdb_error_messages; }
+static const char *rdb_get_error_message(int nr) {
+ return rdb_error_messages[nr - HA_ERR_ROCKSDB_FIRST];
+}
bool ha_rocksdb::get_error_message(const int error, String *const buf) {
DBUG_ENTER_FUNC();
@@ -5865,9 +6161,8 @@ int ha_rocksdb::create_key_def(const TABLE *const table_arg, const uint &i,
DBUG_RETURN(err);
}
- /* We don't currently support TTL on tables with secondary keys. */
- if (ttl_duration > 0 &&
- (table_arg->s->keys > 1 || is_hidden_pk(i, table_arg, tbl_def_arg))) {
+ /* We don't currently support TTL on tables with hidden primary keys. */
+ if (ttl_duration > 0 && is_hidden_pk(i, table_arg, tbl_def_arg)) {
my_error(ER_RDB_TTL_UNSUPPORTED, MYF(0));
DBUG_RETURN(HA_EXIT_FAILURE);
}
@@ -5906,6 +6201,12 @@ int ha_rocksdb::create_key_def(const TABLE *const table_arg, const uint &i,
kv_version = Rdb_key_def::PRIMARY_FORMAT_VERSION_UPDATE1;
});
+ DBUG_EXECUTE_IF("MYROCKS_NO_COVERED_BITMAP_FORMAT", {
+ if (index_type == Rdb_key_def::INDEX_TYPE_SECONDARY) {
+ kv_version = Rdb_key_def::SECONDARY_FORMAT_VERSION_UPDATE2;
+ }
+ });
+
uint32 index_flags = (ttl_duration > 0 ? Rdb_key_def::TTL_FLAG : 0);
uint32 ttl_rec_offset =
@@ -6263,7 +6564,7 @@ int ha_rocksdb::read_key_exact(const Rdb_key_def &kd,
from the POV of the current transaction. If it has, try going to the next
key.
*/
- if (kd.has_ttl() && should_hide_ttl_rec(iter->value(), ttl_filter_ts)) {
+ if (kd.has_ttl() && should_hide_ttl_rec(kd, iter->value(), ttl_filter_ts)) {
rocksdb_smart_next(kd.m_is_reverse_cf, iter);
continue;
}
@@ -6299,7 +6600,7 @@ int ha_rocksdb::read_before_key(const Rdb_key_def &kd,
if ((full_key_match &&
kd.value_matches_prefix(m_scan_it->key(), key_slice)) ||
(kd.has_ttl() &&
- should_hide_ttl_rec(m_scan_it->value(), ttl_filter_ts))) {
+ should_hide_ttl_rec(kd, m_scan_it->value(), ttl_filter_ts))) {
rocksdb_smart_next(!kd.m_is_reverse_cf, m_scan_it);
continue;
}
@@ -6329,7 +6630,7 @@ int ha_rocksdb::read_after_key(const Rdb_key_def &kd,
key.
*/
while (m_scan_it->Valid() && kd.has_ttl() &&
- should_hide_ttl_rec(m_scan_it->value(), ttl_filter_ts)) {
+ should_hide_ttl_rec(kd, m_scan_it->value(), ttl_filter_ts)) {
rocksdb_smart_next(kd.m_is_reverse_cf, m_scan_it);
}
@@ -6487,17 +6788,21 @@ int ha_rocksdb::read_row_from_secondary_key(uchar *const buf,
int rc = 0;
uint pk_size;
- if (m_keyread_only && m_lock_rows == RDB_LOCK_NONE && !has_hidden_pk(table)) {
- /* Get the key columns and primary key value */
- const rocksdb::Slice &rkey = m_scan_it->key();
+ /* Get the key columns and primary key value */
+ const rocksdb::Slice &rkey = m_scan_it->key();
+ const rocksdb::Slice &value = m_scan_it->value();
+
+ bool covered_lookup =
+ m_keyread_only || kd.covers_lookup(table, &value, &m_lookup_bitmap);
+ if (covered_lookup && m_lock_rows == RDB_LOCK_NONE && !has_hidden_pk(table)) {
pk_size =
kd.get_primary_key_tuple(table, *m_pk_descr, &rkey, m_pk_packed_tuple);
- const rocksdb::Slice &value = m_scan_it->value();
if (pk_size == RDB_INVALID_KEY_LEN) {
rc = HA_ERR_ROCKSDB_CORRUPT_DATA;
} else {
rc = kd.unpack_record(table, buf, &rkey, &value,
m_verify_row_debug_checksums);
+ global_stats.covered_secondary_key_lookups.inc();
}
} else {
if (kd.m_is_reverse_cf)
@@ -6607,11 +6912,15 @@ int ha_rocksdb::secondary_index_read(const int keyno, uchar *const buf) {
m_last_rowkey.copy((const char *)m_pk_packed_tuple, size,
&my_charset_bin);
- if (m_keyread_only && m_lock_rows == RDB_LOCK_NONE &&
+ rocksdb::Slice value = m_scan_it->value();
+ bool covered_lookup =
+ m_keyread_only || m_key_descr_arr[keyno]->covers_lookup(
+ table, &value, &m_lookup_bitmap);
+ if (covered_lookup && m_lock_rows == RDB_LOCK_NONE &&
!has_hidden_pk(table)) {
- rocksdb::Slice value = m_scan_it->value();
rc = m_key_descr_arr[keyno]->unpack_record(
table, buf, &key, &value, m_verify_row_debug_checksums);
+ global_stats.covered_secondary_key_lookups.inc();
} else {
rc = get_row_by_rowid(buf, m_pk_packed_tuple, size);
}
@@ -6898,6 +7207,8 @@ int ha_rocksdb::find_icp_matching_index_rec(const bool &move_forward,
const Rdb_key_def &kd = *m_key_descr_arr[active_index];
while (1) {
+ rocksdb_skip_expired_records(kd, m_scan_it, !move_forward);
+
if (!m_scan_it->Valid()) {
table->status = STATUS_NOT_FOUND;
return HA_ERR_END_OF_FILE;
@@ -6927,10 +7238,7 @@ int ha_rocksdb::find_icp_matching_index_rec(const bool &move_forward,
const enum icp_result icp_status = check_index_cond();
if (icp_status == ICP_NO_MATCH) {
- if (move_forward)
- m_scan_it->Next();
- else
- m_scan_it->Prev();
+ rocksdb_smart_next(!move_forward, m_scan_it);
continue; /* Get the next (or prev) index tuple */
} else if (icp_status == ICP_OUT_OF_RANGE) {
/* We have walked out of range we are scanning */
@@ -7263,7 +7571,8 @@ int ha_rocksdb::get_row_by_rowid(uchar *const buf, const char *const rowid,
if (found) {
/* If we found the record, but it's expired, pretend we didn't find it. */
if (!skip_ttl_check && m_pk_descr->has_ttl() &&
- should_hide_ttl_rec(rocksdb::Slice(&m_retrieved_record.front(),
+ should_hide_ttl_rec(*m_pk_descr,
+ rocksdb::Slice(&m_retrieved_record.front(),
m_retrieved_record.size()),
tx->m_snapshot_timestamp)) {
DBUG_RETURN(HA_ERR_KEY_NOT_FOUND);
@@ -7344,6 +7653,8 @@ int ha_rocksdb::index_next_with_direction(uchar *const buf, bool move_forward) {
else
m_scan_it->Prev();
}
+ rocksdb_skip_expired_records(*m_key_descr_arr[active_index], m_scan_it,
+ !move_forward);
rc = find_icp_matching_index_rec(move_forward, buf);
if (!rc)
rc = secondary_index_read(active_index, buf);
@@ -7696,6 +8007,12 @@ bool ha_rocksdb::is_pk(const uint index, const TABLE *const table_arg,
is_hidden_pk(index, table_arg, tbl_def_arg);
}
+uint ha_rocksdb::max_supported_key_part_length() const {
+ DBUG_ENTER_FUNC();
+ DBUG_RETURN(rocksdb_large_prefix ? MAX_INDEX_COL_LEN_LARGE
+ : MAX_INDEX_COL_LEN_SMALL);
+}
+
const char *ha_rocksdb::get_key_name(const uint index,
const TABLE *const table_arg,
const Rdb_tbl_def *const tbl_def_arg) {
@@ -8075,7 +8392,8 @@ int ha_rocksdb::check_uniqueness_and_lock(
m_retrieved_record by check_and_lock_unique_pk().
*/
if (is_pk(key_id, table, m_tbl_def) && found && m_pk_descr->has_ttl() &&
- should_hide_ttl_rec(rocksdb::Slice(&m_retrieved_record.front(),
+ should_hide_ttl_rec(*m_pk_descr,
+ rocksdb::Slice(&m_retrieved_record.front(),
m_retrieved_record.size()),
(row_info.tx->m_snapshot_timestamp
? row_info.tx->m_snapshot_timestamp
@@ -8123,48 +8441,112 @@ int ha_rocksdb::check_duplicate_sk(const TABLE *table_arg,
int ha_rocksdb::bulk_load_key(Rdb_transaction *const tx, const Rdb_key_def &kd,
const rocksdb::Slice &key,
- const rocksdb::Slice &value) {
- rocksdb::ColumnFamilyHandle *const cf = kd.get_cf();
+ const rocksdb::Slice &value, bool sort) {
+ DBUG_ENTER_FUNC();
+
+ rocksdb::ColumnFamilyHandle *cf = kd.get_cf();
DBUG_ASSERT(cf != nullptr);
- if (m_sst_info == nullptr) {
- m_sst_info = std::make_shared<Rdb_sst_info>(
- rdb, m_table_handler->m_table_name, kd.get_name(), cf,
- *rocksdb_db_options, THDVAR(ha_thd(), trace_sst_api));
- tx->start_bulk_load(this);
- m_bulk_load_tx = tx;
- }
+ int res = HA_EXIT_SUCCESS;
- DBUG_ASSERT(m_sst_info != nullptr);
+ if (sort) {
+ GL_INDEX_ID kd_gl_id = kd.get_gl_index_id();
+ auto it = m_key_merge.find(kd_gl_id);
+ if (it == m_key_merge.end()) {
+ m_key_merge.emplace(
+ std::piecewise_construct, std::make_tuple(kd_gl_id),
+ std::make_tuple(
+ thd_rocksdb_tmpdir(), THDVAR(ha_thd(), merge_buf_size),
+ THDVAR(ha_thd(), merge_combine_read_size),
+ THDVAR(ha_thd(), merge_tmp_file_removal_delay_ms), cf));
+ it = m_key_merge.find(kd_gl_id);
+ if ((res = it->second.init()) != 0) {
+ DBUG_RETURN(res);
+ }
- return m_sst_info->put(key, value);
+ if (m_bulk_load_tx == nullptr) {
+ tx->start_bulk_load(this);
+ m_bulk_load_tx = tx;
+ }
+ }
+ res = it->second.add(key, value);
+ } else {
+ if (!m_sst_info) {
+ m_sst_info.reset(new Rdb_sst_info(rdb, m_table_handler->m_table_name,
+ kd.get_name(), cf, *rocksdb_db_options,
+ THDVAR(ha_thd(), trace_sst_api)));
+ tx->start_bulk_load(this);
+ m_bulk_load_tx = tx;
+ }
+
+ DBUG_ASSERT(m_sst_info);
+
+ res = m_sst_info->put(key, value);
+ }
+
+ DBUG_RETURN(res);
}
int ha_rocksdb::finalize_bulk_load() {
- int rc = 0;
+ DBUG_ENTER_FUNC();
+
+ DBUG_ASSERT_IMP(!m_key_merge.empty() || m_sst_info,
+ m_bulk_load_tx != nullptr);
/* Skip if there are no possible ongoing bulk loads */
- if (m_sst_info == nullptr && m_bulk_load_tx == nullptr) {
- return rc;
+ if (m_key_merge.empty() && !m_sst_info && m_bulk_load_tx == nullptr) {
+ DBUG_RETURN(HA_EXIT_SUCCESS);
}
+ int res = HA_EXIT_SUCCESS;
+
RDB_MUTEX_LOCK_CHECK(m_bulk_load_mutex);
- /*
- We need this check because it's possible that m_sst_info has been
- flushed and cleared by another thread by the time the mutex has been
- acquired.
- */
- if (m_sst_info != nullptr) {
- rc = m_sst_info->commit();
- m_sst_info = nullptr;
+ if (m_sst_info) {
+ res = m_sst_info->commit();
+ m_sst_info.reset();
+ }
+
+ if (!m_key_merge.empty()) {
+ rocksdb::Slice merge_key;
+ rocksdb::Slice merge_val;
+ for (auto it = m_key_merge.begin(); it != m_key_merge.end(); it++) {
+ const std::string &index_name =
+ ddl_manager.safe_find(it->first)->get_name();
+ Rdb_index_merge &rdb_merge = it->second;
+ Rdb_sst_info sst_info(rdb, m_table_handler->m_table_name, index_name,
+ rdb_merge.get_cf(), *rocksdb_db_options,
+ THDVAR(ha_thd(), trace_sst_api));
+
+ while ((res = rdb_merge.next(&merge_key, &merge_val)) == 0) {
+ if ((res = sst_info.put(merge_key, merge_val)) != 0) {
+ break;
+ }
+ }
+ // res == -1 => finished ok; res > 0 => error
+ if (res <= 0) {
+ if ((res = sst_info.commit()) != 0) {
+ break;
+ }
+ }
+ }
+ m_key_merge.clear();
+
+ /*
+ Explicitly tell jemalloc to clean up any unused dirty pages at this point.
+ See https://reviews.facebook.net/D63723 for more details.
+ */
+ purge_all_jemalloc_arenas();
+ }
+
+ if (m_bulk_load_tx != nullptr) {
m_bulk_load_tx->end_bulk_load(this);
m_bulk_load_tx = nullptr;
}
RDB_MUTEX_UNLOCK_CHECK(m_bulk_load_mutex);
- return rc;
+ DBUG_RETURN(res);
}
int ha_rocksdb::update_pk(const Rdb_key_def &kd,
@@ -8172,6 +8554,7 @@ int ha_rocksdb::update_pk(const Rdb_key_def &kd,
const bool &pk_changed) {
const uint key_id = kd.get_keyno();
const bool hidden_pk = is_hidden_pk(key_id, table, m_tbl_def);
+ ulonglong bytes_written = 0;
/*
If the PK has changed, or if this PK uses single deletes and this is an
@@ -8186,6 +8569,8 @@ int ha_rocksdb::update_pk(const Rdb_key_def &kd,
if (!s.ok()) {
return row_info.tx->set_status_error(table->in_use, s, kd, m_tbl_def,
m_table_handler);
+ } else {
+ bytes_written = row_info.old_pk_slice.size();
}
}
@@ -8206,7 +8591,8 @@ int ha_rocksdb::update_pk(const Rdb_key_def &kd,
/*
Write the primary key directly to an SST file using an SstFileWriter
*/
- rc = bulk_load_key(row_info.tx, kd, row_info.new_pk_slice, value_slice);
+ rc = bulk_load_key(row_info.tx, kd, row_info.new_pk_slice, value_slice,
+ THDVAR(table->in_use, bulk_load_allow_unsorted));
} else if (row_info.skip_unique_check || row_info.tx->m_ddl_transaction) {
/*
It is responsibility of the user to make sure that the data being
@@ -8228,6 +8614,10 @@ int ha_rocksdb::update_pk(const Rdb_key_def &kd,
}
}
+ if (rc == HA_EXIT_SUCCESS) {
+ row_info.tx->update_bytes_written(
+ bytes_written + row_info.new_pk_slice.size() + value_slice.size());
+ }
return rc;
}
@@ -8241,24 +8631,31 @@ int ha_rocksdb::update_sk(const TABLE *const table_arg, const Rdb_key_def &kd,
rocksdb::Slice old_key_slice;
const uint key_id = kd.get_keyno();
+
+ ulonglong bytes_written = 0;
+
/*
- Can skip updating this key if none of the key fields have changed.
+ Can skip updating this key if none of the key fields have changed and, if
+ this table has TTL, the TTL timestamp has not changed.
*/
- if (row_info.old_data != nullptr && !m_update_scope.is_set(key_id)) {
+ if (row_info.old_data != nullptr && !m_update_scope.is_set(key_id) &&
+ (!kd.has_ttl() || !m_ttl_bytes_updated)) {
return HA_EXIT_SUCCESS;
}
const bool store_row_debug_checksums = should_store_row_debug_checksums();
- new_packed_size = kd.pack_record(
- table_arg, m_pack_buffer, row_info.new_data, m_sk_packed_tuple,
- &m_sk_tails, store_row_debug_checksums, row_info.hidden_pk_id);
+ new_packed_size =
+ kd.pack_record(table_arg, m_pack_buffer, row_info.new_data,
+ m_sk_packed_tuple, &m_sk_tails, store_row_debug_checksums,
+ row_info.hidden_pk_id, 0, nullptr, nullptr, m_ttl_bytes);
if (row_info.old_data != nullptr) {
// The old value
old_packed_size = kd.pack_record(
table_arg, m_pack_buffer, row_info.old_data, m_sk_packed_tuple_old,
- &m_sk_tails_old, store_row_debug_checksums, row_info.hidden_pk_id);
+ &m_sk_tails_old, store_row_debug_checksums, row_info.hidden_pk_id, 0,
+ nullptr, nullptr, m_ttl_bytes);
/*
Check if we are going to write the same value. This can happen when
@@ -8276,6 +8673,7 @@ int ha_rocksdb::update_sk(const TABLE *const table_arg, const Rdb_key_def &kd,
*/
if (old_packed_size == new_packed_size &&
m_sk_tails_old.get_current_pos() == m_sk_tails.get_current_pos() &&
+ !(kd.has_ttl() && m_ttl_bytes_updated) &&
memcmp(m_sk_packed_tuple_old, m_sk_packed_tuple, old_packed_size) ==
0 &&
memcmp(m_sk_tails_old.ptr(), m_sk_tails.ptr(),
@@ -8294,6 +8692,8 @@ int ha_rocksdb::update_sk(const TABLE *const table_arg, const Rdb_key_def &kd,
row_info.tx->get_indexed_write_batch()->SingleDelete(kd.get_cf(),
old_key_slice);
+
+ bytes_written = old_key_slice.size();
}
new_key_slice = rocksdb::Slice(
@@ -8305,6 +8705,9 @@ int ha_rocksdb::update_sk(const TABLE *const table_arg, const Rdb_key_def &kd,
row_info.tx->get_indexed_write_batch()->Put(kd.get_cf(), new_key_slice,
new_value_slice);
+ row_info.tx->update_bytes_written(bytes_written + new_key_slice.size() +
+ new_value_slice.size());
+
return HA_EXIT_SUCCESS;
}
@@ -8312,14 +8715,19 @@ int ha_rocksdb::update_indexes(const struct update_row_info &row_info,
const bool &pk_changed) {
int rc;
+ // The PK must be updated first to pull out the TTL value.
+ rc = update_pk(*m_pk_descr, row_info, pk_changed);
+ if (rc != HA_EXIT_SUCCESS) {
+ return rc;
+ }
+
+ // Update the remaining indexes.
for (uint key_id = 0; key_id < m_tbl_def->m_key_count; key_id++) {
- const Rdb_key_def &kd = *m_key_descr_arr[key_id];
if (is_pk(key_id, table, m_tbl_def)) {
- rc = update_pk(kd, row_info, pk_changed);
- } else {
- rc = update_sk(table, kd, row_info);
+ continue;
}
+ rc = update_sk(table, *m_key_descr_arr[key_id], row_info);
if (rc != HA_EXIT_SUCCESS) {
return rc;
}
@@ -8597,7 +9005,8 @@ int ha_rocksdb::rnd_next_with_direction(uchar *const buf, bool move_forward) {
DEBUG_SYNC(ha_thd(), "rocksdb_concurrent_delete");
if (m_pk_descr->has_ttl() &&
- should_hide_ttl_rec(m_scan_it->value(), tx->m_snapshot_timestamp)) {
+ should_hide_ttl_rec(*m_pk_descr, m_scan_it->value(),
+ tx->m_snapshot_timestamp)) {
continue;
}
@@ -8628,7 +9037,8 @@ int ha_rocksdb::rnd_next_with_direction(uchar *const buf, bool move_forward) {
if (m_pk_descr->has_ttl() &&
should_hide_ttl_rec(
- value, get_or_create_tx(table->in_use)->m_snapshot_timestamp)) {
+ *m_pk_descr, value,
+ get_or_create_tx(table->in_use)->m_snapshot_timestamp)) {
continue;
}
@@ -8670,6 +9080,10 @@ int ha_rocksdb::index_init(uint idx, bool sorted) {
setup_read_decoders();
+ if (!m_keyread_only) {
+ m_key_descr_arr[idx]->get_lookup_bitmap(table, &m_lookup_bitmap);
+ }
+
// If m_lock_rows is not RDB_LOCK_NONE then we will be doing a get_for_update
// when accessing the index, so don't acquire the snapshot right away.
// Otherwise acquire the snapshot immediately.
@@ -8689,6 +9103,8 @@ int ha_rocksdb::index_end() {
release_scan_iterator();
+ bitmap_free(&m_lookup_bitmap);
+
active_index = MAX_KEY;
in_range_check_pushed_down = FALSE;
@@ -8733,6 +9149,7 @@ int ha_rocksdb::delete_row(const uchar *const buf) {
rocksdb::Slice key_slice(m_last_rowkey.ptr(), m_last_rowkey.length());
Rdb_transaction *const tx = get_or_create_tx(table->in_use);
+ ulonglong bytes_written = 0;
const uint index = pk_index(table, m_tbl_def);
rocksdb::Status s =
@@ -8740,6 +9157,8 @@ int ha_rocksdb::delete_row(const uchar *const buf) {
if (!s.ok()) {
DBUG_RETURN(tx->set_status_error(table->in_use, s, *m_pk_descr, m_tbl_def,
m_table_handler));
+ } else {
+ bytes_written = key_slice.size();
}
longlong hidden_pk_id = 0;
@@ -8762,6 +9181,7 @@ int ha_rocksdb::delete_row(const uchar *const buf) {
/* Deleting on secondary key doesn't need any locks: */
tx->get_indexed_write_batch()->SingleDelete(kd.get_cf(),
secondary_key_slice);
+ bytes_written += secondary_key_slice.size();
}
}
@@ -8772,6 +9192,7 @@ int ha_rocksdb::delete_row(const uchar *const buf) {
}
stats.rows_deleted++;
update_row_stats(ROWS_DELETED);
+ tx->update_bytes_written(bytes_written);
DBUG_RETURN(HA_EXIT_SUCCESS);
}
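
delete_row() above follows the same accounting pattern as update_pk()/update_sk(): sum the sizes of every slice the row operation queues and report the total once through Rdb_transaction::update_bytes_written(), which appears to be what the new bytes_written test further down checks via TABLE_STATISTICS. A minimal sketch of that pattern, with a hypothetical atomic counter standing in for the transaction/table-handler plumbing:

#include <atomic>
#include <cstdint>
#include <vector>

// Hypothetical stand-in for the per-transaction byte counter that
// Rdb_transaction::update_bytes_written() feeds in the real code.
static std::atomic<uint64_t> tx_bytes_written{0};

// The row operations above accumulate the sizes of all key/value slices
// written for the row, then report the total once, and only if every
// write was accepted.
void account_row_write(const std::vector<size_t> &slice_sizes, bool ok) {
  uint64_t bytes = 0;
  for (size_t sz : slice_sizes) bytes += sz;
  if (ok) tx_bytes_written.fetch_add(bytes, std::memory_order_relaxed);
}
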
@@ -8860,13 +9281,37 @@ int ha_rocksdb::info(uint flag) {
stats.data_file_length+= sz;
}
- // Second, compute memtable stats
- uint64_t memtableCount;
- uint64_t memtableSize;
- rdb->GetApproximateMemTableStats(m_pk_descr->get_cf(), r,
- &memtableCount, &memtableSize);
- stats.records += memtableCount;
- stats.data_file_length += memtableSize;
+    // Second, compute memtable stats. This call is expensive, so cache the
+    // computed values for some time.
+ uint64_t cachetime = rocksdb_force_compute_memtable_stats_cachetime;
+ uint64_t time = (cachetime == 0) ? 0 : my_micro_time();
+ if (cachetime == 0 ||
+ time > m_table_handler->m_mtcache_last_update + cachetime) {
+ uint64_t memtableCount;
+ uint64_t memtableSize;
+
+ rdb->GetApproximateMemTableStats(m_pk_descr->get_cf(), r,
+ &memtableCount, &memtableSize);
+
+ // Atomically update all of these fields at the same time
+ if (cachetime > 0) {
+ if (m_table_handler->m_mtcache_lock.fetch_add(
+ 1, std::memory_order_acquire) == 0) {
+ m_table_handler->m_mtcache_count = memtableCount;
+ m_table_handler->m_mtcache_size = memtableSize;
+ m_table_handler->m_mtcache_last_update = time;
+ }
+ m_table_handler->m_mtcache_lock.fetch_sub(1,
+ std::memory_order_release);
+ }
+
+ stats.records += memtableCount;
+ stats.data_file_length += memtableSize;
+ } else {
+ // Cached data is still valid, so use it instead
+ stats.records += m_table_handler->m_mtcache_count;
+ stats.data_file_length += m_table_handler->m_mtcache_size;
+ }
if (rocksdb_debug_optimizer_n_rows > 0)
stats.records = rocksdb_debug_optimizer_n_rows;
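
The caching scheme above avoids a mutex on this hot path by using an atomic counter as a single-writer guard: only the thread that raises the counter from zero publishes fresh values; everyone else keeps serving the slightly stale cache for that round. A minimal sketch of the guard with hypothetical names:

#include <atomic>
#include <cstdint>

struct MemtableStatsCache {
  std::atomic<unsigned> lock{0};
  uint64_t count = 0;
  uint64_t size = 0;
  uint64_t last_update_us = 0;
};

void maybe_refresh(MemtableStatsCache *c, uint64_t now_us,
                   uint64_t fresh_count, uint64_t fresh_size) {
  if (c->lock.fetch_add(1, std::memory_order_acquire) == 0) {
    // We were the only entrant: safe to publish the new values.
    c->count = fresh_count;
    c->size = fresh_size;
    c->last_update_us = now_us;
  }
  c->lock.fetch_sub(1, std::memory_order_release);
}

Readers of m_mtcache_count/m_mtcache_size are not excluded, so a torn read is possible in principle; for optimizer statistics that trade-off is acceptable, and setting rocksdb_force_compute_memtable_stats_cachetime=0 (as the test my.cnf below does) bypasses the cache entirely.
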
@@ -9386,10 +9831,6 @@ void Rdb_drop_index_thread::run() {
uchar buf[Rdb_key_def::INDEX_NUMBER_SIZE * 2];
rocksdb::Range range = get_range(d.index_id, buf, is_reverse_cf ? 1 : 0,
is_reverse_cf ? 0 : 1);
- rocksdb::CompactRangeOptions compact_range_options;
- compact_range_options.bottommost_level_compaction =
- rocksdb::BottommostLevelCompaction::kForce;
- compact_range_options.exclusive_manual_compaction = false;
rocksdb::Status status = DeleteFilesInRange(rdb->GetBaseDB(), cfh,
&range.start, &range.limit);
if (!status.ok()) {
@@ -9398,7 +9839,7 @@ void Rdb_drop_index_thread::run() {
}
rdb_handle_io_error(status, RDB_IO_ERROR_BG_THREAD);
}
- status = rdb->CompactRange(compact_range_options, cfh, &range.start,
+ status = rdb->CompactRange(getCompactRangeOptions(), cfh, &range.start,
&range.limit);
if (!status.ok()) {
if (status.IsShutdownInProgress()) {
@@ -9485,6 +9926,7 @@ int ha_rocksdb::remove_rows(Rdb_tbl_def *const tbl) {
char key_buf[MAX_KEY_LENGTH];
uint key_len;
+ ulonglong bytes_written = 0;
/*
Remove all records in each index.
(This is not crash-safe, but it doesn't matter, because bulk row
@@ -9516,11 +9958,13 @@ int ha_rocksdb::remove_rows(Rdb_tbl_def *const tbl) {
return tx->set_status_error(table->in_use, s, *m_pk_descr, m_tbl_def,
m_table_handler);
}
-
+ bytes_written += key.size();
it->Next();
}
}
+ tx->update_bytes_written(bytes_written);
+
return HA_EXIT_SUCCESS;
}
@@ -9569,6 +10013,8 @@ int ha_rocksdb::rename_table(const char *const from, const char *const to) {
DBUG_RETURN(-1);
}
+ DBUG_EXECUTE_IF("gen_sql_table_name", to_str = to_str + "#sql-test";);
+
const std::unique_ptr<rocksdb::WriteBatch> wb = dict_manager.begin();
rocksdb::WriteBatch *const batch = wb.get();
dict_manager.lock();
@@ -9773,7 +10219,7 @@ int ha_rocksdb::optimize(THD *const thd, HA_CHECK_OPT *const check_opt) {
for (uint i = 0; i < table->s->keys; i++) {
uchar buf[Rdb_key_def::INDEX_NUMBER_SIZE * 2];
auto range = get_range(i, buf);
- const rocksdb::Status s = rdb->CompactRange(rocksdb::CompactRangeOptions(),
+ const rocksdb::Status s = rdb->CompactRange(getCompactRangeOptions(),
m_key_descr_arr[i]->get_cf(),
&range.start, &range.limit);
if (!s.ok()) {
@@ -10311,7 +10757,6 @@ int ha_rocksdb::inplace_populate_sk(
const std::unordered_set<std::shared_ptr<Rdb_key_def>> &indexes) {
DBUG_ENTER_FUNC();
int res = HA_EXIT_SUCCESS;
-
const std::unique_ptr<rocksdb::WriteBatch> wb = dict_manager.begin();
rocksdb::WriteBatch *const batch = wb.get();
@@ -10367,7 +10812,7 @@ int ha_rocksdb::inplace_populate_sk(
open handlers have been closed at this point, and the one we're on is the
only one left.
*/
- if (m_sst_info != nullptr) {
+ if (m_sst_info) {
if ((res = finalize_bulk_load())) {
DBUG_RETURN(res);
}
@@ -10377,14 +10822,16 @@ int ha_rocksdb::inplace_populate_sk(
const ulonglong rdb_merge_buf_size = THDVAR(ha_thd(), merge_buf_size);
const ulonglong rdb_merge_combine_read_size =
THDVAR(ha_thd(), merge_combine_read_size);
+ const ulonglong rdb_merge_tmp_file_removal_delay =
+ THDVAR(ha_thd(), merge_tmp_file_removal_delay_ms);
for (const auto &index : indexes) {
- const rocksdb::Comparator *index_comp = index->get_cf()->GetComparator();
bool is_unique_index =
new_table_arg->key_info[index->get_keyno()].flags & HA_NOSAME;
- Rdb_index_merge rdb_merge(thd_rocksdb_tmpdir(), rdb_merge_buf_size,
- rdb_merge_combine_read_size, index_comp);
+ Rdb_index_merge rdb_merge(
+ thd_rocksdb_tmpdir(), rdb_merge_buf_size, rdb_merge_combine_read_size,
+ rdb_merge_tmp_file_removal_delay, index->get_cf());
if ((res = rdb_merge.init())) {
DBUG_RETURN(res);
@@ -10413,7 +10860,8 @@ int ha_rocksdb::inplace_populate_sk(
/* Create new secondary index entry */
const int new_packed_size = index->pack_record(
new_table_arg, m_pack_buffer, table->record[0], m_sk_packed_tuple,
- &m_sk_tails, should_store_row_debug_checksums(), hidden_pk_id);
+ &m_sk_tails, should_store_row_debug_checksums(), hidden_pk_id, 0,
+ nullptr, nullptr, m_ttl_bytes);
const rocksdb::Slice key = rocksdb::Slice(
reinterpret_cast<const char *>(m_sk_packed_tuple), new_packed_size);
@@ -10478,7 +10926,7 @@ int ha_rocksdb::inplace_populate_sk(
/*
Insert key and slice to SST via SSTFileWriter API.
*/
- if ((res = bulk_load_key(tx, *index, merge_key, merge_val))) {
+ if ((res = bulk_load_key(tx, *index, merge_key, merge_val, false))) {
break;
}
}
@@ -10831,6 +11279,9 @@ static void myrocks_update_status() {
export_stats.queries_point = global_stats.queries[QUERIES_POINT];
export_stats.queries_range = global_stats.queries[QUERIES_RANGE];
+
+ export_stats.covered_secondary_key_lookups =
+ global_stats.covered_secondary_key_lookups;
}
static void myrocks_update_memory_status() {
@@ -10874,6 +11325,9 @@ static SHOW_VAR myrocks_status_variables[] = {
SHOW_LONGLONG),
DEF_STATUS_VAR_FUNC("queries_range", &export_stats.queries_range,
SHOW_LONGLONG),
+ DEF_STATUS_VAR_FUNC("covered_secondary_key_lookups",
+ &export_stats.covered_secondary_key_lookups,
+ SHOW_LONGLONG),
{NullS, NullS, SHOW_LONG}};
@@ -10884,6 +11338,91 @@ static void show_myrocks_vars(THD *thd, SHOW_VAR *var, char *buff) {
var->value = reinterpret_cast<char *>(&myrocks_status_variables);
}
+static ulonglong
+io_stall_prop_value(const std::map<std::string, std::string> &props,
+ const std::string &key) {
+ std::map<std::string, std::string>::const_iterator iter =
+ props.find("io_stalls." + key);
+ if (iter != props.end()) {
+ return std::stoull(iter->second);
+ } else {
+    DBUG_PRINT("warning",
+               ("RocksDB GetMapProperty hasn't returned key=%s", key.c_str()));
+ DBUG_ASSERT(0);
+ return 0;
+ }
+}
+
+static void update_rocksdb_stall_status() {
+ st_io_stall_stats local_io_stall_stats;
+ for (const auto &cf_name : cf_manager.get_cf_names()) {
+ rocksdb::ColumnFamilyHandle *cfh = cf_manager.get_cf(cf_name);
+ if (cfh == nullptr) {
+ continue;
+ }
+
+ std::map<std::string, std::string> props;
+ if (!rdb->GetMapProperty(cfh, "rocksdb.cfstats", &props)) {
+ continue;
+ }
+
+ local_io_stall_stats.level0_slowdown +=
+ io_stall_prop_value(props, "level0_slowdown");
+ local_io_stall_stats.level0_slowdown_with_compaction +=
+ io_stall_prop_value(props, "level0_slowdown_with_compaction");
+ local_io_stall_stats.level0_numfiles +=
+ io_stall_prop_value(props, "level0_numfiles");
+ local_io_stall_stats.level0_numfiles_with_compaction +=
+ io_stall_prop_value(props, "level0_numfiles_with_compaction");
+ local_io_stall_stats.stop_for_pending_compaction_bytes +=
+ io_stall_prop_value(props, "stop_for_pending_compaction_bytes");
+ local_io_stall_stats.slowdown_for_pending_compaction_bytes +=
+ io_stall_prop_value(props, "slowdown_for_pending_compaction_bytes");
+ local_io_stall_stats.memtable_compaction +=
+ io_stall_prop_value(props, "memtable_compaction");
+ local_io_stall_stats.memtable_slowdown +=
+ io_stall_prop_value(props, "memtable_slowdown");
+ local_io_stall_stats.total_stop += io_stall_prop_value(props, "total_stop");
+ local_io_stall_stats.total_slowdown +=
+ io_stall_prop_value(props, "total_slowdown");
+ }
+ io_stall_stats = local_io_stall_stats;
+}
+
+static SHOW_VAR rocksdb_stall_status_variables[] = {
+ DEF_STATUS_VAR_FUNC("l0_file_count_limit_slowdowns",
+ &io_stall_stats.level0_slowdown, SHOW_LONGLONG),
+ DEF_STATUS_VAR_FUNC("locked_l0_file_count_limit_slowdowns",
+ &io_stall_stats.level0_slowdown_with_compaction,
+ SHOW_LONGLONG),
+ DEF_STATUS_VAR_FUNC("l0_file_count_limit_stops",
+ &io_stall_stats.level0_numfiles, SHOW_LONGLONG),
+ DEF_STATUS_VAR_FUNC("locked_l0_file_count_limit_stops",
+ &io_stall_stats.level0_numfiles_with_compaction,
+ SHOW_LONGLONG),
+ DEF_STATUS_VAR_FUNC("pending_compaction_limit_stops",
+ &io_stall_stats.stop_for_pending_compaction_bytes,
+ SHOW_LONGLONG),
+ DEF_STATUS_VAR_FUNC("pending_compaction_limit_slowdowns",
+ &io_stall_stats.slowdown_for_pending_compaction_bytes,
+ SHOW_LONGLONG),
+ DEF_STATUS_VAR_FUNC("memtable_limit_stops",
+ &io_stall_stats.memtable_compaction, SHOW_LONGLONG),
+ DEF_STATUS_VAR_FUNC("memtable_limit_slowdowns",
+ &io_stall_stats.memtable_slowdown, SHOW_LONGLONG),
+ DEF_STATUS_VAR_FUNC("total_stops", &io_stall_stats.total_stop,
+ SHOW_LONGLONG),
+ DEF_STATUS_VAR_FUNC("total_slowdowns", &io_stall_stats.total_slowdown,
+ SHOW_LONGLONG),
+ // end of the array marker
+ {NullS, NullS, SHOW_LONG}};
+
+static void show_rocksdb_stall_vars(THD *thd, SHOW_VAR *var, char *buff) {
+ update_rocksdb_stall_status();
+ var->type = SHOW_ARRAY;
+ var->value = reinterpret_cast<char *>(&rocksdb_stall_status_variables);
+}
+
static SHOW_VAR rocksdb_status_vars[] = {
DEF_STATUS_VAR(block_cache_miss),
DEF_STATUS_VAR(block_cache_hit),
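
update_rocksdb_stall_status() above sources every counter from the per-column-family "rocksdb.cfstats" map property. A standalone sketch of that lookup using only the public RocksDB API; unlike io_stall_prop_value() it deliberately treats a missing key as zero instead of asserting:

#include <cstdint>
#include <map>
#include <string>
#include <rocksdb/db.h>

// Pull a single "io_stalls.*" counter out of the per-CF stats map.
uint64_t io_stall_counter(rocksdb::DB *db, rocksdb::ColumnFamilyHandle *cfh,
                          const std::string &key) {
  std::map<std::string, std::string> props;
  if (!db->GetMapProperty(cfh, rocksdb::DB::Properties::kCFStats, &props))
    return 0;
  const auto it = props.find("io_stalls." + key);
  return it == props.end() ? 0 : std::stoull(it->second);
}
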
@@ -10951,7 +11490,12 @@ static SHOW_VAR rocksdb_status_vars[] = {
SHOW_LONGLONG),
DEF_STATUS_VAR_PTR("number_sst_entry_other", &rocksdb_num_sst_entry_other,
SHOW_LONGLONG),
+    // the variables generated by SHOW_FUNC are sorted only by prefix (first
+    // arg in the tuple below), so make sure the prefix is unique to keep the
+    // sort order deterministic, as quick sort is not stable
{"rocksdb", reinterpret_cast<char *>(&show_myrocks_vars), SHOW_FUNC},
+ {"rocksdb_stall", reinterpret_cast<char *>(&show_rocksdb_stall_vars),
+ SHOW_FUNC},
{NullS, NullS, SHOW_LONG}};
/*
@@ -10998,10 +11542,13 @@ void Rdb_background_thread::run() {
timespec ts;
clock_gettime(CLOCK_REALTIME, &ts);
- // Flush the WAL.
- if (rdb && (rocksdb_flush_log_at_trx_commit == 2)) {
+    // Flush the WAL. For the "background" and "never" modes the WAL is
+    // flushed and synced here, mirroring InnoDB's behavior: with "never" the
+    // commit path doesn't even write the WAL, while with "background" commits
+    // write the WAL but leave the syncs to this background thread.
+ if (rdb && (rocksdb_flush_log_at_trx_commit != FLUSH_LOG_SYNC)) {
DBUG_ASSERT(!rocksdb_db_options->allow_mmap_writes);
- const rocksdb::Status s = rdb->SyncWAL();
+ const rocksdb::Status s = rdb->FlushWAL(true);
if (!s.ok()) {
rdb_handle_io_error(s, RDB_IO_ERROR_BG_THREAD);
}
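
The hunk above moves the background thread from SyncWAL() to FlushWAL(true), which writes any buffered WAL data and then syncs it, so it also covers the "never" mode where the commit path does not write the WAL at all. A small sketch of the mode handling, assuming the usual 0/1/2 semantics of rocksdb_flush_log_at_trx_commit (the enum below is local to the example):

#include <rocksdb/db.h>

// Local, illustrative copy of the mode values; the real enum lives in MyRocks.
enum FlushLogMode { FLUSH_LOG_NEVER = 0, FLUSH_LOG_SYNC = 1,
                    FLUSH_LOG_BACKGROUND = 2 };

// Called once per background-thread wakeup. With "sync" the commit path has
// already synced the WAL, so there is nothing to do here; with "background"
// the WAL was written at commit but not synced; with "never" it was not even
// written, and FlushWAL(true) both writes the buffered data and syncs it.
rocksdb::Status background_wal_flush(rocksdb::DB *db, FlushLogMode mode) {
  if (mode == FLUSH_LOG_SYNC) return rocksdb::Status::OK();
  return db->FlushWAL(/*sync=*/true);
}
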
@@ -11101,6 +11648,7 @@ int rdb_dbug_set_ttl_snapshot_ts() { return rocksdb_debug_ttl_snapshot_ts; }
int rdb_dbug_set_ttl_read_filter_ts() {
return rocksdb_debug_ttl_read_filter_ts;
}
+bool rdb_dbug_set_ttl_ignore_pk() { return rocksdb_debug_ttl_ignore_pk; }
#endif
void rdb_update_global_stats(const operation_type &type, uint count,
@@ -11322,6 +11870,17 @@ void rocksdb_set_delayed_write_rate(THD *thd, struct st_mysql_sys_var *var,
RDB_MUTEX_UNLOCK_CHECK(rdb_sysvars_mutex);
}
+void rocksdb_set_max_latest_deadlocks(THD *thd, struct st_mysql_sys_var *var,
+ void *var_ptr, const void *save) {
+ RDB_MUTEX_LOCK_CHECK(rdb_sysvars_mutex);
+ const uint32_t new_val = *static_cast<const uint32_t *>(save);
+ if (rocksdb_max_latest_deadlocks != new_val) {
+ rocksdb_max_latest_deadlocks = new_val;
+ rdb->SetDeadlockInfoBufferSize(rocksdb_max_latest_deadlocks);
+ }
+ RDB_MUTEX_UNLOCK_CHECK(rdb_sysvars_mutex);
+}
+
void rdb_set_collation_exception_list(const char *const exception_list) {
DBUG_ASSERT(rdb_collation_exceptions != nullptr);
@@ -11361,6 +11920,18 @@ void rocksdb_set_bulk_load(THD *const thd, struct st_mysql_sys_var *const var
*static_cast<bool *>(var_ptr) = *static_cast<const bool *>(save);
}
+void rocksdb_set_bulk_load_allow_unsorted(
+ THD *const thd,
+ struct st_mysql_sys_var *const var MY_ATTRIBUTE((__unused__)),
+ void *const var_ptr, const void *const save) {
+ if (THDVAR(thd, bulk_load)) {
+ my_error(ER_ERROR_WHEN_EXECUTING_COMMAND, MYF(0), "SET",
+ "Cannot change this setting while bulk load is enabled");
+ } else {
+ *static_cast<bool *>(var_ptr) = *static_cast<const bool *>(save);
+ }
+}
+
static void rocksdb_set_max_background_jobs(THD *thd,
struct st_mysql_sys_var *const var,
void *const var_ptr,
diff --git a/storage/rocksdb/ha_rocksdb.h b/storage/rocksdb/ha_rocksdb.h
index e6d4c2e6485..3b72173f94c 100644
--- a/storage/rocksdb/ha_rocksdb.h
+++ b/storage/rocksdb/ha_rocksdb.h
@@ -22,6 +22,7 @@
/* C++ standard header files */
#include <set>
#include <string>
+#include <unordered_map>
#include <unordered_set>
#include <vector>
@@ -236,6 +237,12 @@ const char *const RDB_TTL_COL_QUALIFIER = "ttl_col";
#define ROCKSDB_SIZEOF_TTL_RECORD sizeof(longlong)
/*
+ Maximum index prefix length in bytes.
+*/
+#define MAX_INDEX_COL_LEN_LARGE 3072
+#define MAX_INDEX_COL_LEN_SMALL 767
+
+/*
MyRocks specific error codes. NB! Please make sure that you will update
HA_ERR_ROCKSDB_LAST when adding new ones. Also update the strings in
rdb_error_messages to include any new error messages.
@@ -293,7 +300,14 @@ struct Rdb_table_handler {
/* Stores cumulative table statistics */
my_io_perf_atomic_t m_io_perf_read;
+ my_io_perf_atomic_t m_io_perf_write;
Rdb_atomic_perf_counters m_table_perf_context;
+
+ /* Stores cached memtable estimate statistics */
+ std::atomic_uint m_mtcache_lock;
+ uint64_t m_mtcache_count;
+ uint64_t m_mtcache_size;
+ uint64_t m_mtcache_last_update;
};
class Rdb_key_def;
@@ -360,6 +374,8 @@ struct st_global_stats {
ib_counter_t<ulonglong, 64, RDB_INDEXER> system_rows[ROWS_MAX];
ib_counter_t<ulonglong, 64, RDB_INDEXER> queries[QUERIES_MAX];
+
+ ib_counter_t<ulonglong, 64, RDB_INDEXER> covered_secondary_key_lookups;
};
/* Struct used for exporting status to MySQL */
@@ -379,6 +395,8 @@ struct st_export_stats {
ulonglong queries_point;
ulonglong queries_range;
+
+ ulonglong covered_secondary_key_lookups;
};
/* Struct used for exporting RocksDB memory status */
@@ -387,6 +405,27 @@ struct st_memory_stats {
ulonglong memtable_unflushed;
};
+/* Struct used for exporting RocksDB IO stalls stats */
+struct st_io_stall_stats {
+ ulonglong level0_slowdown;
+ ulonglong level0_slowdown_with_compaction;
+ ulonglong level0_numfiles;
+ ulonglong level0_numfiles_with_compaction;
+ ulonglong stop_for_pending_compaction_bytes;
+ ulonglong slowdown_for_pending_compaction_bytes;
+ ulonglong memtable_compaction;
+ ulonglong memtable_slowdown;
+ ulonglong total_stop;
+ ulonglong total_slowdown;
+
+ st_io_stall_stats()
+ : level0_slowdown(0), level0_slowdown_with_compaction(0),
+ level0_numfiles(0), level0_numfiles_with_compaction(0),
+ stop_for_pending_compaction_bytes(0),
+ slowdown_for_pending_compaction_bytes(0), memtable_compaction(0),
+ memtable_slowdown(0), total_stop(0), total_slowdown(0) {}
+};
+
} // namespace myrocks
#include "./rdb_buff.h"
@@ -495,6 +534,12 @@ class ha_rocksdb : public my_core::handler {
Pointer to the original TTL timestamp value (8 bytes) during UPDATE.
*/
char m_ttl_bytes[ROCKSDB_SIZEOF_TTL_RECORD];
+ /*
+ The TTL timestamp value can change if the explicit TTL column is
+ updated. If we detect this when updating the PK, we indicate it here so
+ we know we must always update any SK's.
+ */
+ bool m_ttl_bytes_updated;
/* rowkey of the last record we've read, in StorageFormat. */
String m_last_rowkey;
@@ -541,7 +586,9 @@ class ha_rocksdb : public my_core::handler {
bool m_update_scope_is_valid;
/* SST information used for bulk loading the primary key */
- std::shared_ptr<Rdb_sst_info> m_sst_info;
+ std::unique_ptr<Rdb_sst_info> m_sst_info;
+ /* External merge sorts for bulk load: key ID -> merge sort instance */
+ std::unordered_map<GL_INDEX_ID, Rdb_index_merge> m_key_merge;
Rdb_transaction *m_bulk_load_tx;
/* Mutex to protect finalizing bulk load */
mysql_mutex_t m_bulk_load_mutex;
@@ -637,6 +684,13 @@ class ha_rocksdb : public my_core::handler {
void setup_read_decoders();
/*
+    For the active index, the set of columns the index must be able to return
+    for the current lookup to be index-only. If the bitmap field is null, this
+    index cannot cover the current lookup for any record.
+ */
+ MY_BITMAP m_lookup_bitmap = {nullptr, 0, 0, nullptr, nullptr};
+
+ /*
Number of bytes in on-disk (storage) record format that are used for
storing SQL NULL flags.
*/
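
The m_lookup_bitmap added above feeds the covered-lookup check behind the new rocksdb_covered_secondary_key_lookups counter: a secondary-key read can skip the primary-key lookup only when every column the query needs is obtainable from the SK entry, and for prefix-encoded columns that is only known per record, from the covered bitmap stored in unpack_info. A purely conceptual sketch of the check (std::bitset instead of MY_BITMAP, hypothetical names):

#include <bitset>
#include <cstddef>

constexpr size_t kMaxCols = 64;

// The lookup is covered when every needed column is decodable, either
// unconditionally from the key format or per record via the stored bitmap.
bool lookup_is_covered(const std::bitset<kMaxCols> &cols_needed,
                       const std::bitset<kMaxCols> &always_decodable,
                       const std::bitset<kMaxCols> &record_covered_bitmap) {
  const std::bitset<kMaxCols> decodable =
      always_decodable | record_covered_bitmap;
  return (cols_needed & ~decodable).none();
}
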
@@ -854,11 +908,7 @@ public:
DBUG_RETURN(MAX_REF_PARTS);
}
- uint max_supported_key_part_length() const override {
- DBUG_ENTER_FUNC();
-
- DBUG_RETURN(2048);
- }
+ uint max_supported_key_part_length() const override;
/** @brief
unireg.cc will call this to make sure that the storage engine can handle
@@ -1040,9 +1090,13 @@ private:
rocksdb::Slice *const packed_rec)
MY_ATTRIBUTE((__nonnull__));
- bool should_hide_ttl_rec(const rocksdb::Slice &ttl_rec_val,
+ bool should_hide_ttl_rec(const Rdb_key_def &kd,
+ const rocksdb::Slice &ttl_rec_val,
const int64_t curr_ts)
MY_ATTRIBUTE((__warn_unused_result__));
+ void rocksdb_skip_expired_records(const Rdb_key_def &kd,
+ rocksdb::Iterator *const iter,
+ bool seek_backward);
int index_first_intern(uchar *buf)
MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
@@ -1076,8 +1130,10 @@ private:
struct unique_sk_buf_info *sk_info)
MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
int bulk_load_key(Rdb_transaction *const tx, const Rdb_key_def &kd,
- const rocksdb::Slice &key, const rocksdb::Slice &value)
+ const rocksdb::Slice &key, const rocksdb::Slice &value,
+ bool sort)
MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
+ void update_bytes_written(ulonglong bytes_written);
int update_pk(const Rdb_key_def &kd, const struct update_row_info &row_info,
const bool &pk_changed) MY_ATTRIBUTE((__warn_unused_result__));
int update_sk(const TABLE *const table_arg, const Rdb_key_def &kd,
diff --git a/storage/rocksdb/ha_rocksdb_proto.h b/storage/rocksdb/ha_rocksdb_proto.h
index 1beb6b80622..71ec6957456 100644
--- a/storage/rocksdb/ha_rocksdb_proto.h
+++ b/storage/rocksdb/ha_rocksdb_proto.h
@@ -76,6 +76,7 @@ bool rdb_is_ttl_read_filtering_enabled();
int rdb_dbug_set_ttl_rec_ts();
int rdb_dbug_set_ttl_snapshot_ts();
int rdb_dbug_set_ttl_read_filter_ts();
+bool rdb_dbug_set_ttl_ignore_pk();
#endif
enum operation_type : int;
diff --git a/storage/rocksdb/mysql-test/rocksdb/include/prefix_index_only_query_check.inc b/storage/rocksdb/mysql-test/rocksdb/include/prefix_index_only_query_check.inc
new file mode 100644
index 00000000000..e96eb573c1f
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/include/prefix_index_only_query_check.inc
@@ -0,0 +1,21 @@
+#
+# A helper include file for prefix index index-only query tests
+#
+# Parameters:
+# $prefix_index_check_title - title of the test
+# $prefix_index_check_query - test query
+# $prefix_index_check_read_avoided_delta - expected change of
+# 'rocksdb_covered_secondary_key_lookups' status variable
+# value after running the query
+
+--let $show_count_statement = show status like 'rocksdb_covered_secondary_key_lookups'
+
+--echo # $prefix_index_check_title
+--let $base_count = query_get_value($show_count_statement, Value, 1)
+
+--eval $prefix_index_check_query
+
+--let $count = query_get_value($show_count_statement, Value, 1)
+--let $assert_text= $prefix_index_check_title: $prefix_index_check_read_avoided_delta rocksdb_covered_secondary_key_lookups
+--let $assert_cond= $count - $base_count = $prefix_index_check_read_avoided_delta
+--source include/assert.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/my.cnf b/storage/rocksdb/mysql-test/rocksdb/my.cnf
index 2ed68088259..faeae84da5f 100644
--- a/storage/rocksdb/mysql-test/rocksdb/my.cnf
+++ b/storage/rocksdb/mysql-test/rocksdb/my.cnf
@@ -5,3 +5,4 @@ sql-mode=NO_ENGINE_SUBSTITUTION
explicit-defaults-for-timestamp=1
rocksdb_lock_wait_timeout=1
rocksdb_strict_collation_check=0
+rocksdb_force_compute_memtable_stats_cachetime=0
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/2pc_group_commit.result b/storage/rocksdb/mysql-test/rocksdb/r/2pc_group_commit.result
index 06452a5437f..7b15ed47d44 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/2pc_group_commit.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/2pc_group_commit.result
@@ -4,20 +4,20 @@ CREATE DATABASE mysqlslap;
USE mysqlslap;
CREATE TABLE t1(id BIGINT AUTO_INCREMENT, value BIGINT, PRIMARY KEY(id)) ENGINE=rocksdb;
# 2PC enabled, MyRocks durability enabled
-SET GLOBAL rocksdb_enable_2pc=0;
+SET GLOBAL rocksdb_enable_2pc=1;
SET GLOBAL rocksdb_flush_log_at_trx_commit=1;
## 2PC + durability + single thread
select variable_value into @c from information_schema.global_status where variable_name='rocksdb_wal_group_syncs';
select case when variable_value-@c = 1000 then 'true' else 'false' end from information_schema.global_status where variable_name='rocksdb_wal_group_syncs';
case when variable_value-@c = 1000 then 'true' else 'false' end
-false
+true
## 2PC + durability + group commit
select variable_value into @c from information_schema.global_status where variable_name='rocksdb_wal_group_syncs';
select case when variable_value-@c > 0 and variable_value-@c < 10000 then 'true' else 'false' end from information_schema.global_status where variable_name='rocksdb_wal_group_syncs';
case when variable_value-@c > 0 and variable_value-@c < 10000 then 'true' else 'false' end
-false
+true
# 2PC enabled, MyRocks durability disabled
-SET GLOBAL rocksdb_enable_2pc=0;
+SET GLOBAL rocksdb_enable_2pc=1;
SET GLOBAL rocksdb_flush_log_at_trx_commit=0;
select variable_value into @c from information_schema.global_status where variable_name='rocksdb_wal_group_syncs';
select case when variable_value-@c = 0 then 'true' else 'false' end from information_schema.global_status where variable_name='rocksdb_wal_group_syncs';
@@ -28,16 +28,16 @@ select case when variable_value-@c = 0 then 'true' else 'false' end from informa
case when variable_value-@c = 0 then 'true' else 'false' end
true
# 2PC disabled, MyRocks durability enabled
-SET GLOBAL rocksdb_enable_2pc=1;
+SET GLOBAL rocksdb_enable_2pc=0;
SET GLOBAL rocksdb_flush_log_at_trx_commit=1;
select variable_value into @c from information_schema.global_status where variable_name='rocksdb_wal_group_syncs';
select case when variable_value-@c = 0 then 'true' else 'false' end from information_schema.global_status where variable_name='rocksdb_wal_group_syncs';
case when variable_value-@c = 0 then 'true' else 'false' end
-false
+true
select variable_value into @c from information_schema.global_status where variable_name='rocksdb_wal_group_syncs';
select case when variable_value-@c = 0 then 'true' else 'false' end from information_schema.global_status where variable_name='rocksdb_wal_group_syncs';
case when variable_value-@c = 0 then 'true' else 'false' end
-false
+true
SET GLOBAL rocksdb_enable_2pc=1;
SET GLOBAL rocksdb_flush_log_at_trx_commit=1;
DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/add_index_inplace.result b/storage/rocksdb/mysql-test/rocksdb/r/add_index_inplace.result
index f5a2b28ee42..b7b60f26816 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/add_index_inplace.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/add_index_inplace.result
@@ -284,12 +284,16 @@ DROP TABLE t1;
set global rocksdb_bulk_load=1;
# Establish connection con1 (user=root)
# Switch to connection con1
-show global variables like 'rocksdb_bulk_load';
+show global variables like 'rocksdb_bulk_load%';
Variable_name Value
rocksdb_bulk_load ON
-show session variables like 'rocksdb_bulk_load';
+rocksdb_bulk_load_allow_unsorted OFF
+rocksdb_bulk_load_size 1000
+show session variables like 'rocksdb_bulk_load%';
Variable_name Value
rocksdb_bulk_load ON
+rocksdb_bulk_load_allow_unsorted OFF
+rocksdb_bulk_load_size 1000
CREATE TABLE t1 (i INT, j INT, PRIMARY KEY (i)) ENGINE = ROCKSDB;
INSERT INTO t1 VALUES (1,1);
# Disconnecting on con1
@@ -327,10 +331,11 @@ SET @prior_rocksdb_strict_collation_check= @@rocksdb_strict_collation_check;
SET @prior_rocksdb_merge_buf_size = @@rocksdb_merge_buf_size;
SET global rocksdb_strict_collation_check = off;
SET session rocksdb_merge_combine_read_size = 566;
-SET session rocksdb_merge_buf_size = 336;
-show variables like '%rocksdb_bulk_load%';
+SET session rocksdb_merge_buf_size = 340;
+show variables like 'rocksdb_bulk_load%';
Variable_name Value
rocksdb_bulk_load OFF
+rocksdb_bulk_load_allow_unsorted OFF
rocksdb_bulk_load_size 1000
CREATE TABLE t1 (a VARCHAR(80)) ENGINE=RocksDB;
INSERT INTO t1 (a) VALUES (REPEAT("a", 80));
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_errors.result b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_errors.result
index 31562d1da10..be198d02aaf 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_errors.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_errors.result
@@ -16,4 +16,30 @@ INSERT INTO t1 VALUES(20);
INSERT INTO t1 VALUES(21);
SET rocksdb_bulk_load=0;
ERROR HY000: Lost connection to MySQL server during query
+TRUNCATE TABLE t1;
+SET rocksdb_bulk_load_allow_unsorted=1;
+SET rocksdb_bulk_load=1;
+INSERT INTO t1 VALUES(100);
+INSERT INTO t1 VALUES(101);
+INSERT INTO t1 VALUES(99);
+SET rocksdb_bulk_load=0;
+SELECT * FROM t1;
+pk
+99
+100
+101
+TRUNCATE TABLE t1;
+SET rocksdb_bulk_load=1;
+INSERT INTO t1 VALUES(201);
+INSERT INTO t1 VALUES(200);
+INSERT INTO t1 VALUES(202);
+INSERT INTO t1 VALUES(201);
+ERROR 23000: Failed to insert the record: the key already exists
+SET rocksdb_bulk_load=0;
+SELECT * FROM t1;
+pk
+200
+201
+202
+SET rocksdb_bulk_load_allow_unsorted=DEFAULT;
DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted.result b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted.result
new file mode 100644
index 00000000000..31509227279
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted.result
@@ -0,0 +1,101 @@
+DROP TABLE IF EXISTS t1;
+SET rocksdb_bulk_load_size=3;
+SET rocksdb_bulk_load_allow_unsorted=1;
+CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "cf1");
+SET rocksdb_bulk_load=1;
+SELECT * FROM t1;
+a b
+SET rocksdb_bulk_load=0;
+SELECT * FROM t1;
+a b
+-3 5
+-1 3
+2 0
+4 -2
+6 -4
+DROP TABLE t1;
+CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "cf1", KEY(b));
+SET rocksdb_bulk_load=1;
+SELECT * FROM t1;
+a b
+6 -4
+4 -2
+2 0
+-1 3
+-3 5
+SET rocksdb_bulk_load=0;
+DROP TABLE t1;
+CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "cf1");
+CREATE TABLE t2(a INT, b INT, PRIMARY KEY(a) COMMENT "cf1");
+SET rocksdb_bulk_load=1;
+INSERT INTO t1 VALUES (1,1);
+INSERT INTO t2 VALUES (1,1);
+SELECT * FROM t1;
+a b
+1 1
+INSERT INTO t1 VALUES (2,2);
+SELECT * FROM t2;
+a b
+1 1
+SELECT * FROM t1;
+a b
+1 1
+SET rocksdb_bulk_load=0;
+SELECT * FROM t1;
+a b
+1 1
+2 2
+DROP TABLE t1, t2;
+CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "cf1");
+CREATE TABLE t2(a INT, b INT, PRIMARY KEY(b) COMMENT "cf1");
+CREATE TABLE t3(a INT, b INT, PRIMARY KEY(a) COMMENT "cf1")
+PARTITION BY KEY() PARTITIONS 4;
+set session transaction isolation level repeatable read;
+select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS';
+STAT_TYPE VALUE
+DB_NUM_SNAPSHOTS 0
+start transaction with consistent snapshot;
+select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS';
+STAT_TYPE VALUE
+DB_NUM_SNAPSHOTS 1
+set rocksdb_bulk_load=1;
+set rocksdb_bulk_load_size=100000;
+LOAD DATA INFILE <input_file> INTO TABLE t1;
+LOAD DATA INFILE <input_file> INTO TABLE t2;
+LOAD DATA INFILE <input_file> INTO TABLE t3;
+set rocksdb_bulk_load=0;
+SHOW TABLE STATUS WHERE name LIKE 't%';
+Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL partitioned
+ANALYZE TABLE t1, t2, t3;
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
+test.t2 analyze status OK
+test.t3 analyze status OK
+SHOW TABLE STATUS WHERE name LIKE 't%';
+Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL partitioned
+select count(a) from t1;
+count(a)
+5000000
+select count(b) from t1;
+count(b)
+5000000
+select count(a) from t2;
+count(a)
+5000000
+select count(b) from t2;
+count(b)
+5000000
+select count(a) from t3;
+count(a)
+5000000
+select count(b) from t3;
+count(b)
+5000000
+DROP TABLE t1, t2, t3;
+SET rocksdb_bulk_load_allow_unsorted=0;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted_errors.result b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted_errors.result
new file mode 100644
index 00000000000..d8e5b92e897
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted_errors.result
@@ -0,0 +1,4 @@
+SET rocksdb_bulk_load=1;
+SET rocksdb_bulk_load_allow_unsorted=1;
+ERROR HY000: Error when executing command SET: Cannot change this setting while bulk load is enabled
+SET rocksdb_bulk_load=0;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bytes_written.result b/storage/rocksdb/mysql-test/rocksdb/r/bytes_written.result
new file mode 100644
index 00000000000..d9d29e6ac69
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/bytes_written.result
@@ -0,0 +1,10 @@
+DROP TABLE IF EXISTS stats_test_table;
+CREATE TABLE stats_test_table (a INT, b INT, PRIMARY KEY (a)) ENGINE=ROCKSDB;
+SET GLOBAL rocksdb_perf_context_level=3;
+INSERT INTO stats_test_table VALUES (7,1);
+INSERT INTO stats_test_table VALUES (2,2);
+SELECT io_write_bytes > 0 FROM INFORMATION_SCHEMA.TABLE_STATISTICS WHERE TABLE_NAME = "stats_test_table";
+io_write_bytes > 0
+1
+DROP TABLE stats_test_table;
+SET GLOBAL rocksdb_perf_context_level=DEFAULT;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/cons_snapshot_read_committed.result b/storage/rocksdb/mysql-test/rocksdb/r/cons_snapshot_read_committed.result
index 521edec0c83..7f1e3d8e53f 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/cons_snapshot_read_committed.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/cons_snapshot_read_committed.result
@@ -5,7 +5,7 @@ connection con1;
CREATE TABLE t1 (a INT, pk INT AUTO_INCREMENT PRIMARY KEY) ENGINE=ROCKSDB;
SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
START TRANSACTION WITH CONSISTENT SNAPSHOT;
-ERROR: 1938
+ERROR: 12048
connection con2;
select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS';
STAT_TYPE VALUE
@@ -18,7 +18,7 @@ STAT_TYPE VALUE
DB_NUM_SNAPSHOTS 0
connection con1;
START TRANSACTION WITH CONSISTENT SNAPSHOT;
-ERROR: 1938
+ERROR: 12048
connection con2;
INSERT INTO t1 (a) VALUES (1);
connection con1;
@@ -69,7 +69,7 @@ id value value2
5 5 5
6 6 6
START TRANSACTION WITH CONSISTENT SNAPSHOT;
-ERROR: 1938
+ERROR: 12048
connection con2;
INSERT INTO r1 values (7,7,7);
connection con1;
@@ -107,12 +107,12 @@ id value value2
7 7 7
8 8 8
START TRANSACTION WITH CONSISTENT SNAPSHOT;
-ERROR: 1938
+ERROR: 12048
connection con2;
INSERT INTO r1 values (9,9,9);
connection con1;
START TRANSACTION WITH CONSISTENT SNAPSHOT;
-ERROR: 1938
+ERROR: 12048
connection con2;
INSERT INTO r1 values (10,10,10);
connection con1;
@@ -129,7 +129,7 @@ id value value2
9 9 9
10 10 10
START TRANSACTION WITH CONSISTENT SNAPSHOT;
-ERROR: 1938
+ERROR: 12048
INSERT INTO r1 values (11,11,11);
ERROR: 0
SELECT * FROM r1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/cons_snapshot_repeatable_read.result b/storage/rocksdb/mysql-test/rocksdb/r/cons_snapshot_repeatable_read.result
index 805a0aaa0fd..90723ff762c 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/cons_snapshot_repeatable_read.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/cons_snapshot_repeatable_read.result
@@ -125,7 +125,7 @@ id value value2
START TRANSACTION WITH CONSISTENT SNAPSHOT;
ERROR: 0
INSERT INTO r1 values (11,11,11);
-ERROR: 1935
+ERROR: 12045
SELECT * FROM r1;
id value value2
1 1 1
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/covered_unpack_info_format.result b/storage/rocksdb/mysql-test/rocksdb/r/covered_unpack_info_format.result
new file mode 100644
index 00000000000..195215331b8
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/covered_unpack_info_format.result
@@ -0,0 +1,73 @@
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (
+id INT,
+fake_id INT,
+bigfield VARCHAR(4096),
+PRIMARY KEY (id),
+KEY bf (bigfield(32)),
+KEY fid (fake_id, bigfield(32))
+) ENGINE=rocksdb;
+INSERT INTO t1 VALUES (1, 1001, REPEAT('a', 1)),
+(8, 1008, REPEAT('b', 8)),
+(24, 1024, REPEAT('c', 24)),
+(31, 1031, REPEAT('d', 31)),
+(32, 1032, REPEAT('x', 32)),
+(33, 1033, REPEAT('y', 33)),
+(128, 1128, REPEAT('z', 128));
+SELECT * FROM t1;
+id fake_id bigfield
+1 1001 a
+8 1008 bbbbbbbb
+24 1024 cccccccccccccccccccccccc
+31 1031 ddddddddddddddddddddddddddddddd
+32 1032 xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+33 1033 yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy
+128 1128 zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz
+# Eligible for optimization, access via fake_id only
+SELECT id, bigfield FROM t1 FORCE INDEX(fid) WHERE fake_id = 1031;
+id bigfield
+31 ddddddddddddddddddddddddddddddd
+include/assert.inc [Eligible for optimization, access via fake_id only: 2 rocksdb_covered_secondary_key_lookups]
+# Not eligible for optimization, access via fake_id of big row.
+SELECT id, bigfield FROM t1 FORCE INDEX(fid) WHERE fake_id = 1033;
+id bigfield
+33 yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy
+include/assert.inc [Not eligible for optimization, access via fake_id of big row.: 0 rocksdb_covered_secondary_key_lookups]
+DROP TABLE t1;
+set session debug= '+d,MYROCKS_NO_COVERED_BITMAP_FORMAT';
+CREATE TABLE t1 (
+id INT,
+fake_id INT,
+bigfield VARCHAR(4096),
+PRIMARY KEY (id),
+KEY bf (bigfield(32)),
+KEY fid (fake_id, bigfield(32))
+) ENGINE=rocksdb;
+set session debug= '-d,MYROCKS_NO_COVERED_BITMAP_FORMAT';
+INSERT INTO t1 VALUES (1, 1001, REPEAT('a', 1)),
+(8, 1008, REPEAT('b', 8)),
+(24, 1024, REPEAT('c', 24)),
+(31, 1031, REPEAT('d', 31)),
+(32, 1032, REPEAT('x', 32)),
+(33, 1033, REPEAT('y', 33)),
+(128, 1128, REPEAT('z', 128));
+SELECT * FROM t1;
+id fake_id bigfield
+1 1001 a
+8 1008 bbbbbbbb
+24 1024 cccccccccccccccccccccccc
+31 1031 ddddddddddddddddddddddddddddddd
+32 1032 xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+33 1033 yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy
+128 1128 zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz
+# No longer eligible for optimization since no covered bitmap was stored.
+SELECT id, bigfield FROM t1 FORCE INDEX(fid) WHERE fake_id = 1031;
+id bigfield
+31 ddddddddddddddddddddddddddddddd
+include/assert.inc [No longer eligible for optimization since no covered bitmap was stored.: 0 rocksdb_covered_secondary_key_lookups]
+# Not eligible for optimization.
+SELECT id, bigfield FROM t1 FORCE INDEX(fid) WHERE fake_id = 1033;
+id bigfield
+33 yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy
+include/assert.inc [Not eligible for optimization.: 0 rocksdb_covered_secondary_key_lookups]
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/ddl_high_priority.result b/storage/rocksdb/mysql-test/rocksdb/r/ddl_high_priority.result
new file mode 100644
index 00000000000..39130475349
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/ddl_high_priority.result
@@ -0,0 +1,1009 @@
+##
+## Using the system variable high_priority_ddl"
+##
+create user test_user1@localhost;
+grant all on test to test_user1@localhost;
+create user test_user2@localhost;
+grant all on test to test_user2@localhost;
+
+## Test parameters:
+## use_sys_var = 1;
+## con_block = con1
+## con_kill = default
+## cmd = alter table t1 modify i bigint;
+## high_priority_cmd = alter high_priority table t1 modify i bigint;
+## should_kill = 1
+## recreate_table = 1
+## throw_error = 1
+
+drop table if exists t1;
+create table t1 (i int);
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `i` int(11) DEFAULT NULL
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+insert into t1 values (1), (2), (3);
+connection: con1
+lock tables t1 read;;
+connection: default
+set lock_wait_timeout = 0.02;
+set high_priority_lock_wait_timeout = 0.02;
+describe t1;
+Field Type Null Key Default Extra
+i int(11) YES NULL
+connection: default (for show processlist)
+# both con1 and default exist
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+connection: default
+alter table t1 modify i bigint;;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+set high_priority_ddl = 1;
+select @@high_priority_ddl;
+@@high_priority_ddl
+1
+lock tables t1 write;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+alter table t1 modify i bigint;;
+set high_priority_ddl = 0;
+connection: default (for show processlist)
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+
+## Test parameters:
+## use_sys_var = 1;
+## con_block = con1
+## con_kill = default
+## cmd = alter table t1 rename t1_new;
+## high_priority_cmd = alter high_priority table t1 rename t1_new;
+## should_kill = 1
+## recreate_table = 1
+## throw_error = 1
+
+drop table if exists t1;
+create table t1 (i int);
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `i` int(11) DEFAULT NULL
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+insert into t1 values (1), (2), (3);
+connection: con1
+begin; insert into t1 values (4); select i from t1;;
+i
+1
+2
+3
+4
+connection: default
+set lock_wait_timeout = 0.02;
+set high_priority_lock_wait_timeout = 0.02;
+describe t1;
+Field Type Null Key Default Extra
+i int(11) YES NULL
+connection: default (for show processlist)
+# both con1 and default exist
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+connection: default
+alter table t1 rename t1_new;;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+set high_priority_ddl = 1;
+select @@high_priority_ddl;
+@@high_priority_ddl
+1
+lock tables t1 write;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+alter table t1 rename t1_new;;
+set high_priority_ddl = 0;
+connection: default (for show processlist)
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+select * from t1_new;
+i
+1
+2
+3
+drop table t1_new;
+
+## Test parameters:
+## use_sys_var = 1;
+## con_block = con1
+## con_kill = default
+## cmd = drop table t1;
+## high_priority_cmd = drop high_priority table t1;
+## should_kill = 0
+## recreate_table = 1
+## throw_error = 1
+
+drop table if exists t1;
+create table t1 (i int);
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `i` int(11) DEFAULT NULL
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+insert into t1 values (1), (2), (3);
+connection: con1
+lock tables t1 write;;
+connection: default
+set lock_wait_timeout = 0.02;
+set high_priority_lock_wait_timeout = 0.02;
+describe t1;
+Field Type Null Key Default Extra
+i int(11) YES NULL
+connection: default (for show processlist)
+# both con1 and default exist
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+connection: default
+drop table t1;;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+set high_priority_ddl = 1;
+select @@high_priority_ddl;
+@@high_priority_ddl
+1
+lock tables t1 write;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+drop table t1;;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+set high_priority_ddl = 0;
+connection: default (for show processlist)
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+
+## Test parameters:
+## use_sys_var = 1;
+## con_block = con1
+## con_kill = default
+## cmd = drop table t1;
+## high_priority_cmd = drop high_priority table t1;
+## should_kill = 1
+## recreate_table = 1
+## throw_error = 1
+
+drop table if exists t1;
+create table t1 (i int);
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `i` int(11) DEFAULT NULL
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+insert into t1 values (1), (2), (3);
+connection: con1
+lock tables t1 read; begin; insert into t1 values (4);;
+connection: default
+set lock_wait_timeout = 0.02;
+set high_priority_lock_wait_timeout = 0.02;
+describe t1;
+Field Type Null Key Default Extra
+i int(11) YES NULL
+connection: default (for show processlist)
+# both con1 and default exist
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+connection: default
+drop table t1;;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+set high_priority_ddl = 1;
+select @@high_priority_ddl;
+@@high_priority_ddl
+1
+lock tables t1 write;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+drop table t1;;
+set high_priority_ddl = 0;
+connection: default (for show processlist)
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+
+## Test parameters:
+## use_sys_var = 1;
+## con_block = con1
+## con_kill = con2
+## cmd = alter table t1 modify i bigint;
+## high_priority_cmd = alter high_priority table t1 modify i bigint;
+## should_kill = 0
+## recreate_table = 1
+## throw_error = 1
+
+drop table if exists t1;
+create table t1 (i int);
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `i` int(11) DEFAULT NULL
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+insert into t1 values (1), (2), (3);
+connection: con1
+lock tables t1 read;;
+connection: con2
+set lock_wait_timeout = 0.02;
+set high_priority_lock_wait_timeout = 0.02;
+describe t1;
+Field Type Null Key Default Extra
+i int(11) YES NULL
+connection: default (for show processlist)
+# both con1 and con2 exist
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> test_user2 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+connection: con2
+alter table t1 modify i bigint;;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+set high_priority_ddl = 1;
+select @@high_priority_ddl;
+@@high_priority_ddl
+1
+lock tables t1 write;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+alter table t1 modify i bigint;;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+set high_priority_ddl = 0;
+connection: default (for show processlist)
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> test_user2 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+
+## Test parameters:
+## use_sys_var = 1;
+## con_block = con1
+## con_kill = default
+## cmd = create index idx1 on t1 (i);
+## high_priority_cmd = create high_priority index idx1 on t1 (i);
+## should_kill = 1
+## recreate_table = 1
+## throw_error = 1
+
+drop table if exists t1;
+create table t1 (i int);
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `i` int(11) DEFAULT NULL
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+insert into t1 values (1), (2), (3);
+connection: con1
+lock tables t1 read;;
+connection: default
+set lock_wait_timeout = 0.02;
+set high_priority_lock_wait_timeout = 0.02;
+describe t1;
+Field Type Null Key Default Extra
+i int(11) YES NULL
+connection: default (for show processlist)
+# both con1 and default exist
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+connection: default
+create index idx1 on t1 (i);;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+set high_priority_ddl = 1;
+select @@high_priority_ddl;
+@@high_priority_ddl
+1
+lock tables t1 write;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+create index idx1 on t1 (i);;
+set high_priority_ddl = 0;
+connection: default (for show processlist)
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+
+## Test parameters:
+## use_sys_var = 1;
+## con_block = con1
+## con_kill = default
+## cmd = drop index idx1 on t1;
+## high_priority_cmd = drop high_priority index idx1 on t1;
+## should_kill = 1
+## recreate_table = 0
+## throw_error = 1
+
+connection: con1
+lock tables t1 read;;
+connection: default
+set lock_wait_timeout = 0.02;
+set high_priority_lock_wait_timeout = 0.02;
+describe t1;
+Field Type Null Key Default Extra
+i int(11) YES MUL NULL
+connection: default (for show processlist)
+# both con1 and default exist
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+connection: default
+drop index idx1 on t1;;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+set high_priority_ddl = 1;
+select @@high_priority_ddl;
+@@high_priority_ddl
+1
+lock tables t1 write;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+drop index idx1 on t1;;
+set high_priority_ddl = 0;
+connection: default (for show processlist)
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+
+## Test parameters:
+## use_sys_var = 1;
+## con_block = con1
+## con_kill = default
+## cmd = truncate t1;
+## high_priority_cmd = truncate high_priority t1;
+## should_kill = 1
+## recreate_table = 1
+## throw_error = 1
+
+drop table if exists t1;
+create table t1 (i int);
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `i` int(11) DEFAULT NULL
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+insert into t1 values (1), (2), (3);
+connection: con1
+lock tables t1 read;;
+connection: default
+set lock_wait_timeout = 0.02;
+set high_priority_lock_wait_timeout = 0.02;
+describe t1;
+Field Type Null Key Default Extra
+i int(11) YES NULL
+connection: default (for show processlist)
+# both con1 and default exist
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+connection: default
+truncate t1;;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+set high_priority_ddl = 1;
+select @@high_priority_ddl;
+@@high_priority_ddl
+1
+lock tables t1 write;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+truncate t1;;
+set high_priority_ddl = 0;
+connection: default (for show processlist)
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+
+## Test parameters:
+## use_sys_var = 1;
+## con_block = con1
+## con_kill = default
+## cmd = create trigger ins_sum before insert on t1 for each row set @sum = @sum + new.i;
+## high_priority_cmd = create high_priority trigger ins_sum before insert on t1 for each row set @sum = @sum + new.i;
+## should_kill = 1
+## recreate_table = 1
+## throw_error = 1
+
+drop table if exists t1;
+create table t1 (i int);
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `i` int(11) DEFAULT NULL
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+insert into t1 values (1), (2), (3);
+connection: con1
+lock tables t1 read;;
+connection: default
+set lock_wait_timeout = 0.02;
+set high_priority_lock_wait_timeout = 0.02;
+describe t1;
+Field Type Null Key Default Extra
+i int(11) YES NULL
+connection: default (for show processlist)
+# both con1 and default exist
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+connection: default
+create trigger ins_sum before insert on t1 for each row set @sum = @sum + new.i;;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+set high_priority_ddl = 1;
+select @@high_priority_ddl;
+@@high_priority_ddl
+1
+lock tables t1 write;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+create trigger ins_sum before insert on t1 for each row set @sum = @sum + new.i;;
+set high_priority_ddl = 0;
+connection: default (for show processlist)
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+
+## Test parameters:
+## use_sys_var = 1;
+## con_block = con1
+## con_kill = default
+## cmd = drop trigger ins_sum;
+## high_priority_cmd = drop high_priority trigger ins_sum;
+## should_kill = 1
+## recreate_table = 0
+## throw_error = 1
+
+connection: con1
+lock tables t1 read;;
+connection: default
+set lock_wait_timeout = 0.02;
+set high_priority_lock_wait_timeout = 0.02;
+describe t1;
+Field Type Null Key Default Extra
+i int(11) YES NULL
+connection: default (for show processlist)
+# both con1 and default exist
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+connection: default
+drop trigger ins_sum;;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+set high_priority_ddl = 1;
+select @@high_priority_ddl;
+@@high_priority_ddl
+1
+lock tables t1 write;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+drop trigger ins_sum;;
+set high_priority_ddl = 0;
+connection: default (for show processlist)
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+
+## Test parameters:
+## use_sys_var = 1;
+## con_block = con1
+## con_kill = default
+## cmd = optimize table t1;
+## high_priority_cmd = optimize high_priority table t1;
+## should_kill = 1
+## recreate_table = 1
+## throw_error = 0
+
+drop table if exists t1;
+create table t1 (i int);
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `i` int(11) DEFAULT NULL
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+insert into t1 values (1), (2), (3);
+connection: con1
+lock tables t1 read;;
+connection: default
+set lock_wait_timeout = 0.02;
+set high_priority_lock_wait_timeout = 0.02;
+describe t1;
+Field Type Null Key Default Extra
+i int(11) YES NULL
+connection: default (for show processlist)
+# both con1 and default exist
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+connection: default
+optimize table t1;;
+Table Op Msg_type Msg_text
+test.t1 optimize Error Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+test.t1 optimize status Operation failed
+set high_priority_ddl = 1;
+select @@high_priority_ddl;
+@@high_priority_ddl
+1
+lock tables t1 write;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+optimize table t1;;
+Table Op Msg_type Msg_text
+test.t1 optimize status OK
+set high_priority_ddl = 0;
+connection: default (for show processlist)
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+drop user test_user1@localhost;
+drop user test_user2@localhost;
+drop table if exists t1;
+##
+## Using HIGH_PRIORITY syntax
+##
+create user test_user1@localhost;
+grant all on test to test_user1@localhost;
+create user test_user2@localhost;
+grant all on test to test_user2@localhost;
+
+## Test parameters:
+## use_sys_var = 0;
+## con_block = con1
+## con_kill = default
+## cmd = alter table t1 modify i bigint;
+## high_priority_cmd = alter high_priority table t1 modify i bigint;
+## should_kill = 1
+## recreate_table = 1
+## throw_error = 1
+
+drop table if exists t1;
+create table t1 (i int);
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `i` int(11) DEFAULT NULL
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+insert into t1 values (1), (2), (3);
+connection: con1
+lock tables t1 read;;
+connection: default
+set lock_wait_timeout = 0.02;
+set high_priority_lock_wait_timeout = 0.02;
+describe t1;
+Field Type Null Key Default Extra
+i int(11) YES NULL
+connection: default (for show processlist)
+# both con1 and default exist
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+connection: default
+alter table t1 modify i bigint;;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+alter high_priority table t1 modify i bigint;;
+connection: default (for show processlist)
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+
+## Test parameters:
+## use_sys_var = 0;
+## con_block = con1
+## con_kill = default
+## cmd = alter table t1 rename t1_new;
+## high_priority_cmd = alter high_priority table t1 rename t1_new;
+## should_kill = 1
+## recreate_table = 1
+## throw_error = 1
+
+drop table if exists t1;
+create table t1 (i int);
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `i` int(11) DEFAULT NULL
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+insert into t1 values (1), (2), (3);
+connection: con1
+begin; insert into t1 values (4); select i from t1;;
+i
+1
+2
+3
+4
+connection: default
+set lock_wait_timeout = 0.02;
+set high_priority_lock_wait_timeout = 0.02;
+describe t1;
+Field Type Null Key Default Extra
+i int(11) YES NULL
+connection: default (for show processlist)
+# both con1 and default exist
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+connection: default
+alter table t1 rename t1_new;;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+alter high_priority table t1 rename t1_new;;
+connection: default (for show processlist)
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+select * from t1_new;
+i
+1
+2
+3
+drop table t1_new;
+
+## Test parameters:
+## use_sys_var = 0;
+## con_block = con1
+## con_kill = default
+## cmd = drop table t1;
+## high_priority_cmd = drop high_priority table t1;
+## should_kill = 0
+## recreate_table = 1
+## throw_error = 1
+
+drop table if exists t1;
+create table t1 (i int);
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `i` int(11) DEFAULT NULL
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+insert into t1 values (1), (2), (3);
+connection: con1
+lock tables t1 write;;
+connection: default
+set lock_wait_timeout = 0.02;
+set high_priority_lock_wait_timeout = 0.02;
+describe t1;
+Field Type Null Key Default Extra
+i int(11) YES NULL
+connection: default (for show processlist)
+# both con1 and default exist
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+connection: default
+drop table t1;;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+drop high_priority table t1;;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+connection: default (for show processlist)
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+
+## Test parameters:
+## use_sys_var = 0;
+## con_block = con1
+## con_kill = default
+## cmd = drop table t1;
+## high_priority_cmd = drop high_priority table t1;
+## should_kill = 1
+## recreate_table = 1
+## throw_error = 1
+
+drop table if exists t1;
+create table t1 (i int);
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `i` int(11) DEFAULT NULL
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+insert into t1 values (1), (2), (3);
+connection: con1
+lock tables t1 read; begin; insert into t1 values (4);;
+connection: default
+set lock_wait_timeout = 0.02;
+set high_priority_lock_wait_timeout = 0.02;
+describe t1;
+Field Type Null Key Default Extra
+i int(11) YES NULL
+connection: default (for show processlist)
+# both con1 and default exist
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+connection: default
+drop table t1;;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+drop high_priority table t1;;
+connection: default (for show processlist)
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+
+## Test parameters:
+## use_sys_var = 0;
+## con_block = con1
+## con_kill = con2
+## cmd = alter table t1 modify i bigint;
+## high_priority_cmd = alter high_priority table t1 modify i bigint;
+## should_kill = 0
+## recreate_table = 1
+## throw_error = 1
+
+drop table if exists t1;
+create table t1 (i int);
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `i` int(11) DEFAULT NULL
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+insert into t1 values (1), (2), (3);
+connection: con1
+lock tables t1 read;;
+connection: con2
+set lock_wait_timeout = 0.02;
+set high_priority_lock_wait_timeout = 0.02;
+describe t1;
+Field Type Null Key Default Extra
+i int(11) YES NULL
+connection: default (for show processlist)
+# both con1 and con2 exist
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> test_user2 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+connection: con2
+alter table t1 modify i bigint;;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+alter high_priority table t1 modify i bigint;;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+connection: default (for show processlist)
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> test_user2 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+
+## Test parameters:
+## use_sys_var = 0;
+## con_block = con1
+## con_kill = default
+## cmd = create index idx1 on t1 (i);
+## high_priority_cmd = create high_priority index idx1 on t1 (i);
+## should_kill = 1
+## recreate_table = 1
+## throw_error = 1
+
+drop table if exists t1;
+create table t1 (i int);
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `i` int(11) DEFAULT NULL
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+insert into t1 values (1), (2), (3);
+connection: con1
+lock tables t1 read;;
+connection: default
+set lock_wait_timeout = 0.02;
+set high_priority_lock_wait_timeout = 0.02;
+describe t1;
+Field Type Null Key Default Extra
+i int(11) YES NULL
+connection: default (for show processlist)
+# both con1 and default exist
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+connection: default
+create index idx1 on t1 (i);;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+create high_priority index idx1 on t1 (i);;
+connection: default (for show processlist)
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+
+## Test parameters:
+## use_sys_var = 0;
+## con_block = con1
+## con_kill = default
+## cmd = drop index idx1 on t1;
+## high_priority_cmd = drop high_priority index idx1 on t1;
+## should_kill = 1
+## recreate_table = 0
+## throw_error = 1
+
+connection: con1
+lock tables t1 read;;
+connection: default
+set lock_wait_timeout = 0.02;
+set high_priority_lock_wait_timeout = 0.02;
+describe t1;
+Field Type Null Key Default Extra
+i int(11) YES MUL NULL
+connection: default (for show processlist)
+# both con1 and default exist
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+connection: default
+drop index idx1 on t1;;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+drop high_priority index idx1 on t1;;
+connection: default (for show processlist)
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+
+## Test parameters:
+## use_sys_var = 0;
+## con_block = con1
+## con_kill = default
+## cmd = truncate t1;
+## high_priority_cmd = truncate high_priority t1;
+## should_kill = 1
+## recreate_table = 1
+## throw_error = 1
+
+drop table if exists t1;
+create table t1 (i int);
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `i` int(11) DEFAULT NULL
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+insert into t1 values (1), (2), (3);
+connection: con1
+lock tables t1 read;;
+connection: default
+set lock_wait_timeout = 0.02;
+set high_priority_lock_wait_timeout = 0.02;
+describe t1;
+Field Type Null Key Default Extra
+i int(11) YES NULL
+connection: default (for show processlist)
+# both con1 and default exist
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+connection: default
+truncate t1;;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+truncate high_priority t1;;
+connection: default (for show processlist)
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+
+## Test parameters:
+## use_sys_var = 0;
+## con_block = con1
+## con_kill = default
+## cmd = create trigger ins_sum before insert on t1 for each row set @sum = @sum + new.i;
+## high_priority_cmd = create high_priority trigger ins_sum before insert on t1 for each row set @sum = @sum + new.i;
+## should_kill = 1
+## recreate_table = 1
+## throw_error = 1
+
+drop table if exists t1;
+create table t1 (i int);
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `i` int(11) DEFAULT NULL
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+insert into t1 values (1), (2), (3);
+connection: con1
+lock tables t1 read;;
+connection: default
+set lock_wait_timeout = 0.02;
+set high_priority_lock_wait_timeout = 0.02;
+describe t1;
+Field Type Null Key Default Extra
+i int(11) YES NULL
+connection: default (for show processlist)
+# both con1 and default exist
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+connection: default
+create trigger ins_sum before insert on t1 for each row set @sum = @sum + new.i;;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+create high_priority trigger ins_sum before insert on t1 for each row set @sum = @sum + new.i;;
+connection: default (for show processlist)
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+
+## Test parameters:
+## use_sys_var = 0;
+## con_block = con1
+## con_kill = default
+## cmd = drop trigger ins_sum;
+## high_priority_cmd = drop high_priority trigger ins_sum;
+## should_kill = 1
+## recreate_table = 0
+## throw_error = 1
+
+connection: con1
+lock tables t1 read;;
+connection: default
+set lock_wait_timeout = 0.02;
+set high_priority_lock_wait_timeout = 0.02;
+describe t1;
+Field Type Null Key Default Extra
+i int(11) YES NULL
+connection: default (for show processlist)
+# both con1 and default exist
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+connection: default
+drop trigger ins_sum;;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+drop high_priority trigger ins_sum;;
+connection: default (for show processlist)
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+
+## Test parameters:
+## use_sys_var = 0;
+## con_block = con1
+## con_kill = default
+## cmd = optimize table t1;
+## high_priority_cmd = optimize high_priority table t1;
+## should_kill = 1
+## recreate_table = 1
+## throw_error = 0
+
+drop table if exists t1;
+create table t1 (i int);
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `i` int(11) DEFAULT NULL
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+insert into t1 values (1), (2), (3);
+connection: con1
+lock tables t1 read;;
+connection: default
+set lock_wait_timeout = 0.02;
+set high_priority_lock_wait_timeout = 0.02;
+describe t1;
+Field Type Null Key Default Extra
+i int(11) YES NULL
+connection: default (for show processlist)
+# both con1 and default exist
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+connection: default
+optimize table t1;;
+Table Op Msg_type Msg_text
+test.t1 optimize Error Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+test.t1 optimize status Operation failed
+optimize high_priority table t1;;
+Table Op Msg_type Msg_text
+test.t1 optimize status OK
+connection: default (for show processlist)
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+drop user test_user1@localhost;
+drop user test_user2@localhost;
+drop table if exists t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/deadlock_tracking.result b/storage/rocksdb/mysql-test/rocksdb/r/deadlock_tracking.result
new file mode 100644
index 00000000000..d7cb89becb7
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/deadlock_tracking.result
@@ -0,0 +1,490 @@
+set @prior_lock_wait_timeout = @@rocksdb_lock_wait_timeout;
+set @prior_deadlock_detect = @@rocksdb_deadlock_detect;
+set @prior_max_latest_deadlocks = @@rocksdb_max_latest_deadlocks;
+set global rocksdb_deadlock_detect = on;
+set global rocksdb_lock_wait_timeout = 10000;
+# Clears deadlock buffer of any prior deadlocks.
+set global rocksdb_max_latest_deadlocks = 0;
+set global rocksdb_max_latest_deadlocks = @prior_max_latest_deadlocks;
+create table t (i int primary key) engine=rocksdb;
+insert into t values (1), (2), (3);
+show engine rocksdb transaction status;
+Type Name Status
+rocksdb
+============================================================
+TIMESTAMP ROCKSDB TRANSACTION MONITOR OUTPUT
+============================================================
+---------
+SNAPSHOTS
+---------
+LIST OF SNAPSHOTS FOR EACH SESSION:
+----------LATEST DETECTED DEADLOCKS----------
+-----------------------------------------
+END OF ROCKSDB TRANSACTION MONITOR OUTPUT
+=========================================
+
+Deadlock #1
+begin;
+select * from t where i=1 for update;
+i
+1
+begin;
+select * from t where i=2 for update;
+i
+2
+select * from t where i=2 for update;
+select * from t where i=1 for update;
+ERROR 40001: Deadlock found when trying to get lock; try restarting transaction
+rollback;
+i
+2
+rollback;
+show engine rocksdb transaction status;
+Type Name Status
+rocksdb
+============================================================
+TIMESTAMP ROCKSDB TRANSACTION MONITOR OUTPUT
+============================================================
+---------
+SNAPSHOTS
+---------
+LIST OF SNAPSHOTS FOR EACH SESSION:
+----------LATEST DETECTED DEADLOCKS----------
+
+*** DEADLOCK PATH
+=========================================
+TXN_ID
+COLUMN FAMILY NAME: default
+KEY
+LOCK TYPE: EXCLUSIVE
+INDEX NAME: PRIMARY
+TABLE NAME: test.t
+---------------WAITING FOR---------------
+TXN_ID
+COLUMN FAMILY NAME: default
+KEY
+LOCK TYPE: EXCLUSIVE
+INDEX NAME: PRIMARY
+TABLE NAME: test.t
+---------------WAITING FOR---------------
+TXN_ID
+COLUMN FAMILY NAME: default
+KEY
+LOCK TYPE: EXCLUSIVE
+INDEX NAME: PRIMARY
+TABLE NAME: test.t
+
+--------TXN_ID GOT DEADLOCK---------
+-----------------------------------------
+END OF ROCKSDB TRANSACTION MONITOR OUTPUT
+=========================================
+
+Deadlock #2
+begin;
+select * from t where i=1 for update;
+i
+1
+begin;
+select * from t where i=2 for update;
+i
+2
+select * from t where i=2 for update;
+select * from t where i=1 for update;
+ERROR 40001: Deadlock found when trying to get lock; try restarting transaction
+rollback;
+i
+2
+rollback;
+show engine rocksdb transaction status;
+Type Name Status
+rocksdb
+============================================================
+TIMESTAMP ROCKSDB TRANSACTION MONITOR OUTPUT
+============================================================
+---------
+SNAPSHOTS
+---------
+LIST OF SNAPSHOTS FOR EACH SESSION:
+----------LATEST DETECTED DEADLOCKS----------
+
+*** DEADLOCK PATH
+=========================================
+TXN_ID
+COLUMN FAMILY NAME: default
+KEY
+LOCK TYPE: EXCLUSIVE
+INDEX NAME: PRIMARY
+TABLE NAME: test.t
+---------------WAITING FOR---------------
+TXN_ID
+COLUMN FAMILY NAME: default
+KEY
+LOCK TYPE: EXCLUSIVE
+INDEX NAME: PRIMARY
+TABLE NAME: test.t
+---------------WAITING FOR---------------
+TXN_ID
+COLUMN FAMILY NAME: default
+KEY
+LOCK TYPE: EXCLUSIVE
+INDEX NAME: PRIMARY
+TABLE NAME: test.t
+
+--------TXN_ID GOT DEADLOCK---------
+
+*** DEADLOCK PATH
+=========================================
+TXN_ID
+COLUMN FAMILY NAME: default
+KEY
+LOCK TYPE: EXCLUSIVE
+INDEX NAME: PRIMARY
+TABLE NAME: test.t
+---------------WAITING FOR---------------
+TXN_ID
+COLUMN FAMILY NAME: default
+KEY
+LOCK TYPE: EXCLUSIVE
+INDEX NAME: PRIMARY
+TABLE NAME: test.t
+---------------WAITING FOR---------------
+TXN_ID
+COLUMN FAMILY NAME: default
+KEY
+LOCK TYPE: EXCLUSIVE
+INDEX NAME: PRIMARY
+TABLE NAME: test.t
+
+--------TXN_ID GOT DEADLOCK---------
+-----------------------------------------
+END OF ROCKSDB TRANSACTION MONITOR OUTPUT
+=========================================
+
+set global rocksdb_max_latest_deadlocks = 10;
+Deadlock #3
+begin;
+select * from t where i=1 for update;
+i
+1
+begin;
+select * from t where i=2 for update;
+i
+2
+select * from t where i=2 for update;
+select * from t where i=1 for update;
+ERROR 40001: Deadlock found when trying to get lock; try restarting transaction
+rollback;
+i
+2
+rollback;
+show engine rocksdb transaction status;
+Type Name Status
+rocksdb
+============================================================
+TIMESTAMP ROCKSDB TRANSACTION MONITOR OUTPUT
+============================================================
+---------
+SNAPSHOTS
+---------
+LIST OF SNAPSHOTS FOR EACH SESSION:
+----------LATEST DETECTED DEADLOCKS----------
+
+*** DEADLOCK PATH
+=========================================
+TXN_ID
+COLUMN FAMILY NAME: default
+KEY
+LOCK TYPE: EXCLUSIVE
+INDEX NAME: PRIMARY
+TABLE NAME: test.t
+---------------WAITING FOR---------------
+TXN_ID
+COLUMN FAMILY NAME: default
+KEY
+LOCK TYPE: EXCLUSIVE
+INDEX NAME: PRIMARY
+TABLE NAME: test.t
+---------------WAITING FOR---------------
+TXN_ID
+COLUMN FAMILY NAME: default
+KEY
+LOCK TYPE: EXCLUSIVE
+INDEX NAME: PRIMARY
+TABLE NAME: test.t
+
+--------TXN_ID GOT DEADLOCK---------
+
+*** DEADLOCK PATH
+=========================================
+TXN_ID
+COLUMN FAMILY NAME: default
+KEY
+LOCK TYPE: EXCLUSIVE
+INDEX NAME: PRIMARY
+TABLE NAME: test.t
+---------------WAITING FOR---------------
+TXN_ID
+COLUMN FAMILY NAME: default
+KEY
+LOCK TYPE: EXCLUSIVE
+INDEX NAME: PRIMARY
+TABLE NAME: test.t
+---------------WAITING FOR---------------
+TXN_ID
+COLUMN FAMILY NAME: default
+KEY
+LOCK TYPE: EXCLUSIVE
+INDEX NAME: PRIMARY
+TABLE NAME: test.t
+
+--------TXN_ID GOT DEADLOCK---------
+
+*** DEADLOCK PATH
+=========================================
+TXN_ID
+COLUMN FAMILY NAME: default
+KEY
+LOCK TYPE: EXCLUSIVE
+INDEX NAME: PRIMARY
+TABLE NAME: test.t
+---------------WAITING FOR---------------
+TXN_ID
+COLUMN FAMILY NAME: default
+KEY
+LOCK TYPE: EXCLUSIVE
+INDEX NAME: PRIMARY
+TABLE NAME: test.t
+---------------WAITING FOR---------------
+TXN_ID
+COLUMN FAMILY NAME: default
+KEY
+LOCK TYPE: EXCLUSIVE
+INDEX NAME: PRIMARY
+TABLE NAME: test.t
+
+--------TXN_ID GOT DEADLOCK---------
+-----------------------------------------
+END OF ROCKSDB TRANSACTION MONITOR OUTPUT
+=========================================
+
+set global rocksdb_max_latest_deadlocks = 1;
+show engine rocksdb transaction status;
+Type Name Status
+rocksdb
+============================================================
+TIMESTAMP ROCKSDB TRANSACTION MONITOR OUTPUT
+============================================================
+---------
+SNAPSHOTS
+---------
+LIST OF SNAPSHOTS FOR EACH SESSION:
+----------LATEST DETECTED DEADLOCKS----------
+
+*** DEADLOCK PATH
+=========================================
+TXN_ID
+COLUMN FAMILY NAME: default
+KEY
+LOCK TYPE: EXCLUSIVE
+INDEX NAME: PRIMARY
+TABLE NAME: test.t
+---------------WAITING FOR---------------
+TXN_ID
+COLUMN FAMILY NAME: default
+KEY
+LOCK TYPE: EXCLUSIVE
+INDEX NAME: PRIMARY
+TABLE NAME: test.t
+---------------WAITING FOR---------------
+TXN_ID
+COLUMN FAMILY NAME: default
+KEY
+LOCK TYPE: EXCLUSIVE
+INDEX NAME: PRIMARY
+TABLE NAME: test.t
+
+--------TXN_ID GOT DEADLOCK---------
+-----------------------------------------
+END OF ROCKSDB TRANSACTION MONITOR OUTPUT
+=========================================
+
+set rocksdb_deadlock_detect_depth = 2;
+Deadlock #4
+begin;
+select * from t where i=1 for update;
+i
+1
+begin;
+select * from t where i=2 for update;
+i
+2
+begin;
+select * from t where i=3 for update;
+i
+3
+select * from t where i=2 for update;
+select * from t where i=3 for update;
+select * from t where i=1 for update;
+ERROR 40001: Deadlock found when trying to get lock; try restarting transaction
+rollback;
+i
+3
+rollback;
+i
+2
+rollback;
+set global rocksdb_max_latest_deadlocks = 5;
+show engine rocksdb transaction status;
+Type Name Status
+rocksdb
+============================================================
+TIMESTAMP ROCKSDB TRANSACTION MONITOR OUTPUT
+============================================================
+---------
+SNAPSHOTS
+---------
+LIST OF SNAPSHOTS FOR EACH SESSION:
+----------LATEST DETECTED DEADLOCKS----------
+
+-------DEADLOCK EXCEEDED MAX DEPTH-------
+-----------------------------------------
+END OF ROCKSDB TRANSACTION MONITOR OUTPUT
+=========================================
+
+Deadlock #5
+begin;
+select * from t where i=1 for update;
+i
+1
+begin;
+select * from t where i=2 for update;
+i
+2
+begin;
+select * from t where i=3 lock in share mode;
+i
+3
+select * from t where i=100 for update;
+i
+select * from t where i=101 for update;
+i
+select * from t where i=2 for update;
+select * from t where i=3 lock in share mode;
+i
+3
+select * from t where i=200 for update;
+i
+select * from t where i=201 for update;
+i
+select * from t where i=1 lock in share mode;
+ERROR 40001: Deadlock found when trying to get lock; try restarting transaction
+rollback;
+i
+2
+rollback;
+rollback;
+show engine rocksdb transaction status;
+Type Name Status
+rocksdb
+============================================================
+TIMESTAMP ROCKSDB TRANSACTION MONITOR OUTPUT
+============================================================
+---------
+SNAPSHOTS
+---------
+LIST OF SNAPSHOTS FOR EACH SESSION:
+----------LATEST DETECTED DEADLOCKS----------
+
+*** DEADLOCK PATH
+=========================================
+TXN_ID
+COLUMN FAMILY NAME: default
+KEY
+LOCK TYPE: EXCLUSIVE
+INDEX NAME: PRIMARY
+TABLE NAME: test.t
+---------------WAITING FOR---------------
+TXN_ID
+COLUMN FAMILY NAME: default
+KEY
+LOCK TYPE: SHARED
+INDEX NAME: PRIMARY
+TABLE NAME: test.t
+---------------WAITING FOR---------------
+TXN_ID
+COLUMN FAMILY NAME: default
+KEY
+LOCK TYPE: EXCLUSIVE
+INDEX NAME: PRIMARY
+TABLE NAME: test.t
+
+--------TXN_ID GOT DEADLOCK---------
+
+-------DEADLOCK EXCEEDED MAX DEPTH-------
+-----------------------------------------
+END OF ROCKSDB TRANSACTION MONITOR OUTPUT
+=========================================
+
+set global rocksdb_lock_wait_timeout = @prior_lock_wait_timeout;
+set global rocksdb_deadlock_detect = @prior_deadlock_detect;
+drop table t;
+show engine rocksdb transaction status;
+Type Name Status
+rocksdb
+============================================================
+TIMESTAMP ROCKSDB TRANSACTION MONITOR OUTPUT
+============================================================
+---------
+SNAPSHOTS
+---------
+LIST OF SNAPSHOTS FOR EACH SESSION:
+----------LATEST DETECTED DEADLOCKS----------
+
+*** DEADLOCK PATH
+=========================================
+TXN_ID
+COLUMN FAMILY NAME: default
+KEY
+LOCK TYPE: EXCLUSIVE
+INDEX NAME: NOT FOUND; IDX_ID
+TABLE NAME: NOT FOUND; IDX_ID
+---------------WAITING FOR---------------
+TXN_ID
+COLUMN FAMILY NAME: default
+KEY
+LOCK TYPE: SHARED
+INDEX NAME: NOT FOUND; IDX_ID
+TABLE NAME: NOT FOUND; IDX_ID
+---------------WAITING FOR---------------
+TXN_ID
+COLUMN FAMILY NAME: default
+KEY
+LOCK TYPE: EXCLUSIVE
+INDEX NAME: NOT FOUND; IDX_ID
+TABLE NAME: NOT FOUND; IDX_ID
+
+--------TXN_ID GOT DEADLOCK---------
+
+-------DEADLOCK EXCEEDED MAX DEPTH-------
+-----------------------------------------
+END OF ROCKSDB TRANSACTION MONITOR OUTPUT
+=========================================
+
+set global rocksdb_max_latest_deadlocks = 0;
+# Clears deadlock buffer of any existent deadlocks.
+set global rocksdb_max_latest_deadlocks = @prior_max_latest_deadlocks;
+show engine rocksdb transaction status;
+Type Name Status
+rocksdb
+============================================================
+TIMESTAMP ROCKSDB TRANSACTION MONITOR OUTPUT
+============================================================
+---------
+SNAPSHOTS
+---------
+LIST OF SNAPSHOTS FOR EACH SESSION:
+----------LATEST DETECTED DEADLOCKS----------
+-----------------------------------------
+END OF ROCKSDB TRANSACTION MONITOR OUTPUT
+=========================================
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/drop_table.result b/storage/rocksdb/mysql-test/rocksdb/r/drop_table.result
index 7abe692c8ad..5e55c054c23 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/drop_table.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/drop_table.result
@@ -3,6 +3,9 @@ DROP TABLE IF EXISTS t2;
DROP TABLE IF EXISTS t3;
DROP TABLE IF EXISTS t4;
DROP TABLE IF EXISTS t5;
+call mtr.add_suppression("Column family 'cf1' not found");
+call mtr.add_suppression("Column family 'rev:cf2' not found");
+call mtr.add_suppression("LibRocksDB");
set global rocksdb_compact_cf = 'cf1';
set global rocksdb_compact_cf = 'rev:cf2';
set global rocksdb_signal_drop_index_thread = 1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/drop_table2.result b/storage/rocksdb/mysql-test/rocksdb/r/drop_table2.result
index 53624a9899d..be594abdec6 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/drop_table2.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/drop_table2.result
@@ -3,6 +3,8 @@ DROP TABLE IF EXISTS t2;
DROP TABLE IF EXISTS t3;
DROP TABLE IF EXISTS t4;
DROP TABLE IF EXISTS t5;
+call mtr.add_suppression("Column family 'cf1' not found");
+call mtr.add_suppression("Column family 'rev:cf2' not found");
set global rocksdb_compact_cf = 'cf1';
set global rocksdb_compact_cf = 'rev:cf2';
set global rocksdb_signal_drop_index_thread = 1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/drop_table3.result b/storage/rocksdb/mysql-test/rocksdb/r/drop_table3.result
index c69d789c12a..107300ad813 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/drop_table3.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/drop_table3.result
@@ -1,4 +1,6 @@
DROP TABLE IF EXISTS t1;
+call mtr.add_suppression("Column family 'cf1' not found");
+call mtr.add_suppression("Column family 'rev:cf2' not found");
set global rocksdb_compact_cf = 'cf1';
set global rocksdb_compact_cf = 'rev:cf2';
set global rocksdb_signal_drop_index_thread = 1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/dup_key_update.result b/storage/rocksdb/mysql-test/rocksdb/r/dup_key_update.result
index 954335debf2..b4cebb08bb1 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/dup_key_update.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/dup_key_update.result
@@ -178,16 +178,20 @@ id1 id2 id3
9 17 9
DROP TABLE t1;
DROP TABLE t2;
+set global rocksdb_large_prefix=1;
CREATE TABLE t1 (id1 varchar(128) CHARACTER SET latin1 COLLATE latin1_bin,
id2 varchar(256) CHARACTER SET utf8 COLLATE utf8_bin,
id3 varchar(200) CHARACTER SET latin1 COLLATE latin1_swedish_ci,
PRIMARY KEY (id1, id2, id3),
UNIQUE KEY (id3, id1)) ENGINE=ROCKSDB;
+set global rocksdb_large_prefix=DEFAULT;
+set global rocksdb_large_prefix=1;
CREATE TABLE t2 (id1 varchar(128) CHARACTER SET latin1 COLLATE latin1_bin,
id2 varchar(256) CHARACTER SET utf8 COLLATE utf8_bin,
id3 varchar(200) CHARACTER SET latin1 COLLATE latin1_swedish_ci,
PRIMARY KEY (id1, id2, id3),
UNIQUE KEY (id3, id1) COMMENT 'rev:cf') ENGINE=ROCKSDB;
+set global rocksdb_large_prefix=DEFAULT;
INSERT INTO t1 VALUES (1, 1, 1) ON DUPLICATE KEY UPDATE id2 = 9;
SELECT * FROM t1 WHERE id1 = 1;
id1 id2 id3
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/fast_prefix_index_fetch.result b/storage/rocksdb/mysql-test/rocksdb/r/fast_prefix_index_fetch.result
new file mode 100644
index 00000000000..963f9706ee8
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/fast_prefix_index_fetch.result
@@ -0,0 +1,80 @@
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (
+id INT,
+fake_id INT,
+bigfield VARCHAR(4096),
+PRIMARY KEY (id),
+KEY bf (bigfield(32)),
+KEY fid (fake_id, bigfield(32))
+) ENGINE=rocksdb;
+INSERT INTO t1 VALUES (1, 1001, REPEAT('a', 1)),
+(8, 1008, REPEAT('b', 8)),
+(24, 1024, REPEAT('c', 24)),
+(31, 1031, REPEAT('d', 31)),
+(32, 1032, REPEAT('x', 32)),
+(33, 1033, REPEAT('y', 33)),
+(128, 1128, REPEAT('z', 128));
+SELECT * FROM t1;
+id fake_id bigfield
+1 1001 a
+8 1008 bbbbbbbb
+24 1024 cccccccccccccccccccccccc
+31 1031 ddddddddddddddddddddddddddddddd
+32 1032 xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+33 1033 yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy
+128 1128 zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz
+# Baseline sanity check
+no-op query
+no-op query
+include/assert.inc [Baseline sanity check: 0 rocksdb_covered_secondary_key_lookups]
+# Eligible for optimization.
+id bigfield
+31 ddddddddddddddddddddddddddddddd
+include/assert.inc [Eligible for optimization.: 2 rocksdb_covered_secondary_key_lookups]
+# Eligible for optimization, access via fake_id only
+id bigfield
+31 ddddddddddddddddddddddddddddddd
+include/assert.inc [Eligible for optimization, access via fake_id only: 2 rocksdb_covered_secondary_key_lookups]
+# Not eligible for optimization, access via fake_id of big row.
+id bigfield
+33 yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy
+include/assert.inc [Not eligible for optimization, access via fake_id of big row.: 0 rocksdb_covered_secondary_key_lookups]
+# Eligible for optimization.
+id bigfield
+32 xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+include/assert.inc [Eligible for optimization.: 1 rocksdb_covered_secondary_key_lookups]
+# Not eligible for optimization.
+id bigfield
+33 yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy
+include/assert.inc [Not eligible for optimization.: 0 rocksdb_covered_secondary_key_lookups]
+# Eligible for optimization.
+id bigfield
+8 bbbbbbbb
+include/assert.inc [Eligible for optimization.: 2 rocksdb_covered_secondary_key_lookups]
+# Eligible for optimization.
+id bigfield
+24 cccccccccccccccccccccccc
+include/assert.inc [Eligible for optimization.: 2 rocksdb_covered_secondary_key_lookups]
+# Not eligible for optimization.
+id bigfield
+128 zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz
+include/assert.inc [Not eligible for optimization.: 0 rocksdb_covered_secondary_key_lookups]
+#
+# Test that multi-byte charsets are handled correctly
+#
+# Charset record obviously shorter than the prefix
+a b
+1 a
+include/assert.inc [Charset record obviously shorter than the prefix: 2 rocksdb_covered_secondary_key_lookups]
+# Charset record shorter than prefix
+a b
+2 cc
+include/assert.inc [Charset record shorter than prefix: 2 rocksdb_covered_secondary_key_lookups]
+# Charset record with glyphs shorter than prefix
+a b
+3 ŽŽ
+include/assert.inc [Charset record with glyphs shorter than prefix: 1 rocksdb_covered_secondary_key_lookups]
+# Charset record longer than prefix
+a b
+4 žžžž
+include/assert.inc [Charset record longer than prefix: 0 rocksdb_covered_secondary_key_lookups]
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/i_s_ddl.result b/storage/rocksdb/mysql-test/rocksdb/r/i_s_ddl.result
index 7eeeb35197b..c6c484a7dc5 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/i_s_ddl.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/i_s_ddl.result
@@ -9,9 +9,9 @@ KEY (x)) ENGINE = ROCKSDB;
SELECT TABLE_SCHEMA,TABLE_NAME,PARTITION_NAME,INDEX_NAME,INDEX_TYPE,KV_FORMAT_VERSION,CF FROM INFORMATION_SCHEMA.ROCKSDB_DDL WHERE TABLE_NAME like 'is_ddl_t%';
TABLE_SCHEMA TABLE_NAME PARTITION_NAME INDEX_NAME INDEX_TYPE KV_FORMAT_VERSION CF
test is_ddl_t2 NULL PRIMARY 1 13 zy_cf
-test is_ddl_t2 NULL x 2 12 default
+test is_ddl_t2 NULL x 2 13 default
test is_ddl_t1 NULL PRIMARY 1 13 default
-test is_ddl_t1 NULL j 2 12 default
-test is_ddl_t1 NULL k 2 12 kl_cf
+test is_ddl_t1 NULL j 2 13 default
+test is_ddl_t1 NULL k 2 13 kl_cf
DROP TABLE is_ddl_t1;
DROP TABLE is_ddl_t2;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/index.result b/storage/rocksdb/mysql-test/rocksdb/r/index.result
index 99390c8ceb2..b5950618285 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/index.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/index.result
@@ -40,6 +40,33 @@ t1 0 PRIMARY 1 pk A # NULL NULL LSMTREE
t1 1 a 1 a A # NULL NULL YES LSMTREE simple index on a
ALTER TABLE t1 DROP KEY a;
DROP TABLE t1;
+set global rocksdb_large_prefix=0;
+CREATE TABLE t1 (
+a BLOB(1024),
+KEY (a(767))
+) ENGINE=rocksdb;
+DROP TABLE t1;
+CREATE TABLE t1 (
+a BLOB(1024),
+KEY (a(768))
+) ENGINE=rocksdb;
+Warnings:
+Warning 1071 Specified key was too long; max key length is 767 bytes
+DROP TABLE t1;
+set global rocksdb_large_prefix=1;
+CREATE TABLE t1 (
+a BLOB(4096),
+KEY (a(3072))
+) ENGINE=rocksdb;
+DROP TABLE t1;
+CREATE TABLE t1 (
+a BLOB(4096),
+KEY (a(3073))
+) ENGINE=rocksdb;
+Warnings:
+Warning 1071 Specified key was too long; max key length is 3072 bytes
+DROP TABLE t1;
+set global rocksdb_large_prefix=DEFAULT;
#
# Issue #376: MyRocks: ORDER BY optimizer is unable to use the index extension
#
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/index_primary.result b/storage/rocksdb/mysql-test/rocksdb/r/index_primary.result
index 66481f81c67..1f4c88e5a33 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/index_primary.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/index_primary.result
@@ -46,3 +46,26 @@ SHOW KEYS IN t1;
Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment
t1 0 PRIMARY 1 b A # NULL NULL LSMTREE
DROP TABLE t1;
+set global rocksdb_large_prefix=0;
+CREATE TABLE t1 (
+a BLOB(1024),
+PRIMARY KEY (a(767))
+) ENGINE=rocksdb;
+DROP TABLE t1;
+CREATE TABLE t1 (
+a BLOB(1024),
+PRIMARY KEY (a(768))
+) ENGINE=rocksdb;
+ERROR 42000: Specified key was too long; max key length is 767 bytes
+set global rocksdb_large_prefix=1;
+CREATE TABLE t1 (
+a BLOB(4096),
+PRIMARY KEY (a(3072))
+) ENGINE=rocksdb;
+DROP TABLE t1;
+CREATE TABLE t1 (
+a BLOB(4096),
+PRIMARY KEY (a(3073))
+) ENGINE=rocksdb;
+ERROR 42000: Specified key was too long; max key length is 3072 bytes
+set global rocksdb_large_prefix=DEFAULT;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/index_type_btree.result b/storage/rocksdb/mysql-test/rocksdb/r/index_type_btree.result
index a604663954b..6ddb3d45695 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/index_type_btree.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/index_type_btree.result
@@ -40,3 +40,30 @@ t1 0 PRIMARY 1 pk A # NULL NULL LSMTREE
t1 1 a 1 a A # NULL NULL YES LSMTREE simple index on a
ALTER TABLE t1 DROP KEY a;
DROP TABLE t1;
+set global rocksdb_large_prefix=0;
+CREATE TABLE t1 (
+a BLOB(1024),
+KEY (a(767))
+) ENGINE=rocksdb;
+DROP TABLE t1;
+CREATE TABLE t1 (
+a BLOB(1024),
+KEY (a(768))
+) ENGINE=rocksdb;
+Warnings:
+Warning 1071 Specified key was too long; max key length is 767 bytes
+DROP TABLE t1;
+set global rocksdb_large_prefix=1;
+CREATE TABLE t1 (
+a BLOB(4096),
+KEY (a(3072))
+) ENGINE=rocksdb;
+DROP TABLE t1;
+CREATE TABLE t1 (
+a BLOB(4096),
+KEY (a(3073))
+) ENGINE=rocksdb;
+Warnings:
+Warning 1071 Specified key was too long; max key length is 3072 bytes
+DROP TABLE t1;
+set global rocksdb_large_prefix=DEFAULT;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/index_type_hash.result b/storage/rocksdb/mysql-test/rocksdb/r/index_type_hash.result
index ae99badff14..08b17ffe8ee 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/index_type_hash.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/index_type_hash.result
@@ -40,3 +40,30 @@ t1 0 PRIMARY 1 pk A # NULL NULL LSMTREE
t1 1 a 1 a A # NULL NULL YES LSMTREE simple index on a
ALTER TABLE t1 DROP KEY a;
DROP TABLE t1;
+set global rocksdb_large_prefix=0;
+CREATE TABLE t1 (
+a BLOB(1024),
+KEY (a(767))
+) ENGINE=rocksdb;
+DROP TABLE t1;
+CREATE TABLE t1 (
+a BLOB(1024),
+KEY (a(768))
+) ENGINE=rocksdb;
+Warnings:
+Warning 1071 Specified key was too long; max key length is 767 bytes
+DROP TABLE t1;
+set global rocksdb_large_prefix=1;
+CREATE TABLE t1 (
+a BLOB(4096),
+KEY (a(3072))
+) ENGINE=rocksdb;
+DROP TABLE t1;
+CREATE TABLE t1 (
+a BLOB(4096),
+KEY (a(3073))
+) ENGINE=rocksdb;
+Warnings:
+Warning 1071 Specified key was too long; max key length is 3072 bytes
+DROP TABLE t1;
+set global rocksdb_large_prefix=DEFAULT;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/issue243_transactionStatus.result b/storage/rocksdb/mysql-test/rocksdb/r/issue243_transactionStatus.result
index 83db308627d..d4d211b9288 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/issue243_transactionStatus.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/issue243_transactionStatus.result
@@ -17,7 +17,7 @@ id val1 val2
2 2 2
SHOW ENGINE rocksdb TRANSACTION STATUS;
Type Name Status
-SNAPSHOTS rocksdb
+rocksdb
============================================================
TIMESTAMP ROCKSDB TRANSACTION MONITOR OUTPUT
============================================================
@@ -25,6 +25,7 @@ TIMESTAMP ROCKSDB TRANSACTION MONITOR OUTPUT
SNAPSHOTS
---------
LIST OF SNAPSHOTS FOR EACH SESSION:
+----------LATEST DETECTED DEADLOCKS----------
-----------------------------------------
END OF ROCKSDB TRANSACTION MONITOR OUTPUT
=========================================
@@ -48,7 +49,7 @@ id val1 val2
DELETE FROM t1 WHERE id=30;
SHOW ENGINE rocksdb TRANSACTION STATUS;
Type Name Status
-SNAPSHOTS rocksdb
+rocksdb
============================================================
TIMESTAMP ROCKSDB TRANSACTION MONITOR OUTPUT
============================================================
@@ -61,6 +62,7 @@ MySQL thread id TID, OS thread handle PTR, query id QID localhost root ACTION
SHOW ENGINE rocksdb TRANSACTION STATUS
lock count 8, write count 4
insert count 2, update count 1, delete count 1
+----------LATEST DETECTED DEADLOCKS----------
-----------------------------------------
END OF ROCKSDB TRANSACTION MONITOR OUTPUT
=========================================
@@ -68,7 +70,7 @@ END OF ROCKSDB TRANSACTION MONITOR OUTPUT
ROLLBACK;
SHOW ENGINE rocksdb TRANSACTION STATUS;
Type Name Status
-SNAPSHOTS rocksdb
+rocksdb
============================================================
TIMESTAMP ROCKSDB TRANSACTION MONITOR OUTPUT
============================================================
@@ -76,6 +78,7 @@ TIMESTAMP ROCKSDB TRANSACTION MONITOR OUTPUT
SNAPSHOTS
---------
LIST OF SNAPSHOTS FOR EACH SESSION:
+----------LATEST DETECTED DEADLOCKS----------
-----------------------------------------
END OF ROCKSDB TRANSACTION MONITOR OUTPUT
=========================================
@@ -84,7 +87,7 @@ START TRANSACTION;
INSERT INTO t1 VALUES(40,40,40);
SHOW ENGINE rocksdb TRANSACTION STATUS;
Type Name Status
-SNAPSHOTS rocksdb
+rocksdb
============================================================
TIMESTAMP ROCKSDB TRANSACTION MONITOR OUTPUT
============================================================
@@ -97,6 +100,7 @@ MySQL thread id TID, OS thread handle PTR, query id QID localhost root ACTION
SHOW ENGINE rocksdb TRANSACTION STATUS
lock count 2, write count 1
insert count 1, update count 0, delete count 0
+----------LATEST DETECTED DEADLOCKS----------
-----------------------------------------
END OF ROCKSDB TRANSACTION MONITOR OUTPUT
=========================================
@@ -104,7 +108,7 @@ END OF ROCKSDB TRANSACTION MONITOR OUTPUT
COMMIT;
SHOW ENGINE rocksdb TRANSACTION STATUS;
Type Name Status
-SNAPSHOTS rocksdb
+rocksdb
============================================================
TIMESTAMP ROCKSDB TRANSACTION MONITOR OUTPUT
============================================================
@@ -112,6 +116,7 @@ TIMESTAMP ROCKSDB TRANSACTION MONITOR OUTPUT
SNAPSHOTS
---------
LIST OF SNAPSHOTS FOR EACH SESSION:
+----------LATEST DETECTED DEADLOCKS----------
-----------------------------------------
END OF ROCKSDB TRANSACTION MONITOR OUTPUT
=========================================
@@ -133,7 +138,7 @@ UPDATE t2 SET value=3 WHERE id2=2;
DELETE FROM t2 WHERE id1=10;
SHOW ENGINE rocksdb TRANSACTION STATUS;
Type Name Status
-SNAPSHOTS rocksdb
+rocksdb
============================================================
TIMESTAMP ROCKSDB TRANSACTION MONITOR OUTPUT
============================================================
@@ -146,6 +151,7 @@ MySQL thread id TID, OS thread handle PTR, query id QID localhost root ACTION
SHOW ENGINE rocksdb TRANSACTION STATUS
lock count 9, write count 7
insert count 2, update count 1, delete count 1
+----------LATEST DETECTED DEADLOCKS----------
-----------------------------------------
END OF ROCKSDB TRANSACTION MONITOR OUTPUT
=========================================
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/multi_varchar_sk_lookup.result b/storage/rocksdb/mysql-test/rocksdb/r/multi_varchar_sk_lookup.result
new file mode 100644
index 00000000000..86ba6d923a8
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/multi_varchar_sk_lookup.result
@@ -0,0 +1,37 @@
+DROP TABLE IF EXISTS T1;
+CREATE TABLE T1 (
+P1 VARCHAR(64), P2 VARCHAR(64), P3 VARCHAR(64), P4 VARCHAR(64),
+S1 VARCHAR(64), S2 VARCHAR(64), S3 VARCHAR(64), S4 VARCHAR(64),
+S5 VARCHAR(64), S6 VARCHAR(64), S7 VARCHAR(64), S8 VARCHAR(64),
+S9 VARCHAR(64), S10 VARCHAR(64), S11 VARCHAR(64), S12 VARCHAR(64),
+S13 VARCHAR(64), S14 VARCHAR(64), S15 VARCHAR(64), S16 VARCHAR(64),
+PRIMARY KEY (P1(8), P2(8), P3(8), P4(8)),
+KEY SK (S1(8), S2(8), S3(8), S4(8),
+S5(8), S6(8), S7(8), S8(8),
+S9(8), S10(8), S11(8), S12(8),
+S13(8), S14(8), S15(8), S16(8))
+) ENGINE=rocksdb;
+INSERT INTO T1 VALUES ('1', '2', '3', '4',
+'5', '6', '7', '8',
+'9', '10', '11', '12',
+'13', '14', '15', '16',
+'17', '18', '19', '20');
+SELECT * FROM T1;
+P1 P2 P3 P4 S1 S2 S3 S4 S5 S6 S7 S8 S9 S10 S11 S12 S13 S14 S15 S16
+1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
+# Not eligible for optimization, shorter than prefix length.
+SELECT P4, S2 FROM T1 FORCE INDEX(SK) WHERE S1='5';
+P4 S2
+4 6
+include/assert.inc [Not eligible for optimization, shorter than prefix length.: 0 rocksdb_covered_secondary_key_lookups]
+DELETE FROM T1;
+INSERT INTO T1 VALUES ('100000000', '200000000', '300000000', '400000000',
+'500000000', '600000000', '700000000', '800000000',
+'900000000', '100000000', '110000000', '120000000',
+'130000000', '140000000', '150000000', '160000000',
+'170000000', '180000000', '190000000', '200000000');
+# Not eligible for optimization, longer than prefix length.
+SELECT P4, S2 FROM T1 FORCE INDEX(SK) WHERE S1='5';
+P4 S2
+include/assert.inc [Not eligible for optimization, longer than prefix length.: 0 rocksdb_covered_secondary_key_lookups]
+DROP TABLE T1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/rocksdb.result b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb.result
index 3f08450db6d..76de58ce813 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/rocksdb.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb.result
@@ -868,6 +868,7 @@ rocksdb_block_restart_interval 16
rocksdb_block_size 4096
rocksdb_block_size_deviation 10
rocksdb_bulk_load OFF
+rocksdb_bulk_load_allow_unsorted OFF
rocksdb_bulk_load_size 1000
rocksdb_bytes_per_sync 0
rocksdb_cache_index_and_filter_blocks ON
@@ -880,13 +881,16 @@ rocksdb_compaction_sequential_deletes 0
rocksdb_compaction_sequential_deletes_count_sd OFF
rocksdb_compaction_sequential_deletes_file_size 0
rocksdb_compaction_sequential_deletes_window 0
+rocksdb_concurrent_prepare ON
rocksdb_create_checkpoint
rocksdb_create_if_missing ON
rocksdb_create_missing_column_families OFF
rocksdb_datadir ./.rocksdb
rocksdb_db_write_buffer_size 0
rocksdb_deadlock_detect OFF
+rocksdb_deadlock_detect_depth 50
rocksdb_debug_optimizer_no_zero_cardinality ON
+rocksdb_debug_ttl_ignore_pk OFF
rocksdb_debug_ttl_read_filter_ts 0
rocksdb_debug_ttl_rec_ts 0
rocksdb_debug_ttl_snapshot_ts 0
@@ -903,6 +907,7 @@ rocksdb_error_if_exists OFF
rocksdb_flush_log_at_trx_commit 1
rocksdb_flush_memtable_on_analyze ON
rocksdb_force_compute_memtable_stats ON
+rocksdb_force_compute_memtable_stats_cachetime 0
rocksdb_force_flush_memtable_and_lzero_now OFF
rocksdb_force_flush_memtable_now OFF
rocksdb_force_index_records_in_range 0
@@ -912,12 +917,15 @@ rocksdb_info_log_level error_level
rocksdb_io_write_timeout 0
rocksdb_is_fd_close_on_exec ON
rocksdb_keep_log_file_num 1000
+rocksdb_large_prefix OFF
rocksdb_lock_scanned_rows OFF
rocksdb_lock_wait_timeout 1
rocksdb_log_file_time_to_roll 0
rocksdb_manifest_preallocation_size 4194304
+rocksdb_manual_wal_flush ON
rocksdb_master_skip_tx_api OFF
rocksdb_max_background_jobs 2
+rocksdb_max_latest_deadlocks 5
rocksdb_max_log_file_size 0
rocksdb_max_manifest_file_size 18446744073709551615
rocksdb_max_open_files -1
@@ -926,6 +934,7 @@ rocksdb_max_subcompactions 1
rocksdb_max_total_wal_size 0
rocksdb_merge_buf_size 67108864
rocksdb_merge_combine_read_size 1073741824
+rocksdb_merge_tmp_file_removal_delay_ms 0
rocksdb_new_table_reader_for_compaction_inputs OFF
rocksdb_no_block_cache OFF
rocksdb_override_cf_options
@@ -942,6 +951,7 @@ rocksdb_records_in_range 50
rocksdb_reset_stats OFF
rocksdb_seconds_between_stat_computes 3600
rocksdb_signal_drop_index_thread OFF
+rocksdb_sim_cache_size 0
rocksdb_skip_bloom_filter_on_read OFF
rocksdb_skip_fill_cache OFF
rocksdb_skip_unique_check_tables .*
@@ -957,6 +967,7 @@ rocksdb_trace_sst_api OFF
rocksdb_unsafe_for_binlog OFF
rocksdb_update_cf_options
rocksdb_use_adaptive_mutex OFF
+rocksdb_use_clock_cache OFF
rocksdb_use_direct_io_for_flush_and_compaction OFF
rocksdb_use_direct_reads OFF
rocksdb_use_fsync OFF
@@ -1444,6 +1455,7 @@ rocksdb_memtable_total #
rocksdb_memtable_unflushed #
rocksdb_queries_point #
rocksdb_queries_range #
+rocksdb_covered_secondary_key_lookups #
rocksdb_block_cache_add #
rocksdb_block_cache_data_hit #
rocksdb_block_cache_data_miss #
@@ -1496,6 +1508,16 @@ rocksdb_number_superversion_cleanups #
rocksdb_number_superversion_releases #
rocksdb_rate_limit_delay_millis #
rocksdb_snapshot_conflict_errors #
+rocksdb_stall_l0_file_count_limit_slowdowns #
+rocksdb_stall_locked_l0_file_count_limit_slowdowns #
+rocksdb_stall_l0_file_count_limit_stops #
+rocksdb_stall_locked_l0_file_count_limit_stops #
+rocksdb_stall_pending_compaction_limit_stops #
+rocksdb_stall_pending_compaction_limit_slowdowns #
+rocksdb_stall_memtable_limit_stops #
+rocksdb_stall_memtable_limit_slowdowns #
+rocksdb_stall_total_stops #
+rocksdb_stall_total_slowdowns #
rocksdb_stall_micros #
rocksdb_wal_bytes #
rocksdb_wal_group_syncs #
@@ -1520,6 +1542,7 @@ ROCKSDB_MEMTABLE_TOTAL
ROCKSDB_MEMTABLE_UNFLUSHED
ROCKSDB_QUERIES_POINT
ROCKSDB_QUERIES_RANGE
+ROCKSDB_COVERED_SECONDARY_KEY_LOOKUPS
ROCKSDB_BLOCK_CACHE_ADD
ROCKSDB_BLOCK_CACHE_DATA_HIT
ROCKSDB_BLOCK_CACHE_DATA_MISS
@@ -1572,6 +1595,16 @@ ROCKSDB_NUMBER_SUPERVERSION_CLEANUPS
ROCKSDB_NUMBER_SUPERVERSION_RELEASES
ROCKSDB_RATE_LIMIT_DELAY_MILLIS
ROCKSDB_SNAPSHOT_CONFLICT_ERRORS
+ROCKSDB_STALL_L0_FILE_COUNT_LIMIT_SLOWDOWNS
+ROCKSDB_STALL_LOCKED_L0_FILE_COUNT_LIMIT_SLOWDOWNS
+ROCKSDB_STALL_L0_FILE_COUNT_LIMIT_STOPS
+ROCKSDB_STALL_LOCKED_L0_FILE_COUNT_LIMIT_STOPS
+ROCKSDB_STALL_PENDING_COMPACTION_LIMIT_STOPS
+ROCKSDB_STALL_PENDING_COMPACTION_LIMIT_SLOWDOWNS
+ROCKSDB_STALL_MEMTABLE_LIMIT_STOPS
+ROCKSDB_STALL_MEMTABLE_LIMIT_SLOWDOWNS
+ROCKSDB_STALL_TOTAL_STOPS
+ROCKSDB_STALL_TOTAL_SLOWDOWNS
ROCKSDB_STALL_MICROS
ROCKSDB_WAL_BYTES
ROCKSDB_WAL_GROUP_SYNCS
@@ -1598,6 +1631,7 @@ ROCKSDB_MEMTABLE_TOTAL
ROCKSDB_MEMTABLE_UNFLUSHED
ROCKSDB_QUERIES_POINT
ROCKSDB_QUERIES_RANGE
+ROCKSDB_COVERED_SECONDARY_KEY_LOOKUPS
ROCKSDB_BLOCK_CACHE_ADD
ROCKSDB_BLOCK_CACHE_DATA_HIT
ROCKSDB_BLOCK_CACHE_DATA_MISS
@@ -1650,6 +1684,16 @@ ROCKSDB_NUMBER_SUPERVERSION_CLEANUPS
ROCKSDB_NUMBER_SUPERVERSION_RELEASES
ROCKSDB_RATE_LIMIT_DELAY_MILLIS
ROCKSDB_SNAPSHOT_CONFLICT_ERRORS
+ROCKSDB_STALL_L0_FILE_COUNT_LIMIT_SLOWDOWNS
+ROCKSDB_STALL_LOCKED_L0_FILE_COUNT_LIMIT_SLOWDOWNS
+ROCKSDB_STALL_L0_FILE_COUNT_LIMIT_STOPS
+ROCKSDB_STALL_LOCKED_L0_FILE_COUNT_LIMIT_STOPS
+ROCKSDB_STALL_PENDING_COMPACTION_LIMIT_STOPS
+ROCKSDB_STALL_PENDING_COMPACTION_LIMIT_SLOWDOWNS
+ROCKSDB_STALL_MEMTABLE_LIMIT_STOPS
+ROCKSDB_STALL_MEMTABLE_LIMIT_SLOWDOWNS
+ROCKSDB_STALL_TOTAL_STOPS
+ROCKSDB_STALL_TOTAL_SLOWDOWNS
ROCKSDB_STALL_MICROS
ROCKSDB_WAL_BYTES
ROCKSDB_WAL_GROUP_SYNCS
@@ -2149,7 +2193,9 @@ SET @old_mode = @@sql_mode;
SET sql_mode = 'strict_all_tables';
create table t1 (a int, b text, c varchar(400), Primary Key(a), Key(c, b(255))) engine=rocksdb;
drop table t1;
+set global rocksdb_large_prefix=1;
create table t1 (a int, b text, c varchar(400), Primary Key(a), Key(b(1255))) engine=rocksdb;
+set global rocksdb_large_prefix=0;
insert into t1 values (1, '1abcde', '1abcde'), (2, '2abcde', '2abcde'), (3, '3abcde', '3abcde');
select * from t1;
a b c
@@ -2170,7 +2216,7 @@ a b c
3 3abcde 3abcde
drop table t1;
create table t1 (a int, b text, c varchar(400), Primary Key(a), Key(b(2255))) engine=rocksdb;
-ERROR 42000: Specified key was too long; max key length is 2048 bytes
+ERROR 42000: Specified key was too long; max key length is 767 bytes
SET sql_mode = @old_mode;
drop table t0;
#
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/show_engine.result b/storage/rocksdb/mysql-test/rocksdb/r/show_engine.result
index d1072eee4ad..1bcd3692b4a 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/show_engine.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/show_engine.result
@@ -7,17 +7,8 @@ CREATE TABLE t2 (j INT, PRIMARY KEY (j) COMMENT 'rev:cf_t2') ENGINE = ROCKSDB;
CREATE TABLE t3 (k INT, PRIMARY KEY (k) COMMENT 'cf_t1') ENGINE = ROCKSDB;
CREATE TABLE t4 (l INT, PRIMARY KEY (l) COMMENT 'cf_t4') ENGINE = ROCKSDB
PARTITION BY KEY(l) PARTITIONS 4;
-SET GLOBAL rocksdb_force_flush_memtable_now=1;
-SHOW ENGINE rocksdb STATUS;
-Type Name Status
-STATISTICS # #
-DBSTATS # #
-CF_COMPACTION # #
-CF_COMPACTION # #
-CF_COMPACTION # #
-CF_COMPACTION # #
-MEMORY_STATS # #
-BG_THREADS # #
+SET @save.rocksdb_max_background_jobs= @@global.rocksdb_max_background_jobs;
+SET GLOBAL rocksdb_max_background_jobs= 1;
INSERT INTO t1 VALUES (1), (2), (3);
SELECT COUNT(*) FROM t1;
COUNT(*)
@@ -30,6 +21,19 @@ INSERT INTO t4 VALUES (1), (2), (3), (4), (5);
SELECT COUNT(*) FROM t4;
COUNT(*)
5
+SET GLOBAL rocksdb_force_flush_memtable_now=1;
+SET GLOBAL rocksdb_compact_cf="cf_t1";
+SHOW ENGINE rocksdb STATUS;
+Type Name Status
+STATISTICS # #
+DBSTATS # #
+CF_COMPACTION # #
+CF_COMPACTION # #
+CF_COMPACTION # #
+CF_COMPACTION # #
+MEMORY_STATS # #
+BG_THREADS # #
+BG_THREADS # #
SELECT * FROM INFORMATION_SCHEMA.ROCKSDB_CFSTATS;
CF_NAME STAT_TYPE VALUE
__system__ NUM_IMMUTABLE_MEM_TABLE #
@@ -356,7 +360,7 @@ Type Name Status
SHOW ENGINE ALL MUTEX;
SHOW ENGINE rocksdb TRANSACTION STATUS;
Type Name Status
-SNAPSHOTS rocksdb
+rocksdb
============================================================
TIMESTAMP ROCKSDB TRANSACTION MONITOR OUTPUT
============================================================
@@ -364,6 +368,7 @@ TIMESTAMP ROCKSDB TRANSACTION MONITOR OUTPUT
SNAPSHOTS
---------
LIST OF SNAPSHOTS FOR EACH SESSION:
+----------LATEST DETECTED DEADLOCKS----------
-----------------------------------------
END OF ROCKSDB TRANSACTION MONITOR OUTPUT
=========================================
@@ -371,7 +376,7 @@ END OF ROCKSDB TRANSACTION MONITOR OUTPUT
START TRANSACTION WITH CONSISTENT SNAPSHOT;
SHOW ENGINE rocksdb TRANSACTION STATUS;
Type Name Status
-SNAPSHOTS rocksdb
+rocksdb
============================================================
TIMESTAMP ROCKSDB TRANSACTION MONITOR OUTPUT
============================================================
@@ -384,8 +389,10 @@ MySQL thread id TID, OS thread handle PTR, query id QID localhost root ACTION
SHOW ENGINE rocksdb TRANSACTION STATUS
lock count 0, write count 0
insert count 0, update count 0, delete count 0
+----------LATEST DETECTED DEADLOCKS----------
-----------------------------------------
END OF ROCKSDB TRANSACTION MONITOR OUTPUT
=========================================
ROLLBACK;
+SET GLOBAL rocksdb_max_background_jobs= @save.rocksdb_max_background_jobs;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/skip_validate_tmp_table.result b/storage/rocksdb/mysql-test/rocksdb/r/skip_validate_tmp_table.result
new file mode 100644
index 00000000000..36d5eb24e30
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/skip_validate_tmp_table.result
@@ -0,0 +1,4 @@
+CREATE TABLE t1 (pk int primary key) ENGINE=ROCKSDB;
+set session debug="+d,gen_sql_table_name";
+rename table t1 to t2;
+set session debug= "-d,gen_sql_table_name";
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/truncate_table3.result b/storage/rocksdb/mysql-test/rocksdb/r/truncate_table3.result
index 7adf50f9ff3..30b59beac1b 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/truncate_table3.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/truncate_table3.result
@@ -1,4 +1,6 @@
DROP TABLE IF EXISTS t1;
+call mtr.add_suppression("Column family 'cf1' not found");
+call mtr.add_suppression("Column family 'rev:cf2' not found");
set global rocksdb_compact_cf = 'cf1';
set global rocksdb_compact_cf = 'rev:cf2';
set global rocksdb_signal_drop_index_thread = 1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/ttl_primary.result b/storage/rocksdb/mysql-test/rocksdb/r/ttl_primary.result
index 79ed7ec0396..2903e9aa7eb 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/ttl_primary.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/ttl_primary.result
@@ -337,9 +337,7 @@ b INT
) ENGINE=rocksdb
COMMENT='ttl_duration=100;';
ALTER TABLE t1 DROP PRIMARY KEY;
-ERROR HY000: TTL support is currently disabled when table has secondary indexes or hidden PK.
-ALTER TABLE t1 ADD INDEX kb(b), ALGORITHM=INPLACE;
-ERROR HY000: TTL support is currently disabled when table has secondary indexes or hidden PK.
+ERROR HY000: TTL support is currently disabled when table has a hidden PK.
DROP TABLE t1;
CREATE TABLE t1 (
a INT PRIMARY KEY,
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/ttl_secondary.result b/storage/rocksdb/mysql-test/rocksdb/r/ttl_secondary.result
new file mode 100644
index 00000000000..1f748a3841a
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/ttl_secondary.result
@@ -0,0 +1,709 @@
+CREATE TABLE t1 (
+`a` binary(8) NOT NULL,
+`b` varbinary(64) NOT NULL,
+`c` varbinary(256) NOT NULL,
+`ts` bigint(20) UNSIGNED NOT NULL,
+`value` mediumblob NOT NULL,
+PRIMARY KEY (`b`,`a`,`c`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+COMMENT='ttl_duration=1;ttl_col=ts;';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values ('a', 'b', 'c', UNIX_TIMESTAMP(), 'd');
+INSERT INTO t1 values ('d', 'e', 'f', UNIX_TIMESTAMP(), 'g');
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+2
+set global rocksdb_debug_ttl_ignore_pk = 1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk = 0;
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+2
+DROP TABLE t1;
+CREATE TABLE t1 (
+`a` binary(8) NOT NULL,
+`b` varbinary(64) NOT NULL,
+`c` varbinary(256) NOT NULL,
+`ts` bigint(20) UNSIGNED NOT NULL,
+`value` mediumblob NOT NULL,
+PRIMARY KEY (`b`,`a`,`c`),
+KEY kb (`b`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+COMMENT='ttl_duration=1;ttl_col=ts;';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values ('a', 'b', 'c', UNIX_TIMESTAMP(), 'd');
+INSERT INTO t1 values ('d', 'e', 'f', UNIX_TIMESTAMP(), 'g');
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+COUNT(*)
+2
+set global rocksdb_debug_ttl_ignore_pk = 1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk = 0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+COUNT(*)
+0
+DROP TABLE t1;
+CREATE TABLE t1 (
+a bigint(20) NOT NULL,
+b int NOT NULL,
+ts bigint(20) UNSIGNED NOT NULL,
+c int NOT NULL,
+PRIMARY KEY (a),
+KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;ttl_col=ts;';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, 3, UNIX_TIMESTAMP(), 5);
+INSERT INTO t1 values (2, 4, UNIX_TIMESTAMP(), 6);
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+COUNT(*)
+2
+set global rocksdb_debug_ttl_ignore_pk = 1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk = 0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+COUNT(*)
+0
+DROP TABLE t1;
+CREATE TABLE t1 (
+a bigint(20) NOT NULL,
+b int NOT NULL,
+c int NOT NULL,
+ts bigint(20) UNSIGNED NOT NULL,
+PRIMARY KEY (a,c),
+KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;ttl_col=ts;';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, 3, 5, UNIX_TIMESTAMP());
+INSERT INTO t1 values (2, 4, 6, UNIX_TIMESTAMP());
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+COUNT(*)
+2
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk=0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+COUNT(*)
+0
+DROP TABLE t1;
+CREATE TABLE t1 (
+a bigint(20) NOT NULL,
+b int,
+c int,
+ts bigint(20) UNSIGNED NOT NULL,
+PRIMARY KEY (a),
+KEY kbc (b, c)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;ttl_col=ts;';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, NULL, NULL, UNIX_TIMESTAMP());
+INSERT INTO t1 values (2, NULL, NULL, UNIX_TIMESTAMP());
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+COUNT(*)
+2
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk=0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+COUNT(*)
+0
+DROP TABLE t1;
+CREATE TABLE t1 (
+`a` binary(8) NOT NULL,
+`b` varbinary(64),
+`c` varbinary(256),
+`ts` bigint(20) UNSIGNED NOT NULL,
+`value` mediumblob NOT NULL,
+PRIMARY KEY (`a`),
+KEY kbc (`b`, `c`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+COMMENT='ttl_duration=1;ttl_col=ts;';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values ('a', NULL, 'bc', UNIX_TIMESTAMP(), 'd');
+INSERT INTO t1 values ('d', 'efghijk', NULL, UNIX_TIMESTAMP(), 'l');
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+COUNT(*)
+2
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk=0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+COUNT(*)
+0
+DROP TABLE t1;
+CREATE TABLE t1 (
+a bigint(20) NOT NULL,
+b int NOT NULL,
+c int NOT NULL,
+PRIMARY KEY (a),
+KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, 3, 5);
+INSERT INTO t1 values (2, 4, 6);
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+COUNT(*)
+2
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk=0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+COUNT(*)
+0
+DROP TABLE t1;
+CREATE TABLE t1 (
+a int,
+ts bigint(20) UNSIGNED NOT NULL,
+PRIMARY KEY (a, ts),
+KEY kt (ts)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=5;ttl_col=ts;';
+INSERT INTO t1 values (1, UNIX_TIMESTAMP());
+INSERT INTO t1 values (2, UNIX_TIMESTAMP());
+SELECT COUNT(*) FROM t1 FORCE INDEX(kt);
+COUNT(*)
+2
+set global rocksdb_debug_ttl_snapshot_ts = -10;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+SELECT COUNT(*) FROM t1 FORCE INDEX(kt);
+COUNT(*)
+2
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_debug_ttl_snapshot_ts = 10;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+set global rocksdb_debug_ttl_ignore_pk=0;
+SELECT COUNT(*) FROM t1 FORCE INDEX(kt);
+COUNT(*)
+0
+DROP TABLE t1;
+CREATE TABLE t1 (
+a bigint(20) NOT NULL,
+b int NOT NULL,
+ts bigint(20) UNSIGNED NOT NULL,
+c int NOT NULL,
+PRIMARY KEY (a, ts),
+KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;ttl_col=ts;';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, 3, UNIX_TIMESTAMP(), 5);
+INSERT INTO t1 values (2, 4, UNIX_TIMESTAMP(), 6);
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+COUNT(*)
+2
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk=0;
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+0
+DROP TABLE t1;
+CREATE TABLE t1 (
+`a` binary(8) NOT NULL,
+`b` varbinary(64),
+`c` varbinary(256),
+`ts` bigint(20) UNSIGNED NOT NULL,
+`value` mediumblob NOT NULL,
+PRIMARY KEY (`a`, `ts`),
+KEY kb (`b`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+COMMENT='ttl_duration=1;ttl_col=ts;';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values ('a', NULL, 'bc', UNIX_TIMESTAMP(), 'd');
+INSERT INTO t1 values ('de', 'fghijk', NULL, UNIX_TIMESTAMP(), 'l');
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+2
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk=0;
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+0
+DROP TABLE t1;
+CREATE TABLE t1 (
+a INT NOT NULL,
+b varbinary(64) NOT NULL,
+c varbinary(256) NOT NULL,
+ts bigint(20) UNSIGNED NOT NULL,
+value mediumblob NOT NULL,
+PRIMARY KEY (b,a,c),
+KEY kb (b)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+COMMENT='ttl_duration=10;ttl_col=ts;';
+set global rocksdb_debug_ttl_rec_ts = -300;
+INSERT INTO t1 values (1, 'b', 'c', UNIX_TIMESTAMP(), 'd');
+INSERT INTO t1 values (2, 'e', 'f', UNIX_TIMESTAMP(), 'g');
+set global rocksdb_debug_ttl_rec_ts = 300;
+INSERT INTO t1 values (3, 'i', 'j', UNIX_TIMESTAMP(), 'k');
+INSERT INTO t1 values (4, 'm', 'n', UNIX_TIMESTAMP(), 'o');
+set global rocksdb_debug_ttl_rec_ts = 0;
+set global rocksdb_debug_ttl_snapshot_ts = -3600;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+SELECT a FROM t1 FORCE INDEX (kb);
+a
+1
+2
+3
+4
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk=0;
+SELECT a FROM t1 FORCE INDEX (kb);
+a
+3
+4
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_debug_ttl_snapshot_ts = 3600;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+set global rocksdb_debug_ttl_ignore_pk=0;
+SELECT a FROM t1 FORCE INDEX (kb);
+a
+DROP TABLE t1;
+CREATE TABLE t1 (
+a bigint(20) UNSIGNED NOT NULL,
+b int NOT NULL,
+c int NOT NULL,
+ts bigint(20),
+PRIMARY KEY (a,c),
+KEY (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;ttl_col=ts;';
+ERROR HY000: TTL column (ts) in MyRocks must be an unsigned non-null 64-bit integer, exist inside the table, and have an accompanying ttl duration.
+CREATE TABLE t1 (
+a bigint(20) UNSIGNED NOT NULL,
+b int NOT NULL,
+c int NOT NULL,
+ts int,
+PRIMARY KEY (a,c),
+KEY (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;ttl_col=ts;';
+ERROR HY000: TTL column (ts) in MyRocks must be an unsigned non-null 64-bit integer, exist inside the table, and have an accompanying ttl duration.
+CREATE TABLE t1 (
+a bigint(20) UNSIGNED NOT NULL,
+b int NOT NULL,
+c int NOT NULL,
+PRIMARY KEY (a,c),
+KEY (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=abc;';
+ERROR HY000: TTL duration (abc) in MyRocks must be an unsigned non-null 64-bit integer.
+CREATE TABLE t1 (
+a bigint(20) UNSIGNED NOT NULL,
+b int NOT NULL,
+c int NOT NULL,
+PRIMARY KEY (a,c),
+KEY (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;ttl_col=abc;';
+ERROR HY000: TTL column (abc) in MyRocks must be an unsigned non-null 64-bit integer, exist inside the table, and have an accompanying ttl duration.
+CREATE TABLE t1 (
+a bigint(20) UNSIGNED NOT NULL,
+b int NOT NULL,
+c int NOT NULL,
+PRIMARY KEY (a,c),
+KEY (b)
+) ENGINE=rocksdb
+COMMENT='ttl_col=abc;';
+ERROR HY000: TTL column (abc) in MyRocks must be an unsigned non-null 64-bit integer, exist inside the table, and have an accompanying ttl duration.
+CREATE TABLE t1 (
+a bigint(20) NOT NULL,
+b int NOT NULL,
+PRIMARY KEY (a),
+KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=500;';
+INSERT INTO t1 values (1, 1);
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+COUNT(*)
+1
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk=0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+COUNT(*)
+1
+DROP TABLE t1;
+CREATE TABLE t1 (
+a INT PRIMARY KEY,
+b INT NOT NULL,
+KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=100;';
+INSERT INTO t1 values (1, 1);
+SELECT * FROM t1 FORCE INDEX (kb);
+a b
+1 1
+set global rocksdb_debug_ttl_rec_ts = -300;
+ALTER TABLE t1 COMMENT = 'ttl_duration=1';
+set global rocksdb_debug_ttl_rec_ts = 0;
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) NOT NULL,
+ `b` int(11) NOT NULL,
+ PRIMARY KEY (`a`),
+ KEY `kb` (`b`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COMMENT='ttl_duration=1'
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk=0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+COUNT(*)
+0
+DROP TABLE t1;
+CREATE TABLE t1 (
+a INT PRIMARY KEY,
+b INT,
+KEY (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=100;';
+ALTER TABLE t1 DROP PRIMARY KEY;
+ERROR HY000: TTL support is currently disabled when table has a hidden PK.
+DROP TABLE t1;
+CREATE TABLE t1 (
+a INT PRIMARY KEY,
+b INT,
+KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=5;';
+INSERT INTO t1 VALUES (1,1);
+INSERT INTO t1 VALUES (2,2);
+ALTER TABLE t1 DROP PRIMARY KEY, ADD PRIMARY KEY(b);
+set global rocksdb_debug_ttl_snapshot_ts = -3600;
+set global rocksdb_force_flush_memtable_now=1;
+set @@global.rocksdb_compact_cf = 'default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+COUNT(*)
+2
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_debug_ttl_snapshot_ts = 3600;
+set @@global.rocksdb_compact_cf = 'default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+set global rocksdb_debug_ttl_ignore_pk=0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+COUNT(*)
+0
+DROP TABLE t1;
+CREATE TABLE t1 (
+a bigint(20) UNSIGNED NOT NULL,
+b int,
+PRIMARY KEY (a,b),
+KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='asdadfasdfsadfadf ;ttl_duration=1; asfasdfasdfadfa';
+INSERT INTO t1 values (UNIX_TIMESTAMP(), 1);
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+COUNT(*)
+1
+set global rocksdb_debug_ttl_snapshot_ts = 3600;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+COUNT(*)
+0
+ALTER TABLE t1 COMMENT = 'adsf;;ttl_duration=5;asfasdfa;ttl_col=a;asdfasdf;';
+set global rocksdb_debug_ttl_rec_ts = 300;
+INSERT INTO t1 values (UNIX_TIMESTAMP(), 2);
+set global rocksdb_debug_ttl_rec_ts = 0;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+COUNT(*)
+1
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_debug_ttl_snapshot_ts = 3600;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+set global rocksdb_debug_ttl_ignore_pk=0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+COUNT(*)
+0
+DROP TABLE t1;
+CREATE TABLE t1 (
+a bigint(20) NOT NULL,
+b int NOT NULL,
+PRIMARY KEY (a),
+KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=5;';
+set global rocksdb_debug_ttl_rec_ts = -300;
+INSERT INTO t1 values (1, 0);
+INSERT INTO t1 values (3, 0);
+INSERT INTO t1 values (5, 0);
+set global rocksdb_debug_ttl_rec_ts = 300;
+INSERT INTO t1 values (7, 0);
+INSERT INTO t1 values (9, 0);
+set global rocksdb_debug_ttl_rec_ts = 0;
+UPDATE t1 SET a=a+1;
+SELECT * FROM t1 FORCE INDEX (kb);
+a b
+10 0
+2 0
+4 0
+6 0
+8 0
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk=0;
+SELECT * FROM t1;
+a b
+10 0
+8 0
+DROP TABLE t1;
+CREATE TABLE t1 (
+a INT,
+b bigint(20) UNSIGNED NOT NULL,
+PRIMARY KEY (a),
+KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=5;ttl_col=b;';
+set global rocksdb_debug_ttl_rec_ts = -300;
+INSERT INTO t1 values (1, UNIX_TIMESTAMP());
+INSERT INTO t1 values (3, UNIX_TIMESTAMP());
+INSERT INTO t1 values (5, UNIX_TIMESTAMP());
+INSERT INTO t1 values (7, UNIX_TIMESTAMP());
+set global rocksdb_debug_ttl_rec_ts = 300;
+UPDATE t1 SET b=(UNIX_TIMESTAMP()+1) WHERE a < 4;
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT a FROM t1 FORCE INDEX (kb);
+a
+1
+3
+5
+7
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk=0;
+SELECT a FROM t1 FORCE INDEX (kb);
+a
+1
+3
+DROP TABLE t1;
+CREATE TABLE t1 (
+a bigint(20) NOT NULL,
+b int NOT NULL,
+PRIMARY KEY (a),
+KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, 1);
+INSERT INTO t1 values (2, 1);
+INSERT INTO t1 values (3, 1);
+set global rocksdb_debug_ttl_rec_ts = 0;
+set global rocksdb_enable_ttl=0;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_rows_expired';
+set global rocksdb_enable_ttl=1;
+set global rocksdb_compact_cf='default';
+select variable_value-@c from information_schema.global_status where variable_name='rocksdb_rows_expired';
+variable_value-@c
+6
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+COUNT(*)
+0
+DROP TABLE t1;
+CREATE TABLE t1 (
+a bigint(20) NOT NULL,
+b int NOT NULL,
+PRIMARY KEY (a),
+KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=100;';
+INSERT INTO t1 values (1, 1);
+INSERT INTO t1 values (2, 2);
+INSERT INTO t1 values (3, 3);
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_rows_expired';
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+select variable_value-@c from information_schema.global_status where variable_name='rocksdb_rows_expired';
+variable_value-@c
+0
+DROP TABLE t1;
+CREATE TABLE t1 (
+a INT,
+b bigint(20) UNSIGNED NOT NULL,
+PRIMARY KEY (a, b),
+KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=5;ttl_col=b;';
+set global rocksdb_debug_ttl_rec_ts = -300;
+INSERT INTO t1 values (1, UNIX_TIMESTAMP());
+INSERT INTO t1 values (3, UNIX_TIMESTAMP());
+INSERT INTO t1 values (5, UNIX_TIMESTAMP());
+INSERT INTO t1 values (7, UNIX_TIMESTAMP());
+set global rocksdb_debug_ttl_rec_ts = 300;
+UPDATE t1 SET b=(UNIX_TIMESTAMP()+1) WHERE a < 4;
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT a FROM t1 FORCE INDEX (kb);
+a
+1
+3
+5
+7
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk=0;
+SELECT a FROM t1 FORCE INDEX (kb);
+a
+1
+3
+DROP TABLE t1;
+CREATE TABLE t1 (
+a INT,
+b bigint(20) UNSIGNED NOT NULL,
+PRIMARY KEY (a, b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=5;ttl_col=b;';
+set global rocksdb_debug_ttl_rec_ts = -300;
+INSERT INTO t1 values (1, UNIX_TIMESTAMP());
+INSERT INTO t1 values (3, UNIX_TIMESTAMP());
+INSERT INTO t1 values (5, UNIX_TIMESTAMP());
+INSERT INTO t1 values (7, UNIX_TIMESTAMP());
+set global rocksdb_debug_ttl_rec_ts = 300;
+UPDATE t1 SET b=(UNIX_TIMESTAMP()+1) WHERE a < 4;
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT a FROM t1;
+a
+1
+3
+5
+7
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+SELECT a FROM t1;
+a
+1
+3
+DROP TABLE t1;
+CREATE TABLE t1 (
+`a` binary(8) NOT NULL,
+`b` varbinary(64) NOT NULL,
+`c` varbinary(256) NOT NULL,
+`ts` bigint(20) UNSIGNED NOT NULL,
+`value` mediumblob NOT NULL,
+PRIMARY KEY (`b`,`a`,`c`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+COMMENT='ttl_duration=1;ttl_col=ts;';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values ('a', 'b', 'c', UNIX_TIMESTAMP(), 'd');
+INSERT INTO t1 values ('d', 'e', 'f', UNIX_TIMESTAMP(), 'g');
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*);
+COUNT(*)
+1
+set global rocksdb_debug_ttl_ignore_pk = 1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk = 0;
+SELECT COUNT(*);
+COUNT(*)
+1
+CREATE INDEX kb on t1 (b);
+set global rocksdb_debug_ttl_ignore_pk = 1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk = 0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+COUNT(*)
+0
+DROP TABLE t1;
+CREATE TABLE t1 (
+`a` binary(8) NOT NULL,
+`b` varbinary(64) NOT NULL,
+`c` varbinary(256) NOT NULL,
+`value` mediumblob NOT NULL,
+PRIMARY KEY (`b`,`a`,`c`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+COMMENT='ttl_duration=1';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values ('a', 'b', 'c', 'd');
+INSERT INTO t1 values ('d', 'e', 'f', 'g');
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*);
+COUNT(*)
+1
+set global rocksdb_debug_ttl_ignore_pk = 1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk = 0;
+SELECT COUNT(*);
+COUNT(*)
+1
+CREATE INDEX kb on t1 (b);
+set global rocksdb_debug_ttl_ignore_pk = 1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk = 0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+COUNT(*)
+0
+DROP TABLE t1;
+CREATE TABLE t1 (
+`a` binary(8) NOT NULL,
+`b` varbinary(64) NOT NULL,
+`c` varbinary(256) NOT NULL,
+`ts` bigint(20) UNSIGNED NOT NULL,
+`value` mediumblob NOT NULL,
+PRIMARY KEY (`b`,`a`,`c`, `ts`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+COMMENT='ttl_duration=1;ttl_col=ts;';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values ('a', 'b', 'c', UNIX_TIMESTAMP(), 'd');
+INSERT INTO t1 values ('d', 'e', 'f', UNIX_TIMESTAMP(), 'g');
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*);
+COUNT(*)
+1
+set global rocksdb_debug_ttl_ignore_pk = 1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk = 0;
+SELECT COUNT(*);
+COUNT(*)
+1
+CREATE INDEX kb on t1 (b);
+set global rocksdb_debug_ttl_ignore_pk = 1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk = 0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+COUNT(*)
+0
+DROP TABLE t1;
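As a condensed sketch of what the new ttl_secondary.result file exercises (the table name and duration below are illustrative and not taken from the patch itself):

  CREATE TABLE ttl_demo (
    a INT PRIMARY KEY,
    b INT NOT NULL,
    ts BIGINT UNSIGNED NOT NULL,  # TTL column must be an unsigned non-null 64-bit integer
    KEY kb (b)
  ) ENGINE=ROCKSDB
  COMMENT='ttl_duration=3600;ttl_col=ts;';

  # Rows older than ttl_duration seconds are dropped for both the primary key and the
  # secondary key during compaction; the tests above force that to happen with:
  SET GLOBAL rocksdb_force_flush_memtable_now = 1;
  SET GLOBAL rocksdb_compact_cf = 'default';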
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/ttl_secondary_read_filtering.result b/storage/rocksdb/mysql-test/rocksdb/r/ttl_secondary_read_filtering.result
new file mode 100644
index 00000000000..735181d19a2
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/ttl_secondary_read_filtering.result
@@ -0,0 +1,494 @@
+CREATE TABLE t1 (
+a int PRIMARY KEY,
+b int NOT NULL,
+KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, 1);
+INSERT INTO t1 values (2, 2);
+set global rocksdb_debug_ttl_rec_ts = 0;
+set global rocksdb_force_flush_memtable_now=1;
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+a b
+SELECT * FROM t1 FORCE INDEX (kb);
+a b
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_rows_expired';
+set global rocksdb_debug_ttl_ignore_pk = 1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk = 0;
+select variable_value-@c from information_schema.global_status where variable_name='rocksdb_rows_expired';
+variable_value-@c
+2
+DROP TABLE t1;
+CREATE TABLE t1 (
+a int PRIMARY KEY,
+b BIGINT UNSIGNED NOT NULL,
+KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=10;';
+set global rocksdb_debug_ttl_rec_ts = -300;
+INSERT INTO t1 values (1, UNIX_TIMESTAMP());
+set global rocksdb_debug_ttl_rec_ts = 300;
+INSERT INTO t1 values (2, UNIX_TIMESTAMP());
+INSERT INTO t1 values (3, UNIX_TIMESTAMP());
+set global rocksdb_debug_ttl_rec_ts = 0;
+set global rocksdb_force_flush_memtable_now=1;
+# 1 should be hidden
+SELECT a FROM t1 FORCE INDEX (PRIMARY);
+a
+2
+3
+SELECT a FROM t1 FORCE INDEX (kb);
+a
+2
+3
+set global rocksdb_debug_ttl_ignore_pk = 1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk = 0;
+# none should be hidden yet, compaction runs but records aren't expired
+SELECT a FROM t1 FORCE INDEX (PRIMARY);
+a
+2
+3
+SELECT a FROM t1 FORCE INDEX (kb);
+a
+2
+3
+# all should be hidden now, even though compaction hasn't run again
+set global rocksdb_debug_ttl_read_filter_ts = -310;
+SELECT a FROM t1 FORCE INDEX (PRIMARY);
+a
+SELECT a FROM t1 FORCE INDEX (kb);
+a
+set global rocksdb_debug_ttl_read_filter_ts = 0;
+DROP TABLE t1;
+CREATE TABLE t1 (
+a int PRIMARY KEY,
+b int NOT NULL,
+KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, 1);
+INSERT INTO t1 values (3, 3);
+INSERT INTO t1 values (5, 5);
+INSERT INTO t1 values (7, 7);
+set global rocksdb_debug_ttl_rec_ts = 0;
+# should return nothing.
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+a b
+SELECT * FROM t1 FORCE INDEX (kb);
+a b
+set global rocksdb_enable_ttl_read_filtering=0;
+# should return everything
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+a b
+1 1
+3 3
+5 5
+7 7
+SELECT * FROM t1 FORCE INDEX (kb);
+a b
+1 1
+3 3
+5 5
+7 7
+set global rocksdb_enable_ttl_read_filtering=1;
+# should return nothing.
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+a b
+SELECT * FROM t1 FORCE INDEX (kb);
+a b
+DROP TABLE t1;
+# Read filtering index scan tests (None of these queries should return any results)
+CREATE TABLE t1 (
+a int,
+b int,
+c int,
+PRIMARY KEY (a,b,c),
+KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (0,0,0);
+INSERT INTO t1 values (0,0,1);
+INSERT INTO t1 values (0,1,0);
+INSERT INTO t1 values (0,1,1);
+INSERT INTO t1 values (1,1,2);
+INSERT INTO t1 values (1,2,1);
+INSERT INTO t1 values (1,2,2);
+INSERT INTO t1 values (1,2,3);
+set global rocksdb_debug_ttl_rec_ts = 0;
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_rows_expired';
+set global rocksdb_force_flush_memtable_now=1;
+SELECT * FROM t1 FORCE INDEX (PRIMARY) WHERE a=1 AND b=2 AND c=2;
+a b c
+SELECT * FROM t1 FORCE INDEX (kb) WHERE a=1 AND b=2 AND c=2;
+a b c
+SELECT * FROM t1 FORCE INDEX (PRIMARY) WHERE a = 1;
+a b c
+SELECT * FROM t1 FORCE INDEX (kb) WHERE a = 1;
+a b c
+SELECT max(a) FROM t1 FORCE INDEX (PRIMARY) WHERE a < 3;
+max(a)
+NULL
+SELECT max(a) FROM t1 FORCE INDEX (kb) WHERE a < 3;
+max(a)
+NULL
+SELECT max(a) FROM t1 FORCE INDEX (PRIMARY) WHERE a < 2 AND b = 1 AND c < 3;
+max(a)
+NULL
+SELECT max(a) FROM t1 FORCE INDEX (kb) WHERE a < 2 AND b = 1 AND c < 3;
+max(a)
+NULL
+SELECT min(a) FROM t1 FORCE INDEX (PRIMARY) WHERE a >= 1;
+min(a)
+NULL
+SELECT min(a) FROM t1 FORCE INDEX (kb) WHERE a >= 1;
+min(a)
+NULL
+SELECT min(a) FROM t1 FORCE INDEX (PRIMARY) WHERE a > 1;
+min(a)
+NULL
+SELECT min(a) FROM t1 FORCE INDEX (kb) WHERE a > 1;
+min(a)
+NULL
+SELECT * FROM t1 FORCE INDEX (PRIMARY) WHERE a=1 and b in (1) order by c desc;
+a b c
+SELECT * FROM t1 FORCE INDEX (kb) WHERE a=1 and b in (1) order by c desc;
+a b c
+SELECT max(a) FROM t1 FORCE INDEX (PRIMARY) WHERE a <=10;
+max(a)
+NULL
+SELECT max(a) FROM t1 FORCE INDEX (kb) WHERE a <=10;
+max(a)
+NULL
+SELECT a FROM t1 FORCE INDEX (PRIMARY) WHERE a > 0 and a <= 2;
+a
+SELECT a FROM t1 FORCE INDEX (kb) WHERE a > 0 and a <= 2;
+a
+select variable_value-@c from information_schema.global_status where variable_name='rocksdb_rows_expired';
+variable_value-@c
+0
+set global rocksdb_debug_ttl_ignore_pk = 1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk = 0;
+select variable_value-@c from information_schema.global_status where variable_name='rocksdb_rows_expired';
+variable_value-@c
+8
+DROP TABLE t1;
+# Attempt to update expired value, should filter out
+set global rocksdb_force_flush_memtable_now=1;
+CREATE TABLE t1 (
+a int PRIMARY KEY
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1);
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+a
+SELECT * FROM t1;
+a
+# No error is thrown here, under the hood index_next_with_direction is
+# filtering out the record from being seen in the first place.
+UPDATE t1 set a = 1;
+DROP TABLE t1;
+# Ensure no rows can disappear in the middle of long-running transactions
+# Also ensure repeatable-read works as expected
+CREATE TABLE t1 (
+a int PRIMARY KEY,
+b int NOT NULL,
+KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=5;';
+INSERT INTO t1 values (1, 1);
+# Creating Snapshot (start transaction)
+BEGIN;
+# Nothing filtered out here
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+a b
+1 1
+SELECT * FROM t1 FORCE INDEX (kb);
+a b
+1 1
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+a b
+1 1
+SELECT * FROM t1 FORCE INDEX (kb);
+a b
+1 1
+# Switching to connection 2
+# compaction doesn't do anything since con1 snapshot is still open
+set global rocksdb_debug_ttl_ignore_pk = 1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk = 0;
+# read filtered out, because on a different connection, on
+# this connection the records have 'expired' already so they are filtered out
+# even though they have not yet been removed by compaction
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+a b
+SELECT * FROM t1 FORCE INDEX (kb);
+a b
+# Switching to connection 1
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+a b
+1 1
+SELECT * FROM t1 FORCE INDEX (kb);
+a b
+1 1
+UPDATE t1 set a = a + 1;
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+a b
+2 1
+SELECT * FROM t1 FORCE INDEX (kb);
+a b
+2 1
+COMMIT;
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+a b
+SELECT * FROM t1 FORCE INDEX (kb);
+a b
+DROP TABLE t1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+CREATE TABLE t1 (
+a int PRIMARY KEY,
+b int NOT NULL,
+KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;';
+# On Connection 1
+# Creating Snapshot (start transaction)
+BEGIN;
+SELECT * FROM t1 FORCE INDEX (kb);
+a b
+# On Connection 2
+set global rocksdb_debug_ttl_rec_ts = -2;
+INSERT INTO t1 values (1, 1);
+INSERT INTO t1 values (3, 3);
+INSERT INTO t1 values (5, 5);
+INSERT INTO t1 values (7, 7);
+set global rocksdb_debug_ttl_rec_ts = 0;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+# On Connection 1
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+a b
+SELECT * FROM t1 FORCE INDEX (kb);
+a b
+# On Connection 2
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+a b
+SELECT * FROM t1 FORCE INDEX (kb);
+a b
+set global rocksdb_enable_ttl_read_filtering=0;
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+a b
+1 1
+3 3
+5 5
+7 7
+SELECT * FROM t1 FORCE INDEX (kb);
+a b
+1 1
+3 3
+5 5
+7 7
+set global rocksdb_enable_ttl_read_filtering=1;
+DROP TABLE t1;
+CREATE TABLE t1 (
+a int,
+b int,
+ts bigint(20) UNSIGNED NOT NULL,
+PRIMARY KEY (a),
+KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;ttl_col=ts;';
+set global rocksdb_debug_ttl_rec_ts = 100;
+INSERT INTO t1 VALUES (1, 1, UNIX_TIMESTAMP());
+INSERT INTO t1 VALUES (2, 2, UNIX_TIMESTAMP());
+INSERT INTO t1 VALUES (3, 3, UNIX_TIMESTAMP());
+INSERT INTO t1 VALUES (4, 4, UNIX_TIMESTAMP());
+INSERT INTO t1 VALUES (5, 5, UNIX_TIMESTAMP());
+INSERT INTO t1 VALUES (6, 6, UNIX_TIMESTAMP());
+INSERT INTO t1 VALUES (7, 7, UNIX_TIMESTAMP());
+INSERT INTO t1 VALUES (8, 8, UNIX_TIMESTAMP());
+INSERT INTO t1 VALUES (9, 9, UNIX_TIMESTAMP());
+INSERT INTO t1 VALUES (10, 10, UNIX_TIMESTAMP());
+set global rocksdb_debug_ttl_rec_ts = 0;
+set global rocksdb_force_flush_memtable_now=1;
+# None are expired
+SELECT a, b FROM t1 FORCE INDEX (kb);
+a b
+1 1
+2 2
+3 3
+4 4
+5 5
+6 6
+7 7
+8 8
+9 9
+10 10
+set global rocksdb_debug_ttl_rec_ts = -100;
+UPDATE t1 SET ts=(UNIX_TIMESTAMP()+1) WHERE a IN (4, 7);
+set global rocksdb_debug_ttl_rec_ts = 0;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+# 4 and 7 should be gone
+SELECT a, b FROM t1 FORCE INDEX (kb);
+a b
+1 1
+2 2
+3 3
+5 5
+6 6
+8 8
+9 9
+10 10
+DROP TABLE t1;
+CREATE TABLE t1 (
+c1 INT,
+c2 INT,
+name VARCHAR(25) NOT NULL,
+PRIMARY KEY (c1, c2),
+KEY kc2 (c2)
+) ENGINE=ROCKSDB
+COMMENT='ttl_duration=1;';
+set global rocksdb_debug_ttl_rec_ts = -1200;
+INSERT INTO t1 values (1,1,'a');
+INSERT INTO t1 values (2,2,'b');
+set global rocksdb_debug_ttl_rec_ts = 1200;
+INSERT INTO t1 values (3,3,'c');
+INSERT INTO t1 values (4,4,'d');
+set global rocksdb_debug_ttl_rec_ts = -1200;
+INSERT INTO t1 values (5,5,'e');
+INSERT INTO t1 values (6,6,'f');
+set global rocksdb_debug_ttl_rec_ts = 1200;
+INSERT INTO t1 values (7,7,'g');
+INSERT INTO t1 values (8,8,'h');
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+c1 c2 name
+3 3 c
+4 4 d
+7 7 g
+8 8 h
+SELECT * FROM t1 FORCE INDEX (kc2);
+c1 c2 name
+3 3 c
+4 4 d
+7 7 g
+8 8 h
+SELECT * FROM t1 FORCE INDEX (PRIMARY) WHERE c1 > 5;
+c1 c2 name
+7 7 g
+8 8 h
+SELECT * FROM t1 FORCE INDEX (kc2) WHERE c2 > 5;
+c1 c2 name
+7 7 g
+8 8 h
+SELECT * FROM t1 FORCE INDEX (PRIMARY) WHERE 3 < c1 AND c1 < 6;
+c1 c2 name
+4 4 d
+SELECT * FROM t1 FORCE INDEX (kc2) WHERE 3 < c2 AND c2 < 6;
+c1 c2 name
+4 4 d
+DROP TABLE t1;
+CREATE TABLE t1 (
+a int,
+b int,
+PRIMARY KEY (a),
+KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1800;';
+set global rocksdb_debug_ttl_rec_ts = 0;
+INSERT INTO t1 values (1,1);
+INSERT INTO t1 values (2,2);
+INSERT INTO t1 values (7,7);
+INSERT INTO t1 values (10,10);
+INSERT INTO t1 values (11,11);
+INSERT INTO t1 values (12,12);
+set global rocksdb_debug_ttl_rec_ts = 450;
+INSERT INTO t1 values (3,3);
+INSERT INTO t1 values (4,4);
+INSERT INTO t1 values (8,8);
+INSERT INTO t1 values (16,16);
+INSERT INTO t1 values (17,17);
+INSERT INTO t1 values (18,18);
+set global rocksdb_debug_ttl_rec_ts = 900;
+INSERT INTO t1 values (5,5);
+INSERT INTO t1 values (6,6);
+INSERT INTO t1 values (9,9);
+INSERT INTO t1 values (13,13);
+INSERT INTO t1 values (14,14);
+INSERT INTO t1 values (15,15);
+set global rocksdb_debug_ttl_rec_ts = 0;
+# Should see everything
+SELECT * FROM t1;
+a b
+1 1
+2 2
+3 3
+4 4
+5 5
+6 6
+7 7
+8 8
+9 9
+10 10
+11 11
+12 12
+13 13
+14 14
+15 15
+16 16
+17 17
+18 18
+# Should have no records from the first group
+set global rocksdb_debug_ttl_read_filter_ts = -1800;
+SELECT * FROM t1;
+a b
+3 3
+4 4
+5 5
+6 6
+8 8
+9 9
+13 13
+14 14
+15 15
+16 16
+17 17
+18 18
+SELECT * FROM t1 FORCE INDEX (kb) WHERE a > 5 AND a < 15;
+a b
+6 6
+8 8
+9 9
+13 13
+14 14
+# Should only have records from the last group
+set global rocksdb_debug_ttl_read_filter_ts = -1800 - 450;
+SELECT * FROM t1;
+a b
+5 5
+6 6
+9 9
+13 13
+14 14
+15 15
+SELECT * FROM t1 FORCE INDEX (kb) WHERE a < 10;
+a b
+5 5
+6 6
+9 9
+# Should be empty
+set global rocksdb_debug_ttl_read_filter_ts = -1800 - 900;
+SELECT * FROM t1;
+a b
+set global rocksdb_debug_ttl_read_filter_ts = 0;
+DROP TABLE t1;
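A condensed sketch of the read-filtering behaviour the expectations above encode (the statements mirror the test; the comments are explanatory additions):

  SELECT * FROM t1;                                  # expired rows are hidden at read time,
                                                     # even before compaction removes them
  SET GLOBAL rocksdb_enable_ttl_read_filtering = 0;  # debug switch used by the test
  SELECT * FROM t1;                                  # expired rows become visible again
  SET GLOBAL rocksdb_enable_ttl_read_filtering = 1;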
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/ttl_secondary_read_filtering_multiple_index.result b/storage/rocksdb/mysql-test/rocksdb/r/ttl_secondary_read_filtering_multiple_index.result
new file mode 100644
index 00000000000..e4c361576f5
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/ttl_secondary_read_filtering_multiple_index.result
@@ -0,0 +1,82 @@
+CREATE TABLE t1 (
+a int NOT NULL,
+b int NOT NULL,
+c int NOT NULL,
+PRIMARY KEY (a),
+KEY kb (b) COMMENT 'kb',
+KEY kc (c) COMMENT 'kc'
+) ENGINE=ROCKSDB
+COMMENT='ttl_duration=1';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, 1, 1);
+INSERT INTO t1 values (2, 2, 2);
+set global rocksdb_debug_ttl_rec_ts = 100;
+INSERT INTO t1 values (3, 3, 3);
+set global rocksdb_debug_ttl_rec_ts = 0;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='kb';
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+a b c
+3 3 3
+SELECT * FROM t1 FORCE INDEX (kb);
+a b c
+3 3 3
+SELECT * FROM t1 FORCE INDEX (kc);
+a b c
+3 3 3
+DROP TABLE t1;
+CREATE TABLE t1 (
+a int NOT NULL,
+b int NOT NULL,
+c int NOT NULL,
+PRIMARY KEY (a),
+KEY kb (b) COMMENT 'kb',
+KEY kc (c) COMMENT 'kc'
+) ENGINE=ROCKSDB
+COMMENT='ttl_duration=1';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, 1, 1);
+INSERT INTO t1 values (2, 2, 2);
+set global rocksdb_debug_ttl_rec_ts = 100;
+INSERT INTO t1 values (3, 3, 3);
+set global rocksdb_debug_ttl_rec_ts = 0;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+a b c
+3 3 3
+SELECT * FROM t1 FORCE INDEX (kb);
+a b c
+3 3 3
+SELECT * FROM t1 FORCE INDEX (kc);
+a b c
+3 3 3
+DROP TABLE t1;
+CREATE TABLE t1 (
+a int NOT NULL,
+b int NOT NULL,
+c int NOT NULL,
+PRIMARY KEY (a),
+KEY kb (b) COMMENT 'kb',
+KEY kc (c) COMMENT 'kc'
+) ENGINE=ROCKSDB
+COMMENT='ttl_duration=1';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, 1, 1);
+INSERT INTO t1 values (2, 2, 2);
+set global rocksdb_debug_ttl_rec_ts = 100;
+INSERT INTO t1 values (3, 3, 3);
+set global rocksdb_debug_ttl_rec_ts = 0;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_compact_cf='kb';
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+a b c
+3 3 3
+SELECT * FROM t1 FORCE INDEX (kb);
+a b c
+3 3 3
+SELECT * FROM t1 FORCE INDEX (kc);
+a b c
+3 3 3
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/ttl_secondary_with_partitions.result b/storage/rocksdb/mysql-test/rocksdb/r/ttl_secondary_with_partitions.result
new file mode 100644
index 00000000000..442dc1167d9
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/ttl_secondary_with_partitions.result
@@ -0,0 +1,389 @@
+CREATE TABLE t1 (
+c1 INT,
+c2 INT,
+PRIMARY KEY (`c1`),
+KEY kc2 (`c2`)
+) ENGINE=ROCKSDB
+COMMENT="custom_p0_ttl_duration=1;"
+PARTITION BY LIST(c1) (
+PARTITION custom_p0 VALUES IN (1, 4, 7),
+PARTITION custom_p1 VALUES IN (2, 5, 8),
+PARTITION custom_p2 VALUES IN (3, 6, 9)
+);
+set global rocksdb_debug_ttl_rec_ts = -3600;
+INSERT INTO t1 values (1, 1);
+INSERT INTO t1 values (2, 2);
+INSERT INTO t1 values (3, 3);
+INSERT INTO t1 values (4, 4);
+INSERT INTO t1 values (5, 5);
+INSERT INTO t1 values (6, 6);
+INSERT INTO t1 values (7, 7);
+INSERT INTO t1 values (8, 8);
+INSERT INTO t1 values (9, 9);
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+c1 c2
+1 1
+2 2
+3 3
+4 4
+5 5
+6 6
+7 7
+8 8
+9 9
+SELECT * FROM t1 FORCE INDEX (kc2);
+c1 c2
+1 1
+2 2
+3 3
+4 4
+5 5
+6 6
+7 7
+8 8
+9 9
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+c1 c2
+2 2
+3 3
+5 5
+6 6
+8 8
+9 9
+SELECT * FROM t1 FORCE INDEX (kc2);
+c1 c2
+2 2
+3 3
+5 5
+6 6
+8 8
+9 9
+DROP TABLE t1;
+CREATE TABLE t1 (
+c1 INT,
+c2 INT,
+name VARCHAR(25) NOT NULL,
+PRIMARY KEY (`c1`, `c2`) COMMENT 'custom_p0_cfname=foo;custom_p1_cfname=my_custom_cf;custom_p2_cfname=baz',
+KEY kc2 (`c2`)
+) ENGINE=ROCKSDB
+COMMENT="custom_p0_ttl_duration=1;custom_p1_ttl_duration=7;"
+PARTITION BY LIST(c1) (
+PARTITION custom_p0 VALUES IN (1, 4, 7),
+PARTITION custom_p1 VALUES IN (2, 5, 8),
+PARTITION custom_p2 VALUES IN (3, 6, 9)
+);
+set global rocksdb_debug_ttl_rec_ts = -1200;
+INSERT INTO t1 values (1,1,'a');
+INSERT INTO t1 values (4,4,'aaaa');
+INSERT INTO t1 values (7,7,'aaaaaaa');
+set global rocksdb_debug_ttl_rec_ts = 1200;
+INSERT INTO t1 values (2,2,'aa');
+INSERT INTO t1 values (3,3,'aaa');
+INSERT INTO t1 values (5,5,'aaaaa');
+INSERT INTO t1 values (6,6,'aaaaaa');
+INSERT INTO t1 values (8,8,'aaaaaaaa');
+INSERT INTO t1 values (9,9,'aaaaaaaaa');
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+c1 c2 name
+1 1 a
+2 2 aa
+3 3 aaa
+4 4 aaaa
+5 5 aaaaa
+6 6 aaaaaa
+7 7 aaaaaaa
+8 8 aaaaaaaa
+9 9 aaaaaaaaa
+SELECT * FROM t1 FORCE INDEX (kc2);
+c1 c2 name
+1 1 a
+2 2 aa
+3 3 aaa
+4 4 aaaa
+5 5 aaaaa
+6 6 aaaaaa
+7 7 aaaaaaa
+8 8 aaaaaaaa
+9 9 aaaaaaaaa
+set global rocksdb_force_flush_memtable_now=1;
+set @@global.rocksdb_compact_cf = 'foo';
+set @@global.rocksdb_compact_cf = 'my_custom_cf';
+set @@global.rocksdb_compact_cf = 'default';
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+c1 c2 name
+2 2 aa
+3 3 aaa
+5 5 aaaaa
+6 6 aaaaaa
+8 8 aaaaaaaa
+9 9 aaaaaaaaa
+SELECT * FROM t1 FORCE INDEX (kc2);
+c1 c2 name
+2 2 aa
+3 3 aaa
+5 5 aaaaa
+6 6 aaaaaa
+8 8 aaaaaaaa
+9 9 aaaaaaaaa
+set global rocksdb_debug_ttl_snapshot_ts = 3600;
+set @@global.rocksdb_compact_cf = 'foo';
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+c1 c2 name
+2 2 aa
+3 3 aaa
+5 5 aaaaa
+6 6 aaaaaa
+8 8 aaaaaaaa
+9 9 aaaaaaaaa
+SELECT * FROM t1 FORCE INDEX (kc2);
+c1 c2 name
+2 2 aa
+3 3 aaa
+5 5 aaaaa
+6 6 aaaaaa
+8 8 aaaaaaaa
+9 9 aaaaaaaaa
+set @@global.rocksdb_compact_cf = 'my_custom_cf';
+set @@global.rocksdb_compact_cf = 'default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+c1 c2 name
+3 3 aaa
+6 6 aaaaaa
+9 9 aaaaaaaaa
+SELECT * FROM t1 FORCE INDEX (kc2);
+c1 c2 name
+3 3 aaa
+6 6 aaaaaa
+9 9 aaaaaaaaa
+DROP TABLE t1;
+CREATE TABLE t1 (
+c1 INT,
+c2 INT,
+name VARCHAR(25) NOT NULL,
+event DATE,
+PRIMARY KEY (`c1`, `c2`) COMMENT 'custom_p0_cfname=foo;custom_p1_cfname=bar;custom_p2_cfname=baz;',
+KEY kc2 (c2)
+) ENGINE=ROCKSDB
+COMMENT="custom_p0_ttl_duration=9999;custom_p2_ttl_duration=5;"
+PARTITION BY LIST(c1) (
+PARTITION custom_p0 VALUES IN (1, 2, 3),
+PARTITION custom_p1 VALUES IN (4, 5, 6),
+PARTITION custom_p2 VALUES IN (7, 8, 9)
+);
+INSERT INTO t1 VALUES (1, 1, "one", null);
+INSERT INTO t1 VALUES (2, 2, "two", null);
+INSERT INTO t1 VALUES (3, 3, "three", null);
+INSERT INTO t1 VALUES (4, 4, "four", null);
+INSERT INTO t1 VALUES (5, 5, "five", null);
+INSERT INTO t1 VALUES (6, 6, "six", null);
+INSERT INTO t1 VALUES (7, 7, "seven", null);
+INSERT INTO t1 VALUES (8, 8, "eight", null);
+INSERT INTO t1 VALUES (9, 9, "nine", null);
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+c1 c2 name event
+1 1 one NULL
+2 2 two NULL
+3 3 three NULL
+4 4 four NULL
+5 5 five NULL
+6 6 six NULL
+7 7 seven NULL
+8 8 eight NULL
+9 9 nine NULL
+SELECT * FROM t1 FORCE INDEX (kc2);
+c1 c2 name event
+1 1 one NULL
+2 2 two NULL
+3 3 three NULL
+4 4 four NULL
+5 5 five NULL
+6 6 six NULL
+7 7 seven NULL
+8 8 eight NULL
+9 9 nine NULL
+set global rocksdb_debug_ttl_rec_ts = 600;
+ALTER TABLE t1 DROP PRIMARY KEY, ADD PRIMARY KEY(`c2`,`c1`) COMMENT 'custom_p0_cfname=foo;custom_p1_cfname=bar;custom_p2_cfname=baz;';
+set global rocksdb_debug_ttl_rec_ts = 0;
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `c1` int(11) NOT NULL DEFAULT '0',
+ `c2` int(11) NOT NULL DEFAULT '0',
+ `name` varchar(25) NOT NULL,
+ `event` date DEFAULT NULL,
+ PRIMARY KEY (`c2`,`c1`) COMMENT 'custom_p0_cfname=foo;custom_p1_cfname=bar;custom_p2_cfname=baz;',
+ KEY `kc2` (`c2`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COMMENT='custom_p0_ttl_duration=9999;custom_p2_ttl_duration=5;'
+/*!50100 PARTITION BY LIST (c1)
+(PARTITION custom_p0 VALUES IN (1,2,3) ENGINE = ROCKSDB,
+ PARTITION custom_p1 VALUES IN (4,5,6) ENGINE = ROCKSDB,
+ PARTITION custom_p2 VALUES IN (7,8,9) ENGINE = ROCKSDB) */
+set global rocksdb_debug_ttl_snapshot_ts = 100;
+set global rocksdb_force_flush_memtable_now=1;
+set @@global.rocksdb_compact_cf = 'baz';
+set @@global.rocksdb_compact_cf = 'default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+c1 c2 name event
+1 1 one NULL
+2 2 two NULL
+3 3 three NULL
+4 4 four NULL
+5 5 five NULL
+6 6 six NULL
+7 7 seven NULL
+8 8 eight NULL
+9 9 nine NULL
+SELECT * FROM t1 FORCE INDEX (kc2);
+c1 c2 name event
+1 1 one NULL
+2 2 two NULL
+3 3 three NULL
+4 4 four NULL
+5 5 five NULL
+6 6 six NULL
+7 7 seven NULL
+8 8 eight NULL
+9 9 nine NULL
+set global rocksdb_debug_ttl_snapshot_ts = 1200;
+set @@global.rocksdb_compact_cf = 'foo';
+set @@global.rocksdb_compact_cf = 'baz';
+set @@global.rocksdb_compact_cf = 'default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+c1 c2 name event
+1 1 one NULL
+2 2 two NULL
+3 3 three NULL
+4 4 four NULL
+5 5 five NULL
+6 6 six NULL
+SELECT * FROM t1 FORCE INDEX (kc2);
+c1 c2 name event
+1 1 one NULL
+2 2 two NULL
+3 3 three NULL
+4 4 four NULL
+5 5 five NULL
+6 6 six NULL
+DROP TABLE t1;
+CREATE TABLE t1 (
+c1 BIGINT,
+c2 BIGINT UNSIGNED NOT NULL,
+name VARCHAR(25) NOT NULL,
+event DATE,
+PRIMARY KEY (`c1`) COMMENT 'custom_p0_cfname=foo;custom_p1_cfname=bar;custom_p2_cfname=baz;',
+KEY kc2 (`c2`)
+) ENGINE=ROCKSDB
+COMMENT="ttl_duration=1;custom_p1_ttl_duration=100;custom_p1_ttl_col=c2;custom_p2_ttl_duration=5000;"
+PARTITION BY LIST(c1) (
+PARTITION custom_p0 VALUES IN (1, 2, 3),
+PARTITION custom_p1 VALUES IN (4, 5, 6),
+PARTITION custom_p2 VALUES IN (7, 8, 9)
+);
+set global rocksdb_debug_ttl_rec_ts = -300;
+INSERT INTO t1 VALUES (1, UNIX_TIMESTAMP(), "one", null);
+INSERT INTO t1 VALUES (2, UNIX_TIMESTAMP(), "two", null);
+INSERT INTO t1 VALUES (3, UNIX_TIMESTAMP(), "three", null);
+set global rocksdb_debug_ttl_rec_ts = 0;
+INSERT INTO t1 VALUES (4, UNIX_TIMESTAMP(), "four", null);
+INSERT INTO t1 VALUES (5, UNIX_TIMESTAMP(), "five", null);
+INSERT INTO t1 VALUES (6, UNIX_TIMESTAMP(), "six", null);
+INSERT INTO t1 VALUES (7, UNIX_TIMESTAMP(), "seven", null);
+INSERT INTO t1 VALUES (8, UNIX_TIMESTAMP(), "eight", null);
+INSERT INTO t1 VALUES (9, UNIX_TIMESTAMP(), "nine", null);
+set global rocksdb_force_flush_memtable_now=1;
+set @@global.rocksdb_compact_cf = 'foo';
+set @@global.rocksdb_compact_cf = 'baz';
+set @@global.rocksdb_compact_cf = 'bar';
+set @@global.rocksdb_compact_cf = 'default';
+SELECT c1 FROM t1 FORCE INDEX (PRIMARY);
+c1
+4
+5
+6
+7
+8
+9
+SELECT c1 FROM t1 FORCE INDEX (kc2);
+c1
+4
+5
+6
+7
+8
+9
+set global rocksdb_debug_ttl_snapshot_ts = 600;
+set @@global.rocksdb_compact_cf = 'bar';
+set @@global.rocksdb_compact_cf = 'default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+SELECT c1 FROM t1 FORCE INDEX (PRIMARY);
+c1
+7
+8
+9
+SELECT c1 FROM t1 FORCE INDEX (kc2);
+c1
+7
+8
+9
+DROP TABLE t1;
+CREATE TABLE t1 (
+c1 INT,
+c2 INT,
+PRIMARY KEY (`c1`) COMMENT 'custom_p0_cfname=foo;'
+) ENGINE=ROCKSDB
+COMMENT="custom_p0_ttl_duration=1;"
+PARTITION BY LIST(c1) (
+PARTITION custom_p0 VALUES IN (1, 4, 7),
+PARTITION custom_p1 VALUES IN (2, 5, 8),
+PARTITION custom_p2 VALUES IN (3, 6, 9)
+);
+set global rocksdb_debug_ttl_rec_ts = -3600;
+INSERT INTO t1 values (1, 1);
+INSERT INTO t1 values (4, 4);
+INSERT INTO t1 values (7, 7);
+set global rocksdb_debug_ttl_rec_ts = 0;
+INSERT INTO t1 values (2, 2);
+INSERT INTO t1 values (3, 3);
+INSERT INTO t1 values (5, 5);
+INSERT INTO t1 values (6, 6);
+INSERT INTO t1 values (8, 8);
+INSERT INTO t1 values (9, 9);
+SELECT * FROM t1;
+c1 c2
+1 1
+2 2
+3 3
+4 4
+5 5
+6 6
+7 7
+8 8
+9 9
+CREATE INDEX kc2 on t1 (c2);
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='foo';
+set global rocksdb_compact_cf='default';
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+c1 c2
+2 2
+3 3
+5 5
+6 6
+8 8
+9 9
+SELECT * FROM t1 FORCE INDEX (kc2);
+c1 c2
+2 2
+3 3
+5 5
+6 6
+8 8
+9 9
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/write_sync.result b/storage/rocksdb/mysql-test/rocksdb/r/write_sync.result
index 9eed611a970..ee23446eec0 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/write_sync.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/write_sync.result
@@ -2,35 +2,26 @@ SET GLOBAL rocksdb_write_disable_wal=false;
SET GLOBAL rocksdb_write_ignore_missing_column_families=true;
create table aaa (id int primary key, i int) engine rocksdb;
set @save_rocksdb_flush_log_at_trx_commit=@@global.rocksdb_flush_log_at_trx_commit;
-SET GLOBAL rocksdb_flush_log_at_trx_commit=0;
+SET GLOBAL rocksdb_flush_log_at_trx_commit=1;
select variable_value into @a from information_schema.global_status where variable_name='rocksdb_wal_synced';
insert aaa(id, i) values(1,1);
select variable_value-@a from information_schema.global_status where variable_name='rocksdb_wal_synced';
variable_value-@a
-0
-insert aaa(id, i) values(2,1);
-select variable_value-@a from information_schema.global_status where variable_name='rocksdb_wal_synced';
-variable_value-@a
-0
-insert aaa(id, i) values(3,1);
-select variable_value-@a from information_schema.global_status where variable_name='rocksdb_wal_synced';
-variable_value-@a
-0
-SET GLOBAL rocksdb_flush_log_at_trx_commit=1;
-insert aaa(id, i) values(4,1);
-select variable_value-@a from information_schema.global_status where variable_name='rocksdb_wal_synced';
-variable_value-@a
1
-insert aaa(id, i) values(5,1);
+insert aaa(id, i) values(2,1);
select variable_value-@a from information_schema.global_status where variable_name='rocksdb_wal_synced';
variable_value-@a
2
-insert aaa(id, i) values(6,1);
+insert aaa(id, i) values(3,1);
select variable_value-@a from information_schema.global_status where variable_name='rocksdb_wal_synced';
variable_value-@a
3
+SET GLOBAL rocksdb_flush_log_at_trx_commit=0;
+select variable_value into @a from information_schema.global_status where variable_name='rocksdb_wal_synced';
+insert aaa(id, i) values(4,1);
SET GLOBAL rocksdb_flush_log_at_trx_commit=2;
-insert aaa(id, i) values(7,1);
+select variable_value into @a from information_schema.global_status where variable_name='rocksdb_wal_synced';
+insert aaa(id, i) values(5,1);
truncate table aaa;
drop table aaa;
set @@global.rocksdb_flush_log_at_trx_commit=@save_rocksdb_flush_log_at_trx_commit;
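The reworked expectations above only assert a per-commit WAL sync count while rocksdb_flush_log_at_trx_commit=1. A small sketch of that check, assuming the same aaa table and status counter used in the test (the inserted row is illustrative):

  SET GLOBAL rocksdb_flush_log_at_trx_commit = 1;
  SELECT variable_value INTO @before FROM information_schema.global_status
    WHERE variable_name = 'rocksdb_wal_synced';
  INSERT INTO aaa (id, i) VALUES (10, 1);
  SELECT variable_value - @before FROM information_schema.global_status
    WHERE variable_name = 'rocksdb_wal_synced';      # expect 1: one sync per commit
  # With the setting at 0 or 2 the syncing is left to the background, so the test
  # no longer asserts a fixed delta for those modes.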
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/2pc_group_commit.test b/storage/rocksdb/mysql-test/rocksdb/t/2pc_group_commit.test
index 009fd7beaf2..923c9db17f2 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/2pc_group_commit.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/2pc_group_commit.test
@@ -13,7 +13,7 @@ USE mysqlslap;
CREATE TABLE t1(id BIGINT AUTO_INCREMENT, value BIGINT, PRIMARY KEY(id)) ENGINE=rocksdb;
--echo # 2PC enabled, MyRocks durability enabled
-SET GLOBAL rocksdb_enable_2pc=0;
+SET GLOBAL rocksdb_enable_2pc=1;
SET GLOBAL rocksdb_flush_log_at_trx_commit=1;
--echo ## 2PC + durability + single thread
@@ -28,7 +28,7 @@ select case when variable_value-@c > 0 and variable_value-@c < 10000 then 'true'
--echo # 2PC enabled, MyRocks durability disabled
-SET GLOBAL rocksdb_enable_2pc=0;
+SET GLOBAL rocksdb_enable_2pc=1;
SET GLOBAL rocksdb_flush_log_at_trx_commit=0;
select variable_value into @c from information_schema.global_status where variable_name='rocksdb_wal_group_syncs';
@@ -41,7 +41,7 @@ select case when variable_value-@c = 0 then 'true' else 'false' end from informa
--echo # 2PC disabled, MyRocks durability enabled
-SET GLOBAL rocksdb_enable_2pc=1;
+SET GLOBAL rocksdb_enable_2pc=0;
SET GLOBAL rocksdb_flush_log_at_trx_commit=1;
select variable_value into @c from information_schema.global_status where variable_name='rocksdb_wal_group_syncs';
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/add_index_inplace.test b/storage/rocksdb/mysql-test/rocksdb/t/add_index_inplace.test
index 5216e84646a..f700afda529 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/add_index_inplace.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/add_index_inplace.test
@@ -186,8 +186,8 @@ connect (con1,localhost,root,,);
--echo # Switch to connection con1
connection con1;
-show global variables like 'rocksdb_bulk_load';
-show session variables like 'rocksdb_bulk_load';
+show global variables like 'rocksdb_bulk_load%';
+show session variables like 'rocksdb_bulk_load%';
CREATE TABLE t1 (i INT, j INT, PRIMARY KEY (i)) ENGINE = ROCKSDB;
@@ -255,9 +255,9 @@ SET @prior_rocksdb_merge_buf_size = @@rocksdb_merge_buf_size;
SET global rocksdb_strict_collation_check = off;
SET session rocksdb_merge_combine_read_size = 566;
-SET session rocksdb_merge_buf_size = 336;
+SET session rocksdb_merge_buf_size = 340;
-show variables like '%rocksdb_bulk_load%';
+show variables like 'rocksdb_bulk_load%';
CREATE TABLE t1 (a VARCHAR(80)) ENGINE=RocksDB;
INSERT INTO t1 (a) VALUES (REPEAT("a", 80));
INSERT INTO t1 (a) VALUES (REPEAT("a", 80));
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_errors.test b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_errors.test
index 284e29d1f5a..b80361f325c 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_errors.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_errors.test
@@ -1,5 +1,6 @@
--source include/have_rocksdb.inc
+### Bulk load ###
CREATE TABLE t1(pk INT, PRIMARY KEY(pk));
# Make sure we get an error with out of order keys during bulk load
@@ -36,4 +37,29 @@ SET rocksdb_bulk_load=0;
--enable_reconnect
--source include/wait_until_connected_again.inc
+TRUNCATE TABLE t1;
+
+### Bulk load with unsorted PKs ###
+SET rocksdb_bulk_load_allow_unsorted=1;
+
+# We should not get an error with out of order PKs
+SET rocksdb_bulk_load=1;
+INSERT INTO t1 VALUES(100);
+INSERT INTO t1 VALUES(101);
+INSERT INTO t1 VALUES(99);
+SET rocksdb_bulk_load=0;
+SELECT * FROM t1;
+TRUNCATE TABLE t1;
+
+# We should get an error with duplicate PKs in the same bulk load
+SET rocksdb_bulk_load=1;
+INSERT INTO t1 VALUES(201);
+INSERT INTO t1 VALUES(200);
+INSERT INTO t1 VALUES(202);
+--error ER_DUP_ENTRY
+INSERT INTO t1 VALUES(201);
+SET rocksdb_bulk_load=0;
+SELECT * FROM t1;
+
+SET rocksdb_bulk_load_allow_unsorted=DEFAULT;
DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_unsorted.test b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_unsorted.test
new file mode 100644
index 00000000000..9cd3aaeafac
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_unsorted.test
@@ -0,0 +1,136 @@
+--source include/have_rocksdb.inc
+
+--let pk_cf=cf1
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+SET rocksdb_bulk_load_size=3;
+SET rocksdb_bulk_load_allow_unsorted=1;
+
+### Test individual INSERTs ###
+
+# A table with only a PK won't have rows until the bulk load is finished
+eval CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "$pk_cf");
+SET rocksdb_bulk_load=1;
+--disable_query_log
+let $sign = 1;
+let $max = 5;
+let $i = 1;
+while ($i <= $max) {
+ let $a = 1 + $sign * $i;
+ let $b = 1 - $sign * $i;
+ let $sign = -$sign;
+ let $insert = INSERT INTO t1 VALUES ($a, $b);
+ eval $insert;
+ inc $i;
+}
+--enable_query_log
+SELECT * FROM t1;
+SET rocksdb_bulk_load=0;
+SELECT * FROM t1;
+DROP TABLE t1;
+
+# A table with a PK and a SK shows rows immediately
+eval CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "$pk_cf", KEY(b));
+SET rocksdb_bulk_load=1;
+--disable_query_log
+let $sign = 1;
+let $max = 5;
+let $i = 1;
+while ($i <= $max) {
+ let $a = 1 + $sign * $i;
+ let $b = 1 - $sign * $i;
+ let $sign = -$sign;
+ let $insert = INSERT INTO t1 VALUES ($a, $b);
+ eval $insert;
+ inc $i;
+}
+--enable_query_log
+
+SELECT * FROM t1;
+SET rocksdb_bulk_load=0;
+DROP TABLE t1;
+
+# Inserting into another table finishes bulk load to the previous table
+eval CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "$pk_cf");
+eval CREATE TABLE t2(a INT, b INT, PRIMARY KEY(a) COMMENT "$pk_cf");
+
+SET rocksdb_bulk_load=1;
+INSERT INTO t1 VALUES (1,1);
+INSERT INTO t2 VALUES (1,1);
+SELECT * FROM t1;
+INSERT INTO t1 VALUES (2,2);
+SELECT * FROM t2;
+SELECT * FROM t1;
+SET rocksdb_bulk_load=0;
+SELECT * FROM t1;
+DROP TABLE t1, t2;
+
+### Test bulk load from a file ###
+eval CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "$pk_cf");
+eval CREATE TABLE t2(a INT, b INT, PRIMARY KEY(b) COMMENT "$pk_cf");
+eval CREATE TABLE t3(a INT, b INT, PRIMARY KEY(a) COMMENT "$pk_cf")
+ PARTITION BY KEY() PARTITIONS 4;
+
+--let $file = `SELECT CONCAT(@@datadir, "test_loadfile.txt")`
+# Create a text file with data to import into the table.
+# PK and SK are not in any order
+--let ROCKSDB_INFILE = $file
+perl;
+my $fn = $ENV{'ROCKSDB_INFILE'};
+open(my $fh, '>', $fn) || die "perl open($fn): $!";
+my $max = 5000000;
+my $sign = 1;
+for (my $ii = 0; $ii < $max; $ii++)
+{
+ my $a = 1 + $sign * $ii;
+  my $b = 1 - $sign * $ii;
+  $sign = -$sign;   # alternate the sign so neither a nor b is in sorted order
+  print $fh "$a\t$b\n";
+}
+close($fh);
+EOF
+--file_exists $file
+
+# Make sure a snapshot held by another user doesn't block the bulk load
+connect (other,localhost,root,,);
+set session transaction isolation level repeatable read;
+select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS';
+start transaction with consistent snapshot;
+select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS';
+
+connection default;
+set rocksdb_bulk_load=1;
+set rocksdb_bulk_load_size=100000;
+--disable_query_log
+--echo LOAD DATA INFILE <input_file> INTO TABLE t1;
+eval LOAD DATA INFILE '$file' INTO TABLE t1;
+--echo LOAD DATA INFILE <input_file> INTO TABLE t2;
+eval LOAD DATA INFILE '$file' INTO TABLE t2;
+--echo LOAD DATA INFILE <input_file> INTO TABLE t3;
+eval LOAD DATA INFILE '$file' INTO TABLE t3;
+--enable_query_log
+set rocksdb_bulk_load=0;
+
+--remove_file $file
+
+# Make sure row count index stats are correct
+--replace_column 6 # 7 # 8 # 9 #
+SHOW TABLE STATUS WHERE name LIKE 't%';
+
+ANALYZE TABLE t1, t2, t3;
+
+--replace_column 6 # 7 # 8 # 9 #
+SHOW TABLE STATUS WHERE name LIKE 't%';
+
+# Make sure all the data is there.
+select count(a) from t1;
+select count(b) from t1;
+select count(a) from t2;
+select count(b) from t2;
+select count(a) from t3;
+select count(b) from t3;
+
+DROP TABLE t1, t2, t3;
+SET rocksdb_bulk_load_allow_unsorted=0;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_unsorted_errors.test b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_unsorted_errors.test
new file mode 100644
index 00000000000..067a91d5d8b
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_unsorted_errors.test
@@ -0,0 +1,8 @@
+--source include/have_rocksdb.inc
+--source include/have_nodebug.inc
+
+# Cannot change unsorted input preference during bulk load
+SET rocksdb_bulk_load=1;
+--error ER_ERROR_WHEN_EXECUTING_COMMAND
+SET rocksdb_bulk_load_allow_unsorted=1;
+SET rocksdb_bulk_load=0;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bytes_written.test b/storage/rocksdb/mysql-test/rocksdb/t/bytes_written.test
new file mode 100644
index 00000000000..f0361707355
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/bytes_written.test
@@ -0,0 +1,22 @@
+--source include/have_rocksdb.inc
+
+--disable_warnings
+DROP TABLE IF EXISTS stats_test_table;
+--enable_warnings
+
+# Create the table
+CREATE TABLE stats_test_table (a INT, b INT, PRIMARY KEY (a)) ENGINE=ROCKSDB;
+
+# Ensure appropriate perf_context_level is set
+SET GLOBAL rocksdb_perf_context_level=3;
+
+# Insert some values
+INSERT INTO stats_test_table VALUES (7,1);
+INSERT INTO stats_test_table VALUES (2,2);
+
+# Verify the bytes written are updated in the table stats
+SELECT io_write_bytes > 0 FROM INFORMATION_SCHEMA.TABLE_STATISTICS WHERE TABLE_NAME = "stats_test_table";
+
+# Cleanup
+DROP TABLE stats_test_table;
+SET GLOBAL rocksdb_perf_context_level=DEFAULT;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/covered_unpack_info_format.test b/storage/rocksdb/mysql-test/rocksdb/t/covered_unpack_info_format.test
new file mode 100644
index 00000000000..344b10bb042
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/covered_unpack_info_format.test
@@ -0,0 +1,79 @@
+# Disabled until the covered unpack_info format is enabled in prod
+--source include/have_debug.inc
+--source include/have_nodebug.inc
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+#
+# Normal case
+#
+
+CREATE TABLE t1 (
+ id INT,
+ fake_id INT,
+ bigfield VARCHAR(4096),
+ PRIMARY KEY (id),
+ KEY bf (bigfield(32)),
+ KEY fid (fake_id, bigfield(32))
+) ENGINE=rocksdb;
+
+INSERT INTO t1 VALUES (1, 1001, REPEAT('a', 1)),
+ (8, 1008, REPEAT('b', 8)),
+ (24, 1024, REPEAT('c', 24)),
+ (31, 1031, REPEAT('d', 31)),
+ (32, 1032, REPEAT('x', 32)),
+ (33, 1033, REPEAT('y', 33)),
+ (128, 1128, REPEAT('z', 128));
+
+SELECT * FROM t1;
+
+--let $prefix_index_check_title= Eligible for optimization, access via fake_id only
+--let $prefix_index_check_read_avoided_delta= 2
+--let $prefix_index_check_query= SELECT id, bigfield FROM t1 FORCE INDEX(fid) WHERE fake_id = 1031
+--source suite/rocksdb/include/prefix_index_only_query_check.inc
+
+--let $prefix_index_check_title= Not eligible for optimization, access via fake_id of big row.
+--let $prefix_index_check_read_avoided_delta= 0
+--let $prefix_index_check_query= SELECT id, bigfield FROM t1 FORCE INDEX(fid) WHERE fake_id = 1033
+--source suite/rocksdb/include/prefix_index_only_query_check.inc
+
+DROP TABLE t1;
+
+#
+# Create same table with older format to test compatibility
+#
+
+set session debug= '+d,MYROCKS_NO_COVERED_BITMAP_FORMAT';
+CREATE TABLE t1 (
+ id INT,
+ fake_id INT,
+ bigfield VARCHAR(4096),
+ PRIMARY KEY (id),
+ KEY bf (bigfield(32)),
+ KEY fid (fake_id, bigfield(32))
+) ENGINE=rocksdb;
+set session debug= '-d,MYROCKS_NO_COVERED_BITMAP_FORMAT';
+
+INSERT INTO t1 VALUES (1, 1001, REPEAT('a', 1)),
+ (8, 1008, REPEAT('b', 8)),
+ (24, 1024, REPEAT('c', 24)),
+ (31, 1031, REPEAT('d', 31)),
+ (32, 1032, REPEAT('x', 32)),
+ (33, 1033, REPEAT('y', 33)),
+ (128, 1128, REPEAT('z', 128));
+
+SELECT * FROM t1;
+
+--let $prefix_index_check_title= No longer eligible for optimization since no covered bitmap was stored.
+--let $prefix_index_check_read_avoided_delta= 0
+--let $prefix_index_check_query= SELECT id, bigfield FROM t1 FORCE INDEX(fid) WHERE fake_id = 1031
+--source suite/rocksdb/include/prefix_index_only_query_check.inc
+
+--let $prefix_index_check_title= Not eligible for optimization.
+--let $prefix_index_check_read_avoided_delta= 0
+--let $prefix_index_check_query= SELECT id, bigfield FROM t1 FORCE INDEX(fid) WHERE fake_id = 1033
+--source suite/rocksdb/include/prefix_index_only_query_check.inc
+
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/ddl_high_priority.test b/storage/rocksdb/mysql-test/rocksdb/t/ddl_high_priority.test
new file mode 100644
index 00000000000..01bb5cfa4ed
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/ddl_high_priority.test
@@ -0,0 +1,18 @@
+# Test high priority DDL
+# There are two modes of high priority DDL:
+# 1. Through the system variable high_priority_ddl
+# 2. Through syntax: CREATE/ALTER/DROP/OPTIMIZE HIGH_PRIORITY ...
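+#
+# A minimal sketch of the two modes (illustrative only; t_example is a
+# hypothetical table, and the shared include below performs the real checks):
+#   SET high_priority_ddl = 1;            -- mode 1: applies to this session's DDL
+#   DROP TABLE t_example;
+#   DROP HIGH_PRIORITY TABLE t_example;   -- mode 2: assumed per-statement keyword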
+
+--source include/have_rocksdb.inc
+
+--echo ##
+--echo ## Using the system variable high_priority_ddl
+--echo ##
+--let $use_sys_var = 1;
+--source include/ddl_high_priority.inc
+
+--echo ##
+--echo ## Using HIGH_PRIORITY syntax
+--echo ##
+--let $use_sys_var = 0;
+--source include/ddl_high_priority.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/deadlock_tracking.test b/storage/rocksdb/mysql-test/rocksdb/t/deadlock_tracking.test
new file mode 100644
index 00000000000..d2abcb3b63b
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/deadlock_tracking.test
@@ -0,0 +1,153 @@
+set @prior_lock_wait_timeout = @@rocksdb_lock_wait_timeout;
+set @prior_deadlock_detect = @@rocksdb_deadlock_detect;
+set @prior_max_latest_deadlocks = @@rocksdb_max_latest_deadlocks;
+set global rocksdb_deadlock_detect = on;
+set global rocksdb_lock_wait_timeout = 10000;
+--echo # Clears deadlock buffer of any prior deadlocks.
+set global rocksdb_max_latest_deadlocks = 0;
+set global rocksdb_max_latest_deadlocks = @prior_max_latest_deadlocks;
+let $engine = rocksdb;
+
+--source include/count_sessions.inc
+connect (con1,localhost,root,,);
+let $con1= `SELECT CONNECTION_ID()`;
+
+connect (con2,localhost,root,,);
+let $con2= `SELECT CONNECTION_ID()`;
+
+connect (con3,localhost,root,,);
+let $con3= `SELECT CONNECTION_ID()`;
+
+connection default;
+eval create table t (i int primary key) engine=$engine;
+insert into t values (1), (2), (3);
+--replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTIONID: [0-9]*/TXN_ID/
+show engine rocksdb transaction status;
+
+echo Deadlock #1;
+--source include/simple_deadlock.inc
+connection default;
+--replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTIONID: [0-9]*/TXN_ID/
+show engine rocksdb transaction status;
+
+echo Deadlock #2;
+--source include/simple_deadlock.inc
+connection default;
+--replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTIONID: [0-9]*/TXN_ID/
+show engine rocksdb transaction status;
+set global rocksdb_max_latest_deadlocks = 10;
+
+echo Deadlock #3;
+--source include/simple_deadlock.inc
+connection default;
+--replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTIONID: [0-9]*/TXN_ID/
+show engine rocksdb transaction status;
+set global rocksdb_max_latest_deadlocks = 1;
+--replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTIONID: [0-9]*/TXN_ID/
+show engine rocksdb transaction status;
+
+connection con3;
+set rocksdb_deadlock_detect_depth = 2;
+
+echo Deadlock #4;
+connection con1;
+begin;
+select * from t where i=1 for update;
+
+connection con2;
+begin;
+select * from t where i=2 for update;
+
+connection con3;
+begin;
+select * from t where i=3 for update;
+
+connection con1;
+send select * from t where i=2 for update;
+
+connection con2;
+let $wait_condition = select count(*) = 1 from information_schema.rocksdb_trx
+where thread_id = $con1 and waiting_key != "";
+--source include/wait_condition.inc
+
+send select * from t where i=3 for update;
+
+connection con3;
+let $wait_condition = select count(*) = 1 from information_schema.rocksdb_trx
+where thread_id = $con2 and waiting_key != "";
+--source include/wait_condition.inc
+
+--error ER_LOCK_DEADLOCK
+select * from t where i=1 for update;
+rollback;
+
+connection con2;
+reap;
+rollback;
+
+connection con1;
+reap;
+rollback;
+
+connection default;
+set global rocksdb_max_latest_deadlocks = 5;
+--replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTIONID: [0-9]*/TXN_ID/
+show engine rocksdb transaction status;
+
+echo Deadlock #5;
+connection con1;
+begin;
+select * from t where i=1 for update;
+
+connection con2;
+begin;
+select * from t where i=2 for update;
+
+connection con3;
+begin;
+select * from t where i=3 lock in share mode;
+
+connection con1;
+select * from t where i=100 for update;
+select * from t where i=101 for update;
+send select * from t where i=2 for update;
+
+connection con2;
+let $wait_condition = select count(*) = 1 from information_schema.rocksdb_trx
+where thread_id = $con1 and waiting_key != "";
+--source include/wait_condition.inc
+
+select * from t where i=3 lock in share mode;
+select * from t where i=200 for update;
+select * from t where i=201 for update;
+
+--error ER_LOCK_DEADLOCK
+select * from t where i=1 lock in share mode;
+rollback;
+
+connection con1;
+reap;
+rollback;
+
+connection con3;
+rollback;
+
+connection default;
+--replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTIONID: [0-9]*/TXN_ID/
+show engine rocksdb transaction status;
+
+disconnect con1;
+disconnect con2;
+disconnect con3;
+
+set global rocksdb_lock_wait_timeout = @prior_lock_wait_timeout;
+set global rocksdb_deadlock_detect = @prior_deadlock_detect;
+drop table t;
+--replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTIONID: [0-9]*/TXN_ID/ /INDEX_ID: [0-9a-f]*/IDX_ID/
+show engine rocksdb transaction status;
+set global rocksdb_max_latest_deadlocks = 0;
+--echo # Clears deadlock buffer of any existing deadlocks.
+set global rocksdb_max_latest_deadlocks = @prior_max_latest_deadlocks;
+--replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTIONID: [0-9]*/TXN_ID/ /INDEX_ID: [0-9a-f]*/IDX_ID/
+show engine rocksdb transaction status;
+--source include/wait_until_count_sessions.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/drop_table-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/drop_table-master.opt
index 4afcf2caa5d..f40e9db55b2 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/drop_table-master.opt
+++ b/storage/rocksdb/mysql-test/rocksdb/t/drop_table-master.opt
@@ -1,4 +1,3 @@
--rocksdb_max_subcompactions=1
---rocksdb_info_log_level=info_level
--rocksdb_default_cf_options=write_buffer_size=512k;target_file_size_base=512k;level0_file_num_compaction_trigger=2;level0_slowdown_writes_trigger=-1;level0_stop_writes_trigger=1000;max_bytes_for_level_base=1m
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/drop_table.test b/storage/rocksdb/mysql-test/rocksdb/t/drop_table.test
index 7867caf1c3c..5e98a82962b 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/drop_table.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/drop_table.test
@@ -8,12 +8,15 @@ DROP TABLE IF EXISTS t4;
DROP TABLE IF EXISTS t5;
--enable_warnings
+call mtr.add_suppression("Column family 'cf1' not found");
+call mtr.add_suppression("Column family 'rev:cf2' not found");
+call mtr.add_suppression("LibRocksDB");
+
# Start from clean slate
set global rocksdb_compact_cf = 'cf1';
set global rocksdb_compact_cf = 'rev:cf2';
set global rocksdb_signal_drop_index_thread = 1;
--source include/restart_mysqld.inc
---exec truncate --size=0 $MYSQLTEST_VARDIR/log/mysqld.1.err
CREATE TABLE t1 (
a int not null,
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/drop_table2.test b/storage/rocksdb/mysql-test/rocksdb/t/drop_table2.test
index ac6b17461e2..b2152303ead 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/drop_table2.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/drop_table2.test
@@ -8,12 +8,14 @@ DROP TABLE IF EXISTS t4;
DROP TABLE IF EXISTS t5;
--enable_warnings
+call mtr.add_suppression("Column family 'cf1' not found");
+call mtr.add_suppression("Column family 'rev:cf2' not found");
+
# Start from clean slate
set global rocksdb_compact_cf = 'cf1';
set global rocksdb_compact_cf = 'rev:cf2';
set global rocksdb_signal_drop_index_thread = 1;
--source include/restart_mysqld.inc
---exec truncate --size=0 $MYSQLTEST_VARDIR/log/mysqld.1.err
CREATE TABLE t1 (
a int not null,
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/drop_table3.inc b/storage/rocksdb/mysql-test/rocksdb/t/drop_table3.inc
index 4d23f7a1c5f..6070b7ba414 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/drop_table3.inc
+++ b/storage/rocksdb/mysql-test/rocksdb/t/drop_table3.inc
@@ -4,12 +4,14 @@
DROP TABLE IF EXISTS t1;
--enable_warnings
+call mtr.add_suppression("Column family 'cf1' not found");
+call mtr.add_suppression("Column family 'rev:cf2' not found");
+
# Start from clean slate
set global rocksdb_compact_cf = 'cf1';
set global rocksdb_compact_cf = 'rev:cf2';
set global rocksdb_signal_drop_index_thread = 1;
--source include/restart_mysqld.inc
---exec truncate --size=0 $MYSQLTEST_VARDIR/log/mysqld.1.err
CREATE TABLE t1 (
a int not null,
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/dup_key_update.test b/storage/rocksdb/mysql-test/rocksdb/t/dup_key_update.test
index ebe18ab2e95..a163e64fc53 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/dup_key_update.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/dup_key_update.test
@@ -22,17 +22,21 @@ CREATE TABLE t2 (id1 INT, id2 INT, id3 INT,
DROP TABLE t1;
DROP TABLE t2;
+set global rocksdb_large_prefix=1;
CREATE TABLE t1 (id1 varchar(128) CHARACTER SET latin1 COLLATE latin1_bin,
id2 varchar(256) CHARACTER SET utf8 COLLATE utf8_bin,
id3 varchar(200) CHARACTER SET latin1 COLLATE latin1_swedish_ci,
PRIMARY KEY (id1, id2, id3),
UNIQUE KEY (id3, id1)) ENGINE=ROCKSDB;
+set global rocksdb_large_prefix=DEFAULT;
+set global rocksdb_large_prefix=1;
CREATE TABLE t2 (id1 varchar(128) CHARACTER SET latin1 COLLATE latin1_bin,
id2 varchar(256) CHARACTER SET utf8 COLLATE utf8_bin,
id3 varchar(200) CHARACTER SET latin1 COLLATE latin1_swedish_ci,
PRIMARY KEY (id1, id2, id3),
UNIQUE KEY (id3, id1) COMMENT 'rev:cf') ENGINE=ROCKSDB;
+set global rocksdb_large_prefix=DEFAULT;
--source suite/rocksdb/include/dup_key_update.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/fast_prefix_index_fetch.test b/storage/rocksdb/mysql-test/rocksdb/t/fast_prefix_index_fetch.test
new file mode 100644
index 00000000000..15f6c36f52d
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/fast_prefix_index_fetch.test
@@ -0,0 +1,120 @@
+# Disabled until the covered unpack_info format is enabled in prod
+--source include/have_debug.inc
+--source include/have_nodebug.inc
+
+--source include/have_rocksdb.inc
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+CREATE TABLE t1 (
+ id INT,
+ fake_id INT,
+ bigfield VARCHAR(4096),
+ PRIMARY KEY (id),
+ KEY bf (bigfield(32)),
+ KEY fid (fake_id, bigfield(32))
+) ENGINE=rocksdb;
+
+INSERT INTO t1 VALUES (1, 1001, REPEAT('a', 1)),
+ (8, 1008, REPEAT('b', 8)),
+ (24, 1024, REPEAT('c', 24)),
+ (31, 1031, REPEAT('d', 31)),
+ (32, 1032, REPEAT('x', 32)),
+ (33, 1033, REPEAT('y', 33)),
+ (128, 1128, REPEAT('z', 128));
+
+SELECT * FROM t1;
+
+--disable_query_log
+
+--let $prefix_index_check_title= Baseline sanity check
+--let $prefix_index_check_read_avoided_delta= 0
+--let $prefix_index_check_query= SELECT "no-op query"
+--source suite/rocksdb/include/prefix_index_only_query_check.inc
+
+--let $prefix_index_check_title= Eligible for optimization.
+--let $prefix_index_check_read_avoided_delta= 2
+--let $prefix_index_check_query= SELECT id, bigfield FROM t1 FORCE INDEX(bf) WHERE bigfield = REPEAT('d', 31)
+--source suite/rocksdb/include/prefix_index_only_query_check.inc
+
+--let $prefix_index_check_title= Eligible for optimization, access via fake_id only
+--let $prefix_index_check_read_avoided_delta= 2
+--let $prefix_index_check_query= SELECT id, bigfield FROM t1 FORCE INDEX(fid) WHERE fake_id = 1031
+--source suite/rocksdb/include/prefix_index_only_query_check.inc
+
+--let $prefix_index_check_title= Not eligible for optimization, access via fake_id of big row.
+--let $prefix_index_check_read_avoided_delta= 0
+--let $prefix_index_check_query= SELECT id, bigfield FROM t1 FORCE INDEX(fid) WHERE fake_id = 1033
+--source suite/rocksdb/include/prefix_index_only_query_check.inc
+
+# The secondary_index_read call isn't covered because the next record in the
+# index has a bigfield value of length 33, so only one of the two lookups is
+# covered here.
+--let $prefix_index_check_title= Eligible for optimization.
+--let $prefix_index_check_read_avoided_delta= 1
+--let $prefix_index_check_query= SELECT id, bigfield FROM t1 FORCE INDEX(bf) WHERE bigfield = REPEAT('x', 32)
+--source suite/rocksdb/include/prefix_index_only_query_check.inc
+
+--let $prefix_index_check_title= Not eligible for optimization.
+--let $prefix_index_check_read_avoided_delta= 0
+--let $prefix_index_check_query= SELECT id, bigfield FROM t1 FORCE INDEX(bf) WHERE bigfield = REPEAT('y', 33)
+--source suite/rocksdb/include/prefix_index_only_query_check.inc
+
+--let $prefix_index_check_title= Eligible for optimization.
+--let $prefix_index_check_read_avoided_delta= 2
+--let $prefix_index_check_query= SELECT id, bigfield FROM t1 FORCE INDEX(bf) WHERE bigfield = REPEAT('b', 8)
+--source suite/rocksdb/include/prefix_index_only_query_check.inc
+
+--let $prefix_index_check_title= Eligible for optimization.
+--let $prefix_index_check_read_avoided_delta= 2
+--let $prefix_index_check_query= SELECT id, bigfield FROM t1 FORCE INDEX(bf) WHERE bigfield = REPEAT('c', 24)
+--source suite/rocksdb/include/prefix_index_only_query_check.inc
+
+--let $prefix_index_check_title= Not eligible for optimization.
+--let $prefix_index_check_read_avoided_delta= 0
+--let $prefix_index_check_query= SELECT id, bigfield FROM t1 FORCE INDEX(bf) WHERE bigfield = REPEAT('z', 128)
+--source suite/rocksdb/include/prefix_index_only_query_check.inc
+
+DROP TABLE t1;
+
+--echo #
+--echo # Test that multi-byte charsets are handled correctly
+--echo #
+
+SET NAMES utf8;
+
+CREATE TABLE t1 (
+ a INT PRIMARY KEY,
+ b VARCHAR(30) CHARACTER SET utf8 collate utf8_bin,
+ KEY k (b(2))
+) ENGINE=rocksdb;
+
+INSERT INTO t1 VALUES
+ (1, 'a'),
+ (2, 'cc'),
+ (3, 'ŽŽ'),
+ (4, 'žžžž');
+
+--let $prefix_index_check_title= Charset record obviously shorter than the prefix
+--let $prefix_index_check_query= SELECT * FROM t1 FORCE INDEX(k) WHERE b = "a"
+--let $prefix_index_check_read_avoided_delta= 2
+--source suite/rocksdb/include/prefix_index_only_query_check.inc
+
+--let $prefix_index_check_title= Charset record shorter than prefix
+--let $prefix_index_check_query= SELECT * FROM t1 FORCE INDEX(k) WHERE b = "cc"
+--let $prefix_index_check_read_avoided_delta= 2
+--source suite/rocksdb/include/prefix_index_only_query_check.inc
+
+--let $prefix_index_check_title= Charset record with glyphs shorter than prefix
+--let $prefix_index_check_query= SELECT * FROM t1 FORCE INDEX(k) WHERE b = "ŽŽ"
+--let $prefix_index_check_read_avoided_delta= 1
+--source suite/rocksdb/include/prefix_index_only_query_check.inc
+
+--let $prefix_index_check_title= Charset record longer than prefix
+--let $prefix_index_check_query= SELECT * FROM t1 FORCE INDEX(k) WHERE b = "žžžž"
+--let $prefix_index_check_read_avoided_delta= 0
+--source suite/rocksdb/include/prefix_index_only_query_check.inc
+
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/index.inc b/storage/rocksdb/mysql-test/rocksdb/t/index.inc
index 6b4e4ff233b..8b000a255b3 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/index.inc
+++ b/storage/rocksdb/mysql-test/rocksdb/t/index.inc
@@ -119,3 +119,37 @@ DROP TABLE t1;
--enable_parsing
+#
+# Test index prefix length limits.
+#
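+# Assumed limits: with rocksdb_large_prefix=0 the longest index prefix is
+# presumably 767 bytes, and with rocksdb_large_prefix=1 presumably 3072 bytes;
+# a longer prefix on a non-unique key only raises a warning and the table is
+# still created, as the cases below exercise.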
+set global rocksdb_large_prefix=0;
+
+CREATE TABLE t1 (
+ a BLOB(1024),
+ KEY (a(767))
+) ENGINE=rocksdb;
+DROP TABLE t1;
+
+# Should display warning
+CREATE TABLE t1 (
+ a BLOB(1024),
+ KEY (a(768))
+) ENGINE=rocksdb;
+DROP TABLE t1;
+
+set global rocksdb_large_prefix=1;
+
+CREATE TABLE t1 (
+ a BLOB(4096),
+ KEY (a(3072))
+) ENGINE=rocksdb;
+DROP TABLE t1;
+
+# Should display warning
+CREATE TABLE t1 (
+ a BLOB(4096),
+ KEY (a(3073))
+) ENGINE=rocksdb;
+DROP TABLE t1;
+
+set global rocksdb_large_prefix=DEFAULT;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/index_primary.test b/storage/rocksdb/mysql-test/rocksdb/t/index_primary.test
index 3abd2dd05fe..5dcfbaa8d3b 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/index_primary.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/index_primary.test
@@ -62,3 +62,35 @@ ALTER TABLE t1 ADD CONSTRAINT PRIMARY KEY pk (a);
SHOW KEYS IN t1;
DROP TABLE t1;
+#
+# Test index prefix length limits.
+#
+set global rocksdb_large_prefix=0;
+
+CREATE TABLE t1 (
+ a BLOB(1024),
+ PRIMARY KEY (a(767))
+) ENGINE=rocksdb;
+DROP TABLE t1;
+
+--error ER_TOO_LONG_KEY
+CREATE TABLE t1 (
+ a BLOB(1024),
+ PRIMARY KEY (a(768))
+) ENGINE=rocksdb;
+
+set global rocksdb_large_prefix=1;
+
+CREATE TABLE t1 (
+ a BLOB(4096),
+ PRIMARY KEY (a(3072))
+) ENGINE=rocksdb;
+DROP TABLE t1;
+
+--error ER_TOO_LONG_KEY
+CREATE TABLE t1 (
+ a BLOB(4096),
+ PRIMARY KEY (a(3073))
+) ENGINE=rocksdb;
+
+set global rocksdb_large_prefix=DEFAULT;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/multi_varchar_sk_lookup.test b/storage/rocksdb/mysql-test/rocksdb/t/multi_varchar_sk_lookup.test
new file mode 100644
index 00000000000..de6b94de8e4
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/multi_varchar_sk_lookup.test
@@ -0,0 +1,49 @@
+# Disabled until the covered unpack_info format is enabled in prod
+--source include/have_debug.inc
+--source include/have_nodebug.inc
+
+--source include/have_rocksdb.inc
+
+--disable_warnings
+DROP TABLE IF EXISTS T1;
+--enable_warnings
+
+CREATE TABLE T1 (
+ P1 VARCHAR(64), P2 VARCHAR(64), P3 VARCHAR(64), P4 VARCHAR(64),
+ S1 VARCHAR(64), S2 VARCHAR(64), S3 VARCHAR(64), S4 VARCHAR(64),
+ S5 VARCHAR(64), S6 VARCHAR(64), S7 VARCHAR(64), S8 VARCHAR(64),
+ S9 VARCHAR(64), S10 VARCHAR(64), S11 VARCHAR(64), S12 VARCHAR(64),
+ S13 VARCHAR(64), S14 VARCHAR(64), S15 VARCHAR(64), S16 VARCHAR(64),
+ PRIMARY KEY (P1(8), P2(8), P3(8), P4(8)),
+ KEY SK (S1(8), S2(8), S3(8), S4(8),
+ S5(8), S6(8), S7(8), S8(8),
+ S9(8), S10(8), S11(8), S12(8),
+ S13(8), S14(8), S15(8), S16(8))
+) ENGINE=rocksdb;
+
+INSERT INTO T1 VALUES ('1', '2', '3', '4',
+ '5', '6', '7', '8',
+ '9', '10', '11', '12',
+ '13', '14', '15', '16',
+ '17', '18', '19', '20');
+
+SELECT * FROM T1;
+
+--let $prefix_index_check_title= Not eligible for optimization, shorter than prefix length.
+--let $prefix_index_check_read_avoided_delta= 0
+--let $prefix_index_check_query= SELECT P4, S2 FROM T1 FORCE INDEX(SK) WHERE S1='5'
+--source suite/rocksdb/include/prefix_index_only_query_check.inc
+
+DELETE FROM T1;
+INSERT INTO T1 VALUES ('100000000', '200000000', '300000000', '400000000',
+ '500000000', '600000000', '700000000', '800000000',
+ '900000000', '100000000', '110000000', '120000000',
+ '130000000', '140000000', '150000000', '160000000',
+ '170000000', '180000000', '190000000', '200000000');
+
+--let $prefix_index_check_title= Not eligible for optimization, longer than prefix length.
+--let $prefix_index_check_read_avoided_delta= 0
+--let $prefix_index_check_query= SELECT P4, S2 FROM T1 FORCE INDEX(SK) WHERE S1='5'
+--source suite/rocksdb/include/prefix_index_only_query_check.inc
+
+DROP TABLE T1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb.test b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb.test
index adf829a1e37..f5e7c6ee043 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb.test
@@ -1657,7 +1657,9 @@ SET @old_mode = @@sql_mode;
SET sql_mode = 'strict_all_tables';
create table t1 (a int, b text, c varchar(400), Primary Key(a), Key(c, b(255))) engine=rocksdb;
drop table t1;
+set global rocksdb_large_prefix=1;
create table t1 (a int, b text, c varchar(400), Primary Key(a), Key(b(1255))) engine=rocksdb;
+set global rocksdb_large_prefix=0;
insert into t1 values (1, '1abcde', '1abcde'), (2, '2abcde', '2abcde'), (3, '3abcde', '3abcde');
select * from t1;
--replace_column 9 #
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/show_engine.test b/storage/rocksdb/mysql-test/rocksdb/t/show_engine.test
index 1ae34cc0830..2376e77740f 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/show_engine.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/show_engine.test
@@ -23,10 +23,8 @@ CREATE TABLE t3 (k INT, PRIMARY KEY (k) COMMENT 'cf_t1') ENGINE = ROCKSDB;
CREATE TABLE t4 (l INT, PRIMARY KEY (l) COMMENT 'cf_t4') ENGINE = ROCKSDB
PARTITION BY KEY(l) PARTITIONS 4;
-SET GLOBAL rocksdb_force_flush_memtable_now=1;
-
---replace_column 2 # 3 #
-SHOW ENGINE rocksdb STATUS;
+SET @save.rocksdb_max_background_jobs= @@global.rocksdb_max_background_jobs;
+SET GLOBAL rocksdb_max_background_jobs= 1;
INSERT INTO t1 VALUES (1), (2), (3);
SELECT COUNT(*) FROM t1;
@@ -37,6 +35,11 @@ SELECT COUNT(*) FROM t2;
INSERT INTO t4 VALUES (1), (2), (3), (4), (5);
SELECT COUNT(*) FROM t4;
+SET GLOBAL rocksdb_force_flush_memtable_now=1;
+SET GLOBAL rocksdb_compact_cf="cf_t1";
+--replace_column 2 # 3 #
+SHOW ENGINE rocksdb STATUS;
+
# Fetch data from information schema as well
--replace_column 3 #
SELECT * FROM INFORMATION_SCHEMA.ROCKSDB_CFSTATS;
@@ -80,3 +83,5 @@ SHOW ENGINE rocksdb TRANSACTION STATUS;
ROLLBACK;
+# Restore old values
+SET GLOBAL rocksdb_max_background_jobs= @save.rocksdb_max_background_jobs;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/skip_validate_tmp_table.test b/storage/rocksdb/mysql-test/rocksdb/t/skip_validate_tmp_table.test
new file mode 100644
index 00000000000..caffaa25ad9
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/skip_validate_tmp_table.test
@@ -0,0 +1,39 @@
+--source include/have_rocksdb.inc
+--source include/have_debug.inc
+
+# Write file to make mysql-test-run.pl expect the "crash", but don't restart the
+# server until it is told to
+--let $_server_id= `SELECT @@server_id`
+--let $_expect_file_name= $MYSQLTEST_VARDIR/tmp/mysqld.$_server_id.expect
+
+CREATE TABLE t1 (pk int primary key) ENGINE=ROCKSDB;
+
+# Create a .frm file without a matching table
+--exec cp $MYSQLTEST_VARDIR/mysqld.$_server_id/data/test/t1.frm $MYSQLTEST_VARDIR/mysqld.$_server_id/data/test/t1#sql-test.frm
+
+# Restart the server with a .frm file that exists but whose table is not registered in RocksDB
+--exec echo "wait" >$_expect_file_name
+shutdown_server 10;
+--exec echo "restart" >$_expect_file_name
+--sleep 5
+--enable_reconnect
+--source include/wait_until_connected_again.inc
+--disable_reconnect
+
+# This will append '#sql-test' to the end of the new name
+set session debug="+d,gen_sql_table_name";
+rename table t1 to t2;
+set session debug= "-d,gen_sql_table_name";
+
+# Remove the corresponding .frm files
+--remove_files_wildcard $MYSQLTEST_VARDIR/mysqld.$_server_id/data/test *t1*.frm
+--remove_files_wildcard $MYSQLTEST_VARDIR/mysqld.$_server_id/data/test *t2*.frm
+
+# Restart the server with a table that is registered in RocksDB but has no .frm file
+--exec echo "wait" >$_expect_file_name
+shutdown_server 10;
+--exec echo "restart" >$_expect_file_name
+--sleep 5
+--enable_reconnect
+--source include/wait_until_connected_again.inc
+--disable_reconnect
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/ttl_primary.test b/storage/rocksdb/mysql-test/rocksdb/t/ttl_primary.test
index 92200e859ed..38bfb2eef8f 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/ttl_primary.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/ttl_primary.test
@@ -279,7 +279,7 @@ SELECT a FROM t1;
DROP TABLE t1;
# TTL field with nullable ttl column (should fail)
---error 1948
+--error ER_RDB_TTL_COL_FORMAT
CREATE TABLE t1 (
a bigint(20) UNSIGNED NOT NULL,
b int NOT NULL,
@@ -290,7 +290,7 @@ CREATE TABLE t1 (
COMMENT='ttl_duration=1;ttl_col=ts;';
# TTL field with non 8-bit integer column (should fail)
---error 1948
+--error ER_RDB_TTL_COL_FORMAT
CREATE TABLE t1 (
a bigint(20) UNSIGNED NOT NULL,
b int NOT NULL,
@@ -301,7 +301,7 @@ CREATE TABLE t1 (
COMMENT='ttl_duration=1;ttl_col=ts;';
# TTL duration as some random garbage value
---error 1949
+--error ER_RDB_TTL_DURATION_FORMAT
CREATE TABLE t1 (
a bigint(20) UNSIGNED NOT NULL,
b int NOT NULL,
@@ -311,7 +311,7 @@ CREATE TABLE t1 (
COMMENT='ttl_duration=abc;';
# TTL col is some column outside of the table
---error 1948
+--error ER_RDB_TTL_COL_FORMAT
CREATE TABLE t1 (
a bigint(20) UNSIGNED NOT NULL,
b int NOT NULL,
@@ -321,7 +321,7 @@ CREATE TABLE t1 (
COMMENT='ttl_duration=1;ttl_col=abc;';
# TTL col must have accompanying duration
---error 1948
+--error ER_RDB_TTL_COL_FORMAT
CREATE TABLE t1 (
a bigint(20) UNSIGNED NOT NULL,
b int NOT NULL,
@@ -372,10 +372,8 @@ CREATE TABLE t1 (
) ENGINE=rocksdb
COMMENT='ttl_duration=100;';
---error 1947
+--error ER_RDB_TTL_UNSUPPORTED
ALTER TABLE t1 DROP PRIMARY KEY;
---error 1947
-ALTER TABLE t1 ADD INDEX kb(b), ALGORITHM=INPLACE;
DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary-master.opt
new file mode 100644
index 00000000000..b991f718a33
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary-master.opt
@@ -0,0 +1,2 @@
+--rocksdb_enable_ttl_read_filtering=0
+--rocksdb_default_cf_options=disable_auto_compactions=true
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary.test b/storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary.test
new file mode 100644
index 00000000000..fb439e109e7
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary.test
@@ -0,0 +1,780 @@
+--source include/have_debug.inc
+--source include/have_rocksdb.inc
+
+# Basic TTL test, pk ignored, no sk
+CREATE TABLE t1 (
+`a` binary(8) NOT NULL,
+`b` varbinary(64) NOT NULL,
+`c` varbinary(256) NOT NULL,
+`ts` bigint(20) UNSIGNED NOT NULL,
+`value` mediumblob NOT NULL,
+PRIMARY KEY (`b`,`a`,`c`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+COMMENT='ttl_duration=1;ttl_col=ts;';
+
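+# rocksdb_debug_ttl_rec_ts is a debug-only knob; a negative value presumably
+# backdates the TTL timestamp stored with the inserts that follow (here by 100
+# seconds), so the rows are already expired relative to ttl_duration=1.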
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values ('a', 'b', 'c', UNIX_TIMESTAMP(), 'd');
+INSERT INTO t1 values ('d', 'e', 'f', UNIX_TIMESTAMP(), 'g');
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1;
+
+set global rocksdb_debug_ttl_ignore_pk = 1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk = 0;
+
+# no rows should be filtered
+SELECT COUNT(*) FROM t1;
+DROP TABLE t1;
+
+# Basic TTL test
+CREATE TABLE t1 (
+`a` binary(8) NOT NULL,
+`b` varbinary(64) NOT NULL,
+`c` varbinary(256) NOT NULL,
+`ts` bigint(20) UNSIGNED NOT NULL,
+`value` mediumblob NOT NULL,
+PRIMARY KEY (`b`,`a`,`c`),
+KEY kb (`b`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+COMMENT='ttl_duration=1;ttl_col=ts;';
+
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values ('a', 'b', 'c', UNIX_TIMESTAMP(), 'd');
+INSERT INTO t1 values ('d', 'e', 'f', UNIX_TIMESTAMP(), 'g');
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+
+set global rocksdb_debug_ttl_ignore_pk = 1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk = 0;
+
+# should have filtered the rows out since ttl is passed in compaction filter
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+DROP TABLE t1;
+
+# column before TTL in value
+CREATE TABLE t1 (
+ a bigint(20) NOT NULL,
+ b int NOT NULL,
+ ts bigint(20) UNSIGNED NOT NULL,
+ c int NOT NULL,
+ PRIMARY KEY (a),
+ KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;ttl_col=ts;';
+
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, 3, UNIX_TIMESTAMP(), 5);
+INSERT INTO t1 values (2, 4, UNIX_TIMESTAMP(), 6);
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+
+set global rocksdb_debug_ttl_ignore_pk = 1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk = 0;
+
+# should have filtered the rows out since ttl is passed in compaction filter
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+DROP TABLE t1;
+
+# multi-part PK w/ TTL
+CREATE TABLE t1 (
+ a bigint(20) NOT NULL,
+ b int NOT NULL,
+ c int NOT NULL,
+ ts bigint(20) UNSIGNED NOT NULL,
+ PRIMARY KEY (a,c),
+ KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;ttl_col=ts;';
+
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, 3, 5, UNIX_TIMESTAMP());
+INSERT INTO t1 values (2, 4, 6, UNIX_TIMESTAMP());
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk=0;
+
+# should have filtered the rows out since ttl is passed in compaction filter
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+DROP TABLE t1;
+
+# nullable column(s) before TTL
+CREATE TABLE t1 (
+ a bigint(20) NOT NULL,
+ b int,
+ c int,
+ ts bigint(20) UNSIGNED NOT NULL,
+ PRIMARY KEY (a),
+ KEY kbc (b, c)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;ttl_col=ts;';
+
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, NULL, NULL, UNIX_TIMESTAMP());
+INSERT INTO t1 values (2, NULL, NULL, UNIX_TIMESTAMP());
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kbc);
+
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk=0;
+
+# should have filtered the rows out since ttl is passed in compaction filter
+SELECT COUNT(*) FROM t1 FORCE INDEX (kbc);
+DROP TABLE t1;
+
+# variable len columns + null column(s) before TTL
+CREATE TABLE t1 (
+`a` binary(8) NOT NULL,
+`b` varbinary(64),
+`c` varbinary(256),
+`ts` bigint(20) UNSIGNED NOT NULL,
+`value` mediumblob NOT NULL,
+PRIMARY KEY (`a`),
+KEY kbc (`b`, `c`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+COMMENT='ttl_duration=1;ttl_col=ts;';
+
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values ('a', NULL, 'bc', UNIX_TIMESTAMP(), 'd');
+INSERT INTO t1 values ('d', 'efghijk', NULL, UNIX_TIMESTAMP(), 'l');
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kbc);
+
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk=0;
+
+# should have filtered the rows out since ttl is passed in compaction filter
+SELECT COUNT(*) FROM t1 FORCE INDEX (kbc);
+DROP TABLE t1;
+
+# TTL implicitly generated (no ttl column)
+CREATE TABLE t1 (
+ a bigint(20) NOT NULL,
+ b int NOT NULL,
+ c int NOT NULL,
+ PRIMARY KEY (a),
+ KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;';
+
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, 3, 5);
+INSERT INTO t1 values (2, 4, 6);
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk=0;
+
+# should have filtered the rows out since ttl is passed in compaction filter
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+DROP TABLE t1;
+
+# TTL field as the PK
+CREATE TABLE t1 (
+ a int,
+ ts bigint(20) UNSIGNED NOT NULL,
+ PRIMARY KEY (a, ts),
+ KEY kt (ts)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=5;ttl_col=ts;';
+
+INSERT INTO t1 values (1, UNIX_TIMESTAMP());
+INSERT INTO t1 values (2, UNIX_TIMESTAMP());
+SELECT COUNT(*) FROM t1 FORCE INDEX(kt);
+
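+# rocksdb_debug_ttl_snapshot_ts presumably shifts the snapshot timestamp the
+# compaction filter compares against: -10 moves it into the past so nothing has
+# expired yet, while +10 below moves it forward past the 5-second TTL.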
+set global rocksdb_debug_ttl_snapshot_ts = -10;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+# should all still be there.
+SELECT COUNT(*) FROM t1 FORCE INDEX(kt);
+
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_debug_ttl_snapshot_ts = 10;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+set global rocksdb_debug_ttl_ignore_pk=0;
+
+# should have filtered the rows out since ttl is passed in compaction filter
+SELECT COUNT(*) FROM t1 FORCE INDEX(kt);
+DROP TABLE t1;
+
+# TTL field inside multi-part pk
+CREATE TABLE t1 (
+ a bigint(20) NOT NULL,
+ b int NOT NULL,
+ ts bigint(20) UNSIGNED NOT NULL,
+ c int NOT NULL,
+ PRIMARY KEY (a, ts),
+ KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;ttl_col=ts;';
+
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, 3, UNIX_TIMESTAMP(), 5);
+INSERT INTO t1 values (2, 4, UNIX_TIMESTAMP(), 6);
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk=0;
+
+# should have filtered the rows out since ttl is passed in compaction filter
+SELECT COUNT(*) FROM t1;
+DROP TABLE t1;
+
+# TTL field inside key with variable length things..
+CREATE TABLE t1 (
+`a` binary(8) NOT NULL,
+`b` varbinary(64),
+`c` varbinary(256),
+`ts` bigint(20) UNSIGNED NOT NULL,
+`value` mediumblob NOT NULL,
+PRIMARY KEY (`a`, `ts`),
+KEY kb (`b`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+COMMENT='ttl_duration=1;ttl_col=ts;';
+
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values ('a', NULL, 'bc', UNIX_TIMESTAMP(), 'd');
+INSERT INTO t1 values ('de', 'fghijk', NULL, UNIX_TIMESTAMP(), 'l');
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1;
+
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk=0;
+
+# should have filtered the rows out since ttl is passed in compaction filter
+SELECT COUNT(*) FROM t1;
+DROP TABLE t1;
+
+# TTL test that compacts once (values still exist), then advances the debug
+# snapshot timestamp and compacts again; the values should now be gone.
+CREATE TABLE t1 (
+a INT NOT NULL,
+b varbinary(64) NOT NULL,
+c varbinary(256) NOT NULL,
+ts bigint(20) UNSIGNED NOT NULL,
+value mediumblob NOT NULL,
+PRIMARY KEY (b,a,c),
+KEY kb (b)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+COMMENT='ttl_duration=10;ttl_col=ts;';
+
+set global rocksdb_debug_ttl_rec_ts = -300;
+INSERT INTO t1 values (1, 'b', 'c', UNIX_TIMESTAMP(), 'd');
+INSERT INTO t1 values (2, 'e', 'f', UNIX_TIMESTAMP(), 'g');
+set global rocksdb_debug_ttl_rec_ts = 300;
+INSERT INTO t1 values (3, 'i', 'j', UNIX_TIMESTAMP(), 'k');
+INSERT INTO t1 values (4, 'm', 'n', UNIX_TIMESTAMP(), 'o');
+set global rocksdb_debug_ttl_rec_ts = 0;
+
+# Nothing should get removed here.
+set global rocksdb_debug_ttl_snapshot_ts = -3600;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+--sorted_result
+SELECT a FROM t1 FORCE INDEX (kb);
+
+# 1 and 2 should get removed here.
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk=0;
+--sorted_result
+SELECT a FROM t1 FORCE INDEX (kb);
+
+# 3 and 4 should get removed here.
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_debug_ttl_snapshot_ts = 3600;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+set global rocksdb_debug_ttl_ignore_pk=0;
+--sorted_result
+SELECT a FROM t1 FORCE INDEX (kb);
+
+DROP TABLE t1;
+
+# TTL field with nullable ttl column (should fail)
+--error ER_RDB_TTL_COL_FORMAT
+CREATE TABLE t1 (
+ a bigint(20) UNSIGNED NOT NULL,
+ b int NOT NULL,
+ c int NOT NULL,
+ ts bigint(20),
+ PRIMARY KEY (a,c),
+ KEY (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;ttl_col=ts;';
+
+# TTL field with non 8-bit integer column (should fail)
+--error ER_RDB_TTL_COL_FORMAT
+CREATE TABLE t1 (
+ a bigint(20) UNSIGNED NOT NULL,
+ b int NOT NULL,
+ c int NOT NULL,
+ ts int,
+ PRIMARY KEY (a,c),
+ KEY (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;ttl_col=ts;';
+
+# TTL duration as some random garbage value
+--error ER_RDB_TTL_DURATION_FORMAT
+CREATE TABLE t1 (
+ a bigint(20) UNSIGNED NOT NULL,
+ b int NOT NULL,
+ c int NOT NULL,
+ PRIMARY KEY (a,c),
+ KEY (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=abc;';
+
+# TTL col is some column outside of the table
+--error ER_RDB_TTL_COL_FORMAT
+CREATE TABLE t1 (
+ a bigint(20) UNSIGNED NOT NULL,
+ b int NOT NULL,
+ c int NOT NULL,
+ PRIMARY KEY (a,c),
+ KEY (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;ttl_col=abc;';
+
+# TTL col must have accompanying duration
+--error ER_RDB_TTL_COL_FORMAT
+CREATE TABLE t1 (
+ a bigint(20) UNSIGNED NOT NULL,
+ b int NOT NULL,
+ c int NOT NULL,
+ PRIMARY KEY (a,c),
+ KEY (b)
+) ENGINE=rocksdb
+COMMENT='ttl_col=abc;';
+
+# Make sure it doesn't filter out things early
+CREATE TABLE t1 (
+ a bigint(20) NOT NULL,
+ b int NOT NULL,
+ PRIMARY KEY (a),
+ KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=500;';
+
+INSERT INTO t1 values (1, 1);
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk=0;
+
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+DROP TABLE t1;
+
+# Testing altering table comment with updated TTL duration
+# This should trigger a rebuild of the table
+CREATE TABLE t1 (
+ a INT PRIMARY KEY,
+ b INT NOT NULL,
+ KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=100;';
+
+INSERT INTO t1 values (1, 1);
+SELECT * FROM t1 FORCE INDEX (kb);
+
+set global rocksdb_debug_ttl_rec_ts = -300;
+ALTER TABLE t1 COMMENT = 'ttl_duration=1';
+set global rocksdb_debug_ttl_rec_ts = 0;
+SHOW CREATE TABLE t1;
+
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk=0;
+
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+DROP TABLE t1;
+
+# Dropping the PK is not allowed on a TTL table (hidden PKs are unsupported with TTL)
+CREATE TABLE t1 (
+ a INT PRIMARY KEY,
+ b INT,
+ KEY (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=100;';
+
+--error ER_RDB_TTL_UNSUPPORTED
+ALTER TABLE t1 DROP PRIMARY KEY;
+
+DROP TABLE t1;
+
+# Test replacing PK, ttl should still work after
+CREATE TABLE t1 (
+ a INT PRIMARY KEY,
+ b INT,
+ KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=5;';
+
+INSERT INTO t1 VALUES (1,1);
+INSERT INTO t1 VALUES (2,2);
+
+ALTER TABLE t1 DROP PRIMARY KEY, ADD PRIMARY KEY(b);
+set global rocksdb_debug_ttl_snapshot_ts = -3600;
+set global rocksdb_force_flush_memtable_now=1;
+set @@global.rocksdb_compact_cf = 'default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+
+--sorted_result
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_debug_ttl_snapshot_ts = 3600;
+set @@global.rocksdb_compact_cf = 'default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+set global rocksdb_debug_ttl_ignore_pk=0;
+
+--sorted_result
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+
+DROP TABLE t1;
+
+# Make sure a table comment with other text before/after the TTL settings
+# still works (the TTL settings just need a semicolon before and after them)
+CREATE TABLE t1 (
+ a bigint(20) UNSIGNED NOT NULL,
+ b int,
+ PRIMARY KEY (a,b),
+ KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='asdadfasdfsadfadf ;ttl_duration=1; asfasdfasdfadfa';
+INSERT INTO t1 values (UNIX_TIMESTAMP(), 1);
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+
+set global rocksdb_debug_ttl_snapshot_ts = 3600;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+
+ALTER TABLE t1 COMMENT = 'adsf;;ttl_duration=5;asfasdfa;ttl_col=a;asdfasdf;';
+set global rocksdb_debug_ttl_rec_ts = 300;
+INSERT INTO t1 values (UNIX_TIMESTAMP(), 2);
+set global rocksdb_debug_ttl_rec_ts = 0;
+set global rocksdb_force_flush_memtable_now=1;
+
+# nothing removed here
+set global rocksdb_compact_cf='default';
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+
+# all removed here
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_debug_ttl_snapshot_ts = 3600;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+set global rocksdb_debug_ttl_ignore_pk=0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+
+DROP TABLE t1;
+
+# Test to make sure that TTL retains original timestamp during update
+CREATE TABLE t1 (
+ a bigint(20) NOT NULL,
+ b int NOT NULL,
+ PRIMARY KEY (a),
+ KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=5;';
+
+set global rocksdb_debug_ttl_rec_ts = -300;
+INSERT INTO t1 values (1, 0);
+INSERT INTO t1 values (3, 0);
+INSERT INTO t1 values (5, 0);
+set global rocksdb_debug_ttl_rec_ts = 300;
+INSERT INTO t1 values (7, 0);
+INSERT INTO t1 values (9, 0);
+set global rocksdb_debug_ttl_rec_ts = 0;
+
+UPDATE t1 SET a=a+1;
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kb);
+
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk=0;
+
+# 1,3,5 should be dropped
+--sorted_result
+SELECT * FROM t1;
+DROP TABLE t1;
+
+# test behaviour on update with TTL column, TTL time can be updated here.
+CREATE TABLE t1 (
+ a INT,
+ b bigint(20) UNSIGNED NOT NULL,
+ PRIMARY KEY (a),
+ KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=5;ttl_col=b;';
+
+set global rocksdb_debug_ttl_rec_ts = -300;
+INSERT INTO t1 values (1, UNIX_TIMESTAMP());
+INSERT INTO t1 values (3, UNIX_TIMESTAMP());
+INSERT INTO t1 values (5, UNIX_TIMESTAMP());
+INSERT INTO t1 values (7, UNIX_TIMESTAMP());
+
+set global rocksdb_debug_ttl_rec_ts = 300;
+UPDATE t1 SET b=(UNIX_TIMESTAMP()+1) WHERE a < 4;
+set global rocksdb_debug_ttl_rec_ts = 0;
+
+--sorted_result
+SELECT a FROM t1 FORCE INDEX (kb);
+
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk=0;
+
+# 5 and 7 should be gone here
+--sorted_result
+SELECT a FROM t1 FORCE INDEX (kb);
+DROP TABLE t1;
+
+# Test rows expired stat variable and disable ttl variable
+CREATE TABLE t1 (
+ a bigint(20) NOT NULL,
+ b int NOT NULL,
+ PRIMARY KEY (a),
+ KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;';
+
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, 1);
+INSERT INTO t1 values (2, 1);
+INSERT INTO t1 values (3, 1);
+set global rocksdb_debug_ttl_rec_ts = 0;
+
+set global rocksdb_enable_ttl=0;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_rows_expired';
+set global rocksdb_enable_ttl=1;
+set global rocksdb_compact_cf='default';
+
+select variable_value-@c from information_schema.global_status where variable_name='rocksdb_rows_expired';
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+DROP TABLE t1;
+
+# A table with TTL should not increment rocksdb_rows_expired when none of the
+# compacted records have actually expired
+CREATE TABLE t1 (
+ a bigint(20) NOT NULL,
+ b int NOT NULL,
+ PRIMARY KEY (a),
+ KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=100;';
+
+INSERT INTO t1 values (1, 1);
+INSERT INTO t1 values (2, 2);
+INSERT INTO t1 values (3, 3);
+
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_rows_expired';
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+select variable_value-@c from information_schema.global_status where variable_name='rocksdb_rows_expired';
+
+DROP TABLE t1;
+
+# Test update on TTL column in pk
+CREATE TABLE t1 (
+ a INT,
+ b bigint(20) UNSIGNED NOT NULL,
+ PRIMARY KEY (a, b),
+ KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=5;ttl_col=b;';
+
+set global rocksdb_debug_ttl_rec_ts = -300;
+INSERT INTO t1 values (1, UNIX_TIMESTAMP());
+INSERT INTO t1 values (3, UNIX_TIMESTAMP());
+INSERT INTO t1 values (5, UNIX_TIMESTAMP());
+INSERT INTO t1 values (7, UNIX_TIMESTAMP());
+
+set global rocksdb_debug_ttl_rec_ts = 300;
+UPDATE t1 SET b=(UNIX_TIMESTAMP()+1) WHERE a < 4;
+set global rocksdb_debug_ttl_rec_ts = 0;
+
+--sorted_result
+SELECT a FROM t1 FORCE INDEX (kb);
+
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk=0;
+
+# 5 and 7 should be gone here
+--sorted_result
+SELECT a FROM t1 FORCE INDEX (kb);
+DROP TABLE t1;
+
+# test behaviour on update with TTL column, TTL time can be updated here.
+CREATE TABLE t1 (
+ a INT,
+ b bigint(20) UNSIGNED NOT NULL,
+ PRIMARY KEY (a, b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=5;ttl_col=b;';
+
+set global rocksdb_debug_ttl_rec_ts = -300;
+INSERT INTO t1 values (1, UNIX_TIMESTAMP());
+INSERT INTO t1 values (3, UNIX_TIMESTAMP());
+INSERT INTO t1 values (5, UNIX_TIMESTAMP());
+INSERT INTO t1 values (7, UNIX_TIMESTAMP());
+
+set global rocksdb_debug_ttl_rec_ts = 300;
+UPDATE t1 SET b=(UNIX_TIMESTAMP()+1) WHERE a < 4;
+set global rocksdb_debug_ttl_rec_ts = 0;
+
+--sorted_result
+SELECT a FROM t1;
+
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+
+# 7 should be gone here
+--sorted_result
+SELECT a FROM t1;
+DROP TABLE t1;
+
+# Add index inplace
+CREATE TABLE t1 (
+`a` binary(8) NOT NULL,
+`b` varbinary(64) NOT NULL,
+`c` varbinary(256) NOT NULL,
+`ts` bigint(20) UNSIGNED NOT NULL,
+`value` mediumblob NOT NULL,
+PRIMARY KEY (`b`,`a`,`c`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+COMMENT='ttl_duration=1;ttl_col=ts;';
+
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values ('a', 'b', 'c', UNIX_TIMESTAMP(), 'd');
+INSERT INTO t1 values ('d', 'e', 'f', UNIX_TIMESTAMP(), 'g');
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1;
+
+set global rocksdb_debug_ttl_ignore_pk = 1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk = 0;
+
+# nothing filtered out
+SELECT COUNT(*) FROM t1;
+
+CREATE INDEX kb on t1 (b);
+
+set global rocksdb_debug_ttl_ignore_pk = 1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk = 0;
+
+# should have filtered the rows out since ttl is passed in compaction filter
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+DROP TABLE t1;
+
+# Add index inplace, implicit TTL
+CREATE TABLE t1 (
+`a` binary(8) NOT NULL,
+`b` varbinary(64) NOT NULL,
+`c` varbinary(256) NOT NULL,
+`value` mediumblob NOT NULL,
+PRIMARY KEY (`b`,`a`,`c`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+COMMENT='ttl_duration=1';
+
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values ('a', 'b', 'c', 'd');
+INSERT INTO t1 values ('d', 'e', 'f', 'g');
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1;
+
+set global rocksdb_debug_ttl_ignore_pk = 1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk = 0;
+
+# nothing filtered out
+SELECT COUNT(*) FROM t1;
+
+CREATE INDEX kb on t1 (b);
+
+set global rocksdb_debug_ttl_ignore_pk = 1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk = 0;
+
+# should have filtered the rows out since ttl is passed in compaction filter
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+DROP TABLE t1;
+
+# Add index inplace, TTL column in PK
+CREATE TABLE t1 (
+`a` binary(8) NOT NULL,
+`b` varbinary(64) NOT NULL,
+`c` varbinary(256) NOT NULL,
+`ts` bigint(20) UNSIGNED NOT NULL,
+`value` mediumblob NOT NULL,
+PRIMARY KEY (`b`,`a`,`c`, `ts`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+COMMENT='ttl_duration=1;ttl_col=ts;';
+
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values ('a', 'b', 'c', UNIX_TIMESTAMP(), 'd');
+INSERT INTO t1 values ('d', 'e', 'f', UNIX_TIMESTAMP(), 'g');
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1;
+
+set global rocksdb_debug_ttl_ignore_pk = 1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk = 0;
+
+# nothing filtered out
+SELECT COUNT(*) FROM t1;
+
+CREATE INDEX kb on t1 (b);
+
+set global rocksdb_debug_ttl_ignore_pk = 1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk = 0;
+
+# should have filtered the rows out since ttl is passed in compaction filter
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary_read_filtering-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary_read_filtering-master.opt
new file mode 100644
index 00000000000..aefc2f5da34
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary_read_filtering-master.opt
@@ -0,0 +1 @@
+--rocksdb_default_cf_options=disable_auto_compactions=true
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary_read_filtering.test b/storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary_read_filtering.test
new file mode 100644
index 00000000000..d6be7d95f8d
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary_read_filtering.test
@@ -0,0 +1,500 @@
+--source include/have_debug.inc
+--source include/have_rocksdb.inc
+
+# The purpose of read filtering for tables with TTL is to ensure that, during a
+# transaction, a key which has already expired but has not yet been removed by
+# compaction is not returned to the user.
+#
+# Without it the user could see anomalies such as rows disappearing in the
+# middle of a transaction, because the compaction filter ignores snapshots
+# when filtering keys.
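+#
+# A minimal sketch of the anomaly this prevents, assuming read filtering were
+# disabled and t_example were a hypothetical TTL table whose rows expire
+# mid-transaction (illustrative only):
+#   BEGIN;
+#   SELECT COUNT(*) FROM t_example;  -- say this returns 2
+#   -- rows expire and a compaction drops them, ignoring the snapshot
+#   SELECT COUNT(*) FROM t_example;  -- now returns 0 inside the same transaction
+#   COMMIT;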
+
+# Basic read filtering test
+CREATE TABLE t1 (
+ a int PRIMARY KEY,
+ b int NOT NULL,
+ KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;';
+
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, 1);
+INSERT INTO t1 values (2, 2);
+set global rocksdb_debug_ttl_rec_ts = 0;
+
+set global rocksdb_force_flush_memtable_now=1;
+
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kb);
+
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_rows_expired';
+set global rocksdb_debug_ttl_ignore_pk = 1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk = 0;
+select variable_value-@c from information_schema.global_status where variable_name='rocksdb_rows_expired';
+
+DROP TABLE t1;
+
+# Test that some rows are hidden but others aren't...
+CREATE TABLE t1 (
+ a int PRIMARY KEY,
+ b BIGINT UNSIGNED NOT NULL,
+ KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=10;';
+
+set global rocksdb_debug_ttl_rec_ts = -300;
+INSERT INTO t1 values (1, UNIX_TIMESTAMP());
+set global rocksdb_debug_ttl_rec_ts = 300;
+INSERT INTO t1 values (2, UNIX_TIMESTAMP());
+INSERT INTO t1 values (3, UNIX_TIMESTAMP());
+set global rocksdb_debug_ttl_rec_ts = 0;
+
+set global rocksdb_force_flush_memtable_now=1;
+
+--echo # 1 should be hidden
+--sorted_result
+SELECT a FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT a FROM t1 FORCE INDEX (kb);
+
+set global rocksdb_debug_ttl_ignore_pk = 1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk = 0;
+
+--echo # none should be hidden yet, compaction runs but records aren't expired
+--sorted_result
+SELECT a FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT a FROM t1 FORCE INDEX (kb);
+
+--echo # all should be hidden now, even though compaction hasn't run again
+set global rocksdb_debug_ttl_read_filter_ts = -310;
+--sorted_result
+SELECT a FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT a FROM t1 FORCE INDEX (kb);
+set global rocksdb_debug_ttl_read_filter_ts = 0;
+
+DROP TABLE t1;
+
+# Test the filtering code explicitly.
+CREATE TABLE t1 (
+ a int PRIMARY KEY,
+ b int NOT NULL,
+ KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;';
+
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, 1);
+INSERT INTO t1 values (3, 3);
+INSERT INTO t1 values (5, 5);
+INSERT INTO t1 values (7, 7);
+set global rocksdb_debug_ttl_rec_ts = 0;
+
+--echo # should return nothing.
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kb);
+
+# disable filtering
+set global rocksdb_enable_ttl_read_filtering=0;
+
+--echo # should return everything
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kb);
+
+# enable filtering
+set global rocksdb_enable_ttl_read_filtering=1;
+
+--echo # should return nothing.
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kb);
+
+DROP TABLE t1;
+
+--echo # Read filtering index scan tests (None of these queries should return any results)
+CREATE TABLE t1 (
+ a int,
+ b int,
+ c int,
+ PRIMARY KEY (a,b,c),
+ KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;';
+
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (0,0,0);
+INSERT INTO t1 values (0,0,1);
+INSERT INTO t1 values (0,1,0);
+INSERT INTO t1 values (0,1,1);
+INSERT INTO t1 values (1,1,2);
+INSERT INTO t1 values (1,2,1);
+INSERT INTO t1 values (1,2,2);
+INSERT INTO t1 values (1,2,3);
+set global rocksdb_debug_ttl_rec_ts = 0;
+
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_rows_expired';
+
+set global rocksdb_force_flush_memtable_now=1;
+
+# HA_READ_KEY_EXACT, using full key
+SELECT * FROM t1 FORCE INDEX (PRIMARY) WHERE a=1 AND b=2 AND c=2;
+SELECT * FROM t1 FORCE INDEX (kb) WHERE a=1 AND b=2 AND c=2;
+
+# HA_READ_KEY_EXACT, not using full key
+SELECT * FROM t1 FORCE INDEX (PRIMARY) WHERE a = 1;
+SELECT * FROM t1 FORCE INDEX (kb) WHERE a = 1;
+
+# HA_READ_BEFORE_KEY, not using full key
+SELECT max(a) FROM t1 FORCE INDEX (PRIMARY) WHERE a < 3;
+SELECT max(a) FROM t1 FORCE INDEX (kb) WHERE a < 3;
+
+# HA_READ_BEFORE_KEY, using full key
+SELECT max(a) FROM t1 FORCE INDEX (PRIMARY) WHERE a < 2 AND b = 1 AND c < 3;
+SELECT max(a) FROM t1 FORCE INDEX (kb) WHERE a < 2 AND b = 1 AND c < 3;
+
+# HA_READ_KEY_OR_NEXT
+SELECT min(a) FROM t1 FORCE INDEX (PRIMARY) WHERE a >= 1;
+SELECT min(a) FROM t1 FORCE INDEX (kb) WHERE a >= 1;
+
+# HA_READ_AFTER_KEY, /* Find next rec. after key-record */
+SELECT min(a) FROM t1 FORCE INDEX (PRIMARY) WHERE a > 1;
+SELECT min(a) FROM t1 FORCE INDEX (kb) WHERE a > 1;
+
+# HA_READ_PREFIX_LAST, /* Last key with the same prefix */
+SELECT * FROM t1 FORCE INDEX (PRIMARY) WHERE a=1 and b in (1) order by c desc;
+SELECT * FROM t1 FORCE INDEX (kb) WHERE a=1 and b in (1) order by c desc;
+
+# HA_READ_PREFIX_LAST_OR_PREV, /* Last or prev key with the same prefix */
+SELECT max(a) FROM t1 FORCE INDEX (PRIMARY) WHERE a <=10;
+SELECT max(a) FROM t1 FORCE INDEX (kb) WHERE a <=10;
+
+# need to test read_range_first()
+# calls into read_range_next() and uses compare_keys() to see if it's out of
+# range
+SELECT a FROM t1 FORCE INDEX (PRIMARY) WHERE a > 0 and a <= 2;
+SELECT a FROM t1 FORCE INDEX (kb) WHERE a > 0 and a <= 2;
+
+select variable_value-@c from information_schema.global_status where variable_name='rocksdb_rows_expired';
+set global rocksdb_debug_ttl_ignore_pk = 1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk = 0;
+select variable_value-@c from information_schema.global_status where variable_name='rocksdb_rows_expired';
+DROP TABLE t1;
+
+--echo # Attempt to update expired value, should filter out
+set global rocksdb_force_flush_memtable_now=1;
+CREATE TABLE t1 (
+ a int PRIMARY KEY
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1);
+set global rocksdb_debug_ttl_rec_ts = 0;
+
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT * FROM t1;
+
+--echo # No error is thrown here, under the hood index_next_with_direction is
+--echo # filtering out the record from being seen in the first place.
+UPDATE t1 set a = 1;
+DROP TABLE t1;
+
+--echo # Ensure no rows can disappear in the middle of long-running transactions
+--echo # Also ensure repeatable-read works as expected
+--source include/count_sessions.inc
+connect (con1,localhost,root,,);
+connect (con2,localhost,root,,);
+
+CREATE TABLE t1 (
+ a int PRIMARY KEY,
+ b int NOT NULL,
+ KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=5;';
+
+INSERT INTO t1 values (1, 1);
+
+connection con1;
+--echo # Creating Snapshot (start transaction)
+BEGIN;
+
+# We need the below snippet in case establishing con1 took an arbitrary
+# amount of time. See https://github.com/facebook/mysql-5.6/pull/617#discussion_r120525391.
+--disable_query_log
+--let $snapshot_size= `SELECT COUNT(*) FROM t1`
+--let $i= 0
+while ($snapshot_size != 1)
+{
+ if ($i == 1000)
+ {
+ --die Your testing host is too slow for reasonable TTL testing
+ }
+
+ $i++;
+ ROLLBACK;
+ INSERT INTO t1 values (1,1);
+ BEGIN;
+ --let $snapshot_size= `SELECT COUNT(*) FROM t1`
+}
+--enable_query_log
+
+--echo # Nothing filtered out here
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kb);
+
+--sleep 5
+
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kb); # <= shouldn't be filtered out here
+
+--echo # Switching to connection 2
+connection con2;
+--echo # compaction doesn't do anything since con1 snapshot is still open
+set global rocksdb_debug_ttl_ignore_pk = 1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk = 0;
+--echo # read filtered out, because on a different connection, on
+--echo # this connection the records have 'expired' already so they are filtered out
+--echo # even though they have not yet been removed by compaction
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kb);
+
+--echo # Switching to connection 1
+connection con1;
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kb); # <= shouldn't be filtered out here
+
+UPDATE t1 set a = a + 1;
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kb); # <= shouldn't be filtered out here
+
+COMMIT;
+
+# <= filtered out here because time has passed.
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kb);
+
+DROP TABLE t1;
+disconnect con1;
+disconnect con2;
+
+# transaction 1: create a snapshot and select * => returns nothing.
+# transaction 2: insert into table, flush.
+# transaction 1: select * => returns nothing, but the snapshot should prevent
+#   the compaction code from removing the rows, no matter what the ttl
+#   duration is.
+# transaction 2: select * => sees nothing; disable filter, select * => sees
+#   everything; enable filter, select * => sees nothing.
+connect (con1,localhost,root,,);
+connect (con2,localhost,root,,);
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+
+CREATE TABLE t1 (
+ a int PRIMARY KEY,
+ b int NOT NULL,
+ KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;';
+
+--echo # On Connection 1
+connection con1;
+--echo # Creating Snapshot (start transaction)
+BEGIN;
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kb);
+# Sleep 5 secs after creating the snapshot; this ensures any records created
+# after this point can't be removed by compaction until the snapshot is released.
+--sleep 5
+
+--echo # On Connection 2
+connection con2;
+set global rocksdb_debug_ttl_rec_ts = -2;
+INSERT INTO t1 values (1, 1);
+INSERT INTO t1 values (3, 3);
+INSERT INTO t1 values (5, 5);
+INSERT INTO t1 values (7, 7);
+set global rocksdb_debug_ttl_rec_ts = 0;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+
+--echo # On Connection 1
+connection con1;
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kb);
+
+--echo # On Connection 2
+connection con2;
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kb);
+set global rocksdb_enable_ttl_read_filtering=0;
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kb);
+set global rocksdb_enable_ttl_read_filtering=1;
+
+disconnect con2;
+disconnect con1;
+connection default;
+
+DROP TABLE t1;
+# Wait till we reach the initial number of concurrent sessions
+--source include/wait_until_count_sessions.inc
+
+# Test that index_next_with_direction skips records properly
+CREATE TABLE t1 (
+ a int,
+ b int,
+ ts bigint(20) UNSIGNED NOT NULL,
+ PRIMARY KEY (a),
+ KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;ttl_col=ts;';
+
+set global rocksdb_debug_ttl_rec_ts = 100;
+INSERT INTO t1 VALUES (1, 1, UNIX_TIMESTAMP());
+INSERT INTO t1 VALUES (2, 2, UNIX_TIMESTAMP());
+INSERT INTO t1 VALUES (3, 3, UNIX_TIMESTAMP());
+INSERT INTO t1 VALUES (4, 4, UNIX_TIMESTAMP());
+INSERT INTO t1 VALUES (5, 5, UNIX_TIMESTAMP());
+INSERT INTO t1 VALUES (6, 6, UNIX_TIMESTAMP());
+INSERT INTO t1 VALUES (7, 7, UNIX_TIMESTAMP());
+INSERT INTO t1 VALUES (8, 8, UNIX_TIMESTAMP());
+INSERT INTO t1 VALUES (9, 9, UNIX_TIMESTAMP());
+INSERT INTO t1 VALUES (10, 10, UNIX_TIMESTAMP());
+set global rocksdb_debug_ttl_rec_ts = 0;
+
+set global rocksdb_force_flush_memtable_now=1;
+--echo # None are expired
+SELECT a, b FROM t1 FORCE INDEX (kb);
+
+set global rocksdb_debug_ttl_rec_ts = -100;
+UPDATE t1 SET ts=(UNIX_TIMESTAMP()+1) WHERE a IN (4, 7);
+set global rocksdb_debug_ttl_rec_ts = 0;
+
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+
+--echo # 4 and 7 should be gone
+SELECT a, b FROM t1 FORCE INDEX (kb);
+
+DROP TABLE t1;
+
+# Test range scans with various conditionals
+CREATE TABLE t1 (
+ c1 INT,
+ c2 INT,
+ name VARCHAR(25) NOT NULL,
+ PRIMARY KEY (c1, c2),
+ KEY kc2 (c2)
+) ENGINE=ROCKSDB
+COMMENT='ttl_duration=1;';
+
+set global rocksdb_debug_ttl_rec_ts = -1200;
+INSERT INTO t1 values (1,1,'a');
+INSERT INTO t1 values (2,2,'b');
+set global rocksdb_debug_ttl_rec_ts = 1200;
+INSERT INTO t1 values (3,3,'c');
+INSERT INTO t1 values (4,4,'d');
+set global rocksdb_debug_ttl_rec_ts = -1200;
+INSERT INTO t1 values (5,5,'e');
+INSERT INTO t1 values (6,6,'f');
+set global rocksdb_debug_ttl_rec_ts = 1200;
+INSERT INTO t1 values (7,7,'g');
+INSERT INTO t1 values (8,8,'h');
+set global rocksdb_debug_ttl_rec_ts = 0;
+
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kc2);
+
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (PRIMARY) WHERE c1 > 5;
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kc2) WHERE c2 > 5;
+
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (PRIMARY) WHERE 3 < c1 AND c1 < 6;
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kc2) WHERE 3 < c2 AND c2 < 6;
+
+DROP TABLE t1;
+
+# Test range scans with varying expirations
+CREATE TABLE t1 (
+ a int,
+ b int,
+ PRIMARY KEY (a),
+ KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1800;';
+
+set global rocksdb_debug_ttl_rec_ts = 0;
+INSERT INTO t1 values (1,1);
+INSERT INTO t1 values (2,2);
+INSERT INTO t1 values (7,7);
+INSERT INTO t1 values (10,10);
+INSERT INTO t1 values (11,11);
+INSERT INTO t1 values (12,12);
+set global rocksdb_debug_ttl_rec_ts = 450;
+INSERT INTO t1 values (3,3);
+INSERT INTO t1 values (4,4);
+INSERT INTO t1 values (8,8);
+INSERT INTO t1 values (16,16);
+INSERT INTO t1 values (17,17);
+INSERT INTO t1 values (18,18);
+set global rocksdb_debug_ttl_rec_ts = 900;
+INSERT INTO t1 values (5,5);
+INSERT INTO t1 values (6,6);
+INSERT INTO t1 values (9,9);
+INSERT INTO t1 values (13,13);
+INSERT INTO t1 values (14,14);
+INSERT INTO t1 values (15,15);
+set global rocksdb_debug_ttl_rec_ts = 0;
+
+--echo # Should see everything
+SELECT * FROM t1;
+
+--echo # Should have no records from the first group
+set global rocksdb_debug_ttl_read_filter_ts = -1800;
+SELECT * FROM t1;
+SELECT * FROM t1 FORCE INDEX (kb) WHERE a > 5 AND a < 15;
+
+--echo # Should only have records from the last group
+set global rocksdb_debug_ttl_read_filter_ts = -1800 - 450;
+SELECT * FROM t1;
+SELECT * FROM t1 FORCE INDEX (kb) WHERE a < 10;
+
+--echo # Should be empty
+set global rocksdb_debug_ttl_read_filter_ts = -1800 - 900;
+SELECT * FROM t1;
+
+set global rocksdb_debug_ttl_read_filter_ts = 0;
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary_read_filtering_multiple_index.test b/storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary_read_filtering_multiple_index.test
new file mode 100644
index 00000000000..4f9788ce33c
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary_read_filtering_multiple_index.test
@@ -0,0 +1,87 @@
+--source include/have_debug.inc
+--source include/have_rocksdb.inc
+
+# Multiple indexes, trigger compaction on sk and check consistency
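+# (Editorial note, assumption) The index COMMENTs 'kb' and 'kc' below place
+# each secondary key in its own column family, so setting
+# rocksdb_compact_cf='kb' only runs the TTL compaction filter on that index's
+# CF; read filtering should still keep PRIMARY, kb and kc returning the same rows.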
+CREATE TABLE t1 (
+ a int NOT NULL,
+ b int NOT NULL,
+ c int NOT NULL,
+ PRIMARY KEY (a),
+ KEY kb (b) COMMENT 'kb',
+ KEY kc (c) COMMENT 'kc'
+) ENGINE=ROCKSDB
+COMMENT='ttl_duration=1';
+
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, 1, 1);
+INSERT INTO t1 values (2, 2, 2);
+set global rocksdb_debug_ttl_rec_ts = 100;
+INSERT INTO t1 values (3, 3, 3);
+set global rocksdb_debug_ttl_rec_ts = 0;
+
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='kb';
+
+# Results should be consistent
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+SELECT * FROM t1 FORCE INDEX (kb);
+SELECT * FROM t1 FORCE INDEX (kc);
+
+DROP TABLE t1;
+
+# Trigger compaction on pk and check consistency
+CREATE TABLE t1 (
+ a int NOT NULL,
+ b int NOT NULL,
+ c int NOT NULL,
+ PRIMARY KEY (a),
+ KEY kb (b) COMMENT 'kb',
+ KEY kc (c) COMMENT 'kc'
+) ENGINE=ROCKSDB
+COMMENT='ttl_duration=1';
+
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, 1, 1);
+INSERT INTO t1 values (2, 2, 2);
+set global rocksdb_debug_ttl_rec_ts = 100;
+INSERT INTO t1 values (3, 3, 3);
+set global rocksdb_debug_ttl_rec_ts = 0;
+
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+
+# Results should be consistent
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+SELECT * FROM t1 FORCE INDEX (kb);
+SELECT * FROM t1 FORCE INDEX (kc);
+
+DROP TABLE t1;
+
+# Trigger compaction on pk and sk and check consistency
+CREATE TABLE t1 (
+ a int NOT NULL,
+ b int NOT NULL,
+ c int NOT NULL,
+ PRIMARY KEY (a),
+ KEY kb (b) COMMENT 'kb',
+ KEY kc (c) COMMENT 'kc'
+) ENGINE=ROCKSDB
+COMMENT='ttl_duration=1';
+
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, 1, 1);
+INSERT INTO t1 values (2, 2, 2);
+set global rocksdb_debug_ttl_rec_ts = 100;
+INSERT INTO t1 values (3, 3, 3);
+set global rocksdb_debug_ttl_rec_ts = 0;
+
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_compact_cf='kb';
+
+# Results should be consistent
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+SELECT * FROM t1 FORCE INDEX (kb);
+SELECT * FROM t1 FORCE INDEX (kc);
+
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary_with_partitions-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary_with_partitions-master.opt
new file mode 100644
index 00000000000..10a88c30361
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary_with_partitions-master.opt
@@ -0,0 +1 @@
+--rocksdb_enable_ttl_read_filtering=0
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary_with_partitions.test b/storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary_with_partitions.test
new file mode 100644
index 00000000000..c10c83f9f9d
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary_with_partitions.test
@@ -0,0 +1,300 @@
+--source include/have_debug.inc
+--source include/have_rocksdb.inc
+--source include/have_partition.inc
+
+#
+# Create a table with multiple partitions, but in the table comment don't
+# specify that per-partition column families (CFs) should be created. Expect
+# that the default CF will be used and no new ones will be created.
+#
+# In addition, specify TTL on one of the partitions. Insert a few rows into
+# all the partitions and verify after compaction that the rows inside the
+# partition with TTL have disappeared.
+#
+# Add secondary keys to all tables to test compatibility.
+#
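+# (Editorial note, assumption) This test runs with
+# --rocksdb_enable_ttl_read_filtering=0 (see the accompanying -master.opt), so
+# expired rows stay visible to SELECT until the compaction filter physically
+# removes them; that is why each case checks the contents both before and
+# after forcing a flush and compaction.
+#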
+CREATE TABLE t1 (
+ c1 INT,
+ c2 INT,
+ PRIMARY KEY (`c1`),
+ KEY kc2 (`c2`)
+) ENGINE=ROCKSDB
+COMMENT="custom_p0_ttl_duration=1;"
+PARTITION BY LIST(c1) (
+ PARTITION custom_p0 VALUES IN (1, 4, 7),
+ PARTITION custom_p1 VALUES IN (2, 5, 8),
+ PARTITION custom_p2 VALUES IN (3, 6, 9)
+);
+
+set global rocksdb_debug_ttl_rec_ts = -3600;
+INSERT INTO t1 values (1, 1);
+INSERT INTO t1 values (2, 2);
+INSERT INTO t1 values (3, 3);
+INSERT INTO t1 values (4, 4);
+INSERT INTO t1 values (5, 5);
+INSERT INTO t1 values (6, 6);
+INSERT INTO t1 values (7, 7);
+INSERT INTO t1 values (8, 8);
+INSERT INTO t1 values (9, 9);
+set global rocksdb_debug_ttl_rec_ts = 0;
+
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kc2);
+
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+
+# 1,4, and 7 should be gone
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kc2);
+DROP TABLE t1;
+
+#
+# Create a table with multiple partitions and request a separate CF to be
+# created for every partition. As a result we expect three different CFs to be
+# created.
+#
+# In addition, specify TTL on some of the partitions. Insert a few rows into
+# all the partitions and verify after compaction that the rows inside the
+# partitions with TTL have disappeared.
+#
+CREATE TABLE t1 (
+ c1 INT,
+ c2 INT,
+ name VARCHAR(25) NOT NULL,
+ PRIMARY KEY (`c1`, `c2`) COMMENT 'custom_p0_cfname=foo;custom_p1_cfname=my_custom_cf;custom_p2_cfname=baz',
+ KEY kc2 (`c2`)
+) ENGINE=ROCKSDB
+COMMENT="custom_p0_ttl_duration=1;custom_p1_ttl_duration=7;"
+PARTITION BY LIST(c1) (
+ PARTITION custom_p0 VALUES IN (1, 4, 7),
+ PARTITION custom_p1 VALUES IN (2, 5, 8),
+ PARTITION custom_p2 VALUES IN (3, 6, 9)
+);
+
+set global rocksdb_debug_ttl_rec_ts = -1200;
+INSERT INTO t1 values (1,1,'a');
+INSERT INTO t1 values (4,4,'aaaa');
+INSERT INTO t1 values (7,7,'aaaaaaa');
+
+set global rocksdb_debug_ttl_rec_ts = 1200;
+INSERT INTO t1 values (2,2,'aa');
+INSERT INTO t1 values (3,3,'aaa');
+INSERT INTO t1 values (5,5,'aaaaa');
+INSERT INTO t1 values (6,6,'aaaaaa');
+INSERT INTO t1 values (8,8,'aaaaaaaa');
+INSERT INTO t1 values (9,9,'aaaaaaaaa');
+set global rocksdb_debug_ttl_rec_ts = 0;
+
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kc2);
+
+set global rocksdb_force_flush_memtable_now=1;
+set @@global.rocksdb_compact_cf = 'foo';
+set @@global.rocksdb_compact_cf = 'my_custom_cf';
+set @@global.rocksdb_compact_cf = 'default';
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kc2);
+
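+# (Editorial note, assumption) rocksdb_debug_ttl_snapshot_ts appears to shift
+# the timestamp the compaction filter treats as "now" by the given number of
+# seconds, so the records written with a +1200s offset can be made to expire
+# without actually waiting.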
+set global rocksdb_debug_ttl_snapshot_ts = 3600;
+set @@global.rocksdb_compact_cf = 'foo';
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kc2);
+
+# Now 2,5,8 should be removed (this verifies that TTL is only operating on the
+# particular CF).
+set @@global.rocksdb_compact_cf = 'my_custom_cf';
+set @@global.rocksdb_compact_cf = 'default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kc2);
+
+DROP TABLE t1;
+
+#
+# Create a table with CFs/TTL per partition and verify that ALTER TABLE + DROP
+# PRIMARY, ADD PRIMARY work for that scenario and data is persisted/filtered as
+# expected.
+#
+CREATE TABLE t1 (
+ c1 INT,
+ c2 INT,
+ name VARCHAR(25) NOT NULL,
+ event DATE,
+ PRIMARY KEY (`c1`, `c2`) COMMENT 'custom_p0_cfname=foo;custom_p1_cfname=bar;custom_p2_cfname=baz;',
+ KEY kc2 (c2)
+) ENGINE=ROCKSDB
+COMMENT="custom_p0_ttl_duration=9999;custom_p2_ttl_duration=5;"
+PARTITION BY LIST(c1) (
+ PARTITION custom_p0 VALUES IN (1, 2, 3),
+ PARTITION custom_p1 VALUES IN (4, 5, 6),
+ PARTITION custom_p2 VALUES IN (7, 8, 9)
+);
+
+INSERT INTO t1 VALUES (1, 1, "one", null);
+INSERT INTO t1 VALUES (2, 2, "two", null);
+INSERT INTO t1 VALUES (3, 3, "three", null);
+
+INSERT INTO t1 VALUES (4, 4, "four", null);
+INSERT INTO t1 VALUES (5, 5, "five", null);
+INSERT INTO t1 VALUES (6, 6, "six", null);
+
+INSERT INTO t1 VALUES (7, 7, "seven", null);
+INSERT INTO t1 VALUES (8, 8, "eight", null);
+INSERT INTO t1 VALUES (9, 9, "nine", null);
+
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kc2);
+
+# TTL should be reset after alter table
+set global rocksdb_debug_ttl_rec_ts = 600;
+ALTER TABLE t1 DROP PRIMARY KEY, ADD PRIMARY KEY(`c2`,`c1`) COMMENT 'custom_p0_cfname=foo;custom_p1_cfname=bar;custom_p2_cfname=baz;';
+set global rocksdb_debug_ttl_rec_ts = 0;
+SHOW CREATE TABLE t1;
+
+# ...so nothing should be gone here
+set global rocksdb_debug_ttl_snapshot_ts = 100;
+set global rocksdb_force_flush_memtable_now=1;
+set @@global.rocksdb_compact_cf = 'baz';
+# Filter out expired secondary keys too
+set @@global.rocksdb_compact_cf = 'default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kc2);
+
+set global rocksdb_debug_ttl_snapshot_ts = 1200;
+set @@global.rocksdb_compact_cf = 'foo';
+set @@global.rocksdb_compact_cf = 'baz';
+# Filter out expired secondary keys too
+set @@global.rocksdb_compact_cf = 'default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kc2);
+
+DROP TABLE t1;
+
+#
+# Create a table with a non-partitioned TTL duration and per-partition TTL
+# columns.
+#
+# Here the table-level TTL duration applies to any partition that does not
+# override it; custom_p1 overrides both the duration and the TTL column, and
+# custom_p2 overrides only the duration.
+#
+CREATE TABLE t1 (
+ c1 BIGINT,
+ c2 BIGINT UNSIGNED NOT NULL,
+ name VARCHAR(25) NOT NULL,
+ event DATE,
+ PRIMARY KEY (`c1`) COMMENT 'custom_p0_cfname=foo;custom_p1_cfname=bar;custom_p2_cfname=baz;',
+ KEY kc2 (`c2`)
+) ENGINE=ROCKSDB
+COMMENT="ttl_duration=1;custom_p1_ttl_duration=100;custom_p1_ttl_col=c2;custom_p2_ttl_duration=5000;"
+PARTITION BY LIST(c1) (
+ PARTITION custom_p0 VALUES IN (1, 2, 3),
+ PARTITION custom_p1 VALUES IN (4, 5, 6),
+ PARTITION custom_p2 VALUES IN (7, 8, 9)
+);
+
+set global rocksdb_debug_ttl_rec_ts = -300;
+INSERT INTO t1 VALUES (1, UNIX_TIMESTAMP(), "one", null);
+INSERT INTO t1 VALUES (2, UNIX_TIMESTAMP(), "two", null);
+INSERT INTO t1 VALUES (3, UNIX_TIMESTAMP(), "three", null);
+set global rocksdb_debug_ttl_rec_ts = 0;
+
+INSERT INTO t1 VALUES (4, UNIX_TIMESTAMP(), "four", null);
+INSERT INTO t1 VALUES (5, UNIX_TIMESTAMP(), "five", null);
+INSERT INTO t1 VALUES (6, UNIX_TIMESTAMP(), "six", null);
+
+INSERT INTO t1 VALUES (7, UNIX_TIMESTAMP(), "seven", null);
+INSERT INTO t1 VALUES (8, UNIX_TIMESTAMP(), "eight", null);
+INSERT INTO t1 VALUES (9, UNIX_TIMESTAMP(), "nine", null);
+
+set global rocksdb_force_flush_memtable_now=1;
+set @@global.rocksdb_compact_cf = 'foo';
+set @@global.rocksdb_compact_cf = 'baz';
+set @@global.rocksdb_compact_cf = 'bar';
+# Filter out expired secondary keys too
+set @@global.rocksdb_compact_cf = 'default';
+
+# Here we expect only 1,2,3 to be gone; their partition uses the implicit TTL timestamp.
+--sorted_result
+SELECT c1 FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT c1 FROM t1 FORCE INDEX (kc2);
+
+# Here we expect only 4,5,6 to be gone; their TTL is based on column c2.
+set global rocksdb_debug_ttl_snapshot_ts = 600;
+set @@global.rocksdb_compact_cf = 'bar';
+# Filter out expired secondary keys too
+set @@global.rocksdb_compact_cf = 'default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+--sorted_result
+SELECT c1 FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT c1 FROM t1 FORCE INDEX (kc2);
+
+# at this point only 7,8,9 should be left.
+DROP TABLE t1;
+
+#
+# Add index inplace
+#
+CREATE TABLE t1 (
+ c1 INT,
+ c2 INT,
+ PRIMARY KEY (`c1`) COMMENT 'custom_p0_cfname=foo;'
+) ENGINE=ROCKSDB
+COMMENT="custom_p0_ttl_duration=1;"
+PARTITION BY LIST(c1) (
+ PARTITION custom_p0 VALUES IN (1, 4, 7),
+ PARTITION custom_p1 VALUES IN (2, 5, 8),
+ PARTITION custom_p2 VALUES IN (3, 6, 9)
+);
+
+set global rocksdb_debug_ttl_rec_ts = -3600;
+INSERT INTO t1 values (1, 1);
+INSERT INTO t1 values (4, 4);
+INSERT INTO t1 values (7, 7);
+set global rocksdb_debug_ttl_rec_ts = 0;
+INSERT INTO t1 values (2, 2);
+INSERT INTO t1 values (3, 3);
+INSERT INTO t1 values (5, 5);
+INSERT INTO t1 values (6, 6);
+INSERT INTO t1 values (8, 8);
+INSERT INTO t1 values (9, 9);
+
+# Nothing filtered out
+--sorted_result
+SELECT * FROM t1;
+
+CREATE INDEX kc2 on t1 (c2);
+
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='foo';
+set global rocksdb_compact_cf='default';
+
+# 1,4, and 7 should be gone
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kc2);
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/write_sync.test b/storage/rocksdb/mysql-test/rocksdb/t/write_sync.test
index c21c65a7b40..16032f8cff7 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/write_sync.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/write_sync.test
@@ -6,8 +6,8 @@ SET GLOBAL rocksdb_write_ignore_missing_column_families=true;
create table aaa (id int primary key, i int) engine rocksdb;
set @save_rocksdb_flush_log_at_trx_commit=@@global.rocksdb_flush_log_at_trx_commit;
-SET GLOBAL rocksdb_flush_log_at_trx_commit=0;
---exec sleep 30
+SET GLOBAL rocksdb_flush_log_at_trx_commit=1;
+--exec sleep 5
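+# (Editorial note, assumption) rocksdb_flush_log_at_trx_commit=1 syncs the WAL
+# on every commit, while 0 and 2 defer syncing (not at commit / periodically),
+# which is why the later sections wait for rocksdb_wal_synced to advance
+# instead of asserting an exact per-insert delta.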
select variable_value into @a from information_schema.global_status where variable_name='rocksdb_wal_synced';
insert aaa(id, i) values(1,1);
select variable_value-@a from information_schema.global_status where variable_name='rocksdb_wal_synced';
@@ -16,19 +16,22 @@ select variable_value-@a from information_schema.global_status where variable_na
insert aaa(id, i) values(3,1);
select variable_value-@a from information_schema.global_status where variable_name='rocksdb_wal_synced';
-SET GLOBAL rocksdb_flush_log_at_trx_commit=1;
+SET GLOBAL rocksdb_flush_log_at_trx_commit=0;
+--exec sleep 5
+select variable_value into @a from information_schema.global_status where variable_name='rocksdb_wal_synced';
insert aaa(id, i) values(4,1);
-select variable_value-@a from information_schema.global_status where variable_name='rocksdb_wal_synced';
-insert aaa(id, i) values(5,1);
-select variable_value-@a from information_schema.global_status where variable_name='rocksdb_wal_synced';
-insert aaa(id, i) values(6,1);
-select variable_value-@a from information_schema.global_status where variable_name='rocksdb_wal_synced';
+
+let $status_var=rocksdb_wal_synced;
+let $status_var_value=`select @a+1`;
+source include/wait_for_status_var.inc;
SET GLOBAL rocksdb_flush_log_at_trx_commit=2;
-insert aaa(id, i) values(7,1);
+--exec sleep 5
+select variable_value into @a from information_schema.global_status where variable_name='rocksdb_wal_synced';
+insert aaa(id, i) values(5,1);
let $status_var=rocksdb_wal_synced;
-let $status_var_value=`select @a+4`;
+let $status_var_value=`select @a+1`;
source include/wait_for_status_var.inc;
truncate table aaa;
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/r/multiclient_2pc.result b/storage/rocksdb/mysql-test/rocksdb_rpl/r/multiclient_2pc.result
index 7a7400f17e1..493107ec071 100644
--- a/storage/rocksdb/mysql-test/rocksdb_rpl/r/multiclient_2pc.result
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/r/multiclient_2pc.result
@@ -1,4 +1,5 @@
DROP TABLE IF EXISTS t1;
+SET GLOBAL MAX_BINLOG_SIZE = 4096;
SET GLOBAL ROCKSDB_ENABLE_2PC = ON;
create table t1 (a int primary key, b int, c varchar(255)) engine=rocksdb;
'con1'
@@ -7,12 +8,10 @@ SET DEBUG_SYNC='rocksdb.prepared SIGNAL parked WAIT_FOR go';
insert into t1 values (1, 1, "iamtheogthealphaandomega");;
'con2'
insert into t1 values (2, 1, "i_am_just_here_to_trigger_a_flush");
-SET GLOBAL ROCKSDB_ENABLE_2PC = OFF;
-SET GLOBAL ROCKSDB_WRITE_SYNC = OFF;
+SET GLOBAL ROCKSDB_FLUSH_LOG_AT_TRX_COMMIT = 0;
SET GLOBAL SYNC_BINLOG = 0;
SET DEBUG_SYNC='now WAIT_FOR parked';
-SET GLOBAL ROCKSDB_ENABLE_2PC = ON;
-SET GLOBAL ROCKSDB_WRITE_SYNC = ON;
+SET GLOBAL ROCKSDB_FLUSH_LOG_AT_TRX_COMMIT = 2;
SET GLOBAL SYNC_BINLOG = 1;
insert into t1 values (1000000, 1, "i_am_just_here_to_trigger_a_flush");
SET DEBUG_SYNC='now SIGNAL go';
@@ -23,5 +22,5 @@ a b c
1 1 iamtheogthealphaandomega
select count(*) from t1;
count(*)
-1000000
+4096
drop table t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_ddl_high_priority.result b/storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_ddl_high_priority.result
new file mode 100644
index 00000000000..8a1fd1b94e0
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_ddl_high_priority.result
@@ -0,0 +1,39 @@
+include/master-slave.inc
+Warnings:
+Note #### Sending passwords in plain text without SSL/TLS is extremely insecure.
+Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information.
+[connection master]
+include/rpl_connect.inc [creating slave_block]
+drop table if exists t1;
+create table t1 (i int);
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `i` int(11) DEFAULT NULL
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+insert into t1 values (1), (2), (3);
+connection slave
+select * from t1;
+i
+1
+2
+3
+connection slave_block
+lock tables t1 read;
+connection master;
+create high_priority index idx1 on t1 (i);
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `i` int(11) DEFAULT NULL,
+ KEY `idx1` (`i`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+connection slave;
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `i` int(11) DEFAULT NULL,
+ KEY `idx1` (`i`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+drop table t1;
+include/rpl_end.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/multiclient_2pc.test b/storage/rocksdb/mysql-test/rocksdb_rpl/t/multiclient_2pc.test
index 0f68de04712..6143824eea6 100644
--- a/storage/rocksdb/mysql-test/rocksdb_rpl/t/multiclient_2pc.test
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/multiclient_2pc.test
@@ -3,6 +3,11 @@
--source include/have_debug.inc
--source include/have_debug_sync.inc
--source include/big_test.inc
+# The test involves a crash which does not seem to be handled well by
+# mysql-test/lib/My/SafeProcess/my_safe_process under valgrind, as it hangs
+# forever. The test is not meant to verify memory leaks, so not much
+# coverage is lost by not running it under valgrind.
+--source include/not_valgrind.inc
--exec echo > $MYSQLTEST_VARDIR/log/mysqld.1.err
@@ -10,16 +15,18 @@
DROP TABLE IF EXISTS t1;
--enable_warnings
+# Set it to the minimum so that we can make the binlog rotate with a few inserts
+SET GLOBAL MAX_BINLOG_SIZE = 4096;
SET GLOBAL ROCKSDB_ENABLE_2PC = ON;
create table t1 (a int primary key, b int, c varchar(255)) engine=rocksdb;
connect (con1, localhost, root,,);
connect (con2, localhost, root,,);
-# On connection one we insert a row and pause after commit marker is written to WAL.
-# Connection two then inserts many rows. After connection two
-# completes connection one continues only to crash before commit but after
-# binlog write. On crash recovery we see that connection one's value
+# On connection one we insert a row and pause after the prepare marker is
+# written to the WAL. Connection two then inserts many rows to rotate the
+# binlog. After connection two completes, connection one continues, only to
+# crash after the binlog write but before the commit. On crash recovery we see
+# that connection one's value
# has been recovered and commited
connection con1;
--echo 'con1'
@@ -35,14 +42,14 @@ insert into t1 values (2, 1, "i_am_just_here_to_trigger_a_flush");
# Disable 2PC and syncing for faster inserting of dummy rows
# These rows only purpose is to rotate the binlog
-SET GLOBAL ROCKSDB_ENABLE_2PC = OFF;
-SET GLOBAL ROCKSDB_WRITE_SYNC = OFF;
+SET GLOBAL ROCKSDB_FLUSH_LOG_AT_TRX_COMMIT = 0;
SET GLOBAL SYNC_BINLOG = 0;
SET DEBUG_SYNC='now WAIT_FOR parked';
--disable_query_log
--let $pk= 3
-while ($pk < 1000000) {
+# The binlog size is 4096 bytes, so with that many insertions it will definitely rotate
+while ($pk < 4096) {
eval insert into t1 values ($pk, 1, "foobardatagoesheresothatmorelogsrollwhichiswhatwewant");
--inc $pk
}
@@ -50,18 +57,16 @@ while ($pk < 1000000) {
# re-enable 2PC an syncing then write to trigger a flush
# before we trigger the crash to simulate full-durability
-SET GLOBAL ROCKSDB_ENABLE_2PC = ON;
-SET GLOBAL ROCKSDB_WRITE_SYNC = ON;
+SET GLOBAL ROCKSDB_FLUSH_LOG_AT_TRX_COMMIT = 2;
SET GLOBAL SYNC_BINLOG = 1;
insert into t1 values (1000000, 1, "i_am_just_here_to_trigger_a_flush");
SET DEBUG_SYNC='now SIGNAL go';
-
+--source include/wait_until_disconnected.inc
--enable_reconnect
--source include/wait_until_connected_again.inc
-
---exec sleep 60
+--disable_reconnect
--exec python suite/rocksdb/t/check_log_for_xa.py $MYSQLTEST_VARDIR/log/mysqld.1.err commit,prepare,rollback
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_ddl_high_priority.test b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_ddl_high_priority.test
new file mode 100644
index 00000000000..7cf4a4d32b5
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_ddl_high_priority.test
@@ -0,0 +1,2 @@
+--source include/have_rocksdb.inc
+--source include/rpl_ddl_high_priority.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/my.cnf b/storage/rocksdb/mysql-test/rocksdb_sys_vars/my.cnf
new file mode 100644
index 00000000000..85ac85aad3b
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/my.cnf
@@ -0,0 +1,4 @@
+!include include/default_my.cnf
+
+[server]
+rocksdb_force_compute_memtable_stats_cachetime=0
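+# (Editorial note, assumption) cachetime=0 disables caching of the forced
+# memtable stats so the sys_vars tests always see freshly computed values.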
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_bulk_load_allow_unsorted_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_bulk_load_allow_unsorted_basic.result
new file mode 100644
index 00000000000..a59ba561181
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_bulk_load_allow_unsorted_basic.result
@@ -0,0 +1,100 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+SET @start_global_value = @@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED;
+SELECT @start_global_value;
+@start_global_value
+0
+SET @start_session_value = @@session.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED;
+SELECT @start_session_value;
+@start_session_value
+0
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED to 1"
+SET @@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED = 1;
+SELECT @@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED;
+@@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED = DEFAULT;
+SELECT @@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED;
+@@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED
+0
+"Trying to set variable @@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED to 0"
+SET @@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED = 0;
+SELECT @@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED;
+@@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED
+0
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED = DEFAULT;
+SELECT @@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED;
+@@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED
+0
+"Trying to set variable @@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED to on"
+SET @@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED = on;
+SELECT @@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED;
+@@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED = DEFAULT;
+SELECT @@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED;
+@@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED
+0
+'# Setting to valid values in session scope#'
+"Trying to set variable @@session.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED to 1"
+SET @@session.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED = 1;
+SELECT @@session.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED;
+@@session.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED
+1
+"Setting the session scope variable back to default"
+SET @@session.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED = DEFAULT;
+SELECT @@session.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED;
+@@session.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED
+0
+"Trying to set variable @@session.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED to 0"
+SET @@session.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED = 0;
+SELECT @@session.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED;
+@@session.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED
+0
+"Setting the session scope variable back to default"
+SET @@session.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED = DEFAULT;
+SELECT @@session.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED;
+@@session.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED
+0
+"Trying to set variable @@session.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED to on"
+SET @@session.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED = on;
+SELECT @@session.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED;
+@@session.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED
+1
+"Setting the session scope variable back to default"
+SET @@session.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED = DEFAULT;
+SELECT @@session.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED;
+@@session.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED
+0
+'# Testing with invalid values in global scope #'
+"Trying to set variable @@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED to 'aaa'"
+SET @@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED = 'aaa';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED;
+@@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED
+0
+"Trying to set variable @@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED to 'bbb'"
+SET @@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED = 'bbb';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED;
+@@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED
+0
+SET @@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED = @start_global_value;
+SELECT @@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED;
+@@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED
+0
+SET @@session.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED = @start_session_value;
+SELECT @@session.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED;
+@@session.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED
+0
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_concurrent_prepare_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_concurrent_prepare_basic.result
new file mode 100644
index 00000000000..11d4f2363f6
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_concurrent_prepare_basic.result
@@ -0,0 +1,14 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(1024);
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+SET @start_global_value = @@global.ROCKSDB_CONCURRENT_PREPARE;
+SELECT @start_global_value;
+@start_global_value
+1
+"Trying to set variable @@global.ROCKSDB_CONCURRENT_PREPARE to 444. It should fail because it is readonly."
+SET @@global.ROCKSDB_CONCURRENT_PREPARE = 444;
+ERROR HY000: Variable 'rocksdb_concurrent_prepare' is a read only variable
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_deadlock_detect_depth_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_deadlock_detect_depth_basic.result
new file mode 100644
index 00000000000..6f05268745d
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_deadlock_detect_depth_basic.result
@@ -0,0 +1,79 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(100);
+INSERT INTO valid_values VALUES(2);
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'123\'');
+SET @start_global_value = @@global.ROCKSDB_DEADLOCK_DETECT_DEPTH;
+SELECT @start_global_value;
+@start_global_value
+50
+SET @start_session_value = @@session.ROCKSDB_DEADLOCK_DETECT_DEPTH;
+SELECT @start_session_value;
+@start_session_value
+50
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_DEADLOCK_DETECT_DEPTH to 100"
+SET @@global.ROCKSDB_DEADLOCK_DETECT_DEPTH = 100;
+SELECT @@global.ROCKSDB_DEADLOCK_DETECT_DEPTH;
+@@global.ROCKSDB_DEADLOCK_DETECT_DEPTH
+100
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_DEADLOCK_DETECT_DEPTH = DEFAULT;
+SELECT @@global.ROCKSDB_DEADLOCK_DETECT_DEPTH;
+@@global.ROCKSDB_DEADLOCK_DETECT_DEPTH
+50
+"Trying to set variable @@global.ROCKSDB_DEADLOCK_DETECT_DEPTH to 2"
+SET @@global.ROCKSDB_DEADLOCK_DETECT_DEPTH = 2;
+SELECT @@global.ROCKSDB_DEADLOCK_DETECT_DEPTH;
+@@global.ROCKSDB_DEADLOCK_DETECT_DEPTH
+2
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_DEADLOCK_DETECT_DEPTH = DEFAULT;
+SELECT @@global.ROCKSDB_DEADLOCK_DETECT_DEPTH;
+@@global.ROCKSDB_DEADLOCK_DETECT_DEPTH
+50
+'# Setting to valid values in session scope#'
+"Trying to set variable @@session.ROCKSDB_DEADLOCK_DETECT_DEPTH to 100"
+SET @@session.ROCKSDB_DEADLOCK_DETECT_DEPTH = 100;
+SELECT @@session.ROCKSDB_DEADLOCK_DETECT_DEPTH;
+@@session.ROCKSDB_DEADLOCK_DETECT_DEPTH
+100
+"Setting the session scope variable back to default"
+SET @@session.ROCKSDB_DEADLOCK_DETECT_DEPTH = DEFAULT;
+SELECT @@session.ROCKSDB_DEADLOCK_DETECT_DEPTH;
+@@session.ROCKSDB_DEADLOCK_DETECT_DEPTH
+50
+"Trying to set variable @@session.ROCKSDB_DEADLOCK_DETECT_DEPTH to 2"
+SET @@session.ROCKSDB_DEADLOCK_DETECT_DEPTH = 2;
+SELECT @@session.ROCKSDB_DEADLOCK_DETECT_DEPTH;
+@@session.ROCKSDB_DEADLOCK_DETECT_DEPTH
+2
+"Setting the session scope variable back to default"
+SET @@session.ROCKSDB_DEADLOCK_DETECT_DEPTH = DEFAULT;
+SELECT @@session.ROCKSDB_DEADLOCK_DETECT_DEPTH;
+@@session.ROCKSDB_DEADLOCK_DETECT_DEPTH
+50
+'# Testing with invalid values in global scope #'
+"Trying to set variable @@global.ROCKSDB_DEADLOCK_DETECT_DEPTH to 'aaa'"
+SET @@global.ROCKSDB_DEADLOCK_DETECT_DEPTH = 'aaa';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_DEADLOCK_DETECT_DEPTH;
+@@global.ROCKSDB_DEADLOCK_DETECT_DEPTH
+50
+"Trying to set variable @@global.ROCKSDB_DEADLOCK_DETECT_DEPTH to '123'"
+SET @@global.ROCKSDB_DEADLOCK_DETECT_DEPTH = '123';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_DEADLOCK_DETECT_DEPTH;
+@@global.ROCKSDB_DEADLOCK_DETECT_DEPTH
+50
+SET @@global.ROCKSDB_DEADLOCK_DETECT_DEPTH = @start_global_value;
+SELECT @@global.ROCKSDB_DEADLOCK_DETECT_DEPTH;
+@@global.ROCKSDB_DEADLOCK_DETECT_DEPTH
+50
+SET @@session.ROCKSDB_DEADLOCK_DETECT_DEPTH = @start_session_value;
+SELECT @@session.ROCKSDB_DEADLOCK_DETECT_DEPTH;
+@@session.ROCKSDB_DEADLOCK_DETECT_DEPTH
+50
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_debug_ttl_ignore_pk_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_debug_ttl_ignore_pk_basic.result
new file mode 100644
index 00000000000..1d8eb721c1b
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_debug_ttl_ignore_pk_basic.result
@@ -0,0 +1,64 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+SET @start_global_value = @@global.ROCKSDB_DEBUG_TTL_IGNORE_PK;
+SELECT @start_global_value;
+@start_global_value
+0
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_DEBUG_TTL_IGNORE_PK to 1"
+SET @@global.ROCKSDB_DEBUG_TTL_IGNORE_PK = 1;
+SELECT @@global.ROCKSDB_DEBUG_TTL_IGNORE_PK;
+@@global.ROCKSDB_DEBUG_TTL_IGNORE_PK
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_DEBUG_TTL_IGNORE_PK = DEFAULT;
+SELECT @@global.ROCKSDB_DEBUG_TTL_IGNORE_PK;
+@@global.ROCKSDB_DEBUG_TTL_IGNORE_PK
+0
+"Trying to set variable @@global.ROCKSDB_DEBUG_TTL_IGNORE_PK to 0"
+SET @@global.ROCKSDB_DEBUG_TTL_IGNORE_PK = 0;
+SELECT @@global.ROCKSDB_DEBUG_TTL_IGNORE_PK;
+@@global.ROCKSDB_DEBUG_TTL_IGNORE_PK
+0
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_DEBUG_TTL_IGNORE_PK = DEFAULT;
+SELECT @@global.ROCKSDB_DEBUG_TTL_IGNORE_PK;
+@@global.ROCKSDB_DEBUG_TTL_IGNORE_PK
+0
+"Trying to set variable @@global.ROCKSDB_DEBUG_TTL_IGNORE_PK to on"
+SET @@global.ROCKSDB_DEBUG_TTL_IGNORE_PK = on;
+SELECT @@global.ROCKSDB_DEBUG_TTL_IGNORE_PK;
+@@global.ROCKSDB_DEBUG_TTL_IGNORE_PK
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_DEBUG_TTL_IGNORE_PK = DEFAULT;
+SELECT @@global.ROCKSDB_DEBUG_TTL_IGNORE_PK;
+@@global.ROCKSDB_DEBUG_TTL_IGNORE_PK
+0
+"Trying to set variable @@session.ROCKSDB_DEBUG_TTL_IGNORE_PK to 444. It should fail because it is not session."
+SET @@session.ROCKSDB_DEBUG_TTL_IGNORE_PK = 444;
+ERROR HY000: Variable 'rocksdb_debug_ttl_ignore_pk' is a GLOBAL variable and should be set with SET GLOBAL
+'# Testing with invalid values in global scope #'
+"Trying to set variable @@global.ROCKSDB_DEBUG_TTL_IGNORE_PK to 'aaa'"
+SET @@global.ROCKSDB_DEBUG_TTL_IGNORE_PK = 'aaa';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_DEBUG_TTL_IGNORE_PK;
+@@global.ROCKSDB_DEBUG_TTL_IGNORE_PK
+0
+"Trying to set variable @@global.ROCKSDB_DEBUG_TTL_IGNORE_PK to 'bbb'"
+SET @@global.ROCKSDB_DEBUG_TTL_IGNORE_PK = 'bbb';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_DEBUG_TTL_IGNORE_PK;
+@@global.ROCKSDB_DEBUG_TTL_IGNORE_PK
+0
+SET @@global.ROCKSDB_DEBUG_TTL_IGNORE_PK = @start_global_value;
+SELECT @@global.ROCKSDB_DEBUG_TTL_IGNORE_PK;
+@@global.ROCKSDB_DEBUG_TTL_IGNORE_PK
+0
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_force_compute_memtable_stats_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_force_compute_memtable_stats_basic.result
index a1c4d3caaa4..90fd829e7c3 100644
--- a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_force_compute_memtable_stats_basic.result
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_force_compute_memtable_stats_basic.result
@@ -1,7 +1,7 @@
DROP TABLE IF EXISTS t;
CREATE TABLE t (a INT PRIMARY KEY, b CHAR(8)) ENGINE=rocksdb;
INSERT INTO t (a,b) VALUES (1,'bar'),(2,'foo');
-SET @ORIG_PAUSE_BACKGROUND_WORK = @@rocksdb_force_compute_memtable_stats;
+SET @ORIG_FORCE_COMPUTE_MEMTABLE_STATS = @@rocksdb_force_compute_memtable_stats;
set global rocksdb_force_flush_memtable_now = true;
INSERT INTO t (a,b) VALUES (3,'dead'),(4,'beef'),(5,'a'),(6,'bbb'),(7,'c'),(8,'d');
set global rocksdb_force_compute_memtable_stats=0;
@@ -12,4 +12,4 @@ select case when @ROWS_INCLUDE_MEMTABLE-@ROWS_EXCLUDE_MEMTABLE > 0 then 'true' e
case when @ROWS_INCLUDE_MEMTABLE-@ROWS_EXCLUDE_MEMTABLE > 0 then 'true' else 'false' end
true
DROP TABLE t;
-set global rocksdb_force_compute_memtable_stats = @ORIG_PAUSE_BACKGROUND_WORK;
+set global rocksdb_force_compute_memtable_stats = @ORIG_FORCE_COMPUTE_MEMTABLE_STATS;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_force_compute_memtable_stats_cachetime_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_force_compute_memtable_stats_cachetime_basic.result
new file mode 100644
index 00000000000..50e06b5bacb
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_force_compute_memtable_stats_cachetime_basic.result
@@ -0,0 +1,68 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(1024);
+INSERT INTO valid_values VALUES(1024 * 1024 * 1024);
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+SET @start_global_value = @@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME;
+SELECT @start_global_value;
+@start_global_value
+0
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME to 0"
+SET @@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME = 0;
+SELECT @@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME;
+@@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME
+0
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME = DEFAULT;
+SELECT @@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME;
+@@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME
+60000000
+"Trying to set variable @@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME to 1"
+SET @@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME = 1;
+SELECT @@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME;
+@@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME = DEFAULT;
+SELECT @@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME;
+@@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME
+60000000
+"Trying to set variable @@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME to 1024"
+SET @@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME = 1024;
+SELECT @@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME;
+@@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME
+1024
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME = DEFAULT;
+SELECT @@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME;
+@@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME
+60000000
+"Trying to set variable @@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME to 1073741824"
+SET @@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME = 1073741824;
+SELECT @@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME;
+@@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME
+1073741824
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME = DEFAULT;
+SELECT @@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME;
+@@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME
+60000000
+"Trying to set variable @@session.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME to 444. It should fail because it is not session."
+SET @@session.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME = 444;
+ERROR HY000: Variable 'rocksdb_force_compute_memtable_stats_cachetime' is a GLOBAL variable and should be set with SET GLOBAL
+'# Testing with invalid values in global scope #'
+"Trying to set variable @@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME to 'aaa'"
+SET @@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME = 'aaa';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME;
+@@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME
+60000000
+SET @@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME = @start_global_value;
+SELECT @@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME;
+@@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME
+0
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_large_prefix_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_large_prefix_basic.result
new file mode 100644
index 00000000000..89697683d1c
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_large_prefix_basic.result
@@ -0,0 +1,64 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+SET @start_global_value = @@global.ROCKSDB_LARGE_PREFIX;
+SELECT @start_global_value;
+@start_global_value
+0
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_LARGE_PREFIX to 1"
+SET @@global.ROCKSDB_LARGE_PREFIX = 1;
+SELECT @@global.ROCKSDB_LARGE_PREFIX;
+@@global.ROCKSDB_LARGE_PREFIX
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_LARGE_PREFIX = DEFAULT;
+SELECT @@global.ROCKSDB_LARGE_PREFIX;
+@@global.ROCKSDB_LARGE_PREFIX
+0
+"Trying to set variable @@global.ROCKSDB_LARGE_PREFIX to 0"
+SET @@global.ROCKSDB_LARGE_PREFIX = 0;
+SELECT @@global.ROCKSDB_LARGE_PREFIX;
+@@global.ROCKSDB_LARGE_PREFIX
+0
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_LARGE_PREFIX = DEFAULT;
+SELECT @@global.ROCKSDB_LARGE_PREFIX;
+@@global.ROCKSDB_LARGE_PREFIX
+0
+"Trying to set variable @@global.ROCKSDB_LARGE_PREFIX to on"
+SET @@global.ROCKSDB_LARGE_PREFIX = on;
+SELECT @@global.ROCKSDB_LARGE_PREFIX;
+@@global.ROCKSDB_LARGE_PREFIX
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_LARGE_PREFIX = DEFAULT;
+SELECT @@global.ROCKSDB_LARGE_PREFIX;
+@@global.ROCKSDB_LARGE_PREFIX
+0
+"Trying to set variable @@session.ROCKSDB_LARGE_PREFIX to 444. It should fail because it is not session."
+SET @@session.ROCKSDB_LARGE_PREFIX = 444;
+ERROR HY000: Variable 'rocksdb_large_prefix' is a GLOBAL variable and should be set with SET GLOBAL
+'# Testing with invalid values in global scope #'
+"Trying to set variable @@global.ROCKSDB_LARGE_PREFIX to 'aaa'"
+SET @@global.ROCKSDB_LARGE_PREFIX = 'aaa';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_LARGE_PREFIX;
+@@global.ROCKSDB_LARGE_PREFIX
+0
+"Trying to set variable @@global.ROCKSDB_LARGE_PREFIX to 'bbb'"
+SET @@global.ROCKSDB_LARGE_PREFIX = 'bbb';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_LARGE_PREFIX;
+@@global.ROCKSDB_LARGE_PREFIX
+0
+SET @@global.ROCKSDB_LARGE_PREFIX = @start_global_value;
+SELECT @@global.ROCKSDB_LARGE_PREFIX;
+@@global.ROCKSDB_LARGE_PREFIX
+0
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_manual_wal_flush_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_manual_wal_flush_basic.result
new file mode 100644
index 00000000000..9b176263a23
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_manual_wal_flush_basic.result
@@ -0,0 +1,14 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(1024);
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+SET @start_global_value = @@global.ROCKSDB_MANUAL_WAL_FLUSH;
+SELECT @start_global_value;
+@start_global_value
+1
+"Trying to set variable @@global.ROCKSDB_MANUAL_WAL_FLUSH to 444. It should fail because it is readonly."
+SET @@global.ROCKSDB_MANUAL_WAL_FLUSH = 444;
+ERROR HY000: Variable 'rocksdb_manual_wal_flush' is a read only variable
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_max_latest_deadlocks_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_max_latest_deadlocks_basic.result
new file mode 100644
index 00000000000..74dbdb4288c
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_max_latest_deadlocks_basic.result
@@ -0,0 +1,53 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(100);
+INSERT INTO valid_values VALUES(1);
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'123\'');
+SET @start_global_value = @@global.ROCKSDB_MAX_LATEST_DEADLOCKS;
+SELECT @start_global_value;
+@start_global_value
+5
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_MAX_LATEST_DEADLOCKS to 100"
+SET @@global.ROCKSDB_MAX_LATEST_DEADLOCKS = 100;
+SELECT @@global.ROCKSDB_MAX_LATEST_DEADLOCKS;
+@@global.ROCKSDB_MAX_LATEST_DEADLOCKS
+100
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_MAX_LATEST_DEADLOCKS = DEFAULT;
+SELECT @@global.ROCKSDB_MAX_LATEST_DEADLOCKS;
+@@global.ROCKSDB_MAX_LATEST_DEADLOCKS
+5
+"Trying to set variable @@global.ROCKSDB_MAX_LATEST_DEADLOCKS to 1"
+SET @@global.ROCKSDB_MAX_LATEST_DEADLOCKS = 1;
+SELECT @@global.ROCKSDB_MAX_LATEST_DEADLOCKS;
+@@global.ROCKSDB_MAX_LATEST_DEADLOCKS
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_MAX_LATEST_DEADLOCKS = DEFAULT;
+SELECT @@global.ROCKSDB_MAX_LATEST_DEADLOCKS;
+@@global.ROCKSDB_MAX_LATEST_DEADLOCKS
+5
+"Trying to set variable @@session.ROCKSDB_MAX_LATEST_DEADLOCKS to 444. It should fail because it is not session."
+SET @@session.ROCKSDB_MAX_LATEST_DEADLOCKS = 444;
+ERROR HY000: Variable 'rocksdb_max_latest_deadlocks' is a GLOBAL variable and should be set with SET GLOBAL
+'# Testing with invalid values in global scope #'
+"Trying to set variable @@global.ROCKSDB_MAX_LATEST_DEADLOCKS to 'aaa'"
+SET @@global.ROCKSDB_MAX_LATEST_DEADLOCKS = 'aaa';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_MAX_LATEST_DEADLOCKS;
+@@global.ROCKSDB_MAX_LATEST_DEADLOCKS
+5
+"Trying to set variable @@global.ROCKSDB_MAX_LATEST_DEADLOCKS to '123'"
+SET @@global.ROCKSDB_MAX_LATEST_DEADLOCKS = '123';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_MAX_LATEST_DEADLOCKS;
+@@global.ROCKSDB_MAX_LATEST_DEADLOCKS
+5
+SET @@global.ROCKSDB_MAX_LATEST_DEADLOCKS = @start_global_value;
+SELECT @@global.ROCKSDB_MAX_LATEST_DEADLOCKS;
+@@global.ROCKSDB_MAX_LATEST_DEADLOCKS
+5
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_merge_tmp_file_removal_delay_ms_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_merge_tmp_file_removal_delay_ms_basic.result
new file mode 100644
index 00000000000..277de716d70
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_merge_tmp_file_removal_delay_ms_basic.result
@@ -0,0 +1,93 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+INSERT INTO invalid_values VALUES('on');
+SET @start_global_value = @@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS;
+SELECT @start_global_value;
+@start_global_value
+0
+SET @start_session_value = @@session.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS;
+SELECT @start_session_value;
+@start_session_value
+0
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS to 1"
+SET @@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS = 1;
+SELECT @@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS;
+@@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS = DEFAULT;
+SELECT @@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS;
+@@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS
+0
+"Trying to set variable @@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS to 0"
+SET @@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS = 0;
+SELECT @@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS;
+@@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS
+0
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS = DEFAULT;
+SELECT @@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS;
+@@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS
+0
+'# Setting to valid values in session scope#'
+"Trying to set variable @@session.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS to 1"
+SET @@session.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS = 1;
+SELECT @@session.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS;
+@@session.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS
+1
+"Setting the session scope variable back to default"
+SET @@session.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS = DEFAULT;
+SELECT @@session.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS;
+@@session.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS
+0
+"Trying to set variable @@session.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS to 0"
+SET @@session.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS = 0;
+SELECT @@session.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS;
+@@session.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS
+0
+"Setting the session scope variable back to default"
+SET @@session.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS = DEFAULT;
+SELECT @@session.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS;
+@@session.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS
+0
+'# Testing with invalid values in global scope #'
+"Trying to set variable @@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS to 'aaa'"
+SET @@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS = 'aaa';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS;
+@@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS
+0
+"Trying to set variable @@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS to 'bbb'"
+SET @@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS = 'bbb';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS;
+@@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS
+0
+"Trying to set variable @@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS to on"
+SET @@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS = on;
+Got one of the listed errors
+SELECT @@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS;
+@@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS
+0
+SET @@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS = @start_global_value;
+SELECT @@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS;
+@@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS
+0
+SET @@session.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS = @start_session_value;
+SELECT @@session.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS;
+@@session.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS
+0
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
+set session rocksdb_merge_buf_size=250;
+set session rocksdb_merge_combine_read_size=1000;
+set session rocksdb_merge_tmp_file_removal_delay_ms=1000;
+CREATE TABLE t1 (i INT, j INT, PRIMARY KEY (i)) ENGINE = ROCKSDB;
+ALTER TABLE t1 ADD INDEX kj(j), ALGORITHM=INPLACE;
+include/assert.inc [Alter should have taken at least 10 seconds]
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_sim_cache_size_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_sim_cache_size_basic.result
new file mode 100644
index 00000000000..ef4c619457b
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_sim_cache_size_basic.result
@@ -0,0 +1,7 @@
+SET @start_global_value = @@global.ROCKSDB_SIM_CACHE_SIZE;
+SELECT @start_global_value;
+@start_global_value
+0
+"Trying to set variable @@global.ROCKSDB_SIM_CACHE_SIZE to 444. It should fail because it is readonly."
+SET @@global.ROCKSDB_SIM_CACHE_SIZE = 444;
+ERROR HY000: Variable 'rocksdb_sim_cache_size' is a read only variable
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_use_clock_cache_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_use_clock_cache_basic.result
new file mode 100644
index 00000000000..f23d1889027
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_use_clock_cache_basic.result
@@ -0,0 +1,19 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+INSERT INTO valid_values VALUES('off');
+INSERT INTO valid_values VALUES('true');
+INSERT INTO valid_values VALUES('false');
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+SET @start_global_value = @@global.ROCKSDB_USE_CLOCK_CACHE;
+SELECT @start_global_value;
+@start_global_value
+0
+"Trying to set variable @@global.ROCKSDB_USE_CLOCK_CACHE to 444. It should fail because it is readonly."
+SET @@global.ROCKSDB_USE_CLOCK_CACHE = 444;
+ERROR HY000: Variable 'rocksdb_use_clock_cache' is a read only variable
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_bulk_load_allow_unsorted_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_bulk_load_allow_unsorted_basic.test
new file mode 100644
index 00000000000..e57396e0fdc
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_bulk_load_allow_unsorted_basic.test
@@ -0,0 +1,18 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+
+--let $sys_var=ROCKSDB_BULK_LOAD_ALLOW_UNSORTED
+--let $read_only=0
+--let $session=1
+--source ../include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_concurrent_prepare_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_concurrent_prepare_basic.test
new file mode 100644
index 00000000000..451653fe769
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_concurrent_prepare_basic.test
@@ -0,0 +1,16 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(1024);
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+
+--let $sys_var=ROCKSDB_CONCURRENT_PREPARE
+--let $read_only=1
+--let $session=0
+--source ../include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_deadlock_detect_depth_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_deadlock_detect_depth_basic.test
new file mode 100644
index 00000000000..cab72a11e18
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_deadlock_detect_depth_basic.test
@@ -0,0 +1,17 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(100);
+INSERT INTO valid_values VALUES(2);
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'123\'');
+
+--let $sys_var=ROCKSDB_DEADLOCK_DETECT_DEPTH
+--let $read_only=0
+--let $session=1
+--source ../include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_debug_ttl_ignore_pk_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_debug_ttl_ignore_pk_basic.test
new file mode 100644
index 00000000000..8ad071e131b
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_debug_ttl_ignore_pk_basic.test
@@ -0,0 +1,18 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+
+--let $sys_var=ROCKSDB_DEBUG_TTL_IGNORE_PK
+--let $read_only=0
+--let $session=0
+--source ../include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_force_compute_memtable_stats_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_force_compute_memtable_stats_basic.test
index 3a0d7f63938..318ae1ee598 100644
--- a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_force_compute_memtable_stats_basic.test
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_force_compute_memtable_stats_basic.test
@@ -6,7 +6,7 @@ DROP TABLE IF EXISTS t;
CREATE TABLE t (a INT PRIMARY KEY, b CHAR(8)) ENGINE=rocksdb;
INSERT INTO t (a,b) VALUES (1,'bar'),(2,'foo');
-SET @ORIG_PAUSE_BACKGROUND_WORK = @@rocksdb_force_compute_memtable_stats;
+SET @ORIG_FORCE_COMPUTE_MEMTABLE_STATS = @@rocksdb_force_compute_memtable_stats;
set global rocksdb_force_flush_memtable_now = true;
INSERT INTO t (a,b) VALUES (3,'dead'),(4,'beef'),(5,'a'),(6,'bbb'),(7,'c'),(8,'d');
@@ -20,4 +20,4 @@ SELECT TABLE_ROWS INTO @ROWS_INCLUDE_MEMTABLE FROM information_schema.TABLES WHE
select case when @ROWS_INCLUDE_MEMTABLE-@ROWS_EXCLUDE_MEMTABLE > 0 then 'true' else 'false' end;
DROP TABLE t;
-set global rocksdb_force_compute_memtable_stats = @ORIG_PAUSE_BACKGROUND_WORK;
+set global rocksdb_force_compute_memtable_stats = @ORIG_FORCE_COMPUTE_MEMTABLE_STATS;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_force_compute_memtable_stats_cachetime_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_force_compute_memtable_stats_cachetime_basic.test
new file mode 100644
index 00000000000..20180ec16a9
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_force_compute_memtable_stats_cachetime_basic.test
@@ -0,0 +1,18 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(1024);
+INSERT INTO valid_values VALUES(1024 * 1024 * 1024);
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+
+--let $sys_var=ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME
+--let $read_only=0
+--let $session=0
+--source ../include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_large_prefix_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_large_prefix_basic.test
new file mode 100644
index 00000000000..5ed3c74131e
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_large_prefix_basic.test
@@ -0,0 +1,18 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+
+--let $sys_var=ROCKSDB_LARGE_PREFIX
+--let $read_only=0
+--let $session=0
+--source ../include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_manual_wal_flush_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_manual_wal_flush_basic.test
new file mode 100644
index 00000000000..3e01722d5ea
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_manual_wal_flush_basic.test
@@ -0,0 +1,16 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(1024);
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+
+--let $sys_var=ROCKSDB_MANUAL_WAL_FLUSH
+--let $read_only=1
+--let $session=0
+--source ../include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_max_latest_deadlocks_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_max_latest_deadlocks_basic.test
new file mode 100644
index 00000000000..9917ec31d9b
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_max_latest_deadlocks_basic.test
@@ -0,0 +1,17 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(100);
+INSERT INTO valid_values VALUES(1);
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'123\'');
+
+--let $sys_var=ROCKSDB_MAX_LATEST_DEADLOCKS
+--let $read_only=0
+--let $session=0
+--source ../include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_merge_tmp_file_removal_delay_ms_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_merge_tmp_file_removal_delay_ms_basic.test
new file mode 100644
index 00000000000..03cc0b11d8c
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_merge_tmp_file_removal_delay_ms_basic.test
@@ -0,0 +1,49 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+INSERT INTO invalid_values VALUES('on');
+
+--let $sys_var=ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS
+--let $read_only=0
+--let $session=1
+--source ../include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
+
+set session rocksdb_merge_buf_size=250;
+set session rocksdb_merge_combine_read_size=1000;
+set session rocksdb_merge_tmp_file_removal_delay_ms=1000;
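+# A tiny 250-byte merge buffer forces the 100 rows inserted below to spill
+# across roughly 13 sort buffers; removal of each one is delayed by 1000 ms.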
+
+CREATE TABLE t1 (i INT, j INT, PRIMARY KEY (i)) ENGINE = ROCKSDB;
+
+--disable_query_log
+let $max = 100;
+let $i = 1;
+while ($i <= $max) {
+ let $insert = INSERT INTO t1 VALUES ($i, FLOOR(RAND() * 100));
+ inc $i;
+ eval $insert;
+}
+--enable_query_log
+
+let $start= `SELECT UNIX_TIMESTAMP()`;
+# This should take a lot longer than normal because removing each merge sort
+# buffer sleeps for 1 second. There should be about 13 buffers, so the ALTER
+# should take at least 13 seconds.
+ALTER TABLE t1 ADD INDEX kj(j), ALGORITHM=INPLACE;
+let $stop_exec= `SELECT UNIX_TIMESTAMP()`;
+
+let $time_diff= `SELECT ($stop_exec - $start)`;
+let $assert_text= Alter should have taken at least 10 seconds;
+let $assert_cond= $time_diff >= 10;
+source include/assert.inc;
+
+DROP TABLE t1;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_sim_cache_size_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_sim_cache_size_basic.test
new file mode 100644
index 00000000000..a82d50e1d03
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_sim_cache_size_basic.test
@@ -0,0 +1,6 @@
+--source include/have_rocksdb.inc
+
+--let $sys_var=ROCKSDB_SIM_CACHE_SIZE
+--let $read_only=1
+--let $session=0
+--source ../include/rocksdb_sys_var.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_use_clock_cache_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_use_clock_cache_basic.test
new file mode 100644
index 00000000000..d3e7b652039
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_use_clock_cache_basic.test
@@ -0,0 +1,21 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+INSERT INTO valid_values VALUES('off');
+INSERT INTO valid_values VALUES('true');
+INSERT INTO valid_values VALUES('false');
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+
+--let $sys_var=ROCKSDB_USE_CLOCK_CACHE
+--let $read_only=1
+--let $session=0
+--source ../include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/rdb_buff.h b/storage/rocksdb/rdb_buff.h
index 7a8a0021509..3ba0ca99259 100644
--- a/storage/rocksdb/rdb_buff.h
+++ b/storage/rocksdb/rdb_buff.h
@@ -359,6 +359,22 @@ public:
DBUG_ASSERT(pos < get_current_pos() && (pos + 1) < get_current_pos());
rdb_netbuf_store_uint16(m_data.data() + pos, new_val);
}
+
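+  // Shrink the buffer so that it ends at position pos, discarding the rest.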
+ void truncate(const size_t &pos) {
+    DBUG_ASSERT(pos < m_data.size());
+ m_data.resize(pos);
+ }
+
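+  // Grow the buffer by len bytes, initializing the new bytes to val.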
+ void allocate(const size_t &len, const uchar &val = 0) {
+ DBUG_ASSERT(len > 0);
+ m_data.resize(m_data.size() + len, val);
+ }
+
+ /*
+    An awful hack to deallocate the buffer without relying on the destructor.
+    This is needed to suppress valgrind errors in rocksdb.partition.
+ */
+ void free() { std::vector<uchar>().swap(m_data); }
};
/*
diff --git a/storage/rocksdb/rdb_compact_filter.h b/storage/rocksdb/rdb_compact_filter.h
index 4696a1985b6..c457fba4af2 100644
--- a/storage/rocksdb/rdb_compact_filter.h
+++ b/storage/rocksdb/rdb_compact_filter.h
@@ -138,6 +138,14 @@ public:
gl_index_id.cf_id, gl_index_id.index_id);
}
+#ifndef NDEBUG
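+  // Debug-only: when rocksdb_debug_ttl_ignore_pk is enabled, report a zero
+  // TTL for primary keys so that only secondary keys are subject to TTL
+  // filtering during compaction.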
+ if (rdb_dbug_set_ttl_ignore_pk() &&
+ index_info.m_index_type == Rdb_key_def::INDEX_TYPE_PRIMARY) {
+ *ttl_duration = 0;
+ return;
+ }
+#endif
+
*ttl_duration = index_info.m_ttl_duration;
if (Rdb_key_def::has_index_flag(index_info.m_index_flags,
Rdb_key_def::TTL_FLAG)) {
diff --git a/storage/rocksdb/rdb_datadic.cc b/storage/rocksdb/rdb_datadic.cc
index 7d6001a0f98..1694bee7d76 100644
--- a/storage/rocksdb/rdb_datadic.cc
+++ b/storage/rocksdb/rdb_datadic.cc
@@ -31,9 +31,12 @@
#include <vector>
/* MySQL header files */
+#include "./field.h"
#include "./key.h"
#include "./m_ctype.h"
#include "./my_bit.h"
+#include "./my_bitmap.h"
+#include "./sql_table.h"
/* MyRocks header files */
#include "./ha_rocksdb_proto.h"
@@ -72,6 +75,14 @@ Rdb_key_def::Rdb_key_def(uint indexnr_arg, uint keyno_arg,
{
mysql_mutex_init(0, &m_mutex, MY_MUTEX_INIT_FAST);
rdb_netbuf_store_index(m_index_number_storage_form, m_index_number);
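+  // Pre-compute how many bytes the index-flag fields (currently just the
+  // 8-byte TTL record) occupy so they can be reserved and skipped later.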
+ m_total_index_flags_length =
+ calculate_index_flag_offset(m_index_flags_bitmap, MAX_FLAG);
+ DBUG_ASSERT_IMP(m_index_type == INDEX_TYPE_SECONDARY &&
+ m_kv_format_version <= SECONDARY_FORMAT_VERSION_UPDATE2,
+ m_total_index_flags_length == 0);
+ DBUG_ASSERT_IMP(m_index_type == INDEX_TYPE_PRIMARY &&
+ m_kv_format_version <= PRIMARY_FORMAT_VERSION_UPDATE2,
+ m_total_index_flags_length == 0);
DBUG_ASSERT(m_cf_handle != nullptr);
}
@@ -89,6 +100,14 @@ Rdb_key_def::Rdb_key_def(const Rdb_key_def &k)
m_maxlength(k.m_maxlength) {
mysql_mutex_init(0, &m_mutex, MY_MUTEX_INIT_FAST);
rdb_netbuf_store_index(m_index_number_storage_form, m_index_number);
+ m_total_index_flags_length =
+ calculate_index_flag_offset(m_index_flags_bitmap, MAX_FLAG);
+ DBUG_ASSERT_IMP(m_index_type == INDEX_TYPE_SECONDARY &&
+ m_kv_format_version <= SECONDARY_FORMAT_VERSION_UPDATE2,
+ m_total_index_flags_length == 0);
+ DBUG_ASSERT_IMP(m_index_type == INDEX_TYPE_PRIMARY &&
+ m_kv_format_version <= PRIMARY_FORMAT_VERSION_UPDATE2,
+ m_total_index_flags_length == 0);
if (k.m_pack_info) {
const size_t size = sizeof(Rdb_field_packing) * k.m_key_parts;
m_pack_info =
@@ -749,11 +768,14 @@ uint Rdb_key_def::pack_index_tuple(TABLE *const tbl, uchar *const pack_buffer,
*/
bool Rdb_key_def::unpack_info_has_checksum(const rocksdb::Slice &unpack_info) {
- const uchar *ptr = (const uchar *)unpack_info.data();
size_t size = unpack_info.size();
+ if (size == 0) {
+ return false;
+ }
+ const uchar *ptr = (const uchar *)unpack_info.data();
// Skip unpack info if present.
- if (size >= RDB_UNPACK_HEADER_SIZE && ptr[0] == RDB_UNPACK_DATA_TAG) {
+ if (is_unpack_data_tag(ptr[0]) && size >= get_unpack_header_size(ptr[0])) {
const uint16 skip_len = rdb_netbuf_to_uint16(ptr + 1);
SHIP_ASSERT(size >= skip_len);
@@ -783,6 +805,120 @@ int Rdb_key_def::successor(uchar *const packed_tuple, const uint &len) {
return changed;
}
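+// Maps each unpack_info tag byte to the total size of its header (the tag
+// byte itself plus the length/bitmap fields that follow it).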
+static const std::map<char, size_t> UNPACK_HEADER_SIZES = {
+ {RDB_UNPACK_DATA_TAG, RDB_UNPACK_HEADER_SIZE},
+ {RDB_UNPACK_COVERED_DATA_TAG, RDB_UNPACK_COVERED_HEADER_SIZE}};
+
+/*
+ @return The length in bytes of the header specified by the given tag
+*/
+size_t Rdb_key_def::get_unpack_header_size(char tag) {
+ DBUG_ASSERT(is_unpack_data_tag(tag));
+ return UNPACK_HEADER_SIZES.at(tag);
+}
+
+/*
+ Get a bitmap indicating which varchar columns must be covered for this
+ lookup to be covered. If the bitmap is a subset of the covered bitmap, then
+ the lookup is covered. If it can already be determined that the lookup is
+ not covered, map->bitmap will be set to null.
+ */
+void Rdb_key_def::get_lookup_bitmap(const TABLE *table, MY_BITMAP *map) const {
+ DBUG_ASSERT(map->bitmap == nullptr);
+ bitmap_init(map, nullptr, MAX_REF_PARTS, false);
+ uint curr_bitmap_pos = 0;
+
+ // Indicates which columns in the read set might be covered.
+ MY_BITMAP maybe_covered_bitmap;
+ bitmap_init(&maybe_covered_bitmap, nullptr, table->read_set->n_bits, false);
+
+ for (uint i = 0; i < m_key_parts; i++) {
+ if (table_has_hidden_pk(table) && i + 1 == m_key_parts) {
+ continue;
+ }
+
+ Field *const field = m_pack_info[i].get_field_in_table(table);
+
+ // Columns which are always covered are not stored in the covered bitmap so
+ // we can ignore them here too.
+ if (m_pack_info[i].m_covered &&
+ bitmap_is_set(table->read_set, field->field_index)) {
+ bitmap_set_bit(&maybe_covered_bitmap, field->field_index);
+ continue;
+ }
+
+ switch (field->real_type()) {
+ // This type may be covered depending on the record. If it was requested,
+ // we require the covered bitmap to have this bit set.
+ case MYSQL_TYPE_VARCHAR:
+ if (curr_bitmap_pos < MAX_REF_PARTS) {
+ if (bitmap_is_set(table->read_set, field->field_index)) {
+ bitmap_set_bit(map, curr_bitmap_pos);
+ bitmap_set_bit(&maybe_covered_bitmap, field->field_index);
+ }
+ curr_bitmap_pos++;
+ } else {
+ bitmap_free(&maybe_covered_bitmap);
+ bitmap_free(map);
+ return;
+ }
+ break;
+ // This column is a type which is never covered. If it was requested, we
+ // know this lookup will never be covered.
+ default:
+ if (bitmap_is_set(table->read_set, field->field_index)) {
+ bitmap_free(&maybe_covered_bitmap);
+ bitmap_free(map);
+ return;
+ }
+ break;
+ }
+ }
+
+  // If some columns in the read set can never be covered, the lookup
+  // can't be covered.
+ if (!bitmap_cmp(table->read_set, &maybe_covered_bitmap)) {
+ bitmap_free(map);
+ }
+ bitmap_free(&maybe_covered_bitmap);
+}
+
+/*
+  Return true if, for this secondary index:
+  - All of the requested columns are in the index
+  - All values for columns indexed only by a prefix are shorter than or equal
+    in length to the prefix
+ */
+bool Rdb_key_def::covers_lookup(TABLE *const table,
+ const rocksdb::Slice *const unpack_info,
+ const MY_BITMAP *const lookup_bitmap) const {
+ DBUG_ASSERT(lookup_bitmap != nullptr);
+ if (!use_covered_bitmap_format() || lookup_bitmap->bitmap == nullptr) {
+ return false;
+ }
+
+ Rdb_string_reader unp_reader = Rdb_string_reader::read_or_empty(unpack_info);
+
+ // Check if this unpack_info has a covered_bitmap
+ const char *unpack_header = unp_reader.get_current_ptr();
+ const bool has_covered_unpack_info =
+ unp_reader.remaining_bytes() &&
+ unpack_header[0] == RDB_UNPACK_COVERED_DATA_TAG;
+ if (!has_covered_unpack_info ||
+ !unp_reader.read(RDB_UNPACK_COVERED_HEADER_SIZE)) {
+ return false;
+ }
+
+ MY_BITMAP covered_bitmap;
+ my_bitmap_map covered_bits;
+ bitmap_init(&covered_bitmap, &covered_bits, MAX_REF_PARTS, false);
+ covered_bits = rdb_netbuf_to_uint16((const uchar *)unpack_header +
+ sizeof(RDB_UNPACK_COVERED_DATA_TAG) +
+ RDB_UNPACK_COVERED_DATA_LEN_SIZE);
+
+ return bitmap_is_subset(lookup_bitmap, &covered_bitmap);
+}
+
uchar *Rdb_key_def::pack_field(Field *const field, Rdb_field_packing *pack_info,
uchar *tuple, uchar *const packed_tuple,
uchar *const pack_buffer,
@@ -847,14 +983,12 @@ uchar *Rdb_key_def::pack_field(Field *const field, Rdb_field_packing *pack_info,
Length of the packed tuple
*/
-uint Rdb_key_def::pack_record(const TABLE *const tbl, uchar *const pack_buffer,
- const uchar *const record,
- uchar *const packed_tuple,
- Rdb_string_writer *const unpack_info,
- const bool &should_store_row_debug_checksums,
- const longlong &hidden_pk_id, uint n_key_parts,
- uint *const n_null_fields,
- uint *const ttl_pk_offset) const {
+uint Rdb_key_def::pack_record(
+ const TABLE *const tbl, uchar *const pack_buffer, const uchar *const record,
+ uchar *const packed_tuple, Rdb_string_writer *const unpack_info,
+ const bool &should_store_row_debug_checksums, const longlong &hidden_pk_id,
+ uint n_key_parts, uint *const n_null_fields, uint *const ttl_pk_offset,
+ const char *const ttl_bytes) const {
DBUG_ASSERT(tbl != nullptr);
DBUG_ASSERT(pack_buffer != nullptr);
DBUG_ASSERT(record != nullptr);
@@ -865,7 +999,9 @@ uint Rdb_key_def::pack_record(const TABLE *const tbl, uchar *const pack_buffer,
(m_index_type == INDEX_TYPE_SECONDARY));
uchar *tuple = packed_tuple;
+ size_t unpack_start_pos = size_t(-1);
size_t unpack_len_pos = size_t(-1);
+ size_t covered_bitmap_pos = size_t(-1);
const bool hidden_pk_exists = table_has_hidden_pk(tbl);
rdb_netbuf_store_index(tuple, m_index_number);
@@ -887,14 +1023,57 @@ uint Rdb_key_def::pack_record(const TABLE *const tbl, uchar *const pack_buffer,
if (n_null_fields)
*n_null_fields = 0;
+ // Check if we need a covered bitmap. If it is certain that all key parts are
+ // covering, we don't need one.
+ bool store_covered_bitmap = false;
+ if (unpack_info && use_covered_bitmap_format()) {
+ for (uint i = 0; i < n_key_parts; i++) {
+ if (!m_pack_info[i].m_covered) {
+ store_covered_bitmap = true;
+ break;
+ }
+ }
+ }
+
+ const char tag =
+ store_covered_bitmap ? RDB_UNPACK_COVERED_DATA_TAG : RDB_UNPACK_DATA_TAG;
+
if (unpack_info) {
unpack_info->clear();
- unpack_info->write_uint8(RDB_UNPACK_DATA_TAG);
+
+ if (m_index_type == INDEX_TYPE_SECONDARY &&
+ m_total_index_flags_length > 0) {
+ // Reserve space for index flag fields
+ unpack_info->allocate(m_total_index_flags_length);
+
+ // Insert TTL timestamp
+ if (has_ttl() && ttl_bytes) {
+ write_index_flag_field(unpack_info,
+ reinterpret_cast<const uchar *const>(ttl_bytes),
+ Rdb_key_def::TTL_FLAG);
+ }
+ }
+
+ unpack_start_pos = unpack_info->get_current_pos();
+ unpack_info->write_uint8(tag);
unpack_len_pos = unpack_info->get_current_pos();
// we don't know the total length yet, so write a zero
unpack_info->write_uint16(0);
+
+ if (store_covered_bitmap) {
+      // Reserve two bytes for the covered bitmap. For key parts which are not
+      // always covering, this stores whether or not they are covered for this
+      // particular record.
+ covered_bitmap_pos = unpack_info->get_current_pos();
+ unpack_info->write_uint16(0);
+ }
}
+ MY_BITMAP covered_bitmap;
+ my_bitmap_map covered_bits;
+ uint curr_bitmap_pos = 0;
+ bitmap_init(&covered_bitmap, &covered_bits, MAX_REF_PARTS, false);
+
for (uint i = 0; i < n_key_parts; i++) {
// Fill hidden pk id into the last key part for secondary keys for tables
// with no pk
@@ -928,6 +1107,25 @@ uint Rdb_key_def::pack_record(const TABLE *const tbl, uchar *const pack_buffer,
tuple = pack_field(field, &m_pack_info[i], tuple, packed_tuple, pack_buffer,
unpack_info, n_null_fields);
+ // If this key part is a prefix of a VARCHAR field, check if it's covered.
+ if (store_covered_bitmap && field->real_type() == MYSQL_TYPE_VARCHAR &&
+ !m_pack_info[i].m_covered && curr_bitmap_pos < MAX_REF_PARTS) {
+ size_t data_length = field->data_length();
+ uint16 key_length;
+ if (m_pk_part_no[i] == (uint)-1) {
+ key_length = tbl->key_info[get_keyno()].key_part[i].length;
+ } else {
+ key_length =
+ tbl->key_info[tbl->s->primary_key].key_part[m_pk_part_no[i]].length;
+ }
+
+ if (m_pack_info[i].m_unpack_func != nullptr &&
+ data_length <= key_length) {
+ bitmap_set_bit(&covered_bitmap, curr_bitmap_pos);
+ }
+ curr_bitmap_pos++;
+ }
+
// Restore field->ptr and field->null_ptr
field->move_field(tbl->record[0] + field_offset,
maybe_null ? tbl->record[0] + null_offset : nullptr,
@@ -935,7 +1133,7 @@ uint Rdb_key_def::pack_record(const TABLE *const tbl, uchar *const pack_buffer,
}
if (unpack_info) {
- const size_t len = unpack_info->get_current_pos();
+ const size_t len = unpack_info->get_current_pos() - unpack_start_pos;
DBUG_ASSERT(len <= std::numeric_limits<uint16_t>::max());
// Don't store the unpack_info if it has only the header (that is, there's
@@ -943,9 +1141,12 @@ uint Rdb_key_def::pack_record(const TABLE *const tbl, uchar *const pack_buffer,
// Primary Keys are special: for them, store the unpack_info even if it's
// empty (provided m_maybe_unpack_info==true, see
// ha_rocksdb::convert_record_to_storage_format)
- if (len == RDB_UNPACK_HEADER_SIZE &&
- m_index_type != Rdb_key_def::INDEX_TYPE_PRIMARY) {
- unpack_info->clear();
+ if (m_index_type == Rdb_key_def::INDEX_TYPE_SECONDARY) {
+ if (len == get_unpack_header_size(tag) && !covered_bits) {
+ unpack_info->truncate(unpack_start_pos);
+ } else if (store_covered_bitmap) {
+ unpack_info->write_uint16_at(covered_bitmap_pos, covered_bits);
+ }
} else {
unpack_info->write_uint16_at(unpack_len_pos, len);
}
@@ -1186,11 +1387,30 @@ int Rdb_key_def::unpack_record(TABLE *const table, uchar *const buf,
// For secondary keys, we expect the value field to contain unpack data and
// checksum data in that order. One or both can be missing, but they cannot
// be reordered.
+ const char *unpack_header = unp_reader.get_current_ptr();
const bool has_unpack_info =
- unp_reader.remaining_bytes() &&
- *unp_reader.get_current_ptr() == RDB_UNPACK_DATA_TAG;
- if (has_unpack_info && !unp_reader.read(RDB_UNPACK_HEADER_SIZE)) {
- return HA_ERR_ROCKSDB_CORRUPT_DATA;
+ unp_reader.remaining_bytes() && is_unpack_data_tag(unpack_header[0]);
+ if (has_unpack_info) {
+ if ((m_index_type == INDEX_TYPE_SECONDARY &&
+ m_total_index_flags_length > 0 &&
+ !unp_reader.read(m_total_index_flags_length)) ||
+ !unp_reader.read(get_unpack_header_size(unpack_header[0]))) {
+ return HA_ERR_ROCKSDB_CORRUPT_DATA;
+ }
+ }
+
+ // Read the covered bitmap
+ MY_BITMAP covered_bitmap;
+ my_bitmap_map covered_bits;
+ uint curr_bitmap_pos = 0;
+ bitmap_init(&covered_bitmap, &covered_bits, MAX_REF_PARTS, false);
+
+ const bool has_covered_bitmap =
+ has_unpack_info && (unpack_header[0] == RDB_UNPACK_COVERED_DATA_TAG);
+ if (has_covered_bitmap) {
+ covered_bits = rdb_netbuf_to_uint16((const uchar *)unpack_header +
+ sizeof(RDB_UNPACK_COVERED_DATA_TAG) +
+ RDB_UNPACK_COVERED_DATA_LEN_SIZE);
}
for (uint i = 0; i < m_key_parts; i++) {
@@ -1211,7 +1431,13 @@ int Rdb_key_def::unpack_record(TABLE *const table, uchar *const buf,
Field *const field = fpi->get_field_in_table(table);
- if (fpi->m_unpack_func) {
+ bool covered_column = true;
+ if (has_covered_bitmap && field->real_type() == MYSQL_TYPE_VARCHAR &&
+ !m_pack_info[i].m_covered) {
+ covered_column = curr_bitmap_pos < MAX_REF_PARTS &&
+ bitmap_is_set(&covered_bitmap, curr_bitmap_pos++);
+ }
+ if (fpi->m_unpack_func && covered_column) {
/* It is possible to unpack this column. Do it. */
uint field_offset = field->ptr - table->record[0];
@@ -2804,6 +3030,8 @@ bool Rdb_field_packing::setup(const Rdb_key_def *const key_descr,
m_skip_func = &Rdb_key_def::skip_max_length;
m_pack_func = &Rdb_key_def::pack_with_make_sort_key;
+ m_covered = false;
+
switch (type) {
case MYSQL_TYPE_LONGLONG:
case MYSQL_TYPE_LONG:
@@ -2811,14 +3039,17 @@ bool Rdb_field_packing::setup(const Rdb_key_def *const key_descr,
case MYSQL_TYPE_SHORT:
case MYSQL_TYPE_TINY:
m_unpack_func = &Rdb_key_def::unpack_integer;
+ m_covered = true;
return true;
case MYSQL_TYPE_DOUBLE:
m_unpack_func = &Rdb_key_def::unpack_double;
+ m_covered = true;
return true;
case MYSQL_TYPE_FLOAT:
m_unpack_func = &Rdb_key_def::unpack_float;
+ m_covered = true;
return true;
case MYSQL_TYPE_NEWDECIMAL:
@@ -2836,6 +3067,7 @@ bool Rdb_field_packing::setup(const Rdb_key_def *const key_descr,
case MYSQL_TYPE_YEAR: /* YEAR is packed with Field_tiny::make_sort_key */
/* Everything that comes here is packed with just a memcpy(). */
m_unpack_func = &Rdb_key_def::unpack_binary_str;
+ m_covered = true;
return true;
case MYSQL_TYPE_NEWDATE:
@@ -2845,6 +3077,7 @@ bool Rdb_field_packing::setup(const Rdb_key_def *const key_descr,
and little-endian)
*/
m_unpack_func = &Rdb_key_def::unpack_newdate;
+ m_covered = true;
return true;
case MYSQL_TYPE_TINY_BLOB:
case MYSQL_TYPE_MEDIUM_BLOB:
@@ -3013,27 +3246,36 @@ bool Rdb_field_packing::setup(const Rdb_key_def *const key_descr,
}
}
- // Make an adjustment: unpacking partially covered columns is not
- // possible. field->table is populated when called through
- // Rdb_key_def::setup, but not during ha_rocksdb::index_flags.
+ // Make an adjustment: if this column is partially covered, tell the SQL
+ // layer we can't do index-only scans. Later when we perform an index read,
+ // we'll check on a record-by-record basis if we can do an index-only scan
+ // or not.
+ uint field_length;
if (field->table) {
- // Get the original Field object and compare lengths. If this key part is
- // a prefix of a column, then we can't do index-only scans.
- if (field->table->field[field->field_index]->field_length != key_length) {
- m_unpack_func = nullptr;
- m_make_unpack_info_func = nullptr;
- m_unpack_info_stores_value = true;
- res = false;
- }
+ field_length = field->table->field[field->field_index]->field_length;
} else {
- if (field->field_length != key_length) {
+ field_length = field->field_length;
+ }
+
+ if (field_length != key_length) {
+ res = false;
+ // If this index doesn't support covered bitmaps, then we won't know
+ // during a read if the column is actually covered or not. If so, we need
+ // to assume the column isn't covered and skip it during unpacking.
+ //
+ // If key_descr == NULL, then this is a dummy field and we probably don't
+ // need to perform this step. However, to preserve the behavior before
+ // this change, we'll only skip this step if we have an index which
+ // supports covered bitmaps.
+ if (!key_descr || !key_descr->use_covered_bitmap_format()) {
m_unpack_func = nullptr;
m_make_unpack_info_func = nullptr;
m_unpack_info_stores_value = true;
- res = false;
}
}
}
+
+ m_covered = res;
return res;
}
@@ -3149,18 +3391,19 @@ bool Rdb_tbl_def::put_dict(Rdb_dict_manager *const dict,
// Length that each index flag takes inside the record.
// Each index in the array maps to the enum INDEX_FLAG
-static const std::array<int, 1> index_flag_lengths = {
+static const std::array<uint, 1> index_flag_lengths = {
{ROCKSDB_SIZEOF_TTL_RECORD}};
-
bool Rdb_key_def::has_index_flag(uint32 index_flags, enum INDEX_FLAG flag) {
return flag & index_flags;
}
uint32 Rdb_key_def::calculate_index_flag_offset(uint32 index_flags,
- enum INDEX_FLAG flag) {
+ enum INDEX_FLAG flag,
+ uint *const length) {
- DBUG_ASSERT(Rdb_key_def::has_index_flag(index_flags, flag));
+ DBUG_ASSERT_IMP(flag != MAX_FLAG,
+ Rdb_key_def::has_index_flag(index_flags, flag));
uint offset = 0;
for (size_t bit = 0; bit < sizeof(index_flags) * CHAR_BIT; ++bit) {
@@ -3168,6 +3411,9 @@ uint32 Rdb_key_def::calculate_index_flag_offset(uint32 index_flags,
/* Exit once we've reached the proper flag */
if (flag & mask) {
+ if (length != nullptr) {
+ *length = index_flag_lengths[bit];
+ }
break;
}
@@ -3179,6 +3425,15 @@ uint32 Rdb_key_def::calculate_index_flag_offset(uint32 index_flags,
return offset;
}
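+/*
+  Copy the value of an index-flag field (e.g. the TTL bytes) into the space
+  that was previously reserved for it in the given unpack_info buffer.
+*/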
+void Rdb_key_def::write_index_flag_field(Rdb_string_writer *const buf,
+ const uchar *const val,
+ enum INDEX_FLAG flag) const {
+ uint len;
+ uint offset = calculate_index_flag_offset(m_index_flags_bitmap, flag, &len);
+ DBUG_ASSERT(offset + len <= buf->get_current_pos());
+ memcpy(buf->ptr() + offset, val, len);
+}
+
void Rdb_tbl_def::check_if_is_mysql_system_table() {
static const char *const system_dbs[] = {
"mysql", "performance_schema", "information_schema",
@@ -3277,10 +3532,12 @@ struct Rdb_validate_tbls : public Rdb_tables_scanner {
int Rdb_validate_tbls::add_table(Rdb_tbl_def *tdef) {
DBUG_ASSERT(tdef != nullptr);
- /* Add the database/table into the list */
- bool is_partition = tdef->base_partition().size() != 0;
- m_list[tdef->base_dbname()].insert(
- tbl_info_t(tdef->base_tablename(), is_partition));
+  /* Add the database/table to the list if it is not a temp table */
+ if (tdef->base_tablename().find(tmp_file_prefix) == std::string::npos) {
+ bool is_partition = tdef->base_partition().size() != 0;
+ m_list[tdef->base_dbname()].insert(
+ tbl_info_t(tdef->base_tablename(), is_partition));
+ }
return HA_EXIT_SUCCESS;
}
@@ -3361,9 +3618,9 @@ bool Rdb_validate_tbls::scan_for_frms(const std::string &datadir,
/* Scan through the files in the directory */
struct fileinfo *file_info = dir_info->dir_entry;
for (uint ii = 0; ii < dir_info->number_off_files; ii++, file_info++) {
- /* Find .frm files that are not temp files (those that start with '#') */
+ /* Find .frm files that are not temp files (those that contain '#sql') */
const char *ext = strrchr(file_info->name, '.');
- if (ext != nullptr && !is_prefix(file_info->name, tmp_file_prefix) &&
+ if (ext != nullptr && strstr(file_info->name, tmp_file_prefix) == nullptr &&
strcmp(ext, ".frm") == 0) {
std::string tablename =
std::string(file_info->name, ext - file_info->name);
@@ -3690,6 +3947,20 @@ Rdb_ddl_manager::find(GL_INDEX_ID gl_index_id) {
return empty;
}
+// This method returns the name of the table based on an index id. It acquires
+// a read lock on m_rwlock.
+const std::string
+Rdb_ddl_manager::safe_get_table_name(const GL_INDEX_ID &gl_index_id) {
+ std::string ret;
+ mysql_rwlock_rdlock(&m_rwlock);
+ auto it = m_index_num_to_keydef.find(gl_index_id);
+ if (it != m_index_num_to_keydef.end()) {
+ ret = it->second.first;
+ }
+ mysql_rwlock_unlock(&m_rwlock);
+ return ret;
+}
+
void Rdb_ddl_manager::set_stats(
const std::unordered_map<GL_INDEX_ID, Rdb_index_stats> &stats) {
mysql_rwlock_wrlock(&m_rwlock);
diff --git a/storage/rocksdb/rdb_datadic.h b/storage/rocksdb/rdb_datadic.h
index e91e19aa925..987647f13c6 100644
--- a/storage/rocksdb/rdb_datadic.h
+++ b/storage/rocksdb/rdb_datadic.h
@@ -106,8 +106,8 @@ const size_t RDB_CHECKSUM_CHUNK_SIZE = 2 * RDB_CHECKSUM_SIZE + 1;
const char RDB_CHECKSUM_DATA_TAG = 0x01;
/*
- Unpack data is variable length. It is a 1 tag-byte plus a
- two byte length field. The length field includes the header as well.
+ Unpack data is variable length. The header is 1 tag-byte plus a two byte
+ length field. The length field includes the header as well.
*/
const char RDB_UNPACK_DATA_TAG = 0x02;
const size_t RDB_UNPACK_DATA_LEN_SIZE = sizeof(uint16_t);
@@ -115,6 +115,17 @@ const size_t RDB_UNPACK_HEADER_SIZE =
sizeof(RDB_UNPACK_DATA_TAG) + RDB_UNPACK_DATA_LEN_SIZE;
/*
+ This header format is 1 tag-byte plus a two byte length field plus a two byte
+ covered bitmap. The length field includes the header size.
+*/
+const char RDB_UNPACK_COVERED_DATA_TAG = 0x03;
+const size_t RDB_UNPACK_COVERED_DATA_LEN_SIZE = sizeof(uint16_t);
+const size_t RDB_COVERED_BITMAP_SIZE = sizeof(uint16_t);
+const size_t RDB_UNPACK_COVERED_HEADER_SIZE =
+ sizeof(RDB_UNPACK_COVERED_DATA_TAG) + RDB_UNPACK_COVERED_DATA_LEN_SIZE +
+ RDB_COVERED_BITMAP_SIZE;
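+// i.e. the covered-format header is laid out as:
+//   [1-byte tag 0x03][2-byte total length][2-byte covered bitmap]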
+
+/*
Data dictionary index info field sizes.
*/
const size_t RDB_SIZEOF_INDEX_INFO_VERSION = sizeof(uint16);
@@ -187,7 +198,8 @@ public:
const bool &should_store_row_debug_checksums,
const longlong &hidden_pk_id = 0, uint n_key_parts = 0,
uint *const n_null_fields = nullptr,
- uint *const ttl_pk_offset = nullptr) const;
+ uint *const ttl_pk_offset = nullptr,
+ const char *const ttl_bytes = nullptr) const;
/* Pack the hidden primary key into mem-comparable form. */
uint pack_hidden_pk(const longlong &hidden_pk_id,
uchar *const packed_tuple) const;
@@ -245,6 +257,17 @@ public:
return true;
}
+ void get_lookup_bitmap(const TABLE *table, MY_BITMAP *map) const;
+
+ bool covers_lookup(TABLE *const table,
+ const rocksdb::Slice *const unpack_info,
+ const MY_BITMAP *const map) const;
+
+ inline bool use_covered_bitmap_format() const {
+ return m_index_type == INDEX_TYPE_SECONDARY &&
+ m_kv_format_version >= SECONDARY_FORMAT_VERSION_UPDATE3;
+ }
+
/*
Return true if the passed mem-comparable key
- is from this index, and
@@ -302,6 +325,8 @@ public:
return m_prefix_extractor.get();
}
+ static size_t get_unpack_header_size(char tag);
+
Rdb_key_def &operator=(const Rdb_key_def &) = delete;
Rdb_key_def(const Rdb_key_def &k);
Rdb_key_def(uint indexnr_arg, uint keyno_arg,
@@ -425,7 +450,16 @@ public:
  // an inefficiency where data that was a multiple of 8 bytes in length
  // had an extra 9 bytes of encoded data.
SECONDARY_FORMAT_VERSION_UPDATE2 = 12,
- SECONDARY_FORMAT_VERSION_LATEST = SECONDARY_FORMAT_VERSION_UPDATE2,
+ // This change includes support for TTL
+ // - This means that when TTL is specified for the table an 8-byte TTL
+ // field is prepended in front of each value.
+ SECONDARY_FORMAT_VERSION_TTL = 13,
+ SECONDARY_FORMAT_VERSION_LATEST = SECONDARY_FORMAT_VERSION_TTL,
+ // This change includes support for covering SK lookups for varchars. A
+ // 2-byte bitmap is added after the tag-byte to unpack_info only for
+ // records which have covered varchar columns. Currently waiting before
+ // enabling in prod.
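+  // The deliberately large value keeps use_covered_bitmap_format() returning
+  // false for indexes written with the current SECONDARY_FORMAT_VERSION_LATEST.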
+ SECONDARY_FORMAT_VERSION_UPDATE3 = 65535,
};
void setup(const TABLE *const table, const Rdb_tbl_def *const tbl_def);
@@ -441,7 +475,11 @@ public:
static bool has_index_flag(uint32 index_flags, enum INDEX_FLAG flag);
static uint32 calculate_index_flag_offset(uint32 index_flags,
- enum INDEX_FLAG flag);
+ enum INDEX_FLAG flag,
+ uint *const field_length = nullptr);
+ void write_index_flag_field(Rdb_string_writer *const buf,
+ const uchar *const val,
+ enum INDEX_FLAG flag) const;
static const std::string
gen_qualifier_for_table(const char *const qualifier,
@@ -594,6 +632,10 @@ public:
SECONDARY_FORMAT_VERSION_UPDATE2);
}
+ static inline bool is_unpack_data_tag(char c) {
+ return c == RDB_UNPACK_DATA_TAG || c == RDB_UNPACK_COVERED_DATA_TAG;
+ }
+
private:
#ifndef DBUG_OFF
inline bool is_storage_available(const int &offset, const int &needed) const {
@@ -640,6 +682,11 @@ public:
uint32 m_index_flags_bitmap;
/*
+ How much space in bytes the index flag fields occupy.
+ */
+ uint32 m_total_index_flags_length;
+
+ /*
Offset in the records where the 8-byte TTL is stored (UINT_MAX if no TTL)
*/
uint32 m_ttl_rec_offset;
@@ -757,6 +804,13 @@ public:
  // spaces in the unpack_info
bool m_unpack_info_uses_two_bytes;
+ /*
+ True implies that an index-only read is always possible for this field.
+ False means an index-only read may be possible depending on the record and
+ field type.
+ */
+ bool m_covered;
+
const std::vector<uchar> *space_xfrm;
size_t space_xfrm_len;
size_t space_mb_len;
@@ -1033,6 +1087,8 @@ public:
return m_sequence.get_and_update_next_number(dict);
}
+ const std::string safe_get_table_name(const GL_INDEX_ID &gl_index_id);
+
/* Walk the data dictionary */
int scan_for_tables(Rdb_tables_scanner *tables_scanner);
diff --git a/storage/rocksdb/rdb_i_s.cc b/storage/rocksdb/rdb_i_s.cc
index 1db521cee4c..15e4393b41e 100644
--- a/storage/rocksdb/rdb_i_s.cc
+++ b/storage/rocksdb/rdb_i_s.cc
@@ -845,17 +845,22 @@ static int rdb_i_s_compact_stats_fill_table(
continue;
}
- std::map<std::string, double> props;
+ std::map<std::string, std::string> props;
bool bool_ret MY_ATTRIBUTE((__unused__));
bool_ret = rdb->GetMapProperty(cfh, "rocksdb.cfstats", &props);
DBUG_ASSERT(bool_ret);
+ const std::string prop_name_prefix = "compaction.";
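+    // The cfstats map keys look like "compaction.<level>.<metric>"; ignore
+    // everything outside the compaction section.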
for (auto const &prop_ent : props) {
std::string prop_name = prop_ent.first;
- double value = prop_ent.second;
- std::size_t del_pos = prop_name.find('.');
+ if (prop_name.find(prop_name_prefix) != 0) {
+ continue;
+ }
+ std::string value = prop_ent.second;
+ std::size_t del_pos = prop_name.find('.', prop_name_prefix.size());
DBUG_ASSERT(del_pos != std::string::npos);
- std::string level_str = prop_name.substr(0, del_pos);
+ std::string level_str = prop_name.substr(
+ prop_name_prefix.size(), del_pos - prop_name_prefix.size());
std::string type_str = prop_name.substr(del_pos + 1);
Field **field = tables->table->field;
@@ -864,7 +869,7 @@ static int rdb_i_s_compact_stats_fill_table(
field[0]->store(cf_name.c_str(), cf_name.size(), system_charset_info);
field[1]->store(level_str.c_str(), level_str.size(), system_charset_info);
field[2]->store(type_str.c_str(), type_str.size(), system_charset_info);
- field[3]->store(value, true);
+ field[3]->store(std::stod(value));
ret |= static_cast<int>(
my_core::schema_table_store_record(thd, tables->table));
diff --git a/storage/rocksdb/rdb_index_merge.cc b/storage/rocksdb/rdb_index_merge.cc
index 3f2de63ccfe..1441b81a684 100644
--- a/storage/rocksdb/rdb_index_merge.cc
+++ b/storage/rocksdb/rdb_index_merge.cc
@@ -29,17 +29,38 @@ namespace myrocks {
Rdb_index_merge::Rdb_index_merge(const char *const tmpfile_path,
const ulonglong &merge_buf_size,
const ulonglong &merge_combine_read_size,
- const rocksdb::Comparator *const comparator)
+ const ulonglong &merge_tmp_file_removal_delay,
+ rocksdb::ColumnFamilyHandle *cf)
: m_tmpfile_path(tmpfile_path), m_merge_buf_size(merge_buf_size),
m_merge_combine_read_size(merge_combine_read_size),
- m_comparator(comparator), m_rec_buf_unsorted(nullptr),
- m_output_buf(nullptr) {}
+ m_merge_tmp_file_removal_delay(merge_tmp_file_removal_delay),
+ m_cf_handle(cf), m_rec_buf_unsorted(nullptr), m_output_buf(nullptr) {}
Rdb_index_merge::~Rdb_index_merge() {
/*
- Close tmp file, we don't need to worry about deletion, mysql handles it.
+ If merge_tmp_file_removal_delay is set, sleep between calls to chsize.
+
+ This helps mitigate potential trim stalls on flash when large files are
+ being deleted too quickly.
+ */
+ if (m_merge_tmp_file_removal_delay > 0) {
+ uint64 curr_size = m_merge_buf_size * m_merge_file.m_num_sort_buffers;
+ for (uint i = 0; i < m_merge_file.m_num_sort_buffers; i++) {
+ if (my_chsize(m_merge_file.m_fd, curr_size, 0, MYF(MY_WME))) {
+ // NO_LINT_DEBUG
+ sql_print_error("Error truncating file during fast index creation.");
+ }
+
+ my_sleep(m_merge_tmp_file_removal_delay * 1000);
+ curr_size -= m_merge_buf_size;
+ }
+ }
+
+ /*
+    Close the file descriptor; we don't need to worry about deletion since
+    mysql handles it.
*/
- my_close(m_merge_file.fd, MYF(MY_WME));
+ my_close(m_merge_file.m_fd, MYF(MY_WME));
}
int Rdb_index_merge::init() {
@@ -73,7 +94,7 @@ int Rdb_index_merge::init() {
Create a merge file in the given location.
*/
int Rdb_index_merge::merge_file_create() {
- DBUG_ASSERT(m_merge_file.fd == -1);
+ DBUG_ASSERT(m_merge_file.m_fd == -1);
int fd;
/* If no path set for tmpfile, use mysql_tmpdir by default */
@@ -84,11 +105,13 @@ int Rdb_index_merge::merge_file_create() {
}
if (fd < 0) {
+ // NO_LINT_DEBUG
+ sql_print_error("Failed to create temp file during fast index creation.");
return HA_ERR_ROCKSDB_MERGE_FILE_ERR;
}
- m_merge_file.fd = fd;
- m_merge_file.num_sort_buffers = 0;
+ m_merge_file.m_fd = fd;
+ m_merge_file.m_num_sort_buffers = 0;
return HA_EXIT_SUCCESS;
}
@@ -109,10 +132,10 @@ int Rdb_index_merge::add(const rocksdb::Slice &key, const rocksdb::Slice &val) {
out to disk in sorted order using offset tree.
*/
const uint total_offset = RDB_MERGE_CHUNK_LEN +
- m_rec_buf_unsorted->curr_offset +
+ m_rec_buf_unsorted->m_curr_offset +
RDB_MERGE_KEY_DELIMITER + RDB_MERGE_VAL_DELIMITER +
key.size() + val.size();
- if (total_offset >= m_rec_buf_unsorted->total_size) {
+ if (total_offset >= m_rec_buf_unsorted->m_total_size) {
/*
If the offset tree is empty here, that means that the proposed key to
add is too large for the buffer.
@@ -131,7 +154,7 @@ int Rdb_index_merge::add(const rocksdb::Slice &key, const rocksdb::Slice &val) {
}
}
- const ulonglong rec_offset = m_rec_buf_unsorted->curr_offset;
+ const ulonglong rec_offset = m_rec_buf_unsorted->m_curr_offset;
/*
Store key and value in temporary unsorted in memory buffer pointed to by
@@ -140,8 +163,15 @@ int Rdb_index_merge::add(const rocksdb::Slice &key, const rocksdb::Slice &val) {
m_rec_buf_unsorted->store_key_value(key, val);
/* Find sort order of the new record */
- m_offset_tree.emplace(m_rec_buf_unsorted->block.get() + rec_offset,
- m_comparator);
+ auto res =
+ m_offset_tree.emplace(m_rec_buf_unsorted->m_block.get() + rec_offset,
+ m_cf_handle->GetComparator());
+ if (!res.second) {
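+    // emplace() did not insert anything, which means a record with an equal
+    // key is already in the offset tree; report it as a duplicate-key error.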
+ my_printf_error(ER_DUP_ENTRY,
+ "Failed to insert the record: the key already exists",
+ MYF(0));
+ return ER_DUP_ENTRY;
+ }
return HA_EXIT_SUCCESS;
}
@@ -150,33 +180,33 @@ int Rdb_index_merge::add(const rocksdb::Slice &key, const rocksdb::Slice &val) {
Sort + write merge buffer chunk out to disk.
*/
int Rdb_index_merge::merge_buf_write() {
- DBUG_ASSERT(m_merge_file.fd != -1);
+ DBUG_ASSERT(m_merge_file.m_fd != -1);
DBUG_ASSERT(m_rec_buf_unsorted != nullptr);
DBUG_ASSERT(m_output_buf != nullptr);
DBUG_ASSERT(!m_offset_tree.empty());
/* Write actual chunk size to first 8 bytes of the merge buffer */
- merge_store_uint64(m_output_buf->block.get(),
- m_rec_buf_unsorted->curr_offset + RDB_MERGE_CHUNK_LEN);
- m_output_buf->curr_offset += RDB_MERGE_CHUNK_LEN;
+ merge_store_uint64(m_output_buf->m_block.get(),
+ m_rec_buf_unsorted->m_curr_offset + RDB_MERGE_CHUNK_LEN);
+ m_output_buf->m_curr_offset += RDB_MERGE_CHUNK_LEN;
/*
Iterate through the offset tree. Should be ordered by the secondary key
at this point.
*/
for (const auto &rec : m_offset_tree) {
- DBUG_ASSERT(m_output_buf->curr_offset <= m_merge_buf_size);
+ DBUG_ASSERT(m_output_buf->m_curr_offset <= m_merge_buf_size);
/* Read record from offset (should never fail) */
rocksdb::Slice key;
rocksdb::Slice val;
- merge_read_rec(rec.block, &key, &val);
+ merge_read_rec(rec.m_block, &key, &val);
/* Store key and value into sorted output buffer */
m_output_buf->store_key_value(key, val);
}
- DBUG_ASSERT(m_output_buf->curr_offset <= m_output_buf->total_size);
+ DBUG_ASSERT(m_output_buf->m_curr_offset <= m_output_buf->m_total_size);
/*
Write output buffer to disk.
@@ -184,8 +214,9 @@ int Rdb_index_merge::merge_buf_write() {
Need to position cursor to the chunk it needs to be at on filesystem
then write into the respective merge buffer.
*/
- if (my_seek(m_merge_file.fd, m_merge_file.num_sort_buffers * m_merge_buf_size,
- SEEK_SET, MYF(0)) == MY_FILEPOS_ERROR) {
+ if (my_seek(m_merge_file.m_fd,
+ m_merge_file.m_num_sort_buffers * m_merge_buf_size, SEEK_SET,
+ MYF(0)) == MY_FILEPOS_ERROR) {
// NO_LINT_DEBUG
sql_print_error("Error seeking to location in merge file on disk.");
return HA_ERR_ROCKSDB_MERGE_FILE_ERR;
@@ -196,16 +227,16 @@ int Rdb_index_merge::merge_buf_write() {
cache can flush out all of the files at the same time, causing a write
burst.
*/
- if (my_write(m_merge_file.fd, m_output_buf->block.get(),
- m_output_buf->total_size, MYF(MY_WME | MY_NABP)) ||
- mysql_file_sync(m_merge_file.fd, MYF(MY_WME))) {
+ if (my_write(m_merge_file.m_fd, m_output_buf->m_block.get(),
+ m_output_buf->m_total_size, MYF(MY_WME | MY_NABP)) ||
+ mysql_file_sync(m_merge_file.m_fd, MYF(MY_WME))) {
// NO_LINT_DEBUG
sql_print_error("Error writing sorted merge buffer to disk.");
return HA_ERR_ROCKSDB_MERGE_FILE_ERR;
}
/* Increment merge file offset to track number of merge buffers written */
- m_merge_file.num_sort_buffers += 1;
+ m_merge_file.m_num_sort_buffers += 1;
/* Reset everything for next run */
merge_reset();
@@ -228,28 +259,29 @@ int Rdb_index_merge::merge_heap_prepare() {
return HA_ERR_ROCKSDB_MERGE_FILE_ERR;
}
- DBUG_ASSERT(m_merge_file.num_sort_buffers > 0);
+ DBUG_ASSERT(m_merge_file.m_num_sort_buffers > 0);
/*
For an n-way merge, we need to read chunks of each merge file
simultaneously.
*/
ulonglong chunk_size =
- m_merge_combine_read_size / m_merge_file.num_sort_buffers;
+ m_merge_combine_read_size / m_merge_file.m_num_sort_buffers;
if (chunk_size >= m_merge_buf_size) {
chunk_size = m_merge_buf_size;
}
/* Allocate buffers for each chunk */
- for (ulonglong i = 0; i < m_merge_file.num_sort_buffers; i++) {
- const auto entry = std::make_shared<merge_heap_entry>(m_comparator);
+ for (ulonglong i = 0; i < m_merge_file.m_num_sort_buffers; i++) {
+ const auto entry =
+ std::make_shared<merge_heap_entry>(m_cf_handle->GetComparator());
/*
Read chunk_size bytes from each chunk on disk, and place inside
respective chunk buffer.
*/
const size_t total_size =
- entry->prepare(m_merge_file.fd, i * m_merge_buf_size, chunk_size);
+ entry->prepare(m_merge_file.m_fd, i * m_merge_buf_size, chunk_size);
if (total_size == (size_t)-1) {
return HA_ERR_ROCKSDB_MERGE_FILE_ERR;
@@ -261,7 +293,7 @@ int Rdb_index_merge::merge_heap_prepare() {
}
/* Read the first record from each buffer to initially populate the heap */
- if (entry->read_rec(&entry->key, &entry->val)) {
+ if (entry->read_rec(&entry->m_key, &entry->m_val)) {
// NO_LINT_DEBUG
sql_print_error("Chunk size is too small to process merge.");
return HA_ERR_ROCKSDB_MERGE_FILE_ERR;
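The chunk sizing above splits the combine-read budget evenly across the n sort buffers so that all n chunks can be memory-resident at once, capping each share at one full sort buffer. For instance, a 1 GiB budget over 8 buffers of 256 MiB yields 128 MiB reads per chunk, while the same budget over 2 such buffers is capped at 256 MiB. A small sketch of just that arithmetic (chunk_read_size is an illustrative name):

#include <cstdint>
#include <iostream>

// Per-chunk read size for the n-way merge: split the combine-read budget
// evenly, but never read more than one full sort buffer per chunk.
static uint64_t chunk_read_size(uint64_t merge_combine_read_size,
                                uint64_t num_sort_buffers,
                                uint64_t merge_buf_size) {
  uint64_t chunk_size = merge_combine_read_size / num_sort_buffers;
  if (chunk_size >= merge_buf_size) chunk_size = merge_buf_size;
  return chunk_size;
}

int main() {
  // 1 GiB budget, 8 sort buffers of 256 MiB -> 128 MiB per chunk.
  std::cout << chunk_read_size(1ULL << 30, 8, 256ULL << 20) << "\n";
  // 1 GiB budget, 2 sort buffers of 256 MiB -> capped at 256 MiB.
  std::cout << chunk_read_size(1ULL << 30, 2, 256ULL << 20) << "\n";
  return 0;
}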
@@ -286,7 +318,7 @@ int Rdb_index_merge::next(rocksdb::Slice *const key,
If there are no sort buffer records (alters on empty tables),
also exit here.
*/
- if (m_merge_file.num_sort_buffers == 0) {
+ if (m_merge_file.m_num_sort_buffers == 0) {
if (m_offset_tree.empty()) {
return -1;
}
@@ -294,7 +326,7 @@ int Rdb_index_merge::next(rocksdb::Slice *const key,
const auto rec = m_offset_tree.begin();
/* Read record from offset */
- merge_read_rec(rec->block, key, val);
+ merge_read_rec(rec->m_block, key, val);
m_offset_tree.erase(rec);
return HA_EXIT_SUCCESS;
@@ -334,8 +366,8 @@ void Rdb_index_merge::merge_heap_top(rocksdb::Slice *const key,
DBUG_ASSERT(!m_merge_min_heap.empty());
const std::shared_ptr<merge_heap_entry> &entry = m_merge_min_heap.top();
- *key = entry->key;
- *val = entry->val;
+ *key = entry->m_key;
+ *val = entry->m_val;
}
/**
@@ -355,12 +387,12 @@ int Rdb_index_merge::merge_heap_pop_and_get_next(rocksdb::Slice *const key,
/*
We are finished w/ current chunk if:
- current_offset + disk_offset == total_size
+ m_curr_offset + (m_disk_curr_offset - m_disk_start_offset) == m_total_size
Return without adding entry back onto heap.
If heap is also empty, we must be finished with merge.
*/
- if (entry->chunk_info->is_chunk_finished()) {
+ if (entry->m_chunk_info->is_chunk_finished()) {
if (m_merge_min_heap.empty()) {
return -1;
}
@@ -372,19 +404,19 @@ int Rdb_index_merge::merge_heap_pop_and_get_next(rocksdb::Slice *const key,
/*
Make sure we haven't reached the end of the chunk.
*/
- DBUG_ASSERT(!entry->chunk_info->is_chunk_finished());
+ DBUG_ASSERT(!entry->m_chunk_info->is_chunk_finished());
/*
If merge_read_rec fails, it means either the chunk was cut off
or we've reached the end of the respective chunk.
*/
- if (entry->read_rec(&entry->key, &entry->val)) {
- if (entry->read_next_chunk_from_disk(m_merge_file.fd)) {
+ if (entry->read_rec(&entry->m_key, &entry->m_val)) {
+ if (entry->read_next_chunk_from_disk(m_merge_file.m_fd)) {
return HA_ERR_ROCKSDB_MERGE_FILE_ERR;
}
/* Try reading record again, should never fail. */
- if (entry->read_rec(&entry->key, &entry->val)) {
+ if (entry->read_rec(&entry->m_key, &entry->m_val)) {
return HA_ERR_ROCKSDB_MERGE_FILE_ERR;
}
}
@@ -398,32 +430,33 @@ int Rdb_index_merge::merge_heap_pop_and_get_next(rocksdb::Slice *const key,
}
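The pop-and-refill loop above is a textbook k-way merge: surface the smallest head, emit it, advance that chunk (re-reading from disk when its window is exhausted), and push the new head back. A self-contained sketch over in-memory, already-sorted "chunks", with a reversed comparison turning std::priority_queue's max-heap into a min-heap the same way merge_heap_comparator does (HeapEntry and HeapCmp are illustrative names; no file I/O here):

#include <cstddef>
#include <iostream>
#include <queue>
#include <string>
#include <vector>

struct HeapEntry {
  const std::vector<std::string> *chunk;  // stands in for one on-disk chunk
  std::size_t pos;                        // next record within that chunk
  const std::string &key() const { return (*chunk)[pos]; }
};

// Reversed comparison: std::priority_queue is a max-heap, so comparing rhs
// against lhs (as merge_heap_comparator does) puts the smallest key on top.
struct HeapCmp {
  bool operator()(const HeapEntry &l, const HeapEntry &r) const {
    return r.key() < l.key();
  }
};

int main() {
  const std::vector<std::vector<std::string>> chunks = {
      {"a", "d", "g"}, {"b", "e"}, {"c", "f", "h"}};

  std::priority_queue<HeapEntry, std::vector<HeapEntry>, HeapCmp> heap;
  for (const auto &c : chunks) heap.push({&c, 0});  // prime the heap

  while (!heap.empty()) {
    HeapEntry top = heap.top();  // merge_heap_top()
    heap.pop();                  // merge_heap_pop_and_get_next()
    std::cout << top.key() << " ";
    if (++top.pos < top.chunk->size()) heap.push(top);  // advance this chunk
  }
  std::cout << "\n";  // a b c d e f g h
  return 0;
}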
int Rdb_index_merge::merge_heap_entry::read_next_chunk_from_disk(File fd) {
- if (chunk_info->read_next_chunk_from_disk(fd)) {
+ if (m_chunk_info->read_next_chunk_from_disk(fd)) {
return HA_EXIT_FAILURE;
}
- block = chunk_info->block.get();
+ m_block = m_chunk_info->m_block.get();
return HA_EXIT_SUCCESS;
}
int Rdb_index_merge::merge_buf_info::read_next_chunk_from_disk(File fd) {
- disk_curr_offset += curr_offset;
+ m_disk_curr_offset += m_curr_offset;
- if (my_seek(fd, disk_curr_offset, SEEK_SET, MYF(0)) == MY_FILEPOS_ERROR) {
+ if (my_seek(fd, m_disk_curr_offset, SEEK_SET, MYF(0)) == MY_FILEPOS_ERROR) {
// NO_LINT_DEBUG
sql_print_error("Error seeking to location in merge file on disk.");
return HA_EXIT_FAILURE;
}
/* Overwrite the old block */
- const size_t bytes_read = my_read(fd, block.get(), block_len, MYF(MY_WME));
+ const size_t bytes_read =
+ my_read(fd, m_block.get(), m_block_len, MYF(MY_WME));
if (bytes_read == (size_t)-1) {
// NO_LINT_DEBUG
sql_print_error("Error reading merge file from disk.");
return HA_EXIT_FAILURE;
}
- curr_offset = 0;
+ m_curr_offset = 0;
return HA_EXIT_SUCCESS;
}
@@ -459,39 +492,39 @@ void Rdb_index_merge::read_slice(rocksdb::Slice *slice,
int Rdb_index_merge::merge_heap_entry::read_rec(rocksdb::Slice *const key,
rocksdb::Slice *const val) {
- const uchar *block_ptr = block;
- const auto orig_offset = chunk_info->curr_offset;
- const auto orig_block = block;
+ const uchar *block_ptr = m_block;
+ const auto orig_offset = m_chunk_info->m_curr_offset;
+ const auto orig_block = m_block;
/* Read key at block offset into key slice and the value into value slice */
if (read_slice(key, &block_ptr) != 0) {
return HA_EXIT_FAILURE;
}
- chunk_info->curr_offset += (uintptr_t)block_ptr - (uintptr_t)block;
- block += (uintptr_t)block_ptr - (uintptr_t)block;
+ m_chunk_info->m_curr_offset += (uintptr_t)block_ptr - (uintptr_t)m_block;
+ m_block += (uintptr_t)block_ptr - (uintptr_t)m_block;
if (read_slice(val, &block_ptr) != 0) {
- chunk_info->curr_offset = orig_offset;
- block = orig_block;
+ m_chunk_info->m_curr_offset = orig_offset;
+ m_block = orig_block;
return HA_EXIT_FAILURE;
}
- chunk_info->curr_offset += (uintptr_t)block_ptr - (uintptr_t)block;
- block += (uintptr_t)block_ptr - (uintptr_t)block;
+ m_chunk_info->m_curr_offset += (uintptr_t)block_ptr - (uintptr_t)m_block;
+ m_block += (uintptr_t)block_ptr - (uintptr_t)m_block;
return HA_EXIT_SUCCESS;
}
int Rdb_index_merge::merge_heap_entry::read_slice(rocksdb::Slice *const slice,
const uchar **block_ptr) {
- if (!chunk_info->has_space(RDB_MERGE_REC_DELIMITER)) {
+ if (!m_chunk_info->has_space(RDB_MERGE_REC_DELIMITER)) {
return HA_EXIT_FAILURE;
}
uint64 slice_len;
merge_read_uint64(block_ptr, &slice_len);
- if (!chunk_info->has_space(RDB_MERGE_REC_DELIMITER + slice_len)) {
+ if (!m_chunk_info->has_space(RDB_MERGE_REC_DELIMITER + slice_len)) {
return HA_EXIT_FAILURE;
}
@@ -503,18 +536,18 @@ int Rdb_index_merge::merge_heap_entry::read_slice(rocksdb::Slice *const slice,
size_t Rdb_index_merge::merge_heap_entry::prepare(File fd, ulonglong f_offset,
ulonglong chunk_size) {
- chunk_info = std::make_shared<merge_buf_info>(chunk_size);
- const size_t res = chunk_info->prepare(fd, f_offset);
+ m_chunk_info = std::make_shared<merge_buf_info>(chunk_size);
+ const size_t res = m_chunk_info->prepare(fd, f_offset);
if (res != (size_t)-1) {
- block = chunk_info->block.get() + RDB_MERGE_CHUNK_LEN;
+ m_block = m_chunk_info->m_block.get() + RDB_MERGE_CHUNK_LEN;
}
return res;
}
size_t Rdb_index_merge::merge_buf_info::prepare(File fd, ulonglong f_offset) {
- disk_start_offset = f_offset;
- disk_curr_offset = f_offset;
+ m_disk_start_offset = f_offset;
+ m_disk_curr_offset = f_offset;
/*
Need to position cursor to the chunk it needs to be at on filesystem
@@ -526,7 +559,8 @@ size_t Rdb_index_merge::merge_buf_info::prepare(File fd, ulonglong f_offset) {
return (size_t)-1;
}
- const size_t bytes_read = my_read(fd, block.get(), total_size, MYF(MY_WME));
+ const size_t bytes_read =
+ my_read(fd, m_block.get(), m_total_size, MYF(MY_WME));
if (bytes_read == (size_t)-1) {
// NO_LINT_DEBUG
sql_print_error("Error reading merge file from disk.");
@@ -537,10 +571,10 @@ size_t Rdb_index_merge::merge_buf_info::prepare(File fd, ulonglong f_offset) {
Read the first 8 bytes of each chunk; this gives us the actual
size of each chunk.
*/
- const uchar *block_ptr = block.get();
- merge_read_uint64(&block_ptr, &total_size);
- curr_offset += RDB_MERGE_CHUNK_LEN;
- return total_size;
+ const uchar *block_ptr = m_block.get();
+ merge_read_uint64(&block_ptr, &m_total_size);
+ m_curr_offset += RDB_MERGE_CHUNK_LEN;
+ return m_total_size;
}
/* Store key and value w/ their respective delimiters at the given offset */
@@ -552,13 +586,13 @@ void Rdb_index_merge::merge_buf_info::store_key_value(
void Rdb_index_merge::merge_buf_info::store_slice(const rocksdb::Slice &slice) {
/* Store length delimiter */
- merge_store_uint64(&block[curr_offset], slice.size());
+ merge_store_uint64(&m_block[m_curr_offset], slice.size());
/* Store slice data */
- memcpy(&block[curr_offset + RDB_MERGE_REC_DELIMITER], slice.data(),
+ memcpy(&m_block[m_curr_offset + RDB_MERGE_REC_DELIMITER], slice.data(),
slice.size());
- curr_offset += slice.size() + RDB_MERGE_REC_DELIMITER;
+ m_curr_offset += slice.size() + RDB_MERGE_REC_DELIMITER;
}
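store_slice()/read_slice() define the record encoding used throughout the merge: each slice is written as an 8-byte length delimiter followed by the raw bytes, and a key/value pair is simply two such slices back to back. A standalone round-trip sketch of that framing (put_slice/get_slice are illustrative free functions; native-endian memcpy stands in for merge_store_uint64/merge_read_uint64):

#include <cstdint>
#include <cstring>
#include <iostream>
#include <string>
#include <vector>

constexpr std::size_t kDelim = 8;  // analogue of RDB_MERGE_REC_DELIMITER

// Append one length-prefixed slice, mirroring store_slice().
static void put_slice(std::vector<unsigned char> *buf, const std::string &s) {
  const uint64_t len = s.size();
  const std::size_t old_size = buf->size();
  buf->resize(old_size + kDelim + s.size());
  std::memcpy(buf->data() + old_size, &len, kDelim);
  std::memcpy(buf->data() + old_size + kDelim, s.data(), s.size());
}

// Read one length-prefixed slice and advance the cursor, mirroring read_slice().
static std::string get_slice(const unsigned char **p) {
  uint64_t len = 0;
  std::memcpy(&len, *p, kDelim);
  std::string s(reinterpret_cast<const char *>(*p + kDelim), len);
  *p += kDelim + len;
  return s;
}

int main() {
  std::vector<unsigned char> rec;
  put_slice(&rec, "key1");    // store_key_value() writes the key slice ...
  put_slice(&rec, "value1");  // ... immediately followed by the value slice

  const unsigned char *p = rec.data();
  std::cout << get_slice(&p) << " -> " << get_slice(&p) << "\n";  // key1 -> value1
  return 0;
}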
void Rdb_index_merge::merge_reset() {
@@ -569,13 +603,13 @@ void Rdb_index_merge::merge_reset() {
m_offset_tree.clear();
/* Reset sort buffer block */
- if (m_rec_buf_unsorted && m_rec_buf_unsorted->block) {
- m_rec_buf_unsorted->curr_offset = 0;
+ if (m_rec_buf_unsorted && m_rec_buf_unsorted->m_block) {
+ m_rec_buf_unsorted->m_curr_offset = 0;
}
/* Reset output buf */
- if (m_output_buf && m_output_buf->block) {
- m_output_buf->curr_offset = 0;
+ if (m_output_buf && m_output_buf->m_block) {
+ m_output_buf->m_curr_offset = 0;
}
}
diff --git a/storage/rocksdb/rdb_index_merge.h b/storage/rocksdb/rdb_index_merge.h
index 9d1469fc34e..6e53663160a 100644
--- a/storage/rocksdb/rdb_index_merge.h
+++ b/storage/rocksdb/rdb_index_merge.h
@@ -49,23 +49,23 @@ class Rdb_index_merge {
Rdb_index_merge(const Rdb_index_merge &p) = delete;
Rdb_index_merge &operator=(const Rdb_index_merge &p) = delete;
-public:
+ public:
/* Information about temporary files used in external merge sort */
struct merge_file_info {
- File fd = -1; /* file descriptor */
- ulong num_sort_buffers; /* number of sort buffers in temp file */
+ File m_fd = -1; /* file descriptor */
+ ulong m_num_sort_buffers = 0; /* number of sort buffers in temp file */
};
/* Buffer for sorting in main memory. */
struct merge_buf_info {
/* heap memory allocated for main memory sort/merge */
- std::unique_ptr<uchar[]> block;
+ std::unique_ptr<uchar[]> m_block;
const ulonglong
- block_len; /* amount of data bytes allocated for block above */
- ulonglong curr_offset; /* offset of the record pointer for the block */
- ulonglong disk_start_offset; /* where the chunk starts on disk */
- ulonglong disk_curr_offset; /* current offset on disk */
- ulonglong total_size; /* total # of data bytes in chunk */
+ m_block_len; /* amount of data bytes allocated for block above */
+ ulonglong m_curr_offset; /* offset of the record pointer for the block */
+ ulonglong m_disk_start_offset; /* where the chunk starts on disk */
+ ulonglong m_disk_curr_offset; /* current offset on disk */
+ ulonglong m_total_size; /* total # of data bytes in chunk */
void store_key_value(const rocksdb::Slice &key, const rocksdb::Slice &val)
MY_ATTRIBUTE((__nonnull__));
@@ -78,32 +78,33 @@ public:
MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
inline bool is_chunk_finished() const {
- return curr_offset + disk_curr_offset - disk_start_offset == total_size;
+ return m_curr_offset + m_disk_curr_offset - m_disk_start_offset ==
+ m_total_size;
}
inline bool has_space(uint64 needed) const {
- return curr_offset + needed <= block_len;
+ return m_curr_offset + needed <= m_block_len;
}
explicit merge_buf_info(const ulonglong merge_block_size)
- : block(nullptr), block_len(merge_block_size), curr_offset(0),
- disk_start_offset(0), disk_curr_offset(0),
- total_size(merge_block_size) {
+ : m_block(nullptr), m_block_len(merge_block_size), m_curr_offset(0),
+ m_disk_start_offset(0), m_disk_curr_offset(0),
+ m_total_size(merge_block_size) {
/* Will throw an exception if it runs out of memory here */
- block = std::unique_ptr<uchar[]>(new uchar[merge_block_size]);
+ m_block = std::unique_ptr<uchar[]>(new uchar[merge_block_size]);
/* Initialize entire buffer to 0 to avoid valgrind errors */
- memset(block.get(), 0, merge_block_size);
+ memset(m_block.get(), 0, merge_block_size);
}
};
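is_chunk_finished() treats a chunk as consumed once the bytes used inside the current in-memory window (m_curr_offset) plus the windows already consumed from disk (m_disk_curr_offset - m_disk_start_offset) equal the chunk's actual size, while has_space() guards every read against the window length. A worked sketch of both predicates with made-up numbers (BufInfo is an illustrative stand-in for merge_buf_info):

#include <cstdint>
#include <iostream>

// Illustrative stand-in for merge_buf_info, reduced to the two predicates.
struct BufInfo {
  uint64_t block_len;          // bytes in the in-memory window
  uint64_t curr_offset;        // consumed within the current window
  uint64_t disk_start_offset;  // where this chunk begins in the temp file
  uint64_t disk_curr_offset;   // file offset of the current window
  uint64_t total_size;         // actual bytes in the whole chunk

  bool is_chunk_finished() const {
    return curr_offset + disk_curr_offset - disk_start_offset == total_size;
  }
  bool has_space(uint64_t needed) const {
    return curr_offset + needed <= block_len;
  }
};

int main() {
  // A 100-byte chunk starting at file offset 1000, read through a 40-byte
  // window; this is the third window (offsets 1000, 1040, 1080) with 20
  // bytes consumed so far.
  const BufInfo b = {40, 20, 1000, 1080, 100};
  std::cout << b.is_chunk_finished() << "\n";  // 1: 20 + 1080 - 1000 == 100
  std::cout << b.has_space(8) << "\n";         // 1: 20 + 8 <= 40
  return 0;
}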
/* Represents an entry in the heap during merge phase of external sort */
struct merge_heap_entry {
- std::shared_ptr<merge_buf_info> chunk_info; /* pointer to buffer info */
- uchar *block; /* pointer to heap memory where record is stored */
- const rocksdb::Comparator *const comparator;
- rocksdb::Slice key; /* current key pointed to by block ptr */
- rocksdb::Slice val;
+ std::shared_ptr<merge_buf_info> m_chunk_info; /* pointer to buffer info */
+ uchar *m_block; /* pointer to heap memory where record is stored */
+ const rocksdb::Comparator *const m_comparator;
+ rocksdb::Slice m_key; /* current key pointed to by block ptr */
+ rocksdb::Slice m_val;
size_t prepare(File fd, ulonglong f_offset, ulonglong chunk_size)
MY_ATTRIBUTE((__nonnull__));
@@ -118,35 +119,37 @@ public:
MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
explicit merge_heap_entry(const rocksdb::Comparator *const comparator)
- : chunk_info(nullptr), block(nullptr), comparator(comparator) {}
+ : m_chunk_info(nullptr), m_block(nullptr), m_comparator(comparator) {}
};
struct merge_heap_comparator {
bool operator()(const std::shared_ptr<merge_heap_entry> &lhs,
const std::shared_ptr<merge_heap_entry> &rhs) {
- return lhs->comparator->Compare(rhs->key, lhs->key) < 0;
+ return lhs->m_comparator->Compare(rhs->m_key, lhs->m_key) < 0;
}
};
/* Represents a record in unsorted buffer */
struct merge_record {
- uchar *block; /* points to offset of key in sort buffer */
- const rocksdb::Comparator *const comparator;
+ uchar *m_block; /* points to offset of key in sort buffer */
+ const rocksdb::Comparator *const m_comparator;
bool operator<(const merge_record &record) const {
- return merge_record_compare(this->block, record.block, comparator) < 0;
+ return merge_record_compare(this->m_block, record.m_block, m_comparator) <
+ 0;
}
merge_record(uchar *const block,
const rocksdb::Comparator *const comparator)
- : block(block), comparator(comparator) {}
+ : m_block(block), m_comparator(comparator) {}
};
-private:
+ private:
const char *m_tmpfile_path;
const ulonglong m_merge_buf_size;
const ulonglong m_merge_combine_read_size;
- const rocksdb::Comparator *m_comparator;
+ const ulonglong m_merge_tmp_file_removal_delay;
+ rocksdb::ColumnFamilyHandle *m_cf_handle;
struct merge_file_info m_merge_file;
std::shared_ptr<merge_buf_info> m_rec_buf_unsorted;
std::shared_ptr<merge_buf_info> m_output_buf;
@@ -184,11 +187,12 @@ private:
void read_slice(rocksdb::Slice *slice, const uchar *block_ptr)
MY_ATTRIBUTE((__nonnull__));
-public:
+ public:
Rdb_index_merge(const char *const tmpfile_path,
const ulonglong &merge_buf_size,
const ulonglong &merge_combine_read_size,
- const rocksdb::Comparator *const comparator);
+ const ulonglong &merge_tmp_file_removal_delay,
+ rocksdb::ColumnFamilyHandle *cf);
~Rdb_index_merge();
int init() MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
@@ -213,6 +217,8 @@ public:
MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
void merge_reset();
+
+ rocksdb::ColumnFamilyHandle *get_cf() const { return m_cf_handle; }
};
-} // namespace myrocks
+} // namespace myrocks
diff --git a/storage/rocksdb/rdb_perf_context.cc b/storage/rocksdb/rdb_perf_context.cc
index be75efa73e5..91f4a7d6ae4 100644
--- a/storage/rocksdb/rdb_perf_context.cc
+++ b/storage/rocksdb/rdb_perf_context.cc
@@ -176,6 +176,16 @@ bool Rdb_io_perf::start(const uint32_t perf_context_level) {
return true;
}
+void Rdb_io_perf::update_bytes_written(const uint32_t perf_context_level,
+ ulonglong bytes_written) {
+ const rocksdb::PerfLevel perf_level =
+ static_cast<rocksdb::PerfLevel>(perf_context_level);
+ if (perf_level != rocksdb::kDisable && m_shared_io_perf_write) {
+ io_write_bytes += bytes_written;
+ io_write_requests += 1;
+ }
+}
+
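update_bytes_written() only bumps cheap per-handler counters; the atomic aggregation into the shared my_io_perf_atomic_t sink is deferred to end_and_record(), so the hot write path pays no atomic cost per row. A minimal standalone sketch of that accumulate-then-flush pattern (SharedWriteStats and IoPerf are illustrative stand-ins, not the MyRocks types):

#include <atomic>
#include <cstdint>
#include <iostream>

struct SharedWriteStats {          // stands in for my_io_perf_atomic_t
  std::atomic<uint64_t> bytes{0};
  std::atomic<uint64_t> requests{0};
};

class IoPerf {                     // per-handler accumulator
 public:
  explicit IoPerf(SharedWriteStats *shared) : m_shared(shared) {}

  void update_bytes_written(uint64_t bytes_written) {
    m_bytes += bytes_written;      // cheap, no atomics on the hot path
    m_requests += 1;
  }

  void end_and_record() {          // one atomic flush per statement
    if (m_bytes == 0 && m_requests == 0) return;
    m_shared->bytes.fetch_add(m_bytes, std::memory_order_relaxed);
    m_shared->requests.fetch_add(m_requests, std::memory_order_relaxed);
    m_bytes = m_requests = 0;
  }

 private:
  SharedWriteStats *m_shared;
  uint64_t m_bytes = 0;
  uint64_t m_requests = 0;
};

int main() {
  SharedWriteStats shared;
  IoPerf perf(&shared);
  perf.update_bytes_written(512);
  perf.update_bytes_written(2048);
  perf.end_and_record();
  std::cout << shared.bytes << " bytes in " << shared.requests << " writes\n";
  return 0;
}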
void Rdb_io_perf::end_and_record(const uint32_t perf_context_level) {
const rocksdb::PerfLevel perf_level =
static_cast<rocksdb::PerfLevel>(perf_context_level);
@@ -210,6 +220,18 @@ void Rdb_io_perf::end_and_record(const uint32_t perf_context_level) {
m_stats->table_io_perf_read.sum(io_perf_read);
}
+ if (m_shared_io_perf_write &&
+ (io_write_bytes != 0 || io_write_requests != 0)) {
+ my_io_perf_t io_perf_write;
+ io_perf_write.init();
+ io_perf_write.bytes = io_write_bytes;
+ io_perf_write.requests = io_write_requests;
+ m_shared_io_perf_write->sum(io_perf_write);
+ m_stats->table_io_perf_write.sum(io_perf_write);
+ io_write_bytes = 0;
+ io_write_requests = 0;
+ }
+
if (m_stats) {
if (rocksdb::get_perf_context()->internal_key_skipped_count != 0) {
m_stats->key_skipped +=
diff --git a/storage/rocksdb/rdb_perf_context.h b/storage/rocksdb/rdb_perf_context.h
index 87c6426342e..ad16d95284b 100644
--- a/storage/rocksdb/rdb_perf_context.h
+++ b/storage/rocksdb/rdb_perf_context.h
@@ -108,30 +108,42 @@ class Rdb_io_perf {
// Context management
Rdb_atomic_perf_counters *m_atomic_counters = nullptr;
my_io_perf_atomic_t *m_shared_io_perf_read = nullptr;
+ my_io_perf_atomic_t *m_shared_io_perf_write = nullptr;
ha_statistics *m_stats = nullptr;
-public:
+ uint64_t io_write_bytes;
+ uint64_t io_write_requests;
+
+ public:
Rdb_io_perf(const Rdb_io_perf &) = delete;
Rdb_io_perf &operator=(const Rdb_io_perf &) = delete;
void init(Rdb_atomic_perf_counters *const atomic_counters,
my_io_perf_atomic_t *const shared_io_perf_read,
+ my_io_perf_atomic_t *const shared_io_perf_write,
ha_statistics *const stats) {
DBUG_ASSERT(atomic_counters != nullptr);
DBUG_ASSERT(shared_io_perf_read != nullptr);
+ DBUG_ASSERT(shared_io_perf_write != nullptr);
DBUG_ASSERT(stats != nullptr);
m_atomic_counters = atomic_counters;
m_shared_io_perf_read = shared_io_perf_read;
+ m_shared_io_perf_write = shared_io_perf_write;
m_stats = stats;
+
+ io_write_bytes = 0;
+ io_write_requests = 0;
}
bool start(const uint32_t perf_context_level);
+ void update_bytes_written(const uint32_t perf_context_level,
+ ulonglong bytes_written);
void end_and_record(const uint32_t perf_context_level);
explicit Rdb_io_perf()
: m_atomic_counters(nullptr), m_shared_io_perf_read(nullptr),
- m_stats(nullptr) {}
+ m_stats(nullptr), io_write_bytes(0), io_write_requests(0) {}
};
} // namespace myrocks