summaryrefslogtreecommitdiff
path: root/storage
diff options
context:
space:
mode:
authorSergei Petrunia <psergey@askmonty.org>2018-05-07 21:38:18 +0300
committerSergei Petrunia <psergey@askmonty.org>2018-05-07 21:38:18 +0300
commitdbe73588cd82dc6b6547425e7233b7899f61dfd5 (patch)
treeb64bdea6c90446476d40fd8a477c522d2c499cea /storage
parente44ca6cc9c300cbdf93c64110bd8cf2be8125379 (diff)
parent03edf2ed04dbffe8c413fe0dd2715684e1627371 (diff)
downloadmariadb-git-dbe73588cd82dc6b6547425e7233b7899f61dfd5.tar.gz
Merge branch 'bb-10.2-mariarocks-merge' of github.com:MariaDB/server into 10.2
Manually resolved the conflicts
Diffstat (limited to 'storage')
-rw-r--r--storage/rocksdb/CMakeLists.txt8
-rw-r--r--storage/rocksdb/build_rocksdb.cmake9
-rw-r--r--storage/rocksdb/event_listener.cc10
-rw-r--r--storage/rocksdb/event_listener.h3
-rw-r--r--storage/rocksdb/ha_rocksdb.cc1883
-rw-r--r--storage/rocksdb/ha_rocksdb.h101
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/include/autoinc_crash_safe.inc150
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/include/bulk_load.inc (renamed from storage/rocksdb/mysql-test/rocksdb/t/bulk_load.inc)12
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/include/bulk_load_unsorted.inc144
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/include/restart_mysqld_with_invalid_option.inc8
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/include/start_mysqld_with_option.inc14
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/add_index_inplace_sstfilewriter.result4
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/allow_no_primary_key_with_sk.result17
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/allow_to_start_after_corruption.result38
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/autoinc_crash_safe.result132
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/autoinc_crash_safe_partition.result132
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/autoinc_debug.result107
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/autoinc_vars.result79
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/autoincrement.result1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/bloomfilter5.result62
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/bulk_load.result7
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/bulk_load_drop_table.result11
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/bulk_load_errors.result64
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_cf.result7
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_cf_and_data.result7
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_data.result7
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted.result59
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted_rev.result120
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/cardinality.result35
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/check_ignore_unknown_options.result7
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/deadlock_tracking.result67
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/i_s_ddl.result19
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/i_s_deadlock.result215
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/index_merge_rocksdb.result2
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/information_schema.result7
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/issue255.result47
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/lock_wait_timeout_stats.result8
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/mariadb_port_fixes.result1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/max_open_files.result21
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/optimizer_loose_index_scans.result42
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/perf_context.result32
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/rocksdb.result90
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/rocksdb_debug.result11
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/rocksdb_range2.result2
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/show_engine.result36
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/skip_validate_tmp_table.result18
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/ttl_primary_read_filtering.result28
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/type_varchar.result14
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/use_direct_reads_writes.result32
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/write_sync.result5
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/add_index_inplace_sstfilewriter.test5
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/allow_no_primary_key_with_sk.test12
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/allow_to_start_after_corruption.test75
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/autoinc_crash_safe.cnf8
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/autoinc_crash_safe.test9
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/autoinc_crash_safe_partition.cnf8
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/autoinc_crash_safe_partition.test10
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/autoinc_debug-master.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/autoinc_debug.test118
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/autoinc_vars.test38
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/autoincrement.test3
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/bloomfilter5-master.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/bloomfilter5.test61
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/bulk_load.test2
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/bulk_load_drop_table.test19
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/bulk_load_errors.test97
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/bulk_load_rev_cf.test2
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/bulk_load_rev_cf_and_data.test2
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/bulk_load_rev_data.test2
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/bulk_load_unsorted.test134
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/bulk_load_unsorted_rev.test5
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/cardinality.test42
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/check_ignore_unknown_options.test22
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/deadlock_tracking.test20
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/i_s_ddl.test7
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/i_s_deadlock.test158
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/index_merge_rocksdb.test7
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/index_merge_rocksdb2.test4
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/information_schema.test4
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/insert_optimized_config-master.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/issue255.test32
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/lock_wait_timeout_stats.test4
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/max_open_files.test53
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/mysqldump.test8
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/mysqldump2.test2
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/rocksdb.test5
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/rocksdb_debug.test14
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/rocksdb_range2.test1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/skip_validate_tmp_table.test38
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/ttl_primary_read_filtering.test19
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/type_varchar.test15
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/use_direct_reads_writes.test64
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/write_sync.test9
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/include/rpl_no_unique_check_on_lag.inc1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/r/singledelete_idempotent_recovery.result1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/t/multiclient_2pc.test1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/t/singledelete_idempotent_recovery.test6
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_stress/r/rocksdb_stress.result2
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_stress/r/rocksdb_stress_crash.result2
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_stress/t/load_generator.py13
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_stress/t/rocksdb_stress.test2
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_stress/t/rocksdb_stress_crash.test2
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_allow_to_start_after_corruption_basic.result7
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_bytes_per_sync_basic.result84
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_flush_memtable_on_analyze_basic.result58
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_ignore_unknown_options_basic.result14
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_max_open_files_basic.result10
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_max_row_locks_basic.result18
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_two_write_queues_basic.result (renamed from storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_concurrent_prepare_basic.result)8
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_update_cf_options.result38
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_update_cf_options_basic.result18
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_wal_bytes_per_sync_basic.result84
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_allow_to_start_after_corruption_basic.test6
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_bytes_per_sync_basic.test17
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_flush_memtable_on_analyze_basic.test46
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_ignore_unknown_options_basic.test16
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_max_open_files_basic.test10
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_two_write_queues_basic.test (renamed from storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_concurrent_prepare_basic.test)2
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_update_cf_options.test22
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_update_cf_options_basic.test17
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_wal_bytes_per_sync_basic.test18
-rw-r--r--storage/rocksdb/patch/port/win/io_win.h446
-rw-r--r--storage/rocksdb/properties_collector.cc145
-rw-r--r--storage/rocksdb/properties_collector.h41
-rw-r--r--storage/rocksdb/rdb_cf_options.cc8
-rw-r--r--storage/rocksdb/rdb_cf_options.h3
-rw-r--r--storage/rocksdb/rdb_compact_filter.h2
-rw-r--r--storage/rocksdb/rdb_datadic.cc196
-rw-r--r--storage/rocksdb/rdb_datadic.h161
-rw-r--r--storage/rocksdb/rdb_i_s.cc149
-rw-r--r--storage/rocksdb/rdb_i_s.h1
-rw-r--r--storage/rocksdb/rdb_io_watchdog.cc2
-rw-r--r--storage/rocksdb/rdb_perf_context.cc31
-rw-r--r--storage/rocksdb/rdb_perf_context.h14
-rw-r--r--storage/rocksdb/rdb_psi.cc3
-rw-r--r--storage/rocksdb/rdb_psi.h3
-rw-r--r--storage/rocksdb/rdb_sst_info.cc38
-rw-r--r--storage/rocksdb/rdb_sst_info.h6
-rw-r--r--storage/rocksdb/rdb_utils.cc33
-rw-r--r--storage/rocksdb/rdb_utils.h12
m---------storage/rocksdb/rocksdb0
141 files changed, 5177 insertions, 1716 deletions
diff --git a/storage/rocksdb/CMakeLists.txt b/storage/rocksdb/CMakeLists.txt
index bf95201fb4b..7156150b042 100644
--- a/storage/rocksdb/CMakeLists.txt
+++ b/storage/rocksdb/CMakeLists.txt
@@ -95,6 +95,8 @@ SET(ROCKSDB_SE_SOURCES
rdb_threads.h
rdb_psi.h
rdb_psi.cc
+ rdb_sst_info.cc
+ rdb_sst_info.h
)
# MariaDB: the following is added in build_rocksdb.cmake, when appropriate:
@@ -137,8 +139,6 @@ ADD_CONVENIENCE_LIBRARY(rocksdb_aux_lib
event_listener.h
rdb_perf_context.cc
rdb_perf_context.h
- rdb_sst_info.cc
- rdb_sst_info.h
rdb_buff.h
rdb_mariadb_port.h
)
@@ -178,9 +178,9 @@ IF(HAVE_SCHED_GETCPU)
ADD_DEFINITIONS(-DHAVE_SCHED_GETCPU=1)
ENDIF()
-IF (NOT "$ENV{WITH_TBB}" STREQUAL "")
+IF (WITH_TBB)
SET(rocksdb_static_libs ${rocksdb_static_libs}
- $ENV{WITH_TBB}/libtbb${PIC_EXT}.a)
+ ${WITH_TBB}/lib/libtbb${PIC_EXT}.a)
ADD_DEFINITIONS(-DTBB)
ENDIF()
diff --git a/storage/rocksdb/build_rocksdb.cmake b/storage/rocksdb/build_rocksdb.cmake
index 5810412f566..c76f711463e 100644
--- a/storage/rocksdb/build_rocksdb.cmake
+++ b/storage/rocksdb/build_rocksdb.cmake
@@ -12,11 +12,6 @@ INCLUDE_DIRECTORIES(
${ROCKSDB_SOURCE_DIR}/third-party/gtest-1.7.0/fused-src
)
-IF(WIN32)
- INCLUDE_DIRECTORIES(BEFORE
- ${CMAKE_CURRENT_SOURCE_DIR}/patch)
-ENDIF()
-
list(APPEND CMAKE_MODULE_PATH "${ROCKSDB_SOURCE_DIR}/cmake/modules/")
if(WIN32)
@@ -245,6 +240,7 @@ set(ROCKSDB_SOURCES
table/block_based_table_factory.cc
table/block_based_table_reader.cc
table/block_builder.cc
+ table/block_fetcher.cc
table/block_prefix_index.cc
table/bloom_block.cc
table/cuckoo_table_builder.cc
@@ -340,15 +336,16 @@ set(ROCKSDB_SOURCES
utilities/transactions/optimistic_transaction_db_impl.cc
utilities/transactions/pessimistic_transaction.cc
utilities/transactions/pessimistic_transaction_db.cc
+ utilities/transactions/snapshot_checker.cc
utilities/transactions/transaction_base.cc
utilities/transactions/transaction_db_mutex_impl.cc
utilities/transactions/transaction_lock_mgr.cc
utilities/transactions/transaction_util.cc
utilities/transactions/write_prepared_txn.cc
+ utilities/transactions/write_prepared_txn_db.cc
utilities/ttl/db_ttl_impl.cc
utilities/write_batch_with_index/write_batch_with_index.cc
utilities/write_batch_with_index/write_batch_with_index_internal.cc
-
)
if(WIN32)
diff --git a/storage/rocksdb/event_listener.cc b/storage/rocksdb/event_listener.cc
index 5854da54210..1a621b5df4b 100644
--- a/storage/rocksdb/event_listener.cc
+++ b/storage/rocksdb/event_listener.cc
@@ -83,4 +83,14 @@ void Rdb_event_listener::OnExternalFileIngested(
DBUG_ASSERT(db != nullptr);
update_index_stats(info.table_properties);
}
+
+void Rdb_event_listener::OnBackgroundError(
+ rocksdb::BackgroundErrorReason reason, rocksdb::Status *status) {
+ rdb_log_status_error(*status, "Error detected in background");
+ sql_print_error("RocksDB: BackgroundErrorReason: %d", (int)reason);
+ if (status->IsCorruption()) {
+ rdb_persist_corruption_marker();
+ abort();
+ }
+}
} // namespace myrocks
diff --git a/storage/rocksdb/event_listener.h b/storage/rocksdb/event_listener.h
index d535031644b..8772105de36 100644
--- a/storage/rocksdb/event_listener.h
+++ b/storage/rocksdb/event_listener.h
@@ -37,6 +37,9 @@ public:
rocksdb::DB *db,
const rocksdb::ExternalFileIngestionInfo &ingestion_info) override;
+ void OnBackgroundError(rocksdb::BackgroundErrorReason reason,
+ rocksdb::Status *status) override;
+
private:
Rdb_ddl_manager *m_ddl_manager;
diff --git a/storage/rocksdb/ha_rocksdb.cc b/storage/rocksdb/ha_rocksdb.cc
index 03086db3da9..baada708c61 100644
--- a/storage/rocksdb/ha_rocksdb.cc
+++ b/storage/rocksdb/ha_rocksdb.cc
@@ -144,10 +144,6 @@ static handler *rocksdb_create_handler(my_core::handlerton *hton,
my_core::TABLE_SHARE *table_arg,
my_core::MEM_ROOT *mem_root);
-bool can_use_bloom_filter(THD *thd, const Rdb_key_def &kd,
- const rocksdb::Slice &eq_cond,
- const bool use_all_keys);
-
static rocksdb::CompactRangeOptions getCompactRangeOptions() {
rocksdb::CompactRangeOptions compact_range_options;
compact_range_options.bottommost_level_compaction =
@@ -423,24 +419,37 @@ static void rocksdb_set_collation_exception_list(THD *thd,
void *var_ptr,
const void *save);
-void rocksdb_set_update_cf_options(THD *thd,
- struct st_mysql_sys_var *var,
- void *var_ptr,
- const void *save);
+static int rocksdb_validate_update_cf_options(THD *thd,
+ struct st_mysql_sys_var *var,
+ void *save,
+ st_mysql_value *value);
-static void
-rocksdb_set_bulk_load(THD *thd,
- struct st_mysql_sys_var *var MY_ATTRIBUTE((__unused__)),
- void *var_ptr, const void *save);
+static void rocksdb_set_update_cf_options(THD *thd,
+ struct st_mysql_sys_var *var,
+ void *var_ptr, const void *save);
+
+static int rocksdb_check_bulk_load(THD *const thd,
+ struct st_mysql_sys_var *var
+ MY_ATTRIBUTE((__unused__)),
+ void *save,
+ struct st_mysql_value *value);
-static void rocksdb_set_bulk_load_allow_unsorted(
- THD *thd, struct st_mysql_sys_var *var MY_ATTRIBUTE((__unused__)),
- void *var_ptr, const void *save);
+static int rocksdb_check_bulk_load_allow_unsorted(
+ THD *const thd, struct st_mysql_sys_var *var MY_ATTRIBUTE((__unused__)),
+ void *save, struct st_mysql_value *value);
static void rocksdb_set_max_background_jobs(THD *thd,
struct st_mysql_sys_var *const var,
void *const var_ptr,
const void *const save);
+static void rocksdb_set_bytes_per_sync(THD *thd,
+ struct st_mysql_sys_var *const var,
+ void *const var_ptr,
+ const void *const save);
+static void rocksdb_set_wal_bytes_per_sync(THD *thd,
+ struct st_mysql_sys_var *const var,
+ void *const var_ptr,
+ const void *const save);
//////////////////////////////////////////////////////////////////////////////
// Options definitions
//////////////////////////////////////////////////////////////////////////////
@@ -471,6 +480,7 @@ static char *rocksdb_compact_cf_name;
static char *rocksdb_checkpoint_name;
static my_bool rocksdb_signal_drop_index_thread;
static my_bool rocksdb_strict_collation_check = 1;
+static my_bool rocksdb_ignore_unknown_options = 1;
static my_bool rocksdb_enable_2pc = 0;
static char *rocksdb_strict_collation_exceptions;
static my_bool rocksdb_collect_sst_properties = 1;
@@ -484,7 +494,6 @@ static int rocksdb_debug_ttl_read_filter_ts = 0;
static my_bool rocksdb_debug_ttl_ignore_pk = 0;
static my_bool rocksdb_reset_stats = 0;
static uint32_t rocksdb_io_write_timeout_secs = 0;
-static uint64_t rocksdb_number_stat_computes = 0;
static uint32_t rocksdb_seconds_between_stat_computes = 3600;
static long long rocksdb_compaction_sequential_deletes = 0l;
static long long rocksdb_compaction_sequential_deletes_window = 0l;
@@ -495,11 +504,14 @@ static uint32_t rocksdb_table_stats_sampling_pct;
static my_bool rocksdb_enable_bulk_load_api = 1;
static my_bool rocksdb_print_snapshot_conflict_queries = 0;
static my_bool rocksdb_large_prefix = 0;
+static my_bool rocksdb_allow_to_start_after_corruption = 0;
static char* rocksdb_git_hash;
char *compression_types_val=
const_cast<char*>(get_rocksdb_supported_compression_types());
+std::atomic<uint64_t> rocksdb_row_lock_deadlocks(0);
+std::atomic<uint64_t> rocksdb_row_lock_wait_timeouts(0);
std::atomic<uint64_t> rocksdb_snapshot_conflict_errors(0);
std::atomic<uint64_t> rocksdb_wal_group_syncs(0);
@@ -510,8 +522,9 @@ static std::unique_ptr<rocksdb::DBOptions> rdb_init_rocksdb_db_options(void) {
o->listeners.push_back(std::make_shared<Rdb_event_listener>(&ddl_manager));
o->info_log_level = rocksdb::InfoLogLevel::INFO_LEVEL;
o->max_subcompactions = DEFAULT_SUBCOMPACTIONS;
+ o->max_open_files = -2; // auto-tune to 50% open_files_limit
- o->concurrent_prepare = true;
+ o->two_write_queues = true;
o->manual_wal_flush = true;
return o;
}
@@ -593,6 +606,33 @@ static void rocksdb_set_io_write_timeout(
RDB_MUTEX_UNLOCK_CHECK(rdb_sysvars_mutex);
}
+enum rocksdb_flush_log_at_trx_commit_type : unsigned int {
+ FLUSH_LOG_NEVER = 0,
+ FLUSH_LOG_SYNC,
+ FLUSH_LOG_BACKGROUND,
+ FLUSH_LOG_MAX /* must be last */
+};
+
+static int rocksdb_validate_flush_log_at_trx_commit(
+ THD *const thd,
+ struct st_mysql_sys_var *const var, /* in: pointer to system variable */
+ void *var_ptr, /* out: immediate result for update function */
+ struct st_mysql_value *const value /* in: incoming value */) {
+ long long new_value;
+
+ /* value is NULL */
+ if (value->val_int(value, &new_value)) {
+ return HA_EXIT_FAILURE;
+ }
+
+ if (rocksdb_db_options->allow_mmap_writes && new_value != FLUSH_LOG_NEVER) {
+ return HA_EXIT_FAILURE;
+ }
+
+ *static_cast<uint32_t *>(var_ptr) = static_cast<uint32_t>(new_value);
+ return HA_EXIT_SUCCESS;
+}
+
static const char *index_type_names[] = {"kBinarySearch", "kHashSearch", NullS};
static TYPELIB index_type_typelib = {array_elements(index_type_names) - 1,
@@ -600,7 +640,7 @@ static TYPELIB index_type_typelib = {array_elements(index_type_names) - 1,
nullptr};
const ulong RDB_MAX_LOCK_WAIT_SECONDS = 1024 * 1024 * 1024;
-const ulong RDB_MAX_ROW_LOCKS = 1024 * 1024 * 1024;
+const ulong RDB_MAX_ROW_LOCKS = 1024 * 1024;
const ulong RDB_DEFAULT_BULK_LOAD_SIZE = 1000;
const ulong RDB_MAX_BULK_LOAD_SIZE = 1024 * 1024 * 1024;
const size_t RDB_DEFAULT_MERGE_BUF_SIZE = 64 * 1024 * 1024;
@@ -640,12 +680,13 @@ static MYSQL_THDVAR_BOOL(
bulk_load, PLUGIN_VAR_RQCMDARG,
"Use bulk-load mode for inserts. This disables "
"unique_checks and enables rocksdb_commit_in_the_middle.",
- nullptr, rocksdb_set_bulk_load, FALSE);
+ rocksdb_check_bulk_load, nullptr, FALSE);
static MYSQL_THDVAR_BOOL(bulk_load_allow_unsorted, PLUGIN_VAR_RQCMDARG,
"Allow unsorted input during bulk-load. "
"Can be changed only when bulk load is disabled.",
- nullptr, rocksdb_set_bulk_load_allow_unsorted, FALSE);
+ rocksdb_check_bulk_load_allow_unsorted, nullptr,
+ FALSE);
static MYSQL_SYSVAR_BOOL(enable_bulk_load_api, rocksdb_enable_bulk_load_api,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
@@ -751,11 +792,11 @@ static MYSQL_SYSVAR_BOOL(
rocksdb_db_options->create_if_missing);
static MYSQL_SYSVAR_BOOL(
- concurrent_prepare,
- *reinterpret_cast<my_bool *>(&rocksdb_db_options->concurrent_prepare),
+ two_write_queues,
+ *reinterpret_cast<my_bool *>(&rocksdb_db_options->two_write_queues),
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
- "DBOptions::concurrent_prepare for RocksDB", nullptr, nullptr,
- rocksdb_db_options->concurrent_prepare);
+ "DBOptions::two_write_queues for RocksDB", nullptr, nullptr,
+ rocksdb_db_options->two_write_queues);
static MYSQL_SYSVAR_BOOL(
manual_wal_flush,
@@ -882,7 +923,7 @@ static MYSQL_SYSVAR_INT(max_open_files, rocksdb_db_options->max_open_files,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
"DBOptions::max_open_files for RocksDB", nullptr,
nullptr, rocksdb_db_options->max_open_files,
- /* min */ -1, /* max */ INT_MAX, 0);
+ /* min */ -2, /* max */ INT_MAX, 0);
static MYSQL_SYSVAR_UINT64_T(max_total_wal_size,
rocksdb_db_options->max_total_wal_size,
@@ -1063,16 +1104,18 @@ static MYSQL_SYSVAR_BOOL(
rocksdb_db_options->use_adaptive_mutex);
static MYSQL_SYSVAR_UINT64_T(bytes_per_sync, rocksdb_db_options->bytes_per_sync,
- PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+ PLUGIN_VAR_RQCMDARG,
"DBOptions::bytes_per_sync for RocksDB", nullptr,
- nullptr, rocksdb_db_options->bytes_per_sync,
+ rocksdb_set_bytes_per_sync,
+ rocksdb_db_options->bytes_per_sync,
/* min */ 0L, /* max */ ULONGLONG_MAX, 0);
static MYSQL_SYSVAR_UINT64_T(wal_bytes_per_sync,
rocksdb_db_options->wal_bytes_per_sync,
- PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+ PLUGIN_VAR_RQCMDARG,
"DBOptions::wal_bytes_per_sync for RocksDB", nullptr,
- nullptr, rocksdb_db_options->wal_bytes_per_sync,
+ rocksdb_set_wal_bytes_per_sync,
+ rocksdb_db_options->wal_bytes_per_sync,
/* min */ 0L, /* max */ ULONGLONG_MAX, 0);
static MYSQL_SYSVAR_BOOL(
@@ -1190,22 +1233,17 @@ static MYSQL_SYSVAR_STR(override_cf_options, rocksdb_override_cf_options,
static MYSQL_SYSVAR_STR(update_cf_options, rocksdb_update_cf_options,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_MEMALLOC
/* psergey-merge: need this? : PLUGIN_VAR_ALLOCATED*/,
- "Option updates per column family for RocksDB", nullptr,
+ "Option updates per column family for RocksDB",
+ rocksdb_validate_update_cf_options,
rocksdb_set_update_cf_options, nullptr);
-enum rocksdb_flush_log_at_trx_commit_type : unsigned int {
- FLUSH_LOG_NEVER = 0,
- FLUSH_LOG_SYNC,
- FLUSH_LOG_BACKGROUND,
- FLUSH_LOG_MAX /* must be last */
-};
-
static MYSQL_SYSVAR_UINT(flush_log_at_trx_commit,
rocksdb_flush_log_at_trx_commit, PLUGIN_VAR_RQCMDARG,
"Sync on transaction commit. Similar to "
"innodb_flush_log_at_trx_commit. 1: sync on commit, "
"0,2: not sync on commit",
- nullptr, nullptr, /* default */ FLUSH_LOG_SYNC,
+ rocksdb_validate_flush_log_at_trx_commit, nullptr,
+ /* default */ FLUSH_LOG_SYNC,
/* min */ FLUSH_LOG_NEVER,
/* max */ FLUSH_LOG_BACKGROUND, 0);
@@ -1346,6 +1384,11 @@ static MYSQL_SYSVAR_BOOL(enable_2pc, rocksdb_enable_2pc, PLUGIN_VAR_RQCMDARG,
"Enable two phase commit for MyRocks", nullptr,
nullptr, TRUE);
+static MYSQL_SYSVAR_BOOL(ignore_unknown_options, rocksdb_ignore_unknown_options,
+ PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
+ "Enable ignoring unknown options passed to RocksDB",
+ nullptr, nullptr, TRUE);
+
static MYSQL_SYSVAR_BOOL(strict_collation_check, rocksdb_strict_collation_check,
PLUGIN_VAR_RQCMDARG,
"Enforce case sensitive collation for MyRocks indexes",
@@ -1377,11 +1420,6 @@ static MYSQL_SYSVAR_BOOL(
rocksdb_force_flush_memtable_and_lzero_now,
rocksdb_force_flush_memtable_and_lzero_now_stub, FALSE);
-static MYSQL_THDVAR_BOOL(
- flush_memtable_on_analyze, PLUGIN_VAR_RQCMDARG,
- "Forces memtable flush on ANALZYE table to get accurate cardinality",
- nullptr, nullptr, true);
-
static MYSQL_SYSVAR_UINT(
seconds_between_stat_computes, rocksdb_seconds_between_stat_computes,
PLUGIN_VAR_RQCMDARG,
@@ -1498,6 +1536,13 @@ static MYSQL_SYSVAR_BOOL(
"index prefix length is 767.",
nullptr, nullptr, FALSE);
+static MYSQL_SYSVAR_BOOL(
+ allow_to_start_after_corruption, rocksdb_allow_to_start_after_corruption,
+ PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
+ "Allow server still to start successfully even if RocksDB corruption is "
+ "detected.",
+ nullptr, nullptr, FALSE);
+
static const int ROCKSDB_ASSUMED_KEY_VALUE_DISK_SIZE = 100;
static struct st_mysql_sys_var *rocksdb_system_variables[] = {
@@ -1523,7 +1568,7 @@ static struct st_mysql_sys_var *rocksdb_system_variables[] = {
MYSQL_SYSVAR(skip_bloom_filter_on_read),
MYSQL_SYSVAR(create_if_missing),
- MYSQL_SYSVAR(concurrent_prepare),
+ MYSQL_SYSVAR(two_write_queues),
MYSQL_SYSVAR(manual_wal_flush),
MYSQL_SYSVAR(create_missing_column_families),
MYSQL_SYSVAR(error_if_exists),
@@ -1605,6 +1650,7 @@ static struct st_mysql_sys_var *rocksdb_system_variables[] = {
MYSQL_SYSVAR(signal_drop_index_thread),
MYSQL_SYSVAR(pause_background_work),
MYSQL_SYSVAR(enable_2pc),
+ MYSQL_SYSVAR(ignore_unknown_options),
MYSQL_SYSVAR(strict_collation_check),
MYSQL_SYSVAR(strict_collation_exceptions),
MYSQL_SYSVAR(collect_sst_properties),
@@ -1618,7 +1664,6 @@ static struct st_mysql_sys_var *rocksdb_system_variables[] = {
MYSQL_SYSVAR(debug_ttl_ignore_pk),
MYSQL_SYSVAR(reset_stats),
MYSQL_SYSVAR(io_write_timeout),
- MYSQL_SYSVAR(flush_memtable_on_analyze),
MYSQL_SYSVAR(seconds_between_stat_computes),
MYSQL_SYSVAR(compaction_sequential_deletes),
@@ -1640,6 +1685,7 @@ static struct st_mysql_sys_var *rocksdb_system_variables[] = {
MYSQL_SYSVAR(table_stats_sampling_pct),
MYSQL_SYSVAR(large_prefix),
+ MYSQL_SYSVAR(allow_to_start_after_corruption),
MYSQL_SYSVAR(git_hash),
nullptr};
@@ -1768,6 +1814,7 @@ protected:
ulonglong m_update_count = 0;
ulonglong m_delete_count = 0;
ulonglong m_lock_count = 0;
+ std::unordered_map<GL_INDEX_ID, ulonglong> m_auto_incr_map;
bool m_is_delayed_snapshot = false;
bool m_is_two_phase = false;
@@ -1819,7 +1866,30 @@ protected:
virtual void do_set_savepoint() = 0;
virtual void do_rollback_to_savepoint() = 0;
-public:
+ /*
+ @detail
+ This function takes in the WriteBatch of the transaction to add
+ all the AUTO_INCREMENT merges. It does so by iterating through
+ m_auto_incr_map and then constructing key/value pairs to call merge upon.
+
+ @param wb
+ */
+ rocksdb::Status merge_auto_incr_map(rocksdb::WriteBatchBase *const wb) {
+ DBUG_EXECUTE_IF("myrocks_autoinc_upgrade", return rocksdb::Status::OK(););
+
+ // Iterate through the merge map merging all keys into data dictionary.
+ rocksdb::Status s;
+ for (auto &it : m_auto_incr_map) {
+ s = dict_manager.put_auto_incr_val(wb, it.first, it.second);
+ if (!s.ok()) {
+ return s;
+ }
+ }
+ m_auto_incr_map.clear();
+ return s;
+ }
+
+ public:
const char *m_mysql_log_file_name;
my_off_t m_mysql_log_offset;
#ifdef MARIAROCKS_NOT_YET
@@ -1888,6 +1958,7 @@ public:
m_detailed_error.copy(timeout_message(
"index", tbl_def->full_tablename().c_str(), kd.get_name().c_str()));
table_handler->m_lock_wait_timeout_counter.inc();
+ rocksdb_row_lock_wait_timeouts++;
return HA_ERR_LOCK_WAIT_TIMEOUT;
}
@@ -1897,6 +1968,7 @@ public:
false /* just statement */);
m_detailed_error = String();
table_handler->m_deadlock_counter.inc();
+ rocksdb_row_lock_deadlocks++;
return HA_ERR_LOCK_DEADLOCK;
} else if (s.IsBusy()) {
rocksdb_snapshot_conflict_errors++;
@@ -2026,10 +2098,16 @@ public:
rollback();
return true;
} else {
+#ifdef MARIAROCKS_NOT_YET
+ /*
+ Storing binlog position inside MyRocks is needed only for restoring
+ MyRocks from backups. This feature is not supported yet.
+ */
mysql_bin_log_commit_pos(m_thd, &m_mysql_log_offset,
&m_mysql_log_file_name);
binlog_manager.update(m_mysql_log_file_name, m_mysql_log_offset,
get_write_batch());
+#endif
return commit_no_binlog();
}
}
@@ -2050,28 +2128,110 @@ public:
bool has_snapshot() const { return m_read_opts.snapshot != nullptr; }
private:
- // The tables we are currently loading. In a partitioned table this can
- // have more than one entry
- std::vector<ha_rocksdb *> m_curr_bulk_load;
+ // The Rdb_sst_info structures we are currently loading. In a partitioned
+ // table this can have more than one entry
+ std::vector<std::shared_ptr<Rdb_sst_info>> m_curr_bulk_load;
+ std::string m_curr_bulk_load_tablename;
+
+ /* External merge sorts for bulk load: key ID -> merge sort instance */
+ std::unordered_map<GL_INDEX_ID, Rdb_index_merge> m_key_merge;
public:
- int finish_bulk_load() {
- int rc = 0;
+ int get_key_merge(GL_INDEX_ID kd_gl_id, rocksdb::ColumnFamilyHandle *cf,
+ Rdb_index_merge **key_merge) {
+ int res;
+ auto it = m_key_merge.find(kd_gl_id);
+ if (it == m_key_merge.end()) {
+ m_key_merge.emplace(
+ std::piecewise_construct, std::make_tuple(kd_gl_id),
+ std::make_tuple(
+ get_rocksdb_tmpdir(), THDVAR(get_thd(), merge_buf_size),
+ THDVAR(get_thd(), merge_combine_read_size),
+ THDVAR(get_thd(), merge_tmp_file_removal_delay_ms), cf));
+ it = m_key_merge.find(kd_gl_id);
+ if ((res = it->second.init()) != 0) {
+ return res;
+ }
+ }
+ *key_merge = &it->second;
+ return HA_EXIT_SUCCESS;
+ }
- std::vector<ha_rocksdb *>::iterator it;
- while ((it = m_curr_bulk_load.begin()) != m_curr_bulk_load.end()) {
- int rc2 = (*it)->finalize_bulk_load();
+ int finish_bulk_load(int print_client_error = true) {
+ int rc = 0, rc2;
+
+ std::vector<std::shared_ptr<Rdb_sst_info>>::iterator it;
+ for (it = m_curr_bulk_load.begin(); it != m_curr_bulk_load.end(); it++) {
+ rc2 = (*it)->commit(print_client_error);
if (rc2 != 0 && rc == 0) {
rc = rc2;
}
}
-
+ m_curr_bulk_load.clear();
+ m_curr_bulk_load_tablename.clear();
DBUG_ASSERT(m_curr_bulk_load.size() == 0);
+ // Flush the index_merge sort buffers
+ if (!m_key_merge.empty()) {
+ rocksdb::Slice merge_key;
+ rocksdb::Slice merge_val;
+ for (auto it = m_key_merge.begin(); it != m_key_merge.end(); it++) {
+ GL_INDEX_ID index_id = it->first;
+ std::shared_ptr<const Rdb_key_def> keydef =
+ ddl_manager.safe_find(index_id);
+ std::string table_name = ddl_manager.safe_get_table_name(index_id);
+
+ // Unable to find key definition or table name since the
+ // table could have been dropped.
+ // TODO(herman): there is a race here between dropping the table
+ // and detecting a drop here. If the table is dropped while bulk
+ // loading is finishing, these keys being added here may
+ // be missed by the compaction filter and not be marked for
+ // removal. It is unclear how to lock the sql table from the storage
+ // engine to prevent modifications to it while bulk load is occurring.
+ if (keydef == nullptr || table_name.empty()) {
+ rc2 = HA_ERR_ROCKSDB_BULK_LOAD;
+ break;
+ }
+ const std::string &index_name = keydef->get_name();
+ Rdb_index_merge &rdb_merge = it->second;
+
+ // Rdb_sst_info expects a denormalized table name in the form of
+ // "./database/table"
+ std::replace(table_name.begin(), table_name.end(), '.', '/');
+ table_name = "./" + table_name;
+ Rdb_sst_info sst_info(rdb, table_name, index_name, rdb_merge.get_cf(),
+ *rocksdb_db_options,
+ THDVAR(get_thd(), trace_sst_api));
+
+ while ((rc2 = rdb_merge.next(&merge_key, &merge_val)) == 0) {
+ if ((rc2 = sst_info.put(merge_key, merge_val)) != 0) {
+ break;
+ }
+ }
+
+ // rc2 == -1 => finished ok; rc2 > 0 => error
+ if (rc2 > 0 || (rc2 = sst_info.commit(print_client_error)) != 0) {
+ if (rc == 0) {
+ rc = rc2;
+ }
+ break;
+ }
+ }
+ m_key_merge.clear();
+
+ /*
+ Explicitly tell jemalloc to clean up any unused dirty pages at this
+ point.
+ See https://reviews.facebook.net/D63723 for more details.
+ */
+ purge_all_jemalloc_arenas();
+ }
return rc;
}
- void start_bulk_load(ha_rocksdb *const bulk_load) {
+ int start_bulk_load(ha_rocksdb *const bulk_load,
+ std::shared_ptr<Rdb_sst_info> sst_info) {
/*
If we already have an open bulk load of a table and the name doesn't
match the current one, close out the currently running one. This allows
@@ -2081,29 +2241,46 @@ public:
DBUG_ASSERT(bulk_load != nullptr);
if (!m_curr_bulk_load.empty() &&
- !bulk_load->same_table(*m_curr_bulk_load[0])) {
+ bulk_load->get_table_basename() != m_curr_bulk_load_tablename) {
const auto res = finish_bulk_load();
- SHIP_ASSERT(res == 0);
- }
-
- m_curr_bulk_load.push_back(bulk_load);
- }
-
- void end_bulk_load(ha_rocksdb *const bulk_load) {
- for (auto it = m_curr_bulk_load.begin(); it != m_curr_bulk_load.end();
- it++) {
- if (*it == bulk_load) {
- m_curr_bulk_load.erase(it);
- return;
+ if (res != HA_EXIT_SUCCESS) {
+ m_curr_bulk_load.clear();
+ m_curr_bulk_load_tablename.clear();
+ return res;
}
}
- // Should not reach here
- SHIP_ASSERT(0);
+ /*
+ This used to track ha_rocksdb handler objects, but those can be
+ freed by the table cache while this was referencing them. Instead
+ of tracking ha_rocksdb handler objects, this now tracks the
+ Rdb_sst_info allocated; the ha_rocksdb handler and the
+ Rdb_transaction both hold shared pointers to them.
+
+ On transaction completion, it will commit each Rdb_sst_info structure found.
+ If the ha_rocksdb object is freed, etc., it will also commit
+ the Rdb_sst_info. The Rdb_sst_info commit path needs to be idempotent.
+ */
+ m_curr_bulk_load.push_back(sst_info);
+ m_curr_bulk_load_tablename = bulk_load->get_table_basename();
+ return HA_EXIT_SUCCESS;
}
int num_ongoing_bulk_load() const { return m_curr_bulk_load.size(); }
+ const char *get_rocksdb_tmpdir() const {
+ const char *tmp_dir = THDVAR(get_thd(), tmpdir);
+
+ /*
+ We want to treat an empty string as nullptr; in that case DDL operations
+ will use the default --tmpdir passed to mysql instead.
+ */
+ if (tmp_dir != nullptr && *tmp_dir == '\0') {
+ tmp_dir = nullptr;
+ }
+ return (tmp_dir);
+ }
+
/*
Flush the data accumulated so far. This assumes we're doing a bulk insert.
@@ -2130,6 +2307,20 @@ public:
return false;
}
+ void set_auto_incr(const GL_INDEX_ID &gl_index_id, ulonglong curr_id) {
+ m_auto_incr_map[gl_index_id] =
+ std::max(m_auto_incr_map[gl_index_id], curr_id);
+ }
+
+#ifndef NDEBUG
+ ulonglong get_auto_incr(const GL_INDEX_ID &gl_index_id) {
+ if (m_auto_incr_map.count(gl_index_id) > 0) {
+ return m_auto_incr_map[gl_index_id];
+ }
+ return 0;
+ }
+#endif
+
virtual rocksdb::Status put(rocksdb::ColumnFamilyHandle *const column_family,
const rocksdb::Slice &key,
const rocksdb::Slice &value) = 0;
@@ -2153,15 +2344,17 @@ public:
virtual rocksdb::Status get(rocksdb::ColumnFamilyHandle *const column_family,
const rocksdb::Slice &key,
- std::string *value) const = 0;
+ rocksdb::PinnableSlice *const value) const = 0;
virtual rocksdb::Status
get_for_update(rocksdb::ColumnFamilyHandle *const column_family,
- const rocksdb::Slice &key, std::string *const value,
+ const rocksdb::Slice &key, rocksdb::PinnableSlice *const value,
bool exclusive) = 0;
rocksdb::Iterator *
get_iterator(rocksdb::ColumnFamilyHandle *const column_family,
bool skip_bloom_filter, bool fill_cache,
+ const rocksdb::Slice &eq_cond_lower_bound,
+ const rocksdb::Slice &eq_cond_upper_bound,
bool read_current = false, bool create_snapshot = true) {
// Make sure we are not doing both read_current (which implies we don't
// want a snapshot) and create_snapshot which makes sure we create
@@ -2176,6 +2369,8 @@ public:
if (skip_bloom_filter) {
options.total_order_seek = true;
+ options.iterate_lower_bound = &eq_cond_lower_bound;
+ options.iterate_upper_bound = &eq_cond_upper_bound;
} else {
// With this option, Iterator::Valid() returns false if key
// is outside of the prefix bloom filter range set at Seek().
@@ -2333,6 +2528,12 @@ private:
return false;
}
+ s = merge_auto_incr_map(m_rocksdb_tx->GetWriteBatch()->GetWriteBatch());
+ if (!s.ok()) {
+ rdb_handle_io_error(s, RDB_IO_ERROR_TX_COMMIT);
+ return false;
+ }
+
s = m_rocksdb_tx->Prepare();
if (!s.ok()) {
rdb_handle_io_error(s, RDB_IO_ERROR_TX_COMMIT);
@@ -2343,13 +2544,24 @@ private:
bool commit_no_binlog() override {
bool res = false;
+ rocksdb::Status s;
+
+ s = merge_auto_incr_map(m_rocksdb_tx->GetWriteBatch()->GetWriteBatch());
+ if (!s.ok()) {
+ rdb_handle_io_error(s, RDB_IO_ERROR_TX_COMMIT);
+ res = true;
+ goto error;
+ }
+
release_snapshot();
- const rocksdb::Status s = m_rocksdb_tx->Commit();
+ s = m_rocksdb_tx->Commit();
if (!s.ok()) {
rdb_handle_io_error(s, RDB_IO_ERROR_TX_COMMIT);
res = true;
+ goto error;
}
+error:
/* Save the transaction object to be reused */
release_tx();
@@ -2370,6 +2582,7 @@ public:
m_update_count = 0;
m_delete_count = 0;
m_lock_count = 0;
+ m_auto_incr_map.clear();
m_ddl_transaction = false;
if (m_rocksdb_tx) {
release_snapshot();
@@ -2471,18 +2684,25 @@ public:
rocksdb::Status get(rocksdb::ColumnFamilyHandle *const column_family,
const rocksdb::Slice &key,
- std::string *value) const override {
+ rocksdb::PinnableSlice *const value) const override {
+ // Clean the PinnableSlice right before Get() to support multiple gets per
+ // statement; the resources held after the last Get in a statement are
+ // cleared in the handler::reset call.
+ value->Reset();
global_stats.queries[QUERIES_POINT].inc();
return m_rocksdb_tx->Get(m_read_opts, column_family, key, value);
}
rocksdb::Status
get_for_update(rocksdb::ColumnFamilyHandle *const column_family,
- const rocksdb::Slice &key, std::string *const value,
+ const rocksdb::Slice &key, rocksdb::PinnableSlice *const value,
bool exclusive) override {
if (++m_lock_count > m_max_row_locks)
return rocksdb::Status::Aborted(rocksdb::Status::kLockLimit);
+ if (value != nullptr) {
+ value->Reset();
+ }
return m_rocksdb_tx->GetForUpdate(m_read_opts, column_family, key, value,
exclusive);
}
@@ -2620,13 +2840,24 @@ private:
bool commit_no_binlog() override {
bool res = false;
+ rocksdb::Status s;
+
+ s = merge_auto_incr_map(m_batch->GetWriteBatch());
+ if (!s.ok()) {
+ rdb_handle_io_error(s, RDB_IO_ERROR_TX_COMMIT);
+ res = true;
+ goto error;
+ }
+
release_snapshot();
- const rocksdb::Status s =
- rdb->GetBaseDB()->Write(write_opts, m_batch->GetWriteBatch());
+
+ s = rdb->GetBaseDB()->Write(write_opts, m_batch->GetWriteBatch());
if (!s.ok()) {
rdb_handle_io_error(s, RDB_IO_ERROR_TX_COMMIT);
res = true;
+ goto error;
}
+error:
reset();
m_write_count = 0;
@@ -2725,14 +2956,15 @@ public:
rocksdb::Status get(rocksdb::ColumnFamilyHandle *const column_family,
const rocksdb::Slice &key,
- std::string *const value) const override {
+ rocksdb::PinnableSlice *const value) const override {
+ value->Reset();
return m_batch->GetFromBatchAndDB(rdb, m_read_opts, column_family, key,
value);
}
rocksdb::Status
get_for_update(rocksdb::ColumnFamilyHandle *const column_family,
- const rocksdb::Slice &key, std::string *const value,
+ const rocksdb::Slice &key, rocksdb::PinnableSlice *const value,
bool exclusive) override {
return get(column_family, key, value);
}
@@ -2863,13 +3095,12 @@ static Rdb_transaction *get_or_create_tx(THD *const thd) {
static int rocksdb_close_connection(handlerton *const hton, THD *const thd) {
Rdb_transaction *&tx = get_tx_from_thd(thd);
if (tx != nullptr) {
- int rc = tx->finish_bulk_load();
+ int rc = tx->finish_bulk_load(false);
if (rc != 0) {
// NO_LINT_DEBUG
sql_print_error("RocksDB: Error %d finalizing last SST file while "
"disconnecting",
rc);
- abort_with_stack_traces();
}
delete tx;
@@ -2918,7 +3149,8 @@ static bool rocksdb_flush_wal(handlerton* hton __attribute__((__unused__)))
/*
target_lsn is set to 0 when MySQL wants to sync the wal files
*/
- if (target_lsn == 0 || rocksdb_flush_log_at_trx_commit != FLUSH_LOG_NEVER) {
+ if ((target_lsn == 0 && !rocksdb_db_options->allow_mmap_writes) ||
+ rocksdb_flush_log_at_trx_commit != FLUSH_LOG_NEVER) {
rocksdb_wal_group_syncs++;
s = rdb->FlushWAL(target_lsn == 0 ||
rocksdb_flush_log_at_trx_commit == FLUSH_LOG_SYNC);
@@ -2949,7 +3181,11 @@ static int rocksdb_prepare(handlerton* hton, THD* thd, bool prepare_tx)
/* We were instructed to prepare the whole transaction, or
this is an SQL statement end and autocommit is on */
-#ifdef MARIAROCKS_NOT_YET // Crash-safe slave does not work yet
+#ifdef MARIAROCKS_NOT_YET
+ /*
+ Storing binlog position inside MyRocks is needed only for restoring
+ MyRocks from backups. This feature is not supported yet.
+ */
std::vector<st_slave_gtid_info> slave_gtid_info;
my_core::thd_slave_gtid_info(thd, &slave_gtid_info);
for (const auto &it : slave_gtid_info) {
@@ -3403,79 +3639,54 @@ private:
"=========================================\n";
}
- static std::string get_dlock_txn_info(const rocksdb::DeadlockInfo &txn,
- const GL_INDEX_ID &gl_index_id,
- bool is_last_path = false) {
- std::string txn_data;
+ static Rdb_deadlock_info::Rdb_dl_trx_info
+ get_dl_txn_info(const rocksdb::DeadlockInfo &txn,
+ const GL_INDEX_ID &gl_index_id) {
+ Rdb_deadlock_info::Rdb_dl_trx_info txn_data;
- /* extract table name and index names using the index id */
- std::string table_name = ddl_manager.safe_get_table_name(gl_index_id);
- if (table_name.empty()) {
- table_name =
+ txn_data.trx_id = txn.m_txn_id;
+
+ txn_data.table_name = ddl_manager.safe_get_table_name(gl_index_id);
+ if (txn_data.table_name.empty()) {
+ txn_data.table_name =
"NOT FOUND; INDEX_ID: " + std::to_string(gl_index_id.index_id);
}
+
auto kd = ddl_manager.safe_find(gl_index_id);
- std::string idx_name =
+ txn_data.index_name =
(kd) ? kd->get_name()
: "NOT FOUND; INDEX_ID: " + std::to_string(gl_index_id.index_id);
- /* get the name of the column family */
rocksdb::ColumnFamilyHandle *cfh = cf_manager.get_cf(txn.m_cf_id);
- std::string cf_name = cfh->GetName();
-
- txn_data += format_string(
- "TRANSACTIONID: %u\n"
- "COLUMN FAMILY NAME: %s\n"
- "WAITING KEY: %s\n"
- "LOCK TYPE: %s\n"
- "INDEX NAME: %s\n"
- "TABLE NAME: %s\n",
- txn.m_txn_id, cf_name.c_str(),
- rdb_hexdump(txn.m_waiting_key.c_str(), txn.m_waiting_key.length())
- .c_str(),
- txn.m_exclusive ? "EXCLUSIVE" : "SHARED", idx_name.c_str(),
- table_name.c_str());
- if (!is_last_path) {
- txn_data += "---------------WAITING FOR---------------\n";
- }
+ txn_data.cf_name = cfh->GetName();
+
+ txn_data.waiting_key =
+ rdb_hexdump(txn.m_waiting_key.c_str(), txn.m_waiting_key.length());
+
+ txn_data.exclusive_lock = txn.m_exclusive;
+
return txn_data;
}
- static std::string
- get_dlock_path_info(const rocksdb::DeadlockPath &path_entry) {
- std::string path_data;
- if (path_entry.limit_exceeded) {
- path_data += "\n-------DEADLOCK EXCEEDED MAX DEPTH-------\n";
- } else {
- path_data += "\n*** DEADLOCK PATH\n"
- "=========================================\n";
- for (auto it = path_entry.path.begin(); it != path_entry.path.end();
- it++) {
- auto txn = *it;
- const GL_INDEX_ID gl_index_id = {
- txn.m_cf_id, rdb_netbuf_to_uint32(reinterpret_cast<const uchar *>(
- txn.m_waiting_key.c_str()))};
- path_data += get_dlock_txn_info(txn, gl_index_id);
- }
+ static Rdb_deadlock_info
+ get_dl_path_trx_info(const rocksdb::DeadlockPath &path_entry) {
+ Rdb_deadlock_info deadlock_info;
- DBUG_ASSERT_IFF(path_entry.limit_exceeded, path_entry.path.empty());
- /* print the first txn in the path to display the full deadlock cycle */
- if (!path_entry.path.empty() && !path_entry.limit_exceeded) {
- auto txn = path_entry.path[0];
- const GL_INDEX_ID gl_index_id = {
- txn.m_cf_id, rdb_netbuf_to_uint32(reinterpret_cast<const uchar *>(
- txn.m_waiting_key.c_str()))};
- path_data += get_dlock_txn_info(txn, gl_index_id, true);
-
- /* prints the txn id of the transaction that caused the deadlock */
- auto deadlocking_txn = *(path_entry.path.end() - 1);
- path_data +=
- format_string("\n--------TRANSACTIONID: %u GOT DEADLOCK---------\n",
- deadlocking_txn.m_txn_id);
- }
+ for (auto it = path_entry.path.begin(); it != path_entry.path.end();
+ it++) {
+ auto txn = *it;
+ const GL_INDEX_ID gl_index_id = {
+ txn.m_cf_id, rdb_netbuf_to_uint32(reinterpret_cast<const uchar *>(
+ txn.m_waiting_key.c_str()))};
+ deadlock_info.path.push_back(get_dl_txn_info(txn, gl_index_id));
}
-
- return path_data;
+ DBUG_ASSERT_IFF(path_entry.limit_exceeded, path_entry.path.empty());
+ /* record the txn id of the last entry in the path as the deadlock victim */
+ if (!path_entry.path.empty() && !path_entry.limit_exceeded) {
+ auto deadlocking_txn = *(path_entry.path.end() - 1);
+ deadlock_info.victim_trx_id = deadlocking_txn.m_txn_id;
+ }
+ return deadlock_info;
}
public:
@@ -3514,9 +3725,48 @@ private:
m_data += "----------LATEST DETECTED DEADLOCKS----------\n";
for (auto path_entry : dlock_buffer) {
- m_data += get_dlock_path_info(path_entry);
+ std::string path_data;
+ if (path_entry.limit_exceeded) {
+ path_data += "\n-------DEADLOCK EXCEEDED MAX DEPTH-------\n";
+ } else {
+ path_data += "\n*** DEADLOCK PATH\n"
+ "=========================================\n";
+ const auto dl_info = get_dl_path_trx_info(path_entry);
+ for (auto it = dl_info.path.begin(); it != dl_info.path.end(); it++) {
+ const auto trx_info = *it;
+ path_data += format_string(
+ "TRANSACTION ID: %u\n"
+ "COLUMN FAMILY NAME: %s\n"
+ "WAITING KEY: %s\n"
+ "LOCK TYPE: %s\n"
+ "INDEX NAME: %s\n"
+ "TABLE NAME: %s\n",
+ trx_info.trx_id, trx_info.cf_name.c_str(),
+ trx_info.waiting_key.c_str(),
+ trx_info.exclusive_lock ? "EXCLUSIVE" : "SHARED",
+ trx_info.index_name.c_str(), trx_info.table_name.c_str());
+ if (it != dl_info.path.end() - 1) {
+ path_data += "---------------WAITING FOR---------------\n";
+ }
+ }
+ path_data +=
+ format_string("\n--------TRANSACTION ID: %u GOT DEADLOCK---------\n",
+ dl_info.victim_trx_id);
+ }
+ m_data += path_data;
}
}
+
+ std::vector<Rdb_deadlock_info> get_deadlock_info() {
+ std::vector<Rdb_deadlock_info> deadlock_info;
+ auto dlock_buffer = rdb->GetDeadlockInfoBuffer();
+ for (auto path_entry : dlock_buffer) {
+ if (!path_entry.limit_exceeded) {
+ deadlock_info.push_back(get_dl_path_trx_info(path_entry));
+ }
+ }
+ return deadlock_info;
+ }
};
/**
@@ -3605,6 +3855,17 @@ std::vector<Rdb_trx_info> rdb_get_all_trx_info() {
return trx_info;
}
+
+/*
+ returns a vector of info of recent deadlocks
+ for use by information_schema.rocksdb_deadlock
+*/
+std::vector<Rdb_deadlock_info> rdb_get_deadlock_info() {
+ Rdb_snapshot_status showStatus;
+ Rdb_transaction::walk_tx_list(&showStatus);
+ return showStatus.get_deadlock_info();
+}
+
#ifdef MARIAROCKS_NOT_YET
/* Generate the snapshot status table */
static bool rocksdb_show_snapshot_status(handlerton *const hton, THD *const thd,
@@ -3911,6 +4172,7 @@ static void rocksdb_update_table_stats(
comp_stats_t comp_stats;
uint lock_wait_timeout_stats;
uint deadlock_stats;
+ uint lock_wait_stats;
std::vector<std::string> tablenames;
/*
@@ -3957,6 +4219,9 @@ static void rocksdb_update_table_stats(
io_perf_write.requests = table_handler->m_io_perf_write.requests.load();
lock_wait_timeout_stats = table_handler->m_lock_wait_timeout_counter.load();
deadlock_stats = table_handler->m_deadlock_counter.load();
+ lock_wait_stats =
+ table_handler->m_table_perf_context.m_value[PC_KEY_LOCK_WAIT_COUNT]
+ .load();
/*
Convert from rocksdb timer to mysql timer. RocksDB values are
@@ -3984,7 +4249,7 @@ static void rocksdb_update_table_stats(
sizeof(tablename_sys));
(*cb)(dbname_sys, tablename_sys, is_partition, &io_perf_read,
&io_perf_write, &io_perf, &io_perf, &io_perf, &page_stats,
- &comp_stats, 0, lock_wait_timeout_stats, deadlock_stats,
+ &comp_stats, lock_wait_stats, lock_wait_timeout_stats, deadlock_stats,
rocksdb_hton_name);
}
}
@@ -3996,8 +4261,9 @@ static rocksdb::Status check_rocksdb_options_compatibility(
rocksdb::DBOptions loaded_db_opt;
std::vector<rocksdb::ColumnFamilyDescriptor> loaded_cf_descs;
- rocksdb::Status status = LoadLatestOptions(dbpath, rocksdb::Env::Default(),
- &loaded_db_opt, &loaded_cf_descs);
+ rocksdb::Status status =
+ LoadLatestOptions(dbpath, rocksdb::Env::Default(), &loaded_db_opt,
+ &loaded_cf_descs, rocksdb_ignore_unknown_options);
// If we're starting from scratch and there are no options saved yet then this
// is a valid case. Therefore we can't compare the current set of options to
@@ -4036,7 +4302,8 @@ static rocksdb::Status check_rocksdb_options_compatibility(
// This is the essence of the function - determine if it's safe to open the
// database or not.
status = CheckOptionsCompatibility(dbpath, rocksdb::Env::Default(), main_opts,
- loaded_cf_descs);
+ loaded_cf_descs,
+ rocksdb_ignore_unknown_options);
return status;
}
@@ -4060,6 +4327,22 @@ static int rocksdb_init_func(void *const p) {
DBUG_RETURN(1);
}
+ if (rdb_check_rocksdb_corruption()) {
+ sql_print_error("RocksDB: There was a corruption detected in RockDB files. "
+ "Check error log emitted earlier for more details.");
+ if (rocksdb_allow_to_start_after_corruption) {
+ sql_print_information(
+ "RocksDB: Remove rocksdb_allow_to_start_after_corruption to prevent "
+ "server operating if RocksDB corruption is detected.");
+ } else {
+ sql_print_error("RocksDB: The server will exit normally and stop restart "
+ "attempts. Remove %s file from data directory and "
+ "start mysqld manually.",
+ rdb_corruption_marker_file_name().c_str());
+ exit(0);
+ }
+ }
+
// Validate the assumption about the size of ROCKSDB_SIZEOF_HIDDEN_PK_COLUMN.
static_assert(sizeof(longlong) == 8, "Assuming that longlong is 8 bytes.");
@@ -4136,6 +4419,16 @@ static int rocksdb_init_func(void *const p) {
rocksdb_hton->tablefile_extensions= ha_rocksdb_exts;
DBUG_ASSERT(!mysqld_embedded);
+ if (rocksdb_db_options->max_open_files > (long)open_files_limit) {
+ sql_print_information("RocksDB: rocksdb_max_open_files should not be "
+ "greater than the open_files_limit, effective value "
+ "of rocksdb_max_open_files is being set to "
+ "open_files_limit / 2.");
+ rocksdb_db_options->max_open_files = open_files_limit / 2;
+ } else if (rocksdb_db_options->max_open_files == -2) {
+ rocksdb_db_options->max_open_files = open_files_limit / 2;
+ }
+
rocksdb_stats = rocksdb::CreateDBStatistics();
rocksdb_db_options->statistics = rocksdb_stats;
@@ -4184,14 +4477,20 @@ static int rocksdb_init_func(void *const p) {
DBUG_RETURN(HA_EXIT_FAILURE);
}
+ if (rocksdb_db_options->allow_mmap_writes &&
+ rocksdb_flush_log_at_trx_commit != FLUSH_LOG_NEVER) {
+ // NO_LINT_DEBUG
+ sql_print_error("RocksDB: rocksdb_flush_log_at_trx_commit needs to be 0 "
+ "to use allow_mmap_writes");
+ DBUG_RETURN(HA_EXIT_FAILURE);
+ }
+
// sst_file_manager will move deleted rocksdb sst files to trash_dir
// to be deleted in a background thread.
std::string trash_dir = std::string(rocksdb_datadir) + "/trash";
- rocksdb_db_options->sst_file_manager.reset(
- NewSstFileManager(rocksdb_db_options->env, myrocks_logger, trash_dir));
-
- rocksdb_db_options->sst_file_manager->SetDeleteRateBytesPerSecond(
- rocksdb_sst_mgr_rate_bytes_per_sec);
+ rocksdb_db_options->sst_file_manager.reset(NewSstFileManager(
+ rocksdb_db_options->env, myrocks_logger, trash_dir,
+ rocksdb_sst_mgr_rate_bytes_per_sec, true /* delete_existing_trash */));
std::vector<std::string> cf_names;
rocksdb::Status status;
@@ -4264,9 +4563,15 @@ static int rocksdb_init_func(void *const p) {
if (rocksdb_persistent_cache_size_mb > 0) {
std::shared_ptr<rocksdb::PersistentCache> pcache;
uint64_t cache_size_bytes= rocksdb_persistent_cache_size_mb * 1024 * 1024;
- rocksdb::NewPersistentCache(
+ status = rocksdb::NewPersistentCache(
rocksdb::Env::Default(), std::string(rocksdb_persistent_cache_path),
cache_size_bytes, myrocks_logger, true, &pcache);
+ if (!status.ok()) {
+ // NO_LINT_DEBUG
+ sql_print_error("RocksDB: Persistent cache returned error: (%s)",
+ status.getState());
+ DBUG_RETURN(HA_EXIT_FAILURE);
+ }
rocksdb_tbl_options->persistent_cache = pcache;
} else if (strlen(rocksdb_persistent_cache_path)) {
sql_print_error("RocksDB: Must specify rocksdb_persistent_cache_size_mb");
@@ -4643,6 +4948,33 @@ static inline void rocksdb_smart_next(bool seek_backward,
}
}
+#ifndef NDEBUG
+// simulate that RocksDB has reported corrupted data
+static void dbug_change_status_to_corrupted(rocksdb::Status *status) {
+ *status = rocksdb::Status::Corruption();
+}
+#endif
+
+// If the iterator is not valid it might be because of EOF, but it might also
+// be due to an IOError or corruption. It is good practice to always check it.
+// https://github.com/facebook/rocksdb/wiki/Iterator#error-handling
+static inline bool is_valid(rocksdb::Iterator *scan_it) {
+ if (scan_it->Valid()) {
+ return true;
+ } else {
+ rocksdb::Status s = scan_it->status();
+ DBUG_EXECUTE_IF("rocksdb_return_status_corrupted",
+ dbug_change_status_to_corrupted(&s););
+ if (s.IsIOError() || s.IsCorruption()) {
+ if (s.IsCorruption()) {
+ rdb_persist_corruption_marker();
+ }
+ rdb_handle_io_error(s, RDB_IO_ERROR_GENERAL);
+ }
+ return false;
+ }
+}
+
/**
@brief
Example of simple lock controls. The "table_handler" it creates is a
@@ -4719,28 +5051,124 @@ std::vector<std::string> Rdb_open_tables_map::get_table_names(void) const {
return names;
}
+/*
+ Inspired by innobase_get_int_col_max_value from InnoDB. This returns the
+ maximum value a type can take on.
+*/
+static ulonglong rdb_get_int_col_max_value(const Field *field) {
+ ulonglong max_value = 0;
+ switch (field->key_type()) {
+ case HA_KEYTYPE_BINARY:
+ max_value = 0xFFULL;
+ break;
+ case HA_KEYTYPE_INT8:
+ max_value = 0x7FULL;
+ break;
+ case HA_KEYTYPE_USHORT_INT:
+ max_value = 0xFFFFULL;
+ break;
+ case HA_KEYTYPE_SHORT_INT:
+ max_value = 0x7FFFULL;
+ break;
+ case HA_KEYTYPE_UINT24:
+ max_value = 0xFFFFFFULL;
+ break;
+ case HA_KEYTYPE_INT24:
+ max_value = 0x7FFFFFULL;
+ break;
+ case HA_KEYTYPE_ULONG_INT:
+ max_value = 0xFFFFFFFFULL;
+ break;
+ case HA_KEYTYPE_LONG_INT:
+ max_value = 0x7FFFFFFFULL;
+ break;
+ case HA_KEYTYPE_ULONGLONG:
+ max_value = 0xFFFFFFFFFFFFFFFFULL;
+ break;
+ case HA_KEYTYPE_LONGLONG:
+ max_value = 0x7FFFFFFFFFFFFFFFULL;
+ break;
+ default:
+ abort();
+ }
+
+ return max_value;
+}
+
void ha_rocksdb::load_auto_incr_value() {
+ ulonglong auto_incr = 0;
+ bool validate_last = false, use_datadic = true;
+#ifndef NDEBUG
+ DBUG_EXECUTE_IF("myrocks_autoinc_upgrade", use_datadic = false;);
+ validate_last = true;
+#endif
+
+ if (use_datadic && dict_manager.get_auto_incr_val(
+ m_tbl_def->get_autoincr_gl_index_id(), &auto_incr)) {
+ update_auto_incr_val(auto_incr);
+ }
+
+ // If we find nothing in the data dictionary, or if we are in debug mode,
+ // then call index_last to get the last value.
+ //
+ // This is needed when upgrading from a server that did not support
+ // persistent auto_increment, or if the table is empty.
+ //
+ // For debug mode, we are just verifying that the data dictionary value is
+ // greater than or equal to the maximum value in the table.
+ if (auto_incr == 0 || validate_last) {
+ auto_incr = load_auto_incr_value_from_index();
+ update_auto_incr_val(auto_incr);
+ }
+
+ // If we failed to find anything from the data dictionary and index, then
+ // initialize auto_increment to 1.
+ if (m_tbl_def->m_auto_incr_val == 0) {
+ update_auto_incr_val(1);
+ }
+}
+
+ulonglong ha_rocksdb::load_auto_incr_value_from_index() {
const int save_active_index = active_index;
active_index = table->s->next_number_index;
const uint8 save_table_status = table->status;
+ ulonglong last_val = 0;
- /*
- load_auto_incr_value() may be called by statements that
- do not execute implicit commits (i.e. SHOW CREATE TABLE).
- index_last() creates a snapshot. When a snapshot is created
- here, it has to be released as well. (GitHub issue#189)
- */
Rdb_transaction *const tx = get_or_create_tx(table->in_use);
const bool is_new_snapshot = !tx->has_snapshot();
+ if (is_new_snapshot) {
+ tx->acquire_snapshot(true);
+ }
// Do a lookup. We only need index column, so it should be index-only.
- // (another reason to make it index-only is that table->read_set is
- // not set appropriately and non-index-only lookup will not read the value)
+ // (another reason to make it index-only is that table->read_set is not set
+ // appropriately and non-index-only lookup will not read the value)
const bool save_keyread_only = m_keyread_only;
m_keyread_only = true;
+ m_key_requested = true;
- if (!index_last(table->record[0]))
- update_auto_incr_val();
+ if (!index_last(table->record[0])) {
+ Field *field =
+ table->key_info[table->s->next_number_index].key_part[0].field;
+ ulonglong max_val = rdb_get_int_col_max_value(field);
+ my_bitmap_map *const old_map =
+ dbug_tmp_use_all_columns(table, table->read_set);
+ last_val = field->val_int();
+ if (last_val != max_val) {
+ last_val++;
+ }
+#ifndef NDEBUG
+ ulonglong dd_val;
+ if (last_val <= max_val) {
+ const auto &gl_index_id = m_tbl_def->get_autoincr_gl_index_id();
+ if (dict_manager.get_auto_incr_val(gl_index_id, &dd_val) &&
+ tx->get_auto_incr(gl_index_id) == 0) {
+ DBUG_ASSERT(dd_val >= last_val);
+ }
+ }
+#endif
+ dbug_tmp_restore_column_map(table->read_set, old_map);
+ }
m_keyread_only = save_keyread_only;
if (is_new_snapshot) {
@@ -4753,38 +5181,46 @@ void ha_rocksdb::load_auto_incr_value() {
/*
Do what ha_rocksdb::index_end() does.
(Why don't we use index_init/index_end? class handler defines index_init
- as private, for some reason).
- */
+ as private, for some reason).
+ */
release_scan_iterator();
+
+ return last_val;
}
-/* Get PK value from table->record[0]. */
-/*
- TODO(alexyang): No existing support for auto_increment on non-pk columns, see
- end of ha_rocksdb::create. Also see opened issue here:
- https://github.com/facebook/mysql-5.6/issues/153
-*/
-void ha_rocksdb::update_auto_incr_val() {
+void ha_rocksdb::update_auto_incr_val(ulonglong val) {
+ ulonglong auto_incr_val = m_tbl_def->m_auto_incr_val;
+ while (
+ auto_incr_val < val &&
+ !m_tbl_def->m_auto_incr_val.compare_exchange_weak(auto_incr_val, val)) {
+ // Do nothing - just loop until auto_incr_val is >= val or we successfully
+ // set it
+ }
+}
+
+void ha_rocksdb::update_auto_incr_val_from_field() {
Field *field;
- longlong new_val;
+ ulonglong new_val, max_val;
field = table->key_info[table->s->next_number_index].key_part[0].field;
+ max_val = rdb_get_int_col_max_value(field);
my_bitmap_map *const old_map =
dbug_tmp_use_all_columns(table, table->read_set);
new_val = field->val_int();
// don't increment if we would wrap around
- if (new_val != std::numeric_limits<longlong>::max()) {
+ if (new_val != max_val) {
new_val++;
}
dbug_tmp_restore_column_map(table->read_set, old_map);
- longlong auto_incr_val = m_tbl_def->m_auto_incr_val;
- while (auto_incr_val < new_val &&
- !m_tbl_def->m_auto_incr_val.compare_exchange_weak(auto_incr_val,
- new_val)) {
- // Do nothing - just loop until auto_incr_val is >= new_val or
- // we successfully set it
+ // Only update if positive value was set for auto_incr column.
+ if (new_val <= max_val) {
+ Rdb_transaction *const tx = get_or_create_tx(table->in_use);
+ tx->set_auto_incr(m_tbl_def->get_autoincr_gl_index_id(), new_val);
+
+ // Update the in memory auto_incr value in m_tbl_def.
+ update_auto_incr_val(new_val);
}
}
@@ -4796,12 +5232,12 @@ int ha_rocksdb::load_hidden_pk_value() {
Rdb_transaction *const tx = get_or_create_tx(table->in_use);
const bool is_new_snapshot = !tx->has_snapshot();
+ longlong hidden_pk_id = 1;
// Do a lookup.
if (!index_last(table->record[0])) {
/*
Decode PK field from the key
*/
- longlong hidden_pk_id = 0;
auto err = read_hidden_pk_id_from_rowkey(&hidden_pk_id);
if (err) {
if (is_new_snapshot) {
@@ -4811,11 +5247,11 @@ int ha_rocksdb::load_hidden_pk_value() {
}
hidden_pk_id++;
- longlong old = m_tbl_def->m_hidden_pk_val;
- while (
- old < hidden_pk_id &&
- !m_tbl_def->m_hidden_pk_val.compare_exchange_weak(old, hidden_pk_id)) {
- }
+ }
+
+ longlong old = m_tbl_def->m_hidden_pk_val;
+ while (old < hidden_pk_id &&
+ !m_tbl_def->m_hidden_pk_val.compare_exchange_weak(old, hidden_pk_id)) {
}
if (is_new_snapshot) {
@@ -4901,18 +5337,15 @@ ha_rocksdb::ha_rocksdb(my_core::handlerton *const hton,
m_sk_packed_tuple(nullptr), m_end_key_packed_tuple(nullptr),
m_sk_match_prefix(nullptr), m_sk_match_prefix_buf(nullptr),
m_sk_packed_tuple_old(nullptr), m_dup_sk_packed_tuple(nullptr),
- m_dup_sk_packed_tuple_old(nullptr), m_pack_buffer(nullptr),
- m_lock_rows(RDB_LOCK_NONE), m_keyread_only(FALSE),
- m_bulk_load_tx(nullptr), m_encoder_arr(nullptr),
+ m_dup_sk_packed_tuple_old(nullptr), m_eq_cond_lower_bound(nullptr),
+ m_eq_cond_upper_bound(nullptr), m_pack_buffer(nullptr),
+ m_lock_rows(RDB_LOCK_NONE), m_keyread_only(FALSE), m_encoder_arr(nullptr),
m_row_checksums_checked(0), m_in_rpl_delete_rows(false),
- m_in_rpl_update_rows(false), m_force_skip_unique_check(false) {
- // TODO(alexyang): create a valid PSI_mutex_key for this mutex
- mysql_mutex_init(0, &m_bulk_load_mutex, MY_MUTEX_INIT_FAST);
-}
+ m_in_rpl_update_rows(false), m_force_skip_unique_check(false) {}
-bool ha_rocksdb::same_table(const ha_rocksdb &other) const {
- return m_tbl_def->base_tablename() == other.m_tbl_def->base_tablename();
+const std::string &ha_rocksdb::get_table_basename() const {
+ return m_tbl_def->base_tablename();
}
/**
@@ -4998,8 +5431,12 @@ bool ha_rocksdb::should_hide_ttl_rec(const Rdb_key_def &kd,
#ifndef NDEBUG
read_filter_ts += rdb_dbug_set_ttl_read_filter_ts();
#endif
- return ts + kd.m_ttl_duration + read_filter_ts <=
- static_cast<uint64>(curr_ts);
+ bool is_hide_ttl =
+ ts + kd.m_ttl_duration + read_filter_ts <= static_cast<uint64>(curr_ts);
+ if (is_hide_ttl) {
+ update_row_stats(ROWS_FILTERED);
+ }
+ return is_hide_ttl;
}
void ha_rocksdb::rocksdb_skip_expired_records(const Rdb_key_def &kd,
@@ -5213,12 +5650,12 @@ int ha_rocksdb::convert_record_to_storage_format(
Setup which fields will be unpacked when reading rows
@detail
- Two special cases when we still unpack all fields:
+ Three special cases when we still unpack all fields:
- When this table is being updated (m_lock_rows==RDB_LOCK_WRITE).
- When @@rocksdb_verify_row_debug_checksums is ON (In this mode, we need to
- read all
- fields to find whether there is a row checksum at the end. We could skip
- the fields instead of decoding them, but currently we do decoding.)
+ read all fields to find whether there is a row checksum at the end. We could
+ skip the fields instead of decoding them, but currently we do decoding.)
+ - On index merge as bitmap is cleared during that operation
@seealso
ha_rocksdb::setup_field_converters()
@@ -5226,20 +5663,29 @@ int ha_rocksdb::convert_record_to_storage_format(
*/
void ha_rocksdb::setup_read_decoders() {
m_decoders_vect.clear();
+ m_key_requested = false;
int last_useful = 0;
int skip_size = 0;
for (uint i = 0; i < table->s->fields; i++) {
+ // bitmap is cleared on index merge, but it still needs to decode columns
+ const bool field_requested =
+ m_lock_rows == RDB_LOCK_WRITE || m_verify_row_debug_checksums ||
+ bitmap_is_clear_all(table->read_set) ||
+ bitmap_is_set(table->read_set, table->field[i]->field_index);
+
// We only need the decoder if the whole record is stored.
if (m_encoder_arr[i].m_storage_type != Rdb_field_encoder::STORE_ALL) {
+ // the field potentially needs unpacking
+ if (field_requested) {
+ // the field is in the read set
+ m_key_requested = true;
+ }
continue;
}
- // bitmap is cleared on index merge, but it still needs to decode columns
- if (m_lock_rows == RDB_LOCK_WRITE || m_verify_row_debug_checksums ||
- bitmap_is_clear_all(table->read_set) ||
- bitmap_is_set(table->read_set, table->field[i]->field_index)) {
+ if (field_requested) {
// We will need to decode this field
m_decoders_vect.push_back({&m_encoder_arr[i], true, skip_size});
last_useful = m_decoders_vect.size();
@@ -5265,13 +5711,18 @@ void ha_rocksdb::setup_read_decoders() {
}
#ifndef NDEBUG
-void dbug_append_garbage_at_end(std::string &on_disk_rec) {
- on_disk_rec.append("abc");
+void dbug_append_garbage_at_end(rocksdb::PinnableSlice *on_disk_rec) {
+ std::string str(on_disk_rec->data(), on_disk_rec->size());
+ on_disk_rec->Reset();
+ str.append("abc");
+ on_disk_rec->PinSelf(rocksdb::Slice(str));
}
-void dbug_truncate_record(std::string &on_disk_rec) { on_disk_rec.resize(0); }
+void dbug_truncate_record(rocksdb::PinnableSlice *on_disk_rec) {
+ on_disk_rec->remove_suffix(on_disk_rec->size());
+}
-void dbug_modify_rec_varchar12(std::string &on_disk_rec) {
+void dbug_modify_rec_varchar12(rocksdb::PinnableSlice *on_disk_rec) {
std::string res;
// The record is NULL-byte followed by VARCHAR(10).
// Put the NULL-byte
@@ -5280,7 +5731,8 @@ void dbug_modify_rec_varchar12(std::string &on_disk_rec) {
res.append("\xC", 1);
res.append("123456789ab", 12);
- on_disk_rec.assign(res);
+ on_disk_rec->Reset();
+ on_disk_rec->PinSelf(rocksdb::Slice(res));
}
void dbug_modify_key_varchar8(String &on_disk_rec) {
@@ -5302,16 +5754,15 @@ void dbug_create_err_inplace_alter() {
int ha_rocksdb::convert_record_from_storage_format(
const rocksdb::Slice *const key, uchar *const buf) {
+
DBUG_EXECUTE_IF("myrocks_simulate_bad_row_read1",
- dbug_append_garbage_at_end(m_retrieved_record););
+ dbug_append_garbage_at_end(&m_retrieved_record););
DBUG_EXECUTE_IF("myrocks_simulate_bad_row_read2",
- dbug_truncate_record(m_retrieved_record););
+ dbug_truncate_record(&m_retrieved_record););
DBUG_EXECUTE_IF("myrocks_simulate_bad_row_read3",
- dbug_modify_rec_varchar12(m_retrieved_record););
+ dbug_modify_rec_varchar12(&m_retrieved_record););
- const rocksdb::Slice retrieved_rec_slice(&m_retrieved_record.front(),
- m_retrieved_record.size());
- return convert_record_from_storage_format(key, &retrieved_rec_slice, buf);
+ return convert_record_from_storage_format(key, &m_retrieved_record, buf);
}
int ha_rocksdb::convert_blob_from_storage_format(
@@ -5474,9 +5925,13 @@ int ha_rocksdb::convert_record_from_storage_format(
Rdb_key_def::get_unpack_header_size(unpack_info[0]));
}
- int err = m_pk_descr->unpack_record(table, buf, &rowkey_slice,
- unpack_info ? &unpack_slice : nullptr,
- false /* verify_checksum */);
+ int err = HA_EXIT_SUCCESS;
+ if (m_key_requested) {
+ err = m_pk_descr->unpack_record(table, buf, &rowkey_slice,
+ unpack_info ? &unpack_slice : nullptr,
+ false /* verify_checksum */);
+ }
+
if (err != HA_EXIT_SUCCESS) {
return err;
}
@@ -5720,6 +6175,11 @@ int ha_rocksdb::alloc_key_buffers(const TABLE *const table_arg,
m_pack_buffer =
reinterpret_cast<uchar *>(my_malloc(max_packed_sk_len, MYF(0)));
+ m_eq_cond_upper_bound =
+ reinterpret_cast<uchar *>(my_malloc(max_packed_sk_len, MYF(0)));
+ m_eq_cond_lower_bound =
+ reinterpret_cast<uchar *>(my_malloc(max_packed_sk_len, MYF(0)));
+
/*
If inplace alter is happening, allocate special buffers for unique
secondary index duplicate checking.
@@ -5734,6 +6194,7 @@ int ha_rocksdb::alloc_key_buffers(const TABLE *const table_arg,
if (m_pk_tuple == nullptr || m_pk_packed_tuple == nullptr ||
m_sk_packed_tuple == nullptr || m_sk_packed_tuple_old == nullptr ||
m_end_key_packed_tuple == nullptr || m_pack_buffer == nullptr ||
+ m_eq_cond_upper_bound == nullptr || m_eq_cond_lower_bound == nullptr ||
(alloc_alter_buffers && (m_dup_sk_packed_tuple == nullptr ||
m_dup_sk_packed_tuple_old == nullptr))) {
// One or more of the above allocations failed. Clean up and exit
@@ -5772,6 +6233,12 @@ void ha_rocksdb::free_key_buffers() {
my_free(m_dup_sk_packed_tuple_old);
m_dup_sk_packed_tuple_old = nullptr;
+
+ my_free(m_eq_cond_upper_bound);
+ m_eq_cond_upper_bound = nullptr;
+
+ my_free(m_eq_cond_lower_bound);
+ m_eq_cond_lower_bound = nullptr;
}
#ifdef MARIAROCKS_NOT_YET
@@ -5927,11 +6394,13 @@ int ha_rocksdb::open(const char *const name, int mode, uint test_if_locked) {
*/
m_verify_row_debug_checksums = false;
- /* TODO: move the following to where TABLE_SHARE is opened: */
- if (table->found_next_number_field)
+ /* Load auto_increment value only once on first use. */
+ if (table->found_next_number_field && m_tbl_def->m_auto_incr_val == 0) {
load_auto_incr_value();
+ }
- if (has_hidden_pk(table) &&
+ /* Load hidden pk only once on first use. */
+ if (has_hidden_pk(table) && m_tbl_def->m_hidden_pk_val == 0 &&
(err = load_hidden_pk_value()) != HA_EXIT_SUCCESS) {
free_key_buffers();
DBUG_RETURN(err);
@@ -6904,6 +7373,20 @@ int ha_rocksdb::create(const char *const name, TABLE *const table_arg,
m_pk_descr = m_key_descr_arr[pk_index(table_arg, m_tbl_def)];
+ if (create_info->auto_increment_value) {
+ bool autoinc_upgrade_test = false;
+ m_tbl_def->m_auto_incr_val = create_info->auto_increment_value;
+ DBUG_EXECUTE_IF("myrocks_autoinc_upgrade", autoinc_upgrade_test = true;);
+ if (!autoinc_upgrade_test) {
+ auto s = dict_manager.put_auto_incr_val(
+ batch, m_tbl_def->get_autoincr_gl_index_id(),
+ m_tbl_def->m_auto_incr_val);
+ if (!s.ok()) {
+ goto error;
+ }
+ }
+ }
+
dict_manager.lock();
err = ddl_manager.put_and_write(m_tbl_def, batch);
if (err != HA_EXIT_SUCCESS) {
@@ -6919,23 +7402,6 @@ int ha_rocksdb::create(const char *const name, TABLE *const table_arg,
dict_manager.unlock();
- if (create_info->auto_increment_value)
- m_tbl_def->m_auto_incr_val = create_info->auto_increment_value;
-
- /*
- We only support auto_increment at start of the PRIMARY KEY.
- */
- // Field *field;
- // if ((field= table_arg->next_number_field))
- /* TODO mdcallag: disable this for now to let UNIQUE indexes kind of work
- if ((field= table_arg->found_next_number_field))
- {
- int pk= table_arg->s->primary_key;
- Field *pk_field= table_arg->key_info[pk].key_part[0].field;
- if (field->field_index != pk_field->field_index)
- DBUG_RETURN(HA_ERR_INTERNAL_ERROR);
- }
- */
DBUG_RETURN(HA_EXIT_SUCCESS);
error:
@@ -7033,7 +7499,7 @@ int ha_rocksdb::read_before_key(const Rdb_key_def &kd,
*/
rocksdb_smart_seek(!kd.m_is_reverse_cf, m_scan_it, key_slice);
- while (m_scan_it->Valid()) {
+ while (is_valid(m_scan_it)) {
/*
We are using full key and we've hit an exact match, or...
@@ -7073,12 +7539,12 @@ int ha_rocksdb::read_after_key(const Rdb_key_def &kd,
from the POV of the current transaction. If it has, try going to the next
key.
*/
- while (m_scan_it->Valid() && kd.has_ttl() &&
+ while (is_valid(m_scan_it) && kd.has_ttl() &&
should_hide_ttl_rec(kd, m_scan_it->value(), ttl_filter_ts)) {
rocksdb_smart_next(kd.m_is_reverse_cf, m_scan_it);
}
- return m_scan_it->Valid() ? HA_EXIT_SUCCESS : HA_ERR_KEY_NOT_FOUND;
+ return is_valid(m_scan_it) ? HA_EXIT_SUCCESS : HA_ERR_KEY_NOT_FOUND;
}
int ha_rocksdb::position_to_correct_key(
@@ -7238,7 +7704,7 @@ int ha_rocksdb::read_row_from_secondary_key(uchar *const buf,
bool covered_lookup =
m_keyread_only || kd.covers_lookup(table, &value, &m_lookup_bitmap);
- if (covered_lookup && m_lock_rows == RDB_LOCK_NONE && !has_hidden_pk(table)) {
+ if (covered_lookup && m_lock_rows == RDB_LOCK_NONE) {
pk_size =
kd.get_primary_key_tuple(table, *m_pk_descr, &rkey, m_pk_packed_tuple);
if (pk_size == RDB_INVALID_KEY_LEN) {
@@ -7339,7 +7805,7 @@ int ha_rocksdb::secondary_index_read(const int keyno, uchar *const buf) {
/* Use STATUS_NOT_FOUND when record not found or some error occurred */
table->status = STATUS_NOT_FOUND;
- if (m_scan_it->Valid()) {
+ if (is_valid(m_scan_it)) {
rocksdb::Slice key = m_scan_it->key();
/* Check if we've run out of records of this index */
@@ -7360,8 +7826,7 @@ int ha_rocksdb::secondary_index_read(const int keyno, uchar *const buf) {
bool covered_lookup =
m_keyread_only || m_key_descr_arr[keyno]->covers_lookup(
table, &value, &m_lookup_bitmap);
- if (covered_lookup && m_lock_rows == RDB_LOCK_NONE &&
- !has_hidden_pk(table)) {
+ if (covered_lookup && m_lock_rows == RDB_LOCK_NONE) {
rc = m_key_descr_arr[keyno]->unpack_record(
table, buf, &key, &value, m_verify_row_debug_checksums);
global_stats.covered_secondary_key_lookups.inc();
@@ -7689,7 +8154,7 @@ int ha_rocksdb::find_icp_matching_index_rec(const bool &move_forward,
while (1) {
rocksdb_skip_expired_records(kd, m_scan_it, !move_forward);
- if (!m_scan_it->Valid()) {
+ if (!is_valid(m_scan_it)) {
table->status = STATUS_NOT_FOUND;
return HA_ERR_END_OF_FILE;
}
@@ -7953,7 +8418,7 @@ void dbug_dump_database(rocksdb::DB *const db) {
rocksdb::Status ha_rocksdb::get_for_update(
Rdb_transaction *const tx, rocksdb::ColumnFamilyHandle *const column_family,
- const rocksdb::Slice &key, std::string *const value) const {
+ const rocksdb::Slice &key, rocksdb::PinnableSlice *const value) const {
DBUG_ASSERT(m_lock_rows != RDB_LOCK_NONE);
const bool exclusive = m_lock_rows != RDB_LOCK_READ;
@@ -8039,6 +8504,9 @@ int ha_rocksdb::get_row_by_rowid(uchar *const buf, const char *const rowid,
&m_retrieved_record);
}
+ DBUG_EXECUTE_IF("rocksdb_return_status_corrupted",
+ dbug_change_status_to_corrupted(&s););
+
if (!s.IsNotFound() && !s.ok()) {
DBUG_RETURN(tx->set_status_error(table->in_use, s, *m_pk_descr, m_tbl_def,
m_table_handler));
@@ -8049,9 +8517,7 @@ int ha_rocksdb::get_row_by_rowid(uchar *const buf, const char *const rowid,
if (found) {
/* If we found the record, but it's expired, pretend we didn't find it. */
if (!skip_ttl_check && m_pk_descr->has_ttl() &&
- should_hide_ttl_rec(*m_pk_descr,
- rocksdb::Slice(&m_retrieved_record.front(),
- m_retrieved_record.size()),
+ should_hide_ttl_rec(*m_pk_descr, m_retrieved_record,
tx->m_snapshot_timestamp)) {
DBUG_RETURN(HA_ERR_KEY_NOT_FOUND);
}
@@ -8224,11 +8690,7 @@ int ha_rocksdb::index_first_intern(uchar *const buf) {
DBUG_ASSERT(key != nullptr);
const Rdb_key_def &kd = *m_key_descr_arr[active_index];
- if (kd.m_is_reverse_cf) {
- kd.get_supremum_key(key, &key_size);
- } else {
- kd.get_infimum_key(key, &key_size);
- }
+ int key_start_matching_bytes = kd.get_first_key(key, &key_size);
rocksdb::Slice index_key((const char *)key, key_size);
@@ -8239,7 +8701,7 @@ int ha_rocksdb::index_first_intern(uchar *const buf) {
// Loop as long as we get a deadlock error AND we end up creating the
// snapshot here (i.e. it did not exist prior to this)
for (;;) {
- setup_scan_iterator(kd, &index_key, false, Rdb_key_def::INDEX_NUMBER_SIZE);
+ setup_scan_iterator(kd, &index_key, false, key_start_matching_bytes);
m_scan_it->Seek(index_key);
m_skip_scan_it_next_call = true;
@@ -8816,23 +9278,21 @@ int ha_rocksdb::check_and_lock_sk(const uint &key_id,
The bloom filter may need to be disabled for this lookup.
*/
- const bool total_order_seek = !can_use_bloom_filter(
+ const bool total_order_seek = !check_bloom_and_set_bounds(
ha_thd(), kd, new_slice, all_parts_used);
const bool fill_cache = !THDVAR(ha_thd(), skip_fill_cache);
- /*
- psergey-todo: we just need to take lock, lookups not needed:
- */
- std::string dummy_value;
const rocksdb::Status s =
- get_for_update(row_info.tx, kd.get_cf(), new_slice, &dummy_value);
+ get_for_update(row_info.tx, kd.get_cf(), new_slice, nullptr);
if (!s.ok() && !s.IsNotFound()) {
return row_info.tx->set_status_error(table->in_use, s, kd, m_tbl_def,
m_table_handler);
}
rocksdb::Iterator *const iter = row_info.tx->get_iterator(
- kd.get_cf(), total_order_seek, fill_cache, true /* read current data */,
+ kd.get_cf(), total_order_seek, fill_cache,
+ m_eq_cond_lower_bound_slice, m_eq_cond_upper_bound_slice,
+ true /* read current data */,
false /* acquire snapshot */);
/*
Need to scan the transaction to see if there is a duplicate key.
@@ -8875,9 +9335,7 @@ int ha_rocksdb::check_uniqueness_and_lock(
m_retrieved_record by check_and_lock_unique_pk().
*/
if (is_pk(key_id, table, m_tbl_def) && found && m_pk_descr->has_ttl() &&
- should_hide_ttl_rec(*m_pk_descr,
- rocksdb::Slice(&m_retrieved_record.front(),
- m_retrieved_record.size()),
+ should_hide_ttl_rec(*m_pk_descr, m_retrieved_record,
(row_info.tx->m_snapshot_timestamp
? row_info.tx->m_snapshot_timestamp
: static_cast<int64_t>(std::time(nullptr))))) {
@@ -8926,109 +9384,48 @@ int ha_rocksdb::bulk_load_key(Rdb_transaction *const tx, const Rdb_key_def &kd,
const rocksdb::Slice &key,
const rocksdb::Slice &value, bool sort) {
DBUG_ENTER_FUNC();
-
+ int res;
rocksdb::ColumnFamilyHandle *cf = kd.get_cf();
- DBUG_ASSERT(cf != nullptr);
- int res = HA_EXIT_SUCCESS;
+ // In the case of unsorted inserts, m_sst_info allocated here is not
+ // used to store the keys. It is still used to indicate when tables
+ // are switched.
+ if (m_sst_info == nullptr || m_sst_info->is_committed()) {
+ m_sst_info.reset(new Rdb_sst_info(rdb, m_table_handler->m_table_name,
+ kd.get_name(), cf, *rocksdb_db_options,
+ THDVAR(ha_thd(), trace_sst_api)));
+ res = tx->start_bulk_load(this, m_sst_info);
+ if (res != HA_EXIT_SUCCESS) {
+ DBUG_RETURN(res);
+ }
+ }
+ DBUG_ASSERT(m_sst_info);
if (sort) {
- GL_INDEX_ID kd_gl_id = kd.get_gl_index_id();
- auto it = m_key_merge.find(kd_gl_id);
- if (it == m_key_merge.end()) {
- m_key_merge.emplace(
- std::piecewise_construct, std::make_tuple(kd_gl_id),
- std::make_tuple(
- thd_rocksdb_tmpdir(), THDVAR(ha_thd(), merge_buf_size),
- THDVAR(ha_thd(), merge_combine_read_size),
- THDVAR(ha_thd(), merge_tmp_file_removal_delay_ms), cf));
- it = m_key_merge.find(kd_gl_id);
- if ((res = it->second.init()) != 0) {
- DBUG_RETURN(res);
- }
+ Rdb_index_merge *key_merge;
+ DBUG_ASSERT(cf != nullptr);
- if (m_bulk_load_tx == nullptr) {
- tx->start_bulk_load(this);
- m_bulk_load_tx = tx;
- }
+ res = tx->get_key_merge(kd.get_gl_index_id(), cf, &key_merge);
+ if (res == HA_EXIT_SUCCESS) {
+ res = key_merge->add(key, value);
}
- res = it->second.add(key, value);
} else {
- if (!m_sst_info) {
- m_sst_info.reset(new Rdb_sst_info(rdb, m_table_handler->m_table_name,
- kd.get_name(), cf, *rocksdb_db_options,
- THDVAR(ha_thd(), trace_sst_api)));
- tx->start_bulk_load(this);
- m_bulk_load_tx = tx;
- }
-
- DBUG_ASSERT(m_sst_info);
-
res = m_sst_info->put(key, value);
}
DBUG_RETURN(res);
}
-int ha_rocksdb::finalize_bulk_load() {
+int ha_rocksdb::finalize_bulk_load(bool print_client_error) {
DBUG_ENTER_FUNC();
- DBUG_ASSERT_IMP(!m_key_merge.empty() || m_sst_info,
- m_bulk_load_tx != nullptr);
-
- /* Skip if there are no possible ongoing bulk loads */
- if (m_key_merge.empty() && !m_sst_info && m_bulk_load_tx == nullptr) {
- DBUG_RETURN(HA_EXIT_SUCCESS);
- }
-
int res = HA_EXIT_SUCCESS;
- RDB_MUTEX_LOCK_CHECK(m_bulk_load_mutex);
-
+ /* Skip if there are no possible ongoing bulk loads */
if (m_sst_info) {
- res = m_sst_info->commit();
+ res = m_sst_info->commit(print_client_error);
m_sst_info.reset();
}
-
- if (!m_key_merge.empty()) {
- rocksdb::Slice merge_key;
- rocksdb::Slice merge_val;
- for (auto it = m_key_merge.begin(); it != m_key_merge.end(); it++) {
- const std::string &index_name =
- ddl_manager.safe_find(it->first)->get_name();
- Rdb_index_merge &rdb_merge = it->second;
- Rdb_sst_info sst_info(rdb, m_table_handler->m_table_name, index_name,
- rdb_merge.get_cf(), *rocksdb_db_options,
- THDVAR(ha_thd(), trace_sst_api));
-
- while ((res = rdb_merge.next(&merge_key, &merge_val)) == 0) {
- if ((res = sst_info.put(merge_key, merge_val)) != 0) {
- break;
- }
- }
- // res == -1 => finished ok; res > 0 => error
- if (res <= 0) {
- if ((res = sst_info.commit()) != 0) {
- break;
- }
- }
- }
- m_key_merge.clear();
-
- /*
- Explicitly tell jemalloc to clean up any unused dirty pages at this point.
- See https://reviews.facebook.net/D63723 for more details.
- */
- purge_all_jemalloc_arenas();
- }
-
- if (m_bulk_load_tx != nullptr) {
- m_bulk_load_tx->end_bulk_load(this);
- m_bulk_load_tx = nullptr;
- }
-
- RDB_MUTEX_UNLOCK_CHECK(m_bulk_load_mutex);
-
DBUG_RETURN(res);
}
@@ -9058,7 +9455,7 @@ int ha_rocksdb::update_pk(const Rdb_key_def &kd,
}
if (table->next_number_field) {
- update_auto_incr_val();
+ update_auto_incr_val_from_field();
}
int rc = HA_EXIT_SUCCESS;
@@ -9290,6 +9687,41 @@ int ha_rocksdb::update_write_row(const uchar *const old_data,
}
/*
+ Set iterator upper/lower bounds for Seek/SeekForPrev.
+ This lets RocksDB avoid scanning tombstones outside of
+ the given key range when prefix_same_as_start=true was not passed
+ (i.e. when the prefix bloom filter cannot be used).
+ Swapping the upper and lower bounds is necessary on a reverse-order CF.
+ This covers HA_READ_PREFIX_LAST* case as well. For example,
+ if given query eq condition was 12 bytes and condition was
+ 0x0000b3eb003f65c5e78858b8, and if doing HA_READ_PREFIX_LAST,
+ eq_cond_len was 11 (see calc_eq_cond_len() for details).
+ If the index was reverse order, upper bound would be
+ 0x0000b3eb003f65c5e78857, and lower bound would be
+ 0x0000b3eb003f65c5e78859. These cover given eq condition range.
+*/
+void ha_rocksdb::setup_iterator_bounds(const Rdb_key_def &kd,
+ const rocksdb::Slice &eq_cond) {
+ uint eq_cond_len = eq_cond.size();
+ memcpy(m_eq_cond_upper_bound, eq_cond.data(), eq_cond_len);
+ kd.successor(m_eq_cond_upper_bound, eq_cond_len);
+ memcpy(m_eq_cond_lower_bound, eq_cond.data(), eq_cond_len);
+ kd.predecessor(m_eq_cond_lower_bound, eq_cond_len);
+
+ if (kd.m_is_reverse_cf) {
+ m_eq_cond_upper_bound_slice =
+ rocksdb::Slice((const char *)m_eq_cond_lower_bound, eq_cond_len);
+ m_eq_cond_lower_bound_slice =
+ rocksdb::Slice((const char *)m_eq_cond_upper_bound, eq_cond_len);
+ } else {
+ m_eq_cond_upper_bound_slice =
+ rocksdb::Slice((const char *)m_eq_cond_upper_bound, eq_cond_len);
+ m_eq_cond_lower_bound_slice =
+ rocksdb::Slice((const char *)m_eq_cond_lower_bound, eq_cond_len);
+ }
+}
+
+/*
Open a cursor
*/
@@ -9305,7 +9737,7 @@ void ha_rocksdb::setup_scan_iterator(const Rdb_key_def &kd,
bool skip_bloom = true;
const rocksdb::Slice eq_cond(slice->data(), eq_cond_len);
- if (can_use_bloom_filter(ha_thd(), kd, eq_cond, use_all_keys)) {
+ if (check_bloom_and_set_bounds(ha_thd(), kd, eq_cond, use_all_keys)) {
skip_bloom = false;
}
@@ -9345,7 +9777,9 @@ void ha_rocksdb::setup_scan_iterator(const Rdb_key_def &kd,
read_opts.snapshot = m_scan_it_snapshot;
m_scan_it = rdb->NewIterator(read_opts, kd.get_cf());
} else {
- m_scan_it = tx->get_iterator(kd.get_cf(), skip_bloom, fill_cache);
+ m_scan_it = tx->get_iterator(kd.get_cf(), skip_bloom, fill_cache,
+ m_eq_cond_lower_bound_slice,
+ m_eq_cond_upper_bound_slice);
}
m_scan_it_skips_bloom = skip_bloom;
}
@@ -9364,14 +9798,12 @@ void ha_rocksdb::release_scan_iterator() {
void ha_rocksdb::setup_iterator_for_rnd_scan() {
uint key_size;
- if (m_pk_descr->m_is_reverse_cf)
- m_pk_descr->get_supremum_key(m_pk_packed_tuple, &key_size);
- else
- m_pk_descr->get_infimum_key(m_pk_packed_tuple, &key_size);
+ int key_start_matching_bytes = m_pk_descr->get_first_key(m_pk_packed_tuple, &key_size);
rocksdb::Slice table_key((const char *)m_pk_packed_tuple, key_size);
- setup_scan_iterator(*m_pk_descr, &table_key);
+ setup_scan_iterator(*m_pk_descr, &table_key, false,
+ key_start_matching_bytes);
m_scan_it->Seek(table_key);
m_skip_scan_it_next_call = true;
}
@@ -9443,7 +9875,7 @@ int ha_rocksdb::rnd_next_with_direction(uchar *const buf, bool move_forward) {
#ifdef MARIAROCKS_NOT_YET
stats.rows_requested++;
#endif
- if (!m_scan_it || !m_scan_it->Valid()) {
+ if (!m_scan_it || !is_valid(m_scan_it)) {
/*
We can get here when SQL layer has called
@@ -9465,7 +9897,7 @@ int ha_rocksdb::rnd_next_with_direction(uchar *const buf, bool move_forward) {
m_scan_it->Prev(); /* this call cannot fail */
}
- if (!m_scan_it->Valid()) {
+ if (!is_valid(m_scan_it)) {
rc = HA_ERR_END_OF_FILE;
break;
}
@@ -9774,6 +10206,10 @@ int ha_rocksdb::info(uint flag) {
uint64_t memtableCount;
uint64_t memtableSize;
+ // the stats below are calculated from the skiplist, which is a probabilistic
+ // data structure, so the results vary between test runs;
+ // it can also return 0 for quite large tables, which means that
+ // cardinality for memtable-only indexes will be reported as 0
rdb->GetApproximateMemTableStats(m_pk_descr->get_cf(), r,
&memtableCount, &memtableSize);
@@ -9807,7 +10243,6 @@ int ha_rocksdb::info(uint flag) {
if (flag & HA_STATUS_CONST) {
ref_length = m_pk_descr->max_storage_fmt_length();
- // TODO: Needs to reimplement after having real index statistics
for (uint i = 0; i < m_tbl_def->m_key_count; i++) {
if (is_hidden_pk(i, table, m_tbl_def)) {
continue;
@@ -10091,20 +10526,6 @@ void ha_rocksdb::read_thd_vars(THD *const thd) {
m_checksums_pct = THDVAR(thd, checksums_pct);
}
-const char *ha_rocksdb::thd_rocksdb_tmpdir() {
- const char *tmp_dir = THDVAR(ha_thd(), tmpdir);
-
- /*
- We want to treat an empty string as nullptr, in these cases DDL operations
- will use the default --tmpdir passed to mysql instead.
- */
- if (tmp_dir != nullptr && *tmp_dir == '\0') {
- tmp_dir = nullptr;
- }
-
- return (tmp_dir);
-}
-
/**
@return
@@ -10298,6 +10719,13 @@ ha_rocksdb::get_range(const int &i,
return myrocks::get_range(*m_key_descr_arr[i], buf);
}
+/*
+ This function is called with total_order_seek=true, but
+ upper/lower bound setting is not necessary.
+ Boundary set is useful when there is no matching key,
+ but in drop_index_thread's case, it means index is marked as removed,
+ so no further seek will happen for the index id.
+*/
static bool is_myrocks_index_empty(
rocksdb::ColumnFamilyHandle *cfh, const bool is_reverse_cf,
const rocksdb::ReadOptions &read_opts,
@@ -10368,7 +10796,7 @@ void Rdb_drop_index_thread::run() {
"from cf id %u. MyRocks data dictionary may "
"get corrupted.",
d.cf_id);
- abort_with_stack_traces();
+ abort();
}
rocksdb::ColumnFamilyHandle *cfh = cf_manager.get_cf(d.cf_id);
DBUG_ASSERT(cfh);
@@ -10457,6 +10885,7 @@ int ha_rocksdb::delete_table(const char *const tablename) {
the persistent data dictionary).
*/
ddl_manager.remove(tbl, batch, true);
+
int err = dict_manager.commit(batch);
if (err) {
DBUG_RETURN(err);
@@ -10487,10 +10916,12 @@ int ha_rocksdb::remove_rows(Rdb_tbl_def *const tbl) {
const Rdb_key_def &kd = *tbl->m_key_descr_arr[i];
kd.get_infimum_key(reinterpret_cast<uchar *>(key_buf), &key_len);
rocksdb::ColumnFamilyHandle *cf = kd.get_cf();
-
+ const rocksdb::Slice table_key(key_buf, key_len);
+ setup_iterator_bounds(kd, table_key);
+ opts.iterate_lower_bound = &m_eq_cond_lower_bound_slice;
+ opts.iterate_upper_bound = &m_eq_cond_upper_bound_slice;
std::unique_ptr<rocksdb::Iterator> it(rdb->NewIterator(opts, cf));
- const rocksdb::Slice table_key(key_buf, key_len);
it->Seek(table_key);
while (it->Valid()) {
const rocksdb::Slice key = it->key();
@@ -10569,6 +11000,7 @@ int ha_rocksdb::rename_table(const char *const from, const char *const to) {
const std::unique_ptr<rocksdb::WriteBatch> wb = dict_manager.begin();
rocksdb::WriteBatch *const batch = wb.get();
dict_manager.lock();
+
if (ddl_manager.rename(from_str, to_str, batch)) {
rc = HA_ERR_NO_SUCH_TABLE;
} else {
@@ -10619,7 +11051,7 @@ int ha_rocksdb::extra(enum ha_extra_function operation) {
If the table has blobs, then they are part of m_retrieved_record.
This call invalidates them.
*/
- m_retrieved_record.clear();
+ m_retrieved_record.Reset();
break;
default:
break;
@@ -10787,24 +11219,21 @@ int ha_rocksdb::calculate_stats(const TABLE *const table_arg, THD *const thd,
std::unordered_map<rocksdb::ColumnFamilyHandle *, std::vector<rocksdb::Range>>
ranges;
std::unordered_set<GL_INDEX_ID> ids_to_check;
- std::unordered_map<GL_INDEX_ID, uint> ids_to_keyparts;
std::vector<uchar> buf(table_arg->s->keys * 2 *
Rdb_key_def::INDEX_NUMBER_SIZE);
+ std::unordered_map<GL_INDEX_ID, Rdb_index_stats> stats;
for (uint i = 0; i < table_arg->s->keys; i++) {
const auto bufp = &buf[i * 2 * Rdb_key_def::INDEX_NUMBER_SIZE];
const Rdb_key_def &kd = *m_key_descr_arr[i];
+ const GL_INDEX_ID index_id = kd.get_gl_index_id();
ranges[kd.get_cf()].push_back(get_range(i, bufp));
- ids_to_check.insert(kd.get_gl_index_id());
- ids_to_keyparts[kd.get_gl_index_id()] = kd.get_key_parts();
- }
- // for analyze statements, force flush on memtable to get accurate cardinality
- Rdb_cf_manager &cf_manager = rdb_get_cf_manager();
- if (thd != nullptr && THDVAR(thd, flush_memtable_on_analyze) &&
- !rocksdb_pause_background_work) {
- for (auto it : ids_to_check) {
- rdb->Flush(rocksdb::FlushOptions(), cf_manager.get_cf(it.cf_id));
- }
+ ids_to_check.insert(index_id);
+ // Initialize the stats to 0. If there are no files that contain
+ // this gl_index_id, then 0 should be stored for the cached stats.
+ stats[index_id] = Rdb_index_stats(index_id);
+ DBUG_ASSERT(kd.get_key_parts() > 0);
+ stats[index_id].m_distinct_keys_per_prefix.resize(kd.get_key_parts());
}
// get RocksDB table properties for these ranges
@@ -10821,15 +11250,6 @@ int ha_rocksdb::calculate_stats(const TABLE *const table_arg, THD *const thd,
}
int num_sst = 0;
- // group stats per index id
- std::unordered_map<GL_INDEX_ID, Rdb_index_stats> stats;
- for (const auto &it : ids_to_check) {
- // Initialize the stats to 0. If there are no files that contain
- // this gl_index_id, then 0 should be stored for the cached stats.
- stats[it] = Rdb_index_stats(it);
- DBUG_ASSERT(ids_to_keyparts.count(it) > 0);
- stats[it].m_distinct_keys_per_prefix.resize(ids_to_keyparts[it]);
- }
for (const auto &it : props) {
std::vector<Rdb_index_stats> sst_stats;
Rdb_tbl_prop_coll::read_stats_from_tbl_props(it.second, &sst_stats);
@@ -10856,6 +11276,53 @@ int ha_rocksdb::calculate_stats(const TABLE *const table_arg, THD *const thd,
num_sst++;
}
+ // calculate memtable cardinality
+ Rdb_tbl_card_coll cardinality_collector(rocksdb_table_stats_sampling_pct);
+ auto read_opts = rocksdb::ReadOptions();
+ read_opts.read_tier = rocksdb::ReadTier::kMemtableTier;
+ for (uint i = 0; i < table_arg->s->keys; i++) {
+ const Rdb_key_def &kd = *m_key_descr_arr[i];
+ Rdb_index_stats &stat = stats[kd.get_gl_index_id()];
+
+ uchar r_buf[Rdb_key_def::INDEX_NUMBER_SIZE * 2];
+ auto r = get_range(i, r_buf);
+ uint64_t memtableCount;
+ uint64_t memtableSize;
+ rdb->GetApproximateMemTableStats(kd.get_cf(), r, &memtableCount,
+ &memtableSize);
+ if (memtableCount < (uint64_t)stat.m_rows / 10) {
+ // skip tables that already have enough stats from SST files to reduce
+ // overhead and avoid degradation of big tables stats by sampling from
+ // relatively tiny (less than 10% of full data set) memtable dataset
+ continue;
+ }
+
+ std::unique_ptr<rocksdb::Iterator> it = std::unique_ptr<rocksdb::Iterator>(
+ rdb->NewIterator(read_opts, kd.get_cf()));
+
+ uchar *first_key;
+ uint key_size;
+ if (is_pk(i, table, m_tbl_def)) {
+ first_key = m_pk_packed_tuple;
+ } else {
+ first_key = m_sk_packed_tuple;
+ }
+ kd.get_first_key(first_key, &key_size);
+ rocksdb::Slice first_index_key((const char *)first_key, key_size);
+
+ cardinality_collector.Reset();
+ for (it->Seek(first_index_key); is_valid(it.get()); it->Next()) {
+ const rocksdb::Slice key = it->key();
+ if (!kd.covers_key(key)) {
+ break; // end of this index
+ }
+ stat.m_rows++;
+
+ cardinality_collector.ProcessKey(key, &kd, &stat);
+ }
+ cardinality_collector.AdjustStats(&stat);
+ }
+
// set and persist new stats
ddl_manager.set_stats(stats);
ddl_manager.persist_stats(true);
@@ -10903,32 +11370,73 @@ void ha_rocksdb::get_auto_increment(ulonglong off, ulonglong inc,
off = 1;
}
- longlong new_val;
+ Field *field;
+ ulonglong new_val, max_val;
+ field = table->key_info[table->s->next_number_index].key_part[0].field;
+ max_val = rdb_get_int_col_max_value(field);
// Local variable reference to simplify code below
- std::atomic<longlong> &auto_incr = m_tbl_def->m_auto_incr_val;
+ auto &auto_incr = m_tbl_def->m_auto_incr_val;
- if (inc == 1 && off == 1) {
+ if (inc == 1) {
+ DBUG_ASSERT(off == 1);
// Optimization for the standard case where we are always simply
// incrementing from the last position
// Use CAS operation in a loop to make sure we atomically get the next auto
- // increment value while ensuring tha we don't wrap around to a negative
+ // increment value while ensuring that we don't wrap around to a negative
// number.
+ //
+ // We set auto_incr to the min of max_val and new_val + 1. This means that
+ // if we're at the maximum, we should be returning the same value for
+ // multiple rows, resulting in duplicate key errors (as expected).
+ //
+ // If we return values greater than the max, the SQL layer will "truncate"
+ // the value anyway, but it means that we store invalid values into
+ // auto_incr that will be visible in SHOW CREATE TABLE.
new_val = auto_incr;
- while (new_val != std::numeric_limits<longlong>::max()) {
- if (auto_incr.compare_exchange_weak(new_val, new_val + 1)) {
+ while (new_val != std::numeric_limits<ulonglong>::max()) {
+ if (auto_incr.compare_exchange_weak(new_val,
+ std::min(new_val + 1, max_val))) {
break;
}
}
} else {
- // The next value can be more complicated if either `inc` or 'off' is not 1
- longlong last_val = auto_incr;
+ // The next value can be more complicated if either 'inc' or 'off' is not 1
+ ulonglong last_val = auto_incr;
// Loop until we can correctly update the atomic value
do {
- if (((last_val - off) / inc) ==
- (std::numeric_limits<longlong>::max() - off) / inc) {
+ DBUG_ASSERT(last_val > 0);
+ // Calculate the next value in the auto increment series: offset
+ // + N * increment where N is 0, 1, 2, ...
+ //
+ // For further information please visit:
+ // http://dev.mysql.com/doc/refman/5.7/en/replication-options-master.html
+ //
+ // The following is confusing so here is an explanation:
+ // To get the next number in the sequence above you subtract out the
+ // offset, calculate the next sequence (N * increment) and then add the
+ // offset back in.
+ //
+ // The additions are rearranged to avoid overflow. The following is
+ // equivalent to (last_val - 1 + inc - off) / inc. This uses the fact
+ // that (a+b)/c = a/c + b/c + (a%c + b%c)/c. To show why:
+ //
+ // (a+b)/c
+ // = (a - a%c + a%c + b - b%c + b%c) / c
+ // = (a - a%c) / c + (b - b%c) / c + (a%c + b%c) / c
+ // = a/c + b/c + (a%c + b%c) / c
+ //
+ // Now, substitute a = last_val - 1, b = inc - off, c = inc to get the
+ // following statement.
+ ulonglong n =
+ (last_val - 1) / inc + ((last_val - 1) % inc + inc - off) / inc;
+
+ // Check if n * inc + off will overflow. This can only happen if we have
+ // an UNSIGNED BIGINT field.
+ if (n > (std::numeric_limits<ulonglong>::max() - off) / inc) {
+ DBUG_ASSERT(max_val == std::numeric_limits<ulonglong>::max());
// The 'last_val' value is already equal to or larger than the largest
// value in the sequence. Continuing would wrap around (technically
// the behavior would be undefined). What should we do?
@@ -10940,31 +11448,30 @@ void ha_rocksdb::get_auto_increment(ulonglong off, ulonglong inc,
// may not be in our sequence, but it is guaranteed to be equal
// to or larger than any other value already inserted.
//
- // For now I'm going to take option @2.
- new_val = std::numeric_limits<longlong>::max();
+ // For now I'm going to take option 2.
+ //
+ // Returning ULLONG_MAX from get_auto_increment will cause the SQL
+ // layer to fail with ER_AUTOINC_READ_FAILED. This means that due to
+ // the SE API for get_auto_increment, inserts will fail with
+ // ER_AUTOINC_READ_FAILED if the column is UNSIGNED BIGINT, but
+ // inserts will fail with ER_DUP_ENTRY for other types (or no failure
+ // if the column is in a non-unique SK).
+ new_val = std::numeric_limits<ulonglong>::max();
auto_incr = new_val; // Store the largest value into auto_incr
break;
}
- // Calculate the next value in the auto increment series:
- // offset + N * increment
- // where N is 0, 1, 2, ...
- //
- // For further information please visit:
- // http://dev.mysql.com/doc/refman/5.7/en/replication-options-master.html
- //
- // The following is confusing so here is an explanation:
- // To get the next number in the sequence above you subtract out
- // the offset, calculate the next sequence (N * increment) and then add
- // the offset back in.
- new_val = (((last_val - off) + (inc - 1)) / inc) * inc + off;
+ new_val = n * inc + off;
// Attempt to store the new value (plus 1 since m_auto_incr_val contains
// the next available value) into the atomic value. If the current
// value no longer matches what we have in 'last_val' this will fail and
// we will repeat the loop (`last_val` will automatically get updated
// with the current value).
- } while (!auto_incr.compare_exchange_weak(last_val, new_val + 1));
+ //
+ // See above explanation for inc == 1 for why we use std::min.
+ } while (!auto_incr.compare_exchange_weak(last_val,
+ std::min(new_val + 1, max_val)));
}
*first_value = new_val;
@@ -11041,7 +11548,8 @@ my_core::enum_alter_inplace_result ha_rocksdb::check_if_supported_inplace_alter(
my_core::Alter_inplace_info::DROP_UNIQUE_INDEX |
my_core::Alter_inplace_info::ADD_INDEX |
my_core::Alter_inplace_info::ALTER_PARTITIONED |
- my_core::Alter_inplace_info::ADD_UNIQUE_INDEX)) {
+ my_core::Alter_inplace_info::ADD_UNIQUE_INDEX |
+ my_core::Alter_inplace_info::CHANGE_CREATE_OPTION)) {
DBUG_RETURN(my_core::HA_ALTER_INPLACE_NOT_SUPPORTED);
}
@@ -11052,6 +11560,13 @@ my_core::enum_alter_inplace_result ha_rocksdb::check_if_supported_inplace_alter(
DBUG_RETURN(my_core::HA_ALTER_INPLACE_NOT_SUPPORTED);
}
+ /* We only support changing auto_increment for table options. */
+ if ((ha_alter_info->handler_flags &
+ my_core::Alter_inplace_info::CHANGE_CREATE_OPTION) &&
+ !(ha_alter_info->create_info->used_fields & HA_CREATE_USED_AUTO)) {
+ DBUG_RETURN(my_core::HA_ALTER_INPLACE_NOT_SUPPORTED);
+ }
+
DBUG_RETURN(my_core::HA_ALTER_INPLACE_SHARED_LOCK_AFTER_PREPARE);
}
@@ -11091,111 +11606,121 @@ bool ha_rocksdb::prepare_inplace_alter_table(
DBUG_ASSERT(altered_table != nullptr);
DBUG_ASSERT(ha_alter_info != nullptr);
- const uint old_n_keys = m_tbl_def->m_key_count;
+ Rdb_tbl_def *new_tdef = nullptr;
+ std::shared_ptr<Rdb_key_def> *old_key_descr = nullptr;
+ std::shared_ptr<Rdb_key_def> *new_key_descr = nullptr;
+ uint old_n_keys = m_tbl_def->m_key_count;
uint new_n_keys = altered_table->s->keys;
+ std::unordered_set<std::shared_ptr<Rdb_key_def>> added_indexes;
+ std::unordered_set<GL_INDEX_ID> dropped_index_ids;
+ uint n_dropped_keys = 0;
+ uint n_added_keys = 0;
+ ulonglong max_auto_incr = 0;
- if (has_hidden_pk(altered_table)) {
- new_n_keys += 1;
- }
+ if (ha_alter_info->handler_flags &
+ (my_core::Alter_inplace_info::DROP_INDEX |
+ my_core::Alter_inplace_info::DROP_UNIQUE_INDEX |
+ my_core::Alter_inplace_info::ADD_INDEX |
+ my_core::Alter_inplace_info::ADD_UNIQUE_INDEX)) {
- const TABLE *const old_table = table;
- std::shared_ptr<Rdb_key_def> *const old_key_descr =
- m_tbl_def->m_key_descr_arr;
- std::shared_ptr<Rdb_key_def> *const new_key_descr =
- new std::shared_ptr<Rdb_key_def>[new_n_keys];
+ if (has_hidden_pk(altered_table)) {
+ new_n_keys += 1;
+ }
- Rdb_tbl_def *const new_tdef = new Rdb_tbl_def(m_tbl_def->full_tablename());
- new_tdef->m_key_descr_arr = new_key_descr;
- new_tdef->m_key_count = new_n_keys;
- new_tdef->m_auto_incr_val =
- m_tbl_def->m_auto_incr_val.load(std::memory_order_relaxed);
- new_tdef->m_hidden_pk_val =
- m_tbl_def->m_hidden_pk_val.load(std::memory_order_relaxed);
+ const TABLE *const old_table = table;
+ old_key_descr = m_tbl_def->m_key_descr_arr;
+ new_key_descr = new std::shared_ptr<Rdb_key_def>[new_n_keys];
- if (ha_alter_info->handler_flags &
- (my_core::Alter_inplace_info::DROP_INDEX |
- my_core::Alter_inplace_info::DROP_UNIQUE_INDEX |
- my_core::Alter_inplace_info::ADD_INDEX |
- my_core::Alter_inplace_info::ADD_UNIQUE_INDEX) &&
- create_key_defs(altered_table, new_tdef, table, m_tbl_def)) {
- /* Delete the new key descriptors */
- delete[] new_key_descr;
+ new_tdef = new Rdb_tbl_def(m_tbl_def->full_tablename());
+ new_tdef->m_key_descr_arr = new_key_descr;
+ new_tdef->m_key_count = new_n_keys;
+ new_tdef->m_auto_incr_val =
+ m_tbl_def->m_auto_incr_val.load(std::memory_order_relaxed);
+ new_tdef->m_hidden_pk_val =
+ m_tbl_def->m_hidden_pk_val.load(std::memory_order_relaxed);
- /*
- Explicitly mark as nullptr so we don't accidentally remove entries
- from data dictionary on cleanup (or cause double delete[]).
- */
- new_tdef->m_key_descr_arr = nullptr;
- delete new_tdef;
+ if (create_key_defs(altered_table, new_tdef, table, m_tbl_def)) {
+ /* Delete the new key descriptors */
+ delete[] new_key_descr;
- my_error(ER_KEY_CREATE_DURING_ALTER, MYF(0));
- DBUG_RETURN(HA_EXIT_FAILURE);
- }
+ /*
+ Explicitly mark as nullptr so we don't accidentally remove entries
+ from data dictionary on cleanup (or cause double delete[]).
+ */
+ new_tdef->m_key_descr_arr = nullptr;
+ delete new_tdef;
- std::unordered_set<std::shared_ptr<Rdb_key_def>> added_indexes;
- std::unordered_set<GL_INDEX_ID> dropped_index_ids;
+ my_error(ER_KEY_CREATE_DURING_ALTER, MYF(0));
+ DBUG_RETURN(HA_EXIT_FAILURE);
+ }
- uint i;
- uint j;
+ uint i;
+ uint j;
- /* Determine which(if any) key definition(s) need to be dropped */
- for (i = 0; i < ha_alter_info->index_drop_count; i++) {
- const KEY *const dropped_key = ha_alter_info->index_drop_buffer[i];
- for (j = 0; j < old_n_keys; j++) {
- const KEY *const old_key =
- &old_table->key_info[old_key_descr[j]->get_keyno()];
+ /* Determine which(if any) key definition(s) need to be dropped */
+ for (i = 0; i < ha_alter_info->index_drop_count; i++) {
+ const KEY *const dropped_key = ha_alter_info->index_drop_buffer[i];
+ for (j = 0; j < old_n_keys; j++) {
+ const KEY *const old_key =
+ &old_table->key_info[old_key_descr[j]->get_keyno()];
- if (!compare_keys(old_key, dropped_key)) {
- dropped_index_ids.insert(old_key_descr[j]->get_gl_index_id());
- break;
+ if (!compare_keys(old_key, dropped_key)) {
+ dropped_index_ids.insert(old_key_descr[j]->get_gl_index_id());
+ break;
+ }
}
}
- }
-
- /* Determine which(if any) key definitions(s) need to be added */
- int identical_indexes_found = 0;
- for (i = 0; i < ha_alter_info->index_add_count; i++) {
- const KEY *const added_key =
- &ha_alter_info->key_info_buffer[ha_alter_info->index_add_buffer[i]];
- for (j = 0; j < new_n_keys; j++) {
- const KEY *const new_key =
- &altered_table->key_info[new_key_descr[j]->get_keyno()];
- if (!compare_keys(new_key, added_key)) {
- /*
- Check for cases where an 'identical' index is being dropped and
- re-added in a single ALTER statement. Turn this into a no-op as the
- index has not changed.
- E.G. Unique index -> non-unique index requires no change
+ /* Determine which(if any) key definitions(s) need to be added */
+ int identical_indexes_found = 0;
+ for (i = 0; i < ha_alter_info->index_add_count; i++) {
+ const KEY *const added_key =
+ &ha_alter_info->key_info_buffer[ha_alter_info->index_add_buffer[i]];
+ for (j = 0; j < new_n_keys; j++) {
+ const KEY *const new_key =
+ &altered_table->key_info[new_key_descr[j]->get_keyno()];
+ if (!compare_keys(new_key, added_key)) {
+ /*
+ Check for cases where an 'identical' index is being dropped and
+ re-added in a single ALTER statement. Turn this into a no-op as the
+ index has not changed.
+
+ E.G. Unique index -> non-unique index requires no change
+
+ Note that cases where the index name remains the same but the
+ key-parts are changed is already handled in create_inplace_key_defs.
+ In these cases the index needs to be rebuilt.
+ */
+ if (dropped_index_ids.count(new_key_descr[j]->get_gl_index_id())) {
+ dropped_index_ids.erase(new_key_descr[j]->get_gl_index_id());
+ identical_indexes_found++;
+ } else {
+ added_indexes.insert(new_key_descr[j]);
+ }
- Note that cases where the index name remains the same but the
- key-parts are changed is already handled in create_inplace_key_defs.
- In these cases the index needs to be rebuilt.
- */
- if (dropped_index_ids.count(new_key_descr[j]->get_gl_index_id())) {
- dropped_index_ids.erase(new_key_descr[j]->get_gl_index_id());
- identical_indexes_found++;
- } else {
- added_indexes.insert(new_key_descr[j]);
+ break;
}
-
- break;
}
}
- }
- const uint n_dropped_keys =
- ha_alter_info->index_drop_count - identical_indexes_found;
- const uint n_added_keys =
- ha_alter_info->index_add_count - identical_indexes_found;
- DBUG_ASSERT(dropped_index_ids.size() == n_dropped_keys);
- DBUG_ASSERT(added_indexes.size() == n_added_keys);
- DBUG_ASSERT(new_n_keys == (old_n_keys - n_dropped_keys + n_added_keys));
+ n_dropped_keys = ha_alter_info->index_drop_count - identical_indexes_found;
+ n_added_keys = ha_alter_info->index_add_count - identical_indexes_found;
+ DBUG_ASSERT(dropped_index_ids.size() == n_dropped_keys);
+ DBUG_ASSERT(added_indexes.size() == n_added_keys);
+ DBUG_ASSERT(new_n_keys == (old_n_keys - n_dropped_keys + n_added_keys));
+ }
+ if (ha_alter_info->handler_flags &
+ my_core::Alter_inplace_info::CHANGE_CREATE_OPTION) {
+ if (!new_tdef) {
+ new_tdef = m_tbl_def;
+ }
+ max_auto_incr = load_auto_incr_value_from_index();
+ }
ha_alter_info->handler_ctx = new Rdb_inplace_alter_ctx(
new_tdef, old_key_descr, new_key_descr, old_n_keys, new_n_keys,
- added_indexes, dropped_index_ids, n_added_keys, n_dropped_keys);
-
+ added_indexes, dropped_index_ids, n_added_keys, n_dropped_keys,
+ max_auto_incr);
DBUG_RETURN(HA_EXIT_SUCCESS);
}
@@ -11349,9 +11874,10 @@ int ha_rocksdb::inplace_populate_sk(
bool is_unique_index =
new_table_arg->key_info[index->get_keyno()].flags & HA_NOSAME;
- Rdb_index_merge rdb_merge(
- thd_rocksdb_tmpdir(), rdb_merge_buf_size, rdb_merge_combine_read_size,
- rdb_merge_tmp_file_removal_delay, index->get_cf());
+ Rdb_index_merge rdb_merge(tx->get_rocksdb_tmpdir(), rdb_merge_buf_size,
+ rdb_merge_combine_read_size,
+ rdb_merge_tmp_file_removal_delay,
+ index->get_cf());
if ((res = rdb_merge.init())) {
DBUG_RETURN(res);
@@ -11655,6 +12181,29 @@ bool ha_rocksdb::commit_inplace_alter_table(
rdb_drop_idx_thread.signal();
}
+ if (ha_alter_info->handler_flags &
+ (my_core::Alter_inplace_info::CHANGE_CREATE_OPTION)) {
+ const std::unique_ptr<rocksdb::WriteBatch> wb = dict_manager.begin();
+ rocksdb::WriteBatch *const batch = wb.get();
+ std::unordered_set<GL_INDEX_ID> create_index_ids;
+
+ ulonglong auto_incr_val = ha_alter_info->create_info->auto_increment_value;
+
+ for (inplace_alter_handler_ctx **pctx = ctx_array; *pctx; pctx++) {
+ Rdb_inplace_alter_ctx *const ctx =
+ static_cast<Rdb_inplace_alter_ctx *>(*pctx);
+ auto_incr_val = std::max(auto_incr_val, ctx->m_max_auto_incr);
+ dict_manager.put_auto_incr_val(
+ batch, ctx->m_new_tdef->get_autoincr_gl_index_id(), auto_incr_val,
+ true /* overwrite */);
+ ctx->m_new_tdef->m_auto_incr_val = auto_incr_val;
+ }
+
+ if (dict_manager.commit(batch)) {
+ DBUG_ASSERT(0);
+ }
+ }
+
DBUG_RETURN(HA_EXIT_SUCCESS);
}
@@ -11682,15 +12231,29 @@ struct rocksdb_status_counters_t {
uint64_t block_cache_miss;
uint64_t block_cache_hit;
uint64_t block_cache_add;
+ uint64_t block_cache_add_failures;
uint64_t block_cache_index_miss;
uint64_t block_cache_index_hit;
+ uint64_t block_cache_index_add;
+ uint64_t block_cache_index_bytes_insert;
+ uint64_t block_cache_index_bytes_evict;
uint64_t block_cache_filter_miss;
uint64_t block_cache_filter_hit;
+ uint64_t block_cache_filter_add;
+ uint64_t block_cache_filter_bytes_insert;
+ uint64_t block_cache_filter_bytes_evict;
+ uint64_t block_cache_bytes_read;
+ uint64_t block_cache_bytes_write;
+ uint64_t block_cache_data_bytes_insert;
uint64_t block_cache_data_miss;
uint64_t block_cache_data_hit;
+ uint64_t block_cache_data_add;
uint64_t bloom_filter_useful;
uint64_t memtable_hit;
uint64_t memtable_miss;
+ uint64_t get_hit_l0;
+ uint64_t get_hit_l1;
+ uint64_t get_hit_l2_and_up;
uint64_t compaction_key_drop_new;
uint64_t compaction_key_drop_obsolete;
uint64_t compaction_key_drop_user;
@@ -11699,11 +12262,17 @@ struct rocksdb_status_counters_t {
uint64_t number_keys_updated;
uint64_t bytes_written;
uint64_t bytes_read;
+ uint64_t number_db_seek;
+ uint64_t number_db_seek_found;
+ uint64_t number_db_next;
+ uint64_t number_db_next_found;
+ uint64_t number_db_prev;
+ uint64_t number_db_prev_found;
+ uint64_t iter_bytes_read;
uint64_t no_file_closes;
uint64_t no_file_opens;
uint64_t no_file_errors;
uint64_t stall_micros;
- uint64_t rate_limit_delay_millis;
uint64_t num_iterators;
uint64_t number_multiget_get;
uint64_t number_multiget_keys_read;
@@ -11736,15 +12305,29 @@ static rocksdb_status_counters_t rocksdb_status_counters;
DEF_SHOW_FUNC(block_cache_miss, BLOCK_CACHE_MISS)
DEF_SHOW_FUNC(block_cache_hit, BLOCK_CACHE_HIT)
DEF_SHOW_FUNC(block_cache_add, BLOCK_CACHE_ADD)
+DEF_SHOW_FUNC(block_cache_add_failures, BLOCK_CACHE_ADD_FAILURES)
DEF_SHOW_FUNC(block_cache_index_miss, BLOCK_CACHE_INDEX_MISS)
DEF_SHOW_FUNC(block_cache_index_hit, BLOCK_CACHE_INDEX_HIT)
+DEF_SHOW_FUNC(block_cache_index_add, BLOCK_CACHE_INDEX_ADD)
+DEF_SHOW_FUNC(block_cache_index_bytes_insert, BLOCK_CACHE_INDEX_BYTES_INSERT)
+DEF_SHOW_FUNC(block_cache_index_bytes_evict, BLOCK_CACHE_INDEX_BYTES_EVICT)
DEF_SHOW_FUNC(block_cache_filter_miss, BLOCK_CACHE_FILTER_MISS)
DEF_SHOW_FUNC(block_cache_filter_hit, BLOCK_CACHE_FILTER_HIT)
+DEF_SHOW_FUNC(block_cache_filter_add, BLOCK_CACHE_FILTER_ADD)
+DEF_SHOW_FUNC(block_cache_filter_bytes_insert, BLOCK_CACHE_FILTER_BYTES_INSERT)
+DEF_SHOW_FUNC(block_cache_filter_bytes_evict, BLOCK_CACHE_FILTER_BYTES_EVICT)
+DEF_SHOW_FUNC(block_cache_bytes_read, BLOCK_CACHE_BYTES_READ)
+DEF_SHOW_FUNC(block_cache_bytes_write, BLOCK_CACHE_BYTES_WRITE)
+DEF_SHOW_FUNC(block_cache_data_bytes_insert, BLOCK_CACHE_DATA_BYTES_INSERT)
DEF_SHOW_FUNC(block_cache_data_miss, BLOCK_CACHE_DATA_MISS)
DEF_SHOW_FUNC(block_cache_data_hit, BLOCK_CACHE_DATA_HIT)
+DEF_SHOW_FUNC(block_cache_data_add, BLOCK_CACHE_DATA_ADD)
DEF_SHOW_FUNC(bloom_filter_useful, BLOOM_FILTER_USEFUL)
DEF_SHOW_FUNC(memtable_hit, MEMTABLE_HIT)
DEF_SHOW_FUNC(memtable_miss, MEMTABLE_MISS)
+DEF_SHOW_FUNC(get_hit_l0, GET_HIT_L0)
+DEF_SHOW_FUNC(get_hit_l1, GET_HIT_L1)
+DEF_SHOW_FUNC(get_hit_l2_and_up, GET_HIT_L2_AND_UP)
DEF_SHOW_FUNC(compaction_key_drop_new, COMPACTION_KEY_DROP_NEWER_ENTRY)
DEF_SHOW_FUNC(compaction_key_drop_obsolete, COMPACTION_KEY_DROP_OBSOLETE)
DEF_SHOW_FUNC(compaction_key_drop_user, COMPACTION_KEY_DROP_USER)
@@ -11753,11 +12336,17 @@ DEF_SHOW_FUNC(number_keys_read, NUMBER_KEYS_READ)
DEF_SHOW_FUNC(number_keys_updated, NUMBER_KEYS_UPDATED)
DEF_SHOW_FUNC(bytes_written, BYTES_WRITTEN)
DEF_SHOW_FUNC(bytes_read, BYTES_READ)
+DEF_SHOW_FUNC(number_db_seek, NUMBER_DB_SEEK)
+DEF_SHOW_FUNC(number_db_seek_found, NUMBER_DB_SEEK_FOUND)
+DEF_SHOW_FUNC(number_db_next, NUMBER_DB_NEXT)
+DEF_SHOW_FUNC(number_db_next_found, NUMBER_DB_NEXT_FOUND)
+DEF_SHOW_FUNC(number_db_prev, NUMBER_DB_PREV)
+DEF_SHOW_FUNC(number_db_prev_found, NUMBER_DB_PREV_FOUND)
+DEF_SHOW_FUNC(iter_bytes_read, ITER_BYTES_READ)
DEF_SHOW_FUNC(no_file_closes, NO_FILE_CLOSES)
DEF_SHOW_FUNC(no_file_opens, NO_FILE_OPENS)
DEF_SHOW_FUNC(no_file_errors, NO_FILE_ERRORS)
DEF_SHOW_FUNC(stall_micros, STALL_MICROS)
-DEF_SHOW_FUNC(rate_limit_delay_millis, RATE_LIMIT_DELAY_MILLIS)
DEF_SHOW_FUNC(num_iterators, NO_ITERATORS)
DEF_SHOW_FUNC(number_multiget_get, NUMBER_MULTIGET_CALLS)
DEF_SHOW_FUNC(number_multiget_keys_read, NUMBER_MULTIGET_KEYS_READ)
@@ -11791,6 +12380,7 @@ static void myrocks_update_status() {
export_stats.rows_updated = global_stats.rows[ROWS_UPDATED];
export_stats.rows_deleted_blind = global_stats.rows[ROWS_DELETED_BLIND];
export_stats.rows_expired = global_stats.rows[ROWS_EXPIRED];
+ export_stats.rows_filtered = global_stats.rows[ROWS_FILTERED];
export_stats.system_rows_deleted = global_stats.system_rows[ROWS_DELETED];
export_stats.system_rows_inserted = global_stats.system_rows[ROWS_INSERTED];
@@ -11829,6 +12419,8 @@ static SHOW_VAR myrocks_status_variables[] = {
SHOW_LONGLONG),
DEF_STATUS_VAR_FUNC("rows_expired", &export_stats.rows_expired,
SHOW_LONGLONG),
+ DEF_STATUS_VAR_FUNC("rows_filtered", &export_stats.rows_filtered,
+ SHOW_LONGLONG),
DEF_STATUS_VAR_FUNC("system_rows_deleted",
&export_stats.system_rows_deleted, SHOW_LONGLONG),
DEF_STATUS_VAR_FUNC("system_rows_inserted",
@@ -11947,15 +12539,29 @@ static SHOW_VAR rocksdb_status_vars[] = {
DEF_STATUS_VAR(block_cache_miss),
DEF_STATUS_VAR(block_cache_hit),
DEF_STATUS_VAR(block_cache_add),
+ DEF_STATUS_VAR(block_cache_add_failures),
DEF_STATUS_VAR(block_cache_index_miss),
DEF_STATUS_VAR(block_cache_index_hit),
+ DEF_STATUS_VAR(block_cache_index_add),
+ DEF_STATUS_VAR(block_cache_index_bytes_insert),
+ DEF_STATUS_VAR(block_cache_index_bytes_evict),
DEF_STATUS_VAR(block_cache_filter_miss),
DEF_STATUS_VAR(block_cache_filter_hit),
+ DEF_STATUS_VAR(block_cache_filter_add),
+ DEF_STATUS_VAR(block_cache_filter_bytes_insert),
+ DEF_STATUS_VAR(block_cache_filter_bytes_evict),
+ DEF_STATUS_VAR(block_cache_bytes_read),
+ DEF_STATUS_VAR(block_cache_bytes_write),
+ DEF_STATUS_VAR(block_cache_data_bytes_insert),
DEF_STATUS_VAR(block_cache_data_miss),
DEF_STATUS_VAR(block_cache_data_hit),
+ DEF_STATUS_VAR(block_cache_data_add),
DEF_STATUS_VAR(bloom_filter_useful),
DEF_STATUS_VAR(memtable_hit),
DEF_STATUS_VAR(memtable_miss),
+ DEF_STATUS_VAR(get_hit_l0),
+ DEF_STATUS_VAR(get_hit_l1),
+ DEF_STATUS_VAR(get_hit_l2_and_up),
DEF_STATUS_VAR(compaction_key_drop_new),
DEF_STATUS_VAR(compaction_key_drop_obsolete),
DEF_STATUS_VAR(compaction_key_drop_user),
@@ -11964,11 +12570,17 @@ static SHOW_VAR rocksdb_status_vars[] = {
DEF_STATUS_VAR(number_keys_updated),
DEF_STATUS_VAR(bytes_written),
DEF_STATUS_VAR(bytes_read),
+ DEF_STATUS_VAR(number_db_seek),
+ DEF_STATUS_VAR(number_db_seek_found),
+ DEF_STATUS_VAR(number_db_next),
+ DEF_STATUS_VAR(number_db_next_found),
+ DEF_STATUS_VAR(number_db_prev),
+ DEF_STATUS_VAR(number_db_prev_found),
+ DEF_STATUS_VAR(iter_bytes_read),
DEF_STATUS_VAR(no_file_closes),
DEF_STATUS_VAR(no_file_opens),
DEF_STATUS_VAR(no_file_errors),
DEF_STATUS_VAR(stall_micros),
- DEF_STATUS_VAR(rate_limit_delay_millis),
DEF_STATUS_VAR(num_iterators),
DEF_STATUS_VAR(number_multiget_get),
DEF_STATUS_VAR(number_multiget_keys_read),
@@ -11994,12 +12606,14 @@ static SHOW_VAR rocksdb_status_vars[] = {
DEF_STATUS_VAR(number_superversion_releases),
DEF_STATUS_VAR(number_superversion_cleanups),
DEF_STATUS_VAR(number_block_not_compressed),
+ DEF_STATUS_VAR_PTR("row_lock_deadlocks", &rocksdb_row_lock_deadlocks,
+ SHOW_LONGLONG),
+ DEF_STATUS_VAR_PTR("row_lock_wait_timeouts",
+ &rocksdb_row_lock_wait_timeouts, SHOW_LONGLONG),
DEF_STATUS_VAR_PTR("snapshot_conflict_errors",
&rocksdb_snapshot_conflict_errors, SHOW_LONGLONG),
DEF_STATUS_VAR_PTR("wal_group_syncs", &rocksdb_wal_group_syncs,
SHOW_LONGLONG),
- DEF_STATUS_VAR_PTR("number_stat_computes", &rocksdb_number_stat_computes,
- SHOW_LONGLONG),
DEF_STATUS_VAR_PTR("number_sst_entry_put", &rocksdb_num_sst_entry_put,
SHOW_LONGLONG),
DEF_STATUS_VAR_PTR("number_sst_entry_delete", &rocksdb_num_sst_entry_delete,
@@ -12066,8 +12680,8 @@ void Rdb_background_thread::run() {
// InnoDB's behavior. For mode never, the wal file isn't even written,
// whereas background writes to the wal file, but issues the syncs in a
// background thread.
- if (rdb && (rocksdb_flush_log_at_trx_commit != FLUSH_LOG_SYNC)) {
- DBUG_ASSERT(!rocksdb_db_options->allow_mmap_writes);
+ if (rdb && (rocksdb_flush_log_at_trx_commit != FLUSH_LOG_SYNC) &&
+ !rocksdb_db_options->allow_mmap_writes) {
const rocksdb::Status s = rdb->FlushWAL(true);
if (!s.ok()) {
rdb_handle_io_error(s, RDB_IO_ERROR_BG_THREAD);
@@ -12079,6 +12693,16 @@ void Rdb_background_thread::run() {
ddl_manager.persist_stats();
}
+/**
+  Decide whether the bloom filter can be used for this lookup and, when it
+  cannot, set up the iterator bounds for the given equality condition.
+
+  @param thd           Current session, forwarded to can_use_bloom_filter().
+  @param kd            Key definition the lookup runs against.
+  @param eq_cond       Equality-condition prefix of the lookup key.
+  @param use_all_keys  Forwarded unchanged to can_use_bloom_filter().
+  @return true if the bloom filter can be used (bounds are left untouched),
+          false otherwise (setup_iterator_bounds() has been called).
+*/
+bool ha_rocksdb::check_bloom_and_set_bounds(THD *thd, const Rdb_key_def &kd,
+                                            const rocksdb::Slice &eq_cond,
+                                            const bool use_all_keys) {
+  if (can_use_bloom_filter(thd, kd, eq_cond, use_all_keys)) {
+    return true;
+  }
+
+  // Bounds are only set up when the bloom filter is not usable.
+  setup_iterator_bounds(kd, eq_cond);
+  return false;
+}
+
/**
Deciding if it is possible to use bloom filter or not.
@@ -12097,9 +12721,9 @@ void Rdb_background_thread::run() {
@param use_all_keys True if all key parts are set with equal conditions.
This is aware of extended keys.
*/
-bool can_use_bloom_filter(THD *thd, const Rdb_key_def &kd,
- const rocksdb::Slice &eq_cond,
- const bool use_all_keys) {
+bool ha_rocksdb::can_use_bloom_filter(THD *thd, const Rdb_key_def &kd,
+ const rocksdb::Slice &eq_cond,
+ const bool use_all_keys) {
bool can_use = false;
if (THDVAR(thd, skip_bloom_filter_on_read)) {
@@ -12237,7 +12861,7 @@ void rdb_handle_io_error(const rocksdb::Status status,
rdb_log_status_error(status, "failed to write to WAL");
/* NO_LINT_DEBUG */
sql_print_error("MyRocks: aborting on WAL write error.");
- abort_with_stack_traces();
+ abort();
break;
}
case RDB_IO_ERROR_BG_THREAD: {
@@ -12248,7 +12872,7 @@ void rdb_handle_io_error(const rocksdb::Status status,
rdb_log_status_error(status, "failed on I/O");
/* NO_LINT_DEBUG */
sql_print_error("MyRocks: aborting on I/O error.");
- abort_with_stack_traces();
+ abort();
break;
}
default:
@@ -12257,16 +12881,17 @@ void rdb_handle_io_error(const rocksdb::Status status,
}
} else if (status.IsCorruption()) {
rdb_log_status_error(status, "data corruption detected!");
+ rdb_persist_corruption_marker();
/* NO_LINT_DEBUG */
sql_print_error("MyRocks: aborting because of data corruption.");
- abort_with_stack_traces();
+ abort();
} else if (!status.ok()) {
switch (err_type) {
case RDB_IO_ERROR_DICT_COMMIT: {
rdb_log_status_error(status, "Failed to write to WAL (dictionary)");
/* NO_LINT_DEBUG */
sql_print_error("MyRocks: aborting on WAL write error.");
- abort_with_stack_traces();
+ abort();
break;
}
default:
@@ -12423,11 +13048,43 @@ void rocksdb_set_collation_exception_list(THD *const thd,
*static_cast<const char**>(var_ptr) = val_copy;
}
-void rocksdb_set_bulk_load(THD *const thd, struct st_mysql_sys_var *const var
- MY_ATTRIBUTE((__unused__)),
- void *const var_ptr, const void *const save) {
- Rdb_transaction *&tx = get_tx_from_thd(thd);
+/**
+  Convert a system-variable value handed to a check callback into a boolean.
+
+  Accepted inputs:
+    - strings "true"/"on"  (case-insensitive) -> TRUE
+    - strings "false"/"off" (case-insensitive) -> FALSE
+    - integers 1 -> TRUE, 0 -> FALSE
+  Anything else (other strings, a NULL string, integers outside 0..1, other
+  value types) is rejected.
+
+  @param value         Value object supplied by the server.
+  @param return_value  [out] Receives TRUE or FALSE; only written on success.
+  @return 0 on success, 1 if the value is not a valid boolean.
+*/
+int mysql_value_to_bool(struct st_mysql_value *value, my_bool *return_value) {
+  const int new_value_type = value->value_type(value);
+  if (new_value_type == MYSQL_VALUE_TYPE_STRING) {
+    char buf[16];
+    int len = sizeof(buf);
+    const char *str = value->val_str(value, buf, &len);
+    if (str && (my_strcasecmp(system_charset_info, "true", str) == 0 ||
+                my_strcasecmp(system_charset_info, "on", str) == 0)) {
+      *return_value = TRUE;
+    } else if (str && (my_strcasecmp(system_charset_info, "false", str) == 0 ||
+                       my_strcasecmp(system_charset_info, "off", str) == 0)) {
+      *return_value = FALSE;
+    } else {
+      return 1;
+    }
+  } else if (new_value_type == MYSQL_VALUE_TYPE_INT) {
+    // Initialize so that a val_int() that does not store a value cannot
+    // leave us reading an indeterminate integer.
+    long long intbuf = 0;
+    value->val_int(value, &intbuf);
+    // Only 0 and 1 are valid booleans. Negative numbers used to slip through
+    // the "> 1" check and were silently treated as FALSE.
+    if (intbuf < 0 || intbuf > 1)
+      return 1;
+    *return_value = intbuf > 0 ? TRUE : FALSE;
+  } else {
+    return 1;
+  }
+
+  return 0;
+}
+
+int rocksdb_check_bulk_load(
+ THD *const thd, struct st_mysql_sys_var *var MY_ATTRIBUTE((__unused__)),
+ void *save, struct st_mysql_value *value) {
+ my_bool new_value;
+ if (mysql_value_to_bool(value, &new_value) != 0) {
+ return 1;
+ }
+ Rdb_transaction *&tx = get_tx_from_thd(thd);
if (tx != nullptr) {
const int rc = tx->finish_bulk_load();
if (rc != 0) {
@@ -12435,30 +13092,32 @@ void rocksdb_set_bulk_load(THD *const thd, struct st_mysql_sys_var *const var
sql_print_error("RocksDB: Error %d finalizing last SST file while "
"setting bulk loading variable",
rc);
- /*
- MariaDB doesn't do the following:
- abort_with_stack_traces();
- because it doesn't seem a good idea to crash a server when a user makes
- a mistake.
- Instead, we return an error to the user. The error has already been
- produced inside ha_rocksdb::finalize_bulk_load().
- */
+ THDVAR(thd, bulk_load) = 0;
+ return 1;
}
}
- *static_cast<bool *>(var_ptr) = *static_cast<const bool *>(save);
+ *static_cast<bool *>(save) = new_value;
+ return 0;
}
-void rocksdb_set_bulk_load_allow_unsorted(
- THD *const thd,
- struct st_mysql_sys_var *const var MY_ATTRIBUTE((__unused__)),
- void *const var_ptr, const void *const save) {
+int rocksdb_check_bulk_load_allow_unsorted(
+ THD *const thd, struct st_mysql_sys_var *var MY_ATTRIBUTE((__unused__)),
+ void *save, struct st_mysql_value *value) {
+ my_bool new_value;
+ if (mysql_value_to_bool(value, &new_value) != 0) {
+ return 1;
+ }
+
if (THDVAR(thd, bulk_load)) {
my_error(ER_ERROR_WHEN_EXECUTING_COMMAND, MYF(0), "SET",
"Cannot change this setting while bulk load is enabled");
- } else {
- *static_cast<bool *>(var_ptr) = *static_cast<const bool *>(save);
+
+ return 1;
}
+
+ *static_cast<bool *>(save) = new_value;
+ return 0;
}
static void rocksdb_set_max_background_jobs(THD *thd,
@@ -12489,35 +13148,116 @@ static void rocksdb_set_max_background_jobs(THD *thd,
RDB_MUTEX_UNLOCK_CHECK(rdb_sysvars_mutex);
}
-void rocksdb_set_update_cf_options(THD *const /* unused */,
- struct st_mysql_sys_var *const /* unused */,
- void *const var_ptr,
- const void *const save) {
+/**
+  Update callback that applies a new bytes_per_sync value to the running
+  RocksDB instance.
+
+  The new value is read from `save`. If it differs from the value cached in
+  rocksdb_db_options, the cached value is updated and the change is pushed to
+  RocksDB via SetDBOptions(). A failure of SetDBOptions() is logged as a
+  warning; the cached value is not rolled back in that case.
+
+  The whole operation runs under rdb_sysvars_mutex.
+
+  @param save  Pointer to the new value (ulonglong); must not be null.
+*/
+static void rocksdb_set_bytes_per_sync(
+    THD *thd MY_ATTRIBUTE((__unused__)),
+    struct st_mysql_sys_var *const var MY_ATTRIBUTE((__unused__)),
+    void *const var_ptr MY_ATTRIBUTE((__unused__)), const void *const save) {
+  DBUG_ASSERT(save != nullptr);
+  DBUG_ASSERT(rocksdb_db_options != nullptr);
+  DBUG_ASSERT(rocksdb_db_options->env != nullptr);
+
+  RDB_MUTEX_LOCK_CHECK(rdb_sysvars_mutex);
+
+  const ulonglong new_val = *static_cast<const ulonglong *>(save);
+
+  if (rocksdb_db_options->bytes_per_sync != new_val) {
+    rocksdb_db_options->bytes_per_sync = new_val;
+    rocksdb::Status s =
+        rdb->SetDBOptions({{"bytes_per_sync", std::to_string(new_val)}});
+
+    if (!s.ok()) {
+      /* NO_LINT_DEBUG */
+      sql_print_warning("MyRocks: failed to update bytes_per_sync. "
+                        "Status code = %d, status = %s.",
+                        s.code(), s.ToString().c_str());
+    }
+  }
+
+  RDB_MUTEX_UNLOCK_CHECK(rdb_sysvars_mutex);
+}
+
+/**
+  Update callback that applies a new wal_bytes_per_sync value to the running
+  RocksDB instance.
+
+  The new value is read from `save`. If it differs from the value cached in
+  rocksdb_db_options, the cached value is updated and the change is pushed to
+  RocksDB via SetDBOptions(). A failure of SetDBOptions() is logged as a
+  warning; the cached value is not rolled back in that case.
+
+  The whole operation runs under rdb_sysvars_mutex.
+
+  @param save  Pointer to the new value (ulonglong); must not be null.
+*/
+static void rocksdb_set_wal_bytes_per_sync(
+    THD *thd MY_ATTRIBUTE((__unused__)),
+    struct st_mysql_sys_var *const var MY_ATTRIBUTE((__unused__)),
+    void *const var_ptr MY_ATTRIBUTE((__unused__)), const void *const save) {
+  DBUG_ASSERT(save != nullptr);
+  DBUG_ASSERT(rocksdb_db_options != nullptr);
+  DBUG_ASSERT(rocksdb_db_options->env != nullptr);
+
+  RDB_MUTEX_LOCK_CHECK(rdb_sysvars_mutex);
+
+  const ulonglong new_val = *static_cast<const ulonglong *>(save);
+
+  if (rocksdb_db_options->wal_bytes_per_sync != new_val) {
+    rocksdb_db_options->wal_bytes_per_sync = new_val;
+    rocksdb::Status s =
+        rdb->SetDBOptions({{"wal_bytes_per_sync", std::to_string(new_val)}});
+
+    if (!s.ok()) {
+      /* NO_LINT_DEBUG */
+      sql_print_warning("MyRocks: failed to update wal_bytes_per_sync. "
+                        "Status code = %d, status = %s.",
+                        s.code(), s.ToString().c_str());
+    }
+  }
+
+  RDB_MUTEX_UNLOCK_CHECK(rdb_sysvars_mutex);
+}
+
+/**
+  Check callback for the column-family options update variable.
+
+  Extracts the new string value, stores it in `save` for the update callback
+  and verifies that it parses as a column-family options string.
+
+  @param thd    Current session; used to allocate a copy of the value when
+                val_str() returned it in our local buffer.
+  @param save   [out] Receives the validated string pointer (or nullptr).
+  @param value  Value object supplied by the server.
+  @return HA_EXIT_SUCCESS if the value is NULL or parses correctly,
+          HA_EXIT_FAILURE otherwise (ER_WRONG_VALUE_FOR_VAR is raised on a
+          parse failure).
+*/
+static int
+rocksdb_validate_update_cf_options(THD *thd,
+                                   struct st_mysql_sys_var * /*unused*/,
+                                   void *save, struct st_mysql_value *value) {
+
+  char buff[STRING_BUFFER_USUAL_SIZE];
+  int length = sizeof(buff);
+  const char *str = value->val_str(value, buff, &length);
+
+  // val_str() may return the string inside `buff`, which lives on this
+  // function's stack. The pointer stored in `save` is consumed after we
+  // return, so move such a value to session-owned memory first.
+  if (str == buff) {
+    str = thd->strmake(str, length);
+    if (str == nullptr) {
+      *static_cast<const char **>(save) = nullptr;
+      return HA_EXIT_FAILURE;
+    }
+  }
+  *static_cast<const char **>(save) = str;
+
+  if (str == nullptr) {
+    return HA_EXIT_SUCCESS;
+  }
+
+  Rdb_cf_options::Name_to_config_t option_map;
+
+  // Basic sanity checking and parsing the options into a map. If this fails
+  // then there's no point to proceed.
+  if (!Rdb_cf_options::parse_cf_options(str, &option_map)) {
+    my_error(ER_WRONG_VALUE_FOR_VAR, MYF(0), "rocksdb_update_cf_options", str);
+    return HA_EXIT_FAILURE;
+  }
+  return HA_EXIT_SUCCESS;
+}
+
+static void
+rocksdb_set_update_cf_options(THD *const /* unused */,
+ struct st_mysql_sys_var *const /* unused */,
+ void *const var_ptr, const void *const save) {
const char *const val = *static_cast<const char *const *>(save);
+ RDB_MUTEX_LOCK_CHECK(rdb_sysvars_mutex);
+
if (!val) {
- // NO_LINT_DEBUG
- sql_print_warning("MyRocks: NULL is not a valid option for updates to "
- "column family settings.");
+ *reinterpret_cast<char **>(var_ptr) = nullptr;
+ RDB_MUTEX_UNLOCK_CHECK(rdb_sysvars_mutex);
return;
}
- RDB_MUTEX_LOCK_CHECK(rdb_sysvars_mutex);
-
DBUG_ASSERT(val != nullptr);
+ // Reset the pointers regardless of how much success we had with updating
+ // the CF options. This will result in consistent behavior and avoids
+ // dealing with cases when only a subset of CF-s was successfully updated.
+ *reinterpret_cast<char **>(var_ptr) = my_strdup(val, MYF(0));
+
// Do the real work of applying the changes.
Rdb_cf_options::Name_to_config_t option_map;
- // Basic sanity checking and parsing the options into a map. If this fails
- // then there's no point to proceed.
+ // This should never fail, because of rocksdb_validate_update_cf_options
if (!Rdb_cf_options::parse_cf_options(val, &option_map)) {
my_free(*reinterpret_cast<char**>(var_ptr));
- *reinterpret_cast<char**>(var_ptr) = nullptr;
-
- // NO_LINT_DEBUG
- sql_print_warning("MyRocks: failed to parse the updated column family "
- "options = '%s'.", val);
RDB_MUTEX_UNLOCK_CHECK(rdb_sysvars_mutex);
return;
}
@@ -12577,16 +13317,6 @@ void rocksdb_set_update_cf_options(THD *const /* unused */,
}
}
- // Reset the pointers regardless of how much success we had with updating
- // the CF options. This will results in consistent behavior and avoids
- // dealing with cases when only a subset of CF-s was successfully updated.
- if (val) {
- my_free(*reinterpret_cast<char**>(var_ptr));
- *reinterpret_cast<char**>(var_ptr) = my_strdup(val, MYF(0));
- } else {
- *reinterpret_cast<char**>(var_ptr) = nullptr;
- }
-
// Our caller (`plugin_var_memalloc_global_update`) will call `my_free` to
// free up resources used before.
@@ -12655,6 +13385,12 @@ double ha_rocksdb::read_time(uint index, uint ranges, ha_rows rows) {
DBUG_RETURN((rows / 20.0) + 1);
}
+/**
+  Build the path of the corruption marker file.
+
+  @return rocksdb_datadir with "/ROCKSDB_CORRUPTED" appended.
+*/
+std::string rdb_corruption_marker_file_name() {
+  return std::string(rocksdb_datadir) + "/ROCKSDB_CORRUPTED";
+}
+
void sql_print_verbose_info(const char *format, ...)
{
va_list args;
@@ -12712,5 +13448,6 @@ maria_declare_plugin(rocksdb_se){
myrocks::rdb_i_s_cfoptions, myrocks::rdb_i_s_compact_stats,
myrocks::rdb_i_s_global_info, myrocks::rdb_i_s_ddl,
myrocks::rdb_i_s_index_file_map, myrocks::rdb_i_s_lock_info,
- myrocks::rdb_i_s_trx_info
+ myrocks::rdb_i_s_trx_info,
+ myrocks::rdb_i_s_deadlock_info
maria_declare_plugin_end;
diff --git a/storage/rocksdb/ha_rocksdb.h b/storage/rocksdb/ha_rocksdb.h
index d929ca15093..0b008a64390 100644
--- a/storage/rocksdb/ha_rocksdb.h
+++ b/storage/rocksdb/ha_rocksdb.h
@@ -41,6 +41,7 @@
/* RocksDB header files */
#include "rocksdb/cache.h"
+#include "rocksdb/merge_operator.h"
#include "rocksdb/perf_context.h"
#include "rocksdb/sst_file_manager.h"
#include "rocksdb/statistics.h"
@@ -93,6 +94,25 @@ struct Rdb_trx_info {
std::vector<Rdb_trx_info> rdb_get_all_trx_info();
/*
+ * class for exporting deadlock transaction information for
+ * information_schema.rocksdb_deadlock
+ */
+struct Rdb_deadlock_info { // one entry of the list returned by rdb_get_deadlock_info()
+ struct Rdb_dl_trx_info { // per-transaction details stored in `path`
+ ulonglong trx_id; // presumably the id of the transaction this entry describes -- confirm
+ std::string cf_name; // presumably the column family name of the contended key -- confirm
+ std::string waiting_key; // presumably the key the transaction is waiting on -- confirm
+ bool exclusive_lock; // presumably true when an exclusive lock is requested -- confirm
+ std::string index_name; // presumably the index the key belongs to -- confirm
+ std::string table_name; // presumably the table the key belongs to -- confirm
+ };
+ std::vector <Rdb_dl_trx_info> path; // sequence of Rdb_dl_trx_info entries for this deadlock
+ ulonglong victim_trx_id; // presumably the transaction chosen as deadlock victim -- confirm
+};
+
+std::vector<Rdb_deadlock_info> rdb_get_deadlock_info();
+
+/*
This is
- the name of the default Column Family (the CF which stores indexes which
didn't explicitly specify which CF they are in)
@@ -250,6 +270,8 @@ enum collations_used {
*/
#define ROCKSDB_SIZEOF_TTL_RECORD sizeof(longlong)
+#define ROCKSDB_SIZEOF_AUTOINC_VALUE sizeof(longlong)
+
/*
Maximum index prefix length in bytes.
*/
@@ -367,6 +389,7 @@ enum operation_type : int {
ROWS_UPDATED,
ROWS_DELETED_BLIND,
ROWS_EXPIRED,
+ ROWS_FILTERED,
ROWS_HIDDEN_NO_SNAPSHOT,
ROWS_MAX
};
@@ -400,6 +423,7 @@ struct st_export_stats {
ulonglong rows_updated;
ulonglong rows_deleted_blind;
ulonglong rows_expired;
+ ulonglong rows_filtered;
ulonglong rows_hidden_no_snapshot;
ulonglong system_rows_deleted;
@@ -538,6 +562,12 @@ class ha_rocksdb : public my_core::handler {
uchar *m_dup_sk_packed_tuple;
uchar *m_dup_sk_packed_tuple_old;
+ /* Buffers used for passing upper/lower bound eq conditions. */
+ uchar *m_eq_cond_lower_bound;
+ uchar *m_eq_cond_upper_bound;
+ rocksdb::Slice m_eq_cond_lower_bound_slice;
+ rocksdb::Slice m_eq_cond_upper_bound_slice;
+
/*
Temporary space for packing VARCHARs (we provide it to
pack_record()/pack_index_tuple() calls).
@@ -567,7 +597,7 @@ class ha_rocksdb : public my_core::handler {
This is used only when we get the record with rocksdb's Get() call (The
other option is when we get a rocksdb::Slice from an iterator)
*/
- std::string m_retrieved_record;
+ rocksdb::PinnableSlice m_retrieved_record;
/* Type of locking to apply to rows */
enum { RDB_LOCK_NONE, RDB_LOCK_READ, RDB_LOCK_WRITE } m_lock_rows;
@@ -600,12 +630,7 @@ class ha_rocksdb : public my_core::handler {
bool m_update_scope_is_valid;
/* SST information used for bulk loading the primary key */
- std::unique_ptr<Rdb_sst_info> m_sst_info;
- /* External merge sorts for bulk load: key ID -> merge sort instance */
- std::unordered_map<GL_INDEX_ID, Rdb_index_merge> m_key_merge;
- Rdb_transaction *m_bulk_load_tx;
- /* Mutex to protect finalizing bulk load */
- mysql_mutex_t m_bulk_load_mutex;
+ std::shared_ptr<Rdb_sst_info> m_sst_info;
/*
MySQL index number for duplicate key error
@@ -620,13 +645,17 @@ class ha_rocksdb : public my_core::handler {
int secondary_index_read(const int keyno, uchar *const buf)
MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
void setup_iterator_for_rnd_scan();
- void setup_scan_iterator(const Rdb_key_def &kd, rocksdb::Slice *const slice)
- MY_ATTRIBUTE((__nonnull__)) {
- setup_scan_iterator(kd, slice, false, 0);
- }
bool is_ascending(const Rdb_key_def &keydef,
enum ha_rkey_function find_flag) const
MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
+ void setup_iterator_bounds(const Rdb_key_def &kd,
+ const rocksdb::Slice &eq_cond);
+ bool can_use_bloom_filter(THD *thd, const Rdb_key_def &kd,
+ const rocksdb::Slice &eq_cond,
+ const bool use_all_keys);
+ bool check_bloom_and_set_bounds(THD *thd, const Rdb_key_def &kd,
+ const rocksdb::Slice &eq_cond,
+ const bool use_all_keys);
void setup_scan_iterator(const Rdb_key_def &kd, rocksdb::Slice *slice,
const bool use_all_keys, const uint eq_cond_len)
MY_ATTRIBUTE((__nonnull__));
@@ -635,7 +664,8 @@ class ha_rocksdb : public my_core::handler {
rocksdb::Status
get_for_update(Rdb_transaction *const tx,
rocksdb::ColumnFamilyHandle *const column_family,
- const rocksdb::Slice &key, std::string *const value) const;
+ const rocksdb::Slice &key,
+ rocksdb::PinnableSlice *value) const;
int get_row_by_rowid(uchar *const buf, const char *const rowid,
const uint rowid_size, const bool skip_lookup = false,
@@ -649,8 +679,13 @@ class ha_rocksdb : public my_core::handler {
rowid_size, skip_lookup, skip_ttl_check);
}
- void update_auto_incr_val();
void load_auto_incr_value();
+ ulonglong load_auto_incr_value_from_index();
+ void update_auto_incr_val(ulonglong val);
+ void update_auto_incr_val_from_field();
+ rocksdb::Status get_datadic_auto_incr(Rdb_transaction *const tx,
+ const GL_INDEX_ID &gl_index_id,
+ ulonglong *new_val) const;
longlong update_hidden_pk_val();
int load_hidden_pk_value() MY_ATTRIBUTE((__warn_unused_result__));
int read_hidden_pk_id_from_rowkey(longlong *const hidden_pk_id)
@@ -696,6 +731,12 @@ class ha_rocksdb : public my_core::handler {
*/
std::vector<READ_FIELD> m_decoders_vect;
+ /*
+ This tells if any field which is part of the key needs to be unpacked and
+ decoded.
+ */
+ bool m_key_requested = false;
+
/* Setup field_decoders based on type of scan and table->read_set */
void setup_read_decoders();
@@ -766,9 +807,12 @@ public:
my_core::TABLE_SHARE *const table_arg);
~ha_rocksdb() {
int err MY_ATTRIBUTE((__unused__));
- err = finalize_bulk_load();
- DBUG_ASSERT(err == 0);
- mysql_mutex_destroy(&m_bulk_load_mutex);
+ err = finalize_bulk_load(false);
+ if (err != 0) {
+ sql_print_error("RocksDB: Error %d finalizing bulk load while closing "
+ "handler.",
+ err);
+ }
}
/** @brief
@@ -794,10 +838,9 @@ public:
*/
/*
- See if this is the same base table - this should only be true for different
- partitions of the same table.
+ Returns the table's base name
*/
- bool same_table(const ha_rocksdb &other) const;
+ const std::string &get_table_basename() const;
/** @brief
This is a list of flags that indicate what functionality the storage engine
@@ -1210,8 +1253,6 @@ private:
Rdb_tbl_def *get_table_if_exists(const char *const tablename)
MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
void read_thd_vars(THD *const thd) MY_ATTRIBUTE((__nonnull__));
- const char *thd_rocksdb_tmpdir()
- MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
bool contains_foreign_key(THD *const thd)
MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
@@ -1221,6 +1262,9 @@ private:
const std::unordered_set<std::shared_ptr<Rdb_key_def>> &indexes)
MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
+ int finalize_bulk_load(bool print_client_error = true)
+ MY_ATTRIBUTE((__warn_unused_result__));
+
public:
int index_init(uint idx, bool sorted) override
MY_ATTRIBUTE((__warn_unused_result__));
@@ -1264,7 +1308,7 @@ public:
DBUG_ENTER_FUNC();
/* Free blob data */
- m_retrieved_record.clear();
+ m_retrieved_record.Reset();
DBUG_RETURN(HA_EXIT_SUCCESS);
}
@@ -1335,8 +1379,6 @@ public:
my_core::Alter_inplace_info *const ha_alter_info,
bool commit) override;
- int finalize_bulk_load() MY_ATTRIBUTE((__warn_unused_result__));
-
#ifdef MARIAROCKS_NOT_YET // MDEV-10976
void set_use_read_free_rpl(const char *const whitelist);
#endif
@@ -1391,18 +1433,22 @@ struct Rdb_inplace_alter_ctx : public my_core::inplace_alter_handler_ctx {
/* Stores number of keys to drop */
const uint m_n_dropped_keys;
+ /* Stores the largest current auto increment value in the index */
+ const ulonglong m_max_auto_incr;
+
Rdb_inplace_alter_ctx(
Rdb_tbl_def *new_tdef, std::shared_ptr<Rdb_key_def> *old_key_descr,
std::shared_ptr<Rdb_key_def> *new_key_descr, uint old_n_keys,
uint new_n_keys,
std::unordered_set<std::shared_ptr<Rdb_key_def>> added_indexes,
std::unordered_set<GL_INDEX_ID> dropped_index_ids, uint n_added_keys,
- uint n_dropped_keys)
+ uint n_dropped_keys, ulonglong max_auto_incr)
: my_core::inplace_alter_handler_ctx(), m_new_tdef(new_tdef),
m_old_key_descr(old_key_descr), m_new_key_descr(new_key_descr),
m_old_n_keys(old_n_keys), m_new_n_keys(new_n_keys),
m_added_indexes(added_indexes), m_dropped_index_ids(dropped_index_ids),
- m_n_added_keys(n_added_keys), m_n_dropped_keys(n_dropped_keys) {}
+ m_n_added_keys(n_added_keys), m_n_dropped_keys(n_dropped_keys),
+ m_max_auto_incr(max_auto_incr) {}
~Rdb_inplace_alter_ctx() {}
@@ -1412,6 +1458,9 @@ private:
Rdb_inplace_alter_ctx &operator=(const Rdb_inplace_alter_ctx &);
};
+// file name indicating RocksDB data corruption
+std::string rdb_corruption_marker_file_name();
+
const int MYROCKS_MARIADB_PLUGIN_MATURITY_LEVEL= MariaDB_PLUGIN_MATURITY_GAMMA;
extern bool prevent_myrocks_loading;
diff --git a/storage/rocksdb/mysql-test/rocksdb/include/autoinc_crash_safe.inc b/storage/rocksdb/mysql-test/rocksdb/include/autoinc_crash_safe.inc
new file mode 100644
index 00000000000..ba2e7ace0c5
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/include/autoinc_crash_safe.inc
@@ -0,0 +1,150 @@
+--echo #
+--echo # Testing concurrent transactions.
+--echo #
+
+--source include/count_sessions.inc
+connect (con1,localhost,root,,);
+connect (con2,localhost,root,,);
+connect (con3,localhost,root,,);
+
+connection con1;
+begin;
+insert into t values (); # 1
+
+connection con2;
+begin;
+insert into t values (); # 2
+
+connection con3;
+begin;
+insert into t values (); # 3
+
+connection con1;
+insert into t values (); # 4
+
+connection con2;
+insert into t values (); # 5
+
+connection con3;
+insert into t values (); # 6
+
+connection con2;
+commit;
+
+connection con3;
+rollback;
+
+connection con1;
+commit;
+
+delete from t;
+
+--echo # Master value before restart
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+
+--echo # Slave value before restart
+sync_slave_with_master;
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+
+connection slave;
+--source include/stop_slave.inc
+--let $rpl_server_number = 1
+--source include/rpl_restart_server.inc
+
+connection default;
+--echo # Master value after restart
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+
+--let $rpl_server_number = 2
+--source include/rpl_restart_server.inc
+
+connection slave;
+--source include/start_slave.inc
+--echo # Slave value after restart
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+
+disconnect con1;
+disconnect con2;
+disconnect con3;
+--source include/wait_until_count_sessions.inc
+
+--echo #
+--echo # Testing interaction of merge markers with various DDL statements.
+--echo #
+connection slave;
+--source include/stop_slave.inc
+
+connection default;
+
+--echo # Drop and add primary key.
+alter table t modify i int;
+alter table t drop primary key;
+alter table t add primary key (i);
+alter table t modify i int auto_increment;
+
+--let $rpl_server_number = 1
+--source include/rpl_restart_server.inc
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+
+--echo # Remove auto_increment property.
+alter table t modify i int;
+--let $rpl_server_number = 1
+--source include/rpl_restart_server.inc
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+
+--echo # Add auto_increment property.
+insert into t values (123);
+alter table t modify i int auto_increment;
+--let $rpl_server_number = 1
+--source include/rpl_restart_server.inc
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+
+--echo # Add column j.
+alter table t add column j int;
+--let $rpl_server_number = 1
+--source include/rpl_restart_server.inc
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+
+--echo # Rename tables.
+rename table t to t2;
+rename table t2 to t;
+
+--let $rpl_server_number = 1
+--source include/rpl_restart_server.inc
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+
+--echo # Change auto_increment property
+alter table t auto_increment = 1000;
+--let $rpl_server_number = 1
+--source include/rpl_restart_server.inc
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+
+alter table t auto_increment = 1;
+--let $rpl_server_number = 1
+--source include/rpl_restart_server.inc
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+
+alter table t drop primary key, add key (i), auto_increment = 1;
+--let $rpl_server_number = 1
+--source include/rpl_restart_server.inc
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+
+alter table t add key (j), auto_increment = 1;
+--let $rpl_server_number = 1
+--source include/rpl_restart_server.inc
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+
+alter table t modify i int;
+alter table t add column (k int auto_increment), add key(k), auto_increment=15;
+--let $rpl_server_number = 1
+--source include/rpl_restart_server.inc
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+
+--echo # Drop table.
+drop table t;
+
+--let $rpl_server_number = 1
+--source include/rpl_restart_server.inc
+
+connection slave;
+--source include/start_slave.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load.inc b/storage/rocksdb/mysql-test/rocksdb/include/bulk_load.inc
index 87cb1f70f32..6472b969ce6 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load.inc
+++ b/storage/rocksdb/mysql-test/rocksdb/include/bulk_load.inc
@@ -1,6 +1,4 @@
---disable_warnings
-DROP TABLE IF EXISTS t1, t2, t3;
---enable_warnings
+--source include/count_sessions.inc
if ($data_order_desc)
{
@@ -20,7 +18,7 @@ eval CREATE TABLE t1(
b CHAR(30),
PRIMARY KEY(pk) COMMENT "$pk_cf",
KEY(a)
-) COLLATE 'latin1_bin';
+) ENGINE=ROCKSDB COLLATE 'latin1_bin';
# Create a second identical table to validate that bulk loading different
# tables in the same session works
@@ -30,7 +28,7 @@ eval CREATE TABLE t2(
b CHAR(30),
PRIMARY KEY(pk) COMMENT "$pk_cf",
KEY(a)
-) COLLATE 'latin1_bin';
+) ENGINE=ROCKSDB COLLATE 'latin1_bin';
# Create a third table using partitions to validate that bulk loading works
# across a partitioned table
@@ -40,7 +38,7 @@ eval CREATE TABLE t3(
b CHAR(30),
PRIMARY KEY(pk) COMMENT "$pk_cf",
KEY(a)
-) COLLATE 'latin1_bin' PARTITION BY KEY() PARTITIONS 4;
+) ENGINE=ROCKSDB COLLATE 'latin1_bin' PARTITION BY KEY() PARTITIONS 4;
--let $file = `SELECT CONCAT(@@datadir, "test_loadfile.txt")`
@@ -154,3 +152,5 @@ EOF
# Cleanup
disconnect other;
DROP TABLE t1, t2, t3;
+
+--source include/wait_until_count_sessions.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/include/bulk_load_unsorted.inc b/storage/rocksdb/mysql-test/rocksdb/include/bulk_load_unsorted.inc
new file mode 100644
index 00000000000..4a3158e814c
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/include/bulk_load_unsorted.inc
@@ -0,0 +1,144 @@
+--source include/have_partition.inc
+--source include/count_sessions.inc
+
+SET rocksdb_bulk_load_size=3;
+SET rocksdb_bulk_load_allow_unsorted=1;
+
+### Test individual INSERTs ###
+
+# A table with only a PK won't have rows until the bulk load is finished
+eval CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "$pk_cf")
+ ENGINE=ROCKSDB;
+SET rocksdb_bulk_load=1;
+--disable_query_log
+let $sign = 1;
+let $max = 5;
+let $i = 1;
+while ($i <= $max) {
+ let $a = 1 + $sign * $i;
+ let $b = 1 - $sign * $i;
+ let $sign = -$sign;
+ let $insert = INSERT INTO t1 VALUES ($a, $b);
+ eval $insert;
+ inc $i;
+}
+--enable_query_log
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+SET rocksdb_bulk_load=0;
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+DROP TABLE t1;
+
+# A table with a PK and a SK shows rows immediately
+eval CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "$pk_cf", KEY(b))
+ ENGINE=ROCKSDB;
+SET rocksdb_bulk_load=1;
+--disable_query_log
+let $sign = 1;
+let $max = 5;
+let $i = 1;
+while ($i <= $max) {
+ let $a = 1 + $sign * $i;
+ let $b = 1 - $sign * $i;
+ let $sign = -$sign;
+ let $insert = INSERT INTO t1 VALUES ($a, $b);
+ eval $insert;
+ inc $i;
+}
+--enable_query_log
+
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+SET rocksdb_bulk_load=0;
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+DROP TABLE t1;
+
+# Inserting into another table finishes bulk load to the previous table
+eval CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "$pk_cf")
+ ENGINE=ROCKSDB;
+eval CREATE TABLE t2(a INT, b INT, PRIMARY KEY(a) COMMENT "$pk_cf")
+ ENGINE=ROCKSDB;
+
+SET rocksdb_bulk_load=1;
+INSERT INTO t1 VALUES (1,1);
+INSERT INTO t2 VALUES (1,1);
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+INSERT INTO t1 VALUES (2,2);
+SELECT * FROM t2 FORCE INDEX (PRIMARY);
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+SET rocksdb_bulk_load=0;
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+DROP TABLE t1, t2;
+
+### Test bulk load from a file ###
+eval CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "$pk_cf")
+ ENGINE=ROCKSDB;
+eval CREATE TABLE t2(a INT, b INT, PRIMARY KEY(b) COMMENT "$pk_cf")
+ ENGINE=ROCKSDB;
+eval CREATE TABLE t3(a INT, b INT, PRIMARY KEY(a) COMMENT "$pk_cf")
+ ENGINE=ROCKSDB PARTITION BY KEY() PARTITIONS 4;
+
+--let $file = `SELECT CONCAT(@@datadir, "test_loadfile.txt")`
+# Create a text file with data to import into the table.
+# PK and SK are not in any order
+--let ROCKSDB_INFILE = $file
+perl;
+my $fn = $ENV{'ROCKSDB_INFILE'};
+open(my $fh, '>', $fn) || die "perl open($fn): $!";
+my $max = 5000000;
+my $sign = 1;
+for (my $ii = 0; $ii < $max; $ii++)
+{
+ my $a = 1 + $sign * $ii;
+ my $b = 1 - $sign * $ii;
+ $sign = -$sign;
+ print $fh "$a\t$b\n";
+}
+close($fh);
+EOF
+--file_exists $file
+
+# Make sure a snapshot held by another user doesn't block the bulk load
+connect (other,localhost,root,,);
+set session transaction isolation level repeatable read;
+select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS';
+start transaction with consistent snapshot;
+select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS';
+
+connection default;
+set rocksdb_bulk_load=1;
+set rocksdb_bulk_load_size=100000;
+--disable_query_log
+--echo LOAD DATA INFILE <input_file> INTO TABLE t1;
+eval LOAD DATA INFILE '$file' INTO TABLE t1;
+--echo LOAD DATA INFILE <input_file> INTO TABLE t2;
+eval LOAD DATA INFILE '$file' INTO TABLE t2;
+--echo LOAD DATA INFILE <input_file> INTO TABLE t3;
+eval LOAD DATA INFILE '$file' INTO TABLE t3;
+--enable_query_log
+set rocksdb_bulk_load=0;
+
+--remove_file $file
+
+# Make sure row count index stats are correct
+--replace_column 6 # 7 # 8 # 9 #
+SHOW TABLE STATUS WHERE name LIKE 't%';
+
+ANALYZE TABLE t1, t2, t3;
+
+--replace_column 6 # 7 # 8 # 9 #
+SHOW TABLE STATUS WHERE name LIKE 't%';
+
+# Make sure all the data is there.
+select count(a) from t1;
+select count(b) from t1;
+select count(a) from t2;
+select count(b) from t2;
+select count(a) from t3;
+select count(b) from t3;
+
+SELECT * FROM t1 FORCE INDEX (PRIMARY) LIMIT 3;
+SELECT * FROM t2 FORCE INDEX (PRIMARY) LIMIT 3;
+
+disconnect other;
+DROP TABLE t1, t2, t3;
+
+--source include/wait_until_count_sessions.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/include/restart_mysqld_with_invalid_option.inc b/storage/rocksdb/mysql-test/rocksdb/include/restart_mysqld_with_invalid_option.inc
new file mode 100644
index 00000000000..8eef7ed2162
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/include/restart_mysqld_with_invalid_option.inc
@@ -0,0 +1,8 @@
+--source include/shutdown_mysqld.inc
+
+# Expect the server to fail to come up with these options
+--error 1
+--exec $MYSQLD_CMD --plugin_load=$HA_ROCKSDB_SO $_mysqld_option
+
+# Restart the server with the default options
+--source include/start_mysqld.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/include/start_mysqld_with_option.inc b/storage/rocksdb/mysql-test/rocksdb/include/start_mysqld_with_option.inc
new file mode 100644
index 00000000000..73e30b3e46c
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/include/start_mysqld_with_option.inc
@@ -0,0 +1,14 @@
+# Include this script only after using shutdown_mysqld.inc
+# where $_expect_file_name was initialized.
+# Write file to make mysql-test-run.pl start up the server again
+--exec echo "restart:$_mysqld_option" > $_expect_file_name
+
+# Turn on reconnect
+--enable_reconnect
+
+# Call script that will poll the server waiting for it to be back online again
+--source include/wait_until_connected_again.inc
+
+# Turn off reconnect again
+--disable_reconnect
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/add_index_inplace_sstfilewriter.result b/storage/rocksdb/mysql-test/rocksdb/r/add_index_inplace_sstfilewriter.result
index 0c3ad720194..18365338d0c 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/add_index_inplace_sstfilewriter.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/add_index_inplace_sstfilewriter.result
@@ -15,6 +15,10 @@ count(b)
300000
ALTER TABLE t1 ADD INDEX kb(b), ALGORITHM=INPLACE;
ALTER TABLE t1 ADD INDEX kb_copy(b), ALGORITHM=COPY;
+ERROR HY000: Status error 10 received from RocksDB: Operation aborted: Failed to acquire lock due to max_num_locks limit
+set session rocksdb_bulk_load=1;
+ALTER TABLE t1 ADD INDEX kb_copy(b), ALGORITHM=COPY;
+set session rocksdb_bulk_load=0;
SELECT COUNT(*) as c FROM
(SELECT COALESCE(LOWER(CONV(BIT_XOR(CAST(CRC32(CONCAT_WS('#', `b`, CONCAT(ISNULL(`b`)))) AS UNSIGNED)), 10, 16)), 0) AS crc FROM `t1` FORCE INDEX(`kb`)
UNION DISTINCT
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/allow_no_primary_key_with_sk.result b/storage/rocksdb/mysql-test/rocksdb/r/allow_no_primary_key_with_sk.result
index f8508febb01..5d947603ec5 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/allow_no_primary_key_with_sk.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/allow_no_primary_key_with_sk.result
@@ -778,3 +778,20 @@ set global rocksdb_force_flush_memtable_now = true;
select * from t1;
col1 col2 extra
DROP TABLE t1;
+create table t1 (i int auto_increment, key(i)) engine=rocksdb;
+insert into t1 values();
+insert into t1 values();
+insert into t1 values();
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `i` int(11) NOT NULL AUTO_INCREMENT,
+ KEY `i` (`i`)
+) ENGINE=ROCKSDB AUTO_INCREMENT=4 DEFAULT CHARSET=latin1
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `i` int(11) NOT NULL AUTO_INCREMENT,
+ KEY `i` (`i`)
+) ENGINE=ROCKSDB AUTO_INCREMENT=4 DEFAULT CHARSET=latin1
+drop table t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/allow_to_start_after_corruption.result b/storage/rocksdb/mysql-test/rocksdb/r/allow_to_start_after_corruption.result
new file mode 100644
index 00000000000..9b5a335b6f8
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/allow_to_start_after_corruption.result
@@ -0,0 +1,38 @@
+#
+# Test how MyRocks behaves when RocksDB reports corrupted data.
+#
+#
+# Test server crashes on corrupted data and restarts
+#
+create table t1 (
+pk int not null primary key,
+col1 varchar(10)
+) engine=rocksdb;
+insert into t1 values (1,1),(2,2),(3,3);
+select * from t1 where pk=1;
+pk col1
+1 1
+set session debug_dbug= "+d,rocksdb_return_status_corrupted";
+select * from t1 where pk=1;
+ERROR HY000: Lost connection to MySQL server during query
+FOUND 1 /data corruption detected/ in allow_to_start_after_corruption_debug.err
+#
+# The same for scan queries
+#
+select * from t1;
+pk col1
+1 1
+2 2
+3 3
+set session debug_dbug= "+d,rocksdb_return_status_corrupted";
+select * from t1;
+ERROR HY000: Lost connection to MySQL server during query
+FOUND 1 /data corruption detected/ in allow_to_start_after_corruption_debug.err
+#
+# Test restart failure. The server is shutdown at this point.
+#
+FOUND 1 /The server will exit normally and stop restart attempts/ in allow_to_start_after_corruption_debug.err
+#
+# Remove corruption file and restart cleanly
+#
+drop table t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/autoinc_crash_safe.result b/storage/rocksdb/mysql-test/rocksdb/r/autoinc_crash_safe.result
new file mode 100644
index 00000000000..60395eced7e
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/autoinc_crash_safe.result
@@ -0,0 +1,132 @@
+include/master-slave.inc
+[connection master]
+create table t (i int primary key auto_increment) engine=rocksdb;
+#
+# Testing concurrent transactions.
+#
+connect con1,localhost,root,,;
+connect con2,localhost,root,,;
+connect con3,localhost,root,,;
+connection con1;
+begin;
+insert into t values ();
+connection con2;
+begin;
+insert into t values ();
+connection con3;
+begin;
+insert into t values ();
+connection con1;
+insert into t values ();
+connection con2;
+insert into t values ();
+connection con3;
+insert into t values ();
+connection con2;
+commit;
+connection con3;
+rollback;
+connection con1;
+commit;
+delete from t;
+# Master value before restart
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+table_schema table_name auto_increment
+test t 7
+# Slave value before restart
+connection slave;
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+table_schema table_name auto_increment
+test t 6
+connection slave;
+include/stop_slave.inc
+include/rpl_restart_server.inc [server_number=1]
+connection default;
+# Master value after restart
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+table_schema table_name auto_increment
+test t 6
+include/rpl_restart_server.inc [server_number=2]
+connection slave;
+include/start_slave.inc
+# Slave value after restart
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+table_schema table_name auto_increment
+test t 6
+disconnect con1;
+disconnect con2;
+disconnect con3;
+#
+# Testing interaction of merge markers with various DDL statements.
+#
+connection slave;
+include/stop_slave.inc
+connection default;
+# Drop and add primary key.
+alter table t modify i int;
+alter table t drop primary key;
+alter table t add primary key (i);
+alter table t modify i int auto_increment;
+include/rpl_restart_server.inc [server_number=1]
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+table_schema table_name auto_increment
+test t 6
+# Remove auto_increment property.
+alter table t modify i int;
+include/rpl_restart_server.inc [server_number=1]
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+table_schema table_name auto_increment
+test t NULL
+# Add auto_increment property.
+insert into t values (123);
+alter table t modify i int auto_increment;
+include/rpl_restart_server.inc [server_number=1]
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+table_schema table_name auto_increment
+test t 124
+# Add column j.
+alter table t add column j int;
+include/rpl_restart_server.inc [server_number=1]
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+table_schema table_name auto_increment
+test t 124
+# Rename tables.
+rename table t to t2;
+rename table t2 to t;
+include/rpl_restart_server.inc [server_number=1]
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+table_schema table_name auto_increment
+test t 124
+# Change auto_increment property
+alter table t auto_increment = 1000;
+include/rpl_restart_server.inc [server_number=1]
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+table_schema table_name auto_increment
+test t 1000
+alter table t auto_increment = 1;
+include/rpl_restart_server.inc [server_number=1]
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+table_schema table_name auto_increment
+test t 124
+alter table t drop primary key, add key (i), auto_increment = 1;
+include/rpl_restart_server.inc [server_number=1]
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+table_schema table_name auto_increment
+test t 124
+alter table t add key (j), auto_increment = 1;
+include/rpl_restart_server.inc [server_number=1]
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+table_schema table_name auto_increment
+test t 124
+alter table t modify i int;
+alter table t add column (k int auto_increment), add key(k), auto_increment=15;
+include/rpl_restart_server.inc [server_number=1]
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+table_schema table_name auto_increment
+test t 16
+# Drop table.
+drop table t;
+include/rpl_restart_server.inc [server_number=1]
+connection slave;
+include/start_slave.inc
+include/rpl_end.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/autoinc_crash_safe_partition.result b/storage/rocksdb/mysql-test/rocksdb/r/autoinc_crash_safe_partition.result
new file mode 100644
index 00000000000..c837fb7c77d
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/autoinc_crash_safe_partition.result
@@ -0,0 +1,132 @@
+include/master-slave.inc
+[connection master]
+create table t (i int primary key auto_increment) engine=rocksdb partition by key (i) partitions 3;
+#
+# Testing concurrent transactions.
+#
+connect con1,localhost,root,,;
+connect con2,localhost,root,,;
+connect con3,localhost,root,,;
+connection con1;
+begin;
+insert into t values ();
+connection con2;
+begin;
+insert into t values ();
+connection con3;
+begin;
+insert into t values ();
+connection con1;
+insert into t values ();
+connection con2;
+insert into t values ();
+connection con3;
+insert into t values ();
+connection con2;
+commit;
+connection con3;
+rollback;
+connection con1;
+commit;
+delete from t;
+# Master value before restart
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+table_schema table_name auto_increment
+test t 7
+# Slave value before restart
+connection slave;
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+table_schema table_name auto_increment
+test t 6
+connection slave;
+include/stop_slave.inc
+include/rpl_restart_server.inc [server_number=1]
+connection default;
+# Master value after restart
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+table_schema table_name auto_increment
+test t 6
+include/rpl_restart_server.inc [server_number=2]
+connection slave;
+include/start_slave.inc
+# Slave value after restart
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+table_schema table_name auto_increment
+test t 6
+disconnect con1;
+disconnect con2;
+disconnect con3;
+#
+# Testing interaction of merge markers with various DDL statements.
+#
+connection slave;
+include/stop_slave.inc
+connection default;
+# Drop and add primary key.
+alter table t modify i int;
+alter table t drop primary key;
+alter table t add primary key (i);
+alter table t modify i int auto_increment;
+include/rpl_restart_server.inc [server_number=1]
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+table_schema table_name auto_increment
+test t 1
+# Remove auto_increment property.
+alter table t modify i int;
+include/rpl_restart_server.inc [server_number=1]
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+table_schema table_name auto_increment
+test t NULL
+# Add auto_increment property.
+insert into t values (123);
+alter table t modify i int auto_increment;
+include/rpl_restart_server.inc [server_number=1]
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+table_schema table_name auto_increment
+test t 124
+# Add column j.
+alter table t add column j int;
+include/rpl_restart_server.inc [server_number=1]
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+table_schema table_name auto_increment
+test t 124
+# Rename tables.
+rename table t to t2;
+rename table t2 to t;
+include/rpl_restart_server.inc [server_number=1]
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+table_schema table_name auto_increment
+test t 124
+# Change auto_increment property
+alter table t auto_increment = 1000;
+include/rpl_restart_server.inc [server_number=1]
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+table_schema table_name auto_increment
+test t 1000
+alter table t auto_increment = 1;
+include/rpl_restart_server.inc [server_number=1]
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+table_schema table_name auto_increment
+test t 124
+alter table t drop primary key, add key (i), auto_increment = 1;
+include/rpl_restart_server.inc [server_number=1]
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+table_schema table_name auto_increment
+test t 124
+alter table t add key (j), auto_increment = 1;
+include/rpl_restart_server.inc [server_number=1]
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+table_schema table_name auto_increment
+test t 124
+alter table t modify i int;
+alter table t add column (k int auto_increment), add key(k), auto_increment=15;
+include/rpl_restart_server.inc [server_number=1]
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+table_schema table_name auto_increment
+test t 16
+# Drop table.
+drop table t;
+include/rpl_restart_server.inc [server_number=1]
+connection slave;
+include/start_slave.inc
+include/rpl_end.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/autoinc_debug.result b/storage/rocksdb/mysql-test/rocksdb/r/autoinc_debug.result
new file mode 100644
index 00000000000..fe08cd7c361
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/autoinc_debug.result
@@ -0,0 +1,107 @@
+#
+# Testing upgrading from server without merges for auto_increment
+# to new server with such support.
+#
+set debug_dbug='+d,myrocks_autoinc_upgrade';
+create table t (i int primary key auto_increment);
+insert into t values ();
+insert into t values ();
+insert into t values ();
+select * from t;
+i
+1
+2
+3
+delete from t where i > 1;
+select * from t;
+i
+1
+select table_name, index_name, auto_increment
+from information_schema.rocksdb_ddl where table_name = 't';
+table_name index_name auto_increment
+t PRIMARY NULL
+set debug_dbug='-d,myrocks_autoinc_upgrade';
+insert into t values ();
+insert into t values ();
+insert into t values ();
+select * from t;
+i
+1
+2
+3
+4
+select table_name, index_name, auto_increment
+from information_schema.rocksdb_ddl where table_name = 't';
+table_name index_name auto_increment
+t PRIMARY 5
+delete from t where i > 1;
+insert into t values ();
+insert into t values ();
+insert into t values ();
+select * from t;
+i
+1
+5
+6
+7
+drop table t;
+#
+# Testing crash safety of transactions.
+#
+create table t (i int primary key auto_increment);
+insert into t values ();
+insert into t values ();
+insert into t values ();
+# Before anything
+begin;
+insert into t values ();
+insert into t values ();
+set debug_dbug="+d,crash_commit_before";
+commit;
+ERROR HY000: Lost connection to MySQL server during query
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+table_schema table_name auto_increment
+test t 4
+select max(i) from t;
+max(i)
+3
+# After engine prepare
+begin;
+insert into t values ();
+insert into t values ();
+set debug_dbug="+d,crash_commit_after_prepare";
+commit;
+ERROR HY000: Lost connection to MySQL server during query
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+table_schema table_name auto_increment
+test t 4
+select max(i) from t;
+max(i)
+3
+# After binlog
+begin;
+insert into t values ();
+insert into t values ();
+set debug_dbug="+d,crash_commit_after_log";
+commit;
+ERROR HY000: Lost connection to MySQL server during query
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+table_schema table_name auto_increment
+test t 6
+select max(i) from t;
+max(i)
+5
+# After everything
+begin;
+insert into t values ();
+insert into t values ();
+set debug_dbug="+d,crash_commit_after";
+commit;
+ERROR HY000: Lost connection to MySQL server during query
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+table_schema table_name auto_increment
+test t 8
+select max(i) from t;
+max(i)
+7
+drop table t;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/autoinc_vars.result b/storage/rocksdb/mysql-test/rocksdb/r/autoinc_vars.result
index 0fb3d96c58f..5da9a7e7e1c 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/autoinc_vars.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/autoinc_vars.result
@@ -61,3 +61,82 @@ LAST_INSERT_ID()
SELECT a FROM t1 ORDER BY a;
a
DROP TABLE t1;
+#---------------------------
+# test large autoincrement values
+#---------------------------
+SET auto_increment_increment = 1;
+SET auto_increment_offset = 1;
+CREATE TABLE t1 (a BIGINT UNSIGNED AUTO_INCREMENT PRIMARY KEY, b CHAR(8)) ENGINE=rocksdb;
+INSERT INTO t1 VALUES (18446744073709551613, 'a');
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` bigint(20) unsigned NOT NULL AUTO_INCREMENT,
+ `b` char(8) DEFAULT NULL,
+ PRIMARY KEY (`a`)
+) ENGINE=ROCKSDB AUTO_INCREMENT=18446744073709551614 DEFAULT CHARSET=latin1
+INSERT INTO t1 VALUES (NULL, 'b');
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` bigint(20) unsigned NOT NULL AUTO_INCREMENT,
+ `b` char(8) DEFAULT NULL,
+ PRIMARY KEY (`a`)
+) ENGINE=ROCKSDB AUTO_INCREMENT=18446744073709551615 DEFAULT CHARSET=latin1
+INSERT INTO t1 VALUES (NULL, 'c');
+ERROR HY000: Failed to read auto-increment value from storage engine
+SELECT * FROM t1;
+a b
+18446744073709551613 a
+18446744073709551614 b
+DROP TABLE t1;
+SET auto_increment_increment = 300;
+CREATE TABLE t1 (a BIGINT UNSIGNED AUTO_INCREMENT PRIMARY KEY, b CHAR(8)) ENGINE=rocksdb;
+INSERT INTO t1 VALUES (18446744073709551613, 'a');
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` bigint(20) unsigned NOT NULL AUTO_INCREMENT,
+ `b` char(8) DEFAULT NULL,
+ PRIMARY KEY (`a`)
+) ENGINE=ROCKSDB AUTO_INCREMENT=18446744073709551614 DEFAULT CHARSET=latin1
+INSERT INTO t1 VALUES (NULL, 'b');
+ERROR HY000: Failed to read auto-increment value from storage engine
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` bigint(20) unsigned NOT NULL AUTO_INCREMENT,
+ `b` char(8) DEFAULT NULL,
+ PRIMARY KEY (`a`)
+) ENGINE=ROCKSDB AUTO_INCREMENT=18446744073709551615 DEFAULT CHARSET=latin1
+INSERT INTO t1 VALUES (NULL, 'c');
+ERROR HY000: Failed to read auto-increment value from storage engine
+SELECT * FROM t1;
+a b
+18446744073709551613 a
+DROP TABLE t1;
+SET auto_increment_offset = 200;
+CREATE TABLE t1 (a BIGINT UNSIGNED AUTO_INCREMENT PRIMARY KEY, b CHAR(8)) ENGINE=rocksdb;
+INSERT INTO t1 VALUES (18446744073709551613, 'a');
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` bigint(20) unsigned NOT NULL AUTO_INCREMENT,
+ `b` char(8) DEFAULT NULL,
+ PRIMARY KEY (`a`)
+) ENGINE=ROCKSDB AUTO_INCREMENT=18446744073709551614 DEFAULT CHARSET=latin1
+INSERT INTO t1 VALUES (NULL, 'b');
+ERROR HY000: Failed to read auto-increment value from storage engine
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` bigint(20) unsigned NOT NULL AUTO_INCREMENT,
+ `b` char(8) DEFAULT NULL,
+ PRIMARY KEY (`a`)
+) ENGINE=ROCKSDB AUTO_INCREMENT=18446744073709551615 DEFAULT CHARSET=latin1
+INSERT INTO t1 VALUES (NULL, 'c');
+ERROR HY000: Failed to read auto-increment value from storage engine
+SELECT * FROM t1;
+a b
+18446744073709551613 a
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/autoincrement.result b/storage/rocksdb/mysql-test/rocksdb/r/autoincrement.result
deleted file mode 100644
index 28b5b6cd070..00000000000
--- a/storage/rocksdb/mysql-test/rocksdb/r/autoincrement.result
+++ /dev/null
@@ -1 +0,0 @@
-# The test checks AUTO_INCREMENT capabilities that are not supported by RocksDB-SE.
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bloomfilter5.result b/storage/rocksdb/mysql-test/rocksdb/r/bloomfilter5.result
new file mode 100644
index 00000000000..4f6702b85a7
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/bloomfilter5.result
@@ -0,0 +1,62 @@
+#
+# Issue #809: Wrong query result with bloom filters
+#
+create table t1 (
+id1 bigint not null,
+id2 bigint not null,
+id3 varchar(100) not null,
+id4 int not null,
+id5 int not null,
+value bigint,
+value2 varchar(100),
+primary key (id1, id2, id3, id4) COMMENT 'rev:bf5_1'
+) engine=ROCKSDB;
+create table t2(a int);
+insert into t2 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+create table t3(seq int);
+insert into t3
+select
+1+ A.a + B.a* 10 + C.a * 100 + D.a * 1000
+from t2 A, t2 B, t2 C, t2 D;
+insert t1
+select
+(seq+9) div 10, (seq+4) div 5, (seq+4) div 5, seq, seq, 1000, "aaabbbccc"
+from t3;
+set global rocksdb_force_flush_memtable_now=1;
+# Full table scan
+explain
+select * from t1 limit 10;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 10000
+select * from t1 limit 10;
+id1 id2 id3 id4 id5 value value2
+1000 2000 2000 10000 10000 1000 aaabbbccc
+1000 2000 2000 9999 9999 1000 aaabbbccc
+1000 2000 2000 9998 9998 1000 aaabbbccc
+1000 2000 2000 9997 9997 1000 aaabbbccc
+1000 2000 2000 9996 9996 1000 aaabbbccc
+1000 1999 1999 9995 9995 1000 aaabbbccc
+1000 1999 1999 9994 9994 1000 aaabbbccc
+1000 1999 1999 9993 9993 1000 aaabbbccc
+1000 1999 1999 9992 9992 1000 aaabbbccc
+1000 1999 1999 9991 9991 1000 aaabbbccc
+# An index scan starting from the end of the table:
+explain
+select * from t1 order by id1 desc,id2 desc, id3 desc, id4 desc limit 1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index NULL PRIMARY 122 NULL 1
+select * from t1 order by id1 desc,id2 desc, id3 desc, id4 desc limit 1;
+id1 id2 id3 id4 id5 value value2
+1000 2000 2000 10000 10000 1000 aaabbbccc
+create table t4 (
+pk int unsigned not null primary key,
+kp1 int unsigned not null,
+kp2 int unsigned not null,
+col1 int unsigned,
+key(kp1, kp2) comment 'rev:bf5_2'
+) engine=rocksdb;
+insert into t4 values (1, 0xFFFF, 0xFFF, 12345);
+# This must not fail an assert:
+select * from t4 force index(kp1) where kp1=0xFFFFFFFF and kp2<=0xFFFFFFFF order by kp2 desc;
+pk kp1 kp2 col1
+drop table t1,t2,t3,t4;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load.result b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load.result
index b931a61e233..4a746d64c87 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load.result
@@ -1,4 +1,3 @@
-DROP TABLE IF EXISTS t1, t2, t3;
Data will be ordered in ascending order
CREATE TABLE t1(
pk CHAR(5),
@@ -6,21 +5,21 @@ a CHAR(30),
b CHAR(30),
PRIMARY KEY(pk) COMMENT "cf1",
KEY(a)
-) COLLATE 'latin1_bin';
+) ENGINE=ROCKSDB COLLATE 'latin1_bin';
CREATE TABLE t2(
pk CHAR(5),
a CHAR(30),
b CHAR(30),
PRIMARY KEY(pk) COMMENT "cf1",
KEY(a)
-) COLLATE 'latin1_bin';
+) ENGINE=ROCKSDB COLLATE 'latin1_bin';
CREATE TABLE t3(
pk CHAR(5),
a CHAR(30),
b CHAR(30),
PRIMARY KEY(pk) COMMENT "cf1",
KEY(a)
-) COLLATE 'latin1_bin' PARTITION BY KEY() PARTITIONS 4;
+) ENGINE=ROCKSDB COLLATE 'latin1_bin' PARTITION BY KEY() PARTITIONS 4;
connect other,localhost,root,,;
set session transaction isolation level repeatable read;
select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS';
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_drop_table.result b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_drop_table.result
new file mode 100644
index 00000000000..4e79d82810e
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_drop_table.result
@@ -0,0 +1,11 @@
+CREATE TABLE t1 (pk INT, PRIMARY KEY (pk)) ENGINE=ROCKSDB;
+SET rocksdb_bulk_load_allow_unsorted=1;
+SET rocksdb_bulk_load=1;
+INSERT INTO t1 VALUES (1);
+connect con1,localhost,root,,;
+DROP TABLE t1;
+connection default;
+disconnect con1;
+SET rocksdb_bulk_load=0;
+SELECT * FROM t1;
+ERROR 42S02: Table 'test.t1' doesn't exist
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_errors.result b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_errors.result
index f230b173892..3703c208d0b 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_errors.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_errors.result
@@ -1,4 +1,4 @@
-CREATE TABLE t1(pk INT, PRIMARY KEY(pk));
+CREATE TABLE t1(pk INT, PRIMARY KEY(pk)) ENGINE=ROCKSDB;
SET rocksdb_bulk_load=1;
INSERT INTO t1 VALUES(10);
INSERT INTO t1 VALUES(11);
@@ -14,18 +14,30 @@ INSERT INTO t1 VALUES(1);
INSERT INTO t1 VALUES(2);
INSERT INTO t1 VALUES(20);
INSERT INTO t1 VALUES(21);
-#
-# In MyRocks, the following statement will intentionally crash the server.
-# In MariaDB, it will cause an error
SET rocksdb_bulk_load=0;
ERROR HY000: Rows inserted during bulk load must not overlap existing rows
-#
-# Despite the error, bulk load operation is over so the variable value
-# will be 0:
-select @@rocksdb_bulk_load;
-@@rocksdb_bulk_load
-0
+SHOW VARIABLES LIKE 'rocksdb_bulk_load';
+Variable_name Value
+rocksdb_bulk_load OFF
call mtr.add_suppression('finalizing last SST file while setting bulk loading variable');
+SELECT * FROM t1;
+pk
+10
+11
+FOUND 1 /RocksDB: Error [0-9]+ finalizing last SST file while setting bulk loading variable/ in rocksdb.bulk_load_errors.1.err
+connect con1,localhost,root,,;
+SET rocksdb_bulk_load=1;
+INSERT INTO t1 VALUES(1);
+INSERT INTO t1 VALUES(2);
+INSERT INTO t1 VALUES(20);
+INSERT INTO t1 VALUES(21);
+connection default;
+disconnect con1;
+SELECT * FROM t1;
+pk
+10
+11
+FOUND 1 /RocksDB: Error [0-9]+ finalizing last SST file while disconnecting/ in rocksdb.bulk_load_errors.2.err
TRUNCATE TABLE t1;
SET rocksdb_bulk_load_allow_unsorted=1;
SET rocksdb_bulk_load=1;
@@ -53,3 +65,35 @@ pk
202
SET rocksdb_bulk_load_allow_unsorted=DEFAULT;
DROP TABLE t1;
+CREATE TABLE t1(c1 INT KEY) ENGINE=ROCKSDB;
+SET rocksdb_bulk_load=1;
+INSERT INTO t1 VALUES (),(),();
+ERROR HY000: Rows must be inserted in primary key order during bulk load operation
+SET rocksdb_bulk_load=0;
+DROP TABLE t1;
+SET @orig_table_open_cache=@@global.table_open_cache;
+CREATE TABLE t1(a INT AUTO_INCREMENT, b INT, PRIMARY KEY (a)) ENGINE=ROCKSDB DEFAULT CHARSET=latin1;
+SET rocksdb_bulk_load=1;
+INSERT INTO t1 VALUES(13, 0);
+INSERT INTO t1 VALUES(2, 'test 2');
+Warnings:
+Warning 1366 Incorrect integer value: 'test 2' for column 'b' at row 1
+INSERT INTO t1 VALUES(@id, @arg04);
+SET @@global.table_open_cache=FALSE;
+Warnings:
+Warning 1292 Truncated incorrect table_open_cache value: '0'
+INSERT INTO t1 VALUES(51479+0.333333333,1);
+DROP TABLE t1;
+SET @@global.table_open_cache=@orig_table_open_cache;
+FOUND 1 /RocksDB: Error [0-9]+ finalizing bulk load while closing handler/ in rocksdb.bulk_load_errors.3.err
+CREATE TABLE t1 (pk INT, PRIMARY KEY (pk)) ENGINE=ROCKSDB;
+CREATE TABLE t2 (pk INT, PRIMARY KEY (pk)) ENGINE=ROCKSDB;
+SET rocksdb_bulk_load=1;
+INSERT INTO t1 VALUES (1), (2);
+INSERT INTO t2 VALUES (1), (2);
+INSERT INTO t1 VALUES (1);
+INSERT INTO t2 VALUES (3);
+ERROR HY000: Rows inserted during bulk load must not overlap existing rows
+SET rocksdb_bulk_load=0;
+DROP TABLE t1;
+DROP TABLE t2;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_cf.result b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_cf.result
index 947f67434a5..4fd7ae9d9a5 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_cf.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_cf.result
@@ -1,4 +1,3 @@
-DROP TABLE IF EXISTS t1, t2, t3;
Data will be ordered in ascending order
CREATE TABLE t1(
pk CHAR(5),
@@ -6,21 +5,21 @@ a CHAR(30),
b CHAR(30),
PRIMARY KEY(pk) COMMENT "rev:cf1",
KEY(a)
-) COLLATE 'latin1_bin';
+) ENGINE=ROCKSDB COLLATE 'latin1_bin';
CREATE TABLE t2(
pk CHAR(5),
a CHAR(30),
b CHAR(30),
PRIMARY KEY(pk) COMMENT "rev:cf1",
KEY(a)
-) COLLATE 'latin1_bin';
+) ENGINE=ROCKSDB COLLATE 'latin1_bin';
CREATE TABLE t3(
pk CHAR(5),
a CHAR(30),
b CHAR(30),
PRIMARY KEY(pk) COMMENT "rev:cf1",
KEY(a)
-) COLLATE 'latin1_bin' PARTITION BY KEY() PARTITIONS 4;
+) ENGINE=ROCKSDB COLLATE 'latin1_bin' PARTITION BY KEY() PARTITIONS 4;
connect other,localhost,root,,;
set session transaction isolation level repeatable read;
select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS';
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_cf_and_data.result b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_cf_and_data.result
index 6c38e030afb..7d7c9f34200 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_cf_and_data.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_cf_and_data.result
@@ -1,4 +1,3 @@
-DROP TABLE IF EXISTS t1, t2, t3;
Data will be ordered in descending order
CREATE TABLE t1(
pk CHAR(5),
@@ -6,21 +5,21 @@ a CHAR(30),
b CHAR(30),
PRIMARY KEY(pk) COMMENT "rev:cf1",
KEY(a)
-) COLLATE 'latin1_bin';
+) ENGINE=ROCKSDB COLLATE 'latin1_bin';
CREATE TABLE t2(
pk CHAR(5),
a CHAR(30),
b CHAR(30),
PRIMARY KEY(pk) COMMENT "rev:cf1",
KEY(a)
-) COLLATE 'latin1_bin';
+) ENGINE=ROCKSDB COLLATE 'latin1_bin';
CREATE TABLE t3(
pk CHAR(5),
a CHAR(30),
b CHAR(30),
PRIMARY KEY(pk) COMMENT "rev:cf1",
KEY(a)
-) COLLATE 'latin1_bin' PARTITION BY KEY() PARTITIONS 4;
+) ENGINE=ROCKSDB COLLATE 'latin1_bin' PARTITION BY KEY() PARTITIONS 4;
connect other,localhost,root,,;
set session transaction isolation level repeatable read;
select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS';
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_data.result b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_data.result
index e566691af28..c1b6d48a6a5 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_data.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_data.result
@@ -1,4 +1,3 @@
-DROP TABLE IF EXISTS t1, t2, t3;
Data will be ordered in descending order
CREATE TABLE t1(
pk CHAR(5),
@@ -6,21 +5,21 @@ a CHAR(30),
b CHAR(30),
PRIMARY KEY(pk) COMMENT "cf1",
KEY(a)
-) COLLATE 'latin1_bin';
+) ENGINE=ROCKSDB COLLATE 'latin1_bin';
CREATE TABLE t2(
pk CHAR(5),
a CHAR(30),
b CHAR(30),
PRIMARY KEY(pk) COMMENT "cf1",
KEY(a)
-) COLLATE 'latin1_bin';
+) ENGINE=ROCKSDB COLLATE 'latin1_bin';
CREATE TABLE t3(
pk CHAR(5),
a CHAR(30),
b CHAR(30),
PRIMARY KEY(pk) COMMENT "cf1",
KEY(a)
-) COLLATE 'latin1_bin' PARTITION BY KEY() PARTITIONS 4;
+) ENGINE=ROCKSDB COLLATE 'latin1_bin' PARTITION BY KEY() PARTITIONS 4;
connect other,localhost,root,,;
set session transaction isolation level repeatable read;
select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS';
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted.result b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted.result
index 2a7c7bd69fd..2adaba1e228 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted.result
@@ -1,12 +1,12 @@
-DROP TABLE IF EXISTS t1;
SET rocksdb_bulk_load_size=3;
SET rocksdb_bulk_load_allow_unsorted=1;
-CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "cf1");
+CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "cf1")
+ENGINE=ROCKSDB;
SET rocksdb_bulk_load=1;
-SELECT * FROM t1;
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
a b
SET rocksdb_bulk_load=0;
-SELECT * FROM t1;
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
a b
-3 5
-1 3
@@ -14,42 +14,49 @@ a b
4 -2
6 -4
DROP TABLE t1;
-CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "cf1", KEY(b));
+CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "cf1", KEY(b))
+ENGINE=ROCKSDB;
SET rocksdb_bulk_load=1;
-SELECT * FROM t1;
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
a b
-6 -4
-4 -2
-2 0
--1 3
--3 5
SET rocksdb_bulk_load=0;
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+a b
+-3 5
+-1 3
+2 0
+4 -2
+6 -4
DROP TABLE t1;
-CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "cf1");
-CREATE TABLE t2(a INT, b INT, PRIMARY KEY(a) COMMENT "cf1");
+CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "cf1")
+ENGINE=ROCKSDB;
+CREATE TABLE t2(a INT, b INT, PRIMARY KEY(a) COMMENT "cf1")
+ENGINE=ROCKSDB;
SET rocksdb_bulk_load=1;
INSERT INTO t1 VALUES (1,1);
INSERT INTO t2 VALUES (1,1);
-SELECT * FROM t1;
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
a b
1 1
INSERT INTO t1 VALUES (2,2);
-SELECT * FROM t2;
+SELECT * FROM t2 FORCE INDEX (PRIMARY);
a b
1 1
-SELECT * FROM t1;
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
a b
1 1
SET rocksdb_bulk_load=0;
-SELECT * FROM t1;
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
a b
1 1
2 2
DROP TABLE t1, t2;
-CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "cf1");
-CREATE TABLE t2(a INT, b INT, PRIMARY KEY(b) COMMENT "cf1");
+CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "cf1")
+ENGINE=ROCKSDB;
+CREATE TABLE t2(a INT, b INT, PRIMARY KEY(b) COMMENT "cf1")
+ENGINE=ROCKSDB;
CREATE TABLE t3(a INT, b INT, PRIMARY KEY(a) COMMENT "cf1")
-PARTITION BY KEY() PARTITIONS 4;
+ENGINE=ROCKSDB PARTITION BY KEY() PARTITIONS 4;
connect other,localhost,root,,;
set session transaction isolation level repeatable read;
select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS';
@@ -99,5 +106,15 @@ count(a)
select count(b) from t3;
count(b)
5000000
+SELECT * FROM t1 FORCE INDEX (PRIMARY) LIMIT 3;
+a b
+-4999998 5000000
+-4999996 4999998
+-4999994 4999996
+SELECT * FROM t2 FORCE INDEX (PRIMARY) LIMIT 3;
+a b
+4999999 -4999997
+4999997 -4999995
+4999995 -4999993
+disconnect other;
DROP TABLE t1, t2, t3;
-SET rocksdb_bulk_load_allow_unsorted=0;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted_rev.result b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted_rev.result
new file mode 100644
index 00000000000..f828fa57255
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted_rev.result
@@ -0,0 +1,120 @@
+SET rocksdb_bulk_load_size=3;
+SET rocksdb_bulk_load_allow_unsorted=1;
+CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "rev:cf1")
+ENGINE=ROCKSDB;
+SET rocksdb_bulk_load=1;
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+a b
+SET rocksdb_bulk_load=0;
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+a b
+6 -4
+4 -2
+2 0
+-1 3
+-3 5
+DROP TABLE t1;
+CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "rev:cf1", KEY(b))
+ENGINE=ROCKSDB;
+SET rocksdb_bulk_load=1;
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+a b
+SET rocksdb_bulk_load=0;
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+a b
+6 -4
+4 -2
+2 0
+-1 3
+-3 5
+DROP TABLE t1;
+CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "rev:cf1")
+ENGINE=ROCKSDB;
+CREATE TABLE t2(a INT, b INT, PRIMARY KEY(a) COMMENT "rev:cf1")
+ENGINE=ROCKSDB;
+SET rocksdb_bulk_load=1;
+INSERT INTO t1 VALUES (1,1);
+INSERT INTO t2 VALUES (1,1);
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+a b
+1 1
+INSERT INTO t1 VALUES (2,2);
+SELECT * FROM t2 FORCE INDEX (PRIMARY);
+a b
+1 1
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+a b
+1 1
+SET rocksdb_bulk_load=0;
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+a b
+2 2
+1 1
+DROP TABLE t1, t2;
+CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "rev:cf1")
+ENGINE=ROCKSDB;
+CREATE TABLE t2(a INT, b INT, PRIMARY KEY(b) COMMENT "rev:cf1")
+ENGINE=ROCKSDB;
+CREATE TABLE t3(a INT, b INT, PRIMARY KEY(a) COMMENT "rev:cf1")
+ENGINE=ROCKSDB PARTITION BY KEY() PARTITIONS 4;
+connect other,localhost,root,,;
+set session transaction isolation level repeatable read;
+select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS';
+STAT_TYPE VALUE
+DB_NUM_SNAPSHOTS 0
+start transaction with consistent snapshot;
+select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS';
+STAT_TYPE VALUE
+DB_NUM_SNAPSHOTS 1
+connection default;
+set rocksdb_bulk_load=1;
+set rocksdb_bulk_load_size=100000;
+LOAD DATA INFILE <input_file> INTO TABLE t1;
+LOAD DATA INFILE <input_file> INTO TABLE t2;
+LOAD DATA INFILE <input_file> INTO TABLE t3;
+set rocksdb_bulk_load=0;
+SHOW TABLE STATUS WHERE name LIKE 't%';
+Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL partitioned
+ANALYZE TABLE t1, t2, t3;
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
+test.t2 analyze status OK
+test.t3 analyze status OK
+SHOW TABLE STATUS WHERE name LIKE 't%';
+Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL partitioned
+select count(a) from t1;
+count(a)
+5000000
+select count(b) from t1;
+count(b)
+5000000
+select count(a) from t2;
+count(a)
+5000000
+select count(b) from t2;
+count(b)
+5000000
+select count(a) from t3;
+count(a)
+5000000
+select count(b) from t3;
+count(b)
+5000000
+SELECT * FROM t1 FORCE INDEX (PRIMARY) LIMIT 3;
+a b
+4999999 -4999997
+4999997 -4999995
+4999995 -4999993
+SELECT * FROM t2 FORCE INDEX (PRIMARY) LIMIT 3;
+a b
+-4999998 5000000
+-4999996 4999998
+-4999994 4999996
+disconnect other;
+DROP TABLE t1, t2, t3;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/cardinality.result b/storage/rocksdb/mysql-test/rocksdb/r/cardinality.result
index 3bd87e9ffd6..4b201d523d9 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/cardinality.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/cardinality.result
@@ -1,3 +1,38 @@
+CREATE TABLE t0 (id int PRIMARY KEY, a int, INDEX ix_a (a)) engine=rocksdb;
+insert into t0 values (0, 0),(1, 1),(2, 2),(3, 3),(4, 4),
+(5, 4),(6, 4),(7, 4),(8, 4),(9, 4);
+SELECT cardinality FROM information_schema.statistics where table_name="t0" and
+column_name="id";
+cardinality
+NULL
+SELECT cardinality FROM information_schema.statistics where table_name="t0" and
+column_name="a";
+cardinality
+NULL
+ANALYZE TABLE t0;
+SELECT table_rows into @N FROM information_schema.tables
+WHERE table_name = "t0";
+SELECT FLOOR(@N/cardinality) FROM
+information_schema.statistics where table_name="t0" and column_name="id";
+FLOOR(@N/cardinality)
+1
+SELECT FLOOR(@N/cardinality) FROM
+information_schema.statistics where table_name="t0" and column_name="a";
+FLOOR(@N/cardinality)
+2
+SET GLOBAL rocksdb_force_flush_memtable_now = 1;
+ANALYZE TABLE t0;
+SELECT table_rows into @N FROM information_schema.tables
+WHERE table_name = "t0";
+SELECT FLOOR(@N/cardinality) FROM
+information_schema.statistics where table_name="t0" and column_name="id";
+FLOOR(@N/cardinality)
+1
+SELECT FLOOR(@N/cardinality) FROM
+information_schema.statistics where table_name="t0" and column_name="a";
+FLOOR(@N/cardinality)
+2
+drop table t0;
DROP TABLE IF EXISTS t1,t10,t11;
create table t1(
id bigint not null primary key,
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/check_ignore_unknown_options.result b/storage/rocksdb/mysql-test/rocksdb/r/check_ignore_unknown_options.result
new file mode 100644
index 00000000000..6ff49908a51
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/check_ignore_unknown_options.result
@@ -0,0 +1,7 @@
+select variable_name, variable_value from information_schema.global_variables where variable_name="rocksdb_ignore_unknown_options";
+variable_name variable_value
+ROCKSDB_IGNORE_UNKNOWN_OPTIONS ON
+FOUND 1 /RocksDB: Compatibility check against existing database options failed/ in my_restart.err
+select variable_name, variable_value from information_schema.global_variables where variable_name="rocksdb_ignore_unknown_options";
+variable_name variable_value
+ROCKSDB_IGNORE_UNKNOWN_OPTIONS ON
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/deadlock_tracking.result b/storage/rocksdb/mysql-test/rocksdb/r/deadlock_tracking.result
index d7cb89becb7..1e7509172cb 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/deadlock_tracking.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/deadlock_tracking.result
@@ -66,13 +66,6 @@ KEY
LOCK TYPE: EXCLUSIVE
INDEX NAME: PRIMARY
TABLE NAME: test.t
----------------WAITING FOR---------------
-TXN_ID
-COLUMN FAMILY NAME: default
-KEY
-LOCK TYPE: EXCLUSIVE
-INDEX NAME: PRIMARY
-TABLE NAME: test.t
--------TXN_ID GOT DEADLOCK---------
-----------------------------------------
@@ -122,13 +115,6 @@ KEY
LOCK TYPE: EXCLUSIVE
INDEX NAME: PRIMARY
TABLE NAME: test.t
----------------WAITING FOR---------------
-TXN_ID
-COLUMN FAMILY NAME: default
-KEY
-LOCK TYPE: EXCLUSIVE
-INDEX NAME: PRIMARY
-TABLE NAME: test.t
--------TXN_ID GOT DEADLOCK---------
@@ -147,13 +133,6 @@ KEY
LOCK TYPE: EXCLUSIVE
INDEX NAME: PRIMARY
TABLE NAME: test.t
----------------WAITING FOR---------------
-TXN_ID
-COLUMN FAMILY NAME: default
-KEY
-LOCK TYPE: EXCLUSIVE
-INDEX NAME: PRIMARY
-TABLE NAME: test.t
--------TXN_ID GOT DEADLOCK---------
-----------------------------------------
@@ -204,13 +183,6 @@ KEY
LOCK TYPE: EXCLUSIVE
INDEX NAME: PRIMARY
TABLE NAME: test.t
----------------WAITING FOR---------------
-TXN_ID
-COLUMN FAMILY NAME: default
-KEY
-LOCK TYPE: EXCLUSIVE
-INDEX NAME: PRIMARY
-TABLE NAME: test.t
--------TXN_ID GOT DEADLOCK---------
@@ -229,13 +201,6 @@ KEY
LOCK TYPE: EXCLUSIVE
INDEX NAME: PRIMARY
TABLE NAME: test.t
----------------WAITING FOR---------------
-TXN_ID
-COLUMN FAMILY NAME: default
-KEY
-LOCK TYPE: EXCLUSIVE
-INDEX NAME: PRIMARY
-TABLE NAME: test.t
--------TXN_ID GOT DEADLOCK---------
@@ -254,13 +219,6 @@ KEY
LOCK TYPE: EXCLUSIVE
INDEX NAME: PRIMARY
TABLE NAME: test.t
----------------WAITING FOR---------------
-TXN_ID
-COLUMN FAMILY NAME: default
-KEY
-LOCK TYPE: EXCLUSIVE
-INDEX NAME: PRIMARY
-TABLE NAME: test.t
--------TXN_ID GOT DEADLOCK---------
-----------------------------------------
@@ -295,13 +253,6 @@ KEY
LOCK TYPE: EXCLUSIVE
INDEX NAME: PRIMARY
TABLE NAME: test.t
----------------WAITING FOR---------------
-TXN_ID
-COLUMN FAMILY NAME: default
-KEY
-LOCK TYPE: EXCLUSIVE
-INDEX NAME: PRIMARY
-TABLE NAME: test.t
--------TXN_ID GOT DEADLOCK---------
-----------------------------------------
@@ -324,8 +275,12 @@ i
3
select * from t where i=2 for update;
select * from t where i=3 for update;
+select variable_value into @a from information_schema.global_status where variable_name='rocksdb_row_lock_deadlocks';
select * from t where i=1 for update;
ERROR 40001: Deadlock found when trying to get lock; try restarting transaction
+select case when variable_value-@a = 1 then 'true' else 'false' end as deadlocks from information_schema.global_status where variable_name='rocksdb_row_lock_deadlocks';
+deadlocks
+true
rollback;
i
3
@@ -410,13 +365,6 @@ KEY
LOCK TYPE: SHARED
INDEX NAME: PRIMARY
TABLE NAME: test.t
----------------WAITING FOR---------------
-TXN_ID
-COLUMN FAMILY NAME: default
-KEY
-LOCK TYPE: EXCLUSIVE
-INDEX NAME: PRIMARY
-TABLE NAME: test.t
--------TXN_ID GOT DEADLOCK---------
@@ -455,13 +403,6 @@ KEY
LOCK TYPE: SHARED
INDEX NAME: NOT FOUND; IDX_ID
TABLE NAME: NOT FOUND; IDX_ID
----------------WAITING FOR---------------
-TXN_ID
-COLUMN FAMILY NAME: default
-KEY
-LOCK TYPE: EXCLUSIVE
-INDEX NAME: NOT FOUND; IDX_ID
-TABLE NAME: NOT FOUND; IDX_ID
--------TXN_ID GOT DEADLOCK---------
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/i_s_ddl.result b/storage/rocksdb/mysql-test/rocksdb/r/i_s_ddl.result
index a39f2d8c0d6..6bca2cbad2d 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/i_s_ddl.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/i_s_ddl.result
@@ -1,17 +1,22 @@
DROP TABLE IF EXISTS is_ddl_t1;
DROP TABLE IF EXISTS is_ddl_t2;
+DROP TABLE IF EXISTS is_ddl_t3;
CREATE TABLE is_ddl_t1 (i INT, j INT, k INT, l INT,
PRIMARY KEY (i), KEY (j), KEY (k, l) COMMENT 'kl_cf')
ENGINE = ROCKSDB;
CREATE TABLE is_ddl_t2 (x INT, y INT, z INT,
PRIMARY KEY (z, y) COMMENT 'zy_cf',
KEY (x)) ENGINE = ROCKSDB;
-SELECT TABLE_SCHEMA,TABLE_NAME,PARTITION_NAME,INDEX_NAME,INDEX_TYPE,KV_FORMAT_VERSION,CF FROM INFORMATION_SCHEMA.ROCKSDB_DDL WHERE TABLE_NAME like 'is_ddl_t%';
-TABLE_SCHEMA TABLE_NAME PARTITION_NAME INDEX_NAME INDEX_TYPE KV_FORMAT_VERSION CF
-test is_ddl_t1 NULL PRIMARY 1 13 default
-test is_ddl_t1 NULL j 2 13 default
-test is_ddl_t1 NULL k 2 13 kl_cf
-test is_ddl_t2 NULL PRIMARY 1 13 zy_cf
-test is_ddl_t2 NULL x 2 13 default
+CREATE TABLE is_ddl_t3 (a INT, b INT, c INT, PRIMARY KEY (a)) ENGINE = ROCKSDB
+COMMENT "ttl_duration=3600;";
+SELECT TABLE_SCHEMA,TABLE_NAME,PARTITION_NAME,INDEX_NAME,INDEX_TYPE,KV_FORMAT_VERSION,CF,TTL_DURATION,INDEX_FLAGS FROM INFORMATION_SCHEMA.ROCKSDB_DDL WHERE TABLE_NAME like 'is_ddl_t%';
+TABLE_SCHEMA TABLE_NAME PARTITION_NAME INDEX_NAME INDEX_TYPE KV_FORMAT_VERSION CF TTL_DURATION INDEX_FLAGS
+test is_ddl_t1 NULL PRIMARY 1 13 default 0 0
+test is_ddl_t1 NULL j 2 13 default 0 0
+test is_ddl_t1 NULL k 2 13 kl_cf 0 0
+test is_ddl_t2 NULL PRIMARY 1 13 zy_cf 0 0
+test is_ddl_t2 NULL x 2 13 default 0 0
+test is_ddl_t3 NULL PRIMARY 1 13 default 3600 1
DROP TABLE is_ddl_t1;
DROP TABLE is_ddl_t2;
+DROP TABLE is_ddl_t3;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/i_s_deadlock.result b/storage/rocksdb/mysql-test/rocksdb/r/i_s_deadlock.result
new file mode 100644
index 00000000000..36db92095e9
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/i_s_deadlock.result
@@ -0,0 +1,215 @@
+set @prior_lock_wait_timeout = @@rocksdb_lock_wait_timeout;
+set @prior_deadlock_detect = @@rocksdb_deadlock_detect;
+set @prior_max_latest_deadlocks = @@rocksdb_max_latest_deadlocks;
+set global rocksdb_deadlock_detect = on;
+set global rocksdb_lock_wait_timeout = 10000;
+# Clears deadlock buffer of any prior deadlocks.
+set global rocksdb_max_latest_deadlocks = 0;
+set global rocksdb_max_latest_deadlocks = @prior_max_latest_deadlocks;
+connect con1,localhost,root,,;
+connect con2,localhost,root,,;
+connect con3,localhost,root,,;
+connection default;
+show create table information_schema.rocksdb_deadlock;
+Table Create Table
+ROCKSDB_DEADLOCK CREATE TEMPORARY TABLE `ROCKSDB_DEADLOCK` (
+ `DEADLOCK_ID` bigint(8) NOT NULL DEFAULT 0,
+ `TRANSACTION_ID` bigint(8) NOT NULL DEFAULT 0,
+ `CF_NAME` varchar(193) NOT NULL DEFAULT '',
+ `WAITING_KEY` varchar(513) NOT NULL DEFAULT '',
+ `LOCK_TYPE` varchar(193) NOT NULL DEFAULT '',
+ `INDEX_NAME` varchar(193) NOT NULL DEFAULT '',
+ `TABLE_NAME` varchar(193) NOT NULL DEFAULT '',
+ `ROLLED_BACK` bigint(8) NOT NULL DEFAULT 0
+) ENGINE=MEMORY DEFAULT CHARSET=utf8
+create table t (i int primary key) engine=rocksdb;
+insert into t values (1), (2), (3);
+select * from information_schema.rocksdb_deadlock;
+DEADLOCK_ID TRANSACTION_ID CF_NAME WAITING_KEY LOCK_TYPE INDEX_NAME TABLE_NAME ROLLED_BACK
+Deadlock #1
+connection con1;
+begin;
+select * from t where i=1 for update;
+i
+1
+connection con2;
+begin;
+select * from t where i=2 for update;
+i
+2
+connection con1;
+select * from t where i=2 for update;
+connection con2;
+select * from t where i=1 for update;
+ERROR 40001: Deadlock found when trying to get lock; try restarting transaction
+rollback;
+connection con1;
+i
+2
+rollback;
+connection default;
+select * from information_schema.rocksdb_deadlock;
+DEADLOCK_ID TRANSACTION_ID CF_NAME WAITING_KEY LOCK_TYPE INDEX_NAME TABLE_NAME ROLLED_BACK
+DEADLOCK_ID TRANSACTION_ID default WAITING_KEY EXCLUSIVE PRIMARY test.t 0
+DEADLOCK_ID TRANSACTION_ID default WAITING_KEY EXCLUSIVE PRIMARY test.t 1
+Deadlock #2
+connection con1;
+begin;
+select * from t where i=1 for update;
+i
+1
+connection con2;
+begin;
+select * from t where i=2 for update;
+i
+2
+connection con1;
+select * from t where i=2 for update;
+connection con2;
+select * from t where i=1 for update;
+ERROR 40001: Deadlock found when trying to get lock; try restarting transaction
+rollback;
+connection con1;
+i
+2
+rollback;
+connection default;
+select * from information_schema.rocksdb_deadlock;
+DEADLOCK_ID TRANSACTION_ID CF_NAME WAITING_KEY LOCK_TYPE INDEX_NAME TABLE_NAME ROLLED_BACK
+DEADLOCK_ID TRANSACTION_ID default WAITING_KEY EXCLUSIVE PRIMARY test.t 0
+DEADLOCK_ID TRANSACTION_ID default WAITING_KEY EXCLUSIVE PRIMARY test.t 1
+DEADLOCK_ID TRANSACTION_ID default WAITING_KEY EXCLUSIVE PRIMARY test.t 0
+DEADLOCK_ID TRANSACTION_ID default WAITING_KEY EXCLUSIVE PRIMARY test.t 1
+set global rocksdb_max_latest_deadlocks = 10;
+Deadlock #3
+connection con1;
+begin;
+select * from t where i=1 for update;
+i
+1
+connection con2;
+begin;
+select * from t where i=2 for update;
+i
+2
+connection con1;
+select * from t where i=2 for update;
+connection con2;
+select * from t where i=1 for update;
+ERROR 40001: Deadlock found when trying to get lock; try restarting transaction
+rollback;
+connection con1;
+i
+2
+rollback;
+connection default;
+select * from information_schema.rocksdb_deadlock;
+DEADLOCK_ID TRANSACTION_ID CF_NAME WAITING_KEY LOCK_TYPE INDEX_NAME TABLE_NAME ROLLED_BACK
+DEADLOCK_ID TRANSACTION_ID default WAITING_KEY EXCLUSIVE PRIMARY test.t 0
+DEADLOCK_ID TRANSACTION_ID default WAITING_KEY EXCLUSIVE PRIMARY test.t 1
+DEADLOCK_ID TRANSACTION_ID default WAITING_KEY EXCLUSIVE PRIMARY test.t 0
+DEADLOCK_ID TRANSACTION_ID default WAITING_KEY EXCLUSIVE PRIMARY test.t 1
+DEADLOCK_ID TRANSACTION_ID default WAITING_KEY EXCLUSIVE PRIMARY test.t 0
+DEADLOCK_ID TRANSACTION_ID default WAITING_KEY EXCLUSIVE PRIMARY test.t 1
+set global rocksdb_max_latest_deadlocks = 1;
+select * from information_schema.rocksdb_deadlock;
+DEADLOCK_ID TRANSACTION_ID CF_NAME WAITING_KEY LOCK_TYPE INDEX_NAME TABLE_NAME ROLLED_BACK
+DEADLOCK_ID TRANSACTION_ID default WAITING_KEY EXCLUSIVE PRIMARY test.t 0
+DEADLOCK_ID TRANSACTION_ID default WAITING_KEY EXCLUSIVE PRIMARY test.t 1
+connection con3;
+set rocksdb_deadlock_detect_depth = 2;
+Deadlock #4
+connection con1;
+begin;
+select * from t where i=1 for update;
+i
+1
+connection con2;
+begin;
+select * from t where i=2 for update;
+i
+2
+connection con3;
+begin;
+select * from t where i=3 for update;
+i
+3
+connection con1;
+select * from t where i=2 for update;
+connection con2;
+select * from t where i=3 for update;
+connection con3;
+select * from t where i=1 for update;
+ERROR 40001: Deadlock found when trying to get lock; try restarting transaction
+rollback;
+connection con2;
+i
+3
+rollback;
+connection con1;
+i
+2
+rollback;
+connection default;
+set global rocksdb_max_latest_deadlocks = 5;
+select * from information_schema.rocksdb_deadlock;
+DEADLOCK_ID TRANSACTION_ID CF_NAME WAITING_KEY LOCK_TYPE INDEX_NAME TABLE_NAME ROLLED_BACK
+Deadlock #5
+connection con1;
+begin;
+select * from t where i=1 for update;
+i
+1
+connection con2;
+begin;
+select * from t where i=2 for update;
+i
+2
+connection con3;
+begin;
+select * from t where i=3 lock in share mode;
+i
+3
+connection con1;
+select * from t where i=100 for update;
+i
+select * from t where i=101 for update;
+i
+select * from t where i=2 for update;
+connection con2;
+select * from t where i=3 lock in share mode;
+i
+3
+select * from t where i=200 for update;
+i
+select * from t where i=201 for update;
+i
+select * from t where i=1 lock in share mode;
+ERROR 40001: Deadlock found when trying to get lock; try restarting transaction
+rollback;
+connection con1;
+i
+2
+rollback;
+connection con3;
+rollback;
+connection default;
+select * from information_schema.rocksdb_deadlock;
+DEADLOCK_ID TRANSACTION_ID CF_NAME WAITING_KEY LOCK_TYPE INDEX_NAME TABLE_NAME ROLLED_BACK
+DEADLOCK_ID TRANSACTION_ID default WAITING_KEY EXCLUSIVE PRIMARY test.t 0
+DEADLOCK_ID TRANSACTION_ID default WAITING_KEY SHARED PRIMARY test.t 1
+disconnect con1;
+disconnect con2;
+disconnect con3;
+set global rocksdb_lock_wait_timeout = @prior_lock_wait_timeout;
+set global rocksdb_deadlock_detect = @prior_deadlock_detect;
+drop table t;
+select * from information_schema.rocksdb_deadlock;
+DEADLOCK_ID TRANSACTION_ID CF_NAME WAITING_KEY LOCK_TYPE INDEX_NAME TABLE_NAME ROLLED_BACK
+DEADLOCK_ID TRANSACTION_ID default WAITING_KEY EXCLUSIVE INDEX_NAME TABLE_NAME 0
+DEADLOCK_ID TRANSACTION_ID default WAITING_KEY SHARED INDEX_NAME TABLE_NAME 1
+set global rocksdb_max_latest_deadlocks = 0;
+# Clears deadlock buffer of any existent deadlocks.
+set global rocksdb_max_latest_deadlocks = @prior_max_latest_deadlocks;
+select * from information_schema.rocksdb_deadlock;
+DEADLOCK_ID TRANSACTION_ID CF_NAME WAITING_KEY LOCK_TYPE INDEX_NAME TABLE_NAME ROLLED_BACK
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/index_merge_rocksdb.result b/storage/rocksdb/mysql-test/rocksdb/r/index_merge_rocksdb.result
index f63a271cdce..7fb9055083b 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/index_merge_rocksdb.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/index_merge_rocksdb.result
@@ -25,10 +25,10 @@ UPDATE t1 SET filler1='to be deleted' WHERE key1=100 and key2=100;
DROP TABLE t0, t1;
create table t1 (key1 int, key2 int, key3 int, key (key1), key (key2), key(key3)) engine=rocksdb;
insert into t1 values (1, 100, 100), (1, 200, 200), (1, 300, 300);
+set global rocksdb_force_flush_memtable_now=1;
analyze table t1;
Table Op Msg_type Msg_text
test.t1 analyze status OK
-set global rocksdb_force_flush_memtable_now=1;
explain select * from t1 where key1 = 1;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ref key1 key1 5 const #
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/information_schema.result b/storage/rocksdb/mysql-test/rocksdb/r/information_schema.result
index 6850d8dff16..aba14e3c076 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/information_schema.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/information_schema.result
@@ -4,15 +4,14 @@ DROP TABLE IF EXISTS t3;
SET GLOBAL ROCKSDB_PAUSE_BACKGROUND_WORK=1;
create table t1 (a int) engine=rocksdb;
drop table t1;
-select * from INFORMATION_SCHEMA.ROCKSDB_GLOBAL_INFO;
+select * from INFORMATION_SCHEMA.ROCKSDB_GLOBAL_INFO where type<>'DDL_DROP_INDEX_ONGOING';
TYPE NAME VALUE
MAX_INDEX_ID MAX_INDEX_ID max_index_id
CF_FLAGS 0 default [0]
CF_FLAGS 1 __system__ [0]
-DDL_DROP_INDEX_ONGOING cf_id:0,index_id:max_index_id
-select count(*) from INFORMATION_SCHEMA.ROCKSDB_GLOBAL_INFO;
+select count(*) from INFORMATION_SCHEMA.ROCKSDB_GLOBAL_INFO where type<>'DDL_DROP_INDEX_ONGOING';
count(*)
-4
+3
SET GLOBAL ROCKSDB_PAUSE_BACKGROUND_WORK=0;
select VALUE into @keysIn from INFORMATION_SCHEMA.ROCKSDB_COMPACTION_STATS where CF_NAME = 'default' and LEVEL = 'Sum' and TYPE = 'KeyIn';
CREATE TABLE t1 (i1 INT, i2 INT, PRIMARY KEY (i1)) ENGINE = ROCKSDB;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/issue255.result b/storage/rocksdb/mysql-test/rocksdb/r/issue255.result
index 62875e378a4..797f339d8b1 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/issue255.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/issue255.result
@@ -6,6 +6,19 @@ t1 ROCKSDB # Fixed 1 # # # # # 6 NULL NULL NULL latin1_swedish_ci NULL
INSERT INTO t1 VALUES ('538647864786478647864');
Warnings:
Warning 1264 Out of range value for column 'pk' at row 1
+SELECT * FROM t1;
+pk
+5
+9223372036854775807
+SHOW TABLE STATUS LIKE 't1';
+Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
+t1 ROCKSDB 10 Fixed 2 22 44 0 0 0 9223372036854775807 NULL NULL NULL latin1_swedish_ci NULL
+INSERT INTO t1 VALUES ();
+ERROR 23000: Duplicate entry '9223372036854775807' for key 'PRIMARY'
+SELECT * FROM t1;
+pk
+5
+9223372036854775807
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
t1 ROCKSDB # Fixed 2 # # # # # 9223372036854775807 NULL NULL NULL latin1_swedish_ci NULL
@@ -19,3 +32,37 @@ SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
t1 ROCKSDB # Fixed 2 # # # # # 9223372036854775807 NULL NULL NULL latin1_swedish_ci NULL
DROP TABLE t1;
+CREATE TABLE t1 (pk TINYINT NOT NULL PRIMARY KEY AUTO_INCREMENT);
+INSERT INTO t1 VALUES (5);
+SHOW TABLE STATUS LIKE 't1';
+Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
+t1 ROCKSDB # Fixed 1 # # # # # 6 NULL NULL NULL latin1_swedish_ci NULL
+INSERT INTO t1 VALUES (1000);
+Warnings:
+Warning 1264 Out of range value for column 'pk' at row 1
+SELECT * FROM t1;
+pk
+5
+127
+SHOW TABLE STATUS LIKE 't1';
+Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
+t1 ROCKSDB 10 Fixed 2 15 30 0 0 0 127 NULL NULL NULL latin1_swedish_ci NULL
+INSERT INTO t1 VALUES ();
+ERROR 23000: Duplicate entry '127' for key 'PRIMARY'
+SELECT * FROM t1;
+pk
+5
+127
+SHOW TABLE STATUS LIKE 't1';
+Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
+t1 ROCKSDB # Fixed 2 # # # # # 127 NULL NULL NULL latin1_swedish_ci NULL
+INSERT INTO t1 VALUES ();
+ERROR 23000: Duplicate entry '127' for key 'PRIMARY'
+SELECT * FROM t1;
+pk
+5
+127
+SHOW TABLE STATUS LIKE 't1';
+Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
+t1 ROCKSDB # Fixed 2 # # # # # 127 NULL NULL NULL latin1_swedish_ci NULL
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/lock_wait_timeout_stats.result b/storage/rocksdb/mysql-test/rocksdb/r/lock_wait_timeout_stats.result
index d0bfb05fd1b..96efca6e2b7 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/lock_wait_timeout_stats.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/lock_wait_timeout_stats.result
@@ -8,6 +8,7 @@ ROW_LOCK_WAIT_TIMEOUTS
begin;
set @@rocksdb_lock_wait_timeout=1;
begin;
+select variable_value into @a from information_schema.global_status where variable_name='rocksdb_row_lock_wait_timeouts';
insert into t values(0);
ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on index: test.t.PRIMARY
select ROW_LOCK_WAIT_TIMEOUTS from information_schema.table_statistics where table_name="t";
@@ -16,6 +17,10 @@ ROW_LOCK_WAIT_TIMEOUTS
select ROW_LOCK_WAIT_TIMEOUTS from information_schema.table_statistics where table_name="t";
ROW_LOCK_WAIT_TIMEOUTS
1
+select case when variable_value-@a = 1 then 'true' else 'false' end as waits from information_schema.global_status where variable_name='rocksdb_row_lock_wait_timeouts';
+waits
+true
+select variable_value into @a from information_schema.global_status where variable_name='rocksdb_row_lock_wait_timeouts';
insert into t values(0);
ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on index: test.t.PRIMARY
select ROW_LOCK_WAIT_TIMEOUTS from information_schema.table_statistics where table_name="t";
@@ -24,4 +29,7 @@ ROW_LOCK_WAIT_TIMEOUTS
select ROW_LOCK_WAIT_TIMEOUTS from information_schema.table_statistics where table_name="t";
ROW_LOCK_WAIT_TIMEOUTS
2
+select case when variable_value-@a = 1 then 'true' else 'false' end as waits from information_schema.global_status where variable_name='rocksdb_row_lock_wait_timeouts';
+waits
+true
drop table t;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/mariadb_port_fixes.result b/storage/rocksdb/mysql-test/rocksdb/r/mariadb_port_fixes.result
index 9674b2b0c15..98c5ebe9f4c 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/mariadb_port_fixes.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/mariadb_port_fixes.result
@@ -81,6 +81,7 @@ ROCKSDB_DDL Gamma
ROCKSDB_INDEX_FILE_MAP Gamma
ROCKSDB_LOCKS Gamma
ROCKSDB_TRX Gamma
+ROCKSDB_DEADLOCK Gamma
#
# MDEV-12466 : Assertion `thd->transaction.stmt.is_empty() || thd->in_sub_stmt || ...
#
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/max_open_files.result b/storage/rocksdb/mysql-test/rocksdb/r/max_open_files.result
new file mode 100644
index 00000000000..5d34f4e9640
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/max_open_files.result
@@ -0,0 +1,21 @@
+CALL mtr.add_suppression("RocksDB: rocksdb_max_open_files should not be greater than the open_files_limit*");
+FOUND 1 /RocksDB: rocksdb_max_open_files should not be greater than the open_files_limit/ in rocksdb.max_open_files.err
+SELECT FLOOR(@@global.open_files_limit / 2) = @@global.rocksdb_max_open_files;
+FLOOR(@@global.open_files_limit / 2) = @@global.rocksdb_max_open_files
+1
+SELECT @@global.open_files_limit - 1 = @@global.rocksdb_max_open_files;
+@@global.open_files_limit - 1 = @@global.rocksdb_max_open_files
+1
+SELECT @@global.rocksdb_max_open_files;
+@@global.rocksdb_max_open_files
+0
+CREATE TABLE t1(a INT) ENGINE=ROCKSDB;
+INSERT INTO t1 VALUES(0),(1),(2),(3),(4);
+SET GLOBAL rocksdb_force_flush_memtable_and_lzero_now=1;
+DROP TABLE t1;
+SELECT @@global.rocksdb_max_open_files;
+@@global.rocksdb_max_open_files
+-1
+SELECT FLOOR(@@global.open_files_limit / 2) = @@global.rocksdb_max_open_files;
+FLOOR(@@global.open_files_limit / 2) = @@global.rocksdb_max_open_files
+1
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/optimizer_loose_index_scans.result b/storage/rocksdb/mysql-test/rocksdb/r/optimizer_loose_index_scans.result
index 27b1779627b..1fe61fe9fc5 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/optimizer_loose_index_scans.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/optimizer_loose_index_scans.result
@@ -36,7 +36,7 @@ explain select b, d from t where d > 4;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t range PRIMARY,b b 8 NULL # Using where; Using index for skip scan
rows_read
-1509
+1505
include/diff_tables.inc [temp_orig, temp_skip]
set optimizer_switch = 'skip_scan=off,skip_scan_cost_based=on';
set optimizer_switch = 'skip_scan=off';
@@ -44,7 +44,7 @@ explain select a, b, c, d from t where a = 5 and d <= 3;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t ref PRIMARY PRIMARY 4 const # Using where; Using index
rows_read
-251
+250
set optimizer_switch = 'skip_scan=on,skip_scan_cost_based=off';
explain select a, b, c, d from t where a = 5 and d <= 3;
id select_type table type possible_keys key key_len ref rows Extra
@@ -58,13 +58,13 @@ explain select e from t where a = 5 and d <= 3;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t ref PRIMARY PRIMARY 4 const # Using where
rows_read
-251
+250
set optimizer_switch = 'skip_scan=on,skip_scan_cost_based=off';
explain select e from t where a = 5 and d <= 3;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t ref PRIMARY,b PRIMARY 4 const # Using where
rows_read
-251
+250
include/diff_tables.inc [temp_orig, temp_skip]
set optimizer_switch = 'skip_scan=off,skip_scan_cost_based=on';
set optimizer_switch = 'skip_scan=off';
@@ -72,13 +72,13 @@ explain select a, b, c, d from t where a = 5 and d >= 98;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t ref PRIMARY PRIMARY 4 const # Using where; Using index
rows_read
-251
+250
set optimizer_switch = 'skip_scan=on,skip_scan_cost_based=off';
explain select a, b, c, d from t where a = 5 and d >= 98;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t range PRIMARY,b PRIMARY 16 NULL # Using where; Using index for skip scan
rows_read
-51
+26
include/diff_tables.inc [temp_orig, temp_skip]
set optimizer_switch = 'skip_scan=off,skip_scan_cost_based=on';
set optimizer_switch = 'skip_scan=off';
@@ -86,13 +86,13 @@ explain select e from t where a = 5 and d >= 98;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t ref PRIMARY PRIMARY 4 const # Using where
rows_read
-251
+250
set optimizer_switch = 'skip_scan=on,skip_scan_cost_based=off';
explain select e from t where a = 5 and d >= 98;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t ref PRIMARY,b PRIMARY 4 const # Using where
rows_read
-251
+250
include/diff_tables.inc [temp_orig, temp_skip]
set optimizer_switch = 'skip_scan=off,skip_scan_cost_based=on';
set optimizer_switch = 'skip_scan=off';
@@ -100,13 +100,13 @@ explain select a, b, c, d from t where a in (1, 5) and d >= 98;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t range PRIMARY PRIMARY 4 NULL # Using where; Using index
rows_read
-502
+500
set optimizer_switch = 'skip_scan=on,skip_scan_cost_based=off';
explain select a, b, c, d from t where a in (1, 5) and d >= 98;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t range PRIMARY,b PRIMARY 16 NULL # Using where; Using index for skip scan
rows_read
-102
+52
include/diff_tables.inc [temp_orig, temp_skip]
set optimizer_switch = 'skip_scan=off,skip_scan_cost_based=on';
set optimizer_switch = 'skip_scan=off';
@@ -114,13 +114,13 @@ explain select a, b, c, d from t where a in (1, 3, 5) and d >= 98;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t range PRIMARY PRIMARY 4 NULL # Using where; Using index
rows_read
-753
+750
set optimizer_switch = 'skip_scan=on,skip_scan_cost_based=off';
explain select a, b, c, d from t where a in (1, 3, 5) and d >= 98;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t range PRIMARY,b PRIMARY 16 NULL # Using where; Using index for skip scan
rows_read
-153
+78
include/diff_tables.inc [temp_orig, temp_skip]
set optimizer_switch = 'skip_scan=off,skip_scan_cost_based=on';
set optimizer_switch = 'skip_scan=off';
@@ -128,13 +128,13 @@ explain select a, b, c, d from t where a in (1, 5) and b in (1, 2) and d >= 98;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t range PRIMARY,b PRIMARY 8 NULL # Using where; Using index
rows_read
-204
+200
set optimizer_switch = 'skip_scan=on,skip_scan_cost_based=off';
explain select a, b, c, d from t where a in (1, 5) and b in (1, 2) and d >= 98;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t range PRIMARY,b PRIMARY 16 NULL # Using where; Using index for skip scan
rows_read
-44
+24
include/diff_tables.inc [temp_orig, temp_skip]
set optimizer_switch = 'skip_scan=off,skip_scan_cost_based=on';
set optimizer_switch = 'skip_scan=off';
@@ -142,13 +142,13 @@ explain select a, b, c, d from t where a in (1, 2, 3, 4, 5) and b in (1, 2, 3) a
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t range PRIMARY,b PRIMARY 8 NULL # Using where; Using index
rows_read
-765
+750
set optimizer_switch = 'skip_scan=on,skip_scan_cost_based=off';
explain select a, b, c, d from t where a in (1, 2, 3, 4, 5) and b in (1, 2, 3) and d >= 98;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t range PRIMARY,b PRIMARY 16 NULL # Using where; Using index for skip scan
rows_read
-165
+90
include/diff_tables.inc [temp_orig, temp_skip]
set optimizer_switch = 'skip_scan=off,skip_scan_cost_based=on';
set optimizer_switch = 'skip_scan=off';
@@ -156,13 +156,13 @@ explain select a, b, c, d from t where a = 5 and b = 2 and d >= 98;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t ref PRIMARY,b PRIMARY 8 const,const # Using where; Using index
rows_read
-51
+50
set optimizer_switch = 'skip_scan=on,skip_scan_cost_based=off';
explain select a, b, c, d from t where a = 5 and b = 2 and d >= 98;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t range PRIMARY,b PRIMARY 16 NULL # Using where; Using index for skip scan
rows_read
-11
+6
include/diff_tables.inc [temp_orig, temp_skip]
set optimizer_switch = 'skip_scan=off,skip_scan_cost_based=on';
set optimizer_switch = 'skip_scan=off';
@@ -170,7 +170,7 @@ explain select a+1, b, c, d from t where a = 5 and d < 3;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t ref PRIMARY PRIMARY 4 const # Using where; Using index
rows_read
-251
+250
set optimizer_switch = 'skip_scan=on,skip_scan_cost_based=off';
explain select a+1, b, c, d from t where a = 5 and d < 3;
id select_type table type possible_keys key key_len ref rows Extra
@@ -184,7 +184,7 @@ explain select b, c, d from t where a = 5 and d < 3;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t ref PRIMARY PRIMARY 4 const # Using where; Using index
rows_read
-251
+250
set optimizer_switch = 'skip_scan=on,skip_scan_cost_based=off';
explain select b, c, d from t where a = 5 and d < 3;
id select_type table type possible_keys key key_len ref rows Extra
@@ -204,7 +204,7 @@ explain select a, b, c, d from t where a = b and d >= 98;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t range PRIMARY,b b 8 NULL # Using where; Using index for skip scan
rows_read
-9
+5
include/diff_tables.inc [temp_orig, temp_skip]
set optimizer_switch = 'skip_scan=off,skip_scan_cost_based=on';
set optimizer_switch = 'skip_scan=on';
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/perf_context.result b/storage/rocksdb/mysql-test/rocksdb/r/perf_context.result
index 6586b92d129..28f965843aa 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/perf_context.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/perf_context.result
@@ -14,8 +14,13 @@ test t1 NULL BLOCK_READ_BYTE #
test t1 NULL BLOCK_READ_TIME #
test t1 NULL BLOCK_CHECKSUM_TIME #
test t1 NULL BLOCK_DECOMPRESS_TIME #
+test t1 NULL GET_READ_BYTES #
+test t1 NULL MULTIGET_READ_BYTES #
+test t1 NULL ITER_READ_BYTES #
test t1 NULL INTERNAL_KEY_SKIPPED_COUNT #
test t1 NULL INTERNAL_DELETE_SKIPPED_COUNT #
+test t1 NULL INTERNAL_RECENT_SKIPPED_COUNT #
+test t1 NULL INTERNAL_MERGE_COUNT #
test t1 NULL GET_SNAPSHOT_TIME #
test t1 NULL GET_FROM_MEMTABLE_TIME #
test t1 NULL GET_FROM_MEMTABLE_COUNT #
@@ -23,9 +28,12 @@ test t1 NULL GET_POST_PROCESS_TIME #
test t1 NULL GET_FROM_OUTPUT_FILES_TIME #
test t1 NULL SEEK_ON_MEMTABLE_TIME #
test t1 NULL SEEK_ON_MEMTABLE_COUNT #
+test t1 NULL NEXT_ON_MEMTABLE_COUNT #
+test t1 NULL PREV_ON_MEMTABLE_COUNT #
test t1 NULL SEEK_CHILD_SEEK_TIME #
test t1 NULL SEEK_CHILD_SEEK_COUNT #
-test t1 NULL SEEK_IN_HEAP_TIME #
+test t1 NULL SEEK_MIN_HEAP_TIME #
+test t1 NULL SEEK_MAX_HEAP_TIME #
test t1 NULL SEEK_INTERNAL_SEEK_TIME #
test t1 NULL FIND_NEXT_USER_ENTRY_TIME #
test t1 NULL WRITE_WAL_TIME #
@@ -41,6 +49,12 @@ test t1 NULL NEW_TABLE_BLOCK_ITER_NANOS #
test t1 NULL NEW_TABLE_ITERATOR_NANOS #
test t1 NULL BLOCK_SEEK_NANOS #
test t1 NULL FIND_TABLE_NANOS #
+test t1 NULL BLOOM_MEMTABLE_HIT_COUNT #
+test t1 NULL BLOOM_MEMTABLE_MISS_COUNT #
+test t1 NULL BLOOM_SST_HIT_COUNT #
+test t1 NULL BLOOM_SST_MISS_COUNT #
+test t1 NULL KEY_LOCK_WAIT_TIME #
+test t1 NULL KEY_LOCK_WAIT_COUNT #
test t1 NULL IO_THREAD_POOL_ID #
test t1 NULL IO_BYTES_WRITTEN #
test t1 NULL IO_BYTES_READ #
@@ -59,8 +73,13 @@ BLOCK_READ_BYTE #
BLOCK_READ_TIME #
BLOCK_CHECKSUM_TIME #
BLOCK_DECOMPRESS_TIME #
+GET_READ_BYTES #
+MULTIGET_READ_BYTES #
+ITER_READ_BYTES #
INTERNAL_KEY_SKIPPED_COUNT #
INTERNAL_DELETE_SKIPPED_COUNT #
+INTERNAL_RECENT_SKIPPED_COUNT #
+INTERNAL_MERGE_COUNT #
GET_SNAPSHOT_TIME #
GET_FROM_MEMTABLE_TIME #
GET_FROM_MEMTABLE_COUNT #
@@ -68,9 +87,12 @@ GET_POST_PROCESS_TIME #
GET_FROM_OUTPUT_FILES_TIME #
SEEK_ON_MEMTABLE_TIME #
SEEK_ON_MEMTABLE_COUNT #
+NEXT_ON_MEMTABLE_COUNT #
+PREV_ON_MEMTABLE_COUNT #
SEEK_CHILD_SEEK_TIME #
SEEK_CHILD_SEEK_COUNT #
-SEEK_IN_HEAP_TIME #
+SEEK_MIN_HEAP_TIME #
+SEEK_MAX_HEAP_TIME #
SEEK_INTERNAL_SEEK_TIME #
FIND_NEXT_USER_ENTRY_TIME #
WRITE_WAL_TIME #
@@ -86,6 +108,12 @@ NEW_TABLE_BLOCK_ITER_NANOS #
NEW_TABLE_ITERATOR_NANOS #
BLOCK_SEEK_NANOS #
FIND_TABLE_NANOS #
+BLOOM_MEMTABLE_HIT_COUNT #
+BLOOM_MEMTABLE_MISS_COUNT #
+BLOOM_SST_HIT_COUNT #
+BLOOM_SST_MISS_COUNT #
+KEY_LOCK_WAIT_TIME #
+KEY_LOCK_WAIT_COUNT #
IO_THREAD_POOL_ID #
IO_BYTES_WRITTEN #
IO_BYTES_READ #
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/rocksdb.result b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb.result
index 0b5e512cdc9..6138dac92e5 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/rocksdb.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb.result
@@ -573,9 +573,6 @@ insert into t30 values
('row3', 'row3-key', 'row3-data'),
('row4', 'row4-key', 'row4-data'),
('row5', 'row5-key', 'row5-data');
-analyze table t30;
-Table Op Msg_type Msg_text
-test.t30 analyze status OK
explain
select * from t30 where key1 <='row3-key';
id select_type table type possible_keys key key_len ref rows Extra
@@ -868,6 +865,7 @@ ERROR 42S02: Unknown table 'test.t45'
show variables
where
variable_name like 'rocksdb%' and
+variable_name not like 'rocksdb_max_open_files' and
variable_name not like 'rocksdb_supported_compression_types';
Variable_name Value
rocksdb_access_hint_on_compaction_start 1
@@ -875,6 +873,7 @@ rocksdb_advise_random_on_open ON
rocksdb_allow_concurrent_memtable_write OFF
rocksdb_allow_mmap_reads OFF
rocksdb_allow_mmap_writes OFF
+rocksdb_allow_to_start_after_corruption OFF
rocksdb_blind_delete_primary_key OFF
rocksdb_block_cache_size 536870912
rocksdb_block_restart_interval 16
@@ -894,7 +893,6 @@ rocksdb_compaction_sequential_deletes 0
rocksdb_compaction_sequential_deletes_count_sd OFF
rocksdb_compaction_sequential_deletes_file_size 0
rocksdb_compaction_sequential_deletes_window 0
-rocksdb_concurrent_prepare ON
rocksdb_create_checkpoint
rocksdb_create_if_missing ON
rocksdb_create_missing_column_families OFF
@@ -918,7 +916,6 @@ rocksdb_enable_ttl_read_filtering ON
rocksdb_enable_write_thread_adaptive_yield OFF
rocksdb_error_if_exists OFF
rocksdb_flush_log_at_trx_commit 0
-rocksdb_flush_memtable_on_analyze ON
rocksdb_force_compute_memtable_stats ON
rocksdb_force_compute_memtable_stats_cachetime 0
rocksdb_force_flush_memtable_and_lzero_now OFF
@@ -926,6 +923,7 @@ rocksdb_force_flush_memtable_now OFF
rocksdb_force_index_records_in_range 0
rocksdb_git_hash #
rocksdb_hash_index_allow_collision ON
+rocksdb_ignore_unknown_options ON
rocksdb_index_type kBinarySearch
rocksdb_info_log_level error_level
rocksdb_io_write_timeout 0
@@ -942,8 +940,7 @@ rocksdb_max_background_jobs 2
rocksdb_max_latest_deadlocks 5
rocksdb_max_log_file_size 0
rocksdb_max_manifest_file_size 18446744073709551615
-rocksdb_max_open_files -1
-rocksdb_max_row_locks 1073741824
+rocksdb_max_row_locks 1048576
rocksdb_max_subcompactions 1
rocksdb_max_total_wal_size 0
rocksdb_merge_buf_size 67108864
@@ -978,6 +975,7 @@ rocksdb_table_cache_numshardbits 6
rocksdb_table_stats_sampling_pct 10
rocksdb_tmpdir
rocksdb_trace_sst_api OFF
+rocksdb_two_write_queues ON
rocksdb_unsafe_for_binlog OFF
rocksdb_update_cf_options
rocksdb_use_adaptive_mutex OFF
@@ -1464,6 +1462,7 @@ Rocksdb_rows_read #
Rocksdb_rows_updated #
Rocksdb_rows_deleted_blind #
Rocksdb_rows_expired #
+Rocksdb_rows_filtered #
Rocksdb_system_rows_deleted #
Rocksdb_system_rows_inserted #
Rocksdb_system_rows_read #
@@ -1474,11 +1473,22 @@ Rocksdb_queries_point #
Rocksdb_queries_range #
Rocksdb_covered_secondary_key_lookups #
Rocksdb_block_cache_add #
+Rocksdb_block_cache_add_failures #
+Rocksdb_block_cache_bytes_read #
+Rocksdb_block_cache_bytes_write #
+Rocksdb_block_cache_data_add #
+Rocksdb_block_cache_data_bytes_insert #
Rocksdb_block_cache_data_hit #
Rocksdb_block_cache_data_miss #
+Rocksdb_block_cache_filter_add #
+Rocksdb_block_cache_filter_bytes_evict #
+Rocksdb_block_cache_filter_bytes_insert #
Rocksdb_block_cache_filter_hit #
Rocksdb_block_cache_filter_miss #
Rocksdb_block_cache_hit #
+Rocksdb_block_cache_index_add #
+Rocksdb_block_cache_index_bytes_evict #
+Rocksdb_block_cache_index_bytes_insert #
Rocksdb_block_cache_index_hit #
Rocksdb_block_cache_index_miss #
Rocksdb_block_cache_miss #
@@ -1495,7 +1505,11 @@ Rocksdb_compaction_key_drop_new #
Rocksdb_compaction_key_drop_obsolete #
Rocksdb_compaction_key_drop_user #
Rocksdb_flush_write_bytes #
+Rocksdb_get_hit_l0 #
+Rocksdb_get_hit_l1 #
+Rocksdb_get_hit_l2_and_up #
Rocksdb_getupdatessince_calls #
+Rocksdb_iter_bytes_read #
Rocksdb_memtable_hit #
Rocksdb_memtable_miss #
Rocksdb_no_file_closes #
@@ -1503,6 +1517,12 @@ Rocksdb_no_file_errors #
Rocksdb_no_file_opens #
Rocksdb_num_iterators #
Rocksdb_number_block_not_compressed #
+Rocksdb_number_db_next #
+Rocksdb_number_db_next_found #
+Rocksdb_number_db_prev #
+Rocksdb_number_db_prev_found #
+Rocksdb_number_db_seek #
+Rocksdb_number_db_seek_found #
Rocksdb_number_deletes_filtered #
Rocksdb_number_keys_read #
Rocksdb_number_keys_updated #
@@ -1517,11 +1537,11 @@ Rocksdb_number_sst_entry_merge #
Rocksdb_number_sst_entry_other #
Rocksdb_number_sst_entry_put #
Rocksdb_number_sst_entry_singledelete #
-Rocksdb_number_stat_computes #
Rocksdb_number_superversion_acquires #
Rocksdb_number_superversion_cleanups #
Rocksdb_number_superversion_releases #
-Rocksdb_rate_limit_delay_millis #
+Rocksdb_row_lock_deadlocks #
+Rocksdb_row_lock_wait_timeouts #
Rocksdb_snapshot_conflict_errors #
Rocksdb_stall_l0_file_count_limit_slowdowns #
Rocksdb_stall_locked_l0_file_count_limit_slowdowns #
@@ -1549,6 +1569,7 @@ ROCKSDB_ROWS_READ
ROCKSDB_ROWS_UPDATED
ROCKSDB_ROWS_DELETED_BLIND
ROCKSDB_ROWS_EXPIRED
+ROCKSDB_ROWS_FILTERED
ROCKSDB_SYSTEM_ROWS_DELETED
ROCKSDB_SYSTEM_ROWS_INSERTED
ROCKSDB_SYSTEM_ROWS_READ
@@ -1559,11 +1580,22 @@ ROCKSDB_QUERIES_POINT
ROCKSDB_QUERIES_RANGE
ROCKSDB_COVERED_SECONDARY_KEY_LOOKUPS
ROCKSDB_BLOCK_CACHE_ADD
+ROCKSDB_BLOCK_CACHE_ADD_FAILURES
+ROCKSDB_BLOCK_CACHE_BYTES_READ
+ROCKSDB_BLOCK_CACHE_BYTES_WRITE
+ROCKSDB_BLOCK_CACHE_DATA_ADD
+ROCKSDB_BLOCK_CACHE_DATA_BYTES_INSERT
ROCKSDB_BLOCK_CACHE_DATA_HIT
ROCKSDB_BLOCK_CACHE_DATA_MISS
+ROCKSDB_BLOCK_CACHE_FILTER_ADD
+ROCKSDB_BLOCK_CACHE_FILTER_BYTES_EVICT
+ROCKSDB_BLOCK_CACHE_FILTER_BYTES_INSERT
ROCKSDB_BLOCK_CACHE_FILTER_HIT
ROCKSDB_BLOCK_CACHE_FILTER_MISS
ROCKSDB_BLOCK_CACHE_HIT
+ROCKSDB_BLOCK_CACHE_INDEX_ADD
+ROCKSDB_BLOCK_CACHE_INDEX_BYTES_EVICT
+ROCKSDB_BLOCK_CACHE_INDEX_BYTES_INSERT
ROCKSDB_BLOCK_CACHE_INDEX_HIT
ROCKSDB_BLOCK_CACHE_INDEX_MISS
ROCKSDB_BLOCK_CACHE_MISS
@@ -1580,7 +1612,11 @@ ROCKSDB_COMPACTION_KEY_DROP_NEW
ROCKSDB_COMPACTION_KEY_DROP_OBSOLETE
ROCKSDB_COMPACTION_KEY_DROP_USER
ROCKSDB_FLUSH_WRITE_BYTES
+ROCKSDB_GET_HIT_L0
+ROCKSDB_GET_HIT_L1
+ROCKSDB_GET_HIT_L2_AND_UP
ROCKSDB_GETUPDATESSINCE_CALLS
+ROCKSDB_ITER_BYTES_READ
ROCKSDB_MEMTABLE_HIT
ROCKSDB_MEMTABLE_MISS
ROCKSDB_NO_FILE_CLOSES
@@ -1588,6 +1624,12 @@ ROCKSDB_NO_FILE_ERRORS
ROCKSDB_NO_FILE_OPENS
ROCKSDB_NUM_ITERATORS
ROCKSDB_NUMBER_BLOCK_NOT_COMPRESSED
+ROCKSDB_NUMBER_DB_NEXT
+ROCKSDB_NUMBER_DB_NEXT_FOUND
+ROCKSDB_NUMBER_DB_PREV
+ROCKSDB_NUMBER_DB_PREV_FOUND
+ROCKSDB_NUMBER_DB_SEEK
+ROCKSDB_NUMBER_DB_SEEK_FOUND
ROCKSDB_NUMBER_DELETES_FILTERED
ROCKSDB_NUMBER_KEYS_READ
ROCKSDB_NUMBER_KEYS_UPDATED
@@ -1602,11 +1644,11 @@ ROCKSDB_NUMBER_SST_ENTRY_MERGE
ROCKSDB_NUMBER_SST_ENTRY_OTHER
ROCKSDB_NUMBER_SST_ENTRY_PUT
ROCKSDB_NUMBER_SST_ENTRY_SINGLEDELETE
-ROCKSDB_NUMBER_STAT_COMPUTES
ROCKSDB_NUMBER_SUPERVERSION_ACQUIRES
ROCKSDB_NUMBER_SUPERVERSION_CLEANUPS
ROCKSDB_NUMBER_SUPERVERSION_RELEASES
-ROCKSDB_RATE_LIMIT_DELAY_MILLIS
+ROCKSDB_ROW_LOCK_DEADLOCKS
+ROCKSDB_ROW_LOCK_WAIT_TIMEOUTS
ROCKSDB_SNAPSHOT_CONFLICT_ERRORS
ROCKSDB_STALL_L0_FILE_COUNT_LIMIT_SLOWDOWNS
ROCKSDB_STALL_LOCKED_L0_FILE_COUNT_LIMIT_SLOWDOWNS
@@ -1636,6 +1678,7 @@ ROCKSDB_ROWS_READ
ROCKSDB_ROWS_UPDATED
ROCKSDB_ROWS_DELETED_BLIND
ROCKSDB_ROWS_EXPIRED
+ROCKSDB_ROWS_FILTERED
ROCKSDB_SYSTEM_ROWS_DELETED
ROCKSDB_SYSTEM_ROWS_INSERTED
ROCKSDB_SYSTEM_ROWS_READ
@@ -1646,11 +1689,22 @@ ROCKSDB_QUERIES_POINT
ROCKSDB_QUERIES_RANGE
ROCKSDB_COVERED_SECONDARY_KEY_LOOKUPS
ROCKSDB_BLOCK_CACHE_ADD
+ROCKSDB_BLOCK_CACHE_ADD_FAILURES
+ROCKSDB_BLOCK_CACHE_BYTES_READ
+ROCKSDB_BLOCK_CACHE_BYTES_WRITE
+ROCKSDB_BLOCK_CACHE_DATA_ADD
+ROCKSDB_BLOCK_CACHE_DATA_BYTES_INSERT
ROCKSDB_BLOCK_CACHE_DATA_HIT
ROCKSDB_BLOCK_CACHE_DATA_MISS
+ROCKSDB_BLOCK_CACHE_FILTER_ADD
+ROCKSDB_BLOCK_CACHE_FILTER_BYTES_EVICT
+ROCKSDB_BLOCK_CACHE_FILTER_BYTES_INSERT
ROCKSDB_BLOCK_CACHE_FILTER_HIT
ROCKSDB_BLOCK_CACHE_FILTER_MISS
ROCKSDB_BLOCK_CACHE_HIT
+ROCKSDB_BLOCK_CACHE_INDEX_ADD
+ROCKSDB_BLOCK_CACHE_INDEX_BYTES_EVICT
+ROCKSDB_BLOCK_CACHE_INDEX_BYTES_INSERT
ROCKSDB_BLOCK_CACHE_INDEX_HIT
ROCKSDB_BLOCK_CACHE_INDEX_MISS
ROCKSDB_BLOCK_CACHE_MISS
@@ -1667,7 +1721,11 @@ ROCKSDB_COMPACTION_KEY_DROP_NEW
ROCKSDB_COMPACTION_KEY_DROP_OBSOLETE
ROCKSDB_COMPACTION_KEY_DROP_USER
ROCKSDB_FLUSH_WRITE_BYTES
+ROCKSDB_GET_HIT_L0
+ROCKSDB_GET_HIT_L1
+ROCKSDB_GET_HIT_L2_AND_UP
ROCKSDB_GETUPDATESSINCE_CALLS
+ROCKSDB_ITER_BYTES_READ
ROCKSDB_MEMTABLE_HIT
ROCKSDB_MEMTABLE_MISS
ROCKSDB_NO_FILE_CLOSES
@@ -1675,6 +1733,12 @@ ROCKSDB_NO_FILE_ERRORS
ROCKSDB_NO_FILE_OPENS
ROCKSDB_NUM_ITERATORS
ROCKSDB_NUMBER_BLOCK_NOT_COMPRESSED
+ROCKSDB_NUMBER_DB_NEXT
+ROCKSDB_NUMBER_DB_NEXT_FOUND
+ROCKSDB_NUMBER_DB_PREV
+ROCKSDB_NUMBER_DB_PREV_FOUND
+ROCKSDB_NUMBER_DB_SEEK
+ROCKSDB_NUMBER_DB_SEEK_FOUND
ROCKSDB_NUMBER_DELETES_FILTERED
ROCKSDB_NUMBER_KEYS_READ
ROCKSDB_NUMBER_KEYS_UPDATED
@@ -1689,11 +1753,11 @@ ROCKSDB_NUMBER_SST_ENTRY_MERGE
ROCKSDB_NUMBER_SST_ENTRY_OTHER
ROCKSDB_NUMBER_SST_ENTRY_PUT
ROCKSDB_NUMBER_SST_ENTRY_SINGLEDELETE
-ROCKSDB_NUMBER_STAT_COMPUTES
ROCKSDB_NUMBER_SUPERVERSION_ACQUIRES
ROCKSDB_NUMBER_SUPERVERSION_CLEANUPS
ROCKSDB_NUMBER_SUPERVERSION_RELEASES
-ROCKSDB_RATE_LIMIT_DELAY_MILLIS
+ROCKSDB_ROW_LOCK_DEADLOCKS
+ROCKSDB_ROW_LOCK_WAIT_TIMEOUTS
ROCKSDB_SNAPSHOT_CONFLICT_ERRORS
ROCKSDB_STALL_L0_FILE_COUNT_LIMIT_SLOWDOWNS
ROCKSDB_STALL_LOCKED_L0_FILE_COUNT_LIMIT_SLOWDOWNS
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_debug.result b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_debug.result
new file mode 100644
index 00000000000..a245fa851de
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_debug.result
@@ -0,0 +1,11 @@
+#
+# Issue #728: Assertion `covers_key(b)' failed in int
+# myrocks::Rdb_key_def::cmp_full_keys(const rocks db::Slice&,
+# const rocksdb::Slice&)
+#
+CREATE TABLE t2(c1 TINYINT SIGNED KEY,c2 TINYINT UNSIGNED,c3 INT);
+INSERT INTO t2(c1)VALUES(0);
+SELECT * FROM t2 WHERE c1<=127 ORDER BY c1 DESC;
+c1 c2 c3
+0 NULL NULL
+DROP TABLE t2;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_range2.result b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_range2.result
index d7a4f9dd065..10a6a02008e 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_range2.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_range2.result
@@ -7,5 +7,5 @@ count(*)
10000
explain select c1 from t1 where c1 > 5 limit 10;
id select_type table type possible_keys key key_len ref rows Extra
-1 SIMPLE t1 range i i 9 NULL 9900 Using where; Using index
+1 SIMPLE t1 range i i 9 NULL # Using where; Using index
drop table t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/show_engine.result b/storage/rocksdb/mysql-test/rocksdb/r/show_engine.result
index 1bcd3692b4a..9fc5db98d7d 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/show_engine.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/show_engine.result
@@ -83,12 +83,12 @@ FROM INFORMATION_SCHEMA.ROCKSDB_PERF_CONTEXT
WHERE TABLE_SCHEMA = 'test'
GROUP BY TABLE_NAME, PARTITION_NAME;
TABLE_SCHEMA TABLE_NAME PARTITION_NAME COUNT(STAT_TYPE)
-test t1 NULL 43
-test t2 NULL 43
-test t4 p0 43
-test t4 p1 43
-test t4 p2 43
-test t4 p3 43
+test t1 NULL 57
+test t2 NULL 57
+test t4 p0 57
+test t4 p1 57
+test t4 p2 57
+test t4 p3 57
SELECT * FROM INFORMATION_SCHEMA.ROCKSDB_CF_OPTIONS;
CF_NAME OPTION_TYPE VALUE
__system__ COMPARATOR #
@@ -153,9 +153,15 @@ __system__ TABLE_FACTORY::BLOCK_SIZE #
__system__ TABLE_FACTORY::BLOCK_SIZE_DEVIATION #
__system__ TABLE_FACTORY::BLOCK_RESTART_INTERVAL #
__system__ TABLE_FACTORY::INDEX_BLOCK_RESTART_INTERVAL #
+__system__ TABLE_FACTORY::METADATA_BLOCK_SIZE #
+__system__ TABLE_FACTORY::PARTITION_FILTERS #
+__system__ TABLE_FACTORY::USE_DELTA_ENCODING #
__system__ TABLE_FACTORY::FILTER_POLICY #
__system__ TABLE_FACTORY::WHOLE_KEY_FILTERING #
+__system__ TABLE_FACTORY::VERIFY_COMPRESSION #
+__system__ TABLE_FACTORY::READ_AMP_BYTES_PER_BIT #
__system__ TABLE_FACTORY::FORMAT_VERSION #
+__system__ TABLE_FACTORY::ENABLE_INDEX_COMPRESSION #
cf_t1 COMPARATOR #
cf_t1 MERGE_OPERATOR #
cf_t1 COMPACTION_FILTER #
@@ -218,9 +224,15 @@ cf_t1 TABLE_FACTORY::BLOCK_SIZE #
cf_t1 TABLE_FACTORY::BLOCK_SIZE_DEVIATION #
cf_t1 TABLE_FACTORY::BLOCK_RESTART_INTERVAL #
cf_t1 TABLE_FACTORY::INDEX_BLOCK_RESTART_INTERVAL #
+cf_t1 TABLE_FACTORY::METADATA_BLOCK_SIZE #
+cf_t1 TABLE_FACTORY::PARTITION_FILTERS #
+cf_t1 TABLE_FACTORY::USE_DELTA_ENCODING #
cf_t1 TABLE_FACTORY::FILTER_POLICY #
cf_t1 TABLE_FACTORY::WHOLE_KEY_FILTERING #
+cf_t1 TABLE_FACTORY::VERIFY_COMPRESSION #
+cf_t1 TABLE_FACTORY::READ_AMP_BYTES_PER_BIT #
cf_t1 TABLE_FACTORY::FORMAT_VERSION #
+cf_t1 TABLE_FACTORY::ENABLE_INDEX_COMPRESSION #
default COMPARATOR #
default MERGE_OPERATOR #
default COMPACTION_FILTER #
@@ -283,9 +295,15 @@ default TABLE_FACTORY::BLOCK_SIZE #
default TABLE_FACTORY::BLOCK_SIZE_DEVIATION #
default TABLE_FACTORY::BLOCK_RESTART_INTERVAL #
default TABLE_FACTORY::INDEX_BLOCK_RESTART_INTERVAL #
+default TABLE_FACTORY::METADATA_BLOCK_SIZE #
+default TABLE_FACTORY::PARTITION_FILTERS #
+default TABLE_FACTORY::USE_DELTA_ENCODING #
default TABLE_FACTORY::FILTER_POLICY #
default TABLE_FACTORY::WHOLE_KEY_FILTERING #
+default TABLE_FACTORY::VERIFY_COMPRESSION #
+default TABLE_FACTORY::READ_AMP_BYTES_PER_BIT #
default TABLE_FACTORY::FORMAT_VERSION #
+default TABLE_FACTORY::ENABLE_INDEX_COMPRESSION #
rev:cf_t2 COMPARATOR #
rev:cf_t2 MERGE_OPERATOR #
rev:cf_t2 COMPACTION_FILTER #
@@ -348,9 +366,15 @@ rev:cf_t2 TABLE_FACTORY::BLOCK_SIZE #
rev:cf_t2 TABLE_FACTORY::BLOCK_SIZE_DEVIATION #
rev:cf_t2 TABLE_FACTORY::BLOCK_RESTART_INTERVAL #
rev:cf_t2 TABLE_FACTORY::INDEX_BLOCK_RESTART_INTERVAL #
+rev:cf_t2 TABLE_FACTORY::METADATA_BLOCK_SIZE #
+rev:cf_t2 TABLE_FACTORY::PARTITION_FILTERS #
+rev:cf_t2 TABLE_FACTORY::USE_DELTA_ENCODING #
rev:cf_t2 TABLE_FACTORY::FILTER_POLICY #
rev:cf_t2 TABLE_FACTORY::WHOLE_KEY_FILTERING #
+rev:cf_t2 TABLE_FACTORY::VERIFY_COMPRESSION #
+rev:cf_t2 TABLE_FACTORY::READ_AMP_BYTES_PER_BIT #
rev:cf_t2 TABLE_FACTORY::FORMAT_VERSION #
+rev:cf_t2 TABLE_FACTORY::ENABLE_INDEX_COMPRESSION #
DROP TABLE t1;
DROP TABLE t2;
DROP TABLE t3;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/skip_validate_tmp_table.result b/storage/rocksdb/mysql-test/rocksdb/r/skip_validate_tmp_table.result
index 7642dcda43f..92906f22b1e 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/skip_validate_tmp_table.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/skip_validate_tmp_table.result
@@ -1,4 +1,20 @@
-CREATE TABLE t1 (pk int primary key) ENGINE=ROCKSDB;
+create table t1 (pk int primary key) engine=rocksdb;
+show tables;
+Tables_in_test
+#mysql50#t1#sql-test
+t1
+call mtr.add_suppression('Invalid .old.. table or database name .t1#sql-test.');
set session debug_dbug="+d,gen_sql_table_name";
rename table t1 to t2;
set session debug_dbug= "-d,gen_sql_table_name";
+show tables;
+Tables_in_test
+#mysql50#t1#sql-test
+t2
+show tables;
+Tables_in_test
+create table t2 (pk int primary key) engine=rocksdb;
+show tables;
+Tables_in_test
+t2
+drop table t2;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/ttl_primary_read_filtering.result b/storage/rocksdb/mysql-test/rocksdb/r/ttl_primary_read_filtering.result
index 1df6e838bcd..c66b17926b7 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/ttl_primary_read_filtering.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/ttl_primary_read_filtering.result
@@ -51,18 +51,30 @@ INSERT INTO t1 values (3);
INSERT INTO t1 values (5);
INSERT INTO t1 values (7);
set global rocksdb_debug_ttl_rec_ts = 0;
+select variable_value into @a from information_schema.global_status where variable_name='rocksdb_rows_filtered';
SELECT * FROM t1;
a
+select variable_value-@a from information_schema.global_status where variable_name='rocksdb_rows_filtered';
+variable_value-@a
+4
set global rocksdb_enable_ttl_read_filtering=0;
+select variable_value into @a from information_schema.global_status where variable_name='rocksdb_rows_filtered';
SELECT * FROM t1;
a
1
3
5
7
+select variable_value-@a from information_schema.global_status where variable_name='rocksdb_rows_filtered';
+variable_value-@a
+0
set global rocksdb_enable_ttl_read_filtering=1;
+select variable_value into @a from information_schema.global_status where variable_name='rocksdb_rows_filtered';
SELECT * FROM t1;
a
+select variable_value-@a from information_schema.global_status where variable_name='rocksdb_rows_filtered';
+variable_value-@a
+4
DROP TABLE t1;
CREATE TABLE t1 (
a int,
@@ -191,20 +203,36 @@ a
connection con2;
set global rocksdb_force_flush_memtable_now=1;
set global rocksdb_compact_cf='default';
+select variable_value into @a from information_schema.global_status where variable_name='rocksdb_rows_filtered';
SELECT * FROM t1;
a
+select variable_value-@a from information_schema.global_status where variable_name='rocksdb_rows_filtered';
+variable_value-@a
+1
# Switching to connection 1
connection con1;
+select variable_value into @a from information_schema.global_status where variable_name='rocksdb_rows_filtered';
SELECT * FROM t1;
a
1
+select variable_value-@a from information_schema.global_status where variable_name='rocksdb_rows_filtered';
+variable_value-@a
+0
UPDATE t1 set a = a + 1;
+select variable_value into @a from information_schema.global_status where variable_name='rocksdb_rows_filtered';
SELECT * FROM t1;
a
2
+select variable_value-@a from information_schema.global_status where variable_name='rocksdb_rows_filtered';
+variable_value-@a
+0
COMMIT;
+select variable_value into @a from information_schema.global_status where variable_name='rocksdb_rows_filtered';
SELECT * FROM t1;
a
+select variable_value-@a from information_schema.global_status where variable_name='rocksdb_rows_filtered';
+variable_value-@a
+1
DROP TABLE t1;
disconnect con1;
disconnect con2;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/type_varchar.result b/storage/rocksdb/mysql-test/rocksdb/r/type_varchar.result
index c9fa716dffc..a7e086fde66 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/type_varchar.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/type_varchar.result
@@ -1,4 +1,3 @@
-drop table if exists t1,t2;
#
# A basic test whether endspace-aware variable length encoding
# works when in PK
@@ -756,3 +755,16 @@ email_i 1
drop table t;
set global rocksdb_checksums_pct = @save_rocksdb_checksums_pct;
set session rocksdb_verify_row_debug_checksums = @save_rocksdb_verify_row_debug_checksums;
+drop table if exists t;
+Warnings:
+Note 1051 Unknown table 'test.t'
+create table t (h varchar(31) character set utf8 collate utf8_bin not null, i varchar(19) collate latin1_bin not null, primary key(i), key(h)) engine=rocksdb;
+insert into t(i,h) values('a','b');
+check table t;
+Table Op Msg_type Msg_text
+test.t check status OK
+alter table t modify h varchar(31) character set cp1257 collate cp1257_bin not null;
+check table t;
+Table Op Msg_type Msg_text
+test.t check status OK
+drop table t;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/use_direct_reads_writes.result b/storage/rocksdb/mysql-test/rocksdb/r/use_direct_reads_writes.result
index 3291826b290..e8456457cdd 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/use_direct_reads_writes.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/use_direct_reads_writes.result
@@ -4,23 +4,15 @@ call mtr.add_suppression("Aborting");
select plugin_name, plugin_type from information_schema.plugins where plugin_name='RocksDB';
plugin_name plugin_type
ROCKSDB STORAGE ENGINE
-# Check that ROCKSDB plugin is not loaded:
-select plugin_name, plugin_type from information_schema.plugins where plugin_name='RocksDB';
-plugin_name plugin_type
-# Check that MyRocks has printed an error message into server error log:
-FOUND 1 /enable both use_direct_reads/ in mysqld.1.err
-# Now, restart the server back with regular settings
-select plugin_name, plugin_type from information_schema.plugins where plugin_name='RocksDB';
-plugin_name plugin_type
-ROCKSDB STORAGE ENGINE
-#
-# Now, repeat the same with another set of invalid arguments
-#
-# Check that ROCKSDB plugin is not loaded:
-select plugin_name, plugin_type from information_schema.plugins where plugin_name='RocksDB';
-plugin_name plugin_type
-FOUND 1 /enable both use_direct_io_for_flush_and_compaction/ in mysqld.1.err
-# Now, restart the server back with regular settings
-select plugin_name, plugin_type from information_schema.plugins where plugin_name='RocksDB';
-plugin_name plugin_type
-ROCKSDB STORAGE ENGINE
+Checking direct reads
+FOUND 1 /enable both use_direct_reads/ in use_direct_reads_writes.err
+Checking direct writes
+FOUND 1 /enable both use_direct_io_for_flush_and_compaction/ in use_direct_reads_writes.err
+Checking rocksdb_flush_log_at_trx_commit
+FOUND 1 /rocksdb_flush_log_at_trx_commit needs to be/ in use_direct_reads_writes.err
+Validate flush_log settings when direct writes is enabled
+set global rocksdb_flush_log_at_trx_commit=0;
+set global rocksdb_flush_log_at_trx_commit=1;
+ERROR 42000: Variable 'rocksdb_flush_log_at_trx_commit' can't be set to the value of '1'
+set global rocksdb_flush_log_at_trx_commit=2;
+ERROR 42000: Variable 'rocksdb_flush_log_at_trx_commit' can't be set to the value of '2'
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/write_sync.result b/storage/rocksdb/mysql-test/rocksdb/r/write_sync.result
index ee23446eec0..d0a9b034927 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/write_sync.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/write_sync.result
@@ -3,6 +3,7 @@ SET GLOBAL rocksdb_write_ignore_missing_column_families=true;
create table aaa (id int primary key, i int) engine rocksdb;
set @save_rocksdb_flush_log_at_trx_commit=@@global.rocksdb_flush_log_at_trx_commit;
SET GLOBAL rocksdb_flush_log_at_trx_commit=1;
+insert aaa(id, i) values(0,1);
select variable_value into @a from information_schema.global_status where variable_name='rocksdb_wal_synced';
insert aaa(id, i) values(1,1);
select variable_value-@a from information_schema.global_status where variable_name='rocksdb_wal_synced';
@@ -16,11 +17,11 @@ insert aaa(id, i) values(3,1);
select variable_value-@a from information_schema.global_status where variable_name='rocksdb_wal_synced';
variable_value-@a
3
-SET GLOBAL rocksdb_flush_log_at_trx_commit=0;
select variable_value into @a from information_schema.global_status where variable_name='rocksdb_wal_synced';
+SET GLOBAL rocksdb_flush_log_at_trx_commit=0;
insert aaa(id, i) values(4,1);
-SET GLOBAL rocksdb_flush_log_at_trx_commit=2;
select variable_value into @a from information_schema.global_status where variable_name='rocksdb_wal_synced';
+SET GLOBAL rocksdb_flush_log_at_trx_commit=2;
insert aaa(id, i) values(5,1);
truncate table aaa;
drop table aaa;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/add_index_inplace_sstfilewriter.test b/storage/rocksdb/mysql-test/rocksdb/t/add_index_inplace_sstfilewriter.test
index 78ddbe60da5..3977b38d725 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/add_index_inplace_sstfilewriter.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/add_index_inplace_sstfilewriter.test
@@ -65,7 +65,12 @@ ALTER TABLE t1 ADD INDEX kb(b), ALGORITHM=INPLACE;
# disable duplicate index warning
--disable_warnings
# now do same index using copy algorithm
+# hitting max row locks (1M)
+--error ER_RDB_STATUS_GENERAL
ALTER TABLE t1 ADD INDEX kb_copy(b), ALGORITHM=COPY;
+set session rocksdb_bulk_load=1;
+ALTER TABLE t1 ADD INDEX kb_copy(b), ALGORITHM=COPY;
+set session rocksdb_bulk_load=0;
--enable_warnings
# checksum testing
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/allow_no_primary_key_with_sk.test b/storage/rocksdb/mysql-test/rocksdb/t/allow_no_primary_key_with_sk.test
index 1f3ef49e534..18ccf2e39f6 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/allow_no_primary_key_with_sk.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/allow_no_primary_key_with_sk.test
@@ -135,3 +135,15 @@ set global rocksdb_force_flush_memtable_now = true;
select * from t1;
DROP TABLE t1;
+
+## https://github.com/facebook/mysql-5.6/issues/736
+create table t1 (i int auto_increment, key(i)) engine=rocksdb;
+insert into t1 values();
+insert into t1 values();
+insert into t1 values();
+
+show create table t1;
+--source include/restart_mysqld.inc
+show create table t1;
+
+drop table t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/allow_to_start_after_corruption.test b/storage/rocksdb/mysql-test/rocksdb/t/allow_to_start_after_corruption.test
new file mode 100644
index 00000000000..67b2d5f96d7
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/allow_to_start_after_corruption.test
@@ -0,0 +1,75 @@
+--source include/have_rocksdb.inc
+--source include/not_valgrind.inc
+
+--echo #
+--echo # Test how MyRocks behaves when RocksDB reports corrupted data.
+--echo #
+
+--source include/have_debug.inc
+
+# use custom error log to assert on error message in search_pattern_in_file.inc
+--let LOG=$MYSQLTEST_VARDIR/tmp/allow_to_start_after_corruption_debug.err
+--let SEARCH_FILE=$LOG
+
+# restart server to change error log and ignore corruption on startup
+--let $_mysqld_option=--log-error=$LOG --rocksdb_allow_to_start_after_corruption=1
+--source include/restart_mysqld_with_option.inc
+
+--echo #
+--echo # Test server crashes on corrupted data and restarts
+--echo #
+create table t1 (
+ pk int not null primary key,
+ col1 varchar(10)
+) engine=rocksdb;
+
+insert into t1 values (1,1),(2,2),(3,3);
+
+select * from t1 where pk=1;
+set session debug_dbug= "+d,rocksdb_return_status_corrupted";
+--exec echo "wait" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+--error 2013
+select * from t1 where pk=1;
+--source include/wait_until_disconnected.inc
+--let SEARCH_PATTERN=data corruption detected
+--source include/search_pattern_in_file.inc
+--remove_file $LOG
+
+--echo #
+--echo # The same for scan queries
+--echo #
+
+--source include/start_mysqld_with_option.inc
+select * from t1;
+set session debug_dbug= "+d,rocksdb_return_status_corrupted";
+--exec echo "wait" > $_expect_file_name
+--error 2013
+select * from t1;
+--source include/wait_until_disconnected.inc
+--let SEARCH_PATTERN=data corruption detected
+--source include/search_pattern_in_file.inc
+--remove_file $LOG
+
+--echo #
+--echo # Test restart failure. The server is shutdown at this point.
+--echo #
+
+# remove flag to ignore corruption
+--let $_mysqld_option=--log-error=$LOG
+--error 0
+--exec $MYSQLD_CMD --plugin_load=$HA_ROCKSDB_SO $_mysqld_option
+--let SEARCH_PATTERN=The server will exit normally and stop restart attempts
+--source include/search_pattern_in_file.inc
+--remove_file $LOG
+
+--echo #
+--echo # Remove corruption file and restart cleanly
+--echo #
+
+--exec rm $MYSQLTEST_VARDIR/mysqld.$_server_id/data/#rocksdb/ROCKSDB_CORRUPTED
+--source include/start_mysqld_with_option.inc
+
+drop table t1;
+
+# Restart mysqld with default options
+--source include/restart_mysqld.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/autoinc_crash_safe.cnf b/storage/rocksdb/mysql-test/rocksdb/t/autoinc_crash_safe.cnf
new file mode 100644
index 00000000000..a43c4617b96
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/autoinc_crash_safe.cnf
@@ -0,0 +1,8 @@
+!include suite/rpl/my.cnf
+
+[mysqld.1]
+binlog_format=row
+[mysqld.2]
+binlog_format=row
+slave_parallel_workers=1
+#rpl_skip_tx_api=ON
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/autoinc_crash_safe.test b/storage/rocksdb/mysql-test/rocksdb/t/autoinc_crash_safe.test
new file mode 100644
index 00000000000..e61ba720aaf
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/autoinc_crash_safe.test
@@ -0,0 +1,9 @@
+--source include/have_rocksdb.inc
+--source include/have_binlog_format_row.inc
+--source include/master-slave.inc
+
+create table t (i int primary key auto_increment) engine=rocksdb;
+
+--source include/autoinc_crash_safe.inc
+
+--source include/rpl_end.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/autoinc_crash_safe_partition.cnf b/storage/rocksdb/mysql-test/rocksdb/t/autoinc_crash_safe_partition.cnf
new file mode 100644
index 00000000000..0c0b614039e
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/autoinc_crash_safe_partition.cnf
@@ -0,0 +1,8 @@
+!include suite/rpl/my.cnf
+
+[mysqld.1]
+binlog_format=row
+[mysqld.2]
+binlog_format=row
+#slave_parallel_workers=1
+#rpl_skip_tx_api=ON
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/autoinc_crash_safe_partition.test b/storage/rocksdb/mysql-test/rocksdb/t/autoinc_crash_safe_partition.test
new file mode 100644
index 00000000000..56cf93db9d9
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/autoinc_crash_safe_partition.test
@@ -0,0 +1,10 @@
+--source include/have_rocksdb.inc
+--source include/have_binlog_format_row.inc
+--source include/master-slave.inc
+--source include/have_partition.inc
+
+create table t (i int primary key auto_increment) engine=rocksdb partition by key (i) partitions 3;
+
+--source include/autoinc_crash_safe.inc
+
+--source include/rpl_end.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/autoinc_debug-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/autoinc_debug-master.opt
new file mode 100644
index 00000000000..83ed8522e72
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/autoinc_debug-master.opt
@@ -0,0 +1 @@
+--binlog-format=row
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/autoinc_debug.test b/storage/rocksdb/mysql-test/rocksdb/t/autoinc_debug.test
new file mode 100644
index 00000000000..abcae8d98a5
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/autoinc_debug.test
@@ -0,0 +1,118 @@
+--source include/have_rocksdb.inc
+--source include/have_debug.inc
+--source include/have_log_bin.inc
+
+--echo #
+--echo # Testing upgrading from server without merges for auto_increment
+--echo # to new server with such support.
+--echo #
+
+set debug_dbug='+d,myrocks_autoinc_upgrade';
+create table t (i int primary key auto_increment);
+insert into t values ();
+insert into t values ();
+insert into t values ();
+select * from t;
+
+delete from t where i > 1;
+select * from t;
+
+select table_name, index_name, auto_increment
+ from information_schema.rocksdb_ddl where table_name = 't';
+
+set debug_dbug='-d,myrocks_autoinc_upgrade';
+
+--source include/restart_mysqld.inc
+
+insert into t values ();
+insert into t values ();
+insert into t values ();
+select * from t;
+
+select table_name, index_name, auto_increment
+ from information_schema.rocksdb_ddl where table_name = 't';
+
+delete from t where i > 1;
+
+--source include/restart_mysqld.inc
+
+insert into t values ();
+insert into t values ();
+insert into t values ();
+select * from t;
+
+drop table t;
+
+--echo #
+--echo # Testing crash safety of transactions.
+--echo #
+create table t (i int primary key auto_increment);
+insert into t values ();
+insert into t values ();
+insert into t values ();
+
+--echo # Before anything
+begin;
+insert into t values ();
+insert into t values ();
+set debug_dbug="+d,crash_commit_before";
+--exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+--error 2013
+commit;
+--source include/wait_until_disconnected.inc
+--enable_reconnect
+--exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+--source include/wait_until_connected_again.inc
+--disable_reconnect
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+select max(i) from t;
+
+--echo # After engine prepare
+begin;
+insert into t values ();
+insert into t values ();
+set debug_dbug="+d,crash_commit_after_prepare";
+--exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+--error 2013
+commit;
+--source include/wait_until_disconnected.inc
+--enable_reconnect
+--exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+--source include/wait_until_connected_again.inc
+--disable_reconnect
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+select max(i) from t;
+
+--echo # After binlog
+begin;
+insert into t values ();
+insert into t values ();
+set debug_dbug="+d,crash_commit_after_log";
+--exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+--error 2013
+commit;
+--source include/wait_until_disconnected.inc
+--enable_reconnect
+--exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+--source include/wait_until_connected_again.inc
+--disable_reconnect
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+select max(i) from t;
+
+--echo # After everything
+begin;
+insert into t values ();
+insert into t values ();
+set debug_dbug="+d,crash_commit_after";
+--exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+--error 2013
+commit;
+--source include/wait_until_disconnected.inc
+--enable_reconnect
+--exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+--source include/wait_until_connected_again.inc
+--disable_reconnect
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+select max(i) from t;
+
+drop table t;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/autoinc_vars.test b/storage/rocksdb/mysql-test/rocksdb/t/autoinc_vars.test
index 2fe0a2e3c08..b8968590155 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/autoinc_vars.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/autoinc_vars.test
@@ -64,4 +64,42 @@ SELECT LAST_INSERT_ID();
SELECT a FROM t1 ORDER BY a;
DROP TABLE t1;
+--echo #---------------------------
+--echo # test large autoincrement values
+--echo #---------------------------
+SET auto_increment_increment = 1;
+SET auto_increment_offset = 1;
+CREATE TABLE t1 (a BIGINT UNSIGNED AUTO_INCREMENT PRIMARY KEY, b CHAR(8)) ENGINE=rocksdb;
+INSERT INTO t1 VALUES (18446744073709551613, 'a');
+SHOW CREATE TABLE t1;
+INSERT INTO t1 VALUES (NULL, 'b');
+SHOW CREATE TABLE t1;
+--error ER_AUTOINC_READ_FAILED
+INSERT INTO t1 VALUES (NULL, 'c');
+SELECT * FROM t1;
+DROP TABLE t1;
+
+SET auto_increment_increment = 300;
+CREATE TABLE t1 (a BIGINT UNSIGNED AUTO_INCREMENT PRIMARY KEY, b CHAR(8)) ENGINE=rocksdb;
+INSERT INTO t1 VALUES (18446744073709551613, 'a');
+SHOW CREATE TABLE t1;
+--error ER_AUTOINC_READ_FAILED
+INSERT INTO t1 VALUES (NULL, 'b');
+SHOW CREATE TABLE t1;
+--error ER_AUTOINC_READ_FAILED
+INSERT INTO t1 VALUES (NULL, 'c');
+SELECT * FROM t1;
+DROP TABLE t1;
+
+SET auto_increment_offset = 200;
+CREATE TABLE t1 (a BIGINT UNSIGNED AUTO_INCREMENT PRIMARY KEY, b CHAR(8)) ENGINE=rocksdb;
+INSERT INTO t1 VALUES (18446744073709551613, 'a');
+SHOW CREATE TABLE t1;
+--error ER_AUTOINC_READ_FAILED
+INSERT INTO t1 VALUES (NULL, 'b');
+SHOW CREATE TABLE t1;
+--error ER_AUTOINC_READ_FAILED
+INSERT INTO t1 VALUES (NULL, 'c');
+SELECT * FROM t1;
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/autoincrement.test b/storage/rocksdb/mysql-test/rocksdb/t/autoincrement.test
deleted file mode 100644
index 375571f705d..00000000000
--- a/storage/rocksdb/mysql-test/rocksdb/t/autoincrement.test
+++ /dev/null
@@ -1,3 +0,0 @@
---source include/have_rocksdb.inc
-
---echo # The test checks AUTO_INCREMENT capabilities that are not supported by RocksDB-SE.
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter5-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter5-master.opt
new file mode 100644
index 00000000000..7d63dc74bb8
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter5-master.opt
@@ -0,0 +1 @@
+--rocksdb_override_cf_options=rev:bf5_1={prefix_extractor=capped:4;block_based_table_factory={filter_policy=bloomfilter:10:false;whole_key_filtering=0;}};
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter5.test b/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter5.test
new file mode 100644
index 00000000000..00968aebb62
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter5.test
@@ -0,0 +1,61 @@
+
+--echo #
+--echo # Issue #809: Wrong query result with bloom filters
+--echo #
+
+create table t1 (
+ id1 bigint not null,
+ id2 bigint not null,
+ id3 varchar(100) not null,
+ id4 int not null,
+ id5 int not null,
+ value bigint,
+ value2 varchar(100),
+ primary key (id1, id2, id3, id4) COMMENT 'rev:bf5_1'
+) engine=ROCKSDB;
+
+
+create table t2(a int);
+insert into t2 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+
+create table t3(seq int);
+insert into t3
+select
+ 1+ A.a + B.a* 10 + C.a * 100 + D.a * 1000
+from t2 A, t2 B, t2 C, t2 D;
+
+insert t1
+select
+ (seq+9) div 10, (seq+4) div 5, (seq+4) div 5, seq, seq, 1000, "aaabbbccc"
+from t3;
+
+set global rocksdb_force_flush_memtable_now=1;
+
+--echo # Full table scan
+explain
+select * from t1 limit 10;
+select * from t1 limit 10;
+
+--echo # An index scan starting from the end of the table:
+explain
+select * from t1 order by id1 desc,id2 desc, id3 desc, id4 desc limit 1;
+select * from t1 order by id1 desc,id2 desc, id3 desc, id4 desc limit 1;
+
+# A testcase for an assertion that the fix is removing
+# The only requirement for the used column family is that it is reverse-ordered
+create table t4 (
+ pk int unsigned not null primary key,
+ kp1 int unsigned not null,
+ kp2 int unsigned not null,
+ col1 int unsigned,
+ key(kp1, kp2) comment 'rev:bf5_2'
+) engine=rocksdb;
+
+insert into t4 values (1, 0xFFFF, 0xFFF, 12345);
+
+--echo # This must not fail an assert:
+select * from t4 force index(kp1) where kp1=0xFFFFFFFF and kp2<=0xFFFFFFFF order by kp2 desc;
+
+drop table t1,t2,t3,t4;
+
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load.test b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load.test
index 6c6c75dd37e..0db5e6d9cc4 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load.test
@@ -7,4 +7,4 @@
--let pk_cf=cf1
--let data_order_desc=0
---source bulk_load.inc
+--source ../include/bulk_load.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_drop_table.test b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_drop_table.test
new file mode 100644
index 00000000000..18e40fbf4ab
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_drop_table.test
@@ -0,0 +1,19 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE t1 (pk INT, PRIMARY KEY (pk)) ENGINE=ROCKSDB;
+
+SET rocksdb_bulk_load_allow_unsorted=1;
+SET rocksdb_bulk_load=1;
+
+INSERT INTO t1 VALUES (1);
+
+--connect (con1,localhost,root,,)
+DROP TABLE t1;
+
+--connection default
+--disconnect con1
+
+# This would have crashed the server prior to the fix
+SET rocksdb_bulk_load=0;
+--error ER_NO_SUCH_TABLE
+SELECT * FROM t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_errors.test b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_errors.test
index 4a4c42d1fcd..1e349d0ff18 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_errors.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_errors.test
@@ -1,7 +1,13 @@
--source include/have_rocksdb.inc
+--source include/count_sessions.inc
+
+--let LOG1=$MYSQLTEST_VARDIR/tmp/rocksdb.bulk_load_errors.1.err
+--let $_mysqld_option=--log-error=$LOG1
+--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR
+--source include/restart_mysqld_with_option.inc
### Bulk load ###
-CREATE TABLE t1(pk INT, PRIMARY KEY(pk));
+CREATE TABLE t1(pk INT, PRIMARY KEY(pk)) ENGINE=ROCKSDB;
# Make sure we get an error with out of order keys during bulk load
SET rocksdb_bulk_load=1;
@@ -21,19 +27,49 @@ INSERT INTO t1 VALUES(2);
INSERT INTO t1 VALUES(20);
INSERT INTO t1 VALUES(21);
---echo #
---echo # In MyRocks, the following statement will intentionally crash the server.
---echo # In MariaDB, it will cause an error
--error ER_OVERLAPPING_KEYS
SET rocksdb_bulk_load=0;
---echo #
---echo # Despite the error, bulk load operation is over so the variable value
---echo # will be 0:
-select @@rocksdb_bulk_load;
-
+SHOW VARIABLES LIKE 'rocksdb_bulk_load';
call mtr.add_suppression('finalizing last SST file while setting bulk loading variable');
+SELECT * FROM t1;
+
+--let SEARCH_FILE=$LOG1
+--let SEARCH_PATTERN=RocksDB: Error [0-9]+ finalizing last SST file while setting bulk loading variable
+--source include/search_pattern_in_file.inc
+
+--let LOG2=$MYSQLTEST_VARDIR/tmp/rocksdb.bulk_load_errors.2.err
+--let $_mysqld_option=--log-error=$LOG2
+--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR
+--source include/restart_mysqld_with_option.inc
+--remove_file $LOG1
+
+
+# Make sure that disconnecting logs an error instead of asserting in the server
+--connect (con1,localhost,root,,)
+SET rocksdb_bulk_load=1;
+INSERT INTO t1 VALUES(1);
+INSERT INTO t1 VALUES(2);
+INSERT INTO t1 VALUES(20);
+INSERT INTO t1 VALUES(21);
+--connection default
+--disconnect con1
+
+SELECT * FROM t1;
+
+--source include/wait_until_count_sessions.inc
+
+--let SEARCH_FILE=$LOG2
+--let SEARCH_PATTERN=RocksDB: Error [0-9]+ finalizing last SST file while disconnecting
+--source include/search_pattern_in_file.inc
+
+--let LOG3=$MYSQLTEST_VARDIR/tmp/rocksdb.bulk_load_errors.3.err
+--let $_mysqld_option=--log-error=$LOG3
+--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR
+--source include/restart_mysqld_with_option.inc
+--remove_file $LOG2
+
TRUNCATE TABLE t1;
### Bulk load with unsorted PKs ###
@@ -60,3 +96,46 @@ SELECT * FROM t1;
SET rocksdb_bulk_load_allow_unsorted=DEFAULT;
DROP TABLE t1;
+
+# This would trigger a debug assertion that is just an error in release builds
+CREATE TABLE t1(c1 INT KEY) ENGINE=ROCKSDB;
+SET rocksdb_bulk_load=1;
+--error ER_KEYS_OUT_OF_ORDER
+INSERT INTO t1 VALUES (),(),();
+SET rocksdb_bulk_load=0;
+DROP TABLE t1;
+
+# Crash when table open cache closes handler with bulk load operation not finalized
+SET @orig_table_open_cache=@@global.table_open_cache;
+CREATE TABLE t1(a INT AUTO_INCREMENT, b INT, PRIMARY KEY (a)) ENGINE=ROCKSDB DEFAULT CHARSET=latin1;
+SET rocksdb_bulk_load=1;
+INSERT INTO t1 VALUES(13, 0);
+INSERT INTO t1 VALUES(2, 'test 2');
+INSERT INTO t1 VALUES(@id, @arg04);
+SET @@global.table_open_cache=FALSE;
+INSERT INTO t1 VALUES(51479+0.333333333,1);
+DROP TABLE t1;
+SET @@global.table_open_cache=@orig_table_open_cache;
+
+--let SEARCH_FILE=$LOG3
+--let SEARCH_PATTERN=RocksDB: Error [0-9]+ finalizing bulk load while closing handler
+--source include/search_pattern_in_file.inc
+
+--source include/restart_mysqld.inc
+
+--remove_file $LOG3
+
+# Switch between tables, but also introduce duplicate key errors
+CREATE TABLE t1 (pk INT, PRIMARY KEY (pk)) ENGINE=ROCKSDB;
+CREATE TABLE t2 (pk INT, PRIMARY KEY (pk)) ENGINE=ROCKSDB;
+SET rocksdb_bulk_load=1;
+INSERT INTO t1 VALUES (1), (2);
+INSERT INTO t2 VALUES (1), (2);
+INSERT INTO t1 VALUES (1);
+--error ER_OVERLAPPING_KEYS
+INSERT INTO t2 VALUES (3);
+SET rocksdb_bulk_load=0;
+DROP TABLE t1;
+DROP TABLE t2;
+
+--source include/wait_until_count_sessions.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_rev_cf.test b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_rev_cf.test
index 7c4d7aef0e5..67d68ac7a2d 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_rev_cf.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_rev_cf.test
@@ -6,4 +6,4 @@
--let pk_cf=rev:cf1
--let data_order_desc=0
---source bulk_load.inc
+--source ../include/bulk_load.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_rev_cf_and_data.test b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_rev_cf_and_data.test
index a31e86753f3..7110fe5f1d7 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_rev_cf_and_data.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_rev_cf_and_data.test
@@ -6,4 +6,4 @@
--let pk_cf=rev:cf1
--let data_order_desc=1
---source bulk_load.inc
+--source ../include/bulk_load.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_rev_data.test b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_rev_data.test
index f36990ed567..6c6e51a2a51 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_rev_data.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_rev_data.test
@@ -6,4 +6,4 @@
--let pk_cf=cf1
--let data_order_desc=1
---source bulk_load.inc
+--source ../include/bulk_load.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_unsorted.test b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_unsorted.test
index 78bb9312ca5..2abeae343c9 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_unsorted.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_unsorted.test
@@ -3,136 +3,4 @@
--let pk_cf=cf1
---disable_warnings
-DROP TABLE IF EXISTS t1;
---enable_warnings
-
-SET rocksdb_bulk_load_size=3;
-SET rocksdb_bulk_load_allow_unsorted=1;
-
-### Test individual INSERTs ###
-
-# A table with only a PK won't have rows until the bulk load is finished
-eval CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "$pk_cf");
-SET rocksdb_bulk_load=1;
---disable_query_log
-let $sign = 1;
-let $max = 5;
-let $i = 1;
-while ($i <= $max) {
- let $a = 1 + $sign * $i;
- let $b = 1 - $sign * $i;
- let $sign = -$sign;
- let $insert = INSERT INTO t1 VALUES ($a, $b);
- eval $insert;
- inc $i;
-}
---enable_query_log
-SELECT * FROM t1;
-SET rocksdb_bulk_load=0;
-SELECT * FROM t1;
-DROP TABLE t1;
-
-# A table with a PK and a SK shows rows immediately
-eval CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "$pk_cf", KEY(b));
-SET rocksdb_bulk_load=1;
---disable_query_log
-let $sign = 1;
-let $max = 5;
-let $i = 1;
-while ($i <= $max) {
- let $a = 1 + $sign * $i;
- let $b = 1 - $sign * $i;
- let $sign = -$sign;
- let $insert = INSERT INTO t1 VALUES ($a, $b);
- eval $insert;
- inc $i;
-}
---enable_query_log
-
-SELECT * FROM t1;
-SET rocksdb_bulk_load=0;
-DROP TABLE t1;
-
-# Inserting into another table finishes bulk load to the previous table
-eval CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "$pk_cf");
-eval CREATE TABLE t2(a INT, b INT, PRIMARY KEY(a) COMMENT "$pk_cf");
-
-SET rocksdb_bulk_load=1;
-INSERT INTO t1 VALUES (1,1);
-INSERT INTO t2 VALUES (1,1);
-SELECT * FROM t1;
-INSERT INTO t1 VALUES (2,2);
-SELECT * FROM t2;
-SELECT * FROM t1;
-SET rocksdb_bulk_load=0;
-SELECT * FROM t1;
-DROP TABLE t1, t2;
-
-### Test bulk load from a file ###
-eval CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "$pk_cf");
-eval CREATE TABLE t2(a INT, b INT, PRIMARY KEY(b) COMMENT "$pk_cf");
-eval CREATE TABLE t3(a INT, b INT, PRIMARY KEY(a) COMMENT "$pk_cf")
- PARTITION BY KEY() PARTITIONS 4;
-
---let $file = `SELECT CONCAT(@@datadir, "test_loadfile.txt")`
-# Create a text file with data to import into the table.
-# PK and SK are not in any order
---let ROCKSDB_INFILE = $file
-perl;
-my $fn = $ENV{'ROCKSDB_INFILE'};
-open(my $fh, '>', $fn) || die "perl open($fn): $!";
-binmode $fh;
-my $max = 5000000;
-my $sign = 1;
-for (my $ii = 0; $ii < $max; $ii++)
-{
- my $a = 1 + $sign * $ii;
- my $b = 1 - $sign * $ii;
- print $fh "$a\t$b\n";
-}
-close($fh);
-EOF
---file_exists $file
-
-# Make sure a snapshot held by another user doesn't block the bulk load
-connect (other,localhost,root,,);
-set session transaction isolation level repeatable read;
-select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS';
-start transaction with consistent snapshot;
-select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS';
-
-connection default;
-set rocksdb_bulk_load=1;
-set rocksdb_bulk_load_size=100000;
---disable_query_log
---echo LOAD DATA INFILE <input_file> INTO TABLE t1;
-eval LOAD DATA INFILE '$file' INTO TABLE t1;
---echo LOAD DATA INFILE <input_file> INTO TABLE t2;
-eval LOAD DATA INFILE '$file' INTO TABLE t2;
---echo LOAD DATA INFILE <input_file> INTO TABLE t3;
-eval LOAD DATA INFILE '$file' INTO TABLE t3;
---enable_query_log
-set rocksdb_bulk_load=0;
-
---remove_file $file
-
-# Make sure row count index stats are correct
---replace_column 6 # 7 # 8 # 9 #
-SHOW TABLE STATUS WHERE name LIKE 't%';
-
-ANALYZE TABLE t1, t2, t3;
-
---replace_column 6 # 7 # 8 # 9 #
-SHOW TABLE STATUS WHERE name LIKE 't%';
-
-# Make sure all the data is there.
-select count(a) from t1;
-select count(b) from t1;
-select count(a) from t2;
-select count(b) from t2;
-select count(a) from t3;
-select count(b) from t3;
-
-DROP TABLE t1, t2, t3;
-SET rocksdb_bulk_load_allow_unsorted=0;
+--source ../include/bulk_load_unsorted.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_unsorted_rev.test b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_unsorted_rev.test
new file mode 100644
index 00000000000..de9a5c26424
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_unsorted_rev.test
@@ -0,0 +1,5 @@
+--source include/have_rocksdb.inc
+
+--let pk_cf=rev:cf1
+
+--source ../include/bulk_load_unsorted.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/cardinality.test b/storage/rocksdb/mysql-test/rocksdb/t/cardinality.test
index 689753faf8d..14a82d7e462 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/cardinality.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/cardinality.test
@@ -2,6 +2,48 @@
--source include/restart_mysqld.inc
+# Test memtable cardinality statistics
+CREATE TABLE t0 (id int PRIMARY KEY, a int, INDEX ix_a (a)) engine=rocksdb;
+
+# populate the table with 10 records where cardinality of id is N and a is N/2.
+insert into t0 values (0, 0),(1, 1),(2, 2),(3, 3),(4, 4),
+(5, 4),(6, 4),(7, 4),(8, 4),(9, 4);
+
+# Assert no cardinality data exists before ANALYZE TABLE is done
+SELECT cardinality FROM information_schema.statistics where table_name="t0" and
+column_name="id";
+SELECT cardinality FROM information_schema.statistics where table_name="t0" and
+column_name="a";
+
+--disable_result_log
+ANALYZE TABLE t0;
+--enable_result_log
+
+SELECT table_rows into @N FROM information_schema.tables
+WHERE table_name = "t0";
+SELECT FLOOR(@N/cardinality) FROM
+information_schema.statistics where table_name="t0" and column_name="id";
+SELECT FLOOR(@N/cardinality) FROM
+information_schema.statistics where table_name="t0" and column_name="a";
+
+# Flush the table and re-run the test, as statistics are calculated a bit
+# differently for memtable and SST files
+SET GLOBAL rocksdb_force_flush_memtable_now = 1;
+--disable_result_log
+ANALYZE TABLE t0;
+--enable_result_log
+
+SELECT table_rows into @N FROM information_schema.tables
+WHERE table_name = "t0";
+SELECT FLOOR(@N/cardinality) FROM
+information_schema.statistics where table_name="t0" and column_name="id";
+SELECT FLOOR(@N/cardinality) FROM
+information_schema.statistics where table_name="t0" and column_name="a";
+
+drop table t0;
+
+# Test big table on SST
+
--disable_warnings
DROP TABLE IF EXISTS t1,t10,t11;
--enable_warnings
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/check_ignore_unknown_options.test b/storage/rocksdb/mysql-test/rocksdb/t/check_ignore_unknown_options.test
new file mode 100644
index 00000000000..b4866de4d3b
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/check_ignore_unknown_options.test
@@ -0,0 +1,22 @@
+--disable_warnings
+let $MYSQLD_DATADIR= `select @@datadir`;
+let $restart_file= $MYSQLTEST_VARDIR/tmp/mysqld.1.expect;
+let $error_log= $MYSQLTEST_VARDIR/log/my_restart.err;
+select variable_name, variable_value from information_schema.global_variables where variable_name="rocksdb_ignore_unknown_options";
+
+--exec find $MYSQLD_DATADIR/#rocksdb/OPTIONS* | sort -n | tail -1 | xargs -0 -I {} -t sh -c "echo hello=world>>{}"
+
+--exec echo "wait" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+--shutdown_server 10
+
+--error 1
+--exec $MYSQLD_CMD --plugin_load=$HA_ROCKSDB_SO --rocksdb_ignore_unknown_options=0 --loose-console --log-error=$error_log
+
+let SEARCH_FILE= $error_log;
+let SEARCH_PATTERN= RocksDB: Compatibility check against existing database options failed;
+--source include/search_pattern_in_file.inc
+--enable_reconnect
+--exec echo "restart" > $restart_file
+--source include/wait_until_connected_again.inc
+--exec find $MYSQLD_DATADIR/#rocksdb/OPTIONS* | sort -n | tail -1 | xargs -0 -I {} -t sh -c "sed -i '/hello=world/d' {}"
+select variable_name, variable_value from information_schema.global_variables where variable_name="rocksdb_ignore_unknown_options";
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/deadlock_tracking.test b/storage/rocksdb/mysql-test/rocksdb/t/deadlock_tracking.test
index d2abcb3b63b..9677d2dbbaa 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/deadlock_tracking.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/deadlock_tracking.test
@@ -21,29 +21,29 @@ let $con3= `SELECT CONNECTION_ID()`;
connection default;
eval create table t (i int primary key) engine=$engine;
insert into t values (1), (2), (3);
---replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTIONID: [0-9]*/TXN_ID/
+--replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTION ID: [0-9]*/TXN_ID/
show engine rocksdb transaction status;
echo Deadlock #1;
--source include/simple_deadlock.inc
connection default;
---replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTIONID: [0-9]*/TXN_ID/
+--replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTION ID: [0-9]*/TXN_ID/
show engine rocksdb transaction status;
echo Deadlock #2;
--source include/simple_deadlock.inc
connection default;
---replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTIONID: [0-9]*/TXN_ID/
+--replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTION ID: [0-9]*/TXN_ID/
show engine rocksdb transaction status;
set global rocksdb_max_latest_deadlocks = 10;
echo Deadlock #3;
--source include/simple_deadlock.inc
connection default;
---replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTIONID: [0-9]*/TXN_ID/
+--replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTION ID: [0-9]*/TXN_ID/
show engine rocksdb transaction status;
set global rocksdb_max_latest_deadlocks = 1;
---replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTIONID: [0-9]*/TXN_ID/
+--replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTION ID: [0-9]*/TXN_ID/
show engine rocksdb transaction status;
connection con3;
@@ -77,8 +77,10 @@ let $wait_condition = select count(*) = 1 from information_schema.rocksdb_trx
where thread_id = $con2 and waiting_key != "";
--source include/wait_condition.inc
+select variable_value into @a from information_schema.global_status where variable_name='rocksdb_row_lock_deadlocks';
--error ER_LOCK_DEADLOCK
select * from t where i=1 for update;
+select case when variable_value-@a = 1 then 'true' else 'false' end as deadlocks from information_schema.global_status where variable_name='rocksdb_row_lock_deadlocks';
rollback;
connection con2;
@@ -91,7 +93,7 @@ rollback;
connection default;
set global rocksdb_max_latest_deadlocks = 5;
---replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTIONID: [0-9]*/TXN_ID/
+--replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTION ID: [0-9]*/TXN_ID/
show engine rocksdb transaction status;
echo Deadlock #5;
@@ -133,7 +135,7 @@ connection con3;
rollback;
connection default;
---replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTIONID: [0-9]*/TXN_ID/
+--replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTION ID: [0-9]*/TXN_ID/
show engine rocksdb transaction status;
disconnect con1;
@@ -143,11 +145,11 @@ disconnect con3;
set global rocksdb_lock_wait_timeout = @prior_lock_wait_timeout;
set global rocksdb_deadlock_detect = @prior_deadlock_detect;
drop table t;
---replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTIONID: [0-9]*/TXN_ID/ /INDEX_ID: [0-9a-f]*/IDX_ID/
+--replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTION ID: [0-9]*/TXN_ID/ /INDEX_ID: [0-9a-f]*/IDX_ID/
show engine rocksdb transaction status;
set global rocksdb_max_latest_deadlocks = 0;
--echo # Clears deadlock buffer of any existent deadlocks.
set global rocksdb_max_latest_deadlocks = @prior_max_latest_deadlocks;
---replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTIONID: [0-9]*/TXN_ID/ /INDEX_ID: [0-9a-f]*/IDX_ID/
+--replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTION ID: [0-9]*/TXN_ID/ /INDEX_ID: [0-9a-f]*/IDX_ID/
show engine rocksdb transaction status;
--source include/wait_until_count_sessions.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/i_s_ddl.test b/storage/rocksdb/mysql-test/rocksdb/t/i_s_ddl.test
index 7dc3c207ecc..716f372067b 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/i_s_ddl.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/i_s_ddl.test
@@ -7,6 +7,7 @@
--disable_warnings
DROP TABLE IF EXISTS is_ddl_t1;
DROP TABLE IF EXISTS is_ddl_t2;
+DROP TABLE IF EXISTS is_ddl_t3;
--enable_warnings
CREATE TABLE is_ddl_t1 (i INT, j INT, k INT, l INT,
@@ -16,9 +17,13 @@ CREATE TABLE is_ddl_t1 (i INT, j INT, k INT, l INT,
CREATE TABLE is_ddl_t2 (x INT, y INT, z INT,
PRIMARY KEY (z, y) COMMENT 'zy_cf',
KEY (x)) ENGINE = ROCKSDB;
+CREATE TABLE is_ddl_t3 (a INT, b INT, c INT, PRIMARY KEY (a)) ENGINE = ROCKSDB
+ COMMENT "ttl_duration=3600;";
+
--sorted_result
-SELECT TABLE_SCHEMA,TABLE_NAME,PARTITION_NAME,INDEX_NAME,INDEX_TYPE,KV_FORMAT_VERSION,CF FROM INFORMATION_SCHEMA.ROCKSDB_DDL WHERE TABLE_NAME like 'is_ddl_t%';
+SELECT TABLE_SCHEMA,TABLE_NAME,PARTITION_NAME,INDEX_NAME,INDEX_TYPE,KV_FORMAT_VERSION,CF,TTL_DURATION,INDEX_FLAGS FROM INFORMATION_SCHEMA.ROCKSDB_DDL WHERE TABLE_NAME like 'is_ddl_t%';
# cleanup
DROP TABLE is_ddl_t1;
DROP TABLE is_ddl_t2;
+DROP TABLE is_ddl_t3;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/i_s_deadlock.test b/storage/rocksdb/mysql-test/rocksdb/t/i_s_deadlock.test
new file mode 100644
index 00000000000..21558899782
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/i_s_deadlock.test
@@ -0,0 +1,158 @@
+--source include/have_rocksdb.inc
+
+set @prior_lock_wait_timeout = @@rocksdb_lock_wait_timeout;
+set @prior_deadlock_detect = @@rocksdb_deadlock_detect;
+set @prior_max_latest_deadlocks = @@rocksdb_max_latest_deadlocks;
+set global rocksdb_deadlock_detect = on;
+set global rocksdb_lock_wait_timeout = 10000;
+--echo # Clears deadlock buffer of any prior deadlocks.
+set global rocksdb_max_latest_deadlocks = 0;
+set global rocksdb_max_latest_deadlocks = @prior_max_latest_deadlocks;
+
+# needed by simple_deadlock.inc
+let $engine = rocksdb;
+
+--source include/count_sessions.inc
+connect (con1,localhost,root,,);
+let $con1= `SELECT CONNECTION_ID()`;
+
+connect (con2,localhost,root,,);
+let $con2= `SELECT CONNECTION_ID()`;
+
+connect (con3,localhost,root,,);
+let $con3= `SELECT CONNECTION_ID()`;
+
+connection default;
+show create table information_schema.rocksdb_deadlock;
+
+create table t (i int primary key) engine=rocksdb;
+insert into t values (1), (2), (3);
+select * from information_schema.rocksdb_deadlock;
+
+echo Deadlock #1;
+--source include/simple_deadlock.inc
+connection default;
+--replace_column 1 DEADLOCK_ID 2 TRANSACTION_ID 4 WAITING_KEY
+select * from information_schema.rocksdb_deadlock;
+
+echo Deadlock #2;
+--source include/simple_deadlock.inc
+connection default;
+--replace_column 1 DEADLOCK_ID 2 TRANSACTION_ID 4 WAITING_KEY
+select * from information_schema.rocksdb_deadlock;
+set global rocksdb_max_latest_deadlocks = 10;
+
+echo Deadlock #3;
+--source include/simple_deadlock.inc
+connection default;
+--replace_column 1 DEADLOCK_ID 2 TRANSACTION_ID 4 WAITING_KEY
+select * from information_schema.rocksdb_deadlock;
+set global rocksdb_max_latest_deadlocks = 1;
+--replace_column 1 DEADLOCK_ID 2 TRANSACTION_ID 4 WAITING_KEY
+select * from information_schema.rocksdb_deadlock;
+
+connection con3;
+set rocksdb_deadlock_detect_depth = 2;
+
+echo Deadlock #4;
+connection con1;
+begin;
+select * from t where i=1 for update;
+
+connection con2;
+begin;
+select * from t where i=2 for update;
+
+connection con3;
+begin;
+select * from t where i=3 for update;
+
+connection con1;
+send select * from t where i=2 for update;
+
+connection con2;
+let $wait_condition = select count(*) = 1 from information_schema.rocksdb_trx
+where thread_id = $con1 and waiting_key != "";
+--source include/wait_condition.inc
+
+send select * from t where i=3 for update;
+
+connection con3;
+let $wait_condition = select count(*) = 1 from information_schema.rocksdb_trx
+where thread_id = $con2 and waiting_key != "";
+--source include/wait_condition.inc
+
+--error ER_LOCK_DEADLOCK
+select * from t where i=1 for update;
+rollback;
+
+connection con2;
+reap;
+rollback;
+
+connection con1;
+reap;
+rollback;
+
+connection default;
+set global rocksdb_max_latest_deadlocks = 5;
+--replace_column 1 DEADLOCK_ID 2 TRANSACTION_ID 4 WAITING_KEY
+select * from information_schema.rocksdb_deadlock;
+
+echo Deadlock #5;
+connection con1;
+begin;
+select * from t where i=1 for update;
+
+connection con2;
+begin;
+select * from t where i=2 for update;
+
+connection con3;
+begin;
+select * from t where i=3 lock in share mode;
+
+connection con1;
+select * from t where i=100 for update;
+select * from t where i=101 for update;
+send select * from t where i=2 for update;
+
+connection con2;
+let $wait_condition = select count(*) = 1 from information_schema.rocksdb_trx
+where thread_id = $con1 and waiting_key != "";
+--source include/wait_condition.inc
+
+select * from t where i=3 lock in share mode;
+select * from t where i=200 for update;
+select * from t where i=201 for update;
+
+--error ER_LOCK_DEADLOCK
+select * from t where i=1 lock in share mode;
+rollback;
+
+connection con1;
+reap;
+rollback;
+
+connection con3;
+rollback;
+
+connection default;
+--replace_column 1 DEADLOCK_ID 2 TRANSACTION_ID 4 WAITING_KEY
+select * from information_schema.rocksdb_deadlock;
+
+disconnect con1;
+disconnect con2;
+disconnect con3;
+
+set global rocksdb_lock_wait_timeout = @prior_lock_wait_timeout;
+set global rocksdb_deadlock_detect = @prior_deadlock_detect;
+drop table t;
+--replace_column 1 DEADLOCK_ID 2 TRANSACTION_ID 4 WAITING_KEY 6 INDEX_NAME 7 TABLE_NAME
+select * from information_schema.rocksdb_deadlock;
+set global rocksdb_max_latest_deadlocks = 0;
+--echo # Clears deadlock buffer of any existent deadlocks.
+set global rocksdb_max_latest_deadlocks = @prior_max_latest_deadlocks;
+--replace_column 1 DEADLOCK_ID 2 TRANSACTION_ID 4 WAITING_KEY
+select * from information_schema.rocksdb_deadlock;
+--source include/wait_until_count_sessions.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/index_merge_rocksdb.test b/storage/rocksdb/mysql-test/rocksdb/t/index_merge_rocksdb.test
index abf8d71911b..887b4dd6a65 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/index_merge_rocksdb.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/index_merge_rocksdb.test
@@ -70,14 +70,15 @@ while ($cnt)
SELECT COUNT(*) FROM t1;
+# flush the table first, as statistics are calculated a bit differently for memtable and SST files
+SET GLOBAL rocksdb_force_flush_memtable_now = 1;
+
-- disable_query_log
-- disable_result_log
ANALYZE TABLE t1;
-- enable_result_log
-- enable_query_log
-SET GLOBAL rocksdb_force_flush_memtable_now = 1;
-
--replace_column 9 #
EXPLAIN UPDATE t1 SET filler1='to be deleted' WHERE key1=100 AND key2=100;
UPDATE t1 SET filler1='to be deleted' WHERE key1=100 and key2=100;
@@ -95,8 +96,8 @@ while ($i <= 1000) {
eval $insert;
}
--enable_query_log
-analyze table t1;
set global rocksdb_force_flush_memtable_now=1;
+analyze table t1;
--replace_column 9 #
explain select * from t1 where key1 = 1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/index_merge_rocksdb2.test b/storage/rocksdb/mysql-test/rocksdb/t/index_merge_rocksdb2.test
index a4d26cf7739..2306558ff41 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/index_merge_rocksdb2.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/index_merge_rocksdb2.test
@@ -5,7 +5,8 @@
# t/index_merge_innodb.test
#
-# Index merge tests
+# Index merge tests (the test is called 'index_merge_rocksdb2' because
+# 'index_merge_rocksdb' already existed when 'index_merge_innodb' was copied)
#
# Last update:
# 2006-08-07 ML test refactored (MySQL 5.1)
@@ -61,6 +62,7 @@ INSERT INTO t1 SELECT id + 16, 7, 0 FROM t1;
-- disable_query_log
-- disable_result_log
+set global rocksdb_force_flush_memtable_now=1;
analyze table t1;
-- enable_result_log
-- enable_query_log
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/information_schema.test b/storage/rocksdb/mysql-test/rocksdb/t/information_schema.test
index 2ffc186dd8f..67dae5d6263 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/information_schema.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/information_schema.test
@@ -19,8 +19,8 @@ drop table t1;
--let $max_index_id = query_get_value(SELECT * from INFORMATION_SCHEMA.ROCKSDB_GLOBAL_INFO where type = 'MAX_INDEX_ID', VALUE, 1)
--replace_result $max_index_id max_index_id
-select * from INFORMATION_SCHEMA.ROCKSDB_GLOBAL_INFO;
-select count(*) from INFORMATION_SCHEMA.ROCKSDB_GLOBAL_INFO;
+select * from INFORMATION_SCHEMA.ROCKSDB_GLOBAL_INFO where type<>'DDL_DROP_INDEX_ONGOING';
+select count(*) from INFORMATION_SCHEMA.ROCKSDB_GLOBAL_INFO where type<>'DDL_DROP_INDEX_ONGOING';
SET GLOBAL ROCKSDB_PAUSE_BACKGROUND_WORK=0;
select VALUE into @keysIn from INFORMATION_SCHEMA.ROCKSDB_COMPACTION_STATS where CF_NAME = 'default' and LEVEL = 'Sum' and TYPE = 'KeyIn';
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/insert_optimized_config-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/insert_optimized_config-master.opt
index a0bf5759ec4..3b7d80662db 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/insert_optimized_config-master.opt
+++ b/storage/rocksdb/mysql-test/rocksdb/t/insert_optimized_config-master.opt
@@ -1,4 +1,5 @@
--rocksdb_write_disable_wal=1
+--rocksdb_flush_log_at_trx_commit=0
--rocksdb_default_cf_options=write_buffer_size=16k;target_file_size_base=16k;level0_file_num_compaction_trigger=4;level0_slowdown_writes_trigger=256;level0_stop_writes_trigger=256;max_write_buffer_number=16;compression_per_level=kNoCompression;memtable=vector:1024
--rocksdb_override_cf_options=__system__={memtable=skip_list:16}
--rocksdb_compaction_sequential_deletes=0
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/issue255.test b/storage/rocksdb/mysql-test/rocksdb/t/issue255.test
index 4f1927d366c..d75f1e3c2a8 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/issue255.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/issue255.test
@@ -8,6 +8,38 @@ SHOW TABLE STATUS LIKE 't1';
INSERT INTO t1 VALUES ('538647864786478647864');
--replace_column 3 # 6 # 7 # 8 # 9 # 10 #
+SELECT * FROM t1;
+SHOW TABLE STATUS LIKE 't1';
+
+--error ER_DUP_ENTRY
+INSERT INTO t1 VALUES ();
+SELECT * FROM t1;
+--replace_column 3 # 6 # 7 # 8 # 9 # 10 #
+SHOW TABLE STATUS LIKE 't1';
+
+--error ER_DUP_ENTRY
+INSERT INTO t1 VALUES ();
+SELECT * FROM t1;
+--replace_column 3 # 6 # 7 # 8 # 9 # 10 #
+SHOW TABLE STATUS LIKE 't1';
+
+DROP TABLE t1;
+
+CREATE TABLE t1 (pk TINYINT NOT NULL PRIMARY KEY AUTO_INCREMENT);
+
+INSERT INTO t1 VALUES (5);
+--replace_column 3 # 6 # 7 # 8 # 9 # 10 #
+SHOW TABLE STATUS LIKE 't1';
+
+INSERT INTO t1 VALUES (1000);
+--replace_column 3 # 6 # 7 # 8 # 9 # 10 #
+SELECT * FROM t1;
+SHOW TABLE STATUS LIKE 't1';
+
+--error ER_DUP_ENTRY
+INSERT INTO t1 VALUES ();
+SELECT * FROM t1;
+--replace_column 3 # 6 # 7 # 8 # 9 # 10 #
SHOW TABLE STATUS LIKE 't1';
--error ER_DUP_ENTRY
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/lock_wait_timeout_stats.test b/storage/rocksdb/mysql-test/rocksdb/t/lock_wait_timeout_stats.test
index f1777ea3e93..5288680c3bd 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/lock_wait_timeout_stats.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/lock_wait_timeout_stats.test
@@ -16,16 +16,20 @@ set @@rocksdb_lock_wait_timeout=1;
begin;
--connection con1
+select variable_value into @a from information_schema.global_status where variable_name='rocksdb_row_lock_wait_timeouts';
--error ER_LOCK_WAIT_TIMEOUT
insert into t values(0);
select ROW_LOCK_WAIT_TIMEOUTS from information_schema.table_statistics where table_name="t";
select ROW_LOCK_WAIT_TIMEOUTS from information_schema.table_statistics where table_name="t";
+select case when variable_value-@a = 1 then 'true' else 'false' end as waits from information_schema.global_status where variable_name='rocksdb_row_lock_wait_timeouts';
--connection con2
+select variable_value into @a from information_schema.global_status where variable_name='rocksdb_row_lock_wait_timeouts';
--error ER_LOCK_WAIT_TIMEOUT
insert into t values(0);
select ROW_LOCK_WAIT_TIMEOUTS from information_schema.table_statistics where table_name="t";
select ROW_LOCK_WAIT_TIMEOUTS from information_schema.table_statistics where table_name="t";
+select case when variable_value-@a = 1 then 'true' else 'false' end as waits from information_schema.global_status where variable_name='rocksdb_row_lock_wait_timeouts';
--disconnect con1
--connection default
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/max_open_files.test b/storage/rocksdb/mysql-test/rocksdb/t/max_open_files.test
new file mode 100644
index 00000000000..c7c5e7b2ef3
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/max_open_files.test
@@ -0,0 +1,53 @@
+--source include/have_rocksdb.inc
+
+# Basic Sysbench run fails with basic MyROCKS install due to lack of open files
+
+# test for over limit
+CALL mtr.add_suppression("RocksDB: rocksdb_max_open_files should not be greater than the open_files_limit*");
+
+--let $over_rocksdb_max_open_files=`SELECT @@global.open_files_limit + 100`
+--let $under_rocksdb_max_open_files=`SELECT @@global.open_files_limit -1`
+--let SEARCH_FILE=$MYSQLTEST_VARDIR/tmp/rocksdb.max_open_files.err
+--let SEARCH_PATTERN=RocksDB: rocksdb_max_open_files should not be greater than the open_files_limit
+
+--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR $over_rocksdb_max_open_files over_rocksdb_max_open_files
+--let $_mysqld_option=--log-error=$SEARCH_FILE --rocksdb_max_open_files=$over_rocksdb_max_open_files
+--source include/restart_mysqld_with_option.inc
+--source include/search_pattern_in_file.inc
+
+SELECT FLOOR(@@global.open_files_limit / 2) = @@global.rocksdb_max_open_files;
+
+# test for within limit
+--let $_mysqld_option=--rocksdb_max_open_files=$under_rocksdb_max_open_files
+--source include/restart_mysqld_with_option.inc
+
+SELECT @@global.open_files_limit - 1 = @@global.rocksdb_max_open_files;
+
+# test for minimal value
+--let $_mysqld_option=--rocksdb_max_open_files=0
+--source include/restart_mysqld_with_option.inc
+
+SELECT @@global.rocksdb_max_open_files;
+
+# verify that we can still do work with no descriptor cache
+CREATE TABLE t1(a INT) ENGINE=ROCKSDB;
+INSERT INTO t1 VALUES(0),(1),(2),(3),(4);
+SET GLOBAL rocksdb_force_flush_memtable_and_lzero_now=1;
+DROP TABLE t1;
+
+# test for unlimited
+--let $_mysqld_option=--rocksdb_max_open_files=-1
+--source include/restart_mysqld_with_option.inc
+
+SELECT @@global.rocksdb_max_open_files;
+
+# test for auto-tune
+--let $_mysqld_option=--rocksdb_max_open_files=-2
+--source include/restart_mysqld_with_option.inc
+
+SELECT FLOOR(@@global.open_files_limit / 2) = @@global.rocksdb_max_open_files;
+
+# cleanup
+--let _$mysqld_option=
+--source include/restart_mysqld.inc
+--remove_file $SEARCH_FILE
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/mysqldump.test b/storage/rocksdb/mysql-test/rocksdb/t/mysqldump.test
index 9e904908330..4947ffb59b8 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/mysqldump.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/mysqldump.test
@@ -32,7 +32,7 @@ BEGIN;
insert into r1 values (5,5,5,5,5,5,5,5);
update r1 set value1=value1+100 where id1=1 and id2=1 and id3='1';
---exec $MYSQL_DUMP --skip-comments --single-transaction --master-data=2 --print-ordering-key --rocksdb --order-by-primary-desc --rocksdb_bulk_load test
+--exec ASAN_OPTIONS="detect_leaks=0" $MYSQL_DUMP --skip-comments --single-transaction --master-data=2 --print-ordering-key --rocksdb --order-by-primary-desc --rocksdb_bulk_load test
rollback;
@@ -44,16 +44,16 @@ source include/search_pattern_in_file.inc;
set @save_default_storage_engine=@@global.default_storage_engine;
SET GLOBAL default_storage_engine=rocksdb;
---exec $MYSQL_DUMP --skip-comments --single-transaction --master-data=2 --print-ordering-key test
+--exec ASAN_OPTIONS="detect_leaks=0" $MYSQL_DUMP --skip-comments --single-transaction --master-data=2 --print-ordering-key test
source include/search_pattern_in_file.inc;
# Sanity test mysqldump when the --innodb-stats-on-metadata is specified (no effect)
--echo ==== mysqldump with --innodb-stats-on-metadata ====
---exec $MYSQL_DUMP --skip-comments --single-transaction --master-data=2 --print-ordering-key --innodb-stats-on-metadata test
+--exec ASAN_OPTIONS="detect_leaks=0" $MYSQL_DUMP --skip-comments --single-transaction --master-data=2 --print-ordering-key --innodb-stats-on-metadata test
# testing mysqldump work with statement based binary logging
SET GLOBAL binlog_format=statement;
---exec $MYSQL_DUMP --skip-comments --single-transaction --master-data=2 --print-ordering-key test > /dev/null
+--exec ASAN_OPTIONS="detect_leaks=0" $MYSQL_DUMP --skip-comments --single-transaction --master-data=2 --print-ordering-key test > /dev/null
SET GLOBAL binlog_format=row;
drop table r1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/mysqldump2.test b/storage/rocksdb/mysql-test/rocksdb/t/mysqldump2.test
index 3631e703de6..ca9eb5d2ecf 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/mysqldump2.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/mysqldump2.test
@@ -29,7 +29,7 @@ let $restart_file= $MYSQLTEST_VARDIR/tmp/mysqld.1.expect;
select variable_value into @a from information_schema.global_status where variable_name='rocksdb_block_cache_add';
---exec $MYSQL_DUMP --skip-comments --single-transaction --master-data=2 --print-ordering-key --rocksdb test > /dev/null
+--exec ASAN_OPTIONS="detect_leaks=0" $MYSQL_DUMP --skip-comments --single-transaction --master-data=2 --print-ordering-key --rocksdb test > /dev/null
# verifying block cache was not filled
select case when variable_value - @a > 20 then 'true' else 'false' end from information_schema.global_status where variable_name='rocksdb_block_cache_add';
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb.test b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb.test
index 9199c572933..b884738424f 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb.test
@@ -574,7 +574,6 @@ insert into t30 values
('row3', 'row3-key', 'row3-data'),
('row4', 'row4-key', 'row4-data'),
('row5', 'row5-key', 'row5-data');
-analyze table t30;
--replace_column 9 #
explain
@@ -786,11 +785,15 @@ drop table t45;
--echo # Now it fails if there is data overlap with what
--echo # already exists
--echo #
+# We exclude rocksdb_max_open_files here because its value is dependent on
+# the value of the server's open_files_limit and is expected to be different
+# across distros and installs
--replace_regex /[a-f0-9]{40}/#/
show variables
where
variable_name like 'rocksdb%' and
+ variable_name not like 'rocksdb_max_open_files' and
variable_name not like 'rocksdb_supported_compression_types';
create table t47 (pk int primary key, col1 varchar(12)) engine=rocksdb;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_debug.test b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_debug.test
new file mode 100644
index 00000000000..7cd4e09e946
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_debug.test
@@ -0,0 +1,14 @@
+--source include/have_rocksdb.inc
+--source include/have_debug.inc
+
+--echo #
+--echo # Issue #728: Assertion `covers_key(b)' failed in int
+--echo # myrocks::Rdb_key_def::cmp_full_keys(const rocks db::Slice&,
+--echo # const rocksdb::Slice&)
+--echo #
+
+CREATE TABLE t2(c1 TINYINT SIGNED KEY,c2 TINYINT UNSIGNED,c3 INT);
+INSERT INTO t2(c1)VALUES(0);
+SELECT * FROM t2 WHERE c1<=127 ORDER BY c1 DESC;
+DROP TABLE t2;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_range2.test b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_range2.test
index 6b8d0b90e90..a7ac236451e 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_range2.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_range2.test
@@ -15,6 +15,7 @@ while ($i<10000)
--enable_query_log
analyze table t1;
select count(*) from t1;
+--replace_column 9 #
explain select c1 from t1 where c1 > 5 limit 10;
drop table t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/skip_validate_tmp_table.test b/storage/rocksdb/mysql-test/rocksdb/t/skip_validate_tmp_table.test
index 945b0079cce..c4321462dfd 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/skip_validate_tmp_table.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/skip_validate_tmp_table.test
@@ -1,39 +1,39 @@
--source include/have_rocksdb.inc
--source include/have_debug.inc
-# Write file to make mysql-test-run.pl expect the "crash", but don't restart the
-# server until it is told to
--let $_server_id= `SELECT @@server_id`
---let $_expect_file_name= $MYSQLTEST_VARDIR/tmp/mysqld.$_server_id.expect
-CREATE TABLE t1 (pk int primary key) ENGINE=ROCKSDB;
+create table t1 (pk int primary key) engine=rocksdb;
# Create a .frm file without a matching table
--exec cp $MYSQLTEST_VARDIR/mysqld.$_server_id/data/test/t1.frm $MYSQLTEST_VARDIR/mysqld.$_server_id/data/test/t1#sql-test.frm
-# Restart the server with a .frm file exist but that table is not registered in RocksDB
---exec echo "wait" >$_expect_file_name
-shutdown_server 10;
---exec echo "restart" >$_expect_file_name
---sleep 5
---enable_reconnect
---source include/wait_until_connected_again.inc
---disable_reconnect
+--source include/restart_mysqld.inc
+
+show tables;
+
+# MariaDB produces a warning:
+call mtr.add_suppression('Invalid .old.. table or database name .t1#sql-test.');
# This will append '#sql-test' to the end of new name
set session debug_dbug="+d,gen_sql_table_name";
rename table t1 to t2;
set session debug_dbug= "-d,gen_sql_table_name";
+show tables;
+
# Remove the corresponding .frm files
--remove_files_wildcard $MYSQLTEST_VARDIR/mysqld.$_server_id/data/test *t1*.frm
--remove_files_wildcard $MYSQLTEST_VARDIR/mysqld.$_server_id/data/test *t2*.frm
# Restart the server with a table registered in RocksDB but does not have a .frm file
---exec echo "wait" >$_expect_file_name
-shutdown_server 10;
---exec echo "restart" >$_expect_file_name
---sleep 5
---enable_reconnect
---source include/wait_until_connected_again.inc
---disable_reconnect
+--source include/restart_mysqld.inc
+
+show tables;
+
+# try to recreate a table with the same name
+create table t2 (pk int primary key) engine=rocksdb;
+
+show tables;
+
+drop table t2;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/ttl_primary_read_filtering.test b/storage/rocksdb/mysql-test/rocksdb/t/ttl_primary_read_filtering.test
index 5a694b7b222..4e8b081c4d5 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/ttl_primary_read_filtering.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/ttl_primary_read_filtering.test
@@ -78,22 +78,28 @@ INSERT INTO t1 values (7);
set global rocksdb_debug_ttl_rec_ts = 0;
# should return nothing.
+select variable_value into @a from information_schema.global_status where variable_name='rocksdb_rows_filtered';
--sorted_result
SELECT * FROM t1;
+select variable_value-@a from information_schema.global_status where variable_name='rocksdb_rows_filtered';
# disable filtering
set global rocksdb_enable_ttl_read_filtering=0;
# should return everything
+select variable_value into @a from information_schema.global_status where variable_name='rocksdb_rows_filtered';
--sorted_result
SELECT * FROM t1;
+select variable_value-@a from information_schema.global_status where variable_name='rocksdb_rows_filtered';
-# disable filtering
+# enable filtering
set global rocksdb_enable_ttl_read_filtering=1;
# should return nothing.
+select variable_value into @a from information_schema.global_status where variable_name='rocksdb_rows_filtered';
--sorted_result
SELECT * FROM t1;
+select variable_value-@a from information_schema.global_status where variable_name='rocksdb_rows_filtered';
DROP TABLE t1;
@@ -286,28 +292,37 @@ SELECT * FROM t1; # <= shouldn't be filtered out here
--echo # Switching to connection 2
connection con2;
-# compaction doesn't do anythign since con1 snapshot is still open
+# compaction doesn't do anything since con1 snapshot is still open
set global rocksdb_force_flush_memtable_now=1;
set global rocksdb_compact_cf='default';
# read filtered out, because on a different connection, on
# this connection the records have 'expired' already so they are filtered out
# even though they have not yet been removed by compaction
+
+select variable_value into @a from information_schema.global_status where variable_name='rocksdb_rows_filtered';
--sorted_result
SELECT * FROM t1;
+select variable_value-@a from information_schema.global_status where variable_name='rocksdb_rows_filtered';
--echo # Switching to connection 1
connection con1;
+select variable_value into @a from information_schema.global_status where variable_name='rocksdb_rows_filtered';
--sorted_result
SELECT * FROM t1; # <= shouldn't be filtered out here
+select variable_value-@a from information_schema.global_status where variable_name='rocksdb_rows_filtered';
UPDATE t1 set a = a + 1;
+select variable_value into @a from information_schema.global_status where variable_name='rocksdb_rows_filtered';
--sorted_result
SELECT * FROM t1; # <= shouldn't be filtered out here
+select variable_value-@a from information_schema.global_status where variable_name='rocksdb_rows_filtered';
COMMIT;
+select variable_value into @a from information_schema.global_status where variable_name='rocksdb_rows_filtered';
--sorted_result # <= filtered out here because time has passed.
SELECT * FROM t1;
+select variable_value-@a from information_schema.global_status where variable_name='rocksdb_rows_filtered';
DROP TABLE t1;
disconnect con1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/type_varchar.test b/storage/rocksdb/mysql-test/rocksdb/t/type_varchar.test
index e45b6836f67..b631615c266 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/type_varchar.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/type_varchar.test
@@ -1,9 +1,5 @@
--source include/have_rocksdb.inc
---disable_warnings
-drop table if exists t1,t2;
---enable_warnings
-
#
# VARCHAR column types
#
@@ -73,3 +69,14 @@ select 'email_i' as index_name, count(*) AS count from t force index(email_i);
drop table t;
set global rocksdb_checksums_pct = @save_rocksdb_checksums_pct;
set session rocksdb_verify_row_debug_checksums = @save_rocksdb_verify_row_debug_checksums;
+
+# Issue #784 - Skip trailing space bytes for non-unpackable fields
+
+drop table if exists t;
+create table t (h varchar(31) character set utf8 collate utf8_bin not null, i varchar(19) collate latin1_bin not null, primary key(i), key(h)) engine=rocksdb;
+insert into t(i,h) values('a','b');
+check table t;
+alter table t modify h varchar(31) character set cp1257 collate cp1257_bin not null;
+check table t;
+drop table t;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/use_direct_reads_writes.test b/storage/rocksdb/mysql-test/rocksdb/t/use_direct_reads_writes.test
index 7a053c659b2..8dfbe312ea8 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/use_direct_reads_writes.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/use_direct_reads_writes.test
@@ -11,37 +11,51 @@ select plugin_name, plugin_type from information_schema.plugins where plugin_nam
# caused an assertion in RocksDB. Now it should not be allowed and ROCKSDB
# plugin will not load in such configuration.
#
-# We want the server to still start, so we specify default-storage-engine=myisam
+--let LOG=$MYSQLTEST_VARDIR/tmp/use_direct_reads_writes.err
+--let SEARCH_FILE=$LOG
---let $_mysqld_option=--rocksdb_use_direct_reads=1 --rocksdb_allow_mmap_reads=1 --default-storage-engine=myisam
---source include/restart_mysqld_with_option.inc
+--echo Checking direct reads
+--let $_mysqld_option=--log-error=$LOG --rocksdb_use_direct_reads=1 --rocksdb_allow_mmap_reads=1
+--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR
+--source include/restart_mysqld_with_invalid_option.inc
---echo # Check that ROCKSDB plugin is not loaded:
-select plugin_name, plugin_type from information_schema.plugins where plugin_name='RocksDB';
+--let SEARCH_PATTERN=enable both use_direct_reads
+--source include/search_pattern_in_file.inc
+--remove_file $LOG
---echo # Check that MyRocks has printed an error message into server error log:
-let SEARCH_FILE=$MYSQLTEST_VARDIR/log/mysqld.1.err;
-let SEARCH_PATTERN=enable both use_direct_reads;
-source include/search_pattern_in_file.inc;
---echo # Now, restart the server back with regular settings
---source include/restart_mysqld.inc
-select plugin_name, plugin_type from information_schema.plugins where plugin_name='RocksDB';
+# Repeat with direct-writes
+--echo Checking direct writes
+--let $_mysqld_option=--log-error=$LOG --rocksdb_use_direct_io_for_flush_and_compaction=1 --rocksdb_allow_mmap_writes=1
+--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR
+--source include/restart_mysqld_with_invalid_option.inc
---echo #
---echo # Now, repeat the same with another set of invalid arguments
---echo #
---let $_mysqld_option=--rocksdb_use_direct_io_for_flush_and_compaction=1 --rocksdb_allow_mmap_writes=1 --default-storage-engine=myisam
---source include/restart_mysqld_with_option.inc
+--let SEARCH_PATTERN=enable both use_direct_io_for_flush_and_compaction
+--source include/search_pattern_in_file.inc
+--remove_file $LOG
---echo # Check that ROCKSDB plugin is not loaded:
-select plugin_name, plugin_type from information_schema.plugins where plugin_name='RocksDB';
-let SEARCH_FILE=$MYSQLTEST_VARDIR/log/mysqld.1.err;
-let SEARCH_PATTERN=enable both use_direct_io_for_flush_and_compaction;
-source include/search_pattern_in_file.inc;
+# Verify invalid --rocksdb_allow_mmap_writes and --rocksdb_flush_log_at_trx_commit combination at startup fails
+--echo Checking rocksdb_flush_log_at_trx_commit
+--let $_mysqld_option=--log-error=$LOG --rocksdb_flush_log_at_trx_commit=1 --rocksdb_allow_mmap_writes=1
+--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR
+--source include/restart_mysqld_with_invalid_option.inc
---echo # Now, restart the server back with regular settings
---source include/restart_mysqld.inc
-select plugin_name, plugin_type from information_schema.plugins where plugin_name='RocksDB';
+--let SEARCH_PATTERN=rocksdb_flush_log_at_trx_commit needs to be
+--source include/search_pattern_in_file.inc
+--remove_file $LOG
+
+
+# Verify rocksdb_flush_log_at_trx_commit cannot be changed to a non-zero value if mmap writes are used
+--echo Validate flush_log settings when direct writes is enabled
+--let $_mysqld_option=--rocksdb_flush_log_at_trx_commit=0 --rocksdb_allow_mmap_writes=1
+--source include/restart_mysqld_with_option.inc
+
+set global rocksdb_flush_log_at_trx_commit=0;
+--error ER_WRONG_VALUE_FOR_VAR
+set global rocksdb_flush_log_at_trx_commit=1;
+--error ER_WRONG_VALUE_FOR_VAR
+set global rocksdb_flush_log_at_trx_commit=2;
+# Cleanup
+--source include/restart_mysqld.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/write_sync.test b/storage/rocksdb/mysql-test/rocksdb/t/write_sync.test
index c20bb1fc89c..e97a0b0bcc9 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/write_sync.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/write_sync.test
@@ -7,7 +7,8 @@ SET GLOBAL rocksdb_write_ignore_missing_column_families=true;
create table aaa (id int primary key, i int) engine rocksdb;
set @save_rocksdb_flush_log_at_trx_commit=@@global.rocksdb_flush_log_at_trx_commit;
SET GLOBAL rocksdb_flush_log_at_trx_commit=1;
---exec sleep 5
+insert aaa(id, i) values(0,1);
+
select variable_value into @a from information_schema.global_status where variable_name='rocksdb_wal_synced';
insert aaa(id, i) values(1,1);
select variable_value-@a from information_schema.global_status where variable_name='rocksdb_wal_synced';
@@ -16,18 +17,16 @@ select variable_value-@a from information_schema.global_status where variable_na
insert aaa(id, i) values(3,1);
select variable_value-@a from information_schema.global_status where variable_name='rocksdb_wal_synced';
-SET GLOBAL rocksdb_flush_log_at_trx_commit=0;
---exec sleep 5
select variable_value into @a from information_schema.global_status where variable_name='rocksdb_wal_synced';
+SET GLOBAL rocksdb_flush_log_at_trx_commit=0;
insert aaa(id, i) values(4,1);
let $status_var=rocksdb_wal_synced;
let $status_var_value=`select @a+1`;
source include/wait_for_status_var.inc;
-SET GLOBAL rocksdb_flush_log_at_trx_commit=2;
---exec sleep 5
select variable_value into @a from information_schema.global_status where variable_name='rocksdb_wal_synced';
+SET GLOBAL rocksdb_flush_log_at_trx_commit=2;
insert aaa(id, i) values(5,1);
let $status_var=rocksdb_wal_synced;
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/include/rpl_no_unique_check_on_lag.inc b/storage/rocksdb/mysql-test/rocksdb_rpl/include/rpl_no_unique_check_on_lag.inc
index 8f03c16e2f1..d983bdf8b58 100644
--- a/storage/rocksdb/mysql-test/rocksdb_rpl/include/rpl_no_unique_check_on_lag.inc
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/include/rpl_no_unique_check_on_lag.inc
@@ -30,6 +30,7 @@ INSERT INTO t1 VALUES(1, 1);
connection slave;
--let $slave_sql_errno= 1062
--let $not_switch_connection= 0
+--let $slave_timeout= 120
--source include/wait_for_slave_sql_error_and_skip.inc
set global reset_seconds_behind_master=0;
--source include/stop_slave_io.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/r/singledelete_idempotent_recovery.result b/storage/rocksdb/mysql-test/rocksdb_rpl/r/singledelete_idempotent_recovery.result
index 3d734c9498d..89e93f6b8f0 100644
--- a/storage/rocksdb/mysql-test/rocksdb_rpl/r/singledelete_idempotent_recovery.result
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/r/singledelete_idempotent_recovery.result
@@ -10,6 +10,7 @@ insert into r1 values (1, 1000);
set global rocksdb_force_flush_memtable_now=1;
include/rpl_start_server.inc [server_number=2]
include/start_slave.inc
+insert into r1 values (2,2000);
delete r1 from r1 force index (i) where id2=1000;
select id1,id2 from r1 force index (primary) where id1=1 and id2=1000;
id1 id2
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/multiclient_2pc.test b/storage/rocksdb/mysql-test/rocksdb_rpl/t/multiclient_2pc.test
index 6143824eea6..ff484171213 100644
--- a/storage/rocksdb/mysql-test/rocksdb_rpl/t/multiclient_2pc.test
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/multiclient_2pc.test
@@ -62,6 +62,7 @@ SET GLOBAL SYNC_BINLOG = 1;
insert into t1 values (1000000, 1, "i_am_just_here_to_trigger_a_flush");
+--error 0,2013
SET DEBUG_SYNC='now SIGNAL go';
--source include/wait_until_disconnected.inc
--enable_reconnect
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/singledelete_idempotent_recovery.test b/storage/rocksdb/mysql-test/rocksdb_rpl/t/singledelete_idempotent_recovery.test
index 9180afa881f..6d953ead4e9 100644
--- a/storage/rocksdb/mysql-test/rocksdb_rpl/t/singledelete_idempotent_recovery.test
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/singledelete_idempotent_recovery.test
@@ -53,8 +53,14 @@ EOF
--source include/rpl_start_server.inc
--source include/start_slave.inc
+
+# Due to the binlogs being truncated, the slave may still think it's processed up to
+# the truncated binlog and select master_pos_wait() can return prematurely. Add
+# a new transaction to the master to force master_pos_wait() to wait.
connection master;
+insert into r1 values (2,2000);
sync_slave_with_master;
+
connection slave;
delete r1 from r1 force index (i) where id2=1000;
select id1,id2 from r1 force index (primary) where id1=1 and id2=1000;
diff --git a/storage/rocksdb/mysql-test/rocksdb_stress/r/rocksdb_stress.result b/storage/rocksdb/mysql-test/rocksdb_stress/r/rocksdb_stress.result
index 3d76e035e05..9f161b18c05 100644
--- a/storage/rocksdb/mysql-test/rocksdb_stress/r/rocksdb_stress.result
+++ b/storage/rocksdb/mysql-test/rocksdb_stress/r/rocksdb_stress.result
@@ -9,6 +9,8 @@ zero_sum INT DEFAULT 0,
msg VARCHAR(1024),
msg_length int,
msg_checksum varchar(128),
+auto_inc BIGINT UNSIGNED NOT NULL AUTO_INCREMENT,
+KEY(auto_inc),
KEY msg_i(msg(255), zero_sum))
ENGINE=RocksDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin;
stop slave;
diff --git a/storage/rocksdb/mysql-test/rocksdb_stress/r/rocksdb_stress_crash.result b/storage/rocksdb/mysql-test/rocksdb_stress/r/rocksdb_stress_crash.result
index 3d76e035e05..9f161b18c05 100644
--- a/storage/rocksdb/mysql-test/rocksdb_stress/r/rocksdb_stress_crash.result
+++ b/storage/rocksdb/mysql-test/rocksdb_stress/r/rocksdb_stress_crash.result
@@ -9,6 +9,8 @@ zero_sum INT DEFAULT 0,
msg VARCHAR(1024),
msg_length int,
msg_checksum varchar(128),
+auto_inc BIGINT UNSIGNED NOT NULL AUTO_INCREMENT,
+KEY(auto_inc),
KEY msg_i(msg(255), zero_sum))
ENGINE=RocksDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin;
stop slave;
diff --git a/storage/rocksdb/mysql-test/rocksdb_stress/t/load_generator.py b/storage/rocksdb/mysql-test/rocksdb_stress/t/load_generator.py
index 20098f49b42..c1d3e7fb81c 100644
--- a/storage/rocksdb/mysql-test/rocksdb_stress/t/load_generator.py
+++ b/storage/rocksdb/mysql-test/rocksdb_stress/t/load_generator.py
@@ -95,6 +95,8 @@ LOADERS_READY = 0
REQUEST_ID = 1
REQUEST_ID_LOCK = threading.Lock()
+INSERT_ID_SET = set()
+
def get_next_request_id():
global REQUEST_ID
with REQUEST_ID_LOCK:
@@ -302,10 +304,19 @@ class PopulateWorker(WorkerThread):
execute(self.cur, stmt)
if i % 101 == 0:
self.con.commit()
+ check_id(self.con.insert_id())
self.con.commit()
+ check_id(self.con.insert_id())
logging.info("Inserted %d rows starting at id %d" %
(self.num_to_add, self.start_id))
+def check_id(id):
+ if id == 0:
+ return
+ if id in INSERT_ID_SET:
+ raise Exception("Duplicate auto_inc id %d" % id)
+ INSERT_ID_SET.add(id)
+
def populate_table(num_records):
logging.info("Populate_table started for %d records" % num_records)
@@ -422,6 +433,7 @@ class LoadGenWorker(WorkerThread):
execute(self.cur, gen_insert(self.table, idx, self.thread_id,
request_id, 0))
self.con.commit()
+ check_id(self.con.insert_id())
self.id_map.append(request_id)
@@ -687,6 +699,7 @@ class LoadGenWorker(WorkerThread):
else:
self.cur_txn_state = self.TXN_COMMIT_STARTED
self.con.commit()
+ check_id(self.con.insert_id())
if not self.con.get_server_info():
raise MySQLdb.OperationalError(MySQLdb.constants.CR.CONNECTION_ERROR,
"Possible connection error on commit")
diff --git a/storage/rocksdb/mysql-test/rocksdb_stress/t/rocksdb_stress.test b/storage/rocksdb/mysql-test/rocksdb_stress/t/rocksdb_stress.test
index 7d92bb3f83a..307211a124d 100644
--- a/storage/rocksdb/mysql-test/rocksdb_stress/t/rocksdb_stress.test
+++ b/storage/rocksdb/mysql-test/rocksdb_stress/t/rocksdb_stress.test
@@ -17,6 +17,8 @@ CREATE TABLE t1(id INT PRIMARY KEY,
msg VARCHAR(1024),
msg_length int,
msg_checksum varchar(128),
+ auto_inc BIGINT UNSIGNED NOT NULL AUTO_INCREMENT,
+ KEY(auto_inc),
KEY msg_i(msg(255), zero_sum))
ENGINE=RocksDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin;
diff --git a/storage/rocksdb/mysql-test/rocksdb_stress/t/rocksdb_stress_crash.test b/storage/rocksdb/mysql-test/rocksdb_stress/t/rocksdb_stress_crash.test
index 6f6128579b5..8ef4c73c3b0 100644
--- a/storage/rocksdb/mysql-test/rocksdb_stress/t/rocksdb_stress_crash.test
+++ b/storage/rocksdb/mysql-test/rocksdb_stress/t/rocksdb_stress_crash.test
@@ -18,6 +18,8 @@ CREATE TABLE t1(id INT PRIMARY KEY,
msg VARCHAR(1024),
msg_length int,
msg_checksum varchar(128),
+ auto_inc BIGINT UNSIGNED NOT NULL AUTO_INCREMENT,
+ KEY(auto_inc),
KEY msg_i(msg(255), zero_sum))
ENGINE=RocksDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_allow_to_start_after_corruption_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_allow_to_start_after_corruption_basic.result
new file mode 100644
index 00000000000..086010dc79e
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_allow_to_start_after_corruption_basic.result
@@ -0,0 +1,7 @@
+SET @start_global_value = @@global.ROCKSDB_ALLOW_TO_START_AFTER_CORRUPTION;
+SELECT @start_global_value;
+@start_global_value
+0
+"Trying to set variable @@global.ROCKSDB_ALLOW_TO_START_AFTER_CORRUPTION to 444. It should fail because it is readonly."
+SET @@global.ROCKSDB_ALLOW_TO_START_AFTER_CORRUPTION = 444;
+ERROR HY000: Variable 'rocksdb_allow_to_start_after_corruption' is a read only variable
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_bytes_per_sync_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_bytes_per_sync_basic.result
index ede02afcb60..9af4f730a21 100644
--- a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_bytes_per_sync_basic.result
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_bytes_per_sync_basic.result
@@ -1,7 +1,85 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(100);
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+INSERT INTO invalid_values VALUES('\'-1\'');
+INSERT INTO invalid_values VALUES('\'101\'');
+INSERT INTO invalid_values VALUES('\'484436\'');
SET @start_global_value = @@global.ROCKSDB_BYTES_PER_SYNC;
SELECT @start_global_value;
@start_global_value
0
-"Trying to set variable @@global.ROCKSDB_BYTES_PER_SYNC to 444. It should fail because it is readonly."
-SET @@global.ROCKSDB_BYTES_PER_SYNC = 444;
-ERROR HY000: Variable 'rocksdb_bytes_per_sync' is a read only variable
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_BYTES_PER_SYNC to 100"
+SET @@global.ROCKSDB_BYTES_PER_SYNC = 100;
+SELECT @@global.ROCKSDB_BYTES_PER_SYNC;
+@@global.ROCKSDB_BYTES_PER_SYNC
+100
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_BYTES_PER_SYNC = DEFAULT;
+SELECT @@global.ROCKSDB_BYTES_PER_SYNC;
+@@global.ROCKSDB_BYTES_PER_SYNC
+0
+"Trying to set variable @@global.ROCKSDB_BYTES_PER_SYNC to 1"
+SET @@global.ROCKSDB_BYTES_PER_SYNC = 1;
+SELECT @@global.ROCKSDB_BYTES_PER_SYNC;
+@@global.ROCKSDB_BYTES_PER_SYNC
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_BYTES_PER_SYNC = DEFAULT;
+SELECT @@global.ROCKSDB_BYTES_PER_SYNC;
+@@global.ROCKSDB_BYTES_PER_SYNC
+0
+"Trying to set variable @@global.ROCKSDB_BYTES_PER_SYNC to 0"
+SET @@global.ROCKSDB_BYTES_PER_SYNC = 0;
+SELECT @@global.ROCKSDB_BYTES_PER_SYNC;
+@@global.ROCKSDB_BYTES_PER_SYNC
+0
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_BYTES_PER_SYNC = DEFAULT;
+SELECT @@global.ROCKSDB_BYTES_PER_SYNC;
+@@global.ROCKSDB_BYTES_PER_SYNC
+0
+"Trying to set variable @@session.ROCKSDB_BYTES_PER_SYNC to 444. It should fail because it is not session."
+SET @@session.ROCKSDB_BYTES_PER_SYNC = 444;
+ERROR HY000: Variable 'rocksdb_bytes_per_sync' is a GLOBAL variable and should be set with SET GLOBAL
+'# Testing with invalid values in global scope #'
+"Trying to set variable @@global.ROCKSDB_BYTES_PER_SYNC to 'aaa'"
+SET @@global.ROCKSDB_BYTES_PER_SYNC = 'aaa';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_BYTES_PER_SYNC;
+@@global.ROCKSDB_BYTES_PER_SYNC
+0
+"Trying to set variable @@global.ROCKSDB_BYTES_PER_SYNC to 'bbb'"
+SET @@global.ROCKSDB_BYTES_PER_SYNC = 'bbb';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_BYTES_PER_SYNC;
+@@global.ROCKSDB_BYTES_PER_SYNC
+0
+"Trying to set variable @@global.ROCKSDB_BYTES_PER_SYNC to '-1'"
+SET @@global.ROCKSDB_BYTES_PER_SYNC = '-1';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_BYTES_PER_SYNC;
+@@global.ROCKSDB_BYTES_PER_SYNC
+0
+"Trying to set variable @@global.ROCKSDB_BYTES_PER_SYNC to '101'"
+SET @@global.ROCKSDB_BYTES_PER_SYNC = '101';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_BYTES_PER_SYNC;
+@@global.ROCKSDB_BYTES_PER_SYNC
+0
+"Trying to set variable @@global.ROCKSDB_BYTES_PER_SYNC to '484436'"
+SET @@global.ROCKSDB_BYTES_PER_SYNC = '484436';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_BYTES_PER_SYNC;
+@@global.ROCKSDB_BYTES_PER_SYNC
+0
+SET @@global.ROCKSDB_BYTES_PER_SYNC = @start_global_value;
+SELECT @@global.ROCKSDB_BYTES_PER_SYNC;
+@@global.ROCKSDB_BYTES_PER_SYNC
+0
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_flush_memtable_on_analyze_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_flush_memtable_on_analyze_basic.result
deleted file mode 100644
index 905feec9b1a..00000000000
--- a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_flush_memtable_on_analyze_basic.result
+++ /dev/null
@@ -1,58 +0,0 @@
-drop table if exists t1;
-CREATE TABLE t1 (a INT AUTO_INCREMENT, b INT, PRIMARY KEY(a)) ENGINE=rocksdb;
-SHOW CREATE TABLE t1;
-Table Create Table
-t1 CREATE TABLE `t1` (
- `a` int(11) NOT NULL AUTO_INCREMENT,
- `b` int(11) DEFAULT NULL,
- PRIMARY KEY (`a`)
-) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
-INSERT INTO t1 (b) VALUES (1);
-INSERT INTO t1 (b) VALUES (2);
-INSERT INTO t1 (b) VALUES (3);
-SELECT * FROM t1;
-a b
-1 1
-2 2
-3 3
-set session rocksdb_flush_memtable_on_analyze=off;
-ANALYZE TABLE t1;
-Table Op Msg_type Msg_text
-test.t1 analyze status OK
-SHOW INDEXES FROM t1;
-Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment
-t1 0 PRIMARY 1 a A 3 NULL NULL LSMTREE
-set session rocksdb_flush_memtable_on_analyze=on;
-ANALYZE TABLE t1;
-Table Op Msg_type Msg_text
-test.t1 analyze status OK
-SHOW INDEXES FROM t1;
-Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment
-t1 0 PRIMARY 1 a A 3 NULL NULL LSMTREE
-DROP TABLE t1;
-CREATE TABLE t1 (a INT AUTO_INCREMENT, b INT, PRIMARY KEY(a)) ENGINE=rocksdb;
-SHOW CREATE TABLE t1;
-Table Create Table
-t1 CREATE TABLE `t1` (
- `a` int(11) NOT NULL AUTO_INCREMENT,
- `b` int(11) DEFAULT NULL,
- PRIMARY KEY (`a`)
-) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
-INSERT INTO t1 (b) VALUES (1);
-INSERT INTO t1 (b) VALUES (2);
-INSERT INTO t1 (b) VALUES (3);
-SELECT * FROM t1;
-a b
-1 1
-2 2
-3 3
-SHOW TABLE STATUS LIKE 't1';
-Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed # # # 0 0 0 4 NULL NULL NULL latin1_swedish_ci NULL
-ANALYZE TABLE t1;
-Table Op Msg_type Msg_text
-test.t1 analyze status OK
-SHOW TABLE STATUS LIKE 't1';
-Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed # # # 0 0 0 4 NULL NULL NULL latin1_swedish_ci NULL
-DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_ignore_unknown_options_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_ignore_unknown_options_basic.result
new file mode 100644
index 00000000000..621213cd79b
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_ignore_unknown_options_basic.result
@@ -0,0 +1,14 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(1024);
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+SET @start_global_value = @@global.ROCKSDB_IGNORE_UNKNOWN_OPTIONS;
+SELECT @start_global_value;
+@start_global_value
+1
+"Trying to set variable @@global.ROCKSDB_IGNORE_UNKNOWN_OPTIONS to 444. It should fail because it is readonly."
+SET @@global.ROCKSDB_IGNORE_UNKNOWN_OPTIONS = 444;
+ERROR HY000: Variable 'rocksdb_ignore_unknown_options' is a read only variable
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_max_open_files_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_max_open_files_basic.result
index b058ebf05f8..60f505310c6 100644
--- a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_max_open_files_basic.result
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_max_open_files_basic.result
@@ -1,7 +1,3 @@
-SET @start_global_value = @@global.ROCKSDB_MAX_OPEN_FILES;
-SELECT @start_global_value;
-@start_global_value
--1
-"Trying to set variable @@global.ROCKSDB_MAX_OPEN_FILES to 444. It should fail because it is readonly."
-SET @@global.ROCKSDB_MAX_OPEN_FILES = 444;
-ERROR HY000: Variable 'rocksdb_max_open_files' is a read only variable
+show variables like 'rocksdb_max_open_files';
+Variable_name Value
+rocksdb_max_open_files #
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_max_row_locks_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_max_row_locks_basic.result
index e417e4d5c4e..c925a68d4ed 100644
--- a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_max_row_locks_basic.result
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_max_row_locks_basic.result
@@ -6,11 +6,11 @@ INSERT INTO invalid_values VALUES('\'aaa\'');
SET @start_global_value = @@global.ROCKSDB_MAX_ROW_LOCKS;
SELECT @start_global_value;
@start_global_value
-1073741824
+1048576
SET @start_session_value = @@session.ROCKSDB_MAX_ROW_LOCKS;
SELECT @start_session_value;
@start_session_value
-1073741824
+1048576
'# Setting to valid values in global scope#'
"Trying to set variable @@global.ROCKSDB_MAX_ROW_LOCKS to 1"
SET @@global.ROCKSDB_MAX_ROW_LOCKS = 1;
@@ -21,7 +21,7 @@ SELECT @@global.ROCKSDB_MAX_ROW_LOCKS;
SET @@global.ROCKSDB_MAX_ROW_LOCKS = DEFAULT;
SELECT @@global.ROCKSDB_MAX_ROW_LOCKS;
@@global.ROCKSDB_MAX_ROW_LOCKS
-1073741824
+1048576
"Trying to set variable @@global.ROCKSDB_MAX_ROW_LOCKS to 1024"
SET @@global.ROCKSDB_MAX_ROW_LOCKS = 1024;
SELECT @@global.ROCKSDB_MAX_ROW_LOCKS;
@@ -31,7 +31,7 @@ SELECT @@global.ROCKSDB_MAX_ROW_LOCKS;
SET @@global.ROCKSDB_MAX_ROW_LOCKS = DEFAULT;
SELECT @@global.ROCKSDB_MAX_ROW_LOCKS;
@@global.ROCKSDB_MAX_ROW_LOCKS
-1073741824
+1048576
'# Setting to valid values in session scope#'
"Trying to set variable @@session.ROCKSDB_MAX_ROW_LOCKS to 1"
SET @@session.ROCKSDB_MAX_ROW_LOCKS = 1;
@@ -42,7 +42,7 @@ SELECT @@session.ROCKSDB_MAX_ROW_LOCKS;
SET @@session.ROCKSDB_MAX_ROW_LOCKS = DEFAULT;
SELECT @@session.ROCKSDB_MAX_ROW_LOCKS;
@@session.ROCKSDB_MAX_ROW_LOCKS
-1073741824
+1048576
"Trying to set variable @@session.ROCKSDB_MAX_ROW_LOCKS to 1024"
SET @@session.ROCKSDB_MAX_ROW_LOCKS = 1024;
SELECT @@session.ROCKSDB_MAX_ROW_LOCKS;
@@ -52,21 +52,21 @@ SELECT @@session.ROCKSDB_MAX_ROW_LOCKS;
SET @@session.ROCKSDB_MAX_ROW_LOCKS = DEFAULT;
SELECT @@session.ROCKSDB_MAX_ROW_LOCKS;
@@session.ROCKSDB_MAX_ROW_LOCKS
-1073741824
+1048576
'# Testing with invalid values in global scope #'
"Trying to set variable @@global.ROCKSDB_MAX_ROW_LOCKS to 'aaa'"
SET @@global.ROCKSDB_MAX_ROW_LOCKS = 'aaa';
Got one of the listed errors
SELECT @@global.ROCKSDB_MAX_ROW_LOCKS;
@@global.ROCKSDB_MAX_ROW_LOCKS
-1073741824
+1048576
SET @@global.ROCKSDB_MAX_ROW_LOCKS = @start_global_value;
SELECT @@global.ROCKSDB_MAX_ROW_LOCKS;
@@global.ROCKSDB_MAX_ROW_LOCKS
-1073741824
+1048576
SET @@session.ROCKSDB_MAX_ROW_LOCKS = @start_session_value;
SELECT @@session.ROCKSDB_MAX_ROW_LOCKS;
@@session.ROCKSDB_MAX_ROW_LOCKS
-1073741824
+1048576
DROP TABLE valid_values;
DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_concurrent_prepare_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_two_write_queues_basic.result
index 11d4f2363f6..5a19016bf91 100644
--- a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_concurrent_prepare_basic.result
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_two_write_queues_basic.result
@@ -3,12 +3,12 @@ INSERT INTO valid_values VALUES(1);
INSERT INTO valid_values VALUES(1024);
CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
INSERT INTO invalid_values VALUES('\'aaa\'');
-SET @start_global_value = @@global.ROCKSDB_CONCURRENT_PREPARE;
+SET @start_global_value = @@global.ROCKSDB_TWO_WRITE_QUEUES;
SELECT @start_global_value;
@start_global_value
1
-"Trying to set variable @@global.ROCKSDB_CONCURRENT_PREPARE to 444. It should fail because it is readonly."
-SET @@global.ROCKSDB_CONCURRENT_PREPARE = 444;
-ERROR HY000: Variable 'rocksdb_concurrent_prepare' is a read only variable
+"Trying to set variable @@global.ROCKSDB_TWO_WRITE_QUEUES to 444. It should fail because it is readonly."
+SET @@global.ROCKSDB_TWO_WRITE_QUEUES = 444;
+ERROR HY000: Variable 'rocksdb_two_write_queues' is a read only variable
DROP TABLE valid_values;
DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_update_cf_options.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_update_cf_options.result
new file mode 100644
index 00000000000..126b4cffe8b
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_update_cf_options.result
@@ -0,0 +1,38 @@
+CREATE TABLE t1 (a INT, PRIMARY KEY (a) COMMENT 'update_cf1') ENGINE=ROCKSDB;
+SET @@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS='update_cf1={write_buffer_size=8m;target_file_size_base=2m};';
+SELECT @@global.rocksdb_update_cf_options;
+@@global.rocksdb_update_cf_options
+update_cf1={write_buffer_size=8m;target_file_size_base=2m};
+SET @@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS=NULL;
+SHOW GLOBAL VARIABLES LIKE 'rocksdb_update_cf_options';
+Variable_name Value
+rocksdb_update_cf_options
+SET @@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS=NULL;
+SHOW GLOBAL VARIABLES LIKE 'rocksdb_update_cf_options';
+Variable_name Value
+rocksdb_update_cf_options
+SET @@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS="";
+SHOW GLOBAL VARIABLES LIKE 'rocksdb_update_cf_options';
+Variable_name Value
+rocksdb_update_cf_options
+SET @@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS=NULL;
+SHOW GLOBAL VARIABLES LIKE 'rocksdb_update_cf_options';
+Variable_name Value
+rocksdb_update_cf_options
+SET @@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS='update_cf1={write_buffer_size=8m;target_file_size_base=2m};';
+SHOW GLOBAL VARIABLES LIKE 'rocksdb_update_cf_options';
+Variable_name Value
+rocksdb_update_cf_options update_cf1={write_buffer_size=8m;target_file_size_base=2m};
+SET @@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS='update_cf2={write_buffer_size=8m;target_file_size_base=2m};';
+SHOW GLOBAL VARIABLES LIKE 'rocksdb_update_cf_options';
+Variable_name Value
+rocksdb_update_cf_options update_cf2={write_buffer_size=8m;target_file_size_base=2m};
+DROP TABLE t1;
+SET @@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS='update_cf1={write_buffer_size=8m;target_file_size_base=2m};';
+SHOW GLOBAL VARIABLES LIKE 'rocksdb_update_cf_options';
+Variable_name Value
+rocksdb_update_cf_options update_cf1={write_buffer_size=8m;target_file_size_base=2m};
+SET @@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS=DEFAULT;
+SHOW GLOBAL VARIABLES LIKE 'rocksdb_update_cf_options';
+Variable_name Value
+rocksdb_update_cf_options
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_update_cf_options_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_update_cf_options_basic.result
index 5ad5394db29..ba24fafd0ec 100644
--- a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_update_cf_options_basic.result
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_update_cf_options_basic.result
@@ -32,10 +32,19 @@ SET @@global.rocksdb_update_cf_options = NULL;
SELECT @@global.rocksdb_update_cf_options;
@@global.rocksdb_update_cf_options
NULL
-SET @@global.rocksdb_update_cf_options = 'aaaaa';
+SET @@global.rocksdb_update_cf_options = NULL;
SELECT @@global.rocksdb_update_cf_options;
@@global.rocksdb_update_cf_options
NULL
+SET @@global.rocksdb_update_cf_options = '';
+SELECT @@global.rocksdb_update_cf_options;
+@@global.rocksdb_update_cf_options
+
+SET @@global.rocksdb_update_cf_options = 'aaaaa';;
+ERROR 42000: Variable 'rocksdb_update_cf_options' can't be set to the value of 'aaaaa'
+SELECT @@global.rocksdb_update_cf_options;
+@@global.rocksdb_update_cf_options
+
SELECT * FROM ROCKSDB_CF_OPTIONS WHERE CF_NAME='default' AND OPTION_TYPE='WRITE_BUFFER_SIZE';
CF_NAME OPTION_TYPE VALUE
default WRITE_BUFFER_SIZE 67108864
@@ -100,7 +109,12 @@ cf1={target_file_size_base=24m};foo={max_bytes_for_level_multiplier=8};
SELECT * FROM ROCKSDB_CF_OPTIONS WHERE CF_NAME='cf1' AND OPTION_TYPE='TARGET_FILE_SIZE_BASE';
CF_NAME OPTION_TYPE VALUE
cf1 TARGET_FILE_SIZE_BASE 25165824
-SET @@global.rocksdb_update_cf_options = 'default={foo=bar};';
+SET @@global.rocksdb_update_cf_options = 'default={foo=bar};';;
+ERROR 42000: Variable 'rocksdb_update_cf_options' can't be set to the value of 'default={foo=bar};'
+SELECT @@global.rocksdb_update_cf_options;
+@@global.rocksdb_update_cf_options
+cf1={target_file_size_base=24m};foo={max_bytes_for_level_multiplier=8};
+SET @@global.rocksdb_update_cf_options = NULL;
SELECT @@global.rocksdb_update_cf_options;
@@global.rocksdb_update_cf_options
NULL
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_wal_bytes_per_sync_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_wal_bytes_per_sync_basic.result
index 7da628b73fd..f432f1f7750 100644
--- a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_wal_bytes_per_sync_basic.result
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_wal_bytes_per_sync_basic.result
@@ -1,7 +1,85 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(100);
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+INSERT INTO invalid_values VALUES('\'-1\'');
+INSERT INTO invalid_values VALUES('\'101\'');
+INSERT INTO invalid_values VALUES('\'484436\'');
SET @start_global_value = @@global.ROCKSDB_WAL_BYTES_PER_SYNC;
SELECT @start_global_value;
@start_global_value
0
-"Trying to set variable @@global.ROCKSDB_WAL_BYTES_PER_SYNC to 444. It should fail because it is readonly."
-SET @@global.ROCKSDB_WAL_BYTES_PER_SYNC = 444;
-ERROR HY000: Variable 'rocksdb_wal_bytes_per_sync' is a read only variable
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_WAL_BYTES_PER_SYNC to 100"
+SET @@global.ROCKSDB_WAL_BYTES_PER_SYNC = 100;
+SELECT @@global.ROCKSDB_WAL_BYTES_PER_SYNC;
+@@global.ROCKSDB_WAL_BYTES_PER_SYNC
+100
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_WAL_BYTES_PER_SYNC = DEFAULT;
+SELECT @@global.ROCKSDB_WAL_BYTES_PER_SYNC;
+@@global.ROCKSDB_WAL_BYTES_PER_SYNC
+0
+"Trying to set variable @@global.ROCKSDB_WAL_BYTES_PER_SYNC to 1"
+SET @@global.ROCKSDB_WAL_BYTES_PER_SYNC = 1;
+SELECT @@global.ROCKSDB_WAL_BYTES_PER_SYNC;
+@@global.ROCKSDB_WAL_BYTES_PER_SYNC
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_WAL_BYTES_PER_SYNC = DEFAULT;
+SELECT @@global.ROCKSDB_WAL_BYTES_PER_SYNC;
+@@global.ROCKSDB_WAL_BYTES_PER_SYNC
+0
+"Trying to set variable @@global.ROCKSDB_WAL_BYTES_PER_SYNC to 0"
+SET @@global.ROCKSDB_WAL_BYTES_PER_SYNC = 0;
+SELECT @@global.ROCKSDB_WAL_BYTES_PER_SYNC;
+@@global.ROCKSDB_WAL_BYTES_PER_SYNC
+0
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_WAL_BYTES_PER_SYNC = DEFAULT;
+SELECT @@global.ROCKSDB_WAL_BYTES_PER_SYNC;
+@@global.ROCKSDB_WAL_BYTES_PER_SYNC
+0
+"Trying to set variable @@session.ROCKSDB_WAL_BYTES_PER_SYNC to 444. It should fail because it is not session."
+SET @@session.ROCKSDB_WAL_BYTES_PER_SYNC = 444;
+ERROR HY000: Variable 'rocksdb_wal_bytes_per_sync' is a GLOBAL variable and should be set with SET GLOBAL
+'# Testing with invalid values in global scope #'
+"Trying to set variable @@global.ROCKSDB_WAL_BYTES_PER_SYNC to 'aaa'"
+SET @@global.ROCKSDB_WAL_BYTES_PER_SYNC = 'aaa';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_WAL_BYTES_PER_SYNC;
+@@global.ROCKSDB_WAL_BYTES_PER_SYNC
+0
+"Trying to set variable @@global.ROCKSDB_WAL_BYTES_PER_SYNC to 'bbb'"
+SET @@global.ROCKSDB_WAL_BYTES_PER_SYNC = 'bbb';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_WAL_BYTES_PER_SYNC;
+@@global.ROCKSDB_WAL_BYTES_PER_SYNC
+0
+"Trying to set variable @@global.ROCKSDB_WAL_BYTES_PER_SYNC to '-1'"
+SET @@global.ROCKSDB_WAL_BYTES_PER_SYNC = '-1';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_WAL_BYTES_PER_SYNC;
+@@global.ROCKSDB_WAL_BYTES_PER_SYNC
+0
+"Trying to set variable @@global.ROCKSDB_WAL_BYTES_PER_SYNC to '101'"
+SET @@global.ROCKSDB_WAL_BYTES_PER_SYNC = '101';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_WAL_BYTES_PER_SYNC;
+@@global.ROCKSDB_WAL_BYTES_PER_SYNC
+0
+"Trying to set variable @@global.ROCKSDB_WAL_BYTES_PER_SYNC to '484436'"
+SET @@global.ROCKSDB_WAL_BYTES_PER_SYNC = '484436';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_WAL_BYTES_PER_SYNC;
+@@global.ROCKSDB_WAL_BYTES_PER_SYNC
+0
+SET @@global.ROCKSDB_WAL_BYTES_PER_SYNC = @start_global_value;
+SELECT @@global.ROCKSDB_WAL_BYTES_PER_SYNC;
+@@global.ROCKSDB_WAL_BYTES_PER_SYNC
+0
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_allow_to_start_after_corruption_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_allow_to_start_after_corruption_basic.test
new file mode 100644
index 00000000000..64fb2458424
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_allow_to_start_after_corruption_basic.test
@@ -0,0 +1,6 @@
+--source include/have_rocksdb.inc
+
+--let $sys_var=ROCKSDB_ALLOW_TO_START_AFTER_CORRUPTION
+--let $read_only=1
+--let $session=0
+--source ../include/rocksdb_sys_var.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_bytes_per_sync_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_bytes_per_sync_basic.test
index d1d6b2b5695..bf78f578b6c 100644
--- a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_bytes_per_sync_basic.test
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_bytes_per_sync_basic.test
@@ -1,7 +1,22 @@
--source include/have_rocksdb.inc
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(100);
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+INSERT INTO invalid_values VALUES('\'-1\'');
+INSERT INTO invalid_values VALUES('\'101\'');
+INSERT INTO invalid_values VALUES('\'484436\'');
+
--let $sys_var=ROCKSDB_BYTES_PER_SYNC
---let $read_only=1
+--let $read_only=0
--let $session=0
--source include/rocksdb_sys_var.inc
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_flush_memtable_on_analyze_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_flush_memtable_on_analyze_basic.test
deleted file mode 100644
index 574375cd1ea..00000000000
--- a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_flush_memtable_on_analyze_basic.test
+++ /dev/null
@@ -1,46 +0,0 @@
---source include/have_rocksdb.inc
-
---disable_warnings
-drop table if exists t1;
---enable_warnings
-
-##
-## test cardinality for analyze statements after flushing table
-##
-
-CREATE TABLE t1 (a INT AUTO_INCREMENT, b INT, PRIMARY KEY(a)) ENGINE=rocksdb;
-SHOW CREATE TABLE t1;
-INSERT INTO t1 (b) VALUES (1);
-INSERT INTO t1 (b) VALUES (2);
-INSERT INTO t1 (b) VALUES (3);
---sorted_result
-SELECT * FROM t1;
-
-set session rocksdb_flush_memtable_on_analyze=off;
-ANALYZE TABLE t1;
-SHOW INDEXES FROM t1;
-
-set session rocksdb_flush_memtable_on_analyze=on;
-ANALYZE TABLE t1;
-SHOW INDEXES FROM t1;
-DROP TABLE t1;
-
-##
-## test data length for show table status statements for tables with few rows
-##
-
-CREATE TABLE t1 (a INT AUTO_INCREMENT, b INT, PRIMARY KEY(a)) ENGINE=rocksdb;
-SHOW CREATE TABLE t1;
-INSERT INTO t1 (b) VALUES (1);
-INSERT INTO t1 (b) VALUES (2);
-INSERT INTO t1 (b) VALUES (3);
---sorted_result
-SELECT * FROM t1;
-
---replace_column 5 # 6 # 7 #
-SHOW TABLE STATUS LIKE 't1';
-ANALYZE TABLE t1;
---replace_column 5 # 6 # 7 #
-SHOW TABLE STATUS LIKE 't1';
-
-DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_ignore_unknown_options_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_ignore_unknown_options_basic.test
new file mode 100644
index 00000000000..f10ff2c6123
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_ignore_unknown_options_basic.test
@@ -0,0 +1,16 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(1024);
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+
+--let $sys_var=ROCKSDB_IGNORE_UNKNOWN_OPTIONS
+--let $read_only=1
+--let $session=0
+--source ../include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_max_open_files_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_max_open_files_basic.test
index ba3293264ab..36996761507 100644
--- a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_max_open_files_basic.test
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_max_open_files_basic.test
@@ -1,6 +1,8 @@
--source include/have_rocksdb.inc
---let $sys_var=ROCKSDB_MAX_OPEN_FILES
---let $read_only=1
---let $session=0
---source include/rocksdb_sys_var.inc
+# We can not use rocksdb_sys_var.inc here as this is a global, read only option
+# whose value is dependent on the servers open_files_limit. It is more fully
+# tested in the rocksdb.max_open_files test.
+
+--replace_column 2 #
+show variables like 'rocksdb_max_open_files';
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_concurrent_prepare_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_two_write_queues_basic.test
index 451653fe769..43579faba82 100644
--- a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_concurrent_prepare_basic.test
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_two_write_queues_basic.test
@@ -7,7 +7,7 @@ INSERT INTO valid_values VALUES(1024);
CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
INSERT INTO invalid_values VALUES('\'aaa\'');
---let $sys_var=ROCKSDB_CONCURRENT_PREPARE
+--let $sys_var=ROCKSDB_TWO_WRITE_QUEUES
--let $read_only=1
--let $session=0
--source ../include/rocksdb_sys_var.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_update_cf_options.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_update_cf_options.test
new file mode 100644
index 00000000000..03626260cab
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_update_cf_options.test
@@ -0,0 +1,22 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE t1 (a INT, PRIMARY KEY (a) COMMENT 'update_cf1') ENGINE=ROCKSDB;
+SET @@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS='update_cf1={write_buffer_size=8m;target_file_size_base=2m};';
+SELECT @@global.rocksdb_update_cf_options;
+SET @@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS=NULL;
+SHOW GLOBAL VARIABLES LIKE 'rocksdb_update_cf_options';
+SET @@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS=NULL;
+SHOW GLOBAL VARIABLES LIKE 'rocksdb_update_cf_options';
+SET @@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS="";
+SHOW GLOBAL VARIABLES LIKE 'rocksdb_update_cf_options';
+SET @@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS=NULL;
+SHOW GLOBAL VARIABLES LIKE 'rocksdb_update_cf_options';
+SET @@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS='update_cf1={write_buffer_size=8m;target_file_size_base=2m};';
+SHOW GLOBAL VARIABLES LIKE 'rocksdb_update_cf_options';
+SET @@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS='update_cf2={write_buffer_size=8m;target_file_size_base=2m};';
+SHOW GLOBAL VARIABLES LIKE 'rocksdb_update_cf_options';
+DROP TABLE t1;
+SET @@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS='update_cf1={write_buffer_size=8m;target_file_size_base=2m};';
+SHOW GLOBAL VARIABLES LIKE 'rocksdb_update_cf_options';
+SET @@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS=DEFAULT;
+SHOW GLOBAL VARIABLES LIKE 'rocksdb_update_cf_options';
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_update_cf_options_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_update_cf_options_basic.test
index 0e675dafed3..533b2db8204 100644
--- a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_update_cf_options_basic.test
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_update_cf_options_basic.test
@@ -39,8 +39,17 @@ SELECT @@global.rocksdb_update_cf_options;
SET @@global.rocksdb_update_cf_options = NULL;
SELECT @@global.rocksdb_update_cf_options;
+# Make sure that we do not double free the NULL string
+SET @@global.rocksdb_update_cf_options = NULL;
+SELECT @@global.rocksdb_update_cf_options;
+
+# Attempt setting an empty string
+SET @@global.rocksdb_update_cf_options = '';
+SELECT @@global.rocksdb_update_cf_options;
+
# Will fail to parse. Value not updated.
-SET @@global.rocksdb_update_cf_options = 'aaaaa';
+--Error ER_WRONG_VALUE_FOR_VAR
+--eval SET @@global.rocksdb_update_cf_options = 'aaaaa';
SELECT @@global.rocksdb_update_cf_options;
SELECT * FROM ROCKSDB_CF_OPTIONS WHERE CF_NAME='default' AND OPTION_TYPE='WRITE_BUFFER_SIZE';
@@ -87,7 +96,11 @@ SELECT * FROM ROCKSDB_CF_OPTIONS WHERE CF_NAME='cf1' AND OPTION_TYPE='TARGET_FIL
# Will fail to parse. No valid assignments included. Value not updated and
# reset to NULL.
-SET @@global.rocksdb_update_cf_options = 'default={foo=bar};';
+--Error ER_WRONG_VALUE_FOR_VAR
+--eval SET @@global.rocksdb_update_cf_options = 'default={foo=bar};';
+SELECT @@global.rocksdb_update_cf_options;
+
+SET @@global.rocksdb_update_cf_options = NULL;
SELECT @@global.rocksdb_update_cf_options;
USE test;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_wal_bytes_per_sync_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_wal_bytes_per_sync_basic.test
index afab0f20d40..9c2a1f4f391 100644
--- a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_wal_bytes_per_sync_basic.test
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_wal_bytes_per_sync_basic.test
@@ -1,6 +1,22 @@
--source include/have_rocksdb.inc
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(100);
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+INSERT INTO invalid_values VALUES('\'-1\'');
+INSERT INTO invalid_values VALUES('\'101\'');
+INSERT INTO invalid_values VALUES('\'484436\'');
+
--let $sys_var=ROCKSDB_WAL_BYTES_PER_SYNC
---let $read_only=1
+--let $read_only=0
--let $session=0
--source include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
+
diff --git a/storage/rocksdb/patch/port/win/io_win.h b/storage/rocksdb/patch/port/win/io_win.h
deleted file mode 100644
index f5ff253bbaa..00000000000
--- a/storage/rocksdb/patch/port/win/io_win.h
+++ /dev/null
@@ -1,446 +0,0 @@
-// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
-// This source code is licensed under both the GPLv2 (found in the
-// COPYING file in the root directory) and Apache 2.0 License
-// (found in the LICENSE.Apache file in the root directory).
-//
-// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file. See the AUTHORS file for names of contributors.
-#pragma once
-
-#include <stdint.h>
-#include <mutex>
-#include <string>
-
-#include "rocksdb/status.h"
-#include "rocksdb/env.h"
-#include "util/aligned_buffer.h"
-
-#include <windows.h>
-
-
-namespace rocksdb {
-namespace port {
-
-std::string GetWindowsErrSz(DWORD err);
-
-inline Status IOErrorFromWindowsError(const std::string& context, DWORD err) {
- return ((err == ERROR_HANDLE_DISK_FULL) || (err == ERROR_DISK_FULL))
- ? Status::NoSpace(context, GetWindowsErrSz(err))
- : Status::IOError(context, GetWindowsErrSz(err));
-}
-
-inline Status IOErrorFromLastWindowsError(const std::string& context) {
- return IOErrorFromWindowsError(context, GetLastError());
-}
-
-inline Status IOError(const std::string& context, int err_number) {
- return (err_number == ENOSPC)
- ? Status::NoSpace(context, strerror(err_number))
- : Status::IOError(context, strerror(err_number));
-}
-
-// Note the below two do not set errno because they are used only here in this
-// file
-// on a Windows handle and, therefore, not necessary. Translating GetLastError()
-// to errno
-// is a sad business
-inline int fsync(HANDLE hFile) {
- if (!FlushFileBuffers(hFile)) {
- return -1;
- }
-
- return 0;
-}
-
-SSIZE_T pwrite(HANDLE hFile, const char* src, size_t numBytes, uint64_t offset);
-
-SSIZE_T pread(HANDLE hFile, char* src, size_t numBytes, uint64_t offset);
-
-Status fallocate(const std::string& filename, HANDLE hFile, uint64_t to_size);
-
-Status ftruncate(const std::string& filename, HANDLE hFile, uint64_t toSize);
-
-size_t GetUniqueIdFromFile(HANDLE hFile, char* id, size_t max_size);
-
-class WinFileData {
- protected:
- const std::string filename_;
- HANDLE hFile_;
- // If ture, the I/O issued would be direct I/O which the buffer
- // will need to be aligned (not sure there is a guarantee that the buffer
- // passed in is aligned).
- const bool use_direct_io_;
-
- public:
- // We want this class be usable both for inheritance (prive
- // or protected) and for containment so __ctor and __dtor public
- WinFileData(const std::string& filename, HANDLE hFile, bool direct_io)
- : filename_(filename), hFile_(hFile), use_direct_io_(direct_io) {}
-
- virtual ~WinFileData() { this->CloseFile(); }
-
- bool CloseFile() {
- bool result = true;
-
- if (hFile_ != NULL && hFile_ != INVALID_HANDLE_VALUE) {
- result = ::CloseHandle(hFile_);
- assert(result);
- hFile_ = NULL;
- }
- return result;
- }
-
- const std::string& GetName() const { return filename_; }
-
- HANDLE GetFileHandle() const { return hFile_; }
-
- bool use_direct_io() const { return use_direct_io_; }
-
- WinFileData(const WinFileData&) = delete;
- WinFileData& operator=(const WinFileData&) = delete;
-};
-
-class WinSequentialFile : protected WinFileData, public SequentialFile {
-
- // Override for behavior change when creating a custom env
- virtual SSIZE_T PositionedReadInternal(char* src, size_t numBytes,
- uint64_t offset) const;
-
-public:
- WinSequentialFile(const std::string& fname, HANDLE f,
- const EnvOptions& options);
-
- ~WinSequentialFile();
-
- WinSequentialFile(const WinSequentialFile&) = delete;
- WinSequentialFile& operator=(const WinSequentialFile&) = delete;
-
- virtual Status Read(size_t n, Slice* result, char* scratch) override;
- virtual Status PositionedRead(uint64_t offset, size_t n, Slice* result,
- char* scratch) override;
-
- virtual Status Skip(uint64_t n) override;
-
- virtual Status InvalidateCache(size_t offset, size_t length) override;
-
- virtual bool use_direct_io() const override { return WinFileData::use_direct_io(); }
-};
-
-// mmap() based random-access
-class WinMmapReadableFile : private WinFileData, public RandomAccessFile {
- HANDLE hMap_;
-
- const void* mapped_region_;
- const size_t length_;
-
- public:
- // mapped_region_[0,length-1] contains the mmapped contents of the file.
- WinMmapReadableFile(const std::string& fileName, HANDLE hFile, HANDLE hMap,
- const void* mapped_region, size_t length);
-
- ~WinMmapReadableFile();
-
- WinMmapReadableFile(const WinMmapReadableFile&) = delete;
- WinMmapReadableFile& operator=(const WinMmapReadableFile&) = delete;
-
- virtual Status Read(uint64_t offset, size_t n, Slice* result,
- char* scratch) const override;
-
- virtual Status InvalidateCache(size_t offset, size_t length) override;
-
- virtual size_t GetUniqueId(char* id, size_t max_size) const override;
-};
-
-// We preallocate and use memcpy to append new
-// data to the file. This is safe since we either properly close the
-// file before reading from it, or for log files, the reading code
-// knows enough to skip zero suffixes.
-class WinMmapFile : private WinFileData, public WritableFile {
- private:
- HANDLE hMap_;
-
- const size_t page_size_; // We flush the mapping view in page_size
- // increments. We may decide if this is a memory
- // page size or SSD page size
- const size_t
- allocation_granularity_; // View must start at such a granularity
-
- size_t reserved_size_; // Preallocated size
-
- size_t mapping_size_; // The max size of the mapping object
- // we want to guess the final file size to minimize the remapping
- size_t view_size_; // How much memory to map into a view at a time
-
- char* mapped_begin_; // Must begin at the file offset that is aligned with
- // allocation_granularity_
- char* mapped_end_;
- char* dst_; // Where to write next (in range [mapped_begin_,mapped_end_])
- char* last_sync_; // Where have we synced up to
-
- uint64_t file_offset_; // Offset of mapped_begin_ in file
-
- // Do we have unsynced writes?
- bool pending_sync_;
-
- // Can only truncate or reserve to a sector size aligned if
- // used on files that are opened with Unbuffered I/O
- Status TruncateFile(uint64_t toSize);
-
- Status UnmapCurrentRegion();
-
- Status MapNewRegion();
-
- virtual Status PreallocateInternal(uint64_t spaceToReserve);
-
- public:
- WinMmapFile(const std::string& fname, HANDLE hFile, size_t page_size,
- size_t allocation_granularity, const EnvOptions& options);
-
- ~WinMmapFile();
-
- WinMmapFile(const WinMmapFile&) = delete;
- WinMmapFile& operator=(const WinMmapFile&) = delete;
-
- virtual Status Append(const Slice& data) override;
-
- // Means Close() will properly take care of truncate
- // and it does not need any additional information
- virtual Status Truncate(uint64_t size) override;
-
- virtual Status Close() override;
-
- virtual Status Flush() override;
-
- // Flush only data
- virtual Status Sync() override;
-
- /**
- * Flush data as well as metadata to stable storage.
- */
- virtual Status Fsync() override;
-
- /**
- * Get the size of valid data in the file. This will not match the
- * size that is returned from the filesystem because we use mmap
- * to extend file by map_size every time.
- */
- virtual uint64_t GetFileSize() override;
-
- virtual Status InvalidateCache(size_t offset, size_t length) override;
-
- virtual Status Allocate(uint64_t offset, uint64_t len) override;
-
- virtual size_t GetUniqueId(char* id, size_t max_size) const override;
-};
-
-class WinRandomAccessImpl {
- protected:
- WinFileData* file_base_;
- size_t alignment_;
-
- // Override for behavior change when creating a custom env
- virtual SSIZE_T PositionedReadInternal(char* src, size_t numBytes,
- uint64_t offset) const;
-
- WinRandomAccessImpl(WinFileData* file_base, size_t alignment,
- const EnvOptions& options);
-
- virtual ~WinRandomAccessImpl() {}
-
- Status ReadImpl(uint64_t offset, size_t n, Slice* result,
- char* scratch) const;
-
- size_t GetAlignment() const { return alignment_; }
-
- public:
-
- WinRandomAccessImpl(const WinRandomAccessImpl&) = delete;
- WinRandomAccessImpl& operator=(const WinRandomAccessImpl&) = delete;
-};
-
-// pread() based random-access
-class WinRandomAccessFile
- : private WinFileData,
- protected WinRandomAccessImpl, // Want to be able to override
- // PositionedReadInternal
- public RandomAccessFile {
- public:
- WinRandomAccessFile(const std::string& fname, HANDLE hFile, size_t alignment,
- const EnvOptions& options);
-
- ~WinRandomAccessFile();
-
- virtual Status Read(uint64_t offset, size_t n, Slice* result,
- char* scratch) const override;
-
- virtual size_t GetUniqueId(char* id, size_t max_size) const override;
-
- virtual bool use_direct_io() const override { return WinFileData::use_direct_io(); }
-
- virtual Status InvalidateCache(size_t offset, size_t length) override;
-
- virtual size_t GetRequiredBufferAlignment() const override;
-};
-
-// This is a sequential write class. It has been mimicked (as others) after
-// the original Posix class. We add support for unbuffered I/O on windows as
-// well
-// we utilize the original buffer as an alignment buffer to write directly to
-// file with no buffering.
-// No buffering requires that the provided buffer is aligned to the physical
-// sector size (SSD page size) and
-// that all SetFilePointer() operations to occur with such an alignment.
-// We thus always write in sector/page size increments to the drive and leave
-// the tail for the next write OR for Close() at which point we pad with zeros.
-// No padding is required for
-// buffered access.
-class WinWritableImpl {
- protected:
- WinFileData* file_data_;
- const uint64_t alignment_;
- uint64_t next_write_offset_; // Needed because Windows does not support O_APPEND
- uint64_t reservedsize_; // how far we have reserved space
-
- virtual Status PreallocateInternal(uint64_t spaceToReserve);
-
- WinWritableImpl(WinFileData* file_data, size_t alignment);
-
- ~WinWritableImpl() {}
-
-
- uint64_t GetAlignement() const { return alignment_; }
-
- Status AppendImpl(const Slice& data);
-
- // Requires that the data is aligned as specified by
- // GetRequiredBufferAlignment()
- Status PositionedAppendImpl(const Slice& data, uint64_t offset);
-
- Status TruncateImpl(uint64_t size);
-
- Status CloseImpl();
-
- Status SyncImpl();
-
- uint64_t GetFileNextWriteOffset() {
- // Double accounting now here with WritableFileWriter
- // and this size will be wrong when unbuffered access is used
- // but tests implement their own writable files and do not use
- // WritableFileWrapper
- // so we need to squeeze a square peg through
- // a round hole here.
- return next_write_offset_;
- }
-
- Status AllocateImpl(uint64_t offset, uint64_t len);
-
- public:
- WinWritableImpl(const WinWritableImpl&) = delete;
- WinWritableImpl& operator=(const WinWritableImpl&) = delete;
-};
-
-class WinWritableFile : private WinFileData,
- protected WinWritableImpl,
- public WritableFile {
- public:
- WinWritableFile(const std::string& fname, HANDLE hFile, size_t alignment,
- size_t capacity, const EnvOptions& options);
-
- ~WinWritableFile();
-
- bool IsSyncThreadSafe() const override {
- return true;
- }
-
- virtual Status Append(const Slice& data) override;
-
- // Requires that the data is aligned as specified by
- // GetRequiredBufferAlignment()
- virtual Status PositionedAppend(const Slice& data, uint64_t offset) override;
-
- // Need to implement this so the file is truncated correctly
- // when buffered and unbuffered mode
- virtual Status Truncate(uint64_t size) override;
-
- virtual Status Close() override;
-
- // write out the cached data to the OS cache
- // This is now taken care of the WritableFileWriter
- virtual Status Flush() override;
-
- virtual Status Sync() override;
-
- virtual Status Fsync() override;
-
- // Indicates if the class makes use of direct I/O
- // Use PositionedAppend
- virtual bool use_direct_io() const override;
-
- virtual size_t GetRequiredBufferAlignment() const override;
-
- virtual uint64_t GetFileSize() override;
-
- virtual Status Allocate(uint64_t offset, uint64_t len) override;
-
- virtual size_t GetUniqueId(char* id, size_t max_size) const override;
-};
-
-class WinRandomRWFile : private WinFileData,
- protected WinRandomAccessImpl,
- protected WinWritableImpl,
- public RandomRWFile {
- public:
- WinRandomRWFile(const std::string& fname, HANDLE hFile, size_t alignment,
- const EnvOptions& options);
-
- ~WinRandomRWFile() {}
-
- // Indicates if the class makes use of direct I/O
- // If false you must pass aligned buffer to Write()
- virtual bool use_direct_io() const override;
-
- // Use the returned alignment value to allocate aligned
- // buffer for Write() when use_direct_io() returns true
- virtual size_t GetRequiredBufferAlignment() const override;
-
- // Write bytes in `data` at offset `offset`, Returns Status::OK() on success.
- // Pass aligned buffer when use_direct_io() returns true.
- virtual Status Write(uint64_t offset, const Slice& data) override;
-
- // Read up to `n` bytes starting from offset `offset` and store them in
- // result, provided `scratch` size should be at least `n`.
- // Returns Status::OK() on success.
- virtual Status Read(uint64_t offset, size_t n, Slice* result,
- char* scratch) const override;
-
- virtual Status Flush() override;
-
- virtual Status Sync() override;
-
- virtual Status Fsync() { return Sync(); }
-
- virtual Status Close() override;
-};
-
-class WinDirectory : public Directory {
- public:
- WinDirectory() {}
-
- virtual Status Fsync() override;
-};
-
-class WinFileLock : public FileLock {
- public:
- explicit WinFileLock(HANDLE hFile) : hFile_(hFile) {
- assert(hFile != NULL);
- assert(hFile != INVALID_HANDLE_VALUE);
- }
-
- ~WinFileLock();
-
- private:
- HANDLE hFile_;
-};
-}
-}
diff --git a/storage/rocksdb/properties_collector.cc b/storage/rocksdb/properties_collector.cc
index 368e6d88d1c..92396b23f7e 100644
--- a/storage/rocksdb/properties_collector.cc
+++ b/storage/rocksdb/properties_collector.cc
@@ -54,17 +54,9 @@ Rdb_tbl_prop_coll::Rdb_tbl_prop_coll(Rdb_ddl_manager *const ddl_manager,
: m_cf_id(cf_id), m_ddl_manager(ddl_manager), m_last_stats(nullptr),
m_rows(0l), m_window_pos(0l), m_deleted_rows(0l), m_max_deleted_rows(0l),
m_file_size(0), m_params(params),
- m_table_stats_sampling_pct(table_stats_sampling_pct),
- m_seed(time(nullptr)), m_card_adj_extra(1.) {
+ m_cardinality_collector(table_stats_sampling_pct) {
DBUG_ASSERT(ddl_manager != nullptr);
- // We need to adjust the index cardinality numbers based on the sampling
- // rate so that the output of "SHOW INDEX" command will reflect reality
- // more closely. It will still be an approximation, just a better one.
- if (m_table_stats_sampling_pct > 0) {
- m_card_adj_extra = 100. / m_table_stats_sampling_pct;
- }
-
m_deleted_rows_window.resize(m_params.m_window, false);
}
@@ -147,7 +139,7 @@ Rdb_index_stats *Rdb_tbl_prop_coll::AccessStats(const rocksdb::Slice &key) {
m_last_stats->m_name = m_keydef->get_name();
}
}
- m_last_key.clear();
+ m_cardinality_collector.Reset();
}
return m_last_stats;
@@ -157,7 +149,7 @@ void Rdb_tbl_prop_coll::CollectStatsForRow(const rocksdb::Slice &key,
const rocksdb::Slice &value,
const rocksdb::EntryType &type,
const uint64_t &file_size) {
- const auto stats = AccessStats(key);
+ auto stats = AccessStats(key);
stats->m_data_size += key.size() + value.size();
@@ -183,38 +175,15 @@ void Rdb_tbl_prop_coll::CollectStatsForRow(const rocksdb::Slice &key,
sql_print_error("RocksDB: Unexpected entry type found: %u. "
"This should not happen so aborting the system.",
type);
- abort_with_stack_traces();
+ abort();
break;
}
stats->m_actual_disk_size += file_size - m_file_size;
m_file_size = file_size;
- if (m_keydef != nullptr && ShouldCollectStats()) {
- std::size_t column = 0;
- bool new_key = true;
-
- if (!m_last_key.empty()) {
- rocksdb::Slice last(m_last_key.data(), m_last_key.size());
- new_key = (m_keydef->compare_keys(&last, &key, &column) == 0);
- }
-
- if (new_key) {
- DBUG_ASSERT(column <= stats->m_distinct_keys_per_prefix.size());
-
- for (auto i = column; i < stats->m_distinct_keys_per_prefix.size(); i++) {
- stats->m_distinct_keys_per_prefix[i]++;
- }
-
- // assign new last_key for the next call
- // however, we only need to change the last key
- // if one of the first n-1 columns is different
- // If the n-1 prefix is the same, no sense in storing
- // the new key
- if (column < stats->m_distinct_keys_per_prefix.size()) {
- m_last_key.assign(key.data(), key.size());
- }
- }
+ if (m_keydef != nullptr) {
+ m_cardinality_collector.ProcessKey(key, m_keydef.get(), stats);
}
}
@@ -261,8 +230,10 @@ Rdb_tbl_prop_coll::Finish(rocksdb::UserCollectedProperties *const properties) {
rocksdb_num_sst_entry_other += num_sst_entry_other;
}
- properties->insert({INDEXSTATS_KEY,
- Rdb_index_stats::materialize(m_stats, m_card_adj_extra)});
+ for (Rdb_index_stats &stat : m_stats) {
+ m_cardinality_collector.AdjustStats(&stat);
+ }
+ properties->insert({INDEXSTATS_KEY, Rdb_index_stats::materialize(m_stats)});
return rocksdb::Status::OK();
}
@@ -272,23 +243,6 @@ bool Rdb_tbl_prop_coll::NeedCompact() const {
(m_max_deleted_rows > m_params.m_deletes);
}
-bool Rdb_tbl_prop_coll::ShouldCollectStats() {
- // Zero means that we'll use all the keys to update statistics.
- if (!m_table_stats_sampling_pct ||
- RDB_TBL_STATS_SAMPLE_PCT_MAX == m_table_stats_sampling_pct) {
- return true;
- }
-
- const int val = rand_r(&m_seed) % (RDB_TBL_STATS_SAMPLE_PCT_MAX -
- RDB_TBL_STATS_SAMPLE_PCT_MIN + 1) +
- RDB_TBL_STATS_SAMPLE_PCT_MIN;
-
- DBUG_ASSERT(val >= RDB_TBL_STATS_SAMPLE_PCT_MIN);
- DBUG_ASSERT(val <= RDB_TBL_STATS_SAMPLE_PCT_MAX);
-
- return val <= m_table_stats_sampling_pct;
-}
-
/*
Returns the same as above, but in human-readable way for logging
*/
@@ -365,8 +319,7 @@ void Rdb_tbl_prop_coll::read_stats_from_tbl_props(
Serializes an array of Rdb_index_stats into a network string.
*/
std::string
-Rdb_index_stats::materialize(const std::vector<Rdb_index_stats> &stats,
- const float card_adj_extra) {
+Rdb_index_stats::materialize(const std::vector<Rdb_index_stats> &stats) {
String ret;
rdb_netstr_append_uint16(&ret, INDEX_STATS_VERSION_ENTRY_TYPES);
for (const auto &i : stats) {
@@ -382,8 +335,7 @@ Rdb_index_stats::materialize(const std::vector<Rdb_index_stats> &stats,
rdb_netstr_append_uint64(&ret, i.m_entry_merges);
rdb_netstr_append_uint64(&ret, i.m_entry_others);
for (const auto &num_keys : i.m_distinct_keys_per_prefix) {
- const float upd_num_keys = num_keys * card_adj_extra;
- rdb_netstr_append_uint64(&ret, static_cast<int64_t>(upd_num_keys));
+ rdb_netstr_append_uint64(&ret, num_keys);
}
}
@@ -416,7 +368,7 @@ int Rdb_index_stats::unmaterialize(const std::string &s,
sql_print_error("Index stats version %d was outside of supported range. "
"This should not happen so aborting the system.",
version);
- abort_with_stack_traces();
+ abort();
}
size_t needed = sizeof(stats.m_gl_index_id.cf_id) +
@@ -521,4 +473,75 @@ void Rdb_index_stats::merge(const Rdb_index_stats &s, const bool &increment,
}
}
+Rdb_tbl_card_coll::Rdb_tbl_card_coll(const uint8_t &table_stats_sampling_pct)
+ : m_table_stats_sampling_pct(table_stats_sampling_pct),
+ m_seed(time(nullptr)) {} // current time seeds the rand_r() sampling draws
+
+bool Rdb_tbl_card_coll::IsSampingDisabled() {
+  // Sampling is off (every key is used) at a rate of zero or at the maximum.
+  const auto pct = m_table_stats_sampling_pct;
+  return pct == 0 || pct == RDB_TBL_STATS_SAMPLE_PCT_MAX;
+}
+
+// Decides, per key, whether the key takes part in the cardinality sample.
+bool Rdb_tbl_card_coll::ShouldCollectStats() {
+  // With sampling switched off every key is counted.
+  if (IsSampingDisabled()) return true;
+
+  // Draw a pseudo-random value in [PCT_MIN, PCT_MAX]; rand_r() updates m_seed.
+  const auto span =
+      RDB_TBL_STATS_SAMPLE_PCT_MAX - RDB_TBL_STATS_SAMPLE_PCT_MIN + 1;
+  const int roll = rand_r(&m_seed) % span + RDB_TBL_STATS_SAMPLE_PCT_MIN;
+  DBUG_ASSERT(roll >= RDB_TBL_STATS_SAMPLE_PCT_MIN);
+  DBUG_ASSERT(roll <= RDB_TBL_STATS_SAMPLE_PCT_MAX);
+
+  return roll <= m_table_stats_sampling_pct;
+}
+
+void Rdb_tbl_card_coll::ProcessKey(const rocksdb::Slice &key,
+                                   const Rdb_key_def *keydef,
+                                   Rdb_index_stats *stats) {
+  // Keys that fall outside the sample are ignored entirely.
+  if (!ShouldCollectStats()) {
+    return;
+  }
+
+  // Output slot handed to compare_keys(); it stays 0 when no previous key is
+  // stored, in which case every prefix counter below is incremented.
+  std::size_t first_diff = 0;
+  if (!m_last_key.empty()) {
+    rocksdb::Slice prev(m_last_key.data(), m_last_key.size());
+    if (keydef->compare_keys(&prev, &key, &first_diff) != 0) {
+      return;  // non-zero result: this key is not counted
+    }
+  }
+
+  auto &counters = stats->m_distinct_keys_per_prefix;
+  DBUG_ASSERT(first_diff <= counters.size());
+  for (auto idx = first_diff; idx < counters.size(); ++idx) {
+    counters[idx]++;
+  }
+
+  // Store the key for the next call only if one of the first n-1 columns
+  // changed; with an identical n-1 prefix there is no point in copying it.
+  if (first_diff < counters.size()) {
+    m_last_key.assign(key.data(), key.size());
+  }
+}
+
+void Rdb_tbl_card_coll::Reset() { m_last_key.clear(); } // forget the previously processed key
+
+// Scales the sampled per-prefix distinct-key counts up by 100 / sampling
+// percentage so that "SHOW INDEX" output is closer to reality. The result is
+// still an approximation, only a better one.
+void Rdb_tbl_card_coll::AdjustStats(Rdb_index_stats *stats) {
+  // Nothing to extrapolate when every key was counted.
+  if (IsSampingDisabled()) return;
+
+  auto &counters = stats->m_distinct_keys_per_prefix;
+  for (auto it = counters.begin(); it != counters.end(); ++it) {
+    *it = *it * 100 / m_table_stats_sampling_pct;
+  }
+}
+
} // namespace myrocks
diff --git a/storage/rocksdb/properties_collector.h b/storage/rocksdb/properties_collector.h
index 9ae519d95c7..1441d893420 100644
--- a/storage/rocksdb/properties_collector.h
+++ b/storage/rocksdb/properties_collector.h
@@ -56,8 +56,7 @@ struct Rdb_index_stats {
std::vector<int64_t> m_distinct_keys_per_prefix;
std::string m_name; // name is not persisted
- static std::string materialize(const std::vector<Rdb_index_stats> &stats,
- const float card_adj_extra);
+ static std::string materialize(const std::vector<Rdb_index_stats> &stats);
static int unmaterialize(const std::string &s,
std::vector<Rdb_index_stats> *const ret);
@@ -71,6 +70,40 @@ struct Rdb_index_stats {
const int64_t &estimated_data_len = 0);
};
+// Helper class that calculates index cardinality, optionally from a sample.
+class Rdb_tbl_card_coll {
+ public:
+ explicit Rdb_tbl_card_coll(const uint8_t &table_stats_sampling_pct);
+
+ public:
+ void ProcessKey(const rocksdb::Slice &key, const Rdb_key_def *keydef,
+ Rdb_index_stats *stats);
+ /*
+ * Resets the state of the collector so that it can start calculating
+ * statistics for the next index.
+ */
+ void Reset();
+
+ /*
+ * Cardinality statistics might be calculated using some sampling strategy.
+ * This method adjusts the gathered statistics according to the sampling
+ * strategy used. Note that the adjusted cardinality is just an estimate
+ * and can exceed the number of rows in a table, so the value should be
+ * capped by the row count before it is used by an optimizer or displayed
+ * to a client.
+ */
+ void AdjustStats(Rdb_index_stats *stats);
+
+ private:
+ bool ShouldCollectStats();
+ bool IsSampingDisabled();
+
+ private:
+ std::string m_last_key; // previously processed key, cleared by Reset()
+ uint8_t m_table_stats_sampling_pct; // sampling rate in percent
+ unsigned int m_seed; // state passed to rand_r()
+};
+
class Rdb_tbl_prop_coll : public rocksdb::TablePropertiesCollector {
public:
Rdb_tbl_prop_coll(Rdb_ddl_manager *const ddl_manager,
@@ -130,9 +163,7 @@ private:
uint64_t m_rows, m_window_pos, m_deleted_rows, m_max_deleted_rows;
uint64_t m_file_size;
Rdb_compact_params m_params;
- uint8_t m_table_stats_sampling_pct;
- unsigned int m_seed;
- float m_card_adj_extra;
+ Rdb_tbl_card_coll m_cardinality_collector;
};
class Rdb_tbl_prop_coll_factory
diff --git a/storage/rocksdb/rdb_cf_options.cc b/storage/rocksdb/rdb_cf_options.cc
index e608580c666..4d97ab8058d 100644
--- a/storage/rocksdb/rdb_cf_options.cc
+++ b/storage/rocksdb/rdb_cf_options.cc
@@ -325,6 +325,13 @@ Rdb_cf_options::get_cf_comparator(const std::string &cf_name) {
}
}
+std::shared_ptr<rocksdb::MergeOperator>
+Rdb_cf_options::get_cf_merge_operator(const std::string &cf_name) {
+  // Only the system column family gets a merge operator; others get none.
+  if (cf_name != DEFAULT_SYSTEM_CF_NAME) return nullptr;
+  return std::make_shared<Rdb_system_merge_op>();
+}
+
void Rdb_cf_options::get_cf_options(const std::string &cf_name,
rocksdb::ColumnFamilyOptions *const opts) {
DBUG_ASSERT(opts != nullptr);
@@ -334,6 +341,7 @@ void Rdb_cf_options::get_cf_options(const std::string &cf_name,
// Set the comparator according to 'rev:'
opts->comparator = get_cf_comparator(cf_name);
+ opts->merge_operator = get_cf_merge_operator(cf_name);
}
} // namespace myrocks
diff --git a/storage/rocksdb/rdb_cf_options.h b/storage/rocksdb/rdb_cf_options.h
index 32f2308284f..19e5da6a79e 100644
--- a/storage/rocksdb/rdb_cf_options.h
+++ b/storage/rocksdb/rdb_cf_options.h
@@ -64,6 +64,9 @@ public:
static const rocksdb::Comparator *
get_cf_comparator(const std::string &cf_name);
+ std::shared_ptr<rocksdb::MergeOperator>
+ get_cf_merge_operator(const std::string &cf_name);
+
void get_cf_options(const std::string &cf_name,
rocksdb::ColumnFamilyOptions *const opts)
MY_ATTRIBUTE((__nonnull__));
diff --git a/storage/rocksdb/rdb_compact_filter.h b/storage/rocksdb/rdb_compact_filter.h
index 9bc7ece6e7a..20ae3c740c1 100644
--- a/storage/rocksdb/rdb_compact_filter.h
+++ b/storage/rocksdb/rdb_compact_filter.h
@@ -167,7 +167,7 @@ public:
sql_print_error("Decoding ttl from PK value failed in compaction filter, "
"for index (%u,%u), val: %s",
m_prev_index.cf_id, m_prev_index.index_id, buf.c_str());
- abort_with_stack_traces();
+ abort();
}
/*
diff --git a/storage/rocksdb/rdb_datadic.cc b/storage/rocksdb/rdb_datadic.cc
index 01dc2d6b176..a38711e6c6b 100644
--- a/storage/rocksdb/rdb_datadic.cc
+++ b/storage/rocksdb/rdb_datadic.cc
@@ -32,6 +32,7 @@
#include <limits>
#include <map>
#include <set>
+#include <string>
#include <utility>
#include <vector>
@@ -826,6 +827,25 @@ int Rdb_key_def::successor(uchar *const packed_tuple, const uint &len) {
return changed;
}
+/*
+  Decrements the packed tuple in place, borrowing through trailing 0x00 bytes.
+  The first byte is never modified. @return Number of bytes that were changed
+*/
+int Rdb_key_def::predecessor(uchar *const packed_tuple, const uint &len) {
+  DBUG_ASSERT(packed_tuple != nullptr);
+  int touched = 0;
+  for (uchar *cur = packed_tuple + len - 1; cur > packed_tuple; --cur) {
+    ++touched;
+    if (*cur == uchar(0x00)) {
+      *cur = 0xFF;  // borrow from the next more significant byte
+      continue;
+    }
+    *cur = *cur - 1;
+    break;
+  }
+  return touched;
+}
+
static const std::map<char, size_t> UNPACK_HEADER_SIZES = {
{RDB_UNPACK_DATA_TAG, RDB_UNPACK_HEADER_SIZE},
{RDB_UNPACK_COVERED_DATA_TAG, RDB_UNPACK_COVERED_HEADER_SIZE}};
@@ -1429,11 +1449,11 @@ int Rdb_key_def::unpack_record(TABLE *const table, uchar *const buf,
MY_BITMAP covered_bitmap;
my_bitmap_map covered_bits;
uint curr_bitmap_pos = 0;
- bitmap_init(&covered_bitmap, &covered_bits, MAX_REF_PARTS, false);
const bool has_covered_bitmap =
has_unpack_info && (unpack_header[0] == RDB_UNPACK_COVERED_DATA_TAG);
if (has_covered_bitmap) {
+ bitmap_init(&covered_bitmap, &covered_bits, MAX_REF_PARTS, false);
covered_bits = rdb_netbuf_to_uint16((const uchar *)unpack_header +
sizeof(RDB_UNPACK_COVERED_DATA_TAG) +
RDB_UNPACK_COVERED_DATA_LEN_SIZE);
@@ -1508,6 +1528,18 @@ int Rdb_key_def::unpack_record(TABLE *const table, uchar *const buf,
}
if ((this->*fpi->m_skip_func)(fpi, field, &reader))
return HA_ERR_ROCKSDB_CORRUPT_DATA;
+
+ // If this is a space padded varchar, we need to skip the indicator
+ // bytes for trailing bytes. They're useless since we can't restore the
+ // field anyway.
+ //
+ // There is a special case for prefixed varchars where we do not
+ // generate unpack info, because we know prefixed varchars cannot be
+ // unpacked. In this case, it is not necessary to skip.
+ if (fpi->m_skip_func == &Rdb_key_def::skip_variable_space_pad &&
+ !fpi->m_unpack_info_stores_value) {
+ unp_reader.read(fpi->m_unpack_info_uses_two_bytes ? 2 : 1);
+ }
}
}
@@ -3487,6 +3519,20 @@ void Rdb_tbl_def::set_name(const std::string &name) {
check_if_is_mysql_system_table();
}
+// Returns the GL_INDEX_ID of the table's primary key, explicit or hidden.
+GL_INDEX_ID Rdb_tbl_def::get_autoincr_gl_index_id() {
+  for (uint idx = 0; idx < m_key_count; ++idx) {
+    const auto &key_descr = m_key_descr_arr[idx];
+    const auto type = key_descr->m_index_type;
+    const bool is_pk = type == Rdb_key_def::INDEX_TYPE_PRIMARY ||
+                       type == Rdb_key_def::INDEX_TYPE_HIDDEN_PRIMARY;
+    if (is_pk) return key_descr->get_gl_index_id();
+  }
+  // Every table must have a primary key, even if it's hidden.
+  abort();
+  return GL_INDEX_ID();
+}
+
/*
Static function of type my_hash_get_key that gets invoked by
the m_ddl_hash object of type my_core::HASH.
@@ -3714,6 +3760,68 @@ bool Rdb_validate_tbls::compare_to_actual_tables(const std::string &datadir,
}
/*
+ Validate that all auto increment values in the data dictionary are on a
+ supported version.
+*/
+bool Rdb_ddl_manager::validate_auto_incr() {
+  std::unique_ptr<rocksdb::Iterator> iter(m_dict->new_iterator());
+
+  // Key prefix shared by every AUTO_INC record in the data dictionary.
+  uchar prefix[Rdb_key_def::INDEX_NUMBER_SIZE];
+  rdb_netbuf_store_index(prefix, Rdb_key_def::AUTO_INC);
+  const rocksdb::Slice prefix_slice(reinterpret_cast<char *>(prefix),
+                                    Rdb_key_def::INDEX_NUMBER_SIZE);
+
+  iter->Seek(prefix_slice);
+  while (iter->Valid()) {
+    const rocksdb::Slice key = iter->key();
+    const rocksdb::Slice val = iter->value();
+
+    // Stop at the first key whose leading index number is not AUTO_INC.
+    const bool has_prefix = key.size() >= Rdb_key_def::INDEX_NUMBER_SIZE;
+    if (has_prefix &&
+        memcmp(key.data(), prefix, Rdb_key_def::INDEX_NUMBER_SIZE) != 0) {
+      break;
+    }
+
+    // Reject records whose key or value has an unexpected size.
+    if (key.size() != Rdb_key_def::INDEX_NUMBER_SIZE * 3 ||
+        val.size() <= Rdb_key_def::VERSION_SIZE) {
+      return false;
+    }
+
+    // Check if we have orphaned entries for whatever reason by cross
+    // referencing ddl entries.
+    GL_INDEX_ID gl_index_id;
+    const uchar *cursor = reinterpret_cast<const uchar *>(key.data()) +
+                          Rdb_key_def::INDEX_NUMBER_SIZE;
+    rdb_netbuf_read_gl_index(&cursor, &gl_index_id);
+    if (!m_dict->get_index_info(gl_index_id, nullptr)) {
+      // NO_LINT_DEBUG
+      sql_print_warning("RocksDB: AUTOINC mismatch - "
+                        "Index number (%u, %u) found in AUTOINC "
+                        "but does not exist as a DDL entry",
+                        gl_index_id.cf_id, gl_index_id.index_id);
+      return false;
+    }
+
+    cursor = reinterpret_cast<const uchar *>(val.data());
+    const int version = rdb_netbuf_read_uint16(&cursor);
+    if (version > Rdb_key_def::AUTO_INCREMENT_VERSION) {
+      // NO_LINT_DEBUG
+      sql_print_warning("RocksDB: AUTOINC mismatch - "
+                        "Index number (%u, %u) found in AUTOINC "
+                        "is on unsupported version %d",
+                        gl_index_id.cf_id, gl_index_id.index_id, version);
+      return false;
+    }
+    iter->Next();
+  }
+  // An iterator that stopped because of an error also fails validation.
+  return iter->status().ok();
+}
+
+/*
Validate that all the tables in the RocksDB database dictionary match the .frm
files in the datadir
*/
@@ -3877,10 +3985,18 @@ bool Rdb_ddl_manager::init(Rdb_dict_manager *const dict_arg,
If validate_tables is greater than 0 run the validation. Only fail the
initialzation if the setting is 1. If the setting is 2 we continue.
*/
- if (validate_tables > 0 && !validate_schemas()) {
- if (validate_tables == 1) {
- sql_print_error("RocksDB: Problems validating data dictionary "
- "against .frm files, exiting");
+ if (validate_tables > 0) {
+ std::string msg;
+ if (!validate_schemas()) {
+ msg = "RocksDB: Problems validating data dictionary "
+ "against .frm files, exiting";
+ } else if (!validate_auto_incr()) {
+ msg = "RocksDB: Problems validating auto increment values in "
+ "data dictionary, exiting";
+ }
+ if (validate_tables == 1 && !msg.empty()) {
+ // NO_LINT_DEBUG
+ sql_print_error("%s", msg.c_str());
return true;
}
}
@@ -4154,6 +4270,7 @@ bool Rdb_ddl_manager::rename(const std::string &from, const std::string &to,
new_rec->m_auto_incr_val =
rec->m_auto_incr_val.load(std::memory_order_relaxed);
new_rec->m_key_descr_arr = rec->m_key_descr_arr;
+
// so that it's not free'd when deleting the old rec
rec->m_key_descr_arr = nullptr;
@@ -4613,13 +4730,16 @@ void Rdb_dict_manager::delete_index_info(rocksdb::WriteBatch *batch,
const GL_INDEX_ID &gl_index_id) const {
delete_with_prefix(batch, Rdb_key_def::INDEX_INFO, gl_index_id);
delete_with_prefix(batch, Rdb_key_def::INDEX_STATISTICS, gl_index_id);
+ delete_with_prefix(batch, Rdb_key_def::AUTO_INC, gl_index_id);
}
bool Rdb_dict_manager::get_index_info(
const GL_INDEX_ID &gl_index_id,
struct Rdb_index_info *const index_info) const {
- index_info->m_gl_index_id = gl_index_id;
+ if (index_info) {
+ index_info->m_gl_index_id = gl_index_id;
+ }
bool found = false;
bool error = false;
@@ -4630,6 +4750,10 @@ bool Rdb_dict_manager::get_index_info(
const rocksdb::Status &status = get_value(key, &value);
if (status.ok()) {
+ if (!index_info) {
+ return true;
+ }
+
const uchar *const val = (const uchar *)value.c_str();
const uchar *ptr = val;
index_info->m_index_dict_version = rdb_netbuf_to_uint16(val);
@@ -4668,6 +4792,11 @@ bool Rdb_dict_manager::get_index_info(
index_info->m_kv_version = rdb_netbuf_to_uint16(ptr);
ptr += RDB_SIZEOF_KV_VERSION;
index_info->m_ttl_duration = rdb_netbuf_to_uint64(ptr);
+ if ((index_info->m_kv_version ==
+ Rdb_key_def::PRIMARY_FORMAT_VERSION_TTL) &&
+ index_info->m_ttl_duration > 0) {
+ index_info->m_index_flags = Rdb_key_def::TTL_FLAG;
+ }
found = true;
break;
@@ -4709,7 +4838,7 @@ bool Rdb_dict_manager::get_index_info(
"and it may be a bug.",
index_info->m_index_dict_version, index_info->m_index_type,
index_info->m_kv_version, index_info->m_ttl_duration);
- abort_with_stack_traces();
+ abort();
}
return found;
@@ -4972,7 +5101,7 @@ void Rdb_dict_manager::resume_drop_indexes() const {
"bug.",
max_index_id_in_dict, gl_index_id.cf_id,
gl_index_id.index_id);
- abort_with_stack_traces();
+ abort();
}
}
}
@@ -5021,7 +5150,7 @@ void Rdb_dict_manager::log_start_drop_index(GL_INDEX_ID gl_index_id,
"from index id (%u,%u). MyRocks data dictionary may "
"get corrupted.",
gl_index_id.cf_id, gl_index_id.index_id);
- abort_with_stack_traces();
+ abort();
}
}
}
@@ -5079,7 +5208,7 @@ void Rdb_dict_manager::add_stats(
// IndexStats::materialize takes complete care of serialization including
// storing the version
const auto value =
- Rdb_index_stats::materialize(std::vector<Rdb_index_stats>{it}, 1.);
+ Rdb_index_stats::materialize(std::vector<Rdb_index_stats>{it});
batch->Put(m_system_cfh, rocksdb::Slice((char *)key_buf, sizeof(key_buf)),
value);
@@ -5105,6 +5234,53 @@ Rdb_index_stats Rdb_dict_manager::get_stats(GL_INDEX_ID gl_index_id) const {
return Rdb_index_stats();
}
+rocksdb::Status
+Rdb_dict_manager::put_auto_incr_val(rocksdb::WriteBatchBase *batch,
+ const GL_INDEX_ID &gl_index_id,
+ ulonglong val, bool overwrite) const {
+ uchar key_buf[Rdb_key_def::INDEX_NUMBER_SIZE * 3] = {0};
+ dump_index_id(key_buf, Rdb_key_def::AUTO_INC, gl_index_id);
+ const rocksdb::Slice key =
+ rocksdb::Slice(reinterpret_cast<char *>(key_buf), sizeof(key_buf));
+
+ // Value is constructed by storing the version and the value.
+ uchar value_buf[RDB_SIZEOF_AUTO_INCREMENT_VERSION +
+ ROCKSDB_SIZEOF_AUTOINC_VALUE] = {0};
+ uchar *ptr = value_buf;
+ rdb_netbuf_store_uint16(ptr, Rdb_key_def::AUTO_INCREMENT_VERSION);
+ ptr += RDB_SIZEOF_AUTO_INCREMENT_VERSION;
+ rdb_netbuf_store_uint64(ptr, val);
+ ptr += ROCKSDB_SIZEOF_AUTOINC_VALUE;
+ const rocksdb::Slice value =
+ rocksdb::Slice(reinterpret_cast<char *>(value_buf), ptr - value_buf);
+
+ if (overwrite) {
+ return batch->Put(m_system_cfh, key, value);
+ }
+ return batch->Merge(m_system_cfh, key, value);
+}
+
+// Reads the stored auto-increment value for gl_index_id into *new_val.
+bool Rdb_dict_manager::get_auto_incr_val(const GL_INDEX_ID &gl_index_id,
+ ulonglong *new_val) const {
+ uchar key_buf[Rdb_key_def::INDEX_NUMBER_SIZE * 3] = {0};
+ dump_index_id(key_buf, Rdb_key_def::AUTO_INC, gl_index_id);
+ std::string value;
+ const rocksdb::Status status = get_value(
+ rocksdb::Slice(reinterpret_cast<char *>(key_buf), sizeof(key_buf)),
+ &value);
+ // Reject short values so the fixed-offset reads below stay in bounds.
+ if (status.ok() && value.size() >= RDB_SIZEOF_AUTO_INCREMENT_VERSION +
+ ROCKSDB_SIZEOF_AUTOINC_VALUE) {
+ const uchar *const val = reinterpret_cast<const uchar *>(value.data());
+ if (rdb_netbuf_to_uint16(val) <= Rdb_key_def::AUTO_INCREMENT_VERSION) {
+ *new_val = rdb_netbuf_to_uint64(val + RDB_SIZEOF_AUTO_INCREMENT_VERSION);
+ return true;
+ }
+ }
+ return false;
+}
+
uint Rdb_seq_generator::get_and_update_next_number(
Rdb_dict_manager *const dict) {
DBUG_ASSERT(dict != nullptr);
diff --git a/storage/rocksdb/rdb_datadic.h b/storage/rocksdb/rdb_datadic.h
index 223f61edb43..f97c0d08d29 100644
--- a/storage/rocksdb/rdb_datadic.h
+++ b/storage/rocksdb/rdb_datadic.h
@@ -138,6 +138,7 @@ const size_t RDB_SIZEOF_INDEX_INFO_VERSION = sizeof(uint16);
const size_t RDB_SIZEOF_INDEX_TYPE = sizeof(uchar);
const size_t RDB_SIZEOF_KV_VERSION = sizeof(uint16);
const size_t RDB_SIZEOF_INDEX_FLAGS = sizeof(uint32);
+const size_t RDB_SIZEOF_AUTO_INCREMENT_VERSION = sizeof(uint16);
// Possible return values for rdb_index_field_unpack_t functions.
enum {
@@ -237,17 +238,44 @@ public:
*size = INDEX_NUMBER_SIZE;
}
+ /*
+ Get the first key that you need to position at to start iterating.
+
+ Stores into *key a "supremum" or "infimum" key value for the index.
+
+ @return Number of bytes in the key that are usable for bloom filter use.
+ */
+ inline int get_first_key(uchar *const key, uint *const size) const {
+ if (m_is_reverse_cf)
+ get_supremum_key(key, size);
+ else
+ get_infimum_key(key, size);
+
+ /* Find out how many bytes of infimum are the same as m_index_number */
+ uchar unmodified_key[INDEX_NUMBER_SIZE];
+ rdb_netbuf_store_index(unmodified_key, m_index_number);
+ int i;
+ for (i = 0; i < INDEX_NUMBER_SIZE; i++) {
+ if (key[i] != unmodified_key[i])
+ break;
+ }
+ return i;
+ }
+
/* Make a key that is right after the given key. */
static int successor(uchar *const packed_tuple, const uint &len);
+ /* Make a key that is right before the given key. */
+ static int predecessor(uchar *const packed_tuple, const uint &len);
+
/*
This can be used to compare prefixes.
if X is a prefix of Y, then we consider that X = Y.
*/
// b describes the lookup key, which can be a prefix of a.
+ // b might be outside of the index_number range, if successor() is called.
int cmp_full_keys(const rocksdb::Slice &a, const rocksdb::Slice &b) const {
DBUG_ASSERT(covers_key(a));
- DBUG_ASSERT(covers_key(b));
return memcmp(a.data(), b.data(), std::min(a.size(), b.size()));
}
@@ -383,6 +411,7 @@ public:
INDEX_STATISTICS = 6,
MAX_INDEX_ID = 7,
DDL_CREATE_INDEX_ONGOING = 8,
+ AUTO_INC = 9,
END_DICT_INDEX_ID = 255
};
@@ -395,6 +424,7 @@ public:
DDL_DROP_INDEX_ONGOING_VERSION = 1,
MAX_INDEX_ID_VERSION = 1,
DDL_CREATE_INDEX_ONGOING_VERSION = 1,
+ AUTO_INCREMENT_VERSION = 1,
// Version for index stats is stored in IndexStats struct
};
@@ -968,17 +998,17 @@ public:
Rdb_tbl_def &operator=(const Rdb_tbl_def &) = delete;
explicit Rdb_tbl_def(const std::string &name)
- : m_key_descr_arr(nullptr), m_hidden_pk_val(1), m_auto_incr_val(1) {
+ : m_key_descr_arr(nullptr), m_hidden_pk_val(0), m_auto_incr_val(0) {
set_name(name);
}
Rdb_tbl_def(const char *const name, const size_t &len)
- : m_key_descr_arr(nullptr), m_hidden_pk_val(1), m_auto_incr_val(1) {
+ : m_key_descr_arr(nullptr), m_hidden_pk_val(0), m_auto_incr_val(0) {
set_name(std::string(name, len));
}
explicit Rdb_tbl_def(const rocksdb::Slice &slice, const size_t &pos = 0)
- : m_key_descr_arr(nullptr), m_hidden_pk_val(1), m_auto_incr_val(1) {
+ : m_key_descr_arr(nullptr), m_hidden_pk_val(0), m_auto_incr_val(0) {
set_name(std::string(slice.data() + pos, slice.size() - pos));
}
@@ -991,7 +1021,7 @@ public:
std::shared_ptr<Rdb_key_def> *m_key_descr_arr;
std::atomic<longlong> m_hidden_pk_val;
- std::atomic<longlong> m_auto_incr_val;
+ std::atomic<ulonglong> m_auto_incr_val;
/* Is this a system table */
bool m_is_mysql_system_table;
@@ -1003,6 +1033,7 @@ public:
const std::string &base_dbname() const { return m_dbname; }
const std::string &base_tablename() const { return m_tablename; }
const std::string &base_partition() const { return m_partition; }
+ GL_INDEX_ID get_autoincr_gl_index_id();
};
/*
@@ -1115,6 +1146,8 @@ private:
static void free_hash_elem(void *const data);
bool validate_schemas();
+
+ bool validate_auto_incr();
};
/*
@@ -1179,8 +1212,9 @@ private:
2. internal cf_id, index id => index information
key: Rdb_key_def::INDEX_INFO(0x2) + cf_id + index_id
- value: version, index_type, kv_format_version, ttl_duration
+ value: version, index_type, kv_format_version, index_flags, ttl_duration
index_type is 1 byte, version and kv_format_version are 2 bytes.
+ index_flags is 4 bytes.
ttl_duration is 8 bytes.
3. CF id => CF flags
@@ -1209,6 +1243,11 @@ private:
key: Rdb_key_def::DDL_CREATE_INDEX_ONGOING(0x8) + cf_id + index_id
value: version
+ 9. auto_increment values
+ key: Rdb_key_def::AUTO_INC(0x9) + cf_id + index_id
+ value: version, {max auto_increment so far}
+ max auto_increment is 8 bytes
+
Data dictionary operations are atomic inside RocksDB. For example,
when creating a table with two indexes, it is necessary to call Put
three times. They have to be atomic. Rdb_dict_manager has a wrapper function
@@ -1350,6 +1389,13 @@ public:
void add_stats(rocksdb::WriteBatch *const batch,
const std::vector<Rdb_index_stats> &stats) const;
Rdb_index_stats get_stats(GL_INDEX_ID gl_index_id) const;
+
+ rocksdb::Status put_auto_incr_val(rocksdb::WriteBatchBase *batch,
+ const GL_INDEX_ID &gl_index_id,
+ ulonglong val,
+ bool overwrite = false) const;
+ bool get_auto_incr_val(const GL_INDEX_ID &gl_index_id,
+ ulonglong *new_val) const;
};
struct Rdb_index_info {
@@ -1361,6 +1407,109 @@ struct Rdb_index_info {
uint64 m_ttl_duration = 0;
};
+/*
+ @brief
+ Merge Operator for the auto_increment value in the system_cf
+
+ @detail
+ This class implements the rocksdb Merge Operator for auto_increment values
+ that are stored to the data dictionary every transaction.
+
+ The actual Merge function is triggered on compaction, memtable flushes, or
+ when get() is called on the same key.
+
+ */
+class Rdb_system_merge_op : public rocksdb::AssociativeMergeOperator {
+ public:
+ /*
+ Updates the new value associated with a key to be the maximum of the
+ passed in value and the existing value.
+
+ @param[IN] key
+ @param[IN] existing_value existing value for a key; nullptr if nonexistent
+ key
+ @param[IN] value
+ @param[OUT] new_value new value after Merge
+ @param[IN] logger
+ */
+ bool Merge(const rocksdb::Slice &key, const rocksdb::Slice *existing_value,
+ const rocksdb::Slice &value, std::string *new_value,
+ rocksdb::Logger *logger) const override {
+ DBUG_ASSERT(new_value != nullptr);
+
+ if (key.size() != Rdb_key_def::INDEX_NUMBER_SIZE * 3 ||
+ GetKeyType(key) != Rdb_key_def::AUTO_INC ||
+ value.size() !=
+ RDB_SIZEOF_AUTO_INCREMENT_VERSION + ROCKSDB_SIZEOF_AUTOINC_VALUE ||
+ GetVersion(value) > Rdb_key_def::AUTO_INCREMENT_VERSION) {
+ abort();
+ }
+
+ uint64_t merged_value = Deserialize(value);
+
+ if (existing_value != nullptr) {
+ if (existing_value->size() != RDB_SIZEOF_AUTO_INCREMENT_VERSION +
+ ROCKSDB_SIZEOF_AUTOINC_VALUE ||
+ GetVersion(*existing_value) > Rdb_key_def::AUTO_INCREMENT_VERSION) {
+ abort();
+ }
+
+ merged_value = std::max(merged_value, Deserialize(*existing_value));
+ }
+ Serialize(merged_value, new_value);
+ return true;
+ }
+
+ virtual const char *Name() const override { return "Rdb_system_merge_op"; }
+
+ private:
+ /*
+ Serializes the integer data to the new_value buffer or the target buffer
+ the merge operator will update to
+ */
+ void Serialize(const uint64_t data, std::string *new_value) const {
+ uchar value_buf[RDB_SIZEOF_AUTO_INCREMENT_VERSION +
+ ROCKSDB_SIZEOF_AUTOINC_VALUE] = {0};
+ uchar *ptr = value_buf;
+ /* fill in the auto increment version */
+ rdb_netbuf_store_uint16(ptr, Rdb_key_def::AUTO_INCREMENT_VERSION);
+ ptr += RDB_SIZEOF_AUTO_INCREMENT_VERSION;
+ /* fill in the auto increment value */
+ rdb_netbuf_store_uint64(ptr, data);
+ ptr += ROCKSDB_SIZEOF_AUTOINC_VALUE;
+ new_value->assign(reinterpret_cast<char *>(value_buf), ptr - value_buf);
+ }
+
+ /*
+ Gets the value of auto_increment type in the data dictionary from the
+ value slice
+
+ @Note Only to be used on data dictionary keys for the auto_increment type
+ */
+ uint64_t Deserialize(const rocksdb::Slice &s) const {
+ return rdb_netbuf_to_uint64(reinterpret_cast<const uchar *>(s.data()) +
+ RDB_SIZEOF_AUTO_INCREMENT_VERSION);
+ }
+
+ /*
+ Gets the full 32-bit key type read from the start of a data dictionary key.
+
+ @Note Only to be used on data dictionary keys for the auto_increment type
+ */
+ uint32_t GetKeyType(const rocksdb::Slice &s) const {
+ return rdb_netbuf_to_uint32(reinterpret_cast<const uchar *>(s.data()));
+ }
+
+ /*
+ Gets the version of the auto_increment value in the data dictionary.
+
+ @Note Only to be used on data dictionary value for the auto_increment type
+ */
+ uint16_t GetVersion(const rocksdb::Slice &s) const {
+ return rdb_netbuf_to_uint16(reinterpret_cast<const uchar *>(s.data()));
+ }
+};
+
bool rdb_is_collation_supported(const my_core::CHARSET_INFO *const cs);
} // namespace myrocks
diff --git a/storage/rocksdb/rdb_i_s.cc b/storage/rocksdb/rdb_i_s.cc
index 8d801dd430b..de33d69ddb5 100644
--- a/storage/rocksdb/rdb_i_s.cc
+++ b/storage/rocksdb/rdb_i_s.cc
@@ -797,7 +797,7 @@ static int rdb_i_s_global_info_fill_table(
"from CF with id = %u. MyRocks data dictionary may "
"be corrupted.",
cf_handle->GetID());
- abort_with_stack_traces();
+ abort();
}
snprintf(cf_id_buf, INT_BUF_LEN, "%u", cf_handle->GetID());
@@ -928,7 +928,10 @@ enum {
INDEX_NUMBER,
INDEX_TYPE,
KV_FORMAT_VERSION,
- CF
+ TTL_DURATION,
+ INDEX_FLAGS,
+ CF,
+ AUTO_INCREMENT
};
} // namespace RDB_DDL_FIELD
@@ -943,7 +946,11 @@ static ST_FIELD_INFO rdb_i_s_ddl_fields_info[] = {
ROCKSDB_FIELD_INFO("INDEX_TYPE", sizeof(uint16_t), MYSQL_TYPE_SHORT, 0),
ROCKSDB_FIELD_INFO("KV_FORMAT_VERSION", sizeof(uint16_t), MYSQL_TYPE_SHORT,
0),
+ ROCKSDB_FIELD_INFO("TTL_DURATION", sizeof(uint64), MYSQL_TYPE_LONGLONG, 0),
+ ROCKSDB_FIELD_INFO("INDEX_FLAGS", sizeof(uint64), MYSQL_TYPE_LONGLONG, 0),
ROCKSDB_FIELD_INFO("CF", NAME_LEN + 1, MYSQL_TYPE_STRING, 0),
+ ROCKSDB_FIELD_INFO("AUTO_INCREMENT", sizeof(uint64_t), MYSQL_TYPE_LONGLONG,
+ MY_I_S_MAYBE_NULL | MY_I_S_UNSIGNED),
ROCKSDB_FIELD_INFO_END};
int Rdb_ddl_scanner::add_table(Rdb_tbl_def *tdef) {
@@ -954,6 +961,7 @@ int Rdb_ddl_scanner::add_table(Rdb_tbl_def *tdef) {
DBUG_ASSERT(m_table != nullptr);
Field **field = m_table->field;
DBUG_ASSERT(field != nullptr);
+ const Rdb_dict_manager *dict_manager = rdb_get_dict_manager();
const std::string &dbname = tdef->base_dbname();
field[RDB_DDL_FIELD::TABLE_SCHEMA]->store(dbname.c_str(), dbname.size(),
@@ -984,10 +992,20 @@ int Rdb_ddl_scanner::add_table(Rdb_tbl_def *tdef) {
field[RDB_DDL_FIELD::INDEX_TYPE]->store(kd.m_index_type, true);
field[RDB_DDL_FIELD::KV_FORMAT_VERSION]->store(kd.m_kv_format_version,
true);
+ field[RDB_DDL_FIELD::TTL_DURATION]->store(kd.m_ttl_duration, true);
+ field[RDB_DDL_FIELD::INDEX_FLAGS]->store(kd.m_index_flags_bitmap, true);
std::string cf_name = kd.get_cf()->GetName();
field[RDB_DDL_FIELD::CF]->store(cf_name.c_str(), cf_name.size(),
system_charset_info);
+ ulonglong auto_incr;
+ if (dict_manager->get_auto_incr_val(tdef->get_autoincr_gl_index_id(),
+ &auto_incr)) {
+ field[RDB_DDL_FIELD::AUTO_INCREMENT]->set_notnull();
+ field[RDB_DDL_FIELD::AUTO_INCREMENT]->store(auto_incr, true);
+ } else {
+ field[RDB_DDL_FIELD::AUTO_INCREMENT]->set_null();
+ }
ret = my_core::schema_table_store_record(m_thd, m_table);
if (ret)
@@ -1495,6 +1513,117 @@ static int rdb_i_s_trx_info_init(void *const p) {
DBUG_RETURN(0);
}
+/*
+ Support for INFORMATION_SCHEMA.ROCKSDB_DEADLOCK dynamic table
+ */
+namespace RDB_DEADLOCK_FIELD {
+enum {
+ DEADLOCK_ID = 0,
+ TRANSACTION_ID,
+ CF_NAME,
+ WAITING_KEY,
+ LOCK_TYPE,
+ INDEX_NAME,
+ TABLE_NAME,
+ ROLLED_BACK
+};
+} // namespace RDB_DEADLOCK_FIELD
+
+static ST_FIELD_INFO rdb_i_s_deadlock_info_fields_info[] = {
+ ROCKSDB_FIELD_INFO("DEADLOCK_ID", sizeof(ulonglong), MYSQL_TYPE_LONGLONG,
+ 0),
+ ROCKSDB_FIELD_INFO("TRANSACTION_ID", sizeof(ulonglong), MYSQL_TYPE_LONGLONG,
+ 0),
+ ROCKSDB_FIELD_INFO("CF_NAME", NAME_LEN + 1, MYSQL_TYPE_STRING, 0),
+ ROCKSDB_FIELD_INFO("WAITING_KEY", FN_REFLEN + 1, MYSQL_TYPE_STRING, 0),
+ ROCKSDB_FIELD_INFO("LOCK_TYPE", NAME_LEN + 1, MYSQL_TYPE_STRING, 0),
+ ROCKSDB_FIELD_INFO("INDEX_NAME", NAME_LEN + 1, MYSQL_TYPE_STRING, 0),
+ ROCKSDB_FIELD_INFO("TABLE_NAME", NAME_LEN + 1, MYSQL_TYPE_STRING, 0),
+ ROCKSDB_FIELD_INFO("ROLLED_BACK", sizeof(ulonglong), MYSQL_TYPE_LONGLONG,
+ 0),
+ ROCKSDB_FIELD_INFO_END};
+
+/* Fill the information_schema.rocksdb_deadlock virtual table */
+static int rdb_i_s_deadlock_info_fill_table(
+ my_core::THD *const thd, my_core::TABLE_LIST *const tables,
+ my_core::Item *const cond MY_ATTRIBUTE((__unused__))) {
+ DBUG_ENTER_FUNC();
+
+ DBUG_ASSERT(thd != nullptr);
+ DBUG_ASSERT(tables != nullptr);
+ DBUG_ASSERT(tables->table != nullptr);
+ DBUG_ASSERT(tables->table->field != nullptr);
+
+ static const std::string str_exclusive("EXCLUSIVE");
+ static const std::string str_shared("SHARED");
+
+ int ret = 0;
+ rocksdb::DB *const rdb = rdb_get_rocksdb_db();
+
+ if (!rdb) {
+ DBUG_RETURN(ret);
+ }
+
+ const std::vector<Rdb_deadlock_info> &all_dl_info = rdb_get_deadlock_info();
+
+ ulonglong id = 0;
+ for (const auto &info : all_dl_info) {
+ for (const auto &trx_info : info.path) {
+ tables->table->field[RDB_DEADLOCK_FIELD::DEADLOCK_ID]->store(id, true);
+ tables->table->field[RDB_DEADLOCK_FIELD::TRANSACTION_ID]->store(
+ trx_info.trx_id, true);
+ tables->table->field[RDB_DEADLOCK_FIELD::CF_NAME]->store(
+ trx_info.cf_name.c_str(), trx_info.cf_name.length(),
+ system_charset_info);
+ tables->table->field[RDB_DEADLOCK_FIELD::WAITING_KEY]->store(
+ trx_info.waiting_key.c_str(), trx_info.waiting_key.length(),
+ system_charset_info);
+ if (trx_info.exclusive_lock) {
+ tables->table->field[RDB_DEADLOCK_FIELD::LOCK_TYPE]->store(
+ str_exclusive.c_str(), str_exclusive.length(), system_charset_info);
+ } else {
+ tables->table->field[RDB_DEADLOCK_FIELD::LOCK_TYPE]->store(
+ str_shared.c_str(), str_shared.length(), system_charset_info);
+ }
+ tables->table->field[RDB_DEADLOCK_FIELD::INDEX_NAME]->store(
+ trx_info.index_name.c_str(), trx_info.index_name.length(),
+ system_charset_info);
+ tables->table->field[RDB_DEADLOCK_FIELD::TABLE_NAME]->store(
+ trx_info.table_name.c_str(), trx_info.table_name.length(),
+ system_charset_info);
+ tables->table->field[RDB_DEADLOCK_FIELD::ROLLED_BACK]->store(
+ trx_info.trx_id == info.victim_trx_id, true);
+
+ /* Tell MySQL about this row in the virtual table */
+ ret = static_cast<int>(
+ my_core::schema_table_store_record(thd, tables->table));
+
+ if (ret != 0) {
+ DBUG_RETURN(ret); /* a break would only leave the inner loop */
+ }
+ }
+ id++;
+ }
+
+ DBUG_RETURN(ret);
+}
+
+/* Initialize the information_schema.rocksdb_deadlock virtual table */
+static int rdb_i_s_deadlock_info_init(void *const p) {
+ DBUG_ENTER_FUNC();
+
+ DBUG_ASSERT(p != nullptr);
+
+ my_core::ST_SCHEMA_TABLE *schema;
+
+ schema = (my_core::ST_SCHEMA_TABLE *)p;
+
+ schema->fields_info = rdb_i_s_deadlock_info_fields_info;
+ schema->fill_table = rdb_i_s_deadlock_info_fill_table;
+
+ DBUG_RETURN(0);
+}
+
static int rdb_i_s_deinit(void *p MY_ATTRIBUTE((__unused__))) {
DBUG_ENTER_FUNC();
DBUG_RETURN(0);
@@ -1678,4 +1807,20 @@ struct st_maria_plugin rdb_i_s_trx_info = {
nullptr, /* config options */
MYROCKS_MARIADB_PLUGIN_MATURITY_LEVEL
};
+
+struct st_maria_plugin rdb_i_s_deadlock_info = {
+ MYSQL_INFORMATION_SCHEMA_PLUGIN,
+ &rdb_i_s_info,
+ "ROCKSDB_DEADLOCK",
+ "Facebook",
+ "RocksDB transaction information", /* NOTE(review): table is ROCKSDB_DEADLOCK; confirm wording */
+ PLUGIN_LICENSE_GPL,
+ rdb_i_s_deadlock_info_init,
+ nullptr,
+ 0x0001, /* version number (0.1) */
+ nullptr, /* status variables */
+ nullptr, /* system variables */
+ nullptr, /* config options */
+ MYROCKS_MARIADB_PLUGIN_MATURITY_LEVEL
+};
} // namespace myrocks
diff --git a/storage/rocksdb/rdb_i_s.h b/storage/rocksdb/rdb_i_s.h
index 08d35e17ba9..d6a48bf3fec 100644
--- a/storage/rocksdb/rdb_i_s.h
+++ b/storage/rocksdb/rdb_i_s.h
@@ -32,4 +32,5 @@ extern struct st_maria_plugin rdb_i_s_ddl;
extern struct st_maria_plugin rdb_i_s_index_file_map;
extern struct st_maria_plugin rdb_i_s_lock_info;
extern struct st_maria_plugin rdb_i_s_trx_info;
+extern struct st_maria_plugin rdb_i_s_deadlock_info;
} // namespace myrocks
diff --git a/storage/rocksdb/rdb_io_watchdog.cc b/storage/rocksdb/rdb_io_watchdog.cc
index 039b0d7baf1..f09efefcd2a 100644
--- a/storage/rocksdb/rdb_io_watchdog.cc
+++ b/storage/rocksdb/rdb_io_watchdog.cc
@@ -45,7 +45,7 @@ void Rdb_io_watchdog::expire_io_callback(union sigval timer_data) {
"Shutting the service down.",
m_write_timeout);
- abort_with_stack_traces();
+ abort();
}
void Rdb_io_watchdog::io_check_callback(union sigval timer_data) {
diff --git a/storage/rocksdb/rdb_perf_context.cc b/storage/rocksdb/rdb_perf_context.cc
index d126d156314..0c561c62ab2 100644
--- a/storage/rocksdb/rdb_perf_context.cc
+++ b/storage/rocksdb/rdb_perf_context.cc
@@ -47,8 +47,13 @@ std::string rdb_pc_stat_types[] = {
"BLOCK_READ_TIME",
"BLOCK_CHECKSUM_TIME",
"BLOCK_DECOMPRESS_TIME",
+ "GET_READ_BYTES",
+ "MULTIGET_READ_BYTES",
+ "ITER_READ_BYTES",
"INTERNAL_KEY_SKIPPED_COUNT",
"INTERNAL_DELETE_SKIPPED_COUNT",
+ "INTERNAL_RECENT_SKIPPED_COUNT",
+ "INTERNAL_MERGE_COUNT",
"GET_SNAPSHOT_TIME",
"GET_FROM_MEMTABLE_TIME",
"GET_FROM_MEMTABLE_COUNT",
@@ -56,9 +61,12 @@ std::string rdb_pc_stat_types[] = {
"GET_FROM_OUTPUT_FILES_TIME",
"SEEK_ON_MEMTABLE_TIME",
"SEEK_ON_MEMTABLE_COUNT",
+ "NEXT_ON_MEMTABLE_COUNT",
+ "PREV_ON_MEMTABLE_COUNT",
"SEEK_CHILD_SEEK_TIME",
"SEEK_CHILD_SEEK_COUNT",
- "SEEK_IN_HEAP_TIME",
+ "SEEK_MIN_HEAP_TIME",
+ "SEEK_MAX_HEAP_TIME",
"SEEK_INTERNAL_SEEK_TIME",
"FIND_NEXT_USER_ENTRY_TIME",
"WRITE_WAL_TIME",
@@ -74,6 +82,12 @@ std::string rdb_pc_stat_types[] = {
"NEW_TABLE_ITERATOR_NANOS",
"BLOCK_SEEK_NANOS",
"FIND_TABLE_NANOS",
+ "BLOOM_MEMTABLE_HIT_COUNT",
+ "BLOOM_MEMTABLE_MISS_COUNT",
+ "BLOOM_SST_HIT_COUNT",
+ "BLOOM_SST_MISS_COUNT",
+ "KEY_LOCK_WAIT_TIME",
+ "KEY_LOCK_WAIT_COUNT",
"IO_THREAD_POOL_ID",
"IO_BYTES_WRITTEN",
"IO_BYTES_READ",
@@ -107,8 +121,13 @@ static void harvest_diffs(Rdb_atomic_perf_counters *const counters) {
IO_PERF_RECORD(block_read_time);
IO_PERF_RECORD(block_checksum_time);
IO_PERF_RECORD(block_decompress_time);
+ IO_PERF_RECORD(get_read_bytes);
+ IO_PERF_RECORD(multiget_read_bytes);
+ IO_PERF_RECORD(iter_read_bytes);
IO_PERF_RECORD(internal_key_skipped_count);
IO_PERF_RECORD(internal_delete_skipped_count);
+ IO_PERF_RECORD(internal_recent_skipped_count);
+ IO_PERF_RECORD(internal_merge_count);
IO_PERF_RECORD(get_snapshot_time);
IO_PERF_RECORD(get_from_memtable_time);
IO_PERF_RECORD(get_from_memtable_count);
@@ -116,9 +135,12 @@ static void harvest_diffs(Rdb_atomic_perf_counters *const counters) {
IO_PERF_RECORD(get_from_output_files_time);
IO_PERF_RECORD(seek_on_memtable_time);
IO_PERF_RECORD(seek_on_memtable_count);
+ IO_PERF_RECORD(next_on_memtable_count);
+ IO_PERF_RECORD(prev_on_memtable_count);
IO_PERF_RECORD(seek_child_seek_time);
IO_PERF_RECORD(seek_child_seek_count);
IO_PERF_RECORD(seek_min_heap_time);
+ IO_PERF_RECORD(seek_max_heap_time);
IO_PERF_RECORD(seek_internal_seek_time);
IO_PERF_RECORD(find_next_user_entry_time);
IO_PERF_RECORD(write_wal_time);
@@ -134,6 +156,13 @@ static void harvest_diffs(Rdb_atomic_perf_counters *const counters) {
IO_PERF_RECORD(new_table_iterator_nanos);
IO_PERF_RECORD(block_seek_nanos);
IO_PERF_RECORD(find_table_nanos);
+ IO_PERF_RECORD(bloom_memtable_hit_count);
+ IO_PERF_RECORD(bloom_memtable_miss_count);
+ IO_PERF_RECORD(bloom_sst_hit_count);
+ IO_PERF_RECORD(bloom_sst_miss_count);
+ IO_PERF_RECORD(key_lock_wait_time);
+ IO_PERF_RECORD(key_lock_wait_count);
+
IO_STAT_RECORD(thread_pool_id);
IO_STAT_RECORD(bytes_written);
IO_STAT_RECORD(bytes_read);
diff --git a/storage/rocksdb/rdb_perf_context.h b/storage/rocksdb/rdb_perf_context.h
index f9b9fd48d3e..2aca3dc3bfd 100644
--- a/storage/rocksdb/rdb_perf_context.h
+++ b/storage/rocksdb/rdb_perf_context.h
@@ -37,8 +37,13 @@ enum {
PC_BLOCK_READ_TIME,
PC_BLOCK_CHECKSUM_TIME,
PC_BLOCK_DECOMPRESS_TIME,
+ PC_GET_READ_BYTES,
+ PC_MULTIGET_READ_BYTES,
+ PC_ITER_READ_BYTES,
PC_KEY_SKIPPED,
PC_DELETE_SKIPPED,
+ PC_RECENT_SKIPPED,
+ PC_MERGE,
PC_GET_SNAPSHOT_TIME,
PC_GET_FROM_MEMTABLE_TIME,
PC_GET_FROM_MEMTABLE_COUNT,
@@ -46,9 +51,12 @@ enum {
PC_GET_FROM_OUTPUT_FILES_TIME,
PC_SEEK_ON_MEMTABLE_TIME,
PC_SEEK_ON_MEMTABLE_COUNT,
+ PC_NEXT_ON_MEMTABLE_COUNT,
+ PC_PREV_ON_MEMTABLE_COUNT,
PC_SEEK_CHILD_SEEK_TIME,
PC_SEEK_CHILD_SEEK_COUNT,
PC_SEEK_MIN_HEAP_TIME,
+ PC_SEEK_MAX_HEAP_TIME,
PC_SEEK_INTERNAL_SEEK_TIME,
PC_FIND_NEXT_USER_ENTRY_TIME,
PC_WRITE_WAL_TIME,
@@ -64,6 +72,12 @@ enum {
PC_NEW_TABLE_ITERATOR_NANOS,
PC_BLOCK_SEEK_NANOS,
PC_FIND_TABLE_NANOS,
+ PC_BLOOM_MEMTABLE_HIT_COUNT,
+ PC_BLOOM_MEMTABLE_MISS_COUNT,
+ PC_BLOOM_SST_HIT_COUNT,
+ PC_BLOOM_SST_MISS_COUNT,
+ PC_KEY_LOCK_WAIT_TIME,
+ PC_KEY_LOCK_WAIT_COUNT,
PC_IO_THREAD_POOL_ID,
PC_IO_BYTES_WRITTEN,
PC_IO_BYTES_READ,
diff --git a/storage/rocksdb/rdb_psi.cc b/storage/rocksdb/rdb_psi.cc
index b6bc89a02f9..b5309df5973 100644
--- a/storage/rocksdb/rdb_psi.cc
+++ b/storage/rocksdb/rdb_psi.cc
@@ -48,7 +48,7 @@ my_core::PSI_thread_info all_rocksdb_threads[] = {
my_core::PSI_mutex_key rdb_psi_open_tbls_mutex_key, rdb_signal_bg_psi_mutex_key,
rdb_signal_drop_idx_psi_mutex_key, rdb_collation_data_mutex_key,
rdb_mem_cmp_space_mutex_key, key_mutex_tx_list, rdb_sysvars_psi_mutex_key,
- rdb_cfm_mutex_key;
+ rdb_cfm_mutex_key, rdb_sst_commit_key;
my_core::PSI_mutex_info all_rocksdb_mutexes[] = {
{&rdb_psi_open_tbls_mutex_key, "open tables", PSI_FLAG_GLOBAL},
@@ -60,6 +60,7 @@ my_core::PSI_mutex_info all_rocksdb_mutexes[] = {
{&key_mutex_tx_list, "tx_list", PSI_FLAG_GLOBAL},
{&rdb_sysvars_psi_mutex_key, "setting sysvar", PSI_FLAG_GLOBAL},
{&rdb_cfm_mutex_key, "column family manager", PSI_FLAG_GLOBAL},
+ {&rdb_sst_commit_key, "sst commit", PSI_FLAG_GLOBAL},
};
my_core::PSI_rwlock_key key_rwlock_collation_exception_list,
diff --git a/storage/rocksdb/rdb_psi.h b/storage/rocksdb/rdb_psi.h
index 0a62f411ade..d4318ee3dba 100644
--- a/storage/rocksdb/rdb_psi.h
+++ b/storage/rocksdb/rdb_psi.h
@@ -40,7 +40,8 @@ extern my_core::PSI_thread_key rdb_background_psi_thread_key,
extern my_core::PSI_mutex_key rdb_psi_open_tbls_mutex_key,
rdb_signal_bg_psi_mutex_key, rdb_signal_drop_idx_psi_mutex_key,
rdb_collation_data_mutex_key, rdb_mem_cmp_space_mutex_key,
- key_mutex_tx_list, rdb_sysvars_psi_mutex_key, rdb_cfm_mutex_key;
+ key_mutex_tx_list, rdb_sysvars_psi_mutex_key, rdb_cfm_mutex_key,
+ rdb_sst_commit_key;
extern my_core::PSI_rwlock_key key_rwlock_collation_exception_list,
key_rwlock_read_free_rpl_tables, key_rwlock_skip_unique_check_tables;
diff --git a/storage/rocksdb/rdb_sst_info.cc b/storage/rocksdb/rdb_sst_info.cc
index 72abfab5d6b..e0dfb011f87 100644
--- a/storage/rocksdb/rdb_sst_info.cc
+++ b/storage/rocksdb/rdb_sst_info.cc
@@ -43,6 +43,7 @@
#include "./ha_rocksdb.h"
#include "./ha_rocksdb_proto.h"
#include "./rdb_cf_options.h"
+#include "./rdb_psi.h"
namespace myrocks {
@@ -262,7 +263,6 @@ rocksdb::Status Rdb_sst_file_ordered::put(const rocksdb::Slice &key,
if (!m_first_key.empty()) {
rocksdb::Slice first_key_slice(m_first_key);
int cmp = m_file.compare(first_key_slice, key);
- DBUG_ASSERT(cmp != 0);
m_use_stack = (cmp > 0);
// Apply the first key to the stack or SST
@@ -326,11 +326,11 @@ Rdb_sst_info::Rdb_sst_info(rocksdb::DB *const db, const std::string &tablename,
const rocksdb::DBOptions &db_options,
const bool &tracing)
: m_db(db), m_cf(cf), m_db_options(db_options), m_curr_size(0),
- m_sst_count(0), m_background_error(HA_EXIT_SUCCESS),
+ m_sst_count(0), m_background_error(HA_EXIT_SUCCESS), m_committed(false),
#if defined(RDB_SST_INFO_USE_THREAD)
m_queue(), m_mutex(), m_cond(), m_thread(nullptr), m_finished(false),
#endif
- m_sst_file(nullptr), m_tracing(tracing) {
+ m_sst_file(nullptr), m_tracing(tracing), m_print_client_error(true) {
m_prefix = db->GetName() + "/";
std::string normalized_table;
@@ -357,6 +357,7 @@ Rdb_sst_info::Rdb_sst_info(rocksdb::DB *const db, const std::string &tablename,
// Set the maximum size to 3 times the cf's target size
m_max_size = cf_descr.options.target_file_size_base * 3;
}
+ mysql_mutex_init(rdb_sst_commit_key, &m_commit_mutex, MY_MUTEX_INIT_FAST);
}
Rdb_sst_info::~Rdb_sst_info() {
@@ -364,6 +365,7 @@ Rdb_sst_info::~Rdb_sst_info() {
#if defined(RDB_SST_INFO_USE_THREAD)
DBUG_ASSERT(m_thread == nullptr);
#endif
+ mysql_mutex_destroy(&m_commit_mutex);
}
int Rdb_sst_info::open_new_sst_file() {
@@ -428,6 +430,8 @@ void Rdb_sst_info::close_curr_sst_file() {
int Rdb_sst_info::put(const rocksdb::Slice &key, const rocksdb::Slice &value) {
int rc;
+ DBUG_ASSERT(!m_committed);
+
if (m_curr_size + key.size() + value.size() >= m_max_size) {
// The current sst file has reached its maximum, close it out
close_curr_sst_file();
@@ -461,7 +465,21 @@ int Rdb_sst_info::put(const rocksdb::Slice &key, const rocksdb::Slice &value) {
return HA_EXIT_SUCCESS;
}
-int Rdb_sst_info::commit() {
+int Rdb_sst_info::commit(bool print_client_error) {
+ int ret = HA_EXIT_SUCCESS;
+
+ // Both the transaction clean up and the ha_rocksdb handler have
+ // references to this Rdb_sst_info and both can call commit, so
+ // synchronize on the object here.
+ RDB_MUTEX_LOCK_CHECK(m_commit_mutex);
+
+ if (m_committed) {
+ RDB_MUTEX_UNLOCK_CHECK(m_commit_mutex);
+ return ret;
+ }
+
+ m_print_client_error = print_client_error;
+
if (m_curr_size > 0) {
// Close out any existing files
close_curr_sst_file();
@@ -480,16 +498,24 @@ int Rdb_sst_info::commit() {
}
#endif
+ m_committed = true;
+ RDB_MUTEX_UNLOCK_CHECK(m_commit_mutex);
+
// Did we get any errors?
if (have_background_error()) {
- return get_and_reset_background_error();
+ ret = get_and_reset_background_error();
}
- return HA_EXIT_SUCCESS;
+ m_print_client_error = true;
+ return ret;
}
void Rdb_sst_info::set_error_msg(const std::string &sst_file_name,
const rocksdb::Status &s) {
+
+ if (!m_print_client_error)
+ return;
+
#if defined(RDB_SST_INFO_USE_THREAD)
// Both the foreground and background threads can set the error message
// so lock the mutex to protect it. We only want the first error that
diff --git a/storage/rocksdb/rdb_sst_info.h b/storage/rocksdb/rdb_sst_info.h
index 1dee0fd0518..42f6458e46b 100644
--- a/storage/rocksdb/rdb_sst_info.h
+++ b/storage/rocksdb/rdb_sst_info.h
@@ -128,6 +128,8 @@ class Rdb_sst_info {
std::string m_prefix;
static std::atomic<uint64_t> m_prefix_counter;
static std::string m_suffix;
+ bool m_committed;
+ mysql_mutex_t m_commit_mutex;
#if defined(RDB_SST_INFO_USE_THREAD)
std::queue<Rdb_sst_file_ordered *> m_queue;
std::mutex m_mutex;
@@ -137,6 +139,7 @@ class Rdb_sst_info {
#endif
Rdb_sst_file_ordered *m_sst_file;
const bool m_tracing;
+ bool m_print_client_error;
int open_new_sst_file();
void close_curr_sst_file();
@@ -157,7 +160,8 @@ class Rdb_sst_info {
~Rdb_sst_info();
int put(const rocksdb::Slice &key, const rocksdb::Slice &value);
- int commit();
+ int commit(bool print_client_error = true);
+ bool is_committed() const { return m_committed; }
bool have_background_error() { return m_background_error != 0; }
diff --git a/storage/rocksdb/rdb_utils.cc b/storage/rocksdb/rdb_utils.cc
index 335676a6ba4..723e079a165 100644
--- a/storage/rocksdb/rdb_utils.cc
+++ b/storage/rocksdb/rdb_utils.cc
@@ -352,4 +352,37 @@ const char *get_rocksdb_supported_compression_types()
return compression_methods_buf.c_str();
}
+bool rdb_check_rocksdb_corruption() {
+ return !my_access(myrocks::rdb_corruption_marker_file_name().c_str(), F_OK);
+}
+
+/* Creates the corruption marker file; logs an error if that fails. */
+void rdb_persist_corruption_marker() {
+ const std::string &fileName(myrocks::rdb_corruption_marker_file_name());
+ /* O_SYNC is not supported on windows */
+ int fd = my_open(fileName.c_str(), O_CREAT | IF_WIN(0, O_SYNC), MYF(MY_WME));
+ if (fd < 0) {
+ sql_print_error("RocksDB: Can't create file %s to mark rocksdb as "
+ "corrupted.",
+ fileName.c_str());
+ return; /* nothing was opened, so there is nothing to sync or close */
+ }
+ sql_print_information("RocksDB: Creating the file %s to abort mysqld "
+ "restarts. Remove this file from the data directory "
+ "after fixing the corruption to recover. ",
+ fileName.c_str());
+
+#ifdef _WIN32
+ /* A replacement for O_SYNC flag above */
+ my_sync(fd, MYF(0));
+#endif
+
+ int ret = my_close(fd, MYF(MY_WME));
+ if (ret) {
+ // NO_LINT_DEBUG
+ sql_print_error("RocksDB: Error (%d) closing the file %s", ret,
+ fileName.c_str());
+ }
+}
+
} // namespace myrocks
diff --git a/storage/rocksdb/rdb_utils.h b/storage/rocksdb/rdb_utils.h
index 3feda5d82ad..3125941ee78 100644
--- a/storage/rocksdb/rdb_utils.h
+++ b/storage/rocksdb/rdb_utils.h
@@ -84,7 +84,7 @@ namespace myrocks {
do { \
if (!(expr)) { \
my_safe_printf_stderr("\nShip assert failure: \'%s\'\n", #expr); \
- abort_with_stack_traces(); \
+ abort(); \
} \
} while (0)
#endif // SHIP_ASSERT
@@ -250,12 +250,20 @@ inline void rdb_check_mutex_call_result(const char *function_name,
// This will hopefully result in a meaningful stack trace which we can use
// to efficiently debug the root cause.
- abort_with_stack_traces();
+ abort();
}
}
void rdb_log_status_error(const rocksdb::Status &s, const char *msg = nullptr);
+// return true if the marker file exists which indicates that the corruption
+// has been detected
+bool rdb_check_rocksdb_corruption();
+
+// stores a marker file in the data directory so that after restart server
+// is still aware that rocksdb data is corrupted
+void rdb_persist_corruption_marker();
+
/*
Helper functions to parse strings.
*/
diff --git a/storage/rocksdb/rocksdb b/storage/rocksdb/rocksdb
-Subproject 9a970c81af9807071bd690f4c808c5045866291
+Subproject ba295cda29daee3ffe58549542804efdfd96978